# Pixel-shader listing grouped by clock. The trailing comment on each
# instruction names the unit it is annotated as using (tex, shader 0 or
# shader 1). The final colour is moved to oC0 at the end of clock 6.
# clock 1
texld r0, t0, s0 # tex fetch
madr r0, r0, c1.r, c1.g # _bx2 in tex
nrm_pp r1.rgb, t4 # nrm in shader 0
dp3 r1.r, r1, r0 # 3D dot product in shader 1
mul r0.a, r0, r0 # dual issue in shader 1
# clock 2
mul r1.a, r0.a, c2.a # dual issue in shader 0
mul r0.rgb, r1.r, r0 # dual issue in shader 0
add r0.a, r1.r, r1.r # fx2 in shader 0
mad r0.rg, r0.a, c1, c1.a # mad w/2 const in shader 1
mad r1.ba, r1.a, r0.a, c2 # dual issue in shader 1 (three sources: multiply-add, not mul)
# clock 3
rcp r0.a, r0.a # reciprocal in shader 0
mul r0.rg, r0, r0.a # div instruction in shader 0
mul r0.a, r0.a, r1.a # dual issue in shader 0
texld r2, r0, s1 # texture fetch
mad r2.rgb, r0.a, r2, c5 # mad in shader 1
abs r0.a, r0.a # abs in shader 1
log r0.a, r0.a # log in shader 1
# clock 4
rcp r0.a, t1.a # reciprocal in shader 0
mul r0.rg, t1, r0.a # div instruction in shader 0
mul r0.a, r0.a, c2.g # dual issue in shader 0
texld r1, r0, s3 # tex fetch
mad r1.rgb, r1, c4, -r2 # mad in shader 1
exp r0.a, r0.a # dual issue in shader 1
# clock 5
texld r0, r1.bar, s2 # texture coordinates swizzle
mad r0.rgb, r0, v0, r1 # color calculation in shader 1
mul r0.a, r1, v0 # dual issue in shader 1
# clock 6
mul r1.rgb, r0.a, c5.a # mul in shader 0
mad r0.rgb, r1, r0.a, r0 # mad in shader 1
mov r0.a, c3.a # move in shader 1
mov oC0, r0 # move in shader 1
// Scheduled form of the shader. The trailing comment on each instruction is
// "<index> <unit>", where the unit is one of SCT, TEX or SCB.
// NOTE(review): the indices run 0-8; presumably zero-based pass numbers — confirm against the tool's per-pass report.
label l0 // 0 SCT
texr h0, v4, #0, 2D // 0 TEX
nrmh h2.xyz, g8_n // 0 TEX
madr r0, h0, cConst01.x, cConst01.y // 0 SCB
dp3r_m2 r0.y, h2, r0 // 1 SCT
mulr r0.w, r0, r0 // 1 SCT
mulr r0.x, r0.w, cConst02.w // 1 SCB
divr r1.zw, g5.xxxy, g5.w // 2 SCT
madr r1.xy, r0.y, cConst01.xyxx, cConst01.w // 2 SCT
madr r0.x, r0, r0.y, cConst02.w // 2 SCB
divr r0.zw, r1.xxxy, r0.y // 3 SCT
texr h3, r1.zwzz, #3, 2D // 3 TEX
divr r2.w, r0.x, r0.y // 4 SCT
texr h0.xyz, r0.zwzz, #1, 2D // 4 TEX
madr r0.xyz, r2.w, h0, cConst05 // 4 SCB
madr r1.xyz, h3, cConst04, -r0 // 5 SCT
movr r0.xy, r1.xzzw // 5 SCB
movr r0.z, h3.w // 5 SCB
movh h4, v1 // 6 TEX
mulr r1.w, h3, h4 // 6 SCB
mulr r0.w, r1, cConst05 // 7 SCT
texr h0.xyz, r0.yzxw, #2, CUBE // 7 TEX
madr r0.xyz, h0, h4, r1 // 7 SCB
madr h0.xyz, r0.w, r1.w, r0 // 8 SCB
movh h0.w, cConst03 // 8 SCB
Pass SCT TEX SCB
1: 50% 100% 100%
2: 50% 0% 25%
3: 100% 0% 25%
4: 50% 100% 0%
5: 25% 100% 75%
6: 75% 0% 75%
7: 0% 0% 25%
8: 25% 100% 75%
9: 0% 0% 100%
MEAN: 41% 44% 55%
Effective passes: 9.00
No, the same instructions are executed for every pixel in the quad.
Jaws said: Okay, I'm even more confused by this varying issuing rate!
So a single G70 fragment pipeline can vary between 3-9 instructions/cycle then???
I'm just guessing here, but could this be local to a 'single' pipeline, while overall for a 'quad' pipeline it would be constant at 20 instructions/cycle?
I.e. a quad can be considered as an independent SIMD processor that can issue 20 inst./cycle, but a single pipe within the quad can have a variance from 5 inst./cycle while the quad remains consistent at 20 inst./cycle?
Xmas said: No, the same instructions are executed for every pixel in the quad.
Jaws said: Okay, I'm even more confused by this varying issuing rate!
So a single G70 fragment pipeline can vary between 3-9 instructions/cycle then???
I'm just guessing here, but could this be local to a 'single' pipeline, while overall for a 'quad' pipeline it would be constant at 20 instructions/cycle?
I.e. a quad can be considered as an independent SIMD processor that can issue 20 inst./cycle, but a single pipe within the quad can have a variance from 5 inst./cycle while the quad remains consistent at 20 inst./cycle?
Where did you get 9 instructions/clock from?
Who stated 5 instructions/cycle?
Jaws said: From the code snippet from Jawed, it shows up to 7 instructions/cycle, and up to 9 from the other snippet, or am I missing something here? This is what's confusing, because it's quoted at 5 inst./cycle?
Xmas said: Who stated 5 instructions/cycle?
Jaws said: From the code snippet from Jawed, it shows up to 7 instructions/cycle, and up to 9 from the other snippet, or am I missing something here? This is what's confusing, because it's quoted at 5 inst./cycle?
...
It seems to me they count 4 madd/dp (dual- and co-issue) plus FP16 normalize, and the special functions "replacing" one of the co-issued instructions each (which is not entirely true, as you can still perform a madd on the SF result).
Jaws said: This diagram shows 5 inst./cycle? It sounds like they're including the 16-bit normalise instruction with this number?
I'm trying to get to an idea of what should be counted, it's like the IBM G5's, IIRC, they say 8 or 5 instruction/cycle depending on whether you include branch/load/stores etc...so this seems to be similarly confusing too, depending on what you count?
Little OT: so I assume they started to release some in-depth info about RSX...
ERP said: It's actually even more complex than that.
There are limits on input and output register counts, how the ops use the execution units and how they are paired.