A friend of mine wrote a fillrate test program, and I ran a few simple tests with it.
program #1
program #2
I got 525 MP/s with program #1 and 2476 MP/s with program #2, which suggests that the current Catalyst driver (3.9) can effectively remove instructions that don't contribute to the final result.
program #1
ps_2_0
// Fillrate test shader, variant 1: the colour output is taken from r0, so
// every mul/add below feeds the value that is finally written to oC0.
dcl t0
// Seed the temporaries with t0.
mov r0, t0
mov r1, t0
mov r2, t0
mov r3, t0
mov r5, t0 // r5 is written here but never read again in this program
// Scale each temporary by its own constant register (c0..c3).
mul r0, r0, c0 // pass1
mul r1, r1, c1 // pass1
mul r2, r2, c2 // pass2
mul r3, r3, c3 // pass2
// Sum the four products pairwise, then combine the two partial sums in r0.
add r0, r0, r1 // pass3
add r2, r2, r3 // pass3
add r0, r2, r0 // pass4
mov oC0, r0 // output = t0*c0 + t0*c1 + t0*c2 + t0*c3
program #2
ps_2_0
// Fillrate test shader, variant 2: the colour output is taken from r5, which
// is only a copy of t0. The mul/add chain on r0..r3 is still present in the
// listing, but its result is never written to oC0.
dcl t0
// Seed the temporaries with t0.
mov r0, t0
mov r1, t0
mov r2, t0
mov r3, t0
mov r5, t0 // r5 is the only temporary that reaches the output
// Scale each temporary by its own constant register (c0..c3).
mul r0, r0, c0 // pass1
mul r1, r1, c1 // pass1
mul r2, r2, c2 // pass2
mul r3, r3, c3 // pass2
// Sum the four products into r0; this sum is not read by any later instruction.
add r0, r0, r1 // pass3
add r2, r2, r3 // pass3
add r0, r2, r0 // pass4
mov oC0, r5 // output = t0, unmodified
I got 525 MP/s with program #1 and 2476 MP/s with program #2, which suggests that the current Catalyst driver (3.9) can effectively remove instructions that don't contribute to the final result.