This is OK if the unit is fully pipelined and works on 6 vertices at the same time, but the code is big.
vload r1,r,r
vload r2,r,r
vload r3,r,r
vload r4,r,r
vload r5,r,r
vload r6,r,r
vmul r,r,r1
vmadd r,r,r2
vmadd r,r,r3
vmadd r,r,r4
vmadd r,r,r5
vmadd r,r,r6
vmul r,r,r1
vmadd r,r,r2
vmadd r,r,r3
vmadd r,r,r4
vmadd r,r,r5
vmadd r,r,r6
vmul r,r,r1
vmadd r,r,r2
vmadd r,r,r3
vmadd r,r,r4
vmadd r,r,r5
vmadd r,r,r6
vmul r,r,r1
vmadd r,r,r2
vmadd r,r,r3
vmadd r,r,r4
vmadd r,r,r5
vmadd r,r,r6
This is a matrix-vertex multiply on 6 vertices; it is meant as HUMOR, but there is no stall.
vload r1,r,r
vload r2,r,r
vload r3,r,r
vload r4,r,r
vload r5,r,r
vload r6,r,r
vmul r,r,r1
vmadd r,r,r2
vmadd r,r,r3
vmadd r,r,r4
vmadd r,r,r5
vmadd r,r,r6
vmul r,r,r1
vmadd r,r,r2
vmadd r,r,r3
vmadd r,r,r4
vmadd r,r,r5
vmadd r,r,r6
vmul r,r,r1
vmadd r,r,r2
vmadd r,r,r3
vmadd r,r,r4
vmadd r,r,r5
vmadd r,r,r6
vmul r,r,r1
vmadd r,r,r2
vmadd r,r,r3
vmadd r,r,r4
vmadd r,r,r5
vmadd r,r,r6
This is a matrix-vertex multiply on 6 vertices; it is meant as HUMOR, but there is no stall.