Hi All,
I've found a weird problem here.
It seems that setting a local parameter to zero with glProgramLocalParameter4fARB, with any shader, causes a slight blip in performance (1-2 ms). Is the shader re-optimizing itself or being re-downloaded for some bizarre reason?
Note the following test code that runs a simple shader and prints the amount of time to render the quads for 10 frames of execution.
On the first pass it sends 1.0 to the shader via glProgramLocalParameter4fARB; on the second and all later frames it sends 0.0.
On my GF6800 with the latest windows drivers I get the following render times (in milliseconds):
frame 1: 68ms
frame 2: 3.5ms
frame 3: 1.1ms
frame 4: 1.1ms
frame 5: 1.1ms
frame 6: 1.1ms
I can understand the first frame taking 68ms, as the shader is being cached. However, if I set the local parameter to, say, 0.5 (or any other value besides 0.0) for the second and all subsequent frames, I get the following render times:
frame 1: 68ms
frame 2: 1.1ms
frame 3: 1.1ms
frame 4: 1.1ms
frame 5: 1.1ms
frame 6: 1.1ms
So why the blip of 3.5ms on the second frame when the local parameter is set to 0.0?
Has anyone else noticed this problem?
Here is the shader code in CG:
And here it is compiled with fp40 profile:
I've found a weird problem here.
It seems that setting a local parameter to zero with glProgramLocalParameter4fARB, with any shader, causes a slight blip in performance (1-2 ms). Is the shader re-optimizing itself or being re-downloaded for some bizarre reason?
Note the following test code that runs a simple shader and prints the amount of time to render the quads for 10 frames of execution.
On the first pass it sends 1.0 to the shader via glProgramLocalParameter4fARB; on the second and all later frames it sends 0.0.
Code:
// Per-frame timing test: draws a 5x5 grid of textured quads with the shader
// active and prints the elapsed time for each of the first 10 frames.
// The statics below carry state from one frame to the next.
static int framecount=0;
static shaderASM *shader;
static int first=1;
// Create the shader object only once, the first time this code runs.
if (first)
{
shader = new shaderASM(NULL,"asm.fp",mappingList);
first=0;
}
// NOTE(review): this enables the NV fragment-program target, while the posted
// program text begins with "!!ARBfp1.0" - confirm which target shaderASM binds.
glEnable(GL_FRAGMENT_PROGRAM_NV);
glColor3f(1.0,1.0,1.0);
// Get start time. glFinish() is called first so that GL work issued before
// this point is not charged to the measurement.
glFinish();
// getTime() presumably returns seconds (the difference is scaled by 1000 and
// reported as milliseconds) - TODO confirm.
double t1 = getTime();
// tx/ty: running texture coordinates; h: constant passed as the y component of
// every vertex; quadwidth: quad edge length and loop step.
float tx=0.0,ty=0.0,h=-10.0,quadwidth=20.0;
// Begin shader
shader->begin();
glPushMatrix();
// Init uniform to 1.0 on first frame and 0 on all frames after.
// sendUniformfp1f presumably wraps glProgramLocalParameter4fARB for local
// parameter 0 - verify against the shaderASM implementation.
if (framecount == 0)
shader->sendUniformfp1f(0, 1.0);
else
shader->sendUniformfp1f(0, 0.0);
glActiveTextureARB(GL_TEXTURE0_ARB);
glBindTexture ( GL_TEXTURE_2D, basetex );
// Draw a load of quads with shader enabled: x and y each step from -50 in
// increments of quadwidth while below 50, i.e. 5 steps each, 25 quads total.
glBegin(GL_QUADS);
for (float x = -50.0 ; x < 50.0 ; x+= quadwidth)
{
// tx restarts at 0 for every x step and advances with each quad in the inner loop.
tx=0.0;
for (float y = -50.0 ; y < 50.0 ; y+= quadwidth)
{
// Four corners of one quad; the loop variable y is passed as the third
// (z) argument of glVertex3f, and each quad spans 0.2 in texture space.
glTexCoord2f(tx,ty);
glVertex3f(x,h,y);
glTexCoord2f(tx,ty+0.2);
glVertex3f(x,h,y+quadwidth);
glTexCoord2f(tx+0.2,ty+0.2);
glVertex3f(x+quadwidth,h,y+quadwidth);
glTexCoord2f(tx+0.2,ty);
glVertex3f(x+quadwidth,h,y);
tx+=0.2;
}
// ty advances once per x step; it is only reset by its initializer above.
ty+=0.2;
}
glEnd();
glPopMatrix();
shader->end();
// Wait for the GL to finish the quads so the elapsed time includes rendering.
glFinish();
// Print time taken (seconds difference * 1000) for the first 10 frames
if (framecount < 10)
fprintf(stderr,"%0.2f\n",(getTime() - t1) * 1000.0);
glDisable(GL_FRAGMENT_PROGRAM_NV);
framecount++;
On my GF6800 with the latest windows drivers I get the following render times (in milliseconds):
frame 1: 68ms
frame 2: 3.5ms
frame 3: 1.1ms
frame 4: 1.1ms
frame 5: 1.1ms
frame 6: 1.1ms
I can understand the first frame taking 68ms, as the shader is being cached. However, if I set the local parameter to, say, 0.5 (or any other value besides 0.0) for the second and all subsequent frames, I get the following render times:
frame 1: 68ms
frame 2: 1.1ms
frame 3: 1.1ms
frame 4: 1.1ms
frame 5: 1.1ms
frame 6: 1.1ms
So why the blip of 3.5ms on the second frame when the local parameter is set to 0.0?
Has anyone else noticed this problem?
Here is the shader code in CG:
Code:
// Texture sampled by the fragment shader.
uniform sampler2D base;
// Exponent applied to the colour channels of the sampled texel.
uniform float testfloat;

// Samples `base` at texCoord and raises the x, y and z channels to the power
// `testfloat`; the w channel is returned as sampled. fogCoord is accepted but
// not used.
float4 main(float2 texCoord: TEXCOORD0,float fogCoord: FOG) : COLOR {
    float4 texel = tex2D(base, texCoord);

    // Order matters: x is computed from y first, so the y line below reads
    // the already-updated x rather than the sampled one.
    texel.x = pow(texel.y, testfloat);
    texel.y = pow(texel.x, testfloat);
    texel.z = pow(texel.z, testfloat);

    return texel;
}
And here it is compiled with fp40 profile:
Code:
!!ARBfp1.0
OPTION NV_fragment_program2;
# Compiler-generated fragment program: samples texture unit 0 and applies a
# power function, with exponent c[0].x, to three channels of the result.
# cgc version 1.4.0001, build date Mar 9 2006 20:52:26
# command line args: -profile fp40
# source file: terrain.fp
#vendor NVIDIA Corporation
#version 1.0.02
#profile fp40
#program main
#semantic base
#semantic testfloat
#var float2 texCoord : $vin.TEXCOORD0 : TEX0 : 0 : 1
#var float fogCoord : $vin.FOG : FOGC : 1 : 0
#var sampler2D base : : texunit 0 : -1 : 1
#var float testfloat : : c[0] : -1 : 1
#var float4 main : $vout.COLOR : COL : -1 : 1
# c[0] is bound to program local parameter 0 (testfloat per the #var line
# above); only its .x component is read by the instructions below.
PARAM c[1] = { program.local[0] };
TEMP R0;
# RC and HC are declared but not referenced by any instruction below.
TEMP RC;
TEMP HC;
OUTPUT oCol = result.color;
# Texture lookup writes only the y, z and w components of R0.
TEX R0.yzw, fragment.texcoord[0], texture[0], 2D;
# x = pow(y, c[0].x)
POWR R0.x, R0.y, c[0].x;
# z = pow(z, c[0].x)
POWR R0.z, R0.z, c[0].x;
# y = pow(x, c[0].x), reading the x value written two instructions earlier.
POWR R0.y, R0.x, c[0].x;
# Write the assembled colour to the fragment output.
MOVR oCol, R0;
END
# 5 instructions, 1 R-regs, 0 H-r