Nalu is an NV3x level demo?

Update: I found this fragment shader, which uses some sort of dynamic flow control. That could be the reason the demo only works on NV40.
Code:
// This shader was generated procedurally using the
// NVidia fragment shader generator.  While it may seem
// inefficient to make so many function calls, they will
// become inlined in the compiled CG shader.

#include "nvdemo_fs_lib.fp30"
#include "Mermaid_include.fp30"

// Interpolated values passed from the vertex program to the fragment
// shaders in this file (SkinShader, ScalesShader and main all take it as
// their first argument). Each field is bound to a hardware semantic.
// NOTE(review): the "tan" prefix presumably means tangent space - confirm
// against the vertex program, which is not part of this file.
struct v2fConnector {
  // Bound to POSITION; not read by any shader in this file.
  float4 projCoord		: POSITION;
  // Spot light direction; negated before being handed to MermaidBlinn.
  float3 tanSpotLight0Dir	: TEX0;
  // Spot light half-angle vector, passed straight to MermaidBlinn.
  float3 tanSpotLight0HalfAngle : TEX1; 
  // Coordinates used to sample the diffuse, specular, bump and scale maps.
  float2 c_texCoord		: TEX2;
  // Coordinates used to sample the subsurface / blurred shadow texture.
  float2 c_texCoord1		: TEX3;    
  float3 eyeNormal              : TEX4;         // for scales cubemap lookup
  // Point light direction; negated and dotted with the bump normal in SkinShader.
  float3 tanPointLight0Dir	: TEX5;
  float3 eyeCoord		: TEX6;		// to compute attenuation of Sea creature light
  float4 SPOTLIGHTPROJCOORD     : TEX7;		// projection for caustics / depth shadows  
  
};






//*****************************************************************
// SkinShader
//
// Shades one skin fragment and returns its RGB colour. The result is
// the sum of four terms:
//   * a point light term, faded out with distance from
//     g_eyePointLight0Pos (fully faded at 400 units),
//   * (shadowed spot diffuse + fill light) modulated by diffmap,
//   * shadowed spot specular modulated by the specular map, halved,
//   * a caustics term (squared caustics sample, scaled by 0.3).
//
// v2f                  interpolants from the vertex program
// softshadows          non-zero: shadow factor comes from the blurred
//                      subsurface texture; zero: from the depth-map compare
// diffmap              diffuse colour already sampled by the caller
// c_specular           specular attenuation map
// c_bumpCol            map sampled as the shading normal
// subsurface_blurred   blurred shadow texture (only .x is read)
// shadowDepthMap       depth map used by x1texcompare2D
// g_caustics           3D caustics texture; g_caustics_z picks the slice
// fillcolor            fill light colour, weighted by (1 - diffuse)
// g_eyePointLight0Pos  point light position, compared against v2f.eyeCoord
// g_LColor             point light colour
//
// NOTE(review): MermaidBlinn comes from an include that is not visible
// here; its .y / .z components are used as diffuse / specular - confirm.
//*****************************************************************
half3 SkinShader(v2fConnector v2f,
                  float softshadows,
                  half3    diffmap, 
                  texobj2D c_specular,
                  texobj2D c_bumpCol,
                  texobj2D subsurface_blurred,
                  texobj2D shadowDepthMap,                  
                  uniform sampler3D g_caustics,
                  float g_caustics_z,
                  float3 fillcolor,
                  float3 g_eyePointLight0Pos,
                  float3 g_LColor)
{
  // Caustics lookup. The original author notes that these coordinates
  // should really be computed in the vertex shader.
  float3 causticsCoord;
  causticsCoord.xy = (v2f.SPOTLIGHTPROJCOORD.xy / v2f.SPOTLIGHTPROJCOORD.w) * float2(3, 3);
  causticsCoord.z  = g_caustics_z;
  half3 causticsSample = h3tex3D(g_caustics, causticsCoord);

  // Surface maps and both shadow sources.
  half3 specMask      = h3tex2D(c_specular, v2f.c_texCoord);
  half3 bumpNormal    = h3tex2D(c_bumpCol, v2f.c_texCoord);
  half3 blurredShadow = h3tex2D(subsurface_blurred, v2f.c_texCoord1);
  half  depthShadow   = x1texcompare2D(shadowDepthMap, v2f.SPOTLIGHTPROJCOORD);

  // Pick the shadow source, then clamp so shadowed areas keep at least
  // half of their lighting.
  half shadowSource;
  if (softshadows)
    shadowSource = blurredShadow.x;
  else
    shadowSource = depthShadow;
  half shadow = max(0.5, shadowSource);

  // The caustics term always uses the blurred shadow value, regardless
  // of the softshadows switch. The vector result is narrowed to a scalar
  // implicitly, exactly as in the original expression.
  half caustics = pow( (causticsSample*blurredShadow.x), 2) * 0.3;

  // Spot light: Blinn terms with a fixed specular exponent of 15.
  half4 blinn           = MermaidBlinn(bumpNormal, -v2f.tanSpotLight0Dir, v2f.tanSpotLight0HalfAngle, 15);
  half  shadowedDiffuse = blinn.y*shadow;
  half  shadowedSpec    = blinn.z*shadow;
  half3 fillTerm        = (1-blinn.y)*fillcolor;

  // Point light: clamped N.L, coloured by g_LColor.
  half  pointNdotL = max(0, dot(bumpNormal, -v2f.tanPointLight0Dir));
  half3 pointColor = g_LColor*pointNdotL;

  // Linear distance falloff: 0 at the light, 1 at 400 units and beyond.
  float3 toLight = (g_eyePointLight0Pos-v2f.eyeCoord);
  half dist = sqrt(dot(toLight,toLight)) * (1.0/400.0);
  half att  = saturate(dist);

  // Sum the terms in the same order as the original single expression.
  half3 pointTerm   = (1.0-att)*pointColor;
  half3 surfaceTerm = (shadowedDiffuse+fillTerm)*diffmap;
  half3 specTerm    = shadowedSpec*specMask*0.5;

  return pointTerm + surfaceTerm + specTerm + caustics;
}





//*****************************************************************
// ScalesShader
//
// Shades one scale-covered fragment and returns its RGB colour: the
// lit scale colour, blended towards an iridescent + specular version
// by the iridescence mask.
//
// v2f            interpolants from the vertex program
// c_scaleCol     scale colour; .w = self illumination
//                (0 = normal lighting, 1 = glow in the dark)
// c_scaleBump    map sampled as the shading normal
// c_scaleMasks   r = specular amount, b = iridescence amount,
//                w = scales on/off (g, the skin-tone blend mask, is
//                currently unused because that blend is disabled)
// c_irid2DCol    2D iridescence ramp, common to all scales
// c_iridCube     iridescence cube map, common to all scales
// subsurface     shadow texture (only .x is read)
//
// skintoneblend, g_caustics, g_caustics_z and fillcolor are accepted
// for interface compatibility but do not influence the result: the
// caustics term was already disabled in the original return value, and
// the fill light uses FILL_LIGHT_COLOR rather than fillcolor.
// NOTE(review): FILL_LIGHT_COLOR and MermaidBlinn are not defined in
// this file (presumably they come from the includes) - confirm that
// ignoring fillcolor here is intentional.
//*****************************************************************
half3 ScalesShader(v2fConnector v2f,
                  uniform texobj2D c_scaleCol,
                  uniform texobj2D c_scaleBump,
                  uniform texobj2D c_scaleMasks,
                  uniform texobj2D c_irid2DCol,   // common to all scales
                  uniform texobjCUBE c_iridCube,  // common to all scales

                  uniform texobj2D subsurface,
                  float skintoneblend,

                  uniform sampler3D g_caustics,
                  float g_caustics_z,
                  float3 fillcolor
                  )
{
  // Cube map lookup direction: reflect the eye normal about +Z, then
  // flip X and Z. The original wrote "* (-1,1,-1)", which is a comma
  // expression that evaluates to the scalar -1 and therefore negated
  // all three components; a vector constructor is required here.
  half3 cubemapUVs      = reflect(v2f.eyeNormal, half3(0,0,1)) * half3(-1,1,-1);

  half4 scaleColor      = f4tex2D(c_scaleCol, v2f.c_texCoord);
  half4 scaleNormal     = f4tex2D(c_scaleBump, v2f.c_texCoord);
  half4 scaleMasks      = f4tex2D(c_scaleMasks, v2f.c_texCoord); // w = alpha mask
  half4 iridColorCube   = f4texCUBE(c_iridCube, cubemapUVs);

  // Shadow factor, clamped so shadowed scales keep 40% of their light.
  // Original note: ambient should be computed in the shadow pass
  // (constant # of pixels).
  half shadow           = f4tex2D(subsurface, v2f.c_texCoord1).x;
  shadow                = max(0.4, shadow);

  // renaming
  //----------
  half specatt           = scaleMasks.r;    // 0=no spec, 1=full spec
  half iridatt           = scaleMasks.b;    // iridescence amount (1=full irid, 0=no irid)
  half scalesON          = scaleMasks.w;    // 1=scales on
  half self_illum_amount = scaleColor.w;    // 0=normal lighting   1=glow in the dark

  // lighting
  //---------
  half specexp              = 10;
  half3 ComputedLighting    = MermaidBlinn(scaleNormal.xyz, -v2f.tanSpotLight0Dir, v2f.tanSpotLight0HalfAngle, specexp);

  half  diff                = ComputedLighting.y*shadow + self_illum_amount;
  half  spec                = ComputedLighting.z;
  half3 diffFill            = (1-diff)*FILL_LIGHT_COLOR;
  half3 FINAL_diffuse       = diff + diffFill;

  // Iridescence: 2D ramp indexed by the eye normal's x (offset by the
  // scales mask), plus the cube map colour, clamped to [0,1].
  half4 FinalIridColor      = saturate(f4tex2D(c_irid2DCol, half2((v2f.eyeNormal.x+scalesON)*4, 0) ) + iridColorCube);
  half3 colorNoIrid         = scaleColor.xyz * FINAL_diffuse;

  // Only RGB is carried forward; the .xyz swizzles make explicit the
  // narrowing that the original left to implicit conversion.
  half3 final_scale_difuse  = (scaleColor.xyz*scalesON) * FINAL_diffuse * FinalIridColor.xyz;
  half3 final_spec          = spec * (FinalIridColor.xyz*scalesON + scaleColor.xyz*specatt);
  half3 finalScalesColor    = final_scale_difuse + final_spec;

  // Blend between the plain lit colour and the iridescent version.
  half3 ScaleColor          = lerp(colorNoIrid, finalScalesColor, iridatt);

  return ScaleColor;
}




// Fragment program entry point for the mermaid surface.
//
// Samples the diffuse map and uses its alpha as a skin/scales mask:
//   alpha == 0      -> pure skin   (only SkinShader runs)
//   alpha == 1      -> pure scales (only ScalesShader runs)
//   0 < alpha < 1   -> both run and are blended by alpha
// The two tests depend on a texture sample, i.e. they are
// data-dependent branches.
//
// Returns the blended colour in rgb with w forced to 0.
float4 main(v2fConnector v2f,

				// Skin & common
				//--------------
                  uniform texobj2D c_diffuseCol,
                  uniform texobj2D c_specular,
                  uniform texobj2D c_bumpCol,
                  uniform texobj2D subsurface_blurred,
                  
		    // NEEDED FOR EXPORT - do not remove
		    uniform texobj2D subsurface,
		    uniform texobj2D shadowDepthMap,
		    uniform texobj2D shadowCMap,
		    // NEEDED FOR EXPORT - do not remove
                  
                  uniform sampler3D g_caustics,                  
                  uniform float     g_softshadows,                                                    
                  // Skin & common - END
                  //---------------------                  
                  
                  uniform float     g_skintoneblend,
                  uniform float3    g_fillcolor,
                  uniform float     g_caustics_z,
                  uniform float3    g_eyePointLight0Pos,
                  uniform float3    g_LColor,
                  
                  uniform texobj2D c_scaleCol,
                  uniform texobj2D c_scaleBump,
                  uniform texobj2D c_scaleMasks,                  
                  uniform texobj2D c_irid2DCol,   // common to all scales
                  uniform texobjCUBE c_iridCube   // common to all scales                  
                  ) : COLOR
{
  // Both colours must start from a defined value: whichever branch is
  // skipped below still feeds the lerp. The original "Skin, Scales = 0;"
  // was a comma expression that zeroed only Scales and left Skin
  // uninitialised for fully scale-covered fragments.
  half3 Skin   = half3(0, 0, 0);
  half3 Scales = half3(0, 0, 0);

  half4 diffmap = h4tex2D(c_diffuseCol, v2f.c_texCoord);

  // Skin contributes wherever the mask is not fully "scales".
  if (diffmap.w < 1.)
    Skin = SkinShader(v2f,
                  g_softshadows,
                  diffmap.rgb,
                  c_specular,
                  c_bumpCol,
                  subsurface_blurred,
                  shadowDepthMap,
                  g_caustics,
                  g_caustics_z,
                  g_fillcolor,
                  g_eyePointLight0Pos,
                  g_LColor);

  // Scales contribute wherever the mask is not fully "skin".
  // NOTE(review): the blurred subsurface texture is passed as
  // ScalesShader's "subsurface" argument, while the separate
  // "subsurface" uniform is unused here - confirm this is intended.
  if (diffmap.w > 0.)
    Scales = ScalesShader(v2f,
                  c_scaleCol,
                  c_scaleBump,
                  c_scaleMasks,
                  c_irid2DCol,   // common to all scales
                  c_iridCube,    // common to all scales
                  subsurface_blurred,
                  g_skintoneblend,
                  g_caustics,
                  g_caustics_z,
                  g_fillcolor
                  );

  float4 COL;
  COL.rgb  = lerp(Skin.xyz, Scales.xyz, diffmap.w);
  COL.w    = 0;

  return COL;
}
 
991060 said:
update, I found this fragment shader using some sort of dynamic flow control, could be the reason to make the demo only work on NV40.

You're right. The documentation lists the new features of the NV_fragment_program2 as:

* structured branching support, including data-dependent IF tests, loops supporting a fixed number of iterations, and a data-dependent loop exit instruction (BRK),

* subroutine calls,

* instructions to perform vector normalization, divide vector components by a scalar, and perform two-component dot products (with or without a scalar add),

* an instruction to perform a texture lookup with an explicit LOD,

* a loop index register for indirect access into the texture coordinate attribute array, and

* a facing attribute that indicates whether the fragment is generated from a front- or back-facing primitive.
 
MikeC said:
991060 said:
update, I found this fragment shader using some sort of dynamic flow control, could be the reason to make the demo only work on NV40.

You're right. The documentation lists the new features of the NV_fragment_program2 as:
Yes, I was hoping Nalu would use the NV_fragment_program2 extension, since it's an NV40-exclusive feature, but the shot I made didn't suggest that the extension is a necessity. Well, it could be a bug in the demo's hardware-support check, who knows.
 
I was thinking about how to emulate dynamic branches, and saw in the OpenGL documentation that the stencil test is performed after the alpha test.

Would it be possible to write values to the stencil buffer while rejecting pixels based on the branching condition, and then use the stencil buffer to isolate pixels in subsequent passes? Who knows, maybe it'll even turn out faster if geometry is not a bottleneck!

It seems possible with this Nalu shader, anyway.

Maybe I should post this in the coding forum, and ask Colourless very nicely for his wrapper framework :)
 
dan2097 said:
I was actually under the impression that nalu did run on the geforce fx, just very slowly.

I know there's a hidden option in the drivers to emulate NV30 for cards such as GF4, and even though I haven't personally tried it, I believe there's a similar option for NV40.
 
Lezmaka said:
dan2097 said:
I was actually under the impression that nalu did run on the geforce fx, just very slowly.

I know there's a hidden option in the drivers to emulate NV30 for cards such as GF4, and even though I haven't personally tried it, I believe there's a similar option for NV40.


Ya there is, I believe even a geforce 3 can emulate the NV40 though.
 
oddfellow said:
I'd love to try it on my 5900, but I can't find the demo download anywhere on nVidia's site! :?
It's not released to the public yet. Someone on the BBS reported that the pirates demo works on NV3x, but I think the pirates demo uses vertex texturing on the ocean, which is obviously not supported by NV3x. Kinda weird. :?
 
ChrisRay said:
Ya there is, I believe even a geforce 3 can emulate the NV40 though.
Well, yeah, via refrast on CPU. But then, why a GF4? Let's use good old S3 Trio64!

Welcome to the world of the offline rendering! ;)
 
DegustatoR said:
ChrisRay said:
Ya there is, I believe even a geforce 3 can emulate the NV40 though.
Well, yeah, via refrast on CPU. But then, why a GF4? Let's use good old S3 Trio64!
Welcome to the world of the offline rendering! ;)

I had one of those!! Awww the memories...
 
Back
Top