Unroll failed in PS 4.0 shader code

Hey guys!

I am currently trying to get MJP's (aka Matt's) PCF shadow map code working. When I try to compile the adapted code, compilation fails with:

- forced to unroll loop, but unrolling failed

- unable to unroll loop, loop does not appear to terminate in a timely manner (36 iterations), use the [unroll(n)] attribute to force an exact higher number

Since my shader abilities are slim to none, I am stuck here. The code did compile fine on XNA, though.

Does any shader guru here have suggestions on why this happens and how to fix it?

Full shader code:

// Effect parameters set by the host application.

// Filter kernel width in taps; MainPS passes it to CalcShadowTermSoftPCF as iSqrtSamples.
int param_filterSize;
// Not referenced by any function visible here.
float4x4 param_worldMatrix;
// Not referenced by any function visible here.
float4x4 param_viewProjectionMatrix;
// Combined with param_lightViewProjectionMatrix in MainPS to take view-space
// positions into the light's clip space.
float4x4 param_inverseViewMatrix;
// Not referenced by any function visible here.
float param_farClip;
// Shadow map dimensions; used to convert texel offsets into texture-coordinate offsets.
float2 param_shadowMapSize;
// Added to the sampled shadow map depth before the depth comparison.
float param_BIAS;

// Size used by MainVS for the texel-to-pixel alignment offset.
float2 param_occlusionTextureSize;
// View-space frustum corners; MainVS picks one per vertex via TextureCoordsAndCornerIndex.z.
float3 param_frustumCornersVS[4];
// Light view-projection matrix (see param_inverseViewMatrix).
float4x4 param_lightViewProjectionMatrix;
// Source texture for DepthTextureSampler.
texture param_depthTexture;
// Source texture for ShadowMapSampler.
texture param_shadowMap;

// Point-filtered, non-mipmapped sampler over param_depthTexture.
// MainPS reads its red channel as the pixel's depth.
sampler2D DepthTextureSampler = sampler_state
{
Texture = <param_depthTexture>;
MinFilter = point;
MagFilter = point;
MipFilter = none;
};

// Point-filtered, non-mipmapped sampler over param_shadowMap.
// CalcShadowTermSoftPCF reads its x channel as the stored light-space depth.
sampler2D ShadowMapSampler = sampler_state
{
Texture = <param_shadowMap>;
MinFilter = point;
MagFilter = point;
MipFilter = none;
};

// Vertex shader input.
struct VSIn
{
// Vertex position; MainVS writes it to the output position with w = 1 after a small x/y offset.
float3 Position : POSITION0;
// xy: texture coordinates, z: index into param_frustumCornersVS.
float3 TextureCoordsAndCornerIndex : TEXCOORD0;
};

// Vertex shader output and therefore pixel shader input.
struct VSOut
{
// Output position written by MainVS.
float4 PositionCS : POSITION0;
// Texture coordinates used to sample the depth texture in MainPS.
float2 TextureCoords : TEXCOORD0;
// Frustum corner selected in MainVS; MainPS scales it by the sampled depth.
float3 FrustumCornerVS : TEXCOORD1;
};

// Pixel shader output.
struct PSOut
{
// MainPS stores the shadow term in the red channel and 1 in the others.
float4 Color : COLOR0;
};

/**************************************************
Calculates the shadow term using PCF soft-shadowing.

Takes an iSqrtSamples x iSqrtSamples grid of shadow map samples centered on
vShadowTexCoord and spaced one texel apart. Each sample passes when
fLightDepth <= stored depth + param_BIAS. The first and last row/column are
weighted by the sub-texel position of vShadowTexCoord to smooth the edges.

fLightDepth     - depth of the shaded pixel as seen from the light
vShadowTexCoord - shadow map texture coordinate of the shaded pixel
iSqrtSamples    - number of taps along each axis of the kernel

Returns the weighted sum of passed samples divided by the tap count and
scaled by 1.55.

NOTE(review): at the visible call site iSqrtSamples comes from a uniform, so
the trip count is not a compile-time constant; profiles that must fully
unroll these loops may still reject the shader.
**************************************************/
float CalcShadowTermSoftPCF(float fLightDepth, float2 vShadowTexCoord, int iSqrtSamples)
{
    float fShadowTerm = 0.0f;

    float fRadius = (iSqrtSamples - 1.0f) / 2;
    int iLastTap = iSqrtSamples - 1;

    // Sub-texel position of the lookup; identical for every tap, so compute it once.
    float2 vTexelFrac = frac(vShadowTexCoord * param_shadowMapSize);

    // Integer tap indices give an exact iteration count and exact edge tests;
    // the tap offsets (index - fRadius) match stepping a float from -fRadius by 1.
    for (int iy = 0; iy < iSqrtSamples; iy++)
    {
        // Edge tap smoothing (rows)
        float yWeight = 1;
        if (iy == 0)
            yWeight = 1 - vTexelFrac.y;
        else if (iy == iLastTap)
            yWeight = vTexelFrac.y;

        for (int ix = 0; ix < iSqrtSamples; ix++)
        {
            float2 vOffset = float2(ix - fRadius, iy - fRadius) / param_shadowMapSize;
            float2 vSamplePoint = vShadowTexCoord + vOffset;
            float fDepth = tex2D(ShadowMapSampler, vSamplePoint).x;
            float fSample = (fLightDepth <= fDepth + param_BIAS);

            // Edge tap smoothing (columns)
            float xWeight = 1;
            if (ix == 0)
                xWeight = 1 - vTexelFrac.x;
            else if (ix == iLastTap)
                xWeight = vTexelFrac.x;

            fShadowTerm += fSample * xWeight * yWeight;
        }
    }

    fShadowTerm /= (iSqrtSamples * iSqrtSamples);
    fShadowTerm *= 1.55f;

    return fShadowTerm;
}

/**************************************************
Vertex shader.

Shifts the incoming position by half a pixel, forwards the texture
coordinates, and looks up the frustum corner that belongs to this vertex.
**************************************************/
VSOut MainVS(VSIn input)
{
    // Half-pixel shift that aligns texels to pixels: left in x, up in y.
    float2 vHalfPixel = 1.0f / param_occlusionTextureSize;

    VSOut output;
    output.PositionCS = float4(input.Position.x - vHalfPixel.x,
                               input.Position.y + vHalfPixel.y,
                               input.Position.z,
                               1.0f);

    // xy carries the texture coordinate, z selects the frustum corner.
    output.TextureCoords = input.TextureCoordsAndCornerIndex.xy;
    output.FrustumCornerVS = param_frustumCornersVS[input.TextureCoordsAndCornerIndex.z];

    return output;
}

/**************************************************
Pixel shader.

Rebuilds the pixel's view-space position from the depth texture, projects it
into the light's clip space, derives the shadow map lookup coordinate, and
writes the PCF shadow term to the red channel (other channels are 1).
**************************************************/
PSOut MainPS(VSOut input)
{
    // Reconstruct view-space position from the depth buffer
    float fPixelDepth = tex2D(DepthTextureSampler, input.TextureCoords).r;
    float4 vPositionVS = float4(fPixelDepth * input.FrustumCornerVS, 1.0f);

    // Determine the depth of the pixel with respect to the light
    float4x4 matViewToLightViewProj = mul(param_inverseViewMatrix, param_lightViewProjectionMatrix);
    float4 vPositionLightCS = mul(vPositionVS, matViewToLightViewProj);

    float fLightDepth = vPositionLightCS.z / vPositionLightCS.w;

    // Transform from light space to shadow map texture space (y is flipped).
    float2 vShadowTexCoord = 0.5 * vPositionLightCS.xy / vPositionLightCS.w + float2(0.5f, 0.5f);
    vShadowTexCoord.y = 1.0f - vShadowTexCoord.y;

    // Offset the coordinate by half a texel so we sample it correctly
    vShadowTexCoord += (0.5f / param_shadowMapSize);

    // Every filter size goes through the soft PCF path. The former
    // param_filterSize == 2 branch made this exact same call because its
    // dedicated CalcShadowTermPCF call was commented out; re-introduce a
    // branch here if that function is added.
    float fShadowTerm = CalcShadowTermSoftPCF(fLightDepth, vShadowTexCoord, param_filterSize);

    PSOut output;
    output.Color = float4(fShadowTerm, 1, 1, 1);
    return output;
}

/**************************************************
Techniques

  * CreateShadowTerm
**************************************************/
technique CreateShadowTerm
{
    pass p0
    {
        // Render states are intentionally left commented out.
        /*
        ZWriteEnable = false;
        ZEnable = false;
        AlphaBlendEnable = false;
        CullMode = NONE;
        */

        // NOTE(review): the *_level_9_1 pixel profile has limits similar to
        // ps_2_0; the compiler reports MainPS as too complex for it, while
        // plain ps_4_0 is reported to compile. Profiles are left unchanged here.
        VertexShader = compile vs_4_0_level_9_1 MainVS();
        PixelShader = compile ps_4_0_level_9_1 MainPS();
    }
}

When I force the unrolling by putting [unroll(7)] on the loops, I get:

  • Shader uses texture addressing operations in a dependency chain that is too complex for the target shader model (ps_2_0) to handle

Why is it shader model 2 when I compile the code using ps_4_0_level_9_1?

Okay, even stranger…
If I use ps_4_0 to compile the whole thing, everything works fine.

What is the major difference between ps_4_0 and ps_4_0_level_9_1 here? And why is the code compiling on ps_3_0 in the first place?

4_0 is much, much newer than level9_1 (which is around 2001 level hardware afaik).

I'm on mobile, so I can't easily link the reference, but I suggest sticking to 4_0 or 5_0 for your shaders.

So … vs_4_0_level_9_1 is some kind of shader level 4 compat to DX 9.1 thing?

Got it!!

Specifying Compiler Targets - Win32 apps | Microsoft Learn
Pixel shader for 9.1 and 9.2 (similar limits to ps_2_0)

I always thought ps_4_0_level_9_1 is some kind of fallback for XNA shader level 3 … falling back to level 2 and introducing the error makes it even stranger to me.

Anyway. ps4 works and I am out of here :wink: