Ideas to optimize this effect?

GeonBit · August 24, 2017, 1:29am

Hi all,

Just wondering if any of you got ideas to optimize the following effect:

// Shader profile selection.
// When OPENGL is defined, SV_POSITION is aliased to POSITION and the shaders
// are compiled with the vs_3_0 / ps_3_0 profiles; otherwise the
// vs_4_0_level_9_1 / ps_4_0_level_9_1 profiles are used.
#if OPENGL
#define SV_POSITION POSITION
#define VS_SHADERMODEL vs_3_0
#define PS_SHADERMODEL ps_3_0
#else
#define VS_SHADERMODEL vs_4_0_level_9_1
#define PS_SHADERMODEL ps_4_0_level_9_1
#endif

// combined world / view / projection matrix; used by the vertex shader to
// produce the output position
matrix WorldViewProjection;

// world matrix; used by the vertex shader for the world-space position and
// (its upper 3x3 part) for the normal
matrix World;

// white color value (not referenced by the shaders in this listing)
float3 WhiteColor = float3(1, 1, 1);

// ambient light value; starting term of the accumulated light
float3 AmbientColor = float3(1, 1, 1);

// diffuse color; multiplied into the final lit color
float3 DiffuseColor = float3(1, 1, 1);

// emissive color; added to the ambient term before any light is processed
float3 EmissiveColor = float3(0, 0, 0);

// max intensity, eg allowing lights to overflow original color.
// the accumulated light is capped to this value after all lights are summed.
float MaxLightIntensity = 1.0f;

// rendering alpha; multiplied into the output alpha channel
float Alpha = 1.0f;

// main texture
texture MainTexture;

// are we using texture? when false the base color is white (1, 1, 1, 1)
bool TextureEnabled = false;

// max lights count (size of the light arrays below)
#define MAX_LIGHTS_COUNT 7

// light sources, one entry per light (parallel arrays).
// note:
//	- lights with range 0 = directional lights (in which case light pos is direction).
//	- lights with intensity 0 = disabled lights (intensity scales the light's contribution).
//	- LightSpecular is declared but not read by the shaders in this listing.
float3 LightColor[MAX_LIGHTS_COUNT];
float3 LightPosition[MAX_LIGHTS_COUNT];
float LightIntensity[MAX_LIGHTS_COUNT];
float LightRange[MAX_LIGHTS_COUNT];
float LightSpecular[MAX_LIGHTS_COUNT];

// how many active lights we have; the pixel shader only loops over
// indices [0, ActiveLightsCount)
int ActiveLightsCount = 0;

// main texture sampler, bound to MainTexture
sampler2D MainTextureSampler = sampler_state {
	Texture = (MainTexture);
};

// Vertex shader input: per-vertex attributes read by FlatLightingMainVS.
struct VertexShaderInput
{
	// vertex position; transformed by WorldViewProjection and by World
	float4 Position : POSITION0;
	// vertex normal; the vertex shader multiplies it by (float3x3)World
	float4 Normal : NORMAL0;
	// texture coordinate, forwarded unchanged to the pixel shader
	float2 TextureCoordinate : TEXCOORD0;
};

// Vertex shader output / pixel shader input.
struct VertexShaderOutput
{
	// position after the WorldViewProjection transform
	float4 Position : SV_POSITION;
	// normal transformed by the 3x3 part of World and normalized in the vertex shader
	float3 Normal : TEXCOORD0;
	// texture coordinate copied from the vertex input
	float2 TextureCoordinate : TEXCOORD1;
	// position after the World transform; used for point light distance and direction
	float4 WorldPos : TEXCOORD2;
};

// Vertex shader for the FlatLighting technique.
// Produces the projected position, the world-space position, the normal
// transformed by the 3x3 part of World (then normalized), and forwards the
// texture coordinate unchanged.
VertexShaderOutput FlatLightingMainVS(in VertexShaderInput input)
{
	VertexShaderOutput result;

	// texture coordinates pass straight through
	result.TextureCoordinate = input.TextureCoordinate;

	// normal: apply only the 3x3 part of World, then normalize
	result.Normal = normalize(mul(input.Normal, (float3x3)World));

	// positions: world space for lighting, projected for rasterization
	result.WorldPos = mul(input.Position, World);
	result.Position = mul(input.Position, WorldViewProjection);

	return result;
}

// Returns the dot product between `normal` and the normalized direction
// pointing from `pos3D` towards `lightPos`.
float DotProduct(float3 lightPos, float3 pos3D, float3 normal)
{
	// direction from the light to the point, as a unit vector
	float3 fromLight = normalize(pos3D - lightPos);

	// flip it so it points towards the light before taking the dot product
	float3 toLight = -fromLight;
	return dot(toLight, normal);
}

// Main pixel shader for the FlatLighting technique.
//
// The base color comes from MainTexture when TextureEnabled is set, otherwise
// it is white. It is multiplied by the accumulated light (AmbientColor +
// EmissiveColor + every active light, capped to MaxLightIntensity) and by
// DiffuseColor, then saturated. The alpha channel is multiplied by Alpha.
//
// Lights with LightRange > 0 are point lights with a squared linear falloff
// over their range; all other lights are directional and use LightPosition
// as the direction in the N.L term.
float4 FlatLightingMainPS(VertexShaderOutput input) : COLOR
{
	// pixel color to return
	float4 retColor;

	// set color either from texture if enabled or white
	if (TextureEnabled == true)
	{
		retColor = tex2D(MainTextureSampler, input.TextureCoordinate);
	}
	else
	{
		retColor = 1.0f;
	}

	// take the xyz part once instead of relying on implicit float4 -> float3 truncation
	float3 worldPos = input.WorldPos.xyz;

	// start calcing lights strength
	float3 LightsColor = AmbientColor + EmissiveColor;

	// add all point / directional lights
	for (int i = 0; i < ActiveLightsCount; ++i)
	{
		// stop once every channel has reached the cap that is applied after the
		// loop. Light contributions are clamped to be non-negative below
		// (assuming non-negative light colors and intensities), so the remaining
		// lights could not change the capped result. Comparing against
		// MaxLightIntensity instead of 1 keeps lights from being dropped too
		// early when the cap is raised above 1.
		if (LightsColor.r >= MaxLightIntensity &&
			LightsColor.g >= MaxLightIntensity &&
			LightsColor.b >= MaxLightIntensity) { break; }

		if (LightRange[i] > 0)
		{
			// point light: the vector to the light is computed once and reused
			// for both the distance falloff and the direction
			float3 toLight = LightPosition[i] - worldPos;
			float dist = length(toLight);

			// linear falloff: 1 at the light position, 0 at LightRange
			float disFactor = 1.0f - (dist / LightRange[i]);

			// out of range? skip this light.
			if (disFactor > 0)
			{
				// square the falloff (plain multiply instead of pow)
				disFactor = disFactor * disFactor;

				// angle factor, clamped so back-facing surfaces receive no light
				float cosTheta = saturate(dot(toLight / dist, input.Normal));

				// add light to pixel
				LightsColor.rgb += LightColor[i] * (cosTheta * LightIntensity[i] * disFactor);
			}
		}
		else
		{
			// directional light: LightPosition holds the direction.
			// clamp the angle factor like the point light branch does, so a
			// surface facing away does not subtract light from the pixel.
			float cosTheta = saturate(dot(LightPosition[i], input.Normal));

			// add light to pixel
			LightsColor.rgb += LightColor[i] * (cosTheta * LightIntensity[i]);
		}
	}

	// make sure lights doesn't overflow
	LightsColor.rgb = min(LightsColor.rgb, MaxLightIntensity);

	// apply lighting and diffuse on return color
	retColor.rgb = saturate(retColor.rgb * LightsColor * DiffuseColor);

	// apply alpha
	retColor.a *= Alpha;

	// return final
	return retColor;
}

// Default technique with flat lighting: a single pass that compiles
// FlatLightingMainVS and FlatLightingMainPS with the shader profiles
// selected by VS_SHADERMODEL / PS_SHADERMODEL.
technique FlatLighting
{
	pass P0
	{
		VertexShader = compile VS_SHADERMODEL FlatLightingMainVS();
		PixelShader = compile PS_SHADERMODEL FlatLightingMainPS();
	}
};

It's not too slow, but it's not fast enough either. And maybe there are some silly mistakes in there that burn FPS for nothing…

Any ideas to optimize?
Thanks

P.S. It's a basic Phong lighting effect with point / directional lights (but without specular). Feel free to use it if it helps.

Ravendarke · August 24, 2017, 2:41am

GPUs generally don't like branching because of parallelization. Then again, it is not as bad on modern GPUs as it used to be… on the other hand, full branching inside a loop isn't something I would recommend. You can use saturate(x) instead of clamp(x, 0, 1); some compilers might show an extremely minor performance difference between the two, though I wouldn't expect anything measurable in your case.

From the code, I expect this is part of a forward rendering pipeline; if you are dealing with a large number of lights and a large number of objects, this is obviously going to cost you. In that case I suggest a deferred approach. The only thing is that you seem to be using alpha for transparency, which is going to be an issue with deferred rendering, but performance will be on a different level.

GeonBit · August 24, 2017, 11:33am

Thanks, I took your advice and changed to this:

// Shader profile selection.
// When OPENGL is defined, SV_POSITION is aliased to POSITION and the shaders
// are compiled with the vs_3_0 / ps_3_0 profiles; otherwise the
// vs_4_0_level_9_1 / ps_4_0_level_9_1 profiles are used.
#if OPENGL
#define SV_POSITION POSITION
#define VS_SHADERMODEL vs_3_0
#define PS_SHADERMODEL ps_3_0
#else
#define VS_SHADERMODEL vs_4_0_level_9_1
#define PS_SHADERMODEL ps_4_0_level_9_1
#endif

// combined world / view / projection matrix; used by the vertex shader to
// produce the output position
matrix WorldViewProjection;

// world matrix; used by the vertex shader for the world-space position and
// (its upper 3x3 part) for the normal
matrix World;

// white color value (not referenced by the shaders in this listing)
float3 WhiteColor = float3(1, 1, 1);

// ambient light value; starting term of the accumulated light
float3 AmbientColor = float3(1, 1, 1);

// diffuse color; multiplied into the final lit color
float3 DiffuseColor = float3(1, 1, 1);

// emissive color; added to the ambient term before any light is processed
float3 EmissiveColor = float3(0, 0, 0);

// max intensity, eg allowing lights to overflow original color.
// the accumulated light is capped to this value after all lights are summed.
float MaxLightIntensity = 1.0f;

// rendering alpha; multiplied into the output alpha channel
float Alpha = 1.0f;

// main texture
texture MainTexture;

// are we using texture? when false the base color is white (1, 1, 1, 1)
bool TextureEnabled = false;

// max lights count (size of the light arrays below)
#define MAX_LIGHTS_COUNT 7

// light sources, one entry per light (parallel arrays).
// note:
//	- lights with range 0 = directional lights (in which case light pos is direction).
//	- lights with intensity 0 = disabled lights (intensity scales the light's contribution).
//	- LightSpecular is declared but not read by the shaders in this listing.
float3 LightColor[MAX_LIGHTS_COUNT];
float3 LightPosition[MAX_LIGHTS_COUNT];
float LightIntensity[MAX_LIGHTS_COUNT];
float LightRange[MAX_LIGHTS_COUNT];
float LightSpecular[MAX_LIGHTS_COUNT];

// how many active lights we have (directional + point)
int ActiveLightsCount = 0;

// how many of the active lights are directional (direction lights come first).
// the pixel shader treats indices [0, DirectionalLightsCount) as directional
// and [DirectionalLightsCount, ActiveLightsCount) as point lights.
int DirectionalLightsCount = 0;

// main texture sampler, bound to MainTexture
sampler2D MainTextureSampler = sampler_state {
	Texture = (MainTexture);
};

// Vertex shader input: per-vertex attributes read by FlatLightingMainVS.
struct VertexShaderInput
{
	// vertex position; transformed by WorldViewProjection and by World
	float4 Position : POSITION0;
	// vertex normal; the vertex shader multiplies it by (float3x3)World
	float4 Normal : NORMAL0;
	// texture coordinate, forwarded unchanged to the pixel shader
	float2 TextureCoordinate : TEXCOORD0;
};

// Vertex shader output / pixel shader input.
struct VertexShaderOutput
{
	// position after the WorldViewProjection transform
	float4 Position : SV_POSITION;
	// normal transformed by the 3x3 part of World and normalized in the vertex shader
	float3 Normal : TEXCOORD0;
	// texture coordinate copied from the vertex input
	float2 TextureCoordinate : TEXCOORD1;
	// position after the World transform; used for point light distance and direction
	float4 WorldPos : TEXCOORD2;
};

// Vertex shader for the FlatLighting technique.
// Outputs the projected position, the world-space position, the normal
// transformed by the 3x3 part of World (then normalized), and the texture
// coordinate copied from the input.
VertexShaderOutput FlatLightingMainVS(in VertexShaderInput input)
{
	VertexShaderOutput vsOut;

	// forward the texture coordinate as-is
	vsOut.TextureCoordinate = input.TextureCoordinate;

	// transform the normal with the 3x3 part of World and normalize it
	vsOut.Normal = normalize(mul(input.Normal, (float3x3)World));

	// world-space position for lighting, projected position for rasterization
	vsOut.WorldPos = mul(input.Position, World);
	vsOut.Position = mul(input.Position, WorldViewProjection);

	return vsOut;
}

// Main pixel shader for the FlatLighting technique.
//
// The base color comes from MainTexture when TextureEnabled is set, otherwise
// it is white. It is multiplied by the accumulated light (AmbientColor +
// EmissiveColor + every active light, capped to MaxLightIntensity) and by
// DiffuseColor, then saturated. The alpha channel is multiplied by Alpha.
//
// Lights [0, DirectionalLightsCount) are processed as directional lights
// (LightPosition is used as the direction in the N.L term); lights
// [DirectionalLightsCount, ActiveLightsCount) are processed as point lights
// with a squared linear falloff over LightRange.
float4 FlatLightingMainPS(VertexShaderOutput input) : COLOR
{
	// pixel color to return
	float4 retColor;

	// set color either from texture if enabled or white
	retColor = TextureEnabled ? tex2D(MainTextureSampler, input.TextureCoordinate) : 1.0f;

	// take the xyz part once instead of relying on implicit float4 -> float3 truncation
	float3 worldPos = input.WorldPos.xyz;

	// start calcing lights strength
	float3 LightsColor = AmbientColor + EmissiveColor;

	// process directional lights
	int i = 0;
	for (i = 0; i < DirectionalLightsCount; ++i)
	{
		// angle factor, clamped like the point light loop does, so a surface
		// facing away does not subtract light from the pixel
		float cosTheta = saturate(dot(LightPosition[i], input.Normal));

		// add light to pixel
		LightsColor.rgb += LightColor[i] * (cosTheta * LightIntensity[i]);
	}

	// now process all point lights
	for (i = DirectionalLightsCount; i < ActiveLightsCount; ++i)
	{
		// stop once every channel has reached the cap that is applied after the
		// loop. Light contributions are clamped to be non-negative (assuming
		// non-negative light colors and intensities), so the remaining lights
		// could not change the capped result. Comparing against
		// MaxLightIntensity instead of 1 keeps lights from being dropped too
		// early when the cap is raised above 1.
		if (LightsColor.r >= MaxLightIntensity &&
			LightsColor.g >= MaxLightIntensity &&
			LightsColor.b >= MaxLightIntensity) { break; }

		// the vector to the light is computed once and reused for both the
		// distance falloff and the direction
		float3 toLight = LightPosition[i] - worldPos;
		float dist = length(toLight);

		// linear falloff: 1 at the light position, 0 at LightRange
		float disFactor = 1.0f - (dist / LightRange[i]);

		// out of range? skip this light.
		if (disFactor > 0)
		{
			// square the falloff (plain multiply instead of pow)
			disFactor = disFactor * disFactor;

			// angle factor, clamped so back-facing surfaces receive no light
			float cosTheta = saturate(dot(toLight / dist, input.Normal));

			// add light to pixel
			LightsColor.rgb += LightColor[i] * (cosTheta * LightIntensity[i] * disFactor);
		}
	}

	// make sure lights doesn't overflow
	LightsColor.rgb = min(LightsColor.rgb, MaxLightIntensity);

	// apply lighting and diffuse on return color
	retColor.rgb = saturate(retColor.rgb * LightsColor * DiffuseColor);

	// apply alpha
	retColor.a *= Alpha;

	// return final
	return retColor;
}

// Default technique with flat lighting: a single pass that compiles
// FlatLightingMainVS and FlatLightingMainPS with the shader profiles
// selected by VS_SHADERMODEL / PS_SHADERMODEL.
technique FlatLighting
{
	pass P0
	{
		VertexShader = compile VS_SHADERMODEL FlatLightingMainVS();
		PixelShader = compile PS_SHADERMODEL FlatLightingMainPS();
	}
};

I.e., using saturate and two loops instead of a condition (directional lights always come first). I think there's a slight improvement.

But I also think it's time to implement some deferred lighting.

PumpkinPudding · August 24, 2017, 2:10pm

Consider using forward+ or deferred rendering, as Ravendarke mentioned above.

Ravendarke · August 25, 2017, 1:18pm

I hope this won't be considered hijacking the thread… but regarding the link you posted, page 12 - Optimization #3 (HLSL) Reduce Typecasting:

Is everyone 100% sure that the “optimized” solution is really optimized? Because I think the author might be forgetting one thing: will the sampler sample all 4 channels of the texture in both cases? Won't the “unoptimized” variant sample only RGB and leave out the unused alpha? Not to mention that the two variants can, and mostly will, return different results if the image has any alpha: in the first case alpha will always be zero, in the second… whatever was in the texture. Obviously this can be suppressed, for instance by using an opaque blending mode, or by a zero in diffuse 0, which is a dynamic parameter, but then I am pretty sure that sending an unnecessarily large parameter every frame will be worse than one float conversion inside the shader.

Edit: Oh wait, it's from ATI. I remember this article made me mad for several reasons in the past.