I went ahead and fixed it, but it seems I was too late. Unlike Jjagg, though, I handled the texture size in the vertex shader.
Here is the replacement part of my shader. Add the struct at the beginning and replace the vertex shader:
…
// Output of main_vertex. The three fields carry the same semantics as the
// three parameters of main_fragment.
struct vsOut
{
// Vertex position after main_vertex multiplies it by matrixTransform.
float4 pos0 : SV_POSITION;
// Texture coordinate, passed through unchanged from the vertex input.
float2 tex0 : TEXCOORD0;
// Written by main_vertex as (1/textureSize.x, 0, 0, 1/textureSize.y);
// main_fragment reads .xy and .zw as its two sampling offsets.
float4 tex1 : TEXCOORD1;
};
// Component-wise absolute difference |A - B| of two four-component vectors.
float4 df(float4 A, float4 B)
{
    float4 delta = A - B;
    return abs(delta);
}
// Component-wise sum of five absolute differences (see df):
//   |a-b| + |a-c| + |d-e| + |d-f| + 4 * |g-h|
// The (g, h) pair is the only one given a weight other than one.
float4 weighted_distance(float4 a, float4 b, float4 c, float4 d,
                         float4 e, float4 f, float4 g, float4 h)
{
    // Accumulate in the same left-to-right order as the written formula.
    float4 total = df(a, b);
    total = total + df(a, c);
    total = total + df(d, e);
    total = total + df(d, f);
    total = total + 4.0 * df(g, h);
    return total;
}
// Vertex shader.
//   tex0 : texture coordinate, forwarded unchanged.
//   pos0 : vertex position, multiplied by matrixTransform.
// Also emits the per-axis reciprocal of textureSize in tex1, laid out as
// (1/size.x, 0, 0, 1/size.y) so that .xy and .zw can each be used as a
// two-component offset along one axis.
vsOut main_vertex(float2 tex0 : TEXCOORD0, float4 pos0 : POSITION0)
{
    float2 invSize = 1.0 / textureSize;

    vsOut result;
    result.pos0 = mul(pos0, matrixTransform);
    result.tex0 = tex0;
    result.tex1 = float4(invSize.x, 0, 0, invSize.y);
    return result;
}
// all identified optimizations have been amalgamated into this file
// Texture dimensions supplied by the host application. main_vertex takes the
// per-axis reciprocal and main_fragment multiplies texture coordinates by it.
// NOTE(review): presumably measured in texels - confirm against the code that sets it.
float2 textureSize;
// Matrix that main_vertex multiplies the incoming vertex position by.
float4x4 matrixTransform;
// Factor applied to one side of the edr_left / edr_up comparisons in main_fragment.
const static float coef = 2.0;
// Weights that main_fragment multiplies each sampled RGB triple by to reduce
// it to a single scalar.
// NOTE(review): the values equal 48 * (0.299, 0.587, 0.114), which looks like
// a scaled set of luma weights - confirm.
const static float3 yuv_weighted = float3(14.352, 28.176, 5.472);
// Sampler for the source texture, bound to sampler register s0.
sampler decal : register(s0);
// Output of main_vertex. The three fields carry the same semantics as the
// three parameters of main_fragment.
struct vsOut
{
// Vertex position after main_vertex multiplies it by matrixTransform.
float4 pos0 : SV_POSITION;
// Texture coordinate, passed through unchanged from the vertex input.
float2 tex0 : TEXCOORD0;
// Written by main_vertex as (1/textureSize.x, 0, 0, 1/textureSize.y);
// main_fragment reads .xy and .zw as its two sampling offsets.
float4 tex1 : TEXCOORD1;
};
// Component-wise absolute difference |A - B| of two four-component vectors.
float4 df(float4 A, float4 B)
{
    float4 delta = A - B;
    return abs(delta);
}
// Component-wise sum of five absolute differences (see df):
//   |a-b| + |a-c| + |d-e| + |d-f| + 4 * |g-h|
// The (g, h) pair is the only one given a weight other than one.
float4 weighted_distance(float4 a, float4 b, float4 c, float4 d,
                         float4 e, float4 f, float4 g, float4 h)
{
    // Accumulate in the same left-to-right order as the written formula.
    float4 total = df(a, b);
    total = total + df(a, c);
    total = total + df(d, e);
    total = total + df(d, f);
    total = total + 4.0 * df(g, h);
    return total;
}
// Vertex shader.
//   tex0 : texture coordinate, forwarded unchanged.
//   pos0 : vertex position, multiplied by matrixTransform.
// Also emits the per-axis reciprocal of textureSize in tex1, laid out as
// (1/size.x, 0, 0, 1/size.y) so that .xy and .zw can each be used as a
// two-component offset along one axis.
vsOut main_vertex(float2 tex0 : TEXCOORD0, float4 pos0 : POSITION0)
{
    float2 invSize = 1.0 / textureSize;

    vsOut result;
    result.pos0 = mul(pos0, matrixTransform);
    result.tex0 = tex0;
    result.tex1 = float4(invSize.x, 0, 0, invSize.y);
    return result;
}
// Pixel shader. Samples 21 texels around tex0 (a 5x5 neighbourhood without its
// four corners), reduces each to a scalar with yuv_weighted, and returns either
// the centre texel E or one of its four direct neighbours (B, D, F, H), always
// with alpha 1.0. No blending is done: the result is one sampled colour.
//   pos0 : declared for the SV_POSITION semantic; not read in the body.
//   tex0 : texture coordinate of the current pixel.
//   tex1 : .xy is used as the horizontal step, .zw as the vertical step.
// NOTE(review): the structure resembles an xBR-style pixel-art scaler - confirm
// against the shader this was ported from.
float4 main_fragment(float4 pos0 : SV_POSITION, float2 tex0 : TEXCOORD0,
float4 tex1 : TEXCOORD1) : SV_TARGET
{
bool4 edr, edr_left, edr_up, px; // px = pixel, edr = edge detection rule
bool4 ir_lv1, ir_lv2_left, ir_lv2_up;
bool4 nc; // new_color
bool4 fx, fx_left, fx_up; // inequations of straight lines.
// Fractional part of the texture coordinate after scaling by textureSize.
// NOTE(review): presumably the position inside the current source texel - confirm.
float2 fp = frac(tex0 * textureSize);
// Sampling offsets: main_vertex sets these to (1/size.x, 0) and (0, 1/size.y).
float2 dx = tex1.xy;
float2 dy = tex1.zw;
// Inner 3x3 block around the current coordinate, row by row:
//   A B C
//   D E F
//   G H I
float3 A = tex2D(decal, tex0 - dx - dy).xyz;
float3 B = tex2D(decal, tex0 - dy).xyz;
float3 C = tex2D(decal, tex0 + dx - dy).xyz;
float3 D = tex2D(decal, tex0 - dx).xyz;
float3 E = tex2D(decal, tex0).xyz;
float3 F = tex2D(decal, tex0 + dx).xyz;
float3 G = tex2D(decal, tex0 - dx + dy).xyz;
float3 H = tex2D(decal, tex0 + dy).xyz;
float3 I = tex2D(decal, tex0 + dx + dy).xyz;
// Outer ring, two steps away along one axis (the four corners are not sampled).
float3 A1 = tex2D(decal, tex0 - dx - 2.0 * dy).xyz;
float3 C1 = tex2D(decal, tex0 + dx - 2.0 * dy).xyz;
float3 A0 = tex2D(decal, tex0 - 2.0 * dx - dy).xyz;
float3 G0 = tex2D(decal, tex0 - 2.0 * dx + dy).xyz;
float3 C4 = tex2D(decal, tex0 + 2.0 * dx - dy).xyz;
float3 I4 = tex2D(decal, tex0 + 2.0 * dx + dy).xyz;
float3 G5 = tex2D(decal, tex0 - dx + 2.0 * dy).xyz;
float3 I5 = tex2D(decal, tex0 + dx + 2.0 * dy).xyz;
float3 B1 = tex2D(decal, tex0 - 2.0 * dy).xyz;
float3 D0 = tex2D(decal, tex0 - 2.0 * dx).xyz;
float3 H5 = tex2D(decal, tex0 + 2.0 * dy).xyz;
float3 F4 = tex2D(decal, tex0 + 2.0 * dx).xyz;
// Reduce each texel to one scalar (row . yuv_weighted) and pack four texels
// per float4. Component .x of each lower-case vector is the scalar of the
// like-named upper-case texel; .y/.z/.w hold the texels that take the same
// role when the neighbourhood is rotated in quarter turns
// (e.g. b = B, D, H, F), so every later vector operation evaluates four
// orientations at once.
float4 b = mul(float4x3(B, D, H, F), yuv_weighted);
float4 c = mul(float4x3(C, A, G, I), yuv_weighted);
float4 e = mul(float4x3(E, E, E, E), yuv_weighted);
// Remaining neighbours are obtained by rotating the components of b and c.
float4 d = b.yzwx;
float4 f = b.wxyz;
float4 g = c.zwxy;
float4 h = b.zwxy;
float4 i = c.wxyz;
float4 i4 = mul(float4x3(I4, C1, A0, G5), yuv_weighted);
float4 i5 = mul(float4x3(I5, C4, A1, G0), yuv_weighted);
float4 h5 = mul(float4x3(H5, F4, B1, D0), yuv_weighted);
float4 f4 = h5.yzwx;
// Coefficients of three families of line inequalities of the form
//   A * fp.y + B * fp.x > C
// with one inequality per vector component: (Ao, Bo, Co) feed fx,
// (Ax, Bx, Cx) feed fx_left and (Ay, By, Cy) feed fx_up.
float4 Ao = float4(1.0, -1.0, -1.0, 1.0);
float4 Bo = float4(1.0, 1.0, -1.0, -1.0);
float4 Co = float4(1.5, 0.5, -0.5, 0.5);
float4 Ax = float4(1.0, -1.0, -1.0, 1.0);
float4 Bx = float4(0.5, 2.0, -0.5, -2.0);
float4 Cx = float4(1.0, 1.0, -0.5, 0.0);
float4 Ay = float4(1.0, -1.0, -1.0, 1.0);
float4 By = float4(2.0, 0.5, -2.0, -0.5);
float4 Cy = float4(2.0, 0.0, -1.0, 0.5);
// These inequations define the line below which interpolation occurs.
// Each one is evaluated component by component rather than as one vector compare.
fx.x = (Ao.x * fp.y + Bo.x * fp.x > Co.x);
fx.y = (Ao.y * fp.y + Bo.y * fp.x > Co.y);
fx.z = (Ao.z * fp.y + Bo.z * fp.x > Co.z);
fx.w = (Ao.w * fp.y + Bo.w * fp.x > Co.w);
fx_left.x = (Ax.x * fp.y + Bx.x * fp.x > Cx.x);
fx_left.y = (Ax.y * fp.y + Bx.y * fp.x > Cx.y);
fx_left.z = (Ax.z * fp.y + Bx.z * fp.x > Cx.z);
fx_left.w = (Ax.w * fp.y + Bx.w * fp.x > Cx.w);
fx_up.x = (Ay.x * fp.y + By.x * fp.x > Cy.x);
fx_up.y = (Ay.y * fp.y + By.y * fp.x > Cy.y);
fx_up.z = (Ay.z * fp.y + By.z * fp.x > Cy.z);
fx_up.w = (Ay.w * fp.y + By.w * fp.x > Cy.w);
// Restrictions: a rule can only fire where the compared scalars actually differ.
ir_lv1 = ((e != f) && (e != h));
ir_lv2_left = ((e != g) && (d != g));
ir_lv2_up = ((e != c) && (b != c));
// Two weighted difference sums; edr holds where w1 is strictly the smaller one.
float4 w1 = weighted_distance(e, c, g, i, h5, f4, h, f);
float4 w2 = weighted_distance(h, d, i5, f, i4, b, e, i);
float4 df_fg = df(f, g);
float4 df_hc = df(h, c);
// edr_left needs coef * |f-g| <= |h-c|; edr_up needs coef * |h-c| <= |f-g|.
float4 t1 = (coef * df_fg);
float4 t2 = df_hc;
float4 t3 = df_fg;
float4 t4 = (coef * df_hc);
edr = (w1 < w2) && ir_lv1;
edr_left = (t1 <= t2) && ir_lv2_left;
edr_up = (t4 <= t3) && ir_lv2_up;
// A component selects a new colour when edr holds and at least one line test
// passes: fx on its own, or fx_left / fx_up together with the matching rule.
nc = (edr && (fx || edr_left && fx_left || edr_up && fx_up));
// t1 and t2 are reused here: px is true where f is at least as close to e as h is.
t1 = df(e, f);
t2 = df(e, h);
px = t1 <= t2;
// The first component of nc that is set (x, then y, z, w) decides the output;
// px picks between that component's two candidate neighbours. If no component
// is set, the centre texel E is returned unchanged.
float3 res = nc.x ? px.x ? F : H :
nc.y ? px.y ? B : F :
nc.z ? px.z ? D : B :
nc.w ? px.w ? H : D : E;
return float4(res.xyz, 1.0);
}
// Single technique with a single pass that binds main_vertex and
// main_fragment, compiled for the vs_4_0_level_9_3 and ps_4_0_level_9_3
// shader profiles respectively.
technique T0
{
pass P0
{
VertexShader = compile vs_4_0_level_9_3 main_vertex();
PixelShader = compile ps_4_0_level_9_3 main_fragment();
}
}