I went ahead and fixed it, but it seems I was too late. Unlike Jjagg, though, I did the texture-size calculation in the vertex shader.
Here is the replacement part of my shader. Add the struct at the beginning and replace the vertex shader:
// Data passed from main_vertex to the pixel stage.
struct vsOut
{
    float4 pos0 : SV_POSITION; // transformed vertex position
    float2 tex0 : TEXCOORD0;   // texture coordinate, passed through unchanged
    float4 tex1 : TEXCOORD1;   // (1/textureSize.x, 0, 0, 1/textureSize.y), filled in by main_vertex
};
// Component-wise absolute difference between A and B.
float4 df(float4 A, float4 B)
{
    return abs(A - B);
}
// Sums the distances of the pairs (a,b), (a,c), (d,e) and (d,f), plus the
// distance of the pair (g,h) weighted by 4. All arithmetic is component-wise.
float4 weighted_distance(float4 a, float4 b, float4 c, float4 d,
                         float4 e, float4 f, float4 g, float4 h)
{
    return (df(a, b) + df(a, c) + df(d, e) + df(d, f) + 4.0 * df(g, h));
}
// Vertex shader: transforms the position by matrixTransform, forwards the
// texture coordinate, and precomputes the reciprocal of textureSize so the
// pixel stage does not have to.
vsOut main_vertex(float2 tex0 : TEXCOORD0, float4 pos0 : POSITION0)
{
    vsOut output;
    output.pos0 = mul(pos0, matrixTransform);
    output.tex0 = tex0;

    // Reciprocal of the texture size; presumably the size of one texel in
    // texture-coordinate units (assumes textureSize is in texels - confirm).
    float2 ps = 1.0 / textureSize;

    // xy holds the horizontal step (ps.x, 0), zw holds the vertical step (0, ps.y).
    output.tex1 = float4(ps.x, 0, 0, ps.y);
    return output;
}
// all identified optimizations have been amalgamated into this file
// Not assigned in this file (presumably set by the host application - confirm).
// Used as 1.0 / textureSize for the sampling step and as tex0 * textureSize
// for the position inside the current texel.
float2 textureSize;
// Matrix the incoming vertex position is multiplied by in main_vertex.
float4x4 matrixTransform;
// Multiplier applied to the distances compared by the edr_left / edr_up rules in main_fragment.
const static float coef = 2.0;
// Per-channel weights that reduce each RGB sample to one scalar in main_fragment;
// numerically equal to 48 * (0.299, 0.587, 0.114).
const static float3 yuv_weighted = float3(14.352, 28.176, 5.472);
// Our sampler for the texture, which is just going to be pretty simple
sampler decal : register(s0);
// Data passed from main_vertex to main_fragment.
struct vsOut
{
    float4 pos0 : SV_POSITION; // transformed vertex position
    float2 tex0 : TEXCOORD0;   // texture coordinate, passed through unchanged
    float4 tex1 : TEXCOORD1;   // xy = horizontal sampling step, zw = vertical sampling step
};
// Component-wise absolute difference between A and B.
float4 df(float4 A, float4 B)
{
    return abs(A - B);
}
// Sums the distances of the pairs (a,b), (a,c), (d,e) and (d,f), plus the
// distance of the pair (g,h) weighted by 4. All arithmetic is component-wise.
float4 weighted_distance(float4 a, float4 b, float4 c, float4 d,
                         float4 e, float4 f, float4 g, float4 h)
{
    return (df(a, b) + df(a, c) + df(d, e) + df(d, f) + 4.0 * df(g, h));
}
// Vertex shader: transforms the position by matrixTransform, forwards the
// texture coordinate, and precomputes the sampling steps that main_fragment
// reads back as dx (tex1.xy) and dy (tex1.zw).
vsOut main_vertex(float2 tex0 : TEXCOORD0,
                  float4 pos0 : POSITION0)
{
    vsOut output;
    output.pos0 = mul(pos0, matrixTransform);
    output.tex0 = tex0;

    // Reciprocal of the texture size; presumably the size of one texel in
    // texture-coordinate units (assumes textureSize is in texels - confirm).
    float2 ps = 1.0 / textureSize;

    // xy holds the horizontal step (ps.x, 0), zw holds the vertical step (0, ps.y).
    output.tex1 = float4(ps.x, 0, 0, ps.y);
    return output;
}
// Pixel shader. Samples the centre texel E and 20 surrounding texels, reduces
// each to a scalar using yuv_weighted, evaluates a set of edge rules, and
// returns either E or one of its four direct neighbours (B, D, F, H) with
// alpha forced to 1.0.
//
// pos0 is declared to match the vertex output but is not read.
// tex0 is the texture coordinate of the current pixel.
// tex1 carries the sampling steps: xy = horizontal step, zw = vertical step.
float4 main_fragment(float4 pos0 : SV_POSITION, float2 tex0 : TEXCOORD0,
                     float4 tex1 : TEXCOORD1) : SV_TARGET
{
    bool4 edr, edr_left, edr_up, px; // px = pixel, edr = edge detection rule
    bool4 ir_lv1, ir_lv2_left, ir_lv2_up;
    bool4 nc; // new_color
    bool4 fx, fx_left, fx_up; // inequations of straight lines.

    // Fractional position of this pixel inside the current source texel.
    float2 fp = frac(tex0 * textureSize);

    float2 dx = tex1.xy;
    float2 dy = tex1.zw;

    // 3x3 neighbourhood around the centre texel E:
    //   A B C
    //   D E F
    //   G H I
    float3 A = tex2D(decal, tex0 - dx - dy).xyz;
    float3 B = tex2D(decal, tex0 - dy).xyz;
    float3 C = tex2D(decal, tex0 + dx - dy).xyz;
    float3 D = tex2D(decal, tex0 - dx).xyz;
    float3 E = tex2D(decal, tex0).xyz;
    float3 F = tex2D(decal, tex0 + dx).xyz;
    float3 G = tex2D(decal, tex0 - dx + dy).xyz;
    float3 H = tex2D(decal, tex0 + dy).xyz;
    float3 I = tex2D(decal, tex0 + dx + dy).xyz;

    // Outer ring: samples two steps away along at least one axis
    // (the four corners of the 5x5 window are not sampled).
    float3 A1 = tex2D(decal, tex0 - dx - 2.0 * dy).xyz;
    float3 C1 = tex2D(decal, tex0 + dx - 2.0 * dy).xyz;
    float3 A0 = tex2D(decal, tex0 - 2.0 * dx - dy).xyz;
    float3 G0 = tex2D(decal, tex0 - 2.0 * dx + dy).xyz;
    float3 C4 = tex2D(decal, tex0 + 2.0 * dx - dy).xyz;
    float3 I4 = tex2D(decal, tex0 + 2.0 * dx + dy).xyz;
    float3 G5 = tex2D(decal, tex0 - dx + 2.0 * dy).xyz;
    float3 I5 = tex2D(decal, tex0 + dx + 2.0 * dy).xyz;
    float3 B1 = tex2D(decal, tex0 - 2.0 * dy).xyz;
    float3 D0 = tex2D(decal, tex0 - 2.0 * dx).xyz;
    float3 H5 = tex2D(decal, tex0 + 2.0 * dy).xyz;
    float3 F4 = tex2D(decal, tex0 + 2.0 * dx).xyz;

    // Each mul() dots four RGB samples with yuv_weighted, giving one scalar
    // per sample packed into a float4. The swizzles below re-order those
    // lanes so that the same rule can be evaluated for four directions at once.
    float4 b = mul(float4x3(B, D, H, F), yuv_weighted);
    float4 c = mul(float4x3(C, A, G, I), yuv_weighted);
    float4 e = mul(float4x3(E, E, E, E), yuv_weighted);
    float4 d = b.yzwx;
    float4 f = b.wxyz;
    float4 g = c.zwxy;
    float4 h = b.zwxy;
    float4 i = c.wxyz;

    float4 i4 = mul(float4x3(I4, C1, A0, G5), yuv_weighted);
    float4 i5 = mul(float4x3(I5, C4, A1, G0), yuv_weighted);
    float4 h5 = mul(float4x3(H5, F4, B1, D0), yuv_weighted);
    float4 f4 = h5.yzwx;

    // Coefficients of the lines A*y + B*x > C tested against fp, one lane per
    // direction: (Ao, Bo, Co) feed fx, (Ax, Bx, Cx) feed fx_left and
    // (Ay, By, Cy) feed fx_up.
    float4 Ao = float4(1.0, -1.0, -1.0, 1.0);
    float4 Bo = float4(1.0, 1.0, -1.0, -1.0);
    float4 Co = float4(1.5, 0.5, -0.5, 0.5);
    float4 Ax = float4(1.0, -1.0, -1.0, 1.0);
    float4 Bx = float4(0.5, 2.0, -0.5, -2.0);
    float4 Cx = float4(1.0, 1.0, -0.5, 0.0);
    float4 Ay = float4(1.0, -1.0, -1.0, 1.0);
    float4 By = float4(2.0, 0.5, -2.0, -0.5);
    float4 Cy = float4(2.0, 0.0, -1.0, 0.5);

    // These inequations define the line below which interpolation occurs.
    fx.x = (Ao.x * fp.y + Bo.x * fp.x > Co.x);
    fx.y = (Ao.y * fp.y + Bo.y * fp.x > Co.y);
    fx.z = (Ao.z * fp.y + Bo.z * fp.x > Co.z);
    fx.w = (Ao.w * fp.y + Bo.w * fp.x > Co.w);
    fx_left.x = (Ax.x * fp.y + Bx.x * fp.x > Cx.x);
    fx_left.y = (Ax.y * fp.y + Bx.y * fp.x > Cx.y);
    fx_left.z = (Ax.z * fp.y + Bx.z * fp.x > Cx.z);
    fx_left.w = (Ax.w * fp.y + Bx.w * fp.x > Cx.w);
    fx_up.x = (Ay.x * fp.y + By.x * fp.x > Cy.x);
    fx_up.y = (Ay.y * fp.y + By.y * fp.x > Cy.y);
    fx_up.z = (Ay.z * fp.y + By.z * fp.x > Cy.z);
    fx_up.w = (Ay.w * fp.y + By.w * fp.x > Cy.w);

    // Preconditions for the edge rules: the centre must differ from the
    // relevant neighbours, otherwise the rule is disabled for that lane.
    ir_lv1 = ((e != f) && (e != h));
    ir_lv2_left = ((e != g) && (d != g));
    ir_lv2_up = ((e != c) && (b != c));

    float4 w1 = weighted_distance(e, c, g, i, h5, f4, h, f);
    float4 w2 = weighted_distance(h, d, i5, f, i4, b, e, i);
    float4 df_fg = df(f, g);
    float4 df_hc = df(h, c);
    float4 t1 = (coef * df_fg);
    float4 t2 = df_hc;
    float4 t3 = df_fg;
    float4 t4 = (coef * df_hc);

    // Main rule fires where w1 is strictly smaller than w2; the left/up rules
    // compare the f-g and h-c distances against each other scaled by coef.
    edr = (w1 < w2) && ir_lv1;
    edr_left = (t1 <= t2) && ir_lv2_left;
    edr_up = (t4 <= t3) && ir_lv2_up;

    // A lane takes a new colour when its main rule holds and fp lies beyond
    // at least one of the enabled lines.
    nc = (edr && (fx || edr_left && fx_left || edr_up && fx_up));

    // t1/t2 are reused here: px picks whichever of f / h is closer to e.
    t1 = df(e, f);
    t2 = df(e, h);
    px = t1 <= t2;

    // The first lane whose nc is set decides the output; if none is set the
    // centre texel E is returned unchanged.
    float3 res = nc.x ? px.x ? F : H :
                 nc.y ? px.y ? B : F :
                 nc.z ? px.z ? D : B :
                 nc.w ? px.w ? H : D : E;

    return float4(res.xyz, 1.0);
}
// Single-pass technique binding main_vertex and main_fragment, compiled for
// the vs_4_0_level_9_3 / ps_4_0_level_9_3 profiles.
technique T0
{
    pass P0
    {
        VertexShader = compile vs_4_0_level_9_3 main_vertex();
        PixelShader = compile ps_4_0_level_9_3 main_fragment();
    }
}