McGuire ScreenSpaceReflections

Hi,

I made an attempt to implement SSR as described by Morgan McGuire, but unfortunately I have not managed to get it working to this day…

The algorithm is described here (source code included): http://casual-effects.blogspot.de/2014/08/screen-space-ray-tracing.html

There is also an SVN repository for the G3D Innovation Engine, which can be found here: http://g3d.cs.williams.edu/g3d/www/index.html#toc3
It contains a more up-to-date version of the algorithm!

I would like to share my shader source code (I ported it from GLSL to HLSL). I am curious whether we can get this to work :relaxed:
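For reference, here is a rough cheat sheet of the GLSL-to-HLSL mappings such a port involves (the right-hand side matches the identifiers used in the code below):

// GLSL (original)                          HLSL (this port)
// vec2 / vec3 / mat4                    -> float2 / float3 / float4x4
// texture(tex, uv)                      -> tex2D(texSampler, uv)
// texelFetch(csZBuffer, ivec2(p), 0)    -> csZBufferTexture.Load(int3(p, 0))
// M * v (column-vector convention)      -> mul(v, M) (row-vector convention, transposed matrices)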

// based on a technique of Morgan McGuire described here
// http://casual-effects.blogspot.de/2014/08/screen-space-ray-tracing.html

#define VS_SHADERMODEL vs_5_0
#define PS_SHADERMODEL ps_5_0

//variables
float4x4 View;
float4x4 Projection;
float4x4 Viewport;
float4x4 ProjectionViewport;
float4x4 ProjectionInverse;
float4x4 ViewProjectionInverse;
float3 ClipInfo;
float3 CameraPosition;
float NearPlane;
float FarPlane;
float2 Resolution;
float ZThickness;
float Stride;
float JitterFraction;
float MaxSteps;
float MaxRayTraceDistance;
float HitPixelShiftX;
float HitPixelShiftY;
float HitPixelScaleX;
float HitPixelScaleY;

//textures
Texture2D DiffuseTexture;
sampler DiffuseSampler = sampler_state
{
    texture = <DiffuseTexture>;
    AddressU = Wrap;
    AddressV = Wrap;
    MipFilter = Linear;
    MinFilter = Linear;
    MagFilter = Linear;
};

Texture2D NormalTexture;
sampler NormalSampler = sampler_state
{
    texture = <NormalTexture>;
    AddressU = Wrap;
    AddressV = Wrap;
    MipFilter = Linear;
    MinFilter = Linear;
    MagFilter = Linear;
};

Texture2D DepthTexture;
sampler DepthSampler = sampler_state
{
    texture = <DepthTexture>;
    AddressU = Wrap;
    AddressV = Wrap;
    MipFilter = Linear;
    MinFilter = Linear;
    MagFilter = Linear;
};

//methods
float LinearDepthFunction(in float zw, uniform float4x4 Projection, uniform float NearPlane, uniform float FarPlane)
{
    float linearDepth = Projection._43 / (zw - Projection._33);
    float linearDepthNormalized = (linearDepth - NearPlane) / (FarPlane - NearPlane);

    return linearDepthNormalized;
}

// Reconstructs a 24-bit value in [0, 1] from three 8-bit channels;
// 65793 = 65536 + 256 + 1, so an input of (1, 1, 1) decodes to exactly 1.0
float ColorToUnit24NewFunction(in float3 color)
{
    const float3 scale = float3(65536.0, 256.0, 1.0) / 65793.0;

    return dot(color, scale);
}

// Expands a normal stored in [0, 1] back into the [-1, 1] range
float3 DecodeNormalFunction(in float3 EncodedNormal)
{
    float3 decodedNormal = 2 * (EncodedNormal - 0.5f);

    return decodedNormal;
}

float2 UV2ScreenPositionFunction(in float2 UV)
{
    float2 screenPosition = float2(UV.x * 2 - 1, -(UV.y * 2 - 1));

    return screenPosition;
}

float2 ScreenPosition2UVFunction(in float2 ScreenPosition)
{
    float2 UV = 0.5f * (float2(ScreenPosition.x, -ScreenPosition.y) + 1);
    
    return UV;
}

float4 ReconstructWorldPositionFunction(in float2 ScreenPosition, in float Depth, in float4x4 ViewProjectionInverse)
{
    float4 position;
    position.xy = ScreenPosition.xy;
    position.z = Depth;
    position.w = 1.0f;
    position = mul(position, ViewProjectionInverse);
    position /= position.w;
    return position;
}

float4 UV2WorldPositionFunction(in float2 UV, in sampler DepthSampler, in float4x4 ViewProjectionInverse)
{
    float4 position;

    float2 screenPosition = UV2ScreenPositionFunction(UV);

    float4 depthData = tex2D(DepthSampler, UV);
    float depth = ColorToUnit24NewFunction(depthData.rgb);

    position.xy = screenPosition.xy;
    position.z = depth;
    position.w = 1.0f;
    position = mul(position, ViewProjectionInverse);
    position /= position.w;

    return position;
}

// By Morgan McGuire and Michael Mara at Williams College 2014
// Released as open source under the BSD 2-Clause License
// http://opensource.org/licenses/BSD-2-Clause
 
float DistanceSquaredFunction(float2 a, float2 b)
{
    a -= b;
    return dot(a, a);
}

bool ScreenSpaceReflectionsFunction( // Returns true if the ray hit something
 in float3 csOrig, // Camera-space ray origin, which must be within the view volume
 in float3 csDir, // Unit length camera-space ray direction
 uniform float4x4 proj, // The regular projection matrix (not actually used in the body below)
 uniform float4x4 viewport, // A viewport matrix that maps x,y from [-1, +1] normalized device coordinates to pixel coordinates (not actually used in the body below)
 uniform float4x4 projectionViewport, // A projection matrix that maps to pixel coordinates (not [-1, +1] normalized device coordinates)
 in Texture2D csZBufferTexture, // The camera-space Z buffer (all negative values)
 uniform float2 csZBufferSize, // Dimensions of csZBuffer
 uniform float zThickness, // Camera space thickness to ascribe to each pixel in the depth buffer
 uniform float nearPlaneZ, // (Negative number)
 uniform float stride, // Step in horizontal or vertical pixels between samples. This is a float because integer math is slow on GPUs, but should be set to an integer >= 1
 uniform float jitter, // Number between 0 and 1 for how far to bump the ray in stride units to conceal banding artifacts
 const float maxSteps, // Maximum number of iterations. Higher gives better images but may be slow
 uniform float maxDistance, // Maximum camera-space distance to trace before returning a miss
 out float2 hitPixel, // Pixel coordinates of the first intersection with the scene
 out float3 hitPoint, // Camera space location of the ray hit
 uniform float HitPixelShiftX, // Debugging, shifts hitPixel in x direction
 uniform float HitPixelShiftY, // Debugging, shifts hitPixel in y direction
 uniform float HitPixelScaleX, // Debugging, scales hitPixel in x direction
 uniform float HitPixelScaleY, // Debugging, scales hitPixel in y direction
 in float2 UV, // Debugging, UV
 out float3 DebugColor,
 out float3 DebugColor2
)
{
    // Clip to the near plane    
    //float rayLength = ((csOrig.z + csDir.z * maxDistance) > nearPlaneZ) ?
    //    (nearPlaneZ - csOrig.z) / csDir.z : maxDistance;

    //DEBUG
    //float rayLength = ((csOrig.z + csDir.z * maxDistance) < nearPlaneZ) ?
    //    (nearPlaneZ - csOrig.z) / csDir.z : maxDistance;

    //DEBUG
    float rayLength = maxDistance;

    float3 csEndPoint = csOrig + csDir * rayLength;
 
    // Project into homogeneous clip space
    float4 H0 = mul(float4(csOrig, 1.0), projectionViewport);
    float4 H1 = mul(float4(csEndPoint, 1.0), projectionViewport);
    float k0 = 1.0 / H0.w;
    float k1 = 1.0 / H1.w;
 
    // The interpolated homogeneous version of the camera-space points
    float3 Q0 = csOrig * k0;
    float3 Q1 = csEndPoint * k1;
 
    // Screen-space endpoints
    float2 P0 = H0.xy * k0;
    float2 P1 = H1.xy * k1;
 
    // If the line is degenerate, make it cover at least one pixel
    // to avoid handling zero-pixel extent as a special case later
    P1 += (DistanceSquaredFunction(P0, P1) < 0.0001) ? float2(0.01, 0.01) : float2(0.0, 0.0);
    float2 delta = P1 - P0;
 
    // Permute so that the primary iteration is in x to collapse
    // all quadrant-specific DDA cases later
    bool permute = false;
    if (abs(delta.x) < abs(delta.y))
    {
        // This is a more-vertical line
        permute = true;
        delta = delta.yx;
        P0 = P0.yx;
        P1 = P1.yx;
    }
 
    float stepDir = sign(delta.x);
    float invdx = stepDir / delta.x;
 
    // Track the derivatives of Q and k
    float3 dQ = (Q1 - Q0) * invdx;
    float dk = (k1 - k0) * invdx;
    float2 dP = float2(stepDir, delta.y * invdx);
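    // Example: if delta = (10, 3) after the permute, then stepDir = 1,
    // invdx = 0.1 and dP = (1, 0.3): the DDA advances exactly one pixel per
    // iteration along the primary axis and 0.3 pixels along the secondary one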
 
    // Scale derivatives by the desired pixel stride and then
    // offset the starting values by the jitter fraction
    dP *= stride;
    dQ *= stride;
    dk *= stride;
    P0 += dP * jitter;
    Q0 += dQ * jitter;
    k0 += dk * jitter;
 
    // Slide P from P0 to P1, (now-homogeneous) Q from Q0 to Q1, k from k0 to k1
    float3 Q = Q0;
 
    // Adjust end condition for iteration direction
    float end = P1.x * stepDir;
 
    float k = k0;
    hitPixel = float2(-1.0, -1.0); // Initialize the out parameter in case the loop exits immediately
    float stepCount = 0.0;
    float prevZMaxEstimate = csOrig.z;
    float rayZMin = prevZMaxEstimate;
    float rayZMax = prevZMaxEstimate;
    float sceneZMax = rayZMax + 100; // Start far behind the ray so the first loop test cannot report a hit
    float2 P;
    for (P = P0;
         ((P.x * stepDir) <= end) && (stepCount < maxSteps) &&
         ((rayZMax < sceneZMax - zThickness) || (rayZMin > sceneZMax)) &&
          (sceneZMax != 0);
         P += dP, Q.z += dQ.z, k += dk, ++stepCount)
    {
         
        rayZMin = prevZMaxEstimate;
        rayZMax = (dQ.z * 0.5 + Q.z) / (dk * 0.5 + k);
        prevZMaxEstimate = rayZMax;
        if (rayZMin > rayZMax)
        {
            float t = rayZMin;
            rayZMin = rayZMax;
            rayZMax = t;
        }
 
        hitPixel = permute ? P.yx : P;
        //hitPixel.y = 1.0f - hitPixel.y; // You may need hitPixel.y = csZBufferSize.y - hitPixel.y; here if your vertical axis is different than ours in screen space

        //hitPixel.y *= -1.0f;

        //hitPixel.x += HitPixelShiftX;
        //hitPixel.y += HitPixelShiftY;
        //hitPixel.x *= HitPixelScaleX;
        //hitPixel.y *= HitPixelScaleY;

        //sceneZMax = texelFetch(csZBuffer, int2(hitPixel), 0);
        float4 sceneZMaxData = csZBufferTexture.Load(int3(hitPixel, 0));
        //float4 sceneZMaxData = csZBufferTexture.Load(int3(hitPixel * csZBufferSize, 0));
        sceneZMax = ColorToUnit24NewFunction(sceneZMaxData.rgb);
        //sceneZMax = -sceneZMax;
    }
     
    // Advance Q based on the number of steps
    Q.xy += dQ.xy * stepCount;
    hitPoint = Q * (1.0 / k);

    // Support debugging. This will compile away if DebugColor is unused
    if ((P.x * stepDir) > end)
    {
        // Hit the max ray distance -> blue
        DebugColor = float3(0, 0, 1);
    }
    else if (stepCount >= maxSteps)
    {
        // Ran out of steps -> red
        DebugColor = float3(1, 0, 0);
    }
    else if (sceneZMax == 0.0)
    {
        // Went off screen -> yellow
        DebugColor = float3(1, 1, 0);
    }
    else
    {
        // Encountered a valid hit -> green
        // ((rayZMax >= sceneZMax - csZThickness) && (rayZMin <= sceneZMax))
        DebugColor = float3(0, 1, 0);
    }

    DebugColor2 = csDir;

    return (rayZMax >= sceneZMax - zThickness) && (rayZMin < sceneZMax);
}
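
// (The function above is the version from the 2014 blog post. The NEW version below
// follows the updated G3D source: it re-enables the near-plane ray clipping that is
// commented out above, clamps rayZMax to the ray's z range, optionally handles
// hyperbolic depth buffers, and refines the hit within the last coarse step when
// stride > 1.)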

// By Morgan McGuire and Michael Mara at Williams College 2014
// Released as open source under the BSD 2-Clause License
// http://opensource.org/licenses/BSD-2-Clause
 
void SwapFunction(inout float a, inout float b)
{
    float temp = a;
    a = b;
    b = temp;
}

// Note that positions (which may affect z) are snapped during rasterization, but 
// attributes are not.

/* 
 Clipping plane constants for use by reconstructZ

 \param clipInfo = (z_f == -inf()) ? Vector3(z_n, -1.0f, 1.0f) : Vector3(z_n * z_f,  z_n - z_f,  z_f);
 \sa G3D::Projection::reconstructFromDepthClipInfo
*/
float ReconstructCSZFunction(float d, float3 clipInfo)
{
    // With clipInfo = (z_n * z_f, z_n - z_f, z_f) this evaluates
    // z = (z_n * z_f) / ((z_n - z_f) * d + z_f),
    // i.e. it reconstructs the (negative!) camera-space z from the hyperbolic depth d
    return clipInfo[0] / (clipInfo[1] * d + clipInfo[2]);
}

float4 ReconstructViewPositionFunction(in float2 ScreenPosition, in float Depth, in float4x4 ProjectionInverse)
{
    float4 position;
    position.xy = ScreenPosition.xy;
    position.z = Depth;
    position.w = 1.0f;
    position = mul(position, ProjectionInverse);
    position /= position.w;
    return position;
}

bool ScreenSpaceReflectionsNEWFunction( // Returns true if the ray hit something
 in float3 csOrigin, // Camera-space ray origin, which must be within the view volume and must have z < -0.01 and project within the valid screen rectangle
 in float3 csDirection, // Unit length camera-space ray direction
 uniform float4x4 Projection, // The regular projection matrix. (The original G3D signature passes a project-to-pixel matrix here, usually g3d_ProjectToPixelMatrix; in this port, ProjectionViewport below plays that role.)
 /* EXTRA */ uniform float4x4 Viewport, // A viewport matrix that maps x,y from [-1, +1] normalized device coordinates to pixel coordinates
 /* EXTRA */ uniform float4x4 ProjectionViewport, // A projectToPixel matrix that maps to pixel coordinates (not [-1, +1] normalized device coordinates)
 /* EXTRA */ uniform float4x4 ProjectionInverse, // An inverse project matrix that maps to camera space (from [-1, +1] normalized device coordinates)
 /* EXTRA */ in float2 ScreenPosition, // The screen position
 in Texture2D csZBufferTexture, // The depth or camera-space Z buffer (all negative values), depending on the value of csZBufferIsHyperbolic
 uniform float2 csZBufferSize, // Dimensions of csZBuffer
 uniform float csZThickness, // Camera space thickness to ascribe to each pixel in the depth buffer
 const in bool csZBufferIsHyperbolic, // True if csZBuffer is an OpenGL depth buffer, false (faster) if csZBuffer contains (negative) "linear" camera-space z values. Const so that the compiler can evaluate the branch based on it at compile time
 const in bool flipYCoordinate,
 float3 clipInfo, // See G3D::Camera documentation
 uniform float nearPlaneZ, // Negative number. Doesn't have to be THE actual near plane, just a reasonable value for clipping rays headed towards the camera
 /* EXTRA */ uniform float farPlaneZ, // Negative number.
 uniform float stride, // Step in horizontal or vertical pixels between samples. This is a float because integer math is slow on GPUs, but should be set to an integer >= 1
 uniform float jitterFraction, // Number between 0 and 1 for how far to bump the ray in stride units to conceal banding artifacts, plus the stride ray offset. It is recommended to set this to at least 1.0 to avoid self-intersection artifacts. Using 1 + float((int(gl_FragCoord.x) + int(gl_FragCoord.y)) & 1) * 0.5 gives a nice dither pattern when stride is > 1.0;
 const float maxSteps, // Maximum number of iterations. Higher gives better images but may be slow
 in float maxRayTraceDistance, // Maximum camera-space distance to trace before returning a miss
 out float2 hitPixel, // Pixel coordinates of the first intersection with the scene
 out int hitLayer,
 out float3 csHitPoint, // Camera space location of the ray hit
 /* EXTRA */ uniform float HitPixelShiftX, // Debugging, shifts hitPixel in x direction
 /* EXTRA */ uniform float HitPixelShiftY, // Debugging, shifts hitPixel in y direction
 /* EXTRA */ uniform float HitPixelScaleX, // Debugging, scales hitPixel in x direction
 /* EXTRA */ uniform float HitPixelScaleY, // Debugging, scales hitPixel in y direction
 /* EXTRA */ in float2 UV, // Debugging, UV
 out float3 DebugColor,
 /* EXTRA */ out float3 DebugColor2
)
{
    //DEBUG
    nearPlaneZ = -abs(nearPlaneZ);
    farPlaneZ = -abs(farPlaneZ);

    // Clip ray to a near plane in 3D (doesn't have to be *the* near plane, although that would be a good idea) 
    float rayLength = ((csOrigin.z + csDirection.z * maxRayTraceDistance) > nearPlaneZ) ?
        (nearPlaneZ - csOrigin.z) / csDirection.z //clipping to nearPlane -> rayLength = distance(csOrigin, nearPlane) / csDirection.z
        : maxRayTraceDistance; // ray does not reach nearPlane, so maxRayTraceDistance can be used.

    //DEBUG
    //DebugColor2 = abs(rayLength) / (abs(farPlaneZ) - abs(nearPlaneZ));

    float3 csEndPoint = csOrigin + csDirection * rayLength;
 
    // Project into screen space
    float4 H0 = mul(float4(csOrigin, 1.0), ProjectionViewport);
    float4 H1 = mul(float4(csEndPoint, 1.0), ProjectionViewport);

    //DEBUG
    //H0 = mul(float4(csOrigin, 1.0), Projection);
    //H1 = mul(float4(csEndPoint, 1.0), Projection);

    //DEBUG
    //float4 H0 = mul(float4(csOrigin, 1.0), Projection);
    //H0.xyzw /= H0.w;
    //H0 = mul(H0, Viewport);
    //float4 H1 = mul(float4(csEndPoint, 1.0), Projection);
    //H1.xyzw /= H1.w;
    //H1 = mul(H1, Viewport);

    // There are a lot of divisions by w that can be turned into multiplications
    // at some minor precision loss...and we need to interpolate these 1/w values
    // anyway.
    //
    // Because the caller was required to clip to the near plane,
    // this homogeneous division (projecting from 4D to 2D) is guaranteed 
    // to succeed. 
    float k0 = 1.0 / H0.w;
    float k1 = 1.0 / H1.w;
 
    // Switch the original points to values that interpolate linearly in 2D
    float3 Q0 = csOrigin * k0;
    float3 Q1 = csEndPoint * k1;
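    // Both Q.z and k = 1/w interpolate linearly in screen space, so the
    // perspective-correct camera-space depth at any point on the 2D line can be
    // recovered later as Q.z / k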
 
    // Screen-space endpoints
    float2 P0 = H0.xy * k0;
    float2 P1 = H1.xy * k1;

    // [Optional clipping to frustum sides here]
 
    // Initialize to off screen
    hitPixel = float2(-1.0, -1.0);
    hitLayer = 0; // Only one layer
    DebugColor2 = float3(0.0, 0.0, 0.0); // Ensure this out parameter is always written, even if the loop exits immediately

    // If the line is degenerate, make it cover at least one pixel
    // to avoid handling zero-pixel extent as a special case later
    P1 += (DistanceSquaredFunction(P0, P1) < 0.0001) ? float2(0.01, 0.01) : float2(0.0, 0.0);
    float2 delta = P1 - P0;
 
    // Permute so that the primary iteration is in x to collapse
    // all quadrant-specific DDA cases later
    bool permute = (abs(delta.x) < abs(delta.y));
    if (permute)
    {
        // More-vertical line. Create a permutation that swaps x and y in the output
        // by directly swizzling the inputs.
        delta = delta.yx;
        P0 = P0.yx;
        P1 = P1.yx;
    }
 
    // From now on, "x" is the primary iteration direction and "y" is the secondary one
    float stepDirection = sign(delta.x);
    float invdx = stepDirection / delta.x;
    float2 dP = float2(stepDirection, invdx * delta.y);
    
    // Track the derivatives of Q and k
    float3 dQ = (Q1 - Q0) * invdx;
    float dk = (k1 - k0) * invdx;

    // Because we test 1/2 a texel forward along the ray, on the very last iteration
    // the interpolation can go past the end of the ray. Use these bounds to clamp it.
    float zMin = min(csEndPoint.z, csOrigin.z);
    float zMax = max(csEndPoint.z, csOrigin.z);

    // Scale derivatives by the desired pixel stride
    dP *= stride;
    dQ *= stride;
    dk *= stride;

    // Offset the starting values by the jitter fraction
    P0 += dP * jitterFraction;
    Q0 += dQ * jitterFraction;
    k0 += dk * jitterFraction;
 
    // Slide P from P0 to P1, (now-homogeneous) Q from Q0 to Q1, k from k0 to k1
    float3 Q = Q0;
    float k = k0;
    
    // We track the ray depth at +/- 1/2 pixel to treat pixels as clip-space solid 
    // voxels. Because the depth at -1/2 for a given pixel will be the same as at 
    // +1/2 for the previous iteration, we actually only have to compute one value 
    // per iteration.
    float prevZMaxEstimate = csOrigin.z;
    float stepCount = 0.0;
    float rayZMin = prevZMaxEstimate;
    float rayZMax = prevZMaxEstimate;
    float sceneZMax = rayZMax + 1e4; // Start far behind the ray so the loop's intersection test cannot report a hit on the first iteration; 1e4 is just an arbitrary large offset

    // P1.x is never modified after this point, so pre-scale it by 
    // the step direction for a signed comparison
    float end = P1.x * stepDirection;
 
    // We only advance the z field of Q in the inner loop, since
    // Q.xy is never used until after the loop terminates.

    float2 P;
    for (P = P0;
         ((P.x * stepDirection) <= end) &&
        (stepCount < maxSteps) &&
        // DEBUG ###############################################
         ((rayZMax < sceneZMax - csZThickness) ||
            (rayZMin > sceneZMax)) &&
          (sceneZMax != 0.0);
         P += dP, Q.z += dQ.z, k += dk, stepCount += 1.0)
    {
        // The depth range that the ray covers within this loop
        // iteration.  Assume that the ray is moving in increasing z
        // and swap if backwards.  Because one end of the interval is
        // shared between adjacent iterations, we track the previous
        // value and then swap as needed to ensure correct ordering
        rayZMin = prevZMaxEstimate;

        // Compute the value at 1/2 step into the future
        rayZMax = (dQ.z * 0.5 + Q.z) / (dk * 0.5 + k);
        rayZMax = clamp(rayZMax, zMin, zMax);
        prevZMaxEstimate = rayZMax;

        // Since we don't know if the ray is stepping forward or backward in depth,
        // maybe swap. Note that we preserve our original z "max" estimate first.
        if (rayZMin > rayZMax)
        {
            SwapFunction(rayZMin, rayZMax);
        }
 
        // Camera-space z of the background
        hitPixel = permute ? P.yx : P;

        //DEBUG
        // This compiles away when flipYCoordinate = false
        if (flipYCoordinate)
        {
            // You may need hitPixel.y = csZBufferSize.y - hitPixel.y; here if your vertical axis is different than ours in screen space
            hitPixel.y = csZBufferSize.y - hitPixel.y;
        }
        
        //float4 sceneZMaxData = csZBufferTexture.Load(int3(hitPixel, 0));
        //float sceneZMaxDepth = ColorToUnit24NewFunction(sceneZMaxData.rgb);

        float sceneZMaxDepth = csZBufferTexture.Load(int3(hitPixel, 0)).r;

        //DEBUG convert to camera space
        float2 UVCurrent = hitPixel / csZBufferSize;
        float2 ScreenPositionCurrent = UV2ScreenPositionFunction(UVCurrent);
        sceneZMax = ReconstructViewPositionFunction(ScreenPositionCurrent, sceneZMaxDepth, ProjectionInverse).z;
        
        DebugColor2.xyz = float3(0, 0, -sceneZMax / (abs(farPlaneZ) - abs(nearPlaneZ)));

        // This compiles away when csZBufferIsHyperbolic = false.
        // NOTE: sceneZMax was already converted to camera space above via
        // ProjectionInverse, so csZBufferIsHyperbolic must stay false here,
        // otherwise the depth would be converted twice.
        if (csZBufferIsHyperbolic)
        {
            sceneZMax = ReconstructCSZFunction(sceneZMax, clipInfo);
        }
    } // pixel on ray
     
    // Undo the last increment, which ran after the test variables
    // were set up.
    P -= dP;
    Q.z -= dQ.z;
    k -= dk;
    stepCount -= 1.0;

    bool hit = (rayZMax >= sceneZMax - csZThickness) && (rayZMin <= sceneZMax);

    // If using non-unit stride and we hit a depth surface...
    if ((stride > 1) && hit)
    {
        // Refine the hit point within the last large-stride step
        
        // Retreat one whole stride step from the previous loop so that
        // we can re-run that iteration at finer scale
        P -= dP;
        Q.z -= dQ.z;
        k -= dk;
        stepCount -= 1.0;

        // Take the derivatives back to single-pixel stride
        float invStride = 1.0 / stride;
        dP *= invStride;
        dQ.z *= invStride;
        dk *= invStride;

        // For this test, we don't bother checking thickness or passing the end, since we KNOW there will
        // be a hit point. As soon as
        // the ray passes behind an object, call it a hit. Advance (stride + 1) steps to fully check this 
        // interval (we could skip the very first iteration, but then we'd need identical code to prime the loop)
        float refinementStepCount = 0;

        // This is the current sample point's z-value, taken back to camera space
        prevZMaxEstimate = Q.z / k;
        rayZMin = prevZMaxEstimate;

        // Ensure that the FOR-loop test passes on the first iteration since we
        // won't have a valid value of sceneZMax to test.
        sceneZMax = rayZMin - 1e7;

        for (;
            (refinementStepCount <= stride * 1.4) && // 1.4 gives head-room beyond one coarse step (presumably ~sqrt(2) slack; the constant is not explained in the original source)
            (rayZMin > sceneZMax) && (sceneZMax != 0.0);
            P += dP, Q.z += dQ.z, k += dk, refinementStepCount += 1.0)
        {
            rayZMin = prevZMaxEstimate;

            // Compute the ray camera-space Z value at 1/2 fine step (pixel) into the future
            rayZMax = (dQ.z * 0.5 + Q.z) / (dk * 0.5 + k);
            rayZMax = clamp(rayZMax, zMin, zMax);

            prevZMaxEstimate = rayZMax;
            rayZMin = min(rayZMax, rayZMin);

            hitPixel = permute ? P.yx : P;
            
            //DEBUG
            // This compiles away when flipYCoordinate = false
            if (flipYCoordinate)
            {
            // You may need hitPixel.y = csZBufferSize.y - hitPixel.y; here if your vertical axis is different than ours in screen space
                hitPixel.y = csZBufferSize.y - hitPixel.y;
            }

            //float4 sceneZMaxData2 = csZBufferTexture.Load(int3(hitPixel, 0));
            //float sceneZMaxDepth2 = ColorToUnit24NewFunction(sceneZMaxData2.rgb);

            float sceneZMaxDepth2 = csZBufferTexture.Load(int3(hitPixel, 0)).r;

            //DEBUG convert to camera space
            float2 UVCurrent = hitPixel / csZBufferSize;
            float2 ScreenPositionCurrent = UV2ScreenPositionFunction(UVCurrent);
            sceneZMax = ReconstructViewPositionFunction(ScreenPositionCurrent, sceneZMaxDepth2, ProjectionInverse).z;

            // As above: sceneZMax is already camera-space here, so csZBufferIsHyperbolic must stay false
            if (csZBufferIsHyperbolic)
            {
                sceneZMax = ReconstructCSZFunction(sceneZMax, clipInfo);
            }
        }

        // Undo the last increment, which happened after the test variables were set up
        Q.z -= dQ.z;
        refinementStepCount -= 1;

        // Count the refinement steps as fractions of the original stride. Save a register
        // by not retaining invStride until here
        float refinementStepCountOverStride = refinementStepCount / stride;
        stepCount += refinementStepCountOverStride;
        //DebugColor2 = float3(refinementStepCountOverStride, refinementStepCountOverStride, refinementStepCountOverStride);
    } // refinement

    Q.xy += dQ.xy * stepCount;
    csHitPoint = Q * (1.0 / k);

    // Support debugging. This will compile away if DebugColor is unused
    if ((P.x * stepDirection) > end)
    {
        // Hit the max ray distance -> blue
        DebugColor = float3(0, 0, 1);
    }
    else if (stepCount >= maxSteps)
    {
        // Ran out of steps -> red
        DebugColor = float3(1, 0, 0);
    }
    else if (sceneZMax == 0.0)
    {
        // Went off screen -> yellow
        DebugColor = float3(1, 1, 0);
    }
    else
    {
        // Encountered a valid hit -> green
        // ((rayZMax >= sceneZMax - csZThickness) && (rayZMin <= sceneZMax))
        DebugColor = float3(0, 1, 0);
    }
        
    // Does the last point discovered represent a valid hit?
    return hit;
}

float DepthLinearDecodeFunction(in sampler DepthSampler, in float2 UV, in float FarPlane)
{
    // Decode depth from 32 bit: assumes the buffer stores linear view-space depth
    // divided by FarPlane and returns the (negative) camera-space z
    float depth = -tex2D(DepthSampler, UV).r * FarPlane;

    return depth;
}
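
// NOTE / open question: PixelShaderFunction below feeds this linear, negative
// camera-space depth into ReconstructWorldPositionFunction, which multiplies by
// ViewProjectionInverse and therefore expects the hyperbolic post-projection depth
// in [0, 1]. If the G-buffer really stores linear z / FarPlane, a reconstruction
// along these lines might be consistent instead (hypothetical sketch; the name and
// conventions are my own and unverified):
float3 ReconstructViewPositionFromLinearZFunction(in float2 ScreenPosition, in float LinearZ01, in float4x4 ProjectionInverse)
{
    // Unproject the pixel onto the far plane (NDC z = 1) to get a view-space ray endpoint
    float4 farPoint = mul(float4(ScreenPosition, 1.0f, 1.0f), ProjectionInverse);
    farPoint /= farPoint.w;

    // For a perspective projection the view-space position lies on this ray,
    // scaled by the normalized linear depth (view-space z / FarPlane)
    return farPoint.xyz * LinearZ01;
}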

//structs
struct VertexShaderInput
{
    float4 Position : SV_POSITION;
    float2 UV : TEXCOORD0;
};

struct VertexShaderOutput
{
    float4 Position : SV_POSITION;
    float2 UV : TEXCOORD0;
};

struct PixelShaderOutput
{
    float4 Diffuse : SV_TARGET;
};

VertexShaderOutput VertexShaderFunction(VertexShaderInput input)
{
    VertexShaderOutput output;

    output.Position = input.Position;
    output.UV = input.UV;

    return output;
}

PixelShaderOutput PixelShaderFunction(VertexShaderOutput input)
{
    PixelShaderOutput output;

    //DEBUG
    //float2 DEBUGUV = float2(0.5f, 0.5f);
    //float2 DEBUGScreenPosition = UV2ScreenPositionFunction(DEBUGUV);

    float2 UV = input.UV;
    float2 ScreenPosition = UV2ScreenPositionFunction(input.UV);

    float4 diffuseData = tex2D(DiffuseSampler, UV);
    float4 normalData = tex2D(NormalSampler, UV);
    //float4 depthData = tex2D(DepthSampler, UV);

    //float depth = ColorToUnit24NewFunction(depthData.rgb);
    float depth = DepthLinearDecodeFunction(DepthSampler, UV, FarPlane);
    float3 normal = DecodeNormalFunction(normalData.rgb);
    float4 position = ReconstructWorldPositionFunction(ScreenPosition, depth, ViewProjectionInverse);

    // Calculate reflected position in world space
    float3 incident = normalize(position.xyz - CameraPosition);
    float3 reflectVector = reflect(incident, normalize(normal));
    float4 reflectedPosition = float4(position.xyz + reflectVector, 1.0f);
    
    // Use view space position for csOrigin
    float4 csOrigin = mul(position, View);

    // Calculate normalized csDirection
    float4 csDestination = mul(reflectedPosition, View);
    float4 csDirection = float4(normalize(csDestination.xyz - csOrigin.xyz), 1.0f);
        
    float2 hitPixel;
    int hitLayer;
    float3 hitPoint;
    float3 debugColor;
    float3 debugColor2;
    if (ScreenSpaceReflectionsNEWFunction(
        csOrigin.xyz, 
        csDirection.xyz, 
        Projection, 
        Viewport, 
        ProjectionViewport, 
        ProjectionInverse, 
        ScreenPosition, 
        DepthTexture, 
        Resolution, 
        ZThickness, 
        false, // csZBufferIsHyperbolic
        false, // flip Y
        ClipInfo, 
        NearPlane, 
        FarPlane, 
        Stride, 
        JitterFraction, 
        MaxSteps, 
        MaxRayTraceDistance, 
        hitPixel, 
        hitLayer, 
        hitPoint, 
        HitPixelShiftX, 
        HitPixelShiftY, 
        HitPixelScaleX, 
        HitPixelScaleY, 
        UV, 
        debugColor, 
        debugColor2))
    {
        int3 UVW = int3(hitPixel, 0);
        float4 diffuseDataLoad = DiffuseTexture.Load(UVW);
        output.Diffuse = diffuseDataLoad;
        //output.Diffuse.xyz *= debugColor;
    } 
    else
    {
        output.Diffuse = diffuseData;
        //output.Diffuse.xyz *= debugColor;
    }

    //debugColor = csOrigin.z / (FarPlane - NearPlane);
    //debugColor.z = -debugColor.z;

    //debugColor = csDirection.xyz;


    //float4 bla = mul(csOrigin, Projection);
    //bla.xyzw /= bla.w;
    ////NDC
    ////bla.x += 1;
    ////bla.x /= 2.0f;
    ////bla.y += 1;
    ////bla.y /= 2.0f;
    ////PixelCoords
    //float4 blabla = mul(bla, Viewport);
    //blabla.xy /= Resolution;
    //debugColor = blabla.xyz;

    if (UV.x > 0.5f)
    {
        output.Diffuse.xyz = debugColor;
    }

    //if (UV.x > 0.75f)
    //{
    //    output.Diffuse.xyz = debugColor2;
    //}

    return output;
}

technique Technique1
{
    pass Pass1
    {
        VertexShader = compile VS_SHADERMODEL VertexShaderFunction();
        PixelShader = compile PS_SHADERMODEL PixelShaderFunction();
    }
}

What’s the problem? Some people have implemented some form of SSR in MonoGame.

I don’t know. I made various assumptions while writing the shader because, in several places, it was not clear to me which values or matrices the algorithm needs, and I never got any reasonable output. It has also been a while since I last worked on it. I just thought it would be good to post what I have so far; maybe someone wants to take a look.
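For example, one thing that was never clear to me is the exact form of the Viewport / ProjectionViewport ("project-to-pixel") matrices the algorithm expects. For reference, this is roughly what I assume, using the row-vector convention from the shader above (a hypothetical sketch only, not verified):

// Maps NDC x,y in [-1, +1] to pixel coordinates for a resolution.x by resolution.y
// target. With row vectors the translation sits in the fourth row; ProjectionViewport
// would then be mul(Projection, Viewport), built once on the CPU side.
float4x4 MakeViewportMatrixFunction(in float2 resolution)
{
    return float4x4(
        resolution.x * 0.5f,  0.0f,                 0.0f, 0.0f,
        0.0f,                -resolution.y * 0.5f,  0.0f, 0.0f, // negated: NDC +y is up, pixel +y is down
        0.0f,                 0.0f,                 1.0f, 0.0f,
        resolution.x * 0.5f,  resolution.y * 0.5f,  0.0f, 1.0f);
}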