Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

New SSAO Algorithm #62

Open
wants to merge 19 commits into
base: rend2
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 16 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
167 changes: 94 additions & 73 deletions codemp/rd-rend2/glsl/ssao.glsl
Original file line number Diff line number Diff line change
Expand Up @@ -8,95 +8,116 @@ void main()
{
gl_Position = attr_Position;
var_ScreenTex = attr_TexCoord0.xy;
//vec2 screenCoords = gl_Position.xy / gl_Position.w;
//var_ScreenTex = screenCoords * 0.5 + 0.5;
}

/*[Fragment]*/
uniform sampler2D u_ScreenDepthMap;
uniform vec4 u_ViewInfo; // zfar / znear, zfar
uniform sampler2D u_ScreenDepthMap; // colormap
uniform vec4 u_ViewInfo; // znear, zfar, 0, 0
uniform vec2 u_ScreenInfo; // width, height

uniform vec4 u_SSAOSettings; // aocap, 0, aoMultiplier, lightmap
uniform vec4 u_SSAOSettings2; // 0, aorange, depthTolerance, 0

in vec2 var_ScreenTex;

out vec4 out_Color;

// Table of nine fixed 2D sample offsets; every component lies in (-1, 1).
// NOTE(review): presumably a Poisson-disc distribution, going by the name --
// the generation method is not shown here, so confirm before relying on it.
vec2 poissonDisc[9] = vec2[9](
vec2(-0.7055767, 0.196515), vec2(0.3524343, -0.7791386),
vec2(0.2391056, 0.9189604), vec2(-0.07580382, -0.09224417),
vec2(0.5784913, -0.002528916), vec2(0.192888, 0.4064181),
vec2(-0.6335801, -0.5247476), vec2(-0.5579782, 0.7491854),
vec2(0.7320465, 0.6317794)
);

// Input: It uses texture coords as the random number seed.
// Output: The raw value of mod( 123456789., 1e-7 + 256. * dot(p,r) ).
//         The fract( cos( ... ) ) wrapper that would fold the value into
//         [0,1) is commented out below, so the result is NOT limited to
//         [0,1); for a positive divisor it lies in [0, divisor).
// Author: Michael Pohoreski
// Copyright: Copyleft 2012 :-)
// Source: http://stackoverflow.com/questions/5149544/can-i-generate-a-random-number-inside-a-pixel-shader

float random( const vec2 p )
{
// We need irrationals for pseudo randomness.
// Most (all?) known transcendental numbers will (generally) work.
const vec2 r = vec2(
23.1406926327792690, // e^pi (Gelfond's constant)
2.6651441426902251); // 2^sqrt(2) (Gelfond-Schneider constant)
//return fract( cos( mod( 123456789., 1e-7 + 256. * dot(p,r) ) ) );
// The 1e-7 term keeps the divisor from being exactly zero when dot(p,r) is 0.
return mod( 123456789., 1e-7 + 256. * dot(p,r) );
}
//
// AO Shader by Monsterovich :D
//

mat2 randomRotation( const vec2 p )
{
float r = random(p);
float sinr = sin(r);
float cosr = cos(r);
return mat2(cosr, sinr, -sinr, cosr);
float readDepth( in vec2 coord, in float znear, in float zfar ) {
return (2.0 * znear) / (zfar + znear - texture( u_ScreenDepthMap, coord ).x * (zfar - znear));
}

float getLinearDepth(sampler2D depthMap, const vec2 tex, const float zFarDivZNear)
{
float sampleZDivW = texture(depthMap, tex).r;
return 1.0 / mix(zFarDivZNear, 1.0, sampleZDivW);
float compareDepths( in float depth1, in float depth2, in float znear, in float zfar ) {
float diff = sqrt( clamp(1.0-(depth1-depth2) / ( u_SSAOSettings2.y /* aorange */ / (zfar - znear)),0.0,1.0) );
float ao = min(u_SSAOSettings.x /* aocap */,max(0.0,depth1-depth2-u_SSAOSettings2.z /* depthTolerance */) * u_SSAOSettings.z /* aoMultiplier */) * diff;
return ao;
}

float ambientOcclusion(sampler2D depthMap, const vec2 tex, const float zFarDivZNear, const float zFar)
const float P1 = 0.70710678118; // sin,cos(pi/4)
const float P2x = 0.92387953251; // cos(pi/8)
const float P2y = 0.38268343236; // sin(pi/8)
const float P3x = P2y; // cos(3*pi/8)
const float P3y = P2x; // sin(3*pi/8)


void main(void)
{
float result = 0;

float sampleZ = zFar * getLinearDepth(depthMap, tex, zFarDivZNear);

vec2 expectedSlope = vec2(dFdx(sampleZ), dFdy(sampleZ)) / vec2(dFdx(tex.x), dFdy(tex.y));

if (length(expectedSlope) > 5000.0)
return 1.0;

vec2 offsetScale = vec2(3.0 / sampleZ);

mat2 rmat = randomRotation(tex);

float depth = readDepth( var_ScreenTex, u_ViewInfo.x, u_ViewInfo.y );

float d;

float pw = 1.0 / u_ScreenInfo.x;
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think you can avoid having pw, ph and u_ScreenInfo by using texelFetch and gl_FragCoord. This lets you read from the texture using texel positions (e.g. 40, 100)

Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't think you need pw and ph. If you use texelFetch to read from the texture, you can use texel coordinates:

ivec2 screenCoord = ivec2(gl_FragCoord.xy);
float d;
d = texelFetch(u_ScreenDepthMap, screenCoord, 0).x;
d = texelFetch(u_ScreenDepthMap, ivec2(screenCoord.x, screenCoord.y + 1), 0).x;
etc..

If you really want to continue using texture, then u_ScreenInfo should contain (1.0 / w, 1.0 / h) instead of doing it in the shader.

float ph = 1.0 / u_ScreenInfo.y;

float ao = 0.0;
float aoScale = 1.0;

int i;
for (i = 0; i < 3; i++)
for (i = 0; i < 4; i++)
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Only sampling depths sideways and up/down doesn't look very good - it almost looks kind of pixelated. I think this needs to be changed to sampling in directions all around the current pixel, not just left/up/down/right.

Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is an awful lot of samples now... you're doing 64 samples now. Can you reduce this down to 16 again (like you had before)?

{
vec2 offset = rmat * poissonDisc[i] * offsetScale;
float sampleZ2 = zFar * getLinearDepth(depthMap, tex + offset, zFarDivZNear);

if (abs(sampleZ - sampleZ2) > 20.0)
result += 1.0;
else
{
float expectedZ = sampleZ + dot(expectedSlope, offset);
result += step(expectedZ - 1.0, sampleZ2);
}
// This creates a circle, using precalculated sin/cos for performance reasons
// pi / 8 (4 points)
d = readDepth( vec2(var_ScreenTex.x+pw*P2x,var_ScreenTex.y+ph*P2y), u_ViewInfo.x, u_ViewInfo.y);
ao += compareDepths(depth,d,u_ViewInfo.x,u_ViewInfo.y) / aoScale;
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Divisions are generally slower than multiplications.

This can be changed to..

ao += compareDepths(...) * aoScale;

...

aoScale *= 0.8;


d = readDepth( vec2(var_ScreenTex.x-pw*P2x,var_ScreenTex.y+ph*P2y), u_ViewInfo.x, u_ViewInfo.y);
ao += compareDepths(depth,d,u_ViewInfo.x,u_ViewInfo.y) / aoScale;

d = readDepth( vec2(var_ScreenTex.x+pw*P2x,var_ScreenTex.y-ph*P2y), u_ViewInfo.x, u_ViewInfo.y);
ao += compareDepths(depth,d,u_ViewInfo.x,u_ViewInfo.y) / aoScale;

d = readDepth( vec2(var_ScreenTex.x-pw*P2x,var_ScreenTex.y-ph*P2y), u_ViewInfo.x, u_ViewInfo.y);
ao += compareDepths(depth,d,u_ViewInfo.x,u_ViewInfo.y) / aoScale;

// 3*pi / 8 (4 points)
d = readDepth( vec2(var_ScreenTex.x+pw*P3x,var_ScreenTex.y+ph*P3y), u_ViewInfo.x, u_ViewInfo.y);
ao += compareDepths(depth,d,u_ViewInfo.x,u_ViewInfo.y) / aoScale;

d = readDepth( vec2(var_ScreenTex.x-pw*P3x,var_ScreenTex.y+ph*P3y), u_ViewInfo.x, u_ViewInfo.y);
ao += compareDepths(depth,d,u_ViewInfo.x,u_ViewInfo.y) / aoScale;

d = readDepth( vec2(var_ScreenTex.x+pw*P3x,var_ScreenTex.y-ph*P3y), u_ViewInfo.x, u_ViewInfo.y);
ao += compareDepths(depth,d,u_ViewInfo.x,u_ViewInfo.y) / aoScale;

d = readDepth( vec2(var_ScreenTex.x-pw*P3x,var_ScreenTex.y-ph*P3y), u_ViewInfo.x, u_ViewInfo.y);
ao += compareDepths(depth,d,u_ViewInfo.x,u_ViewInfo.y) / aoScale;

// pi / 4 (4 points)
d = readDepth( vec2(var_ScreenTex.x+pw/P1,var_ScreenTex.y+ph/P1), u_ViewInfo.x, u_ViewInfo.y);
ao += compareDepths(depth,d,u_ViewInfo.x,u_ViewInfo.y) / aoScale;

d = readDepth( vec2(var_ScreenTex.x-pw/P1,var_ScreenTex.y+ph/P1), u_ViewInfo.x, u_ViewInfo.y);
ao += compareDepths(depth,d,u_ViewInfo.x,u_ViewInfo.y) / aoScale;

d = readDepth( vec2(var_ScreenTex.x+pw/P1,var_ScreenTex.y-ph/P1), u_ViewInfo.x, u_ViewInfo.y);
ao += compareDepths(depth,d,u_ViewInfo.x,u_ViewInfo.y) / aoScale;

d = readDepth( vec2(var_ScreenTex.x-pw/P1,var_ScreenTex.y-ph/P1), u_ViewInfo.x, u_ViewInfo.y);
ao += compareDepths(depth,d,u_ViewInfo.x,u_ViewInfo.y) / aoScale;

// up/down/left/right
d = readDepth( vec2(var_ScreenTex.x+pw,var_ScreenTex.y), u_ViewInfo.x, u_ViewInfo.y);
ao += compareDepths(depth,d,u_ViewInfo.x,u_ViewInfo.y) / aoScale;

d = readDepth( vec2(var_ScreenTex.x-pw,var_ScreenTex.y), u_ViewInfo.x, u_ViewInfo.y);
ao += compareDepths(depth,d,u_ViewInfo.x,u_ViewInfo.y) / aoScale;

d = readDepth( vec2(var_ScreenTex.x,var_ScreenTex.y-ph), u_ViewInfo.x, u_ViewInfo.y);
ao += compareDepths(depth,d,u_ViewInfo.x,u_ViewInfo.y) / aoScale;

d = readDepth( vec2(var_ScreenTex.x,var_ScreenTex.y+ph), u_ViewInfo.x, u_ViewInfo.y);
ao += compareDepths(depth,d,u_ViewInfo.x,u_ViewInfo.y) / aoScale;

pw *= 2.0;
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

By multiplying by 2 you're skipping depth samples in later loops... is that intentional? i.e. you read left/right/up/down going from ±1, ±2, ±4, ±8...

Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why are you multiplying pw and ph by 2?

ph *= 2.0;
aoScale *= 1.2;
}

result *= 0.33333;

return result;
}

void main()
{
float result = ambientOcclusion(u_ScreenDepthMap, var_ScreenTex, u_ViewInfo.x, u_ViewInfo.y);

out_Color = vec4(vec3(result), 1.0);
ao /= 32.0;
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm guessing this should be divided by the number of samples you've taken (which is 64 at the moment)


float done = (1.0 - ao) * u_SSAOSettings.w;
out_Color = vec4(done, done, done, 0.0);
}
15 changes: 14 additions & 1 deletion codemp/rd-rend2/tr_backend.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2001,7 +2001,7 @@ static void RB_RenderSSAO()
{
const float zmax = backEnd.viewParms.zFar;
const float zmin = r_znear->value;
const vec4_t viewInfo = { zmax / zmin, zmax, 0.0f, 0.0f };
const vec4_t viewInfo = { zmin, zmax, 0.0f, 0.0f };


FBO_Bind(tr.quarterFbo[0]);
Expand Down Expand Up @@ -2029,6 +2029,19 @@ static void RB_RenderSSAO()
GL_BindToTMU(tr.hdrDepthImage, TB_COLORMAP);
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There are two texture samplers in the SSAO shader, but this is the only texture that gets bound. There should be another bind for the second texture...

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I thought it was already bound. Where can I find TB_LIGHTMAP?

Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Which texture are you expecting to read from?

GLSL_SetUniformVec4(&tr.ssaoShader, UNIFORM_VIEWINFO, viewInfo);

const vec2_t screenInfo = { (float)glConfig.vidWidth, (float)glConfig.vidHeight };
GLSL_SetUniformVec2(&tr.ssaoShader, UNIFORM_SCREENINFO, screenInfo);

const vec4_t ssaoSettings = { std::fabs(r_ssao_aocap->value),
0 /* unused */,
std::fabs(r_ssao_aoMultiplier->value),
std::fabs(r_ssao_lightmap->value) };
GLSL_SetUniformVec4(&tr.ssaoShader, UNIFORM_SSAOSETTINGS, ssaoSettings);
const vec4_t ssaoSettings2 = { 0 /* unused */,
std::fabs(r_ssao_aorange->value),
std::fabs(r_ssao_depthTolerance->value), 0 };
GLSL_SetUniformVec4(&tr.ssaoShader, UNIFORM_SSAOSETTINGS2, ssaoSettings2);

RB_InstantQuad2(quadVerts, texCoords);

FBO_Bind(tr.quarterFbo[1]);
Expand Down
4 changes: 4 additions & 0 deletions codemp/rd-rend2/tr_glsl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -111,6 +111,9 @@ static uniformInfo_t uniformsInfo[] =
{ "u_SpecularScale", GLSL_VEC4, 1 },

{ "u_ViewInfo", GLSL_VEC4, 1 },
{ "u_ScreenInfo", GLSL_VEC2, 1 },
{ "u_SSAOSettings", GLSL_VEC4, 1 },
{ "u_SSAOSettings2", GLSL_VEC4, 1 },
{ "u_ViewOrigin", GLSL_VEC3, 1 },
{ "u_LocalViewOrigin", GLSL_VEC3, 1 },
{ "u_ViewForward", GLSL_VEC3, 1 },
Expand Down Expand Up @@ -1884,6 +1887,7 @@ static int GLSL_LoadGPUProgramSSAO(

qglUseProgram(tr.ssaoShader.program);
GLSL_SetUniformInt(&tr.ssaoShader, UNIFORM_SCREENDEPTHMAP, TB_COLORMAP);
GLSL_SetUniformInt(&tr.ssaoShader, UNIFORM_SCREENIMAGEMAP, TB_LIGHTMAP);
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Don't need this anymore

qglUseProgram(0);

GLSL_FinishGPUShader(&tr.ssaoShader);
Expand Down
13 changes: 13 additions & 0 deletions codemp/rd-rend2/tr_init.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -133,6 +133,12 @@ cvar_t *r_srgb;
cvar_t *r_depthPrepass;
cvar_t *r_ssao;

cvar_t *r_ssao_aocap;
cvar_t *r_ssao_aoMultiplier;
cvar_t *r_ssao_lightmap;
cvar_t *r_ssao_aorange;
cvar_t *r_ssao_depthTolerance;

cvar_t *r_normalMapping;
cvar_t *r_specularMapping;
cvar_t *r_deluxeMapping;
Expand Down Expand Up @@ -1479,6 +1485,13 @@ void R_Register( void )
r_depthPrepass = ri.Cvar_Get( "r_depthPrepass", "1", CVAR_ARCHIVE, "" );
r_ssao = ri.Cvar_Get( "r_ssao", "0", CVAR_LATCH | CVAR_ARCHIVE, "" );

r_ssao_aocap = ri.Cvar_Get( "r_ssao_aocap", "1.0", CVAR_ARCHIVE, "" );
r_ssao_aoMultiplier = ri.Cvar_Get( "r_ssao_aoMultiplier", "20000.0", CVAR_ARCHIVE, "" );
char val[32]; sprintf(val, "%f", std::sqrt(10));
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why sqrt(10)???

Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why have you used sqrt(10)? What's so special about that number?

r_ssao_lightmap = ri.Cvar_Get( "r_ssao_lightmap", val, CVAR_ARCHIVE, "" );
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What's this for?

Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What does this cvar do? It looks like you're multiplying the result of the shader by this value.. but you already have a multiplier cvar (aoMultiplier)

r_ssao_aorange = ri.Cvar_Get( "r_ssao_aorange", "128.0", CVAR_ARCHIVE, "" );
r_ssao_depthTolerance = ri.Cvar_Get( "r_ssao_depthTolerance", "0.0", CVAR_ARCHIVE, "" );

r_normalMapping = ri.Cvar_Get( "r_normalMapping", "1", CVAR_ARCHIVE | CVAR_LATCH, "" );
r_specularMapping = ri.Cvar_Get( "r_specularMapping", "1", CVAR_ARCHIVE | CVAR_LATCH, "" );
r_deluxeMapping = ri.Cvar_Get( "r_deluxeMapping", "1", CVAR_ARCHIVE | CVAR_LATCH, "" );
Expand Down
10 changes: 9 additions & 1 deletion codemp/rd-rend2/tr_local.h
Original file line number Diff line number Diff line change
Expand Up @@ -1242,7 +1242,10 @@ typedef enum
UNIFORM_NORMALSCALE,
UNIFORM_SPECULARSCALE,

UNIFORM_VIEWINFO, // znear, zfar, width/2, height/2
UNIFORM_VIEWINFO,
UNIFORM_SCREENINFO,
UNIFORM_SSAOSETTINGS,
UNIFORM_SSAOSETTINGS2,
UNIFORM_VIEWORIGIN,
UNIFORM_LOCALVIEWORIGIN,
UNIFORM_VIEWFORWARD,
Expand Down Expand Up @@ -2506,6 +2509,11 @@ extern cvar_t *r_srgb;

extern cvar_t *r_depthPrepass;
extern cvar_t *r_ssao;
extern cvar_t *r_ssao_aocap;
extern cvar_t *r_ssao_aoMultiplier;
extern cvar_t *r_ssao_lightmap;
extern cvar_t *r_ssao_aorange;
extern cvar_t *r_ssao_depthTolerance;

extern cvar_t *r_normalMapping;
extern cvar_t *r_specularMapping;
Expand Down