Skip to content
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion crates/bevy_solari/src/realtime/node.rs
Original file line number Diff line number Diff line change
Expand Up @@ -204,7 +204,8 @@ impl ViewNode for SolariLightingNode {
let bind_group_resolve_dlss_rr_textures = view_dlss_rr_textures.map(|d| {
render_context.render_device().create_bind_group(
"solari_lighting_bind_group_resolve_dlss_rr_textures",
&self.bind_group_layout_resolve_dlss_rr_textures,
&pipeline_cache
.get_bind_group_layout(&self.bind_group_layout_resolve_dlss_rr_textures),
&BindGroupEntries::sequential((
&d.diffuse_albedo.default_view,
&d.specular_albedo.default_view,
Expand Down
29 changes: 18 additions & 11 deletions crates/bevy_solari/src/realtime/restir_di.wgsl
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@
struct PushConstants { frame_index: u32, reset: u32 }
var<push_constant> constants: PushConstants;

const INITIAL_SAMPLES = 32u;
const INITIAL_SAMPLES = 8u;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What's the perf difference?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not sure; I couldn't get the Bistro scene set up properly to test :(

const SPATIAL_REUSE_RADIUS_PIXELS = 30.0;
const CONFIDENCE_WEIGHT_CAP = 20.0;

Expand Down Expand Up @@ -73,7 +73,12 @@ fn spatial_and_shade(@builtin(global_invocation_id) global_id: vec3<u32>) {
let input_reservoir = load_reservoir_b(global_id.xy);
let spatial_reservoir = load_spatial_reservoir(global_id.xy, depth, surface.world_position, surface.world_normal, &rng);
let merge_result = merge_reservoirs(input_reservoir, spatial_reservoir, surface.world_position, surface.world_normal, diffuse_brdf, &rng);
let combined_reservoir = merge_result.merged_reservoir;
var combined_reservoir = merge_result.merged_reservoir;

if reservoir_valid(combined_reservoir) {
let resolved_light_sample = resolve_light_sample(combined_reservoir.sample, light_sources[combined_reservoir.sample.light_id >> 16u]);
combined_reservoir.unbiased_contribution_weight *= trace_light_visibility(surface.world_position, resolved_light_sample.world_position);
}

store_reservoir_a(global_id.xy, combined_reservoir);

Expand Down Expand Up @@ -133,7 +138,7 @@ fn generate_initial_reservoir(world_position: vec3<f32>, world_normal: vec3<f32>
fn load_temporal_reservoir(pixel_id: vec2<u32>, depth: f32, world_position: vec3<f32>, world_normal: vec3<f32>) -> Reservoir {
let motion_vector = textureLoad(motion_vectors, pixel_id, 0).xy;
let temporal_pixel_id_float = round(vec2<f32>(pixel_id) - (motion_vector * view.main_pass_viewport.zw));
let temporal_pixel_id = vec2<u32>(temporal_pixel_id_float);
let temporal_pixel_id = permute_pixel(vec2<u32>(temporal_pixel_id_float));

// Check if the current pixel was off screen during the previous frame (current pixel is newly visible),
// or if all temporal history should be assumed to be invalid
Expand Down Expand Up @@ -164,6 +169,15 @@ fn load_temporal_reservoir(pixel_id: vec2<u32>, depth: f32, world_position: vec3
return temporal_reservoir;
}

// Remaps a pixel coordinate to a nearby pixel using a per-frame pattern.
//
// The two lowest bits of `constants.frame_index` form the x offset and the next
// two bits form the y offset (each in 0..=3). The pixel is shifted by that offset,
// the two low bits of each component are flipped (XOR with 3), and the shift is
// undone. The result is clamped component-wise to `view.main_pass_viewport.zw - 1.0`
// (presumably the largest valid pixel coordinate; confirm that `zw` is the viewport size).
//
// NOTE(review): u32 arithmetic wraps, so for coordinates close to 0 the final
// subtraction can wrap to a very large value, which the `min` then clamps to the
// far edge of the viewport. Confirm callers tolerate that.
fn permute_pixel(pixel_id: vec2<u32>) -> vec2<u32> {
    let frame = constants.frame_index;
    let jitter = vec2(frame & 3u, (frame >> 2u) & 3u);

    // Shift, flip the two low bits of each component, then shift back.
    let permuted = ((pixel_id + jitter) ^ vec2(3u)) - jitter;

    let max_pixel_id = vec2<u32>(view.main_pass_viewport.zw - 1.0);
    return min(permuted, max_pixel_id);
}

fn load_spatial_reservoir(pixel_id: vec2<u32>, depth: f32, world_position: vec3<f32>, world_normal: vec3<f32>, rng: ptr<function, u32>) -> Reservoir {
let spatial_pixel_id = get_neighbor_pixel_id(pixel_id, rng);

Expand All @@ -173,14 +187,7 @@ fn load_spatial_reservoir(pixel_id: vec2<u32>, depth: f32, world_position: vec3<
return empty_reservoir();
}

var spatial_reservoir = load_reservoir_b(spatial_pixel_id);

if reservoir_valid(spatial_reservoir) {
let resolved_light_sample = resolve_light_sample(spatial_reservoir.sample, light_sources[spatial_reservoir.sample.light_id >> 16u]);
spatial_reservoir.unbiased_contribution_weight *= trace_light_visibility(world_position, resolved_light_sample.world_position);
}

return spatial_reservoir;
return load_reservoir_b(spatial_pixel_id);
}

fn get_neighbor_pixel_id(center_pixel_id: vec2<u32>, rng: ptr<function, u32>) -> vec2<u32> {
Expand Down
42 changes: 18 additions & 24 deletions crates/bevy_solari/src/realtime/restir_gi.wgsl
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,9 @@ fn spatial_and_shade(@builtin(global_invocation_id) global_id: vec3<u32>) {
let spatial = load_spatial_reservoir(global_id.xy, depth, surface.world_position, surface.world_normal, &rng);
let merge_result = merge_reservoirs(input_reservoir, surface.world_position, surface.world_normal, surface.material.base_color / PI,
spatial.reservoir, spatial.world_position, spatial.world_normal, spatial.diffuse_brdf, &rng);
let combined_reservoir = merge_result.merged_reservoir;
var combined_reservoir = merge_result.merged_reservoir;

combined_reservoir.radiance *= trace_point_visibility(surface.world_position, combined_reservoir.sample_point_world_position);

gi_reservoirs_a[pixel_index] = combined_reservoir;

Expand Down Expand Up @@ -120,38 +122,32 @@ fn generate_initial_reservoir(world_position: vec3<f32>, world_normal: vec3<f32>
fn load_temporal_reservoir(pixel_id: vec2<u32>, depth: f32, world_position: vec3<f32>, world_normal: vec3<f32>) -> NeighborInfo {
let motion_vector = textureLoad(motion_vectors, pixel_id, 0).xy;
let temporal_pixel_id_float = round(vec2<f32>(pixel_id) - (motion_vector * view.main_pass_viewport.zw));
let temporal_pixel_id = permute_pixel(vec2<u32>(temporal_pixel_id_float));

// Check if the current pixel was off screen during the previous frame (current pixel is newly visible),
// or if all temporal history should be assumed to be invalid
if any(temporal_pixel_id_float < vec2(0.0)) || any(temporal_pixel_id_float >= view.main_pass_viewport.zw) || bool(constants.reset) {
return NeighborInfo(empty_reservoir(), vec3(0.0), vec3(0.0), vec3(0.0));
}

let temporal_pixel_id_base = vec2<u32>(round(temporal_pixel_id_float));
for (var i = 0u; i < 4u; i++) {
let temporal_pixel_id = permute_pixel(temporal_pixel_id_base, i);

// Check if the pixel features have changed heavily between the current and previous frame
let temporal_depth = textureLoad(previous_depth_buffer, temporal_pixel_id, 0);
let temporal_surface = gpixel_resolve(textureLoad(previous_gbuffer, temporal_pixel_id, 0), temporal_depth, temporal_pixel_id, view.main_pass_viewport.zw, previous_view.world_from_clip);
let temporal_diffuse_brdf = temporal_surface.material.base_color / PI;
if pixel_dissimilar(depth, world_position, temporal_surface.world_position, world_normal, temporal_surface.world_normal, view) {
continue;
}

let temporal_pixel_index = temporal_pixel_id.x + temporal_pixel_id.y * u32(view.main_pass_viewport.z);
var temporal_reservoir = gi_reservoirs_a[temporal_pixel_index];
// Check if the pixel features have changed heavily between the current and previous frame
let temporal_depth = textureLoad(previous_depth_buffer, temporal_pixel_id, 0);
let temporal_surface = gpixel_resolve(textureLoad(previous_gbuffer, temporal_pixel_id, 0), temporal_depth, temporal_pixel_id, view.main_pass_viewport.zw, previous_view.world_from_clip);
let temporal_diffuse_brdf = temporal_surface.material.base_color / PI;
if pixel_dissimilar(depth, world_position, temporal_surface.world_position, world_normal, temporal_surface.world_normal, view) {
return NeighborInfo(empty_reservoir(), vec3(0.0), vec3(0.0), vec3(0.0));
}

temporal_reservoir.confidence_weight = min(temporal_reservoir.confidence_weight, CONFIDENCE_WEIGHT_CAP);
let temporal_pixel_index = temporal_pixel_id.x + temporal_pixel_id.y * u32(view.main_pass_viewport.z);
var temporal_reservoir = gi_reservoirs_a[temporal_pixel_index];

return NeighborInfo(temporal_reservoir, temporal_surface.world_position, temporal_surface.world_normal, temporal_diffuse_brdf);
}
temporal_reservoir.confidence_weight = min(temporal_reservoir.confidence_weight, CONFIDENCE_WEIGHT_CAP);

return NeighborInfo(empty_reservoir(), vec3(0.0), vec3(0.0), vec3(0.0));
return NeighborInfo(temporal_reservoir, temporal_surface.world_position, temporal_surface.world_normal, temporal_diffuse_brdf);
}

fn permute_pixel(pixel_id: vec2<u32>, i: u32) -> vec2<u32> {
let r = constants.frame_index + i;
fn permute_pixel(pixel_id: vec2<u32>) -> vec2<u32> {
let r = constants.frame_index;
let offset = vec2(r & 3u, (r >> 2u) & 3u);
var shifted_pixel_id = pixel_id + offset;
shifted_pixel_id ^= vec2(3u);
Expand All @@ -170,9 +166,7 @@ fn load_spatial_reservoir(pixel_id: vec2<u32>, depth: f32, world_position: vec3<
}

let spatial_pixel_index = spatial_pixel_id.x + spatial_pixel_id.y * u32(view.main_pass_viewport.z);
var spatial_reservoir = gi_reservoirs_b[spatial_pixel_index];

spatial_reservoir.radiance *= trace_point_visibility(world_position, spatial_reservoir.sample_point_world_position);
let spatial_reservoir = gi_reservoirs_b[spatial_pixel_index];

return NeighborInfo(spatial_reservoir, spatial_surface.world_position, spatial_surface.world_normal, spatial_diffuse_brdf);
}
Expand Down
16 changes: 9 additions & 7 deletions crates/bevy_solari/src/realtime/specular_gi.wgsl
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ fn specular_gi(@builtin(global_invocation_id) global_id: vec3<u32>) {

var radiance: vec3<f32>;
var wi: vec3<f32>;
if surface.material.roughness > 0.04 {
if surface.material.roughness > 0.1 {
Copy link
Contributor

@SparkyPotato SparkyPotato Oct 25, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Would it be worth subgroupAny-ing this (and flipping the condition)? One branch is a lot more expensive than the other.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe? Would have to test. I feel like for latency reasons, you'd want to trace the minimum amount of rays possible, even if some threads end up idle.

// Surface is very rough, reuse the ReSTIR GI reservoir
let gi_reservoir = gi_reservoirs_a[pixel_index];
wi = normalize(gi_reservoir.sample_point_world_position - surface.world_position);
Expand Down Expand Up @@ -66,18 +66,20 @@ fn trace_glossy_path(initial_ray_origin: vec3<f32>, initial_wi: vec3<f32>, rng:
var wi = initial_wi;

// Trace up to three bounces, getting the net throughput from them
var radiance = vec3(0.0);
var throughput = vec3(1.0);
for (var i = 0u; i < 3u; i += 1u) {
// Trace ray
let ray = trace_ray(ray_origin, wi, RAY_T_MIN, RAY_T_MAX, RAY_FLAG_NONE);
if ray.kind == RAY_QUERY_INTERSECTION_NONE { break; }
let ray_hit = resolve_ray_hit_full(ray);

// Add world cache contribution
let diffuse_brdf = ray_hit.material.base_color / PI;
radiance += throughput * diffuse_brdf * query_world_cache(ray_hit.world_position, ray_hit.geometric_world_normal, view.world_position);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Given that this is only sampling the glossy path, I'm not sure if it's worth treating the surface as diffuse and lighting it with the world cache, especially since the moment we hit a rough surface we would do the same (in the previous code).

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hmm idk. But I think we want to sample the world cache at every bounce, and not just the last. I don't have screenshots on me atm, but comparing the screws in the PICA PICA scene before/after this PR, they look a lot closer to the PT reference.


// Surface is very rough, terminate path in the world cache
if ray_hit.material.roughness > 0.04 || i == 2u {
let diffuse_brdf = ray_hit.material.base_color / PI;
return throughput * diffuse_brdf * query_world_cache(ray_hit.world_position, ray_hit.geometric_world_normal, view.world_position);
}
if ray_hit.material.roughness > 0.04 && i != 0u { break; }

// Sample new ray direction from the GGX BRDF for next bounce
let TBN = calculate_tbn_mikktspace(ray_hit.world_normal, ray_hit.world_tangent);
Expand All @@ -93,11 +95,11 @@ fn trace_glossy_path(initial_ray_origin: vec3<f32>, initial_wi: vec3<f32>, rng:
// Update throughput for next bounce
let pdf = ggx_vndf_pdf(wo_tangent, wi_tangent, ray_hit.material.roughness);
let brdf = evaluate_brdf(N, wo, wi, ray_hit.material);
let cos_theta = dot(wi, N);
let cos_theta = saturate(dot(wi, N));
throughput *= (brdf * cos_theta) / pdf;
}

return vec3(0.0);
return radiance;
}

// Don't adjust the size of this struct without also adjusting GI_RESERVOIR_STRUCT_SIZE.
Expand Down
8 changes: 4 additions & 4 deletions crates/bevy_solari/src/realtime/world_cache_query.wgsl
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
#define_import_path bevy_solari::world_cache

/// How responsive the world cache is to changes in lighting (higher is less responsive, lower is more responsive)
const WORLD_CACHE_MAX_TEMPORAL_SAMPLES: f32 = 20.0;
const WORLD_CACHE_MAX_TEMPORAL_SAMPLES: f32 = 5.0;
/// Maximum number of frames a cell can live for without being queried
const WORLD_CACHE_CELL_LIFETIME: u32 = 30u;
const WORLD_CACHE_CELL_LIFETIME: u32 = 4u;
/// Maximum number of attempts to find a cache entry after a hash collision
const WORLD_CACHE_MAX_SEARCH_STEPS: u32 = 3u;

Expand Down Expand Up @@ -57,8 +57,8 @@ fn query_world_cache(world_position: vec3<f32>, world_normal: vec3<f32>, view_po
world_cache_geometry_data[key].world_normal = world_normal;
return vec3(0.0);
} else {
// Collision - jump to another entry
key = wrap_key(pcg_hash(key));
// Collision - linear probe to next entry
key += 1u;
}
}

Expand Down