@@ -406,9 +406,11 @@ renderCUDA(
406
406
const float2 * __restrict__ points_xy_image,
407
407
const float4 * __restrict__ conic_opacity,
408
408
const float * __restrict__ colors,
409
+ const float * __restrict__ depths,
409
410
const float * __restrict__ final_Ts,
410
411
const uint32_t * __restrict__ n_contrib,
411
412
const float * __restrict__ dL_dpixels,
413
+ const float * __restrict__ dL_depths,
412
414
float3 * __restrict__ dL_dmean2D,
413
415
float4 * __restrict__ dL_dconic2D,
414
416
float * __restrict__ dL_dopacity,
@@ -435,6 +437,7 @@ renderCUDA(
435
437
__shared__ float2 collected_xy[BLOCK_SIZE];
436
438
__shared__ float4 collected_conic_opacity[BLOCK_SIZE];
437
439
__shared__ float collected_colors[C * BLOCK_SIZE];
440
+ __shared__ float collected_depths[BLOCK_SIZE];
438
441
439
442
// In the forward, we stored the final value for T, the
440
443
// product of all (1 - alpha) factors.
@@ -448,12 +451,16 @@ renderCUDA(
448
451
449
452
float accum_rec[C] = { 0 };
450
453
float dL_dpixel[C];
454
+ float dL_depth;
455
+ float accum_depth_rec = 0 ;
451
456
if (inside)
452
457
for (int i = 0 ; i < C; i++)
453
458
dL_dpixel[i] = dL_dpixels[i * H * W + pix_id];
459
+ dL_depth = dL_depths[pix_id];
454
460
455
461
float last_alpha = 0 ;
456
462
float last_color[C] = { 0 };
463
+ float last_depth = 0 ;
457
464
458
465
// Gradient of pixel coordinate w.r.t. normalized
459
466
// screen-space viewport coordinates (-1 to 1)
@@ -475,6 +482,7 @@ renderCUDA(
475
482
collected_conic_opacity[block.thread_rank ()] = conic_opacity[coll_id];
476
483
for (int i = 0 ; i < C; i++)
477
484
collected_colors[i * BLOCK_SIZE + block.thread_rank ()] = colors[coll_id * C + i];
485
+ collected_depths[block.thread_rank ()] = depths[coll_id];
478
486
}
479
487
block.sync ();
480
488
@@ -522,6 +530,17 @@ renderCUDA(
522
530
// many that were affected by this Gaussian.
523
531
atomicAdd (&(dL_dcolors[global_id * C + ch]), dchannel_dcolor * dL_dchannel);
524
532
}
533
+
534
+ // Propagate gradients to per-Gaussian depths
535
+ const float c_d = collected_depths[j];
536
+ accum_depth_rec = last_alpha * last_depth + (1 .f - last_alpha) * accum_depth_rec;
537
+ last_depth = c_d;
538
+ dL_dalpha += (c_d - accum_depth_rec) * dL_depth;
539
+ // for (int ch = 0; ch < C; ch++)
540
+ // {
541
+ // atomicAdd(&(dL_dcolors[global_id * C + ch]), dchannel_dcolor * dL_depth);
542
+ // }
543
+
525
544
dL_dalpha *= T;
526
545
// Update last alpha (to be used in the next iteration)
527
546
last_alpha = alpha;
@@ -630,9 +649,11 @@ void BACKWARD::render(
630
649
const float2 * means2D,
631
650
const float4 * conic_opacity,
632
651
const float * colors,
652
+ const float * depths,
633
653
const float * final_Ts,
634
654
const uint32_t * n_contrib,
635
655
const float * dL_dpixels,
656
+ const float * dL_depths,
636
657
float3 * dL_dmean2D,
637
658
float4 * dL_dconic2D,
638
659
float * dL_dopacity,
@@ -646,9 +667,11 @@ void BACKWARD::render(
646
667
means2D,
647
668
conic_opacity,
648
669
colors,
670
+ depths,
649
671
final_Ts,
650
672
n_contrib,
651
673
dL_dpixels,
674
+ dL_depths,
652
675
dL_dmean2D,
653
676
dL_dconic2D,
654
677
dL_dopacity,
0 commit comments