@@ -353,11 +353,13 @@ __global__ void preprocessCUDA(
353
353
const glm::vec3* scales,
354
354
const glm::vec4* rotations,
355
355
const float scale_modifier,
356
+ const float * view,
356
357
const float * proj,
357
358
const glm::vec3* campos,
358
359
const float3 * dL_dmean2D,
359
360
glm::vec3* dL_dmeans,
360
361
float * dL_dcolor,
362
+ float * dL_ddepth,
361
363
float * dL_dcov3D,
362
364
float * dL_dsh,
363
365
glm::vec3* dL_dscale,
@@ -386,6 +388,20 @@ __global__ void preprocessCUDA(
386
388
// of cov2D and following SH conversion also affects it.
387
389
dL_dmeans[idx] += dL_dmean;
388
390
391
+ // The w component must be equal to 1 for view^T * [x, y, z, 1].
392
+ float3 m_view = transformPoint4x3 (m, view);
393
+
394
+ // Compute loss gradient w.r.t. 3D means due to gradients of depth
395
+ // from rendering procedure
396
+ glm::vec3 dL_dmean2;
397
+ float mul3 = view[2 ] * m.x + view[6 ] * m.y + view[10 ] * m.z + view[14 ];
398
+ dL_dmean2.x = (view[2 ] - view[3 ] * mul3) * dL_ddepth[idx];
399
+ dL_dmean2.y = (view[6 ] - view[7 ] * mul3) * dL_ddepth[idx];
400
+ dL_dmean2.z = (view[10 ] - view[11 ] * mul3) * dL_ddepth[idx];
401
+
402
+ // That's the third part of the mean gradient.
403
+ dL_dmeans[idx] += dL_dmean2;
404
+
389
405
// Compute gradient updates due to computing colors from SHs
390
406
if (shs)
391
407
computeColorFromSH (idx, D, M, (glm::vec3*)means, *campos, shs, clamped, (glm::vec3*)dL_dcolor, (glm::vec3*)dL_dmeans, (glm::vec3*)dL_dsh);
@@ -410,11 +426,12 @@ renderCUDA(
410
426
const float * __restrict__ final_Ts,
411
427
const uint32_t * __restrict__ n_contrib,
412
428
const float * __restrict__ dL_dpixels,
413
- const float * __restrict__ dL_depths ,
429
+ const float * __restrict__ dL_dpixel_depths ,
414
430
float3 * __restrict__ dL_dmean2D,
415
431
float4 * __restrict__ dL_dconic2D,
416
432
float * __restrict__ dL_dopacity,
417
- float * __restrict__ dL_dcolors)
433
+ float * __restrict__ dL_dcolors,
434
+ float * __restrict__ dL_ddepths)
418
435
{
419
436
// We rasterize again. Compute necessary block info.
420
437
auto block = cg::this_thread_block ();
@@ -451,12 +468,12 @@ renderCUDA(
451
468
452
469
float accum_rec[C] = { 0 };
453
470
float dL_dpixel[C];
454
- float dL_depth ;
471
+ float dL_dpixel_depth ;
455
472
float accum_depth_rec = 0 ;
456
473
if (inside){
457
474
for (int i = 0 ; i < C; i++)
458
475
dL_dpixel[i] = dL_dpixels[i * H * W + pix_id];
459
- dL_depth = dL_depths [pix_id];
476
+ dL_dpixel_depth = dL_dpixel_depths [pix_id];
460
477
}
461
478
462
479
float last_alpha = 0 ;
@@ -483,7 +500,7 @@ renderCUDA(
483
500
collected_conic_opacity[block.thread_rank ()] = conic_opacity[coll_id];
484
501
for (int i = 0 ; i < C; i++)
485
502
collected_colors[i * BLOCK_SIZE + block.thread_rank ()] = colors[coll_id * C + i];
486
- collected_depths[block.thread_rank ()] = depths[coll_id];
503
+ collected_depths[block.thread_rank ()] = depths[coll_id];
487
504
}
488
505
block.sync ();
489
506
@@ -511,6 +528,7 @@ renderCUDA(
511
528
512
529
T = T / (1 .f - alpha);
513
530
const float dchannel_dcolor = alpha * T;
531
+ const float dpixel_depth_ddepth = alpha * T;
514
532
515
533
// Propagate gradients to per-Gaussian colors and keep
516
534
// gradients w.r.t. alpha (blending factor for a Gaussian/pixel
@@ -534,7 +552,9 @@ renderCUDA(
534
552
const float c_d = collected_depths[j];
535
553
accum_depth_rec = last_alpha * last_depth + (1 .f - last_alpha) * accum_depth_rec;
536
554
last_depth = c_d;
537
- dL_dalpha += (c_d - accum_depth_rec) * dL_depth;
555
+ dL_dalpha += (c_d - accum_depth_rec) * dL_dpixel_depth;
556
+ atomicAdd (&(dL_ddepths[global_id]), dpixel_depth_ddepth * dL_dpixel_depth);
557
+
538
558
dL_dalpha *= T;
539
559
// Update last alpha (to be used in the next iteration)
540
560
last_alpha = alpha;
@@ -588,6 +608,7 @@ void BACKWARD::preprocess(
588
608
const float * dL_dconic,
589
609
glm::vec3* dL_dmean3D,
590
610
float * dL_dcolor,
611
+ float * dL_ddepth,
591
612
float * dL_dcov3D,
592
613
float * dL_dsh,
593
614
glm::vec3* dL_dscale,
@@ -623,11 +644,13 @@ void BACKWARD::preprocess(
623
644
(glm::vec3*)scales,
624
645
(glm::vec4*)rotations,
625
646
scale_modifier,
647
+ viewmatrix,
626
648
projmatrix,
627
649
campos,
628
650
(float3 *)dL_dmean2D,
629
651
(glm::vec3*)dL_dmean3D,
630
652
dL_dcolor,
653
+ dL_ddepth,
631
654
dL_dcov3D,
632
655
dL_dsh,
633
656
dL_dscale,
@@ -647,11 +670,12 @@ void BACKWARD::render(
647
670
const float * final_Ts,
648
671
const uint32_t * n_contrib,
649
672
const float * dL_dpixels,
650
- const float * dL_depths ,
673
+ const float * dL_dpixel_depths ,
651
674
float3 * dL_dmean2D,
652
675
float4 * dL_dconic2D,
653
676
float * dL_dopacity,
654
- float * dL_dcolors)
677
+ float * dL_dcolors,
678
+ float * dL_ddepths)
655
679
{
656
680
renderCUDA<NUM_CHANNELS> << <grid, block >> >(
657
681
ranges,
@@ -665,10 +689,11 @@ void BACKWARD::render(
665
689
final_Ts,
666
690
n_contrib,
667
691
dL_dpixels,
668
- dL_depths ,
692
+ dL_dpixel_depths ,
669
693
dL_dmean2D,
670
694
dL_dconic2D,
671
695
dL_dopacity,
672
- dL_dcolors
696
+ dL_dcolors,
697
+ dL_ddepths
673
698
);
674
699
}
0 commit comments