Add new Utils: pack & unpack data; cdf sampling; query grid (#57)

* new utils * proper test for pack * add test for cdf and query occ * add deprecated warning * bump version * fix list return to tuple
nerfstudio-project · Oct 7, 2022 · 601c4c3 · 601c4c3
1 parent 4007564
commit 601c4c3
Show file tree

Hide file tree

Showing 22 changed files with 709 additions and 59 deletions.
diff --git a/docs/source/apis/generated/nerfacc.pack_data.rst b/docs/source/apis/generated/nerfacc.pack_data.rst
@@ -0,0 +1,6 @@
+nerfacc.pack\_data
+==================
+
+.. currentmodule:: nerfacc
+
+.. autofunction:: pack_data
diff --git a/docs/source/apis/generated/nerfacc.ray_resampling.rst b/docs/source/apis/generated/nerfacc.ray_resampling.rst
@@ -0,0 +1,6 @@
+nerfacc.ray\_resampling
+=======================
+
+.. currentmodule:: nerfacc
+
+.. autofunction:: ray_resampling
diff --git a/docs/source/apis/generated/nerfacc.unpack_data.rst b/docs/source/apis/generated/nerfacc.unpack_data.rst
@@ -0,0 +1,6 @@
+nerfacc.unpack\_data
+====================
+
+.. currentmodule:: nerfacc
+
+.. autofunction:: unpack_data
diff --git a/docs/source/apis/generated/nerfacc.unpack_info.rst b/docs/source/apis/generated/nerfacc.unpack_info.rst
@@ -0,0 +1,6 @@
+nerfacc.unpack\_info
+====================
+
+.. currentmodule:: nerfacc
+
+.. autofunction:: unpack_info
diff --git a/docs/source/apis/generated/nerfacc.unpack_to_ray_indices.rst b/docs/source/apis/generated/nerfacc.unpack_to_ray_indices.rst
diff --git a/docs/source/apis/utils.rst b/docs/source/apis/utils.rst
@@ -8,11 +8,14 @@ Utils
    :toctree: generated/
 
    ray_aabb_intersect
-   unpack_to_ray_indices
+   unpack_info
 
    accumulate_along_rays
    render_weight_from_density
    render_weight_from_alpha
    render_visibility
 
+   ray_resampling
+   pack_data
+   unpack_data
 
diff --git a/nerfacc/__init__.py b/nerfacc/__init__.py
@@ -1,11 +1,13 @@
 """
 Copyright (c) 2022 Ruilong Li, UC Berkeley.
 """
+import warnings
 
+from .cdf import ray_resampling
 from .contraction import ContractionType, contract, contract_inv
-from .grid import Grid, OccupancyGrid
+from .grid import Grid, OccupancyGrid, query_grid
 from .intersection import ray_aabb_intersect
-from .pack import unpack_to_ray_indices
+from .pack import pack_data, unpack_data, unpack_info
 from .ray_marching import ray_marching
 from .version import __version__
 from .vol_rendering import (
@@ -16,19 +18,34 @@
     rendering,
 )
 
+
+# About to be deprecated
+def unpack_to_ray_indices(*args, **kwargs):
+    warnings.warn(
+        "`unpack_to_ray_indices` will be deprecated. Please use `unpack_info` instead.",
+        DeprecationWarning,
+        stacklevel=2,
+    )
+    return unpack_info(*args, **kwargs)
+
+
 __all__ = [
+    "__version__",
     "Grid",
     "OccupancyGrid",
+    "query_grid",
     "ContractionType",
     "contract",
     "contract_inv",
     "ray_aabb_intersect",
     "ray_marching",
-    "unpack_to_ray_indices",
     "accumulate_along_rays",
     "render_visibility",
     "render_weight_from_alpha",
     "render_weight_from_density",
     "rendering",
-    "__version__",
+    "pack_data",
+    "unpack_data",
+    "unpack_info",
+    "ray_resampling",
 ]
diff --git a/nerfacc/cdf.py b/nerfacc/cdf.py
@@ -0,0 +1,46 @@
+"""
+Copyright (c) 2022 Ruilong Li, UC Berkeley.
+"""
+
+from typing import Tuple
+
+from torch import Tensor
+
+import nerfacc.cuda as _C
+
+
+def ray_resampling(
+    packed_info: Tensor,
+    t_starts: Tensor,
+    t_ends: Tensor,
+    weights: Tensor,
+    n_samples: int,
+) -> Tuple[Tensor, Tensor, Tensor]:
+    """Resample a set of rays based on the CDF of the weights.
+
+    Args:
+        packed_info (Tensor): Stores information on which samples belong to the same ray. \
+            See :func:`nerfacc.ray_marching` for details. Tensor with shape (n_rays, 2).
+        t_starts: Where the frustum-shape sample starts along a ray. Tensor with \
+            shape (n_samples, 1).
+        t_ends: Where the frustum-shape sample ends along a ray. Tensor with \
+            shape (n_samples, 1).
+        weights: Volumetric rendering weights for those samples. Tensor with shape \
+            (n_samples,).
+        n_samples (int): Number of samples per ray to resample.
+
+    Returns:
+        Resampled packed info (n_rays, 2), t_starts (n_samples, 1), and t_ends (n_samples, 1).
+    """
+    (
+        resampled_packed_info,
+        resampled_t_starts,
+        resampled_t_ends,
+    ) = _C.ray_resampling(
+        packed_info.contiguous(),
+        t_starts.contiguous(),
+        t_ends.contiguous(),
+        weights.contiguous(),
+        n_samples,
+    )
+    return resampled_packed_info, resampled_t_starts, resampled_t_ends
diff --git a/nerfacc/cuda/__init__.py b/nerfacc/cuda/__init__.py
@@ -19,13 +19,17 @@ def call_cuda(*args, **kwargs):
 contract = _make_lazy_cuda_func("contract")
 contract_inv = _make_lazy_cuda_func("contract_inv")
 
-query_occ = _make_lazy_cuda_func("query_occ")
+grid_query = _make_lazy_cuda_func("grid_query")
 
 ray_aabb_intersect = _make_lazy_cuda_func("ray_aabb_intersect")
 ray_marching = _make_lazy_cuda_func("ray_marching")
-unpack_to_ray_indices = _make_lazy_cuda_func("unpack_to_ray_indices")
+ray_resampling = _make_lazy_cuda_func("ray_resampling")
 
 rendering_forward = _make_lazy_cuda_func("rendering_forward")
 rendering_backward = _make_lazy_cuda_func("rendering_backward")
 rendering_alphas_forward = _make_lazy_cuda_func("rendering_alphas_forward")
 rendering_alphas_backward = _make_lazy_cuda_func("rendering_alphas_backward")
+
+unpack_data = _make_lazy_cuda_func("unpack_data")
+unpack_info = _make_lazy_cuda_func("unpack_info")
+unpack_info_to_mask = _make_lazy_cuda_func("unpack_info_to_mask")
diff --git a/nerfacc/cuda/csrc/cdf.cu b/nerfacc/cuda/csrc/cdf.cu
@@ -0,0 +1,203 @@
+/*
+ * Copyright (c) 2022 Ruilong Li, UC Berkeley.
+ */
+
+#include "include/helpers_cuda.h"
+
+/**
+ * Resamples the intervals of one ray at evenly spaced positions of the CDF of
+ * its (normalized) weights.
+ *
+ * Ray `i` reads its input segment from `packed_info[i * 2 + {0, 1}]`
+ * (offset, count) and its output segment from `resample_packed_info` in the
+ * same layout. `resample_steps + 1` boundary points are placed along the ray;
+ * point `j` becomes the start of output interval `j` and the end of output
+ * interval `j - 1`, so consecutive output intervals share their boundary.
+ *
+ * NOTE(review): `CUDA_GET_THREAD_ID` comes from helpers_cuda.h; it presumably
+ * defines `i` from the thread/block index and returns when `i >= n_rays` --
+ * confirm against the header.
+ *
+ * @param n_rays               number of rows in `packed_info`.
+ * @param packed_info          per-ray (offset, count) into starts/ends/weights.
+ * @param starts               input interval starts.
+ * @param ends                 input interval ends.
+ * @param weights              per-interval weights used to build the CDF.
+ * @param resample_packed_info per-ray (offset, count) into the outputs.
+ * @param resample_starts      output interval starts.
+ * @param resample_ends        output interval ends.
+ */
+template <typename scalar_t>
+__global__ void cdf_resampling_kernel(
+    const uint32_t n_rays,
+    const int *packed_info,  // input ray & point indices.
+    const scalar_t *starts,  // input start t
+    const scalar_t *ends,    // input end t
+    const scalar_t *weights, // transmittance weights
+    const int *resample_packed_info,
+    scalar_t *resample_starts,
+    scalar_t *resample_ends)
+{
+    CUDA_GET_THREAD_ID(i, n_rays);
+
+    // locate
+    const int base = packed_info[i * 2 + 0];  // point idx start.
+    const int steps = packed_info[i * 2 + 1]; // point idx shift.
+    const int resample_base = resample_packed_info[i * 2 + 0];
+    const int resample_steps = resample_packed_info[i * 2 + 1];
+    if (steps == 0)
+        return;
+
+    // Rebase all pointers so that index 0 is this ray's first sample.
+    starts += base;
+    ends += base;
+    weights += base;
+    resample_starts += resample_base;
+    resample_ends += resample_base;
+
+    // normalize weights **per ray**
+    scalar_t weights_sum = 0.0f;
+    for (int j = 0; j < steps; j++)
+        weights_sum += weights[j];
+    // Top the sum up to at least 1e-5 (spread evenly over the intervals) so
+    // the normalization below never divides by a vanishing sum.
+    scalar_t padding = fmaxf(1e-5f - weights_sum, 0.0f);
+    scalar_t padding_step = padding / steps;
+    weights_sum += padding;
+
+    // `num_bins` boundary points delimit `resample_steps` output intervals.
+    int num_bins = resample_steps + 1;
+    scalar_t cdf_step_size = (1.0f - 1.0 / num_bins) / resample_steps;
+
+    int idx = 0, j = 0;
+    scalar_t cdf_prev = 0.0f, cdf_next = (weights[idx] + padding_step) / weights_sum;
+    scalar_t cdf_u = 1.0 / (2 * num_bins);
+    while (j < num_bins)
+    {
+        const bool inside_interval = cdf_u < cdf_next;
+        // Never advance past the ray's last interval: if rounding (or a
+        // non-finite weight) leaves the accumulated CDF at or below `cdf_u`,
+        // advancing would read samples that do not belong to this ray and the
+        // loop might never terminate.
+        if (inside_interval || idx == steps - 1)
+        {
+            scalar_t t;
+            if (inside_interval)
+            {
+                // resample in this interval (linear interpolation in CDF space)
+                scalar_t scaling = (ends[idx] - starts[idx]) / (cdf_next - cdf_prev);
+                t = (cdf_u - cdf_prev) * scaling + starts[idx];
+            }
+            else
+            {
+                // CDF exhausted: clamp to the end of the last interval.
+                t = ends[idx];
+            }
+            if (j < num_bins - 1)
+                resample_starts[j] = t;
+            if (j > 0)
+                resample_ends[j - 1] = t;
+            // going further to next resample
+            cdf_u += cdf_step_size;
+            j += 1;
+        }
+        else
+        {
+            // going to next interval
+            idx += 1;
+            cdf_prev = cdf_next;
+            cdf_next += (weights[idx] + padding_step) / weights_sum;
+        }
+    }
+    return;
+}
+
+// template <typename scalar_t>
+// __global__ void cdf_resampling_kernel(
+//     const uint32_t n_rays,
+//     const int *packed_info,   // input ray & point indices.
+//     const scalar_t *starts,   // input start t
+//     const scalar_t *ends,     // input end t
+//     const scalar_t *weights,  // transmittance weights
+//     const int *resample_packed_info,
+//     scalar_t *resample_starts,
+//     scalar_t *resample_ends)
+// {
+//     CUDA_GET_THREAD_ID(i, n_rays);
+
+//     // locate
+//     const int base = packed_info[i * 2 + 0];  // point idx start.
+//     const int steps = packed_info[i * 2 + 1]; // point idx shift.
+//     const int resample_base = resample_packed_info[i * 2 + 0];
+//     const int resample_steps = resample_packed_info[i * 2 + 1];
+//     if (steps == 0)
+//         return;
+
+//     starts += base;
+//     ends += base;
+//     weights += base;
+//     resample_starts += resample_base;
+//     resample_ends += resample_base;
+
+//     scalar_t cdf_step_size = 1.0f / resample_steps;
+
+//     // normalize weights **per ray**
+//     scalar_t weights_sum = 0.0f;
+//     for (int j = 0; j < steps; j++)
+//         weights_sum += weights[j];
+
+//     scalar_t padding = fmaxf(1e-5f - weights_sum, 0.0f);
+//     scalar_t padding_step = padding / steps;
+//     weights_sum += padding;
+
+//     int idx = 0, j = 0;
+//     scalar_t cdf_prev = 0.0f, cdf_next = (weights[idx] + padding_step) / weights_sum;
+//     scalar_t cdf_u = 0.5f * cdf_step_size;
+//     while (cdf_u < 1.0f)
+//     {
+//         if (cdf_u < cdf_next)
+//         {
+//             // resample in this interval
+//             scalar_t scaling = (ends[idx] - starts[idx]) / (cdf_next - cdf_prev);
+//             scalar_t resample_mid = (cdf_u - cdf_prev) * scaling + starts[idx];
+//             scalar_t resample_half_size = cdf_step_size * scaling * 0.5;
+//             resample_starts[j] = fmaxf(resample_mid - resample_half_size, starts[idx]);
+//             resample_ends[j] = fminf(resample_mid + resample_half_size, ends[idx]);
+//             // going further to next resample
+//             cdf_u += cdf_step_size;
+//             j += 1;
+//         }
+//         else
+//         {
+//             // go to next interval
+//             idx += 1;
+//             if (idx == steps)
+//                 break;
+//             cdf_prev = cdf_next;
+//             cdf_next += (weights[idx] + padding_step) / weights_sum;
+//         }
+//     }
+//     if (j != resample_steps)
+//     {
+//         printf("Error: %d %d %f\n", j, resample_steps, weights_sum);
+//     }
+//     return;
+// }
+
+/**
+ * Host entry point for CDF-based ray resampling.
+ *
+ * Builds the packed info of the resampled rays (rays with at least one input
+ * sample get `steps` output intervals, all other rays get none), allocates
+ * the output tensors and launches `cdf_resampling_kernel` with one thread per
+ * ray.
+ *
+ * @param packed_info (n_rays, 2) int tensor of per-ray (offset, count).
+ * @param starts      (n_samples, 1) interval starts.
+ * @param ends        (n_samples, 1) interval ends.
+ * @param weights     (n_samples,) weights that define the per-ray CDF.
+ * @param steps       number of output intervals per non-empty ray.
+ * @return {resample_packed_info, resample_starts, resample_ends}.
+ */
+std::vector<torch::Tensor> ray_resampling(
+    torch::Tensor packed_info,
+    torch::Tensor starts,
+    torch::Tensor ends,
+    torch::Tensor weights,
+    const int steps)
+{
+    DEVICE_GUARD(packed_info);
+
+    CHECK_INPUT(packed_info);
+    CHECK_INPUT(starts);
+    CHECK_INPUT(ends);
+    CHECK_INPUT(weights);
+
+    // Logical `&&` so that size(1) is only queried once the rank is known to be 2.
+    TORCH_CHECK(packed_info.ndimension() == 2 && packed_info.size(1) == 2,
+                "packed_info must have shape (n_rays, 2)");
+    TORCH_CHECK(starts.ndimension() == 2 && starts.size(1) == 1,
+                "starts must have shape (n_samples, 1)");
+    TORCH_CHECK(ends.ndimension() == 2 && ends.size(1) == 1,
+                "ends must have shape (n_samples, 1)");
+    TORCH_CHECK(weights.ndimension() == 1, "weights must have shape (n_samples,)");
+
+    const uint32_t n_rays = packed_info.size(0);
+    const int64_t n_samples = weights.size(0);
+    // The kernel indexes all three tensors with the same per-ray offsets.
+    TORCH_CHECK(starts.size(0) == n_samples && ends.size(0) == n_samples,
+                "starts, ends and weights must contain the same number of samples");
+
+    const int threads = 256;
+    const int blocks = CUDA_N_BLOCKS_NEEDED(n_rays, threads);
+
+    // Second column of packed_info: number of input samples per ray, shape (n_rays, 1).
+    torch::Tensor num_steps = torch::split(packed_info, 1, 1)[1];
+    torch::Tensor resample_num_steps = (num_steps > 0).to(num_steps.options()) * steps;
+    torch::Tensor resample_cum_steps = resample_num_steps.cumsum(0, torch::kInt32);
+    // (exclusive prefix sum, count) per ray.
+    torch::Tensor resample_packed_info = torch::cat(
+        {resample_cum_steps - resample_num_steps, resample_num_steps}, 1);
+
+    // With no rays there is no last element to read the total from.
+    int total_steps = 0;
+    if (n_rays > 0)
+        total_steps = resample_cum_steps[resample_cum_steps.size(0) - 1].item<int>();
+    torch::Tensor resample_starts = torch::zeros({total_steps, 1}, starts.options());
+    torch::Tensor resample_ends = torch::zeros({total_steps, 1}, ends.options());
+
+    // Nothing to write: skip the launch (a launch with zero blocks is invalid).
+    if (total_steps == 0)
+        return {resample_packed_info, resample_starts, resample_ends};
+
+    AT_DISPATCH_FLOATING_TYPES_AND_HALF(
+        weights.scalar_type(),
+        "ray_resampling",
+        ([&]
+         { cdf_resampling_kernel<scalar_t><<<blocks, threads, 0, at::cuda::getCurrentCUDAStream()>>>(
+               n_rays,
+               // inputs
+               packed_info.data_ptr<int>(),
+               starts.data_ptr<scalar_t>(),
+               ends.data_ptr<scalar_t>(),
+               weights.data_ptr<scalar_t>(),
+               resample_packed_info.data_ptr<int>(),
+               // outputs
+               resample_starts.data_ptr<scalar_t>(),
+               resample_ends.data_ptr<scalar_t>()); }));
+
+    return {resample_packed_info, resample_starts, resample_ends};
+}