From b988ffb17869b6cf0b23778f990c50a2e99ef231 Mon Sep 17 00:00:00 2001 From: Qi Wu Date: Sat, 18 Apr 2026 22:34:50 -0700 Subject: [PATCH 1/5] Add submodule for base project, update .gitignore, and enhance CMake configuration - Added a new submodule for the base project. - Updated .gitignore to include additional files and directories for better exclusion. - Enhanced CMake configuration to support standalone builds and improved CUDA settings. - Modified batch trainer and neural sampler to include new voxel types. - Introduced a setup script for easier CMake configuration and building. --- .gitignore | 156 +++++++++++++++++++++++++++++++ .gitmodules | 3 + CMakeLists.txt | 156 ++++++++++++++++++++----------- apps/CMakeLists.txt | 39 ++------ apps/batch_trainer.cpp | 23 +++-- base | 1 + core/CMakeLists.txt | 54 +++-------- core/network.cu | 45 +-------- core/samplers/neural_sampler.cpp | 23 +++-- device/CMakeLists.txt | 36 ++++--- device/device_impl.cpp | 2 +- setup_cmake.sh | 66 +++++++++++++ tcnn | 2 +- 13 files changed, 409 insertions(+), 197 deletions(-) create mode 160000 base create mode 100644 setup_cmake.sh diff --git a/.gitignore b/.gitignore index 64e24f8..0da745d 100644 --- a/.gitignore +++ b/.gitignore @@ -1 +1,157 @@ +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +pip-wheel-metadata/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. 
+*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv +.python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. +# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. +#Pipfile.lock + +# PEP 582; used by e.g. github.com/David-OConnor/pyflow +__pypackages__/ + +# Celery stuff +celerybeat-schedule +celerybeat.pid + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ + +# Datasets +logs/ +.DS_Store + +composer/ +eval_metrics_* params.json +params.pt + +/deps/ +model*.pt +output.json +.vscode/ + + +build +build_dav +build/ +build_*/ +*.pt + +magma/ +imgui.ini + +# for sharing test ckpt file +!SIREN_shadow_test/sample_siren_1499_validation.pt +SIREN_shadow_test/*.bson diff --git a/.gitmodules b/.gitmodules index 6adfc5b..f2652c9 100644 --- a/.gitmodules +++ b/.gitmodules @@ -1,3 +1,6 @@ [submodule "tcnn"] path = tcnn url = https://github.com/wilsonCernWq/tiny-cuda-nn.git +[submodule "base"] + path = base + url = 
https://github.com/wilsonCernWq/open-volume-renderer.git
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 194e806..d6cd71c 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -1,69 +1,119 @@
 # ======================================================================== #
-# Copyright 2019-2022 Qi Wu #
+# Copyright 2019-2024 Qi Wu #
 # #
-# Licensed under the Apache License, Version 2.0 (the "License"); #
-# you may not use this file except in compliance with the License. #
-# You may obtain a copy of the License at #
-# #
-# http://www.apache.org/licenses/LICENSE-2.0 #
-# #
-# Unless required by applicable law or agreed to in writing, software #
-# distributed under the License is distributed on an "AS IS" BASIS, #
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. #
-# See the License for the specific language governing permissions and #
-# limitations under the License. #
+# Licensed under the Apache License, Version 2.0 #
 # ======================================================================== #
-
 #
-# This project is designed to be compiled with OVR
+# Standalone build — OVR is an in-tree submodule (base/), TCNN is built from the
+# in-tree tcnn/ submodule via add_subdirectory(), and the result is installed as
+# a self-contained `instantvnr` Python package directory.
 #
-option(OVR_BUILD_MODULE_NNVOLUME "Build Volumetric Neural Representation Device" OFF)
+# cmake_minimum_required() must be the first command: it resets the policy
+# settings, so a cmake_policy(SET ...) issued before it has no lasting effect.
+# 3.24 already implies CMP0048 NEW and supports CMAKE_CUDA_ARCHITECTURES=native.
+cmake_minimum_required(VERSION 3.24)
+
+project(instantvnr LANGUAGES C CXX CUDA)
+
+# OVR cmake modules (configure_build_type, configure_cxx, configure_cuda …)
+list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/base/cmake")
 
-if(OVR_BUILD_MODULE_NNVOLUME)
+include(configure_build_type)
+include(configure_cxx)
+include(configure_cuda)
+# configure_optix is NOT included here — it unconditionally fetches OptiX headers
+# even when OVR_BUILD_DEVICE_OPTIX7=OFF. OVR handles optix internally when enabled.
- message(STATUS "Enable Neural Volume Device") +############################################################################### +# GLIBCXX ABI — defaults to 1 (GCC ≥5 default); override with +# -DIVNR_GLIBCXX_USE_CXX11_ABI=0 when linking against a library built with +# the old ABI (e.g. PyTorch wheels that ship _GLIBCXX_USE_CXX11_ABI=0). +############################################################################### +option(IVNR_GLIBCXX_USE_CXX11_ABI "Use GLIBCXX CXX11 ABI" ON) +if(IVNR_GLIBCXX_USE_CXX11_ABI) + set(IVNR_GLIBCXX_CXX11_ABI _GLIBCXX_USE_CXX11_ABI=1) +else() + set(IVNR_GLIBCXX_CXX11_ABI _GLIBCXX_USE_CXX11_ABI=0) +endif() +message(STATUS "GLIBCXX_CXX11_ABI: ${IVNR_GLIBCXX_USE_CXX11_ABI}") - if(DEFINED GDT_CUDA_ARCHITECTURES) - message(STATUS "Obtained target architecture from environment variable GDT_CUDA_ARCHITECTURES=${GDT_CUDA_ARCHITECTURES}") - set(ENV{TCNN_CUDA_ARCHITECTURES} ${GDT_CUDA_ARCHITECTURES}) - endif() +############################################################################### +# Staging directory — all shared libs and executables land here so the install +# step can copy them as a single package directory (same pattern as pysampler). 
+############################################################################### +set(_ivnr_staging "${CMAKE_BINARY_DIR}/instantvnr") +set(CMAKE_LIBRARY_OUTPUT_DIRECTORY "${_ivnr_staging}") +set(CMAKE_RUNTIME_OUTPUT_DIRECTORY "${_ivnr_staging}") +set(CMAKE_INSTALL_RPATH "$ORIGIN") +set(CMAKE_BUILD_WITH_INSTALL_RPATH ON) +set(CMAKE_POSITION_INDEPENDENT_CODE ON) - ################ - # tiny-cuda-nn # - ################ - if(NOT TARGET tiny-cuda-nn) - set(TCNN_BUILD_BENCHMARK OFF) - set(TCNN_BUILD_EXAMPLES OFF) - add_subdirectory(tcnn EXCLUDE_FROM_ALL) - target_compile_definitions(tiny-cuda-nn PUBLIC ${TCNN_DEFINITIONS}) - target_compile_definitions(tiny-cuda-nn INTERFACE TCNN_NAMESPACE=tcnn) - target_include_directories(tiny-cuda-nn INTERFACE ${CMAKE_CURRENT_LIST_DIR}/tcnn/include) - target_include_directories(tiny-cuda-nn INTERFACE ${CMAKE_CURRENT_LIST_DIR}/tcnn/dependencies) - if(UNIX) - set_target_properties(fmt PROPERTIES POSITION_INDEPENDENT_CODE ON) - set_target_properties(tiny-cuda-nn PROPERTIES POSITION_INDEPENDENT_CODE ON) - endif() - endif() +############################################################################### +# OVR (OpenGL, GLFW, ImGui, GDT math, TBB, TFN module) +############################################################################### +set(OVR_BUILD_OPENGL ON CACHE BOOL "" FORCE) +set(OVR_BUILD_CUDA ON CACHE BOOL "" FORCE) +set(OVR_BUILD_DEVICE_OPTIX7 OFF CACHE BOOL "" FORCE) +set(OVR_BUILD_DEVICE_OSPRAY OFF CACHE BOOL "" FORCE) +set(OVR_BUILD_PYTHON_BINDINGS OFF CACHE BOOL "" FORCE) +set(OVR_BUILD_APPS OFF CACHE BOOL "" FORCE) +add_subdirectory(base) - # Two projects uses different target names ... 
- if((NOT TARGET util) AND (TARGET rendercommon)) - add_library(util ALIAS rendercommon) - endif() +############################################################################### +# TBB (may be brought in by OVR; guard against double-find) +############################################################################### +if(NOT TARGET TBB::tbb) + find_package(TBB CONFIG REQUIRED) +endif() - # Simulate include paths - include_directories(${CMAKE_CURRENT_LIST_DIR}) - ################ - # - ################ - set(MACROCELL_SIZE_MIP 4) - set(ENABLE_IN_SHADER ON) - set(ENABLE_OUT_OF_CORE ON) - set(ENABLE_OPENGL ON) +############################################################################### +# tiny-cuda-nn — built from the in-tree tcnn/ checkout via add_subdirectory(). +# No FetchContent fallback is implemented in this file: configure fails if +# tcnn/ is missing or empty, so initialise the submodule first. +# TCNN_REPOSITORY / TCNN_COMMIT_HASH are not read anywhere in this file. +############################################################################### +set(TCNN_BUILD_BENCHMARK OFF CACHE BOOL "" FORCE) +set(TCNN_BUILD_EXAMPLES OFF CACHE BOOL "" FORCE) +add_subdirectory(tcnn) +# TCNN's cmake creates the `tiny-cuda-nn` target directly. +# Newer TCNN versions dropped the TCNN_NAMESPACE macro; inject it so all +# consumers get it without modifying TCNN source. +target_compile_definitions(tiny-cuda-nn PUBLIC TCNN_NAMESPACE=tcnn) - add_subdirectory(core) - add_subdirectory(device) - add_subdirectory(apps) +############################################################################### +# OVR compatibility aliases +############################################################################### +if((NOT TARGET util) AND (TARGET rendercommon)) + add_library(util ALIAS rendercommon) +endif() +# GFX_LIBRARIES: interactive viewer targets provided by OVR's OpenGL layer. +# OVR sets this variable; provide a safe fallback for standalone builds. 
+if(NOT DEFINED GFX_LIBRARIES) + set(GFX_LIBRARIES glfwApp) endif() + +############################################################################### +# Global include paths (api.h, api_internal.h live at the project root) +############################################################################### +include_directories(${CMAKE_CURRENT_LIST_DIR}) +include_directories(${CMAKE_CURRENT_BINARY_DIR}) + +############################################################################### +# Feature flags (match references/instantvnr original defaults) +############################################################################### +set(MACROCELL_SIZE_MIP 4) +set(ENABLE_IN_SHADER ON) +set(ENABLE_OUT_OF_CORE ON) +set(ENABLE_OPENGL ${OVR_BUILD_OPENGL}) + +############################################################################### +# Subprojects +############################################################################### +add_subdirectory(core) +# device/ is the OVR plugin layer; build it as a shared library so it is +# co-installed and available for optional OVR integration. +add_subdirectory(device) +add_subdirectory(apps) diff --git a/apps/CMakeLists.txt b/apps/CMakeLists.txt index 419dde7..61f214f 100644 --- a/apps/CMakeLists.txt +++ b/apps/CMakeLists.txt @@ -1,36 +1,14 @@ # ======================================================================== # -# Copyright 2019-2020 Qi Wu # -# # -# Licensed under the Apache License, Version 2.0 (the "License"); # -# you may not use this file except in compliance with the License. # -# You may obtain a copy of the License at # -# # -# http://www.apache.org/licenses/LICENSE-2.0 # -# # -# Unless required by applicable law or agreed to in writing, software # -# distributed under the License is distributed on an "AS IS" BASIS, # -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # -# See the License for the specific language governing permissions and # -# limitations under the License. 
# +# Copyright 2019-2022 Qi Wu # +# Licensed under the Apache License, Version 2.0 # # ======================================================================== # # ======================================================================== # # Copyright 2018-2019 Ingo Wald # -# # -# Licensed under the Apache License, Version 2.0 (the "License"); # -# you may not use this file except in compliance with the License. # -# You may obtain a copy of the License at # -# # -# http://www.apache.org/licenses/LICENSE-2.0 # -# # -# Unless required by applicable law or agreed to in writing, software # -# distributed under the License is distributed on an "AS IS" BASIS, # -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # -# See the License for the specific language governing permissions and # -# limitations under the License. # +# Licensed under the Apache License, Version 2.0 # # ======================================================================== # set(CMAKE_CXX_STANDARD 17) -set(CMAKE_CUDA_STANDARD 14) +set(CMAKE_CUDA_STANDARD 17) if(NOT target) set(target instantvnr) @@ -39,6 +17,7 @@ endif() # ======================================================================== # # applications # ======================================================================== # + if(ENABLE_OPENGL) add_executable(vnr_int_dual int_dual_volume.cpp) target_link_libraries(vnr_int_dual PRIVATE ${target} ${GFX_LIBRARIES}) @@ -53,7 +32,7 @@ target_link_libraries(vnr_cmd_train PRIVATE ${target}) add_executable(vnr_cmd_render batch_renderer.cpp) target_link_libraries(vnr_cmd_render PRIVATE ${target}) -if (ENABLE_IN_SHADER) +if(ENABLE_IN_SHADER) add_executable(vnr_cmd_isosurface batch_isosurface.cpp) target_link_libraries(vnr_cmd_isosurface PRIVATE ${target}) endif() @@ -61,13 +40,13 @@ endif() add_executable(view_model view_model.cpp) target_link_libraries(view_model PRIVATE ${target}) -# OVR application +# OVR plugin applications — only built when instantvnr is loaded as an 
OVR device if(DEFINED OVR_BUILD_MODULE_NNVOLUME) add_executable(generate_shadow_map shadowmap.cu) - target_link_libraries(generate_shadow_map PUBLIC renderlib glfwApp $) + target_link_libraries(generate_shadow_map PUBLIC renderlib glfwApp $) - if (ENABLE_IN_SHADER) + if(ENABLE_IN_SHADER) add_executable(vnr_int_isosurface int_isosurface.cu) target_link_libraries(vnr_int_isosurface PUBLIC renderlib ${target} ${GFX_LIBRARIES}) endif() diff --git a/apps/batch_trainer.cpp b/apps/batch_trainer.cpp index d75b13c..5250a59 100644 --- a/apps/batch_trainer.cpp +++ b/apps/batch_trainer.cpp @@ -69,14 +69,21 @@ struct CmdArgs : CmdArgsBase { namespace vidi { enum VoxelType { - VOXEL_UINT8 = vnr::VALUE_TYPE_UINT8, - VOXEL_INT8 = vnr::VALUE_TYPE_INT8, - VOXEL_UINT16 = vnr::VALUE_TYPE_UINT16, - VOXEL_INT16 = vnr::VALUE_TYPE_INT16, - VOXEL_UINT32 = vnr::VALUE_TYPE_UINT32, - VOXEL_INT32 = vnr::VALUE_TYPE_INT32, - VOXEL_FLOAT = vnr::VALUE_TYPE_FLOAT, - VOXEL_DOUBLE = vnr::VALUE_TYPE_DOUBLE, + VOXEL_UINT8 = vnr::VALUE_TYPE_UINT8, + VOXEL_INT8 = vnr::VALUE_TYPE_INT8, + VOXEL_UINT16 = vnr::VALUE_TYPE_UINT16, + VOXEL_INT16 = vnr::VALUE_TYPE_INT16, + VOXEL_UINT32 = vnr::VALUE_TYPE_UINT32, + VOXEL_INT32 = vnr::VALUE_TYPE_INT32, + VOXEL_FLOAT = vnr::VALUE_TYPE_FLOAT, + VOXEL_FLOAT2 = vnr::VALUE_TYPE_FLOAT2, + VOXEL_FLOAT3 = vnr::VALUE_TYPE_FLOAT3, + VOXEL_FLOAT4 = vnr::VALUE_TYPE_FLOAT4, + VOXEL_DOUBLE = vnr::VALUE_TYPE_DOUBLE, + // multi-channel double types: vnr has no counterpart; use sentinel values + VOXEL_DOUBLE2 = 501, + VOXEL_DOUBLE3 = 502, + VOXEL_DOUBLE4 = 503, }; } // namespace vidi #define VIDI_VOLUME_EXTERNAL_TYPE_ENUM diff --git a/base b/base new file mode 160000 index 0000000..d66050c --- /dev/null +++ b/base @@ -0,0 +1 @@ +Subproject commit d66050c1e2bcf7755cd70efa86b3efdcf0eaef01 diff --git a/core/CMakeLists.txt b/core/CMakeLists.txt index cd7608c..581dba8 100644 --- a/core/CMakeLists.txt +++ b/core/CMakeLists.txt @@ -1,43 +1,21 @@ # 
======================================================================== # -# Copyright 2019-2020 Qi Wu # -# # -# Licensed under the Apache License, Version 2.0 (the "License"); # -# you may not use this file except in compliance with the License. # -# You may obtain a copy of the License at # -# # -# http://www.apache.org/licenses/LICENSE-2.0 # -# # -# Unless required by applicable law or agreed to in writing, software # -# distributed under the License is distributed on an "AS IS" BASIS, # -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # -# See the License for the specific language governing permissions and # -# limitations under the License. # +# Copyright 2019-2022 Qi Wu # +# Licensed under the Apache License, Version 2.0 # # ======================================================================== # # ======================================================================== # # Copyright 2018-2019 Ingo Wald # -# # -# Licensed under the Apache License, Version 2.0 (the "License"); # -# you may not use this file except in compliance with the License. # -# You may obtain a copy of the License at # -# # -# http://www.apache.org/licenses/LICENSE-2.0 # -# # -# Unless required by applicable law or agreed to in writing, software # -# distributed under the License is distributed on an "AS IS" BASIS, # -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # -# See the License for the specific language governing permissions and # -# limitations under the License. 
# +# Licensed under the Apache License, Version 2.0 # # ======================================================================== # set(CMAKE_CXX_STANDARD 17) -set(CMAKE_CUDA_STANDARD 14) +set(CMAKE_CUDA_STANDARD 17) if(NOT target) set(target instantvnr) endif() message(STATUS "target - ${target}") -include(configure_cuda) # skip optix here +include(configure_cuda) # include_guard(GLOBAL) makes this idempotent if(DEFINED MACROCELL_SIZE_MIP) add_definitions(-DMACROCELL_SIZE_MIP=${MACROCELL_SIZE_MIP}) @@ -117,8 +95,8 @@ if(ENABLE_FVSRN) target_compile_definitions(${target} PRIVATE ENABLE_FVSRN=1) endif() -set_target_properties(${target} -PROPERTIES +set_target_properties(${target} +PROPERTIES CXX_STANDARD 17 CUDA_STANDARD 17 CUDA_RESOLVE_DEVICE_SYMBOLS ON @@ -130,15 +108,14 @@ target_include_directories(${target} PRIVATE ${CMAKE_CURRENT_BINARY_DIR} ${CMAKE target_include_directories(${target} PUBLIC $) target_include_directories(${target} PUBLIC $) -target_compile_options(${target} PRIVATE $<$:${CUDA_NVCC_FLAGS}>) -target_compile_options(${target} PRIVATE $<$:-Xcudafe="--diag_suppress=177">) # -> variable declared but not referenced -target_compile_options(${target} PRIVATE $<$:-Xcudafe="--diag_suppress=20044">) # -> extern variable '...' 
treated as static +target_compile_options(${target} PRIVATE $<$:-Xcudafe="--diag_suppress=177">) # variable declared but not referenced +target_compile_options(${target} PRIVATE $<$:-Xcudafe="--diag_suppress=20044">) # extern variable treated as static -target_link_libraries(${target} PRIVATE TBB::tbb curand) +target_link_libraries(${target} PRIVATE TBB::tbb curand tiny-cuda-nn) +target_link_libraries(${target} PUBLIC gdt util tfnmodule) -target_link_libraries(${target} PUBLIC # intentionally keep these library public - gdt util tfnmodule tiny-cuda-nn -) +# GLIBCXX ABI compile definition — must match the installed PyTorch +target_compile_definitions(${target} PUBLIC ${IVNR_GLIBCXX_CXX11_ABI}) if(TARGET openvkl::openvkl AND TARGET openvkl::testing) target_compile_definitions(${target} PRIVATE ENABLE_OPENVKL=1) @@ -148,8 +125,3 @@ endif() if(ENABLE_LOGGING) target_compile_definitions(${target} PRIVATE ENABLE_LOGGING=1) endif() - -# target_include_directories(vnr_core PUBLIC $<$:${INTEL_AIO_INC}>) -# target_link_libraries (vnr_core PUBLIC $<$:${INTEL_AIO_LIB}>) -# target_compile_definitions(vnr_core PUBLIC AIO_INTEL) -# target_compile_definitions(vnr_core PRIVATE TCNN_NAMESPACE=tcnn) diff --git a/core/network.cu b/core/network.cu index c6f7eda..64077e8 100644 --- a/core/network.cu +++ b/core/network.cu @@ -41,49 +41,8 @@ #include #include -// make a private version of thrust::plus to avoid template instantiation conflicts ... -namespace { - -template -struct maximum_op { - typedef T first_argument_type; - typedef T second_argument_type; - typedef T result_type; - __host__ __device__ constexpr T operator()(const T& lhs, const T& rhs) const { return lhs < rhs ? rhs : lhs; } -}; // end maximum - -template -struct minimum_op { - typedef T first_argument_type; - typedef T second_argument_type; - typedef T result_type; - __host__ __device__ constexpr T operator()(const T& lhs, const T& rhs) const { return lhs < rhs ? 
lhs : rhs; } -}; // end minimum - -template -struct plus { - typedef T first_argument_type; - typedef T second_argument_type; - typedef T result_type; - __host__ __device__ constexpr T operator()(const T &lhs, const T &rhs) const { return lhs + rhs; } -}; // end plus - -template -T parallel_sum_gpu(const T* __restrict__ data, size_t count, cudaStream_t stream = nullptr) { - const auto begin = thrust::device_ptr(data); - const auto end = begin + count; - return thrust::reduce(thrust::cuda::par.on(stream), begin, end, T(0), plus()); -} - -template -void parallel_minmax_gpu(const T* __restrict__ data, size_t count, T& init_min, T& init_max, cudaStream_t stream = nullptr) { - const auto begin = thrust::device_ptr(data); - const auto end = begin + count; - init_min = thrust::reduce(thrust::cuda::par.on(stream), begin, end, init_min, minimum_op()); - init_max = thrust::reduce(thrust::cuda::par.on(stream), begin, end, init_max, maximum_op()); -} - -} +// These helpers (maximum_op, minimum_op, plus, parallel_sum_gpu, parallel_minmax_gpu) +// are now provided by evaluation_kernel.h (ovr/ovr/common/). 
namespace vnr { diff --git a/core/samplers/neural_sampler.cpp b/core/samplers/neural_sampler.cpp index 2540394..1db249e 100644 --- a/core/samplers/neural_sampler.cpp +++ b/core/samplers/neural_sampler.cpp @@ -2,14 +2,21 @@ namespace vidi { enum VoxelType { - VOXEL_UINT8 = vnr::VALUE_TYPE_UINT8, - VOXEL_INT8 = vnr::VALUE_TYPE_INT8, - VOXEL_UINT16 = vnr::VALUE_TYPE_UINT16, - VOXEL_INT16 = vnr::VALUE_TYPE_INT16, - VOXEL_UINT32 = vnr::VALUE_TYPE_UINT32, - VOXEL_INT32 = vnr::VALUE_TYPE_INT32, - VOXEL_FLOAT = vnr::VALUE_TYPE_FLOAT, - VOXEL_DOUBLE = vnr::VALUE_TYPE_DOUBLE, + VOXEL_UINT8 = vnr::VALUE_TYPE_UINT8, + VOXEL_INT8 = vnr::VALUE_TYPE_INT8, + VOXEL_UINT16 = vnr::VALUE_TYPE_UINT16, + VOXEL_INT16 = vnr::VALUE_TYPE_INT16, + VOXEL_UINT32 = vnr::VALUE_TYPE_UINT32, + VOXEL_INT32 = vnr::VALUE_TYPE_INT32, + VOXEL_FLOAT = vnr::VALUE_TYPE_FLOAT, + VOXEL_FLOAT2 = vnr::VALUE_TYPE_FLOAT2, + VOXEL_FLOAT3 = vnr::VALUE_TYPE_FLOAT3, + VOXEL_FLOAT4 = vnr::VALUE_TYPE_FLOAT4, + VOXEL_DOUBLE = vnr::VALUE_TYPE_DOUBLE, + // multi-channel double types: vnr has no counterpart; use sentinel values + VOXEL_DOUBLE2 = 501, + VOXEL_DOUBLE3 = 502, + VOXEL_DOUBLE4 = 503, }; } // namespace vidi #define VIDI_VOLUME_EXTERNAL_TYPE_ENUM diff --git a/device/CMakeLists.txt b/device/CMakeLists.txt index 683a5c7..f527170 100644 --- a/device/CMakeLists.txt +++ b/device/CMakeLists.txt @@ -1,10 +1,13 @@ ################ -# +# device_nnvolume_array — OBJECT library used by the shadow-map app and as a +# build unit for the device plugin. Standalone builds still compile it so the +# full feature set is preserved; it is only *linked* when OVR_BUILD_MODULE_NNVOLUME +# is defined (i.e., when building as part of the OVR plugin framework). 
################ -add_library(device_nnvolume_array OBJECT +add_library(device_nnvolume_array OBJECT device_nnvolume_array.cpp ) -set_target_properties(device_nnvolume_array +set_target_properties(device_nnvolume_array PROPERTIES CUDA_RESOLVE_DEVICE_SYMBOLS ON CUDA_SEPARABLE_COMPILATION ON @@ -13,24 +16,33 @@ PROPERTIES target_link_libraries(device_nnvolume_array PRIVATE rendercommon TBB::tbb) ################ -# +# device_nnvolume — standalone build uses SHARED so the library is installed +# alongside other .so files. In OVR plugin mode the original MODULE type is +# appropriate; use the INSTANTVNR_STANDALONE cache variable to switch. ################ -add_library(device_nnvolume MODULE +option(INSTANTVNR_STANDALONE "Build device_nnvolume as SHARED (pip package) instead of MODULE (OVR plugin)" ON) + +if(INSTANTVNR_STANDALONE) + set(_dev_type SHARED) +else() + set(_dev_type MODULE) +endif() + +add_library(device_nnvolume ${_dev_type} $ device.cpp device_impl.cpp - # method_shadowmap.cu + # method_shadowmap.cu # uncomment to include shadow-map GPU code ) -set_target_properties(device_nnvolume +set_target_properties(device_nnvolume PROPERTIES CUDA_RESOLVE_DEVICE_SYMBOLS ON CUDA_SEPARABLE_COMPILATION ON ) -################ -# -################ target_link_libraries (device_nnvolume PRIVATE rendercommon TBB::tbb instantvnr) target_compile_options (device_nnvolume PRIVATE $<$:${CUDA_NVCC_FLAGS}>) -target_compile_definitions(device_nnvolume PRIVATE OVR_BUILD_MODULE_NNVOLUME=1) -target_compile_definitions(device_nnvolume PRIVATE MACROCELL_SIZE_MIP=${MACROCELL_SIZE_MIP}) +target_compile_definitions(device_nnvolume PRIVATE + ${IVNR_GLIBCXX_CXX11_ABI} + MACROCELL_SIZE_MIP=${MACROCELL_SIZE_MIP} +) diff --git a/device/device_impl.cpp b/device/device_impl.cpp index 83160c7..cde89a6 100644 --- a/device/device_impl.cpp +++ b/device/device_impl.cpp @@ -163,7 +163,7 @@ DeviceNNVolume::Impl::commit() /* commit other data */ if (parent->params.camera.update()) { const auto& camera = 
parent->params.camera.ref();
-    camera_latest = vnr::Camera{ camera.from, camera.at, camera.up };
+    camera_latest = vnr::Camera{ camera.eye, camera.at, camera.up };
     framebuffer_reset = true;
   }
 
diff --git a/setup_cmake.sh b/setup_cmake.sh
new file mode 100644
index 0000000..2298129
--- /dev/null
+++ b/setup_cmake.sh
@@ -0,0 +1,66 @@
+#!/usr/bin/env bash
+# Configure and build the C++ instantvnr libraries and executables directly
+# via CMake, without going through Python packaging.
+#
+# Usage:
+#   ./setup_cmake.sh                    # auto-detect everything
+#   SM=86 ./setup_cmake.sh              # override GPU arch
+#   BUILD_DIR=build ./setup_cmake.sh    # custom build directory
+#   ./setup_cmake.sh --configure        # configure only (skip build)
+#   ./setup_cmake.sh --build            # build only (skip configure)
+#   JOBS=8 ./setup_cmake.sh             # override build parallelism
+#
+# Requires: CUDA toolkit (nvcc in PATH or /usr/local/cuda)
+
+set -euo pipefail
+
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+BUILD_DIR="${BUILD_DIR:-${SCRIPT_DIR}/build}"
+
+DO_CONFIGURE=true
+DO_BUILD=true
+for arg in "$@"; do
+  case "$arg" in
+    --configure) DO_BUILD=false ;;
+    --build)     DO_CONFIGURE=false ;;
+  esac
+done
+
+# ── detect GPU SM ─────────────────────────────────────────────────────────────
+if [[ -n "${SM:-}" ]]; then
+  echo "[info] Using SM=$SM from environment"
+elif SM=$(nvidia-smi --query-gpu=compute_cap --format=csv,noheader 2>/dev/null | head -1 | tr -d '. ') \
+     && [[ "$SM" =~ ^[0-9]+$ ]]; then
+  echo "[info] Detected GPU sm_$SM"
+else
+  echo "[warn] nvidia-smi unavailable or gave no compute capability — defaulting to native arch detection"
+  SM="native"
+fi
+
+# ── detect CUDA toolkit ───────────────────────────────────────────────────────
+if command -v nvcc &>/dev/null; then CUDA_HOME="$(realpath "$(dirname "$(command -v nvcc)")/..")"
+elif [[ -x /usr/local/cuda/bin/nvcc ]]; then CUDA_HOME="/usr/local/cuda"
+else
+  echo "[error] nvcc not found — CUDA toolkit is required" >&2
+  exit 1
+fi
+echo "[info] CUDA_HOME: $CUDA_HOME"
+
+export PATH="$CUDA_HOME/bin${PATH:+:$PATH}"
+export LD_LIBRARY_PATH="$CUDA_HOME/lib64${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}"
+
+# ── configure ─────────────────────────────────────────────────────────────────
+if [[ "$DO_CONFIGURE" == true ]]; then
+  echo "[info] Configuring in $BUILD_DIR (SM=$SM)"
+  cmake -S "$SCRIPT_DIR" -B "$BUILD_DIR" \
+    -DCMAKE_BUILD_TYPE=Release \
+    -DCMAKE_CUDA_ARCHITECTURES="$SM"
+fi
+
+# ── build ─────────────────────────────────────────────────────────────────────
+if [[ "$DO_BUILD" == true ]]; then
+  JOBS="${JOBS:-$(nproc 2>/dev/null || getconf _NPROCESSORS_ONLN 2>/dev/null || echo 1)}"
+  echo "[info] Building with $JOBS parallel jobs"
+  cmake --build "$BUILD_DIR" --config Release --parallel "$JOBS"
+  echo "[info] Build complete. Outputs in $BUILD_DIR/instantvnr/"
+fi
diff --git a/tcnn b/tcnn
index 5bd6b38..71c6afb 160000
--- a/tcnn
+++ b/tcnn
@@ -1 +1 @@
-Subproject commit 5bd6b38dead69e74f7233ec69a8f3bc043ae2a1d
+Subproject commit 71c6afbc5202ccf8de6dfdf612065cf7f941cb74

From bc3d4676ee029513ffee7a1cd669f01ad75bf630 Mon Sep 17 00:00:00 2001
From: Qi Wu
Date: Sat, 18 Apr 2026 22:51:33 -0700
Subject: [PATCH 2/5] Refactor CMake configuration and remove shadow map components

- Updated the staging directory in CMakeLists.txt to point to a new bin directory.
- Removed shadow map related executables and files from the apps directory, including shadowmap.cu and shadowmap.usda.
- Adjusted the CMake configuration to exclude shadow map methods from the device library.
--- CMakeLists.txt | 2 +- apps/CMakeLists.txt | 14 +- apps/int_isosurface.cu | 2 +- apps/shadowmap.cu | 686 --------------------------------- apps/shadowmap.usda | 48 --- device/CMakeLists.txt | 1 - device/method_shadowmap.cu | 759 ------------------------------------- device/method_shadowmap.h | 24 -- 8 files changed, 5 insertions(+), 1531 deletions(-) delete mode 100644 apps/shadowmap.cu delete mode 100644 apps/shadowmap.usda delete mode 100644 device/method_shadowmap.cu delete mode 100644 device/method_shadowmap.h diff --git a/CMakeLists.txt b/CMakeLists.txt index d6cd71c..18f6efb 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -42,7 +42,7 @@ message(STATUS "GLIBCXX_CXX11_ABI: ${IVNR_GLIBCXX_USE_CXX11_ABI}") # Staging directory — all shared libs and executables land here so the install # step can copy them as a single package directory (same pattern as pysampler). ############################################################################### -set(_ivnr_staging "${CMAKE_BINARY_DIR}/instantvnr") +set(_ivnr_staging "${CMAKE_BINARY_DIR}/bin") set(CMAKE_LIBRARY_OUTPUT_DIRECTORY "${_ivnr_staging}") set(CMAKE_RUNTIME_OUTPUT_DIRECTORY "${_ivnr_staging}") set(CMAKE_INSTALL_RPATH "$ORIGIN") diff --git a/apps/CMakeLists.txt b/apps/CMakeLists.txt index 61f214f..b5d1e33 100644 --- a/apps/CMakeLists.txt +++ b/apps/CMakeLists.txt @@ -40,15 +40,7 @@ endif() add_executable(view_model view_model.cpp) target_link_libraries(view_model PRIVATE ${target}) -# OVR plugin applications — only built when instantvnr is loaded as an OVR device -if(DEFINED OVR_BUILD_MODULE_NNVOLUME) - - add_executable(generate_shadow_map shadowmap.cu) - target_link_libraries(generate_shadow_map PUBLIC renderlib glfwApp $) - - if(ENABLE_IN_SHADER) - add_executable(vnr_int_isosurface int_isosurface.cu) - target_link_libraries(vnr_int_isosurface PUBLIC renderlib ${target} ${GFX_LIBRARIES}) - endif() - +if(ENABLE_IN_SHADER) + add_executable(vnr_int_isosurface int_isosurface.cu) + 
target_link_libraries(vnr_int_isosurface PUBLIC renderlib ${target} ${GFX_LIBRARIES}) endif() diff --git a/apps/int_isosurface.cu b/apps/int_isosurface.cu index c1f0d49..46a6c48 100644 --- a/apps/int_isosurface.cu +++ b/apps/int_isosurface.cu @@ -207,7 +207,7 @@ public: const float scale, int width, int height) - : GLFCameraWindow(title, camera.from, camera.at, camera.up, scale, width, height) + : GLFCameraWindow(title, camera.eye, camera.at, camera.up, scale, width, height) , async_rendering_loop(std::bind(&MainWindow::render_background, this)) , renderer(renderer) , volume(volume) diff --git a/apps/shadowmap.cu b/apps/shadowmap.cu deleted file mode 100644 index 8b9a7e0..0000000 --- a/apps/shadowmap.cu +++ /dev/null @@ -1,686 +0,0 @@ -// TODO this file is incomplete - -#include "cmdline.h" - -#include "device/device_nnvolume_array.h" - -#include -#include -#include -#include -#include - -#include - -#define TFN_MODULE_EXTERNAL_VECTOR_TYPES -namespace tfn { -typedef ovr::math::vec2f vec2f; -typedef ovr::math::vec2i vec2i; -typedef ovr::math::vec3f vec3f; -typedef ovr::math::vec3i vec3i; -typedef ovr::math::vec4f vec4f; -typedef ovr::math::vec4i vec4i; -} // namespace tfn -#include - -#include -#include -#include - -#define inf float_large -#define float_large 1e31f -#define float_small 1e-31f -#define nearly_one 0.9999f - - -// to avoid crazy formatting and indentations -#define NAMESPACE_BEGIN namespace ovr { namespace nnvolume { -#define NAMESPACE_END }} - - -NAMESPACE_BEGIN - -using namespace ovr::math; -using ovr::random::RandomTEA; - -struct Light -{ - vec3f direction; - float intensity; -}; - -struct DeviceVolume -{ - Array3DScalarCUDA volume; - DeviceTransferFunction tfn; - float step = 1.f; - float step_rcp = 1.f; // GPU cacne to avoid recomputation - - box3f bbox = box3f(vec3f(0), vec3f(1)); // with respect to [0-1]^3 - affine3f transform; - - int n_lights = 0; - Light *lights{ nullptr }; - - int ao_samples = 1; -}; - -struct HostVolume -{ - DeviceVolume 
device; - CUDABuffer device_buffer; - - affine3f matrix; - - std::vector tfn_colors_data; - std::vector tfn_alphas_data; - vec2f original_value_range; - - std::vector lights; - CUDABuffer lights_buffer; - -public: - void load_from_array3d_scalar(array_3d_scalar_t array, float data_value_min = 1, float data_value_max = -1) - { - Array3DScalarCUDA &output = device.volume; - output = CreateArray3DScalarCUDA(array); - original_value_range.x = output.lower.v; - original_value_range.y = output.upper.v; - std::cout << "[Shadow Map] volume range = " << original_value_range.x << " " << original_value_range.y << std::endl; - set_value_range(data_value_min, data_value_max); - } - - void set_transfer_function(Array1DFloat4CUDA c, Array1DScalarCUDA a, vec2f r) - { - device.tfn.color = c; - device.tfn.opacity = a; - set_value_range(r.x, r.y); - CUDA_SYNC_CHECK(); - } - - void set_transfer_function(array_1d_float4_t c, array_1d_scalar_t a, vec2f r) - { - set_transfer_function(CreateArray1DFloat4CUDA(c), CreateArray1DScalarCUDA(a), r); - } - - void set_transfer_function(const std::vector &c, const std::vector &o, const vec2f &r) - { - tfn_colors_data.resize(c.size() / 3); - for (int i = 0; i < tfn_colors_data.size(); ++i) { - tfn_colors_data[i].x = c[3 * i + 0]; - tfn_colors_data[i].y = c[3 * i + 1]; - tfn_colors_data[i].z = c[3 * i + 2]; - tfn_colors_data[i].w = 1.f; - } - - tfn_alphas_data.resize(o.size() / 2); - for (int i = 0; i < tfn_alphas_data.size(); ++i) { - tfn_alphas_data[i] = o[2 * i + 1]; - } - - if (!tfn_colors_data.empty() && !tfn_alphas_data.empty()) { - set_transfer_function(CreateArray1DFloat4CUDA(tfn_colors_data), CreateArray1DScalarCUDA(tfn_alphas_data), r); - } - CUDA_SYNC_CHECK(); - } - - void set_value_range(float data_value_min, float data_value_max) - { - Array3DScalarCUDA &volume = device.volume; - if (data_value_max >= data_value_min) - { - float normalized_max = integer_normalize(data_value_max, volume.type); - float normalized_min = 
integer_normalize(data_value_min, volume.type); - volume.upper.v = normalized_max; // should use the transfer function value range here - volume.lower.v = normalized_min; - } - volume.scale.v = 1.f / (volume.upper.v - volume.lower.v); - // Need calculation on max opacity - auto r_x = max(original_value_range.x, volume.lower.v); - auto r_y = min(original_value_range.y, volume.upper.v); - device.tfn.value_range.y = r_y; - device.tfn.value_range.x = r_x; - device.tfn.range_rcp_norm = 1.f / (device.tfn.value_range.y - device.tfn.value_range.x); - } - - void load_lights(const Scene &scene) - { - lights.clear(); - for (auto& li : scene.lights) { - if (li.type == scene::Light::DIRECTIONAL) { - lights.emplace_back(Light{ - /*.direction =*/ li.directional.direction, - /*.intensity =*/ li.intensity - }); - } - } - - lights_buffer.alloc_and_upload(lights); - - device.n_lights = (int)lights.size(); - device.lights = (Light *)lights_buffer.d_pointer(); - } - -}; - -void commit(const Scene &scene, HostVolume &volume) -{ - auto& sv = ovr::parse_single_volume_scene(scene, scene::Volume::STRUCTURED_REGULAR_VOLUME).structured_regular; - auto& st = scene.instances[0].models[0].volume_model.transfer_function; - - vec3f scale = sv.grid_spacing * vec3f(sv.data->dims); - vec3f translate = sv.grid_origin; - - volume.matrix = affine3f::translate(translate) * affine3f::scale(scale); - volume.device.transform = volume.matrix; - - volume.load_from_array3d_scalar(sv.data); - volume.set_transfer_function(CreateArray1DFloat4CUDA(st.color), CreateArray1DScalarCUDA(st.opacity), st.value_range); - - volume.device.step = 1.f / scene.volume_sampling_rate; - volume.device.step_rcp = scene.volume_sampling_rate; - - volume.device.ao_samples = scene.ao_samples; - - volume.load_lights(scene); - - // call this in the end - volume.device_buffer.resize(sizeof(volume.device)); - volume.device_buffer.upload(&volume.device, 1); -} - -static __device__ bool -intersect_box(float &_t0, float &_t1, const vec3f 
ray_ori, const vec3f ray_dir, vec3f &box_lower, vec3f &box_upper) -{ - const vec3f &lower = box_lower; - const vec3f &upper = box_upper; - - float t0 = _t0; - float t1 = _t1; -#if 1 - const vec3i is_small = - vec3i(fabs(ray_dir.x) < float_small, fabs(ray_dir.y) < float_small, fabs(ray_dir.z) < float_small); - const vec3f rcp_dir = /* ray direction reciprocal*/ 1.f / ray_dir; - const vec3f t_lo = vec3f(is_small.x ? float_large : (lower.x - ray_ori.x) * rcp_dir.x, // - is_small.y ? float_large : (lower.y - ray_ori.y) * rcp_dir.y, // - is_small.z ? float_large : (lower.z - ray_ori.z) * rcp_dir.z // - ); - const vec3f t_hi = vec3f(is_small.x ? -float_large : (upper.x - ray_ori.x) * rcp_dir.x, // - is_small.y ? -float_large : (upper.y - ray_ori.y) * rcp_dir.y, // - is_small.z ? -float_large : (upper.z - ray_ori.z) * rcp_dir.z // - ); - t0 = max(t0, reduce_max(min(t_lo, t_hi))); - t1 = min(t1, reduce_min(max(t_lo, t_hi))); -#else - const vec3f t_lo = (lower - ray_ori) / ray_dir; - const vec3f t_hi = (upper - ray_ori) / ray_dir; - t0 = max(t0, reduce_max(min(t_lo, t_hi))); - t1 = min(t1, reduce_min(max(t_lo, t_hi))); -#endif - _t0 = t0; - _t1 = t1; - return t1 > t0; -} - -template -__forceinline__ __device__ T lerp(float r, const T &a, const T &b) -{ - return (1 - r) * a + r * b; -} - -template -static __device__ T -array1d_nodal(const ArrayCUDA<1, N> &array, float v) -{ - float t = (0.5f + v * (array.dims.v - 1)) / array.dims.v; - return tex1D(array.data, t); -} - -static __device__ float -sample_volume(const Array3DScalarCUDA &self, vec3f p) -{ - /* sample volume in object space [0, 1] */ - p.x = clamp(p.x, 0.f, 1.f); - p.y = clamp(p.y, 0.f, 1.f); - return tex3D(self.data, p.x, p.y, p.z); -} - -static __device__ void -sample_transfer_function(const DeviceTransferFunction &tfn, float sampleValue, vec3f &_sampleColor, float &_sampleAlpha) -{ - const auto v = (clamp(sampleValue, tfn.value_range.x, tfn.value_range.y) - tfn.value_range.x) * tfn.range_rcp_norm; - vec4f rgba = 
array1d_nodal(tfn.color, v); - rgba.w = array1d_nodal(tfn.opacity, v); // followed by the alpha correction - _sampleColor = vec3f(rgba); - _sampleAlpha = rgba.w; -} - -static __device__ void -opacity_correction(const DeviceVolume &self, const float &distance, float &opacity) -{ - opacity = 1.f - __powf(1.f - opacity, 2.f * self.step_rcp * distance); -} - -template -__device__ void -ray_marching_iterator(const float tMin, const float tMax, - const float step, const F &body, - bool debug = false) -{ - vec2f t = vec2f(tMin, min(tMax, tMin + step)); - while ((t.y > t.x) && body(t)) - { - t.x = t.y; - t.y = min(t.x + step, tMax); - } -} - -__device__ float -ray_marching_transmittance(const DeviceVolume &self, - const vec3f org, - const vec3f dir, - RandomTEA &rng) -{ - const auto marching_step = self.step; - - float alpha = 0.f; - float t0 = 0.f, t1 = inf; - - vec3f lower_end = vec3f(0.f); - vec3f upper_end = vec3f(1.f); - - if (!intersect_box(t0, t1, org, dir, lower_end, upper_end)) return 1.f; - - // jitter ray to remove ringing effects - const float jitter = rng.get_floats().x; - - // start marching - ray_marching_iterator(t0, t1, marching_step, [&](const vec2f &t) { - // sample data value - const auto p = org + lerp(jitter, t.x, t.y) * dir; // object space position - const auto sampleValue = sample_volume(self.volume, p); - // classification - vec3f sampleColor; - float sampleAlpha; - sample_transfer_function(self.tfn, sampleValue, sampleColor, sampleAlpha); - opacity_correction(self, t.y - t.x, sampleAlpha); - // blending - alpha += (1.f - alpha) * sampleAlpha; - return alpha < nearly_one; - }); - - return 1.f - alpha; -} - -__global__ void -ray_marching_kernel(const vec3i dims, const void *ptr, float *__restrict__ shadowbuffer) -{ - // 3D kernel launch - vec3i voxel_coord = vec3i(threadIdx.x + blockIdx.x * blockDim.x, threadIdx.y + blockIdx.y * blockDim.y, threadIdx.z + blockIdx.z * blockDim.z); - size_t voxel_index = voxel_coord.x + voxel_coord.y * (size_t)dims.x 
+ voxel_coord.z * (size_t)dims.y * (size_t)dims.x; - - if (voxel_index > dims.long_product()) return; - - // generate ray & payload - RandomTEA rng(voxel_index, 0); - - // voxel center in local coordinate (0-1)^3 get the object to world transformation - const DeviceVolume &self = *((DeviceVolume *)ptr); - - const affine3f otw = self.transform; - const affine3f wto = otw.inverse(); - - const vec3f org = (vec3f(voxel_coord) + vec3f(0.5f, 0.5f, 0.5f)) / vec3f(dims); // transform to object space - - float shadow = 0.f; - for (int i = 0; i < self.n_lights; ++i) - { - auto li = self.lights[i]; // copy to register, intentional - - vec3f li_dir = normalize(li.direction); - float li_val = li.intensity; - - float li_shadow = 0.f; - for (int spv = 0; spv < self.ao_samples; ++spv) - { - li_shadow += ray_marching_transmittance(self, org, xfmVector(wto, li_dir), rng); // transform to object space - } - - shadow += (li_shadow / self.ao_samples) * li_val; - } - - shadowbuffer[voxel_index] = shadow; -} - -NAMESPACE_END - -using namespace ovr; -using namespace ovr::nnvolume; - -struct CmdArgs : CmdArgsBase { -public: - args::ArgumentParser parser; - args::HelpFlag help; - - args::Positional m_scene; - std::string scene() { return args::get(m_scene); } - - // optional - - args::ValueFlag m_sampling_rate; - float sampling_rate() { return (m_sampling_rate) ? args::get(m_sampling_rate) : 1.f; } - - args::ValueFlag m_shadow_samples; - int shadow_samples() { return (m_shadow_samples) ? args::get(m_shadow_samples) : 1; } - - args::ValueFlag m_output; - std::string output() { return (m_output) ? args::get(m_output) : "shadowmap"; } - - // group for random lights - - args::Group group_random_lights; - - args::ValueFlag m_random_lights; - bool random_lights() { return (m_random_lights); } - int num_random_lights() { return (m_random_lights) ? 
args::get(m_random_lights) : 0; } - - args::Flag m_random_tfn; - bool random_tfn() { return (m_random_tfn); } - - // Ring light parsing - - args::Group group_ring_lights; - - args::ValueFlag m_ring_lights; - bool ring_lights() { return (m_ring_lights); } - int num_ring_lights() { return (m_ring_lights) ? args::get(m_ring_lights) : 0; } - - args::ValueFlag m_theta; - float value_theta() { return (m_theta) ? args::get(m_theta) : 0.f; } - - args::ValueFlag m_phi; - float value_phi() { return (m_phi) ? args::get(m_phi) : 0.f; } - -public: - CmdArgs(const char *title, int argc, char **argv) - : parser(title) - , help(parser, "help", "display the help menu", {'h', "help"}) - , m_scene(parser, "string", "the scene to render") - , m_sampling_rate(parser, "float", "ray marching sampling rate", {"sampling-rate"}) - , m_shadow_samples(parser, "int", "number of samples per voxel", {"shadow-samples"}) - , m_output(parser, "string", "output name", {"output"}) - , group_random_lights(parser, "random light group:", args::Group::Validators::AllOrNone) - , m_random_lights(group_random_lights, "int", "generate N random lights", {"random-lights"}) - , m_random_tfn(parser, "flag", "generate a random transfer function", {"random-tfn"}) - , group_ring_lights(parser, "ring light group:", args::Group::Validators::AllOrNone) - , m_ring_lights(group_ring_lights, "int", "Generate N lights in a ring", {"ring-lights"}) - , m_theta(parser, "int", "ring theta", {"theta"}) - , m_phi(parser, "int", "ring phi", {"phi"}) - { - exec(parser, argc, argv); - } -}; - -int main(int ac, char **av) -{ - CmdArgs args("Shadow Volume Generator", ac, av); - - // Create scene + volume + tfn + lights - Scene scene = scene::create_scene(args.scene()); - scene.volume_sampling_rate = args.sampling_rate(); - scene.ao_samples = args.shadow_samples(); - - auto &scene_tfn = scene.instances[0].models[0].volume_model.transfer_function; - - // generate random lights - srand((unsigned int)time(NULL)); // Initialization, 
should only be called once. - - if (args.random_lights()) { - - scene.lights.clear(); - - for (int i = 0; i < args.num_random_lights(); ++i) { - std::cout << "Generate Light #" << i << std::endl; - - scene::Light light; - light.type = scene::Light::DIRECTIONAL; - - float theta = 2.0f * (float)M_PI * ((float)rand() / (float)(RAND_MAX)); - float phi = 1.0f * (float)M_PI * ((float)rand() / (float)(RAND_MAX)); - - // Generate Direction - float x = 1.0f * cos(phi) * sin(theta); - float y = 1.0f * sin(phi) * sin(theta); - float z = 1.0f * cos(theta); - light.directional.direction = normalize(vec3f(x, y, z)); - std::cout << "Light Direction: " << light.directional.direction.x << " " << light.directional.direction.y << " " << light.directional.direction.z << " " << std::endl; - - // Generate Color - light.intensity = 1.f / args.num_random_lights(); - - // Store - scene.lights.push_back(light); - } - - } - - // Ring light generation - else if (args.ring_lights()){ - std::cout << "Num Lights: " << args.num_ring_lights() << std::endl; - std::cout << "Theta: " << args.value_theta() << std::endl; - std::cout << "Phi: " << args.value_phi() << std::endl << std::endl; - - scene.lights.clear(); - - for (int i = 0; i < args.num_ring_lights(); ++i) { - std::cout << "Generate Light #" << i << std::endl; - - scene::Light light; - light.type = scene::Light::DIRECTIONAL; - - // Theta and phi are passed in as deg - float theta = (float)M_PI/180.f * args.value_theta(); - - float phi_offset = 360.f / args.num_ring_lights(); - float phi = (float)M_PI/180.f * (args.value_phi() + i * phi_offset); - - // Generate Direction - float x = 1.0f * cos(phi) * sin(theta); - float y = 1.0f * sin(phi) * sin(theta); - float z = 1.0f * cos(theta); - light.directional.direction = normalize(vec3f(x, y, z)); - std::cout << "Light Direction: " << light.directional.direction.x << " " << light.directional.direction.y << " " << light.directional.direction.z << " " << std::endl; - - // Generate Color - 
light.intensity = 1.f / args.num_ring_lights(); - - // Store - scene.lights.push_back(light); - } - - } - - - // create a transfer function object - tfn::TransferFunctionCore tfn(1024); - range1f range; - { - vec4f* color_data = scene_tfn.color->data_typed(); - float* alpha_data = scene_tfn.opacity->data_typed(); - for (int i = 0; i < scene_tfn.color->size(); ++i) { - auto color = color_data[i]; - float pos = (float)i / (scene_tfn.color->size() - 1); - tfn.addColorControl(tfn::TransferFunctionCore::ColorControl(pos, color.xyz())); - } - for (int i = 0; i < scene_tfn.color->size(); ++i) { - auto alpha = alpha_data[i]; - float pos = (float)i / (scene_tfn.color->size() - 1); - tfn.addAlphaControl(vec2f(pos, alpha)); - } - - range.lower = scene_tfn.value_range.x; - range.upper = scene_tfn.value_range.y; - } - - if (args.random_tfn()) { - tfn.clearAlphaControls(); - - // Create Number of Gaussian, 1 to 10 - const int num_gaussian = (int)(((float)rand() / (float)(RAND_MAX) + 1) * 5); - std::cout << "Generate Gaussian #" << num_gaussian << std::endl; - - for (int each_gaussian = 0; each_gaussian < num_gaussian; ++each_gaussian) - { - float gaussian_mean = (float)rand() / (float)(RAND_MAX); - float gaussian_sigma = max(0.2f * (float)rand() / (float)(RAND_MAX), 0.0001f); - float gaussian_height = max((gaussian_sigma * std::sqrt(2.0f * float(M_PI))) * (float)rand() / (float)(RAND_MAX), 0.0001f); - - std::cout << "Generate Gaussian Mean: " << gaussian_mean << std::endl; - std::cout << "Generate Gaussian Height: " << gaussian_height << std::endl; - std::cout << "Generate Gaussian Sigma: " << gaussian_sigma << std::endl; - - tfn.addGaussianObject(gaussian_mean, gaussian_sigma, gaussian_height); - } - - // It seems we do not need to explicitly normalize gaussians - } - - // overwrite the scene tfn - tfn.updateColorMap(); - { - auto* table = (vec4f*)tfn.data(); - std::vector color(tfn.resolution()); - std::vector alpha(tfn.resolution()); - for (int i = 0; i < tfn.resolution(); 
++i) { - const auto rgba = table[i]; - color[i] = vec4f(rgba.xyz(), 1.f); - alpha[i] = rgba.w; - } - scene_tfn.color = CreateArray1DFloat4(color); - scene_tfn.opacity = CreateArray1DScalar(alpha); - } - - // set volume - HostVolume params; - commit(scene, params); - - // process - vec3i shadowmap_dims = vec3i(256); // vec3i(scene_vol.data->dims); - size_t shadowmap_size = shadowmap_dims.long_product(); - CUDABuffer shadowmap_gpu; - std::vector shadowmap_cpu; - range1f shadowmap_range; - - shadowmap_cpu.resize(shadowmap_size); - shadowmap_gpu.alloc(shadowmap_size * sizeof(float)); - - CUDA_SYNC_CHECK(); - - // call kernel to compute shadow volume - const int n_threads = 8; - const dim3 block_size(n_threads, n_threads, n_threads); - const dim3 grid_size( - misc::div_round_up(shadowmap_dims.x, n_threads), - misc::div_round_up(shadowmap_dims.y, n_threads), - misc::div_round_up(shadowmap_dims.z, n_threads) - ); - ray_marching_kernel<<>>(shadowmap_dims, (void *)params.device_buffer.d_pointer(), (float *)shadowmap_gpu.d_pointer()); - - CUDA_SYNC_CHECK(); - - // shadowmap_gpu.download_async(shadowmap_cpu.data(), shadowmap_cpu.size()); - cudaMemcpy(shadowmap_cpu.data(), (float*)shadowmap_gpu.d_pointer(), shadowmap_size * sizeof(float), cudaMemcpyDeviceToHost); - - CUDA_SYNC_CHECK(); - - for (int i = 0; i < shadowmap_size; i++) { - shadowmap_range.extend(shadowmap_cpu[i]); - } - std::cout << "shadowmap range: " << shadowmap_range.lower << " " << shadowmap_range.upper << std::endl; - - // save shadow volume to a binary file - std::ofstream outS(args.output() + ".bin", std::ios::out | std::ios::binary); - outS.write((char *)shadowmap_cpu.data(), shadowmap_cpu.size() * sizeof(float)); // <- This is where the code breaks - outS.close(); - - // save the scene file - json root; - - // volume data - { - json data; - data["dimensions"] = { { "x", shadowmap_dims.x }, { "y", shadowmap_dims.y }, { "z", shadowmap_dims.z } }; - data["endian"] = "LITTLE_ENDIAN"; - data["fileName"] = 
args.output() + ".bin"; - data["fileUpperLeft"] = false; - data["format"] = "REGULAR_GRID_RAW_BINARY"; - data["id"] = 1; - data["name"] = "shadowmap"; - data["offset"] = 0; - data["type"] = "FLOAT"; - root["dataSource"].push_back(data); - } - - root["original"] = args.scene(); - - // view - { - json& view = root["view"]; - - json& camera = view["camera"]; - camera["center"] = { { "x", shadowmap_dims.x / 2.f }, { "y", shadowmap_dims.y / 2.f }, { "z", shadowmap_dims.z / 2.f } }; - camera["eye"] = { { "x", shadowmap_dims.x / 2.f }, { "y", shadowmap_dims.y / 2.f }, { "z", shadowmap_dims.z / 2.f - shadowmap_dims.z } }; - camera["up"] = { { "x", 0.f }, { "y", 1.f }, { "z", 0.f } }; - camera["fovy"] = 60; - camera["projectionMode"] = "PERSPECTIVE"; - camera["zFar"] = 2000; - camera["zNear"] = 1; - - for (auto& li : params.lights) { - json light; - light["ambient"] = { { "a", 1.f }, { "b", 1.f }, { "g", 1.f }, { "r", 1.f } }; - light["specular"] = { { "a", 1.f }, { "b", 1.f }, { "g", 1.f }, { "r", 1.f } }; - light["diffuse"] = { { "a", 1.f }, { "r", li.intensity }, { "g", li.intensity }, { "b", li.intensity } }; - light["position"] = { { "w", 0.f }, { "x", li.direction.x }, { "y", li.direction.y }, { "z", li.direction.z } }; - light["type"] = "DIRECTIONAL_LIGHT"; - if (!view.contains("lightSource")) { - view["lightSource"] = light; - } - else { - view["additionalLightSources"].push_back(light); - } - } - - view["lighting"] = true; - view["lightingSide"] = "FRONT_SIDE"; - view["tfPreIntegration"] = false; - - auto& vol = view["volume"]; - vol["dataId"] = 1; - vol["interpolationType"] = "LINEAR_INTERPOLATION"; - vol["opacityUnitDistance"] = 1; - vol["sampleDistance"] = params.device.step; - vol["scalarMappingRange"] = { - { "maximum", 1.0 }, { "minimum", 0.0 } // we should not normalize a shadow map - }; - vol["transferFunctionType"] = "TRANSFER_FUNCTION"; - vol["visible"] = true; - - // transfer function - tfn::saveTransferFunction(tfn, vol["transferFunction"]); - } - - // 
save as text file - std::ofstream outJ(args.output() + ".json", std::ios::out); - outJ << std::setw(4) << root << std::endl; - outJ.close(); - - std::cout << "Ended" << std::endl; - return 0; -} - -// command to train a neural network -// ../../instant-vnr-cuda/run.sh ../../instant-vnr-cuda/build/Release/vnr_cmd_train --volume ./shadowmap.json --max-num-steps 10000 --mode GPU --network network.json - -// command to run -// bash ../scripts/run.sh ../build/Debug/renderapp configs/scene_mechhand.json nnvolume diff --git a/apps/shadowmap.usda b/apps/shadowmap.usda deleted file mode 100644 index b2db23d..0000000 --- a/apps/shadowmap.usda +++ /dev/null @@ -1,48 +0,0 @@ -#usda 1.0 - -def "scene" { - def "volume" { - string data_path = "configs/scene_chameleon.json" - # string data_path = "configs/scene_mechhand.json" - } - def "camera" { - # from = eye - float3 from = ( - -1818.01, - 2496.51, - -1875.73 - ) - # at = center - float3 at = ( - 790.71, - 1057.9441, - 926.1431 - ) - # up = up - float3 up = ( - 0.608985, - -0.313457, - -0.727943 - ) - } - def "light" { - def "ambient" { - def "first_light" { - float intensity = 1 - float3 color = (1, 1, 1) - } - } - def "directional" { - def "first_light" { - float intensity = 2 - float3 direction = (0, -10, 0) - float3 color = (1, 1, 1) - } - def "second_light" { - float intensity = 2 - float3 direction = (0, 10, 0) - float3 color = (1, 1, 1) - } - } - } -} diff --git a/device/CMakeLists.txt b/device/CMakeLists.txt index f527170..9b2cd28 100644 --- a/device/CMakeLists.txt +++ b/device/CMakeLists.txt @@ -32,7 +32,6 @@ add_library(device_nnvolume ${_dev_type} $ device.cpp device_impl.cpp - # method_shadowmap.cu # uncomment to include shadow-map GPU code ) set_target_properties(device_nnvolume PROPERTIES diff --git a/device/method_shadowmap.cu b/device/method_shadowmap.cu deleted file mode 100644 index 53900b1..0000000 --- a/device/method_shadowmap.cu +++ /dev/null @@ -1,759 +0,0 @@ -//. 
======================================================================== // -//. // -//. Copyright 2019-2022 Qi Wu // -//. // -//. Licensed under the MIT License // -//. // -//. ======================================================================== // - -#include "method_shadowmap.h" -#include "raytracing.h" -#include "dda.h" - -#include - -#ifndef ADAPTIVE_SAMPLING -#error "ADAPTIVE_SAMPLING is not defined" -#endif - - -INSTANT_VNR_NAMESPACE_BEGIN - -constexpr auto N_ITERS = 16; - -using ShadingMode = MethodShadowMap::ShadingMode; -constexpr auto NO_SHADING = MethodShadowMap::NO_SHADING; -constexpr auto SHADING = MethodShadowMap::SHADING; - -// using vnr::SciVisMaterial; - -// ------------------------------------------------------------------ -// -// ------------------------------------------------------------------ - -// NOTE: what is the best SoA layout here? - -struct ShadowMapData : LaunchParams -{ - ShadowMapData(const LaunchParams& p) : LaunchParams(p) {} - - ShadingMode mode; - - SciVisMaterial material{ 2.f, 1.5f, .4f, 40.f }; - - DeviceVolume* __restrict__ volume{ nullptr }; - - // belows are only useful for sampling streaming - uint32_t* __restrict__ counter{ nullptr }; - - vec3f* __restrict__ inference_input { nullptr }; - float* __restrict__ inference_output{ nullptr }; - - // per ray payload (ordered by ray index) - uint32_t* __restrict__ pixel_index{ nullptr }; - float* __restrict__ jitter{ nullptr }; - float* __restrict__ alpha{ nullptr }; - vec3f* __restrict__ color_or_org{ nullptr }; -#if ADAPTIVE_SAMPLING - vec3f* __restrict__ iter_t_next{ nullptr }; - vec3i* __restrict__ iter_cell{ nullptr }; -#endif - float* __restrict__ iter_next_cell_begin{ nullptr }; - - // belows are only used by SSH - vec3f* __restrict__ inter_highest_org { nullptr }; // ordered by ray index - float* __restrict__ inter_highest_alpha{ nullptr }; - vec3f* __restrict__ inter_highest_color{ nullptr }; - vec3f* __restrict__ final_highest_org { nullptr }; // ordered by pixel 
index - float* __restrict__ final_highest_alpha{ nullptr }; - vec3f* __restrict__ final_highest_color{ nullptr }; - vec4f* __restrict__ shading_color{ nullptr }; // ordered by pixel index - float* __restrict__ jitter_ssh{ nullptr }; -}; - -/* standard version */ void -do_raymarching_trivial(cudaStream_t stream, const ShadowMapData& params); - -/* iterative version */ void -do_raymarching_iterative(cudaStream_t stream, const ShadowMapData& params, NeuralVolume* network, uint32_t numPixels); - -// ------------------------------------------------------------------ -// -// ------------------------------------------------------------------ - -namespace { - -// ------------------------------------------------------------------ - - -// ------------------------------------------------------------------ - -inline __device__ uint32_t -new_ray_index(const ShadowMapData& params) -{ - return atomicAdd(params.counter, 1); -} - -template -inline T* define_buffer(char* begin, size_t& offset, size_t buffer_size) -{ - auto* ret = (T*)(begin + offset); - offset += buffer_size * sizeof(T); - return ret; -} - -// ------- -// -// ------- - -struct Ray -{ - vec3f org{}; - vec3f dir{}; - float alpha = 0.f; - vec3f color = 0.f; // not used by shadow rays -}; - -// ------- -// -// ------- - -struct RayMarchingIter -#if ADAPTIVE_SAMPLING - : private dda::DDAIter -#endif -{ -#if ADAPTIVE_SAMPLING - using DDAIter::cell; - using DDAIter::t_next; - using DDAIter::next_cell_begin; -#else - float next_cell_begin{}; -#endif - - __device__ RayMarchingIter() {} - __device__ RayMarchingIter(const DeviceVolume& self, const vec3f& org, const vec3f& dir, const float tMin, const float tMax); - bool __device__ resumable(const DeviceVolume& self, vec3f dir, float t_min, float t_max); - - template - __device__ void exec(const DeviceVolume& self, const vec3f& org, const vec3f& dir, const float tMin, const float tMax, const float step, const uint32_t pidx, const F& body); -}; - -__device__ 
-RayMarchingIter::RayMarchingIter(const DeviceVolume& self, const vec3f& org, const vec3f& dir, const float tMin, const float tMax) -{ -#if ADAPTIVE_SAMPLING - const auto& dims = self.macrocell_dims; - const vec3f m_org = org * self.macrocell_spacings_rcp; - const vec3f m_dir = dir * self.macrocell_spacings_rcp; - DDAIter::init(m_org, m_dir, tMin, tMax, dims); -#endif -} - -template -__device__ void -RayMarchingIter::exec(const DeviceVolume& self, const vec3f& org, const vec3f& dir, const float tMin, const float tMax, const float step, const uint32_t pidx, const F& body) -{ -#if ADAPTIVE_SAMPLING - - const auto& dims = self.macrocell_dims; - const vec3f m_org = org * self.macrocell_spacings_rcp; - const vec3f m_dir = dir * self.macrocell_spacings_rcp; - - const auto lambda = [&](const vec3i& cell, float t0, float t1) { - // calculate max opacity - float r = opacityUpperBound(self, cell); - if (fabsf(r) <= float_epsilon) return true; // the cell is empty - // estimate a step size - const auto ss = adaptiveSamplingRate(step, r); - // iterate within the interval - vec2f t = vec2f(t0, min(t1, t0 + ss)); - while (t.y > t.x) { - DDAIter::next_cell_begin = t.y - tMin; - if (!body(t)) return false; - t.x = t.y; - t.y = min(t.x + ss, t1); - } - return true; - }; - - while (DDAIter::next(m_org, m_dir, tMin, tMax, dims, false, lambda)) {} - -#else - - vec2f t; - t.x = max(tMin + next_cell_begin, tMin); - t.y = min(t.x + step, tMax); - while (t.y > t.x) { - next_cell_begin = t.y - tMin; - if (!body(t)) return; - t.x = t.y; - t.y = min(t.x + step, tMax); - } - - next_cell_begin = float_large; - return; - -#endif -} - -bool __device__ -RayMarchingIter::resumable(const DeviceVolume& self, vec3f dir, float tMin, float tMax) -{ -#if ADAPTIVE_SAMPLING - const auto& dims = self.macrocell_dims; - const vec3f m_dir = dir * self.macrocell_spacings_rcp; - return DDAIter::resumable(m_dir, tMin, tMax, dims); -#else - return tMin + next_cell_begin < tMax; -#endif -} - - -// ------- -// -// 
------- - -struct SampleStreamingPayload -{ -public: - uint32_t pixel_index = 0; - float jitter = 0.f; - RayMarchingIter iter; - -private: - union { - vec3f color; - vec3f org; - }; - float alpha = 0.f; - -public: - __device__ SampleStreamingPayload(const uint32_t pixel_index, const float jitter) : pixel_index(pixel_index), jitter(jitter), color(0) {} - __device__ SampleStreamingPayload(const ShadowMapData& params, const uint32_t ray_index); // load a payload from memory - __device__ void save(const ShadowMapData& params, uint32_t ridx) const; - // __device__ void as_camera_ray(const vec3f& c, const float& a) { color = c, alpha = a; } - // __device__ void as_shadow_ray(const vec3f& o) { org = o; } - __device__ void set_ray(const Ray& ray) { alpha = ray.alpha, color = ray.color; } - __device__ Ray compute_ray(const ShadowMapData& params) const; -}; - -__device__ -SampleStreamingPayload::SampleStreamingPayload(const ShadowMapData& params, const uint32_t ray_index) -{ - pixel_index = params.pixel_index[ray_index]; - jitter = params.jitter[ray_index]; - alpha = params.alpha[ray_index]; - color = params.color_or_org[ray_index]; -#if ADAPTIVE_SAMPLING - iter.cell = params.iter_cell[ray_index]; - iter.t_next = params.iter_t_next[ray_index]; -#endif - iter.next_cell_begin = params.iter_next_cell_begin[ray_index]; -} - -__device__ void -SampleStreamingPayload::save(const ShadowMapData& params, uint32_t ridx) const -{ - params.pixel_index[ridx] = pixel_index; - params.jitter[ridx] = jitter; - params.alpha[ridx] = alpha; - params.color_or_org[ridx] = color; -#if ADAPTIVE_SAMPLING - params.iter_cell[ridx] = iter.cell; - params.iter_t_next[ridx] = iter.t_next; -#endif - params.iter_next_cell_begin[ridx] = iter.next_cell_begin; -} - -__device__ Ray -SampleStreamingPayload::compute_ray(const ShadowMapData& params) const -{ - const auto& fbIndex = pixel_index; - - // compute pixel ID - const uint32_t ix = fbIndex % params.frame.size.x; - const uint32_t iy = fbIndex / 
params.frame.size.x; - - // normalized screen plane position, in [0,1]^2 - const auto& camera = params.camera; - const vec2f screen(vec2f((float)ix + .5f, (float)iy + .5f) / vec2f(params.frame.size)); - - // get the object to world transformation - const affine3f& otw = params.transform; - const affine3f wto = otw.inverse(); - - // generate ray direction - Ray ray; - ray.org = xfmPoint(wto, camera.position); - ray.dir = xfmVector(wto, normalize(/* -z axis */ camera.direction + - /* x shift */ (screen.x - 0.5f) * camera.horizontal + - /* y shift */ (screen.y - 0.5f) * camera.vertical)); - ray.alpha = alpha; - ray.color = color; - return ray; -} - -// ------------------------------------------------------------------ - -} - -// ------------------------------------------------------------------ -// -// ------------------------------------------------------------------ - -void -MethodShadowMap::render(cudaStream_t stream, const LaunchParams& _params, ShadingMode mode, DeviceVolume* volume, NeuralVolume* network, bool iterative) -{ - ShadowMapData params = _params; - - const uint32_t numPixels = (uint32_t)params.frame.size.long_product(); - - params.volume = volume; - params.mode = mode; - - if (iterative) { - const uint32_t nSamplesPerCoord = N_ITERS; - - size_t nBytes = numPixels * nSamplesPerCoord * sizeof(vec4f); // inference input + output - nBytes += numPixels * sizeof(SampleStreamingPayload); // ray payloads - nBytes += numPixels * sizeof(RayMarchingIter); // iterators - nBytes += sizeof(uint32_t); // counter - - sample_streaming_buffer.resize(nBytes, stream); - CUDA_CHECK(cudaMemsetAsync((void*)sample_streaming_buffer.d_pointer(), 0, nBytes, stream)); // initialize all buffers - - char* begin = (char*)sample_streaming_buffer.d_pointer(); - size_t offset = 0; - - // allocate staging data - params.inference_input = define_buffer(begin, offset, numPixels * nSamplesPerCoord); - params.inference_output = define_buffer(begin, offset, numPixels * nSamplesPerCoord); - - 
// allocate payload data - params.alpha = define_buffer(begin, offset, numPixels); - params.color_or_org = define_buffer(begin, offset, numPixels); - params.pixel_index = define_buffer(begin, offset, numPixels); - params.jitter = define_buffer(begin, offset, numPixels); -#if ADAPTIVE_SAMPLING - params.iter_cell = define_buffer(begin, offset, numPixels); - params.iter_t_next = define_buffer(begin, offset, numPixels); -#endif - params.iter_next_cell_begin = define_buffer(begin, offset, numPixels); - - // we also need a launch index buffer - params.counter = define_buffer(begin, offset, 1); - } - - if (iterative) { - do_raymarching_iterative(stream, params, network, numPixels); - } - else { - do_raymarching_trivial(stream, params); - } -} - -inline __device__ float -sample_size_scaler(const float ss, const float t0, const float t1) { - const int32_t N = (t1-t0) / ss + 1; - return (t1-t0) / N; - // return ss; -} - -template -inline __device__ void -raymarching_iterator(const DeviceVolume& self, - const vec3f& org, const vec3f& dir, - const float tMin, const float tMax, - const float step, const F& body, - bool debug = false) -{ -#if ADAPTIVE_SAMPLING - - const auto& dims = self.macrocell_dims; - const vec3f m_org = org * self.macrocell_spacings_rcp; - const vec3f m_dir = dir * self.macrocell_spacings_rcp; - dda::dda3(m_org, m_dir, tMin, tMax, dims, debug, [&](const vec3i& cell, float t0, float t1) { - // calculate max opacity - float r = opacityUpperBound(self, cell); - if (fabsf(r) <= float_epsilon) return true; // the cell is empty - // estimate a step size - const auto ss = sample_size_scaler(adaptiveSamplingRate(step, r), t0, t1); - // iterate within the interval - vec2f t = vec2f(t0, min(t1, t0 + ss)); - while (t.y > t.x) { - if (!body(t)) return false; - t.x = t.y; - t.y = min(t.x + ss, t1); - } - return true; - }); - -#else - - vec2f t = vec2f(tMin, min(tMax, tMin + step)); - while ((t.y > t.x) && body(t)) { - t.x = t.y; - t.y = min(t.x + step, tMax); - } - 
-#endif -} - -//------------------------------------------------------------------------------ -// -// ------------------------------------------------------------------------------ - -inline __device__ float -raymarching_transmittance(const DeviceVolume& self, - const ShadowMapData& params, - const vec3f& org, const vec3f& dir, - float t0, float t1, - float sampling_scale, - RandomTEA& rng) -{ - const auto marching_step = sampling_scale * self.step; - float alpha(0); - if (intersectVolume(t0, t1, org, dir, self)) { - // jitter ray to remove ringing effects - const float jitter = rng.get_floats().x; - // start marching - raymarching_iterator(self, org, dir, t0, t1, marching_step, [&](const vec2f& t) { - // sample data value - const auto p = org + lerp(jitter, t.x, t.y) * dir; // object space position - const auto sampleValue = sampleVolume(self.volume, p); - // classification - vec3f sampleColor; - float sampleAlpha; - sampleTransferFunction(self.tfn, sampleValue, sampleColor, sampleAlpha); - opacityCorrection(self, t.y - t.x, sampleAlpha); - // blending - alpha += (1.f - alpha) * sampleAlpha; - return alpha < nearly_one; - }); - } - return 1.f - alpha; -} - -inline __device__ vec3f -shade_scivis_light(const vec3f& ray_dir, const vec3f& normal, const vec3f& albedo, const SciVisMaterial& mat) -{ - vec3f color = 0.f; - - if (dot(normal, normal) > 1.0e-6) { - const auto N = normalize(normal); - const auto V = -ray_dir; - color += mat.ambient * albedo; - const float cosNL = std::max(dot(N, V), 0.f); - if (cosNL > 0.0f) { - color += mat.diffuse * cosNL * albedo; - const vec3f H = normalize(N + V); - const float cosNH = std::max(dot(N, H), 0.f); - color += mat.specular * powf(cosNH, mat.shininess); - } - } - - const vec3f shading2 = shade_simple_light(ray_dir, normal, albedo); - - return lerp(0.5, shading2, color); -} - -inline __device__ vec4f -raymarching_traceray(const DeviceVolume& self, - const ShadowMapData& params, - const affine3f& wto, // world to object - const 
affine3f& otw, // object to world - const Ray& ray, float t0, float t1, - RandomTEA& rng) -{ - const auto& marchingStep = self.step; - const auto& gradientStep = self.grad_step; - // const auto& shadingScale = params.scivis_shading_scale; - - float alpha(0); - vec3f color(0); - - if (intersectVolume(t0, t1, ray.org, ray.dir, self)) { - - auto w_org = xfmVector(otw, ray.org); - auto w_dir = xfmVector(otw, ray.dir); - - // jitter ray to remove ringing effects - const float jitter = rng.get_floats().x; - - // start marching - raymarching_iterator(self, ray.org, ray.dir, t0, t1, marchingStep, [&](const vec2f& t) { - assert(t.x < t.y); - - // sample data value - const auto p = ray.org + lerp(jitter, t.x, t.y) * ray.dir; // object space position - const auto sampleValue = sampleVolume(self.volume, p); - - // classification - vec3f sampleColor; - float sampleAlpha; - sampleTransferFunction(self.tfn, sampleValue, sampleColor, sampleAlpha); - opacityCorrection(self, t.y - t.x, sampleAlpha); - - // access gradient - const vec3f No = -sampleGradient(self.volume, p, sampleValue, gradientStep); // sample gradient - const vec3f Nw = xfmNormal(otw, No); - - const float tr = 1.f - alpha; - - // object space to world space - const auto ldir = xfmVector(wto, normalize(params.light_directional_dir)); - const auto rdir = xfmVector(otw, ray.dir); - // single shade - const float transmittance = raymarching_transmittance(self, params, p, ldir, 0.f, float_large, /*make baseline more expensive...*/ 1.0, rng); - sampleColor = lerp(0.8, sampleColor, transmittance * sampleColor); - - color += tr * sampleColor * sampleAlpha; - alpha += tr * sampleAlpha; - - return alpha < nearly_one; - }); - - } - - return vec4f(color, alpha); -} - -__global__ void -raymarching_kernel(uint32_t width, uint32_t height, const ShadowMapData params) -{ - // compute pixel ID - const size_t ix = threadIdx.x + blockIdx.x * blockDim.x; - const size_t iy = threadIdx.y + blockIdx.y * blockDim.y; - - if (ix >= width) 
return; - if (iy >= height) return; - - const auto& volume = *params.volume; - assert(width == params.frame.size.x && "incorrect framebuffer size"); - assert(height == params.frame.size.y && "incorrect framebuffer size"); - - // normalized screen plane position, in [0,1]^2 - const auto& camera = params.camera; - const vec2f screen(vec2f((float)ix + .5f, (float)iy + .5f) / vec2f(params.frame.size)); - - // get the object to world transformation - const affine3f otw = params.transform; - const affine3f wto = otw.inverse(); - - // pixel index - const uint32_t fbIndex = ix + iy * width; - - // random number generator - RandomTEA rng_state(params.frame_index, fbIndex); - - // generate ray direction - Ray ray; - ray.org = xfmPoint(wto, camera.position); - ray.dir = xfmVector(wto, normalize(/* -z axis */ camera.direction + - /* x shift */ (screen.x - 0.5f) * camera.horizontal + - /* y shift */ (screen.y - 0.5f) * camera.vertical)); - - // trace ray - const vec4f output = raymarching_traceray(volume, params, wto, otw, ray, 0.f, float_large, rng_state); - - // and write to frame buffer ... 
- writePixelColor(params, output, fbIndex); -} - -void -do_raymarching_trivial(cudaStream_t stream, const ShadowMapData& params) -{ - util::bilinear_kernel(raymarching_kernel, 0, stream, params.frame.size.x, params.frame.size.y, params); -} - - - -// ------------------------------------------------------------------------------ -// -// ------------------------------------------------------------------------------ - -__global__ void -iterative_intersect_kernel(uint32_t numRays, const ShadowMapData params, int N_ITERS) -{ - const uint32_t i = threadIdx.x + blockIdx.x * blockDim.x; - if (i >= numRays) return; - - // other constants - const auto& self = *(params.volume); - - // load payloads and rays - SampleStreamingPayload payload(params, i); - const Ray ray = payload.compute_ray(params); - - float tmin = 0.f, tmax = float_large; - const bool hashit = intersectVolume(tmin, tmax, ray.org, ray.dir, self); - assert(hashit); - - vec3f* __restrict__ coords = (vec3f*)params.inference_input; - - int k = 0; - payload.iter.exec(self, ray.org, ray.dir, tmin, tmax, self.step, payload.pixel_index, [&](const vec2f& t) { - assert(k < N_ITERS); - assert(t.x < t.y); - const vec3f c = ray.org + lerp(payload.jitter, t.x, t.y) * ray.dir; - coords[numRays * k + i] = c; - return (++k) < N_ITERS; - }); -} - -template -__global__ void -iterative_compose_kernel(uint32_t numRays, const ShadowMapData params, int N_ITERS) -{ - const uint32_t i = threadIdx.x + blockIdx.x * blockDim.x; - if (i >= numRays) return; - - const auto& self = *(params.volume); - // const auto& shadingScale = params.scivis_shading_scale; - const auto* __restrict__ shadingCoefs = params.inference_output; - - SampleStreamingPayload payload(params, i); - Ray ray = payload.compute_ray(params); - - float tmin = 0.f, tmax = float_large; - const bool hashit = intersectVolume(tmin, tmax, ray.org, ray.dir, self); - assert(hashit); - - const affine3f& otw = params.transform; - - int k = 0; - payload.iter.exec(self, ray.org, 
ray.dir, tmin, tmax, self.step, payload.pixel_index, [&](const vec2f& t) { - assert(k < N_ITERS); - assert(t.x < t.y); - - // classification - const auto c = ray.org + lerp(payload.jitter, t.x, t.y) * ray.dir; - const auto sampleValue = sampleVolume(self.volume, c); - vec3f sampleColor; - float sampleAlpha; - sampleTransferFunction(self.tfn, sampleValue, sampleColor, sampleAlpha); - opacityCorrection(self, t.y - t.x, sampleAlpha); - - // access gradient - const vec3f No = -sampleGradient(self.volume, c, sampleValue, self.grad_step); // sample gradient - const vec3f Nw = xfmNormal(otw, No); - - // shading - if (MODE == SHADING) { - float coef = clamp(shadingCoefs[numRays * k + i], 0.f, 1.f); - const auto rdir = xfmVector(otw, ray.dir); - const vec3f shadingColor = shade_scivis_light(rdir, Nw, sampleColor, params.material); - sampleColor = lerp(0.8, sampleColor, coef * shadingColor); - } - - // blending - const float tr = 1.f - ray.alpha; - ray.alpha += tr * sampleAlpha; - ray.color += tr * sampleColor * sampleAlpha; - - // conditions to continue iterating - return ((++k) < N_ITERS) && (ray.alpha < nearly_one); - }); - - payload.set_ray(ray); - const bool resumable = payload.iter.resumable(self, ray.dir, tmin, tmax); - if (ray.alpha < nearly_one && resumable) { - payload.save(params, new_ray_index(params)); - } - else { - writePixelColor(params, vec4f(ray.color, ray.alpha), payload.pixel_index); - } -} - -__global__ void -iterative_raygen_kernel_camera(uint32_t numRays, const ShadowMapData params) -{ - // compute ray ID - const uint32_t i = threadIdx.x + blockIdx.x * blockDim.x; - if (i >= numRays) return; - - // generate data - const auto& self = *((DeviceVolume*)params.volume); - - // random number generator - RandomTEA rng = RandomTEA(params.frame_index, i); - vec2f jitters = rng.get_floats(); - // payload & ray - SampleStreamingPayload payload(i, jitters.x); - const Ray ray = payload.compute_ray(params); - - // intersect with volume bbox & write outputs - float 
tmin = 0.f, tmax = float_large; - if (intersectVolume(tmin, tmax, ray.org, ray.dir, self)) { - payload.iter = RayMarchingIter(self, ray.org, ray.dir, tmin, tmax); - payload.save(params, new_ray_index(params)); - } - else { - writePixelColor(params, vec4f(ray.color, ray.alpha), payload.pixel_index); - } -} - -__global__ void -iterative_sampling_groundtruth_kernel(uint32_t numRays, const ShadowMapData params) -{ - const uint32_t i = threadIdx.x + blockIdx.x * blockDim.x; - if (i >= numRays) return; - - const auto& volume = params.volume->volume; - - const vec3f* __restrict__ inputs = params.inference_input; - float* __restrict__ outputs = params.inference_output; - - const auto p = inputs[i]; - outputs[i] = sampleVolume(volume, p); -} - -void -iterative_sampling_batch_inference(cudaStream_t stream, uint32_t numRays, const ShadowMapData& params, NeuralVolume* network) -{ - network->inference(numRays, (float*)params.inference_input, params.inference_output, stream); -} - -inline bool -iterative_ray_compaction(cudaStream_t stream, uint32_t& count, uint32_t* dptr) -{ - CUDA_CHECK(cudaMemcpyAsync(&count, dptr, sizeof(uint32_t), cudaMemcpyDeviceToHost, stream)); - CUDA_CHECK(cudaStreamSynchronize(stream)); - return count > 0; -} - -template -void iterative_raymarching_loop(cudaStream_t stream, const ShadowMapData& params, NeuralVolume* network, uint32_t numRays) -{ - const uint32_t numCoordsPerSample = N_ITERS; - - CUDA_CHECK(cudaMemsetAsync(params.counter, 0, sizeof(int32_t), stream)); - util::linear_kernel(iterative_raygen_kernel_camera, 0, stream, numRays, params); - - while (iterative_ray_compaction(stream, numRays, params.counter)) { - // Actually, we could have merged the intersection step with raygen and compose. However, there was a wired error - // and I did not figure out irs origin. Also, having the intersection step inside raygen and compose did not bring - // obvious performance benefit, so I left it as it is for now. 
- util::linear_kernel(iterative_intersect_kernel, 0, stream, numRays, params, N_ITERS); - - if (network) - iterative_sampling_batch_inference(stream, numCoordsPerSample * numRays, params, network); - else - util::linear_kernel(iterative_sampling_groundtruth_kernel, 0, stream, numCoordsPerSample * numRays, params); - - CUDA_CHECK(cudaMemsetAsync(params.counter, 0, sizeof(int32_t), stream)); - util::linear_kernel(iterative_compose_kernel, 0, stream, numRays, params, N_ITERS); - } -} - -void -do_raymarching_iterative(cudaStream_t stream, const ShadowMapData& params, NeuralVolume* network, uint32_t numRays) -{ - if (params.mode == NO_SHADING) - iterative_raymarching_loop(stream, params, network, numRays); - else - iterative_raymarching_loop(stream, params, network, numRays); -} - -INSTANT_VNR_NAMESPACE_END diff --git a/device/method_shadowmap.h b/device/method_shadowmap.h deleted file mode 100644 index b7fdd09..0000000 --- a/device/method_shadowmap.h +++ /dev/null @@ -1,24 +0,0 @@ -#pragma once - -#include "../instantvnr_types.h" -#include "../network.h" - -#include - - -INSTANT_VNR_NAMESPACE_BEGIN - -class MethodShadowMap -{ -public: - enum ShadingMode { NO_SHADING = 0, SHADING }; - - ~MethodShadowMap() { clear(0); } - void render(cudaStream_t stream, const LaunchParams& params, ShadingMode mode, DeviceVolume* volume, NeuralVolume* nvr = nullptr, bool iterative = false); - void clear(cudaStream_t stream) { sample_streaming_buffer.free(stream); } - -private: - CUDABuffer sample_streaming_buffer; -}; - -INSTANT_VNR_NAMESPACE_END From 2aebc10fcf44e857112e226b320d9d883f227919 Mon Sep 17 00:00:00 2001 From: Qi Wu Date: Sun, 19 Apr 2026 09:40:49 -0700 Subject: [PATCH 3/5] Update submodule URL and branch for open-volume-renderer in .gitmodules --- .gitmodules | 3 ++- base | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/.gitmodules b/.gitmodules index f2652c9..20a3b43 100644 --- a/.gitmodules +++ b/.gitmodules @@ -3,4 +3,5 @@ url = 
https://github.com/wilsonCernWq/tiny-cuda-nn.git [submodule "base"] path = base - url = git@github.com:wilsonCernWq/open-volume-renderer.git + url = https://github.com/VIDILabs/open-volume-renderer.git + branch = reorg diff --git a/base b/base index d66050c..e13dceb 160000 --- a/base +++ b/base @@ -1 +1 @@ -Subproject commit d66050c1e2bcf7755cd70efa86b3efdcf0eaef01 +Subproject commit e13dcebf2b0da6c4771158118a6d921dd8af22b5 From 0c9e1435b2651ca1ff5cf1c530e6ddbdcafeec56 Mon Sep 17 00:00:00 2001 From: Qi Wu Date: Mon, 20 Apr 2026 15:27:19 -0700 Subject: [PATCH 4/5] Add .dockerignore file and update CMake configuration for standalone builds - Introduced a .dockerignore file to exclude unnecessary files from Docker builds. - Updated CMakeLists.txt to support installation as a relocatable package and improved target installation settings. - Enhanced setup_cmake.sh to allow for separate installation steps. - Modified Dockerfile to use a newer CUDA base image and adjusted build commands for better clarity. - Improved README with instructions for using instantvnr as a CMake package in other projects. 
--- .dockerignore | 24 +++++++++ .github/workflows/docker-image.yml | 46 ++++++++++++---- CMakeLists.txt | 85 ++++++++++++++++++++++++++++-- Dockerfile | 62 ++++++++++------------ README.md | 25 ++++++++- apps/CMakeLists.txt | 23 ++++++++ base | 2 +- core/CMakeLists.txt | 20 +++++-- setup_cmake.sh | 14 ++++- 9 files changed, 246 insertions(+), 55 deletions(-) create mode 100644 .dockerignore diff --git a/.dockerignore b/.dockerignore new file mode 100644 index 0000000..b9a6c1b --- /dev/null +++ b/.dockerignore @@ -0,0 +1,24 @@ +.git +**/.git +.github/ +.cursor/ + +build +build/ +build_*/ +base/build/ +cmake-build-*/ +dist/ + +__pycache__/ +*.py[cod] +.pytest_cache/ +.mypy_cache/ +.cache/ +.venv/ +env/ +venv/ + +.vscode/ +.idea/ +imgui.ini diff --git a/.github/workflows/docker-image.yml b/.github/workflows/docker-image.yml index 4a1bf08..7fb8ae3 100644 --- a/.github/workflows/docker-image.yml +++ b/.github/workflows/docker-image.yml @@ -2,20 +2,46 @@ name: Docker Image CI on: push: - branches: [ "public", "github-actions" ] + branches: [main, master, public, reorg] pull_request: - branches: [ "public", "github-actions" ] + branches: [main, master, public, reorg] -jobs: - - build: +concurrency: + group: docker-${{ github.head_ref || github.ref }} + cancel-in-progress: true +jobs: + docker: + name: Docker (CUDA_ARCH=${{ matrix.cuda_arch }}) runs-on: ubuntu-latest + strategy: + fail-fast: false + matrix: + cuda_arch: ["86"] steps: - - uses: actions/checkout@v3 - with: - submodules: 'recursive' + - uses: actions/checkout@v4 + with: + submodules: recursive + + - name: Free disk space + uses: jlumbroso/free-disk-space@main + with: + tool-cache: true + android: true + dotnet: true + haskell: true + large-packages: true + swap-storage: true + + - name: Build Docker image + run: | + docker build . \ + --file Dockerfile \ + --build-arg CUDA_ARCH=${{ matrix.cuda_arch }} \ + --tag instantvnr:ci-${{ github.sha }} - - name: Build the Docker image - run: docker build . 
--file Dockerfile --tag instantvnr:$(date +%s) + - name: Verify install prefix exists + run: | + docker run --rm instantvnr:ci-${{ github.sha }} \ + ls /instantvnr/install/lib/cmake/instantvnr/instantvnrConfig.cmake diff --git a/CMakeLists.txt b/CMakeLists.txt index 18f6efb..12fb28c 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -5,8 +5,8 @@ # ======================================================================== # # # Standalone build — OVR is an in-tree submodule, TCNN is built from source -# via FetchContent, and the result is installed as a self-contained -# `instantvnr` Python package directory. +# via FetchContent, and the result can be installed as a relocatable CMake +# package that downstream projects consume via `find_package(instantvnr)`. # if (POLICY CMP0048) @@ -14,11 +14,20 @@ if (POLICY CMP0048) endif() cmake_minimum_required(VERSION 3.24) -project(instantvnr LANGUAGES C CXX CUDA) +project(instantvnr VERSION 0.0.0 LANGUAGES C CXX CUDA) # OVR cmake modules (configure_build_type, configure_cxx, configure_cuda …) list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/base/cmake") +include(GNUInstallDirs) +include(CMakePackageConfigHelpers) + +set(INSTANTVNR_SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}") +set(INSTANTVNR_INSTALL_INCLUDEDIR "${CMAKE_INSTALL_INCLUDEDIR}/instantvnr") +set(INSTANTVNR_INSTALL_CMAKEDIR "${CMAKE_INSTALL_LIBDIR}/cmake/instantvnr") +# Tell the embedded OVR tree where its public headers land in this package. +set(OVR_INSTALL_INCLUDEDIR "${INSTANTVNR_INSTALL_INCLUDEDIR}") + include(configure_build_type) include(configure_cxx) include(configure_cuda) @@ -117,3 +126,73 @@ add_subdirectory(core) # co-installed and available for optional OVR integration. 
add_subdirectory(device) add_subdirectory(apps) + +add_library(instantvnr::instantvnr ALIAS instantvnr) + +install(TARGETS instantvnr + EXPORT instantvnrTargets + LIBRARY DESTINATION "${CMAKE_INSTALL_LIBDIR}" + ARCHIVE DESTINATION "${CMAKE_INSTALL_LIBDIR}" + RUNTIME DESTINATION "${CMAKE_INSTALL_BINDIR}" +) + +install(TARGETS gdt + EXPORT instantvnrTargets +) + +install(TARGETS tfnmodule + EXPORT instantvnrTargets + LIBRARY DESTINATION "${CMAKE_INSTALL_LIBDIR}" + ARCHIVE DESTINATION "${CMAKE_INSTALL_LIBDIR}" + RUNTIME DESTINATION "${CMAKE_INSTALL_BINDIR}" +) + +# The public `instantvnr` target depends on rendercommon at runtime, so install +# it alongside the main shared library even though it is not part of the +# exported CMake interface. +install(TARGETS rendercommon + EXPORT instantvnrTargets + LIBRARY DESTINATION "${CMAKE_INSTALL_LIBDIR}" + ARCHIVE DESTINATION "${CMAKE_INSTALL_LIBDIR}" + RUNTIME DESTINATION "${CMAKE_INSTALL_BINDIR}" +) + +# Install the header surface required by `api.h`. 
+install(FILES + "${CMAKE_CURRENT_SOURCE_DIR}/api.h" + DESTINATION "${INSTANTVNR_INSTALL_INCLUDEDIR}" +) +install(FILES + "${CMAKE_CURRENT_SOURCE_DIR}/core/mathdef.h" + DESTINATION "${INSTANTVNR_INSTALL_INCLUDEDIR}/core" +) +install(DIRECTORY + "${CMAKE_CURRENT_SOURCE_DIR}/base/gdt/gdt" + DESTINATION "${INSTANTVNR_INSTALL_INCLUDEDIR}" +) +install(DIRECTORY + "${CMAKE_CURRENT_SOURCE_DIR}/base/extern/json" + DESTINATION "${INSTANTVNR_INSTALL_INCLUDEDIR}" +) + +write_basic_package_version_file( + "${CMAKE_CURRENT_BINARY_DIR}/instantvnrConfigVersion.cmake" + VERSION "${PROJECT_VERSION}" + COMPATIBILITY SameMajorVersion +) + +export(EXPORT instantvnrTargets + FILE "${CMAKE_CURRENT_BINARY_DIR}/instantvnrConfig.cmake" + NAMESPACE instantvnr:: +) + +install(EXPORT instantvnrTargets + FILE instantvnrConfig.cmake + NAMESPACE instantvnr:: + DESTINATION "${INSTANTVNR_INSTALL_CMAKEDIR}" +) + +install(FILES + "${CMAKE_CURRENT_BINARY_DIR}/instantvnrConfigVersion.cmake" + DESTINATION "${INSTANTVNR_INSTALL_CMAKEDIR}" +) diff --git a/Dockerfile b/Dockerfile index 5c71a5f..71d641b 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,16 +1,15 @@ -# Example Command to Run -# docker build -t instantvnr . +# Example commands: +# docker build --build-arg CUDA_ARCH=86 -t instantvnr . # xhost +si:localuser:root -# docker run --runtime=nvidia -ti --rm -e DISPLAY -v /tmp/.X11-unix:/tmp/.X11-unix -w /instantvnr/build instantvnr +# docker run --gpus all -ti --rm -e DISPLAY -v /tmp/.X11-unix:/tmp/.X11-unix -w /instantvnr/build instantvnr -FROM nvidia/cuda:11.8.0-devel-ubuntu20.04 +FROM nvidia/cuda:12.8.1-devel-ubuntu24.04 # Select a CUDA architecture to build. Currently we do not support multi-arch builds. 
-ARG CUDA_ARCH=70 +ARG CUDA_ARCH=90 ARG DEBIAN_FRONTEND=noninteractive -RUN apt-get update -RUN apt-get install -y --no-install-recommends \ +RUN apt-get update && apt-get install -y --no-install-recommends \ build-essential mesa-utils pkg-config \ libglx0 libglvnd0 libglvnd-dev \ libgl1 libgl1-mesa-dev \ @@ -18,37 +17,32 @@ RUN apt-get install -y --no-install-recommends \ libgles2 libgles2-mesa-dev \ libxrandr-dev libxinerama-dev libxcursor-dev libxi-dev libssl-dev \ libaio-dev \ - wget git ninja-build imagemagick -# RUN rm -rf /var/lib/apt/lists/* + wget git ninja-build imagemagick ca-certificates \ + && rm -rf /var/lib/apt/lists/* -ENV NVIDIA_VISIBLE_DEVICES all -ENV NVIDIA_DRIVER_CAPABILITIES compute,utility,graphics +ENV NVIDIA_VISIBLE_DEVICES=all +ENV NVIDIA_DRIVER_CAPABILITIES=compute,utility,graphics ADD https://raw.githubusercontent.com/NVlabs/nvdiffrec/main/docker/10_nvidia.json \ /usr/share/glvnd/egl_vendor.d/10_nvidia.json -# Install cmake -RUN wget -qO- "https://cmake.org/files/v3.23/cmake-3.23.2-linux-x86_64.tar.gz" | tar --strip-components=1 -xz -C /usr/local +# Install CMake (3.24+ required by the standalone build). +RUN wget -qO- "https://cmake.org/files/v3.28/cmake-3.28.3-linux-x86_64.tar.gz" | tar --strip-components=1 -xz -C /usr/local -# Install tbb +# Install TBB RUN wget -qO- "https://github.com/oneapi-src/oneTBB/releases/download/v2021.9.0/oneapi-tbb-2021.9.0-lin.tgz" | tar --strip-components=1 -xz -C /usr/local -# Create a superbuild -RUN git clone --recursive https://github.com/VIDILabs/open-volume-renderer.git /instantvnr/ovr -# RUN git clone --recursive https://github.com/VIDILabs/instantvnr.git /instantvnr/source -COPY . 
/instantvnr/source -RUN ln -s /instantvnr/source /instantvnr/ovr/projects/instantvnr - -# Config and build -RUN mkdir -p /instantvnr/build -RUN cmake -S /instantvnr/ovr -B/instantvnr/build -GNinja \ - -DOptiX_INSTALL_DIR=/instantvnr/ovr/github-actions/optix-cmake-github-actions/NVIDIA-OptiX-SDK-7.3.0-linux64-x86_64 \ - -DGDT_CUDA_ARCHITECTURES=${CUDA_ARCH} \ - -DOVR_BUILD_MODULE_NNVOLUME=ON \ - -DOVR_BUILD_DEVICE_OSPRAY=OFF \ - -DOVR_BUILD_DEVICE_OPTIX7=ON -RUN cmake --build /instantvnr/build --config Release --parallel 16 - -RUN ln -s /instantvnr/ovr/data /instantvnr/build/data -RUN cp /instantvnr/source/example-model.json /instantvnr/build/example-model.json - -WORKDIR [ '/instantvnr/build' ] +WORKDIR /instantvnr +COPY . /instantvnr + +# Configure and build the standalone project directly from this repository. +RUN SM=${CUDA_ARCH} BUILD_DIR=/instantvnr/build bash ./setup_cmake.sh + +RUN BUILD_DIR=/instantvnr/build INSTALL_PREFIX=/instantvnr/install \ + bash ./setup_cmake.sh --install + +RUN ln -s /instantvnr/data /instantvnr/build/data \ + && cp /instantvnr/example-model.json /instantvnr/build/example-model.json + +ENV CMAKE_PREFIX_PATH=/instantvnr/install + +WORKDIR /instantvnr/build diff --git a/README.md b/README.md index 51d151a..e7f9ebb 100644 --- a/README.md +++ b/README.md @@ -42,7 +42,7 @@ cd .. # Build mkdir build cd build -cmake .. -DGDT_CUDA_ARCHITECTURES=86 -DOVR_BUILD_MODULE_NNVOLUME=ON -DOVR_BUILD_DEVICE_OSPRAY=OFF -DOVR_BUILD_DEVICE_OPTIX7=OFF +cmake .. -DCMAKE_CUDA_ARCHITECTURES=86 -DOVR_BUILD_MODULE_NNVOLUME=ON -DOVR_BUILD_DEVICE_OSPRAY=OFF -DOVR_BUILD_DEVICE_OPTIX7=OFF cmake --build . 
--config Release --parallel 16 # In the binary output directory, setup symbolic links to the data folder @@ -72,6 +72,29 @@ docker run --gpus device=0 --runtime=nvidia -ti --rm -e DISPLAY -v /tmp/.X11-un You can also directly execute apps through the docker container +#### Using from Another CMake Project + +You can also install `instantvnr` as a CMake package and consume it from +another repository: + +```bash +cmake -S . -B build -DCMAKE_BUILD_TYPE=Release -DCMAKE_CUDA_ARCHITECTURES=86 +cmake --build build --parallel +cmake --install build --prefix /path/to/instantvnr-install +``` + +Then in the downstream project's `CMakeLists.txt`: + +```cmake +find_package(instantvnr CONFIG REQUIRED) + +add_executable(my_app main.cpp) +target_link_libraries(my_app PRIVATE instantvnr::instantvnr) +``` + +The exported target preserves the current public include layout, so downstream +code can continue to include headers such as `#include `. + ### Citation ```bibtex diff --git a/apps/CMakeLists.txt b/apps/CMakeLists.txt index b5d1e33..44e3b6a 100644 --- a/apps/CMakeLists.txt +++ b/apps/CMakeLists.txt @@ -14,6 +14,22 @@ if(NOT target) set(target instantvnr) endif() +set(_instantvnr_app_common_include_dirs + "${CMAKE_CURRENT_LIST_DIR}/../base/ovr/common" + "${PROJECT_BINARY_DIR}/base/ovr/common" +) + +function(ivnr_configure_app app_target) + target_link_libraries(${app_target} PRIVATE rendercommon) + target_include_directories(${app_target} PRIVATE ${_instantvnr_app_common_include_dirs}) + target_compile_options(${app_target} PRIVATE + $<$:${CUDA_NVCC_FLAGS}> + ) + target_link_options(${app_target} PRIVATE + "LINKER:--allow-shlib-undefined" + ) +endfunction() + # ======================================================================== # # applications # ======================================================================== # @@ -21,26 +37,33 @@ endif() if(ENABLE_OPENGL) add_executable(vnr_int_dual int_dual_volume.cpp) target_link_libraries(vnr_int_dual PRIVATE ${target} 
${GFX_LIBRARIES}) + ivnr_configure_app(vnr_int_dual) add_executable(vnr_int_single int_volume.cpp) target_link_libraries(vnr_int_single PRIVATE ${target} ${GFX_LIBRARIES}) + ivnr_configure_app(vnr_int_single) endif() add_executable(vnr_cmd_train batch_trainer.cpp) target_link_libraries(vnr_cmd_train PRIVATE ${target}) +ivnr_configure_app(vnr_cmd_train) add_executable(vnr_cmd_render batch_renderer.cpp) target_link_libraries(vnr_cmd_render PRIVATE ${target}) +ivnr_configure_app(vnr_cmd_render) if(ENABLE_IN_SHADER) add_executable(vnr_cmd_isosurface batch_isosurface.cpp) target_link_libraries(vnr_cmd_isosurface PRIVATE ${target}) + ivnr_configure_app(vnr_cmd_isosurface) endif() add_executable(view_model view_model.cpp) target_link_libraries(view_model PRIVATE ${target}) +ivnr_configure_app(view_model) if(ENABLE_IN_SHADER) add_executable(vnr_int_isosurface int_isosurface.cu) target_link_libraries(vnr_int_isosurface PUBLIC renderlib ${target} ${GFX_LIBRARIES}) + ivnr_configure_app(vnr_int_isosurface) endif() diff --git a/base b/base index e13dceb..0920a83 160000 --- a/base +++ b/base @@ -1 +1 @@ -Subproject commit e13dcebf2b0da6c4771158118a6d921dd8af22b5 +Subproject commit 0920a83f1a242a71c5da9fa92d3c6c898dc9e39e diff --git a/core/CMakeLists.txt b/core/CMakeLists.txt index 581dba8..13b3782 100644 --- a/core/CMakeLists.txt +++ b/core/CMakeLists.txt @@ -79,7 +79,7 @@ if(ENABLE_IN_SHADER) networks/tcnn_device_network.cu marching_cube.cu ) - target_compile_definitions(${target} PUBLIC ENABLE_IN_SHADER=1) + target_compile_definitions(${target} PRIVATE ENABLE_IN_SHADER=1) endif() if(ENABLE_OUT_OF_CORE) @@ -105,14 +105,24 @@ PROPERTIES ) target_include_directories(${target} PRIVATE ${CMAKE_CURRENT_BINARY_DIR} ${CMAKE_CURRENT_LIST_DIR}) -target_include_directories(${target} PUBLIC $) -target_include_directories(${target} PUBLIC $) +target_include_directories(${target} PUBLIC + $ + $ + $ + $ +) target_compile_options(${target} PRIVATE $<$:-Xcudafe="--diag_suppress=177">) # 
variable declared but not referenced target_compile_options(${target} PRIVATE $<$:-Xcudafe="--diag_suppress=20044">) # extern variable treated as static -target_link_libraries(${target} PRIVATE TBB::tbb curand tiny-cuda-nn) -target_link_libraries(${target} PUBLIC gdt util tfnmodule) +target_link_libraries(${target} PRIVATE + $ + $ + $ + $ + rendercommon + $ +) # GLIBCXX ABI compile definition — must match the installed PyTorch target_compile_definitions(${target} PUBLIC ${IVNR_GLIBCXX_CXX11_ABI}) diff --git a/setup_cmake.sh b/setup_cmake.sh index 2298129..ad5534e 100644 --- a/setup_cmake.sh +++ b/setup_cmake.sh @@ -8,6 +8,8 @@ # BUILD_DIR=build ./setup_cmake.sh # custom build directory # ./setup_cmake.sh --configure # configure only (skip build) # ./setup_cmake.sh --build # build only (skip configure) +# ./setup_cmake.sh --install # install only (skip configure and build) +# INSTALL_PREFIX=/opt/instantvnr ./setup_cmake.sh --install # # Requires: # - CUDA toolkit (nvcc in PATH or /usr/local/cuda) @@ -19,10 +21,12 @@ BUILD_DIR="${BUILD_DIR:-${SCRIPT_DIR}/build}" DO_CONFIGURE=true DO_BUILD=true +DO_INSTALL=false for arg in "$@"; do case "$arg" in --configure) DO_BUILD=false ;; --build) DO_CONFIGURE=false ;; + --install) DO_INSTALL=true; DO_CONFIGURE=false; DO_BUILD=false ;; esac done @@ -62,5 +66,13 @@ if [[ "$DO_BUILD" == true ]]; then JOBS="${JOBS:-$(nproc)}" echo "[info] Building with $JOBS parallel jobs" cmake --build "$BUILD_DIR" --config Release -- -j"$JOBS" - echo "[info] Build complete. Outputs in $BUILD_DIR/instantvnr/" + echo "[info] Build complete. Outputs in $BUILD_DIR/bin/" +fi + +# ── install ─────────────────────────────────────────────────────────────────── +if [[ "$DO_INSTALL" == true ]]; then + INSTALL_PREFIX="${INSTALL_PREFIX:-${SCRIPT_DIR}/install}" + echo "[info] Installing to $INSTALL_PREFIX" + cmake --install "$BUILD_DIR" --prefix "$INSTALL_PREFIX" + echo "[info] Install complete." 
fi From 059a170126ad9b31455bdf6b046bdaf80200581a Mon Sep 17 00:00:00 2001 From: Qi Wu Date: Tue, 21 Apr 2026 15:44:55 -0700 Subject: [PATCH 5/5] Update submodule commit for base project --- base | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/base b/base index 0920a83..ff40f12 160000 --- a/base +++ b/base @@ -1 +1 @@ -Subproject commit 0920a83f1a242a71c5da9fa92d3c6c898dc9e39e +Subproject commit ff40f1267b9cbfa2c2338899910fce07538d20e9