diff --git a/CMakeLists.txt b/CMakeLists.txt index 84c9a99dc4..2ffac18cd6 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -176,6 +176,7 @@ option(NBL_BUILD_EXAMPLES "Enable building examples" ON) option(NBL_BUILD_MITSUBA_LOADER "Enable nbl::ext::MitsubaLoader?" OFF) # TODO: once it compies turn this ON by default! option(NBL_BUILD_IMGUI "Enable nbl::ext::ImGui?" ON) option(NBL_BUILD_DEBUG_DRAW "Enable Nabla Debug Draw extension?" ON) +option(NBL_BUILD_ENVMAP_IMPORTANCE_SAMPLING "Enable Nabla Envmap Importance Sampling extension?" ON) option(NBL_BUILD_OPTIX "Enable nbl::ext::OptiX?" OFF) if(NBL_COMPILE_WITH_CUDA) diff --git a/include/nbl/builtin/hlsl/concepts/accessors/generic_shared_data.hlsl b/include/nbl/builtin/hlsl/concepts/accessors/generic_shared_data.hlsl index cc22595444..ab7a87c7dd 100644 --- a/include/nbl/builtin/hlsl/concepts/accessors/generic_shared_data.hlsl +++ b/include/nbl/builtin/hlsl/concepts/accessors/generic_shared_data.hlsl @@ -69,7 +69,7 @@ NBL_CONCEPT_END( #include template -NBL_BOOL_CONCEPT GenericDataAccessor = GenericWriteAccessor && GenericWriteAccessor; +NBL_BOOL_CONCEPT GenericDataAccessor = GenericReadAccessor && GenericWriteAccessor; } } diff --git a/include/nbl/builtin/hlsl/concepts/accessors/hierarchical_image.hlsl b/include/nbl/builtin/hlsl/concepts/accessors/hierarchical_image.hlsl new file mode 100644 index 0000000000..09abd08615 --- /dev/null +++ b/include/nbl/builtin/hlsl/concepts/accessors/hierarchical_image.hlsl @@ -0,0 +1,61 @@ +#ifndef _NBL_BUILTIN_HLSL_CONCEPTS_ACCESSORS_HIERARCHICAL_IMAGE_INCLUDED_ +#define _NBL_BUILTIN_HLSL_CONCEPTS_ACCESSORS_HIERARCHICAL_IMAGE_INCLUDED_ + +#include "nbl/builtin/hlsl/concepts/accessors/generic_shared_data.hlsl" + +namespace nbl +{ +namespace hlsl +{ +namespace sampling +{ +namespace hierarchical_image +{ +// declare concept +#define NBL_CONCEPT_NAME LuminanceReadAccessor +#define NBL_CONCEPT_TPLT_PRM_KINDS (typename) +#define NBL_CONCEPT_TPLT_PRM_NAMES (U) +// not the greatest syntax 
but works +#define NBL_CONCEPT_PARAM_0 (a,U) +#define NBL_CONCEPT_PARAM_1 (coord,uint32_t2) +#define NBL_CONCEPT_PARAM_2 (level,uint32_t) +// start concept +NBL_CONCEPT_BEGIN(3) +// need to be defined AFTER the concept begins +#define a NBL_CONCEPT_PARAM_T NBL_CONCEPT_PARAM_0 +#define coord NBL_CONCEPT_PARAM_T NBL_CONCEPT_PARAM_1 +#define level NBL_CONCEPT_PARAM_T NBL_CONCEPT_PARAM_2 +NBL_CONCEPT_END( + ((NBL_CONCEPT_REQ_EXPR_RET_TYPE)((a.template get(coord,level)) , ::nbl::hlsl::is_same_v, float32_t)) + ((NBL_CONCEPT_REQ_EXPR_RET_TYPE)((a.template gather(coord,level)) , ::nbl::hlsl::is_same_v, float32_t4)) +); +#undef level +#undef coord +#undef a +#include + +// declare concept +#define NBL_CONCEPT_NAME HierarchicalSampler +#define NBL_CONCEPT_TPLT_PRM_KINDS (typename)(typename) +#define NBL_CONCEPT_TPLT_PRM_NAMES (HierarchicalSamplerT)(ScalarT) +// not the greatest syntax but works +#define NBL_CONCEPT_PARAM_0 (sampler,HierarchicalSamplerT) +#define NBL_CONCEPT_PARAM_1 (coord,vector) +// start concept +NBL_CONCEPT_BEGIN(2) +// need to be defined AFTER the concept begins +#define sampler NBL_CONCEPT_PARAM_T NBL_CONCEPT_PARAM_0 +#define coord NBL_CONCEPT_PARAM_T NBL_CONCEPT_PARAM_1 +NBL_CONCEPT_END( + ((NBL_CONCEPT_REQ_EXPR_RET_TYPE)((sampler.template sampleUvs(coord)) , ::nbl::hlsl::is_same_v, matrix)) +); +#undef sampler +#undef coord +#include + +} +} +} +} + +#endif \ No newline at end of file diff --git a/include/nbl/builtin/hlsl/sampling/hierarchical_image.hlsl b/include/nbl/builtin/hlsl/sampling/hierarchical_image.hlsl new file mode 100644 index 0000000000..82637a42f8 --- /dev/null +++ b/include/nbl/builtin/hlsl/sampling/hierarchical_image.hlsl @@ -0,0 +1,166 @@ +// Copyright (C) 2018-2025 - DevSH Graphics Programming Sp. z O.O. +// This file is part of the "Nabla Engine". 
+// For conditions of distribution and use, see copyright notice in nabla.h + +#ifndef _NBL_BUILTIN_HLSL_SAMPLING_HIERARCHICAL_IMAGE_INCLUDED_ +#define _NBL_BUILTIN_HLSL_SAMPLING_HIERARCHICAL_IMAGE_INCLUDED_ + +#include +#include +#include +#include + +namespace nbl +{ +namespace hlsl +{ +namespace sampling +{ + +template && hierarchical_image::LuminanceReadAccessor) +struct LuminanceMapSampler +{ + using scalar_type = T; + using vector2_type = vector; + using vector4_type = vector; + + LuminanceAccessorT _map; + uint32_t2 _mapSize; + uint32_t2 _lastWarpPixel; + bool _aspect2x1; + + static LuminanceMapSampler create(NBL_CONST_REF_ARG(LuminanceAccessorT) lumaMap, uint32_t2 mapSize, bool aspect2x1, uint32_t2 warpSize) + { + LuminanceMapSampler result; + result._map = lumaMap; + result._mapSize = mapSize; + result._lastWarpPixel = warpSize - uint32_t2(1, 1); + result._aspect2x1 = aspect2x1; + return result; + } + + static bool choseSecond(scalar_type first, scalar_type second, NBL_REF_ARG(float32_t) xi) + { + // numerical resilience against IEEE754 + scalar_type dummy = 0.0f; + PartitionRandVariable partition; + partition.leftProb = 1.0f / (1.0f + second/ first); + return partition(xi, dummy); + } + + vector2_type binarySearch(const uint32_t2 coord) + { + float32_t2 xi = float32_t2(coord)/ _lastWarpPixel; + uint32_t2 p = uint32_t2(0, 0); + const uint32_t2 mip2x1 = findMSB(_mapSize.x) - 1; + + if (_aspect2x1) { + // do one split in the X axis first cause penultimate full mip would have been 2x1 + p.x = choseSecond(_map.get(uint32_t2(0, 0), mip2x1), _map.get(uint32_t2(0, 1), mip2x1), xi.x) ? 
1 : 0; + } + + for (uint32_t i = mip2x1; i != 0;) + { + --i; + p <<= 1; + const vector4_type values = _map.gather(p, i); + scalar_type wx_0, wx_1; + { + const scalar_type wy_0 = values[3] + values[2]; + const scalar_type wy_1 = values[1] + values[0]; + if (choseSecond(wy_0, wy_1, xi.y)) + { + p.y |= 1; + wx_0 = values[0]; + wx_1 = values[1]; + } + else + { + wx_0 = values[3]; + wx_1 = values[2]; + } + } + + if (choseSecond(wx_0, wx_1, xi.x)) + p.x |= 1; + } + + // If we don`t add xi, the sample will clump to the lowest corner of environment map texel. We add xi to simulate uniform distribution within a pixel and make the sample continuous. This is why we compute the pdf not from the normalized luminance of the texel, instead from the reciprocal of the Jacobian. + const vector2_type directionUV = (vector2_type(p.x, p.y) + xi) / vector2_type(_mapSize); + return directionUV; + } + + matrix sampleUvs(uint32_t2 sampleCoord) NBL_CONST_MEMBER_FUNC + { + const vector2_type dir0 = binarySearch(sampleCoord + vector2_type(0, 1)); + const vector2_type dir1 = binarySearch(sampleCoord + vector2_type(1, 1)); + const vector2_type dir2 = binarySearch(sampleCoord + vector2_type(1, 0)); + const vector2_type dir3 = binarySearch(sampleCoord); + return matrix( + dir0, + dir1, + dir2, + dir3 + ); + } +}; + +template && hierarchical_image::HierarchicalSampler && concepts::Warp) +struct HierarchicalImage +{ + using scalar_type = T; + using vector2_type = vector; + using vector3_type = vector; + using vector4_type = vector; + HierarchicalSamplerT sampler; + uint32_t2 warpSize; + uint32_t2 lastWarpPixel; + + static HierarchicalImage create(NBL_CONST_REF_ARG(HierarchicalSamplerT) sampler, uint32_t2 warpSize) + { + HierarchicalImage result; + result.sampler = sampler; + result.warpSize = warpSize; + result.lastWarpPixel = warpSize - uint32_t2(1, 1); + return result; + } + + + uint32_t2 generate(NBL_REF_ARG(scalar_type) rcpPdf, vector2_type xi) NBL_CONST_MEMBER_FUNC + { + const vector2_type 
texelCoord = xi * lastWarpPixel; + const vector2_type sampleCoord = (texelCoord + vector2_type(0.5f, 0.5f)) / vector2_type(warpSize.x, warpSize.y); + + matrix uvs = sampler.sampleUvs(sampleCoord); + + const vector2_type interpolant = frac(texelCoord); + + const vector2_type xDiffs[] = { + uvs[2] - uvs[3], + uvs[1] - uvs[0] + }; + const vector2_type yVals[] = { + xDiffs[0] * interpolant.x + uvs[3], + xDiffs[1] * interpolant.x + uvs[0] + }; + const vector2_type yDiff = yVals[1] - yVals[0]; + const vector2_type uv = yDiff * interpolant.y + yVals[0]; + + const WarpResult warpResult = PostWarpT::warp(uv); + + const scalar_type detInterpolJacobian = determinant(matrix( + lerp(xDiffs[0], xDiffs[1], interpolant.y), // first column dFdx + yDiff // second column dFdy + )); + + rcpPdf = abs((detInterpolJacobian * scalar_type(lastWarpPixel.x * lastWarpPixel.y)) / warpResult.density); + + return warpResult.dst; + } +}; + +} +} +} + +#endif diff --git a/include/nbl/builtin/hlsl/sampling/warp.hlsl b/include/nbl/builtin/hlsl/sampling/warp.hlsl new file mode 100644 index 0000000000..b1c1fcb5b2 --- /dev/null +++ b/include/nbl/builtin/hlsl/sampling/warp.hlsl @@ -0,0 +1,49 @@ +#ifndef _NBL_BUILTIN_HLSL_SAMPLING_CONCEPTS_WARP_INCLUDED_ +#define _NBL_BUILTIN_HLSL_SAMPLING_CONCEPTS_WARP_INCLUDED_ + + +namespace nbl +{ +namespace hlsl +{ +namespace sampling +{ + +template +struct WarpResult +{ + CodomainT dst; + float32_t density; +}; +} + +namespace concepts +{ + +// declare concept +#define NBL_CONCEPT_NAME Warp +#define NBL_CONCEPT_TPLT_PRM_KINDS (typename) +#define NBL_CONCEPT_TPLT_PRM_NAMES (U) +// not the greatest syntax but works +#define NBL_CONCEPT_PARAM_0 (warper,U) +#define NBL_CONCEPT_PARAM_1 (xi,typename U::domain_type) +#define NBL_CONCEPT_PARAM_2 (dst,typename U::codomain_type) +// start concept +NBL_CONCEPT_BEGIN(3) +#define warper NBL_CONCEPT_PARAM_T NBL_CONCEPT_PARAM_0 +#define xi NBL_CONCEPT_PARAM_T NBL_CONCEPT_PARAM_1 +#define dst NBL_CONCEPT_PARAM_T 
NBL_CONCEPT_PARAM_2
+NBL_CONCEPT_END(
+    ((NBL_CONCEPT_REQ_TYPE)(U::domain_type))
+);
+#undef dst
+#undef xi
+#undef warper
+#include
+
+}
+
+}
+}
+
+#endif
\ No newline at end of file
diff --git a/include/nbl/builtin/hlsl/sampling/warps/spherical.hlsl b/include/nbl/builtin/hlsl/sampling/warps/spherical.hlsl
new file mode 100644
index 0000000000..48237c7e2a
--- /dev/null
+++ b/include/nbl/builtin/hlsl/sampling/warps/spherical.hlsl
@@ -0,0 +1,73 @@
+#ifndef _NBL_BUILTIN_HLSL_SAMPLING_WARP_SPHERICAL_INCLUDED_
+#define _NBL_BUILTIN_HLSL_SAMPLING_WARP_SPHERICAL_INCLUDED_
+
+#include
+#include
+#include
+
+namespace nbl
+{
+namespace hlsl
+{
+namespace sampling
+{
+namespace warp
+{
+// Warp between the unit square and the unit sphere: uv.x -> azimuth phi = 2*pi*uv.x, uv.y -> polar angle theta = pi*uv.y.
+struct Spherical
+{
+    using domain_type = float32_t2;
+    using codomain_type = float32_t3;
+    // Maps uv to a unit direction; `density` is 1/(2*pi^2*sin(theta)), the same expression forwardDensity() evaluates.
+    template )
+    static WarpResult warp(const DomainT uv)
+    {
+        const float32_t phi = 2 * uv.x * numbers::pi;
+        const float32_t theta = uv.y * numbers::pi;
+        float32_t3 dir;
+        dir.x = cos(phi);
+        dir.y = sqrt(1.f - dir.x * dir.x); // |sin(phi)|, the sign is restored on the next line
+        if (uv.x > 0.5f) dir.y = -dir.y; // phi in (pi, 2*pi) => sin(phi) < 0
+        const float32_t cosTheta = cos(theta);
+        const float32_t sinTheta = sqrt(1.0 - cosTheta * cosTheta); // theta in [0, pi] => sin(theta) >= 0; without the sqrt this is sin^2 and dir is not unit length
+        dir.xy *= sinTheta;
+        dir.z = cosTheta;
+        WarpResult warpResult;
+        warpResult.dst = dir;
+        warpResult.density = 1 / (sinTheta * 2 * numbers::pi * numbers::pi); // area scale of the map is 2*pi^2*sin(theta)
+        return warpResult;
+    }
+    // Inverse of warp(): uv.x from the azimuth of (v.x, v.y), uv.y = acos(v.z)/pi. Assumes v is normalized - TODO confirm with callers.
+    template )
+    static domain_type inverseWarp(const CodomainT v)
+    {
+        // NOTE(review): two-argument atan is the GLSL spelling; confirm such an overload is in scope here, otherwise this wants atan2.
+        float32_t2 uv = float32_t2(atan(v.y, v.x), acos(v.z));
+        uv.x *= (numbers::inv_pi * 0.5);
+        if (v.y < 0.0f)
+            uv.x += 1.0f;
+        uv.y *= numbers::inv_pi;
+        return uv;
+    }
+    // Density of warp(uv) with respect to solid angle, evaluated from the domain point: 1/(2*pi^2*sin(pi*uv.y)).
+    template )
+    static float32_t forwardDensity(const DomainT uv)
+    {
+        const float32_t theta = uv.y * numbers::pi;
+        return 1.0f / (sin(theta) * 2 * numbers::pi * numbers::pi);
+    }
+    // Same density as forwardDensity(), evaluated from a direction instead: sin(theta) = sqrt(1 - dst.z^2).
+    // Assumes dst is normalized - TODO confirm with callers.
+    template )
+    static float32_t backwardDensity(const CodomainT dst)
+    {
+        return 1.0f / (sqrt(1.0f - dst.z * dst.z) * 2 * numbers::pi * numbers::pi);
+    }
+};
+
+}
+}
+}
+}
+
+#endif
\ No newline at end of file
diff --git 
a/include/nbl/ext/EnvmapImportanceSampling/CEnvmapImportanceSampling.h b/include/nbl/ext/EnvmapImportanceSampling/CEnvmapImportanceSampling.h new file mode 100644 index 0000000000..039874202d --- /dev/null +++ b/include/nbl/ext/EnvmapImportanceSampling/CEnvmapImportanceSampling.h @@ -0,0 +1,79 @@ +#ifndef _NBL_EXT_ENVMAP_IMPORTANCE_SAMPLING_INCLUDED_ +#define _NBL_EXT_ENVMAP_IMPORTANCE_SAMPLING_INCLUDED_ + +#include "nbl/asset/IPipelineLayout.h" +#include "nbl/video/declarations.h" + +namespace nbl::ext::envmap_importance_sampling +{ + +class EnvmapImportanceSampling +{ + public: + + struct SCachedCreationParameters + { + // using streaming_buffer_t = video::StreamingTransientDataBufferST>; + // + // static constexpr inline auto RequiredAllocateFlags = core::bitflag(video::IDeviceMemoryAllocation::EMAF_DEVICE_ADDRESS_BIT); + // static constexpr inline auto RequiredUsageFlags = core::bitflag(asset::IBuffer::EUF_STORAGE_BUFFER_BIT) | asset::IBuffer::EUF_SHADER_DEVICE_ADDRESS_BIT; + // + // DrawMode drawMode = ADM_DRAW_BOTH; + + core::smart_refctd_ptr utilities; + + //! 
optional, default MDI buffer allocated if not provided + // core::smart_refctd_ptr streamingBuffer = nullptr; + }; + + struct SCreationParameters : public SCachedCreationParameters + { + video::IQueue* transfer = nullptr; // only used to make the 24 element index buffer and instanced pipeline on create + core::smart_refctd_ptr assetManager = nullptr; + + core::smart_refctd_ptr genLumaPipelineLayout = nullptr; + + inline bool validate() const + { + const auto validation = std::to_array + ({ + std::make_pair(bool(assetManager), "Invalid `creationParams.assetManager` is nullptr!"), + std::make_pair(bool(utilities), "Invalid `creationParams.utilities` is nullptr!"), + std::make_pair(bool(transfer), "Invalid `creationParams.transfer` is nullptr!"), + std::make_pair(bool(utilities->getLogicalDevice()->getPhysicalDevice()->getQueueFamilyProperties()[transfer->getFamilyIndex()].queueFlags.hasFlags(video::IQueue::FAMILY_FLAGS::TRANSFER_BIT)), "Invalid `creationParams.transfer` is not capable of transfer operations!") + }); + + system::logger_opt_ptr logger = utilities->getLogger(); + for (const auto& [ok, error] : validation) + if (!ok) + { + logger.log(error, system::ILogger::ELL_ERROR); + return false; + } + + assert(bool(assetManager->getSystem())); + + return true; + } + + }; + + static core::smart_refctd_ptr createGenLumaPipelineLayout(video::ILogicalDevice* device, const core::smart_refctd_ptr* sampler); + + static core::smart_refctd_ptr createMeasureLumaPipelineLayout(video::ILogicalDevice* device); + + static core::smart_refctd_ptr createGenWarpMapPipelineLayout(video::ILogicalDevice* device); + + //! 
mounts the extension's archive to given system - useful if you want to create your own shaders with common header included + static const core::smart_refctd_ptr mount(core::smart_refctd_ptr logger, system::ISystem* system, video::ILogicalDevice* device, const std::string_view archiveAlias = ""); + + static core::smart_refctd_ptr createGenLumaPipeline(const SCreationParameters& params, const video::IGPUPipelineLayout* pipelineLayout); + + static core::smart_refctd_ptr createMeasureLumaPipeline(const SCreationParameters& params, const video::IGPUPipelineLayout* pipelineLayout); + private: + core::smart_refctd_ptr m_lumaGenPipeline; + +}; + +} +#endif diff --git a/include/nbl/ext/EnvmapImportanceSampling/builtin/hlsl/common.hlsl b/include/nbl/ext/EnvmapImportanceSampling/builtin/hlsl/common.hlsl new file mode 100644 index 0000000000..e0240909f0 --- /dev/null +++ b/include/nbl/ext/EnvmapImportanceSampling/builtin/hlsl/common.hlsl @@ -0,0 +1,49 @@ +#ifndef _NBL_HLSL_EXT_ENVMAP_IMPORTANCE_SAMPLING_PARAMETERS_COMMON_INCLUDED_ +#define _NBL_HLSL_EXT_ENVMAP_IMPORTANCE_SAMPLING_PARAMETERS_COMMON_INCLUDED_ + +#include "nbl/builtin/hlsl/cpp_compat.hlsl" + +namespace nbl +{ +namespace hlsl +{ +namespace ext +{ +namespace envmap_importance_sampling +{ + +struct SLumaGenPushConstants +{ + float32_t4 luminanceScales; + uint32_t2 lumaMapResolution; +}; + +struct SLumaMeasurePushConstants +{ + float32_t4 luminanceScales; + uint32_t2 lumaMapResolution; + uint64_t lumaMeasurementBuf; +}; + +struct SLumaMeasurement +{ + float32_t3 weightedDir; + float32_t luma; + float32_t maxLuma; +}; + +struct device_capabilities +{ +#ifdef TEST_NATIVE + NBL_CONSTEXPR_STATIC_INLINE bool shaderSubgroupArithmetic = true; +#else + NBL_CONSTEXPR_STATIC_INLINE bool shaderSubgroupArithmetic = false; +#endif +}; + +} +} +} +} + +#endif diff --git a/include/nbl/ext/EnvmapImportanceSampling/builtin/hlsl/gen_luma.comp.hlsl b/include/nbl/ext/EnvmapImportanceSampling/builtin/hlsl/gen_luma.comp.hlsl new file mode 
100644 index 0000000000..e701f0b00d --- /dev/null +++ b/include/nbl/ext/EnvmapImportanceSampling/builtin/hlsl/gen_luma.comp.hlsl @@ -0,0 +1,30 @@ +#include "nbl/ext/EnvmapImportanceSampling/builtin/hlsl/common.hlsl" + +using namespace nbl; +using namespace nbl::hlsl; +using namespace nbl::hlsl::ext::envmap_importance_sampling; + +[[vk::push_constant]] SLumaGenPushConstants pc; + +[[vk::combinedImageSampler]][[vk::binding(0, 0)]] Texture2D envMap; +[[vk::combinedImageSampler]][[vk::binding(0, 0)]] SamplerState envMapSampler; + +[[vk::binding(1, 0)]] RWTexture2D outImage; + +// TODO(kevinyu): Temporary to make nsc compiles +#define LUMA_MAP_GEN_WORKGROUP_DIM 16 + +[numthreads(LUMA_MAP_GEN_WORKGROUP_DIM, LUMA_MAP_GEN_WORKGROUP_DIM, 1)] +[shader("compute")] +void main(uint32_t3 threadID : SV_DispatchThreadID) +{ + if (all(threadID < pc.lumaMapResolution)) + { + + const float32_t2 uv = (float32_t2(threadID.xy) + float32_t2(0.5, 0.5)) / float32_t2(pc.lumaMapResolution); + const float32_t3 envMapSample = envMap.Sample(envMapSampler, uv).rgb; + const float32_t luma = hlsl::dot(float32_t4(envMapSample, 1.0f), pc.luminanceScales) * sin(numbers::pi * uv.y); + + outImage[threadID.xy] = luma; + } +} diff --git a/include/nbl/ext/EnvmapImportanceSampling/builtin/hlsl/gen_warpmap.comp.hlsl b/include/nbl/ext/EnvmapImportanceSampling/builtin/hlsl/gen_warpmap.comp.hlsl new file mode 100644 index 0000000000..063dfaf9b9 --- /dev/null +++ b/include/nbl/ext/EnvmapImportanceSampling/builtin/hlsl/gen_warpmap.comp.hlsl @@ -0,0 +1,51 @@ +#include "nbl/ext/EnvmapImportanceSampling/builtin/hlsl/common.hlsl" +#include "nbl/builtin/hlsl/sampling/hierarchical_image.hlsl" + + +[[vk::binding(0, 0)]] Texture2D lumaMap; + +[[vk::binding(1, 0)]] RWTexture2D outImage; + +// TODO(kevinyu): Temporary to make nsc compiles +#define WARPMAP_GEN_WORKGROUP_DIM 16 + +using namespace nbl; +using namespace nbl::hlsl; +using namespace nbl::hlsl::sampling; + +struct LuminanceAccessor +{ + float32_t get(uint32_t2 
coord, uint32_t level) + { + return lumaMap.Load(uint32_t3(coord, level)); + } + + float32_t4 gather(uint32_t2 coord, uint32_t level) + { + return float32_t4( + lumaMap.Load(uint32_t3(coord, level), uint32_t2(0, 1)), + lumaMap.Load(uint32_t3(coord, level), uint32_t2(1, 1)), + lumaMap.Load(uint32_t3(coord, level), uint32_t2(1, 0)), + lumaMap.Load(uint32_t3(coord, level), uint32_t2(0, 0)) + ); + + } +}; + +[numthreads(WARPMAP_GEN_WORKGROUP_DIM, WARPMAP_GEN_WORKGROUP_DIM, 1)] +[shader("compute")] +void main(uint32_t3 threadID : SV_DispatchThreadID) +{ + LuminanceAccessor luminanceAccessor; + uint32_t lumaMapWidth, lumaMapHeight; + + using LuminanceSampler = LuminanceMapSampler; + + LuminanceSampler luminanceSampler = + LuminanceSampler::create(luminanceAccessor, lumaMapWidth, lumaMapHeight, lumaMapWidth != lumaMapHeight); + + uint32_t2 pixelCoord = threadID.xy; + + outImage[pixelCoord] = luminanceSampler.binarySearch(pixelCoord); + +} diff --git a/include/nbl/ext/EnvmapImportanceSampling/builtin/hlsl/measure_luma.comp.hlsl b/include/nbl/ext/EnvmapImportanceSampling/builtin/hlsl/measure_luma.comp.hlsl new file mode 100644 index 0000000000..845d12632d --- /dev/null +++ b/include/nbl/ext/EnvmapImportanceSampling/builtin/hlsl/measure_luma.comp.hlsl @@ -0,0 +1,143 @@ +#include "nbl/builtin/hlsl/sampling/warps/spherical.hlsl" +#include "nbl/builtin/hlsl/workgroup2/arithmetic.hlsl" + +#include "nbl/ext/EnvmapImportanceSampling/builtin/hlsl/common.hlsl" + +using namespace nbl; +using namespace nbl::hlsl; +using namespace nbl::hlsl::ext::envmap_importance_sampling; + +// TODO(kevinyu): Temporary to make nsc works +using config_t = nbl::hlsl::workgroup2::ArithmeticConfiguration<4, 4, 2>; + +[[vk::push_constant]] SLumaMeasurePushConstants pc; + +[[vk::binding(0, 0)]] Texture2D lumaMap; + +// final (level 1/2) scan needs to fit in one subgroup exactly +groupshared float32_t scratch[mpl::max_v]; + +struct PreloadedUnitData +{ + float32_t3 weightedDir; + float32_t luma; +}; + 
+struct ScratchProxy +{ + template + void get(const uint32_t ix, NBL_REF_ARG(AccessType) value) + { + value = scratch[ix]; + } + + template + void set(const uint32_t ix, const AccessType value) + { + scratch[ix] = value; + } + + void workgroupExecutionAndMemoryBarrier() + { + glsl::barrier(); + } +}; + +struct PreloadedData +{ + NBL_CONSTEXPR_STATIC_INLINE uint16_t WorkgroupSize = uint16_t(1u) << config_t::WorkgroupSizeLog2; + NBL_CONSTEXPR_STATIC_INLINE uint16_t PreloadedDataCount = config_t::VirtualWorkgroupSize / WorkgroupSize; + + PreloadedUnitData getData(const uint32_t ix) + { + PreloadedUnitData value; + const int32_t2 pixelCoord = int32_t2(ix % pc.lumaMapResolution.x, ix / pc.lumaMapResolution.x); + const float32_t2 uv = (float32_t2(pixelCoord) + float32_t2(0.5, 0.5)) / float32_t2(pc.lumaMapResolution); + const float32_t luma = lumaMap.Load(int32_t3(pixelCoord, 0)); + value.weightedDir = sampling::warp::Spherical::warp(uv).dst * luma; + value.luma = luma; + return value; + } + + void preload() + { + const uint16_t invocationIndex = hlsl::workgroup::SubgroupContiguousIndex(); + [unroll] + for (uint16_t idx = 0; idx < PreloadedDataCount; idx++) + data[idx] = getData(idx * WorkgroupSize + invocationIndex); + } + + void workgroupExecutionAndMemoryBarrier() + { + glsl::barrier(); + } + + PreloadedUnitData data[config_t::ItemsPerInvocation_0]; +}; + +static PreloadedData preloadData; + +struct DirXAccessor +{ + template + void get(const IndexType ix, NBL_REF_ARG(AccessType) value) + { + value = preloadData.data[ix >> config_t::WorkgroupSizeLog2].weightedDir.x; + } +}; + +struct DirYAccessor +{ + template + void get(const IndexType ix, NBL_REF_ARG(AccessType) value) + { + value = preloadData.data[ix >> config_t::WorkgroupSizeLog2].weightedDir.y; + } +}; + +struct DirZAccessor +{ + template + void get(const IndexType ix, NBL_REF_ARG(AccessType) value) + { + value = preloadData.data[ix >> config_t::WorkgroupSizeLog2].weightedDir.z; + } +}; + +struct LumaAccessor +{ 
+ template + void get(const IndexType ix, NBL_REF_ARG(AccessType) value) + { + value = preloadData.data[ix >> config_t::WorkgroupSizeLog2].luma; + } +}; + +[numthreads(config_t::WorkgroupSize, 1, 1)] +[shader("compute")] +void main(uint32_t3 threadID : SV_DispatchThreadID) +{ + ScratchProxy scratchAccessor; + + preloadData.preload(); + preloadData.workgroupExecutionAndMemoryBarrier(); + + SLumaMeasurement measurement; + + DirXAccessor dirXAccessor; + measurement.weightedDir.x= workgroup2::reduction, device_capabilities>::template __call(dirXAccessor, scratchAccessor); + + DirYAccessor dirYAccessor; + measurement.weightedDir.y = workgroup2::reduction, device_capabilities>::template __call(dirYAccessor, scratchAccessor); + + DirZAccessor dirZAccessor; + measurement.weightedDir.z = workgroup2::reduction, device_capabilities>::template __call(dirZAccessor, scratchAccessor); + + LumaAccessor lumaAccessor; + measurement.luma = workgroup2::reduction, device_capabilities>::template __call(lumaAccessor, scratchAccessor); + + measurement.maxLuma = workgroup2::reduction, device_capabilities>::template __call(lumaAccessor, scratchAccessor); + + if (all(threadID == uint32_t3(0, 0, 0))) + vk::RawBufferStore(pc.lumaMeasurementBuf, measurement); +} diff --git a/src/nbl/builtin/CMakeLists.txt b/src/nbl/builtin/CMakeLists.txt index 085ed3c923..050907b3a3 100644 --- a/src/nbl/builtin/CMakeLists.txt +++ b/src/nbl/builtin/CMakeLists.txt @@ -339,6 +339,9 @@ LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/workgroup2/shared_scan.hlsl") #Extensions LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/ext/FullScreenTriangle/SVertexAttributes.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/ext/FullScreenTriangle/default.vert.hlsl") +LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/ext/EnvmapImportanceSampling/structs.hlsl") +LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/ext/EnvmapImportanceSampling/gen_luma.comp.hlsl") +LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED 
"hlsl/ext/EnvmapImportanceSampling/measure_luma.comp.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/text_rendering/msdf.hlsl") #memory LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/memory.hlsl") diff --git a/src/nbl/ext/CMakeLists.txt b/src/nbl/ext/CMakeLists.txt index af46b29aab..221c1fe88e 100644 --- a/src/nbl/ext/CMakeLists.txt +++ b/src/nbl/ext/CMakeLists.txt @@ -66,6 +66,18 @@ if(NBL_BUILD_DEBUG_DRAW) ) endif() +if(NBL_BUILD_ENVMAP_IMPORTANCE_SAMPLING) + add_subdirectory(EnvmapImportanceSampling) + set(NBL_EXT_ENVMAP_IMPORTANCE_SAMPLING_INCLUDE_DIRS + ${NBL_EXT_ENVMAP_IMPORTANCE_SAMPLING_INCLUDE_DIRS} + PARENT_SCOPE + ) + set(NBL_EXT_ENVMAP_IMPORTANCE_SAMPLING_LIB + ${NBL_EXT_ENVMAP_IMPORTANCE_SAMPLING_LIB} + PARENT_SCOPE + ) +endif() + propagate_changed_variables_to_parent_scope() NBL_ADJUST_FOLDERS(ext) \ No newline at end of file diff --git a/src/nbl/ext/EnvmapImportanceSampling/CEnvmapImportanceSampling.cpp b/src/nbl/ext/EnvmapImportanceSampling/CEnvmapImportanceSampling.cpp new file mode 100644 index 0000000000..a4517123b9 --- /dev/null +++ b/src/nbl/ext/EnvmapImportanceSampling/CEnvmapImportanceSampling.cpp @@ -0,0 +1,172 @@ +#include "nbl/ext/EnvmapImportanceSampling/CEnvmapImportanceSampling.h" +#include "nbl/ext/EnvmapImportanceSampling/builtin/hlsl/common.hlsl" + +using namespace nbl::hlsl::ext::envmap_importance_sampling; + +#ifdef NBL_EMBED_BUILTIN_RESOURCES +#include "nbl/ext/debug_draw/builtin/build/CArchive.h" +#endif + +#include "nbl/ext/EnvmapImportanceSampling/builtin/build/spirv/keys.hpp" + +using namespace nbl; +using namespace core; +using namespace video; +using namespace system; +using namespace asset; +using namespace hlsl; + +namespace nbl::ext::envmap_importance_sampling +{ + +constexpr std::string_view NBL_EXT_MOUNT_ENTRY = "nbl/ext/EnvmapImportanceSampling"; + +const smart_refctd_ptr EnvmapImportanceSampling::mount(core::smart_refctd_ptr logger, ISystem* system, video::ILogicalDevice* device, const std::string_view 
archiveAlias) +{ + assert(system); + + if (!system) + return nullptr; + + // extension should mount everything for you, regardless if content goes from virtual filesystem + // or disk directly - and you should never rely on application framework to expose extension data + #ifdef NBL_EMBED_BUILTIN_RESOURCES + auto archive = make_smart_refctd_ptr(smart_refctd_ptr(logger)); + #else + auto archive = make_smart_refctd_ptr(std::string_view(NBL_ENVMAP_IMPORTANCE_SAMPLING_HLSL_MOUNT_POINT), smart_refctd_ptr(logger), system); + #endif + + system->mount(smart_refctd_ptr(archive), archiveAlias.data()); + return smart_refctd_ptr(archive); +} + +core::smart_refctd_ptr EnvmapImportanceSampling::createGenLumaPipeline(const SCreationParameters& params, const video::IGPUPipelineLayout* pipelineLayout) +{ + system::logger_opt_ptr logger = params.utilities->getLogger(); + auto system = smart_refctd_ptr(params.assetManager->getSystem()); + auto* device = params.utilities->getLogicalDevice(); + mount(smart_refctd_ptr(params.utilities->getLogger()), system.get(), params.utilities->getLogicalDevice(), NBL_EXT_MOUNT_ENTRY); + + auto getShader = [&](const core::string& key)->smart_refctd_ptr { + IAssetLoader::SAssetLoadParams lp = {}; + lp.logger = params.utilities->getLogger(); + lp.workingDirectory = NBL_EXT_MOUNT_ENTRY; + auto bundle = params.assetManager->getAsset(key.c_str(), lp); + + const auto contents = bundle.getContents(); + + if (contents.empty()) + { + logger.log("Failed to load shader %s from disk", ILogger::ELL_ERROR, key.c_str()); + return nullptr; + } + + if (bundle.getAssetType() != IAsset::ET_SHADER) + { + logger.log("Loaded asset has wrong type!", ILogger::ELL_ERROR); + return nullptr; + } + + return IAsset::castDown(contents[0]); + }; + + const auto key = nbl::ext::envmap_importance_sampling::builtin::build::get_spirv_key<"gen_luma">(device); + smart_refctd_ptr genLumaShader = getShader(key); + if (!genLumaShader) + { + logger.log("Could not compile shaders!", 
ILogger::ELL_ERROR); + return nullptr; + } + + video::IGPUComputePipeline::SCreationParams pipelineParams[1] = {}; + pipelineParams[0].layout = pipelineLayout; + pipelineParams[0].shader = { .shader = genLumaShader.get(), .entryPoint = "main" }; + + smart_refctd_ptr pipeline; + params.utilities->getLogicalDevice()->createComputePipelines(nullptr, pipelineParams, &pipeline); + if (!pipeline) + { + logger.log("Could not create pipeline!", ILogger::ELL_ERROR); + return nullptr; + } + + return pipeline; +} + + +core::smart_refctd_ptr < video::IGPUPipelineLayout> EnvmapImportanceSampling::createGenLumaPipelineLayout(video::ILogicalDevice* device, const smart_refctd_ptr* sampler) +{ + asset::SPushConstantRange pcRange = { + .stageFlags = hlsl::ESS_COMPUTE, + .offset = 0, + .size = sizeof(SLumaGenPushConstants) + }; + + const IGPUDescriptorSetLayout::SBinding bindings[] = { + { + .binding = 0u, + .type = nbl::asset::IDescriptor::E_TYPE::ET_COMBINED_IMAGE_SAMPLER, + .createFlags = IGPUDescriptorSetLayout::SBinding::E_CREATE_FLAGS::ECF_NONE, + .stageFlags = IShader::E_SHADER_STAGE::ESS_COMPUTE, + .count = 1u, + .immutableSamplers = sampler + }, + { + .binding = 1u, + .type = nbl::asset::IDescriptor::E_TYPE::ET_STORAGE_IMAGE, + .createFlags = IGPUDescriptorSetLayout::SBinding::E_CREATE_FLAGS::ECF_NONE, + .stageFlags = IShader::E_SHADER_STAGE::ESS_COMPUTE, + .count = 1u + } + }; + + const auto setLayout = device->createDescriptorSetLayout(bindings); + return device->createPipelineLayout({ &pcRange, 1 }, setLayout, nullptr, nullptr, nullptr); + +} + +core::smart_refctd_ptr EnvmapImportanceSampling::createMeasureLumaPipelineLayout(video::ILogicalDevice* device) +{ + asset::SPushConstantRange pcRange = { + .stageFlags = hlsl::ESS_COMPUTE, + .offset = 0, + .size = sizeof(SLumaMeasurePushConstants) + }; + + const IGPUDescriptorSetLayout::SBinding bindings[] = { + { + .binding = 0u, + .type = nbl::asset::IDescriptor::E_TYPE::ET_SAMPLED_IMAGE, + .createFlags = 
IGPUDescriptorSetLayout::SBinding::E_CREATE_FLAGS::ECF_NONE, + .stageFlags = IShader::E_SHADER_STAGE::ESS_COMPUTE, + .count = 1u, + } + }; + + const auto setLayout = device->createDescriptorSetLayout(bindings); + return device->createPipelineLayout({ &pcRange, 1 }, setLayout, nullptr, nullptr, nullptr); +} + +core::smart_refctd_ptr EnvmapImportanceSampling::createGenWarpMapPipelineLayout(video::ILogicalDevice* device) +{ + const IGPUDescriptorSetLayout::SBinding bindings[] = { + { + .binding = 0u, + .type = nbl::asset::IDescriptor::E_TYPE::ET_SAMPLED_IMAGE, + .createFlags = IGPUDescriptorSetLayout::SBinding::E_CREATE_FLAGS::ECF_NONE, + .stageFlags = IShader::E_SHADER_STAGE::ESS_COMPUTE, + .count = 1u, + }, + { + .binding = 1u, + .type = nbl::asset::IDescriptor::E_TYPE::ET_STORAGE_IMAGE, + .createFlags = IGPUDescriptorSetLayout::SBinding::E_CREATE_FLAGS::ECF_NONE, + .stageFlags = IShader::E_SHADER_STAGE::ESS_COMPUTE, + .count = 1u + } + }; + + const auto setLayout = device->createDescriptorSetLayout(bindings); + return device->createPipelineLayout({}, setLayout, nullptr, nullptr, nullptr); +} +} diff --git a/src/nbl/ext/EnvmapImportanceSampling/CMakeLists.txt b/src/nbl/ext/EnvmapImportanceSampling/CMakeLists.txt new file mode 100644 index 0000000000..fabd4b8b50 --- /dev/null +++ b/src/nbl/ext/EnvmapImportanceSampling/CMakeLists.txt @@ -0,0 +1,83 @@ +include(${NBL_ROOT_PATH}/cmake/common.cmake) + +set(NBL_EXT_INTERNAL_INCLUDE_DIR "${NBL_ROOT_PATH}/include") + +set(NBL_EXT_ENVMAP_IMPORTANCE_SAMPLING_H + ${NBL_EXT_INTERNAL_INCLUDE_DIR}/nbl/ext/EnvmapImportanceSampling/CEnvmapImportanceSampling.h +) + +set(NBL_EXT_ENVMAP_IMPORTANCE_SAMPLING_SRC + "${CMAKE_CURRENT_SOURCE_DIR}/CEnvmapImportanceSampling.cpp" +) + +nbl_create_ext_library_project( + ENVMAP_IMPORTANCE_SAMPLING + "${NBL_EXT_ENVMAP_IMPORTANCE_SAMPLING_H}" + "${NBL_EXT_ENVMAP_IMPORTANCE_SAMPLING_SRC}" + "${NBL_EXT_ENVMAP_IMPORTANCE_SAMPLING_EXTERNAL_INCLUDE}" + "" + "" +) + 
+get_filename_component(_ARCHIVE_ABSOLUTE_ENTRY_PATH_ "${NBL_EXT_INTERNAL_INCLUDE_DIR}" ABSOLUTE)
+# Directory holding this extension's HLSL sources; the variable's name is also passed as MOUNT_POINT_DEFINE below.
+set(NBL_ENVMAP_IMPORTANCE_SAMPLING_HLSL_MOUNT_POINT "${_ARCHIVE_ABSOLUTE_ENTRY_PATH_}/nbl/ext/EnvmapImportanceSampling/builtin/hlsl")
+set(OUTPUT_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/auto-gen") # generated artifacts stay in the binary tree
+set(DEPENDS
+    ${NBL_ENVMAP_IMPORTANCE_SAMPLING_HLSL_MOUNT_POINT}/common.hlsl
+    ${NBL_ENVMAP_IMPORTANCE_SAMPLING_HLSL_MOUNT_POINT}/gen_warpmap.comp.hlsl
+    ${NBL_ENVMAP_IMPORTANCE_SAMPLING_HLSL_MOUNT_POINT}/gen_luma.comp.hlsl
+    ${NBL_ENVMAP_IMPORTANCE_SAMPLING_HLSL_MOUNT_POINT}/measure_luma.comp.hlsl
+)
+target_sources(${LIB_NAME} PRIVATE ${DEPENDS}) # attach the shaders to the library target...
+set_source_files_properties(${DEPENDS} PROPERTIES HEADER_FILE_ONLY ON) # ...but never hand them to the C++ compiler
+# Shader model used for the -T profile, then the shader list as strict JSON (no trailing commas, so any JSON parser accepts it).
+set(SM 6_8)
+set(JSON [=[
+[
+    {
+        "INPUT": "${NBL_ENVMAP_IMPORTANCE_SAMPLING_HLSL_MOUNT_POINT}/gen_warpmap.comp.hlsl",
+        "KEY": "gen_warpmap"
+    },
+    {
+        "INPUT": "${NBL_ENVMAP_IMPORTANCE_SAMPLING_HLSL_MOUNT_POINT}/gen_luma.comp.hlsl",
+        "KEY": "gen_luma"
+    },
+    {
+        "INPUT": "${NBL_ENVMAP_IMPORTANCE_SAMPLING_HLSL_MOUNT_POINT}/measure_luma.comp.hlsl",
+        "KEY": "measure_luma"
+    }
+
+]
+]=])
+string(CONFIGURE "${JSON}" JSON) # bracket arguments are not evaluated, so the ${...} references are expanded here
+# Options passed as COMMON_OPTIONS to the nsc rules below.
+set(COMPILE_OPTIONS
+    -I "${NBL_ROOT_PATH}/include" # a workaround due to envmap importance sampling ext common header which is not part of Nabla builtin archive
+    -I "${CMAKE_CURRENT_SOURCE_DIR}"
+    -T lib_${SM}
+)
+# INCLUDE and NAMESPACE name the generated keys header and namespace that CEnvmapImportanceSampling.cpp uses for get_spirv_key.
+NBL_CREATE_NSC_COMPILE_RULES(
+    TARGET ${LIB_NAME}SPIRV
+    LINK_TO ${LIB_NAME}
+    DEPENDS ${DEPENDS}
+    BINARY_DIR ${OUTPUT_DIRECTORY}
+    MOUNT_POINT_DEFINE NBL_ENVMAP_IMPORTANCE_SAMPLING_HLSL_MOUNT_POINT
+    COMMON_OPTIONS ${COMPILE_OPTIONS}
+    OUTPUT_VAR KEYS
+    INCLUDE nbl/ext/EnvmapImportanceSampling/builtin/build/spirv/keys.hpp
+    NAMESPACE nbl::ext::envmap_importance_sampling::builtin::build
+    INPUTS ${JSON}
+)
+# KEYS is the OUTPUT_VAR of the rule above; presumably the list of compiled outputs to embed from OUTPUT_DIRECTORY - confirm.
+NBL_CREATE_RESOURCE_ARCHIVE(
+    NAMESPACE nbl::ext::envmap_importance_sampling::builtin::build
+    TARGET ${LIB_NAME}_builtinsBuild
+    LINK_TO ${LIB_NAME}
+    BIND ${OUTPUT_DIRECTORY}
+    BUILTINS ${KEYS}
+)
+
+
+add_library(Nabla::ext::EnvmapImportanceSampling ALIAS ${LIB_NAME}) # namespaced alias for consumers; LIB_NAME is presumably set by nbl_create_ext_library_project() - confirm