Skip to content
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
Show all changes
74 commits
Select commit Hold shift + click to select a range
09f16c2
minor fixes, example
keptsecret Apr 28, 2025
6f5f8b0
bug fixes and example
keptsecret Apr 28, 2025
1bac247
fix to data accessor indexing
keptsecret Apr 29, 2025
305ac7b
added template spec for vector dim 1
keptsecret Apr 29, 2025
c08063d
added inclusive scan
keptsecret Apr 29, 2025
b1d804f
exclusive scan working
keptsecret Apr 30, 2025
3cf98ab
removed outdated comment
keptsecret Apr 30, 2025
7b310e0
minor changes to config usage
keptsecret May 1, 2025
4b4e7e8
add 1 level scans
keptsecret May 1, 2025
2e5f29f
fixes to 1 level scans
keptsecret May 2, 2025
054b269
added handling >1 vectors on level 1 scan (untested)
keptsecret May 2, 2025
1b5282c
move load/store smem into scan funcs, setup config for 3 levels
keptsecret May 5, 2025
c6dc5bc
change to use coalesced indexing for 2-level scans
keptsecret May 6, 2025
aa0c36c
added 3-level scans
keptsecret May 6, 2025
74c359b
minor bug fixes
keptsecret May 6, 2025
ce244e2
changes to data accessor usage
keptsecret May 7, 2025
90b19d8
wg reduction uses reduce instead of scan
keptsecret May 8, 2025
d2a1663
fixes to calculating levels in config
keptsecret May 9, 2025
ea39d9e
fixes to 3-level scan
keptsecret May 12, 2025
2982e5e
Merge branch 'master' into improve-workgroup-scan-2
keptsecret May 13, 2025
1c0e72e
split config into new file
keptsecret May 14, 2025
59d02fe
merge master
keptsecret May 15, 2025
507904f
minor fixes
keptsecret May 15, 2025
542592f
some changes to arithmetic config
keptsecret May 15, 2025
a9930a0
removed referencing workgroupID in scans
keptsecret May 15, 2025
55d89c5
no need to store locals in reduce
keptsecret May 16, 2025
4e4f26e
added workgroup accessor concepts, refactor accessor usage
keptsecret May 16, 2025
56f013e
Merge branch 'master' into improve-workgroup-scan-2
keptsecret May 19, 2025
004c95a
fixed minor bug
keptsecret May 20, 2025
ccacddb
store temporaries with data accessor
keptsecret May 20, 2025
9c59677
minor fixes
keptsecret May 21, 2025
eb44262
moved indexing functionality to config struct
keptsecret May 21, 2025
573ce44
reduction returns value instead of saving directly to storage
keptsecret May 21, 2025
49ca655
fixes to 2-level scan indexing
keptsecret May 21, 2025
a639145
fixes to 3-level scan and minor stuff
keptsecret May 22, 2025
7751359
some minor fixes
keptsecret May 22, 2025
fd6f527
latest example
keptsecret May 22, 2025
27d84c8
merge master, fix conflicts
keptsecret May 26, 2025
350c6a3
more util funcs in config, fix some calculations
keptsecret May 27, 2025
14e5d15
added generic data/shared mem accessors
keptsecret May 27, 2025
f07329e
fix include guard
keptsecret May 27, 2025
48a7d16
changes to arithmetic accessor concepts
keptsecret May 27, 2025
20a54be
concept macro for checking types
keptsecret May 27, 2025
d83ac5c
revert concept macro addition
keptsecret May 27, 2025
00787bf
added generic read/write accessors
keptsecret May 27, 2025
c0dfc1e
more refactor for accessor concept changes
keptsecret May 27, 2025
55840a3
don't pass scalar_t as index type
keptsecret May 27, 2025
d758ff7
refactor accessor to match accessor template
keptsecret May 27, 2025
b062ede
simplified indexing functions
keptsecret May 27, 2025
472aa0b
more fixes to indexing
keptsecret May 28, 2025
c483941
share level 0 scan between 2-level and 3-level scans (and reduce)
keptsecret May 28, 2025
951ff99
reduce duplicate vars in config
keptsecret May 28, 2025
127c6d9
some fixes to indexing
keptsecret May 29, 2025
90d3579
fix scans for level 1+
keptsecret May 30, 2025
203c03a
some indexing fixes for 3-level reduce/scan
keptsecret May 30, 2025
0b16307
fix 3-level scan downsweep step
keptsecret May 30, 2025
83991b9
added tuple.hlsl
keptsecret Jun 2, 2025
209adb4
added some comments to config funcs for future debugging
keptsecret Jun 2, 2025
0a5dc30
merge master, fix example conflict
keptsecret Jun 2, 2025
f82b405
Merge branch 'master' into improve-workgroup-scan-2
keptsecret Jun 3, 2025
7d77d30
change indexing to uint16_t
keptsecret Jun 3, 2025
7b15a54
do inclusive scan on upsweep and shift left on downsweep
keptsecret Jun 3, 2025
37aa99b
some adjustments to config and func usages
keptsecret Jun 3, 2025
da6c313
split out level 0 scans into its own struct
keptsecret Jun 4, 2025
e230d06
fixes to 3 level scan
keptsecret Jun 4, 2025
3da175d
padding to shared mem indexing to avoid bank conflict
keptsecret Jun 5, 2025
32732e7
fix padding bugs
keptsecret Jun 5, 2025
7a2065a
update to latest example
keptsecret Jun 9, 2025
3a90fa8
Merge branch 'master' into improve-workgroup-scan-2
keptsecret Jun 9, 2025
ce77b46
uncomment some concept requires
keptsecret Jun 9, 2025
fc1bc51
removed redundant stuff, make config more readable
keptsecret Jun 13, 2025
10b7f50
fix some bugs, readability fix
keptsecret Jun 13, 2025
437c194
use x-macros for config compat between hlsl and cpp
keptsecret Jun 16, 2025
029cfeb
improved readability for config, include all new files
keptsecret Jun 16, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion examples_tests
201 changes: 148 additions & 53 deletions include/nbl/builtin/hlsl/workgroup2/arithmetic_config.hlsl
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@

#include "nbl/builtin/hlsl/cpp_compat.hlsl"
#include "nbl/builtin/hlsl/tuple.hlsl"
#include "nbl/builtin/hlsl/mpl.hlsl"

namespace nbl
{
Expand All @@ -19,23 +20,37 @@ namespace impl
template<uint16_t _WorkgroupSizeLog2, uint16_t _SubgroupSizeLog2>
struct virtual_wg_size_log2
{
NBL_CONSTEXPR_STATIC_INLINE uint16_t WorkgroupSizeLog2 = _WorkgroupSizeLog2;
NBL_CONSTEXPR_STATIC_INLINE uint16_t SubgroupSizeLog2 = _SubgroupSizeLog2;
#define DEFINE_ASSIGN(TYPE,ID,...) NBL_CONSTEXPR_STATIC_INLINE TYPE ID = __VA_ARGS__;
#define DEFINE_VIRTUAL_WG_T(ID) ID
#define DEFINE_MPL_MAX_V(TYPE,ARG1,ARG2) mpl::max_v<TYPE, ARG1, ARG2>
#define DEFINE_COND_VAL(TYPE,COND,TRUE_VAL,FALSE_VAL) conditional_value<COND,TYPE,TRUE_VAL,FALSE_VAL>::value
#include "impl/virtual_wg_size_def.hlsl"
#undef DEFINE_COND_VAL
#undef DEFINE_MPL_MAX_V
#undef DEFINE_VIRTUAL_WG_T
#undef DEFINE_ASSIGN

// must have at least enough level 0 outputs to feed a single subgroup
static_assert(WorkgroupSizeLog2>=SubgroupSizeLog2, "WorkgroupSize cannot be smaller than SubgroupSize");
static_assert(WorkgroupSizeLog2<=SubgroupSizeLog2*3+4, "WorkgroupSize cannot be larger than (SubgroupSize^3)*16");

NBL_CONSTEXPR_STATIC_INLINE uint16_t levels = conditional_value<(WorkgroupSizeLog2>SubgroupSizeLog2),uint16_t,conditional_value<(WorkgroupSizeLog2>SubgroupSizeLog2*2+2),uint16_t,3,2>::value,1>::value;
NBL_CONSTEXPR_STATIC_INLINE uint16_t value = mpl::max_v<uint32_t, SubgroupSizeLog2*levels, WorkgroupSizeLog2>;
// must have at least enough level 0 outputs to feed a single subgroup
};

template<class VirtualWorkgroup, uint16_t BaseItemsPerInvocation>
struct items_per_invocation
{
NBL_CONSTEXPR_STATIC_INLINE uint16_t ItemsPerInvocationProductLog2 = mpl::max_v<int16_t,VirtualWorkgroup::WorkgroupSizeLog2-VirtualWorkgroup::SubgroupSizeLog2*VirtualWorkgroup::levels,0>;
NBL_CONSTEXPR_STATIC_INLINE uint16_t value0 = BaseItemsPerInvocation;
NBL_CONSTEXPR_STATIC_INLINE uint16_t value1 = uint16_t(0x1u) << conditional_value<VirtualWorkgroup::levels==3, uint16_t,mpl::min_v<uint16_t,ItemsPerInvocationProductLog2,2>, ItemsPerInvocationProductLog2>::value;
NBL_CONSTEXPR_STATIC_INLINE uint16_t value2 = uint16_t(0x1u) << mpl::max_v<int16_t,ItemsPerInvocationProductLog2-2,0>;
#define DEFINE_ASSIGN(TYPE,ID,...) NBL_CONSTEXPR_STATIC_INLINE TYPE ID = __VA_ARGS__;
#define DEFINE_VIRTUAL_WG_T(ID) VirtualWorkgroup::ID
#define DEFINE_ITEMS_INVOC_T(ID) ID
#define DEFINE_MPL_MIN_V(TYPE,ARG1,ARG2) mpl::min_v<TYPE, ARG1, ARG2>
#define DEFINE_MPL_MAX_V(TYPE,ARG1,ARG2) mpl::max_v<TYPE, ARG1, ARG2>
#define DEFINE_COND_VAL(TYPE,COND,TRUE_VAL,FALSE_VAL) conditional_value<COND,TYPE,TRUE_VAL,FALSE_VAL>::value
#include "impl/items_per_invoc_def.hlsl"
#undef DEFINE_COND_VAL
#undef DEFINE_MPL_MAX_V
#undef DEFINE_MPL_MIN_V
#undef DEFINE_ITEMS_INVOC_T
#undef DEFINE_VIRTUAL_WG_T
#undef DEFINE_ASSIGN

using ItemsPerInvocation = tuple<integral_constant<uint16_t,value0>,integral_constant<uint16_t,value1>,integral_constant<uint16_t,value2> >;
};
Expand All @@ -44,42 +59,35 @@ struct items_per_invocation
template<uint16_t _WorkgroupSizeLog2, uint16_t _SubgroupSizeLog2, uint16_t _ItemsPerInvocation>
struct ArithmeticConfiguration
{
NBL_CONSTEXPR_STATIC_INLINE uint16_t WorkgroupSizeLog2 = _WorkgroupSizeLog2;
NBL_CONSTEXPR_STATIC_INLINE uint16_t WorkgroupSize = uint16_t(0x1u) << WorkgroupSizeLog2;
NBL_CONSTEXPR_STATIC_INLINE uint16_t SubgroupSizeLog2 = _SubgroupSizeLog2;
NBL_CONSTEXPR_STATIC_INLINE uint16_t SubgroupSize = uint16_t(0x1u) << SubgroupSizeLog2;

using virtual_wg_t = impl::virtual_wg_size_log2<WorkgroupSizeLog2, SubgroupSizeLog2>;
NBL_CONSTEXPR_STATIC_INLINE uint16_t LevelCount = virtual_wg_t::levels;
NBL_CONSTEXPR_STATIC_INLINE uint16_t VirtualWorkgroupSize = uint16_t(0x1u) << virtual_wg_t::value;
static_assert(VirtualWorkgroupSize<=WorkgroupSize*SubgroupSize);

using virtual_wg_t = impl::virtual_wg_size_log2<_WorkgroupSizeLog2, _SubgroupSizeLog2>;
using items_per_invoc_t = impl::items_per_invocation<virtual_wg_t, _ItemsPerInvocation>;
NBL_CONSTEXPR_STATIC_INLINE uint16_t ItemsPerInvocation_0 = items_per_invoc_t::value0;
NBL_CONSTEXPR_STATIC_INLINE uint16_t ItemsPerInvocation_1 = items_per_invoc_t::value1;
NBL_CONSTEXPR_STATIC_INLINE uint16_t ItemsPerInvocation_2 = items_per_invoc_t::value2;
static_assert(ItemsPerInvocation_2<=4, "4 level scan would have been needed with this config!");
using ItemsPerInvocation = typename items_per_invoc_t::ItemsPerInvocation;

#define DEFINE_ASSIGN(TYPE,ID,...) NBL_CONSTEXPR_STATIC_INLINE TYPE ID = __VA_ARGS__;
#define DEFINE_VIRTUAL_WG_T(ID) virtual_wg_t::ID
#define DEFINE_ITEMS_INVOC_T(ID) items_per_invoc_t::ID
#define DEFINE_CONFIG_T(ID) ID
#define DEFINE_MPL_MAX_V(TYPE,ARG1,ARG2) mpl::max_v<TYPE, ARG1, ARG2>
#define DEFINE_COND_VAL(TYPE,COND,TRUE_VAL,FALSE_VAL) conditional_value<COND,TYPE,TRUE_VAL,FALSE_VAL>::value
#include "impl/arithmetic_config_def.hlsl"
#undef DEFINE_COND_VAL
#undef DEFINE_MPL_MAX_V
#undef DEFINE_CONFIG_T
#undef DEFINE_ITEMS_INVOC_T
#undef DEFINE_VIRTUAL_WG_T
#undef DEFINE_ASSIGN

using ChannelStride = tuple<integral_constant<uint16_t,__padding>,integral_constant<uint16_t,__channelStride_1>,integral_constant<uint16_t,__channelStride_2> >; // we don't use stride 0

NBL_CONSTEXPR_STATIC_INLINE uint16_t LevelInputCount_1 = conditional_value<LevelCount==3,uint16_t,
mpl::max_v<uint16_t, (VirtualWorkgroupSize>>SubgroupSizeLog2), SubgroupSize>,
SubgroupSize*ItemsPerInvocation_1>::value;
NBL_CONSTEXPR_STATIC_INLINE uint16_t LevelInputCount_2 = conditional_value<LevelCount==3,uint16_t,SubgroupSize*ItemsPerInvocation_2,0>::value;
NBL_CONSTEXPR_STATIC_INLINE uint16_t __SubgroupsPerVirtualWorkgroup = LevelInputCount_1 / ItemsPerInvocation_1;

// user specified the shared mem size of Scalars
NBL_CONSTEXPR_STATIC_INLINE uint32_t SharedScratchElementCount = conditional_value<LevelCount==1,uint16_t,
0,
conditional_value<LevelCount==3,uint16_t,
LevelInputCount_2+(SubgroupSize*ItemsPerInvocation_1)-1,
0
>::value + LevelInputCount_1
>::value;
NBL_CONSTEXPR_STATIC_INLINE uint16_t __padding = conditional_value<LevelCount==3,uint16_t,SubgroupSize-1,0>::value;
static_assert(VirtualWorkgroupSize<=WorkgroupSize*SubgroupSize);
static_assert(ItemsPerInvocation_2<=4, "4 level scan would have been needed with this config!");

#ifdef __HLSL_VERSION
static bool electLast()
{
return glsl::gl_SubgroupInvocationID()==SubgroupSize-1;
}
#endif

// gets a subgroupID as if each workgroup has (VirtualWorkgroupSize/SubgroupSize) subgroups
// each subgroup does work (VirtualWorkgroupSize/WorkgroupSize) times, the index denoted by workgroupInVirtualIndex
Expand All @@ -94,16 +102,21 @@ struct ArithmeticConfiguration
template<uint16_t level NBL_FUNC_REQUIRES(level>0 && level<LevelCount)
static uint16_t sharedStoreIndex(const uint16_t virtualSubgroupID)
{
uint16_t nextLevelInvocationCount;
if (level == LevelCount-1)
nextLevelInvocationCount = SubgroupSize;
else
nextLevelInvocationCount = __SubgroupsPerVirtualWorkgroup;
const uint16_t ItemsPerNextInvocation = tuple_element<level,ItemsPerInvocation>::type::value;
const uint16_t outChannel = virtualSubgroupID & (ItemsPerNextInvocation-uint16_t(1u));
const uint16_t outInvocation = virtualSubgroupID / ItemsPerNextInvocation;
const uint16_t localOffset = outChannel * tuple_element<level,ChannelStride>::type::value + outInvocation;

if (level==2)
return LevelInputCount_1 + ((SubgroupSize-uint16_t(1u))*ItemsPerInvocation_1) + (virtualSubgroupID & (ItemsPerInvocation_2-uint16_t(1u))) * nextLevelInvocationCount + (virtualSubgroupID/ItemsPerInvocation_2);
{
const uint16_t baseOffset = LevelInputCount_1 + (SubgroupSize - uint16_t(1u)) * ItemsPerInvocation_1;
return baseOffset + localOffset;
}
else
return (virtualSubgroupID & (ItemsPerInvocation_1-uint16_t(1u))) * (nextLevelInvocationCount+__padding) + (virtualSubgroupID/ItemsPerInvocation_1) + virtualSubgroupID/(SubgroupSize*ItemsPerInvocation_1);
{
const uint16_t paddingOffset = virtualSubgroupID / (SubgroupSize * ItemsPerInvocation_1);
return localOffset + paddingOffset;
}
}

template<uint16_t level NBL_FUNC_REQUIRES(level>0 && level<LevelCount)
Expand All @@ -117,19 +130,101 @@ struct ArithmeticConfiguration
template<uint16_t level NBL_FUNC_REQUIRES(level>0 && level<LevelCount)
static uint16_t sharedLoadIndex(const uint16_t invocationIndex, const uint16_t component)
{
uint16_t levelInvocationCount;
if (level == LevelCount-1)
levelInvocationCount = SubgroupSize;
else
levelInvocationCount = __SubgroupsPerVirtualWorkgroup;
const uint16_t localOffset = component * tuple_element<level,ChannelStride>::type::value + invocationIndex;
const uint16_t paddingOffset = invocationIndex / SubgroupSize;

if (level==2)
return LevelInputCount_1 + ((SubgroupSize-uint16_t(1u))*ItemsPerInvocation_1) + component * levelInvocationCount + invocationIndex + invocationIndex/SubgroupSize;
{
const uint16_t baseOffset = LevelInputCount_1 + (SubgroupSize - uint16_t(1u)) * ItemsPerInvocation_1;
return baseOffset + localOffset + paddingOffset;
}
else
return component * (levelInvocationCount+__padding) + invocationIndex + invocationIndex/SubgroupSize;
return localOffset + paddingOffset;
}
};

#ifndef __HLSL_VERSION
namespace impl
{
// Host-side (this section is only compiled when __HLSL_VERSION is not defined) runtime
// counterpart of the `virtual_wg_size_log2` template: instead of static constants it holds
// plain data members, filled in by `create`.
// The list of fields lives in "impl/virtual_wg_size_def.hlsl" and is expanded twice below,
// once to compute the values and once to declare the members.
struct SVirtualWGSizeLog2
{
// Computes every entry of the def file from the two log2 sizes.
// The parameter names must stay `_WorkgroupSizeLog2` / `_SubgroupSizeLog2`: the def file
// refers to them by exactly those identifiers.
static SVirtualWGSizeLog2 create(const uint16_t _WorkgroupSizeLog2, const uint16_t _SubgroupSizeLog2)
{
SVirtualWGSizeLog2 retval;
// First expansion: each DEFINE_ASSIGN entry becomes `retval.<ID> = <expression>;`
#define DEFINE_ASSIGN(TYPE,ID,...) retval.ID = __VA_ARGS__;
// An entry that reads a previously listed entry reads the member already stored in `retval`
#define DEFINE_VIRTUAL_WG_T(ID) retval.ID
// Runtime replacements for the helpers the def file expressions are written against;
// note that DEFINE_COND_VAL ignores TYPE here and expands to a plain ternary
#define DEFINE_MPL_MAX_V(TYPE,ARG1,ARG2) hlsl::max<TYPE>(ARG1, ARG2)
#define DEFINE_COND_VAL(TYPE,COND,TRUE_VAL,FALSE_VAL) (COND ? TRUE_VAL : FALSE_VAL)
#include "impl/virtual_wg_size_def.hlsl"
#undef DEFINE_COND_VAL
#undef DEFINE_MPL_MAX_V
#undef DEFINE_VIRTUAL_WG_T
#undef DEFINE_ASSIGN
return retval;
}

// Second expansion: only the type and the name of each entry are kept, declaring one data
// member per entry. The initializer tokens are discarded, so the helper macros that were
// #undef'd above do not have to be defined at this point.
#define DEFINE_ASSIGN(TYPE,ID,...) TYPE ID;
#include "impl/virtual_wg_size_def.hlsl"
#undef DEFINE_ASSIGN
};

// Host-side runtime counterpart of the `items_per_invocation` template.
// Its fields are the entries of "impl/items_per_invoc_def.hlsl", expanded once inside
// `create` to compute them and once at struct scope to declare them.
struct SItemsPerInvoc
{
// Computes every entry of the def file.
// `virtualWgSizeLog2` supplies the values the def file reads through DEFINE_VIRTUAL_WG_T.
// `BaseItemsPerInvocation` must keep this exact name: the def file uses it verbatim for `value0`.
static SItemsPerInvoc create(const SVirtualWGSizeLog2 virtualWgSizeLog2, const uint16_t BaseItemsPerInvocation)
{
SItemsPerInvoc retval;
// Each DEFINE_ASSIGN entry becomes `retval.<ID> = <expression>;`
#define DEFINE_ASSIGN(TYPE,ID,...) retval.ID = __VA_ARGS__;
// Virtual-workgroup values come from the argument, this struct's own earlier entries from `retval`
#define DEFINE_VIRTUAL_WG_T(ID) virtualWgSizeLog2.ID
#define DEFINE_ITEMS_INVOC_T(ID) retval.ID
// Runtime replacements for the helpers the def file expressions are written against;
// DEFINE_COND_VAL ignores TYPE here and expands to a plain ternary
#define DEFINE_MPL_MIN_V(TYPE,ARG1,ARG2) hlsl::min<TYPE>(ARG1, ARG2)
#define DEFINE_MPL_MAX_V(TYPE,ARG1,ARG2) hlsl::max<TYPE>(ARG1, ARG2)
#define DEFINE_COND_VAL(TYPE,COND,TRUE_VAL,FALSE_VAL) (COND ? TRUE_VAL : FALSE_VAL)
#include "impl/items_per_invoc_def.hlsl"
#undef DEFINE_COND_VAL
#undef DEFINE_MPL_MAX_V
#undef DEFINE_MPL_MIN_V
#undef DEFINE_ITEMS_INVOC_T
#undef DEFINE_VIRTUAL_WG_T
#undef DEFINE_ASSIGN
return retval;
}

// Member declarations: only the type and name of each entry are kept, the initializer
// tokens are discarded (so the helper macros need not be defined here).
#define DEFINE_ASSIGN(TYPE,ID,...) TYPE ID;
#include "impl/items_per_invoc_def.hlsl"
#undef DEFINE_ASSIGN
};
}

// Host-side runtime counterpart of the `ArithmeticConfiguration` template: the same entries
// from "impl/arithmetic_config_def.hlsl" are stored as plain data members instead of static
// constants, so a configuration can be computed from values only known at run time.
struct SArithmeticConfiguration
{
// Builds the two helper structs first, then evaluates every entry of the def file.
// `_WorkgroupSizeLog2` and `_SubgroupSizeLog2` must keep these exact names because the def
// file refers to them verbatim; `_ItemsPerInvocation` is forwarded to SItemsPerInvoc::create.
static SArithmeticConfiguration create(const uint16_t _WorkgroupSizeLog2, const uint16_t _SubgroupSizeLog2, const uint16_t _ItemsPerInvocation)
{
impl::SVirtualWGSizeLog2 virtualWgSizeLog2 = impl::SVirtualWGSizeLog2::create(_WorkgroupSizeLog2, _SubgroupSizeLog2);
impl::SItemsPerInvoc itemsPerInvoc = impl::SItemsPerInvoc::create(virtualWgSizeLog2, _ItemsPerInvocation);

SArithmeticConfiguration retval;
// Each DEFINE_ASSIGN entry becomes `retval.<ID> = <expression>;`
#define DEFINE_ASSIGN(TYPE,ID,...) retval.ID = __VA_ARGS__;
// Where each kind of reference in the def file is read from
#define DEFINE_VIRTUAL_WG_T(ID) virtualWgSizeLog2.ID
#define DEFINE_ITEMS_INVOC_T(ID) itemsPerInvoc.ID
// Entries read earlier entries back out of `retval`, which relies on the def file listing
// them in dependency order
#define DEFINE_CONFIG_T(ID) retval.ID
// Runtime replacements for the helpers the def file expressions are written against;
// DEFINE_COND_VAL ignores TYPE here and expands to a plain ternary
#define DEFINE_MPL_MAX_V(TYPE,ARG1,ARG2) hlsl::max<TYPE>(ARG1, ARG2)
#define DEFINE_COND_VAL(TYPE,COND,TRUE_VAL,FALSE_VAL) (COND ? TRUE_VAL : FALSE_VAL)
#include "impl/arithmetic_config_def.hlsl"
#undef DEFINE_COND_VAL
#undef DEFINE_MPL_MAX_V
#undef DEFINE_CONFIG_T
#undef DEFINE_ITEMS_INVOC_T
#undef DEFINE_VIRTUAL_WG_T
#undef DEFINE_ASSIGN
return retval;
}

// Member declarations: only the type and name of each entry are kept, the initializer
// tokens are discarded (so the helper macros need not be defined here).
#define DEFINE_ASSIGN(TYPE,ID,...) TYPE ID;
#include "impl/arithmetic_config_def.hlsl"
#undef DEFINE_ASSIGN
};
#endif

template<class T>
struct is_configuration : bool_constant<false> {};

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
// Copyright (C) 2025 - DevSH Graphics Programming Sp. z O.O.
// This file is part of the "Nabla Engine".
// For conditions of distribution and use, see copyright notice in nabla.h

// X-macro list of the arithmetic configuration entries. This file has no include guard on
// purpose: it is meant to be included several times with different definitions of
// DEFINE_ASSIGN(TYPE,ID,expression...) and of the helper macros used in the expressions:
//   DEFINE_VIRTUAL_WG_T(ID)  - reads an entry of the virtual workgroup size list
//   DEFINE_ITEMS_INVOC_T(ID) - reads an entry of the items-per-invocation list
//   DEFINE_CONFIG_T(ID)      - reads an entry declared earlier in this list
//   DEFINE_COND_VAL(TYPE,COND,TRUE_VAL,FALSE_VAL), DEFINE_MPL_MAX_V(TYPE,ARG1,ARG2)
// `_WorkgroupSizeLog2` and `_SubgroupSizeLog2` must be visible at the point of inclusion.
// Keep the entries in dependency order: every expression only refers to entries above it.

// sizes and their log2, taken straight from the inputs
DEFINE_ASSIGN(uint16_t, WorkgroupSizeLog2, _WorkgroupSizeLog2)
DEFINE_ASSIGN(uint16_t, WorkgroupSize, uint16_t(0x1u) << DEFINE_CONFIG_T(WorkgroupSizeLog2))
DEFINE_ASSIGN(uint16_t, SubgroupSizeLog2, _SubgroupSizeLog2)
DEFINE_ASSIGN(uint16_t, SubgroupSize, uint16_t(0x1u) << DEFINE_CONFIG_T(SubgroupSizeLog2))

// number of levels and virtual workgroup size (2^value), as computed by the virtual workgroup size list
DEFINE_ASSIGN(uint16_t, LevelCount, DEFINE_VIRTUAL_WG_T(levels))
DEFINE_ASSIGN(uint16_t, VirtualWorkgroupSize, uint16_t(0x1u) << DEFINE_VIRTUAL_WG_T(value))

// items per invocation for levels 0, 1 and 2, as computed by the items-per-invocation list
DEFINE_ASSIGN(uint16_t, ItemsPerInvocation_0, DEFINE_ITEMS_INVOC_T(value0))
DEFINE_ASSIGN(uint16_t, ItemsPerInvocation_1, DEFINE_ITEMS_INVOC_T(value1))
DEFINE_ASSIGN(uint16_t, ItemsPerInvocation_2, DEFINE_ITEMS_INVOC_T(value2))

// LevelInputCount_1: with 3 levels max(VirtualWorkgroupSize/SubgroupSize, SubgroupSize),
// otherwise SubgroupSize*ItemsPerInvocation_1
DEFINE_ASSIGN(uint16_t, LevelInputCount_1, DEFINE_COND_VAL(uint16_t,(DEFINE_CONFIG_T(LevelCount)==3),
DEFINE_MPL_MAX_V(uint16_t, (DEFINE_CONFIG_T(VirtualWorkgroupSize)>>DEFINE_CONFIG_T(SubgroupSizeLog2)), DEFINE_CONFIG_T(SubgroupSize)),
DEFINE_CONFIG_T(SubgroupSize)*DEFINE_CONFIG_T(ItemsPerInvocation_1)))
// LevelInputCount_2: SubgroupSize*ItemsPerInvocation_2 with 3 levels, 0 otherwise
DEFINE_ASSIGN(uint16_t, LevelInputCount_2, DEFINE_COND_VAL(uint16_t,(DEFINE_CONFIG_T(LevelCount)==3),DEFINE_CONFIG_T(SubgroupSize)*DEFINE_CONFIG_T(ItemsPerInvocation_2),0))
DEFINE_ASSIGN(uint16_t, VirtualInvocationsAtLevel1, DEFINE_CONFIG_T(LevelInputCount_1) / DEFINE_CONFIG_T(ItemsPerInvocation_1))

// __padding is SubgroupSize-1 with 3 levels and 0 otherwise; it is added on top of the level 1
// channel stride (VirtualInvocationsAtLevel1 with 3 levels, SubgroupSize otherwise).
// The level 2 channel stride is SubgroupSize with 3 levels and 0 otherwise.
// NOTE(review): presumably this is the shared memory padding against bank conflicts - confirm with the indexing functions.
DEFINE_ASSIGN(uint16_t, __padding, DEFINE_COND_VAL(uint16_t,(DEFINE_CONFIG_T(LevelCount)==3),DEFINE_CONFIG_T(SubgroupSize)-1,0))
DEFINE_ASSIGN(uint16_t, __channelStride_1, DEFINE_COND_VAL(uint16_t,(DEFINE_CONFIG_T(LevelCount)==3),DEFINE_CONFIG_T(VirtualInvocationsAtLevel1),DEFINE_CONFIG_T(SubgroupSize)) + DEFINE_CONFIG_T(__padding))
DEFINE_ASSIGN(uint16_t, __channelStride_2, DEFINE_COND_VAL(uint16_t,(DEFINE_CONFIG_T(LevelCount)==3),DEFINE_CONFIG_T(SubgroupSize),0))

// user specified the shared mem size of Scalars
// 1 level: 0; 2 levels: LevelInputCount_1;
// 3 levels: LevelInputCount_2 + SubgroupSize*ItemsPerInvocation_1 - 1 + LevelInputCount_1
// NOTE(review): the entry is declared uint32_t but both DEFINE_COND_VAL are given uint16_t - confirm this is intended.
DEFINE_ASSIGN(uint32_t, SharedScratchElementCount, DEFINE_COND_VAL(uint16_t,(DEFINE_CONFIG_T(LevelCount)==1),
0,
DEFINE_COND_VAL(uint16_t,(DEFINE_CONFIG_T(LevelCount)==3),
DEFINE_CONFIG_T(LevelInputCount_2)+(DEFINE_CONFIG_T(SubgroupSize)*DEFINE_CONFIG_T(ItemsPerInvocation_1))-1,
0
) + DEFINE_CONFIG_T(LevelInputCount_1)
))
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
// Copyright (C) 2025 - DevSH Graphics Programming Sp. z O.O.
// This file is part of the "Nabla Engine".
// For conditions of distribution and use, see copyright notice in nabla.h

// X-macro list of the items-per-invocation entries; meant to be included several times with
// different definitions of DEFINE_ASSIGN(TYPE,ID,expression...), DEFINE_VIRTUAL_WG_T(ID),
// DEFINE_ITEMS_INVOC_T(ID), DEFINE_COND_VAL(TYPE,COND,TRUE_VAL,FALSE_VAL),
// DEFINE_MPL_MIN_V(TYPE,ARG1,ARG2) and DEFINE_MPL_MAX_V(TYPE,ARG1,ARG2).
// `BaseItemsPerInvocation` must be visible at the point of inclusion.
// Keep the entries in dependency order: every expression only refers to entries above it.

// max(WorkgroupSizeLog2 - SubgroupSizeLog2*levels, 0); the max is requested as int16_t while the entry itself is uint16_t
DEFINE_ASSIGN(uint16_t, ItemsPerInvocationProductLog2, DEFINE_MPL_MAX_V(int16_t,DEFINE_VIRTUAL_WG_T(WorkgroupSizeLog2)-DEFINE_VIRTUAL_WG_T(SubgroupSizeLog2)*DEFINE_VIRTUAL_WG_T(levels),0))
// level 0 uses the value given by the user unchanged
DEFINE_ASSIGN(uint16_t, value0, BaseItemsPerInvocation)
// level 1: 2^ItemsPerInvocationProductLog2, with the exponent capped at 2 when there are 3 levels
DEFINE_ASSIGN(uint16_t, value1, uint16_t(0x1u) << DEFINE_COND_VAL(uint16_t,(DEFINE_VIRTUAL_WG_T(levels)==3),DEFINE_MPL_MIN_V(uint16_t,DEFINE_ITEMS_INVOC_T(ItemsPerInvocationProductLog2),2),DEFINE_ITEMS_INVOC_T(ItemsPerInvocationProductLog2)))
// level 2: 2^max(ItemsPerInvocationProductLog2-2, 0), i.e. whatever part of the exponent exceeds 2
DEFINE_ASSIGN(uint16_t, value2, uint16_t(0x1u) << DEFINE_MPL_MAX_V(int16_t,DEFINE_ITEMS_INVOC_T(ItemsPerInvocationProductLog2)-2,0))
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
// Copyright (C) 2025 - DevSH Graphics Programming Sp. z O.O.
// This file is part of the "Nabla Engine".
// For conditions of distribution and use, see copyright notice in nabla.h

// X-macro list of the virtual workgroup size entries; meant to be included several times with
// different definitions of DEFINE_ASSIGN(TYPE,ID,expression...), DEFINE_VIRTUAL_WG_T(ID),
// DEFINE_COND_VAL(TYPE,COND,TRUE_VAL,FALSE_VAL) and DEFINE_MPL_MAX_V(TYPE,ARG1,ARG2).
// `_WorkgroupSizeLog2` and `_SubgroupSizeLog2` must be visible at the point of inclusion.
// Keep the entries in dependency order: `value` reads `levels`.

DEFINE_ASSIGN(uint16_t, WorkgroupSizeLog2, _WorkgroupSizeLog2)
DEFINE_ASSIGN(uint16_t, SubgroupSizeLog2, _SubgroupSizeLog2)
// 1 level when the workgroup is not larger than a subgroup, 3 levels when
// WorkgroupSizeLog2 > 2*SubgroupSizeLog2+2, 2 levels otherwise
DEFINE_ASSIGN(uint16_t, levels, DEFINE_COND_VAL(uint16_t,(_WorkgroupSizeLog2>_SubgroupSizeLog2),DEFINE_COND_VAL(uint16_t,(_WorkgroupSizeLog2>_SubgroupSizeLog2*2+2),3,2),1))
// log2 of the virtual workgroup size: max(SubgroupSizeLog2*levels, WorkgroupSizeLog2)
DEFINE_ASSIGN(uint16_t, value, DEFINE_MPL_MAX_V(uint16_t, _SubgroupSizeLog2*DEFINE_VIRTUAL_WG_T(levels), _WorkgroupSizeLog2))
Loading
Loading