Skip to content
Merged
Show file tree
Hide file tree
Changes from 14 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions tools/clang/unittests/HLSLExec/LongVectorOps.def
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,12 @@ OP(Bitwise, FirstBitLow, 1, "firstbitlow", "", "", "LongVectorOp", Bitwise, Defa
OP_DEFAULT_DEFINES(Unary, Initialize, 1, "TestInitialize", "",
" -DFUNC_INITIALIZE=1")

OP_DEFAULT_DEFINES(ArrayOperator, ArrayOperator_StaticAccess, 1, "TestArrayOperatorStaticAccess", "",
" -DTEST_ARRAY_OPERATOR_STATIC_ACCESS=1")

OP(ArrayOperator, ArrayOperator_DynamicAccess, 2, "TestArrayOperatorDynamicAccess", "",
" -DTEST_ARRAY_OPERATOR_DYNAMIC_ACCESS=1", "LongVectorOP", Default1, Zero, Default3)

#define OP_CAST_DEFAULT(GROUP, SYMBOL) \
OP_DEFAULT_DEFINES(GROUP, SYMBOL, 1, "TestCast", "", "-DFUNC_TEST_CAST=1")
#define OP_CAST(GROUP, SYMBOL, INPUT_SET_1) \
Expand Down
7 changes: 7 additions & 0 deletions tools/clang/unittests/HLSLExec/LongVectorTestData.h
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ struct HLSLBool_t {
HLSLBool_t() : Val(0) {}
HLSLBool_t(int32_t Val) : Val(Val) {}
HLSLBool_t(bool Val) : Val(Val) {}
explicit HLSLBool_t(float Val) : Val(Val) {}

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why is adding this operator necessary? I don't see where it's used.


bool operator==(const HLSLBool_t &Other) const {
return static_cast<bool>(Val) == static_cast<bool>(Other.Val);
Expand Down Expand Up @@ -324,6 +325,7 @@ BEGIN_INPUT_SETS(uint16_t)
INPUT_SET(InputSet::Default1, 1, 699, 3, 1023, 5, 6, 0, 8, 9, 10);
INPUT_SET(InputSet::Default2, 2, 111, 3, 4, 5, 9, 21, 8, 9, 10);
INPUT_SET(InputSet::Default3, 4, 112, 4, 5, 3, 7, 21, 1, 11, 9);
INPUT_SET(InputSet::Zero, 0);
INPUT_SET(InputSet::BitShiftRhs, 1, 6, 3, 0, 9, 3, 12, 13, 14, 15);
INPUT_SET(InputSet::Bitwise, 0, 1, 3, 6, 9, 0x5555, 0xAAAA, 0x8000, 127,
std::numeric_limits<uint16_t>::max());
Expand All @@ -335,6 +337,7 @@ BEGIN_INPUT_SETS(uint32_t)
INPUT_SET(InputSet::Default1, 1, 2, 3, 4, 5, 0, 7, 8, 9, 10);
INPUT_SET(InputSet::Default2, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
INPUT_SET(InputSet::Default3, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1);
INPUT_SET(InputSet::Zero, 0);
INPUT_SET(InputSet::BitShiftRhs, 1, 6, 3, 0, 9, 3, 30, 31, 32);
INPUT_SET(InputSet::Bitwise, 0, 1, 3, 6, 9, 0x55555555, 0xAAAAAAAA, 0x80000000,
127, std::numeric_limits<uint32_t>::max());
Expand All @@ -346,6 +349,7 @@ BEGIN_INPUT_SETS(uint64_t)
INPUT_SET(InputSet::Default1, 1, 2, 3, 4, 5, 0, 7, 1000, 9, 10);
INPUT_SET(InputSet::Default2, 1, 2, 1337, 4, 5, 6, 7, 8, 9, 10);
INPUT_SET(InputSet::Default3, 10, 20, 1338, 40, 50, 60, 70, 80, 90, 11);
INPUT_SET(InputSet::Zero, 0);
INPUT_SET(InputSet::BitShiftRhs, 1, 6, 3, 0, 9, 3, 62, 63, 64);
INPUT_SET(InputSet::Bitwise, 0, 1, 3, 6, 9, 0x5555555555555555,
0xAAAAAAAAAAAAAAAA, 0x8000000000000000, 127,
Expand All @@ -361,6 +365,7 @@ INPUT_SET(InputSet::Default2, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0,
-1.0);
INPUT_SET(InputSet::Default3, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0,
1.0);
INPUT_SET(InputSet::Zero, 0.0);
INPUT_SET(InputSet::RangeHalfPi, -1.073, 0.044, -1.047, 0.313, 1.447, -0.865,
1.364, -0.715, -0.800, 0.541);
INPUT_SET(InputSet::RangeOne, 0.331, 0.727, -0.957, 0.677, -0.025, 0.495, 0.855,
Expand Down Expand Up @@ -392,6 +397,7 @@ INPUT_SET(InputSet::Default2, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0,
-1.0);
INPUT_SET(InputSet::Default3, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0,
1.0);
INPUT_SET(InputSet::Zero, 0.0);
INPUT_SET(InputSet::RangeHalfPi, 0.315f, -0.316f, 1.409f, -0.09f, -1.569f,
1.302f, -0.326f, 0.781f, -1.235f, 0.623f);
INPUT_SET(InputSet::RangeOne, 0.727f, 0.331f, -0.957f, 0.677f, -0.025f, 0.495f,
Expand Down Expand Up @@ -420,6 +426,7 @@ INPUT_SET(InputSet::Default2, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0,
-1.0);
INPUT_SET(InputSet::Default3, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0,
1.0);
INPUT_SET(InputSet::Zero, 0.0);
INPUT_SET(InputSet::RangeHalfPi, 0.807, 0.605, 1.317, 0.188, 1.566, -1.507,
0.67, -1.553, 0.194, -0.883);
INPUT_SET(InputSet::RangeOne, 0.331, 0.277, -0.957, 0.677, -0.025, 0.495, 0.855,
Expand Down
60 changes: 60 additions & 0 deletions tools/clang/unittests/HLSLExec/LongVectors.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -776,6 +776,46 @@ BITWISE_OP(OpType::FirstBitLow, (FirstBitLow(A)));

DEFAULT_OP_1(OpType::Initialize, (A));

template <typename T>
struct Op<OpType::ArrayOperator_StaticAccess, T, 1> : DefaultValidation<T> {};

template <typename T>
static std::vector<T> buildExpectedArrayAccess(const InputSets<T> &Inputs) {
const size_t VectorSize = Inputs[0].size();
std::vector<T> Expected;
Expected.resize(VectorSize);

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is there a reason you opted to zero an expected vector for the elements we aren't testing vs just setting the expected output size to 6?

You could avoid verifying a bunch of zeros by doing that. And if I recall correctly, all you will need to do is set the size of Expected to 6 here. The framework we built dispatches calls to other functions based on the number of elements in the Expected vector. And you can set OutNum in the shader to 6 with logic similar to how it's changed for the reduction op.

Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The test read and write at different indexes on which vector size. This is done to provide GEP instruction with index to access at the beginning, middle and end of vector. Forcing that to be 6 means we will need to store the that at I always, making the storing of elements to always be static.

I could make the IndexVector to be 0 to 5, but that reduces the scope where we test GEP calc for larger vectors

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Great point! I wasn't considering the storing portion when I asked that. I wonder if a comment saying that is helpful for a little documentation on that being the intent.


size_t IndexList[6] = {
0, VectorSize - 1, 1, VectorSize - 2, VectorSize / 2, VectorSize / 2 + 1};
for (size_t i = 0; i < 6; ++i)

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I suppose if you're going to use min(VectorSize, 6) in HLSL for the loop count, you should do so here too. The only difference for vectors of length 3 to 5 is redundant operations on short vectors. If you use a vector length of 1 or 2, this will index out-of-bounds at VectorSize - 2 for length 1 and VectorSize / 2 + 1 for lengths 1 and 2.

Expected[IndexList[i]] = Inputs[0][IndexList[i]];

return Expected;
}

template <typename T>
struct ExpectedBuilder<OpType::ArrayOperator_StaticAccess, T> {
static std::vector<T>
buildExpected(Op<OpType::ArrayOperator_StaticAccess, T, 1>,
const InputSets<T> &Inputs) {
DXASSERT_NOMSG(Inputs.size() == 1);
return buildExpectedArrayAccess(Inputs);
}
};

template <typename T>
struct Op<OpType::ArrayOperator_DynamicAccess, T, 2> : DefaultValidation<T> {};

template <typename T>
struct ExpectedBuilder<OpType::ArrayOperator_DynamicAccess, T> {
static std::vector<T>
buildExpected(Op<OpType::ArrayOperator_DynamicAccess, T, 2>,
const InputSets<T> &Inputs) {
DXASSERT_NOMSG(Inputs.size() == 2);
return buildExpectedArrayAccess(Inputs);
}
};

//
// Cast
//
Expand Down Expand Up @@ -1775,15 +1815,35 @@ class DxilConf_SM69_Vectorized {
// Unary

HLK_TEST(Initialize, HLSLBool_t);
HLK_TEST(ArrayOperator_StaticAccess, HLSLBool_t);
HLK_TEST(ArrayOperator_DynamicAccess, HLSLBool_t);
HLK_TEST(Initialize, int16_t);
HLK_TEST(ArrayOperator_StaticAccess, int16_t);
HLK_TEST(ArrayOperator_DynamicAccess, int16_t);
HLK_TEST(Initialize, int32_t);
HLK_TEST(ArrayOperator_StaticAccess, int32_t);
HLK_TEST(ArrayOperator_DynamicAccess, int32_t);
HLK_TEST(Initialize, int64_t);
HLK_TEST(ArrayOperator_StaticAccess, int64_t);
HLK_TEST(ArrayOperator_DynamicAccess, int64_t);
HLK_TEST(Initialize, uint16_t);
HLK_TEST(ArrayOperator_StaticAccess, uint16_t);
HLK_TEST(ArrayOperator_DynamicAccess, uint16_t);
HLK_TEST(Initialize, uint32_t);
HLK_TEST(ArrayOperator_StaticAccess, uint32_t);
HLK_TEST(ArrayOperator_DynamicAccess, uint32_t);
HLK_TEST(Initialize, uint64_t);
HLK_TEST(ArrayOperator_StaticAccess, uint64_t);
HLK_TEST(ArrayOperator_DynamicAccess, uint64_t);
HLK_TEST(Initialize, HLSLHalf_t);
HLK_TEST(ArrayOperator_StaticAccess, HLSLHalf_t);
HLK_TEST(ArrayOperator_DynamicAccess, HLSLHalf_t);
HLK_TEST(Initialize, float);
HLK_TEST(ArrayOperator_StaticAccess, float);
HLK_TEST(ArrayOperator_DynamicAccess, float);
HLK_TEST(Initialize, double);
HLK_TEST(ArrayOperator_StaticAccess, double);
HLK_TEST(ArrayOperator_DynamicAccess, double);

HLK_TEST(ShuffleVector, HLSLBool_t);
HLK_TEST(ShuffleVector, int16_t);
Expand Down
24 changes: 23 additions & 1 deletion tools/clang/unittests/HLSLExec/ShaderOpArith.xml
Original file line number Diff line number Diff line change
Expand Up @@ -4184,7 +4184,29 @@ void MSMain(uint GID : SV_GroupIndex,
const uint32_t OutNum = NUM;
#endif

#if IS_UNARY_OP
#if TEST_ARRAY_OPERATOR_STATIC_ACCESS
uint IndexList[6] = {0, OutNum - 1, 1, OutNum - 2, OutNum / 2, OutNum / 2 + 1};

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The count could be a constant, then used later instead of 6 for clarity.
Plus the IndexList could be const and formatted to make it easier to see the pattern.

Suggested change
uint IndexList[6] = {0, OutNum - 1, 1, OutNum - 2, OutNum / 2, OutNum / 2 + 1};
const uint IndexCount = 6;
const uint IndexList[IndexCount] = {
0, OutNum - 1,
1, OutNum - 2,
OutNum / 2, OutNum / 2 + 1
};


vector<OUT_TYPE, OutNum> OutputVector = 0;
uint End = min(OutNum, 6);

[unroll]for(uint i = 0; i < 6; ++i) {

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Loop uses 6 instead of End.

Suggested change
[unroll]for(uint i = 0; i < 6; ++i) {
[unroll]for(uint i = 0; i < End; ++i) {

OutputVector[IndexList[i]] = Input1[IndexList[i]];
uint index = (uint)(IndexList[i]);

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Did you mean this? Otherwise index is not used.

Suggested change
OutputVector[IndexList[i]] = Input1[IndexList[i]];
uint index = (uint)(IndexList[i]);
uint index = (uint)(IndexList[i]);
OutputVector[index] = Input1[index];

}
#elif TEST_ARRAY_OPERATOR_DYNAMIC_ACCESS
// Input2 will always be 0. This is added so the compiler cannot optimize the array access.
uint Modifier = (uint) Input2[0];
uint IndexList[6] = {0 + Modifier, OutNum - 1 + Modifier, 1 + Modifier, OutNum - 2 + Modifier, OutNum / 2 + Modifier, OutNum / 2 + 1 + Modifier};

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Typically you can use the name Zero for clarity. Also, if you add this Zero to the initialization of index below instead of the init list, the code would be a bit simpler, and equivalent. Then it's easier to compare IndexList between static/dynamic cases. If you had a common location, you could even share the code.


vector<OUT_TYPE, OutNum> OutputVector = 0;
uint End = min(OutNum, 6);

[unroll]for(uint i = 0; i < End; ++i){
uint index = (uint)(IndexList[i]);
OutputVector[index] = Input1[index];
}
#elif IS_UNARY_OP
vector<OUT_TYPE, OutNum> OutputVector = FUNC(Input1);
#elif IS_BINARY_OP
vector<OUT_TYPE, OutNum> OutputVector = FUNC(Input1 OPERATOR
Expand Down