Skip to content

Commit cb4e557

Browse files
Pass proper dispatch flags.
- add a new policy that selects L1 caching; it is applied when the kernel doesn't have any stateless writes Change-Id: I3948e652797420976159bbfec2c2a154eb9e18ee Signed-off-by: Mrozek, Michal <[email protected]>
1 parent ea09541 commit cb4e557

File tree

6 files changed

+40
-0
lines changed

6 files changed

+40
-0
lines changed

runtime/command_queue/enqueue_common.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -688,6 +688,8 @@ CompletionStamp CommandQueueHw<GfxFamily>::enqueueNonBlocked(
688688

689689
if (anyUncacheableArgs) {
690690
dispatchFlags.l3CacheSettings = L3CachingSettings::l3CacheOff;
691+
} else if (!kernel->areStatelessWritesUsed()) {
692+
dispatchFlags.l3CacheSettings = L3CachingSettings::l3AndL1On;
691693
}
692694

693695
if (gtpinIsGTPinInitialized()) {

runtime/command_stream/csr_definitions.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@ constexpr int64_t maxTimeout = std::numeric_limits<int64_t>::max();
3333
// Values stored in DispatchFlags::l3CacheSettings by the enqueue/submit paths.
namespace L3CachingSettings {
3434
constexpr uint32_t l3CacheOn = 0u;
3535
// Chosen by the dispatch paths when any kernel argument is uncacheable.
constexpr uint32_t l3CacheOff = 1u;
36+
// Chosen by the dispatch paths when no argument is uncacheable and
// kernel->areStatelessWritesUsed() returns false.
constexpr uint32_t l3AndL1On = 2u;
3637
} // namespace L3CachingSettings
3738

3839
struct DispatchFlags {

runtime/helpers/task_information.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -194,6 +194,8 @@ CompletionStamp &CommandComputeKernel::submit(uint32_t taskLevel, bool terminate
194194

195195
if (anyUncacheableArgs) {
196196
dispatchFlags.l3CacheSettings = L3CachingSettings::l3CacheOff;
197+
} else if (!kernel->areStatelessWritesUsed()) {
198+
dispatchFlags.l3CacheSettings = L3CachingSettings::l3AndL1On;
197199
}
198200

199201
DEBUG_BREAK_IF(taskLevel >= Event::eventNotReady);

unit_tests/command_queue/enqueue_kernel_1_tests.cpp

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -354,6 +354,38 @@ HWTEST_F(EnqueueKernelTest, whenEnqueueingKernelThatRequirePrivateScratchThenPri
354354
EXPECT_EQ(privateScratchSize, csr.requiredPrivateScratchSize);
355355
}
356356

357+
// Non-blocked path: enqueues a mock kernel whose containsStatelessWrites flag is
// cleared, with no wait events, and checks that the dispatch flags captured by the
// ULT command stream receiver carry L3CachingSettings::l3AndL1On.
HWTEST_F(EnqueueKernelTest, whenEnqueueKernelWithNoStatelessWriteWhenSbaIsBeingProgrammedThenConstPolicyIsChoosen) {
358+
auto &csr = pDevice->getUltCommandStreamReceiver<FamilyType>();
359+
size_t off[3] = {0, 0, 0};
360+
size_t gws[3] = {1, 1, 1};
361+
362+
MockKernelWithInternals mockKernel(*pDevice);
363+
// Mark the kernel as having no stateless writes.
mockKernel.mockKernel->containsStatelessWrites = false;
364+
365+
pCmdQ->enqueueKernel(mockKernel.mockKernel, 1, off, gws, nullptr, 0, nullptr, nullptr);
366+
367+
// recordedDispatchFlags is the copy taken by UltCommandStreamReceiver::flushTask.
EXPECT_EQ(csr.recordedDispatchFlags.l3CacheSettings, L3CachingSettings::l3AndL1On);
368+
}
369+
370+
// Blocked path: same expectation as the non-blocked variant, but the enqueue waits
// on a user event, and the flags are checked after that event's status is set.
HWTEST_F(EnqueueKernelTest, whenEnqueueKernelWithNoStatelessWriteOnBlockedCodePathWhenSbaIsBeingProgrammedThenConstPolicyIsChoosen) {
371+
auto &csr = pDevice->getUltCommandStreamReceiver<FamilyType>();
372+
size_t off[3] = {0, 0, 0};
373+
size_t gws[3] = {1, 1, 1};
374+
375+
auto userEvent = clCreateUserEvent(this->context, nullptr);
376+
377+
MockKernelWithInternals mockKernel(*pDevice);
378+
// Mark the kernel as having no stateless writes.
mockKernel.mockKernel->containsStatelessWrites = false;
379+
380+
// The user event is passed as the single entry of the wait list.
pCmdQ->enqueueKernel(mockKernel.mockKernel, 1, off, gws, nullptr, 1, &userEvent, nullptr);
381+
382+
// Status 0 corresponds to CL_COMPLETE in the OpenCL headers; presumably this
// releases the blocked enqueue so it gets submitted -- confirm against the runtime.
clSetUserEventStatus(userEvent, 0u);
383+
384+
// recordedDispatchFlags is the copy taken by UltCommandStreamReceiver::flushTask.
EXPECT_EQ(csr.recordedDispatchFlags.l3CacheSettings, L3CachingSettings::l3AndL1On);
385+
386+
clReleaseEvent(userEvent);
387+
}
388+
357389
HWTEST_F(EnqueueKernelTest, givenEnqueueWithGlobalWorkSizeWhenZeroValueIsPassedInDimensionThenTheKernelCommandWillTriviallySucceed) {
358390
size_t gws[3] = {0, 0, 0};
359391
MockKernelWithInternals mockKernel(*pDevice);

unit_tests/libult/ult_command_stream_receiver.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -98,6 +98,7 @@ class UltCommandStreamReceiver : public CommandStreamReceiverHw<GfxFamily>, publ
9898
// Test override of flushTask: stores a copy of the incoming dispatch flags and a
// pointer to the command stream, then forwards all arguments unchanged to
// BaseClass::flushTask and returns its result.
CompletionStamp flushTask(LinearStream &commandStream, size_t commandStreamStart,
9999
const IndirectHeap &dsh, const IndirectHeap &ioh, const IndirectHeap &ssh,
100100
uint32_t taskLevel, DispatchFlags &dispatchFlags, Device &device) override {
101+
// Copy taken before the base-class call so tests can inspect the flags as passed in.
recordedDispatchFlags = dispatchFlags;
101102
this->lastFlushedCommandStream = &commandStream;
102103
return BaseClass::flushTask(commandStream, commandStreamStart, dsh, ioh, ssh, taskLevel, dispatchFlags, device);
103104
}
@@ -179,5 +180,6 @@ class UltCommandStreamReceiver : public CommandStreamReceiverHw<GfxFamily>, publ
179180
uint32_t latestSentTaskCountValueDuringFlush = 0;
180181
uint32_t blitBufferCalled = 0;
181182
std::atomic<uint32_t> latestWaitForCompletionWithTimeoutTaskCount{0};
183+
DispatchFlags recordedDispatchFlags;
182184
};
183185
} // namespace NEO

unit_tests/mocks/mock_kernel.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@ class MockKernel : public Kernel {
2727
using Kernel::addAllocationToCacheFlushVector;
2828
using Kernel::allBufferArgsStateful;
2929
using Kernel::auxTranslationRequired;
30+
using Kernel::containsStatelessWrites;
3031
using Kernel::isSchedulerKernel;
3132
using Kernel::kernelArgRequiresCacheFlush;
3233
using Kernel::kernelArguments;

0 commit comments

Comments
 (0)