Skip to content

Commit

Permalink
[graphite] BufferManager: support pre-zeroed storage buffer creation
Browse files Browse the repository at this point in the history
It is sometimes desirable for a GPU-private buffer to have its contents
pre-initialized to well-defined values before being consumed by a
compute program. A common example are atomic counters that live in
device memory (e.g. bump allocator counters or indirect command buffer
fields) that a compute shader cannot reasonably initialize to 0 itself
when cross-workgroup synchronization primitives aren't available
(notably both Metal and WebGPU lack a device-scope barrier instruction).

This change introduces the `ClearBuffer` flag parameter to the
`DrawBufferManager::getStorage()`  method. When set to
`ClearBuffer::kYes`, DrawBufferManager schedules a sub-allocated buffer
region to be set to zero. These requests are accummulated in a list and
batched into a single `ClearBuffersTask` when buffers get transferred to
a recording.

Bug: b/243721614

Change-Id: I66c27c42624728f08f56e2db93413410795c4187
Reviewed-on: https://skia-review.googlesource.com/c/skia/+/665582
Reviewed-by: Jim Van Verth <[email protected]>
Commit-Queue: Arman Uguray <[email protected]>
Reviewed-by: Michael Ludwig <[email protected]>
  • Loading branch information
armansito authored and SkCQ committed Apr 27, 2023
1 parent 9951ec3 commit 1efb703
Show file tree
Hide file tree
Showing 7 changed files with 163 additions and 26 deletions.
16 changes: 13 additions & 3 deletions src/gpu/graphite/BufferManager.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
#include "include/gpu/graphite/Recording.h"
#include "src/gpu/graphite/Buffer.h"
#include "src/gpu/graphite/Caps.h"
#include "src/gpu/graphite/ClearBuffersTask.h"
#include "src/gpu/graphite/ContextPriv.h"
#include "src/gpu/graphite/CopyTask.h"
#include "src/gpu/graphite/Log.h"
Expand Down Expand Up @@ -173,13 +174,13 @@ std::tuple<void*, BindBufferInfo> DrawBufferManager::getMappedStorage(size_t req
return this->prepareMappedBindBuffer(&info, requiredBytes);
}

BindBufferInfo DrawBufferManager::getStorage(size_t requiredBytes) {
BindBufferInfo DrawBufferManager::getStorage(size_t requiredBytes, ClearBuffer cleared) {
if (!requiredBytes) {
return {};
}

auto& info = fCurrentBuffers[kGpuOnlyStorageBufferIndex];
return this->prepareBindBuffer(&info, requiredBytes);
return this->prepareBindBuffer(&info, requiredBytes, /*mappable=*/false, cleared);
}

BindBufferInfo DrawBufferManager::getVertexStorage(size_t requiredBytes) {
Expand Down Expand Up @@ -210,6 +211,10 @@ BindBufferInfo DrawBufferManager::getIndirectStorage(size_t requiredBytes) {
}

void DrawBufferManager::transferToRecording(Recording* recording) {
if (!fClearList.empty()) {
recording->priv().addTask(ClearBuffersTask::Make(std::move(fClearList)));
}

bool useTransferBuffer = !fCaps->drawBufferCanBeMapped();
for (auto& [buffer, transferBuffer] : fUsedBuffers) {
if (useTransferBuffer) {
Expand Down Expand Up @@ -267,7 +272,8 @@ std::pair<void*, BindBufferInfo> DrawBufferManager::prepareMappedBindBuffer(Buff

BindBufferInfo DrawBufferManager::prepareBindBuffer(BufferInfo* info,
size_t requiredBytes,
bool mappable) {
bool mappable,
ClearBuffer cleared) {
SkASSERT(info);
SkASSERT(requiredBytes);

Expand Down Expand Up @@ -305,6 +311,10 @@ BindBufferInfo DrawBufferManager::prepareBindBuffer(BufferInfo* info,
BindBufferInfo bindInfo{info->fBuffer.get(), info->fOffset};
info->fOffset += requiredBytes;

if (cleared == ClearBuffer::kYes) {
fClearList.push_back({bindInfo.fBuffer, bindInfo.fOffset, requiredBytes});
}

return bindInfo;
}

Expand Down
11 changes: 9 additions & 2 deletions src/gpu/graphite/BufferManager.h
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
#define skgpu_graphite_BufferManager_DEFINED

#include "include/core/SkRefCnt.h"
#include "include/private/base/SkTArray.h"
#include "src/gpu/BufferWriter.h"
#include "src/gpu/graphite/DrawTypes.h"
#include "src/gpu/graphite/ResourceTypes.h"
Expand Down Expand Up @@ -47,7 +48,7 @@ class DrawBufferManager {

// Utilities that return an unmapped buffer slice with a particular usage. These slices are
// intended to be only accessed by the GPU and are configured to prioritize GPU reads.
BindBufferInfo getStorage(size_t requiredBytes);
BindBufferInfo getStorage(size_t requiredBytes, ClearBuffer cleared = ClearBuffer::kNo);
BindBufferInfo getVertexStorage(size_t requiredBytes);
BindBufferInfo getIndexStorage(size_t requiredBytes);
BindBufferInfo getIndirectStorage(size_t requiredBytes);
Expand Down Expand Up @@ -82,7 +83,10 @@ class DrawBufferManager {
};
std::pair<void*, BindBufferInfo> prepareMappedBindBuffer(BufferInfo* info,
size_t requiredBytes);
BindBufferInfo prepareBindBuffer(BufferInfo* info, size_t requiredBytes, bool mappable = false);
BindBufferInfo prepareBindBuffer(BufferInfo* info,
size_t requiredBytes,
bool mappable = false,
ClearBuffer cleared = ClearBuffer::kNo);

ResourceProvider* const fResourceProvider;
const Caps* const fCaps;
Expand All @@ -99,6 +103,9 @@ class DrawBufferManager {

// Vector of buffer and transfer buffer pairs.
std::vector<std::pair<sk_sp<Buffer>, sk_sp<Buffer>>> fUsedBuffers;

// List of buffer regions that were requested to be cleared at the time of allocation.
skia_private::TArray<ClearBufferInfo> fClearList;
};

/**
Expand Down
2 changes: 1 addition & 1 deletion src/gpu/graphite/ClearBuffersTask.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@

namespace skgpu::graphite {

sk_sp<ClearBuffersTask> ClearBuffersTask::Make(std::vector<ClearBufferInfo> clearList) {
sk_sp<ClearBuffersTask> ClearBuffersTask::Make(skia_private::TArray<ClearBufferInfo> clearList) {
return sk_sp<ClearBuffersTask>(new ClearBuffersTask(std::move(clearList)));
}

Expand Down
24 changes: 5 additions & 19 deletions src/gpu/graphite/ClearBuffersTask.h
Original file line number Diff line number Diff line change
Expand Up @@ -8,32 +8,18 @@
#ifndef skgpu_graphite_ClearBuffersTask_DEFINED
#define skgpu_graphite_ClearBuffersTask_DEFINED

#include "src/gpu/graphite/Buffer.h"
#include "include/private/base/SkTArray.h"
#include "src/gpu/graphite/ResourceTypes.h"
#include "src/gpu/graphite/Task.h"

#include <vector>

namespace skgpu::graphite {

class Buffer;

/**
* Represents a buffer region that will be cleared. A ClearBuffersTask does not take a reference to
* the buffer it clears. A higher layer is responsible for managing the lifetime and usage refs of
* the buffer.
*/
struct ClearBufferInfo {
const Buffer* fBuffer;
size_t fOffset = 0;
size_t fSize = 0;
};

/**
* Task that clears a region of a list of buffers to 0.
*/
class ClearBuffersTask final : public Task {
public:
static sk_sp<ClearBuffersTask> Make(std::vector<ClearBufferInfo>);
static sk_sp<ClearBuffersTask> Make(skia_private::TArray<ClearBufferInfo>);
~ClearBuffersTask() override;

bool prepareResources(ResourceProvider*, const RuntimeEffectDictionary*) override {
Expand All @@ -43,10 +29,10 @@ class ClearBuffersTask final : public Task {
bool addCommands(Context*, CommandBuffer*, ReplayTargetData) override;

private:
explicit ClearBuffersTask(std::vector<ClearBufferInfo> clearList)
explicit ClearBuffersTask(skia_private::TArray<ClearBufferInfo> clearList)
: fClearList(std::move(clearList)) {}

std::vector<ClearBufferInfo> fClearList;
skia_private::TArray<ClearBufferInfo> fClearList;
};

} // namespace skgpu::graphite
Expand Down
22 changes: 22 additions & 0 deletions src/gpu/graphite/ResourceTypes.h
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,15 @@ enum class AccessPattern : int {
kHostVisible,
};

/**
* Determines whether the contents of a GPU buffer sub-allocation gets cleared to 0 before being
* used in a GPU command submission.
*/
enum class ClearBuffer : bool {
kNo = false,
kYes = true,
};

/**
* Must the contents of the Resource be preserved af a render pass or can a more efficient
* representation be chosen when supported by hardware.
Expand Down Expand Up @@ -123,6 +132,19 @@ struct BindBufferInfo {
bool operator!=(const BindBufferInfo& o) const { return !(*this == o); }
};

/**
* Represents a buffer region that should be cleared to 0. A ClearBuffersTask does not take an
* owning reference to the buffer it clears. A higher layer is responsible for managing the lifetime
* and usage refs of the buffer.
*/
struct ClearBufferInfo {
const Buffer* fBuffer = nullptr;
size_t fOffset = 0;
size_t fSize = 0;

operator bool() const { return SkToBool(fBuffer); }
};

}; // namespace skgpu::graphite

#endif // skgpu_graphite_ResourceTypes_DEFINED
5 changes: 4 additions & 1 deletion src/gpu/graphite/compute/DispatchGroup.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -269,7 +269,10 @@ DispatchResourceOptional Builder::allocateResource(const ComputeStep* step,
result = bufInfo;
}
} else {
auto bufInfo = bufferMgr->getStorage(bufferSize);
auto bufInfo = bufferMgr->getStorage(bufferSize,
resource.fPolicy == ResourcePolicy::kClear
? ClearBuffer::kYes
: ClearBuffer::kNo);
if (bufInfo) {
result = bufInfo;
}
Expand Down
109 changes: 109 additions & 0 deletions tests/graphite/ComputeTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1095,3 +1095,112 @@ DEF_GRAPHITE_TEST_FOR_METAL_CONTEXT(Compute_AtomicOperationsOverArrayAndStructTe
kExpectedCount,
secondHalfCount);
}

DEF_GRAPHITE_TEST_FOR_METAL_CONTEXT(Compute_ClearedBuffer, reporter, context) {
constexpr uint32_t kProblemSize = 512;

std::unique_ptr<Recorder> recorder = context->makeRecorder();

// The ComputeStep requests an unmapped buffer that is zero-initialized. It writes the output to
// a mapped buffer which test verifies.
class TestComputeStep : public ComputeStep {
public:
TestComputeStep() : ComputeStep(
/*name=*/"TestClearedBuffer",
/*localDispatchSize=*/{kProblemSize, 1, 1},
/*resources=*/{
// Zero initialized input buffer
{
/*type=*/ResourceType::kStorageBuffer,
/*flow=*/DataFlow::kPrivate,
/*policy=*/ResourcePolicy::kClear,
},
// Output buffer:
{
/*type=*/ResourceType::kStorageBuffer,
/*flow=*/DataFlow::kShared, // shared to allow us to access it from the
// Builder
/*policy=*/ResourcePolicy::kMapped, // mappable for read-back
/*slot=*/0,
}
}) {}
~TestComputeStep() override = default;

std::string computeSkSL(const ResourceBindingRequirements&, int) const override {
return R"(
layout(set=0, binding=0) readonly buffer inputBlock
{
uint in_data[];
};
layout(set=0, binding=1) buffer outputBlock
{
uint out_data[];
};
void main() {
out_data[sk_GlobalInvocationID.x] = in_data[sk_GlobalInvocationID.x];
}
)";
}

size_t calculateBufferSize(const DrawParams&,
int index,
const ResourceDesc& r) const override {
return sizeof(uint32_t) * kProblemSize;
}

void prepareBuffer(const DrawParams&,
int ssboIndex,
int resourceIndex,
const ResourceDesc& r,
void* buffer,
size_t bufferSize) const override {
// Should receive this call only for the mapped buffer.
SkASSERT(resourceIndex == 1);
}

WorkgroupSize calculateGlobalDispatchSize(const DrawParams&) const override {
return WorkgroupSize(1, 1, 1);
}
} step;

DispatchGroup::Builder builder(recorder.get());
if (!builder.appendStep(&step, fake_draw_params_for_testing(), 0)) {
ERRORF(reporter, "Failed to add ComputeStep to DispatchGroup");
return;
}

// The output buffer should have been placed in the right output slot.
BindBufferInfo outputInfo = builder.getSharedBufferResource(0);
if (!outputInfo) {
ERRORF(reporter, "Failed to allocate an output buffer at slot 0");
return;
}

// Record the compute task
ComputeTask::DispatchGroupList groups;
groups.push_back(builder.finalize());
recorder->priv().add(ComputeTask::Make(std::move(groups)));

// Ensure the output buffer is synchronized to the CPU once the GPU submission has finished.
recorder->priv().add(SynchronizeToCpuTask::Make(sk_ref_sp(outputInfo.fBuffer)));

// Submit the work and wait for it to complete.
std::unique_ptr<Recording> recording = recorder->snap();
if (!recording) {
ERRORF(reporter, "Failed to make recording");
return;
}

InsertRecordingInfo insertInfo;
insertInfo.fRecording = recording.get();
context->insertRecording(insertInfo);
context->submit(SyncToCpu::kYes);

// Verify the contents of the output buffer.
uint32_t* outData = static_cast<uint32_t*>(map_bind_buffer(outputInfo));
SkASSERT(outputInfo.fBuffer->isMapped() && outData != nullptr);
for (unsigned int i = 0; i < kProblemSize; ++i) {
const uint32_t found = outData[i];
REPORTER_ASSERT(reporter, 0u == found, "expected '0u', found '%u'", found);
}
}

0 comments on commit 1efb703

Please sign in to comment.