Skip to content

Commit 5cb5dcf

Browse files
petr.kiyashkoJackOfBlades232
authored andcommitted
Added PerFrameTransferHelper for streaming and async readback.
1 parent 5ea6bfa commit 5cb5dcf

File tree

4 files changed

+912
-2
lines changed

4 files changed

+912
-2
lines changed

etna/CMakeLists.txt

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,8 @@ add_library(etna
2222
"source/Window.cpp"
2323
"source/PerFrameCmdMgr.cpp"
2424
"source/OneShotCmdMgr.cpp"
25-
"source/BlockingTransferHelper.cpp")
25+
"source/BlockingTransferHelper.cpp"
26+
"source/PerFrameTransferHelper.cpp")
2627

2728
target_include_directories(etna PUBLIC include)
2829
target_include_directories(etna PRIVATE source)
Lines changed: 370 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,370 @@
1+
#pragma once
2+
#ifndef ETNA_PER_FRAME_TRANSFER_HELPER_HPP_INCLUDED
3+
#define ETNA_PER_FRAME_TRANSFER_HELPER_HPP_INCLUDED
4+
5+
#include <etna/Vulkan.hpp>
6+
#include <etna/Buffer.hpp>
7+
#include <etna/Image.hpp>
8+
#include <etna/GpuSharedResource.hpp>
9+
#include <etna/EtnaConfig.hpp>
10+
11+
#include <array>
#include <cstddef>
#include <cstdint>
#include <span>
#include <type_traits>
12+
13+
/**
14+
* PerFrameTransferHelper : "non-blocking" GPU-CPU transfer helper,
15+
* allows transferring resources frame by frame with a dedicated staging buffer.
16+
*
17+
* Is capable of both readbacks and uploads.
18+
* Uploads have a sync API (the whole resource will be uploaded this frame),
19+
* and both uploads and readbacks have async API. Async API works by the
20+
* user first calling init(action)(resource)Async to get back an
21+
* Async(resource)(action)State struct which holds the data about the progress.
22+
* Then, each frame the user calls progress(resource)(action)Async on the
23+
* state struct until the transfer has finished.
24+
*
25+
* (resource) may be "Buffer" or "Image"
26+
* (action) may be "Readback" for buffers or "Upload" for any resource type.
27+
*
28+
* Usage of PerFrameTransferHelper looks as follows.
29+
*
30+
* PerFrameTransferHelper th{...};
31+
*
32+
* -- init calls for async operation states can be made at any time
33+
*
34+
* auto bufferUpload = th.initUploadBufferAsync(buf1, 0, bufData1);
35+
* auto bufferReadback = th.initReadbackBufferAsync(bufStorageDest2, buf2, 0);
36+
* auto imageUpload = th.initUploadImageAsync(img1, 0, 0, imgData1);
37+
*
38+
* -- actual sync/progress calls must be done every frame inside of processing
39+
* -- scope, for that the API is split into several classes to enforce order.
40+
*
41+
* if (auto frame = th.beginFrame())
42+
* {
43+
* if (auto rb = frame.beginReadback())
44+
* {
45+
* if (!rb.hasSpaceThisFrame())
46+
* ; -- Out of staging space, all upload/progress calls will do nothing
47+
* if (rb.progressBufferReadbackAsync(cmd_buf, bufferReadback))
48+
* ; -- Readback is done
49+
* }
50+
* if (auto up = frame.beginUpload())
51+
* {
52+
* if (up.progressBufferUploadAsync(cmd_buf, bufferUpload))
53+
* ; -- Upload will be completed this frame
54+
* if (up.progressImageUploadAsync(cmd_buf, imageUpload))
55+
* ; -- Upload will be completed this frame
56+
* if (up.uploadBufferSync(cmd_buf, buf3, 0, bufData3))
57+
* ; -- There was space in staging and upload will be done
58+
* if (up.uploadImageSync(cmd_buf, img2, 0, 0, imgData2))
59+
* ; -- There was space in staging and upload will be done
60+
*
61+
* for (auto &texUpload : texUploads)
62+
* {
63+
* if (!up.hasSpaceThisFrame())
64+
* break;
65+
* if (texUpload.done())
66+
* continue;
67+
* if (up.progressImageUploadAsync(cmd_buf, texUpload))
68+
* ; -- Upload will be completed this frame
69+
* }
70+
* }
71+
* }
72+
*
73+
*
74+
* The order of calls and other constraints are enforced by asserts.
75+
* For example, each beginXXX call can be made only once per frame,
76+
* and readback can not be done after upload. Furthermore, if a portion
77+
* of a buffer readback was ready on frame N and progressXXX was not called
78+
* on that state, the content may be discarded and on the next call
79+
* the validation will err due to a missed piece of readback.
80+
*
81+
* In general one should progress all outstanding readbacks on every frame,
82+
* and uploads can be progressed however one wishes.
83+
*/
84+
85+
namespace etna
86+
{
87+
88+
class PerFrameTransferHelper;
89+
90+
/**
 * Progress record for an asynchronous buffer upload.
 *
 * Obtained from PerFrameTransferHelper::initUploadBufferAsync and then passed
 * to progressBufferUploadAsync until done() reports true.
 *
 * Every scalar/pointer member carries a default initializer so that a
 * default-constructed state is well defined: transferHelper is null and
 * done() therefore returns false instead of reading an indeterminate pointer.
 * The struct remains an aggregate, so brace/designated initialization is
 * unaffected.
 */
struct AsyncBufferUploadState
91+
{
92+
// Helper this state is associated with; null means "not initialized".
const PerFrameTransferHelper* transferHelper = nullptr;
93+
// NOTE(review): presumably the frame index of the latest progress call -- confirm in the .cpp.
uint64_t lastFrame = 0;
94+
// Destination buffer of the upload (non-owning).
const Buffer* dst = nullptr;
95+
// NOTE(review): presumably the byte offset into dst for the next chunk -- confirm in the .cpp.
uint32_t offset = 0;
96+
// Bytes that are still to be uploaded (non-owning view); empty once nothing is left.
std::span<std::byte const> src;
97+
98+
// True only for an initialized state (non-null helper) whose source span is empty.
bool done() const { return transferHelper != nullptr && src.empty(); }
99+
};
100+
101+
/**
 * Progress record for an asynchronous buffer readback.
 *
 * Obtained from PerFrameTransferHelper::initReadbackBufferAsync and then
 * passed to progressBufferReadbackAsync until done() reports true.
 *
 * Every scalar/pointer member carries a default initializer so that a
 * default-constructed state is well defined: transferHelper is null and
 * done() therefore returns false instead of reading an indeterminate pointer.
 * The struct remains an aggregate, so brace/designated initialization is
 * unaffected.
 */
struct AsyncBufferReadbackState
102+
{
103+
// One readback chunk tracked by this state.
// NOTE(review): the all-ones fulfillmentFrame default looks like a
// "slot unused" sentinel -- confirm against the .cpp.
struct Issue
104+
{
105+
uint64_t fulfillmentFrame = uint64_t(-1);
106+
uint32_t size = 0;
107+
uint32_t offset = 0;
108+
109+
bool operator==(const Issue&) const = default;
110+
};
111+
112+
// Helper this state is associated with; null means "not initialized".
const PerFrameTransferHelper* transferHelper = nullptr;
113+
// NOTE(review): presumably the frame index of the latest progress call -- confirm in the .cpp.
uint64_t lastFrame = 0;
114+
// Source buffer of the readback (non-owning).
const Buffer* src = nullptr;
115+
// NOTE(review): presumably how far into src copies have been issued -- confirm in the .cpp.
uint32_t issuedOffset = 0;
116+
// NOTE(review): presumably the offset of data not yet delivered to dst -- confirm in the .cpp.
uint32_t remainingOffset = 0;
117+
// One slot per in-flight frame; elements start out as default Issue values.
std::array<Issue, MAX_FRAMES_INFLIGHT> issues;
118+
// Index into issues; NOTE(review): presumably the next slot to fill -- confirm in the .cpp.
size_t nextIssueSlot = 0;
119+
// Destination memory still waiting for data (non-owning view); empty once nothing is left.
std::span<std::byte> dst;
120+
121+
// True only for an initialized state (non-null helper) whose destination span is empty.
bool done() const { return transferHelper != nullptr && dst.empty(); }
122+
};
123+
124+
/**
 * Progress record for an asynchronous image upload.
 *
 * Obtained from PerFrameTransferHelper::initUploadImageAsync and then passed
 * to progressImageUploadAsync until done() reports true.
 *
 * Every scalar/pointer member carries a default initializer so that a
 * default-constructed state is well defined: transferHelper is null and
 * done() therefore returns false instead of reading an indeterminate pointer.
 * The struct remains an aggregate, so brace/designated initialization is
 * unaffected.
 */
struct AsyncImageUploadState
125+
{
126+
// Helper this state is associated with; null means "not initialized".
const PerFrameTransferHelper* transferHelper = nullptr;
127+
// NOTE(review): presumably the frame index of the latest progress call -- confirm in the .cpp.
uint64_t lastFrame = 0;
128+
// Destination image of the upload (non-owning).
const Image* dst = nullptr;
129+
// Mip level and array layer of dst that receive the data.
uint32_t mipLevel = 0;
130+
uint32_t layer = 0;
131+
// NOTE(review): presumably derived from the image format at init time -- confirm in the .cpp.
size_t bytesPerPixel = 0;
132+
// NOTE(review): presumably the texel position where the next chunk lands -- confirm in the .cpp.
vk::Offset3D offset{};
133+
// Bytes that are still to be uploaded (non-owning view); empty once nothing is left.
std::span<std::byte const> src;
134+
135+
// True only for an initialized state (non-null helper) whose source span is empty.
bool done() const { return transferHelper != nullptr && src.empty(); }
136+
};
137+
138+
/**
 * Per-frame GPU<->CPU transfer helper built around a staging buffer.
 *
 * Work is issued through scoped processor objects: beginFrame() yields a
 * FrameProcessor, which in turn yields a ReadbackProcessor and an
 * UploadProcessor. Each processor forwards its calls to the private
 * implementation methods of this class and calls finish() from its
 * destructor. The helper itself can be neither copied nor moved.
 */
class PerFrameTransferHelper
139+
{
140+
private:
141+
// Phase of the per-frame protocol the helper is currently in.
// NOTE(review): the transitions are implemented in the .cpp (not visible
// here); presumably they follow the declaration order below -- confirm.
enum class ProcessingState
142+
{
143+
IDLE,
144+
READY,
145+
READBACK,
146+
READBACK_DONE,
147+
UPLOAD,
148+
UPLOAD_DONE,
149+
};
150+
151+
// Declared up front so that the friend declarations inside the processors
// below name this nested class rather than a namespace-scope one.
class FrameProcessor;
152+
153+
// Scope object for the upload part of a frame. Holds a pointer to the owning
// helper and forwards every call to it. Non-copyable and non-movable.
class UploadProcessor
154+
{
155+
PerFrameTransferHelper* self;
156+
friend class FrameProcessor;
157+
158+
public:
159+
explicit UploadProcessor(PerFrameTransferHelper* self)
160+
: self{self}
161+
{
162+
}
163+
164+
UploadProcessor(const UploadProcessor&) = delete;
165+
UploadProcessor& operator=(const UploadProcessor&) = delete;
166+
UploadProcessor(UploadProcessor&&) = delete;
167+
UploadProcessor& operator=(UploadProcessor&&) = delete;
168+
169+
// Leaving the scope ends the upload phase via finish().
~UploadProcessor() { finish(); }
170+
171+
// Typed convenience overload: views a span of trivially copyable T as raw
// bytes and forwards to the byte-span overload below.
template <class T>
172+
requires std::is_trivially_copyable_v<T>
173+
bool uploadBufferSync(
174+
vk::CommandBuffer cmd_buf, const Buffer& dst, uint32_t offset, std::span<T const> src)
175+
{
176+
std::span<std::byte const> raw{
177+
reinterpret_cast<const std::byte*>(src.data()), src.size_bytes()};
178+
return uploadBufferSync(cmd_buf, dst, offset, raw);
179+
}
180+
181+
// Forwards to PerFrameTransferHelper::uploadBufferSync. Per the usage notes
// in this header, true means there was space in staging and the upload will
// be done.
bool uploadBufferSync(
182+
vk::CommandBuffer cmd_buf, const Buffer& dst, uint32_t offset, std::span<std::byte const> src)
183+
{
184+
return self->uploadBufferSync(cmd_buf, dst, offset, src);
185+
}
186+
187+
// Forwards to PerFrameTransferHelper::uploadImageSync. Per the usage notes
// in this header, true means there was space in staging and the upload will
// be done.
bool uploadImageSync(
188+
vk::CommandBuffer cmd_buf,
189+
const Image& dst,
190+
uint32_t mip_level,
191+
uint32_t layer,
192+
std::span<std::byte const> src)
193+
{
194+
return self->uploadImageSync(cmd_buf, dst, mip_level, layer, src);
195+
}
196+
197+
// Forward to the helper's async progress methods. Per the usage notes in
// this header, true means the upload will be completed this frame.
bool progressBufferUploadAsync(vk::CommandBuffer cmd_buf, AsyncBufferUploadState& state)
198+
{
199+
return self->progressBufferUploadAsync(cmd_buf, state);
200+
}
201+
bool progressImageUploadAsync(vk::CommandBuffer cmd_buf, AsyncImageUploadState& state)
202+
{
203+
return self->progressImageUploadAsync(cmd_buf, state);
204+
}
205+
206+
// True while the helper's current-frame staging offset is below its staging
// size. Dereferences self unconditionally, so it must only be called on a
// processor that converts to true.
bool hasSpaceThisFrame() const { return self->curFrameStagingOffset < self->stagingSize; }
207+
208+
// Ends the upload phase; defined out of line and also run by the destructor.
void finish();
209+
210+
// True when this processor is bound to a helper (self is non-null).
explicit operator bool() const { return self != nullptr; }
211+
};
212+
213+
// Scope object for the readback part of a frame. Holds a pointer to the
// owning helper and forwards every call to it. Non-copyable and non-movable.
class ReadbackProcessor
214+
{
215+
PerFrameTransferHelper* self;
216+
friend class FrameProcessor;
217+
218+
public:
219+
explicit ReadbackProcessor(PerFrameTransferHelper* self)
220+
: self{self}
221+
{
222+
}
223+
224+
ReadbackProcessor(const ReadbackProcessor&) = delete;
225+
ReadbackProcessor& operator=(const ReadbackProcessor&) = delete;
226+
ReadbackProcessor(ReadbackProcessor&&) = delete;
227+
ReadbackProcessor& operator=(ReadbackProcessor&&) = delete;
228+
229+
// Leaving the scope ends the readback phase via finish().
~ReadbackProcessor() { finish(); }
230+
231+
// Forwards to PerFrameTransferHelper::progressBufferReadbackAsync. Per the
// usage notes in this header, true means the readback is done.
bool progressBufferReadbackAsync(vk::CommandBuffer cmd_buf, AsyncBufferReadbackState& state)
232+
{
233+
return self->progressBufferReadbackAsync(cmd_buf, state);
234+
}
235+
236+
// True while the helper's current-frame staging offset is below its staging
// size. Dereferences self unconditionally, so it must only be called on a
// processor that converts to true.
bool hasSpaceThisFrame() const { return self->curFrameStagingOffset < self->stagingSize; }
237+
238+
// Ends the readback phase; defined out of line and also run by the destructor.
void finish();
239+
240+
// True when this processor is bound to a helper (self is non-null).
explicit operator bool() const { return self != nullptr; }
241+
};
242+
243+
// Scope object for one frame of transfer work; the readback and upload
// processors are obtained from it. Non-copyable and non-movable.
class FrameProcessor
244+
{
245+
PerFrameTransferHelper* self;
246+
friend class PerFrameTransferHelper;
247+
248+
public:
249+
explicit FrameProcessor(PerFrameTransferHelper* self)
250+
: self{self}
251+
{
252+
}
253+
254+
FrameProcessor(const FrameProcessor&) = delete;
255+
FrameProcessor& operator=(const FrameProcessor&) = delete;
256+
FrameProcessor(FrameProcessor&&) = delete;
257+
FrameProcessor& operator=(FrameProcessor&&) = delete;
258+
259+
// Leaving the scope ends the frame via finish().
~FrameProcessor() { finish(); }
260+
261+
// Both return a non-movable type by value, so the out-of-line definitions
// have to return a prvalue (guaranteed copy elision). Per the usage notes in
// this header, each may be called once per frame and readback must come
// before upload; this is enforced by asserts.
ReadbackProcessor beginReadback();
262+
UploadProcessor beginUpload();
263+
264+
// Ends the frame; defined out of line and also run by the destructor.
void finish();
265+
266+
// True when this processor is bound to a helper (self is non-null).
explicit operator bool() const { return self != nullptr; }
267+
};
268+
269+
friend class UploadProcessor;
270+
friend class ReadbackProcessor;
271+
friend class FrameProcessor;
272+
273+
public:
274+
// Construction parameters.
struct CreateInfo
275+
{
276+
// NOTE(review): presumably the size of the whole staging allocation, shared
// between in-flight frames -- confirm against the constructor.
vk::DeviceSize totalStagingSize;
277+
// NOTE(review): the class keeps a `const GpuWorkCount&` member, so this
// presumably must be non-null and outlive the helper -- confirm.
const GpuWorkCount* wc;
278+
};
279+
280+
explicit PerFrameTransferHelper(CreateInfo info);
281+
282+
PerFrameTransferHelper(const PerFrameTransferHelper&) = delete;
283+
PerFrameTransferHelper& operator=(const PerFrameTransferHelper&) = delete;
284+
PerFrameTransferHelper(PerFrameTransferHelper&&) = delete;
285+
PerFrameTransferHelper& operator=(PerFrameTransferHelper&&) = delete;
286+
287+
// Opens the per-frame processing scope; the result is tested with
// `if (auto frame = th.beginFrame())` in the usage notes of this header.
FrameProcessor beginFrame();
288+
289+
// Typed convenience overload: views a span of trivially copyable T as raw
// bytes and forwards to the byte-span overload below.
template <class T>
290+
requires std::is_trivially_copyable_v<T>
291+
AsyncBufferUploadState initUploadBufferAsync(
292+
const Buffer& dst, uint32_t offset, std::span<T const> src) const
293+
{
294+
std::span<std::byte const> raw{
295+
reinterpret_cast<const std::byte*>(src.data()), src.size_bytes()};
296+
return initUploadBufferAsync(dst, offset, raw);
297+
}
298+
299+
// Creates the progress state for an async buffer upload. Does not take a
// command buffer and is const.
// NOTE(review): the returned state holds a span, so src presumably has to
// stay valid until the upload is done -- confirm.
AsyncBufferUploadState initUploadBufferAsync(
300+
const Buffer& dst, uint32_t offset, std::span<std::byte const> src) const;
301+
302+
// Typed convenience overload: views a span of trivially copyable T as raw
// bytes and forwards to the byte-span overload below.
template <class T>
303+
requires std::is_trivially_copyable_v<T>
304+
AsyncBufferReadbackState initReadbackBufferAsync(
305+
std::span<T> dst, const Buffer& src, uint32_t offset) const
306+
{
307+
std::span<std::byte> raw{reinterpret_cast<std::byte*>(dst.data()), dst.size_bytes()};
308+
return initReadbackBufferAsync(raw, src, offset);
309+
}
310+
311+
// Creates the progress state for an async buffer readback. Does not take a
// command buffer and is const.
// NOTE(review): the returned state holds a span, so dst presumably has to
// stay valid until the readback is done -- confirm.
AsyncBufferReadbackState initReadbackBufferAsync(
312+
std::span<std::byte> dst, const Buffer& src, uint32_t offset) const;
313+
314+
// @NOTE: for now doesn't support 3D images (unlike sync API)
315+
AsyncImageUploadState initUploadImageAsync(
316+
const Image& dst, uint32_t mip_level, uint32_t layer, std::span<std::byte const> src) const;
317+
318+
private:
319+
// Current phase of the begin/finish protocol (see ProcessingState).
ProcessingState inFrameState;
320+
// NOTE(review): presumably the last frame index that was processed -- confirm in the .cpp.
uint64_t lastFrame;
321+
// Staging limit that curFrameStagingOffset is compared against in hasSpaceThisFrame().
// NOTE(review): presumably the per-frame share of totalStagingSize -- confirm.
vk::DeviceSize stagingSize;
322+
// Staging offset for the current frame; hasSpaceThisFrame() reports space while it is below stagingSize.
vk::DeviceSize curFrameStagingOffset;
323+
GpuSharedResource<Buffer> stagingBuffer;
324+
// Reference member: the referenced GpuWorkCount has to outlive this helper.
const GpuWorkCount& wc;
325+
326+
// Implementations behind the processor forwarding methods; defined in the .cpp.
bool uploadBufferSync(
327+
vk::CommandBuffer cmd_buf, const Buffer& dst, uint32_t offset, std::span<std::byte const> src);
328+
329+
bool uploadImageSync(
330+
vk::CommandBuffer cmd_buf,
331+
const Image& dst,
332+
uint32_t mip_level,
333+
uint32_t layer,
334+
std::span<std::byte const> src);
335+
336+
bool progressBufferUploadAsync(vk::CommandBuffer cmd_buf, AsyncBufferUploadState& state);
337+
bool progressBufferReadbackAsync(vk::CommandBuffer cmd_buf, AsyncBufferReadbackState& state);
338+
bool progressImageUploadAsync(vk::CommandBuffer cmd_buf, AsyncImageUploadState& state);
339+
340+
// Low-level copy helpers; defined in the .cpp.
// NOTE(review): presumably these record the actual copy commands into
// cmd_buf -- confirm.
void transferBufferRegion(
341+
vk::CommandBuffer cmd_buf,
342+
const Buffer& src,
343+
const Buffer& dst,
344+
vk::DeviceSize src_offset,
345+
vk::DeviceSize dst_offset,
346+
size_t size);
347+
348+
void uploadImageRegion(
349+
vk::CommandBuffer cmd_buf,
350+
const Image& dst,
351+
uint32_t mip_level,
352+
uint32_t layer,
353+
size_t bytes_per_pixel,
354+
vk::Offset3D offset,
355+
vk::DeviceSize staging_offset,
356+
size_t size);
357+
358+
void uploadImageRect(
359+
vk::CommandBuffer cmd_buf,
360+
const Image& dst,
361+
uint32_t mip_level,
362+
uint32_t layer,
363+
vk::Offset3D offset,
364+
vk::Extent3D extent,
365+
vk::DeviceSize staging_offset);
366+
};
367+
368+
} // namespace etna
369+
370+
#endif

etna/source/BlockingTransferHelper.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -109,7 +109,7 @@ void BlockingTransferHelper::uploadImage(
109109

110110
const std::size_t bytesPerPixel = vk::blockSize(dst.getFormat());
111111

112-
ETNA_ASSERTF(d == 1, "3D image uploads are not implemented yet!");
112+
ETNA_ASSERTF(d == 1, "3D image blocking uploads are not implemented yet!");
113113

114114
ETNA_ASSERTF(
115115
w * h * bytesPerPixel == src.size(),

0 commit comments

Comments
 (0)