Skip to content

Commit f4a8223

Browse files
Copilot and chrxh committed
Refactor: use extractObjectData with copyToCpu instead of separate extractObjectDataToCpuBuffers method
Co-authored-by: chrxh <[email protected]>
1 parent 5bf5966 commit f4a8223

File tree

5 files changed

+64
-179
lines changed

5 files changed

+64
-179
lines changed

source/EngineGpuKernels/CudaGeometryBuffers.cu

Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -240,3 +240,55 @@ void CudaGeometryBuffers::copyToOpenGL(GeometryBuffers const& geometryBuffers, N
240240
geometryBuffers->uploadDetonationEventData(hostDetonationEventBuffer.data(), numObjects.detonationEventVertices);
241241
}
242242
}
243+
244+
// Downloads the contents of the device-side geometry buffers into a freshly created
// CpuGeometryBuffers and returns it.
//
// For each buffer kind the host vector is resized to the element count taken from
// numObjects and then filled with a device-to-host cudaMemcpy. A kind whose count is
// zero is skipped entirely, so its host vector stays default-constructed.
//
// numObjects: element counts per buffer kind.
// NOTE(review): presumably these counts must match what was written into the device
// buffers beforehand - confirm against the caller.
CpuGeometryBuffers CudaGeometryBuffers::copyToCpu(NumRenderObjects const& numObjects)
{
    CpuGeometryBuffers cpuBuffers;

    // Cells
    auto const cellCount = numObjects.cells;
    if (cellCount > 0) {
        auto const numBytes = cellCount * sizeof(CellVertexData);
        cpuBuffers.cells.resize(cellCount);
        CHECK_FOR_CUDA_ERROR(cudaMemcpy(cpuBuffers.cells.data(), deviceCellBuffer, numBytes, cudaMemcpyDeviceToHost));
    }

    // Energy particles
    auto const energyParticleCount = numObjects.energyParticles;
    if (energyParticleCount > 0) {
        auto const numBytes = energyParticleCount * sizeof(EnergyParticleVertexData);
        cpuBuffers.energyParticles.resize(energyParticleCount);
        CHECK_FOR_CUDA_ERROR(cudaMemcpy(cpuBuffers.energyParticles.data(), deviceEnergyParticleBuffer, numBytes, cudaMemcpyDeviceToHost));
    }

    // Locations
    auto const locationCount = numObjects.locations;
    if (locationCount > 0) {
        auto const numBytes = locationCount * sizeof(LocationVertexData);
        cpuBuffers.locations.resize(locationCount);
        CHECK_FOR_CUDA_ERROR(cudaMemcpy(cpuBuffers.locations.data(), deviceLocationBuffer, numBytes, cudaMemcpyDeviceToHost));
    }

    // Selected objects
    auto const selectedObjectCount = numObjects.selectedObjects;
    if (selectedObjectCount > 0) {
        auto const numBytes = selectedObjectCount * sizeof(SelectedObjectVertexData);
        cpuBuffers.selectedObjects.resize(selectedObjectCount);
        CHECK_FOR_CUDA_ERROR(cudaMemcpy(cpuBuffers.selectedObjects.data(), deviceSelectedObjectBuffer, numBytes, cudaMemcpyDeviceToHost));
    }

    // Line indices
    auto const lineIndexCount = numObjects.lineIndices;
    if (lineIndexCount > 0) {
        auto const numBytes = lineIndexCount * sizeof(unsigned int);
        cpuBuffers.lineIndices.resize(lineIndexCount);
        CHECK_FOR_CUDA_ERROR(cudaMemcpy(cpuBuffers.lineIndices.data(), deviceLineIndexBuffer, numBytes, cudaMemcpyDeviceToHost));
    }

    // Triangle indices
    auto const triangleIndexCount = numObjects.triangleIndices;
    if (triangleIndexCount > 0) {
        auto const numBytes = triangleIndexCount * sizeof(unsigned int);
        cpuBuffers.triangleIndices.resize(triangleIndexCount);
        CHECK_FOR_CUDA_ERROR(cudaMemcpy(cpuBuffers.triangleIndices.data(), deviceTriangleIndexBuffer, numBytes, cudaMemcpyDeviceToHost));
    }

    // Connection arrows (the device-side source is the selected-connection buffer)
    auto const connectionArrowVertexCount = numObjects.connectionArrowVertices;
    if (connectionArrowVertexCount > 0) {
        auto const numBytes = connectionArrowVertexCount * sizeof(ConnectionArrowVertexData);
        cpuBuffers.connectionArrows.resize(connectionArrowVertexCount);
        CHECK_FOR_CUDA_ERROR(cudaMemcpy(cpuBuffers.connectionArrows.data(), deviceSelectedConnectionBuffer, numBytes, cudaMemcpyDeviceToHost));
    }

    // Attack events
    auto const attackEventVertexCount = numObjects.attackEventVertices;
    if (attackEventVertexCount > 0) {
        auto const numBytes = attackEventVertexCount * sizeof(AttackEventVertexData);
        cpuBuffers.attackEvents.resize(attackEventVertexCount);
        CHECK_FOR_CUDA_ERROR(cudaMemcpy(cpuBuffers.attackEvents.data(), deviceAttackEventBuffer, numBytes, cudaMemcpyDeviceToHost));
    }

    // Detonation events
    auto const detonationEventVertexCount = numObjects.detonationEventVertices;
    if (detonationEventVertexCount > 0) {
        auto const numBytes = detonationEventVertexCount * sizeof(DetonationEventVertexData);
        cpuBuffers.detonationEvents.resize(detonationEventVertexCount);
        CHECK_FOR_CUDA_ERROR(cudaMemcpy(cpuBuffers.detonationEvents.data(), deviceDetonationEventBuffer, numBytes, cudaMemcpyDeviceToHost));
    }

    return cpuBuffers;
}

source/EngineGpuKernels/CudaGeometryBuffers.cuh

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
#pragma once
22

3+
#include <EngineInterface/CpuGeometryBuffers.h>
34
#include <EngineInterface/GeometryBuffers.h>
45

56
#include "Base.cuh"
@@ -44,4 +45,5 @@ struct CudaGeometryBuffers
4445
void allocateBuffersForNoInterop(NumRenderObjects const& numObjects);
4546
void freeBuffersForNoInterop();
4647
void copyToOpenGL(GeometryBuffers const& geometryBuffers, NumRenderObjects const& numObjects);
48+
CpuGeometryBuffers copyToCpu(NumRenderObjects const& numObjects);
4749
};

source/EngineGpuKernels/GeometryKernelsService.cu

Lines changed: 0 additions & 174 deletions
Original file line numberDiff line numberDiff line change
@@ -208,177 +208,3 @@ void GeometryKernelsService::extractObjectData(
208208
KERNEL_CALL(cudaExtractDetonationEventData, data, renderingData.deviceDetonationEventBuffer, _numDetonationEventVertices);
209209
}
210210
}
211-
212-
void GeometryKernelsService::extractObjectDataToCpuBuffers(
213-
SettingsForSimulation const& settings,
214-
SimulationData data,
215-
CpuGeometryBuffers& cpuBuffers,
216-
RealRect const& visibleWorldRect)
217-
{
218-
auto const& gpuSettings = settings.cudaSettings;
219-
float2 const visibleTopLeft{visibleWorldRect.topLeft.x, visibleWorldRect.topLeft.y};
220-
221-
// Get counts first
222-
auto numCells = data.objects.cells.getNumEntries_host();
223-
auto numParticles = data.objects.particles.getNumEntries_host();
224-
225-
setValueToDevice(_numLocations, static_cast<uint64_t>(0));
226-
KERNEL_CALL_1_1(cudaExtractLocationData, data, nullptr, _numLocations, visibleTopLeft);
227-
cudaDeviceSynchronize();
228-
auto numLocations = copyToHost(_numLocations);
229-
230-
setValueToDevice(_numSelectedObjects, static_cast<uint64_t>(0));
231-
KERNEL_CALL(cudaExtractSelectedObjectData, data, nullptr, _numSelectedObjects);
232-
cudaDeviceSynchronize();
233-
auto numSelectedObjects = copyToHost(_numSelectedObjects);
234-
235-
setValueToDevice(_numLineIndices, static_cast<uint64_t>(0));
236-
KERNEL_CALL(cudaExtractLineIndices, data, nullptr, _numLineIndices);
237-
cudaDeviceSynchronize();
238-
auto numLineIndices = copyToHost(_numLineIndices);
239-
240-
setValueToDevice(_numTriangleIndices, static_cast<uint64_t>(0));
241-
KERNEL_CALL(cudaExtractTriangleIndices, data, nullptr, _numTriangleIndices);
242-
cudaDeviceSynchronize();
243-
auto numTriangleIndices = copyToHost(_numTriangleIndices);
244-
245-
setValueToDevice(_numSelectedConnectionVertices, static_cast<uint64_t>(0));
246-
KERNEL_CALL(cudaExtractSelectedConnectionData, data, nullptr, _numSelectedConnectionVertices);
247-
cudaDeviceSynchronize();
248-
auto numConnectionArrows = copyToHost(_numSelectedConnectionVertices);
249-
250-
setValueToDevice(_numAttackEventVertices, static_cast<uint64_t>(0));
251-
KERNEL_CALL(cudaExtractAttackEventData, data, nullptr, _numAttackEventVertices);
252-
cudaDeviceSynchronize();
253-
auto numAttackEvents = copyToHost(_numAttackEventVertices);
254-
255-
setValueToDevice(_numDetonationEventVertices, static_cast<uint64_t>(0));
256-
KERNEL_CALL(cudaExtractDetonationEventData, data, nullptr, _numDetonationEventVertices);
257-
cudaDeviceSynchronize();
258-
auto numDetonationEvents = copyToHost(_numDetonationEventVertices);
259-
260-
// Allocate device buffers
261-
CellVertexData* deviceCellBuffer = nullptr;
262-
EnergyParticleVertexData* deviceEnergyParticleBuffer = nullptr;
263-
LocationVertexData* deviceLocationBuffer = nullptr;
264-
SelectedObjectVertexData* deviceSelectedObjectBuffer = nullptr;
265-
unsigned int* deviceLineIndexBuffer = nullptr;
266-
unsigned int* deviceTriangleIndexBuffer = nullptr;
267-
ConnectionArrowVertexData* deviceConnectionArrowBuffer = nullptr;
268-
AttackEventVertexData* deviceAttackEventBuffer = nullptr;
269-
DetonationEventVertexData* deviceDetonationEventBuffer = nullptr;
270-
271-
if (numCells > 0) {
272-
CHECK_FOR_CUDA_ERROR(cudaMalloc(&deviceCellBuffer, numCells * sizeof(CellVertexData)));
273-
}
274-
if (numParticles > 0) {
275-
CHECK_FOR_CUDA_ERROR(cudaMalloc(&deviceEnergyParticleBuffer, numParticles * sizeof(EnergyParticleVertexData)));
276-
}
277-
if (numLocations > 0) {
278-
CHECK_FOR_CUDA_ERROR(cudaMalloc(&deviceLocationBuffer, numLocations * sizeof(LocationVertexData)));
279-
}
280-
if (numSelectedObjects > 0) {
281-
CHECK_FOR_CUDA_ERROR(cudaMalloc(&deviceSelectedObjectBuffer, numSelectedObjects * sizeof(SelectedObjectVertexData)));
282-
}
283-
if (numLineIndices > 0) {
284-
CHECK_FOR_CUDA_ERROR(cudaMalloc(&deviceLineIndexBuffer, numLineIndices * sizeof(unsigned int)));
285-
}
286-
if (numTriangleIndices > 0) {
287-
CHECK_FOR_CUDA_ERROR(cudaMalloc(&deviceTriangleIndexBuffer, numTriangleIndices * sizeof(unsigned int)));
288-
}
289-
if (numConnectionArrows > 0) {
290-
CHECK_FOR_CUDA_ERROR(cudaMalloc(&deviceConnectionArrowBuffer, numConnectionArrows * sizeof(ConnectionArrowVertexData)));
291-
}
292-
if (numAttackEvents > 0) {
293-
CHECK_FOR_CUDA_ERROR(cudaMalloc(&deviceAttackEventBuffer, numAttackEvents * sizeof(AttackEventVertexData)));
294-
}
295-
if (numDetonationEvents > 0) {
296-
CHECK_FOR_CUDA_ERROR(cudaMalloc(&deviceDetonationEventBuffer, numDetonationEvents * sizeof(DetonationEventVertexData)));
297-
}
298-
299-
// Extract data to device buffers
300-
if (numCells > 0) {
301-
KERNEL_CALL(cudaExtractCellData, data, deviceCellBuffer);
302-
}
303-
if (numParticles > 0) {
304-
KERNEL_CALL(cudaExtractEnergyParticleData, data, deviceEnergyParticleBuffer);
305-
}
306-
if (numLocations > 0) {
307-
setValueToDevice(_numLocations, static_cast<uint64_t>(0));
308-
KERNEL_CALL_1_1(cudaExtractLocationData, data, deviceLocationBuffer, _numLocations, visibleTopLeft);
309-
}
310-
if (numSelectedObjects > 0) {
311-
setValueToDevice(_numSelectedObjects, static_cast<uint64_t>(0));
312-
KERNEL_CALL(cudaExtractSelectedObjectData, data, deviceSelectedObjectBuffer, _numSelectedObjects);
313-
}
314-
if (numLineIndices > 0) {
315-
setValueToDevice(_numLineIndices, static_cast<uint64_t>(0));
316-
KERNEL_CALL(cudaExtractLineIndices, data, deviceLineIndexBuffer, _numLineIndices);
317-
}
318-
if (numTriangleIndices > 0) {
319-
setValueToDevice(_numTriangleIndices, static_cast<uint64_t>(0));
320-
KERNEL_CALL(cudaExtractTriangleIndices, data, deviceTriangleIndexBuffer, _numTriangleIndices);
321-
}
322-
if (numConnectionArrows > 0) {
323-
setValueToDevice(_numSelectedConnectionVertices, static_cast<uint64_t>(0));
324-
KERNEL_CALL(cudaExtractSelectedConnectionData, data, deviceConnectionArrowBuffer, _numSelectedConnectionVertices);
325-
}
326-
if (numAttackEvents > 0) {
327-
setValueToDevice(_numAttackEventVertices, static_cast<uint64_t>(0));
328-
KERNEL_CALL(cudaExtractAttackEventData, data, deviceAttackEventBuffer, _numAttackEventVertices);
329-
}
330-
if (numDetonationEvents > 0) {
331-
setValueToDevice(_numDetonationEventVertices, static_cast<uint64_t>(0));
332-
KERNEL_CALL(cudaExtractDetonationEventData, data, deviceDetonationEventBuffer, _numDetonationEventVertices);
333-
}
334-
cudaDeviceSynchronize();
335-
336-
// Resize CPU buffers
337-
cpuBuffers.cells.resize(numCells);
338-
cpuBuffers.energyParticles.resize(numParticles);
339-
cpuBuffers.locations.resize(numLocations);
340-
cpuBuffers.selectedObjects.resize(numSelectedObjects);
341-
cpuBuffers.lineIndices.resize(numLineIndices);
342-
cpuBuffers.triangleIndices.resize(numTriangleIndices);
343-
cpuBuffers.connectionArrows.resize(numConnectionArrows);
344-
cpuBuffers.attackEvents.resize(numAttackEvents);
345-
cpuBuffers.detonationEvents.resize(numDetonationEvents);
346-
347-
// Copy from device to host
348-
if (numCells > 0) {
349-
CHECK_FOR_CUDA_ERROR(cudaMemcpy(cpuBuffers.cells.data(), deviceCellBuffer, numCells * sizeof(CellVertexData), cudaMemcpyDeviceToHost));
350-
CHECK_FOR_CUDA_ERROR(cudaFree(deviceCellBuffer));
351-
}
352-
if (numParticles > 0) {
353-
CHECK_FOR_CUDA_ERROR(cudaMemcpy(cpuBuffers.energyParticles.data(), deviceEnergyParticleBuffer, numParticles * sizeof(EnergyParticleVertexData), cudaMemcpyDeviceToHost));
354-
CHECK_FOR_CUDA_ERROR(cudaFree(deviceEnergyParticleBuffer));
355-
}
356-
if (numLocations > 0) {
357-
CHECK_FOR_CUDA_ERROR(cudaMemcpy(cpuBuffers.locations.data(), deviceLocationBuffer, numLocations * sizeof(LocationVertexData), cudaMemcpyDeviceToHost));
358-
CHECK_FOR_CUDA_ERROR(cudaFree(deviceLocationBuffer));
359-
}
360-
if (numSelectedObjects > 0) {
361-
CHECK_FOR_CUDA_ERROR(cudaMemcpy(cpuBuffers.selectedObjects.data(), deviceSelectedObjectBuffer, numSelectedObjects * sizeof(SelectedObjectVertexData), cudaMemcpyDeviceToHost));
362-
CHECK_FOR_CUDA_ERROR(cudaFree(deviceSelectedObjectBuffer));
363-
}
364-
if (numLineIndices > 0) {
365-
CHECK_FOR_CUDA_ERROR(cudaMemcpy(cpuBuffers.lineIndices.data(), deviceLineIndexBuffer, numLineIndices * sizeof(unsigned int), cudaMemcpyDeviceToHost));
366-
CHECK_FOR_CUDA_ERROR(cudaFree(deviceLineIndexBuffer));
367-
}
368-
if (numTriangleIndices > 0) {
369-
CHECK_FOR_CUDA_ERROR(cudaMemcpy(cpuBuffers.triangleIndices.data(), deviceTriangleIndexBuffer, numTriangleIndices * sizeof(unsigned int), cudaMemcpyDeviceToHost));
370-
CHECK_FOR_CUDA_ERROR(cudaFree(deviceTriangleIndexBuffer));
371-
}
372-
if (numConnectionArrows > 0) {
373-
CHECK_FOR_CUDA_ERROR(cudaMemcpy(cpuBuffers.connectionArrows.data(), deviceConnectionArrowBuffer, numConnectionArrows * sizeof(ConnectionArrowVertexData), cudaMemcpyDeviceToHost));
374-
CHECK_FOR_CUDA_ERROR(cudaFree(deviceConnectionArrowBuffer));
375-
}
376-
if (numAttackEvents > 0) {
377-
CHECK_FOR_CUDA_ERROR(cudaMemcpy(cpuBuffers.attackEvents.data(), deviceAttackEventBuffer, numAttackEvents * sizeof(AttackEventVertexData), cudaMemcpyDeviceToHost));
378-
CHECK_FOR_CUDA_ERROR(cudaFree(deviceAttackEventBuffer));
379-
}
380-
if (numDetonationEvents > 0) {
381-
CHECK_FOR_CUDA_ERROR(cudaMemcpy(cpuBuffers.detonationEvents.data(), deviceDetonationEventBuffer, numDetonationEvents * sizeof(DetonationEventVertexData), cudaMemcpyDeviceToHost));
382-
CHECK_FOR_CUDA_ERROR(cudaFree(deviceDetonationEventBuffer));
383-
}
384-
}

source/EngineGpuKernels/GeometryKernelsService.cuh

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,6 @@
22

33
#include <Base/Singleton.h>
44

5-
#include <EngineInterface/CpuGeometryBuffers.h>
65
#include <EngineInterface/GeometryBuffers.h>
76

87
#include "Base.cuh"
@@ -22,7 +21,6 @@ public:
2221
void restorePositions(SettingsForSimulation const& settings, SimulationData data);
2322
NumRenderObjects getNumRenderObjects(SettingsForSimulation const& settings, SimulationData data, RealRect const& visibleWorldRect);
2423
void extractObjectData(SettingsForSimulation const& settings, SimulationData data, CudaGeometryBuffers& renderingData, RealRect const& visibleWorldRect, bool useInterop);
25-
void extractObjectDataToCpuBuffers(SettingsForSimulation const& settings, SimulationData data, CpuGeometryBuffers& cpuBuffers, RealRect const& visibleWorldRect);
2624

2725
private:
2826
GeometryKernelsService() = default;

source/EngineGpuKernels/SimulationCudaFacade.cu

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -677,10 +677,17 @@ CpuGeometryBuffers _SimulationCudaFacade::testOnly_copyBuffersFromCudaToCpu()
677677
checkAndProcessSimulationParameterChanges();
678678
auto simulationData = getSimulationDataPtrCopy();
679679
RealRect visibleWorldRect = {{0, 0}, {static_cast<float>(_settings.worldSizeX), static_cast<float>(_settings.worldSizeY)}};
680-
CpuGeometryBuffers cpuBuffers;
681-
GeometryKernelsService::get().extractObjectDataToCpuBuffers(_settings, simulationData, cpuBuffers, visibleWorldRect);
680+
681+
// Get render object counts and allocate device buffers
682+
auto numRenderObjects = GeometryKernelsService::get().getNumRenderObjects(_settings, simulationData, visibleWorldRect);
683+
_cudaGeometryBuffers->allocateBuffersForNoInterop(numRenderObjects);
684+
685+
// Extract data to device buffers (useInterop=false)
686+
GeometryKernelsService::get().extractObjectData(_settings, simulationData, *_cudaGeometryBuffers, visibleWorldRect, false);
682687
syncAndCheck();
683-
return cpuBuffers;
688+
689+
// Convert to CPU buffers
690+
return _cudaGeometryBuffers->copyToCpu(numRenderObjects);
684691
}
685692

686693
void _SimulationCudaFacade::initCuda()

0 commit comments

Comments
 (0)