@@ -95,12 +95,13 @@ void GeometryKernelsService::extractObjectData(
9595 SettingsForSimulation const & settings,
9696 SimulationData data,
9797 CudaGeometryBuffers& renderingData,
98- RealRect const & visibleWorldRect)
98+ RealRect const & visibleWorldRect,
99+ bool useInterop)
99100{
100101 auto const & gpuSettings = settings.cudaSettings ;
101102 float2 const visibleTopLeft{visibleWorldRect.topLeft .x , visibleWorldRect.topLeft .y };
102103
103- if (GlobalSettings::get (). isInterop () ) {
104+ if (useInterop ) {
104105 // Interop mode: use CUDA-OpenGL interoperability
105106 CHECK_FOR_CUDA_ERROR (cudaGraphicsMapResources (1 , &renderingData.vertexBuffer ));
106107 CellVertexData* mappedCellBuffer;
@@ -208,64 +209,176 @@ void GeometryKernelsService::extractObjectData(
208209 }
209210}
210211
211- std::vector<SelectedObjectVertexData> GeometryKernelsService::testOnly_getSelectedObjectData (SettingsForSimulation const & settings, SimulationData data)
212+ void GeometryKernelsService::extractObjectDataToCpuBuffers (
213+ SettingsForSimulation const & settings,
214+ SimulationData data,
215+ CpuGeometryBuffers& cpuBuffers,
216+ RealRect const & visibleWorldRect)
212217{
213218 auto const & gpuSettings = settings.cudaSettings ;
219+ float2 const visibleTopLeft{visibleWorldRect.topLeft .x , visibleWorldRect.topLeft .y };
214220
215- // First count how many selected objects
216- setValueToDevice (_numSelectedObjects, static_cast <uint64_t >(0 ));
217- KERNEL_CALL (cudaExtractSelectedObjectData, data, nullptr , _numSelectedObjects);
218- cudaDeviceSynchronize ();
219- auto numObjects = copyToHost (_numSelectedObjects);
220-
221- if (numObjects == 0 ) {
222- return {};
223- }
221+ // Get counts first
222+ auto numCells = data.objects .cells .getNumEntries_host ();
223+ auto numParticles = data.objects .particles .getNumEntries_host ();
224224
225- // Allocate device memory and extract data
226- SelectedObjectVertexData* deviceBuffer;
227- CHECK_FOR_CUDA_ERROR (cudaMalloc (&deviceBuffer, numObjects * sizeof (SelectedObjectVertexData)));
225+ setValueToDevice (_numLocations, static_cast <uint64_t >(0 ));
226+ KERNEL_CALL_1_1 (cudaExtractLocationData, data, nullptr , _numLocations, visibleTopLeft);
227+ cudaDeviceSynchronize ();
228+ auto numLocations = copyToHost (_numLocations);
228229
229230 setValueToDevice (_numSelectedObjects, static_cast <uint64_t >(0 ));
230- KERNEL_CALL (cudaExtractSelectedObjectData, data, deviceBuffer , _numSelectedObjects);
231+ KERNEL_CALL (cudaExtractSelectedObjectData, data, nullptr , _numSelectedObjects);
231232 cudaDeviceSynchronize ();
233+ auto numSelectedObjects = copyToHost (_numSelectedObjects);
232234
233- // Copy to host
234- std::vector<SelectedObjectVertexData> result (numObjects);
235- CHECK_FOR_CUDA_ERROR (cudaMemcpy (result.data (), deviceBuffer, numObjects * sizeof (SelectedObjectVertexData), cudaMemcpyDeviceToHost));
236-
237- CHECK_FOR_CUDA_ERROR (cudaFree (deviceBuffer));
238-
239- return result;
240- }
235+ setValueToDevice (_numLineIndices, static_cast <uint64_t >(0 ));
236+ KERNEL_CALL (cudaExtractLineIndices, data, nullptr , _numLineIndices);
237+ cudaDeviceSynchronize ();
238+ auto numLineIndices = copyToHost (_numLineIndices);
241239
242- std::vector<ConnectionArrowVertexData> GeometryKernelsService::testOnly_getConnectionArrowData (SettingsForSimulation const & settings, SimulationData data)
243- {
244- auto const & gpuSettings = settings.cudaSettings ;
240+ setValueToDevice (_numTriangleIndices, static_cast <uint64_t >(0 ));
241+ KERNEL_CALL (cudaExtractTriangleIndices, data, nullptr , _numTriangleIndices);
242+ cudaDeviceSynchronize ();
243+ auto numTriangleIndices = copyToHost (_numTriangleIndices);
245244
246- // First count how many connection arrow vertices
247245 setValueToDevice (_numSelectedConnectionVertices, static_cast <uint64_t >(0 ));
248246 KERNEL_CALL (cudaExtractSelectedConnectionData, data, nullptr , _numSelectedConnectionVertices);
249247 cudaDeviceSynchronize ();
250- auto numVertices = copyToHost (_numSelectedConnectionVertices);
251-
252- if (numVertices == 0 ) {
253- return {};
254- }
255-
256- // Allocate device memory and extract data
257- ConnectionArrowVertexData* deviceBuffer;
258- CHECK_FOR_CUDA_ERROR (cudaMalloc (&deviceBuffer, numVertices * sizeof (ConnectionArrowVertexData)));
248+ auto numConnectionArrows = copyToHost (_numSelectedConnectionVertices);
259249
260- setValueToDevice (_numSelectedConnectionVertices , static_cast <uint64_t >(0 ));
261- KERNEL_CALL (cudaExtractSelectedConnectionData , data, deviceBuffer, _numSelectedConnectionVertices );
250+ setValueToDevice (_numAttackEventVertices , static_cast <uint64_t >(0 ));
251+ KERNEL_CALL (cudaExtractAttackEventData , data, nullptr , _numAttackEventVertices );
262252 cudaDeviceSynchronize ();
253+ auto numAttackEvents = copyToHost (_numAttackEventVertices);
263254
264- // Copy to host
265- std::vector<ConnectionArrowVertexData> result (numVertices);
266- CHECK_FOR_CUDA_ERROR (cudaMemcpy (result.data (), deviceBuffer, numVertices * sizeof (ConnectionArrowVertexData), cudaMemcpyDeviceToHost));
255+ setValueToDevice (_numDetonationEventVertices, static_cast <uint64_t >(0 ));
256+ KERNEL_CALL (cudaExtractDetonationEventData, data, nullptr , _numDetonationEventVertices);
257+ cudaDeviceSynchronize ();
258+ auto numDetonationEvents = copyToHost (_numDetonationEventVertices);
259+
260+ // Allocate device buffers
261+ CellVertexData* deviceCellBuffer = nullptr ;
262+ EnergyParticleVertexData* deviceEnergyParticleBuffer = nullptr ;
263+ LocationVertexData* deviceLocationBuffer = nullptr ;
264+ SelectedObjectVertexData* deviceSelectedObjectBuffer = nullptr ;
265+ unsigned int * deviceLineIndexBuffer = nullptr ;
266+ unsigned int * deviceTriangleIndexBuffer = nullptr ;
267+ ConnectionArrowVertexData* deviceConnectionArrowBuffer = nullptr ;
268+ AttackEventVertexData* deviceAttackEventBuffer = nullptr ;
269+ DetonationEventVertexData* deviceDetonationEventBuffer = nullptr ;
270+
271+ if (numCells > 0 ) {
272+ CHECK_FOR_CUDA_ERROR (cudaMalloc (&deviceCellBuffer, numCells * sizeof (CellVertexData)));
273+ }
274+ if (numParticles > 0 ) {
275+ CHECK_FOR_CUDA_ERROR (cudaMalloc (&deviceEnergyParticleBuffer, numParticles * sizeof (EnergyParticleVertexData)));
276+ }
277+ if (numLocations > 0 ) {
278+ CHECK_FOR_CUDA_ERROR (cudaMalloc (&deviceLocationBuffer, numLocations * sizeof (LocationVertexData)));
279+ }
280+ if (numSelectedObjects > 0 ) {
281+ CHECK_FOR_CUDA_ERROR (cudaMalloc (&deviceSelectedObjectBuffer, numSelectedObjects * sizeof (SelectedObjectVertexData)));
282+ }
283+ if (numLineIndices > 0 ) {
284+ CHECK_FOR_CUDA_ERROR (cudaMalloc (&deviceLineIndexBuffer, numLineIndices * sizeof (unsigned int )));
285+ }
286+ if (numTriangleIndices > 0 ) {
287+ CHECK_FOR_CUDA_ERROR (cudaMalloc (&deviceTriangleIndexBuffer, numTriangleIndices * sizeof (unsigned int )));
288+ }
289+ if (numConnectionArrows > 0 ) {
290+ CHECK_FOR_CUDA_ERROR (cudaMalloc (&deviceConnectionArrowBuffer, numConnectionArrows * sizeof (ConnectionArrowVertexData)));
291+ }
292+ if (numAttackEvents > 0 ) {
293+ CHECK_FOR_CUDA_ERROR (cudaMalloc (&deviceAttackEventBuffer, numAttackEvents * sizeof (AttackEventVertexData)));
294+ }
295+ if (numDetonationEvents > 0 ) {
296+ CHECK_FOR_CUDA_ERROR (cudaMalloc (&deviceDetonationEventBuffer, numDetonationEvents * sizeof (DetonationEventVertexData)));
297+ }
267298
268- CHECK_FOR_CUDA_ERROR (cudaFree (deviceBuffer));
299+ // Extract data to device buffers
300+ if (numCells > 0 ) {
301+ KERNEL_CALL (cudaExtractCellData, data, deviceCellBuffer);
302+ }
303+ if (numParticles > 0 ) {
304+ KERNEL_CALL (cudaExtractEnergyParticleData, data, deviceEnergyParticleBuffer);
305+ }
306+ if (numLocations > 0 ) {
307+ setValueToDevice (_numLocations, static_cast <uint64_t >(0 ));
308+ KERNEL_CALL_1_1 (cudaExtractLocationData, data, deviceLocationBuffer, _numLocations, visibleTopLeft);
309+ }
310+ if (numSelectedObjects > 0 ) {
311+ setValueToDevice (_numSelectedObjects, static_cast <uint64_t >(0 ));
312+ KERNEL_CALL (cudaExtractSelectedObjectData, data, deviceSelectedObjectBuffer, _numSelectedObjects);
313+ }
314+ if (numLineIndices > 0 ) {
315+ setValueToDevice (_numLineIndices, static_cast <uint64_t >(0 ));
316+ KERNEL_CALL (cudaExtractLineIndices, data, deviceLineIndexBuffer, _numLineIndices);
317+ }
318+ if (numTriangleIndices > 0 ) {
319+ setValueToDevice (_numTriangleIndices, static_cast <uint64_t >(0 ));
320+ KERNEL_CALL (cudaExtractTriangleIndices, data, deviceTriangleIndexBuffer, _numTriangleIndices);
321+ }
322+ if (numConnectionArrows > 0 ) {
323+ setValueToDevice (_numSelectedConnectionVertices, static_cast <uint64_t >(0 ));
324+ KERNEL_CALL (cudaExtractSelectedConnectionData, data, deviceConnectionArrowBuffer, _numSelectedConnectionVertices);
325+ }
326+ if (numAttackEvents > 0 ) {
327+ setValueToDevice (_numAttackEventVertices, static_cast <uint64_t >(0 ));
328+ KERNEL_CALL (cudaExtractAttackEventData, data, deviceAttackEventBuffer, _numAttackEventVertices);
329+ }
330+ if (numDetonationEvents > 0 ) {
331+ setValueToDevice (_numDetonationEventVertices, static_cast <uint64_t >(0 ));
332+ KERNEL_CALL (cudaExtractDetonationEventData, data, deviceDetonationEventBuffer, _numDetonationEventVertices);
333+ }
334+ cudaDeviceSynchronize ();
269335
270- return result;
336+ // Resize CPU buffers
337+ cpuBuffers.cells .resize (numCells);
338+ cpuBuffers.energyParticles .resize (numParticles);
339+ cpuBuffers.locations .resize (numLocations);
340+ cpuBuffers.selectedObjects .resize (numSelectedObjects);
341+ cpuBuffers.lineIndices .resize (numLineIndices);
342+ cpuBuffers.triangleIndices .resize (numTriangleIndices);
343+ cpuBuffers.connectionArrows .resize (numConnectionArrows);
344+ cpuBuffers.attackEvents .resize (numAttackEvents);
345+ cpuBuffers.detonationEvents .resize (numDetonationEvents);
346+
347+ // Copy from device to host
348+ if (numCells > 0 ) {
349+ CHECK_FOR_CUDA_ERROR (cudaMemcpy (cpuBuffers.cells .data (), deviceCellBuffer, numCells * sizeof (CellVertexData), cudaMemcpyDeviceToHost));
350+ CHECK_FOR_CUDA_ERROR (cudaFree (deviceCellBuffer));
351+ }
352+ if (numParticles > 0 ) {
353+ CHECK_FOR_CUDA_ERROR (cudaMemcpy (cpuBuffers.energyParticles .data (), deviceEnergyParticleBuffer, numParticles * sizeof (EnergyParticleVertexData), cudaMemcpyDeviceToHost));
354+ CHECK_FOR_CUDA_ERROR (cudaFree (deviceEnergyParticleBuffer));
355+ }
356+ if (numLocations > 0 ) {
357+ CHECK_FOR_CUDA_ERROR (cudaMemcpy (cpuBuffers.locations .data (), deviceLocationBuffer, numLocations * sizeof (LocationVertexData), cudaMemcpyDeviceToHost));
358+ CHECK_FOR_CUDA_ERROR (cudaFree (deviceLocationBuffer));
359+ }
360+ if (numSelectedObjects > 0 ) {
361+ CHECK_FOR_CUDA_ERROR (cudaMemcpy (cpuBuffers.selectedObjects .data (), deviceSelectedObjectBuffer, numSelectedObjects * sizeof (SelectedObjectVertexData), cudaMemcpyDeviceToHost));
362+ CHECK_FOR_CUDA_ERROR (cudaFree (deviceSelectedObjectBuffer));
363+ }
364+ if (numLineIndices > 0 ) {
365+ CHECK_FOR_CUDA_ERROR (cudaMemcpy (cpuBuffers.lineIndices .data (), deviceLineIndexBuffer, numLineIndices * sizeof (unsigned int ), cudaMemcpyDeviceToHost));
366+ CHECK_FOR_CUDA_ERROR (cudaFree (deviceLineIndexBuffer));
367+ }
368+ if (numTriangleIndices > 0 ) {
369+ CHECK_FOR_CUDA_ERROR (cudaMemcpy (cpuBuffers.triangleIndices .data (), deviceTriangleIndexBuffer, numTriangleIndices * sizeof (unsigned int ), cudaMemcpyDeviceToHost));
370+ CHECK_FOR_CUDA_ERROR (cudaFree (deviceTriangleIndexBuffer));
371+ }
372+ if (numConnectionArrows > 0 ) {
373+ CHECK_FOR_CUDA_ERROR (cudaMemcpy (cpuBuffers.connectionArrows .data (), deviceConnectionArrowBuffer, numConnectionArrows * sizeof (ConnectionArrowVertexData), cudaMemcpyDeviceToHost));
374+ CHECK_FOR_CUDA_ERROR (cudaFree (deviceConnectionArrowBuffer));
375+ }
376+ if (numAttackEvents > 0 ) {
377+ CHECK_FOR_CUDA_ERROR (cudaMemcpy (cpuBuffers.attackEvents .data (), deviceAttackEventBuffer, numAttackEvents * sizeof (AttackEventVertexData), cudaMemcpyDeviceToHost));
378+ CHECK_FOR_CUDA_ERROR (cudaFree (deviceAttackEventBuffer));
379+ }
380+ if (numDetonationEvents > 0 ) {
381+ CHECK_FOR_CUDA_ERROR (cudaMemcpy (cpuBuffers.detonationEvents .data (), deviceDetonationEventBuffer, numDetonationEvents * sizeof (DetonationEventVertexData), cudaMemcpyDeviceToHost));
382+ CHECK_FOR_CUDA_ERROR (cudaFree (deviceDetonationEventBuffer));
383+ }
271384}
0 commit comments