@@ -73,7 +73,6 @@ int transform( size_t* lengths, const size_t *inStrides, const size_t *outStride
73
73
std::vector< cl_device_id > device_id;
74
74
cl_context context;
75
75
cl_command_queue queue;
76
- cl_event outEvent = NULL ;
77
76
clfftPlanHandle plan_handle;
78
77
79
78
for (unsigned u = 0 ; u < max_dimensions; ++u) {
@@ -204,7 +203,7 @@ int transform( size_t* lengths, const size_t *inStrides, const size_t *outStride
204
203
205
204
206
205
OPENCL_V_THROW ( clEnqueueWriteBuffer ( queue, input_cl_mem_buffers[ 0 ], CL_TRUE, 0 , size_of_input_buffers_in_bytes, &input[ 0 ],
207
- 0 , NULL , &outEvent ),
206
+ 0 , NULL , NULL ),
208
207
" clEnqueueWriteBuffer failed" );
209
208
210
209
}
@@ -252,10 +251,10 @@ int transform( size_t* lengths, const size_t *inStrides, const size_t *outStride
252
251
253
252
254
253
OPENCL_V_THROW ( clEnqueueWriteBuffer ( queue, input_cl_mem_buffers[ 0 ], CL_TRUE, 0 , size_of_input_buffers_in_bytes, &real[ 0 ],
255
- 0 , NULL , &outEvent ),
254
+ 0 , NULL , NULL ),
256
255
" clEnqueueWriteBuffer failed" );
257
256
OPENCL_V_THROW ( clEnqueueWriteBuffer ( queue, input_cl_mem_buffers[ 1 ], CL_TRUE, 0 , size_of_input_buffers_in_bytes, &imag[ 0 ],
258
- 0 , NULL , &outEvent ),
257
+ 0 , NULL , NULL ),
259
258
" clEnqueueWriteBuffer failed" );
260
259
}
261
260
break ;
@@ -289,7 +288,7 @@ int transform( size_t* lengths, const size_t *inStrides, const size_t *outStride
289
288
290
289
291
290
OPENCL_V_THROW ( clEnqueueWriteBuffer ( queue, input_cl_mem_buffers[ 0 ], CL_TRUE, 0 , size_of_input_buffers_in_bytes, &input[ 0 ],
292
- 0 , NULL , &outEvent ),
291
+ 0 , NULL , NULL ),
293
292
" clEnqueueWriteBuffer failed" );
294
293
}
295
294
break ;
@@ -325,10 +324,10 @@ int transform( size_t* lengths, const size_t *inStrides, const size_t *outStride
325
324
326
325
327
326
OPENCL_V_THROW ( clEnqueueWriteBuffer ( queue, input_cl_mem_buffers[ 0 ], CL_TRUE, 0 , size_of_input_buffers_in_bytes, &real[ 0 ],
328
- 0 , NULL , &outEvent ),
327
+ 0 , NULL , NULL ),
329
328
" clEnqueueWriteBuffer failed" );
330
329
OPENCL_V_THROW ( clEnqueueWriteBuffer ( queue, input_cl_mem_buffers[ 1 ], CL_TRUE, 0 , size_of_input_buffers_in_bytes, &imag[ 0 ],
331
- 0 , NULL , &outEvent ),
330
+ 0 , NULL , NULL ),
332
331
" clEnqueueWriteBuffer failed" );
333
332
}
334
333
break ;
@@ -373,7 +372,7 @@ int transform( size_t* lengths, const size_t *inStrides, const size_t *outStride
373
372
374
373
375
374
OPENCL_V_THROW ( clEnqueueWriteBuffer ( queue, input_cl_mem_buffers[ 0 ], CL_TRUE, 0 , size_of_input_buffers_in_bytes, &real[ 0 ],
376
- 0 , NULL , &outEvent ),
375
+ 0 , NULL , NULL ),
377
376
" clEnqueueWriteBuffer failed" );
378
377
}
379
378
break ;
@@ -391,22 +390,10 @@ int transform( size_t* lengths, const size_t *inStrides, const size_t *outStride
391
390
terr << _T ( " Could not find the external timing library; timings disabled" ) << std::endl;
392
391
}
393
392
394
-
395
393
// Timer module discovered and loaded successfully
396
394
// Initialize function pointers to call into the shared module
397
395
PFGETSTATTIMER get_timer = reinterpret_cast < PFGETSTATTIMER > ( LoadFunctionAddr ( timerLibHandle, " getStatTimer" ) );
398
396
399
- // Create and initialize our timer class, if the external timer shared library loaded
400
- baseStatTimer* timer = NULL ;
401
- size_t clFFTID = 0 ;
402
- if ( get_timer )
403
- {
404
- timer = get_timer ( CLFFT_GPU );
405
- timer->Reserve ( 1 , profile_count );
406
- timer->setNormalize ( true );
407
-
408
- clFFTID = timer->getUniqueID ( " clFFT" , 0 );
409
- }
410
397
411
398
OPENCL_V_THROW ( clfftSetup ( setupData.get ( ) ), " clfftSetup failed" );
412
399
OPENCL_V_THROW ( clfftCreateDefaultPlan ( &plan_handle, context, dim, lengths ), " clfftCreateDefaultPlan failed" );
@@ -511,37 +498,64 @@ int transform( size_t* lengths, const size_t *inStrides, const size_t *outStride
511
498
}
512
499
}
513
500
514
- // Loop as many times as the user specifies to average out the timings
515
- //
501
+
516
502
cl_mem * BuffersOut = ( place == CLFFT_INPLACE ) ? NULL : &output_cl_mem_buffers[ 0 ];
517
503
518
- Timer tr;
519
- tr.Start ();
504
+ // Execute once for basic functional test
505
+ OPENCL_V_THROW ( clfftEnqueueTransform ( plan_handle, dir, 1 , &queue, 0 , NULL , NULL ,
506
+ &input_cl_mem_buffers[ 0 ], BuffersOut, clMedBuffer ),
507
+ " clfftEnqueueTransform failed" );
520
508
521
- for ( cl_uint i = 0 ; i < profile_count; ++i )
522
- {
523
- if ( timer ) timer->Start ( clFFTID );
509
+ OPENCL_V_THROW ( clFinish ( queue ), " clFinish failed" );
510
+
524
511
525
- OPENCL_V_THROW ( clfftEnqueueTransform ( plan_handle, dir, 1 , &queue, 0 , NULL , &outEvent,
526
- &input_cl_mem_buffers[ 0 ], BuffersOut, clMedBuffer ),
527
- " clfftEnqueueTransform failed" );
512
+ // Create and initialize our timer class, if the external timer shared library loaded
513
+ baseStatTimer* timer = NULL ;
514
+ size_t clFFTID = 0 ;
515
+ if ( get_timer )
516
+ {
517
+ timer = get_timer ( CLFFT_GPU );
518
+ timer->Reserve ( 1 , profile_count );
519
+ timer->setNormalize ( true );
528
520
529
- if ( timer ) timer-> Stop ( clFFTID );
521
+ clFFTID = timer-> getUniqueID ( " clFFT " , 0 );
530
522
}
531
- OPENCL_V_THROW ( clFinish ( queue ), " clFinish failed" );
532
- if (clMedBuffer) clReleaseMemObject (clMedBuffer);
533
523
534
- double wtime = tr.Sample ()/((double )profile_count);
535
- size_t totalLen = 1 ;
536
- for (int i=0 ; i<dim; i++) totalLen *= lengths[i];
537
- double opsconst = 5.0 * (double )totalLen * log ((double )totalLen) / log (2.0 );
524
+ cl_event *outEvent = new cl_event[profile_count];
525
+ for ( cl_uint i = 0 ; i < profile_count; ++i ) outEvent[i] = 0 ;
538
526
539
527
if (profile_count > 1 )
540
528
{
529
+ Timer tr;
530
+ tr.Start ();
531
+ for ( cl_uint i = 0 ; i < profile_count; ++i )
532
+ {
533
+ if ( timer ) timer->Start ( clFFTID );
534
+
535
+ OPENCL_V_THROW ( clfftEnqueueTransform ( plan_handle, dir, 1 , &queue, 0 , NULL , &outEvent[i],
536
+ &input_cl_mem_buffers[ 0 ], BuffersOut, clMedBuffer ),
537
+ " clfftEnqueueTransform failed" );
538
+
539
+ if ( timer ) timer->Stop ( clFFTID );
540
+ }
541
+ OPENCL_V_THROW ( clWaitForEvents ( profile_count, outEvent ), " clWaitForEvents failed" );
542
+
543
+ double wtime = tr.Sample ()/((double )profile_count);
544
+
545
+ OPENCL_V_THROW ( clFinish ( queue ), " clFinish failed" );
546
+
547
+ size_t totalLen = 1 ;
548
+ for (int i=0 ; i<dim; i++) totalLen *= lengths[i];
549
+ double opsconst = 5.0 * (double )totalLen * log ((double )totalLen) / log (2.0 );
550
+
551
+
541
552
tout << " \n Execution wall time: " << 1000.0 *wtime << " ms" << std::endl;
542
553
tout << " Execution gflops: " << ((double )batch_size * opsconst)/(1000000000.0 *wtime) << std::endl;
554
+
543
555
}
544
556
557
+ if (clMedBuffer) clReleaseMemObject (clMedBuffer);
558
+
545
559
if ( timer && (command_queue_flags & CL_QUEUE_PROFILING_ENABLE) )
546
560
{
547
561
// Remove all timings that are outside of 2 stddev (keep 65% of samples); we ignore outliers to get a more consistent result
@@ -553,6 +567,14 @@ int transform( size_t* lengths, const size_t *inStrides, const size_t *outStride
553
567
/* ****************/
554
568
FreeSharedLibrary ( timerLibHandle );
555
569
570
+ for ( cl_uint i = 0 ; i < profile_count; ++i )
571
+ {
572
+ if (outEvent[i])
573
+ clReleaseEvent (outEvent[i]);
574
+ }
575
+
576
+ delete[] outEvent;
577
+
556
578
// Read and check output data
557
579
// This check is not valid if the FFT is executed multiple times inplace.
558
580
//
@@ -725,7 +747,7 @@ int transform( size_t* lengths, const size_t *inStrides, const size_t *outStride
725
747
OPENCL_V_THROW ( clfftDestroyPlan ( &plan_handle ), " clfftDestroyPlan failed" );
726
748
OPENCL_V_THROW ( clfftTeardown ( ), " clfftTeardown failed" );
727
749
728
- cleanupCL ( &context, &queue, countOf ( input_cl_mem_buffers ), input_cl_mem_buffers, countOf ( output_cl_mem_buffers ), output_cl_mem_buffers, &outEvent );
750
+ cleanupCL ( &context, &queue, countOf ( input_cl_mem_buffers ), input_cl_mem_buffers, countOf ( output_cl_mem_buffers ), output_cl_mem_buffers, NULL );
729
751
return 0 ;
730
752
}
731
753
0 commit comments