Skip to content

Commit d4b7acb

Browse files
author
gregrodgers
committed
revert use of dependencies and remove preallocated signals
1 parent 961f1ef commit d4b7acb

File tree

1 file changed

+4
-66
lines changed

1 file changed

+4
-66
lines changed

bin/snk_genw.sh

Lines changed: 4 additions & 66 deletions
Original file line numberDiff line numberDiff line change
@@ -118,7 +118,6 @@ function write_header_template(){
118118
#endif
119119
#ifndef __SNK_DEFS
120120
#define SNK_MAX_STREAMS 8
121-
#define SNK_MAX_TASKS 100001
122121
extern _CPPSTRING_ void stream_sync(const int stream_num);
123122
124123
#define SNK_ORDERED 1
@@ -144,12 +143,10 @@ struct snk_lparm_s {
144143
int barrier; /* default = SNK_UNORDERED */
145144
int acquire_fence_scope; /* default = 2 */
146145
int release_fence_scope; /* default = 2 */
147-
snk_task_t *requires ; /* Linked list of required parent tasks, default = NULL */
148-
snk_task_t *needs ; /* Linked list of parent tasks where only one must complete, default=NULL */
149146
} ;
150147
151148
/* This string macro is used to declare launch parameters and set their default values */
152-
#define SNK_INIT_LPARM(X,Y) snk_lparm_t * X ; snk_lparm_t _ ## X ={.ndim=1,.gdims={Y},.ldims={64},.stream=-1,.barrier=SNK_UNORDERED,.acquire_fence_scope=2,.release_fence_scope=2,.requires=NULL,.needs=NULL} ; X = &_ ## X ;
149+
#define SNK_INIT_LPARM(X,Y) snk_lparm_t * X ; snk_lparm_t _ ## X ={.ndim=1,.gdims={Y},.ldims={64},.stream=-1,.barrier=SNK_UNORDERED,.acquire_fence_scope=2,.release_fence_scope=2} ; X = &_ ## X ;
153150
154151
/* Equivalent host data types for kernel data types */
155152
typedef struct snk_image3d_s snk_image3d_t;
@@ -329,11 +326,9 @@ static hsa_status_t get_kernarg_memory_region(hsa_region_t region, void* data) {
329326
330327
/* Stream specific globals */
331328
hsa_queue_t* Stream_CommandQ[SNK_MAX_STREAMS];
332-
snk_task_t SNK_Tasks[SNK_MAX_TASKS];
333329
static int SNK_NextTaskId = 0 ;
334330
335331
/* Context(cl file) specific globals */
336-
hsa_ext_module_t* _CN__BrigModule;
337332
hsa_agent_t _CN__Agent;
338333
hsa_ext_program_t _CN__HsaProgram;
339334
hsa_executable_t _CN__Executable;
@@ -367,18 +362,15 @@ status_t _CN__InitContext(){
367362
uint32_t queue_size = 0;
368363
err = hsa_agent_get_info(_CN__Agent, HSA_AGENT_INFO_QUEUE_MAX_SIZE, &queue_size);
369364
ErrorCheck(Querying the agent maximum queue size, err);
370-
/* printf("The maximum queue size is %u.\n", (unsigned int) queue_size); */
371-
372-
/* Load the BRIG binary. */
373-
_CN__BrigModule = (hsa_ext_module_t*) &_CN__HSA_BrigMem;
365+
/* printf("The maximum queue size is %u.\n", (unsigned int) queue_size); */
374366
375367
/* Create hsa program. */
376368
memset(&_CN__HsaProgram,0,sizeof(hsa_ext_program_t));
377369
err = hsa_ext_program_create(HSA_MACHINE_MODEL_LARGE, HSA_PROFILE_FULL, HSA_DEFAULT_FLOAT_ROUNDING_MODE_DEFAULT, NULL, &_CN__HsaProgram);
378370
ErrorCheck(Create the program, err);
379371
380372
/* Add the BRIG module to hsa program. */
381-
err = hsa_ext_program_add_module(_CN__HsaProgram, _CN__BrigModule);
373+
err = hsa_ext_program_add_module(_CN__HsaProgram, (hsa_ext_module_t) _CN__HSA_BrigMem );
382374
ErrorCheck(Adding the brig module to the program, err);
383375
384376
/* Determine the agents ISA. */
@@ -423,14 +415,6 @@ status_t _CN__InitContext(){
423415
err=hsa_signal_create(1, 0, NULL, &Sync_Signal);
424416
ErrorCheck(Creating a HSA signal, err);
425417
426-
int task_num;
427-
/* Initialize all preallocated tasks and signals */
428-
for ( task_num = 0 ; task_num < SNK_MAX_TASKS; task_num++){
429-
SNK_Tasks[task_num].next = NULL;
430-
err=hsa_signal_create(1, 0, NULL, &(SNK_Tasks[task_num].signal));
431-
ErrorCheck(Creating a HSA signal, err);
432-
}
433-
434418
/* Create queues and signals for each stream. */
435419
int stream_num;
436420
for ( stream_num = 0 ; stream_num < SNK_MAX_STREAMS ; stream_num++){
@@ -533,29 +517,6 @@ function write_kernel_template(){
533517
this_Q = Stream_CommandQ[stream_num];
534518
}
535519
536-
if ( lparm->requires != NULL) {
537-
/* For dependent child tasks, wait till all parent kernels are finished. */
538-
/* FIXME: To use multiple barrier AND packets or individual waiting for better performance?
539-
KPS benchmark showed that barrier AND was a lot slower, but will keep both implementations
540-
for future use */
541-
#if 1
542-
#if 0
543-
barrier_sync(stream_num, lparm->requires);
544-
#else
545-
snk_task_t *p = lparm->requires;
546-
while(p != NULL) {
547-
hsa_signal_value_t value = hsa_signal_wait_acquire(p->signal, HSA_SIGNAL_CONDITION_LT, 1, UINT64_MAX, HSA_WAIT_STATE_BLOCKED);
548-
// HSA manual uses a while loop. Why?
549-
// while(hsa_signal_wait_acquire(p->signal, HSA_SIGNAL_CONDITION_EQ, 0, UINT64_MAX, HSA_WAIT_STATE_ACTIVE) != 0);
550-
p = p->next;
551-
}
552-
#endif
553-
#endif
554-
}
555-
if ( lparm->needs != NULL) {
556-
printf("\n THIS TASK NEEDS ONE OF A LIST OF PARENTS TO COMPLETE BEFORE THIS TASK STARTS \n\n");
557-
}
558-
559520
/* Obtain the current queue write index. increases with each call to kernel */
560521
uint64_t index = hsa_queue_load_write_index_relaxed(this_Q);
561522
/* printf("DEBUG:Call #%d to kernel \"%s\" \n",(int) index,"_KN_"); */
@@ -566,16 +527,6 @@ function write_kernel_template(){
566527
567528
/* FIXME: We need to check for queue overflow here. */
568529
569-
/* If this kernel was declared as snk_task_t*, then use preallocated signal */
570-
if ( needs_return_task == 1) {
571-
if ( SNK_NextTaskId == SNK_MAX_TASKS ) {
572-
printf("ERROR: Too many parent tasks, increase SNK_MAX_TASKS =%d\n",SNK_MAX_TASKS);
573-
return ;
574-
}
575-
/* hsa_signal_store_relaxed(SNK_Tasks[SNK_NextTaskId].signal,1); */
576-
this_aql->completion_signal = SNK_Tasks[SNK_NextTaskId].signal;
577-
}
578-
579530
if ( stream_num < 0 ) {
580531
/* Use the global synchronous signal Sync_Signal */
581532
this_aql->completion_signal=Sync_Signal;
@@ -948,24 +899,11 @@ __SEDCMD=" "
948899
fi
949900
fi
950901

951-
# Make sure template knows when to allocate and bind a global signal for this function
952-
if [ $__KT == "snk_task_t*" ] ; then
953-
echo " int needs_return_task = 1;" >>$__CWRAP
954-
else
955-
echo " int needs_return_task = 0;" >>$__CWRAP
956-
fi
957-
958902
# Now add the kernel template to wrapper and change all three strings
959903
# 1) Context Name _CN_ 2) Kernel name _KN_ and 3) Function name _FN_
960904
write_kernel_template | sed -e "s/_CN_/${__SN}/g;s/_KN_/${__KN}/g;s/_FN_/${__FN}/g" >>$__CWRAP
961905

962-
# if kernel is type snk_task_t*, then return &parentTask else return void
963-
# FIXME: Need to rotate and reuse the task array!
964-
if [ $__KT == "snk_task_t*" ] ; then
965-
echo " return (snk_task_t*) &(SNK_Tasks[SNK_NextTaskId++]);" >> $__CWRAP
966-
else
967-
echo " return;" >> $__CWRAP
968-
fi
906+
echo " return;" >> $__CWRAP
969907
echo "} " >> $__CWRAP
970908
echo "/* ------ End of SNACK function ${__KN} ------ */ " >> $__CWRAP
971909

0 commit comments

Comments
 (0)