@@ -153,18 +153,28 @@ case ${ac_SFW_FP16} in
153153 AC_MSG_ERROR ( [ "SFW FP16 option not supported ${ac_SFW_FP16}"] ) ;;
154154esac
155155
156- # ############## SUMMIT JSRUN
157- AC_ARG_ENABLE ( [ summit] ,
158- [ AC_HELP_STRING ([ --enable-summit=yes|no] , [ enable IBMs jsrun resource manager for SUMMIT] )] ,
159- [ ac_SUMMIT=${enable_summit}] , [ ac_SUMMIT=no] )
160- case ${ac_SUMMIT} in
161- no);;
############### Accelerator cshift
# Two configure switches feed the cshift decision:
#   --enable-accelerator-cshift=yes|no  stored in ac_ACC_CSHIFT, yes when absent
#   --enable-ucx-buggy=yes|no           stored in ac_UCXBUGGY,   no  when absent
# The case statement that follows forces ac_ACC_CSHIFT=no whenever
# ac_UCXBUGGY is yes, so the UCX workaround always wins.
AC_ARG_ENABLE([accelerator-cshift],
    [AC_HELP_STRING([--enable-accelerator-cshift=yes|no],
                    [run cshift on the device])],
    [ac_ACC_CSHIFT=${enable_accelerator_cshift}],
    [ac_ACC_CSHIFT=yes])

AC_ARG_ENABLE([ucx-buggy],
    [AC_HELP_STRING([--enable-ucx-buggy=yes|no],
                    [enable workaround for UCX device buffer bugs])],
    [ac_UCXBUGGY=${enable_ucx_buggy}],
    [ac_UCXBUGGY=no])
164+
165+ case ${ac_UCXBUGGY} in
162166 yes)
163- AC_DEFINE ( [ GRID_IBM_SUMMIT] ,[ 1] ,[ Let JSRUN manage the GPU device allocation] ) ;;
164- *)
165- AC_DEFINE ( [ GRID_IBM_SUMMIT] ,[ 1] ,[ Let JSRUN manage the GPU device allocation] ) ;;
167+ ac_ACC_CSHIFT=no;;
168+ *);;
169+ esac
170+
# Emit the final decision: ACCELERATOR_CSHIFT is defined to 1 only when
# ac_ACC_CSHIFT is exactly yes; any other value defines nothing.
if test "x${ac_ACC_CSHIFT}" = "xyes"; then
  AC_DEFINE([ACCELERATOR_CSHIFT],[1],[UCX device buffer bugs are not present])
fi
167176
177+
168178# ############## SYCL/CUDA/HIP/none
169179AC_ARG_ENABLE ( [ accelerator] ,
170180 [ AC_HELP_STRING ([ --enable-accelerator=cuda|sycl|hip|none] , [ enable none,cuda,sycl,hip acceleration] )] ,
@@ -181,8 +191,9 @@ case ${ac_ACCELERATOR} in
181191 echo HIP acceleration
182192 AC_DEFINE ( [ GRID_HIP] ,[ 1] ,[ Use HIP offload] ) ;;
183193 none)
184- echo NO acceleration
185- ;;
194+ echo NO acceleration ;;
195+ no)
196+ echo NO acceleration ;;
186197 *)
187198 AC_MSG_ERROR ( [ "Acceleration not suppoorted ${ac_ACCELERATOR}"] ) ;;
188199esac
@@ -477,28 +488,26 @@ esac
# Prepend the SIMD flags to both the C++ and the C automake flag variables.
AM_CXXFLAGS="${SIMD_FLAGS} ${AM_CXXFLAGS}"
AM_CFLAGS="${SIMD_FLAGS} ${AM_CFLAGS}"
479490
480- # ############## Precision selection - deprecate
481- # AC_ARG_ENABLE([precision],
482- # [AC_HELP_STRING([--enable-precision=single|double],
483- # [Select default word size of Real])],
484- # [ac_PRECISION=${enable_precision}],[ac_PRECISION=double])
485-
############### Default precision
# The default precision is always double: this define is emitted
# unconditionally and no configure option selects it.
AC_DEFINE([GRID_DEFAULT_PRECISION_DOUBLE],[1],[GRID_DEFAULT_PRECISION is DOUBLE])
487493
488- # case ${ac_PRECISION} in
489- # single)
490- # AC_DEFINE([GRID_DEFAULT_PRECISION_SINGLE],[1],[GRID_DEFAULT_PRECISION is SINGLE] )
491- # ;;
492- # double)
493- # ;;
494- # *)
495- # AC_MSG_ERROR([${ac_PRECISION} unsupported --enable-precision option]);
496- # ;;
497- # esac
498-
499- # ##################### Shared memory allocation technique under MPI3
500- AC_ARG_ENABLE ( [ shm] ,[ AC_HELP_STRING ([ --enable-shm=shmopen|shmget|hugetlbfs|shmnone] ,
501- [ Select SHM allocation technique] )] ,[ ac_SHM=${enable_shm}] ,[ ac_SHM=shmopen] )
##########################################################
# Set GPU device to rank in node
##########################################################
#   --enable-setdevice    ac_SETDEVICE=yes, GRID_DEFAULT_GPU is left undefined
#   --disable-setdevice   ac_SETDEVICE=no,  GRID_DEFAULT_GPU is defined to 1
#   option absent         same as --disable-setdevice
#
# AC_ARG_ENABLE stores the user's choice in the shell variable
# enable_setdevice. Shell variable names are case sensitive, so the action
# must read that exact lower-case name; reading enable_SETDEVICE yields an
# empty string and the option is silently ignored.
AC_ARG_ENABLE([setdevice],
    [AC_HELP_STRING([--enable-setdevice | --disable-setdevice],
                    [Set GPU to rank in node with cudaSetDevice or similar])],
    [ac_SETDEVICE=${enable_setdevice}],
    [ac_SETDEVICE=no])

case ${ac_SETDEVICE} in
  yes)
    # Nothing is defined when the user asked for explicit device selection.
    ;;
  *)
    AC_DEFINE([GRID_DEFAULT_GPU],[1],[GRID_DEFAULT_GPU])
    ;;
esac
505+
##########################################################
# Shared memory intranode
##########################################################
# --enable-shm selects the SHM allocation technique; ac_SHM is set to no
# when the option is not given.
AC_ARG_ENABLE([shm],
    [AC_HELP_STRING([--enable-shm=shmopen|shmget|hugetlbfs|shmnone|nvlink|no],
                    [Select SHM allocation technique])],
    [ac_SHM=${enable_shm}],
    [ac_SHM=no])
502511
503512case ${ac_SHM} in
504513
@@ -517,8 +526,12 @@ case ${ac_SHM} in
517526 AC_DEFINE ( [ GRID_MPI3_SHMGET] ,[ 1] ,[ GRID_MPI3_SHMGET] )
518527 ;;
519528
520- shmnone)
529+ shmnone | no )
521530 AC_DEFINE ( [ GRID_MPI3_SHM_NONE] ,[ 1] ,[ GRID_MPI3_SHM_NONE] )
531+ AC_DEFINE ( [ GRID_SHM_DISABLE] ,[ 1] ,[ USE MPI for intranode comms] ) ;;
532+
533+ nvlink)
534+ AC_DEFINE ( [ GRID_MPI3_SHM_NVLINK] ,[ 1] ,[ GRID_MPI3_SHM_NVLINK] )
522535 ;;
523536
524537 hugetlbfs)
@@ -537,10 +550,23 @@ AC_ARG_ENABLE([shmpath],[AC_HELP_STRING([--enable-shmpath=path],
537550 [ ac_SHMPATH=/var/lib/hugetlbfs/global/pagesize-2MB/] )
538551AC_DEFINE_UNQUOTED ( [ GRID_SHM_PATH] ,[ "$ac_SHMPATH"] ,[ Path to a hugetlbfs filesystem for MMAPing] )
539552
553+
############### threaded communication
#   --enable-comms-threads    defines GRID_COMMS_THREADING to 1; this is
#                             also what happens when the option is absent
#   --disable-comms-threads   leaves GRID_COMMS_THREADING undefined
AC_ARG_ENABLE([comms-threads],
    [AC_HELP_STRING([--enable-comms-threads | --disable-comms-threads],
                    [Use multiple threads in MPI calls])],
    [ac_COMMS_THREADS=${enable_comms_threads}],
    [ac_COMMS_THREADS=yes])

case ${ac_COMMS_THREADS} in
  yes)
    # The description is what lands next to the define in the config header
    # template, so it names this feature rather than GRID_COMMS_NONE.
    AC_DEFINE([GRID_COMMS_THREADING],[1],[Use multiple threads in MPI calls])
    ;;
  *)
    ;;
esac
564+
############### communication type selection
# ac_COMMS takes the --enable-comms value, or none when the option is absent.
AC_ARG_ENABLE([comms],
    [AC_HELP_STRING([--enable-comms=none|mpi|mpi-auto],
                    [Select communications])],
    [ac_COMMS=${enable_comms}],
    [ac_COMMS=none])
543568
569+
544570case ${ac_COMMS} in
545571 none)
546572 AC_DEFINE ( [ GRID_COMMS_NONE] ,[ 1] ,[ GRID_COMMS_NONE] )
0 commit comments