Skip to content

Commit 5fa7215

Browse files
committed
[rocmlibs][rccl] Support rccl on gfx1150
Fix the existing script for building rccl for AOMP. Add a build of rocm-core, which is a prerequisite for rccl. Also disable collective tracing to ensure compatibility with gfx1150.
1 parent 1111009 commit 5fa7215

File tree

5 files changed

+155
-32
lines changed

5 files changed

+155
-32
lines changed

bin/aomp_common_vars

Lines changed: 2 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,8 @@ unset CMAKE_GENERATOR
2222
AOMP_COMPILER_NAME=${AOMP_COMPILER_NAME:-AOMP}
2323

2424
# ROCM_VERSION may be set by Jenkins, if not guess a good number
25-
ROCM_VERSION=${ROCM_VERSION:-6.0.0}
25+
# it is used by rocm-core and rccl also
26+
ROCM_VERSION=${ROCM_VERSION:-6.4.0}
2627

2728
# Set the AOMP VERSION STRING
2829
AOMP_VERSION=${AOMP_VERSION:-"21.0"}
@@ -302,18 +303,6 @@ for arch in ${ROCMLIBS_GFXLIST//;/ }; do
302303
fi
303304
done
304305

305-
# rccl currently doesn't support gfx1103 and gfx1150.
306-
RCCL_GFXLIST=${RCCL_GFXLIST:-"gfx90a;gfx942"}
307-
308-
# Do a sanity check for stanalone build that RCCL_GFXLIST is a subset of ROCMLIBS_GFXLIST
309-
for arch in ${RCCL_GFXLIST//;/ }; do
310-
if [[ ! $ROCMLIBS_GFXLIST =~ $arch ]]; then
311-
echo "ERROR: RCCL_GFXLIST architecture $arch not in ROCMLIBS_GFXLIST $ROCMLIBS_GFXLIST"
312-
echo "Please update RCCL_GFXLIST to be a subset of ROCMLIBS_GFXLIST, or leave it undefined to use the default architecture list."
313-
exit 1
314-
fi
315-
done
316-
317306
# Calculate the number of threads to use for make
318307
COMP_THREADS=1
319308
if [ ! -z `which "getconf"` ]; then

bin/rocmlibs/build_rccl.sh

Lines changed: 21 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,8 @@
11
#!/bin/bash
2-
#
3-
# build_rccl.sh: Script to build and install rccl.
4-
# This uses a slightly modified install.sh from rccl.
52
#
3+
# build_rccl.sh: Script to build and install rccl.
4+
# This uses a slightly modified install.sh from rccl.
5+
# It has a dependency on rocm-core.
66
BUILD_TYPE=${BUILD_TYPE:-Release}
77

88
# --- Start standard header to set AOMP environment variables ----
@@ -11,6 +11,8 @@ thisdir=`dirname $realpath`
1111
. $thisdir/../aomp_common_vars
1212
# --- end standard header ----
1313

14+
export ROCM_PATH=$AOMP_INSTALL_DIR
15+
1416
_howcalled=${0##*/}
1517
_shname=${_howcalled#build_*} # strip off build_
1618
_libname=${_shname%*.sh} # strip off .sh to get component libname = rccl
@@ -32,9 +34,9 @@ fi
3234
# rccl needs cmake 3.25, so put prereq cmake first in path
3335
export PATH=$AOMP_SUPP/cmake/bin:$PATH
3436

35-
if [ $AOMP_STANDALONE_BUILD == 1 ] ; then
36-
if [ ! -L $AOMP ] ; then
37-
if [ -d $AOMP ] ; then
37+
if [ $AOMP_STANDALONE_BUILD == 1 ] ; then
38+
if [ ! -L $AOMP ] ; then
39+
if [ -d $AOMP ] ; then
3840
echo "ERROR: Directory $AOMP is a physical directory."
3941
echo " It must be a symbolic link or not exist"
4042
exit 1
@@ -45,25 +47,25 @@ else
4547
exit 1
4648
fi
4749

48-
if [ "$1" == "nocmake" ] ; then
50+
if [ "$1" == "nocmake" ] ; then
4951
_nocmake_option="--nocmake"
5052
else
5153
_nocmake_option=""
5254
fi
5355

54-
if [ "$BUILD_TYPE" == "Release" ] ; then
56+
if [ "$BUILD_TYPE" == "Release" ] ; then
5557
_buildtype_option=""
5658
_build_dir_option="release"
5759
else
5860
_buildtype_option="--debug"
5961
_build_dir_option="debug"
6062
fi
6163

62-
# Make sure we can update the install directory
64+
# Make sure we can update the install directory
6365
if [ "$1" == "install" ] ; then
6466
$SUDO mkdir -p $AOMP_INSTALL_DIR
6567
$SUDO touch $AOMP_INSTALL_DIR/testfile
66-
if [ $? != 0 ] ; then
68+
if [ $? != 0 ] ; then
6769
echo "ERROR: No update access to $AOMP_INSTALL_DIR"
6870
exit 1
6971
fi
@@ -78,7 +80,7 @@ if [ "$1" != "nocmake" ] && [ "$1" != "install" ] ; then
7880
rm -rf $BUILD_DIR/build/rocmlibs/$_libname
7981
mkdir -p $BUILD_DIR/build/rocmlibs/$_libname
8082
else
81-
if [ ! -d $BUILD_DIR/build/rocmlibs/$_libname ] ; then
83+
if [ ! -d $BUILD_DIR/build/rocmlibs/$_libname ] ; then
8284
echo "ERROR: The build directory $BUILD_DIR/build/rocmlibs/$_libname does not exist"
8385
echo " run $0 without install and without nocmake option"
8486
exit 1
@@ -92,11 +94,12 @@ if [ "$1" != "install" ] ; then
9294
echo
9395
echo " -----Running cmake in install.sh ---"
9496
echo cd $AOMP_REPOS/build/rocmlibs/$_libname
95-
cd $AOMP_REPOS/build/rocmlibs/$_libname
97+
cd $AOMP_REPOS/build/rocmlibs/$_libname
9698
# --noinstall must follow --prefix because --prefix sets install_library=true
97-
echo $_source_dir/install.sh $_nocmake_option $_buildtype_option -j $AOMP_JOB_THREADS --prefix $AOMP_INSTALL_DIR $_set_ninja_gen --source_dir $_source_dir --noinstall --amdgpu_targets $RCCL_GFXLIST
98-
$_source_dir/install.sh $_nocmake_option $_buildtype_option -j $AOMP_JOB_THREADS --prefix $AOMP_INSTALL_DIR $_set_ninja_gen --source_dir $_source_dir --noinstall --amdgpu_targets $RCCL_GFXLIST
99-
if [ $? != 0 ] ; then
99+
# add --disable-colltrace because it is not supported on gfx1150
100+
# Log the exact command line that is executed next (including -l) so the build log matches what actually runs.
echo $_source_dir/install.sh $_nocmake_option $_buildtype_option -j $AOMP_JOB_THREADS --prefix $AOMP_INSTALL_DIR $_set_ninja_gen --source_dir $_source_dir --noinstall --amdgpu_targets $ROCMLIBS_GFXLIST -l --disable-colltrace
101+
$_source_dir/install.sh $_nocmake_option $_buildtype_option -j $AOMP_JOB_THREADS --prefix $AOMP_INSTALL_DIR $_set_ninja_gen --source_dir $_source_dir --noinstall --amdgpu_targets $ROCMLIBS_GFXLIST -l --disable-colltrace
102+
if [ $? != 0 ] ; then
100103
echo "ERROR install failed."
101104
echo " $MYCMAKEOPTS"
102105
cd $_curdir
@@ -118,9 +121,9 @@ if [ "$1" == "install" ] ; then
118121
echo "SUCCESSFUL INSTALL to $AOMP_INSTALL_DIR"
119122
echo
120123
removepatch $_source_dir
121-
else
122-
echo
124+
else
125+
echo
123126
echo "SUCCESSFUL BUILD, please run: $0 install"
124127
echo " to install into $AOMP_INSTALL_DIR"
125-
echo
128+
echo
126129
fi

bin/rocmlibs/build_rocm-core.sh

Lines changed: 130 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,130 @@
1+
#!/bin/bash
#
#  build_rocm-core.sh:  Script to build and install rocm-core library
#
#  Usage:
#    build_rocm-core.sh            FRESH START: erase the build directory,
#                                  then run cmake and make
#    build_rocm-core.sh install    install a previous build into
#                                  $AOMP_INSTALL_DIR
#
#  The "nocmake" option is rejected, and the script only runs when
#  AOMP_STANDALONE_BUILD=1.
#
BUILD_TYPE=${BUILD_TYPE:-Release}

# --- Start standard header to set AOMP environment variables ----
realpath=$(realpath "$0")
thisdir=$(dirname "$realpath")
. "$thisdir/../aomp_common_vars"
# --- end standard header ----

_repo_dir=$AOMP_REPOS/rocmlibs/rocm-core
# Single definition of the build directory so that the create, cmake/make
# and install steps can never disagree about where the build lives.
_build_dir=$BUILD_DIR/build/rocmlibs/rocm-core

# patchrepo/removepatch are not defined in this file; presumably they are
# provided by the sourced aomp_common_vars.
patchrepo "$_repo_dir"

export CC=$LLVM_INSTALL_LOC/bin/clang
export CXX=$LLVM_INSTALL_LOC/bin/clang++
export FC=$LLVM_INSTALL_LOC/bin/flang
export ROCM_DIR=$AOMP_INSTALL_DIR
export ROCM_PATH=$AOMP_INSTALL_DIR
# Put the prereq cmake and the AOMP install first in the path
export PATH=$AOMP_SUPP/cmake/bin:$AOMP_INSTALL_DIR/bin:$PATH
export HIP_USE_PERL_SCRIPTS=1
export USE_PERL_SCRIPTS=1
export NUM_PROC=$AOMP_JOB_THREADS
export CXXFLAGS="-I$AOMP_INSTALL_DIR/include -D__HIP_PLATFORM_AMD__=1"
export LDFLAGS="-fPIC"
if [ "$AOMP_USE_CCACHE" != 0 ] ; then
  _ccache_bin=$(command -v ccache)
  # The compiler launcher is intentionally left disabled for this component.
  # export CMAKE_CXX_COMPILER_LAUNCHER=$_ccache_bin
fi

if [ "$AOMP_STANDALONE_BUILD" == 1 ] ; then
  # $AOMP must be a symbolic link or not exist at all.
  if [ ! -L "$AOMP" ] ; then
    if [ -d "$AOMP" ] ; then
      echo "ERROR: Directory $AOMP is a physical directory."
      echo " It must be a symbolic link or not exist"
      exit 1
    fi
  fi
else
  echo "ERROR: $0 only valid for AOMP_STANDALONE_BUILD=1"
  exit 1
fi

if [ "$1" == "nocmake" ] ; then
  echo "ERROR: nocmake is not an option for $0"
  exit 1
fi

# Make sure we can update the install directory
if [ "$1" == "install" ] ; then
  $SUDO mkdir -p "$AOMP_INSTALL_DIR"
  if ! $SUDO touch "$AOMP_INSTALL_DIR/testfile" ; then
    echo "ERROR: No update access to $AOMP_INSTALL_DIR"
    exit 1
  fi
  $SUDO rm "$AOMP_INSTALL_DIR/testfile"
fi

if [ "$1" != "install" ] ; then
  echo
  echo "This is a FRESH START. ERASING any previous builds in $_build_dir"
  echo "Use $0 install to avoid FRESH START."
  echo rm -rf "$_build_dir"
  rm -rf "$_build_dir"
  mkdir -p "$_build_dir"
else
  if [ ! -d "$_build_dir" ] ; then
    echo "ERROR: The build directory $_build_dir does not exist"
    echo " run $0 without install option. "
    exit 1
  fi
fi

if [ "$1" != "install" ] ; then
  # Remember start directory to return on exit
  _curdir=$PWD
  echo " -----Running cmake ---"
  echo cd "$_build_dir"
  # Build in the directory that was just created above; stop if it cannot
  # be entered so cmake never configures into an unrelated directory.
  cd "$_build_dir" || exit 1
  pwd
  # MYCMAKEOPTS is a whitespace-separated option list that is deliberately
  # expanded unquoted below so each -D option becomes its own argument.
  MYCMAKEOPTS="
-DCMAKE_CXX_COMPILER=$CXX
-DCMAKE_C_COMPILER=$CC
-DROCM_DIR:PATH=$AOMP_INSTALL_DIR
-DCPACK_PACKAGING_INSTALL_PREFIX=$AOMP_INSTALL_DIR
-DCMAKE_INSTALL_PREFIX=$AOMP_INSTALL_DIR
-DROCM_PATH=$AOMP_INSTALL_DIR
-DCMAKE_PREFIX_PATH:PATH=$AOMP_INSTALL_DIR
-DCPACK_SET_DESTDIR=OFF
-DCMAKE_BUILD_TYPE=$BUILD_TYPE
-DCMAKE_VERBOSE_MAKEFILE=1
-DROCM_VERSION=$ROCM_VERSION
"
  echo $AOMP_CMAKE $MYCMAKEOPTS $_repo_dir
  if ! $AOMP_CMAKE $MYCMAKEOPTS "$_repo_dir" ; then
    echo "ERROR cmake failed."
    echo " $MYCMAKEOPTS"
    cd "$_curdir"
    exit 1
  fi

  if ! make -j$AOMP_JOB_THREADS ; then
    echo "ERROR make -j $AOMP_JOB_THREADS failed"
    exit 1
  fi
fi

if [ "$1" == "install" ] ; then
  echo " -----Installing to $AOMP_INSTALL_DIR ---- "
  cd "$_build_dir" || exit 1
  if ! make -j$AOMP_JOB_THREADS install ; then
    echo "ERROR install to $AOMP_INSTALL_DIR failed "
    exit 1
  fi
  echo
  echo "SUCCESSFUL INSTALL to $AOMP_INSTALL_DIR"
  echo
  removepatch "$_repo_dir"
else
  echo
  echo "SUCCESSFUL BUILD, please run: $0 install"
  echo " to install into $AOMP_INSTALL_DIR"
  echo
fi

bin/rocmlibs/build_rocmlibs.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -100,7 +100,7 @@ components="prereq rocm-cmake"
100100
if [ "$AOMP_STANDALONE_BUILD" == 1 ] ; then
101101
# This ordered build is important when starting from scratch
102102
#components="$components rocBLAS rocPRIM rocSPARSE rocSOLVER hipBLAS-common hipBLAS rocRAND hipRAND rccl half hipSOLVER"
103-
components="$components rocBLAS rocPRIM rocSPARSE rocSOLVER hipBLAS-common hipBLAS rocRAND hipRAND rccl half "
103+
components="$components rocBLAS rocPRIM rocSPARSE rocSOLVER hipBLAS-common hipBLAS rocRAND hipRAND rocm-core rccl half "
104104
else
105105
echo "ERROR: Cannot run $0 with AOMP_STANDALONE_BUILD=$AOMP_STANDALONE_BUILD"
106106
echo " Please set $AOMP_STANDALONE_BUILD=1"

bin/rocmlibs/rocmlibs.xml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
<project remote="rocm" path="hipRAND" name="hipRAND" revision="release/rocm-rel-6.4" groups="unlocked"/>
1717
<project remote="rocm" path="hipSOLVER" name="hipSOLVER" revision="release/rocm-rel-6.4" groups="unlocked"/>
1818
<project remote="rocm" path="hipSPARSE" name="hipSPARSE" revision="release/rocm-rel-6.4" groups="unlocked"/>
19+
<project remote="rocm" path="rocm-core" name="rocm-core" revision="release/rocm-rel-6.4" groups="unlocked"/>
1920
<project remote="rocm" path="rccl" name="rccl" revision="release/rocm-rel-6.4" groups="unlocked"/>
2021
<project remote="rocm" path="rocBLAS" name="rocBLAS" revision="release/rocm-rel-6.4" groups="unlocked"/>
2122
<project remote="rocm" path="rocFFT" name="rocFFT" revision="release/rocm-rel-6.4" groups="unlocked"/>

0 commit comments

Comments
 (0)