11#! /bin/bash
22# SBATCH --exclusive
3- # SBATCH --nodes=1
4- # SBATCH --ntasks=64
3+ # SBATCH --nodes=2
4+ # SBATCH --ntasks=128
55# SBATCH --constraint=hpc6id.32xlarge
66# SBATCH --partition=hpc6id
77# SBATCH --chdir=/fsx/slurm-out/mechanical
@@ -15,33 +15,33 @@ export I_MPI_FABRICS=shm:ofi
1515export I_MPI_OFI_PROVIDER=efa
1616export I_MPI_MULTIRAIL=1
1717# export FI_EFA_RECVWIN_SIZE=65536
18+ export FI_EFA_FORK_SAFE=1
1819module load libfabric-aws
1920export INTELMPI_ROOT=" $( dirname $( dirname $( which mpirun) ) ) "
2021# ####################### EFA settings ########################
2122
22-
2323export ANSYSLMD_LICENSE_FILE=${ANSYSLMD_LICENSE_FILE:- " [email protected] " }
2424
25- mechanicalversion=${1:- " 231 " }
25+ mechanicalversion=${1:- " v251 " }
2626input_file=${2:- " /fsx/.../your_file.dat" }
2727basedir=${BASE_DIR:- " /fsx" }
2828
29- APP_BIN_PATH=" ${basedir} /ansys_inc/v{${mechanicalversion} /ansys/bin/ansys${mechanicalversion} "
30-
29+ APP_BIN_PATH=" ${basedir} /ansys_inc/${mechanicalversion} /ansys/bin/mapdl"
3130TOKEN=$( curl -s -X PUT " http://169.254.169.254/latest/api/token" -H " X-aws-ec2-metadata-token-ttl-seconds: 21600" )
3231workdir=" $( readlink -m " ${basedir} /${SLURM_JOB_NAME% .* } " ) /Run/${SLURM_JOB_ID} -${SLURM_JOB_NUM_NODES} x$( curl -s -H " X-aws-ec2-metadata-token: $TOKEN " http://169.254.169.254/latest/meta-data/instance-type) -${SLURM_NPROCS} -$( date " +%d-%m-%Y-%H-%M" ) "
3332
3433mkdir -p " ${workdir} " && cd " ${workdir} "
3534
36- scontrol show nodes $SLURM_NODELIST | grep NodeHostName= | awk ' {print $2}' | sed ' s/NodeHostName=//' > hostfile
3735export cores_x_node=$(( (SLURM_NPROCS / SLURM_JOB_NUM_NODES ) + ( SLURM_NPROCS % SLURM_JOB_NUM_NODES > 0 )) )
3836
39- MACHINES=$( xargs printf ' %s:$cores_x_node, ' < hostfile)
37+ for i in $( scontrol show hostnames=$SLURM_JOB_NODELIST ) ; do
38+ machines=$machines :$i :$cores_x_node
39+ done
40+ machines=${machines: 1}
4041
4142cp $0 .
4243ln -s " ${input_file} " .
4344
44-
4545echo " Drop caches on all nodes"
4646mpirun -np $SLURM_JOB_NUM_NODES -ppn 1 /bin/bash -c " sync && echo 3 | sudo tee /proc/sys/vm/drop_caches"
4747
@@ -51,5 +51,5 @@ mpirun -np $SLURM_JOB_NUM_NODES -ppn 1 /bin/bash -c "echo always | sudo tee /sys
5151echo " Install missing lib"
5252mpirun -np $SLURM_JOB_NUM_NODES -ppn 1 /bin/bash -c " sudo yum -y install mesa-libGLU"
5353
54-
55- " ${APP_BIN_PATH} " -b -dis -mpi intelmpi -ssh -np ${SLURM_NPROCS} - machines $MACHINES -i $( basename ${input_file} ) -o output.log
54+ # Ansys Mechanical derives the total core count from the -machines host list (host:cores pairs), not from -np
55+ " ${APP_BIN_PATH} " -b -dis -mpi intelmpi -ssh -machines $machines -i $( basename ${input_file} ) -o output.log
0 commit comments