Skip to content

Commit 241ff65

Browse files
committed
test efa with usr lib
1 parent fafd41c commit 241ff65

File tree

2 files changed

+11
-11
lines changed

2 files changed

+11
-11
lines changed

dlc_developer_config.toml

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,7 @@ build_inference = false
4646

4747
# Set do_build to "false" to skip builds and test the latest image built by this PR
4848
# Note: do_build can only be set to "false" after at least one build has already completed for this PR
49-
do_build = true
49+
do_build = false
5050

5151
[notify]
5252
### Notify on test failures
@@ -57,12 +57,12 @@ notify_test_failures = false
5757

5858
[test]
5959
### On by default
60-
sanity_tests = true
61-
security_tests = true
60+
sanity_tests = false
61+
security_tests = false
6262
safety_check_test = false
6363
ecr_scan_allowlist_feature = false
64-
ecs_tests = true
65-
eks_tests = true
64+
ecs_tests = false
65+
eks_tests = false
6666
ec2_tests = true
6767
# Set it to true if you are preparing a Benchmark related PR
6868
ec2_benchmark_tests = false
@@ -74,7 +74,7 @@ ec2_benchmark_tests = false
7474
ec2_tests_on_heavy_instances = true
7575
### SM specific tests
7676
### On by default
77-
sagemaker_local_tests = true
77+
sagemaker_local_tests = false
7878
### Set enable_ipv6 = true to run tests with IPv6-enabled resources
7979
### Off by default (set to false)
8080
enable_ipv6 = false
@@ -92,13 +92,13 @@ enable_ipv6 = false
9292
ipv6_vpc_name = ""
9393

9494
# run standard sagemaker remote tests from test/sagemaker_tests
95-
sagemaker_remote_tests = true
95+
sagemaker_remote_tests = false
9696
# run efa sagemaker tests
97-
sagemaker_efa_tests = true
97+
sagemaker_efa_tests = false
9898
# run release_candidate_integration tests
99-
sagemaker_rc_tests = true
99+
sagemaker_rc_tests = false
100100
# run sagemaker benchmark tests
101-
sagemaker_benchmark_tests = true
101+
sagemaker_benchmark_tests = false
102102

103103
# SM remote EFA test instance type
104104
sagemaker_remote_efa_instance_type = ""

test/dlc_tests/container_tests/bin/efa/testEFA

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -82,7 +82,7 @@ check_efa_nccl_all_reduce(){
8282
# versions in DLC images.
8383
mpirun -x FI_PROVIDER="efa" -n $NODES -N $GPU_COUNT --hostfile $NUM_HOSTS_FILE \
8484
-x NCCL_DEBUG=INFO ${USE_DEVICE_RDMA_ARG} -x NCCL_PROTO=simple -x NCCL_ALGO=ring -x RDMAV_FORK_SAFE=1 \
85-
-x PATH -x LD_LIBRARY_PATH=${CUDA_HOME}/lib:${CUDA_HOME}/lib64:$LD_LIBRARY_PATH \
85+
-x PATH -x LD_LIBRARY_PATH=${CUDA_HOME}/lib/:${CUDA_HOME}/lib64/:/usr/lib/x86_64-linux-gnu/:/usr/lib/:$LD_LIBRARY_PATH \
8686
-x NCCL_SOCKET_IFNAME=^lo --mca pml ^cm --mca btl tcp,self --mca btl_tcp_if_exclude lo,docker0 --bind-to none \
8787
/all_reduce_perf -b 8 -e 1G -f 2 -g 1 -c 1 -n 100 2>&1 | tee "${TRAINING_LOG}"
8888
fi

0 commit comments

Comments
 (0)