Skip to content

Commit a3999b1

Browse files
sarckk and facebook-github-bot
authored and committed
Update gcc version for FBGEMM install in CI (#2654)
Summary: TorchRec CI is currently failing because of an incompatible GLIBCXX version. The cause is that FBGEMM now requires g++ 11.1+ for building binaries that reference GLIBCXX_3.4.29 (as of pytorch/pytorch#141035). As recommended in https://github.com/pytorch/FBGEMM/blob/main/.github/scripts/utils_build.bash and pytorch/FBGEMM#3423, install GCC using conda to control the glibcxx version being used. Differential Revision: D67607624
1 parent 504642a commit a3999b1

File tree

7 files changed

+85
-20
lines changed

7 files changed

+85
-20
lines changed

.github/scripts/install_fbgemm.sh

+2-2
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ echo "CHANNEL"
1212
echo "$CHANNEL"
1313

1414
if [ "$CHANNEL" = "nightly" ]; then
15-
${CONDA_RUN} pip install fbgemm-gpu --index-url https://download.pytorch.org/whl/nightly/"$CU_VERSION"
15+
${CONDA_RUN} pip --pre install fbgemm-gpu --index-url https://download.pytorch.org/whl/nightly/"$CU_VERSION"
1616
elif [ "$CHANNEL" = "test" ]; then
17-
${CONDA_RUN} pip install fbgemm-gpu --index-url https://download.pytorch.org/whl/test/"$CU_VERSION"
17+
${CONDA_RUN} pip --pre install fbgemm-gpu --index-url https://download.pytorch.org/whl/test/"$CU_VERSION"
1818
fi

.github/workflows/docs.yml

+23-3
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,8 @@ jobs:
3333
- name: Setup conda
3434
run: |
3535
wget https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh -O ~/miniconda.sh
36-
bash ~/miniconda.sh -b -p $HOME/miniconda
36+
bash ~/miniconda.sh -b -p $HOME/miniconda -u
37+
conda update -n base -c defaults -y conda
3738
- name: setup Path
3839
run: |
3940
echo "/home/ec2-user/miniconda/bin" >> $GITHUB_PATH
@@ -51,7 +52,26 @@ jobs:
5152
- name: Install gcc
5253
shell: bash
5354
run: |
54-
sudo apt-get install build-essential
55+
conda install -n build_binary -c conda-forge -y gxx_linux-64=11.4.0 sysroot_linux-64=2.17
56+
echo "[INSTALL] Setting the C/C++ compiler symlinks ..."
57+
cc_path=$(conda run -n build_binary printenv CC)
58+
cxx_path=$(conda run -n build_binary printenv CXX)
59+
ln -sf "${cc_path}" "$(dirname "$cc_path")/cc"
60+
ln -sf "${cc_path}" "$(dirname "$cc_path")/gcc"
61+
ln -sf "${cxx_path}" "$(dirname "$cxx_path")/c++"
62+
ln -sf "${cxx_path}" "$(dirname "$cxx_path")/g++"
63+
64+
conda_prefix=$(conda run -n build_binary printenv CONDA_PREFIX)
65+
echo "[TEST] Enumerating libstdc++.so files ..."
66+
all_libcxx_libs=$(find "${conda_prefix}/lib" -type f -name 'libstdc++.so*' -print | sort)
67+
for f in $all_libcxx_libs; do
68+
echo "$f";
69+
objdump -TC "$f" | grep GLIBCXX_ | sed 's/.*GLIBCXX_\([.0-9]*\).*/GLIBCXX_\1/g' | sort -Vu | cat
70+
echo ""
71+
done
72+
73+
echo "[TEST] Appending the Conda-installed libstdc++ to LD_PRELOAD ..."
74+
conda env config vars set -n build_binary LD_PRELOAD="${all_libcxx_libs[0]}"
5575
- name: setup Path
5676
run: |
5777
echo /usr/local/bin >> $GITHUB_PATH
@@ -61,7 +81,7 @@ jobs:
6181
conda install -n build_binary --yes pytorch cpuonly -c pytorch-nightly
6282
- name: Install fbgemm
6383
run: |
64-
conda run -n build_binary pip install fbgemm-gpu --index-url https://download.pytorch.org/whl/nightly/cpu
84+
conda run -n build_binary pip install --pre fbgemm-gpu --index-url https://download.pytorch.org/whl/nightly/cpu
6585
- name: Install torchmetrics
6686
run: |
6787
conda run -n build_binary pip install torchmetrics==1.0.3

.github/workflows/pyre.yml

+1-1
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@ jobs:
2020
- name: Install dependencies
2121
run: >
2222
pip install torch --index-url https://download.pytorch.org/whl/nightly/cpu &&
23-
pip install fbgemm-gpu --index-url https://download.pytorch.org/whl/nightly/cpu &&
23+
pip install --pre fbgemm-gpu --index-url https://download.pytorch.org/whl/nightly/cpu &&
2424
pip install -r requirements.txt &&
2525
pip install pyre-check-nightly==$(cat .pyre_configuration | grep version | awk '{print $2}' | sed 's/\"//g')
2626
- name: Pyre check

.github/workflows/release_build.yml

+25-5
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,7 @@ jobs:
4848
run: |
4949
wget https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh -O ~/miniconda.sh
5050
bash ~/miniconda.sh -b -p $HOME/miniconda -u
51+
conda update -n base -c defaults -y conda
5152
- name: setup Path
5253
run: |
5354
echo "/home/ec2-user/miniconda/bin" >> $GITHUB_PATH
@@ -62,9 +63,28 @@ jobs:
6263
- name: check python version
6364
run: |
6465
conda run -n build_binary python --version
65-
- name: Install C/C++ compilers
66-
run: |
67-
sudo yum install -y gcc gcc-c++
66+
- name: Install gcc
67+
run: |
68+
conda install -n build_binary -c conda-forge -y gxx_linux-64=11.4.0 sysroot_linux-64=2.17
69+
echo "[INSTALL] Setting the C/C++ compiler symlinks ..."
70+
cc_path=$(conda run -n build_binary printenv CC)
71+
cxx_path=$(conda run -n build_binary printenv CXX)
72+
ln -sf "${cc_path}" "$(dirname "$cc_path")/cc"
73+
ln -sf "${cc_path}" "$(dirname "$cc_path")/gcc"
74+
ln -sf "${cxx_path}" "$(dirname "$cxx_path")/c++"
75+
ln -sf "${cxx_path}" "$(dirname "$cxx_path")/g++"
76+
77+
conda_prefix=$(conda run -n build_binary printenv CONDA_PREFIX)
78+
echo "[TEST] Enumerating libstdc++.so files ..."
79+
all_libcxx_libs=$(find "${conda_prefix}/lib" -type f -name 'libstdc++.so*' -print | sort)
80+
for f in $all_libcxx_libs; do
81+
echo "$f";
82+
objdump -TC "$f" | grep GLIBCXX_ | sed 's/.*GLIBCXX_\([.0-9]*\).*/GLIBCXX_\1/g' | sort -Vu | cat
83+
echo ""
84+
done
85+
86+
echo "[TEST] Appending the Conda-installed libstdc++ to LD_PRELOAD ..."
87+
conda env config vars set -n build_binary LD_PRELOAD="${all_libcxx_libs[0]}"
6888
- name: Install PyTorch and CUDA
6989
shell: bash
7090
run: |
@@ -73,7 +93,7 @@ jobs:
7393
shell: bash
7494
run: |
7595
conda run -n build_binary pip install numpy
76-
conda run -n build_binary pip install fbgemm-gpu
96+
conda run -n build_binary pip install --pre fbgemm-gpu
7797
- name: Install Dependencies
7898
shell: bash
7999
run: |
@@ -179,7 +199,7 @@ jobs:
179199
shell: bash
180200
run: |
181201
conda run -n build_binary pip install numpy
182-
conda run -n build_binary pip install fbgemm-gpu
202+
conda run -n build_binary pip install --pre fbgemm-gpu
183203
- name: Install torchmetrics
184204
shell: bash
185205
run: |

.github/workflows/unittest_ci.yml

+1-1
Original file line numberDiff line numberDiff line change
@@ -81,7 +81,7 @@ jobs:
8181
conda run -n build_binary \
8282
python -c "import torch.distributed"
8383
conda run -n build_binary \
84-
pip install fbgemm-gpu --index-url https://download.pytorch.org/whl/nightly/${{ matrix.cuda-tag }}
84+
pip install --pre fbgemm-gpu --index-url https://download.pytorch.org/whl/nightly/${{ matrix.cuda-tag }}
8585
conda run -n build_binary \
8686
python -c "import fbgemm_gpu"
8787
echo "fbgemm_gpu succeeded"

.github/workflows/unittest_ci_cpu.yml

+27-6
Original file line numberDiff line numberDiff line change
@@ -45,17 +45,41 @@ jobs:
4545
conda info
4646
python --version
4747
conda run -n build_binary python --version
48+
49+
echo "[INSTALL] Installing gcc..."
50+
conda install -n build_binary -c conda-forge -y gxx_linux-64=11.4.0 sysroot_linux-64=2.17
51+
52+
echo "[INSTALL] Setting the C/C++ compiler symlinks ..."
53+
cc_path=$(conda run -n build_binary printenv CC)
54+
cxx_path=$(conda run -n build_binary printenv CXX)
55+
ln -sf "${cc_path}" "$(dirname "$cc_path")/cc"
56+
ln -sf "${cc_path}" "$(dirname "$cc_path")/gcc"
57+
ln -sf "${cxx_path}" "$(dirname "$cxx_path")/c++"
58+
ln -sf "${cxx_path}" "$(dirname "$cxx_path")/g++"
59+
60+
conda_prefix=$(conda run -n build_binary printenv CONDA_PREFIX)
61+
echo "[INSTALL] Enumerating libstdc++.so files ..."
62+
all_libcxx_libs=$(find "${conda_prefix}/lib" -type f -name 'libstdc++.so*' -print | sort)
63+
for f in $all_libcxx_libs; do
64+
echo "$f";
65+
objdump -TC "$f" | grep GLIBCXX_ | sed 's/.*GLIBCXX_\([.0-9]*\).*/GLIBCXX_\1/g' | sort -Vu | cat
66+
echo ""
67+
done
68+
69+
echo "[INSTALL] Appending the Conda-installed libstdc++ to LD_PRELOAD ..."
70+
conda env config vars set -n build_binary LD_PRELOAD="${all_libcxx_libs[0]}"
71+
4872
conda run -n build_binary \
4973
pip install torch --index-url https://download.pytorch.org/whl/nightly/cpu
5074
conda run -n build_binary \
51-
python -c "import torch"
75+
python -c "import torch; print(torch.__version__, torch.version.cuda); "
5276
echo "torch succeeded"
5377
conda run -n build_binary \
5478
python -c "import torch.distributed"
5579
conda run -n build_binary \
56-
pip install fbgemm-gpu --index-url https://download.pytorch.org/whl/nightly/cpu
80+
pip install --pre fbgemm-gpu --index-url https://download.pytorch.org/whl/nightly/cpu
5781
conda run -n build_binary \
58-
python -c "import fbgemm_gpu"
82+
python -c "import torch; import fbgemm_gpu; print(fbgemm_gpu.__version__)"
5983
echo "fbgemm_gpu succeeded"
6084
conda run -n build_binary \
6185
pip install -r requirements.txt
@@ -73,9 +97,6 @@ jobs:
7397
python -m pytest torchrec -v -s -W ignore::pytest.PytestCollectionWarning --continue-on-collection-errors \
7498
--ignore-glob=**/test_utils/
7599
echo "Starting C++ Tests"
76-
conda install -n build_binary -y gxx_linux-64
77-
conda run -n build_binary \
78-
x86_64-conda-linux-gnu-g++ --version
79100
conda install -n build_binary -c anaconda redis -y
80101
conda run -n build_binary redis-server --daemonize yes
81102
mkdir cpp-build

torchrec/distributed/utils.py

+6-2
Original file line numberDiff line numberDiff line change
@@ -525,7 +525,9 @@ def create_global_tensor_shape_stride_from_metadata(
525525
"""
526526
size = None
527527
if parameter_sharding.sharding_type == ShardingType.COLUMN_WISE.value:
528-
row_dim = parameter_sharding.sharding_spec.shards[0].shard_sizes[0] # pyre-ignore[16]
528+
row_dim = parameter_sharding.sharding_spec.shards[0].shard_sizes[
529+
0
530+
] # pyre-ignore[16]
529531
col_dim = 0
530532
for shard in parameter_sharding.sharding_spec.shards:
531533
col_dim += shard.shard_sizes[1]
@@ -551,4 +553,6 @@ def create_global_tensor_shape_stride_from_metadata(
551553
for _ in range(devices_per_node):
552554
row_dim += parameter_sharding.sharding_spec.shards[0].shard_sizes[0]
553555
size = torch.Size([row_dim, col_dim])
554-
return size, (size[1], 1) if size else (torch.Size([0, 0]), (0, 1)) # pyre-ignore[7]
556+
return size, (
557+
(size[1], 1) if size else (torch.Size([0, 0]), (0, 1))
558+
) # pyre-ignore[7]

0 commit comments

Comments
 (0)