Skip to content

Commit 27a72f0

Browse files
committed
[misc] support torch2.3 (#5893)
* [misc] support torch2.3
* [devops] update compatibility ci
* [devops] update compatibility ci
* [devops] add debug
* [devops] add debug
* [devops] add debug
* [devops] add debug
* [devops] remove debug
* [devops] remove debug
1 parent 530283d commit 27a72f0

File tree

5 files changed

+27
-74
lines changed

5 files changed

+27
-74
lines changed

.compatibility

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,3 @@
1 1   2.1.0-12.1.0
2 2   2.2.2-12.1.0
  3 + 2.3.0-12.1.0

.github/workflows/compatiblity_test_on_dispatch.yml

Lines changed: 9 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -55,41 +55,27 @@ jobs:
5555
steps:
5656
- name: Install dependencies
5757
run: |
58-
pip install -U pip setuptools==68.2.2 wheel --user
59-
- uses: actions/checkout@v2
60-
with:
61-
repository: hpcaitech/TensorNVMe
62-
ssh-key: ${{ secrets.SSH_KEY_FOR_CI }}
63-
path: TensorNVMe
64-
- name: Install tensornvme
65-
run: |
66-
cd TensorNVMe
6758
apt update && apt install -y cmake
68-
pip install -r requirements.txt
69-
DISABLE_URING=1 pip install -v .
59+
pip install -U pip setuptools==68.2.2 wheel --user
60+
7061
- uses: actions/checkout@v2
7162
with:
7263
ssh-key: ${{ secrets.SSH_KEY_FOR_CI }}
73-
- name: Download cub for CUDA 10.2
74-
run: |
75-
CUDA_VERSION=$(nvcc -V | awk -F ',| ' '/release/{print $6}')
7664

77-
# check if it is CUDA 10.2
78-
# download cub
79-
if [ "$CUDA_VERSION" = "10.2" ]; then
80-
wget https://github.com/NVIDIA/cub/archive/refs/tags/1.8.0.zip
81-
unzip 1.8.0.zip
82-
cp -r cub-1.8.0/cub/ colossalai/kernel/cuda_native/csrc/kernels/include/
83-
fi
8465
- name: Install Colossal-AI
8566
run: |
8667
BUILD_EXT=1 pip install -v .
87-
pip install -r requirements/requirements-test.txt
68+
pip install --no-cache-dir -r requirements/requirements-test.txt
69+
70+
- name: Install tensornvme
71+
run: |
72+
DISABLE_URING=1 pip install -v git+https://github.com/hpcaitech/TensorNVMe.git
73+
8874
- name: Unit Testing
8975
run: |
9076
PYTHONPATH=$PWD pytest --durations=0 tests
9177
env:
9278
DATA: /data/scratch/cifar-10
93-
LD_LIBRARY_PATH: /github/home/.tensornvme/lib:/usr/local/nvidia/lib:/usr/local/nvidia/lib64
79+
LD_LIBRARY_PATH: /github/home/.tensornvme/lib
9480
LLAMA_PATH: /data/scratch/llama-tiny
9581
MOE_TENSOR_PATH: /data/scratch/moe_tensors

.github/workflows/compatiblity_test_on_pr.yml

Lines changed: 9 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -49,42 +49,27 @@ jobs:
4949
steps:
5050
- name: Install dependencies
5151
run: |
52-
pip install -U pip setuptools==68.2.2 wheel --user
53-
- uses: actions/checkout@v2
54-
with:
55-
repository: hpcaitech/TensorNVMe
56-
ssh-key: ${{ secrets.SSH_KEY_FOR_CI }}
57-
path: TensorNVMe
58-
- name: Install tensornvme
59-
run: |
60-
cd TensorNVMe
6152
apt update && apt install -y cmake
62-
pip install -r requirements.txt
63-
DISABLE_URING=1 pip install -v .
53+
pip install -U pip setuptools==68.2.2 wheel --user
54+
6455
- uses: actions/checkout@v2
6556
with:
6657
ssh-key: ${{ secrets.SSH_KEY_FOR_CI }}
67-
- name: Download cub for CUDA 10.2
68-
run: |
69-
CUDA_VERSION=$(nvcc -V | awk -F ',| ' '/release/{print $6}')
70-
71-
# check if it is CUDA 10.2
72-
# download cub
73-
if [ "$CUDA_VERSION" = "10.2" ]; then
74-
wget https://github.com/NVIDIA/cub/archive/refs/tags/1.8.0.zip
75-
unzip 1.8.0.zip
76-
cp -r cub-1.8.0/cub/ colossalai/kernel/cuda_native/csrc/kernels/include/
77-
fi
7858

7959
- name: Install Colossal-AI
8060
run: |
8161
BUILD_EXT=1 pip install -v .
82-
pip install -r requirements/requirements-test.txt
62+
pip install --no-cache-dir -r requirements/requirements-test.txt
63+
64+
- name: Install tensornvme
65+
run: |
66+
DISABLE_URING=1 pip install -v git+https://github.com/hpcaitech/TensorNVMe.git
67+
8368
- name: Unit Testing
8469
run: |
8570
PYTHONPATH=$PWD pytest --durations=0 tests
8671
env:
8772
DATA: /data/scratch/cifar-10
88-
LD_LIBRARY_PATH: /github/home/.tensornvme/lib:/usr/local/nvidia/lib:/usr/local/nvidia/lib64
73+
LD_LIBRARY_PATH: /github/home/.tensornvme/lib
8974
LLAMA_PATH: /data/scratch/llama-tiny
9075
MOE_TENSOR_PATH: /data/scratch/moe_tensors

.github/workflows/compatiblity_test_on_schedule.yml

Lines changed: 7 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -43,47 +43,28 @@ jobs:
4343
steps:
4444
- name: Install dependencies
4545
run: |
46+
apt update && apt install -y cmake
4647
pip install -U pip setuptools==68.2.2 wheel --user
4748
48-
- uses: actions/checkout@v2
49-
with:
50-
repository: hpcaitech/TensorNVMe
51-
ssh-key: ${{ secrets.SSH_KEY_FOR_CI }}
52-
path: TensorNVMe
53-
54-
- name: Install tensornvme
55-
run: |
56-
cd TensorNVMe
57-
apt update && apt install -y cmake
58-
pip install -r requirements.txt
59-
DISABLE_URING=1 pip install -v .
6049
- uses: actions/checkout@v2
6150
with:
6251
ssh-key: ${{ secrets.SSH_KEY_FOR_CI }}
6352

64-
- name: Download cub for CUDA 10.2
65-
run: |
66-
CUDA_VERSION=$(nvcc -V | awk -F ',| ' '/release/{print $6}')
67-
68-
# check if it is CUDA 10.2
69-
# download cub
70-
if [ "$CUDA_VERSION" = "10.2" ]; then
71-
wget https://github.com/NVIDIA/cub/archive/refs/tags/1.8.0.zip
72-
unzip 1.8.0.zip
73-
cp -r cub-1.8.0/cub/ colossalai/kernel/cuda_native/csrc/kernels/include/
74-
fi
75-
7653
- name: Install Colossal-AI
7754
run: |
7855
BUILD_EXT=1 pip install -v .
79-
pip install -r requirements/requirements-test.txt
56+
pip install --no-cache-dir -r requirements/requirements-test.txt
57+
58+
- name: Install tensornvme
59+
run: |
60+
DISABLE_URING=1 pip install -v git+https://github.com/hpcaitech/TensorNVMe.git
8061
8162
- name: Unit Testing
8263
run: |
8364
PYTHONPATH=$PWD pytest --durations=0 tests
8465
env:
8566
DATA: /data/scratch/cifar-10
86-
LD_LIBRARY_PATH: /github/home/.tensornvme/lib:/usr/local/nvidia/lib:/usr/local/nvidia/lib64
67+
LD_LIBRARY_PATH: /github/home/.tensornvme/lib
8768
LLAMA_PATH: /data/scratch/llama-tiny
8869
MOE_TENSOR_PATH: /data/scratch/moe_tensors
8970

requirements/requirements.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ click
 8  8   fabric
 9  9   contexttimer
10 10   ninja
11    - torch>=2.1.0,<2.3.0
   11 + torch>=2.1.0,<=2.3.0
12 12   safetensors
13 13   einops
14 14   pydantic

0 commit comments

Comments
 (0)