
Commit 479b24b

.github: Add basic gpu test workflow (#106)
1 parent 0a8ebf9 commit 479b24b

1 file changed: +92 −93 lines

.github/workflows/compile_t4.yml

@@ -1,106 +1,105 @@
-name: Compile main
+name: Run compile tests

 on:
+  pull_request:
   push:
     branches:
       - main
-  pull_request:
   workflow_dispatch:

 jobs:
-  run-tinystories:
-    strategy:
-      matrix:
-        runner: [4-core-ubuntu-gpu-t4]
-    runs-on: ${{matrix.runner}}
-    steps:
-    - name: Checkout repo
-      uses: actions/checkout@v2
-    - name: Setup Python
-      uses: actions/setup-python@v2
-      with:
-        python-version: 3.11
-    - name: Print machine info
-      run: |
-        uname -a
-        if [ $(uname -s) == Darwin ]; then
-          sysctl machdep.cpu.brand_string
-          sysctl machdep.cpu.core_count
-        fi
-    - name: Install requirements
-      run: |
-        pip install --pre torch torchvision torchaudio --index-url https://download.pytorch.org/whl/nightly/cu121
-        pip install -r requirements.txt
-    - name: Download checkpoints
-      run: |
-        mkdir -p checkpoints/stories15M
-        pushd checkpoints/stories15M
-        wget https://huggingface.co/karpathy/tinyllamas/resolve/main/stories15M.pt
-        wget https://github.com/karpathy/llama2.c/raw/master/tokenizer.model
-        popd
-    - name: Run inference
-      run: |
-        export MODEL_PATH=checkpoints/stories15M/stories15M.pt
-        export MODEL_NAME=stories15M
-        export MODEL_DIR=/tmp
-        python generate.py --device cuda --checkpoint-path ${MODEL_PATH} --temperature 0 > ./output_eager
-        cat ./output_eager
-        python generate.py --device cuda --compile --checkpoint-path ${MODEL_PATH} --temperature 0 > ./output_compiled
-        cat ./output_compiled
-        python export.py --device cuda --checkpoint-path ${MODEL_PATH} --output-dso-path ${MODEL_DIR}/${MODEL_NAME}.so
-        python generate.py --device cuda --checkpoint-path ${MODEL_PATH} --temperature 0 --dso-path ${MODEL_DIR}/${MODEL_NAME}.so > ./output_aoti
-        cat ./output_aoti
+  test-cuda:
+    uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
+    with:
+      runner: linux.g5.4xlarge.nvidia.gpu
+      gpu-arch-type: cuda
+      gpu-arch-version: "12.1"
+      script: |
+        echo "::group::Print machine info"
+        uname -a
+        if [ $(uname -s) == Darwin ]; then
+          sysctl machdep.cpu.brand_string
+          sysctl machdep.cpu.core_count
+        fi
+        echo "::endgroup::"
+
+        echo "::group::Download checkpoints"
+        # Install requirements
+        pip install --pre torch torchvision torchaudio --index-url https://download.pytorch.org/whl/nightly/cu121
+        pip install -r requirements.txt
+        echo "::endgroup::"
+
+        echo "::group::Download checkpoints"
+        mkdir -p checkpoints/stories15M
+        pushd checkpoints/stories15M
+        wget https://huggingface.co/karpathy/tinyllamas/resolve/main/stories15M.pt
+        wget https://github.com/karpathy/llama2.c/raw/master/tokenizer.model
+        popd
+        echo "::endgroup::"
+
+        echo "::group::Run inference"
+        export MODEL_PATH=checkpoints/stories15M/stories15M.pt
+        export MODEL_NAME=stories15M
+        export MODEL_DIR=/tmp
+        python generate.py --device cuda --checkpoint-path ${MODEL_PATH} --temperature 0 > ./output_eager
+        cat ./output_eager
+        python generate.py --device cuda --compile --checkpoint-path ${MODEL_PATH} --temperature 0 > ./output_compiled
+        cat ./output_compiled
+        python export.py --device cuda --checkpoint-path ${MODEL_PATH} --output-dso-path ${MODEL_DIR}/${MODEL_NAME}.so
+        python generate.py --device cuda --checkpoint-path ${MODEL_PATH} --temperature 0 --dso-path ${MODEL_DIR}/${MODEL_NAME}.so > ./output_aoti
+        cat ./output_aoti

-        echo "******************************************"
-        echo "******* Emb: channel-wise quantized ******"
-        echo "******************************************"
-        python generate.py --device cuda --quant '{"embedding" : {"bitwidth": 8, "group_size": 0}}' --checkpoint-path ${MODEL_PATH} --temperature 0 > ./output_eager
-        cat ./output_eager
-        python generate.py --device cuda --compile --quant '{"embedding" : {"bitwidth": 8, "group_size": 0}}' --checkpoint-path ${MODEL_PATH} --temperature 0 > ./output_compiled
-        cat ./output_compiled
-        python export.py --device cuda --quant '{"embedding" : {"bitwidth": 8, "group_size": 0}}' --checkpoint-path ${MODEL_PATH} --output-dso-path ${MODEL_DIR}/${MODEL_NAME}.so
-        python generate.py --device cuda --checkpoint-path ${MODEL_PATH} --temperature 0 --dso-path ${MODEL_DIR}/${MODEL_NAME}.so > ./output_aoti
-        cat ./output_aoti
+        echo "******************************************"
+        echo "******* Emb: channel-wise quantized ******"
+        echo "******************************************"
+        # python generate.py --device cuda --quant '{"embedding" : {"bitwidth": 8, "group_size": 0}}' --checkpoint-path ${MODEL_PATH} --temperature 0 > ./output_eager
+        # cat ./output_eager
+        # python generate.py --device cuda --compile --quant '{"embedding" : {"bitwidth": 8, "group_size": 0}}' --checkpoint-path ${MODEL_PATH} --temperature 0 > ./output_compiled
+        # cat ./output_compiled
+        # python export.py --device cuda --quant '{"embedding" : {"bitwidth": 8, "group_size": 0}}' --checkpoint-path ${MODEL_PATH} --output-dso-path ${MODEL_DIR}/${MODEL_NAME}.so
+        # python generate.py --device cuda --checkpoint-path ${MODEL_PATH} --temperature 0 --dso-path ${MODEL_DIR}/${MODEL_NAME}.so > ./output_aoti
+        # cat ./output_aoti

-        echo "******************************************"
-        echo "******** Emb: group-wise quantized *******"
-        echo "******************************************"
-        python generate.py --device cuda --quant '{"embedding" : {"bitwidth": 8, "group_size": 8}}' --checkpoint-path ${MODEL_PATH} --temperature 0 > ./output_eager
-        cat ./output_eager
-        python generate.py --device cuda --compile --quant '{"embedding" : {"bitwidth": 8, "group_size": 8}}' --checkpoint-path ${MODEL_PATH} --temperature 0 > ./output_compiled
-        cat ./output_compiled
-        python export.py --device cuda --quant '{"embedding" : {"bitwidth": 8, "group_size": 8}}' --checkpoint-path ${MODEL_PATH} --output-dso-path ${MODEL_DIR}/${MODEL_NAME}.so
-        python generate.py --device cuda --checkpoint-path ${MODEL_PATH} --temperature 0 --dso-path ${MODEL_DIR}/${MODEL_NAME}.so > ./output_aoti
-        cat ./output_aoti
+        echo "******************************************"
+        echo "******** Emb: group-wise quantized *******"
+        echo "******************************************"
+        # python generate.py --device cuda --quant '{"embedding" : {"bitwidth": 8, "group_size": 8}}' --checkpoint-path ${MODEL_PATH} --temperature 0 > ./output_eager
+        # cat ./output_eager
+        # python generate.py --device cuda --compile --quant '{"embedding" : {"bitwidth": 8, "group_size": 8}}' --checkpoint-path ${MODEL_PATH} --temperature 0 > ./output_compiled
+        # cat ./output_compiled
+        # python export.py --device cuda --quant '{"embedding" : {"bitwidth": 8, "group_size": 8}}' --checkpoint-path ${MODEL_PATH} --output-dso-path ${MODEL_DIR}/${MODEL_NAME}.so
+        # python generate.py --device cuda --checkpoint-path ${MODEL_PATH} --temperature 0 --dso-path ${MODEL_DIR}/${MODEL_NAME}.so > ./output_aoti
+        # cat ./output_aoti

-        echo "******************************************"
-        echo "******* INT8 channel-wise quantized ******"
-        echo "******************************************"
-        python generate.py --device cuda --quant '{"linear:int8" : {"bitwidth": 8, "group_size": 0}}' --checkpoint-path ${MODEL_PATH} --temperature 0 > ./output_eager
-        cat ./output_eager
-        python generate.py --device cuda --compile --quant '{"linear:int8" : {"bitwidth": 8, "group_size": 0}}' --checkpoint-path ${MODEL_PATH} --temperature 0 > ./output_compiled
-        cat ./output_compiled
-        python export.py --device cuda --quant '{"linear:int8" : {"bitwidth": 8, "group_size": 0}}' --checkpoint-path ${MODEL_PATH} --output-dso-path ${MODEL_DIR}/${MODEL_NAME}.so
-        python generate.py --device cuda --checkpoint-path ${MODEL_PATH} --temperature 0 --dso-path ${MODEL_DIR}/${MODEL_NAME}.so > ./output_aoti
-        cat ./output_aoti
+        echo "******************************************"
+        echo "******* INT8 channel-wise quantized ******"
+        echo "******************************************"
+        # python generate.py --device cuda --quant '{"linear:int8" : {"bitwidth": 8, "group_size": 0}}' --checkpoint-path ${MODEL_PATH} --temperature 0 > ./output_eager
+        # cat ./output_eager
+        # python generate.py --device cuda --compile --quant '{"linear:int8" : {"bitwidth": 8, "group_size": 0}}' --checkpoint-path ${MODEL_PATH} --temperature 0 > ./output_compiled
+        # cat ./output_compiled
+        # python export.py --device cuda --quant '{"linear:int8" : {"bitwidth": 8, "group_size": 0}}' --checkpoint-path ${MODEL_PATH} --output-dso-path ${MODEL_DIR}/${MODEL_NAME}.so
+        # python generate.py --device cuda --checkpoint-path ${MODEL_PATH} --temperature 0 --dso-path ${MODEL_DIR}/${MODEL_NAME}.so > ./output_aoti
+        # cat ./output_aoti

-        echo "******************************************"
-        echo "******** INT8 group-wise quantized *******"
-        echo "******************************************"
-        python generate.py --device cuda --quant '{"linear:int8" : {"bitwidth": 8, "group_size": 8}}' --checkpoint-path ${MODEL_PATH} --temperature 0 > ./output_eager
-        cat ./output_eager
-        python generate.py --device cuda --compile --quant '{"linear:int8" : {"bitwidth": 8, "group_size": 8}}' --checkpoint-path ${MODEL_PATH} --temperature 0 > ./output_compiled
-        cat ./output_compiled
-        python export.py --device cuda --quant '{"linear:int8" : {"bitwidth": 8, "group_size": 8}}' --checkpoint-path ${MODEL_PATH} --output-dso-path ${MODEL_DIR}/${MODEL_NAME}.so
-        python generate.py --device cuda --checkpoint-path ${MODEL_PATH} --temperature 0 --dso-path ${MODEL_DIR}/${MODEL_NAME}.so > ./output_aoti
-        cat ./output_aoti
+        echo "******************************************"
+        echo "******** INT8 group-wise quantized *******"
+        echo "******************************************"
+        # python generate.py --device cuda --quant '{"linear:int8" : {"bitwidth": 8, "group_size": 8}}' --checkpoint-path ${MODEL_PATH} --temperature 0 > ./output_eager
+        # cat ./output_eager
+        # python generate.py --device cuda --compile --quant '{"linear:int8" : {"bitwidth": 8, "group_size": 8}}' --checkpoint-path ${MODEL_PATH} --temperature 0 > ./output_compiled
+        # cat ./output_compiled
+        # python export.py --device cuda --quant '{"linear:int8" : {"bitwidth": 8, "group_size": 8}}' --checkpoint-path ${MODEL_PATH} --output-dso-path ${MODEL_DIR}/${MODEL_NAME}.so
+        # python generate.py --device cuda --checkpoint-path ${MODEL_PATH} --temperature 0 --dso-path ${MODEL_DIR}/${MODEL_NAME}.so > ./output_aoti
+        # cat ./output_aoti

-        echo "tests complete"
-        echo "******************************************"
-        # echo "********* EAGER vs TORCH.COMPILE *********"
-        # echo "******************************************"
-        # diff output_eager output_compiled
-        # echo "******************************************"
-        # echo "********* EAGER vs AOT INDUCTOR *********"
-        # echo "******************************************"
-        # diff output_eager output_aoti
+        echo "tests complete"
+        echo "******************************************"
+        echo "::endgroup::"
+        # echo "********* EAGER vs TORCH.COMPILE *********"
+        # echo "******************************************"
+        # diff output_eager output_compiled
+        # echo "******************************************"
+        # echo "********* EAGER vs AOT INDUCTOR *********"
+        # echo "******************************************"
+        # diff output_eager output_aoti
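
Since the rewritten workflow keeps the workflow_dispatch trigger, a run can also be started by hand rather than waiting for a push or pull request. A minimal sketch using the GitHub CLI, assuming an authenticated gh inside a clone of the repository and that the file keeps the name compile_t4.yml:

    # Trigger the workflow manually on the default branch.
    gh workflow run compile_t4.yml

    # Follow the progress of the resulting run from the terminal.
    gh run watch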
