Update run-docs to avoid code duplication #1439

Merged: 29 commits, merged on Jan 30, 2025

Commits
e90ee12
Update run-docs to avoid duplicate code
mikekgfb Dec 23, 2024
ddb3773
Update run-docs
mikekgfb Dec 24, 2024
d834661
Merge branch 'main' into patch-35
mikekgfb Dec 27, 2024
9e82bbc
Update build_native.sh
mikekgfb Dec 29, 2024
6087a58
Update run-docs
mikekgfb Jan 1, 2025
347c64e
Merge branch 'main' into patch-35
mikekgfb Jan 6, 2025
92a2f8a
Merge branch 'main' into patch-35
mikekgfb Jan 15, 2025
d602eed
Merge branch 'main' into patch-35
mikekgfb Jan 17, 2025
f0df24e
Merge branch 'main' into patch-35
mikekgfb Jan 18, 2025
a3772f1
Merge branch 'main' into patch-35
mikekgfb Jan 22, 2025
f670dc9
Merge branch 'main' into patch-35
mikekgfb Jan 23, 2025
158b3e6
Merge branch 'pytorch:main' into patch-35
mikekgfb Jan 23, 2025
dcb2a60
Update run-docs
mikekgfb Jan 23, 2025
adcb28a
Update run-docs
mikekgfb Jan 24, 2025
053058d
Merge branch 'main' into patch-35
Jack-Khuu Jan 24, 2025
5e21fff
Merge branch 'main' into patch-35
Jack-Khuu Jan 24, 2025
680937b
Merge branch 'main' into patch-35
mikekgfb Jan 27, 2025
bd594fb
Update README.md
mikekgfb Jan 28, 2025
1015de7
Update quantization.md
mikekgfb Jan 28, 2025
02dd5db
Update run-docs
mikekgfb Jan 28, 2025
da1b98d
Update run.cpp
mikekgfb Jan 28, 2025
f3ee3e4
Update run.cpp
mikekgfb Jan 28, 2025
5629e29
Create cuda-32.json
mikekgfb Jan 28, 2025
902a5da
Create mobile-32.json
mikekgfb Jan 28, 2025
0ac7096
Update run-docs
mikekgfb Jan 28, 2025
4d97e78
Update run-docs
mikekgfb Jan 28, 2025
c787e1a
Update run-readme-pr-mps.yml
mikekgfb Jan 29, 2025
156ceda
Update run.cpp
mikekgfb Jan 29, 2025
b77ddf3
Update run.cpp
mikekgfb Jan 30, 2025
202 changes: 62 additions & 140 deletions .ci/scripts/run-docs
@@ -1,145 +1,67 @@
# /bin/bash -x
#!/bin/bash -x

if [ "X$1" == "X" ]; then
# Check if an argument was provided
if [ -z "$1" ]; then
echo "Must specify document to run"
exit 1
fi

if [ "$1" == "readme" ]; then
echo "::group::Create script to run README"
python3 torchchat/utils/scripts/updown.py --create-sections --file README.md --replace 'llama3.1:stories15M,-l 3:-l 2' --suppress huggingface-cli,HF_TOKEN > ./run-readme.sh
# for good measure, if something happened to updown processor,
# and it did not error out, fail with an exit 1
echo "exit 1" >> ./run-readme.sh
echo "::endgroup::"

echo "::group::Run README"
echo "*******************************************"
cat ./run-readme.sh
echo "*******************************************"
bash -x ./run-readme.sh
echo "::endgroup::"

exit 0
fi

if [ "$1" == "quantization" ]; then
echo "::group::Create script to run quantization"
python3 torchchat/utils/scripts/updown.py --create-sections --file docs/quantization.md --replace llama3:stories15M --suppress huggingface-cli,HF_TOKEN > ./run-quantization.sh
# for good measure, if something happened to updown processor,
# and it did not error out, fail with an exit 1
echo "exit 1" >> ./run-quantization.sh
echo "::endgroup::"

echo "::group::Run quantization"
echo "*******************************************"
cat ./run-quantization.sh
echo "*******************************************"
bash -x ./run-quantization.sh
echo "::endgroup::"

exit 0
fi

if [ "$1" == "gguf" ]; then
echo "::group::Create script to run gguf"
python3 torchchat/utils/scripts/updown.py --file docs/GGUF.md --replace 'llama3:stories15M,-l 3:-l 2' --suppress huggingface-cli,HF_TOKEN > ./run-gguf.sh
# for good measure, if something happened to updown processor,
# and it did not error out, fail with an exit 1
echo "exit 1" >> ./run-gguf.sh
echo "::endgroup::"

echo "::group::Run gguf"
echo "*******************************************"
cat ./run-gguf.sh
echo "*******************************************"
bash -x ./run-gguf.sh
echo "::endgroup::"
fi


if [ "$1" == "advanced" ]; then
echo "::group::Create script to run advanced"
python3 torchchat/utils/scripts/updown.py --file docs/ADVANCED-USERS.md --replace 'llama3:stories15M,-l 3:-l 2' --suppress huggingface-cli,HF_TOKEN > ./run-advanced.sh
# for good measure, if something happened to updown processor,
# and it did not error out, fail with an exit 1
echo "exit 1" >> ./run-advanced.sh
echo "::endgroup::"

echo "::group::Run advanced"
echo "*******************************************"
cat ./run-advanced.sh
echo "*******************************************"
bash -x ./run-advanced.sh
echo "::endgroup::"
fi

if [ "$1" == "evaluation" ]; then
echo "::group::Create script to run evaluation"
python3 torchchat/utils/scripts/updown.py --file torchchat/utils/docs/evaluation.md --replace 'llama3:stories15M,-l 3:-l 2' --suppress huggingface-cli,HF_TOKEN > ./run-evaluation.sh
# for good measure, if something happened to updown processor,
# and it did not error out, fail with an exit 1
echo "exit 1" >> ./run-evaluation.sh
echo "::endgroup::"

echo "::group::Run evaluation"
echo "*******************************************"
cat ./run-evaluation.sh
echo "*******************************************"
bash -x ./run-evaluation.sh
fi

if [ "$1" == "multimodal" ]; then

# Expecting that this might fail this test as-is, because
# it's the first on-pr test depending on github secrets for access with HF token access

echo "::group::Create script to run multimodal"
python3 torchchat/utils/scripts/updown.py --file docs/multimodal.md > ./run-multimodal.sh
# for good measure, if something happened to updown processor,
# and it did not error out, fail with an exit 1
echo "exit 1" >> ./run-multimodal.sh
echo "::endgroup::"

echo "::group::Run multimodal"
echo "*******************************************"
cat ./run-multimodal.sh
echo "*******************************************"
bash -x ./run-multimodal.sh
echo "::endgroup::"
fi

if [ "$1" == "native" ]; then

echo "::group::Create script to run native-execution"
python3 torchchat/utils/scripts/updown.py --file docs/native-execution.md > ./run-native.sh
# for good measure, if something happened to updown processor,
# and it did not error out, fail with an exit 1
echo "exit 1" >> ./run-native.sh
echo "::endgroup::"

echo "::group::Run native-execution"
echo "*******************************************"
cat ./run-native.sh
echo "*******************************************"
bash -x ./run-native.sh
echo "::endgroup::"
fi

if [ "$1" == "distributed" ]; then

echo "::group::Create script to run distributed"
python3 torchchat/utils/scripts/updown.py --file docs/distributed.md --replace 'llama3.1:stories110M,-l 3:-l 2' --suppress huggingface-cli,HF_TOKEN > ./run-distributed.sh
python3 torchchat/utils/scripts/updown.py --file docs/distributed.md --suppress huggingface-cli,HF_TOKEN > ./run-distributed.sh
# for good measure, if something happened to updown processor,
# and it did not error out, fail with an exit 1
echo "exit 1" >> ./run-distributed.sh
echo "::endgroup::"

echo "::group::Run distributed"
echo "*******************************************"
cat ./run-distributed.sh
echo "*******************************************"
bash -x ./run-distributed.sh
echo "::endgroup::"
fi
# Pre-initialize variables
filepath=""
parameters="--replace 'llama3:stories15M,-l3:-l2' --suppress huggingface-cli,HF_TOKEN"
script_name="./run-${1}.sh" # Dynamically initialize script name

# Use a case statement to handle the $1 argument
case "$1" in
"readme")
filepath="README.md"
;;
"quantization")
filepath="docs/quantization.md"
;;
"gguf")
filepath="docs/GGUF.md"
;;
"advanced")
filepath="docs/ADVANCED-USERS.md"
;;
"evaluation")
filepath="torchchat/utils/docs/evaluation.md"
;;
"multimodal")
filepath="docs/multimodal.md"
parameters="" # Clear parameters
;;
"native")
filepath="docs/native-execution.md"
parameters="" # Clear parameters
;;
"distributed")
filepath="docs/distributed.md"
parameters="--replace 'llama3.1:stories110M,-l3:-l2' --suppress huggingface-cli,HF_TOKEN" # Use stories110M to avoid need for authentication
;;
"local")
filepath="docs/local-model.md"
parameters="" # Clear parameters
;;

*)
echo "Unknown option: $1"
exit 1
;;
esac

# Generate the script
echo "::group::Create script to run $1"
python3 torchchat/utils/scripts/updown.py --file "$filepath" $parameters > "$script_name"
# if something happened to updown processor, and it did not error out, fail with an exit 1
echo "exit 1" >> "$script_name"
echo "::endgroup::"

# Run the script
echo "::group::Run $1"
echo "*******************************************"
cat "$script_name"
echo "*******************************************"
bash -x "$script_name"
echo "::endgroup::"
5 changes: 3 additions & 2 deletions .github/workflows/run-readme-pr-mps.yml
@@ -15,8 +15,8 @@ jobs:
conda create -y -n test-readme-mps-macos python=3.10.11 llvm-openmp
conda activate test-readme-mps-macos
set -x
# NS: Remove previous installation of torch first
# as this script does not isntall anything into conda env but rather as system dep
# NS: Remove previous installation of torch first
# as this script does not install anything into conda env but rather as system dep
pip3 uninstall -y torch || true
set -eou pipefail
@@ -37,6 +37,7 @@ jobs:
uses: pytorch/test-infra/.github/workflows/macos_job.yml@main
with:
runner: macos-m1-14
timeout: 60
script: |
set -x
conda create -y -n test-quantization-mps-macos python=3.10.11
2 changes: 1 addition & 1 deletion README.md
@@ -413,7 +413,7 @@ torchchat/utils/scripts/build_native.sh et

Execute using the runner
```bash
cmake-out/et_run llama3.1.pte -z `python3 torchchat.py where llama3.1`/tokenizer.model -l 3 -i "Once upon a time"
cmake-out/et_run llama3.1.pte -z `python3 torchchat.py where llama3.1`/tokenizer.model -i "Once upon a time"
```

</details>
4 changes: 2 additions & 2 deletions docs/quantization.md
@@ -182,7 +182,7 @@ OMP_NUM_THREADS=6 python3 torchchat.py generate llama3.1 --dso-path llama3_1.so
If you built the AOTI runner with link_torchao_ops as discussed in the setup section, you can also use the C++ runner:

```
OMP_NUM_THREADS=6 ./cmake-out/aoti_run llama3_1.so -z $HOME/.torchchat/model-cache/meta-llama/Meta-Llama-3.1-8B-Instruct/tokenizer.model -l 3 -i "Once upon a time,"
OMP_NUM_THREADS=6 ./cmake-out/aoti_run llama3_1.so -z $HOME/.torchchat/model-cache/meta-llama/Meta-Llama-3.1-8B-Instruct/tokenizer.model -i "Once upon a time," # -l 3
```

#### ExecuTorch
@@ -193,7 +193,7 @@ python torchchat.py export llama3.1 --device cpu --dtype float32 --quantize '{"e
Note: only the ExecuTorch C++ runner in torchchat when built using the instructions in the setup can run the exported *.pte file. It will not work with the `python torchchat.py generate` command.

```
./cmake-out/et_run llama3_1.pte -z $HOME/.torchchat/model-cache/meta-llama/Meta-Llama-3.1-8B-Instruct/tokenizer.model -l 3 -i "Once upon a time,"
./cmake-out/et_run llama3_1.pte -z $HOME/.torchchat/model-cache/meta-llama/Meta-Llama-3.1-8B-Instruct/tokenizer.model -l3 -i "Once upon a time,"
```

## Experimental TorchAO MPS lowbit kernels
44 changes: 28 additions & 16 deletions runner/run.cpp
@@ -803,41 +803,53 @@ int main(int argc, char *argv[]) {
} else {
error_usage();
}
for (int i = 2; i < argc; i += 2) {
for (int i = 2; i < argc; i += 1) {
// do some basic validation
if (i + 1 >= argc) {
error_usage();
} // must have arg after flag
char *parm = argv[i+1];
// uniarg means the arg comes right after the letter in accordance with posix
int uniarg = strlen(argv[i]) > 2;

if (argv[i][0] != '-') {
error_usage();
} // must start with dash
if (strlen(argv[i]) != 2) {

if (strlen(argv[i]) < 2) {
error_usage();
} // must be -x (one dash, one letter)
} // must have at least dash '-' and option letter

if (uniarg) {
parm=&argv[i][2];
} else if (i + 1 >= argc) {
error_usage();
} // must have arg after option if flag is not contiguous to option

// read in the args
if (argv[i][1] == 't') {
temperature = atof(argv[i + 1]);
temperature = atof(parm);
} else if (argv[i][1] == 'p') {
topp = atof(argv[i + 1]);
topp = atof(parm);
} else if (argv[i][1] == 's') {
rng_seed = atoi(argv[i + 1]);
rng_seed = atoi(parm);
} else if (argv[i][1] == 'n') {
steps = atoi(argv[i + 1]);
steps = atoi(parm);
} else if (argv[i][1] == 'v') {
vocab_size = atoi(argv[i + 1]);
vocab_size = atoi(parm);
} else if (argv[i][1] == 'i') {
prompt = argv[i + 1];
prompt = parm;
} else if (argv[i][1] == 'z') {
tokenizer_path = argv[i + 1];
tokenizer_path = parm;
} else if (argv[i][1] == 'm') {
mode = argv[i + 1];
mode = parm;
} else if (argv[i][1] == 'y') {
system_prompt = argv[i + 1];
system_prompt = parm;
} else if (argv[i][1] == 'l') {
llama_ver = atoi(argv[i + 1]);
llama_ver = atoi(parm);
} else {
error_usage();
}

// account for parameter
i += (uniarg)?0:1;
}

if (model_path == NULL) {
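
The argument-parsing change above lets the runner accept POSIX-style contiguous options such as `-l3` in addition to the original space-separated `-l 3`, advancing the loop by one or two positions accordingly. A short usage sketch; `stories15M.pte` and `tokenizer.model` are placeholder paths, not files provided by this PR:

```bash
# Both spellings are parsed identically by the updated runner/run.cpp:
cmake-out/et_run stories15M.pte -z tokenizer.model -l 3 -i "Once upon a time"   # separate flag and value
cmake-out/et_run stories15M.pte -z tokenizer.model -l3  -i "Once upon a time"   # contiguous, POSIX-style
```

This is what allows the README and quantization docs in this PR to drop or inline the `-l 3` flag without breaking the documented commands.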
5 changes: 5 additions & 0 deletions torchchat/quant_config/cuda-32.json
@@ -0,0 +1,5 @@
{
"executor": {"accelerator": "cuda"},
"precision": {"dtype": "bf16"},
"linear:int4": {"groupsize" : 32}
}
4 changes: 4 additions & 0 deletions torchchat/quant_config/mobile-32.json
@@ -0,0 +1,4 @@
{
"embedding": {"bitwidth": 4, "groupsize" : 32},
"linear:a8w4dq": {"groupsize" : 32}
}
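
The two new config files appear to be group-size-32 variants of torchchat's quantization presets, suited to the small story models exercised by the docs CI. A hedged sketch of how such a config is typically passed to torchchat's export step; the model name, output paths, and `--output-*-path` flags are assumptions based on torchchat's documented export interface, not part of this diff:

```bash
# CUDA int4 config added by this PR (bf16 precision, group size 32):
python3 torchchat.py export stories15M \
  --quantize torchchat/quant_config/cuda-32.json \
  --output-dso-path stories15M.so

# Mobile/ExecuTorch config (4-bit embeddings, a8w4dq linear, group size 32):
python3 torchchat.py export stories15M \
  --quantize torchchat/quant_config/mobile-32.json \
  --output-pte-path stories15M.pte
```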