From bdac616154682650f184d01c1876df9fd72d5916 Mon Sep 17 00:00:00 2001 From: Scott Roy <161522778+metascroy@users.noreply.github.com> Date: Wed, 15 Jan 2025 11:55:09 -0800 Subject: [PATCH 01/24] update experimental kernels in torchchat --- .github/workflows/pull.yml | 55 +++++++++++++++++++++---- docs/quantization.md | 16 +++++--- install/install_requirements.sh | 4 +- torchchat/utils/quantize.py | 71 +++++++++++++++++++++++++-------- 4 files changed, 116 insertions(+), 30 deletions(-)
diff --git a/.github/workflows/pull.yml b/.github/workflows/pull.yml index 5dbafee9f..a6300211f 100644 --- a/.github/workflows/pull.yml +++ b/.github/workflows/pull.yml
@@ -1055,7 +1055,54 @@ jobs: ./runner/build_android.sh echo "Tests complete." - test-torchao-experimental: + test-torchao-experimental-python: + strategy: + matrix: + runner: [macos-14-xlarge] + runs-on: ${{matrix.runner}} + steps: + - name: Checkout repo + uses: actions/checkout@v3 + with: + submodules: true + - name: Setup Python + uses: actions/setup-python@v2 + with: + python-version: 3.10.11 + - name: Setup Xcode + if: runner.os == 'macOS' + uses: maxim-lobanov/setup-xcode@v1 + with: + xcode-version: '15.3' + - name: Print machine info + run: | + uname -a + if [ $(uname -s) == Darwin ]; then + sysctl machdep.cpu.brand_string + sysctl machdep.cpu.core_count + fi + - name: Install torchchat + run: | + echo "Installing pip3 packages" + ./install/install_requirements.sh + pip3 list + python3 -c 'import torch;print(f"torch: {torch.__version__, torch.version.git_version}")' + - name: Run inference + run: | + python torchchat.py download stories110M + wget -O ./tokenizer.model https://github.com/karpathy/llama2.c/raw/master/tokenizer.model + export PRMT="Once upon a time in a land far away" + echo "Generate eager" + python torchchat.py generate stories110M --temperature 0 --prompt "${PRMT}" --device cpu --dtype float32 --quantize '{"embedding:wx": {"bitwidth": 2, "groupsize": 32}, "linear:a8wxdq": {"bitwidth": 3, "groupsize": 128, "has_weight_zeros": false}}' + echo "Generate compile" + python torchchat.py generate stories110M --temperature 0 --prompt "${PRMT}" --device cpu --dtype float32 --quantize '{"embedding:wx": {"bitwidth": 2, "groupsize": 32}, "linear:a8wxdq": {"bitwidth": 3, "groupsize": 128, "has_weight_zeros": false}}' --compile + echo "Export AOTI" + python torchchat.py export stories110M --output-aoti-package-path ./model.pt2 --dtype float32 --quantize '{"embedding:wx": {"bitwidth": 2, "groupsize": 32}, "linear:a8wxdq": {"bitwidth": 3, "groupsize": 128, "has_weight_zeros": false}}' + echo "Generate AOTI" + python torchchat.py generate stories110M --aoti-package-path ./model.pt2 --prompt "${PRMT}" + echo "Tests complete." + + test-torchao-experimental-cpp: strategy: matrix: runner: [macos-14-xlarge] runs-on: ${{matrix.runner}} steps:
@@ -1109,18 +1156,12 @@ jobs: python torchchat.py download stories110M wget -O ./tokenizer.model https://github.com/karpathy/llama2.c/raw/master/tokenizer.model export PRMT="Once upon a time in a land far away" - echo "Generate eager" - python torchchat.py generate stories110M --temperature 0 --prompt "${PRMT}" --device cpu --dtype float32 --quantize '{"embedding:wx": {"bitwidth": 2, "groupsize": 32}, "linear:a8wxdq": {"bitwidth": 3, "groupsize": 128, "has_weight_zeros": false}}' - echo "Generate compile" - python torchchat.py generate stories110M --temperature 0 --prompt "${PRMT}" --device cpu --dtype float32 --quantize '{"embedding:wx": {"bitwidth": 2, "groupsize": 32}, "linear:a8wxdq": {"bitwidth": 3, "groupsize": 128, "has_weight_zeros": false}}' --compile echo "Export and run ET (C++ runner)" python torchchat.py export stories110M --output-pte-path ./model.pte --dtype float32 --quantize '{"embedding:wx": {"bitwidth": 2, "groupsize": 32}, "linear:a8wxdq": {"bitwidth": 3, "groupsize": 128, "has_weight_zeros": false}}' ./cmake-out/et_run ./model.pte -z ./tokenizer.model -t 0 -i "${PRMT}" echo "Export and run AOTI (C++ runner)" python torchchat.py export stories110M --output-aoti-package-path ./model.pt2 --dtype float32 --quantize '{"embedding:wx": {"bitwidth": 2, "groupsize": 32}, "linear:a8wxdq": {"bitwidth": 3, "groupsize": 128, "has_weight_zeros": false}}' ./cmake-out/aoti_run ./model.pt2 -z ./tokenizer.model -t 0 -i "${PRMT}" - echo "Generate AOTI" - python torchchat.py generate stories110M --aoti-package-path ./model.pt2 --prompt "${PRMT}" echo "Tests complete." test-torchao-experimental-mps:
diff --git a/docs/quantization.md b/docs/quantization.md index 704a7ed6a..3eaeae36b 100644 --- a/docs/quantization.md +++ b/docs/quantization.md
@@ -120,13 +120,15 @@ python3 torchchat.py generate llama3 --pte-path llama3.pte --prompt "Hello my n ## Experimental TorchAO lowbit kernels -WARNING: These kernels only work on devices with ARM CPUs, for example on Mac computers with Apple Silicon. +If you are on a Mac with Apple Silicon, we have 1- to 8-bit quantization available for embedding and linear layers, backed by CPU and MPS kernels. + +The CPU kernels are installed automatically by the torchchat install script and can be used out of the box. To use the MPS kernels, follow the setup instructions below. ### Use #### linear:a8wxdq The quantization scheme linear:a8wxdq dynamically quantizes activations to 8 bits, and quantizes the weights in a groupwise manner with a specified bitwidth and groupsize. -It takes arguments bitwidth (1, 2, 3, 4, 5, 6, 7), groupsize, and has_weight_zeros (true, false). +It takes arguments bitwidth (1, 2, 3, 4, 5, 6, 7, 8), groupsize (use -1 for channelwise quantization), and has_weight_zeros (true, false). The argument has_weight_zeros indicates whether the weights are quantized with scales only (has_weight_zeros: false) or with both scales and zeros (has_weight_zeros: true). Roughly speaking, {bitwidth: 4, groupsize: 32, has_weight_zeros: false} is similar to GGML's Q4_0 quantization scheme.
@@ -138,7 +140,9 @@ The quantization scheme embedding:wx quantizes embeddings in a groupwise manner You should expect high performance on ARM CPU if groupsize is divisible by 32. With other platforms and argument choices, a slow fallback kernel will be used. You will see warnings about this during quantization. ### Setup -To use linear:a8wxdq and embedding:wx, you must set up the torchao experimental kernels. These will only work on devices with ARM CPUs, for example on Mac computers with Apple Silicon. +If you are using the torchao ops from python, they are available out of the box on a Mac with Apple Silicon, and you can skip these setup steps. + +If you plan to use the kernels from the AOTI/ExecuTorch C++ runners, follow the setup steps below. From the torchchat root directory, run ``` bash torchchat/utils/scripts/build_torchao_ops.sh ``` This should take about 10 seconds to complete. -Note: if you want to use the new kernels in the AOTI and C++ runners, you must pass the flag link_torchao_ops when running the scripts that build the runners. +When building the AOTI and C++ runners, you must pass the flag link_torchao_ops when running the scripts that build the runners. ``` bash torchchat/utils/scripts/build_native.sh aoti link_torchao_ops ```
@@ -175,8 +179,8 @@ OMP_NUM_THREADS=6 python3 torchchat.py generate llama3.1 --device cpu --dtype fl #### AOTI ``` -OMP_NUM_THREADS=6 python torchchat.py export llama3.1 --device cpu --dtype float32 --quantize '{"embedding:wx": {"bitwidth": 2, "groupsize": 32}, "linear:a8wxdq": {"bitwidth": 3, "groupsize": 128, "has_weight_zeros": false}}' --output-dso llama3_1.so -OMP_NUM_THREADS=6 python3 torchchat.py generate llama3.1 --dso-path llama3_1.so --prompt "Once upon a time," --num-samples 5 +OMP_NUM_THREADS=6 python torchchat.py export llama3.1 --device cpu --dtype float32 --quantize '{"embedding:wx": {"bitwidth": 2, "groupsize": 32}, "linear:a8wxdq": {"bitwidth": 3, "groupsize": 128, "has_weight_zeros": false}}' --output-aoti-package-path llama3_1.pt2 +OMP_NUM_THREADS=6 python3 torchchat.py generate llama3.1 --aoti-package-path llama3_1.pt2 --prompt "Once upon a time," --num-samples 5 ``` If you built the AOTI runner with link_torchao_ops as discussed in the setup section, you can also use the C++ runner:
diff --git a/install/install_requirements.sh b/install/install_requirements.sh index 360ba1801..35a6967a9 100755 --- a/install/install_requirements.sh +++ b/install/install_requirements.sh
@@ -117,9 +117,11 @@ fi # For torchao need to install from github since nightly build doesn't have macos build. # TODO: Remove this and install nightly build, once it supports macos +# USE_CPP=1 indicates that the torchao experimental aten kernels will be built and loaded +# if on Mac with Apple Silicon ( set -x - $PIP_EXECUTABLE install git+https://github.com/pytorch/ao.git@2f97b0955953fa1a46594a27f0df2bc48d93e79d + USE_CPP=1 $PIP_EXECUTABLE install git+https://github.com/pytorch/ao.git@11333ba2cb5c4e792bc4f5c0d70c12991f972008 ) if [[ -x "$(command -v nvidia-smi)" ]]; then
diff --git a/torchchat/utils/quantize.py b/torchchat/utils/quantize.py index b90d098b3..3f7ed1b66 100644 --- a/torchchat/utils/quantize.py +++ b/torchchat/utils/quantize.py
@@ -50,6 +50,18 @@ state_dict_device, use_et_backend, ) +from torchao.experimental.packed_linear_int8_dynamic_activation_intx_weight_layout import ( + PackedLinearInt8DynamicActivationIntxWeightLayout, +) +from torchao.experimental.quant_api import ( + int8_dynamic_activation_intx_weight, + IntxWeightEmbeddingQuantizer, +) +from torchao.quantization.granularity import ( + PerGroup, + PerRow, +) +from torchao.dtypes import PlainLayout # Flag for whether the a8wxdq quantizer is available.
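For readers following the quantize.py changes, the hunk below wires these imports into quantize_model. The same flow can be exercised standalone; this is a minimal sketch, assuming a torchao build that ships these experimental modules and a torch build that exposes sub-byte dtypes such as torch.int3, with a hypothetical toy model:

```
import torch
import torch.nn as nn
from torchao.quantization.quant_api import quantize_
from torchao.quantization.granularity import PerGroup, PerRow
from torchao.experimental.quant_api import int8_dynamic_activation_intx_weight
from torchao.experimental.packed_linear_int8_dynamic_activation_intx_weight_layout import (
    PackedLinearInt8DynamicActivationIntxWeightLayout,
)

# Hypothetical toy model; quantize_ rewrites its Linear layers in place.
model = nn.Sequential(nn.Linear(256, 256)).to(torch.float32)

# Mirrors the q_kwargs handling in the hunk below: groupsize == -1 selects
# channelwise (PerRow) granularity, otherwise groupwise (PerGroup).
bit_width, group_size, has_weight_zeros = 3, 128, False
granularity = PerRow() if group_size == -1 else PerGroup(group_size)
weight_dtype = getattr(torch, f"int{bit_width}")

quantize_(
    model,
    int8_dynamic_activation_intx_weight(
        weight_dtype=weight_dtype,
        granularity=granularity,
        has_weight_zeros=has_weight_zeros,
        layout=PackedLinearInt8DynamicActivationIntxWeightLayout(),
    ),
)
```

As in the hunk below, retrying with PlainLayout() keeps the same quantization scheme but skips the packed weight format when packing is not supported on the host platform.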
@@ -117,7 +129,47 @@ def quantize_model( unwrap_tensor_subclass(model) continue - if quantizer in ["linear:a8wxdq", "embedding:wx"]: + if quantizer == "linear:a8wxdq": + if get_precision() != torch.float32: + print(f"Quantizer {quantizer} requires float32 inputs, but received {get_precision()}. Changing dtype to float32. Note that after quantization, the weights will be lowbit integers, not float32.") + set_precision(torch.float32) + + group_size = q_kwargs["groupsize"] + bit_width = q_kwargs["bitwidth"] + has_weight_zeros = q_kwargs["has_weight_zeros"] + granularity = PerRow() + if group_size != -1: + granularity = PerGroup(group_size) + weight_dtype = getattr(torch, f"int{bit_width}") + + try: + quantize_( + model, + int8_dynamic_activation_intx_weight( + weight_dtype=weight_dtype, + granularity=granularity, + has_weight_zeros=has_weight_zeros, + layout=PackedLinearInt8DynamicActivationIntxWeightLayout(), + ), + ) + except Exception as e: + print(f"Encountered error during quantization: {e}") + print("Trying with PlainLayout") + quantize_( + model, + int8_dynamic_activation_intx_weight( + weight_dtype=weight_dtype, + granularity=granularity, + has_weight_zeros=has_weight_zeros, + layout=PlainLayout(), + ), + ) + + if not support_tensor_subclass: + unwrap_tensor_subclass(model) + continue + + if quantizer == "embedding:wx": # These quantizers require float32 input weights. Note that after quantization, # the weights will no longer be float32, but lowbit integers if get_precision() != torch.float32:
@@ -889,10 +941,12 @@ def quantized_model(self) -> nn.Module: # class references quantizer_class_dict = { "embedding": EmbeddingOnlyQuantHandler, + "embedding:wx": IntxWeightEmbeddingQuantizer, "linear:int8": WeightOnlyInt8QuantHandler, "precision": PrecisionHandler, "executor": ExecutorHandler, "linear:int4": Int4WeightOnlyQuantizer, + "linear:a8wxdq": None, # uses quantize_ API "linear:a8w4dq": Int8DynActInt4WeightQuantizer, }
@@ -916,26 +970,11 @@ def quantized_model(self) -> nn.Module: torchao_experimental_quant_api ) from torchao_experimental_quant_api import ( - Int8DynActIntxWeightLinearQuantizer, - IntxWeightEmbeddingQuantizer, UIntxWeightOnlyLinearQuantizer, ) - - quantizer_class_dict["linear:a8wxdq"] = Int8DynActIntxWeightLinearQuantizer - quantizer_class_dict["embedding:wx"] = IntxWeightEmbeddingQuantizer quantizer_class_dict["linear:afpwx"] = UIntxWeightOnlyLinearQuantizer # Try loading custom op - try: - import glob - - libs = glob.glob(f"{torchao_build_path}/cmake-out/lib/libtorchao_ops_aten.*") - libs = list(filter(lambda l: (l.endswith("so") or l.endswith("dylib")), libs)) - torch.ops.load_library(libs[0]) - print("Loaded torchao cpu ops.") - except Exception as e: - print("Unable to load torchao cpu ops library. Slow fallback kernels will be used.") - try: libname = "libtorchao_ops_mps_aten.dylib" libpath = f"{torchao_build_path}/cmake-out/lib/{libname}"
From 74363e432ff12684fc6aa04fc8ff76d944900e58 Mon Sep 17 00:00:00 2001 From: Scott Roy <161522778+metascroy@users.noreply.github.com> Date: Wed, 15 Jan 2025 17:09:54 -0800 Subject: [PATCH 02/24] Update docs/quantization.md Co-authored-by: Jack-Khuu --- docs/quantization.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/docs/quantization.md b/docs/quantization.md index 3eaeae36b..d1de63b14 100644 --- a/docs/quantization.md +++ b/docs/quantization.md
@@ -140,7 +140,7 @@ The quantization scheme embedding:wx quantizes embeddings in a groupwise manner You should expect high performance on ARM CPU if groupsize is divisible by 32. With other platforms and argument choices, a slow fallback kernel will be used. You will see warnings about this during quantization. ### Setup -If you are using the torchao ops from python, they are available out of the box on a Mac with Apple Silicon, and you can skip these setup steps. +If you are using the torchao ops from python (i.e., not with a C++ runner), they are available out of the box on a Mac with Apple Silicon, and you can skip these setup steps. If you plan to use the kernels from the AOTI/ExecuTorch C++ runners, follow the setup steps below.
From 48f568d98187dcea90c81b06ef03ac4725fbd49c Mon Sep 17 00:00:00 2001 From: Scott Roy <161522778+metascroy@users.noreply.github.com> Date: Wed, 15 Jan 2025 17:10:03 -0800 Subject: [PATCH 03/24] Update torchchat/utils/quantize.py Co-authored-by: Jack-Khuu --- torchchat/utils/quantize.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-)
diff --git a/torchchat/utils/quantize.py b/torchchat/utils/quantize.py index 3f7ed1b66..70a2651ca 100644 --- a/torchchat/utils/quantize.py +++ b/torchchat/utils/quantize.py
@@ -137,9 +137,7 @@ def quantize_model( group_size = q_kwargs["groupsize"] bit_width = q_kwargs["bitwidth"] has_weight_zeros = q_kwargs["has_weight_zeros"] - granularity = PerRow() - if group_size != -1: - granularity = PerGroup(group_size) + granularity = PerRow() if group_size == -1 else PerGroup(group_size) weight_dtype = getattr(torch, f"int{bit_width}") try:
From 525701de15d8748573fc88b936c0b8e33ac124a3 Mon Sep 17 00:00:00 2001 From: Scott Roy <161522778+metascroy@users.noreply.github.com> Date: Wed, 15 Jan 2025 17:10:11 -0800 Subject: [PATCH 04/24] Update torchchat/utils/quantize.py Co-authored-by: Jack-Khuu --- torchchat/utils/quantize.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-)
diff --git a/torchchat/utils/quantize.py b/torchchat/utils/quantize.py index 70a2651ca..499b9507c 100644 --- a/torchchat/utils/quantize.py +++ b/torchchat/utils/quantize.py
@@ -968,8 +968,7 @@ def quantized_model(self) -> nn.Module: torchao_experimental_quant_api ) from torchao_experimental_quant_api import ( - UIntxWeightOnlyLinearQuantizer, - ) + from torchao_experimental_quant_api import UIntxWeightOnlyLinearQuantizer quantizer_class_dict["linear:afpwx"] = UIntxWeightOnlyLinearQuantizer # Try loading custom op
From f9a7bb9a628537e5ce4f6b75f675c76e4a5ea647 Mon Sep 17 00:00:00 2001 From: Jack-Khuu Date: Fri, 17 Jan 2025 14:04:49 -0800 Subject: [PATCH 05/24] Fixing import typo in quantize.py --- torchchat/utils/quantize.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-)
diff --git a/torchchat/utils/quantize.py b/torchchat/utils/quantize.py index 499b9507c..15736b035 100644 --- a/torchchat/utils/quantize.py +++ b/torchchat/utils/quantize.py @@
-967,8 +967,7 @@ def quantized_model(self) -> nn.Module: torchao_experimental_quant_api_spec.loader.exec_module( torchao_experimental_quant_api ) - from torchao_experimental_quant_api import ( - from torchao_experimental_quant_api import UIntxWeightOnlyLinearQuantizer + from torchao_experimental_quant_api import UIntxWeightOnlyLinearQuantizer quantizer_class_dict["linear:afpwx"] = UIntxWeightOnlyLinearQuantizer # Try loading custom op From 0abe175e01a330315d78c5859916fc31aca24f59 Mon Sep 17 00:00:00 2001 From: Jack-Khuu Date: Fri, 17 Jan 2025 17:47:24 -0800 Subject: [PATCH 06/24] Bump ET pin to pick up AO changes --- install/.pins/et-pin.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/install/.pins/et-pin.txt b/install/.pins/et-pin.txt index e79e9c341..640cd889c 100644 --- a/install/.pins/et-pin.txt +++ b/install/.pins/et-pin.txt @@ -1 +1 @@ -9c043290ad3944268290e015c3063bc411e6ef6b +9836b39fe690e1906f133b4a233863149c30d499 From 76e8ec53bb6ad85d11a62d8faebd33b941993d88 Mon Sep 17 00:00:00 2001 From: Jack-Khuu Date: Tue, 11 Feb 2025 11:32:37 -0800 Subject: [PATCH 07/24] Bump torchao-pin to match ET and torchchat --- install/.pins/torchao-pin.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/install/.pins/torchao-pin.txt b/install/.pins/torchao-pin.txt index 2da70769c..48cc62670 100644 --- a/install/.pins/torchao-pin.txt +++ b/install/.pins/torchao-pin.txt @@ -1 +1 @@ -2e032c6b0de960dee554dcb08126ace718b14c6d +11333ba2cb5c4e792bc4f5c0d70c12991f972008 From 3e04645446d80ddedf55a5706aaaaa18e152c62b Mon Sep 17 00:00:00 2001 From: Jack-Khuu Date: Wed, 26 Feb 2025 16:13:12 -0800 Subject: [PATCH 08/24] Update torchao-pin.txt --- install/.pins/torchao-pin.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/install/.pins/torchao-pin.txt b/install/.pins/torchao-pin.txt index 48cc62670..4ef0f3788 100644 --- a/install/.pins/torchao-pin.txt +++ b/install/.pins/torchao-pin.txt @@ -1 +1 @@ -11333ba2cb5c4e792bc4f5c0d70c12991f972008 +7d8794622f3ac7ffa98761314019a20fba06edef From 94fcd9af8d2ef97857e4668be17ac68c57050c10 Mon Sep 17 00:00:00 2001 From: Jack-Khuu Date: Wed, 26 Feb 2025 17:09:58 -0800 Subject: [PATCH 09/24] Split up AOTI and ET tests --- .github/workflows/pull.yml | 54 +++++++++++++++++++++++++++++++++++--- 1 file changed, 50 insertions(+), 4 deletions(-) diff --git a/.github/workflows/pull.yml b/.github/workflows/pull.yml index 9c4e43957..3ca972a81 100644 --- a/.github/workflows/pull.yml +++ b/.github/workflows/pull.yml @@ -1123,6 +1123,56 @@ jobs: echo "Tests complete." 
test-torchao-experimental-cpp: + strategy: + matrix: + runner: [macos-14-xlarge] + runs-on: ${{matrix.runner}} + steps: + - name: Checkout repo + uses: actions/checkout@v3 + with: + submodules: true + - name: Setup Python + uses: actions/setup-python@v2 + with: + python-version: 3.10.11 + - name: Setup Xcode + if: runner.os == 'macOS' + uses: maxim-lobanov/setup-xcode@v1 + with: + xcode-version: '15.3' + - name: Print machine info + run: | + uname -a + if [ $(uname -s) == Darwin ]; then + sysctl machdep.cpu.brand_string + sysctl machdep.cpu.core_count + fi + - name: Install torchchat + run: | + echo "Installing pip3 packages" + ./install/install_requirements.sh + pip3 list + python3 -c 'import torch;print(f"torch: {torch.__version__, torch.version.git_version}")' + - name: Install torchao-ops + id: install-torchao-ops + run: | + bash torchchat/utils/scripts/build_torchao_ops.sh + - name: Install runner + run: | + echo "Installing runner" + bash torchchat/utils/scripts/build_native.sh aoti link_torchao_ops + - name: Run inference + run: | + python torchchat.py download stories110M + wget -O ./tokenizer.model https://github.com/karpathy/llama2.c/raw/master/tokenizer.model + export PRMT="Once upon a time in a land far away" + echo "Export and run AOTI (C++ runner)" + python torchchat.py export stories110M --output-aoti-package-path ./model.pt2 --dtype float32 --quantize '{"embedding:wx": {"bitwidth": 2, "groupsize": 32}, "linear:a8wxdq": {"bitwidth": 3, "groupsize": 128, "has_weight_zeros": false}}' + ./cmake-out/aoti_run ./model.pt2 -z ./tokenizer.model -t 0 -i "${PRMT}" + echo "Tests complete." + + test-torchao-experimental-et: strategy: matrix: runner: [macos-14-xlarge] runs-on: ${{matrix.runner}} steps:
@@ -1175,10 +1225,6 @@ jobs: python torchchat.py download stories110M wget -O ./tokenizer.model https://github.com/karpathy/llama2.c/raw/master/tokenizer.model export PRMT="Once upon a time in a land far away" echo "Export and run ET (C++ runner)" python torchchat.py export stories110M --output-pte-path ./model.pte --dtype float32 --quantize '{"embedding:wx": {"bitwidth": 2, "groupsize": 32}, "linear:a8wxdq": {"bitwidth": 3, "groupsize": 128, "has_weight_zeros": false}}' ./cmake-out/et_run ./model.pte -z ./tokenizer.model -t 0 -i "${PRMT}" - echo "Export and run AOTI (C++ runner)" - python torchchat.py export stories110M --output-aoti-package-path ./model.pt2 --dtype float32 --quantize '{"embedding:wx": {"bitwidth": 2, "groupsize": 32}, "linear:a8wxdq": {"bitwidth": 3, "groupsize": 128, "has_weight_zeros": false}}' - ./cmake-out/aoti_run ./model.pt2 -z ./tokenizer.model -t 0 -i "${PRMT}" - echo "Tests complete."
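With the tests split, each runner flow is reproducible on its own. This is a sketch of the new AOTI job under the same assumptions as the workflow, a Mac with Apple Silicon and a torchchat checkout with requirements installed:

```
# Build the torchao aten ops, then an AOTI runner linked against them.
bash torchchat/utils/scripts/build_torchao_ops.sh
bash torchchat/utils/scripts/build_native.sh aoti link_torchao_ops

# Export a quantized model and run it through the C++ runner.
python torchchat.py download stories110M
wget -O ./tokenizer.model https://github.com/karpathy/llama2.c/raw/master/tokenizer.model
python torchchat.py export stories110M --output-aoti-package-path ./model.pt2 --dtype float32 --quantize '{"embedding:wx": {"bitwidth": 2, "groupsize": 32}, "linear:a8wxdq": {"bitwidth": 3, "groupsize": 128, "has_weight_zeros": false}}'
./cmake-out/aoti_run ./model.pt2 -z ./tokenizer.model -t 0 -i "Once upon a time in a land far away"
```

The ET job keeps the same shape but exports with --output-pte-path and runs ./cmake-out/et_run instead.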
test-torchao-experimental-mps: strategy: From 7e56c553b4a07048a4e4571dcc7edbc3a733a267 Mon Sep 17 00:00:00 2001 From: Jack-Khuu Date: Wed, 26 Feb 2025 17:55:20 -0800 Subject: [PATCH 10/24] Bump ET pin to 2-26-25 with new AO pin --- install/.pins/et-pin.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/install/.pins/et-pin.txt b/install/.pins/et-pin.txt index 5f0c55ac2..6664d4e72 100644 --- a/install/.pins/et-pin.txt +++ b/install/.pins/et-pin.txt @@ -1 +1 @@ -791472d6706b027552f39f11b28d034e4839c9af \ No newline at end of file +68042847fd0eb6aac94ab2ffad8e1440fca865f4 From 77e8a62652d6619dd76b10c49ea09beda5f443ad Mon Sep 17 00:00:00 2001 From: Jack-Khuu Date: Wed, 26 Feb 2025 18:04:28 -0800 Subject: [PATCH 11/24] Undo et pin bump; fails basic install --- install/.pins/et-pin.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/install/.pins/et-pin.txt b/install/.pins/et-pin.txt index 6664d4e72..ecad1b9bb 100644 --- a/install/.pins/et-pin.txt +++ b/install/.pins/et-pin.txt @@ -1 +1 @@ -68042847fd0eb6aac94ab2ffad8e1440fca865f4 +791472d6706b027552f39f11b28d034e4839c9af From 94ad51a620674d7a267226b9235c46038462f821 Mon Sep 17 00:00:00 2001 From: Scott Roy <161522778+metascroy@users.noreply.github.com> Date: Mon, 10 Mar 2025 18:42:05 -0700 Subject: [PATCH 12/24] update --- install/.pins/torchao-pin.txt | 2 +- install/install_requirements.sh | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/install/.pins/torchao-pin.txt b/install/.pins/torchao-pin.txt index 4ef0f3788..a18a37c8c 100644 --- a/install/.pins/torchao-pin.txt +++ b/install/.pins/torchao-pin.txt @@ -1 +1 @@ -7d8794622f3ac7ffa98761314019a20fba06edef +1eb4d3354edb78d4651b28556200fa615c1b68c3 diff --git a/install/install_requirements.sh b/install/install_requirements.sh index 11fa280b8..f4bcaba0a 100755 --- a/install/install_requirements.sh +++ b/install/install_requirements.sh @@ -130,9 +130,10 @@ fi # TODO: Remove this and install nightly build, once it supports macos # USE_CPP=1 indicates that the torchao experimental aten kernels will be built and loaded # if on Mac with Apple Silicon +export TORCHAO_PIN=$(cat install/.pins/torchao-pin.txt) ( set -x - USE_CPP=1 $PIP_EXECUTABLE install git+https://github.com/pytorch/ao.git@7d8794622f3ac7ffa98761314019a20fba06edef + USE_CPP=1 $PIP_EXECUTABLE install git+https://github.com/pytorch/ao.git@${TORCHAO_PIN} ) if [[ -x "$(command -v nvidia-smi)" ]]; then From 34cb931617b76eb4bdd95ed176f0bed2d5dd3cc4 Mon Sep 17 00:00:00 2001 From: Scott Roy <161522778+metascroy@users.noreply.github.com> Date: Mon, 10 Mar 2025 18:54:16 -0700 Subject: [PATCH 13/24] up --- torchchat/utils/scripts/install_utils.sh | 2 ++ 1 file changed, 2 insertions(+) diff --git a/torchchat/utils/scripts/install_utils.sh b/torchchat/utils/scripts/install_utils.sh index 83b412be0..ac1df9d85 100644 --- a/torchchat/utils/scripts/install_utils.sh +++ b/torchchat/utils/scripts/install_utils.sh @@ -200,6 +200,7 @@ install_torchao_aten_ops() { CMAKE_OUT_DIR=${TORCHCHAT_ROOT}/torchao-build/cmake-out cmake -DCMAKE_PREFIX_PATH=${MY_CMAKE_PREFIX_PATH} \ -DCMAKE_INSTALL_PREFIX=${CMAKE_OUT_DIR} \ + -DTORCHAO_BUILD_CPU_AARCH64=ON \ -DCMAKE_BUILD_TYPE="Release" \ -S . 
\ -B ${CMAKE_OUT_DIR} -G Ninja
@@ -217,6 +218,7 @@ install_torchao_executorch_ops() { -DCMAKE_INSTALL_PREFIX=${CMAKE_OUT_DIR} \ -DCMAKE_BUILD_TYPE="Release" \ -DTORCHAO_BUILD_EXECUTORCH_OPS=ON \ + -DTORCHAO_BUILD_CPU_AARCH64=ON \ -DEXECUTORCH_INCLUDE_DIRS="${EXECUTORCH_INCLUDE_DIRS}" \ -DEXECUTORCH_LIBRARIES="${EXECUTORCH_LIBRARIES}" \ -S . \
From b564fc14a66e9c99dde795c89839ec3eae140de9 Mon Sep 17 00:00:00 2001 From: Scott Roy <161522778+metascroy@users.noreply.github.com> Date: Mon, 10 Mar 2025 18:57:54 -0700 Subject: [PATCH 14/24] up --- torchchat/utils/scripts/build_native.sh | 3 --- 1 file changed, 3 deletions(-)
diff --git a/torchchat/utils/scripts/build_native.sh b/torchchat/utils/scripts/build_native.sh index b8481b4cc..e2b8b4fc0 100755 --- a/torchchat/utils/scripts/build_native.sh +++ b/torchchat/utils/scripts/build_native.sh
@@ -86,9 +86,6 @@ if [[ "$TARGET" == "et" ]]; then EXECUTORCH_LIBRARIES="${TORCHCHAT_ROOT}/${ET_BUILD_DIR}/install/lib/libexecutorch_no_prim_ops.a;${TORCHCHAT_ROOT}/${ET_BUILD_DIR}/install/lib/libextension_threadpool.a;${TORCHCHAT_ROOT}/${ET_BUILD_DIR}/install/lib/libcpuinfo.a;${TORCHCHAT_ROOT}/${ET_BUILD_DIR}/install/lib/libpthreadpool.a" install_torchao_executorch_ops fi -elif [[ "$LINK_TORCHAO_OPS" == "ON" ]]; then - # Install OMP when using AOTI with linked torchao ops - brew install libomp fi popd
From 9eed5d1bd77ab7c8a9fcf1c334502f3acf3a938e Mon Sep 17 00:00:00 2001 From: Scott Roy <161522778+metascroy@users.noreply.github.com> Date: Mon, 10 Mar 2025 21:54:05 -0700 Subject: [PATCH 15/24] up --- .github/workflows/pull.yml | 14 +++++++------- torchchat/utils/scripts/build_native.sh | 3 +++ 2 files changed, 10 insertions(+), 7 deletions(-)
diff --git a/.github/workflows/pull.yml b/.github/workflows/pull.yml index 3ca972a81..ebdc295d5 100644 --- a/.github/workflows/pull.yml +++ b/.github/workflows/pull.yml
@@ -292,7 +292,7 @@ jobs: echo "::endgroup::" echo "::group::Run inference with quantize file" - for DEVICE in cpu; do # cuda + for DEVICE in cpu; do # cuda # cuda - fails because `AttributeError: 'Linear' object has no attribute '_linear_extra_repr'` # follow up with torchao as a separate PR echo "saving snapshot for device ${DEVICE} and dtype bfloat16, and reloading as snapshot"
@@ -349,7 +349,7 @@ jobs: # python3 torchchat.py export --output-snap model.tc --dtype float32 --quantize torchchat/quant_config/cuda-32.json --checkpoint "./checkpoints/${REPO_NAME}/model.pth" # python3 torchchat.py generate --snap model.tc --dtype float32 --checkpoint "./checkpoints/${REPO_NAME}/model.pth" # echo "::endgroup::" - + test-gpu-aoti-float16: permissions: id-token: write
@@ -1198,6 +1198,11 @@ jobs: sysctl machdep.cpu.brand_string sysctl machdep.cpu.core_count fi + - name: Install ET + run: | + echo "Installing ExecuTorch" + export TORCHCHAT_ROOT=${PWD} + bash torchchat/utils/scripts/install_et.sh - name: Install torchchat run: | echo "Installing pip3 packages"
diff --git a/torchchat/utils/scripts/build_native.sh b/torchchat/utils/scripts/build_native.sh index e2b8b4fc0..32ee59829 100755 --- a/torchchat/utils/scripts/build_native.sh +++ b/torchchat/utils/scripts/build_native.sh
@@ -86,6 +86,9 @@ if [[ "$TARGET" == "et" ]]; then EXECUTORCH_LIBRARIES="${TORCHCHAT_ROOT}/${ET_BUILD_DIR}/install/lib/libexecutorch_no_prim_ops.a;${TORCHCHAT_ROOT}/${ET_BUILD_DIR}/install/lib/libextension_threadpool.a;${TORCHCHAT_ROOT}/${ET_BUILD_DIR}/install/lib/libcpuinfo.a;${TORCHCHAT_ROOT}/${ET_BUILD_DIR}/install/lib/libpthreadpool.a" install_torchao_executorch_ops fi +elif [[ "$LINK_TORCHAO_OPS" == "ON" ]]; then + # Install OMP when using AOTI with linked torchao ops + brew install libomp fi popd
From 14365c424affd30f7535d18a41d3213ede64da56 Mon Sep 17 00:00:00 2001 From: Scott Roy <161522778+metascroy@users.noreply.github.com> Date: Mon, 10 Mar 2025 22:10:08 -0700 Subject: [PATCH 16/24] up --- .github/workflows/pull.yml | 10 +++++----- runner/aoti.cmake | 2 +- torchchat/utils/scripts/build_native.sh | 7 +++---- torchchat/utils/scripts/install_utils.sh | 2 ++ 4 files changed, 11 insertions(+), 10 deletions(-)
diff --git a/.github/workflows/pull.yml b/.github/workflows/pull.yml index ebdc295d5..ac664a95f 100644 --- a/.github/workflows/pull.yml +++ b/.github/workflows/pull.yml
@@ -1198,17 +1198,17 @@ jobs: sysctl machdep.cpu.brand_string sysctl machdep.cpu.core_count fi - - name: Install ET - run: | - echo "Installing ExecuTorch" - export TORCHCHAT_ROOT=${PWD} - bash torchchat/utils/scripts/install_et.sh - name: Install torchchat run: | echo "Installing pip3 packages" ./install/install_requirements.sh pip3 list python3 -c 'import torch;print(f"torch: {torch.__version__, torch.version.git_version}")' + - name: Install ET + run: | + echo "Installing ExecuTorch" + export TORCHCHAT_ROOT=${PWD} + bash torchchat/utils/scripts/install_et.sh - name: Install torchao-ops id: install-torchao-ops run: |
diff --git a/runner/aoti.cmake b/runner/aoti.cmake index ae907b391..3bfe294ea 100644 --- a/runner/aoti.cmake +++ b/runner/aoti.cmake
@@ -15,7 +15,7 @@ ENDIF() find_package(CUDA) -find_package(Torch 2.4.0) +find_package(Torch REQUIRED) if(Torch_FOUND) set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -g ${TORCH_CXX_FLAGS} -fpermissive")
diff --git a/torchchat/utils/scripts/build_native.sh b/torchchat/utils/scripts/build_native.sh index 32ee59829..e36de9290 100755 --- a/torchchat/utils/scripts/build_native.sh +++ b/torchchat/utils/scripts/build_native.sh
@@ -57,6 +57,7 @@ while (( "$#" )); do done source "$(dirname "${BASH_SOURCE[0]}")/install_utils.sh" +find_cmake_prefix_path if [ -z "${ET_BUILD_DIR}" ]; then ET_BUILD_DIR="et-build"
@@ -80,8 +81,6 @@ if [[ "$TARGET" == "et" ]]; then exit 1 fi - source "$(dirname "${BASH_SOURCE[0]}")/install_utils.sh" - find_cmake_prefix_path EXECUTORCH_INCLUDE_DIRS="${TORCHCHAT_ROOT}/${ET_BUILD_DIR}/install/include;${TORCHCHAT_ROOT}/${ET_BUILD_DIR}/src" EXECUTORCH_LIBRARIES="${TORCHCHAT_ROOT}/${ET_BUILD_DIR}/install/lib/libexecutorch_no_prim_ops.a;${TORCHCHAT_ROOT}/${ET_BUILD_DIR}/install/lib/libextension_threadpool.a;${TORCHCHAT_ROOT}/${ET_BUILD_DIR}/install/lib/libcpuinfo.a;${TORCHCHAT_ROOT}/${ET_BUILD_DIR}/install/lib/libpthreadpool.a" install_torchao_executorch_ops
@@ -94,9 +93,9 @@ popd # CMake commands if [[ "$TARGET" == "et" ]]; then - cmake -S . -B ./cmake-out -DCMAKE_PREFIX_PATH=`python3 -c 'import torch;print(torch.utils.cmake_prefix_path)'` -DLINK_TORCHAO_OPS="${LINK_TORCHAO_OPS}" -DET_USE_ADAPTIVE_THREADS=ON -DCMAKE_CXX_FLAGS="-D_GLIBCXX_USE_CXX11_ABI=1" -G Ninja + cmake -S . -B ./cmake-out -DCMAKE_PREFIX_PATH="${MY_CMAKE_PREFIX_PATH}" -DLINK_TORCHAO_OPS="${LINK_TORCHAO_OPS}" -DET_USE_ADAPTIVE_THREADS=ON -DCMAKE_CXX_FLAGS="-D_GLIBCXX_USE_CXX11_ABI=1" -G Ninja else - cmake -S .
-B ./cmake-out -DCMAKE_PREFIX_PATH=`python3 -c 'import torch;print(torch.utils.cmake_prefix_path)'` -DLINK_TORCHAO_OPS="${LINK_TORCHAO_OPS}" -DCMAKE_CXX_FLAGS="-D_GLIBCXX_USE_CXX11_ABI=1" -G Ninja + cmake -S . -B ./cmake-out -DCMAKE_PREFIX_PATH="${MY_CMAKE_PREFIX_PATH}" -DLINK_TORCHAO_OPS="${LINK_TORCHAO_OPS}" -DCMAKE_CXX_FLAGS="-D_GLIBCXX_USE_CXX11_ABI=1" -G Ninja fi cmake --build ./cmake-out --target "${TARGET}"_run diff --git a/torchchat/utils/scripts/install_utils.sh b/torchchat/utils/scripts/install_utils.sh index ac1df9d85..0c4c1f7b6 100644 --- a/torchchat/utils/scripts/install_utils.sh +++ b/torchchat/utils/scripts/install_utils.sh @@ -186,6 +186,8 @@ clone_torchao() { install_torchao_aten_ops() { local device=${1:-cpu} + USE_CPP=1 pip install "${TORCHCHAT_ROOT}/torchao-build/src/ao" + if [[ "$device" == "cpu" ]]; then echo "Building torchao custom ops for ATen" pushd ${TORCHCHAT_ROOT}/torchao-build/src/ao/torchao/experimental From 66d90e14ccb8f9c0499e61f50f6f1844a62edb63 Mon Sep 17 00:00:00 2001 From: Scott Roy <161522778+metascroy@users.noreply.github.com> Date: Mon, 10 Mar 2025 22:29:58 -0700 Subject: [PATCH 17/24] up --- torchchat/export.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/torchchat/export.py b/torchchat/export.py index 997639ffe..bad97cd35 100644 --- a/torchchat/export.py +++ b/torchchat/export.py @@ -439,7 +439,8 @@ def main(args): tokenizer, max_seq_length=builder_args.max_seq_length, support_tensor_subclass=output_dso_path is None - and output_aoti_package_path is None, + and output_aoti_package_path is None + and output_pte_path is None, ) model_to_pte = model model_to_dso = model From 12cbd13371448f7be4a02817a031377597bbc2a8 Mon Sep 17 00:00:00 2001 From: Scott Roy <161522778+metascroy@users.noreply.github.com> Date: Tue, 11 Mar 2025 09:23:54 -0700 Subject: [PATCH 18/24] up --- runner/aoti.cmake | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/runner/aoti.cmake b/runner/aoti.cmake index 3bfe294ea..ae907b391 100644 --- a/runner/aoti.cmake +++ b/runner/aoti.cmake @@ -15,7 +15,7 @@ ENDIF() find_package(CUDA) -find_package(Torch REQUIRED) +find_package(Torch 2.4.0) if(Torch_FOUND) set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -g ${TORCH_CXX_FLAGS} -fpermissive") From 28d1a99c1bf76cce5e8683b77895d941b547bd1f Mon Sep 17 00:00:00 2001 From: Scott Roy <161522778+metascroy@users.noreply.github.com> Date: Tue, 11 Mar 2025 11:57:45 -0700 Subject: [PATCH 19/24] up --- install/.pins/torchao-pin.txt | 2 +- torchchat/utils/scripts/install_utils.sh | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/install/.pins/torchao-pin.txt b/install/.pins/torchao-pin.txt index a18a37c8c..c1b84754c 100644 --- a/install/.pins/torchao-pin.txt +++ b/install/.pins/torchao-pin.txt @@ -1 +1 @@ -1eb4d3354edb78d4651b28556200fa615c1b68c3 +711fa0809f06fc97febd0c3fe72563c3fe227e51 diff --git a/torchchat/utils/scripts/install_utils.sh b/torchchat/utils/scripts/install_utils.sh index 0c4c1f7b6..019309ef6 100644 --- a/torchchat/utils/scripts/install_utils.sh +++ b/torchchat/utils/scripts/install_utils.sh @@ -203,6 +203,7 @@ install_torchao_aten_ops() { cmake -DCMAKE_PREFIX_PATH=${MY_CMAKE_PREFIX_PATH} \ -DCMAKE_INSTALL_PREFIX=${CMAKE_OUT_DIR} \ -DTORCHAO_BUILD_CPU_AARCH64=ON \ + -DTORCHAO_PARALLEL_BACKEND=OPENMP \ -DCMAKE_BUILD_TYPE="Release" \ -S . 
\ -B ${CMAKE_OUT_DIR} -G Ninja From d2cc25aab67e5894f3de6c72b45214ad3c0647e7 Mon Sep 17 00:00:00 2001 From: Scott Roy <161522778+metascroy@users.noreply.github.com> Date: Tue, 11 Mar 2025 13:01:01 -0700 Subject: [PATCH 20/24] up --- torchchat/utils/scripts/install_utils.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/torchchat/utils/scripts/install_utils.sh b/torchchat/utils/scripts/install_utils.sh index 019309ef6..478c0867d 100644 --- a/torchchat/utils/scripts/install_utils.sh +++ b/torchchat/utils/scripts/install_utils.sh @@ -204,6 +204,7 @@ install_torchao_aten_ops() { -DCMAKE_INSTALL_PREFIX=${CMAKE_OUT_DIR} \ -DTORCHAO_BUILD_CPU_AARCH64=ON \ -DTORCHAO_PARALLEL_BACKEND=OPENMP \ + -DOpenMP_ROOT="/opt/homebrew/opt/libomp" \ -DCMAKE_BUILD_TYPE="Release" \ -S . \ -B ${CMAKE_OUT_DIR} -G Ninja From d79f870b24ae4fd032355112a4f72c528e1cad6a Mon Sep 17 00:00:00 2001 From: Scott Roy <161522778+metascroy@users.noreply.github.com> Date: Tue, 11 Mar 2025 14:20:24 -0700 Subject: [PATCH 21/24] up --- .github/workflows/pull.yml | 13 +++++++------ torchchat/utils/scripts/build_native.sh | 1 + torchchat/utils/scripts/build_torchao_ops.sh | 1 - torchchat/utils/scripts/clone_torchao.sh | 12 ++++++++++++ torchchat/utils/scripts/install_utils.sh | 2 +- 5 files changed, 21 insertions(+), 8 deletions(-) create mode 100644 torchchat/utils/scripts/clone_torchao.sh diff --git a/.github/workflows/pull.yml b/.github/workflows/pull.yml index ac664a95f..e44d9d037 100644 --- a/.github/workflows/pull.yml +++ b/.github/workflows/pull.yml @@ -1154,10 +1154,10 @@ jobs: ./install/install_requirements.sh pip3 list python3 -c 'import torch;print(f"torch: {torch.__version__, torch.version.git_version}")' - - name: Install torchao-ops - id: install-torchao-ops + - name: Clone torchao + id: clone-torchao run: | - bash torchchat/utils/scripts/build_torchao_ops.sh + bash torchchat/utils/scripts/clone_torchao.sh - name: Install runner run: | echo "Installing runner" @@ -1209,10 +1209,10 @@ jobs: echo "Installing ExecuTorch" export TORCHCHAT_ROOT=${PWD} bash torchchat/utils/scripts/install_et.sh - - name: Install torchao-ops - id: install-torchao-ops + - name: Clone torchao + id: clone-torchao run: | - bash torchchat/utils/scripts/build_torchao_ops.sh + bash torchchat/utils/scripts/clone_torchao.sh - name: Install runner run: | echo "Installing runner" @@ -1256,6 +1256,7 @@ jobs: - name: Install torchao-ops-mps id: install-torchao-ops-mps run: | + bash torchchat/utils/scripts/clone_torchao.sh bash torchchat/utils/scripts/build_torchao_ops.sh mps - name: Run inference run: | diff --git a/torchchat/utils/scripts/build_native.sh b/torchchat/utils/scripts/build_native.sh index e36de9290..d0e141678 100755 --- a/torchchat/utils/scripts/build_native.sh +++ b/torchchat/utils/scripts/build_native.sh @@ -88,6 +88,7 @@ if [[ "$TARGET" == "et" ]]; then elif [[ "$LINK_TORCHAO_OPS" == "ON" ]]; then # Install OMP when using AOTI with linked torchao ops brew install libomp + install_torchao_aten_ops cpu fi popd diff --git a/torchchat/utils/scripts/build_torchao_ops.sh b/torchchat/utils/scripts/build_torchao_ops.sh index 46e2479ac..a8388d8d7 100644 --- a/torchchat/utils/scripts/build_torchao_ops.sh +++ b/torchchat/utils/scripts/build_torchao_ops.sh @@ -16,6 +16,5 @@ source "$(dirname "${BASH_SOURCE[0]}")/install_utils.sh" pushd ${TORCHCHAT_ROOT} find_cmake_prefix_path -clone_torchao install_torchao_aten_ops "$device" popd diff --git a/torchchat/utils/scripts/clone_torchao.sh b/torchchat/utils/scripts/clone_torchao.sh new file mode 100644 
index 000000000..834e9434a --- /dev/null +++ b/torchchat/utils/scripts/clone_torchao.sh @@ -0,0 +1,12 @@ +#!/bin/bash +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the BSD-style license found in the +# LICENSE file in the root directory of this source tree. + +source "$(dirname "${BASH_SOURCE[0]}")/install_utils.sh" + +pushd ${TORCHCHAT_ROOT} +clone_torchao +popd diff --git a/torchchat/utils/scripts/install_utils.sh b/torchchat/utils/scripts/install_utils.sh index 478c0867d..9613fd740 100644 --- a/torchchat/utils/scripts/install_utils.sh +++ b/torchchat/utils/scripts/install_utils.sh @@ -204,7 +204,7 @@ install_torchao_aten_ops() { -DCMAKE_INSTALL_PREFIX=${CMAKE_OUT_DIR} \ -DTORCHAO_BUILD_CPU_AARCH64=ON \ -DTORCHAO_PARALLEL_BACKEND=OPENMP \ - -DOpenMP_ROOT="/opt/homebrew/opt/libomp" \ + -DOpenMP_ROOT="$(brew --prefix)/opt/libomp" \ -DCMAKE_BUILD_TYPE="Release" \ -S . \ -B ${CMAKE_OUT_DIR} -G Ninja From a8106fd80ce1528ca972d85c9a03beb47f946ab5 Mon Sep 17 00:00:00 2001 From: Scott Roy <161522778+metascroy@users.noreply.github.com> Date: Tue, 11 Mar 2025 14:50:08 -0700 Subject: [PATCH 22/24] up --- install/install_requirements.sh | 10 +--------- torchchat/utils/scripts/install_et.sh | 4 ++++ torchchat/utils/scripts/install_utils.sh | 2 -- 3 files changed, 5 insertions(+), 11 deletions(-) diff --git a/install/install_requirements.sh b/install/install_requirements.sh index f4bcaba0a..0e58409c8 100755 --- a/install/install_requirements.sh +++ b/install/install_requirements.sh @@ -126,15 +126,7 @@ then ) fi -# For torchao need to install from github since nightly build doesn't have macos build. -# TODO: Remove this and install nightly build, once it supports macos -# USE_CPP=1 indicates that the torchao experimental aten kernels will be built and loaded -# if on Mac with Apple Silicon -export TORCHAO_PIN=$(cat install/.pins/torchao-pin.txt) -( - set -x - USE_CPP=1 $PIP_EXECUTABLE install git+https://github.com/pytorch/ao.git@${TORCHAO_PIN} -) +bash install/install_torchao.sh if [[ -x "$(command -v nvidia-smi)" ]]; then ( diff --git a/torchchat/utils/scripts/install_et.sh b/torchchat/utils/scripts/install_et.sh index 8062a8316..3fd7e2a1b 100755 --- a/torchchat/utils/scripts/install_et.sh +++ b/torchchat/utils/scripts/install_et.sh @@ -19,4 +19,8 @@ pushd ${TORCHCHAT_ROOT} find_cmake_prefix_path clone_executorch install_executorch_libs $ENABLE_ET_PYBIND + +# During installation, ET uninstalls torchchat's preferred version of torchao +# so we reinstall here +bash install/install/install_torchao.sh popd diff --git a/torchchat/utils/scripts/install_utils.sh b/torchchat/utils/scripts/install_utils.sh index 9613fd740..68987e666 100644 --- a/torchchat/utils/scripts/install_utils.sh +++ b/torchchat/utils/scripts/install_utils.sh @@ -186,8 +186,6 @@ clone_torchao() { install_torchao_aten_ops() { local device=${1:-cpu} - USE_CPP=1 pip install "${TORCHCHAT_ROOT}/torchao-build/src/ao" - if [[ "$device" == "cpu" ]]; then echo "Building torchao custom ops for ATen" pushd ${TORCHCHAT_ROOT}/torchao-build/src/ao/torchao/experimental From aa6fb7034e569504fc5d373a14deee702ab07c81 Mon Sep 17 00:00:00 2001 From: Scott Roy <161522778+metascroy@users.noreply.github.com> Date: Tue, 11 Mar 2025 14:55:14 -0700 Subject: [PATCH 23/24] up --- install/install_torchao.sh | 39 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 39 insertions(+) create mode 100644 install/install_torchao.sh diff --git a/install/install_torchao.sh 
b/install/install_torchao.sh new file mode 100644 index 000000000..84974040a --- /dev/null +++ b/install/install_torchao.sh @@ -0,0 +1,39 @@ +#!/bin/bash +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the BSD-style license found in the +# LICENSE file in the root directory of this source tree. + + +# USE_CPP=1 indicates that the torchao experimental aten kernels will be built and loaded +# if on Mac with Apple Silicon + +if [ -z "${PYTHON_EXECUTABLE:-}" ]; +then + if [[ -z ${CONDA_DEFAULT_ENV:-} ]] || [[ ${CONDA_DEFAULT_ENV:-} == "base" ]] || [[ ! -x "$(command -v python)" ]]; + then + PYTHON_EXECUTABLE=python3 + else + PYTHON_EXECUTABLE=python + fi +fi +echo "Using python executable: $PYTHON_EXECUTABLE" + +if [[ "$PYTHON_EXECUTABLE" == "python" ]]; +then + PIP_EXECUTABLE=pip +elif [[ "$PYTHON_EXECUTABLE" == "python3" ]]; +then + PIP_EXECUTABLE=pip3 +else + PIP_EXECUTABLE=pip${PYTHON_SYS_VERSION} +fi +echo "Using pip executable: $PIP_EXECUTABLE" + + +export TORCHAO_PIN=$(cat install/.pins/torchao-pin.txt) +( + set -x + USE_CPP=1 $PIP_EXECUTABLE install git+https://github.com/pytorch/ao.git@${TORCHAO_PIN} +) From 8a9a644a1ff83595fd12ab07930ae44a418ceb73 Mon Sep 17 00:00:00 2001 From: Scott Roy <161522778+metascroy@users.noreply.github.com> Date: Tue, 11 Mar 2025 15:16:19 -0700 Subject: [PATCH 24/24] up --- torchchat/utils/scripts/install_et.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/torchchat/utils/scripts/install_et.sh b/torchchat/utils/scripts/install_et.sh index 3fd7e2a1b..531e80a6e 100755 --- a/torchchat/utils/scripts/install_et.sh +++ b/torchchat/utils/scripts/install_et.sh @@ -22,5 +22,5 @@ install_executorch_libs $ENABLE_ET_PYBIND # During installation, ET uninstalls torchchat's preferred version of torchao # so we reinstall here -bash install/install/install_torchao.sh +bash install/install_torchao.sh popd
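Taken together, the series leaves the following install flow. This is a sketch of the end state after patch 24, assuming macOS on Apple Silicon, where USE_CPP=1 builds and loads the experimental aten kernels:

```
# Python-only use: a single step. install_requirements.sh now delegates the
# torchao install to install/install_torchao.sh at the pinned commit.
./install/install_requirements.sh

# C++ runners: clone torchao, build the ops, then build a linked runner.
bash torchchat/utils/scripts/clone_torchao.sh
bash torchchat/utils/scripts/build_torchao_ops.sh     # pass "mps" to build the MPS kernels
bash torchchat/utils/scripts/build_native.sh aoti link_torchao_ops

# ExecuTorch path: install_et.sh re-runs install/install_torchao.sh at the end,
# since the ET install uninstalls the pinned torchao.
bash torchchat/utils/scripts/install_et.sh
```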