Description
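🐛 Describe the bug
Exporting llama3.1 to a DSO with AOT Inductor fails: while Triton builds its CUDA driver helper module (`cuda_utils`) with gcc, compilation aborts because `Python.h` cannot be found. The command and its full output follow:
python3 torchchat.py export llama3.1 --output-dso-path exportedModels/llama3.1.so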
Using device=cuda
Setting max_seq_length to 300 for DSO export.
Loading model...
Time to load model: 2.74 seconds
-----------------------------------------------------------
Exporting model using AOT Inductor to /home/warden/source/torchchat/exportedModels/llama3.1.so
W1010 15:36:44.314000 6252 .venv/lib/python3.11/site-packages/torch/_export/__init__.py:225] +============================+
W1010 15:36:44.314000 6252 .venv/lib/python3.11/site-packages/torch/_export/__init__.py:226] | !!! WARNING !!! |
W1010 15:36:44.314000 6252 .venv/lib/python3.11/site-packages/torch/_export/__init__.py:227] +============================+
W1010 15:36:44.314000 6252 .venv/lib/python3.11/site-packages/torch/_export/__init__.py:228] torch._export.aot_compile() is being deprecated, please switch to directly calling torch._inductor.aoti_compile_and_package(torch.export.export()) instead.
/tmp/tmpie2hawx7/main.c:5:10: fatal error: Python.h: No such file or directory
    5 | #include <Python.h>
      |          ^~~~~~~~~~
compilation terminated.
Traceback (most recent call last):
File "/home/warden/source/torchchat/torchchat.py", line 97, in <module>
export_main(args)
File "/home/warden/source/torchchat/torchchat/export.py", line 422, in main
export_for_server(
File "/home/warden/source/torchchat/torchchat/export.py", line 68, in export_for_server
so = torch._export.aot_compile(
^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/warden/source/torchchat/.venv/lib/python3.11/site-packages/torch/_export/__init__.py", line 303, in aot_compile
so_path = torch._inductor.aot_compile(gm, args, kwargs, options=options) # type: ignore[arg-type]
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/warden/source/torchchat/.venv/lib/python3.11/site-packages/torch/_inductor/__init__.py", line 204, in aot_compile
return compile_fx_aot(
^^^^^^^^^^^^^^^
File "/home/warden/source/torchchat/.venv/lib/python3.11/site-packages/torch/_inductor/compile_fx.py", line 1150, in compile_fx_aot
compiled_lib_path = compile_fx(
^^^^^^^^^^^
File "/home/warden/source/torchchat/.venv/lib/python3.11/site-packages/torch/_inductor/compile_fx.py", line 1297, in compile_fx
return compile_fx(
^^^^^^^^^^^
File "/home/warden/source/torchchat/.venv/lib/python3.11/site-packages/torch/_inductor/compile_fx.py", line 1329, in compile_fx
return compile_fx(
^^^^^^^^^^^
File "/home/warden/source/torchchat/.venv/lib/python3.11/site-packages/torch/_inductor/compile_fx.py", line 1551, in compile_fx
return inference_compiler(unlifted_gm, example_inputs_)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/warden/source/torchchat/.venv/lib/python3.11/site-packages/torch/_inductor/compile_fx.py", line 1384, in fw_compiler_base
return _fw_compiler_base(model, example_inputs, is_inference)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/warden/source/torchchat/.venv/lib/python3.11/site-packages/torch/_inductor/compile_fx.py", line 1455, in _fw_compiler_base
return inner_compile(
^^^^^^^^^^^^^^
File "/usr/lib/python3.11/contextlib.py", line 81, in inner
return func(*args, **kwds)
^^^^^^^^^^^^^^^^^^^
File "/home/warden/source/torchchat/.venv/lib/python3.11/site-packages/torch/_inductor/compile_fx.py", line 465, in compile_fx_inner
return wrap_compiler_debug(_compile_fx_inner, compiler_name="inductor")(
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/warden/source/torchchat/.venv/lib/python3.11/site-packages/torch/_dynamo/repro/after_aot.py", line 85, in debug_wrapper
inner_compiled_fn = compiler_fn(gm, example_inputs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/warden/source/torchchat/.venv/lib/python3.11/site-packages/torch/_inductor/compile_fx.py", line 660, in _compile_fx_inner
compiled_graph = codegen_and_compile(
^^^^^^^^^^^^^^^^^^^^
File "/home/warden/source/torchchat/.venv/lib/python3.11/site-packages/torch/_inductor/compile_fx.py", line 560, in codegen_and_compile
compiled_graph = fx_codegen_and_compile(gm, example_inputs, **fx_kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/warden/source/torchchat/.venv/lib/python3.11/site-packages/torch/_inductor/compile_fx.py", line 872, in fx_codegen_and_compile
compiled_fn = graph.compile_to_fn()
^^^^^^^^^^^^^^^^^^^^^
File "/home/warden/source/torchchat/.venv/lib/python3.11/site-packages/torch/_inductor/graph.py", line 1933, in compile_to_fn
code, linemap = self.codegen_with_cpp_wrapper()
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/warden/source/torchchat/.venv/lib/python3.11/site-packages/torch/_inductor/graph.py", line 1725, in codegen_with_cpp_wrapper
compiled = self.compile_to_module().call
^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/warden/source/torchchat/.venv/lib/python3.11/site-packages/torch/_inductor/graph.py", line 1877, in compile_to_module
return self._compile_to_module()
^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/warden/source/torchchat/.venv/lib/python3.11/site-packages/torch/_inductor/graph.py", line 1883, in _compile_to_module
self.codegen_with_cpp_wrapper() if self.cpp_wrapper else self.codegen()
^^^^^^^^^^^^^^
File "/home/warden/source/torchchat/.venv/lib/python3.11/site-packages/torch/_inductor/graph.py", line 1822, in codegen
self.scheduler.codegen()
File "/home/warden/source/torchchat/.venv/lib/python3.11/site-packages/torch/_inductor/scheduler.py", line 3423, in codegen
return self._codegen()
^^^^^^^^^^^^^^^
File "/home/warden/source/torchchat/.venv/lib/python3.11/site-packages/torch/_inductor/scheduler.py", line 3501, in _codegen
self.get_backend(device).codegen_node(node)
File "/home/warden/source/torchchat/.venv/lib/python3.11/site-packages/torch/_inductor/codegen/cuda_combined_scheduling.py", line 80, in codegen_node
return self._triton_scheduling.codegen_node(node)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/warden/source/torchchat/.venv/lib/python3.11/site-packages/torch/_inductor/codegen/simd.py", line 1194, in codegen_node
return self.codegen_node_schedule(node_schedule, buf_accesses, numel, rnumel)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/warden/source/torchchat/.venv/lib/python3.11/site-packages/torch/_inductor/codegen/simd.py", line 1403, in codegen_node_schedule
src_code = kernel.codegen_kernel()
^^^^^^^^^^^^^^^^^^^^^^^
File "/home/warden/source/torchchat/.venv/lib/python3.11/site-packages/torch/_inductor/codegen/triton.py", line 2760, in codegen_kernel
**self.inductor_meta_common(),
^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/warden/source/torchchat/.venv/lib/python3.11/site-packages/torch/_inductor/codegen/triton.py", line 2626, in inductor_meta_common
"backend_hash": torch.utils._triton.triton_hash_with_backend(),
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/warden/source/torchchat/.venv/lib/python3.11/site-packages/torch/utils/_triton.py", line 65, in triton_hash_with_backend
backend = triton_backend()
^^^^^^^^^^^^^^^^
File "/home/warden/source/torchchat/.venv/lib/python3.11/site-packages/torch/utils/_triton.py", line 57, in triton_backend
target = driver.active.get_current_target()
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/warden/source/torchchat/.venv/lib/python3.11/site-packages/triton/runtime/driver.py", line 23, in __getattr__
self._initialize_obj()
File "/home/warden/source/torchchat/.venv/lib/python3.11/site-packages/triton/runtime/driver.py", line 20, in _initialize_obj
self._obj = self._init_fn()
^^^^^^^^^^^^^^^
File "/home/warden/source/torchchat/.venv/lib/python3.11/site-packages/triton/runtime/driver.py", line 9, in _create_driver
return actives[0]()
^^^^^^^^^^^^
File "/home/warden/source/torchchat/.venv/lib/python3.11/site-packages/triton/backends/nvidia/driver.py", line 371, in __init__
self.utils = CudaUtils() # TODO: make static
^^^^^^^^^^^
File "/home/warden/source/torchchat/.venv/lib/python3.11/site-packages/triton/backends/nvidia/driver.py", line 80, in __init__
mod = compile_module_from_src(Path(os.path.join(dirname, "driver.c")).read_text(), "cuda_utils")
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/warden/source/torchchat/.venv/lib/python3.11/site-packages/triton/backends/nvidia/driver.py", line 57, in compile_module_from_src
so = _build(name, src_path, tmpdir, library_dirs(), include_dir, libraries)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/warden/source/torchchat/.venv/lib/python3.11/site-packages/triton/runtime/build.py", line 48, in _build
ret = subprocess.check_call(cc_cmd)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/usr/lib/python3.11/subprocess.py", line 413, in check_call
raise CalledProcessError(retcode, cmd)
subprocess.CalledProcessError: Command '['/usr/bin/gcc', '/tmp/tmpie2hawx7/main.c', '-O3', '-shared', '-fPIC', '-o', '/tmp/tmpie2hawx7/cuda_utils.cpython-311-x86_64-linux-gnu.so', '-lcuda', '-L/home/warden/source/torchchat/.venv/lib/python3.11/site-packages/triton/backends/nvidia/lib', '-L/usr/lib/wsl/lib', '-I/home/warden/source/torchchat/.venv/lib/python3.11/site-packages/triton/backends/nvidia/include', '-I/tmp/tmpie2hawx7', '-I/usr/include/python3.11']' returned non-zero exit status 1.
Versions
Operating System Information
Linux Furiosa 5.15.153.1-microsoft-standard-WSL2 #1 SMP Fri Mar 29 23:14:13 UTC 2024 x86_64 x86_64 x86_64 GNU/Linux
PRETTY_NAME="Ubuntu 24.04.1 LTS"
NAME="Ubuntu"
VERSION_ID="24.04"
VERSION="24.04.1 LTS (Noble Numbat)"
VERSION_CODENAME=noble
ID=ubuntu
ID_LIKE=debian
HOME_URL="https://www.ubuntu.com/"
SUPPORT_URL="https://help.ubuntu.com/"
BUG_REPORT_URL="https://bugs.launchpad.net/ubuntu/"
PRIVACY_POLICY_URL="https://www.ubuntu.com/legal/terms-and-policies/privacy-policy"
UBUNTU_CODENAME=noble
LOGO=ubuntu-logo
Python Version
Python 3.11.10
PIP Version
pip 24.0 from /home/warden/source/torchchat/.venv/lib/python3.11/site-packages/pip (python 3.11)
Installed Packages
absl-py==2.1.0
accelerate==1.0.0
aiohappyeyeballs==2.4.3
aiohttp==3.10.9
aiosignal==1.3.1
altair==5.4.1
annotated-types==0.7.0
antlr4-python3-runtime==4.9.3
anyio==4.6.0
attrs==24.2.0
blinker==1.8.2
blobfile==3.0.0
cachetools==5.5.0
certifi==2024.8.30
chardet==5.2.0
charset-normalizer==3.4.0
click==8.1.7
cmake==3.30.4
colorama==0.4.6
DataProperty==1.0.1
datasets==3.0.1
dill==0.3.8
distro==1.9.0
evaluate==0.4.3
filelock==3.16.1
Flask==3.0.3
frozenlist==1.4.1
fsspec==2024.6.1
gguf==0.10.0
gitdb==4.0.11
GitPython==3.1.43
h11==0.14.0
httpcore==1.0.6
httpx==0.27.2
huggingface-hub==0.25.2
idna==3.10
itsdangerous==2.2.0
Jinja2==3.1.4
jiter==0.6.1
joblib==1.4.2
jsonlines==4.0.0
jsonschema==4.23.0
jsonschema-specifications==2024.10.1
lm_eval==0.4.2
lxml==5.3.0
markdown-it-py==3.0.0
MarkupSafe==3.0.1
mbstrdecoder==1.1.3
mdurl==0.1.2
more-itertools==10.5.0
mpmath==1.3.0
multidict==6.1.0
multiprocess==0.70.16
narwhals==1.9.2
networkx==3.4
ninja==1.11.1.1
nltk==3.9.1
numexpr==2.10.1
numpy==1.26.4
nvidia-cublas-cu12==12.1.3.1
nvidia-cuda-cupti-cu12==12.1.105
nvidia-cuda-nvrtc-cu12==12.1.105
nvidia-cuda-runtime-cu12==12.1.105
nvidia-cudnn-cu12==9.1.0.70
nvidia-cufft-cu12==11.0.2.54
nvidia-curand-cu12==10.3.2.106
nvidia-cusolver-cu12==11.4.5.107
nvidia-cusparse-cu12==12.1.0.106
nvidia-nccl-cu12==2.21.5
nvidia-nvjitlink-cu12==12.6.77
nvidia-nvtx-cu12==12.1.105
omegaconf==2.3.0
openai==1.51.2
packaging==24.1
pandas==2.2.3
pathvalidate==3.2.1
peft==0.13.1
pillow==10.4.0
portalocker==2.10.1
propcache==0.2.0
protobuf==5.28.2
psutil==6.0.0
pyarrow==17.0.0
pybind11==2.13.6
pycryptodomex==3.21.0
pydantic==2.9.2
pydantic_core==2.23.4
pydeck==0.9.1
Pygments==2.18.0
pytablewriter==1.2.0
python-dateutil==2.9.0.post0
pytorch-triton==3.1.0+cf34004b8a
pytz==2024.2
PyYAML==6.0.2
referencing==0.35.1
regex==2024.9.11
requests==2.32.3
rich==13.9.2
rouge-score==0.1.2
rpds-py==0.20.0
sacrebleu==2.4.3
safetensors==0.4.5
scikit-learn==1.5.2
scipy==1.14.1
sentencepiece==0.2.0
six==1.16.0
smmap==5.0.1
snakeviz==2.2.0
sniffio==1.3.1
sqlitedict==2.1.0
streamlit==1.39.0
sympy==1.13.1
tabledata==1.3.3
tabulate==0.9.0
tcolorpy==0.1.6
tenacity==9.0.0
threadpoolctl==3.5.0
tiktoken==0.8.0
tokenizers==0.20.1
toml==0.10.2
torch==2.6.0.dev20241002+cu121
torchao==0.5.0
torchtune==0.3.0.dev20240928+cu121
torchvision==0.20.0.dev20241002+cu121
tornado==6.4.1
tqdm==4.66.5
tqdm-multiprocess==0.0.11
transformers==4.45.2
typepy==1.3.2
typing_extensions==4.12.2
tzdata==2024.2
urllib3==2.2.3
watchdog==5.0.3
Werkzeug==3.0.4
word2number==1.1
xxhash==3.5.0
yarl==1.14.0
zstandard==0.23.0
zstd==1.5.5.1
PyTorch Version
2.6.0.dev20241002+cu121