-
Notifications
You must be signed in to change notification settings - Fork 1.3k
HPU support #3378
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
HPU support #3378
Changes from 163 commits
4f462b0
7b51103
9d7376e
069b88a
cd3cbb9
2493abe
32cbc88
5fd4de2
7f72745
f66c5df
2a4130d
fa1bc44
d3e24c5
00cc283
97081da
ddcb3ca
6de389c
ae9a76b
5b8b0b2
a2f8040
6abecdd
7bc37dc
ef1de61
1b6905e
defe3fa
9551ce3
9c84fe7
6f00591
c94bfbd
61235d3
0896a50
e974758
ee08748
6f0fbe4
c5c50c6
34010c9
9f75a6e
e80b484
5cacc31
f006c4e
19e652a
40d22b1
eb37c43
dc4ca51
74b307a
5a6d5ef
5a1c0c9
50d9e71
f0579e8
dfc82ec
176e3d2
6c688d0
b078e90
0dcb46a
5abb1a4
b63a6fa
36f8794
ab5cbb0
e318161
0c040c3
6f5977e
f1e196f
2772b68
7d1ef62
427c313
be91183
5c0cd84
ae1431a
d383ea5
0b62d52
f2504a5
f5cf0d5
ac434c2
1501105
8b5708e
8935766
d8301cd
6ce9e3a
42775d2
788e95f
03b391e
2247739
07ba582
647dfab
8e63b29
6b1d131
7803291
2883ca1
007d4a8
9c12fae
324d6df
839c6be
3e548f4
f67a898
f449d3f
79ef8a5
f772b76
6218cec
31872f6
610c68b
347db07
5fc5a2a
dc7a773
9606f0d
6b77bc4
d556021
e2fe2cc
05e6861
ef6192c
59b51e5
c6731f5
66ec449
28dae91
ec9c562
53f99c3
5f9928d
ddbece5
72bd312
506d07e
ae67bcc
405b857
27be94c
4e0e966
e2a8d85
03e2646
3ed87c1
2dcab3e
55b0d3c
bd2afc3
75e5b81
e5dfad4
ed84e7b
a05e54a
eb0b3a3
7b2650a
9b227d8
8cf20cd
7c4897b
d0485f1
c37aefd
bdae68d
d919931
efd2a27
394b687
4f76d2c
b3dd375
17d43ab
db16287
e359c01
e9cfca4
ac41600
8571ef4
3115ee4
7c6a44a
e8f9a48
3face36
06c1f53
75aaabd
21fca86
a99c297
81a37be
92775af
ce13eeb
04983cc
5efbe8c
4847474
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,80 @@ | ||
| name: Gaudi1 tests (scheduled) | ||
|
|
||
| on: | ||
| workflow_dispatch: | ||
| pull_request: | ||
| branches: | ||
| - main | ||
IlyasMoutawwakil marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
| schedule: | ||
| - cron: "0 2 * * *" | ||
|
|
||
| concurrency: | ||
| group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }} | ||
| cancel-in-progress: true | ||
|
|
||
| jobs: | ||
| run_gaudi1_tests: | ||
| name: Test on Gaudi1 | ||
| runs-on: | ||
| group: aws-dl1-24xlarge | ||
|
|
||
| container: | ||
| image: docker://vault.habana.ai/gaudi-docker/1.20.0/ubuntu22.04/habanalabs/pytorch-installer-2.6.0:latest | ||
| options: --runtime=habana --shm-size=64G --cap-add=sys_nice --env HABANA_VISIBLE_DEVICES=0,1 | ||
| env: | ||
| OMPI_MCA_btl_vader_single_copy_mechanism: none | ||
| PT_ENABLE_INT64_SUPPORT: 1 | ||
| PT_HPU_LAZY_MODE: 0 | ||
| RUN_SLOW: 1 | ||
|
|
||
| steps: | ||
| - name: HL-SMI (1) | ||
| run: | | ||
| hl-smi | ||
| echo "HABANA_VISIBLE_DEVICES=${HABANA_VISIBLE_DEVICES}" | ||
| echo "HABANA_VISIBLE_MODULES=${HABANA_VISIBLE_MODULES}" | ||
|
|
||
| - name: Extract HPU visible modules | ||
| id: add-modules | ||
| run: | | ||
| export HABANA_VISIBLE_MODULES=$(hl-smi -Q module_id -f csv,noheader | tr '\n' ',' | sed 's/,$//') | ||
| echo "HABANA_VISIBLE_MODULES=${HABANA_VISIBLE_MODULES}" >> $GITHUB_ENV | ||
|
|
||
| - name: HL-SMI (2) | ||
| run: | | ||
| hl-smi | ||
| echo "HABANA_VISIBLE_DEVICES=${HABANA_VISIBLE_DEVICES}" | ||
| echo "HABANA_VISIBLE_MODULES=${HABANA_VISIBLE_MODULES}" | ||
|
|
||
| - name: Checkout to Accelerate | ||
| uses: actions/checkout@v4 | ||
|
|
||
| - name: Install Accelerate with Transformers & DeepSpeed | ||
| run: | | ||
| pip install -e .[testing] \ | ||
| git+https://github.com/HabanaAI/DeepSpeed.git@1.20.0 \ | ||
| git+https://github.com/huggingface/transformers.git@hpu-support | ||
|
|
||
| - name: Run CLI tests | ||
| run: | | ||
| make test_cli | ||
|
|
||
| - name: Run Core tests | ||
| run: | | ||
| make test_core | ||
|
|
||
| - name: Run Big Modeling tests | ||
| run: | | ||
| make test_big_modeling | ||
|
|
||
| - name: Run FSDP integration tests | ||
| run: | | ||
| make test_fsdp | ||
|
|
||
| - name: Run DeepSpeed integration tests | ||
| run: | | ||
| make test_deepspeed | ||
|
|
||
| - name: Run Examples tests | ||
| run: | | ||
| make test_examples | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -28,7 +28,7 @@ test_big_modeling: | |
|
|
||
| test_core: | ||
| python -m pytest -s -v ./tests/ --ignore=./tests/test_examples.py --ignore=./tests/deepspeed --ignore=./tests/test_big_modeling.py \ | ||
| --ignore=./tests/fsdp --ignore=./tests/test_cli.py $(if $(IS_GITHUB_CI),--report-log "$(PYTORCH_VERSION)_core.log",) | ||
| --ignore=./tests/fsdp --ignore=./tests/tp --ignore=./tests/test_cli.py $(if $(IS_GITHUB_CI),--report-log "$(PYTORCH_VERSION)_core.log",) | ||
|
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. not sure TP should be part of test_core, tell me if you want me to revert this.
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. yeah i don't think we want that cc @muellerzr
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Agreed |
||
|
|
||
| test_cli: | ||
| python -m pytest -s -v ./tests/test_cli.py $(if $(IS_GITHUB_CI),--report-log "$(PYTORCH_VERSION)_cli.log",) | ||
|
|
@@ -39,6 +39,9 @@ test_deepspeed: | |
| test_fsdp: | ||
| python -m pytest -s -v ./tests/fsdp $(if $(IS_GITHUB_CI),--report-log "$(PYTORCH_VERSION)_fsdp.log",) | ||
|
|
||
| test_tp: | ||
| python -m pytest -s -v ./tests/tp $(if $(IS_GITHUB_CI),--report-log "$(PYTORCH_VERSION)_tp.log",) | ||
|
|
||
| # Since the new version of pytest will *change* how things are collected, we need `deepspeed` to | ||
| # run after test_core and test_cli | ||
| test: | ||
|
|
@@ -47,13 +50,14 @@ test: | |
| $(MAKE) test_big_modeling | ||
| $(MAKE) test_deepspeed | ||
| $(MAKE) test_fsdp | ||
| $(MAKE) test_tp | ||
|
|
||
| test_examples: | ||
| python -m pytest -s -v ./tests/test_examples.py $(if $(IS_GITHUB_CI),--report-log "$(PYTORCH_VERSION)_examples.log",) | ||
|
|
||
| # Broken down example tests for the CI runners | ||
| test_integrations: | ||
| python -m pytest -s -v ./tests/deepspeed ./tests/fsdp $(if $(IS_GITHUB_CI),--report-log "$(PYTORCH_VERSION)_integrations.log",) | ||
| python -m pytest -s -v ./tests/deepspeed ./tests/fsdp ./tests/tp $(if $(IS_GITHUB_CI),--report-log "$(PYTORCH_VERSION)_integrations.log",) | ||
|
|
||
| test_example_differences: | ||
| python -m pytest -s -v ./tests/test_examples.py::ExampleDifferenceTests $(if $(IS_GITHUB_CI),--report-log "$(PYTORCH_VERSION)_example_diff.log",) | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -22,7 +22,7 @@ | |
| "ruff ~= 0.6.4", | ||
| ] | ||
| extras["docs"] = [] | ||
| extras["test_prod"] = ["pytest>=7.2.0,<=8.0.0", "pytest-xdist", "pytest-subtests", "parameterized"] | ||
| extras["test_prod"] = ["pytest>=7.2.0,<=8.0.0", "pytest-xdist", "pytest-subtests", "parameterized", "pytest-order"] | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. TIL 👀 |
||
| extras["test_dev"] = [ | ||
| "datasets", | ||
| "diffusers", | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
will be removed and only `schedule` will stay before merge.