Skip to content

Commit 1dd9e46

Browse files
Merge remote-tracking branch 'origin/transformers_future' into upstream-accelerate
2 parents 356fcfd + e802f5f commit 1dd9e46

File tree

10 files changed

+245
-46
lines changed

10 files changed

+245
-46
lines changed

.github/workflows/fast_tests.yml

Lines changed: 57 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,11 @@
11
name: Unit and integration tests
22

3-
43
on:
54
workflow_dispatch:
65
pull_request:
7-
branches: [ main ]
6+
branches: [main]
87
push:
9-
branches: [ main ]
8+
branches: [main]
109

1110
concurrency:
1211
group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
@@ -16,50 +15,69 @@ jobs:
1615
transformers:
1716
name: Run tests for optimum.habana.transformers
1817
runs-on: [self-hosted, linux, x64, gaudi2, fast]
18+
19+
container:
20+
image: docker://vault.habana.ai/gaudi-docker/1.20.0/ubuntu22.04/habanalabs/pytorch-installer-2.6.0:latest
21+
options: --runtime=habana --shm-size=64G --env HABANA_VISIBLE_DEVICES
22+
env:
23+
OMPI_MCA_btl_vader_single_copy_mechanism: none
24+
1925
steps:
20-
- name: Checkout
21-
uses: actions/checkout@v2
22-
- name: Pull image
26+
- name: HL-SMI (1)
27+
run: |
28+
hl-smi
29+
echo "HABANA_VISIBLE_DEVICES=${HABANA_VISIBLE_DEVICES}"
30+
echo "HABANA_VISIBLE_MODULES=${HABANA_VISIBLE_MODULES}"
31+
32+
- name: Extract HPU visible modules
2333
run: |
24-
docker pull vault.habana.ai/gaudi-docker/1.20.0/ubuntu22.04/habanalabs/pytorch-installer-2.6.0:latest
34+
export HABANA_VISIBLE_MODULES=$(hl-smi -Q module_id -f csv,noheader | tr '\n' ',' | sed 's/,$//')
35+
echo "HABANA_VISIBLE_MODULES=${HABANA_VISIBLE_MODULES}" >> $GITHUB_ENV
36+
37+
- name: HL-SMI (2)
38+
run: |
39+
hl-smi
40+
echo "HABANA_VISIBLE_DEVICES=${HABANA_VISIBLE_DEVICES}"
41+
echo "HABANA_VISIBLE_MODULES=${HABANA_VISIBLE_MODULES}"
42+
43+
- name: Checkout
44+
uses: actions/checkout@v4
45+
2546
- name: Run tests
2647
run: |
27-
docker run \
28-
--rm \
29-
-v $PWD:/root/workspace \
30-
-v /scratch-1:/data \
31-
--workdir=/root/workspace \
32-
--runtime=habana \
33-
-e HABANA_VISIBLE_DEVICES=$DOCKER_HABANA_VISIBLE_DEVICES \
34-
-e OMPI_MCA_btl_vader_single_copy_mechanism=none \
35-
-e HF_HOME=/data \
36-
--cap-add=sys_nice \
37-
--net=host \
38-
--ipc=host \
39-
vault.habana.ai/gaudi-docker/1.20.0/ubuntu22.04/habanalabs/pytorch-installer-2.6.0:latest \
40-
/bin/bash tests/ci/fast_tests.sh
48+
/bin/bash tests/ci/fast_tests.sh
49+
4150
diffusers:
4251
name: Run tests for optimum.habana.diffusers
4352
runs-on: [self-hosted, linux, x64, gaudi2, fast]
53+
54+
container:
55+
image: docker://vault.habana.ai/gaudi-docker/1.20.0/ubuntu22.04/habanalabs/pytorch-installer-2.6.0:latest
56+
options: --runtime=habana --shm-size=64G --env HABANA_VISIBLE_DEVICES
57+
env:
58+
OMPI_MCA_btl_vader_single_copy_mechanism: none
59+
4460
steps:
45-
- name: Checkout
46-
uses: actions/checkout@v2
47-
- name: Pull image
61+
- name: HL-SMI (1)
62+
run: |
63+
hl-smi
64+
echo "HABANA_VISIBLE_DEVICES=${HABANA_VISIBLE_DEVICES}"
65+
echo "HABANA_VISIBLE_MODULES=${HABANA_VISIBLE_MODULES}"
66+
67+
- name: Extract HPU visible modules
4868
run: |
49-
docker pull vault.habana.ai/gaudi-docker/1.20.0/ubuntu22.04/habanalabs/pytorch-installer-2.6.0:latest
69+
export HABANA_VISIBLE_MODULES=$(hl-smi -Q module_id -f csv,noheader | tr '\n' ',' | sed 's/,$//')
70+
echo "HABANA_VISIBLE_MODULES=${HABANA_VISIBLE_MODULES}" >> $GITHUB_ENV
71+
72+
- name: HL-SMI (2)
73+
run: |
74+
hl-smi
75+
echo "HABANA_VISIBLE_DEVICES=${HABANA_VISIBLE_DEVICES}"
76+
echo "HABANA_VISIBLE_MODULES=${HABANA_VISIBLE_MODULES}"
77+
78+
- name: Checkout
79+
uses: actions/checkout@v4
80+
5081
- name: Run tests
5182
run: |
52-
docker run \
53-
--rm \
54-
-v $PWD:/root/workspace \
55-
-v /scratch-1:/data \
56-
--workdir=/root/workspace \
57-
--runtime=habana \
58-
-e HABANA_VISIBLE_DEVICES=$DOCKER_HABANA_VISIBLE_DEVICES \
59-
-e OMPI_MCA_btl_vader_single_copy_mechanism=none \
60-
-e HF_HOME=/data \
61-
--cap-add=sys_nice \
62-
--net=host \
63-
--ipc=host \
64-
vault.habana.ai/gaudi-docker/1.20.0/ubuntu22.04/habanalabs/pytorch-installer-2.6.0:latest \
65-
/bin/bash tests/ci/fast_tests_diffusers.sh
83+
/bin/bash tests/ci/fast_tests_diffusers.sh

.github/workflows/upstream.yml

Lines changed: 146 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,146 @@
1+
name: Upstream Integrations
2+
3+
on:
4+
workflow_dispatch:
5+
schedule:
6+
# every monday at 00:00 UTC
7+
- cron: "0 0 * * 1"
8+
9+
concurrency:
10+
group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
11+
cancel-in-progress: true
12+
13+
jobs:
14+
transformers:
15+
name: Upstream Transformers
16+
runs-on: [self-hosted, linux, x64, gaudi2, fast]
17+
18+
container:
19+
image: docker://vault.habana.ai/gaudi-docker/1.20.0/ubuntu22.04/habanalabs/pytorch-installer-2.6.0:latest
20+
options: --runtime=habana --shm-size=64G --env HABANA_VISIBLE_DEVICES
21+
env:
22+
OMPI_MCA_btl_vader_single_copy_mechanism: none
23+
RUN_THIRD_PARTY_DEVICE_TESTS: 1
24+
TRANSFORMERS_TEST_DEVICE: hpu
25+
PT_ENABLE_INT64_SUPPORT: 1
26+
PT_HPU_LAZY_MODE: 0
27+
RUN_SLOW: 1
28+
29+
steps:
30+
- name: HL-SMI (1)
31+
run: |
32+
hl-smi
33+
echo "HABANA_VISIBLE_DEVICES=${HABANA_VISIBLE_DEVICES}"
34+
echo "HABANA_VISIBLE_MODULES=${HABANA_VISIBLE_MODULES}"
35+
36+
- name: Extract HPU visible modules
37+
run: |
38+
export HABANA_VISIBLE_MODULES=$(hl-smi -Q module_id -f csv,noheader | tr '\n' ',' | sed 's/,$//')
39+
echo "HABANA_VISIBLE_MODULES=${HABANA_VISIBLE_MODULES}" >> $GITHUB_ENV
40+
41+
- name: HL-SMI (2)
42+
run: |
43+
hl-smi
44+
echo "HABANA_VISIBLE_DEVICES=${HABANA_VISIBLE_DEVICES}"
45+
echo "HABANA_VISIBLE_MODULES=${HABANA_VISIBLE_MODULES}"
46+
47+
- name: Checkout to Transformers
48+
uses: actions/checkout@v4
49+
with:
50+
repository: huggingface/transformers
51+
52+
- name: Install Transformers with Accelerate & DeepSpeed
53+
run: |
54+
pip install -e .[testing] "numpy<2.0.0" scipy scikit-learn \
55+
git+https://github.com/HabanaAI/DeepSpeed.git@1.20.0 \
56+
git+https://github.com/huggingface/accelerate.git
57+
58+
- name: Run Trainer tests
59+
run: |
60+
pytest tests/trainer/test_trainer.py -s -vvvv
61+
62+
- name: Run Trainer Utils tests
63+
run: |
64+
pytest tests/trainer/test_trainer_utils.py -s -vvvv
65+
66+
- name: Run Trainer Seq2Seq tests
67+
run: |
68+
pytest tests/trainer/test_trainer_seq2seq.py -s -vvvv
69+
70+
- name: Run Trainer Distributed tests
71+
run: |
72+
pytest tests/trainer/test_trainer_distributed.py -s -vvvv
73+
74+
- name: Run FSDP Integration tests
75+
run: |
76+
pytest tests/fsdp/test_fsdp.py tests/trainer/test_trainer_fsdp.py -s -vvvv
77+
78+
- name: Run DeepSpeed Integration tests
79+
run: |
80+
pytest tests/deepspeed/test_deepspeed.py -s -vvvv
81+
82+
accelerate:
83+
name: Upstream Accelerate
84+
runs-on: [self-hosted, linux, x64, gaudi2, fast]
85+
86+
container:
87+
image: docker://vault.habana.ai/gaudi-docker/1.20.0/ubuntu22.04/habanalabs/pytorch-installer-2.6.0:latest
88+
options: --runtime=habana --shm-size=64G --cap-add=sys_nice --env HABANA_VISIBLE_DEVICES
89+
env:
90+
OMPI_MCA_btl_vader_single_copy_mechanism: none
91+
PT_ENABLE_INT64_SUPPORT: 1
92+
PT_HPU_LAZY_MODE: 0
93+
RUN_SLOW: 1
94+
95+
steps:
96+
- name: HL-SMI (1)
97+
run: |
98+
hl-smi
99+
echo "HABANA_VISIBLE_DEVICES=${HABANA_VISIBLE_DEVICES}"
100+
echo "HABANA_VISIBLE_MODULES=${HABANA_VISIBLE_MODULES}"
101+
102+
- name: Extract HPU visible modules
103+
run: |
104+
export HABANA_VISIBLE_MODULES=$(hl-smi -Q module_id -f csv,noheader | tr '\n' ',' | sed 's/,$//')
105+
echo "HABANA_VISIBLE_MODULES=${HABANA_VISIBLE_MODULES}" >> $GITHUB_ENV
106+
107+
- name: HL-SMI (2)
108+
run: |
109+
hl-smi
110+
echo "HABANA_VISIBLE_DEVICES=${HABANA_VISIBLE_DEVICES}"
111+
echo "HABANA_VISIBLE_MODULES=${HABANA_VISIBLE_MODULES}"
112+
113+
- name: Checkout to Accelerate
114+
uses: actions/checkout@v4
115+
with:
116+
repository: huggingface/accelerate
117+
118+
- name: Install Accelerate with Transformers & DeepSpeed
119+
run: |
120+
pip install -e .[testing] \
121+
git+https://github.com/huggingface/transformers.git \
122+
git+https://github.com/HabanaAI/DeepSpeed.git@1.20.0
123+
124+
- name: Run CLI tests
125+
run: |
126+
make test_cli
127+
128+
- name: Run Core tests
129+
run: |
130+
make test_core
131+
132+
- name: Run Big Modeling tests
133+
run: |
134+
make test_big_modeling
135+
136+
- name: Run FSDP integration tests
137+
run: |
138+
make test_fsdp
139+
140+
- name: Run DeepSpeed integration tests
141+
run: |
142+
make test_deepspeed
143+
144+
- name: Run Examples tests
145+
run: |
146+
make test_examples

examples/language-modeling/run_clm.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -459,7 +459,7 @@ def main():
459459

460460
# Note that chatglm2/3 has float16 dtype from config.json, and on Gaudi we need to use bfloat16.
461461
if config.model_type == "chatglm":
462-
config.dtype = "torch.bfloat16"
462+
config.torch_dtype = torch.bfloat16
463463

464464
tokenizer_kwargs = {
465465
"cache_dir": model_args.cache_dir,
@@ -484,6 +484,11 @@ def main():
484484
if model_args.torch_dtype in ["auto", None]
485485
else getattr(torch, model_args.torch_dtype)
486486
)
487+
# workaround for https://github.com/huggingface/transformers/issues/36258
488+
# TODO: remove after fix is available in a release version of `transformers`
489+
if torch_dtype is None:
490+
torch_dtype = getattr(config, "torch_dtype", None)
491+
487492
model = AutoModelForCausalLM.from_pretrained(
488493
model_args.model_name_or_path,
489494
from_tf=bool(".ckpt" in model_args.model_name_or_path),

optimum/habana/transformers/models/deepseek_v3/modeling_deepseek_v3.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1692,7 +1692,6 @@ def forward(
16921692

16931693
hidden_states = outputs[0]
16941694
logits = self.lm_head(hidden_states)
1695-
logits = logits.float()
16961695

16971696
loss = None
16981697
if labels is not None:

optimum/habana/transformers/models/gpt2/modeling_gpt2.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,7 @@ def _upcast_and_reordered_attn(self, query, key, value, attention_mask=None, hea
7070
attn_weights = attn_weights * head_mask
7171

7272
attn_output = torch.matmul(attn_weights, value)
73+
attn_output = attn_output.transpose(1, 2)
7374

7475
return attn_output, attn_weights
7576

optimum/habana/transformers/models/gpt_neox/modeling_gpt_neox.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -269,6 +269,7 @@ def gaudi_gpt_neox_model_forward(
269269
return_dict: Optional[bool] = None,
270270
cache_position: Optional[torch.LongTensor] = None,
271271
token_idx: Optional[torch.Tensor] = None,
272+
**kwargs,
272273
) -> Union[Tuple, BaseModelOutputWithPast]:
273274
"""
274275
Copied from GPTNeoxModel.forward: https://github.com/huggingface/transformers/blob/main/src/transformers/models/gpt_neox/modeling_gpt_neox.py

optimum/habana/transformers/models/mllama/modeling_mllama.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1152,8 +1152,11 @@ def _update_model_kwargs_for_generation(self, outputs, model_kwargs, is_encoder_
11521152
# add cross-attn mask for new token
11531153
if cross_attention_mask_prev is not None:
11541154
token_idx = model_kwargs.get("token_idx", None)
1155+
token_idx_cpu = model_kwargs.get(
1156+
"token_idx_cpu", None
1157+
) # returns an integer so following slicing ops happen using int instead of tensor
11551158
if token_idx is not None:
1156-
mask = cross_attention_mask_prev[:, token_idx - 2 : token_idx - 1, ...]
1159+
mask = cross_attention_mask_prev[:, token_idx_cpu - 2 : token_idx_cpu - 1, ...]
11571160
cross_attention_mask_prev.index_copy_(1, token_idx - 1, mask)
11581161
model_kwargs["cross_attention_mask"] = cross_attention_mask_prev
11591162
else:

optimum/habana/transformers/models/qwen2_vl/modeling_qwen2_vl.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -517,8 +517,9 @@ def forward(
517517

518518
# from: https://github.com/huggingface/transformers/blob/v4.45.2/src/transformers/models/qwen2_vl/modeling_qwen2_vl.py#L1420
519519
class GaudiQwen2VLForConditionalGeneration(Qwen2VLForConditionalGeneration):
520-
# todo: change when the following gets fixed https://github.com/huggingface/transformers/blame/66f29aaaf55c8fe0c3dbcd24beede2ca4effac56/src/transformers/models/qwen2_5_vl/modeling_qwen2_5_vl.py#L390C5-L390C27
520+
# todo: change when the following gets fixed https://github.com/huggingface/transformers/blame/66f29aaaf55c8fe0c3dbcd24beede2ca4effac56/src/transformers/models/qwen2_5_vl/modeling_qwen2_5_vl.py#L390C5-L390C27
521521
_supports_static_cache = True
522+
522523
def forward(
523524
self,
524525
input_ids: torch.LongTensor = None,

optimum/habana/transformers/trainer.py

Lines changed: 20 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -935,6 +935,7 @@ def _gradient_checkpointing_wrap(func, *args, **kwargs):
935935
train_dataloader,
936936
len_dataloader,
937937
num_examples,
938+
steps_trained_in_current_epoch,
938939
)
939940

940941
hb_profiler = HabanaProfile(
@@ -1584,7 +1585,6 @@ def _prepare_input(self, data: Union[torch.Tensor, Any]) -> Union[torch.Tensor,
15841585
return data.to(**kwargs)
15851586
return data
15861587

1587-
15881588
# handled by accelerate now (in model preparation)
15891589
# def autocast_smart_context_manager(self, cache_enabled: Optional[bool] = True):
15901590
# """
@@ -2643,7 +2643,14 @@ def _zero_model_grad(self, model):
26432643
model._zero_grad_kwargs = {}
26442644

26452645
def get_num_items_in_batches(
2646-
self, args, epochs_trained, num_train_epochs, train_dataloader, len_dataloader, num_examples
2646+
self,
2647+
args,
2648+
epochs_trained,
2649+
num_train_epochs,
2650+
train_dataloader,
2651+
len_dataloader,
2652+
num_examples,
2653+
steps_trained_in_current_epoch,
26472654
):
26482655
"""
26492656
Calculate the number of items in each batch for all epochs during training.
@@ -2659,10 +2666,15 @@ def get_num_items_in_batches(
26592666
total_updates = steps_in_epoch // args.gradient_accumulation_steps + 1
26602667
if args.gradient_accumulation_steps == 1:
26612668
total_updates -= 1
2669+
global_step = 0
26622670

26632671
num_items_in_batches = []
26642672
for epoch in range(epochs_trained, num_train_epochs):
2665-
epoch_dataloader = train_dataloader
2673+
if epoch == epochs_trained and steps_trained_in_current_epoch > 0:
2674+
epoch_dataloader = skip_first_batches(train_dataloader, steps_trained_in_current_epoch)
2675+
else:
2676+
epoch_dataloader = train_dataloader
2677+
26662678
if hasattr(epoch_dataloader, "set_epoch"):
26672679
epoch_dataloader.set_epoch(epoch)
26682680

@@ -2702,6 +2714,11 @@ def get_num_items_in_batches(
27022714
num_items_in_batch = None
27032715

27042716
num_items_in_batches[epoch].append(num_items_in_batch)
2717+
global_step += 1
2718+
2719+
# For iterable datasets, don't do more than max_steps steps
2720+
if len_dataloader is None and global_step >= args.max_steps:
2721+
break
27052722

27062723
return num_items_in_batches
27072724

0 commit comments

Comments
 (0)