
Commit c70bdd8

pytorchbot committed: 2025-01-18 nightly release (779569e)
1 parent d30110b, commit c70bdd8

26 files changed, +667 -30 lines changed

recipes/configs/generation.yaml

+6-1
@@ -1,4 +1,9 @@
-# Config for running the InferenceRecipe in generate.py to generate output from an LLM
+# Config for running the InferenceRecipe in generate.py to generate output
+# from Llama2 7B model
+#
+# This config assumes that you've run the following command before launching
+# this run:
+# tune download meta-llama/Llama-2-7b-hf --output-dir /tmp/Llama-2-7b-hf --ignore-patterns "*.safetensors" --hf-token <HF_TOKEN>
 #
 # To launch, run the following command from root torchtune directory:
 # tune run generate --config generation

recipes/configs/llama3/70B_generation_distributed.yaml

+50
@@ -0,0 +1,50 @@
+# Config for running the InferenceRecipe in dev/generate_v2.py to generate output
+# using a Llama3 70B Instruct model
+#
+# This config assumes that you've run the following command before launching:
+# tune download meta-llama/Meta-Llama-3-70B-Instruct --output-dir /tmp/Meta-Llama-3-70B-Instruct --ignore-patterns "original/consolidated*" --hf-token <HF_TOKEN>
+#
+# To launch, run the following command from root torchtune directory:
+# tune run --nproc_per_node 8 dev/generate_v2_distributed --config llama3/70B_generation_distributed
+
+output_dir: ./
+
+# Model arguments
+model:
+  _component_: torchtune.models.llama3.llama3_70b
+
+parallelize_plan:
+  _component_: torchtune.models.llama3.base_llama_tp_plan
+
+# Transform arguments
+tokenizer:
+  _component_: torchtune.models.llama3.llama3_tokenizer
+  path: /tmp/Meta-Llama-3-70B-Instruct/original/tokenizer.model
+  prompt_template: null
+  max_seq_len: 8192
+
+# Checkpointer
+checkpointer:
+  _component_: torchtune.training.FullModelHFCheckpointer
+  checkpoint_dir: /tmp/Meta-Llama-3-70B-Instruct
+  checkpoint_files:
+    filename_format: model-{}-of-{}.safetensors
+    max_filename: "00030"
+  recipe_checkpoint: null
+  output_dir: ${output_dir}
+  model_type: LLAMA3
+
+# Device
+device: cuda
+dtype: bf16
+seed: 1234
+log_level: INFO
+
+# Generation arguments
+prompt:
+  system: null
+  user:
+    text: Tell a joke.
+max_new_tokens: 200
+temperature: 0.6 # 0.8 and 0.6 are popular values to try
+top_k: 300
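torchtune's config system is built on OmegaConf, so a file like the one above can be loaded and inspected directly: _component_ keys name the class or function to instantiate, and ${output_dir} is an interpolation against the top-level key. A minimal sketch (not part of this commit), assuming omegaconf is installed and using the config path added here:

# Minimal sketch: load the YAML above and resolve its interpolations.
from omegaconf import OmegaConf

cfg = OmegaConf.load("recipes/configs/llama3/70B_generation_distributed.yaml")

# ${output_dir} inside the checkpointer resolves to the top-level output_dir ("./")
ckpt = OmegaConf.to_container(cfg.checkpointer, resolve=True)
print(ckpt["output_dir"])              # ./
print(cfg.model["_component_"])        # torchtune.models.llama3.llama3_70b
print(cfg.prompt.user.text)            # Tell a joke.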

recipes/configs/llama3_1/70B_generation_distributed.yaml

+50
@@ -0,0 +1,50 @@
+# Config for running the InferenceRecipe in dev/generate_v2.py to generate output
+# using a Llama3.1 70B Instruct model
+#
+# This config assumes that you've run the following command before launching:
+# tune download meta-llama/Meta-Llama-3.1-70B-Instruct --output-dir /tmp/Meta-Llama-3.1-70B-Instruct --ignore-patterns "original/consolidated*" --hf-token <HF_TOKEN>
+#
+# To launch, run the following command from root torchtune directory:
+# tune run --nproc_per_node 8 dev/generate_v2_distributed --config llama3_1/70B_generation_distributed
+
+output_dir: ./
+
+# Model arguments
+model:
+  _component_: torchtune.models.llama3_1.llama3_1_70b
+
+parallelize_plan:
+  _component_: torchtune.models.llama3.base_llama_tp_plan
+
+# Transform arguments
+tokenizer:
+  _component_: torchtune.models.llama3.llama3_tokenizer
+  path: /tmp/Meta-Llama-3.1-70B-Instruct/original/tokenizer.model
+  prompt_template: null
+  max_seq_len: 8192
+
+# Checkpointer
+checkpointer:
+  _component_: torchtune.training.FullModelHFCheckpointer
+  checkpoint_dir: /tmp/Meta-Llama-3.1-70B-Instruct/
+  checkpoint_files:
+    filename_format: model-{}-of-{}.safetensors
+    max_filename: "00030"
+  recipe_checkpoint: null
+  output_dir: ${output_dir}
+  model_type: LLAMA3
+
+# Device
+device: cuda
+dtype: bf16
+seed: 1234
+log_level: INFO
+
+# Generation arguments
+prompt:
+  system: null
+  user:
+    text: Tell a joke.
+max_new_tokens: 200
+temperature: 0.6 # 0.8 and 0.6 are popular values to try
+top_k: 300

recipes/configs/llama3_2_vision/11B_generation_v2.yaml

+1-1
@@ -7,7 +7,7 @@
 # To launch, run the following command from root torchtune directory:
 # tune run dev/generate_v2 --config llama3_2_vision/generation_v2

-output_dir: ./ # Not needed
+output_dir: ./

 # Model arguments
 model:

recipes/configs/llama3_3/70B_generation_distributed.yaml

+50
@@ -0,0 +1,50 @@
+# Config for running the InferenceRecipe in dev/generate_v2.py to generate output
+# using a Llama3.3 70B Instruct model
+#
+# This config assumes that you've run the following command before launching:
+# tune download meta-llama/Llama-3.3-70B-Instruct --ignore-patterns "original/consolidated*" --hf-token <HF_TOKEN>
+#
+# To launch, run the following command from root torchtune directory:
+# tune run --nproc_per_node 8 dev/generate_v2_distributed --config llama3_3/70B_generation_distributed
+
+output_dir: ./
+
+# Model arguments
+model:
+  _component_: torchtune.models.llama3_3.llama3_3_70b
+
+parallelize_plan:
+  _component_: torchtune.models.llama3.base_llama_tp_plan
+
+# Transform arguments
+tokenizer:
+  _component_: torchtune.models.llama3.llama3_tokenizer
+  path: /tmp/Llama-3.3-70B-Instruct/original/tokenizer.model
+  prompt_template: null
+  max_seq_len: 8192
+
+# Checkpointer
+checkpointer:
+  _component_: torchtune.training.FullModelHFCheckpointer
+  checkpoint_dir: /tmp/Llama-3.3-70B-Instruct/
+  checkpoint_files:
+    filename_format: model-{}-of-{}.safetensors
+    max_filename: "00030"
+  recipe_checkpoint: null
+  output_dir: ${output_dir}
+  model_type: LLAMA3
+
+# Device
+device: cuda
+dtype: bf16
+seed: 1234
+log_level: INFO
+
+# Generation arguments
+prompt:
+  system: null
+  user:
+    text: Tell a joke.
+max_new_tokens: 200
+temperature: 0.6 # 0.8 and 0.6 are popular values to try
+top_k: 300

recipes/dev/early_exit_finetune_distributed.py

+1-1
@@ -653,7 +653,7 @@ def _setup_data(
                 for single_cfg_dataset in cfg_dataset
             ]
             ds = ConcatDataset(datasets=datasets)
-            packed = False
+            packed = getattr(ds, "packed", False)
         else:
             ds = config.instantiate(cfg_dataset, self._tokenizer)
             packed = cfg_dataset.get("packed", False)
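The change above stops hard-coding packed = False for concatenated datasets and instead reads the flag off the dataset object when it exposes one. A minimal sketch of the getattr fallback, using stand-in classes rather than torchtune's actual ConcatDataset:

# Minimal sketch of the getattr fallback used above; DummyConcat and DummyPlain
# are stand-ins, not torchtune classes.
class DummyConcat:
    def __init__(self, packed: bool):
        self.packed = packed

class DummyPlain:
    pass

print(getattr(DummyConcat(packed=True), "packed", False))  # True: attribute is honored
print(getattr(DummyPlain(), "packed", False))              # False: falls back to the default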

recipes/dev/generate_v2.py

+11-7
@@ -39,18 +39,22 @@ def __call__(self, prompt: Dict[str, Any]) -> List[Message]:

         # Iterate through roles and add content
         for role, content in prompt.items():
-            if isinstance(content, str):
+            if content is None:
+                continue
+            elif isinstance(content, str):
                 new_content = [{"type": "text", "content": content}]
-            else:
-                assert (
-                    "image" in content.keys()
-                ), "Multiple entries per role expect an image key"
+            elif "image" in content.keys():
                 image_loc = content["image"]
                 image = load_image(image_loc)
                 new_content = [
                     {"type": "image", "content": image},
                     {"type": "text", "content": content["text"]},
                 ]
+            else:
+                assert (
+                    "text" in content.keys()
+                ), "Multiple entries per role expect at least a text key"
+                new_content = [{"type": "text", "content": content["text"]}]
             messages.append(Message(role=role, content=new_content))

         # Finally, add an empty assistant message to kick-start generation
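With these branches, the transform now tolerates a null system prompt (as in the configs above) and accepts user content given as a bare string, as a dict with only a text key, or as a dict with image and text keys. A standalone sketch of the same branching logic, using plain dicts instead of torchtune's Message and load_image, with illustrative prompt values:

# Standalone sketch of the content-handling branches above; uses plain dicts
# instead of torchtune's Message/load_image, and illustrative prompt values.
def to_content(prompt):
    out = []
    for role, content in prompt.items():
        if content is None:                  # e.g. system: null in the config
            continue
        elif isinstance(content, str):       # bare string -> single text entry
            new_content = [{"type": "text", "content": content}]
        elif "image" in content:             # dict with image (and text) keys
            new_content = [
                {"type": "image", "content": content["image"]},
                {"type": "text", "content": content["text"]},
            ]
        else:                                # dict with at least a text key
            assert "text" in content, "Multiple entries per role expect at least a text key"
            new_content = [{"type": "text", "content": content["text"]}]
        out.append((role, new_content))
    return out

print(to_content({"system": None, "user": "Tell a joke."}))
print(to_content({"system": None, "user": {"text": "Tell a joke."}}))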
@@ -109,12 +113,12 @@ def log_metrics(self, total_time: int, tokens_per_second: float) -> None:
             f"Time for inference: {total_time:.02f} sec total, {tokens_per_second:.02f} tokens/sec"
         )
         self._logger.info(
-            f"Bandwidth achieved: {model_size * tokens_per_second / 1e9:.02f} GB/s"
+            f"Bandwidth achieved: {model_size * tokens_per_second / (1024**3):.02f} GiB/s"
         )
         if self._device.type != "cpu":
             torch_device = utils.get_torch_device_namespace()
             self._logger.info(
-                f"Max memory allocated: {torch_device.max_memory_allocated() / 1e9:.02f} GB"
+                f"Max memory allocated: {torch_device.max_memory_allocated() / (1024**3):.02f} GiB"
             )

     @torch.inference_mode()
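The logging change swaps decimal gigabytes (1e9 bytes) for binary gibibytes (1024**3 bytes), so the reported bandwidth and peak-memory figures shrink by roughly 7% for the same byte count. A quick check of the two divisors, with an illustrative byte count:

# Quick check of the two divisors used before/after this change.
nbytes = 140e9  # illustrative: roughly a 70B-parameter model in bf16
print(f"{nbytes / 1e9:.02f} GB")         # 140.00 GB
print(f"{nbytes / (1024**3):.02f} GiB")  # 130.39 GiB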
