Skip to content

Commit d2eefb1

Browse files
Ankur-singh authored and joecummings committed
Update docs and docstrings related to Llama3VisionTransform (pytorch#2382)
1 parent 721502f commit d2eefb1

File tree

4 files changed

+5
-6
lines changed

4 files changed

+5
-6
lines changed

docs/source/basics/model_transforms.rst

+2-2
Original file line numberDiff line numberDiff line change
@@ -32,8 +32,8 @@ These are intended to be drop-in replacements for tokenizers in multimodal datas
3232
Message(
3333
role="user",
3434
content=[
35-
{"type": "image", "content": Image.new(mode="RGB", size=(224, 224))},
36-
{"type": "image", "content": Image.new(mode="RGB", size=(224, 224))},
35+
{"type": "image", "content": Image.new(mode="RGB", size=(560, 560))},
36+
{"type": "image", "content": Image.new(mode="RGB", size=(560, 560))},
3737
{"type": "text", "content": "What is common in these two images?"},
3838
],
3939
),

docs/source/basics/multimodal_datasets.rst

+1-1
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,7 @@ in the text, ``"<image>"`` for where to place the image tokens. This will get re
4545
from torchtune.models.llama3_2_vision import llama3_2_vision_transform
4646
from torchtune.datasets.multimodal import multimodal_chat_dataset
4747
48-
model_transform = Llama3VisionTransform(
48+
model_transform = llama3_2_vision_transform(
4949
path="/tmp/Meta-Llama-3-8B-Instruct/original/tokenizer.model",
5050
prompt_template="torchtune.data.QuestionAnswerTemplate",
5151
max_seq_len=8192,

torchtune/datasets/multimodal/_multimodal.py

+1-2
Original file line numberDiff line numberDiff line change
@@ -120,10 +120,9 @@ def multimodal_chat_dataset(
120120
121121
::
122122
123-
>>> from torchtune.datasets.multimodal import multimodal_chat_dataset
124123
>>> from torchtune.models.llama3_2_vision import llama3_2_vision_transform
125124
>>> from torchtune.datasets.multimodal import multimodal_chat_dataset
126-
>>> model_transform = Llama3VisionTransform(
125+
>>> model_transform = llama3_2_vision_transform(
127126
>>> path="/tmp/Meta-Llama-3-8B-Instruct/original/tokenizer.model",
128127
>>> prompt_template="torchtune.data.QuestionAnswerTemplate",
129128
>>> max_seq_len=8192,

torchtune/models/llama3_2_vision/_transform.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -194,7 +194,7 @@ def __call__(
194194
195195
Args:
196196
sample (Mapping[str, Any]): A sample with a "messages" field.
197-
inference (bool): Whether to run in inference mode. Default is True.
197+
inference (bool): Whether to run in inference mode. Default is False.
198198
199199
Returns:
200200
Mapping[str, Any]: The transformed sample with the following fields:

0 commit comments

Comments (0)