13 changes: 10 additions & 3 deletions chapters/en/chapter11/3.mdx
@@ -99,7 +99,7 @@ Now that we understand the key components, let's implement the training with pro
 
 ```python
 from datasets import load_dataset
-from trl import SFTConfig, SFTTrainer
+from trl import SFTConfig, SFTTrainer, clone_chat_template
 import torch
 
 # Set device
@@ -110,12 +110,19 @@ dataset = load_dataset("HuggingFaceTB/smoltalk", "all")
 
 # Configure model and tokenizer
 model_name = "HuggingFaceTB/SmolLM2-135M"
+instruct_model_name = "HuggingFaceTB/SmolLM2-135M-Instruct"
 
 model = AutoModelForCausalLM.from_pretrained(pretrained_model_name_or_path=model_name).to(
     device
 )
 tokenizer = AutoTokenizer.from_pretrained(pretrained_model_name_or_path=model_name)
-# Setup chat template
-model, tokenizer = setup_chat_format(model=model, tokenizer=tokenizer)
+
+# Setup chat template by cloning it from the tokenizer used by the instruct version of the model
+model, tokenizer, added_tokens = clone_chat_template(
+    model=model,
+    tokenizer=tokenizer,
+    source_tokenizer_path=instruct_model_name
+)
 
 # Configure trainer
 training_args = SFTConfig(
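
Not part of the diff, but as a quick way to see what the change accomplishes: a minimal sketch of checking the cloned template, assuming the `model` and `tokenizer` from the snippet above are in scope. The example messages are illustrative; `apply_chat_template` is the standard Transformers tokenizer method.

```python
# Illustrative check: after clone_chat_template, the base model's tokenizer should
# render conversations using the chat template copied from SmolLM2-135M-Instruct.
messages = [{"role": "user", "content": "What is supervised fine-tuning?"}]

prompt = tokenizer.apply_chat_template(
    messages,
    tokenize=False,
    add_generation_prompt=True,
)
print(prompt)  # shows the formatting and special tokens the SFTTrainer will train on
```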