Fix non-uniform seeding (#35906)

rdraskicTT · kpaigwar · sraizada-tt · djordje-tt · commit a0b141a9a839 · 2026-01-29T10:19:21.000Z
Co-authored-by: kpaigwar <kpaigwar@tenstorrent.com>
Co-authored-by: Stuti Raizada <sraizada@tenstorrent.com>
Co-authored-by: Djordje Ivanovic <divanovic@tenstorrent.com>
diff --git a/models/tt_transformers/demo/simple_text_demo.py b/models/tt_transformers/demo/simple_text_demo.py
@@ -1001,6 +1001,7 @@ def test_demo_text(
                 temperature=sampling_params["temperature"],
                 top_k=sampling_params["top_k"],
                 top_p=sampling_params["top_p"],
+                seed=sampling_params["seed"] if "seed" in sampling_params else None,
                 frequency_penalty=sampling_params["frequency_penalty"]
                 if "frequency_penalty" in sampling_params
                 else 0.0,
@@ -1110,6 +1111,7 @@ def test_demo_text(
                 enable_trace=enable_trace,
                 page_table=page_table,
                 kv_cache=tt_kv_cache,
+                reset_batch=(iteration == 0),
                 sampling_params=device_sampling_params,
                 prompt_tokens=input_tokens_prefill_pt,
                 output_tokens=out_tok,
diff --git a/models/tt_transformers/tt/generator.py b/models/tt_transformers/tt/generator.py
@@ -76,11 +76,13 @@ def _apply_prefill_sampling_state(
     *,
     sampling_params: SamplingParams,
     prompt_tokens: torch.Tensor | None,
+    empty_slots: list[int],
 ):
-    sampling_module = getattr(model_instance, "sampling_prefill", None)
+    sampling_module = getattr(model_instance, "sampling", None)
     assert sampling_module is not None, "Sampling module not found in model for sampling on device."
     sampling_module.reset_sampling_params(sampling_params)
-    sampling_module.reset_seed(sampling_params.seed)
+    sampling_module.seed_manager.reset_seed(sampling_params.seed, empty_slots)
+    sampling_module.seed_manager.get_new_values(empty_slots, replicate_seeds=True)
     if prompt_tokens is not None:
         sampling_module.reset_prompt_tokens(prompt_tokens)
     sampling_module.reset_output_state()
@@ -422,6 +424,7 @@ def prefill_forward_text(
                     self.model[model_id],
                     sampling_params=per_request_params,
                     prompt_tokens=prefill_ids[:, :seq_len].repeat(32, 1),
+                    empty_slots=[user_id % 32],
                 )
 
             if enable_trace_current_prompt:
@@ -471,7 +474,7 @@ def prefill_forward_text(
                     logits = self.model[model_id].process_logits_after_prefill_trace(logits, last_token_idx)
 
             if sampling_enabled:
-                tt_tokens, tt_log_probs = self.model[model_id].sampling_prefill.sample(
+                tt_tokens, tt_log_probs = self.model[model_id].sampling.sample(
                     logits,
                     enable_trace=False,
                 )
@@ -732,8 +735,8 @@ def decode_forward_text(
                 sampling_module = getattr(self.model[i], "sampling", None)
                 assert sampling_module is not None, "Sampling module not found in model for sampling on device."
                 sampling_module.reset_sampling_params(formatted_params)
+                sampling_module.seed_manager.get_new_values()
                 if reset_batch:
-                    sampling_module.reset_seed(formatted_params.seed)
                     sampling_module.reset_prompt_tokens(prompt_chunks[i])
                     sampling_module.reset_output_state(output_chunks[i])
 
diff --git a/models/tt_transformers/tt/model.py b/models/tt_transformers/tt/model.py
@@ -137,11 +137,6 @@ def __init__(
         sampling_splits = self.args.num_devices if list(self.mesh_device.shape) != [1, 1] else 2
         self._supports_on_device_sampling = self.args.vocab_size // sampling_splits <= 64 * 1024
         if self._supports_on_device_sampling:
-            self.sampling_prefill = SamplingGenerator(
-                args=args,
-                mesh_device=mesh_device,
-                tt_ccl=self.tt_ccl,
-            )
             self.sampling = SamplingGenerator(
                 args=args,
                 mesh_device=mesh_device,