88import time
99import uuid
1010from collections .abc import Mapping
11+ from collections import defaultdict
1112from typing import Any , Dict , List , Literal , Optional , Tuple
1213
1314import numpy as np
2526
2627configure_logger ()
2728
28-
# NOTE(review): import is not at the top of the file and the tokenizer is
# loaded eagerly at module import time — every importer pays the load cost
# even if `_decode` is never called. Consider moving the import up and
# making the load lazy; confirm no other module relies on `tok` existing
# immediately at import.
from transformers import AutoTokenizer

# HACK: hard-coded absolute path to a local Llama-3.2-3B checkpoint; this
# module cannot run on machines without this mount. TODO(review): make the
# path configurable (env var / config) instead of a literal.
model_dir = '/mnt/teamdrive/RAG_RL/models/meta-llama/Llama-3.2-3B'
# local_files_only=True forbids any hub download; use_fast=True selects the
# Rust-backed tokenizer implementation.
tok = AutoTokenizer.from_pretrained(str(model_dir), local_files_only=True, use_fast=True)
def _decode(ids, skip_special_tokens=True):
    """Decode token ids to text via the module-level tokenizer ``tok``.

    ``clean_up_tokenization_spaces`` is pinned to ``False`` so decoded text
    is returned without tokenizer-side whitespace normalization.
    """
    decode_kwargs = {
        "skip_special_tokens": skip_special_tokens,
        "clean_up_tokenization_spaces": False,
    }
    return tok.decode(ids, **decode_kwargs)
34+
2935def get_left_padded_ids_and_attention_mask (
3036 ids : List [int ], max_length : int , pad_token_id : int
3137) -> Tuple [List [int ], List [int ]]:
@@ -555,13 +561,28 @@ def get_test_metrics(self):
555561 assert len (self ._completed_rollouts_v0 ) == self ._total_tasks_queued
556562
557563 sample_stat_list : List [Dict [str , Any ]] = []
558- for _ , rollout in self ._completed_rollouts_v0 .items ():
564+ sample_stat_list_by_source : Dict [str , List [Dict [str , Any ]]] = defaultdict (
565+ list
566+ ) # FIXME: Evaluate whether grouping stats by source is actually needed.
567+
568+ for rollout_id , rollout in self ._completed_rollouts_v0 .items ():
559569 final_reward = self ._fillna_reward (rollout )
560570 if not rollout .triplets :
561571 print (f"Warning: No triplets found for test rollout { rollout .rollout_id } ." )
562572 sample_stat_list .append ({"reward" : final_reward })
563573 continue
564574 response_length_list = [len (triplet .response .get ("token_ids" , [])) for triplet in rollout .triplets ]
575+ if "data_source" in self ._task_id_to_original_sample [rollout_id ]:
576+ # When a test sample includes a 'data_source' field, record per-source statistics for test results.
577+ data_source = self ._task_id_to_original_sample [rollout_id ]["data_source" ]
578+ sample_stat_list_by_source [data_source ].append (
579+ {
580+ "sum_response_length" : np .sum (response_length_list ),
581+ "mean_response_length" : np .mean (response_length_list ) if response_length_list else 0 ,
582+ "turn_count" : len (rollout .triplets ),
583+ "reward" : final_reward ,
584+ }
585+ )
565586 sample_stat_list .append (
566587 {
567588 "sum_response_length" : np .sum (response_length_list ),
@@ -570,18 +591,45 @@ def get_test_metrics(self):
570591 "reward" : final_reward ,
571592 }
572593 )
594+ metric_dict : Dict [str , Any ] = {}
573595
574596 stats_w_trace = [stat for stat in sample_stat_list if "sum_response_length" in stat ]
575- return {
576- "val/n_rollouts" : len (sample_stat_list ),
577- "val/n_rollouts_w_trace" : len (stats_w_trace ),
578- "val/reward" : np .mean (
579- [stat ["reward" ] for stat in sample_stat_list ]
580- ), # each rollout must have a reward (fillna if missing)
581- "val/mean_response_length" : np .mean ([stat ["mean_response_length" ] for stat in stats_w_trace ]),
582- "val/sum_response_length" : np .mean ([stat ["sum_response_length" ] for stat in stats_w_trace ]),
583- "val/turn_count" : np .mean ([stat ["turn_count" ] for stat in stats_w_trace ]),
597+ stats_w_trace_by_source = {
598+ data_source : [stat for stat in sample_stats if "sum_response_length" in stat ]
599+ for data_source , sample_stats in sample_stat_list_by_source .items ()
584600 }
601+ for data_source , sample_stats in sample_stat_list_by_source .items ():
602+ metric_dict .update (
603+ {
604+ f"val/{ data_source } /n_rollouts" : len (sample_stats ),
605+ f"val/{ data_source } /n_rollouts_w_trace" : len (stats_w_trace_by_source [data_source ]),
606+ f"val/{ data_source } /reward" : np .mean (
607+ [stat ["reward" ] for stat in sample_stats ]
608+ ), # each rollout must have a reward (fillna if missing)
609+ f"val/{ data_source } /mean_response_length" : np .mean (
610+ [stat ["mean_response_length" ] for stat in stats_w_trace_by_source [data_source ]]
611+ ),
612+ f"val/{ data_source } /sum_response_length" : np .mean (
613+ [stat ["sum_response_length" ] for stat in stats_w_trace_by_source [data_source ]]
614+ ),
615+ f"val/{ data_source } /turn_count" : np .mean (
616+ [stat ["turn_count" ] for stat in stats_w_trace_by_source [data_source ]]
617+ ),
618+ }
619+ )
620+ metric_dict .update (
621+ {
622+ "val/n_rollouts" : len (sample_stat_list ),
623+ "val/n_rollouts_w_trace" : len (stats_w_trace ),
624+ "val/reward" : np .mean (
625+ [stat ["reward" ] for stat in sample_stat_list ]
626+ ), # each rollout must have a reward (fillna if missing)
627+ "val/mean_response_length" : np .mean ([stat ["mean_response_length" ] for stat in stats_w_trace ]),
628+ "val/sum_response_length" : np .mean ([stat ["sum_response_length" ] for stat in stats_w_trace ]),
629+ "val/turn_count" : np .mean ([stat ["turn_count" ] for stat in stats_w_trace ]),
630+ }
631+ )
632+ return metric_dict
585633
586634 def get_train_data_batch (self , max_prompt_length : int , max_response_length : int , device : torch .device ):
587635 """
@@ -684,19 +732,32 @@ def get_train_data_batch(self, max_prompt_length: int, max_response_length: int,
684732 for rollout_id , sample_info in finished_id_to_sample_info .items ():
685733 merged_trace_idx : List [List [int ]] = []
686734 current_merged_trace_idx : List [int ] = []
687- current_context : List [ int ] = []
735+ current_context : str = ""
688736 for turn_index , trace in enumerate (sample_info ["trace_list" ]):
689- if (trace ["prompt_ids" ] + trace ["response_ids" ])[: len (current_context )] == current_context :
690- current_context = trace ["prompt_ids" ] + trace ["response_ids" ]
737+ # print('~' * 20)
738+ # print((trace["prompt_ids"] + trace["response_ids"]))
739+ # print(current_context)
740+ # print(f'|START|{_decode((trace["prompt_ids"] + trace["response_ids"]))}|END|')
741+ # print(f'|START|{_decode(current_context)}|END|')
742+
743+ temp_combined = _decode (trace ["prompt_ids" ] + trace ["response_ids" ])
744+ if temp_combined [: len (current_context )] == current_context :
745+ # if (trace["prompt_ids"] + trace["response_ids"])[: len(current_context)] == current_context:
746+ current_context = temp_combined
691747 current_merged_trace_idx .append (turn_index )
692748 else :
693749 # assert len(current_merged_trace_idx) > 0
694750 merged_trace_idx .append (current_merged_trace_idx )
695751 current_merged_trace_idx = [turn_index ]
696- current_context = trace [ "prompt_ids" ] + trace [ "response_ids" ]
752+ current_context = temp_combined
697753 if current_merged_trace_idx not in merged_trace_idx :
698754 merged_trace_idx .append (current_merged_trace_idx )
699755
756+ print ('-' * 20 )
757+ print (merged_trace_idx )
758+ # assert len(merged_trace_idx) == 1
759+ # assert sum(len(x) for x in merged_trace_idx) == len(sample_info["trace_list"])
760+
700761 for current_merged_trace_idx in merged_trace_idx :
701762 prompt_ids = sample_info ["trace_list" ][current_merged_trace_idx [0 ]]["prompt_ids" ]
702763 response_ids = sample_info ["trace_list" ][current_merged_trace_idx [0 ]]["response_ids" ]
0 commit comments