From db24498c30ad48304f418d608b800f52e914c8dc Mon Sep 17 00:00:00 2001
From: Vidhisha Balachandran <vidhishab@microsoft.com>
Date: Tue, 11 Mar 2025 21:42:31 -0700
Subject: [PATCH 01/15] add is_valid check for extract usage

---
 eureka_ml_insights/data_utils/transform.py | 14 +++++++++++++-
 1 file changed, 13 insertions(+), 1 deletion(-)

diff --git a/eureka_ml_insights/data_utils/transform.py b/eureka_ml_insights/data_utils/transform.py
index f1bf574e..034702a1 100644
--- a/eureka_ml_insights/data_utils/transform.py
+++ b/eureka_ml_insights/data_utils/transform.py
@@ -464,9 +464,21 @@ def transform(self, df: pd.DataFrame) -> pd.DataFrame:
         # if the model is one for which the usage of completion tokens is known, use that corresponding column for the model
         # otherwise, use the default "n_output_tokens" which is computed with a universal tokenizer as shown in TokenCounterTransform()
         if usage_completion_read_col:
-            df[self.usage_completion_output_col] = df[self.prepend_completion_read_col + "usage"].apply(lambda x: x[usage_completion_read_col])
+            df[self.usage_completion_output_col] = df.apply(lambda x: self._extract_usage(x, usage_completion_read_col), axis=1)
         elif self.prepend_completion_read_col + "n_output_tokens" in df.columns:
             df[self.usage_completion_output_col] = df[self.prepend_completion_read_col + "n_output_tokens"]
         else:
             df[self.usage_completion_output_col] = np.nan
         return df 
+    
+    def _extract_usage(self, row, usage_completion_read_col):
+        """
+        Extracts the token usage for a given row is is_valid is True. 
+        Args:
+            row (pd.Series): A row of the dataframe.
+        Returns:
+            int: The token usage for the row.
+        """
+        if row[self.prepend_completion_read_col + "is_valid"]:
+            return row[self.prepend_completion_read_col + "usage"][usage_completion_read_col]
+        return np.nan

From 7a31781dcefe049be2af2d4cc5b0cf3bd60d3f06 Mon Sep 17 00:00:00 2001
From: Vidhisha Balachandran <vidhishab@microsoft.com>
Date: Wed, 12 Mar 2025 11:17:24 -0700
Subject: [PATCH 02/15] comment fix

---
 eureka_ml_insights/data_utils/transform.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/eureka_ml_insights/data_utils/transform.py b/eureka_ml_insights/data_utils/transform.py
index 034702a1..455bf926 100644
--- a/eureka_ml_insights/data_utils/transform.py
+++ b/eureka_ml_insights/data_utils/transform.py
@@ -473,7 +473,7 @@ def transform(self, df: pd.DataFrame) -> pd.DataFrame:
     
     def _extract_usage(self, row, usage_completion_read_col):
         """
-        Extracts the token usage for a given row is is_valid is True. 
+        Extracts the token usage for a given row if is_valid is True. 
         Args:
             row (pd.Series): A row of the dataframe.
         Returns:

From 196c3857f9f9852cfeef83ea60c286cbc46cbed9 Mon Sep 17 00:00:00 2001
From: Vidhisha Balachandran <vidhishab@microsoft.com>
Date: Wed, 12 Mar 2025 12:10:04 -0700
Subject: [PATCH 03/15] check usage value and key presence instead of is_valid in _extract_usage

---
 eureka_ml_insights/data_utils/transform.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/eureka_ml_insights/data_utils/transform.py b/eureka_ml_insights/data_utils/transform.py
index 455bf926..869cd835 100644
--- a/eureka_ml_insights/data_utils/transform.py
+++ b/eureka_ml_insights/data_utils/transform.py
@@ -479,6 +479,6 @@ def _extract_usage(self, row, usage_completion_read_col):
         Returns:
             int: The token usage for the row.
         """
-        if row[self.prepend_completion_read_col + "is_valid"]:
+        if not pd.isna(row['usage']) and usage_completion_read_col in row['usage']:
             return row[self.prepend_completion_read_col + "usage"][usage_completion_read_col]
         return np.nan

From 7bf52aab9fa096b4a31ba18ed52596093521f6e6 Mon Sep 17 00:00:00 2001
From: Vidhisha Balachandran <vidhishab@microsoft.com>
Date: Wed, 12 Mar 2025 14:53:02 -0700
Subject: [PATCH 04/15] add column validation

---
 eureka_ml_insights/data_utils/transform.py | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/eureka_ml_insights/data_utils/transform.py b/eureka_ml_insights/data_utils/transform.py
index 869cd835..cca38009 100644
--- a/eureka_ml_insights/data_utils/transform.py
+++ b/eureka_ml_insights/data_utils/transform.py
@@ -463,6 +463,7 @@ def transform(self, df: pd.DataFrame) -> pd.DataFrame:
             usage_completion_read_col = "completion_tokens"
         # if the model is one for which the usage of completion tokens is known, use that corresponding column for the model
         # otherwise, use the default "n_output_tokens" which is computed with a universal tokenizer as shown in TokenCounterTransform()
+        self.validate(df)
         if usage_completion_read_col:
             df[self.usage_completion_output_col] = df.apply(lambda x: self._extract_usage(x, usage_completion_read_col), axis=1)
         elif self.prepend_completion_read_col + "n_output_tokens" in df.columns:
@@ -471,6 +472,13 @@ def transform(self, df: pd.DataFrame) -> pd.DataFrame:
             df[self.usage_completion_output_col] = np.nan
         return df 
     
+    def validate(self, df: pd.DataFrame, usage_completion_read_col: str) -> pd.DataFrame:
+        """Check that all columns to be transformed are present actually in the data frame."""
+        if usage_completion_read_col and self.prepend_completion_read_col+'usage' not in df.columns:
+            raise ValueError(f"The {self.prepend_completion_read_col + 'usage'} column is not present in the data frame.")
+        if self.prepend_completion_read_col + "n_output_tokens" not in df.columns:
+            raise ValueError(f"The {self.prepend_completion_read_col + 'n_output_tokens'} column is not present in the data frame.")
+
     def _extract_usage(self, row, usage_completion_read_col):
         """
         Extracts the token usage for a given row if is_valid is True. 
@@ -479,6 +487,6 @@ def _extract_usage(self, row, usage_completion_read_col):
         Returns:
             int: The token usage for the row.
         """
-        if not pd.isna(row['usage']) and usage_completion_read_col in row['usage']:
+        if not pd.isna(row[self.prepend_completion_read_col + 'usage']) and usage_completion_read_col in row[self.prepend_completion_read_col + 'usage']:
             return row[self.prepend_completion_read_col + "usage"][usage_completion_read_col]
         return np.nan

From 4914d5772165e19700124350e0a9b6c9698bef6e Mon Sep 17 00:00:00 2001
From: Vidhisha Balachandran <vidhishab@microsoft.com>
Date: Wed, 12 Mar 2025 14:53:35 -0700
Subject: [PATCH 05/15] use elif for n_output_tokens check in validate

---
 eureka_ml_insights/data_utils/transform.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/eureka_ml_insights/data_utils/transform.py b/eureka_ml_insights/data_utils/transform.py
index cca38009..595e1335 100644
--- a/eureka_ml_insights/data_utils/transform.py
+++ b/eureka_ml_insights/data_utils/transform.py
@@ -476,7 +476,7 @@ def validate(self, df: pd.DataFrame, usage_completion_read_col: str) -> pd.DataF
         """Check that all columns to be transformed are present actually in the data frame."""
         if usage_completion_read_col and self.prepend_completion_read_col+'usage' not in df.columns:
             raise ValueError(f"The {self.prepend_completion_read_col + 'usage'} column is not present in the data frame.")
-        if self.prepend_completion_read_col + "n_output_tokens" not in df.columns:
+        elif self.prepend_completion_read_col + "n_output_tokens" not in df.columns:
             raise ValueError(f"The {self.prepend_completion_read_col + 'n_output_tokens'} column is not present in the data frame.")
 
     def _extract_usage(self, row, usage_completion_read_col):

From 2aa917a876d705ad091f33c5e4b2f315bb9a9b7e Mon Sep 17 00:00:00 2001
From: Vidhisha Balachandran <vidhishab@microsoft.com>
Date: Wed, 12 Mar 2025 14:54:52 -0700
Subject: [PATCH 06/15] update _extract_usage docstring to match new check

---
 eureka_ml_insights/data_utils/transform.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/eureka_ml_insights/data_utils/transform.py b/eureka_ml_insights/data_utils/transform.py
index 595e1335..2ba80ffb 100644
--- a/eureka_ml_insights/data_utils/transform.py
+++ b/eureka_ml_insights/data_utils/transform.py
@@ -481,7 +481,7 @@ def validate(self, df: pd.DataFrame, usage_completion_read_col: str) -> pd.DataF
 
     def _extract_usage(self, row, usage_completion_read_col):
         """
-        Extracts the token usage for a given row if is_valid is True. 
+        Extracts the token usage for a given row if usage column and corresponding completion column exists. 
         Args:
             row (pd.Series): A row of the dataframe.
         Returns:

From 697d21c87571e8a07efca08a78453e50e6daace6 Mon Sep 17 00:00:00 2001
From: Vidhisha Balachandran <vidhishab@microsoft.com>
Date: Wed, 12 Mar 2025 15:29:16 -0700
Subject: [PATCH 07/15] rename variables

---
 eureka_ml_insights/data_utils/transform.py | 15 ++++++++-------
 1 file changed, 8 insertions(+), 7 deletions(-)

diff --git a/eureka_ml_insights/data_utils/transform.py b/eureka_ml_insights/data_utils/transform.py
index 2ba80ffb..57c9f46f 100644
--- a/eureka_ml_insights/data_utils/transform.py
+++ b/eureka_ml_insights/data_utils/transform.py
@@ -436,7 +436,8 @@ class ExtractUsageTransform:
     """
     model_config: ModelConfig
     usage_completion_output_col: str = "usage_completion" 
-    prepend_completion_read_col: str = "" 
+    usage_column: str = "usage"
+    n_tokens_column: str = "n_output_tokens" 
 
     def transform(self, df: pd.DataFrame) -> pd.DataFrame:
         """
@@ -466,18 +467,18 @@ def transform(self, df: pd.DataFrame) -> pd.DataFrame:
         self.validate(df)
         if usage_completion_read_col:
             df[self.usage_completion_output_col] = df.apply(lambda x: self._extract_usage(x, usage_completion_read_col), axis=1)
-        elif self.prepend_completion_read_col + "n_output_tokens" in df.columns:
-            df[self.usage_completion_output_col] = df[self.prepend_completion_read_col + "n_output_tokens"]
+        elif self.n_tokens_column in df.columns:
+            df[self.usage_completion_output_col] = df[self.self.n_tokens_column]
         else:
             df[self.usage_completion_output_col] = np.nan
         return df 
     
     def validate(self, df: pd.DataFrame, usage_completion_read_col: str) -> pd.DataFrame:
         """Check that all columns to be transformed are present actually in the data frame."""
-        if usage_completion_read_col and self.prepend_completion_read_col+'usage' not in df.columns:
-            raise ValueError(f"The {self.prepend_completion_read_col + 'usage'} column is not present in the data frame.")
-        elif self.prepend_completion_read_col + "n_output_tokens" not in df.columns:
-            raise ValueError(f"The {self.prepend_completion_read_col + 'n_output_tokens'} column is not present in the data frame.")
+        if usage_completion_read_col and self.usage_column not in df.columns:
+            raise ValueError(f"The {self.usage_column} column is not present in the data frame.")
+        elif self.n_tokens_column not in df.columns:
+            raise ValueError(f"The {self.n_tokens_column + 'n_output_tokens'} column is not present in the data frame.")
 
     def _extract_usage(self, row, usage_completion_read_col):
         """

From f8f3e68870ee2bd5b9d0318ebfd8859563e710c5 Mon Sep 17 00:00:00 2001
From: Vidhisha Balachandran <vidhishab@microsoft.com>
Date: Wed, 12 Mar 2025 15:31:39 -0700
Subject: [PATCH 08/15] finish rename: use usage_column in _extract_usage, fix validate error message

---
 eureka_ml_insights/data_utils/transform.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/eureka_ml_insights/data_utils/transform.py b/eureka_ml_insights/data_utils/transform.py
index 57c9f46f..b334e691 100644
--- a/eureka_ml_insights/data_utils/transform.py
+++ b/eureka_ml_insights/data_utils/transform.py
@@ -478,7 +478,7 @@ def validate(self, df: pd.DataFrame, usage_completion_read_col: str) -> pd.DataF
         if usage_completion_read_col and self.usage_column not in df.columns:
             raise ValueError(f"The {self.usage_column} column is not present in the data frame.")
         elif self.n_tokens_column not in df.columns:
-            raise ValueError(f"The {self.n_tokens_column + 'n_output_tokens'} column is not present in the data frame.")
+            raise ValueError(f"The {self.n_tokens_column} column is not present in the data frame.")
 
     def _extract_usage(self, row, usage_completion_read_col):
         """
@@ -488,6 +488,6 @@ def _extract_usage(self, row, usage_completion_read_col):
         Returns:
             int: The token usage for the row.
         """
-        if not pd.isna(row[self.prepend_completion_read_col + 'usage']) and usage_completion_read_col in row[self.prepend_completion_read_col + 'usage']:
-            return row[self.prepend_completion_read_col + "usage"][usage_completion_read_col]
+        if not pd.isna(row[self.usage_column]) and usage_completion_read_col in row[self.usage_column]:
+            return row[self.usage_column][usage_completion_read_col]
         return np.nan

From aaa501ce8210479d30571f76d152b8a6b42806f7 Mon Sep 17 00:00:00 2001
From: Vidhisha Balachandran <vidhishab@microsoft.com>
Date: Thu, 13 Mar 2025 14:02:15 -0700
Subject: [PATCH 09/15] update comments

---
 eureka_ml_insights/data_utils/transform.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/eureka_ml_insights/data_utils/transform.py b/eureka_ml_insights/data_utils/transform.py
index b334e691..817b9eec 100644
--- a/eureka_ml_insights/data_utils/transform.py
+++ b/eureka_ml_insights/data_utils/transform.py
@@ -431,8 +431,9 @@ class ExtractUsageTransform:
     Extracts token usage completion numbers (except prompt input tokens) for all models.
     args:
         model_config: config used for the experiment.
-        usage_completion_output_col: str, default name of the column where completion numbers will be stored for all models
-        prepend_completion_read_col: str, prepend string to add to the name of the usage column from which to read. Useful for cases when the usage column might have been renamed earlier in the pipeline.
+        usage_completion_output_col: str, default name of the column where completion numbers will be stored for model
+        usage_column: str, default name of the column where usage information is stored for model
+        n_tokens_column: str, default name of the column where number of tokens is stored for model
     """
     model_config: ModelConfig
     usage_completion_output_col: str = "usage_completion" 

From fd2064962836b3de4a16c302a23acabd435e5d40 Mon Sep 17 00:00:00 2001
From: Vidhisha Balachandran <vidhishab@microsoft.com>
Date: Thu, 13 Mar 2025 23:34:32 -0700
Subject: [PATCH 10/15] update comment

---
 eureka_ml_insights/data_utils/transform.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/eureka_ml_insights/data_utils/transform.py b/eureka_ml_insights/data_utils/transform.py
index 817b9eec..25c6bf5e 100644
--- a/eureka_ml_insights/data_utils/transform.py
+++ b/eureka_ml_insights/data_utils/transform.py
@@ -475,7 +475,7 @@ def transform(self, df: pd.DataFrame) -> pd.DataFrame:
         return df 
     
     def validate(self, df: pd.DataFrame, usage_completion_read_col: str) -> pd.DataFrame:
-        """Check that all columns to be transformed are present actually in the data frame."""
+        """Check that usage_columns or n_tokens_columns are present actually in the data frame."""
         if usage_completion_read_col and self.usage_column not in df.columns:
             raise ValueError(f"The {self.usage_column} column is not present in the data frame.")
         elif self.n_tokens_column not in df.columns:

From b66d95e71171f958615a84712d5ef77aad94714b Mon Sep 17 00:00:00 2001
From: Vidhisha Balachandran <vidhishab@microsoft.com>
Date: Thu, 13 Mar 2025 23:39:39 -0700
Subject: [PATCH 11/15] update comment

---
 eureka_ml_insights/data_utils/transform.py | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/eureka_ml_insights/data_utils/transform.py b/eureka_ml_insights/data_utils/transform.py
index 25c6bf5e..e11efd4f 100644
--- a/eureka_ml_insights/data_utils/transform.py
+++ b/eureka_ml_insights/data_utils/transform.py
@@ -448,7 +448,7 @@ def transform(self, df: pd.DataFrame) -> pd.DataFrame:
             df (pd.DataFrame): Input dataframe of inference results retrieved with the model_config.
 
         Returns:
-            pd.DataFrame: Transformed dataframe with completion token numbers in completion_usage_col.
+            pd.DataFrame: Transformed dataframe with completion token numbers in usage_completion_output_col.
         """
         usage_completion_read_col = None
         if (self.model_config.class_name is GeminiModel):
@@ -475,7 +475,11 @@ def transform(self, df: pd.DataFrame) -> pd.DataFrame:
         return df 
     
     def validate(self, df: pd.DataFrame, usage_completion_read_col: str) -> pd.DataFrame:
-        """Check that usage_columns or n_tokens_columns are present actually in the data frame."""
+        """Check that usage_columns or n_tokens_columns are present actually in the data frame.
+        Args:
+            df (pd.DataFrame): Input dataframe containing model_output_col and id_col.
+            usage_completion_read_col (str): The column name for token extraction.
+        """
         if usage_completion_read_col and self.usage_column not in df.columns:
             raise ValueError(f"The {self.usage_column} column is not present in the data frame.")
         elif self.n_tokens_column not in df.columns:
@@ -486,6 +490,7 @@ def _extract_usage(self, row, usage_completion_read_col):
         Extracts the token usage for a given row if usage column and corresponding completion column exists. 
         Args:
             row (pd.Series): A row of the dataframe.
+            usage_completion_read_col (str): The column name to extract the token usage from.
         Returns:
             int: The token usage for the row.
         """

From eb1d7c28ad3446a7d3edf0511f0fe11a600e61db Mon Sep 17 00:00:00 2001
From: Vidhisha Balachandran <vidhishab@microsoft.com>
Date: Fri, 14 Mar 2025 10:48:11 -0700
Subject: [PATCH 12/15] pass usage_completion_read_col to validate()

---
 eureka_ml_insights/data_utils/transform.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/eureka_ml_insights/data_utils/transform.py b/eureka_ml_insights/data_utils/transform.py
index e11efd4f..84fe8ff2 100644
--- a/eureka_ml_insights/data_utils/transform.py
+++ b/eureka_ml_insights/data_utils/transform.py
@@ -465,7 +465,7 @@ def transform(self, df: pd.DataFrame) -> pd.DataFrame:
             usage_completion_read_col = "completion_tokens"
         # if the model is one for which the usage of completion tokens is known, use that corresponding column for the model
         # otherwise, use the default "n_output_tokens" which is computed with a universal tokenizer as shown in TokenCounterTransform()
-        self.validate(df)
+        self.validate(df, usage_completion_read_col)
         if usage_completion_read_col:
             df[self.usage_completion_output_col] = df.apply(lambda x: self._extract_usage(x, usage_completion_read_col), axis=1)
         elif self.n_tokens_column in df.columns:

From 3fa1cb445adf06482596ddc00a338e1466f07e09 Mon Sep 17 00:00:00 2001
From: Vidhisha Balachandran <vidhishab@microsoft.com>
Date: Fri, 14 Mar 2025 11:09:39 -0700
Subject: [PATCH 13/15] fix self.self typo; add n_output_tokens to generic test model output

---
 eureka_ml_insights/data_utils/transform.py | 2 +-
 tests/test_utils.py                        | 4 +++-
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/eureka_ml_insights/data_utils/transform.py b/eureka_ml_insights/data_utils/transform.py
index 7a1822f8..6597b355 100644
--- a/eureka_ml_insights/data_utils/transform.py
+++ b/eureka_ml_insights/data_utils/transform.py
@@ -475,7 +475,7 @@ def transform(self, df: pd.DataFrame) -> pd.DataFrame:
         if usage_completion_read_col:
             df[self.usage_completion_output_col] = df.apply(lambda x: self._extract_usage(x, usage_completion_read_col), axis=1)
         elif self.n_tokens_column in df.columns:
-            df[self.usage_completion_output_col] = df[self.self.n_tokens_column]
+            df[self.usage_completion_output_col] = df[self.n_tokens_column]
         else:
             df[self.usage_completion_output_col] = np.nan
         return df 
diff --git a/tests/test_utils.py b/tests/test_utils.py
index dc8ae164..e9af285e 100644
--- a/tests/test_utils.py
+++ b/tests/test_utils.py
@@ -111,7 +111,9 @@ def __init__(self, model_name="generic_test_model"):
         self.name = model_name
 
     def generate(self, text_prompt, *args, **kwargs):
-        return {"model_output": "Generic model output", "is_valid": random.choice([True, False])}
+        return {"model_output": "Generic model output", 
+                "is_valid": random.choice([True, False]),
+                "n_output_tokens": 3}
 
 
 class DNAEvaluationInferenceTestModel:

From 13058797ab9b53eeef972a84a8aee22995dbcd3f Mon Sep 17 00:00:00 2001
From: Vidhisha Balachandran <vidhishab@microsoft.com>
Date: Fri, 14 Mar 2025 13:57:02 -0700
Subject: [PATCH 14/15] add n_output_tokens to test model output; unskip GPQA pipeline test

---
 tests/pipeline_tests.py | 1 -
 tests/test_utils.py     | 3 ++-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/tests/pipeline_tests.py b/tests/pipeline_tests.py
index 547d125d..be068e8c 100644
--- a/tests/pipeline_tests.py
+++ b/tests/pipeline_tests.py
@@ -540,7 +540,6 @@ def get_config(self):
         return TEST_KITAB_ONE_BOOK_CONSTRAINT_PIPELINE().pipeline_config
 
 
-@unittest.skipIf("skip_tests_with_missing_ds" in os.environ, "Missing public dataset. TODO: revert")
 class GPQA_PipelineTest(PipelineTest, unittest.TestCase):
     def get_config(self):
         return TEST_GPQA_PIPELINE().pipeline_config
diff --git a/tests/test_utils.py b/tests/test_utils.py
index e9af285e..824bbabb 100644
--- a/tests/test_utils.py
+++ b/tests/test_utils.py
@@ -51,7 +51,8 @@ def __init__(self):
 
     def generate(self, text_prompt, query_images=None):
         return {"model_output": random.choice(["Final Answer: A", "Final Answer: B", "Final Answer: C", "Final Answer: D"]), 
-                "is_valid": random.choice([True, False])}
+                "is_valid": random.choice([True, False]),
+                "n_output_tokens": 3}
 
     def name(self):
         return self.name

From 9306d6a6a9e53ed44b74e350b08a3c8fad8fa1c9 Mon Sep 17 00:00:00 2001
From: Vidhisha Balachandran <vidhishab@microsoft.com>
Date: Fri, 14 Mar 2025 14:11:27 -0700
Subject: [PATCH 15/15] skipped gpqa

---
 tests/pipeline_tests.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tests/pipeline_tests.py b/tests/pipeline_tests.py
index be068e8c..547d125d 100644
--- a/tests/pipeline_tests.py
+++ b/tests/pipeline_tests.py
@@ -540,6 +540,7 @@ def get_config(self):
         return TEST_KITAB_ONE_BOOK_CONSTRAINT_PIPELINE().pipeline_config
 
 
+@unittest.skipIf("skip_tests_with_missing_ds" in os.environ, "Missing public dataset. TODO: revert")
 class GPQA_PipelineTest(PipelineTest, unittest.TestCase):
     def get_config(self):
         return TEST_GPQA_PIPELINE().pipeline_config