Fix errors in train.py & modeling.py; add ability to track multiple metrics at once (#24)

ShreyBiswas · web-flow · commit 3966113090ec · 2024-10-06T11:34:57.000+03:00
* Use updated evaluate.load for metrics, add ability to compute multiple metrics at once

Signed-off-by: ShreyBiswas <shrey.biswas@gmail.com>

* Fix ignore_keys_for_eval throwing error due to being a set, not a list

Signed-off-by: ShreyBiswas <shrey.biswas@gmail.com>

* Add **kwargs to the FastFit.forward() call

`forward` may be called with different arguments depending on the model; extra arguments such as `token_type_ids` would otherwise cause it to crash. Accepting `**kwargs` is a simple way to ignore them.

Signed-off-by: ShreyBiswas <shrey.biswas@gmail.com>

* Add trust_remote_code so more models can be used

Signed-off-by: ShreyBiswas <shrey.biswas@gmail.com>

* Add evaluate to requirements

Signed-off-by: Shrey Biswas <shrey.biswas@gmail.com>

---------

Signed-off-by: ShreyBiswas <shrey.biswas@gmail.com>
Signed-off-by: Shrey Biswas <shrey.biswas@gmail.com>
diff --git a/fastfit/modeling.py b/fastfit/modeling.py
@@ -835,7 +835,7 @@ def mask_tokens(self, inputs, special_tokens_mask=None):
 
 
 class FastFit(FastFitTrainable):
-    def forward(self, input_ids, attention_mask, labels=None):
+    def forward(self, input_ids, attention_mask, labels=None, **kwargs):
         return SequenceClassifierOutput(
             logits=self.inference_forward(input_ids, attention_mask),
         )
diff --git a/fastfit/train.py b/fastfit/train.py
@@ -14,7 +14,8 @@
 import torch
 import datasets
 import numpy as np
-from datasets import load_dataset, load_metric
+from datasets import load_dataset
+from evaluate import load
 
 import transformers
 from transformers import (
@@ -622,6 +623,7 @@ def set_model(self):
         else:
             config = AutoConfig.from_pretrained(
                 pretrained_model_name_or_path=self.model_args.model_name_or_path,
+                trust_remote_code=True,
             )
             config = FastFitConfig.from_encoder_config(
                 config,
@@ -872,7 +874,21 @@ def preprocess_function(examples):
                 )
 
     def set_trainer(self):
-        metric = load_metric(self.data_args.metric_name, experiment_id=uuid.uuid4())
+
+        if type(self.data_args.metric_name) == str: # single metric name
+            metrics = [load(self.data_args.metric_name, experiment_id=uuid.uuid4())]
+        elif type(self.data_args.metric_name) == list: # compute multiple metrics
+            metrics = []
+            for metric in self.data_args.metric_name:
+                try:
+                    metrics.append(
+                    load(metric, experiment_id=uuid.uuid4())
+                    )
+                except:
+                    logger.error(f"Metric {metric} not found. Skipping...")
+                    continue
+
+
 
         # You can define your custom compute_metrics function. It takes an `EvalPrediction` object (a namedtuple with a
         # predictions and label_ids field) and has to return a dictionary string to float.
@@ -886,7 +902,17 @@ def compute_metrics(p: EvalPrediction):
                 else np.argmax(predictions, axis=1)
             )
             references = p.label_ids
-            return metric.compute(predictions=predictions, references=references)
+
+            results = {}
+
+            for metric in metrics:
+                if metric.name != 'accuracy':
+                    results.update(metric.compute(predictions=predictions, references=references,average='macro'))
+                else:
+                    results.update(metric.compute(predictions=predictions, references=references))
+
+            return results
+
 
         # Data collator will default to DataCollatorWithPadding when the tokenizer is passed to Trainer, so we change it if
         # we already did the padding.
@@ -966,7 +992,7 @@ def train(self):
         if self.training_args.do_train:
             train_result = self.trainer.train(
                 resume_from_checkpoint=self.checkpoint,
-                ignore_keys_for_eval={"doc_input_ids", "doc_attention_mask", "labels"},
+                ignore_keys_for_eval=list({"doc_input_ids", "doc_attention_mask", "labels"}),
             )
             metrics = train_result.metrics
             max_train_samples = (
diff --git a/requirements.txt b/requirements.txt
@@ -1,4 +1,5 @@
 torch
 transformers[torch]
 scikit-learn
-datasets
+datasets
+evaluate

Original file line number	Diff line number	Diff line change
`@@ -835,7 +835,7 @@ def mask_tokens(self, inputs, special_tokens_mask=None):`
`835`	`835`
`836`	`836`
`837`	`837`	`class FastFit(FastFitTrainable):`
`838`		`- def forward(self, input_ids, attention_mask, labels=None):`
	`838`	`+ def forward(self, input_ids, attention_mask, labels=None, **kwargs):`
`839`	`839`	`return SequenceClassifierOutput(`
`840`	`840`	`logits=self.inference_forward(input_ids, attention_mask),`
`841`	`841`	`)`