Skip to content

Commit b780d70

Browse files
committed
[skip ci] Merge branch 'main' into transformers_future
2 parents 9cf57be + f76f572 commit b780d70

File tree

2 files changed

+61
-3
lines changed

2 files changed

+61
-3
lines changed

examples/text-generation/run_generation.py

Lines changed: 17 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,14 @@
2929

3030
import torch
3131
from transformers import BatchEncoding
32-
from utils import adjust_batch, count_hpu_graphs, finalize_quantization, initialize_model, save_model
32+
from utils import (
33+
SetTrueOrFalseOrNone,
34+
adjust_batch,
35+
count_hpu_graphs,
36+
finalize_quantization,
37+
initialize_model,
38+
save_model,
39+
)
3340

3441
from optimum.habana.utils import get_hpu_memory_stats
3542

@@ -276,7 +283,9 @@ def setup_parser(parser):
276283
)
277284
parser.add_argument(
278285
"--flash_attention_fast_softmax",
279-
action="store_true",
286+
nargs="?",
287+
const=None,
288+
action=SetTrueOrFalseOrNone,
280289
help="Whether to enable Habana Flash Attention in fast softmax mode.",
281290
)
282291
parser.add_argument(
@@ -382,8 +391,13 @@ def setup_parser(parser):
382391
if not args.use_hpu_graphs:
383392
args.limit_hpu_graphs = False
384393

385-
if args.use_flash_attention and not args.flash_attention_fast_softmax:
394+
if args.use_flash_attention and args.flash_attention_fast_softmax is None:
395+
logger.warning(
396+
"`--flash_attention_fast_softmax` was not set; defaulting to True due to `--use_flash_attention` being enabled."
397+
)
386398
args.flash_attention_fast_softmax = True
399+
else:
400+
args.flash_attention_fast_softmax = False
387401

388402
args.quant_config = os.getenv("QUANT_CONFIG", "")
389403
if args.quant_config and args.load_quantized_model_with_autogptq:

examples/text-generation/utils.py

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
# Copyright (C) 2020-2021 Habana Labs, Ltd. an Intel Company
1818
###############################################################################
1919

20+
import argparse
2021
import copy
2122
import glob
2223
import os
@@ -796,3 +797,46 @@ def local_split_rank_state_dict(model, gathered_state_dict):
796797
cur_accelerator.synchronize()
797798

798799
return rank_state_dict
800+
801+
802+
class SetTrueOrFalseOrNone(argparse.Action):
    """
    Custom argparse action for a tri-state boolean flag (True, False, or None).

    Intended to be registered with ``nargs="?"`` and ``const=None`` so that:
      - the flag given without a value sets the destination to True,
      - the flag given with an explicit value sets it to that boolean,
      - the flag omitted entirely leaves the destination at its default (None).

    Accepted values (case-insensitive):
      - True values: 'true', '1', 't', 'y', 'yes'
      - False values: 'false', '0', 'f', 'n', 'no'

    An invalid value raises ``argparse.ArgumentError``, which argparse converts
    into a clean usage/error message.
    """

    # Accepted (lowercased) string spellings mapped to their boolean meaning.
    _VALUE_MAP = {
        "true": True,
        "1": True,
        "t": True,
        "y": True,
        "yes": True,
        "false": False,
        "0": False,
        "f": False,
        "n": False,
        "no": False,
    }

    def __call__(self, parser, namespace, values, option_string=None):
        if values is None:
            # Flag present without a value (nargs="?" delivered const=None): enable.
            setattr(namespace, self.dest, True)
        elif isinstance(values, bool):
            # Already a boolean (e.g. supplied programmatically): store as-is.
            setattr(namespace, self.dest, values)
        else:
            try:
                setattr(namespace, self.dest, self._VALUE_MAP[values.lower()])
            except KeyError:
                # Raise ArgumentError, not ArgumentTypeError: argparse only
                # translates ArgumentTypeError for `type=` callables. Raised from
                # an Action, ArgumentTypeError escapes as an unhandled traceback,
                # whereas ArgumentError is caught and reported as a usage error.
                raise argparse.ArgumentError(
                    self,
                    f"Invalid value for {option_string}: {values}. "
                    f"Expected one of: {', '.join(self._VALUE_MAP.keys())}.",
                ) from None

0 commit comments

Comments
 (0)