
Commit d22fdb6

bump this into the constructor of BuilderArgs
1 parent 757b224 commit d22fdb6

File tree

2 files changed: +4 -4 lines changed


torchchat/cli/builder.py (+4)

@@ -179,6 +179,10 @@ def from_args(cls, args: argparse.Namespace) -> "BuilderArgs":
         pp = getattr(args, "pp", 1)
         tp = getattr(args, "tp", 1)
         chpt_from = getattr(args, "chpt_from", "hf")
+        if args.device == "cpu" and (args.attention_backend == "efficient_attention"
+                                     or args.attention_backend == "cudnn_attention"):
+            print(f"Warning: {args.attention_backend} is not supported on CPU. Using math instead.")
+            args.attention_backend = "math"
         return cls(
             checkpoint_dir=checkpoint_dir,
             checkpoint_path=checkpoint_path,

torchchat/generate.py (-4)

@@ -1179,10 +1179,6 @@ def callback(x, *, done_generating=False):
             prof = torch.profiler.profile()
             t0 = time.perf_counter()
             num_tokens_generated = 0
-            if self.builder_args.device == "cpu" and (self.builder_args.attention_backend == "efficient_attention"
-                                                       or self.builder_args.attention_backend == "cudnn_attention"):
-                print(f"Warning: {self.builder_args.attention_backend} is not supported on CPU. Using math instead.")
-                self.builder_args.attention_backend = "math"
             with prof:
                 generator_func = self.generate(
                     self.model,
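
For reference, the fallback that this commit moves from the generation loop into BuilderArgs.from_args can be exercised on its own. Below is a minimal sketch assuming the same warning behaviour as the diff above; the normalize_attention_backend helper and the argparse wiring are illustrative only, not torchchat APIs.

# A minimal, self-contained sketch of the CPU backend fallback shown in the diff.
# The helper name and argparse flags below are hypothetical, not part of torchchat.
import argparse


def normalize_attention_backend(device: str, attention_backend: str) -> str:
    # On CPU, the efficient_attention and cudnn_attention SDPA backends are
    # unavailable, so fall back to the math backend and warn.
    if device == "cpu" and attention_backend in ("efficient_attention", "cudnn_attention"):
        print(f"Warning: {attention_backend} is not supported on CPU. Using math instead.")
        return "math"
    return attention_backend


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("--device", default="cpu")
    parser.add_argument("--attention-backend", default="cudnn_attention")
    args = parser.parse_args()
    # With the defaults above this prints the warning and then "math".
    args.attention_backend = normalize_attention_backend(args.device, args.attention_backend)
    print(args.attention_backend)

Doing this once at construction time means every consumer of BuilderArgs sees the already-corrected backend, instead of patching it up inside the generation loop.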
