zufangzhu
diff --git a/csrc/dispatch_utils.h b/csrc/dispatch_utils.h
Lines changed: 9 additions & 0 deletions
diff --git a/csrc/ops.h b/csrc/ops.h
Lines changed: 4 additions & 1 deletion
@@ -78,3 +78,12 @@
 #define VLLM_DISPATCH_INTEGRAL_AND_UNSIGNED_TYPES(TYPE, NAME, ...) \
   AT_DISPATCH_SWITCH(                                              \
       TYPE, NAME, VLLM_DISPATCH_CASE_INTEGRAL_AND_UNSIGNED_TYPES(__VA_ARGS__))
+// Calls __VA_ARGS__() with constexpr `const_expr` set from runtime `expr`.
+#define VLLM_DISPATCH_BOOL(expr, const_expr, ...) \
+  if (expr) { /* runtime value picks a branch */  \
+    constexpr bool const_expr = true;             \
+    __VA_ARGS__(); /* const_expr in scope here */ \
+  } else {                                        \
+    constexpr bool const_expr = false;            \
+    __VA_ARGS__(); /* same call, false branch */  \
+  }  // NOTE(review): expands to a bare if/else, not a do { } while (0) block
@@ -75,7 +75,10 @@ void gather_cache(
     std::optional<torch::Tensor> seq_starts = std::nullopt);
 
 void static_scaled_fp8_quant(
-    torch::Tensor& out, torch::Tensor const& input, torch::Tensor const& scale);
+    torch::Tensor& out,  // the only non-const tensor parameter
+    torch::Tensor const& input,
+    torch::Tensor const& scale,
+    std::optional<std::tuple<int64_t, int64_t>> group_shape = std::nullopt);  // defaulted, so 3-argument calls still compile
 
 void dynamic_scaled_fp8_quant(
     torch::Tensor& out, torch::Tensor const& input, torch::Tensor& scale);