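Enable FP4 kernel precision, fix dynamic_block_quantize converter, update FP4 test, and bump pre-commit hook versions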

lanluo-nvidia · lanluo-nvidia · commit d2b1422b7f3d · 2025-04-30T10:01:55.000-07:00
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -21,14 +21,14 @@ repos:
           - id: clang-format
             types_or: [c++, c, cuda]
     - repo: https://github.com/keith/pre-commit-buildifier
-      rev: 6.4.0
+      rev: 8.0.3
       hooks:
           - id: buildifier
             args:
                 - --warnings=all
           - id: buildifier-lint
     - repo: https://github.com/abravalheri/validate-pyproject
-      rev: v0.23
+      rev: v0.24.1
       hooks:
           - id: validate-pyproject
     - repo: https://github.com/pycqa/isort
@@ -37,17 +37,17 @@ repos:
           - id: isort
             name: isort (python)
     - repo: https://github.com/pre-commit/mirrors-mypy
-      rev: "v1.9.0"
+      rev: "v1.15.0"
       hooks:
           - id: mypy
             exclude: "^py/torch_tensorrt/fx|^examples|^tests|^py/torch_tensorrt/dynamo/_experimental|^tools|^docs|noxfile.py|setup.py|versions.py"
     - repo: https://github.com/astral-sh/ruff-pre-commit
       # Ruff version.
-      rev: v0.3.3
+      rev: v0.11.7
       hooks:
           - id: ruff
     - repo: https://github.com/psf/black
-      rev: 24.3.0
+      rev: 25.1.0
       hooks:
           - id: black
             exclude: ^examples/custom_converters/elu_converter/setup.py|^docs
@@ -57,7 +57,7 @@ repos:
           - id: typos
     - repo: https://github.com/astral-sh/uv-pre-commit
       # uv version.
-      rev: 0.5.5
+      rev: 0.7.1
       hooks:
           # Update the uv lockfile
           - id: uv-lock
diff --git a/py/torch_tensorrt/_enums.py b/py/torch_tensorrt/_enums.py
@@ -76,10 +76,10 @@ class dtype(Enum):
 
     f8 = auto()
     """8 bit floating-point number, equivalent to ``dtype.fp8`` and ``dtype.float8``
-    
+
     :meta hide-value:
     """
-    
+
     f4 = auto()
     """4 bit floating-point number, equivalent to ``dtype.fp4`` and ``dtype.float4``
 
diff --git a/py/torch_tensorrt/dynamo/_defaults.py b/py/torch_tensorrt/dynamo/_defaults.py
@@ -29,7 +29,14 @@
 REQUIRE_FULL_COMPILATION = False
 DRYRUN = False
 HARDWARE_COMPATIBLE = False
-SUPPORTED_KERNEL_PRECISIONS = {dtype.f32, dtype.f16, dtype.bf16, dtype.i8, dtype.f8}
+SUPPORTED_KERNEL_PRECISIONS = {
+    dtype.f32,
+    dtype.f16,
+    dtype.bf16,
+    dtype.i8,
+    dtype.f8,
+    dtype.f4,
+}
 TIMING_CACHE_PATH = os.path.join(
     tempfile.gettempdir(), "torch_tensorrt_engine_cache", "timing_cache.bin"
 )
diff --git a/py/torch_tensorrt/dynamo/conversion/impl/quantize.py b/py/torch_tensorrt/dynamo/conversion/impl/quantize.py
@@ -68,6 +68,7 @@ def quantize(
 
         return dq_output
 
+
 def dynamic_block_quantize(
     ctx: ConversionContext,
     target: Target,
@@ -99,23 +100,29 @@ def dynamic_block_quantize(
             raise ValueError(
                 f"dynamic_block_quantize converter received an input of {input_tensor.shape} shape. Supported shapes: 2D or 3D"
             )
-        print(f"input_tensor.shape: {input_tensor.shape} {block_size=} {amax=} {num_bits=} {exponent_bits=} {scale_num_bits=} {scale_exponent_bits=}")
         max_bound = 6
         amax = to_torch(amax, None)
         scale = torch.divide(amax, max_bound)
         scale = get_trt_tensor(ctx, scale, name + "_scale")
 
-        output_type=trt.DataType.FP4
         # Add Q node
-        dynamic_quantize_layer = ctx.net.add_dynamic_quantize(input_tensor, axis=-1, block_size=16, output_type=output_type)
-        quantize_layer.set_output_type(0, output_type)
+        dynamic_quantize_layer = ctx.net.add_dynamic_quantize(
+            input_tensor,
+            axis=-1,
+            block_size=16,
+            output_type=trt.DataType.FP4,
+            scale_type=trt.DataType.FP8,
+        )
+        dynamic_quantize_layer.set_output_type(0, trt.DataType.FP4)
 
-        set_layer_name(quantize_layer, target, name + "_quantize", source_ir)
-        q_output = quantize_layer.get_output(0)
+        set_layer_name(
+            dynamic_quantize_layer, target, name + "_dynamic_quantize", source_ir
+        )
+        q_output = dynamic_quantize_layer.get_output(0)
         # Add DQ node
         dequantize_layer = ctx.net.add_dequantize(q_output, scale)
         set_layer_name(dequantize_layer, target, name + "_dequantize", source_ir)
-        dequantize_layer.precision = output_type
+        dequantize_layer.precision = trt.DataType.FP4
         dq_output = dequantize_layer.get_output(0)
 
         return dq_output
diff --git a/tests/py/dynamo/models/test_models_export.py b/tests/py/dynamo/models/test_models_export.py
@@ -199,10 +199,9 @@ def test_resnet18_half(ir):
     torch._dynamo.reset()
 
 
-
 @unittest.skipIf(
-    torch.cuda.get_device_capability() < (8, 9),
-    "FP4 quantization requires compute capability 8.9 or later",
+    torch.cuda.get_device_capability() < (10, 0),
+    "FP4 quantization requires compute capability 10.0 or later",
 )
 @unittest.skipIf(
     not importlib.util.find_spec("modelopt"),
@@ -216,8 +215,8 @@ def test_base_fp4(ir):
     class SimpleNetwork(torch.nn.Module):
         def __init__(self):
             super(SimpleNetwork, self).__init__()
-            self.linear1 = torch.nn.Linear(in_features=10, out_features=5)
-            self.linear2 = torch.nn.Linear(in_features=5, out_features=1)
+            self.linear1 = torch.nn.Linear(in_features=32, out_features=16)
+            self.linear2 = torch.nn.Linear(in_features=16, out_features=1)
 
         def forward(self, x):
             x = self.linear1(x)
@@ -229,12 +228,12 @@ def calibrate_loop(model):
         """Simple calibration function for testing."""
         model(input_tensor)
 
-    input_tensor = torch.randn(1, 10).cuda()
+    input_tensor = torch.randn(1, 32).cuda()
     model = SimpleNetwork().eval().cuda()
 
     quant_cfg = mtq.NVFP4_DEFAULT_CFG
     mtq.quantize(model, quant_cfg, forward_loop=calibrate_loop)
-    # model has FP8 qdq nodes at this point
+    # model has FP4 qdq nodes at this point
     output_pyt = model(input_tensor)
 
     with torch.no_grad():