Commit 44a878b

Add subclass based method for inference w/ MXFP8

stack-info: PR: #2132, branch: drisspg/stack/50
1 parent 955ebb0

File tree

8 files changed: +429, -61 lines changed


test/prototype/mx_formats/test_mx_linear.py
Lines changed: 32 additions & 0 deletions

@@ -25,6 +25,7 @@
     MXInferenceLinear,
     MXLinear,
 )
+from torchao.prototype.mx_formats.mx_subclass import MXFPConfig
 from torchao.quantization import quantize_
 from torchao.quantization.utils import compute_error
 from torchao.utils import (
@@ -372,3 +373,34 @@ def test_inference_print_str():
     s = str(m)
     assert "bl_sz=32" in s
     assert "kernel=emulated" in s
+
+
+@pytest.mark.skipif(not torch.cuda.is_available(), reason="CUDA not available")
+@pytest.mark.skipif(
+    not TORCH_VERSION_AT_LEAST_2_8, reason="torch.compile requires PyTorch 2.8+"
+)
+@pytest.mark.skipif(not is_sm_at_least_100(), reason="Reqs sm100")
+@pytest.mark.parametrize("elem_dtype", [torch.float8_e4m3fn])
+@pytest.mark.parametrize("bias", [True, False])
+@pytest.mark.parametrize("compile", [True, False])
+@torch.no_grad()
+def test_inference_subclass(elem_dtype, bias: bool, compile: bool):
+    """
+    Smoke test for inference compile
+    """
+    if elem_dtype in (torch.float8_e4m3fn, torch.float8_e5m2):
+        if not is_sm_at_least_89():
+            pytest.skip("CUDA capability >= 8.9 required for float8 in triton")
+
+    m = nn.Linear(32, 128, bias=bias, dtype=torch.bfloat16, device="cuda")
+    m_mx = copy.deepcopy(m)
+    config = MXFPConfig()
+    quantize_(m_mx, config=config)
+    if compile:
+        m_mx = torch.compile(m_mx, fullgraph=True)
+
+    x = torch.randn(128, 32, device="cuda", dtype=torch.bfloat16)
+    y_ref = m(x)
+    y_mx = m_mx(x)
+    sqnr = compute_error(y_ref, y_mx)
+    assert sqnr >= 25.0, f"Got a sqnr of {sqnr} for {elem_dtype} and bias={bias}"
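
For reference, the user-facing flow this test exercises is a plain quantize_-with-config call on an existing module. A minimal sketch distilled from the test above, assuming a CUDA device that satisfies the capability checks and the prototype import paths shown in the diff:

    import copy

    import torch
    import torch.nn as nn

    from torchao.prototype.mx_formats import MXFPConfig
    from torchao.quantization import quantize_

    # Quantize a copy of a bf16 linear layer to MXFP8 in place.
    m = nn.Linear(32, 128, bias=True, dtype=torch.bfloat16, device="cuda")
    m_mx = copy.deepcopy(m)
    quantize_(m_mx, config=MXFPConfig())

    # The test covers both eager and fullgraph-compiled execution.
    m_mx = torch.compile(m_mx, fullgraph=True)

    x = torch.randn(128, 32, device="cuda", dtype=torch.bfloat16)
    with torch.no_grad():
        y_mx = m_mx(x)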

torchao/__init__.py
Lines changed: 2 additions & 1 deletion

@@ -43,7 +43,7 @@
     quantize_,
 )

-from . import dtypes, optim, swizzle, testing
+from . import dtypes, optim, quantization, swizzle, testing

 __all__ = [
     "dtypes",
@@ -53,4 +53,5 @@
     "swizzle",
     "testing",
     "ops",
+    "quantization",
 ]
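
This makes quantization an eagerly imported, publicly listed submodule of the top-level package, so it resolves as an attribute of torchao without a separate import. A small sketch of what the change enables:

    import torchao

    # Before this change, torchao.quantization needed its own import
    # statement; now `from . import quantization` in __init__.py binds
    # the submodule when the package itself is imported.
    quantize_ = torchao.quantization.quantize_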

torchao/core/config.py
Lines changed: 1 addition & 0 deletions

@@ -175,6 +175,7 @@ def config_to_dict(config: AOBaseConfig) -> Dict[str, Any]:
     "torchao.quantization",
     "torchao.sparsity.sparse_api",
     "torchao.prototype.quantization",
+    "torchao.prototype.mx_formats",
 }
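
This set allowlists the modules whose configs may be reconstructed during deserialization, so adding torchao.prototype.mx_formats lets MXFPConfig round-trip through the dict form. A hedged sketch, assuming the config_from_dict counterpart that lives alongside config_to_dict in torchao.core.config:

    from torchao.core.config import config_from_dict, config_to_dict
    from torchao.prototype.mx_formats import MXFPConfig

    config = MXFPConfig()

    # config_to_dict records the config's defining module; deserialization
    # only accepts modules on the allowlist, which now includes mx_formats.
    restored = config_from_dict(config_to_dict(config))
    assert isinstance(restored, MXFPConfig)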
torchao/prototype/mx_formats/__init__.py
Lines changed: 2 additions & 0 deletions

@@ -4,6 +4,7 @@
     MXLinearConfig,
     MXLinearRecipeName,
 )
+from torchao.prototype.mx_formats.mx_subclass import MXFPConfig

 # import mx_linear here to register the quantize_ transform logic
 # ruff: noqa: I001
@@ -14,4 +15,5 @@
     "MXInferenceLinearConfig",
     "MXLinearConfig",
     "MXLinearRecipeName",
+    "MXFPConfig",
 ]
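
Re-exporting MXFPConfig here means callers can import it from the package root rather than reaching into the mx_subclass module; both paths resolve to the same class object:

    from torchao.prototype.mx_formats import MXFPConfig
    from torchao.prototype.mx_formats.mx_subclass import MXFPConfig as _MXFPConfig

    assert MXFPConfig is _MXFPConfig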
