pytorch · drisspg · Apr 25, 2025 · Apr 26, 2025
diff --git a/test/prototype/mx_formats/test_mx_linear.py b/test/prototype/mx_formats/test_mx_linear.py
@@ -25,6 +25,7 @@
     MXInferenceLinear,
     MXLinear,
 )
+from torchao.prototype.mx_formats.mx_subclass import MXFPConfig
 from torchao.quantization import quantize_
 from torchao.quantization.utils import compute_error
 from torchao.utils import (
@@ -372,3 +373,34 @@ def test_inference_print_str():
     s = str(m)
     assert "bl_sz=32" in s
     assert "kernel=emulated" in s
+
+
+@pytest.mark.skipif(not torch.cuda.is_available(), reason="CUDA not available")
+@pytest.mark.skipif(
+    not TORCH_VERSION_AT_LEAST_2_8, reason="torch.compile requires PyTorch 2.8+"
+)
+@pytest.mark.skipif(not is_sm_at_least_100, reason="Reqs sm100")
+@pytest.mark.parametrize("elem_dtype", [torch.float8_e4m3fn])
+@pytest.mark.parametrize("bias", [True, False])
+@pytest.mark.parametrize("compile", [True, False])
+@torch.no_grad()
+def test_inference_subclass(elem_dtype, bias: bool, compile: bool):
+    """
+    Smoke test for inference compile
+    """
+    if elem_dtype in (torch.float8_e4m3fn, torch.float8_e5m2):
+        if not is_sm_at_least_89():
+            pytest.skip("CUDA capability >= 8.9 required for float8 in triton")
+
+    m = nn.Linear(32, 128, bias=bias, dtype=torch.bfloat16, device="cuda")
+    m_mx = copy.deepcopy(m)
+    config = MXFPConfig()
+    quantize_(m_mx, config=config)
+    if compile:
+        m_mx = torch.compile(m_mx, fullgraph=True)
+
+    x = torch.randn(128, 32, device="cuda", dtype=torch.bfloat16)
+    y_ref = m(x)
+    y_mx = m_mx(x)
+    sqnr = compute_error(y_ref, y_mx)
+    assert sqnr >= 25.0, f"Got a sqnr of {sqnr} for {elem_dtype} and bias={bias}"
diff --git a/torchao/__init__.py b/torchao/__init__.py
@@ -43,7 +43,7 @@
     quantize_,
 )
 
-from . import dtypes, optim, swizzle, testing
+from . import dtypes, optim, quantization, swizzle, testing
 
 __all__ = [
     "dtypes",
@@ -53,4 +53,5 @@
     "swizzle",
     "testing",
     "ops",
+    "quantization",
 ]
diff --git a/torchao/core/config.py b/torchao/core/config.py
@@ -175,6 +175,7 @@ def config_to_dict(config: AOBaseConfig) -> Dict[str, Any]:
     "torchao.quantization",
     "torchao.sparsity.sparse_api",
     "torchao.prototype.quantization",
+    "torchao.prototype.mx_formats",
 }
 
 

diff --git a/torchao/prototype/mx_formats/__init__.py b/torchao/prototype/mx_formats/__init__.py
@@ -4,6 +4,7 @@
     MXLinearConfig,
     MXLinearRecipeName,
 )
+from torchao.prototype.mx_formats.mx_subclass import MXFPConfig
 
 # import mx_linear here to register the quantize_ transform logic
 # ruff: noqa: I001
@@ -14,4 +15,5 @@
     "MXInferenceLinearConfig",
     "MXLinearConfig",
     "MXLinearRecipeName",
+    "MXFPConfig",
 ]