Fix float8 + int4 QAT (#2851)

andrewor14 · web-flow · commit 0596713d18d0 · 2025-08-22T17:57:55.000-04:00
**Summary:** After #2779, `Float8DynamicActivationInt4WeightConfig` no longer has a `group_size` field, but QAT's `_infer_fake_quantize_configs` still read `base_config.group_size` when building the weight fake-quantize config. This commit makes QAT use the fixed group size of 128 instead.

**Test Plan:**
```
python test/quantization/test_qat.py -k test_infer_fp8_int4_config
```
diff --git a/test/quantization/test_qat.py b/test/quantization/test_qat.py
@@ -1932,6 +1932,26 @@ def test_quantize_api_fp8_int4(self):
             target_convert_sqnr=float("inf"),
         )
 
+    @unittest.skipIf(not _CUDA_IS_AVAILABLE, "skipping when cuda is not available")
+    def test_infer_fp8_int4_config(self):
+        """
+        Test that fake quantize configs are correctly inferred from
+        `Float8DynamicActivationInt4WeightConfig`.
+        """
+        from torchao.quantization.qat.fake_quantize_config import (
+            _infer_fake_quantize_configs,
+        )
+
+        base_config = Float8DynamicActivationInt4WeightConfig()
+        (act_config, weight_config) = _infer_fake_quantize_configs(base_config)
+        self.assertIsInstance(act_config, Float8FakeQuantizeConfig)
+        self.assertEqual(act_config.dtype, torch.float8_e4m3fn)
+        self.assertIsInstance(act_config.granularity, PerRow)
+        self.assertIsInstance(weight_config, IntxFakeQuantizeConfig)
+        self.assertEqual(weight_config.dtype, torch.int4)
+        self.assertEqual(weight_config.group_size, 128)
+        self.assertTrue(weight_config.is_symmetric)
+
 
 instantiate_parametrized_tests(TestQAT)
 
diff --git a/torchao/quantization/qat/fake_quantize_config.py b/torchao/quantization/qat/fake_quantize_config.py
@@ -382,7 +382,7 @@ def _infer_fake_quantize_configs(
         )
         weight_config = IntxFakeQuantizeConfig(
             dtype=torch.int4,
-            group_size=base_config.group_size,
+            group_size=128,
             is_symmetric=True,
         )
     else:

Original file line number	Diff line number	Diff line change
`@@ -382,7 +382,7 @@ def _infer_fake_quantize_configs(`
`382`	`382`	`)`
`383`	`383`	`weight_config = IntxFakeQuantizeConfig(`
`384`	`384`	`dtype=torch.int4,`
`385`		`- group_size=base_config.group_size,`
	`385`	`+ group_size=128,`
`386`	`386`	`is_symmetric=True,`
`387`	`387`	`)`
`388`	`388`	`else:`