Commit c13fa2b
Add support for float8 activation for Int4GroupwisePreshuffleTensor
Summary: Added basic op support (linear and bmm). Both the float8-activation and bf16-activation variants live in the same Tensor subclass, since the weight dtype is identical; the only difference is whether the activation is quantized. There are some differences in the constituent tensors, however:

bf16 activation:
* group_scale
* group_zero

fp8 activation:
* group_scale
* row_scale

Test Plan:
python test/dtypes/test_float8_activation_int4_groupwise_preshuffle.py

Reviewers:

Subscribers:

Tasks:

Tags:

stack-info: PR: #2437, branch: jerryzh168/stack/4
1 parent b828ffc commit c13fa2b
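
A minimal usage sketch of the fp8-activation path described in the summary, assuming the FbgemmConfig fields exercised in the updated tests below (preshuffle, activation_dtype_for_int4); this is an illustration, not code taken from the commit:

import torch
from torchao.quantization import FbgemmConfig, quantize_

# Assumed config, mirroring FP8_ACT_CONFIG in the updated test file below.
fp8_act_config = FbgemmConfig(
    input_dtype=torch.bfloat16,
    weight_dtype=torch.int4,
    output_dtype=torch.bfloat16,
    block_size=[1, 128],
    preshuffle=True,
    activation_dtype_for_int4="fp8",  # "bf16" would keep the activation unquantized
)

linear = torch.nn.Linear(128, 256, dtype=torch.bfloat16, device="cuda")
quantize_(linear, fp8_act_config)

# With fp8 activation the quantized weight carries group_scale and row_scale;
# with bf16 activation it carries group_scale and group_zero instead.
print(type(linear.weight).__name__, hasattr(linear.weight, "row_scale"))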

File tree

6 files changed (+836, -74 lines)
Lines changed: 166 additions & 0 deletions (new file)

@@ -0,0 +1,166 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD 3-Clause license found in the
# LICENSE file in the root directory of this source tree.

import unittest

import torch
from torch.testing._internal.common_utils import (
    TestCase,
    run_tests,
)

from torchao.quantization import (
    FbgemmConfig,
    quantize_,
)
from torchao.quantization.utils import compute_error
from torchao.utils import (
    TORCH_VERSION_AT_LEAST_2_8,
    is_sm_at_least_90,
)


@unittest.skipIf(not TORCH_VERSION_AT_LEAST_2_8, "Need pytorch 2.8+")
@unittest.skipIf(not torch.cuda.is_available(), "Need CUDA available")
@unittest.skipIf(not is_sm_at_least_90(), "Need sm90+")
class TestInt4GroupwisePreshuffleTensor(TestCase):
    def setUp(self):
        self.config = FbgemmConfig(
            input_dtype=torch.bfloat16,
            weight_dtype=torch.int4,
            output_dtype=torch.bfloat16,
            block_size=[1, 128],
            preshuffle=True,
            float8_activation=True,
        )
        self.bmm_config = FbgemmConfig(
            input_dtype=torch.bfloat16,
            weight_dtype=torch.int4,
            output_dtype=torch.bfloat16,
            block_size=[1, 1, 128],
            preshuffle=True,
            float8_activation=True,
        )
        self.GPU_DEVICES = ["cuda"] if torch.cuda.is_available() else []

    def test_linear(self):
        dtype = torch.bfloat16
        device = "cuda"
        input = torch.randn(1, 128, dtype=dtype, device=device)
        linear = torch.nn.Linear(128, 256, dtype=dtype, device=device)
        original = linear(input)
        quantize_(linear, self.config)
        quantized = linear(input)
        self.assertTrue(compute_error(original, quantized) > 20)

    # @unittest.skip("WIP: this doesn't work yet")
    def test_slice(self):
        dtype = torch.bfloat16
        device = "cuda"
        dummy = torch.nn.Linear(256, 256, bias=False, dtype=dtype, device=device)
        dummy1 = torch.nn.Linear(256, 64, bias=False, dtype=dtype, device=device)
        dummy1.weight = torch.nn.Parameter(
            dummy.weight.narrow(0, 0, 64), requires_grad=False
        )
        dummy2 = torch.nn.Linear(128, 256, dtype=dtype, device=device)
        dummy2.weight = torch.nn.Parameter(
            dummy.weight.narrow(1, 0, 128), requires_grad=False
        )

        quantize_(dummy, self.config)
        weight1 = dummy.weight.narrow(0, 0, 64)
        weight2 = dummy.weight.narrow(1, 0, 128)
        # check that the slicing operation is performed correctly on the constituent Tensors
        self.assertEqual(
            weight1.packed_weight, dummy.weight.packed_weight.narrow(0, 0, 64)
        )
        self.assertEqual(weight1.group_scale, dummy.weight.group_scale.narrow(2, 0, 64))
        self.assertEqual(
            weight2.packed_weight, dummy.weight.packed_weight.narrow(1, 0, 64)
        )
        self.assertEqual(weight2.group_scale, dummy.weight.group_scale.narrow(0, 0, 1))

        # check that 1. the sliced bf16 weight and 2. the sliced quantized weight
        # produce similar results for matmul on the same input Tensor

        input = torch.randn(2, 256, dtype=dtype, device=device)
        res_ref = dummy1(input)
        dummy.weight = torch.nn.Parameter(weight1, requires_grad=False)
        res = dummy(input)
        sqnr = compute_error(res, res_ref)
        assert sqnr > 20, f"Got: {sqnr}"

        input = torch.randn(2, 128, dtype=dtype, device=device)
        res_ref = dummy2(input)
        dummy.weight = torch.nn.Parameter(weight2, requires_grad=False)
        res = dummy(input)
        sqnr = compute_error(res, res_ref)
        assert sqnr > 15, f"Got: {sqnr}"

    def test_slice_and_copy_(self):
        l = torch.nn.Linear(1024, 1024).to("cuda").to(torch.bfloat16)
        l.weight = torch.nn.Parameter(
            torch.zeros(1024, 1024, dtype=torch.bfloat16, device="cuda")
        )
        quantize_(l, self.config)
        param = l.weight
        param_data = param.data
        param_data = param_data.narrow(0, 0, 512)
        assert (
            param.data.packed_weight.data_ptr() == param_data.packed_weight.data_ptr()
        )
        assert param.data.group_scale.data_ptr() == param_data.group_scale.data_ptr()
        assert param.data.row_scale.data_ptr() == param_data.row_scale.data_ptr()
        orig_value = param.data.packed_weight[0][0].item()

        # dummy_l has a random weight (shouldn't be 0)
        dummy_l = torch.nn.Linear(1024, 1024).to("cuda").to(torch.bfloat16)
        quantize_(dummy_l, self.config)
        quantized = dummy_l.weight
        quantized = quantized.narrow(0, 0, 512)

        param_data.copy_(quantized)

        # making sure param.data is updated
        assert param.data.packed_weight[0][0] != orig_value

    def test_bmm(self):
        class M(torch.nn.Module):
            def __init__(self, weight):
                super().__init__()
                self.weight = weight

            def forward(self, x):
                return torch.bmm(x, self.weight)

        dtype = torch.bfloat16
        device = "cuda"
        input = torch.randn(10, 32, 128, dtype=dtype, device=device)
        weight = torch.randn(10, 128, 256, dtype=dtype, device=device)
        m = M(weight).eval()
        original = m(input)
        m.weight = torch.nn.Parameter(m.weight.transpose(1, 2).contiguous())
        quantize_(m, self.bmm_config, filter_fn=lambda x, fqn: True)
        quantized = m(input)
        self.assertTrue(compute_error(original, quantized) > 18)

    def test_to_device(self):
        for device in self.GPU_DEVICES:
            linear = torch.nn.Linear(128, 256, dtype=torch.bfloat16)
            quantize_(linear, self.config)
            linear.to(device)

            linear = torch.nn.Linear(128, 256, dtype=torch.bfloat16)
            quantize_(linear, self.config)
            linear.to(device=device)

            linear = torch.nn.Linear(128, 256, dtype=torch.bfloat16)
            quantize_(linear, self.config)
            linear.to(device)


if __name__ == "__main__":
    run_tests()

test/quantization/quantize_/test_int4_groupwise_preshuffle.py

Lines changed: 67 additions & 24 deletions
@@ -4,11 +4,14 @@
 # This source code is licensed under the BSD 3-Clause license found in the
 # LICENSE file in the root directory of this source tree.

+import tempfile
 import unittest

 import torch
 from torch.testing._internal.common_utils import (
     TestCase,
+    instantiate_parametrized_tests,
+    parametrize,
     run_tests,
 )

@@ -23,6 +26,42 @@
     is_sm_at_least_90,
 )

+BF16_ACT_CONFIG = FbgemmConfig(
+    input_dtype=torch.bfloat16,
+    weight_dtype=torch.int4,
+    output_dtype=torch.bfloat16,
+    block_size=[1, 128],
+    preshuffle=True,
+    activation_dtype_for_int4="bf16",
+)
+
+BF16_ACT_BMM_CONFIG = FbgemmConfig(
+    input_dtype=torch.bfloat16,
+    weight_dtype=torch.int4,
+    output_dtype=torch.bfloat16,
+    block_size=[1, 1, 128],
+    preshuffle=True,
+    activation_dtype_for_int4="bf16",
+)
+
+FP8_ACT_CONFIG = FbgemmConfig(
+    input_dtype=torch.bfloat16,
+    weight_dtype=torch.int4,
+    output_dtype=torch.bfloat16,
+    block_size=[1, 128],
+    preshuffle=True,
+    activation_dtype_for_int4="fp8",
+)
+
+FP8_ACT_BMM_CONFIG = FbgemmConfig(
+    input_dtype=torch.bfloat16,
+    weight_dtype=torch.int4,
+    output_dtype=torch.bfloat16,
+    block_size=[1, 1, 128],
+    preshuffle=True,
+    activation_dtype_for_int4="fp8",
+)
+

 @unittest.skipIf(not TORCH_VERSION_AT_LEAST_2_8, "Need pytorch 2.8+")
 @unittest.skipIf(not torch.cuda.is_available(), "Need CUDA available")
@@ -32,33 +71,23 @@
 )
 class TestInt4GroupwisePreshuffleTensor(TestCase):
     def setUp(self):
-        self.config = FbgemmConfig(
-            input_dtype=torch.bfloat16,
-            weight_dtype=torch.int4,
-            output_dtype=torch.bfloat16,
-            block_size=[1, 128],
-            preshuffle=True,
-        )
-        self.bmm_config = FbgemmConfig(
-            input_dtype=torch.bfloat16,
-            weight_dtype=torch.int4,
-            output_dtype=torch.bfloat16,
-            block_size=[1, 1, 128],
-            preshuffle=True,
-        )
         self.GPU_DEVICES = ["cuda"] if torch.cuda.is_available() else []

-    def test_linear(self):
+    @parametrize("config", [BF16_ACT_CONFIG, FP8_ACT_CONFIG])
+    def test_linear(self, config):
         dtype = torch.bfloat16
         device = "cuda"
         input = torch.randn(1, 128, dtype=dtype, device=device)
         linear = torch.nn.Linear(128, 256, dtype=dtype, device=device)
         original = linear(input)
-        quantize_(linear, self.config)
+        quantize_(linear, config)
         quantized = linear(input)
         self.assertTrue(compute_error(original, quantized) > 20)

-    def test_bmm(self):
+    # Note: this order will error out: `Got bad cuda status: an illegal memory access was encountered at line: 449`
+    # @parametrize("bmm_config", [BF16_ACT_BMM_CONFIG, FP8_ACT_BMM_CONFIG])
+    @parametrize("bmm_config", [FP8_ACT_BMM_CONFIG, BF16_ACT_BMM_CONFIG])
+    def test_bmm(self, bmm_config):
         class M(torch.nn.Module):
             def __init__(self, weight):
                 super().__init__()
@@ -74,32 +103,46 @@ def forward(self, x):
         m = M(weight).eval()
         original = m(input)
         m.weight = torch.nn.Parameter(m.weight.transpose(1, 2).contiguous())
-        quantize_(m, self.bmm_config, filter_fn=lambda x, fqn: True)
+        quantize_(m, bmm_config, filter_fn=lambda x, fqn: True)
         quantized = m(input)
         self.assertTrue(compute_error(original, quantized) > 18)

-    def test_to_device(self):
+    @parametrize("config", [BF16_ACT_CONFIG, FP8_ACT_CONFIG])
+    def test_to_device(self, config):
         for device in self.GPU_DEVICES:
             linear = torch.nn.Linear(128, 256, dtype=torch.bfloat16)
-            quantize_(linear, self.config)
+            quantize_(linear, config)
             linear.to(device)

             linear = torch.nn.Linear(128, 256, dtype=torch.bfloat16)
-            quantize_(linear, self.config)
+            quantize_(linear, config)
             linear.to(device=device)

             linear = torch.nn.Linear(128, 256, dtype=torch.bfloat16)
-            quantize_(linear, self.config)
+            quantize_(linear, config)
             linear.to(device)

-    def test_module_path(self):
+    @parametrize("config", [BF16_ACT_CONFIG, FP8_ACT_CONFIG])
+    def test_module_path(self, config):
         linear = torch.nn.Linear(128, 256, dtype=torch.bfloat16)
-        quantize_(linear, self.config)
+        quantize_(linear, config)
         self.assertEqual(
             str(type(linear.weight)),
             "<class 'torchao.quantization.Int4GroupwisePreshuffleTensor'>",
         )

+        with tempfile.NamedTemporaryFile() as f:
+            torch.save(linear.state_dict(), f)
+            f.seek(0)
+            state_dict = torch.load(f)
+            self.assertEqual(
+                str(type(state_dict["weight"])),
+                "<class 'torchao.quantization.Int4GroupwisePreshuffleTensor'>",
+            )
+
+
+instantiate_parametrized_tests(TestInt4GroupwisePreshuffleTensor)
+

 if __name__ == "__main__":
     run_tests()

torchao/dtypes/__init__.py

Lines changed: 4 additions & 0 deletions
@@ -10,6 +10,9 @@
 )
 from .fbgemm_fp8_tensor import FbgemmFp8Tensor, to_fbgemm_fp8
 from .fbgemm_int4_tensor import FbgemmInt4Tensor, to_fbgemm_int4
+from .float8_activation_int4_groupwise_preshuffle_tensor import (
+    Float8ActivationInt4GroupwisePreshuffleTensor,
+)
 from .floatx import (
     CutlassSemiSparseLayout,
     Float8Layout,
@@ -70,4 +73,5 @@
     "FbgemmFp8Tensor",
     "Int8DynamicActInt4WeightCPULayout",
     "Int4GroupwisePreshuffleTensor",
+    "Float8ActivationInt4GroupwisePreshuffleTensor",
 ]
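
With the export above, the new subclass can be imported directly from torchao.dtypes; a small sketch assuming only the names added in this diff (the helper function is hypothetical):

# Assumes the import and __all__ entry added above are in place.
from torchao.dtypes import Float8ActivationInt4GroupwisePreshuffleTensor

def is_fp8_act_int4_weight(t: object) -> bool:
    # Hypothetical convenience check for quantized weights after quantize_().
    return isinstance(t, Float8ActivationInt4GroupwisePreshuffleTensor)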
