
Commit 6c1b813

Add test_intx_weight_only_e2e, set UnifTorchaoQuantizer defaults
1 parent 490bdf6 commit 6c1b813

3 files changed: +106, -94 lines

test/prototype/test_parq.py (+81, -64)
@@ -5,12 +5,13 @@
 # LICENSE file in the root directory of this source tree.
 import copy
 import unittest
+from typing import Optional
 
 import torch
 from torch import nn
 from torch.testing._internal import common_utils
 
-from torchao import quantize_
+from torchao.core.config import AOBaseConfig
 from torchao.dtypes import Int4CPULayout
 from torchao.prototype.parq.optim import (
     ProxHardQuant,
@@ -29,6 +30,7 @@
     IntxWeightOnlyConfig,
     _is_linear,
     int4_weight_only,
+    quantize_,
 )
 from torchao.utils import (
     TORCH_VERSION_AT_LEAST_2_4,
@@ -44,18 +46,26 @@ def split_param_groups(model):
 
     def get_param_groups(model):
         for module in model.children():
-            is_linear = isinstance(module, nn.Linear)
-            for n, p in module.named_parameters(recurse=False):
+            is_linear = _is_linear(module)
+            for n, p in module.named_parameters():
                 if is_linear and n == "weight":
                     params_quant.append(p)
                 else:
                     params_no_quant.append(p)
-            get_param_groups(module)
 
     get_param_groups(model)
     return params_quant, params_no_quant
 
 
+def build_param_groups(model, b: int = 2, group_size: Optional[int] = None):
+    params_quant, params_no_quant = split_param_groups(model)
+    quant_kwargs = {"quant_block_size": group_size} if group_size else {}
+    return [
+        {"params": params_quant, "quant_bits": b, **quant_kwargs},
+        {"params": params_no_quant},
+    ]
+
+
 class M(nn.Module):
     def __init__(self, m=256, n=128, k=16, bias=False):
         super().__init__()
@@ -87,46 +97,28 @@ class TestPARQuantization(common_utils.TestCase):
     def setUp(self):
         torch.manual_seed(123)
         self.model = M(bias=True).to(_DEVICE)
-        self.params_quant, self.params_no_quant = split_param_groups(self.model)
 
-    def train_loop(self, optimizer, steps=1):
-        for _ in range(steps):
-            x = self.model.example_inputs(device=_DEVICE)
-            out = self.model(x)
-            out.sum().backward()
-            optimizer.step()
+    @common_utils.parametrize("b", [0, 1, 2, 4])
+    @common_utils.parametrize("unif_quant", [True, False])
+    @common_utils.parametrize("hard_prox", [True, False])
+    def test_parq_train_loop(self, b: int = 2, unif_quant=True, hard_prox=True):
+        if unif_quant and b == 0:
+            self.skipTest("Ternary uniform quantization not yet supported")
 
-    def test_2bit_unif_quantizer_hard_prox(self):
-        b = 2
         self.model.reset_parameters()
-        param_groups = [
-            {"params": self.params_quant, "quant_bits": b},
-            {"params": self.params_no_quant},
-        ]
+        param_groups = build_param_groups(self.model, b)
         base_optimizer = torch.optim.AdamW(param_groups)
-        quantizer = UnifQuantizer()
-        optimizer = QuantOptimizer(base_optimizer, quantizer, ProxHardQuant())
-        self.train_loop(optimizer)
 
-        for child in self.model.children():
-            if isinstance(child, nn.Linear):
-                self.assertEqual(
-                    child.weight.unique().numel(), quantizer.get_quant_size(b)
-                )
-
-    def test_ternarybit_lsbq_parq_prox(self):
-        b = 0
-        self.model.reset_parameters()
-        param_groups = [
-            {"params": self.params_quant, "quant_bits": b},
-            {"params": self.params_no_quant},
-        ]
-        base_optimizer = torch.optim.AdamW(param_groups)
-        quantizer = LSBQuantizer()
-        optimizer = QuantOptimizer(
-            base_optimizer, quantizer, ProxPARQ(anneal_start=0, anneal_end=2)
+        quantizer = UnifQuantizer() if unif_quant else LSBQuantizer()
+        prox_map = (
+            ProxHardQuant() if hard_prox else ProxPARQ(anneal_start=0, anneal_end=2)
         )
-        self.train_loop(optimizer, steps=3)
+        optimizer = QuantOptimizer(base_optimizer, quantizer, prox_map)
+        for _ in range(3):
+            x = self.model.example_inputs(device=_DEVICE)
+            out = self.model(x)
+            out.sum().backward()
+            optimizer.step()
 
         for child in self.model.children():
             if isinstance(child, nn.Linear):
@@ -163,6 +155,35 @@ def compare_quantized_models(
             ref = getattr(m_ref, n).weight.dequantize()
             self.assertTrue(q.equal(ref))
 
+    def compare_parq_convert(
+        self,
+        model: nn.Module,
+        m_ref: nn.Module,
+        optimizer: QuantOptimizer,
+        config: AOBaseConfig,
+    ):
+        # do not update model weights, just quantize
+        optimizer.zero_grad()
+        optimizer.step()
+
+        orig_model = copy.deepcopy(model)  # save copy of PARQ quantized model
+
+        # equivalent to torchao's convert step
+        model.eval()
+        optimizer.restore_latent_params()
+        quantize_(model, config, filter_fn=optimizer._get_filter_fn(model))
+
+        for n, module in model.named_modules():
+            if not _is_linear(module):
+                continue
+
+            p_orig = getattr(orig_model, n).weight  # PARQ weight
+            p = module.weight.dequantize()  # PARQ weight after quantize_
+            p_ref = getattr(m_ref, n).weight.dequantize()  # native quantize_
+
+            self.assertTrue(p_orig.equal(p_ref))
+            self.assertTrue(p.equal(p_ref))
+
     @unittest.skipIf(not TORCH_VERSION_AT_LEAST_2_4, "Test only enabled for 2.4+")
     @common_utils.parametrize("group_size", [32, 256])
     def test_int4_weight_only(self, group_size: int = 32):
@@ -195,7 +216,7 @@ def test_intx_weight_only(self, b: int = 2, group_size: int = 32):
             ),
         )
 
-        quantizer = UnifTorchaoQuantizer(symmetric=True)
+        quantizer = UnifTorchaoQuantizer()
         self.compare_quantized_models(model, m_ref, quantizer, b, group_size)
 
     @unittest.skipIf(not TORCH_VERSION_AT_LEAST_2_4, "Test only enabled for 2.4+")
@@ -211,40 +232,36 @@ def test_int4_weight_only_e2e(self, group_size: int = 32):
         quantize_(m_ref, config)
 
         b = 4
-        params_quant, params_no_quant = split_param_groups(model)
-        param_groups = [
-            {"params": params_quant, "quant_bits": b, "quant_block_size": group_size},
-            {"params": params_no_quant},
-        ]
-        base_optimizer = torch.optim.AdamW(param_groups)
-
+        base_optimizer = torch.optim.AdamW(build_param_groups(model, b, group_size))
         optimizer = QuantOptimizer(
             base_optimizer,
             Int4UnifTorchaoQuantizer(),
             ProxHardQuant(),
             quant_per_channel=True,
         )
+        self.compare_parq_convert(model, m_ref, optimizer, config)
 
-        # do not update model weights, just quantize
-        optimizer.zero_grad()
-        optimizer.step()
-
-        orig_model = copy.deepcopy(model)  # save copy of PARQ quantized model
-
-        # equivalent to torchao's convert step
-        model.eval()
-        optimizer.torchao_quantize_(model, config)
-
-        for n, module in model.named_modules():
-            if not _is_linear(module):
-                continue
+    @unittest.skipIf(not TORCH_VERSION_AT_LEAST_2_6, "Test only enabled for 2.6+")
+    @unittest.skipIf(_DEVICE == "cpu", "Need GPU available")
+    @common_utils.parametrize("b", [2, 3, 4, 8])
+    def test_intx_weight_only_e2e(self, b: int = 2, group_size: int = 32):
+        model = M(m=512, n=512).to(_DEVICE)
+        model.reset_parameters()
 
-            p_orig = getattr(orig_model, n).weight  # PARQ weight
-            p = module.weight.dequantize()  # PARQ weight after quantize_
-            p_ref = getattr(m_ref, n).weight.dequantize()  # natively quantize_
+        m_ref = copy.deepcopy(model).eval().to(_DEVICE)
+        config = IntxWeightOnlyConfig(
+            weight_dtype=_BIT_WIDTH_TO_DTYPE[b], granularity=PerGroup(group_size)
+        )
+        quantize_(m_ref, config)
 
-            self.assertTrue(p_orig.equal(p_ref))
-            self.assertTrue(p.equal(p_ref))
+        base_optimizer = torch.optim.AdamW(build_param_groups(model, b, group_size))
+        optimizer = QuantOptimizer(
+            base_optimizer,
+            UnifTorchaoQuantizer(),
+            ProxHardQuant(),
+            quant_per_channel=True,
+        )
+        self.compare_parq_convert(model, m_ref, optimizer, config)
 
 
 common_utils.instantiate_parametrized_tests(TestPARQuantization)
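
Note: the end-to-end flow the new test_intx_weight_only_e2e exercises looks roughly like the sketch below: PARQ param groups drive a QuantOptimizer, and after training the model is handed to torchao's quantize_ via the optimizer's new filter function. This is a minimal sketch rather than the test itself; the nn.Sequential stand-in, the random batch, the id-based parameter split, and the torchao.quantization / torchao.quantization.granularity / torchao.prototype.parq.quant import paths are assumptions, torch.int4 needs PyTorch 2.6+, and the test additionally skips on CPU.

    import torch
    from torch import nn

    from torchao.prototype.parq.optim import ProxHardQuant, QuantOptimizer
    from torchao.prototype.parq.quant import UnifTorchaoQuantizer
    from torchao.quantization import IntxWeightOnlyConfig, quantize_
    from torchao.quantization.granularity import PerGroup

    # toy stand-in for the test's M module
    model = nn.Sequential(nn.Linear(512, 512), nn.Linear(512, 512))

    # param groups in the shape build_param_groups produces: quantized weights
    # carry quant_bits/quant_block_size, everything else is left untouched
    b, group_size = 4, 32
    weights = [m.weight for m in model if isinstance(m, nn.Linear)]
    weight_ids = {id(w) for w in weights}
    param_groups = [
        {"params": weights, "quant_bits": b, "quant_block_size": group_size},
        {"params": [p for p in model.parameters() if id(p) not in weight_ids]},
    ]

    optimizer = QuantOptimizer(
        torch.optim.AdamW(param_groups),
        UnifTorchaoQuantizer(),
        ProxHardQuant(),
        quant_per_channel=True,
    )

    # one PARQ training step on a random batch
    model(torch.randn(8, 512)).sum().backward()
    optimizer.step()

    # torchao-style convert step, mirroring compare_parq_convert above
    model.eval()
    optimizer.restore_latent_params()
    config = IntxWeightOnlyConfig(weight_dtype=torch.int4, granularity=PerGroup(group_size))
    quantize_(model, config, filter_fn=optimizer._get_filter_fn(model))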

torchao/prototype/parq/optim/quantopt.py (+21, -22)
@@ -13,9 +13,6 @@
 from torch import Tensor
 from torch.optim import Optimizer
 
-from torchao import quantize_
-from torchao.core.config import AOBaseConfig
-
 from ..quant import Quantizer
 from ..utils import HAS_DTENSOR, is_dtensor
 from .proxmap import ProxMap
@@ -109,32 +106,32 @@ def quantize_(
             quants.copy_(Q)
         return q
 
-    @torch.no_grad()
-    def torchao_quantize_(self, model: torch.nn.Module, config: AOBaseConfig):
-        """Recursively call torchao.quantize_ on model using given config."""
-        self.restore_latent_params()
-        param_set = {
+    def regularized_param_groups(self):  # pyre-ignore[3]
+        """Yield parameter groups that need to be quantized."""
+        for group in self.param_groups:
+            if group.get("quant_bits", 16) < 16:
+                yield group
+
+    @property
+    def _param_set(self) -> set[int]:
+        return {
             p.data_ptr()
             for group in self.regularized_param_groups()
             for p in group["params"]
         }
 
-        def inner_quantize_(model):
-            for module in model.children():
-                for param in module.parameters(recurse=False):
-                    if param.data_ptr() in param_set:
-                        quantize_(module, config)
-                        break
+    def _get_filter_fn(
+        self, module: torch.nn.Module
+    ) -> Callable[[torch.nn.Module], bool]:
+        param_set = self._param_set
 
-            inner_quantize_(module)
+        def _filter_fn(module: torch.nn.Module, *args) -> bool:
+            for p in module.parameters(recurse=False):
+                if p.data_ptr() in param_set:
+                    return True
+            return False
 
-        inner_quantize_(model)
-
-    def regularized_param_groups(self):  # pyre-ignore[3]
-        """Yield parameter groups that need to be quantized."""
-        for group in self.param_groups:
-            if group.get("quant_bits", 16) < 16:
-                yield group
+        return _filter_fn
 
     @torch._disable_dynamo
     def state_dict(self) -> dict[str, Any]:
@@ -285,6 +282,7 @@ def step(self, closure: Optional[Callable[[], float]] = None) -> Optional[float]
         return loss
 
     @torch._disable_dynamo
+    @torch.no_grad()
    def restore_latent_params(self) -> None:
         """Restore latent parameters as optimizer parameters"""
         for group in self.regularized_param_groups():
@@ -293,6 +291,7 @@ def restore_latent_params(self) -> None:
                 p.copy_(self.state[p]["latent"])
 
     @torch._disable_dynamo
+    @torch.no_grad()
     def save_latent_params(self) -> None:
         """Save updated latent parameters before applying prox-map"""
         if self.warmup_steps == 0:
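
Note: the removed torchao_quantize_ method is not replaced one-for-one; callers now compose torchao's public quantize_ with the new _get_filter_fn, whose returned predicate is true only for modules whose own parameters sit in a param group with quant_bits < 16. A minimal sketch of the equivalent call site follows; parq_convert is a hypothetical helper name and the torchao.quantization import path is an assumption, while the filter_fn usage mirrors compare_parq_convert in the test diff above.

    import torch

    from torchao.quantization import quantize_  # assumed import path


    def parq_convert(model: torch.nn.Module, config, optimizer) -> None:
        """Sketch of the call-site replacement for the removed torchao_quantize_."""
        model.eval()
        # copy latent full-precision weights back into the params (now runs under no_grad)
        optimizer.restore_latent_params()
        # quantize only modules whose parameters belong to a PARQ-regularized group;
        # torchao calls the predicate once per module, and _filter_fn absorbs the
        # extra fully-qualified-name argument via *args
        quantize_(model, config, filter_fn=optimizer._get_filter_fn(model))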

torchao/prototype/parq/quant/uniform_torchao.py (+4, -8)
@@ -29,8 +29,8 @@ class UnifTorchaoQuantizer(Quantizer):
 
     def __init__(
         self,
-        symmetric: bool,
-        target_dtype: Optional[torch.dtype] = None,
+        mapping_type: MappingType = MappingType.SYMMETRIC,
+        target_dtype: torch.dtype = torch.int8,
         quant_min: Optional[Union[int, float]] = None,
         quant_max: Optional[Union[int, float]] = None,
         eps: Optional[float] = None,
@@ -39,9 +39,7 @@ def __init__(
     ) -> None:
         super().__init__(center=False)
 
-        self.mapping_type = (
-            MappingType.SYMMETRIC if symmetric else MappingType.ASYMMETRIC
-        )
+        self.mapping_type = mapping_type
         self.target_dtype = target_dtype
         self.quant_min = quant_min
         self.quant_max = quant_max
@@ -55,8 +53,6 @@ def _init_quant_min_max(self, b: int) -> None:
         self.quant_min, self.quant_max = _DTYPE_TO_QVALUE_BOUNDS[
             _BIT_WIDTH_TO_DTYPE[b]
         ]
-        if self.target_dtype is None:
-            self.target_dtype = torch.int8
 
     def get_quant_size(self, b: int) -> int:
         self._init_quant_min_max(b)
@@ -125,7 +121,7 @@ class Int4UnifTorchaoQuantizer(UnifTorchaoQuantizer):
 
     def __init__(self) -> None:
         super().__init__(
-            symmetric=False,
+            mapping_type=MappingType.ASYMMETRIC,
             target_dtype=torch.int32,
             quant_min=0,
             quant_max=15,
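
Note: with these defaults, the common symmetric int8-backed configuration needs no constructor arguments, and asymmetric callers name a MappingType explicitly instead of passing symmetric=False, as Int4UnifTorchaoQuantizer now does. A small sketch of the two call sites, assuming the quantizers are importable from torchao.prototype.parq.quant and MappingType from torchao.quantization.quant_primitives:

    import torch

    from torchao.prototype.parq.quant import Int4UnifTorchaoQuantizer, UnifTorchaoQuantizer
    from torchao.quantization.quant_primitives import MappingType

    # new default: symmetric mapping onto an int8 target, no arguments required
    quantizer = UnifTorchaoQuantizer()
    assert quantizer.mapping_type is MappingType.SYMMETRIC
    assert quantizer.target_dtype is torch.int8

    # asymmetric callers spell out the mapping type instead of symmetric=False
    asym = UnifTorchaoQuantizer(mapping_type=MappingType.ASYMMETRIC)
    int4 = Int4UnifTorchaoQuantizer()  # asymmetric, torch.int32 target, quant range [0, 15]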
