
Commit 81ba287

Add QuantOptimizer.torchao_quantize_
1 parent d7710cf commit 81ba287

3 files changed (+53 -18 lines)


test/prototype/test_parq.py (+1 -3)

@@ -248,9 +248,7 @@ def test_int4_weight_only_e2e(self, group_size: int = 32):
 
         # equivalent to torchao's convert step
         model.eval()
-        with torch.no_grad():
-            optimizer.restore_latent_params()
-            quantize_(model, quantizer.config)
+        optimizer.torchao_quantize_(model)
 
         for n, module in model.named_modules():
             if not _is_linear(module):
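With this change, the convert step in user code collapses to a single call. A minimal sketch mirroring the test above; model, optimizer, and quantizer are assumed to already exist from the usual PARQ prepare/train flow:

# Before this commit, the convert step was spelled out by hand:
model.eval()
with torch.no_grad():
    optimizer.restore_latent_params()   # copy latent full-precision params back into the model
    quantize_(model, quantizer.config)  # torchao convert step

# After this commit, torchao_quantize_ performs both steps:
model.eval()
optimizer.torchao_quantize_(model)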

torchao/prototype/parq/optim/quantopt.py (+27 -3)

@@ -13,6 +13,8 @@
 from torch import Tensor
 from torch.optim import Optimizer
 
+from torchao import quantize_
+
 from ..quant import Quantizer
 from ..utils import HAS_DTENSOR, is_dtensor
 from .proxmap import ProxMap

@@ -106,6 +108,28 @@ def quantize_(
         quants.copy_(Q)
         return q
 
+    @torch.no_grad()
+    def torchao_quantize_(self, model):
+        assert hasattr(self.quantizer, "config"), "Missing self.quantizer.config"
+
+        self.restore_latent_params()
+        param_set = {
+            p.data_ptr()
+            for group in self.regularized_param_groups()
+            for p in group["params"]
+        }
+
+        def inner_quantize_(model):
+            for module in model.children():
+                for param in module.parameters(recurse=False):
+                    if param.data_ptr() in param_set:
+                        quantize_(module, self.quantizer.config)
+                        break
+
+                inner_quantize_(module)
+
+        inner_quantize_(model)
+
     def regularized_param_groups(self):  # pyre-ignore[3]
         """Yield parameter groups that need to be quantized."""
         for group in self.param_groups:

@@ -189,9 +213,9 @@ def step(self, closure: Optional[Callable[[], float]] = None) -> Optional[float]
 
             # reshape p according to block size if specified
             if block_size is not None:
-                assert p.size(-1) % block_size == 0, (
-                    f"{p.size(-1)=} is not divisible by {block_size=}"
-                )
+                assert (
+                    p.size(-1) % block_size == 0
+                ), f"{p.size(-1)=} is not divisible by {block_size=}"
                 assert p.dim() <= 2, f"Invalid {p.dim()=} for {block_size=}"
                 if p.dim() == 1:
                     p = p.unsqueeze(0)
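The new method converts a module only when one of its own (non-recursive) parameters belongs to the optimizer's regularized param groups, matching by data_ptr. A self-contained sketch of just that traversal logic; selective_quantize_ and apply_config are illustrative stand-ins for the new method and for torchao's quantize_(module, config), not part of this commit:

from torch import nn


def selective_quantize_(model: nn.Module, param_set: set, apply_config) -> None:
    # Mirrors inner_quantize_: recurse over children, converting a child module
    # only if one of its directly-owned parameters was tracked by the optimizer.
    for module in model.children():
        for param in module.parameters(recurse=False):
            if param.data_ptr() in param_set:
                apply_config(module)
                break
        selective_quantize_(module, param_set, apply_config)


model = nn.Sequential(nn.Linear(8, 8), nn.ReLU(), nn.Linear(8, 2))
# Pretend only the first Linear's weight sits in a regularized param group.
tracked = {model[0].weight.data_ptr()}
selective_quantize_(model, tracked, lambda m: print("would quantize:", m))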

torchao/prototype/parq/quant/uniform_torchao.py (+25 -12)

@@ -52,14 +52,6 @@ def __init__(
         self.zero_point_domain = zero_point_domain
         self.config = config
 
-    @property
-    def q_kwargs(self) -> dict[str, Union[int, float]]:
-        return {
-            "quant_min": self.quant_min,
-            "quant_max": self.quant_max,
-            "zero_point_domain": self.zero_point_domain,
-        }
-
     def _init_quant_min_max(self, b: int) -> None:
         if self.quant_min is None or self.quant_max is None:
             assert b in _BIT_WIDTH_TO_DTYPE, f"Unsupported bitwidth {b}"

@@ -89,11 +81,26 @@ def quantize(
             self.target_dtype,
             eps=self.eps,
             preserve_zero=self.preserve_zero,
-            **self.q_kwargs,
+            quant_min=self.quant_min,
+            quant_max=self.quant_max,
+            zero_point_domain=self.zero_point_domain,
         )
         q_args = (block_size, s, zero_point, self.target_dtype)
-        q = quantize_affine(p, *q_args, **self.q_kwargs)
-        q = dequantize_affine(q, *q_args, output_dtype=p.dtype, **self.q_kwargs)
+        q = quantize_affine(
+            p,
+            *q_args,
+            quant_min=self.quant_min,
+            quant_max=self.quant_max,
+            zero_point_domain=self.zero_point_domain,
+        )
+        q = dequantize_affine(
+            q,
+            *q_args,
+            output_dtype=p.dtype,
+            quant_min=self.quant_min,
+            quant_max=self.quant_max,
+            zero_point_domain=self.zero_point_domain,
+        )
 
         Q = torch.arange(
             self.quant_min, self.quant_max + 1, dtype=self.target_dtype, device=p.device

@@ -105,6 +112,12 @@ def quantize(
             block_size = Q.shape
 
         Q = dequantize_affine(
-            Q, block_size, *q_args[1:], output_dtype=p.dtype, **self.q_kwargs
+            Q,
+            block_size,
+            *q_args[1:],
+            output_dtype=p.dtype,
+            quant_min=self.quant_min,
+            quant_max=self.quant_max,
+            zero_point_domain=self.zero_point_domain,
         )
         return q, Q
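The q_kwargs property is dropped in favor of explicit quant_min / quant_max / zero_point_domain keywords at each call site. A minimal round-trip sketch of that call pattern, assuming the same torchao quant_primitives imports and signatures this file targets; the scale and zero point below are hand-built for illustration rather than produced by choose_qparams_affine:

import torch
from torchao.quantization.quant_primitives import (
    ZeroPointDomain,
    dequantize_affine,
    quantize_affine,
)

p = torch.randn(4, 32)
block_size = (1, 32)  # one quantization block (hence one scale) per row
s = p.abs().amax(dim=-1, keepdim=True) / 7
zero_point = torch.zeros_like(s, dtype=torch.int32)
q_args = (block_size, s, zero_point, torch.int8)  # same tuple layout as in quantize()

q = quantize_affine(
    p, *q_args, quant_min=-8, quant_max=7, zero_point_domain=ZeroPointDomain.INT
)
q_dq = dequantize_affine(
    q,
    *q_args,
    output_dtype=p.dtype,
    quant_min=-8,
    quant_max=7,
    zero_point_domain=ZeroPointDomain.INT,
)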
