pytorch · jerryzh168 · Aug 22, 2025 · Aug 22, 2025
diff --git a/test/test_utils.py b/test/test_utils.py
@@ -186,103 +186,60 @@ class MyTensor(TorchAOBaseTensor):
             tensor_data_names = ["qdata"]
             tensor_attribute_names = ["attr", "device"]
 
-            def __new__(cls, qdata, attr, device):
+            def __new__(cls, qdata, attr, device=None):
                 shape = qdata.shape
                 if device is None:
                     device = qdata.device
                 kwargs = {"device": device}
                 return torch.Tensor._make_wrapper_subclass(cls, shape, **kwargs)  # type: ignore[attr-defined]
 
-            def __init__(self, qdata, attr, device):
+            def __init__(self, qdata, attr, device=None):
                 self.qdata = qdata
                 self.attr = attr
 
         l = torch.nn.Linear(2, 3)
-        l.weight = torch.nn.Parameter(MyTensor(l.weight, "attr", None))
+        l.weight = torch.nn.Parameter(MyTensor(l.weight, "attr"))
         lp_tensor = l.weight
 
         another_tensor = torch.nn.Linear(2, 3).weight
         # attribute has to be the same
-        lp_tensor_for_copy = MyTensor(another_tensor, "attr", None)
+        lp_tensor_for_copy = MyTensor(another_tensor, "attr")
         self._test_default_impls_helper(lp_tensor, lp_tensor_for_copy)
 
     @skip_if_no_cuda()
     def test_default_impls_with_optional_data(self):
         class MyTensorWithOptionalData(TorchAOBaseTensor):
             tensor_data_names = ["qdata"]
-            tensor_attribute_names = ["attr", "device"]
             optional_tensor_data_names = ["zero_point"]
-
-            def __new__(cls, qdata, attr, device, zero_point=None):
-                shape = qdata.shape
-                if device is None:
-                    device = qdata.device
-                kwargs = {"device": device}
-                return torch.Tensor._make_wrapper_subclass(cls, shape, **kwargs)  # type: ignore[attr-defined]
-
-            def __init__(self, qdata, attr, device, zero_point=None):
-                self.qdata = qdata
-                self.attr = attr
-                self.zero_point = zero_point
-
-        # test both the optional Tensor is None
-        # and not None
-        l = torch.nn.Linear(2, 3)
-        lp_tensor = MyTensorWithOptionalData(l.weight, "attr", None, None)
-        l = torch.nn.Linear(2, 3)
-        lp_tensor_for_copy = MyTensorWithOptionalData(l.weight, "attr", None, None)
-        self._test_default_impls_helper(lp_tensor, lp_tensor_for_copy)
-
-        l = torch.nn.Linear(2, 3)
-        lp_tensor = MyTensorWithOptionalData(
-            l.weight, "attr", None, torch.zeros_like(l.weight)
-        )
-        l = torch.nn.Linear(2, 3)
-        lp_tensor_for_copy = MyTensorWithOptionalData(
-            l.weight, "attr", None, torch.zeros_like(l.weight)
-        )
-        self._test_default_impls_helper(lp_tensor, lp_tensor_for_copy)
-
-    @skip_if_no_cuda()
-    def test_default_impls_with_optional_attr(self):
-        class MyTensorWithOptionalData(TorchAOBaseTensor):
-            tensor_data_names = ["qdata"]
             tensor_attribute_names = ["attr", "device"]
-            optional_tensor_data_names = ["zero_point"]
-            optional_tensor_attribute_names = ["optional_attr"]
 
-            def __new__(cls, qdata, attr, device, zero_point=None, optional_attr=None):
+            def __new__(cls, qdata, zero_point=None, attr=1.0, device=None):
                 shape = qdata.shape
                 if device is None:
                     device = qdata.device
                 kwargs = {"device": device}
                 return torch.Tensor._make_wrapper_subclass(cls, shape, **kwargs)  # type: ignore[attr-defined]
 
-            def __init__(
-                self, qdata, attr, device, zero_point=None, optional_attr=None
-            ):
+            def __init__(self, qdata, zero_point=None, attr=1.0, device=None):
                 self.qdata = qdata
-                self.attr = attr
                 self.zero_point = zero_point
-                self.optional_attr = optional_attr
+                self.attr = attr
 
-        # test both the optional Tensor is None
-        # and not None
+        # test both cases: the optional Tensor is None
+        # and the optional Tensor is not None
         l = torch.nn.Linear(2, 3)
-        lp_tensor = MyTensorWithOptionalData(l.weight, "attr", None, zero_point=None)
+        lp_tensor = MyTensorWithOptionalData(l.weight, None, "attr")
         l = torch.nn.Linear(2, 3)
-        lp_tensor_for_copy = MyTensorWithOptionalData(
-            l.weight, "attr", None, zero_point=None
-        )
+        lp_tensor_for_copy = MyTensorWithOptionalData(l.weight, None, "attr")
         self._test_default_impls_helper(lp_tensor, lp_tensor_for_copy)
 
         l = torch.nn.Linear(2, 3)
         lp_tensor = MyTensorWithOptionalData(
-            l.weight, "attr", None, zero_point=None, optional_attr="value"
+            l.weight, torch.zeros_like(l.weight), "attr"
         )
         l = torch.nn.Linear(2, 3)
         lp_tensor_for_copy = MyTensorWithOptionalData(
-            l.weight, "attr", None, zero_point=None, optional_attr="value"
+            l.weight, torch.zeros_like(l.weight), "attr"
         )
         self._test_default_impls_helper(lp_tensor, lp_tensor_for_copy)
 

diff --git a/torchao/quantization/quantize_/workflows/float8/float8_tensor.py b/torchao/quantization/quantize_/workflows/float8/float8_tensor.py
@@ -94,8 +94,7 @@ class Float8Tensor(TorchAOBaseTensor):
     """
 
     tensor_data_names = ["qdata", "scale"]
-    tensor_attribute_names = []
-    optional_tensor_attribute_names = [
+    tensor_attribute_names = [
         "block_size",
         "mm_config",
         "hp_value_lb",
@@ -107,15 +106,15 @@ class Float8Tensor(TorchAOBaseTensor):
 
     def __new__(
         cls,
-        qdata: torch.Tensor,
-        scale: torch.Tensor,
-        block_size: Optional[List[int]] = None,
-        mm_config: Optional[Float8MMConfig] = None,
-        hp_value_lb: Optional[float] = None,
-        hp_value_ub: Optional[float] = None,
-        act_quant_kwargs: Optional[QuantizeTensorToFloat8Kwargs] = None,
-        kernel_preference: KernelPreference = KernelPreference.AUTO,
-        dtype: Optional[torch.dtype] = None,
+        qdata,
+        scale,
+        block_size,
+        mm_config,
+        hp_value_lb,
+        hp_value_ub,
+        act_quant_kwargs,
+        kernel_preference,
+        dtype,
     ):
         shape = qdata.shape
         kwargs = {}

diff --git a/torchao/quantization/quantize_/workflows/int4/int4_preshuffled_tensor.py b/torchao/quantization/quantize_/workflows/int4/int4_preshuffled_tensor.py
@@ -75,17 +75,17 @@ class Int4PreshuffledTensor(TorchAOBaseTensor):
     """
 
     tensor_data_names = ["qdata", "group_scale"]
-    tensor_attribute_names = ["block_size", "shape"]
     optional_tensor_data_names = ["group_zero", "row_scale"]
+    tensor_attribute_names = ["block_size", "shape"]
 
     def __new__(
         cls,
-        qdata: torch.Tensor,
-        group_scale: torch.Tensor,
-        block_size: List[int],
-        shape: List[int],
-        group_zero: Optional[torch.Tensor] = None,
-        row_scale: Optional[torch.Tensor] = None,
+        qdata,
+        group_scale,
+        group_zero,
+        row_scale,
+        block_size,
+        shape,
     ):
         kwargs = {}
         kwargs["device"] = qdata.device
@@ -97,19 +97,19 @@ def __init__(
         self,
         qdata: torch.Tensor,
         group_scale: torch.Tensor,
+        group_zero: Optional[torch.Tensor],
+        row_scale: Optional[torch.Tensor],
         block_size: List[int],
         shape: List[int],
-        group_zero: Optional[torch.Tensor] = None,
-        row_scale: Optional[torch.Tensor] = None,
     ):
-        # one and only one of group_scale and group_zero should be None
+        # at most one of group_zero and row_scale may be set (never both)
         assert group_zero is None or row_scale is None
         assert not (group_zero is not None and row_scale is not None)
         self.qdata = qdata
-        self.row_scale = row_scale
-        self.block_size = block_size
         self.group_scale = group_scale
         self.group_zero = group_zero
+        self.row_scale = row_scale
+        self.block_size = block_size
 
     def _quantization_type(self):
         return f"shape={self.shape}, block_size={self.block_size}, device={self.device}"
@@ -178,10 +178,10 @@ def from_hp(
         return Int4PreshuffledTensor(
             qdata=wq,
             group_scale=group_scale,
-            block_size=block_size,
-            shape=original_shape,
             group_zero=group_zero,
             row_scale=row_scale,
+            block_size=block_size,
+            shape=original_shape,
         )