
Commit 667dfe7

Merge remote-tracking branch 'pytorch/main' into parq
2 parents 6130cc2 + cdced21


46 files changed, +671 -1775 lines changed

.github/workflows/build_wheels_linux.yml

+1 -1

@@ -28,7 +28,7 @@ jobs:
       os: linux
       with-cpu: enable
       with-cuda: enable
-      with-rocm: enable
+      with-rocm: disable
       with-xpu: enable
       # Note: if free-threaded python is required add py3.13t here
       python-versions: '["3.9"]'

.github/workflows/regression_test.yml

+15 -15

@@ -59,35 +59,35 @@ jobs:
       fail-fast: false
       matrix:
         include:
-          - name: CUDA 2.3
+          - name: CUDA 2.5.1
             runs-on: linux.g5.12xlarge.nvidia.gpu
-            torch-spec: 'torch==2.3.0'
+            torch-spec: 'torch==2.5.1 --index-url https://download.pytorch.org/whl/cu121'
             gpu-arch-type: "cuda"
-            gpu-arch-version: "12.1"
-          - name: CUDA 2.4
+            gpu-arch-version: "12.6"
+          - name: CUDA 2.6
             runs-on: linux.g5.12xlarge.nvidia.gpu
-            torch-spec: 'torch==2.4.0'
+            torch-spec: 'torch==2.6.0'
             gpu-arch-type: "cuda"
-            gpu-arch-version: "12.1"
-          - name: CUDA 2.5.1
+            gpu-arch-version: "12.6"
+          - name: CUDA 2.7
             runs-on: linux.g5.12xlarge.nvidia.gpu
-            torch-spec: 'torch==2.5.1 --index-url https://download.pytorch.org/whl/cu121'
+            torch-spec: 'torch==2.7.0'
             gpu-arch-type: "cuda"
-            gpu-arch-version: "12.1"
+            gpu-arch-version: "12.6"

-          - name: CPU 2.3
+          - name: CPU 2.5.1
             runs-on: linux.4xlarge
-            torch-spec: 'torch==2.3.0 --index-url https://download.pytorch.org/whl/cpu'
+            torch-spec: 'torch==2.5.1 --index-url https://download.pytorch.org/whl/cpu'
             gpu-arch-type: "cpu"
             gpu-arch-version: ""
-          - name: CPU 2.4
+          - name: CPU 2.6
             runs-on: linux.4xlarge
-            torch-spec: 'torch==2.4.0 --index-url https://download.pytorch.org/whl/cpu'
+            torch-spec: 'torch==2.6.0 --index-url https://download.pytorch.org/whl/cpu'
             gpu-arch-type: "cpu"
             gpu-arch-version: ""
-          - name: CPU 2.5.1
+          - name: CPU 2.7
             runs-on: linux.4xlarge
-            torch-spec: 'torch==2.5.1 --index-url https://download.pytorch.org/whl/cpu'
+            torch-spec: 'torch==2.7.0 --index-url https://download.pytorch.org/whl/cpu'
             gpu-arch-type: "cpu"
             gpu-arch-version: ""
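Each torch-spec entry above is a pip requirement string plus optional index flags. As a rough illustration (an assumption about the install step, which is not part of this diff), the workflow effectively hands that string to pip:

# Sketch only: expanding a matrix torch-spec into a pip install call.
# The real install logic lives in the reusable test workflow, not shown here.
import shlex
import subprocess
import sys

torch_spec = "torch==2.7.0 --index-url https://download.pytorch.org/whl/cpu"
subprocess.run(
    [sys.executable, "-m", "pip", "install", *shlex.split(torch_spec)],
    check=True,
)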

.pre-commit-config.yaml

+1 -1

@@ -11,7 +11,7 @@ repos:

   - repo: https://github.com/astral-sh/ruff-pre-commit
     # Ruff version.
-    rev: v0.6.8
+    rev: v0.11.6
     hooks:
       # Run the linter.
       - id: ruff
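With the pinned ruff hook bumped to v0.11.6, the updated linter is picked up the next time the hooks run. A minimal sketch, assuming pre-commit is installed in the active environment:

# Re-run all configured pre-commit hooks (including the new ruff rev) over the repo.
import subprocess

subprocess.run(["pre-commit", "run", "--all-files"], check=True)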

benchmarks/float8/utils.py

+3

@@ -83,6 +83,9 @@ def profiler_output_to_filtered_time_by_kernel_name(
             continue
         elif e.key == "Activity Buffer Request":
             continue
+        elif e.key == "Unrecognized":
+            # TODO I think these are nvjet related
+            continue

         kernel_name_to_gpu_time_us[e.key] = e.self_device_time_total
     return kernel_name_to_gpu_time_us
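The helper above skips bookkeeping profiler entries (now also the "Unrecognized" key) before recording per-kernel GPU time. A standalone sketch of the same filtering pattern, assuming a torch.profiler profile object; it is an illustration, not the benchmark's actual helper:

# Minimal sketch: aggregate self GPU time per kernel, ignoring non-kernel entries.
def gpu_time_by_kernel_us(prof):
    skipped = {"Activity Buffer Request", "Unrecognized"}
    kernel_name_to_gpu_time_us = {}
    for e in prof.key_averages():
        if e.key in skipped:
            # profiler bookkeeping rows, not real device kernels
            continue
        kernel_name_to_gpu_time_us[e.key] = e.self_device_time_total
    return kernel_name_to_gpu_time_us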

test/dtypes/test_affine_quantized.py

+1

@@ -222,6 +222,7 @@ def apply_uint6_weight_only_quant(linear):

         deregister_aqt_quantized_linear_dispatch(dispatch_condition)

+    @skip_if_rocm("ROCm enablement in progress")
     @unittest.skipIf(len(GPU_DEVICES) == 0, "Need GPU available")
     def test_print_quantized_module(self):
         for device in self.GPU_DEVICES:

test/dtypes/test_nf4.py

+3 -3

@@ -39,7 +39,7 @@
     to_nf4,
 )
 from torchao.testing.utils import skip_if_rocm
-from torchao.utils import TORCH_VERSION_AT_LEAST_2_8
+from torchao.utils import TORCH_VERSION_AT_LEAST_2_7

 bnb_available = False

@@ -119,7 +119,7 @@ def test_backward_dtype_match(self, dtype: torch.dtype):
     @unittest.skipIf(not bnb_available, "Need bnb availble")
     @unittest.skipIf(not torch.cuda.is_available(), "Need CUDA available")
     @unittest.skipIf(
-        TORCH_VERSION_AT_LEAST_2_8, reason="Failing in CI"
+        TORCH_VERSION_AT_LEAST_2_7, reason="Failing in CI"
     )  # TODO: fix this
     @skip_if_rocm("ROCm enablement in progress")
     @parametrize("dtype", [torch.bfloat16, torch.float16, torch.float32])
@@ -146,7 +146,7 @@ def test_reconstruction_qlora_vs_bnb(self, dtype: torch.dtype):
     @unittest.skipIf(not torch.cuda.is_available(), "Need CUDA available")
     @skip_if_rocm("ROCm enablement in progress")
     @unittest.skipIf(
-        TORCH_VERSION_AT_LEAST_2_8, reason="Failing in CI"
+        TORCH_VERSION_AT_LEAST_2_7, reason="Failing in CI"
     )  # TODO: fix this
     @parametrize("dtype", [torch.bfloat16, torch.float16, torch.float32])
     def test_nf4_bnb_linear(self, dtype: torch.dtype):
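The version gate here moves down from 2.8 to 2.7, so these bnb-dependent tests are now skipped on PyTorch 2.7+ as well. For reference, a flag like TORCH_VERSION_AT_LEAST_2_7 can be derived roughly as below (torchao.utils ships its own implementation; this sketch assumes the packaging library is available):

# Illustrative only; torchao.utils defines the real TORCH_VERSION_AT_LEAST_* flags.
import torch
from packaging.version import parse

TORCH_VERSION_AT_LEAST_2_7 = parse(torch.__version__).release >= (2, 7)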

test/prototype/mx_formats/test_mx_linear.py

+7 -1

@@ -28,14 +28,15 @@
 from torchao.quantization import quantize_
 from torchao.quantization.utils import compute_error
 from torchao.utils import (
+    TORCH_VERSION_AT_LEAST_2_7,
     TORCH_VERSION_AT_LEAST_2_8,
     is_sm_at_least_89,
     is_sm_at_least_100,
 )

 torch.manual_seed(2)

-if not TORCH_VERSION_AT_LEAST_2_8:
+if not TORCH_VERSION_AT_LEAST_2_7:
     pytest.skip("Unsupported PyTorch version", allow_module_level=True)


@@ -222,6 +223,8 @@ def test_linear_compile(hp_dtype, recipe_name, bias, use_fp8_dim1_cast_triton_ke
         pytest.skip("CUDA capability >= 8.9 required for float8 in triton")

     if recipe_name in ["mxfp8_cublas", "mxfp8_cutlass", "mxfp4_cutlass"]:
+        if not TORCH_VERSION_AT_LEAST_2_8:
+            pytest.skip("torch.compile requires PyTorch 2.8+")
         if not is_sm_at_least_100():
             pytest.skip("CUDA capability >= 10.0 required for MX gemms")

@@ -308,6 +311,9 @@ def test_inference_linear(elem_dtype, bias, input_shape):


 @pytest.mark.skipif(not torch.cuda.is_available(), reason="CUDA not available")
+@pytest.mark.skipif(
+    not TORCH_VERSION_AT_LEAST_2_8, reason="torch.compile requires PyTorch 2.8+"
+)
 @pytest.mark.parametrize("elem_dtype", SUPPORTED_ELEM_DTYPES)
 def test_inference_compile_simple(elem_dtype):
     """

test/prototype/mx_formats/test_mx_mm.py

+2 -2

@@ -10,9 +10,9 @@
 from torchao.ops import mx_fp4_bf16, mx_fp8_bf16
 from torchao.prototype.mx_formats.mx_tensor import DTYPE_FP4, MXTensor
 from torchao.prototype.mx_formats.utils import to_blocked
-from torchao.utils import TORCH_VERSION_AT_LEAST_2_8, is_sm_at_least_100
+from torchao.utils import TORCH_VERSION_AT_LEAST_2_7, is_sm_at_least_100

-if not TORCH_VERSION_AT_LEAST_2_8:
+if not TORCH_VERSION_AT_LEAST_2_7:
     pytest.skip("Unsupported PyTorch version", allow_module_level=True)

test/prototype/scaled_grouped_mm/test_kernels.py

+3

@@ -28,8 +28,10 @@
     _to_2d_jagged_float8_tensor_colwise,
     _to_2d_jagged_float8_tensor_rowwise,
 )
+from torchao.testing.utils import skip_if_rocm


+@skip_if_rocm("ROCm enablement in progress")
 @pytest.mark.parametrize("round_scales_to_power_of_2", [True, False])
 def test_row_major_with_jagged_rowwise_scales(round_scales_to_power_of_2: bool):
     # tests case where rowwise scales are computed for multiple distinct subtensors,
@@ -57,6 +59,7 @@ def test_row_major_with_jagged_rowwise_scales(round_scales_to_power_of_2: bool):
     assert not _is_column_major(kernel_fp8_data), "fp8 data is not row major"


+@skip_if_rocm("ROCm enablement in progress")
 @pytest.mark.parametrize("round_scales_to_power_of_2", [True, False])
 def test_column_major_with_jagged_colwise_scales(round_scales_to_power_of_2: bool):
     # tests case where colwise scales are computed for multiple distinct subtensors,

test/prototype/scaled_grouped_mm/test_scaled_grouped_mm.py

+2

@@ -29,8 +29,10 @@
 from torchao.prototype.scaled_grouped_mm.scaled_grouped_mm import (
     _scaled_grouped_mm,
 )
+from torchao.testing.utils import skip_if_rocm


+@skip_if_rocm("ROCm enablement in progress")
 def test_valid_scaled_grouped_mm_2d_3d():
     out_dtype = torch.bfloat16
     device = "cuda"

test/quantization/pt2e/test_duplicate_dq.py

+2 -2

@@ -26,10 +26,10 @@
     Quantizer,
     SharedQuantizationSpec,
 )
-from torchao.quantization.pt2e.quantizer.xnnpack_quantizer import (
+from torchao.testing.pt2e._xnnpack_quantizer import (
     get_symmetric_quantization_config,
 )
-from torchao.quantization.pt2e.quantizer.xnnpack_quantizer_utils import (
+from torchao.testing.pt2e._xnnpack_quantizer_utils import (
     OP_TO_ANNOTATOR,
     QuantizationConfig,
 )

test/quantization/pt2e/test_metadata_porting.py

+2 -2

@@ -16,10 +16,10 @@

 from torchao.quantization.pt2e.quantize_pt2e import convert_pt2e, prepare_pt2e
 from torchao.quantization.pt2e.quantizer import QuantizationAnnotation, Quantizer
-from torchao.quantization.pt2e.quantizer.xnnpack_quantizer import (
+from torchao.testing.pt2e._xnnpack_quantizer import (
     get_symmetric_quantization_config,
 )
-from torchao.quantization.pt2e.quantizer.xnnpack_quantizer_utils import OP_TO_ANNOTATOR
+from torchao.testing.pt2e._xnnpack_quantizer_utils import OP_TO_ANNOTATOR
 from torchao.utils import TORCH_VERSION_AT_LEAST_2_7

test/quantization/pt2e/test_numeric_debugger.py

+2 -2

@@ -24,7 +24,7 @@
 )
 from torchao.quantization.pt2e.graph_utils import bfs_trace_with_node_process
 from torchao.quantization.pt2e.quantize_pt2e import convert_pt2e, prepare_pt2e
-from torchao.quantization.pt2e.quantizer.xnnpack_quantizer import (
+from torchao.testing.pt2e._xnnpack_quantizer import (
     XNNPACKQuantizer,
     get_symmetric_quantization_config,
 )
@@ -255,7 +255,7 @@ def test_prepare_for_propagation_comparison(self):
         ref = m(*example_inputs)
         res = m_logger(*example_inputs)

-        from torchao.quantization.pt2e.pt2e._numeric_debugger import OutputLogger
+        from torchao.quantization.pt2e._numeric_debugger import OutputLogger

         loggers = [m for m in m_logger.modules() if isinstance(m, OutputLogger)]
         self.assertEqual(len(loggers), 3)

test/quantization/pt2e/test_quantize_pt2e.py

+43 -10

@@ -57,11 +57,11 @@
 from torchao.quantization.pt2e.quantizer.embedding_quantizer import (  # noqa: F811
     EmbeddingQuantizer,
 )
-from torchao.quantization.pt2e.quantizer.xnnpack_quantizer import (
+from torchao.testing.pt2e._xnnpack_quantizer import (
     XNNPACKQuantizer,
     get_symmetric_quantization_config,
 )
-from torchao.quantization.pt2e.quantizer.xnnpack_quantizer_utils import (
+from torchao.testing.pt2e._xnnpack_quantizer_utils import (
     OP_TO_ANNOTATOR,
     QuantizationConfig,
 )
@@ -1328,6 +1328,40 @@ def validate(self, model: torch.fx.GraphModule) -> None:
         with self.assertRaises(Exception):
             m = prepare_pt2e(m, BackendAQuantizer())

+    def _quantize(self, m, quantizer, example_inputs, is_qat: bool = False):
+        # resetting dynamo cache
+        torch._dynamo.reset()
+
+        m = export_for_training(
+            m,
+            example_inputs,
+        ).module()
+        if is_qat:
+            m = prepare_qat_pt2e(m, quantizer)
+        else:
+            m = prepare_pt2e(m, quantizer)
+        m(*example_inputs)
+        m = convert_pt2e(m)
+        return m
+
+    def _get_pt2e_quantized_linear(self, is_per_channel=False) -> torch.fx.GraphModule:
+        class M(torch.nn.Module):
+            def __init__(self) -> None:
+                super().__init__()
+                self.linear = torch.nn.Linear(2, 2)
+
+            def forward(self, x):
+                return self.linear(x)
+
+        quantizer = XNNPACKQuantizer()
+        operator_config = get_symmetric_quantization_config(
+            is_per_channel=is_per_channel
+        )
+        quantizer.set_global(operator_config)
+        example_inputs = (torch.randn(2, 2),)
+        m = M().eval()
+        return self._quantize(m, quantizer, example_inputs)
+
     def test_fold_quantize(self):
         """Test to make sure the quantized model gets quantized weight (quantize_per_tensor op is folded)"""
         m = self._get_pt2e_quantized_linear()
@@ -2493,10 +2527,10 @@ def check_nn_module(node):
 @unittest.skipIf(not TORCH_VERSION_AT_LEAST_2_7, "Requires torch 2.7+")
 class TestQuantizePT2EAffineQuantization(PT2EQuantizationTestCase):
     def test_channel_group_quantization(self):
-        from torchao.quantization.pt2e.observer import MappingType, PerGroup, PerToken
-        from torchao.quantization.pt2e.pt2e._affine_quantization import (
+        from torchao.quantization.pt2e._affine_quantization import (
             AffineQuantizedMinMaxObserver,
         )
+        from torchao.quantization.pt2e.observer import MappingType, PerGroup, PerToken

         class BackendAQuantizer(Quantizer):
             def annotate(self, model: torch.fx.GraphModule) -> torch.fx.GraphModule:
@@ -2576,14 +2610,14 @@ def forward(self, x):
     def test_dynamic_affine_act_per_channel_weights(self):
         import operator

+        from torchao.quantization.pt2e._affine_quantization import (
+            AffineQuantizedMovingAverageMinMaxObserver,
+        )
         from torchao.quantization.pt2e.observer import (
             MappingType,
             PerChannelMinMaxObserver,
             PerToken,
         )
-        from torchao.quantization.pt2e.pt2e._affine_quantization import (
-            AffineQuantizedMovingAverageMinMaxObserver,
-        )

         class BackendAQuantizer(Quantizer):
             def annotate(self, model: torch.fx.GraphModule) -> torch.fx.GraphModule:
@@ -2667,13 +2701,12 @@ def forward(self, x):
     def test_dynamic_per_tok_act_per_group_weights(self):
         import operator

-        from torchao.quantization.pt2e.observer import MappingType, PerGroup, PerToken
-
         # TODO: merge into torchao observer
-        from torchao.quantization.pt2e.pt2e._affine_quantization import (
+        from torchao.quantization.pt2e._affine_quantization import (
             AffineQuantizedMinMaxObserver,
             AffineQuantizedPlaceholderObserver,
         )
+        from torchao.quantization.pt2e.observer import MappingType, PerGroup, PerToken

         class BackendAQuantizer(Quantizer):
             def annotate(self, model: torch.fx.GraphModule) -> torch.fx.GraphModule:
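The new _quantize and _get_pt2e_quantized_linear helpers capture the export, prepare, calibrate, convert flow used throughout this test file. A standalone sketch of the same flow outside the test class, using the relocated import paths; the toy module and inputs are placeholders, and PyTorch 2.7+ is assumed:

# Sketch of the PT2E flow mirrored by the _quantize helper above.
import torch
from torch.export import export_for_training

from torchao.quantization.pt2e.quantize_pt2e import convert_pt2e, prepare_pt2e
from torchao.testing.pt2e._xnnpack_quantizer import (
    XNNPACKQuantizer,
    get_symmetric_quantization_config,
)


class TinyLinear(torch.nn.Module):
    def __init__(self) -> None:
        super().__init__()
        self.linear = torch.nn.Linear(2, 2)

    def forward(self, x):
        return self.linear(x)


example_inputs = (torch.randn(2, 2),)
quantizer = XNNPACKQuantizer()
quantizer.set_global(get_symmetric_quantization_config(is_per_channel=False))

m = export_for_training(TinyLinear().eval(), example_inputs).module()
m = prepare_pt2e(m, quantizer)  # insert observers
m(*example_inputs)  # calibrate on sample inputs
m = convert_pt2e(m)  # fold observers into quantized ops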

test/quantization/pt2e/test_quantize_pt2e_qat.py

+1 -1

@@ -47,7 +47,7 @@
     QuantizationSpec,
     Quantizer,
 )
-from torchao.quantization.pt2e.quantizer.xnnpack_quantizer import (
+from torchao.testing.pt2e._xnnpack_quantizer import (
     XNNPACKQuantizer,
     get_symmetric_quantization_config,
 )

test/quantization/pt2e/test_representation.py

+1 -1

@@ -23,7 +23,7 @@

 from torchao.quantization.pt2e.quantize_pt2e import convert_pt2e, prepare_pt2e
 from torchao.quantization.pt2e.quantizer import Quantizer
-from torchao.quantization.pt2e.quantizer.xnnpack_quantizer import (
+from torchao.testing.pt2e._xnnpack_quantizer import (
     XNNPACKQuantizer,
     get_symmetric_quantization_config,
 )
