From 824ca83a77e0bef2af6d4eb8fcb5d8707a49d44f Mon Sep 17 00:00:00 2001
From: shanjiaz
Date: Fri, 29 Aug 2025 11:23:34 -0400
Subject: [PATCH 01/12] recover skipped tests

Signed-off-by: shanjiaz
---
 .../compression/decompression_configs_skipped/w8a8.yaml  | 4 ----
 .../compression/run_compressed_configs_skipped/w8a8.yaml | 4 ----
 2 files changed, 8 deletions(-)
 delete mode 100644 tests/llmcompressor/transformers/compression/decompression_configs_skipped/w8a8.yaml
 delete mode 100644 tests/llmcompressor/transformers/compression/run_compressed_configs_skipped/w8a8.yaml

diff --git a/tests/llmcompressor/transformers/compression/decompression_configs_skipped/w8a8.yaml b/tests/llmcompressor/transformers/compression/decompression_configs_skipped/w8a8.yaml
deleted file mode 100644
index b5a846cbc..000000000
--- a/tests/llmcompressor/transformers/compression/decompression_configs_skipped/w8a8.yaml
+++ /dev/null
@@ -1,4 +0,0 @@
-cadence: "commit"
-test_type: "regression"
-compressed_model_stub: "nm-testing/tinyllama-w8a8-compressed"
-skeleton_model_stub: "TinyLlama/TinyLlama-1.1B-intermediate-step-1431k-3T"
\ No newline at end of file
diff --git a/tests/llmcompressor/transformers/compression/run_compressed_configs_skipped/w8a8.yaml b/tests/llmcompressor/transformers/compression/run_compressed_configs_skipped/w8a8.yaml
deleted file mode 100644
index dd2134011..000000000
--- a/tests/llmcompressor/transformers/compression/run_compressed_configs_skipped/w8a8.yaml
+++ /dev/null
@@ -1,4 +0,0 @@
-cadence: "nightly"
-test_type: "regression"
-compressed_model_stub: nm-testing/TinyLlama-1.1B-Chat-v1.0-W8A8-Dynamic-Per-Token-compressed
-uncompressed_model_stub: nm-testing/TinyLlama-1.1B-Chat-v1.0-W8A8-Dynamic-Per-Token-uncompressed
\ No newline at end of file

From 5e028c76ed2760dc2bd9a811e7c77c5d924d3e07 Mon Sep 17 00:00:00 2001
From: shanjiaz
Date: Fri, 29 Aug 2025 11:26:01 -0400
Subject: [PATCH 02/12] move to right folder

---
 .../transformers/compression/decompression_configs/w8a8.yaml  | 4 ++++
 .../transformers/compression/run_compressed_configs/w8a8.yaml | 4 ++++
 2 files changed, 8 insertions(+)
 create mode 100644 tests/llmcompressor/transformers/compression/decompression_configs/w8a8.yaml
 create mode 100644 tests/llmcompressor/transformers/compression/run_compressed_configs/w8a8.yaml

diff --git a/tests/llmcompressor/transformers/compression/decompression_configs/w8a8.yaml b/tests/llmcompressor/transformers/compression/decompression_configs/w8a8.yaml
new file mode 100644
index 000000000..b5a846cbc
--- /dev/null
+++ b/tests/llmcompressor/transformers/compression/decompression_configs/w8a8.yaml
@@ -0,0 +1,4 @@
+cadence: "commit"
+test_type: "regression"
+compressed_model_stub: "nm-testing/tinyllama-w8a8-compressed"
+skeleton_model_stub: "TinyLlama/TinyLlama-1.1B-intermediate-step-1431k-3T"
\ No newline at end of file
diff --git a/tests/llmcompressor/transformers/compression/run_compressed_configs/w8a8.yaml b/tests/llmcompressor/transformers/compression/run_compressed_configs/w8a8.yaml
new file mode 100644
index 000000000..dd2134011
--- /dev/null
+++ b/tests/llmcompressor/transformers/compression/run_compressed_configs/w8a8.yaml
@@ -0,0 +1,4 @@
+cadence: "nightly"
+test_type: "regression"
+compressed_model_stub: nm-testing/TinyLlama-1.1B-Chat-v1.0-W8A8-Dynamic-Per-Token-compressed
+uncompressed_model_stub: nm-testing/TinyLlama-1.1B-Chat-v1.0-W8A8-Dynamic-Per-Token-uncompressed
\ No newline at end of file

From b03072f93b49c6e43f1df03eb292873221d377f3 Mon Sep 17 00:00:00 2001
From: shanjiaz
Date: Fri, 29 Aug 2025 11:23:34 -0400
Subject: [PATCH 03/12] recover skipped tests

Signed-off-by: shanjiaz
---
 .../compression/decompression_configs_skipped/w8a8.yaml  | 4 ----
 .../compression/run_compressed_configs_skipped/w8a8.yaml | 4 ----
 2 files changed, 8 deletions(-)
 delete mode 100644 tests/llmcompressor/transformers/compression/decompression_configs_skipped/w8a8.yaml
 delete mode 100644 tests/llmcompressor/transformers/compression/run_compressed_configs_skipped/w8a8.yaml

diff --git a/tests/llmcompressor/transformers/compression/decompression_configs_skipped/w8a8.yaml b/tests/llmcompressor/transformers/compression/decompression_configs_skipped/w8a8.yaml
deleted file mode 100644
index b5a846cbc..000000000
--- a/tests/llmcompressor/transformers/compression/decompression_configs_skipped/w8a8.yaml
+++ /dev/null
@@ -1,4 +0,0 @@
-cadence: "commit"
-test_type: "regression"
-compressed_model_stub: "nm-testing/tinyllama-w8a8-compressed"
-skeleton_model_stub: "TinyLlama/TinyLlama-1.1B-intermediate-step-1431k-3T"
\ No newline at end of file
diff --git a/tests/llmcompressor/transformers/compression/run_compressed_configs_skipped/w8a8.yaml b/tests/llmcompressor/transformers/compression/run_compressed_configs_skipped/w8a8.yaml
deleted file mode 100644
index dd2134011..000000000
--- a/tests/llmcompressor/transformers/compression/run_compressed_configs_skipped/w8a8.yaml
+++ /dev/null
@@ -1,4 +0,0 @@
-cadence: "nightly"
-test_type: "regression"
-compressed_model_stub: nm-testing/TinyLlama-1.1B-Chat-v1.0-W8A8-Dynamic-Per-Token-compressed
-uncompressed_model_stub: nm-testing/TinyLlama-1.1B-Chat-v1.0-W8A8-Dynamic-Per-Token-uncompressed
\ No newline at end of file

From 4fe5ae105d2d7f9f0e7170d8e1ada7f45a41f8e6 Mon Sep 17 00:00:00 2001
From: shanjiaz
Date: Fri, 29 Aug 2025 11:26:01 -0400
Subject: [PATCH 04/12] move to right folder

---
 .../transformers/compression/decompression_configs/w8a8.yaml  | 4 ++++
 .../transformers/compression/run_compressed_configs/w8a8.yaml | 4 ++++
 2 files changed, 8 insertions(+)
 create mode 100644 tests/llmcompressor/transformers/compression/decompression_configs/w8a8.yaml
 create mode 100644 tests/llmcompressor/transformers/compression/run_compressed_configs/w8a8.yaml

diff --git a/tests/llmcompressor/transformers/compression/decompression_configs/w8a8.yaml b/tests/llmcompressor/transformers/compression/decompression_configs/w8a8.yaml
new file mode 100644
index 000000000..b5a846cbc
--- /dev/null
+++ b/tests/llmcompressor/transformers/compression/decompression_configs/w8a8.yaml
@@ -0,0 +1,4 @@
+cadence: "commit"
+test_type: "regression"
+compressed_model_stub: "nm-testing/tinyllama-w8a8-compressed"
+skeleton_model_stub: "TinyLlama/TinyLlama-1.1B-intermediate-step-1431k-3T"
\ No newline at end of file
diff --git a/tests/llmcompressor/transformers/compression/run_compressed_configs/w8a8.yaml b/tests/llmcompressor/transformers/compression/run_compressed_configs/w8a8.yaml
new file mode 100644
index 000000000..dd2134011
--- /dev/null
+++ b/tests/llmcompressor/transformers/compression/run_compressed_configs/w8a8.yaml
@@ -0,0 +1,4 @@
+cadence: "nightly"
+test_type: "regression"
+compressed_model_stub: nm-testing/TinyLlama-1.1B-Chat-v1.0-W8A8-Dynamic-Per-Token-compressed
+uncompressed_model_stub: nm-testing/TinyLlama-1.1B-Chat-v1.0-W8A8-Dynamic-Per-Token-uncompressed
\ No newline at end of file

From c48f5e3f025ded265fa9a47a706e2f001a77f29f Mon Sep 17 00:00:00 2001
From: shanjiaz
Date: Thu, 4 Sep 2025 18:13:15 -0400
Subject: [PATCH 05/12] test

Signed-off-by: shanjiaz
---
 .../compression/run_compressed_configs/w8a16.yaml             | 4 ----
 .../transformers/compression/run_compressed_configs/w8a8.yaml | 4 ----
 2 files changed, 8 deletions(-)
 delete mode 100644 tests/llmcompressor/transformers/compression/run_compressed_configs/w8a16.yaml
 delete mode 100644 tests/llmcompressor/transformers/compression/run_compressed_configs/w8a8.yaml

diff --git a/tests/llmcompressor/transformers/compression/run_compressed_configs/w8a16.yaml b/tests/llmcompressor/transformers/compression/run_compressed_configs/w8a16.yaml
deleted file mode 100644
index 6521d66ec..000000000
--- a/tests/llmcompressor/transformers/compression/run_compressed_configs/w8a16.yaml
+++ /dev/null
@@ -1,4 +0,0 @@
-cadence: "commit"
-test_type: "regression"
-compressed_model_stub: nm-testing/TinyLlama-1.1B-Chat-v1.0-W8A16-G128-compressed
-uncompressed_model_stub: nm-testing/TinyLlama-1.1B-Chat-v1.0-W8A16-G128-uncompressed
\ No newline at end of file
diff --git a/tests/llmcompressor/transformers/compression/run_compressed_configs/w8a8.yaml b/tests/llmcompressor/transformers/compression/run_compressed_configs/w8a8.yaml
deleted file mode 100644
index dd2134011..000000000
--- a/tests/llmcompressor/transformers/compression/run_compressed_configs/w8a8.yaml
+++ /dev/null
@@ -1,4 +0,0 @@
-cadence: "nightly"
-test_type: "regression"
-compressed_model_stub: nm-testing/TinyLlama-1.1B-Chat-v1.0-W8A8-Dynamic-Per-Token-compressed
-uncompressed_model_stub: nm-testing/TinyLlama-1.1B-Chat-v1.0-W8A8-Dynamic-Per-Token-uncompressed
\ No newline at end of file

From 5ff57085da7f39e166dd1640e23f9c30a472b918 Mon Sep 17 00:00:00 2001
From: shanjiaz
Date: Thu, 4 Sep 2025 18:13:27 -0400
Subject: [PATCH 06/12] test

---
 tests/llmcompressor/transformers/w8a16.yaml | 4 ++++
 tests/llmcompressor/transformers/w8a8.yaml  | 4 ++++
 2 files changed, 8 insertions(+)
 create mode 100644 tests/llmcompressor/transformers/w8a16.yaml
 create mode 100644 tests/llmcompressor/transformers/w8a8.yaml

diff --git a/tests/llmcompressor/transformers/w8a16.yaml b/tests/llmcompressor/transformers/w8a16.yaml
new file mode 100644
index 000000000..6521d66ec
--- /dev/null
+++ b/tests/llmcompressor/transformers/w8a16.yaml
@@ -0,0 +1,4 @@
+cadence: "commit"
+test_type: "regression"
+compressed_model_stub: nm-testing/TinyLlama-1.1B-Chat-v1.0-W8A16-G128-compressed
+uncompressed_model_stub: nm-testing/TinyLlama-1.1B-Chat-v1.0-W8A16-G128-uncompressed
\ No newline at end of file
diff --git a/tests/llmcompressor/transformers/w8a8.yaml b/tests/llmcompressor/transformers/w8a8.yaml
new file mode 100644
index 000000000..dd2134011
--- /dev/null
+++ b/tests/llmcompressor/transformers/w8a8.yaml
@@ -0,0 +1,4 @@
+cadence: "nightly"
+test_type: "regression"
+compressed_model_stub: nm-testing/TinyLlama-1.1B-Chat-v1.0-W8A8-Dynamic-Per-Token-compressed
+uncompressed_model_stub: nm-testing/TinyLlama-1.1B-Chat-v1.0-W8A8-Dynamic-Per-Token-uncompressed
\ No newline at end of file

From e7898f3d2ab2033d0722440a1e047a07299df2ff Mon Sep 17 00:00:00 2001
From: shanjiaz
Date: Tue, 9 Sep 2025 17:11:29 -0400
Subject: [PATCH 07/12] test memory management

---
 .../compression/test_run_compressed.py | 16 +++++++++++++++-
 1 file changed, 15 insertions(+), 1 deletion(-)

diff --git a/tests/llmcompressor/transformers/compression/test_run_compressed.py b/tests/llmcompressor/transformers/compression/test_run_compressed.py
index 4be243701..d6faa9dfb 100644
--- a/tests/llmcompressor/transformers/compression/test_run_compressed.py
+++ b/tests/llmcompressor/transformers/compression/test_run_compressed.py
@@ -15,6 +15,13 @@
"tests/llmcompressor/transformers/compression/run_compressed_configs" ) +# Memory management functions + +def cleanup_global_memory(): + """Force cleanup of all GPU memory between test classes.""" + if torch.cuda.is_available(): + torch.cuda.empty_cache() + torch.cuda.synchronize() @requires_gpu @parameterized_class(parse_params(COMPRESSED_LINEAR_CONFIG_DIR)) @@ -34,6 +41,8 @@ class Test_Decompressed_Linear_Uncompressed_Linear(unittest.TestCase): @classmethod def setUpClass(cls): + # Clean up any leftover memory from previous tests + cleanup_global_memory() cls.test_dir = tempfile.mkdtemp() quantization_config = CompressedTensorsConfig(run_compressed=False) @@ -86,7 +95,10 @@ def tearDownClass(cls): shutil.rmtree(cls.test_dir) del cls.decompressed_model del cls.uncompressed_model + del cls.tokenizer + torch.cuda.empty_cache() + torch.cuda.synchronize() @requires_gpu @@ -107,6 +119,8 @@ class Test_Compressed_CompressedLinear_Decompressed_Linear(unittest.TestCase): @classmethod def setUpClass(cls): + # Clean up any leftover memory from previous tests + cleanup_global_memory() cls.test_dir = tempfile.mkdtemp() # Should have CompressedLinear modules @@ -170,4 +184,4 @@ def tearDownClass(cls): shutil.rmtree(cls.test_dir) del cls.decompressed_model del cls.compressed_model - torch.cuda.empty_cache() + del cls.tokenizer From 044e65b81e41ae80fb1021caf3a664b4cfdc52c1 Mon Sep 17 00:00:00 2001 From: shanjiaz Date: Tue, 9 Sep 2025 19:30:29 -0400 Subject: [PATCH 08/12] quality Signed-off-by: shanjiaz --- .../transformers/compression/test_run_compressed.py | 2 ++ tests/llmcompressor/transformers/w8a16.yaml | 4 ---- tests/llmcompressor/transformers/w8a8.yaml | 4 ---- 3 files changed, 2 insertions(+), 8 deletions(-) delete mode 100644 tests/llmcompressor/transformers/w8a16.yaml delete mode 100644 tests/llmcompressor/transformers/w8a8.yaml diff --git a/tests/llmcompressor/transformers/compression/test_run_compressed.py b/tests/llmcompressor/transformers/compression/test_run_compressed.py index d6faa9dfb..acbc4b8bc 100644 --- a/tests/llmcompressor/transformers/compression/test_run_compressed.py +++ b/tests/llmcompressor/transformers/compression/test_run_compressed.py @@ -17,12 +17,14 @@ # Memory management functions + def cleanup_global_memory(): """Force cleanup of all GPU memory between test classes.""" if torch.cuda.is_available(): torch.cuda.empty_cache() torch.cuda.synchronize() + @requires_gpu @parameterized_class(parse_params(COMPRESSED_LINEAR_CONFIG_DIR)) class Test_Decompressed_Linear_Uncompressed_Linear(unittest.TestCase): diff --git a/tests/llmcompressor/transformers/w8a16.yaml b/tests/llmcompressor/transformers/w8a16.yaml deleted file mode 100644 index 6521d66ec..000000000 --- a/tests/llmcompressor/transformers/w8a16.yaml +++ /dev/null @@ -1,4 +0,0 @@ -cadence: "commit" -test_type: "regression" -compressed_model_stub: nm-testing/TinyLlama-1.1B-Chat-v1.0-W8A16-G128-compressed -uncompressed_model_stub: nm-testing/TinyLlama-1.1B-Chat-v1.0-W8A16-G128-uncompressed \ No newline at end of file diff --git a/tests/llmcompressor/transformers/w8a8.yaml b/tests/llmcompressor/transformers/w8a8.yaml deleted file mode 100644 index dd2134011..000000000 --- a/tests/llmcompressor/transformers/w8a8.yaml +++ /dev/null @@ -1,4 +0,0 @@ -cadence: "nightly" -test_type: "regression" -compressed_model_stub: nm-testing/TinyLlama-1.1B-Chat-v1.0-W8A8-Dynamic-Per-Token-compressed -uncompressed_model_stub: nm-testing/TinyLlama-1.1B-Chat-v1.0-W8A8-Dynamic-Per-Token-uncompressed \ No newline at end of file From 

From 87b3ddf85d750cd74fa0b6cb3ce6265dd417a159 Mon Sep 17 00:00:00 2001
From: shanjiaz
Date: Tue, 9 Sep 2025 19:30:38 -0400
Subject: [PATCH 09/12] quality

---
 .../compression/run_compressed_configs/w8a16.yaml             | 4 ++++
 .../transformers/compression/run_compressed_configs/w8a8.yaml | 4 ++++
 2 files changed, 8 insertions(+)
 create mode 100644 tests/llmcompressor/transformers/compression/run_compressed_configs/w8a16.yaml
 create mode 100644 tests/llmcompressor/transformers/compression/run_compressed_configs/w8a8.yaml

diff --git a/tests/llmcompressor/transformers/compression/run_compressed_configs/w8a16.yaml b/tests/llmcompressor/transformers/compression/run_compressed_configs/w8a16.yaml
new file mode 100644
index 000000000..6521d66ec
--- /dev/null
+++ b/tests/llmcompressor/transformers/compression/run_compressed_configs/w8a16.yaml
@@ -0,0 +1,4 @@
+cadence: "commit"
+test_type: "regression"
+compressed_model_stub: nm-testing/TinyLlama-1.1B-Chat-v1.0-W8A16-G128-compressed
+uncompressed_model_stub: nm-testing/TinyLlama-1.1B-Chat-v1.0-W8A16-G128-uncompressed
\ No newline at end of file
diff --git a/tests/llmcompressor/transformers/compression/run_compressed_configs/w8a8.yaml b/tests/llmcompressor/transformers/compression/run_compressed_configs/w8a8.yaml
new file mode 100644
index 000000000..dd2134011
--- /dev/null
+++ b/tests/llmcompressor/transformers/compression/run_compressed_configs/w8a8.yaml
@@ -0,0 +1,4 @@
+cadence: "nightly"
+test_type: "regression"
+compressed_model_stub: nm-testing/TinyLlama-1.1B-Chat-v1.0-W8A8-Dynamic-Per-Token-compressed
+uncompressed_model_stub: nm-testing/TinyLlama-1.1B-Chat-v1.0-W8A8-Dynamic-Per-Token-uncompressed
\ No newline at end of file

From ea076f34ce6e0f8f7805be9e71a6666362bd4e58 Mon Sep 17 00:00:00 2001
From: shanjiaz
Date: Wed, 10 Sep 2025 13:43:50 -0400
Subject: [PATCH 10/12] added memory handling

---
 .../compression/test_run_compressed.py | 74 +++++++++----------
 1 file changed, 35 insertions(+), 39 deletions(-)

diff --git a/tests/llmcompressor/transformers/compression/test_run_compressed.py b/tests/llmcompressor/transformers/compression/test_run_compressed.py
index acbc4b8bc..30eeea136 100644
--- a/tests/llmcompressor/transformers/compression/test_run_compressed.py
+++ b/tests/llmcompressor/transformers/compression/test_run_compressed.py
@@ -15,15 +15,6 @@
     "tests/llmcompressor/transformers/compression/run_compressed_configs"
 )
 
-# Memory management functions
-
-
-def cleanup_global_memory():
-    """Force cleanup of all GPU memory between test classes."""
-    if torch.cuda.is_available():
-        torch.cuda.empty_cache()
-        torch.cuda.synchronize()
-
 
 @requires_gpu
 @parameterized_class(parse_params(COMPRESSED_LINEAR_CONFIG_DIR))
@@ -44,29 +35,30 @@ class Test_Decompressed_Linear_Uncompressed_Linear(unittest.TestCase):
     @classmethod
     def setUpClass(cls):
         # Clean up any leftover memory from previous tests
-        cleanup_global_memory()
+        # cleanup_global_memory()
         cls.test_dir = tempfile.mkdtemp()
 
         quantization_config = CompressedTensorsConfig(run_compressed=False)
 
-        # Decompressed using HFQuantizer
-        # Linear foward
-        cls.decompressed_model = AutoModelForCausalLM.from_pretrained(
-            cls.compressed_model_stub,
-            torch_dtype="auto",
-            device_map="auto",
-            quantization_config=quantization_config,
-        )
-
-        # Load model as is at the uncompressed state
-        # Linear forward
-        cls.uncompressed_model = AutoModelForCausalLM.from_pretrained(
-            cls.uncompressed_model_stub,
-            torch_dtype=cls.decompressed_model.dtype,
-            device_map=cls.decompressed_model.device,
-        )
-
-        cls.tokenizer = AutoTokenizer.from_pretrained(cls.compressed_model_stub)
+        with torch.no_grad():
+            # Decompressed using HFQuantizer
+            # Linear forward
+            cls.decompressed_model = AutoModelForCausalLM.from_pretrained(
+                cls.compressed_model_stub,
+                torch_dtype="auto",
+                device_map="auto",
+                quantization_config=quantization_config,
+            )
+
+            # Load model as is at the uncompressed state
+            # Linear forward
+            cls.uncompressed_model = AutoModelForCausalLM.from_pretrained(
+                cls.uncompressed_model_stub,
+                torch_dtype=cls.decompressed_model.dtype,
+                device_map=cls.decompressed_model.device,
+            )
+
+            cls.tokenizer = AutoTokenizer.from_pretrained(cls.compressed_model_stub)
 
     def test_compressed_matches_decompressed(self):
         SAMPLE_INPUT = [
@@ -95,6 +87,11 @@ def test_compressed_matches_decompressed(self):
     def tearDownClass(cls):
         if os.path.isdir(cls.test_dir):
             shutil.rmtree(cls.test_dir)
+
+        if hasattr(cls, "decompressed_model") and cls.decompressed_model is not None:
+            cls.decompressed_model.cpu()
+        if hasattr(cls, "uncompressed_model") and cls.uncompressed_model is not None:
+            cls.uncompressed_model.cpu()
         del cls.decompressed_model
         del cls.uncompressed_model
         del cls.tokenizer
@@ -121,20 +118,12 @@ class Test_Compressed_CompressedLinear_Decompressed_Linear(unittest.TestCase):
     @classmethod
     def setUpClass(cls):
-        # Clean up any leftover memory from previous tests
-        cleanup_global_memory()
         cls.test_dir = tempfile.mkdtemp()
-
-        # Should have CompressedLinear modules
-        # Compressed Linear forward
         cls.compressed_model = AutoModelForCausalLM.from_pretrained(
             cls.compressed_model_stub,
             torch_dtype="auto",
             device_map="auto",
         )
-
-        # Should just be linear modules
-        # Linear forward
         quantization_config = CompressedTensorsConfig(run_compressed=False)
         cls.decompressed_model = AutoModelForCausalLM.from_pretrained(
             cls.compressed_model_stub,
             torch_dtype="auto",
             device_map=cls.compressed_model.device,
             quantization_config=quantization_config,
         )
-
         cls.tokenizer = AutoTokenizer.from_pretrained(cls.compressed_model_stub)
 
     def test_compressed_linear_modules_exist(self):
@@ -170,17 +161,25 @@ def test_compressed_matches_decompressed__hf_quantizer(self):
         )
         inputs = inputs.to(compressed_device)
-
         compressed_model_out = self.compressed_model.generate(**inputs, max_length=50)
 
         # Compare outputs for each input
         for idx in range(len(SAMPLE_INPUT)):
-            torch.equal(compressed_model_out[idx], decompressed_model_out[idx])
+            equal = torch.equal(compressed_model_out[idx], decompressed_model_out[idx])
+            assert equal
 
     @classmethod
     def tearDownClass(cls):
         if os.path.isdir(cls.test_dir):
             shutil.rmtree(cls.test_dir)
+
+        if hasattr(cls, "decompressed_model") and cls.decompressed_model is not None:
+            cls.decompressed_model.cpu()
+        if hasattr(cls, "compressed_model") and cls.compressed_model is not None:
+            cls.compressed_model.cpu()
         del cls.decompressed_model
         del cls.compressed_model
         del cls.tokenizer
+
+        torch.cuda.empty_cache()
+        torch.cuda.synchronize()

From 2c1f3607da1c1d8862fc6a73a219f4cd474022e1 Mon Sep 17 00:00:00 2001
From: shanjiaz
Date: Fri, 12 Sep 2025 11:01:42 -0400
Subject: [PATCH 11/12] fixed style

Signed-off-by: shanjiaz
---
 .../compression/test_run_compressed.py | 49 ++++++++++---------
 1 file changed, 26 insertions(+), 23 deletions(-)

diff --git a/tests/llmcompressor/transformers/compression/test_run_compressed.py b/tests/llmcompressor/transformers/compression/test_run_compressed.py
index 30eeea136..9c173501f 100644
--- a/tests/llmcompressor/transformers/compression/test_run_compressed.py
+++ b/tests/llmcompressor/transformers/compression/test_run_compressed.py
@@ -34,31 +34,28 @@ class Test_Decompressed_Linear_Uncompressed_Linear(unittest.TestCase):
     @classmethod
     def setUpClass(cls):
-        # Clean up any leftover memory from previous tests
-        # cleanup_global_memory()
         cls.test_dir = tempfile.mkdtemp()
 
         quantization_config = CompressedTensorsConfig(run_compressed=False)
 
-        with torch.no_grad():
-            # Decompressed using HFQuantizer
-            # Linear forward
-            cls.decompressed_model = AutoModelForCausalLM.from_pretrained(
-                cls.compressed_model_stub,
-                torch_dtype="auto",
-                device_map="auto",
-                quantization_config=quantization_config,
-            )
-
-            # Load model as is at the uncompressed state
-            # Linear forward
-            cls.uncompressed_model = AutoModelForCausalLM.from_pretrained(
-                cls.uncompressed_model_stub,
-                torch_dtype=cls.decompressed_model.dtype,
-                device_map=cls.decompressed_model.device,
-            )
-
-            cls.tokenizer = AutoTokenizer.from_pretrained(cls.compressed_model_stub)
+        # Decompressed using HFQuantizer
+        # Linear forward
+        cls.decompressed_model = AutoModelForCausalLM.from_pretrained(
+            cls.compressed_model_stub,
+            torch_dtype="auto",
+            device_map="auto",
+            quantization_config=quantization_config,
+        )
+
+        # Load model as is at the uncompressed state
+        # Linear forward
+        cls.uncompressed_model = AutoModelForCausalLM.from_pretrained(
+            cls.uncompressed_model_stub,
+            torch_dtype=cls.decompressed_model.dtype,
+            device_map=cls.decompressed_model.device,
+        )
+
+        cls.tokenizer = AutoTokenizer.from_pretrained(cls.compressed_model_stub)
 
     def test_compressed_matches_decompressed(self):
         SAMPLE_INPUT = [
@@ -119,11 +116,17 @@ class Test_Compressed_CompressedLinear_Decompressed_Linear(unittest.TestCase):
     @classmethod
     def setUpClass(cls):
         cls.test_dir = tempfile.mkdtemp()
+
+        # Should have CompressedLinear modules
+        # Compressed Linear forward
         cls.compressed_model = AutoModelForCausalLM.from_pretrained(
             cls.compressed_model_stub,
             torch_dtype="auto",
             device_map="auto",
         )
+
+        # Should just be linear modules
+        # Linear forward
         quantization_config = CompressedTensorsConfig(run_compressed=False)
         cls.decompressed_model = AutoModelForCausalLM.from_pretrained(
             cls.compressed_model_stub,
             torch_dtype="auto",
             device_map=cls.compressed_model.device,
             quantization_config=quantization_config,
         )
+
         cls.tokenizer = AutoTokenizer.from_pretrained(cls.compressed_model_stub)
 
     def test_compressed_linear_modules_exist(self):
@@ -165,8 +169,7 @@ def test_compressed_matches_decompressed__hf_quantizer(self):
 
         # Compare outputs for each input
         for idx in range(len(SAMPLE_INPUT)):
-            equal = torch.equal(compressed_model_out[idx], decompressed_model_out[idx])
-            assert equal
+            assert torch.equal(compressed_model_out[idx], decompressed_model_out[idx])
 
     @classmethod
     def tearDownClass(cls):

From 0dbee7e47aa53178a2ac78fb1caff5bb9ff3e0e2 Mon Sep 17 00:00:00 2001
From: shanjiaz
Date: Mon, 15 Sep 2025 09:44:35 -0400
Subject: [PATCH 12/12] min diff

Signed-off-by: shanjiaz
---
 .../transformers/compression/test_run_compressed.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/tests/llmcompressor/transformers/compression/test_run_compressed.py b/tests/llmcompressor/transformers/compression/test_run_compressed.py
index 9c173501f..798a436c8 100644
--- a/tests/llmcompressor/transformers/compression/test_run_compressed.py
+++ b/tests/llmcompressor/transformers/compression/test_run_compressed.py
@@ -165,11 +165,12 @@ def test_compressed_matches_decompressed__hf_quantizer(self):
         )
         inputs = inputs.to(compressed_device)
+
         compressed_model_out = self.compressed_model.generate(**inputs, max_length=50)
 
         # Compare outputs for each input
         for idx in range(len(SAMPLE_INPUT)):
-            assert torch.equal(compressed_model_out[idx], decompressed_model_out[idx])
+            torch.equal(compressed_model_out[idx], decompressed_model_out[idx])
 
     @classmethod
     def tearDownClass(cls):