diff --git a/tests/llmcompressor/transformers/compression/decompression_configs_skipped/w8a8.yaml b/tests/llmcompressor/transformers/compression/decompression_configs/w8a8.yaml
similarity index 100%
rename from tests/llmcompressor/transformers/compression/decompression_configs_skipped/w8a8.yaml
rename to tests/llmcompressor/transformers/compression/decompression_configs/w8a8.yaml
diff --git a/tests/llmcompressor/transformers/compression/run_compressed_configs_skipped/w8a8.yaml b/tests/llmcompressor/transformers/compression/run_compressed_configs/w8a8.yaml
similarity index 100%
rename from tests/llmcompressor/transformers/compression/run_compressed_configs_skipped/w8a8.yaml
rename to tests/llmcompressor/transformers/compression/run_compressed_configs/w8a8.yaml
diff --git a/tests/llmcompressor/transformers/compression/test_run_compressed.py b/tests/llmcompressor/transformers/compression/test_run_compressed.py
index 4be243701..798a436c8 100644
--- a/tests/llmcompressor/transformers/compression/test_run_compressed.py
+++ b/tests/llmcompressor/transformers/compression/test_run_compressed.py
@@ -84,9 +84,17 @@ def test_compressed_matches_decompressed(self):
     def tearDownClass(cls):
         if os.path.isdir(cls.test_dir):
             shutil.rmtree(cls.test_dir)
+
+        if hasattr(cls, "decompressed_model") and cls.decompressed_model is not None:
+            cls.decompressed_model.cpu()
+        if hasattr(cls, "uncompressed_model") and cls.uncompressed_model is not None:
+            cls.uncompressed_model.cpu()
 
         del cls.decompressed_model
         del cls.uncompressed_model
+        del cls.tokenizer
+        torch.cuda.empty_cache()
+        torch.cuda.synchronize()
 
 
 @requires_gpu
@@ -168,6 +176,14 @@ def test_compressed_matches_decompressed__hf_quantizer(self):
     def tearDownClass(cls):
         if os.path.isdir(cls.test_dir):
             shutil.rmtree(cls.test_dir)
+
+        if hasattr(cls, "decompressed_model") and cls.decompressed_model is not None:
+            cls.decompressed_model.cpu()
+        if hasattr(cls, "compressed_model") and cls.compressed_model is not None:
+            cls.compressed_model.cpu()
 
         del cls.decompressed_model
         del cls.compressed_model
+        del cls.tokenizer
+        torch.cuda.empty_cache()
+        torch.cuda.synchronize()
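For context, the cleanup these hunks add to both `tearDownClass` methods follows a common pattern for freeing GPU memory between unittest classes: move the model's weights back to the CPU before dropping the last Python reference, then release PyTorch's cached allocator blocks. Below is a minimal, self-contained sketch of that pattern; the `ExampleGPUTest` class and the `torch.nn.Linear` stand-in model are hypothetical placeholders, not the actual test classes from this diff.

```python
import unittest

import torch


class ExampleGPUTest(unittest.TestCase):
    @classmethod
    def setUpClass(cls):
        # Hypothetical stand-in for loading a real model onto the GPU.
        cls.model = torch.nn.Linear(8, 8).to("cuda")

    @classmethod
    def tearDownClass(cls):
        # Move parameters off the GPU before deleting the reference, so the
        # CUDA tensors are actually freed rather than lingering in the graph.
        if hasattr(cls, "model") and cls.model is not None:
            cls.model.cpu()
        del cls.model
        # Return cached blocks to the driver and wait for pending kernels,
        # so the next test class starts with the memory genuinely available.
        torch.cuda.empty_cache()
        torch.cuda.synchronize()
```

The `hasattr`/`is not None` guard mirrors the diff: it keeps teardown from raising if `setUpClass` failed partway and never assigned the attribute.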