From 72d3000d2e4b368e7393ba4ba19b2e43121ebf23 Mon Sep 17 00:00:00 2001
From: sayakpaul
Date: Tue, 21 Jan 2025 11:16:20 +0530
Subject: [PATCH 01/19] add precomputation tests

---
 tests/trainers/test_trainers_common.py | 29 ++++++++++++++++++++++++++
 1 file changed, 29 insertions(+)
 create mode 100644 tests/trainers/test_trainers_common.py

diff --git a/tests/trainers/test_trainers_common.py b/tests/trainers/test_trainers_common.py
new file mode 100644
index 00000000..f92b2be4
--- /dev/null
+++ b/tests/trainers/test_trainers_common.py
@@ -0,0 +1,29 @@
+from finetrainers import Trainer
+from huggingface_hub import snapshot_download
+import tempfile
+import os
+
+class TrainerTestMixin:
+    def get_training_args(self):
+        raise NotImplementedError
+
+    def download_dataset_txt_format(self, cache_dir):
+        path = snapshot_download(repo_id="finetrainers/dummy-disney-dataset", repo_type="dataset", cache_dir=cache_dir)
+        return path
+
+    def test_precomputation_txt_format(self):
+        # Here we assume the dataset is formatted like:
+        # https://huggingface.co/datasets/Wild-Heart/Disney-VideoGeneration-Dataset/tree/main
+        training_args = self.get_training_args()
+
+        with tempfile.TemporaryDirectory() as tmpdir:
+            training_args.data_root = self.download_dataset_txt_format(cache_dir=tmpdir)
+            trainer = Trainer(training_args)
+            training_args = trainer.args
+
+            trainer.prepare_dataset()
+            trainer.prepare_models()
+            trainer.prepare_precomputations()
+
+            precomputed_dir = os.path.join(training_args.data_root, f"{training_args.pretrained_model_name_or_path}_precomputed")
+            assert os.path.exists(precomputed_dir), f"Precomputation wasn't successful. Couldn't find the precomputed dir: {os.listdir()}"
\ No newline at end of file

From d91476da76b998c07ec2d1404da79318fcce9f4f Mon Sep 17 00:00:00 2001
From: sayakpaul
Date: Tue, 21 Jan 2025 11:24:44 +0530
Subject: [PATCH 02/19] update

---
 tests/trainers/__init__.py                 |  0
 tests/trainers/cogvideox/__init__.py       |  0
 tests/trainers/cogvideox/test_cogvideox.py | 12 ++++++++++++
 3 files changed, 12 insertions(+)
 create mode 100644 tests/trainers/__init__.py
 create mode 100644 tests/trainers/cogvideox/__init__.py
 create mode 100644 tests/trainers/cogvideox/test_cogvideox.py

diff --git a/tests/trainers/__init__.py b/tests/trainers/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/tests/trainers/cogvideox/__init__.py b/tests/trainers/cogvideox/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/tests/trainers/cogvideox/test_cogvideox.py b/tests/trainers/cogvideox/test_cogvideox.py
new file mode 100644
index 00000000..fd4439a0
--- /dev/null
+++ b/tests/trainers/cogvideox/test_cogvideox.py
@@ -0,0 +1,12 @@
+from ..test_trainers_common import TrainerTestMixin
+from finetrainers import parse_arguments
+import unittest
+
+
+class CogVideoXTester(unittest.TestCase, TrainerTestMixin):
+    def get_training_args(self):
+        args = parse_arguments()
+        args.training_type = "lora"
+        args.pretrained_model_name_or_path = "finetrainers/dummy-cogvideox"
+        args.video_resolution_buckets = "9x16x16"
+        return args
\ No newline at end of file

From 9ba2aff07e415cff3a3ecd927d3614daff137ae7 Mon Sep 17 00:00:00 2001
From: sayakpaul
Date: Tue, 21 Jan 2025 11:45:37 +0530
Subject: [PATCH 03/19] updates

---
 tests/trainers/cogvideox/test_cogvideox.py |  7 +++++++
 tests/trainers/test_trainers_common.py     | 11 +++++++++++
 2 files changed, 18 insertions(+)

diff --git a/tests/trainers/cogvideox/test_cogvideox.py b/tests/trainers/cogvideox/test_cogvideox.py
index fd4439a0..e8aa44e4 100644
--- a/tests/trainers/cogvideox/test_cogvideox.py
+++ b/tests/trainers/cogvideox/test_cogvideox.py
@@ -1,3 +1,10 @@
+import sys
+from pathlib import Path
+
+current_file = Path(__file__).resolve()
+root_dir = current_file.parents[3]
+sys.path.append(str(root_dir))
+
 from ..test_trainers_common import TrainerTestMixin
 from finetrainers import parse_arguments
 import unittest
diff --git a/tests/trainers/test_trainers_common.py b/tests/trainers/test_trainers_common.py
index f92b2be4..0cdbcd27 100644
--- a/tests/trainers/test_trainers_common.py
+++ b/tests/trainers/test_trainers_common.py
@@ -1,3 +1,14 @@
+import sys
+from pathlib import Path
+
+current_file = Path(__file__).resolve()
+root_dir = current_file.parents[1]
+sys.path.append(str(root_dir))
+
+# import os
+# current_dir = os.path.dirname(os.path.abspath(__file__))
+# print(f"{current_dir=}")
+
 from finetrainers import Trainer
 from huggingface_hub import snapshot_download
 import tempfile
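The layout established by the first three patches — a shared TrainerTestMixin plus one thin per-model subclass — is the pattern the rest of the series builds on. As a rough sketch of how a new backend would plug in (the MyModelTester name and the dummy-mymodel checkpoint are illustrative, not part of this PR; the argument style mirrors the first version of the cogvideox tester above):

import unittest

from finetrainers import parse_arguments

from ..test_trainers_common import TrainerTestMixin


class MyModelTester(unittest.TestCase, TrainerTestMixin):
    # Only the model-specific arguments live here; the shared
    # precomputation test body comes from TrainerTestMixin.
    def get_training_args(self):
        args = parse_arguments()
        args.training_type = "lora"
        args.pretrained_model_name_or_path = "finetrainers/dummy-mymodel"  # hypothetical dummy checkpoint
        args.video_resolution_buckets = "9x16x16"
        return args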
From 348fe817db658e280ee3c40200702d7f629bdf5e Mon Sep 17 00:00:00 2001
From: sayakpaul
Date: Tue, 21 Jan 2025 13:53:04 +0530
Subject: [PATCH 04/19] changes

---
 finetrainers/args.py                       |  4 ++-
 finetrainers/models/cogvideox/lora.py      |  7 +++--
 finetrainers/trainer.py                    | 12 +++++---
 finetrainers/utils/memory_utils.py         |  8 ++---
 tests/trainers/cogvideox/test_cogvideox.py | 18 +++++++++--
 tests/trainers/test_trainers_common.py     | 35 ++++++++++++++++++----
 6 files changed, 64 insertions(+), 20 deletions(-)

diff --git a/finetrainers/args.py b/finetrainers/args.py
index 46cd04cc..343d6396 100644
--- a/finetrainers/args.py
+++ b/finetrainers/args.py
@@ -337,6 +337,7 @@ class Args:
     validation_every_n_steps: Optional[int] = None
     enable_model_cpu_offload: bool = False
     validation_frame_rate: int = 25
+    do_not_run_validation: bool = False
 
     # Miscellaneous arguments
     tracker_name: str = "finetrainers"
@@ -483,7 +484,8 @@ def parse_arguments() -> Args:
 def validate_args(args: Args):
     _validated_model_args(args)
     _validate_training_args(args)
-    _validate_validation_args(args)
+    if not args.do_not_run_validation:
+        _validate_validation_args(args)
 
 
 def _add_model_arguments(parser: argparse.ArgumentParser) -> None:
diff --git a/finetrainers/models/cogvideox/lora.py b/finetrainers/models/cogvideox/lora.py
index 65d86ee9..dfeda3be 100644
--- a/finetrainers/models/cogvideox/lora.py
+++ b/finetrainers/models/cogvideox/lora.py
@@ -3,7 +3,7 @@
 import torch
 from diffusers import AutoencoderKLCogVideoX, CogVideoXDDIMScheduler, CogVideoXPipeline, CogVideoXTransformer3DModel
 from PIL import Image
-from transformers import T5EncoderModel, T5Tokenizer
+from transformers import T5EncoderModel, T5Tokenizer, AutoTokenizer
 
 from .utils import prepare_rotary_positional_embeddings
@@ -15,7 +15,10 @@ def load_condition_models(
     cache_dir: Optional[str] = None,
     **kwargs,
 ):
-    tokenizer = T5Tokenizer.from_pretrained(model_id, subfolder="tokenizer", revision=revision, cache_dir=cache_dir)
+    try:
+        tokenizer = T5Tokenizer.from_pretrained(model_id, subfolder="tokenizer", revision=revision, cache_dir=cache_dir)
+    except:
+        tokenizer = AutoTokenizer.from_pretrained(model_id, subfolder="tokenizer", revision=revision, cache_dir=cache_dir)
     text_encoder = T5EncoderModel.from_pretrained(
         model_id, subfolder="text_encoder", torch_dtype=text_encoder_dtype, revision=revision, cache_dir=cache_dir
     )
diff --git a/finetrainers/trainer.py b/finetrainers/trainer.py
index 0839db79..bdbddccf 100644
--- a/finetrainers/trainer.py
+++ b/finetrainers/trainer.py
@@ -255,7 +255,8 @@ def collate_fn(batch):
 
         memory_statistics = get_memory_statistics()
         logger.info(f"Memory after precomputing conditions: {json.dumps(memory_statistics, indent=4)}")
-        torch.cuda.reset_peak_memory_stats(accelerator.device)
+        if torch.cuda.is_available():
+            torch.cuda.reset_peak_memory_stats(accelerator.device)
 
         # Precompute latents
         latent_components = self.model_config["load_latent_models"](**self._get_load_components_kwargs())
@@ -302,7 +303,8 @@ def collate_fn(batch):
 
         memory_statistics = get_memory_statistics()
         logger.info(f"Memory after precomputing latents: {json.dumps(memory_statistics, indent=4)}")
-        torch.cuda.reset_peak_memory_stats(accelerator.device)
+        if torch.cuda.is_available():
+            torch.cuda.reset_peak_memory_stats(accelerator.device)
 
         # Update dataloader to use precomputed conditions and latents
         self.dataloader = torch.utils.data.DataLoader(
@@ -984,7 +986,8 @@ def validate(self, step: int, final_validation: bool = False) -> None:
         free_memory()
         memory_statistics = get_memory_statistics()
         logger.info(f"Memory after validation end: {json.dumps(memory_statistics, indent=4)}")
-        torch.cuda.reset_peak_memory_stats(accelerator.device)
+        if torch.cuda.is_available():
+            torch.cuda.reset_peak_memory_stats(accelerator.device)
 
         if not final_validation:
             self.transformer.train()
@@ -1107,7 +1110,8 @@ def _delete_components(self) -> None:
         self.vae = None
         self.scheduler = None
         free_memory()
-        torch.cuda.synchronize(self.state.accelerator.device)
+        if torch.cuda.is_available():
+            torch.cuda.synchronize(self.state.accelerator.device)
 
     def _get_and_prepare_pipeline_for_validation(self, final_validation: bool = False) -> DiffusionPipeline:
         accelerator = self.state.accelerator
diff --git a/finetrainers/utils/memory_utils.py b/finetrainers/utils/memory_utils.py
index d7616b19..1b492507 100644
--- a/finetrainers/utils/memory_utils.py
+++ b/finetrainers/utils/memory_utils.py
@@ -28,10 +28,10 @@ def get_memory_statistics(precision: int = 3) -> Dict[str, Any]:
         logger.warning("No CUDA, MPS, or ROCm device found. Memory statistics are not available.")
 
     return {
-        "memory_allocated": round(bytes_to_gigabytes(memory_allocated), ndigits=precision),
-        "memory_reserved": round(bytes_to_gigabytes(memory_reserved), ndigits=precision),
-        "max_memory_allocated": round(bytes_to_gigabytes(max_memory_allocated), ndigits=precision),
-        "max_memory_reserved": round(bytes_to_gigabytes(max_memory_reserved), ndigits=precision),
+        "memory_allocated": round(bytes_to_gigabytes(memory_allocated), ndigits=precision) if memory_allocated else None,
+        "memory_reserved": round(bytes_to_gigabytes(memory_reserved), ndigits=precision) if memory_reserved else None,
+        "max_memory_allocated": round(bytes_to_gigabytes(max_memory_allocated), ndigits=precision) if max_memory_allocated else None,
+        "max_memory_reserved": round(bytes_to_gigabytes(max_memory_reserved), ndigits=precision) if max_memory_reserved else None,
     }
diff --git a/tests/trainers/cogvideox/test_cogvideox.py b/tests/trainers/cogvideox/test_cogvideox.py
index e8aa44e4..247da525 100644
--- a/tests/trainers/cogvideox/test_cogvideox.py
+++ b/tests/trainers/cogvideox/test_cogvideox.py
@@ -6,14 +6,26 @@
 sys.path.append(str(root_dir))
 
 from ..test_trainers_common import TrainerTestMixin
-from finetrainers import parse_arguments
+from typing import Tuple
+from finetrainers import Args
 import unittest
 
 
+# Copied for now.
+def parse_resolution_bucket(resolution_bucket: str) -> Tuple[int, ...]:
+    return tuple(map(int, resolution_bucket.split("x")))
+
+
 class CogVideoXTester(unittest.TestCase, TrainerTestMixin):
+    model_name = "cogvideox"
+
     def get_training_args(self):
-        args = parse_arguments()
+        args = Args()
+        args.model_name = self.model_name
         args.training_type = "lora"
         args.pretrained_model_name_or_path = "finetrainers/dummy-cogvideox"
-        args.video_resolution_buckets = "9x16x16"
+        args.data_root = ""  # will be set from the tester method.
+        args.video_resolution_buckets = [parse_resolution_bucket("9x16x16")]
+        args.precompute_conditions = True
+        args.do_not_run_validation = True
         return args
\ No newline at end of file
diff --git a/tests/trainers/test_trainers_common.py b/tests/trainers/test_trainers_common.py
index 0cdbcd27..f80f93b6 100644
--- a/tests/trainers/test_trainers_common.py
+++ b/tests/trainers/test_trainers_common.py
@@ -5,16 +5,18 @@
 root_dir = current_file.parents[1]
 sys.path.append(str(root_dir))
 
-# import os
-# current_dir = os.path.dirname(os.path.abspath(__file__))
-# print(f"{current_dir=}")
-
 from finetrainers import Trainer
+from finetrainers.utils.file_utils import string_to_filename
+from finetrainers.constants import PRECOMPUTED_DIR_NAME, PRECOMPUTED_CONDITIONS_DIR_NAME, PRECOMPUTED_LATENTS_DIR_NAME
 from huggingface_hub import snapshot_download
 import tempfile
+import glob
 import os
 
+
 class TrainerTestMixin:
+    model_name = None
+
     def get_training_args(self):
         raise NotImplementedError
 
@@ -28,13 +30,34 @@ class TrainerTestMixin:
     def test_precomputation_txt_format(self):
         # Here we assume the dataset is formatted like:
         # https://huggingface.co/datasets/Wild-Heart/Disney-VideoGeneration-Dataset/tree/main
         training_args = self.get_training_args()
 
         with tempfile.TemporaryDirectory() as tmpdir:
-            training_args.data_root = self.download_dataset_txt_format(cache_dir=tmpdir)
+            # Prepare remaining args.
+            training_args.data_root = Path(self.download_dataset_txt_format(cache_dir=tmpdir))
+
+            training_args.video_column = "videos.txt"
+            training_args.caption_column = "prompt.txt"
+            with open(f"{training_args.data_root}/{training_args.video_column}", "r", encoding="utf-8") as file:
+                video_paths = [training_args.data_root.joinpath(line.strip()) for line in file.readlines() if len(line.strip()) > 0]
+
+            # Initialize trainer.
+            training_args.output_dir = tmpdir
             trainer = Trainer(training_args)
             training_args = trainer.args
 
+            # Perform precomputations.
             trainer.prepare_dataset()
             trainer.prepare_models()
             trainer.prepare_precomputations()
 
-            precomputed_dir = os.path.join(training_args.data_root, f"{training_args.pretrained_model_name_or_path}_precomputed")
-            assert os.path.exists(precomputed_dir), f"Precomputation wasn't successful. Couldn't find the precomputed dir: {os.listdir()}"
\ No newline at end of file
+            cleaned_model_id = string_to_filename(training_args.pretrained_model_name_or_path)
+            precomputation_dir = (
+                Path(training_args.data_root) / f"{training_args.model_name}_{cleaned_model_id}_{PRECOMPUTED_DIR_NAME}"
+            )
+
+            # Checks.
+            conditions_dir = precomputation_dir / PRECOMPUTED_CONDITIONS_DIR_NAME
+            latents_dir = precomputation_dir / PRECOMPUTED_LATENTS_DIR_NAME
+            assert os.path.exists(precomputation_dir), f"Precomputation wasn't successful. Couldn't find the precomputed dir: {os.listdir(training_args.data_root)=}\n"
+            assert os.path.exists(conditions_dir), f"conditions dir ({str(conditions_dir)}) doesn't exist"
+            assert os.path.exists(latents_dir), f"latents dir ({str(latents_dir)}) doesn't exist"
+            assert len(video_paths) == len([p for p in conditions_dir.glob("*.pt")])
+            assert len(video_paths) == len([p for p in latents_dir.glob("*.pt")])
\ No newline at end of file
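One caveat with the `round(...) if memory_allocated else None` guards introduced above: a reading of exactly 0 bytes is falsy in Python, so it would also be mapped to None. A stricter variant of the same guard — a sketch only, assuming the module's existing bytes_to_gigabytes helper, not what the patch ships — would be:

def safe_round_gb(value, precision=3):
    # Treat only a missing reading as None; keep a legitimate 0-byte reading.
    if value is None:
        return None
    return round(bytes_to_gigabytes(value), ndigits=precision)

In practice the shipped truthiness check is harmless, since allocator readings on an active device are rarely exactly zero.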
From 3dda221b72c0df22c733bbb86fbace07c70a4be1 Mon Sep 17 00:00:00 2001
From: sayakpaul
Date: Tue, 21 Jan 2025 13:58:48 +0530
Subject: [PATCH 05/19] quality

---
 finetrainers/models/cogvideox/lora.py      | 12 ++++--
 finetrainers/utils/memory_utils.py         | 12 ++++--
 tests/trainers/cogvideox/test_cogvideox.py | 19 ++++++----
 tests/trainers/test_trainers_common.py     | 43 +++++++++++++---------
 4 files changed, 54 insertions(+), 32 deletions(-)

diff --git a/finetrainers/models/cogvideox/lora.py b/finetrainers/models/cogvideox/lora.py
index dfeda3be..d8adc2aa 100644
--- a/finetrainers/models/cogvideox/lora.py
+++ b/finetrainers/models/cogvideox/lora.py
@@ -3,7 +3,7 @@
 import torch
 from diffusers import AutoencoderKLCogVideoX, CogVideoXDDIMScheduler, CogVideoXPipeline, CogVideoXTransformer3DModel
 from PIL import Image
-from transformers import T5EncoderModel, T5Tokenizer, AutoTokenizer
+from transformers import AutoTokenizer, T5EncoderModel, T5Tokenizer
 
 from .utils import prepare_rotary_positional_embeddings
@@ -16,9 +16,13 @@ def load_condition_models(
     **kwargs,
 ):
     try:
-        tokenizer = T5Tokenizer.from_pretrained(model_id, subfolder="tokenizer", revision=revision, cache_dir=cache_dir)
-    except:
-        tokenizer = AutoTokenizer.from_pretrained(model_id, subfolder="tokenizer", revision=revision, cache_dir=cache_dir)
+        tokenizer = T5Tokenizer.from_pretrained(
+            model_id, subfolder="tokenizer", revision=revision, cache_dir=cache_dir
+        )
+    except:  # noqa
+        tokenizer = AutoTokenizer.from_pretrained(
+            model_id, subfolder="tokenizer", revision=revision, cache_dir=cache_dir
+        )
     text_encoder = T5EncoderModel.from_pretrained(
         model_id, subfolder="text_encoder", torch_dtype=text_encoder_dtype, revision=revision, cache_dir=cache_dir
     )
diff --git a/finetrainers/utils/memory_utils.py b/finetrainers/utils/memory_utils.py
index 1b492507..906efba4 100644
--- a/finetrainers/utils/memory_utils.py
+++ b/finetrainers/utils/memory_utils.py
@@ -28,10 +28,16 @@ def get_memory_statistics(precision: int = 3) -> Dict[str, Any]:
         logger.warning("No CUDA, MPS, or ROCm device found. Memory statistics are not available.")
 
     return {
-        "memory_allocated": round(bytes_to_gigabytes(memory_allocated), ndigits=precision) if memory_allocated else None,
+        "memory_allocated": round(bytes_to_gigabytes(memory_allocated), ndigits=precision)
+        if memory_allocated
+        else None,
         "memory_reserved": round(bytes_to_gigabytes(memory_reserved), ndigits=precision) if memory_reserved else None,
-        "max_memory_allocated": round(bytes_to_gigabytes(max_memory_allocated), ndigits=precision) if max_memory_allocated else None,
-        "max_memory_reserved": round(bytes_to_gigabytes(max_memory_reserved), ndigits=precision) if max_memory_reserved else None,
+        "max_memory_allocated": round(bytes_to_gigabytes(max_memory_allocated), ndigits=precision)
+        if max_memory_allocated
+        else None,
+        "max_memory_reserved": round(bytes_to_gigabytes(max_memory_reserved), ndigits=precision)
+        if max_memory_reserved
+        else None,
     }
diff --git a/tests/trainers/cogvideox/test_cogvideox.py b/tests/trainers/cogvideox/test_cogvideox.py
index 247da525..390e0968 100644
--- a/tests/trainers/cogvideox/test_cogvideox.py
+++ b/tests/trainers/cogvideox/test_cogvideox.py
@@ -1,21 +1,24 @@
-import sys 
+import sys
 from pathlib import Path
 
+
 current_file = Path(__file__).resolve()
 root_dir = current_file.parents[3]
 sys.path.append(str(root_dir))
 
-from ..test_trainers_common import TrainerTestMixin
-from typing import Tuple
-from finetrainers import Args
-import unittest
+import unittest  # noqa
+from typing import Tuple  # noqa
+
+from finetrainers import Args  # noqa
+
+from ..test_trainers_common import TrainerTestMixin  # noqa
+
 
 # Copied for now.
 def parse_resolution_bucket(resolution_bucket: str) -> Tuple[int, ...]:
     return tuple(map(int, resolution_bucket.split("x")))
 
-
 class CogVideoXTester(unittest.TestCase, TrainerTestMixin):
     model_name = "cogvideox"
 
@@ -24,8 +27,8 @@ class CogVideoXTester(unittest.TestCase, TrainerTestMixin):
         args.model_name = self.model_name
         args.training_type = "lora"
         args.pretrained_model_name_or_path = "finetrainers/dummy-cogvideox"
-        args.data_root = "" # will be set from the tester method.
+        args.data_root = ""  # will be set from the tester method.
         args.video_resolution_buckets = [parse_resolution_bucket("9x16x16")]
         args.precompute_conditions = True
         args.do_not_run_validation = True
-        return args
\ No newline at end of file
+        return args
diff --git a/tests/trainers/test_trainers_common.py b/tests/trainers/test_trainers_common.py
index f80f93b6..0b59e8ff 100644
--- a/tests/trainers/test_trainers_common.py
+++ b/tests/trainers/test_trainers_common.py
@@ -1,34 +1,37 @@
-import sys 
+import sys
 from pathlib import Path
 
+
 current_file = Path(__file__).resolve()
 root_dir = current_file.parents[1]
 sys.path.append(str(root_dir))
 
-from finetrainers import Trainer
-from finetrainers.utils.file_utils import string_to_filename
-from finetrainers.constants import PRECOMPUTED_DIR_NAME, PRECOMPUTED_CONDITIONS_DIR_NAME, PRECOMPUTED_LATENTS_DIR_NAME
-from huggingface_hub import snapshot_download
-import tempfile
-import glob
-import os
+import os  # noqa
+import tempfile  # noqa
+
+from huggingface_hub import snapshot_download  # noqa
+
+from finetrainers import Trainer  # noqa
+from finetrainers.constants import PRECOMPUTED_CONDITIONS_DIR_NAME, PRECOMPUTED_DIR_NAME, PRECOMPUTED_LATENTS_DIR_NAME  # noqa
+from finetrainers.utils.file_utils import string_to_filename  # noqa
+
 
 class TrainerTestMixin:
-    model_name = None 
+    model_name = None
 
     def get_training_args(self):
         raise NotImplementedError
-    
+
     def download_dataset_txt_format(self, cache_dir):
         path = snapshot_download(repo_id="finetrainers/dummy-disney-dataset", repo_type="dataset", cache_dir=cache_dir)
         return path
-    
+
     def test_precomputation_txt_format(self):
         # Here we assume the dataset is formatted like:
         # https://huggingface.co/datasets/Wild-Heart/Disney-VideoGeneration-Dataset/tree/main
         training_args = self.get_training_args()
-    
+
         with tempfile.TemporaryDirectory() as tmpdir:
             # Prepare remaining args.
             training_args.data_root = Path(self.download_dataset_txt_format(cache_dir=tmpdir))
@@ -36,7 +39,11 @@ class TrainerTestMixin:
             training_args.video_column = "videos.txt"
             training_args.caption_column = "prompt.txt"
             with open(f"{training_args.data_root}/{training_args.video_column}", "r", encoding="utf-8") as file:
-                video_paths = [training_args.data_root.joinpath(line.strip()) for line in file.readlines() if len(line.strip()) > 0]
+                video_paths = [
+                    training_args.data_root.joinpath(line.strip())
+                    for line in file.readlines()
+                    if len(line.strip()) > 0
+                ]
 
             # Initialize trainer.
             training_args.output_dir = tmpdir
@@ -52,12 +59,14 @@ class TrainerTestMixin:
             precomputation_dir = (
                 Path(training_args.data_root) / f"{training_args.model_name}_{cleaned_model_id}_{PRECOMPUTED_DIR_NAME}"
             )
-    
+
             # Checks.
             conditions_dir = precomputation_dir / PRECOMPUTED_CONDITIONS_DIR_NAME
             latents_dir = precomputation_dir / PRECOMPUTED_LATENTS_DIR_NAME
-            assert os.path.exists(precomputation_dir), f"Precomputation wasn't successful. Couldn't find the precomputed dir: {os.listdir(training_args.data_root)=}\n"
+            assert os.path.exists(
+                precomputation_dir
+            ), f"Precomputation wasn't successful. Couldn't find the precomputed dir: {os.listdir(training_args.data_root)=}\n"
             assert os.path.exists(conditions_dir), f"conditions dir ({str(conditions_dir)}) doesn't exist"
             assert os.path.exists(latents_dir), f"latents dir ({str(latents_dir)}) doesn't exist"
-            assert len(video_paths) == len([p for p in conditions_dir.glob("*.pt")])
-            assert len(video_paths) == len([p for p in latents_dir.glob("*.pt")])
\ No newline at end of file
+            assert len(video_paths) == len([p for p in conditions_dir.glob("*.pt")])  # noqa
+            assert len(video_paths) == len([p for p in latents_dir.glob("*.pt")])  # noqa

From 8841dbd388c95dcadba5993aa9f059cc86f4ff88 Mon Sep 17 00:00:00 2001
From: sayakpaul
Date: Fri, 24 Jan 2025 13:05:07 +0530
Subject: [PATCH 06/19] remove do_not_run_validation.

---
 finetrainers/args.py                       | 4 +---
 tests/trainers/cogvideox/test_cogvideox.py | 4 +++-
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/finetrainers/args.py b/finetrainers/args.py
index 343d6396..46cd04cc 100644
--- a/finetrainers/args.py
+++ b/finetrainers/args.py
@@ -337,7 +337,6 @@ class Args:
     validation_every_n_steps: Optional[int] = None
     enable_model_cpu_offload: bool = False
     validation_frame_rate: int = 25
-    do_not_run_validation: bool = False
 
     # Miscellaneous arguments
     tracker_name: str = "finetrainers"
@@ -484,8 +483,7 @@ def parse_arguments() -> Args:
 def validate_args(args: Args):
     _validated_model_args(args)
     _validate_training_args(args)
-    if not args.do_not_run_validation:
-        _validate_validation_args(args)
+    _validate_validation_args(args)
 
 
 def _add_model_arguments(parser: argparse.ArgumentParser) -> None:
diff --git a/tests/trainers/cogvideox/test_cogvideox.py b/tests/trainers/cogvideox/test_cogvideox.py
index 390e0968..0ef7ea02 100644
--- a/tests/trainers/cogvideox/test_cogvideox.py
+++ b/tests/trainers/cogvideox/test_cogvideox.py
@@ -30,5 +30,7 @@ class CogVideoXTester(unittest.TestCase, TrainerTestMixin):
         args.data_root = ""  # will be set from the tester method.
         args.video_resolution_buckets = [parse_resolution_bucket("9x16x16")]
         args.precompute_conditions = True
-        args.do_not_run_validation = True
+        args.validation_prompts = []
+        args.validation_heights = []
+        args.validation_widths = []
         return args
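With the do_not_run_validation flag gone, the testers opt out of validation by leaving the validation lists empty, which the argument validation appears to accept. The same setup in isolation (Args fields as defined in finetrainers/args.py):

from finetrainers import Args

args = Args()
args.validation_prompts = []  # empty lists effectively disable validation runs
args.validation_heights = []
args.validation_widths = []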
From ee368a160bff109ce44693c2d11e6248f081c20f Mon Sep 17 00:00:00 2001
From: sayakpaul
Date: Fri, 24 Jan 2025 13:21:52 +0530
Subject: [PATCH 07/19] make get_memory_stat method leaner.

---
 finetrainers/utils/memory_utils.py | 36 ++++++++++++++----------------
 1 file changed, 17 insertions(+), 19 deletions(-)

diff --git a/finetrainers/utils/memory_utils.py b/finetrainers/utils/memory_utils.py
index 906efba4..1fa5962d 100644
--- a/finetrainers/utils/memory_utils.py
+++ b/finetrainers/utils/memory_utils.py
@@ -9,35 +9,33 @@
 
 
 def get_memory_statistics(precision: int = 3) -> Dict[str, Any]:
-    memory_allocated = None
-    memory_reserved = None
-    max_memory_allocated = None
-    max_memory_reserved = None
+    memory_stats = {
+        "memory_allocated": None,
+        "memory_reserved": None,
+        "max_memory_allocated": None,
+        "max_memory_reserved": None,
+    }
 
     if torch.cuda.is_available():
         device = torch.cuda.current_device()
-        memory_allocated = torch.cuda.memory_allocated(device)
-        memory_reserved = torch.cuda.memory_reserved(device)
-        max_memory_allocated = torch.cuda.max_memory_allocated(device)
-        max_memory_reserved = torch.cuda.max_memory_reserved(device)
+        memory_stats.update(
+            {
+                "memory_allocated": torch.cuda.memory_allocated(device),
+                "memory_reserved": torch.cuda.memory_reserved(device),
+                "max_memory_allocated": torch.cuda.max_memory_allocated(device),
+                "max_memory_reserved": torch.cuda.max_memory_reserved(device),
+            }
+        )
 
     elif torch.backends.mps.is_available():
-        memory_allocated = torch.mps.current_allocated_memory()
+        memory_stats["memory_allocated"] = torch.mps.current_allocated_memory()
 
     else:
         logger.warning("No CUDA, MPS, or ROCm device found. Memory statistics are not available.")
 
     return {
-        "memory_allocated": round(bytes_to_gigabytes(memory_allocated), ndigits=precision)
-        if memory_allocated
-        else None,
-        "memory_reserved": round(bytes_to_gigabytes(memory_reserved), ndigits=precision) if memory_reserved else None,
-        "max_memory_allocated": round(bytes_to_gigabytes(max_memory_allocated), ndigits=precision)
-        if max_memory_allocated
-        else None,
-        "max_memory_reserved": round(bytes_to_gigabytes(max_memory_reserved), ndigits=precision)
-        if max_memory_reserved
-        else None,
+        key: (round(bytes_to_gigabytes(value), ndigits=precision) if value else None)
+        for key, value in memory_stats.items()
     }

From 5529264e76e86a594bb538ea812aa4137ed8669d Mon Sep 17 00:00:00 2001
From: sayakpaul
Date: Fri, 24 Jan 2025 13:25:10 +0530
Subject: [PATCH 08/19] reset memory utils.

---
 finetrainers/trainer.py            | 11 ++++-------
 finetrainers/utils/memory_utils.py |  8 ++++++++
 2 files changed, 12 insertions(+), 7 deletions(-)

diff --git a/finetrainers/trainer.py b/finetrainers/trainer.py
index bdbddccf..9b049190 100644
--- a/finetrainers/trainer.py
+++ b/finetrainers/trainer.py
@@ -54,7 +54,7 @@
 )
 from .utils.file_utils import string_to_filename
 from .utils.hub_utils import save_model_card
-from .utils.memory_utils import free_memory, get_memory_statistics, make_contiguous
+from .utils.memory_utils import free_memory, get_memory_statistics, make_contiguous, reset_memory_stats
 from .utils.model_utils import resolve_vae_cls_from_ckpt_path
 from .utils.optimizer_utils import get_optimizer
 from .utils.torch_utils import align_device_and_dtype, expand_tensor_dims, unwrap_model
@@ -255,8 +255,7 @@ def collate_fn(batch):
 
         memory_statistics = get_memory_statistics()
         logger.info(f"Memory after precomputing conditions: {json.dumps(memory_statistics, indent=4)}")
-        if torch.cuda.is_available():
-            torch.cuda.reset_peak_memory_stats(accelerator.device)
+        reset_memory_stats(accelerator.device)
 
         # Precompute latents
         latent_components = self.model_config["load_latent_models"](**self._get_load_components_kwargs())
@@ -303,8 +302,7 @@ def collate_fn(batch):
 
         memory_statistics = get_memory_statistics()
         logger.info(f"Memory after precomputing latents: {json.dumps(memory_statistics, indent=4)}")
-        if torch.cuda.is_available():
-            torch.cuda.reset_peak_memory_stats(accelerator.device)
+        reset_memory_stats(accelerator.device)
 
         # Update dataloader to use precomputed conditions and latents
         self.dataloader = torch.utils.data.DataLoader(
@@ -986,8 +984,7 @@ def validate(self, step: int, final_validation: bool = False) -> None:
         free_memory()
         memory_statistics = get_memory_statistics()
         logger.info(f"Memory after validation end: {json.dumps(memory_statistics, indent=4)}")
-        if torch.cuda.is_available():
-            torch.cuda.reset_peak_memory_stats(accelerator.device)
+        reset_memory_stats(accelerator.device)
 
         if not final_validation:
             self.transformer.train()
diff --git a/finetrainers/utils/memory_utils.py b/finetrainers/utils/memory_utils.py
index 1fa5962d..7bffdb4a 100644
--- a/finetrainers/utils/memory_utils.py
+++ b/finetrainers/utils/memory_utils.py
@@ -53,6 +53,14 @@ def free_memory() -> None:
     # TODO(aryan): handle non-cuda devices
 
 
+def reset_memory_stats(device: torch.device):
+    # TODO: handle for non-cuda devices
+    if torch.cuda.is_available():
+        torch.cuda.reset_peak_memory_stats(device)
+    else:
+        logger.warning("No CUDA device found. Memory statistics are not available.")
+
+
 def make_contiguous(x: Union[torch.Tensor, Dict[str, torch.Tensor]]) -> Union[torch.Tensor, Dict[str, torch.Tensor]]:
     if isinstance(x, torch.Tensor):
         return x.contiguous()
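Patches 07 and 08 together turn the memory helpers into device-agnostic utilities that the trainer can call unconditionally. A minimal usage sketch, assuming the finetrainers.utils.memory_utils module as modified above:

import json

import torch

from finetrainers.utils.memory_utils import get_memory_statistics, reset_memory_stats

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
reset_memory_stats(device)  # warns and no-ops on non-CUDA backends
# ... run a precomputation or training step ...
stats = get_memory_statistics()  # entries are None when the backend can't report them
print(json.dumps(stats, indent=4))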
From d63fbcf6774ea3b4de788f8610f15da751b640df Mon Sep 17 00:00:00 2001
From: sayakpaul
Date: Fri, 24 Jan 2025 13:27:28 +0530
Subject: [PATCH 09/19] sync util.

---
 finetrainers/trainer.py            | 11 ++++++++---
 finetrainers/utils/memory_utils.py |  9 ++++++++-
 2 files changed, 16 insertions(+), 4 deletions(-)

diff --git a/finetrainers/trainer.py b/finetrainers/trainer.py
index 9b049190..bb28329d 100644
--- a/finetrainers/trainer.py
+++ b/finetrainers/trainer.py
@@ -54,7 +54,13 @@
 )
 from .utils.file_utils import string_to_filename
 from .utils.hub_utils import save_model_card
-from .utils.memory_utils import free_memory, get_memory_statistics, make_contiguous, reset_memory_stats
+from .utils.memory_utils import (
+    free_memory,
+    get_memory_statistics,
+    make_contiguous,
+    reset_memory_stats,
+    synchornize_device,
+)
 from .utils.model_utils import resolve_vae_cls_from_ckpt_path
 from .utils.optimizer_utils import get_optimizer
 from .utils.torch_utils import align_device_and_dtype, expand_tensor_dims, unwrap_model
@@ -1107,8 +1113,7 @@ def _delete_components(self) -> None:
         self.vae = None
         self.scheduler = None
         free_memory()
-        if torch.cuda.is_available():
-            torch.cuda.synchronize(self.state.accelerator.device)
+        synchornize_device(self.state.accelerator.device)
 
     def _get_and_prepare_pipeline_for_validation(self, final_validation: bool = False) -> DiffusionPipeline:
         accelerator = self.state.accelerator
diff --git a/finetrainers/utils/memory_utils.py b/finetrainers/utils/memory_utils.py
index 7bffdb4a..b0579311 100644
--- a/finetrainers/utils/memory_utils.py
+++ b/finetrainers/utils/memory_utils.py
@@ -58,7 +58,14 @@ def reset_memory_stats(device: torch.device):
     if torch.cuda.is_available():
         torch.cuda.reset_peak_memory_stats(device)
     else:
-        logger.warning("No CUDA device found. Memory statistics are not available.")
+        logger.warning("No CUDA device found. Nothing to reset memory of.")
+
+
+def synchornize_device(device: torch.device):
+    if torch.cuda.is_available():
+        torch.cuda.synchronize(device)
+    else:
+        logger.warning("No CUDA device found. Nothing to synchronize.")
 
 
 def make_contiguous(x: Union[torch.Tensor, Dict[str, torch.Tensor]]) -> Union[torch.Tensor, Dict[str, torch.Tensor]]:
From 1f61911846dd413c91940a8d2993c1555f16591c Mon Sep 17 00:00:00 2001
From: sayakpaul
Date: Fri, 24 Jan 2025 14:48:20 +0530
Subject: [PATCH 10/19] updates

---
 tests/trainers/cogvideox/test_cogvideox.py |  20 ++-
 tests/trainers/test_trainers_common.py     | 147 +++++++++++++++------
 2 files changed, 122 insertions(+), 45 deletions(-)

diff --git a/tests/trainers/cogvideox/test_cogvideox.py b/tests/trainers/cogvideox/test_cogvideox.py
index 0ef7ea02..a5419274 100644
--- a/tests/trainers/cogvideox/test_cogvideox.py
+++ b/tests/trainers/cogvideox/test_cogvideox.py
@@ -20,11 +20,13 @@ def parse_resolution_bucket(resolution_bucket: str) -> Tuple[int, ...]:
 
 
 class CogVideoXTester(unittest.TestCase, TrainerTestMixin):
-    model_name = "cogvideox"
+    MODEL_NAME = "cogvideox"
+    EXPECTED_PRECOMPUTATION_LATENT_KEYS = {"latents"}
+    EXPECTED_PRECOMPUTATION_CONDITION_KEYS = {"prompt_embeds"}
 
     def get_training_args(self):
         args = Args()
-        args.model_name = self.model_name
+        args.model_name = self.MODEL_NAME
         args.training_type = "lora"
         args.pretrained_model_name_or_path = "finetrainers/dummy-cogvideox"
         args.data_root = ""  # will be set from the tester method.
@@ -34,3 +36,17 @@ class CogVideoXTester(unittest.TestCase, TrainerTestMixin):
         args.validation_heights = []
         args.validation_widths = []
         return args
+
+    @property
+    def latent_output_shape(self):
+        return (8, 3, 2, 2)
+
+    @property
+    def condition_output_shape(self):
+        return (226, 32)
+
+    def populate_shapes(self):
+        for k in self.EXPECTED_PRECOMPUTATION_LATENT_KEYS:
+            self.EXPECTED_LATENT_SHAPES[k] = self.latent_output_shape
+        for k in self.EXPECTED_PRECOMPUTATION_CONDITION_KEYS:
+            self.EXPECTED_CONDITION_SHAPES[k] = self.condition_output_shape
diff --git a/tests/trainers/test_trainers_common.py b/tests/trainers/test_trainers_common.py
index 0b59e8ff..b95b3e9b 100644
--- a/tests/trainers/test_trainers_common.py
+++ b/tests/trainers/test_trainers_common.py
@@ -1,72 +1,133 @@
-import sys
+import tempfile
 from pathlib import Path
 
+import torch
+from huggingface_hub import snapshot_download
 
-current_file = Path(__file__).resolve()
-root_dir = current_file.parents[1]
-sys.path.append(str(root_dir))
+from finetrainers import Trainer
+from finetrainers.constants import (
+    PRECOMPUTED_CONDITIONS_DIR_NAME,
+    PRECOMPUTED_DIR_NAME,
+    PRECOMPUTED_LATENTS_DIR_NAME,
+)
+from finetrainers.utils.file_utils import string_to_filename
 
-import os  # noqa
-import tempfile  # noqa
-
-from huggingface_hub import snapshot_download  # noqa
-
-from finetrainers import Trainer  # noqa
-from finetrainers.constants import PRECOMPUTED_CONDITIONS_DIR_NAME, PRECOMPUTED_DIR_NAME, PRECOMPUTED_LATENTS_DIR_NAME  # noqa
-from finetrainers.utils.file_utils import string_to_filename  # noqa
 
 class TrainerTestMixin:
-    model_name = None
+    MODEL_NAME = None
+    EXPECTED_PRECOMPUTATION_LATENT_KEYS = set()
+    EXPECTED_LATENT_SHAPES = {}
+    EXPECTED_PRECOMPUTATION_CONDITION_KEYS = set()
+    EXPECTED_CONDITION_SHAPES = {}
 
     def get_training_args(self):
         raise NotImplementedError
 
+    @property
+    def latent_output_shape(self):
+        raise NotImplementedError
+
+    @property
+    def condition_output_shape(self):
+        raise NotImplementedError
+
+    def populate_shapes(self):
+        raise NotImplementedError
+
     def download_dataset_txt_format(self, cache_dir):
-        path = snapshot_download(repo_id="finetrainers/dummy-disney-dataset", repo_type="dataset", cache_dir=cache_dir)
-        return path
+        return snapshot_download(repo_id="finetrainers/dummy-disney-dataset", repo_type="dataset", cache_dir=cache_dir)
+
+    def get_precomputation_dir(self, training_args):
+        cleaned_model_id = string_to_filename(training_args.pretrained_model_name_or_path)
+        return Path(training_args.data_root) / f"{training_args.model_name}_{cleaned_model_id}_{PRECOMPUTED_DIR_NAME}"
+
+    def tearDown(self):
+        super().tearDown()
+        self.EXPECTED_LATENT_SHAPES.clear()
+        self.EXPECTED_CONDITION_SHAPES.clear()
+
+    def _verify_precomputed_files(self, video_paths, all_conditions, all_latents):
+        assert len(video_paths) == len(all_conditions), "Mismatch in conditions file count"
+        assert len(video_paths) == len(all_latents), "Mismatch in latents file count"
+
+        for latent, condition in zip(all_latents, all_conditions):
+            latent_keys = set(torch.load(latent, weights_only=True).keys())
+            condition_keys = set(torch.load(condition, weights_only=True).keys())
+            assert latent_keys == self.EXPECTED_PRECOMPUTATION_LATENT_KEYS, f"Unexpected latent keys: {latent_keys}"
+            assert (
+                condition_keys == self.EXPECTED_PRECOMPUTATION_CONDITION_KEYS
+            ), f"Unexpected condition keys: {condition_keys}"
+
+    def _verify_shapes(self, latent_files, condition_files):
+        self.populate_shapes()
+
+        for l_path, c_path in zip(latent_files, condition_files):
+            latent = torch.load(l_path, weights_only=True, map_location="cpu")
+            condition = torch.load(c_path, weights_only=True, map_location="cpu")
+
+            for key in self.EXPECTED_PRECOMPUTATION_LATENT_KEYS:
+                assert (
+                    latent[key].shape[1:] == self.EXPECTED_LATENT_SHAPES[key]
+                ), f"Latent shape mismatch for key: {key}"
+
+            for key in self.EXPECTED_PRECOMPUTATION_CONDITION_KEYS:
+                assert (
+                    condition[key].shape[1:] == self.EXPECTED_CONDITION_SHAPES[key]
+                ), f"Condition shape mismatch for key: {key}"
 
-    def test_precomputation_txt_format(self):
-        # Here we assume the dataset is formatted like:
-        # https://huggingface.co/datasets/Wild-Heart/Disney-VideoGeneration-Dataset/tree/main
+    def test_precomputation_txt_format_creates_files(self):
         training_args = self.get_training_args()
 
         with tempfile.TemporaryDirectory() as tmpdir:
-            # Prepare remaining args.
             training_args.data_root = Path(self.download_dataset_txt_format(cache_dir=tmpdir))
-
             training_args.video_column = "videos.txt"
             training_args.caption_column = "prompt.txt"
-            with open(f"{training_args.data_root}/{training_args.video_column}", "r", encoding="utf-8") as file:
-                video_paths = [
-                    training_args.data_root.joinpath(line.strip())
-                    for line in file.readlines()
-                    if len(line.strip()) > 0
-                ]
 
-            # Initialize trainer.
+            with open(training_args.data_root / training_args.video_column, "r", encoding="utf-8") as file:
+                video_paths = [training_args.data_root / line.strip() for line in file if line.strip()]
+
             training_args.output_dir = tmpdir
             trainer = Trainer(training_args)
             training_args = trainer.args
 
-            # Perform precomputations.
             trainer.prepare_dataset()
             trainer.prepare_models()
             trainer.prepare_precomputations()
 
-            cleaned_model_id = string_to_filename(training_args.pretrained_model_name_or_path)
-            precomputation_dir = (
-                Path(training_args.data_root) / f"{training_args.model_name}_{cleaned_model_id}_{PRECOMPUTED_DIR_NAME}"
-            )
+            precomputation_dir = self.get_precomputation_dir(training_args)
+            conditions_dir = precomputation_dir / PRECOMPUTED_CONDITIONS_DIR_NAME
+            latents_dir = precomputation_dir / PRECOMPUTED_LATENTS_DIR_NAME
+
+            assert precomputation_dir.exists(), f"Precomputed dir not found: {precomputation_dir}"
+            assert conditions_dir.exists(), f"Conditions dir not found: {conditions_dir}"
+            assert latents_dir.exists(), f"Latents dir not found: {latents_dir}"
+
+            all_conditions = list(conditions_dir.glob("*.pt"))
+            all_latents = list(latents_dir.glob("*.pt"))
+
+            self._verify_precomputed_files(video_paths, all_conditions, all_latents)
+
+    def test_precomputation_txt_format_matches_shapes(self):
+        training_args = self.get_training_args()
+
+        with tempfile.TemporaryDirectory() as tmpdir:
+            training_args.data_root = Path(self.download_dataset_txt_format(cache_dir=tmpdir))
+            training_args.video_column = "videos.txt"
+            training_args.caption_column = "prompt.txt"
+
+            training_args.output_dir = tmpdir
+            trainer = Trainer(training_args)
+            training_args = trainer.args
+
+            trainer.prepare_dataset()
+            trainer.prepare_models()
+            trainer.prepare_precomputations()
 
-            # Checks.
+            precomputation_dir = self.get_precomputation_dir(training_args)
             conditions_dir = precomputation_dir / PRECOMPUTED_CONDITIONS_DIR_NAME
             latents_dir = precomputation_dir / PRECOMPUTED_LATENTS_DIR_NAME
-            assert os.path.exists(
-                precomputation_dir
-            ), f"Precomputation wasn't successful. Couldn't find the precomputed dir: {os.listdir(training_args.data_root)=}\n"
-            assert os.path.exists(conditions_dir), f"conditions dir ({str(conditions_dir)}) doesn't exist"
-            assert os.path.exists(latents_dir), f"latents dir ({str(latents_dir)}) doesn't exist"
-            assert len(video_paths) == len([p for p in conditions_dir.glob("*.pt")])  # noqa
-            assert len(video_paths) == len([p for p in latents_dir.glob("*.pt")])  # noqa
+
+            latent_files = list(latents_dir.glob("*.pt"))
+            condition_files = list(conditions_dir.glob("*.pt"))
+
+            self._verify_shapes(latent_files, condition_files)

From c432f399abe526deca18ff419945bce07435957f Mon Sep 17 00:00:00 2001
From: sayakpaul
Date: Mon, 27 Jan 2025 18:17:52 +0530
Subject: [PATCH 11/19] typo.

Co-authored-by: a-r-r-o-w
---
 finetrainers/trainer.py            | 4 ++--
 finetrainers/utils/memory_utils.py | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/finetrainers/trainer.py b/finetrainers/trainer.py
index 9711ca0c..81dc1d68 100644
--- a/finetrainers/trainer.py
+++ b/finetrainers/trainer.py
@@ -59,7 +59,7 @@
     get_memory_statistics,
     make_contiguous,
     reset_memory_stats,
-    synchornize_device,
+    synchronize_device,
 )
 from .utils.model_utils import resolve_vae_cls_from_ckpt_path
 from .utils.optimizer_utils import get_optimizer
@@ -1113,7 +1113,7 @@ def _delete_components(self) -> None:
         self.vae = None
         self.scheduler = None
         free_memory()
-        synchornize_device(self.state.accelerator.device)
+        synchronize_device(self.state.accelerator.device)
 
     def _get_and_prepare_pipeline_for_validation(self, final_validation: bool = False) -> DiffusionPipeline:
         accelerator = self.state.accelerator
diff --git a/finetrainers/utils/memory_utils.py b/finetrainers/utils/memory_utils.py
index b0579311..dcde3d89 100644
--- a/finetrainers/utils/memory_utils.py
+++ b/finetrainers/utils/memory_utils.py
@@ -61,7 +61,7 @@ def reset_memory_stats(device: torch.device):
         logger.warning("No CUDA device found. Nothing to reset memory of.")
 
 
-def synchornize_device(device: torch.device):
+def synchronize_device(device: torch.device):
     if torch.cuda.is_available():
         torch.cuda.synchronize(device)
     else:
         logger.warning("No CUDA device found. Nothing to synchronize.")
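The verification helpers added in patch 10 reduce to loading each serialized dict of tensors and comparing key sets and per-sample shapes. The same check, stripped down to a standalone sketch (the file name is illustrative; the expected values mirror the cogvideox tester above):

import torch

expected_keys = {"latents"}
payload = torch.load("0.pt", weights_only=True, map_location="cpu")  # illustrative file name

assert set(payload.keys()) == expected_keys, f"Unexpected keys: {set(payload.keys())}"
# Shapes are compared without the leading batch dimension, as in _verify_shapes.
assert tuple(payload["latents"].shape[1:]) == (8, 3, 2, 2)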
From 3609fdfabb2e77185e316d985dd946e11ab39c74 Mon Sep 17 00:00:00 2001
From: sayakpaul
Date: Mon, 27 Jan 2025 18:20:11 +0530
Subject: [PATCH 12/19] resolve imports.

---
 tests/trainers/cogvideox/test_cogvideox.py | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/tests/trainers/cogvideox/test_cogvideox.py b/tests/trainers/cogvideox/test_cogvideox.py
index a5419274..da5384a0 100644
--- a/tests/trainers/cogvideox/test_cogvideox.py
+++ b/tests/trainers/cogvideox/test_cogvideox.py
@@ -1,16 +1,14 @@
 import sys
+import unittest
 from pathlib import Path
+from typing import Tuple
 
 
 current_file = Path(__file__).resolve()
 root_dir = current_file.parents[3]
 sys.path.append(str(root_dir))
 
-import unittest  # noqa
-from typing import Tuple  # noqa
-
 from finetrainers import Args  # noqa
-
 from ..test_trainers_common import TrainerTestMixin  # noqa

From 8faac472d48675456434fae7f9b07ef208381800 Mon Sep 17 00:00:00 2001
From: sayakpaul
Date: Wed, 29 Jan 2025 16:11:57 +0530
Subject: [PATCH 13/19] updates

---
 tests/trainers/test_trainers_common.py | 39 +++++++++++++++++++++++---
 1 file changed, 35 insertions(+), 4 deletions(-)

diff --git a/tests/trainers/test_trainers_common.py b/tests/trainers/test_trainers_common.py
index b95b3e9b..fc54d22d 100644
--- a/tests/trainers/test_trainers_common.py
+++ b/tests/trainers/test_trainers_common.py
@@ -1,16 +1,22 @@
+import sys
 import tempfile
 from pathlib import Path
 
 import torch
 from huggingface_hub import snapshot_download
 
-from finetrainers import Trainer
-from finetrainers.constants import (
+
+current_file = Path(__file__).resolve()
+root_dir = current_file.parents[2]
+sys.path.append(str(root_dir))
+
+from finetrainers import Trainer  # noqa
+from finetrainers.constants import (  # noqa
     PRECOMPUTED_CONDITIONS_DIR_NAME,
     PRECOMPUTED_DIR_NAME,
     PRECOMPUTED_LATENTS_DIR_NAME,
 )
-from finetrainers.utils.file_utils import string_to_filename
+from finetrainers.utils.file_utils import string_to_filename  # noqa
 
 
 class TrainerTestMixin:
@@ -121,7 +127,9 @@ class TrainerTestMixin:
 
             trainer.prepare_dataset()
             trainer.prepare_models()
-            trainer.prepare_precomputations()
+            with self.assertLogs(level="INFO") as captured:
+                trainer.prepare_precomputations()
+            assert any("Precomputed data not found. Running precomputation." in msg for msg in captured.output)
 
             precomputation_dir = self.get_precomputation_dir(training_args)
             conditions_dir = precomputation_dir / PRECOMPUTED_CONDITIONS_DIR_NAME
@@ -131,3 +139,26 @@ class TrainerTestMixin:
             latent_files = list(latents_dir.glob("*.pt"))
             condition_files = list(conditions_dir.glob("*.pt"))
 
             self._verify_shapes(latent_files, condition_files)
+
+    def test_precomputation_txt_format_no_redo(self):
+        training_args = self.get_training_args()
+
+        with tempfile.TemporaryDirectory() as tmpdir:
+            training_args.data_root = Path(self.download_dataset_txt_format(cache_dir=tmpdir))
+            training_args.video_column = "videos.txt"
+            training_args.caption_column = "prompt.txt"
+
+            training_args.output_dir = tmpdir
+            trainer = Trainer(training_args)
+            training_args = trainer.args
+
+            trainer.prepare_dataset()
+            trainer.prepare_models()
+            trainer.prepare_precomputations()
+
+            with self.assertLogs(level="INFO") as captured:
+                trainer.prepare_precomputations()
+
+            assert any(
+                "Precomputed conditions and latents found. Loading precomputed data" in msg for msg in captured.output
+            )
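Patch 13 relies on unittest's assertLogs to tell a fresh precomputation run apart from a cached one. The mechanism in isolation looks like this (logger name and message are placeholders echoing the trainer's own log lines):

import logging
import unittest


class AssertLogsExample(unittest.TestCase):
    def test_info_message_is_captured(self):
        with self.assertLogs(level="INFO") as captured:
            logging.getLogger("finetrainers").info("Precomputed data not found. Running precomputation.")
        # captured.output holds formatted records, e.g. "INFO:finetrainers:..."
        self.assertTrue(any("Running precomputation" in msg for msg in captured.output))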
From 19356bb7329dab98d985854462c544b0f3a097c2 Mon Sep 17 00:00:00 2001
From: sayakpaul
Date: Wed, 29 Jan 2025 16:19:18 +0530
Subject: [PATCH 14/19] update ltx

---
 tests/trainers/cogvideox/test_cogvideox.py |  8 +---
 tests/trainers/ltx_video/__init__.py       |  0
 tests/trainers/ltx_video/test_ltx_video.py | 44 ++++++++++++++++++++++
 tests/trainers/test_trainers_common.py     |  6 +++
 4 files changed, 51 insertions(+), 7 deletions(-)
 create mode 100644 tests/trainers/ltx_video/__init__.py
 create mode 100644 tests/trainers/ltx_video/test_ltx_video.py

diff --git a/tests/trainers/cogvideox/test_cogvideox.py b/tests/trainers/cogvideox/test_cogvideox.py
index da5384a0..e0790322 100644
--- a/tests/trainers/cogvideox/test_cogvideox.py
+++ b/tests/trainers/cogvideox/test_cogvideox.py
@@ -1,7 +1,6 @@
 import sys
 import unittest
 from pathlib import Path
-from typing import Tuple
 
 
 current_file = Path(__file__).resolve()
@@ -9,12 +8,7 @@ root_dir = current_file.parents[3]
 sys.path.append(str(root_dir))
 
 from finetrainers import Args  # noqa
-from ..test_trainers_common import TrainerTestMixin  # noqa
-
-
-# Copied for now.
-def parse_resolution_bucket(resolution_bucket: str) -> Tuple[int, ...]:
-    return tuple(map(int, resolution_bucket.split("x")))
+from ..test_trainers_common import TrainerTestMixin, parse_resolution_bucket  # noqa
diff --git a/tests/trainers/ltx_video/__init__.py b/tests/trainers/ltx_video/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/tests/trainers/ltx_video/test_ltx_video.py b/tests/trainers/ltx_video/test_ltx_video.py
new file mode 100644
index 00000000..d74d5602
--- /dev/null
+++ b/tests/trainers/ltx_video/test_ltx_video.py
@@ -0,0 +1,44 @@
+import sys
+import unittest
+from pathlib import Path
+
+
+current_file = Path(__file__).resolve()
+root_dir = current_file.parents[3]
+sys.path.append(str(root_dir))
+
+from finetrainers import Args  # noqa
+from ..test_trainers_common import TrainerTestMixin, parse_resolution_bucket  # noqa
+
+
+class LTXVideoTester(unittest.TestCase, TrainerTestMixin):
+    MODEL_NAME = "ltx_video"
+    EXPECTED_PRECOMPUTATION_LATENT_KEYS = {"latents"}
+    EXPECTED_PRECOMPUTATION_CONDITION_KEYS = {"prompt_embeds"}
+
+    def get_training_args(self):
+        args = Args()
+        args.model_name = self.MODEL_NAME
+        args.training_type = "lora"
+        args.pretrained_model_name_or_path = "finetrainers/dummy-ltxvideo"
+        args.data_root = ""  # will be set from the tester method.
+        args.video_resolution_buckets = [parse_resolution_bucket("9x16x16")]
+        args.precompute_conditions = True
+        args.validation_prompts = []
+        args.validation_heights = []
+        args.validation_widths = []
+        return args
+
+    @property
+    def latent_output_shape(self):
+        return (8, 3, 2, 2)
+
+    @property
+    def condition_output_shape(self):
+        return (226, 32)
+
+    def populate_shapes(self):
+        for k in self.EXPECTED_PRECOMPUTATION_LATENT_KEYS:
+            self.EXPECTED_LATENT_SHAPES[k] = self.latent_output_shape
+        for k in self.EXPECTED_PRECOMPUTATION_CONDITION_KEYS:
+            self.EXPECTED_CONDITION_SHAPES[k] = self.condition_output_shape
diff --git a/tests/trainers/test_trainers_common.py b/tests/trainers/test_trainers_common.py
index fc54d22d..02f6d193 100644
--- a/tests/trainers/test_trainers_common.py
+++ b/tests/trainers/test_trainers_common.py
@@ -1,6 +1,7 @@
 import sys
 import tempfile
 from pathlib import Path
+from typing import Tuple
 
 import torch
 from huggingface_hub import snapshot_download
@@ -19,6 +20,11 @@ from finetrainers.constants import (  # noqa
 from finetrainers.utils.file_utils import string_to_filename  # noqa
 
 
+# Copied for now.
+def parse_resolution_bucket(resolution_bucket: str) -> Tuple[int, ...]:
+    return tuple(map(int, resolution_bucket.split("x")))
+
+
 class TrainerTestMixin:
     MODEL_NAME = None
     EXPECTED_PRECOMPUTATION_LATENT_KEYS = set()

From 5909c21e2fcb836fa291dbf8105edf2522b32fdd Mon Sep 17 00:00:00 2001
From: sayakpaul
Date: Wed, 29 Jan 2025 17:36:21 +0530
Subject: [PATCH 15/19] updates

---
 finetrainers/models/ltx_video/lora.py      | 11 ++-
 tests/trainers/ltx_video/test_ltx_video.py | 22 +++--
 tests/trainers/test_trainers_common.py     | 98 ++++++++++++----------
 3 files changed, 75 insertions(+), 56 deletions(-)

diff --git a/finetrainers/models/ltx_video/lora.py b/finetrainers/models/ltx_video/lora.py
index bdd6ffa3..49ea1db4 100644
--- a/finetrainers/models/ltx_video/lora.py
+++ b/finetrainers/models/ltx_video/lora.py
@@ -5,7 +5,7 @@
 from accelerate.logging import get_logger
 from diffusers import AutoencoderKLLTXVideo, FlowMatchEulerDiscreteScheduler, LTXPipeline, LTXVideoTransformer3DModel
 from PIL import Image
-from transformers import T5EncoderModel, T5Tokenizer
+from transformers import AutoTokenizer, T5EncoderModel, T5Tokenizer
 
 
 logger = get_logger("finetrainers")  # pylint: disable=invalid-name
@@ -18,7 +18,14 @@ def load_condition_models(
     cache_dir: Optional[str] = None,
     **kwargs,
 ) -> Dict[str, nn.Module]:
-    tokenizer = T5Tokenizer.from_pretrained(model_id, subfolder="tokenizer", revision=revision, cache_dir=cache_dir)
+    try:
+        tokenizer = T5Tokenizer.from_pretrained(
+            model_id, subfolder="tokenizer", revision=revision, cache_dir=cache_dir
+        )
+    except:  # noqa
+        tokenizer = AutoTokenizer.from_pretrained(
+            model_id, subfolder="tokenizer", revision=revision, cache_dir=cache_dir
+        )
     text_encoder = T5EncoderModel.from_pretrained(
         model_id, subfolder="text_encoder", torch_dtype=text_encoder_dtype, revision=revision, cache_dir=cache_dir
     )
diff --git a/tests/trainers/ltx_video/test_ltx_video.py b/tests/trainers/ltx_video/test_ltx_video.py
index d74d5602..e0de0aa8 100644
--- a/tests/trainers/ltx_video/test_ltx_video.py
+++ b/tests/trainers/ltx_video/test_ltx_video.py
@@ -13,8 +13,8 @@ from ..test_trainers_common import TrainerTestMixin, parse_resolution_bucket  # noqa
 
 class LTXVideoTester(unittest.TestCase, TrainerTestMixin):
     MODEL_NAME = "ltx_video"
-    EXPECTED_PRECOMPUTATION_LATENT_KEYS = {"latents"}
-    EXPECTED_PRECOMPUTATION_CONDITION_KEYS = {"prompt_embeds"}
+    EXPECTED_PRECOMPUTATION_LATENT_KEYS = {"height", "latents", "latents_mean", "latents_std", "num_frames", "width"}
+    EXPECTED_PRECOMPUTATION_CONDITION_KEYS = {"prompt_attention_mask", "prompt_embeds"}
 
     def get_training_args(self):
         args = Args()
@@ -31,14 +31,20 @@ class LTXVideoTester(unittest.TestCase, TrainerTestMixin):
 
     @property
     def latent_output_shape(self):
-        return (8, 3, 2, 2)
+        # only tensor object shapes
+        return (16, 3, 4, 4), (), ()
 
     @property
     def condition_output_shape(self):
-        return (226, 32)
+        # only tensor object shapes
+        return (128,), (128, 32)
 
     def populate_shapes(self):
-        for k in self.EXPECTED_PRECOMPUTATION_LATENT_KEYS:
-            self.EXPECTED_LATENT_SHAPES[k] = self.latent_output_shape
-        for k in self.EXPECTED_PRECOMPUTATION_CONDITION_KEYS:
-            self.EXPECTED_CONDITION_SHAPES[k] = self.condition_output_shape
+        i = 0
+        for k in sorted(self.EXPECTED_PRECOMPUTATION_LATENT_KEYS):
+            if k in ["height", "num_frames", "width"]:
+                continue
+            self.EXPECTED_LATENT_SHAPES[k] = self.latent_output_shape[i]
+            i += 1
+        for i, k in enumerate(sorted(self.EXPECTED_PRECOMPUTATION_CONDITION_KEYS)):
+            self.EXPECTED_CONDITION_SHAPES[k] = self.condition_output_shape[i]
diff --git a/tests/trainers/test_trainers_common.py b/tests/trainers/test_trainers_common.py
index 02f6d193..9be7294a 100644
--- a/tests/trainers/test_trainers_common.py
+++ b/tests/trainers/test_trainers_common.py
@@ -20,8 +20,8 @@ from finetrainers.utils.file_utils import string_to_filename  # noqa
 
 
-# Copied for now.
 def parse_resolution_bucket(resolution_bucket: str) -> Tuple[int, ...]:
+    """Parse a resolution like '512x512' into a tuple of ints (512, 512)."""
     return tuple(map(int, resolution_bucket.split("x")))
 
 
@@ -50,6 +50,7 @@ class TrainerTestMixin:
         return snapshot_download(repo_id="finetrainers/dummy-disney-dataset", repo_type="dataset", cache_dir=cache_dir)
 
     def get_precomputation_dir(self, training_args):
+        """Return the path of the precomputation directory based on the training args."""
         cleaned_model_id = string_to_filename(training_args.pretrained_model_name_or_path)
         return Path(training_args.data_root) / f"{training_args.model_name}_{cleaned_model_id}_{PRECOMPUTED_DIR_NAME}"
 
@@ -59,51 +60,72 @@ class TrainerTestMixin:
         self.EXPECTED_CONDITION_SHAPES.clear()
 
     def _verify_precomputed_files(self, video_paths, all_conditions, all_latents):
+        """Check that the correct number of precomputed files exist and have the right keys."""
         assert len(video_paths) == len(all_conditions), "Mismatch in conditions file count"
         assert len(video_paths) == len(all_latents), "Mismatch in latents file count"
 
         for latent, condition in zip(all_latents, all_conditions):
-            latent_keys = set(torch.load(latent, weights_only=True).keys())
-            condition_keys = set(torch.load(condition, weights_only=True).keys())
-            assert latent_keys == self.EXPECTED_PRECOMPUTATION_LATENT_KEYS, f"Unexpected latent keys: {latent_keys}"
-            assert (
-                condition_keys == self.EXPECTED_PRECOMPUTATION_CONDITION_KEYS
-            ), f"Unexpected condition keys: {condition_keys}"
+            latent_keys = sorted(set(torch.load(latent, weights_only=True).keys()))
+            condition_keys = sorted(set(torch.load(condition, weights_only=True).keys()))
+            assert latent_keys == sorted(
+                self.EXPECTED_PRECOMPUTATION_LATENT_KEYS
+            ), f"Unexpected latent keys: {latent_keys}"
+            assert condition_keys == sorted(
+                self.EXPECTED_PRECOMPUTATION_CONDITION_KEYS
+            ), f"Unexpected condition keys: {condition_keys}"
 
     def _verify_shapes(self, latent_files, condition_files):
+        """Check that the shapes of latents and conditions match expected shapes."""
         self.populate_shapes()
-
         for l_path, c_path in zip(latent_files, condition_files):
             latent = torch.load(l_path, weights_only=True, map_location="cpu")
             condition = torch.load(c_path, weights_only=True, map_location="cpu")
 
             for key in self.EXPECTED_PRECOMPUTATION_LATENT_KEYS:
+                if not torch.is_tensor(latent[key]):
+                    continue
+                expected = self.EXPECTED_LATENT_SHAPES[key]
+                original = tuple(latent[key].shape[1:])
                 assert (
-                    latent[key].shape[1:] == self.EXPECTED_LATENT_SHAPES[key]
-                ), f"Latent shape mismatch for key: {key}"
+                    original == expected
+                ), f"Latent shape mismatch for key: {key}. expected={expected}, got={original}"
 
             for key in self.EXPECTED_PRECOMPUTATION_CONDITION_KEYS:
+                if not torch.is_tensor(condition[key]):
+                    continue
+                expected = self.EXPECTED_CONDITION_SHAPES[key]
+                original = tuple(condition[key].shape[1:])
                 assert (
-                    condition[key].shape[1:] == self.EXPECTED_CONDITION_SHAPES[key]
-                ), f"Condition shape mismatch for key: {key}"
-
-    def test_precomputation_txt_format_creates_files(self):
-        training_args = self.get_training_args()
+                    original == expected
+                ), f"Condition shape mismatch for key: {key}. expected={expected}, got={original}"
+
+    def _setup_trainer(self, tmpdir):
+        """
+        Helper method to reduce duplication across tests.
+        Creates and returns a trainer, along with updated training args.
+        """
+        training_args = self.get_training_args()
+        training_args.data_root = Path(self.download_dataset_txt_format(cache_dir=tmpdir))
+        training_args.video_column = "videos.txt"
+        training_args.caption_column = "prompt.txt"
+        training_args.output_dir = tmpdir
+
+        trainer = Trainer(training_args)
+        # Trainer may update the training_args internally, so refresh the reference
+        training_args = trainer.args
+        trainer.prepare_dataset()
+        trainer.prepare_models()
+        return trainer, training_args
 
+    def test_precomputation_txt_format_creates_files(self):
         with tempfile.TemporaryDirectory() as tmpdir:
-            training_args.data_root = Path(self.download_dataset_txt_format(cache_dir=tmpdir))
-            training_args.video_column = "videos.txt"
-            training_args.caption_column = "prompt.txt"
+            trainer, training_args = self._setup_trainer(tmpdir)
 
+            # Load video paths (only needed in this test)
             with open(training_args.data_root / training_args.video_column, "r", encoding="utf-8") as file:
                 video_paths = [training_args.data_root / line.strip() for line in file if line.strip()]
 
-            training_args.output_dir = tmpdir
-            trainer = Trainer(training_args)
-            training_args = trainer.args
-
-            trainer.prepare_dataset()
-            trainer.prepare_models()
             trainer.prepare_precomputations()
 
             precomputation_dir = self.get_precomputation_dir(training_args)
@@ -142,22 +164,14 @@ class TrainerTestMixin:
             self._verify_precomputed_files(video_paths, all_conditions, all_latents)
 
     def test_precomputation_txt_format_matches_shapes(self):
-        training_args = self.get_training_args()
-
         with tempfile.TemporaryDirectory() as tmpdir:
-            training_args.data_root = Path(self.download_dataset_txt_format(cache_dir=tmpdir))
-            training_args.video_column = "videos.txt"
-            training_args.caption_column = "prompt.txt"
+            trainer, training_args = self._setup_trainer(tmpdir)
 
-            training_args.output_dir = tmpdir
-            trainer = Trainer(training_args)
-            training_args = trainer.args
-
-            trainer.prepare_dataset()
-            trainer.prepare_models()
             with self.assertLogs(level="INFO") as captured:
                 trainer.prepare_precomputations()
-            assert any("Precomputed data not found. Running precomputation." in msg for msg in captured.output)
+            assert any(
+                "Precomputed data not found. Running precomputation." in msg for msg in captured.output
+            ), "Expected info log about missing precomputed data."
 
             precomputation_dir = self.get_precomputation_dir(training_args)
             conditions_dir = precomputation_dir / PRECOMPUTED_CONDITIONS_DIR_NAME
             latents_dir = precomputation_dir / PRECOMPUTED_LATENTS_DIR_NAME
 
             latent_files = list(latents_dir.glob("*.pt"))
             condition_files = list(conditions_dir.glob("*.pt"))
 
             self._verify_shapes(latent_files, condition_files)
 
     def test_precomputation_txt_format_no_redo(self):
-        training_args = self.get_training_args()
-
         with tempfile.TemporaryDirectory() as tmpdir:
-            training_args.data_root = Path(self.download_dataset_txt_format(cache_dir=tmpdir))
-            training_args.video_column = "videos.txt"
-            training_args.caption_column = "prompt.txt"
+            trainer, _ = self._setup_trainer(tmpdir)
 
-            training_args.output_dir = tmpdir
-            trainer = Trainer(training_args)
-            training_args = trainer.args
-
-            trainer.prepare_dataset()
-            trainer.prepare_models()
+            # should create new precomputations
             trainer.prepare_precomputations()
 
+            # should detect existing precomputations and not redo
             with self.assertLogs(level="INFO") as captured:
                 trainer.prepare_precomputations()
 
             assert any(
                 "Precomputed conditions and latents found. Loading precomputed data" in msg for msg in captured.output
-            )
+            ), "Expected info log about found precomputations."
 
             precomputation_dir = self.get_precomputation_dir(training_args)
             conditions_dir = precomputation_dir / PRECOMPUTED_CONDITIONS_DIR_NAME
@@ -147,24 +161,16 @@ def test_precomputation_txt_format_matches_shapes(self):
         self._verify_shapes(latent_files, condition_files)
 
     def test_precomputation_txt_format_no_redo(self):
-        training_args = self.get_training_args()
-
         with tempfile.TemporaryDirectory() as tmpdir:
-            training_args.data_root = Path(self.download_dataset_txt_format(cache_dir=tmpdir))
-            training_args.video_column = "videos.txt"
-            training_args.caption_column = "prompt.txt"
-
-            training_args.output_dir = tmpdir
-            trainer = Trainer(training_args)
-            training_args = trainer.args
+            trainer, _ = self._setup_trainer(tmpdir)
 
-            trainer.prepare_dataset()
-            trainer.prepare_models()
+            # should create new precomputations
             trainer.prepare_precomputations()
 
+            # should detect existing precomputations and not redo
             with self.assertLogs(level="INFO") as captured:
                 trainer.prepare_precomputations()
             assert any(
                 "Precomputed conditions and latents found. Loading precomputed data" in msg
                 for msg in captured.output
-            )
+            ), "Expected info log about found precomputations."

From 778d0774d6e17db964e3c3e54fc3eeeda9d49677 Mon Sep 17 00:00:00 2001
From: sayakpaul
Date: Wed, 29 Jan 2025 17:55:01 +0530
Subject: [PATCH 16/19] updates

---
 tests/trainers/hunyaun_video/__init__.py      |  0
 .../hunyaun_video/test_hunyaun_video.py       | 46 +++++++++++++++++++
 2 files changed, 46 insertions(+)
 create mode 100644 tests/trainers/hunyaun_video/__init__.py
 create mode 100644 tests/trainers/hunyaun_video/test_hunyaun_video.py

diff --git a/tests/trainers/hunyaun_video/__init__.py b/tests/trainers/hunyaun_video/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/tests/trainers/hunyaun_video/test_hunyaun_video.py b/tests/trainers/hunyaun_video/test_hunyaun_video.py
new file mode 100644
index 00000000..7d0916ed
--- /dev/null
+++ b/tests/trainers/hunyaun_video/test_hunyaun_video.py
@@ -0,0 +1,46 @@
+import sys
+import unittest
+from pathlib import Path
+
+
+current_file = Path(__file__).resolve()
+root_dir = current_file.parents[3]
+sys.path.append(str(root_dir))
+
+from finetrainers import Args  # noqa
+from ..test_trainers_common import TrainerTestMixin, parse_resolution_bucket  # noqa
+
+
+class LTXVideoTester(unittest.TestCase, TrainerTestMixin):
+    MODEL_NAME = "hunyuan_video"
+    EXPECTED_PRECOMPUTATION_LATENT_KEYS = {"latents"}
+    EXPECTED_PRECOMPUTATION_CONDITION_KEYS = {"pooled_prompt_embeds", "prompt_attention_mask", "prompt_embeds"}
+
+    def get_training_args(self):
+        args = Args()
+        args.model_name = self.MODEL_NAME
+        args.training_type = "lora"
+        args.pretrained_model_name_or_path = "finetrainers/dummy-hunyaunvideo"
+        args.data_root = ""  # will be set from the tester method.
+        args.video_resolution_buckets = [parse_resolution_bucket("9x16x16")]
+        args.precompute_conditions = True
+        args.validation_prompts = []
+        args.validation_heights = []
+        args.validation_widths = []
+        return args
+
+    @property
+    def latent_output_shape(self):
+        # only tensor object shapes
+        return (16, 3, 4, 4)
+
+    @property
+    def condition_output_shape(self):
+        # only tensor object shapes
+        return (), (128,), (128, 32)
+
+    def populate_shapes(self):
+        for i, k in enumerate(sorted(self.EXPECTED_PRECOMPUTATION_LATENT_KEYS)):
+            self.EXPECTED_LATENT_SHAPES[k] = self.latent_output_shape[i]
+        for i, k in enumerate(sorted(self.EXPECTED_PRECOMPUTATION_CONDITION_KEYS)):
+            self.EXPECTED_CONDITION_SHAPES[k] = self.condition_output_shape[i]

From 0f8b4bbfcb1708d732bc503901da4102de9cf1ad Mon Sep 17 00:00:00 2001
From: sayakpaul
Date: Thu, 30 Jan 2025 11:44:03 +0530
Subject: [PATCH 17/19] updates

---
 tests/trainers/hunyaun_video/test_hunyaun_video.py | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/tests/trainers/hunyaun_video/test_hunyaun_video.py b/tests/trainers/hunyaun_video/test_hunyaun_video.py
index 7d0916ed..65194097 100644
--- a/tests/trainers/hunyaun_video/test_hunyaun_video.py
+++ b/tests/trainers/hunyaun_video/test_hunyaun_video.py
@@ -11,10 +11,15 @@
 from ..test_trainers_common import TrainerTestMixin, parse_resolution_bucket  # noqa
 
 
-class LTXVideoTester(unittest.TestCase, TrainerTestMixin):
+class HunyuanVideoTester(unittest.TestCase, TrainerTestMixin):
     MODEL_NAME = "hunyuan_video"
     EXPECTED_PRECOMPUTATION_LATENT_KEYS = {"latents"}
-    EXPECTED_PRECOMPUTATION_CONDITION_KEYS = {"pooled_prompt_embeds", "prompt_attention_mask", "prompt_embeds"}
+    EXPECTED_PRECOMPUTATION_CONDITION_KEYS = {
+        "guidance",
+        "pooled_prompt_embeds",
+        "prompt_attention_mask",
+        "prompt_embeds",
+    }
 
     def get_training_args(self):
         args = Args()

From 9595803cec21eb363b26acaf35fa9753723142c8 Mon Sep 17 00:00:00 2001
From: sayakpaul
Date: Thu, 30 Jan 2025 12:37:20 +0530
Subject: [PATCH 18/19] updates

---
 finetrainers/models/hunyuan_video/lora.py          | 1 +
 tests/trainers/hunyaun_video/test_hunyaun_video.py | 6 +++---
 tests/trainers/test_trainers_common.py             | 1 +
 3 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/finetrainers/models/hunyuan_video/lora.py b/finetrainers/models/hunyuan_video/lora.py
index 1d8ccd1f..7f45fd9b 100644
--- a/finetrainers/models/hunyuan_video/lora.py
+++ b/finetrainers/models/hunyuan_video/lora.py
@@ -202,6 +202,7 @@ def prepare_latents(
         h = torch.cat(encoded_slices)
     else:
         h = vae._encode(image_or_video)
+    print(f"{h.shape=}")
 
     return {"latents": h}
 
diff --git a/tests/trainers/hunyaun_video/test_hunyaun_video.py b/tests/trainers/hunyaun_video/test_hunyaun_video.py
index 65194097..935223ba 100644
--- a/tests/trainers/hunyaun_video/test_hunyaun_video.py
+++ b/tests/trainers/hunyaun_video/test_hunyaun_video.py
@@ -37,15 +37,15 @@ def get_training_args(self):
     @property
     def latent_output_shape(self):
         # only tensor object shapes
-        return (16, 3, 4, 4)
+        return (8, 3, 2, 2)
 
     @property
     def condition_output_shape(self):
         # only tensor object shapes
-        return (), (128,), (128, 32)
+        return (), (8,), (256,), (256, 16)
 
     def populate_shapes(self):
         for i, k in enumerate(sorted(self.EXPECTED_PRECOMPUTATION_LATENT_KEYS)):
-            self.EXPECTED_LATENT_SHAPES[k] = self.latent_output_shape[i]
+            self.EXPECTED_LATENT_SHAPES[k] = self.latent_output_shape
         for i, k in enumerate(sorted(self.EXPECTED_PRECOMPUTATION_CONDITION_KEYS)):
             self.EXPECTED_CONDITION_SHAPES[k] = self.condition_output_shape[i]
diff --git a/tests/trainers/test_trainers_common.py b/tests/trainers/test_trainers_common.py
index 9be7294a..3486c4f1 100644
--- a/tests/trainers/test_trainers_common.py
+++ b/tests/trainers/test_trainers_common.py
@@ -86,6 +86,7 @@ def _verify_shapes(self, latent_files, condition_files):
                 continue
             expected = self.EXPECTED_LATENT_SHAPES[key]
             original = tuple(latent[key].shape[1:])
+            print(f"{key=}")
             assert (
                 original == expected
             ), f"Latent shape mismatch for key: {key}. expected={expected}, got={original}"

From 2c7f758a0465d7b37b4983ff770fa025ebcc963c Mon Sep 17 00:00:00 2001
From: sayakpaul
Date: Thu, 30 Jan 2025 12:37:53 +0530
Subject: [PATCH 19/19] fixes

---
 finetrainers/models/hunyuan_video/lora.py | 1 -
 tests/trainers/test_trainers_common.py    | 1 -
 2 files changed, 2 deletions(-)

diff --git a/finetrainers/models/hunyuan_video/lora.py b/finetrainers/models/hunyuan_video/lora.py
index 7f45fd9b..1d8ccd1f 100644
--- a/finetrainers/models/hunyuan_video/lora.py
+++ b/finetrainers/models/hunyuan_video/lora.py
@@ -202,7 +202,6 @@ def prepare_latents(
         h = torch.cat(encoded_slices)
     else:
         h = vae._encode(image_or_video)
-    print(f"{h.shape=}")
 
     return {"latents": h}
 
diff --git a/tests/trainers/test_trainers_common.py b/tests/trainers/test_trainers_common.py
index 3486c4f1..9be7294a 100644
--- a/tests/trainers/test_trainers_common.py
+++ b/tests/trainers/test_trainers_common.py
@@ -86,7 +86,6 @@ def _verify_shapes(self, latent_files, condition_files):
                 continue
             expected = self.EXPECTED_LATENT_SHAPES[key]
             original = tuple(latent[key].shape[1:])
-            print(f"{key=}")
             assert (
                 original == expected
             ), f"Latent shape mismatch for key: {key}. expected={expected}, got={original}"
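
A note on the shape bookkeeping used by these testers: populate_shapes pairs each
expected key, taken in sorted (alphabetical) order, with the shape tuple at the same
position in condition_output_shape (and analogously for latent_output_shape). A minimal,
self-contained sketch of that contract, reusing the hunyuan-video keys and shapes from
PATCH 18 purely for illustration:

    # Sketch: sorted condition keys pair positionally with the declared shape tuples.
    EXPECTED_PRECOMPUTATION_CONDITION_KEYS = {
        "guidance",
        "pooled_prompt_embeds",
        "prompt_attention_mask",
        "prompt_embeds",
    }
    # One entry per key, in sorted key order:
    # guidance -> (), pooled_prompt_embeds -> (8,),
    # prompt_attention_mask -> (256,), prompt_embeds -> (256, 16)
    condition_output_shape = ((), (8,), (256,), (256, 16))

    EXPECTED_CONDITION_SHAPES = {}
    for i, k in enumerate(sorted(EXPECTED_PRECOMPUTATION_CONDITION_KEYS)):
        EXPECTED_CONDITION_SHAPES[k] = condition_output_shape[i]

    assert EXPECTED_CONDITION_SHAPES["guidance"] == ()
    assert EXPECTED_CONDITION_SHAPES["prompt_embeds"] == (256, 16)

This positional contract is why condition_output_shape must declare exactly one tuple
per expected key, and why adding a key (such as "guidance" in PATCH 17) requires
extending the tuple in the matching position.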
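
Relatedly, _verify_shapes compares tuple(tensor.shape[1:]) against these expected
shapes, which assumes each precomputed tensor is saved with a leading batch dimension;
non-tensor entries are skipped via torch.is_tensor. A sketch of that check, where the
batch size of 1 and the sample values are assumptions for illustration only:

    import torch

    # Hypothetical precomputed latent file contents: a tensor with an assumed
    # leading batch dimension of 1, plus scalar metadata.
    latent = {"latents": torch.randn(1, 8, 3, 2, 2), "height": 16}

    expected = (8, 3, 2, 2)  # per-sample latent shape, as in the hunyuan-video tester
    for key, value in latent.items():
        if not torch.is_tensor(value):
            continue  # skip non-tensor entries such as height/width/num_frames
        original = tuple(value.shape[1:])  # drop the batch dimension
        assert original == expected, f"expected={expected}, got={original}"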