From 72d3000d2e4b368e7393ba4ba19b2e43121ebf23 Mon Sep 17 00:00:00 2001
From: sayakpaul
Date: Tue, 21 Jan 2025 11:16:20 +0530
Subject: [PATCH 01/19] add precomputation tests

---
 tests/trainers/test_trainers_common.py | 29 ++++++++++++++++++++++++++
 1 file changed, 29 insertions(+)
 create mode 100644 tests/trainers/test_trainers_common.py

diff --git a/tests/trainers/test_trainers_common.py b/tests/trainers/test_trainers_common.py
new file mode 100644
index 00000000..f92b2be4
--- /dev/null
+++ b/tests/trainers/test_trainers_common.py
@@ -0,0 +1,29 @@
+from finetrainers import Trainer
+from huggingface_hub import snapshot_download
+import tempfile
+import os
+
+class TrainerTestMixin:
+    def get_training_args(self):
+        raise NotImplementedError
+
+    def download_dataset_txt_format(self, cache_dir):
+        path = snapshot_download(repo_id="finetrainers/dummy-disney-dataset", repo_type="dataset", cache_dir=cache_dir)
+        return path
+
+    def test_precomputation_txt_format(self):
+        # Here we assume the dataset is formatted like:
+        # https://huggingface.co/datasets/Wild-Heart/Disney-VideoGeneration-Dataset/tree/main
+        training_args = self.get_training_args()
+
+        with tempfile.TemporaryDirectory() as tmpdir:
+            training_args.data_root = self.download_dataset_txt_format(cache_dir=tmpdir)
+            trainer = Trainer(training_args)
+            training_args = trainer.args
+
+            trainer.prepare_dataset()
+            trainer.prepare_models()
+            trainer.prepare_precomputations()
+
+            precomputed_dir = os.path.join(training_args.data_root, f"{training_args.pretrained_model_name_or_path}_precomputed")
+            assert os.path.exists(precomputed_dir), f"Precomputation wasn't successful. Couldn't find the precomputed dir: {os.listdir()}"
\ No newline at end of file

From d91476da76b998c07ec2d1404da79318fcce9f4f Mon Sep 17 00:00:00 2001
From: sayakpaul
Date: Tue, 21 Jan 2025 11:24:44 +0530
Subject: [PATCH 02/19] update

---
 tests/trainers/__init__.py                 |  0
 tests/trainers/cogvideox/__init__.py       |  0
 tests/trainers/cogvideox/test_cogvideox.py | 12 ++++++++++++
 3 files changed, 12 insertions(+)
 create mode 100644 tests/trainers/__init__.py
 create mode 100644 tests/trainers/cogvideox/__init__.py
 create mode 100644 tests/trainers/cogvideox/test_cogvideox.py

diff --git a/tests/trainers/__init__.py b/tests/trainers/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/tests/trainers/cogvideox/__init__.py b/tests/trainers/cogvideox/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/tests/trainers/cogvideox/test_cogvideox.py b/tests/trainers/cogvideox/test_cogvideox.py
new file mode 100644
index 00000000..fd4439a0
--- /dev/null
+++ b/tests/trainers/cogvideox/test_cogvideox.py
@@ -0,0 +1,12 @@
+from ..test_trainers_common import TrainerTestMixin
+from finetrainers import parse_arguments
+import unittest
+
+
+class CogVideoXTester(unittest.TestCase, TrainerTestMixin):
+    def get_training_args(self):
+        args = parse_arguments()
+        args.training_type = "lora"
+        args.pretrained_model_name_or_path = "finetrainers/dummy-cogvideox"
+        args.video_resolution_buckets = "9x16x16"
+        return args
\ No newline at end of file

From 9ba2aff07e415cff3a3ecd927d3614daff137ae7 Mon Sep 17 00:00:00 2001
From: sayakpaul
Date: Tue, 21 Jan 2025 11:45:37 +0530
Subject: [PATCH 03/19] updates

---
 tests/trainers/cogvideox/test_cogvideox.py |  7 +++++++
 tests/trainers/test_trainers_common.py     | 11 +++++++++++
 2 files changed, 18 insertions(+)

diff --git a/tests/trainers/cogvideox/test_cogvideox.py b/tests/trainers/cogvideox/test_cogvideox.py
index fd4439a0..e8aa44e4 100644
--- a/tests/trainers/cogvideox/test_cogvideox.py
+++ b/tests/trainers/cogvideox/test_cogvideox.py
@@ -1,3 +1,10 @@
+import sys
+from pathlib import Path
+
+current_file = Path(__file__).resolve()
+root_dir = current_file.parents[3]
+sys.path.append(str(root_dir))
+
 from ..test_trainers_common import TrainerTestMixin
 from finetrainers import parse_arguments
 import unittest
diff --git a/tests/trainers/test_trainers_common.py b/tests/trainers/test_trainers_common.py
index f92b2be4..0cdbcd27 100644
--- a/tests/trainers/test_trainers_common.py
+++ b/tests/trainers/test_trainers_common.py
@@ -1,3 +1,14 @@
+import sys
+from pathlib import Path
+
+current_file = Path(__file__).resolve()
+root_dir = current_file.parents[1]
+sys.path.append(str(root_dir))
+
+# import os
+# current_dir = os.path.dirname(os.path.abspath(__file__))
+# print(f"{current_dir=}")
+
 from finetrainers import Trainer
 from huggingface_hub import snapshot_download
 import tempfile
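The layout established by the first three patches — a shared TrainerTestMixin plus one thin per-model subclass — is the pattern the rest of the series builds on. As a rough sketch of how a new backend would plug in (the MyModelTester name and the dummy-mymodel checkpoint are illustrative, not part of this PR; the argument style mirrors the first version of the cogvideox tester above):

import unittest

from finetrainers import parse_arguments

from ..test_trainers_common import TrainerTestMixin


class MyModelTester(unittest.TestCase, TrainerTestMixin):
    # Only the model-specific arguments live here; the shared
    # precomputation test body comes from TrainerTestMixin.
    def get_training_args(self):
        args = parse_arguments()
        args.training_type = "lora"
        args.pretrained_model_name_or_path = "finetrainers/dummy-mymodel"  # hypothetical dummy checkpoint
        args.video_resolution_buckets = "9x16x16"
        return args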
From 348fe817db658e280ee3c40200702d7f629bdf5e Mon Sep 17 00:00:00 2001
From: sayakpaul
Date: Tue, 21 Jan 2025 13:53:04 +0530
Subject: [PATCH 04/19] changes

---
 finetrainers/args.py                       |  4 ++-
 finetrainers/models/cogvideox/lora.py      |  7 +++--
 finetrainers/trainer.py                    | 12 +++++---
 finetrainers/utils/memory_utils.py         |  8 ++---
 tests/trainers/cogvideox/test_cogvideox.py | 18 +++++++++--
 tests/trainers/test_trainers_common.py     | 35 ++++++++++++++++++----
 6 files changed, 64 insertions(+), 20 deletions(-)

diff --git a/finetrainers/args.py b/finetrainers/args.py
index 46cd04cc..343d6396 100644
--- a/finetrainers/args.py
+++ b/finetrainers/args.py
@@ -337,6 +337,7 @@ class Args:
     validation_every_n_steps: Optional[int] = None
     enable_model_cpu_offload: bool = False
     validation_frame_rate: int = 25
+    do_not_run_validation: bool = False
 
     # Miscellaneous arguments
     tracker_name: str = "finetrainers"
@@ -483,7 +484,8 @@ def parse_arguments() -> Args:
 def validate_args(args: Args):
     _validated_model_args(args)
     _validate_training_args(args)
-    _validate_validation_args(args)
+    if not args.do_not_run_validation:
+        _validate_validation_args(args)
 
 
 def _add_model_arguments(parser: argparse.ArgumentParser) -> None:
diff --git a/finetrainers/models/cogvideox/lora.py b/finetrainers/models/cogvideox/lora.py
index 65d86ee9..dfeda3be 100644
--- a/finetrainers/models/cogvideox/lora.py
+++ b/finetrainers/models/cogvideox/lora.py
@@ -3,7 +3,7 @@
 import torch
 from diffusers import AutoencoderKLCogVideoX, CogVideoXDDIMScheduler, CogVideoXPipeline, CogVideoXTransformer3DModel
 from PIL import Image
-from transformers import T5EncoderModel, T5Tokenizer
+from transformers import T5EncoderModel, T5Tokenizer, AutoTokenizer
 
 from .utils import prepare_rotary_positional_embeddings
@@ -15,7 +15,10 @@ def load_condition_models(
     cache_dir: Optional[str] = None,
     **kwargs,
 ):
-    tokenizer = T5Tokenizer.from_pretrained(model_id, subfolder="tokenizer", revision=revision, cache_dir=cache_dir)
+    try:
+        tokenizer = T5Tokenizer.from_pretrained(model_id, subfolder="tokenizer", revision=revision, cache_dir=cache_dir)
+    except:
+        tokenizer = AutoTokenizer.from_pretrained(model_id, subfolder="tokenizer", revision=revision, cache_dir=cache_dir)
     text_encoder = T5EncoderModel.from_pretrained(
         model_id, subfolder="text_encoder", torch_dtype=text_encoder_dtype, revision=revision, cache_dir=cache_dir
     )
diff --git a/finetrainers/trainer.py b/finetrainers/trainer.py
index 0839db79..bdbddccf 100644
--- a/finetrainers/trainer.py
+++ b/finetrainers/trainer.py
@@ -255,7 +255,8 @@ def collate_fn(batch):
 
         memory_statistics = get_memory_statistics()
         logger.info(f"Memory after precomputing conditions: {json.dumps(memory_statistics, indent=4)}")
-        torch.cuda.reset_peak_memory_stats(accelerator.device)
+        if torch.cuda.is_available():
+            torch.cuda.reset_peak_memory_stats(accelerator.device)
 
         # Precompute latents
         latent_components = self.model_config["load_latent_models"](**self._get_load_components_kwargs())
@@ -302,7 +303,8 @@ def collate_fn(batch):
 
         memory_statistics = get_memory_statistics()
         logger.info(f"Memory after precomputing latents: {json.dumps(memory_statistics, indent=4)}")
-        torch.cuda.reset_peak_memory_stats(accelerator.device)
+        if torch.cuda.is_available():
+            torch.cuda.reset_peak_memory_stats(accelerator.device)
 
         # Update dataloader to use precomputed conditions and latents
         self.dataloader = torch.utils.data.DataLoader(
@@ -984,7 +986,8 @@ def validate(self, step: int, final_validation: bool = False) -> None:
         free_memory()
         memory_statistics = get_memory_statistics()
         logger.info(f"Memory after validation end: {json.dumps(memory_statistics, indent=4)}")
-        torch.cuda.reset_peak_memory_stats(accelerator.device)
+        if torch.cuda.is_available():
+            torch.cuda.reset_peak_memory_stats(accelerator.device)
 
         if not final_validation:
             self.transformer.train()
@@ -1107,7 +1110,8 @@ def _delete_components(self) -> None:
         self.vae = None
         self.scheduler = None
         free_memory()
-        torch.cuda.synchronize(self.state.accelerator.device)
+        if torch.cuda.is_available():
+            torch.cuda.synchronize(self.state.accelerator.device)
 
     def _get_and_prepare_pipeline_for_validation(self, final_validation: bool = False) -> DiffusionPipeline:
         accelerator = self.state.accelerator
diff --git a/finetrainers/utils/memory_utils.py b/finetrainers/utils/memory_utils.py
index d7616b19..1b492507 100644
--- a/finetrainers/utils/memory_utils.py
+++ b/finetrainers/utils/memory_utils.py
@@ -28,10 +28,10 @@ def get_memory_statistics(precision: int = 3) -> Dict[str, Any]:
         logger.warning("No CUDA, MPS, or ROCm device found. Memory statistics are not available.")
 
     return {
-        "memory_allocated": round(bytes_to_gigabytes(memory_allocated), ndigits=precision),
-        "memory_reserved": round(bytes_to_gigabytes(memory_reserved), ndigits=precision),
-        "max_memory_allocated": round(bytes_to_gigabytes(max_memory_allocated), ndigits=precision),
-        "max_memory_reserved": round(bytes_to_gigabytes(max_memory_reserved), ndigits=precision),
+        "memory_allocated": round(bytes_to_gigabytes(memory_allocated), ndigits=precision) if memory_allocated else None,
+        "memory_reserved": round(bytes_to_gigabytes(memory_reserved), ndigits=precision) if memory_reserved else None,
+        "max_memory_allocated": round(bytes_to_gigabytes(max_memory_allocated), ndigits=precision) if max_memory_allocated else None,
+        "max_memory_reserved": round(bytes_to_gigabytes(max_memory_reserved), ndigits=precision) if max_memory_reserved else None,
     }
diff --git a/tests/trainers/cogvideox/test_cogvideox.py b/tests/trainers/cogvideox/test_cogvideox.py
index e8aa44e4..247da525 100644
--- a/tests/trainers/cogvideox/test_cogvideox.py
+++ b/tests/trainers/cogvideox/test_cogvideox.py
@@ -6,14 +6,26 @@
 sys.path.append(str(root_dir))
 
 from ..test_trainers_common import TrainerTestMixin
-from finetrainers import parse_arguments
+from typing import Tuple
+from finetrainers import Args
 import unittest
 
 
+# Copied for now.
+def parse_resolution_bucket(resolution_bucket: str) -> Tuple[int, ...]:
+    return tuple(map(int, resolution_bucket.split("x")))
+
+
 class CogVideoXTester(unittest.TestCase, TrainerTestMixin):
+    model_name = "cogvideox"
+
     def get_training_args(self):
-        args = parse_arguments()
+        args = Args()
+        args.model_name = self.model_name
         args.training_type = "lora"
         args.pretrained_model_name_or_path = "finetrainers/dummy-cogvideox"
-        args.video_resolution_buckets = "9x16x16"
+        args.data_root = ""  # will be set from the tester method.
+        args.video_resolution_buckets = [parse_resolution_bucket("9x16x16")]
+        args.precompute_conditions = True
+        args.do_not_run_validation = True
         return args
\ No newline at end of file
diff --git a/tests/trainers/test_trainers_common.py b/tests/trainers/test_trainers_common.py
index 0cdbcd27..f80f93b6 100644
--- a/tests/trainers/test_trainers_common.py
+++ b/tests/trainers/test_trainers_common.py
@@ -5,16 +5,18 @@
 root_dir = current_file.parents[1]
 sys.path.append(str(root_dir))
 
-# import os
-# current_dir = os.path.dirname(os.path.abspath(__file__))
-# print(f"{current_dir=}")
-
 from finetrainers import Trainer
+from finetrainers.utils.file_utils import string_to_filename
+from finetrainers.constants import PRECOMPUTED_DIR_NAME, PRECOMPUTED_CONDITIONS_DIR_NAME, PRECOMPUTED_LATENTS_DIR_NAME
 from huggingface_hub import snapshot_download
 import tempfile
+import glob
 import os
 
+
 class TrainerTestMixin:
+    model_name = None
+
     def get_training_args(self):
         raise NotImplementedError
 
@@ -28,13 +30,34 @@ class TrainerTestMixin:
     def test_precomputation_txt_format(self):
         # Here we assume the dataset is formatted like:
         # https://huggingface.co/datasets/Wild-Heart/Disney-VideoGeneration-Dataset/tree/main
         training_args = self.get_training_args()
 
         with tempfile.TemporaryDirectory() as tmpdir:
-            training_args.data_root = self.download_dataset_txt_format(cache_dir=tmpdir)
+            # Prepare remaining args.
+            training_args.data_root = Path(self.download_dataset_txt_format(cache_dir=tmpdir))
+
+            training_args.video_column = "videos.txt"
+            training_args.caption_column = "prompt.txt"
+            with open(f"{training_args.data_root}/{training_args.video_column}", "r", encoding="utf-8") as file:
+                video_paths = [training_args.data_root.joinpath(line.strip()) for line in file.readlines() if len(line.strip()) > 0]
+
+            # Initialize trainer.
+            training_args.output_dir = tmpdir
             trainer = Trainer(training_args)
             training_args = trainer.args
 
+            # Perform precomputations.
             trainer.prepare_dataset()
             trainer.prepare_models()
             trainer.prepare_precomputations()
 
-            precomputed_dir = os.path.join(training_args.data_root, f"{training_args.pretrained_model_name_or_path}_precomputed")
-            assert os.path.exists(precomputed_dir), f"Precomputation wasn't successful. Couldn't find the precomputed dir: {os.listdir()}"
\ No newline at end of file
+            cleaned_model_id = string_to_filename(training_args.pretrained_model_name_or_path)
+            precomputation_dir = (
+                Path(training_args.data_root) / f"{training_args.model_name}_{cleaned_model_id}_{PRECOMPUTED_DIR_NAME}"
+            )
+
+            # Checks.
+            conditions_dir = precomputation_dir / PRECOMPUTED_CONDITIONS_DIR_NAME
+            latents_dir = precomputation_dir / PRECOMPUTED_LATENTS_DIR_NAME
+            assert os.path.exists(precomputation_dir), f"Precomputation wasn't successful. Couldn't find the precomputed dir: {os.listdir(training_args.data_root)=}\n"
+            assert os.path.exists(conditions_dir), f"conditions dir ({str(conditions_dir)}) doesn't exist"
+            assert os.path.exists(latents_dir), f"latents dir ({str(latents_dir)}) doesn't exist"
+            assert len(video_paths) == len([p for p in conditions_dir.glob("*.pt")])
+            assert len(video_paths) == len([p for p in latents_dir.glob("*.pt")])
\ No newline at end of file
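One caveat with the `round(...) if memory_allocated else None` guards introduced above: a reading of exactly 0 bytes is falsy in Python, so it would also be mapped to None. A stricter variant of the same guard — a sketch only, assuming the module's existing bytes_to_gigabytes helper, not what the patch ships — would be:

def safe_round_gb(value, precision=3):
    # Treat only a missing reading as None; keep a legitimate 0-byte reading.
    if value is None:
        return None
    return round(bytes_to_gigabytes(value), ndigits=precision)

In practice the shipped truthiness check is harmless, since allocator readings on an active device are rarely exactly zero.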
From 3dda221b72c0df22c733bbb86fbace07c70a4be1 Mon Sep 17 00:00:00 2001
From: sayakpaul
Date: Tue, 21 Jan 2025 13:58:48 +0530
Subject: [PATCH 05/19] quality

---
 finetrainers/models/cogvideox/lora.py      | 12 ++++--
 finetrainers/utils/memory_utils.py         | 12 ++++--
 tests/trainers/cogvideox/test_cogvideox.py | 19 ++++++----
 tests/trainers/test_trainers_common.py     | 43 +++++++++++++---------
 4 files changed, 54 insertions(+), 32 deletions(-)

diff --git a/finetrainers/models/cogvideox/lora.py b/finetrainers/models/cogvideox/lora.py
index dfeda3be..d8adc2aa 100644
--- a/finetrainers/models/cogvideox/lora.py
+++ b/finetrainers/models/cogvideox/lora.py
@@ -3,7 +3,7 @@
 import torch
 from diffusers import AutoencoderKLCogVideoX, CogVideoXDDIMScheduler, CogVideoXPipeline, CogVideoXTransformer3DModel
 from PIL import Image
-from transformers import T5EncoderModel, T5Tokenizer, AutoTokenizer
+from transformers import AutoTokenizer, T5EncoderModel, T5Tokenizer
 
 from .utils import prepare_rotary_positional_embeddings
@@ -16,9 +16,13 @@ def load_condition_models(
     **kwargs,
 ):
     try:
-        tokenizer = T5Tokenizer.from_pretrained(model_id, subfolder="tokenizer", revision=revision, cache_dir=cache_dir)
-    except:
-        tokenizer = AutoTokenizer.from_pretrained(model_id, subfolder="tokenizer", revision=revision, cache_dir=cache_dir)
+        tokenizer = T5Tokenizer.from_pretrained(
+            model_id, subfolder="tokenizer", revision=revision, cache_dir=cache_dir
+        )
+    except:  # noqa
+        tokenizer = AutoTokenizer.from_pretrained(
+            model_id, subfolder="tokenizer", revision=revision, cache_dir=cache_dir
+        )
     text_encoder = T5EncoderModel.from_pretrained(
         model_id, subfolder="text_encoder", torch_dtype=text_encoder_dtype, revision=revision, cache_dir=cache_dir
     )
diff --git a/finetrainers/utils/memory_utils.py b/finetrainers/utils/memory_utils.py
index 1b492507..906efba4 100644
--- a/finetrainers/utils/memory_utils.py
+++ b/finetrainers/utils/memory_utils.py
@@ -28,10 +28,16 @@ def get_memory_statistics(precision: int = 3) -> Dict[str, Any]:
         logger.warning("No CUDA, MPS, or ROCm device found. Memory statistics are not available.")
 
     return {
-        "memory_allocated": round(bytes_to_gigabytes(memory_allocated), ndigits=precision) if memory_allocated else None,
+        "memory_allocated": round(bytes_to_gigabytes(memory_allocated), ndigits=precision)
+        if memory_allocated
+        else None,
         "memory_reserved": round(bytes_to_gigabytes(memory_reserved), ndigits=precision) if memory_reserved else None,
-        "max_memory_allocated": round(bytes_to_gigabytes(max_memory_allocated), ndigits=precision) if max_memory_allocated else None,
-        "max_memory_reserved": round(bytes_to_gigabytes(max_memory_reserved), ndigits=precision) if max_memory_reserved else None,
+        "max_memory_allocated": round(bytes_to_gigabytes(max_memory_allocated), ndigits=precision)
+        if max_memory_allocated
+        else None,
+        "max_memory_reserved": round(bytes_to_gigabytes(max_memory_reserved), ndigits=precision)
+        if max_memory_reserved
+        else None,
     }
diff --git a/tests/trainers/cogvideox/test_cogvideox.py b/tests/trainers/cogvideox/test_cogvideox.py
index 247da525..390e0968 100644
--- a/tests/trainers/cogvideox/test_cogvideox.py
+++ b/tests/trainers/cogvideox/test_cogvideox.py
@@ -1,21 +1,24 @@
-import sys 
+import sys
 from pathlib import Path
 
+
 current_file = Path(__file__).resolve()
 root_dir = current_file.parents[3]
 sys.path.append(str(root_dir))
 
-from ..test_trainers_common import TrainerTestMixin
-from typing import Tuple
-from finetrainers import Args
-import unittest
+import unittest  # noqa
+from typing import Tuple  # noqa
+
+from finetrainers import Args  # noqa
+
+from ..test_trainers_common import TrainerTestMixin  # noqa
+
 
 # Copied for now.
 def parse_resolution_bucket(resolution_bucket: str) -> Tuple[int, ...]:
     return tuple(map(int, resolution_bucket.split("x")))
 
-
 class CogVideoXTester(unittest.TestCase, TrainerTestMixin):
     model_name = "cogvideox"
 
@@ -24,8 +27,8 @@ class CogVideoXTester(unittest.TestCase, TrainerTestMixin):
         args.model_name = self.model_name
         args.training_type = "lora"
         args.pretrained_model_name_or_path = "finetrainers/dummy-cogvideox"
-        args.data_root = "" # will be set from the tester method.
+        args.data_root = ""  # will be set from the tester method.
         args.video_resolution_buckets = [parse_resolution_bucket("9x16x16")]
         args.precompute_conditions = True
         args.do_not_run_validation = True
-        return args
\ No newline at end of file
+        return args
diff --git a/tests/trainers/test_trainers_common.py b/tests/trainers/test_trainers_common.py
index f80f93b6..0b59e8ff 100644
--- a/tests/trainers/test_trainers_common.py
+++ b/tests/trainers/test_trainers_common.py
@@ -1,34 +1,37 @@
-import sys 
+import sys
 from pathlib import Path
 
+
 current_file = Path(__file__).resolve()
 root_dir = current_file.parents[1]
 sys.path.append(str(root_dir))
 
-from finetrainers import Trainer
-from finetrainers.utils.file_utils import string_to_filename
-from finetrainers.constants import PRECOMPUTED_DIR_NAME, PRECOMPUTED_CONDITIONS_DIR_NAME, PRECOMPUTED_LATENTS_DIR_NAME
-from huggingface_hub import snapshot_download
-import tempfile
-import glob
-import os
+import os  # noqa
+import tempfile  # noqa
+
+from huggingface_hub import snapshot_download  # noqa
+
+from finetrainers import Trainer  # noqa
+from finetrainers.constants import PRECOMPUTED_CONDITIONS_DIR_NAME, PRECOMPUTED_DIR_NAME, PRECOMPUTED_LATENTS_DIR_NAME  # noqa
+from finetrainers.utils.file_utils import string_to_filename  # noqa
+
 
 class TrainerTestMixin:
-    model_name = None 
+    model_name = None
 
     def get_training_args(self):
         raise NotImplementedError
-    
+
     def download_dataset_txt_format(self, cache_dir):
         path = snapshot_download(repo_id="finetrainers/dummy-disney-dataset", repo_type="dataset", cache_dir=cache_dir)
         return path
-    
+
     def test_precomputation_txt_format(self):
         # Here we assume the dataset is formatted like:
         # https://huggingface.co/datasets/Wild-Heart/Disney-VideoGeneration-Dataset/tree/main
         training_args = self.get_training_args()
-    
+
         with tempfile.TemporaryDirectory() as tmpdir:
             # Prepare remaining args.
             training_args.data_root = Path(self.download_dataset_txt_format(cache_dir=tmpdir))
@@ -36,7 +39,11 @@ class TrainerTestMixin:
             training_args.video_column = "videos.txt"
             training_args.caption_column = "prompt.txt"
             with open(f"{training_args.data_root}/{training_args.video_column}", "r", encoding="utf-8") as file:
-                video_paths = [training_args.data_root.joinpath(line.strip()) for line in file.readlines() if len(line.strip()) > 0]
+                video_paths = [
+                    training_args.data_root.joinpath(line.strip())
+                    for line in file.readlines()
+                    if len(line.strip()) > 0
+                ]
 
             # Initialize trainer.
             training_args.output_dir = tmpdir
@@ -52,12 +59,14 @@ class TrainerTestMixin:
             precomputation_dir = (
                 Path(training_args.data_root) / f"{training_args.model_name}_{cleaned_model_id}_{PRECOMPUTED_DIR_NAME}"
             )
-    
+
             # Checks.
             conditions_dir = precomputation_dir / PRECOMPUTED_CONDITIONS_DIR_NAME
             latents_dir = precomputation_dir / PRECOMPUTED_LATENTS_DIR_NAME
-            assert os.path.exists(precomputation_dir), f"Precomputation wasn't successful. Couldn't find the precomputed dir: {os.listdir(training_args.data_root)=}\n"
+            assert os.path.exists(
+                precomputation_dir
+            ), f"Precomputation wasn't successful. Couldn't find the precomputed dir: {os.listdir(training_args.data_root)=}\n"
             assert os.path.exists(conditions_dir), f"conditions dir ({str(conditions_dir)}) doesn't exist"
             assert os.path.exists(latents_dir), f"latents dir ({str(latents_dir)}) doesn't exist"
-            assert len(video_paths) == len([p for p in conditions_dir.glob("*.pt")])
-            assert len(video_paths) == len([p for p in latents_dir.glob("*.pt")])
\ No newline at end of file
+            assert len(video_paths) == len([p for p in conditions_dir.glob("*.pt")])  # noqa
+            assert len(video_paths) == len([p for p in latents_dir.glob("*.pt")])  # noqa

From 8841dbd388c95dcadba5993aa9f059cc86f4ff88 Mon Sep 17 00:00:00 2001
From: sayakpaul
Date: Fri, 24 Jan 2025 13:05:07 +0530
Subject: [PATCH 06/19] remove do_not_run_validation.

---
 finetrainers/args.py                       | 4 +---
 tests/trainers/cogvideox/test_cogvideox.py | 4 +++-
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/finetrainers/args.py b/finetrainers/args.py
index 343d6396..46cd04cc 100644
--- a/finetrainers/args.py
+++ b/finetrainers/args.py
@@ -337,7 +337,6 @@ class Args:
     validation_every_n_steps: Optional[int] = None
     enable_model_cpu_offload: bool = False
     validation_frame_rate: int = 25
-    do_not_run_validation: bool = False
 
     # Miscellaneous arguments
     tracker_name: str = "finetrainers"
@@ -484,8 +483,7 @@ def parse_arguments() -> Args:
 def validate_args(args: Args):
     _validated_model_args(args)
     _validate_training_args(args)
-    if not args.do_not_run_validation:
-        _validate_validation_args(args)
+    _validate_validation_args(args)
 
 
 def _add_model_arguments(parser: argparse.ArgumentParser) -> None:
diff --git a/tests/trainers/cogvideox/test_cogvideox.py b/tests/trainers/cogvideox/test_cogvideox.py
index 390e0968..0ef7ea02 100644
--- a/tests/trainers/cogvideox/test_cogvideox.py
+++ b/tests/trainers/cogvideox/test_cogvideox.py
@@ -30,5 +30,7 @@ class CogVideoXTester(unittest.TestCase, TrainerTestMixin):
         args.data_root = ""  # will be set from the tester method.
         args.video_resolution_buckets = [parse_resolution_bucket("9x16x16")]
         args.precompute_conditions = True
-        args.do_not_run_validation = True
+        args.validation_prompts = []
+        args.validation_heights = []
+        args.validation_widths = []
         return args
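With the do_not_run_validation flag gone, the testers opt out of validation by leaving the validation lists empty, which the argument validation appears to accept. The same setup in isolation (Args fields as defined in finetrainers/args.py):

from finetrainers import Args

args = Args()
args.validation_prompts = []  # empty lists effectively disable validation runs
args.validation_heights = []
args.validation_widths = []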
From ee368a160bff109ce44693c2d11e6248f081c20f Mon Sep 17 00:00:00 2001
From: sayakpaul
Date: Fri, 24 Jan 2025 13:21:52 +0530
Subject: [PATCH 07/19] make get_memory_stat method leaner.

---
 finetrainers/utils/memory_utils.py | 36 ++++++++++++++----------------
 1 file changed, 17 insertions(+), 19 deletions(-)

diff --git a/finetrainers/utils/memory_utils.py b/finetrainers/utils/memory_utils.py
index 906efba4..1fa5962d 100644
--- a/finetrainers/utils/memory_utils.py
+++ b/finetrainers/utils/memory_utils.py
@@ -9,35 +9,33 @@
 
 
 def get_memory_statistics(precision: int = 3) -> Dict[str, Any]:
-    memory_allocated = None
-    memory_reserved = None
-    max_memory_allocated = None
-    max_memory_reserved = None
+    memory_stats = {
+        "memory_allocated": None,
+        "memory_reserved": None,
+        "max_memory_allocated": None,
+        "max_memory_reserved": None,
+    }
 
     if torch.cuda.is_available():
         device = torch.cuda.current_device()
-        memory_allocated = torch.cuda.memory_allocated(device)
-        memory_reserved = torch.cuda.memory_reserved(device)
-        max_memory_allocated = torch.cuda.max_memory_allocated(device)
-        max_memory_reserved = torch.cuda.max_memory_reserved(device)
+        memory_stats.update(
+            {
+                "memory_allocated": torch.cuda.memory_allocated(device),
+                "memory_reserved": torch.cuda.memory_reserved(device),
+                "max_memory_allocated": torch.cuda.max_memory_allocated(device),
+                "max_memory_reserved": torch.cuda.max_memory_reserved(device),
+            }
+        )
 
     elif torch.backends.mps.is_available():
-        memory_allocated = torch.mps.current_allocated_memory()
+        memory_stats["memory_allocated"] = torch.mps.current_allocated_memory()
 
     else:
         logger.warning("No CUDA, MPS, or ROCm device found. Memory statistics are not available.")
 
     return {
-        "memory_allocated": round(bytes_to_gigabytes(memory_allocated), ndigits=precision)
-        if memory_allocated
-        else None,
-        "memory_reserved": round(bytes_to_gigabytes(memory_reserved), ndigits=precision) if memory_reserved else None,
-        "max_memory_allocated": round(bytes_to_gigabytes(max_memory_allocated), ndigits=precision)
-        if max_memory_allocated
-        else None,
-        "max_memory_reserved": round(bytes_to_gigabytes(max_memory_reserved), ndigits=precision)
-        if max_memory_reserved
-        else None,
+        key: (round(bytes_to_gigabytes(value), ndigits=precision) if value else None)
+        for key, value in memory_stats.items()
     }

From 5529264e76e86a594bb538ea812aa4137ed8669d Mon Sep 17 00:00:00 2001
From: sayakpaul
Date: Fri, 24 Jan 2025 13:25:10 +0530
Subject: [PATCH 08/19] reset memory utils.

---
 finetrainers/trainer.py            | 11 ++++-------
 finetrainers/utils/memory_utils.py |  8 ++++++++
 2 files changed, 12 insertions(+), 7 deletions(-)

diff --git a/finetrainers/trainer.py b/finetrainers/trainer.py
index bdbddccf..9b049190 100644
--- a/finetrainers/trainer.py
+++ b/finetrainers/trainer.py
@@ -54,7 +54,7 @@
 )
 from .utils.file_utils import string_to_filename
 from .utils.hub_utils import save_model_card
-from .utils.memory_utils import free_memory, get_memory_statistics, make_contiguous
+from .utils.memory_utils import free_memory, get_memory_statistics, make_contiguous, reset_memory_stats
 from .utils.model_utils import resolve_vae_cls_from_ckpt_path
 from .utils.optimizer_utils import get_optimizer
 from .utils.torch_utils import align_device_and_dtype, expand_tensor_dims, unwrap_model
@@ -255,8 +255,7 @@ def collate_fn(batch):
 
         memory_statistics = get_memory_statistics()
         logger.info(f"Memory after precomputing conditions: {json.dumps(memory_statistics, indent=4)}")
-        if torch.cuda.is_available():
-            torch.cuda.reset_peak_memory_stats(accelerator.device)
+        reset_memory_stats(accelerator.device)
 
         # Precompute latents
         latent_components = self.model_config["load_latent_models"](**self._get_load_components_kwargs())
@@ -303,8 +302,7 @@ def collate_fn(batch):
 
         memory_statistics = get_memory_statistics()
         logger.info(f"Memory after precomputing latents: {json.dumps(memory_statistics, indent=4)}")
-        if torch.cuda.is_available():
-            torch.cuda.reset_peak_memory_stats(accelerator.device)
+        reset_memory_stats(accelerator.device)
 
         # Update dataloader to use precomputed conditions and latents
         self.dataloader = torch.utils.data.DataLoader(
@@ -986,8 +984,7 @@ def validate(self, step: int, final_validation: bool = False) -> None:
         free_memory()
         memory_statistics = get_memory_statistics()
         logger.info(f"Memory after validation end: {json.dumps(memory_statistics, indent=4)}")
-        if torch.cuda.is_available():
-            torch.cuda.reset_peak_memory_stats(accelerator.device)
+        reset_memory_stats(accelerator.device)
 
         if not final_validation:
             self.transformer.train()
diff --git a/finetrainers/utils/memory_utils.py b/finetrainers/utils/memory_utils.py
index 1fa5962d..7bffdb4a 100644
--- a/finetrainers/utils/memory_utils.py
+++ b/finetrainers/utils/memory_utils.py
@@ -53,6 +53,14 @@ def free_memory() -> None:
     # TODO(aryan): handle non-cuda devices
 
 
+def reset_memory_stats(device: torch.device):
+    # TODO: handle for non-cuda devices
+    if torch.cuda.is_available():
+        torch.cuda.reset_peak_memory_stats(device)
+    else:
+        logger.warning("No CUDA device found. Memory statistics are not available.")
+
+
 def make_contiguous(x: Union[torch.Tensor, Dict[str, torch.Tensor]]) -> Union[torch.Tensor, Dict[str, torch.Tensor]]:
     if isinstance(x, torch.Tensor):
         return x.contiguous()
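Patches 07 and 08 together turn the memory helpers into device-agnostic utilities that the trainer can call unconditionally. A minimal usage sketch, assuming the finetrainers.utils.memory_utils module as modified above:

import json

import torch

from finetrainers.utils.memory_utils import get_memory_statistics, reset_memory_stats

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
reset_memory_stats(device)  # warns and no-ops on non-CUDA backends
# ... run a precomputation or training step ...
stats = get_memory_statistics()  # entries are None when the backend can't report them
print(json.dumps(stats, indent=4))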
From d63fbcf6774ea3b4de788f8610f15da751b640df Mon Sep 17 00:00:00 2001
From: sayakpaul
Date: Fri, 24 Jan 2025 13:27:28 +0530
Subject: [PATCH 09/19] sync util.

---
 finetrainers/trainer.py            | 11 ++++++++---
 finetrainers/utils/memory_utils.py |  9 ++++++++-
 2 files changed, 16 insertions(+), 4 deletions(-)

diff --git a/finetrainers/trainer.py b/finetrainers/trainer.py
index 9b049190..bb28329d 100644
--- a/finetrainers/trainer.py
+++ b/finetrainers/trainer.py
@@ -54,7 +54,13 @@
 )
 from .utils.file_utils import string_to_filename
 from .utils.hub_utils import save_model_card
-from .utils.memory_utils import free_memory, get_memory_statistics, make_contiguous, reset_memory_stats
+from .utils.memory_utils import (
+    free_memory,
+    get_memory_statistics,
+    make_contiguous,
+    reset_memory_stats,
+    synchornize_device,
+)
 from .utils.model_utils import resolve_vae_cls_from_ckpt_path
 from .utils.optimizer_utils import get_optimizer
 from .utils.torch_utils import align_device_and_dtype, expand_tensor_dims, unwrap_model
@@ -1107,8 +1113,7 @@ def _delete_components(self) -> None:
         self.vae = None
         self.scheduler = None
         free_memory()
-        if torch.cuda.is_available():
-            torch.cuda.synchronize(self.state.accelerator.device)
+        synchornize_device(self.state.accelerator.device)
 
     def _get_and_prepare_pipeline_for_validation(self, final_validation: bool = False) -> DiffusionPipeline:
         accelerator = self.state.accelerator
diff --git a/finetrainers/utils/memory_utils.py b/finetrainers/utils/memory_utils.py
index 7bffdb4a..b0579311 100644
--- a/finetrainers/utils/memory_utils.py
+++ b/finetrainers/utils/memory_utils.py
@@ -58,7 +58,14 @@ def reset_memory_stats(device: torch.device):
     if torch.cuda.is_available():
         torch.cuda.reset_peak_memory_stats(device)
     else:
-        logger.warning("No CUDA device found. Memory statistics are not available.")
+        logger.warning("No CUDA device found. Nothing to reset memory of.")
+
+
+def synchornize_device(device: torch.device):
+    if torch.cuda.is_available():
+        torch.cuda.synchronize(device)
+    else:
+        logger.warning("No CUDA device found. Nothing to synchronize.")
 
 
 def make_contiguous(x: Union[torch.Tensor, Dict[str, torch.Tensor]]) -> Union[torch.Tensor, Dict[str, torch.Tensor]]:
From 1f61911846dd413c91940a8d2993c1555f16591c Mon Sep 17 00:00:00 2001
From: sayakpaul
Date: Fri, 24 Jan 2025 14:48:20 +0530
Subject: [PATCH 10/19] updates

---
 tests/trainers/cogvideox/test_cogvideox.py |  20 ++-
 tests/trainers/test_trainers_common.py     | 147 +++++++++++++++------
 2 files changed, 122 insertions(+), 45 deletions(-)

diff --git a/tests/trainers/cogvideox/test_cogvideox.py b/tests/trainers/cogvideox/test_cogvideox.py
index 0ef7ea02..a5419274 100644
--- a/tests/trainers/cogvideox/test_cogvideox.py
+++ b/tests/trainers/cogvideox/test_cogvideox.py
@@ -20,11 +20,13 @@ def parse_resolution_bucket(resolution_bucket: str) -> Tuple[int, ...]:
 
 
 class CogVideoXTester(unittest.TestCase, TrainerTestMixin):
-    model_name = "cogvideox"
+    MODEL_NAME = "cogvideox"
+    EXPECTED_PRECOMPUTATION_LATENT_KEYS = {"latents"}
+    EXPECTED_PRECOMPUTATION_CONDITION_KEYS = {"prompt_embeds"}
 
     def get_training_args(self):
         args = Args()
-        args.model_name = self.model_name
+        args.model_name = self.MODEL_NAME
         args.training_type = "lora"
         args.pretrained_model_name_or_path = "finetrainers/dummy-cogvideox"
         args.data_root = ""  # will be set from the tester method.
@@ -34,3 +36,17 @@ class CogVideoXTester(unittest.TestCase, TrainerTestMixin):
         args.validation_heights = []
         args.validation_widths = []
         return args
+
+    @property
+    def latent_output_shape(self):
+        return (8, 3, 2, 2)
+
+    @property
+    def condition_output_shape(self):
+        return (226, 32)
+
+    def populate_shapes(self):
+        for k in self.EXPECTED_PRECOMPUTATION_LATENT_KEYS:
+            self.EXPECTED_LATENT_SHAPES[k] = self.latent_output_shape
+        for k in self.EXPECTED_PRECOMPUTATION_CONDITION_KEYS:
+            self.EXPECTED_CONDITION_SHAPES[k] = self.condition_output_shape
diff --git a/tests/trainers/test_trainers_common.py b/tests/trainers/test_trainers_common.py
index 0b59e8ff..b95b3e9b 100644
--- a/tests/trainers/test_trainers_common.py
+++ b/tests/trainers/test_trainers_common.py
@@ -1,72 +1,133 @@
-import sys
+import tempfile
 from pathlib import Path
 
+import torch
+from huggingface_hub import snapshot_download
 
-current_file = Path(__file__).resolve()
-root_dir = current_file.parents[1]
-sys.path.append(str(root_dir))
+from finetrainers import Trainer
+from finetrainers.constants import (
+    PRECOMPUTED_CONDITIONS_DIR_NAME,
+    PRECOMPUTED_DIR_NAME,
+    PRECOMPUTED_LATENTS_DIR_NAME,
+)
+from finetrainers.utils.file_utils import string_to_filename
 
-import os  # noqa
-import tempfile  # noqa
-
-from huggingface_hub import snapshot_download  # noqa
-
-from finetrainers import Trainer  # noqa
-from finetrainers.constants import PRECOMPUTED_CONDITIONS_DIR_NAME, PRECOMPUTED_DIR_NAME, PRECOMPUTED_LATENTS_DIR_NAME  # noqa
-from finetrainers.utils.file_utils import string_to_filename  # noqa
 
 class TrainerTestMixin:
-    model_name = None
+    MODEL_NAME = None
+    EXPECTED_PRECOMPUTATION_LATENT_KEYS = set()
+    EXPECTED_LATENT_SHAPES = {}
+    EXPECTED_PRECOMPUTATION_CONDITION_KEYS = set()
+    EXPECTED_CONDITION_SHAPES = {}
 
     def get_training_args(self):
         raise NotImplementedError
 
+    @property
+    def latent_output_shape(self):
+        raise NotImplementedError
+
+    @property
+    def condition_output_shape(self):
+        raise NotImplementedError
+
+    def populate_shapes(self):
+        raise NotImplementedError
+
     def download_dataset_txt_format(self, cache_dir):
-        path = snapshot_download(repo_id="finetrainers/dummy-disney-dataset", repo_type="dataset", cache_dir=cache_dir)
-        return path
+        return snapshot_download(repo_id="finetrainers/dummy-disney-dataset", repo_type="dataset", cache_dir=cache_dir)
+
+    def get_precomputation_dir(self, training_args):
+        cleaned_model_id = string_to_filename(training_args.pretrained_model_name_or_path)
+        return Path(training_args.data_root) / f"{training_args.model_name}_{cleaned_model_id}_{PRECOMPUTED_DIR_NAME}"
+
+    def tearDown(self):
+        super().tearDown()
+        self.EXPECTED_LATENT_SHAPES.clear()
+        self.EXPECTED_CONDITION_SHAPES.clear()
+
+    def _verify_precomputed_files(self, video_paths, all_conditions, all_latents):
+        assert len(video_paths) == len(all_conditions), "Mismatch in conditions file count"
+        assert len(video_paths) == len(all_latents), "Mismatch in latents file count"
+
+        for latent, condition in zip(all_latents, all_conditions):
+            latent_keys = set(torch.load(latent, weights_only=True).keys())
+            condition_keys = set(torch.load(condition, weights_only=True).keys())
+            assert latent_keys == self.EXPECTED_PRECOMPUTATION_LATENT_KEYS, f"Unexpected latent keys: {latent_keys}"
+            assert (
+                condition_keys == self.EXPECTED_PRECOMPUTATION_CONDITION_KEYS
+            ), f"Unexpected condition keys: {condition_keys}"
+
+    def _verify_shapes(self, latent_files, condition_files):
+        self.populate_shapes()
+
+        for l_path, c_path in zip(latent_files, condition_files):
+            latent = torch.load(l_path, weights_only=True, map_location="cpu")
+            condition = torch.load(c_path, weights_only=True, map_location="cpu")
+
+            for key in self.EXPECTED_PRECOMPUTATION_LATENT_KEYS:
+                assert (
+                    latent[key].shape[1:] == self.EXPECTED_LATENT_SHAPES[key]
+                ), f"Latent shape mismatch for key: {key}"
+
+            for key in self.EXPECTED_PRECOMPUTATION_CONDITION_KEYS:
+                assert (
+                    condition[key].shape[1:] == self.EXPECTED_CONDITION_SHAPES[key]
+                ), f"Condition shape mismatch for key: {key}"
 
-    def test_precomputation_txt_format(self):
-        # Here we assume the dataset is formatted like:
-        # https://huggingface.co/datasets/Wild-Heart/Disney-VideoGeneration-Dataset/tree/main
+    def test_precomputation_txt_format_creates_files(self):
         training_args = self.get_training_args()
 
         with tempfile.TemporaryDirectory() as tmpdir:
-            # Prepare remaining args.
             training_args.data_root = Path(self.download_dataset_txt_format(cache_dir=tmpdir))
-
             training_args.video_column = "videos.txt"
             training_args.caption_column = "prompt.txt"
-            with open(f"{training_args.data_root}/{training_args.video_column}", "r", encoding="utf-8") as file:
-                video_paths = [
-                    training_args.data_root.joinpath(line.strip())
-                    for line in file.readlines()
-                    if len(line.strip()) > 0
-                ]
 
-            # Initialize trainer.
+            with open(training_args.data_root / training_args.video_column, "r", encoding="utf-8") as file:
+                video_paths = [training_args.data_root / line.strip() for line in file if line.strip()]
+
             training_args.output_dir = tmpdir
             trainer = Trainer(training_args)
             training_args = trainer.args
 
-            # Perform precomputations.
             trainer.prepare_dataset()
             trainer.prepare_models()
             trainer.prepare_precomputations()
 
-            cleaned_model_id = string_to_filename(training_args.pretrained_model_name_or_path)
-            precomputation_dir = (
-                Path(training_args.data_root) / f"{training_args.model_name}_{cleaned_model_id}_{PRECOMPUTED_DIR_NAME}"
-            )
+            precomputation_dir = self.get_precomputation_dir(training_args)
+            conditions_dir = precomputation_dir / PRECOMPUTED_CONDITIONS_DIR_NAME
+            latents_dir = precomputation_dir / PRECOMPUTED_LATENTS_DIR_NAME
+
+            assert precomputation_dir.exists(), f"Precomputed dir not found: {precomputation_dir}"
+            assert conditions_dir.exists(), f"Conditions dir not found: {conditions_dir}"
+            assert latents_dir.exists(), f"Latents dir not found: {latents_dir}"
+
+            all_conditions = list(conditions_dir.glob("*.pt"))
+            all_latents = list(latents_dir.glob("*.pt"))
+
+            self._verify_precomputed_files(video_paths, all_conditions, all_latents)
+
+    def test_precomputation_txt_format_matches_shapes(self):
+        training_args = self.get_training_args()
+
+        with tempfile.TemporaryDirectory() as tmpdir:
+            training_args.data_root = Path(self.download_dataset_txt_format(cache_dir=tmpdir))
+            training_args.video_column = "videos.txt"
+            training_args.caption_column = "prompt.txt"
+
+            training_args.output_dir = tmpdir
+            trainer = Trainer(training_args)
+            training_args = trainer.args
+
+            trainer.prepare_dataset()
+            trainer.prepare_models()
+            trainer.prepare_precomputations()
 
-            # Checks.
+            precomputation_dir = self.get_precomputation_dir(training_args)
             conditions_dir = precomputation_dir / PRECOMPUTED_CONDITIONS_DIR_NAME
             latents_dir = precomputation_dir / PRECOMPUTED_LATENTS_DIR_NAME
-            assert os.path.exists(
-                precomputation_dir
-            ), f"Precomputation wasn't successful. Couldn't find the precomputed dir: {os.listdir(training_args.data_root)=}\n"
-            assert os.path.exists(conditions_dir), f"conditions dir ({str(conditions_dir)}) doesn't exist"
-            assert os.path.exists(latents_dir), f"latents dir ({str(latents_dir)}) doesn't exist"
-            assert len(video_paths) == len([p for p in conditions_dir.glob("*.pt")])  # noqa
-            assert len(video_paths) == len([p for p in latents_dir.glob("*.pt")])  # noqa
+
+            latent_files = list(latents_dir.glob("*.pt"))
+            condition_files = list(conditions_dir.glob("*.pt"))
+
+            self._verify_shapes(latent_files, condition_files)

From c432f399abe526deca18ff419945bce07435957f Mon Sep 17 00:00:00 2001
From: sayakpaul
Date: Mon, 27 Jan 2025 18:17:52 +0530
Subject: [PATCH 11/19] typo.

Co-authored-by: a-r-r-o-w
---
 finetrainers/trainer.py            | 4 ++--
 finetrainers/utils/memory_utils.py | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/finetrainers/trainer.py b/finetrainers/trainer.py
index 9711ca0c..81dc1d68 100644
--- a/finetrainers/trainer.py
+++ b/finetrainers/trainer.py
@@ -59,7 +59,7 @@
     get_memory_statistics,
     make_contiguous,
     reset_memory_stats,
-    synchornize_device,
+    synchronize_device,
 )
 from .utils.model_utils import resolve_vae_cls_from_ckpt_path
 from .utils.optimizer_utils import get_optimizer
@@ -1113,7 +1113,7 @@ def _delete_components(self) -> None:
         self.vae = None
         self.scheduler = None
         free_memory()
-        synchornize_device(self.state.accelerator.device)
+        synchronize_device(self.state.accelerator.device)
 
     def _get_and_prepare_pipeline_for_validation(self, final_validation: bool = False) -> DiffusionPipeline:
         accelerator = self.state.accelerator
diff --git a/finetrainers/utils/memory_utils.py b/finetrainers/utils/memory_utils.py
index b0579311..dcde3d89 100644
--- a/finetrainers/utils/memory_utils.py
+++ b/finetrainers/utils/memory_utils.py
@@ -61,7 +61,7 @@ def reset_memory_stats(device: torch.device):
         logger.warning("No CUDA device found. Nothing to reset memory of.")
 
 
-def synchornize_device(device: torch.device):
+def synchronize_device(device: torch.device):
     if torch.cuda.is_available():
         torch.cuda.synchronize(device)
     else:
         logger.warning("No CUDA device found. Nothing to synchronize.")
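The verification helpers added in patch 10 reduce to loading each serialized dict of tensors and comparing key sets and per-sample shapes. The same check, stripped down to a standalone sketch (the file name is illustrative; the expected values mirror the cogvideox tester above):

import torch

expected_keys = {"latents"}
payload = torch.load("0.pt", weights_only=True, map_location="cpu")  # illustrative file name

assert set(payload.keys()) == expected_keys, f"Unexpected keys: {set(payload.keys())}"
# Shapes are compared without the leading batch dimension, as in _verify_shapes.
assert tuple(payload["latents"].shape[1:]) == (8, 3, 2, 2)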
From 3609fdfabb2e77185e316d985dd946e11ab39c74 Mon Sep 17 00:00:00 2001
From: sayakpaul
Date: Mon, 27 Jan 2025 18:20:11 +0530
Subject: [PATCH 12/19] resolve imports.

---
 tests/trainers/cogvideox/test_cogvideox.py | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/tests/trainers/cogvideox/test_cogvideox.py b/tests/trainers/cogvideox/test_cogvideox.py
index a5419274..da5384a0 100644
--- a/tests/trainers/cogvideox/test_cogvideox.py
+++ b/tests/trainers/cogvideox/test_cogvideox.py
@@ -1,16 +1,14 @@
 import sys
+import unittest
 from pathlib import Path
+from typing import Tuple
 
 
 current_file = Path(__file__).resolve()
 root_dir = current_file.parents[3]
 sys.path.append(str(root_dir))
 
-import unittest  # noqa
-from typing import Tuple  # noqa
-
 from finetrainers import Args  # noqa
-
 from ..test_trainers_common import TrainerTestMixin  # noqa

From 8faac472d48675456434fae7f9b07ef208381800 Mon Sep 17 00:00:00 2001
From: sayakpaul
Date: Wed, 29 Jan 2025 16:11:57 +0530
Subject: [PATCH 13/19] updates

---
 tests/trainers/test_trainers_common.py | 39 +++++++++++++++++++++++---
 1 file changed, 35 insertions(+), 4 deletions(-)

diff --git a/tests/trainers/test_trainers_common.py b/tests/trainers/test_trainers_common.py
index b95b3e9b..fc54d22d 100644
--- a/tests/trainers/test_trainers_common.py
+++ b/tests/trainers/test_trainers_common.py
@@ -1,16 +1,22 @@
+import sys
 import tempfile
 from pathlib import Path
 
 import torch
 from huggingface_hub import snapshot_download
 
-from finetrainers import Trainer
-from finetrainers.constants import (
+
+current_file = Path(__file__).resolve()
+root_dir = current_file.parents[2]
+sys.path.append(str(root_dir))
+
+from finetrainers import Trainer  # noqa
+from finetrainers.constants import (  # noqa
     PRECOMPUTED_CONDITIONS_DIR_NAME,
     PRECOMPUTED_DIR_NAME,
     PRECOMPUTED_LATENTS_DIR_NAME,
 )
-from finetrainers.utils.file_utils import string_to_filename
+from finetrainers.utils.file_utils import string_to_filename  # noqa
 
 
 class TrainerTestMixin:
@@ -121,7 +127,9 @@ class TrainerTestMixin:
 
             trainer.prepare_dataset()
             trainer.prepare_models()
-            trainer.prepare_precomputations()
+            with self.assertLogs(level="INFO") as captured:
+                trainer.prepare_precomputations()
+            assert any("Precomputed data not found. Running precomputation." in msg for msg in captured.output)
 
             precomputation_dir = self.get_precomputation_dir(training_args)
             conditions_dir = precomputation_dir / PRECOMPUTED_CONDITIONS_DIR_NAME
@@ -131,3 +139,26 @@ class TrainerTestMixin:
             latent_files = list(latents_dir.glob("*.pt"))
             condition_files = list(conditions_dir.glob("*.pt"))
 
             self._verify_shapes(latent_files, condition_files)
+
+    def test_precomputation_txt_format_no_redo(self):
+        training_args = self.get_training_args()
+
+        with tempfile.TemporaryDirectory() as tmpdir:
+            training_args.data_root = Path(self.download_dataset_txt_format(cache_dir=tmpdir))
+            training_args.video_column = "videos.txt"
+            training_args.caption_column = "prompt.txt"
+
+            training_args.output_dir = tmpdir
+            trainer = Trainer(training_args)
+            training_args = trainer.args
+
+            trainer.prepare_dataset()
+            trainer.prepare_models()
+            trainer.prepare_precomputations()
+
+            with self.assertLogs(level="INFO") as captured:
+                trainer.prepare_precomputations()
+
+            assert any(
+                "Precomputed conditions and latents found. Loading precomputed data" in msg for msg in captured.output
+            )
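Patch 13 relies on unittest's assertLogs to tell a fresh precomputation run apart from a cached one. The mechanism in isolation looks like this (logger name and message are placeholders echoing the trainer's own log lines):

import logging
import unittest


class AssertLogsExample(unittest.TestCase):
    def test_info_message_is_captured(self):
        with self.assertLogs(level="INFO") as captured:
            logging.getLogger("finetrainers").info("Precomputed data not found. Running precomputation.")
        # captured.output holds formatted records, e.g. "INFO:finetrainers:..."
        self.assertTrue(any("Running precomputation" in msg for msg in captured.output))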
From 19356bb7329dab98d985854462c544b0f3a097c2 Mon Sep 17 00:00:00 2001
From: sayakpaul
Date: Wed, 29 Jan 2025 16:19:18 +0530
Subject: [PATCH 14/19] update ltx

---
 tests/trainers/cogvideox/test_cogvideox.py |  8 +---
 tests/trainers/ltx_video/__init__.py       |  0
 tests/trainers/ltx_video/test_ltx_video.py | 44 ++++++++++++++++++++++
 tests/trainers/test_trainers_common.py     |  6 +++
 4 files changed, 51 insertions(+), 7 deletions(-)
 create mode 100644 tests/trainers/ltx_video/__init__.py
 create mode 100644 tests/trainers/ltx_video/test_ltx_video.py

diff --git a/tests/trainers/cogvideox/test_cogvideox.py b/tests/trainers/cogvideox/test_cogvideox.py
index da5384a0..e0790322 100644
--- a/tests/trainers/cogvideox/test_cogvideox.py
+++ b/tests/trainers/cogvideox/test_cogvideox.py
@@ -1,7 +1,6 @@
 import sys
 import unittest
 from pathlib import Path
-from typing import Tuple
 
 
 current_file = Path(__file__).resolve()
@@ -9,12 +8,7 @@ root_dir = current_file.parents[3]
 sys.path.append(str(root_dir))
 
 from finetrainers import Args  # noqa
-from ..test_trainers_common import TrainerTestMixin  # noqa
-
-
-# Copied for now.
-def parse_resolution_bucket(resolution_bucket: str) -> Tuple[int, ...]:
-    return tuple(map(int, resolution_bucket.split("x")))
+from ..test_trainers_common import TrainerTestMixin, parse_resolution_bucket  # noqa
diff --git a/tests/trainers/ltx_video/__init__.py b/tests/trainers/ltx_video/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/tests/trainers/ltx_video/test_ltx_video.py b/tests/trainers/ltx_video/test_ltx_video.py
new file mode 100644
index 00000000..d74d5602
--- /dev/null
+++ b/tests/trainers/ltx_video/test_ltx_video.py
@@ -0,0 +1,44 @@
+import sys
+import unittest
+from pathlib import Path
+
+
+current_file = Path(__file__).resolve()
+root_dir = current_file.parents[3]
+sys.path.append(str(root_dir))
+
+from finetrainers import Args  # noqa
+from ..test_trainers_common import TrainerTestMixin, parse_resolution_bucket  # noqa
+
+
+class LTXVideoTester(unittest.TestCase, TrainerTestMixin):
+    MODEL_NAME = "ltx_video"
+    EXPECTED_PRECOMPUTATION_LATENT_KEYS = {"latents"}
+    EXPECTED_PRECOMPUTATION_CONDITION_KEYS = {"prompt_embeds"}
+
+    def get_training_args(self):
+        args = Args()
+        args.model_name = self.MODEL_NAME
+        args.training_type = "lora"
+        args.pretrained_model_name_or_path = "finetrainers/dummy-ltxvideo"
+        args.data_root = ""  # will be set from the tester method.
+        args.video_resolution_buckets = [parse_resolution_bucket("9x16x16")]
+        args.precompute_conditions = True
+        args.validation_prompts = []
+        args.validation_heights = []
+        args.validation_widths = []
+        return args
+
+    @property
+    def latent_output_shape(self):
+        return (8, 3, 2, 2)
+
+    @property
+    def condition_output_shape(self):
+        return (226, 32)
+
+    def populate_shapes(self):
+        for k in self.EXPECTED_PRECOMPUTATION_LATENT_KEYS:
+            self.EXPECTED_LATENT_SHAPES[k] = self.latent_output_shape
+        for k in self.EXPECTED_PRECOMPUTATION_CONDITION_KEYS:
+            self.EXPECTED_CONDITION_SHAPES[k] = self.condition_output_shape
diff --git a/tests/trainers/test_trainers_common.py b/tests/trainers/test_trainers_common.py
index fc54d22d..02f6d193 100644
--- a/tests/trainers/test_trainers_common.py
+++ b/tests/trainers/test_trainers_common.py
@@ -1,6 +1,7 @@
 import sys
 import tempfile
 from pathlib import Path
+from typing import Tuple
 
 import torch
 from huggingface_hub import snapshot_download
@@ -19,6 +20,11 @@ from finetrainers.constants import (  # noqa
 from finetrainers.utils.file_utils import string_to_filename  # noqa
 
 
+# Copied for now.
+def parse_resolution_bucket(resolution_bucket: str) -> Tuple[int, ...]:
+    return tuple(map(int, resolution_bucket.split("x")))
+
+
 class TrainerTestMixin:
     MODEL_NAME = None
     EXPECTED_PRECOMPUTATION_LATENT_KEYS = set()

From 5909c21e2fcb836fa291dbf8105edf2522b32fdd Mon Sep 17 00:00:00 2001
From: sayakpaul
Date: Wed, 29 Jan 2025 17:36:21 +0530
Subject: [PATCH 15/19] updates

---
 finetrainers/models/ltx_video/lora.py      | 11 ++-
 tests/trainers/ltx_video/test_ltx_video.py | 22 +++--
 tests/trainers/test_trainers_common.py     | 98 ++++++++++++----------
 3 files changed, 75 insertions(+), 56 deletions(-)

diff --git a/finetrainers/models/ltx_video/lora.py b/finetrainers/models/ltx_video/lora.py
index bdd6ffa3..49ea1db4 100644
--- a/finetrainers/models/ltx_video/lora.py
+++ b/finetrainers/models/ltx_video/lora.py
@@ -5,7 +5,7 @@
 from accelerate.logging import get_logger
 from diffusers import AutoencoderKLLTXVideo, FlowMatchEulerDiscreteScheduler, LTXPipeline, LTXVideoTransformer3DModel
 from PIL import Image
-from transformers import T5EncoderModel, T5Tokenizer
+from transformers import AutoTokenizer, T5EncoderModel, T5Tokenizer
 
 
 logger = get_logger("finetrainers")  # pylint: disable=invalid-name
@@ -18,7 +18,14 @@ def load_condition_models(
     cache_dir: Optional[str] = None,
     **kwargs,
 ) -> Dict[str, nn.Module]:
-    tokenizer = T5Tokenizer.from_pretrained(model_id, subfolder="tokenizer", revision=revision, cache_dir=cache_dir)
+    try:
+        tokenizer = T5Tokenizer.from_pretrained(
+            model_id, subfolder="tokenizer", revision=revision, cache_dir=cache_dir
+        )
+    except:  # noqa
+        tokenizer = AutoTokenizer.from_pretrained(
+            model_id, subfolder="tokenizer", revision=revision, cache_dir=cache_dir
+        )
     text_encoder = T5EncoderModel.from_pretrained(
         model_id, subfolder="text_encoder", torch_dtype=text_encoder_dtype, revision=revision, cache_dir=cache_dir
     )
diff --git a/tests/trainers/ltx_video/test_ltx_video.py b/tests/trainers/ltx_video/test_ltx_video.py
index d74d5602..e0de0aa8 100644
--- a/tests/trainers/ltx_video/test_ltx_video.py
+++ b/tests/trainers/ltx_video/test_ltx_video.py
@@ -13,8 +13,8 @@ from ..test_trainers_common import TrainerTestMixin, parse_resolution_bucket  # noqa
 
 class LTXVideoTester(unittest.TestCase, TrainerTestMixin):
     MODEL_NAME = "ltx_video"
-    EXPECTED_PRECOMPUTATION_LATENT_KEYS = {"latents"}
-    EXPECTED_PRECOMPUTATION_CONDITION_KEYS = {"prompt_embeds"}
+    EXPECTED_PRECOMPUTATION_LATENT_KEYS = {"height", "latents", "latents_mean", "latents_std", "num_frames", "width"}
+    EXPECTED_PRECOMPUTATION_CONDITION_KEYS = {"prompt_attention_mask", "prompt_embeds"}
 
     def get_training_args(self):
         args = Args()
@@ -31,14 +31,20 @@ class LTXVideoTester(unittest.TestCase, TrainerTestMixin):
 
     @property
     def latent_output_shape(self):
-        return (8, 3, 2, 2)
+        # only tensor object shapes
+        return (16, 3, 4, 4), (), ()
 
     @property
     def condition_output_shape(self):
-        return (226, 32)
+        # only tensor object shapes
+        return (128,), (128, 32)
 
     def populate_shapes(self):
-        for k in self.EXPECTED_PRECOMPUTATION_LATENT_KEYS:
-            self.EXPECTED_LATENT_SHAPES[k] = self.latent_output_shape
-        for k in self.EXPECTED_PRECOMPUTATION_CONDITION_KEYS:
-            self.EXPECTED_CONDITION_SHAPES[k] = self.condition_output_shape
+        i = 0
+        for k in sorted(self.EXPECTED_PRECOMPUTATION_LATENT_KEYS):
+            if k in ["height", "num_frames", "width"]:
+                continue
+            self.EXPECTED_LATENT_SHAPES[k] = self.latent_output_shape[i]
+            i += 1
+        for i, k in enumerate(sorted(self.EXPECTED_PRECOMPUTATION_CONDITION_KEYS)):
+            self.EXPECTED_CONDITION_SHAPES[k] = self.condition_output_shape[i]
diff --git a/tests/trainers/test_trainers_common.py b/tests/trainers/test_trainers_common.py
index 02f6d193..9be7294a 100644
--- a/tests/trainers/test_trainers_common.py
+++ b/tests/trainers/test_trainers_common.py
@@ -20,8 +20,8 @@ from finetrainers.utils.file_utils import string_to_filename  # noqa
 
 
-# Copied for now.
 def parse_resolution_bucket(resolution_bucket: str) -> Tuple[int, ...]:
+    """Parse a resolution like '512x512' into a tuple of ints (512, 512)."""
     return tuple(map(int, resolution_bucket.split("x")))
 
 
@@ -50,6 +50,7 @@ class TrainerTestMixin:
         return snapshot_download(repo_id="finetrainers/dummy-disney-dataset", repo_type="dataset", cache_dir=cache_dir)
 
     def get_precomputation_dir(self, training_args):
+        """Return the path of the precomputation directory based on the training args."""
         cleaned_model_id = string_to_filename(training_args.pretrained_model_name_or_path)
         return Path(training_args.data_root) / f"{training_args.model_name}_{cleaned_model_id}_{PRECOMPUTED_DIR_NAME}"
 
@@ -59,51 +60,72 @@ class TrainerTestMixin:
         self.EXPECTED_CONDITION_SHAPES.clear()
 
     def _verify_precomputed_files(self, video_paths, all_conditions, all_latents):
+        """Check that the correct number of precomputed files exist and have the right keys."""
         assert len(video_paths) == len(all_conditions), "Mismatch in conditions file count"
         assert len(video_paths) == len(all_latents), "Mismatch in latents file count"
 
         for latent, condition in zip(all_latents, all_conditions):
-            latent_keys = set(torch.load(latent, weights_only=True).keys())
-            condition_keys = set(torch.load(condition, weights_only=True).keys())
-            assert latent_keys == self.EXPECTED_PRECOMPUTATION_LATENT_KEYS, f"Unexpected latent keys: {latent_keys}"
-            assert (
-                condition_keys == self.EXPECTED_PRECOMPUTATION_CONDITION_KEYS
-            ), f"Unexpected condition keys: {condition_keys}"
+            latent_keys = sorted(set(torch.load(latent, weights_only=True).keys()))
+            condition_keys = sorted(set(torch.load(condition, weights_only=True).keys()))
+            assert latent_keys == sorted(
+                self.EXPECTED_PRECOMPUTATION_LATENT_KEYS
+            ), f"Unexpected latent keys: {latent_keys}"
+            assert condition_keys == sorted(
+                self.EXPECTED_PRECOMPUTATION_CONDITION_KEYS
+            ), f"Unexpected condition keys: {condition_keys}"
 
     def _verify_shapes(self, latent_files, condition_files):
+        """Check that the shapes of latents and conditions match expected shapes."""
         self.populate_shapes()
-
         for l_path, c_path in zip(latent_files, condition_files):
             latent = torch.load(l_path, weights_only=True, map_location="cpu")
             condition = torch.load(c_path, weights_only=True, map_location="cpu")
 
             for key in self.EXPECTED_PRECOMPUTATION_LATENT_KEYS:
+                if not torch.is_tensor(latent[key]):
+                    continue
+                expected = self.EXPECTED_LATENT_SHAPES[key]
+                original = tuple(latent[key].shape[1:])
                 assert (
-                    latent[key].shape[1:] == self.EXPECTED_LATENT_SHAPES[key]
-                ), f"Latent shape mismatch for key: {key}"
+                    original == expected
+                ), f"Latent shape mismatch for key: {key}. expected={expected}, got={original}"
 
             for key in self.EXPECTED_PRECOMPUTATION_CONDITION_KEYS:
+                if not torch.is_tensor(condition[key]):
+                    continue
+                expected = self.EXPECTED_CONDITION_SHAPES[key]
+                original = tuple(condition[key].shape[1:])
                 assert (
-                    condition[key].shape[1:] == self.EXPECTED_CONDITION_SHAPES[key]
-                ), f"Condition shape mismatch for key: {key}"
-
-    def test_precomputation_txt_format_creates_files(self):
-        training_args = self.get_training_args()
+                    original == expected
+                ), f"Condition shape mismatch for key: {key}. expected={expected}, got={original}"
+
+    def _setup_trainer(self, tmpdir):
+        """
+        Helper method to reduce duplication across tests.
+        Creates and returns a trainer, along with updated training args.
+        """
+        training_args = self.get_training_args()
+        training_args.data_root = Path(self.download_dataset_txt_format(cache_dir=tmpdir))
+        training_args.video_column = "videos.txt"
+        training_args.caption_column = "prompt.txt"
+        training_args.output_dir = tmpdir
+
+        trainer = Trainer(training_args)
+        # Trainer may update the training_args internally, so refresh the reference
+        training_args = trainer.args
+        trainer.prepare_dataset()
+        trainer.prepare_models()
+        return trainer, training_args
 
+    def test_precomputation_txt_format_creates_files(self):
         with tempfile.TemporaryDirectory() as tmpdir:
-            training_args.data_root = Path(self.download_dataset_txt_format(cache_dir=tmpdir))
-            training_args.video_column = "videos.txt"
-            training_args.caption_column = "prompt.txt"
+            trainer, training_args = self._setup_trainer(tmpdir)
 
+            # Load video paths (only needed in this test)
             with open(training_args.data_root / training_args.video_column, "r", encoding="utf-8") as file:
                 video_paths = [training_args.data_root / line.strip() for line in file if line.strip()]
 
-            training_args.output_dir = tmpdir
-            trainer = Trainer(training_args)
-            training_args = trainer.args
-
-            trainer.prepare_dataset()
-            trainer.prepare_models()
             trainer.prepare_precomputations()
 
             precomputation_dir = self.get_precomputation_dir(training_args)
@@ -142,22 +164,14 @@ class TrainerTestMixin:
             self._verify_precomputed_files(video_paths, all_conditions, all_latents)
 
     def test_precomputation_txt_format_matches_shapes(self):
-        training_args = self.get_training_args()
-
         with tempfile.TemporaryDirectory() as tmpdir:
-            training_args.data_root = Path(self.download_dataset_txt_format(cache_dir=tmpdir))
-            training_args.video_column = "videos.txt"
-            training_args.caption_column = "prompt.txt"
+            trainer, training_args = self._setup_trainer(tmpdir)
 
-            training_args.output_dir = tmpdir
-            trainer = Trainer(training_args)
-            training_args = trainer.args
-
-            trainer.prepare_dataset()
-            trainer.prepare_models()
             with self.assertLogs(level="INFO") as captured:
                 trainer.prepare_precomputations()
-            assert any("Precomputed data not found. Running precomputation." in msg for msg in captured.output)
+            assert any(
+                "Precomputed data not found. Running precomputation." in msg for msg in captured.output
+            ), "Expected info log about missing precomputed data."
 
             precomputation_dir = self.get_precomputation_dir(training_args)
             conditions_dir = precomputation_dir / PRECOMPUTED_CONDITIONS_DIR_NAME
             latents_dir = precomputation_dir / PRECOMPUTED_LATENTS_DIR_NAME
 
             latent_files = list(latents_dir.glob("*.pt"))
             condition_files = list(conditions_dir.glob("*.pt"))
 
             self._verify_shapes(latent_files, condition_files)
 
     def test_precomputation_txt_format_no_redo(self):
-        training_args = self.get_training_args()
-
         with tempfile.TemporaryDirectory() as tmpdir:
-            training_args.data_root = Path(self.download_dataset_txt_format(cache_dir=tmpdir))
-            training_args.video_column = "videos.txt"
-            training_args.caption_column = "prompt.txt"
+            trainer, _ = self._setup_trainer(tmpdir)
 
-            training_args.output_dir = tmpdir
-            trainer = Trainer(training_args)
-            training_args = trainer.args
-
-            trainer.prepare_dataset()
-            trainer.prepare_models()
+            # should create new precomputations
             trainer.prepare_precomputations()
 
+            # should detect existing precomputations and not redo
             with self.assertLogs(level="INFO") as captured:
                 trainer.prepare_precomputations()
 
             assert any(
                 "Precomputed conditions and latents found. Loading precomputed data" in msg for msg in captured.output
-            )
+            ), "Expected info log about found precomputations."
 
             precomputation_dir = self.get_precomputation_dir(training_args)
             conditions_dir = precomputation_dir / PRECOMPUTED_CONDITIONS_DIR_NAME
@@ -147,24 +161,16 @@ def test_precomputation_txt_format_matches_shapes(self):
         self._verify_shapes(latent_files, condition_files)
 
     def test_precomputation_txt_format_no_redo(self):
-        training_args = self.get_training_args()
-
         with tempfile.TemporaryDirectory() as tmpdir:
-            training_args.data_root = Path(self.download_dataset_txt_format(cache_dir=tmpdir))
-            training_args.video_column = "videos.txt"
-            training_args.caption_column = "prompt.txt"
-
-            training_args.output_dir = tmpdir
-            trainer = Trainer(training_args)
-            training_args = trainer.args
+            trainer, _ = self._setup_trainer(tmpdir)
 
-            trainer.prepare_dataset()
-            trainer.prepare_models()
+            # should create new precomputations
             trainer.prepare_precomputations()
 
+            # should detect existing precomputations and not redo
             with self.assertLogs(level="INFO") as captured:
                 trainer.prepare_precomputations()
             assert any(
                 "Precomputed conditions and latents found. Loading precomputed data" in msg
                 for msg in captured.output
-            )
+            ), "Expected info log about found precomputations."

From 778d0774d6e17db964e3c3e54fc3eeeda9d49677 Mon Sep 17 00:00:00 2001
From: sayakpaul
Date: Wed, 29 Jan 2025 17:55:01 +0530
Subject: [PATCH 16/19] updates

---
 tests/trainers/hunyaun_video/__init__.py      |  0
 .../hunyaun_video/test_hunyaun_video.py       | 46 +++++++++++++++++++
 2 files changed, 46 insertions(+)
 create mode 100644 tests/trainers/hunyaun_video/__init__.py
 create mode 100644 tests/trainers/hunyaun_video/test_hunyaun_video.py

diff --git a/tests/trainers/hunyaun_video/__init__.py b/tests/trainers/hunyaun_video/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/tests/trainers/hunyaun_video/test_hunyaun_video.py b/tests/trainers/hunyaun_video/test_hunyaun_video.py
new file mode 100644
index 00000000..7d0916ed
--- /dev/null
+++ b/tests/trainers/hunyaun_video/test_hunyaun_video.py
@@ -0,0 +1,46 @@
+import sys
+import unittest
+from pathlib import Path
+
+
+current_file = Path(__file__).resolve()
+root_dir = current_file.parents[3]
+sys.path.append(str(root_dir))
+
+from finetrainers import Args  # noqa
+from ..test_trainers_common import TrainerTestMixin, parse_resolution_bucket  # noqa
+
+
+class LTXVideoTester(unittest.TestCase, TrainerTestMixin):
+    MODEL_NAME = "hunyuan_video"
+    EXPECTED_PRECOMPUTATION_LATENT_KEYS = {"latents"}
+    EXPECTED_PRECOMPUTATION_CONDITION_KEYS = {"pooled_prompt_embeds", "prompt_attention_mask", "prompt_embeds"}
+
+    def get_training_args(self):
+        args = Args()
+        args.model_name = self.MODEL_NAME
+        args.training_type = "lora"
+        args.pretrained_model_name_or_path = "finetrainers/dummy-hunyaunvideo"
+        args.data_root = ""  # will be set from the tester method.
+        args.video_resolution_buckets = [parse_resolution_bucket("9x16x16")]
+        args.precompute_conditions = True
+        args.validation_prompts = []
+        args.validation_heights = []
+        args.validation_widths = []
+        return args
+
+    @property
+    def latent_output_shape(self):
+        # only tensor object shapes
+        return (16, 3, 4, 4)
+
+    @property
+    def condition_output_shape(self):
+        # only tensor object shapes
+        return (), (128,), (128, 32)
+
+    def populate_shapes(self):
+        for i, k in enumerate(sorted(self.EXPECTED_PRECOMPUTATION_LATENT_KEYS)):
+            self.EXPECTED_LATENT_SHAPES[k] = self.latent_output_shape[i]
+        for i, k in enumerate(sorted(self.EXPECTED_PRECOMPUTATION_CONDITION_KEYS)):
+            self.EXPECTED_CONDITION_SHAPES[k] = self.condition_output_shape[i]

From 0f8b4bbfcb1708d732bc503901da4102de9cf1ad Mon Sep 17 00:00:00 2001
From: sayakpaul
Date: Thu, 30 Jan 2025 11:44:03 +0530
Subject: [PATCH 17/19] updates

---
 tests/trainers/hunyaun_video/test_hunyaun_video.py | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/tests/trainers/hunyaun_video/test_hunyaun_video.py b/tests/trainers/hunyaun_video/test_hunyaun_video.py
index 7d0916ed..65194097 100644
--- a/tests/trainers/hunyaun_video/test_hunyaun_video.py
+++ b/tests/trainers/hunyaun_video/test_hunyaun_video.py
@@ -11,10 +11,15 @@
 from ..test_trainers_common import TrainerTestMixin, parse_resolution_bucket  # noqa
 
 
-class LTXVideoTester(unittest.TestCase, TrainerTestMixin):
+class HunyuanVideoTester(unittest.TestCase, TrainerTestMixin):
     MODEL_NAME = "hunyuan_video"
     EXPECTED_PRECOMPUTATION_LATENT_KEYS = {"latents"}
-    EXPECTED_PRECOMPUTATION_CONDITION_KEYS = {"pooled_prompt_embeds", "prompt_attention_mask", "prompt_embeds"}
+    EXPECTED_PRECOMPUTATION_CONDITION_KEYS = {
+        "guidance",
+        "pooled_prompt_embeds",
+        "prompt_attention_mask",
+        "prompt_embeds",
+    }
 
     def get_training_args(self):
         args = Args()

From 9595803cec21eb363b26acaf35fa9753723142c8 Mon Sep 17 00:00:00 2001
From: sayakpaul
Date: Thu, 30 Jan 2025 12:37:20 +0530
Subject: [PATCH 18/19] updates

---
 finetrainers/models/hunyuan_video/lora.py          | 1 +
 tests/trainers/hunyaun_video/test_hunyaun_video.py | 6 +++---
 tests/trainers/test_trainers_common.py             | 1 +
 3 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/finetrainers/models/hunyuan_video/lora.py b/finetrainers/models/hunyuan_video/lora.py
index 1d8ccd1f..7f45fd9b 100644
--- a/finetrainers/models/hunyuan_video/lora.py
+++ b/finetrainers/models/hunyuan_video/lora.py
@@ -202,6 +202,7 @@ def prepare_latents(
         h = torch.cat(encoded_slices)
     else:
         h = vae._encode(image_or_video)
+    print(f"{h.shape=}")
 
     return {"latents": h}
 
diff --git a/tests/trainers/hunyaun_video/test_hunyaun_video.py b/tests/trainers/hunyaun_video/test_hunyaun_video.py
index 65194097..935223ba 100644
--- a/tests/trainers/hunyaun_video/test_hunyaun_video.py
+++ b/tests/trainers/hunyaun_video/test_hunyaun_video.py
@@ -37,15 +37,15 @@ def get_training_args(self):
     @property
     def latent_output_shape(self):
         # only tensor object shapes
-        return (16, 3, 4, 4)
+        return (8, 3, 2, 2)
 
     @property
     def condition_output_shape(self):
         # only tensor object shapes
-        return (), (128,), (128, 32)
+        return (), (8,), (256,), (256, 16)
 
     def populate_shapes(self):
         for i, k in enumerate(sorted(self.EXPECTED_PRECOMPUTATION_LATENT_KEYS)):
-            self.EXPECTED_LATENT_SHAPES[k] = self.latent_output_shape[i]
+            self.EXPECTED_LATENT_SHAPES[k] = self.latent_output_shape
         for i, k in enumerate(sorted(self.EXPECTED_PRECOMPUTATION_CONDITION_KEYS)):
             self.EXPECTED_CONDITION_SHAPES[k] = self.condition_output_shape[i]
diff --git a/tests/trainers/test_trainers_common.py b/tests/trainers/test_trainers_common.py
index 9be7294a..3486c4f1 100644
--- a/tests/trainers/test_trainers_common.py
+++ b/tests/trainers/test_trainers_common.py
@@ -86,6 +86,7 @@ def _verify_shapes(self, latent_files, condition_files):
                 continue
             expected = self.EXPECTED_LATENT_SHAPES[key]
             original = tuple(latent[key].shape[1:])
+            print(f"{key=}")
             assert (
                 original == expected
             ), f"Latent shape mismatch for key: {key}. expected={expected}, got={original}"

From 2c7f758a0465d7b37b4983ff770fa025ebcc963c Mon Sep 17 00:00:00 2001
From: sayakpaul
Date: Thu, 30 Jan 2025 12:37:53 +0530
Subject: [PATCH 19/19] fixes

---
 finetrainers/models/hunyuan_video/lora.py | 1 -
 tests/trainers/test_trainers_common.py    | 1 -
 2 files changed, 2 deletions(-)

diff --git a/finetrainers/models/hunyuan_video/lora.py b/finetrainers/models/hunyuan_video/lora.py
index 7f45fd9b..1d8ccd1f 100644
--- a/finetrainers/models/hunyuan_video/lora.py
+++ b/finetrainers/models/hunyuan_video/lora.py
@@ -202,7 +202,6 @@ def prepare_latents(
         h = torch.cat(encoded_slices)
     else:
         h = vae._encode(image_or_video)
-    print(f"{h.shape=}")
 
     return {"latents": h}
 
diff --git a/tests/trainers/test_trainers_common.py b/tests/trainers/test_trainers_common.py
index 3486c4f1..9be7294a 100644
--- a/tests/trainers/test_trainers_common.py
+++ b/tests/trainers/test_trainers_common.py
@@ -86,7 +86,6 @@ def _verify_shapes(self, latent_files, condition_files):
                 continue
             expected = self.EXPECTED_LATENT_SHAPES[key]
             original = tuple(latent[key].shape[1:])
-            print(f"{key=}")
             assert (
                 original == expected
             ), f"Latent shape mismatch for key: {key}. expected={expected}, got={original}"
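
A note on the shape bookkeeping used by these testers: populate_shapes pairs each
expected key, taken in sorted (alphabetical) order, with the shape tuple at the same
position in condition_output_shape (and analogously for latent_output_shape). A minimal,
self-contained sketch of that contract, reusing the hunyuan-video keys and shapes from
PATCH 18 purely for illustration:

    # Sketch: sorted condition keys pair positionally with the declared shape tuples.
    EXPECTED_PRECOMPUTATION_CONDITION_KEYS = {
        "guidance",
        "pooled_prompt_embeds",
        "prompt_attention_mask",
        "prompt_embeds",
    }
    # One entry per key, in sorted key order:
    # guidance -> (), pooled_prompt_embeds -> (8,),
    # prompt_attention_mask -> (256,), prompt_embeds -> (256, 16)
    condition_output_shape = ((), (8,), (256,), (256, 16))

    EXPECTED_CONDITION_SHAPES = {}
    for i, k in enumerate(sorted(EXPECTED_PRECOMPUTATION_CONDITION_KEYS)):
        EXPECTED_CONDITION_SHAPES[k] = condition_output_shape[i]

    assert EXPECTED_CONDITION_SHAPES["guidance"] == ()
    assert EXPECTED_CONDITION_SHAPES["prompt_embeds"] == (256, 16)

This positional contract is why condition_output_shape must declare exactly one tuple
per expected key, and why adding a key (such as "guidance" in PATCH 17) requires
extending the tuple in the matching position.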
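
Relatedly, _verify_shapes compares tuple(tensor.shape[1:]) against these expected
shapes, which assumes each precomputed tensor is saved with a leading batch dimension;
non-tensor entries are skipped via torch.is_tensor. A sketch of that check, where the
batch size of 1 and the sample values are assumptions for illustration only:

    import torch

    # Hypothetical precomputed latent file contents: a tensor with an assumed
    # leading batch dimension of 1, plus scalar metadata.
    latent = {"latents": torch.randn(1, 8, 3, 2, 2), "height": 16}

    expected = (8, 3, 2, 2)  # per-sample latent shape, as in the hunyuan-video tester
    for key, value in latent.items():
        if not torch.is_tensor(value):
            continue  # skip non-tensor entries such as height/width/num_frames
        original = tuple(value.shape[1:])  # drop the batch dimension
        assert original == expected, f"expected={expected}, got={original}"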