a-r-r-o-w · sayakpaul · Jan 10, 2025 · Jan 9, 2025 · Jan 9, 2025 · Jan 9, 2025
diff --git a/tests/scripts/dummy_cogvideox_lora.sh b/tests/scripts/dummy_cogvideox_lora.sh
@@ -0,0 +1,81 @@
+#!/bin/bash
+# Smoke test: short (10-step) CogVideoX LoRA run launched through `accelerate launch`.
+# ROOT_DIR must be provided by the caller; the dataset, accelerate config and train.py are resolved from it.
+GPU_IDS="0,1"
+DATA_ROOT="${ROOT_DIR:?ROOT_DIR must be set}/video-dataset-disney"
+CAPTION_COLUMN="prompt.txt"
+VIDEO_COLUMN="videos.txt"
+OUTPUT_DIR="cogvideox"
+ID_TOKEN="BW_STYLE"
+
+# Model arguments
+model_cmd="--model_name cogvideox \
+  --pretrained_model_name_or_path THUDM/CogVideoX-5b"
+
+# Dataset arguments (the path carries escaped quotes so it survives `eval` even if it contains spaces)
+dataset_cmd="--data_root \"$DATA_ROOT\" \
+  --video_column $VIDEO_COLUMN \
+  --caption_column $CAPTION_COLUMN \
+  --id_token $ID_TOKEN \
+  --video_resolution_buckets 49x480x720 \
+  --caption_dropout_p 0.05"
+
+# Dataloader arguments (the only place --precompute_conditions is passed)
+dataloader_cmd="--dataloader_num_workers 0 --precompute_conditions"
+
+# Training arguments
+training_cmd="--training_type lora \
+  --seed 42 \
+  --mixed_precision bf16 \
+  --batch_size 1 \
+  --train_steps 10 \
+  --rank 128 \
+  --lora_alpha 128 \
+  --target_modules to_q to_k to_v to_out.0 \
+  --gradient_accumulation_steps 1 \
+  --gradient_checkpointing \
+  --checkpointing_steps 5 \
+  --checkpointing_limit 2 \
+  --resume_from_checkpoint=latest \
+  --enable_slicing \
+  --enable_tiling"
+
+# Optimizer arguments
+optimizer_cmd="--optimizer adamw \
+  --lr 3e-5 \
+  --beta1 0.9 \
+  --beta2 0.95 \
+  --weight_decay 1e-4 \
+  --epsilon 1e-8 \
+  --max_grad_norm 1.0"
+
+# Validation arguments: two prompts joined by ":::", each suffixed with "@@@49x512x768"
+validation_prompts=$(cat <<EOF
+$ID_TOKEN A black and white animated scene unfolds with an anthropomorphic goat surrounded by musical notes and symbols, suggesting a playful environment. Mickey Mouse appears, leaning forward in curiosity as the goat remains still. The goat then engages with Mickey, who bends down to converse or react. The dynamics shift as Mickey grabs the goat, potentially in surprise or playfulness, amidst a minimalistic background. The scene captures the evolving relationship between the two characters in a whimsical, animated setting, emphasizing their interactions and emotions.@@@49x512x768:::$ID_TOKEN A woman with long brown hair and light skin smiles at another woman with long blonde hair. The woman with brown hair wears a black jacket and has a small, barely noticeable mole on her right cheek. The camera angle is a close-up, focused on the woman with brown hair's face. The lighting is warm and natural, likely from the setting sun, casting a soft glow on the scene. The scene appears to be real-life footage@@@49x512x768
+EOF
+)
+validation_cmd="--validation_prompts \"$validation_prompts\" \
+   --validation_steps 5 \
+   --num_validation_videos 1"
+
+# Miscellaneous arguments
+miscellaneous_cmd="--tracker_name finetrainers-cog \
+  --output_dir $OUTPUT_DIR \
+  --nccl_timeout 1800 \
+  --report_to wandb"
+
+cmd="accelerate launch --config_file \"$ROOT_DIR/accelerate_configs/uncompiled_2.yaml\" --gpu_ids $GPU_IDS \"$ROOT_DIR/train.py\" \
+  $model_cmd \
+  $dataset_cmd \
+  $dataloader_cmd \
+  $training_cmd \
+  $optimizer_cmd \
+  $validation_cmd \
+  $miscellaneous_cmd"
+
+echo "Running command: $cmd"
+eval "$cmd"
+echo -ne "-------------------- Finished executing script --------------------\n\n"
+# Clean up: remove this run's output directory and any *_precomputed entries under the dataset root.
+rm -rf -- "$OUTPUT_DIR"
+rm -rf -- "${DATA_ROOT:?}"/*_precomputed
diff --git a/tests/scripts/dummy_hunyuanvideo_lora.sh b/tests/scripts/dummy_hunyuanvideo_lora.sh
@@ -0,0 +1,79 @@
+#!/bin/bash
+# Smoke test: short (10-step) HunyuanVideo LoRA run; ROOT_DIR must be provided by the caller.
+GPU_IDS="0,1"
+DATA_ROOT="${ROOT_DIR:?ROOT_DIR must be set}/video-dataset-disney"
+CAPTION_COLUMN="prompt.txt"
+VIDEO_COLUMN="videos.txt"
+OUTPUT_DIR="hunyuan-video"
+ID_TOKEN="BW_STYLE"
+
+# Model arguments
+model_cmd="--model_name hunyuan_video \
+  --pretrained_model_name_or_path hunyuanvideo-community/HunyuanVideo"
+
+# Dataset arguments (uses $ID_TOKEN so the training token matches the one in the validation prompts)
+dataset_cmd="--data_root \"$DATA_ROOT\" \
+  --video_column $VIDEO_COLUMN \
+  --caption_column $CAPTION_COLUMN \
+  --id_token $ID_TOKEN \
+  --video_resolution_buckets 24x512x768 \
+  --caption_dropout_p 0.05"
+
+# Dataloader arguments
+dataloader_cmd="--dataloader_num_workers 0 --precompute_conditions"
+
+# Training arguments
+training_cmd="--training_type lora \
+  --seed 42 \
+  --mixed_precision bf16 \
+  --batch_size 1 \
+  --train_steps 10 \
+  --rank 16 \
+  --lora_alpha 16 \
+  --target_modules to_q to_k to_v to_out.0 \
+  --gradient_accumulation_steps 1 \
+  --gradient_checkpointing \
+  --checkpointing_steps 5 \
+  --checkpointing_limit 2 \
+  --enable_slicing \
+  --enable_tiling"
+
+# Optimizer arguments
+optimizer_cmd="--optimizer adamw \
+  --lr 3e-5 \
+  --beta1 0.9 \
+  --beta2 0.95 \
+  --weight_decay 1e-4 \
+  --epsilon 1e-8 \
+  --max_grad_norm 1.0"
+
+# Validation arguments: two prompts joined by ":::", each suffixed with "@@@49x512x768"
+validation_prompts=$(cat <<EOF
+$ID_TOKEN A black and white animated scene unfolds with an anthropomorphic goat surrounded by musical notes and symbols, suggesting a playful environment. Mickey Mouse appears, leaning forward in curiosity as the goat remains still. The goat then engages with Mickey, who bends down to converse or react. The dynamics shift as Mickey grabs the goat, potentially in surprise or playfulness, amidst a minimalistic background. The scene captures the evolving relationship between the two characters in a whimsical, animated setting, emphasizing their interactions and emotions.@@@49x512x768:::$ID_TOKEN A woman with long brown hair and light skin smiles at another woman with long blonde hair. The woman with brown hair wears a black jacket and has a small, barely noticeable mole on her right cheek. The camera angle is a close-up, focused on the woman with brown hair's face. The lighting is warm and natural, likely from the setting sun, casting a soft glow on the scene. The scene appears to be real-life footage@@@49x512x768
+EOF
+)
+validation_cmd="--validation_prompts \"$validation_prompts\" \
+   --validation_steps 5 \
+   --num_validation_videos 1"
+
+# Miscellaneous arguments
+miscellaneous_cmd="--tracker_name finetrainers-hunyuan-video \
+  --output_dir $OUTPUT_DIR \
+  --nccl_timeout 1800 \
+  --report_to wandb"
+
+cmd="accelerate launch --config_file \"$ROOT_DIR/accelerate_configs/uncompiled_2.yaml\" --gpu_ids $GPU_IDS \"$ROOT_DIR/train.py\" \
+  $model_cmd \
+  $dataset_cmd \
+  $dataloader_cmd \
+  $training_cmd \
+  $optimizer_cmd \
+  $validation_cmd \
+  $miscellaneous_cmd"
+# The command string embeds escaped quotes, so it is re-parsed with eval (quoted to avoid extra splitting).
+echo "Running command: $cmd"
+eval "$cmd"
+echo -ne "-------------------- Finished executing script --------------------\n\n"
+# Clean up: remove this run's output directory and any *_precomputed entries under the dataset root.
+rm -rf -- "$OUTPUT_DIR"
+rm -rf -- "${DATA_ROOT:?}"/*_precomputed
diff --git a/tests/scripts/dummy_ltx_video_lora.sh b/tests/scripts/dummy_ltx_video_lora.sh
@@ -0,0 +1,84 @@
+#!/bin/bash
+# Smoke test: short (10-step) LTX-Video LoRA run; ROOT_DIR must be provided by the caller.
+GPU_IDS="0,1"
+DATA_ROOT="${ROOT_DIR:?ROOT_DIR must be set}/video-dataset-disney"
+CAPTION_COLUMN="prompt.txt"
+VIDEO_COLUMN="videos.txt"
+OUTPUT_DIR="ltx-video"
+ID_TOKEN="BW_STYLE"
+
+# Model arguments
+model_cmd="--model_name ltx_video \
+  --pretrained_model_name_or_path Lightricks/LTX-Video"
+
+# Dataset arguments (the path carries escaped quotes so it survives `eval` even if it contains spaces)
+dataset_cmd="--data_root \"$DATA_ROOT\" \
+  --video_column $VIDEO_COLUMN \
+  --caption_column $CAPTION_COLUMN \
+  --id_token $ID_TOKEN \
+  --video_resolution_buckets 49x512x768 \
+  --caption_dropout_p 0.05"
+
+# Dataloader arguments
+dataloader_cmd="--dataloader_num_workers 0 --precompute_conditions"
+
+# Diffusion arguments
+diffusion_cmd="--flow_resolution_shifting"
+
+# Training arguments
+training_cmd="--training_type lora \
+  --seed 42 \
+  --mixed_precision bf16 \
+  --batch_size 1 \
+  --train_steps 10 \
+  --rank 128 \
+  --lora_alpha 128 \
+  --target_modules to_q to_k to_v to_out.0 \
+  --gradient_accumulation_steps 1 \
+  --gradient_checkpointing \
+  --checkpointing_steps 5 \
+  --checkpointing_limit 2 \
+  --enable_slicing \
+  --enable_tiling"
+
+# Optimizer arguments
+optimizer_cmd="--optimizer adamw \
+  --lr 3e-5 \
+  --beta1 0.9 \
+  --beta2 0.95 \
+  --weight_decay 1e-4 \
+  --epsilon 1e-8 \
+  --max_grad_norm 1.0"
+
+# Validation arguments: two prompts joined by ":::", each suffixed with "@@@49x512x768"
+validation_prompts=$(cat <<EOF
+$ID_TOKEN A black and white animated scene unfolds with an anthropomorphic goat surrounded by musical notes and symbols, suggesting a playful environment. Mickey Mouse appears, leaning forward in curiosity as the goat remains still. The goat then engages with Mickey, who bends down to converse or react. The dynamics shift as Mickey grabs the goat, potentially in surprise or playfulness, amidst a minimalistic background. The scene captures the evolving relationship between the two characters in a whimsical, animated setting, emphasizing their interactions and emotions.@@@49x512x768:::$ID_TOKEN A woman with long brown hair and light skin smiles at another woman with long blonde hair. The woman with brown hair wears a black jacket and has a small, barely noticeable mole on her right cheek. The camera angle is a close-up, focused on the woman with brown hair's face. The lighting is warm and natural, likely from the setting sun, casting a soft glow on the scene. The scene appears to be real-life footage@@@49x512x768
+EOF
+)
+validation_cmd="--validation_prompts \"$validation_prompts\" \
+   --validation_steps 5 \
+   --num_validation_videos 1"
+
+# Miscellaneous arguments
+miscellaneous_cmd="--tracker_name finetrainers-ltxv \
+  --output_dir $OUTPUT_DIR \
+  --nccl_timeout 1800 \
+  --report_to wandb"
+
+cmd="accelerate launch --config_file \"$ROOT_DIR/accelerate_configs/uncompiled_2.yaml\" --gpu_ids $GPU_IDS \"$ROOT_DIR/train.py\" \
+  $model_cmd \
+  $dataset_cmd \
+  $dataloader_cmd \
+  $diffusion_cmd \
+  $training_cmd \
+  $optimizer_cmd \
+  $validation_cmd \
+  $miscellaneous_cmd"
+# The command string embeds escaped quotes, so it is re-parsed with eval (quoted to avoid extra splitting).
+echo "Running command: $cmd"
+eval "$cmd"
+
+echo "Removing $OUTPUT_DIR"
+rm -rf -- "$OUTPUT_DIR"
+echo "Removing precomputed stuff in $DATA_ROOT"
+rm -rf -- "${DATA_ROOT:?}"/*_precomputed
diff --git a/tests/test_dataset.py b/tests/test_dataset.py
@@ -4,7 +4,7 @@
 
 
 def test_video_dataset():
-    from dataset import VideoDataset
+    from cogvideox.dataset import VideoDataset
 
     dataset_dirs = VideoDataset(
         data_root="assets/tests/",
@@ -33,7 +33,7 @@ def test_video_dataset():
 
 
 def test_video_dataset_with_resizing():
-    from dataset import VideoDatasetWithResizing
+    from cogvideox.dataset import VideoDatasetWithResizing
 
     dataset_dirs = VideoDatasetWithResizing(
         data_root="assets/tests/",
@@ -63,7 +63,7 @@ def test_video_dataset_with_resizing():
 
 def test_video_dataset_with_bucket_sampler():
     import torch
-    from dataset import BucketSampler, VideoDatasetWithResizing
+    from cogvideox.dataset import BucketSampler, VideoDatasetWithResizing
     from torch.utils.data import DataLoader
 
     dataset_dirs = VideoDatasetWithResizing(

diff --git a/tests/test_model_runs_minimally_lora.sh b/tests/test_model_runs_minimally_lora.sh
@@ -0,0 +1,48 @@
+#!/bin/bash
+
+# This shell script is for the maintainers and contributors to QUICKLY check
+# if the major changes they're introducing still work with the rest of the models supported
+# in `finetrainers`. It DOES NOT give a sense of implementation correctness as that requires
+# much longer training runs but it DOES ensure basic functionalities work in the large training
+# setup.
+
+# It should be run as so from the root of `finetrainers`: `bash tests/test_model_runs_minimally_lora.sh`
+
+######################################################
+# Set common variables.
+######################################################
+# ROOT_DIR is the parent of this script's directory; abort if it cannot be resolved.
+ROOT_DIR="$(cd "$(dirname "$0")/.." && pwd)" || exit 1
+export ROOT_DIR
+export WANDB_MODE="offline"
+export NCCL_P2P_DISABLE=1
+export TORCH_NCCL_ENABLE_MONITORING=0
+export FINETRAINERS_LOG_LEVEL=DEBUG
+
+echo "Using $ROOT_DIR as rootdir."
+
+######################################################
+# Download Disney dataset.
+######################################################
+
+# Ensure dataset is downloaded (skipped when the target directory already exists)
+DATA_ROOT="$ROOT_DIR/video-dataset-disney"
+if [ ! -d "$DATA_ROOT" ]; then
+    echo "Downloading Disney dataset to $DATA_ROOT..."
+    huggingface-cli download \
+        --repo-type dataset Wild-Heart/Disney-VideoGeneration-Dataset \
+        --local-dir "$DATA_ROOT"
+else
+    echo "Dataset already exists at $DATA_ROOT. Skipping download."
+fi
+
+######################################################
+# Run models
+######################################################
+
+# Define models to test (each name maps to tests/scripts/<name>.sh)
+models=("dummy_ltx_video_lora" "dummy_cogvideox_lora" "dummy_hunyuanvideo_lora")
+for model_script in "${models[@]}"; do
+    echo "Running $model_script test..."
+    bash "$ROOT_DIR/tests/scripts/$model_script.sh"
+done