hao-ai-lab · JiayiZhangA · Aug 15, 2025 · Aug 16, 2025 · Aug 16, 2025 · Aug 17, 2025
diff --git a/.github/workflows/build-image.yml b/.github/workflows/build-image.yml
@@ -18,6 +18,12 @@ on:
         required: false
         default: false
         type: boolean
+      python_3_12_cuda_12_9:
+        description: 'Build Python 3.12 image Cuda 12.9'
+        required: false
+        default: false
+        type: boolean
+
 
 permissions:
   contents: read
@@ -49,4 +55,13 @@ jobs:
       python_version: '3.12'
       dockerfile_path: docker/Dockerfile.python3.12
       tag_suffix: py3.12
+    secrets: inherit
+
+  build-python-3-12-cuda-12-9:
+    if: ${{ github.event.inputs.python_3_12_cuda_12_9 == 'true' }}
+    uses: ./.github/workflows/build-image-template.yml
+    with:
+      python_version: '3.12'
+      dockerfile_path: docker/Dockerfile.python3.12.cuda12.9.1
+      tag_suffix: py3.12-cuda12.9.1
     secrets: inherit
diff --git a/assets/full.svg b/assets/full.svg
diff --git a/assets/icon-simple.svg b/assets/icon-simple.svg
diff --git a/docker/Dockerfile.python3.10 b/docker/Dockerfile.python3.10
@@ -1,48 +1,72 @@
-FROM nvidia/cuda:12.4.1-devel-ubuntu20.04
+FROM nvidia/cuda:12.8.0-devel-ubuntu22.04
 
 ENV DEBIAN_FRONTEND=noninteractive
 
+SHELL ["/bin/bash", "-c"]
+
 WORKDIR /FastVideo
 
 RUN apt-get update && apt-get install -y --no-install-recommends \
     wget \
     git \
     ca-certificates \
     openssh-server \
+    zsh \
+    vim \
+    curl \
+    gcc-11 \
+    g++-11 \
+    clang-11 \
     && rm -rf /var/lib/apt/lists/*
 
-RUN wget https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh && \
-    bash Miniconda3-latest-Linux-x86_64.sh -b -p /opt/conda && \
-    rm Miniconda3-latest-Linux-x86_64.sh
+# Set up C++20 compilers for ThunderKittens
+RUN update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-11 100 --slave /usr/bin/g++ g++ /usr/bin/g++-11
 
-ENV PATH=/opt/conda/bin:$PATH
+# Set CUDA environment variables
+ENV CUDA_HOME=/usr/local/cuda-12.8
+ENV PATH=${CUDA_HOME}/bin:${PATH}
+ENV LD_LIBRARY_PATH=${CUDA_HOME}/lib64:$LD_LIBRARY_PATH
 
-RUN conda create --name fastvideo-dev python=3.10.0 -y
-
-SHELL ["/bin/bash", "-c"]
+# Install uv and add its env script to root's .bashrc; pipefail makes a failed download fail the build instead of piping nothing into sh
+RUN set -o pipefail && curl -LsSf https://astral.sh/uv/install.sh | sh && \
+    echo 'source $HOME/.local/bin/env' >> /root/.bashrc
 
 # Copy just the pyproject.toml first to leverage Docker cache
 COPY pyproject.toml ./
 
 # Create a dummy README to satisfy the installation
 RUN echo "# Placeholder" > README.md
 
-RUN conda run -n fastvideo-dev pip install --no-cache-dir --upgrade pip && \
-    conda run -n fastvideo-dev pip install --no-cache-dir .[dev] && \
-    conda run -n fastvideo-dev pip install --no-cache-dir flash-attn==2.7.4.post1 --no-build-isolation && \
-    conda clean -afy
+# Create the /opt/venv virtual environment (Python 3.10, --seed), then install the project's dev extras and flash-attn into it
+RUN source $HOME/.local/bin/env && \
+    uv venv --python 3.10 --seed /opt/venv && \
+    source /opt/venv/bin/activate && \
+    uv pip install --no-cache-dir --upgrade pip && \
+    uv pip install --no-cache-dir .[dev] && \
+    uv pip install --no-cache-dir flash-attn==2.8.3 --no-build-isolation
 
 COPY . .
 
-RUN conda run -n fastvideo-dev pip install --no-cache-dir -e .[dev]
+# Editable install of the project, best-effort removal of the GitHub auth header (parenthesized so `|| true` cannot mask an install failure), then shell startup files
+RUN source $HOME/.local/bin/env && \
+    source /opt/venv/bin/activate && \
+    uv pip install --no-cache-dir -e .[dev] && \
+    (git config --unset-all http.https://github.com/.extraheader || true) && \
+    echo 'source /opt/venv/bin/activate' >> /root/.bashrc && \
+    echo 'if [ -n "$ZSH_VERSION" ] && [ -f ~/.zshrc ]; then . ~/.zshrc; elif [ -f ~/.bashrc ]; then . ~/.bashrc; fi' > /root/.profile
 
-# Remove authentication headers
-RUN git config --unset-all http.https://github.com/.extraheader || true
+# Install STA (Sliding Tile Attention) from csrc/attn into the venv, initializing git submodules first
+RUN source $HOME/.local/bin/env && \
+    source /opt/venv/bin/activate && \
+    cd csrc/attn && \
+    git submodule update --init --recursive && \
+    python setup_sta.py install
 
-# Set up automatic conda environment activation for all shells
-RUN echo 'source /opt/conda/etc/profile.d/conda.sh' >> /root/.bashrc && \
-    echo 'conda activate fastvideo-dev' >> /root/.bashrc && \
-    # Ensure .bashrc is sourced for SSH login shells
-    echo 'if [ -f ~/.bashrc ]; then . ~/.bashrc; fi' > /root/.profile
+# Install VSA (Video Sparse Attention) from csrc/attn into the venv, initializing git submodules first
+RUN source $HOME/.local/bin/env && \
+    source /opt/venv/bin/activate && \
+    cd csrc/attn && \
+    git submodule update --init --recursive && \
+    python setup_vsa.py install
 
 EXPOSE 22
diff --git a/docker/Dockerfile.python3.11 b/docker/Dockerfile.python3.11
@@ -1,48 +1,72 @@
-FROM nvidia/cuda:12.4.1-devel-ubuntu20.04
+FROM nvidia/cuda:12.8.0-devel-ubuntu22.04
 
 ENV DEBIAN_FRONTEND=noninteractive
 
+SHELL ["/bin/bash", "-c"]
+
 WORKDIR /FastVideo
 
 RUN apt-get update && apt-get install -y --no-install-recommends \
     wget \
     git \
     ca-certificates \
     openssh-server \
+    zsh \
+    vim \
+    curl \
+    gcc-11 \
+    g++-11 \
+    clang-11 \
     && rm -rf /var/lib/apt/lists/*
 
-RUN wget https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh && \
-    bash Miniconda3-latest-Linux-x86_64.sh -b -p /opt/conda && \
-    rm Miniconda3-latest-Linux-x86_64.sh
+# Set up C++20 compilers for ThunderKittens
+RUN update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-11 100 --slave /usr/bin/g++ g++ /usr/bin/g++-11
 
-ENV PATH=/opt/conda/bin:$PATH
+# Set CUDA environment variables
+ENV CUDA_HOME=/usr/local/cuda-12.8
+ENV PATH=${CUDA_HOME}/bin:${PATH}
+ENV LD_LIBRARY_PATH=${CUDA_HOME}/lib64:$LD_LIBRARY_PATH
 
-RUN conda create --name fastvideo-dev python=3.11.11 -y
-
-SHELL ["/bin/bash", "-c"]
+# Install uv and add its env script to root's .bashrc; pipefail makes a failed download fail the build instead of piping nothing into sh
+RUN set -o pipefail && curl -LsSf https://astral.sh/uv/install.sh | sh && \
+    echo 'source $HOME/.local/bin/env' >> /root/.bashrc
 
 # Copy just the pyproject.toml first to leverage Docker cache
 COPY pyproject.toml ./
 
 # Create a dummy README to satisfy the installation
 RUN echo "# Placeholder" > README.md
 
-RUN conda run -n fastvideo-dev pip install --no-cache-dir --upgrade pip && \
-    conda run -n fastvideo-dev pip install --no-cache-dir .[dev] && \
-    conda run -n fastvideo-dev pip install --no-cache-dir flash-attn==2.7.4.post1 --no-build-isolation && \
-    conda clean -afy
+# Create the /opt/venv virtual environment (Python 3.11, --seed), then install the project's dev extras and flash-attn into it
+RUN source $HOME/.local/bin/env && \
+    uv venv --python 3.11 --seed /opt/venv && \
+    source /opt/venv/bin/activate && \
+    uv pip install --no-cache-dir --upgrade pip && \
+    uv pip install --no-cache-dir .[dev] && \
+    uv pip install --no-cache-dir flash-attn==2.8.3 --no-build-isolation
 
 COPY . .
 
-RUN conda run -n fastvideo-dev pip install --no-cache-dir -e .[dev]
+# Editable install of the project, best-effort removal of the GitHub auth header (parenthesized so `|| true` cannot mask an install failure), then shell startup files
+RUN source $HOME/.local/bin/env && \
+    source /opt/venv/bin/activate && \
+    uv pip install --no-cache-dir -e .[dev] && \
+    (git config --unset-all http.https://github.com/.extraheader || true) && \
+    echo 'source /opt/venv/bin/activate' >> /root/.bashrc && \
+    echo 'if [ -n "$ZSH_VERSION" ] && [ -f ~/.zshrc ]; then . ~/.zshrc; elif [ -f ~/.bashrc ]; then . ~/.bashrc; fi' > /root/.profile
 
-# Remove authentication headers
-RUN git config --unset-all http.https://github.com/.extraheader || true
+# Install STA (Sliding Tile Attention) from csrc/attn into the venv, initializing git submodules first
+RUN source $HOME/.local/bin/env && \
+    source /opt/venv/bin/activate && \
+    cd csrc/attn && \
+    git submodule update --init --recursive && \
+    python setup_sta.py install
 
-# Set up automatic conda environment activation for all shells
-RUN echo 'source /opt/conda/etc/profile.d/conda.sh' >> /root/.bashrc && \
-    echo 'conda activate fastvideo-dev' >> /root/.bashrc && \
-    # Ensure .bashrc is sourced for SSH login shells
-    echo 'if [ -f ~/.bashrc ]; then . ~/.bashrc; fi' > /root/.profile
+# Install VSA (Video Sparse Attention) from csrc/attn into the venv, initializing git submodules first
+RUN source $HOME/.local/bin/env && \
+    source /opt/venv/bin/activate && \
+    cd csrc/attn && \
+    git submodule update --init --recursive && \
+    python setup_vsa.py install
 
 EXPOSE 22
diff --git a/docker/Dockerfile.python3.12 b/docker/Dockerfile.python3.12
@@ -43,7 +43,7 @@ RUN source $HOME/.local/bin/env && \
     source /opt/venv/bin/activate && \
     uv pip install --no-cache-dir --upgrade pip && \
     uv pip install --no-cache-dir .[dev] && \
-    uv pip install --no-cache-dir flash-attn==2.8.0.post2 --no-build-isolation
+    uv pip install --no-cache-dir flash-attn==2.8.3 --no-build-isolation
 
 COPY . .
 

diff --git a/docker/Dockerfile.python3.12.cuda12.9.1 b/docker/Dockerfile.python3.12.cuda12.9.1
@@ -0,0 +1,72 @@
+FROM nvidia/cuda:12.9.1-cudnn-devel-ubuntu22.04
+
+ENV DEBIAN_FRONTEND=noninteractive
+
+SHELL ["/bin/bash", "-c"]
+
+WORKDIR /FastVideo
+
+RUN apt-get update && apt-get install -y --no-install-recommends \
+    wget \
+    git \
+    ca-certificates \
+    openssh-server \
+    zsh \
+    vim \
+    curl \
+    gcc-11 \
+    g++-11 \
+    clang-11 \
+    && rm -rf /var/lib/apt/lists/*
+
+# Set up C++20 compilers for ThunderKittens
+RUN update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-11 100 --slave /usr/bin/g++ g++ /usr/bin/g++-11
+
+# Set CUDA environment variables
+ENV CUDA_HOME=/usr/local/cuda-12.9
+ENV PATH=${CUDA_HOME}/bin:${PATH}
+ENV LD_LIBRARY_PATH=${CUDA_HOME}/lib64:$LD_LIBRARY_PATH
+
+# Install uv and add its env script to root's .bashrc; pipefail makes a failed download fail the build instead of piping nothing into sh
+RUN set -o pipefail && curl -LsSf https://astral.sh/uv/install.sh | sh && \
+    echo 'source $HOME/.local/bin/env' >> /root/.bashrc
+
+# Copy just the pyproject.toml first to leverage Docker cache
+COPY pyproject.toml ./
+
+# Create a dummy README to satisfy the installation
+RUN echo "# Placeholder" > README.md
+
+# Create the /opt/venv virtual environment (Python 3.12, --seed), then install the project's dev extras and flash-attn into it
+RUN source $HOME/.local/bin/env && \
+    uv venv --python 3.12 --seed /opt/venv && \
+    source /opt/venv/bin/activate && \
+    uv pip install --no-cache-dir --upgrade pip && \
+    uv pip install --no-cache-dir .[dev] && \
+    uv pip install --no-cache-dir flash-attn==2.8.3 --no-build-isolation
+
+COPY . .
+
+# Editable install of the project, best-effort removal of the GitHub auth header (parenthesized so `|| true` cannot mask an install failure), then shell startup files
+RUN source $HOME/.local/bin/env && \
+    source /opt/venv/bin/activate && \
+    uv pip install --no-cache-dir -e .[dev] && \
+    (git config --unset-all http.https://github.com/.extraheader || true) && \
+    echo 'source /opt/venv/bin/activate' >> /root/.bashrc && \
+    echo 'if [ -n "$ZSH_VERSION" ] && [ -f ~/.zshrc ]; then . ~/.zshrc; elif [ -f ~/.bashrc ]; then . ~/.bashrc; fi' > /root/.profile
+
+# Install STA (Sliding Tile Attention) from csrc/attn into the venv, initializing git submodules first
+RUN source $HOME/.local/bin/env && \
+    source /opt/venv/bin/activate && \
+    cd csrc/attn && \
+    git submodule update --init --recursive && \
+    python setup_sta.py install
+
+# Install VSA (Video Sparse Attention) from csrc/attn into the venv, initializing git submodules first
+RUN source $HOME/.local/bin/env && \
+    source /opt/venv/bin/activate && \
+    cd csrc/attn && \
+    git submodule update --init --recursive && \
+    python setup_vsa.py install
+
+EXPOSE 22
diff --git a/examples/inference/gradio/local/README.md b/examples/inference/gradio/local/README.md
@@ -0,0 +1,56 @@
+# FastVideo Gradio Local Demo
+
+This is a Gradio-based web interface for generating videos using the FastVideo framework. The demo allows users to create videos from text prompts with various customization options.
+
+## Overview
+
+The demo uses the FastVideo framework to generate videos based on text prompts. It provides a simple web interface built with Gradio that allows users to:
+
+- Enter text prompts to generate videos
+- Customize video parameters (dimensions, number of frames, etc.)
+- Use negative prompts to guide the generation process
+- Set or randomize seeds for reproducibility
+
+---
+
+## Usage
+
+Run the demo with:
+
+```bash
+python examples/inference/gradio/local/gradio_local_demo.py
+```
+
+This starts a web server listening on `0.0.0.0:7860`; open `http://localhost:7860` (or the host machine's address) in a browser to access the interface.
+
+---
+
+## Model Initialization
+
+This demo initializes a `VideoGenerator` with the minimum required arguments for inference. Users can seamlessly adjust inference options between generations, such as the prompt, resolution, and video length, *without ever needing to reload the model*.
+
+## Video Generation
+
+The core functionality is in the `generate_video` function, which:
+1. Processes user inputs
+2. Runs inference with the already-initialized FastVideo `VideoGenerator` (`generator.generate_video()`)
+
+## Gradio Interface
+
+The interface is built with several components:
+- A text input for the prompt
+- A video display for the result
+- Inference options in a collapsible accordion:
+  - Height and width sliders
+  - Number of frames slider
+  - Guidance scale slider
+  - Negative prompt options
+  - Seed controls
+
+### Inference Options
+
+- **Height/Width**: Control the resolution of the generated video
+- **Number of Frames**: Set how many frames to generate
+- **Guidance Scale**: Control how closely the generation follows the prompt
+- **Negative Prompt**: Specify what you don't want to see in the video
+- **Seed**: Control randomness for reproducible results