Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
# Custom stuff
/proj
/notebooks


# Byte-compiled / optimized / DLL files
Expand Down
1,917 changes: 1,897 additions & 20 deletions poetry.lock

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ tqdm = "^4.66.5"
torch = { version = "^2.4.1+cpu", source = "pytorch" }
torchvision = { version = "^0.19.1+cpu", source = "pytorch" }
scikit-learn = "^1.5.1"
jupyter = "^1.1.1"

[[tool.poetry.source]]
name = "pypi"
Expand Down
4 changes: 4 additions & 0 deletions src/framemgr.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@

from qcluster import QCluster
from projstate import ProjectState, FrameListMetadata
from lilutil import print_cvcap_metadata


class FrameManager:
Expand Down Expand Up @@ -53,6 +54,9 @@ def for_project(cls, project: ProjectState):
    def __len__(self):
        """Return the total number of frames this manager covers (self.total_frames)."""
        return self.total_frames

    def print_metadata(self):
        """Print the underlying video's metadata via lilutil.print_cvcap_metadata."""
        print_cvcap_metadata(self.video_path)

def analyze(self):
"""Analyzes the video frame by frame. Calculates embeddings,
and clusters the frames for diversity.
Expand Down
35 changes: 35 additions & 0 deletions src/lilutil.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
import os

import cv2

def format_time_hms(seconds: float) -> str:
    """Format a duration in seconds as a compact "HHhMMmSSs" string.

    Zero-valued components are omitted (e.g. 3660 -> "01h01m"), but a
    zero or sub-second duration returns "00s" instead of an empty string.

    Args:
        seconds: Non-negative duration in seconds; fractions are truncated.

    Returns:
        A compact duration string such as "01h05m09s", "02m05s", or "00s".
    """
    hours = int(seconds // 3600)
    minutes = int((seconds % 3600) // 60)
    secs = int(seconds % 60)
    out = ""
    if hours > 0:
        out += f"{hours:02d}h"
    if minutes > 0:
        out += f"{minutes:02d}m"
    if secs > 0:
        out += f"{secs:02d}s"
    # Bug fix: previously returned "" when every component was zero.
    return out or "00s"

def print_cvcap_metadata(video_path: str) -> None:
    """Print basic metadata about a video file as seen by OpenCV.

    Reports file size, format, frame count, resolution, bytes/frame, and an
    estimated duration derived from the container's reported fps.

    Args:
        video_path: Path to the video file on disk.
    """
    print(f"Video path: {video_path}")
    cap = cv2.VideoCapture(video_path)
    print(f"CV2 object: {cap}")
    try:
        frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        reported_fps = cap.get(cv2.CAP_PROP_FPS)
        width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    finally:
        # Bug fix: the capture handle was previously never released.
        cap.release()
    file_size = os.path.getsize(video_path)
    file_format = os.path.splitext(video_path)[1]
    print(f"Size (MB): {file_size / 1024 / 1024:.2f}")
    print(f"File format: {file_format}")
    print(f"Number of frames: {frame_count}")
    print(f"Resolution: {width}x{height}")
    if frame_count > 0:
        print(f"Bytes per frame: {file_size / frame_count:.1f}")
    # Bug fix: some containers report 0 fps, which previously raised
    # ZeroDivisionError; skip the duration estimate in that case.
    if reported_fps > 0:
        estimated_duration = frame_count / reported_fps
        print(f"Estimated duration: {format_time_hms(estimated_duration)}")
7 changes: 7 additions & 0 deletions src/s1setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@
from framemgr import FrameManager
from projstate import ProjectState



def project_dir_from_filename(filename: str) -> str:
"""Get the project directory from the filename.
"""
Expand Down Expand Up @@ -52,12 +54,17 @@ def save_frames(decoder: FrameManager, num_frames: int, save_dir: str):
# CLI flags controlling how many frames are analyzed / shown / saved.
parser.add_argument("--max-frames", type=int, default=0, help="Maximum number of frames to analyze")
parser.add_argument("--show-frames", type=int, default=10, help="Show the N most diverse sample frames")
parser.add_argument("--save-frames", type=int, default=10, help="Save the N most diverse sample frames to the project directory")
parser.add_argument("--just-info", action="store_true", help="Just print the video metadata and exit")
args = parser.parse_args()

# Derive the project directory from the video filename unless one was given.
proj_dir = args.project_dir or project_dir_from_filename(args.video_path)
project = ProjectState(project_dir=proj_dir, video_path=args.video_path)

decoder = FrameManager(video_path=args.video_path, max_frames=args.max_frames)
# --just-info short-circuits: print the video metadata and exit before analysis.
if args.just_info:
    decoder.print_metadata()
    exit(0)

decoder.analyze()

if args.show_frames > 0:
Expand Down
125 changes: 125 additions & 0 deletions src/vsplit.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,125 @@
from dataclasses import dataclass
import os
import traceback

from PIL import Image
from tqdm.auto import tqdm
import cv2
import numpy as np

from lilutil import print_cvcap_metadata

@dataclass
class SplitRegion:
    """A named rectangular crop region in pixel coordinates.

    Coordinates are used as numpy slice bounds (frame[y1:y2, x1:x2]), so
    (x1, y1) is the inclusive top-left and (x2, y2) the exclusive bottom-right.
    """
    x1: int  # left edge (inclusive)
    y1: int  # top edge (inclusive)
    x2: int  # right edge (exclusive slice bound)
    y2: int  # bottom edge (exclusive slice bound)
    name: str  # region label; also used to build output filenames


def bgr_image(frame: np.ndarray) -> Image.Image:
    """Convert a BGR-ordered (OpenCV) frame array into a PIL image."""
    # Reversing the channel axis turns BGR into the RGB order PIL expects.
    rgb_view = frame[..., ::-1]
    return Image.fromarray(rgb_view)

class VideoSplitter:
    """VideoSplitter is a class that processes a video file and can split it into regions.

    Example usage:
        vs = VideoSplitter('path_to_video.mp4')
        vs.print_metadata()
        vs.define_regions_4()
        vs.write_split_videos('XVID')
    """

    def __init__(self, video_file):
        self.video_file = video_file
        self.regions = []  # list of SplitRegion; empty until define_regions_4()
        self.cap = None  # cv2.VideoCapture, owned and (re)created by _reload_video
        self._reload_video()

    def _reload_video(self):
        """(Re)open the capture at the first frame and cache the video metadata.

        Raises:
            ValueError: If OpenCV cannot open the file.
        """
        # Bug fix: release any previously opened capture so that repeated
        # reloads do not leak OS handles.
        if getattr(self, "cap", None) is not None:
            self.cap.release()
        self.cap = cv2.VideoCapture(self.video_file)
        if not self.cap.isOpened():
            raise ValueError(f"Error opening video file: {self.video_file}")

        self.frame_count = int(self.cap.get(cv2.CAP_PROP_FRAME_COUNT))
        self.reported_fps = self.cap.get(cv2.CAP_PROP_FPS)
        self.width = int(self.cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        self.height = int(self.cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
        self.file_size = os.path.getsize(self.video_file)
        self.file_format = os.path.splitext(self.video_file)[1]
        # Bug fix: some containers report 0 fps; avoid ZeroDivisionError.
        self.estimated_duration = (
            self.frame_count / self.reported_fps if self.reported_fps > 0 else 0.0
        )

    def print_metadata(self):
        """Print the video's basic metadata (size, frames, resolution, duration)."""
        print_cvcap_metadata(self.video_file)

    def define_regions_4(self, names: list[str] | None = None):
        """Define 4 equally-sized quadrant regions for splitting.

        Args:
            names: Four region names in order upper-left, upper-right,
                lower-left, lower-right. Defaults to
                ["upper_left", "upper_right", "lower_left", "lower_right"].
        """
        # Bug fix: the default was a mutable list argument (shared across
        # calls); fall back to the canonical names inside the body instead.
        if names is None:
            names = ["upper_left", "upper_right", "lower_left", "lower_right"]
        half_w = self.width // 2
        half_h = self.height // 2
        self.regions = [
            SplitRegion(0, 0, half_w, half_h, names[0]),
            SplitRegion(half_w, 0, self.width, half_h, names[1]),
            SplitRegion(0, half_h, half_w, self.height, names[2]),
            SplitRegion(half_w, half_h, self.width, self.height, names[3]),
        ]

    def _split_frame(self, frame: np.ndarray) -> list[np.ndarray]:
        """Crop one frame into a view per region, in self.regions order."""
        return [frame[r.y1:r.y2, r.x1:r.x2] for r in self.regions]

    def preview_regions(self) -> dict[str, Image.Image]:
        """Returns the next full frame, and then each of the splits.

        Returns:
            Mapping of "full_frame" plus each region name to a PIL image.

        Raises:
            ValueError: If no frame can be read from the video.
        """
        ret, sample_frame = self.cap.read()
        # Bug fix: the read result must be checked *before* using the frame;
        # on failure sample_frame is None and would crash inside bgr_image.
        if not ret:
            raise ValueError("Failed to read a frame from the video.")
        out = {"full_frame": bgr_image(sample_frame)}
        for region, crop in zip(self.regions, self._split_frame(sample_frame)):
            out[region.name] = bgr_image(crop)
        return out

    def write_split_videos(self, codec: str = 'XVID'):
        """Writes the video to a series of .avi files, one for each region.

        Codec options:
        - 'XVID' = MPEG-4 codec
        - 'MJPG' = Motion-JPEG codec
        - 'X264' = H.264 codec

        Raises:
            ValueError: If no regions have been defined.
        """
        self._reload_video()  # make sure we're at the start of the video
        if not self.regions:
            raise ValueError("No regions defined")

        # Create one VideoWriter per region, named <video>_<region_name>.avi.
        filename_prefix = os.path.splitext(self.video_file)[0]
        writers = []
        for region in self.regions:
            out_filename = f"{filename_prefix}_{region.name}.avi"
            size = (region.x2 - region.x1, region.y2 - region.y1)
            writer = cv2.VideoWriter(out_filename, cv2.VideoWriter_fourcc(*codec), self.reported_fps, size)
            print(f"Writing to {out_filename} with size {size} and {self.reported_fps} fps using {codec} codec")
            writers.append(writer)

        frames_written = 0
        try:
            for _ in tqdm(range(self.frame_count)):
                if not self.cap.isOpened():
                    break
                ret, frame = self.cap.read()
                if not ret:
                    break
                # Write each region's crop of this frame to its own file.
                # (Previously the region index shadowed the outer loop variable.)
                for writer, region in zip(writers, self.regions):
                    writer.write(frame[region.y1:region.y2, region.x1:region.x2])
                frames_written += 1
            # Bug fix: report the count actually written; the original claimed
            # all frames were written even after an early read failure.
            print(f"Finished writing {frames_written} of {self.frame_count} frames")
        finally:
            # Always release the capture and every writer, even on error.
            self.cap.release()
            for writer in writers:
                try:
                    writer.release()
                except Exception:
                    traceback.print_exc()