Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
# Custom stuff
/proj
/notebooks


# Byte-compiled / optimized / DLL files
Expand Down
1,917 changes: 1,897 additions & 20 deletions poetry.lock

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ tqdm = "^4.66.5"
torch = { version = "^2.4.1+cpu", source = "pytorch" }
torchvision = { version = "^0.19.1+cpu", source = "pytorch" }
scikit-learn = "^1.5.1"
jupyter = "^1.1.1"

[[tool.poetry.source]]
name = "pypi"
Expand Down
4 changes: 4 additions & 0 deletions src/framemgr.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@

from qcluster import QCluster
from projstate import ProjectState, FrameListMetadata
from lilutil import print_cvcap_metadata


class FrameManager:
Expand Down Expand Up @@ -53,6 +54,9 @@ def for_project(cls, project: ProjectState):
    def __len__(self):
        """Return the total number of frames this manager covers (self.total_frames)."""
        return self.total_frames

    def print_metadata(self):
        """Print the underlying video's metadata via lilutil.print_cvcap_metadata."""
        print_cvcap_metadata(self.video_path)

def analyze(self):
"""Analyzes the video frame by frame. Calculates embeddings,
and clusters the frames for diversity.
Expand Down
35 changes: 35 additions & 0 deletions src/lilutil.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
import os

import cv2

def format_time_hms(seconds: float) -> str:
    """Format a duration in seconds as a compact "HHhMMmSSs" string.

    Zero-valued components are omitted (e.g. 3660 -> "01h01m"), but a
    zero or sub-second duration returns "00s" instead of an empty string.

    Args:
        seconds: Non-negative duration in seconds; fractions are truncated.

    Returns:
        A compact duration string such as "01h05m09s", "02m05s", or "00s".
    """
    hours = int(seconds // 3600)
    minutes = int((seconds % 3600) // 60)
    secs = int(seconds % 60)
    out = ""
    if hours > 0:
        out += f"{hours:02d}h"
    if minutes > 0:
        out += f"{minutes:02d}m"
    if secs > 0:
        out += f"{secs:02d}s"
    # Bug fix: previously returned "" when every component was zero.
    return out or "00s"

def print_cvcap_metadata(video_path: str) -> None:
    """Print basic metadata about a video file as seen by OpenCV.

    Reports file size, format, frame count, resolution, bytes/frame, and an
    estimated duration derived from the container's reported fps.

    Args:
        video_path: Path to the video file on disk.
    """
    print(f"Video path: {video_path}")
    cap = cv2.VideoCapture(video_path)
    print(f"CV2 object: {cap}")
    try:
        frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        reported_fps = cap.get(cv2.CAP_PROP_FPS)
        width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    finally:
        # Bug fix: the capture handle was previously never released.
        cap.release()
    file_size = os.path.getsize(video_path)
    file_format = os.path.splitext(video_path)[1]
    print(f"Size (MB): {file_size / 1024 / 1024:.2f}")
    print(f"File format: {file_format}")
    print(f"Number of frames: {frame_count}")
    print(f"Resolution: {width}x{height}")
    if frame_count > 0:
        print(f"Bytes per frame: {file_size / frame_count:.1f}")
    # Bug fix: some containers report 0 fps, which previously raised
    # ZeroDivisionError; skip the duration estimate in that case.
    if reported_fps > 0:
        estimated_duration = frame_count / reported_fps
        print(f"Estimated duration: {format_time_hms(estimated_duration)}")
7 changes: 7 additions & 0 deletions src/s1setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@
from framemgr import FrameManager
from projstate import ProjectState



def project_dir_from_filename(filename: str) -> str:
"""Get the project directory from the filename.
"""
Expand Down Expand Up @@ -52,12 +54,17 @@ def save_frames(decoder: FrameManager, num_frames: int, save_dir: str):
# CLI flags controlling how many frames are analyzed / shown / saved.
parser.add_argument("--max-frames", type=int, default=0, help="Maximum number of frames to analyze")
parser.add_argument("--show-frames", type=int, default=10, help="Show the N most diverse sample frames")
parser.add_argument("--save-frames", type=int, default=10, help="Save the N most diverse sample frames to the project directory")
parser.add_argument("--just-info", action="store_true", help="Just print the video metadata and exit")
args = parser.parse_args()

# Derive the project directory from the video filename unless one was given.
proj_dir = args.project_dir or project_dir_from_filename(args.video_path)
project = ProjectState(project_dir=proj_dir, video_path=args.video_path)

decoder = FrameManager(video_path=args.video_path, max_frames=args.max_frames)
# --just-info short-circuits: print the video metadata and exit before analysis.
if args.just_info:
    decoder.print_metadata()
    exit(0)

decoder.analyze()

if args.show_frames > 0:
Expand Down
125 changes: 125 additions & 0 deletions src/vsplit.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,125 @@
from dataclasses import dataclass
import os
import traceback

from PIL import Image
from tqdm.auto import tqdm
import cv2
import numpy as np

from lilutil import print_cvcap_metadata

@dataclass
class SplitRegion:
    """A named rectangular crop region in pixel coordinates.

    Coordinates are used as numpy slice bounds (frame[y1:y2, x1:x2]), so
    (x1, y1) is the inclusive top-left and (x2, y2) the exclusive bottom-right.
    """
    x1: int  # left edge (inclusive)
    y1: int  # top edge (inclusive)
    x2: int  # right edge (exclusive slice bound)
    y2: int  # bottom edge (exclusive slice bound)
    name: str  # region label; also used to build output filenames


def bgr_image(frame: np.ndarray) -> Image.Image:
    """Convert a BGR-ordered (OpenCV) frame array into a PIL image."""
    # Reversing the channel axis turns BGR into the RGB order PIL expects.
    rgb_view = frame[..., ::-1]
    return Image.fromarray(rgb_view)

class VideoSplitter:
    """VideoSplitter is a class that processes a video file and can split it into regions.

    Example usage:
        vs = VideoSplitter('path_to_video.mp4')
        vs.print_metadata()
        vs.define_regions_4()
        vs.write_split_videos('XVID')
    """

    def __init__(self, video_file):
        self.video_file = video_file
        self.regions = []  # list of SplitRegion; empty until define_regions_4()
        self.cap = None  # cv2.VideoCapture, owned and (re)created by _reload_video
        self._reload_video()

    def _reload_video(self):
        """(Re)open the capture at the first frame and cache the video metadata.

        Raises:
            ValueError: If OpenCV cannot open the file.
        """
        # Bug fix: release any previously opened capture so that repeated
        # reloads do not leak OS handles.
        if getattr(self, "cap", None) is not None:
            self.cap.release()
        self.cap = cv2.VideoCapture(self.video_file)
        if not self.cap.isOpened():
            raise ValueError(f"Error opening video file: {self.video_file}")

        self.frame_count = int(self.cap.get(cv2.CAP_PROP_FRAME_COUNT))
        self.reported_fps = self.cap.get(cv2.CAP_PROP_FPS)
        self.width = int(self.cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        self.height = int(self.cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
        self.file_size = os.path.getsize(self.video_file)
        self.file_format = os.path.splitext(self.video_file)[1]
        # Bug fix: some containers report 0 fps; avoid ZeroDivisionError.
        self.estimated_duration = (
            self.frame_count / self.reported_fps if self.reported_fps > 0 else 0.0
        )

    def print_metadata(self):
        """Print the video's basic metadata (size, frames, resolution, duration)."""
        print_cvcap_metadata(self.video_file)

    def define_regions_4(self, names: list[str] | None = None):
        """Define 4 equally-sized quadrant regions for splitting.

        Args:
            names: Four region names in order upper-left, upper-right,
                lower-left, lower-right. Defaults to
                ["upper_left", "upper_right", "lower_left", "lower_right"].
        """
        # Bug fix: the default was a mutable list argument (shared across
        # calls); fall back to the canonical names inside the body instead.
        if names is None:
            names = ["upper_left", "upper_right", "lower_left", "lower_right"]
        half_w = self.width // 2
        half_h = self.height // 2
        self.regions = [
            SplitRegion(0, 0, half_w, half_h, names[0]),
            SplitRegion(half_w, 0, self.width, half_h, names[1]),
            SplitRegion(0, half_h, half_w, self.height, names[2]),
            SplitRegion(half_w, half_h, self.width, self.height, names[3]),
        ]

    def _split_frame(self, frame: np.ndarray) -> list[np.ndarray]:
        """Crop one frame into a view per region, in self.regions order."""
        return [frame[r.y1:r.y2, r.x1:r.x2] for r in self.regions]

    def preview_regions(self) -> dict[str, Image.Image]:
        """Returns the next full frame, and then each of the splits.

        Returns:
            Mapping of "full_frame" plus each region name to a PIL image.

        Raises:
            ValueError: If no frame can be read from the video.
        """
        ret, sample_frame = self.cap.read()
        # Bug fix: the read result must be checked *before* using the frame;
        # on failure sample_frame is None and would crash inside bgr_image.
        if not ret:
            raise ValueError("Failed to read a frame from the video.")
        out = {"full_frame": bgr_image(sample_frame)}
        for region, crop in zip(self.regions, self._split_frame(sample_frame)):
            out[region.name] = bgr_image(crop)
        return out

    def write_split_videos(self, codec: str = 'XVID'):
        """Writes the video to a series of .avi files, one for each region.

        Codec options:
        - 'XVID' = MPEG-4 codec
        - 'MJPG' = Motion-JPEG codec
        - 'X264' = H.264 codec

        Raises:
            ValueError: If no regions have been defined.
        """
        self._reload_video()  # make sure we're at the start of the video
        if not self.regions:
            raise ValueError("No regions defined")

        # Create one VideoWriter per region, named <video>_<region_name>.avi.
        filename_prefix = os.path.splitext(self.video_file)[0]
        writers = []
        for region in self.regions:
            out_filename = f"{filename_prefix}_{region.name}.avi"
            size = (region.x2 - region.x1, region.y2 - region.y1)
            writer = cv2.VideoWriter(out_filename, cv2.VideoWriter_fourcc(*codec), self.reported_fps, size)
            print(f"Writing to {out_filename} with size {size} and {self.reported_fps} fps using {codec} codec")
            writers.append(writer)

        frames_written = 0
        try:
            for _ in tqdm(range(self.frame_count)):
                if not self.cap.isOpened():
                    break
                ret, frame = self.cap.read()
                if not ret:
                    break
                # Write each region's crop of this frame to its own file.
                # (Previously the region index shadowed the outer loop variable.)
                for writer, region in zip(writers, self.regions):
                    writer.write(frame[region.y1:region.y2, region.x1:region.x2])
                frames_written += 1
            # Bug fix: report the count actually written; the original claimed
            # all frames were written even after an early read failure.
            print(f"Finished writing {frames_written} of {self.frame_count} frames")
        finally:
            # Always release the capture and every writer, even on error.
            self.cap.release()
            for writer in writers:
                try:
                    writer.release()
                except Exception:
                    traceback.print_exc()