Skip to content

Commit 3bc02b6

Browse files
committed
dataloader progress
1 parent 50a93bd commit 3bc02b6

File tree

3 files changed

+99
-0
lines changed

3 files changed

+99
-0
lines changed

ns_vfs/api/run_with_nsvqa.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
from ns_vfs.model_checking.stormpy import StormModelChecker
99
from ns_vfs.percepter.single_vision_percepter import SingleVisionPercepter
1010
from ns_vfs.validator import FrameValidator
11+
from ns_vfs.dataloader.longvideobench import LongVideoBench
1112

1213

1314
def run_nsvs_nsvqa(
@@ -100,6 +101,12 @@ def run_nsvs_nsvqa(
100101

101102

102103
if __name__ == "__main__":
104+
video_path = "/nas/mars/dataset/LongVideoBench/videos/86CxyhFV9MI.mp4"
105+
subtitle_path = "/nas/mars/dataset/LongVideoBench/subtitles/86CxyhFV9MI_en.json"
106+
bench = LongVideoBench(video_path, subtitle_path)
107+
108+
import sys
109+
sys.exit(0)
103110
sample_data = [
104111
{
105112
"frames": [

ns_vfs/dataloader/_base.py

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
import abc
from typing import Any
2+
3+
4+
class DatasetLoader(abc.ABC):
    """Base class for loading dataset.

    Stores the paths of one video file and its subtitle file; concrete
    subclasses implement ``load_all`` to read them.
    """

    def __init__(self, video_path: str, subtitle_path: str) -> None:
        """Record the paths of the video and subtitle files.

        Args:
            video_path: Path to the video file.
            subtitle_path: Path to the subtitle file.
        """
        self.video_path = video_path
        self.subtitle_path = subtitle_path

    @abc.abstractmethod
    def load_all(self) -> Any:
        """Load video and subtitles."""
14+

ns_vfs/dataloader/longvideobench.py

Lines changed: 78 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,78 @@
1+
from typing import List, Dict, Union
2+
import numpy as np
3+
import json
4+
import cv2
5+
6+
from ns_vfs.dataloader._base import DatasetLoader
7+
8+
class LongVideoBench(DatasetLoader):
    """Load one video and its JSON subtitle file as chunks of sampled frames.

    The file locations are read from ``self.video_path`` and
    ``self.subtitle_path``.
    """

    def _parse_timestamp(self, ts: str) -> float:
        """
        Parse a timestamp like "HH:MM:SS.mmm" into total seconds as float.
        """
        h, m, s = ts.split(':')
        return int(h) * 3600 + int(m) * 60 + float(s)

    def _load_subtitles(self) -> List[tuple]:
        """Read the subtitle JSON and return ``(start_sec, line)`` pairs.

        Each entry of the JSON list is read through its ``'start'`` and
        ``'line'`` keys.
        """
        with open(self.subtitle_path, 'r', encoding='utf-8') as f:
            subs = json.load(f)
        return [
            (self._parse_timestamp(entry['start']), entry['line'])
            for entry in subs
        ]

    def _sample_frames(self, interval: float) -> List[tuple]:
        """Return ``(timestamp_sec, frame)`` pairs taken every `interval` seconds.

        Raises:
            IOError: If the video cannot be opened or reports no usable
                frame rate.
        """
        cap = cv2.VideoCapture(self.video_path)
        try:
            if not cap.isOpened():
                raise IOError(f"Cannot open video: {self.video_path}")
            total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
            vid_fps = cap.get(cv2.CAP_PROP_FPS)
            # A non-positive (or NaN) frame rate would otherwise surface as
            # a ZeroDivisionError or a meaningless duration.
            if not vid_fps > 0:
                raise IOError(
                    f"Cannot determine frame rate of video: {self.video_path}"
                )
            duration_sec = total_frames / vid_fps

            sampled = []
            for t in np.arange(0, duration_sec, interval):
                cap.set(cv2.CAP_PROP_POS_MSEC, t * 1000)
                ret, frame = cap.read()
                if not ret:
                    # Stop at the first frame that cannot be read.
                    break
                sampled.append((t, frame.copy()))
            return sampled
        finally:
            # Release the capture handle even when an error is raised above.
            cap.release()

    def load_all(
        self, sample_fps: int = 2, chunk_size: int = 10
    ) -> List[Dict[str, Union[List[np.ndarray], str, None]]]:
        """
        Load a video and subtitles, sample at `sample_fps` frames/sec, group every
        `chunk_size` frames into one dict, and attach the subtitles that start
        within each chunk.

        A chunk's time window runs from its first sampled frame up to (but not
        including) the first frame of the next chunk, so a subtitle that starts
        between two chunks is not lost. The last chunk's window extends one
        sampling interval past its final frame.

        Args:
            sample_fps: Number of frames to sample per second of video.
            chunk_size: Number of sampled frames per returned dict.

        Returns:
            List of dicts of the form:
            [
                {'frames': [f1, f2, ..., f10], 'subtitle': None},
                {'frames': [f11, ..., f20], 'subtitle': "some text"},
                ...
            ]

        Raises:
            ValueError: If `sample_fps` or `chunk_size` is not positive.
            IOError: If the video cannot be opened or has no usable frame rate.
        """
        if sample_fps <= 0:
            raise ValueError(f"sample_fps must be positive, got {sample_fps}")
        if chunk_size <= 0:
            raise ValueError(f"chunk_size must be positive, got {chunk_size}")

        # --- 1) Load and parse subtitles ---
        subtitles = self._load_subtitles()

        # --- 2) Open video and sample frames at regular intervals ---
        interval = 1.0 / sample_fps
        sampled = self._sample_frames(interval)

        # --- 3) Group frames into chunks and attach subtitles ---
        chunks: List[Dict[str, Union[List[np.ndarray], str, None]]] = []
        for i in range(0, len(sampled), chunk_size):
            chunk = sampled[i:i + chunk_size]
            frames = [f for (_, f) in chunk]

            t_start = chunk[0][0]
            next_start = i + chunk_size
            if next_start < len(sampled):
                # Use the next chunk's exact first timestamp as the boundary
                # so adjacent windows neither overlap nor leave a gap.
                t_end = sampled[next_start][0]
            else:
                t_end = chunk[-1][0] + interval

            lines = [line for (ts, line) in subtitles if t_start <= ts < t_end]
            subtitle_text = " ".join(lines) if lines else None

            chunks.append({
                'frames': frames,
                'subtitle': subtitle_text
            })

        return chunks

0 commit comments

Comments
 (0)