-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathextract_video_data.py
87 lines (57 loc) · 2 KB
/
extract_video_data.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
"""
This script will process data from transcripts,
thumbnails, and videos
"""
#%% imports
import os
import webvtt
import cv2
# %% functions
def clean_transcript(captions):
    """
    Convert a webvtt list of captions into a single string of text.

    For every caption only the last line of its text (everything after the
    final newline) is kept. The kept lines are concatenated in caption order
    and no separator is inserted between them.

    The caption objects are only read: their `text` attribute is left
    untouched, so the caller can keep using the original captions afterwards.

    Args:
        captions: iterable of caption objects exposing a `text` string.

    Returns:
        The concatenated text, or an empty string when there are no captions.
    """
    # rsplit with maxsplit=1 yields the same last element as split('\n')[-1]
    # without building a list of every line in the caption.
    return ''.join(caption.text.rsplit('\n', 1)[-1] for caption in captions)
def detect_faces(video_id, thumbnail_folder, faces_folder):
    """
    Find faces in a video's thumbnail with openCV and save each extracted
    face as its own image in the `faces` folder.

    The thumbnail is read from `<thumbnail_folder>/<video_id>.jpg` and every
    detected face is written to `<faces_folder>/<video_id>-<n>.jpg`, where
    `n` counts the detections starting at 0.

    # based on https://www.datacamp.com/tutorial/face-detection-python-opencv
    # and: https://stackoverflow.com/questions/57055694/

    Args:
        video_id: id of the video; used as the thumbnail file name stem.
        thumbnail_folder: folder that contains the thumbnail image.
        faces_folder: folder the face crops are written to; it is created
            when it does not exist yet.

    Raises:
        FileNotFoundError: if the thumbnail cannot be read or the Haar
            cascade file cannot be loaded.
        OSError: if a face image cannot be written.
    """
    thumbnail_path = os.path.join(thumbnail_folder, f'{video_id}.jpg')
    img = cv2.imread(thumbnail_path)
    # cv2.imread reports a missing/unreadable file by returning None
    # instead of raising, so fail here with a clear message.
    if img is None:
        raise FileNotFoundError(f'could not read thumbnail: {thumbnail_path}')
    print(f'image loaded, with shape: {img.shape}')
    gray_img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

    # load face classifier: prefer a cascade file in the working directory
    # (the original lookup), otherwise fall back to the copy bundled with
    # the OpenCV Python package when that package exposes `cv2.data`.
    cascade_name = 'haarcascade_frontalface_default.xml'
    cascade_path = cascade_name
    if not os.path.exists(cascade_path):
        cv2_data = getattr(cv2, 'data', None)
        if cv2_data is not None:
            cascade_path = os.path.join(cv2_data.haarcascades, cascade_name)
    face_classifier = cv2.CascadeClassifier(cascade_path)
    # A cascade that failed to load yields an empty classifier, not an error.
    if face_classifier.empty():
        raise FileNotFoundError(f'could not load face cascade: {cascade_path}')

    faces = face_classifier.detectMultiScale(
        gray_img, scaleFactor=1.1, minNeighbors=5, minSize=(40, 40)
    )
    print(faces)

    # write to a file
    if faces_folder:
        os.makedirs(faces_folder, exist_ok=True)
    for count, (x, y, w, h) in enumerate(faces):
        face = img[y:y + h, x:x + w]  # slice the face from the image
        face_path = os.path.join(faces_folder, f'{video_id}-{count}.jpg')
        # cv2.imwrite returns False when the image could not be saved
        if not cv2.imwrite(face_path, face):
            raise OSError(f'could not write face image: {face_path}')
#%% main
if __name__ == '__main__':
    # Example run: convert one video's transcript to plain text and extract
    # the faces from its thumbnail.
    transcript_folder = 'data/transcripts/'
    thumbnail_folder = 'data/thumbnails/'
    faces_folder = 'data/faces/'

    # example video id
    video_id = 'ernwEzDFGu8'

    # load the transcript
    captions = webvtt.read(f'{transcript_folder}{video_id}.en.vtt')
    print(f'Caption 0: {captions[0]}')
    print(f'Caption 0 text: {captions[0].text}')

    text = clean_transcript(captions)
    print(text)

    # Write as UTF-8 explicitly: the platform default encoding may not be
    # able to represent every character that appears in the captions.
    with open(f'{transcript_folder}{video_id}.txt', 'w', encoding='utf-8') as f:
        f.write(text)

    # extract faces
    detect_faces(video_id, thumbnail_folder, faces_folder)
# %%