utils.py
import csv
import os
import sys

import gensim
import imageio
import numpy as np
import spacy
import torch
from moviepy.editor import VideoFileClip, concatenate_videoclips
from PIL import Image
from transformers import AutoProcessor, CLIPModel
def song_to_list(path, lemma=False):
    """Read a lyrics file into a list of stanzas, each a list of lines.

    Stanzas are separated by blank lines. If lemma is True, every line is
    fully lemmatised; otherwise only words containing an apostrophe
    (contractions such as "don't") are lemmatised.
    """
    nlp = spacy.load("en_core_web_sm")
    with open(path) as f:
        lines = f.readlines()
    lines.append('\n')  # sentinel blank line so the final stanza is flushed
    full_song = []
    stanza = []
    for i in lines:
        if i == '\n':
            full_song.append(stanza)
            stanza = []
        else:
            if lemma:
                doc = nlp(i[:-1])
                lemmatised_text = ' '.join([token.lemma_ for token in doc])
                stanza.append(lemmatised_text)
            else:
                new_line = []
                for j in i[:-1].split(' '):
                    if "'" in j:
                        doc = nlp(j)
                        lemmatised_text = ' '.join([token.lemma_ for token in doc])
                        new_line.append(lemmatised_text)
                    else:
                        new_line.append(j)
                stanza.append(" ".join(new_line))
    return full_song
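
# A minimal usage sketch for song_to_list; "lyrics.txt" is a hypothetical
# blank-line-separated lyrics file, not a path from this repo:
#
#   song = song_to_list("lyrics.txt", lemma=True)
#   print(song[0])  # first stanza, as a list of lemmatised lines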
def concrete_score(path):
    """Load a word -> concreteness-score mapping from a two-column CSV.

    The first row is assumed to be a header and is skipped.
    """
    csv.field_size_limit(sys.maxsize)
    with open(path, "r") as file:
        data = list(csv.reader(file, delimiter=","))
    word_score = {row[0]: float(row[1]) for row in data[1:]}
    return word_score
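
# A minimal usage sketch for concrete_score; the CSV layout (header row,
# then "word,score" rows) is assumed from the parsing above, and the path
# is hypothetical:
#
#   word_score = concrete_score("concreteness.csv")
#   print(word_score["dog"])  # concreteness rating as a float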
def change_lyric(emb_path, lyric, word_score):
    """Replace each word in `lyric` with a more concrete word2vec neighbour.

    A word is only swapped when a neighbour outscores it in `word_score`;
    words missing from `word_score` are kept as-is.
    """
    embeddings_300d = gensim.models.KeyedVectors.load_word2vec_format(
        emb_path, binary=True)
    new_lyric = ''
    for root_word in lyric.split(" "):
        if root_word not in word_score:
            print(root_word)  # flag words missing from the concreteness lexicon
            new_lyric = " ".join([new_lyric, root_word])
            continue
        replaced_word = ''
        i = 0.0          # best concreteness score seen so far
        bracket = 0      # offset into the neighbour list, scanned 20 at a time
        topn = 100
        max_score = 8.0  # relaxed by 0.2 whenever the neighbour list is exhausted
        similar = embeddings_300d.most_similar(root_word, topn=topn)
        while i < max_score:
            sim_words = similar[bracket:bracket + 20]
            for word, _ in sim_words:
                if word not in word_score:
                    continue
                score = word_score[word]
                if score > i:
                    i = score
                    if word_score[word] > word_score[root_word]:
                        replaced_word = word
                    else:
                        replaced_word = root_word
            bracket = bracket + 20
            if bracket > topn:
                max_score = max_score - 0.2
                # widen the search, re-querying from the last examined neighbour
                similar = embeddings_300d.most_similar(word, topn=bracket)
                topn = bracket
        new_lyric = " ".join([new_lyric, replaced_word])
    new_lyric = new_lyric[1:]  # drop the leading space
    return new_lyric
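
# A minimal usage sketch for change_lyric; "GoogleNews-vectors-negative300.bin"
# is the usual binary word2vec file gensim loads this way, assumed here rather
# than confirmed by the repo:
#
#   word_score = concrete_score("concreteness.csv")  # hypothetical path
#   new_line = change_lyric("GoogleNews-vectors-negative300.bin",
#                           "i feel the love tonight", word_score)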
def gif(result_path, output_path, fps):
    """Assemble every image in `result_path` into a GIF at `output_path`."""
    images = []
    # Sort filenames so frames keep a stable order (os.listdir order is arbitrary)
    for filename in sorted(os.listdir(result_path)):
        images.append(imageio.imread(os.path.join(result_path, filename)))
    imageio.mimsave(output_path, images, fps=fps)
def merge_mp4(folder_path, output_path):
    """Concatenate every MP4 in `folder_path` into a single video."""
    # Get the MP4 files in the folder, sorted for a stable clip order
    file_list = sorted(os.listdir(folder_path))
    file_list = [file for file in file_list if file.endswith('.mp4')]
    # Load each MP4 as a video clip
    video_clips = []
    for file in file_list:
        file_path = os.path.join(folder_path, file)
        video_clips.append(VideoFileClip(file_path))
    # Concatenate the clips and write the merged video to the output file
    final_video = concatenate_videoclips(video_clips)
    final_video.write_videofile(output_path, codec='libx264')
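# A minimal usage sketch for merge_mp4; clips are joined in sorted filename
# order, so zero-padded names (clip_000.mp4, clip_001.mp4, ...) are assumed,
# and the paths are hypothetical:
#
#   merge_mp4("results/clips", "results/full.mp4")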
def cond_clip(embeds, images, gpu="cuda"):
model = CLIPModel.from_pretrained("openai/clip-vit-large-patch14")
processor = AutoProcessor.from_pretrained("openai/clip-vit-large-patch14")
clip_score = 0.0
for i, combo in enumerate(zip(embeds, images)):
# for i in range(len(embeds)):
cond_embed, image = combo
# cond_embed = embeds[i]
# image = images[i]
# print(cond_embed.shape)
image_input = processor(images = image,return_tensors="pt")
image_input = image_input.to(gpu)
cond_embed = cond_embed.to(gpu)
model = model.to(gpu)
with torch.no_grad():
image_features = model.get_image_features(**image_input)
image_features = image_features / image_features.norm(dim=-1, keepdim=True)
cond_embed = cond_embed / cond_embed.norm(dim=-1, keepdim=True)
print(image_features.shape)
print(cond_embed.shape)
clip_score = clip_score + torch.matmul(cond_embed, image_features.T).softmax(dim=-1).squeeze()
return clip_score/i
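
# A minimal usage sketch for cond_clip; `embeds` is assumed to be a list of
# 1 x 768 conditioning embeddings (the CLIP ViT-L/14 width) and `pil_images`
# a parallel list of PIL images:
#
#   score = cond_clip(embeds, pil_images, gpu="cuda")
#   print(float(score))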
@torch.no_grad()
def latent_to_image(latents, SD):
    """Decode diffusion latents into a list of PIL images via the SD pipeline."""
    image = SD.decode_latents(latents)
    image = SD.numpy_to_pil(image)
    return image
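
# A minimal usage sketch for latent_to_image; `SD` is assumed to be a
# StableDiffusionPipeline-style object exposing decode_latents and
# numpy_to_pil, as the function itself requires:
#
#   images = latent_to_image(latents, SD)
#   images[0].save("frame.png")  # hypothetical output path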
def load_gif(path):
    """Load a GIF (skipping frame 0) as a (frames, channels, H, W) tensor."""
    gif = Image.open(path)
    frames = []
    for i in range(1, gif.n_frames):
        gif.seek(i)
        # Convert to RGB so every frame is H x W x C (GIFs are often palette-mode)
        frame = np.array(gif.convert("RGB"))
        frames.append(torch.tensor(frame).unsqueeze(0))
        frames[-1] = frames[-1].permute(0, 3, 1, 2)  # HWC -> CHW
    return torch.cat(frames, dim=0)
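
# A minimal usage sketch for load_gif, e.g. on the output of gif() above,
# with a hypothetical path:
#
#   frames = load_gif("results/out.gif")
#   print(frames.shape)  # (n_frames - 1, 3, H, W), since frame 0 is skipped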