-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathsrt2txt.py
83 lines (75 loc) · 2.65 KB
/
srt2txt.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
# convert subtitles from .srt to .txt format
# by simply removing cue numbers, timestamps and blank lines
#
# Zhenhao Ge, 2015-03-10
import os, sys
#import glob
import fnmatch
import codecs
# Parse the command line: an optional leading flag followed by an optional
# directory.  After this block `args` holds only the remaining positional
# arguments and `refresh` tells whether existing .txt files are regenerated.
args = sys.argv[1:]
refresh = False
if args and args[0] == '--refresh':
    refresh = True
    args = args[1:]  # drop the flag so args[0] (if present) is the directory
elif args and args[0] == '--help':
    print('convert subtitles from .srt to .txt format')
    print('usage: [--refresh] directory (optional)')
    # Exit status 1 is kept unchanged for backward compatibility.
    sys.exit(1)
# Resolve the directory to scan.
#   directory['current'] -- working directory the script was started from
#   directory['work']    -- directory whose .srt files are converted
directory = {}
directory['current'] = os.getcwd()
if args:
    # Make the user-supplied path absolute *before* the chdir below.  A
    # relative path would otherwise be resolved a second time, against the
    # new working directory, by the later os.walk() and match nothing.
    directory['work'] = os.path.abspath(args[0])
else:
    # No directory given: use the first default location that exists for
    # the current platform.
    if os.name == 'nt':
        # NOTE(review): raw string, so the first entry starts with four
        # backslashes -- confirm that is intended for the network share.
        dirs = [r'\\\\psf\Dropbox\Study\Language\Chinese\Red\subtitles',
                r'D:\Dropbox\Study\Language\Chinese\Red\subtitles']
    elif os.name == 'posix':
        dirs = [r'/Users/zge/Dropbox/Study/Language/Chinese/Red/subtitles',
                r'/cygdrive/d/Dropbox/Study/Language/Chinese/Red/subtitles',
                # expanduser('~') rather than os.getenv("HOME"): the latter
                # returns None when HOME is unset and os.path.join() would
                # then raise TypeError.
                os.path.join(os.path.expanduser('~'),
                             r'Dropbox/Study/Language/Chinese/Red/subtitles')]
    else:
        # Unsupported platform: report its name and give up.
        print(os.name)
        sys.exit(1)
    for url in dirs:
        if os.path.isdir(url):
            directory['work'] = url
            break
    else:
        # None of the defaults exists; without this, the lookup of
        # directory['work'] below would fail with a KeyError.
        print('no default subtitle directory found; '
              'pass a directory on the command line')
        sys.exit(1)

if not os.path.isdir(directory['work']):
    print('not a directory:', directory['work'])
    sys.exit(1)

if directory['current'] != directory['work']:
    print('work directory:', directory['work'])
    os.chdir(directory['work'])
# Collect the path of every '*.srt' file found anywhere below the work
# directory (os.walk descends into sub-directories as well).
matches = []
for folder, _subdirs, names in os.walk(directory['work']):
    matches.extend(os.path.join(folder, name)
                   for name in fnmatch.filter(names, '*.srt'))
def _is_subtitle_text(line):
    """Return True if *line* is subtitle text rather than SRT bookkeeping.

    Blank (whitespace-only) lines, cue-number lines (digits only) and
    timing lines (containing '-->') are bookkeeping and yield False.
    Text that merely starts with a digit, e.g. "3 days later", is kept.
    """
    text = line.strip()
    if not text:
        return False
    return not (text.isdigit() or '-->' in text)


def _srt_to_txt(srt_path, txt_path):
    """Write the subtitle text lines of *srt_path* to *txt_path* (UTF-8)."""
    # codecs.open works on the underlying file in binary mode, so the line
    # endings of the kept lines are written back exactly as they were read.
    # 'utf-8-sig' drops a leading byte-order mark so that the first cue
    # number is still recognised as digits only.
    with codecs.open(srt_path, 'rb', encoding='utf-8-sig') as source:
        with codecs.open(txt_path, 'wb', encoding='utf-8') as out:
            for line in source:
                if _is_subtitle_text(line):
                    out.write(line)


# cnt1 counts outputs that already existed and were left alone,
# cnt2 counts outputs that were (re)written.
cnt1, cnt2 = 0, 0
for srt_path in matches:
    txt_path = os.path.splitext(srt_path)[0] + '.txt'
    if os.path.isfile(txt_path) and not refresh:
        print(txt_path, 'existed')
        cnt1 += 1
        continue
    # The source is only opened when a conversion actually happens.
    _srt_to_txt(srt_path, txt_path)
    print(txt_path, 'created')
    cnt2 += 1

if not refresh:
    print(str(len(matches)), 'processed,', str(cnt1), 'already existed,',
          str(cnt2), 'newly created.')
else:
    print('refresh all:', str(cnt2), 'newly created')