
Commit 13ba45f

Convert to python 2
1 parent 98dc7c9 commit 13ba45f

7 files changed: +42 -28 lines changed

pypkg/dataset.py

+16 -10

@@ -1,3 +1,5 @@
+from __future__ import print_function
+
 from argparse import ArgumentParser
 import glob
 from itertools import islice
@@ -38,9 +40,10 @@ def __init__(self, corpus_dir, which='train', version='3.0', corrected=True,
         """
         if which not in ['train', 'valid', 'test']:
             raise ValueError(
-                f'`which` should be one of "train", "valid", or "test". Got "{which}".')
+                '`which` should be one of "train", "valid", or "test". Got "{}".'
+                .format(which))
         if version not in ['2.0', '3.0']:
-            raise ValueError(f'`version` should be "2.0" or "3.0". Got "{version}".')
+            raise ValueError('`version` should be "2.0" or "3.0". Got "{}".'.format(version))
 
         self.corpus_dir = corpus_dir
         self.which = which
@@ -71,12 +74,13 @@ def _get_iterator(self):
         path = os.path.join(self.corpus_dir, self.version, self.corrected_dir,
                             self.parsed_dir, 'wsj')
         for sec in self.sections:
-            glob_pattern = os.path.join(path, f'{sec:02}', '*.mrg')
+            glob_pattern = os.path.join(path, '{:02}'.format(sec), '*.mrg')
             for filename in sorted(glob.glob(glob_pattern)):
                 with open(filename) as f:
                     lines = (line.rstrip() for line in f if line.rstrip())
-                    yield from (self._preprocess_sentence(sent)
-                                for sent in self._concat_parsed_sentences(lines))
+                    for s in (self._preprocess_sentence(sent)
+                              for sent in self._concat_parsed_sentences(lines)):
+                        yield s
 
     @classmethod
     def _preprocess_sentence(cls, sentence):
@@ -164,20 +168,22 @@ def __init__(self, corpus_dir, which='train', split_num=0, max_num_sentences=Non
         """
         if which not in ['train', 'valid', 'test']:
             raise ValueError(
-                f'`which` should be one of "train", "valid", or "test". Got "{which}".')
+                '`which` should be one of "train", "valid", or "test". Got "{}".'
+                .format(which))
 
         self.corpus_dir = corpus_dir
         self.which = which
         self.split_num = split_num
         self.max_num_sentences = max_num_sentences
 
     def _get_iterator(self):
-        filename = os.path.join(self.corpus_dir,
-                                f'{self.FILENAME}.{self.split_num}.{self.which}')
+        filename = os.path.join(self.corpus_dir, '{}.{}.{}'.format(
+            self.FILENAME, self.split_num, self.which))
         with open(filename) as f:
             for line in f:
-                yield from (self._preprocess_sentence(sent)
-                            for sent in self._get_parsed_sentences(line))
+                for s in (self._preprocess_sentence(sent)
+                          for sent in self._get_parsed_sentences(line)):
+                    yield s
 
     @staticmethod
     def _get_parsed_sentences(line):
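The two rewrites that recur throughout this file are f-strings → `str.format` and `yield from` → an explicit re-yielding loop, since neither construct exists in Python 2. A minimal sketch of both patterns, using illustrative names rather than the actual pypkg classes:

```python
from __future__ import print_function

import os


def glob_patterns(path, sections):
    # Illustrative stand-in for the rewritten _get_iterator generator.
    for sec in sections:
        # f'{sec:02}' becomes '{:02}'.format(sec); the zero-padding format
        # spec itself is unchanged between the two spellings.
        pattern = os.path.join(path, '{:02}'.format(sec), '*.mrg')
        # `yield from <iterable>` has no Python 2 equivalent, so each item
        # is re-yielded from an explicit for loop instead.
        yield pattern


if __name__ == '__main__':
    for p in glob_patterns('wsj', [0, 1, 23]):
        print(p)  # wsj/00/*.mrg, wsj/01/*.mrg, wsj/23/*.mrg
```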

pypkg/utils.py

+2 -3

@@ -48,9 +48,8 @@ def dump_args(args, path=None, excludes=None, override_excludes=False):
 
 
 def _get_last_commit_hash():
-    return subprocess.run(
-        'git log -n 1 --pretty=format:%H'.split(), stdout=subprocess.PIPE,
-        encoding='UTF-8').stdout
+    return subprocess.Popen(
+        'git log -n 1 --pretty=format:%H'.split(), stdout=subprocess.PIPE).communicate()[0]
 
 
 def load_args(obj, path=None, typecast=None):
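`subprocess.run` was added in Python 3.5 and its `encoding` argument in 3.6, hence the fallback to `Popen(...).communicate()`. One behavioural difference worth noting: without an encoding, `communicate()` returns bytes on Python 3, whereas the original call returned a decoded `str`. A hedged sketch of a variant that decodes explicitly (illustrative, not the pypkg helper itself):

```python
import subprocess


def get_last_commit_hash():
    # Portable across Python 2 and 3: Popen predates subprocess.run.
    proc = subprocess.Popen(
        'git log -n 1 --pretty=format:%H'.split(), stdout=subprocess.PIPE)
    out, _ = proc.communicate()
    # communicate() returns bytes; decoding recovers the str result that
    # subprocess.run(..., encoding='UTF-8').stdout used to give.
    return out.decode('utf-8')
```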

scripts/do_brown_cluster.py

+8 -5

@@ -1,5 +1,7 @@
 #!/usr/bin/env python
 
+from __future__ import print_function
+
 from argparse import ArgumentParser
 from math import sqrt
 import os
@@ -24,9 +26,10 @@
         for word in line.strip().split():
             vocab.add(word)
 clust_size = int(sqrt(len(vocab)))
-print(f'vocab size: {len(vocab)}', file=sys.stderr)
-print(f'cluster size: {clust_size}', file=sys.stderr)
+print('vocab size: {}'.format(len(vocab)), file=sys.stderr)
+print('cluster size: {}'.format(clust_size), file=sys.stderr)
 
-cmd = f'{args.wcluster} --text {args.file} --c {clust_size} --output_dir {args.outdir}'
-print(f'command: {cmd}', file=sys.stderr)
-subprocess.run(cmd, check=True, shell=True)
+cmd = '{} --text {} --c {} --output_dir {}'.format(
+    args.wcluster, args.file, clust_size, args.outdir)
+print('command: {}'.format(cmd), file=sys.stderr)
+subprocess.call(cmd, shell=True)
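Here `subprocess.run(cmd, check=True, shell=True)` becomes `subprocess.call(cmd, shell=True)`, which quietly drops the `check=True` semantics: a non-zero exit status no longer raises. If that check matters, `subprocess.check_call` exists on both Python 2 and 3. A small sketch with a placeholder command:

```python
import subprocess

# Placeholder command; in the script it is built with str.format from the
# wcluster path and its arguments.
cmd = 'echo clustering'

# call() returns the exit status but never raises on failure.
status = subprocess.call(cmd, shell=True)

# check_call() keeps the old check=True behaviour: it raises
# CalledProcessError when the command exits non-zero.
subprocess.check_call(cmd, shell=True)
```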

scripts/get_unkified_from_oracle.py

+2

@@ -1,5 +1,7 @@
 #!/usr/bin/env python
 
+from __future__ import print_function
+
 from argparse import ArgumentParser
 
 from pypkg.oracle import oracle_iter, gen_oracle_iter
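This script (and the next one) only gains `from __future__ import print_function`. On Python 2 that import turns `print` into a function, which is what makes keyword arguments such as `file=` and `sep=`, used elsewhere in this commit, legal syntax. A quick illustration:

```python
from __future__ import print_function

import sys

# Without the __future__ import this line is a SyntaxError on Python 2;
# with it, the Python 3 function form works as expected.
print('status message', file=sys.stderr)
print('a', 'b', 'c', sep=', ')  # other keyword arguments also become available
```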

scripts/remove_unseen.py

+2

@@ -1,5 +1,7 @@
 #!/usr/bin/env python
 
+from __future__ import print_function
+
 from argparse import ArgumentParser
 
 from nltk.tree import Tree

scripts/split_cv.py

+6 -6

@@ -1,7 +1,8 @@
 #!/usr/bin/env python
 
+from __future__ import print_function
+
 from argparse import ArgumentParser
-from itertools import accumulate
 import os
 import random
 
@@ -17,9 +18,8 @@ def get_split_indices(size, k=5):
     fold_sizes = [size // k for _ in range(k)]
     for i in range(size % k):
         fold_sizes[i] += 1
-    cum_fold_sizes = list(accumulate(fold_sizes))
     for i in range(k):
-        skip = cum_fold_sizes[i-1] if i > 0 else 0
+        skip = sum(fold_sizes[:i])
         fsz = fold_sizes[i]
         yield indices[skip:skip+fsz]
 
@@ -62,9 +62,9 @@ def split(iterable, indices, hold_out=0.):
 
 for i, indices in enumerate(get_split_indices(len(lines), k=args.k)):
     test, valid, train = split(lines, indices, hold_out=args.hold_out)
-    fname_test = os.path.join(f'{output_dir}', f'{basename}.{i}.test')
-    fname_valid = os.path.join(f'{output_dir}', f'{basename}.{i}.valid')
-    fname_train = os.path.join(f'{output_dir}', f'{basename}.{i}.train')
+    fname_test = os.path.join(output_dir, '{}.{}.test'.format(basename, i))
+    fname_valid = os.path.join(output_dir, '{}.{}.valid'.format(basename, i))
+    fname_train = os.path.join(output_dir, '{}.{}.train'.format(basename, i))
     with open(fname_test, 'w') as f:
         print(''.join(test), file=f)
     with open(fname_train, 'w') as f:
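`itertools.accumulate` only exists on Python 3, so the cumulative fold offsets are recomputed as `sum(fold_sizes[:i])`. The two are equivalent here; a sketch comparing the replacement with a running-total alternative (function names are illustrative):

```python
def split_offsets(fold_sizes):
    # The rewritten approach: the offset of fold i is the sum of all earlier
    # fold sizes. O(k**2) overall, which is fine for a handful of folds.
    for i in range(len(fold_sizes)):
        yield sum(fold_sizes[:i])


def split_offsets_running(fold_sizes):
    # Same offsets with a running total, avoiding the repeated summation.
    skip = 0
    for fsz in fold_sizes:
        yield skip
        skip += fsz


assert list(split_offsets([3, 3, 2])) == list(split_offsets_running([3, 3, 2])) == [0, 3, 6]
```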

scripts/word_percentage.py

+6 -4

@@ -1,5 +1,7 @@
 #!/usr/bin/env python
 
+from __future__ import print_function, division
+
 from argparse import ArgumentParser
 
 from nltk.tree import Tree
@@ -30,7 +32,7 @@
 
 pre_words_in_training = train_words.intersection(pretrained_words)
 pre_words_rate = len(pre_words_in_training) / len(train_words)
-print(f'Number of pretrained words: {len(pretrained_words)}')
-print(f'Number of pretrained words in training: {len(pre_words_in_training)}')
-print(f'Number of word in training: {len(train_words)}')
-print(f'Percentage of pretrained words in training: {pre_words_rate:.2%}')
+print('Number of pretrained words: {}'.format(len(pretrained_words)))
+print('Number of pretrained words in training: {}'.format(len(pre_words_in_training)))
+print('Number of word in training: {}'.format(len(train_words)))
+print('Percentage of pretrained words in training: {:.2%}'.format(pre_words_rate))
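Besides `print_function`, this script also imports `division` from `__future__`, so `len(pre_words_in_training) / len(train_words)` stays a float ratio on Python 2 instead of being truncated by classic integer division; the `{:.2%}` format spec carries over to `str.format` unchanged. A small check of both points (the numbers are made up):

```python
from __future__ import division, print_function

# With true division, int / int gives 0.75 on Python 2 as well, not the 0
# that classic integer division would produce.
rate = 3 / 4

# The .2% format spec is supported by str.format on Python 2.7, so the
# percentage formatting survives the f-string removal unchanged.
print('Percentage of pretrained words in training: {:.2%}'.format(rate))
# -> Percentage of pretrained words in training: 75.00%
```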
