|
| 1 | +from __future__ import print_function |
| 2 | + |
1 | 3 | from argparse import ArgumentParser
|
2 | 4 | import glob
|
3 | 5 | from itertools import islice
|
@@ -38,9 +40,10 @@ def __init__(self, corpus_dir, which='train', version='3.0', corrected=True,
|
38 | 40 | """
|
39 | 41 | if which not in ['train', 'valid', 'test']:
|
40 | 42 | raise ValueError(
|
41 |
| - f'`which` should be one of "train", "valid", or "test". Got "{which}".') |
| 43 | + '`which` should be one of "train", "valid", or "test". Got "{}".' |
| 44 | + .format(which)) |
42 | 45 | if version not in ['2.0', '3.0']:
|
43 |
| - raise ValueError(f'`version` should be "2.0" or "3.0". Got "{version}".') |
| 46 | + raise ValueError('`version` should be "2.0" or "3.0". Got "{}".'.format(version)) |
44 | 47 |
|
45 | 48 | self.corpus_dir = corpus_dir
|
46 | 49 | self.which = which
|
@@ -71,12 +74,13 @@ def _get_iterator(self):
|
71 | 74 | path = os.path.join(self.corpus_dir, self.version, self.corrected_dir,
|
72 | 75 | self.parsed_dir, 'wsj')
|
73 | 76 | for sec in self.sections:
|
74 |
| - glob_pattern = os.path.join(path, f'{sec:02}', '*.mrg') |
| 77 | + glob_pattern = os.path.join(path, '{:02}'.format(sec), '*.mrg') |
75 | 78 | for filename in sorted(glob.glob(glob_pattern)):
|
76 | 79 | with open(filename) as f:
|
77 | 80 | lines = (line.rstrip() for line in f if line.rstrip())
|
78 |
| - yield from (self._preprocess_sentence(sent) |
79 |
| - for sent in self._concat_parsed_sentences(lines)) |
| 81 | + for s in (self._preprocess_sentence(sent) |
| 82 | + for sent in self._concat_parsed_sentences(lines)): |
| 83 | + yield s |
80 | 84 |
|
81 | 85 | @classmethod
|
82 | 86 | def _preprocess_sentence(cls, sentence):
|
@@ -164,20 +168,22 @@ def __init__(self, corpus_dir, which='train', split_num=0, max_num_sentences=Non
|
164 | 168 | """
|
165 | 169 | if which not in ['train', 'valid', 'test']:
|
166 | 170 | raise ValueError(
|
167 |
| - f'`which` should be one of "train", "valid", or "test". Got "{which}".') |
| 171 | + '`which` should be one of "train", "valid", or "test". Got "{}".' |
| 172 | + .format(which)) |
168 | 173 |
|
169 | 174 | self.corpus_dir = corpus_dir
|
170 | 175 | self.which = which
|
171 | 176 | self.split_num = split_num
|
172 | 177 | self.max_num_sentences = max_num_sentences
|
173 | 178 |
|
174 | 179 | def _get_iterator(self):
|
175 |
| - filename = os.path.join(self.corpus_dir, |
176 |
| - f'{self.FILENAME}.{self.split_num}.{self.which}') |
| 180 | + filename = os.path.join(self.corpus_dir, '{}.{}.{}'.format( |
| 181 | + self.FILENAME, self.split_num, self.which)) |
177 | 182 | with open(filename) as f:
|
178 | 183 | for line in f:
|
179 |
| - yield from (self._preprocess_sentence(sent) |
180 |
| - for sent in self._get_parsed_sentences(line)) |
| 184 | + for s in (self._preprocess_sentence(sent) |
| 185 | + for sent in self._get_parsed_sentences(line)): |
| 186 | + yield s |
181 | 187 |
|
182 | 188 | @staticmethod
|
183 | 189 | def _get_parsed_sentences(line):
|
|
0 commit comments