Convert to python 2

kmkurn · kmkurn · commit 13ba45f5937c · 2017-07-04T17:42:04.000+01:00
diff --git a/pypkg/dataset.py b/pypkg/dataset.py
@@ -1,3 +1,5 @@
+from __future__ import print_function
+
 from argparse import ArgumentParser
 import glob
 from itertools import islice
@@ -38,9 +40,10 @@ def __init__(self, corpus_dir, which='train', version='3.0', corrected=True,
         """
         if which not in ['train', 'valid', 'test']:
             raise ValueError(
-                f'`which` should be one of "train", "valid", or "test". Got "{which}".')
+                '`which` should be one of "train", "valid", or "test". Got "{}".'
+                .format(which))
         if version not in ['2.0', '3.0']:
-            raise ValueError(f'`version` should be "2.0" or "3.0". Got "{version}".')
+            raise ValueError('`version` should be "2.0" or "3.0". Got "{}".'.format(version))
 
         self.corpus_dir = corpus_dir
         self.which = which
@@ -71,12 +74,13 @@ def _get_iterator(self):
         path = os.path.join(self.corpus_dir, self.version, self.corrected_dir,
                             self.parsed_dir, 'wsj')
         for sec in self.sections:
-            glob_pattern = os.path.join(path, f'{sec:02}', '*.mrg')
+            glob_pattern = os.path.join(path, '{:02}'.format(sec), '*.mrg')
             for filename in sorted(glob.glob(glob_pattern)):
                 with open(filename) as f:
                     lines = (line.rstrip() for line in f if line.rstrip())
-                    yield from (self._preprocess_sentence(sent)
-                                for sent in self._concat_parsed_sentences(lines))
+                    for s in (self._preprocess_sentence(sent)
+                              for sent in self._concat_parsed_sentences(lines)):
+                        yield s
 
     @classmethod
     def _preprocess_sentence(cls, sentence):
@@ -164,20 +168,22 @@ def __init__(self, corpus_dir, which='train', split_num=0, max_num_sentences=Non
         """
         if which not in ['train', 'valid', 'test']:
             raise ValueError(
-                f'`which` should be one of "train", "valid", or "test". Got "{which}".')
+                '`which` should be one of "train", "valid", or "test". Got "{}".'
+                .format(which))
 
         self.corpus_dir = corpus_dir
         self.which = which
         self.split_num = split_num
         self.max_num_sentences = max_num_sentences
 
     def _get_iterator(self):
-        filename = os.path.join(self.corpus_dir,
-                                f'{self.FILENAME}.{self.split_num}.{self.which}')
+        filename = os.path.join(self.corpus_dir, '{}.{}.{}'.format(
+            self.FILENAME, self.split_num, self.which))
         with open(filename) as f:
             for line in f:
-                yield from (self._preprocess_sentence(sent)
-                            for sent in self._get_parsed_sentences(line))
+                for s in (self._preprocess_sentence(sent)
+                          for sent in self._get_parsed_sentences(line)):
+                    yield s
 
     @staticmethod
     def _get_parsed_sentences(line):
diff --git a/pypkg/utils.py b/pypkg/utils.py
@@ -48,9 +48,8 @@ def dump_args(args, path=None, excludes=None, override_excludes=False):
 
 
 def _get_last_commit_hash():
-    return subprocess.run(
-        'git log -n 1 --pretty=format:%H'.split(), stdout=subprocess.PIPE,
-        encoding='UTF-8').stdout
+    return subprocess.Popen(
+        'git log -n 1 --pretty=format:%H'.split(), stdout=subprocess.PIPE).communicate()[0]
 
 
 def load_args(obj, path=None, typecast=None):
diff --git a/scripts/do_brown_cluster.py b/scripts/do_brown_cluster.py
@@ -1,5 +1,7 @@
 #!/usr/bin/env python
 
+from __future__ import print_function
+
 from argparse import ArgumentParser
 from math import sqrt
 import os
@@ -24,9 +26,10 @@
             for word in line.strip().split():
                 vocab.add(word)
     clust_size = int(sqrt(len(vocab)))
-    print(f'vocab size: {len(vocab)}', file=sys.stderr)
-    print(f'cluster size: {clust_size}', file=sys.stderr)
+    print('vocab size: {}'.format(len(vocab)), file=sys.stderr)
+    print('cluster size: {}'.format(clust_size), file=sys.stderr)
 
-    cmd = f'{args.wcluster} --text {args.file} --c {clust_size} --output_dir {args.outdir}'
-    print(f'command: {cmd}', file=sys.stderr)
-    subprocess.run(cmd, check=True, shell=True)
+    cmd = '{} --text {} --c {} --output_dir {}'.format(
+        args.wcluster, args.file, clust_size, args.outdir)
+    print('command: {}'.format(cmd), file=sys.stderr)
+    subprocess.check_call(cmd, shell=True)  # raise CalledProcessError if wcluster exits non-zero
diff --git a/scripts/get_unkified_from_oracle.py b/scripts/get_unkified_from_oracle.py
@@ -1,5 +1,7 @@
 #!/usr/bin/env python
 
+from __future__ import print_function
+
 from argparse import ArgumentParser
 
 from pypkg.oracle import oracle_iter, gen_oracle_iter
diff --git a/scripts/remove_unseen.py b/scripts/remove_unseen.py
@@ -1,5 +1,7 @@
 #!/usr/bin/env python
 
+from __future__ import print_function
+
 from argparse import ArgumentParser
 
 from nltk.tree import Tree
diff --git a/scripts/split_cv.py b/scripts/split_cv.py
@@ -1,7 +1,8 @@
 #!/usr/bin/env python
 
+from __future__ import print_function
+
 from argparse import ArgumentParser
-from itertools import accumulate
 import os
 import random
 
@@ -17,9 +18,8 @@ def get_split_indices(size, k=5):
     fold_sizes = [size // k for _ in range(k)]
     for i in range(size % k):
         fold_sizes[i] += 1
-    cum_fold_sizes = list(accumulate(fold_sizes))
     for i in range(k):
-        skip = cum_fold_sizes[i-1] if i > 0 else 0
+        skip = sum(fold_sizes[:i])
         fsz = fold_sizes[i]
         yield indices[skip:skip+fsz]
 
@@ -62,9 +62,9 @@ def split(iterable, indices, hold_out=0.):
 
     for i, indices in enumerate(get_split_indices(len(lines), k=args.k)):
         test, valid, train = split(lines, indices, hold_out=args.hold_out)
-        fname_test = os.path.join(f'{output_dir}', f'{basename}.{i}.test')
-        fname_valid = os.path.join(f'{output_dir}', f'{basename}.{i}.valid')
-        fname_train = os.path.join(f'{output_dir}', f'{basename}.{i}.train')
+        fname_test = os.path.join(output_dir, '{}.{}.test'.format(basename, i))
+        fname_valid = os.path.join(output_dir, '{}.{}.valid'.format(basename, i))
+        fname_train = os.path.join(output_dir, '{}.{}.train'.format(basename, i))
         with open(fname_test, 'w') as f:
             print(''.join(test), file=f)
         with open(fname_train, 'w') as f:
diff --git a/scripts/word_percentage.py b/scripts/word_percentage.py
@@ -1,5 +1,7 @@
 #!/usr/bin/env python
 
+from __future__ import print_function, division
+
 from argparse import ArgumentParser
 
 from nltk.tree import Tree
@@ -30,7 +32,7 @@
 
     pre_words_in_training = train_words.intersection(pretrained_words)
     pre_words_rate = len(pre_words_in_training) / len(train_words)
-    print(f'Number of pretrained words: {len(pretrained_words)}')
-    print(f'Number of pretrained words in training: {len(pre_words_in_training)}')
-    print(f'Number of word in training: {len(train_words)}')
-    print(f'Percentage of pretrained words in training: {pre_words_rate:.2%}')
+    print('Number of pretrained words: {}'.format(len(pretrained_words)))
+    print('Number of pretrained words in training: {}'.format(len(pre_words_in_training)))
+    print('Number of word in training: {}'.format(len(train_words)))
+    print('Percentage of pretrained words in training: {:.2%}'.format(pre_words_rate))