replace string concatenation with f-strings in prints
LoicGrobol committed Mar 26, 2020
1 parent b778f3c commit fd02334
Showing 7 changed files with 38 additions and 39 deletions.
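
The change is mechanical throughout: prints that build their message by summing strings with `+` (and wrapping non-strings in `str()`) become a single f-string literal. A minimal before/after sketch of the idiom, with hypothetical values rather than a line from the diff:

    epoch = 3
    name = "en_ewt"

    # Before: concatenation needs explicit str() calls and is noisy to read.
    print("Finished epoch " + str(epoch) + " for " + name)

    # After: the f-string interpolates and converts in one literal.
    print(f"Finished epoch {epoch} for {name}")

Both lines print "Finished epoch 3 for en_ewt"; the f-string version is also generally faster, since no intermediate concatenated strings are built.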
2 changes: 1 addition & 1 deletion scripts/analysis_multimono.py
@@ -17,7 +17,7 @@ def main(options,args):
bas_runs = int(args[1]) # number of baseline experiments
exp_name = args[2] # name of new experiments
exp_runs = int(args[3]) # number of new experiments
print("Results for experiment: " + exp_name)
print(f"Results for experiment: {exp_name}")

langs = args[4:]
bas_means = np.zeros((len(langs),))
10 changes: 5 additions & 5 deletions scripts/pick_model.py
@@ -13,7 +13,7 @@
include_file = sys.argv[1]
trained_models_dir = sys.argv[2].strip("/")
#make sure there are no annoying spaces
-print('Removing leading and trailing spaces from ' + include_file)
+print(f'Removing leading and trailing spaces from {include_file}')
os.system(f"sed -i 's/\\s*//g' {include_file}")
print('Finding best iteration for each language and storing in best_epochs.txt')
cmd = f'./scripts/best_res.sh {include_file} {trained_models_dir} >best_epochs.txt'
@@ -24,7 +24,7 @@
outdir = sys.argv[3]

if not os.path.exists(outdir):
-print('Creating directory ' + outdir)
+print(f'Creating directory {outdir}')
os.mkdir(outdir)
for line in open('best_epochs.txt','r'):
try:
@@ -41,14 +41,14 @@
for lang in d:
lpath = outdir + '/' + lang + '/'
if not os.path.exists(lpath):
-print('Creating directory ' + lpath)
+print(f'Creating directory {lpath}')
os.mkdir(lpath)
infile = trained_models_dir + '/' + lang + '/barchybrid.model' + str(d[lang])
outfile = lpath + 'barchybrid.model'
if os.path.exists(infile):
-print('Copying ' + infile + ' to ' + outfile)
+print(f'Copying {infile} to {outfile}')
os.system(f'cp {infile} {outfile}')
if outdir != trained_models_dir:
paramfile = trained_models_dir + '/' + lang + '/params.pickle'
-print('Copying ' + paramfile + ' to ' + lpath)
+print(f'Copying {paramfile} to {lpath}')
os.system(f'cp {paramfile} {lpath}')
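
Note that the path handling around these prints still builds paths by concatenation (`outdir + '/' + lang + '/'` and so on); this commit only touches the print calls. A hypothetical further cleanup in the same spirit, not part of the diff, could look like:

    import os

    # Hypothetical values for illustration.
    trained_models_dir = "trained"
    outdir = "picked"
    lang = "fi"
    best_epoch = 12

    # Instead of outdir + '/' + lang + '/':
    lpath = os.path.join(outdir, lang)
    # Instead of trained_models_dir + '/' + lang + '/barchybrid.model' + str(best_epoch):
    infile = os.path.join(trained_models_dir, lang, f"barchybrid.model{best_epoch}")
    print(f"Copying {infile} to {lpath}")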
4 changes: 2 additions & 2 deletions uuparser/arc_hybrid.py
@@ -118,11 +118,11 @@ def __evaluate(self, stack, buf, train):


def Save(self, filename):
-print('Saving model to ' + filename)
+print(f'Saving model to {filename}')
self.model.save(filename)

def Load(self, filename):
-print('Loading model from ' + filename)
+print(f'Loading model from {filename}')
self.model.populate(filename)


2 changes: 1 addition & 1 deletion uuparser/feature_extractor.py
@@ -106,7 +106,7 @@ def __init__(self, model, options, vocab, nnvecs=1):
2 * (options.char_lstm_output_size
if options.char_emb_size > 0 else 0)
)
print("Word-level LSTM input size: " + str(self.lstm_input_size))
print(f"Word-level LSTM input size: {self.lstm_input_size}")

self.bilstms = []
if options.no_bilstms > 0:
19 changes: 9 additions & 10 deletions uuparser/options_manager.py
@@ -43,7 +43,7 @@ def __init__(self,options):
if not options.outdir:
raise Exception("You must specify an output directory via the --outdir option")
elif not os.path.exists(options.outdir): # create output directory if it doesn't exist
print("Creating output directory " + options.outdir)
print(f"Creating output directory {options.outdir}")
os.mkdir(options.outdir)

if not options.graph_based and (not options.predict and not
@@ -141,11 +141,12 @@ def create_UD_treebank_list(self,options):
else:
treebank.outdir = options.outdir
if not os.path.exists(treebank.outdir): # create language-specific output folder if it doesn't exist
print("Creating language-specific output directory " + treebank.outdir)
print(f"Creating language-specific output directory {treebank.outdir}")
os.mkdir(treebank.outdir)
else:
print("Warning: language-specific subdirectory " + treebank.outdir
+ " already exists, contents may be overwritten")
print(
f"Warning: language-specific subdirectory {treebank.outdir} already exists, contents may be overwritten"
)

if not options.predict:
self.prepareDev(treebank,options)
@@ -163,7 +164,7 @@ def create_UD_treebank_list(self,options):

treebanks.append(treebank)
else:
print("Warning: skipping invalid language code " + iso)
print(f"Warning: skipping invalid language code {iso}")

return treebanks

@@ -180,8 +181,7 @@ def prepareDev(self,treebank,options):
dev_file = os.path.join(treebank.outdir,'dev-split' + '.conllu') # location for the new dev file
train_file = os.path.join(treebank.outdir,'train-split' + '.conllu') # location for the new train file
dev_len = int(0.01*options.dev_percent*tot_sen)
print("Taking " + str(dev_len) + " of " + str(tot_sen)
+ " sentences from training data as new dev data for " + treebank.name)
print(f"Taking {dev_len} of {tot_sen} sentences from training data as new dev data for {treebank.name}")
random.shuffle(train_data)
dev_data = train_data[:dev_len]
utils.write_conll(dev_file,dev_data) # write the new dev data to file
@@ -196,11 +196,10 @@ def prepareDev(self,treebank,options):
+ treebank.name + " (minimum required --min-train-size: " + str(options.min_train_sents) + ")")
treebank.pred_dev = False
else: # option --create-dev not set
print("Warning: No dev data for " + treebank.name
+ ", consider adding option --create-dev to create dev data from training set")
print(f"Warning: No dev data for {treebank.name}, consider adding option --create-dev to create dev data from training set")
treebank.pred_dev = False
if options.model_selection and not treebank.pred_dev:
print("Warning: can't do model selection for " + treebank.name + " as prediction on dev data is off")
print(f"Warning: can't do model selection for {treebank.name} as prediction on dev data is off")

# if debug options is set, we read in the training, dev and test files as appropriate, cap the number of sentences and store
# new files with these smaller data sets
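
Two of the hunks above fold prints that were split over two source lines with `+` into one f-string, either on a single line or wrapped in parentheses. Where line length is a concern, adjacent f-string literals can also be concatenated implicitly; a sketch of that equivalent alternative (a style note, not what the commit does), using a hypothetical value:

    outdir = "outdir/en_ewt"  # hypothetical value

    # As in the commit: one long f-string inside parentheses.
    print(
        f"Warning: language-specific subdirectory {outdir} already exists, contents may be overwritten"
    )

    # Equivalent: implicit concatenation of adjacent f-string literals.
    print(
        f"Warning: language-specific subdirectory {outdir} "
        f"already exists, contents may be overwritten"
    )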
16 changes: 8 additions & 8 deletions uuparser/parser.py
@@ -25,7 +25,7 @@ def run(experiment,options):
print('Finished collecting vocab')

with open(paramsfile, 'wb') as paramsfp:
-print('Saving params to ' + paramsfile)
+print(f'Saving params to {paramsfile}')
pickle.dump((vocab, options), paramsfp)

print('Initializing the model')
@@ -44,10 +44,10 @@

for epoch in range(options.first_epoch, options.epochs+1):

-print('Starting epoch ' + str(epoch))
+print(f'Starting epoch {epoch}')
traindata = list(utils.read_conll_dir(experiment.treebanks, "train", options.max_sentences))
parser.Train(traindata,options)
-print('Finished epoch ' + str(epoch))
+print(f'Finished epoch {epoch}')

model_file = os.path.join(experiment.outdir, options.model + str(epoch))
parser.Save(model_file)
@@ -59,7 +59,7 @@ def run(experiment,options):
if pred_treebanks:
for treebank in pred_treebanks:
treebank.outfilename = os.path.join(treebank.outdir, 'dev_epoch_' + str(epoch) + '.conllu')
print("Predicting on dev data for " + treebank.name)
print(f"Predicting on dev data for {treebank.name}")
pred = list(parser.Predict(pred_treebanks,"dev",options))
utils.write_conll_multiling(pred,pred_treebanks)

@@ -84,11 +84,11 @@ def run(experiment,options):
if epoch == options.epochs:
bestmodel_file = os.path.join(experiment.outdir,"barchybrid.model" + str(dev_best[0]))
model_file = os.path.join(experiment.outdir,"barchybrid.model")
print("Copying " + bestmodel_file + " to " + model_file)
print(f"Copying {bestmodel_file} to {model_file}")
copyfile(bestmodel_file,model_file)
best_dev_file = os.path.join(experiment.outdir,"best_dev_epoch.txt")
with open (best_dev_file, 'w') as fh:
print("Writing best scores to: " + best_dev_file)
print(f"Writing best scores to: {best_dev_file}")
if len(experiment.treebanks) == 1:
fh.write(f"Best dev score {dev_best[1]} at epoch {dev_best[0]:d}\n")
else:
@@ -97,7 +97,7 @@ def run(experiment,options):
else: #if predict - so

params = os.path.join(experiment.modeldir,options.params)
-print('Reading params from ' + params)
+print(f'Reading params from {params}')
with open(params, 'rb') as paramsfp:
stored_vocab, stored_opt = pickle.load(paramsfp)

@@ -129,7 +129,7 @@ def run(experiment,options):

if options.pred_eval:
for treebank in experiment.treebanks:
print("Evaluating on " + treebank.name)
print(f"Evaluating on {treebank.name}")
score = utils.evaluate(treebank.test_gold,treebank.outfilename,options.conllu)
print(f"Obtained LAS F1 score of {score:.2f} on {treebank.name}")

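
The untouched context in this file already leans on f-string format specifications (`{dev_best[0]:d}` and `{score:.2f}` above): everything after a colon inside the braces is a format spec, which plain concatenation cannot express without a separate `format()` call. A small illustrative sketch with made-up values:

    score = 87.34567
    epoch = 12

    print(f"Obtained LAS F1 score of {score:.2f}")  # two decimal places: 87.35
    print(f"best epoch {epoch:d}")                  # :d insists on an integer
    print(f"epoch, zero-padded: {epoch:04d}")       # width 4, zero-filled: 0012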
24 changes: 12 additions & 12 deletions uuparser/utils.py
@@ -293,7 +293,7 @@ def read_conll(filename, treebank_id=None, proxy_tbank=None, maxSize=-1, hard_li
# hard lim means capping the corpus size across the whole training procedure
# soft lim means using a sample of the whole corpus at each epoch
fh = open(filename,'r',encoding='utf-8')
print("Reading " + filename)
print(f"Reading {filename}")
if vocab_prep and not hard_lim:
maxSize = -1 # when preparing the vocab with a soft limit we need to use the whole corpus
ts = time.time()
@@ -329,8 +329,8 @@ def read_conll(filename, treebank_id=None, proxy_tbank=None, maxSize=-1, hard_li
yield tokens
yield_count += 1
if yield_count == maxSize:
print("Capping size of corpus at " + str(yield_count) + " sentences")
break;
print(f"Capping size of corpus at {yield_count} sentences")
break
else:
yield tokens
else:
@@ -352,7 +352,7 @@ def read_conll(filename, treebank_id=None, proxy_tbank=None, maxSize=-1, hard_li
tokens.append(token)

if hard_lim and yield_count < maxSize:
-print('Warning: unable to yield ' + str(maxSize) + ' sentences, only ' + str(yield_count) + ' found')
+print(f'Warning: unable to yield {maxSize} sentences, only {yield_count} found')

# TODO: deal with case where there are still unyielded tokens
# e.g. when there is no newline at end of file
@@ -364,7 +364,7 @@ def read_conll(filename, treebank_id=None, proxy_tbank=None, maxSize=-1, hard_li
if maxSize > 0 and not hard_lim:
if len(sents) > maxSize:
sents = random.sample(sents,maxSize)
print("Yielding " + str(len(sents)) + " random sentences")
print(f"Yielding {len(sents)} random sentences")
for toks in sents:
yield toks

@@ -373,30 +373,30 @@ def read_conll(filename, treebank_id=None, proxy_tbank=None, maxSize=-1, hard_li


def write_conll(fn, conll_gen):
print("Writing to " + fn)
print(f"Writing to {fn}")
sents = 0
with open(fn, 'w', encoding='utf-8') as fh:
for sentence in conll_gen:
sents += 1
for entry in sentence[1:]:
fh.write(str(entry) + '\n')
fh.write('\n')
print("Wrote " + str(sents) + " sentences")
print(f"Wrote {sents} sentences")


def write_conll_multiling(conll_gen, treebanks):
tbank_dict = {treebank.iso_id:treebank for treebank in treebanks}
cur_tbank = conll_gen[0][0].treebank_id
outfile = tbank_dict[cur_tbank].outfilename
fh = open(outfile,'w',encoding='utf-8')
print("Writing to " + outfile)
print(f"Writing to {outfile}")
for sentence in conll_gen:
if cur_tbank != sentence[0].treebank_id:
fh.close()
cur_tbank = sentence[0].treebank_id
outfile = tbank_dict[cur_tbank].outfilename
fh = open(outfile,'w',encoding='utf-8')
print("Writing to " + outfile)
print(f"Writing to {outfile}")
for entry in sentence[1:]:
fh.write(str(entry) + '\n')
fh.write('\n')
Expand All @@ -419,7 +419,7 @@ def normalize(word):

def evaluate(gold,test,conllu):
scoresfile = test + '.txt'
print("Writing to " + scoresfile)
print(f"Writing to {scoresfile}")
with open(scoresfile, "w") as scoresfile_stream:
if not conllu:
#os.system('perl src/utils/eval.pl -g ' + gold + ' -s ' + test + ' > ' + scoresfile + ' &')
Expand Down Expand Up @@ -502,7 +502,7 @@ def extract_embeddings_from_file(filename, words=None, max_emb=-1, filtered_file
error_count += 1
line_count += 1
if line_count % 100000 == 0:
print("Reading line: " + str(line_count))
print(f"Reading line: {line_count}")
else:
break

@@ -514,7 +514,7 @@ def extract_embeddings_from_file(filename, words=None, max_emb=-1, filtered_file
print(f"{len(embeddings):d} entries found from vocabulary (out of {len(words):d})")

if filtered_filename and embeddings:
print("Writing filtered embeddings to " + filtered_filename)
print(f"Writing filtered embeddings to {filtered_filename}")
with open(filtered_filename,'w') as fh_filter:
no_embeddings = len(embeddings)
embedding_size = len(embeddings.itervalues().next())
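
One caution in the unchanged context above: `embeddings.itervalues().next()` is Python 2 syntax and raises `AttributeError` on Python 3, which the f-strings introduced elsewhere in this commit require (Python 3.6+). If that line were touched too, the Python 3 spelling would be something like this sketch:

    embeddings = {"the": [0.1, 0.2, 0.3]}  # hypothetical embeddings dict

    # Python 2, as in the context above: embeddings.itervalues().next()
    # Python 3 equivalent: take any one value and measure its length.
    embedding_size = len(next(iter(embeddings.values())))
    print(f"Embedding size: {embedding_size}")  # -> Embedding size: 3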
