replace string concatenation with f-strings in prints
LoicGrobol committed Mar 26, 2020
1 parent b778f3c commit fd02334
Showing 7 changed files with 38 additions and 39 deletions.
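
The change is mechanical throughout: prints that build their message by summing strings with `+` (and wrapping non-strings in `str()`) become a single f-string literal. A minimal before/after sketch of the idiom, with hypothetical values rather than a line from the diff:

    epoch = 3
    name = "en_ewt"

    # Before: concatenation needs explicit str() calls and is noisy to read.
    print("Finished epoch " + str(epoch) + " for " + name)

    # After: the f-string interpolates and converts in one literal.
    print(f"Finished epoch {epoch} for {name}")

Both lines print "Finished epoch 3 for en_ewt"; the f-string version is also generally faster, since no intermediate concatenated strings are built.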
2 changes: 1 addition & 1 deletion scripts/analysis_multimono.py
@@ -17,7 +17,7 @@ def main(options,args):
bas_runs = int(args[1]) # number of baseline experiments
exp_name = args[2] # name of new experiments
exp_runs = int(args[3]) # number of new experiments
print("Results for experiment: " + exp_name)
print(f"Results for experiment: {exp_name}")

langs = args[4:]
bas_means = np.zeros((len(langs),))
10 changes: 5 additions & 5 deletions scripts/pick_model.py
@@ -13,7 +13,7 @@
include_file = sys.argv[1]
trained_models_dir = sys.argv[2].strip("/")
#make sure there are no annoying spaces
-print('Removing leading and trailing spaces from ' + include_file)
+print(f'Removing leading and trailing spaces from {include_file}')
os.system(f"sed -i 's/\\s*//g' {include_file}")
print('Finding best iteration for each language and storing in best_epochs.txt')
cmd = f'./scripts/best_res.sh {include_file} {trained_models_dir} >best_epochs.txt'
@@ -24,7 +24,7 @@
outdir = sys.argv[3]

if not os.path.exists(outdir):
-print('Creating directory ' + outdir)
+print(f'Creating directory {outdir}')
os.mkdir(outdir)
for line in open('best_epochs.txt','r'):
try:
@@ -41,14 +41,14 @@
for lang in d:
lpath = outdir + '/' + lang + '/'
if not os.path.exists(lpath):
-print('Creating directory ' + lpath)
+print(f'Creating directory {lpath}')
os.mkdir(lpath)
infile = trained_models_dir + '/' + lang + '/barchybrid.model' + str(d[lang])
outfile = lpath + 'barchybrid.model'
if os.path.exists(infile):
-print('Copying ' + infile + ' to ' + outfile)
+print(f'Copying {infile} to {outfile}')
os.system(f'cp {infile} {outfile}')
if outdir != trained_models_dir:
paramfile = trained_models_dir + '/' + lang + '/params.pickle'
-print('Copying ' + paramfile + ' to ' + lpath)
+print(f'Copying {paramfile} to {lpath}')
os.system(f'cp {paramfile} {lpath}')
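
Note that the path handling around these prints still builds paths by concatenation (`outdir + '/' + lang + '/'` and so on); this commit only touches the print calls. A hypothetical further cleanup in the same spirit, not part of the diff, could look like:

    import os

    # Hypothetical values for illustration.
    trained_models_dir = "trained"
    outdir = "picked"
    lang = "fi"
    best_epoch = 12

    # Instead of outdir + '/' + lang + '/':
    lpath = os.path.join(outdir, lang)
    # Instead of trained_models_dir + '/' + lang + '/barchybrid.model' + str(best_epoch):
    infile = os.path.join(trained_models_dir, lang, f"barchybrid.model{best_epoch}")
    print(f"Copying {infile} to {lpath}")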
4 changes: 2 additions & 2 deletions uuparser/arc_hybrid.py
@@ -118,11 +118,11 @@ def __evaluate(self, stack, buf, train):


def Save(self, filename):
-print('Saving model to ' + filename)
+print(f'Saving model to {filename}')
self.model.save(filename)

def Load(self, filename):
-print('Loading model from ' + filename)
+print(f'Loading model from {filename}')
self.model.populate(filename)


2 changes: 1 addition & 1 deletion uuparser/feature_extractor.py
@@ -106,7 +106,7 @@ def __init__(self, model, options, vocab, nnvecs=1):
2 * (options.char_lstm_output_size
if options.char_emb_size > 0 else 0)
)
print("Word-level LSTM input size: " + str(self.lstm_input_size))
print(f"Word-level LSTM input size: {self.lstm_input_size}")

self.bilstms = []
if options.no_bilstms > 0:
19 changes: 9 additions & 10 deletions uuparser/options_manager.py
@@ -43,7 +43,7 @@ def __init__(self,options):
if not options.outdir:
raise Exception("You must specify an output directory via the --outdir option")
elif not os.path.exists(options.outdir): # create output directory if it doesn't exist
print("Creating output directory " + options.outdir)
print(f"Creating output directory {options.outdir}")
os.mkdir(options.outdir)

if not options.graph_based and (not options.predict and not
@@ -141,11 +141,12 @@ def create_UD_treebank_list(self,options):
else:
treebank.outdir = options.outdir
if not os.path.exists(treebank.outdir): # create language-specific output folder if it doesn't exist
print("Creating language-specific output directory " + treebank.outdir)
print(f"Creating language-specific output directory {treebank.outdir}")
os.mkdir(treebank.outdir)
else:
print("Warning: language-specific subdirectory " + treebank.outdir
+ " already exists, contents may be overwritten")
print(
f"Warning: language-specific subdirectory {treebank.outdir} already exists, contents may be overwritten"
)

if not options.predict:
self.prepareDev(treebank,options)
@@ -163,7 +164,7 @@ def create_UD_treebank_list(self,options):

treebanks.append(treebank)
else:
print("Warning: skipping invalid language code " + iso)
print(f"Warning: skipping invalid language code {iso}")

return treebanks

@@ -180,8 +181,7 @@ def prepareDev(self,treebank,options):
dev_file = os.path.join(treebank.outdir,'dev-split' + '.conllu') # location for the new dev file
train_file = os.path.join(treebank.outdir,'train-split' + '.conllu') # location for the new train file
dev_len = int(0.01*options.dev_percent*tot_sen)
print("Taking " + str(dev_len) + " of " + str(tot_sen)
+ " sentences from training data as new dev data for " + treebank.name)
print(f"Taking {dev_len} of {tot_sen} sentences from training data as new dev data for {treebank.name}")
random.shuffle(train_data)
dev_data = train_data[:dev_len]
utils.write_conll(dev_file,dev_data) # write the new dev data to file
@@ -196,11 +196,10 @@ def prepareDev(self,treebank,options):
+ treebank.name + " (minimum required --min-train-size: " + str(options.min_train_sents) + ")")
treebank.pred_dev = False
else: # option --create-dev not set
print("Warning: No dev data for " + treebank.name
+ ", consider adding option --create-dev to create dev data from training set")
print(f"Warning: No dev data for {treebank.name}, consider adding option --create-dev to create dev data from training set")
treebank.pred_dev = False
if options.model_selection and not treebank.pred_dev:
print("Warning: can't do model selection for " + treebank.name + " as prediction on dev data is off")
print(f"Warning: can't do model selection for {treebank.name} as prediction on dev data is off")

# if debug options is set, we read in the training, dev and test files as appropriate, cap the number of sentences and store
# new files with these smaller data sets
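
Two of the hunks above fold prints that were split over two source lines with `+` into one f-string, either on a single line or wrapped in parentheses. Where line length is a concern, adjacent f-string literals can also be concatenated implicitly; a sketch of that equivalent alternative (a style note, not what the commit does), using a hypothetical value:

    outdir = "outdir/en_ewt"  # hypothetical value

    # As in the commit: one long f-string inside parentheses.
    print(
        f"Warning: language-specific subdirectory {outdir} already exists, contents may be overwritten"
    )

    # Equivalent: implicit concatenation of adjacent f-string literals.
    print(
        f"Warning: language-specific subdirectory {outdir} "
        f"already exists, contents may be overwritten"
    )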
16 changes: 8 additions & 8 deletions uuparser/parser.py
@@ -25,7 +25,7 @@ def run(experiment,options):
print('Finished collecting vocab')

with open(paramsfile, 'wb') as paramsfp:
-print('Saving params to ' + paramsfile)
+print(f'Saving params to {paramsfile}')
pickle.dump((vocab, options), paramsfp)

print('Initializing the model')
@@ -44,10 +44,10 @@

for epoch in range(options.first_epoch, options.epochs+1):

-print('Starting epoch ' + str(epoch))
+print(f'Starting epoch {epoch}')
traindata = list(utils.read_conll_dir(experiment.treebanks, "train", options.max_sentences))
parser.Train(traindata,options)
-print('Finished epoch ' + str(epoch))
+print(f'Finished epoch {epoch}')

model_file = os.path.join(experiment.outdir, options.model + str(epoch))
parser.Save(model_file)
@@ -59,7 +59,7 @@ def run(experiment,options):
if pred_treebanks:
for treebank in pred_treebanks:
treebank.outfilename = os.path.join(treebank.outdir, 'dev_epoch_' + str(epoch) + '.conllu')
print("Predicting on dev data for " + treebank.name)
print(f"Predicting on dev data for {treebank.name}")
pred = list(parser.Predict(pred_treebanks,"dev",options))
utils.write_conll_multiling(pred,pred_treebanks)

@@ -84,11 +84,11 @@ def run(experiment,options):
if epoch == options.epochs:
bestmodel_file = os.path.join(experiment.outdir,"barchybrid.model" + str(dev_best[0]))
model_file = os.path.join(experiment.outdir,"barchybrid.model")
print("Copying " + bestmodel_file + " to " + model_file)
print(f"Copying {bestmodel_file} to {model_file}")
copyfile(bestmodel_file,model_file)
best_dev_file = os.path.join(experiment.outdir,"best_dev_epoch.txt")
with open (best_dev_file, 'w') as fh:
print("Writing best scores to: " + best_dev_file)
print(f"Writing best scores to: {best_dev_file}")
if len(experiment.treebanks) == 1:
fh.write(f"Best dev score {dev_best[1]} at epoch {dev_best[0]:d}\n")
else:
@@ -97,7 +97,7 @@ def run(experiment,options):
else: #if predict - so

params = os.path.join(experiment.modeldir,options.params)
-print('Reading params from ' + params)
+print(f'Reading params from {params}')
with open(params, 'rb') as paramsfp:
stored_vocab, stored_opt = pickle.load(paramsfp)

@@ -129,7 +129,7 @@ def run(experiment,options):

if options.pred_eval:
for treebank in experiment.treebanks:
print("Evaluating on " + treebank.name)
print(f"Evaluating on {treebank.name}")
score = utils.evaluate(treebank.test_gold,treebank.outfilename,options.conllu)
print(f"Obtained LAS F1 score of {score:.2f} on {treebank.name}")

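
The untouched context in this file already leans on f-string format specifications (`{dev_best[0]:d}` and `{score:.2f}` above): everything after a colon inside the braces is a format spec, which plain concatenation cannot express without a separate `format()` call. A small illustrative sketch with made-up values:

    score = 87.34567
    epoch = 12

    print(f"Obtained LAS F1 score of {score:.2f}")  # two decimal places: 87.35
    print(f"best epoch {epoch:d}")                  # :d insists on an integer
    print(f"epoch, zero-padded: {epoch:04d}")       # width 4, zero-filled: 0012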
24 changes: 12 additions & 12 deletions uuparser/utils.py
@@ -293,7 +293,7 @@ def read_conll(filename, treebank_id=None, proxy_tbank=None, maxSize=-1, hard_li
# hard lim means capping the corpus size across the whole training procedure
# soft lim means using a sample of the whole corpus at each epoch
fh = open(filename,'r',encoding='utf-8')
print("Reading " + filename)
print(f"Reading {filename}")
if vocab_prep and not hard_lim:
maxSize = -1 # when preparing the vocab with a soft limit we need to use the whole corpus
ts = time.time()
@@ -329,8 +329,8 @@ def read_conll(filename, treebank_id=None, proxy_tbank=None, maxSize=-1, hard_li
yield tokens
yield_count += 1
if yield_count == maxSize:
print("Capping size of corpus at " + str(yield_count) + " sentences")
break;
print(f"Capping size of corpus at {yield_count} sentences")
break
else:
yield tokens
else:
@@ -352,7 +352,7 @@ def read_conll(filename, treebank_id=None, proxy_tbank=None, maxSize=-1, hard_li
tokens.append(token)

if hard_lim and yield_count < maxSize:
-print('Warning: unable to yield ' + str(maxSize) + ' sentences, only ' + str(yield_count) + ' found')
+print(f'Warning: unable to yield {maxSize} sentences, only {yield_count} found')

# TODO: deal with case where there are still unyielded tokens
# e.g. when there is no newline at end of file
@@ -364,7 +364,7 @@ def read_conll(filename, treebank_id=None, proxy_tbank=None, maxSize=-1, hard_li
if maxSize > 0 and not hard_lim:
if len(sents) > maxSize:
sents = random.sample(sents,maxSize)
print("Yielding " + str(len(sents)) + " random sentences")
print(f"Yielding {len(sents)} random sentences")
for toks in sents:
yield toks

@@ -373,30 +373,30 @@ def read_conll(filename, treebank_id=None, proxy_tbank=None, maxSize=-1, hard_li


def write_conll(fn, conll_gen):
print("Writing to " + fn)
print(f"Writing to {fn}")
sents = 0
with open(fn, 'w', encoding='utf-8') as fh:
for sentence in conll_gen:
sents += 1
for entry in sentence[1:]:
fh.write(str(entry) + '\n')
fh.write('\n')
print("Wrote " + str(sents) + " sentences")
print(f"Wrote {sents} sentences")


def write_conll_multiling(conll_gen, treebanks):
tbank_dict = {treebank.iso_id:treebank for treebank in treebanks}
cur_tbank = conll_gen[0][0].treebank_id
outfile = tbank_dict[cur_tbank].outfilename
fh = open(outfile,'w',encoding='utf-8')
print("Writing to " + outfile)
print(f"Writing to {outfile}")
for sentence in conll_gen:
if cur_tbank != sentence[0].treebank_id:
fh.close()
cur_tbank = sentence[0].treebank_id
outfile = tbank_dict[cur_tbank].outfilename
fh = open(outfile,'w',encoding='utf-8')
print("Writing to " + outfile)
print(f"Writing to {outfile}")
for entry in sentence[1:]:
fh.write(str(entry) + '\n')
fh.write('\n')
Expand All @@ -419,7 +419,7 @@ def normalize(word):

def evaluate(gold,test,conllu):
scoresfile = test + '.txt'
print("Writing to " + scoresfile)
print(f"Writing to {scoresfile}")
with open(scoresfile, "w") as scoresfile_stream:
if not conllu:
#os.system('perl src/utils/eval.pl -g ' + gold + ' -s ' + test + ' > ' + scoresfile + ' &')
Expand Down Expand Up @@ -502,7 +502,7 @@ def extract_embeddings_from_file(filename, words=None, max_emb=-1, filtered_file
error_count += 1
line_count += 1
if line_count % 100000 == 0:
print("Reading line: " + str(line_count))
print(f"Reading line: {line_count}")
else:
break

@@ -514,7 +514,7 @@ def extract_embeddings_from_file(filename, words=None, max_emb=-1, filtered_file
print(f"{len(embeddings):d} entries found from vocabulary (out of {len(words):d})")

if filtered_filename and embeddings:
print("Writing filtered embeddings to " + filtered_filename)
print(f"Writing filtered embeddings to {filtered_filename}")
with open(filtered_filename,'w') as fh_filter:
no_embeddings = len(embeddings)
embedding_size = len(embeddings.itervalues().next())
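
One caution in the unchanged context above: `embeddings.itervalues().next()` is Python 2 syntax and raises `AttributeError` on Python 3, which the f-strings introduced elsewhere in this commit require (Python 3.6+). If that line were touched too, the Python 3 spelling would be something like this sketch:

    embeddings = {"the": [0.1, 0.2, 0.3]}  # hypothetical embeddings dict

    # Python 2, as in the context above: embeddings.itervalues().next()
    # Python 3 equivalent: take any one value and measure its length.
    embedding_size = len(next(iter(embeddings.values())))
    print(f"Embedding size: {embedding_size}")  # -> Embedding size: 3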
