diff --git a/.gitignore b/.gitignore
index bf6cca2..37733e1 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,3 +1,4 @@
 .DS_Store
 .vscode/
 */__pycache__/*
+*.pyc
diff --git a/README.md b/README.md
index 078f91a..6f052b6 100644
--- a/README.md
+++ b/README.md
@@ -41,12 +41,18 @@
 The deep embeddings used in this work are generated using the End2End network proposed in:
 ```
 Krishnan, P., Dutta, K., Jawahar, C.V.: Word spotting and recognition using deep embedding. In: 2018 13th IAPR International Workshop on Document Analysis Systems (DAS). pp. 1–6 (April 2018). https://doi.org/10.1109/DAS.2018.70
 ```
-Word text and image's deep embeddings for testing this repository are provided in the ```embeddings``` folder.
+Deep embeddings of the word text and word images for testing this repository are provided in the ``embeddings`` folder. Text files containing information about the embeddings are required when running the code. They are in the following format:
-```1```
-```1```
-...
-Corresponding text files for testing this repository are provided in the ``gen_files`` folder.
+```
+1
+1
+...
+```
+Embeddings can be generated using [https://github.com/kris314/hwnet](https://github.com/kris314/hwnet).
+
+To make it easier to explore the code in this repository, sample text files and embeddings are provided in the ``gen_files`` and ``embeddings`` folders, respectively.
+
+The original dataset used in this work will be released by [CVIT](http://cvit.iiit.ac.in) soon.
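+
+The snippet below is a minimal sanity check of the sample text files and embeddings (a sketch only; it assumes the sample file names used as defaults in ``src/EmbedNet_train.py`` and the 20-predictions-per-word-image convention used throughout the code):
+```python
+import numpy as np
+
+# Sample embeddings provided in this repository
+image_embeds = np.load('embeddings/topk_preds_100featsImg.npy')
+topk_embeds = np.load('embeddings/topk_preds_100featsSynth.npy')
+
+# The second whitespace-separated field of each line is the word's text,
+# mirroring how src/EmbedNet_train.py reads this file
+with open('gen_files/image_embed_top_k_100.txt', 'r') as f:
+    words = [line.split()[1] for line in f]
+
+# One embedding per word image, and 20 top-prediction embeddings per image
+assert image_embeds.shape[0] == len(words)
+assert topk_embeds.shape[0] == 20 * len(words)
+```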

 ### Performing word recognition (using a pre-trained EmbedNet)
 Pre-trained EmbedNet models are saved in the ``models`` folder.
@@ -102,7 +108,47 @@ Other arguments for word recognition experiment are:
 - `k` total number of predictions to test on (max 20)

 ### Training EmbedNet
-TODO
+Prepare the text files and embeddings as described in [Generating/using deep embeddings](#generatingusing-deep-embeddings); refer to the files in the ``gen_files`` folder for examples of the text files. Once the embeddings are prepared, run the following command:
+```sh
+python src/EmbedNet_train.py --model_name provide_a_name_of_your_choice
+```
+For a quick demonstration, you can run:
+```sh
+python src/EmbedNet_train.py --model_name temp
+```
+This will train an EmbedNet for 1000 epochs and save the models in ``trained/EmbedNet_models``.
+
+Other arguments for `EmbedNet_train.py` are:
+```sh
+--base_dir
+--model_dir
+--train_percentage
+--epochs
+--lr
+--batch
+--model_name
+--margin
+--hidden_layers
+--gpu_id
+--image_embeds
+--topk_embeds
+--image_file
+--predictions_file
+```
+- `base_dir` path to the directory for saving models
+- `model_dir` name of the folder for saving trained models
+- `train_percentage` percentage of data to use for training
+- `epochs` number of epochs to train for
+- `lr` learning rate
+- `batch` batch size
+- `model_name` name under which to save the model
+- `margin` margin for the triplet loss
+- `hidden_layers` list of input sizes for the hidden layers
+- `gpu_id` which GPU to use
+- `image_embeds` path to the image embeddings
+- `topk_embeds` path to the TopK predictions' embeddings
+- `image_file` path to the file containing word image information
+- `predictions_file` path to the file containing the TopK predictions' information
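+
+As an illustration, several of these arguments can be combined in a single call (the values below are placeholders, not tuned settings):
+```sh
+python src/EmbedNet_train.py --model_name my_model --epochs 500 --lr 0.0001 --batch 64 --margin 0.5 --gpu_id 0
+```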

 License and Citation
 ---------------------
diff --git a/src/EmbedNet_dataprep.py b/src/EmbedNet_dataprep.py
deleted file mode 100644
index a7aa754..0000000
--- a/src/EmbedNet_dataprep.py
+++ /dev/null
@@ -1,152 +0,0 @@
-"""
-This code is used for preparing the Triplet dataset for EmbedNet
-"""
-# Standard imports
-import os
-import pdb
-import pickle
-import argparse
-
-# Third party imports
-import numpy as np
-from tqdm import tqdm
-import Levenshtein as lev
-
-parser = argparse.ArgumentParser(description='Data Preperation for deep word recognition')
-
-# Arguments for text and embeddings path
-parser.add_argument('--image_embeds', default='/ssd_scratch/cvit/sid/embeddings/topk_preds_1500featsImg.npy', help='Path to the image embeddings')
-parser.add_argument('--text_embeds', default='/ssd_scratch/cvit/sid/embeddings/topk_preds_1500featsSynth.npy', help='Path to the text embeddigns')
-parser.add_argument('--image_info', default='/ssd_scratch/cvit/sid/image_embed_top_k_1500.txt', help='Path to the file containing word image information')
-parser.add_argument('--text_info', default='/ssd_scratch/cvit/sid/top_preds_embeds_with_confidence_1500.txt', help='Path to the file containing text output information')
-
-# model path and name arguments
-parser.add_argument('--base_path', default='/ssd_scratch/cvit/sid/', help='Path to the base directory where the training and testing data is stored')
-parser.add_argument('--file_name', default='EmbedNet_data', help='Name of the data file')
-
-# Training and testing split flag
-parser.add_argument('--train_percent', default=0.8, type=float, help='Percent of train data')
-parser.add_argument('--semi_hard', default=False, action='store_true', help='If True semi-hard examples will also be included')
-parser.add_argument('--save', default=False, action='store_true', help='If true data will be saved in ssd_scratch')
-
-args = parser.parse_args()
-print(args)
-
-data_path = args.base_path + args.file_name
-
-print('[INFO] Loading embeddings and text files...')
-image_embeds = np.load(args.image_embeds)
-try:
-    topk_embeds = np.load(args.text_embeds)
-except Exception as e:
-    print('[INFO] Loading text embeddings in memmap mode...')
-    topk_embeds = np.memmap(args.text_embeds, dtype=np.float32, mode='r', shape=(2109500, 2048))
-
-with open(args.image_info, 'r') as image_file:
-    image_info = image_file.readlines()
-image_info = [item.split()[1] for item in image_info]
-
-with open(args.text_info, 'r') as text_file:
-    text_info = text_file.readlines()
-text_info = [item.split()[1] for item in text_info]
-
-# This piece is for handling text files with more data as compared to the numpy files
-# text_info = text_info[:topk_embeds.shape[0]]
-# image_info = image_info[:image_embeds.shape[0]]
-
-# # Getting count of number of words in training set
-split_count = int(args.train_percent * len(image_info))
-image_info = image_info[:split_count]
-text_info = text_info[:split_count*20]
-image_embeds = image_embeds[:split_count]
-topk_embeds = topk_embeds[:split_count*20]
-
-text_dict = dict()
-embeds_dict = dict()
-ko = 0
-k = 20
-"""Text Dictionary is in the form
-{'word':[([top_20_preds],[lev_dist]), (..., ...), ...], ...}
-Embedding dictionary is in the form
-{'word': {'image_embeds': [all image_embeds occurances], 'text_embeds': [[top_20_text_embeds], [top_20_text_embeds], ...]}, ...}
-"""
-for word in tqdm(image_info, desc='[INFO] Text Dict'):
-    if word not in text_dict.keys():
-        text_dict[word] = [(text_info[ko: k], [lev.distance(word, item) for item in text_info[ko: k]])]
-    else:
-        text_dict[word].append((text_info[ko: k], [lev.distance(word, item) for item in text_info[ko: k]]))
-    ko = k
-    k += 20
-
-ko = 0
-k = 20
-for count, image_embed in enumerate(tqdm(image_embeds, desc='[INFO] Embeds Dict')):
-    word = image_info[count]
-    if word not in embeds_dict.keys():
-        embeds_dict[word] = {'image_embeds': [image_embed], 'text_embeds': [topk_embeds[ko: k]]}
-    else:
-        embeds_dict[word]['image_embeds'].append(image_embed)
-        embeds_dict[word]['text_embeds'].append(topk_embeds[ko: k])
-    ko = k
-    k += 20
-
-final_list = list()
-for word in tqdm(text_dict.keys(), desc='[INFO] Data Prep'):
-    predictions = text_dict[word]
-    image_embeddings, text_embeddings = embeds_dict[word]['image_embeds'], np.array(embeds_dict[word]['text_embeds'])
-    for instance_count, single_instance in enumerate(predictions):
-        top20_preds, top20_edit_dist = single_instance[0], single_instance[1]
-        instance_text_embeds = text_embeddings[instance_count]
-        anchor = image_embeddings[instance_count]
-        positive = None
-        negative_list = list()
-        if args.semi_hard:
-            semi_negative_list = list()
-        for count, pred in enumerate(top20_preds):
-            if word == pred:
-                positive = instance_text_embeds[count]
-            else:
-                if not args.semi_hard:
-                    negative_list.append(instance_text_embeds[count])
-                if args.semi_hard:
-                    condition = True
-                    no_inf = 1000
-                    while condition and no_inf != 0:
-                        random_num = np.random.randint(low=1, high=len(topk_embeds))
-                        random_embedding = topk_embeds[random_num]
-                        if np.linalg.norm(anchor - random_embedding) > 0.4:
-                            condition = False
-                        no_inf -= 1
-                    semi_negative_list.append(random_embedding)
-        if args.semi_hard:
-            for semi_hard_neg_embed in semi_negative_list:
-                if positive is None:
-                    pass
-                else:
-                    final_list.append({'anchor': anchor, 'positive': positive, 'negative': np.array(semi_hard_neg_embed)})
-        else:
-            for negative_embeds in negative_list:
-                if positive is None:  # There are a few cases when the OCR even fails to predics in Top20 predicitons
-                    pass
-                else:
-                    final_list.append({'anchor': anchor, 'positive': positive, 'negative': np.array(negative_embeds)})
-
-def check(final_list):
-    positive_distance = list()
-    negative_distance = list()
-    for sample in tqdm(final_list, desc='[INFO] Checking'):
-        anchor = sample['anchor']
-        positive = sample['positive']
-        negative = sample['negative']
-        try:
-            positive_distance.append(np.linalg.norm(anchor - positive))
-            negative_distance.append(np.linalg.norm(anchor - negative))
-        except Exception as e:
-            print(e)
-            pdb.set_trace()
-    print('[INFO] Mean distance of anchors with positive pairs is {} Max {} Min {}.\n[INFO] Mean distance of anchor with negative pairs is {} Max {} Min {}.'.format(np.mean(positive_distance), np.max(positive_distance), np.min(positive_distance), np.mean(negative_distance), np.max(negative_distance), np.min(negative_distance)))
-
-check(final_list)
-if args.save:
-    pickle.dump(final_list, open(data_path, 'wb'))
-    print('[INFO] Total number of triples generated: {}\n[INFO] Pickle file saved at {}'.format(len(final_list), data_path))
diff --git a/src/EmbedNet.py b/src/EmbedNet_train.py
similarity index 67%
rename from src/EmbedNet.py
rename to src/EmbedNet_train.py
index 276f72b..4307930 100644
--- a/src/EmbedNet.py
+++ b/src/EmbedNet_train.py
@@ -15,18 +15,18 @@
 import torch.nn as nn
 from tqdm import tqdm
 from models import EmbedNet
-from online_triplets import Triplets
+from triplets import Triplets
 import torch.nn.functional as F
 import torch.utils.data as data
 from torch.utils.data import DataLoader
-from torch.utils.tensorboard import SummaryWriter

 torch.backends.cudnn.enabled = False
 torch.backends.cudnn.benchmark = False

 parser = argparse.ArgumentParser(description='Neural Networks for word recognition')

 # File paths and directory names
-parser.add_argument('--base_dir', default='/ssd_scratch/cvit/sid/', help='Path to the directory for saving models')
+parser.add_argument('--base_dir', default='trained', help='Path to the directory for saving models')
+parser.add_argument('--model_dir', default='EmbedNet_models', help='Name of the folder for saving trained models')

 # Various model hyperparameters
 parser.add_argument('--train_percentage', type=float, default=0.8, help='Percentage of data to use for training')
@@ -38,39 +38,35 @@
 parser.add_argument('--hidden_layers', nargs='+', type=int, default=[1024, 512, 256, 128], help='List of input size of the hidden layers')
 parser.add_argument('--gpu_id', default=0, type=int, help='Specify which GPU to use')

-parser.add_argument('--image_embeds', default='/ssd_scratch/cvit/sid/embeddings/image_embeds_top_k_allfeatsImg.npy', help='Path to the image embeddings')
-parser.add_argument('--text_embeds', default='/ssd_scratch/cvit/sid/embeddings/top_preds_embeds_all_featsSynth.dat', help='Path to the text embeddigns')
-parser.add_argument('--image_info', default='/ssd_scratch/cvit/sid/EmbedGenFiles/image_embed_top_k_all.txt', help='Path to the file containing word image information')
-parser.add_argument('--text_info', default='/ssd_scratch/cvit/sid/EmbedGenFiles/top_preds_embeds_all_with_confidence.txt', help='Path to the file containing text output information')
+parser.add_argument('--image_embeds', default='embeddings/topk_preds_100featsImg.npy', help='Path to the image embeddings')
+parser.add_argument('--topk_embeds', default='embeddings/topk_preds_100featsSynth.npy', help='Path to the text embeddings')
+parser.add_argument('--image_file', default='gen_files/image_embed_top_k_100.txt', help='Path to the file containing word image information')
+parser.add_argument('--predictions_file', default='gen_files/top_preds_embeds_100_with_conf.txt', help='Path to the file containing text output information')

 args = parser.parse_args()
 print(args)

 print('[INFO] Loading embeddings and text files...')
 image_embeds = np.load(args.image_embeds)
 try:
-    topk_embeds = np.load(args.text_embeds)
+    topk_embeds = np.load(args.topk_embeds)
 except Exception as e:
     print('[INFO] Loading text embeddings in memmap mode...')
-    topk_embeds = np.memmap(args.text_embeds, dtype=np.float32, mode='r', shape=(2109500, 2048))
+    topk_embeds = np.memmap(args.topk_embeds, dtype=np.float32, mode='r', shape=(2109500, 2048))

-with open(args.image_info, 'r') as image_file:
-    image_info = image_file.readlines()
-image_info = [item.split()[1] for item in image_info]
+with open(args.image_file, 'r') as image_file:
+    image_file = image_file.readlines()
+image_file = [item.split()[1] for item in image_file]

-with open(args.text_info, 'r') as text_file:
-    text_info = text_file.readlines()
-topk_info = [item.split()[1] for item in text_info]
+with open(args.predictions_file, 'r') as text_file:
+    predictions_file = text_file.readlines()
+topk_info = [item.split()[1] for item in predictions_file]

 assert args.model_name, "Provide a model name for proceeding"

 epochs = args.epochs
 lr = args.lr
-writer_path = 'logs/' + args.model_name
-writer = SummaryWriter(writer_path)
-model_dir = 'EmbedNet/EmbedNet_models'
-train_list_dir = 'EmbedNet'
-assert os.path.exists(os.path.join(args.base_dir, train_list_dir)), "Train data directory does not exists, create one using data_prep.py"
+model_dir = args.model_dir
 if not os.path.exists(os.path.join(args.base_dir, model_dir)):
-    os.mkdir(os.path.join(args.base_dir, model_dir))
+    os.makedirs(os.path.join(args.base_dir, model_dir))

 if torch.cuda.device_count() > 1:
     torch.cuda.set_device(args.gpu_id)
@@ -115,22 +111,7 @@ def get_dataloaders(train_list):
     return train_data_loader, val_data_loader


-def calculate_accuracy(model_path):
-    print("[INFO] Calculating current model's accuracy...")
-    temp_model_path = os.path.join(model_path, args.model_name + '_temp.pkl')
-    hidden_string = str(args.hidden_layers).replace(',', ' ').replace('[', '').replace(']', '')
-    try:
-        command = 'python parallel_word_rec_EmbedNet.py --use_model --hidden_layers {} --model_path {} --testing --test_split 0.75858 > {}.txt'.format(hidden_string, temp_model_path, args.model_name)
-    except Exception as e:
-        print(e)
-        pdb.set_trace()
-    os.system(command)
-    data = open('{}.txt'.format(args.model_name), 'r').readlines()
-    accuracy = data[0].split()[-1]
-    return float(accuracy)
-
-
-triplet = Triplets(topk_info, image_info, topk_embeds, image_embeds, args.train_percentage, args.margin, verbose=True)
+triplet = Triplets(topk_info, image_file, topk_embeds, image_embeds, args.train_percentage, args.margin, verbose=True)
 train_list = triplet.initial_list()
 train_data_loader, val_data_loader = get_dataloaders(train_list)
@@ -176,6 +157,8 @@ def calculate_accuracy(model_path):
             anchor = anchor.cuda().double()
             positive = positive.cuda().double()
             negative = negative.cuda().double()
+        else:
+            model = model.double()
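+            # CPU fallback: keep the model in double precision, mirroring the
+            # .double() casts applied on the GPU path above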
         model.zero_grad()
         anchor_ = model(anchor)
         positive_ = model(positive)
         negative_ = model(negative)
@@ -185,8 +168,6 @@
         tr_loss.backward()
         optimizer.step()
         train_loss_per_epoch += float(tr_loss)
-        writer.add_scalar('Train Loss/Batch', float(tr_loss), train_batch_count)
-    writer.add_scalar('Train Loss/Epoch', train_loss_per_epoch, epoch)
     for data_point in tqdm(val_data_loader, desc='[INFO] Validation'):
         validation_batch_count += 1
         anchor = data_point['anchor']
@@ -203,26 +184,15 @@
         negative_ = model(negative)
         val_loss = criterion(anchor_, positive_, negative_)
         val_loss_per_epoch += float(val_loss)
-        writer.add_scalar('Validation Loss/Batch', float(val_loss), validation_batch_count)
-    writer.add_scalar('Validation Loss/Epoch', val_loss_per_epoch, epoch)
-    # Saving model based on the current accuracy
-    save_checkpoint(os.path.join(args.base_dir, model_dir), epoch, model, optimizer, temp=True)
-    updated_accuracy = calculate_accuracy(os.path.join(args.base_dir, model_dir))
-    writer.add_scalar('Word Accuracy/Epoch', updated_accuracy, epoch)
-    if updated_accuracy > accuracy:
-        accuracy = updated_accuracy
-        save_checkpoint(os.path.join(args.base_dir, model_dir), epoch, model, optimizer, accuracy=True)
-        model_saved_epoch = epoch + 1
     if val_loss_per_epoch < base_valid:
         base_valid = val_loss_per_epoch
         save_checkpoint(os.path.join(args.base_dir, model_dir), epoch, model, optimizer)
-    print('[INFO] Train Loss {}, validation loss {} accuracy {}.'.format(round(train_loss_per_epoch, 3), round(val_loss_per_epoch, 3), round(updated_accuracy, 3)))
+    print('[INFO] Train Loss {}, validation loss {}.'.format(round(train_loss_per_epoch, 3), round(val_loss_per_epoch, 3)))
     if (epoch + 1) - model_saved_epoch >= 5:
         print('[INFO] Updating the train and validation list...')
-        updated_list, new_hard_neg_number = triplet.EmbedNet_embeds(model, 128)
+        updated_list, new_hard_neg_number = triplet.embednet_embeds(model, 128)
         if new_hard_neg_number < old_hard_neg_number:
             save_checkpoint(os.path.join(args.base_dir, model_dir), epoch, model, optimizer, hard=True)
             old_hard_neg_number = new_hard_neg_number
         train_data_loader, val_data_loader = get_dataloaders(updated_list)
         model_saved_epoch = epoch + 1
-    writer.add_scalars('Training Curves', {'Train Loss': train_loss_per_epoch, 'Validation Loss': val_loss_per_epoch}, epoch)
diff --git a/src/models.py b/src/models.py
index 6a72296..c33f0b6 100644
--- a/src/models.py
+++ b/src/models.py
@@ -48,8 +48,6 @@ def __init__(self, in_features, out_features, hidden_layers=[1024, 512, 256, 128]):
         self.hidden_layers = hidden_layers
         self.layers = nn.ModuleList()
         current_dim = self.in_features
-        # if self.hidden_layers[-1] != out_features:
-        #     print('[INFO] Last hidden layer output and final layer output is different.')
         for hidden_dim in self.hidden_layers:
             self.layers.append(nn.Linear(current_dim, hidden_dim))
             current_dim = hidden_dim
diff --git a/src/online_triplets.py b/src/triplets.py
similarity index 100%
rename from src/online_triplets.py
rename to src/triplets.py
diff --git a/src/word_rec_EmbedNet.py b/src/word_rec_EmbedNet.py
index c389a8e..b416ea5 100644
--- a/src/word_rec_EmbedNet.py
+++ b/src/word_rec_EmbedNet.py
@@ -158,16 +158,3 @@ def get_EmbedNet_embed(input_embedding):
 accuracyList = [round(item, 3) for item in accuracyList]
 print('[INFO] Top {} accuracies are: {}.'.format(len(accuracyList), accuracyList))
 print('[INFO] Number of words tested on {}.'.format(total))
-
-# Command using for generating final new results (02/12/20)
-# python3 src/word_rec_EmbedNet.py --image_embeds embeddings/topk_preds_100featsImg.npy --topk_embeds embeddings/topk_preds_100featsSynth.npy --predictions_file gen_files/top_preds_embeds_100_with_conf.txt --image_file gen_files/image_embed_top_k_100.txt --use_model --model_path /ssd_scratch/cvit/sid/WNet1AdamLR000001EXTOnGen1MarginNoConfidence240620.pkl --hidden_layers 1024 --test_split 1 --testing
-# Command updated on 03/12/20
-# python3 src/word_rec_EmbedNet.py --use_model --hidden_layers 1024
-# Command for running baseline model
-# python3 src/word_rec_EmbedNet.py
-# Command for running model using the confidence scores
-# python3 src/word_rec_EmbedNet.py --use_confidence
-# Command for running model using the EmbedNet
-# python3 src/word_rec_EmbedNet.py --use_confidence --use_model --hidden_layers 1024
-# Command for running model using EmbedNet and CAB
-# python3 src/word_rec_EmbedNet.py --use_confidence --use_model --hidden_layers 1024 --cab