Update README for training instructions
Sid2697 committed Dec 3, 2020
1 parent 33ea9e0 commit fc2790e
Showing 7 changed files with 75 additions and 225 deletions.
1 change: 1 addition & 0 deletions .gitignore
@@ -1,3 +1,4 @@
.DS_Store
.vscode/
*/__pycache__/*
*.pyc
58 changes: 52 additions & 6 deletions README.md
@@ -41,12 +41,18 @@ The deep embeddings used in this work are generated using the End2End network pr
```
Krishnan, P., Dutta, K., Jawahar, C.V.: Word spotting and recognition using deep embedding. In: 2018 13th IAPR International Workshop on Document Analysis Systems (DAS). pp. 1–6 (April 2018). https://doi.org/10.1109/DAS.2018.70
```
Word text and images' deep embeddings for testing this repository are provided in the ```embeddings``` folder.
Word text and images' deep embeddings for testing this repository are provided in the ``embeddings`` folder.
Text files containing information about the embeddings are required when running the code. They are in the following format<br>
```<img1-path><space><text1-string><space><dummyInt><space>1```<br>
```<img2-path><space><text2-string><space><dummyInt><space>1```<br>
...<br>
Corresponding text files for testing this repository are provided in the ``gen_files`` folder.
```
<img1-path><space><text1-string><space><dummyInt><space>1
<img2-path><space><text2-string><space><dummyInt><space>1
...
```
One can use [https://github.com/kris314/hwnet](https://github.com/kris314/hwnet) to generate embeddings.

To make it easier to explore the code in this repository, sample text files and embeddings are provided in ``gen_files`` and ``embeddings``, respectively.
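As a quick sanity check, here is a minimal sketch (assuming the sample files shipped in ``gen_files`` and ``embeddings``, and the 2048-dimensional embeddings used by the training script) that loads one of the provided text files together with its matching ``.npy`` embeddings:
```python
import numpy as np

# Each line of the text file reads: <img-path> <text-string> <dummyInt> 1
with open('gen_files/image_embed_top_k_100.txt', 'r') as f:
    entries = [line.split() for line in f]
words = [entry[1] for entry in entries]

# Row i of the .npy file is assumed to hold the embedding for line i
# of the text file (2048-D in this repository)
image_embeds = np.load('embeddings/topk_preds_100featsImg.npy')
print(len(words), image_embeds.shape)
```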

The original dataset used in this work will be released by [CVIT](http://cvit.iiit.ac.in) soon.

### Performing word recognition (using a pre-trained EmbedNet)
Pre-trained EmbedNet models are saved in the ``models`` folder.<br>
@@ -102,7 +108,47 @@ Other arguments for word recognition experiment are:
- `k` total number of predictions to test on (max 20)

### Training EmbedNet
TODO
Prepare the text files and embeddings as described in [Generating/using deep embeddings](#Generating/using-deep-embeddings). Refer to the files in the ``gen_files`` folder for examples of the text files. Once the embeddings are prepared, run the following command:
```sh
python src/EmbedNet_train.py --model_name provide_a_name_of_your_choice
```
For a quick demonstration, you can run the following command:
```sh
python src/EmbedNet_train.py --model_name temp
```
This will start training an EmbedNet for 1000 epochs and save the models in `trained/EmbedNet_models`.
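Training progress is logged with TensorBoard under ``logs/<model_name>`` (see `EmbedNet_train.py`), so the demonstration run above can be monitored with:
```sh
tensorboard --logdir logs/temp
```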

Other arguments for `EmbedNet_train.py` are:
```sh
--base_dir
--model_dir
--train_percentage
--epochs
--lr
--batch
--model_name
--margin
--hidden_layers
--gpu_id
--image_embeds
--topk_embeds
--image_file
--predictions_file
```
- `base_dir` path to the directory for saving models
- `model_dir` name of the folder for saving trained models
- `train_percentage` percentage of the data to use for training
- `epochs` number of epochs to train for
- `lr` learning rate
- `batch` batch size
- `model_name` name of the model for saving
- `margin` triplet loss margin
- `hidden_layers` list of input sizes for the hidden layers
- `gpu_id` which GPU to use
- `image_embeds` path to the image embeddings
- `topk_embeds` path to the TopK predictions' embeddings
- `image_file` path to the file containing word image information
- `predictions_file` path to the file containing the TopK predictions' text information
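For example, a run overriding a few of the defaults might look as follows (the values shown are illustrative, not tuned settings):
```sh
python src/EmbedNet_train.py --model_name my_embednet --epochs 500 --lr 1e-4 \
    --batch 64 --margin 0.2 --hidden_layers 1024 512 256 128 --gpu_id 0
```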

License and Citation
---------------------
152 changes: 0 additions & 152 deletions src/EmbedNet_dataprep.py

This file was deleted.

74 changes: 22 additions & 52 deletions src/EmbedNet.py → src/EmbedNet_train.py
@@ -15,18 +15,18 @@
import torch.nn as nn
from tqdm import tqdm
from models import EmbedNet
from online_triplets import Triplets
from triplets import Triplets
import torch.nn.functional as F
import torch.utils.data as data
from torch.utils.data import DataLoader
from torch.utils.tensorboard import SummaryWriter

torch.backends.cudnn.enabled = False
torch.backends.cudnn.benchmark = False

parser = argparse.ArgumentParser(description='Neural Networks for word recognition')
# File paths and directory names
parser.add_argument('--base_dir', default='/ssd_scratch/cvit/sid/', help='Path to the directory for saving models')
parser.add_argument('--base_dir', default='trained', help='Path to the directory for saving models')
parser.add_argument('--model_dir', default='EmbedNet_models', help='Name of the folder for saving trained models')

# Various model hyperparameters
parser.add_argument('--train_percentage', type=float, default=0.8, help='Percentage of data to use for training')
@@ -38,39 +38,35 @@
parser.add_argument('--hidden_layers', nargs='+', type=int, default=[1024, 512, 256, 128], help='List of input size of the hidden layers')

parser.add_argument('--gpu_id', default=0, type=int, help='Specify which GPU to use')
parser.add_argument('--image_embeds', default='/ssd_scratch/cvit/sid/embeddings/image_embeds_top_k_allfeatsImg.npy', help='Path to the image embeddings')
parser.add_argument('--text_embeds', default='/ssd_scratch/cvit/sid/embeddings/top_preds_embeds_all_featsSynth.dat', help='Path to the text embeddings')
parser.add_argument('--image_info', default='/ssd_scratch/cvit/sid/EmbedGenFiles/image_embed_top_k_all.txt', help='Path to the file containing word image information')
parser.add_argument('--text_info', default='/ssd_scratch/cvit/sid/EmbedGenFiles/top_preds_embeds_all_with_confidence.txt', help='Path to the file containing text output information')
parser.add_argument('--image_embeds', default='embeddings/topk_preds_100featsImg.npy', help='Path to the image embeddings')
parser.add_argument('--topk_embeds', default='embeddings/topk_preds_100featsSynth.npy', help='Path to the text embeddings')
parser.add_argument('--image_file', default='gen_files/image_embed_top_k_100.txt', help='Path to the file containing word image information')
parser.add_argument('--predictions_file', default='gen_files/top_preds_embeds_100_with_conf.txt', help='Path to the file containing text output information')
args = parser.parse_args()
print(args)

print('[INFO] Loading embeddings and text files...')
image_embeds = np.load(args.image_embeds)
try:
topk_embeds = np.load(args.text_embeds)
topk_embeds = np.load(args.topk_embeds)
except Exception as e:
print('[INFO] Loading text embeddings in memmap mode...')
topk_embeds = np.memmap(args.text_embeds, dtype=np.float32, mode='r', shape=(2109500, 2048))
topk_embeds = np.memmap(args.topk_embeds, dtype=np.float32, mode='r', shape=(2109500, 2048))

with open(args.image_info, 'r') as image_file:
image_info = image_file.readlines()
image_info = [item.split()[1] for item in image_info]
with open(args.image_file, 'r') as image_file:
image_file = image_file.readlines()
image_file = [item.split()[1] for item in image_file]

with open(args.text_info, 'r') as text_file:
text_info = text_file.readlines()
topk_info = [item.split()[1] for item in text_info]
with open(args.predictions_file, 'r') as text_file:
predictions_file = text_file.readlines()
topk_info = [item.split()[1] for item in predictions_file]

assert args.model_name, "Provide a model name to proceed"
epochs = args.epochs
lr = args.lr
writer_path = 'logs/' + args.model_name
writer = SummaryWriter(writer_path)
model_dir = 'EmbedNet/EmbedNet_models'
train_list_dir = 'EmbedNet'
assert os.path.exists(os.path.join(args.base_dir, train_list_dir)), "Train data directory does not exist, create one using data_prep.py"
model_dir = args.model_dir
if not os.path.exists(os.path.join(args.base_dir, model_dir)):
os.mkdir(os.path.join(args.base_dir, model_dir))
os.makedirs(os.path.join(args.base_dir, model_dir))

if torch.cuda.device_count() > 1:
torch.cuda.set_device(args.gpu_id)
@@ -115,22 +111,7 @@ def get_dataloaders(train_list):
return train_data_loader, val_data_loader


def calculate_accuracy(model_path):
print("[INFO] Calculating current model's accuracy...")
temp_model_path = os.path.join(model_path, args.model_name + '_temp.pkl')
hidden_string = str(args.hidden_layers).replace(',', ' ').replace('[', '').replace(']', '')
try:
command = 'python parallel_word_rec_EmbedNet.py --use_model --hidden_layers {} --model_path {} --testing --test_split 0.75858 > {}.txt'.format(hidden_string, temp_model_path, args.model_name)
except Exception as e:
print(e)
pdb.set_trace()
os.system(command)
data = open('{}.txt'.format(args.model_name), 'r').readlines()
accuracy = data[0].split()[-1]
return float(accuracy)


triplet = Triplets(topk_info, image_info, topk_embeds, image_embeds, args.train_percentage, args.margin, verbose=True)
triplet = Triplets(topk_info, image_file, topk_embeds, image_embeds, args.train_percentage, args.margin, verbose=True)
train_list = triplet.initial_list()

train_data_loader, val_data_loader = get_dataloaders(train_list)
@@ -176,6 +157,8 @@ def calculate_accuracy(model_path):
anchor = anchor.cuda().double()
positive = positive.cuda().double()
negative = negative.cuda().double()
else:
model = model.double()
model.zero_grad()
anchor_ = model(anchor)
positive_ = model(positive)
@@ -185,8 +168,6 @@
tr_loss.backward()
optimizer.step()
train_loss_per_epoch += float(tr_loss)
writer.add_scalar('Train Loss/Batch', float(tr_loss), train_batch_count)
writer.add_scalar('Train Loss/Epoch', train_loss_per_epoch, epoch)
for data_point in tqdm(val_data_loader, desc='[INFO] Validation'):
validation_batch_count += 1
anchor = data_point['anchor']
@@ -203,26 +184,15 @@
negative_ = model(negative)
val_loss = criterion(anchor_, positive_, negative_)
val_loss_per_epoch += float(val_loss)
writer.add_scalar('Validation Loss/Batch', float(val_loss), validation_batch_count)
writer.add_scalar('Validation Loss/Epoch', val_loss_per_epoch, epoch)
# Saving model based on the current accuracy
save_checkpoint(os.path.join(args.base_dir, model_dir), epoch, model, optimizer, temp=True)
updated_accuracy = calculate_accuracy(os.path.join(args.base_dir, model_dir))
writer.add_scalar('Word Accuracy/Epoch', updated_accuracy, epoch)
if updated_accuracy > accuracy:
accuracy = updated_accuracy
save_checkpoint(os.path.join(args.base_dir, model_dir), epoch, model, optimizer, accuracy=True)
model_saved_epoch = epoch + 1
if val_loss_per_epoch < base_valid:
base_valid = val_loss_per_epoch
save_checkpoint(os.path.join(args.base_dir, model_dir), epoch, model, optimizer)
print('[INFO] Train Loss {}, validation loss {} accuracy {}.'.format(round(train_loss_per_epoch, 3), round(val_loss_per_epoch, 3), round(updated_accuracy, 3)))
print('[INFO] Train Loss {}, validation loss {}.'.format(round(train_loss_per_epoch, 3), round(val_loss_per_epoch, 3)))
if (epoch + 1) - model_saved_epoch >= 5:
print('[INFO] Updating the train and validation list...')
updated_list, new_hard_neg_number = triplet.EmbedNet_embeds(model, 128)
updated_list, new_hard_neg_number = triplet.embednet_embeds(model, 128)
if new_hard_neg_number < old_hard_neg_number:
save_checkpoint(os.path.join(args.base_dir, model_dir), epoch, model, optimizer, hard=True)
old_hard_neg_number = new_hard_neg_number
train_data_loader, val_data_loader = get_dataloaders(updated_list)
model_saved_epoch = epoch + 1
writer.add_scalars('Training Curves', {'Train Loss': train_loss_per_epoch, 'Validation Loss': val_loss_per_epoch}, epoch)
2 changes: 0 additions & 2 deletions src/models.py
@@ -48,8 +48,6 @@ def __init__(self, in_features, out_features, hidden_layers=[1024, 512, 256, 128]
self.hidden_layers = hidden_layers
self.layers = nn.ModuleList()
current_dim = self.in_features
# if self.hidden_layers[-1] != out_features:
# print('[INFO] Last hidden layer output and final layer output is different.')
for hidden_dim in self.hidden_layers:
self.layers.append(nn.Linear(current_dim, hidden_dim))
current_dim = hidden_dim
File renamed without changes.