From e8b239045f7e5d950a1e47804026ff75eaf0d18d Mon Sep 17 00:00:00 2001 From: "Zengyi.Qin" Date: Fri, 8 Nov 2019 10:00:45 -0500 Subject: [PATCH] update --- keypoints/cvae/build.py | 296 --------------- keypoints/cvae/decoder.py | 71 ++-- keypoints/cvae/discriminator.py | 56 --- keypoints/cvae/encoder.py | 44 --- keypoints/cvae/network.py | 44 +++ keypoints/cvae/reader.py | 346 +++++++----------- keypoints/cvae/sort.py | 76 ++-- keypoints/main.py | 9 - keypoints/merge.py | 26 +- .../utils/grasp/make_inputs_multiproc.py | 34 +- .../utils/keypoint/make_inputs_multiproc.py | 32 +- keypoints/utils/visualize/visualize.py | 1 + setup.py | 18 - 13 files changed, 320 insertions(+), 733 deletions(-) delete mode 100644 setup.py diff --git a/keypoints/cvae/build.py b/keypoints/cvae/build.py index 96c5f18..c5b35dc 100644 --- a/keypoints/cvae/build.py +++ b/keypoints/cvae/build.py @@ -1301,141 +1301,6 @@ def train_vae_keypoint(data_path, task_name, str(step).zfill(6))) -def train_vae_action(data_path, - steps=120000, - batch_size=256, - eval_size=128, - l2_weight=1e-6, - log_step=20, - eval_step=4000, - save_step=1000, - model_path=None, - task_name='task', - optimizer='Adam'): - """Trains the VAE for action generation. This is only for - the End-to-End baseline where the actions are directly - predicted from the visual observation. - - Args: - data_path: The training data in a single h5df file. - steps: The total number of training steps. - batch_size: The training batch size. - eval_size: The evaluation batch size. - l2_weight: The L2 regularization weight. - log_step: The interval for logging. - eval_step: The interval for evaluation. - save_step: The interval for saving the model weights. - model_path: The pretrained model. - task_name: The name of the task to be trained on. This - is only for distinguishing the name of log files and - the saved model. - optimizer: Adam or SGDM. - - Returns: - None. 
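# The trainers in this file anneal the learning rate through
# get_learning_rate(step, steps), whose definition sits outside this hunk.
# A minimal sketch of a piecewise step-decay schedule of that shape; the
# base rate and the decay boundaries below are illustrative assumptions,
# not values taken from the repository:
def get_learning_rate_sketch(step, total_steps, base_lr=1e-4):
    """Hypothetical schedule: decay 10x at 60% and 80% of training."""
    if step < 0.6 * total_steps:
        return base_lr
    if step < 0.8 * total_steps:
        return base_lr * 0.1
    return base_lr * 0.01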
- """ - loader = ActionReader(data_path) - - graph = build_action_training_graph() - learning_rate = tf.placeholder(tf.float32, shape=()) - - point_cloud_tf = graph['point_cloud_tf'] - grasp_point_tf = graph['grasp_point_tf'] - translation_tf = graph['translation_tf'] - rotation_tf = graph['rotation_tf'] - - loss_vae_grasp = graph['loss_vae_grasp'] - loss_vae_trans = graph['loss_vae_trans'] - loss_vae_rot = graph['loss_vae_rot'] - loss_vae_mmd = graph['loss_vae_mmd'] * 0.005 - - z_mean = graph['z_mean'] - z_std = graph['z_std'] - - std_gt_grasp = graph['std_gt_grasp'] - std_gt_trans = graph['std_gt_trans'] - std_gt_rot = graph['std_gt_rot'] - - weight_loss = [tf.nn.l2_loss(var) for var - in tf.trainable_variables()] - weight_loss = tf.reduce_sum(weight_loss) * l2_weight - - loss_vae = (loss_vae_grasp + loss_vae_trans + - loss_vae_rot + loss_vae_mmd) - loss = weight_loss + loss_vae - - if optimizer == 'Adam': - train_op = tf.train.AdamOptimizer( - learning_rate=learning_rate).minimize(loss) - elif optimizer == 'SGDM': - train_op = tf.train.MomentumOptimizer( - learning_rate=learning_rate, - momentum=0.9).minimize(loss) - else: - raise NotImplementedError - - all_vars = tf.get_collection_ref( - tf.GraphKeys.GLOBAL_VARIABLES) - var_list_vae = [var for var in all_vars - if 'vae_action' in var.name and - 'Momentum' not in var.name and - 'Adam' not in var.name] - - saver = tf.train.Saver(var_list=var_list_vae) - - config = tf.ConfigProto() - config.gpu_options.allow_growth = True - config.allow_soft_placement = True - - with tf.Session(config=config) as sess: - sess.run([tf.global_variables_initializer()]) - if model_path: - running_log.write('vae_action_{}'.format(task_name), - 'loading model from {}'.format(model_path)) - saver.restore(sess, model_path) - - for step in range(steps + 1): - pos_p_np, pos_a_np = loader.sample_pos_train(batch_size) - - pos_grasp_np, pos_trans_np, pos_rot_np = np.split( - pos_a_np, [3, 5], axis=1) - pos_rot_np = np.concatenate([np.cos(pos_rot_np), - np.sin(pos_rot_np)], axis=1) - - feed_dict = {point_cloud_tf: pos_p_np, - grasp_point_tf: pos_grasp_np, - translation_tf: pos_trans_np, - rotation_tf: pos_rot_np, - learning_rate: get_learning_rate(step, steps)} - - [_, loss_np, vae_grasp, vae_trans, vae_rot, vae_mmd, weight, - std_gt_grasp_np, std_gt_trans_np, std_gt_rot_np, - z_mean_np, z_std_np] = sess.run([ - train_op, loss, loss_vae_grasp, - loss_vae_trans, loss_vae_rot, loss_vae_mmd, - weight_loss, std_gt_grasp, std_gt_trans, std_gt_rot, - z_mean, z_std], - feed_dict=feed_dict) - - if step % log_step == 0: - running_log.write('vae_action_{}'.format(task_name), - 'step: {}/{}, '.format(step, steps) + - 'loss: {:.3f}, grasp: {:.3f}/{:.3f}, '.format( - loss_np, vae_grasp, std_gt_grasp_np) + - 'trans: {:.3f}/{:.3f}, '.format( - vae_trans, std_gt_trans_np) + - 'rot: {:.3f}/{:.3f}, '.format( - vae_rot, std_gt_rot_np) + - 'mmd: {:.3f} ({:.3f} {:.3f}), '.format( - vae_mmd, z_mean_np, z_std_np)) - - if step > 0 and step % save_step == 0: - makedir('./runs/vae') - saver.save(sess, - './runs/vae/vae_action_{}_{}'.format( - task_name, str(step).zfill(6))) - - def train_gcnn_grasp(data_path, steps=60000, batch_size=256, @@ -1606,44 +1471,6 @@ def load_samples(loader, batch_size, stage, noise_level=0.2): return p_np, grasp_np, funct_np, funct_vect_np, label_np -def load_samples_action(loader, batch_size, stage): - """Loads the training and evaluation data. - - Args: - loader: A Reader instance. - batch_size: The training or evaluation batch size. - stage: 'train' or 'val'. 
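# train_vae_action above and the surrounding load_samples_action both
# encode the scalar rotation as the pair (cos r, sin r) rather than the
# raw angle, which removes the 2*pi wrap-around discontinuity from the
# regression target; the discriminator recovers the angle with atan2.
# A self-contained numpy illustration of the round trip:
import numpy as np

rot = np.array([[0.1], [3.0], [-2.5]])                        # (B, 1) radians
rot_enc = np.concatenate([np.cos(rot), np.sin(rot)], axis=1)  # (B, 2)
rot_dec = np.arctan2(rot_enc[:, 1:2], rot_enc[:, 0:1])        # (B, 1)
assert np.allclose(rot, rot_dec)  # exact here: all angles lie in (-pi, pi]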
- - Return: - p_np: A numpy array of point cloud. - grasp_np: A numpy array of grasp point. - trans_np: The translation part of the action. - rot_np: The rotation part of the action. - label: The binary success label of the actions given the - point cloud as visual observation. - """ - if stage == 'train': - pos_p_np, pos_a_np = loader.sample_pos_train(batch_size // 2) - neg_p_np, neg_a_np = loader.sample_neg_train(batch_size // 2) - elif stage == 'val': - pos_p_np, pos_a_np = loader.sample_pos_val(batch_size // 2) - neg_p_np, neg_a_np = loader.sample_neg_val(batch_size // 2) - else: - raise NotImplementedError - - num_pos, num_neg = pos_p_np.shape[0], neg_p_np.shape[0] - label_np = np.concatenate( - [np.ones(shape=(num_pos, 1)), - np.zeros(shape=(num_neg, 1))], - axis=0).astype(np.float32) - p_np = np.concatenate([pos_p_np, neg_p_np], axis=0) - a_np = np.concatenate([pos_a_np, neg_a_np], axis=0) - grasp_np, trans_np, rot_np = np.split( - a_np, [3, 5], axis=1) - rot_np = np.concatenate([np.cos(rot_np), np.sin(rot_np)], axis=1) - return p_np, grasp_np, trans_np, rot_np, label_np - - def train_discr_keypoint(data_path, steps=120000, batch_size=128, @@ -1772,129 +1599,6 @@ def train_discr_keypoint(data_path, noise_level, acc_np * 100)) -def train_discr_action(data_path, - steps=120000, - batch_size=128, - eval_size=128, - l2_weight=1e-6, - log_step=20, - eval_step=1000, - save_step=1000, - model_path=None, - task_name='task', - optimizer='Adam'): - """Trains the action evaluation network. - - Args: - data_path: The training data in a single h5df file. - steps: The total number of training steps. - batch_size: The training batch size. - eval_size: The evaluation batch size. - l2_weight: The L2 regularization weight. - log_step: The interval for logging. - eval_step: The interval for evaluation. - save_step: The interval for saving the model weights. - model_path: The pretrained model. - task_name: The name of the task to be trained on. This - is only for distinguishing the name of log files and - the saved model. - optimizer: 'Adam' or 'SGDM'. - - Returns: - None. 
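# load_samples_action above assembles each discriminator batch as half
# positives and half negatives and derives the binary labels from the
# sizes of the two halves. A compact numpy sketch of that pattern; the
# two sampler arguments are stand-ins for the reader methods:
import numpy as np

def balanced_batch(sample_pos, sample_neg, batch_size):
    pos_p, pos_a = sample_pos(batch_size // 2)
    neg_p, neg_a = sample_neg(batch_size // 2)
    label = np.concatenate([np.ones((pos_p.shape[0], 1)),
                            np.zeros((neg_p.shape[0], 1))],
                           axis=0).astype(np.float32)
    return (np.concatenate([pos_p, neg_p], axis=0),
            np.concatenate([pos_a, neg_a], axis=0),
            label)

pp, aa, yy = balanced_batch(
    lambda n: (np.ones((n, 4)), np.ones((n, 6))),
    lambda n: (np.zeros((n, 4)), np.zeros((n, 6))),
    batch_size=8)
assert yy.sum() == 4  # half the batch carries a positive label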
- """ - loader = ActionReader(data_path) - - graph = build_action_training_graph() - learning_rate = tf.placeholder(tf.float32, shape=()) - - point_cloud_tf = graph['point_cloud_tf'] - grasp_point_tf = graph['grasp_point_tf'] - translation_tf = graph['translation_tf'] - rotation_tf = graph['rotation_tf'] - - actions_label_tf = graph['actions_label_tf'] - loss_discr = graph['loss_discr'] - acc_discr = graph['acc_discr'] - - weight_loss = [tf.nn.l2_loss(var) for var - in tf.trainable_variables()] - weight_loss = tf.reduce_sum(weight_loss) * l2_weight - - loss = weight_loss + loss_discr - - if optimizer == 'Adam': - train_op = tf.train.AdamOptimizer( - learning_rate=learning_rate).minimize(loss) - elif optimizer == 'SGDM': - train_op = tf.train.MomentumOptimizer( - learning_rate=learning_rate, - momentum=0.9).minimize(loss) - else: - raise NotImplementedError - - all_vars = tf.get_collection_ref( - tf.GraphKeys.GLOBAL_VARIABLES) - var_list_vae = [var for var in all_vars - if 'action_discriminator' in var.name and - 'Momentum' not in var.name and - 'Adam' not in var.name] - - saver = tf.train.Saver(var_list=var_list_vae) - config = tf.ConfigProto() - config.gpu_options.allow_growth = True - config.allow_soft_placement = True - - with tf.Session(config=config) as sess: - sess.run([tf.global_variables_initializer()]) - if model_path: - running_log.write('discr_action_{}'.format(task_name), - 'loading model from {}'.format(model_path)) - saver.restore(sess, model_path) - - for step in range(steps + 1): - p_np, grasp_np, trans_np, rot_np, label_np = load_samples_action( - loader, batch_size, 'train') - feed_dict = {point_cloud_tf: p_np, - grasp_point_tf: grasp_np, - translation_tf: trans_np, - rotation_tf: rot_np, - actions_label_tf: label_np, - learning_rate: get_learning_rate(step, steps)} - - [_, loss_np, acc_np, weight - ] = sess.run([ - train_op, loss, acc_discr, weight_loss], - feed_dict=feed_dict) - - if step % log_step == 0: - running_log.write('discr_action_{}'.format(task_name), - 'step: {}/{}, '.format(step, steps) + - 'loss: {:.3f}, acc: {:.3f}'.format( - loss_np, acc_np * 100)) - - if step > 0 and step % save_step == 0: - saver.save(sess, - './runs/discr/discr_action_{}_{}'.format( - task_name, str(step).zfill(6))) - - if step > 0 and step % eval_step == 0: - for noise_level in [0.1, 0.2, 0.4, 0.8]: - [p_np, grasp_np, trans_np, - rot_np, label_np] = load_samples_action( - loader, batch_size, 'train') - feed_dict = {point_cloud_tf: p_np, - grasp_point_tf: grasp_np, - translation_tf: trans_np, - rotation_tf: rot_np, - actions_label_tf: label_np} - - [acc_np] = sess.run([acc_discr], feed_dict=feed_dict) - running_log.write('discr_action_{}'.format(task_name), - 'noise: {:.3f}, acc: {:.3f}'.format( - noise_level, acc_np * 100)) - - def inference_grasp(data_path, model_path, batch_size=128, diff --git a/keypoints/cvae/decoder.py b/keypoints/cvae/decoder.py index 2a1b678..e813cf7 100644 --- a/keypoints/cvae/decoder.py +++ b/keypoints/cvae/decoder.py @@ -1,10 +1,21 @@ +"""The decoders of the variational encoders (VAE).""" import tensorflow as tf from cvae.network import Network class GraspDecoder(Network): + """The grasp decoder.""" def build_model(self, x, z): + """Builds the model graph. + + Args: + x: The point cloud tensor. + z: The latent code parameters. + + Returns: + g: The generated grasps. 
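# The decoders in this file treat z as the concatenation of a Gaussian
# mean and standard deviation and draw the latent sample with the
# reparameterization trick, as the (deleted) ActionDecoder further down
# shows explicitly. A minimal TF1-style sketch of that sampling step,
# assuming z has shape (B, 2*D):
import tensorflow as tf

def sample_latent(z):
    miu, sigma = tf.split(z, 2, axis=1)                    # (B, D) each
    eps = tf.random.truncated_normal(tf.shape(sigma), 0.0, 1.0)
    return miu + sigma * eps   # differentiable w.r.t. both miu and sigma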
+ """ with tf.variable_scope('vae_grasp_decoder', reuse=tf.AUTO_REUSE): @@ -56,9 +67,25 @@ def build_model(self, x, z): class KeypointDecoder(Network): + """The keypoint decoder.""" def build_model(self, x, z, nv=0, truncated_normal=False): + """Builds the model graph. + + Args: + x: The point cloud tensor. + z: The latent code parameters. + nv: The number of vectors pointing from + the function point to the effect point. + truncated_normal: Whether to use the truncated + normal distribution to sample random seeds. + + Returns: + keypoints: The generated grasp point and function point. + funct_vect: The generated vectors pointing from the + function point to the effect point. + """ with tf.variable_scope('vae_keypoint_decoder', reuse=tf.AUTO_REUSE): @@ -123,47 +150,3 @@ def build_model(self, x, z, nv=0, functional_keypoints] keypoints = [tf.squeeze(k, 2) for k in keypoints] return keypoints, funct_vect - - -class ActionDecoder(Network): - - def build_model(self, x, z): - with tf.variable_scope('vae_action_decoder', - reuse=tf.AUTO_REUSE): - miu, sigma = tf.split(z, 2, axis=1) - z = miu + sigma * tf.random.truncated_normal( - tf.shape(sigma), 0.0, 1.0) - _, z_dim = z.get_shape().as_list() - z = tf.reshape(z, [-1, 1, 1, z_dim]) - - mean_x = tf.reduce_mean(x, - axis=1, keepdims=True) - x = x - mean_x - p = x - - x = self.conv_layer(x, 16, name='conv1_1') - x = self.conv_layer(x, 16, name='conv1_2') - x = self.concat_xz(x, z) - - x = self.conv_layer(x, 32, name='conv2_1') - x = self.conv_layer(x, 32, name='conv2_2') - x = self.concat_xz(x, z) - - x = self.conv_layer(x, 64, name='conv3_1') - x = self.conv_layer(x, 256, name='conv3_2') - x, p = self.down_sample(x, p, 64, 0.7, 'down_sample') - - x_a = self.conv_layer(x, 256, name='conv6_1') - x_a = tf.reduce_max(x_a, axis=1, keepdims=True) - grasp = self.conv_layer(x_a, 3, name='conv6_2', - linear=True) + mean_x - trans = self.conv_layer(x_a, 2, name='conv6_3', - linear=True) - rot = self.conv_layer(x_a, 2, name='conv6_4', - linear=True) - grasp = tf.squeeze(grasp, axis=[1, 2]) - trans = tf.squeeze(trans, axis=[1, 2]) - rot = tf.squeeze(rot, axis=[1, 2]) - - return grasp, trans, rot - diff --git a/keypoints/cvae/discriminator.py b/keypoints/cvae/discriminator.py index 5fdf2da..3fa2801 100644 --- a/keypoints/cvae/discriminator.py +++ b/keypoints/cvae/discriminator.py @@ -118,61 +118,5 @@ def build_model(self, x, ks, v=None): return p - -class ActionDiscriminator(Network): - """The action evaluation network.""" - - def build_model(self, x, ats): - """Builds the model graph. - - Args: - x: The point cloud tensor. - ats: The grasp, translation and rotation of the action. - - Returns: - p: The action evaluation score. A higher - score indicates a higher quality of the - predicted action given the point cloud. 
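# The (deleted) ActionDiscriminator below first recenters the cloud at
# the grasp point and re-expresses it in the action's yaw frame (via
# atan2 and align), so its score is invariant to where the object sits
# in the workspace. A numpy sketch of that canonicalization for a
# single cloud; the helper name is illustrative:
import numpy as np

def to_grasp_frame(points, grasp_xyz, yaw):
    """points: (N, 3); grasp_xyz: (3,); yaw: rotation about z, radians."""
    c, s = np.cos(-yaw), np.sin(-yaw)          # inverse (aligning) rotation
    rz = np.array([[c, -s, 0.0], [s, c, 0.0], [0.0, 0.0, 1.0]])
    return (points - grasp_xyz) @ rz.T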
- """ - with tf.variable_scope('action_discriminator', - reuse=tf.AUTO_REUSE): - grasp, trans, rot = ats - - x_g = x - tf.reshape(grasp, [-1, 1, 1, 3]) - - vx, vy = tf.split(rot, 2, axis=1) - rz = tf.atan2(vy, vx) - zero = tf.zeros_like(rz) - pose_t = tf.concat( - [zero, zero, zero, zero, zero, rz], axis=1) - x_t = tf.expand_dims( - align(tf.squeeze(x_g, 2), pose_t), 2) - x_t = x_t + tf.reshape( - tf.concat([trans, zero], axis=1), [-1, 1, 1, 3]) - - x = tf.concat([x_g, x_t], axis=1) - - x = self.conv_layer(x, 16, name='conv1_1') - x = self.conv_layer(x, 16, name='conv1_2') - - x = self.conv_layer(x, 32, name='conv2_1') - x = self.conv_layer(x, 32, name='conv2_2') - - x = self.conv_layer(x, 64, name='conv3_1') - x = self.conv_layer(x, 64, name='conv3_2') - - x = self.conv_layer(x, 512, name='conv4_1') - - x_g, x_t = tf.split(x, 2, axis=1) - - x_g = tf.reduce_max(x_g, [1, 2], keepdims=False) - x_t = tf.reduce_max(x_t, [1, 2], keepdims=False) - - x = tf.concat([x_g, x_t], axis=1) - - x = self.fc_layer(x, 256, name='fc1') - x = self.fc_layer(x, 256, name='fc2') - p = self.fc_layer(x, 1, linear=True, name='out') - return p diff --git a/keypoints/cvae/encoder.py b/keypoints/cvae/encoder.py index d29b2fe..7f255eb 100644 --- a/keypoints/cvae/encoder.py +++ b/keypoints/cvae/encoder.py @@ -123,50 +123,6 @@ def build_model(self, x, ks, v=None): z = tf.concat([miu, sigma_sp], axis=1) return z - -class ActionEncoder(Network): - - def build_model(self, x, ats): - """Builds the vae keypoint encoder - - Args: - x: (B, N, 1, 3) Input point cloud - ats: [(B, 1, 3), (B, 1, 3), (B, 1, 3)] Actions - - Returns: - z: (B, 2*D) Mean and var of the latent - variable with dimension D - - """ - - with tf.variable_scope('vae_action_encoder', - reuse=tf.AUTO_REUSE): - - mean_x = tf.reduce_mean(x, - axis=1, keepdims=True) - x = x - mean_x - mean_r = tf.reduce_mean( - tf.linalg.norm( - x, axis=3, keepdims=True), - axis=1, keepdims=True) - x = x / (mean_r + 1e-6) - - grasp, trans, rot = ats - grasp = grasp - tf.squeeze(mean_x, [1, 2]) - a = tf.concat([grasp, trans, rot], axis=1) - - x = self.conv_layer(x, 16, name='conv1_1') - x = self.conv_layer(x, 16, name='conv1_2') - - x = self.conv_layer(x, 32, name='conv2_1') - x = self.conv_layer(x, 32, name='conv2_2') - - x = self.conv_layer(x, 32, name='conv5_1') - x = self.conv_layer(x, 256, name='conv5_2') - - x = tf.reduce_max(x, axis=[1, 2], keepdims=False) - x = tf.concat([x, a], axis=1) - x = self.fc_layer(x, 256, name='fc1') x = self.fc_layer(x, 256, name='fc2') z = self.fc_layer(x, 4, linear=True, name='out') diff --git a/keypoints/cvae/network.py b/keypoints/cvae/network.py index 1847815..7229837 100644 --- a/keypoints/cvae/network.py +++ b/keypoints/cvae/network.py @@ -2,11 +2,13 @@ class Network(object): + """The base class of neural networks.""" def __init__(self): return def batch_norm_layer(self, x, eps=0.01): + """Batch normalization.""" dimension = x.get_shape().as_list()[-1] mean, variance = tf.nn.moments(x, axes=[0, 1, 2]) beta = tf.get_variable( @@ -31,6 +33,19 @@ def batch_norm_layer(self, x, eps=0.01): def conv_layer(self, x, out_channels, kernel_size=1, dilation=1, linear=False, name=None): + """The convolution layer. + + Args: + x: The input feature map. + out_channels: The output channels. + kernel_size: The size of the convolution kernel. + dilation: The dilation rate. + linear: If True, return without batch normalization or relu. + name: The name scope of the variables. + + Returns: + x: The output feature map. 
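# conv_layer defaults to a 1x1 kernel over tensors shaped (B, N, 1, C),
# which makes it a PointNet-style shared per-point MLP: every point is
# transformed by the same weights, independently of its neighbors. A
# numpy check of the equivalence with a plain dense layer:
import numpy as np

B, N, C_in, C_out = 2, 5, 3, 8
x = np.random.randn(B, N, 1, C_in)
W = np.random.randn(C_in, C_out)                   # the 1x1 kernel, squeezed
conv_out = np.einsum('bnic,co->bnio', x, W)        # 1x1 convolution
dense_out = x.reshape(-1, C_in) @ W                # same weights, per point
assert np.allclose(conv_out.reshape(-1, C_out), dense_out)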
+ """ with tf.variable_scope(name): x = tf.layers.conv2d( x, out_channels, @@ -45,6 +60,17 @@ def conv_layer(self, x, out_channels, def fc_layer(self, x, out_size, linear=False, name=None): + """The fully connected layer. + + Args: + x: The input units. + out_size: The size of the output units. + linear: If True, relu will not be applied. + name: The name scope of the variables. + + Returns: + x: The output units. + """ with tf.variable_scope(name): x = tf.contrib.layers.flatten(x) x = tf.contrib.layers.fully_connected( @@ -55,6 +81,7 @@ def fc_layer(self, x, out_size, return x def max_pool(self, x, ksize, stride, name): + """The maxpooling layer.""" with tf.variable_scope(name): x = tf.contrib.layers.max_pool2d( x, kernel_size=ksize, @@ -62,6 +89,22 @@ def max_pool(self, x, ksize, stride, name): return x def down_sample(self, x, p, stride, thres, name): + """Down samples the point cloud feature maps. + + Args: + x: The point cloud features from the last layer. + p: The points where the feautures come from. + stride: The down sample stride. The central points + are chosen from p at this stride. + thres: For each central point, the surrounding points + within this distance threshold are considered in + the average pooling. + name: The name of this operation. + + Returns: + x: The point cloud features after down sampling. + new_p: The points where the new features come from. + """ with tf.variable_scope(name): print('Original shape {}'.format( x.get_shape().as_list())) @@ -90,6 +133,7 @@ def down_sample(self, x, p, stride, thres, name): return x, new_p def concat_xz(self, x, z): + """Concatenates the point cloud and the latent code.""" _, n, _, _ = x.get_shape().as_list() z_n = tf.tile(z, [1, n, 1, 1]) return tf.concat([x, z_n], axis=3) diff --git a/keypoints/cvae/reader.py b/keypoints/cvae/reader.py index 16f6f3a..60cce58 100644 --- a/keypoints/cvae/reader.py +++ b/keypoints/cvae/reader.py @@ -5,8 +5,19 @@ class GraspReader(object): + """Grasp data reader.""" def __init__(self, data_path, trainval_ratio=0.9): + """Initialization. + + Args: + data_path: Path to the hdf5 file. + trainval_ratio: The ratio between the training + and validation data. + + Returns: + None. + """ logging.info('Loading {}'.format(data_path)) f = h5py.File(data_path, 'r') self.pos_p = f['pos_point_cloud'] @@ -23,11 +34,20 @@ def __init__(self, data_path, trainval_ratio=0.9): return def make_suitable(self, indices): + """Removes the repeated indices.""" indices = sorted(set(list(indices))) return indices def random_rotate(self, p, g): - """Randomly rotate point cloud and grasps + """Randomly rotates point cloud and grasps. + + Args: + p: The point cloud tensor. + g: The grasp. + + Returns: + p: The rotated point cloud. + g: The rotated grasp. """ num = p.shape[0] drz = np.random.uniform( @@ -51,6 +71,17 @@ def random_rotate(self, p, g): return p, g def random_disturb(self, p, g): + """Adds random noise to the point cloud and + the grasp to generate negative examples. + + Args: + p: The point cloud tensor. + g: The grasp. + + Returns: + p: The disturbed point cloud. + g: The disturbed grasp. + """ p_mean = np.mean(p, axis=1) g_xyz, g_rx, g_ry, g_rz = \ np.split(g, [3, 4, 5], axis=1) @@ -70,6 +101,15 @@ def random_disturb(self, p, g): return p, g def sample_pos_train(self, size): + """Randomly chooses postive examples for training. + + Args: + size: The number of output examples. + + Returns: + pos_p: The point cloud. + pos_g: The positive grasps. 
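# random_disturb above manufactures extra negatives by shifting a grasp
# with Gaussian noise scaled by the data spread and by re-rolling its
# yaw, which yields hard negatives that stay near the data manifold. A
# standalone numpy sketch for one grasp (the source scales by batch
# statistics; the cloud's own spread stands in for them here):
import numpy as np

def disturb_grasp(points, g_xyz, g_rz):
    """points: (N, 3); g_xyz: (3,) grasp position; g_rz: scalar yaw."""
    spread = np.std(points - points.mean(axis=0), axis=0)   # per-axis std
    g_xyz = g_xyz + np.random.normal(size=3) * spread
    g_rz = g_rz + np.random.uniform(0.0, 2.0 * np.pi)
    return g_xyz, g_rz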
+ """ indices = np.random.randint( low=0, high=int(self.pos_p.shape[0] @@ -83,6 +123,15 @@ def sample_pos_train(self, size): return pos_p, pos_g def sample_neg_train(self, size): + """Randomly chooses negative examples for training. + + Args: + size: The number of output examples. + + Returns: + neg_p: The point cloud. + neg_g: The negative grasps. + """ indices = np.random.randint( low=0, high=int(self.neg_p.shape[0] @@ -99,6 +148,15 @@ def sample_neg_train(self, size): return neg_p, neg_g def sample_pos_val(self, size): + """Randomly chooses postive examples for validation. + + Args: + size: The number of output examples. + + Returns: + pos_p: The point cloud. + pos_g: The positive grasps. + """ indices = np.random.randint( high=self.pos_p.shape[0], low=int(self.pos_p.shape[0] @@ -112,6 +170,15 @@ def sample_pos_val(self, size): return pos_p, pos_g def sample_neg_val(self, size): + """Randomly chooses negative examples for validation. + + Args: + size: The number of output examples. + + Returns: + neg_p: The point cloud. + neg_g: The negative grasps. + """ indices = np.random.randint( high=self.neg_p.shape[0], low=int(self.neg_p.shape[0] @@ -130,11 +197,25 @@ def sample_neg_val(self, size): class KeypointReader(object): + """Keypoint data reader.""" + def __init__(self, data_path, trainval_ratio=0.8, num_keypoints=2): + """Initialization. + + Args: + data_path: Path to hdf5 file. + trainval_ratio: The ratio between the training + and validation data. + num_keypoints: 3 if the effect point is considered + else 2. + + Returns: + None. + """ logging.info('Loading {}'.format(data_path)) f = h5py.File(data_path, 'r') self.pos_p = f['pos_point_cloud'] @@ -150,12 +231,21 @@ def __init__(self, return def make_suitable(self, indices): + """Removes the repeated indices.""" indices = sorted(set(list(indices))) return indices def random_rotate(self, p, k): - """Randomly rotate point cloud and keypoints - for data augmentation + """Randomly rotates point cloud and keypoints + for data augmentation. + + Args: + p: The point cloud. + k: The keypoints. + + Returns: + p: The rotated point cloud. + k: The rotated keypoints. """ num = p.shape[0] drz = np.random.uniform( @@ -179,8 +269,16 @@ def random_rotate(self, p, k): return p, k def random_disturb(self, p, k, scale_down=0.2): - """Randomly disturb keypoints for creating - new negative examples + """Randomly disturbs keypoints for creating + new negative examples. + + Args: + p: The point cloud. + k: The keypoints. + + Returns: + p: The rotated point cloud. + k: The rotated keypoints. """ mean_p = np.mean(p, axis=1, keepdims=True) std_p = np.std(p - mean_p, axis=1, keepdims=True) @@ -190,6 +288,15 @@ def random_disturb(self, p, k, scale_down=0.2): return p, k def sample_pos_train(self, size): + """Randomly chooses postive examples for training. + + Args: + size: The number of output examples. + + Returns: + pos_p: The point cloud. + pos_g: The positive keypoints. + """ indices = np.random.randint( low=0, high=int(self.pos_p.shape[0] @@ -203,6 +310,15 @@ def sample_pos_train(self, size): return pos_p, pos_k def sample_neg_train(self, size): + """Randomly chooses negative examples for training. + + Args: + size: The number of output examples. + + Returns: + neg_p: The point cloud. + neg_g: The negative keypoints. + """ indices = np.random.randint( low=0, high=int(self.neg_p.shape[0] @@ -219,6 +335,15 @@ def sample_neg_train(self, size): return neg_p, neg_k def sample_pos_val(self, size): + """Randomly chooses postive examples for validation. 
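# Every sampler in these readers routes its random indices through
# make_suitable before slicing: h5py fancy indexing expects index lists
# to be unique and increasing, so duplicates drawn by np.random.randint
# must be dropped, and the realized batch can be slightly smaller than
# requested. Illustration of the safe form:
import numpy as np

raw = np.random.randint(0, 1000, size=128)       # may contain duplicates
suitable = sorted(set(raw.tolist()))             # unique and ascending
# dataset[suitable] is accepted by h5py; dataset[raw] may be rejected.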
+ + Args: + size: The number of output examples. + + Returns: + pos_p: The point cloud. + pos_g: The positive keypoints. + """ indices = np.random.randint( high=self.pos_p.shape[0], low=int(self.pos_p.shape[0] @@ -232,6 +357,15 @@ def sample_pos_val(self, size): return pos_p, pos_k def sample_neg_val(self, size): + """Randomly chooses negative examples for validation. + + Args: + size: The number of output examples. + + Returns: + neg_p: The point cloud. + neg_g: The negative keypoints. + """ indices = np.random.randint( high=self.neg_p.shape[0], low=int(self.neg_p.shape[0] @@ -247,205 +381,3 @@ def sample_neg_val(self, size): neg_p, neg_k = self.random_disturb( neg_p, neg_k) return neg_p, neg_k - - -class ActionReader(object): - - def __init__(self, - data_path, - trainval_ratio=0.8): - logging.info('Loading {}'.format(data_path)) - f = h5py.File(data_path, 'r') - self.pos_p = f['pos_point_cloud'] - self.pos_a = f['pos_action'] - self.neg_p = f['neg_point_cloud'] - self.neg_a = f['neg_action'] - - self.trainval_ratio = trainval_ratio - print('Number positive: {}'.format( - self.pos_p.shape[0])) - return - - def make_suitable(self, indices): - indices = sorted(set(list(indices))) - return indices - - def sample_pos_train(self, size): - indices = np.random.randint( - low=0, - high=int(self.pos_p.shape[0] - * self.trainval_ratio), - size=size) - indices = self.make_suitable(indices) - pos_p = np.array(self.pos_p[indices], - dtype=np.float32) - pos_a = np.array(self.pos_a[indices], - dtype=np.float32) - return pos_p, pos_a - - def sample_neg_train(self, size): - indices = np.random.randint( - low=0, - high=int(self.neg_p.shape[0] - * self.trainval_ratio), - size=size) - indices = self.make_suitable(indices) - neg_p = np.array(self.neg_p[indices], - dtype=np.float32) - neg_a = np.array(self.neg_a[indices], - dtype=np.float32) - return neg_p, neg_a - - def sample_pos_val(self, size): - indices = np.random.randint( - high=self.pos_p.shape[0], - low=int(self.pos_p.shape[0] - * self.trainval_ratio), - size=size) - indices = self.make_suitable(indices) - pos_p = np.array(self.pos_p[indices], - dtype=np.float32) - pos_a = np.array(self.pos_a[indices], - dtype=np.float32) - return pos_p, pos_a - - def sample_neg_val(self, size): - indices = np.random.randint( - high=self.neg_p.shape[0], - low=int(self.neg_p.shape[0] - * self.trainval_ratio), - size=size) - indices = self.make_suitable(indices) - neg_p = np.array(self.neg_p[indices], - dtype=np.float32) - neg_a = np.array(self.neg_a[indices], - dtype=np.float32) - return neg_p, neg_a - - -class MultitaskReader(object): - - def __init__(self, data_path, trainval_ratio=0.8): - logging.info('Loading {}'.format(data_path)) - f = h5py.File(data_path, 'r') - - self.pos_p = f['pos_point_cloud'] - pos_act = f['pos_actions'] - self.pos_g, self.pos_k = np.split( - pos_act, [2], axis=1) - self.pos_g = np.reshape(self.pos_g, [-1, 6]) - - self.neg_p = f['neg_point_cloud'] - neg_act = f['neg_actions'] - self.neg_g, self.neg_k = np.split( - neg_act, [2], axis=1) - self.neg_g = np.reshape(self.neg_g, [-1, 6]) - - self.trainval_ratio = trainval_ratio - return - - def make_suitable(self, indices): - indices = sorted(set(list(indices))) - return indices - - def random_rotate(self, p, g, k): - """Randomly rotate point cloud and grasps - """ - num = p.shape[0] - drz = np.random.uniform( - 0, np.pi * 2, size=(num, 1)) - g_xyz, g_rx, g_ry, g_rz = \ - np.split(g, [3, 4, 5], axis=1) - zeros = np.zeros_like(drz) - ones = np.ones_like(drz) - mat_drz = np.concatenate( - 
[np.cos(drz), -np.sin(drz), zeros, - np.sin(drz), np.cos(drz), zeros, - zeros, zeros, ones], - axis=1) - mat_drz = np.reshape(mat_drz, [num, 3, 3]) - mat_drz_t = np.transpose(mat_drz, [0, 2, 1]) - p = np.matmul(p - g_xyz[:, np.newaxis], - mat_drz_t) + g_xyz[:, np.newaxis] - k = np.matmul(k - g_xyz[:, np.newaxis], - mat_drz_t) + g_xyz[:, np.newaxis] - g_rz = g_rz + drz - g = np.concatenate( - [g_xyz, g_rx, g_ry, g_rz], axis=1) - return p, g, k - - def random_disturb(self, p, g, k): - p_mean = np.mean(p, axis=1) - g_xyz, g_rx, g_ry, g_rz = \ - np.split(g, [3, 4, 5], axis=1) - g_xyz = g_xyz + np.random.normal( - size=np.shape(g_xyz)) * np.std( - g_xyz - p_mean, - axis=0, keepdims=True) - g_rz = g_rz + np.random.uniform( - low=0, high=np.pi * 2, - size=np.shape(g_rz)) - g = np.concatenate( - [g_xyz, g_rx, g_ry, g_rz], axis=1) - return p, g, k - - def sample_pos_train(self, size): - indices = np.random.randint( - low=0, - high=int(self.pos_p.shape[0] - * self.trainval_ratio), - size=size) - indices = self.make_suitable(indices) - pos_p = np.array(self.pos_p[indices, :, :], - dtype=np.float32) - pos_g = np.array(self.pos_g[indices, :], - dtype=np.float32) - pos_k = np.array(self.pos_k[indices, :], - dtype=np.float32) - return pos_p, pos_g, pos_k - - def sample_neg_train(self, size): - indices = np.random.randint( - low=0, - high=int(self.neg_p.shape[0] - * self.trainval_ratio), - size=size) - indices = self.make_suitable(indices) - neg_p = np.array(self.neg_p[indices], - dtype=np.float32) - neg_g = np.array(self.neg_g[indices], - dtype=np.float32) - neg_k = np.array(self.neg_k[indices], - dtype=np.float32) - return neg_p, neg_g, neg_k - - def sample_pos_val(self, size): - indices = np.random.randint( - high=self.pos_p.shape[0], - low=int(self.pos_p.shape[0] - * self.trainval_ratio), - size=size) - indices = self.make_suitable(indices) - pos_p = np.array(self.pos_p[indices], - dtype=np.float32) - pos_g = np.array(self.pos_g[indices], - dtype=np.float32) - pos_k = np.array(self.pos_k[indices], - dtype=np.float32) - return pos_p, pos_g, pos_k - - def sample_neg_val(self, size): - indices = np.random.randint( - high=self.neg_p.shape[0], - low=int(self.neg_p.shape[0] - * self.trainval_ratio), - size=size) - indices = self.make_suitable(indices) - neg_p = np.array(self.neg_p[indices], - dtype=np.float32) - neg_g = np.array(self.neg_g[indices], - dtype=np.float32) - neg_k = np.array(self.neg_k[indices], - dtype=np.float32) - return neg_p, neg_g, neg_k - diff --git a/keypoints/cvae/sort.py b/keypoints/cvae/sort.py index 0a4c16b..7568022 100644 --- a/keypoints/cvae/sort.py +++ b/keypoints/cvae/sort.py @@ -3,11 +3,13 @@ def sort_tf(x, is_training=tf.constant(False, dtype=tf.bool)): - """ Sort the point cloud wrt. the priciple dimension - input: - x: (B, N, 3) - return: - y: (B, N, 3) + """ Sorts the point cloud wrt. the priciple dimension. + + Args: + x: (B, N, 3) The point cloud. + + Returns: + y: (B, N, 3) The sorted point cloud. """ x_mean = tf.reduce_mean( x, axis=1, keep_dims=True) @@ -34,11 +36,13 @@ def sort_tf(x, is_training=tf.constant(False, dtype=tf.bool)): def std(v): - """ - input: - v: (B, 1, 3) - return: - o: (1, 1, 3) + """Computes the standard deviation. + + Args: + v: (B, 1, 3) The input data. + + Returns: + o: (1, 1, 3) The std wrt. the first axis. 
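# sort_tf above orders the points of each cloud along its principal
# dimension so downstream layers see a consistent point ordering. A
# hedged numpy sketch of the same idea via PCA; the exact decomposition
# used by the TF implementation is outside this hunk:
import numpy as np

def sort_by_principal_axis(points):
    """points: (N, 3) -> (N, 3), sorted along the largest-variance axis."""
    centered = points - points.mean(axis=0, keepdims=True)
    cov = centered.T @ centered / len(points)
    _, eigvecs = np.linalg.eigh(cov)             # eigenvalues ascending
    principal = eigvecs[:, -1]                   # largest-variance direction
    return points[np.argsort(centered @ principal)]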
""" v = v - tf.reduce_mean(v, axis=0, keep_dims=True) o = tf.reduce_mean(v * v, axis=0, keep_dims=True) @@ -47,11 +51,13 @@ def std(v): def random_noise(v, std_ratio=0.5): - """ - input: - v: (B, 1, 3) - return: - vn: (B, 1, 3) + """Adds random noise to unit vectors. + + Args: + v: (B, 1, 3) The input unit vectors. + + Returns: + vn: (B, 1, 3) The unit vectors with noise. """ r = tf.random_normal( shape=tf.shape(v)) * std(v) * std_ratio @@ -62,6 +68,16 @@ def random_noise(v, std_ratio=0.5): def rotation_matrix(alpha, beta, gamma): + """Computes the rotation matrix. + + Args: + alpha: The rotation around x axis. + beta: The rotation around y axis. + gamma: The rotation around z axis. + + Returns: + The rotation matrix. + """ Rx = np.array([[1, 0, 0], [0, np.cos(alpha), -np.sin(alpha)], [0, np.sin(alpha), np.cos(alpha)]]) @@ -76,13 +92,15 @@ def rotation_matrix(alpha, beta, gamma): def rot_mat(rx, ry, rz): - """ Compute rotation matrics - input: - rx: (B, 1) - ry: (B, 1) - rz: (B, 1) - return: - R: (B, 3, 3) + """Computes a batch of rotation matrices. + + Args: + rx: (B, 1) The rotation around x axis. + ry: (B, 1) The rotation around y axis. + rz: (B, 1) The rotation around z axis. + + Returns: + R: (B, 3, 3) The rotation matrix. """ zeros = tf.zeros_like(rx) ones = tf.ones_like(rx) @@ -106,12 +124,14 @@ def rot_mat(rx, ry, rz): def align(x, p): - """ Align the point cloud wrt. 6-DoF grasp - input: - x: (B, N, 3) - p: (B, 6) - return: - y: (B, N, 3) + """ Aligns the point cloud wrt. 6-DoF grasp. + + Args: + x: (B, N, 3) The input point cloud. + p: (B, 6) The 6-DoF grasps. + + Returns: + y: (B, N, 3) The aligned point cloud. """ c, rx, ry, rz = tf.split(p, [3, 1, 1, 1], axis=1) R = rot_mat(-rx, -ry, -rz) diff --git a/keypoints/main.py b/keypoints/main.py index f64ff1e..dafd010 100644 --- a/keypoints/main.py +++ b/keypoints/main.py @@ -3,7 +3,6 @@ from cvae.build import train_vae_grasp, train_gcnn_grasp from cvae.build import train_vae_keypoint, train_discr_keypoint -from cvae.build import train_vae_action, train_discr_action from cvae.build import inference_grasp, inference_keypoint parser = argparse.ArgumentParser() @@ -60,14 +59,6 @@ train_discr_keypoint(data_path=args.data_path, model_path=args.model_path, task_name=args.task_name) -elif args.mode == 'vae_action': - train_vae_action(data_path=args.data_path, - model_path=args.model_path, - task_name=args.task_name) -elif args.mode == 'discr_action': - train_discr_action(data_path=args.data_path, - model_path=args.model_path, - task_name=args.task_name) elif args.mode == 'inference_keypoint': inference_keypoint( data_path=args.data_path, diff --git a/keypoints/merge.py b/keypoints/merge.py index 8ffa79e..1fedc04 100644 --- a/keypoints/merge.py +++ b/keypoints/merge.py @@ -4,7 +4,6 @@ from cvae.build import build_grasp_inference_graph from cvae.build import build_keypoint_inference_graph -from cvae.build import build_action_inference_graph parser = argparse.ArgumentParser() @@ -19,8 +18,6 @@ type=str) parser.add_argument('--keypoint', type=str) -parser.add_argument('--action', - type=str) parser.add_argument('--num_funct_vect', type=str, default='1') @@ -38,12 +35,6 @@ build_grasp_inference_graph() build_keypoint_inference_graph( num_funct_vect=int(args.num_funct_vect)) -elif args.model == 'action': - build_action_inference_graph() -elif args.model == 'grasp_action': - build_grasp_inference_graph() - build_action_inference_graph() - else: raise ValueError(args.model) @@ -51,7 +42,7 @@ vars = tf.global_variables() - if args.model in ['grasp', 
'keypoint', 'action']: + if args.model in ['grasp', 'keypoint']: # Merges the generation network (VAE) and # the evaluation network (binary classifier). vars_vae = [var for var in vars if 'vae' in var.name] @@ -78,20 +69,5 @@ saver_keypoint.restore(sess, args.keypoint) saver.save(sess, args.output) - elif args.model == 'grasp_action': - # Merges the grasp prediction network and the action network - # This is only for the End-to-End baseline where we directly - # predict the actions from the visual observation. - vars_grasp = [var for var in vars if 'grasp' in var.name] - vars_action = [var for var in vars if 'action' in var.name] - - saver = tf.train.Saver(var_list=vars) - saver_grasp = tf.train.Saver(var_list=vars_grasp) - saver_action = tf.train.Saver(var_list=vars_action) - - saver_grasp.restore(sess, args.grasp) - saver_action.restore(sess, args.action) - saver.save(sess, args.output) - else: raise ValueError diff --git a/keypoints/utils/grasp/make_inputs_multiproc.py b/keypoints/utils/grasp/make_inputs_multiproc.py index 779713d..c7ceca3 100644 --- a/keypoints/utils/grasp/make_inputs_multiproc.py +++ b/keypoints/utils/grasp/make_inputs_multiproc.py @@ -35,12 +35,15 @@ for x in range(0, len(data_list), batch_size)] class Logger(object): + """Logging utils.""" def __init__(self, output='./output.log'): + """Initialization.""" self.output = output return def write(self, message): + """Writes the message to the log file.""" with open(self.output, 'a') as f: f.write(message + '\r\n') return @@ -55,7 +58,23 @@ def save_data(pos_point_cloud, neg_grasp, save_path, scale=20): - + """Saves the data to hdf5 file. + + Args: + pos_point_cloud: The point cloud associated + associated with the positive grasps. + neg_point_cloud: The point cloud associated + associated with the negative grasps. + pos_grasp: The positive grasps. + neg_grasp: The negative grasps. + save_path: The hdf5 file name. + scale: The constant to be multiplied with the + point cloud and grasp coordinates to fit + the input scale of the network. + + Returns: + None. + """ scale_grasp = np.reshape([scale, scale, scale, 1, 1, 1], (1, 6)) pos_point_cloud = np.concatenate(pos_point_cloud, axis=0) * scale @@ -111,6 +130,16 @@ def save_data(pos_point_cloud, def append_data(data_list, lock, save_path='./data.hdf5'): + """Appends data to a hdf5 file. + + Args: + data_list: The list of data to be saved. + lock: The lock to avoid io conflict. + save_path: The path to hdf5 file. + + Returns: + None. 
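# save_data above concatenates the accumulated examples, multiplies the
# metric coordinates by a constant (default scale=20) to match the
# network's input range -- note that only the xyz entries of each 6-DoF
# grasp are scaled, the angles are left alone -- and writes four datasets
# into one hdf5 file. A minimal h5py sketch of that layout; the grasp
# dataset keys are assumed to mirror the point-cloud keys the readers use:
import h5py
import numpy as np

def write_grasp_dataset(path, pos_p, pos_g, neg_p, neg_g, scale=20.0):
    scale_grasp = np.reshape([scale, scale, scale, 1, 1, 1], (1, 6))
    with h5py.File(path, 'w') as f:
        f.create_dataset('pos_point_cloud', data=np.asarray(pos_p) * scale)
        f.create_dataset('neg_point_cloud', data=np.asarray(neg_p) * scale)
        f.create_dataset('pos_grasp', data=np.asarray(pos_g) * scale_grasp)
        f.create_dataset('neg_grasp', data=np.asarray(neg_g) * scale_grasp)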
+ """ pos_point_cloud = [] neg_point_cloud = [] pos_grasp = [] @@ -128,8 +157,6 @@ def append_data(data_list, lock, save_path='./data.hdf5'): grasp_4dof = np.load(f) grasp = grasp_4dof - - lock.acquire() if grasp[0] > 0: pos_point_cloud.append( point_cloud[np.newaxis]) @@ -141,7 +168,6 @@ def append_data(data_list, lock, save_path='./data.hdf5'): neg_grasp.append(grasp[np.newaxis, 1:]) else: pass - lock.release() lock.acquire() save_data(pos_point_cloud, diff --git a/keypoints/utils/keypoint/make_inputs_multiproc.py b/keypoints/utils/keypoint/make_inputs_multiproc.py index 3623cc9..30dbc51 100644 --- a/keypoints/utils/keypoint/make_inputs_multiproc.py +++ b/keypoints/utils/keypoint/make_inputs_multiproc.py @@ -42,12 +42,14 @@ class Logger(object): - + """Logging utils.""" def __init__(self, output='./output.log'): + """Initialization.""" self.output = output return def write(self, message): + """Writes the message to the log file.""" with open(self.output, 'a') as f: f.write(message + '\r\n') return @@ -63,7 +65,23 @@ def save_data(pos_point_cloud, save_path, lock, scale=20): - + """Saves the data to hdf5 file. + + Args: + pos_point_cloud: The point cloud associated + associated with the positive keypoints. + neg_point_cloud: The point cloud associated + associated with the negative keypoints. + pos_grasp: The positive keypoints. + neg_grasp: The negative keypoints. + save_path: The hdf5 file name. + scale: The constant to be multiplied with the + point cloud and grasp coordinates to fit + the input scale of the network. + + Returns: + None. + """ pos_point_cloud = np.concatenate(pos_point_cloud, axis=0) * scale neg_point_cloud = np.concatenate(neg_point_cloud, axis=0) * scale @@ -127,6 +145,16 @@ def save_data(pos_point_cloud, lock.release() def append_data(data_list, lock, save_path): + """Appends data to a hdf5 file. + + Args: + data_list: The list of data to be saved. + lock: The lock to avoid io conflict. + save_path: The path to hdf5 file. + + Returns: + None. + """ pos_point_cloud = [] neg_point_cloud = [] pos_keypoints = [] diff --git a/keypoints/utils/visualize/visualize.py b/keypoints/utils/visualize/visualize.py index b5a17d0..b099e99 100644 --- a/keypoints/utils/visualize/visualize.py +++ b/keypoints/utils/visualize/visualize.py @@ -1,3 +1,4 @@ +"""Visualizes the point cloud and keypoints stored in npy files.""" import os import argparse import numpy as np diff --git a/setup.py b/setup.py deleted file mode 100644 index 03a77e8..0000000 --- a/setup.py +++ /dev/null @@ -1,18 +0,0 @@ -import os - -# Install the dependencies -os.system('pip install --upgrade numpy scipy h5py pyyaml future opencv-python matplotlib easydict gym sklearn python-pcl cvxpy') - -os.system('pip install pillow --no-cache-dir') - -# Install Tensorflow -os.system('pip install tf-nightly-gpu==1.13.0.dev20190117') - -os.system('pip install tf-agents==0.2.0rc2') - -os.system('pip install tensorflow-probability==0.5.0') - -os.system('pip install tf-estimator-nightly==1.13.0.dev2019010910') - -# Install Pybullet -os.system('pip install -e git+https://github.com/bulletphysics/bullet3@6a74f63604ceecd1db5c71036ffb0dbf17294579#egg=pybullet')
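# append_data in the two make_inputs_multiproc scripts above now takes
# the shared lock only around the final hdf5 write; this patch drops the
# earlier acquire/release around the in-memory appends, which are
# process-local and need no synchronization. A minimal multiprocessing
# sketch of that pattern with a stand-in worker and output file:
import multiprocessing as mp

def worker(batch, lock, save_path):
    results = [item * 2 for item in batch]    # local work: no lock held
    with lock:                                # serialize the shared write
        with open(save_path, 'a') as f:
            f.write(repr(results) + '\n')

if __name__ == '__main__':
    lock = mp.Lock()
    procs = [mp.Process(target=worker, args=([i, i + 1], lock, 'out.log'))
             for i in range(4)]
    for p in procs:
        p.start()
    for p in procs:
        p.join()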