From e8b239045f7e5d950a1e47804026ff75eaf0d18d Mon Sep 17 00:00:00 2001 From: "Zengyi.Qin" Date: Fri, 8 Nov 2019 10:00:45 -0500 Subject: [PATCH] update --- keypoints/cvae/build.py | 296 --------------- keypoints/cvae/decoder.py | 71 ++-- keypoints/cvae/discriminator.py | 56 --- keypoints/cvae/encoder.py | 44 --- keypoints/cvae/network.py | 44 +++ keypoints/cvae/reader.py | 346 +++++++----------- keypoints/cvae/sort.py | 76 ++-- keypoints/main.py | 9 - keypoints/merge.py | 26 +- .../utils/grasp/make_inputs_multiproc.py | 34 +- .../utils/keypoint/make_inputs_multiproc.py | 32 +- keypoints/utils/visualize/visualize.py | 1 + setup.py | 18 - 13 files changed, 320 insertions(+), 733 deletions(-) delete mode 100644 setup.py diff --git a/keypoints/cvae/build.py b/keypoints/cvae/build.py index 96c5f18..c5b35dc 100644 --- a/keypoints/cvae/build.py +++ b/keypoints/cvae/build.py @@ -1301,141 +1301,6 @@ def train_vae_keypoint(data_path, task_name, str(step).zfill(6))) -def train_vae_action(data_path, - steps=120000, - batch_size=256, - eval_size=128, - l2_weight=1e-6, - log_step=20, - eval_step=4000, - save_step=1000, - model_path=None, - task_name='task', - optimizer='Adam'): - """Trains the VAE for action generation. This is only for - the End-to-End baseline where the actions are directly - predicted from the visual observation. - - Args: - data_path: The training data in a single h5df file. - steps: The total number of training steps. - batch_size: The training batch size. - eval_size: The evaluation batch size. - l2_weight: The L2 regularization weight. - log_step: The interval for logging. - eval_step: The interval for evaluation. - save_step: The interval for saving the model weights. - model_path: The pretrained model. - task_name: The name of the task to be trained on. This - is only for distinguishing the name of log files and - the saved model. - optimizer: Adam or SGDM. - - Returns: - None. 
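# The trainers in this file anneal the learning rate through
# get_learning_rate(step, steps), whose definition sits outside this hunk.
# A minimal sketch of a piecewise step-decay schedule of that shape; the
# base rate and the decay boundaries below are illustrative assumptions,
# not values taken from the repository:
def get_learning_rate_sketch(step, total_steps, base_lr=1e-4):
    """Hypothetical schedule: decay 10x at 60% and 80% of training."""
    if step < 0.6 * total_steps:
        return base_lr
    if step < 0.8 * total_steps:
        return base_lr * 0.1
    return base_lr * 0.01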
- """ - loader = ActionReader(data_path) - - graph = build_action_training_graph() - learning_rate = tf.placeholder(tf.float32, shape=()) - - point_cloud_tf = graph['point_cloud_tf'] - grasp_point_tf = graph['grasp_point_tf'] - translation_tf = graph['translation_tf'] - rotation_tf = graph['rotation_tf'] - - loss_vae_grasp = graph['loss_vae_grasp'] - loss_vae_trans = graph['loss_vae_trans'] - loss_vae_rot = graph['loss_vae_rot'] - loss_vae_mmd = graph['loss_vae_mmd'] * 0.005 - - z_mean = graph['z_mean'] - z_std = graph['z_std'] - - std_gt_grasp = graph['std_gt_grasp'] - std_gt_trans = graph['std_gt_trans'] - std_gt_rot = graph['std_gt_rot'] - - weight_loss = [tf.nn.l2_loss(var) for var - in tf.trainable_variables()] - weight_loss = tf.reduce_sum(weight_loss) * l2_weight - - loss_vae = (loss_vae_grasp + loss_vae_trans + - loss_vae_rot + loss_vae_mmd) - loss = weight_loss + loss_vae - - if optimizer == 'Adam': - train_op = tf.train.AdamOptimizer( - learning_rate=learning_rate).minimize(loss) - elif optimizer == 'SGDM': - train_op = tf.train.MomentumOptimizer( - learning_rate=learning_rate, - momentum=0.9).minimize(loss) - else: - raise NotImplementedError - - all_vars = tf.get_collection_ref( - tf.GraphKeys.GLOBAL_VARIABLES) - var_list_vae = [var for var in all_vars - if 'vae_action' in var.name and - 'Momentum' not in var.name and - 'Adam' not in var.name] - - saver = tf.train.Saver(var_list=var_list_vae) - - config = tf.ConfigProto() - config.gpu_options.allow_growth = True - config.allow_soft_placement = True - - with tf.Session(config=config) as sess: - sess.run([tf.global_variables_initializer()]) - if model_path: - running_log.write('vae_action_{}'.format(task_name), - 'loading model from {}'.format(model_path)) - saver.restore(sess, model_path) - - for step in range(steps + 1): - pos_p_np, pos_a_np = loader.sample_pos_train(batch_size) - - pos_grasp_np, pos_trans_np, pos_rot_np = np.split( - pos_a_np, [3, 5], axis=1) - pos_rot_np = np.concatenate([np.cos(pos_rot_np), - np.sin(pos_rot_np)], axis=1) - - feed_dict = {point_cloud_tf: pos_p_np, - grasp_point_tf: pos_grasp_np, - translation_tf: pos_trans_np, - rotation_tf: pos_rot_np, - learning_rate: get_learning_rate(step, steps)} - - [_, loss_np, vae_grasp, vae_trans, vae_rot, vae_mmd, weight, - std_gt_grasp_np, std_gt_trans_np, std_gt_rot_np, - z_mean_np, z_std_np] = sess.run([ - train_op, loss, loss_vae_grasp, - loss_vae_trans, loss_vae_rot, loss_vae_mmd, - weight_loss, std_gt_grasp, std_gt_trans, std_gt_rot, - z_mean, z_std], - feed_dict=feed_dict) - - if step % log_step == 0: - running_log.write('vae_action_{}'.format(task_name), - 'step: {}/{}, '.format(step, steps) + - 'loss: {:.3f}, grasp: {:.3f}/{:.3f}, '.format( - loss_np, vae_grasp, std_gt_grasp_np) + - 'trans: {:.3f}/{:.3f}, '.format( - vae_trans, std_gt_trans_np) + - 'rot: {:.3f}/{:.3f}, '.format( - vae_rot, std_gt_rot_np) + - 'mmd: {:.3f} ({:.3f} {:.3f}), '.format( - vae_mmd, z_mean_np, z_std_np)) - - if step > 0 and step % save_step == 0: - makedir('./runs/vae') - saver.save(sess, - './runs/vae/vae_action_{}_{}'.format( - task_name, str(step).zfill(6))) - - def train_gcnn_grasp(data_path, steps=60000, batch_size=256, @@ -1606,44 +1471,6 @@ def load_samples(loader, batch_size, stage, noise_level=0.2): return p_np, grasp_np, funct_np, funct_vect_np, label_np -def load_samples_action(loader, batch_size, stage): - """Loads the training and evaluation data. - - Args: - loader: A Reader instance. - batch_size: The training or evaluation batch size. - stage: 'train' or 'val'. 
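# train_vae_action above and the surrounding load_samples_action both
# encode the scalar rotation as the pair (cos r, sin r) rather than the
# raw angle, which removes the 2*pi wrap-around discontinuity from the
# regression target; the discriminator recovers the angle with atan2.
# A self-contained numpy illustration of the round trip:
import numpy as np

rot = np.array([[0.1], [3.0], [-2.5]])                        # (B, 1) radians
rot_enc = np.concatenate([np.cos(rot), np.sin(rot)], axis=1)  # (B, 2)
rot_dec = np.arctan2(rot_enc[:, 1:2], rot_enc[:, 0:1])        # (B, 1)
assert np.allclose(rot, rot_dec)  # exact here: all angles lie in (-pi, pi]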
- - Return: - p_np: A numpy array of point cloud. - grasp_np: A numpy array of grasp point. - trans_np: The translation part of the action. - rot_np: The rotation part of the action. - label: The binary success label of the actions given the - point cloud as visual observation. - """ - if stage == 'train': - pos_p_np, pos_a_np = loader.sample_pos_train(batch_size // 2) - neg_p_np, neg_a_np = loader.sample_neg_train(batch_size // 2) - elif stage == 'val': - pos_p_np, pos_a_np = loader.sample_pos_val(batch_size // 2) - neg_p_np, neg_a_np = loader.sample_neg_val(batch_size // 2) - else: - raise NotImplementedError - - num_pos, num_neg = pos_p_np.shape[0], neg_p_np.shape[0] - label_np = np.concatenate( - [np.ones(shape=(num_pos, 1)), - np.zeros(shape=(num_neg, 1))], - axis=0).astype(np.float32) - p_np = np.concatenate([pos_p_np, neg_p_np], axis=0) - a_np = np.concatenate([pos_a_np, neg_a_np], axis=0) - grasp_np, trans_np, rot_np = np.split( - a_np, [3, 5], axis=1) - rot_np = np.concatenate([np.cos(rot_np), np.sin(rot_np)], axis=1) - return p_np, grasp_np, trans_np, rot_np, label_np - - def train_discr_keypoint(data_path, steps=120000, batch_size=128, @@ -1772,129 +1599,6 @@ def train_discr_keypoint(data_path, noise_level, acc_np * 100)) -def train_discr_action(data_path, - steps=120000, - batch_size=128, - eval_size=128, - l2_weight=1e-6, - log_step=20, - eval_step=1000, - save_step=1000, - model_path=None, - task_name='task', - optimizer='Adam'): - """Trains the action evaluation network. - - Args: - data_path: The training data in a single h5df file. - steps: The total number of training steps. - batch_size: The training batch size. - eval_size: The evaluation batch size. - l2_weight: The L2 regularization weight. - log_step: The interval for logging. - eval_step: The interval for evaluation. - save_step: The interval for saving the model weights. - model_path: The pretrained model. - task_name: The name of the task to be trained on. This - is only for distinguishing the name of log files and - the saved model. - optimizer: 'Adam' or 'SGDM'. - - Returns: - None. 
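# load_samples_action above assembles each discriminator batch as half
# positives and half negatives and derives the binary labels from the
# sizes of the two halves. A compact numpy sketch of that pattern; the
# two sampler arguments are stand-ins for the reader methods:
import numpy as np

def balanced_batch(sample_pos, sample_neg, batch_size):
    pos_p, pos_a = sample_pos(batch_size // 2)
    neg_p, neg_a = sample_neg(batch_size // 2)
    label = np.concatenate([np.ones((pos_p.shape[0], 1)),
                            np.zeros((neg_p.shape[0], 1))],
                           axis=0).astype(np.float32)
    return (np.concatenate([pos_p, neg_p], axis=0),
            np.concatenate([pos_a, neg_a], axis=0),
            label)

pp, aa, yy = balanced_batch(
    lambda n: (np.ones((n, 4)), np.ones((n, 6))),
    lambda n: (np.zeros((n, 4)), np.zeros((n, 6))),
    batch_size=8)
assert yy.sum() == 4  # half the batch carries a positive label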
- """ - loader = ActionReader(data_path) - - graph = build_action_training_graph() - learning_rate = tf.placeholder(tf.float32, shape=()) - - point_cloud_tf = graph['point_cloud_tf'] - grasp_point_tf = graph['grasp_point_tf'] - translation_tf = graph['translation_tf'] - rotation_tf = graph['rotation_tf'] - - actions_label_tf = graph['actions_label_tf'] - loss_discr = graph['loss_discr'] - acc_discr = graph['acc_discr'] - - weight_loss = [tf.nn.l2_loss(var) for var - in tf.trainable_variables()] - weight_loss = tf.reduce_sum(weight_loss) * l2_weight - - loss = weight_loss + loss_discr - - if optimizer == 'Adam': - train_op = tf.train.AdamOptimizer( - learning_rate=learning_rate).minimize(loss) - elif optimizer == 'SGDM': - train_op = tf.train.MomentumOptimizer( - learning_rate=learning_rate, - momentum=0.9).minimize(loss) - else: - raise NotImplementedError - - all_vars = tf.get_collection_ref( - tf.GraphKeys.GLOBAL_VARIABLES) - var_list_vae = [var for var in all_vars - if 'action_discriminator' in var.name and - 'Momentum' not in var.name and - 'Adam' not in var.name] - - saver = tf.train.Saver(var_list=var_list_vae) - config = tf.ConfigProto() - config.gpu_options.allow_growth = True - config.allow_soft_placement = True - - with tf.Session(config=config) as sess: - sess.run([tf.global_variables_initializer()]) - if model_path: - running_log.write('discr_action_{}'.format(task_name), - 'loading model from {}'.format(model_path)) - saver.restore(sess, model_path) - - for step in range(steps + 1): - p_np, grasp_np, trans_np, rot_np, label_np = load_samples_action( - loader, batch_size, 'train') - feed_dict = {point_cloud_tf: p_np, - grasp_point_tf: grasp_np, - translation_tf: trans_np, - rotation_tf: rot_np, - actions_label_tf: label_np, - learning_rate: get_learning_rate(step, steps)} - - [_, loss_np, acc_np, weight - ] = sess.run([ - train_op, loss, acc_discr, weight_loss], - feed_dict=feed_dict) - - if step % log_step == 0: - running_log.write('discr_action_{}'.format(task_name), - 'step: {}/{}, '.format(step, steps) + - 'loss: {:.3f}, acc: {:.3f}'.format( - loss_np, acc_np * 100)) - - if step > 0 and step % save_step == 0: - saver.save(sess, - './runs/discr/discr_action_{}_{}'.format( - task_name, str(step).zfill(6))) - - if step > 0 and step % eval_step == 0: - for noise_level in [0.1, 0.2, 0.4, 0.8]: - [p_np, grasp_np, trans_np, - rot_np, label_np] = load_samples_action( - loader, batch_size, 'train') - feed_dict = {point_cloud_tf: p_np, - grasp_point_tf: grasp_np, - translation_tf: trans_np, - rotation_tf: rot_np, - actions_label_tf: label_np} - - [acc_np] = sess.run([acc_discr], feed_dict=feed_dict) - running_log.write('discr_action_{}'.format(task_name), - 'noise: {:.3f}, acc: {:.3f}'.format( - noise_level, acc_np * 100)) - - def inference_grasp(data_path, model_path, batch_size=128, diff --git a/keypoints/cvae/decoder.py b/keypoints/cvae/decoder.py index 2a1b678..e813cf7 100644 --- a/keypoints/cvae/decoder.py +++ b/keypoints/cvae/decoder.py @@ -1,10 +1,21 @@ +"""The decoders of the variational encoders (VAE).""" import tensorflow as tf from cvae.network import Network class GraspDecoder(Network): + """The grasp decoder.""" def build_model(self, x, z): + """Builds the model graph. + + Args: + x: The point cloud tensor. + z: The latent code parameters. + + Returns: + g: The generated grasps. 
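# The decoders in this file treat z as the concatenation of a Gaussian
# mean and standard deviation and draw the latent sample with the
# reparameterization trick, as the (deleted) ActionDecoder further down
# shows explicitly. A minimal TF1-style sketch of that sampling step,
# assuming z has shape (B, 2*D):
import tensorflow as tf

def sample_latent(z):
    miu, sigma = tf.split(z, 2, axis=1)                    # (B, D) each
    eps = tf.random.truncated_normal(tf.shape(sigma), 0.0, 1.0)
    return miu + sigma * eps   # differentiable w.r.t. both miu and sigma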
+ """ with tf.variable_scope('vae_grasp_decoder', reuse=tf.AUTO_REUSE): @@ -56,9 +67,25 @@ def build_model(self, x, z): class KeypointDecoder(Network): + """The keypoint decoder.""" def build_model(self, x, z, nv=0, truncated_normal=False): + """Builds the model graph. + + Args: + x: The point cloud tensor. + z: The latent code parameters. + nv: The number of vectors pointing from + the function point to the effect point. + truncated_normal: Whether to use the truncated + normal distribution to sample random seeds. + + Returns: + keypoints: The generated grasp point and function point. + funct_vect: The generated vectors pointing from the + function point to the effect point. + """ with tf.variable_scope('vae_keypoint_decoder', reuse=tf.AUTO_REUSE): @@ -123,47 +150,3 @@ def build_model(self, x, z, nv=0, functional_keypoints] keypoints = [tf.squeeze(k, 2) for k in keypoints] return keypoints, funct_vect - - -class ActionDecoder(Network): - - def build_model(self, x, z): - with tf.variable_scope('vae_action_decoder', - reuse=tf.AUTO_REUSE): - miu, sigma = tf.split(z, 2, axis=1) - z = miu + sigma * tf.random.truncated_normal( - tf.shape(sigma), 0.0, 1.0) - _, z_dim = z.get_shape().as_list() - z = tf.reshape(z, [-1, 1, 1, z_dim]) - - mean_x = tf.reduce_mean(x, - axis=1, keepdims=True) - x = x - mean_x - p = x - - x = self.conv_layer(x, 16, name='conv1_1') - x = self.conv_layer(x, 16, name='conv1_2') - x = self.concat_xz(x, z) - - x = self.conv_layer(x, 32, name='conv2_1') - x = self.conv_layer(x, 32, name='conv2_2') - x = self.concat_xz(x, z) - - x = self.conv_layer(x, 64, name='conv3_1') - x = self.conv_layer(x, 256, name='conv3_2') - x, p = self.down_sample(x, p, 64, 0.7, 'down_sample') - - x_a = self.conv_layer(x, 256, name='conv6_1') - x_a = tf.reduce_max(x_a, axis=1, keepdims=True) - grasp = self.conv_layer(x_a, 3, name='conv6_2', - linear=True) + mean_x - trans = self.conv_layer(x_a, 2, name='conv6_3', - linear=True) - rot = self.conv_layer(x_a, 2, name='conv6_4', - linear=True) - grasp = tf.squeeze(grasp, axis=[1, 2]) - trans = tf.squeeze(trans, axis=[1, 2]) - rot = tf.squeeze(rot, axis=[1, 2]) - - return grasp, trans, rot - diff --git a/keypoints/cvae/discriminator.py b/keypoints/cvae/discriminator.py index 5fdf2da..3fa2801 100644 --- a/keypoints/cvae/discriminator.py +++ b/keypoints/cvae/discriminator.py @@ -118,61 +118,5 @@ def build_model(self, x, ks, v=None): return p - -class ActionDiscriminator(Network): - """The action evaluation network.""" - - def build_model(self, x, ats): - """Builds the model graph. - - Args: - x: The point cloud tensor. - ats: The grasp, translation and rotation of the action. - - Returns: - p: The action evaluation score. A higher - score indicates a higher quality of the - predicted action given the point cloud. 
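# The (deleted) ActionDiscriminator below first recenters the cloud at
# the grasp point and re-expresses it in the action's yaw frame (via
# atan2 and align), so its score is invariant to where the object sits
# in the workspace. A numpy sketch of that canonicalization for a
# single cloud; the helper name is illustrative:
import numpy as np

def to_grasp_frame(points, grasp_xyz, yaw):
    """points: (N, 3); grasp_xyz: (3,); yaw: rotation about z, radians."""
    c, s = np.cos(-yaw), np.sin(-yaw)          # inverse (aligning) rotation
    rz = np.array([[c, -s, 0.0], [s, c, 0.0], [0.0, 0.0, 1.0]])
    return (points - grasp_xyz) @ rz.T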
- """ - with tf.variable_scope('action_discriminator', - reuse=tf.AUTO_REUSE): - grasp, trans, rot = ats - - x_g = x - tf.reshape(grasp, [-1, 1, 1, 3]) - - vx, vy = tf.split(rot, 2, axis=1) - rz = tf.atan2(vy, vx) - zero = tf.zeros_like(rz) - pose_t = tf.concat( - [zero, zero, zero, zero, zero, rz], axis=1) - x_t = tf.expand_dims( - align(tf.squeeze(x_g, 2), pose_t), 2) - x_t = x_t + tf.reshape( - tf.concat([trans, zero], axis=1), [-1, 1, 1, 3]) - - x = tf.concat([x_g, x_t], axis=1) - - x = self.conv_layer(x, 16, name='conv1_1') - x = self.conv_layer(x, 16, name='conv1_2') - - x = self.conv_layer(x, 32, name='conv2_1') - x = self.conv_layer(x, 32, name='conv2_2') - - x = self.conv_layer(x, 64, name='conv3_1') - x = self.conv_layer(x, 64, name='conv3_2') - - x = self.conv_layer(x, 512, name='conv4_1') - - x_g, x_t = tf.split(x, 2, axis=1) - - x_g = tf.reduce_max(x_g, [1, 2], keepdims=False) - x_t = tf.reduce_max(x_t, [1, 2], keepdims=False) - - x = tf.concat([x_g, x_t], axis=1) - - x = self.fc_layer(x, 256, name='fc1') - x = self.fc_layer(x, 256, name='fc2') - p = self.fc_layer(x, 1, linear=True, name='out') - return p diff --git a/keypoints/cvae/encoder.py b/keypoints/cvae/encoder.py index d29b2fe..7f255eb 100644 --- a/keypoints/cvae/encoder.py +++ b/keypoints/cvae/encoder.py @@ -123,50 +123,6 @@ def build_model(self, x, ks, v=None): z = tf.concat([miu, sigma_sp], axis=1) return z - -class ActionEncoder(Network): - - def build_model(self, x, ats): - """Builds the vae keypoint encoder - - Args: - x: (B, N, 1, 3) Input point cloud - ats: [(B, 1, 3), (B, 1, 3), (B, 1, 3)] Actions - - Returns: - z: (B, 2*D) Mean and var of the latent - variable with dimension D - - """ - - with tf.variable_scope('vae_action_encoder', - reuse=tf.AUTO_REUSE): - - mean_x = tf.reduce_mean(x, - axis=1, keepdims=True) - x = x - mean_x - mean_r = tf.reduce_mean( - tf.linalg.norm( - x, axis=3, keepdims=True), - axis=1, keepdims=True) - x = x / (mean_r + 1e-6) - - grasp, trans, rot = ats - grasp = grasp - tf.squeeze(mean_x, [1, 2]) - a = tf.concat([grasp, trans, rot], axis=1) - - x = self.conv_layer(x, 16, name='conv1_1') - x = self.conv_layer(x, 16, name='conv1_2') - - x = self.conv_layer(x, 32, name='conv2_1') - x = self.conv_layer(x, 32, name='conv2_2') - - x = self.conv_layer(x, 32, name='conv5_1') - x = self.conv_layer(x, 256, name='conv5_2') - - x = tf.reduce_max(x, axis=[1, 2], keepdims=False) - x = tf.concat([x, a], axis=1) - x = self.fc_layer(x, 256, name='fc1') x = self.fc_layer(x, 256, name='fc2') z = self.fc_layer(x, 4, linear=True, name='out') diff --git a/keypoints/cvae/network.py b/keypoints/cvae/network.py index 1847815..7229837 100644 --- a/keypoints/cvae/network.py +++ b/keypoints/cvae/network.py @@ -2,11 +2,13 @@ class Network(object): + """The base class of neural networks.""" def __init__(self): return def batch_norm_layer(self, x, eps=0.01): + """Batch normalization.""" dimension = x.get_shape().as_list()[-1] mean, variance = tf.nn.moments(x, axes=[0, 1, 2]) beta = tf.get_variable( @@ -31,6 +33,19 @@ def batch_norm_layer(self, x, eps=0.01): def conv_layer(self, x, out_channels, kernel_size=1, dilation=1, linear=False, name=None): + """The convolution layer. + + Args: + x: The input feature map. + out_channels: The output channels. + kernel_size: The size of the convolution kernel. + dilation: The dilation rate. + linear: If True, return without batch normalization or relu. + name: The name scope of the variables. + + Returns: + x: The output feature map. 
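# conv_layer defaults to a 1x1 kernel over tensors shaped (B, N, 1, C),
# which makes it a PointNet-style shared per-point MLP: every point is
# transformed by the same weights, independently of its neighbors. A
# numpy check of the equivalence with a plain dense layer:
import numpy as np

B, N, C_in, C_out = 2, 5, 3, 8
x = np.random.randn(B, N, 1, C_in)
W = np.random.randn(C_in, C_out)                   # the 1x1 kernel, squeezed
conv_out = np.einsum('bnic,co->bnio', x, W)        # 1x1 convolution
dense_out = x.reshape(-1, C_in) @ W                # same weights, per point
assert np.allclose(conv_out.reshape(-1, C_out), dense_out)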
+ """ with tf.variable_scope(name): x = tf.layers.conv2d( x, out_channels, @@ -45,6 +60,17 @@ def conv_layer(self, x, out_channels, def fc_layer(self, x, out_size, linear=False, name=None): + """The fully connected layer. + + Args: + x: The input units. + out_size: The size of the output units. + linear: If True, relu will not be applied. + name: The name scope of the variables. + + Returns: + x: The output units. + """ with tf.variable_scope(name): x = tf.contrib.layers.flatten(x) x = tf.contrib.layers.fully_connected( @@ -55,6 +81,7 @@ def fc_layer(self, x, out_size, return x def max_pool(self, x, ksize, stride, name): + """The maxpooling layer.""" with tf.variable_scope(name): x = tf.contrib.layers.max_pool2d( x, kernel_size=ksize, @@ -62,6 +89,22 @@ def max_pool(self, x, ksize, stride, name): return x def down_sample(self, x, p, stride, thres, name): + """Down samples the point cloud feature maps. + + Args: + x: The point cloud features from the last layer. + p: The points where the feautures come from. + stride: The down sample stride. The central points + are chosen from p at this stride. + thres: For each central point, the surrounding points + within this distance threshold are considered in + the average pooling. + name: The name of this operation. + + Returns: + x: The point cloud features after down sampling. + new_p: The points where the new features come from. + """ with tf.variable_scope(name): print('Original shape {}'.format( x.get_shape().as_list())) @@ -90,6 +133,7 @@ def down_sample(self, x, p, stride, thres, name): return x, new_p def concat_xz(self, x, z): + """Concatenates the point cloud and the latent code.""" _, n, _, _ = x.get_shape().as_list() z_n = tf.tile(z, [1, n, 1, 1]) return tf.concat([x, z_n], axis=3) diff --git a/keypoints/cvae/reader.py b/keypoints/cvae/reader.py index 16f6f3a..60cce58 100644 --- a/keypoints/cvae/reader.py +++ b/keypoints/cvae/reader.py @@ -5,8 +5,19 @@ class GraspReader(object): + """Grasp data reader.""" def __init__(self, data_path, trainval_ratio=0.9): + """Initialization. + + Args: + data_path: Path to the hdf5 file. + trainval_ratio: The ratio between the training + and validation data. + + Returns: + None. + """ logging.info('Loading {}'.format(data_path)) f = h5py.File(data_path, 'r') self.pos_p = f['pos_point_cloud'] @@ -23,11 +34,20 @@ def __init__(self, data_path, trainval_ratio=0.9): return def make_suitable(self, indices): + """Removes the repeated indices.""" indices = sorted(set(list(indices))) return indices def random_rotate(self, p, g): - """Randomly rotate point cloud and grasps + """Randomly rotates point cloud and grasps. + + Args: + p: The point cloud tensor. + g: The grasp. + + Returns: + p: The rotated point cloud. + g: The rotated grasp. """ num = p.shape[0] drz = np.random.uniform( @@ -51,6 +71,17 @@ def random_rotate(self, p, g): return p, g def random_disturb(self, p, g): + """Adds random noise to the point cloud and + the grasp to generate negative examples. + + Args: + p: The point cloud tensor. + g: The grasp. + + Returns: + p: The disturbed point cloud. + g: The disturbed grasp. + """ p_mean = np.mean(p, axis=1) g_xyz, g_rx, g_ry, g_rz = \ np.split(g, [3, 4, 5], axis=1) @@ -70,6 +101,15 @@ def random_disturb(self, p, g): return p, g def sample_pos_train(self, size): + """Randomly chooses postive examples for training. + + Args: + size: The number of output examples. + + Returns: + pos_p: The point cloud. + pos_g: The positive grasps. 
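# random_disturb above manufactures extra negatives by shifting a grasp
# with Gaussian noise scaled by the data spread and by re-rolling its
# yaw, which yields hard negatives that stay near the data manifold. A
# standalone numpy sketch for one grasp (the source scales by batch
# statistics; the cloud's own spread stands in for them here):
import numpy as np

def disturb_grasp(points, g_xyz, g_rz):
    """points: (N, 3); g_xyz: (3,) grasp position; g_rz: scalar yaw."""
    spread = np.std(points - points.mean(axis=0), axis=0)   # per-axis std
    g_xyz = g_xyz + np.random.normal(size=3) * spread
    g_rz = g_rz + np.random.uniform(0.0, 2.0 * np.pi)
    return g_xyz, g_rz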
+ """ indices = np.random.randint( low=0, high=int(self.pos_p.shape[0] @@ -83,6 +123,15 @@ def sample_pos_train(self, size): return pos_p, pos_g def sample_neg_train(self, size): + """Randomly chooses negative examples for training. + + Args: + size: The number of output examples. + + Returns: + neg_p: The point cloud. + neg_g: The negative grasps. + """ indices = np.random.randint( low=0, high=int(self.neg_p.shape[0] @@ -99,6 +148,15 @@ def sample_neg_train(self, size): return neg_p, neg_g def sample_pos_val(self, size): + """Randomly chooses postive examples for validation. + + Args: + size: The number of output examples. + + Returns: + pos_p: The point cloud. + pos_g: The positive grasps. + """ indices = np.random.randint( high=self.pos_p.shape[0], low=int(self.pos_p.shape[0] @@ -112,6 +170,15 @@ def sample_pos_val(self, size): return pos_p, pos_g def sample_neg_val(self, size): + """Randomly chooses negative examples for validation. + + Args: + size: The number of output examples. + + Returns: + neg_p: The point cloud. + neg_g: The negative grasps. + """ indices = np.random.randint( high=self.neg_p.shape[0], low=int(self.neg_p.shape[0] @@ -130,11 +197,25 @@ def sample_neg_val(self, size): class KeypointReader(object): + """Keypoint data reader.""" + def __init__(self, data_path, trainval_ratio=0.8, num_keypoints=2): + """Initialization. + + Args: + data_path: Path to hdf5 file. + trainval_ratio: The ratio between the training + and validation data. + num_keypoints: 3 if the effect point is considered + else 2. + + Returns: + None. + """ logging.info('Loading {}'.format(data_path)) f = h5py.File(data_path, 'r') self.pos_p = f['pos_point_cloud'] @@ -150,12 +231,21 @@ def __init__(self, return def make_suitable(self, indices): + """Removes the repeated indices.""" indices = sorted(set(list(indices))) return indices def random_rotate(self, p, k): - """Randomly rotate point cloud and keypoints - for data augmentation + """Randomly rotates point cloud and keypoints + for data augmentation. + + Args: + p: The point cloud. + k: The keypoints. + + Returns: + p: The rotated point cloud. + k: The rotated keypoints. """ num = p.shape[0] drz = np.random.uniform( @@ -179,8 +269,16 @@ def random_rotate(self, p, k): return p, k def random_disturb(self, p, k, scale_down=0.2): - """Randomly disturb keypoints for creating - new negative examples + """Randomly disturbs keypoints for creating + new negative examples. + + Args: + p: The point cloud. + k: The keypoints. + + Returns: + p: The rotated point cloud. + k: The rotated keypoints. """ mean_p = np.mean(p, axis=1, keepdims=True) std_p = np.std(p - mean_p, axis=1, keepdims=True) @@ -190,6 +288,15 @@ def random_disturb(self, p, k, scale_down=0.2): return p, k def sample_pos_train(self, size): + """Randomly chooses postive examples for training. + + Args: + size: The number of output examples. + + Returns: + pos_p: The point cloud. + pos_g: The positive keypoints. + """ indices = np.random.randint( low=0, high=int(self.pos_p.shape[0] @@ -203,6 +310,15 @@ def sample_pos_train(self, size): return pos_p, pos_k def sample_neg_train(self, size): + """Randomly chooses negative examples for training. + + Args: + size: The number of output examples. + + Returns: + neg_p: The point cloud. + neg_g: The negative keypoints. + """ indices = np.random.randint( low=0, high=int(self.neg_p.shape[0] @@ -219,6 +335,15 @@ def sample_neg_train(self, size): return neg_p, neg_k def sample_pos_val(self, size): + """Randomly chooses postive examples for validation. 
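# Every sampler in these readers routes its random indices through
# make_suitable before slicing: h5py fancy indexing expects index lists
# to be unique and increasing, so duplicates drawn by np.random.randint
# must be dropped, and the realized batch can be slightly smaller than
# requested. Illustration of the safe form:
import numpy as np

raw = np.random.randint(0, 1000, size=128)       # may contain duplicates
suitable = sorted(set(raw.tolist()))             # unique and ascending
# dataset[suitable] is accepted by h5py; dataset[raw] may be rejected.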
+ + Args: + size: The number of output examples. + + Returns: + pos_p: The point cloud. + pos_g: The positive keypoints. + """ indices = np.random.randint( high=self.pos_p.shape[0], low=int(self.pos_p.shape[0] @@ -232,6 +357,15 @@ def sample_pos_val(self, size): return pos_p, pos_k def sample_neg_val(self, size): + """Randomly chooses negative examples for validation. + + Args: + size: The number of output examples. + + Returns: + neg_p: The point cloud. + neg_g: The negative keypoints. + """ indices = np.random.randint( high=self.neg_p.shape[0], low=int(self.neg_p.shape[0] @@ -247,205 +381,3 @@ def sample_neg_val(self, size): neg_p, neg_k = self.random_disturb( neg_p, neg_k) return neg_p, neg_k - - -class ActionReader(object): - - def __init__(self, - data_path, - trainval_ratio=0.8): - logging.info('Loading {}'.format(data_path)) - f = h5py.File(data_path, 'r') - self.pos_p = f['pos_point_cloud'] - self.pos_a = f['pos_action'] - self.neg_p = f['neg_point_cloud'] - self.neg_a = f['neg_action'] - - self.trainval_ratio = trainval_ratio - print('Number positive: {}'.format( - self.pos_p.shape[0])) - return - - def make_suitable(self, indices): - indices = sorted(set(list(indices))) - return indices - - def sample_pos_train(self, size): - indices = np.random.randint( - low=0, - high=int(self.pos_p.shape[0] - * self.trainval_ratio), - size=size) - indices = self.make_suitable(indices) - pos_p = np.array(self.pos_p[indices], - dtype=np.float32) - pos_a = np.array(self.pos_a[indices], - dtype=np.float32) - return pos_p, pos_a - - def sample_neg_train(self, size): - indices = np.random.randint( - low=0, - high=int(self.neg_p.shape[0] - * self.trainval_ratio), - size=size) - indices = self.make_suitable(indices) - neg_p = np.array(self.neg_p[indices], - dtype=np.float32) - neg_a = np.array(self.neg_a[indices], - dtype=np.float32) - return neg_p, neg_a - - def sample_pos_val(self, size): - indices = np.random.randint( - high=self.pos_p.shape[0], - low=int(self.pos_p.shape[0] - * self.trainval_ratio), - size=size) - indices = self.make_suitable(indices) - pos_p = np.array(self.pos_p[indices], - dtype=np.float32) - pos_a = np.array(self.pos_a[indices], - dtype=np.float32) - return pos_p, pos_a - - def sample_neg_val(self, size): - indices = np.random.randint( - high=self.neg_p.shape[0], - low=int(self.neg_p.shape[0] - * self.trainval_ratio), - size=size) - indices = self.make_suitable(indices) - neg_p = np.array(self.neg_p[indices], - dtype=np.float32) - neg_a = np.array(self.neg_a[indices], - dtype=np.float32) - return neg_p, neg_a - - -class MultitaskReader(object): - - def __init__(self, data_path, trainval_ratio=0.8): - logging.info('Loading {}'.format(data_path)) - f = h5py.File(data_path, 'r') - - self.pos_p = f['pos_point_cloud'] - pos_act = f['pos_actions'] - self.pos_g, self.pos_k = np.split( - pos_act, [2], axis=1) - self.pos_g = np.reshape(self.pos_g, [-1, 6]) - - self.neg_p = f['neg_point_cloud'] - neg_act = f['neg_actions'] - self.neg_g, self.neg_k = np.split( - neg_act, [2], axis=1) - self.neg_g = np.reshape(self.neg_g, [-1, 6]) - - self.trainval_ratio = trainval_ratio - return - - def make_suitable(self, indices): - indices = sorted(set(list(indices))) - return indices - - def random_rotate(self, p, g, k): - """Randomly rotate point cloud and grasps - """ - num = p.shape[0] - drz = np.random.uniform( - 0, np.pi * 2, size=(num, 1)) - g_xyz, g_rx, g_ry, g_rz = \ - np.split(g, [3, 4, 5], axis=1) - zeros = np.zeros_like(drz) - ones = np.ones_like(drz) - mat_drz = np.concatenate( - 
[np.cos(drz), -np.sin(drz), zeros, - np.sin(drz), np.cos(drz), zeros, - zeros, zeros, ones], - axis=1) - mat_drz = np.reshape(mat_drz, [num, 3, 3]) - mat_drz_t = np.transpose(mat_drz, [0, 2, 1]) - p = np.matmul(p - g_xyz[:, np.newaxis], - mat_drz_t) + g_xyz[:, np.newaxis] - k = np.matmul(k - g_xyz[:, np.newaxis], - mat_drz_t) + g_xyz[:, np.newaxis] - g_rz = g_rz + drz - g = np.concatenate( - [g_xyz, g_rx, g_ry, g_rz], axis=1) - return p, g, k - - def random_disturb(self, p, g, k): - p_mean = np.mean(p, axis=1) - g_xyz, g_rx, g_ry, g_rz = \ - np.split(g, [3, 4, 5], axis=1) - g_xyz = g_xyz + np.random.normal( - size=np.shape(g_xyz)) * np.std( - g_xyz - p_mean, - axis=0, keepdims=True) - g_rz = g_rz + np.random.uniform( - low=0, high=np.pi * 2, - size=np.shape(g_rz)) - g = np.concatenate( - [g_xyz, g_rx, g_ry, g_rz], axis=1) - return p, g, k - - def sample_pos_train(self, size): - indices = np.random.randint( - low=0, - high=int(self.pos_p.shape[0] - * self.trainval_ratio), - size=size) - indices = self.make_suitable(indices) - pos_p = np.array(self.pos_p[indices, :, :], - dtype=np.float32) - pos_g = np.array(self.pos_g[indices, :], - dtype=np.float32) - pos_k = np.array(self.pos_k[indices, :], - dtype=np.float32) - return pos_p, pos_g, pos_k - - def sample_neg_train(self, size): - indices = np.random.randint( - low=0, - high=int(self.neg_p.shape[0] - * self.trainval_ratio), - size=size) - indices = self.make_suitable(indices) - neg_p = np.array(self.neg_p[indices], - dtype=np.float32) - neg_g = np.array(self.neg_g[indices], - dtype=np.float32) - neg_k = np.array(self.neg_k[indices], - dtype=np.float32) - return neg_p, neg_g, neg_k - - def sample_pos_val(self, size): - indices = np.random.randint( - high=self.pos_p.shape[0], - low=int(self.pos_p.shape[0] - * self.trainval_ratio), - size=size) - indices = self.make_suitable(indices) - pos_p = np.array(self.pos_p[indices], - dtype=np.float32) - pos_g = np.array(self.pos_g[indices], - dtype=np.float32) - pos_k = np.array(self.pos_k[indices], - dtype=np.float32) - return pos_p, pos_g, pos_k - - def sample_neg_val(self, size): - indices = np.random.randint( - high=self.neg_p.shape[0], - low=int(self.neg_p.shape[0] - * self.trainval_ratio), - size=size) - indices = self.make_suitable(indices) - neg_p = np.array(self.neg_p[indices], - dtype=np.float32) - neg_g = np.array(self.neg_g[indices], - dtype=np.float32) - neg_k = np.array(self.neg_k[indices], - dtype=np.float32) - return neg_p, neg_g, neg_k - diff --git a/keypoints/cvae/sort.py b/keypoints/cvae/sort.py index 0a4c16b..7568022 100644 --- a/keypoints/cvae/sort.py +++ b/keypoints/cvae/sort.py @@ -3,11 +3,13 @@ def sort_tf(x, is_training=tf.constant(False, dtype=tf.bool)): - """ Sort the point cloud wrt. the priciple dimension - input: - x: (B, N, 3) - return: - y: (B, N, 3) + """ Sorts the point cloud wrt. the priciple dimension. + + Args: + x: (B, N, 3) The point cloud. + + Returns: + y: (B, N, 3) The sorted point cloud. """ x_mean = tf.reduce_mean( x, axis=1, keep_dims=True) @@ -34,11 +36,13 @@ def sort_tf(x, is_training=tf.constant(False, dtype=tf.bool)): def std(v): - """ - input: - v: (B, 1, 3) - return: - o: (1, 1, 3) + """Computes the standard deviation. + + Args: + v: (B, 1, 3) The input data. + + Returns: + o: (1, 1, 3) The std wrt. the first axis. 
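# sort_tf above orders the points of each cloud along its principal
# dimension so downstream layers see a consistent point ordering. A
# hedged numpy sketch of the same idea via PCA; the exact decomposition
# used by the TF implementation is outside this hunk:
import numpy as np

def sort_by_principal_axis(points):
    """points: (N, 3) -> (N, 3), sorted along the largest-variance axis."""
    centered = points - points.mean(axis=0, keepdims=True)
    cov = centered.T @ centered / len(points)
    _, eigvecs = np.linalg.eigh(cov)             # eigenvalues ascending
    principal = eigvecs[:, -1]                   # largest-variance direction
    return points[np.argsort(centered @ principal)]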
""" v = v - tf.reduce_mean(v, axis=0, keep_dims=True) o = tf.reduce_mean(v * v, axis=0, keep_dims=True) @@ -47,11 +51,13 @@ def std(v): def random_noise(v, std_ratio=0.5): - """ - input: - v: (B, 1, 3) - return: - vn: (B, 1, 3) + """Adds random noise to unit vectors. + + Args: + v: (B, 1, 3) The input unit vectors. + + Returns: + vn: (B, 1, 3) The unit vectors with noise. """ r = tf.random_normal( shape=tf.shape(v)) * std(v) * std_ratio @@ -62,6 +68,16 @@ def random_noise(v, std_ratio=0.5): def rotation_matrix(alpha, beta, gamma): + """Computes the rotation matrix. + + Args: + alpha: The rotation around x axis. + beta: The rotation around y axis. + gamma: The rotation around z axis. + + Returns: + The rotation matrix. + """ Rx = np.array([[1, 0, 0], [0, np.cos(alpha), -np.sin(alpha)], [0, np.sin(alpha), np.cos(alpha)]]) @@ -76,13 +92,15 @@ def rotation_matrix(alpha, beta, gamma): def rot_mat(rx, ry, rz): - """ Compute rotation matrics - input: - rx: (B, 1) - ry: (B, 1) - rz: (B, 1) - return: - R: (B, 3, 3) + """Computes a batch of rotation matrices. + + Args: + rx: (B, 1) The rotation around x axis. + ry: (B, 1) The rotation around y axis. + rz: (B, 1) The rotation around z axis. + + Returns: + R: (B, 3, 3) The rotation matrix. """ zeros = tf.zeros_like(rx) ones = tf.ones_like(rx) @@ -106,12 +124,14 @@ def rot_mat(rx, ry, rz): def align(x, p): - """ Align the point cloud wrt. 6-DoF grasp - input: - x: (B, N, 3) - p: (B, 6) - return: - y: (B, N, 3) + """ Aligns the point cloud wrt. 6-DoF grasp. + + Args: + x: (B, N, 3) The input point cloud. + p: (B, 6) The 6-DoF grasps. + + Returns: + y: (B, N, 3) The aligned point cloud. """ c, rx, ry, rz = tf.split(p, [3, 1, 1, 1], axis=1) R = rot_mat(-rx, -ry, -rz) diff --git a/keypoints/main.py b/keypoints/main.py index f64ff1e..dafd010 100644 --- a/keypoints/main.py +++ b/keypoints/main.py @@ -3,7 +3,6 @@ from cvae.build import train_vae_grasp, train_gcnn_grasp from cvae.build import train_vae_keypoint, train_discr_keypoint -from cvae.build import train_vae_action, train_discr_action from cvae.build import inference_grasp, inference_keypoint parser = argparse.ArgumentParser() @@ -60,14 +59,6 @@ train_discr_keypoint(data_path=args.data_path, model_path=args.model_path, task_name=args.task_name) -elif args.mode == 'vae_action': - train_vae_action(data_path=args.data_path, - model_path=args.model_path, - task_name=args.task_name) -elif args.mode == 'discr_action': - train_discr_action(data_path=args.data_path, - model_path=args.model_path, - task_name=args.task_name) elif args.mode == 'inference_keypoint': inference_keypoint( data_path=args.data_path, diff --git a/keypoints/merge.py b/keypoints/merge.py index 8ffa79e..1fedc04 100644 --- a/keypoints/merge.py +++ b/keypoints/merge.py @@ -4,7 +4,6 @@ from cvae.build import build_grasp_inference_graph from cvae.build import build_keypoint_inference_graph -from cvae.build import build_action_inference_graph parser = argparse.ArgumentParser() @@ -19,8 +18,6 @@ type=str) parser.add_argument('--keypoint', type=str) -parser.add_argument('--action', - type=str) parser.add_argument('--num_funct_vect', type=str, default='1') @@ -38,12 +35,6 @@ build_grasp_inference_graph() build_keypoint_inference_graph( num_funct_vect=int(args.num_funct_vect)) -elif args.model == 'action': - build_action_inference_graph() -elif args.model == 'grasp_action': - build_grasp_inference_graph() - build_action_inference_graph() - else: raise ValueError(args.model) @@ -51,7 +42,7 @@ vars = tf.global_variables() - if args.model in ['grasp', 
'keypoint', 'action']: + if args.model in ['grasp', 'keypoint']: # Merges the generation network (VAE) and # the evaluation network (binary classifier). vars_vae = [var for var in vars if 'vae' in var.name] @@ -78,20 +69,5 @@ saver_keypoint.restore(sess, args.keypoint) saver.save(sess, args.output) - elif args.model == 'grasp_action': - # Merges the grasp prediction network and the action network - # This is only for the End-to-End baseline where we directly - # predict the actions from the visual observation. - vars_grasp = [var for var in vars if 'grasp' in var.name] - vars_action = [var for var in vars if 'action' in var.name] - - saver = tf.train.Saver(var_list=vars) - saver_grasp = tf.train.Saver(var_list=vars_grasp) - saver_action = tf.train.Saver(var_list=vars_action) - - saver_grasp.restore(sess, args.grasp) - saver_action.restore(sess, args.action) - saver.save(sess, args.output) - else: raise ValueError diff --git a/keypoints/utils/grasp/make_inputs_multiproc.py b/keypoints/utils/grasp/make_inputs_multiproc.py index 779713d..c7ceca3 100644 --- a/keypoints/utils/grasp/make_inputs_multiproc.py +++ b/keypoints/utils/grasp/make_inputs_multiproc.py @@ -35,12 +35,15 @@ for x in range(0, len(data_list), batch_size)] class Logger(object): + """Logging utils.""" def __init__(self, output='./output.log'): + """Initialization.""" self.output = output return def write(self, message): + """Writes the message to the log file.""" with open(self.output, 'a') as f: f.write(message + '\r\n') return @@ -55,7 +58,23 @@ def save_data(pos_point_cloud, neg_grasp, save_path, scale=20): - + """Saves the data to hdf5 file. + + Args: + pos_point_cloud: The point cloud associated + associated with the positive grasps. + neg_point_cloud: The point cloud associated + associated with the negative grasps. + pos_grasp: The positive grasps. + neg_grasp: The negative grasps. + save_path: The hdf5 file name. + scale: The constant to be multiplied with the + point cloud and grasp coordinates to fit + the input scale of the network. + + Returns: + None. + """ scale_grasp = np.reshape([scale, scale, scale, 1, 1, 1], (1, 6)) pos_point_cloud = np.concatenate(pos_point_cloud, axis=0) * scale @@ -111,6 +130,16 @@ def save_data(pos_point_cloud, def append_data(data_list, lock, save_path='./data.hdf5'): + """Appends data to a hdf5 file. + + Args: + data_list: The list of data to be saved. + lock: The lock to avoid io conflict. + save_path: The path to hdf5 file. + + Returns: + None. 
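# save_data above concatenates the accumulated examples, multiplies the
# metric coordinates by a constant (default scale=20) to match the
# network's input range -- note that only the xyz entries of each 6-DoF
# grasp are scaled, the angles are left alone -- and writes four datasets
# into one hdf5 file. A minimal h5py sketch of that layout; the grasp
# dataset keys are assumed to mirror the point-cloud keys the readers use:
import h5py
import numpy as np

def write_grasp_dataset(path, pos_p, pos_g, neg_p, neg_g, scale=20.0):
    scale_grasp = np.reshape([scale, scale, scale, 1, 1, 1], (1, 6))
    with h5py.File(path, 'w') as f:
        f.create_dataset('pos_point_cloud', data=np.asarray(pos_p) * scale)
        f.create_dataset('neg_point_cloud', data=np.asarray(neg_p) * scale)
        f.create_dataset('pos_grasp', data=np.asarray(pos_g) * scale_grasp)
        f.create_dataset('neg_grasp', data=np.asarray(neg_g) * scale_grasp)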
+ """ pos_point_cloud = [] neg_point_cloud = [] pos_grasp = [] @@ -128,8 +157,6 @@ def append_data(data_list, lock, save_path='./data.hdf5'): grasp_4dof = np.load(f) grasp = grasp_4dof - - lock.acquire() if grasp[0] > 0: pos_point_cloud.append( point_cloud[np.newaxis]) @@ -141,7 +168,6 @@ def append_data(data_list, lock, save_path='./data.hdf5'): neg_grasp.append(grasp[np.newaxis, 1:]) else: pass - lock.release() lock.acquire() save_data(pos_point_cloud, diff --git a/keypoints/utils/keypoint/make_inputs_multiproc.py b/keypoints/utils/keypoint/make_inputs_multiproc.py index 3623cc9..30dbc51 100644 --- a/keypoints/utils/keypoint/make_inputs_multiproc.py +++ b/keypoints/utils/keypoint/make_inputs_multiproc.py @@ -42,12 +42,14 @@ class Logger(object): - + """Logging utils.""" def __init__(self, output='./output.log'): + """Initialization.""" self.output = output return def write(self, message): + """Writes the message to the log file.""" with open(self.output, 'a') as f: f.write(message + '\r\n') return @@ -63,7 +65,23 @@ def save_data(pos_point_cloud, save_path, lock, scale=20): - + """Saves the data to hdf5 file. + + Args: + pos_point_cloud: The point cloud associated + associated with the positive keypoints. + neg_point_cloud: The point cloud associated + associated with the negative keypoints. + pos_grasp: The positive keypoints. + neg_grasp: The negative keypoints. + save_path: The hdf5 file name. + scale: The constant to be multiplied with the + point cloud and grasp coordinates to fit + the input scale of the network. + + Returns: + None. + """ pos_point_cloud = np.concatenate(pos_point_cloud, axis=0) * scale neg_point_cloud = np.concatenate(neg_point_cloud, axis=0) * scale @@ -127,6 +145,16 @@ def save_data(pos_point_cloud, lock.release() def append_data(data_list, lock, save_path): + """Appends data to a hdf5 file. + + Args: + data_list: The list of data to be saved. + lock: The lock to avoid io conflict. + save_path: The path to hdf5 file. + + Returns: + None. + """ pos_point_cloud = [] neg_point_cloud = [] pos_keypoints = [] diff --git a/keypoints/utils/visualize/visualize.py b/keypoints/utils/visualize/visualize.py index b5a17d0..b099e99 100644 --- a/keypoints/utils/visualize/visualize.py +++ b/keypoints/utils/visualize/visualize.py @@ -1,3 +1,4 @@ +"""Visualizes the point cloud and keypoints stored in npy files.""" import os import argparse import numpy as np diff --git a/setup.py b/setup.py deleted file mode 100644 index 03a77e8..0000000 --- a/setup.py +++ /dev/null @@ -1,18 +0,0 @@ -import os - -# Install the dependencies -os.system('pip install --upgrade numpy scipy h5py pyyaml future opencv-python matplotlib easydict gym sklearn python-pcl cvxpy') - -os.system('pip install pillow --no-cache-dir') - -# Install Tensorflow -os.system('pip install tf-nightly-gpu==1.13.0.dev20190117') - -os.system('pip install tf-agents==0.2.0rc2') - -os.system('pip install tensorflow-probability==0.5.0') - -os.system('pip install tf-estimator-nightly==1.13.0.dev2019010910') - -# Install Pybullet -os.system('pip install -e git+https://github.com/bulletphysics/bullet3@6a74f63604ceecd1db5c71036ffb0dbf17294579#egg=pybullet')
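# append_data in the two make_inputs_multiproc scripts above now takes
# the shared lock only around the final hdf5 write; this patch drops the
# earlier acquire/release around the in-memory appends, which are
# process-local and need no synchronization. A minimal multiprocessing
# sketch of that pattern with a stand-in worker and output file:
import multiprocessing as mp

def worker(batch, lock, save_path):
    results = [item * 2 for item in batch]    # local work: no lock held
    with lock:                                # serialize the shared write
        with open(save_path, 'a') as f:
            f.write(repr(results) + '\n')

if __name__ == '__main__':
    lock = mp.Lock()
    procs = [mp.Process(target=worker, args=([i, i + 1], lock, 'out.log'))
             for i in range(4)]
    for p in procs:
        p.start()
    for p in procs:
        p.join()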