diff --git a/README.md b/README.md
new file mode 100644
index 0000000..f8e5a64
--- /dev/null
+++ b/README.md
@@ -0,0 +1,46 @@
+# Horticultural Temporal Fruit Monitoring via 3D Instance Segmentation and Re-Identification using Point Clouds
+
+```
+python3.8 -m venv venv
+source venv/bin/activate
+pip install -r requirements.txt
+```
+
+## Install MinkowskiEngine
+
+If you don't already have it, install ninja to speed up compiling MinkowskiEngine:
+```
+pip install ninja
+```
+
+```
+git clone https://github.com/NVIDIA/MinkowskiEngine.git
+cd MinkowskiEngine
+python setup.py install
+```
+
+## Instance Segmentation
+```
+export PYTHONPATH=$(realpath instance_segmentation)
+cd instance_segmentation/mink_pan
+python scripts/run_full_row.py --modelpath {PATH_TO_MODEL}
+```
+
+## Re-Identification
+```
+cd re_identification
+python3 associate.py --data {PATH_TO_DATA_FOLDER} --iou 0.05
+python3 extractor.py --data {PATH_TO_DATA_FOLDER} --iou 0.05
+python3 train.py --data {PATH_TO_DATA_FOLDER} --iou 0.05 --mode testinst
+```
+
+
+## Visualize Results
+```
+# visualize the entire test set in 2D
+python3 visualize_clouds.py --data {PATH_TO_DATA_FOLDER} --iou 0.05
+
+# visualize the entire test set in 3D
+python3 visualize_clouds3D.py --data {PATH_TO_DATA_FOLDER} --iou 0.05
+
+```
\ No newline at end of file
diff --git a/instance_segmentation/mink_pan/config/backbone.yaml b/instance_segmentation/mink_pan/config/backbone.yaml
new file mode 100644
index 0000000..1c765fa
--- /dev/null
+++ b/instance_segmentation/mink_pan/config/backbone.yaml
@@ -0,0 +1,6 @@
+BACKBONE:
+  DIMENSION: 3
+  INPUT_DIM: 6 # coords + rgb
+  CR: 1 # channel ratio, scales feature dimensions
+  CHANNELS: [32, 32, 64, 128, 256, 256, 128, 96, 96]
+  RESOLUTION: 0.0005
diff --git a/instance_segmentation/mink_pan/config/model.yaml b/instance_segmentation/mink_pan/config/model.yaml
new file mode 100644
index 0000000..94405b5
--- /dev/null
+++ b/instance_segmentation/mink_pan/config/model.yaml
@@ -0,0 +1,62 @@
+EXPERIMENT:
+  ID: mink_panoptic
+
+TRAIN:
+  LR: 0.02
+  MAX_EPOCH: 100
+  BATCH_SIZE: 2
+  BATCH_ACC: 4
+  NUM_WORKERS: 6
+  N_GPUS: -1
+  FREEZE_MODULES: []
+  #FREEZE_MODULES: ['BACKBONE','SEM_HEAD','INS_HEAD']
+  AUG: True
+  SUBSAMPLE: True
+
+MODEL:
+  DATASET: STRAWBERRIES
+
+POST:
+  ALG: MEANSHIFT #HDBSCAN, MEANSHIFT
+  BANDWIDTH: 0.01 #Meanshift
+  MIN_CLUSTER: 5 #hdbscan #10 for nuscenes
+
+LOSS:
+  SEM:
+    WEIGHTS: [2,6] #Ce, Lovasz
+
+STRAWBERRIES:
+  PATH: /home/fusy/Documents/strawberries/DeployStrawberries/data #data/kitti
+  CONFIG: datasets/strawberries.yaml
+  NUM_CLASSES: 2 #Get from yaml
+  IGNORE_LABEL: -1
+  SPACE: # coord limits
+    [[-48.0,48.0],[-48.0,48.0],[-4.0,1.5]]
+  MIN_VOLUME_SPACE: [3,'-pi',-3]
+  MAX_VOLUME_SPACE: [50,'pi',1.5]
+  MINI: False
+  SUB_NUM_POINTS: 400000
+
+KITTI:
+  PATH: data/kitti
+  CONFIG: datasets/semantic-kitti.yaml
+  NUM_CLASSES: 20 #Get from yaml
+  IGNORE_LABEL: 0
+  SPACE: # coord limits
+    [[-48.0,48.0],[-48.0,48.0],[-4.0,1.5]]
+  MIN_VOLUME_SPACE: [3,'-pi',-3]
+  MAX_VOLUME_SPACE: [50,'pi',1.5]
+  MINI: False
+  SUB_NUM_POINTS: 80000
+
+NUSCENES:
+  PATH: data/nuscenes
+  CONFIG: datasets/semantic-nuscenes.yaml
+  NUM_CLASSES: 17 #Get from yaml
+  IGNORE_LABEL: 0
+  SPACE: # coord limits
+    [[-50.0,50.0],[-50.0,50.0],[-5.0,3]]
+  MIN_VOLUME_SPACE: [0,'-pi',-5]
+  MAX_VOLUME_SPACE: [50,'pi',3]
+  MINI: False
+  SUB_NUM_POINTS: 50000
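The run scripts themselves are not part of this diff; as a rough sketch of how the two configs above fit together (the file locations, the `easydict` dependency, and the loader shape are assumptions, the actual code in `scripts/run_full_row.py` may differ), they can be merged into a single dot-accessible config:

```
# Sketch: merge config/model.yaml and config/backbone.yaml into one config
# object with attribute access, matching how the dataset code indexes it
# (e.g. cfg[cfg.MODEL.DATASET].PATH and cfg.TRAIN.BATCH_SIZE below).
import yaml
from easydict import EasyDict as edict

def load_cfg(model_yaml="config/model.yaml", backbone_yaml="config/backbone.yaml"):
    with open(model_yaml) as f:
        cfg = yaml.safe_load(f)
    with open(backbone_yaml) as f:
        cfg.update(yaml.safe_load(f))  # adds the BACKBONE section
    return edict(cfg)

cfg = load_cfg()
print(cfg.MODEL.DATASET)            # STRAWBERRIES
print(cfg[cfg.MODEL.DATASET].PATH)  # dataset root used by the dataloaders
print(cfg.BACKBONE.RESOLUTION)      # 0.0005, voxel size for MinkowskiEngine
```

diff --git a/instance_segmentation/mink_pan/datasets/daniel_dataset.py b/instance_segmentation/mink_pan/datasets/daniel_dataset.py
new file mode 100644
index 0000000..69fd5ca
--- /dev/null
+++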
b/instance_segmentation/mink_pan/datasets/daniel_dataset.py @@ -0,0 +1,564 @@ +import os + +import numpy as np +import yaml +from pytorch_lightning import LightningDataModule +from torch.utils.data import DataLoader, Dataset + +import open3d as o3d +import random +with open("/home/penguin2/Documents/Strawberries/comprehensivemodel/data/transformations.yaml") as stream: + try: + transformations = yaml.safe_load(stream)["transformations"] + except yaml.YAMLError as exc: + print(exc) + +gt_08 = np.asarray(transformations["gt_08"]) +gt_14 = np.asarray(transformations["gt_14"]) +gt_21 = np.asarray(transformations["gt_21"]) + +print(os.getcwd()) + + +class SemanticDatasetModule(LightningDataModule): + def __init__(self, cfg): + super().__init__() + self.cfg = cfg + self.things_ids = [] + self.color_map = [] + self.label_names = [] + self.dataset = cfg.MODEL.DATASET + self.mini = cfg[cfg.MODEL.DATASET].MINI + self.min_volume_space = cfg[cfg.MODEL.DATASET].MIN_VOLUME_SPACE + self.max_volume_space = cfg[cfg.MODEL.DATASET].MAX_VOLUME_SPACE + + def prepare_data(self): + pass + + def setup(self, stage=None): + if "ONLY_SEQ" in self.cfg.TRAIN.keys(): + only_seq = self.cfg.TRAIN.ONLY_SEQ + else: + only_seq = None + val_split = "valid" + + if self.mini: + val_split = "mini_" + val_split + train_split = "mini_train" + else: + train_split = "train" + + train_set = SemanticDataset( + self.cfg[self.cfg.MODEL.DATASET].PATH + "/sequences/", + self.cfg[self.cfg.MODEL.DATASET].CONFIG, + split=train_split, + seq=only_seq, + dataset=self.dataset, + ) + self.train_pan_set = PanopticDataset( + dataset=train_set, + split="train", + space=self.cfg[self.cfg.MODEL.DATASET].SPACE, + num_pts=self.cfg[self.cfg.MODEL.DATASET].SUB_NUM_POINTS, + subsample=self.cfg.TRAIN.SUBSAMPLE, + aug=self.cfg.TRAIN.AUG, + ) + + val_set = SemanticDataset( + self.cfg[self.cfg.MODEL.DATASET].PATH + "/sequences/", + self.cfg[self.cfg.MODEL.DATASET].CONFIG, + split=val_split, + seq=only_seq, + dataset=self.dataset, + ) + self.val_pan_set = PanopticDataset( + dataset=val_set, split="valid", space=self.cfg[self.cfg.MODEL.DATASET].SPACE + ) + + test_set = SemanticDataset( + self.cfg[self.cfg.MODEL.DATASET].PATH + "/sequences/", + self.cfg[self.cfg.MODEL.DATASET].CONFIG, + split="test", + seq=only_seq, + dataset=self.dataset, + ) + self.test_pan_set = PanopticDataset( + dataset=test_set, split="test", space=self.cfg[self.cfg.MODEL.DATASET].SPACE + ) + + print("created test") + + self.things_ids = train_set.things_ids + self.color_map = train_set.color_map + self.label_names = train_set.label_names + + def train_dataloader(self): + dataset = self.train_pan_set + collate_fn = BatchCollation() + self.train_loader = DataLoader( + dataset=dataset, + batch_size=self.cfg.TRAIN.BATCH_SIZE, + collate_fn=collate_fn, + shuffle=True, + num_workers=self.cfg.TRAIN.NUM_WORKERS, + pin_memory=True, + drop_last=False, + timeout=0, + ) + self.train_iter = iter(self.train_loader) + return self.train_loader + + def val_dataloader(self): + dataset = self.val_pan_set + collate_fn = BatchCollation() + self.valid_loader = DataLoader( + dataset=dataset, + batch_size=self.cfg.TRAIN.BATCH_SIZE, + collate_fn=collate_fn, + shuffle=False, + num_workers=self.cfg.TRAIN.NUM_WORKERS, + pin_memory=True, + drop_last=False, + timeout=0, + ) + self.valid_iter = iter(self.valid_loader) + return self.valid_loader + + def test_dataloader(self): + dataset = self.test_pan_set + collate_fn = BatchCollation() + self.test_loader = DataLoader( + dataset=dataset, + 
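+            # (added note) BatchCollation keeps per-cloud lists instead of
+            # stacking tensors; the backbone later builds batched sparse
+            # coordinates from these lists (see models/backbone.py).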
batch_size=self.cfg.TRAIN.BATCH_SIZE,
+            collate_fn=collate_fn,
+            shuffle=False,
+            num_workers=self.cfg.TRAIN.NUM_WORKERS,
+            pin_memory=True,
+            drop_last=False,
+            timeout=0,
+        )
+        self.test_iter = iter(self.test_loader)
+        return self.test_loader
+
+
+class ExtremeInfo:
+    def __init__(self, pcd, extension):
+        print("generating extrema ", end="")
+        self.extension = extension
+        points = np.array(pcd.points)
+        colors = np.array(pcd.colors)
+        self.minpivot = points[:, 0].min() + self.extension[0]/2
+        self.maxpivot = points[:, 0].max() - self.extension[0]/2
+        self.pivotrange = self.maxpivot-self.minpivot
+        print(f"{self.minpivot:8.3f} {self.maxpivot:8.3f} {self.pivotrange:8.3f} done.")
+
+    def get(self):
+        random_extension = random.random() * (self.extension[1]/2 - self.extension[0]/2) + self.extension[0]/2
+        pivot = random.random() * self.pivotrange + self.minpivot
+        min_x = pivot-random_extension
+        max_x = pivot+random_extension
+        return min_x, max_x
+
+
+class SemanticDataset(Dataset):
+
+    def __init__(self, data_path, cfg_path, split="train", seq=None, dataset="KITTI"):
+        yaml_path = cfg_path
+        with open(yaml_path, "r") as stream:
+            semyaml = yaml.safe_load(stream)
+
+        self.things = get_things(dataset)
+        self.stuff = get_stuff(dataset)
+
+        self.label_names = {**self.things, **self.stuff}
+        self.things_ids = get_things_ids(dataset)
+
+        self.color_map = semyaml["color_map_learning"]
+        self.labels = semyaml["labels"]
+        self.learning_map = semyaml["learning_map"]
+        self.inv_learning_map = semyaml["learning_map_inv"]
+        self.split = split
+        split = semyaml["split"][self.split]
+
+        if seq:
+            split = [seq]
+
+        self.test = True
+
+        if self.test:
+            self.paths = [
+                "/home/penguin2/Documents/Strawberries/data/reduced_14_21_2.ply"
+            ]
+            self.Ts = [gt_21]
+            self.pcds = self.read_pcd()
+            self.extension = [0.20, 0.3]
+            self.labelpaths = [
+                "/home/penguin2/Documents/Strawberries/data/reduced_14_21_2.npy"
+            ]
+        else:
+            self.paths = [
+                "/home/penguin2/Documents/Strawberries/data/reduced_08_14_1.ply",
+                "/home/penguin2/Documents/Strawberries/data/reduced_08_14_2.ply",
+            ]
+            self.Ts = [gt_08, gt_14]
+            self.pcds = self.read_pcd()
+            self.extension = [0.20, 0.3]
+            self.labelpaths = [
+                "/home/penguin2/Documents/Strawberries/data/reduced_08_14_1.npy",
+                "/home/penguin2/Documents/Strawberries/data/reduced_08_14_2.npy",
+            ]
+
+        print("SPLIT", self.split, self.paths)
+
+        self.labels = [np.fromfile(self.labelpaths[i], dtype=np.int32) for i in range(len(self.labelpaths))]
+
+        for idx, (pc, lab) in enumerate(zip(self.pcds, self.labels)):
+            print("cloud", idx, "has", np.array(pc.points).shape, "labels", lab.shape, "unique ids:", np.unique(lab).shape)
+
+        self.infos = [ExtremeInfo(self.pcds[i], self.extension) for i in range(len(self.pcds))]
+
+
+    def read_pcd(self):
+        print('reading pcds')
+        return [self.read(i) for i in range(len(self.paths))]
+
+    def read(self, idx):
+        #print("reading pcd ", idx, end="... 
") + print("SPLIT", self.split, self.paths) + + pcd = o3d.io.read_point_cloud(self.paths[idx]) + pcd.transform(self.Ts[idx]) + print("done!") + return pcd + + + def __len__(self): + return 1000 if self.split=="train" else 1 + #return len(self.im_idx) + + def __getitem__(self, index): + + while True: + pcd_idx = random.randint(0, len(self.paths)-1) + #print("chosen pcd idx", pcd_idx) + + points = np.array(self.pcds[pcd_idx].points) + colors = np.array(self.pcds[pcd_idx].colors) + #min_x, max_x = 28.5, 28.7 + min_x, max_x = self.infos[pcd_idx].get() + + mask = np.logical_and(points[:, 0]>=min_x, points[:, 0]<=max_x) + + ins_labels = self.labels[pcd_idx][mask] + + if np.unique(ins_labels).shape[0]>1: + break + + + points = points[mask] + colors = colors[mask] + + mid_x = points[:, 0].min() + (points[:, 0].max() - points[:, 0].min())/2.0 + mid_y = points[:, 1].min() + (points[:, 1].max() - points[:, 1].min())/2.0 + mid_z = points[:, 2].min() + (points[:, 2].max() - points[:, 2].min())/2.0 + + points[:, 0] -= mid_x + points[:, 1] -= mid_y + points[:, 2] -= mid_z + + + #feats = np.hstack([points, colors]) + sem_labels = np.array(ins_labels, copy=True) + sem_labels[sem_labels>0] = 1 + + return (points, sem_labels.astype(np.int64), ins_labels.astype(np.int64), colors, None, None, None)#fname, pose, token) + + + +class PanopticDataset(Dataset): + def __init__(self, dataset, split, space, num_pts=0, subsample=False, aug=False): + self.dataset = dataset + self.num_points = num_pts + self.split = split + self.aug = aug + self.subsample = subsample + self.th_ids = dataset.things_ids + self.xlim = space[0] + self.ylim = space[1] + self.zlim = space[2] + + def __len__(self): + return len(self.dataset) + + def __getitem__(self, index): + data = self.dataset[index] + xyz, sem_labels, ins_labels, intensity, fname, pose, token = data + foreground = np.isin(sem_labels, self.th_ids).reshape(-1) + keep = np.argwhere( + (self.xlim[0] < xyz[:, 0]) + & (xyz[:, 0] < self.xlim[1]) + & (self.ylim[0] < xyz[:, 1]) + & (xyz[:, 1] < self.ylim[1]) + & (self.zlim[0] < xyz[:, 2]) + & (xyz[:, 2] < self.zlim[1]) + )[:, 0] + xyz = xyz[keep] + sem_labels = sem_labels[keep] + ins_labels = ins_labels[keep] + intensity = intensity[keep] + foreground = foreground[keep] + + feats = np.concatenate((xyz, intensity), axis=1) + + # Subsample + if self.split == "train" and self.subsample and len(xyz) > self.num_points: + idx = np.random.choice(np.arange(len(xyz)), self.num_points, replace=False) + xyz = xyz[idx] + sem_labels = sem_labels[idx] + ins_labels = ins_labels[idx] + feats = feats[idx] + intensity = intensity[idx] + foreground = foreground[idx] + + if self.split == "train" and self.aug: + xyz = pcd_augmentations(xyz) + + offset = get_offsets(xyz, ins_labels, sem_labels, self.th_ids) + return ( + xyz, + feats, + sem_labels, + ins_labels, + offset, + foreground, + fname, + pose, + token, + ) + + +class BatchCollation: + def __init__(self): + self.keys = [ + "pt_coord", + "feats", + "sem_label", + "ins_label", + "offset", + "foreground", + "fname", + "pose", + "token", + ] + + def __call__(self, data): + return {self.keys[i]: list(x) for i, x in enumerate(zip(*data))} + + +def pcd_augmentations(xyz): + # rotation + rotate_rad = np.deg2rad(np.random.random() * 360) + c, s = np.cos(rotate_rad), np.sin(rotate_rad) + j = np.matrix([[c, s], [-s, c]]) + xyz[:, :2] = np.dot(xyz[:, :2], j) + + # flip + flip_type = np.random.choice(4, 1) + if flip_type == 1: + xyz[:, 0] = -xyz[:, 0] + elif flip_type == 2: + xyz[:, 1] = -xyz[:, 1] + elif 
flip_type == 3: + xyz[:, 0] = -xyz[:, 0] + xyz[:, 1] = -xyz[:, 1] + + # scale + noise_scale = np.random.uniform(0.95, 1.05) + xyz[:, 0] = noise_scale * xyz[:, 0] + xyz[:, 1] = noise_scale * xyz[:, 1] + + # transform + trans_std = [0.1, 0.1, 0.1] + noise_translate = np.array( + [ + np.random.normal(0, trans_std[0], 1), + np.random.normal(0, trans_std[1], 1), + np.random.normal(0, trans_std[2], 1), + ] + ).T + xyz[:, 0:3] += noise_translate + + return xyz + + +def absoluteFilePaths(directory): + for dirpath, _, filenames in os.walk(directory): + for f in filenames: + yield os.path.abspath(os.path.join(dirpath, f)) + + +def absoluteDirPath(directory): + return os.path.abspath(directory) + + +def parse_calibration(filename): + calib = {} + calib_file = open(filename) + for line in calib_file: + key, content = line.strip().split(":") + values = [float(v) for v in content.strip().split()] + pose = np.zeros((4, 4)) + pose[0, 0:4] = values[0:4] + pose[1, 0:4] = values[4:8] + pose[2, 0:4] = values[8:12] + pose[3, 3] = 1.0 + calib[key] = pose + calib_file.close() + return calib + + +def parse_poses(filename, calibration): + file = open(filename) + poses = [] + Tr = calibration["Tr"] + Tr_inv = np.linalg.inv(Tr) + for line in file: + values = [float(v) for v in line.strip().split()] + pose = np.zeros((4, 4)) + pose[0, 0:4] = values[0:4] + pose[1, 0:4] = values[4:8] + pose[2, 0:4] = values[8:12] + pose[3, 3] = 1.0 + poses.append(np.matmul(Tr_inv, np.matmul(pose, Tr))) + return poses + + +def load_poses(pose_files, calib_files): + poses = [] + # go through every file and get all poses + # add them to match im_idx + for i in range(len(pose_files)): + calib = parse_calibration(calib_files[i]) + seq_poses_f64 = parse_poses(pose_files[i], calib) + seq_poses = [pose.astype(np.float32) for pose in seq_poses_f64] + poses += seq_poses + return poses + + +def load_tokens(token_files): + if len(token_files) == 0: + return [] + token_files.sort() + tokens = [] + # go through every file and get all tokens + for f in token_files: + token_file = open(f) + for line in token_file: + token = line.strip() + tokens.append(token) + token_file.close() + return tokens + + +def getDir(obj): + return os.path.dirname(os.path.abspath(obj)) + + +def calc_xyz_middle(xyz): + return np.array( + [ + (np.max(xyz[:, 0]) + np.min(xyz[:, 0])) / 2.0, + (np.max(xyz[:, 1]) + np.min(xyz[:, 1])) / 2.0, + (np.max(xyz[:, 2]) + np.min(xyz[:, 2])) / 2.0, + ], + dtype=np.float32, + ) + + +def get_offsets(xyz, ins_labels, sem_labels, th_ids): + offsets = np.zeros([xyz.shape[0], 3], dtype=np.float32) + things_ids, th_idx = np.unique(ins_labels, return_index=True) + keep_th = np.array( + [i for i, idx in enumerate(th_idx) if sem_labels[idx] in th_ids], dtype=int + ) + # remove instances with wrong sem class + things_ids = things_ids[keep_th] + th_idx = th_idx[keep_th] + for ti in things_ids: + idx = ins_labels == ti + xyz_i = xyz[idx] + if xyz_i.shape[0] <= 0: + continue + center = calc_xyz_middle(xyz_i) + offsets[idx] = center - xyz_i + return offsets + + +def get_things(dataset): + if dataset == "KITTI": + things = { + 1: "car", + 2: "bicycle", + 3: "motorcycle", + 4: "truck", + 5: "other-vehicle", + 6: "person", + 7: "bicyclist", + 8: "motorcyclist", + } + elif dataset == "NUSCENES": + things = { + 2: "bycicle", + 3: "bus", + 4: "car", + 5: "construction-vehicle", + 6: "motorcycle", + 7: "pedestrian", + 9: "trailer", + 10: "truck", + } + elif dataset == "STRAWBERRIES": + things = { + 1: "strawberry", + } + return things + + +def 
get_stuff(dataset): + if dataset == "KITTI": + stuff = { + 9: "road", + 10: "parking", + 11: "sidewalk", + 12: "other-ground", + 13: "building", + 14: "fence", + 15: "vegetation", + 16: "trunk", + 17: "terrain", + 18: "pole", + 19: "traffic-sign", + } + elif dataset == "NUSCENES": + stuff = { + 1: "barrier", + 8: "traffic_cone", + 11: "driveable_surface", + 12: "other_flat", + 13: "sidewalk", + 14: "terrain", + 15: "manmade", + 16: "vegetation", + } + elif dataset == "STRAWBERRIES": + stuff = {} + return stuff + + +def get_things_ids(dataset): + if dataset == "KITTI": + return [1, 2, 3, 4, 5, 6, 7, 8] + elif dataset == "NUSCENES": + return [2, 3, 4, 5, 6, 7, 9, 10] + elif dataset == "STRAWBERRIES": + return [1] diff --git a/instance_segmentation/mink_pan/datasets/deploy_dataset.py b/instance_segmentation/mink_pan/datasets/deploy_dataset.py new file mode 100644 index 0000000..b09eeca --- /dev/null +++ b/instance_segmentation/mink_pan/datasets/deploy_dataset.py @@ -0,0 +1,575 @@ +import os + +import numpy as np +import yaml +from pytorch_lightning import LightningDataModule +from torch.utils.data import DataLoader, Dataset + +import open3d as o3d +import random + + + +class SemanticDatasetModule(LightningDataModule): + def __init__(self, cfg): + super().__init__() + self.cfg = cfg + self.things_ids = [] + self.color_map = [] + self.label_names = [] + self.dataset = cfg.MODEL.DATASET + self.mini = cfg[cfg.MODEL.DATASET].MINI + self.min_volume_space = cfg[cfg.MODEL.DATASET].MIN_VOLUME_SPACE + self.max_volume_space = cfg[cfg.MODEL.DATASET].MAX_VOLUME_SPACE + + def prepare_data(self): + pass + + def setup(self, stage=None): + if "ONLY_SEQ" in self.cfg.TRAIN.keys(): + only_seq = self.cfg.TRAIN.ONLY_SEQ + else: + only_seq = None + val_split = "valid" + + if self.mini: + val_split = "mini_" + val_split + train_split = "mini_train" + else: + train_split = "train" + + train_set = SemanticDataset( + self.cfg[self.cfg.MODEL.DATASET].PATH, + self.cfg[self.cfg.MODEL.DATASET].CONFIG, + split=train_split, + seq=only_seq, + dataset=self.dataset, + ) + self.train_pan_set = PanopticDataset( + dataset=train_set, + split="train", + space=self.cfg[self.cfg.MODEL.DATASET].SPACE, + num_pts=self.cfg[self.cfg.MODEL.DATASET].SUB_NUM_POINTS, + subsample=self.cfg.TRAIN.SUBSAMPLE, + aug=self.cfg.TRAIN.AUG, + ) + + val_set = SemanticDataset( + self.cfg[self.cfg.MODEL.DATASET].PATH, + self.cfg[self.cfg.MODEL.DATASET].CONFIG, + split=val_split, + seq=only_seq, + dataset=self.dataset, + ) + self.val_pan_set = PanopticDataset( + dataset=val_set, split="valid", space=self.cfg[self.cfg.MODEL.DATASET].SPACE + ) + + test_set = SemanticDataset( + self.cfg[self.cfg.MODEL.DATASET].PATH, + self.cfg[self.cfg.MODEL.DATASET].CONFIG, + split="test", + seq=only_seq, + dataset=self.dataset, + ) + self.test_pan_set = PanopticDataset( + dataset=test_set, split="test", space=self.cfg[self.cfg.MODEL.DATASET].SPACE + ) + + print("created test") + + self.things_ids = train_set.things_ids + self.color_map = train_set.color_map + self.label_names = train_set.label_names + + def train_dataloader(self): + dataset = self.train_pan_set + collate_fn = BatchCollation() + self.train_loader = DataLoader( + dataset=dataset, + batch_size=self.cfg.TRAIN.BATCH_SIZE, + collate_fn=collate_fn, + shuffle=True, + num_workers=self.cfg.TRAIN.NUM_WORKERS, + pin_memory=True, + drop_last=False, + timeout=0, + ) + self.train_iter = iter(self.train_loader) + return self.train_loader + + def val_dataloader(self): + dataset = self.val_pan_set + collate_fn = 
BatchCollation() + self.valid_loader = DataLoader( + dataset=dataset, + batch_size=self.cfg.TRAIN.BATCH_SIZE, + collate_fn=collate_fn, + shuffle=False, + num_workers=self.cfg.TRAIN.NUM_WORKERS, + pin_memory=True, + drop_last=False, + timeout=0, + ) + self.valid_iter = iter(self.valid_loader) + return self.valid_loader + + def test_dataloader(self): + dataset = self.test_pan_set + collate_fn = BatchCollation() + self.test_loader = DataLoader( + dataset=dataset, + batch_size=1,#self.cfg.TRAIN.BATCH_SIZE, + collate_fn=collate_fn, + shuffle=False, + num_workers=0,#self.cfg.TRAIN.NUM_WORKERS, + pin_memory=True, + drop_last=False, + timeout=0, + ) + + self.test_iter = iter(self.test_loader) + return self.test_loader + + +class ExtremeInfo: + def __init__(self, pcd, extension): + print("generating extrema ", end="") + self.extension = extension + points = np.array(pcd.points) + colors = np.array(pcd.colors) + + self.min_x = points[:, 0].min() + self.max_x = points[:, 0].max() + + self.minpivot = self.min_x + self.extension[0]/2 + self.maxpivot = self.max_x - self.extension[0]/2 + self.pivotrange = self.maxpivot-self.minpivot + print(f"{self.minpivot:8.3f} {self.maxpivot:8.3f} {self.pivotrange:8.3f} done.") + + def get(self): + random_extension = random.random() * (self.extension[1]/2 - self.extension[0]/2) + self.extension[0]/2 + pivot = random.random() * self.pivotrange + self.minpivot + min_x = pivot-random_extension + max_x = pivot+random_extension + return min_x, max_x + + +class SemanticDataset(Dataset): + + def __init__(self, data_path, cfg_path, split="train", seq=None, dataset="KITTI"): + yaml_path = cfg_path + with open(yaml_path, "r") as stream: + semyaml = yaml.safe_load(stream) + + self.things = get_things(dataset) + self.stuff = get_stuff(dataset) + + self.label_names = {**self.things, **self.stuff} + self.things_ids = get_things_ids(dataset) + + self.color_map = semyaml["color_map_learning"] + self.labels = semyaml["labels"] + self.learning_map = semyaml["learning_map"] + self.inv_learning_map = semyaml["learning_map_inv"] + self.split = split + split = semyaml["split"][self.split] + + with open(f"{data_path}/transformations.yaml") as stream: + try: + transformations = yaml.safe_load(stream)["transformations"] + except yaml.YAMLError as exc: + print(exc) + + gt_08 = np.asarray(transformations["gt_08"]) + gt_14 = np.asarray(transformations["gt_14"]) + gt_21 = np.asarray(transformations["gt_21"]) + + if seq: + split = [seq] + + self.test = True + + if self.test: + self.paths = [ + f"{data_path}/reduced_14_21_2.ply" + ] + self.Ts = [gt_21] + self.pcds = self.read_pcd() + self.extension = [0.20, 0.3] + self.labelpaths = [ + f"{data_path}/reduced_14_21_2.npy" + ] + else: + self.paths = [ + f"{data_path}/reduced_08_14_1.ply", + f"{data_path}/reduced_08_14_2.ply", + ] + self.Ts = [gt_08, gt_14] + self.pcds = self.read_pcd() + self.extension = [0.20, 0.3] + self.labelpaths = [ + f"{data_path}/reduced_08_14_1.npy", + f"{data_path}/reduced_08_14_2.npy", + ] + + print("SPLIT", self.split, self.paths) + + self.labels = [np.fromfile(self.labelpaths[i], dtype=np.int32) for i in range(len(self.labelpaths))] + + for idx, (pc, lab) in enumerate(zip(self.pcds, self.labels)): + print("cloud", idx, "has", np.array(pc.points).shape, "labels", lab.shape, "unique ids:", np.unique(lab).shape) + + self.infos = [ExtremeInfo(self.pcds[i], self.extension) for i in range(len(self.pcds))] + + #self.fullpivots = [info.generate_full_pivots() for info in self.infos] + + + self.ext = 0.3 + + def read_pcd(self): + 
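+        # (added note) ExtremeInfo.get() above samples a random crop window
+        # along the row (x axis): the half-width is uniform in
+        # [extension[0]/2, extension[1]/2] (0.10-0.15 m here, i.e. crops
+        # 0.20-0.30 m wide) and the pivot is uniform in
+        # [min_x + extension[0]/2, max_x - extension[0]/2], so a maximal-width
+        # window can overshoot the row ends by up to
+        # (extension[1] - extension[0]) / 2 = 0.05 m.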
print('reading pcds') + return [self.read(i) for i in range(len(self.paths))] + + def read(self, idx): + print("reading pcd ", idx, end="... ") + pcd = o3d.io.read_point_cloud(self.paths[idx]) + pcd.transform(self.Ts[idx]) + print("done!") + return pcd + + + def __len__(self): + return 1#len(self.fullpivots[0]) + + def __getitem__(self, min_x, max_x): + + pcd_idx = 0 + + points = np.array(self.pcds[pcd_idx].points) + colors = np.array(self.pcds[pcd_idx].colors) + + #min_x, max_x = 28.5, 28.7 + #min_x, max_x = self.infos[pcd_idx].get() + #pivot = self.fullpivots[pcd_idx][index] + #min_x, max_x = pivot-self.extension[0]/2, pivot+self.extension[0]/2 + + print(f"CUTTING from {min_x:8.3f} to {max_x:8.3f}") + + mask = np.logical_and(points[:, 0]>=min_x, points[:, 0]<=max_x) + + ins_labels = self.labels[pcd_idx][mask] + + points = points[mask] + colors = colors[mask] + + mid_x = points[:, 0].min() + (points[:, 0].max() - points[:, 0].min())/2.0 + mid_y = points[:, 1].min() + (points[:, 1].max() - points[:, 1].min())/2.0 + mid_z = points[:, 2].min() + (points[:, 2].max() - points[:, 2].min())/2.0 + + points[:, 0] -= mid_x + points[:, 1] -= mid_y + points[:, 2] -= mid_z + + #feats = np.hstack([points, colors]) + sem_labels = np.array(ins_labels, copy=True) + sem_labels[sem_labels>0] = 1 + + return (points, sem_labels.astype(np.int64), ins_labels.astype(np.int64), colors, mask, mid_x, None)#fname, pose, token) + + + +class PanopticDataset(Dataset): + def __init__(self, dataset, split, space, num_pts=0, subsample=False, aug=False): + self.dataset = dataset + self.num_points = num_pts + self.split = split + self.aug = aug + self.subsample = subsample + self.th_ids = dataset.things_ids + self.xlim = space[0] + self.ylim = space[1] + self.zlim = space[2] + + def __len__(self): + return len(self.dataset) + + def __getitem__(self, min_x, max_x): + #data = self.dataset[index] + data = self.dataset.__getitem__(min_x, max_x) + xyz, sem_labels, ins_labels, intensity, totmask, mid_x, token = data + foreground = np.isin(sem_labels, self.th_ids).reshape(-1) + + #import ipdb; ipdb.set_trace() + + keep = np.argwhere( + (self.xlim[0] < xyz[:, 0]) + & (xyz[:, 0] < self.xlim[1]) + & (self.ylim[0] < xyz[:, 1]) + & (xyz[:, 1] < self.ylim[1]) + & (self.zlim[0] < xyz[:, 2]) + & (xyz[:, 2] < self.zlim[1]) + )[:, 0] + xyz = xyz[keep] + sem_labels = sem_labels[keep] + ins_labels = ins_labels[keep] + intensity = intensity[keep] + foreground = foreground[keep] + + feats = np.concatenate((xyz, intensity), axis=1) + + # Subsample + if self.split == "train" and self.subsample and len(xyz) > self.num_points: + idx = np.random.choice(np.arange(len(xyz)), self.num_points, replace=False) + xyz = xyz[idx] + sem_labels = sem_labels[idx] + ins_labels = ins_labels[idx] + feats = feats[idx] + intensity = intensity[idx] + foreground = foreground[idx] + + if self.split == "train" and self.aug: + xyz = pcd_augmentations(xyz) + + offset = get_offsets(xyz, ins_labels, sem_labels, self.th_ids) + + #import ipdb; ipdb.set_trace() + + return ( + xyz, + feats, + sem_labels, + ins_labels, + offset, + foreground, + totmask, + mid_x, + token, + ) + + +class BatchCollation: + def __init__(self): + self.keys = [ + "pt_coord", + "feats", + "sem_label", + "ins_label", + "offset", + "foreground", + "totmask", + "mid_x", + "token", + ] + + def __call__(self, data): + return {self.keys[i]: list(x) for i, x in enumerate(zip(*data))} + + +def pcd_augmentations(xyz): + # rotation + rotate_rad = np.deg2rad(np.random.random() * 360) + c, s = np.cos(rotate_rad), 
np.sin(rotate_rad) + j = np.matrix([[c, s], [-s, c]]) + xyz[:, :2] = np.dot(xyz[:, :2], j) + + # flip + flip_type = np.random.choice(4, 1) + if flip_type == 1: + xyz[:, 0] = -xyz[:, 0] + elif flip_type == 2: + xyz[:, 1] = -xyz[:, 1] + elif flip_type == 3: + xyz[:, 0] = -xyz[:, 0] + xyz[:, 1] = -xyz[:, 1] + + # scale + noise_scale = np.random.uniform(0.95, 1.05) + xyz[:, 0] = noise_scale * xyz[:, 0] + xyz[:, 1] = noise_scale * xyz[:, 1] + + # transform + trans_std = [0.1, 0.1, 0.1] + noise_translate = np.array( + [ + np.random.normal(0, trans_std[0], 1), + np.random.normal(0, trans_std[1], 1), + np.random.normal(0, trans_std[2], 1), + ] + ).T + xyz[:, 0:3] += noise_translate + + return xyz + + +def absoluteFilePaths(directory): + for dirpath, _, filenames in os.walk(directory): + for f in filenames: + yield os.path.abspath(os.path.join(dirpath, f)) + + +def absoluteDirPath(directory): + return os.path.abspath(directory) + + +def parse_calibration(filename): + calib = {} + calib_file = open(filename) + for line in calib_file: + key, content = line.strip().split(":") + values = [float(v) for v in content.strip().split()] + pose = np.zeros((4, 4)) + pose[0, 0:4] = values[0:4] + pose[1, 0:4] = values[4:8] + pose[2, 0:4] = values[8:12] + pose[3, 3] = 1.0 + calib[key] = pose + calib_file.close() + return calib + + +def parse_poses(filename, calibration): + file = open(filename) + poses = [] + Tr = calibration["Tr"] + Tr_inv = np.linalg.inv(Tr) + for line in file: + values = [float(v) for v in line.strip().split()] + pose = np.zeros((4, 4)) + pose[0, 0:4] = values[0:4] + pose[1, 0:4] = values[4:8] + pose[2, 0:4] = values[8:12] + pose[3, 3] = 1.0 + poses.append(np.matmul(Tr_inv, np.matmul(pose, Tr))) + return poses + + +def load_poses(pose_files, calib_files): + poses = [] + # go through every file and get all poses + # add them to match im_idx + for i in range(len(pose_files)): + calib = parse_calibration(calib_files[i]) + seq_poses_f64 = parse_poses(pose_files[i], calib) + seq_poses = [pose.astype(np.float32) for pose in seq_poses_f64] + poses += seq_poses + return poses + + +def load_tokens(token_files): + if len(token_files) == 0: + return [] + token_files.sort() + tokens = [] + # go through every file and get all tokens + for f in token_files: + token_file = open(f) + for line in token_file: + token = line.strip() + tokens.append(token) + token_file.close() + return tokens + + +def getDir(obj): + return os.path.dirname(os.path.abspath(obj)) + + +def calc_xyz_middle(xyz): + return np.array( + [ + (np.max(xyz[:, 0]) + np.min(xyz[:, 0])) / 2.0, + (np.max(xyz[:, 1]) + np.min(xyz[:, 1])) / 2.0, + (np.max(xyz[:, 2]) + np.min(xyz[:, 2])) / 2.0, + ], + dtype=np.float32, + ) + + +def get_offsets(xyz, ins_labels, sem_labels, th_ids): + offsets = np.zeros([xyz.shape[0], 3], dtype=np.float32) + things_ids, th_idx = np.unique(ins_labels, return_index=True) + keep_th = np.array( + [i for i, idx in enumerate(th_idx) if sem_labels[idx] in th_ids], dtype=int + ) + # remove instances with wrong sem class + things_ids = things_ids[keep_th] + th_idx = th_idx[keep_th] + for ti in things_ids: + idx = ins_labels == ti + xyz_i = xyz[idx] + if xyz_i.shape[0] <= 0: + continue + center = calc_xyz_middle(xyz_i) + offsets[idx] = center - xyz_i + return offsets + + +def get_things(dataset): + if dataset == "KITTI": + things = { + 1: "car", + 2: "bicycle", + 3: "motorcycle", + 4: "truck", + 5: "other-vehicle", + 6: "person", + 7: "bicyclist", + 8: "motorcyclist", + } + elif dataset == "NUSCENES": + things = { + 2: 
"bycicle", + 3: "bus", + 4: "car", + 5: "construction-vehicle", + 6: "motorcycle", + 7: "pedestrian", + 9: "trailer", + 10: "truck", + } + elif dataset == "STRAWBERRIES": + things = { + 1: "strawberry", + } + return things + + +def get_stuff(dataset): + if dataset == "KITTI": + stuff = { + 9: "road", + 10: "parking", + 11: "sidewalk", + 12: "other-ground", + 13: "building", + 14: "fence", + 15: "vegetation", + 16: "trunk", + 17: "terrain", + 18: "pole", + 19: "traffic-sign", + } + elif dataset == "NUSCENES": + stuff = { + 1: "barrier", + 8: "traffic_cone", + 11: "driveable_surface", + 12: "other_flat", + 13: "sidewalk", + 14: "terrain", + 15: "manmade", + 16: "vegetation", + } + elif dataset == "STRAWBERRIES": + stuff = {} + return stuff + + +def get_things_ids(dataset): + if dataset == "KITTI": + return [1, 2, 3, 4, 5, 6, 7, 8] + elif dataset == "NUSCENES": + return [2, 3, 4, 5, 6, 7, 9, 10] + elif dataset == "STRAWBERRIES": + return [1] diff --git a/instance_segmentation/mink_pan/datasets/semantic-kitti.yaml b/instance_segmentation/mink_pan/datasets/semantic-kitti.yaml new file mode 100644 index 0000000..c959491 --- /dev/null +++ b/instance_segmentation/mink_pan/datasets/semantic-kitti.yaml @@ -0,0 +1,233 @@ +# This file is covered by the LICENSE file in the root of this project. +labels: + 0 : "unlabeled" + 1 : "outlier" + 10: "car" + 11: "bicycle" + 13: "bus" + 15: "motorcycle" + 16: "on-rails" + 18: "truck" + 20: "other-vehicle" + 30: "person" + 31: "bicyclist" + 32: "motorcyclist" + 40: "road" + 44: "parking" + 48: "sidewalk" + 49: "other-ground" + 50: "building" + 51: "fence" + 52: "other-structure" + 60: "lane-marking" + 70: "vegetation" + 71: "trunk" + 72: "terrain" + 80: "pole" + 81: "traffic-sign" + 99: "other-object" + 252: "moving-car" + 253: "moving-bicyclist" + 254: "moving-person" + 255: "moving-motorcyclist" + 256: "moving-on-rails" + 257: "moving-bus" + 258: "moving-truck" + 259: "moving-other-vehicle" +color_map: # bgr + 0 : [0, 0, 0] + 1 : [0, 0, 255] + 10: [245, 150, 100] + 11: [245, 230, 100] + 13: [250, 80, 100] + 15: [150, 60, 30] + 16: [255, 0, 0] + 18: [180, 30, 80] + 20: [255, 0, 0] + 30: [30, 30, 255] + 31: [200, 40, 255] + 32: [90, 30, 150] + 40: [255, 0, 255] + 44: [255, 150, 255] + 48: [75, 0, 75] + 49: [75, 0, 175] + 50: [0, 200, 255] + 51: [50, 120, 255] + 52: [0, 150, 255] + 60: [170, 255, 150] + 70: [0, 175, 0] + 71: [0, 60, 135] + 72: [80, 240, 150] + 80: [150, 240, 255] + 81: [0, 0, 255] + 99: [255, 255, 50] + 252: [245, 150, 100] + 256: [255, 0, 0] + 253: [200, 40, 255] + 254: [30, 30, 255] + 255: [90, 30, 150] + 257: [250, 80, 100] + 258: [180, 30, 80] + 259: [255, 0, 0] +content: # as a ratio with the total number of points + 0: 0.018889854628292943 + 1: 0.0002937197336781505 + 10: 0.040818519255974316 + 11: 0.00016609538710764618 + 13: 2.7879693665067774e-05 + 15: 0.00039838616015114444 + 16: 0.0 + 18: 0.0020633612104619787 + 20: 0.0016218197275284021 + 30: 0.00017698551338515307 + 31: 1.1065903904919655e-08 + 32: 5.532951952459828e-09 + 40: 0.1987493871255525 + 44: 0.014717169549888214 + 48: 0.14392298360372 + 49: 0.0039048553037472045 + 50: 0.1326861944777486 + 51: 0.0723592229456223 + 52: 0.002395131480328884 + 60: 4.7084144280367186e-05 + 70: 0.26681502148037506 + 71: 0.006035012012626033 + 72: 0.07814222006271769 + 80: 0.002855498193863172 + 81: 0.0006155958086189918 + 99: 0.009923127583046915 + 252: 0.001789309418528068 + 253: 0.00012709999297008662 + 254: 0.00016059776092534436 + 255: 3.745553104802113e-05 + 256: 0.0 + 257: 
0.00011351574470342043 + 258: 0.00010157861367183268 + 259: 4.3840131989471124e-05 +# classes that are indistinguishable from single scan or inconsistent in +# ground truth are mapped to their closest equivalent +learning_map: + 0 : 0 # "unlabeled" + 1 : 0 # "outlier" mapped to "unlabeled" --------------------------mapped + 10: 1 # "car" + 11: 2 # "bicycle" + 13: 5 # "bus" mapped to "other-vehicle" --------------------------mapped + 15: 3 # "motorcycle" + 16: 5 # "on-rails" mapped to "other-vehicle" ---------------------mapped + 18: 4 # "truck" + 20: 5 # "other-vehicle" + 30: 6 # "person" + 31: 7 # "bicyclist" + 32: 8 # "motorcyclist" + 40: 9 # "road" + 44: 10 # "parking" + 48: 11 # "sidewalk" + 49: 12 # "other-ground" + 50: 13 # "building" + 51: 14 # "fence" + 52: 0 # "other-structure" mapped to "unlabeled" ------------------mapped + 60: 9 # "lane-marking" to "road" ---------------------------------mapped + 70: 15 # "vegetation" + 71: 16 # "trunk" + 72: 17 # "terrain" + 80: 18 # "pole" + 81: 19 # "traffic-sign" + 99: 0 # "other-object" to "unlabeled" ----------------------------mapped + 252: 1 # "moving-car" to "car" ------------------------------------mapped + 253: 7 # "moving-bicyclist" to "bicyclist" ------------------------mapped + 254: 6 # "moving-person" to "person" ------------------------------mapped + 255: 8 # "moving-motorcyclist" to "motorcyclist" ------------------mapped + 256: 5 # "moving-on-rails" mapped to "other-vehicle" --------------mapped + 257: 5 # "moving-bus" mapped to "other-vehicle" -------------------mapped + 258: 4 # "moving-truck" to "truck" --------------------------------mapped + 259: 5 # "moving-other"-vehicle to "other-vehicle" ----------------mapped +learning_map_inv: # inverse of previous map + 0: 0 # "unlabeled", and others ignored + 1: 10 # "car" + 2: 11 # "bicycle" + 3: 15 # "motorcycle" + 4: 18 # "truck" + 5: 20 # "other-vehicle" + 6: 30 # "person" + 7: 31 # "bicyclist" + 8: 32 # "motorcyclist" + 9: 40 # "road" + 10: 44 # "parking" + 11: 48 # "sidewalk" + 12: 49 # "other-ground" + 13: 50 # "building" + 14: 51 # "fence" + 15: 70 # "vegetation" + 16: 71 # "trunk" + 17: 72 # "terrain" + 18: 80 # "pole" + 19: 81 # "traffic-sign" +learning_ignore: # Ignore classes + 0: True # "unlabeled", and others ignored + 1: False # "car" + 2: False # "bicycle" + 3: False # "motorcycle" + 4: False # "truck" + 5: False # "other-vehicle" + 6: False # "person" + 7: False # "bicyclist" + 8: False # "motorcyclist" + 9: False # "road" + 10: False # "parking" + 11: False # "sidewalk" + 12: False # "other-ground" + 13: False # "building" + 14: False # "fence" + 15: False # "vegetation" + 16: False # "trunk" + 17: False # "terrain" + 18: False # "pole" + 19: False # "traffic-sign" +color_map_learning : { + 0: [0, 0, 0], + 1: [245, 150, 100], + 2: [245, 230, 100], + 3: [150, 60, 30], + 4: [180, 30, 80], + 5: [255, 0, 0], + 6: [30, 30, 255], + 7: [200, 40, 255], + 8: [90, 30, 150], + 9: [255, 0, 255], + 10: [255, 150, 255], + 11: [75, 0, 75], + 12: [75, 0, 175], + 13: [0, 200, 255], + 14: [50, 120, 255], + 15: [0, 175, 0], + 16: [0, 60, 135], + 17: [80, 240, 150], + 18: [150, 240, 255], + 19: [0, 0, 255], +} +split: # sequence numbers + train: + - 0 + - 1 + - 2 + - 3 + - 4 + - 5 + - 6 + - 7 + - 9 + - 10 + valid: + - 8 + test: + - 11 + - 12 + - 13 + - 14 + - 15 + - 16 + - 17 + - 18 + - 19 + - 20 + - 21 diff --git a/instance_segmentation/mink_pan/datasets/semantic-nuscenes.yaml b/instance_segmentation/mink_pan/datasets/semantic-nuscenes.yaml new file mode 100644 index 
0000000..53c7b79 --- /dev/null +++ b/instance_segmentation/mink_pan/datasets/semantic-nuscenes.yaml @@ -0,0 +1,178 @@ +labels: + 0: 'noise' + 1: 'animal' + 2: 'human.pedestrian.adult' + 3: 'human.pedestrian.child' + 4: 'human.pedestrian.construction_worker' + 5: 'human.pedestrian.personal_mobility' + 6: 'human.pedestrian.police_officer' + 7: 'human.pedestrian.stroller' + 8: 'human.pedestrian.wheelchair' + 9: 'movable_object.barrier' + 10: 'movable_object.debris' + 11: 'movable_object.pushable_pullable' + 12: 'movable_object.trafficcone' + 13: 'static_object.bicycle_rack' + 14: 'vehicle.bicycle' + 15: 'vehicle.bus.bendy' + 16: 'vehicle.bus.rigid' + 17: 'vehicle.car' + 18: 'vehicle.construction' + 19: 'vehicle.emergency.ambulance' + 20: 'vehicle.emergency.police' + 21: 'vehicle.motorcycle' + 22: 'vehicle.trailer' + 23: 'vehicle.truck' + 24: 'flat.driveable_surface' + 25: 'flat.other' + 26: 'flat.sidewalk' + 27: 'flat.terrain' + 28: 'static.manmade' + 29: 'static.other' + 30: 'static.vegetation' + 31: 'vehicle.ego' +labels_16: + 0: 'noise' + 1: 'barrier' + 2: 'bicycle' + 3: 'bus' + 4: 'car' + 5: 'construction_vehicle' + 6: 'motorcycle' + 7: 'pedestrian' + 8: 'traffic_cone' + 9: 'trailer' + 10: 'truck' + 11: 'driveable_surface' + 12: 'other_flat' + 13: 'sidewalk' + 14: 'terrain' + 15: 'manmade' + 16: 'vegetation' +color_map: # bgr + 0 : [0, 0, 0] + 1 : [0, 0, 255] + 2: [245, 150, 100] + 3: [245, 230, 100] + 4: [250, 80, 100] + 5: [150, 60, 30] + 6: [255, 0, 0] + 7: [180, 30, 80] + 8: [255, 0, 0] + 9: [30, 30, 255] + 10: [200, 40, 255] + 11: [90, 30, 150] + 12: [255, 0, 255] + 13: [255, 150, 255] + 14: [75, 0, 75] + 15: [75, 0, 175] + 16: [0, 200, 255] + 17: [50, 120, 255] + 18: [0, 150, 255] + 19: [170, 255, 150] + 20: [0, 175, 0] + 21: [0, 60, 135] + 22: [80, 240, 150] + 23: [150, 240, 255] + 24: [0, 0, 255] + 25: [255, 255, 50] + 26: [245, 150, 100] + 27: [255, 0, 0] + 28: [200, 40, 255] + 29: [30, 30, 255] + 30: [90, 30, 150] + 31: [250, 80, 100] +learning_map: + 1: 0 #noise + 5: 0 #noise + 7: 0 #noise + 8: 0 #noise + 10: 0 #noise + 11: 0 #noise + 13: 0 #noise + 19: 0 #noise + 20: 0 #noise + 0: 0 #noise + 29: 0 #noise + 31: 0 #noise + 9: 1 #barrier + 14: 2 #bicycle + 15: 3 #bus + 16: 3 #bus + 17: 4 #car + 18: 5 #construction vehicle + 21: 6 #motorcycle + 2: 7 #pedestrian + 3: 7 #pedestrian + 4: 7 #pedestrian + 6: 7 #pedestrian + 12: 8 #trafic cone + 22: 9 #trailer + 23: 10 #truck + 24: 11 #driveble surface + 25: 12 #other + 26: 13 #sidewalk + 27: 14 #terrain + 28: 15 #manmade + 30: 16 #vegetation +learning_map_inv: + 0: 0 + 1: 9 + 2: 14 + 3: 16 + 4: 17 + 5: 18 + 6: 21 + 7: 2 + 8: 12 + 9: 22 + 10: 23 + 11: 24 + 12: 25 + 13: 26 + 14: 27 + 15: 28 + 16: 30 +learning_ignore: + 0: True + 1: False + 2: False + 3: False + 4: False + 5: False + 6: False + 7: False + 8: False + 9: False + 10: False + 11: False + 12: False + 13: False + 14: False + 15: False + 16: False +color_map_learning : { + 0: [0, 0, 0], + 1: [245, 150, 100], + 2: [245, 230, 100], + 3: [150, 60, 30], + 4: [180, 30, 80], + 5: [255, 0, 0], + 6: [30, 30, 255], + 7: [200, 40, 255], + 8: [90, 30, 150], + 9: [255, 0, 255], + 10: [255, 150, 255], + 11: [75, 0, 75], + 12: [75, 0, 175], + 13: [0, 200, 255], + 14: [50, 120, 255], + 15: [0, 175, 0], + 16: [0, 60, 135], +} +split: # sequence numbers + mini_train: [61, 553, 655, 757, 796, 1077, 1094, 1100] + mini_valid: [103, 916] + train: 
[1,2,4,5,6,7,8,9,10,11,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,120,121,122,123,124,125,126,127,128,129,130,131,132,133,134,135,138,139,149,150,151,152,154,155,157,158,159,160,161,162,163,164,165,166,167,168,170,171,172,173,174,175,176,177,178,179,180,181,182,183,184,185,187,188,190,191,192,193,194,195,196,199,200,202,203,204,206,207,208,209,210,211,212,213,214,218,219,220,222,224,225,226,227,228,229,230,231,232,233,234,235,236,237,238,239,240,241,242,243,244,245,246,247,248,249,250,251,252,253,254,255,256,257,258,259,260,261,262,263,264,283,284,285,286,287,288,289,290,291,292,293,294,295,296,297,298,299,300,301,302,303,304,305,306,315,316,317,318,321,323,324,328,347,348,349,350,351,352,353,354,355,356,357,358,359,360,361,362,363,364,365,366,367,368,369,370,371,372,373,374,375,376,377,378,379,380,381,382,383,384,385,386,388,389,390,391,392,393,394,395,396,397,398,399,400,401,402,403,405,406,407,408,410,411,412,413,414,415,416,417,418,419,420,421,422,423,424,425,426,427,428,429,430,431,432,433,434,435,436,437,438,439,440,441,442,443,444,445,446,447,448,449,450,451,452,453,454,455,456,457,458,459,461,462,463,464,465,467,468,469,471,472,474,475,476,477,478,479,480,499,500,501,502,504,505,506,507,508,509,510,511,512,513,514,515,517,518,525,526,527,528,529,530,531,532,533,534,535,536,537,538,539,541,542,543,544,545,546,566,568,570,571,572,573,574,575,576,577,578,580,582,583,584,585,586,587,588,589,590,591,592,593,594,595,596,597,598,599,600,639,640,641,642,643,644,645,646,647,648,649,650,651,652,653,654,655,656,657,658,659,660,661,662,663,664,665,666,667,668,669,670,671,672,673,674,675,676,677,678,679,681,683,684,685,686,687,688,689,695,696,697,698,700,701,703,704,705,706,707,708,709,710,711,72,713,714,715,716,717,718,719,726,727,728,730,731,733,734,735,736,737,738,739,740,741,744,746,747,749,750,751,752,757,758,759,760,761,762,763,764,765,767,768,769,786,787,789,790,791,792,803,804,805,806,808,809,810,811,812,813,815,816,817,819,820,821,822,847,848,849,850,851,852,853,854,855,856,858,860,861,862,863,864,865,866,868,869,870,871,872,873,875,876,877,878,880,882,883,884,885,886,887,888,889,890,891,892,893,894,895,896,897,898,899,900,901,902,903,945,947,949,952,953,955,956,957,958,959,960,961,975,976,977,978,979,980,981,982,983,984,988,989,990,991,992,994,995,996,997,998,999,1000,1001,1002,1003,1004,1005,1006,1007,1008,1009,1010,1011,1012,1013,1014,1015,1016,1017,1018,1019,1020,1021,1022,1023,1024,1025,1044,1045,1046,1047,1048,1049,1050,1051,1052,1053,1054,1055,1056,1057,1058,1074,1075,1076,1077,1078,1079,1080,1081,1082,1083,1084,1085,1086,1087,1088,1089,1090,1091,1092,1093,1094,1095,1096,1097,1098,1099,1100,1101,1102,1104,1105,1106,1107,1108,1109,1110] + valid: [3,12,13,14,15,16,17,18,35,36,38,39,92,93,94,95,96,97,98,99,100,101,102,103,104,105,106,107,108,109,110,221,268,269,270,271,272,273,274,275,276,277,278,329,330,331,332,344,345,346,519,520,521,522,523,524,552,553,554,555,556,557,558,559,560,561,562,563,564,565,625,626,627,629,630,632,633,634,635,636,637,638,770,771,775,777,778,780,781,782,783,784,794,795,796,797,798,799,800,802,904,905,906,907,908,909,910,911,912,913,914,915,916,917,919,920,921,922,923,924,925,926,927,928,929,930,931,962,963,966,967,968,969,971,972,1059,1060,1061,1062,1063,1064,1065,1066,1067,1068,1069,1070,1071,1072,1073] + test: 
[77,78,79,80,81,82,83,84,85,86,87,88,89,90,91,111,112,113,114,115,116,117,118,119,140,142,143,144,145,146,147,148,265,266,279,280,281,282,307,308,309,310,311,312,313,314,333,334,335,336,337,338,339,340,341,342,343,481,482,483,484,485,486,487,488,489,490,491,492,493,494,495,496,497,498,547,548,549,550,551,601,602,603,604,606,607,608,609,610,611,612,613,614,615,616,617,618,619,620,621,622,623,624,827,828,829,830,831,833,834,835,836,837,838,839,840,841,842,844,845,846,932,933,935,936,937,938,939,940,941,942,943,1026,1027,1028,1029,1030,1031,1032,1033,1034,1035,1036,1037,1038,1039,1040,1041,1042,1043] diff --git a/instance_segmentation/mink_pan/datasets/semantic_dataset.py b/instance_segmentation/mink_pan/datasets/semantic_dataset.py new file mode 100644 index 0000000..7a8b462 --- /dev/null +++ b/instance_segmentation/mink_pan/datasets/semantic_dataset.py @@ -0,0 +1,501 @@ +import os + +import numpy as np +import yaml +from pytorch_lightning import LightningDataModule +from torch.utils.data import DataLoader, Dataset + + +class SemanticDatasetModule(LightningDataModule): + def __init__(self, cfg): + super().__init__() + self.cfg = cfg + self.things_ids = [] + self.color_map = [] + self.label_names = [] + self.dataset = cfg.MODEL.DATASET + self.mini = cfg[cfg.MODEL.DATASET].MINI + self.min_volume_space = cfg[cfg.MODEL.DATASET].MIN_VOLUME_SPACE + self.max_volume_space = cfg[cfg.MODEL.DATASET].MAX_VOLUME_SPACE + + def prepare_data(self): + pass + + def setup(self, stage=None): + if "ONLY_SEQ" in self.cfg.TRAIN.keys(): + only_seq = self.cfg.TRAIN.ONLY_SEQ + else: + only_seq = None + val_split = "valid" + + if self.mini: + val_split = "mini_" + val_split + train_split = "mini_train" + else: + train_split = "train" + + train_set = SemanticDataset( + self.cfg[self.cfg.MODEL.DATASET].PATH + "/sequences/", + self.cfg[self.cfg.MODEL.DATASET].CONFIG, + split=train_split, + seq=only_seq, + dataset=self.dataset, + ) + self.train_pan_set = PanopticDataset( + dataset=train_set, + split="train", + space=self.cfg[self.cfg.MODEL.DATASET].SPACE, + num_pts=self.cfg[self.cfg.MODEL.DATASET].SUB_NUM_POINTS, + subsample=self.cfg.TRAIN.SUBSAMPLE, + aug=self.cfg.TRAIN.AUG, + ) + + val_set = SemanticDataset( + self.cfg[self.cfg.MODEL.DATASET].PATH + "/sequences/", + self.cfg[self.cfg.MODEL.DATASET].CONFIG, + split=val_split, + seq=only_seq, + dataset=self.dataset, + ) + self.val_pan_set = PanopticDataset( + dataset=val_set, split="valid", space=self.cfg[self.cfg.MODEL.DATASET].SPACE + ) + + test_set = SemanticDataset( + self.cfg[self.cfg.MODEL.DATASET].PATH + "/sequences/", + self.cfg[self.cfg.MODEL.DATASET].CONFIG, + split="test", + seq=only_seq, + dataset=self.dataset, + ) + self.test_pan_set = PanopticDataset( + dataset=test_set, split="test", space=self.cfg[self.cfg.MODEL.DATASET].SPACE + ) + + self.things_ids = train_set.things_ids + self.color_map = train_set.color_map + self.label_names = train_set.label_names + + def train_dataloader(self): + dataset = self.train_pan_set + collate_fn = BatchCollation() + self.train_loader = DataLoader( + dataset=dataset, + batch_size=self.cfg.TRAIN.BATCH_SIZE, + collate_fn=collate_fn, + shuffle=True, + num_workers=self.cfg.TRAIN.NUM_WORKERS, + pin_memory=True, + drop_last=False, + timeout=0, + ) + self.train_iter = iter(self.train_loader) + return self.train_loader + + def val_dataloader(self): + dataset = self.val_pan_set + collate_fn = BatchCollation() + self.valid_loader = DataLoader( + dataset=dataset, + batch_size=self.cfg.TRAIN.BATCH_SIZE, + collate_fn=collate_fn, + 
shuffle=False, + num_workers=self.cfg.TRAIN.NUM_WORKERS, + pin_memory=True, + drop_last=False, + timeout=0, + ) + self.valid_iter = iter(self.valid_loader) + return self.valid_loader + + def test_dataloader(self): + dataset = self.test_pan_set + collate_fn = BatchCollation() + self.test_loader = DataLoader( + dataset=dataset, + batch_size=self.cfg.TRAIN.BATCH_SIZE, + collate_fn=collate_fn, + shuffle=False, + num_workers=self.cfg.TRAIN.NUM_WORKERS, + pin_memory=True, + drop_last=False, + timeout=0, + ) + self.test_iter = iter(self.test_loader) + return self.test_loader + + +class SemanticDataset(Dataset): + def __init__(self, data_path, cfg_path, split="train", seq=None, dataset="KITTI"): + yaml_path = cfg_path + with open(yaml_path, "r") as stream: + semyaml = yaml.safe_load(stream) + + self.things = get_things(dataset) + self.stuff = get_stuff(dataset) + + self.label_names = {**self.things, **self.stuff} + self.things_ids = get_things_ids(dataset) + + self.color_map = semyaml["color_map_learning"] + self.labels = semyaml["labels"] + self.learning_map = semyaml["learning_map"] + self.inv_learning_map = semyaml["learning_map_inv"] + self.split = split + split = semyaml["split"][self.split] + + if seq: + split = [seq] + + self.im_idx = [] + pose_files = [] + calib_files = [] + token_files = [] + fill = 2 if dataset == "KITTI" else 4 + for i_folder in split: + self.im_idx += absoluteFilePaths( + "/".join([data_path, str(i_folder).zfill(fill), "velodyne"]) + ) + pose_files.append( + absoluteDirPath( + "/".join([data_path, str(i_folder).zfill(fill), "poses.txt"]) + ) + ) + calib_files.append( + absoluteDirPath( + "/".join([data_path, str(i_folder).zfill(fill), "calib.txt"]) + ) + ) + if dataset == "NUSCENES": + token_files.append( + absoluteDirPath( + "/".join( + [data_path, str(i_folder).zfill(fill), "lidar_tokens.txt"] + ) + ) + ) + + self.im_idx.sort() + self.poses = load_poses(pose_files, calib_files) + self.tokens = load_tokens(token_files) + + def __len__(self): + return len(self.im_idx) + + def __getitem__(self, index): + fname = self.im_idx[index] + pose = self.poses[index] + points = np.fromfile(self.im_idx[index], dtype=np.float32).reshape((-1, 4)) + xyz = points[:, :3] + intensity = points[:, 3] + if len(intensity.shape) == 2: + intensity = np.squeeze(intensity) + token = "0" + if len(self.tokens) > 0: + token = self.tokens[index] + if self.split == "test": + annotated_data = np.expand_dims( + np.zeros_like(points[:, 0], dtype=int), axis=1 + ) + sem_labels = annotated_data + ins_labels = annotated_data + else: + annotated_data = np.fromfile( + self.im_idx[index].replace("velodyne", "labels")[:-3] + "label", + dtype=np.int32, + ).reshape((-1, 1)) + sem_labels = annotated_data & 0xFFFF # delete high 16 digits binary + ins_labels = annotated_data >> 16 + sem_labels = np.vectorize(self.learning_map.__getitem__)(sem_labels) + + return (xyz, sem_labels, ins_labels, intensity, fname, pose, token) + + +class PanopticDataset(Dataset): + def __init__(self, dataset, split, space, num_pts=0, subsample=False, aug=False): + self.dataset = dataset + self.num_points = num_pts + self.split = split + self.aug = aug + self.subsample = subsample + self.th_ids = dataset.things_ids + self.xlim = space[0] + self.ylim = space[1] + self.zlim = space[2] + + def __len__(self): + return len(self.dataset) + + def __getitem__(self, index): + data = self.dataset[index] + xyz, sem_labels, ins_labels, intensity, fname, pose, token = data + foreground = np.isin(sem_labels, self.th_ids).reshape(-1) + keep = 
np.argwhere( + (self.xlim[0] < xyz[:, 0]) + & (xyz[:, 0] < self.xlim[1]) + & (self.ylim[0] < xyz[:, 1]) + & (xyz[:, 1] < self.ylim[1]) + & (self.zlim[0] < xyz[:, 2]) + & (xyz[:, 2] < self.zlim[1]) + )[:, 0] + xyz = xyz[keep] + sem_labels = sem_labels[keep] + ins_labels = ins_labels[keep] + intensity = intensity[keep] + foreground = foreground[keep] + + feats = np.concatenate((xyz, np.expand_dims(intensity, axis=1)), axis=1) + + # Subsample + if self.split == "train" and self.subsample and len(xyz) > self.num_points: + idx = np.random.choice(np.arange(len(xyz)), self.num_points, replace=False) + xyz = xyz[idx] + sem_labels = sem_labels[idx] + ins_labels = ins_labels[idx] + feats = feats[idx] + intensity = intensity[idx] + foreground = foreground[idx] + + if self.split == "train" and self.aug: + xyz = pcd_augmentations(xyz) + + offset = get_offsets(xyz, ins_labels, sem_labels, self.th_ids) + + return ( + xyz, + feats, + sem_labels, + ins_labels, + offset, + foreground, + fname, + pose, + token, + ) + + +class BatchCollation: + def __init__(self): + self.keys = [ + "pt_coord", + "feats", + "sem_label", + "ins_label", + "offset", + "foreground", + "fname", + "pose", + "token", + ] + + def __call__(self, data): + return {self.keys[i]: list(x) for i, x in enumerate(zip(*data))} + + +def pcd_augmentations(xyz): + # rotation + rotate_rad = np.deg2rad(np.random.random() * 360) + c, s = np.cos(rotate_rad), np.sin(rotate_rad) + j = np.matrix([[c, s], [-s, c]]) + xyz[:, :2] = np.dot(xyz[:, :2], j) + + # flip + flip_type = np.random.choice(4, 1) + if flip_type == 1: + xyz[:, 0] = -xyz[:, 0] + elif flip_type == 2: + xyz[:, 1] = -xyz[:, 1] + elif flip_type == 3: + xyz[:, 0] = -xyz[:, 0] + xyz[:, 1] = -xyz[:, 1] + + # scale + noise_scale = np.random.uniform(0.95, 1.05) + xyz[:, 0] = noise_scale * xyz[:, 0] + xyz[:, 1] = noise_scale * xyz[:, 1] + + # transform + trans_std = [0.1, 0.1, 0.1] + noise_translate = np.array( + [ + np.random.normal(0, trans_std[0], 1), + np.random.normal(0, trans_std[1], 1), + np.random.normal(0, trans_std[2], 1), + ] + ).T + xyz[:, 0:3] += noise_translate + + return xyz + + +def absoluteFilePaths(directory): + for dirpath, _, filenames in os.walk(directory): + for f in filenames: + yield os.path.abspath(os.path.join(dirpath, f)) + + +def absoluteDirPath(directory): + return os.path.abspath(directory) + + +def parse_calibration(filename): + calib = {} + calib_file = open(filename) + for line in calib_file: + key, content = line.strip().split(":") + values = [float(v) for v in content.strip().split()] + pose = np.zeros((4, 4)) + pose[0, 0:4] = values[0:4] + pose[1, 0:4] = values[4:8] + pose[2, 0:4] = values[8:12] + pose[3, 3] = 1.0 + calib[key] = pose + calib_file.close() + return calib + + +def parse_poses(filename, calibration): + file = open(filename) + poses = [] + Tr = calibration["Tr"] + Tr_inv = np.linalg.inv(Tr) + for line in file: + values = [float(v) for v in line.strip().split()] + pose = np.zeros((4, 4)) + pose[0, 0:4] = values[0:4] + pose[1, 0:4] = values[4:8] + pose[2, 0:4] = values[8:12] + pose[3, 3] = 1.0 + poses.append(np.matmul(Tr_inv, np.matmul(pose, Tr))) + return poses + + +def load_poses(pose_files, calib_files): + poses = [] + # go through every file and get all poses + # add them to match im_idx + for i in range(len(pose_files)): + calib = parse_calibration(calib_files[i]) + seq_poses_f64 = parse_poses(pose_files[i], calib) + seq_poses = [pose.astype(np.float32) for pose in seq_poses_f64] + poses += seq_poses + return poses + + +def 
load_tokens(token_files): + if len(token_files) == 0: + return [] + token_files.sort() + tokens = [] + # go through every file and get all tokens + for f in token_files: + token_file = open(f) + for line in token_file: + token = line.strip() + tokens.append(token) + token_file.close() + return tokens + + +def getDir(obj): + return os.path.dirname(os.path.abspath(obj)) + + +def calc_xyz_middle(xyz): + return np.array( + [ + (np.max(xyz[:, 0]) + np.min(xyz[:, 0])) / 2.0, + (np.max(xyz[:, 1]) + np.min(xyz[:, 1])) / 2.0, + (np.max(xyz[:, 2]) + np.min(xyz[:, 2])) / 2.0, + ], + dtype=np.float32, + ) + + +def get_offsets(xyz, ins_labels, sem_labels, th_ids): + offsets = np.zeros([xyz.shape[0], 3], dtype=np.float32) + things_ids, th_idx = np.unique(ins_labels[:, 0], return_index=True) + keep_th = np.array( + [i for i, idx in enumerate(th_idx) if sem_labels[idx] in th_ids], dtype=int + ) + # remove instances with wrong sem class + things_ids = things_ids[keep_th] + th_idx = th_idx[keep_th] + for ti in things_ids: + idx = ins_labels[:, 0] == ti + xyz_i = xyz[idx] + if xyz_i.shape[0] <= 0: + continue + center = calc_xyz_middle(xyz_i) + offsets[idx] = center - xyz_i + return offsets + + +def get_things(dataset): + if dataset == "KITTI": + things = { + 1: "car", + 2: "bicycle", + 3: "motorcycle", + 4: "truck", + 5: "other-vehicle", + 6: "person", + 7: "bicyclist", + 8: "motorcyclist", + } + elif dataset == "NUSCENES": + things = { + 2: "bycicle", + 3: "bus", + 4: "car", + 5: "construction-vehicle", + 6: "motorcycle", + 7: "pedestrian", + 9: "trailer", + 10: "truck", + } + elif dataset == "STRAWBERRIES": + things = { + 1: "strawberry", + } + return things + + +def get_stuff(dataset): + if dataset == "KITTI": + stuff = { + 9: "road", + 10: "parking", + 11: "sidewalk", + 12: "other-ground", + 13: "building", + 14: "fence", + 15: "vegetation", + 16: "trunk", + 17: "terrain", + 18: "pole", + 19: "traffic-sign", + } + elif dataset == "NUSCENES": + stuff = { + 1: "barrier", + 8: "traffic_cone", + 11: "driveable_surface", + 12: "other_flat", + 13: "sidewalk", + 14: "terrain", + 15: "manmade", + 16: "vegetation", + } + elif dataset == "STRAWBERRIES": + stuff = {} + return stuff + + +def get_things_ids(dataset): + if dataset == "KITTI": + return [1, 2, 3, 4, 5, 6, 7, 8] + elif dataset == "NUSCENES": + return [2, 3, 4, 5, 6, 7, 9, 10] + elif dataset == "STRAWBERRIES": + return [1] \ No newline at end of file diff --git a/instance_segmentation/mink_pan/datasets/strawberries.yaml b/instance_segmentation/mink_pan/datasets/strawberries.yaml new file mode 100644 index 0000000..94eeaca --- /dev/null +++ b/instance_segmentation/mink_pan/datasets/strawberries.yaml @@ -0,0 +1,53 @@ +# This file is covered by the LICENSE file in the root of this project. 
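+# Binary label set for the strawberry rows: 0 = background ("unlabeled"),
+# 1 = the single "thing" class ("strawberry"). learning_map is the identity,
+# unlike the remapped semantic-kitti.yaml / semantic-nuscenes.yaml above.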
+labels:
+  0 : "unlabeled"
+  1 : "strawberry"
+color_map: # bgr
+  0 : [0, 0, 0]
+  1 : [255, 0, 0]
+content: # as a ratio with the total number of points
+  0: 0.9
+  1: 0.1
+# classes that are indistinguishable from single scan or inconsistent in
+# ground truth are mapped to their closest equivalent
+learning_map:
+  0 : 0      # "unlabeled"
+  1 : 1      # "strawberry"
+learning_map_inv: # inverse of previous map
+  0: 0       # "unlabeled", and others ignored
+  1: 1       # "strawberry"
+learning_ignore: # Ignore classes
+  0: False   # "unlabeled", and others ignored
+  1: False   # "strawberry"
+
+color_map_learning : {
+  0: [0, 0, 0],
+  1: [255, 0, 0],
+}
+split: # sequence numbers
+  train:
+    - 0
+    - 1
+    - 2
+    - 3
+    - 4
+    - 5
+    - 6
+    - 7
+    - 9
+    - 10
+  valid:
+    - 8
+  test:
+    - 11
+    - 12
+    - 13
+    - 14
+    - 15
+    - 16
+    - 17
+    - 18
+    - 19
+    - 20
+    - 21
diff --git a/instance_segmentation/mink_pan/models/__init__.py b/instance_segmentation/mink_pan/models/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/instance_segmentation/mink_pan/models/backbone.py b/instance_segmentation/mink_pan/models/backbone.py
new file mode 100644
index 0000000..cb6a0ee
--- /dev/null
+++ b/instance_segmentation/mink_pan/models/backbone.py
@@ -0,0 +1,217 @@
+import MinkowskiEngine as ME
+import numpy as np
+import torch
+import torch.nn as nn
+
+
+class MinkEncoderDecoder(nn.Module):
+    """
+    Basic ResNet-style encoder-decoder using sparse (Minkowski) convolutions
+    """
+
+    def __init__(self, cfg):
+        super().__init__()
+
+        cr = cfg.CR
+        self.D = cfg.DIMENSION
+        input_dim = cfg.INPUT_DIM
+        self.res = cfg.RESOLUTION
+
+        cs = cfg.CHANNELS
+        cs = [int(cr * x) for x in cs]
+        self.stem = nn.Sequential(
+            ME.MinkowskiConvolution(
+                input_dim, cs[0], kernel_size=3, stride=1, dimension=self.D
+            ),
+            ME.MinkowskiBatchNorm(cs[0]),
+            ME.MinkowskiReLU(inplace=True),
+            ME.MinkowskiConvolution(
+                cs[0], cs[0], kernel_size=3, stride=1, dimension=self.D
+            ),
+            ME.MinkowskiBatchNorm(cs[0]),
+            ME.MinkowskiReLU(inplace=True),
+        )
+
+        self.stage1 = nn.Sequential(
+            BasicConvolutionBlock(cs[0], cs[0], ks=2, stride=2, dilation=1, D=self.D),
+            ResidualBlock(cs[0], cs[1], ks=3, stride=1, dilation=1, D=self.D),
+            ResidualBlock(cs[1], cs[1], ks=3, stride=1, dilation=1, D=self.D),
+        )
+
+        self.stage2 = nn.Sequential(
+            BasicConvolutionBlock(cs[1], cs[1], ks=2, stride=2, dilation=1, D=self.D),
+            ResidualBlock(cs[1], cs[2], ks=3, stride=1, dilation=1, D=self.D),
+            ResidualBlock(cs[2], cs[2], ks=3, stride=1, dilation=1, D=self.D),
+        )
+
+        self.stage3 = nn.Sequential(
+            BasicConvolutionBlock(cs[2], cs[2], ks=2, stride=2, dilation=1, D=self.D),
+            ResidualBlock(cs[2], cs[3], ks=3, stride=1, dilation=1, D=self.D),
+            ResidualBlock(cs[3], cs[3], ks=3, stride=1, dilation=1, D=self.D),
+        )
+
+        self.stage4 = nn.Sequential(
+            BasicConvolutionBlock(cs[3], cs[3], ks=2, stride=2, dilation=1, D=self.D),
+            ResidualBlock(cs[3], cs[4], ks=3, stride=1, dilation=1, D=self.D),
+            ResidualBlock(cs[4], cs[4], ks=3, stride=1, dilation=1, D=self.D),
+        )
+
+        self.up1 = nn.ModuleList(
+            [
+                BasicDeconvolutionBlock(cs[4], cs[5], ks=2, stride=2, D=self.D),
+                nn.Sequential(
+                    ResidualBlock(
+                        cs[5] + cs[3], cs[5], ks=3, stride=1, dilation=1, D=self.D
+                    ),
+                    ResidualBlock(cs[5], cs[5], ks=3, stride=1, dilation=1, D=self.D),
+                ),
+            ]
+        )
+
+        self.up2 = nn.ModuleList(
+            [
+                BasicDeconvolutionBlock(cs[5], cs[6], ks=2, stride=2, D=self.D),
+                nn.Sequential(
+                    ResidualBlock(
+                        cs[6] + cs[2], cs[6], ks=3, stride=1, dilation=1, D=self.D
+                    ),
+                    ResidualBlock(cs[6], cs[6], ks=3, stride=1, dilation=1, D=self.D),
+                ),
+            ]
+        )
+
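+        # decoder: each up stage doubles the spatial resolution and then
+        # concatenates the skip features of the matching encoder stage
+        # (up1<-stage3, up2<-stage2, up3<-stage1, up4<-stem), as in U-Net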
self.up3 = nn.ModuleList( + [ + BasicDeconvolutionBlock(cs[6], cs[7], ks=2, stride=2, D=self.D), + nn.Sequential( + ResidualBlock( + cs[7] + cs[1], cs[7], ks=3, stride=1, dilation=1, D=self.D + ), + ResidualBlock(cs[7], cs[7], ks=3, stride=1, dilation=1, D=self.D), + ), + ] + ) + + self.up4 = nn.ModuleList( + [ + BasicDeconvolutionBlock(cs[7], cs[8], ks=2, stride=2, D=self.D), + nn.Sequential( + ResidualBlock( + cs[8] + cs[0], cs[8], ks=3, stride=1, dilation=1, D=self.D + ), + ResidualBlock(cs[8], cs[8], ks=3, stride=1, dilation=1, D=self.D), + ), + ] + ) + + def forward(self, x): + in_field = self.TensorField(x) + + x0 = self.stem(in_field.sparse()) + x1 = self.stage1(x0) + x2 = self.stage2(x1) + x3 = self.stage3(x2) + x4 = self.stage4(x3) + + y1 = self.up1[0](x4) + y1 = ME.cat(y1, x3) + y1 = self.up1[1](y1) + + y2 = self.up2[0](y1) + y2 = ME.cat(y2, x2) + y2 = self.up2[1](y2) + + y3 = self.up3[0](y2) + y3 = ME.cat(y3, x1) + y3 = self.up3[1](y3) + + y4 = self.up4[0](y3) + y4 = ME.cat(y4, x0) + y4 = self.up4[1](y4) + + return y4, in_field + + def TensorField(self, x): + """ + Build a tensor field from coordinates and features in the + input batch + The coordinates are quantized using the provided resolution + + """ + feat_tfield = ME.TensorField( + features=torch.from_numpy(np.concatenate(x["feats"], 0)).float(), + coordinates=ME.utils.batched_coordinates( + [i / self.res for i in x["pt_coord"]], dtype=torch.float32 + ), + quantization_mode=ME.SparseTensorQuantizationMode.UNWEIGHTED_AVERAGE, + minkowski_algorithm=ME.MinkowskiAlgorithm.SPEED_OPTIMIZED, + device="cuda", + ) + return feat_tfield + + +## Blocks + + +class BasicConvolutionBlock(nn.Module): + def __init__(self, inc, outc, ks=3, stride=1, dilation=1, D=3): + super().__init__() + self.net = nn.Sequential( + ME.MinkowskiConvolution( + inc, outc, kernel_size=ks, dilation=dilation, stride=stride, dimension=D + ), + ME.MinkowskiBatchNorm(outc), + ME.MinkowskiLeakyReLU(inplace=True), + ) + + def forward(self, x): + out = self.net(x) + return out + + +class BasicDeconvolutionBlock(nn.Module): + def __init__(self, inc, outc, ks=3, stride=1, D=3): + super().__init__() + self.net = nn.Sequential( + ME.MinkowskiConvolutionTranspose( + inc, outc, kernel_size=ks, stride=stride, dimension=D + ), + ME.MinkowskiBatchNorm(outc), + ME.MinkowskiLeakyReLU(inplace=True), + ) + + def forward(self, x): + return self.net(x) + + +class ResidualBlock(nn.Module): + def __init__(self, inc, outc, ks=3, stride=1, dilation=1, D=3): + super().__init__() + self.net = nn.Sequential( + ME.MinkowskiConvolution( + inc, outc, kernel_size=ks, dilation=dilation, stride=stride, dimension=D + ), + ME.MinkowskiBatchNorm(outc), + ME.MinkowskiReLU(inplace=True), + ME.MinkowskiConvolution( + outc, outc, kernel_size=ks, dilation=dilation, stride=1, dimension=D + ), + ME.MinkowskiBatchNorm(outc), + ) + + self.downsample = ( + nn.Sequential() + if (inc == outc and stride == 1) + else nn.Sequential( + ME.MinkowskiConvolution( + inc, outc, kernel_size=1, dilation=1, stride=stride, dimension=D + ), + ME.MinkowskiBatchNorm(outc), + ) + ) + + self.relu = ME.MinkowskiReLU(inplace=True) + + def forward(self, x): + out = self.relu(self.net(x) + self.downsample(x)) + return out diff --git a/instance_segmentation/mink_pan/models/loss.py b/instance_segmentation/mink_pan/models/loss.py new file mode 100644 index 0000000..cac042c --- /dev/null +++ b/instance_segmentation/mink_pan/models/loss.py @@ -0,0 +1,152 @@ +# Copyright (c) Facebook, Inc. and its affiliates. 
+# Modified by Bowen Cheng from https://github.com/facebookresearch/detr/blob/master/models/detr.py +from itertools import filterfalse + +import open3d as o3d +import torch +import torch.nn.functional as F +from torch import nn +from torch.autograd import Variable + + +class SemLoss(nn.Module): + def __init__(self, w): + super().__init__() + + self.ce_w, self.lov_w = w + + self.cross_entropy = torch.nn.CrossEntropyLoss(ignore_index=0) + + def forward(self, outputs, targets): + ce = self.cross_entropy(outputs, targets) + lovasz = self.lovasz_softmax(F.softmax(outputs, dim=1), targets) + loss = {"sem_ce": self.ce_w * ce, "sem_lov": self.lov_w * lovasz} + return loss + + def lovasz_grad(self, gt_sorted): + """ + Computes gradient of the Lovasz extension w.r.t sorted errors + See Alg. 1 in paper + """ + p = len(gt_sorted) + gts = gt_sorted.sum() + intersection = gts - gt_sorted.float().cumsum(0) + union = gts + (1 - gt_sorted).float().cumsum(0) + jaccard = 1.0 - intersection / union + if p > 1: # cover 1-pixel case + jaccard[1:p] = jaccard[1:p] - jaccard[0:-1] + return jaccard + + def lovasz_softmax(self, probas, labels, classes="present", ignore=None): + """ + Multi-class Lovasz-Softmax loss + probas: [B, C, H, W] Variable, class probabilities at each prediction (between 0 and 1). + Interpreted as binary (sigmoid) output with outputs of size [B, H, W]. + labels: [B, H, W] Tensor, ground truth labels (between 0 and C - 1) + classes: 'all' for all, 'present' for classes present in labels, or a list of classes to average. + per_image: compute the loss per image instead of per batch + ignore: void class labels + """ + loss = self.lovasz_softmax_flat( + *self.flatten_probas(probas, labels, ignore), classes=classes + ) + return loss + + def lovasz_softmax_flat(self, probas, labels, classes="present"): + """ + Multi-class Lovasz-Softmax loss + probas: [P, C] Variable, class probabilities at each prediction (between 0 and 1) + labels: [P] Tensor, ground truth labels (between 0 and C - 1) + classes: 'all' for all, 'present' for classes present in labels, or a list of classes to average. + """ + if probas.numel() == 0: + # only void pixels, the gradients should be 0 + return probas * 0.0 + C = probas.size(1) + losses = [] + class_to_sum = list(range(C)) if classes in ["all", "present"] else classes + for c in class_to_sum: + fg = (labels == c).float() # foreground for class c + if classes == "present" and fg.sum() == 0: + continue + if C == 1: + if len(classes) > 1: + raise ValueError("Sigmoid output possible only with 1 class") + class_pred = probas[:, 0] + else: + class_pred = probas[:, c] + errors = (Variable(fg) - class_pred).abs() + errors_sorted, perm = torch.sort(errors, 0, descending=True) + perm = perm.data + fg_sorted = fg[perm] + losses.append( + torch.dot(errors_sorted, Variable(self.lovasz_grad(fg_sorted))) + ) + return self.mean(losses) + + def flatten_probas(self, probas, labels, ignore=None): + """ + Flattens predictions in the batch + """ + # Probabilities from SparseTensor.features already flattened + N, C = probas.size() + probas = probas.contiguous().view(-1, C) + labels = labels.view(-1) + if ignore is None: + return probas, labels + valid = labels != ignore + vprobas = probas[torch.nonzero(valid).squeeze()] + vlabels = labels[valid] + return vprobas, vlabels + + def isnan(self, x): + return x != x + + def mean(self, l, ignore_nan=False, empty=0): + """ + nanmean compatible with generators. 
+ """ + l = iter(l) + if ignore_nan: + l = filterfalse(self.isnan, l) + try: + n = 1 + acc = next(l) + except StopIteration: + if empty == "raise": + raise ValueError("Empty mean") + return empty + for n, v in enumerate(l, 2): + acc += v + if n == 1: + return acc + return acc / n + + +class InsLoss(nn.Module): + def __init__(self): + super().__init__() + pass + + def single_offset_regress_vec(self, pt_offsets, gt_offsets, valid): + pt_diff = pt_offsets - gt_offsets # (N, 3) + pt_dist = torch.sum(torch.abs(pt_diff), dim=-1) # (N) + valid = valid.view(-1).float() + offset_norm_loss = torch.sum(pt_dist * valid) / (torch.sum(valid) + 1e-6) + return (offset_norm_loss,) + + def forward(self, offsets, gt_offsets, valid): + loss_list_list = [] + for i in range(len(offsets)): + loss_list = self.single_offset_regress_vec( + offsets[i], gt_offsets[i], valid[i] + ) + loss_len = len(loss_list) + if len(loss_list_list) < loss_len: + loss_list_list = [[] for j in range(loss_len)] + for j in range(loss_len): + loss_list_list[j].append(loss_list[j]) + mean_loss_list = [] + for i in range(len(loss_list_list)): + mean_loss_list.append(torch.mean(torch.stack(loss_list_list[i]))) + return sum(mean_loss_list) diff --git a/instance_segmentation/mink_pan/models/model.py b/instance_segmentation/mink_pan/models/model.py new file mode 100644 index 0000000..b54daf4 --- /dev/null +++ b/instance_segmentation/mink_pan/models/model.py @@ -0,0 +1,300 @@ +import mink_pan.utils.plot as pl +import MinkowskiEngine as ME +import numpy as np +import torch +import torch.nn as nn +from mink_pan.models.backbone import MinkEncoderDecoder +from mink_pan.models.loss import InsLoss, SemLoss +from mink_pan.utils.clustering import Clustering +from mink_pan.utils.evaluate_panoptic import PanopticKittiEvaluator +from pytorch_lightning.core.module import LightningModule +from torch.utils.tensorboard import SummaryWriter +import random +import open3d as o3d + + + +class MinkPan(LightningModule): + def __init__(self, hparams): + super().__init__() + self.save_hyperparameters(dict(hparams)) + self.cfg = hparams + + backbone = MinkEncoderDecoder(hparams.BACKBONE) + self.backbone = ME.MinkowskiSyncBatchNorm.convert_sync_batchnorm(backbone) + self.sem_head = SemHead(hparams) + self.ins_head = InsHead(hparams) + + self.sem_loss = SemLoss(hparams.LOSS.SEM.WEIGHTS) + self.ins_loss = InsLoss() + self.cluster = Clustering(hparams.POST) + self.evaluator = PanopticKittiEvaluator(hparams.STRAWBERRIES) + self.freezeModules() + + #print("loading model", end=" ...") + #checkpoint = torch.load(f"model@0000.pt", map_location=torch.device("cuda")) + #self.load_state_dict(checkpoint['encoder_state_dict']) + #print("done!") + self.epoch = 0 + self.steps = 0 + + self.n_scans = 0 + + def freezeModules(self): + freeze_dict = { + "BACKBONE": self.backbone, + "SEM_HEAD": self.sem_head, + "INS_HEAD": self.ins_head, + } + print("Frozen modules: ", self.cfg.TRAIN.FREEZE_MODULES) + for module in self.cfg.TRAIN.FREEZE_MODULES: + for param in freeze_dict[module].parameters(): + param.requires_grad = False + + def forward(self, x): + feats, in_field = self.backbone(x) + sem_logits = self.sem_head(feats, in_field) + offsets, ins_feat = self.ins_head(feats, in_field) + return sem_logits, offsets, ins_feat + + def getLoss(self, x, logits, offsets): + logits = torch.cat(logits) + labs = torch.from_numpy(np.concatenate(x["sem_label"])).to(logits.device) + loss = self.sem_loss(logits, labs.view(-1)) + foreground = [ + torch.from_numpy(f).to(offsets[0].device) for f in 
x["foreground"] + ] + gt_offsets = [torch.from_numpy(o).to(offsets[0].device) for o in x["offset"]] + ins_loss = self.ins_loss(offsets, gt_offsets, foreground) + loss["ins"] = 10*ins_loss + + return loss + + def training_step(self, x: dict, idx): + sem_logits, offsets, ins_feat = self(x) + loss_dict = self.getLoss(x, sem_logits, offsets) + + for k, v in loss_dict.items(): + self.log(f"train/{k}", v, batch_size=self.cfg.TRAIN.BATCH_SIZE) + + total_loss = sum(loss_dict.values()) + self.log("train_loss", total_loss, batch_size=self.cfg.TRAIN.BATCH_SIZE) + torch.cuda.empty_cache() + self.steps += 1 + return total_loss + + def validation_step(self, x: dict, idx): + + if "EVALUATE" in self.cfg: + self.evaluation_step(x, idx) + return + sem_logits, offsets, ins_feat = self(x) + + loss_dict = self.getLoss(x, sem_logits, offsets) + + +# sem_logits_gt = torch.zeros((sem_logits[0].shape[0], 2)).cuda() +# sem_logits_gt[x["sem_label"][0]==0, 0] = 1.0 +# sem_logits_gt[x["sem_label"][0]==1, 1] = 1.0 + + sem = torch.argmax(sem_logits[0], dim=1).cpu().numpy() + if sem[sem==1].shape[0]>(sem.shape[0]/5): + print("skipping validation") + self.skipped=True + return 100 + self.skipped=False + + #sem_pred, ins_pred = self.inference(x, [sem_logits_gt], offsets) #sem_logits, offsets) + sem_pred, ins_pred = self.inference(x, sem_logits, offsets) #sem_logits, offsets) + + + print("unique groundtruth insts", np.unique(x["ins_label"][0])) + print("unique predicted insts", np.unique(ins_pred[0])) + + inst = ins_pred[0] + #inst = x["ins_label"][0] + u = np.unique(inst) + table = np.random.uniform(0.1, 1.0, (u.max()+1, 3)) + table[0, :] = 0 + idxcol = table[inst] + pcd = o3d.geometry.PointCloud() + pcd.points = o3d.utility.Vector3dVector(x["pt_coord"][0]) + pcd.colors = o3d.utility.Vector3dVector(idxcol) + o3d.visualization.draw(pcd) + + for k, v in loss_dict.items(): + self.log(f"val/{k}", v, batch_size=self.cfg.TRAIN.BATCH_SIZE) + + total_loss = sum(loss_dict.values()) + self.log("val_loss", total_loss, batch_size=self.cfg.TRAIN.BATCH_SIZE) + self.evaluator.update(sem_pred, ins_pred, x) + torch.cuda.empty_cache() + + coords = x["pt_coord"][0] + + coords[x["foreground"][0]] = coords[x["foreground"][0]] + x["offset"][0][x["foreground"][0]]#offsets[0][x["foreground"][0]].detach().cpu().numpy() + + colors = np.zeros_like(coords) + + colors[x["foreground"][0]] = [1, 0 ,0] + + pcd = o3d.geometry.PointCloud() + pcd.points = o3d.utility.Vector3dVector(coords) + pcd.colors = o3d.utility.Vector3dVector(colors) + o3d.visualization.draw(pcd) + + coords = x["pt_coord"][0] + + coords[x["foreground"][0]] = coords[x["foreground"][0]] + offsets[0][x["foreground"][0]].detach().cpu().numpy() + + colors = np.zeros_like(coords) + + colors[x["foreground"][0]] = [0, 0, 1] + + pcd = o3d.geometry.PointCloud() + pcd.points = o3d.utility.Vector3dVector(coords) + pcd.colors = o3d.utility.Vector3dVector(colors) + o3d.visualization.draw(pcd) + + return total_loss + + def on_validation_epoch_end(self):#, outputs): + if self.skipped: + return + self.log( + "metrics/pq", + self.evaluator.get_mean_pq(), + batch_size=self.cfg.TRAIN.BATCH_SIZE, + ) + self.log( + "metrics/iou", + self.evaluator.get_mean_iou(), + batch_size=self.cfg.TRAIN.BATCH_SIZE, + ) + self.log( + "metrics/rq", + self.evaluator.get_mean_rq(), + batch_size=self.cfg.TRAIN.BATCH_SIZE, + ) + self.evaluator.reset() + + print("saving model") + torch.save({'encoder_state_dict': self.state_dict()}, + f"model@{str(self.epoch).zfill(4)}.pt") + + def evaluation_step(self, x: dict, idx): + 
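+        # full inference path: semantic logits and per-point offsets from the
+        # network, then offset-shifted clustering into instances; predictions
+        # are visualized (VIS_PRED), saved to disk (SAVE_VAL), or accumulated
+        # in the panoptic evaluator otherwise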
sem_logits, offsets, ins_feat = self(x) + sem_pred, ins_pred = self.inference(x, sem_logits, offsets) + + if "VIS_PRED" in self.cfg: + pl.plot_instances(x["pt_coord"], ins_pred) + color_map = self.trainer.datamodule.color_map + pl.plot_semantics(x["pt_coord"], sem_pred, color_map) + else: + self.evaluator.update(sem_pred, ins_pred, x) + + if "SAVE_VAL" in self.cfg: + pl.plot_instances(x["pt_coord"], ins_pred, save=True, n=self.n_scans) + color_map = self.trainer.datamodule.color_map + pl.plot_semantics( + x["pt_coord"], sem_pred, color_map, save=True, n=self.n_scans + ) + self.n_scans += 1 + + def test_step(self, x: dict, idx): + pass + + def setup(self, stage=None): + ids = self.trainer.datamodule.things_ids + self.cluster.set_ids(ids) + + def inference(self, x, sem_logits, offsets): + sem_pred = [] + ins_pred = [] + for i in range(len(sem_logits)): + sem = torch.argmax(sem_logits[i], dim=1).cpu().numpy() + ins = self.cluster(sem, offsets[i], x["pt_coord"][i]) + sem_pred.append(sem) + ins_pred.append(ins) + return sem_pred, ins_pred + + def sem_inference(self, sem_logits): + sem_pred = [] + for i in range(len(sem_logits)): + sem = torch.argmax(sem_logits[i], dim=1).cpu().numpy() + sem_pred.append(sem) + return sem_pred + + def configure_optimizers(self): + optimizer = torch.optim.Adam(self.parameters(), lr=self.cfg.TRAIN.LR) + # scheduler = torch.optim.lr_scheduler.StepLR( + # optimizer, step_size=self.cfg.TRAIN.STEP, gamma=self.cfg.TRAIN.DECAY + # ) + # return [optimizer], [scheduler] + return [optimizer] + + +## Heads +class SemHead(nn.Module): + def __init__(self, cfg): + super().__init__() + n_cls = int(cfg[cfg.MODEL.DATASET].NUM_CLASSES) + out = int(cfg.BACKBONE.CHANNELS[-1] * cfg.BACKBONE.CR) + self.conv = ME.MinkowskiConvolution( + out, n_cls, kernel_size=3, dilation=1, stride=1, dimension=3 + ) + + def forward(self, x, in_field): + logits = self.conv(x) + ## vox2points + logits = logits.slice(in_field) + logits = logits.decomposed_features + return logits + + +class InsHead(nn.Module): + def __init__(self, cfg): + super().__init__() + out = int(cfg.BACKBONE.CHANNELS[-1] * cfg.BACKBONE.CR) + init = int(cfg.BACKBONE.CHANNELS[0] * cfg.BACKBONE.CR) + + self.res = cfg.BACKBONE.RESOLUTION + + self.conv1 = ME.MinkowskiConvolution( + out, out, kernel_size=3, dilation=1, stride=1, dimension=3 + ) + self.bn1 = ME.MinkowskiBatchNorm(out) + self.act1 = ME.MinkowskiLeakyReLU(True) + self.conv2 = ME.MinkowskiConvolution( + out, 2 * init, kernel_size=3, dilation=1, stride=1, dimension=3 + ) + self.bn2 = ME.MinkowskiBatchNorm(2 * init) + self.act2 = ME.MinkowskiLeakyReLU(True) + self.conv3 = ME.MinkowskiConvolution( + 2 * init, init, kernel_size=3, dilation=1, stride=1, dimension=3 + ) + self.bn3 = ME.MinkowskiBatchNorm(init) + self.act3 = ME.MinkowskiLeakyReLU(True) + + self.offset = nn.Sequential( + nn.Linear(init + 3, init, bias=True), nn.BatchNorm1d(init), nn.ReLU() + ) + self.offset_linear = nn.Linear(init, 3, bias=True) + + def forward(self, x, in_field): + out = self.conv1(x) + out = self.act1(self.bn1(out)) + out = self.conv2(out) + out = self.act2(self.bn2(out)) + out = self.conv3(out) + out = self.act3(self.bn3(out)) + ## vox2points + out = out.slice(in_field) + feats = out.decomposed_features + coors = [c * self.res for c in out.decomposed_coordinates] + + offsets = [ + self.offset_linear(self.offset(torch.cat((f, c), dim=1))) + for f, c in zip(feats, coors) + ] + return offsets, feats diff --git a/instance_segmentation/mink_pan/requirements.txt 
b/instance_segmentation/mink_pan/requirements.txt
new file mode 100644
index 0000000..1f7c1fa
--- /dev/null
+++ b/instance_segmentation/mink_pan/requirements.txt
@@ -0,0 +1,11 @@
+Click==7.0
+easydict==1.9
+hdbscan==0.8.28
+matplotlib==3.5.1
+MinkowskiEngine==0.5.4
+numpy==1.20.3
+open3d==0.13.0
+pytorch_lightning==1.5.6
+PyYAML==6.0
+scikit_learn==1.1.2
+torch
diff --git a/instance_segmentation/mink_pan/scripts/run_full_row.py b/instance_segmentation/mink_pan/scripts/run_full_row.py
new file mode 100644
index 0000000..8a08e29
--- /dev/null
+++ b/instance_segmentation/mink_pan/scripts/run_full_row.py
@@ -0,0 +1,131 @@
+import os
+import subprocess
+from os.path import join
+
+#import click
+import torch
+import yaml
+from easydict import EasyDict as edict
+from mink_pan.datasets.deploy_dataset import SemanticDatasetModule
+from mink_pan.models.model import MinkPan
+
+from pytorch_lightning import Trainer
+from pytorch_lightning import loggers as pl_loggers
+from pytorch_lightning.callbacks import LearningRateMonitor, ModelCheckpoint
+from torch.utils.data import DataLoader, Dataset
+import numpy as np
+import open3d as o3d
+import random
+from mink_pan.utils.evaluate_panoptic import PanopticKittiEvaluator
+import typer
+
+cli = typer.Typer()
+
+
+@cli.command()
+def main(
+    modelpath: str = typer.Option(
+        ...,
+        "--modelpath",
+        help="path to the trained model checkpoint",
+    ),
+):
+    model_cfg = edict(yaml.safe_load(open("config/model.yaml")))
+    backbone_cfg = edict(yaml.safe_load(open("config/backbone.yaml")))
+    cfg = edict({**model_cfg, **backbone_cfg})
+
+    data = SemanticDatasetModule(cfg)
+    model = MinkPan(cfg)
+    model.to(torch.device("cuda"))
+    model.eval()
+
+    inseval = PanopticKittiEvaluator(cfg.STRAWBERRIES)
+
+    w = torch.load(modelpath, map_location="cpu")
+    model.load_state_dict(w["encoder_state_dict"], strict=False)
+    tb_logger = pl_loggers.TensorBoardLogger(
+        "experiments/" + cfg.EXPERIMENT.ID, default_hp_metric=False
+    )
+
+    data.setup()
+    model.cluster.set_ids(data.things_ids)
+
+    keys = [
+        "pt_coord",
+        "feats",
+        "sem_label",
+        "ins_label",
+        "offset",
+        "foreground",
+        "totmask",
+        "mid_x",
+        "token",
+    ]
+
+    min_x = 27.58
+    max_x = 29.45
+    ext = 0.3
+
+    coords = np.array(data.test_pan_set.dataset.pcds[0].points)
+    full_ins = np.zeros(coords.shape[0], dtype=np.int32)
+    full_offset = np.zeros_like(coords)
+
+    # slide a window of width `ext` along the row, with an `eps` margin so
+    # that instances cut at a window border are picked up by the next window
+    A = min_x
+    B = A + ext
+    eps = 0.05
+
+    while True:
+        x = data.test_pan_set.__getitem__(A, B)
+        # wrap every returned field into a single-element batch
+        x = {keys[i]: list(j) for i, j in enumerate(zip(x))}
+
+        with torch.no_grad():
+            sem_logits, offsets, ins_feat = model(x)
+            sem_pred, ins_pred = model.inference(x, sem_logits, offsets)
+
+        vA = A if A <= min_x else A + eps
+        vB = B if B >= max_x else B - eps
+        print(f"{A:6.3f} : {B:6.3f} / {max_x:6.3f}")
+
+        inseval.update(sem_pred, ins_pred, x)
+
+        unique_pr = np.unique(ins_pred[0])
+        if 0 in unique_pr:
+            unique_pr = np.delete(unique_pr, 0)
+
+        ins_offset = full_ins.max()
+
+        MAXX = 0
+
+        for k in unique_pr:
+            kth_inst_mask = ins_pred[0] == k
+            kth_inst_pts_x = x["pt_coord"][0][kth_inst_mask][:, 0] + x["mid_x"][0]
+
+            mask = np.logical_and(kth_inst_pts_x >= vA, kth_inst_pts_x < vB)
+            if mask.sum() == 0:
+                continue
+            # NOTE: the original body of this loop was lost in extraction;
+            # this reconstruction assumes "totmask" holds the chunk-to-row
+            # point indices and writes the instance into the full-row array
+            full_ins[x["totmask"][0][kth_inst_mask]] = k + ins_offset
+            maxx = kth_inst_pts_x.max()
+            if maxx > MAXX:
+                MAXX = maxx
+
+        A = MAXX - eps
+        B = A + ext
+        if B > max_x:
+            break
+
+    # raw binary dump (read back with np.fromfile), despite the .npy suffix
+    full_ins.tofile("predicted_instances.npy")
+    #full_offset.tofile("predicted_offsets.npy")
+
+    inseval.print_results()
+    inseval.print_fp_fn()
+
+
+if __name__ == "__main__":
+    cli()
\ No newline at end of file
diff --git a/instance_segmentation/mink_pan/scripts/train_model.py 
b/instance_segmentation/mink_pan/scripts/train_model.py new file mode 100644 index 0000000..d5a051c --- /dev/null +++ b/instance_segmentation/mink_pan/scripts/train_model.py @@ -0,0 +1,114 @@ +import os +import subprocess +from os.path import join + +#import click +import torch +import yaml +from easydict import EasyDict as edict +from mink_pan.datasets.daniel_dataset import SemanticDatasetModule +from mink_pan.models.model import MinkPan +from pytorch_lightning import Trainer +from pytorch_lightning import loggers as pl_loggers +from pytorch_lightning.callbacks import LearningRateMonitor, ModelCheckpoint + +#@click.command() +#@click.option("--w", type=str, default=None, required=False, help="weights to load") +#@click.option( +# "--ckpt", +# type=str, +# default=None, +# required=False, +# help="checkpoint to resume training", +#) +#@click.option("--nuscenes", is_flag=True) +#@click.option("--mini", is_flag=True, help="use mini split for nuscenes") +#@click.option( +# "--seq", +# type=int, +# default=None, +# required=False, +# help="use a single sequence for train and val", +#) +#@click.option( +# "--id", type=str, default=None, required=False, help="set id of the experiment" +#) +def main(w=None, ckpt=None, nuscenes=False, mini=False, seq=None, id=None): + model_cfg = edict( + yaml.safe_load(open(join(getDir(__file__), "../config/model.yaml"))) + ) + backbone_cfg = edict( + yaml.safe_load(open(join(getDir(__file__), "../config/backbone.yaml"))) + ) + cfg = edict({**model_cfg, **backbone_cfg}) + cfg.git_commit_version = str( + subprocess.check_output(["git", "rev-parse", "--short", "HEAD"]).strip() + ) + + if nuscenes: + cfg.MODEL.DATASET = "NUSCENES" + if mini and nuscenes: + cfg.NUSCENES.MINI = True + if seq: + cfg.TRAIN.ONLY_SEQ = seq + if id: + cfg.EXPERIMENT.ID = id + + data = SemanticDatasetModule(cfg) + model = MinkPan(cfg) + if w: + w = torch.load(w, map_location="cpu") + model.load_state_dict(w["state_dict"], strict=False) + + tb_logger = pl_loggers.TensorBoardLogger( + "experiments/" + cfg.EXPERIMENT.ID, default_hp_metric=False + ) + + # Callbacks + lr_monitor = LearningRateMonitor(logging_interval="step") + + iou_ckpt = ModelCheckpoint( + monitor="metrics/iou", + filename=cfg.EXPERIMENT.ID + "_{epoch:02d}_{iou:.2f}", + mode="max", + save_last=True, + ) +# pq_ckpt = ModelCheckpoint( +# monitor="metrics/iou", +# filename=cfg.EXPERIMENT.ID + "_{epoch:02d}_{pq:.2f}", +# mode="max", +# save_last=True, +# ) + + trainer = Trainer( + # num_sanity_val_steps=0, + #gpus=cfg.TRAIN.N_GPUS, + accelerator="auto", + logger=tb_logger, + max_epochs=cfg.TRAIN.MAX_EPOCH, + callbacks=[lr_monitor, iou_ckpt],#, pq_ckpt], + # track_grad_norm=2, + log_every_n_steps=1, + gradient_clip_val=0.5, + # overfit_batches=0.0001, + accumulate_grad_batches=cfg.TRAIN.BATCH_ACC, + #resume_from_checkpoint=ckpt, + ) + + ###### Learning rate finder + # import matplotlib.pyplot as plt + # lr_finder = trainer.tuner.lr_find(model,data,min_lr=1e-8,max_lr=1e-2, num_training=10000) + # fig = lr_finder.plot(suggest=True) + # plt.savefig('lr_finder') + + quit() + + trainer.fit(model, data) + + +def getDir(obj): + return os.path.dirname(os.path.abspath(obj)) + + +if __name__ == "__main__": + main() diff --git a/instance_segmentation/mink_pan/utils/clustering.py b/instance_segmentation/mink_pan/utils/clustering.py new file mode 100644 index 0000000..114c1b1 --- /dev/null +++ b/instance_segmentation/mink_pan/utils/clustering.py @@ -0,0 +1,73 @@ +#import hdbscan +import numpy as np +import torch.nn as nn +from sklearn.cluster 
import MeanShift
+
+
+class Clustering(nn.Module):
+    def __init__(self, cfg):
+        super().__init__()
+        self.bandwidth = cfg.BANDWIDTH
+        self.min_cluster_size = cfg.MIN_CLUSTER
+        clustering = cfg.ALG
+        if clustering == "MEANSHIFT":
+            self.clustering = self.meanshift_cluster
+        elif clustering == "HDBSCAN":
+            self.clustering = self.hdbscan_cluster
+        else:
+            raise ValueError("Unknown clustering algorithm: {}".format(clustering))
+
+    def set_ids(self, ids):
+        self.things_ids = ids
+
+    def forward(self, sem_preds, offsets, coors):
+        pt_offsets = offsets.detach().cpu().numpy().reshape(-1, 3)
+        valid = np.isin(sem_preds, self.things_ids).reshape(-1)
+        # shift each point by its predicted offset towards its instance center
+        # and cluster in the shifted space; id 0 means "no instance"
+        clustered_ids = self.clustering(coors + pt_offsets, valid)
+        return clustered_ids
+
+    def meanshift_cluster(self, shifted_pts, valid):
+        shift_dim = shifted_pts.shape[1]
+        clustered_ins_ids = np.zeros(shifted_pts.shape[0], dtype=np.int32)
+        valid_shifts = (
+            shifted_pts[valid, :].reshape(-1, shift_dim)
+            if valid is not None
+            else shifted_pts
+        )
+
+        if valid_shifts.shape[0] == 0:
+            return clustered_ins_ids
+
+        ms = MeanShift(bandwidth=self.bandwidth, bin_seeding=True)
+        try:
+            ms.fit(valid_shifts)
+        except Exception as e:
+            print("\nException: {}. Disabling bin_seeding and retrying.".format(e))
+            ms = MeanShift(bandwidth=self.bandwidth)
+            ms.fit(valid_shifts)
+        labels = ms.labels_ + 1
+        assert np.min(labels) > 0
+        if valid is not None:
+            clustered_ins_ids[valid] = labels
+            return clustered_ins_ids
+        else:
+            return labels
+
+    def hdbscan_cluster(self, shifted_pcd, valid):
+        import hdbscan  # local import: the module-level import is commented out
+
+        clustered_ins_ids = np.zeros(shifted_pcd.shape[0], dtype=np.int32)
+        valid_shifts = shifted_pcd[valid, :].reshape(-1, 3)
+        if valid_shifts.shape[0] <= self.min_cluster_size:
+            return clustered_ins_ids
+        cluster = hdbscan.HDBSCAN(
+            min_cluster_size=self.min_cluster_size, allow_single_cluster=True
+        ).fit(valid_shifts)
+        instance_labels = cluster.labels_
+        # hdbscan labels noise as -1; keep those points at 0 ("no instance")
+        # and shift real clusters to start from 1
+        clustered_ins_ids[valid] = np.where(instance_labels < 0, 0, instance_labels + 1)
+        return clustered_ins_ids
diff --git a/instance_segmentation/mink_pan/utils/eval_np.py b/instance_segmentation/mink_pan/utils/eval_np.py
new file mode 100755
index 0000000..a0bd00f
--- /dev/null
+++ b/instance_segmentation/mink_pan/utils/eval_np.py
@@ -0,0 +1,783 @@
+#!/usr/bin/env python3
+
+# This file is covered by the LICENSE file in the root of this project.
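+#
+# Panoptic Quality follows Kirillov et al. (arXiv:1801.00868): a predicted
+# segment matches a ground-truth segment of the same class when their IoU
+# exceeds 0.5, and per class
+#   SQ = sum of matched IoUs / TP        (segmentation quality)
+#   RQ = TP / (TP + 0.5*FP + 0.5*FN)     (recognition quality)
+#   PQ = SQ * RQ
+#
+# Minimal usage sketch (arrays are illustrative):
+#   ev = PanopticEval(2, ignore=[0], min_points=20)
+#   ev.addBatch(sem_pred, ins_pred, sem_gt, ins_gt)
+#   pq, sq, rq, pq_all, sq_all, rq_all = ev.getPQ()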
+ +import math +import time + +import numpy as np + + +class PanopticEval: + """ Panoptic evaluation using numpy + + authors: Andres Milioto and Jens Behley + + """ + + def __init__( + self, n_classes, device=None, ignore=None, offset=2 ** 32, min_points=30 + ): + self.n_classes = n_classes + assert device == None + self.ignore = np.array(ignore, dtype=np.int64) + self.include = np.array( + [n for n in range(self.n_classes) if n not in self.ignore], dtype=np.int64 + ) + + # print("[PANOPTIC EVAL] IGNORE: ", self.ignore) + # print("[PANOPTIC EVAL] INCLUDE: ", self.include) + + self.reset() + self.offset = offset # largest number of instances in a given scan + self.min_points = ( + min_points + ) # smallest number of points to consider instances in gt + self.eps = 1e-15 + + def num_classes(self): + return self.n_classes + + def merge(self, evaluator): + self.px_iou_conf_matrix += evaluator.px_iou_conf_matrix + self.pan_tp += evaluator.pan_tp + self.pan_iou += evaluator.pan_iou + self.pan_fp += evaluator.pan_fp + self.pan_fn += evaluator.pan_fn + + self.evaluated_fnames += evaluator.evaluated_fnames + + def reset(self): + # general things + # iou stuff + self.px_iou_conf_matrix = np.zeros( + (self.n_classes, self.n_classes), dtype=np.int64 + ) + # panoptic stuff + self.pan_tp = np.zeros(self.n_classes, dtype=np.int64) + self.pan_iou = np.zeros(self.n_classes, dtype=np.double) + self.pan_fp = np.zeros(self.n_classes, dtype=np.int64) + self.pan_fn = np.zeros(self.n_classes, dtype=np.int64) + + self.tps = [] + self.fps = [] + self.fns = [] + + self.evaluated_fnames = [] + + ################################# IoU STUFF ################################## + def addBatchSemIoU(self, x_sem, y_sem): + # idxs are labels and predictions + idxs = np.stack([x_sem, y_sem], axis=0) + + # make confusion matrix (cols = gt, rows = pred) + np.add.at(self.px_iou_conf_matrix, tuple(idxs), 1) + + def getSemIoUStats(self): + # clone to avoid modifying the real deal + conf = self.px_iou_conf_matrix.copy().astype(np.double) + # remove fp from confusion on the ignore classes predictions + # points that were predicted of another class, but were ignore + # (corresponds to zeroing the cols of those classes, since the predictions + # go on the rows) + conf[:, self.ignore] = 0 + + # get the clean stats + tp = conf.diagonal() + fp = conf.sum(axis=1) - tp + fn = conf.sum(axis=0) - tp + return tp, fp, fn + + def getSemIoU(self): + tp, fp, fn = self.getSemIoUStats() + # print(f"tp={tp}") + # print(f"fp={fp}") + # print(f"fn={fn}") + intersection = tp + union = tp + fp + fn + union = np.maximum(union, self.eps) + iou = intersection.astype(np.double) / union.astype(np.double) + iou_mean = ( + intersection[self.include].astype(np.double) + / union[self.include].astype(np.double) + ).mean() + + + #import ipdb; ipdb.set_trace() + print(self.px_iou_conf_matrix) + + return iou_mean, iou # returns "iou mean", "iou per class" ALL CLASSES + + def getSemAcc(self): + tp, fp, fn = self.getSemIoUStats() + total_tp = tp.sum() + total = tp[self.include].sum() + fp[self.include].sum() + total = np.maximum(total, self.eps) + acc_mean = total_tp.astype(np.double) / total.astype(np.double) + + return acc_mean # returns "acc mean" + + ################################# IoU STUFF ################################## + ############################################################################## + + ############################# Panoptic STUFF ################################ + def addBatchPanoptic(self, x_sem_row, x_inst_row, y_sem_row, y_inst_row): + 
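+        # Each overlapping (pred, gt) instance pair is encoded as the single
+        # integer pred_id + offset * gt_id, so one np.unique call yields every
+        # pair together with its intersection size.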
# make sure instances are not zeros (it messes with my approach) + x_inst_row = x_inst_row + 1 + y_inst_row = y_inst_row + 1 + + # only interested in points that are outside the void area (not in excluded classes) + for cl in self.ignore: + # make a mask for this class + gt_not_in_excl_mask = y_sem_row != cl + # remove all other points + x_sem_row = x_sem_row[gt_not_in_excl_mask] + y_sem_row = y_sem_row[gt_not_in_excl_mask] + x_inst_row = x_inst_row[gt_not_in_excl_mask] + y_inst_row = y_inst_row[gt_not_in_excl_mask] + + metrics = {} + + # first step is to count intersections > 0.5 IoU for each class (except the ignored ones) + for cl in self.include: + # print("*"*80) + # print("CLASS", cl.item()) + # get a class mask + x_inst_in_cl_mask = x_sem_row == cl + y_inst_in_cl_mask = y_sem_row == cl + + # get instance points in class (makes outside stuff 0) + x_inst_in_cl = x_inst_row * x_inst_in_cl_mask.astype(np.int64) + y_inst_in_cl = y_inst_row * y_inst_in_cl_mask.astype(np.int64) + + # generate the areas for each unique instance prediction + unique_pred, counts_pred = np.unique( + x_inst_in_cl[x_inst_in_cl > 0], return_counts=True + ) + id2idx_pred = {id: idx for idx, id in enumerate(unique_pred)} + matched_pred = np.array([False] * unique_pred.shape[0]) + # print("Unique predictions:", unique_pred) + + # generate the areas for each unique instance gt_np + unique_gt, counts_gt = np.unique( + y_inst_in_cl[y_inst_in_cl > 0], return_counts=True + ) + id2idx_gt = {id: idx for idx, id in enumerate(unique_gt)} + matched_gt = np.array([False] * unique_gt.shape[0]) + # print("Unique ground truth:", unique_gt) + + # generate intersection using offset + valid_combos = np.logical_and(x_inst_in_cl > 0, y_inst_in_cl > 0) + offset_combo = ( + x_inst_in_cl[valid_combos] + self.offset * y_inst_in_cl[valid_combos] + ) + unique_combo, counts_combo = np.unique(offset_combo, return_counts=True) + + # generate an intersection map + # count the intersections with over 0.5 IoU as TP + gt_labels = unique_combo // self.offset + pred_labels = unique_combo % self.offset + gt_areas = np.array([counts_gt[id2idx_gt[id]] for id in gt_labels]) + pred_areas = np.array([counts_pred[id2idx_pred[id]] for id in pred_labels]) + intersections = counts_combo + unions = gt_areas + pred_areas - intersections + ious = intersections.astype(float) / unions.astype(float) + + tp_indexes = ious > 0.5 + self.pan_tp[cl] += np.sum(tp_indexes) + self.pan_iou[cl] += np.sum(ious[tp_indexes]) + + matched_gt[[id2idx_gt[id] for id in gt_labels[tp_indexes]]] = True + matched_pred[[id2idx_pred[id] for id in pred_labels[tp_indexes]]] = True + + # count the FN + self.pan_fn[cl] += np.sum( + np.logical_and(counts_gt >= self.min_points, matched_gt == False) + ) + + # count the FP + self.pan_fp[cl] += np.sum( + np.logical_and(counts_pred >= self.min_points, matched_pred == False) + ) + + if cl>0: + from tqdm import tqdm + for threshold in tqdm(np.arange(0.5, 0.95+0.01, 0.05)): + tp_indexes = ious > threshold + self.tps.append(np.sum(tp_indexes)) + + matched_gt[[id2idx_gt[id] for id in gt_labels[tp_indexes]]] = True + matched_pred[[id2idx_pred[id] for id in pred_labels[tp_indexes]]] = True + + # count the FN + self.fns.append(np.sum( + np.logical_and(counts_gt >= self.min_points, matched_gt == False) + )) + + # count the FP + self.fps.append(np.sum( + np.logical_and(counts_pred >= self.min_points, matched_pred == False) + )) + + + + def getPQ(self): + # first calculate for all classes + sq_all = self.pan_iou.astype(np.double) / np.maximum( + 
self.pan_tp.astype(np.double), self.eps + ) + rq_all = self.pan_tp.astype(np.double) / np.maximum( + self.pan_tp.astype(np.double) + + 0.5 * self.pan_fp.astype(np.double) + + 0.5 * self.pan_fn.astype(np.double), + self.eps, + ) + pq_all = sq_all * rq_all + + + # then do the REAL mean (no ignored classes) + SQ = sq_all[self.include].mean() + RQ = rq_all[self.include].mean() + PQ = pq_all[self.include].mean() + + return PQ, SQ, RQ, pq_all, sq_all, rq_all + + ############################# Panoptic STUFF ################################ + ############################################################################## + + def addBatch(self, x_sem, x_inst, y_sem, y_inst): # x=preds, y=targets + """ IMPORTANT: Inputs must be batched. Either [N,H,W], or [N, P] + """ + # add to IoU calculation (for checking purposes) + self.addBatchSemIoU(x_sem, y_sem) + + # now do the panoptic stuff + self.addBatchPanoptic(x_sem, x_inst, y_sem, y_inst) + + def addBatch_w_fname( + self, x_sem, x_inst, y_sem, y_inst, fname + ): # x=preds, y=targets + """ IMPORTANT: Inputs must be batched. Either [N,H,W], or [N, P] + """ + # add to IoU calculation (for checking purposes) + self.addBatchSemIoU(x_sem, y_sem) + + # now do the panoptic stuff + self.addBatchPanoptic(x_sem, x_inst, y_sem, y_inst) + + self.evaluated_fnames.append(fname) + + +class Panoptic4DEval: + """ Panoptic evaluation using numpy + + authors: Andres Milioto, Jens Behley, Aljosa Osep + + """ + + def __init__( + self, n_classes, device=None, ignore=None, offset=2 ** 32, min_points=30 + ): + self.n_classes = n_classes + # assert (device == None) + self.ignore = np.array(ignore, dtype=np.int64) + self.include = np.array( + [n for n in range(self.n_classes) if n not in self.ignore], dtype=np.int64 + ) + # print("[PANOPTIC4D EVAL] IGNORE: ", self.ignore) + # print("[PANOPTIC4D EVAL] INCLUDE: ", self.include) + + self.reset() + self.offset = offset # largest number of instances in a given scan + self.min_points = ( + min_points + ) # smallest number of points to consider instances in gt + self.eps = 1e-15 + + def num_classes(self): + return self.n_classes + + def reset(self): + # general things + # iou stuff + self.px_iou_conf_matrix = np.zeros( + (self.n_classes, self.n_classes), dtype=np.int64 + ) + + self.sequences = [] + self.preds = {} + self.gts = {} + self.intersects = {} + self.intersects_ovr = {} + + # Per-class association quality collect here + self.pan_aq = np.zeros(self.n_classes, dtype=np.double) + self.pan_aq_ovr = 0.0 + + ################################# IoU STUFF ################################## + def addBatchSemIoU(self, x_sem, y_sem): + # idxs are labels and predictions + idxs = np.stack([x_sem, y_sem], axis=0) + + # make confusion matrix (cols = gt, rows = pred) + np.add.at(self.px_iou_conf_matrix, tuple(idxs), 1) + + def getSemIoUStats(self): + # clone to avoid modifying the real deal + conf = self.px_iou_conf_matrix.copy().astype(np.double) + # remove fp from confusion on the ignore classes predictions + # points that were predicted of another class, but were ignore + # (corresponds to zeroing the cols of those classes, since the predictions + # go on the rows) + conf[:, self.ignore] = 0 + + # get the clean stats + tp = conf.diagonal() + fp = conf.sum(axis=1) - tp + fn = conf.sum(axis=0) - tp + return tp, fp, fn + + def getSemIoU(self): + tp, fp, fn = self.getSemIoUStats() + # print(f"tp={tp}") + # print(f"fp={fp}") + # print(f"fn={fn}") + intersection = tp + union = tp + fp + fn + + num_present_classes = np.count_nonzero(union) + + 
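+        # unlike PanopticEval.getSemIoU above, the mean here is taken only
+        # over classes that actually occur (non-empty union), not over all
+        # included classes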
union = np.maximum(union, self.eps) + iou = intersection.astype(np.double) / union.astype(np.double) + iou_mean = np.sum(iou) / num_present_classes + + return iou_mean, iou # returns "iou mean", "iou per class" ALL CLASSES + + def getSemAcc(self): + tp, fp, fn = self.getSemIoUStats() + total_tp = tp.sum() + total = tp[self.include].sum() + fp[self.include].sum() + total = np.maximum(total, self.eps) + acc_mean = total_tp.astype(np.double) / total.astype(np.double) + + return acc_mean # returns "acc mean" + + ################################# IoU STUFF ################################## + ############################################################################## + + ############################# Panoptic STUFF ################################ + + def update_dict_stat(self, stat_dict, unique_ids, unique_cnts): + for uniqueid, counts in zip(unique_ids, unique_cnts): + if uniqueid == 1: + continue # 1 -- no instance + if uniqueid in stat_dict: + stat_dict[uniqueid] += counts + else: + stat_dict[uniqueid] = counts + + def addBatchPanoptic4D(self, seq, x_sem_row, x_inst_row, y_sem_row, y_inst_row): + + # start = time.time() + if seq not in self.sequences: + self.sequences.append(seq) + self.preds[seq] = {} + self.gts[seq] = [{} for i in range(self.n_classes)] + self.intersects[seq] = [{} for i in range(self.n_classes)] + self.intersects_ovr[seq] = [{} for i in range(self.n_classes)] + + # make sure instances are not zeros (it messes with my approach) + x_inst_row = x_inst_row + 1 + y_inst_row = y_inst_row + 1 + + # only interested in points that are outside the void area (not in excluded classes) + for cl in self.ignore: + # make a mask for this class + gt_not_in_excl_mask = y_sem_row != cl + # remove all other points + x_sem_row = x_sem_row[gt_not_in_excl_mask] + y_sem_row = y_sem_row[gt_not_in_excl_mask] + x_inst_row = x_inst_row[gt_not_in_excl_mask] + y_inst_row = y_inst_row[gt_not_in_excl_mask] + + for cl in self.include: + # print("*"*80) + # print("CLASS", cl.item()) + + # Per-class accumulated stats + cl_preds = self.preds[seq] + cl_gts = self.gts[seq][cl] + cl_intersects = self.intersects[seq][cl] + + # get a binary class mask (filter acc. to semantic class!) + x_inst_in_cl_mask = x_sem_row == cl + y_inst_in_cl_mask = y_sem_row == cl + + # get instance points in class (mask-out everything but _this_ class) + x_inst_in_cl = x_inst_row * x_inst_in_cl_mask.astype(np.int64) + y_inst_in_cl = y_inst_row * y_inst_in_cl_mask.astype(np.int64) + + # generate the areas for each unique instance gt_np (i.e., set2) + unique_gt, counts_gt = np.unique( + y_inst_in_cl[y_inst_in_cl > 0], return_counts=True + ) + self.update_dict_stat( + cl_gts, + unique_gt[counts_gt > self.min_points], + counts_gt[counts_gt > self.min_points], + ) + + valid_combos_min_point = np.zeros_like( + y_inst_in_cl + ) # instances which have at least self.min points + for valid_id in unique_gt[counts_gt > self.min_points]: + valid_combos_min_point = np.logical_or( + valid_combos_min_point, y_inst_in_cl == valid_id + ) + + y_inst_in_cl = y_inst_in_cl * valid_combos_min_point + # generate the areas for each unique instance prediction (i.e., set1) + unique_pred, counts_pred = np.unique( + x_inst_in_cl[x_inst_in_cl > 0], return_counts=True + ) + + # is there better way to do this? 
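+            # accumulate per-sequence point counts of predictions, ground
+            # truth and their intersections; getPQ4D() later turns these
+            # dictionaries into the tube-level association quality (AQ)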
+ self.update_dict_stat(cl_preds, unique_pred, counts_pred) + + valid_combos = np.logical_and( + x_inst_row > 0, y_inst_in_cl > 0 + ) # Convert to boolean and do logical and, based on semantics + + # generate intersection using offset + offset_combo = ( + x_inst_row[valid_combos] + self.offset * y_inst_in_cl[valid_combos] + ) + unique_combo, counts_combo = np.unique(offset_combo, return_counts=True) + + self.update_dict_stat(cl_intersects, unique_combo, counts_combo) + + def getPQ4D(self): + num_tubes = [0] * self.n_classes + for seq in self.sequences: + for cl in self.include: + cl_preds = self.preds[seq] + cl_gts = self.gts[seq][cl] + cl_intersects = self.intersects[seq][cl] + outer_sum = 0.0 + outer_sum_iou = 0.0 + num_tubes[cl] += len(cl_gts) + for gt_id, gt_size in cl_gts.items(): + inner_sum = 0.0 + inner_sum_iou = 0.0 + for pr_id, pr_size in cl_preds.items(): + # TODO: pay attention for zero intersection! + TPA_key = pr_id + self.offset * gt_id + if TPA_key in cl_intersects: + TPA = cl_intersects[TPA_key] + Prec = TPA / float( + pr_size + ) # TODO I dont think these can ever be zero, but double check + Recall = TPA / float(gt_size) + TPA_ovr = self.intersects[seq][cl][TPA_key] + inner_sum_iou += TPA_ovr * ( + TPA_ovr / (gt_size + pr_size - TPA_ovr) + ) + if Prec > 1.0 or Recall > 1.0: + print("something wrong !!") + outer_sum_iou += 1.0 / float(gt_size) * float(inner_sum_iou) + self.pan_aq[ + cl + ] += outer_sum_iou # 1.0/float(len(cl_gts)) # Normalize by #tubes + self.pan_aq_ovr += outer_sum_iou + # ========== + + AQ_overall = np.sum(self.pan_aq_ovr) / np.sum(num_tubes[1:9]) + AQ = self.pan_aq / np.maximum(num_tubes, self.eps) + + iou_mean, iou = self.getSemIoU() + + PQ4D = math.sqrt(AQ_overall * iou_mean) + return PQ4D, AQ_overall, AQ, iou, iou_mean + + ############################# Panoptic STUFF ################################ + ############################################################################## + + def addBatch(self, seq, x_sem, x_inst, y_sem, y_inst): # x=preds, y=targets + """ IMPORTANT: Inputs must be batched. 
Either [N,H,W], or [N, P] + """ + # add to IoU calculation (for checking purposes) + self.addBatchSemIoU(x_sem, y_sem) + + # now do the panoptic stuff + self.addBatchPanoptic4D(seq, x_sem, x_inst, y_sem, y_inst) + + +def gen_psuedo_labels(n=50): + # generate ground truth and prediction + sem_pred = [] + inst_pred = [] + sem_gt = [] + inst_gt = [] + + # some ignore stuff + N_ignore = n + sem_pred.extend([0 for i in range(N_ignore)]) + inst_pred.extend([0 for i in range(N_ignore)]) + sem_gt.extend([0 for i in range(N_ignore)]) + inst_gt.extend([0 for i in range(N_ignore)]) + + # grass segment + N_grass = n + 1 + N_grass_pred = np.random.randint(0, N_grass) # rest is sky + sem_pred.extend([1 for i in range(N_grass_pred)]) # grass + sem_pred.extend([2 for i in range(N_grass - N_grass_pred)]) # sky + inst_pred.extend([0 for i in range(N_grass)]) + sem_gt.extend([1 for i in range(N_grass)]) # grass + inst_gt.extend([0 for i in range(N_grass)]) + + # sky segment + N_sky = n + 2 + N_sky_pred = np.random.randint(0, N_sky) # rest is grass + sem_pred.extend([2 for i in range(N_sky_pred)]) # sky + sem_pred.extend([1 for i in range(N_sky - N_sky_pred)]) # grass + inst_pred.extend([0 for i in range(N_sky)]) # first instance + sem_gt.extend([2 for i in range(N_sky)]) # sky + inst_gt.extend([0 for i in range(N_sky)]) # first instance + + # wrong dog as person prediction + N_dog = n + 3 + N_person = N_dog + sem_pred.extend([3 for i in range(N_person)]) + inst_pred.extend([35 for i in range(N_person)]) + sem_gt.extend([4 for i in range(N_dog)]) + inst_gt.extend([22 for i in range(N_dog)]) + + # two persons in prediction, but three in gt + N_person = n + 4 + sem_pred.extend([3 for i in range(6 * N_person)]) + inst_pred.extend([8 for i in range(4 * N_person)]) + inst_pred.extend([95 for i in range(2 * N_person)]) + sem_gt.extend([3 for i in range(6 * N_person)]) + inst_gt.extend([33 for i in range(3 * N_person)]) + inst_gt.extend([42 for i in range(N_person)]) + inst_gt.extend([11 for i in range(2 * N_person)]) + + # gt and pred to numpy + sem_pred = np.array(sem_pred, dtype=np.int64).reshape(1, -1) + inst_pred = np.array(inst_pred, dtype=np.int64).reshape(1, -1) + sem_gt = np.array(sem_gt, dtype=np.int64).reshape(1, -1) + inst_gt = np.array(inst_gt, dtype=np.int64).reshape(1, -1) + + return sem_pred, inst_pred, sem_gt, inst_gt + + +def test_4D(): + classes = 3 # ignore, car, truck + cl_strings = ["ignore", "car", "truck"] + ignore = [0] # only ignore ignore class + + sem_gt = np.zeros(20, dtype=np.int32) + sem_gt[5:10] = 1 + sem_gt[10:] = 2 + + inst_gt = np.zeros(20, dtype=np.int32) + inst_gt[5:10] = 1 + inst_gt[10:] = 1 + inst_gt[15:] = 2 + + # we have 3 instance 1 car, 2 truck as gt + sem_pred = np.zeros(20, dtype=np.int32) + sem_pred[5:10] = 1 + sem_pred[10:15] = 2 + sem_pred[15:] = 1 + + inst_pred = np.zeros(20, dtype=np.int32) + inst_pred[5:10] = 1 + inst_pred[10:] = 2 + + # evaluator + class_evaluator = Panoptic4DEval(3, None, ignore, offset=2 ** 32, min_points=1) + class_evaluator.addBatch(1, sem_pred, inst_pred, sem_gt, inst_gt) + PQ4D, AQ_ovr, AQ, iou, iou_mean = class_evaluator.getPQ4D() + + print("=== Results ===") + print("PQ4D:", PQ4D) + print("AQ_mean:", AQ_ovr) + print("AQ:", AQ) + print("iou:", iou) + print("iou_mean:", iou_mean) + + +if __name__ == "__main__": + # generate problem from He paper (https://arxiv.org/pdf/1801.00868.pdf) + classes = 5 # ignore, grass, sky, person, dog + cl_strings = ["ignore", "grass", "sky", "person", "dog"] + ignore = [0] # only ignore ignore class + min_points 
= 1 # for this example we care about all points + + sem_pred_0, inst_pred_0, sem_gt_0, inst_gt_0 = gen_psuedo_labels(50) + sem_pred_1, inst_pred_1, sem_gt_1, inst_gt_1 = gen_psuedo_labels(51) + + # evaluator + evaluator = PanopticEval(classes, ignore=ignore, min_points=1) + evaluator.addBatch(sem_pred_0, inst_pred_0, sem_gt_0, inst_gt_0) + evaluator.addBatch(sem_pred_1, inst_pred_1, sem_gt_1, inst_gt_1) + + evaluator_0 = PanopticEval(classes, ignore=ignore, min_points=1) + evaluator_0.addBatch(sem_pred_0, inst_pred_0, sem_gt_0, inst_gt_0) + + evaluator_1 = PanopticEval(classes, ignore=ignore, min_points=1) + evaluator_1.addBatch(sem_pred_1, inst_pred_1, sem_gt_1, inst_gt_1) + + pq, sq, rq, all_pq, all_sq, all_rq = evaluator.getPQ() + iou, all_iou = evaluator.getSemIoU() + + # [PANOPTIC EVAL] IGNORE: [0] + # [PANOPTIC EVAL] INCLUDE: [1 2 3 4] + # TOTALS + # PQ: 0.47916666666666663 + # SQ: 0.5520833333333333 + # RQ: 0.6666666666666666 + # IoU: 0.5476190476190476 + # Class ignore PQ: 0.0 SQ: 0.0 RQ: 0.0 IoU: 0.0 + # Class grass PQ: 0.6666666666666666 SQ: 0.6666666666666666 RQ: 1.0 IoU: 0.6666666666666666 + # Class sky PQ: 0.6666666666666666 SQ: 0.6666666666666666 RQ: 1.0 IoU: 0.6666666666666666 + # Class person PQ: 0.5833333333333333 SQ: 0.875 RQ: 0.6666666666666666 IoU: 0.8571428571428571 + # Class dog PQ: 0.0 SQ: 0.0 RQ: 0.0 IoU: 0.0 + + print("TOTALS") + print("PQ:", pq.item(), pq.item() == 0.47916666666666663) + print("SQ:", sq.item(), sq.item() == 0.5520833333333333) + print("RQ:", rq.item(), rq.item() == 0.6666666666666666) + print("IoU:", iou.item(), iou.item() == 0.5476190476190476) + for i, (pq, sq, rq, iou) in enumerate(zip(all_pq, all_sq, all_rq, all_iou)): + print( + "Class", + cl_strings[i], + "\t", + "PQ:", + pq.item(), + "SQ:", + sq.item(), + "RQ:", + rq.item(), + "IoU:", + iou.item(), + ) + + pq, sq, rq, all_pq, all_sq, all_rq = evaluator_0.getPQ() + iou, all_iou = evaluator_0.getSemIoU() + + # [PANOPTIC EVAL] IGNORE: [0] + # [PANOPTIC EVAL] INCLUDE: [1 2 3 4] + # TOTALS + # PQ: 0.47916666666666663 + # SQ: 0.5520833333333333 + # RQ: 0.6666666666666666 + # IoU: 0.5476190476190476 + # Class ignore PQ: 0.0 SQ: 0.0 RQ: 0.0 IoU: 0.0 + # Class grass PQ: 0.6666666666666666 SQ: 0.6666666666666666 RQ: 1.0 IoU: 0.6666666666666666 + # Class sky PQ: 0.6666666666666666 SQ: 0.6666666666666666 RQ: 1.0 IoU: 0.6666666666666666 + # Class person PQ: 0.5833333333333333 SQ: 0.875 RQ: 0.6666666666666666 IoU: 0.8571428571428571 + # Class dog PQ: 0.0 SQ: 0.0 RQ: 0.0 IoU: 0.0 + + print("TOTALS") + print("PQ:", pq.item(), pq.item() == 0.47916666666666663) + print("SQ:", sq.item(), sq.item() == 0.5520833333333333) + print("RQ:", rq.item(), rq.item() == 0.6666666666666666) + print("IoU:", iou.item(), iou.item() == 0.5476190476190476) + for i, (pq, sq, rq, iou) in enumerate(zip(all_pq, all_sq, all_rq, all_iou)): + print( + "Class", + cl_strings[i], + "\t", + "PQ:", + pq.item(), + "SQ:", + sq.item(), + "RQ:", + rq.item(), + "IoU:", + iou.item(), + ) + + pq, sq, rq, all_pq, all_sq, all_rq = evaluator_1.getPQ() + iou, all_iou = evaluator_1.getSemIoU() + + # [PANOPTIC EVAL] IGNORE: [0] + # [PANOPTIC EVAL] INCLUDE: [1 2 3 4] + # TOTALS + # PQ: 0.47916666666666663 + # SQ: 0.5520833333333333 + # RQ: 0.6666666666666666 + # IoU: 0.5476190476190476 + # Class ignore PQ: 0.0 SQ: 0.0 RQ: 0.0 IoU: 0.0 + # Class grass PQ: 0.6666666666666666 SQ: 0.6666666666666666 RQ: 1.0 IoU: 0.6666666666666666 + # Class sky PQ: 0.6666666666666666 SQ: 0.6666666666666666 RQ: 1.0 IoU: 0.6666666666666666 + # Class person PQ: 
0.5833333333333333 SQ: 0.875 RQ: 0.6666666666666666 IoU: 0.8571428571428571 + # Class dog PQ: 0.0 SQ: 0.0 RQ: 0.0 IoU: 0.0 + + print("TOTALS") + print("PQ:", pq.item(), pq.item() == 0.47916666666666663) + print("SQ:", sq.item(), sq.item() == 0.5520833333333333) + print("RQ:", rq.item(), rq.item() == 0.6666666666666666) + print("IoU:", iou.item(), iou.item() == 0.5476190476190476) + for i, (pq, sq, rq, iou) in enumerate(zip(all_pq, all_sq, all_rq, all_iou)): + print( + "Class", + cl_strings[i], + "\t", + "PQ:", + pq.item(), + "SQ:", + sq.item(), + "RQ:", + rq.item(), + "IoU:", + iou.item(), + ) + + import pickle + + with open("test.pkl", "wb") as f: + pickle.dump(evaluator_0, f) + + with open("test.pkl", "rb") as f: + evaluator_read = pickle.load(f) + evaluator_1.merge(evaluator_read) + + pq, sq, rq, all_pq, all_sq, all_rq = evaluator_1.getPQ() + iou, all_iou = evaluator_1.getSemIoU() + + # [PANOPTIC EVAL] IGNORE: [0] + # [PANOPTIC EVAL] INCLUDE: [1 2 3 4] + # TOTALS + # PQ: 0.47916666666666663 + # SQ: 0.5520833333333333 + # RQ: 0.6666666666666666 + # IoU: 0.5476190476190476 + # Class ignore PQ: 0.0 SQ: 0.0 RQ: 0.0 IoU: 0.0 + # Class grass PQ: 0.6666666666666666 SQ: 0.6666666666666666 RQ: 1.0 IoU: 0.6666666666666666 + # Class sky PQ: 0.6666666666666666 SQ: 0.6666666666666666 RQ: 1.0 IoU: 0.6666666666666666 + # Class person PQ: 0.5833333333333333 SQ: 0.875 RQ: 0.6666666666666666 IoU: 0.8571428571428571 + # Class dog PQ: 0.0 SQ: 0.0 RQ: 0.0 IoU: 0.0 + + print("TOTALS") + print("PQ:", pq.item(), pq.item() == 0.47916666666666663) + print("SQ:", sq.item(), sq.item() == 0.5520833333333333) + print("RQ:", rq.item(), rq.item() == 0.6666666666666666) + print("IoU:", iou.item(), iou.item() == 0.5476190476190476) + for i, (pq, sq, rq, iou) in enumerate(zip(all_pq, all_sq, all_rq, all_iou)): + print( + "Class", + cl_strings[i], + "\t", + "PQ:", + pq.item(), + "SQ:", + sq.item(), + "RQ:", + rq.item(), + "IoU:", + iou.item(), + ) diff --git a/instance_segmentation/mink_pan/utils/evaluate_panoptic.py b/instance_segmentation/mink_pan/utils/evaluate_panoptic.py new file mode 100755 index 0000000..2a26d2b --- /dev/null +++ b/instance_segmentation/mink_pan/utils/evaluate_panoptic.py @@ -0,0 +1,207 @@ +import numpy as np +import yaml +from mink_pan.utils.eval_np import PanopticEval +#from eval_np import PanopticEval + + +class PanopticKittiEvaluator: + def __init__(self, cfg=None): + + dataset_config_file = cfg.CONFIG + #dataset_config_file = "/home/penguin2/Documents/Strawberries/instance_segmentation/mink_panoptic/mink_pan/datasets/strawberries.yaml" + self.load_kitti_config(dataset_config_file) + + min_points = 20 + self.evaluator = PanopticEval( + self.nr_classes, None, self.ignore_class, min_points=min_points + ) + self.class_metrics = {} + self.mean_metrics = {} + + def reset(self): + self.evaluator.reset() + self.class_metrics = {} + self.mean_metrics = {} + + def load_kitti_config(self, config_file): + # Load semantic-kitti config + # data = yaml.safe_load(open('datasets/semantic-kitti.yaml', 'r')) + data = yaml.safe_load(open(config_file, "r")) + # get number of interest classes, and the label mappings + class_remap = data["learning_map"] + self.class_inv_remap = data["learning_map_inv"] + class_ignore = data["learning_ignore"] + self.nr_classes = len(self.class_inv_remap) + self.class_strings = data["labels"] + # make lookup table for mapping + maxkey = max(class_remap.keys()) + # +100 hack making lut bigger just in case there are unknown labels + class_lut = np.zeros((maxkey + 100), dtype=np.int32) 
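+        # class_lut maps raw label ids to training ids so whole label arrays
+        # can be remapped with a single fancy-indexing lookup,
+        # e.g. class_lut[raw_labels]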
+ class_lut[list(class_remap.keys())] = list(class_remap.values()) + self.ignore_class = [cl for cl, ignored in class_ignore.items() if ignored] + + self.class_inv_lut = np.zeros((2), dtype=np.int32) + + self.class_inv_lut[list(self.class_inv_remap.keys())] = list( + self.class_inv_remap.values() + ) + + self.things = [ + "strawberry", + ] + self.stuff = ["unlabeled",] + self.all_classes = self.things + self.stuff + + def update2(self, sem_preds, ins_preds, sem_label, ins_label): + for i in range(len(sem_preds)): + self.evaluator.addBatch( + sem_preds, + ins_preds, + sem_label, #inputs["sem_label"][i].reshape(-1), + ins_label, #inputs["ins_label"][i].reshape(-1), + #inputs["fname"][i], + ) + self.update_metrics() + + def update(self, sem_preds, ins_preds, inputs): + for i in range(len(sem_preds)): + self.evaluator.addBatch( + sem_preds[i], + ins_preds[i], + inputs["sem_label"][i].reshape(-1), + inputs["ins_label"][i].reshape(-1), + #inputs["fname"][i], + ) + self.update_metrics() + + def get_class_inv_lut(self): + return self.class_inv_lut + + def update_metrics(self): + class_PQ, class_SQ, class_RQ, class_all_PQ, class_all_SQ, class_all_RQ = ( + self.evaluator.getPQ() + ) + class_IoU, class_all_IoU = self.evaluator.getSemIoU() + + # now make a nice dictionary + output_dict = {} + + # make python variables + class_PQ = class_PQ.item() + class_SQ = class_SQ.item() + class_RQ = class_RQ.item() + class_all_PQ = class_all_PQ.flatten().tolist() + class_all_SQ = class_all_SQ.flatten().tolist() + class_all_RQ = class_all_RQ.flatten().tolist() + class_IoU = class_IoU.item() + class_all_IoU = class_all_IoU.flatten().tolist() + + output_dict["all"] = {} + output_dict["all"]["PQ"] = class_PQ + output_dict["all"]["SQ"] = class_SQ + output_dict["all"]["RQ"] = class_RQ + output_dict["all"]["IoU"] = class_IoU + + classwise_tables = {} + + for idx, (pq, rq, sq, iou) in enumerate( + zip(class_all_PQ, class_all_RQ, class_all_SQ, class_all_IoU) + ): + class_str = self.class_strings[self.class_inv_remap[idx]] + output_dict[class_str] = {} + output_dict[class_str]["PQ"] = pq + output_dict[class_str]["SQ"] = sq + output_dict[class_str]["RQ"] = rq + output_dict[class_str]["IoU"] = iou + + # Save per class metrics + self.class_metrics = output_dict + + PQ_all = np.mean([float(output_dict[c]["PQ"]) for c in self.all_classes]) + PQ_dagger = np.mean( + [float(output_dict[c]["PQ"]) for c in self.things] + + [float(output_dict[c]["IoU"]) for c in self.stuff] + ) + RQ_all = np.mean([float(output_dict[c]["RQ"]) for c in self.all_classes]) + SQ_all = np.mean([float(output_dict[c]["SQ"]) for c in self.all_classes]) + + PQ_things = np.mean([float(output_dict[c]["PQ"]) for c in self.things]) + RQ_things = np.mean([float(output_dict[c]["RQ"]) for c in self.things]) + SQ_things = np.mean([float(output_dict[c]["SQ"]) for c in self.things]) + + PQ_stuff = np.mean([float(output_dict[c]["PQ"]) for c in self.stuff]) + RQ_stuff = np.mean([float(output_dict[c]["RQ"]) for c in self.stuff]) + SQ_stuff = np.mean([float(output_dict[c]["SQ"]) for c in self.stuff]) + mIoU = output_dict["all"]["IoU"] + + codalab_output = {} + codalab_output["pq_mean"] = float(PQ_all) + codalab_output["pq_dagger"] = float(PQ_dagger) + codalab_output["sq_mean"] = float(SQ_all) + codalab_output["rq_mean"] = float(RQ_all) + codalab_output["iou_mean"] = float(mIoU) + codalab_output["pq_stuff"] = float(PQ_stuff) + codalab_output["rq_stuff"] = float(RQ_stuff) + codalab_output["sq_stuff"] = float(SQ_stuff) + codalab_output["pq_things"] = float(PQ_things) + 
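# Sanity note on these aggregates (toy numbers, illustrative only): per
# class, PQ factorizes as SQ * RQ, where SQ is the mean IoU over matched
# (IoU > 0.5) segments and RQ = TP / (TP + FP/2 + FN/2). E.g. matched
# IoUs [0.9, 0.8, 0.7] with 1 FP and 2 FN give SQ = 0.8,
# RQ = 3 / 4.5 ~ 0.667, PQ ~ 0.533; the *_things / *_stuff entries here
# are means of such per-class values.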
codalab_output["rq_things"] = float(RQ_things) + codalab_output["sq_things"] = float(SQ_things) + + # Save mean metrics + self.mean_metrics = codalab_output + + def get_mean_pq(self): + return self.mean_metrics["pq_mean"] + + def get_mean_iou(self): + return self.mean_metrics["iou_mean"] + + def get_mean_rq(self): + return self.mean_metrics["rq_mean"] + + def get_class_metrics(self): + return self.class_metrics + + def print_results(self): + evaluated_fnames = self.evaluator.evaluated_fnames + print( + "Evaluated {} frames. Duplicated frame number: {}".format( + len(evaluated_fnames), + len(evaluated_fnames) - len(set(evaluated_fnames)), + ) + ) + print("| | PQ | RQ | SQ | IoU |") + for k, v in self.class_metrics.items(): + print( + "|{}| {:.4f} | {:.4f} | {:.4f} | {:.4f} |".format( + k.ljust(8)[-8:], v["PQ"], v["RQ"], v["SQ"], v["IoU"] + ) + ) + for key in self.mean_metrics.keys(): + print("{}:\t{}".format(key, self.mean_metrics[key])) + + + #print("tps", self.evaluator.tps) + #print("fps", self.evaluator.fps) + #print("fns", self.evaluator.fns) + + tps = np.array(self.evaluator.tps) + fps = np.array(self.evaluator.fps) + fns = np.array(self.evaluator.fns) + + aps = np.where(fps+tps==0, 1, tps / (fps+tps)) + + #print("aps", aps) + print() + print("mAP", aps.mean()) + print() + + + + def print_fp_fn(self): + print("True Positive: ") + print("\t|\t".join([str(x) for x in self.evaluator.pan_tp])) + print("False Positive: ") + print("\t|\t".join([str(x) for x in self.evaluator.pan_fp])) + print("False Negative: ") + print("\t|\t".join([str(x) for x in self.evaluator.pan_fn])) diff --git a/instance_segmentation/mink_pan/utils/plot.py b/instance_segmentation/mink_pan/utils/plot.py new file mode 100644 index 0000000..1801e38 --- /dev/null +++ b/instance_segmentation/mink_pan/utils/plot.py @@ -0,0 +1,59 @@ +import matplotlib.pyplot as plt +import numpy as np +import open3d as o3d +import torch +import torch.nn.functional as F +from matplotlib import cm, colors + +norm = colors.Normalize(vmin=0, vmax=1) +colormap = cm.ScalarMappable(norm=norm) + + +def save_pcd(pcd, name): + vis = o3d.visualization.Visualizer() + vis.create_window(visible=False) + vis.add_geometry(pcd) + vis.update_geometry(pcd) + vis.poll_events() + vis.update_renderer() + vis.capture_screen_image(name + ".png", do_render=True) + vis.destroy_window() + + +def plot_instances(points, ins_pred, save=False, n=0): + inst_colors = generate_inst_colors() + for pts, pred in zip(points, ins_pred): + ids = np.unique(pred) + pcd = o3d.geometry.PointCloud() + pcd.points = o3d.utility.Vector3dVector(pts) + colors = np.zeros_like(pts) + for i in ids: + colors[pred == i] = inst_colors[i] + pcd.colors = o3d.utility.Vector3dVector(np.array(colors)) + if save: + o3d.io.write_point_cloud("val_pred/ins/" + str(n).zfill(6) + ".ply", pcd) + n += 1 + else: + o3d.visualization.draw_geometries([pcd]) + + +def plot_semantics(points, sem_pred, color_map, save=False, n=0): + for pts, pred in zip(points, sem_pred): + pcd = o3d.geometry.PointCloud() + pcd.points = o3d.utility.Vector3dVector(pts) + colors = np.array([color_map[lbl.item()][::-1] for lbl in pred]) / 255 + pcd.colors = o3d.utility.Vector3dVector(np.array(colors)) + if save: + o3d.io.write_point_cloud("val_pred/sem/" + str(n).zfill(6) + ".ply", pcd) + else: + o3d.visualization.draw_geometries([pcd]) + + +def generate_inst_colors(): + max_inst_id = 100000 # val set + # max_inst_id = 100000000 #test set + # make instance colors + inst_colors = np.random.uniform(low=0.0, high=1.0, size=(max_inst_id, 
3)) + # force zero to a gray-ish color + inst_colors[0] = np.full((3), 0.9) + return inst_colors diff --git a/instance_segmentation/mink_pan/utils/testing.py b/instance_segmentation/mink_pan/utils/testing.py new file mode 100644 index 0000000..9a31512 --- /dev/null +++ b/instance_segmentation/mink_pan/utils/testing.py @@ -0,0 +1,19 @@ +import os + +import numpy as np + + +def save_results(sem_preds, ins_preds, output_dir, batch, class_inv_lut): + for i in range(len(sem_preds)): + sem = sem_preds[i] + ins = ins_preds[i] + sem_inv = class_inv_lut[sem].astype(np.uint32) + label = sem_inv.reshape(-1, 1) + ( + (ins.astype(np.uint32) << 16) & 0xFFFF0000 + ).reshape(-1, 1) + + pcd_path = batch["fname"][i] + seq = pcd_path.split("/")[-3] + pcd_fname = pcd_path.split("/")[-1].split(".")[-2] + ".label" + fname = os.path.join(output_dir, seq, "predictions", pcd_fname) + label.reshape(-1).astype(np.uint32).tofile(fname) diff --git a/re_identification/animate3D.py b/re_identification/animate3D.py new file mode 100755 index 0000000..8840066 --- /dev/null +++ b/re_identification/animate3D.py @@ -0,0 +1,224 @@ +import open3d as o3d +import numpy as np +import yaml +from datasetgcn import * +from lossconnection import LossConnection +from tqdm import tqdm + +import typer +cli = typer.Typer() + +def apply(pcd, T, min_x, max_x, offset=None, mask=False): + pcd.transform(T) + + if mask: + points = np.array(pcd.points) + colors = np.array(pcd.colors) + mask1 = np.logical_and(points[:, 0]>=min_x, points[:, 0]<=max_x) + #mask2 = np.logical_and(points[:, 1]>=0.5, points[:, 1]<=1.785) + mask2 = np.logical_and(points[:, 1]>=0.5, points[:, 1]<=1.745) + mask = np.logical_and(mask1, mask2) + pcd.points = o3d.utility.Vector3dVector(points[mask]) + pcd.colors = o3d.utility.Vector3dVector(colors[mask]) + + R = pcd.get_rotation_matrix_from_xyz((0, 0, -np.pi / 3)) + rotc = pcd.get_center() + # pcd.rotate(R, center=rotc) + if offset is not None: + pcd.translate(offset) + + return pcd, rotc + +def apply_to_P(P, T, rot_center, offset=None): + pcd = o3d.geometry.PointCloud() + pcd.points = o3d.utility.Vector3dVector(P) + ##pcd.transform(T) + R = pcd.get_rotation_matrix_from_xyz((0, 0, -np.pi / 3)) + # pcd.rotate(R, center=rot_center) + if offset is not None: + pcd.translate(offset) + return np.array(pcd.points) + +def main( + datapath: str = typer.Option( + ..., + "--data", + help="data path", + ), + iou: float = typer.Option( + -1.0, + "--iou", + help="IoU threshold. 
If negative, load GT annotations.", + ), +): + transformation_path = os.path.join(datapath, "transformations.yaml") + + with open(transformation_path, "r") as stream: + try: + transformations = yaml.safe_load(stream)["transformations"] + except yaml.YAMLError as exc: + print(exc) + + gt_08 = np.asarray(transformations["gt_08"]) + gt_14 = np.asarray(transformations["gt_14"]) + gt_21 = np.asarray(transformations["gt_21"]) + + min_x = 27.6 + max_x = 28.5 + + if iou<0: + straw_path = os.path.join(datapath, f"14_21") + else: + straw_path = os.path.join(datapath, f"14_21_inst@{iou}") + + data_21_gt = Strawberries(f"{straw_path}/strawberries_21", os.path.join(straw_path, "selections_2.json"), gt_21, min_x=min_x, max_x=max_x) + data_14_test = Strawberries(f"{straw_path}/strawberries_14", os.path.join(straw_path, "selections_1.json"), gt_14, min_x=min_x, max_x=max_x) + conn_gt_test = LossConnection(os.path.join(straw_path, "connections.json"), data_14_test, data_21_gt) + + straw_path = os.path.join(datapath, f"08_14") + data_14_gt = Strawberries(f"{straw_path}/strawberries_14", os.path.join(straw_path, "selections_2.json"), gt_14, min_x=min_x, max_x=max_x) + data_08_gt = Strawberries(f"{straw_path}/strawberries_08", os.path.join(straw_path, "selections_1.json"), gt_08, min_x=min_x, max_x=max_x) + conn_gt = LossConnection(os.path.join(straw_path, "connections.json"), data_08_gt, data_14_gt) + + cloud_path = os.path.join(datapath, "reduced_08_14_1.ply") + pcd08 = o3d.io.read_point_cloud(cloud_path) + pcd08, rotc08 = apply(pcd08, gt_08, min_x, max_x, mask=True) + + + # off08_14 = [0, 0.151, 0] + off08_14 = [0, 0.25, 0] + cloud_path = os.path.join(datapath, "reduced_08_14_2.ply") + pcd14 = o3d.io.read_point_cloud(cloud_path) + pcd14, rotc14 = apply(pcd14, gt_14, min_x, max_x, off08_14, mask=True) + + + # off14_21 = [0, 0.32, 0] + off14_21 = [0, 0.5, 0] + cloud_path = os.path.join(datapath, "reduced_14_21_2.ply") + pcd21 = o3d.io.read_point_cloud(cloud_path) + pcd21, rotc21 = apply(pcd21, gt_21, min_x, max_x, off14_21, mask=True) + + + points, lines = [], [] + mask14 = np.zeros(data_14_gt.centers.shape[0], dtype=bool) + + P08 = apply_to_P(data_08_gt.centers, gt_08, rotc08, None) + P14 = apply_to_P(data_14_gt.centers, gt_14, rotc14, off08_14) + P14_test = apply_to_P(data_14_test.centers, gt_14, rotc14, off08_14) + P21 = apply_to_P(data_21_gt.centers, gt_21, rotc21, off14_21) + + for k_n in tqdm(conn_gt.connections_keys): + k_p = conn_gt.connections_keys[k_n] + + c_14 = P14[data_14_gt.keys.index(k_n)] + c_08 = P08[data_08_gt.keys.index(k_p)] + + mask14[data_14_gt.keys.index(k_n)] = True + + points.append(c_14) + points.append(c_08) + lines.append([len(points)-2, len(points)-1]) + + colors = [[0, 0.5, 0] for i in range(len(lines))] + + line_set_14 = o3d.geometry.LineSet( + points=o3d.utility.Vector3dVector(points), + lines=o3d.utility.Vector2iVector(lines), + ) + line_set_14.colors = o3d.utility.Vector3dVector(colors) + + spheres_14 = [] + spheres_14_c = [] + spheres_14_r = [] + + pcd14_points = np.array(pcd14.points) + pcd14_colors = np.array(pcd14.colors) + + for idx in range(len(data_14_gt.keys)): + if not mask14[idx]: + mask = np.linalg.norm(pcd14_points - P14[idx], axis=-1) <= max(data_14_gt.fruit_radius[idx], 1e-4) + pcd14_colors[mask] = [0.1, 0.1, 0.7] + # sphere = o3d.geometry.TriangleMesh.create_sphere(radius=data_14_gt.fruit_radius[idx]) + # sphere.translate(P14[idx]) + # sphere.paint_uniform_color([0.1, 0.1, 0.7]) + # spheres_14.append(sphere) + # spheres_14_c.append(sphere.get_center()) + # 
spheres_14_r.append(data_14_gt.fruit_radius[idx]) + + pcd14.colors = o3d.utility.Vector3dVector(pcd14_colors) + + ################ 21 to 14 + + points, lines = [], [] + mask21 = np.zeros(P21.shape[0], dtype=bool) + + for k_n in tqdm(conn_gt_test.connections_keys): + k_p = conn_gt_test.connections_keys[k_n] + + c_21 = P21[data_21_gt.keys.index(k_n)] + c_14 = P14_test[data_14_test.keys.index(k_p)] + + mask21[data_21_gt.keys.index(k_n)] = True + + points.append(c_21) + points.append(c_14) + lines.append([len(points)-2, len(points)-1]) + + colors = [[0, 0.5, 0] for i in range(len(lines))] + + line_set_21 = o3d.geometry.LineSet( + points=o3d.utility.Vector3dVector(points), + lines=o3d.utility.Vector2iVector(lines), + ) + line_set_21.colors = o3d.utility.Vector3dVector(colors) + + spheres_21 = [] + spheres_21_c = [] + spheres_21_r = [] + + pcd21_points = np.array(pcd21.points) + pcd21_colors = np.array(pcd21.colors) + + for idx in range(len(data_21_gt.keys)): + if not mask21[idx]: + mask = np.linalg.norm(pcd21_points - P21[idx], axis=-1) <= max(data_21_gt.fruit_radius[idx], 1e-4) + pcd21_colors[mask] = [0.1, 0.1, 0.7] + # sphere = o3d.geometry.TriangleMesh.create_sphere(radius=max(data_21_gt.fruit_radius[idx], 1e-4)) + # sphere.translate(P21[idx]) + # sphere.paint_uniform_color([0.1, 0.1, 0.7]) + # spheres_21.append(sphere) + # spheres_21_c.append(sphere.get_center()) + # spheres_21_r.append(data_21_gt.fruit_radius[idx]) + + pcd21.colors = o3d.utility.Vector3dVector(pcd21_colors) + + visualizer = o3d.visualization.Visualizer() + visualizer.create_window(width=3200, height=1600, visible=True) + visualizer.add_geometry(pcd08) + visualizer.add_geometry(pcd14) + visualizer.add_geometry(pcd21) + visualizer.add_geometry(line_set_14) + visualizer.add_geometry(line_set_21) + + # for s in spheres_14: + # visualizer.add_geometry(s) + # for s in spheres_21: + # visualizer.add_geometry(s) + visualizer.get_render_option().load_from_json("view_animate3D.json") + param = o3d.io.read_pinhole_camera_parameters("view_animate3D_pinhole.json") + visualizer.get_view_control().convert_from_pinhole_camera_parameters(param, True) + + def rotate_view(vis): + ctr = vis.get_view_control() + ctr.rotate(-1.5, 0.0) + return False + + visualizer.register_animation_callback(rotate_view) + visualizer.run() + # visualizer.get_render_option().save_to_json("view_animate3D.json") + # param = visualizer.get_view_control().convert_to_pinhole_camera_parameters() + # o3d.io.write_pinhole_camera_parameters("view_animate3D_pinhole.json", param) + + +if __name__ == "__main__": + typer.run(main) diff --git a/re_identification/associate.py b/re_identification/associate.py new file mode 100644 index 0000000..e903c5e --- /dev/null +++ b/re_identification/associate.py @@ -0,0 +1,165 @@ +import numpy as np +import open3d as o3d +import yaml +import json +from datasetgcn import * +from tqdm import tqdm + +import typer +cli = typer.Typer() + +@cli.command() +def main( + datapath: str = typer.Option( + ..., + "--data", + help="data path ()", + ), + iou_th: float = typer.Option( + ..., + "--iou", + help="IoU threshold for associating to ground truth (to build connections)", + ), +): + print("producing with IoU threshold:", iou_th) + + with open(f"{datapath}/transformations.yaml") as stream: + try: + transformations = yaml.safe_load(stream)["transformations"] + except yaml.YAMLError as exc: + print(exc) + + gt_08 = np.asarray(transformations["gt_08"]) + gt_14 = np.asarray(transformations["gt_14"]) + gt_21 = np.asarray(transformations["gt_21"]) + + + 
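# The gt_* entries are 4x4 homogeneous transforms that bring each
# session's cloud into a shared world frame so fruit spheres can be
# compared across dates. Minimal usage sketch (illustrative values,
# not the real calibration):
_T_demo = np.eye(4)
_T_demo[:3, 3] = [1.0, 0.0, 0.0]  # hypothetical 1 m shift along x
_pts_demo = np.array([[0.0, 0.0, 0.0], [0.5, 0.2, 0.1]])
_homo_demo = np.hstack([_pts_demo, np.ones((2, 1))])
_world_demo = (_T_demo @ _homo_demo.T).T[:, :3]  # rows now in world frame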
## load big cloud + path_to_cloud = f"{datapath}/reduced_14_21_2.ply" + # o3d load cloud + pcd = o3d.io.read_point_cloud(path_to_cloud) + pcd.transform(gt_21) + points = np.array(pcd.points) + colors = np.array(pcd.colors) + + ## load gt inst segm labels + ## use Strawberry class + test_next = Strawberries(f"{datapath}/14_21/strawberries_21", os.path.join(datapath, "14_21/selections_2.json"), gt_21, min_x=27.58, max_x=29.45) + + + ## load predicted inst segm labels + path_to_pred_instances = "../instance_segmentation/mink_pan/predicted_instances.npy" + instances = np.fromfile(path_to_pred_instances, dtype=np.int32) + assert instances.shape[0] == points.shape[0], str(instances.shape[0]) + " " + str(points.shape[0]) + + unique_insts, count = np.unique(instances, return_counts=True) + print("found", len(unique_insts), "unique predicted instances (including idx 0)") + + selections = {} + unassigned_id = max(test_next.keys) + 1000 + + def get_sphere_volume(r): + return 4/3*np.pi*(r**3) + def get_intersection_volume(c0, c1, r0, r1): + d = np.linalg.norm(c0-c1) + if d>= (r0+r1): + return 0 + if d<=abs(r0-r1): + return get_sphere_volume(min(r0, r1)) + return np.pi * (r0+r1-d)**2 * (d**2 + 2*d*(r0+r1) - 3*(r0-r1)**2) / 12.0 / d + def get_iou(c0, c1, r0, r1): + intersection = get_intersection_volume(c0, c1, r0, r1) + union = get_sphere_volume(r0) + get_sphere_volume(r1) - intersection + return intersection/union + + pred_keys = {} + + for u in tqdm(unique_insts, total=unique_insts.shape[0]): + if u ==0: + continue + mask = instances == u + pts_u = points[mask] + clr_u = colors[mask] + center = pts_u.mean(axis=0) + assert center.shape[0] == 3, "something wrong with np mean axis" + radius = np.linalg.norm(pts_u - center, axis=1, ord=2).max() + if radius>0: + pred_keys[u] = [center, radius, clr_u.mean(axis=0), clr_u.std(axis=0)] + + + pred_keys_list = list(pred_keys.keys()) + mask_alreadyassigned = np.zeros(len(pred_keys_list), dtype=bool) + mask_newid = np.zeros(len(pred_keys_list), dtype=int) + + for k in tqdm(test_next.keys, total=len(test_next.keys)): + ious_with_pred_spheres = np.zeros(len(pred_keys_list)) + + kidx = test_next.keys.index(k) + c = test_next.centers[kidx] + r = test_next.fruit_radius[kidx] + + for pre_idx, prk in enumerate(pred_keys_list): + iou = get_iou(c, pred_keys[prk][0], r, pred_keys[prk][1]) + ious_with_pred_spheres[pre_idx] = iou + + ious_with_pred_spheres[mask_alreadyassigned] = -1 + best_idx = np.argmax(ious_with_pred_spheres) + best_iou = ious_with_pred_spheres[best_idx] + + if best_iou > iou_th: + mask_newid[best_idx] = k + mask_alreadyassigned[best_idx]=True + + + print("assigned: ", mask_alreadyassigned.sum(), "/", len(pred_keys_list)) + print("gt", len(test_next.keys)) + + for i in range(mask_alreadyassigned.shape[0]): + predk = pred_keys_list[i] + k = mask_newid[i] + + center_ = np.ones((1, 4)) + center_[0, :3] = pred_keys[predk][0] + center = np.matmul(np.linalg.inv(gt_21), center_.T).T[0, :3] + + if k>0: + selections[int(k)] = { + "id": int(k), + "center": center.tolist(), + "radius": pred_keys[predk][1], + "color_mean": pred_keys[predk][2].tolist(), + "color_std": pred_keys[predk][3].tolist(), + "color_display": np.random.uniform(3, 0, 1).tolist(), + "bestiou": best_iou, + } + else: + selections[int(unassigned_id)] = { + "id": int(unassigned_id), + "center": center.tolist(), + "radius": pred_keys[predk][1], + "color_mean": pred_keys[predk][2].tolist(), + "color_std": pred_keys[predk][3].tolist(), + "color_display": np.random.uniform(3, 0, 1).tolist(), + "bestiou": 
best_iou, + } + unassigned_id += 1 + + + outpath = f"{datapath}/14_21_inst@{iou_th}" + os.makedirs(outpath, exist_ok=True) + + os.system(f'ln -s {datapath}/14_21/cloud_1.ply {os.path.join(outpath, "cloud_1.ply")}') + os.system(f'ln -s {datapath}/14_21/cloud_2.ply {os.path.join(outpath, "cloud_2.ply")}') + os.system(f'ln -s {datapath}/14_21/connections.json {os.path.join(outpath, "connections.json")}') + os.system(f'ln -s {datapath}/14_21/selections_1.json {os.path.join(outpath, "selections_1.json")}') + os.system(f'ln -s {datapath}/14_21/strawberries_14_more {os.path.join(outpath, "strawberries_14_more")}') + os.makedirs(os.path.join(outpath, "strawberries_21_more"), exist_ok=True) + + + print("dumping to json", end="...") + with open(os.path.join(outpath, "selections_2.json"), "w") as outfile: + json.dump(selections, outfile, indent = 4) + print("done! finished.") + +if __name__ == "__main__": + cli() \ No newline at end of file diff --git a/re_identification/backbone.py b/re_identification/backbone.py new file mode 100644 index 0000000..aac6595 --- /dev/null +++ b/re_identification/backbone.py @@ -0,0 +1,213 @@ +import MinkowskiEngine as ME +import numpy as np +import torch +import torch.nn as nn +import torch.nn.functional as F +import open3d as o3d +import math +import cv2 + + + +class MinkEncoderDecoder(nn.Module): + """ + Basic ResNet architecture using sparse convolutions + """ + + def __init__(self, cfg, template_points): + super().__init__() + + cr = cfg.CR + self.D = cfg.DIMENSION + input_dim = cfg.INPUT_DIM + self.res = cfg.RESOLUTION + self.interpolate = False #cfg.INTERPOLATE + self.feat_key = cfg.FEAT_KEY + + self.template_points = template_points + + cs = cfg.CHANNELS + cs = [int(cr*x) for x in cs] + self.stem = nn.Sequential( + ME.MinkowskiConvolution(input_dim, + cs[0], + kernel_size=3, + stride=1, + dimension=self.D), + ME.MinkowskiBatchNorm(cs[0]), + ME.MinkowskiReLU(True), + ME.MinkowskiConvolution(cs[0], + cs[0], + kernel_size=3, + stride=1, + dimension=self.D), + ME.MinkowskiBatchNorm(cs[0]), + ME.MinkowskiReLU(inplace=True), + ) + + self.stage1 = nn.Sequential( + BasicConvolutionBlock(cs[0], + cs[0], + ks=2, + stride=2, + dilation=1, + D=self.D), + ResidualBlock(cs[0], cs[1], ks=3, stride=1, dilation=1, D=self.D), + ResidualBlock(cs[1], cs[1], ks=3, stride=1, dilation=1, D=self.D), + ) + + self.stage2 = nn.Sequential( + BasicConvolutionBlock(cs[1], + cs[1], + ks=2, + stride=2, + dilation=1, + D=self.D), + ResidualBlock(cs[1], cs[2], ks=3, stride=1, dilation=1, D=self.D), + ResidualBlock(cs[2], cs[2], ks=3, stride=1, dilation=1, D=self.D), + ) + + self.stage3 = nn.Sequential( + BasicConvolutionBlock(cs[2], + cs[2], + ks=2, + stride=2, + dilation=1, + D=self.D), + ResidualBlock(cs[2], cs[3], ks=3, stride=1, dilation=1, D=self.D), + ResidualBlock(cs[3], cs[3], ks=3, stride=1, dilation=1, D=self.D), + ) + + self.stage4 = nn.Sequential( + BasicConvolutionBlock(cs[3], + cs[3], + ks=2, + stride=2, + dilation=1, + D=self.D), + ResidualBlock(cs[3], cs[4], ks=3, stride=1, dilation=1, D=self.D), + ResidualBlock(cs[4], cs[4], ks=3, stride=1, dilation=1, D=self.D), + ) + self.pool = ME.MinkowskiGlobalAvgPooling() + + def forward(self, in_field): + x0 = self.stem(in_field.sparse()) + x1 = self.stage1(x0) + x2 = self.stage2(x1) + x3 = self.stage3(x2) + x4 = self.stage4(x3) + return torch.cat(self.pool(x4).decomposed_features) + + +class BasicConvolutionBlock(nn.Module): + + def __init__(self, inc, outc, ks=3, stride=1, dilation=1, D=3): + super().__init__() + self.net = 
nn.Sequential( + ME.MinkowskiConvolution(inc, + outc, + kernel_size=ks, + dilation=dilation, + stride=stride, + dimension=D), + ME.MinkowskiBatchNorm(outc), + ME.MinkowskiLeakyReLU(inplace=True), + ) + + def forward(self, x): + out = self.net(x) + return out + + +class BasicDeconvolutionBlock(nn.Module): + + def __init__(self, inc, outc, ks=3, stride=1, D=3): + super().__init__() + self.net = nn.Sequential( + ME.MinkowskiConvolutionTranspose(inc, + outc, + kernel_size=ks, + stride=stride, + dimension=D), + ME.MinkowskiBatchNorm(outc), + ME.MinkowskiLeakyReLU(inplace=True), + ) + + def forward(self, x): + return self.net(x) + + +class ResidualBlock(nn.Module): + + def __init__(self, inc, outc, ks=3, stride=1, dilation=1, D=3): + super().__init__() + self.net = nn.Sequential( + ME.MinkowskiConvolution(inc, + outc, + kernel_size=ks, + dilation=dilation, + stride=stride, + dimension=D), + ME.MinkowskiBatchNorm(outc), + ME.MinkowskiReLU(inplace=True), + ME.MinkowskiConvolution(outc, + outc, + kernel_size=ks, + dilation=dilation, + stride=1, + dimension=D), + ME.MinkowskiBatchNorm(outc), + ) + + self.downsample = (nn.Sequential() if + (inc == outc and stride == 1) else nn.Sequential( + ME.MinkowskiConvolution(inc, + outc, + kernel_size=1, + dilation=1, + stride=stride, + dimension=D), + ME.MinkowskiBatchNorm(outc), + )) + + self.relu = ME.MinkowskiReLU(inplace=True) + + def forward(self, x): + out = self.relu(self.net(x) + self.downsample(x)) + return out + +class PositionalEncoder(nn.Module): + def __init__(self, feature_dim): + super().__init__() + self.max_freq = 10000 #cfg.MAX_FREQ + self.dimensionality = 3 #cfg.DIMENSIONALITY + self.num_bands = math.floor(feature_dim / self.dimensionality / 2) + self.base = 2#cfg.BASE + pad = feature_dim - self.num_bands * 2 * self.dimensionality + self.zero_pad = nn.ZeroPad2d((pad, 0, 0, 0)) # left padding + + def forward(self, _x): + """ + _x [B,N,3]: batched point coordinates + returns: [B,N,C]: positional encoding of dimension C + """ + x = _x.clone() + x[:, :, 0] = x[:, :, 0] / 48 + x[:, :, 1] = x[:, :, 1] / 48 + x[:, :, 2] = x[:, :, 2] / 4 + x = x.unsqueeze(-1) + scales = torch.logspace( + 0.0, + math.log(self.max_freq / 2) / math.log(self.base), + self.num_bands, + base=self.base, + device=x.device, + dtype=x.dtype, + ) + # reshaping + scales = scales[(*((None,) * (len(x.shape) - 1)), Ellipsis)] + x = x * scales * math.pi + x = torch.cat([x.sin(), x.cos()], dim=-1) + x = x.flatten(2) + enc = self.zero_pad(x) + return enc diff --git a/re_identification/backbone.yaml b/re_identification/backbone.yaml new file mode 100644 index 0000000..c7b25d4 --- /dev/null +++ b/re_identification/backbone.yaml @@ -0,0 +1,35 @@ +GENERAL: + initial_lr: 3.0e-4 + +BACKBONE: + NAME: "minkunet" + + minkunet: + DIMENSION: 3 # 3D pts + CR: 1 # channel ratio, change feature dimensions + #CHANNELS: [8, 12, 16, 16, 16] + CHANNELS: [8, 8, 16, 16, 64] + #CHANNELS: [8, 16, 16, 32, 64] + #CHANNELS: [8, 16, 32, 32, 32] + RESOLUTION: 0.0005 + INTERPOLATE: True # feature interpolation + KNN_UP: 3 + FEAT_KEY: normals #dict key to acces features + INPUT_DIM: 6 # feat dim: coords + intensity + PRETRAINING: + NUM_NEG: 600 + dropout: 0.05 + +NECK: + NAME: "None" + #NAME: "gcn" + +NECK_gcn: + CONV_LAYER: "GCNConv" + #CONV_LAYER: "EdgeConv" + graph_radius: 0.2 + +MATCHER: + INTERN_DIM: 512 + DIM_FEEDFORWARD: 512 #512 + NUM_HEADS: 8 diff --git a/re_identification/datasetgcn.py b/re_identification/datasetgcn.py new file mode 100644 index 0000000..10a3f26 --- /dev/null +++ 
b/re_identification/datasetgcn.py @@ -0,0 +1,272 @@ +import open3d as o3d +import json +from tqdm import tqdm +import yaml +import numpy as np +import open3d.visualization as vis +import random +import torch +from torch.utils.data import Dataset +from scipy.spatial.transform import Rotation as R +import glob, os +import MinkowskiEngine as ME + + +def transform(point, T): + point = np.hstack([np.array(point), np.array([1])]) + return np.matmul(T, point)[:3] + + + +class Strawberries(Dataset): + def __init__(self, ply_path, annopath, T, min_x, max_x, min_x2=None, max_x2=None, training=False): + self.centers = [] + self.failcenters = [] + self.failpath = [] + self.keys = [] + self.fruit_radius = [] + self.T = T + self.foldername1 = ply_path #f"../strawberries_{seq}" + self.foldername2 = ply_path + "_more" #f"../strawberries_{seq}_more" + #self.foldername_detectorfail = f"../strawberries_detectorfailssimulation_{seq}" + self.training = training + + self.nomink = False + self.N_max = 500 + + self.generatefails = False + + with open(annopath) as f: + annotations = json.load(f) + + for k in annotations: + self.centers.append(annotations[k]['center']) + self.keys.append(annotations[k]['id']) + self.fruit_radius.append(annotations[k]['radius']) + + self.centers = np.array(self.centers) + self.keys = np.array(self.keys) + self.fruit_radius = np.array(self.fruit_radius) + + #### transform centers to world frame #### + pts = np.ones((self.centers.shape[0], 4)) + pts[:, :3] = self.centers + pts = pts.T + self.centers = np.matmul(self.T, pts).T[:, :3] + + #print("min x: ", self.centers[:, 0].min()) + #print("max x: ", self.centers[:, 0].max()) + + if self.training and self.generatefails: + + files = glob.glob(f"{self.foldername_detectorfail}/fail_*") + for file in files: + pcd = o3d.io.read_point_cloud(file) + self.failcenters.append(pcd.get_center()) + self.failpath.append(os.path.basename(file)) + + self.failcenters = np.array(self.failcenters) + self.failpath = np.array(self.failpath) + + + #### transform centers to world frame #### + pts = np.ones((self.failcenters.shape[0], 4)) + pts[:, :3] = self.failcenters + pts = pts.T + self.failcenters = np.matmul(self.T, pts).T[:, :3] + + + #### load only part of the dataset #### + if training: + if min_x2 is None: + mask = np.logical_and(self.centers[:, 0]>min_x, self.centers[:, 0]min_x2, self.centers[:, 0]min_x, self.centers[:, 0]min_x2, self.centers[:, 0]min_x,self.centers[:, 0]min_x,self.failcenters[:, 0]self.N_max: + idxs = np.arange(0, pcd_features.shape[0], 1) + np.random.shuffle(idxs) + idxs = idxs[:self.N_max] + pcd_features = pcd_features[idxs] + elif pcd_features.shape[0] [C, M] -> [1, C, M] -> [N, C, M] -> [M, N, C] + next_rep = descriptors_next.T[None].expand(N, C, M).permute(2, 0, 1) + + ## [N, C] -> [1, N, C] -> [M, N, C] + from_rep = descriptors_from[None].expand(M, N, C) + + vec = next_centers_minus_from_centers.cuda() + if augment: + vec += torch.normal(0, self.std_err, vec.shape).cuda() + vecenv = self.posenc(vec) + + """ + ## +-----+ + ## C/____ /| 1) from descriptors (from_rep) + ## C/ /|+ 0) next descriptors (next_rep) + ## +-----+ |/ + ## M | | / + ## | |/ ~~~~ will be altered with position encoding (vecenv) + ## +-----+ + ## N + """ + src = torch.cat((next_rep, from_rep), dim=2) + vecenv + unk = torch.zeros((src.shape[0], 1, src.shape[2])).cuda() + + """ + ## +0-----+ + ## C/0____ /| 1) from descriptors (from_rep) + ## C/0 /|+ 0) next descriptors (next_rep) + ## +0-----+ |/ + ## M |0 | / + ## |0 |/ first "column" is for "no match" 
prediction + ## +0-----+ + ## N + """ + return torch.cat((unk, src), dim=1) + + def forward(self, descriptors_from, graph_descriptors_next, next_centered, pos_augment): + src = self.getreadyforattention(descriptors_from, graph_descriptors_next, next_centered, pos_augment) + predicted_matrix_logits = self.selfattention(src) + return predicted_matrix_logits \ No newline at end of file diff --git a/re_identification/singlepcd.py b/re_identification/singlepcd.py new file mode 100755 index 0000000..cb84007 --- /dev/null +++ b/re_identification/singlepcd.py @@ -0,0 +1,287 @@ +import open3d as o3d +import numpy as np +import yaml +from datasetgcn import * +from lossconnection import LossConnection +from tqdm import tqdm + +import typer +cli = typer.Typer() + +def apply(pcd, T, min_x, max_x, offset=None, mask=False): + pcd.transform(T) + + if mask: + points = np.array(pcd.points) + colors = np.array(pcd.colors) + mask1 = np.logical_and(points[:, 0]>=min_x, points[:, 0]<=max_x) + #mask2 = np.logical_and(points[:, 1]>=0.5, points[:, 1]<=1.785) + mask2 = np.logical_and(points[:, 1]>=0.5, points[:, 1]<=1.745) + mask = np.logical_and(mask1, mask2) + pcd.points = o3d.utility.Vector3dVector(points[mask]) + pcd.colors = o3d.utility.Vector3dVector(colors[mask]) + + R = pcd.get_rotation_matrix_from_xyz((0, 0, -np.pi / 3)) + rotc = pcd.get_center() + # pcd.rotate(R, center=rotc) + if offset is not None: + pcd.translate(offset) + + return pcd, rotc + +def apply_to_P(P, T, rot_center, offset=None): + pcd = o3d.geometry.PointCloud() + pcd.points = o3d.utility.Vector3dVector(P) + ##pcd.transform(T) + R = pcd.get_rotation_matrix_from_xyz((0, 0, -np.pi / 3)) + # pcd.rotate(R, center=rot_center) + if offset is not None: + pcd.translate(offset) + return np.array(pcd.points) + +def get_inst_mask(data, coords, data_centers_modified): + inst_mask = np.zeros(coords.shape[0], dtype=np.int32) + + for k, c, r in tqdm(zip(data.keys, data_centers_modified, data.fruit_radius)): + if k==0: + continue + mask = np.linalg.norm(coords-c, axis=1) <= r + inst_mask[mask] = k + #break + return inst_mask + +def mask2colors(colors, inst_mask): + u = np.unique(inst_mask) + np.random.seed(0) + table = np.random.uniform(0.1, 1.0, (u.max()+1, 3)) + table[0, :] = 0 + + newcolors = table[inst_mask] + newcolors = np.where(newcolors==0, colors/2, newcolors) + return newcolors + +def homokeys(data_from, data_next, conn): + keys_from = data_from.keys + keys_next = data_next.keys + + new_ids = np.zeros(len(keys_from), dtype=np.int32) + offset_id = 5000 + + for i, k_from in enumerate(keys_from): + if conn.connmatrix[i+1, 0] == 1: # no match + new_ids[i] = offset_id + offset_id += 1 + else: + new_ids[i] = keys_next[conn.connmatrix[i+1, :].argmax()-1] + return new_ids + +def main( + datapath: str = typer.Option( + ..., + "--data", + help="data path", + ), + iou: float = typer.Option( + -1.0, + "--iou", + help="IoU threshold. 
If negative, load GT annotations.", + ), +): + transformation_path = os.path.join(datapath, "transformations.yaml") + + with open(transformation_path, "r") as stream: + try: + transformations = yaml.safe_load(stream)["transformations"] + except yaml.YAMLError as exc: + print(exc) + + gt_08 = np.asarray(transformations["gt_08"]) + gt_14 = np.asarray(transformations["gt_14"]) + gt_21 = np.asarray(transformations["gt_21"]) + + min_x = 28.5 #27.6 + max_x = 29.3 #28.5 + + if iou<0: + straw_path = os.path.join(datapath, f"14_21") + else: + straw_path = os.path.join(datapath, f"14_21_inst@{iou}") + + data_21_test = Strawberries(f"{straw_path}/strawberries_21", os.path.join(straw_path, "selections_2.json"), gt_21, min_x=min_x, max_x=max_x) + data_14_test = Strawberries(f"{straw_path}/strawberries_14", os.path.join(straw_path, "selections_1.json"), gt_14, min_x=min_x, max_x=max_x) + conn_gt_test = LossConnection(os.path.join(straw_path, "connections.json"), data_14_test, data_21_test) + + straw_path = os.path.join(datapath, f"08_14") + data_14_gt = Strawberries(f"{straw_path}/strawberries_14", os.path.join(straw_path, "selections_2.json"), gt_14, min_x=min_x, max_x=max_x) + data_08_gt = Strawberries(f"{straw_path}/strawberries_08", os.path.join(straw_path, "selections_1.json"), gt_08, min_x=min_x, max_x=max_x) + conn_gt = LossConnection(os.path.join(straw_path, "connections.json"), data_08_gt, data_14_gt) + + cloud_path = os.path.join(datapath, "reduced_08_14_1.ply") + pcd08 = o3d.io.read_point_cloud(cloud_path) + pcd08, rotc08 = apply(pcd08, gt_08, min_x, max_x, mask=True) + + + # off08_14 = [0, 0.151, 0] + off08_14 = [0, 0.25, 0] + cloud_path = os.path.join(datapath, "reduced_08_14_2.ply") + pcd14 = o3d.io.read_point_cloud(cloud_path) + pcd14, rotc14 = apply(pcd14, gt_14, min_x, max_x, off08_14, mask=True) + + + # off14_21 = [0, 0.32, 0] + off14_21 = [0, 0.5, 0] + cloud_path = os.path.join(datapath, "reduced_14_21_2.ply") + pcd21 = o3d.io.read_point_cloud(cloud_path) + pcd21, rotc21 = apply(pcd21, gt_21, min_x, max_x, off14_21, mask=True) + + + points, lines = [], [] + mask14 = np.zeros(data_14_gt.centers.shape[0], dtype=bool) + + P08 = apply_to_P(data_08_gt.centers, gt_08, rotc08, None) + P14 = apply_to_P(data_14_gt.centers, gt_14, rotc14, off08_14) + P14_test = apply_to_P(data_14_test.centers, gt_14, rotc14, off08_14) + P21 = apply_to_P(data_21_test.centers, gt_21, rotc21, off14_21) + + for k_n in tqdm(conn_gt.connections_keys): + k_p = conn_gt.connections_keys[k_n] + + c_14 = P14[data_14_gt.keys.index(k_n)] + c_08 = P08[data_08_gt.keys.index(k_p)] + + mask14[data_14_gt.keys.index(k_n)] = True + + points.append(c_14) + points.append(c_08) + lines.append([len(points)-2, len(points)-1]) + + colors = [[0, 0.5, 0] for i in range(len(lines))] + + line_set_14 = o3d.geometry.LineSet( + points=o3d.utility.Vector3dVector(points), + lines=o3d.utility.Vector2iVector(lines), + ) + line_set_14.colors = o3d.utility.Vector3dVector(colors) + + pcd14_points = np.array(pcd14.points) + pcd14_colors = np.array(pcd14.colors) + + for idx in range(len(data_14_gt.keys)): + if not mask14[idx]: + mask = np.linalg.norm(pcd14_points - P14[idx], axis=-1) <= max(data_14_gt.fruit_radius[idx], 1e-4) + pcd14_colors[mask] = [0.1, 0.1, 0.7] + + pcd14.colors = o3d.utility.Vector3dVector(pcd14_colors) + + ################ 21 to 14 + + points, lines = [], [] + mask21 = np.zeros(P21.shape[0], dtype=bool) + + for k_n in tqdm(conn_gt_test.connections_keys): + k_p = conn_gt_test.connections_keys[k_n] + + c_21 = 
P21[data_21_test.keys.index(k_n)] + c_14 = P14_test[data_14_test.keys.index(k_p)] + + mask21[data_21_test.keys.index(k_n)] = True + + points.append(c_21) + points.append(c_14) + lines.append([len(points)-2, len(points)-1]) + + colors = [[0, 0.5, 0] for i in range(len(lines))] + + line_set_21 = o3d.geometry.LineSet( + points=o3d.utility.Vector3dVector(points), + lines=o3d.utility.Vector2iVector(lines), + ) + line_set_21.colors = o3d.utility.Vector3dVector(colors) + + + singlepcd = False + coloredsinglepcd = True + + if singlepcd: + pcd21_points = np.array(pcd21.points) + pcd21_colors = np.array(pcd21.colors) + + for idx in range(len(data_21_test.keys)): + if not mask21[idx]: + mask = np.linalg.norm(pcd21_points - P21[idx], axis=-1) <= max(data_21_test.fruit_radius[idx], 1e-4) + pcd21_colors[mask] = [0.1, 0.1, 0.7] + + pcd21.colors = o3d.utility.Vector3dVector(pcd21_colors) + elif coloredsinglepcd: + pcd21_points = np.array(pcd21.points) + pcd21_colors = np.array(pcd21.colors) + + inst_mask = get_inst_mask(data_21_test, pcd21_points, P21) + pcd21.colors = o3d.utility.Vector3dVector(mask2colors(pcd21_colors, inst_mask)) + + pcd14_points = np.array(pcd14.points) + pcd14_colors = np.array(pcd14.colors) + new_keys14_test = homokeys(data_14_test, data_21_test, conn_gt_test) + data_14_test.keys = new_keys14_test + + inst_mask = get_inst_mask(data_14_test, pcd14_points, P14) + pcd14.colors = o3d.utility.Vector3dVector(mask2colors(pcd14_colors, inst_mask)) + + with open("instsegm_gcnconv_2_minxmaxx.pickle", "rb") as handle: + import pickle + metrics = pickle.load(handle) + metrics["gt_keys"] -= 1 + mask = metrics["mask"] + target_p = metrics["hungpreds"] + + # tpm, wpm, tn, fp, fn = 0, 1, 2, 3, 4 | RGB + colors = [[0, 255, 0], [255, 0, 0], [0, 255, 0], [255, 165, 0], [255, 0, 0]] + colors = np.array(colors)/255.0 + + points, lines = [], [] + line_color = [] + for i in range(mask.shape[0]): + + if mask[i] in [0, 1, 3]: # tpm, wpm, fp + c_21 = P21[i] + c_14 = P14_test[target_p[i]-1] + + mask21[data_21_test.keys.index(k_n)] = True + + points.append(c_21) + points.append(c_14) + lines.append([len(points)-2, len(points)-1]) + line_color.append(colors[mask[i]]) + + line_set_21 = o3d.geometry.LineSet( + points=o3d.utility.Vector3dVector(points), + lines=o3d.utility.Vector2iVector(lines), + ) + line_set_21.colors = o3d.utility.Vector3dVector(line_color) + + visualizer = o3d.visualization.Visualizer() + visualizer.create_window(width=3200, height=1600, visible=True) + # visualizer.add_geometry(pcd08) + visualizer.add_geometry(pcd14) + visualizer.add_geometry(pcd21) + # visualizer.add_geometry(line_set_14) + visualizer.add_geometry(line_set_21) + + visualizer.get_render_option().load_from_json("view_animate3D.json") + param = o3d.io.read_pinhole_camera_parameters("view_animate3D_pinhole2.json") + visualizer.get_view_control().convert_from_pinhole_camera_parameters(param, True) + + def rotate_view(vis): + ctr = vis.get_view_control() + ctr.rotate(-1.5, 0.0) + return False + + visualizer.register_animation_callback(rotate_view) + visualizer.run() + # visualizer.get_render_option().save_to_json("view_animate3D.json") + # param = visualizer.get_view_control().convert_to_pinhole_camera_parameters() + # o3d.io.write_pinhole_camera_parameters("view_animate3D_pinhole2.json", param) + + +if __name__ == "__main__": + typer.run(main) diff --git a/re_identification/train.py b/re_identification/train.py new file mode 100644 index 0000000..a2b8cbe --- /dev/null +++ b/re_identification/train.py @@ -0,0 +1,556 @@ +import 
json +from tqdm import tqdm +import yaml +import numpy as np +import torch +from torch.utils.data import DataLoader +from easydict import EasyDict as edict +import itertools +import pickle + +from pathlib import Path +from typing import Dict, List, Tuple +from scipy.optimize import linear_sum_assignment + +from datasetgcn import * +from model_zoo import * +from matchingloss import MatchingLoss + +from lossconnection import LossConnection +from utils_metrics import * +import typer +cli = typer.Typer() + + +device = torch.device("cuda") + +###### GENERAL PARAMETERS ###### +max_epochs_num = 200 +#mode = "testinst" #"kfold_crossvalidation", "finaltrain", "test", "testinst" +seeds = [ 0, 13, 42, 1997] + +datasets_from = [] +datasets_next = [] + +dataloaders_from = [] +dataloaders_next = [] + +connections = [] + + +class CrossValidationSetup: + def __init__(self, foldidx, seedidx, mode, datapath, iou_th=None): + + self.foldidx = foldidx + self.seedidx = seedidx + self.mode = mode + self.datapath = datapath + self.iou_th = iou_th + self.seed = seeds[self.seedidx] + + self.minx = 28.5 #27.58 + self.maxx = 29.3 #28.5 #29.45 + + self.epoch = 0 + self.max_epochs_num = max_epochs_num + + with open(f"{self.datapath}/transformations.yaml") as stream: + try: + transformations = yaml.safe_load(stream)["transformations"] + except yaml.YAMLError as exc: + print(exc) + + gt_08 = np.asarray(transformations["gt_08"]) + gt_14 = np.asarray(transformations["gt_14"]) + gt_21 = np.asarray(transformations["gt_21"]) + + Ts = {"08": gt_08, "14": gt_14, "21": gt_21} + + with open("backbone.yaml") as stream: + try: + cfg = edict(yaml.safe_load(stream)) + except yaml.YAMLError as exc: + print(exc) + + self.learning_rate = float(cfg.GENERAL["initial_lr"]) + + torch.manual_seed(self.seed) + random.seed(self.seed) + np.random.seed(self.seed) + + if self.mode in ["kfold_crossvalidation", "finaltrain"]: + from_ = "08" + next_ = "14" + foldspath = f"{self.datapath}/{from_}_{next_}/kfoldsplit" + #datapath = "data/08_14" + + fold_splits = glob.glob(os.path.join(foldspath, "split_*")) + fold_splits.sort() + + for split_idx, foldpath in enumerate(fold_splits): + data_from = Strawberries(f"{self.datapath}/{from_}_{next_}/strawberries_{from_}", os.path.join(foldpath, "selections_1.json"), Ts[from_], min_x=0, max_x=100) + data_next = Strawberries(f"{self.datapath}/{from_}_{next_}/strawberries_{next_}", os.path.join(foldpath, "selections_2.json"), Ts[next_], min_x=0, max_x=100) + datasets_from.append(data_from) + datasets_next.append(data_next) + + dataloaders_from.append(DataLoader(datasets_from[-1], batch_size=len(datasets_from[-1]), shuffle=True, collate_fn=datasets_from[-1].custom_collation_fn)) + dataloaders_next.append(DataLoader(datasets_next[-1], batch_size=len(datasets_next[-1]), shuffle=True, collate_fn=datasets_next[-1].custom_collation_fn)) + + connections.append(LossConnection(os.path.join(self.datapath, f"{from_}_{next_}", "connections.json"), datasets_from[-1], datasets_next[-1])) + connections[-1].printSummary(split_idx) + + if self.mode=="kfold_crossvalidation": + self.setupKFold() + + elif self.mode == "finaltrain": + self.setupFinalTrain() + + elif self.mode == "test": + print("|** instance segmentation test set summary") + #datapath = "data/testdata_14_21" + self.test_from = Strawberries(f"{self.datapath}/14_21/strawberries_14", os.path.join(self.datapath, "selections_1.json"), Ts["14"], min_x=self.minx, max_x=self.maxx, training=False) + self.test_next = Strawberries(f"{self.datapath}/14_21/strawberries_21", 
os.path.join(self.datapath, "selections_2.json"), Ts["21"], min_x=self.minx, max_x=self.maxx, training=False) + self.testloader_from = DataLoader(self.test_from, batch_size=len(self.test_from), shuffle=False, collate_fn=self.test_from.custom_collation_fn) + self.testloader_next = DataLoader(self.test_next, batch_size=len(self.test_next), shuffle=False, collate_fn=self.test_next.custom_collation_fn) + self.test_gt = LossConnection(os.path.join(self.datapath, "connections.json"), self.test_from, self.test_next) + self.test_gt.printSummary(0) + + elif self.mode == "testinst": + print("|** instance segmentation test set summary") + self.test_from = Strawberries(f"{self.datapath}/14_21_inst@{iou_th}/strawberries_14", os.path.join(self.datapath, f"14_21_inst@{iou_th}/selections_1.json"), Ts["14"], min_x=self.minx, max_x=self.maxx, training=False) + self.test_next = Strawberries(f"{self.datapath}/14_21_inst@{iou_th}/strawberries_21", os.path.join(self.datapath, f"14_21_inst@{iou_th}/selections_2.json"), Ts["21"], min_x=self.minx, max_x=self.maxx, training=False) + self.testloader_from = DataLoader(self.test_from, batch_size=len(self.test_from), shuffle=False, collate_fn=self.test_from.custom_collation_fn) + self.testloader_next = DataLoader(self.test_next, batch_size=len(self.test_next), shuffle=False, collate_fn=self.test_next.custom_collation_fn) + self.test_gt = LossConnection(os.path.join(self.datapath, f"14_21_inst@{iou_th}", "connections.json"), self.test_from, self.test_next) + self.test_gt.printSummary(0) + + elif self.mode not in ["test", "testinst"]: + quit(f"Mode {self.mode} not implemented. Error.") + + self.encoder = Encoder(cfg).to(device) + self.matcher = Matcher(cfg.MATCHER, self.encoder.descriptor_len).to(device) + + self.optimizer = torch.optim.AdamW(list(self.encoder.parameters()) + list(self.matcher.parameters()), lr=self.learning_rate) + #self.scheduler = torch.optim.lr_scheduler.StepLR(self.optimizer, step_size=1, gamma=0.99) + + self.crossentropy = nn.CrossEntropyLoss() + self.matchingloss = MatchingLoss() + + self.prev_train_acc, self.prev_val_acc, self.prev_test_acc = 0, 0, 0 + self.prev_train_loss, self.prev_val_loss, self.prev_test_loss = 10000000, 10000000, 10000000 + + self.prev_val_f1 = 0 + + + results_outbasepath = os.path.join(self.datapath, "log", self.encoder.str + "," + self.matcher.str) + if self.mode == "kfold_crossvalidation": + self.logpath = f"{results_outbasepath}/fold_{str(self.foldidx).zfill(2)}/seed_{str(self.seed).zfill(5)}" + else: + self.logpath = f"{results_outbasepath}/fullset/seed_{str(self.seed).zfill(5)}" + print("logpath: ", self.logpath) + os.makedirs(self.logpath, exist_ok=True) + + if self.mode in ["test", "testinst"]: + checkpoint_path = os.path.join(self.logpath, "best_model_f1.pt") + if not os.path.isfile(checkpoint_path): + quit(f"file {checkpoint_path} does not exist! Error.") + print(f"loading model from {checkpoint_path}", end="... 
") + + checkpoint = torch.load(checkpoint_path, map_location=device) + self.encoder.load_state_dict(checkpoint['encoder_state_dict']) + self.matcher.load_state_dict(checkpoint['matcher_state_dict']) + self.encoder.eval() + self.matcher.eval() + print("done!") + + self.logpath_train = os.path.join(self.logpath, "train") + self.logpath_valid = os.path.join(self.logpath, "valid") + self.logpath_test = os.path.join(self.logpath, "test") + os.makedirs(self.logpath_train, exist_ok=True) + os.makedirs(self.logpath_valid, exist_ok=True) + os.makedirs(self.logpath_test, exist_ok=True) + + + if self.mode in ["kfold_crossvalidation", "finaltrain"]: + + self.logfilepath_train = os.path.join(self.logpath, "log_train.txt") + self.logfile_train = open(self.logfilepath_train, "w") + + self.logfilepath_valid = os.path.join(self.logpath, "log_valid.txt") + self.logfile_valid = open(self.logfilepath_valid, "w") + self.logfilepath_train_best_loss = os.path.join(self.logpath, "log_train_best_loss.txt") + self.logfilepath_valid_best_loss = os.path.join(self.logpath, "log_valid_best_loss.txt") + + if self.mode ==["test", "testinst"]: + self.logfilepath_test = os.path.join(self.logpath, f"log_{self.mode}.txt") + self.logfile_test = open(self.logfilepath_test, "w") + + self.logfilepath_test_best_acc = os.path.join(self.logpath, "log_test_best_acc.txt") + self.logfilepath_test_best_loss = os.path.join(self.logpath, "log_test_best_loss.txt") + + self.logfilepath = os.path.join(self.logpath, "log.txt") + self.logfile = open(self.logfilepath, "a") + self.logfile.write(f"foldidx {self.foldidx}\n") + self.logfile.write(f"seedidx {self.seedidx}\n") + self.logfile.write(f"seed {seeds[self.seedidx]}\n") + self.logfile.write(f"initial learning rate {self.learning_rate}\n") + self.logfile.write(f"optimizer AdamW\n") + self.logfile.write(f"scheduler StepLR with gamma {0.99}\n") + self.logfile.write(f"max_epochs_num {self.max_epochs_num}\n") + + def setupKFold(self): + self.trainsets_idx = np.arange(len(fold_splits), dtype=np.int32) + self.trainsets_idx = np.delete(self.trainsets_idx, self.foldidx) + self.validsets_idx = np.array([self.foldidx]) + self.setupTrainValidSets() + + def setupFinalTrain(self): + self.trainsets_idx = np.arange(len(fold_splits)-1, dtype=np.int32) + self.validsets_idx = np.array([len(fold_splits)-1]) + self.setupTrainValidSets() + + def setupTrainValidSets(self): + self.trainsets_from = [datasets_from[i] for i in self.trainsets_idx] + self.trainsets_next = [datasets_next[i] for i in self.trainsets_idx] + self.validsets_from = [datasets_from[i] for i in self.validsets_idx] + self.validsets_next = [datasets_next[i] for i in self.validsets_idx] + + self.trainloaders_from = [dataloaders_from[i] for i in self.trainsets_idx] + self.trainloaders_next = [dataloaders_next[i] for i in self.trainsets_idx] + self.train_gts = [connections[i] for i in self.trainsets_idx] + self.valid_gts = [connections[i] for i in self.validsets_idx] + + print("train on: ", self.trainsets_idx) + print("valid on: ", self.validsets_idx) + print() + + def process(self, dlf, tf, dln, tn, conn): + descriptors_from, from_idxs_b, from_keys_b = self.encoder(dlf, tf) + descriptors_next, next_idxs_b, next_keys_b = self.encoder(dln, tn) + + ## matching + next_centered, connmatrix = conn.getdata(from_idxs_b, next_idxs_b) + + predicted_matrix_logits = self.matcher(descriptors_from, descriptors_next, next_centered, tf.training) + + gt_one_hot = connmatrix.T[1:, :] + gt_argmax = gt_one_hot.argmax(dim=1) + + return predicted_matrix_logits, 
def do_one_train_epoch(self): + + print(f"epoch {self.epoch} started")# with lr {self.scheduler.get_last_lr()}") + + self.encoder.train() + self.matcher.train() + + pred_logits = [] + gt_one_hots = [] + gt_argmaxs = [] + pluss = [] + loss = 0 + + for tlf, tf, tln, tn, gt in zip(self.trainloaders_from, self.trainsets_from, self.trainloaders_next, self.trainsets_next, self.train_gts): + tf.training=True + tn.training=True + predicted_matrix_logits, gt_one_hot, gt_argmax, plus = self.process(tlf, tf, tln, tn, gt) + + weight_zero = 0.08 + weight_other = 0.92/(predicted_matrix_logits.shape[1]-1) + weights = torch.ones(predicted_matrix_logits.shape[1], device=device)*weight_other + weights[0]=weight_zero + celoss = nn.CrossEntropyLoss(weight=weights) + + loss += celoss(predicted_matrix_logits, gt_argmax) + self.matchingloss(predicted_matrix_logits) + pred_logits.append(predicted_matrix_logits.detach().cpu()) + gt_one_hots.append(gt_one_hot.detach().cpu()) + gt_argmaxs.append(gt_argmax.detach().cpu()) + pluss.append(plus) + del predicted_matrix_logits, gt_one_hot, gt_argmax, plus + del tlf, tf, tln, gt + torch.cuda.empty_cache() + + self.optimizer.zero_grad() + print("loss", loss.item()) + loss.backward() + self.optimizer.step() + + with open(os.path.join(self.logpath_train, f"epoch_{str(self.epoch).zfill(3)}.pickle"), 'wb') as handle: + pickle.dump({ + 'predicted_matrix': pred_logits, + 'gt': gt_one_hots, + }, handle) + torch.cuda.empty_cache() + + m = computeMetricsList(pred_logits, gt_one_hots) + ma = computeMetricsAlessandroList(pred_logits, gt_one_hots) + + if self.prev_train_loss > loss: + self.prev_train_loss = loss.item() + + with open(os.path.join(self.logpath, f"best_train_loss.pickle"), 'wb') as handle: + pickle.dump({ + 'predicted_matrix': pred_logits, + 'gt': gt_one_hots, + }, handle) + with open(self.logfilepath_train_best_loss, "w") as f: + f.write(f"epoch: {self.epoch}\nacc: {m['acc']}\nloss: {loss.item()}\nprecision: {ma['prec']}\nrecall: {ma['rec']}\nf1 {ma['f1']}") + torch.cuda.empty_cache() + + logtxt = f'epoch {self.epoch:3d} - avg loss: {loss: 8.6f} - accuracy {m["acc"]:5.3f} best: {self.prev_train_acc:5.3f} ( {m["cor"]:3d} / {m["tot"]:3d} ) (tn {m["tn"]:3d} / {m["fn"]:3d} | {m["gtz"]:3d}) - precision {ma["prec"]:4.2f} recall {ma["rec"]:4.2f} f1 {ma["f1"]:4.2f}' + print(logtxt) + self.logfile.write(logtxt + "\n") + self.logfile_train.write(logtxt + "\n") + + return m["acc"] + + def do_one_valid_phase(self): + self.encoder.eval() + self.matcher.eval() + + pred_logits = [] + gt_one_hots = [] + gt_argmaxs = [] + pluss = [] + loss = 0 + + with torch.no_grad(): + for vf, vn, gt in zip(self.validsets_from, self.validsets_next, self.valid_gts): + vf.training=False + vn.training=False + + dataloader_from = DataLoader(vf, batch_size=len(vf), shuffle=False, collate_fn=vf.custom_collation_fn) + dataloader_next = DataLoader(vn, batch_size=len(vn), shuffle=False, collate_fn=vn.custom_collation_fn) + + predicted_matrix_logits, gt_one_hot, gt_argmax, plus = self.process(dataloader_from, vf, dataloader_next, vn, gt) + loss += self.crossentropy(predicted_matrix_logits, gt_argmax) #+ self.matchingloss(predicted_matrix_logits) + + pred_logits.append(predicted_matrix_logits.detach().cpu()) + gt_one_hots.append(gt_one_hot.detach().cpu()) + gt_argmaxs.append(gt_argmax.detach().cpu()) + pluss.append(plus) + + torch.cuda.empty_cache()
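# The helpers below reduce, in essence, to row-wise argmax agreement
# between logits and the one-hot ground truth, e.g. (illustrative):
#   acc = (logits.argmax(1) == gt_one_hot.argmax(1)).float().mean()
# with computeMetricsAlessandroList also deriving precision/recall/F1.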
m = computeMetricsList(pred_logits, gt_one_hots) + ma = computeMetricsAlessandroList(pred_logits, gt_one_hots) + + #self.scheduler.step() + + with open(os.path.join(self.logpath_valid, f"epoch_{str(self.epoch).zfill(3)}.pickle"), 'wb') as handle: + pickle.dump({ + 'predicted_matrix': pred_logits, + 'gt': gt_one_hots, + }, handle) + + + torch.cuda.empty_cache() + + msg = "val" + if self.prev_val_loss>loss.item(): + + logtxt = f"GREAT loss! \033[92m{loss:6.4f}\033[0m > {self.prev_val_loss:6.4f}" + print(logtxt) + logtxt = f"GREAT loss! {loss:6.4f} > {self.prev_val_loss:6.4f}" + self.logfile.write(logtxt + "\n") + + self.prev_val_loss = loss.item() + + with open(os.path.join(self.logpath, f"best_valid_loss.pickle"), 'wb') as handle: + pickle.dump({ + 'predicted_matrix': pred_logits, + 'gt': gt_one_hots, + }, handle) + with open(self.logfilepath_valid_best_loss, "w") as f: + f.write(f"epoch: {self.epoch}\nacc: {m['acc']}\nloss: {loss.item()}\nprecision: {ma['prec']}\nrecall: {ma['rec']}\nf1 {ma['f1']}") + + if self.mode == "finaltrain": + torch.save({ + 'epoch': self.epoch, + 'encoder_state_dict': self.encoder.state_dict(), + 'matcher_state_dict': self.matcher.state_dict(), + 'optimizer_state_dict': self.optimizer.state_dict(), + 'loss': loss, + 'f1': ma["f1"], + 'acc': m["acc"], + }, os.path.join(self.logpath, "best_model_loss.pt")) + + + if self.prev_val_f1 < ma["f1"]: + logtxt = f"GREAT f1! \033[92m{ma['f1']:6.4f}\033[0m > {self.prev_val_f1:6.4f}" + print(logtxt) + logtxt = f"GREAT f1! {ma['f1']:6.4f} > {self.prev_val_f1:6.4f}" + self.logfile.write(logtxt + "\n") + self.logfile_valid.write(logtxt + "\n") + self.prev_val_f1 = ma["f1"] + + if self.mode == "finaltrain": + torch.save({ + 'epoch': self.epoch, + 'encoder_state_dict': self.encoder.state_dict(), + 'matcher_state_dict': self.matcher.state_dict(), + 'optimizer_state_dict': self.optimizer.state_dict(), + 'loss': loss, + 'f1': ma["f1"], + 'acc': m["acc"], + }, os.path.join(self.logpath, "best_model_f1.pt")) + + + + logtxt = f'epoch {self.epoch:3d} - {msg} loss: {loss.item(): 8.6f} - accuracy {m["acc"]:5.3f} best: {self.prev_val_acc:5.3f} ( {m["cor"]:3d} / {m["tot"]:3d} ) (tn {m["tn"]:3d} / {m["fn"]:3d} | {m["gtz"]:3d}) - precision {ma["prec"]:4.2f} recall {ma["rec"]:4.2f} f1 {ma["f1"]:4.2f}' + print(logtxt) + self.logfile.write(logtxt + "\n") + self.logfile_valid.write(logtxt + "\n") + + return loss, m["cor"], m["tot"] + + def run(self): + for epoch in range(self.max_epochs_num): + self.epoch = epoch + + trainacc = self.do_one_train_epoch() + torch.cuda.empty_cache() + + if True:#epoch>40: + loss, correct, tot = self.do_one_valid_phase() + torch.cuda.empty_cache() + + self.logfile.flush() + self.logfile_train.flush() + self.logfile_valid.flush() + + def run_test(self): + self.epoch = -1 + + self.encoder.eval() + self.matcher.eval() + + with torch.no_grad(): + predicted_matrix_logits, gt_one_hot, gt_argmax, plus = self.process(self.testloader_from, self.test_from, self.testloader_next, self.test_next, self.test_gt) + loss = self.crossentropy(predicted_matrix_logits, gt_argmax) + self.matchingloss(predicted_matrix_logits) + + torch.cuda.empty_cache() + costmatrix = predicted_matrix_logits.clone() + + cont = 0 + + gt_argmax = gt_argmax.detach().cpu().numpy() + + probs = torch.nn.functional.softmax(predicted_matrix_logits, dim=-1).detach().cpu().numpy() + copynorm = np.copy(probs) + + probs = probs.max(axis=1) + + hungpreds = np.zeros_like(copynorm) + hungpreds[:, 0] = 1 + + + centers_from = self.test_from.centers[plus["from_idxs_b"]] + centers_next = self.test_next.centers[plus["next_idxs_b"]] + + dist =
+
+        # gate candidates by center distance: pairs farther apart than 0.05 are forbidden
+        mask = np.zeros_like(copynorm).astype(float)
+        mask[:, 1:] = dist
+        copynorm[mask>0.05] = -1*np.inf
+
+
+        # greedy one-to-one assignment: every fruit starts as "no match" (column 0),
+        # then the highest remaining probability wins each round
+        while cont < predicted_matrix_logits.shape[0]:
+            if copynorm.max() < 0:
+                print("breaking: only forbidden candidates left")
+                break
+            rn, cf = np.unravel_index(copynorm.argmax(), copynorm.shape)
+            hungpreds[rn, 0] = 0
+            hungpreds[rn, cf] = 1
+            copynorm[rn, :] = -1 * np.inf
+            if cf>0:
+                copynorm[:, cf] = -1 * np.inf
+            cont += 1
+
+        hungpreds = hungpreds.argmax(axis=1)
+
+        m = computeMetricsPret2(hungpreds, gt_argmax)
+        #print("f1p", m["f1p"], "f1n", m["f1n"], "f1", m["f1"])
+        #print(m)
+
+        mask = getMetricMask(hungpreds, gt_argmax)
+
+        import pickle
+        with open("instsegm_gcnconv_2_minxmaxx.pickle", "wb") as handle:
+            pickle.dump({"gt_keys":gt_argmax, "hungpreds":hungpreds, "mask": mask, "plus":plus}, handle)
+
+        return m["f1p"], m["f1n"], m["f1"]
+
+
+
+@cli.command()
+def main(
+    mode: str = typer.Option(
+        ...,
+        "--mode",
+        help="mode of execution (kfold_crossvalidation, finaltrain, test, testinst)",
+    ),
+    datapath: str = typer.Option(
+        ...,
+        "--data",
+        help="data path ()",
+    ),
+    iou: float = typer.Option(
+        ...,
+        "--iou",
+        help="IoU threshold",
+    ),
+):
+    f1ps, f1ns, f1s = [], [], []
+
+    print("mode: ", mode)
+
+    for seedidx, _ in enumerate(seeds):
+        split = CrossValidationSetup(-1, seedidx, mode, datapath, iou)
+        f1p, f1n, f1 = split.run_test()
+        f1ps.append(f1p); f1ns.append(f1n); f1s.append(f1)
+
+    f1ps, f1ns, f1s = np.array(f1ps), np.array(f1ns), np.array(f1s)
+    print("#####################")
+    print("final results")
+    print(f"f1p {f1ps.mean()*100:4.1f} +- {f1ps.std()*100:4.1f}")
+    print(f"f1n {f1ns.mean()*100:4.1f} +- {f1ns.std()*100:4.1f}")
+    print(f"f1 {f1s.mean()*100:4.1f} +- {f1s.std()*100:4.1f}")
+    print("#####################")
+
+if __name__ == "__main__":
+    cli()
+
+#if mode == "kfold_crossvalidation":
+#    for foldidx in range(len(fold_splits)):
+#        for seedidx, _ in enumerate(seeds):
+#            print(f"| ***** SPLIT {foldidx:2d} - seedidx {seedidx:6d} *****|")
+#            split = CrossValidationSetup(foldidx, seedidx, mode)
+#            split.run()
+#
+#
+#elif mode=="finaltrain":
+#    for seedidx, _ in enumerate(seeds):
+#        split = CrossValidationSetup(-1, seedidx, mode)
+#        split.run()
+#
+#elif mode in ["test", "testinst"]:
+#
+#    f1ps, f1ns, f1s = [], [], []
+#
+#    for seedidx, _ in enumerate(seeds):
+#        split = CrossValidationSetup(-1, seedidx, mode)
+#        f1p, f1n, f1 = split.run_test()
+#        f1ps.append(f1p)
+#        f1ns.append(f1n)
+#        f1s.append(f1)
+#    f1ps = np.array(f1ps)
+#    f1ns = np.array(f1ns)
+#    f1s = np.array(f1s)
+#    print()
+#    print("final results")
+#    print("f1p ", f1ps.mean(), "+-", f1ps.std())
+#    print("f1n ", f1ns.mean(), "+-", f1ns.std())
+#    print("f1 ", f1s.mean(), "+-", f1s.std())
\ No newline at end of file
diff --git a/re_identification/utils_metrics.py b/re_identification/utils_metrics.py
new file mode 100644
index 0000000..c49ed10
--- /dev/null
+++ b/re_identification/utils_metrics.py
@@ -0,0 +1,315 @@
+import torch
+import numpy as np
+
+def computeMetrics(pred_logits, gt):
+    tpm, wpm, tn, fp, fn = 0, 0, 0, 0, 0 ## true_positive_matching, wrong_positive_matching
+    precision, recall, f1 = 0, 0, 0
+
+    pred = torch.nn.functional.softmax(pred_logits, dim=-1)
+
+    for r in range(pred.shape[0]):
+        pred_match_idx = pred[r].argmax()
+        gt_match_idx = gt[r].argmax()
+
+        if gt_match_idx==0:
+            if pred_match_idx==0:
+                tn += 1
+            else:
+                fp += 1
+        else:
+            if pred_match_idx==gt_match_idx:
+                tpm += 1
+            else:
+                if pred_match_idx>0:
+                    wpm += 1
+                else:
+                    fn += 1
+
+    #tot = pred.shape[0] ## aka
+    tot = tpm + wpm + tn + fp + fn ## just for fun
+
+    accuracy = (tpm + tn) / tot
+
+    if (tpm + fp + wpm) > 0:
+        precision = tpm / (tpm + fp + wpm)
+
+    if (tpm + fn) > 0:
+        recall = tpm / (tpm + fn)
+
+    if (precision + recall) > 0:
+        f1 = 2 * (precision * recall) / (precision + recall)
+
+    return {"acc":accuracy, "prec":precision,
+            "rec":recall, "f1":f1, "tot":tot,
+            "tpm":tpm, "wpm":wpm,
+            "tn":tn, "fp":fp, "fn":fn,
+            "cor":tn+tpm, "z":tn+fn, "gtz":tn+fp}
+
+
+def computeMetricsAlessandro(pred_logits, gt):
+    tp, fp, fn = 0, 0, 0
+
+    pred = torch.nn.functional.softmax(pred_logits, dim=-1)
+
+    for r in range(pred.shape[0]):
+        pred_match_idx = pred[r].argmax()
+        gt_match_idx = gt[r].argmax()
+
+        if pred_match_idx>0:
+            if gt_match_idx==0:
+                fp += 1
+                continue
+            if not pred_match_idx==gt_match_idx:
+                fp += 1
+                continue
+            tp += 1
+
+    for r in range(pred.shape[0]):
+        pred_match_idx = pred[r].argmax()
+        gt_match_idx = gt[r].argmax()
+
+        if gt_match_idx>0:
+            if pred_match_idx==0:
+                fn += 1
+                continue
+            if not pred_match_idx==gt_match_idx:
+                fn += 1
+                continue
+
+    precision = 0 if (tp + fp) == 0 else tp / (tp + fp)
+    recall = 0 if (tp + fn) == 0 else tp / (tp + fn)
+    f1 = 0 if (precision + recall) == 0 else 2 * (precision * recall) / (precision + recall)
+
+    return {"prec":precision, "rec":recall, "f1":f1, "tp":tp, "fp":fp, "fn":fn}
+
+def computeMetricsList(pred_list:list, gt_list:list) -> dict:
+    tpm, wpm, tn, fp, fn = 0, 0, 0, 0, 0 ## true_positive_matching, wrong_positive_matching
+    precision, recall, f1 = 0, 0, 0
+
+    assert len(pred_list) == len(gt_list)
+
+    for pred, gt in zip(pred_list, gt_list):
+
+        pred = torch.nn.functional.softmax(pred, dim=-1)
+
+        for r in range(pred.shape[0]):
+            pred_match_idx = pred[r].argmax()
+            gt_match_idx = gt[r].argmax()
+
+            if gt_match_idx==0:
+                if pred_match_idx==0:
+                    tn += 1
+                else:
+                    fp += 1
+            else:
+                if pred_match_idx==gt_match_idx:
+                    tpm += 1
+                else:
+                    if pred_match_idx>0:
+                        wpm += 1
+                    else:
+                        fn += 1
+
+    #tot = pred.shape[0] ## aka
+    tot = tpm + wpm + tn + fp + fn ## just for fun
+
+    accuracy = (tpm + tn) / tot
+
+    if (tpm + fp + wpm) > 0:
+        precision = tpm / (tpm + fp + wpm)
+
+    if (tpm + fn) > 0:
+        recall = tpm / (tpm + fn)
+
+    if (precision + recall) > 0:
+        f1 = 2 * (precision * recall) / (precision + recall)
+
+    return {"acc":accuracy, "prec":precision,
+            "rec":recall, "f1":f1, "tot":tot,
+            "tpm":tpm, "wpm":wpm,
+            "tn":tn, "fp":fp, "fn":fn,
+            "cor":tn+tpm, "z":tn+fn, "gtz":tn+fp}
+
+
+def computeMetricsAlessandroList(pred_list:list, gt_list:list) -> dict:
+    tp, fp, fn = 0, 0, 0
+
+    assert len(pred_list) == len(gt_list)
+
+    for pred, gt in zip(pred_list, gt_list):
+
+        pred = torch.nn.functional.softmax(pred, dim=-1)
+
+        for r in range(pred.shape[0]):
+            pred_match_idx = pred[r].argmax()
+            gt_match_idx = gt[r].argmax()
+
+            if pred_match_idx>0:
+                if gt_match_idx==0:
+                    fp += 1
+                    continue
+                if not pred_match_idx==gt_match_idx:
+                    fp += 1
+                    continue
+                tp += 1
+
+        for r in range(pred.shape[0]):
+            pred_match_idx = pred[r].argmax()
+            gt_match_idx = gt[r].argmax()
+
+            if gt_match_idx>0:
+                if pred_match_idx==0:
+                    fn += 1
+                    continue
+                if not pred_match_idx==gt_match_idx:
+                    fn += 1
+                    continue
+
+    precision = 0 if (tp + fp) == 0 else tp / (tp + fp)
+    recall = 0 if (tp + fn) == 0 else tp / (tp + fn)
+    f1 = 0 if (precision + recall) == 0 else 2 * (precision * recall) / (precision + recall)
+
+    return {"prec":precision, "rec":recall, "f1":f1, "tp":tp, "fp":fp, "fn":fn}
+
+def getMetricMask(predidx, gt):
+    tpm, wpm, tn, fp, fn = 0, 1, 2, 3, 4
+    assert predidx.shape == gt.shape
+
+    mask = np.zeros(predidx.shape[0], dtype=np.int32)
+    for r in range(predidx.shape[0]):
+        pred_match_idx = predidx[r]
+        gt_match_idx = gt[r]
+        if gt_match_idx==0:
+            if pred_match_idx==0:
+                mask[r] = tn
+            else:
+                mask[r] = fp
+        else:
+            if pred_match_idx==gt_match_idx:
+                mask[r] = tpm
+            else:
+                if pred_match_idx>0:
+                    mask[r] = wpm
+                else:
+                    mask[r] = fn
+    return mask
+
+
+def computeMetricsPret(predidx, gt):
+    tpm, wpm, tn, fp, fn = 0, 0, 0, 0, 0 ## true_positive_matching, wrong_positive_matching
+    precision, recall, f1 = 0, 0, 0
+
+    #predidx = predidx + 1
+
+    print("predidx", predidx.shape[0])
+    print("gt ", gt.shape[0])
+
+    for r in range(predidx.shape[0]):
+        pred_match_idx = predidx[r]
+        gt_match_idx = gt[r]
+
+
+        if gt_match_idx==0:
+            if pred_match_idx==0:
+                tn += 1
+            else:
+                fp += 1
+        else:
+            if pred_match_idx==gt_match_idx:
+                tpm += 1
+            else:
+                if pred_match_idx>0:
+                    wpm += 1
+                else:
+                    fn += 1
+
+    #tot = pred.shape[0] ## aka
+    tot = tpm + wpm + tn + fp + fn ## just for fun
+
+    accuracy = (tpm + tn) / tot
+
+    if (tpm + fp + wpm) > 0:
+        precision = tpm / (tpm + fp + wpm)
+
+    if (tpm + fn) > 0:
+        recall = tpm / (tpm + fn)
+
+    if (precision + recall) > 0:
+        f1 = 2 * (precision * recall) / (precision + recall)
+
+    return {"acc":accuracy, "prec":precision,
+            "rec":recall, "f1":f1, "tot":tot,
+            "tpm":tpm, "wpm":wpm,
+            "tn":tn, "fp":fp, "fn":fn,
+            "cor":tn+tpm, "z":tn+fn, "gtz":tn+fp}
+
+
+def computeMetricsAlessandroPret(predidx, gt):
+    tp, fp, fn = 0, 0, 0
+
+    #predidx = predidx + 1
+
+    for r in range(predidx.shape[0]):
+        pred_match_idx = predidx[r]
+        gt_match_idx = gt[r]
+
+        if pred_match_idx>0:
+            if gt_match_idx==0:
+                fp += 1
+                continue
+            if not pred_match_idx==gt_match_idx:
+                fp += 1
+                continue
+            tp += 1
+
+    for r in range(predidx.shape[0]):
+        # predidx and gt already hold class indices here, so no argmax is needed
+        pred_match_idx = predidx[r]
+        gt_match_idx = gt[r]
+
+        if gt_match_idx>0:
+            if pred_match_idx==0:
+                fn += 1
+                continue
+            if not pred_match_idx==gt_match_idx:
+                fn += 1
+                continue
+
+    precision = 0 if (tp + fp) == 0 else tp / (tp + fp)
+    recall = 0 if (tp + fn) == 0 else tp / (tp + fn)
+    f1 = 0 if (precision + recall) == 0 else 2 * (precision * recall) / (precision + recall)
+
+    return {"prec":precision, "rec":recall, "f1":f1, "tp":tp, "fp":fp, "fn":fn}
+
+
+def computeMetricsPret2(predidx, gt):
+    tpm, wpm, tn, fp, fn = 0, 0, 0, 0, 0 ## true_positive_matching, wrong_positive_matching
+    precision, recall, f1 = 0, 0, 0
+
+    for r in range(predidx.shape[0]):
+        pred_match_idx = predidx[r]
+        gt_match_idx = gt[r]
+
+        if gt_match_idx==0:
+            if pred_match_idx==0:
+                tn += 1
+            else:
+                fp += 1
+        else:
+            if pred_match_idx==gt_match_idx:
+                tpm += 1
+            else:
+                if pred_match_idx>0:
+                    wpm += 1
+                else:
+                    fn += 1
+
+    tot = tpm + wpm + tn + fp + fn
+
+    # per-class F1: f1p scores the matched fruits, f1n the correctly unmatched ones
+    f1p = (2*tpm) / (2*tpm + wpm + fn + fp) if tpm + wpm + fn + fp > 0 else 0
+    f1n = (2*tn) / (2*tn + fn + fp) if 2*tn + fn + fp > 0 else 0
+    f1 = (f1p + f1n) / 2
+
+    return {"f1p":f1p, "f1n":f1n, "f1":f1, "tot":tot,
+            "tpm":tpm, "wpm":wpm,
+            "tn":tn, "fp":fp, "fn":fn,
+            "cor":tn+tpm, "z":tn+fn, "gtz":tn+fp}
\ No newline at end of file
diff --git a/re_identification/view2.json b/re_identification/view2.json
new file mode 100644
index 0000000..cee50f1
--- /dev/null
+++ b/re_identification/view2.json
@@ -0,0 +1,41 @@
+{
+    "class_name" : "PinholeCameraParameters",
+    "extrinsic" :
+    [
+        1.0,
+        -0.0,
+        -0.0,
+        0.0,
+        0.0,
+        -1.0,
+        -0.0,
+        0.0,
+        0.0,
+        -0.0,
+        -1.0,
+        0.0,
+        -30.500002676410524,
+        1.8579855615197776,
+        9.1633308321043749,
+        1.0
+    ],
+    "intrinsic" :
+ { + "height" : 1563, + "intrinsic_matrix" : + [ + 1353.5977061150777, + 0.0, + 0.0, + 0.0, + 1353.5977061150777, + 0.0, + 1279.5, + 781.0, + 1.0 + ], + "width" : 2560 + }, + "version_major" : 1, + "version_minor" : 0 +} \ No newline at end of file diff --git a/re_identification/view_animate3D.json b/re_identification/view_animate3D.json new file mode 100644 index 0000000..52debdf --- /dev/null +++ b/re_identification/view_animate3D.json @@ -0,0 +1,41 @@ +{ + "background_color" : [ 1.0, 1.0, 1.0 ], + "class_name" : "RenderOption", + "default_mesh_color" : [ 0.69999999999999996, 0.69999999999999996, 0.69999999999999996 ], + "image_max_depth" : 3000, + "image_stretch_option" : 1, + "interpolation_option" : 0, + "light0_color" : [ 1.0, 1.0, 1.0 ], + "light0_diffuse_power" : 0.66000000000000003, + "light0_position" : [ 0.0, 0.0, 2.0 ], + "light0_specular_power" : 0.20000000000000001, + "light0_specular_shininess" : 100.0, + "light1_color" : [ 1.0, 1.0, 1.0 ], + "light1_diffuse_power" : 0.66000000000000003, + "light1_position" : [ 0.0, 0.0, 2.0 ], + "light1_specular_power" : 0.20000000000000001, + "light1_specular_shininess" : 100.0, + "light2_color" : [ 1.0, 1.0, 1.0 ], + "light2_diffuse_power" : 0.66000000000000003, + "light2_position" : [ 0.0, 0.0, -2.0 ], + "light2_specular_power" : 0.20000000000000001, + "light2_specular_shininess" : 100.0, + "light3_color" : [ 1.0, 1.0, 1.0 ], + "light3_diffuse_power" : 0.66000000000000003, + "light3_position" : [ 0.0, 0.0, -2.0 ], + "light3_specular_power" : 0.20000000000000001, + "light3_specular_shininess" : 100.0, + "light_ambient_color" : [ 0.0, 0.0, 0.0 ], + "light_on" : true, + "line_width" : 1.0, + "mesh_color_option" : 1, + "mesh_shade_option" : 0, + "mesh_show_back_face" : false, + "mesh_show_wireframe" : false, + "point_color_option" : 0, + "point_show_normal" : false, + "point_size" : 2.0, + "show_coordinate_frame" : false, + "version_major" : 1, + "version_minor" : 0 +} \ No newline at end of file diff --git a/re_identification/view_animate3D_pinhole.json b/re_identification/view_animate3D_pinhole.json new file mode 100644 index 0000000..495dd19 --- /dev/null +++ b/re_identification/view_animate3D_pinhole.json @@ -0,0 +1,41 @@ +{ + "class_name" : "PinholeCameraParameters", + "extrinsic" : + [ + 0.9654808801913185, + 0.0056712300883197744, + 0.2604121869926263, + 0.0, + -0.25589048416824201, + -0.16607574700518096, + 0.95233340084700058, + 0.0, + 0.048649050321048687, + -0.98609669069820771, + -0.15889174458386315, + 0.0, + -26.595027858922464, + 0.76765908276627082, + -8.1429906381699428, + 1.0 + ], + "intrinsic" : + { + "height" : 1536, + "intrinsic_matrix" : + [ + 1330.2150202128978, + 0.0, + 0.0, + 0.0, + 1330.2150202128978, + 0.0, + 1243.5, + 767.5, + 1.0 + ], + "width" : 2488 + }, + "version_major" : 1, + "version_minor" : 0 +} \ No newline at end of file diff --git a/re_identification/view_animate3D_pinhole2.json b/re_identification/view_animate3D_pinhole2.json new file mode 100644 index 0000000..ede6c8b --- /dev/null +++ b/re_identification/view_animate3D_pinhole2.json @@ -0,0 +1,41 @@ +{ + "class_name" : "PinholeCameraParameters", + "extrinsic" : + [ + 0.98452813902812719, + -0.069228137018191943, + 0.16097145245913352, + 0.0, + -0.17153847464374611, + -0.19329357888896329, + 0.96602905964427033, + 0.0, + -0.035761643959828122, + -0.97869558975469317, + -0.20217825653616642, + 0.0, + -28.121196204302329, + 2.9716010245464339, + -5.8575393886868632, + 1.0 + ], + "intrinsic" : + { + "height" : 1536, + "intrinsic_matrix" : + [ + 
1330.2150202128978, + 0.0, + 0.0, + 0.0, + 1330.2150202128978, + 0.0, + 1243.5, + 767.5, + 1.0 + ], + "width" : 2488 + }, + "version_major" : 1, + "version_minor" : 0 +} \ No newline at end of file diff --git a/re_identification/visualize_clouds.py b/re_identification/visualize_clouds.py new file mode 100755 index 0000000..11f082b --- /dev/null +++ b/re_identification/visualize_clouds.py @@ -0,0 +1,274 @@ +import open3d as o3d +import numpy as np +import yaml +from datasetgcn import * +from lossconnection import LossConnection +import pickle +import cv2 + +import typer +cli = typer.Typer() + + + +class VisualizeClouds: + def __init__(self, datapath, iou): + + self.datapath = datapath + self.iou = iou + + print(self.datapath + ) + + transformation_path = os.path.join(self.datapath, "transformations.yaml") + with open(transformation_path, "r") as stream: + try: + transformations = yaml.safe_load(stream)["transformations"] + except yaml.YAMLError as exc: + print(exc) + + self.gt_08 = np.asarray(transformations["gt_08"]) + self.gt_14 = np.asarray(transformations["gt_14"]) + self.gt_21 = np.asarray(transformations["gt_21"]) + + + def get_inst_mask(self, data, coords): + inst_mask = np.zeros(coords.shape[0], dtype=np.int32) + + for k, c, r in tqdm(zip(data.keys, data.centers, data.fruit_radius)): + if k==0: + continue + mask = np.linalg.norm(coords-c, axis=1) <= r + inst_mask[mask] = k + #break + return inst_mask + + def mask2colors(self, colors, inst_mask): + u = np.unique(inst_mask) + np.random.seed(0) + table = np.random.uniform(0.1, 1.0, (u.max()+1, 3)) + table[0, :] = 0 + + newcolors = table[inst_mask] + newcolors = np.where(newcolors==0, colors/2, newcolors) + return newcolors + + def homokeys(self, data_from, data_next, conn): + keys_from = data_from.keys + keys_next = data_next.keys + + new_ids = np.zeros(len(keys_from), dtype=np.int32) + offset_id = 5000 + + for i, k_from in enumerate(keys_from): + if conn.connmatrix[i+1, 0] == 1: # no match + new_ids[i] = offset_id + offset_id += 1 + else: + new_ids[i] = keys_next[conn.connmatrix[i+1, :].argmax()-1] + return new_ids + + + def produce_14gt(self, also_mask=True): + cloud_path = os.path.join(self.datapath, "reduced_08_14_2.ply") + pcd = o3d.io.read_point_cloud(cloud_path) + pcd.transform(self.gt_14) + points = np.array(pcd.points) + colors = np.array(pcd.colors) + + straw_path = os.path.join(self.datapath, f"14_21") + + data_14_gt = Strawberries(f"{straw_path}/strawberries_14", os.path.join(straw_path, "selections_1.json"), self.gt_14, min_x=27.58, max_x=29.45) + data_21_gt = Strawberries(f"{straw_path}/strawberries_21", os.path.join(straw_path, "selections_2.json"), self.gt_21, min_x=27.58, max_x=29.45) + conn_gt = LossConnection(os.path.join(straw_path, "connections.json"), data_14_gt, data_21_gt) + + new_keys = self.homokeys(data_14_gt, data_21_gt, conn_gt) + data_14_gt.keys = new_keys.tolist() + inst_14_gt_mask = self.get_inst_mask(data_14_gt, points) + pcd.colors = o3d.utility.Vector3dVector(self.mask2colors(colors, inst_14_gt_mask)) + + visualizer = o3d.visualization.Visualizer() + visualizer.create_window(width=3200, height=1600, visible=True) + visualizer.add_geometry(pcd) + view_ctl = visualizer.get_view_control() + + param = view_ctl.convert_to_pinhole_camera_parameters() + view_ctl.convert_from_pinhole_camera_parameters(param, True) + visualizer.run() + + o3d.io.write_pinhole_camera_parameters("view2.json", param) + + visualizer.capture_screen_image("14gt.png") + + if also_mask: + camera = 
view_ctl.convert_to_pinhole_camera_parameters() + A = camera.intrinsic.intrinsic_matrix + T = camera.extrinsic[:3, :] + P = np.ones((data_14_gt.centers.shape[0], 4)) + P[:, :3] = data_14_gt.centers + P = A@(T@(P.T)) + P /= P[2, :] + P = np.round(P.T).astype(int)[:, :2] ## [[u,v]] + + recipe = {} + for i in range(P.shape[0]): + recipe[data_14_gt.keys[i]] = P[i] + with open("recipe_14gtmask.pickle", "wb") as handle: + pickle.dump(recipe, handle) + + img = cv2.imread("14gt.png", cv2.IMREAD_UNCHANGED) + for c in P: + img = cv2.circle(img, (c[0], c[1]), 10, [255, 0, 0], 3) + cv2.imshow("just a check, not saving this image", img) + cv2.waitKey(0) + + + def produce_21pred(self, also_mask=True): + cloud_path = os.path.join(self.datapath, "reduced_14_21_2.ply") + pcd = o3d.io.read_point_cloud(cloud_path) + pcd.transform(self.gt_21) + points = np.array(pcd.points) + colors = np.array(pcd.colors) + + straw_path = os.path.join(self.datapath, f"14_21_inst@{self.iou}") + data_21_pred = Strawberries(f"{straw_path}/strawberries_21", os.path.join(straw_path, "selections_2.json"), self.gt_21, min_x=27.58, max_x=29.45) + + inst_21_pred_mask = self.get_inst_mask(data_21_pred, points) + pcd.colors = o3d.utility.Vector3dVector(self.mask2colors(colors, inst_21_pred_mask)) + + visualizer = o3d.visualization.Visualizer() + visualizer.create_window(width=3200, height=1600, visible=True) + visualizer.add_geometry(pcd) + view_ctl = visualizer.get_view_control() + + param = o3d.io.read_pinhole_camera_parameters("view2.json") + view_ctl.convert_from_pinhole_camera_parameters(param, True) + visualizer.run() + visualizer.capture_screen_image("21pred.png") + camera = view_ctl.convert_to_pinhole_camera_parameters() + + A = camera.intrinsic.intrinsic_matrix + T = camera.extrinsic[:3, :] + + P = np.ones((data_21_pred.centers.shape[0], 4)) + P[:, :3] = data_21_pred.centers + P = A@(T@(P.T)) + P /= P[2, :] + P = np.round(P.T).astype(int)[:, :2] ## [[u,v]] + + recipe = {} + for i in range(P.shape[0]): + recipe[data_21_pred.keys[i]] = P[i] + with open("recipe_21predmask.pickle", "wb") as handle: + pickle.dump(recipe, handle) + + img = cv2.imread("21pred.png", cv2.IMREAD_UNCHANGED) + for c in P: + img = cv2.circle(img, (c[0], c[1]), 10, [255, 0, 0], 3) + cv2.imshow("just a check, not saving this image", img) + cv2.waitKey(0) + + + def draw_matches(self): + + straw_path = os.path.join(self.datapath, f"14_21_inst@{self.iou}") + data_14_gt = Strawberries(f"{straw_path}/strawberries_14", os.path.join(straw_path, "selections_1.json"), self.gt_14, min_x=27.58, max_x=29.45) + data_21_pred = Strawberries(f"{straw_path}/strawberries_21", os.path.join(straw_path, "selections_2.json"), self.gt_21, min_x=27.58, max_x=29.45) + conn_gt = LossConnection(os.path.join(straw_path, "connections.json"), data_14_gt, data_21_pred) + new_keys14 = self.homokeys(data_14_gt, data_21_pred, conn_gt) + #data_14_gt.newkeys = new_keys#.tolist() + + img14 = cv2.imread("14gt.png", cv2.IMREAD_UNCHANGED) + img21 = cv2.imread("21pred.png", cv2.IMREAD_UNCHANGED) + + h, _, _ = img14.shape + + a = 1.7*(h//5) + b = 2.5*(h//4) + + a=int(a) + b=int(b) + img14 = img14[a:b, :, :] + img21 = img21[a:b, :, :] + + h2, w2, _ = img14.shape + + print(img14.shape, img21.shape) + + with open("recipe_14gtmask.pickle", "rb") as handle: + recipe14 = pickle.load(handle) + + with open("recipe_21predmask.pickle", "rb") as handle: + recipe21 = pickle.load(handle) + + with open("instsegm_gcnconv_2.pickle", "rb") as handle: + metrics = pickle.load(handle) + metrics["gt_keys"] -= 1 + + 
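+        # hungpreds[i] is the greedy assignment for the i-th session-21 fruit: index 0
+        # means "no match", while k>0 points at the k-th session-14 fruit (new_keys14[k-1]);
+        # homokeys() gives ids >= 5000 to fruits without a ground-truth match, handled below;
+        # mask[i] stores the per-fruit outcome, encoded as in the legend a few lines down.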
gt_argmax = metrics["gt_keys"] + hungpreds = metrics["hungpreds"] + mask = metrics["mask"] + + canvas = np.zeros((h2+h2, w2, 3), img14.dtype) + canvas[0:h2, :, :] = img21 + canvas[h2:, :, :] = img14 + + # tpm, wpm, tn, fp, fn = 0, 1, 2, 3, 4 + + colors = [[0, 255, 0], [0, 0, 255], [0, 255, 0], [0, 165, 255], [0, 0, 255]] + + #print(new_keys14) + #print(recipe14.keys()) + + for i, k in enumerate(recipe21.keys()): + s = recipe21[k] + + if hungpreds[i]==0: + pred_matching_key = 0 + else: + pred_matching_key = new_keys14[hungpreds[i]-1] + + color = colors[mask[i]] + + #if k in recipe14: + if pred_matching_key==0:# or pred_matching_key>=5000: + y = s[1] - a + canvas = cv2.circle(canvas, (s[0], y), 15, color, 2) + elif pred_matching_key>=5000: + scam = new_keys14.tolist().index(pred_matching_key) + pred_matching_key = list(recipe14.keys())[scam] + e = recipe14[pred_matching_key] + s[1] -= a + e[1] += h2 - a + canvas = cv2.line(canvas, s, e, color, 2) + else: + e = recipe14[pred_matching_key] + s[1] -= a + e[1] += h2 - a + canvas = cv2.line(canvas, s, e, color, 2) + + + cv2.imshow("p", canvas) + cv2.waitKey(0) + + +def main( + datapath: str = typer.Option( + ..., + "--data", + help="data path ()", + ), + iou: float = typer.Option( + ..., + "--iou", + help="IoU threshold", + ), +): + visclouds = VisualizeClouds(datapath, iou) + # visclouds.produce_14gt(also_mask=True) + # visclouds.produce_21pred(also_mask=True) + visclouds.draw_matches() + + +if __name__ == "__main__": + typer.run(main) diff --git a/re_identification/visualize_clouds3D.py b/re_identification/visualize_clouds3D.py new file mode 100755 index 0000000..5fa6469 --- /dev/null +++ b/re_identification/visualize_clouds3D.py @@ -0,0 +1,202 @@ +import open3d as o3d +import numpy as np +import yaml +from datasetgcn import * +from lossconnection import LossConnection +from tqdm import tqdm + +import typer +cli = typer.Typer() + +def apply(pcd, T, min_x, max_x, offset=None, mask=False): + pcd.transform(T) + + if mask: + points = np.array(pcd.points) + colors = np.array(pcd.colors) + mask1 = np.logical_and(points[:, 0]>=min_x, points[:, 0]<=max_x) + #mask2 = np.logical_and(points[:, 1]>=0.5, points[:, 1]<=1.785) + mask2 = np.logical_and(points[:, 1]>=0.5, points[:, 1]<=1.745) + mask = np.logical_and(mask1, mask2) + pcd.points = o3d.utility.Vector3dVector(points[mask]) + pcd.colors = o3d.utility.Vector3dVector(colors[mask]) + + R = pcd.get_rotation_matrix_from_xyz((0, 0, -np.pi / 3)) + rotc = pcd.get_center() + pcd.rotate(R, center=rotc) + if offset is not None: + pcd.translate(offset) + + return pcd, rotc + +def apply_to_P(P, T, rot_center, offset=None): + pcd = o3d.geometry.PointCloud() + pcd.points = o3d.utility.Vector3dVector(P) + ##pcd.transform(T) + R = pcd.get_rotation_matrix_from_xyz((0, 0, -np.pi / 3)) + pcd.rotate(R, center=rot_center) + if offset is not None: + pcd.translate(offset) + return np.array(pcd.points) + +def main( + datapath: str = typer.Option( + ..., + "--data", + help="data path ()", + ), + iou: float = typer.Option( + ..., + "--iou", + help="IoU threshold", + ), +): + transformation_path = os.path.join(datapath, "transformations.yaml") + + with open(transformation_path, "r") as stream: + try: + transformations = yaml.safe_load(stream)["transformations"] + except yaml.YAMLError as exc: + print(exc) + + gt_08 = np.asarray(transformations["gt_08"]) + gt_14 = np.asarray(transformations["gt_14"]) + gt_21 = np.asarray(transformations["gt_21"]) + + min_x = 27.73 + max_x = 28.0 + + straw_path = os.path.join(datapath, f"14_21") 
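+    # The fruits of the three sessions (08, 14, 21) are aligned with the ground-truth
+    # transformations above; connections.json gives the GT associations 08->14 and 14->21,
+    # and min_x/max_x keep only a short strip of the row so the drawn lines stay readable.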
+ data_21_gt = Strawberries(f"{straw_path}/strawberries_21", os.path.join(straw_path, "selections_2.json"), gt_21, min_x=min_x, max_x=max_x) + data_14_test = Strawberries(f"{straw_path}/strawberries_14", os.path.join(straw_path, "selections_1.json"), gt_14, min_x=min_x, max_x=max_x) + conn_gt_test = LossConnection(os.path.join(straw_path, "connections.json"), data_14_test, data_21_gt) + + straw_path = os.path.join(datapath, f"08_14") + data_14_gt = Strawberries(f"{straw_path}/strawberries_14", os.path.join(straw_path, "selections_2.json"), gt_14, min_x=min_x, max_x=max_x) + data_08_gt = Strawberries(f"{straw_path}/strawberries_08", os.path.join(straw_path, "selections_1.json"), gt_08, min_x=min_x, max_x=max_x) + conn_gt = LossConnection(os.path.join(straw_path, "connections.json"), data_08_gt, data_14_gt) + + cloud_path = os.path.join(datapath, "reduced_08_14_1.ply") + pcd08 = o3d.io.read_point_cloud(cloud_path) + pcd08, rotc08 = apply(pcd08, gt_08, min_x, max_x, mask=True) + + + off08_14 = [0, 0.151, 0] + #off08_14 = [0, 0.15, 0] + cloud_path = os.path.join(datapath, "reduced_08_14_2.ply") + pcd14 = o3d.io.read_point_cloud(cloud_path) + pcd14, rotc14 = apply(pcd14, gt_14, min_x, max_x, off08_14, mask=True) + + + off14_21 = [0, 0.32, 0] + #off14_21 = [0, 0.3, 0] + cloud_path = os.path.join(datapath, "reduced_14_21_2.ply") + pcd21 = o3d.io.read_point_cloud(cloud_path) + pcd21, rotc21 = apply(pcd21, gt_21, min_x, max_x, off14_21, mask=True) + + + points, lines = [], [] + mask14 = np.zeros(data_14_gt.centers.shape[0], dtype=bool) + + P08 = apply_to_P(data_08_gt.centers, gt_08, rotc08, None) + P14 = apply_to_P(data_14_gt.centers, gt_14, rotc14, off08_14) + P14_test = apply_to_P(data_14_test.centers, gt_14, rotc14, off08_14) + P21 = apply_to_P(data_21_gt.centers, gt_21, rotc21, off14_21) + + for k_n in tqdm(conn_gt.connections_keys): + k_p = conn_gt.connections_keys[k_n] + + c_14 = P14[data_14_gt.keys.index(k_n)] + c_08 = P08[data_08_gt.keys.index(k_p)] + + mask14[data_14_gt.keys.index(k_n)] = True + + points.append(c_14) + points.append(c_08) + lines.append([len(points)-2, len(points)-1]) + + colors = [[0, 0.5, 0] for i in range(len(lines))] + + line_set_14 = o3d.geometry.LineSet( + points=o3d.utility.Vector3dVector(points), + lines=o3d.utility.Vector2iVector(lines), + ) + line_set_14.colors = o3d.utility.Vector3dVector(colors) + + spheres_14 = [] + spheres_14_c = [] + spheres_14_r = [] + + for idx in range(len(data_14_gt.keys)): + if not mask14[idx]: + sphere = o3d.geometry.TriangleMesh.create_sphere(radius=data_14_gt.fruit_radius[idx]) + sphere.translate(P14[idx]) + sphere.paint_uniform_color([0.1, 0.1, 0.7]) + spheres_14.append(sphere) + spheres_14_c.append(sphere.get_center()) + spheres_14_r.append(data_14_gt.fruit_radius[idx]) + + + ################ 21 to 14 + + points, lines = [], [] + mask21 = np.zeros(P21.shape[0], dtype=bool) + + for k_n in tqdm(conn_gt_test.connections_keys): + k_p = conn_gt_test.connections_keys[k_n] + + c_21 = P21[data_21_gt.keys.index(k_n)] + c_14 = P14_test[data_14_test.keys.index(k_p)] + + mask21[data_21_gt.keys.index(k_n)] = True + + points.append(c_21) + points.append(c_14) + lines.append([len(points)-2, len(points)-1]) + + colors = [[0, 0.5, 0] for i in range(len(lines))] + + line_set_21 = o3d.geometry.LineSet( + points=o3d.utility.Vector3dVector(points), + lines=o3d.utility.Vector2iVector(lines), + ) + line_set_21.colors = o3d.utility.Vector3dVector(colors) + + spheres_21 = [] + spheres_21_c = [] + spheres_21_r = [] + + for idx in 
range(len(data_21_gt.keys)):
+        if not mask21[idx]:
+            sphere = o3d.geometry.TriangleMesh.create_sphere(radius=max(data_21_gt.fruit_radius[idx], 1e-4))
+            sphere.translate(P21[idx])
+            sphere.paint_uniform_color([0.1, 0.1, 0.7])
+            spheres_21.append(sphere)
+            spheres_21_c.append(sphere.get_center())
+            spheres_21_r.append(data_21_gt.fruit_radius[idx])
+
+
+    visualizer = o3d.visualization.Visualizer()
+    visualizer.create_window(width=3200, height=1600, visible=True)
+    view_ctl = visualizer.get_view_control()
+    view_ctl.set_up([-0.0002971969814756552, -0.014997288869575557, 0.99988749017102752]) # up vector: (roughly) the positive z-axis
+    view_ctl.set_front([-0.011999257536550094, -0.9998154952548014, -0.01499977556391714 ]) # front vector: (roughly) the negative y-axis
+    view_ctl.set_lookat([28.447480121135829, 1.0, 0.58518670083317892 ]) # center the view near the cropped strip of the row
+    view_ctl.set_zoom(0.02)
+    #param = view_ctl.convert_to_pinhole_camera_parameters()
+    #o3d.io.write_pinhole_camera_parameters("view_motiv.json", param)
+    visualizer.add_geometry(pcd08)
+    visualizer.add_geometry(pcd14)
+    visualizer.add_geometry(pcd21)
+    visualizer.add_geometry(line_set_14)
+    visualizer.add_geometry(line_set_21)
+
+    for s in spheres_14:
+        visualizer.add_geometry(s)
+    for s in spheres_21:
+        visualizer.add_geometry(s)
+
+    visualizer.run()
+
+if __name__ == "__main__":
+    typer.run(main)
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..6f747d4
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,12 @@
+open3d
+torch_geometric==2.5.3
+numpy==1.24.4
+tqdm
+pyyaml
+typer
+easydict==1.9
+matplotlib==3.5.1
+pytorch_lightning
+scikit_learn==1.1.2
+ninja
+opencv-python