From a301ebb02073fbae79ea1cd3f7b3b0c2321d5277 Mon Sep 17 00:00:00 2001
From: HTensor
Date: Sat, 22 Apr 2023 11:32:10 +0800
Subject: [PATCH] Add CREStereo dataset & fix some bugs

---
 IGEV-Stereo/core/igev_stereo.py       |  2 +-
 IGEV-Stereo/core/stereo_datasets.py   | 31 ++++++++++++++++++++++++++-
 IGEV-Stereo/core/utils/frame_utils.py |  7 +++++-
 IGEV-Stereo/demo_imgs.py              |  1 +
 IGEV-Stereo/train_stereo.py           | 12 ++++++-----
 5 files changed, 45 insertions(+), 8 deletions(-)

diff --git a/IGEV-Stereo/core/igev_stereo.py b/IGEV-Stereo/core/igev_stereo.py
index 3ce7946..e8ce791 100644
--- a/IGEV-Stereo/core/igev_stereo.py
+++ b/IGEV-Stereo/core/igev_stereo.py
@@ -165,7 +165,7 @@ class IGEVStereo(nn.Module):
             match_left = self.desc(self.conv(features_left[0]))
             match_right = self.desc(self.conv(features_right[0]))
 
-            gwc_volume = build_gwc_volume(match_left, match_right, 192//4, 8)
+            gwc_volume = build_gwc_volume(match_left, match_right, self.args.max_disp//4, 8)
             gwc_volume = self.corr_stem(gwc_volume)
             gwc_volume = self.corr_feature_att(gwc_volume, features_left[0])
             geo_encoding_volume = self.cost_agg(gwc_volume, features_left)
diff --git a/IGEV-Stereo/core/stereo_datasets.py b/IGEV-Stereo/core/stereo_datasets.py
index ad7c7f5..206d802 100644
--- a/IGEV-Stereo/core/stereo_datasets.py
+++ b/IGEV-Stereo/core/stereo_datasets.py
@@ -73,6 +73,7 @@ class StereoDataset(data.Dataset):
         img2 = np.array(img2).astype(np.uint8)
 
         disp = np.array(disp).astype(np.float32)
+        assert not np.isnan(disp).any()
 
         flow = np.stack([disp, np.zeros_like(disp)], axis=-1)
 
@@ -266,6 +267,31 @@ class KITTI(StereoDataset):
             self.image_list += [ [img1, img2] ]
             self.disparity_list += [ disp ]
 
 
+class CREStereo(StereoDataset):
+    def __init__(self, aug_params=None, root='/data/CREStereo'):
+        super(CREStereo, self).__init__(aug_params, sparse=True, reader=frame_utils.readDispCREStereo)
+        self.root = root
+        assert os.path.exists(root)
+
+        disp_list = self.selector('_left.disp.png')
+        image1_list = self.selector('_left.jpg')
+        image2_list = self.selector('_right.jpg')
+
+        assert len(image1_list) == len(image2_list) == len(disp_list) > 0
+        for img1, img2, disp in zip(image1_list, image2_list, disp_list):
+            # if random.randint(1, 20000) != 1:
+            #     continue
+            self.image_list += [[img1, img2]]
+            self.disparity_list += [disp]
+
+    def selector(self, suffix):
+        files = list(glob(os.path.join(self.root, f"hole/*{suffix}")))
+        files += list(glob(os.path.join(self.root, f"shapenet/*{suffix}")))
+        files += list(glob(os.path.join(self.root, f"tree/*{suffix}")))
+        files += list(glob(os.path.join(self.root, f"reflective/*{suffix}")))
+        return sorted(files)
+
+
 class Middlebury(StereoDataset):
     def __init__(self, aug_params=None, root='/data/Middlebury', split='F'):
@@ -321,10 +347,13 @@ def fetch_dataloader(args):
         elif dataset_name.startswith('tartan_air'):
             new_dataset = TartanAir(aug_params, keywords=dataset_name.split('_')[2:])
             logging.info(f"Adding {len(new_dataset)} samples from Tartain Air")
+        elif dataset_name.startswith('crestereo'):
+            new_dataset = CREStereo(aug_params)
+            logging.info(f"Adding {len(new_dataset)} samples from CREStereo")
         train_dataset = new_dataset if train_dataset is None else train_dataset + new_dataset
 
     train_loader = data.DataLoader(train_dataset, batch_size=args.batch_size,
-        pin_memory=True, shuffle=True, num_workers=int(os.environ.get('SLURM_CPUS_PER_TASK', 6))-2, drop_last=True)
+        pin_memory=False, shuffle=True, num_workers=int(os.environ.get('SLURM_CPUS_PER_TASK', 6))-2, drop_last=True)
 
     logging.info('Training with %d image pairs' % len(train_dataset))
     return train_loader
diff --git a/IGEV-Stereo/core/utils/frame_utils.py b/IGEV-Stereo/core/utils/frame_utils.py
index 10d3d85..25d2afb 100644
--- a/IGEV-Stereo/core/utils/frame_utils.py
+++ b/IGEV-Stereo/core/utils/frame_utils.py
@@ -152,6 +152,11 @@ def readDispTartanAir(file_name):
     valid = disp > 0
     return disp, valid
 
+def readDispCREStereo(file_name):
+    disp = cv2.imread(file_name, cv2.IMREAD_UNCHANGED)
+    disp = disp.astype(np.float32) / 32.0
+    valid = disp > 0.0
+    return disp, valid
 
 def readDispMiddlebury(file_name):
     assert basename(file_name) == 'disp0GT.pfm'
@@ -168,7 +173,7 @@ def writeFlowKITTI(filename, uv):
     valid = np.ones([uv.shape[0], uv.shape[1], 1])
     uv = np.concatenate([uv, valid], axis=-1).astype(np.uint16)
     cv2.imwrite(filename, uv[..., ::-1])
-    
+
 
 def read_gen(file_name, pil=False):
     ext = splitext(file_name)[-1]
diff --git a/IGEV-Stereo/demo_imgs.py b/IGEV-Stereo/demo_imgs.py
index 361d231..0855507 100644
--- a/IGEV-Stereo/demo_imgs.py
+++ b/IGEV-Stereo/demo_imgs.py
@@ -26,6 +26,7 @@ def demo(args):
     model.load_state_dict(torch.load(args.restore_ckpt))
 
     model = model.module
+    # model = torch.compile(model)
     model.to(DEVICE)
     model.eval()
 
diff --git a/IGEV-Stereo/train_stereo.py b/IGEV-Stereo/train_stereo.py
index 92c9981..ad0b8b5 100644
--- a/IGEV-Stereo/train_stereo.py
+++ b/IGEV-Stereo/train_stereo.py
@@ -1,6 +1,7 @@
 from __future__ import print_function, division
+import math
 
 import os
 os.environ['CUDA_VISIBLE_DEVICES'] = '0, 1'
 
 
@@ -53,7 +54,7 @@ def sequence_loss(disp_preds, disp_init_pred, disp_gt, valid, loss_gamma=0.9, ma
     assert not torch.isinf(disp_gt[valid.bool()]).any()
 
-    disp_loss += 1.0 * F.smooth_l1_loss(disp_init_pred[valid.bool()], disp_gt[valid.bool()], size_average=True)
+    disp_loss += 1.0 * F.smooth_l1_loss(disp_init_pred[valid.bool()], disp_gt[valid.bool()], reduction='mean')
 
     for i in range(n_predictions):
         adjusted_loss_gamma = loss_gamma**(15/(n_predictions - 1))
         i_weight = adjusted_loss_gamma**(n_predictions - i - 1)
@@ -78,6 +79,7 @@ def fetch_optimizer(args, model):
     """ Create the optimizer and learning rate scheduler """
     optimizer = optim.AdamW(model.parameters(), lr=args.lr, weight_decay=args.wdecay, eps=1e-8)
+    # todo: cosine scheduler, warm-up
     scheduler = optim.lr_scheduler.OneCycleLR(optimizer, args.lr, args.num_steps+100,
             pct_start=0.01, cycle_momentum=False, anneal_strategy='linear')
 
     return optimizer, scheduler
@@ -151,14 +153,13 @@ def train(args):
     model.train()
     model.module.freeze_bn() # We keep BatchNorm frozen
 
-    validation_frequency = 10000
+    validation_frequency = 1000
 
     scaler = GradScaler(enabled=args.mixed_precision)
 
     should_keep_training = True
     global_batch_num = 0
     while should_keep_training:
-
         for i_batch, (_, *data_blob) in enumerate(tqdm(train_loader)):
             optimizer.zero_grad()
             image1, image2, disp_gt, valid = [x.cuda() for x in data_blob]
@@ -175,6 +176,7 @@ def train(args):
 
             scaler.unscale_(optimizer)
             torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
+#warning
             scaler.step(optimizer)
             scheduler.step()
             scaler.update()
@@ -184,7 +186,7 @@ def train(args):
                 save_path = Path(ckpt_path + '/%d_%s.pth' % (total_steps + 1, args.name))
                 logging.info(f"Saving file {save_path.absolute()}")
                 torch.save(model.state_dict(), save_path)
-                results = validate_sceneflow(model.module, iters=args.valid_iters)
+                results = validate_middlebury(model.module, iters=args.valid_iters)
                 logger.write_dict(results)
                 model.train()
                 model.module.freeze_bn()
@@ -212,7 +214,7 @@ if __name__ == '__main__':
     parser = argparse.ArgumentParser()
     parser.add_argument('--name', default='igev-stereo', help="name your experiment")
     parser.add_argument('--restore_ckpt', default=None, help="")
-    parser.add_argument('--mixed_precision', default=True, action='store_true', help='use mixed precision')
+    parser.add_argument('--mixed_precision', default=False, action='store_true', help='use mixed precision')
 
     # Training parameters
     parser.add_argument('--batch_size', type=int, default=8, help="batch size used during training.")
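
Note: the snippet below is not part of the patch. It is a minimal sanity check for the disparity encoding that readDispCREStereo assumes, namely 16-bit PNGs storing disparity scaled by 32, with zero marking pixels that have no ground truth. The file path is only an example; point it at any *_left.disp.png from a local copy of the CREStereo training set.

import cv2
import numpy as np

# Example path only; any CREStereo ground-truth disparity file will do.
disp_path = "/data/CREStereo/tree/00000000_left.disp.png"

raw = cv2.imread(disp_path, cv2.IMREAD_UNCHANGED)
assert raw is not None, f"could not read {disp_path}"

# Same decoding as readDispCREStereo in the patch: undo the x32 fixed-point scaling.
disp = raw.astype(np.float32) / 32.0
valid = disp > 0.0

print("dtype on disk:", raw.dtype)  # expected: uint16
print("valid pixels: %.1f%%" % (100.0 * valid.mean()))
print("disparity range: %.2f .. %.2f" % (disp[valid].min(), disp[valid].max()))

With the fetch_dataloader hook above, the new data should then be selectable by dataset name, e.g. adding a crestereo entry to the training script's --train_datasets list (assuming the IGEV training script keeps RAFT-Stereo's --train_datasets argument).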