237 lines
8.7 KiB
Python
237 lines
8.7 KiB
Python
|
from torch.utils.data import Dataset
|
||
|
import numpy as np
|
||
|
import os
|
||
|
from PIL import Image
|
||
|
from datasets.data_io import *
|
||
|
import cv2
|
||
|
import random
|
||
|
from torchvision import transforms
|
||
|
|
||
|
|
||
|
class MVSDataset(Dataset):
    """Multi-view stereo dataset producing multi-scale images, cameras and depth.

    Each sample is identified by ``(scan, light_idx, ref_view, src_views)`` and
    is loaded from the following layout below ``datapath``:

    * ``Rectified/<scan>_train/rect_<view+1>_<light>_r5000.png`` -- colour images
    * ``Cameras_1/<scan>_train/<view>_cam.txt`` -- camera parameters
    * ``Cameras_1/pair.txt`` -- reference/source view pairing
    * ``Depths_raw/<scan>/depth_map_<view>.pfm`` and ``depth_visual_<view>.png``
      -- depth map and validity mask of the reference view

    Args:
        datapath: Root directory of the dataset.
        listfile: Text file with one scan name per line.
        mode: One of ``"train"``, ``"val"`` or ``"test"``. Colour jitter is
            only applied in ``"train"`` mode.
        nviews: Number of views per sample (reference view included).
        robust_train: If true, source views are sampled randomly and the scene
            is rescaled by a random factor in ``[0.8, 1.25]``.
    """

    def __init__(self, datapath, listfile, mode, nviews, robust_train=False):
        super().__init__()

        # Number of pyramid levels; level_0 is the finest, each further level
        # halves the resolution.
        self.levels = 4
        self.datapath = datapath
        self.listfile = listfile
        self.mode = mode
        self.nviews = nviews
        # (width, height) of the cropped depth map / mask at level_0.
        self.img_wh = (640, 512)
        # self.img_wh = (1440, 1056)
        self.robust_train = robust_train

        assert self.mode in ["train", "val", "test"], \
            "mode must be 'train', 'val' or 'test'"
        self.metas = self.build_list()
        self.color_augment = transforms.ColorJitter(brightness=0.5, contrast=0.5)

    @staticmethod
    def _read_pair_file(pair_path):
        """Parse a pair file into a list of ``(ref_view, src_views)`` tuples.

        The first line holds the number of viewpoints. Every viewpoint then
        takes two lines: the reference view id, and a line whose tokens at odd
        positions (1, 3, 5, ...) are the source view ids.
        """
        pairs = []
        with open(pair_path) as f:
            num_viewpoint = int(f.readline())
            for _ in range(num_viewpoint):
                ref_view = int(f.readline().rstrip())
                src_views = [int(tok) for tok in f.readline().rstrip().split()[1::2]]
                pairs.append((ref_view, src_views))
        return pairs

    @staticmethod
    def _projection_pyramid(intrinsics, extrinsics, levels):
        """Build one 4x4 projection matrix per pyramid level.

        The first two rows of ``intrinsics`` are scaled by ``4 / 2**level``
        (x4 for level_0, x2 for level_1, ...), presumably because the camera
        files store intrinsics at a quarter of the level_0 resolution -- TODO
        confirm. The top 3x4 part of each result is ``K_level @ extrinsics[:3]``;
        the last row is copied from ``extrinsics``. Inputs are not modified.

        Returns:
            dict mapping ``"level_<l>"`` to a 4x4 array.
        """
        projections = {}
        for lvl in range(levels):
            scaled_k = intrinsics.copy()
            # Power-of-two factors keep the scaling exact in floating point.
            scaled_k[:2, :] *= 4.0 / 2 ** lvl
            proj_mat = extrinsics.copy()
            proj_mat[:3, :4] = np.matmul(scaled_k, proj_mat[:3, :4])
            projections[f"level_{lvl}"] = proj_mat
        return projections

    def build_list(self):
        """Enumerate all samples as ``(scan, light_idx, ref_view, src_views)``.

        Every scan of the list file is combined with every viewpoint of the
        pair file and with the 7 light conditions (0-6). Blank lines in the
        list file are ignored. Also sets ``self.num_viewpoint`` when at least
        one scan is listed.
        """
        with open(self.listfile) as f:
            # Skip empty lines so that e.g. a trailing newline does not create
            # samples for a scan with an empty name.
            scans = [name for name in (line.rstrip() for line in f) if name]

        metas = []
        if scans:
            # The pair file is shared by all scans, so parse it only once.
            view_pairs = self._read_pair_file(
                os.path.join(self.datapath, "Cameras_1/pair.txt"))
            # viewpoints (49)
            self.num_viewpoint = len(view_pairs)
            for scan in scans:
                for ref_view, src_views in view_pairs:
                    # light conditions 0-6
                    for light_idx in range(7):
                        metas.append((scan, light_idx, ref_view, src_views))

        print("dataset", self.mode, "metas:", len(metas))
        return metas

    def __len__(self):
        """Return the number of samples."""
        return len(self.metas)

    def read_cam_file(self, filename):
        """Read camera parameters from a text file.

        Expected layout (0-based line numbers): lines 1-4 hold the 4x4
        extrinsic matrix, lines 7-9 the 3x3 intrinsic matrix, and line 11 the
        depth range, whose first token is the minimum and last token the
        maximum depth.

        Returns:
            ``(intrinsics, extrinsics, depth_min, depth_max)`` with float32
            matrices and Python float depths.
        """
        with open(filename) as f:
            lines = [line.rstrip() for line in f]

        # extrinsics: line [1,5), 4x4 matrix
        extrinsics = np.fromstring(
            ' '.join(lines[1:5]), dtype=np.float32, sep=' ').reshape((4, 4))
        # intrinsics: line [7,10), 3x3 matrix
        intrinsics = np.fromstring(
            ' '.join(lines[7:10]), dtype=np.float32, sep=' ').reshape((3, 3))

        depth_tokens = lines[11].split()
        depth_min = float(depth_tokens[0])
        depth_max = float(depth_tokens[-1])
        return intrinsics, extrinsics, depth_min, depth_max

    def read_img(self, filename):
        """Load a colour image and return it as a multi-scale pyramid.

        In ``"train"`` mode the colour jitter augmentation is applied first.
        Pixel values are mapped from 0~255 to -1~1.

        Returns:
            dict mapping ``"level_<l>"`` to an ``H/2**l x W/2**l x C`` float32
            array; ``level_0`` is the full-resolution image.
        """
        with Image.open(filename) as img:
            if self.mode == 'train':
                img = self.color_augment(img)
            # scale 0~255 to -1~1
            np_img = 2 * np.array(img, dtype=np.float32) / 255. - 1

        h, w, _ = np_img.shape
        # Coarsest level first, matching the key order callers have always seen.
        np_img_ms = {
            f"level_{lvl}": cv2.resize(
                np_img, (w // 2 ** lvl, h // 2 ** lvl), interpolation=cv2.INTER_LINEAR)
            for lvl in range(self.levels - 1, 0, -1)
        }
        np_img_ms["level_0"] = np_img
        return np_img_ms

    def prepare_img(self, hr_img):
        """Halve a 2-D map with nearest-neighbour sampling and centre-crop it.

        The crop size is ``self.img_wh`` (width, height).
        """
        # downsample
        # original w,h: 1600, 1200; downsample -> 800, 600 ; crop -> 640, 512
        h, w = hr_img.shape
        hr_img = cv2.resize(hr_img, (w // 2, h // 2), interpolation=cv2.INTER_NEAREST)

        # crop
        h, w = hr_img.shape
        target_w, target_h = self.img_wh
        start_h = (h - target_h) // 2
        start_w = (w - target_w) // 2
        return hr_img[start_h: start_h + target_h, start_w: start_w + target_w]

    def read_mask(self, filename):
        """Load a mask image and binarise it: 1.0 where the value is > 10."""
        with Image.open(filename) as img:
            np_img = np.array(img, dtype=np.float32)
        return (np_img > 10).astype(np.float32)

    def read_depth_mask(self, filename, mask_filename, scale):
        """Load the depth map and mask of a view as multi-scale pyramids.

        The depth values are multiplied by ``scale``. Depth and mask are
        prepared with :meth:`prepare_img` and then resized with
        nearest-neighbour sampling for every pyramid level.

        Returns:
            ``(depth_ms, mask_ms)``: two dicts mapping ``"level_<l>"`` to 2-D
            float32 arrays.
        """
        # NOTE(review): assumes read_pfm returns a tuple whose first element is
        # an H x W x 1 array -- confirm against datasets.data_io.
        depth_hr = np.array(read_pfm(filename)[0], dtype=np.float32) * scale
        depth_hr = np.squeeze(depth_hr, 2)
        depth_lr = self.prepare_img(depth_hr)

        mask = self.prepare_img(self.read_mask(mask_filename))
        # Force strictly binary values (0.0 / 1.0).
        mask = mask.astype(np.bool_).astype(np.float32)

        h, w = depth_lr.shape
        depth_lr_ms = {}
        mask_ms = {}
        for lvl in range(self.levels):
            size = (w // (2 ** lvl), h // (2 ** lvl))
            depth_lr_ms[f"level_{lvl}"] = cv2.resize(
                depth_lr, size, interpolation=cv2.INTER_NEAREST)
            mask_ms[f"level_{lvl}"] = cv2.resize(
                mask, size, interpolation=cv2.INTER_NEAREST)

        return depth_lr_ms, mask_ms

    def __getitem__(self, idx):
        """Load one sample.

        Returns:
            dict with the keys

            * ``"imgs"``: per level, array ``[N, 3, H, W]`` (N = number of views)
            * ``"proj_matrices"``: per level, array ``[N, 4, 4]``
            * ``"depth"``: per level, array ``[1, H, W]`` of the reference view
            * ``"depth_min"`` / ``"depth_max"``: scalars of the reference view
            * ``"mask"``: per level, array ``[1, H, W]`` of the reference view
        """
        scan, light_idx, ref_view, src_views = self.metas[idx]

        # robust training strategy
        if self.robust_train:
            chosen = random.sample(range(len(src_views)), self.nviews - 1)
            view_ids = [ref_view] + [src_views[i] for i in chosen]
            scale = random.uniform(0.8, 1.25)
        else:
            view_ids = [ref_view] + src_views[:self.nviews - 1]
            scale = 1

        level_keys = [f"level_{lvl}" for lvl in range(self.levels)]
        imgs_per_level = {key: [] for key in level_keys}
        # Coarsest level first, matching the key order callers have always seen.
        proj_per_level = {key: [] for key in reversed(level_keys)}

        mask = None
        depth = None
        depth_min = None
        depth_max = None

        for i, vid in enumerate(view_ids):
            img_filename = os.path.join(
                self.datapath,
                'Rectified/{}_train/rect_{:0>3}_{}_r5000.png'.format(scan, vid + 1, light_idx))
            # Format the relative path before joining so that braces in
            # datapath cannot be misread as format fields.
            proj_mat_filename = os.path.join(
                self.datapath, 'Cameras_1/{}_train/{:0>8}_cam.txt'.format(scan, vid))

            view_imgs = self.read_img(img_filename)
            for key in level_keys:
                imgs_per_level[key].append(view_imgs[key])

            intrinsics, extrinsics, depth_min_, depth_max_ = self.read_cam_file(proj_mat_filename)
            # Scale the camera translation together with the depth values.
            extrinsics[:3, 3] *= scale
            view_projs = self._projection_pyramid(intrinsics, extrinsics, self.levels)
            for key in level_keys:
                proj_per_level[key].append(view_projs[key])

            if i == 0:  # reference view
                mask_filename = os.path.join(
                    self.datapath, 'Depths_raw/{}/depth_visual_{:0>4}.png'.format(scan, vid))
                depth_filename = os.path.join(
                    self.datapath, 'Depths_raw/{}/depth_map_{:0>4}.pfm'.format(scan, vid))

                depth_min = depth_min_ * scale
                depth_max = depth_max_ * scale
                depth, mask = self.read_depth_mask(depth_filename, mask_filename, scale)

                # Add a leading channel axis: [H, W] -> [1, H, W].
                for key in level_keys:
                    mask[key] = mask[key][np.newaxis]
                    depth[key] = depth[key][np.newaxis]

        # imgs: N*3*H*W per level, N is number of images
        imgs = {key: np.stack(imgs_per_level[key]).transpose([0, 3, 1, 2])
                for key in level_keys}

        # proj_matrices: N*4*4 per level
        proj = {key: np.stack(mats) for key, mats in proj_per_level.items()}

        # data is numpy array
        return {"imgs": imgs,                   # [N, 3, H, W]
                "proj_matrices": proj,          # [N,4,4]
                "depth": depth,                 # [1, H, W]
                "depth_min": depth_min,         # scalar
                "depth_max": depth_max,         # scalar
                "mask": mask}                   # [1, H, W]
|
||
|
|