IGEV/IGEV-MVS/datasets/dtu_yao_eval.py

159 lines
5.8 KiB
Python
Raw Permalink Normal View History

2023-03-20 19:52:04 +08:00
from torch.utils.data import Dataset
import numpy as np
import os
from PIL import Image
from datasets.data_io import *
import cv2
class MVSDataset(Dataset):
def __init__(self, datapath, listfile, nviews=5, img_wh=(1600, 1152)):
super(MVSDataset, self).__init__()
self.levels = 4
self.datapath = datapath
self.listfile = listfile
self.nviews = nviews
self.img_wh = img_wh
self.metas = self.build_list()
def build_list(self):
metas = []
with open(self.listfile) as f:
scans = f.readlines()
scans = [line.rstrip() for line in scans]
for scan in scans:
pair_file = "{}/pair.txt".format(scan)
# read the pair file
with open(os.path.join(self.datapath, pair_file)) as f:
num_viewpoint = int(f.readline())
# viewpoints (49)
for view_idx in range(num_viewpoint):
ref_view = int(f.readline().rstrip())
src_views = [int(x) for x in f.readline().rstrip().split()[1::2]]
metas.append((scan, ref_view, src_views))
print("dataset", "metas:", len(metas))
return metas
def __len__(self):
return len(self.metas)
def read_cam_file(self, filename):
with open(filename) as f:
lines = f.readlines()
lines = [line.rstrip() for line in lines]
# extrinsics: line [1,5), 4x4 matrix
extrinsics = np.fromstring(' '.join(lines[1:5]), dtype=np.float32, sep=' ').reshape((4, 4))
# intrinsics: line [7-10), 3x3 matrix
intrinsics = np.fromstring(' '.join(lines[7:10]), dtype=np.float32, sep=' ').reshape((3, 3))
depth_min = float(lines[11].split()[0])
depth_max = float(lines[11].split()[-1])
return intrinsics, extrinsics, depth_min, depth_max
def read_mask(self, filename):
img = Image.open(filename)
np_img = np.array(img, dtype=np.float32)
np_img = (np_img > 10).astype(np.float32)
return np_img
def read_img(self, filename):
img = Image.open(filename)
# scale 0~255 to -1~1
np_img = 2*np.array(img, dtype=np.float32) / 255. - 1
np_img = cv2.resize(np_img, self.img_wh, interpolation=cv2.INTER_LINEAR)
h, w, _ = np_img.shape
np_img_ms = {
"level_3": cv2.resize(np_img, (w//8, h//8), interpolation=cv2.INTER_LINEAR),
"level_2": cv2.resize(np_img, (w//4, h//4), interpolation=cv2.INTER_LINEAR),
"level_1": cv2.resize(np_img, (w//2, h//2), interpolation=cv2.INTER_LINEAR),
"level_0": np_img
}
return np_img_ms
def __getitem__(self, idx):
scan, ref_view, src_views = self.metas[idx]
# use only the reference view and first nviews-1 source views
view_ids = [ref_view] + src_views[:self.nviews - 1]
img_w = 1600
img_h = 1200
imgs_0 = []
imgs_1 = []
imgs_2 = []
imgs_3 = []
depth_min = None
depth_max = None
proj_matrices_0 = []
proj_matrices_1 = []
proj_matrices_2 = []
proj_matrices_3 = []
for i, vid in enumerate(view_ids):
img_filename = os.path.join(self.datapath, '{}/images/{:0>8}.jpg'.format(scan, vid))
proj_mat_filename = os.path.join(self.datapath, '{}/cams_1/{:0>8}_cam.txt'.format(scan, vid))
imgs = self.read_img(img_filename)
imgs_0.append(imgs['level_0'])
imgs_1.append(imgs['level_1'])
imgs_2.append(imgs['level_2'])
imgs_3.append(imgs['level_3'])
intrinsics, extrinsics, depth_min_, depth_max_ = self.read_cam_file(proj_mat_filename)
intrinsics[0] *= self.img_wh[0]/img_w
intrinsics[1] *= self.img_wh[1]/img_h
proj_mat = extrinsics.copy()
intrinsics[:2,:] *= 0.125
proj_mat[:3, :4] = np.matmul(intrinsics, proj_mat[:3, :4])
proj_matrices_3.append(proj_mat)
proj_mat = extrinsics.copy()
intrinsics[:2,:] *= 2
proj_mat[:3, :4] = np.matmul(intrinsics, proj_mat[:3, :4])
proj_matrices_2.append(proj_mat)
proj_mat = extrinsics.copy()
intrinsics[:2,:] *= 2
proj_mat[:3, :4] = np.matmul(intrinsics, proj_mat[:3, :4])
proj_matrices_1.append(proj_mat)
proj_mat = extrinsics.copy()
intrinsics[:2,:] *= 2
proj_mat[:3, :4] = np.matmul(intrinsics, proj_mat[:3, :4])
proj_matrices_0.append(proj_mat)
if i == 0: # reference view
depth_min = depth_min_
depth_max = depth_max_
imgs_0 = np.stack(imgs_0).transpose([0, 3, 1, 2])
imgs_1 = np.stack(imgs_1).transpose([0, 3, 1, 2])
imgs_2 = np.stack(imgs_2).transpose([0, 3, 1, 2])
imgs_3 = np.stack(imgs_3).transpose([0, 3, 1, 2])
imgs = {}
imgs['level_0'] = imgs_0
imgs['level_1'] = imgs_1
imgs['level_2'] = imgs_2
imgs['level_3'] = imgs_3
# proj_matrices: N*4*4
proj_matrices_0 = np.stack(proj_matrices_0)
proj_matrices_1 = np.stack(proj_matrices_1)
proj_matrices_2 = np.stack(proj_matrices_2)
proj_matrices_3 = np.stack(proj_matrices_3)
proj={}
proj['level_3']=proj_matrices_3
proj['level_2']=proj_matrices_2
proj['level_1']=proj_matrices_1
proj['level_0']=proj_matrices_0
return {"imgs": imgs, # N*3*H0*W0
"proj_matrices": proj, # N*4*4
"depth_min": depth_min, # scalar
"depth_max": depth_max, # scalar
"filename": scan + '/{}/' + '{:0>8}'.format(view_ids[0]) + "{}"}