# IGEV/IGEV-MVS/datasets/dtu_yao.py

from torch.utils.data import Dataset
import numpy as np
import os
from PIL import Image
from datasets.data_io import *
import cv2
import random
from torchvision import transforms


class MVSDataset(Dataset):
    def __init__(self, datapath, listfile, mode, nviews, robust_train = False):
        super(MVSDataset, self).__init__()

        self.levels = 4
        self.datapath = datapath
        self.listfile = listfile
        self.mode = mode
        self.nviews = nviews
        self.img_wh = (640, 512)
        # self.img_wh = (1440, 1056)
        self.robust_train = robust_train
        

        assert self.mode in ["train", "val", "test"]
        self.metas = self.build_list()
        self.color_augment = transforms.ColorJitter(brightness=0.5, contrast=0.5)

    def build_list(self):
        metas = []
        with open(self.listfile) as f:
            scans = f.readlines()
            scans = [line.rstrip() for line in scans]

        for scan in scans:
            pair_file = "Cameras_1/pair.txt"
            
            with open(os.path.join(self.datapath, pair_file)) as f:
                self.num_viewpoint = int(f.readline())
                # viewpoints (49)
                for view_idx in range(self.num_viewpoint):
                    ref_view = int(f.readline().rstrip())
                    src_views = [int(x) for x in f.readline().rstrip().split()[1::2]]
                    # light conditions 0-6
                    for light_idx in range(7):
                        metas.append((scan, light_idx, ref_view, src_views))
        print("dataset", self.mode, "metas:", len(metas))
        return metas

    def __len__(self):
        return len(self.metas)

    def read_cam_file(self, filename):
        with open(filename) as f:
            lines = f.readlines()
            lines = [line.rstrip() for line in lines]
        # extrinsics: line [1,5), 4x4 matrix
        extrinsics = np.fromstring(' '.join(lines[1:5]), dtype=np.float32, sep=' ').reshape((4, 4))
        # intrinsics: line [7-10), 3x3 matrix
        intrinsics = np.fromstring(' '.join(lines[7:10]), dtype=np.float32, sep=' ').reshape((3, 3))
        depth_min = float(lines[11].split()[0])
        depth_max = float(lines[11].split()[-1])
        return intrinsics, extrinsics, depth_min, depth_max

    def read_img(self, filename):
        """Load an image and return it as a 4-level pyramid.

        In "train" mode the image first goes through ``self.color_augment``.
        Pixel values are mapped from 0~255 to -1~1. ``level_0`` is the full
        resolution array; ``level_k`` is resized to 1/2**k of the width and
        height with bilinear interpolation.

        Returns:
            dict with keys ``level_3``, ``level_2``, ``level_1``, ``level_0``.
        """
        picture = Image.open(filename)
        if self.mode == 'train':
            picture = self.color_augment(picture)

        # scale 0~255 to -1~1
        np_img = 2*np.array(picture, dtype=np.float32) / 255. - 1
        height, width, _ = np_img.shape

        pyramid = {}
        # Coarsest level first; every level is resized from the full image.
        for level in (3, 2, 1):
            factor = 2 ** level
            pyramid[f"level_{level}"] = cv2.resize(
                np_img, (width // factor, height // factor), interpolation=cv2.INTER_LINEAR)
        pyramid["level_0"] = np_img
        return pyramid


    def prepare_img(self, hr_img):
        """Halve a 2-D map with nearest-neighbour sampling, then centre-crop it.

        The crop size comes from ``self.img_wh`` (width, height).
        """
        # original w,h: 1600, 1200; downsample -> 800, 600 ; crop -> 640, 512
        full_h, full_w = hr_img.shape
        halved = cv2.resize(hr_img, (full_w // 2, full_h // 2), interpolation=cv2.INTER_NEAREST)

        # Centre crop to the target size.
        half_h, half_w = halved.shape
        crop_w, crop_h = self.img_wh
        top = (half_h - crop_h) // 2
        left = (half_w - crop_w) // 2
        return halved[top: top + crop_h, left: left + crop_w]

    def read_mask(self, filename):
        """Load an image and binarise it: 1.0 where the value exceeds 10, else 0.0."""
        raw = np.array(Image.open(filename), dtype=np.float32)
        return (raw > 10).astype(np.float32)


    def read_depth_mask(self, filename, mask_filename, scale):
        """Load a depth map and its mask and build ``self.levels``-level pyramids.

        The depth values are multiplied by ``scale``. Both maps go through
        ``prepare_img`` and are then resized with nearest-neighbour sampling
        to 1/2**i of that size for level ``i``.

        Returns:
            ``(depth_pyramid, mask_pyramid)``: dicts keyed ``level_0`` ..
            ``level_{levels-1}``.
        """
        depth_full = np.array(read_pfm(filename)[0], dtype=np.float32) * scale
        # assumes read_pfm yields an H x W x 1 array -- axis 2 is dropped here
        depth_full = np.squeeze(depth_full, 2)
        depth_lr = self.prepare_img(depth_full)

        mask = self.prepare_img(self.read_mask(mask_filename))
        # Round-trip through bool so the mask holds only 0.0 / 1.0.
        mask = mask.astype(np.bool_).astype(np.float32)

        base_h, base_w = depth_lr.shape
        depth_pyramid, mask_pyramid = {}, {}
        for level in range(self.levels):
            step = 2 ** level
            size = (base_w // step, base_h // step)
            key = f"level_{level}"
            depth_pyramid[key] = cv2.resize(depth_lr, size, interpolation=cv2.INTER_NEAREST)
            mask_pyramid[key] = cv2.resize(mask, size, interpolation=cv2.INTER_NEAREST)

        return depth_pyramid, mask_pyramid


    def __getitem__(self, idx):
        """Load one multi-view sample.

        The sample consists of the reference view followed by ``nviews - 1``
        source views. With ``robust_train`` the source views are drawn at
        random from the candidates and a random scene scale in [0.8, 1.25] is
        applied to the camera translations, the depth range and the depth
        map; otherwise the first ``nviews - 1`` candidates and scale 1 are
        used.

        Args:
            idx: index into ``self.metas``.

        Returns:
            dict with numpy data:
                "imgs": per-level arrays, [N, C, H, W] after the transpose below
                "proj_matrices": per-level arrays, [N, 4, 4]
                "depth": per-level reference depth, [1, H, W]
                "depth_min", "depth_max": scalars of the reference view
                "mask": per-level reference mask, [1, H, W]

        Raises:
            ValueError: from ``random.sample`` when ``robust_train`` is set and
                fewer than ``nviews - 1`` source views are available.
        """
        scan, light_idx, ref_view, src_views = self.metas[idx]

        # robust training strategy
        if self.robust_train:
            index = random.sample(range(len(src_views)), self.nviews - 1)
            view_ids = [ref_view] + [src_views[i] for i in index]
            scale = random.uniform(0.8, 1.25)
        else:
            view_ids = [ref_view] + src_views[:self.nviews - 1]
            scale = 1

        img_levels = ('level_0', 'level_1', 'level_2', 'level_3')
        # Projection matrices are built coarsest-first: the intrinsics are
        # scaled to level_3 once and then doubled for each finer level.
        proj_steps = (('level_3', 0.125), ('level_2', 2), ('level_1', 2), ('level_0', 2))

        img_lists = {name: [] for name in img_levels}
        proj_lists = {name: [] for name, _ in proj_steps}

        mask = None
        depth = None
        depth_min = None
        depth_max = None

        for i, vid in enumerate(view_ids):
            # Image files are numbered from 1, cameras and depth maps from 0.
            img_filename = os.path.join(self.datapath,
                                    'Rectified/{}_train/rect_{:0>3}_{}_r5000.png'.format(scan, vid + 1, light_idx))
            # Format only the relative template. Formatting the joined path
            # would also interpret any '{' / '}' inside self.datapath.
            proj_mat_filename = os.path.join(self.datapath, 'Cameras_1/{}_train/{:0>8}_cam.txt'.format(scan, vid))

            mask_filename = os.path.join(self.datapath, 'Depths_raw/{}/depth_visual_{:0>4}.png'.format(scan, vid))
            depth_filename = os.path.join(self.datapath, 'Depths_raw/{}/depth_map_{:0>4}.pfm'.format(scan, vid))

            view_imgs = self.read_img(img_filename)
            for name in img_levels:
                img_lists[name].append(view_imgs[name])

            intrinsics, extrinsics, depth_min_, depth_max_ = self.read_cam_file(proj_mat_filename)
            # Scaling the scene scales the camera translation with it.
            extrinsics[:3, 3] *= scale
            # Bring the stored intrinsics up by 4x before deriving the levels.
            intrinsics[0] *= 4
            intrinsics[1] *= 4

            for name, factor in proj_steps:
                intrinsics[:2, :] *= factor
                # 4x4 matrix whose top three rows are K @ [R | t]; the last
                # row is kept from the extrinsics.
                proj_mat = extrinsics.copy()
                proj_mat[:3, :4] = np.matmul(intrinsics, proj_mat[:3, :4])
                proj_lists[name].append(proj_mat)

            if i == 0:  # reference view
                depth_min = depth_min_ * scale
                depth_max = depth_max_ * scale
                depth, mask = self.read_depth_mask(depth_filename, mask_filename, scale)

                # H x W -> 1 x H x W for every level
                for l in range(self.levels):
                    key = f'level_{l}'
                    mask[key] = np.expand_dims(mask[key], 2).transpose([2, 0, 1])
                    depth[key] = np.expand_dims(depth[key], 2).transpose([2, 0, 1])

        # imgs: N*3*H0*W0, N is number of images
        imgs = {name: np.stack(img_lists[name]).transpose([0, 3, 1, 2]) for name in img_levels}

        # proj_matrices: N*4*4
        proj = {name: np.stack(proj_lists[name]) for name, _ in proj_steps}

        # data is numpy array
        return {"imgs": imgs,                   # [N, 3, H, W]
                "proj_matrices": proj,          # [N,4,4]
                "depth": depth,                 # [1, H, W]
                "depth_min": depth_min,         # scalar
                "depth_max": depth_max,         # scalar
                "mask": mask}                   # [1, H, W]
Add files via upload 2023-03-20 19:52:04 +08:00			`from torch.utils.data import Dataset`
			`import numpy as np`
			`import os`
			`from PIL import Image`
			`from datasets.data_io import *`
			`import cv2`
			`import random`
			`from torchvision import transforms`


			`class MVSDataset(Dataset):`
			`def __init__(self, datapath, listfile, mode, nviews, robust_train = False):`
			`super(MVSDataset, self).__init__()`

			`self.levels = 4`
			`self.datapath = datapath`
			`self.listfile = listfile`
			`self.mode = mode`
			`self.nviews = nviews`
			`self.img_wh = (640, 512)`
			`# self.img_wh = (1440, 1056)`
			`self.robust_train = robust_train`


			`assert self.mode in ["train", "val", "test"]`
			`self.metas = self.build_list()`
			`self.color_augment = transforms.ColorJitter(brightness=0.5, contrast=0.5)`

			`def build_list(self):`
			`metas = []`
			`with open(self.listfile) as f:`
			`scans = f.readlines()`
			`scans = [line.rstrip() for line in scans]`

			`for scan in scans:`
			`pair_file = "Cameras_1/pair.txt"`

			`with open(os.path.join(self.datapath, pair_file)) as f:`
			`self.num_viewpoint = int(f.readline())`
			`# viewpoints (49)`
			`for view_idx in range(self.num_viewpoint):`
			`ref_view = int(f.readline().rstrip())`
			`src_views = [int(x) for x in f.readline().rstrip().split()[1::2]]`
			`# light conditions 0-6`
			`for light_idx in range(7):`
			`metas.append((scan, light_idx, ref_view, src_views))`
			`print("dataset", self.mode, "metas:", len(metas))`
			`return metas`

			`def __len__(self):`
			`return len(self.metas)`

			`def read_cam_file(self, filename):`
			`with open(filename) as f:`
			`lines = f.readlines()`
			`lines = [line.rstrip() for line in lines]`
			`# extrinsics: line [1,5), 4x4 matrix`
			`extrinsics = np.fromstring(' '.join(lines[1:5]), dtype=np.float32, sep=' ').reshape((4, 4))`
			`# intrinsics: line [7-10), 3x3 matrix`
			`intrinsics = np.fromstring(' '.join(lines[7:10]), dtype=np.float32, sep=' ').reshape((3, 3))`
			`depth_min = float(lines[11].split()[0])`
			`depth_max = float(lines[11].split()[-1])`
			`return intrinsics, extrinsics, depth_min, depth_max`

			`def read_img(self, filename):`
			`img = Image.open(filename)`
			`if self.mode=='train':`
			`img = self.color_augment(img)`
			`# scale 0~255 to -1~1`
			`np_img = 2*np.array(img, dtype=np.float32) / 255. - 1`
			`h, w, _ = np_img.shape`
			`np_img_ms = {`
			`"level_3": cv2.resize(np_img, (w//8, h//8), interpolation=cv2.INTER_LINEAR),`
			`"level_2": cv2.resize(np_img, (w//4, h//4), interpolation=cv2.INTER_LINEAR),`
			`"level_1": cv2.resize(np_img, (w//2, h//2), interpolation=cv2.INTER_LINEAR),`
			`"level_0": np_img`
			`}`
			`return np_img_ms`


			`def prepare_img(self, hr_img):`
			`#downsample`
			`h, w = hr_img.shape`
			`# original w,h: 1600, 1200; downsample -> 800, 600 ; crop -> 640, 512`
			`hr_img = cv2.resize(hr_img, (w//2, h//2), interpolation=cv2.INTER_NEAREST)`
			`#crop`
			`h, w = hr_img.shape`
			`target_h, target_w = self.img_wh[1], self.img_wh[0]`
			`start_h, start_w = (h - target_h)//2, (w - target_w)//2`
			`hr_img_crop = hr_img[start_h: start_h + target_h, start_w: start_w + target_w]`

			`return hr_img_crop`

			`def read_mask(self, filename):`
			`img = Image.open(filename)`
			`np_img = np.array(img, dtype=np.float32)`
			`np_img = (np_img > 10).astype(np.float32)`
			`return np_img`


			`def read_depth_mask(self, filename, mask_filename, scale):`
			`depth_hr = np.array(read_pfm(filename)[0], dtype=np.float32) * scale`
			`depth_hr = np.squeeze(depth_hr,2)`
			`depth_lr = self.prepare_img(depth_hr)`
			`mask = self.read_mask(mask_filename)`
			`mask = self.prepare_img(mask)`
			`mask = mask.astype(np.bool_)`
			`mask = mask.astype(np.float32)`

			`h, w = depth_lr.shape`
			`depth_lr_ms = {}`
			`mask_ms = {}`

			`for i in range(self.levels):`
			`depth_cur = cv2.resize(depth_lr, (w//(2i), h//(2i)), interpolation=cv2.INTER_NEAREST)`
			`mask_cur = cv2.resize(mask, (w//(2i), h//(2i)), interpolation=cv2.INTER_NEAREST)`
			`depth_lr_ms[f"level_{i}"] = depth_cur`
			`mask_ms[f"level_{i}"] = mask_cur`

			`return depth_lr_ms, mask_ms`


			`def __getitem__(self, idx):`
			`meta = self.metas[idx]`
			`scan, light_idx, ref_view, src_views = meta`
			`# robust training strategy`
			`if self.robust_train:`
			`num_src_views = len(src_views)`
			`index = random.sample(range(num_src_views), self.nviews - 1)`
			`view_ids = [ref_view] + [src_views[i] for i in index]`
			`scale = random.uniform(0.8, 1.25)`

			`else:`
			`view_ids = [ref_view] + src_views[:self.nviews - 1]`
			`scale = 1`

			`imgs_0 = []`
			`imgs_1 = []`
			`imgs_2 = []`
			`imgs_3 = []`

			`mask = None`
			`depth = None`
			`depth_min = None`
			`depth_max = None`

			`proj_matrices_0 = []`
			`proj_matrices_1 = []`
			`proj_matrices_2 = []`
			`proj_matrices_3 = []`



			`for i, vid in enumerate(view_ids):`
			`img_filename = os.path.join(self.datapath,`
			`'Rectified/{}_train/rect_{:0>3}_{}_r5000.png'.format(scan, vid + 1, light_idx))`
			`proj_mat_filename = os.path.join(self.datapath, 'Cameras_1/{}_train/{:0>8}_cam.txt').format(scan, vid)`

			`mask_filename = os.path.join(self.datapath, 'Depths_raw/{}/depth_visual_{:0>4}.png'.format(scan, vid))`
			`depth_filename = os.path.join(self.datapath, 'Depths_raw/{}/depth_map_{:0>4}.pfm'.format(scan, vid))`

			`imgs = self.read_img(img_filename)`
			`imgs_0.append(imgs['level_0'])`
			`imgs_1.append(imgs['level_1'])`
			`imgs_2.append(imgs['level_2'])`
			`imgs_3.append(imgs['level_3'])`

			`intrinsics, extrinsics, depth_min_, depth_max_ = self.read_cam_file(proj_mat_filename)`
			`extrinsics[:3,3] *= scale`
			`intrinsics[0] *= 4`
			`intrinsics[1] *= 4`

			`proj_mat = extrinsics.copy()`
			`intrinsics[:2,:] *= 0.125`
			`proj_mat[:3, :4] = np.matmul(intrinsics, proj_mat[:3, :4])`
			`proj_matrices_3.append(proj_mat)`

			`proj_mat = extrinsics.copy()`
			`intrinsics[:2,:] *= 2`
			`proj_mat[:3, :4] = np.matmul(intrinsics, proj_mat[:3, :4])`
			`proj_matrices_2.append(proj_mat)`

			`proj_mat = extrinsics.copy()`
			`intrinsics[:2,:] *= 2`
			`proj_mat[:3, :4] = np.matmul(intrinsics, proj_mat[:3, :4])`
			`proj_matrices_1.append(proj_mat)`

			`proj_mat = extrinsics.copy()`
			`intrinsics[:2,:] *= 2`
			`proj_mat[:3, :4] = np.matmul(intrinsics, proj_mat[:3, :4])`
			`proj_matrices_0.append(proj_mat)`

			`if i == 0: # reference view`
			`depth_min = depth_min_ * scale`
			`depth_max = depth_max_ * scale`
			`depth, mask = self.read_depth_mask(depth_filename, mask_filename, scale)`

			`for l in range(self.levels):`
			`mask[f'level_{l}'] = np.expand_dims(mask[f'level_{l}'],2)`
			`mask[f'level_{l}'] = mask[f'level_{l}'].transpose([2,0,1])`
			`depth[f'level_{l}'] = np.expand_dims(depth[f'level_{l}'],2)`
			`depth[f'level_{l}'] = depth[f'level_{l}'].transpose([2,0,1])`

			`# imgs: N3H0*W0, N is number of images`
			`imgs_0 = np.stack(imgs_0).transpose([0, 3, 1, 2])`
			`imgs_1 = np.stack(imgs_1).transpose([0, 3, 1, 2])`
			`imgs_2 = np.stack(imgs_2).transpose([0, 3, 1, 2])`
			`imgs_3 = np.stack(imgs_3).transpose([0, 3, 1, 2])`

			`imgs = {}`
			`imgs['level_0'] = imgs_0`
			`imgs['level_1'] = imgs_1`
			`imgs['level_2'] = imgs_2`
			`imgs['level_3'] = imgs_3`

			`# proj_matrices: N44`
			`proj_matrices_0 = np.stack(proj_matrices_0)`
			`proj_matrices_1 = np.stack(proj_matrices_1)`
			`proj_matrices_2 = np.stack(proj_matrices_2)`
			`proj_matrices_3 = np.stack(proj_matrices_3)`

			`proj={}`
			`proj['level_3']=proj_matrices_3`
			`proj['level_2']=proj_matrices_2`
			`proj['level_1']=proj_matrices_1`
			`proj['level_0']=proj_matrices_0`


			`# data is numpy array`
			`return {"imgs": imgs, # [N, 3, H, W]`
			`"proj_matrices": proj, # [N,4,4]`
			`"depth": depth, # [1, H, W]`
			`"depth_min": depth_min, # scalar`
			`"depth_max": depth_max, # scalar`
			`"mask": mask} # [1, H, W]`