# Patch summary. Everything before the first "diff --git" header is
# commentary and is not part of any hunk.
#
# IGEV-Stereo/core/utils/augmentor.py (hunk context: class SparseFlowAugmentor)
#   * Imports ImageEnhance from PIL alongside Image.
#   * Comments out the Compose([ColorJitter(...), AdjustGamma(...)]) assignment
#     to self.photo_aug and removes self.asymmetric_color_aug_prob.
#   * Adds chromatic_augmentation(img): draws brightness, contrast and gamma
#     factors from np.random.uniform(0.8, 1.2), applies the first two through
#     PIL ImageEnhance and the gamma through a lookup table passed to
#     Image.point, then returns the result as a numpy array.
#   * color_transform() now calls chromatic_augmentation() once per image, so
#     img1 and img2 each get their own randomly drawn factors.
#   * eraser_transform() runs its loop body exactly once instead of
#     np.random.randint(1, 3) times.
#
# IGEV-Stereo/train_stereo.py
#   * Comments out the --img_gamma and --saturation_range parser arguments.
#
# NOTE(review): gamma_map holds float values; presumably Image.point rounds
#   them to integers - confirm against the installed Pillow version.
# NOTE(review): gamma_map is repeated "* 3", which assumes three-band input -
#   confirm every caller passes RGB arrays.
# NOTE(review): launch scripts that still pass --img_gamma or
#   --saturation_range will presumably be rejected by the parser - verify.
# NOTE(review): the line breaks of this patch had been lost; the leading
#   whitespace of context lines was re-created from the Python block
#   structure. Run "git apply --check" before applying.

diff --git a/IGEV-Stereo/core/utils/augmentor.py b/IGEV-Stereo/core/utils/augmentor.py
index fa81ba9..eb32d04 100644
--- a/IGEV-Stereo/core/utils/augmentor.py
+++ b/IGEV-Stereo/core/utils/augmentor.py
@@ -5,7 +5,7 @@ import os
 import time
 from glob import glob
 from skimage import color, io
-from PIL import Image
+from PIL import Image, ImageEnhance
 
 import cv2
 cv2.setNumThreads(0)
@@ -198,21 +198,40 @@ class SparseFlowAugmentor:
         self.v_flip_prob = 0.1
 
         # photometric augmentation params
-        self.photo_aug = Compose([ColorJitter(brightness=0.3, contrast=0.3, saturation=saturation_range, hue=0.3/3.14), AdjustGamma(*gamma)])
-        self.asymmetric_color_aug_prob = 0.2
+        # self.photo_aug = Compose([ColorJitter(brightness=0.3, contrast=0.3, saturation=saturation_range, hue=0.3/3.14), AdjustGamma(*gamma)])
         self.eraser_aug_prob = 0.5
-        
+
+    def chromatic_augmentation(self, img):
+        random_brightness = np.random.uniform(0.8, 1.2)
+        random_contrast = np.random.uniform(0.8, 1.2)
+        random_gamma = np.random.uniform(0.8, 1.2)
+
+        img = Image.fromarray(img)
+
+        enhancer = ImageEnhance.Brightness(img)
+        img = enhancer.enhance(random_brightness)
+        enhancer = ImageEnhance.Contrast(img)
+        img = enhancer.enhance(random_contrast)
+
+        gamma_map = [
+            255 * 1.0 * pow(ele / 255.0, random_gamma) for ele in range(256)
+        ] * 3
+        img = img.point(gamma_map) # use PIL's point-function to accelerate this part
+
+        img_ = np.array(img)
+
+        return img_
+
     def color_transform(self, img1, img2):
-        image_stack = np.concatenate([img1, img2], axis=0)
-        image_stack = np.array(self.photo_aug(Image.fromarray(image_stack)), dtype=np.uint8)
-        img1, img2 = np.split(image_stack, 2, axis=0)
+        img1 = self.chromatic_augmentation(img1)
+        img2 = self.chromatic_augmentation(img2)
         return img1, img2
 
     def eraser_transform(self, img1, img2):
         ht, wd = img1.shape[:2]
         if np.random.rand() < self.eraser_aug_prob:
             mean_color = np.mean(img2.reshape(-1, 3), axis=0)
-            for _ in range(np.random.randint(1, 3)):
+            for _ in range(1):
                 x0 = np.random.randint(0, wd)
                 y0 = np.random.randint(0, ht)
                 dx = np.random.randint(50, 100)
diff --git a/IGEV-Stereo/train_stereo.py b/IGEV-Stereo/train_stereo.py
index ad0b8b5..b989b14 100644
--- a/IGEV-Stereo/train_stereo.py
+++ b/IGEV-Stereo/train_stereo.py
@@ -240,8 +240,8 @@ if __name__ == '__main__':
     parser.add_argument('--max_disp', type=int, default=192, help="max disp of geometry encoding volume")
 
     # Data augmentation
-    parser.add_argument('--img_gamma', type=float, nargs='+', default=None, help="gamma range")
-    parser.add_argument('--saturation_range', type=float, nargs='+', default=[0, 1.4], help='color saturation')
+    # parser.add_argument('--img_gamma', type=float, nargs='+', default=None, help="gamma range")
+    # parser.add_argument('--saturation_range', type=float, nargs='+', default=[0, 1.4], help='color saturation')
     parser.add_argument('--do_flip', default=False, choices=['h', 'v'], help='flip the images horizontally or vertically')
     parser.add_argument('--spatial_scale', type=float, nargs='+', default=[-0.2, 0.4], help='re-scale the images randomly')
     parser.add_argument('--noyjitter', action='store_true', help='don\'t simulate imperfect rectification')