Commit: Format code
dblasko committed Nov 11, 2023
1 parent 40cc785 · commit da18251
Showing 23 changed files with 760 additions and 392 deletions.
74 changes: 50 additions & 24 deletions dataset_generation/PretrainingDataset.py
@@ -4,47 +4,73 @@
 from PIL import Image
 import os
 
 
 class PretrainingDataset(dataset.Dataset):
     """
     Handles the pretraining dataset generated by running `pretraining_generation.py`.
     """
-    def __init__(self, img_folder, target_img_folder, img_size = 64, limit=None, train=True):
+
+    def __init__(
+        self, img_folder, target_img_folder, img_size=64, limit=None, train=True
+    ):
         super().__init__()
         self.train = train
-        self.filenames = [os.path.join(img_folder, file) for file in os.listdir(img_folder) if not file.startswith('.DS_Store')]
+        self.filenames = [
+            os.path.join(img_folder, file)
+            for file in os.listdir(img_folder)
+            if not file.startswith(".DS_Store")
+        ]
         if limit is not None:
             self.filenames = self.filenames[:limit]
-        self.target_filenames = [os.path.join(target_img_folder, file) for file in os.listdir(target_img_folder) if not file.startswith('.DS_Store')]
+        self.target_filenames = [
+            os.path.join(target_img_folder, file)
+            for file in os.listdir(target_img_folder)
+            if not file.startswith(".DS_Store")
+        ]
         if limit is not None:
             self.target_filenames = self.target_filenames[:limit]
 
-        self.transform = T.Compose([T.Resize(img_size),
-                                    T.RandomCrop((img_size // 2, img_size // 2)),
-                                    T.ToTensor(),
-                                    T.Normalize([0.0,0.0,0.0], [1.0,1.0,1.0])]) if train else T.Compose([T.Resize(img_size),
-                                    T.ToTensor(),
-                                    T.Normalize([0.0,0.0,0.0], [1.0,1.0,1.0])])
-
-
+        self.transform = (
+            T.Compose(
+                [
+                    T.Resize(img_size),
+                    T.RandomCrop((img_size // 2, img_size // 2)),
+                    T.ToTensor(),
+                    T.Normalize([0.0, 0.0, 0.0], [1.0, 1.0, 1.0]),
+                ]
+            )
+            if train
+            else T.Compose(
+                [
+                    T.Resize(img_size),
+                    T.ToTensor(),
+                    T.Normalize([0.0, 0.0, 0.0], [1.0, 1.0, 1.0]),
+                ]
+            )
+        )
 
     def __getitem__(self, idx):
-        image = Image.open(self.filenames[idx]).convert('RGB')
-        target = Image.open(self.target_filenames[idx]).convert('RGB')
-
-        if not self.train and image.size[0] > image.size[1]: # to ensure all images of a batch are in the same orientation
+        image = Image.open(self.filenames[idx]).convert("RGB")
+        target = Image.open(self.target_filenames[idx]).convert("RGB")
+
+        if (
+            not self.train and image.size[0] > image.size[1]
+        ):  # to ensure all images of a batch are in the same orientation
             image = image.rotate(90, expand=True)
             target = target.rotate(90, expand=True)
 
         image = self.transform(image)
         target = self.transform(target)
-        if not self.train: # reshape to dimensions dividible by 8
+
+        if not self.train:  # reshape to dimensions dividible by 8
             if target.shape[1] % 8 != 0:
-                target = target[:, :-(target.shape[1] % 8), :]
-                image = image[:, :-(image.shape[1] % 8), :]
+                target = target[:, : -(target.shape[1] % 8), :]
+                image = image[:, : -(image.shape[1] % 8), :]
             if target.shape[2] % 8 != 0:
-                target = target[:, :, :-(target.shape[2] % 8)]
-                image = image[:, :, :-(image.shape[2] % 8)]
+                target = target[:, :, : -(target.shape[2] % 8)]
+                image = image[:, :, : -(image.shape[2] % 8)]
 
         return image, target
 
     def __len__(self):
         return len(self.filenames)
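As a quick usage sketch (not part of this commit): the import path, data folders, and batch sizes below are illustrative assumptions. In train mode each item is a random (img_size // 2)-square crop of the resized pair; in eval mode full images are returned, rotated to a shared orientation and trimmed so both spatial dimensions are divisible by 8.

# Hypothetical usage sketch; import path, folders, and batch sizes are assumptions.
from torch.utils.data import DataLoader

from dataset_generation.PretrainingDataset import PretrainingDataset

# Train mode: random 32x32 crops (img_size // 2) of the resized images.
train_set = PretrainingDataset(
    img_folder="data/pretraining/train/imgs",
    target_img_folder="data/pretraining/train/targets",
    img_size=64,
    train=True,
)
train_loader = DataLoader(train_set, batch_size=16, shuffle=True)

# Eval mode: full images of varying sizes, so batch_size=1 avoids collation errors.
val_set = PretrainingDataset(
    img_folder="data/pretraining/val/imgs",
    target_img_folder="data/pretraining/val/targets",
    img_size=64,
    train=False,
)
val_loader = DataLoader(val_set, batch_size=1, shuffle=False)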
93 changes: 61 additions & 32 deletions dataset_generation/finetuning_generation.py
@@ -15,8 +15,10 @@ def add_low_light_noise(image, **kwargs):
     Adds random realistic noise to an image to simulate low-light conditions.
     Expects 'color_shift_range' and 'noise_dispersion_range' to be passed in 'kwargs'.
     """
-    color_shift_range = kwargs.get('color_shift_range', (1, 3)) # 5 10
-    noise_dispersion_range = kwargs.get('noise_dispersion_range', (0.01, 0.05)) # .02 .09
+    color_shift_range = kwargs.get("color_shift_range", (1, 3))  # 5 10
+    noise_dispersion_range = kwargs.get(
+        "noise_dispersion_range", (0.01, 0.05)
+    )  # .02 .09
 
     # Sampling of the intensity of color noise and of the dispersion of the Gaussian noise
     color_shift = np.random.uniform(*color_shift_range)
@@ -35,23 +37,31 @@ def add_low_light_noise(image, **kwargs):
 
 if __name__ == "__main__":
     np.random.seed(42)
 
-    for split in ['train', 'test', 'val']:
-        os.makedirs(f'data/finetuning/{split}', exist_ok=True)
-        os.makedirs(f'data/finetuning/{split}/imgs', exist_ok=True)
-        os.makedirs(f'data/finetuning/{split}/targets', exist_ok=True)
-
-    # TODO: based on image quantity, think of reusing same image with different levels of transformation? Or in the pytorch dataset add another variable level, and fixed here?
-    transform = A.Compose([
-        T.ColorJitter(brightness=(0.1,0.2), contrast=0, saturation=0, hue=0, always_apply=True, p=1), # Artificial darkening
-        A.Lambda(image=add_low_light_noise), # Randomized low-light gaussian noise
-        A.SmallestMaxSize(max_size=400),
-    ])
-
+    for split in ["train", "test", "val"]:
+        os.makedirs(f"data/finetuning/{split}", exist_ok=True)
+        os.makedirs(f"data/finetuning/{split}/imgs", exist_ok=True)
+        os.makedirs(f"data/finetuning/{split}/targets", exist_ok=True)
+
+    transform = A.Compose(
+        [
+            T.ColorJitter(
+                brightness=(0.1, 0.2),
+                contrast=0,
+                saturation=0,
+                hue=0,
+                always_apply=True,
+                p=1,
+            ),  # Artificial darkening
+            A.Lambda(image=add_low_light_noise),  # Randomized low-light gaussian noise
+            A.SmallestMaxSize(max_size=400),
+        ]
+    )
 
     transform_ground_truth = A.Compose([A.SmallestMaxSize(max_size=400)])
 
-    original_images = glob('data/finetuning/original_images/*')
+    original_images = glob("data/finetuning/original_images/*")
 
     # Dataset splitting
     total_size = len(original_images)
     train_size = int(0.85 * total_size)
@@ -60,16 +70,16 @@ def add_low_light_noise(image, **kwargs):
     indices = list(range(total_size))
     np.random.shuffle(indices)
     train_indices = indices[:train_size]
-    val_indices = indices[train_size:(train_size + val_size)]
-    test_indices = indices[(train_size + val_size):]
+    val_indices = indices[train_size : (train_size + val_size)]
+    test_indices = indices[(train_size + val_size) :]
 
     def split_of_image(index):
         if index in train_indices:
-            return 'train'
+            return "train"
         elif index in val_indices:
-            return 'val'
+            return "val"
         else:
-            return 'test'
+            return "test"
 
     too_small_images = 0
     total_images = 0
@@ -78,20 +88,39 @@ def split_of_image(index):
         image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
         transformed = transform(image=image)
         transformed_image = transformed["image"]
 
         transformed_ground_truth = transform_ground_truth(image=image)["image"]
         try:
             if transformed_ground_truth.shape[0] > transformed_ground_truth.shape[1]:
-                transformed_ground_truth = A.CenterCrop(height=704, width=400, always_apply=True, p=1)(image=transformed_ground_truth)["image"]
-                transformed_image = A.CenterCrop(height=704, width=400, always_apply=True, p=1)(image=transformed_image)["image"]
+                transformed_ground_truth = A.CenterCrop(
+                    height=704, width=400, always_apply=True, p=1
+                )(image=transformed_ground_truth)["image"]
+                transformed_image = A.CenterCrop(
+                    height=704, width=400, always_apply=True, p=1
+                )(image=transformed_image)["image"]
             else:
-                transformed_ground_truth = A.CenterCrop(height=400, width=704, always_apply=True, p=1)(image=transformed_ground_truth)["image"]
-                transformed_image = A.CenterCrop(height=400, width=704, always_apply=True, p=1)(image=transformed_image)["image"]
-
+                transformed_ground_truth = A.CenterCrop(
+                    height=400, width=704, always_apply=True, p=1
+                )(image=transformed_ground_truth)["image"]
+                transformed_image = A.CenterCrop(
+                    height=400, width=704, always_apply=True, p=1
+                )(image=transformed_image)["image"]
+
             split = split_of_image(idx)
-            cv2.imwrite(f'data/finetuning/{split}/imgs/img_{idx}.png', cv2.cvtColor(transformed_image, cv2.COLOR_RGB2BGR))
-            cv2.imwrite(f'data/finetuning/{split}/targets/img_{idx}.png', cv2.cvtColor(transformed_ground_truth, cv2.COLOR_RGB2BGR))
+            cv2.imwrite(
+                f"data/finetuning/{split}/imgs/img_{idx}.png",
+                cv2.cvtColor(transformed_image, cv2.COLOR_RGB2BGR),
+            )
+            cv2.imwrite(
+                f"data/finetuning/{split}/targets/img_{idx}.png",
+                cv2.cvtColor(transformed_ground_truth, cv2.COLOR_RGB2BGR),
+            )
         except:
             too_small_images += 1
         total_images += 1
-    print(too_small_images, 'too small images weren\'t integrated to the dataset out of a total of ', total_images, 'images') # Decided to perform sorting in the dataset generation rather than at manual data curation
+    print(
+        too_small_images,
+        "too small images weren't integrated to the dataset out of a total of ",
+        total_images,
+        "images",
+    )  # Decided to perform sorting in the dataset generation rather than at manual data curation
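For a standalone check of the degradation pipeline (not part of this commit), a minimal sketch; the import path, sample file name, and override range are assumptions. Since add_low_light_noise reads its ranges from **kwargs, a stronger setting can be injected with functools.partial instead of editing the defaults.

# Hypothetical single-image check; import path, file name, and ranges are assumptions.
from functools import partial

import albumentations as A
import cv2

from dataset_generation.finetuning_generation import add_low_light_noise

sample = cv2.imread("data/finetuning/original_images/sample.jpg")
sample = cv2.cvtColor(sample, cv2.COLOR_BGR2RGB)

degrade = A.Compose(
    [
        # Override the default (1, 3) color shift via partial for stronger noise.
        A.Lambda(image=partial(add_low_light_noise, color_shift_range=(5, 10))),
        A.SmallestMaxSize(max_size=400),
    ]
)
degraded = degrade(image=sample)["image"]
cv2.imwrite("degraded_sample.png", cv2.cvtColor(degraded, cv2.COLOR_RGB2BGR))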
41 changes: 23 additions & 18 deletions dataset_generation/pretraining_generation.py
@@ -6,24 +6,29 @@
 
 if __name__ == "__main__":
     np.random.seed(42)
-    for split in ['train', 'test', 'val']:
-        os.makedirs(f'data/pretraining/{split}', exist_ok=True)
-        os.makedirs(f'data/pretraining/{split}/imgs', exist_ok=True)
-        os.makedirs(f'data/pretraining/{split}/targets', exist_ok=True)
+
+    for split in ["train", "test", "val"]:
+        os.makedirs(f"data/pretraining/{split}", exist_ok=True)
+        os.makedirs(f"data/pretraining/{split}/imgs", exist_ok=True)
+        os.makedirs(f"data/pretraining/{split}/targets", exist_ok=True)
 
     dataset = load_dataset("huggan/night2day")
     # dataset = load_dataset("geekyrakshit/LoL-Dataset")
 
-    train_size = int(0.85 * len(dataset['train']))
-    val_size = int(0.10 * len(dataset['train']))
-    test_size = len(dataset['train']) - train_size - val_size
-    train_dataset, val_dataset, test_dataset = torch.utils.data.random_split(dataset['train'], [train_size, val_size, test_size])
-
-    for dataset, dataset_name in [(train_dataset, 'train'), (val_dataset, 'val'), (test_dataset, 'test')]:
+    train_size = int(0.85 * len(dataset["train"]))
+    val_size = int(0.10 * len(dataset["train"]))
+    test_size = len(dataset["train"]) - train_size - val_size
+    train_dataset, val_dataset, test_dataset = torch.utils.data.random_split(
+        dataset["train"], [train_size, val_size, test_size]
+    )
+
+    for dataset, dataset_name in [
+        (train_dataset, "train"),
+        (val_dataset, "val"),
+        (test_dataset, "test"),
+    ]:
         for i in range(len(dataset)):
-            imageA = dataset[i]['imageA']
-            imageB = dataset[i]['imageB']
-            imageA.save(f'data/pretraining/{dataset_name}/imgs/train_img_{i}.png')
-            imageB.save(f'data/pretraining/{dataset_name}/targets/train_target_{i}.png')
+            imageA = dataset[i]["imageA"]
+            imageB = dataset[i]["imageB"]
+
+            imageA.save(f"data/pretraining/{dataset_name}/imgs/train_img_{i}.png")
+            imageB.save(f"data/pretraining/{dataset_name}/targets/train_target_{i}.png")
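One aside on the split above (not part of this commit): np.random.seed(42) seeds NumPy only, while torch.utils.data.random_split draws from torch's global generator, so the 85/10/5 split is not made reproducible by that seed. A minimal sketch of pinning it, assuming the variables already defined in the script:

# Hypothetical tweak: pass an explicitly seeded torch.Generator to random_split
# so the same indices land in each split on every run.
import torch

split_generator = torch.Generator().manual_seed(42)
train_dataset, val_dataset, test_dataset = torch.utils.data.random_split(
    dataset["train"], [train_size, val_size, test_size], generator=split_generator
)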