Image Normalization

0. Package

import os

import numpy as np
import torch
import torchvision.transforms as transforms
from torchvision.datasets import ImageFolder


(Example) Dataset Structure

Custom Dataset
├── train
│   ├── class0
│   │   └── 0.png
│   │   └── 1.png
│   │   ...
│   │   └── n.png
│   ├── class1
│   │   └── 0.png
│   │   └── 1.png
│   │   ...
│   │   └── n.png
├── test
│   ├── class0
│   │   └── 0.png
│   │   └── 1.png
│   │   ...
│   │   └── n.png
│   ├── class1
│   │   └── 0.png
│   │   └── 1.png
│   │   ...
│   │   └── n.png

(Example) Dataloader Structure

dataloader={train, test}
dataloader['train']={(0:four_dim_tensor<images>, 1:one_dim_tensor<targets>), ....}
dataloader['test']={(0:four_dim_tensor<images>, 1:one_dim_tensor<targets>), ....}

len(dataloader['train']): 267
len(dataloader['test']): 60
len(dataloader['train'].dataset): 17069
len(dataloader['test'].dataset): 3794


1. Make Dataloader

'''
def make_loader(path):
    splits = ['train', 'test']
    shuffle = {'train': True, 'test': False}
    transform = transforms.Compose([transforms.Resize((64,64)),
                                    transforms.ToTensor(),
                                    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)), ])

    dataset = {x: ImageFolder(os.path.join(path, x), transform) for x in splits}
    dataloader = {x: torch.utils.data.DataLoader(dataset=dataset[x], batch_size=64, shuffle=shuffle[x]) for x in splits}

    return dataloader
'''

def make_loader(path, batch_size=64, image_size=(64, 64)):
    """Build train/test DataLoaders over an ImageFolder-style directory tree.

    Images are only resized and converted to [0, 1] tensors — deliberately
    NOT normalized — so these loaders can be used to estimate the dataset
    mean/std in the next step.

    Args:
        path: root directory containing 'train' and 'test' subfolders, each
            holding one subfolder per class (torchvision ImageFolder layout).
        batch_size: samples per batch (default 64, matching the original).
        image_size: (height, width) every image is resized to.

    Returns:
        dict mapping 'train' and 'test' to a torch DataLoader.
    """
    splits = ['train', 'test']
    # Resize + ToTensor only: the mean/std pass below must see raw [0,1] data.
    transform = transforms.Compose([transforms.Resize(image_size),
                                    transforms.ToTensor()])

    dataset = {x: ImageFolder(os.path.join(path, x), transform) for x in splits}
    dataloader = {x: torch.utils.data.DataLoader(dataset=dataset[x],
                                                 batch_size=batch_size)
                  for x in splits}

    return dataloader

Check Dataloader

# Display a CHW tensor image with matplotlib.
def timshow(img):
    """Render a 3xHxW tensor image in a 5x5-inch figure with axes hidden."""
    hwc_img = img.permute(1, 2, 0)  # [C,H,W] -> [H,W,C], the layout imshow expects
    plt.figure(figsize=(5, 5))
    plt.imshow(hwc_img)
    plt.axis('off')
    plt.show()

# Sanity-check a single batch from the test loader, then stop.
# NOTE(review): `dl` is assumed to be the dict returned by make_loader — confirm.
for data in dl['test']:
    print("data[0].size():",data[0].size()) # torch.Size([64, 3, 64, 64])
    print("data[1].size():",data[1].size()) # torch.Size([64])
    timshow(data[0][0])  # display the first image of the batch
    print(data[1][0])    # and its class index
    break


2. Get Mean, Std with Dataloader

# Compute exact per-channel mean/std of the training set.
# The original accumulated images.std(2) per image, which yields the AVERAGE
# of per-image standard deviations — not the standard deviation of the whole
# dataset. Accumulating pixel sums and squared sums gives the exact statistics.
channel_sum = 0.
channel_sq_sum = 0.
n_pixels = 0
for data in dl['train']:
    data = data[0]  # data[0] is the image batch (B x 3 x 64 x 64)
    pixels = data.view(data.size(0), data.size(1), -1)  # B x 3 x (H*W)
    channel_sum += pixels.sum(dim=(0, 2))        # per-channel sum over batch & pixels
    channel_sq_sum += pixels.pow(2).sum(dim=(0, 2))
    n_pixels += pixels.size(0) * pixels.size(2)  # pixel count per channel so far

mean = channel_sum / n_pixels
# Var[X] = E[X^2] - (E[X])^2, applied per channel.
std = (channel_sq_sum / n_pixels - mean ** 2).sqrt()

print("data size:",len(dl['train'].dataset))
print("mean:",mean)
print("std:",std)


3. CV2 Image Normalization
(CV2 Image -> Normalized Tensor)

# Convert channel statistics computed in RGB order to BGR order
# (OpenCV loads images as BGR, so red and blue entries must trade places).
def cv2_infor(tmean,tstd):
    """Return copies of (mean, std) with channels 0 and 2 swapped."""
    bgr_mean, bgr_std = tmean.clone(), tstd.clone()
    for src, dst in ((tmean, bgr_mean), (tstd, bgr_std)):
        dst[0] = src[2]
        dst[2] = src[0]

    return bgr_mean, bgr_std

cmean,cstd=cv2_infor(mean,std) # reorder the RGB statistics into BGR for OpenCV images
# NOTE(review): F is assumed to be torchvision.transforms.functional and npimg
# an HxWx3 BGR uint8 array loaded with cv2 — confirm against the full script.
tensor_img=F.to_tensor(npimg) # converts HWC uint8 -> CHW float in [0,1] (divides by 255)
tensor_img=F.normalize(tensor_img, cmean, cstd) # standardization: (x - mean) / std per channel


4. Tensor Image Denormalization
(Normalized Tensor -> CV2 Image)

# tensor -> numpy: convert the stats first, then the image itself
np_mean, np_std=cmean.numpy(),cstd.numpy()
# detach from the graph and move to host memory before converting
dimg=tensor_img.detach().cpu().numpy() 

dimg = np.transpose(dimg, (1, 2, 0)) # [C,H,W] -> [H,W,C]
# undo (x - mean) / std, rescale to [0, 255], and clamp out-of-range values
dimg=np.clip(255.0 * (dimg * np_std + np_mean), 0, 255) # denormalization
dimg = dimg.astype(np.uint8).copy() # np.float32 -> np.uint8 for cv2 display/saving


Example Code: [GitHub]

Leave a Reply

Your email address will not be published. Required fields are marked *