0. Package
import os

import matplotlib.pyplot as plt
import numpy as np
import torch
import torchvision.transforms as transforms
import torchvision.transforms.functional as F
from torchvision.datasets import ImageFolder
(Example) Dataset Structure
Custom Dataset
├── train
│   ├── class0
│   │   ├── 0.png
│   │   ├── 1.png
│   │   ├── ...
│   │   └── n.png
│   └── class1
│       ├── 0.png
│       ├── 1.png
│       ├── ...
│       └── n.png
└── test
    ├── class0
    │   ├── 0.png
    │   ├── 1.png
    │   ├── ...
    │   └── n.png
    └── class1
        ├── 0.png
        ├── 1.png
        ├── ...
        └── n.png
(Example) Dataloader Structure
dataloader={train, test}
dataloader['train']={(0:four_dim_tensor<images>, 1:one_dim_tensor<targets>), ....}
dataloader['test']={(0:four_dim_tensor<images>, 1:one_dim_tensor<targets>), ....}
len(dataloader['train']): 267
len(dataloader['test']): 60
len(dataloader['train'].dataset): 17069
len(dataloader['test'].dataset): 3794
1. Make Dataloader
'''
def make_loader(path):
splits = ['train', 'test']
shuffle = {'train': True, 'test': False}
transform = transforms.Compose([transforms.Resize((64,64)),
transforms.ToTensor(),
transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)), ])
dataset = {x: ImageFolder(os.path.join(path, x), transform) for x in splits}
dataloader = {x: torch.utils.data.DataLoader(dataset=dataset[x], batch_size=64, shuffle=shuffle[x]) for x in splits}
return dataloader
'''
def make_loader(path, batch_size=64):
    """Build train/test DataLoaders from an ImageFolder directory tree.

    Args:
        path: root directory containing 'train' and 'test' subfolders,
            each holding one subfolder per class of images.
        batch_size: samples per batch (default 64, the original value).

    Returns:
        dict mapping 'train' / 'test' to a torch DataLoader.

    Note:
        Images are only resized to 64x64 and scaled to [0, 1]; no
        Normalize is applied on purpose, so these loaders can be used
        to compute the dataset mean/std first.
    """
    splits = ['train', 'test']
    transform = transforms.Compose([
        transforms.Resize((64, 64)),
        transforms.ToTensor(),
    ])
    dataset = {x: ImageFolder(os.path.join(path, x), transform) for x in splits}
    dataloader = {x: torch.utils.data.DataLoader(dataset=dataset[x],
                                                 batch_size=batch_size)
                  for x in splits}
    return dataloader
Check Dataloader
# tensor image -> plt show
# Show a (C, H, W) tensor image with matplotlib.
def timshow(img):
    """Display a CHW image tensor in a 5x5-inch figure, axes hidden."""
    hwc = img.permute(1, 2, 0)  # matplotlib expects (H, W, C)
    plt.figure(figsize=(5, 5))
    plt.imshow(hwc)
    plt.axis('off')
    plt.show()
# Inspect a single test batch: tensor shapes plus the first sample.
for images, labels in dl['test']:
    print("data[0].size():", images.size())  # torch.Size([64, 3, 64, 64])
    print("data[1].size():", labels.size())  # torch.Size([64])
    timshow(images[0])
    print(labels[0])
    break  # one batch is enough
2. Get Mean, Std with Dataloader
# Per-channel mean and std over the whole training set.
# NOTE: the original code averaged each image's own std, which is a biased
# (under)estimate of the dataset std. Here we accumulate E[x] and E[x^2]
# over all pixels and use std = sqrt(E[x^2] - E[x]^2) instead. This is
# exact because every image contributes the same number of pixels.
mean = 0.
sq_mean = 0.
for data in dl['train']:
    images = data[0]  # four-dim tensor of images (batch x C x H x W)
    batch_samples = images.size(0)
    flat = images.view(batch_samples, images.size(1), -1)  # batch x C x (H*W)
    mean += flat.mean(2).sum(0)            # per-image channel means, summed
    sq_mean += flat.pow(2).mean(2).sum(0)  # per-image channel mean squares, summed
mean /= len(dl['train'].dataset)
sq_mean /= len(dl['train'].dataset)
std = (sq_mean - mean ** 2).sqrt()
print("data size:", len(dl['train'].dataset))
print("mean:", mean)
print("std:", std)
3. CV2 Image Normalization
(CV2 Image -> Normalized Tensor)
# Convert RGB-ordered mean/std to BGR order (OpenCV channel order).
def cv2_infor(tmean, tstd):
    """Return (mean, std) with channels 0 and 2 swapped (RGB <-> BGR)."""
    def _swap_rb(t):
        out = t.clone()  # do not mutate the caller's tensor
        out[0], out[2] = t[2], t[0]
        return out
    return _swap_rb(tmean), _swap_rb(tstd)
# Normalize a cv2 (BGR) image with BGR-ordered statistics.
cmean,cstd=cv2_infor(mean,std) # get BGR mean, std (channel order swapped from RGB)
tensor_img=F.to_tensor(npimg) # assumes npimg is an HxWxC uint8 array — scales to [0, 1] (/255)
tensor_img=F.normalize(tensor_img, cmean, cstd) # standardization: (x - mean) / std per channel
4. Tensor Image Denormalization
(Normalized Tensor -> CV2 Image)
# Convert a normalized CHW tensor back to a displayable cv2 (HWC uint8) image.
# (The original section was collapsed onto one line behind a leading '#',
# turning the whole thing into a comment; restored as executable statements.)
np_mean, np_std = cmean.numpy(), cstd.numpy()  # tensor -> numpy
dimg = tensor_img.detach().cpu().numpy()
dimg = np.transpose(dimg, (1, 2, 0))  # [C,H,W] -> [H,W,C]
dimg = np.clip(255.0 * (dimg * np_std + np_mean), 0, 255)  # undo (x - mean)/std, rescale to [0, 255]
dimg = dimg.astype(np.uint8).copy()  # np.float32 -> np.uint8
Example Code: [GitHub]