0. Package
import os

import torch
import numpy as np
import matplotlib.pyplot as plt
from torchvision.datasets import ImageFolder
import torchvision.transforms as transforms
import torchvision.transforms.functional as F  # os, numpy, matplotlib, and transforms.functional are used in the snippets below
(Example) Dataset Structure
Custom Dataset
├── train
│   ├── class0
│   │   ├── 0.png
│   │   ├── 1.png
│   │   ├── ...
│   │   └── n.png
│   └── class1
│       ├── 0.png
│       ├── 1.png
│       ├── ...
│       └── n.png
└── test
    ├── class0
    │   ├── 0.png
    │   ├── 1.png
    │   ├── ...
    │   └── n.png
    └── class1
        ├── 0.png
        ├── 1.png
        ├── ...
        └── n.png
(Example) Dataloader Structure
dataloader = {train, test}
dataloader['train'] = [(0: four_dim_tensor<images>, 1: one_dim_tensor<targets>), ...]
dataloader['test']  = [(0: four_dim_tensor<images>, 1: one_dim_tensor<targets>), ...]

len(dataloader['train']): 267            # number of batches = ceil(17069 / 64)
len(dataloader['test']): 60              # number of batches = ceil(3794 / 64)
len(dataloader['train'].dataset): 17069  # number of images
len(dataloader['test'].dataset): 3794    # number of images
1. Make Dataloader
# Full version with normalization (commented out):
'''
def make_loader(path):
    splits = ['train', 'test']
    shuffle = {'train': True, 'test': False}
    transform = transforms.Compose([transforms.Resize((64, 64)),
                                    transforms.ToTensor(),
                                    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
                                    ])
    dataset = {x: ImageFolder(os.path.join(path, x), transform) for x in splits}
    dataloader = {x: torch.utils.data.DataLoader(dataset=dataset[x], batch_size=64, shuffle=shuffle[x])
                  for x in splits}
    return dataloader
'''

# Plain version (Resize + ToTensor only), used in section 2 to compute the dataset mean and std:
def make_loader(path):
    splits = ['train', 'test']
    transform = transforms.Compose([transforms.Resize((64, 64)),
                                    transforms.ToTensor(),])
    dataset = {x: ImageFolder(os.path.join(path, x), transform) for x in splits}
    dataloader = {x: torch.utils.data.DataLoader(dataset=dataset[x], batch_size=64) for x in splits}
    return dataloader
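A minimal usage sketch: the dataset root name is an assumption based on the example structure above, and `dl` is the loader dictionary that the later snippets refer to.

dl = make_loader('Custom Dataset')                         # hypothetical dataset root; adjust to the actual path
print(len(dl['train']), len(dl['test']))                   # number of batches (267 and 60 in the example above)
print(len(dl['train'].dataset), len(dl['test'].dataset))   # number of images (17069 and 3794)
print(dl['train'].dataset.classes)                         # folder names, e.g. ['class0', 'class1']
print(dl['train'].dataset.class_to_idx)                    # e.g. {'class0': 0, 'class1': 1}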
Check Dataloader
# tensor image -> plt show
def timshow(img):
    img = img.permute(1, 2, 0)
    plt.figure(figsize=(5, 5))
    plt.imshow(img)
    plt.axis('off')
    plt.show()

for data in dl['test']:
    print("data[0].size():", data[0].size())  # torch.Size([64, 3, 64, 64])
    print("data[1].size():", data[1].size())  # torch.Size([64])
    timshow(data[0][0])
    print(data[1][0])
    break
2. Get Mean, Std with Dataloader
mean = 0.
std = 0.
for data in dl['train']:
    data = data[0]  # data[0] is four_dim_tensor<images> (64 x 3 x 64 x 64)
    batch_samples = data.size(0)
    images = data.view(batch_samples, data.size(1), -1)  # 64 x 3 x 4096 // 4096 = 64*64
    mean += images.mean(2).sum(0)  # mean over the 4096 pixels -> 64 x 3, then sum over the 64 images -> 3
    std += images.std(2).sum(0)    # 64 x 3 -> 3
mean /= len(dl['train'].dataset)
std /= len(dl['train'].dataset)

print("data size:", len(dl['train'].dataset))
print("mean:", mean)
print("std:", std)
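Once the statistics are known, they can be fed into the normalized loader shown (commented out) in section 1. A sketch under that assumption; `make_norm_loader` is a hypothetical helper name, not part of the original code.

# Sketch: variant of make_loader that uses the computed mean/std
# instead of the placeholder (0.5, 0.5, 0.5) values.
def make_norm_loader(path, mean, std):   # hypothetical helper
    splits = ['train', 'test']
    shuffle = {'train': True, 'test': False}
    transform = transforms.Compose([transforms.Resize((64, 64)),
                                    transforms.ToTensor(),
                                    transforms.Normalize(mean.tolist(), std.tolist())])
    dataset = {x: ImageFolder(os.path.join(path, x), transform) for x in splits}
    return {x: torch.utils.data.DataLoader(dataset=dataset[x], batch_size=64, shuffle=shuffle[x])
            for x in splits}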
3. CV2 Image Normalization
(CV2 Image -> Normalized Tensor)
# mean/std computed in RGB order -> mean/std in BGR order (cv2 loads images as BGR)
def cv2_infor(tmean, tstd):
    cmean, cstd = tmean.clone(), tstd.clone()
    cmean[0], cmean[2] = tmean[2], tmean[0]  # swap the R and B channels
    cstd[0], cstd[2] = tstd[2], tstd[0]
    return cmean, cstd

cmean, cstd = cv2_infor(mean, std)                 # get BGR mean, std

tensor_img = F.to_tensor(npimg)                    # npimg: BGR, HWC, uint8 -> CHW float, /255
tensor_img = F.normalize(tensor_img, cmean, cstd)  # standardization
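For completeness, a sketch of how `npimg` could be obtained in the first place; the file name and the 64x64 resize are assumptions. `cv2.imread` returns a BGR, HWC, uint8 array, which is why the BGR-ordered statistics are needed above.

import cv2

npimg = cv2.imread('sample.png')      # hypothetical file; loaded as BGR, HWC, uint8
npimg = cv2.resize(npimg, (64, 64))   # match the 64x64 size used by the dataloader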
4. Tensor Image Denormalization
(Normalized Tensor -> CV2 Image)
# tensor -> numpy
np_mean, np_std = cmean.numpy(), cstd.numpy()
dimg = tensor_img.detach().cpu().numpy()
dimg = np.transpose(dimg, (1, 2, 0))                       # [C,H,W] -> [H,W,C]
dimg = np.clip(255.0 * (dimg * np_std + np_mean), 0, 255)  # denormalization
dimg = dimg.astype(np.uint8).copy()                        # np.float32 -> np.uint8
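A short follow-up sketch, assuming the goal is to save or view the result with OpenCV; the output file name is an assumption. Since the statistics were kept in BGR order, `dimg` is already a valid BGR image for cv2.

import cv2

cv2.imwrite('denormalized.png', dimg)   # hypothetical output path; dimg is BGR, HWC, uint8
# For an RGB viewer such as matplotlib, flip the channel order first:
# plt.imshow(cv2.cvtColor(dimg, cv2.COLOR_BGR2RGB))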
Example Code: [GitHub]