1. 程式人生 > 其它 >PyTorch 自定義 Dataset 及訓練集、測試集劃分方法

PyTorch 自定義 Dataset 及訓練集、測試集劃分方法

技術標籤:PyTorch 基礎

  • 範例 1:自定義資料集類別,使用 torch.utils.data.random_split() 劃分訓練集和測試集,並以一般的索引遍歷方式取用自定義資料集中的樣本
import numpy as np
import torch
from torch.utils.data import Dataset, DataLoader

# Toy data for the example: a 10x3 array of random values.
random_data = np.random.randn(10, 3)
print(random_data)
print("#" * len(random_data))


class MyDataSet(Dataset):
    """Minimal map-style dataset that wraps an indexable collection."""

    def __init__(self, loaded_data):
        """Keep a reference to ``loaded_data`` as the backing samples."""
        self.data = loaded_data

    def __len__(self):
        """Return the number of samples in the wrapped collection."""
        return len(self.data)

    def __getitem__(self, idx):
        """Return the sample stored at position ``idx``."""
        return self.data[idx]


custom_dataset = MyDataSet(random_data)

# Plain index-based traversal of the whole dataset.
for i in range(len(custom_dataset)):
    print(custom_dataset[i])

# The test size is computed as the remainder so that both sizes always add
# up to the dataset length, which random_split requires.
train_size = int(len(custom_dataset) * 0.7)
test_size = len(custom_dataset) - train_size
train_dataset, test_dataset = torch.utils.data.random_split(
    custom_dataset, [train_size, test_size]
)

print(len(train_dataset))
print(len(test_dataset))
print("#" * len(custom_dataset))

# The subsets returned by random_split support len() and indexing as well.
for i in range(len(train_dataset)):
    print(train_dataset[i])

print("#" * len(train_dataset))

for i in range(len(test_dataset)):
    print(test_dataset[i])
  • 輸出:

  • 範例 2:自定義資料集類別,使用 torch.utils.data.random_split() 劃分訓練集、驗證集和測試集,並透過 DataLoader 取用自定義資料集中的樣本

import numpy as np
import torch
from torch.utils.data import Dataset, DataLoader

# Toy data for the example: a 10x3 array of random values.
random_data = np.random.randn(10, 3)
# Show the array, then a separator made of one '#' per row.
print(random_data, "#" * len(random_data), sep="\n")


class MyDataSet(Dataset):
    """Map-style dataset backed by an in-memory, indexable collection.

    The wrapped object is stored unchanged on ``self.data``; length and
    item lookups are delegated directly to it.
    """

    def __init__(self, loaded_data):
        # Keep a reference to the caller's collection; nothing is copied.
        self.data = loaded_data

    def __getitem__(self, idx):
        """Return whatever the wrapped collection yields for ``idx``."""
        sample = self.data[idx]
        return sample

    def __len__(self) -> int:
        """Return the number of entries in the wrapped collection."""
        return len(self.data)
    

# Wrap the random array in the custom dataset class.
custom_dataset = MyDataSet(random_data)

# Walk the dataset by index and show every sample.
for i in range(len(custom_dataset)):
    print(custom_dataset[i])


# Split sizes: half for training, a fifth for validation, and whatever is
# left for testing, so the three sizes always add up to the dataset length.
train_size = int(0.5 * len(custom_dataset))
validate_size = int(0.2 * len(custom_dataset))
test_size = len(custom_dataset) - (train_size + validate_size)
split_lengths = [train_size, validate_size, test_size]
train_dataset, validate_dataset, test_dataset = torch.utils.data.random_split(
    custom_dataset, split_lengths
)


# All three loaders share the same settings: one sample per batch, no
# shuffling, and loading in the main process.
loader_options = {"batch_size": 1, "shuffle": False, "num_workers": 0}
train_loader = DataLoader(train_dataset, **loader_options)
validate_loader = DataLoader(validate_dataset, **loader_options)
test_loader = DataLoader(test_dataset, **loader_options)

# Number of batches per loader.
print(len(train_loader), len(validate_loader), len(test_loader), sep="\n")


# Iterate each loader in turn, printing the batch index and the batch.
for i, train_sample in enumerate(train_loader):
    print("{} {}".format(i, train_sample))


for j, validate_sample in enumerate(validate_loader):
    print("{} {}".format(j, validate_sample))


for k, test_sample in enumerate(test_loader):
    print("{} {}".format(k, test_sample))

參考

  • https://pytorch.org/tutorials/recipes/recipes/custom_dataset_transforms_loader.html?highlight=custom%20dataset