pytorch使用多GPU訓練MNIST

阿新 • • 發佈：2019-01-13

下面的程式碼引數沒有除錯，可能準確率不高，僅僅供參考程式碼格式。

import argparse
import math
import os
import time

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torch.utils.data.distributed
from torch.autograd import Variable
from torchvision import datasets
from torchvision import transforms

# Restrict this process to GPUs 0-7. The value is written as a plain
# comma-separated list with no embedded whitespace so the result does not
# depend on how the CUDA runtime treats stray spaces between indices.
os.environ["CUDA_VISIBLE_DEVICES"] = "0,1,2,3,4,5,6,7"

# Training hyper-parameters.
lr = 0.001             # learning rate handed to the SGD optimizer
batch_size = 100       # samples per training mini-batch
epochs = 10            # number of passes over the training set
test_batch_size = 100  # samples per test mini-batch
momentum = 0.5         # momentum handed to the SGD optimizer
log_interval = 100     # log the training loss every `log_interval` batches
# Input-data transform pipeline: resize (size 40), random horizontal flip,
# random 32x32 crop, then conversion to a tensor.
# NOTE(review): `transform` is not referenced by the dataset definitions
# visible in this file (they build their own Compose) -- confirm whether it
# is still needed.
transform_list = []
transform_list.append(transforms.Resize(40))
transform_list.append(transforms.RandomHorizontalFlip())
transform_list.append(transforms.RandomCrop(32))
transform_list.append(transforms.ToTensor())
transform = transforms.Compose(transform_list)


# Seed the default (CPU) generator and the CUDA generator with the same fixed
# value so that runs are repeatable.
_seed = 2018
torch.manual_seed(_seed)
torch.cuda.manual_seed(_seed)

# Extra keyword arguments forwarded to each DataLoader: four worker
# processes and page-locked (pinned) host memory for the batches.
kwargs = dict(num_workers=4, pin_memory=True)

# MNIST training split, stored under the 'data-0' directory and downloaded on
# first use. Each image is converted to a tensor and normalised with
# mean 0.1307 and standard deviation 0.3081.
train_dataset = datasets.MNIST(
    'data-0',
    train=True,
    download=True,
    transform=transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.1307,), (0.3081,)),
    ]),
)

# shuffle=True draws the samples in a new random order every epoch; without
# it SGD would see the batches in the same fixed order on every pass.
train_loader = torch.utils.data.DataLoader(
    train_dataset, batch_size=batch_size, shuffle=True, **kwargs)


# MNIST test split, read from the same 'data-0' directory with ToTensor +
# Normalize(mean 0.1307, std 0.3081) preprocessing. download is not requested
# here, so the files are expected to be present under 'data-0' already.
_test_preprocess = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.1307,), (0.3081,)),
])
test_dataset = datasets.MNIST(
    'data-0', train=False, transform=_test_preprocess)

test_loader = torch.utils.data.DataLoader(
    test_dataset,
    batch_size=test_batch_size,
    **kwargs
)

def conv3x3(in_planes, out_planes, stride=1):
    """Build a bias-free 3x3 ``nn.Conv2d`` with a padding of one pixel.

    Args:
        in_planes: number of input channels.
        out_planes: number of output channels.
        stride: stride of the convolution (default 1).

    Returns:
        The newly constructed ``nn.Conv2d`` layer.
    """
    conv_options = dict(kernel_size=3, stride=stride, padding=1, bias=False)
    return nn.Conv2d(in_planes, out_planes, **conv_options)


# Residual Block
class ResidualBlock(nn.Module):
    """Residual block: two 3x3 conv + batch-norm stages plus a shortcut.

    Args:
        in_channels: number of channels of the block input.
        out_channels: number of channels produced by both convolutions.
        stride: stride of the first convolution (default 1).
        downsample: optional module applied to the input before it is added
            back to the main path; ``None`` means the input is added as-is.
    """

    def __init__(self, in_channels, out_channels, stride=1, downsample=None):
        super(ResidualBlock, self).__init__()
        self.conv1 = conv3x3(in_channels, out_channels, stride)
        self.bn1 = nn.BatchNorm2d(out_channels)
        self.relu = nn.ReLU(inplace=True)
        self.conv2 = conv3x3(out_channels, out_channels)
        self.bn2 = nn.BatchNorm2d(out_channels)
        self.downsample = downsample

    def forward(self, x):
        """Return ``relu(main_path(x) + shortcut(x))``."""
        residual = x
        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)
        out = self.conv2(out)
        out = self.bn2(out)
        # Compare against None explicitly: truthiness of a module falls back
        # to __len__ for containers such as nn.Sequential, which is not the
        # question being asked here.
        if self.downsample is not None:
            residual = self.downsample(x)
        out += residual
        out = self.relu(out)
        return out


# ResNet Module
class ResNet(nn.Module):
    """Small ResNet for single-channel images.

    A 3x3 stem convolution (1 -> 16 channels) is followed by four stages of
    ``block`` modules with 16, 32, 64 and 128 channels (stages 2 and 3 use
    stride 2), a 7x7 average pool and a linear classifier.

    Args:
        block: callable building one residual block; it is called as
            ``block(in_channels, out_channels, stride, downsample)`` and as
            ``block(out_channels, out_channels)``.
        layers: sequence of four ints, the number of blocks in each stage.
        num_classes: size of the classifier output (default 10).
    """

    def __init__(self, block, layers, num_classes=10):
        super(ResNet, self).__init__()
        # Channel count of the tensor entering the next stage; updated by
        # make_layer as stages are appended.
        self.in_channels = 16
        self.conv = conv3x3(1, 16)
        self.bn = nn.BatchNorm2d(16)
        self.relu = nn.ReLU(inplace=True)

        self.layer1 = self.make_layer(block, 16, layers[0])
        self.layer2 = self.make_layer(block, 32, layers[1], 2)
        self.layer3 = self.make_layer(block, 64, layers[2], 2)
        self.layer4 = self.make_layer(block, 128, layers[3])
        # The classifier expects exactly 128 features, i.e. a 1x1 map after
        # pooling. With the two stride-2 stages above, a 28x28 input reaches
        # this point as 7x7.
        self.avg_pool = nn.AvgPool2d(7)
        self.fc = nn.Linear(128, num_classes)

    def make_layer(self, block, out_channels, blocks, stride=1):
        """Build one stage of ``blocks`` residual blocks.

        Only the first block receives ``stride`` and, when the stride or the
        channel count changes, a conv + batch-norm ``downsample`` shortcut.
        Updates ``self.in_channels`` to ``out_channels``.
        """
        downsample = None
        if (stride != 1) or (self.in_channels != out_channels):
            downsample = nn.Sequential(
                conv3x3(self.in_channels, out_channels, stride=stride),
                nn.BatchNorm2d(out_channels))
        layers = [block(self.in_channels, out_channels, stride, downsample)]
        self.in_channels = out_channels
        for _ in range(1, blocks):
            layers.append(block(out_channels, out_channels))
        return nn.Sequential(*layers)

    def forward(self, x):
        """Return the class scores of shape ``(x.size(0), num_classes)``."""
        out = self.conv(x)
        out = self.bn(out)
        out = self.relu(out)
        out = self.layer1(out)
        out = self.layer2(out)
        out = self.layer3(out)
        out = self.layer4(out)
        out = self.avg_pool(out)
        # Flatten everything except the batch dimension for the classifier.
        out = out.view(out.size(0), -1)
        out = self.fc(out)
        return out


# Wrap the network in DataParallel over every GPU visible to this process.
# The device ids are derived from torch.cuda.device_count() rather than
# hard-coded to eight devices, so the script also runs on machines that
# expose fewer GPUs.
model = torch.nn.DataParallel(
    ResNet(ResidualBlock, [2, 2, 2, 2]),
    device_ids=list(range(torch.cuda.device_count()))).cuda()

# SGD with momentum. The learning rate is used as configured; it is not
# scaled by the number of GPUs.
optimizer = optim.SGD(model.parameters(), lr=lr,
                      momentum=momentum)


criterion = nn.CrossEntropyLoss()

def train(epoch):
    """Run one training epoch over ``train_loader`` and log the loss.

    Uses the module-level ``model``, ``optimizer``, ``criterion``,
    ``train_loader`` and ``log_interval``.

    Args:
        epoch: epoch number; only used in the progress message.
    """
    model.train()
    num_samples = len(train_loader.dataset)
    num_batches = len(train_loader)
    for batch_idx, (data, target) in enumerate(train_loader):
        # Tensors carry autograd state themselves (PyTorch >= 0.4, which the
        # loss.item() call below already requires), so no Variable wrapper.
        data, target = data.cuda(), target.cuda()
        optimizer.zero_grad()
        output = model(data)
        loss = criterion(output, target)
        loss.backward()
        optimizer.step()
        if batch_idx % log_interval == 0:
            # Report progress against the real dataset size instead of a
            # literal 0.
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, batch_idx * len(data), num_samples,
                100. * batch_idx / num_batches, loss.item()))

# Train for `epochs` epochs; epochs are numbered from 1 so that the number
# passed to train() (and printed by it) is 1-based.
for epoch in range(1, epochs + 1):
    train(epoch)


# Evaluation (disabled).
# NOTE(review): no test() function is defined in the visible part of this
# file, and nothing here writes 'param_model.pkl' -- confirm before enabling.
# model.load_state_dict(torch.load('param_model.pkl'))
# test()

pytorch使用多GPU訓練MNIST

下面的程式碼引數沒有除錯，可能準確率不高，僅僅供參考程式碼格式。 import argparse import torch import torch.nn as nn import torch.optim as optim import torch.nn.

pytorch 多GPU訓練

當一臺伺服器有多張GPU時，執行程式預設在一張GPU上執行。通過多GPU訓練，可以增大batchsize，加快訓練速度。 from torch.nn import DataParallel num_gp

pytorch多GPU訓練例項與效能對比

以下實驗是我在百度公司實習的時候做的，記錄下來留個小經驗。多GPU訓練 cifar10_97.23 使用 run.sh 檔案開始訓練 cifar10_97.50 使用 run.4GPU.sh 開始訓練在叢集中改變GPU呼叫個數修改 run.sh 檔案 nohup

pyTorch 使用多GPU訓練

1.在pyTorch中模型使用GPU訓練很方便，直接使用model.cuda()。 2.使用多GPU訓練，model = nn.DataParallel(model) 3.注意訓練/測試過程中 inputs和labels均需載入到GPU中 inputs, l

Pytorch yolov3 多GPU 訓練

pytorch 多gpu訓練：# -*- coding:utf-8 -*- from __future__ import division import datetime import torch import torch.nn as nn import torch.nn.

Keras多GPU訓練以及載入權重無效的問題

目錄 1、資料並行 1.1、單GPU或者無GPU訓練的程式碼如下： 1.2、資料並行的多GPU 訓練 2、裝置並行參考連結本文講簡單的探討Keras中使用多GPU訓練的方法以及需要注意的地方。有兩種方法可

Caffe 多GPU訓練問題，以及batch_size 選擇的問題

1. 多GPU訓練時，速度沒有變得更快。使用多GPU訓練時，每個GPU都會執行一個 Caffe 模型的例項。比如當使用 n n

【TensorFlow】多GPU訓練：示例程式碼解析

使用多GPU有助於提升訓練速度和調參效率。本文主要對tensorflow的示例程式碼進行註釋解析：cifar10_multi_gpu_train.py 1080Ti下加速效果如下（batch=128）單卡：兩個GPU比單個GPU加速了近一倍：

使用Keras進行多GPU訓練 multi_gpu_model

使用Keras訓練具有多個GPU的深度神經網路（照片來源：Nor-Tech.com）。摘要在今天的部落格文章中，我們學習瞭如何使用多個GPU來訓練基於Keras的深度神經網路。使用多個GPU使我們能夠獲得準線性加速。為了驗證這一點，我們在CIFAR-10資料集上訓練了MiniGoog

使用估算器、tf.keras 和 tf.data 進行多 GPU 訓練

文 / Zalando Research 研究科學家 Kashif Rasul 來源 | TensorFlow 公眾號與大多數 AI 研究部門一樣，Zalando Research 也意識到了對創意進行嘗試和快速原型設計的重要性。隨著資料集變得越來越龐大，

Pytorch 多GPU執行

self.net = netword() n_gpu = 1 if n_gpu==1: self.net = torch.nn.DataParallel(self.net).cuda(device=0) else: gpus = [] for i in range(n

keras 多GPU訓練，單GPU預測

多GPU訓練 keras自帶模組 multi_gpu_model，此方式為資料並行的方式，將目標模型在多個裝置上各複製一份，並使用每個裝置上的複製品處理整個資料集的不同部分資料，最高支援在8片GPU上並行。使用方式： from keras.utils imp

tensorflow 多gpu訓練

當使用多個gpu訓練時,輸入資料為batch_size*num_gpu,這樣模型訓練時間可以大大減小. tensorflow中使用指定gpu可以通過tf.device()實現.例如我想使用0號顯示卡: gpu_ind=0 with tf.device("/g

『TensorFlow』分布式訓練_其二_多GPU並行demo分析（待續）

print all set represent proto copyright keys 20M runners 建議比對『MXNet』第七彈_多GPU並行程序設計 models/tutorials/image/cifar10/cifer10_multi_gpu-trai

pytorch使用指定GPU訓練

本文適合多GPU的機器，並且每個使用者需要單獨使用GPU訓練。雖然pytorch提供了指定gpu的幾種方式，但是使用不當的話會遇到out of memory的問題，主要是因為pytorch會在第0塊gpu上初始化，並且會佔用一定空間的視訊記憶體。這種情況下，經常會出現指定的gpu明明是

TensorFlow 訓練 MNIST （2）—— 多層神經網路

　　在我的上一篇隨筆中，採用了單層神經網路來對MNIST進行訓練，在測試集中只有約90%的正確率。這次換一種神經網路（多層神經網路）來進行訓練和測試。 1、獲取MNIST資料　　MNIST資料集只要一行程式碼就可以獲取的到，非常方便。關於MNIST的基本資訊可以參考我的上一篇隨筆。 mnist = i

tensorflow1.12 多GPU協同訓練報錯tensorflow.python.framework.errors_impl.NotFoundError: libnccl.so.2

tensroflow為了提高多模型訓練速度，需要多個GPU同時工作，而且我們一般使用的工作站都是8塊tesla K80，如果能將8塊顯示卡的計算力充分利用起來，將會大大提高模型訓練的速度，縮短模型訓練時間。這幾天看到tensorflow的mor

pytorch DataParallel 多GPU使用

單GPU： import os os.environ["CUDA_VISIBLE_DEVICES"] = "0" 多GPU： device_ids = [0,1,2,3] model

Pytorch入門例項：mnist分類訓練

#!/usr/bin/env python # -*- coding: utf-8 -*- __author__ = 'denny' __time__ = '2017-9-9 9:03' import torch import torchvision from torch.autograd

Caffe訓練mnist資料遇到的問題（GPU版）

caffe在make所有階段沒有問題，但是在mnist訓練出錯，Cannot create Cublas handle. Cublas won't be available. 這個錯誤一句話總結是在執行過程中遇到的許可權問題。解決方法有幾種： 1、修改無法執行檔案

pytorch使用多GPU訓練MNIST

相關推薦