PyTorch之LeNet-5:利用PyTorch實現最經典的LeNet-5對手寫數字圖片識別
PyTorch之LeNet-5:利用PyTorch實現最經典的LeNet-5卷積神經網路對手寫數字圖片識別CNN
目錄
訓練過程
程式碼設計
訓練過程
程式碼設計
#PyTorch:利用PyTorch實現最經典的LeNet卷積神經網路對手寫數字進行識別CNN——Jason niu
import os

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
class LeNet(nn.Module):
    """LeNet-style convolutional network for 1-channel 28x28 digit images.

    Pipeline (see ``forward``):
        conv1 (1 -> 10, 5x5) -> 2x2 max-pool -> ReLU
        conv2 (10 -> 20, 5x5) -> Dropout2d -> 2x2 max-pool -> ReLU
        flatten (20 * 4 * 4 = 320) -> fc1 (320 -> 50) -> ReLU -> dropout
        fc2 (50 -> 10) -> log-softmax over the class dimension
    """

    def __init__(self):
        super().__init__()
        # Convolution layers: each output is the dot product between a small
        # kernel (weight tensor) and an equally sized region of the input.
        self.conv1 = nn.Conv2d(1, 10, kernel_size=5)
        self.conv2 = nn.Conv2d(10, 20, kernel_size=5)
        # Dropout2d zeroes whole feature maps at random, which encourages
        # independence between strongly correlated feature maps.
        self.conv2_drop = nn.Dropout2d()
        self.fc1 = nn.Linear(320, 50)
        self.fc2 = nn.Linear(50, 10)

    def forward(self, x):
        """Return per-class log-probabilities of shape ``(batch, 10)``.

        Args:
            x: float tensor of shape ``(batch, 1, 28, 28)``. Any other spatial
                size fails at ``fc1`` with a shape error.
        """
        # Max-pooling down-samples each 2x2 region to its maximum.
        x = F.relu(F.max_pool2d(self.conv1(x), 2))
        x = F.relu(F.max_pool2d(self.conv2_drop(self.conv2(x)), 2))
        # Flatten per sample. Keeping the batch dimension explicit (instead of
        # ``view(-1, 320)``) means a wrongly sized input raises at fc1 rather
        # than being silently reshaped into a different batch size.
        x = x.view(x.size(0), -1)
        # ReLU: element-wise max(0, x).
        x = F.relu(self.fc1(x))
        # Element-wise dropout, active only while the module is in train mode.
        x = F.dropout(x, training=self.training)
        x = self.fc2(x)
        # log(softmax(x)) along the class dimension; values are <= 0 and their
        # exponentials sum to 1 per sample.
        return F.log_softmax(x, dim=1)
import os
from torch.autograd import Variable  # legacy wrapper; a no-op since PyTorch 0.4
import torch.nn.functional as F

# Detect CUDA once; the rest of the script uses this flag to decide whether
# tensors and the model are moved to the GPU.
cuda_gpu = torch.cuda.is_available()

# Create the network and, when a GPU is available, move it there before the
# optimizer is built so the optimizer holds references to the final parameters.
model = LeNet()
if cuda_gpu:
    model.cuda()

# LeNet.forward already returns log-probabilities. CrossEntropyLoss applies
# log_softmax once more, which leaves log-probabilities unchanged, so the value
# is the same as NLLLoss on the model output.
criterion = nn.CrossEntropyLoss()

# SGD with momentum: learning rate 0.005, momentum 0.9.
optimizer = optim.SGD(model.parameters(), lr=0.005, momentum=0.9)
def train(model, epoch, criterion, optimizer, data_loader):
    """Train ``model`` for one pass over ``data_loader``.

    Args:
        model: the ``nn.Module`` to optimise; moved to the GPU when CUDA is
            available and switched to train mode.
        epoch: epoch number, used only in the progress message.
        criterion: callable ``(output, target) -> scalar loss tensor``.
        optimizer: optimizer already bound to ``model``'s parameters.
        data_loader: iterable yielding ``(data, target)`` batches. ``len()``
            and ``.dataset`` are read only when a progress line is printed.

    Returns:
        None. A progress line is printed every 400 batches.
    """
    use_cuda = torch.cuda.is_available()
    if use_cuda:
        # Move once, not on every batch.
        model.cuda()
    model.train()

    samples_seen = 0
    for batch_idx, (data, target) in enumerate(data_loader, start=1):
        if use_cuda:
            data, target = data.cuda(), target.cuda()

        optimizer.zero_grad()
        output = model(data)
        loss = criterion(output, target)
        loss.backward()
        optimizer.step()

        # Running count stays correct even when the last batch is short.
        samples_seen += len(data)
        if batch_idx % 400 == 0:
            # loss.item() replaces loss.data[0], which fails on 0-dim tensors.
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, samples_seen, len(data_loader.dataset),
                100. * batch_idx / len(data_loader), loss.item()))
from torchvision import datasets, transforms

batch_num_size = 64

# Shared preprocessing: convert to a tensor, then normalise with mean 0.1307
# and std 0.3081 (presumably the MNIST pixel statistics -- confirm if the
# dataset changes).
mnist_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.1307,), (0.3081,)),
])

# Training split: shuffled every epoch.
train_loader = torch.utils.data.DataLoader(
    datasets.MNIST('data', train=True, download=True, transform=mnist_transform),
    batch_size=batch_num_size, shuffle=True)

# Test split: order does not affect the averaged metrics, so no shuffling.
# download=True makes this statement work even if it runs without the
# training loader having fetched the files first.
test_loader = torch.utils.data.DataLoader(
    datasets.MNIST('data', train=False, download=True, transform=mnist_transform),
    batch_size=batch_num_size, shuffle=False)
def test(model, epoch, criterion, data_loader):
    """Evaluate ``model`` on ``data_loader`` and print a summary line.

    Args:
        model: the ``nn.Module`` to evaluate; moved to the GPU when CUDA is
            available and switched to eval mode.
        epoch: accepted for signature compatibility; not used.
        criterion: callable ``(output, target) -> scalar loss tensor`` that
            already averages over the batch.
        data_loader: sized iterable of ``(data, target)`` batches exposing a
            ``.dataset`` attribute with a length.

    Returns:
        ``(acc, test_loss)``: accuracy as a float in [0, 1] and the mean of the
        per-batch losses as a float.
    """
    use_cuda = torch.cuda.is_available()
    if use_cuda:
        # Move once, not on every batch.
        model.cuda()
    model.eval()

    test_loss = 0.0
    correct = 0
    # No gradients are needed for evaluation; skipping them saves memory/time.
    with torch.no_grad():
        for data, target in data_loader:
            if use_cuda:
                data, target = data.cuda(), target.cuda()
            output = model(data)
            # .item() replaces .data[0], which fails on 0-dim tensors.
            test_loss += criterion(output, target).item()
            # Index of the largest log-probability is the predicted class.
            pred = output.argmax(dim=1)
            # Accumulate a plain int so the accuracy below is true division.
            correct += pred.eq(target).sum().item()

    # The criterion already averages within a batch, so average over batches.
    test_loss /= len(data_loader)
    acc = correct / len(data_loader.dataset)
    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
        test_loss, correct, len(data_loader.dataset), 100. * acc))
    return (acc, test_loss)


# This is an evaluation helper, not a pytest test case; the flag keeps pytest
# from collecting it if the module is star-imported into a test file.
test.__test__ = False
# Five epochs (one epoch = one full pass over the training set) are used here
# to train the LeNet model.
epochs = 5
checkpoint_path = 'pretrained/MNIST_net.t7'

# Reuse a saved model when one exists on disk; otherwise train one and save it.
if os.path.isfile(checkpoint_path):
    print('Loading model')
    # map_location keeps every tensor on the CPU while loading, so a checkpoint
    # written on a GPU machine also loads on a CPU-only machine.
    model.load_state_dict(
        torch.load(checkpoint_path, map_location=lambda storage, loc: storage))
    acc, loss = test(model, 1, criterion, test_loader)
else:
    print('Training model')
    for epoch in range(1, epochs + 1):
        train(model, epoch, criterion, optimizer, train_loader)
        # Pass the real epoch number rather than a constant 1.
        acc, loss = test(model, epoch, criterion, test_loader)
    # torch.save does not create missing directories.
    os.makedirs(os.path.dirname(checkpoint_path), exist_ok=True)
    torch.save(model.state_dict(), checkpoint_path)
# NOTE(review): the original snippet never defines `t`. It is assumed here to
# be the first convolution layer's weights, which the code right after this
# section also inspects -- confirm against the article's intent.
t = model.conv1.weight

# .cpu() moves a tensor to the CPU (or leaves it there if it already is).
print(type(t.cpu().data))

# When a GPU is available, .cuda() moves the tensor to the GPU. Legacy PyTorch
# reported torch.cuda.FloatTensor for GPU tensors and torch.FloatTensor for CPU
# tensors; on current versions check `t.is_cuda` / `t.device` instead.
if torch.cuda.is_available():
    print("Cuda is available")
    print(type(t.cuda().data))
else:
    print("Cuda is NOT available")

if torch.cuda.is_available():
    try:
        print(t.data.numpy())
    except (RuntimeError, TypeError) as e:
        # A GPU tensor cannot be converted to NumPy directly. Older PyTorch
        # raised RuntimeError here, newer versions raise TypeError.
        print("you can't transform a GPU tensor to a numpy nd array, "
              "you have to copy your weight tensor to cpu and then get "
              "the numpy array ({})".format(e))

# Copying to the CPU first always works.
print(type(t.cpu().data.numpy()))
print(t.cpu().data.numpy().shape)
print(t.cpu().data.numpy())
# `plt` was used below without ever being imported in this file.
import matplotlib.pyplot as plt

# First-layer kernels as a NumPy array. Conv2d stores its weight as
# (out_channels, in_channels, kernel_h, kernel_w), i.e. (10, 1, 5, 5) here.
data = model.conv1.weight.detach().cpu().numpy()
print(data.shape)
print(data[:, 0].shape)

kernel_num = data.shape[0]
# squeeze=False always returns a 2-D array of axes, so indexing also works
# when there is only one kernel.
fig, axes = plt.subplots(ncols=kernel_num, figsize=(2 * kernel_num, 2),
                         squeeze=False)
# One grayscale image per output kernel (input channel 0).
for col in range(kernel_num):
    axes[0, col].imshow(data[col, 0, :, :], cmap=plt.cm.gray)
plt.show()
相關文章
LeNet-5 is our latest convolutional network designed for handwritten and machine-printed character recognition.