學習使用pytorch實現LeNet、AlexNet、LSTM、BiLSTM、CNN-LSTM

作者：由 killerray 發表于書法時間：2021-07-27

使用pytorch自定義LeNet、AlexNet、BiLSTM、CNN-LSTM模型處理識別MNIST資料集中的手寫數字。完整的程式碼實現放在github

模型定義

LeNet和AlexNet就是用於處理影象的，比較好理解。

BiLSTM處理MNIST相當於把影象轉換成時序資料；28*28，可以理解為28個時間點，每個時間點的資料28維；也可以理解為一句話28個詞，每個詞向量28維。

學習實現CNN-LSTM模型是想用於影片資料處理的，一段影片可以抽樣成若干幀圖片，每幀圖片用CNN提取特徵，然後再把從若干幀圖片中提取出來的特徵輸入到LSTM。沒找到大小合適的資料集，只能把MNIST資料集中的一張圖片強行認為是只有一幀的一段影片。

參考Attention-Based Bidirectional Long Short-Term Memory Networks for Relation Classification 和相關實現定義AttBiLSTM實現LSTM+Attention。

#!/usr/bin/python

# -*- coding: utf-8 -*-

import torch
from torch import nn
import torch.nn.functional as F

# 定義單雙向LSTM 模型

class Rnn(nn.Module):
    """Uni- or bidirectional LSTM classifier over a batch-first sequence.

    NOTE(review): this listing was recovered from a garbled extraction that
    dropped short tokens (``nn``, ``x``, ``=``, ``* 2``, ``-1``/``-2``);
    the restored values follow from the tensor shapes involved -- confirm
    against the author's repository.

    Args:
        in_dim: Feature size of each time step.
        hidden_dim: LSTM hidden state size (per direction).
        n_layer: Number of stacked LSTM layers.
        n_class: Number of output classes.
        bidirectional: When true, run the LSTM in both directions and
            classify on the concatenated final hidden states.
    """

    def __init__(self, in_dim, hidden_dim, n_layer, n_class, bidirectional):
        super(Rnn, self).__init__()
        self.n_layer = n_layer
        self.hidden_dim = hidden_dim
        self.bidirectional = bidirectional
        self.lstm = nn.LSTM(in_dim, hidden_dim, n_layer,
                            batch_first=True,
                            bidirectional=bidirectional)
        if self.bidirectional:
            # Forward and backward states are concatenated before the head.
            self.classifier = nn.Linear(hidden_dim * 2, n_class)
        else:
            self.classifier = nn.Linear(hidden_dim, n_class)

    def forward(self, x):
        """Classify a batch of sequences.

        Args:
            x: Tensor laid out as (batch, seq_len, in_dim), as required by
                the ``batch_first=True`` LSTM built in ``__init__``.

        Returns:
            Tensor of shape (batch, n_class) with unnormalised class scores.
        """
        out, (h_n, c_n) = self.lstm(x)
        if self.bidirectional:
            # h_n is (n_layer * 2, batch, hidden_dim); its last two entries
            # are the top layer's forward and backward final states.
            out = torch.hstack((h_n[-2, :, :], h_n[-1, :, :]))
        else:
            # Output of the top layer at the last time step.
            out = out[:, -1, :]
        out = self.classifier(out)
        return out

class Attention(nn.Module):
    """Additive attention pooling over the time axis of RNN outputs.

    Implements eq. 9-11 referenced in the inline comments: a learned vector
    scores every time step, the scores are soft-maxed over the sequence,
    and the inputs are summed with those weights.

    NOTE(review): recovered from a garbled extraction that dropped short
    tokens (``nn``, ``w``, ``H``, ``M``, ``r``, ``=``, ``*``, the integer
    dims); restored from the surviving equation/shape comments -- confirm
    against the author's repository.

    Args:
        rnn_size: Feature size of each time step of the input.
    """

    def __init__(self, rnn_size: int):
        super(Attention, self).__init__()
        self.w = nn.Linear(rnn_size, 1)
        self.tanh = nn.Tanh()
        self.softmax = nn.Softmax(dim=1)

    def forward(self, H):
        """Pool ``H`` over its sequence dimension.

        Args:
            H: Tensor of shape (batch_size, word_pad_len, rnn_size).

        Returns:
            Tuple ``(r, alpha)``: the pooled representation of shape
            (batch_size, rnn_size) and the attention weights of shape
            (batch_size, word_pad_len).
        """
        # eq.9: M = tanh(H)
        M = self.tanh(H)  # (batch_size, word_pad_len, rnn_size)

        # eq.10: alpha = softmax(w^T M)
        alpha = self.w(M).squeeze(2)  # (batch_size, word_pad_len)
        alpha = self.softmax(alpha)  # (batch_size, word_pad_len)

        # eq.11: r = H alpha^T
        r = H * alpha.unsqueeze(2)  # (batch_size, word_pad_len, rnn_size)
        r = r.sum(dim=1)  # (batch_size, rnn_size)

        return r, alpha

class AttBiLSTM(nn.Module):
    """Bidirectional LSTM followed by attention pooling and a linear head.

    NOTE(review): recovered from a garbled extraction that dropped short
    tokens (``nn``, ``x``, ``H``, ``r``, ``h``, ``fc``, ``_``, ``=``, ``+``);
    the restored names are inferred from the surviving tokens and comments
    -- confirm against the author's repository.

    Args:
        n_classes: Number of output classes.
        emb_size: Feature size of each input time step.
        rnn_size: LSTM hidden size per direction.
        rnn_layers: Number of stacked LSTM layers.
        dropout: Drop probability applied before the final linear layer.
    """

    def __init__(self, n_classes: int, emb_size: int, rnn_size: int,
                 rnn_layers: int, dropout: float):
        super(AttBiLSTM, self).__init__()
        self.rnn_size = rnn_size

        # bidirectional LSTM
        self.BiLSTM = nn.LSTM(emb_size, rnn_size,
                              num_layers=rnn_layers,
                              bidirectional=True,
                              batch_first=True)
        self.attention = Attention(rnn_size)
        self.fc = nn.Linear(rnn_size, n_classes)
        self.tanh = nn.Tanh()
        self.dropout = nn.Dropout(dropout)

    def forward(self, x):
        """Score a batch of sequences.

        Args:
            x: Tensor laid out as (batch_size, word_pad_len, emb_size), as
                required by the ``batch_first=True`` LSTM.

        Returns:
            Tensor of shape (batch_size, n_classes) with class scores.
        """
        rnn_out, _ = self.BiLSTM(x)

        # The last axis holds [forward | backward] features; add the two
        # halves element-wise so the result has rnn_size features again.
        H = rnn_out[:, :, :self.rnn_size] + rnn_out[:, :, self.rnn_size:]

        # attention module
        r, alphas = self.attention(H)  # (batch_size, rnn_size), (batch_size, word_pad_len)

        # eq.12: h* = tanh(r)
        h = self.tanh(r)  # (batch_size, rnn_size)

        scores = self.fc(self.dropout(h))  # (batch_size, n_classes)

        return scores

# 定義AlexNet

class

AlexNet

（

。

Module

）：

def

__init__

（

self

，

num_classes

）：

super

（

AlexNet

，

self

）

。

__init__

（）

self

。

features

。

Sequential

（

。

Conv2d

（

，

kernel_size

（

，

），

padding

（

，

）），

。

ReLU

（

inplace

True

），

。

MaxPool2d

（

kernel_size

，

stride

），

。

Conv2d

（

，

kernel_size

（

，

），

stride

（

，

），

padding

（

，

）），

。

ReLU

（

inplace

True

），

。

MaxPool2d

（

kernel_size

，

stride

），

。

Conv2d

（

，

128

，

kernel_size

（

，

），

stride

（

，

），

padding

（

，

）），

。

ReLU

（

inplace

True

），

。

Conv2d

（

128

，

256

，

kernel_size

（

，

），

stride

（

，

），

padding

（

，

）），

。

ReLU

（

inplace

True

），

。

Conv2d

（

256

，

256

，

kernel_size

（

，

），

stride

（

，

），

padding

（

，

）），

。

ReLU

（

inplace

True

），

。

MaxPool2d

（

kernel_size

，

stride

）

self

。

classifier

。

Sequential

（

。

Dropout

（

0。5

），

。

Linear

（

256

，

1024

），

。

ReLU

（

inplace

True

），

。

Dropout

（

0。5

），

。

Linear

（

1024

，

512

），

。

ReLU

（

inplace

True

），

。

Linear

（

512

，

num_classes

）

def

forward

（

self

，

）：

self

。

features

（

）

torch

。

flatten

（

，

start_dim

）

self

。

classifier

（

）

return

class

LeNet

（

。

Module

）：

def

__init__

（

self

，

num_classes

）：

super

（

LeNet

，

self

）

。

__init__

（）

self

。

features

。

Sequential

（

。

Conv2d

（

，

kernel_size

（

，

），

stride

（

，

），

padding

（

，

）），

。

ReLU

（

inplace

True

），

。

MaxPool2d

（

，

），

。

Conv2d

（

，

kernel_size

（

，

），

stride

（

，

）），

。

ReLU

（

inplace

True

），

。

MaxPool2d

（

，

））

self

。

classifier

。

Sequential

（

。

Linear

（

，

120

），

。

Linear

（

120

，

），

。

Linear

（

，

num_classes

））

def

forward

（

self

，

）：

self

。

features

（

）

。

view

（

，

）

self

。

classifier

（

）

return

class

LeNetVariant

（

。

Module

）：

def

__init__

（

self

）：

super

（

LeNetVariant

，

self

）

。

__init__

（）

self

。

features

。

Sequential

（

。

Conv2d

（

，

kernel_size

（

，

），

stride

（

，

），

padding

（

，

）），

。

ReLU

（

inplace

True

），

。

MaxPool2d

（

，

），

。

Conv2d

（

，

kernel_size

（

，

），

stride

（

，

）），

。

ReLU

（

inplace

True

），

。

MaxPool2d

（

，

））

self

。

classifier

。

Sequential

（

。

Linear

（

，

120

），

。

Linear

（

120

，

））

def

forward

（

self

，

）：

self

。

features

（

）

。

view

（

，

）

self

。

classifier

（

）

return

class

CNNLSTM

（

。

Module

）：

def

__init__

（

self

，

num_classes

）：

super

（

CNNLSTM

，

self

）

。

__init__

（）

self

。

cnn

LeNetVariant

（）

self

。

lstm

。

LSTM

（

input_size

，

hidden_size

128

，

num_layers

，

batch_first

True

）

self

。

fc1

。

Linear

（

128

，

num_classes

）

def

forward

（

self

，

x_3d

）：

cnn_output_list

list

（）

for

range

（

x_3d

。

size

（

））：

cnn_output_list

。

append

（

self

。

cnn

（

x_3d

［：，

，

：，

：］））

torch

。

stack

（

tuple

（

cnn_output_list

），

dim

）

out

，

hidden

self

。

lstm

（

）

out

［：，

，

：］

。

relu

（

）

self

。

fc1

（

）

return

訓練&驗證

這部分基本照搬pytorch官網Tutorial的Quickstart的程式碼。

#!/usr/bin/python

# -*- coding: utf-8 -*-

import torch

def train_loop(dataloader, model, loss_fn, optimizer, which_model):
    """Run one training epoch over ``dataloader``, printing progress.

    Args:
        dataloader: Iterable of ``(X, y)`` batches that also exposes a
            ``dataset`` attribute whose length is the total sample count.
        model: ``torch.nn.Module`` mapping a batch ``X`` to predictions.
        loss_fn: Callable ``loss_fn(pred, y)`` returning a scalar tensor.
        optimizer: Optimizer providing ``zero_grad()`` and ``step()``.
        which_model: Input-layout selector. ``1`` drops dim 1 of ``X``
            (``X.squeeze(1)``), ``2`` inserts a new dim 1
            (``X.unsqueeze(1)``); any other value leaves ``X`` unchanged.
    """
    size = len(dataloader.dataset)

    # Ensure train-only layers (e.g. dropout) are active even if the model
    # was previously switched to evaluation mode.
    model.train()

    for batch, (X, y) in enumerate(dataloader):
        if which_model == 1:
            X = X.squeeze(1)
        elif which_model == 2:
            X = X.unsqueeze(1)

        # Compute prediction and loss
        pred = model(X)
        loss = loss_fn(pred, y)

        # Backpropagation
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Report every 100th batch.
        if batch % 100 == 0:
            loss_value, current = loss.item(), batch * len(X)
            print(f"loss: {loss_value:>7f} [{current:>5d}/{size:>5d}]")

def test_loop(dataloader, model, loss_fn, which_model):
    """Evaluate ``model`` on ``dataloader`` and print accuracy and mean loss.

    The model is switched to evaluation mode for the duration of the call
    (so dropout and similar layers are disabled) and restored to whatever
    mode it was in before returning.

    Args:
        dataloader: Sized iterable of ``(X, y)`` batches that also exposes
            a ``dataset`` attribute whose length is the total sample count.
        model: ``torch.nn.Module`` mapping a batch ``X`` to class scores
            with classes along dim 1.
        loss_fn: Callable ``loss_fn(pred, y)`` returning a scalar tensor.
        which_model: Input-layout selector. ``1`` drops dim 1 of ``X``
            (``X.squeeze(1)``), ``2`` inserts a new dim 1
            (``X.unsqueeze(1)``); any other value leaves ``X`` unchanged.
    """
    size = len(dataloader.dataset)
    num_batches = len(dataloader)
    test_loss, correct = 0.0, 0.0

    # Remember the current mode so a surrounding training loop is not
    # silently left in eval mode after this call.
    was_training = model.training
    model.eval()
    try:
        with torch.no_grad():
            for X, y in dataloader:
                if which_model == 1:
                    X = X.squeeze(1)
                elif which_model == 2:
                    X = X.unsqueeze(1)

                pred = model(X)
                test_loss += loss_fn(pred, y).item()
                correct += (
                    (pred.argmax(1) == y).type(torch.float).sum().item())
    finally:
        model.train(was_training)

    test_loss /= num_batches
    correct /= size
    print(f"Test Error: \n Accuracy: {(100 * correct):>0.1f}%, "
          f"Avg loss: {test_loss:>8f} \n")


# The ``test_`` prefix matches pytest's discovery pattern; mark the function
# so it is not collected as a test case when imported into a test module.
test_loop.__test__ = False

自定義資料集載入類

# -*- coding: utf-8 -*-

import os

from torch.utils.data.dataset import Dataset

import torch

import numpy as np

class CustomDataset(Dataset):
    """Dataset that turns raw file bytes into fixed-size float tensors.

    The annotation file is comma-separated text, one sample per line: the
    first field is the sample's path relative to ``sample_dir`` and the
    second field is its label (``"neg"`` or ``"pos"``).

    Args:
        annotations_file: Path of the annotation file.
        sample_dir: Directory containing the sample files.
        sample_size: Number of bytes kept per sample; shorter files are
            zero-padded, longer files are truncated.
        tensor_shape: Shape each sample tensor is reshaped to; must hold
            exactly ``sample_size`` elements.
    """

    # Textual class label -> integer class index.
    _LABEL_TO_INDEX = {"neg": 0, "pos": 1}

    def __init__(self, annotations_file, sample_dir, sample_size,
                 tensor_shape):
        self.annotations_file = annotations_file
        self.labels = self.read_label_file()
        self.sample_dir = sample_dir
        self.sample_size = sample_size
        self.tensor_shape = tensor_shape

    def __len__(self):
        """Return the number of annotated samples."""
        return len(self.labels)

    def __getitem__(self, idx):
        """Return ``(sample_tensor, label_tensor)`` for annotation ``idx``."""
        record = self.labels[idx]
        sample_path = os.path.join(self.sample_dir, record[0])
        sample_tensor = self.transform(sample_path, self.sample_size,
                                       self.tensor_shape)
        label = torch.tensor(self.target_transform(record[1]))
        return sample_tensor, label

    def read_label_file(self):
        """Parse the annotation file into a list of field lists.

        Surrounding whitespace is removed from the line and from every
        field, and blank lines are skipped, so trailing spaces or an empty
        line no longer produce unknown labels or short records.
        """
        label_list = []
        with open(self.annotations_file, 'r') as h:
            for line in h:
                line = line.strip()
                if not line:
                    continue
                label_list.append(
                    [field.strip() for field in line.split(",")])
        return label_list

    def target_transform(self, label):
        """Map a textual label to its class index.

        Raises:
            KeyError: If ``label`` is neither ``"neg"`` nor ``"pos"``.
        """
        return self._LABEL_TO_INDEX[label]

    def transform(self, sample_path, sample_size, tensor_shape):
        """Load a file as a float tensor of exactly ``sample_size`` values.

        Args:
            sample_path: Path of the file to read in binary mode.
            sample_size: Number of leading bytes to keep; the result is
                zero-padded when the file is shorter.
            tensor_shape: Target shape passed to ``torch.reshape``.

        Returns:
            A ``torch.float`` tensor of shape ``tensor_shape`` whose values
            are the byte values (0-255) of the file.
        """
        with open(sample_path, "rb") as h:
            # Only the first sample_size bytes are ever used.
            content = h.read(sample_size)
        raw = np.frombuffer(content, dtype=np.uint8)
        # Copy into a zero-filled buffer: pads short files and yields a
        # writable array.
        padded = np.zeros(sample_size, dtype=np.uint8)
        padded[:raw.size] = raw
        return torch.reshape(torch.tensor(padded).type(torch.float),
                             tensor_shape)

感受

1、pytorch確實是很友好

2、把模型基本的數學原理搞明白，再去自定義或者修改模型就容易很多了。

參考

標簽： self nn size __ RNN

上一篇:全職媽媽提升學歷有意義嗎？

下一篇：淺談青金石優劣分類和等級劃分，如何挑選優質青金石？

學習使用pytorch實現LeNet、AlexNet、LSTM、BiLSTM、CNN-LSTM

猜你喜歡

17、生成器

屬性雷達圖UGUI渲染器

總結一些網路上可獲取的特徵神經網路結構

Python資料分析及視覺化例項之多執行緒、程序

ResNet王者歸來：ImageNet上重新整理到80.7！