Implementing a Multilayer Perceptron by Hand in PyTorch
2022-07-19 10:50:00 【phac123】
Overview
- Get and read the data; we again use the Fashion-MNIST dataset.
- Define the model parameters: the input layer has 28*28 = 784 units, the hidden layer 256, and the output layer 10.
- Define the model: the hidden layer uses the ReLU activation function, followed by a linear output layer.
- Define the loss function: we use the cross-entropy loss.
- Finally, optimize with mini-batch stochastic gradient descent.
- Note: MXNet's SoftmaxCrossEntropyLoss sums over the batch dimension during back-propagation, while PyTorch's CrossEntropyLoss averages by default, so the loss computed by PyTorch is much smaller than MXNet's (by roughly a factor of 1/batch_size), and the back-propagated gradients shrink by the same factor. To get a comparable learning effect we therefore set the learning rate to 100.0. (The rate needs to be this large because the sgd function in d2lzh_pytorch divides by batch_size when it updates; since PyTorch's loss is already averaged over the batch, that division is redundant here and the effective step ends up divided by batch_size twice. See the sketch after this list.)
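To make the scaling concrete, here is a minimal sketch (my addition, not part of the original code) that compares the two reductions of CrossEntropyLoss on one random batch; the back-propagated gradients differ by the same factor as the losses:

import torch
import torch.nn as nn

batch_size = 256
logits = torch.randn(batch_size, 10)        # a fake batch of predictions
labels = torch.randint(0, 10, (batch_size,))

loss_mean = nn.CrossEntropyLoss(reduction='mean')(logits, labels)  # PyTorch default
loss_sum = nn.CrossEntropyLoss(reduction='sum')(logits, labels)    # MXNet-style sum

print(loss_sum.item() / loss_mean.item())  # exactly batch_size = 256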
Complete code
d2lzh_pytorch.py
import random
import sys
import time

from IPython import display
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms


def use_svg_display():
    # Render figures in vector (SVG) format
    display.set_matplotlib_formats('svg')


def set_figsize(figsize=(3.5, 2.5)):
    use_svg_display()
    # Set the figure size
    plt.rcParams['figure.figsize'] = figsize


def data_iter(batch_size, features, labels):
    """Given batch_size, features, labels: shuffle the data and yield mini-batches of the given size."""
    num_examples = len(features)
    indices = list(range(num_examples))
    random.shuffle(indices)
    for i in range(0, num_examples, batch_size):  # (start, stop, step)
        # The last batch may contain fewer than batch_size examples
        j = torch.LongTensor(indices[i: min(i + batch_size, num_examples)])
        yield features.index_select(0, j), labels.index_select(0, j)


def linreg(X, w, b):
    """The linear regression model."""
    return torch.mm(X, w) + b


def squared_loss(y_hat, y):
    """The squared loss for linear regression."""
    return (y_hat - y.view(y_hat.size())) ** 2 / 2


def sgd(params, lr, batch_size):
    """Optimization algorithm for linear regression: mini-batch stochastic gradient descent."""
    for param in params:
        param.data -= lr * param.grad / batch_size  # update in place via param.data


def get_fashion_mnist_labels(labels):
    """Convert Fashion-MNIST numeric labels into the corresponding text labels."""
    text_labels = ['t-shirt', 'trouser', 'pullover', 'dress', 'coat',
                   'sandal', 'shirt', 'sneaker', 'bag', 'ankle boot']
    return [text_labels[int(i)] for i in labels]


def show_fashion_mnist(images, labels):
    """Draw several images and their corresponding labels in one row."""
    use_svg_display()
    # The underscore marks a variable we ignore (do not use)
    _, figs = plt.subplots(1, len(images), figsize=(12, 12))
    for f, img, lbl in zip(figs, images, labels):
        f.imshow(img.view((28, 28)).numpy())
        f.set_title(lbl)
        f.axes.get_xaxis().set_visible(False)
        f.axes.get_yaxis().set_visible(False)
    plt.show()


def load_data_fashion_mnist(batch_size):
    """Download and read the Fashion-MNIST dataset; returns the two iterators train_iter and test_iter."""
    mnist_train = torchvision.datasets.FashionMNIST(root='Datasets/FashionMNIST', train=True, download=True,
                                                    transform=transforms.ToTensor())
    mnist_test = torchvision.datasets.FashionMNIST(root='Datasets/FashionMNIST', train=False, download=True,
                                                   transform=transforms.ToTensor())
    if sys.platform.startswith('win'):
        num_workers = 0  # 0 means no extra worker processes are used to speed up data loading
    else:
        num_workers = 4
    train_iter = torch.utils.data.DataLoader(mnist_train, batch_size=batch_size, shuffle=True, num_workers=num_workers)
    test_iter = torch.utils.data.DataLoader(mnist_test, batch_size=batch_size, shuffle=False, num_workers=num_workers)
    return train_iter, test_iter


def evaluate_accuracy(data_iter, net):
    """Evaluate the accuracy of model net on the dataset data_iter."""
    acc_sum, n = 0.0, 0
    for X, y in data_iter:
        acc_sum += (net(X).argmax(dim=1) == y).float().sum().item()
        n += y.shape[0]
    return acc_sum / n


def train_ch3(net, train_iter, test_iter, loss, num_epochs, batch_size, params=None, lr=None, optimizer=None):
    """Train a softmax-style classifier."""
    for epoch in range(num_epochs):
        train_l_sum, train_acc_sum, n = 0.0, 0.0, 0
        for X, y in train_iter:
            y_hat = net(X)
            l = loss(y_hat, y).sum()
            # Zero the gradients
            if optimizer is not None:
                optimizer.zero_grad()
            elif params is not None and params[0].grad is not None:
                for param in params:
                    param.grad.data.zero_()
            l.backward()
            if optimizer is None:
                sgd(params, lr, batch_size)
            else:
                optimizer.step()
            train_l_sum += l.item()
            train_acc_sum += (y_hat.argmax(dim=1) == y).sum().item()
            n += y.shape[0]
        test_acc = evaluate_accuracy(test_iter, net)
        print('epoch %d, loss %.4f, train acc %.3f, test acc %.3f'
              % (epoch + 1, train_l_sum / n, train_acc_sum / n, test_acc))


class FlattenLayer(nn.Module):
    """Reshape the input x to (batch_size, -1)."""
    def __init__(self):
        super(FlattenLayer, self).__init__()

    def forward(self, x):
        return x.view(x.shape[0], -1)
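FlattenLayer is not actually called by main.py below, which flattens manually via X.view; it exists so the same network can be written with nn.Sequential. A minimal, hypothetical usage sketch:

import torch
import torch.nn as nn
from d2lzh_pytorch import FlattenLayer

# The same 784-256-10 MLP expressed with PyTorch's module API
net = nn.Sequential(
    FlattenLayer(),        # (batch, 1, 28, 28) -> (batch, 784)
    nn.Linear(784, 256),
    nn.ReLU(),
    nn.Linear(256, 10),
)
print(net(torch.rand(2, 1, 28, 28)).shape)  # torch.Size([2, 10])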
main.py
import torch
import numpy as np
import sys

sys.path.append("..")
import d2lzh_pytorch as d2l

# Get and read the data
batch_size = 256
train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size)

# Define the model parameters
num_inputs, num_hiddens, num_outputs = 784, 256, 10
w1 = torch.tensor(np.random.normal(0, 0.01, (num_inputs, num_hiddens)), dtype=torch.float)
b1 = torch.zeros(num_hiddens, dtype=torch.float)
w2 = torch.tensor(np.random.normal(0, 0.01, (num_hiddens, num_outputs)), dtype=torch.float)
b2 = torch.zeros(num_outputs, dtype=torch.float)

params = [w1, b1, w2, b2]
for param in params:
    param.requires_grad_(requires_grad=True)

# Define the ReLU function
def relu(X):
    return torch.max(input=X, other=torch.tensor(0.0))

# Define the model
def net(X):
    X = X.view(-1, num_inputs)
    H = relu(torch.mm(X, w1) + b1)
    return torch.mm(H, w2) + b2

# Define the loss function
loss = torch.nn.CrossEntropyLoss()

# Train the model
num_epochs, lr = 5, 100.0
d2l.train_ch3(net, train_iter, test_iter, loss, num_epochs, batch_size, params, lr)
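As noted in the overview, the unusually large learning rate only compensates for the extra division by batch_size inside sgd. A hypothetical alternative (my own variant, not from the original post) is to hand the update to torch.optim.SGD, where the averaged loss pairs with an ordinary learning rate such as 0.5:

# Hypothetical variant: let torch.optim.SGD perform the update, so the
# averaged loss works with a conventional learning rate.
optimizer = torch.optim.SGD(params, lr=0.5)
d2l.train_ch3(net, train_iter, test_iter, loss, num_epochs, batch_size,
              optimizer=optimizer)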