Implementing a Multilayer Perceptron by Hand in PyTorch
2022-07-19 10:50:00 【phac123】
Overview
- Get and read the data; we again use the Fashion-MNIST dataset.
- Define the model parameters: the input layer has 28*28 = 784 units, the hidden layer 256, and the output layer 10.
- Define the model: the hidden layer uses the ReLU activation function, followed by a linear output layer.
- Define the loss function: we use the cross-entropy loss.
- Finally, optimize with mini-batch stochastic gradient descent.
- Note: MXNet's SoftmaxCrossEntropyLoss sums over the batch dimension during back-propagation, while PyTorch's CrossEntropyLoss averages by default, so the loss computed by PyTorch is much smaller than MXNet's (by roughly a factor of 1/batch_size), and the back-propagated gradients shrink by the same factor. To get a comparable learning effect we therefore set the learning rate to 100.0. (The rate needs to be this large because the sgd function in d2lzh_pytorch divides by batch_size when it updates; since PyTorch's loss is already averaged over the batch, that division is redundant here and the effective step ends up divided by batch_size twice. See the sketch after this list.)
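To make the scaling concrete, here is a minimal sketch (my addition, not part of the original code) that compares the two reductions of CrossEntropyLoss on one random batch; the back-propagated gradients differ by the same factor as the losses:

import torch
import torch.nn as nn

batch_size = 256
logits = torch.randn(batch_size, 10)        # a fake batch of predictions
labels = torch.randint(0, 10, (batch_size,))

loss_mean = nn.CrossEntropyLoss(reduction='mean')(logits, labels)  # PyTorch default
loss_sum = nn.CrossEntropyLoss(reduction='sum')(logits, labels)    # MXNet-style sum

print(loss_sum.item() / loss_mean.item())  # exactly batch_size = 256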
Complete code
d2lzh_pytorch.py
import random
import sys
import time

from IPython import display
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms


def use_svg_display():
    # Render figures in vector (SVG) format
    display.set_matplotlib_formats('svg')


def set_figsize(figsize=(3.5, 2.5)):
    use_svg_display()
    # Set the figure size
    plt.rcParams['figure.figsize'] = figsize


def data_iter(batch_size, features, labels):
    """Given batch_size, features, labels: shuffle the data and yield mini-batches of the given size."""
    num_examples = len(features)
    indices = list(range(num_examples))
    random.shuffle(indices)
    for i in range(0, num_examples, batch_size):  # (start, stop, step)
        # The last batch may contain fewer than batch_size examples
        j = torch.LongTensor(indices[i: min(i + batch_size, num_examples)])
        yield features.index_select(0, j), labels.index_select(0, j)


def linreg(X, w, b):
    """The linear regression model."""
    return torch.mm(X, w) + b


def squared_loss(y_hat, y):
    """The squared loss for linear regression."""
    return (y_hat - y.view(y_hat.size())) ** 2 / 2


def sgd(params, lr, batch_size):
    """Optimization algorithm for linear regression: mini-batch stochastic gradient descent."""
    for param in params:
        param.data -= lr * param.grad / batch_size  # update in place via param.data


def get_fashion_mnist_labels(labels):
    """Convert Fashion-MNIST numeric labels into the corresponding text labels."""
    text_labels = ['t-shirt', 'trouser', 'pullover', 'dress', 'coat',
                   'sandal', 'shirt', 'sneaker', 'bag', 'ankle boot']
    return [text_labels[int(i)] for i in labels]


def show_fashion_mnist(images, labels):
    """Draw several images and their corresponding labels in one row."""
    use_svg_display()
    # The underscore marks a variable we ignore (do not use)
    _, figs = plt.subplots(1, len(images), figsize=(12, 12))
    for f, img, lbl in zip(figs, images, labels):
        f.imshow(img.view((28, 28)).numpy())
        f.set_title(lbl)
        f.axes.get_xaxis().set_visible(False)
        f.axes.get_yaxis().set_visible(False)
    plt.show()


def load_data_fashion_mnist(batch_size):
    """Download and read the Fashion-MNIST dataset; returns the two iterators train_iter and test_iter."""
    mnist_train = torchvision.datasets.FashionMNIST(root='Datasets/FashionMNIST', train=True, download=True,
                                                    transform=transforms.ToTensor())
    mnist_test = torchvision.datasets.FashionMNIST(root='Datasets/FashionMNIST', train=False, download=True,
                                                   transform=transforms.ToTensor())
    if sys.platform.startswith('win'):
        num_workers = 0  # 0 means no extra worker processes are used to speed up data loading
    else:
        num_workers = 4
    train_iter = torch.utils.data.DataLoader(mnist_train, batch_size=batch_size, shuffle=True, num_workers=num_workers)
    test_iter = torch.utils.data.DataLoader(mnist_test, batch_size=batch_size, shuffle=False, num_workers=num_workers)
    return train_iter, test_iter


def evaluate_accuracy(data_iter, net):
    """Evaluate the accuracy of model net on the dataset data_iter."""
    acc_sum, n = 0.0, 0
    for X, y in data_iter:
        acc_sum += (net(X).argmax(dim=1) == y).float().sum().item()
        n += y.shape[0]
    return acc_sum / n


def train_ch3(net, train_iter, test_iter, loss, num_epochs, batch_size, params=None, lr=None, optimizer=None):
    """Train a softmax-style classifier."""
    for epoch in range(num_epochs):
        train_l_sum, train_acc_sum, n = 0.0, 0.0, 0
        for X, y in train_iter:
            y_hat = net(X)
            l = loss(y_hat, y).sum()
            # Zero the gradients
            if optimizer is not None:
                optimizer.zero_grad()
            elif params is not None and params[0].grad is not None:
                for param in params:
                    param.grad.data.zero_()
            l.backward()
            if optimizer is None:
                sgd(params, lr, batch_size)
            else:
                optimizer.step()
            train_l_sum += l.item()
            train_acc_sum += (y_hat.argmax(dim=1) == y).sum().item()
            n += y.shape[0]
        test_acc = evaluate_accuracy(test_iter, net)
        print('epoch %d, loss %.4f, train acc %.3f, test acc %.3f'
              % (epoch + 1, train_l_sum / n, train_acc_sum / n, test_acc))


class FlattenLayer(nn.Module):
    """Reshape the input x to (batch_size, -1)."""
    def __init__(self):
        super(FlattenLayer, self).__init__()

    def forward(self, x):
        return x.view(x.shape[0], -1)
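FlattenLayer is not actually called by main.py below, which flattens manually via X.view; it exists so the same network can be written with nn.Sequential. A minimal, hypothetical usage sketch:

import torch
import torch.nn as nn
from d2lzh_pytorch import FlattenLayer

# The same 784-256-10 MLP expressed with PyTorch's module API
net = nn.Sequential(
    FlattenLayer(),        # (batch, 1, 28, 28) -> (batch, 784)
    nn.Linear(784, 256),
    nn.ReLU(),
    nn.Linear(256, 10),
)
print(net(torch.rand(2, 1, 28, 28)).shape)  # torch.Size([2, 10])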
main.py
import torch
import numpy as np
import sys

sys.path.append("..")
import d2lzh_pytorch as d2l

# Get and read the data
batch_size = 256
train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size)

# Define the model parameters
num_inputs, num_hiddens, num_outputs = 784, 256, 10
w1 = torch.tensor(np.random.normal(0, 0.01, (num_inputs, num_hiddens)), dtype=torch.float)
b1 = torch.zeros(num_hiddens, dtype=torch.float)
w2 = torch.tensor(np.random.normal(0, 0.01, (num_hiddens, num_outputs)), dtype=torch.float)
b2 = torch.zeros(num_outputs, dtype=torch.float)

params = [w1, b1, w2, b2]
for param in params:
    param.requires_grad_(requires_grad=True)

# Define the ReLU function
def relu(X):
    return torch.max(input=X, other=torch.tensor(0.0))

# Define the model
def net(X):
    X = X.view(-1, num_inputs)
    H = relu(torch.mm(X, w1) + b1)
    return torch.mm(H, w2) + b2

# Define the loss function
loss = torch.nn.CrossEntropyLoss()

# Train the model
num_epochs, lr = 5, 100.0
d2l.train_ch3(net, train_iter, test_iter, loss, num_epochs, batch_size, params, lr)
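As noted in the overview, the unusually large learning rate only compensates for the extra division by batch_size inside sgd. A hypothetical alternative (my own variant, not from the original post) is to hand the update to torch.optim.SGD, where the averaged loss pairs with an ordinary learning rate such as 0.5:

# Hypothetical variant: let torch.optim.SGD perform the update, so the
# averaged loss works with a conventional learning rate.
optimizer = torch.optim.SGD(params, lr=0.5)
d2l.train_ch3(net, train_iter, test_iter, loss, num_epochs, batch_size,
              optimizer=optimizer)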