[Classic Interview Question] Implementing Neural Network Backpropagation from Scratch
2021/9/5 12:06:21
This article walks through implementing the gradient backpropagation algorithm for a neural network from scratch, a useful reference for anyone preparing to hand-code it in an interview.
Training a neural network is, at its core, the process of backpropagating gradients, and it is one of the classic "code it by hand" interview exercises.
Below I build a two-layer fully connected network with a sigmoid activation. The full derivation is omitted here, but the key point is to remember the formulas themselves, so they are written out next for reference.
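These are exactly the equations the gradient method in the code below computes. Forward pass:

$$a_1 = x W_1 + b_1,\qquad z_1 = \sigma(a_1),\qquad a_2 = z_1 W_2 + b_2,\qquad y = \mathrm{softmax}(a_2)$$

With the cross-entropy loss $L = -\frac{1}{N}\sum t \log y$ over a batch of size $N$ and one-hot targets $t$, the backward pass is:

$$\delta_2 = \frac{\partial L}{\partial a_2} = \frac{y - t}{N},\qquad \frac{\partial L}{\partial W_2} = z_1^{\top}\delta_2,\qquad \frac{\partial L}{\partial b_2} = \sum_{\text{batch}} \delta_2$$

$$\delta_1 = (\delta_2 W_2^{\top}) \odot \sigma'(a_1),\qquad \sigma'(a) = \sigma(a)\bigl(1 - \sigma(a)\bigr),\qquad \frac{\partial L}{\partial W_1} = x^{\top}\delta_1,\qquad \frac{\partial L}{\partial b_1} = \sum_{\text{batch}} \delta_1$$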
import numpy as np

np.random.seed(17)


def sigmoid(x):
    return 1 / (1 + np.exp(-x))


def sigmoid_grad(x):
    # Derivative of sigmoid with respect to its pre-activation input
    return (1.0 - sigmoid(x)) * sigmoid(x)


def softmax(x):
    if x.ndim == 2:
        x = x.T
        x = x - np.max(x, axis=0)
        y = np.exp(x) / np.sum(np.exp(x), axis=0)
        return y.T
    x = x - np.max(x)  # guard against overflow
    return np.exp(x) / np.sum(np.exp(x))


# A two-layer fully connected network with sigmoid activation for a 10-class task
class TwoLayerNet:
    def __init__(self, input_size, hidden_size, output_size, weight_init_std=0.01):
        # Initialize weights
        self.params = {}
        self.params['W1'] = weight_init_std * np.random.randn(input_size, hidden_size)
        self.params['b1'] = np.zeros(hidden_size)
        self.params['W2'] = weight_init_std * np.random.randn(hidden_size, output_size)
        self.params['b2'] = np.zeros(output_size)

    def predict(self, x):
        W1, W2 = self.params['W1'], self.params['W2']
        b1, b2 = self.params['b1'], self.params['b2']
        a1 = np.dot(x, W1) + b1
        z1 = sigmoid(a1)
        a2 = np.dot(z1, W2) + b2
        y = softmax(a2)
        return y

    # x: input data, t: one-hot labels
    def loss(self, x, t):
        y = self.predict(x)
        # Cross-entropy loss; the small epsilon avoids log(0).
        # This is the loss whose gradient is dy = (y - t) / batch_num below.
        return -np.sum(t * np.log(y + 1e-7)) / y.shape[0]

    def accuracy(self, x, t):
        y = self.predict(x)
        y = np.argmax(y, axis=1)
        t = np.argmax(t, axis=1)
        accuracy = np.sum(y == t) / float(x.shape[0])
        return accuracy

    # x: input data, t: one-hot labels
    def gradient(self, x, t):
        W1, W2 = self.params['W1'], self.params['W2']
        b1, b2 = self.params['b1'], self.params['b2']
        grads = {}
        batch_num = x.shape[0]

        # forward
        a1 = np.dot(x, W1) + b1
        z1 = sigmoid(a1)
        a2 = np.dot(z1, W2) + b2
        y = softmax(a2)

        # backward
        dy = (y - t) / batch_num  # error at the output layer (softmax + cross-entropy)
        grads['W2'] = np.dot(z1.T, dy)
        grads['b2'] = np.sum(dy, axis=0)

        da1 = np.dot(dy, W2.T)  # error propagated back to the hidden layer
        dz1 = sigmoid_grad(a1) * da1
        grads['W1'] = np.dot(x.T, dz1)
        grads['b1'] = np.sum(dz1, axis=0)
        return grads


# Dataset: random synthetic data, 30,000 samples, 10 classes
x_train = np.random.randn(30000, 784)
# One-hot labels; the (y - t) gradient above assumes targets that sum to 1 per sample
t_train = np.eye(10)[np.random.randint(0, 10, size=x_train.shape[0])]

train_size = x_train.shape[0]
batch_size = 512

# Task 1: gradient check (here just a shape check; see the numerical check below)
net = TwoLayerNet(input_size=784, hidden_size=80, output_size=10)
grad = net.gradient(x_train, t_train)
print("------- gradient check ---------")
print(grad["W1"].shape)
print(grad["b1"].shape)
print(grad["W2"].shape)
print(grad["b2"].shape)

# Task 2: training
if __name__ == "__main__":
    net = TwoLayerNet(input_size=784, hidden_size=80, output_size=10)
    batch_size = 512
    learning_rate = 0.01
    iters = 1000  # set the number of iterations as appropriate
    loss_history = []
    iter_per_epoch = max(train_size / batch_size, 1)  # unused here; handy for per-epoch logging
    for i in range(iters):
        batch_mask = np.random.choice(train_size, batch_size)
        x_batch = x_train[batch_mask]
        t_batch = t_train[batch_mask]
        grad = net.gradient(x_batch, t_batch)
        # SGD parameter update
        for key in ('W1', 'b1', 'W2', 'b2'):
            net.params[key] -= learning_rate * grad[key]
        loss = net.loss(x_batch, t_batch)
        if (i + 1) % 100 == 0:
            loss_history.append(round(loss, 4))
            print("iters:%d, loss:%.4f" % (i + 1, loss))
    print(loss_history)
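The shape check in Task 1 only confirms that the gradients have the right dimensions. The standard way to verify that the backward pass is actually correct is to compare it against a slow finite-difference gradient on a tiny batch. Here is a minimal sketch of such a check; the numerical_gradient helper is my own illustration rather than part of the original code, and a small hidden layer keeps the O(#parameters) loop fast:

def numerical_gradient(f, x, eps=1e-4):
    # Central-difference approximation: perturb each entry of x in place
    grad = np.zeros_like(x)
    it = np.nditer(x, flags=['multi_index'])
    while not it.finished:
        idx = it.multi_index
        orig = x[idx]
        x[idx] = orig + eps
        fxh1 = f()
        x[idx] = orig - eps
        fxh2 = f()
        grad[idx] = (fxh1 - fxh2) / (2 * eps)
        x[idx] = orig  # restore the original value
        it.iternext()
    return grad

x_check, t_check = x_train[:3], t_train[:3]
net_check = TwoLayerNet(input_size=784, hidden_size=4, output_size=10)
analytic = net_check.gradient(x_check, t_check)
for key in ('W1', 'b1', 'W2', 'b2'):
    # net_check.params[key] is mutated in place, so the loss sees each perturbation
    numeric = numerical_gradient(lambda: net_check.loss(x_check, t_check),
                                 net_check.params[key])
    print(key, np.mean(np.abs(analytic[key] - numeric)))

If the analytic backward pass is correct, the mean absolute difference for every parameter should be tiny (roughly 1e-7 or smaller); a large gap points to a bug in the gradient code.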
References:
Koki Saito, Deep Learning from Scratch: Theory and Implementation with Python (深度学习入门:基于Python的理论与实现)
That wraps up this from-scratch implementation of backpropagation; I hope it helps with your interview preparation.