[Notes] Caffe fully connected layer source code (inner product layer): distinct from the fullyconnected_layer in TensorFlow
2021/11/30 14:08:26
The official Caffe documentation gives only a brief description of this layer. So what exactly does it do?
Suppose conv2's input is 256*27*27 and its output is 50*22*22. conv2's output is the input of pool2, whose output of 50*11*11 is in turn the input of ip1, and ip1's output is 500*1*1. How many parameters does pool2->ip1 then have? The key is to understand what fully_connected means: each output neuron computes wTx, where x is a column vector and w has the same length as x. In this example x has 50*11*11 dimensions, so pool2->ip1 has 500*50*11*11 parameters. The 50*11*11 input is an image with 50 channels of size 11*11; to connect it fully, all channels are flattened together into a single 50*11*11 vector. (This example is adapted from http://www.cnblogs.com/dupuleng/articles/4312149.html.)
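As a quick check on that arithmetic, here is a minimal sketch (the sizes are the ones assumed in the example above):

```cpp
#include <cstdio>

int main() {
  // pool2 output: 50 channels of 11*11, flattened into one feature vector.
  const int K = 50 * 11 * 11;  // 6050 inputs per output neuron
  const int N = 500;           // ip1 output neurons
  std::printf("pool2->ip1 weights: %d\n", N * K);   // 500*50*11*11 = 3025000
  std::printf("plus biases: %d\n", N * K + N);      // one bias per neuron
  return 0;
}
```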
Next, consider its header file (listed in full in the appendix), which declares the following members:
Like most layers, it must implement setup, reshape, Forward_cpu, and Backward_cpu:
* setup defines the parameters: M_ is the number of samples, K_ the feature length of a single sample, and N_ the number of neurons after the fully connected layer.
* Forward_cpu computes y = x*W' + b, where x is the input and y the output: x has shape M_*K_, y has shape M_*N_, W (the weights) has shape N_*K_ (W_diff, the weight gradient, is likewise N_*K_), and b (the bias) has shape N_*1.
* Backward_cpu is the backpropagation step: it computes the gradients (deltas) with respect to W, b, and the input, which the solver then uses to update W and b; a plain-loop sketch of this math follows the list.
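For reference, here is a minimal plain-loop sketch of the same forward and backward math (names and layout are illustrative, not Caffe's API; row-major storage assumed):

```cpp
#include <vector>

// x: M x K (input), w: N x K (weights), b: N (bias)
// y: M x N (output), dy: M x N (top gradient)
void ip_forward(int M, int K, int N, const std::vector<float>& x,
                const std::vector<float>& w, const std::vector<float>& b,
                std::vector<float>& y) {
  for (int m = 0; m < M; ++m)
    for (int n = 0; n < N; ++n) {
      float acc = b[n];                      // start from the bias
      for (int k = 0; k < K; ++k)
        acc += x[m * K + k] * w[n * K + k];  // y = x * w' + b
      y[m * N + n] = acc;
    }
}

void ip_backward(int M, int K, int N, const std::vector<float>& x,
                 const std::vector<float>& w, const std::vector<float>& dy,
                 std::vector<float>& dw, std::vector<float>& db,
                 std::vector<float>& dx) {
  for (int n = 0; n < N; ++n)                // dw = dy' * x  (N x K)
    for (int k = 0; k < K; ++k) {
      float acc = 0.f;
      for (int m = 0; m < M; ++m)
        acc += dy[m * N + n] * x[m * K + k];
      dw[n * K + k] = acc;
    }
  for (int n = 0; n < N; ++n) {              // db = column sums of dy  (N)
    float acc = 0.f;
    for (int m = 0; m < M; ++m) acc += dy[m * N + n];
    db[n] = acc;
  }
  for (int m = 0; m < M; ++m)                // dx = dy * w  (M x K)
    for (int k = 0; k < K; ++k) {
      float acc = 0.f;
      for (int n = 0; n < N; ++n)
        acc += dy[m * N + n] * w[n * K + k];
      dx[m * K + k] = acc;
    }
}
```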
Now let's look at the actual code:
```cpp
#include <vector>

#include "caffe/blob.hpp"
#include "caffe/common.hpp"
#include "caffe/filler.hpp"
#include "caffe/layer.hpp"
#include "caffe/util/math_functions.hpp"
#include "caffe/vision_layers.hpp"

namespace caffe {

/*
  bottom: (M_, K_, 1, 1);  top: (M_, N_, 1, 1);
  W matrix: (N_, K_, 1, 1);  b matrix: (N_, 1, 1, 1);
  M_: number of samples, K_: feature length of a single sample,
  N_: number of neurons after the fully connected layer.
*/
template <typename Dtype>
void InnerProductLayer<Dtype>::LayerSetUp(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top) {
  // Read the number of output neurons and the bias flag from the layer proto.
  const int num_output = this->layer_param_.inner_product_param().num_output();
  bias_term_ = this->layer_param_.inner_product_param().bias_term();
  // Number of neurons output by the fully connected layer.
  N_ = num_output;
  // The output Blob has shape M_ x N_ x 1 x 1 (samples x output neurons).
  // Here axis = 1, so flattening starts from C, i.e. over CHW.
  const int axis = bottom[0]->CanonicalAxisIndex(
      this->layer_param_.inner_product_param().axis());
  // Dimensions starting from "axis" are "flattened" into a single
  // length K_ vector. For example, if bottom[0]'s shape is (N, C, H, W),
  // and axis == 1, N inner products with dimension CHW are performed.
  // K_ is the feature length of a single sample.
  K_ = bottom[0]->count(axis);
  // Check if we need to set up the weights
  if (this->blobs_.size() > 0) {
    LOG(INFO) << "Skipping parameter initialization";
  } else {
    // If the prototxt enables the bias term, allocate two Blob smart
    // pointers; otherwise just one.
    if (bias_term_) {
      this->blobs_.resize(2);
    } else {
      this->blobs_.resize(1);
    }
    // Initialize the weight.
    vector<int> weight_shape(2);
    // blobs_[0] points at the weight matrix, blobs_[1] at the bias;
    // for a fully connected layer the weight shape is N_ x K_ x 1 x 1.
    weight_shape[0] = N_;
    weight_shape[1] = K_;
    // Allocate a new Blob and hand the pointer to blobs_[0].
    this->blobs_[0].reset(new Blob<Dtype>(weight_shape));
    // Fill the weight matrix blobs_[0] according to the weight_filler
    // type given in the configuration.
    shared_ptr<Filler<Dtype> > weight_filler(GetFiller<Dtype>(
        this->layer_param_.inner_product_param().weight_filler()));
    weight_filler->Fill(this->blobs_[0].get());
    // If necessary, initialize and fill the bias term:
    // blobs_[1] holds one bias per output neuron, N_ in total.
    if (bias_term_) {
      vector<int> bias_shape(1, N_);
      this->blobs_[1].reset(new Blob<Dtype>(bias_shape));
      shared_ptr<Filler<Dtype> > bias_filler(GetFiller<Dtype>(
          this->layer_param_.inner_product_param().bias_filler()));
      bias_filler->Fill(this->blobs_[1].get());
    }
  }  // parameter initialization
  this->param_propagate_down_.resize(this->blobs_.size(), true);
}

// A batch processes several samples; within one batch the weight and bias
// matrices stay fixed.
template <typename Dtype>
void InnerProductLayer<Dtype>::Reshape(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top) {
  // Figure out the dimensions
  const int axis = bottom[0]->CanonicalAxisIndex(
      this->layer_param_.inner_product_param().axis());
  const int new_K = bottom[0]->count(axis);
  CHECK_EQ(K_, new_K)
      << "Input size incompatible with inner product parameters.";
  // The first "axis" dimensions are independent inner products; the total
  // number of these is M_, the product over these dimensions.
  M_ = bottom[0]->count(0, axis);
  // The top shape will be the bottom shape with the flattened axes dropped,
  // and replaced by a single axis with dimension num_output (N_).
  vector<int> top_shape = bottom[0]->shape();
  top_shape.resize(axis + 1);
  top_shape[axis] = N_;
  top[0]->Reshape(top_shape);
  // Set up the bias multiplier
  if (bias_term_) {
    vector<int> bias_shape(1, M_);
    bias_multiplier_.Reshape(bias_shape);
    caffe_set(M_, Dtype(1), bias_multiplier_.mutable_cpu_data());
  }
}

// Computes y = x*W' + b, where x is the input and y the output.
// x: input,   M_ x K_
// y: output,  M_ x N_
// W: weights, N_ x K_ (W_diff, the weight gradient, is also N_ x K_)
// b: bias,    N_ x 1
template <typename Dtype>
void InnerProductLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
    const vector<Blob<Dtype>*>& top) {
  const Dtype* bottom_data = bottom[0]->cpu_data();
  Dtype* top_data = top[0]->mutable_cpu_data();
  const Dtype* weight = this->blobs_[0]->cpu_data();  // stored as N_ x K_
  // caffe_cpu_gemm computes C <- alpha*A*B + beta*C; the first two arguments
  // control whether A and B are transposed.
  // Here A(bottom_data) is M_ x K_, B(weight') is K_ x N_, and
  // C(top_data) is M_ x N_, so finally y = x*W' with shape M_ x N_.
  caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasTrans, M_, N_, K_, (Dtype)1.,
      bottom_data, weight, (Dtype)0., top_data);
  // y = y + b: bias_multiplier is M_ x 1 and b is 1 x N_ (b is really
  // N_ x 1, but the storage is equivalent), top_data is M_ x N_.
  // This effectively replicates b into an M_ x N_ matrix, like MATLAB's
  // repmat(b, [M_, 1]), and adds it to top_data.
  if (bias_term_) {
    caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, M_, N_, 1, (Dtype)1.,
        bias_multiplier_.cpu_data(),
        this->blobs_[1]->cpu_data(), (Dtype)1., top_data);
  }
}

template <typename Dtype>
void InnerProductLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
    const vector<bool>& propagate_down,
    const vector<Blob<Dtype>*>& bottom) {
  // "data" carries activations, "diff" carries gradients; top_diff is
  // M_ x N_, one error term per sample per row (the first gemm below uses
  // it transposed, as N_ x M_).
  if (this->param_propagate_down_[0]) {
    const Dtype* top_diff = top[0]->cpu_diff();
    const Dtype* bottom_data = bottom[0]->cpu_data();
    // Gradient with respect to weight
    // A(top_diff'): N_ x M_, B(bottom_data): M_ x K_, C(W_diff): N_ x K_
    // W_diff = top_diff' * bottom_data
    caffe_cpu_gemm<Dtype>(CblasTrans, CblasNoTrans, N_, K_, M_, (Dtype)1.,
        top_diff, bottom_data, (Dtype)0., this->blobs_[0]->mutable_cpu_diff());
  }
  if (bias_term_ && this->param_propagate_down_[1]) {
    const Dtype* top_diff = top[0]->cpu_diff();
    // Gradient with respect to bias
    // top_diff: M_ x N_, bias_multiplier: M_ x 1, b_diff: N_ x 1
    // b_diff = top_diff' * bias_multiplier; note how the gemv interface
    // differs from gemm.
    caffe_cpu_gemv<Dtype>(CblasTrans, M_, N_, (Dtype)1., top_diff,
        bias_multiplier_.cpu_data(), (Dtype)0.,
        this->blobs_[1]->mutable_cpu_diff());
  }
  if (propagate_down[0]) {
    const Dtype* top_diff = top[0]->cpu_diff();
    // Gradient with respect to bottom data
    // A(top_diff): M_ x N_, B(weight): N_ x K_, C(bottom_diff): M_ x K_
    // bottom_diff = top_diff * weight
    caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, M_, K_, N_, (Dtype)1.,
        top_diff, this->blobs_[0]->cpu_data(), (Dtype)0.,
        bottom[0]->mutable_cpu_diff());
  }
}

#ifdef CPU_ONLY
STUB_GPU(InnerProductLayer);
#endif

INSTANTIATE_CLASS(InnerProductLayer);
REGISTER_LAYER_CLASS(InnerProduct);

}  // namespace caffe
```
Three points deserve explanation:
(1) Understanding axis. As the caffe.proto definition quoted in the appendix puts it, axis is the first axis to be lumped into a single inner-product computation (-1 means the last axis). Here axis = 1, so the blob is unrolled starting from the channel dimension c, while n remains the number of samples.
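A minimal sketch of how this flattening determines M_ and K_ (the shape values here are illustrative):

```cpp
#include <cstdio>
#include <vector>

// Illustrative only: how axis splits a (N, C, H, W) blob into M_ x K_.
int main() {
  std::vector<int> shape = {64, 50, 11, 11};  // (N, C, H, W)
  const int axis = 1;
  int M = 1, K = 1;
  for (int i = 0; i < axis; ++i) M *= shape[i];                  // count(0, axis)
  for (int i = axis; i < (int)shape.size(); ++i) K *= shape[i];  // count(axis)
  std::printf("M_ = %d, K_ = %d\n", M, K);  // M_ = 64, K_ = 6050
  return 0;
}
```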
(2) The code relies on the function caffe_cpu_gemm, whose float specialization is implemented as follows:
```cpp
template <>
void caffe_cpu_gemm<float>(const CBLAS_TRANSPOSE TransA,
    const CBLAS_TRANSPOSE TransB, const int M, const int N, const int K,
    const float alpha, const float* A, const float* B, const float beta,
    float* C) {
  int lda = (TransA == CblasNoTrans) ? K : M;
  int ldb = (TransB == CblasNoTrans) ? N : K;
  cblas_sgemm(CblasRowMajor, TransA, TransB, M, N, K, alpha, A, lda, B,
      ldb, beta, C, N);
}
```
In other words, a matrix multiply-accumulate: C ← alpha·A·B + beta·C. The function multiplies A by B (transposing either one first if requested), scales the product by alpha, scales the existing C by beta, and stores the sum of the two in C.
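To make the Forward_cpu call concrete, here is a plain-loop sketch of that specific gemm configuration (illustrative, not the BLAS implementation; the function name is made up):

```cpp
// Equivalent of caffe_cpu_gemm<float>(CblasNoTrans, CblasTrans, M, N, K,
// alpha, A, B, beta, C): C (M x N) <- alpha * A * B' + beta * C,
// the exact call used in Forward_cpu (A = bottom_data, B = weight).
void gemm_notrans_trans_ref(int M, int N, int K, float alpha,
                            const float* A, const float* B, float beta,
                            float* C) {
  for (int m = 0; m < M; ++m)
    for (int n = 0; n < N; ++n) {
      float acc = 0.f;
      for (int k = 0; k < K; ++k)
        acc += A[m * K + k] * B[n * K + k];  // B is N x K, used transposed
      C[m * N + n] = alpha * acc + beta * C[m * N + n];
    }
}
```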
(3) caffe_cpu_gemv is the matrix-vector counterpart: y ← alpha·A·x + beta·y. It multiplies the matrix A (transposed first if requested) by the vector x, scales the product by alpha, scales the existing vector y by beta, and stores the sum of the two in y.
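Likewise, a plain-loop sketch of the transposed gemv call used for the bias gradient (illustrative only; the function name is made up):

```cpp
// Equivalent of caffe_cpu_gemv<float>(CblasTrans, M, N, alpha, A, x, beta, y):
// y (length N) <- alpha * A' * x + beta * y, with A stored row-major as M x N.
// This matches the bias-gradient call in Backward_cpu
// (A = top_diff, x = bias_multiplier, y = b_diff).
void gemv_trans_ref(int M, int N, float alpha, const float* A,
                    const float* x, float beta, float* y) {
  for (int n = 0; n < N; ++n) {
    float acc = 0.f;
    for (int m = 0; m < M; ++m)
      acc += A[m * N + n] * x[m];  // column n of A dotted with x
    y[n] = alpha * acc + beta * y[n];
  }
}
```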
Appendix:
1. The layer definition in a prototxt
```protobuf
layer {
  bottom: "fc7"
  top: "fc8"
  name: "fc8"
  type: "InnerProduct"
  param {            # weight learning parameters
    lr_mult: 10      # learning-rate multiplier
    decay_mult: 1
  }
  param {            # bias learning parameters
    lr_mult: 20      # the bias learning rate is usually twice the weight one
    decay_mult: 0
  }
  inner_product_param {
    num_output: 1000   # number of output units
    weight_filler {    # weight initialization method
      type: "gaussian"
      std: 0.005
    }
    bias_filler {      # bias initialization method
      type: "constant"
      value: 0.1
    }
  }
}
```

2. The definition in caffe.proto
```protobuf
message LayerParameter {
  optional InnerProductParameter inner_product_param = 117;
}

message InnerProductParameter {
  optional uint32 num_output = 1;                // number of layer outputs
  optional bool bias_term = 2 [default = true];  // whether to use a bias term
  optional FillerParameter weight_filler = 3;    // weight filler
  optional FillerParameter bias_filler = 4;      // bias filler
  // The first axis to be lumped into a single inner-product computation;
  // -1 means the last axis.
  optional int32 axis = 5 [default = 1];
  // Whether the weight matrix is stored transposed.
  optional bool transpose = 6 [default = false];
}
```

3. inner_product_layer.hpp
```cpp
#ifndef CAFFE_INNER_PRODUCT_LAYER_HPP_
#define CAFFE_INNER_PRODUCT_LAYER_HPP_

#include <vector>

#include "caffe/blob.hpp"
#include "caffe/layer.hpp"
#include "caffe/proto/caffe.pb.h"

namespace caffe {

/**
 * @brief Also known as a "fully-connected" layer, computes an inner product
 *        with a set of learned weights, and (optionally) adds biases.
 *
 * TODO(dox): thorough documentation for Forward, Backward, and proto params.
 */
template <typename Dtype>
class InnerProductLayer : public Layer<Dtype> {
 public:
  explicit InnerProductLayer(const LayerParameter& param)
      : Layer<Dtype>(param) {}
  virtual void LayerSetUp(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top);
  virtual void Reshape(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top);

  virtual inline const char* type() const { return "InnerProduct"; }
  virtual inline int ExactNumBottomBlobs() const { return 1; }
  virtual inline int ExactNumTopBlobs() const { return 1; }

 protected:
  virtual void Forward_cpu(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top);
  virtual void Forward_gpu(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top);
  virtual void Backward_cpu(const vector<Blob<Dtype>*>& top,
      const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom);
  virtual void Backward_gpu(const vector<Blob<Dtype>*>& top,
      const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom);

  int M_;  // number of samples
  int K_;  // feature dimension of a single sample
  int N_;  // number of output neurons
  bool bias_term_;
  Blob<Dtype> bias_multiplier_;
  bool transpose_;  ///< if true, assume transposed weights
};

}  // namespace caffe

#endif  // CAFFE_INNER_PRODUCT_LAYER_HPP_
```

4. inner_product_layer.cpp
```cpp
#include <vector>

#include "caffe/filler.hpp"
#include "caffe/layers/inner_product_layer.hpp"
#include "caffe/util/math_functions.hpp"

namespace caffe {

template <typename Dtype>
void InnerProductLayer<Dtype>::LayerSetUp(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top) {
  // Number of output units.
  const int num_output = this->layer_param_.inner_product_param().num_output();
  bias_term_ = this->layer_param_.inner_product_param().bias_term();
  transpose_ = this->layer_param_.inner_product_param().transpose();
  N_ = num_output;
  const int axis = bottom[0]->CanonicalAxisIndex(
      this->layer_param_.inner_product_param().axis());
  // Dimensions starting from axis are flattened into a vector of length K_.
  // For example, for bottom[0] of shape (N, C, H, W) and axis = 1, N inner
  // products of dimension CHW are performed; the output has shape
  // samples x output units x 1 x 1 (M x N x 1 x 1).
  K_ = bottom[0]->count(axis);
  // Check if we need to set up the weights
  if (this->blobs_.size() > 0) {
    LOG(INFO) << "Skipping parameter initialization";
  } else {
    if (bias_term_) {
      this->blobs_.resize(2);
    } else {
      this->blobs_.resize(1);
    }
    // Weight initialization.
    vector<int> weight_shape(2);
    if (transpose_) {  // is the weight matrix stored transposed?
      weight_shape[0] = K_;
      weight_shape[1] = N_;
    } else {
      weight_shape[0] = N_;
      weight_shape[1] = K_;
    }
    this->blobs_[0].reset(new Blob<Dtype>(weight_shape));
    // Fill the weights: blobs_[0] is N_ x K_ x 1 x 1.
    shared_ptr<Filler<Dtype> > weight_filler(GetFiller<Dtype>(
        this->layer_param_.inner_product_param().weight_filler()));
    weight_filler->Fill(this->blobs_[0].get());
    // If there is a bias term, initialize it:
    // blobs_[1] holds one bias per output unit, N_ in total.
    if (bias_term_) {
      vector<int> bias_shape(1, N_);
      this->blobs_[1].reset(new Blob<Dtype>(bias_shape));
      shared_ptr<Filler<Dtype> > bias_filler(GetFiller<Dtype>(
          this->layer_param_.inner_product_param().bias_filler()));
      bias_filler->Fill(this->blobs_[1].get());
    }
  }  // parameter initialization
  this->param_propagate_down_.resize(this->blobs_.size(), true);
}

template <typename Dtype>
void InnerProductLayer<Dtype>::Reshape(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top) {
  // Figure out the dimensions
  const int axis = bottom[0]->CanonicalAxisIndex(
      this->layer_param_.inner_product_param().axis());
  const int new_K = bottom[0]->count(axis);
  CHECK_EQ(K_, new_K)
      << "Input size incompatible with inner product parameters.";
  // The first "axis" dimensions are independent inner products; the total
  // number of these is M_, the product over these dimensions.
  M_ = bottom[0]->count(0, axis);  // number of samples, i.e. the batch size
  // The top shape will be the bottom shape with the flattened axes dropped,
  // and replaced by a single axis with dimension num_output (N_).
  vector<int> top_shape = bottom[0]->shape();
  top_shape.resize(axis + 1);
  top_shape[axis] = N_;
  top[0]->Reshape(top_shape);
  // Set up the bias multiplier
  if (bias_term_) {
    vector<int> bias_shape(1, M_);
    bias_multiplier_.Reshape(bias_shape);
    caffe_set(M_, Dtype(1), bias_multiplier_.mutable_cpu_data());  // all ones
  }
}

// Forward pass: y = x * W' + b
template <typename Dtype>
void InnerProductLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
    const vector<Blob<Dtype>*>& top) {
  const Dtype* bottom_data = bottom[0]->cpu_data();
  Dtype* top_data = top[0]->mutable_cpu_data();
  const Dtype* weight = this->blobs_[0]->cpu_data();
  caffe_cpu_gemm<Dtype>(CblasNoTrans, transpose_ ? CblasNoTrans : CblasTrans,
      M_, N_, K_, (Dtype)1.,
      bottom_data, weight, (Dtype)0., top_data);
  if (bias_term_) {
    caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, M_, N_, 1, (Dtype)1.,
        bias_multiplier_.cpu_data(),
        this->blobs_[1]->cpu_data(), (Dtype)1., top_data);
  }
}

// Backward pass.
template <typename Dtype>
void InnerProductLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
    const vector<bool>& propagate_down,
    const vector<Blob<Dtype>*>& bottom) {
  if (this->param_propagate_down_[0]) {
    // top_diff: M x N, one row of error terms per sample.
    const Dtype* top_diff = top[0]->cpu_diff();
    const Dtype* bottom_data = bottom[0]->cpu_data();
    // Gradient with respect to weight
    if (transpose_) {
      caffe_cpu_gemm<Dtype>(CblasTrans, CblasNoTrans,
          K_, N_, M_,
          (Dtype)1., bottom_data, top_diff,
          (Dtype)1., this->blobs_[0]->mutable_cpu_diff());
    } else {
      caffe_cpu_gemm<Dtype>(CblasTrans, CblasNoTrans,
          N_, K_, M_,
          (Dtype)1., top_diff, bottom_data,
          (Dtype)1., this->blobs_[0]->mutable_cpu_diff());
    }
  }
  if (bias_term_ && this->param_propagate_down_[1]) {
    const Dtype* top_diff = top[0]->cpu_diff();
    // Gradient with respect to bias
    caffe_cpu_gemv<Dtype>(CblasTrans, M_, N_, (Dtype)1., top_diff,
        bias_multiplier_.cpu_data(), (Dtype)1.,
        this->blobs_[1]->mutable_cpu_diff());
  }
  if (propagate_down[0]) {
    const Dtype* top_diff = top[0]->cpu_diff();
    // Gradient with respect to bottom data
    if (transpose_) {
      caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasTrans,
          M_, K_, N_,
          (Dtype)1., top_diff, this->blobs_[0]->cpu_data(),
          (Dtype)0., bottom[0]->mutable_cpu_diff());
    } else {
      caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans,
          M_, K_, N_,
          (Dtype)1., top_diff, this->blobs_[0]->cpu_data(),
          (Dtype)0., bottom[0]->mutable_cpu_diff());
    }
  }
}

#ifdef CPU_ONLY
STUB_GPU(InnerProductLayer);
#endif

INSTANTIATE_CLASS(InnerProductLayer);
REGISTER_LAYER_CLASS(InnerProduct);

}  // namespace caffe
```
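To see why the transpose_ option is purely a storage choice, here is a small illustrative check (all names are made up for the example) that the two weight layouts produce identical outputs:

```cpp
#include <cassert>
#include <vector>

// wNK is N_ x K_ (transpose_ == false); wKN is K_ x N_ (transpose_ == true).
float dot_nk(const std::vector<float>& wNK, const std::vector<float>& x,
             int K, int n) {
  float acc = 0.f;
  for (int k = 0; k < K; ++k) acc += wNK[n * K + k] * x[k];
  return acc;
}

float dot_kn(const std::vector<float>& wKN, const std::vector<float>& x,
             int N, int K, int n) {
  float acc = 0.f;
  for (int k = 0; k < K; ++k) acc += wKN[k * N + n] * x[k];
  return acc;
}

int main() {
  const int K = 3, N = 2;
  std::vector<float> wNK = {1, 2, 3, 4, 5, 6};  // rows are output neurons
  std::vector<float> wKN = {1, 4, 2, 5, 3, 6};  // the same weights, transposed
  std::vector<float> x = {1, 1, 1};
  for (int n = 0; n < N; ++n)
    assert(dot_nk(wNK, x, K, n) == dot_kn(wKN, x, N, K, n));
  return 0;
}
```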