Caffe: Analysis of the Net Class (1) -- Original
The Net class is one of Caffe's core classes. It ties the individual Layers and Blobs together and carries out the forward and backward passes.
Below is a detailed walkthrough of the code in Net.cpp:
----------------------------------------------------------------------------------------------------------------------------------
Net<Dtype>::Net constructors. When training a network, which involves iteration settings and parameter-tuning strategies, the Net is usually constructed by the higher-level Solver. When a network is only used for prediction, only the simple forward pass is needed and no parameter tuning, so the Solver is typically skipped and the Net is constructed directly from user code; a usage sketch follows the constructor code below.
// Constructor. NetParameter carries the network definition passed in when a Solver constructs the Net.
// This constructor is normally called by the Solver during training.
// root_net is the root net; if this net is not the root net, layers can be shared from root_net.
template <typename Dtype>
Net<Dtype>::Net(const NetParameter& param, const Net* root_net)
    : root_net_(root_net) {
  Init(param);
}

// Constructor. The network definition is read from a prototxt file.
// This constructor is usually called from user code at test time.
// phase: whether this net runs in TEST or TRAIN mode
// stage: selects which layers are kept in the net (matched against the stage/not_stage fields
//        of each layer's include/exclude rules in the prototxt)
// level: selects which layers are kept in the net (matched against the min_level/max_level fields
//        of each layer's include/exclude rules in the prototxt)
template <typename Dtype>
Net<Dtype>::Net(const string& param_file, Phase phase,
    const int level, const vector<string>* stages,
    const Net* root_net)
    : root_net_(root_net) {
  NetParameter param;
  ReadNetParamsFromTextFileOrDie(param_file, &param);
  // Set phase, stages and level
  param.mutable_state()->set_phase(phase);
  if (stages != NULL) {
    for (int i = 0; i < stages->size(); i++) {
      // Add the stages to the NetState; the NetState is later compared against the NetStateRules
      // (from include/exclude in the prototxt) to decide whether each layer is kept.
      param.mutable_state()->add_stage((*stages)[i]);
    }
  }
  // Add the level to the NetState; it is compared against the NetStateRules
  // (from include/exclude in the prototxt) to decide whether each layer is kept.
  param.mutable_state()->set_level(level);
  Init(param);
}
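A minimal usage sketch of the second constructor is given below. It is only an illustration: the file names deploy.prototxt and model.caffemodel are placeholders and error handling is omitted. It builds a Net directly for inference, without a Solver, as described above.

#include <vector>
#include "caffe/caffe.hpp"
using namespace caffe;

int main() {
  Caffe::set_mode(Caffe::CPU);
  // Optional stages used to filter layers (see FilterNet below)
  std::vector<std::string> stages;
  stages.push_back("predict");
  // Build the net in TEST phase; level/stages select which layers are kept
  Net<float> net("deploy.prototxt", TEST, /*level=*/0, &stages);
  // Load trained weights into the layers' parameter blobs
  net.CopyTrainedLayersFrom("model.caffemodel");
  // Fill the input blob with data, then run the forward pass
  Blob<float>* input = net.input_blobs()[0];
  float* in = input->mutable_cpu_data();
  for (int i = 0; i < input->count(); ++i) { in[i] = 0.0f; }  // dummy input
  net.Forward();
  Blob<float>* output = net.output_blobs()[0];
  LOG(INFO) << "output shape: " << output->shape_string();
  return 0;
}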
Net<Dtype>::Init initializes every layer in the net, each layer's input/output blobs, and each layer's parameter blobs.
template <typename Dtype>
void Net<Dtype>::Init(const NetParameter& in_param) {
  // Check that a root net exists for non-root solvers
  CHECK(Caffe::root_solver() || root_net_)
      << "root_net_ needs to be set for all non-root solvers";
  // Set phase from the state.
  phase_ = in_param.state().phase();
  // Filter layers based on their include/exclude rules and
  // the current NetState.
  NetParameter filtered_param;
  // Filter the NetParameter with the stage/level rules; the result goes into filtered_param
  FilterNet(in_param, &filtered_param);
  LOG_IF(INFO, Caffe::root_solver())
      << "Initializing net from parameters: " << std::endl
      << filtered_param.DebugString();
  // Create a copy of filtered_param with splits added where necessary.
  NetParameter param;
  // Make a copy of the filtered filtered_param (with split layers inserted)
  InsertSplits(filtered_param, &param);
  // Basically, build all the layers and set up their connections.
  name_ = param.name();
  map<string, int> blob_name_to_idx;
  set<string> available_blobs;
  memory_used_ = 0;
  // For each layer, set up its input and output
  // Input blobs of each layer. bottom_vecs_ is a two-dimensional vector:
  // the first dimension is the layer, the second is each bottom blob of that layer
  bottom_vecs_.resize(param.layer_size());
  // Output blobs of each layer
  top_vecs_.resize(param.layer_size());
  // Ids of each layer's input blobs
  bottom_id_vecs_.resize(param.layer_size());
  // Ids of each layer's parameter blobs
  param_id_vecs_.resize(param.layer_size());
  // Ids of each layer's output blobs
  top_id_vecs_.resize(param.layer_size());
  // Whether each layer's input blobs take part in backpropagation
  bottom_need_backward_.resize(param.layer_size());
  for (int layer_id = 0; layer_id < param.layer_size(); ++layer_id) {
    // For non-root solvers, whether this layer is shared from root_net_.
    // Whether this layer is shared from the root_solver (false if this is the root net)
    bool share_from_root = !Caffe::root_solver()
        && root_net_->layers_[layer_id]->ShareInParallel();
    // Inherit phase from net if unset.
    if (!param.layer(layer_id).has_phase()) {
      param.mutable_layer(layer_id)->set_phase(phase_);
    }
    // Setup layer.
    const LayerParameter& layer_param = param.layer(layer_id);
    if (layer_param.propagate_down_size() > 0) {
      CHECK_EQ(layer_param.propagate_down_size(),
          layer_param.bottom_size())
          << "propagate_down param must be specified "
          << "either 0 or bottom_size times ";
    }
    //////////////////////////////////////////////////////////////
    // Create the layer and push it into layers_: either shared from the
    // root_solver, or newly created
    //////////////////////////////////////////////////////////////
    if (share_from_root) {
      LOG(INFO) << "Sharing layer " << layer_param.name() << " from root net";
      // root_net_->layers_ holds all of the root net's layers
      layers_.push_back(root_net_->layers_[layer_id]);
      layers_[layer_id]->SetShared(true);
    } else {
      // For the net of root_solver(): every layer is newly created here.
      // For nets of non-root solvers, only non-shared layers are newly created.
      layers_.push_back(LayerRegistry<Dtype>::CreateLayer(layer_param));
    }
    layer_names_.push_back(layer_param.name());
    LOG_IF(INFO, Caffe::root_solver())
        << "Creating Layer " << layer_param.name();
    bool need_backward = false;

    /////////////////////////////////////////////////////////////////////////
    // 1. Set up the bottom blobs: bind bottom_vecs_ to the addresses of
    //    blobs_[blob_id], and bottom_id_vecs_ to the blob ids.
    // 2. Data input layers only have tops and no bottoms, so they skip this loop.
    /////////////////////////////////////////////////////////////////////////
    for (int bottom_id = 0; bottom_id < layer_param.bottom_size();
         ++bottom_id) {
      // 1. Bottoms and tops are initialized alternately: the previous layer's top is the
      //    next layer's bottom, so the available_blobs/blob_name_to_idx state left by the
      //    previous layer's tops is exactly what the next layer's bottoms consume.
      // 2. AppendBottom binds bottom_vecs_ to blobs_[id] and bottom_id_vecs_ to the blob id.
      const int blob_id = AppendBottom(param, layer_id, bottom_id,
                                       &available_blobs, &blob_name_to_idx);
      // If a blob needs backward, this layer should provide it.
      // blob_need_backward_[blob_id] is handed down from the previous layer's top blob and
      // OR-ed with this layer's bottom_need_backward_[layer_id][bottom_id];
      // need_backward is the final decision on whether this layer does backward computation:
      // it combines all the blob_need_backward_ and param_need_backward values.
      need_backward |= blob_need_backward_[blob_id];
    }
    int num_top = layer_param.top_size();
    ////////////////////////////////////////////////////////////////////////
    // Set up the top blobs: bind top_vecs_ to the addresses of blobs_[blob_id]
    // and top_id_vecs_ to the blob ids; AppendTop also creates the new blobs.
    ////////////////////////////////////////////////////////////////////////
    for (int top_id = 0; top_id < num_top; ++top_id) {
      // Through AppendTop and AppendBottom, bottom_vecs_ and top_vecs_ are chained together.
      // AppendTop inserts each layer's output blobs into available_blobs, and AppendBottom
      // removes the previous layer's output blobs from it; whatever is left after all layers
      // have been visited is the output of the whole net.
      AppendTop(param, layer_id, top_id, &available_blobs, &blob_name_to_idx);
      // Collect Input layer tops as Net inputs.
      if (layer_param.type() == "Input") {
        // For the net's input layers, every top blob created via AppendTop grows
        // blobs_.size() by 1; since blobs_.size() starts at 0, it can serve as the id
        // of the net's input blobs.
        const int blob_id = blobs_.size() - 1;
        net_input_blob_indices_.push_back(blob_id);
        net_input_blobs_.push_back(blobs_[blob_id].get());
      }
    }
    // If the layer specifies that AutoTopBlobs() -> true and the LayerParameter
    // specified fewer than the required number (as specified by
    // ExactNumTopBlobs() or MinTopBlobs()), allocate them here.
    Layer<Dtype>* layer = layers_[layer_id].get();
    ////////////////////////////////////////////////////////////////////////
    // Add extra top blobs so that this layer has the required number of tops
    ////////////////////////////////////////////////////////////////////////
    if (layer->AutoTopBlobs()) {
      const int needed_num_top =
          std::max(layer->MinTopBlobs(), layer->ExactNumTopBlobs());
      // Only when the number of tops this layer already has (num_top) is smaller than the
      // required number (needed_num_top) are blobs auto-generated to fill the gap.
      for (; num_top < needed_num_top; ++num_top) {
        // Add "anonymous" top blobs -- do not modify available_blobs or
        // blob_name_to_idx as we don't want these blobs to be usable as input
        // to other layers.
        AppendTop(param, layer_id, num_top, NULL, NULL);
      }
    }
    //////////////////////////////////////////////////////////////////////////
    // After this layer is connected, set it up.
    // Initialize the shape of every top blob (the bottom and top blob addresses were already
    // linked above, so the bottom blobs do not need to be reshaped here).
    //////////////////////////////////////////////////////////////////////////
    if (share_from_root) {
      // Set up size of top blobs using root_net_
      const vector<Blob<Dtype>*>& base_top = root_net_->top_vecs_[layer_id];
      const vector<Blob<Dtype>*>& this_top = this->top_vecs_[layer_id];
      for (int top_id = 0; top_id < base_top.size(); ++top_id) {
        this_top[top_id]->ReshapeLike(*base_top[top_id]);
        LOG(INFO) << "Created top blob " << top_id << " (shape: "
            << this_top[top_id]->shape_string() << ") for shared layer "
            << layer_param.name();
      }
    } else {
      // Both the net of caffe::root_solver and the non-shared layers of other nets
      // take this branch.
      layers_[layer_id]->SetUp(bottom_vecs_[layer_id], top_vecs_[layer_id]);
    }
    LOG_IF(INFO, Caffe::root_solver())
        << "Setting up " << layer_names_[layer_id];
    /////////////////////////////////////////////////////////////////////
    // Initialize blob_loss_weights_, which stores the loss weights.
    // blob_loss_weights_ covers the top blobs of every layer, but only the
    // final loss/output layer has non-zero values.
    /////////////////////////////////////////////////////////////////////
    for (int top_id = 0; top_id < top_vecs_[layer_id].size(); ++top_id) {
      if (blob_loss_weights_.size() <= top_id_vecs_[layer_id][top_id]) {
        // top_id_vecs_[layer_id][top_id] is the blob id numbered over the whole net
        // (not the per-layer index starting at 0); every top blob has a matching
        // blob_loss_weights_[id] entry holding its loss weight, which is 0 for
        // everything except the net's output blobs.
        blob_loss_weights_.resize(top_id_vecs_[layer_id][top_id] + 1, Dtype(0));
      }
      blob_loss_weights_[top_id_vecs_[layer_id][top_id]] = layer->loss(top_id);
      LOG_IF(INFO, Caffe::root_solver())
          << "Top shape: " << top_vecs_[layer_id][top_id]->shape_string();
      if (layer->loss(top_id)) {
        LOG_IF(INFO, Caffe::root_solver())
            << " with loss weight " << layer->loss(top_id);
      }
      memory_used_ += top_vecs_[layer_id][top_id]->count();
    }
    LOG_IF(INFO, Caffe::root_solver())
        << "Memory required for data: " << memory_used_ * sizeof(Dtype);
    ///////////////////////////////////////////////////////////////////////
    // Initialize the parameter blobs: typically the weights live in one blob and the
    // bias in another. This layer's param_need_backward (taken from the LayerParameter)
    // together with its blob_need_backward_ determines this layer's need_backward,
    // which in turn determines layer_need_backward_.
    ///////////////////////////////////////////////////////////////////////
    // Number of parameters defined in the LayerParameter (may be smaller than the actual number)
    const int param_size = layer_param.param_size();
    // Actual number of parameter blobs in this layer
    const int num_param_blobs = layers_[layer_id]->blobs().size();
    CHECK_LE(param_size, num_param_blobs)
        << "Too many params specified for layer " << layer_param.name();
    ParamSpec default_param_spec;
    for (int param_id = 0; param_id < num_param_blobs; ++param_id) {
      const ParamSpec* param_spec = (param_id < param_size) ?
          &layer_param.param(param_id) : &default_param_spec;
      // lr_mult is the learning-rate multiplier
      const bool param_need_backward = param_spec->lr_mult() != 0;
      // need_backward is the final decision on whether this layer does backward computation:
      // it combines all the blob_need_backward_ and param_need_backward values.
      need_backward |= param_need_backward;
      layers_[layer_id]->set_param_propagate_down(param_id,
                                                  param_need_backward);
    }
    // A layer usually has two parameter blobs: the first stores the weights,
    // the second stores the bias
    for (int param_id = 0; param_id < num_param_blobs; ++param_id) {
      AppendParam(param, layer_id, param_id);
    }
    // Finally, set the backward flag
    // As long as any of this layer's bottom blobs or param blobs needs backward,
    // need_backward is true
    layer_need_backward_.push_back(need_backward);
    if (need_backward) {
      // Once this layer needs backward, all of its top blobs need backward too
      for (int top_id = 0; top_id < top_id_vecs_[layer_id].size(); ++top_id) {
        // Set blob_need_backward_ for the corresponding top blob ids; this result
        // is handed down to the bottom blobs of the next layer
        blob_need_backward_[top_id_vecs_[layer_id][top_id]] = true;
      }
    }
  }
  // Go through the net backwards to determine which blobs contribute to the
  // loss. We can skip backward computation for blobs that don't contribute
  // to the loss.
  // Also checks if all bottom blobs don't need backward computation (possible
  // because the skip_propagate_down param) and so we can skip backward
  // computation for the entire layer
  set<string> blobs_under_loss;
  set<string> blobs_skip_backp;
  // Walk every layer in reverse and mark the layers and bottom blobs that do not
  // need backward computation
  for (int layer_id = layers_.size() - 1; layer_id >= 0; --layer_id) {
    bool layer_contributes_loss = false;
    bool layer_skip_propagate_down = true;
    ///////////////////////////////////////////////////////////////////
    // Go over each top blob of this layer to decide whether the layer contributes
    // to the loss and whether it needs backward computation
    ///////////////////////////////////////////////////////////////////
    for (int top_id = 0; top_id < top_vecs_[layer_id].size(); ++top_id) {
      const string& blob_name = blob_names_[top_id_vecs_[layer_id][top_id]];
      // layer_contributes_loss is set to true if this layer is the net's final output layer
      // or if this top blob contributes to the final loss. The true value originates from
      // the net's final output layer; for every other layer it is derived by checking
      // whether one of its top blobs is in blobs_under_loss, which was filled when the
      // layer above processed its bottoms.
      if (layers_[layer_id]->loss(top_id) ||
          (blobs_under_loss.find(blob_name) != blobs_under_loss.end())) {
        layer_contributes_loss = true;
      }
      if (blobs_skip_backp.find(blob_name) == blobs_skip_backp.end()) {
        layer_skip_propagate_down = false;
      }
      // As soon as one blob of this layer contributes to the loss and one blob needs
      // backward, the final values of both flags are known and we can leave the loop.
      if (layer_contributes_loss && !layer_skip_propagate_down) break;
    }
    // If this layer can skip backward computation, also all his bottom blobs
    // don't need backpropagation
    // If both conditions in the if below hold, they contradict each other, so this
    // layer does not do backward computation
    if (layer_need_backward_[layer_id] && layer_skip_propagate_down) {
      layer_need_backward_[layer_id] = false;
      for (int bottom_id = 0; bottom_id < bottom_vecs_[layer_id].size();
               ++bottom_id) {
        bottom_need_backward_[layer_id][bottom_id] = false;
      }
    }
    if (!layer_contributes_loss) { layer_need_backward_[layer_id] = false; }
    if (Caffe::root_solver()) {
      if (layer_need_backward_[layer_id]) {
        LOG(INFO) << layer_names_[layer_id] << " needs backward computation.";
      } else {
        LOG(INFO) << layer_names_[layer_id]
            << " does not need backward computation.";
      }
    }
    for (int bottom_id = 0; bottom_id < bottom_vecs_[layer_id].size();
         ++bottom_id) {
      if (layer_contributes_loss) {
        const string& blob_name =
            blob_names_[bottom_id_vecs_[layer_id][bottom_id]];
        // Insert into blobs_under_loss
        blobs_under_loss.insert(blob_name);
      } else {
        // If this layer does not contribute to the loss, it does not need backward
        bottom_need_backward_[layer_id][bottom_id] = false;
      }
      if (!bottom_need_backward_[layer_id][bottom_id]) {
        const string& blob_name =
            blob_names_[bottom_id_vecs_[layer_id][bottom_id]];
        blobs_skip_backp.insert(blob_name);
      }
    }
  }
  //////////////////////////////////////////////////////////////////////////////
  // If the net forces backward, set layer_need_backward_ to true;
  // blob_need_backward_ is then also decided by layers_[layer_id]->AllowForceBackward(bottom_id)
  //////////////////////////////////////////////////////////////////////////////
  if (param.force_backward()) {
    for (int layer_id = 0; layer_id < layers_.size(); ++layer_id) {
      layer_need_backward_[layer_id] = true;
      for (int bottom_id = 0;
           bottom_id < bottom_need_backward_[layer_id].size(); ++bottom_id) {
        bottom_need_backward_[layer_id][bottom_id] =
            bottom_need_backward_[layer_id][bottom_id] ||
            layers_[layer_id]->AllowForceBackward(bottom_id);
        blob_need_backward_[bottom_id_vecs_[layer_id][bottom_id]] =
            blob_need_backward_[bottom_id_vecs_[layer_id][bottom_id]] ||
            bottom_need_backward_[layer_id][bottom_id];
      }
      for (int param_id = 0; param_id < layers_[layer_id]->blobs().size();
           ++param_id) {
        layers_[layer_id]->set_param_propagate_down(param_id, true);
      }
    }
  }
  // In the end, all remaining blobs are considered output blobs.
  // AppendBottom has already removed the consumed bottom blobs from available_blobs;
  // only the topmost top blobs remain, and they are the net's output blobs
  for (set<string>::iterator it = available_blobs.begin();
      it != available_blobs.end(); ++it) {
    LOG_IF(INFO, Caffe::root_solver())
        << "This network produces output " << *it;
    net_output_blobs_.push_back(blobs_[blob_name_to_idx[*it]].get());
    net_output_blob_indices_.push_back(blob_name_to_idx[*it]);
  }
  for (size_t blob_id = 0; blob_id < blob_names_.size(); ++blob_id) {
    blob_names_index_[blob_names_[blob_id]] = blob_id;
  }
  for (size_t layer_id = 0; layer_id < layer_names_.size(); ++layer_id) {
    layer_names_index_[layer_names_[layer_id]] = layer_id;
  }
  ShareWeights();
  debug_info_ = param.debug_info();
  LOG_IF(INFO, Caffe::root_solver()) << "Network initialization done.";
}
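Once Init has finished, the structures it populated can be inspected through the Net's public accessors. The short sketch below (the helper name InspectNet is made up) prints the topology that the loops above have built, assuming a net that has already been constructed.

#include "caffe/caffe.hpp"
using namespace caffe;

void InspectNet(const Net<float>& net) {
  // Layers in the order Init created them, with the number of bottoms/tops wired up
  for (int i = 0; i < net.layer_names().size(); ++i) {
    LOG(INFO) << "layer " << i << ": " << net.layer_names()[i]
              << "  bottoms: " << net.bottom_vecs()[i].size()
              << "  tops: " << net.top_vecs()[i].size();
  }
  // The blobs left in available_blobs at the end of Init became the net outputs
  for (int i = 0; i < net.output_blobs().size(); ++i) {
    LOG(INFO) << "net output shape: " << net.output_blobs()[i]->shape_string();
  }
}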
Net::FilterNet filters the NetParameter, keeping only the layers whose rules are satisfied.
Net::StateMeetsRule implements the rule matching used during filtering.
The prototxt that defines the net structure often gives certain layers include/exclude rules, as in the example below. include means the layer is kept in the net if the rule is met when the net is constructed; exclude means the layer is dropped from the net if the rule is met. Once parsed, such an include/exclude block becomes the NetStateRule message from caffe.proto, which has five fields: phase, min_level, max_level, stage and not_stage; these are the filtering rules. What is compared against them? The arguments passed in when the net is constructed; a new net is usually built with:

Net<Dtype>::Net(const string& param_file, Phase phase, const int level, const vector<string>* stages, const Net* root_net)

A rule is met when the phase matches, min_level <= level <= max_level, every stage listed in the rule appears in the stages argument, and none of the rule's not_stage entries does. A layer with include rules is kept in the net if any of its include rules is met; a layer with exclude rules is removed from the net if any of its exclude rules is met. A construction sketch follows the example below.
layer {
  name: "mnist"
  type: "Data"
  top: "data"
  top: "label"
  include {
    phase: TEST
    not_stage: "predict"  # filter this layer out when the "predict" stage is active
  }
  transform_param {
    scale: 0.00390625
  }
  data_param {
    source: "examples/mnist/mnist_test_lmdb"
    batch_size: 100
    backend: LMDB
  }
}
# the input layer added for deploy
layer {
  name: "data"
  type: "Input"
  top: "data"
  input_param {
    shape: { dim: 1 dim: 1 dim: 28 dim: 28 }
  }
  exclude {
    phase: TEST
    stage: "predict"  # do not add this layer when the "predict" stage is active
  }
}

What is filtering the parameters actually useful for? See this article: https://yangwenbo.com/articles/caffe-net-config-all-in-one.html?utm_source=tuicool&utm_medium=referral
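A construction sketch for the example above (the prototxt file name lenet_all_in_one.prototxt is a placeholder): which layers survive is decided by comparing the NetState built from these arguments with each layer's include/exclude rules, as FilterNet and StateMeetsRule below show. For instance, with the "predict" stage active the "mnist" Data layer is filtered out because of its not_stage: "predict" rule.

#include <vector>
#include "caffe/caffe.hpp"
using namespace caffe;

void BuildVariants() {
  // TEST phase, no stage: the "mnist" Data layer (include { phase: TEST }) is kept
  Net<float> test_net("lenet_all_in_one.prototxt", TEST);

  // TEST phase with the "predict" stage: the Data layer is dropped (not_stage: "predict")
  std::vector<std::string> stages;
  stages.push_back("predict");
  Net<float> predict_net("lenet_all_in_one.prototxt", TEST, 0, &stages);
}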
template <typename Dtype>
void Net<Dtype>::FilterNet(const NetParameter& param,
NetParameter* param_filtered) {
NetState net_state(param.state());
param_filtered->CopyFrom(param);
//First clear the layers, then re-add them according to the rules
param_filtered->clear_layer();
for (int i = 0; i < param.layer_size(); ++i) {
const LayerParameter& layer_param = param.layer(i);
const string& layer_name = layer_param.name();
//include and exclude must not both be present
CHECK(layer_param.include_size() == 0 || layer_param.exclude_size() == 0)
<< "Specify either include rules or exclude rules; not both.";
// If no include rules are specified, the layer is included by default and
// only excluded if it meets one of the exclude rules.
bool layer_included = (layer_param.include_size() == 0);
for (int j = 0; layer_included && j < layer_param.exclude_size(); ++j) {
//net_state is made up of the arguments given at net construction (phase/stage/level);
//layer_param.exclude holds the parameters of this layer's exclude rule from the
//prototxt (max_level/min_level/stage/not_stage/phase);
//if the condition holds, this layer is excluded;
if (StateMeetsRule(net_state, layer_param.exclude(j), layer_name)) {
layer_included = false;
}
}
for (int j = 0; !layer_included && j < layer_param.include_size(); ++j) {
//if the condition holds, this layer is included
if (StateMeetsRule(net_state, layer_param.include(j), layer_name)) {
layer_included = true;
}
}
if (layer_included) {
param_filtered->add_layer()->CopyFrom(layer_param);
}
}
}
//Compare the phase/level/stage given at net construction with each layer's rules
//(include/exclude) from the prototxt to decide whether the layer is kept in the net
template <typename Dtype>
bool Net<Dtype>::StateMeetsRule(const NetState& state,
const NetStateRule& rule, const string& layer_name) {
// Check whether the rule is broken due to phase.
if (rule.has_phase()) {
if (rule.phase() != state.phase()) {
LOG_IF(INFO, Caffe::root_solver())
<< "The NetState phase (" << state.phase()
<< ") differed from the phase (" << rule.phase()
<< ") specified by a rule in layer " << layer_name;
return false;
}
}
// Check whether the rule is broken due to min level.
if (rule.has_min_level()) {
if (state.level() < rule.min_level()) {
LOG_IF(INFO, Caffe::root_solver())
<< "The NetState level (" << state.level()
<< ") is above the min_level (" << rule.min_level()
<< ") specified by a rule in layer " << layer_name;
return false;
}
}
// Check whether the rule is broken due to max level.
if (rule.has_max_level()) {
if (state.level() > rule.max_level()) {
LOG_IF(INFO, Caffe::root_solver())
<< "The NetState level (" << state.level()
<< ") is above the max_level (" << rule.max_level()
<< ") specified by a rule in layer " << layer_name;
return false;
}
}
// Check whether the rule is broken due to stage. The NetState must
// contain ALL of the rule's stages to meet it.
for (int i = 0; i < rule.stage_size(); ++i) {
// Check that the NetState contains the rule's ith stage.
bool has_stage = false;
//the rule's ith stage is satisfied as long as any stage given at net construction equals it
for (int j = 0; !has_stage && j < state.stage_size(); ++j) {
if (rule.stage(i) == state.stage(j)) { has_stage = true; }
}
if (!has_stage) {
LOG_IF(INFO, Caffe::root_solver())
<< "The NetState did not contain stage '" << rule.stage(i)
<< "' specified by a rule in layer " << layer_name;
return false;
}
}
// Check whether the rule is broken due to not_stage. The NetState must
// contain NONE of the rule's not_stages to meet it.
for (int i = 0; i < rule.not_stage_size(); ++i) {
// Check that the NetState contains the rule's ith not_stage.
bool has_stage = false;
//if any stage given at net construction equals this not_stage, the rule is broken and the layer is filtered out
for (int j = 0; !has_stage && j < state.stage_size(); ++j) {
if (rule.not_stage(i) == state.stage(j)) { has_stage = true; }
}
if (has_stage) {
LOG_IF(INFO, Caffe::root_solver())
<< "The NetState contained a not_stage '" << rule.not_stage(i)
<< "' specified by a rule in layer " << layer_name;
return false;
}
}
return true;
}
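Level filtering follows the same pattern; note from the checks above that the range is inclusive (min_level <= level <= max_level). A hypothetical sketch (layer, blob and file names are made up):

layer {
  name: "aux_loss"
  type: "SoftmaxWithLoss"
  bottom: "fc_aux"
  bottom: "label"
  top: "aux_loss"
  include {
    min_level: 1   # kept only when the net is constructed with level >= 1
  }
}

Constructing the net with Net<float> net("train.prototxt", TRAIN, 1) keeps this layer, while the default level 0 filters it out.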
Net::AppendTop adds a top blob to a layer.
Net::AppendBottom adds a bottom blob to a layer.
AppendTop adds a new blob to the net-wide blob list (blobs_) and points the layer's newly created top blob at it, so the layer's output blob is tied to the blob list (blobs_). When creating the blob, AppendTop may use in-place computation, meaning the same blob is reused as both the top and the bottom of a layer. AppendBottom does not add anything to blobs_; it simply binds the new bottom blob to the blob already added by AppendTop. After these two functions have run, the previous layer's top blob and the current layer's bottom blob are linked through blobs_, and all the layers of the net are chained together; a prototxt sketch follows the AppendTop code below.
// Helper for Net::Init: add a new top blob to the net.
// Add a top blob to a layer
template <typename Dtype>
void Net<Dtype>::AppendTop(const NetParameter& param, const int layer_id,
const int top_id, set<string>* available_blobs,
map<string, int>* blob_name_to_idx) {
shared_ptr<LayerParameter> layer_param(
new LayerParameter(param.layer(layer_id)));
const string& blob_name = (layer_param->top_size() > top_id) ?
layer_param->top(top_id) : "(automatic)";
// Check if we are doing in-place computation
//In-place computation: the top blob reuses the bottom blob's address and id.
//Whether in-place computation is used is decided by the top/bottom blob names in the prototxt
if (blob_name_to_idx && layer_param->bottom_size() > top_id &&
blob_name == layer_param->bottom(top_id)) {
// In-place computation
LOG_IF(INFO, Caffe::root_solver())
<< layer_param->name() << " -> " << blob_name << " (in-place)";
top_vecs_[layer_id].push_back(blobs_[(*blob_name_to_idx)[blob_name]].get());
top_id_vecs_[layer_id].push_back((*blob_name_to_idx)[blob_name]);
} else if (blob_name_to_idx &&
blob_name_to_idx->find(blob_name) != blob_name_to_idx->end()) {
//Entries of blob_name_to_idx are only ever added in AppendTop, so a duplicate name
//means some earlier top blob already used this name
// If we are not doing in-place computation but have duplicated blobs,
// raise an error.
LOG(FATAL) << "Top blob '" << blob_name
<< "' produced by multiple sources.";
} else {
// Normal output.
// No in-place computation: the top uses a Blob independent of the bottom
if (Caffe::root_solver()) {
LOG(INFO) << layer_param->name() << " -> " << blob_name;
}
shared_ptr<Blob<Dtype> > blob_pointer(new Blob<Dtype>());
//The current number of blobs is the id of the blob about to be added (appended at the end of blobs_)
const int blob_id = blobs_.size();
//New blobs are only ever created here in AppendTop; AppendBottom merely links the current
//layer's bottom to the previous layer's top (bottom and top point at the same blobs_[id]/blob_id)
blobs_.push_back(blob_pointer);
blob_names_.push_back(blob_name);
blob_need_backward_.push_back(false);
//Add the key/value pair to blob_name_to_idx; for data input layers (the first layers, which only
//have tops and no bottoms) blob_name_to_idx is not null either, so this branch is also taken
if (blob_name_to_idx) { (*blob_name_to_idx)[blob_name] = blob_id; }
top_id_vecs_[layer_id].push_back(blob_id);
top_vecs_[layer_id].push_back(blob_pointer.get());
}
//Blobs are inserted in AppendTop and removed in AppendBottom; whatever remains after all layers have been visited is the net's output blobs
if (available_blobs) { available_blobs->insert(blob_name); }
}
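In-place computation, mentioned above, is purely a naming convention in the prototxt: when a layer's top name is identical to its bottom name, the first branch of AppendTop reuses the existing blob instead of creating a new one. A small sketch (layer names are illustrative):

layer {
  name: "conv1"
  type: "Convolution"
  bottom: "data"
  top: "conv1"          # new blob, created by AppendTop
  convolution_param { num_output: 20 kernel_size: 5 }
}
layer {
  name: "relu1"
  type: "ReLU"
  bottom: "conv1"
  top: "conv1"          # same name as the bottom: ReLU runs in place on conv1's blob
}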
// Helper for Net::Init: add a new bottom blob to the net.
// Add a bottom blob to a layer
// Bind bottom_vecs_ to blobs_[id], and bottom_id_vecs_ to the blob id
template <typename Dtype>
int Net<Dtype>::AppendBottom(const NetParameter& param, const int layer_id,
const int bottom_id, set<string>* available_blobs,
map<string, int>* blob_name_to_idx) {
const LayerParameter& layer_param = param.layer(layer_id);
const string& blob_name = layer_param.bottom(bottom_id);
if (available_blobs->find(blob_name) == available_blobs->end()) {
LOG(FATAL) << "Unknown bottom blob '" << blob_name << "' (layer '"
<< layer_param.name() << "', bottom index " << bottom_id << ")";
}
const int blob_id = (*blob_name_to_idx)[blob_name];
LOG_IF(INFO, Caffe::root_solver())
<< layer_names_[layer_id] << " <- " << blob_name;
//New blobs are created in AppendTop; here we only connect the current layer's bottom to the
//previous layer's top (bottom and top point at the same blobs_[id]/blob_id)
bottom_vecs_[layer_id].push_back(blobs_[blob_id].get());
bottom_id_vecs_[layer_id].push_back(blob_id);
//The blob was inserted into available_blobs by the previous layer's AppendTop and is erased by this layer's AppendBottom
available_blobs->erase(blob_name);
bool need_backward = blob_need_backward_[blob_id];
// Check if the backpropagation on bottom_id should be skipped
if (layer_param.propagate_down_size() > 0) {
need_backward = layer_param.propagate_down(bottom_id);
}
bottom_need_backward_[layer_id].push_back(need_backward);
return blob_id;
}
Net::AppendParam adds a layer's learnable parameter blobs and their training hyperparameters.
Learnable parameters: stored in learnable_params_; they hold the weights and biases. A layer that takes part in backpropagation usually has two parameter blobs (one for the weights, one for the bias).
Training hyperparameters: stored in params_lr_ and params_weight_decay_; they hold the learning-rate multiplier and the weight-decay multiplier.
Every learnable parameter blob (whether it holds weights or a bias) has matching params_lr_ and params_weight_decay_ entries; all three are indexed together through param_names_index_.
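In the prototxt, these hyperparameters come from a layer's per-blob param blocks; a typical sketch (values are illustrative), where the first param block belongs to the weight blob and the second to the bias blob:

layer {
  name: "ip1"
  type: "InnerProduct"
  bottom: "pool2"
  top: "ip1"
  param { lr_mult: 1 decay_mult: 1 }   # weight blob -> params_lr_ = 1, params_weight_decay_ = 1
  param { lr_mult: 2 decay_mult: 0 }   # bias blob -> learns twice as fast, no weight decay
  inner_product_param { num_output: 500 }
}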
/////////////////////////////////////////////////////////////////////////////////////
//1. Add one learnable parameter blob (weights/bias) to this layer; it goes into params_ and also into learnable_params_;
//2. Add one params_lr_ and one params_weight_decay_ entry for this layer to hold the training hyperparameters
//3. Each learnable parameter blob of a layer (weights/bias, learnable_params_) has one matching params_lr_ and one
//   params_weight_decay_; both the hyperparameters and the learnable parameters come from the LayerParameter.
//4. param_names_index_ is the master index over a layer's learnable parameters/hyperparameters
////////////////////////////////////////////////////////////////////////////////////
template <typename Dtype>
void Net<Dtype>::AppendParam(const NetParameter& param, const int layer_id,
const int param_id) {
//layers_[layer_id]->layer_param() holds the training hyperparameters
const LayerParameter& layer_param = layers_[layer_id]->layer_param();
const int param_size = layer_param.param_size();
string param_name =
(param_size > param_id) ? layer_param.param(param_id).name() : "";
if (param_name.size()) {
param_display_names_.push_back(param_name);
} else {
ostringstream param_display_name;
param_display_name << param_id;
param_display_names_.push_back(param_display_name.str());
}
const int net_param_id = params_.size();
//layers_[layer_id]->blobs() holds the learnable parameters (weights/bias);
//a layer usually has two blobs: the first stores the weights, the second the bias
params_.push_back(layers_[layer_id]->blobs()[param_id]);
param_id_vecs_[layer_id].push_back(net_param_id);
param_layer_indices_.push_back(make_pair(layer_id, param_id));
//param_spec holds this layer's training hyperparameters
ParamSpec default_param_spec;
const ParamSpec* param_spec = (layer_param.param_size() > param_id) ?
&layer_param.param(param_id) : &default_param_spec;
if (!param_size || !param_name.size() || (param_name.size() &&
param_names_index_.find(param_name) == param_names_index_.end())) {
// This layer "owns" this parameter blob -- it is either anonymous
// (i.e., not given a param_name) or explicitly given a name that we
// haven't already seen.
//param_owners_ stores the id from param_names_index_; for a newly created
//param_names_index_ entry the id is -1
param_owners_.push_back(-1);
if (param_name.size()) {
param_names_index_[param_name] = net_param_id;
}
const int learnable_param_id = learnable_params_.size();
//learnable_params_ stores the weight/bias blobs
learnable_params_.push_back(params_[net_param_id].get());
//learnable_param_ids_ stores the ids of the weight/bias blobs
learnable_param_ids_.push_back(learnable_param_id);
//has_params_lr_ records whether a learning-rate multiplier was given
has_params_lr_.push_back(param_spec->has_lr_mult());
//has_params_decay_ records whether a weight-decay multiplier was given
has_params_decay_.push_back(param_spec->has_decay_mult());
//params_lr_ stores the learning-rate multiplier values
params_lr_.push_back(param_spec->lr_mult());
//params_weight_decay_ stores the weight-decay multiplier values
params_weight_decay_.push_back(param_spec->decay_mult());
} else {
// Named param blob with name we've seen before: share params
// Reuse the parameter already registered in param_names_index_;
// param_names_index_ is the master index over learnable parameters/hyperparameters
const int owner_net_param_id = param_names_index_[param_name];
param_owners_.push_back(owner_net_param_id);  // param_owners_ stores the id from param_names_index_ (-1 for a newly created entry)
const pair<int, int>& owner_index =
param_layer_indices_[owner_net_param_id];
const int owner_layer_id = owner_index.first;
const int owner_param_id = owner_index.second;
LOG_IF(INFO, Caffe::root_solver()) << "Sharing parameters '" << param_name
<< "' owned by "
<< "layer '" << layer_names_[owner_layer_id] << "', param "
<< "index " << owner_param_id;
Blob<Dtype>* this_blob = layers_[layer_id]->blobs()[param_id].get();
Blob<Dtype>* owner_blob =
layers_[owner_layer_id]->blobs()[owner_param_id].get();
const int param_size = layer_param.param_size();
if (param_size > param_id && (layer_param.param(param_id).share_mode() ==
ParamSpec_DimCheckMode_PERMISSIVE)) {
// Permissive dimension checking -- only check counts are the same.
CHECK_EQ(this_blob->count(), owner_blob->count())
<< "Cannot share param '" << param_name << "' owned by layer '"
<< layer_names_[owner_layer_id] << "' with layer '"
<< layer_names_[layer_id] << "'; count mismatch. Owner layer param "
<< "shape is " << owner_blob->shape_string() << "; sharing layer "
<< "shape is " << this_blob->shape_string();
} else {
// Strict dimension checking -- all dims must be the same.
CHECK(this_blob->shape() == owner_blob->shape())
<< "Cannot share param '" << param_name << "' owned by layer '"
<< layer_names_[owner_layer_id] << "' with layer '"
<< layer_names_[layer_id] << "'; shape mismatch. Owner layer param "
<< "shape is " << owner_blob->shape_string() << "; sharing layer "
<< "expects shape " << this_blob->shape_string();
}
//Because the blob is shared, the owner's learnable_param_id is pushed into learnable_param_ids_ again
const int learnable_param_id = learnable_param_ids_[owner_net_param_id];
learnable_param_ids_.push_back(learnable_param_id);
if (param_spec->has_lr_mult()) {
if (has_params_lr_[learnable_param_id]) {
CHECK_EQ(param_spec->lr_mult(), params_lr_[learnable_param_id])
<< "Shared param '" << param_name << "' has mismatched lr_mult.";
} else {
has_params_lr_[learnable_param_id] = true;
params_lr_[learnable_param_id] = param_spec->lr_mult();
}
}
if (param_spec->has_decay_mult()) {
if (has_params_decay_[learnable_param_id]) {
CHECK_EQ(param_spec->decay_mult(),
params_weight_decay_[learnable_param_id])
<< "Shared param '" << param_name << "' has mismatched decay_mult.";
} else {
has_params_decay_[learnable_param_id] = true;
params_weight_decay_[learnable_param_id] = param_spec->decay_mult();
}
}
}
}
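Parameter sharing, handled by the else branch above, is driven entirely by the name field of a param block: the first layer that uses a name owns the blob, and any later layer using the same name reuses it. A sketch (layer and parameter names are illustrative) of two layers sharing one set of weights:

layer {
  name: "fc_a"
  type: "InnerProduct"
  bottom: "feat_a"
  top: "out_a"
  param { name: "shared_fc_w" }   # first occurrence: fc_a owns this weight blob (param_owners_ = -1)
  param { name: "shared_fc_b" }
  inner_product_param { num_output: 10 }
}
layer {
  name: "fc_b"
  type: "InnerProduct"
  bottom: "feat_b"
  top: "out_b"
  param { name: "shared_fc_w" }   # same name: AppendParam points fc_b at fc_a's blob
  param { name: "shared_fc_b" }
  inner_product_param { num_output: 10 }
}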