機器學習BP神經網路,任意拓撲結構 (C++)
阿新 • • 發佈:2019-01-28
這次的版本更優秀了的樣子!
按照老闆說的,每個節點是單獨的匯出節點(會導致 sigmoid 訓練變慢,原因是 sigmoid 層數多了,梯度會迅速衰減,導致梯度消失。)
換個啟用函式就行了。
net_t<10> 表示網路有 10 個節點
然後input.txt裡的檔案格式大概是
n
0 1
1 2
3 4
....
n表示有n行,每行2個數字,表示2個點有邊。(0下標開始,不超過net初始化的節點數量)。任意拓撲結構都可以執行。
// BP neural network over an arbitrary graph topology.
// The undirected edge list is read from a file; node "heights" are assigned
// by a two-sided BFS from the output nodes (height 0, growing up) and the
// input nodes (height = node count, growing down). Edges are then oriented
// from higher to lower height, giving a DAG that is trained with ordinary
// back-propagation in topological order.
#include <bits/stdc++.h>
// #include "recordlog.h"  // NOTE(review): project-local header; nothing from
//                         // it is referenced in this file, and it breaks a
//                         // standalone build. Restore if its symbols are used.
#include <memory>

using std::cin;
using std::cout;
using std::endl;

// Debug-print helpers: pr(x) prints "x = <value> ", prln(x) adds a newline.
#define pr(x) cout << #x << " = " << x << " "
#define prln(x) cout << #x << " = " << x << endl

// The macros below all expect a local int variable `node` (index into
// `neurons`) and, where applicable, a loop variable `nextnode` (a PID taken
// from that node's adjacency list).
#define NODE (neurons[node])
#define NODE_GAIN (NODE.energy)                    // summed weighted input
#define NODE_THETA (NODE.loss_energy)              // node's bias (theta)
#define NODE_VALUE (NODE_GAIN + NODE_THETA)        // pre-activation value
#define NODE_OUTPUT (NODE.output)                  // post-activation output
#define NODE_PE (NODE.partial_derivative)          // dError/dOutput for node
#define D_NODE (derivative(NODE_VALUE))            // f'(pre-activation)
#define NEXT_NODE (neurons[nextnode.first])
#define NEXT_NODE_OUTPUT (NEXT_NODE.output)
#define NEXT_NODE_GAIN (NEXT_NODE.energy)
#define NEXT_NODE_PE (NEXT_NODE.partial_derivative)
#define NEXT_NODE_THETA (NEXT_NODE.loss_energy)
#define NEXT_NODE_VALUE (NEXT_NODE_GAIN + NEXT_NODE_THETA)
#define D_NEXT_NODE (derivative(NEXT_NODE_VALUE))
#define NODE_TO_NEXTNODE_WEIGHT (nextnode.second)  // weight of edge node->next

class neuron_t;
typedef std::pair<int, double> PID;     // (target node index, edge weight)
typedef std::vector<PID> neuron_array_t;
typedef std::vector<int> vector_map_t;
typedef std::unique_ptr<neuron_t> neuron_ptr_t;

// One neuron of the network.
class neuron_t
{
public:
    double energy;               // accumulated weighted input (gain)
    double output;               // activation output
    int number;                  // own index in the neuron pool
    double loss_energy;          // bias / theta
    neuron_array_t neuron_array; // outgoing edges: (target, weight)
    double partial_derivative;   // dError/dOutput
    bool is_input;
    bool is_output;
};

// A BP network with a fixed pool of `neuron_size` neurons whose wiring is
// read from an edge-list file (undirected; orientation is decided in setIO).
template <int neuron_size>
class net_t
{
public:
    neuron_t neurons[neuron_size];
    vector_map_t vector_map[neuron_size]; // undirected adjacency lists
    std::string activation_way;           // "sigmoid", "ReLU" or "line"
    std::vector<double> input_weight;     // weight applied to each raw input
    std::vector<int> output_number;       // indices of output neurons
    std::vector<int> input_number;        // indices of input neurons
    int tmp[neuron_size];      // scratch: permutation / in-degree counter
    int height[neuron_size];   // BFS height used to orient the edges
    int topology[neuron_size]; // topological order of the oriented DAG
    double rate;               // learning rate
    int userful_neuron_size;   // number of neurons reachable from the inputs

    static double sigmoid(double x) { return 1.0 / (1.0 + exp(-x)); }
    static double line(double x) { return x; }
    static double ReLU(double x) { return x <= 0 ? 0 : x; }

    // Derivative of the active activation function at pre-activation x.
    double derivative(double x)
    {
        if (activation_way == "sigmoid") {
            return sigmoid(x) * (1 - sigmoid(x));
        }
        if (activation_way == "ReLU") {
            return x < 0 ? 0 : 1;
        }
        if (activation_way == "line") {
            return 1;
        }
        cout << "no activationFunction!" << endl;
        return 0;
    }

    // Apply the selected activation function to (sum + theta).
    double activationFunction(double sum, double theta)
    {
        if (activation_way == "sigmoid") {
            return sigmoid(sum + theta);
        }
        if (activation_way == "ReLU") {
            return ReLU(sum + theta);
        }
        if (activation_way == "line") {
            return line(sum + theta);
        }
        cout << "no activationWay !" << endl;
        return 0;
    }

    // Random double in [l, r] with 1e-4 resolution.
    static double randomDouble(double l, double r)
    {
        return randomInt(l * 10000, r * 10000) / 10000.0;
    }

    // Random integer in [L, R]. NOTE(review): built on a product of rand()
    // calls, so the distribution is biased — acceptable only because it is
    // used for weight initialisation.
    static long long randomInt(long long L, long long R)
    {
        long long t = (unsigned long long)rand()
                    * (unsigned long long)rand()
                    * (unsigned long long)rand()
                    * (unsigned long long)rand() % (R - L + 1);
        return L + t;
    }

    ~net_t() {}

    // Load the undirected edge list from `file_name`.
    // File format: first `n` (edge count), then n lines "s t" meaning an
    // undirected edge between nodes s and t (0-based, < neuron_size).
    // Defaults: sigmoid activation, learning rate 0.1.
    net_t(std::string file_name)
    {
        FILE *file = fopen(file_name.c_str(), "r");
        printf("[%s]\n", file_name.c_str());
        if (file == NULL) {
            // FIX: previously fscanf() was called on a failed fopen (UB).
            printf("cannot open [%s]\n", file_name.c_str());
            exit(1);
        }
        int n;
        fscanf(file, "%d", &n);
        this->activation_way = "sigmoid";
        this->rate = 0.1; // default learning rate
        for (int i = 0; i < neuron_size; ++i) {
            vector_map[i].clear();
            tmp[i] = i;
            neurons[i].number = i;
            neurons[i].is_input = false;
            neurons[i].is_output = false;
        }
        this->output_number.clear();
        this->input_number.clear();
        prln(neuron_size);
        while (n--) {
            int s, t;
            fscanf(file, "%d%d", &s, &t);
            cout << s << " " << t << endl;
            vector_map[s].push_back(t);
            vector_map[t].push_back(s);
        }
        fclose(file);
    }

    net_t() { *this = net_t("input.txt"); }

    // Give every input neuron a random input weight and mark it as input.
    void initInputNeuron(std::vector<int> &input_num)
    {
        int sz = input_num.size();
        input_weight.resize(sz);
        for (int i = 0; i < sz; ++i) {
            input_weight[i] = randomDouble(-1, 1);
            neurons[input_num[i]].is_input = true;
        }
    }

    // Choose the input/output neurons (either as given, or from a random
    // permutation), orient the undirected edges into a DAG via the two-sided
    // BFS height labelling, randomise weights/biases, and compute the
    // topological order. Must be called once before training.
    void setIO(std::vector<double> &input, std::vector<double> &output,
               std::vector<int> *input_num = NULL,
               std::vector<int> *output_num = NULL)
    {
        if (input.size() == 0) {
            // TODO: report the error instead of silently returning
            return;
        }
        if (output.size() == 0) {
            // TODO: report the error instead of silently returning
            return;
        }
        if (input_num && output_num) {
            output_number = *output_num;
            input_number = *input_num;
        } else {
            // Pick IO nodes from a random permutation of all node indices.
            // FIX: std::random_shuffle was removed in C++17; std::shuffle
            // seeded from rand() keeps srand()-driven reproducibility.
            std::shuffle(tmp, tmp + neuron_size, std::mt19937(rand()));
            printf("output nodes are: ");
            for (size_t i = 0; i < output.size(); ++i) {
                output_number.push_back(tmp[i]);
                printf("%d ", tmp[i]);
            }
            printf("\n");
            printf("input nodes are:");
            for (size_t i = output.size(); i < input.size() + output.size(); ++i) {
                input_number.push_back(tmp[i]);
                printf("%d ", tmp[i]);
            }
            printf("\n");
        }
        // FIX: was initInputNeuron(*input_num), a NULL dereference whenever
        // the random branch above was taken (input_num defaults to NULL).
        initInputNeuron(input_number);
        for (size_t i = 0; i < output.size(); ++i) {
            neurons[output_number[i]].is_output = true;
        }
        // One BFS frontier per IO node.
        // FIX: was a variable-length array, which is not standard C++.
        std::vector<std::queue<int> > q(output.size() + input.size());
        memset(height, -1, sizeof(height));
        int cnt = 0;
        for (auto curnode : output_number) {
            q[cnt++].push(curnode);
            height[curnode] = 0; // outputs sit at the bottom
        }
        for (auto curnode : input_number) {
            q[cnt++].push(curnode);
            height[curnode] = neuron_size; // inputs sit at the top
        }
        // Expand every frontier one level at a time until nothing moves.
        bool flag = true;
        while (flag) {
            int pos = 0; // FIX: inner loop used to shadow the outer `cnt`
            flag = false;
            for (size_t i = 0; i < output_number.size(); ++i) {
                flag |= bfs(q[pos++], 1);  // grow upward from an output
            }
            for (size_t i = 0; i < input_number.size(); ++i) {
                flag |= bfs(q[pos++], -1); // grow downward from an input
            }
        }
        // Orient every undirected edge from the higher node to the lower one.
        auto build_map = [this](int from, int to) {
            neurons[from].neuron_array.push_back(
                std::make_pair(to, randomDouble(-1, 1)));
        };
        for (int i = 0; i < neuron_size; ++i) {
            for (auto curnode : vector_map[i]) {
                if (height[i] > height[curnode]) {
                    build_map(i, curnode);
                }
            }
        }
        for (int i = 0; i < neuron_size; ++i) {
            neurons[i].loss_energy = randomDouble(-1, 1);
        }
        getTopology(); // the network topology is now fully built
    }

    // Kahn's algorithm: topological order of the oriented DAG, seeded from
    // the input nodes. Reuses tmp[] as the in-degree counter.
    void getTopology()
    {
        memset(tmp, 0, sizeof(tmp));
        for (int i = 0; i < neuron_size; ++i) {
            for (auto nextnode : neurons[i].neuron_array) {
                ++tmp[nextnode.first];
            }
        }
        std::queue<int> q;
        for (auto curnode : input_number) {
            q.push(curnode);
        }
        int pos = 0;
        while (!q.empty()) {
            int curnode = q.front();
            q.pop();
            topology[pos++] = curnode;
            for (auto nextnode : neurons[curnode].neuron_array) {
                if (--tmp[nextnode.first] == 0) {
                    q.push(nextnode.first);
                }
            }
        }
        userful_neuron_size = pos; // nodes actually reachable from the inputs
    }

    // Expand one height level of the given frontier.
    // Returns true while the frontier still had nodes to process.
    bool bfs(std::queue<int> &q, int delta)
    {
        if (q.empty()) {
            return false;
        }
        int h = height[q.front()];
        while (!q.empty() && height[q.front()] == h) {
            int curnode = q.front();
            q.pop();
            for (auto nextnode : vector_map[curnode]) {
                if (height[nextnode] != -1) {
                    continue; // already labelled by some frontier
                }
                height[nextnode] = h + delta;
                q.push(nextnode);
            }
        }
        return true;
    }

    // Activate `node` and distribute its output along its outgoing edges.
    void cal_propagate(int node)
    {
        NODE_OUTPUT = activationFunction(NODE_GAIN, NODE_THETA);
        for (auto nextnode : NODE.neuron_array) {
            NEXT_NODE_GAIN += NODE_OUTPUT * NODE_TO_NEXTNODE_WEIGHT;
        }
    }

    // Forward pass: clear energies, feed in the weighted raw inputs, then
    // fire every reachable neuron in topological order.
    void propagate(std::vector<double> &input)
    {
        for (int i = 0; i < neuron_size; ++i) {
            neurons[i].energy = 0;
            neurons[i].output = 0;
        }
        for (size_t i = 0; i != input.size(); ++i) {
            int node = input_number[i];
            NODE_GAIN += input_weight[i] * input[i];
        }
        for (int i = 0; i < userful_neuron_size; ++i) {
            int node = topology[i];
            cal_propagate(node);
        }
    }

    // Backward step for one hidden node: accumulate dE/dOutput from the
    // successors, then gradient-descend the outgoing weights and the bias.
    void cal_back(int node)
    {
        for (auto &nextnode : NODE.neuron_array) {
            NODE_PE += NEXT_NODE_PE * NODE_TO_NEXTNODE_WEIGHT * D_NEXT_NODE;
        }
        for (auto &nextnode : NODE.neuron_array) {
            NODE_TO_NEXTNODE_WEIGHT -= NODE_OUTPUT * D_NEXT_NODE * NEXT_NODE_PE * rate;
        }
        NODE_THETA -= NODE_PE * D_NODE * rate;
    }

    // Backward pass for one training pair; propagate() must have run first.
    void back(std::vector<double> &input, std::vector<double> &output)
    {
        for (int i = 0; i < neuron_size; ++i) {
            neurons[i].partial_derivative = 0;
        }
        // Seed the output nodes with dE/dOutput of the squared-error loss
        // and update their biases immediately.
        for (size_t i = 0; i != output.size(); ++i) {
            int node = output_number[i];
            NODE_PE = NODE_OUTPUT - output[i];
            NODE_THETA -= NODE_PE * D_NODE * rate;
        }
        // Walk the topological order backwards over the hidden nodes.
        for (int i = userful_neuron_size - 1; i >= 0; --i) {
            int node = topology[i];
            if (NODE.is_output) {
                continue;
            }
            cal_back(node);
        }
        // Finally update the per-input scaling weights.
        for (size_t i = 0; i < input.size(); ++i) {
            int node = input_number[i];
            // FIX: the local was named `tmp`, shadowing the member tmp[].
            double grad = input[i] * NODE_PE * D_NODE;
            input_weight[i] -= grad * rate;
        }
    }

    // One SGD step on a single (input, output) pair. Returns the summed
    // squared error of the forward pass (outputs are read before the weight
    // update changes anything).
    double train(std::vector<double> &input, std::vector<double> &output)
    {
        propagate(input);
        back(input, output);
        double error = 0;
        for (size_t i = 0; i < output.size(); ++i) {
            double diff = neurons[output_number[i]].output - output[i];
            error += 0.5 * diff * diff; // pow(x, 2) replaced by x * x
        }
        return error;
    }

    // Dump the whole network state (weights, biases, activations) to stdout.
    void outputNetwork()
    {
        printf("---------------input nodes------------:\n");
        for (size_t i = 0; i < input_number.size(); ++i) {
            printf("[%d] weight:(%.7lf) \n", input_number[i], input_weight[i]);
        }
        printf("---------------other nodes------------\n");
        printf("other nodes\n");
        for (int i = 0; i < neuron_size; ++i) {
            pr(topology[i]), prln(i);
            int node = topology[i];
            printf("[%d] gain(%.7lf) theta(%.7lf) par_derivative(%.7lf) output(%.7lf) d(%.7lf)\n",
                   node, NODE_GAIN, NODE_THETA, NODE_PE, NODE_OUTPUT, D_NODE);
            for (auto nextnode : NODE.neuron_array) {
                printf(" -> %d (%.7lf)\n", nextnode.first, nextnode.second);
            }
        }
        printf("=============End====================\n");
    }

    // Run a forward pass and print the network's outputs.
    void testOutput(std::vector<double> &input)
    {
        propagate(input);
        cout << "output: ";
        for (auto curnode : output_number) {
            printf("%.7lf ", neurons[curnode].output);
        }
        cout << endl;
    }

    // NOTE(review): unused helper. It reads `energy` (pre-activation) rather
    // than `output`, ignores `input`, and does not call propagate() itself —
    // presumably the caller is expected to propagate first; verify before
    // relying on it.
    std::vector<double> getTest(std::vector<double> &input)
    {
        std::vector<double> q;
        for (auto curnode : output_number) {
            q.push_back(neurons[curnode].energy);
        }
        return q; // FIX: `return move(q)` defeated NRVO
    }
};

// XOR training demo on an 8-node network read from input.txt:
// nodes 0 and 1 are the inputs, node 7 is the output.
void doit251()
{
    // FIX: srand used to run after the net constructor had already consumed
    // rand(); seed first so the whole run is reproducible.
    srand(0);
    std::vector<int> in({0, 1});
    std::vector<int> out({7});
    net_t<8> net; // reads "input.txt"
    std::vector<double> input1({0, 0});
    std::vector<double> input2({0, 1});
    std::vector<double> input3({1, 0});
    std::vector<double> input4({1, 1});
    std::vector<double> output1({0});
    std::vector<double> output2({1});
    std::vector<double> output3({1});
    std::vector<double> output4({0});
    net.setIO(input1, output1, &in, &out);
    net.activation_way = "sigmoid";
    net.rate = 20; // aggressive learning rate for this tiny problem
    double error = 0;
    for (int i = 1; i <= 20000; ++i) {
        error = 0;
        error += net.train(input1, output1);
        error += net.train(input2, output2);
        error += net.train(input3, output3);
        error += net.train(input4, output4);
        error /= 4;
        cout << error << "\r";
    }
    prln(error);
    net.testOutput(input1);
    net.testOutput(input2);
    net.testOutput(input3);
    net.testOutput(input4);
}

int main()
{
    doit251();
    return 0;
}