
Simple Neural Network for Predicting Relationships in Structured Data: Test Set (Improved)

# coding: utf-8
import random
import csv
import tensorflow as tf
import matplotlib.pyplot as plt
from sklearn.preprocessing import scale
from sklearn.model_selection import train_test_split
from sklearn.utils import shuffle
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler

f = open("result_.csv"
, "a+", encoding='utf-8') writer_csv = csv.writer(f) header = ['Nodeid1','Nodeid2','author_degree1','author_degree2','No','pre_lable','isBD'] writer_csv.writerow(header) result=[] num_classes=2 data=pd.DataFrame(pd.read_csv('/home/henson/Desktop/huanping/huanping.csv_EDGE_NBD.csv',encoding='gb18030')) data.head() sess = tf.Session() X = np.array(data[['Nodeid1'
,'Nodeid2','author_degree1','author_degree2','No','isBD']]) nodeid1=X[:,0] nodeid2=X[:,1] print(X[0,2:5]) #StandardScaler= StandardScaler() #X_Standard = StandardScaler.fit_transform(X) #y_Standard = StandardScaler.fit_transform(y) X_train,X_test = train_test_split(X,test_size=0.2,random_state=0) #X_train = scale(X_train)
#X_test = scale(X_test) nodeid_test =X_test[:,0:2] print(nodeid_test) X_dataset=X_test[:,2:5] y_test=X_test[:,5] print(y_test.shape) #y_train = (np.arange(2) == y_train[:,None]).astype(np.float32) y_test_ = (np.arange(2) == y_test[:,None]).astype(np.float32) #y_train = scale(y.reshape((-1,1))) #y_test = scale(y_test.reshape((-1,1))) def add_layer(inputs,input_size,output_size,activation_function=None): with tf.variable_scope("Weights"): Weights = tf.Variable(tf.random_normal(shape=[input_size,output_size]),name="weights") tf.summary.histogram('Weights', Weights) with tf.variable_scope("biases"): biases = tf.Variable(tf.zeros(shape=[1,output_size]) + 0.1,name="biases") tf.summary.histogram('biases', biases) with tf.name_scope("Wx_plus_b"): Wx_plus_b = tf.matmul(inputs,Weights) + biases with tf.name_scope("dropout"): Wx_plus_b = tf.nn.dropout(Wx_plus_b,keep_prob=keep_prob_s) if activation_function is None: return Wx_plus_b else: with tf.name_scope("activation_function"): return activation_function(Wx_plus_b) xs = tf.placeholder(shape=[None,X_dataset.shape[1]],dtype=tf.float32,name="inputs") ys = tf.placeholder(shape=[None,2],dtype=tf.float32) #ys = tf.placeholder(shape=[None,num_classes],dtype=tf.float32) print(ys.shape) keep_prob_s = tf.placeholder(dtype=tf.float32) with tf.name_scope("layer_1"): l1 = add_layer(xs,3,10,activation_function=tf.nn.relu) with tf.name_scope("layer_2"):# l2 = add_layer(l1,10,10,activation_function=tf.nn.relu) with tf.name_scope("y_pred"): #pred = add_layer(l1,10,1) logits = add_layer(l2, 10, num_classes) print("logits:",logits) predicted_labels=tf.arg_max(logits, 1) with tf.name_scope("loss"): #loss = tf.reduce_mean(tf.reduce_sum(tf.square(ys - logits),reduction_indices=[1])) #loss = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(labels=ys,logits=tf.argmax(logits,1))) #loss = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(labels=ys, logits=logits)) loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=ys, logits=logits)) tf.summary.scalar("loss",tensor=loss) with tf.name_scope("train"): train_op =tf.train.GradientDescentOptimizer(learning_rate=0.03).minimize(loss) #train_op = tf.train.AdamOptimizer(learning_rate=0.01).minimize(loss) correct_prediction = tf.equal(tf.arg_max(logits, 1), tf.arg_max(ys, 1)) accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32)) tf.summary.scalar("accuracy", tensor=accuracy) def fit(node,X_, y_, n, keep_prob,isTrain): init = tf.global_variables_initializer() #feed_dict_train = {ys:y[:,:], xs: X, keep_prob_s: keep_prob} feed_dict_train = {xs: X_,ys: y_,keep_prob_s: keep_prob} with tf.Session() as sess: if isTrain: saver = tf.train.Saver(tf.global_variables(), max_to_keep=15) # 最大儲存的N個Checkpoints檔案 merged = tf.summary.merge_all() writer = tf.summary.FileWriter(logdir="nn_huanping_log", graph=sess.graph) # 寫tensorbord sess.run(init) for i in range(n): _loss, _ = sess.run([loss, train_op], feed_dict=feed_dict_train) if i % 100 == 0: print("epoch:%d/tloss:%.5f " % (i, _loss)) acc = sess.run(accuracy, feed_dict=feed_dict_train) print(acc) rs = sess.run(merged, feed_dict=feed_dict_train) writer.add_summary(summary=rs, global_step=i) # 寫tensorbord saver.save(sess=sess, save_path="model/nn_huanping.model", global_step=i) # 儲存模型 else: ckpt = tf.train.get_checkpoint_state("model/") if ckpt and ckpt.model_checkpoint_path: saver = tf.train.Saver() saver.restore(sess, ckpt.model_checkpoint_path) #print(sess.run(Weights)) # 輸出訓練模型儲存的權重和偏置量 #print(sess.run(bias)) pred_test, acc = 
sess.run([predicted_labels, accuracy], feed_dict=feed_dict_train) #pred_test = sess.run([predicted_labels], feed_dict=feed_dict_train) #print("prediction:" ,pred_test,"accuracy:%f"%(acc)) #size=len(pred_test) print(acc) """ A=np.array([1, 1, 1]) B = np.array([2, 2, 2]) A = A[:, np.newaxis] #增加維度 B = B[:, np.newaxis] print(A.shape) print(B.shape) print(nodeid1.shape) print(nodeid2.shape) """ for i in range(0,len(pred_test)): result.append((node[i,0],node[i,1],X_[i,0],X_[i,1],X_[i,2],pred_test[i],y_test[i])) print(result) writer_csv.writerows(result) #print(nodeid1[i], nodeid2[i], pred_test[i]) #print(pred_test) #result = np.concatenate((A,B), axis=1)   #縱向排列 #print(result) #print( y_test,acc) """預測輸出10個label sample_indexes = random.sample(range(len(y_test)), 10) X_test_min = [X_test[i] for i in sample_indexes] y_test_min = [y_test[i] for i in sample_indexes] # Run the "predicted_labels" op. #predicted = sess.run(predicted_labels, feed_dict={ys: y_test_min, xs: X_test_min, keep_prob_s: 1.0}) predicted = sess.run(predicted_labels, feed_dict={xs: X_test_min,keep_prob_s:0.8}) print(y_test_min) print(predicted) """ #fit(X_train, y_train,10000, 0.5, True) #訓練集 fit(nodeid_test,X_dataset,y_test_,10000, 1.0, False)  #驗證集 #用histogram 來追著 weight和 bias 每一個值都是新增追著 summuary_.....
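The label array fed to softmax_cross_entropy_with_logits above is built with the NumPy broadcasting trick (np.arange(2) == y[:, None]).astype(np.float32). A minimal sketch of what that expression produces, using a hypothetical label vector in place of the isBD column:

import numpy as np

# Hypothetical binary labels (stand-in for the isBD column)
y = np.array([0, 1, 1, 0])

# Compare each label against [0, 1]; broadcasting yields one one-hot row per sample
one_hot = (np.arange(2) == y[:, None]).astype(np.float32)
print(one_hot)
# [[1. 0.]
#  [0. 1.]
#  [0. 1.]
#  [1. 0.]]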

Sure enough, once I walked through the logic carefully, it turned out I had simply been careless: the input to the logits layer had actually been l1. Earlier I had been puzzled why the accuracy was so high on the training set, and on the validation set too; then, comparing the predicted labels against the true labels, I found a big bug of my own: the output node ids did not line up with the labels, which is what made me think the results were poor. Careless, so careless.
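For reference, a minimal before/after sketch of the wiring fix described above (the buggy line is reconstructed from this note; the corrected line is the one that appears in the listing):

#logits = add_layer(l1, 10, num_classes)  # buggy: the output layer skipped layer 2
logits = add_layer(l2, 10, num_classes)   # corrected: layer 1 -> layer 2 -> output logits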
Careless people probably aren't cut out to be programmers... haha.
Honestly it hasn't really been improved much either; it is still written in a mess, and without comments even I might not be able to read it later.
Too sloppy. A bad habit.