kaggle上泰坦尼克的年齡使用tensorflow進行隨機森林迴歸
阿新 • • 發佈:2018-12-31
kaggle上泰坦尼克的年齡轉換為int型,0~80歲一共81個標籤。使用隨機森林分類準確率為0.47,然後再用這個模型去獲得未知的年齡資料。
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.contrib.tensor_forest.python import tensor_forest
from tensorflow.python.ops import resources
path=r"E:\learn\pc_code\kaggle\Titanic\all"
data_train = pd.read_csv(path+'/'+'train.csv')
tf.reset_default_graph()
df=data_train
age_df=df[['Age','Fare','Parch','SibSp','Pclass']]
known_age = age_df[age_df.Age.notnull()].values #知道的做訓練集
unknown_age = age_df[age_df.Age.isnull()].values
test_x=known_age[:,1:]
test_y=known_age[:,0]
#param
num_steps=1000
batch_size= 100
num_classes=81
num_features=4
num_trees=10
max_nodes=100000
X=tf.placeholder(tf.float32,shape=[None,num_features])
Y=tf.placeholder(tf.float32,shape=[None])
hparams=tensor_forest.ForestHParams(num_classes=num_classes,
num_features=num_features,
num_trees= num_trees,
max_nodes=max_nodes).fill()
forest_graph=tensor_forest.RandomForestGraphs(params=hparams)
train_op=forest_graph.training_graph(X,Y)
loss_op=forest_graph.training_loss(X,Y)
infer_op, _, _=forest_graph.inference_graph(X)
correct_pre=tf.equal(tf.argmax(infer_op,1),tf.cast(Y,tf.int64))
accuracy_op = tf.reduce_mean(tf.cast(correct_pre,tf.float32))
init_vars=tf.group(tf.global_variables_initializer(),resources.initialize_resources(resources.shared_resources()))
sess=tf.Session()
sess.run(init_vars)
for i in range(num_steps):
permutation = np.random.permutation(test_y.shape[0])
batch_x=test_x[permutation,:][0:batch_size]
batch_y=test_y[permutation][0:batch_size]
_, l=sess.run([train_op,loss_op],feed_dict={X:batch_x,Y:batch_y})
if i%50==0 or i==1:
acc=sess.run(accuracy_op,feed_dict={X:test_x,Y:test_y})
print('Step: %i Loss: %f Accuracy: %f' %(i,l,acc))