1. 程式人生 > >lightgbm示例程式碼——機器學習

lightgbm示例程式碼——機器學習

import sklearn as sk
import gc
import pandas as pd
import numpy as np
import lightgbm as lgb
# Read the training table. 'action_type' is taken as the label; the two date
# columns and the label itself are excluded from the feature matrix.
df_train = pd.read_csv('df_train.csv')
y_train = df_train['action_type']
x_train = df_train.drop(columns=['date', 'newdate', 'action_type'])
gc.collect()

# Hold out the last 10000 rows for validation; everything before them is
# used for training (a negative bound counts from the end of the table).
split = -10000
x_valid, y_valid = x_train.iloc[split:], y_train.iloc[split:]
x_train, y_train = x_train.iloc[:split], y_train.iloc[:split]

# The raw frame is no longer needed once features and labels are extracted.
del df_train
gc.collect()

# Wrap both partitions as LightGBM datasets; the validation set is created
# with the training set as its reference.
lgb_train = lgb.Dataset(x_train, y_train)
lgb_valid = lgb.Dataset(x_valid, y_valid, reference=lgb_train)
gc.collect()
# Training configuration passed to lgb.train().
params = {
    'task': 'train',
    'boosting_type': 'gbdt',
    'objective': 'binary',
    # Fixed: the original listed '12' (digits one-two), a typo for 'l2'.
    # A list (instead of a set) keeps the metric order deterministic.
    'metric': ['l2', 'auc', 'binary_logloss'],
    'num_leaves': 31,
    # NOTE(review): 'num_trees' is documented as an alias of the boosting
    # iteration count, so it overlaps with the num_boost_round argument of
    # lgb.train() - confirm which one the installed version honours.
    'num_trees': 100,
    'learning_rate': 0.05,
    'feature_fraction': 0.9,
    'bagging_fraction': 0.8,
    'bagging_freq': 5,
    'verbose': 0,
}
# print() calls instead of Python 2 print statements, matching the
# print(y_pred) call used later in this script.
print('start training...')

# Train the booster, evaluating on the hold-out set and stopping early when
# the validation metrics have not improved for 50 consecutive rounds.
# num_boost_round was 1, which made the 50-round early-stopping window
# unreachable and contradicted 'num_trees': 100 in params; it now matches.
# NOTE(review): LightGBM 4.x reportedly dropped the early_stopping_rounds
# argument in favour of callbacks=[lgb.early_stopping(50)] - confirm against
# the installed version before upgrading.
gbm = lgb.train(params,
                lgb_train,
                num_boost_round=100,
                valid_sets=[lgb_valid],
                early_stopping_rounds=50)

print('saving model...')
# Persist only the trees up to the best validation iteration.
gbm.save_model('model.txt', num_iteration=gbm.best_iteration)
# Load the rows to score.
df_test = pd.read_csv('df_test.csv')
gc.collect()

# One-off cleanup that was previously kept here inside a string literal; it
# is retained as a comment for reference only and is not executed.
# NOTE(review): 'meiyong' presumably marks an unwanted leading column (e.g. a
# saved index) - confirm before re-running.
#   df_test.columns = ['meiyong','uid','spu_id','brand_id','cat_id']
#   del df_test['meiyong']
#   df_test.to_csv('df_test.csv',index=False)

# Score the test rows with the trees up to the best validation iteration.
print('start predicting')
y_pred = gbm.predict(df_test, num_iteration=gbm.best_iteration)
print(y_pred)

# Write the predictions as a single headerless, index-free column.
jieguo = pd.DataFrame()
jieguo['weights'] = y_pred
jieguo.to_csv('jieguo.csv', header=False, index=False)
# NOTE(review): the statements below look like leftover notebook cells that
# build df_test.csv; they run after the prediction step above even though that
# step already reads df_test.csv - confirm the intended order.

# Read the tab-separated goods table, which has no header row.
goods_table = pd.read_csv('goods_train.csv',header=None,delimiter='\t')
# NOTE(review): `x_test` is not defined anywhere in the visible source, so
# this line raises NameError unless it is created elsewhere - confirm where
# the raw test table is loaded.
x_test.columns = ['uid','spu_id','wuyong']
goods_table.columns = ['spu_id','brand_id','cat_id']
# Attach brand and category to each test row by joining on spu_id.
df_test = pd.merge(x_test,goods_table,on="spu_id")
# index=False is not passed, so the row index is written as an extra leading
# column of df_test.csv.
df_test.to_csv("df_test.csv")
# Added after the file was written, so this column exists only in memory.
df_test['newhead']=93
# Bare .head() expressions: their results are discarded when this runs as a
# script (they only display when evaluated last in a notebook cell).
df_test.head()
x_train.head()
jieguo.head()