LightGBM 示例程式碼——機器學習
阿新 • • 發佈:2018-11-07
# LightGBM binary-classification example.
#
# Flow: read df_train.csv, hold out the last rows as a validation set, train a
# GBDT model with early stopping, save it to model.txt, then score df_test.csv
# and write the predictions to jieguo.csv.
import gc

import lightgbm as lgb
import numpy as np
import pandas as pd
import sklearn as sk

TRAIN_CSV = 'df_train.csv'
TEST_CSV = 'df_test.csv'
GOODS_TSV = 'goods_train.csv'
MODEL_PATH = 'model.txt'
PREDICTIONS_CSV = 'jieguo.csv'

# The original passed num_boost_round=1 while also setting 'num_trees': 100 in
# params; LightGBM lets the params alias win, so 100 was the effective value.
# It is stated once here to remove the contradiction.
NUM_BOOST_ROUND = 100
EARLY_STOPPING_ROUNDS = 50


def build_test_features(x_test, goods_path=GOODS_TSV, out_path=TEST_CSV):
    """Join raw test rows with the goods table and write the result as CSV.

    In the original script these statements sat at module level *after* the
    prediction step and referenced ``x_test``, which is never defined there,
    so they always raised NameError.  They are kept as a helper so the logic
    is not lost; nothing in this script calls it.

    NOTE(review): this presumably is the step that produced df_test.csv in
    the first place -- confirm, and confirm where ``x_test`` is loaded from.

    Args:
        x_test: DataFrame with exactly three columns.  Its column labels are
            overwritten in place with ``['uid', 'spu_id', 'wuyong']``.
        goods_path: Tab-separated, header-less file with three columns that
            are labelled ``['spu_id', 'brand_id', 'cat_id']``.
        out_path: Destination CSV for the merged frame.

    Returns:
        The merged DataFrame (inner join on ``spu_id``), including the extra
        ``newhead`` column described below.
    """
    goods_table = pd.read_csv(goods_path, header=None, delimiter='\t')
    x_test.columns = ['uid', 'spu_id', 'wuyong']
    goods_table.columns = ['spu_id', 'brand_id', 'cat_id']
    df_test = pd.merge(x_test, goods_table, on="spu_id")
    # Written with the pandas default index=True, so the file gains a leading
    # index column.
    df_test.to_csv(out_path)
    # Added after the CSV is written, so it only exists on the returned frame.
    # NOTE(review): purpose of the constant 93 is not evident here -- confirm.
    df_test['newhead'] = 93
    return df_test


# ---------------------------------------------------------------- load data
df_train = pd.read_csv(TRAIN_CSV)
y_train = df_train['action_type']
x_train = df_train.drop(['date', 'newdate', 'action_type'], axis=1)

# Hold out the last 10000 rows for validation.  .iloc makes the slice
# explicitly positional regardless of the index labels.
split = -10000
x_valid = x_train.iloc[split:]
y_valid = y_train.iloc[split:]
x_train = x_train.iloc[:split]
y_train = y_train.iloc[:split]

# The raw frame is no longer needed; release it before building the datasets.
del df_train
gc.collect()

lgb_train = lgb.Dataset(x_train, y_train)
lgb_valid = lgb.Dataset(x_valid, y_valid, reference=lgb_train)
gc.collect()

# -------------------------------------------------------------------- train
params = {
    'task': 'train',
    'boosting_type': 'gbdt',
    'objective': 'binary',
    # The original listed '12', which is not a LightGBM metric name; 'l2' is
    # assumed to be what was meant.  A list (not a set) keeps the order fixed.
    'metric': ['l2', 'auc', 'binary_logloss'],
    'num_leaves': 31,
    'learning_rate': 0.05,
    'feature_fraction': 0.9,
    'bagging_fraction': 0.8,
    'bagging_freq': 5,
    'verbose': 0,
}

print('start training...')
gbm = lgb.train(
    params,
    lgb_train,
    num_boost_round=NUM_BOOST_ROUND,
    valid_sets=[lgb_valid],
    # Callback form of early stopping: the early_stopping_rounds= keyword was
    # removed from lgb.train in LightGBM 4, while the callback is available in
    # both older and current releases.
    callbacks=[lgb.early_stopping(EARLY_STOPPING_ROUNDS)],
)

print('saving model...')
gbm.save_model(MODEL_PATH, num_iteration=gbm.best_iteration)

# ------------------------------------------------------------------ predict
df_test = pd.read_csv(TEST_CSV)
gc.collect()
# Quick look at the features about to be scored (the original had a bare
# df_test.head(), which displays nothing outside a notebook).
print(df_test.head())

# Disabled in the original (it was wrapped in a string literal); kept for
# reference as a one-off rewrite of df_test.csv that drops its first column:
#   df_test.columns = ['meiyong', 'uid', 'spu_id', 'brand_id', 'cat_id']
#   del df_test['meiyong']
#   df_test.to_csv('df_test.csv', index=False)

print('start predicting')
y_pred = gbm.predict(df_test, num_iteration=gbm.best_iteration)
print(y_pred)

# One prediction per line, no header and no index column.
jieguo = pd.DataFrame()
jieguo['weights'] = y_pred
jieguo.to_csv(PREDICTIONS_CSV, header=False, index=False)