
Heartbeat Signal Classification Prediction, baseline_v2: Swapping the Single Model for a CNN

baseline_v2_changeModel(cnn): score: 267.2897

# 2021.05.08
# Swapped the lightgbm model for a CNN model.
# Original post; likes are appreciated, and please credit the source when reposting.
Open questions:
what the Dense layers do, and how many layers are needed
how to construct the CNN itself
import os
import gc
import math

import pandas as pd
import numpy as np

import lightgbm as lgb
#import xgboost as xgb
from catboost import CatBoostRegressor
from sklearn.linear_model import SGDRegressor, LinearRegression, Ridge
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import StratifiedKFold, KFold
from sklearn.metrics import log_loss
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder
from tqdm import tqdm
import matplotlib.pyplot as plt
import time
import warnings
warnings.filterwarnings('ignore')
train = pd.read_csv('train.csv')
test = pd.read_csv('testA.csv')
train.head()

   id                                  heartbeat_signals  label
0   0  0.9912297987616655,0.9435330436439665,0.764677...    0.0
1   1  0.9714822034884503,0.9289687459588268,0.572932...    0.0
2   2  1.0,0.9591487564065292,0.7013782792997189,0.23...    2.0
3   3  0.9757952826275774,0.9340884687738161,0.659636...    0.0
4   4  0.0,0.055816398940721094,0.26129357194994196,0...    2.0

def reduce_mem_usage(df):
    start_mem = df.memory_usage().sum() / 1024**2 
    print('Memory usage of dataframe is {:.2f} MB'.format(start_mem))
    
    for col in df.columns:
        col_type = df[col].dtype
        
        if col_type != object:
            c_min = df[col].min()
            c_max = df[col].max()
            if str(col_type)[:3] == 'int':
                if c_min > np.iinfo(np.int8).min and c_max < np.iinfo(np.int8).max:
                    df[col] = df[col].astype(np.int8)
                elif c_min > np.iinfo(np.int16).min and c_max < np.iinfo(np.int16).max:
                    df[col] = df[col].astype(np.int16)
                elif c_min > np.iinfo(np.int32).min and c_max < np.iinfo(np.int32).max:
                    df[col] = df[col].astype(np.int32)
                elif c_min > np.iinfo(np.int64).min and c_max < np.iinfo(np.int64).max:
                    df[col] = df[col].astype(np.int64)  
            else:
                if c_min > np.finfo(np.float16).min and c_max < np.finfo(np.float16).max:
                    df[col] = df[col].astype(np.float16)
                elif c_min > np.finfo(np.float32).min and c_max < np.finfo(np.float32).max:
                    df[col] = df[col].astype(np.float32)
                else:
                    df[col] = df[col].astype(np.float64)
        else:
            df[col] = df[col].astype('category')

    end_mem = df.memory_usage().sum() / 1024**2 
    print('Memory usage after optimization is: {:.2f} MB'.format(end_mem))
    print('Decreased by {:.1f}%'.format(100 * (start_mem - end_mem) / start_mem))
    
    return df

# Simple preprocessing: split the comma-separated signal string into 205 float columns
train_list = []

for items in train.values:
    train_list.append([items[0]] + [float(i) for i in items[1].split(',')] + [items[2]])

train = pd.DataFrame(np.array(train_list))
train.columns = ['id'] + ['s_'+str(i) for i in range(len(train_list[0])-2)] + ['label']
train = reduce_mem_usage(train)

test_list = []
for items in test.values:
    test_list.append([items[0]] + [float(i) for i in items[1].split(',')])

test = pd.DataFrame(np.array(test_list))
test.columns = ['id'] + ['s_'+str(i) for i in range(len(test_list[0])-1)]
test = reduce_mem_usage(test)

Memory usage of dataframe is 157.93 MB
Memory usage after optimization is: 39.67 MB
Decreased by 74.9%
Memory usage of dataframe is 31.43 MB
Memory usage after optimization is: 7.90 MB
Decreased by 74.9%
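Note: reduce_mem_usage downcasts the signal columns to float16, whose 10-bit mantissa keeps only about three decimal digits; that is why the values displayed below differ slightly from the raw CSV. A quick check with numpy (already imported above):

# the raw CSV value 0.9912297987616655 is stored as 0.9912109375 in float16,
# which pandas displays as 0.991211 in the tables below
print(np.float16(0.9912297987616655))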
test

             id       s_0       s_1       s_2       s_3  ...     s_203    s_204
0      100000.0  0.991699  1.000000  0.631836  0.136230  ...  0.000000  0.00000
1      100001.0  0.607422  0.541504  0.340576  0.000000  ...  0.350586  0.36377
2      100002.0  0.975098  0.670898  0.686523  0.708496  ...  0.000000  0.00000
3      100003.0  0.995605  0.916992  0.520996  0.000000  ...  0.000000  0.00000
4      100004.0  1.000000  0.888184  0.745605  0.531738  ...  0.000000  0.00000
...         ...       ...       ...       ...       ...  ...       ...      ...
19995  119995.0  1.000000  0.833008  0.634277  0.639160  ...  0.000000  0.00000
19996  119996.0  1.000000  0.826172  0.452148  0.082214  ...  0.000000  0.00000
19997  119997.0  0.951660  0.916504  0.667480  0.352051  ...  0.000000  0.00000
19998  119998.0  0.927734  0.677246  0.242920  0.055359  ...  0.000000  0.00000
19999  119999.0  0.665527  0.526855  0.516602  0.376465  ...  0.000000  0.00000

20000 rows × 206 columns

# A smarter way to delete a row or column is drop: it does not modify the
# original DataFrame but returns a new DataFrame holding the result
# (effectively a new table).
# drop removes rows by default; pass axis=1 to drop columns.
x_train = train.drop(['id', 'label'], axis=1)
y_train = train['label']
x_test = test.drop(['id'], axis=1)

x_train
            s_0       s_1       s_2       s_3  ...  s_203  s_204
0      0.991211  0.943359  0.764648  0.618652  ...    0.0    0.0
1      0.971680  0.929199  0.572754  0.178467  ...    0.0    0.0
2      1.000000  0.958984  0.701172  0.231812  ...    0.0    0.0
3      0.975586  0.934082  0.659668  0.249878  ...    0.0    0.0
4      0.000000  0.055817  0.261230  0.359863  ...    0.0    0.0
...         ...       ...       ...       ...  ...    ...    ...
99995  1.000000  0.677734  0.222412  0.257080  ...    0.0    0.0
99996  0.926758  0.906250  0.637207  0.415039  ...    0.0    0.0
99997  0.925781  0.587402  0.633301  0.632324  ...    0.0    0.0
99998  1.000000  0.994629  0.829590  0.458252  ...    0.0    0.0
99999  0.925781  0.916504  0.404297  0.000000  ...    0.0    0.0

100000 rows × 205 columns

x_test
            s_0       s_1       s_2       s_3  ...     s_203    s_204
0      0.991699  1.000000  0.631836  0.136230  ...  0.000000  0.00000
1      0.607422  0.541504  0.340576  0.000000  ...  0.350586  0.36377
2      0.975098  0.670898  0.686523  0.708496  ...  0.000000  0.00000
3      0.995605  0.916992  0.520996  0.000000  ...  0.000000  0.00000
4      1.000000  0.888184  0.745605  0.531738  ...  0.000000  0.00000
...         ...       ...       ...       ...  ...       ...      ...
19995  1.000000  0.833008  0.634277  0.639160  ...  0.000000  0.00000
19996  1.000000  0.826172  0.452148  0.082214  ...  0.000000  0.00000
19997  0.951660  0.916504  0.667480  0.352051  ...  0.000000  0.00000
19998  0.927734  0.677246  0.242920  0.055359  ...  0.000000  0.00000
19999  0.665527  0.526855  0.516602  0.376465  ...  0.000000  0.00000

20000 rows × 205 columns

y_train
0        0.0
1        0.0
2        2.0
3        0.0
4        2.0
        ... 
99995    0.0
99996    2.0
99997    3.0
99998    2.0
99999    0.0
Name: label, Length: 100000, dtype: float16
# from keras.utils.np_utils import to_categorical
# y_train = to_categorical(y_train)
# y_train
# loss function: the competition metric, i.e. the total absolute error between
# the predicted probability matrix and the true (one-hot) label matrix
def abs_sum(y_pre, y_tru):
    y_pre = np.array(y_pre)
    y_tru = np.array(y_tru)
    loss = sum(sum(abs(y_pre - y_tru)))
    return loss
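A tiny worked example of the metric (hypothetical 2-sample input): a perfect one-hot prediction contributes 0, while each unit of probability placed on a wrong class is penalized twice, once as missing mass on the true class and once as extra mass on the wrong one.

y_true = [[1, 0, 0, 0],
          [0, 0, 1, 0]]
y_pred = [[0.9, 0.1, 0.0, 0.0],   # 0.1 misplaced mass -> contributes 0.1 + 0.1
          [0.0, 0.0, 1.0, 0.0]]   # exact -> contributes 0.0
print(abs_sum(y_pred, y_true))    # -> 0.2 (up to floating-point rounding)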
from keras.models import Sequential
from keras.layers import Dense # for fully connected layers dense will be used
from keras.layers import Conv1D, MaxPooling1D, Flatten
from keras.optimizers import Adam

# avoid overfitting by normalizing the samples
from keras.layers.normalization import BatchNormalization
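Note: the keras.layers.normalization module only exists in older standalone Keras. If you are on TensorFlow 2.x-era Keras (an assumption about your environment), the drop-in equivalent is:

from keras.layers import BatchNormalization  # newer-Keras equivalent of the import above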
# cnn
def build_model():
    model = Sequential()
    

    # filters = the number of convolution kernels, i.e. output channels
    # padding='same': zero-pad around the input so the output length equals the input length
    model.add(Conv1D(filters = 64, kernel_size = 6, activation='relu', padding = 'same', input_shape = (205, 1))) # input_shape describes one sample: (205, 1), not (100000, 205, 1)
    # output: (None, 205, 64)
    
    # BatchNormalization to curb overfitting
    # Roughly: it re-centers and re-scales the values between the linear part and the
    # activation, keeping them in a range where the nonlinearity still discriminates well:
    # https://www.bilibili.com/video/BV1Lx411j7GT?from=search&seid=5048435414489430319
    model.add(BatchNormalization())
    # output: (None, 205, 64) -- same shape, only the distribution of values changes
    
    # Pooling
    model.add(MaxPooling1D(pool_size=(3), strides = (2), padding = 'same'))
    # output: (None, 103, 64) -- strides=2 halves the length; with padding='same' that is ceil(205/2) = 103

    model.add(Conv1D(filters = 64, kernel_size = 6, activation='relu', padding = 'same'))# (none, 103, 64)
    model.add(BatchNormalization())                                                      #:(none, 103, 64)
    model.add(MaxPooling1D(pool_size=(3), strides = (2), padding = 'same'))              #:(none, 52, 64)

    model.add(Conv1D( filters = 64, kernel_size = 6, activation='relu', padding = 'same'))#:(none, 52, 64)
    model.add(BatchNormalization())                                                       #:(none, 52, 64)
    model.add(MaxPooling1D(pool_size=(3), strides = (2), padding = 'same'))               #:(none, 26, 64)

    # Flatten
    model.add(Flatten())
    # output: (None, 1664) -- lays the feature maps out flat: 26 * 64 = 1664

    # Fully connected layers
    # input layer
    # Dense: units (a positive integer) is the layer's output dimension.
    # The input length must be known at this point, which is why Flatten precedes
    # Dense; otherwise Dense could not infer the shape of its weight matrix.
    model.add(Dense(units = 64, activation='relu'))
    
    # Hidden Layer
    model.add(Dense(units = 64, activation='relu'))
    
    # Output Layer
    model.add(Dense(units = 4, activation='softmax'))

    # loss = 'categorical_crossentropy'
    model.compile(optimizer = 'Adam', loss = 'categorical_crossentropy', metrics = ['accuracy'])
    return model
model = build_model()
# the shapes below are per sample; the leading None is the batch dimension
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
conv1d (Conv1D) (None, 205, 64) 448
_________________________________________________________________
batch_normalization (BatchNo (None, 205, 64) 256
_________________________________________________________________
max_pooling1d (MaxPooling1D) (None, 103, 64) 0
_________________________________________________________________
conv1d_1 (Conv1D) (None, 103, 64) 24640
_________________________________________________________________
batch_normalization_1 (Batch (None, 103, 64) 256
_________________________________________________________________
max_pooling1d_1 (MaxPooling1 (None, 52, 64) 0
_________________________________________________________________
conv1d_2 (Conv1D) (None, 52, 64) 24640
_________________________________________________________________
batch_normalization_2 (Batch (None, 52, 64) 256
_________________________________________________________________
max_pooling1d_2 (MaxPooling1 (None, 26, 64) 0
_________________________________________________________________
flatten (Flatten) (None, 1664) 0
_________________________________________________________________
dense (Dense) (None, 64) 106560
_________________________________________________________________
dense_1 (Dense) (None, 64) 4160
_________________________________________________________________
dense_2 (Dense) (None, 4) 260
=================================================================
Total params: 161,476
Trainable params: 161,092
Non-trainable params: 384
_________________________________________________________________
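The parameter counts in the summary can be reproduced by hand. A quick sanity check (plain arithmetic from the layer shapes above; the 384 non-trainable parameters are the moving mean and variance of the three BatchNormalization layers):

conv1  = 64 * (6 * 1)  + 64   # kernel_size * in_channels * filters + biases = 448
conv23 = 64 * (6 * 64) + 64   # deeper Conv1D layers see 64 input channels -> 24640 each
bn     = 4 * 64               # gamma, beta, moving mean, moving variance = 256 per BN layer
dense0 = 1664 * 64 + 64       # flattened 26*64 = 1664 features into 64 units = 106560
dense1 = 64 * 64 + 64         # = 4160
dense2 = 64 * 4 + 4           # = 260
print(conv1 + 2 * conv23 + 3 * bn + dense0 + dense1 + dense2)  # 161476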

from keras.utils.np_utils import to_categorical

def cv_model(clf, train_x, train_y, test_x, clf_name):
    folds = 5
    seed = 2021
    
    # K-fold cross-validation with KFold
    # n_splits: the number of folds
    # shuffle: shuffle the data before splitting; the folds together cover the whole training set
    # random_state: for a process that is inherently random, fixing the state is what makes
    # the result reproducible; left uncontrolled, every run would split differently.
    # (Think of grabbing sand from a container: grip strength, one hand or two, wet or dry
    # all change the outcome; fixing random_state pins that whole set of factors down.)

    kf = KFold(n_splits=folds, shuffle=True, random_state=seed)
    # prepare for prediction: reshape the test set to (n_samples, 205, 1)
    test = np.zeros((test_x.shape[0], 4))
    test_x = test_x.iloc[:, :].values
    test_x = test_x.reshape(len(test_x), test_x.shape[1], 1)

    cv_scores = []
    

    onehot_encoder = OneHotEncoder(sparse=False)  # defined but unused below; to_categorical does the one-hot encoding
    

    for i, (train_index, test_index) in enumerate(kf.split(x_train, y_train)):
        
        print('************************************ {} ************************************'.format(str(i+1)))
        
        x_kf_train, y_kf_train, x_kf_test, y_kf_test = x_train.iloc[train_index], y_train[train_index], x_train.iloc[test_index], y_train[test_index]

        
        if clf_name == "cnn":
            x_kf_train = x_kf_train.iloc[:, :].values
            x_kf_test = x_kf_test.iloc[:, :].values
            x_kf_train = x_kf_train.reshape(len(x_kf_train),x_kf_train.shape[1],1)
            x_kf_test = x_kf_test.reshape(len(x_kf_test),x_kf_test.shape[1],1)
            
            
            y_kf_train = to_categorical(y_kf_train)
            y_kf_test = to_categorical(y_kf_test)
            
            history = model.fit(x_kf_train,y_kf_train, epochs = 15, batch_size = 32, validation_data=(x_kf_test, y_kf_test))
            x_kf_test_pred = model.predict(x_kf_test)
            test_pred = model.predict(test_x)
            
    
      
            
        
        print("y_kf_test++++++:")  
        print(y_kf_test)

        print('Predicted probability matrix test_pred:')
        print(test_pred)
        print("abs_sum++++++:")
        score = abs_sum(y_kf_test, x_kf_test_pred)
        cv_scores.append(score)
        print("cv_scores+++++:")
        print(cv_scores)
        
#         test += test_pred
#         test = test / kf.n_splits
        # (fold-averaging of the test predictions is left disabled above,
        # so the value returned below comes from the last fold only)

    return test_pred
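Two quirks of cv_model are visible in the logs below: `model` is built once outside the loop, so every fold continues training from the previous fold's weights (which is why fold 2 starts at a much lower loss than fold 1), and the returned `test_pred` comes from the last fold only, since the averaging lines are commented out. A minimal sketch of the more conventional variant, rebuilding the model per fold and averaging the test predictions (a hypothetical alternative, not what produced the logs below):

def cv_model_avg(train_x, train_y, test_x, folds=5, seed=2021):
    kf = KFold(n_splits=folds, shuffle=True, random_state=seed)
    test_x = test_x.values.reshape(len(test_x), test_x.shape[1], 1)
    test_avg = np.zeros((test_x.shape[0], 4))
    for i, (trn_idx, val_idx) in enumerate(kf.split(train_x, train_y)):
        x_trn = train_x.iloc[trn_idx].values.reshape(len(trn_idx), -1, 1)
        x_val = train_x.iloc[val_idx].values.reshape(len(val_idx), -1, 1)
        y_trn = to_categorical(train_y[trn_idx])
        y_val = to_categorical(train_y[val_idx])
        fold_model = build_model()            # fresh weights for every fold
        fold_model.fit(x_trn, y_trn, epochs=15, batch_size=32,
                       validation_data=(x_val, y_val))
        test_avg += fold_model.predict(test_x) / folds   # average across folds
    return test_avg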

# folds = 5
# seed = 2021
# kf = KFold(n_splits=folds, shuffle=True, random_state=seed)

# # test = np.zeros((X_text.shape[0],4))
# cv_scores = []

# onehot_encoder = OneHotEncoder(sparse=False)

# for i, (train_index, valid_index) in enumerate(kf.split(x_train, y_train)):
        
#         print('************************************ {} ************************************'.format(str(i+1)))
        
#         # .iloc extracts rows by position; i ranges over [1, folds]
#         # this pulls four pieces: training x, training y, validation x, validation y
#         trn_x, trn_y, val_x, val_y = x_train[train_index], y_train[train_index], x_train[valid_index], y_train[valid_index]
        
#         ohe = OneHotEncoder()
#         trn_y = ohe.fit_transform(trn_y.values.reshape(-1,1))
#         val_y = ohe.transform(val_y.values.reshape(-1,1))
#         trn_x =trn_x.values.reshape(len(trn_x),trn_x.values.shape[1],1)
#         val_x = val_x.values.reshape(len(val_x),val_x.values.shape[1],1)
        
#         history = model.fit(trn_x,trn_y, epochs = 15, batch_size = 32, validation_data=(val_x,  val_y))
#         model.evaluate(trn_x, trn_y)
def lgb_model(x_train, y_train, x_test):
    # the name is kept from the lightgbm baseline; with clf_name="cnn" it runs the CNN
    lgb_test = cv_model(lgb, x_train, y_train, x_test, "cnn")
    return lgb_test
lgb_test = lgb_model(x_train, y_train, x_test)

************************************ 1 ************************************
Epoch 1/15
2500/2500 [==============================] - 89s 35ms/step - loss: 0.1964 - accuracy: 0.9370 - val_loss: 0.1091 - val_accuracy: 0.9664
Epoch 2/15
2500/2500 [==============================] - 93s 37ms/step - loss: 0.0664 - accuracy: 0.9800 - val_loss: 0.0643 - val_accuracy: 0.9805
Epoch 3/15
2500/2500 [==============================] - 108s 43ms/step - loss: 0.0505 - accuracy: 0.9834 - val_loss: 0.0625 - val_accuracy: 0.9823
Epoch 4/15
2500/2500 [==============================] - 111s 44ms/step - loss: 0.0363 - accuracy: 0.9884 - val_loss: 0.0549 - val_accuracy: 0.9809
Epoch 5/15
2500/2500 [==============================] - 108s 43ms/step - loss: 0.0325 - accuracy: 0.9897 - val_loss: 0.0411 - val_accuracy: 0.9883
Epoch 6/15
2500/2500 [==============================] - 107s 43ms/step - loss: 0.0246 - accuracy: 0.9921 - val_loss: 0.0623 - val_accuracy: 0.9807
Epoch 7/15
2500/2500 [==============================] - 110s 44ms/step - loss: 0.0260 - accuracy: 0.9919 - val_loss: 0.0427 - val_accuracy: 0.9879
Epoch 8/15
2500/2500 [==============================] - 99s 40ms/step - loss: 0.0184 - accuracy: 0.9941 - val_loss: 0.0471 - val_accuracy: 0.9865
Epoch 9/15
2500/2500 [==============================] - 100s 40ms/step - loss: 0.0185 - accuracy: 0.9942 - val_loss: 0.0419 - val_accuracy: 0.9885
Epoch 10/15
2500/2500 [==============================] - 101s 40ms/step - loss: 0.0173 - accuracy: 0.9943 - val_loss: 0.0379 - val_accuracy: 0.9901
Epoch 11/15
2500/2500 [==============================] - 102s 41ms/step - loss: 0.0141 - accuracy: 0.9958 - val_loss: 0.0404 - val_accuracy: 0.9900
Epoch 12/15
2500/2500 [==============================] - 99s 40ms/step - loss: 0.0127 - accuracy: 0.9960 - val_loss: 0.0385 - val_accuracy: 0.9900
Epoch 13/15
2500/2500 [==============================] - 96s 39ms/step - loss: 0.0128 - accuracy: 0.9957 - val_loss: 0.0440 - val_accuracy: 0.9908
Epoch 14/15
2500/2500 [==============================] - 103s 41ms/step - loss: 0.0111 - accuracy: 0.9962 - val_loss: 0.0438 - val_accuracy: 0.9902
Epoch 15/15
2500/2500 [==============================] - 100s 40ms/step - loss: 0.0098 - accuracy: 0.9967 - val_loss: 0.0379 - val_accuracy: 0.9907
y_kf_test++++++:
[[0. 0. 1. 0.]
 [1. 0. 0. 0.]
 [0. 0. 1. 0.]
 ...
 [1. 0. 0. 0.]
 [0. 0. 1. 0.]
 [0. 0. 1. 0.]]
Predicted probability matrix test_pred:
[[9.99999881e-01 1.23368892e-07 6.21502979e-12 4.32745534e-10]
 [7.47138074e-06 1.64497091e-04 9.99590218e-01 2.37837623e-04]
 [1.05034046e-11 1.90382871e-16 1.06615223e-08 1.00000000e+00]
 ...
 [1.77340873e-03 1.38662233e-06 9.98224914e-01 2.29101516e-07]
 [9.99994159e-01 5.82347275e-06 2.35584338e-12 1.23620975e-10]
 [9.99747932e-01 1.21477584e-04 2.38122061e-06 1.28281055e-04]]
abs_sum++++++:
cv_scores+++++:
[473.23671531677246]
************************************ 2 ************************************
Epoch 1/15
2500/2500 [==============================] - 93s 37ms/step - loss: 0.0187 - accuracy: 0.9948 - val_loss: 0.0102 - val_accuracy: 0.9966
Epoch 2/15
2500/2500 [==============================] - 103s 41ms/step - loss: 0.0122 - accuracy: 0.9961 - val_loss: 0.0097 - val_accuracy: 0.9966
Epoch 3/15
2500/2500 [==============================] - 97s 39ms/step - loss: 0.0122 - accuracy: 0.9958 - val_loss: 0.0206 - val_accuracy: 0.9940
Epoch 4/15
2500/2500 [==============================] - 91s 37ms/step - loss: 0.0116 - accuracy: 0.9963 - val_loss: 0.0133 - val_accuracy: 0.9960
Epoch 5/15
2500/2500 [==============================] - 103s 41ms/step - loss: 0.0098 - accuracy: 0.9969 - val_loss: 0.0182 - val_accuracy: 0.9953
Epoch 6/15
2500/2500 [==============================] - 96s 39ms/step - loss: 0.0087 - accuracy: 0.9971 - val_loss: 0.0145 - val_accuracy: 0.9952
Epoch 7/15
2500/2500 [==============================] - 95s 38ms/step - loss: 0.0074 - accuracy: 0.9978 - val_loss: 0.0347 - val_accuracy: 0.9913
Epoch 8/15
2500/2500 [==============================] - 103s 41ms/step - loss: 0.0088 - accuracy: 0.9972 - val_loss: 0.0179 - val_accuracy: 0.9956
Epoch 9/15
2500/2500 [==============================] - 90s 36ms/step - loss: 0.0082 - accuracy: 0.9976 - val_loss: 0.0256 - val_accuracy: 0.9941
Epoch 10/15
2500/2500 [==============================] - 104s 41ms/step - loss: 0.0073 - accuracy: 0.9979 - val_loss: 0.0212 - val_accuracy: 0.9944
Epoch 11/15
2500/2500 [==============================] - 100s 40ms/step - loss: 0.0067 - accuracy: 0.9980 - val_loss: 0.0263 - val_accuracy: 0.9926
Epoch 12/15
2500/2500 [==============================] - 87s 35ms/step - loss: 0.0073 - accuracy: 0.9977 - val_loss: 0.0159 - val_accuracy: 0.9960
Epoch 13/15
2500/2500 [==============================] - 83s 33ms/step - loss: 0.0069 - accuracy: 0.9981 - val_loss: 0.0376 - val_accuracy: 0.9902
Epoch 14/15
2500/2500 [==============================] - 83s 33ms/step - loss: 0.0060 - accuracy: 0.9982 - val_loss: 0.0246 - val_accuracy: 0.9942
Epoch 15/15
2500/2500 [==============================] - 82s 33ms/step - loss: 0.0060 - accuracy: 0.9981 - val_loss: 0.0292 - val_accuracy: 0.9940
y_kf_test++++++:
[[0. 0. 1. 0.]
 [0. 0. 0. 1.]
 [0. 0. 0. 1.]
 ...
 [1. 0. 0. 0.]
 [1. 0. 0. 0.]
 [1. 0. 0. 0.]]
Predicted probability matrix test_pred:
[[1.0000000e+00 3.1578247e-11 8.9162606e-17 9.0605463e-16]
 [7.5351809e-26 4.6311908e-30 1.0000000e+00 2.6168691e-38]
 [1.3659213e-14 1.1359105e-21 7.8721543e-11 1.0000000e+00]
 ...
 [1.6125210e-04 1.5620843e-05 9.9982315e-01 1.6944726e-10]
 [1.0000000e+00 6.7933081e-10 7.0405877e-13 2.2816355e-14]
 [9.9999905e-01 4.4103444e-07 9.4723184e-08 4.0850134e-07]]
abs_sum++++++:
cv_scores+++++:
[473.23671531677246, 290.0407085418701]
************************************ 3 ************************************
Epoch 1/15
2500/2500 [==============================] - 86s 34ms/step - loss: 0.0106 - accuracy: 0.9969 - val_loss: 0.0042 - val_accuracy: 0.9987
Epoch 2/15
2500/2500 [==============================] - 83s 33ms/step - loss: 0.0067 - accuracy: 0.9981 - val_loss: 0.0051 - val_accuracy: 0.9985
Epoch 3/15
2500/2500 [==============================] - 88s 35ms/step - loss: 0.0068 - accuracy: 0.9979 - val_loss: 0.0072 - val_accuracy: 0.9981
Epoch 4/15
2500/2500 [==============================] - 85s 34ms/step - loss: 0.0053 - accuracy: 0.9984 - val_loss: 0.0107 - val_accuracy: 0.9962
Epoch 5/15
2500/2500 [==============================] - 84s 33ms/step - loss: 0.0068 - accuracy: 0.9980 - val_loss: 0.0065 - val_accuracy: 0.9977
Epoch 6/15
2500/2500 [==============================] - 84s 34ms/step - loss: 0.0057 - accuracy: 0.9981 - val_loss: 0.0177 - val_accuracy: 0.9950
Epoch 7/15
2500/2500 [==============================] - 85s 34ms/step - loss: 0.0058 - accuracy: 0.9984 - val_loss: 0.0072 - val_accuracy: 0.9980
Epoch 8/15
2500/2500 [==============================] - 84s 34ms/step - loss: 0.0056 - accuracy: 0.9984 - val_loss: 0.0077 - val_accuracy: 0.9977
Epoch 9/15
2500/2500 [==============================] - 84s 34ms/step - loss: 0.0040 - accuracy: 0.9986 - val_loss: 0.0161 - val_accuracy: 0.9967
Epoch 10/15
2500/2500 [==============================] - 86s 34ms/step - loss: 0.0059 - accuracy: 0.9983 - val_loss: 0.0135 - val_accuracy: 0.9963
Epoch 11/15
2500/2500 [==============================] - 85s 34ms/step - loss: 0.0057 - accuracy: 0.9984 - val_loss: 0.0137 - val_accuracy: 0.9967
Epoch 12/15
2500/2500 [==============================] - 85s 34ms/step - loss: 0.0040 - accuracy: 0.9989 - val_loss: 0.0100 - val_accuracy: 0.9974
Epoch 13/15
2500/2500 [==============================] - 84s 34ms/step - loss: 0.0052 - accuracy: 0.9985 - val_loss: 0.0145 - val_accuracy: 0.9966
Epoch 14/15
2500/2500 [==============================] - 85s 34ms/step - loss: 0.0044 - accuracy: 0.9987 - val_loss: 0.0199 - val_accuracy: 0.9956
Epoch 15/15
2500/2500 [==============================] - 84s 34ms/step - loss: 0.0053 - accuracy: 0.9987 - val_loss: 0.0189 - val_accuracy: 0.9955
y_kf_test++++++:
[[1. 0. 0. 0.]
 [0. 0. 1. 0.]
 [1. 0. 0. 0.]
 ...
 [0. 0. 1. 0.]
 [0. 0. 0. 1.]
 [1. 0. 0. 0.]]
Predicted probability matrix test_pred:
[[1.0000000e+00 3.4608899e-11 6.7611266e-16 1.6289031e-16]
 [3.2447400e-17 3.6295522e-13 1.0000000e+00 1.7152423e-38]
 [2.6966924e-25 2.5888265e-33 4.9505679e-23 1.0000000e+00]
 ...
 [2.8026802e-04 1.5653444e-05 9.9970406e-01 6.4556320e-09]
 [9.9999952e-01 4.2466991e-07 4.3031992e-13 6.4092606e-11]
 [9.9703240e-01 2.8589300e-03 1.0849427e-04 6.7704690e-08]]
abs_sum++++++:
cv_scores+++++:
[473.23671531677246, 290.0407085418701, 216.68724060058594]
************************************ 4 ************************************
Epoch 1/15
2500/2500 [==============================] - 82s 33ms/step - loss: 0.0065 - accuracy: 0.9981 - val_loss: 0.0026 - val_accuracy: 0.9990
Epoch 2/15
2500/2500 [==============================] - 85s 34ms/step - loss: 0.0050 - accuracy: 0.9986 - val_loss: 0.0045 - val_accuracy: 0.9987
Epoch 3/15
2500/2500 [==============================] - 84s 33ms/step - loss: 0.0044 - accuracy: 0.9987 - val_loss: 0.0028 - val_accuracy: 0.9989
Epoch 4/15
2500/2500 [==============================] - 83s 33ms/step - loss: 0.0053 - accuracy: 0.9984 - val_loss: 0.0108 - val_accuracy: 0.9976
Epoch 5/15
2500/2500 [==============================] - 83s 33ms/step - loss: 0.0043 - accuracy: 0.9989 - val_loss: 0.0116 - val_accuracy: 0.9972
Epoch 6/15
2500/2500 [==============================] - 84s 34ms/step - loss: 0.0031 - accuracy: 0.9990 - val_loss: 0.0087 - val_accuracy: 0.9975
Epoch 7/15
2500/2500 [==============================] - 83s 33ms/step - loss: 0.0030 - accuracy: 0.9990 - val_loss: 0.0230 - val_accuracy: 0.9944
Epoch 8/15
2500/2500 [==============================] - 82s 33ms/step - loss: 0.0052 - accuracy: 0.9987 - val_loss: 0.0126 - val_accuracy: 0.9963
Epoch 9/15
2500/2500 [==============================] - 83s 33ms/step - loss: 0.0049 - accuracy: 0.9987 - val_loss: 0.0222 - val_accuracy: 0.9937
Epoch 10/15
2500/2500 [==============================] - 84s 34ms/step - loss: 0.0036 - accuracy: 0.9990 - val_loss: 0.0134 - val_accuracy: 0.9966
Epoch 11/15
2500/2500 [==============================] - 83s 33ms/step - loss: 0.0032 - accuracy: 0.9990 - val_loss: 0.0124 - val_accuracy: 0.9967
Epoch 12/15
2500/2500 [==============================] - 83s 33ms/step - loss: 0.0046 - accuracy: 0.9989 - val_loss: 0.0068 - val_accuracy: 0.9976
Epoch 13/15
2500/2500 [==============================] - 84s 33ms/step - loss: 0.0043 - accuracy: 0.9989 - val_loss: 0.0149 - val_accuracy: 0.9965
Epoch 14/15
2500/2500 [==============================] - 83s 33ms/step - loss: 0.0033 - accuracy: 0.9990 - val_loss: 0.0086 - val_accuracy: 0.9979
Epoch 15/15
2500/2500 [==============================] - 83s 33ms/step - loss: 0.0027 - accuracy: 0.9992 - val_loss: 0.0172 - val_accuracy: 0.9952
y_kf_test++++++:
[[1. 0. 0. 0.]
 [1. 0. 0. 0.]
 [1. 0. 0. 0.]
 ...
 [0. 0. 0. 1.]
 [1. 0. 0. 0.]
 [0. 1. 0. 0.]]
Predicted probability matrix test_pred:
[[1.0000000e+00 1.0942575e-12 3.9219293e-17 9.0291727e-18]
 [1.8889039e-13 1.7051572e-07 9.9999988e-01 8.8278994e-23]
 [1.4202803e-20 5.8440978e-34 1.3529702e-17 1.0000000e+00]
 ...
 [3.3775454e-05 2.1584659e-04 9.9975032e-01 4.9212700e-12]
 [9.9991751e-01 6.5086162e-05 2.6754990e-06 1.4732053e-05]
 [9.9999952e-01 8.1493896e-09 4.8628596e-07 3.6478176e-09]]
abs_sum++++++:
cv_scores+++++:
[473.23671531677246, 290.0407085418701, 216.68724060058594, 255.52966451644897]
************************************ 5 ************************************
Epoch 1/15
2500/2500 [==============================] - 84s 34ms/step - loss: 0.0068 - accuracy: 0.9983 - val_loss: 0.0020 - val_accuracy: 0.9994
Epoch 2/15
2500/2500 [==============================] - 84s 34ms/step - loss: 0.0041 - accuracy: 0.9990 - val_loss: 0.0135 - val_accuracy: 0.9972
Epoch 3/15
2500/2500 [==============================] - 84s 33ms/step - loss: 0.0033 - accuracy: 0.9990 - val_loss: 0.0023 - val_accuracy: 0.9995
Epoch 4/15
2500/2500 [==============================] - 83s 33ms/step - loss: 0.0044 - accuracy: 0.9987 - val_loss: 0.0044 - val_accuracy: 0.9987
Epoch 5/15
2500/2500 [==============================] - 84s 33ms/step - loss: 0.0037 - accuracy: 0.9990 - val_loss: 0.0044 - val_accuracy: 0.9989
Epoch 6/15
2500/2500 [==============================] - 83s 33ms/step - loss: 0.0033 - accuracy: 0.9991 - val_loss: 0.0128 - val_accuracy: 0.9973
Epoch 7/15
2500/2500 [==============================] - 83s 33ms/step - loss: 0.0042 - accuracy: 0.9989 - val_loss: 0.0045 - val_accuracy: 0.9988
Epoch 8/15
2500/2500 [==============================] - 83s 33ms/step - loss: 0.0041 - accuracy: 0.9990 - val_loss: 0.0029 - val_accuracy: 0.9988
Epoch 9/15
2500/2500 [==============================] - 84s 34ms/step - loss: 0.0035 - accuracy: 0.9991 - val_loss: 0.0089 - val_accuracy: 0.9969
Epoch 10/15
2500/2500 [==============================] - 83s 33ms/step - loss: 0.0032 - accuracy: 0.9991 - val_loss: 0.0076 - val_accuracy: 0.9974
Epoch 11/15
2500/2500 [==============================] - 83s 33ms/step - loss: 0.0031 - accuracy: 0.9992 - val_loss: 0.0062 - val_accuracy: 0.9981
Epoch 12/15
2500/2500 [==============================] - 84s 33ms/step - loss: 0.0041 - accuracy: 0.9988 - val_loss: 0.0058 - val_accuracy: 0.9981
Epoch 13/15
2500/2500 [==============================] - 84s 34ms/step - loss: 0.0043 - accuracy: 0.9989 - val_loss: 0.0087 - val_accuracy: 0.9975
Epoch 14/15
2500/2500 [==============================] - 85s 34ms/step - loss: 0.0030 - accuracy: 0.9992 - val_loss: 0.0033 - val_accuracy: 0.9990
Epoch 15/15
2500/2500 [==============================] - 83s 33ms/step - loss: 0.0028 - accuracy: 0.9992 - val_loss: 0.0074 - val_accuracy: 0.9981
y_kf_test++++++:
[[0. 0. 1. 0.]
 [1. 0. 0. 0.]
 [1. 0. 0. 0.]
 ...
 [1. 0. 0. 0.]
 [1. 0. 0. 0.]
 [1. 0. 0. 0.]]
Predicted probability matrix test_pred:
[[1.0000000e+00 2.9956503e-13 1.9854391e-16 2.4101917e-17]
 [7.2484188e-19 1.7757707e-14 1.0000000e+00 0.0000000e+00]
 [2.4454344e-29 0.0000000e+00 1.4663728e-33 1.0000000e+00]
 ...
 [8.1580965e-06 1.2090248e-04 9.9987090e-01 1.8680077e-12]
 [1.0000000e+00 4.4534781e-10 1.6605388e-13 7.2103205e-15]
 [1.0000000e+00 1.0435092e-11 6.3772593e-13 2.8722074e-14]]
abs_sum++++++:
cv_scores+++++:
[473.23671531677246, 290.0407085418701, 216.68724060058594, 255.52966451644897, 83.4570825099945]
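For reference, the mean of the five fold scores printed above (cv_scores is local to cv_model, so this is just arithmetic on the printed values):

fold_scores = [473.23671531677246, 290.0407085418701, 216.68724060058594,
               255.52966451644897, 83.4570825099945]
print(sum(fold_scores) / len(fold_scores))  # ~263.79, in line with the 267.2897 leaderboard score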
temp = pd.DataFrame(lgb_test)
result = pd.read_csv('sample_submit.csv')
result['label_0'] = temp[0]
result['label_1'] = temp[1]
result['label_2'] = temp[2]
result['label_3'] = temp[3]
result.to_csv('submit_baseline_v2.3.csv', index=False)



submit_data = pd.read_csv('submit_baseline_v2.3.csv')
submit_data
           id       label_0       label_1       label_2       label_3
0      100000  1.000000e+00  2.995650e-13  1.985439e-16  2.410192e-17
1      100001  7.248419e-19  1.775771e-14  1.000000e+00  0.000000e+00
2      100002  2.445434e-29  0.000000e+00  1.466373e-33  1.000000e+00
3      100003  1.000000e+00  1.637765e-21  2.459309e-21  1.862687e-24
4      100004  1.000000e+00  3.080988e-10  6.932140e-15  5.327876e-19
...       ...           ...           ...           ...           ...
19995  119995  9.999999e-01  6.929825e-08  2.932834e-08  4.951478e-10
19996  119996  1.000000e+00  4.902514e-08  2.717561e-08  6.707961e-12
19997  119997  8.158096e-06  1.209025e-04  9.998709e-01  1.868008e-12
19998  119998  1.000000e+00  4.453478e-10  1.660539e-13  7.210321e-15
19999  119999  1.000000e+00  1.043509e-11  6.377259e-13  2.872207e-14

20000 rows × 5 columns

# If the model is confident about a row (max probability > 0.9), snap the row
# to a hard 0/1 one-hot vector; this removes the small residual probabilities
# that would otherwise add to the abs-sum penalty on confidently-correct rows.
for index, row in submit_data.iterrows():
    row_max = max(list(row)[1:])
    if row_max > 0.9:
        for i in range(1, 5):
            if row[i] > 0.9:
                submit_data.iloc[index, i] = 1
            else:
                submit_data.iloc[index, i] = 0
submit_data

           id  label_0  label_1  label_2  label_3
0      100000      1.0      0.0      0.0      0.0
1      100001      0.0      0.0      1.0      0.0
2      100002      0.0      0.0      0.0      1.0
3      100003      1.0      0.0      0.0      0.0
4      100004      1.0      0.0      0.0      0.0
...       ...      ...      ...      ...      ...
19995  119995      1.0      0.0      0.0      0.0
19996  119996      1.0      0.0      0.0      0.0
19997  119997      0.0      0.0      1.0      0.0
19998  119998      1.0      0.0      0.0      0.0
19999  119999      1.0      0.0      0.0      0.0

20000 rows × 5 columns

submit_data.to_csv('submit_baseline_v2.3.1.csv',index=False)
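The iterrows loop above works but is slow on 20000 rows; a vectorized equivalent sketch (same 0.9 threshold, using the submission's column names):

label_cols = ['label_0', 'label_1', 'label_2', 'label_3']
probs = submit_data[label_cols]
confident = probs.max(axis=1) > 0.9                # rows where the model is confident
submit_data.loc[confident, label_cols] = (probs[confident] > 0.9).astype(float)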
