1. 程式人生 > >LabelEncoder標籤編碼

LabelEncoder標籤編碼

作用: 利用LabelEncoder() 將轉換成連續的數值型變數。即是對不連續的數字或者文字進行編號。

import pandas as pd
#先建立一個數據框(包含缺失值)
df = pd.DataFrame({'auth':['spring','summer','fall','spring'],
                   'sply':['a','c','a','b'],
                   'name':['zhangsan','lisi','xiaohua','xiaomei']})
df
Out[124]: 
     auth sply      name
0  spring    a  zhangsan
1  summer    c      lisi
2    fall    a   xiaohua
3  spring    b   xiaomei

categorical_name = ['auth','sply','name']

#定義一個迴圈函式,處理分型別特徵,進行標籤編碼
def categorical_preprocessing(dataset,categorical_feature):
    '''
    param:
        dataset:DataFrame,輸入的資料集
        categorical_feature:list,分類特徵列名
    '''
    for feature in categorical_feature:
        set_feature = set(dataset[feature])#將特徵對映到集合中
        dic_feature = {}
        for i ,feat in enumerate(set_feature):
            dic_feature[feat] = i
        dataset[feature] = dataset[feature].map(dic_feature)
    return dataset

#處理分類特徵編碼
dataset = categorical_preprocessing(df,categorical_name)
dataset
Out[122]: 
   auth  sply  name
0     1     0     1
1     0     1     3
2     2     0     0
3     1     2     2