1. 程式人生 > 機器學習歸一化(附Python實現原始碼)

機器學習歸一化(附Python實現原始碼)

# -*- coding: utf-8 -*-
import inspect
import math
import numpy as np
from sklearn import preprocessing


def max_min_normalization(data_list):
    """
    Rescale a sequence of numbers into [0, 1] with min-max normalization.

    x_new = (x - min) / (max - min)

    :param data_list: sequence of numbers (list, tuple or 1-D numpy array)
    :return: list of floats rounded to 3 decimal places; an empty list for
        empty input, and all 0.0 when every value is identical
    """
    # Use len() rather than truthiness: ``not array`` raises for numpy arrays.
    count = len(data_list)
    if count == 0:
        return []

    # Find the extremes once instead of rescanning the data for every element.
    lowest = float(min(data_list))
    span = float(max(data_list)) - lowest
    if span == 0:
        # Constant input: the formula would divide by zero, so map to 0.0.
        return [0.0] * count

    return [round((float(value) - lowest) / span, 3) for value in data_list]


def mean_normalization(data_list):
    """
    Center a sequence of numbers on its mean and scale by its range.

    The result is not confined to [0, 1]; values below the mean come out
    negative.
    x_new = (x - mean) / (max - min)

    :param data_list: sequence of numbers (list, tuple or 1-D numpy array)
    :return: list of floats rounded to 3 decimal places; an empty list for
        empty input, and all 0.0 when every value is identical
    """
    # Use len() rather than truthiness: ``not array`` raises for numpy arrays.
    count = len(data_list)
    if count == 0:
        return []

    # Seed the sum with 0.0 so the mean is computed with true division;
    # with integer input Python 2 would otherwise floor the mean.
    mean = sum(data_list, 0.0) / count
    span = float(max(data_list)) - float(min(data_list))
    if span == 0:
        # Constant input: the formula would divide by zero, so map to 0.0.
        return [0.0] * count

    return [round((float(value) - mean) / span, 3) for value in data_list]


def zscores_normalization(data_list):
    """
    Standardize a sequence of numbers with the z-score method.

    x_new = (x - mean) / std, where std is the population standard
    deviation (the squared deviations are divided by n, not n - 1).

    :param data_list: sequence of numbers (list, tuple or 1-D numpy array)
    :return: list of floats rounded to 3 decimal places; an empty list for
        empty input, and all 0.0 when the standard deviation is zero
    """
    # Use len() rather than truthiness: ``not array`` raises for numpy arrays.
    count = len(data_list)
    if count == 0:
        return []

    mean = sum(data_list, 0.0) / count
    variance = sum((float(value) - mean) ** 2 for value in data_list) / count
    std_value = math.sqrt(variance)
    if std_value == 0:
        # No spread in the data: dividing by the std would fail, so map to 0.0.
        return [0.0] * count

    return [round((float(value) - mean) / std_value, 3) for value in data_list]


def max_min_normalization_using_numpy(data_list):
    """
    Min-max normalize a sequence of numbers using numpy.

    x_new = (x - min) / (max - min)

    :param data_list: 1-D sequence of numbers (list, tuple or numpy array)
    :return: list of Python floats rounded to 3 decimal places; an empty
        list for empty input, and all 0.0 when every value is identical
    """
    values = np.asarray(data_list, dtype=float)
    if values.size == 0:
        return []

    # Local names deliberately avoid shadowing the builtins max() and min().
    lowest = values.min()
    span = values.max() - lowest
    if span == 0:
        # Constant input: dividing by a zero range would produce NaN.
        return [0.0] * values.size

    scaled = (values - lowest) / span
    # Round plain Python floats so results match the pure-Python variant.
    return [round(value, 3) for value in scaled.tolist()]


def zscores_normalization_using_numpy(data_list):
    """
    Standardize a sequence of numbers with z-scores, using numpy's mean/std.

    x_new = (x - mean) / std, with numpy's default (population) standard
    deviation.

    :param data_list: 1-D sequence of numbers (list, tuple or numpy array)
    :return: list of Python floats rounded to 3 decimal places; an empty
        list for empty input, and all 0.0 when the standard deviation is zero
    """
    values = np.asarray(data_list, dtype=float)
    if values.size == 0:
        return []

    std = values.std()
    if std == 0:
        # No spread in the data: dividing by the std would produce NaN.
        return [0.0] * values.size

    scores = (values - values.mean()) / std
    # Round plain Python floats so results match the pure-Python variant.
    return [round(score, 3) for score in scores.tolist()]


def normalize_data_using_sk(data_list):
    """
    Min-max normalize the data with scikit-learn's ``minmax_scale``.

    :param data_list: sequence of numbers
    :return: numpy array of the scaled values rounded to 3 decimal places
    """
    # Lay the values out as a single row so that scaling along axis=1
    # treats the whole sequence as one sample.
    as_row = np.asarray(data_list, dtype='float').reshape(1, -1)
    scaled_row = preprocessing.minmax_scale(as_row, axis=1)
    rounded = np.round(scaled_row, 3)
    # Drop the artificial leading axis again.
    return rounded[0, :]


if __name__ == '__main__':
    # Demo: run every function defined in this module on the same random
    # sample of 10 integers and print each result.
    data_list = np.random.randint(1, 20, 10)
    # Snapshot the namespace first: the loop variables below are themselves
    # module globals, so walking globals() directly would mutate the dict
    # while it is being iterated.
    for name, candidate in list(globals().items()):
        if inspect.isfunction(candidate):
            result = candidate(data_list)
            # The parenthesized single-argument form prints the same text on
            # Python 2 and is valid syntax on Python 3.
            print('%s:\n%s' % (name, result))

執行結果:

zscores_normalization_using_numpy:
[-1.528, 1.382, -0.255, 1.564, -0.073, 0.291, 0.837, -1.346, -0.8, -0.073]
max_min_normalization:
[0.0, 0.941, 0.412, 1.0, 0.471, 0.588, 0.765, 0.059, 0.235, 0.471]
normalize_data_using_sk:
[0.    0.941 0.412 1.    0.471 0.588 0.765 0.059 0.235 0.471]
max_min_normalization_using_numpy:
[0.0, 0.941, 0.412, 1.0, 0.471, 0.588, 0.765, 0.059, 0.235, 0.471]
mean_normalization:
[-0.471, 0.471, -0.059, 0.529, 0.0, 0.118, 0.294, -0.412, -0.235, 0.0]
zscores_normalization:
[-1.528, 1.382, -0.255, 1.564, -0.073, 0.291, 0.837, -1.346, -0.8, -0.073]