10分鐘看明白大M法和兩階段法

阿新 • • 發佈：2020-10-17

Numpy 基礎

1. Numpy 安裝

pip install numpy
pip install numpy -i https://pypi.tuna.tsinghua.edu.cn/simple

2. Numpy 屬性

import numpy as np

# Build a 2-D array from a nested list: 2 rows, 3 columns.
array = np.array([[1, 2, 3], [4, 5, 6]])

print(array)
# Number of dimensions (axes)
print("number of dim:", array.ndim)
# Shape as a tuple: (rows, columns)
print("shape:", array.shape)
# Total number of elements
print("size:", array.size)

 
"""
執行結果：
[[1 2 3]
 [4 5 6]]
number of dim: 2
shape: (2, 3)
size: 6
"""

3. Numpy建立array

import numpy as np

# 1-D array from a flat list.
t1 = np.array([1, 2, 3, 4])
print(t1)

# 2-D array from a nested list (2 rows x 4 columns).
t2 = np.array([[1, 2, 3, 4], [5, 6, 7, 8]])
print(t2)

# 3x4 array filled with zeros (printed as floats: "0.").
t3 = np.zeros((3, 4))
print(t3)

# 3x4 array filled with ones, stored as 16-bit integers.
t4 = np.ones((3, 4), dtype=np.int16)
print(t4)

# 3x4 array allocated WITHOUT initialisation: the contents are whatever
# happened to be in memory, so the zeros in the sample output are not
# guaranteed.
t5 = np.empty((3, 4))
 
print(t5)

# Stepped range: start 10, stop 20 (exclusive), step 2.
t6 = np.arange(10, 20, 2)
print(t6)

# 0..11 rearranged into 3 rows x 4 columns.
t7 = np.arange(12).reshape(3, 4)
print(t7)

# 6 evenly spaced values from 1 to 10 (both ends included), as 2x3.
t8 = np.linspace(1, 10, 6).reshape(2, 3)
print(t8)


"""
執行結果：
[1 2 3 4]
[[1 2 3 4]
 [5 6 7 8]]
[[0. 0. 0. 0.]
 [0. 0. 0. 0.]
 [0. 0. 0. 0.]]
[[1 1 1 1]
 [1 1 1 1]
 [1 1 1 1]]
[[0. 0. 0. 0.]
 [0. 0. 0. 0.]
 [0. 0. 0. 0.]]
[10 12 14 16 18]
[[ 0  1  2  3]
 [ 4  5  6  7]
 [ 8  9 10 11]]
[[ 1.   2.8  4.6]
 [ 6.4  8.2 10. ]]
 
"""

4. Numpy的基礎運算

4.1Numpy 資料型別

型別	型別程式碼	說明
int8, uint8	i1, u1	有符號和無符號的8位（1位元組）整型
int16, uint16	i2, u2	有符號和無符號的16位（2位元組）整型
int32, uint32	i4, u4	有符號和無符號的32位（4位元組）整型
int64, uint64	i8, u8	有符號和無符號的64位（8位元組）整型
float16	f2	半精度浮點數
float32	f4或f	標準的單精度浮點數，與C的float相容
float64	f8或d	標準的雙精度浮點數，與C的double和Python的float物件相容
float128	f16或g	擴展精度浮點數
complex64	c8	用兩個32位浮點數（實部與虛部）表示的複數
complex128	c16	用兩個64位浮點數（實部與虛部）表示的複數
complex256	c32	用兩個128位浮點數（實部與虛部）表示的複數
bool	?	儲存True和False值的布林型別

4.2numpy 資料型別操作

import numpy as np

# Specify the dtype at creation time
t_1 = np.array(range(1, 5), dtype=float)
print(t_1, type(t_1), t_1.dtype)

# 'i1' is the type code for int8 (1-byte signed integer)
t_2 = np.array(range(1, 5), dtype='i1')
print(t_2, type(t_2), t_2.dtype)

# With dtype=bool, 1 becomes True and 0 becomes False
t_3 = np.array([1, 0, 1, 1, 0, 0], dtype=bool)
print(t_3, type(t_3), t_3.dtype)

# Change the dtype: astype() gives the converted array, bound here to a new name
t_4 = t_3.astype('int8')
print(t_4, type(t_4), t_4.dtype)

"""
執行結果：
[1. 2. 3. 4.] <class 'numpy.ndarray'> float64
[1 2 3 4] <class 'numpy.ndarray'> int8
[ True False  True  True False False] <class 'numpy.ndarray'> bool
[1 0 1 1 0 0] <class 'numpy.ndarray'> int8
"""

4.3修改浮點型的小數位數

import numpy as np
from random import random

# Ten random floats from the standard-library random(); values differ per run
t_1 = np.array([random() for i in range(10)], dtype=float)
print(t_1, type(t_1), t_1.dtype)

# Keep two decimal places
t_2 = np.round(t_1, 2)
print(t_2)

"""
執行結果：
[0.44010709 0.57982965 0.87300702 0.39740862 0.30596835 0.08421772 0.58618834 0.61866253 0.41368359 0.32946455] <class 'numpy.ndarray'> float64
[0.44 0.58 0.87 0.4  0.31 0.08 0.59 0.62 0.41 0.33]
"""

4.4 例項

demo_01

import numpy as np

# Two 1-D arrays of equal length: [10 20 30 40] and [0 1 2 3]
t1 = np.array([10, 20, 30, 40])
t2 = np.arange(4)

print("原始的元素: ", t1, t2)

# Element-wise subtraction
print("對應位置的元素進行相減: ", t1 - t2)

# Element-wise addition
print("對應位置的元素進行相加: ", t1 + t2)

# Element-wise multiplication
print("對應位置的元素進行相乘: ", t1 * t2)

# Square of every element of t1
print("t1的平方: ", t1 ** 2)

# Element-wise comparison: produces a boolean array (one True/False per element)
print("布林索引: ", t2 < 3)


"""
執行結果：
原始的元素:  [10 20 30 40] [0 1 2 3]
對應位置的元素進行相減:  [10 19 28 37]
對應位置的元素進行相加:  [10 21 32 43]
對應位置的元素進行相乘:  [  0  20  60 120]
t1的平方:  [ 100  400  900 1600]
布林索引:  [ True  True  True False]
"""

demo_02

import numpy as np

# Two 2x2 matrices
t1 = np.array([[1, 1], [0, 1]])
t2 = np.arange(4).reshape(2, 2)

print("原始的元素t1: ", t1)
print("原始的元素t2: ", t2)

# `*` multiplies element by element; it is NOT the matrix product
print("對應位置的元素進行相乘: ", t1 * t2)

# Matrix product, written as a function call and as a method call
print("矩陣乘法方式一: ", np.dot(t1, t2))
print("矩陣乘法方式二: ", t1.dot(t2))

"""
執行結果：
原始的元素t1:  [[1 1]
 [0 1]]
原始的元素t2:  [[0 1]
 [2 3]]
對應位置的元素進行相乘:  [[0 1]
 [0 3]]
矩陣乘法方式一:  [[2 4]
 [2 3]]
矩陣乘法方式二:  [[2 4]
 [2 3]]

"""

demo_03

import numpy as np

# 2x4 array of random floats; values differ on every run
t1 = np.random.random((2, 4))

print(t1)
# Reductions over the whole array
print("求和", np.sum(t1))
print("求最小值", np.min(t1))
print("求最大值", np.max(t1))

# Reductions along one axis: axis=1 yields one value per row,
# axis=0 yields one value per column
print("自定義維度求和", np.sum(t1, axis=1))
print("自定義維度求最小值", np.min(t1, axis=0))
print("自定義維度求最大值", np.max(t1, axis=1))

"""
執行結果：
[[0.71728488 0.18311745 0.78101771 0.44276308]
 [0.59118476 0.11805874 0.49797704 0.14829068]]
求和 3.479694341207028
求最小值 0.1180587424542946
求最大值 0.7810177064485218
自定義維度求和 [2.12418312 1.35551122]
自定義維度求最小值 [0.59118476 0.11805874 0.49797704 0.14829068]
自定義維度求最大值 [0.78101771 0.59118476]
"""

demo_04

import numpy as np

# Values 2..13 arranged as 3 rows x 4 columns
t1 = np.arange(2, 14).reshape((3, 4))
print(t1)

# Index of the minimum (position in the flattened array)
print("獲取最小值的索引:\n ", np.argmin(t1))

# Index of the maximum (position in the flattened array)
print("獲取最大值的索引:\n ", np.argmax(t1))

# Mean of all elements (np.average gives the same result when no weights are passed)
print("獲取平均值:\n ", np.mean(t1))
print("獲取平均值:\n ", np.average(t1))

# Median of all elements
print("獲取中位數:\n ", np.median(t1))

# Running total over the flattened array
print("逐步累加:\n ", np.cumsum(t1))

# Difference between neighbouring elements within each row
print("每兩個數之間的差:\n ", np.diff(t1))

# Positions of non-zero elements: a tuple of (row indices, column indices)
print("找出非0的數:\n ", np.nonzero(t1))

# Sort each row (rows here are already ascending, so the output is unchanged)
print("排序:\n ", np.sort(t1))

# Transpose: rows become columns and columns become rows
print("將行變成列，將列變成行:\n ", np.transpose(t1))
print("將行變成列，將列變成行:\n ", t1.T)

# Clamp values to [5, 9]: anything above 9 becomes 9, anything below 5 becomes 5
print("將矩陣中大於9的數該為9，將小於5的數改為5:\n ", np.clip(t1, 5, 9))


"""
執行結果：
[[ 2  3  4  5]
 [ 6  7  8  9]
 [10 11 12 13]]
獲取最小值的索引:
  0
獲取最大值的索引:
  11
獲取平均值:
  7.5
獲取平均值:
  7.5
獲取中位數:
  7.5
逐步累加:
  [ 2  5  9 14 20 27 35 44 54 65 77 90]
每兩個數之間的差:
  [[1 1 1]
 [1 1 1]
 [1 1 1]]
找出非0的數:
  (array([0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2], dtype=int64), array([0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3], dtype=int64))
排序:
  [[ 2  3  4  5]
 [ 6  7  8  9]
 [10 11 12 13]]
將行變成列，將列變成行:
  [[ 2  6 10]
 [ 3  7 11]
 [ 4  8 12]
 [ 5  9 13]]
將行變成列，將列變成行:
  [[ 2  6 10]
 [ 3  7 11]
 [ 4  8 12]
 [ 5  9 13]]
將矩陣中大於9的數該為9，將小於5的數改為5:
  [[5 5 5 5]
 [6 7 8 9]
 [9 9 9 9]]
"""

4.5 Numpy 中的轉置

import numpy as np

# Values 0..23 arranged as 4 rows x 6 columns
t1 = np.arange(24).reshape((4, 6))
print(t1)
print('*' * 50)

# Transpose, way 1: rows become columns and columns become rows
print(t1.transpose())
print('*' * 50)

# Transpose, way 2: the .T attribute
print(t1.T)
print('*' * 50)

# Transpose, way 3: swap axis 1 with axis 0
print(t1.swapaxes(1,0))

"""
執行結果：
[[ 0  1  2  3  4  5]
 [ 6  7  8  9 10 11]
 [12 13 14 15 16 17]
 [18 19 20 21 22 23]]
**************************************************
[[ 0  6 12 18]
 [ 1  7 13 19]
 [ 2  8 14 20]
 [ 3  9 15 21]
 [ 4 10 16 22]
 [ 5 11 17 23]]
**************************************************
[[ 0  6 12 18]
 [ 1  7 13 19]
 [ 2  8 14 20]
 [ 3  9 15 21]
 [ 4 10 16 22]
 [ 5 11 17 23]]
**************************************************
[[ 0  6 12 18]
 [ 1  7 13 19]
 [ 2  8 14 20]
 [ 3  9 15 21]
 [ 4 10 16 22]
 [ 5 11 17 23]]
"""

5. Numpy索引

import numpy as np

# 1-D array holding the values 3..14
t1 = np.arange(3, 15)
print(t1)
# Indices are 0-based, so index 3 is the fourth value
print(t1[3])

# 2-D array: the same values arranged as 3 rows x 4 columns
t2 = np.arange(3, 15).reshape(3, 4)
print(t2)
# The printed labels count rows/columns from 1; the indices are 0-based.
print("獲取第2行第2列的元素: ", t2[1][1])
print("獲取第3行第3列的元素: ", t2[2][2])
print("獲取第3行第2列的元素: ", t2[2, 1])
print("獲取第3行的所有元素: ", t2[2, :])
print("獲取第2列的所有元素: ", t2[:, 1])
# Row index 1 is the SECOND row, so the label reads 第2行 (it used to say
# 第1行, which contradicted the 1-based labels above).
print("獲取第2行的第2列到第3列的元素: ", t2[1, 1:3])


# Iteration: flatten() gives a 1-D array of all elements; .flat iterates
# over them one at a time
print(t2.flatten())
for item in t2.flat:
    print(item)

"""
執行結果：
[ 3  4  5  6  7  8  9 10 11 12 13 14]
6
[[ 3  4  5  6]
 [ 7  8  9 10]
 [11 12 13 14]]
獲取第2行第2列的元素:  8
獲取第3行第3列的元素:  13
獲取第3行第2列的元素:  12
獲取第3行的所有元素:  [11 12 13 14]
獲取第2列的所有元素:  [ 4  8 12]
獲取第2行的第2列到第3列的元素:  [8 9]
[ 3  4  5  6  7  8  9 10 11 12 13 14]
3
4
5
6
7
8
9
10
11
12
13
14
"""

6. Numpy array合併

import numpy as np

# Two 1-D arrays of length 3
t1 = np.array([1, 1, 1])
t2 = np.array([2, 2, 2])

# Vertical stack: each 1-D array becomes a row -> shape (2, 3)
t3 = np.vstack((t1, t2))
print("上下合併:\n ", t3, t3.shape)

# Horizontal stack: 1-D arrays are joined end to end -> shape (6,)
t4 = np.hstack((t1, t2))
print("左右合併:\n ", t4, t4.shape)

# Add a new axis so each array becomes a column of shape (3, 1)
s1 = np.array([1, 1, 1])[:, np.newaxis]
s2 = np.array([2, 2, 2])[:, np.newaxis]

# Vertical stack of two columns -> shape (6, 1)
s3 = np.vstack((s1, s2))
print("上下合併:\n ", s3, s3.shape)

# Horizontal stack of two columns -> shape (3, 2)
s4 = np.hstack((s1, s2))
print("左右合併:\n ", s4, s4.shape)

# Join several arrays at once: axis=0 stacks them downward,
# axis=1 places them side by side
s5 = np.concatenate((s1, s2, s2, s1), axis=0)
print(s5)
s6 = np.concatenate((s1, s2, s2, s1), axis=1)
print(s6)


"""
執行結果：
上下合併:
  [[1 1 1]
 [2 2 2]] (2, 3)
左右合併:
  [1 1 1 2 2 2] (6,)
上下合併:
  [[1]
 [1]
 [1]
 [2]
 [2]
 [2]] (6, 1)
左右合併:
  [[1 2]
 [1 2]
 [1 2]] (3, 2)
[[1]
 [1]
 [1]
 [2]
 [2]
 [2]
 [2]
 [2]
 [2]
 [1]
 [1]
 [1]]
[[1 2 2 1]
 [1 2 2 1]
 [1 2 2 1]]
"""

7. Numpy array分割

import numpy as np

# Values 0..11 arranged as 3 rows x 4 columns
t1 = np.arange(12).reshape((3, 4))
print(t1)

# Split along axis 0 into 3 equal pieces (one row each);
# vsplit is the shorthand for axis=0
print("橫向分割\n")
print(np.split(t1, 3, axis=0))
print(np.vsplit(t1, 3))

# Split along axis 1 into 2 equal pieces (two columns each);
# hsplit is the shorthand for axis=1
print("縱向分割\n")
print(np.split(t1, 2, axis=1))
print(np.hsplit(t1, 2))

# Uneven split: 4 columns cannot be divided equally into 3, so array_split
# is used; the sample output shows pieces of 2, 1 and 1 columns
print("不等量分割\n")
print(np.array_split(t1, 3, axis=1))


"""
執行結果：
[[ 0  1  2  3]
 [ 4  5  6  7]
 [ 8  9 10 11]]
橫向分割

[array([[0, 1, 2, 3]]), array([[4, 5, 6, 7]]), array([[ 8,  9, 10, 11]])]
[array([[0, 1, 2, 3]]), array([[4, 5, 6, 7]]), array([[ 8,  9, 10, 11]])]
縱向分割

[array([[0, 1],
       [4, 5],
       [8, 9]]), array([[ 2,  3],
       [ 6,  7],
       [10, 11]])]
[array([[0, 1],
       [4, 5],
       [8, 9]]), array([[ 2,  3],
       [ 6,  7],
       [10, 11]])]
不等量分割

[array([[0, 1],
       [4, 5],
       [8, 9]]), array([[ 2],
       [ 6],
       [10]]), array([[ 3],
       [ 7],
       [11]])]
"""

對於剛剛加載出來的資料,我如果只想選擇其中的某一列(行)我們應該怎麼做呢?

其實操作很簡單,和python中列表的操作一樣

import numpy as np

# Values 0..63 arranged as an 8x8 grid
t1 = np.arange(64).reshape(8, 8)
print("原始資料為:\n", t1)

# One row (the first)
print("取一行資料為:\n", t1[0])

# One column (index 2, i.e. the third)
print("取一列資料為:\n", t1[:, 2])

# Several consecutive rows (indices 1 and 2; the stop index is exclusive)
print("取多行資料為:\n", t1[1:3])

# Several consecutive columns (indices 1 and 2)
print("取多列資料為:\n", t1[:, 1:3])

# Non-adjacent rows, selected with a list of row indices
print("取不連續的多行:\n", t1[[1, 3, 5]])

# Non-adjacent columns, selected with a list of column indices
print("取不連續的多列:\n", t1[:, [1, 3, 5]])

# A single element: third row, fourth column
print("取指定行指定列:\n", t1[2, 3])

# A sub-grid: third to fifth row, second to fourth column
print("取多行多列:\n", t1[2:5, 1:4])

# Individual points (0, 0), (2, 1), (2, 3): the first list holds the row
# indices and the second list the matching column indices
print("取多個不相鄰的點:\n", t1[[0, 2, 2], [0, 1, 3]])

"""
執行結果
原始資料為:
 [[ 0  1  2  3  4  5  6  7]
 [ 8  9 10 11 12 13 14 15]
 [16 17 18 19 20 21 22 23]
 [24 25 26 27 28 29 30 31]
 [32 33 34 35 36 37 38 39]
 [40 41 42 43 44 45 46 47]
 [48 49 50 51 52 53 54 55]
 [56 57 58 59 60 61 62 63]]
取一行資料為:
 [0 1 2 3 4 5 6 7]
取一列資料為:
 [ 2 10 18 26 34 42 50 58]
取多行資料為:
 [[ 8  9 10 11 12 13 14 15]
 [16 17 18 19 20 21 22 23]]
取多列資料為:
 [[ 1  2]
 [ 9 10]
 [17 18]
 [25 26]
 [33 34]
 [41 42]
 [49 50]
 [57 58]]
取不連續的多行:
 [[ 8  9 10 11 12 13 14 15]
 [24 25 26 27 28 29 30 31]
 [40 41 42 43 44 45 46 47]]
取不連續的多列:
 [[ 1  3  5]
 [ 9 11 13]
 [17 19 21]
 [25 27 29]
 [33 35 37]
 [41 43 45]
 [49 51 53]
 [57 59 61]]
取指定行指定列:
 19
取多行多列:
 [[17 18 19]
 [25 26 27]
 [33 34 35]]
取多個不相鄰的點:
 [ 0 17 19]
"""

8. Numpy copy&deep copy

import numpy as np

# Plain assignment does not copy: c1 is a second name for the same array,
# so the write through t1 is also visible through c1 (c1 is t1 -> True).
t1 = np.arange(4)
print(t1)
c1 = t1
t1[0] = 11
print(t1)
print(c1 is t1)
print(c1)


# .copy() creates an independent array: the later write to t2 leaves c2
# unchanged (c2 is t2 -> False).
t2 = np.arange(4)
c2 = t2.copy()
t2[3] = 44
print(t2)
print(c2 is t2)
print(c2)

"""
執行結果：
[0 1 2 3]
[11  1  2  3]
True
[11  1  2  3]
[ 0  1  2 44]
False
[0 1 2 3]
"""

9. Numpy讀取本地資料

軸（axis）：

　　在numpy中可以理解為方向，使用0,1,2...數字表示，對於一個一維陣列，只有一個0軸，對於2維陣列（shape(2, 2)），有0軸和1軸，對於三維陣列（shape(2, 2, 3)），有0,1,2軸

　　有了軸的概念之後我們計算會更加方便，比如計算一個2維陣列的平均值，必須指定是計算哪個方向上面的數字的平均值

語法：

np.loadtxt(fname, dtype=float, comments='#', delimiter=None, converters=None, skiprows=0, usecols=None, unpack=False, ndmin=0, encoding='bytes', max_rows=None)

引數解釋：

引數	解釋
fname	檔案，字串或產生器，可以是.gz或bz2壓縮檔案
dtype	資料型別，可選，csv的字串以什麼資料型別讀入陣列，預設float
delimiter	分隔字串，預設是任何空白字元，讀取csv檔案時需改為逗號
skiprows	跳過前x行，一般跳過第一行表頭
usecols	讀取指定的列，索引，元組型別
unpack	如果是True，讀入的各列將分別寫入不同陣列變數（相當於轉置）；False則讀入資料只寫入一個陣列變數，預設False


import numpy as np

# Path of the CSV file to read; it must exist for this snippet to run
file_path = './files/demo_001.csv'
# Read comma-separated integers into a 2-D array (one row per line of the file)
data = np.loadtxt(file_path, dtype='int', delimiter=',', encoding='utf-8')
# Same file read with unpack=True: the result is transposed, one row per column of the file
data_1 = np.loadtxt(file_path, dtype='int', delimiter=',', encoding='utf-8', unpack=True)
print(data)
print('*'*50)
print(data_1)
"""
執行結果：
[[  1234   3467   3478 457889]
 [  1234   3467   3478 457889]
 [  1234   3467   3478 457889]
 [  1234   3467   3478 457889]
 [  1234   3467   3478 457889]]
**************************************************
[[  1234   1234   1234   1234   1234]
 [  3467   3467   3467   3467   3467]
 [  3478   3478   3478   3478   3478]
 [457889 457889 457889 457889 457889]]
"""