numpy矩陣拼接_Python資料分析-Numpy

阿新 • • 發佈：2021-01-03

Numpy是以矩陣為基礎的數學計算模組，純數學儲存和處理大型矩陣，可以表示向量和矩陣的多維陣列資料結構，支援大量的維度陣列和矩陣運算，對陣列運算提供了大量的數學函式庫！

Python已有列表型別，為什麼還需要一個陣列物件(型別)？

陣列物件可以去掉元素間運算所需的迴圈，使一維向量更像單個資料
設定專門的陣列物件，經過優化，可以提升這類應用的運算速度

eg.計算A²+B³（逐元素運算），其中，A和B是一維陣列：

def npSum():
    """Return ``a**2 + b**3`` computed element-wise on two fixed 1-D arrays.

    Demonstrates that NumPy arithmetic is vectorized: no explicit Python
    loop is needed to combine the two five-element vectors.
    """
    squares = np.array([0, 1, 2, 3, 4]) ** 2
    cubes = np.array([9, 8, 7, 6, 5]) ** 3
    return squares + cubes
npSum()
#array([729, 513, 347, 225, 141], dtype=int32)

匯入Numpy模組：import numpy as np

1、np.array() ---生成一個ndarray陣列

a = np.array([[0,1,2,3,4],[9,8,7,6,5]])
a
#二維陣列
#array([[0, 1, 2, 3, 4],
#       [9, 8, 7, 6, 5]])

2、ndarray物件的屬性

.ndim:秩，即軸的數量或者維度的數量

a = np.array([[0,1,2,3,4],[9,8,7,6,5]])
a.ndim
#2

.shape:ndarray物件的尺度，對於矩陣，n行m列

a = np.array([[0,1,2,3,4],[9,8,7,6,5]])
a.shape
#2行5列
#(2, 5)

.size:ndarray物件元素的個數，相當於.shape中n*m的值

a = np.array([[0,1,2,3,4],[9,8,7,6,5]])
a.size
#10

.dtype:ndarray物件的元素型別

a = np.array([[0,1,2,3,4],[9,8,7,6,5]])
a.dtype
#dtype('int32')

.itemsize:ndarray物件中每個元素的大小，以位元組為單位

a = np.array([[0,1,2,3,4],[9,8,7,6,5]])
#1位元組 = 八位
a.itemsize
#4

3、陣列的建立

#隨機生成一個數組
a = np.floor(10*np.random.random((3,4)))
print (a)
#random.random((3,4))是生成一個3行4列、元素在[0,1)之間的陣列，乘以10後再用floor向下取整
*************************
[[7. 8. 4. 5.]
 [8. 3. 7. 0.]
 [7. 2. 5. 9.]]
*************************

print (a.ravel())
#ravel()是把一個數組拉成一個向量
*************************
[6. 4. 2. 8. 9. 3. 7. 1. 6. 6. 1. 4.]
*************************

a.shape = (6, 2)
a
*************
[[0. 9.]
 [7. 5.]
 [2. 3.]
 [1. 7.]
 [1. 6.]
 [7. 5.]]
*************

a.T
*************************
[[8. 1. 6. 4. 8. 9.]
 [7. 8. 3. 2. 3. 0.]]
*************************

import numpy
#檔案的讀取，genfromtxt讀取txt檔案，其中","為分隔符，分割之後的元素型別為字串
world_alcohol = numpy.genfromtxt("world_alcohol.txt", delimiter=",",dtype=str)
print(type(world_alcohol))
print(world_alcohol)

arange函式：類似python的range函式，通過指定開始值、終值和步長來建立一個一維陣列，注意：最終建立的陣列不包含終值

arr2 = np.arange(1,10,2)  #arange只能建立一維陣列
arr2
#array([1, 3, 5, 7, 9])

zeros函式：建立指定長度或者形狀的全零陣列。

arr3 = np.zeros((2,3,4)) #可以建立多維陣列(2維3行4列)
arr3
*************************
array([[[0., 0., 0., 0.],
        [0., 0., 0., 0.],
        [0., 0., 0., 0.]],

       [[0., 0., 0., 0.],
        [0., 0., 0., 0.],
        [0., 0., 0., 0.]]])
*************************

ones函式：建立指定長度或者形狀的全1陣列

arr4 = np.ones((2,3),dtype=np.int32)  #(2,3)是一個元組(tuple)型別，表示形狀；dtype指定元素為int型別
arr4
************
[[1 1 1]
 [1 1 1]]
************

full函式：建立一個指定形狀、指定元素的陣列

arr5 = np.full((2,3,4),100)
arr5
*****************************
array([[[100, 100, 100, 100],
        [100, 100, 100, 100],
        [100, 100, 100, 100]],

       [[100, 100, 100, 100],
        [100, 100, 100, 100],
        [100, 100, 100, 100]]])
******************************

eye函式：建立一個正方的n*n單位矩陣，對角線為1，其餘為0

arr6 = np.eye(4)
arr6
*****************************
[[1. 0. 0. 0.]
 [0. 1. 0. 0.]
 [0. 0. 1. 0.]
 [0. 0. 0. 1.]]
*****************************

np.ones_like(a):根據陣列a的形狀生成一個全1的陣列

np.ones_like(arr4)
*****************************
array([[1, 1, 1],
       [1, 1, 1]])
*****************************

np.zeros_like(a):根據陣列a的形狀生成一個全0的陣列

np.zeros_like(arr4)
*****************************
array([[0, 0, 0],
       [0, 0, 0]])
*****************************

np.full_like(a,val):根據陣列a的形狀生成一個數組，每個元素的值都是val

np.full_like(arr4,100)
*****************************
array([[100, 100, 100],
       [100, 100, 100]])
*****************************

linspace：返回指定區間，指定個數元素的陣列

a = np.linspace(1,10,4,endpoint=True)
a
#array([ 1.,  4.,  7., 10.])
b = np.linspace(1,10,4,endpoint=False) #False不包含結尾
b
#array([1.  , 3.25, 5.5 , 7.75])

concatenate:陣列拼接

c = np.concatenate((a,b),axis=0)#axis=0 行拼接，一般預設不寫
c
#array([ 1.  ,  4.  ,  7.  , 10.  ,  1.  ,  3.25,  5.5 ,  7.75])
a=np.array([[1,2,3],[4,5,6]])
b=np.array([[11,21,31],[7,8,9]])
np.concatenate((a,b),axis=1) #axis=1對應列拼接（沿水平方向拼接，行數不變）
#array([[ 1,  2,  3, 11, 21, 31],
#       [ 4,  5,  6,  7,  8,  9]])

4、陣列的變換

reshape：在原陣列元素（資料）保持不變的前提下，改變shape形成新陣列

arr6
********************************
array([[1., 0., 0., 0.],
       [0., 1., 0., 0.],
       [0., 0., 1., 0.],
       [0., 0., 0., 1.]])
********************************
arr9 = arr6.reshape(4,2,-1)#當指定某一個軸為-1的時候，表示將根據陣列元素的數量自動計算該軸的 長度值。
arr9
********************************
array([[[1., 0.],
        [0., 0.]],

       [[0., 1.],
        [0., 0.]],

       [[0., 0.],
        [1., 0.]],

       [[0., 0.],
        [0., 1.]]])
********************************

修改指定位置元素值

arr9[1][0][1]=100
arr9
**************************
array([[[  1.,   0.],
        [  0.,   0.]],

       [[  0., 100.],
        [  0.,   0.]],

       [[  0.,   0.],
        [  1.,   0.]],

       [[  0.,   0.],
        [  0.,   1.]]])
**************************

arr9源自於arr6，arr9重新賦值之後，arr6也會發生改變

arr6
#array([[  1.,   0.,   0.,   0.],
#       [  0., 100.,   0.,   0.],
#       [  0.,   0.,   1.,   0.],
#       [  0.,   0.,   0.,   1.]])

降維

arr6.shape = (-1,)  #降維
arr6
#array([  1.,   0.,   0.,   0.,   0., 100.,   0.,   0.,   0.,   0.,   1.,
         0.,   0.,   0.,   0.,   1.])

更改陣列元素型別

arr6
array([  1.,   0.,   0.,   0.,   0., 100.,   0.,   0.,   0.,   0.,   1.,
         0.,   0.,   0.,   0.,   1.])
#浮點型轉化成整型
arr6.astype(int)  #np.int已在NumPy 1.20棄用並於1.24移除，改用內建int（或np.int32/np.int64）
array([  1,   0,   0,   0,   0, 100,   0,   0,   0,   0,   1,   0,   0,
         0,   0,   1])

陣列轉化成列表

arr6.tolist()
********
[1.0,
 0.0,
 0.0,
 0.0,
 0.0,
 100.0,
 0.0,
 0.0,
 0.0,
 0.0,
 1.0,
 0.0,
 0.0,
 0.0,
 0.0,
 1.0]
********

陣列的索引和切片

matrix = numpy.array([
                    [5, 10, 15], 
                    [20, 25, 30],
                    [35, 40, 45]
                 ])
print(matrix[:,0:2])
#類似於Python中列表取數，先取橫，後取豎

a = np.arange(24).reshape(2,3,4)
a
************************
array([[[ 0,  1,  2,  3],
        [ 4,  5,  6,  7],
        [ 8,  9, 10, 11]],

       [[12, 13, 14, 15],
        [16, 17, 18, 19],
        [20, 21, 22, 23]]])
************************
a[:,1,-3]
#array([ 5, 17])

a[:,1:3,:]
************************
array([[[ 4,  5,  6,  7],
        [ 8,  9, 10, 11]],

       [[16, 17, 18, 19],
        [20, 21, 22, 23]]])
*************************

a[:,:,::2]
******************
array([[[ 0,  2],
        [ 4,  6],
        [ 8, 10]],

       [[12, 14],
        [16, 18],
        [20, 22]]])
********************

花式索引

arr10 = np.random.randint(0,40,(4,4)) #隨機整數陣列，（4,4）是所產生陣列的shape
arr10
*************************
array([[15, 20, 26, 25],
       [11, 24, 28, 19],
       [15, 13, 30, 28],
       [ 4,  1, 16,  4]])
**************************

arr10[[0,3]] #取行索引為0和3的兩行
**************************
array([[15, 20, 26, 25],
       [ 4,  1, 16,  4]])
**************************

arr10[[1,2],[2,3]]#要1行2列和2行3列的兩個元素
**************************
array([28, 28])
**************************

arr10[np.ix_([0,3],[1,3])]#要第0行和第3行，第1列和第3列的資料
******************
array([[20, 25],
       [ 1,  4]])
******************
--------------------------------------------------------------------------------------
arr = np.arange(32).reshape(8,4)
arr

*************************
array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11],
       [12, 13, 14, 15],
       [16, 17, 18, 19],
       [20, 21, 22, 23],
       [24, 25, 26, 27],
       [28, 29, 30, 31]])
*************************

arr[[0,3,5],[0,2,3]] #獲取（0,0），（3,2），（5,3）的資料
# array([ 0, 14, 23])

arr[[0,3,5]]  #獲取0,3,5行資料
*************************
array([[ 0,  1,  2,  3],
       [12, 13, 14, 15],
       [20, 21, 22, 23]])
**************************

arr[np.ix_([0,3,5],[0,2,3])]# 獲取0、3、5行，0、2、3列資料
*************************
array([[ 0,  2,  3],
       [12, 14, 15],
       [20, 22, 23]])
*************************

arr[np.ix_([0,3,5],[0,2,3])]
****************************
array([[ 0,  2,  3],
       [12, 14, 15],
       [20, 22, 23]])
****************************

布林索引

arr11 = np.random.random((4,4))
arr11
*************************************************************
array([[0.57006061, 0.6400715 , 0.1466771 , 0.75634831],
       [0.48848161, 0.23073786, 0.222856  , 0.03639575],
       [0.11081246, 0.27271055, 0.47278399, 0.96284317],
       [0.71150518, 0.48641716, 0.5161454 , 0.62008656]])
*************************************************************

arr12 = arr11 < 0.5
arr12

******************************************
array([[False, False,  True, False],
       [ True,  True,  True,  True],
       [ True,  True,  True, False],
       [False,  True, False, False]])
******************************************

arr11[arr12]

*******************************************************************
array([0.1466771 , 0.48848161, 0.23073786, 0.222856  , 0.03639575,
       0.11081246, 0.27271055, 0.47278399, 0.48641716])
*******************************************************************

-------------------------------------------------------------------------------

#練習
names = np.array(['joe','tom','anne'])
classes = np.array(["數學","英語","語文"])
scores = np.array([
    [70,80,90],
    [77,88,91],
    [80,90,70]
 ])

names == 'joe'
# array([ True, False, False])

#1.joe的成績
scores[names =='joe']
# array([[70, 80, 90]])

#2.joe的數學成績
scores[names == 'joe',classes =='數學'
# array([70])

#3，joe和anne的成績
scores[(names =='joe') | (names =='anne')]
************************
array([[70, 80, 90],
       [80, 90, 70]])
************************

#4,非joe和anne的成績
scores[(names !='joe') & (names !='anne')]
# array([[77, 88, 91]])

陣列的運算

#求和
matrix = numpy.array([
                [5, 10, 15], 
                [20, 25, 30],
                [35, 40, 45]
             ])
matrix.sum(axis=1)
#橫的求和
# array([ 30,  75, 120])

matrix = numpy.array([
                [5, 10, 15], 
                [20, 25, 30],
                [35, 40, 45]
             ])
matrix.sum(axis=0)
#豎的求和
#array([60, 75, 90])

#numpy陣列運算
arr12 = np.arange(24).reshape(2,3,4)
arr12
***************************
array([[[ 0,  1,  2,  3],
        [ 4,  5,  6,  7],
        [ 8,  9, 10, 11]],

       [[12, 13, 14, 15],
        [16, 17, 18, 19],
        [20, 21, 22, 23]]])
***************************

np.square(arr12)  #平方
******************************
array([[[  0,   1,   4,   9],
        [ 16,  25,  36,  49],
        [ 64,  81, 100, 121]],

       [[144, 169, 196, 225],
        [256, 289, 324, 361],
        [400, 441, 484, 529]]])
******************************

arr13 = np.sqrt(arr12)
arr13
************************************************************
array([[[0.        , 1.        , 1.41421356, 1.73205081],
        [2.        , 2.23606798, 2.44948974, 2.64575131],
        [2.82842712, 3.        , 3.16227766, 3.31662479]],

       [[3.46410162, 3.60555128, 3.74165739, 3.87298335],
        [4.        , 4.12310563, 4.24264069, 4.35889894],
        [4.47213595, 4.58257569, 4.69041576, 4.79583152]]])
************************************************************

np.maximum(arr12,arr13)
*********************************
array([[[ 0.,  1.,  2.,  3.],
        [ 4.,  5.,  6.,  7.],
        [ 8.,  9., 10., 11.]],

       [[12., 13., 14., 15.],
        [16., 17., 18., 19.],
        [20., 21., 22., 23.]]])
**********************************

arr12>arr13 #返回bool型別的陣列
*************************************
array([[[False, False,  True,  True],
        [ True,  True,  True,  True],
        [ True,  True,  True,  True]],

       [[ True,  True,  True,  True],
        [ True,  True,  True,  True],
        [ True,  True,  True,  True]]])
*************************************

np.rint(arr13)#四捨五入到最接近的整數（小數部分恰為0.5時取最接近的偶數）
*************************************
array([[[0., 1., 1., 2.],
        [2., 2., 2., 3.],
        [3., 3., 3., 3.]],

       [[3., 4., 4., 4.],
        [4., 4., 4., 4.],
        [4., 5., 5., 5.]]])
*************************************

np.modf(arr13)# 返回arr13的小數部分和整數部分（返回順序：先小數、後整數）
***********************************************************
(array([[[0.        , 0.        , 0.41421356, 0.73205081],
         [0.        , 0.23606798, 0.44948974, 0.64575131],
         [0.82842712, 0.        , 0.16227766, 0.31662479]],
 
        [[0.46410162, 0.60555128, 0.74165739, 0.87298335],
         [0.        , 0.12310563, 0.24264069, 0.35889894],
         [0.47213595, 0.58257569, 0.69041576, 0.79583152]]]),
 array([[[0., 1., 1., 1.],
         [2., 2., 2., 2.],
         [2., 3., 3., 3.]],
 
        [[3., 3., 3., 3.],
         [4., 4., 4., 4.],
         [4., 4., 4., 4.]]]))
***********************************************************

np.exp(arr12)#e的n次冪
************************************************************************
array([[[1.00000000e+00, 2.71828183e+00, 7.38905610e+00, 2.00855369e+01],
        [5.45981500e+01, 1.48413159e+02, 4.03428793e+02, 1.09663316e+03],
        [2.98095799e+03, 8.10308393e+03, 2.20264658e+04, 5.98741417e+04]],

       [[1.62754791e+05, 4.42413392e+05, 1.20260428e+06, 3.26901737e+06],
        [8.88611052e+06, 2.41549528e+07, 6.56599691e+07, 1.78482301e+08],
        [4.85165195e+08, 1.31881573e+09, 3.58491285e+09, 9.74480345e+09]]])
************************************************************************

numpy常用函式

#堆疊函式
#vstack：用來堆疊生成豎直的單個數組
arr14 = np.array([[1,2],[3,4]])
arr14
****************
array([[1, 2],
       [3, 4]])
****************

arr15 = np.array([[7,8],[9,0]])
arr15
*********************
array([[7, 8],
       [9, 0]])
*********************

np.vstack((arr14,arr15))
********************
array([[1, 2],
       [3, 4],
       [7, 8],
       [9, 0]])
********************

#hstack :通過堆疊來生成水平的單個數組
np.hstack((arr14,arr15))
************************
array([[1, 2, 7, 8],
       [3, 4, 9, 0]])
************************

# unique():去重（返回排序後的唯一元素）
arr16 = np.array(['雪碧','可樂','咖啡','奶茶','冰沙','雪碧','牛奶','咖啡','冰沙','牛奶','可樂'])
arr16
#array(['雪碧', '可樂', '咖啡', '奶茶', '冰沙', '雪碧', '牛奶', '咖啡', '冰沙', '牛奶', '可樂'],
      dtype='<U2')

np.unique(arr16)
#array(['冰沙', '可樂', '咖啡', '奶茶', '牛奶', '雪碧'], dtype='<U2')

# where(): 返回輸入陣列中滿足給定條件的元素索引
arr17 = np.arange(1,10).reshape(3,-1)
arr17
********************
array([[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9]])
********************

# where(): 返回輸入陣列中滿足給定條件的元素索引
arr17 = np.arange(1,10).reshape(3,-1)
arr17
**********************
array([[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9]])
**********************

condition = np.where(arr17>5)
condition
(array([1, 2, 2, 2], dtype=int64), array([2, 0, 1, 2], dtype=int64))
arr17[condition]
array([6, 7, 8, 9])
np.where(arr17>5,"通過","不通過")
*****************************************
array([['不通過', '不通過', '不通過'],
       ['不通過', '不通過', '通過'],
       ['通過', '通過', '通過']], dtype='<U3')
******************************************