1. 程式人生 > 其它 >Numpy陣列索引和切片

Numpy陣列索引和切片

陣列可以通過索引或切片的方式進行訪問或修改。陣列切片 x[start:stop:step] 與 Python 內建 list 的標準索引和切片類似,只是陣列切片產生的是原陣列的檢視(view)而非副本;通過索引或條件賦值修改時,會直接在原陣列上修改,不另建立副本。

一般索引
In [1]: import numpy as np

In [2]: x = np.arange(10)

In [3]: x
Out[3]: array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

#獲取索引值為0的元素
In [4]: x[0]
Out[4]: 0
    
#獲取索引值為2的元素
In [5]: x[2]
Out[5]: 2
    
#獲取前5個元素
In [6]: x[:5]
Out[6]: array([0, 1, 2, 3, 4])
    
#獲取從索引5開始(含索引5)到結尾的元素
In [7]: x[5:]
Out[7]: array([5, 6, 7, 8, 9])
    
#獲取索引[4,7)之間的元素
In [8]: x[4:7]
Out[8]: array([4, 5, 6])
    
#每隔一個元素獲取,步長為2
In [9]: x[::2]
Out[9]: array([0, 2, 4, 6, 8])
    
#從索引1開始每隔一個元素獲取,步長為2
In [10]: x[1::2]
Out[10]: array([1, 3, 5, 7, 9])
    
#獲取所有元素
In [11]: x[:]
Out[11]: array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
    
#獲取所有元素,步長為-1(負號表示逆序)
In [12]: x[::-1]
Out[12]: array([9, 8, 7, 6, 5, 4, 3, 2, 1, 0])
    
#從索引5開始每隔一個元素獲取,步長為-2(負號表示逆序)
In [13]: x[5::-2]
Out[13]: array([5, 3, 1])
布林索引(Boolean Indexing)

利用布林陣列作為索引。

In [1]: import numpy as np
In [2]: names = np.array(['Bob','Joe','Will','Bob','Will','Joe','Joe'])

# Return a sample (or samples) from the "standard normal" distribution.
In [3]: data = np.random.randn(7,4)

# names 為用於做布林索引的陣列
In [4]: names
Out[4]: array(['Bob', 'Joe', 'Will', 'Bob', 'Will', 'Joe', 'Joe'], dtype='<U4')

# data 為被索引的 7*4 陣列(7行4列)
In [5]: data
Out[5]:
array([[ 0.25289291, -0.00816246, -0.96999809,  1.29376934],
       [ 0.44550485, -0.68194628,  1.14512874, -0.29373544],
       [ 0.40232255, -0.00788444,  0.63469223, -0.20523947],
       [ 0.74526848, -0.72704461, -0.50133668,  0.40030561],
       [ 0.54539544, -0.93755557,  0.0147922 , -1.6301914 ],
       [ 1.59887252,  0.79954153, -0.48389643, -1.11492092],
       [-0.93389025,  0.36513027, -0.49868663,  0.67454948]])

# 建立一個names == 'Bob' 真值陣列用於索引data
In [7]: names == 'Bob'
Out[7]: array([ True, False, False,  True, False, False, False])

# data陣列的行與names陣列元素一一對應,當條件names=='Bob'=True則返回相應行陣列
In [8]: data[names=='Bob']
Out[8]:
array([[ 0.25289291, -0.00816246, -0.96999809,  1.29376934],
       [ 0.74526848, -0.72704461, -0.50133668,  0.40030561]])

# data陣列的行與names陣列元素一一對應,當條件names=='Bob'=True則返回相應行陣列,1:用於切割列資料
In [9]: data[names == 'Bob', 1:]
Out[9]:
array([[-0.00816246, -0.96999809,  1.29376934],
       [-0.72704461, -0.50133668,  0.40030561]])

# data陣列的行與names陣列元素一一對應,當條件names=='Bob'=True則返回相應行陣列,1用於選取索引為1的列(即第二列)資料
In [10]: data[names == 'Bob', 1]
Out[10]: array([-0.00816246, -0.72704461])

# != 和 ~ 用於取非值
In [11]: data[names != 'Bob']
Out[11]:
array([[ 0.44550485, -0.68194628,  1.14512874, -0.29373544],
       [ 0.40232255, -0.00788444,  0.63469223, -0.20523947],
       [ 0.54539544, -0.93755557,  0.0147922 , -1.6301914 ],
       [ 1.59887252,  0.79954153, -0.48389643, -1.11492092],
       [-0.93389025,  0.36513027, -0.49868663,  0.67454948]])

In [12]: data[~(names == 'Bob')]
Out[12]:
array([[ 0.44550485, -0.68194628,  1.14512874, -0.29373544],
       [ 0.40232255, -0.00788444,  0.63469223, -0.20523947],
       [ 0.54539544, -0.93755557,  0.0147922 , -1.6301914 ],
       [ 1.59887252,  0.79954153, -0.48389643, -1.11492092],
       [-0.93389025,  0.36513027, -0.49868663,  0.67454948]])

# Python 的 and / or 判斷的是整個陣列物件的真值(對多元素陣列會引發 ValueError),而 & 和 | 對陣列內容逐元素執行布林運算,因此組合多個布林條件只能用 & / |,且每個條件需用括號括起
In [13]: data[(names == 'Bob') | (names == 'Will')]
Out[13]:
array([[ 0.25289291, -0.00816246, -0.96999809,  1.29376934],
       [ 0.40232255, -0.00788444,  0.63469223, -0.20523947],
       [ 0.74526848, -0.72704461, -0.50133668,  0.40030561],
       [ 0.54539544, -0.93755557,  0.0147922 , -1.6301914 ]])

# 根據data[條件] 修改值,直接在原陣列上修改,不另建立副本
In [14]: data
Out[14]:
array([[ 0.25289291, -0.00816246, -0.96999809,  1.29376934],
       [ 0.44550485, -0.68194628,  1.14512874, -0.29373544],
       [ 0.40232255, -0.00788444,  0.63469223, -0.20523947],
       [ 0.74526848, -0.72704461, -0.50133668,  0.40030561],
       [ 0.54539544, -0.93755557,  0.0147922 , -1.6301914 ],
       [ 1.59887252,  0.79954153, -0.48389643, -1.11492092],
       [-0.93389025,  0.36513027, -0.49868663,  0.67454948]])

#把 data < 0 的值修改為 0
In [15]: data[data < 0] = 0

In [16]: data
Out[16]:
array([[0.25289291, 0.        , 0.        , 1.29376934],
       [0.44550485, 0.        , 1.14512874, 0.        ],
       [0.40232255, 0.        , 0.63469223, 0.        ],
       [0.74526848, 0.        , 0.        , 0.40030561],
       [0.54539544, 0.        , 0.0147922 , 0.        ],
       [1.59887252, 0.79954153, 0.        , 0.        ],
       [0.        , 0.36513027, 0.        , 0.67454948]])

#把 names != 'Joe' 的行值都修改為 7
In [17]: names
Out[17]: array(['Bob', 'Joe', 'Will', 'Bob', 'Will', 'Joe', 'Joe'], dtype='<U4')

In [18]: data[names != 'Joe'] = 7

In [19]: data
Out[19]:
array([[7.        , 7.        , 7.        , 7.        ],
       [0.44550485, 0.        , 1.14512874, 0.        ],
       [7.        , 7.        , 7.        , 7.        ],
       [7.        , 7.        , 7.        , 7.        ],
       [7.        , 7.        , 7.        , 7.        ],
       [1.59887252, 0.79954153, 0.        , 0.        ],
       [0.        , 0.36513027, 0.        , 0.67454948]])
花哨索引(Fancy Indexing)

傳遞一個整數索引陣列(或列表)進行索引,可與切片組合使用。

In [1]: import numpy as np

#建立一個未初始化的陣列(np.empty 不會設定初值,內容為記憶體中的任意值)
In [2]: arr = np.empty((8,4))
In [3]: arr
Out[3]:
array([[2.59345432e+161, 1.68813881e+195, 6.01347002e-154,
        6.01347002e-154],
       [6.01347002e-154, 8.90389719e+252, 1.96086583e+243,
        1.75631032e-152],
       [5.49257737e+241, 1.08298236e-153, 4.78210140e+180,
        6.01347002e-154],
       [6.01347002e-154, 9.77795611e+024, 2.32160957e-152,
        4.83245960e+276],
       [3.09394663e+169, 9.08367217e+223, 5.56218818e+180,
        6.79031368e+199],
       [6.01347002e-154, 6.01347002e-154, 6.01347002e-154,
        7.22247388e+159],
       [4.89915603e+252, 8.88968974e+228, 2.52303419e-258,
        7.49232572e+247],
       [6.01347002e-154, 6.01347002e-154, 2.47379808e-091,
        1.95132487e+227]])
# 往陣列填充資料
In [4]: for i in range(8):
   ...:     arr[i] = i
   ...:
In [5]: arr
Out[5]:
array([[0., 0., 0., 0.],
       [1., 1., 1., 1.],
       [2., 2., 2., 2.],
       [3., 3., 3., 3.],
       [4., 4., 4., 4.],
       [5., 5., 5., 5.],
       [6., 6., 6., 6.],
       [7., 7., 7., 7.]])

# 選取陣列索引為2、4、6的行
In [7]: arr[[2,4,6]]
Out[7]:
array([[2., 2., 2., 2.],
       [4., 4., 4., 4.],
       [6., 6., 6., 6.]])

# 選取陣列索引為-1、-2、-8的行
In [8]: arr[[-1,-2,-8]]
Out[8]:
array([[7., 7., 7., 7.],
       [6., 6., 6., 6.],
       [0., 0., 0., 0.]])

# 用0~31共32個整數生成一個8*4的陣列
In [9]: arr = np.arange(32).reshape((8,4))
In [10]: arr
Out[10]:
array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11],
       [12, 13, 14, 15],
       [16, 17, 18, 19],
       [20, 21, 22, 23],
       [24, 25, 26, 27],
       [28, 29, 30, 31]])

#[3,5,6]表示行索引,[1,2,1]表示列索引,返回(行,列)為(3,1),(5,2),(6,1)位置的數
In [11]: arr[[3,5,6],[1,2,1]]
Out[11]: array([13, 22, 25])

#[1,4,6]表示行索引,冒號(:)表示該行所有的數,[0,3,2,1]表示列排序-索引1的列與3的列對換
In [15]: arr[[1,4,6]][:,[0,3,2,1]]
Out[15]:
array([[ 4,  7,  6,  5],
       [16, 19, 18, 17],
       [24, 27, 26, 25]])

#利用花哨索引修改值
In [20]: x = np.arange(10)
In [21]: x
Out[21]: array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
    
#建立一個花哨索引陣列
In [22]: i = np.array([0,2,4,6,8])
In [23]: x[i] = x[i]*2
In [24]: x
Out[24]: array([ 0,  1,  4,  3,  8,  5, 12,  7, 16,  9])
    
#實現累加
In [25]: x = np.zeros(10)
In [26]: i = np.array([1,3,5,7,9])
In [27]: np.add.at(x,i,1)
In [28]: x   
Out[28]: array([0., 1., 0., 1., 0., 1., 0., 1., 0., 1.])
    
#j=[1,3,3,7,7]分別實現索引1累加1次,索引3累加2次,索引7累加2次
In [29]: j = np.array([1,3,3,7,7])
In [30]: np.add.at(x,j,1)
In [31]: x  
Out[31]: array([0., 2., 0., 3., 0., 1., 0., 3., 0., 1.])