1. 程式人生 > >numpy基本函式與操作——一篇就夠了!

numpy基本函式與操作——一篇就夠了!

本文整理了numpy的基本函式與操作,掌握這些便可以算是入門;把下面的範例全部親手敲一遍,基本上就掌握得差不多了!開發環境為jupyter notebook,以下內容基本上是一段輸入程式碼緊接著一段對應的輸出。
需要原始碼的可以去我的github下載

import numpy as np
# 讀檔案的操作  分隔符為逗號  型別是str
world_alcohol = np.genfromtxt('world_alcohol.txt', delimiter = ',', dtype = str )
print(type(world_alcohol))
<class 'numpy.ndarray'>
world_alcohol
array([['Year', 'WHO region', 'Country', 'Beverage Types', 'Display Value'],
       ['1986', 'Western Pacific', 'Viet Nam', 'Wine', '0'],
       ['1986', 'Americas', 'Uruguay', 'Other', '0.5'],
       ..., 
       ['1987', 'Africa', 'Malawi', 'Other', '0.75'],
       ['1989', 'Americas', 'Bahamas', 'Wine', '1.5'],
       ['1985', 'Africa', 'Malawi', 'Spirits', '0.31']],
      dtype='<U52')
# 呼叫help函式  檢視api詳情  help本身會直接印出說明並返回None 所以外層的print會在最後多輸出一行None
print(help(np.genfromtxt))
Help on function genfromtxt in module numpy.lib.npyio:

genfromtxt(fname, dtype=<class 'float'>, comments='#', delimiter=None, skip_header=0, skip_footer=0, converters=None, missing_values=None, filling_values=None, usecols=None, names=None, excludelist=None, deletechars=None, replace_space='_', autostrip=False, case_sensitive=True, defaultfmt='f%i', unpack=None, usemask=False, loose=True, invalid_raise=True, max_rows=None)
    Load data from a text file, with missing values handled as specified.

    Each line past the first `skip_header` lines is split at the `delimiter`
    character, and characters following the `comments` character are discarded.

    Parameters
    ----------
    fname : file, str, pathlib.Path, list of str, generator
        File, filename, list, or generator to read.  If the filename
        extension is `.gz` or `.bz2`, the file is first decompressed. Note
        that generators must return byte strings in Python 3k.  The strings
        in a list or produced by a generator are treated as lines.
    dtype : dtype, optional
        Data type of the resulting array.
        If None, the dtypes will be determined by the contents of each
        column, individually.
    comments : str, optional
        The character used to indicate the start of a comment.
        All the characters occurring on a line after a comment are discarded
    delimiter : str, int, or sequence, optional
        The string used to separate values.  By default, any consecutive
        whitespaces act as delimiter.  An integer or sequence of integers
        can also be provided as width(s) of each field.
    skiprows : int, optional
        `skiprows` was removed in numpy 1.10. Please use `skip_header` instead.
    skip_header : int, optional
        The number of lines to skip at the beginning of the file.
    skip_footer : int, optional
        The number of lines to skip at the end of the file.
    converters : variable, optional
        The set of functions that convert the data of a column to a value.
        The converters can also be used to provide a default value
        for missing data: ``converters = {3: lambda s: float(s or 0)}``.
    missing : variable, optional
        `missing` was removed in numpy 1.10. Please use `missing_values`
        instead.
    missing_values : variable, optional
        The set of strings corresponding to missing data.
    filling_values : variable, optional
        The set of values to be used as default when the data are missing.
    usecols : sequence, optional
        Which columns to read, with 0 being the first.  For example,
        ``usecols = (1, 4, 5)`` will extract the 2nd, 5th and 6th columns.
    names : {None, True, str, sequence}, optional
        If `names` is True, the field names are read from the first valid line
        after the first `skip_header` lines.
        If `names` is a sequence or a single-string of comma-separated names,
        the names will be used to define the field names in a structured dtype.
        If `names` is None, the names of the dtype fields will be used, if any.
    excludelist : sequence, optional
        A list of names to exclude. This list is appended to the default list
        ['return','file','print']. Excluded names are appended an underscore:
        for example, `file` would become `file_`.
    deletechars : str, optional
        A string combining invalid characters that must be deleted from the
        names.
    defaultfmt : str, optional
        A format used to define default field names, such as "f%i" or "f_%02i".
    autostrip : bool, optional
        Whether to automatically strip white spaces from the variables.
    replace_space : char, optional
        Character(s) used in replacement of white spaces in the variables
        names. By default, use a '_'.
    case_sensitive : {True, False, 'upper', 'lower'}, optional
        If True, field names are case sensitive.
        If False or 'upper', field names are converted to upper case.
        If 'lower', field names are converted to lower case.
    unpack : bool, optional
        If True, the returned array is transposed, so that arguments may be
        unpacked using ``x, y, z = loadtxt(...)``
    usemask : bool, optional
        If True, return a masked array.
        If False, return a regular array.
    loose : bool, optional
        If True, do not raise errors for invalid values.
    invalid_raise : bool, optional
        If True, an exception is raised if an inconsistency is detected in the
        number of columns.
        If False, a warning is emitted and the offending lines are skipped.
    max_rows : int,  optional
        The maximum number of rows to read. Must not be used with skip_footer
        at the same time.  If given, the value must be at least 1. Default is
        to read the entire file.

        .. versionadded:: 1.10.0

    Returns
    -------
    out : ndarray
        Data read from the text file. If `usemask` is True, this is a
        masked array.

    See Also
    --------
    numpy.loadtxt : equivalent function when no data is missing.

    Notes
    -----
    * When spaces are used as delimiters, or when no delimiter has been given
      as input, there should not be any missing data between two fields.
    * When the variables are named (either by a flexible dtype or with `names`,
      there must not be any header in the file (else a ValueError
      exception is raised).
    * Individual values are not stripped of spaces by default.
      When using a custom converter, make sure the function does remove spaces.

    References
    ----------
    .. [1] NumPy User Guide, section `I/O with NumPy
           <http://docs.scipy.org/doc/numpy/user/basics.io.genfromtxt.html>`_.

    Examples
    ---------
    >>> from io import StringIO
    >>> import numpy as np

    Comma delimited file with mixed dtype

    >>> s = StringIO("1,1.3,abcde")
    >>> data = np.genfromtxt(s, dtype=[('myint','i8'),('myfloat','f8'),
    ... ('mystring','S5')], delimiter=",")
    >>> data
    array((1, 1.3, 'abcde'),
          dtype=[('myint', '<i8'), ('myfloat', '<f8'), ('mystring', '|S5')])

    Using dtype = None

    >>> s.seek(0) # needed for StringIO example only
    >>> data = np.genfromtxt(s, dtype=None,
    ... names = ['myint','myfloat','mystring'], delimiter=",")
    >>> data
    array((1, 1.3, 'abcde'),
          dtype=[('myint', '<i8'), ('myfloat', '<f8'), ('mystring', '|S5')])

    Specifying dtype and names

    >>> s.seek(0)
    >>> data = np.genfromtxt(s, dtype="i8,f8,S5",
    ... names=['myint','myfloat','mystring'], delimiter=",")
    >>> data
    array((1, 1.3, 'abcde'),
          dtype=[('myint', '<i8'), ('myfloat', '<f8'), ('mystring', '|S5')])

    An example with fixed-width columns

    >>> s = StringIO("11.3abcde")
    >>> data = np.genfromtxt(s, dtype=None, names=['intvar','fltvar','strvar'],
    ...     delimiter=[1,3,5])
    >>> data
    array((1, 1.3, 'abcde'),
          dtype=[('intvar', '<i8'), ('fltvar', '<f8'), ('strvar', '|S5')])

None
import numpy as np
# 構造一維向量
np.array(np.arange(3))
array([0, 1, 2])
# 構造二維矩陣
s = np.array([[1,2,3], [4,5,6]])
s
array([[1, 2, 3],
       [4, 5, 6]])
s.dtype
dtype('int32')
# 索引
s[0,0]
1
# 陣列中所有元素的型別必須一致  這裡混入了浮點數4. 所以其餘整數會被自動轉為float64
import numpy as np
numbers = np.array([1,2,3,4.])
print(numbers)
print(numbers.dtype)
[ 1.  2.  3.  4.]
float64
# 值的判定  返回布林陣列
numbers == 3
array([False, False,  True, False], dtype=bool)
# 可用布林陣列作為索引  查出具體的值
equal_to_3 = (numbers == 3)
numbers[equal_to_3]
array([ 3.])
matrix = np.array(
    [[11,26,38],
     [32,65,96],
     [21,78,84],
    ]
)
matrix
array([[11, 26, 38],
       [32, 65, 96],
       [21, 78, 84]])
# 返回第二列(column)中各元素是否等於78的布林陣列  注意變數名寫的是65 但實際比較的值是78
column_equalto65 = matrix[:,1] == 78
# 結果是一維布林陣列  每個元素對應矩陣的一行(row)
column_equalto65
array([False, False,  True], dtype=bool)
# 用布林陣列作為行索引  返回第二列(column)的值等於78的那一行(row)資料
matrix[column_equalto65]
array([[21, 78, 84]])
# 這樣的索引表示取第二列(column)  :代表所有的行(row)  返回結果為一維陣列
matrix[:,1]
array([26, 65, 78])
# 求和操作  指定維度(axis)為1 則表示每一行求和
matrix.sum(axis = 1)
array([ 75, 193, 183])
# 求和操作  指定維度(axis)為0 則表示每一列求和
matrix.sum(axis = 0)
array([ 64, 169, 218])
# 通過reshape方法  指定3行5列  重構矩陣
import numpy as np
a = np.array([np.arange(15)])
print(a)
a = a.reshape((3,5))
print(a)
b = np.arange(16).reshape(2,8)
print(b)
[[ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14]]
[[ 0  1  2  3  4]
 [ 5  6  7  8  9]
 [10 11 12 13 14]]
[[ 0  1  2  3  4  5  6  7]
 [ 8  9 10 11 12 13 14 15]]
# 輸出維度
a.ndim
2
# 輸出型別
a.dtype
dtype('int32')
# 輸出型別名
a.dtype.name
'int32'
# 隨機模組的應用  指定3行4列矩陣
np.random.random((3,4))
array([[ 0.19375842,  0.36607602,  0.2676583 ,  0.45307936],
       [ 0.43905375,  0.28215774,  0.89246178,  0.2877808 ],
       [ 0.52287865,  0.3748371 ,  0.85626729,  0.37688939]])
# arange的應用  從1開始 每次加5  只取小於15的值(不包含終點15)
np.arange(1, 15, 5)
array([ 1,  6, 11])
# linspace的應用  起點值為0 終點為2pi  等間隔取100個值(包含起點與終點)
from numpy import pi
s = np.linspace(0, 2*pi, 100)
print(s)
[ 0.          0.06346652  0.12693304  0.19039955  0.25386607  0.31733259
  0.38079911  0.44426563  0.50773215  0.57119866  0.63466518  0.6981317
  0.76159822  0.82506474  0.88853126  0.95199777  1.01546429  1.07893081
  1.14239733  1.20586385  1.26933037  1.33279688  1.3962634   1.45972992
  1.52319644  1.58666296  1.65012947  1.71359599  1.77706251  1.84052903
  1.90399555  1.96746207  2.03092858  2.0943951   2.15786162  2.22132814
  2.28479466  2.34826118  2.41172769  2.47519421  2.53866073  2.60212725
  2.66559377  2.72906028  2.7925268   2.85599332  2.91945984  2.98292636
  3.04639288  3.10985939  3.17332591  3.23679243  3.30025895  3.36372547
  3.42719199  3.4906585   3.55412502  3.61759154  3.68105806  3.74452458
  3.8079911   3.87145761  3.93492413  3.99839065  4.06185717  4.12532369
  4.1887902   4.25225672  4.31572324  4.37918976  4.44265628  4.5061228
  4.56958931  4.63305583  4.69652235  4.75998887  4.82345539  4.88692191
  4.95038842  5.01385494  5.07732146  5.14078798  5.2042545   5.26772102
  5.33118753  5.39465405  5.45812057  5.52158709  5.58505361  5.64852012
  5.71198664  5.77545316  5.83891968  5.9023862   5.96585272  6.02931923
  6.09278575  6.15625227  6.21971879  6.28318531]
# 取三角函式值
np.sin(np.linspace(0, 2*pi, 100))
array([  0.00000000e+00,   6.34239197e-02,   1.26592454e-01,
         1.89251244e-01,   2.51147987e-01,   3.12033446e-01,
         3.71662456e-01,   4.29794912e-01,   4.86196736e-01,
         5.40640817e-01,   5.92907929e-01,   6.42787610e-01,
         6.90079011e-01,   7.34591709e-01,   7.76146464e-01,
         8.14575952e-01,   8.49725430e-01,   8.81453363e-01,
         9.09631995e-01,   9.34147860e-01,   9.54902241e-01,
         9.71811568e-01,   9.84807753e-01,   9.93838464e-01,
         9.98867339e-01,   9.99874128e-01,   9.96854776e-01,
         9.89821442e-01,   9.78802446e-01,   9.63842159e-01,
         9.45000819e-01,   9.22354294e-01,   8.95993774e-01,
         8.66025404e-01,   8.32569855e-01,   7.95761841e-01,
         7.55749574e-01,   7.12694171e-01,   6.66769001e-01,
         6.18158986e-01,   5.67059864e-01,   5.13677392e-01,
         4.58226522e-01,   4.00930535e-01,   3.42020143e-01,
         2.81732557e-01,   2.20310533e-01,   1.58001396e-01,
         9.50560433e-02,   3.17279335e-02,  -3.17279335e-02,
        -9.50560433e-02,  -1.58001396e-01,  -2.20310533e-01,
        -2.81732557e-01,  -3.42020143e-01,  -4.00930535e-01,
        -4.58226522e-01,  -5.13677392e-01,  -5.67059864e-01,
        -6.18158986e-01,  -6.66769001e-01,  -7.12694171e-01,
        -7.55749574e-01,  -7.95761841e-01,  -8.32569855e-01,
        -8.66025404e-01,  -8.95993774e-01,  -9.22354294e-01,
        -9.45000819e-01,  -9.63842159e-01,  -9.78802446e-01,
        -9.89821442e-01,  -9.96854776e-01,  -9.99874128e-01,
        -9.98867339e-01,  -9.93838464e-01,  -9.84807753e-01,
        -9.71811568e-01,  -9.54902241e-01,  -9.34147860e-01,
        -9.09631995e-01,  -8.81453363e-01,  -8.49725430e-01,
        -8.14575952e-01,  -7.76146464e-01,  -7.34591709e-01,
        -6.90079011e-01,  -6.42787610e-01,  -5.92907929e-01,
        -5.40640817e-01,  -4.86196736e-01,  -4.29794912e-01,
        -3.71662456e-01,  -3.12033446e-01,  -2.51147987e-01,
        -1.89251244e-01,  -1.26592454e-01,  -6.34239197e-02,
        -2.44929360e-16])
# 做數學運算
a = np.array([12, 45, 16, 56])
b = np.arange(4)
print(a)
print(b)
c = a - b
print(c)
c = c - 1
print(c)
b = b ** 2
print(b)
# 返回布林陣列
print(a > 16)
[12 45 16 56]
[0 1 2 3]
[12 44 14 53]
[11 43 13 52]
[0 1 4 9]
[False  True False  True]
# 矩陣乘法
a = np.array([
    [1,2],
    [3,4]
])
b = np.array([
    [3,4],
    [1,2]
])
# 對應項相乘
print(a * b)
print('-'*10)
# 矩陣乘法
print(a.dot(b))
print('-'*10)
print(np.dot(a, b))
print('-'*10)

[[3 8]
 [3 8]]
----------
[[ 5  8]
 [13 20]]
----------
[[ 5  8]
 [13 20]]
----------
# 計算e的次冪(exp)與開根號(sqrt)
B = np.arange(4)
print(np.exp(B))
print(np.sqrt(B))
[  1.           2.71828183   7.3890561   20.08553692]
[ 0.          1.          1.41421356  1.73205081]
# floor 表示向下取整
a = np.floor(10*np.random.random((3, 4)))
print(a)
print('-'*20)
# 利用ravel()方法將矩陣拉成向量
print(a.ravel())
print('-'*20)
a.shape = (6, 2)
print(a)
print('-'*20)
# 求轉置
print(a.T)
# -1 代表預設讓系統自己計算列數
print(a.reshape(3, -1))
[[ 4.  8.  1.  7.]
 [ 2.  6.  8.  9.]
 [ 8.  9.  5.  6.]]
--------------------
[ 4.  8.  1.  7.  2.  6.  8.  9.  8.  9.  5.  6.]
--------------------
[[ 4.  8.]
 [ 1.  7.]
 [ 2.  6.]
 [ 8.  9.]
 [ 8.  9.]
 [ 5.  6.]]
--------------------
[[ 4.  1.  2.  8.  8.  5.]
 [ 8.  7.  6.  9.  9.  6.]]
[[ 4.  8.  1.  7.]
 [ 2.  6.  8.  9.]
 [ 8.  9.  5.  6.]]
# 資料拼接 
a = np.floor(10*np.random.random((2, 2)))
b = np.floor(10*np.random.random((2, 2)))
print(a)
print('-'*20)
print(b)
print('-'*20)
# 水平方向拼接(hstack)  行數不變 列數增加  相當於增加樣本特徵
print(np.hstack((a, b)))
print('-'*20)
# 垂直方向拼接(vstack)  列數不變 行數增加  相當於增加樣本數
print(np.vstack((a, b)))
print('-'*20)
[[ 5.  6.]
 [ 8.  0.]]
--------------------
[[ 9.  9.]
 [ 9.  8.]]
--------------------
[[ 5.  6.  9.  9.]
 [ 8.  0.  9.  8.]]
--------------------
[[ 5.  6.]
 [ 8.  0.]
 [ 9.  9.]
 [ 9.  8.]]
--------------------
# 資料的切割
a = np.floor(10*np.random.random((2, 12)))
b = np.floor(10*np.random.random((2, 12)))
print(a)
print('-'*20)
print(b)
print('-'*20)
# 沿水平方向切開(hsplit)  把所有的列(column)平均分成3份
print(np.hsplit(a, 3))
print('-'*20)
# 表示在指定位置切割  (3, 4)表示在列索引3和4之前各切一刀 共得到三份  最左邊一列的索引記為0
print(np.hsplit(a, (3, 4)))
print('-'*20)
# 沿垂直方向切開(vsplit)  把所有的行(row)平均分成2份
print(np.vsplit(b , 2))
[[ 2.  2.  1.  9.  3.  9.  3.  6.  8.  1.  0.  2.]
 [ 6.  3.  7.  7.  0.  0.  5.  3.  5.  8.  5.  0.]]
--------------------
[[ 0.  0.  6.  4.  3.  1.  8.  9.  7.  7.  8.  5.]
 [ 8.  2.  4.  1.  5.  2.  0.  8.  2.  4.  8.  0.]]
--------------------
[array([[ 2.,  2.,  1.,  9.],
       [ 6.,  3.,  7.,  7.]]), array([[ 3.,  9.,  3.,  6.],
       [ 0.,  0.,  5.,  3.]]), array([[ 8.,  1.,  0.,  2.],
       [ 5.,  8.,  5.,  0.]])]
--------------------
[array([[ 2.,  2.,  1.],
       [ 6.,  3.,  7.]]), array([[ 9.],
       [ 7.]]), array([[ 3.,  9.,  3.,  6.,  8.,  1.,  0.,  2.],
       [ 0.,  0.,  5.,  3.,  5.,  8.,  5.,  0.]])]
--------------------
[array([[ 0.,  0.,  6.,  4.,  3.,  1.,  8.,  9.,  7.,  7.,  8.,  5.]]), array([[ 8.,  2.,  4.,  1.,  5.,  2.,  0.,  8.,  2.,  4.,  8.,  0.]])]
# 直接賦值不會複製物件  a、b只是同一個物件的兩個名字(傳引用)  改變b的shape a也跟著改變
a = np.arange(12)
b = a
b.shape = (3, -1)
print(a.shape)
print(id(a))
print(id(b))
(3, 4)
2262218295696
2262218295696
# 用view方法建立檢視(淺拷貝)
# a、c是不同的物件(id不同 各自有自己的shape)  但共用同一份底層資料 改變c的值 a的值也會改變
c = a.view()
c.shape = (4, -1)
print(a.shape)
print(id(a))
print(id(c))

c[1, 1] = 123456
print(c)
print(a)
(3, 4)
2262218295696
2262218297216
[[     0      1      2]
 [     3 123456      5]
 [     6      7      8]
 [     9     10     11]]
[[     0      1      2      3]
 [123456      5      6      7]
 [     8      9     10     11]]
# 用copy進行深拷貝物件 改變d的值 a不會改變
d = a.copy()
print(d is a)
d[0, 0] = 2356
print(a)
False
[[     0      1      2      3]
 [123456      5      6      7]
 [     8      9     10     11]]
# 根據索引做運算
data = np.sin(np.arange(20)).reshape(5, 4)
print(data)
# 求出每一列中最大元素的索引
ind = data.argmax(axis=0)
print(ind)
# 將索引傳進去  range(data.shape[1])值為[0,1,2,3]代表四列
data_max = data[ind, range(data.shape[1])]
print(data_max)
[[ 0.          0.84147098  0.90929743  0.14112001]
 [-0.7568025  -0.95892427 -0.2794155   0.6569866 ]
 [ 0.98935825  0.41211849 -0.54402111 -0.99999021]
 [-0.53657292  0.42016704  0.99060736  0.65028784]
 [-0.28790332 -0.96139749 -0.75098725  0.14987721]]
[2 0 3 1]
[ 0.98935825  0.84147098  0.99060736  0.6569866 ]
# 運用tile進行擴展  把a當作一塊磚  在行(row)方向重複2次、列(column)方向重複2次
a = np.arange(0, 40, 10)
print(a)
b = np.tile(a, (2, 2))
print(b)
[ 0 10 20 30]
[[ 0 10 20 30  0 10 20 30]
 [ 0 10 20 30  0 10 20 30]]
# 排序操作
a = np.array([
    [4, 3, 5],
    [1, 2, 1],
])
# 按行進行排序(axis=1)  每一行(row)內部的元素由小到大排列
print(np.sort(a, axis = 1))
# 按列進行排序(axis=0)  每一列(column)內部的元素由小到大排列
print(np.sort(a, axis = 0))
b = np.array([2, 6, 1, 3])
# argsort返回的是把b由小到大排序時 各元素在原陣列中的索引
c = np.argsort(b)
print(c)
# 按排序的索引進行輸出  則是從小到大輸出
print(b[c])
[[3 4 5]
 [1 1 2]]
[[1 2 1]
 [4 3 5]]
[2 0 3 1]
[1 2 3 6]