pandas學習與使用1
阿新 • • 發佈:2018-12-12
學習了 NumPy 之後,接著繼續學習 pandas。以下是 pandas 的一些基本語法及其使用方法,這一節主要介紹 pandas 中的 Series 結構。執行環境:Python 2.7。
#!/usr/bin/python
# -*- coding: UTF-8 -*-
"""Demonstrate the basics of the pandas ``Series`` structure.

Covers construction, element selection, head/tail, index/values access,
size and uniqueness helpers, and index-aligned addition.

Every ``print`` call receives exactly one pre-formatted string, so the
script runs on both Python 2.7 and Python 3, and labels that end in a
newline are not followed by the stray separator space that a naive
``print(a, b)`` port would introduce.
"""
import pandas as pd
import numpy as np

# pandas has two main data structures: 1. Series; 2. DataFrame.
# A Series is built on NumPy's ndarray: a one-dimensional labelled array.

# pd.Series([values], index=[labels])
# The values are given as a list. ``index`` is optional: when omitted the
# labels default to 0, 1, 2, ...; when supplied it must have the same
# length as the values.

# Example 1: default integer index. The NaN entry makes the dtype float64.
s = pd.Series([1, 2, 6, np.nan, 44, 1])
print("s:\n{}".format(s))

# Example 2: explicit string labels.
s1 = pd.Series([1, 2, 3, 4, 5], index=["a", "b", "c", "d", "e"])
print("s1:\n{}".format(s1))

# pd.Series({dict}): the dict keys become the index labels.
s2 = pd.Series({'a': 3, 'b': 4, 'c': 5, 'd': 2, 'e': 6})
print("s2:\n{}".format(s2))

# Selection: s[label] or s[[list of labels]].
# Works much like array indexing; pass a list to pick several
# non-contiguous entries at once.
v = np.random.random_sample(20)
print("v: {}".format(v))
l = pd.Series(v)
print("l:\n{}".format(l))
l1 = l[[1, 5, 7, 9]]
print("l1:\n{}".format(l1))
l2 = l[2:10]  # slice: entries 2..9, the end point is excluded
print("l2:\n{}".format(l2))
l3 = l[17]  # a single label yields a scalar, not a Series
print("l3:\n{}".format(l3))

# head(n) / tail(n): first / last n rows; n is optional and defaults to 5.
print("l的前五個值:\n{}".format(l.head()))
print("l的後三個值:\n{}".format(l.tail(3)))

# .index and .values are attributes, not methods. As the output shows,
# .index is an Index object (a RangeIndex here) and .values prints as a
# NumPy array; wrap either in list() when a plain list is needed.
print(type(l.index))  # class 'pandas.core.indexes.range.RangeIndex'
print("l的標籤:\n{}".format(list(l.index)))
print("l的值:\n{}".format(l.values))

# Size, shape, uniqueness, counts of values.
print("len(): {}".format(len(l)))  # length, NaN included -> 20
# One-element shape tuple; shown as (20L,) in the recorded Python 2.7 run.
print("shape(): {}".format(np.shape(l)))
print("count(): {}".format(l.count()))  # length, NaN excluded -> 20
print("unique(): {}".format(l.unique()))  # the distinct values
print("value_counts():\n{}".format(l.value_counts()))  # occurrences per value

# Addition aligns on the index: values sharing a label are added, and a
# label present in only one operand yields NaN.
# label: 0    1  2  3  4
# s4:    NaN  1  2  3  4
# s5:    1    1  1  1  NaN
# s6:    NaN  2  3  4  NaN
s4 = pd.Series([1, 2, 3, 4], index=[1, 2, 3, 4])
s5 = pd.Series([1, 1, 1, 1])
s6 = s4 + s5
print("s4 + s5:\n{}".format(s6))
以下是上述程式碼的執行結果:
D:\software\Anaconda2\python.exe D:/PycharmProjects/Learn/learn_panda/learn_pd1.py
s:
0     1.0
1     2.0
2     6.0
3     NaN
4    44.0
5     1.0
dtype: float64
s1:
a    1
b    2
c    3
d    4
e    5
dtype: int64
s2:
a    3
b    4
c    5
d    2
e    6
dtype: int64
v: [0.37635755 0.4886929  0.07210501 0.79949712 0.45134537 0.87680035
 0.30748949 0.99859402 0.61613633 0.83520501 0.07343604 0.81827534
 0.25139748 0.65326514 0.45755433 0.31827714 0.3407211  0.07814553
 0.64210118 0.69003531]
l:
0     0.376358
1     0.488693
2     0.072105
3     0.799497
4     0.451345
5     0.876800
6     0.307489
7     0.998594
8     0.616136
9     0.835205
10    0.073436
11    0.818275
12    0.251397
13    0.653265
14    0.457554
15    0.318277
16    0.340721
17    0.078146
18    0.642101
19    0.690035
dtype: float64
l1:
1    0.488693
5    0.876800
7    0.998594
9    0.835205
dtype: float64
l2:
2    0.072105
3    0.799497
4    0.451345
5    0.876800
6    0.307489
7    0.998594
8    0.616136
9    0.835205
dtype: float64
l3:
0.0781455282331196
l的前五個值:
0    0.376358
1    0.488693
2    0.072105
3    0.799497
4    0.451345
dtype: float64
l的後三個值:
17    0.078146
18    0.642101
19    0.690035
dtype: float64
<class 'pandas.core.indexes.range.RangeIndex'>
l的標籤:
[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19]
l的值:
[0.37635755 0.4886929  0.07210501 0.79949712 0.45134537 0.87680035
 0.30748949 0.99859402 0.61613633 0.83520501 0.07343604 0.81827534
 0.25139748 0.65326514 0.45755433 0.31827714 0.3407211  0.07814553
 0.64210118 0.69003531]
len(): 20
shape(): (20L,)
count(): 20
unique(): [0.37635755 0.4886929  0.07210501 0.79949712 0.45134537 0.87680035
 0.30748949 0.99859402 0.61613633 0.83520501 0.07343604 0.81827534
 0.25139748 0.65326514 0.45755433 0.31827714 0.3407211  0.07814553
 0.64210118 0.69003531]
value_counts():
0.451345    1
0.835205    1
0.307489    1
0.799497    1
0.818275    1
0.457554    1
0.616136    1
0.998594    1
0.078146    1
0.376358    1
0.318277    1
0.251397    1
0.073436    1
0.072105    1
0.488693    1
0.642101    1
0.690035    1
0.876800    1
0.340721    1
0.653265    1
dtype: int64
s4 + s5:
0    NaN
1    2.0
2    3.0
3    4.0
4    NaN
dtype: float64
Process finished with exit code 0