1. 程式人生 > 實用技巧 >pandas學習筆記

pandas學習筆記

pandas學習筆記

1.一維資料結構 Series 物件(以下範例皆假設已先執行 import pandas as pd)

b=pd.Series(data=[1,2,3])  #利用list(串列)建立Series物件
b
>>>
0    1
1    2
2    3
dtype: int64

type(b)
>>>pandas.core.series.Series

a = {'1':'whj','2':'xhl','3':'xj'}  #利用字典dict建立Series物件
c = pd.Series(data=a)
c
>>>
1    whj
2    xhl
3     xj
dtype: object

type(c)
>>> pandas.core.series.Series

c.index			#獲取下標
>>> Index(['1', '2', '3'], dtype='object')

c.values			#獲取值
>>> array(['whj', 'xhl', 'xj'], dtype=object)

c[0]			#利用位置下標進行索引(索引標籤為字串時,新版pandas建議改用 c.iloc[0])
>>>'whj'
c[1]
>>>'xhl'

c + '   nihao!'			#進行拼接
>>> 
1    whj   nihao!
2    xhl   nihao!
3     xj   nihao!
dtype: object

c			#拼接會回傳新的Series,c本身不會被修改
>>>
1    whj
2    xhl
3     xj
dtype: object

2.二維資料結構 DataFrame 物件

a = pd.DataFrame(data = [[85,90,95],[82,86,90],[90,60,75]],
						columns=['語文','數學','英語'],
						index=['熊寒露','王韓健','熊健'])
a
>>>
     語文  數學  英語
熊寒露  85  90  95
王韓健  82  86  90
熊健   90  60  75

a['語文']
>>>
熊寒露    85
王韓健    82
熊健     90
Name: 語文, dtype: int64

del a['英語']         #刪除列
a
>>>
     語文  數學
熊寒露  85  90
王韓健  82  86
熊健   90  60


#層次化索引,複合索引
data = pd.DataFrame(data={'month':[12,3,6,9],'year':[2013,2014,2014,2014],'salary':[55,46,73,89]},)
data
>>>
   month  year  salary
0     12  2013      55
1      3  2014      46
2      6  2014      73
3      9  2014      89

data.set_index(['year','month'])
>>>
            salary
year month        
2013 12         55
2014 3          46
     6          73
     9          89
     
     
     
     
data = pd.DataFrame(data={'year':[2020,2020,2019,2020],'month':[1,5,6,9],'salary':['4k','5k','4k','5k']},)
data
>>>
   year  month salary
0  2020      1     4k
1  2020      5     5k
2  2019      6     4k
3  2020      9     5k

data.set_index(['year','month'])
>>>
           salary
year month       
2020 1         4k
     5         5k
2019 6         4k
2020 9         5k

3.重新索引 reindex()

a.reindex(['a','b','c','d','e','f','g'],fill_value=10)  #註:由輸出推測,此處的a應為僅含一列(列名為0)的DataFrame,而非上文的成績表;原索引中不存在的標籤以fill_value=10填充
>>>
    0
a  10
b  10
c  10
d  10
e  10
f  10
g  10

a.reindex(['a','b','c','d','e','f','g'],copy=True)  #未指定fill_value時,原索引中不存在的標籤預設填入NaN
>>>
    0
a NaN
b NaN
c NaN
d NaN
e NaN
f NaN
g NaN


data
>>>
   year  month salary
0  2020      1     4k
1  2020      5     5k
2  2019      6     4k
3  2020      9     5k

data.reindex(['a','b','c','d','e','f'])
>>>
   year  month salary
a   NaN    NaN    NaN
b   NaN    NaN    NaN
c   NaN    NaN    NaN
d   NaN    NaN    NaN
e   NaN    NaN    NaN
f   NaN    NaN    NaN



#DataFrame使用[]直接索引時,單一鍵會被當作列名,因此只支援先取列再取行,不支援先取行再取列
#但是[]支援對行進行切片
data
>>>
   year  month salary
0  2020      1     4k
1  2020      5     5k
2  2019      6     4k
3  2020      9     5k
data[:3]
>>>
   year  month salary
0  2020      1     4k
1  2020      5     5k
2  2019      6     4k

data[1]			#直接對行進行索引會報錯:1會被當作列名查找;按行取值請改用 data.iloc[1] 或 data.loc[1]
Traceback (most recent call last):
  File "pandas\_libs\hashtable_class_helper.pxi", line 1619, in pandas._libs.hashtable.PyObjectHashTable.get_item
  File "pandas\_libs\hashtable_class_helper.pxi", line 1627, in pandas._libs.hashtable.PyObjectHashTable.get_item
KeyError: 1

4.算術運算與資料對齊

a = pd.Series(data=range(3))     #Series一維陣列
b = pd.Series(data=range(6))
a
>>>
0    0
1    1
2    2
dtype: int64

b
>>>
0    0
1    1
2    2
3    3
4    4
5    5
dtype: int64

a+b
>>>
0    0.0
1    2.0
2    4.0
3    NaN
4    NaN
5    NaN
dtype: float64

a.add(b,fill_value=0)     #其中一方缺少的索引位置先以fill_value=0補齊,再進行相加
>>>
0    0.0
1    2.0
2    4.0
3    3.0
4    4.0
5    5.0
dtype: float64