pandas基礎知識(DataFrame)
阿新 • • 發佈:2019-01-07
> 多維的pandas.Series
### 選取資料
### numpy.array->pandas.DataFrame
### 元素所屬關係
### 刪除一列
### 篩選
### 巢狀字典生成DataFrame
### 轉置
import pandas as pd
import numpy as np
data = {
'color':['blue', 'green', 'yellow', 'red', 'white'],
'object':['ball', 'pen', 'pencil', 'paper', 'mug'],
'price':[1.2, 1.0, 0.6, 0.9, 1.7]
}
frame = pd.DataFrame(data)
frame
| | color | object | price |
---|---|---|---|
0 | blue | ball | 1.2 |
1 | green | pen | 1.0 |
2 | yellow | pencil | 0.6 |
3 | red | paper | 0.9 |
4 | white | mug | 1.7 |
# 指定列名讀取
frame2 = pd.DataFrame(data, columns=['object' , 'price'])
frame2
| | object | price |
---|---|---|
0 | ball | 1.2 |
1 | pen | 1.0 |
2 | pencil | 0.6 |
3 | paper | 0.9 |
4 | mug | 1.7 |
# 修改索引
frames3 = pd.DataFrame(data, index=['one' , 'two', 'three', 'four', 'five'])
frames3
| | color | object | price |
---|---|---|---|
one | blue | ball | 1.2 |
two | green | pen | 1.0 |
three | yellow | pencil | 0.6 |
four | red | paper | 0.9 |
five | white | mug | 1.7 |
frame4 = pd.DataFrame(np.arange(16).reshape(4,4),
index=['one', 'two', 'three', 'four'],
columns=['blue', 'green', 'yellow', 'red'])
frame4
| | blue | green | yellow | red |
---|---|---|---|---|
one | 0 | 1 | 2 | 3 |
two | 4 | 5 | 6 | 7 |
three | 8 | 9 | 10 | 11 |
four | 12 | 13 | 14 | 15 |
frame4.columns
Index(['blue', 'green', 'yellow', 'red'], dtype='object')
frame4.index
Index(['one', 'two', 'three', 'four'], dtype='object')
frame4.values
array([[ 0, 1, 2, 3],
[ 4, 5, 6, 7],
[ 8, 9, 10, 11],
[12, 13, 14, 15]])
frame4.red
one 3
two 7
three 11
four 15
Name: red, dtype: int64
# frame4.ix[2]  .ix 方法已在 pandas 1.0 中移除,請改用 .loc(依標籤)或 .iloc(依位置)
frame4.iloc[:,2]
one 2
two 6
three 10
four 14
Name: yellow, dtype: int64
# 先選列,再選行(列->行)
frame4['red'][3]
15
### 賦值
frame
| | color | object | price |
---|---|---|---|
0 | blue | ball | 1.2 |
1 | green | pen | 1.0 |
2 | yellow | pencil | 0.6 |
3 | red | paper | 0.9 |
4 | white | mug | 1.7 |
frame.index.name = "id"
frame.columns.name = "item"
frame
item | color | object | price |
---|---|---|---|
id | |||
0 | blue | ball | 1.2 |
1 | green | pen | 1.0 |
2 | yellow | pencil | 0.6 |
3 | red | paper | 0.9 |
4 | white | mug | 1.7 |
frame['new'] = 12
frame
item | color | object | price | new |
---|---|---|---|---|
id | ||||
0 | blue | ball | 1.2 | 12 |
1 | green | pen | 1.0 | 12 |
2 | yellow | pencil | 0.6 | 12 |
3 | red | paper | 0.9 | 12 |
4 | white | mug | 1.7 | 12 |
frame['new'] = [1.2, 2.3, 3.5, 5.8, 9.6]
frame
item | color | object | price | new |
---|---|---|---|---|
id | ||||
0 | blue | ball | 1.2 | 1.2 |
1 | green | pen | 1.0 | 2.3 |
2 | yellow | pencil | 0.6 | 3.5 |
3 | red | paper | 0.9 | 5.8 |
4 | white | mug | 1.7 | 9.6 |
ser = pd.Series(np.arange(5))
ser
0 0
1 1
2 2
3 3
4 4
dtype: int64
frame['new'] = ser
frame
item | color | object | price | new |
---|---|---|---|---|
id | ||||
0 | blue | ball | 1.2 | 0 |
1 | green | pen | 1.0 | 1 |
2 | yellow | pencil | 0.6 | 2 |
3 | red | paper | 0.9 | 3 |
4 | white | mug | 1.7 | 4 |
frame.isin([1.0,'pen'])
item | color | object | price | new |
---|---|---|---|---|
id | ||||
0 | False | False | False | False |
1 | False | True | True | True |
2 | False | False | False | False |
3 | False | False | False | False |
4 | False | False | False | False |
frame[frame.isin([1.0,'pen'])]
item | color | object | price | new |
---|---|---|---|---|
id | ||||
0 | NaN | NaN | NaN | NaN |
1 | NaN | pen | 1.0 | 1.0 |
2 | NaN | NaN | NaN | NaN |
3 | NaN | NaN | NaN | NaN |
4 | NaN | NaN | NaN | NaN |
del frame['new']
frame
item | color | object | price |
---|---|---|---|
id | |||
0 | blue | ball | 1.2 |
1 | green | pen | 1.0 |
2 | yellow | pencil | 0.6 |
3 | red | paper | 0.9 |
4 | white | mug | 1.7 |
frame4
| | blue | green | yellow | red |
---|---|---|---|---|
one | 0 | 1 | 2 | 3 |
two | 4 | 5 | 6 | 7 |
three | 8 | 9 | 10 | 11 |
four | 12 | 13 | 14 | 15 |
frame4[frame4<12]
| | blue | green | yellow | red |
---|---|---|---|---|
one | 0.0 | 1.0 | 2.0 | 3.0 |
two | 4.0 | 5.0 | 6.0 | 7.0 |
three | 8.0 | 9.0 | 10.0 | 11.0 |
four | NaN | NaN | NaN | NaN |
nestdict = {'red':{2012:22, 2013:33},
'white':{2011:13, 2012:22, 2013:16},
'blue':{2011:17, 2012:27, 2013:18}}
frame5 = pd.DataFrame(nestdict)
frame5
| | blue | red | white |
---|---|---|---|
2011 | 17 | NaN | 13 |
2012 | 27 | 22.0 | 22 |
2013 | 18 | 33.0 | 16 |
frame5.T
| | 2011 | 2012 | 2013 |
---|---|---|---|
blue | 17.0 | 27.0 | 18.0 |
red | NaN | 22.0 | 33.0 |
white | 13.0 | 22.0 | 16.0 |
nestdict = {'red':{2012:22, 2013:33},
'white':{2011:13, 2012:22, 2013:16},
'blue':{2011:17, 2012:27, 2013:18}}
frame5 = pd.DataFrame(nestdict)
frame5
| | blue | red | white |
---|---|---|---|
2011 | 17 | NaN | 13 |
2012 | 27 | 22.0 | 22 |
2013 | 18 | 33.0 | 16 |