Datawhale Task05 變形 打卡
阿新 • • 發佈:2020-12-28
目錄
5.1長寬表的變形
import pandas as pd
pd.DataFrame({'Gender':['F','F','M','M']
,'Height':[163,160,175,180]})
Gender | Height | |
---|---|---|
0 | F | 163 |
1 | F | 160 |
2 | M | 175 |
3 | M | 180 |
pd.DataFrame({'Height:F':[163,160],
'Height:M' :[175,180]})
Height:F | Height:M | |
---|---|---|
0 | 163 | 175 |
1 | 160 | 180 |
5.1.1 pivot
df = pd.DataFrame({'Class':[1,1,2,2],
'Name':['zhangsan','zhangsan','lisi','lisi']
,'Subject':['Chinese','Math','Chinese','Math']
,'Grade':[80,75,90,85]})
df
Class | Name | Subject | Grade | |
---|---|---|---|---|
0 | 1 | zhangsan | Chinese | 80 |
1 | 1 | zhangsan | Math | 75 |
2 | 2 | lisi | Chinese | 90 |
3 | 2 | lisi | Math | 85 |
df.pivot(index = 'Name',columns = 'Subject',values = 'Grade')
Subject | Chinese | Math |
---|---|---|
Name | ||
lisi | 90 | 85 |
zhangsan | 80 | 75 |
5.1.2 pivot_table
df = pd.DataFrame({'Name':['San Zhang', 'San Zhang',
'San Zhang', 'San Zhang',
'Si Li' , 'Si Li', 'Si Li', 'Si Li'],
'Subject':['Chinese', 'Chinese', 'Math', 'Math',
'Chinese', 'Chinese', 'Math', 'Math'],
'Grade':[80, 90, 100, 90, 70, 80, 85, 95]})
df
Name | Subject | Grade | |
---|---|---|---|
0 | San Zhang | Chinese | 80 |
1 | San Zhang | Chinese | 90 |
2 | San Zhang | Math | 100 |
3 | San Zhang | Math | 90 |
4 | Si Li | Chinese | 70 |
5 | Si Li | Chinese | 80 |
6 | Si Li | Math | 85 |
7 | Si Li | Math | 95 |
df.pivot_table(index = 'Name',
columns = 'Subject',
values = 'Grade',
aggfunc = 'mean')
Subject | Chinese | Math |
---|---|---|
Name | ||
San Zhang | 85 | 95 |
Si Li | 75 | 90 |
df.pivot_table(index = 'Name',
columns = 'Subject',
values = 'Grade',
aggfunc = lambda x:x.mean())
Subject | Chinese | Math |
---|---|---|
Name | ||
San Zhang | 85 | 95 |
Si Li | 75 | 90 |
df.pivot_table(index = 'Name',
columns = 'Subject',
values = 'Grade',
aggfunc='mean',
margins=True)
Subject | Chinese | Math | All |
---|---|---|---|
Name | |||
San Zhang | 85 | 95.0 | 90.00 |
Si Li | 75 | 90.0 | 82.50 |
All | 80 | 92.5 | 86.25 |
5.1.3 melt
df = pd.DataFrame({'Class':[1,2],
'Name':['zhangsan','lisi'],
'Chinese':[80,90]
,'Math':[80,75]})
df
Class | Name | Chinese | Math | |
---|---|---|---|---|
0 | 1 | zhangsan | 80 | 80 |
1 | 2 | lisi | 90 | 75 |
df_melted = df.melt(id_vars = ['Class','Name'],
value_vars = ['Chinese','Math'],
var_name = 'Subject',
value_name = 'Grade')
df_melted
Class | Name | Subject | Grade | |
---|---|---|---|---|
0 | 1 | zhangsan | Chinese | 80 |
1 | 2 | lisi | Chinese | 90 |
2 | 1 | zhangsan | Math | 80 |
3 | 2 | lisi | Math | 75 |
5.1.4 wide_to_long
df = pd.DataFrame({'Class':[1,2],'Name':['San Zhang', 'Si Li'],
'Chinese_Mid':[80, 75], 'Math_Mid':[90, 85],
'Chinese_Final':[80, 75], 'Math_Final':[90, 85]})
df
Class | Name | Chinese_Mid | Math_Mid | Chinese_Final | Math_Final | |
---|---|---|---|---|---|---|
0 | 1 | San Zhang | 80 | 90 | 80 | 90 |
1 | 2 | Si Li | 75 | 85 | 75 | 85 |
pd.wide_to_long(df,
stubnames=['Chinese', 'Math'],
i = ['Class', 'Name'],
j='Examination',
sep='_',
suffix='.+')
Chinese | Math | |||
---|---|---|---|---|
Class | Name | Examination | ||
1 | San Zhang | Mid | 80 | 90 |
Final | 80 | 90 | ||
2 | Si Li | Mid | 75 | 85 |
Final | 75 | 85 |