1. 程式人生 > 其它 >Datawhale Task05 變形 打卡

Datawhale Task05 變形 打卡

技術標籤:pandas、python

目錄

5.1 長寬表的變形

import pandas as pd 
# Long-format example: every row is one observation, and the 'Gender'
# column records which group each 'Height' value belongs to.
pd.DataFrame(
    [('F', 163), ('F', 160), ('M', 175), ('M', 180)],
    columns=['Gender', 'Height'],
)

  Gender  Height
0      F     163
1      F     160
2      M     175
3      M     180
# Wide-format example: the gender is encoded in the column labels,
# with one 'Height:<gender>' column per group.
pd.DataFrame(
    [[163, 175], [160, 180]],
    columns=['Height:F', 'Height:M'],
)
   Height:F  Height:M
0       163       175
1       160       180

5.1.1 pivot

# Long table of exam results: one row per (student, subject) pair.
df = pd.DataFrame(
    [
        (1, 'zhangsan', 'Chinese', 80),
        (1, 'zhangsan', 'Math', 75),
        (2, 'lisi', 'Chinese', 90),
        (2, 'lisi', 'Math', 85),
    ],
    columns=['Class', 'Name', 'Subject', 'Grade'],
)
df
   Class      Name  Subject  Grade
0      1  zhangsan  Chinese     80
1      1  zhangsan     Math     75
2      2      lisi  Chinese     90
3      2      lisi     Math     85
# Reshape long -> wide: 'Name' becomes the row index, each distinct
# 'Subject' value becomes a column, and the cells hold 'Grade'.
df.pivot(
    values='Grade',
    index='Name',
    columns='Subject',
)
Subject   Chinese  Math
Name
lisi           90    85
zhangsan       80    75

5.1.2 pivot_table

# Exam results in which every (Name, Subject) pair appears twice, so the
# grades have to be aggregated when the table is reshaped to wide form.
df = pd.DataFrame({
    'Name': ['San Zhang', 'San Zhang', 'San Zhang', 'San Zhang',
             'Si Li', 'Si Li', 'Si Li', 'Si Li'],
    'Subject': ['Chinese', 'Chinese', 'Math', 'Math',
                'Chinese', 'Chinese', 'Math', 'Math'],
    'Grade': [80, 90, 100, 90, 70, 80, 85, 95],
})
# Display the frame; this must be its own statement, not fused onto the
# line that closes the constructor call.
df
        Name  Subject  Grade
0  San Zhang  Chinese     80
1  San Zhang  Chinese     90
2  San Zhang     Math    100
3  San Zhang     Math     90
4      Si Li  Chinese     70
5      Si Li  Chinese     80
6      Si Li     Math     85
7      Si Li     Math     95
# Reshape to wide form, collapsing the repeated (Name, Subject) rows
# into their mean grade via the built-in 'mean' aggregation name.
df.pivot_table(
    values='Grade',
    index='Name',
    columns='Subject',
    aggfunc='mean',
)
Subject    Chinese  Math
Name
San Zhang       85    95
Si Li           75    90
# Same reshape, but the aggregation is supplied as a callable that
# receives the grades of one (Name, Subject) group and returns their mean.
df.pivot_table(
    values='Grade',
    index='Name',
    columns='Subject',
    aggfunc=lambda scores: scores.mean(),
)
Subject    Chinese  Math
Name
San Zhang       85    95
Si Li           75    90
# margins=True appends an 'All' row and an 'All' column holding the
# aggregate (here: the mean) over each column and each row.
df.pivot_table(
    values='Grade',
    index='Name',
    columns='Subject',
    margins=True,
    aggfunc='mean',
)
Subject    Chinese  Math    All
Name
San Zhang       85  95.0  90.00
Si Li           75  90.0  82.50
All             80  92.5  86.25

5.1.3 melt

# Wide table of exam results: one row per student, one column per subject.
df = pd.DataFrame(
    [
        (1, 'zhangsan', 80, 80),
        (2, 'lisi', 90, 75),
    ],
    columns=['Class', 'Name', 'Chinese', 'Math'],
)
df
   Class      Name  Chinese  Math
0      1  zhangsan       80    80
1      2      lisi       90    75
# Reshape wide -> long: 'Class' and 'Name' are kept as identifier columns,
# the 'Chinese' and 'Math' column labels move into a new 'Subject' column,
# and their values move into a new 'Grade' column.
df_melted = pd.melt(
    df,
    id_vars=['Class', 'Name'],
    value_vars=['Chinese', 'Math'],
    var_name='Subject',
    value_name='Grade',
)
df_melted
   Class      Name  Subject  Grade
0      1  zhangsan  Chinese     80
1      2      lisi  Chinese     90
2      1  zhangsan     Math     80
3      2      lisi     Math     75

5.1.4 wide_to_long

# Wide table whose value columns are named '<subject>_<exam>', i.e. each
# column label carries two pieces of information joined by an underscore.
df = pd.DataFrame(
    [
        (1, 'San Zhang', 80, 90, 80, 90),
        (2, 'Si Li', 75, 85, 75, 85),
    ],
    columns=[
        'Class', 'Name',
        'Chinese_Mid', 'Math_Mid',
        'Chinese_Final', 'Math_Final',
    ],
)
df
   Class       Name  Chinese_Mid  Math_Mid  Chinese_Final  Math_Final
0      1  San Zhang           80        90             80          90
1      2      Si Li           75        85             75          85
# Split every '<stub>_<suffix>' column at the '_' separator: the stubs
# 'Chinese' and 'Math' stay as value columns, while the suffix becomes a
# new index level named 'Examination' alongside 'Class' and 'Name'.
# The suffix pattern '.+' lets non-numeric suffixes such as 'Mid' and
# 'Final' match.
pd.wide_to_long(
    df,
    stubnames=['Chinese', 'Math'],
    i=['Class', 'Name'],
    j='Examination',
    suffix='.+',
    sep='_',
)
                             Chinese  Math
Class Name      Examination
1     San Zhang Mid               80    90
                Final             80    90
2     Si Li     Mid               75    85
                Final             75    85