1. 程式人生 > >pandas26 combine通過函式組合2個數據幀( tcy)

pandas26 combine通過函式組合2個數據幀( tcy)

combine通過函式組合2個數據幀2018/12/31

===================================================================================
df1.combine(other, func, fill_value=None, overwrite=True)

用途:通過函式組合2個數據幀
    # 不傳播NaN值,如果1個數據缺少列用其他資料的列值替代(也可能是NaN)
引數
    # other:DataFrame
    # func:函式;以兩個Series(兩個數據幀中同名的列)作為輸入,返回一個Series或一個標量
    #
    # fill_value:標量值;在把列傳給func之前先用它填充NaN(包括某一方缺少該列、對齊後產生的NaN)
    # overwrite=True:如為True,df1中有而other中沒有的列也交給func處理(未給fill_value時通常得到NaN);如為False,這些列保留df1的原值(例如例項2.1加上overwrite=False後,C列仍為14、15)
返回:組合DataFrame

====================================================================================
# 例項1.1:df1 if df1>=df2 else df2
# Example frames: df2 is larger than df1 in row 0 and smaller in row 1.
df1 = pd.DataFrame({'A': [10, 11], 'B': [12, 13]})
df2 = pd.DataFrame({'A': [21, -22], 'B': [23, -24]})

# Boolean-mask indexing keeps each frame's shape and puts NaN where the mask
# is False, so stacking the two masked frames yields four rows (two per
# frame) rather than an element-wise selection.
# pd.concat replaces DataFrame.append, which was removed in pandas 2.0.
result = pd.concat([df1[df1 >= df2], df2[df1 < df2]])

# df1            df2                  result
    A   B            A   B              A     B
0  10  12        0  21  23         0  NaN   NaN
1  11  13        1 -22 -24         1  11.0  13.0
                                   0  21.0  23.0
                                   1   NaN   NaN
# 例項1.2:按整列選擇 s1 if s1.any() < s2.any() else s2(此例中兩個any()皆為True,故每列都取df2)
def _pick_by_any(s1, s2):
    """Choose one whole column: s1 if ``s1.any() < s2.any()``, else s2.

    ``Series.any()`` collapses a column to a single bool, so this compares
    two booleans (False < True), not the column values element by element.
    """
    if s1.any() < s2.any():
        return s1
    return s2


# With the df1/df2 of example 1.1 every column holds non-zero values, so both
# any() calls are True, the test is False and the result equals df2.
df1.combine(df2, _pick_by_any)

 # df1             df2            result
    A   B            A   B            A   B
0  10  12        0  21  23        0  21  23
1  11  13        1 -22 -24        1 -22 -24

# 例項1.3:
# Element-wise pick of the larger value: keep s1 where s1 >= s2, otherwise
# take s2 where s1 < s2 (NaN where neither comparison holds).
# Nested Series.where returns a full-length column in row order and avoids
# Series.append, which was removed in pandas 2.0.
result = df1.combine(df2, lambda s1, s2: s1.where(s1 >= s2, s2.where(s1 < s2)))
# df1               df2           result
    A   B            A   B            A   B
0  10  12        0  21  23        0  21  23
1  11  13        1 -22 -24        1  11  13

===================================================================================
# 例項2.1:
# df1 has an extra column C that df2 lacks.
df1 = pd.DataFrame({'A': [10, 11], 'B': [12, 13], 'C': [14, 15]})
df2 = pd.DataFrame({'A': [21, -22], 'B': [23, -24]})

# Element-wise pick of the larger value: keep s1 where s1 >= s2, otherwise
# take s2 where s1 < s2 (NaN where neither comparison holds).
# Nested Series.where avoids Series.append, which was removed in pandas 2.0.

# No fill_value: df2's missing C is all NaN, both comparisons are False, so
# C comes out as NaN.
result = df1.combine(
    df2, lambda s1, s2: s1.where(s1 >= s2, s2.where(s1 < s2))
)
# fill_value=-1: the missing C is filled with -1 first, so df1's C wins.
result1 = df1.combine(
    df2, lambda s1, s2: s1.where(s1 >= s2, s2.where(s1 < s2)), fill_value=-1
)
# fill_value=30: the filled value is larger than df1's C, so C becomes 30.
result2 = df1.combine(
    df2, lambda s1, s2: s1.where(s1 >= s2, s2.where(s1 < s2)), fill_value=30
)

#df1             df2           result           result1            result2
    A   B   C        A   B        A   B   C        A    B   C      A   B    C
0  10  12  14    0  21  23     0  21  23 NaN    0  21  23  14   0  21  23  30
1  11  13  15    1 -22 -24     1  11  13 NaN    1  11  13  15   1  11  13  30

# 例項2.2:fill_value補齊資料長度
# df2 has an extra column C that df1 lacks.
df1 = pd.DataFrame({'A': [10, 11], 'B': [12, 13]})
df2 = pd.DataFrame({'A': [21, -22], 'B': [23, -24], 'C': [25, 26]})

# Element-wise pick of the larger value: keep s1 where s1 >= s2, otherwise
# take s2 where s1 < s2 (NaN where neither comparison holds).
# Nested Series.where avoids Series.append, which was removed in pandas 2.0.

# No fill_value: df1's missing C is all NaN, both comparisons are False, so
# C comes out as NaN.
result = df1.combine(
    df2, lambda s1, s2: s1.where(s1 >= s2, s2.where(s1 < s2))
)
# fill_value=-1: the missing C is filled with -1 first, so df2's C wins.
result1 = df1.combine(
    df2, lambda s1, s2: s1.where(s1 >= s2, s2.where(s1 < s2)), fill_value=-1
)
# fill_value=30: the filled value is larger than df2's C, so C becomes 30.
result2 = df1.combine(
    df2, lambda s1, s2: s1.where(s1 >= s2, s2.where(s1 < s2)), fill_value=30
)

  #df1      df2               result          result1              result2
    A   B       A   B   C        A   B   C       A    B     C     A   B     C
0  10  12   0  21  23  25     0  21  23 NaN   0  21  23  25.0  0  21  23  30.0
1  11  13   1 -22 -24  26     1  11  13 NaN   1  11  13  26.0  1  11  13  30.0

==================================================================================
# 例項3:na值
# Frames that already contain NaN values.
df1 = pd.DataFrame({'A': [10, 11], 'B': [12, np.nan], 'C': [14, np.nan]})
df2 = pd.DataFrame({'A': [21, -22], 'B': [23, np.nan], 'C': [25, 26]})

# Element-wise pick of the larger value. Comparisons against NaN are False,
# so a cell stays NaN whenever either input is NaN and no fill_value is given.
# Nested Series.where avoids Series.append, which was removed in pandas 2.0.
result = df1.combine(df2, lambda s1, s2: s1.where(s1 >= s2, s2.where(s1 < s2)))

 #df1                   df2                    result
    A     B     C           A     B   C           A     B     C
0  10  12.0  14.0       0  21  23.0  25        0  21  23.0  25.0
1  11   NaN  NaN        1 -22  NaN   26        1  11   NaN  NaN

# 例項4:na值 fill_value 替代na值
# fill_value replaces the NaN cells of both frames before the columns reach
# the function, so the comparison then sees the filled value instead of NaN.
# Nested Series.where avoids Series.append, which was removed in pandas 2.0.
result = df1.combine(
    df2, lambda s1, s2: s1.where(s1 >= s2, s2.where(s1 < s2)), fill_value=-1
)
result1 = df1.combine(
    df2, lambda s1, s2: s1.where(s1 >= s2, s2.where(s1 < s2)), fill_value=30
)
    
#df1                df2                 result              result1
     A     B     C      A     B   C        A     B     C        A     B     C
0  10  12.0  14.0   0  21  23.0  25     0  21  23.0  25.0    0  21  23.0  25.0
1  11   NaN   NaN   1 -22   NaN  26     1  11  -1.0  26.0    1  11  30.0  30.0

=================================================================================