1. 程式人生 > >pandas applymap,apply,map例項應用

pandas applymap,apply,map例項應用

applymap 作用於 DataFrame 的每一個元素
apply 作用於 DataFrame 的一行或者一列
map 作用於 Series 的每一個元素
import pandas as pd
import numpy as np

'''
applymap
apply
map
的區別
'''


# eg1

def func_tuple(x):
    """Return ``x`` multiplied by itself.

    Despite its name, the function neither takes nor returns a tuple; it is
    the element-wise callable used by the first ``applymap`` example.
    """
    squared = x * x
    return squared


# Two integer Series that become columns "A" and "B" of a small frame.
series = pd.Series([1, 2, 3, 4])
series1 = pd.Series([11, 12, 13, 14])
df = pd.DataFrame({"A": series, "B": series1})
print(df)

# applymap calls func_tuple once per cell, so every value gets squared.
# NOTE(review): recent pandas releases deprecate DataFrame.applymap in favour
# of DataFrame.map -- confirm against the pandas version this script targets.
dff = df.applymap(func_tuple)
print(dff)

'''
   A   B
0  1  11
1  2  12
2  3  13
3  4  14
    A    B
0   1  121
1   4  144
2   9  169
3  16  196
'''

# Second frame: column "A" holds single-character strings, "B" holds integers.
series = pd.Series(list('abcd'))
series1 = pd.Series([11, 12, 13, 14])
df = pd.DataFrame({"A": series, "B": series1})

print(df)


# dff=df.applymap(func_tuple)
# print(dff)
# print("***")

def func_list(x):
    """Return a new two-element list that contains ``x`` twice."""
    return [x] * 2


# Apply func_list to every cell of df, so each cell becomes a two-item list.
# NOTE(review): recent pandas releases deprecate DataFrame.applymap in favour
# of DataFrame.map -- confirm against the pandas version this script targets.
dff = df.applymap(func_list)
print(dff)
print("***")

'''
DataFrame.applymap(func)
Apply a function to a Dataframe elementwise.
This method applies a function that accepts and returns a scalar to every element of a DataFrame.
'''

'''
DataFrame.apply: Apply a function along input axis of DataFrame.


Unlike agg, apply’s callable is passed a sub-DataFrame which gives you access to all the columns
'''

# Sample data: one row per animal with a size label, a weight and an adult flag.
df = pd.DataFrame({
    'animal': 'cat dog cat fish dog cat cat'.split(),
    'size': list('SSMMMLL'),
    'weight': [8, 10, 11, 1, 20, 12, 12],
    'adult': [False] * 5 + [True] * 2,
})
print(df)

# Iterating a GroupBy yields (group key, sub-DataFrame) pairs.
dff = df.groupby('animal')
for name, gp in dff:
    print(name)
    print(gp)
    print("##")

print(dff.groups)
print(dff.get_group('cat'))


def _size_of_heaviest(subf):
    """Return the ``size`` value of the row holding the largest ``weight``."""
    heaviest_label = subf['weight'].idxmax()
    return subf.loc[heaviest_label, 'size']


# For each animal, report the size label of its heaviest row.
res = df.groupby('animal').apply(_size_of_heaviest)
print(res)


# Visual separator before the next example.
print("*" * 33)

df = pd.DataFrame({'jim': range(5), 'joe': range(5, 10)})
print(df)

# Group by a boolean key: rows where 'jim' is below 2 versus all other rows.
gr = df.groupby(df['jim'] < 2)
print(gr.groups)

print(df)

# apply() with the string 'mean' reduces along an axis and returns a Series:
# one mean per column by default, one mean per row with axis=1.
column_means = df.apply('mean')
print(column_means)
print(type(column_means))  # <class 'pandas.core.series.Series'>
print(df.apply('mean', axis=1))
'''
{False: Int64Index([2, 3, 4], dtype='int64'), True: Int64Index([0, 1], dtype='int64')}
   jim  joe
0    0    5
1    1    6
2    2    7
3    3    8
4    4    9
jim    2.0
joe    7.0
dtype: float64
0    2.5
1    3.5
2    4.5
3    5.5
4    6.5
dtype: float64

'''



#map
'''Index.map(mapper, na_action=None)
Map values using input correspondence (a dict, Series, or function).
Parameters mapper : function, dict, or Series Mapping correspondence.
na_action : {None, ‘ignore’}
If ‘ignore’, propagate NA values, without passing them to the mapping correspondence.
Returns applied : Union[Index, MultiIndex], inferred
The output of the mapping function applied to the index. If the function returns a tuple with more than one element a MultiIndex will be returned.
'''
from pandas.tseries.offsets import CustomBusinessDay
from datetime import datetime
import numpy as np
import pandas as pd



# Week mask naming the days that count as working days: Sunday to Thursday.
weekmask_egypt = 'Sun Mon Tue Wed Thu'


# Holidays supplied in three forms: ISO string, datetime, numpy datetime64.
holidays = ['2012-05-01', datetime(2013, 5, 1), np.datetime64('2014-05-01')]

# Custom business-day offset built from the holiday list and the week mask.
bday_egypt = CustomBusinessDay(holidays=holidays, weekmask=weekmask_egypt)
print(bday_egypt)

dt = datetime(2013, 4, 30)

# Move dt forward by two of the custom business days.
print(dt + 2 * bday_egypt)

# Five dates starting at dt, stepping by the custom business-day offset.
dts =pd.date_range(dt, periods=5, freq=bday_egypt)
print(dts)
'''DatetimeIndex(['2013-04-30', '2013-05-02', '2013-05-05', '2013-05-06',
               '2013-05-07'],
              dtype='datetime64[ns]', freq='C')'''

# Series.map with a Series as the mapper: each weekday number is looked up in
# the mapper's default 0..6 index, which lines up with Mon..Sun.
print(pd.Series(dts.weekday, dts).map(pd.Series('Mon Tue Wed Thu Fri Sat Sun'. split())))
# The same Series before mapping: raw weekday numbers indexed by date.
print(pd.Series(dts.weekday, dts) )
'''
2013-04-30    Tue
2013-05-02    Thu
2013-05-05    Sun
2013-05-06    Mon
2013-05-07    Tue
Freq: C, dtype: object
2013-04-30    1
2013-05-02    3
2013-05-05    6                               
2013-05-06    0
2013-05-07    1
Freq: C, dtype: int64
'''

# Sample inputs for the Series.map examples: the integers 1..7 and seven
# weekday abbreviations.
x = pd.Series([1, 2, 3, 4, 5, 6, 7])
y = pd.Series("mon tues weds thurs fri sat sun".split())


def f(x):
    """Label ``x`` as ``'high'`` when it is greater than 5, else ``'low'``."""
    return 'high' if x > 5 else 'low'


print(x)
print(y)
# Series.map with a function: f turns every value into 'high' or 'low'.
print(x.map(f))

# Lookup table from a day number (stored as a string key) to a weekday label.
dic = {
    "1": 'mon', '2': 'tue', '4': 'thir', '5': 'fri',
    '6': 'sat', '7': 'sun', '3': 'wed',
}


def get_weekday(x):
    """Return the weekday label stored in ``dic`` for ``x``.

    ``x`` is converted with ``str`` before the lookup, so the integer ``1``
    and the string ``"1"`` resolve to the same entry. A value without an
    entry raises ``KeyError``.
    """
    key = str(x)
    return dic[key]
print('**')

# Series.map with a function that looks each value up in the dic table.
print(x.map(get_weekday))