pandas replace 替換功能 (the replace function)
阿新 • • 發佈:2018-12-22
import pandas as pd
import numpy as np
s = pd.Series([0,1,2,3,4])
s.replace(0,5) # single value to replace
0 5
1 1
2 2
3 3
4 4
dtype: int64
df = pd.DataFrame({'A':[0,1,2,3,4], "B":[5,6,7,8,9], "C":['a','b','c','d','e']})
df.replace(0,5) # replace every 0 with 5
| | A | B | C |
---|---|---|---|
0 | 5 | 5 | a |
1 | 1 | 6 | b |
2 | 2 | 7 | c |
3 | 3 | 8 | d |
4 | 4 | 9 | e |
df # df itself is unchanged because the default is inplace=False # DataFrame.replace(to_replace=None, value=None, inplace=False, limit=None, regex=False, method='pad') # to_replace can be a number, string, list, dict, or even a regular expression # limit: maximum size gap to forward or backward fill.
| | A | B | C |
---|---|---|---|
0 | 0 | 5 | a |
1 | 1 | 6 | b |
2 | 2 | 7 | c |
3 | 3 | 8 | d |
4 | 4 | 9 | e |
1. list like replace method
df.replace([1,2,3,4],[4,3,2,1]) # content to replace . to_replace=[1,2,3,4],value=[4,3,2,1]
| | A | B | C |
---|---|---|---|
0 | 0 | 5 | a |
1 | 4 | 6 | b |
2 | 3 | 7 | c |
3 | 2 | 8 | d |
4 | 1 | 9 | e |
df.replace([1,2,3,4],100) # to_replace=[1,2,3,4], value=100
| | A | B | C |
---|---|---|---|
0 | 0 | 5 | a |
1 | 100 | 6 | b |
2 | 100 | 7 | c |
3 | 100 | 8 | d |
4 | 100 | 9 | e |
df.replace([1,2],method='bfill') # like fillna with method bfill (backfill); the default method is pad (note: the method keyword is deprecated since pandas 2.1)
| | A | B | C |
---|---|---|---|
0 | 0 | 5 | a |
1 | 3 | 6 | b |
2 | 3 | 7 | c |
3 | 3 | 8 | d |
4 | 4 | 9 | e |
2. dict like replace method
df.replace({2:20,6:100}) # dict maps old value to new value: replace 2 with 20 and 6 with 100
| | A | B | C |
---|---|---|---|
0 | 0 | 5 | a |
1 | 1 | 100 | b |
2 | 20 | 7 | c |
3 | 3 | 8 | d |
4 | 4 | 9 | e |
df.replace({'A':2,'B':7},1000) # to_replace={'A':2,'B':7}, value=1000: replace 2 in column A and 7 in column B with 1000
| | A | B | C |
---|---|---|---|
0 | 0 | 5 | a |
1 | 1 | 6 | b |
2 | 1000 | 1000 | c |
3 | 3 | 8 | d |
4 | 4 | 9 | e |
df.replace({'A':{1:1000,4:20}}) # in column A: replace 1 with 1000 and 4 with 20
| | A | B | C |
---|---|---|---|
0 | 0 | 5 | a |
1 | 1000 | 6 | b |
2 | 2 | 7 | c |
3 | 3 | 8 | d |
4 | 20 | 9 | e |
3. regex expression
df = pd.DataFrame({'A':['bat','foot','bait'],
'B':['abc','bar','foot']})
df.replace(to_replace=r'^ba.$',value='vvvv',regex=True) # to define to_replace and value in the function
| | A | B |
---|---|---|
0 | vvvv | abc |
1 | foot | vvvv |
2 | bait | foot |
df.replace({'A': r'^ba.$'}, {'A': 'new'}, regex=True) # in column A: to_replace=r'^ba.$', value='new'
| | A | B |
---|---|---|
0 | new | abc |
1 | foot | bar |
2 | bait | foot |
df.replace({'A':{r"^ba.$":"new"}},regex=True) # same as above
| | A | B |
---|---|---|
0 | new | abc |
1 | foot | bar |
2 | bait | foot |
df.replace(regex=r'^ba.$',value='vvv') # in the whole dataframe
| | A | B |
---|---|---|
0 | vvv | abc |
1 | foot | vvv |
2 | bait | foot |
df.replace(regex={r'^ba.$':'vvv','foot':'xyz'})
| | A | B |
---|---|---|
0 | vvv | abc |
1 | xyz | vvv |
2 | bait | xyz |
df.replace(regex=[r'^ba.$','foo.$'],value='vvv')
| | A | B |
---|---|---|
0 | vvv | abc |
1 | vvv | vvv |
2 | bait | vvv |