1. 程式人生 > Pandas 同元素多列去重

Pandas 同元素多列去重

#-*- coding: utf-8 -*-

import pandas as pd

# Example frame: each row holds an unordered pair spread over two columns,
# so ('a', 'b') and ('b', 'a') should be treated as the same pair.
data = {'G1': ['a', 'b', 'c', 'd', 'e'], 'G2': ['b', 'a', 'd', 'c', 'f']}
data = pd.DataFrame(data)

# Build an order-independent key for every row. Sorting the pair is the
# essential step: it maps both ('a', 'b') and ('b', 'a') to 'a|b'.
# The pair is read straight from the two columns instead of joining and then
# re-splitting on '|', so values that themselves contain '|' stay intact.
# A plain list is assigned (positional) rather than a Series, which would be
# aligned on index labels and yield NaN keys for a non-default index.
data['G3'] = ['|'.join(sorted(pair)) for pair in zip(data['G1'], data['G2'])]

# Keep only the first row seen for each unordered pair.
data = data.drop_duplicates('G3')