mongodb去重pymongo實現
阿新 • • 發佈:2018-12-13
# -*- coding: UTF-8 -*-
"""De-duplicate a MongoDB collection of weibo comments by ``weibo_id``.

Documents are read from one collection of the ``weibo`` database and the
first document seen for each ``weibo_id`` is copied into another collection.
"""
_author_ = 'zy'
_date_ = '2018/12/13 0013 0:01'

import pymongo

# Connection settings shared by every function in this script.
_MONGO_HOST = '127.0.0.1'
_MONGO_PORT = 27017


def savedb(dbname, data, client=None):
    """Insert ``data`` into collection ``dbname`` of the ``weibo`` database.

    Args:
        dbname: Name of the target collection (despite the parameter name,
            it selects a collection inside the ``weibo`` database).
        data: A single document (dict), or a list of documents.
        client: Optional already-open ``pymongo.MongoClient`` to reuse.
            When omitted, a temporary client is opened and closed again so
            that repeated calls do not leak connections.
    """
    owns_client = client is None
    if owns_client:
        client = pymongo.MongoClient(_MONGO_HOST, _MONGO_PORT)
    try:
        # TODO (original author): a step that cleans the attributes and
        # confirms each value is actually present is still missing.
        collection = client.weibo[dbname]
        # The legacy ``insert`` accepted either one document or a list;
        # keep both call styles working with the non-deprecated API.
        if isinstance(data, list):
            collection.insert_many(data)
        else:
            collection.insert_one(data)
    finally:
        if owns_client:
            client.close()


def dealwith(dbname, dbnew):
    """Copy the first document for each ``weibo_id`` from ``dbname`` to ``dbnew``.

    Prints the number of documents read and the number of distinct
    ``weibo_id`` values written.

    Args:
        dbname: Source collection name inside the ``weibo`` database.
        dbnew: Destination collection name inside the ``weibo`` database.

    Raises:
        KeyError: If a source document lacks one of the copied fields.
    """
    client = pymongo.MongoClient(_MONGO_HOST, _MONGO_PORT)
    try:
        # TODO (original author): a step that cleans the attributes and
        # confirms each value is actually present is still missing.
        db = client.weibo

        # A set gives O(1) membership tests; assumes weibo_id values are
        # hashable (str/int) -- TODO confirm against the stored data.
        seen_ids = set()
        total = 0  # documents read, i.e. the "before de-duplication" count

        for doc in db[dbname].find():
            total += 1
            weibo_id = doc['weibo_id']
            if weibo_id in seen_ids:
                continue
            seen_ids.add(weibo_id)

            data = {
                'weibo_id': doc['weibo_id'],
                'created': doc['created'],
                'uid_name': doc['uid_name'],
                'uid': doc['uid'],
                'level': doc['level'],
                'area': doc['area'],
                # NOTE(review): 'url' is filled from the 'area' field, as in
                # the original code. This looks like a copy/paste slip for
                # doc['url'] -- confirm the source schema before changing.
                'url': doc['area'],
                'text': doc['comment'],
            }
            # Reuse the open client instead of reconnecting per document.
            savedb(dbnew, data, client=client)

        print('去重前'+str(total)+'去重後'+str(len(seen_ids)))
        # The original author recorded "4132" next to this line, presumably
        # the result observed on their data set.
    finally:
        client.close()


if __name__ == '__main__':
    dealwith('all', 'new_comment')