Python3--批量爬取資料之調金山詞霸api進行翻譯
阿新 • • 發佈:2019-02-15
上程式碼:
#!/usr/bin/env python3
# coding=utf8
"""Fill in missing name translations with the iciba dictionary API.

The script reads a '|'-delimited CSV file, looks up column 0 of every row
whose column 6 is empty, and appends each row to an output CSV file.
Requests are sent through a proxy picked at random from ``IP.txt`` and with
a random User-Agent header.
"""
import csv
import hashlib
import http.client
import json
import random
import time
import urllib

import requests
from fake_useragent import UserAgent

API_URL = 'http://dict-co.iciba.com/api/dictionary.php'
API_KEY = 'key'  # developer key
IP_LIST_PATH = 'IP.txt'
INPUT_PATH = 'S_baiduBaike_youdaoChinese_utf-8.csv'
OUTPUT_PATH = '經有道金山詞霸爬取後/S_baiduBaike_youdaoChinese_jscb.csv'

# Index of the CSV column that receives the translation.
TRANSLATION_COLUMN = 6
# Tags that mark a dictionary entry as a personal name.
NAME_MARKERS = ('[人名]', '[男子名]', '[女子名]')
# Seconds to wait for the server before treating the request as failed.
REQUEST_TIMEOUT = 10


def get_ip_list():
    """Return the non-blank lines of ``IP.txt``, one proxy address each."""
    with open(IP_LIST_PATH, 'r') as f:
        # Blank lines are dropped so they can never be picked as a proxy.
        return [line.strip() for line in f if line.strip()]


def get_random_ip(ip_list):
    """Pick a random proxy from *ip_list* and return a requests proxy dict.

    Raises:
        IndexError: if *ip_list* is empty.
    """
    proxy_ip = random.choice(ip_list).strip('\n')
    # requests selects the proxy by the scheme of the target URL; the API is
    # served over plain http, so the proxy is registered for both schemes.
    return {'http': proxy_ip, 'https': proxy_ip}


def _fetch(params, ip_list):
    """Send one GET request to the API through a freshly chosen proxy."""
    proxies = get_random_ip(ip_list)
    headers = {'User-Agent': str(UserAgent().random)}
    return requests.get(API_URL, params=params, headers=headers,
                        proxies=proxies, timeout=REQUEST_TIMEOUT)


def getTransResult(q):
    """Look up *q* and return the first meaning of its first entry.

    The query is lower-cased before it is sent. An empty string is returned
    when the reply is not valid JSON or does not contain a meaning.

    Raises:
        requests.RequestException: if the request fails twice in a row.
    """
    # Passing the query through ``params`` lets requests URL-encode it.
    params = {'w': q.lower(), 'type': 'json', 'key': API_KEY}
    ip_list = get_ip_list()

    time.sleep(1)  # throttle consecutive look-ups
    try:
        req = _fetch(params, ip_list)
    except requests.RequestException:
        print('程式出錯,暫停20秒')
        time.sleep(20)
        # Retry once with a different proxy and User-Agent.
        req = _fetch(params, ip_list)

    req.encoding = "utf-8"
    try:
        mresult = json.loads(req.text)
    except ValueError:
        # The reply (e.g. a proxy error page) was not JSON.
        return ''

    try:
        meaning = mresult['symbols'][0]['parts'][0]['means'][0]
    except (KeyError, IndexError, TypeError):
        return ''
    if not isinstance(meaning, str):
        # The caller applies string operations to the result.
        return ''

    print('翻譯結果為:' + meaning)
    return meaning


def read_file(filepath):
    """Read a '|'-delimited CSV file and return its non-empty rows."""
    with open(filepath, 'r', encoding='utf-8', newline='') as csvfile:
        spanreader = csv.reader(csvfile, delimiter='|',
                                quoting=csv.QUOTE_MINIMAL)
        return [row for row in spanreader if row]


def write_file(filepath, row):
    """Append *row* to the '|'-delimited CSV file at *filepath*.

    The file is opened with ``newline=''`` so the csv module controls line
    endings and no extra blank line follows each row.
    """
    with open(filepath, 'a+', encoding='utf-8', newline='') as csvfile:
        spanwriter = csv.writer(csvfile, delimiter='|',
                                quoting=csv.QUOTE_MINIMAL)
        spanwriter.writerow(row)


def _strip_name_marker(text):
    """Return *text* without its name tag, or None if it carries no tag.

    Only the first tag of ``NAME_MARKERS`` found in *text* is removed.
    """
    for marker in NAME_MARKERS:
        if marker in text:
            return text.replace(marker, '')
    return None


def main():
    """Translate every row that lacks a translation and write all rows."""
    for row in read_file(INPUT_PATH):
        # Rows too short to hold the translation column are copied as-is.
        if len(row) > TRANSLATION_COLUMN and not row[TRANSLATION_COLUMN]:
            print('現在爬取的人名是:' + row[0])
            stripped = _strip_name_marker(getTransResult(row[0]))
            # Only results tagged as a personal name are stored.
            if stripped is not None:
                row[TRANSLATION_COLUMN] = stripped
        # Every row is written so the output is a full copy of the input.
        write_file(OUTPUT_PATH, row)
    print('程式執行結束')


if __name__ == "__main__":
    main()