Python 爬蟲:抓取豆瓣電影,並存入資料庫
阿新 • • 發佈:2019-01-22
import urllib.request
import json
import codecs


def _sql_quote(value):
    """Return *value* rendered as a single-quoted SQL string literal."""
    # Double embedded single quotes so a title such as "Ocean's Eleven"
    # cannot terminate the literal early and corrupt (or inject into) the
    # generated statement.
    # NOTE(review): this is standard-SQL quoting; if the target server also
    # treats backslash as an escape character, confirm whether backslashes
    # need doubling as well.
    return "'%s'" % str(value).replace("'", "''")


class info(object):
    """Download one page of Douban movie search results as SQL inserts."""

    # @classmethod
    def moviedown(url):
        """Fetch one result page and append INSERT statements to tencent.sql.

        The JSON response is expected to hold a list of movie dicts under
        the ``'data'`` key; the ``'title'`` and ``'rate'`` of each one are
        written as an ``insert into tencent(title,rate)`` statement.

        Args:
            url: Receives the instance when called as ``info().moviedown()``.
                The value is ignored; the name is kept only so existing
                callers keep working.

        Raises:
            urllib.error.URLError: If the HTTP request fails.
            KeyError: If the response has no ``'data'`` key, or a movie has
                no ``'title'`` / ``'rate'`` key.
        """
        # Search endpoint; the tags value is the URL-encoded word for "movie".
        endpoint = "https://movie.douban.com/j/new_search_subjects?sort=T&range=0,10&tags=%E7%94%B5%E5%BD%B1&start=9960"

        # Request the page; the context manager closes the connection even
        # if reading or decoding fails.
        request = urllib.request.Request(endpoint)
        with urllib.request.urlopen(request) as response:
            payload = json.loads(response.read().decode('utf-8'))

        # payload is a dict, payload['data'] a list, payload['data'][0] a dict.
        movies = payload['data']
        if movies:
            # Debug output of the first rating; skipped for an empty page
            # instead of failing with IndexError.
            print(movies[0]['rate'])

        # The output starts with a blank line, followed by one statement per
        # movie; pieces are collected and joined once.
        pieces = ["\r\n"]
        for item in movies:
            pieces.append(
                "insert into tencent(title,rate) values (%s,%s);\r\n"
                % (_sql_quote(item['title']), _sql_quote(item['rate']))
            )

        # codecs.open writes the "\r\n" terminators untranslated; the file is
        # opened in append mode so repeated runs accumulate statements.
        with codecs.open('tencent.sql', 'a', "utf-8") as file_object:
            file_object.write("".join(pieces))
        print("success")


test = info()
test.moviedown()  # call the method on an instance
這只是一個簡單的實現,當作範例,之後會繼續優化。
上面的程式只存入了 20 部電影;下面的版本會逐頁抓取,存入近萬部電影。只要更改網址,同樣的做法也可以用來抓取綜藝、電視劇等等。
import urllib.request
import json
import codecs


def _sql_quote(value):
    """Return *value* rendered as a single-quoted SQL string literal."""
    # Double embedded single quotes so a title such as "Ocean's Eleven"
    # cannot terminate the literal early and corrupt (or inject into) the
    # generated statement.
    # NOTE(review): this is standard-SQL quoting; if the target server also
    # treats backslash as an escape character, confirm whether backslashes
    # need doubling as well.
    return "'%s'" % str(value).replace("'", "''")


class info(object):
    """Download many pages of Douban movie search results as SQL inserts."""

    # @classmethod
    def moviedown(url):
        """Fetch every result page and append INSERT statements to mx_movie.sql.

        Pages are requested with ``start`` offsets 0, 20, ..., 9960. Each
        JSON response is expected to hold a list of movie dicts under the
        ``'data'`` key; the ``'title'`` and ``'rate'`` of every movie are
        written as an ``insert into mx_movie(title,rate)`` statement.

        Args:
            url: Receives the instance when called as ``info().moviedown()``.
                The value is ignored; the name is kept only so existing
                callers keep working.

        Raises:
            urllib.error.URLError: If an HTTP request fails.
            KeyError: If a response has no ``'data'`` key, or a movie has no
                ``'title'`` / ``'rate'`` key.
        """
        # Search endpoint without the offset; the tags value is the
        # URL-encoded word for "movie". Per the original author, pointing it
        # at a different address lets the same code store novels, TV series,
        # variety shows and so on.
        base_url = "https://movie.douban.com/j/new_search_subjects?sort=T&range=0,10&tags=%E7%94%B5%E5%BD%B1&start="

        # Per the original author, each record also carries cast, images,
        # poster, icon, etc.; only the title and the rating are stored here.
        collected = []
        for offset in range(0, 9961, 20):
            page_url = base_url + '%d' % offset
            print(page_url)
            request = urllib.request.Request(page_url)
            # The context manager closes each connection even if reading or
            # decoding fails.
            with urllib.request.urlopen(request) as response:
                payload = json.loads(response.read().decode('utf-8'))
            # payload is a dict and payload['data'] a list of dicts; extend
            # in place rather than rebuilding the whole list for every page.
            collected.extend(payload['data'])

        print(collected)

        # The output starts with a blank line, followed by one statement per
        # movie; pieces are collected and joined once.
        pieces = ["\r\n"]
        for item in collected:
            pieces.append(
                "insert into mx_movie(title,rate) values (%s,%s);\r\n"
                % (_sql_quote(item['title']), _sql_quote(item['rate']))
            )

        # codecs.open writes the "\r\n" terminators untranslated; the file is
        # opened in append mode so repeated runs accumulate statements.
        with codecs.open('mx_movie.sql', 'a', "utf-8") as file_object:
            file_object.write("".join(pieces))
        print("success")


test = info()
test.moviedown()  # call the method on an instance