python隨筆(一)
阿新 • • 發佈:2019-04-04
xlsx border album pytho 環境 win 自己 column cts
python爬蟲獲取QQ音樂和豆瓣的最新電影音樂名字
先上代碼開源大家一起學習,代碼如下:
#!python2
# coding: utf-8
"""Scrape music and movie chart names and write them to a spreadsheet.

Target interpreter: Python 2.7 (uses ``urllib2`` and the
``sys.setdefaultencoding`` hack).  Pages are fetched from Kugou, QQ Music
and Douban, names are extracted with regular expressions, each list is
printed, and ``runtest`` writes one spreadsheet column per chart.
"""
__author__ = 'OldHarry'

import urllib2
import os
import re
import json
import xlsxwriter
import sys

defaultencoding = 'utf-8'
# Python 2 only: force UTF-8 as the implicit str<->unicode codec so that the
# mixed byte/unicode strings handled below can be printed and serialised.
if sys.getdefaultencoding() != defaultencoding:
    reload(sys)
    sys.setdefaultencoding(defaultencoding)

# QQ Music "GetNewSong" endpoint.  ``token`` is the value that follows
# "-=recom" in the query string and ``kind`` is the number sent as the
# URL-encoded "type" parameter.  The query string must contain "&notice=0".
_QQ_NEW_SONG_URL = (
    "https://u.y.qq.com/cgi-bin/musicu.fcg?-=recom{token}&g_tk=5381"
    "&loginUin=0&hostUin=0&format=json&inCharset=utf8&outCharset=utf-8"
    "&notice=0&platform=yqq.json&needNewCode=0"
    "&data=%7B%22comm%22%3A%7B%22ct%22%3A24%7D%2C%22new_song%22%3A%7B"
    "%22module%22%3A%22QQMusic.MusichallServer%22%2C%22method%22%3A"
    "%22GetNewSong%22%2C%22param%22%3A%7B%22type%22%3A{kind}%7D%7D%7D"
)


def getHtml(url):
    """Fetch ``url`` with browser-like headers and return the raw body.

    A message is printed when the response status is not 200; the body is
    returned either way.  The response is always closed before returning.
    """
    send_headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.2; rv:16.0) Gecko/20100101 Firefox/16.0',
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
        'Connection': 'keep-alive',
    }
    request = urllib2.Request(url, headers=send_headers)
    html = urllib2.urlopen(request)
    try:
        if html.getcode() != 200:
            print("訪問出現錯誤...錯誤代碼: %s" % html.getcode())
        return html.read()
    finally:
        # Release the connection even if reading the body fails.
        html.close()


def kugoumusic(url):
    """Return the de-duplicated second halves of ``songName`` spans.

    Each ``<span class="songName">A - B</span>`` in the page contributes
    ``B`` (decoded from UTF-8).  First-seen order is kept and the resulting
    list is printed as JSON before being returned.
    """
    page = getHtml(url)
    pattern = re.compile(r'<span class="songName">(.*?) - (.*?)</span>')
    nk = []
    seen = set()
    for match in pattern.findall(page):
        name = match[1].decode('utf8')
        # Compare the decoded name itself; comparing the whole match tuple
        # against a list of names can never detect a duplicate.
        if name not in seen:
            seen.add(name)
            nk.append(name)
    print(json.dumps(nk, encoding="UTF-8", ensure_ascii=False))
    return nk


def qqmusic(url):
    """Return the de-duplicated album ``name`` values from a QQ Music reply.

    The pattern captures the ``name`` field of the ``album`` object that
    follows each ``action`` object.  Names are stripped of surrounding
    whitespace, kept in first-seen order, printed as JSON and returned.
    """
    page = getHtml(url)
    pattern = re.compile(
        r'{"action":{"alert":[0-9]+,"icons":[0-9]+,"msgdown":[0-9]+,'
        r'"msgfav":[0-9]+,"msgid":[0-9]+,"msgpay":[0-9]+,"msgshare":[0-9]+,'
        r'"switch":[0-9]+},"album":{"id":[0-9]+,"mid":"[a-zA-Z0-9]+","name":"(.*?)"'
    )
    nq = []
    seen = set()
    for name in pattern.findall(page):
        # str.strip() returns a new string, so the result must be kept.
        name = name.strip()
        if name not in seen:
            seen.add(name)
            nq.append(name)
    print(json.dumps(nq, encoding="UTF-8", ensure_ascii=False))
    return nq


def dbmovie(url):
    """Return every ``alt`` text matched on the page, printing it as JSON.

    Only ``alt="..."`` attributes immediately followed by
    ``rel="<lowercase>" class="" />`` are matched.
    """
    page = getHtml(url)
    shu = re.compile(r'alt="(.*?)" rel="[a-z]+" class="" />').findall(page)
    print(json.dumps(shu, encoding="UTF-8", ensure_ascii=False))
    return shu


def rmmovie(url):
    """Return every ``"title":"..."`` value in the response, printing it as JSON."""
    page = getHtml(url)
    shu = re.compile(r'"title":"(.*?)"').findall(page)
    print(json.dumps(shu, encoding="UTF-8", ensure_ascii=False))
    return shu


def rmdsj():
    """Return the titles from three consecutive result pages of the TV list.

    The pages start at offsets 0, 20 and 40 with 20 entries requested each;
    ``rmmovie`` prints every page as it is fetched.
    """
    url = ('https://movie.douban.com/j/search_subjects?type=tv'
           '&tag=%E7%83%AD%E9%97%A8&sort=recommend&page_limit=20&page_start={0}')
    ssd = []
    for start in (0, 20, 40):
        ssd += rmmovie(url.format(start))
    return ssd


def runtest():
    """Fetch all thirteen charts and write them side by side to ``TXT.xls``.

    Each chart's heading is printed before it is fetched.  The workbook is
    created in the current working directory with the headings in row 1
    (bold) and the chart entries below them, one chart per column.
    """
    # os.path.join instead of a literal backslash keeps the path portable.
    # NOTE(review): XlsxWriter emits the xlsx format; the ".xls" file name is
    # kept unchanged so that existing consumers of the output still find it.
    IP_PATH = os.path.join(os.path.abspath('.'), 'TXT.xls')

    # (heading, fetch function, positional arguments) in column order.
    charts = [
        ('酷狗音樂--新歌榜', kugoumusic, ("http://www.kugou.com/",)),
        ('騰訊音樂--內地新歌榜', qqmusic,
         (_QQ_NEW_SONG_URL.format(token='2388477980207393', kind=1),)),
        ('騰訊音樂--港臺新歌榜', qqmusic,
         (_QQ_NEW_SONG_URL.format(token='6698628102261504', kind=2),)),
        ('騰訊音樂--歐美新歌榜', qqmusic,
         (_QQ_NEW_SONG_URL.format(token='08419989487702839', kind=3),)),
        ('騰訊音樂--日本新歌榜', qqmusic,
         (_QQ_NEW_SONG_URL.format(token='24411354608866187', kind=4),)),
        ('騰訊音樂--韓國新歌榜', qqmusic,
         (_QQ_NEW_SONG_URL.format(token='909302436024819', kind=5),)),
        ('豆瓣電影--正在熱映', dbmovie, ("https://movie.douban.com/",)),
        ('豆瓣電影--熱門電影', rmmovie,
         ("https://movie.douban.com/j/search_subjects?type=movie&tag=%E7%83%AD%E9%97%A8&sort=recommend&page_limit=20&page_start=0",)),
        ('豆瓣電影--最新電影', rmmovie,
         ("https://movie.douban.com/j/search_subjects?type=movie&tag=%E6%9C%80%E6%96%B0&page_limit=20&page_start=0",)),
        ('豆瓣電影--經典電影', rmmovie,
         ("https://movie.douban.com/j/search_subjects?type=movie&tag=%E7%BB%8F%E5%85%B8&sort=time&page_limit=20&page_start=0",)),
        ('豆瓣電影--可播放電影', rmmovie,
         ("https://movie.douban.com/j/search_subjects?type=movie&tag=%E5%8F%AF%E6%92%AD%E6%94%BE&sort=time&page_limit=20&page_start=0",)),
        ('豆瓣電影--高分電影', rmmovie,
         ("https://movie.douban.com/j/search_subjects?type=movie&tag=%E8%B1%86%E7%93%A3%E9%AB%98%E5%88%86&sort=time&page_limit=20&page_start=0",)),
        ('豆瓣電影--熱門電視劇', rmdsj, ()),
    ]

    columns = []
    for heading, fetch, args in charts:
        print(heading)
        columns.append(fetch(*args))

    workbook = xlsxwriter.Workbook(IP_PATH)
    worksheet = workbook.add_worksheet()
    bold = workbook.add_format({'bold': 1, 'align': 'center', 'border': 1})
    bold2 = workbook.add_format({'align': 'center', 'border': 1})

    headings = [heading for heading, _, _ in charts]
    worksheet.write_row('A1', headings, bold)

    SS = 30
    # One call covers every used column instead of one call per letter.
    worksheet.set_column(0, len(headings) - 1, SS)
    for col, names in enumerate(columns):
        # Row index 1 is spreadsheet row 2, directly under the heading.
        worksheet.write_column(1, col, names, bold2)
    workbook.close()


if __name__ == '__main__':
    runtest()
主要思路是:第一步解析網站,第二步選擇自己想要的數據,第三步在當前文件夾生成一個 Excel 文件並寫入數據。
第一次寫博客,各路大神不喜勿噴,python萌新一枚。
開發環境:PyCharm,Python 2.7
2019-04-04 11:33:23
Study hard and make progress every day!
python隨筆(一)