玩轉熱門框架 用企業級思維 開發通用夠硬的大資料平臺
阿新 • 發佈:2020-11-25
玩轉熱門框架 用企業級思維 開發通用夠硬的大資料平臺
- 2-2 什麼是大資料平臺
- 2-3 為什麼要建設大資料平臺【三個解決、兩個提升】
- 2-4 大資料平臺常用技術棧有哪些【知識體系梳理、我們要用哪些】
# -*- coding: utf-8 -*-
import sys

import scrapy

# Add the musicspider directory (relative, Windows-style path) to the import
# search path before importing the project's item class.
sys.path.append('.\\.\\musicspider')
from musicspider.items import MusicspiderItem


class MusiclistSpider(scrapy.Spider):
    """Crawl the music.163.com chart index, then every chart linked from it.

    ``parse`` handles the chart index page and schedules one request per
    chart; ``second_parse`` handles a single chart page and yields one
    ``MusicspiderItem`` per track link found on it.
    """

    name = 'musiclist'
    allowed_domains = ['music.163.com']
    start_urls = ['https://music.163.com/discover/toplist']

    def parse(self, response):
        """Schedule a request for every chart linked from the index page.

        Args:
            response: The downloaded ``start_urls`` page.

        Yields:
            scrapy.Request: One request per chart link, handled by
            ``second_parse``. The partially filled item (``toplistname``,
            ``toplisturls``) is passed along in ``meta['meta_1']``.
        """
        # Read the name and the href from the SAME <a> node. Extracting the
        # two lists separately and pairing them by index goes out of step
        # (or raises IndexError) as soon as one anchor lacks a text node.
        for link in response.xpath('//div[@class="item f-cb"]/p/a'):
            chart_name = link.xpath('text()').extract_first()
            href = link.xpath('@href').extract_first()
            if chart_name is None or href is None:
                continue

            item = MusicspiderItem()
            item['toplistname'] = chart_name
            # urljoin resolves relative hrefs against the page URL and leaves
            # already-absolute hrefs intact, unlike plain concatenation.
            item['toplisturls'] = response.urljoin(href)

            yield scrapy.Request(
                url=item['toplisturls'],
                meta={'meta_1': item},
                callback=self.second_parse,
            )

    def second_parse(self, response):
        """Yield one item per track link found on a chart page.

        Args:
            response: A chart page requested by ``parse``; its
                ``meta['meta_1']`` holds the item describing the chart.

        Yields:
            MusicspiderItem: Carries the chart's ``toplistname`` and
            ``toplisturls`` plus the track's ``musicname`` and ``musicurls``.
        """
        chart = response.meta['meta_1']

        # Same per-anchor pairing as in parse(), so a track name can never be
        # matched with another track's URL.
        for link in response.xpath('//div/ul[@class="f-hide"]/li/a'):
            track_name = link.xpath('text()').extract_first()
            href = link.xpath('@href').extract_first()
            if track_name is None or href is None:
                continue

            item = MusicspiderItem()
            item['toplistname'] = chart['toplistname']
            item['toplisturls'] = chart['toplisturls']
            item['musicname'] = track_name
            item['musicurls'] = response.urljoin(href)
            yield item
:ititit111222333
:1940379308