1. 程式人生 > 實用技巧 > 玩轉熱門框架 用企業級思維 開發通用夠硬的大資料平臺

玩轉熱門框架 用企業級思維 開發通用夠硬的大資料平臺

玩轉熱門框架 用企業級思維 開發通用夠硬的大資料平臺

  • 2-2 什麼是大資料平臺
  • 2-3 為什麼要建設大資料平臺【三個解決、兩個提升】
  • 2-4 大資料平臺常用技術棧有哪些【知識體系梳理、我們要用哪些】
                                        
# -*- coding: utf-8 -*-
import scrapy
import sys
# Add the musicspider directory to the module search path. The relative path
# is written with backslash separators; presumably this is what lets the
# `musicspider.items` import below resolve -- TODO confirm against the
# project layout and the directory the spider is launched from.
sys.path.append('.\\.\\musicspider')
from musicspider.items import MusicspiderItem
 
class MusiclistSpider(scrapy.Spider):
    """Spider that walks the music.163.com top-list page and each chart on it.

    ``parse`` reads the chart names and links from the start page and
    schedules one request per chart; ``second_parse`` reads the song names
    and links from each chart page and yields one ``MusicspiderItem`` per
    song.
    """

    name = 'musiclist'
    allowed_domains = ['music.163.com']
    start_urls = ['https://music.163.com/discover/toplist']

    # Prefix prepended to the hrefs extracted from the pages.
    _site_root = 'https://music.163.com'

    def parse(self, response):
        """Yield one request per chart found on the top-list page.

        Args:
            response: The downloaded top-list page.

        Yields:
            scrapy.Request: A request for the chart URL, handled by
            ``second_parse``. The partially filled item (``toplistname``
            and ``toplisturls``) travels in ``meta['meta_1']``.
        """
        # Chart names and their hrefs, extracted with XPath.
        toplistname = response.xpath(
            '//div[@class="item f-cb"]/p/a/text()').extract()
        urls = response.xpath(
            '//div[@class="item f-cb"]/p/a/@href').extract()

        # zip() pairs names with links and stops at the shorter list, so a
        # length mismatch no longer raises IndexError and aborts the crawl.
        for chart_name, href in zip(toplistname, urls):
            item = MusicspiderItem()
            item['toplistname'] = chart_name
            item['toplisturls'] = ''.join([self._site_root, href])
            # Follow the chart URL to collect its song list.
            yield scrapy.Request(
                url=item['toplisturls'],
                meta={'meta_1': item},
                callback=self.second_parse,
            )

    def second_parse(self, response):
        """Yield one item per song listed on a chart page.

        Args:
            response: The downloaded chart page; ``response.meta['meta_1']``
                must hold the item created in ``parse``.

        Yields:
            MusicspiderItem: An item with ``toplistname``, ``toplisturls``,
            ``musicname`` and ``musicurls`` set.
        """
        meta_1 = response.meta['meta_1']
        musicname = response.xpath(
            '//div/ul[@class="f-hide"]/li/a/text()').extract()
        urls = response.xpath(
            '//div/ul[@class="f-hide"]/li/a/@href').extract()

        # Pair every song name with its link; the hrefs are prefixed with
        # the site root to build the song URL.
        for song_name, href in zip(musicname, urls):
            item = MusicspiderItem()
            item['toplistname'] = meta_1['toplistname']
            item['toplisturls'] = meta_1['toplisturls']
            item['musicname'] = song_name
            item['musicurls'] = ''.join([self._site_root, href])
            yield item

:ititit111222333 

:1940379308