xpath案例-全國城市名爬取
阿新 • • 發佈:2020-07-28
#!/usr/bin/python
"""Scrape every city name listed on https://www.aqistudy.cn/historydata/.

Project requirement: parse out all of the city names on that page.  The
"hot" cities and the complete city list are selected with a single XPath
union, and the text of each matching ``<a>`` element is collected.
"""
import requests
from lxml import etree

HISTORY_URL = 'https://www.aqistudy.cn/historydata/'

REQUEST_HEADERS = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/47.0.2626.106 Safari/537.36'
}

# Without an explicit timeout, requests waits indefinitely on a stalled
# connection, so the script would hang instead of failing.
REQUEST_TIMEOUT_SECONDS = 10

# Selects the <a> tags for both the hot cities and the full city list:
#   //div[@class="bottom"]/ul/li/a         hierarchy of the hot-city <a> tags
#   //div[@class="bottom"]/ul/div[2]/li/a  hierarchy of the all-city <a> tags
CITY_ANCHOR_XPATH = (
    '//div[@class="bottom"]/ul/li/a | '
    '//div[@class="bottom"]/ul/div[2]/li/a'
)


def _fetch_page(url, headers, timeout=REQUEST_TIMEOUT_SECONDS):
    """Download *url* and return the response body as text.

    Raises:
        requests.HTTPError: if the server answers with a 4xx/5xx status,
            so an error page is never parsed as if it were the city list.
        requests.RequestException: on connection failures or timeouts.
    """
    response = requests.get(url=url, headers=headers, timeout=timeout)
    response.raise_for_status()
    return response.text


def _parse_city_names(page_text):
    """Return the text of every city ``<a>`` element found in *page_text*.

    For each anchor matched by ``CITY_ANCHOR_XPATH`` the first direct text
    node is taken as the city name.  Anchors without any direct text node
    are skipped rather than raising ``IndexError``.
    """
    tree = etree.HTML(page_text)
    city_names = []
    for anchor in tree.xpath(CITY_ANCHOR_XPATH):
        text_nodes = anchor.xpath('./text()')
        if text_nodes:
            city_names.append(text_nodes[0])
    return city_names


def main():
    """Fetch the page, extract the city names, and print them with a count."""
    page_text = _fetch_page(HISTORY_URL, REQUEST_HEADERS)
    all_city_names = _parse_city_names(page_text)
    print(all_city_names, len(all_city_names))


if __name__ == "__main__":
    main()