1. 程式人生 > 實用技巧 > Python爬取12306車次資訊

Python爬取12306車次資訊

1、資料來源:12306官網

2、程式碼

import requests
import re
def send_request(train_date='2020-08-07', from_station='BJP', to_station='TJP'):
    """Query the 12306 left-ticket endpoint and return the HTTP response.

    Args:
        train_date: Travel date sent as ``leftTicketDTO.train_date``
            (the original hard-coded value ``2020-08-07`` is the default).
        from_station: Station code sent as ``leftTicketDTO.from_station``.
        to_station: Station code sent as ``leftTicketDTO.to_station``.

    Returns:
        The ``requests`` response object, with its encoding forced to UTF-8.
        The response text is also printed, as in the original script.
    """
    # Build the request headers.
    # NOTE(review): the cookie looks like a captured browser session and
    # embeds the default date/stations; presumably it must be refreshed
    # before real use -- confirm.
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.163 Safari/537.36',
        'Cookie': (
            '_uab_collina=159678916887533768255393; '
            'JSESSIONID=21EE7FF8A85068502D212C7C1B267636; '
            'BIGipServerotn=1139802634.24610.0000; '
            'RAIL_EXPIRATION=1597092329089; '
            'RAIL_DEVICEID=I1dDo6IWRkLe8N0UCAm7-kBhC02clA2kM6fmg9n2-gvelMW5c_oTgb5bHwUwoH9hH1AXmL9CeBhmIms8mv9a5_a6BsViywDB8ICSt1oIL8zg8MWqmvtPJf4xxDhBr_9x8bglHbZ5Xx1nfqGTI10knzerugMo5icI; '
            'BIGipServerpool_passport=233636362.50215.0000; '
            'route=6f50b51faa11b987e576cdb301e545c4; '
            '_jc_save_fromDate=2020-08-07; '
            '_jc_save_toDate=2020-08-07; '
            '_jc_save_wfdc_flag=dc; '
            '_jc_save_fromStation=%u5317%u4EAC%2CBJP; '
            '_jc_save_toStation=%u5929%u6D25%2CTJP'
        ),
    }
    url = 'https://kyfw.12306.cn/otn/leftTicket/query'
    # Parameter order matches the original hard-coded query string.
    params = {
        'leftTicketDTO.train_date': train_date,
        'leftTicketDTO.from_station': from_station,
        'leftTicketDTO.to_station': to_station,
        'purpose_codes': 'ADULT',
    }
    # A timeout keeps the script from hanging forever on a stalled connection.
    resp = requests.get(url, headers=headers, params=params, timeout=10)
    # Force the encoding so the Chinese text is not garbled.
    resp.encoding = 'utf-8'
    print(resp.text)
    return resp
# Highest field index read from a result row is 31, so a usable row needs
# at least this many '|'-separated fields.
_MIN_FIELDS = 32


def parse_json(resp, city):
    """Extract the train rows from a left-ticket query response.

    Field meanings, per the original author's notes:
        d[3]  train number
        d[6]  departure station code of the query
        d[7]  arrival station code of the query
        d[31] first-class seat
        d[30] second-class seat
        d[13] travel date

    Args:
        resp: Response object exposing ``json()``; the decoded payload must
            contain ``['data']['result']``, a list of '|'-separated strings.
        city: Dict mapping station code -> station name.

    Returns:
        A list of ``[train, from_name, to_name, first, second, date]`` lists.
        A station code missing from ``city`` is kept as the raw code instead
        of raising ``KeyError``; rows with too few fields are skipped.
    """
    json_ticket = resp.json()  # decode the response body as JSON
    data_list = json_ticket['data']['result']  # list of train records
    lst = []
    for item in data_list:
        d = item.split('|')
        if len(d) < _MIN_FIELDS:
            # Malformed/truncated record: indexing d[31] would fail.
            continue
        lst.append([
            d[3],
            city.get(d[6], d[6]),
            city.get(d[7], d[7]),
            d[31],
            d[30],
            d[13],
        ])
    return lst


def get_city():
    """Download station_name.js and return a dict of station code -> name."""
    url = 'https://kyfw.12306.cn/otn/resources/js/framework/station_name.js?station_version=1.9151'
    headers = {'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.163 Safari/537.36'}
    # A timeout keeps the script from hanging forever on a stalled connection.
    resp = requests.get(url, headers=headers, timeout=10)
    resp.encoding = 'utf-8'
    # Keep only the "<Chinese name>|<UPPERCASE code>" pairs. The pattern is a
    # raw string so that "\|" reaches the regex engine as an escaped pipe.
    stations = re.findall(r'([\u4e00-\u9fa5]+)\|([A-Z]+)', resp.text)
    # Build name -> code first, then swap keys and values, exactly as the
    # original two-step conversion did.
    return {code: name for name, code in dict(stations).items()}


def start():
    """Fetch the default query and print every row whose fourth column is set."""
    lst = parse_json(send_request(), get_city())
    # Keep only rows with a non-empty first-class seat column (index 3).
    # NOTE(review): the original tested i[3] != '' twice; a second, different
    # check may have been intended -- confirm before tightening the filter.
    for i in lst:
        if i[3] != '':
            print(i)


if __name__ == '__main__':
    start()
12306.py

3、結果