1. 程式人生 > 爬取12306站點資訊

爬取12306站點資訊

# -*- coding: utf-8 -*-

import ssl
import urllib2
from prettytable import PrettyTable

def _parse_station_rows(payload):
    """Turn the raw ``station_name.js`` payload into table rows.

    The payload is split around single quotes; the text between the quotes
    is treated as a run of ``@``-separated records whose fields are
    separated by ``|``.

    Returns:
        A list of five-element rows ``[fields[5], fields[1], fields[2],
        fields[3], fields[4]]`` matching the table columns (serial number,
        Chinese name, station code, pinyin, pinyin initials), or ``None``
        when the payload does not split into exactly three pieces around
        the quotes.
    """
    pieces = payload.split("'")
    if len(pieces) != 3:
        return None

    rows = []
    for record in pieces[1].split("@"):
        if not record:
            # Empty chunks (e.g. the one before a leading "@") hold no data.
            continue

        fields = record.split("|")
        if len(fields) < 6:
            # Too short to fill a row: skip it rather than raise IndexError
            # and lose the whole table because of one bad record.
            continue

        rows.append([fields[5], fields[1], fields[2], fields[3], fields[4]])
    return rows


def get_station_name():
    """Download the 12306 station list and save it as a text table.

    Side effects:
        * Replaces ``ssl._create_default_https_context`` with the
          unverified factory, which affects every later HTTPS request made
          through the default context in this process.
        * Writes the raw response body to ``station_name0.txt``.
        * When the body has the expected shape, writes a PrettyTable
          rendering of the stations to ``station_name.txt`` (UTF-8).

    Returns:
        None. If the response does not contain exactly one single-quoted
        literal, only the raw dump is written.

    Raises:
        Whatever ``urllib2`` raises for network/HTTP failures and whatever
        ``open``/``write`` raise for file-system failures.
    """
    url = "https://kyfw.12306.cn/otn/resources/js/framework/station_name.js?station_version=1.9025"

    # Let the request through despite 12306's unverified certificate.
    # NOTE: this is a process-wide override, kept for backward compatibility.
    ssl._create_default_https_context = ssl._create_unverified_context

    req = urllib2.Request(url)
    req.add_header("Referer","https://kyfw.12306.cn/otn/leftTicket/init")
    req.add_header("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.113 Safari/537.36")

    # urllib2 responses are not context managers on Python 2, so close the
    # connection explicitly even if read() fails.
    response = urllib2.urlopen(req)
    try:
        names = response.read()
    finally:
        response.close()

    # Keep an untouched copy of what the server sent.
    with open("station_name0.txt", 'wb') as f:
        f.write(names)

    rows = _parse_station_rows(names)
    if rows is None:
        return

    table = PrettyTable(["序號","中文名","車站程式碼","中文拼音","拼音首字母"])
    table.header = True
    table.padding_width = 2
    for row in rows:
        table.add_row(row)

    # Encode explicitly so the write does not depend on the interpreter's
    # default encoding having been switched to UTF-8 by the caller.
    rendered = table.get_string()
    if not isinstance(rendered, bytes):
        rendered = rendered.encode("utf-8")

    with open("station_name.txt", 'wb') as f:
        f.write(rendered)
            
# Script entry point: runs only when this file is executed directly.
if __name__ == "__main__":
    import sys
    # Python 2 only: sys.setdefaultencoding is removed from the sys namespace
    # during interpreter startup, so the module has to be reloaded before the
    # function can be called. The order of the next two statements matters.
    reload(sys)
    # Make UTF-8 the process-wide default codec; presumably so the Chinese
    # text handled by get_station_name() converts implicitly when it is
    # written to file -- confirm before removing.
    sys.setdefaultencoding('utf-8')
    get_station_name()