Commonly Used Python Modules
阿新 · Published: 2017-05-13
urllib
1. urllib.request.urlopen() opens a web page
    from urllib import request

    response = request.urlopen("http://www.baidu.com")
    # the retrieved page content
    html = response.read().decode("utf-8")
    print(html)
The object returned by urlopen supports the following operations:
- read(), readline(), readlines(), fileno(), close(): used exactly the same way as on file objects
- info(): returns an http.client.HTTPMessage object (httplib.HTTPMessage in Python 2) containing the headers sent back by the remote server
- getcode(): returns the HTTP status code; for an HTTP request, 200 means the request completed successfully and 404 means the URL was not found
- geturl(): returns the URL that was requested
Attributes of the object returned by urlopen:
- status: the HTTP status code
- reason: the textual status message
Usage example:
    # response header information
    info = response.info()
    print(info)

    # status code
    code = response.getcode()
    print(code)

    # the URL that was requested
    url = response.geturl()
    print(url)

    # iterate over all header fields
    for k, v in info.items():
        print("%s -> %s" % (k, v))

    # look up a specific header field, such as Date or Server
    if "Date" in info:
        print(info["date"])
Another way to do the same thing:
    from urllib import request

    with request.urlopen('https://api.douban.com/v2/book/2129650') as f:
        data = f.read()
        print('Status:', f.status, f.reason)
        for k, v in f.getheaders():
            print('%s: %s' % (k, v))
        print('Data:', data.decode('utf-8'))
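In Python 3, urlopen raises urllib.error.HTTPError when the server answers with an error status and urllib.error.URLError for network-level failures, so a more defensive version of the call above might look like this minimal sketch:

    from urllib import request, error

    try:
        with request.urlopen('https://api.douban.com/v2/book/2129650', timeout=10) as f:
            print('Status:', f.status, f.reason)
            print(f.read().decode('utf-8'))
    except error.HTTPError as e:
        # the server responded, but with an error status code such as 404
        print('HTTP error:', e.code, e.reason)
    except error.URLError as e:
        # DNS failure, refused connection, timeout, and so on
        print('URL error:', e.reason)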
2. urllib.request.urlretrieve() downloads a resource and saves it to a local file; a short sketch follows.
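A minimal sketch of how this can be used, assuming the Python 3 location urllib.request.urlretrieve and an arbitrary local filename baidu.html:

    from urllib import request

    # download the page and save it locally; urlretrieve returns the local
    # path and the response headers (an http.client.HTTPMessage);
    # the filename "baidu.html" is an arbitrary choice
    local_path, headers = request.urlretrieve("http://www.baidu.com", "baidu.html")
    print("saved to:", local_path)
    print(headers["Content-Type"])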
3. urllib.parse.urlencode(query)
Encodes a dict of key-value pairs into a query string joined by &; combined with urlopen this can be used for both GET and POST requests.
Get:
    >>> from urllib import request, parse
    >>> params = parse.urlencode({'spam': 1, 'eggs': 2, 'bacon': 0})
    >>> params
    'spam=1&eggs=2&bacon=0'
    >>> f = request.urlopen("http://python.org/query?%s" % params)
    >>> print(f.read())
Post:
    >>> from urllib import request, parse
    >>> params = parse.urlencode({'spam': 1, 'eggs': 2, 'bacon': 0})
    >>> # in Python 3 the POST body must be bytes, so encode the query string
    >>> f = request.urlopen("http://python.org/query", data=params.encode("utf-8"))
    >>> f.read()
Examples:
1. A simple image download with urllib
url = "http://n.sinaimg.cn/tech/transform/20170512/P0He-fyfeutp7573474.jpg" from urllib import request response = request.urlopen(url) html = response.read() with open("a.img", "wb") as f: f.write(html)
2. Calling Baidu's online translation service with a POST request:
    from urllib import request, parse
    import json

    req = request.Request("http://fanyi.baidu.com/v2transapi")
    req.add_header("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/51.0.2704.79 Safari/537.36 Edge/14.14393")
    req.add_header("Content-Type", "application/x-www-form-urlencoded; charset=UTF-8")
    req.add_header("Referer", "http://fanyi.baidu.com")

    post_data = {}

    # English-to-Chinese translation
    # post_data["from"] = "en"
    # post_data["query"] = "hello world"
    # post_data["simple_means_flag"] = 3
    # post_data["to"] = "zh"

    # Chinese-to-English translation
    post_data["from"] = "zh"
    post_data["query"] = "你好"
    post_data["simple_means_flag"] = 3
    post_data["to"] = "en"

    post_data = parse.urlencode(post_data)
    response = request.urlopen(req, data=post_data.encode("utf-8"))
    html = response.read().decode("utf-8")
    target = json.loads(html)
    print(target["trans_result"]["data"][0]["dst"])
    # print(html)
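The same request can also be built by passing data, headers, and method directly to request.Request instead of calling add_header repeatedly; this is only a sketch of that alternative form, assuming the same unofficial fanyi.baidu.com endpoint and parameters as above:

    from urllib import request, parse
    import json

    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/51.0.2704.79 Safari/537.36 Edge/14.14393",
        "Content-Type": "application/x-www-form-urlencoded; charset=UTF-8",
        "Referer": "http://fanyi.baidu.com",
    }
    # the POST body must be bytes
    post_data = parse.urlencode({
        "from": "zh",
        "query": "你好",
        "simple_means_flag": 3,
        "to": "en",
    }).encode("utf-8")

    req = request.Request("http://fanyi.baidu.com/v2transapi",
                          data=post_data, headers=headers, method="POST")
    with request.urlopen(req) as response:
        target = json.loads(response.read().decode("utf-8"))
        print(target["trans_result"]["data"][0]["dst"])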
3. Fetching Sina news with a GET request:
    from urllib import request, parse
    import json

    # http://feed.mix.sina.com.cn/api/roll/get?pageid=1&lid=21
    values = {
        "pageid": 1,
        "lid": 21
    }
    param = parse.urlencode(values)
    req = request.Request("http://feed.mix.sina.com.cn/api/roll/get?%s" % param)
    req.add_header("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/51.0.2704.79 Safari/537.36 Edge/14.14393")
    req.add_header("Referer", "http://tech.sina.com.cn/")

    result = request.urlopen(req)
    html = result.read().decode("utf-8")
    target = json.loads(html)
    # print(target)
    news_time = target["result"]["timestamp"]
    all_data = target["result"]["data"]
    print(news_time)
    for i in all_data:
        print("Title:", i["title"])
        print("\tSummary:", i["summary"])
        print("\tIntro:", i["intro"])
        print("\turl:", i["url"])
        print("\timg:", i["img"]["u"])
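The GET pattern above (build a query string, attach headers, decode the JSON) can be factored into a small reusable helper; the function get_json below is only a sketch of that idea:

    from urllib import request, parse
    import json

    def get_json(base_url, params, headers=None):
        # build the full URL from a dict of query parameters
        url = "%s?%s" % (base_url, parse.urlencode(params))
        req = request.Request(url, headers=headers or {})
        with request.urlopen(req) as response:
            return json.loads(response.read().decode("utf-8"))

    # same Sina endpoint as above
    target = get_json(
        "http://feed.mix.sina.com.cn/api/roll/get",
        {"pageid": 1, "lid": 21},
        headers={"Referer": "http://tech.sina.com.cn/"},
    )
    print(target["result"]["timestamp"])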