Python3 urllib使用

阿新 • • 發佈：2018-11-20

Python3 urllib使用

基本使用 get,post,timeout超時,異常

# 姓名: 劉帥
# 日期: 2018.11.18
# 功能: urllib.request.urlopen使用方法
from urllib.request import urlopen  # 請求網頁
from urllib.parse import urlencode  # 字典轉字串轉碼
from urllib.error import URLError  # URLError錯誤


def urlA():
    """Send a GET request with ``urlopen`` and print details of the response.

    Prints, in order: the body decoded as UTF-8, the type of the response
    object, the HTTP status code, the full list of response headers and the
    value of the ``Server`` header.
    """
    # Use the response as a context manager so the underlying connection is
    # closed even if reading or decoding the body raises.
    with urlopen('http://www.baidu.com') as res:
        print(res.read().decode('utf8'))  # response body
        print(type(res))  # type of the object returned by urlopen
        print(res.status)  # HTTP status code
        print(res.getheaders())  # all response headers
        print(res.getheader('Server'))  # Server header (server software info)


def urlB():
    """Send a POST request carrying form data and print the response body.

    The form ``{'word': 'hello'}`` is URL-encoded and sent as UTF-8 bytes;
    passing ``data`` to ``urlopen`` is what makes the request a POST.
    """
    form = {'word': 'hello'}  # POST field "word" with value "hello"
    payload = urlencode(form).encode('utf-8')  # urlopen requires bytes
    # The context manager closes the connection once the body has been read.
    with urlopen('http://httpbin.org/post', data=payload) as res:
        print(res.read().decode('utf-8'))


def urlC():
    """Open a URL with a 0.1 second timeout and report a timeout or the response.

    On success prints the decoded body, the response type, the status code,
    all headers and the ``Server`` header. On failure prints the reason.
    """
    import socket  # local import: only needed for the timeout exception type

    try:
        # Opening and reading both happen inside the try block because the
        # timeout applies to every blocking socket operation, not only to
        # establishing the connection.
        with urlopen('http://httpbin.org/', timeout=0.1) as res:
            print(res.read().decode('utf8'))  # response body
            print(type(res))  # type of the object returned by urlopen
            print(res.status)  # HTTP status code
            print(res.getheaders())  # all response headers
            print(res.getheader('Server'))  # Server header
    except URLError as err:
        # Connection-phase failures (including connect timeouts) are wrapped
        # in URLError; ``reason`` holds the underlying cause.
        print(err.reason)
    except socket.timeout as err:
        # A timeout while waiting for or reading the response is raised as a
        # bare socket.timeout, which is not a URLError.
        print(err)

Request使用 get,post,驗證,代理,Cookie儲存讀取,異常

# 姓名: 劉帥
# 日期: 2018.11.18
# 功能: urllib.request.Request使用方法
from urllib.parse import urlencode  # 字典轉字串轉碼
from urllib.request import Request  # Request類
from urllib.request import urlopen  # 請求網頁
from urllib.request import URLError  # URLError錯誤
from urllib.request import HTTPPasswordMgrWithDefaultRealm, HTTPBasicAuthHandler, build_opener  # 驗證
from urllib.request import ProxyHandler  # HTTP代理
from http.cookiejar import CookieJar
from http.cookiejar import MozillaCookieJar
from http.cookiejar import LWPCookieJar
from urllib.request import HTTPCookieProcessor
from urllib.request import HTTPError


def urlA():
    """Send a GET request described by a ``Request`` object and print the body."""
    req = Request('http://httpbin.org/get')  # request with default method/headers
    # The context manager closes the connection once the body has been read.
    with urlopen(req) as res:
        print(res.read().decode('utf-8'))


def urlB():
    """Send a POST request with custom headers and form data via ``Request``.

    Builds a ``Request`` carrying explicit ``User-Agent`` and ``Host``
    headers plus URL-encoded form data, sends it and prints the response
    body.
    """
    headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36'
                             ' (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36',
               'Host': 'httpbin.org'}
    form = {'word': 'hello'}  # POST field "word" with value "hello"
    payload = urlencode(form).encode('utf-8')  # request body must be bytes
    # Headers can also be attached after construction with req.add_header().
    req = Request(url='http://httpbin.org/post', headers=headers, data=payload,
                  method='POST')
    # The context manager closes the connection once the body has been read.
    with urlopen(req) as res:
        print(res.read().decode('utf-8'))


def urlC():
    """Fetch a page protected by HTTP Basic authentication.

    Registers the credentials with a password manager, builds an opener
    around ``HTTPBasicAuthHandler`` and prints either the response body or
    the reason the request failed.
    """
    username = 'LiuShuai'
    password = 'Ls123456'
    url = 'http://www.httpbin.org/basic-auth/LiuShuai/Ls123456'
    manager = HTTPPasswordMgrWithDefaultRealm()
    # realm=None makes these credentials the default for any realm at this URL.
    manager.add_password(None, url, username, password)
    opener = build_opener(HTTPBasicAuthHandler(manager))
    try:
        res = opener.open(url)
    except URLError as err:
        print(err.reason)
    else:
        # Close the connection once the body has been read.
        with res:
            print(res.read().decode('utf-8'))


def urlD():
    """Fetch a URL through an HTTP proxy.

    Builds an opener around ``ProxyHandler`` configured with a scheme-to-proxy
    mapping and prints either the response body or the failure reason.
    """
    url = 'http://httpbin.org/get'
    proxies = {'http': 'http://111.202.37.195:38431',
               'https': 'https://111.202.37.195:38431'}  # scheme -> proxy URL
    opener = build_opener(ProxyHandler(proxies))
    try:
        res = opener.open(url)
    except URLError as err:
        print(err.reason)
    else:
        # Close the connection once the body has been read.
        with res:
            print(res.read().decode("utf-8"))


def urlE():
    """Collect cookies in an in-memory ``CookieJar`` and print them.

    ``CookieJar`` keeps cookies only in memory; it cannot save them to or
    load them from a file. Prints each cookie's name and value, or the
    failure reason if the request fails.
    """
    cookie = CookieJar()
    opener = build_opener(HTTPCookieProcessor(cookie))
    try:
        res = opener.open('http://www.baidu.com')
    except URLError as err:
        print(err.reason)
    else:
        # The body is not needed: the cookie processor fills the jar while the
        # response is being opened, so release the connection right away.
        res.close()
        for cke in cookie:
            print(cke.name, cke.value)


def urlF():
    """Load, use and save cookies in Mozilla ``cookies.txt`` format.

    Cookies are read from ``1.txt`` when it exists, sent with a request, and
    the jar (including any new cookies) is written back to ``1.txt``. Prints
    each cookie's name and value, or the failure reason.
    """
    from http.cookiejar import LoadError  # local import: only needed here

    cookie = MozillaCookieJar('1.txt')  # file-backed jar bound to 1.txt
    try:
        cookie.load('1.txt', ignore_expires=True, ignore_discard=True)
    except FileNotFoundError:
        # First run: nothing has been saved yet, so start with an empty jar.
        pass
    except LoadError as err:
        # The file exists but is not in Mozilla format (for example it was
        # written by LWPCookieJar). Report it and stop rather than overwrite.
        print(err)
        return
    opener = build_opener(HTTPCookieProcessor(cookie))
    try:
        res = opener.open('http://www.baidu.com')
    except URLError as err:
        print(err.reason)
    else:
        res.close()  # cookies are already in the jar; the body is not needed
        # Keep session and expired cookies too so the file mirrors the jar.
        cookie.save(ignore_discard=True, ignore_expires=True)
        for cke in cookie:
            print(cke.name, cke.value)
    return


def urlG():
    """Load, use and save cookies in libwww-perl (LWP) ``Set-Cookie3`` format.

    Cookies are read from ``1.txt`` when it exists, sent with a request, and
    the jar (including any new cookies) is written back to ``1.txt``. Prints
    each cookie's name and value, or the failure reason.
    """
    from http.cookiejar import LoadError  # local import: only needed here

    cookie = LWPCookieJar('1.txt')  # file-backed jar bound to 1.txt
    try:
        cookie.load('1.txt', ignore_expires=True, ignore_discard=True)
    except FileNotFoundError:
        # First run: nothing has been saved yet, so start with an empty jar.
        pass
    except LoadError as err:
        # The file exists but is not in LWP format (for example it was
        # written by MozillaCookieJar). Report it and stop rather than
        # overwrite.
        print(err)
        return
    opener = build_opener(HTTPCookieProcessor(cookie))
    try:
        res = opener.open('http://www.baidu.com')
    except URLError as err:
        print(err.reason)
    else:
        res.close()  # cookies are already in the jar; the body is not needed
        # Keep session and expired cookies too so the file mirrors the jar.
        cookie.save(ignore_discard=True, ignore_expires=True)
        for cke in cookie:
            print(cke.name, cke.value)
    return


def urlH():
    """Demonstrate catching ``HTTPError`` before the more general ``URLError``.

    Requests a page that is expected not to exist. Prints the reason, status
    code and headers for an HTTP error, only the reason for any other URL
    error, or the decoded body on success.
    """
    try:
        res = urlopen('http://cuiqingcai.com/index.htm')  # expected to answer 404
    except HTTPError as err:
        # HTTPError is a subclass of URLError, so it must be handled first.
        print(err.reason, err.code, err.headers)
    except URLError as err:
        # Any non-HTTP failure (DNS, refused connection, ...).
        print(err.reason)
    else:
        # Close the connection once the body has been read.
        with res:
            print(res.read().decode('utf-8'))

parse使用 url分割合併,構建get引數,get引數轉換為列表或元組,字串轉url編碼

# 姓名: 劉帥
# 日期: 2018.11.18
# 功能: urllib.parse使用方法
from urllib.parse import urlparse  # 分解url為6個數據的元組
from urllib.parse import urlunparse  # 使用6個數據的元組構造url
from urllib.parse import urlsplit  # 分解url為5個數據的元組 忽略params
from urllib.parse import urlunsplit  # 使用5個數據的元組構造url 忽略params
from urllib.parse import urljoin  # url分解合併
from urllib.parse import urlencode  # 構造GET引數
from urllib.parse import parse_qs  # GET引數轉回字典
from urllib.parse import parse_qsl  # GET引數轉為元組組成的列表
from urllib.parse import quote  # 將內容轉化為url編碼格式
from urllib.parse import unquote  # 將url編碼內容轉化為普通字元


def urlA():
    """Split a complete URL with ``urlparse`` and print the 6-field result.

    Prints the result's type (``urllib.parse.ParseResult``) and then the
    parsed components.
    """
    parsed = urlparse('http://www.baidu.com/index.html;user?id=5#comment')
    # One print call, one item per line: the type first, then the components.
    print(type(parsed), parsed, sep='\n')


def urlB():
    """Show the ``scheme`` argument of ``urlparse`` on a URL without a scheme.

    ``scheme`` is the default used when the URL itself carries none. Prints
    the result's type and then the parsed components.
    """
    target = 'www.baidu.com/index.html;user?id=5#comment'
    parsed = urlparse(target, scheme='http')
    for item in (type(parsed), parsed):
        print(item)


def urlC():
    """Show ``urlparse`` with ``allow_fragments=False``.

    With fragments disabled the ``#comment`` suffix is not split off: the
    ``fragment`` field stays empty and the text remains attached to the
    preceding component. Prints the result's type and the parsed components.
    """
    target = 'www.baidu.com/index.html;user?id=5#comment'
    parsed = urlparse(target, allow_fragments=False)
    print(type(parsed), parsed, sep='\n')


def urlD():
    """Show ``allow_fragments=False`` on a URL with no params and no query.

    Because the URL has neither ``;params`` nor ``?query`` and fragment
    splitting is disabled, the ``#comment`` suffix stays in ``path`` while
    ``params``, ``query`` and ``fragment`` are all empty. Prints the result's
    type and the parsed components.
    """
    # The URL deliberately omits ";user?id=5" so that this case differs from
    # the plain allow_fragments demo and really has empty params and query.
    res = urlparse('www.baidu.com/index.html#comment', allow_fragments=False)
    print(type(res))  # <class 'urllib.parse.ParseResult'>
    print(res)  # params, query and fragment are empty
    return


def urlE():
    """Show that ``urlparse`` results can be read by attribute or by index.

    Prints the result's type, then the scheme and network location, each
    fetched once by attribute name and once by position.
    """
    parsed = urlparse('http://www.baidu.com/index.html;user?id=5#comment')
    print(type(parsed))
    by_name = (parsed.scheme, parsed.netloc)
    by_index = (parsed[0], parsed[1])
    # Interleave so the output order is: scheme, [0], netloc, [1].
    print(by_name[0], by_index[0], by_name[1], by_index[1])


def urlF():
    """Build a URL from six components with ``urlunparse`` and print it.

    ``urlunparse`` accepts any iterable of exactly six items: scheme, netloc,
    path, params, query and fragment.
    """
    components = ('http', 'www.baidu', 'index.html', 'user', 'a=6', 'comment')
    print(urlunparse(components))


def urlG():
    """Split a URL with ``urlsplit``, which returns five fields.

    Unlike ``urlparse``, ``urlsplit`` does not separate ``;params``; they
    stay part of ``path``. Prints the result's type, the result itself, and
    the scheme and netloc read both by attribute and by index.
    """
    parts = urlsplit('http://www.baidu.com/index.html;user?id=5#comment')
    print(type(parts), parts, sep='\n')
    scheme, netloc = parts.scheme, parts.netloc
    print(scheme, parts[0], netloc, parts[1])


def urlH():
    """Build a URL from five components with ``urlunsplit`` and print it.

    The five items are scheme, netloc, path (including any ``;params``),
    query and fragment.
    """
    components = ('http', 'www.baidu.com', '/index.html;user', 'id=5', 'comment')
    print(urlunsplit(components))


def urlI():
    """Print the result of ``urljoin`` for several base/link combinations.

    Each pair is ``(base, link)``. The samples cover a relative link, links
    that carry their own scheme and host, and bases without a scheme.
    """
    samples = (
        ('http://www.baidu.com', 'FAQ.html'),
        ('http://www.baidu.com', 'https://cuiqingcai.com/FAQ.html'),
        ('http://www.baidu,com/about.html', 'https://cuiqingcai/FAQ.html'),
        ('http://www.baidu,com/about.html', 'http://cuiqingcai/FAQ.html?question=2'),
        ('http://baidu.com?wd=abc', 'https://cuiqingcai/index.php'),
        ('http://www.baidu.com', '?category=2#comment'),
        ('www.baidu.com', 'category=2#comment'),
        ('www.baidu.com#comment', '?category=2'),
    )
    for base, link in samples:
        print(urljoin(base, link))


def urlJ():
    """Build a GET URL by appending a ``urlencode``-d query to a base URL."""
    query_fields = {'name': 'LiuShuai', 'age': '55'}  # parameter name -> value
    base_url = 'http://www.baidu.com?'  # base address, already ending in "?"
    full_url = ''.join((base_url, urlencode(query_fields)))
    print(full_url)


def urlK():
    """Convert a query string back into Python structures and print both forms.

    ``parse_qs`` yields a dict mapping each name to a list of values;
    ``parse_qsl`` yields a list of ``(name, value)`` tuples.
    """
    query = 'name=LiuShuai&age=55'
    for convert in (parse_qs, parse_qsl):
        print(convert(query))


def urlL():
    """Percent-encode a non-ASCII search word into a URL, then decode it back.

    Prints the URL with the word encoded by ``quote`` and then the same URL
    after ``unquote`` restores the original characters.
    """
    keyword = '桌布'
    encoded_url = '{}{}'.format('http://www.baidu.com/s?wd=', quote(keyword))
    print(encoded_url)
    decoded_url = unquote(encoded_url)
    print(decoded_url)

爬蟲小探-Python3 urllib.request獲取頁面數據

text height urlopen -s mozilla 使用 pri 爬蟲 size 使用Python3 urllib.request中的Requests()和urlopen()方法獲取頁面源碼，並用re正則進行正則匹配查找需要的數據。 #forex.py#co

python3 urllib.requesturlopen 一個https 時ssl證書錯誤!

使用就會 cert http ssl req pen urllib erro 不知道從那個版本起，python用urlopen打開一個https時會驗證一次 SSL 證書，當目標使用的是自簽名的證書時就會爆出一個 <urlopen error [SSL: CERT

python3 urllib的用法

caf utf 語言 try all cti webkit com ret 1.基本方法 urllib.request.urlopen(url, data=None, [timeout, ]*, cafile=None, capath=None, cadefault=Fal

Python3 Urllib庫的基本使用

一、什麼是Urllib 　　Urllib庫是Python自帶的一個http請求庫，包含以下幾個模組： urllib.request　　　　請求模組 urllib.error　　　　異常處理模組 urllib.parse　　　　 url解析

python3 urllib爬蟲，你只需要看這一篇就夠了

寫在最前面：以下資料均脫敏 from urllib import request import requests import urllib if __name__ == "__main__": # 介面的url session_requests = requests.se

Python3 urllib使用

Python3 urllib使用基本使用 get,post,timeout超時,異常 # 姓名: 劉帥 # 日期: 2018.11.18 # 功能: urllib.request.urlopen使用方法 from urllib.request import urlopen # 請

python3 urllib 詳解

本文主要講解 Python 3 中的 urllib 庫的用法。urllib 是 Python 標準庫中用於網路請求的庫。該庫有四個模組，分別是urllib.request，urllib.error，urllib.parse，urllib.robotparser。其中urllib.request，ur

Python3 urllib.parse 常用函數示例

獲取url uil quest search %x sea xxxxx 為什麽 pat Python3 urllib.parse 常用函數示例 http://blog.51cto.com/walkerqt/1766670 1、獲取url參數。 >>&

Python3 urllib.parse 常用函式示例

Python3 urllib.parse 常用函式示例 http://blog.51cto.com/walkerqt/1766670 1、獲取url引數。 >>> from urllib import parse >&g

python3 urllib和requests模組

urllib模組是python自帶的，直接呼叫就好，用法如下： 1 #處理get請求，不傳data，則為get請求 2 import urllib 3 from urllib.request import urlopen 4 from urllib.parse

python3 urllib.request.Request的用法

import urllib.request import urllib.parse url = 'http://127.0.0.1:8000/api/login/' headers = {'User-

python3 urllib基本使用

在python中，urllib是請求url連線的標準庫，在python2中，分別有urllib和urllib，在python3中，整合成了一個，稱謂urllib 　urllib.request 　　request主要負責構建和發起網路請求　　1）GET請求（不帶引數）　　　　response =&

python3 urllib爬蟲抓取記錄

# 目的：GET請求抓取csdn部落格頁面所有文章標題，並儲存在csdn目錄下 import re import os from urllib import request #抓取整個頁面下來 data=request.urlopen('http://blog.csdn.net/a51

Python3 Urllib庫

一.什麼是Urllib庫 urllib是python標準庫，就是你安裝了python，這兩個庫就已經可以直接使用了；它包括以下模組：urllib.request 請求模組 urllib.error 異常處理模組 urllib.parse url解析模組 urllib.robotparser r

Python3 urllib庫和requests庫

1. Python3 使用urllib庫請求網路 1.1 基於urllib庫的GET請求請求百度首頁www.baidu.com ，不新增請求頭資訊： 1 import urllib.requests 2 3 4 def get_page(): 5　　

Python3 urllib庫學習

python3將python2的urllib和urllib2庫整合為一個urllib庫，由於近期用到了這個庫就參考官方文件和網上的資料總結了一下匯入 import urllib urllib.request urllib.reque

python3 urllib包與http包的使用

urllib包和http包都是面向HTTP協議的。其中urllib主要用於處理 URL，使用urllib操作URL可以像使用和開啟本地檔案一樣地操作。而 http包則實現了對 HTTP協議的封裝，是urllib.request模組的底層。 1.urllib包簡介 2. h

Python3 urllib.request與requests模組請求網頁程式碼

爬蟲的起點，獲取網頁程式碼 #!/usr/bin/env python3 # -*- encoding:utf-8 -*- # 請求網頁程式碼 import urllib.request r

Python3 urllib抓取指定URL的內容

最近在研究Python，熟悉了一些基本語法和模組的使用；現在打算研究一下Python爬蟲。學習主要是通過別人的部落格和自己下載的一下文件進行的，自己也寫一下部落格作為記錄學習自己過程吧。Python程式碼寫起來和Java的感覺很不一樣。 Python爬蟲主要使用的是urll

Python3 urllib 庫

read style cookielib readlines 錯誤處理 timeout serve 我們 urllib 簡介 urllib 基礎模塊使用 urllib 發送請求使用 urllib 進行身份驗證使用 urllib 設置代理服務器使用 u

Python3 urllib使用

Python3 urllib使用

基本使用 get,post,timeout超時,異常

Request使用 get,post,驗證,代理,Cookie儲存讀取,異常

parse使用 url分割合併,構建get引數,get引數轉換為列表或元組,字串轉url編碼

相關推薦