python3 模擬POST請求時 “Content-Type”指定編碼會消失的問題
阿新 • • 發佈:2019-02-17
HTTP請求 伺服器預設GBK編碼時 瀏覽器UTF-8編碼 如果請求時不指定編碼格式就會亂碼
python3 模擬POST請求時 “Content-Type”指定編碼會消失的問題
如下程式碼所示
在外部指定了 "Content-Type" = "application/x-www-form-urlencoded; charset=UTF-8",但是抓包後發現 charset=UTF-8 消失了,變成了 "Content-Type" = "application/x-www-form-urlencoded",導致伺服器用 GBK 解碼,致使亂碼。
import gzip
import http.cookiejar
import json
import urllib.parse
import urllib.request
import zlib
def ungzip(data):
    """Return *data* gunzipped, or unchanged if it is not valid gzip data.

    Args:
        data: Response body as bytes, possibly gzip-compressed.

    Returns:
        The decompressed bytes when *data* is a complete gzip stream;
        otherwise *data* itself, untouched.
    """
    try:
        return gzip.decompress(data)
    except (OSError, EOFError, zlib.error):
        # Best effort: the body was not compressed (bad magic -> OSError /
        # BadGzipFile), was truncated (EOFError) or is corrupt (zlib.error).
        # Only these decompression failures are tolerated, so unrelated
        # mistakes such as passing a non-bytes value are no longer hidden.
        return data
def getOpener(head):
    """Build a cookie-aware opener whose default headers come from *head*.

    Args:
        head: Mapping of header name to header value.

    Returns:
        An opener with an HTTPCookieProcessor installed and ``addheaders``
        set to the (name, value) pairs of *head*, in iteration order.
    """
    cookie_handler = urllib.request.HTTPCookieProcessor(
        http.cookiejar.CookieJar()
    )
    opener = urllib.request.build_opener(cookie_handler)
    # addheaders expects a list of (name, value) tuples.
    opener.addheaders = [(name, value) for name, value in head.items()]
    return opener
# Headers intended for the POST request; the charset parameter of
# Content-Type is the part this example is about.
header={
"Content-Type": "application/x-www-form-urlencoded; charset=UTF-8",
# ... other header
}
# getOpener installs these headers as opener-wide defaults (opener.addheaders).
opener = getOpener(header)
# Form fields to submit; the "sql" value contains non-ASCII characters.
postDict={
"sql": "select '你好' from dual" ,
"pageNum": "1",
"pageSize": "100"
}
# URL-encode the form, then turn the str into bytes (str.encode() defaults to UTF-8).
postData = urllib.parse.urlencode(postDict).encode()
# NOTE(review): `url` and `timeout` are not defined anywhere in the visible
# snippet; they must be provided before this line runs.
op = opener.open(url, postData, timeout=timeout)
response = op.read()
# Decompress the body when it is gzip-compressed; otherwise it is kept as-is.
response = ungzip(response)
# bytes.decode() defaults to UTF-8.
response = response.decode()
response = json.loads(response)
分析除錯發現,上述方式指定的 "Content-Type" 會被預設值 "application/x-www-form-urlencoded" 取代。
具體原始碼分析如下 urllib.request.AbstractHTTPHandler.do_request_()
# Excerpt of urllib.request.AbstractHTTPHandler.do_request_ as quoted by the
# article (indentation was lost in the paste). It fills in headers the request
# does not already carry: Content-type / Content-length for requests with data,
# Host, and finally the opener's default headers (self.parent.addheaders).
def do_request_(self, request):
host = request.host
if not host:
raise URLError('no host given')
if request.data is not None: # POST
data = request.data
if isinstance(data, str):
msg = "POST data should be bytes or an iterable of bytes. " \
"It cannot be of type str."
raise TypeError(msg)
# label_A: unless Content-type was set directly on the request, the default value is set here
if not request.has_header('Content-type'):
request.add_unredirected_header(
'Content-type',
'application/x-www-form-urlencoded')
if not request.has_header('Content-length'):
try:
mv = memoryview(data)
except TypeError:
# NOTE(review): collections.Iterable was removed in Python 3.10
# (use collections.abc.Iterable); this excerpt predates that change.
if isinstance(data, collections.Iterable):
raise ValueError("Content-Length should be specified "
"for iterable data of type %r %r" % (type(data),
data))
else:
request.add_unredirected_header(
'Content-length', '%d' % (len(mv) * mv.itemsize))
sel_host = host
if request.has_proxy():
scheme, sel = splittype(request.selector)
sel_host, sel_path = splithost(sel)
if not request.has_header('Host'):
request.add_unredirected_header('Host', sel_host)
for name, value in self.parent.addheaders:
name = name.capitalize()
# For a POST request that went through label_A above, Content-type is not set again here
if not request.has_header(name):
request.add_unredirected_header(name, value)
return request
解決方案
方案1、把原始碼中的 if not request.has_header(name) 判斷去掉就OK 啦
方案2、因為 opener.open 中的 url 可以是字串,也可以是 Request 物件,直接傳入攜帶 header 的 Request 物件就 OK 啦
# Before: op = opener.open(url, postData, timeout=timeout)
# Attach the headers to the Request itself so the request already carries
# Content-type before any default value would be filled in.
request = urllib.request.Request(url, headers=header)
op = opener.open(request, postData, timeout=timeout)