1. 程式人生 > 實用技巧 >Python HTML特殊符號的轉義與反轉義

Python HTML特殊符號的轉義與反轉義

需求:在 Web 開發過程中,經常需要將 HTML 特殊符號(`<`、`>`、`&`、`"`、`'`)轉義,讓瀏覽器把它們當成一般文字而不是標籤來處理,以降低 XSS 等前端攻擊的風險。
注意:此程式碼來源於 Tornado 原始碼(`tornado.escape` 模組)。
#!/usr/bin/env python
# -*- coding: utf-8 -*-

import re
import html.entities
import typing
from typing import Union, Optional, Dict

# Types that to_unicode() passes through without decoding.
_TO_UNICODE_TYPES = (str, type(None))


def to_unicode(value: Union[None, str, bytes]) -> Optional[str]:  # noqa: F811
    """Convert a byte string to ``str``.

    ``str`` and ``None`` are returned unchanged; ``bytes`` is decoded as
    UTF-8.

    Raises:
        TypeError: if *value* is not ``bytes``, ``str`` or ``None``.
        UnicodeDecodeError: if *value* is ``bytes`` that is not valid UTF-8.
    """
    if isinstance(value, _TO_UNICODE_TYPES):
        return value
    if not isinstance(value, bytes):
        raise TypeError("Expected bytes, unicode, or None; got %r" % type(value))
    return value.decode("utf-8")


# Characters that must be escaped, and their replacements.
_XHTML_ESCAPE_RE = re.compile("[&<>\"']")
_XHTML_ESCAPE_DICT = {
    "&": "&amp;",
    "<": "&lt;",
    ">": "&gt;",
    '"': "&quot;",
    "'": "&#39;",
}


def xhtml_escape(value: Union[str, bytes]) -> str:
    """Escape ``<``, ``>``, ``"``, ``'`` and ``&`` in *value*.

    ``bytes`` input is decoded as UTF-8 first; the result is always ``str``.
    """
    return _XHTML_ESCAPE_RE.sub(
        lambda match: _XHTML_ESCAPE_DICT[match.group(0)], to_unicode(value)
    )


def _build_unicode_map() -> Dict[str, str]:
    """Map every HTML named entity (without ``&``/``;``) to its character."""
    return {
        name: chr(codepoint)
        for name, codepoint in html.entities.name2codepoint.items()
    }


_HTML_UNICODE_MAP = _build_unicode_map()

# Group 1 is "#" for numeric references (empty for named ones); group 2 is
# the entity name or number.
_XHTML_UNESCAPE_RE = re.compile(r"&(#?)(\w+?);")


def _convert_entity(m: typing.Match) -> str:
    """``re.sub`` callback: turn one matched entity into its character.

    Unknown names and invalid or out-of-range numeric references are
    returned unchanged so that malformed input never raises.
    """
    if m.group(1) == "#":
        try:
            if m.group(2)[:1].lower() == "x":
                return chr(int(m.group(2)[1:], 16))
            else:
                return chr(int(m.group(2)))
        except (ValueError, OverflowError):
            # ValueError: not a number, or outside the Unicode range.
            # OverflowError: chr() may raise this instead when the number
            # does not fit in a C int (e.g. "&#99999999999999999999;").
            return "&#%s;" % m.group(2)
    try:
        return _HTML_UNICODE_MAP[m.group(2)]
    except KeyError:
        return "&%s;" % m.group(2)


def xhtml_unescape(value: Union[str, bytes]) -> str:
    """Replace HTML entities in *value* with the characters they denote.

    Handles named entities (``&lt;``) as well as decimal (``&#60;``) and
    hexadecimal (``&#x3c;``) references. ``bytes`` input is decoded as
    UTF-8 first.
    """
    return _XHTML_UNESCAPE_RE.sub(_convert_entity, to_unicode(value))


if __name__ == '__main__':
    src_text = '<script>alert(1)</script>'
    ret_escape = xhtml_escape(src_text)
    print(ret_escape)
    reback = xhtml_unescape(ret_escape)
    print(reback)
    # Expected output:
    #   &lt;script&gt;alert(1)&lt;/script&gt;
    #   <script>alert(1)</script>