Python HTML特殊符號的轉義與反轉義
阿新 • • 發佈:2020-11-30
需求:在做 Web 開發的過程中,經常需要將特殊符號轉義,讓瀏覽器把它們當作普通字串而不是 HTML 標記來處理,以減少針對前端的攻擊(例如 XSS)。
注意:此程式碼來源於 Tornado 原始碼(tornado/escape.py)。
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""Escape and unescape HTML/XHTML special characters.

The helpers in this module are adapted from Tornado's escaping utilities.
"""
import re
import html.entities
import typing
from typing import Union, Optional, Dict

# Types that ``to_unicode`` returns unchanged.
_TO_UNICODE_TYPES = (str, type(None))


def to_unicode(value: Union[None, str, bytes]) -> Optional[str]:
    """Convert a byte string to ``str``.

    ``str`` and ``None`` are returned unchanged; ``bytes`` are decoded as
    UTF-8.

    Raises:
        TypeError: if *value* is not ``str``, ``bytes`` or ``None``.
    """
    if isinstance(value, _TO_UNICODE_TYPES):
        return value
    if not isinstance(value, bytes):
        raise TypeError("Expected bytes, unicode, or None; got %r" % type(value))
    return value.decode("utf-8")


_XHTML_ESCAPE_RE = re.compile("[&<>\"']")
# Each special character maps to its entity. The values must be the entity
# text itself; mapping a character to itself would make escaping a no-op.
_XHTML_ESCAPE_DICT = {
    "&": "&amp;",
    "<": "&lt;",
    ">": "&gt;",
    '"': "&quot;",
    "'": "&#39;",
}


def xhtml_escape(value: Union[str, bytes]) -> str:
    """Escape the characters ``<``, ``>``, ``"``, ``'`` and ``&``.

    *value* may be ``str`` or UTF-8 encoded ``bytes``; the result is ``str``
    with each special character replaced by its entity.
    """
    return _XHTML_ESCAPE_RE.sub(
        lambda match: _XHTML_ESCAPE_DICT[match.group(0)], to_unicode(value)
    )


def _build_unicode_map() -> Dict[str, str]:
    """Build a mapping from HTML entity name (e.g. ``"lt"``) to its character."""
    return {
        name: chr(codepoint)
        for name, codepoint in html.entities.name2codepoint.items()
    }


_HTML_UNICODE_MAP = _build_unicode_map()

# Matches ``&name;``, ``&#123;`` and ``&#x7b;``. Group 1 is "#" for numeric
# references (empty otherwise); group 2 is the name or the digits.
_ENTITY_RE = re.compile(r"&(#?)(\w+?);")


def _convert_entity(m: typing.Match) -> str:
    """``re.sub`` callback: turn one matched entity into its character.

    Entities that cannot be resolved are returned unchanged.
    """
    if m.group(1) == "#":
        try:
            if m.group(2)[:1].lower() == "x":
                return chr(int(m.group(2)[1:], 16))
            else:
                return chr(int(m.group(2)))
        except (ValueError, OverflowError):
            # ValueError: not a number, or outside the Unicode range.
            # OverflowError: number too large for chr() to accept at all.
            return "&#%s;" % m.group(2)
    try:
        return _HTML_UNICODE_MAP[m.group(2)]
    except KeyError:
        return "&%s;" % m.group(2)


def xhtml_unescape(value: Union[str, bytes]) -> str:
    """Replace HTML entities in *value* with the characters they stand for."""
    return _ENTITY_RE.sub(_convert_entity, to_unicode(value))


if __name__ == '__main__':
    src_text = '<script>alert(1)</script>'
    ret_escape = xhtml_escape(src_text)
    print(ret_escape)
    reback = xhtml_unescape(ret_escape)
    print(reback)
    # Expected output:
    # &lt;script&gt;alert(1)&lt;/script&gt;
    # <script>alert(1)</script>