Python:將不同語言的字串連接成文字
阿新 • • 發佈:2020-10-29
Python:將不同語言的字串連接成文字
# -*- coding: utf-8 -*-
"""Join word tokens from different languages back into a single string.

Runs on both Python 2.7 and Python 3: byte-string tokens are decoded as
UTF-8 before any character classification takes place.
"""
import sys
import unicodedata

try:
    _unichr = unichr  # Python 2: chr() only covers 0-255 there
except NameError:
    _unichr = chr  # Python 3: chr() already returns text for any code point

# Every code point whose Unicode general category is a Letter (L*) or a
# Number (N*).  The range is inclusive of sys.maxunicode.
_ALPHANUMERIC_CHAR_SET = {
    ch
    for ch in (_unichr(cp) for cp in range(sys.maxunicode + 1))
    if unicodedata.category(ch).startswith(("L", "N"))
}

# East Asian Width classes "W" (wide) and "F" (fullwidth) cover CJK
# ideographs, kana, Hangul and fullwidth forms.  Tokens starting with such a
# character are joined without a separating space.
_UNSPACED_WIDTHS = ("W", "F")


def _needs_word_spacing(token):
    """Return True if *token* starts with a space-delimited alphanumeric.

    Args:
        token: a text (unicode) token; may be empty.

    Returns:
        True when the first character is a letter or number and is not an
        East Asian wide/fullwidth character; False otherwise, including for
        the empty string.
    """
    first = token[:1]  # slicing (not indexing) keeps empty tokens safe
    return (first in _ALPHANUMERIC_CHAR_SET
            and unicodedata.east_asian_width(first) not in _UNSPACED_WIDTHS)


def _join_tokens_to_string(tokens):
    """Join a list of string tokens into a single string.

    A single space is inserted between two adjacent tokens only when both of
    them start with a space-delimited alphanumeric character (see
    `_needs_word_spacing`).  All other tokens are concatenated directly.

    Args:
        tokens: iterable of tokens.  Each token is either text or a UTF-8
            encoded byte string.

    Returns:
        The joined text as a unicode string; empty for an empty input.

    Raises:
        UnicodeDecodeError: if a byte-string token is not valid UTF-8.
    """
    # Decode first so that classification looks at characters, not at the
    # leading byte of a multi-byte UTF-8 sequence.
    texts = [t.decode("utf-8") if isinstance(t, bytes) else t for t in tokens]
    spaced = [_needs_word_spacing(t) for t in texts]

    pieces = []
    for i, text in enumerate(texts):
        if i > 0 and spaced[i - 1] and spaced[i]:
            pieces.append(u" ")
        pieces.append(text)
    return u"".join(pieces)


if __name__ == '__main__':
    texts = [['hello', 'world'],
             ['mehr', 'Sicherheit', 'für'],
             ["從40萬年前", "開始"]]
    for text in texts:
        ret = _join_tokens_to_string(text)
        # `str is bytes` is only true on Python 2, where print needs encoded
        # bytes; on Python 3 printing bytes would show a b'...' repr instead.
        print(ret.encode("utf-8") if str is bytes else ret)
輸出結果:
hello world
mehr Sicherheit für
從40萬年前開始