1. 程式人生 > >Python隨筆(四)動態語法樹AST

Python隨筆(四)抽象語法樹AST

開發十年,就只剩下這套架構體系了! >>>   

什麼是抽象語法樹嘞?

在電腦科學中,抽象語法樹(Abstract Syntax Tree,簡稱AST)是原始碼的抽象語法結構的樹狀表現形式。我們可以用一個線上的AST編輯器來觀察AST的構建。
Python語言的執行過程就是先將Python原始碼轉化為抽象語法樹,再以此進行下一步的分析、編譯成位元組碼等其他操作,所以將Python轉化為抽象語法樹更利於程式的分析。一般來說,我們早期的學習當中固然會用到一種叫做表示式樹的東西,我們用Python來實現一下表示式樹:

class StackEmptyException(Exception):
    """Raised when an element is requested from a stack that holds none."""


class StackFullException(Exception):
    """Raised when an element is pushed onto a stack that is already full."""


class Node:
    """One cell of a singly linked list: a payload plus a link to the next cell."""

    def __init__(self, val=None, nxt=None):
        # ``val`` is the stored payload, ``nxt`` the following node (or None).
        self.value, self.next = val, nxt

    def __str__(self):
        """Return the string form of the payload."""
        return '{!s}'.format(self.value)


class Stack:
    """LIFO stack stored as a singly linked list of ``Node`` objects.

    The capacity is controlled by ``max``; ``0`` (the default) means the
    stack is unbounded.
    """

    def __init__(self, max=0):
        """Create an empty stack holding at most ``max`` elements (0 = unbounded)."""
        self._top = None
        self._size = 0  # element count, kept in sync so length/is_full are O(1)
        self._max = 0
        self.max = max  # goes through the validating setter below

    @property
    def max(self):
        """Capacity of the stack; ``0`` means there is no limit."""
        return self._max

    @max.setter
    def max(self, m):
        m = int(m)
        if m <= 0:
            # Zero or a negative value means "unbounded", which is always
            # acceptable no matter how many elements are stored.
            self._max = 0
            return
        if m < self._size:
            raise Exception('Resize stack failed, please pop some elements first.')
        self._max = m

    def _nodes(self):
        """Yield the nodes from the top of the stack down to the bottom."""
        node = self._top
        while node is not None:
            yield node
            node = node.next

    def init(self, iterable=()):
        """Replace the contents with the items of ``iterable``.

        Items are pushed in iteration order, so the last item ends up on
        top.  Any iterable is accepted, including generators.  An empty (or
        ``None``) iterable leaves the stack untouched.

        Raises:
            StackFullException: the iterable holds more items than ``max``.
        """
        items = list(iterable) if iterable is not None else []
        if not items:
            return
        if self._max and len(items) > self._max:
            raise StackFullException('Error: trying to init a stack with more elements than its max size!')
        top = None
        for item in items:
            top = Node(item, top)
        self._top = top
        self._size = len(items)

    def show(self):
        """Print the stack top-first, one boxed element per line, then its base."""
        rows = ['|{:^7}|'.format(str(node)) for node in self._nodes()]
        rows.append(' ' + 7 * '-')  # an empty stack prints only this base line
        print('\n'.join(rows))

    @property
    def length(self):
        """Number of elements currently stored."""
        return self._size

    @property
    def is_empty(self):
        """True when the stack holds no element."""
        return self._top is None

    @property
    def is_full(self):
        """True when a capacity is set and has been reached."""
        return self._max > 0 and self._size >= self._max

    def push(self, item):
        """Put ``item`` on top of the stack.

        Raises:
            StackFullException: the stack has reached its capacity.
        """
        if self.is_full:
            raise StackFullException('Error: trying to push element into a full stack!')
        self._top = Node(item, self._top)
        self._size += 1

    def pop(self):
        """Remove the top element and return its value.

        Raises:
            StackEmptyException: the stack is empty.
        """
        if self.is_empty:
            raise StackEmptyException('Error: trying to pop element from an empty stack!')
        node = self._top
        self._top = node.next
        self._size -= 1
        return node.value

    def top(self):
        """Return the value on top without removing it, or None when empty."""
        return None if self._top is None else self._top.value

    def clear(self):
        """Remove every element."""
        self._top = None
        self._size = 0


def test(stack):
    """Walk ``stack`` through each public operation, printing every step.

    The steps run in a fixed order: show, init, push, top, pop, resize,
    push-when-full, clear, emptiness check and pop-when-empty.  Expected
    failures are caught and their messages printed.
    """
    print('\nShow stack:')
    stack.show()

    print('\nInit linked list:')
    stack.init([1, 2, 3, 4, 5])
    stack.show()

    print('\nPush element to stack:')
    for item in (6, 7, 'like'):
        stack.push(item)
    stack.show()

    print('\nCheck top element:')
    print(stack.top())

    print('\nPop element from stack:')
    popped = stack.pop()
    print('Element %s popped,' % popped)
    stack.show()

    # Shrinking below the current element count is expected to be refused.
    print('\nSet stack max size:')
    try:
        stack.max = 1
    except Exception as err:
        print(err)

    print('\nSet stack max size:')
    stack.max = 7
    print(stack.max)

    print('\nPush full stack:')
    try:
        stack.push(7)
    except StackFullException as err:
        print(err)

    print('\nClear stack:')
    stack.clear()
    stack.show()

    print('\nStack is empty:')
    print(stack.is_empty)

    print('\nPop empty stack:')
    try:
        stack.pop()
    except StackEmptyException as err:
        print(err)


class TreeNode:
    """Binary-tree node: a payload with optional left and right children."""

    def __init__(self, val=None, lef=None, rgt=None):
        # ``lef`` / ``rgt`` are the child nodes, or None where there is no child.
        self.value, self.left, self.right = val, lef, rgt

    def __str__(self):
        """Return the string form of the payload."""
        return '{!s}'.format(self.value)


class BinaryTree:
    """Binary tree of nodes exposing ``value``, ``left`` and ``right``.

    Every traversal returns a list of ``(node, depth)`` pairs with the
    starting node at depth 0.  An empty tree yields an empty list.
    """

    def __init__(self, root=None):
        self._root = root

    def __str__(self):
        """Render the tree in pre-order, indenting 4 spaces per depth level."""
        return self._render(self.pre_traversal())

    @staticmethod
    def _render(pairs):
        """Format ``(node, depth)`` pairs as one indented line per node."""
        return '\n'.join(depth * 4 * ' ' + str(node) for node, depth in pairs)

    def _traverse(self, root, order):
        """Collect ``(node, depth)`` pairs in ``order`` ('pre', 'in' or 'post').

        Starts at ``root``, or at the tree's own root when ``root`` is None.
        """
        if root is None:
            root = self._root
        visited = []

        def _visit(node, depth):
            if node is None:
                return
            if order == 'pre':
                visited.append((node, depth))
            _visit(node.left, depth + 1)
            if order == 'in':
                visited.append((node, depth))
            _visit(node.right, depth + 1)
            if order == 'post':
                visited.append((node, depth))

        _visit(root, 0)
        return visited

    def pre_traversal(self, root=None):
        """Return ``(node, depth)`` pairs in node-left-right order."""
        return self._traverse(root, 'pre')

    def in_traversal(self, root=None):
        """Return ``(node, depth)`` pairs in left-node-right order."""
        return self._traverse(root, 'in')

    def post_traversal(self, root=None):
        """Return ``(node, depth)`` pairs in left-right-node order."""
        return self._traverse(root, 'post')

    @property
    def max_depth(self):
        """Depth of the deepest node (root = 0); 0 as well for an empty tree."""
        return max((depth for _, depth in self.pre_traversal()), default=0)

    def show(self, tl=None):
        """Print ``tl`` (a traversal result); defaults to the pre-order one."""
        if not tl:
            tl = self.pre_traversal()
        print(self._render(tl))

    def make_empty(self):
        """Drop every node."""
        # Reset the root directly instead of re-running __init__, which a
        # subclass may have overridden with a different signature.
        self._root = None

    def insert(self, item):
        """Add ``item`` as a new leaf, keeping the tree roughly balanced.

        Walking down from the root, the first free child slot (left before
        right) receives the new node; when both slots are taken the walk
        continues into the subtree with fewer nodes (left on a tie).
        """
        if self._root is None:
            self._root = TreeNode(item)
            return

        def _size(node):
            return 0 if node is None else 1 + _size(node.left) + _size(node.right)

        node = self._root
        while True:
            if node.left is None:
                node.left = TreeNode(item)
                return
            if node.right is None:
                node.right = TreeNode(item)
                return
            if _size(node.left) <= _size(node.right):
                node = node.left
            else:
                node = node.right


class ExpressionTree(BinaryTree):
    """Binary expression tree built from a tokenised postfix expression."""

    # Operator precedence table.  '(' is presumably listed for infix
    # handling (not shown in this file); it is never a valid postfix token.
    SIGN = {'+': 1, '-': 1, '*': 2, '/': 2, '(': 3}

    def gene_tree_by_postfix(self, expr):
        """Build the tree from ``expr``, an iterable of postfix tokens.

        Operands become leaves; each operator pops its right and then its
        left operand from a work stack and pushes the combined subtree back.
        The tree's root is replaced only when the whole expression is valid.

        Raises:
            StackEmptyException: an operator lacks an operand, or ``expr``
                is empty.
            ValueError: a '(' token is present, or operands are left over
                once every token has been consumed.
        """
        pending = Stack()
        for token in expr:
            if token == '(':
                raise ValueError("Malformed postfix expression: '(' is not a valid token.")
            if token in self.SIGN:
                right = pending.pop()
                left = pending.pop()
                pending.push(TreeNode(token, left, right))
            else:
                pending.push(TreeNode(token))
        root = pending.pop()
        if not pending.is_empty:
            # More operands than the operators could consume.
            raise ValueError('Malformed postfix expression: operands left over.')
        self._root = root


def test_expression_tree(ep):
    """Build an expression tree from the postfix tokens ``ep`` and print it.

    The tree is printed three times: through ``str()`` (pre-order), then
    via ``show`` with the post-order and the in-order traversal.
    """
    tree = ExpressionTree()
    tree.gene_tree_by_postfix(ep)

    print('\n------先序遍歷-------')
    print(str(tree))

    remaining = (
        ('\n------後序遍歷------', tree.post_traversal),
        ('\n-------中序遍歷-------', tree.in_traversal),
    )
    for header, walk in remaining:
        print(header)
        tree.show(walk())

if __name__ == '__main__':
    # Postfix form of (a + b) * (c * (d + e)), tokens separated by single spaces.
    ep = 'a b + c d e + * *'
    tokens = ep.split(' ')
    test_expression_tree(tokens)

回到AST
AST主要作用有三步:

1. 解析(PARSE):將程式碼字串解析成抽象語法樹。
2. 轉換(TRANSFORM):對抽象語法樹進行轉換操作。
3. 生成(GENERATE): 根據變換後的抽象語法樹再生成程式碼字串。  

Python官方對於CPython直譯器對python原始碼的處理過程如下:

1. Parse source code into a parse tree (Parser/pgen.c)
2. Transform parse tree into an Abstract Syntax Tree (Python/ast.c)
3. Transform AST into a Control Flow Graph (Python/compile.c)
4. Emit bytecode based on the Control Flow Graph (Python/compile.c)

但是只知道上面這些還不夠我們去理解,因為在Python中,以控制檯為例,我們的輸入都是些字串,例如 a=2、b=[1,2,3,4,5] 之類,我們要如何讓計算機去理解並且執行這些東西呢?

1. 將整個程式碼字串分割成 語法單元陣列。
2. 在分詞結果的基礎之上分析 語法單元之間的關係。  

一個抽象語法樹的基本構成

type:描述該語句的型別 --變數宣告語句
kind:變數宣告的關鍵字 -- var
declaration: 宣告的內容陣列,裡面的每一項也是一個物件
    type: 描述該語句的型別 
    id: 描述變數名稱的物件
        type:定義
        name: 是變數的名字
    init: 初始化變數值的物件
        type: 型別
        value: 值 "is tree" 不帶引號
        raw: "\"is tree\"" 帶引號

一般來說我們可以在Python的pythonrun裡面找到

PyObject *type;

定義了語法樹的型別
一般來說,研究抽象語法樹有哪些用途呢?

在一種語言的IDE中,語法的檢查、風格的檢查、格式化、高亮、錯誤提示,程式碼自動補全等等
通過搭建一個Python的語法樹去理解表示式是如何被解析的,我們來看一個(3+2-5*0)/3的例子:

# Node evaluators for the four arithmetic operations.  An expression node is
# a tuple ``(op, *args)`` and every ``op`` receives the environment first.
def Num(env, n):
    """Literal: return the number ``n`` unchanged."""
    return n


def Var(env, x):
    """Variable: look the name ``x`` up in ``env``."""
    return env[x]


def Add(env, a, b):
    """Sum of the evaluated sub-expressions ``a`` and ``b``."""
    return _eval(env, a) + _eval(env, b)


def Mul(env, a, b):
    """Product of the evaluated sub-expressions ``a`` and ``b``."""
    return _eval(env, a) * _eval(env, b)


def Sub(env, a, b):
    """Difference of the evaluated sub-expressions ``a`` and ``b``."""
    return _eval(env, a) - _eval(env, b)


def Div(env, a, b):
    """True-division quotient of the evaluated sub-expressions ``a`` and ``b``."""
    return _eval(env, a) / _eval(env, b)


# Expression evaluation: call the node's head with ``env`` and the remaining
# items of the node as arguments.
def _eval(env, expr):
    """Evaluate the expression node ``expr`` in the environment ``env``."""
    return expr[0](env, *expr[1:])


# Free variables available to the expression.
env = {'i': 5, 'j': 2, 'k': 3}
# The syntax tree for (k + j - i * 0) / k (it looks a lot like Clojure...).
tree = (Div, (Sub, (Add, (Var, 'k'), (Var, 'j')), (Mul, (Var, 'i'), (Num, 0))), (Var, 'k'))
print(_eval(env, tree))

輸出:

1.6666666666666667

承接前一篇虛擬機器的執行機制,我們來看看Python的AST解析過程

PyTokenizer_Get
PyRun_InteractiveOneObjectEx

Python中AST的節點定義
pythoncore/Parser/node.c

/* Allocate a parse-tree node of the given type.
 *
 * The new node has no string, line number 0 and no children.
 * Returns NULL when the allocation fails.
 */
node *
PyNode_New(int type)
{
    node *n = (node *) PyObject_MALLOC(1 * sizeof(node));
    if (n == NULL)
        return NULL;
    n->n_type = type;
    n->n_str = NULL;
    n->n_lineno = 0;
    n->n_nchildren = 0;
    n->n_child = NULL;
    return n;
}
import ast
# Source text of a small program, kept as a string so that it can be parsed
# or compiled at run time.
Monster = """
class Monster:
    def __init__(self):
        self.level=0
        self.hp=1000
        self.boom=[x for x in range(10)]
    def eat(self,frut):
        self.hp+=1
    def howl(self):
        print("Ao uuuuuuuuuuuuuu")
monster=Monster()
monster.howl()
"""

if __name__ == "__main__":
    # To run the source instead of inspecting it:
    #     exec(compile(Monster, '<string>', 'exec'))
    # Here it is only parsed, and the resulting syntax tree is printed.
    print(ast.dump(ast.parse(Monster)))

通過compile我們可以把Python字串編譯成程式碼物件,再交給exec執行字串的內容

同時,我們也可以用Python自帶的AST庫解析我們的字串為語法樹
參考文件: [Abstract Syntax Trees](https://docs.python.org/3/library/ast.html)
[輪子哥部落格](http://www.cppblog.com/vczh/archive/2008/06/15/53373.html)
[表示式樹]http://www.cnblogs.com/stackli