Python的執行機制--pyc檔案格式淺析
Python的原始程式碼在執行前都會被先編譯成位元組碼,並把編譯的結果儲存到一個一個的PyCodeObject中,pyc 檔案即是把PyCodeObject從記憶體中以marshal格式儲存到檔案後的結果。
下面我們通過一個測試程式和一個小工具,來了解一下pyc檔案裡到底有些什麼東西。
先寫個簡單的測試程式:
test.py
import dis  # NOTE: keep this line layout -- the disassembly shown later refers to these line numbers
myglobal = True  # module-level (global) variable

def add(a):  # plain function; appears as its own nested code object in the dump
    b = 1
    a += b
    return a

class world:  # the class body also appears as its own code object in the dump
    def __init__(self):
        pass
    def sayHello(self):
        print 'hello,world'

w = world()
w.sayHello()
在這個例子裡,全域性變數,函式,類都有了,然後我們用下面的命令把它編譯成pyc檔案:
python -m compileall test.py
然後我們通過下面的程式碼分析一下test.pyc:
showfile.py
import dis, marshal, struct, sys, time, types def show_file(fname): f = open(fname, "rb") magic = f.read(4) moddate = f.read(4) modtime = time.asctime(time.localtime(struct.unpack('L', moddate)[0])) print "magic %s" % (magic.encode('hex')) print "moddate %s (%s)" % (moddate.encode('hex'), modtime) code = marshal.load(f) show_code(code) def show_code(code, indent=''): old_indent = indent print "%s<code>" % indent indent += ' ' print "%s<argcount> %d </argcount>" % (indent, code.co_argcount) print "%s<nlocals> %d</nlocals>" % (indent, code.co_nlocals) print "%s<stacksize> %d</stacksize>" % (indent, code.co_stacksize) print "%s<flags> %04x</flags>" % (indent, code.co_flags) show_hex("code", code.co_code, indent=indent) print "%s<dis>" % indent dis.disassemble(code) print "%s</dis>" % indent print "%s<names> %r</names>" % (indent, code.co_names) print "%s<varnames> %r</varnames>" % (indent, code.co_varnames) print "%s<freevars> %r</freevars>" % (indent, code.co_freevars) print "%s<cellvars> %r</cellvars>" % (indent, code.co_cellvars) print "%s<filename> %r</filename>" % (indent, code.co_filename) print "%s<name> %r</name>" % (indent, code.co_name) print "%s<firstlineno> %d</firstlineno>" % (indent, code.co_firstlineno) print "%s<consts>" % indent for const in code.co_consts: if type(const) == types.CodeType: show_code(const, indent+' ') else: print " %s%r" % (indent, const) print "%s</consts>" % indent show_hex("lnotab", code.co_lnotab, indent=indent) print "%s</code>" % old_indent def show_hex(label, h, indent): h = h.encode('hex') if len(h) < 60: print "%s<%s> %s</%s>" % (indent, label, h,label) else: print "%s<%s>" % (indent, label) for i in range(0, len(h), 60): print "%s %s" % (indent, h[i:i+60]) print "%s</%s>" % (indent, label) show_file(sys.argv[1])
執行下面的命令:
showfile.py test.pyc >test.xml
這樣會把輸出儲存到test.xml中,來看看test.xml中有什麼內容:
magic 03f30d0a moddate a2567054 (Sat Nov 22 17:25:54 2014) <code> <argcount> 0 </argcount> <nlocals> 0</nlocals> <stacksize> 3</stacksize> <flags> 0040</flags> <code> 6400006401006c00005a00006501005a02006402008400005a0300640300 640500640400840000830000595a04006504008300005a05006505006a06 008300000164010053 </code> <dis> 1 0 LOAD_CONST 0 (-1) 3 LOAD_CONST 1 (None) 6 IMPORT_NAME 0 (dis) 9 STORE_NAME 0 (dis) 2 12 LOAD_NAME 1 (True) 15 STORE_NAME 2 (myglobal) 4 18 LOAD_CONST 2 (<code object add at 024E3B60, file "test.py", line 4>) 21 MAKE_FUNCTION 0 24 STORE_NAME 3 (add) 9 27 LOAD_CONST 3 ('world') 30 LOAD_CONST 5 (()) 33 LOAD_CONST 4 (<code object world at 024E3650, file "test.py", line 9>) 36 MAKE_FUNCTION 0 39 CALL_FUNCTION 0 42 BUILD_CLASS 43 STORE_NAME 4 (world) 15 46 LOAD_NAME 4 (world) 49 CALL_FUNCTION 0 52 STORE_NAME 5 (w) 16 55 LOAD_NAME 5 (w) 58 LOAD_ATTR 6 (sayHello) 61 CALL_FUNCTION 0 64 POP_TOP 65 LOAD_CONST 1 (None) 68 RETURN_VALUE </dis> <names> ('dis', 'True', 'myglobal', 'add', 'world', 'w', 'sayHello')</names> <varnames> ()</varnames> <freevars> ()</freevars> <cellvars> ()</cellvars> <filename> 'test.py'</filename> <name> '<module>'</name> <firstlineno> 1</firstlineno> <consts> -1 None <code> <argcount> 1 </argcount> <nlocals> 2</nlocals> <stacksize> 2</stacksize> <flags> 0043</flags> <code> 6401007d01007c00007c0100377d00007c000053</code> <dis> 5 0 LOAD_CONST 1 (1) 3 STORE_FAST 1 (b) 6 6 LOAD_FAST 0 (a) 9 LOAD_FAST 1 (b) 12 INPLACE_ADD 13 STORE_FAST 0 (a) 7 16 LOAD_FAST 0 (a) 19 RETURN_VALUE </dis> <names> ()</names> <varnames> ('a', 'b')</varnames> <freevars> ()</freevars> <cellvars> ()</cellvars> <filename> 'test.py'</filename> <name> 'add'</name> <firstlineno> 4</firstlineno> <consts> None 1 </consts> <lnotab> 000106010a01</lnotab> </code> 'world' <code> <argcount> 0 </argcount> <nlocals> 0</nlocals> <stacksize> 1</stacksize> <flags> 0042</flags> <code> 6500005a01006400008400005a02006401008400005a03005253</code> <dis> 9 0 LOAD_NAME 0 (__name__) 3 
STORE_NAME 1 (__module__) 10 6 LOAD_CONST 0 (<code object __init__ at 024E3B18, file "test.py", line 10>) 9 MAKE_FUNCTION 0 12 STORE_NAME 2 (__init__) 12 15 LOAD_CONST 1 (<code object sayHello at 024E3698, file "test.py", line 12>) 18 MAKE_FUNCTION 0 21 STORE_NAME 3 (sayHello) 24 LOAD_LOCALS 25 RETURN_VALUE </dis> <names> ('__name__', '__module__', '__init__', 'sayHello')</names> <varnames> ()</varnames> <freevars> ()</freevars> <cellvars> ()</cellvars> <filename> 'test.py'</filename> <name> 'world'</name> <firstlineno> 9</firstlineno> <consts> <code> <argcount> 1 </argcount> <nlocals> 1</nlocals> <stacksize> 1</stacksize> <flags> 0043</flags> <code> 64000053</code> <dis> 11 0 LOAD_CONST 0 (None) 3 RETURN_VALUE </dis> <names> ()</names> <varnames> ('self',)</varnames> <freevars> ()</freevars> <cellvars> ()</cellvars> <filename> 'test.py'</filename> <name> '__init__'</name> <firstlineno> 10</firstlineno> <consts> None </consts> <lnotab> 0001</lnotab> </code> <code> <argcount> 1 </argcount> <nlocals> 1</nlocals> <stacksize> 1</stacksize> <flags> 0043</flags> <code> 640100474864000053</code> <dis> 13 0 LOAD_CONST 1 ('hello,world') 3 PRINT_ITEM 4 PRINT_NEWLINE 5 LOAD_CONST 0 (None) 8 RETURN_VALUE </dis> <names> ()</names> <varnames> ('self',)</varnames> <freevars> ()</freevars> <cellvars> ()</cellvars> <filename> 'test.py'</filename> <name> 'sayHello'</name> <firstlineno> 12</firstlineno> <consts> None 'hello,world' </consts> <lnotab> 0001</lnotab> </code> </consts> <lnotab> 06010902</lnotab> </code> () </consts> <lnotab> 0c010602090513060901</lnotab> </code>
再對照Python原始碼中Include/code.h裡定義的PyCodeObject結構:
/* Bytecode object.
 *
 * One PyCodeObject holds the compiled form of a single unit of source;
 * the dump above contains one each for the module, the function, the
 * class body and each method.  A .pyc file is this structure written
 * out in marshal format, with nested code objects stored in co_consts.
 */
typedef struct {
    PyObject_HEAD
    int co_argcount; /* #arguments, except *args */
    int co_nlocals; /* #local variables (the dump shows arguments are counted too) */
    int co_stacksize; /* #entries needed for evaluation stack */
    int co_flags; /* CO_..., see below */
    PyObject *co_code; /* instruction opcodes */
    PyObject *co_consts; /* list (constants used) */
    PyObject *co_names; /* list of strings (names used); printed as a tuple in the dump */
    PyObject *co_varnames; /* tuple of strings (local variable names) */
    PyObject *co_freevars; /* tuple of strings (free variable names) */
    PyObject *co_cellvars; /* tuple of strings (cell variable names) */
    /* The rest doesn't count for hash/cmp */
    PyObject *co_filename; /* string (where it was loaded from) */
    PyObject *co_name; /* string (name, for reference) */
    int co_firstlineno; /* first source line number */
    PyObject *co_lnotab; /* string (encoding addr<->lineno mapping) See
    Objects/lnotab_notes.txt for details. */
    void *co_zombieframe; /* for optimization only (see frameobject.c) */
    PyObject *co_weakreflist; /* to support weakrefs to code objects */
} PyCodeObject;
我們可以看到,整個test.pyc就是一個巢狀的PyCodeObject結構的組合:對於每個函式、類以及類的方法,都會生成一個對應的PyCodeObject結構,並且模組本身還會額外生成一個最外層的PyCodeObject結構。
來簡單解釋下各欄位的意義:
argcount:引數的個數
nlocals:區域性變數的個數(包含引數在內)
stacksize:堆疊的大小
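flags:一組標誌位元(CO_* 常量),除了表示引數中是否有*args(CO_VARARGS)或者 **kwargs(CO_VARKEYWORDS)之外,還包含 CO_OPTIMIZED、CO_NEWLOCALS、CO_NOFREE 等標誌;例如上面 add 函式的 0043 即 CO_OPTIMIZED | CO_NEWLOCALS | CO_NOFREE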
code:位元組碼
names:程式碼中引用到的名稱,包括全域性變數,匯入的模組,函式,類,以及屬性和類的方法的名稱
varnames:區域性變數的名稱(包含引數)
consts:一個常量表,在marshal.c中有定義所有的型別:
/* Single-character type codes defined in marshal.c, one per kind of
 * object that marshal can store (and therefore one per kind of entry
 * that can appear in a constants table). */
#define TYPE_NULL '0'
#define TYPE_NONE 'N'
#define TYPE_FALSE 'F'
#define TYPE_TRUE 'T'
#define TYPE_STOPITER 'S'
#define TYPE_ELLIPSIS '.'
#define TYPE_INT 'i'
#define TYPE_INT64 'I'
#define TYPE_FLOAT 'f'
#define TYPE_BINARY_FLOAT 'g'
#define TYPE_COMPLEX 'x'
#define TYPE_BINARY_COMPLEX 'y'
#define TYPE_LONG 'l'
#define TYPE_STRING 's'
#define TYPE_INTERNED 't'
#define TYPE_STRINGREF 'R'
#define TYPE_TUPLE '('
#define TYPE_LIST '['
#define TYPE_DICT '{'
#define TYPE_CODE 'c'
#define TYPE_UNICODE 'u'
#define TYPE_UNKNOWN '?'
#define TYPE_SET '<'
#define TYPE_FROZENSET '>'
所有的PyCodeObject都是通過呼叫以下的函式得以執行的:
PyObject * PyEval_EvalFrameEx(PyFrameObject *f, int throwflag)
這個函式是Python的一個重量級的函式,它的作用即是執行位元組碼,Python的程式碼都是通過呼叫這個函式來執行的。
通過上面的分析,對於Python程式碼的執行應該有了一個比較清晰的認識。
參考連結:
http://nedbatchelder.com/blog/200804/the_structure_of_pyc_files.html
http://daeken.com/2010-02-20_Python_Marshal_Format.html