python 原始碼分析之型別系統
型別系統
一般物件是不能靜態分配的,而 python 所有內建物件都是靜態分配的
typedef struct _object {
_PyObject_HEAD_EXTRA
Py_ssize_t ob_refcnt;
struct _typeobject *ob_type;
} PyObject;
#ifdef Py_TRACE_REFS
/* Define pointers to support a doubly-linked list of all live heap objects. */
#define _PyObject_HEAD_EXTRA \
struct _object *_ob_next; \
struct _object *_ob_prev;
#define _PyObject_EXTRA_INIT 0, 0,
#else
#define _PyObject_HEAD_EXTRA
#define _PyObject_EXTRA_INIT
#endif
預設編譯 Py_TRACE_REFS 沒有定義,因此,實際為
//固定長度,如 int
typedef struct _object {
Py_ssize_t ob_refcnt; //很明顯引用計數器
struct _typeobject *ob_type; //儲存型別資訊
} PyObject;
//可變長物件,如 string, list
typedef struct {
PyObject ob_base;
Py_ssize_t ob_size; //元素個數
} PyVarObject;
python 所有物件的頭部都是 PyObject,因此,可以通過 PyObject 指標指向任何物件。
我們最熟悉的 list 的定義如下:
#define PyObject_VAR_HEAD PyVarObject ob_base;
typedef struct {
PyObject_VAR_HEAD
/* Vector of pointers to list elements. list[0] is ob_item[0], etc. */
PyObject **ob_item;
/* ob_item contains space for 'allocated' elements. The number
* currently in use is ob_size.
* Invariants:
* 0 <= ob_size <= allocated
* len(list) == ob_size
* ob_item == NULL implies ob_size == allocated == 0
* list.sort() temporarily sets allocated to -1 to detect mutations.
*
* Items must normally not be NULL, except during construction when
* the list is not yet visible outside the function that builds it.
*/
Py_ssize_t allocated;
} PyListObject;
據此,知道所有的物件都包含共同的 PyObject,不同物件之間的區別在哪裡呢?
主要的區別在於不同的物件對應 C 中不同的結構體
typedef struct _typeobject {
PyObject_VAR_HEAD
const char *tp_name; /* For printing, in format "<module>.<name>" */
Py_ssize_t tp_basicsize, tp_itemsize; /* For allocation */
/* Methods to implement standard operations */
destructor tp_dealloc;
printfunc tp_print;
getattrfunc tp_getattr;
setattrfunc tp_setattr;
PyAsyncMethods *tp_as_async; /* formerly known as tp_compare (Python 2)
or tp_reserved (Python 3) */
reprfunc tp_repr;
/* Method suites for standard classes */
PyNumberMethods *tp_as_number;
PySequenceMethods *tp_as_sequence;
PyMappingMethods *tp_as_mapping;
/* More standard operations (here for binary compatibility) */
hashfunc tp_hash;
ternaryfunc tp_call;
reprfunc tp_str;
getattrofunc tp_getattro;
setattrofunc tp_setattro;
/* Functions to access object as input/output buffer */
PyBufferProcs *tp_as_buffer;
/* Flags to define presence of optional/expanded features */
unsigned long tp_flags;
const char *tp_doc; /* Documentation string */
/* Assigned meaning in release 2.0 */
/* call function for all accessible objects */
traverseproc tp_traverse;
/* delete references to contained objects */
inquiry tp_clear;
/* Assigned meaning in release 2.1 */
/* rich comparisons */
richcmpfunc tp_richcompare;
/* weak reference enabler */
Py_ssize_t tp_weaklistoffset;
/* Iterators */
getiterfunc tp_iter;
iternextfunc tp_iternext;
/* Attribute descriptor and subclassing stuff */
struct PyMethodDef *tp_methods;
struct PyMemberDef *tp_members;
struct PyGetSetDef *tp_getset;
struct _typeobject *tp_base;
PyObject *tp_dict;
descrgetfunc tp_descr_get;
descrsetfunc tp_descr_set;
Py_ssize_t tp_dictoffset;
initproc tp_init;
allocfunc tp_alloc;
newfunc tp_new;
freefunc tp_free; /* Low-level free-memory routine */
inquiry tp_is_gc; /* For PyObject_IS_GC */
PyObject *tp_bases;
PyObject *tp_mro; /* method resolution order */
PyObject *tp_cache;
PyObject *tp_subclasses;
PyObject *tp_weaklist;
destructor tp_del;
/* Type attribute cache version tag. Added in version 2.6 */
unsigned int tp_version_tag;
destructor tp_finalize;
#ifdef COUNT_ALLOCS
/* these must be last and never explicitly initialized */
Py_ssize_t tp_allocs;
Py_ssize_t tp_frees;
Py_ssize_t tp_maxalloc;
struct _typeobject *tp_prev;
struct _typeobject *tp_next;
#endif
} PyTypeObject;
其中
- tp_bases : 指向基類指標
- tp_basicsize : 分配物件空間大小
tp_new : 建立物件時呼叫對應的函式
- 建立物件
tp_new 根據 tp_basicsize 分配記憶體
如果 tp_new 為 NULL,會向上找到到 tp_base
- 初始化
tp_init
物件建立
- Python C API
分配 Py[type]_Type 物件
AOL(abstract Object Layer) : 形如 PyObject_*** 可以應用到任何 python 物件上,如 PyObject_New 來建立物件
- COL(Concrete Object Layer) : 只作用某一型別物件,如 PyInt_FromLong(10)
如
>>> __builtins__.__dict__['int']
<type 'int'>
>>> a = int(10)
>>> a
10
>>> type(a)
<type 'int'>
>>> int.__base__
<type 'object'>
型別
PyTypeObject PyBaseObject_Type = {
PyVarObject_HEAD_INIT(&PyType_Type, 0)
"object", /* tp_name */
sizeof(PyObject), /* tp_basicsize */ 0, /* tp_itemsize */
object_dealloc, /* tp_dealloc */
0, /* tp_print */
0, /* tp_getattr */
0, /* tp_setattr */
0, /* tp_reserved */
object_repr, /* tp_repr */
0, /* tp_as_number */
0, /* tp_as_sequence */
0, /* tp_as_mapping */
(hashfunc)_Py_HashPointer, /* tp_hash */
0, /* tp_call */
object_str, /* tp_str */
PyObject_GenericGetAttr, /* tp_getattro */
PyObject_GenericSetAttr, /* tp_setattro */
0, /* tp_as_buffer */
Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
PyDoc_STR("object()\n--\n\nThe most base type"), /* tp_doc */
0, /* tp_traverse */
0, /* tp_clear */
object_richcompare, /* tp_richcompare */
0, /* tp_weaklistoffset */
0, /* tp_iter */
0, /* tp_iternext */
object_methods, /* tp_methods */
0, /* tp_members */
object_getsets, /* tp_getset */
0, /* tp_base */
0, /* tp_dict */
0, /* tp_descr_get */
0, /* tp_descr_set */
0, /* tp_dictoffset */
object_init, /* tp_init */
PyType_GenericAlloc, /* tp_alloc */
object_new, /* tp_new */
PyObject_Del, /* tp_free */
};
我們在自定義類的時候,都會繼承 object 類,而這個類對應的
C 結構體就是 PyBaseObject_Type,從其 tp_name 為 "object" 可知端倪。
在建立物件的時候,所有物件都會呼叫這裡的 object_new 函式
物件的行為
在 PyTypeObject 中三個成員決定物件的行為
PyNumberMethods *tp_as_number;
PySequenceMethods *tp_as_sequence;
PyMappingMethods *tp_as_mapping;
- PyNumberMethods : 數字操作,加減乘除
- PySequenceMethods : 序列相關
- PyMappingMethods : 關聯物件
如果一個物件支援上述操作,對應的指標就不為 NULL。
物件型別
PyTypeObject PyType_Type = {
PyVarObject_HEAD_INIT(&PyType_Type, 0)
"type", /* tp_name */
sizeof(PyHeapTypeObject), /* tp_basicsize */
sizeof(PyMemberDef), /* tp_itemsize */
(destructor)type_dealloc, /* tp_dealloc */
0, /* tp_print */
0, /* tp_getattr */
0, /* tp_setattr */
0, /* tp_reserved */
(reprfunc)type_repr, /* tp_repr */
0, /* tp_as_number */
0, /* tp_as_sequence */
0, /* tp_as_mapping */
0, /* tp_hash */
(ternaryfunc)type_call, /* tp_call */
0, /* tp_str */
(getattrofunc)type_getattro, /* tp_getattro */
(setattrofunc)type_setattro, /* tp_setattro */
0, /* tp_as_buffer */
Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC |
Py_TPFLAGS_BASETYPE | Py_TPFLAGS_TYPE_SUBCLASS, /* tp_flags */
type_doc, /* tp_doc */
(traverseproc)type_traverse, /* tp_traverse */
(inquiry)type_clear, /* tp_clear */
0, /* tp_richcompare */
offsetof(PyTypeObject, tp_weaklist), /* tp_weaklistoffset */
0, /* tp_iter */
0, /* tp_iternext */
type_methods, /* tp_methods */
type_members, /* tp_members */
type_getsets, /* tp_getset */
0, /* tp_base */
0, /* tp_dict */
0, /* tp_descr_get */
0, /* tp_descr_set */
offsetof(PyTypeObject, tp_dict), /* tp_dictoffset */
type_init, /* tp_init */
0, /* tp_alloc */
type_new, /* tp_new */
PyObject_GC_Del, /* tp_free */
(inquiry)type_is_gc, /* tp_is_gc */
};
總結:
所有的物件都一個 PyObject 指標頭,而 PyObject 中包含 PyTypeObject
包含所有物件的基本資訊
所有以 object 為基類的物件 : tp_base 指向 PyBaseObject_Type
所有物件型別: ob_type 指向 PyType_Type
PyBaseObject_Type 的 ob_type 也指向 PyType_Type
python 多型
所有物件都有一個頭 PyObject,而 PyObject 的 ob_type 指向 PyType_Type
所有物件的型別通過其 ob_type 指向的 PyType_Type 獲知其型別資訊
PyObject* PyObject_Repr(PyObject *v) {
//...
res = (*v->ob_type->tp_repr)(v);
//...
}
不同型別的 v 的型別對應不同的 PyType_Type 物件,呼叫對應物件的
tp_repr 方法
引用計數
#define Py_INCREF(op) ( \
_Py_INC_REFTOTAL _Py_REF_DEBUG_COMMA \
((PyObject *)(op))->ob_refcnt++)
#define Py_DECREF(op) \
do { \
PyObject *_py_decref_tmp = (PyObject *)(op); \
if (_Py_DEC_REFTOTAL _Py_REF_DEBUG_COMMA \
--(_py_decref_tmp)->ob_refcnt != 0) \
_Py_CHECK_REFCNT(_py_decref_tmp) \
else \
_Py_Dealloc(_py_decref_tmp); \
} while (0)
#define _Py_Dealloc(op) ( \
_Py_INC_TPFREES(op) _Py_COUNT_ALLOCS_COMMA \
(*Py_TYPE(op)->tp_dealloc)((PyObject *)(op)))
Py_INCREF : 增加計數
Py_DECREF : 減少計數,但物件 ob_refcnt 為 0 時呼叫 tp_dealloc 方法釋放記憶體
注:
- python 中引用計數次數不超過 Py_ssize_t 指定的大小的限制
- tp_dealloc 並不意味著會釋放記憶體,而是將其返回記憶體池
#define Py_REFCNT(ob) (((PyObject*)(ob))->ob_refcnt)
#define _Py_NewReference(op) ( \
_Py_INC_TPALLOCS(op) _Py_COUNT_ALLOCS_COMMA \
_Py_INC_REFTOTAL _Py_REF_DEBUG_COMMA \
Py_REFCNT(op) = 1)
可見在初始化的時候,將 ob_refcnt 置為 1