python slots源碼分析
上次總結Python3的字典實現後的某一天,突然開竅Python的__slots__的實現應該也是類似,於是翻了翻CPython的源碼,果然如此!
關於在自定義類裏面添加__slots__的效果,網上已經有很多資料了,其中優點大致有:
(1)更省內存。
(2)訪問屬性更高效。
而本文講的是,為什麼更省內存?為什麼更高效?當然為了弄明白這些,深入到CPython的源碼是必不可少的。不過,心裏有個猜想之後再去看源碼效果或許更好,這樣目的性更強,清楚自己需要關注的是什麼,以免在其中迷失!
我先稍微解釋一下:
(1)更省內存是因為實例的屬性不以字典的形式存儲,而是以更緊湊的格式。
(2)更高效是因為實例在做屬性查找的時候,節省了一次hash查找,改為以計算屬性內存的偏移量直接讀寫內存。
接下來本文會從三方面分析定義了slots的作用以及影響,分別是:定義類時、創建實例為其分配內存時、以及從實例訪問屬性時。
1、定義類
先說一下在類定義時使用__slots__會有哪些影響
typeobject.c:
/* Abridged excerpt of type_new(), the function that builds a new type object
 * for a class definition. Only the parts that deal with __slots__ are kept:
 *   1. turn __slots__ into a sorted tuple of mangled names,
 *   2. allocate the type object with room for nslots member entries,
 *   3. record name/type/offset of every slot as a member entry on the type,
 *   4. grow tp_basicsize by one pointer per slot and call PyType_Ready().
 * Returns the new type object, or NULL on failure.
 *
 * NOTE(review): the local declarations, the code that sets nslots, add_dict,
 * add_weak, base and et, and the bad_slots label are not shown in this
 * excerpt (elided at "...").
 */
static PyObject *
type_new(PyTypeObject *metatype, PyObject *args, PyObject *kwds)
{
    ...

    /* Check for a __slots__ sequence variable in dict, and count it */
    slots = PyDict_GetItemString(dict, "__slots__");
    nslots = 0;
    if (slots == NULL) {
        /* The class definition has no __slots__; not of interest here */
    }
    else {
        /* Have slots */

        /* Make it into a tuple */
        if (PyString_Check(slots) || PyUnicode_Check(slots))
            slots = PyTuple_Pack(1, slots);
        else
            slots = PySequence_Tuple(slots);
        if (slots == NULL) {
            Py_DECREF(bases);
            return NULL;
        }
        assert(PyTuple_Check(slots));

        /* Copy slots into a list, mangle names and sort them.
           Sorted names are needed for __class__ assignment.
           Convert them back to tuple at the end.
        */
        newslots = PyList_New(nslots - add_dict - add_weak);
        if (newslots == NULL)
            goto bad_slots;
        /* i walks the input tuple, j counts the names actually kept */
        for (i = j = 0; i < nslots; i++) {
            char *s;
            tmp = PyTuple_GET_ITEM(slots, i);
            s = PyString_AS_STRING(tmp);
            /* "__dict__" / "__weakref__" are skipped when the matching
               add_dict / add_weak flag is set */
            if ((add_dict && strcmp(s, "__dict__") == 0) ||
                (add_weak && strcmp(s, "__weakref__") == 0))
                continue;
            tmp =_Py_Mangle(name, tmp);
            if (!tmp) {
                Py_DECREF(newslots);
                goto bad_slots;
            }
            PyList_SET_ITEM(newslots, j, tmp);
            j++;
        }
        nslots = j;
        Py_DECREF(slots);
        if (PyList_Sort(newslots) == -1) {
            Py_DECREF(bases);
            Py_DECREF(newslots);
            return NULL;
        }
        slots = PyList_AsTuple(newslots);
        Py_DECREF(newslots);
        if (slots == NULL) {
            Py_DECREF(bases);
            return NULL;
        }
    }

    /* Allocate the type object */
    /* The allocation request passes nslots, so the memory needed to store
       the slot entries is taken into account here as well */
    type = (PyTypeObject *)metatype->tp_alloc(metatype, nslots);
    if (type == NULL) {
        Py_XDECREF(slots);
        Py_DECREF(bases);
        return NULL;
    }

    /* Add descriptors for custom slots from __slots__, or for __dict__ */
    /* Each slot is stored as a member entry on the type object. A concrete
     * descriptor is created from every member later on, and all reads and
     * writes of the attribute actually go through that descriptor.
     */
    mp = PyHeapType_GET_MEMBERS(et);
    slotoffset = base->tp_basicsize;
    if (slots != NULL) {
        for (i = 0; i < nslots; i++, mp++) {
            mp->name = PyString_AS_STRING(
                PyTuple_GET_ITEM(slots, i));
            mp->type = T_OBJECT_EX;
            mp->offset = slotoffset;

            /* __dict__ and __weakref__ are already filtered out */
            assert(strcmp(mp->name, "__dict__") != 0);
            assert(strcmp(mp->name, "__weakref__") != 0);

            /* every slot occupies one PyObject pointer in the instance */
            slotoffset += sizeof(PyObject *);
        }
    }

    /* type->tp_basicsize describes how much memory an instance occupies
     * (only part of it, to be precise). As the loop above shows, slotoffset
     * has grown by nslots pointers, and those pointers are what actually
     * hold the attribute values. So the slot values live directly in the
     * instance memory, while the offset of each attribute is kept as a
     * member entry on the type object.
     */
    type->tp_basicsize = slotoffset;
    type->tp_itemsize = base->tp_itemsize;
    type->tp_members = PyHeapType_GET_MEMBERS(et);

    /* Always override allocation strategy to use regular heap */
    type->tp_alloc = PyType_GenericAlloc;

    /* PyType_Ready creates a descriptor for every member of the type;
     * attribute access on an instance later relies on that descriptor.
     */
    if (PyType_Ready(type) < 0) {
        Py_DECREF(type);
        return NULL;
    }

    return (PyObject *)type;
}
當我們定義一個類的時候,最後會調用到上面type_new這個函數。由於只關注slots,因此我省略掉了一部分的代碼。可以看出,如果有定義slots,那麼會將其信息以member的形式存儲在類的身上。觀察初始化member的代碼,可以發現關於訪問屬性的最重要的兩個數據都在其中,一個是屬性的內存位置,由相對於實例的偏移值mp->offset描述。通過這個偏移值,我們能拿到屬性數據在內存中的起始地址,但卻不知道如何解釋這塊內存,因此還需要一個類型信息,這個信息由mp->type來補充。
剩下的工作便是在調用函數PyType_Ready時,根據member中存儲的信息,創建出執行訪問操作的descriptor對象。
/* Create a member descriptor for every entry of the member array `memb`
 * (terminated by an entry whose name is NULL) and store it in type->tp_dict
 * under the member's name. Names already present in tp_dict are left alone.
 *
 * Defined before PyType_Ready so that the static helper is declared before
 * its first use.
 *
 * Returns 0 on success, -1 if a descriptor could not be created or stored.
 */
static int
add_members(PyTypeObject *type, PyMemberDef *memb)
{
    PyObject *dict = type->tp_dict;

    for (; memb->name != NULL; memb++) {
        PyObject *descr;

        /* do not override an entry that already exists in tp_dict */
        if (PyDict_GetItemString(dict, memb->name))
            continue;
        descr = PyDescr_NewMember(type, memb);
        if (descr == NULL)
            return -1;
        if (PyDict_SetItemString(dict, memb->name, descr) < 0) {
            Py_DECREF(descr);
            return -1;
        }
        /* release the local reference in the success path as well */
        Py_DECREF(descr);
    }
    return 0;
}

/* Abridged excerpt of PyType_Ready(): only the step that turns tp_members
 * into descriptors stored in tp_dict is shown.
 *
 * Returns 0 on success; on failure clears Py_TPFLAGS_READYING from tp_flags
 * and returns -1.
 */
int
PyType_Ready(PyTypeObject *type)
{
    /* Add type-specific descriptors to tp_dict */
    if (type->tp_members != NULL) {
        if (add_members(type, type->tp_members) < 0)
            goto error;
    }
    return 0;

  error:
    type->tp_flags &= ~Py_TPFLAGS_READYING;
    return -1;
}
同樣的,省略了很多其它不相關的代碼。可以看出,最終根據member創建出的descriptor是存儲在type對象上的tp_dict中的。
2、創建實例
當創建一個類的實例時,會為其分配內存。如果這個類定義了slots,那麼會申請更多的內存,slots定義的屬性便是存儲在這部分內存中。直接看為實例申請內存的代碼:
/* Size in bytes of an instance of `typeobj` holding `nitems` items:
 * tp_basicsize + nitems * tp_itemsize, rounded up to a multiple of
 * SIZEOF_VOID_P (the mask trick assumes SIZEOF_VOID_P is a power of two).
 * Defined ahead of PyType_GenericAlloc, which uses it.
 */
#define _PyObject_VAR_SIZE(typeobj, nitems)     \
    (size_t)                                    \
    ( ( (typeobj)->tp_basicsize +               \
        (nitems)*(typeobj)->tp_itemsize +       \
        (SIZEOF_VOID_P - 1)                     \
      ) & ~(SIZEOF_VOID_P - 1)                  \
    )

/* Allocate and zero-initialise the memory for a new instance of `type`
 * with room for `nitems` items, then initialise its object header.
 *
 * The size is derived from type->tp_basicsize (and tp_itemsize), so any
 * space that was added to tp_basicsize is part of every instance.
 *
 * Returns the new object, or the result of PyErr_NoMemory() if the
 * allocation fails.
 */
PyObject *
PyType_GenericAlloc(PyTypeObject *type, Py_ssize_t nitems)
{
    PyObject *obj;
    const size_t size = _PyObject_VAR_SIZE(type, nitems+1);
    /* note that we need to add one, for the sentinel */

    if (PyType_IS_GC(type))
        obj = _PyObject_GC_Malloc(size);
    else
        obj = (PyObject *)PyObject_MALLOC(size);

    if (obj == NULL)
        return PyErr_NoMemory();

    /* every field, including every slot pointer, starts out as zero/NULL */
    memset(obj, '\0', size);

    /* an instance of a heap type takes a reference on its type */
    if (type->tp_flags & Py_TPFLAGS_HEAPTYPE)
        Py_INCREF(type);

    if (type->tp_itemsize == 0)
        (void)PyObject_INIT(obj, type);
    else
        (void) PyObject_INIT_VAR((PyVarObject *)obj, type, nitems);

    if (PyType_IS_GC(type))
        _PyObject_GC_TRACK(obj);
    return obj;
}
從代碼可知,實例的內存大小與其type對象的tp_basicsize是相關聯的。回看之前定義類時的type_new函數,會發現tp_basicsize這個值已經是包含了slots所需的內存了(詳見計算member偏移值那部分代碼)。type_new為slots中的每一項都分配一個指針長度的內存,而日後實例的屬性便是存儲在這個位置上。也就是說,每個屬性只佔用一個指針的空間,而不必以字典的形式存儲,這也正是slots更省內存的原因!
3、訪問屬性
最後來看從實例上訪問slots的屬性是怎樣的,以讀屬性的值為例
/* Generic GetAttr functions - put these in your tp_[gs]etattro slot */

/* Look up attribute `name` on `obj`.
 *
 * Lookup order, as implemented below:
 *   1. a data descriptor found via _PyType_Lookup() on the object's type
 *      (its tp_descr_get result is returned immediately),
 *   2. the instance dictionary (`dict` if given, otherwise the one located
 *      through tp->tp_dictoffset),
 *   3. a non-data descriptor's tp_descr_get,
 *   4. the object found on the type itself.
 * If nothing matches, AttributeError is set and NULL is returned.
 *
 * Returns a new reference, or NULL with an exception set.
 */
PyObject *
_PyObject_GenericGetAttrWithDict(PyObject *obj, PyObject *name, PyObject *dict)
{
    PyTypeObject *tp = Py_TYPE(obj);
    PyObject *descr = NULL;
    PyObject *res = NULL;
    descrgetfunc f;
    Py_ssize_t dictoffset;
    PyObject **dictptr;

    /* Hold our own reference to name for the duration of the lookup; it is
       released at `done`. Without it the Py_DECREF there would drop a
       reference that belongs to the caller. */
    Py_INCREF(name);

    if (tp->tp_dict == NULL) {
        if (PyType_Ready(tp) < 0)
            goto done;
    }

    descr = _PyType_Lookup(tp, name);
    Py_XINCREF(descr);

    f = NULL;
    if (descr != NULL &&
        PyType_HasFeature(descr->ob_type, Py_TPFLAGS_HAVE_CLASS)) {
        f = descr->ob_type->tp_descr_get;
        /* data descriptors win over the instance dictionary */
        if (f != NULL && PyDescr_IsData(descr)) {
            res = f(descr, obj, (PyObject *)obj->ob_type);
            Py_DECREF(descr);
            goto done;
        }
    }

    if (dict == NULL) {
        /* Inline _PyObject_GetDictPtr */
        dictoffset = tp->tp_dictoffset;
        if (dictoffset != 0) {
            if (dictoffset < 0) {
                /* a negative offset is made positive by adding the
                   instance size computed from ob_size */
                Py_ssize_t tsize;
                size_t size;

                tsize = ((PyVarObject *)obj)->ob_size;
                if (tsize < 0)
                    tsize = -tsize;
                size = _PyObject_VAR_SIZE(tp, tsize);

                dictoffset += (long)size;
                assert(dictoffset > 0);
                assert(dictoffset % SIZEOF_VOID_P == 0);
            }
            dictptr = (PyObject **) ((char *)obj + dictoffset);
            dict = *dictptr;
        }
    }
    if (dict != NULL) {
        Py_INCREF(dict);
        res = PyDict_GetItem(dict, name);
        if (res != NULL) {
            Py_INCREF(res);
            Py_XDECREF(descr);
            Py_DECREF(dict);
            goto done;
        }
        Py_DECREF(dict);
    }

    /* non-data descriptor: consulted only after the instance dictionary */
    if (f != NULL) {
        res = f(descr, obj, (PyObject *)Py_TYPE(obj));
        Py_DECREF(descr);
        goto done;
    }

    if (descr != NULL) {
        res = descr;
        /* descr was already increfed above */
        goto done;
    }

    PyErr_Format(PyExc_AttributeError,
                 "'%.50s' object has no attribute '%.400s'",
                 tp->tp_name, PyString_AS_STRING(name));
  done:
    Py_DECREF(name);
    return res;
}
當從實例身上訪問一個屬性時,首先嘗試從類對象的tp_dict查找,是否存在對應的descriptor。若存在,並且它是一個data descriptor(即PyDescr_IsData為真,查找slots的屬性正是如此),則調用descriptor身上的tp_descr_get方法,並將方法的返回值作為這次屬性查找的結果返回。
從中也可以看出,如果訪問的是普通的屬性,還要根據type對象的dictoffset偏移值找到實例的屬性字典,然後再在這個字典中執行hash查找屬性。這就是為什麼定義了slots後屬性查找理論上會更高效。
看看tp_descr_get方法長啥樣:
/* Type object of the "member_descriptor" objects. The initializer is
 * positional: each value fills the field named in the comment beside it.
 * The two entries that matter for attribute access are the last ones:
 * tp_descr_get is member_get and tp_descr_set is member_set.
 */
PyTypeObject PyMemberDescr_Type = {
    PyVarObject_HEAD_INIT(&PyType_Type, 0)
    "member_descriptor",                        /* tp_name */
    sizeof(PyMemberDescrObject),                /* tp_basicsize */
    0,                                          /* tp_itemsize */
    (destructor)descr_dealloc,                  /* tp_dealloc */
    0,                                          /* tp_print */
    0,                                          /* tp_getattr */
    0,                                          /* tp_setattr */
    0,                                          /* tp_compare */
    (reprfunc)member_repr,                      /* tp_repr */
    0,                                          /* tp_as_number */
    0,                                          /* tp_as_sequence */
    0,                                          /* tp_as_mapping */
    0,                                          /* tp_hash */
    0,                                          /* tp_call */
    0,                                          /* tp_str */
    PyObject_GenericGetAttr,                    /* tp_getattro */
    0,                                          /* tp_setattro */
    0,                                          /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,    /* tp_flags */
    0,                                          /* tp_doc */
    descr_traverse,                             /* tp_traverse */
    0,                                          /* tp_clear */
    0,                                          /* tp_richcompare */
    0,                                          /* tp_weaklistoffset */
    0,                                          /* tp_iter */
    0,                                          /* tp_iternext */
    0,                                          /* tp_methods */
    descr_members,                              /* tp_members */
    member_getset,                              /* tp_getset */
    0,                                          /* tp_base */
    0,                                          /* tp_dict */
    (descrgetfunc)member_get,                   /* tp_descr_get */
    (descrsetfunc)member_set,                   /* tp_descr_set */
};

/* tp_descr_get of member descriptors: read the member that `descr`
 * describes from the object `obj`.
 *
 * descr_check() is defined elsewhere; when it returns non-zero, the value
 * it stored in `res` is returned as the result. Otherwise the value is
 * read from obj's memory by PyMember_GetOne() using descr->d_member.
 * The `type` argument is not used here.
 */
static PyObject *
member_get(PyMemberDescrObject *descr, PyObject *obj, PyObject *type)
{
    PyObject *res;

    if (descr_check((PyDescrObject *)descr, obj, &res))
        return res;
    return PyMember_GetOne((char *)obj, descr->d_member);
}
原來最後是通過函數PyMember_GetOne來獲取屬性。好!繼續深入:
/* Read one member from the object memory starting at `addr`.
 *
 * `l` describes the member: l->offset is its byte offset from `addr`,
 * l->type selects how the bytes at that location are interpreted, and
 * l->flags may mark it as READ_RESTRICTED.
 *
 * Returns a new reference to the Python value, or NULL with an exception
 * set (RuntimeError for a restricted read, AttributeError for an unset
 * T_OBJECT_EX member, SystemError for an unknown member type).
 */
PyObject *
PyMember_GetOne(const char *addr, PyMemberDef *l)
{
    const char *field;
    PyObject *value;

    if ((l->flags & READ_RESTRICTED) && PyEval_GetRestricted()) {
        PyErr_SetString(PyExc_RuntimeError, "restricted attribute");
        return NULL;
    }

    /* location of the member inside the object */
    field = addr + l->offset;

    switch (l->type) {
    case T_BOOL:
        return PyBool_FromLong(*(char*)field);
    case T_BYTE:
        return PyInt_FromLong(*(char*)field);
    case T_UBYTE:
        return PyLong_FromUnsignedLong(*(unsigned char*)field);
    case T_SHORT:
        return PyInt_FromLong(*(short*)field);
    case T_USHORT:
        return PyLong_FromUnsignedLong(*(unsigned short*)field);
    case T_INT:
        return PyInt_FromLong(*(int*)field);
    case T_UINT:
        return PyLong_FromUnsignedLong(*(unsigned int*)field);
    case T_LONG:
        return PyInt_FromLong(*(long*)field);
    case T_ULONG:
        return PyLong_FromUnsignedLong(*(unsigned long*)field);
    case T_PYSSIZET:
        return PyInt_FromSsize_t(*(Py_ssize_t*)field);
    case T_FLOAT:
        return PyFloat_FromDouble((double)*(float*)field);
    case T_DOUBLE:
        return PyFloat_FromDouble(*(double*)field);
    case T_STRING:
        /* a NULL char pointer is reported as None */
        if (*(char**)field != NULL)
            return PyString_FromString(*(char**)field);
        Py_INCREF(Py_None);
        return Py_None;
    case T_STRING_INPLACE:
        return PyString_FromString((char*)field);
    case T_CHAR:
        return PyString_FromStringAndSize((char*)field, 1);
    case T_OBJECT:
        /* an unset object member reads as None */
        value = *(PyObject **)field;
        if (value == NULL)
            value = Py_None;
        Py_INCREF(value);
        return value;
    case T_OBJECT_EX:
        /* members created for __slots__ use T_OBJECT_EX: an unset member
           raises AttributeError instead of reading as None */
        value = *(PyObject **)field;
        if (value == NULL)
            PyErr_SetString(PyExc_AttributeError, l->name);
        Py_XINCREF(value);
        return value;
#ifdef HAVE_LONG_LONG
    case T_LONGLONG:
        return PyLong_FromLongLong(*(PY_LONG_LONG *)field);
    case T_ULONGLONG:
        return PyLong_FromUnsignedLongLong(*(unsigned PY_LONG_LONG *)field);
#endif /* HAVE_LONG_LONG */
    default:
        PyErr_SetString(PyExc_SystemError, "bad memberdescr type");
        return NULL;
    }
}
終於看到了根據member所記錄的偏移值和類型來訪問屬性內存的代碼!
python slots源碼分析