Python 源码剖析LIST对象
Posted
tags:
篇首语:本文由小常识网(cha138.com)小编为大家整理,主要介绍了Python 源码剖析LIST对象相关的知识,希望对你有一定的参考价值。
1、PyListObject对象
2、PyListObject的创建与维护
3、PyListObject 对象缓冲池
4、Hack PyListObject
1、PyListObject对象
PyListObject 对象是变长对象,而且还是一个可变对象:
[listobject.h] typedef struct { PyObject_VAR_HEAD /* Vector of pointers to list elements. list[0] is ob_item[0], etc. */ PyObject **ob_item; int allocated; } PyListObject;
PyObject_VAR_HEAD 中有一个ob_size和allocated,allocated 指申请了内存的大小,ob_size指使用内存的大小,0<=ob_size<=allcated。
2、PyListObject的创建与维护
2.1、创建
唯一创建方法PyList_New:
listobject.c] PyObject* PyList_New(int size) { PyListObject *op; size_t nbytes; nbytes = size * sizeof(PyObject *); /* Check for overflow */ if (nbytes / sizeof(PyObject *) != (size_t)size) return PyErr_NoMemory(); //为PyListObject申请空间 if (num_free_lists) { //使用缓冲池 num_free_lists--; op = free_lists[num_free_lists]; _Py_NewReference((PyObject *)op); } else { //缓冲池中没有可用的对象,创建对象 op = PyObject_GC_New(PyListObject, &PyList_Type); } //为PyListObject对象中维护的元素列表申请空间 if (size <= 0) op->ob_item = NULL; else { op->ob_item = (PyObject **) PyMem_MALLOC(nbytes); memset(op->ob_item, 0, nbytes); } op->ob_size = size; op->allocated = size; _PyObject_GC_TRACK(op); return (PyObject *) op; }
先进行检查,再判断缓冲池是否可用,不可则malloc在堆上新建PyListObject。PyListObject缓冲池为free_lists:
/* Empty list reuse scheme to save calls to malloc and free */ #define MAXFREELISTS 80 static PyListObject *free_lists[MAXFREELISTS]; static int num_free_lists = 0;
放元素到List指定位置:
[listobject.c] int PyList_SetItem(register PyObject *op, register int i, register PyObject *newitem) { register PyObject *olditem; register PyObject **p; if (!PyList_Check(op)) { …… } if (i < 0 || i >= ((PyListObject *)op) -> ob_size) { Py_XDECREF(newitem); PyErr_SetString(PyExc_IndexError, "list assignment index out of range"); return -1; } p = ((PyListObject *)op) -> ob_item + i; olditem = *p; *p = newitem; Py_XDECREF(olditem); return 0; }
2.2、添加
插入元素:
[listobject.c] int PyList_Insert(PyObject *op, int where, PyObject *newitem) { ......//类型检查 return ins1((PyListObject *)op, where, newitem); }
static int ins1(PyListObject *self, int where, PyObject *v) { int i, n = self->ob_size; PyObject **items; ...... if (list_resize(self, n+1) == -1) return -1; if (where < 0) { where += n; if (where < 0) where = 0; } if (where > n) where = n; items = self->ob_item; for (i = n; --i >= where; ) items[i+1] = items[i]; Py_INCREF(v); items[where] = v; return 0; }
看看list_resize:
listobject.c] static int list_resize(PyListObject *self, int newsize) { PyObject **items; size_t new_allocated; int allocated = self->allocated; /* Bypass realloc() when a previous overallocation is large enough to accommodate the newsize. If the newsize falls lower than half the allocated size, then proceed with the realloc() to shrink the list. */ if (allocated >= newsize && newsize >= (allocated >> 1)) { assert(self->ob_item != NULL || newsize == 0); self->ob_size = newsize; return 0; } /* This over-allocates proportional to the list size, making room * for additional growth. The over-allocation is mild, but is * enough to give linear-time amortized behavior over a long * sequence of appends() in the presence of a poorly-performing * system realloc(). * The growth pattern is: 0, 4, 8, 16, 25, 35, 46, 58, 72, 88, ... */ new_allocated = (newsize >> 3) + (newsize < 9 ? 3 : 6) + newsize; if (newsize == 0) new_allocated = 0; items = self->ob_item; if (new_allocated <= ((~(size_t)0) / sizeof(PyObject *))) PyMem_RESIZE(items, PyObject *, new_allocated); else items = NULL; if (items == NULL) { PyErr_NoMemory(); return -1; } self->ob_item = items; self->ob_size = newsize; self->allocated = new_allocated; return 0; }
插入的时候,Python分四种情况处理:
1. newsize > ob_size && newsize < allocated :简单调整ob_size值。
2. newsize < ob_size && newsize > allocated/2 :简单调整ob_size值。
3. newsize < ob_size && newsize < allocated/2 :调用realloc,重新分配空间。
4. newsize > ob_size && newsize > allocated :调用realloc,重新分配空间。
调整完空间后开始插入元素,其中体现负值索引的特性:
static int ins1(PyListObject *self, int where, PyObject *v) { ...... if (where < 0) { where += n; if (where < 0) where = 0; } if (where > n) where = n; items = self->ob_item; for (i = n; --i >= where; ) items[i+1] = items[i]; Py_INCREF(v); items[where] = v; return 0; }
PyListObject 与 C++中的vector相似。
再看看list中的append:
[listobject.c] int PyList_Append(PyObject *op, PyObject *newitem) { if (PyList_Check(op) && (newitem != NULL)) return app1((PyListObject *)op, newitem); PyErr_BadInternalCall(); return -1; } static PyObject* listappend(PyListObject *self, PyObject *v) { if (app1(self, v) == 0) Py_RETURN_NONE; return NULL; } static int app1(PyListObject *self, PyObject *v) { int n = PyList_GET_SIZE(self); ...... if (list_resize(self, n+1) == -1) return -1; Py_INCREF(v); PyList_SET_ITEM(self, n, v); return 0; }
添加的元素添加在ob_size位置,而不是allocated位置上。
2.3、删除
PyListObject的删除remove:
[listobject.c] static PyObject * listremove(PyListObject *self, PyObject *v) { int i; for (i = 0; i < self->ob_size; i++) { int cmp = PyObject_RichCompareBool(self->ob_item[i], v, Py_EQ); if (cmp > 0) { if (list_ass_slice(self, i, i+1,(PyObject *)NULL) == 0) Py_RETURN_NONE; return NULL; } else if (cmp < 0) return NULL; } PyErr_SetString(PyExc_ValueError, "list.remove(x): x not in list"); return NULL; }
先用PyObject_RichCompareBool匹配元素是否在list上,在的话用list_ass_slice 删除:
[listobject.c] static int list_ass_slice(PyListObject *a, int ilow, int ihigh, PyObject *v) { PyObject *recycle_on_stack[8]; PyObject **recycle = recycle_on_stack; /* will allocate more if needed */ PyObject **item; int n; /* # of elements in replacement list */ int norig; /* # of elements in list getting replaced */ int d; /* Change in size */ #define b ((PyListObject *)v) if (v == NULL) n = 0; else { …… } norig = ihigh - ilow; d = n - norig; item = a->ob_item; //[1] s = norig * sizeof(PyObject *); if (s > sizeof(recycle_on_stack)) { recycle = (PyObject **)PyMem_MALLOC(s); if (recycle == NULL) { PyErr_NoMemory(); goto Error; } } memcpy(recycle, &item[ilow], s); //[2] if (d < 0) { /* Delete -d items */ memmove(&item[ihigh+d], &item[ihigh], (a->ob_size - ihigh)*sizeof(PyObject *)); list_resize(a, a->ob_size + d); item = a->ob_item; } …… //[3] for (k = norig - 1; k >= 0; --k) Py_XDECREF(recycle[k]); #undef b }
当v为NULL时执行删除动作,删除个数为ihigh-ilow=1,最后通过memove执行删除元素。PyListObject删除元素时会引起内存搬移动作。
list_ass_slice不仅仅是用做删除元素,它还可以进行插入元素的动作:
a[ilow:ihigh] = v if v != NULL.
del a[ilow:ihigh] if v == NULL.
3、PyListObject 对象缓冲池
刚刚提到的free_lists是在PyListObject销毁时产生的:
[listobject.c] static void list_dealloc(PyListObject *op) { int i; PyObject_GC_UnTrack(op); Py_TRASHCAN_SAFE_BEGIN(op) if (op->ob_item != NULL) { /* Do it backwards, for Christian Tismer. There‘s a simple test case where somehow this reduces thrashing when a *very* large list is created and immediately deleted. */ i = op->ob_size; while (--i >= 0) { Py_XDECREF(op->ob_item[i]); } PyMem_FREE(op->ob_item); } if (num_free_lists < MAXFREELISTS && PyList_CheckExact(op)) free_lists[num_free_lists++] = op; else op->ob_type->tp_free((PyObject *)op); Py_TRASHCAN_SAFE_END(op) }
在销毁PyListObject时,先减少list中的每个元素的引用计数,然后判断free_lists是否满了,没满就加上要删除的PyListObject,而下次创建PyListObject时,会优先从free_lists获取内存。而删除对象原来拥有的PyObject*列表,会因引用计数减少各归各处。
4、Hack PyListObject
可在list_print中添加:
printf("\nallocated=%d, ob_size=%d\n", op->allocated, op->ob_size);
观察PyListObject对内存的管理。
也可打印num_free_lists观察增删元素时对num_free_lists影响。
以上是关于Python 源码剖析LIST对象的主要内容,如果未能解决你的问题,请参考以下文章
《python解释器源码剖析》第3章--python中的字符串对象