Python 源码剖析LIST对象

Posted

tags:

篇首语:本文由小常识网(cha138.com)小编为大家整理,主要介绍了Python 源码剖析LIST对象相关的知识,希望对你有一定的参考价值。

1、PyListObject对象

2、PyListObject的创建与维护

3、PyListObject 对象缓冲池

4、Hack PyListObject


1、PyListObject对象

PyListObject 对象是变长对象,而且还是一个可变对象:

[listobject.h] 

typedef struct {

    PyObject_VAR_HEAD

    /* Vector of pointers to list elements.  list[0] is ob_item[0], etc. */

    PyObject **ob_item;

    int allocated;

} PyListObject;
PyObject_VAR_HEAD 中有一个ob_size和allocated,allocated 指申请了内存的大小,ob_size指使用内存的大小,0<=ob_size<=allcated。


 

2、PyListObject的创建与维护

2.1、创建

唯一创建方法PyList_New:

listobject.c] 

PyObject* PyList_New(int size)
{
    PyListObject *op;
    size_t nbytes;
 
    nbytes = size * sizeof(PyObject *);
    /* Check for overflow */
    if (nbytes / sizeof(PyObject *) != (size_t)size)
        return PyErr_NoMemory();
 
    //为PyListObject申请空间
    if (num_free_lists) {
        //使用缓冲池
        num_free_lists--;
        op = free_lists[num_free_lists];
        _Py_NewReference((PyObject *)op);
   } else {
        //缓冲池中没有可用的对象,创建对象
        op = PyObject_GC_New(PyListObject, &PyList_Type);
    }
    //为PyListObject对象中维护的元素列表申请空间
    if (size <= 0)
        op->ob_item = NULL;
    else {
        op->ob_item = (PyObject **) PyMem_MALLOC(nbytes);
        memset(op->ob_item, 0, nbytes);
    }
    op->ob_size = size;
    op->allocated = size;
    _PyObject_GC_TRACK(op);
    return (PyObject *) op;
}

先进行检查,再判断缓冲池是否可用,不可则malloc在堆上新建PyListObject。PyListObject缓冲池为free_lists:

/* Empty list reuse scheme to save calls to malloc and free */ 

#define MAXFREELISTS 80 

static PyListObject *free_lists[MAXFREELISTS]; 

static int num_free_lists = 0; 

 

放元素到List指定位置:

[listobject.c] 

int PyList_SetItem(register PyObject *op, register int i, register PyObject   *newitem)
{
    register PyObject *olditem;
    register PyObject **p;
    if (!PyList_Check(op)) {
        ……
    }
    if (i < 0 || i >= ((PyListObject *)op) -> ob_size) {
        Py_XDECREF(newitem);
        PyErr_SetString(PyExc_IndexError,
                "list assignment index out of range");
        return -1;
    }
    p = ((PyListObject *)op) -> ob_item + i;
    olditem = *p;
    *p = newitem;
    Py_XDECREF(olditem);
    return 0;
}

 


2.2、添加

插入元素:

[listobject.c] 

int PyList_Insert(PyObject *op, int where, PyObject *newitem)
{
    ......//类型检查
    return ins1((PyListObject *)op, where, newitem);
}
static int ins1(PyListObject *self, int where, PyObject *v)
{
    int i, n = self->ob_size;
    PyObject **items;
    ......
    if (list_resize(self, n+1) == -1)
        return -1;

    if (where < 0) {
        where += n;
        if (where < 0)
            where = 0;
    }
    if (where > n)
        where = n;
    items = self->ob_item;
    for (i = n; --i >= where; )
        items[i+1] = items[i];
    Py_INCREF(v);
    items[where] = v;
    return 0;
}

看看list_resize:

listobject.c]
static int list_resize(PyListObject *self, int newsize)
{
    PyObject **items;
    size_t new_allocated;
    int allocated = self->allocated;
 

    /* Bypass realloc() when a previous overallocation is large enough
       to accommodate the newsize.  If the newsize falls lower than half
       the allocated size, then proceed with the realloc() to shrink the list.
    */
    if (allocated >= newsize && newsize >= (allocated >> 1)) {
        assert(self->ob_item != NULL || newsize == 0);
        self->ob_size = newsize;
        return 0;
    }
 

    /* This over-allocates proportional to the list size, making room
     * for additional growth.  The over-allocation is mild, but is
     * enough to give linear-time amortized behavior over a long
     * sequence of appends() in the presence of a poorly-performing
     * system realloc().
     * The growth pattern is:  0, 4, 8, 16, 25, 35, 46, 58, 72, 88, ...
     */
    new_allocated = (newsize >> 3) + (newsize < 9 ? 3 : 6) + newsize;
    if (newsize == 0)
        new_allocated = 0;
    items = self->ob_item;
    if (new_allocated <= ((~(size_t)0) / sizeof(PyObject *)))
        PyMem_RESIZE(items, PyObject *, new_allocated);
    else
        items = NULL;
    if (items == NULL) {
        PyErr_NoMemory();
        return -1;
    }
    self->ob_item = items;
    self->ob_size = newsize;
    self->allocated = new_allocated;
    return 0;
}

插入的时候,Python分四种情况处理:

1.      newsize > ob_size && newsize < allocated :简单调整ob_size值。

2.      newsize < ob_size && newsize > allocated/2 :简单调整ob_size值。

3.      newsize < ob_size && newsize < allocated/2 :调用realloc,重新分配空间。

4.      newsize > ob_size && newsize > allocated :调用realloc,重新分配空间。

调整完空间后开始插入元素,其中体现负值索引的特性:

static int ins1(PyListObject *self, int where, PyObject *v)
{
    ......
    if (where < 0) {
        where += n;
        if (where < 0)
            where = 0;
    }
    if (where > n)
        where = n;
    items = self->ob_item;
    for (i = n; --i >= where; )
        items[i+1] = items[i];
    Py_INCREF(v);
    items[where] = v;
    return 0;
}

PyListObject 与 C++中的vector相似。

再看看list中的append:

[listobject.c]
int PyList_Append(PyObject *op, PyObject *newitem)
{
    if (PyList_Check(op) && (newitem != NULL))
        return app1((PyListObject *)op, newitem);
    PyErr_BadInternalCall();
    return -1;
}
 


static PyObject* listappend(PyListObject *self, PyObject *v)
{
    if (app1(self, v) == 0)
        Py_RETURN_NONE;
    return NULL;
}
 


static int app1(PyListObject *self, PyObject *v)
{
    int n = PyList_GET_SIZE(self);
......
    if (list_resize(self, n+1) == -1)
        return -1;
 

    Py_INCREF(v);
    PyList_SET_ITEM(self, n, v);
    return 0;
} 

添加的元素添加在ob_size位置,而不是allocated位置上。

 

2.3、删除

PyListObject的删除remove:

[listobject.c] 

static PyObject * listremove(PyListObject *self, PyObject *v)
{
    int i;
    for (i = 0; i < self->ob_size; i++) {
        int cmp = PyObject_RichCompareBool(self->ob_item[i], v, Py_EQ);
        if (cmp > 0) {
            if (list_ass_slice(self, i, i+1,(PyObject *)NULL) == 0)
                Py_RETURN_NONE;
            return NULL;
        }
        else if (cmp < 0)
            return NULL;
    }
    PyErr_SetString(PyExc_ValueError, "list.remove(x): x not in list");
    return NULL;
}

先用PyObject_RichCompareBool匹配元素是否在list上,在的话用list_ass_slice 删除:

[listobject.c]
static int list_ass_slice(PyListObject *a, int ilow, int ihigh, PyObject *v)
{
   PyObject *recycle_on_stack[8];
    PyObject **recycle = recycle_on_stack; /* will allocate more if needed */ 

    PyObject **item;
    int n; /* # of elements in replacement list */
    int norig; /* # of elements in list getting replaced */
    int d; /* Change in size */
#define b ((PyListObject *)v)
    if (v == NULL)
        n = 0;
else {
……
}
 

    norig = ihigh - ilow;
    d = n - norig;
item = a->ob_item;
    //[1] 

    s = norig * sizeof(PyObject *); 

    if (s > sizeof(recycle_on_stack)) { 

        recycle = (PyObject **)PyMem_MALLOC(s); 

        if (recycle == NULL) { 

            PyErr_NoMemory(); 

            goto Error; 

        } 

    } 

    memcpy(recycle, &item[ilow], s); 

 

//[2] 

    if (d < 0) { /* Delete -d items */
        memmove(&item[ihigh+d], &item[ihigh],
(a->ob_size - ihigh)*sizeof(PyObject *));
        list_resize(a, a->ob_size + d);
        item = a->ob_item;
    }
……
//[3] 

for (k = norig - 1; k >= 0; --k) 

        Py_XDECREF(recycle[k]);
#undef b
}

当v为NULL时执行删除动作,删除个数为ihigh-ilow=1,最后通过memove执行删除元素。PyListObject删除元素时会引起内存搬移动作。

list_ass_slice不仅仅是用做删除元素,它还可以进行插入元素的动作:

a[ilow:ihigh] = v if v != NULL.

del a[ilow:ihigh] if v == NULL. 

 


 

3、PyListObject 对象缓冲池

刚刚提到的free_lists是在PyListObject销毁时产生的:

[listobject.c] 

static void list_dealloc(PyListObject *op)

{

    int i;

    PyObject_GC_UnTrack(op);

    Py_TRASHCAN_SAFE_BEGIN(op)

    if (op->ob_item != NULL) {

        /* Do it backwards, for Christian Tismer.

           There‘s a simple test case where somehow this reduces

           thrashing when a *very* large list is created and

           immediately deleted. */

        i = op->ob_size;

        while (--i >= 0) {

            Py_XDECREF(op->ob_item[i]);

        }

        PyMem_FREE(op->ob_item);

    }

    if (num_free_lists < MAXFREELISTS && PyList_CheckExact(op))

        free_lists[num_free_lists++] = op;

    else

        op->ob_type->tp_free((PyObject *)op);

    Py_TRASHCAN_SAFE_END(op)

}

在销毁PyListObject时,先减少list中的每个元素的引用计数,然后判断free_lists是否满了,没满就加上要删除的PyListObject,而下次创建PyListObject时,会优先从free_lists获取内存。而删除对象原来拥有的PyObject*列表,会因引用计数减少各归各处。

 


 

4、Hack PyListObject

可在list_print中添加:

printf("\nallocated=%d, ob_size=%d\n", op->allocated, op->ob_size); 

观察PyListObject对内存的管理。

也可打印num_free_lists观察增删元素时对num_free_lists影响。

 





以上是关于Python 源码剖析LIST对象的主要内容,如果未能解决你的问题,请参考以下文章

《Python源码剖析》

C++源码剖析——list

python源码书籍

《python解释器源码剖析》第3章--python中的字符串对象

《python解释器源码剖析》第13章--python虚拟机中的类机制

Redis源码剖析--对象object