SLUB结构体创建及创建slab分析
Posted Loopers
tags:
篇首语:本文由小常识网(cha138.com)小编为大家整理,主要介绍了SLUB结构体创建及创建slab分析相关的知识,希望对你有一定的参考价值。
在上一篇文章中我们通过一个简单的例子大概描述了如何创建SLUB缓存,如何分配一个object。本文详细描述下涉及的结构体,从结构体的描述中就可以大概理解slub的工作原理了。
首先就是kmem_cache结构体
/*
* Slab cache management.
*/
struct kmem_cache
struct kmem_cache_cpu __percpu *cpu_slab;
/* Used for retriving partial slabs etc */
slab_flags_t flags;
unsigned long min_partial;
unsigned int size; /* The size of an object including meta data */
unsigned int object_size;/* The size of an object without meta data */
unsigned int offset; /* Free pointer offset. */
struct kmem_cache_order_objects oo;
/* Allocation and freeing of slabs */
struct kmem_cache_order_objects max;
struct kmem_cache_order_objects min;
gfp_t allocflags; /* gfp flags to use on each alloc */
int refcount; /* Refcount for slab cache destroy */
void (*ctor)(void *);
unsigned int inuse; /* Offset to metadata */
unsigned int align; /* Alignment */
unsigned int red_left_pad; /* Left redzone padding size */
const char *name; /* Name (only for display!) */
struct list_head list; /* List of slab caches */
struct kmem_cache_node *node[MAX_NUMNODES];
;
- cpu_slab: 一个per_cpu变量,对于每个CPU都有一个本地的缓冲池,当分配Object的时候优先从per-cpu中分配
- flags: 分配slab时候的一些掩码
- min_partial: kmem_cache_node节点的partial链表上至少要保留的slab个数(下限)
- size: 此size是元数据对齐后的大小
- object_size: 是调用kmem_cache_create传递进来的size,所以说object_size <= size
- offset: 空闲指针(free pointer)在object内部的偏移。一个slab中有很多object,那各个空闲Object之间是如何联系的?就是通过这个指针:在object的地址+offset处,存放着下一个空闲Object的地址
- oo: 低16位代表一个slab中object的个数,高16位代表一个slab需要几个Page,order值
- max: 初始时等于oo,用来记录出现过的object个数最多的那个oo
- ctor: 创建slab缓冲池的构造函数
- inuse: 元数据相对于object起始地址的偏移(Offset to metadata)。注意它并不是已经使用的object个数;“刚创建slab时等于object的个数、代表已经使用的object个数”描述的是struct page中的inuse字段
- align: 对齐使用
- name: slab缓冲区的名字
- list: 系统中所有slab的链表
- node: slab节点
struct kmem_cache_cpu
void **freelist; /* Pointer to next available object */
unsigned long tid; /* Globally unique transaction id */
struct page *page; /* The slab from which we are allocating */
;
- freelist: 指向下一个可用的object
- tid: 一个全局唯一的事务id(transaction id)
- page: slab所属的page,此slab就是从这个page中分配出来的
/*
* The slab lists for all objects.
*/
struct kmem_cache_node
spinlock_t list_lock;
#ifdef CONFIG_SLUB
unsigned long nr_partial;
struct list_head partial;
#endif
;
- nr_partial: node的partial链表中slab的个数
- partial: 链表,挂着只分配出去一部分object(还有空闲object)的slab
看完了上面的结构体是不是感觉还是很晕,怎么办? 我们还是通过上节(SLUB的引入及举例说明)的例子来分析各个结构体是如何联系的。
slub_test = kmem_cache_create("slub_test", sizeof(struct student), 0, 0, NULL);
if(slub_test != NULL)
printk("slub_test create success!\\n");
当调用kmem_cache_create的时候,代码流程是:
struct kmem_cache *
kmem_cache_create_usercopy(const char *name, unsigned int size, unsigned int align, slab_flags_t flags, unsigned int useroffset, unsigned int usersize, void (*ctor)(void *))
struct kmem_cache *s = NULL;
const char *cache_name;
/* Fail closed on bad usersize of useroffset values. */
if (WARN_ON(!usersize && useroffset) ||
WARN_ON(size < usersize || size - usersize < useroffset))
usersize = useroffset = 0;
if (!usersize)
s = __kmem_cache_alias(name, size, align, flags, ctor);
cache_name = kstrdup_const(name, GFP_KERNEL);
s = create_cache(cache_name, size,
calculate_alignment(flags, align, size),
flags, useroffset, usersize, ctor, NULL, NULL);
return s;
- 首先检查传递进来的usersize和useroffset是否合法,不合法则把两者都清0。在这里usersize=useroffset=0的
- 调用__kmem_cache_alias函数去检查传递的大小和一些flag是否可以和系统中已经创建的slab匹配上,如果匹配上则就不用重新申请了,直接使用别名就行,相当于链接过去,只是你看到的名字依然是slub_test,但是object是从别人那里拿到的
- 如果没有合适的,则重新调用create_cache重新创建一个slab
static struct kmem_cache *create_cache(const char *name, unsigned int object_size, unsigned int align, slab_flags_t flags, unsigned int useroffset, unsigned int usersize, void (*ctor)(void *),
struct mem_cgroup *memcg, struct kmem_cache *root_cache)
struct kmem_cache *s;
int err;
s = kmem_cache_zalloc(kmem_cache, GFP_KERNEL);
if (!s)
goto out;
s->name = name;
s->size = s->object_size = object_size;
s->align = align;
s->ctor = ctor;
s->useroffset = useroffset;
s->usersize = usersize;
err = __kmem_cache_create(s, flags);
if (err)
goto out_free_cache;
s->refcount = 1;
list_add(&s->list, &slab_caches);
memcg_link_cache(s);
- 调用kmem_cache_zalloc去分配一个kmem_cache结构,此时是从名为“kmem_cache”的slab缓存中分配一个object,分配到的object刚好就是我们需要的kmem_cache结构体,这里就不进去看kmem_cache_zalloc的代码实现了,就理解为现在已经有一个分配好的kmem_cache了
- 设置传递进来的各个参数
- 根据传递进来的参数,真正地去计算此slab需要多少个page等
int __kmem_cache_create(struct kmem_cache *s, slab_flags_t flags)
int err;
err = kmem_cache_open(s, flags);
if (err)
return err;
/* Mutex is not taken during early boot */
if (slab_state <= UP)
return 0;
memcg_propagate_slab_attrs(s);
err = sysfs_slab_add(s);
if (err)
__kmem_cache_release(s);
return err;
- 调用kmem_cache_open函数去设置我们的slab的一些参数
- 如果slab_state <= UP(还处于早期启动阶段),则直接返回0,不再往下执行。
- 将slab信息添加到sys节点下,这样/sys/kernel/slab下都会有每一个注册好的slab
在继续分析之前,我们先看几个小函数
static inline unsigned int order_objects(unsigned int order, unsigned int size)
return ((unsigned int)PAGE_SIZE << order) / size;
根据order和size,计算一个slab中可以放多少个object。比如order为0,size等于8,PAGE_SIZE=4K,也就是object的大小为8时,一个page中可以放 4096/8=512个object
static inline __attribute_const__ int get_order(unsigned long size)
if (__builtin_constant_p(size))
if (!size)
return BITS_PER_LONG - PAGE_SHIFT;
if (size < (1UL << PAGE_SHIFT))
return 0;
return ilog2((size) - 1) - PAGE_SHIFT + 1;
size--;
size >>= PAGE_SHIFT;
#if BITS_PER_LONG == 32
return fls(size);
#else
return fls64(size);
#endif
根据传递的size,计算出需要的order是多少。比如我们传递进来的值是128,则order等于0。大家可以算算大于4K的时候,怎么算
- 当size小于1<<PAGE_SHIFT的时候,order等于0,PAGE_SHIFT等于12, 1<<PAGE_SHIFT=4K
static inline unsigned int slab_order(unsigned int size,
unsigned int min_objects, unsigned int max_order,
unsigned int fract_leftover)
unsigned int min_order = slub_min_order;
unsigned int order;
if (order_objects(min_order, size) > MAX_OBJS_PER_PAGE)
return get_order(size * MAX_OBJS_PER_PAGE) - 1;
for (order = max(min_order, (unsigned int)get_order(min_objects * size));
order <= max_order; order++)
unsigned int slab_size = (unsigned int)PAGE_SIZE << order;
unsigned int rem;
rem = slab_size % size;
if (rem <= slab_size / fract_leftover)
break;
return order;
- 此函数的意思是根据slab object的大小计算出需要多少个order
- 看下参数
- size: object的size
- min_objects: 一个slab中至少要容纳的object个数,是根据如下公式计算出来的
min_objects = 4 * (fls(nr_cpu_ids) + 1); //nr_cpu_id代表cpu的个数
比如当前CPU的个数是4,fls返回最高置位bit的位置(从1开始计数),则fls(4)等于3,则min_objects=4*(3+1)=16
-
- max_order: 代表最大order,系统默认是3。也就是说系统认为一次分配8个page(2的3次方)就已经是比较昂贵的分配了。
- fract_leftover: 这是一个分母,表示允许浪费的空间最多为slab大小的1/fract_leftover;调用者每次会把它除以2之后再重新参与运算。
这个函数的算法是:
- 最小order的object数大于MAX_OBJS_PER_PAGE(32767)的,就用get_order(size * MAX_OBJS_PER_PAGE)获取一个order,通常是不走到这里的
- 最小order,到最大order之间的遍历。假设这里最小order是0,最大是3。
- if (rem <= slab_size / fract_leftover) 这句话的意思是:如果一个slab放满object之后剩余的空间不超过slab大小的1/fract_leftover(比如1/16),就接受当前的order并跳出循环;如果剩余空间超过这个比例,则认为是浪费空间,继续尝试更大的order。
这些小函数看完,我们在看一个包含各个小函数的大函数
static inline int calculate_order(unsigned int size)
unsigned int order;
unsigned int min_objects;
unsigned int max_objects;
/*
* Attempt to find best configuration for a slab. This
* works by first attempting to generate a layout with
* the best configuration and backing off gradually.
*
* First we increase the acceptable waste in a slab. Then
* we reduce the minimum objects required in a slab.
*/
min_objects = slub_min_objects;
if (!min_objects)
min_objects = 4 * (fls(nr_cpu_ids) + 1); //nr_cpu_ids=4, 则min_objects=16个
max_objects = order_objects(slub_max_order, size); //2的3次方是昂贵的页,除size就是可以申请的最大的object
min_objects = min(min_objects, max_objects);
while (min_objects > 1)
unsigned int fraction;
fraction = 16;
while (fraction >= 4)
order = slab_order(size, min_objects,
slub_max_order, fraction);
if (order <= slub_max_order)
return order;
fraction /= 2;
min_objects--;
/*
* We were unable to place multiple objects in a slab. Now
* lets see if we can place a single object there.
*/
order = slab_order(size, 1, slub_max_order, 1);
if (order <= slub_max_order)
return order;
/*
* Doh this slab cannot be placed using slub_max_order.
*/
order = slab_order(size, 1, MAX_ORDER, 1);
if (order < MAX_ORDER)
return order;
return -ENOSYS;
- 根据我们的例子,min_objects为16,max_objects=(4096<<3)/8=4096
- while循环中会通过运行找出合理的order,则这里我们的order=0
- 如果while循环中没有找到合适的order,则退一步,只要求一个slab中能放下1个object,在slub_max_order范围内计算order
- 如果还是不行,再在MAX_ORDER(11)范围内计算order
- 至此我们已经计算出适合我们objectsize的order了,此order的值为0
再看一个函数,会根据order的值计算kmem_cache的一些值的大小
static int calculate_sizes(struct kmem_cache *s, int forced_order)
slab_flags_t flags = s->flags;
unsigned int size = s->object_size;
unsigned int order;
/*
* Round up object size to the next word boundary. We can only
* place the free pointer at word boundaries and this determines
* the possible location of the free pointer.
*/
size = ALIGN(size, sizeof(void *));
/*
* SLUB stores one object immediately after another beginning from
* offset 0. In order to align the objects we have to simply size
* each object to conform to the alignment.
*/
size = ALIGN(size, s->align);
s->size = size;
if (forced_order >= 0)
order = forced_order;
else
order = calculate_order(size);
if ((int)order < 0)
return 0;
/*
* Determine the number of objects per slab
*/
s->oo = oo_make(order, size);
s->min = oo_make(get_order(size), size);
if (oo_objects(s->oo) > oo_objects(s->max))
s->max = s->oo;
return !!oo_objects(s->oo);
- 对齐计算:先把object的大小向上对齐到字(指针大小)边界,因为指向下一个空闲object的指针(free pointer)只能放在字边界上,这也决定了free pointer可以存放的位置
- 计算size对齐
- 此处forced_order=-1, 则通过calculate_order函数计算出size等于8的order等于0
- 如果order小于0,则返回
static inline struct kmem_cache_order_objects oo_make(unsigned int order,
unsigned int size)
struct kmem_cache_order_objects x =
(order << OO_SHIFT) + order_objects(order, size)
;
return x;
- 所以说oo的值等于(order<<16)+object的个数=0+512=512
- 则s->min=s->oo=s->max=512
static int kmem_cache_open(struct kmem_cache *s, slab_flags_t flags)
s->flags = kmem_cache_flags(s->size, flags, s->name, s->ctor);
if (!calculate_sizes(s, -1))
goto error;
/*
* The larger the object size is, the more pages we want on the partial
* list to avoid pounding the page allocator excessively.
*/
set_min_partial(s, ilog2(s->size) / 2); //min_partial是node的partial链表上至少保留的slab个数
set_cpu_partial(s); //per cpu上最大的free_object的个数
if (!init_kmem_cache_nodes(s))
goto error;
if (alloc_kmem_cache_cpus(s))
return 0;
- ilog2就是数学上的log2,ilog2(8)=3,3/2=1;set_min_partial会保证结果不小于MIN_PARTIAL,所以kmem_cache_node中partial链表的最小个数等于5; #define MIN_PARTIAL 5
- 设置per_cpu 上最大的object个数,这里设置为30. s->cpu_partial = 30;
- 分配kmem_cache_node结构
- 分配kmem_cache_cpu结构
创建完毕之后,就多出来一个名为slub_test的slab,此slab的object个数是512的,size是8。order等于0。此时只是建立好个slab缓冲区,里面还是没有object的。也就是freelist指向为NULL的。
创建完毕后,大家可以去/sys/kernel/slab/slub_test下看一些节点的信息:
/sys/kernel/slab/slub_test# ls
aliases destroy_by_rcu order slab_size
align free_calls partial slabs
alloc_calls hwcache_align poison slabs_cpu_partial
cache_dma min_partial reclaim_account store_user
cgroup object_size red_zone total_objects
cpu_partial objects reserved trace
cpu_slabs objects_partial sanity_checks validate
ctor objs_per_slab shrink
/sys/kernel/slab/slub_test #
以上是关于SLUB结构体创建及创建slab分析的主要内容,如果未能解决你的问题,请参考以下文章