crush 核心函数 crush_do_rule
Posted Jason__Zhou
tags:
篇首语:本文由小常识网(cha138.com)小编为大家整理,主要介绍了crush 核心函数 crush_do_rule相关的知识,希望对你有一定的参考价值。
crush 核心函数
crush_do_rule
位置crush/mapper.c
重要过程
- crush_do_rule: do crushrules iteratively
- crush_choose_firstn: choose buckets or devices of specified type recursively
- crush_bucket_choose: directly choose a son of the input bucket
步骤操作标志
/* step op codes */
enum
CRUSH_RULE_NOOP = 0,
CRUSH_RULE_TAKE = 1, /* arg1 = value to start with */
CRUSH_RULE_CHOOSE_FIRSTN = 2, /* arg1 = num items to pick */
/* arg2 = type */
CRUSH_RULE_CHOOSE_INDEP = 3, /* same */
CRUSH_RULE_EMIT = 4, /* no args */
CRUSH_RULE_CHOOSELEAF_FIRSTN = 6,
CRUSH_RULE_CHOOSELEAF_INDEP = 7,
CRUSH_RULE_SET_CHOOSE_TRIES = 8, /* override choose_total_tries */
CRUSH_RULE_SET_CHOOSELEAF_TRIES = 9, /* override chooseleaf_descend_once */
CRUSH_RULE_SET_CHOOSE_LOCAL_TRIES = 10,
CRUSH_RULE_SET_CHOOSE_LOCAL_FALLBACK_TRIES = 11,
CRUSH_RULE_SET_CHOOSELEAF_VARY_R = 12,
CRUSH_RULE_SET_CHOOSELEAF_STABLE = 13
;
最核心的函数
/**
* crush_do_rule - calculate a mapping with the given input and rule
* @map: the crush_map
* @ruleno: the rule id
* @x: hash input
* @result: pointer to result vector
* @result_max: maximum result size
* @weight: weight vector (for map leaves)
* @weight_max: size of weight vector
* @scratch: scratch vector for private use; must be >= 3 * result_max
*/
int crush_do_rule(const struct crush_map *map,
int ruleno, int x, int *result, int result_max,
const __u32 *weight, int weight_max,
int *scratch)
int result_len;
int *a = scratch;
int *b = scratch + result_max;
int *c = scratch + result_max*2;
int recurse_to_leaf;
int *w;
int wsize = 0;
int *o;
int osize;
int *tmp;
struct crush_rule *rule;
__u32 step;
int i, j;
int numrep;
int out_size;
/*
* the original choose_total_tries value was off by one (it
* counted "retries" and not "tries"). add one.
*/
int choose_tries = map->choose_total_tries + 1;
int choose_leaf_tries = 0;
/*
* the local tries values were counted as "retries", though,
* and need no adjustment
*/
int choose_local_retries = map->choose_local_tries;
int choose_local_fallback_retries = map->choose_local_fallback_tries;
int vary_r = map->chooseleaf_vary_r;
int stable = map->chooseleaf_stable;
if ((__u32)ruleno >= map->max_rules)
dprintk(" bad ruleno %d\\n", ruleno);
return 0;
rule = map->rules[ruleno];
result_len = 0;
w = a;
o = b;
for (step = 0; step < rule->len; step++)
int firstn = 0;
struct crush_rule_step *curstep = &rule->steps[step];
switch (curstep->op)
case CRUSH_RULE_TAKE:
if ((curstep->arg1 >= 0 &&
curstep->arg1 < map->max_devices) ||
(-1-curstep->arg1 >= 0 &&
-1-curstep->arg1 < map->max_buckets &&
map->buckets[-1-curstep->arg1]))
w[0] = curstep->arg1;
wsize = 1;
else
dprintk(" bad take value %d\\n", curstep->arg1);
break;
case CRUSH_RULE_SET_CHOOSE_TRIES:
if (curstep->arg1 > 0)
choose_tries = curstep->arg1;
break;
case CRUSH_RULE_SET_CHOOSELEAF_TRIES:
if (curstep->arg1 > 0)
choose_leaf_tries = curstep->arg1;
break;
case CRUSH_RULE_SET_CHOOSE_LOCAL_TRIES:
if (curstep->arg1 >= 0)
choose_local_retries = curstep->arg1;
break;
case CRUSH_RULE_SET_CHOOSE_LOCAL_FALLBACK_TRIES:
if (curstep->arg1 >= 0)
choose_local_fallback_retries = curstep->arg1;
break;
case CRUSH_RULE_SET_CHOOSELEAF_VARY_R:
if (curstep->arg1 >= 0)
vary_r = curstep->arg1;
break;
case CRUSH_RULE_SET_CHOOSELEAF_STABLE:
if (curstep->arg1 >= 0)
stable = curstep->arg1;
break;
case CRUSH_RULE_CHOOSELEAF_FIRSTN:
case CRUSH_RULE_CHOOSE_FIRSTN:
firstn = 1;
/* fall through */
case CRUSH_RULE_CHOOSELEAF_INDEP:
case CRUSH_RULE_CHOOSE_INDEP:
if (wsize == 0)
break;
recurse_to_leaf =
curstep->op ==
CRUSH_RULE_CHOOSELEAF_FIRSTN ||
curstep->op ==
CRUSH_RULE_CHOOSELEAF_INDEP;
/* reset output */
osize = 0;
for (i = 0; i < wsize; i++)
int bno;
/*
* see CRUSH_N, CRUSH_N_MINUS macros.
* basically, numrep <= 0 means relative to
* the provided result_max
*/
numrep = curstep->arg1;
if (numrep <= 0)
numrep += result_max;
if (numrep <= 0)
continue;
j = 0;
/* make sure bucket id is valid */
bno = -1 - w[i];
if (bno < 0 || bno >= map->max_buckets)
// w[i] is probably CRUSH_ITEM_NONE
dprintk(" bad w[i] %d\\n", w[i]);
continue;
if (firstn)
int recurse_tries;
if (choose_leaf_tries)
recurse_tries =
choose_leaf_tries;
else if (map->chooseleaf_descend_once)
recurse_tries = 1;
else
recurse_tries = choose_tries;
osize += crush_choose_firstn(
map,
map->buckets[bno],
weight, weight_max,
x, numrep,
curstep->arg2,
o+osize, j,
result_max-osize,
choose_tries,
recurse_tries,
choose_local_retries,
choose_local_fallback_retries,
recurse_to_leaf,
vary_r,
stable,
c+osize,
0);
else
out_size = ((numrep < (result_max-osize)) ?
numrep : (result_max-osize));
crush_choose_indep(
map,
map->buckets[bno],
weight, weight_max,
x, out_size, numrep,
curstep->arg2,
o+osize, j,
choose_tries,
choose_leaf_tries ?
choose_leaf_tries : 1,
recurse_to_leaf,
c+osize,
0);
osize += out_size;
if (recurse_to_leaf)
/* copy final _leaf_ values to output set */
memcpy(o, c, osize*sizeof(*o));
/* swap o and w arrays */
tmp = o;
o = w;
w = tmp;
wsize = osize;
break;
case CRUSH_RULE_EMIT:
for (i = 0; i < wsize && result_len < result_max; i++)
result[result_len] = w[i];
result_len++;
wsize = 0;
break;
default:
dprintk(" unknown op %d at step %d\\n",
curstep->op, step);
break;
return result_len;
crush_choose_firstn - choose numrep distinct items of given type
/**
* crush_choose_firstn - choose numrep distinct items of given type
* @map: the crush_map
* @bucket: the bucket we are choose an item from
* @x: crush input value
* @numrep: the number of items to choose
* @type: the type of item to choose
* @out: pointer to output vector
* @outpos: our position in that vector
* @out_size: size of the out vector
* @tries: number of attempts to make
* @recurse_tries: number of attempts to have recursive chooseleaf make
* @local_retries: localized retries
* @local_fallback_retries: localized fallback retries
* @recurse_to_leaf: true if we want one device under each item of given type (chooseleaf instead of choose)
* @stable: stable mode starts rep=0 in the recursive call for all replicas
* @vary_r: pass r to recursive calls
* @out2: second output vector for leaf items (if @recurse_to_leaf)
* @parent_r: r value passed from the parent
*/
static int crush_choose_firstn(const struct crush_map *map,
struct crush_bucket *bucket,
const __u32 *weight, int weight_max,
int x, int numrep, int type,
int *out, int outpos,
int out_size,
unsigned int tries,
unsigned int recurse_tries,
unsigned int local_retries,
unsigned int local_fallback_retries,
int recurse_to_leaf,
unsigned int vary_r,
unsigned int stable,
int *out2,
int parent_r)
int rep;
unsigned int ftotal, flocal;
int retry_descent, retry_bucket, skip_rep;
struct crush_bucket *in = bucket;
int r;
int i;
int item = 0;
int itemtype;
int collide, reject;
int count = out_size;
dprintk("CHOOSE%s bucket %d x %d outpos %d numrep %d tries %d \\
recurse_tries %d local_retries %d local_fallback_retries %d \\
parent_r %d stable %d\\n",
recurse_to_leaf ? "_LEAF" : "",
bucket->id, x, outpos, numrep,
tries, recurse_tries, local_retries, local_fallback_retries,
parent_r, stable);
for (rep = stable ? 0 : outpos; rep < numrep && count > 0 ; rep++)
/* keep trying until we get a non-out, non-colliding item */
ftotal = 0;
skip_rep = 0;
do
retry_descent = 0;
in = bucket; /* initial bucket */
/* choose through intervening buckets */
flocal = 0;
do
collide = 0;
retry_bucket = 0;
r = rep + parent_r;
/* r' = r + f_total */
r += ftotal;
/* bucket choose */
if (in->size == 0)
reject = 1;
goto reject;
if (local_fallback_retries > 0 &&
flocal >= (in->size>>1) &&
flocal > local_fallback_retries)
item = bucket_perm_choose(in, x, r);
else
item = crush_bucket_choose(in, x, r);
if (item >= map->max_devices)
dprintk(" bad item %d\\n", item);
skip_rep = 1;
break;
/* desired type? */
if (item < 0)
itemtype = map->buckets[-1-item]->type;
else
itemtype = 0;
dprintk(" item %d type %d\\n", item, itemtype);
/* keep going? */
if (itemtype != type)
if (item >= 0 ||
(-1-item) >= map->max_buckets)
dprintk(" bad item type %d\\n", type);
skip_rep = 1;
break;
in = map->buckets[-1-item];
retry_bucket = 1;
continue;
/* collision? */
for (i = 0; i < outpos; i++)
if (out[i] == item)
collide = 1;
break;
reject = 0;
if (!collide && recurse_to_leaf)
if (item < 0)
int sub_r;
if (vary_r)
sub_r = r >> (vary_r-1);
else
sub_r = 0;
if (crush_choose_firstn(map,
map->buckets[-1-item],
weight, weight_max,
x, stable ? 1 : outpos+1, 0,
out2, outpos, count,
recurse_tries, 0,
local_retries,
local_fallback_retries,
0,
vary_r,
stable,
NULL,
sub_r) <= outpos)
/* didn't get leaf */
reject = 1;
else
/* we already have a leaf! */
out2[outpos] = item;
if (!reject)
/* out? */
if (itemtype == 0)
reject = is_out(map, weight,
weight_max,
item, x);
else
reject = 0;
reject:
if (reject || collide)
ftotal++;
flocal++;
if (collide && flocal <= local_retries)
/* retry locally a few times */
retry_bucket = 1;
else if (local_fallback_retries > 0 &&
flocal <= in->size + local_fallback_retries)
/* exhaustive bucket search */
retry_bucket = 1;
else if (ftotal < tries)
/* then retry descent */
retry_descent = 1;
else
/* else give up */
skip_rep = 1;
dprintk(" reject %d collide %d "
"ftotal %u flocal %u\\n",
reject, collide, ftotal,
flocal);
while (retry_bucket);
while (retry_descent);
if (skip_rep)
dprintk("skip rep\\n");
continue;
dprintk("CHOOSE got %d\\n", item);
out[outpos] = item;
outpos++;
count--;
#ifndef __KERNEL__
if (map->choose_tries && ftotal <= map->choose_total_tries)
map->choose_tries[ftotal]++;
#endif
dprintk("CHOOSE returns %d\\n", outpos);
return outpos;
crush_bucket_choose
static int crush_bucket_choose(struct crush_bucket *in, int x, int r)
dprintk(" crush_bucket_choose %d x=%d r=%d\\n", in->id, x, r);
BUG_ON(in->size == 0);
switch (in->alg)
case CRUSH_BUCKET_UNIFORM:
return bucket_uniform_choose((struct crush_bucket_uniform *)in,
x, r);
case CRUSH_BUCKET_LIST:
return bucket_list_choose((struct crush_bucket_list *)in,
x, r);
case CRUSH_BUCKET_TREE:
return bucket_tree_choose((struct crush_bucket_tree *)in,
x, r);
case CRUSH_BUCKET_STRAW:
return bucket_straw_choose((struct crush_bucket_straw *)in,
x, r);
case CRUSH_BUCKET_STRAW2:
return bucket_straw2_choose((struct crush_bucket_straw2 *)in,
x, r);
default:
dprintk("unknown bucket %d alg %d\\n", in->id, in->alg);
return in->items[0];
bucket_straw_choose
static int bucket_straw_choose(struct crush_bucket_straw *bucket,
int x, int r)
__u32 i;
int high = 0;
__u64 high_draw = 0;
__u64 draw;
for (i = 0; i < bucket->h.size; i++)
draw = crush_hash32_3(bucket->h.hash, x, bucket->h.items[i], r);
draw &= 0xffff;
draw *= bucket->straws[i];
if (i == 0 || draw > high_draw)
high = i;
high_draw = draw;
return bucket->h.items[high];
以上是关于crush 核心函数 crush_do_rule的主要内容,如果未能解决你的问题,请参考以下文章
ceph Luminous crush device class(crush 设备分类)
CEPH CRUSH 算法源码分析 原文CEPH CRUSH algorithm source code analysis