Ceph Crush 演算法原始碼分析
簡介:
Ceph Crush演算法是Ceph分散式系統中用於資料分佈(定位)的核心演算法,其核心元件有crush rule、bucket algorithm。crush rule是可以自定義的選擇過程,bucket algorithm是從bucket選取item時使用的演算法,該演算法需要的主要引數有:placement seed(pgid)、crush map、副本數等。本文將簡要介紹Ceph Crush演算法的實現。
先來看一個crush map的簡單例項:
{
//devices
"devices": [
{
"id": 0,
"name" : "osd.0"
},
{
"id": 1,
"name": "osd.1"
},
...
{
"id": 9,
"name": "osd.9"
}
],
//type
"types": [
{
"type_id": 0,
"name": "osd"
},
{
"type_id" : 1,
"name": "host"
},
...
{
"type_id": 10,
"name": "root"
}
],
//buckets
"buckets": [
{
"id": -1,
"name": "default",
"type_id": 10,
"type_name": "root",
"weight" : 821160,
"alg": "straw",
"hash": "rjenkins1",
"items": [
{
"id": -2,
"weight": 142868,
"pos": 0
},
{
"id": -3,
"weight": 142868,
"pos": 1
},
{
"id": -8,
"weight": 178910,
"pos": 2
},
{
"id": -10,
"weight": 356514,
"pos": 3
}
]
},
{
"id": -2,
"name": "ceph-osd-240",
"type_id": 1,
"type_name": "host",
"weight": 142868,
"alg": "straw",
"hash": "rjenkins1",
"items": [
{
"id": 0,
"weight": 142868,
"pos": 0
}
]
},
{
"id": -3,
"name": "ceph-osd-241",
"type_id": 1,
"type_name": "host",
"weight": 142868,
"alg": "straw",
"hash": "rjenkins1",
"items": [
{
"id": 1,
"weight": 142868,
"pos": 0
}
]
},
{
"id": -8,
"name": "ceph-osd-66",
"type_id": 1,
"type_name": "host",
"weight": 178910,
"alg": "straw",
"hash": "rjenkins1",
"items": [
{
"id": 2,
"weight": 36044,
"pos": 0
},
{
"id": 3,
"weight": 32768,
"pos": 1
},
{
"id": 4,
"weight": 3276,
"pos": 2
},
{
"id": 5,
"weight": 34078,
"pos": 3
},
{
"id": 6,
"weight": 36044,
"pos": 4
},
{
"id": 7,
"weight": 36700,
"pos": 5
}
]
},
{
"id": -10,
"name": "ceph-osd-253",
"type_id": 1,
"type_name": "host",
"weight": 356514,
"alg": "straw",
"hash": "rjenkins1",
"items": [
{
"id": 8,
"weight": 178257,
"pos": 0
},
{
"id": 9,
"weight": 178257,
"pos": 1
}
]
}
],
//crush rule
"rules": [
{
"rule_id": 0,
"rule_name": "replicated_ruleset",
"ruleset": 0,
"type": 1,
"min_size": 1,
"max_size": 10,
"steps": [
{
"op": "take",
"item": -1,
"item_name": "default"
},
{
"op": "chooseleaf_firstn",
"num": 0,
"type": "host"
},
{
"op": "emit"
}
]
}
],
//相關的可調控的配置引數
"tunables": {
"choose_local_tries": 0,
"choose_local_fallback_tries": 0,
"choose_total_tries": 50,
"chooseleaf_descend_once": 1,
"chooseleaf_vary_r": 0,
"straw_calc_version": 1,
"allowed_bucket_algs": 22,
"profile": "unknown",
"optimal_tunables": 0,
"legacy_tunables": 0,
"require_feature_tunables": 1,
"require_feature_tunables2": 1,
"require_feature_tunables3": 0,
"has_v2_rules": 0,
"has_v3_rules": 0,
"has_v4_buckets": 0
}
}
Crush演算法實現中構造的主要資料結構:
crush rule中的step op codes
/*
 * Step op codes: the value stored in crush_rule_step.op.
 * Note that the value 5 is not assigned in this enum.
 */
enum {
	CRUSH_RULE_NOOP = 0,
	CRUSH_RULE_TAKE = 1,          /* arg1 = value to start with */
	CRUSH_RULE_CHOOSE_FIRSTN = 2, /* arg1 = num items to pick */
				      /* arg2 = type */
	CRUSH_RULE_CHOOSE_INDEP = 3,  /* same */
	CRUSH_RULE_EMIT = 4,          /* no args */
	CRUSH_RULE_CHOOSELEAF_FIRSTN = 6,
	CRUSH_RULE_CHOOSELEAF_INDEP = 7,
	CRUSH_RULE_SET_CHOOSE_TRIES = 8, /* override choose_total_tries */
	CRUSH_RULE_SET_CHOOSELEAF_TRIES = 9, /* override chooseleaf_descend_once */
	CRUSH_RULE_SET_CHOOSE_LOCAL_TRIES = 10,
	CRUSH_RULE_SET_CHOOSE_LOCAL_FALLBACK_TRIES = 11,
	CRUSH_RULE_SET_CHOOSELEAF_VARY_R = 12
};
crush_map結構體
/*
 * CRUSH map includes all buckets, rules, etc.
 */
struct crush_map {
/* bucket table: crush_do_rule looks up the bucket with (negative) id N at
 * index -1-N; its take step checks the entry for NULL before using it */
struct crush_bucket **buckets;
/* rule table, indexed by rule number (see crush_do_rule) */
struct crush_rule **rules;
/* upper bound used when validating an index into buckets[] */
__s32 max_buckets;
/* upper bound used when validating a rule number */
__u32 max_rules;
/* item ids >= max_devices are rejected as bad items */
__s32 max_devices;
/* choose local retries before re-descent */
__u32 choose_local_tries;
/* choose local attempts using a fallback permutation before
 * re-descent */
__u32 choose_local_fallback_tries;
/* choose attempts before giving up */
__u32 choose_total_tries;
/* attempt chooseleaf inner descent once for firstn mode; on
 * reject retry outer descent. Note that this does *not*
 * apply to a collision: in that case we will retry as we used
 * to. */
__u32 chooseleaf_descend_once;
/* if non-zero, feed r into chooseleaf, bit-shifted right by (r-1)
 * bits. a value of 1 is best for new clusters. for legacy clusters
 * that want to limit reshuffling, a value of 3 or 4 will make the
 * mappings line up a bit better with previous mappings. */
__u8 chooseleaf_vary_r;
/*
 * version 0 (original) of straw_calc has various flaws. version 1
 * fixes a few of them.
 */
__u8 straw_calc_version;
/*
 * allowed bucket algs is a bitmask, here the bit positions
 * are CRUSH_BUCKET_*. note that these are *bits* and
 * CRUSH_BUCKET_* values are not, so we need to or together (1
 * << CRUSH_BUCKET_WHATEVER). The 0th bit is not used to
 * minimize confusion (bucket type values start at 1).
 */
__u32 allowed_bucket_algs;
/* optional counters: when non-NULL, crush_choose_firstn increments
 * choose_tries[ftotal] for each item it places (only while
 * ftotal <= choose_total_tries) */
__u32 *choose_tries;
};
/*
 * CRUSH uses user-defined "rules" to describe how inputs should be
 * mapped to devices. A rule consists of sequence of steps to perform
 * to generate the set of output devices.
 *
 * One step of a rule. As interpreted by crush_do_rule:
 *   - TAKE:            arg1 is the device or bucket id to start from
 *   - CHOOSE*, CHOOSELEAF*: arg1 is the number of items to pick
 *                      (<= 0 means relative to result_max), arg2 is the type
 *   - SET_*:           arg1 is the override value
 *   - EMIT:            no arguments
 */
struct crush_rule_step {
/* one of the CRUSH_RULE_* step op codes */
__u32 op;
__s32 arg1;
__s32 arg2;
};
/*
 * The rule mask is used to describe what the rule is intended for.
 * Given a ruleset and size of output set, we search through the
 * rule list for a matching rule_mask.
 *
 * NOTE(review): the matching code is not part of this excerpt; presumably a
 * rule matches when ruleset and type are equal and
 * min_size <= size <= max_size -- confirm against find_rule.
 */
struct crush_rule_mask {
__u8 ruleset;
__u8 type;
__u8 min_size;
__u8 max_size;
};
crush_bucket結構體:
/*
 * Generic bucket header shared by all bucket algorithms.
 * crush_bucket_choose dispatches on ->alg and casts a pointer to this
 * struct to the algorithm-specific bucket type.
 */
struct crush_bucket {
__s32 id; /* this'll be negative */
__u16 type; /* non-zero; type=0 is reserved for devices */
__u8 alg; /* one of CRUSH_BUCKET_* */
__u8 hash; /* which hash function to use, CRUSH_HASH_* */
__u32 weight; /* 16-bit fixed point */
__u32 size; /* num items */
/* ids of the contained items; in the choose code a negative id denotes a
 * nested bucket and a non-negative id a device */
__s32 *items;
/*
 * cached random permutation: used for uniform bucket and for
 * the linear search fallback for the other bucket types.
 */
__u32 perm_x; /* @x for which *perm is defined */
__u32 perm_n; /* num elements of *perm that are permuted/defined */
__u32 *perm;
};
// A CRUSH rule: describes the policy used to map a PG to devices.
// len is the number of entries in steps[] (crush_do_rule iterates
// step = 0 .. len-1); steps[] is a trailing variable-length array.
struct crush_rule {
__u32 len;
struct crush_rule_mask mask;
struct crush_rule_step steps[0];
};
/*
 * Bucket using the "straw" algorithm (alg == CRUSH_BUCKET_STRAW).
 * The generic header h is the first member: crush_bucket_choose casts a
 * struct crush_bucket * to this type before calling bucket_straw_choose.
 */
struct crush_bucket_straw {
struct crush_bucket h;
__u32 *item_weights; /* 16-bit fixed point */
__u32 *straws; /* 16-bit fixed point */
};
程式碼簡析
Crush 演算法入口
/*
 * Turn a raw pg (full-precision ps) into the placement seed that is fed
 * to CRUSH. The pool id is mixed into the value so that different pools
 * do not end up using the same seeds.
 */
ps_t pg_pool_t::raw_pg_to_pps(pg_t pg) const
{
  if (!(flags & FLAG_HASHPSPOOL)) {
    // Legacy behavior: simply add ps and pool. Not a great idea, since
    // the PGs of every pool essentially overlap on top of each other:
    // 0.5 == 1.4 == 2.3 == ...
    return ceph_stable_mod(pg.ps(), pgp_num, pgp_num_mask) + pg.pool();
  }

  // Hash the pool id together with the ps so that the PGs of different
  // pools do not overlap.
  return crush_hash32_2(CRUSH_HASH_RJENKINS1,
                        ceph_stable_mod(pg.ps(), pgp_num, pgp_num_mask),
                        pg.pool());
}
//將PG對映到一組OSDS
int OSDMap::_pg_to_osds(const pg_pool_t& pool, pg_t pg,
vector<int> *osds, int *primary,
ps_t *ppps) const
{
// map to osds[]
ps_t pps = pool.raw_pg_to_pps(pg); // placement ps
//獲取pool的replicated size
unsigned size = pool.get_size();
// what crush rule? 獲取pool使用crush rule
int ruleno = crush->find_rule(pool.get_crush_ruleset(), pool.get_type(), size);
if (ruleno >= 0)
crush->do_rule(ruleno, pps, *osds, size, osd_weight);
//刪除不存在的osd
_remove_nonexistent_osds(pool, *osds);
*primary = -1;
//選取primary osd(第一個作為primary osd)
for (unsigned i = 0; i < osds->size(); ++i) {
if ((*osds)[i] != CRUSH_ITEM_NONE) {
*primary = (*osds)[i];
break;
}
}
if (ppps)
*ppps = pps;
return osds->size();
}
void do_rule(int rule, int x, vector<int>& out, int maxout,
¦ ¦ ¦ ¦const vector<__u32>& weight) const {
¦ Mutex::Locker l(mapper_lock);
¦ int rawout[maxout];
¦ int scratch[maxout * 3];
//開始crush過程:
//crush: crush map; rule:ruleset;x:placement seed; maxout:副本數;rawout:存放結果的資料
¦ int numrep = crush_do_rule(crush, rule, x, rawout, maxout, &weight[0], weight.size(), scratch);
¦ if (numrep < 0)
¦ ¦ numrep = 0;
¦ out.resize(numrep);
¦ for (int i=0; i<numrep; i++)
¦ ¦ out[i] = rawout[i];
}
函式crush_do_rule
/**
 * crush_do_rule - calculate a mapping with the given input and rule
 * @map: the crush_map (buckets, rules and tunables)
 * @ruleno: the rule id (index into map->rules)
 * @x: hash input (the placement seed)
 * @result: pointer to result vector (receives the chosen items)
 * @result_max: maximum result size (number of items wanted)
 * @weight: weight vector (for map leaves)
 * @weight_max: size of weight vector
 * @scratch: scratch vector for private use; must be >= 3 * result_max
 *
 * The scratch vector is split into three result_max-sized parts: two of
 * them alternate as the working set (w) and the output (o) of successive
 * choose steps, the third (c) receives the leaf items of chooseleaf steps.
 *
 * Returns the number of entries stored in @result; 0 when @ruleno is not
 * below map->max_rules.
 */
int crush_do_rule(const struct crush_map *map,
		  int ruleno, int x, int *result, int result_max,
		  const __u32 *weight, int weight_max,
		  int *scratch)
{
	int result_len;
	int *a = scratch;
	int *b = scratch + result_max;
	int *c = scratch + result_max*2;
	int recurse_to_leaf;
	int *w;
	int wsize = 0;
	int *o;
	int osize;
	int *tmp;
	struct crush_rule *rule;
	__u32 step;
	int i, j;
	int numrep;
	int out_size;
	/*
	 * the original choose_total_tries value was off by one (it
	 * counted "retries" and not "tries"). add one.
	 */
	int choose_tries = map->choose_total_tries + 1;
	int choose_leaf_tries = 0;
	/*
	 * the local tries values were counted as "retries", though,
	 * and need no adjustment
	 */
	int choose_local_retries = map->choose_local_tries;
	int choose_local_fallback_retries = map->choose_local_fallback_tries;
	int vary_r = map->chooseleaf_vary_r;

	if ((__u32)ruleno >= map->max_rules) {
		dprintk(" bad ruleno %d\n", ruleno);
		return 0;
	}

	/* pick the rule to execute */
	rule = map->rules[ruleno];
	result_len = 0;
	w = a;
	o = b;

	for (step = 0; step < rule->len; step++) {
		int firstn = 0;
		struct crush_rule_step *curstep = &rule->steps[step];

		switch (curstep->op) {
		case CRUSH_RULE_TAKE:
			/*
			 * the start item is either a device (id >= 0) or an
			 * existing bucket (buckets use negative ids)
			 */
			if ((curstep->arg1 >= 0 &&
			     curstep->arg1 < map->max_devices) ||
			    (-1-curstep->arg1 >= 0 &&
			     -1-curstep->arg1 < map->max_buckets &&
			     map->buckets[-1-curstep->arg1])) {
				w[0] = curstep->arg1;
				wsize = 1;
			} else {
				dprintk(" bad take value %d\n", curstep->arg1);
			}
			break;

		case CRUSH_RULE_SET_CHOOSE_TRIES:
			if (curstep->arg1 > 0)
				choose_tries = curstep->arg1;
			break;

		case CRUSH_RULE_SET_CHOOSELEAF_TRIES:
			if (curstep->arg1 > 0)
				choose_leaf_tries = curstep->arg1;
			break;

		case CRUSH_RULE_SET_CHOOSE_LOCAL_TRIES:
			if (curstep->arg1 >= 0)
				choose_local_retries = curstep->arg1;
			break;

		case CRUSH_RULE_SET_CHOOSE_LOCAL_FALLBACK_TRIES:
			if (curstep->arg1 >= 0)
				choose_local_fallback_retries = curstep->arg1;
			break;

		case CRUSH_RULE_SET_CHOOSELEAF_VARY_R:
			if (curstep->arg1 >= 0)
				vary_r = curstep->arg1;
			break;

		case CRUSH_RULE_CHOOSELEAF_FIRSTN:
		case CRUSH_RULE_CHOOSE_FIRSTN:
			firstn = 1;
			/* fall through */
		case CRUSH_RULE_CHOOSELEAF_INDEP:
		case CRUSH_RULE_CHOOSE_INDEP:
			if (wsize == 0)
				break;

			/*
			 * chooseleaf steps additionally descend from every
			 * chosen item down to a leaf (device)
			 */
			recurse_to_leaf =
				curstep->op ==
				 CRUSH_RULE_CHOOSELEAF_FIRSTN ||
				curstep->op ==
				 CRUSH_RULE_CHOOSELEAF_INDEP;

			/* reset output */
			osize = 0;

			for (i = 0; i < wsize; i++) {
				int bno;
				/*
				 * see CRUSH_N, CRUSH_N_MINUS macros.
				 * basically, numrep <= 0 means relative to
				 * the provided result_max
				 */
				/*
				 * number of items this step picks: arg1 if it
				 * is positive, otherwise arg1 + result_max;
				 * skip this input if that is still <= 0
				 */
				numrep = curstep->arg1;
				if (numrep <= 0) {
					numrep += result_max;
					if (numrep <= 0)
						continue;
				}
				j = 0;
				/* make sure bucket id is valid */
				bno = -1 - w[i];
				if (bno < 0 || bno >= map->max_buckets) {
					// w[i] is probably CRUSH_ITEM_NONE
					dprintk(" bad w[i] %d\n", w[i]);
					continue;
				}
				if (firstn) {
					/*
					 * number of tries handed to
					 * crush_choose_firstn as its
					 * recurse_tries argument
					 */
					int recurse_tries;
					if (choose_leaf_tries)
						recurse_tries =
							choose_leaf_tries;
					else if (map->chooseleaf_descend_once)
						recurse_tries = 1;
					else
						recurse_tries = choose_tries;
					/* choose numrep items below bucket bno */
					osize += crush_choose_firstn(
						map,
						map->buckets[bno],
						weight, weight_max,
						x, numrep,
						curstep->arg2,
						o+osize, j,
						result_max-osize,
						choose_tries,
						recurse_tries,
						choose_local_retries,
						choose_local_fallback_retries,
						recurse_to_leaf,
						vary_r,
						c+osize,
						0);
				} else {
					/* never write past the result_max slots left in o */
					out_size = ((numrep < (result_max-osize)) ?
						    numrep : (result_max-osize));
					crush_choose_indep(
						map,
						map->buckets[bno],
						weight, weight_max,
						x, out_size, numrep,
						curstep->arg2,
						o+osize, j,
						choose_tries,
						/* at least one leaf try */
						choose_leaf_tries ?
						   choose_leaf_tries : 1,
						recurse_to_leaf,
						c+osize,
						0);
					osize += out_size;
				}
			}

			/*
			 * for chooseleaf the leaf items were collected in c;
			 * they replace the intermediate items in o
			 */
			if (recurse_to_leaf)
				/* copy final _leaf_ values to output set */
				memcpy(o, c, osize*sizeof(*o));

			/*
			 * swap o and w arrays: the output of this step is
			 * the working set of the next one
			 */
			tmp = o;
			o = w;
			w = tmp;
			wsize = osize;
			break;

		/* emit: append the working set to the result vector */
		case CRUSH_RULE_EMIT:
			for (i = 0; i < wsize && result_len < result_max; i++) {
				result[result_len] = w[i];
				result_len++;
			}
			wsize = 0;
			break;

		default:
			dprintk(" unknown op %d at step %d\n",
				curstep->op, step);
			break;
		}
	}
	return result_len;
}
就上文中crush map的例項中的rule規則結合程式碼實現過程,可以知道,首先第一步take,從default開始選擇,其id為-1;然後進入第二步chooseleaf_firstn,相應的呼叫crush_choose_firstn函式,在default之下繼續,該步選擇bucket的型別為host,選擇的item數為0(如果是0,則選擇副本數個item,如果大於0,則選擇指定個數的item,小於0則與副本數求和,其和作為item的個數,如果和仍不大於0則跳過該次選擇),並且recurse_to_leaf會被置為true,表示會遞迴的選擇到osd device為止;最後一步emit是rule的結束標誌,將最終的結果儲存到result vector中。 與chooseleaf_firstn非常相似的是choose_firstn,該step只會選擇指定個數,指定型別的bucket/device。
注:scratch該引數被分成三部分(以副本數等分)用於不同的邏輯中,第一份用於存放step的最終結果,第二部分用於存放crush_choose_firstn的邏輯結果,第三部分用於crush_choose_firstn遞迴呼叫邏輯。
函式:crush_choose_firstn
/**
 * crush_choose_firstn - choose numrep distinct items of given type
 * @map: the crush_map
 * @bucket: the bucket we are choose an item from
 * @weight: weight vector, passed to is_out() for device items
 * @weight_max: size of the weight vector
 * @x: crush input value
 * @numrep: the number of items to choose
 * @type: the type of item to choose
 * @out: pointer to output vector
 * @outpos: our position in that vector
 * @out_size: size of the out vector
 * @tries: number of attempts to make
 * @recurse_tries: number of attempts to have recursive chooseleaf make
 * @local_retries: localized retries
 * @local_fallback_retries: localized fallback retries
 * @recurse_to_leaf: true if we want one device under each item of given type (chooseleaf instead of choose)
 * @vary_r: pass r to recursive calls
 * @out2: second output vector for leaf items (if @recurse_to_leaf)
 * @parent_r: r value passed from the parent
 *
 * Returns the updated @outpos, i.e. the position just past the last item
 * stored in @out.
 */
static int crush_choose_firstn(const struct crush_map *map,
			       struct crush_bucket *bucket,
			       const __u32 *weight, int weight_max,
			       int x, int numrep, int type,
			       int *out, int outpos,
			       int out_size,
			       unsigned int tries,
			       unsigned int recurse_tries,
			       unsigned int local_retries,
			       unsigned int local_fallback_retries,
			       int recurse_to_leaf,
			       unsigned int vary_r,
			       int *out2,
			       int parent_r)
{
	int rep;
	unsigned int ftotal, flocal;
	int retry_descent, retry_bucket, skip_rep;
	struct crush_bucket *in = bucket;
	int r;
	int i;
	int item = 0;
	int itemtype;
	int collide, reject;
	int count = out_size;

	dprintk("CHOOSE%s bucket %d x %d outpos %d numrep %d tries %d recurse_tries %d local_retries %d local_fallback_retries %d parent_r %d\n",
		recurse_to_leaf ? "_LEAF" : "",
		bucket->id, x, outpos, numrep,
		tries, recurse_tries, local_retries, local_fallback_retries,
		parent_r);

	/* one iteration per replica, while there is room left in out */
	for (rep = outpos; rep < numrep && count > 0 ; rep++) {
		/* keep trying until we get a non-out, non-colliding item */
		ftotal = 0;
		skip_rep = 0;
		do {
			retry_descent = 0;
			in = bucket;              /* initial bucket */

			/* choose through intervening buckets */
			flocal = 0;
			do {
				collide = 0;
				retry_bucket = 0;
				r = rep + parent_r;
				/* r' = r + f_total */
				r += ftotal;

				/* bucket choose */
				if (in->size == 0) {
					reject = 1;
					goto reject;
				}
				/*
				 * after enough local failures use the
				 * permutation-based choice instead of the
				 * bucket's own algorithm
				 */
				if (local_fallback_retries > 0 &&
				    flocal >= (in->size>>1) &&
				    flocal > local_fallback_retries)
					item = bucket_perm_choose(in, x, r);
				else
					/* pick one item out of bucket in */
					item = crush_bucket_choose(in, x, r);
				/* sanity check of the chosen item */
				if (item >= map->max_devices) {
					dprintk(" bad item %d\n", item);
					skip_rep = 1;
					break;
				}

				/* desired type? */
				if (item < 0)
					/* a bucket: use its declared type */
					itemtype = map->buckets[-1-item]->type;
				else
					itemtype = 0;
				dprintk(" item %d type %d\n", item, itemtype);

				/* keep going? */
				/*
				 * wrong type: give up on this replica if the
				 * item is a device (or an out-of-range
				 * bucket), otherwise continue choosing inside
				 * the bucket that was just picked
				 */
				if (itemtype != type) {
					if (item >= 0 ||
					    (-1-item) >= map->max_buckets) {
						dprintk(" bad item type %d\n", type);
						skip_rep = 1;
						break;
					}
					in = map->buckets[-1-item];
					retry_bucket = 1;
					continue;
				}

				/* collision? (item already present in out) */
				for (i = 0; i < outpos; i++) {
					if (out[i] == item) {
						collide = 1;
						break;
					}
				}

				reject = 0;
				/*
				 * chooseleaf and no collision: also find a
				 * leaf below the chosen item
				 */
				if (!collide && recurse_to_leaf) {
					if (item < 0) {
						int sub_r;
						if (vary_r)
							sub_r = r >> (vary_r-1);
						else
							sub_r = 0;
						/*
						 * recursive call: pick one
						 * item of type 0 below the
						 * chosen bucket into
						 * out2[outpos], with
						 * recurse_tries as its tries
						 */
						if (crush_choose_firstn(map,
							 map->buckets[-1-item],
							 weight, weight_max,
							 x, outpos+1, 0,
							 out2, outpos, count,
							 recurse_tries, 0,
							 local_retries,
							 local_fallback_retries,
							 0,
							 vary_r,
							 NULL,
							 sub_r) <= outpos)
							/* didn't get leaf */
							reject = 1;
					} else {
						/* we already have a leaf! */
						out2[outpos] = item;
					}
				}

				if (!reject) {
					/* out? */
					if (itemtype == 0)
						/* devices may be marked out */
						reject = is_out(map, weight,
								weight_max,
								item, x);
					else
						reject = 0;
				}

reject:
				/* no usable item: decide how to retry */
				if (reject || collide) {
					ftotal++;
					flocal++;

					if (collide && flocal <= local_retries)
						/* retry locally a few times */
						retry_bucket = 1;
					else if (local_fallback_retries > 0 &&
						 flocal <= in->size + local_fallback_retries)
						/* exhaustive bucket search */
						retry_bucket = 1;
					else if (ftotal < tries)
						/* then retry descent */
						retry_descent = 1;
					else
						/* else give up */
						skip_rep = 1;
					dprintk(" reject %d collide %d "
						"ftotal %u flocal %u\n",
						reject, collide, ftotal,
						flocal);
				}
			} while (retry_bucket);
		} while (retry_descent);

		if (skip_rep) {
			dprintk("skip rep\n");
			continue;
		}

		dprintk("CHOOSE got %d\n", item);
		out[outpos] = item;
		outpos++;
		count--;

		/* optional statistics: count how many tries this item needed */
		if (map->choose_tries && ftotal <= map->choose_total_tries)
			map->choose_tries[ftotal]++;
	}

	dprintk("CHOOSE returns %d\n", outpos);
	return outpos;
}
該函式簡單的說就是呼叫crush_bucket_choose(…)函式從指定的bucket中選擇合適的item,放入到out vector中,如果選中的item的型別不是期望的型別,且不是device,則基於當前的bucket繼續呼叫crush_bucket_choose(…);如果當前的step是chooseleaf_firstn,則遞迴呼叫crush_choose_firstn(…),遞迴呼叫選中的osd將臨時存放到out2 vector中,跳出遞迴後再複製給out。
注:recurse_tries引數表示遞迴choose leaf時允許的嘗試次數;在該實現中,它會在遞迴呼叫crush_choose_firstn時作為tries引數傳入(遞迴呼叫自身的recurse_tries則為0)。
crush_bucket_choose函式
static int crush_bucket_choose(struct crush_bucket *in, int x, int r)
{
dprintk(" crush_bucket_choose %d x=%d r=%d\n", in->id, x, r);
BUG_ON(in->size == 0);
switch (in->alg) {
case CRUSH_BUCKET_UNIFORM:
return bucket_uniform_choose((struct crush_bucket_uniform *)in,
x, r);
case CRUSH_BUCKET_LIST:
return bucket_list_choose((struct crush_bucket_list *)in,
x, r);
case CRUSH_BUCKET_TREE:
return bucket_tree_choose((struct crush_bucket_tree *)in,
x, r);
case CRUSH_BUCKET_STRAW:
return bucket_straw_choose((struct crush_bucket_straw *)in,
x, r);
case CRUSH_BUCKET_STRAW2:
return bucket_straw2_choose((struct crush_bucket_straw2 *)in,
x, r);
default:
dprintk("unknown bucket %d alg %d\n", in->id, in->alg);
return in->items[0];
}