Feature #9410 » adaptive-crush-modify.patch
src/crush/CrushCompiler.cc | ||
---|---|---|
bool dopos = false;
|
||
switch (alg) {
|
||
case CRUSH_BUCKET_UNIFORM:
|
||
case CRUSH_BUCKET_LINEAR:
|
||
out << "\t# do not change bucket size (" << n << ") unnecessarily";
|
||
dopos = true;
|
||
break;
|
||
... | ... | |
alg = CRUSH_BUCKET_TREE;
|
||
else if (a == "straw")
|
||
alg = CRUSH_BUCKET_STRAW;
|
||
else if (a == "linear")
|
||
alg = CRUSH_BUCKET_LINEAR;
|
||
else {
|
||
err << "unknown bucket alg '" << a << "'" << std::endl << std::endl;
|
||
return -EINVAL;
|
||
... | ... | |
assert(0);
|
||
}
|
||
if (alg == CRUSH_BUCKET_UNIFORM) {
|
||
if (alg == CRUSH_BUCKET_UNIFORM || alg == CRUSH_BUCKET_LINEAR) {
|
||
if (!have_uniform_weight) {
|
||
have_uniform_weight = true;
|
||
uniform_weight = weight;
|
src/crush/CrushWrapper.cc | ||
---|---|---|
}
|
||
break;
|
||
case CRUSH_BUCKET_LINEAR:
|
||
::encode(((crush_bucket_linear*)crush->buckets[i])->item_weight, bl);
|
||
break;
|
||
default:
|
||
assert(0);
|
||
break;
|
||
... | ... | |
case CRUSH_BUCKET_STRAW:
|
||
size = sizeof(crush_bucket_straw);
|
||
break;
|
||
case CRUSH_BUCKET_LINEAR:
|
||
size = sizeof(crush_bucket_linear);
|
||
break;
|
||
default:
|
||
{
|
||
char str[128];
|
||
... | ... | |
break;
|
||
}
|
||
case CRUSH_BUCKET_LINEAR:
|
||
::decode(((crush_bucket_linear*)bucket)->item_weight, blp);
|
||
break;
|
||
default:
|
||
// We should have handled this case in the first switch statement
|
||
assert(0);
|
src/crush/CrushWrapper.h | ||
---|---|---|
out[i] = rawout[i];
|
||
}
|
||
void do_rule(int rule, int x, vector<int>& out, int maxout,
|
||
const vector<__u32>& weight, float balance_param) const {
|
||
Mutex::Locker l(mapper_lock);
|
||
int rawout[maxout];
|
||
int scratch[maxout * 3];
|
||
int numrep = crush_do_rule_wrapper(crush, rule, x, rawout, maxout,
|
||
&weight[0], weight.size(), scratch, balance_param);
|
||
if (numrep < 0)
|
||
numrep = 0;
|
||
out.resize(numrep);
|
||
for (int i=0; i<numrep; i++)
|
||
out[i] = rawout[i];
|
||
}
|
||
int read_from_file(const char *fn) {
|
||
bufferlist bl;
|
||
std::string error;
|
src/crush/builder.c | ||
---|---|---|
return NULL;
|
||
}
|
||
/* linear bucket */
|
||
struct crush_bucket_linear *
|
||
crush_make_linear_bucket(int hash, int type, int size,
|
||
int *items,
|
||
int item_weight)
|
||
{
|
||
int i;
|
||
struct crush_bucket_linear *bucket;
|
||
if (crush_multiplication_is_unsafe(size, item_weight)) {
|
||
//need more info
|
||
//printf("");
|
||
return NULL;
|
||
}
|
||
bucket = malloc(sizeof(*bucket));
|
||
if (!bucket)
|
||
return NULL;
|
||
memset(bucket, 0, sizeof(*bucket));
|
||
bucket->h.alg = CRUSH_BUCKET_LINEAR;
|
||
bucket->h.hash = hash;
|
||
bucket->h.type = type;
|
||
bucket->h.size = size;
|
||
bucket->h.weight = size * item_weight;
|
||
bucket->item_weight = item_weight;
|
||
bucket->h.items = malloc(sizeof(__s32) * size);
|
||
if (!bucket->h.items)
|
||
goto err;
|
||
bucket->h.perm = malloc(sizeof(__u32) * size);
|
||
if (!bucket->h.perm)
|
||
goto err;
|
||
for (i=0; i<size; i++)
|
||
bucket->h.items[i] = items[i];
|
||
return bucket;
|
||
err:
|
||
free(bucket->h.perm);
|
||
free(bucket->h.items);
|
||
free(bucket);
|
||
return NULL;
|
||
}
|
||
struct crush_bucket*
|
||
... | ... | |
case CRUSH_BUCKET_STRAW:
|
||
return (struct crush_bucket *)crush_make_straw_bucket(hash, type, size, items, weights);
|
||
case CRUSH_BUCKET_LINEAR:
|
||
if (size && weights)
|
||
item_weight = weights[0];
|
||
else
|
||
item_weight = 0;
|
||
return (struct crush_bucket *)crush_make_linear_bucket(hash, type, size, items, item_weight);
|
||
}
|
||
return 0;
|
||
}
|
||
... | ... | |
return crush_calc_straw(bucket);
|
||
}
|
||
int crush_add_linear_bucket_item(struct crush_bucket_linear *bucket, int item, int weight)
|
||
{
|
||
int newsize = bucket->h.size + 1;
|
||
void *_realloc = NULL;
|
||
if ((_realloc = realloc(bucket->h.items, sizeof(__s32)*newsize)) == NULL) {
|
||
return -ENOMEM;
|
||
} else {
|
||
bucket->h.items = _realloc;
|
||
}
|
||
if ((_realloc = realloc(bucket->h.perm, sizeof(__u32)*newsize)) == NULL) {
|
||
return -ENOMEM;
|
||
} else {
|
||
bucket->h.perm = _realloc;
|
||
}
|
||
bucket->h.items[newsize-1] = item;
|
||
if (crush_addition_is_unsafe(bucket->h.weight, weight))
|
||
return -ERANGE;
|
||
bucket->h.weight += weight;
|
||
bucket->h.size++;
|
||
return 0;
|
||
}
|
||
int crush_bucket_add_item(struct crush_bucket *b, int item, int weight)
|
||
{
|
||
/* invalidate perm cache */
|
||
... | ... | |
return crush_add_tree_bucket_item((struct crush_bucket_tree *)b, item, weight);
|
||
case CRUSH_BUCKET_STRAW:
|
||
return crush_add_straw_bucket_item((struct crush_bucket_straw *)b, item, weight);
|
||
case CRUSH_BUCKET_LINEAR:
|
||
return crush_add_linear_bucket_item((struct crush_bucket_linear *)b, item, weight);
|
||
default:
|
||
return -1;
|
||
}
|
||
... | ... | |
return crush_calc_straw(bucket);
|
||
}
|
||
int crush_remove_linear_bucket_item(struct crush_bucket_linear *bucket, int item)
|
||
{
|
||
unsigned i, j;
|
||
int newsize;
|
||
void *_realloc = NULL;
|
||
for (i = 0; i < bucket->h.size; i++)
|
||
if (bucket->h.items[i] == item)
|
||
break;
|
||
if (i == bucket->h.size)
|
||
return -ENOENT;
|
||
for (j = i; j < bucket->h.size; j++)
|
||
bucket->h.items[j] = bucket->h.items[j+1];
|
||
newsize = --bucket->h.size;
|
||
bucket->h.weight -= bucket->item_weight;
|
||
if ((_realloc = realloc(bucket->h.items, sizeof(__s32)*newsize)) == NULL) {
|
||
return -ENOMEM;
|
||
} else {
|
||
bucket->h.items = _realloc;
|
||
}
|
||
if ((_realloc = realloc(bucket->h.perm, sizeof(__u32)*newsize)) == NULL) {
|
||
return -ENOMEM;
|
||
} else {
|
||
bucket->h.perm = _realloc;
|
||
}
|
||
return 0;
|
||
}
|
||
int crush_bucket_remove_item(struct crush_bucket *b, int item)
|
||
{
|
||
/* invalidate perm cache */
|
||
... | ... | |
return crush_remove_tree_bucket_item((struct crush_bucket_tree *)b, item);
|
||
case CRUSH_BUCKET_STRAW:
|
||
return crush_remove_straw_bucket_item((struct crush_bucket_straw *)b, item);
|
||
case CRUSH_BUCKET_LINEAR:
|
||
return crush_remove_linear_bucket_item((struct crush_bucket_linear *)b, item);
|
||
default:
|
||
return -1;
|
||
}
|
||
... | ... | |
return diff;
|
||
}
|
||
int crush_adjust_linear_bucket_item_weight(struct crush_bucket_linear *bucket, int item, int weight)
|
||
{
|
||
int diff = (weight - bucket->item_weight) * bucket->h.size;
|
||
bucket->item_weight = weight;
|
||
bucket->h.weight = bucket->item_weight * bucket->h.size;
|
||
return diff;
|
||
}
|
||
int crush_bucket_adjust_item_weight(struct crush_bucket *b, int item, int weight)
|
||
{
|
||
switch (b->alg) {
|
||
... | ... | |
case CRUSH_BUCKET_STRAW:
|
||
return crush_adjust_straw_bucket_item_weight((struct crush_bucket_straw *)b,
|
||
item, weight);
|
||
case CRUSH_BUCKET_LINEAR:
|
||
return crush_adjust_linear_bucket_item_weight((struct crush_bucket_linear *)b,
|
||
item, weight);
|
||
default:
|
||
return -1;
|
||
}
|
||
... | ... | |
return 0;
|
||
}
|
||
static int crush_reweight_linear_bucket(struct crush_map *crush, struct crush_bucket_linear *bucket)
|
||
{
|
||
unsigned i;
|
||
unsigned sum = 0, n = 0, leaves = 0;
|
||
for (i = 0; i < bucket->h.size; i++) {
|
||
int id = bucket->h.items[i];
|
||
if (id < 0) {
|
||
struct crush_bucket *c = crush->buckets[-1-id];
|
||
crush_reweight_bucket(crush, c);
|
||
if (crush_addition_is_unsafe(sum, c->weight))
|
||
return -ERANGE;
|
||
sum += c->weight;
|
||
n++;
|
||
} else {
|
||
leaves++;
|
||
}
|
||
}
|
||
if (n > leaves)
|
||
bucket->item_weight = sum / n; // more bucket children than leaves, average!
|
||
bucket->h.weight = bucket->item_weight * bucket->h.size;
|
||
return 0;
|
||
}
|
||
int crush_reweight_bucket(struct crush_map *crush, struct crush_bucket *b)
|
||
{
|
||
switch (b->alg) {
|
||
... | ... | |
return crush_reweight_tree_bucket(crush, (struct crush_bucket_tree *)b);
|
||
case CRUSH_BUCKET_STRAW:
|
||
return crush_reweight_straw_bucket(crush, (struct crush_bucket_straw *)b);
|
||
case CRUSH_BUCKET_LINEAR:
|
||
return crush_reweight_linear_bucket(crush, (struct crush_bucket_linear *)b);
|
||
default:
|
||
return -1;
|
||
}
|
src/crush/builder.h | ||
---|---|---|
crush_make_straw_bucket(int hash, int type, int size,
|
||
int *items,
|
||
int *weights);
|
||
struct crush_bucket_linear *
|
||
crush_make_linear_bucket(int hash, int type, int size,
|
||
int *items,
|
||
int item_weight);
|
||
#endif
|
src/crush/crush.c | ||
---|---|---|
case CRUSH_BUCKET_LIST: return "list";
|
||
case CRUSH_BUCKET_TREE: return "tree";
|
||
case CRUSH_BUCKET_STRAW: return "straw";
|
||
case CRUSH_BUCKET_LINEAR: return "linear";
|
||
default: return "unknown";
|
||
}
|
||
}
|
||
... | ... | |
return ((struct crush_bucket_tree *)b)->node_weights[crush_calc_tree_node(p)];
|
||
case CRUSH_BUCKET_STRAW:
|
||
return ((struct crush_bucket_straw *)b)->item_weights[p];
|
||
case CRUSH_BUCKET_LINEAR:
|
||
return ((struct crush_bucket_linear *)b)->item_weight;
|
||
}
|
||
return 0;
|
||
}
|
||
... | ... | |
kfree(b);
|
||
}
|
||
void crush_destroy_bucket_linear(struct crush_bucket_linear *b)
|
||
{
|
||
kfree(b->h.perm);
|
||
kfree(b->h.items);
|
||
kfree(b);
|
||
}
|
||
void crush_destroy_bucket(struct crush_bucket *b)
|
||
{
|
||
switch (b->alg) {
|
||
... | ... | |
case CRUSH_BUCKET_STRAW:
|
||
crush_destroy_bucket_straw((struct crush_bucket_straw *)b);
|
||
break;
|
||
case CRUSH_BUCKET_LINEAR:
|
||
crush_destroy_bucket_linear((struct crush_bucket_linear *)b);
|
||
break;
|
||
}
|
||
}
|
||
src/crush/crush.h | ||
---|---|---|
CRUSH_BUCKET_UNIFORM = 1,
|
||
CRUSH_BUCKET_LIST = 2,
|
||
CRUSH_BUCKET_TREE = 3,
|
||
CRUSH_BUCKET_STRAW = 4
|
||
CRUSH_BUCKET_STRAW = 4,
|
||
CRUSH_BUCKET_LINEAR = 5
|
||
};
|
||
extern const char *crush_bucket_alg_name(int alg);
|
||
... | ... | |
__u32 *straws; /* 16-bit fixed point */
|
||
};
|
||
struct crush_bucket_linear {
|
||
struct crush_bucket h;
|
||
__u32 item_weight; /* 16-bit fixed point; all items equally weighted */
|
||
};
|
||
/*
|
||
* CRUSH map includes all buckets, rules, etc.
|
||
... | ... | |
extern void crush_destroy_bucket_list(struct crush_bucket_list *b);
|
||
extern void crush_destroy_bucket_tree(struct crush_bucket_tree *b);
|
||
extern void crush_destroy_bucket_straw(struct crush_bucket_straw *b);
|
||
extern void crush_destroy_bucket_linear(struct crush_bucket_linear *b);
|
||
extern void crush_destroy_bucket(struct crush_bucket *b);
|
||
extern void crush_destroy_rule(struct crush_rule *r);
|
||
extern void crush_destroy(struct crush_map *map);
|
src/crush/grammar.h | ||
---|---|---|
bucket_alg = str_p("alg") >> ( str_p("uniform") |
|
||
str_p("list") |
|
||
str_p("tree") |
|
||
str_p("straw") );
|
||
str_p("straw")|
|
||
str_p("linear"));
|
||
bucket_hash = str_p("hash") >> ( integer |
|
||
str_p("rjenkins1") );
|
||
bucket_item = str_p("item") >> name
|
src/crush/mapper.c | ||
---|---|---|
return bucket->h.items[high];
|
||
}
|
||
static int bucket_linear_choose(struct crush_bucket_linear *bucket,
|
||
int x, int r)
|
||
{
|
||
unsigned int item = x%bucket->h.size;
|
||
return bucket->h.items[item];
|
||
}
|
||
static int crush_bucket_choose(struct crush_bucket *in, int x, int r)
|
||
{
|
||
dprintk(" crush_bucket_choose %d x=%d r=%d\n", in->id, x, r);
|
||
... | ... | |
}
|
||
}
|
||
static int crush_bucket_choose_wrapper(struct crush_bucket *in, int x, int r, float balance_param)
|
||
{
|
||
dprintk(" crush_bucket_choose %d x=%d r=%d\n", in->id, x, r);
|
||
BUG_ON(in->size == 0);
|
||
switch (in->alg) {
|
||
case CRUSH_BUCKET_UNIFORM:
|
||
return bucket_uniform_choose((struct crush_bucket_uniform *)in,
|
||
x, r);
|
||
case CRUSH_BUCKET_LIST:
|
||
return bucket_list_choose((struct crush_bucket_list *)in,
|
||
x, r);
|
||
case CRUSH_BUCKET_TREE:
|
||
return bucket_tree_choose((struct crush_bucket_tree *)in,
|
||
x, r);
|
||
case CRUSH_BUCKET_STRAW:
|
||
return bucket_straw_choose((struct crush_bucket_straw *)in,
|
||
x, r);
|
||
case CRUSH_BUCKET_LINEAR:
|
||
return bucket_linear_choose((struct crush_bucket_linear *)in,
|
||
x/balance_param, r);
|
||
default:
|
||
dprintk("unknown bucket %d alg %d\n", in->id, in->alg);
|
||
return in->items[0];
|
||
}
|
||
}
|
||
/*
|
||
* true if device is marked "out" (failed, fully offloaded)
|
||
* of the cluster
|
||
... | ... | |
return outpos;
|
||
}
|
||
static int crush_choose_firstn_wrapper(const struct crush_map *map,
|
||
struct crush_bucket *bucket,
|
||
const __u32 *weight, int weight_max,
|
||
int x, int numrep, int type,
|
||
int *out, int outpos,
|
||
unsigned int tries,
|
||
unsigned int recurse_tries,
|
||
unsigned int local_retries,
|
||
unsigned int local_fallback_retries,
|
||
int recurse_to_leaf,
|
||
int *out2, float balance_param)
|
||
{
|
||
int rep;
|
||
unsigned int ftotal, flocal;
|
||
int retry_descent, retry_bucket, skip_rep;
|
||
struct crush_bucket *in = bucket;
|
||
int r;
|
||
int i;
|
||
int item = 0;
|
||
int itemtype;
|
||
int collide, reject;
|
||
dprintk("CHOOSE%s bucket %d x %d outpos %d numrep %d\n", recurse_to_leaf ? "_LEAF" : "",
|
||
bucket->id, x, outpos, numrep);
|
||
for (rep = outpos; rep < numrep; rep++) {
|
||
/* keep trying until we get a non-out, non-colliding item */
|
||
ftotal = 0;
|
||
skip_rep = 0;
|
||
do {
|
||
retry_descent = 0;
|
||
in = bucket; /* initial bucket */
|
||
/* choose through intervening buckets */
|
||
flocal = 0;
|
||
do {
|
||
collide = 0;
|
||
retry_bucket = 0;
|
||
r = rep;
|
||
/* r' = r + f_total */
|
||
r += ftotal;
|
||
/* bucket choose */
|
||
if (in->size == 0) {
|
||
reject = 1;
|
||
goto reject;
|
||
}
|
||
if (local_fallback_retries > 0 &&
|
||
flocal >= (in->size>>1) &&
|
||
flocal > local_fallback_retries)
|
||
item = bucket_perm_choose(in, x, r);
|
||
else
|
||
item = crush_bucket_choose_wrapper(in, x, r, balance_param);
|
||
if (item >= map->max_devices) {
|
||
dprintk(" bad item %d\n", item);
|
||
skip_rep = 1;
|
||
break;
|
||
}
|
||
/* desired type? */
|
||
if (item < 0)
|
||
itemtype = map->buckets[-1-item]->type;
|
||
else
|
||
itemtype = 0;
|
||
dprintk(" item %d type %d\n", item, itemtype);
|
||
/* keep going? */
|
||
if (itemtype != type) {
|
||
if (item >= 0 ||
|
||
(-1-item) >= map->max_buckets) {
|
||
dprintk(" bad item type %d\n", type);
|
||
skip_rep = 1;
|
||
break;
|
||
}
|
||
in = map->buckets[-1-item];
|
||
retry_bucket = 1;
|
||
continue;
|
||
}
|
||
/* collision? */
|
||
for (i = 0; i < outpos; i++) {
|
||
if (out[i] == item) {
|
||
collide = 1;
|
||
break;
|
||
}
|
||
}
|
||
reject = 0;
|
||
if (!collide && recurse_to_leaf) {
|
||
if (item < 0) {
|
||
if (crush_choose_firstn_wrapper(map,
|
||
map->buckets[-1-item],
|
||
weight, weight_max,
|
||
x, outpos+1, 0,
|
||
out2, outpos,
|
||
recurse_tries, 0,
|
||
local_retries,
|
||
local_fallback_retries,
|
||
0,
|
||
NULL, balance_param) <= outpos)
|
||
/* didn't get leaf */
|
||
reject = 1;
|
||
} else {
|
||
/* we already have a leaf! */
|
||
out2[outpos] = item;
|
||
}
|
||
}
|
||
if (!reject) {
|
||
/* out? */
|
||
if (itemtype == 0)
|
||
reject = is_out(map, weight,
|
||
weight_max,
|
||
item, x);
|
||
else
|
||
reject = 0;
|
||
}
|
||
reject:
|
||
if (reject || collide) {
|
||
ftotal++;
|
||
flocal++;
|
||
if (collide && flocal <= local_retries)
|
||
/* retry locally a few times */
|
||
retry_bucket = 1;
|
||
else if (local_fallback_retries > 0 &&
|
||
flocal <= in->size + local_fallback_retries)
|
||
/* exhaustive bucket search */
|
||
retry_bucket = 1;
|
||
else if (ftotal < tries)
|
||
/* then retry descent */
|
||
retry_descent = 1;
|
||
else
|
||
/* else give up */
|
||
skip_rep = 1;
|
||
dprintk(" reject %d collide %d "
|
||
"ftotal %u flocal %u\n",
|
||
reject, collide, ftotal,
|
||
flocal);
|
||
}
|
||
} while (retry_bucket);
|
||
} while (retry_descent);
|
||
if (skip_rep) {
|
||
dprintk("skip rep\n");
|
||
continue;
|
||
}
|
||
dprintk("CHOOSE got %d\n", item);
|
||
out[outpos] = item;
|
||
outpos++;
|
||
if (map->choose_tries && ftotal <= map->choose_total_tries)
|
||
map->choose_tries[ftotal]++;
|
||
}
|
||
dprintk("CHOOSE returns %d\n", outpos);
|
||
return outpos;
|
||
}
|
||
/**
|
||
* crush_choose_indep: alternative breadth-first positionally stable mapping
|
||
... | ... | |
return result_len;
|
||
}
|
||
int crush_do_rule_wrapper(const struct crush_map *map,
|
||
int ruleno, int x, int *result, int result_max,
|
||
const __u32 *weight, int weight_max,
|
||
int *scratch,
|
||
float balance_param)
|
||
{
|
||
int result_len;
|
||
int *a = scratch;
|
||
int *b = scratch + result_max;
|
||
int *c = scratch + result_max*2;
|
||
int recurse_to_leaf;
|
||
int *w;
|
||
int wsize = 0;
|
||
int *o;
|
||
int osize;
|
||
int *tmp;
|
||
struct crush_rule *rule;
|
||
__u32 step;
|
||
int i, j;
|
||
int numrep;
|
||
/*
|
||
* the original choose_total_tries value was off by one (it
|
||
* counted "retries" and not "tries"). add one.
|
||
*/
|
||
int choose_tries = map->choose_total_tries + 1;
|
||
int choose_leaf_tries = 0;
|
||
/*
|
||
* the local tries values were counted as "retries", though,
|
||
* and need no adjustment
|
||
*/
|
||
int choose_local_retries = map->choose_local_tries;
|
||
int choose_local_fallback_retries = map->choose_local_fallback_tries;
|
||
if ((__u32)ruleno >= map->max_rules) {
|
||
dprintk(" bad ruleno %d\n", ruleno);
|
||
return 0;
|
||
}
|
||
rule = map->rules[ruleno];
|
||
result_len = 0;
|
||
w = a;
|
||
o = b;
|
||
for (step = 0; step < rule->len; step++) {
|
||
int firstn = 0;
|
||
struct crush_rule_step *curstep = &rule->steps[step];
|
||
switch (curstep->op) {
|
||
case CRUSH_RULE_TAKE:
|
||
w[0] = curstep->arg1;
|
||
wsize = 1;
|
||
break;
|
||
case CRUSH_RULE_SET_CHOOSE_TRIES:
|
||
if (curstep->arg1 > 0)
|
||
choose_tries = curstep->arg1;
|
||
break;
|
||
case CRUSH_RULE_SET_CHOOSELEAF_TRIES:
|
||
if (curstep->arg1 > 0)
|
||
choose_leaf_tries = curstep->arg1;
|
||
break;
|
||
case CRUSH_RULE_SET_CHOOSE_LOCAL_TRIES:
|
||
if (curstep->arg1 >= 0)
|
||
choose_local_retries = curstep->arg1;
|
||
break;
|
||
case CRUSH_RULE_SET_CHOOSE_LOCAL_FALLBACK_TRIES:
|
||
if (curstep->arg1 >= 0)
|
||
choose_local_fallback_retries = curstep->arg1;
|
||
break;
|
||
case CRUSH_RULE_CHOOSELEAF_FIRSTN:
|
||
case CRUSH_RULE_CHOOSE_FIRSTN:
|
||
firstn = 1;
|
||
/* fall through */
|
||
case CRUSH_RULE_CHOOSELEAF_INDEP:
|
||
case CRUSH_RULE_CHOOSE_INDEP:
|
||
if (wsize == 0)
|
||
break;
|
||
recurse_to_leaf =
|
||
curstep->op ==
|
||
CRUSH_RULE_CHOOSELEAF_FIRSTN ||
|
||
curstep->op ==
|
||
CRUSH_RULE_CHOOSELEAF_INDEP;
|
||
/* reset output */
|
||
osize = 0;
|
||
for (i = 0; i < wsize; i++) {
|
||
/*
|
||
* see CRUSH_N, CRUSH_N_MINUS macros.
|
||
* basically, numrep <= 0 means relative to
|
||
* the provided result_max
|
||
*/
|
||
numrep = curstep->arg1;
|
||
if (numrep <= 0) {
|
||
numrep += result_max;
|
||
if (numrep <= 0)
|
||
continue;
|
||
}
|
||
j = 0;
|
||
if (firstn) {
|
||
int recurse_tries;
|
||
if (choose_leaf_tries)
|
||
recurse_tries =
|
||
choose_leaf_tries;
|
||
else if (map->chooseleaf_descend_once)
|
||
recurse_tries = 1;
|
||
else
|
||
recurse_tries = choose_tries;
|
||
osize += crush_choose_firstn_wrapper(
|
||
map,
|
||
map->buckets[-1-w[i]],
|
||
weight, weight_max,
|
||
x, numrep,
|
||
curstep->arg2,
|
||
o+osize, j,
|
||
choose_tries,
|
||
recurse_tries,
|
||
choose_local_retries,
|
||
choose_local_fallback_retries,
|
||
recurse_to_leaf,
|
||
c+osize,
|
||
balance_param);
|
||
} else {
|
||
crush_choose_indep(
|
||
map,
|
||
map->buckets[-1-w[i]],
|
||
weight, weight_max,
|
||
x, numrep, numrep,
|
||
curstep->arg2,
|
||
o+osize, j,
|
||
choose_tries,
|
||
choose_leaf_tries ?
|
||
choose_leaf_tries : 1,
|
||
recurse_to_leaf,
|
||
c+osize,
|
||
0);
|
||
osize += numrep;
|
||
}
|
||
}
|
||
if (recurse_to_leaf)
|
||
/* copy final _leaf_ values to output set */
|
||
memcpy(o, c, osize*sizeof(*o));
|
||
/* swap o and w arrays */
|
||
tmp = o;
|
||
o = w;
|
||
w = tmp;
|
||
wsize = osize;
|
||
break;
|
||
case CRUSH_RULE_EMIT:
|
||
for (i = 0; i < wsize && result_len < result_max; i++) {
|
||
result[result_len] = w[i];
|
||
result_len++;
|
||
}
|
||
wsize = 0;
|
||
break;
|
||
default:
|
||
dprintk(" unknown op %d at step %d\n",
|
||
curstep->op, step);
|
||
break;
|
||
}
|
||
}
|
||
return result_len;
|
||
}
|
||
src/crush/mapper.h | ||
---|---|---|
int x, int *result, int result_max,
|
||
const __u32 *weights, int weight_max,
|
||
int *scratch);
|
||
extern int crush_do_rule_wrapper(const struct crush_map *map,
|
||
int ruleno,
|
||
int x, int *result, int result_max,
|
||
const __u32 *weights, int weight_max,
|
||
int *scratch,
|
||
float balance_param);
|
||
#endif
|
src/mon/OSDMonitor.cc | ||
---|---|---|
<< ").osd e" << osdmap.get_epoch() << " ";
|
||
}
|
||
float cal_average(vector<int> dataset, int length) {
|
||
float avg = 0;
|
||
float sum = 0;
|
||
for(int i = 0; i < length; i++){
|
||
sum += dataset[i];
|
||
}
|
||
avg = sum/length;
|
||
return avg;
|
||
}
|
||
float cal_stdev(vector<int> dataset, int length) {
|
||
float avg = cal_average(dataset, length);
|
||
float sum = 0;
|
||
float stdev;
|
||
for(int i = 0; i < length; i++){
|
||
sum += pow(dataset[i] - avg, 2);
|
||
}
|
||
stdev = pow(sum/length, 0.5);
|
||
return stdev;
|
||
}
|
||
bool OSDMonitor::_have_pending_crush()
|
||
{
|
||
return pending_inc.crush.length();
|
||
... | ... | |
pg_pool_t::TYPE_REPLICATED, 0, ss);
|
||
}
|
||
void OSDMonitor::prepare_adaptive_balance_param(pg_pool_t *pi, int64_t pool)
|
||
{
|
||
float min_stdev = 999999;
|
||
float balance_param = 1;
|
||
|
||
for (int k = 1; k < 5; k++) {
|
||
map<int, int> osd_total_pgs;
|
||
pi->set_balance_param((float)k);
|
||
for (int i = 0; i < (int)pi->get_pg_num(); i++) {
|
||
pg_t pg(i, pool);
|
||
pg.set_pool(pool);
|
||
vector<int> acting;
|
||
int nrep = osdmap.pg_to_up_osds_adaptive(*pi, pg, acting);
|
||
if (nrep) {
|
||
for (int j = 0; j < nrep; j++) {
|
||
osd_total_pgs[acting[j]]++;
|
||
}
|
||
}
|
||
}
|
||
|
||
vector<int> pg_num;
|
||
for (map<int, int>::iterator ptr = osd_total_pgs.begin();
|
||
ptr != osd_total_pgs.end();
|
||
++ptr) {
|
||
pg_num.push_back(ptr->second);
|
||
}
|
||
|
||
float stdev = cal_stdev(pg_num, pg_num.size());
|
||
if (stdev < min_stdev) {
|
||
min_stdev = stdev;
|
||
balance_param = k;
|
||
}
|
||
}
|
||
|
||
pi->set_balance_param(balance_param);
|
||
}
|
||
int OSDMonitor::crush_ruleset_create_erasure(const string &name,
|
||
const string &profile,
|
||
int *ruleset,
|
||
... | ... | |
g_conf->osd_pool_default_cache_target_full_ratio * 1000000;
|
||
pi->cache_min_flush_age = g_conf->osd_pool_default_cache_min_flush_age;
|
||
pi->cache_min_evict_age = g_conf->osd_pool_default_cache_min_evict_age;
|
||
prepare_adaptive_balance_param(pi, pool);
|
||
pending_inc.new_pool_names[pool] = name;
|
||
return 0;
|
||
}
|
src/mon/OSDMonitor.h | ||
---|---|---|
bool prepare_pool_op (MPoolOp *m);
|
||
bool prepare_pool_op_create (MPoolOp *m);
|
||
bool prepare_pool_op_delete(MPoolOp *m);
|
||
void prepare_adaptive_balance_param(pg_pool_t *pi, int64_t pool);
|
||
int crush_ruleset_create_erasure(const string &name,
|
||
const string &profile,
|
||
int *ruleset,
|
src/osd/OSDMap.cc | ||
---|---|---|
ps_t *ppps) const
|
||
{
|
||
// map to osds[]
|
||
ps_t pps = pool.raw_pg_to_pps(pg); // placement ps
|
||
ps_t pps = pool.raw_pg_to_congruential_pps(pg); // placement ps
|
||
unsigned size = pool.get_size();
|
||
// what crush rule?
|
||
int ruleno = crush->find_rule(pool.get_crush_ruleset(), pool.get_type(), size);
|
||
if (ruleno >= 0)
|
||
crush->do_rule(ruleno, pps, *osds, size, osd_weight);
|
||
crush->do_rule(ruleno, pps, *osds, size, osd_weight, pool.get_balance_param());
|
||
_remove_nonexistent_osds(pool, *osds);
|
||
... | ... | |
_raw_to_up_osds(*pool, raw, up, primary);
|
||
_apply_primary_affinity(pps, *pool, up, primary);
|
||
}
|
||
int OSDMap::pg_to_up_osds_adaptive(const pg_pool_t& pool, pg_t pg, vector<int>& up) const
|
||
{
|
||
int primary;
|
||
vector<int> raw;
|
||
_pg_to_osds(pool, pg, &raw, &primary, NULL);
|
||
_raw_to_up_osds(pool, raw, &up, &primary);
|
||
return up.size();
|
||
}
|
||
|
||
void OSDMap::_pg_to_up_acting_osds(const pg_t& pg, vector<int> *up, int *up_primary,
|
||
vector<int> *acting, int *acting_primary) const
|
src/osd/OSDMap.h | ||
---|---|---|
int r = pg_to_acting_osds(pg, &acting, &primary);
|
||
return r;
|
||
}
|
||
int pg_to_up_osds_adaptive(const pg_pool_t& pool, pg_t pg, vector<int>& up) const;
|
||
/**
|
||
* This does not apply temp overrides and should not be used
|
||
* by anybody for data mapping purposes. Specify both pointers.
|
src/osd/osd_types.cc | ||
---|---|---|
f->dump_int("min_size", get_min_size());
|
||
f->dump_int("crush_ruleset", get_crush_ruleset());
|
||
f->dump_int("object_hash", get_object_hash());
|
||
f->dump_float("balance_param", get_balance_param());
|
||
f->dump_int("pg_num", get_pg_num());
|
||
f->dump_int("pg_placement_num", get_pgp_num());
|
||
f->dump_unsigned("crash_replay_interval", get_crash_replay_interval());
|
||
... | ... | |
}
|
||
}
|
||
ps_t pg_pool_t::raw_pg_to_congruential_pps(pg_t pg) const
|
||
{
|
||
if (flags & FLAG_HASHPSPOOL) {
|
||
// Shuffles the original pgid sequence, with poolid being the seed
|
||
ps_t stable = ceph_stable_mod(pg.ps(), pgp_num, pgp_num_mask);
|
||
ps_t pps = (1033*stable + 2*pg.pool() + 1) % pgp_num_mask + pg.pool();
|
||
return pps;
|
||
} else {
|
||
// Legacy behavior; add ps and pool together. This is not a great
|
||
// idea because the PGs from each pool will essentially overlap on
|
||
// top of each other: 0.5 == 1.4 == 2.3 == ...
|
||
return
|
||
ceph_stable_mod(pg.ps(), pgp_num, pgp_num_mask) +
|
||
pg.pool();
|
||
}
|
||
}
|
||
uint32_t pg_pool_t::get_random_pg_position(pg_t pg, uint32_t seed) const
|
||
{
|
||
uint32_t r = crush_hash32_2(CRUSH_HASH_RJENKINS1, seed, 123);
|
||
... | ... | |
::encode(size, bl);
|
||
::encode(crush_ruleset, bl);
|
||
::encode(object_hash, bl);
|
||
::encode(balance_param, bl);
|
||
::encode(pg_num, bl);
|
||
::encode(pgp_num, bl);
|
||
__u32 lpg_num = 0, lpgp_num = 0; // tell old code that there are no localized pgs.
|
||
... | ... | |
::encode(size, bl);
|
||
::encode(crush_ruleset, bl);
|
||
::encode(object_hash, bl);
|
||
::encode(balance_param, bl);
|
||
::encode(pg_num, bl);
|
||
::encode(pgp_num, bl);
|
||
__u32 lpg_num = 0, lpgp_num = 0; // tell old code that there are no localized pgs.
|
||
... | ... | |
::encode(size, bl);
|
||
::encode(crush_ruleset, bl);
|
||
::encode(object_hash, bl);
|
||
::encode(balance_param, bl);
|
||
::encode(pg_num, bl);
|
||
::encode(pgp_num, bl);
|
||
__u32 lpg_num = 0, lpgp_num = 0; // tell old code that there are no localized pgs.
|
||
... | ... | |
::encode(size, bl);
|
||
::encode(crush_ruleset, bl);
|
||
::encode(object_hash, bl);
|
||
::encode(balance_param, bl);
|
||
::encode(pg_num, bl);
|
||
::encode(pgp_num, bl);
|
||
__u32 lpg_num = 0, lpgp_num = 0; // tell old code that there are no localized pgs.
|
||
... | ... | |
::decode(size, bl);
|
||
::decode(crush_ruleset, bl);
|
||
::decode(object_hash, bl);
|
||
::decode(balance_param, bl);
|
||
::decode(pg_num, bl);
|
||
::decode(pgp_num, bl);
|
||
{
|
||
... | ... | |
a.size = 2;
|
||
a.crush_ruleset = 3;
|
||
a.object_hash = 4;
|
||
a.balance_param = 1.0;
|
||
a.pg_num = 6;
|
||
a.pgp_num = 5;
|
||
a.last_change = 9;
|
||
... | ... | |
<< " min_size " << p.get_min_size()
|
||
<< " crush_ruleset " << p.get_crush_ruleset()
|
||
<< " object_hash " << p.get_object_hash_name()
|
||
<< " balance_param " << p.get_balance_param()
|
||
<< " pg_num " << p.get_pg_num()
|
||
<< " pgp_num " << p.get_pgp_num()
|
||
<< " last_change " << p.get_last_change();
|
src/osd/osd_types.h | ||
---|---|---|
__u8 size, min_size; ///< number of osds in each pg
|
||
__u8 crush_ruleset; ///< crush placement rule set
|
||
__u8 object_hash; ///< hash mapping object name to ps
|
||
float balance_param;
|
||
private:
|
||
__u32 pg_num, pgp_num; ///< number of pgs
|
||
... | ... | |
pg_pool_t()
|
||
: flags(0), type(0), size(0), min_size(0),
|
||
crush_ruleset(0), object_hash(0),
|
||
crush_ruleset(0), object_hash(0), balance_param(1),
|
||
pg_num(0), pgp_num(0),
|
||
last_change(0),
|
||
last_force_op_resend(0),
|
||
... | ... | |
unsigned get_min_size() const { return min_size; }
|
||
int get_crush_ruleset() const { return crush_ruleset; }
|
||
int get_object_hash() const { return object_hash; }
|
||
float get_balance_param() const { return balance_param; }
|
||
const char *get_object_hash_name() const {
|
||
return ceph_str_hash_name(get_object_hash());
|
||
}
|
||
... | ... | |
unsigned get_pg_num_mask() const { return pg_num_mask; }
|
||
unsigned get_pgp_num_mask() const { return pgp_num_mask; }
|
||
void set_balance_param(float b) { balance_param = b; }
|
||
// if pg_num is not a multiple of two, pgs are not equally sized.
|
||
// return, for a given pg, the fraction (denominator) of the total
|
||
// pool size that it represents.
|
||
... | ... | |
* seeds.
|
||
*/
|
||
ps_t raw_pg_to_pps(pg_t pg) const;
|
||
ps_t raw_pg_to_congruential_pps(pg_t pg) const;
|
||
/// choose a random hash position within a pg
|
||
uint32_t get_random_pg_position(pg_t pgid, uint32_t seed) const;
|
src/tools/crushtool.cc | ||
---|---|---|
cout << " specify output for for (de)compilation\n";
|
||
cout << " --build --num_osds N layer1 ...\n";
|
||
cout << " build a new map, where each 'layer' is\n";
|
||
cout << " 'name (uniform|straw|list|tree) size'\n";
|
||
cout << " 'name (uniform|straw|list|tree|linear) size'\n";
|
||
cout << " -i mapfn --test test a range of inputs on the map\n";
|
||
cout << " [--min-x x] [--max-x x] [--x x]\n";
|
||
cout << " [--min-rule r] [--max-rule r] [--rule r]\n";
|
||
... | ... | |
{ "list", CRUSH_BUCKET_LIST },
|
||
{ "straw", CRUSH_BUCKET_STRAW },
|
||
{ "tree", CRUSH_BUCKET_TREE },
|
||
{ "linear", CRUSH_BUCKET_LINEAR },
|
||
{ 0, 0 },
|
||
};
|
||