Project

General

Profile

Feature #9410 » adaptive-crush-modify.patch

Patch for CRUSH optimization - jian zhang, 09/09/2014 06:25 PM

View differences:

src/crush/CrushCompiler.cc
bool dopos = false;
switch (alg) {
case CRUSH_BUCKET_UNIFORM:
case CRUSH_BUCKET_LINEAR:
out << "\t# do not change bucket size (" << n << ") unnecessarily";
dopos = true;
break;
......
alg = CRUSH_BUCKET_TREE;
else if (a == "straw")
alg = CRUSH_BUCKET_STRAW;
else if (a == "linear")
alg = CRUSH_BUCKET_LINEAR;
else {
err << "unknown bucket alg '" << a << "'" << std::endl << std::endl;
return -EINVAL;
......
assert(0);
}
if (alg == CRUSH_BUCKET_UNIFORM) {
if (alg == CRUSH_BUCKET_UNIFORM || alg == CRUSH_BUCKET_LINEAR) {
if (!have_uniform_weight) {
have_uniform_weight = true;
uniform_weight = weight;
src/crush/CrushWrapper.cc
}
break;
case CRUSH_BUCKET_LINEAR:
::encode(((crush_bucket_linear*)crush->buckets[i])->item_weight, bl);
break;
default:
assert(0);
break;
......
case CRUSH_BUCKET_STRAW:
size = sizeof(crush_bucket_straw);
break;
case CRUSH_BUCKET_LINEAR:
size = sizeof(crush_bucket_linear);
break;
default:
{
char str[128];
......
break;
}
case CRUSH_BUCKET_LINEAR:
::decode(((crush_bucket_linear*)bucket)->item_weight, blp);
break;
default:
// We should have handled this case in the first switch statement
assert(0);
src/crush/CrushWrapper.h
out[i] = rawout[i];
}
void do_rule(int rule, int x, vector<int>& out, int maxout,
const vector<__u32>& weight, float balance_param) const {
Mutex::Locker l(mapper_lock);
int rawout[maxout];
int scratch[maxout * 3];
int numrep = crush_do_rule_wrapper(crush, rule, x, rawout, maxout,
&weight[0], weight.size(), scratch, balance_param);
if (numrep < 0)
numrep = 0;
out.resize(numrep);
for (int i=0; i<numrep; i++)
out[i] = rawout[i];
}
int read_from_file(const char *fn) {
bufferlist bl;
std::string error;
src/crush/builder.c
return NULL;
}
/* linear bucket */
struct crush_bucket_linear *
crush_make_linear_bucket(int hash, int type, int size,
int *items,
int item_weight)
{
int i;
struct crush_bucket_linear *bucket;
if (crush_multiplication_is_unsafe(size, item_weight)) {
//need more info
//printf("");
return NULL;
}
bucket = malloc(sizeof(*bucket));
if (!bucket)
return NULL;
memset(bucket, 0, sizeof(*bucket));
bucket->h.alg = CRUSH_BUCKET_LINEAR;
bucket->h.hash = hash;
bucket->h.type = type;
bucket->h.size = size;
bucket->h.weight = size * item_weight;
bucket->item_weight = item_weight;
bucket->h.items = malloc(sizeof(__s32) * size);
if (!bucket->h.items)
goto err;
bucket->h.perm = malloc(sizeof(__u32) * size);
if (!bucket->h.perm)
goto err;
for (i=0; i<size; i++)
bucket->h.items[i] = items[i];
return bucket;
err:
free(bucket->h.perm);
free(bucket->h.items);
free(bucket);
return NULL;
}
struct crush_bucket*
......
case CRUSH_BUCKET_STRAW:
return (struct crush_bucket *)crush_make_straw_bucket(hash, type, size, items, weights);
case CRUSH_BUCKET_LINEAR:
if (size && weights)
item_weight = weights[0];
else
item_weight = 0;
return (struct crush_bucket *)crush_make_linear_bucket(hash, type, size, items, item_weight);
}
return 0;
}
......
return crush_calc_straw(bucket);
}
int crush_add_linear_bucket_item(struct crush_bucket_linear *bucket, int item, int weight)
{
int newsize = bucket->h.size + 1;
void *_realloc = NULL;
if ((_realloc = realloc(bucket->h.items, sizeof(__s32)*newsize)) == NULL) {
return -ENOMEM;
} else {
bucket->h.items = _realloc;
}
if ((_realloc = realloc(bucket->h.perm, sizeof(__u32)*newsize)) == NULL) {
return -ENOMEM;
} else {
bucket->h.perm = _realloc;
}
bucket->h.items[newsize-1] = item;
if (crush_addition_is_unsafe(bucket->h.weight, weight))
return -ERANGE;
bucket->h.weight += weight;
bucket->h.size++;
return 0;
}
int crush_bucket_add_item(struct crush_bucket *b, int item, int weight)
{
/* invalidate perm cache */
......
return crush_add_tree_bucket_item((struct crush_bucket_tree *)b, item, weight);
case CRUSH_BUCKET_STRAW:
return crush_add_straw_bucket_item((struct crush_bucket_straw *)b, item, weight);
case CRUSH_BUCKET_LINEAR:
return crush_add_linear_bucket_item((struct crush_bucket_linear *)b, item, weight);
default:
return -1;
}
......
return crush_calc_straw(bucket);
}
int crush_remove_linear_bucket_item(struct crush_bucket_linear *bucket, int item)
{
unsigned i, j;
int newsize;
void *_realloc = NULL;
for (i = 0; i < bucket->h.size; i++)
if (bucket->h.items[i] == item)
break;
if (i == bucket->h.size)
return -ENOENT;
for (j = i; j < bucket->h.size; j++)
bucket->h.items[j] = bucket->h.items[j+1];
newsize = --bucket->h.size;
bucket->h.weight -= bucket->item_weight;
if ((_realloc = realloc(bucket->h.items, sizeof(__s32)*newsize)) == NULL) {
return -ENOMEM;
} else {
bucket->h.items = _realloc;
}
if ((_realloc = realloc(bucket->h.perm, sizeof(__u32)*newsize)) == NULL) {
return -ENOMEM;
} else {
bucket->h.perm = _realloc;
}
return 0;
}
int crush_bucket_remove_item(struct crush_bucket *b, int item)
{
/* invalidate perm cache */
......
return crush_remove_tree_bucket_item((struct crush_bucket_tree *)b, item);
case CRUSH_BUCKET_STRAW:
return crush_remove_straw_bucket_item((struct crush_bucket_straw *)b, item);
case CRUSH_BUCKET_LINEAR:
return crush_remove_linear_bucket_item((struct crush_bucket_linear *)b, item);
default:
return -1;
}
......
return diff;
}
int crush_adjust_linear_bucket_item_weight(struct crush_bucket_linear *bucket, int item, int weight)
{
int diff = (weight - bucket->item_weight) * bucket->h.size;
bucket->item_weight = weight;
bucket->h.weight = bucket->item_weight * bucket->h.size;
return diff;
}
int crush_bucket_adjust_item_weight(struct crush_bucket *b, int item, int weight)
{
switch (b->alg) {
......
case CRUSH_BUCKET_STRAW:
return crush_adjust_straw_bucket_item_weight((struct crush_bucket_straw *)b,
item, weight);
case CRUSH_BUCKET_LINEAR:
return crush_adjust_linear_bucket_item_weight((struct crush_bucket_linear *)b,
item, weight);
default:
return -1;
}
......
return 0;
}
static int crush_reweight_linear_bucket(struct crush_map *crush, struct crush_bucket_linear *bucket)
{
unsigned i;
unsigned sum = 0, n = 0, leaves = 0;
for (i = 0; i < bucket->h.size; i++) {
int id = bucket->h.items[i];
if (id < 0) {
struct crush_bucket *c = crush->buckets[-1-id];
crush_reweight_bucket(crush, c);
if (crush_addition_is_unsafe(sum, c->weight))
return -ERANGE;
sum += c->weight;
n++;
} else {
leaves++;
}
}
if (n > leaves)
bucket->item_weight = sum / n; // more bucket children than leaves, average!
bucket->h.weight = bucket->item_weight * bucket->h.size;
return 0;
}
int crush_reweight_bucket(struct crush_map *crush, struct crush_bucket *b)
{
switch (b->alg) {
......
return crush_reweight_tree_bucket(crush, (struct crush_bucket_tree *)b);
case CRUSH_BUCKET_STRAW:
return crush_reweight_straw_bucket(crush, (struct crush_bucket_straw *)b);
case CRUSH_BUCKET_LINEAR:
return crush_reweight_linear_bucket(crush, (struct crush_bucket_linear *)b);
default:
return -1;
}
src/crush/builder.h
crush_make_straw_bucket(int hash, int type, int size,
int *items,
int *weights);
struct crush_bucket_linear *
crush_make_linear_bucket(int hash, int type, int size,
int *items,
int item_weight);
#endif
src/crush/crush.c
case CRUSH_BUCKET_LIST: return "list";
case CRUSH_BUCKET_TREE: return "tree";
case CRUSH_BUCKET_STRAW: return "straw";
case CRUSH_BUCKET_LINEAR: return "linear";
default: return "unknown";
}
}
......
return ((struct crush_bucket_tree *)b)->node_weights[crush_calc_tree_node(p)];
case CRUSH_BUCKET_STRAW:
return ((struct crush_bucket_straw *)b)->item_weights[p];
case CRUSH_BUCKET_LINEAR:
return ((struct crush_bucket_linear *)b)->item_weight;
}
return 0;
}
......
kfree(b);
}
void crush_destroy_bucket_linear(struct crush_bucket_linear *b)
{
kfree(b->h.perm);
kfree(b->h.items);
kfree(b);
}
void crush_destroy_bucket(struct crush_bucket *b)
{
switch (b->alg) {
......
case CRUSH_BUCKET_STRAW:
crush_destroy_bucket_straw((struct crush_bucket_straw *)b);
break;
case CRUSH_BUCKET_LINEAR:
crush_destroy_bucket_linear((struct crush_bucket_linear *)b);
break;
}
}
src/crush/crush.h
CRUSH_BUCKET_UNIFORM = 1,
CRUSH_BUCKET_LIST = 2,
CRUSH_BUCKET_TREE = 3,
CRUSH_BUCKET_STRAW = 4
CRUSH_BUCKET_STRAW = 4,
CRUSH_BUCKET_LINEAR = 5
};
extern const char *crush_bucket_alg_name(int alg);
......
__u32 *straws; /* 16-bit fixed point */
};
struct crush_bucket_linear {
struct crush_bucket h;
__u32 item_weight; /* 16-bit fixed point; all items equally weighted */
};
/*
* CRUSH map includes all buckets, rules, etc.
......
extern void crush_destroy_bucket_list(struct crush_bucket_list *b);
extern void crush_destroy_bucket_tree(struct crush_bucket_tree *b);
extern void crush_destroy_bucket_straw(struct crush_bucket_straw *b);
extern void crush_destroy_bucket_linear(struct crush_bucket_linear *b);
extern void crush_destroy_bucket(struct crush_bucket *b);
extern void crush_destroy_rule(struct crush_rule *r);
extern void crush_destroy(struct crush_map *map);
src/crush/grammar.h
bucket_alg = str_p("alg") >> ( str_p("uniform") |
str_p("list") |
str_p("tree") |
str_p("straw") );
str_p("straw")|
str_p("linear"));
bucket_hash = str_p("hash") >> ( integer |
str_p("rjenkins1") );
bucket_item = str_p("item") >> name
src/crush/mapper.c
return bucket->h.items[high];
}
static int bucket_linear_choose(struct crush_bucket_linear *bucket,
int x, int r)
{
unsigned int item = x%bucket->h.size;
return bucket->h.items[item];
}
static int crush_bucket_choose(struct crush_bucket *in, int x, int r)
{
dprintk(" crush_bucket_choose %d x=%d r=%d\n", in->id, x, r);
......
}
}
static int crush_bucket_choose_wrapper(struct crush_bucket *in, int x, int r, float balance_param)
{
dprintk(" crush_bucket_choose %d x=%d r=%d\n", in->id, x, r);
BUG_ON(in->size == 0);
switch (in->alg) {
case CRUSH_BUCKET_UNIFORM:
return bucket_uniform_choose((struct crush_bucket_uniform *)in,
x, r);
case CRUSH_BUCKET_LIST:
return bucket_list_choose((struct crush_bucket_list *)in,
x, r);
case CRUSH_BUCKET_TREE:
return bucket_tree_choose((struct crush_bucket_tree *)in,
x, r);
case CRUSH_BUCKET_STRAW:
return bucket_straw_choose((struct crush_bucket_straw *)in,
x, r);
case CRUSH_BUCKET_LINEAR:
return bucket_linear_choose((struct crush_bucket_linear *)in,
x/balance_param, r);
default:
dprintk("unknown bucket %d alg %d\n", in->id, in->alg);
return in->items[0];
}
}
/*
* true if device is marked "out" (failed, fully offloaded)
* of the cluster
......
return outpos;
}
static int crush_choose_firstn_wrapper(const struct crush_map *map,
struct crush_bucket *bucket,
const __u32 *weight, int weight_max,
int x, int numrep, int type,
int *out, int outpos,
unsigned int tries,
unsigned int recurse_tries,
unsigned int local_retries,
unsigned int local_fallback_retries,
int recurse_to_leaf,
int *out2, float balance_param)
{
int rep;
unsigned int ftotal, flocal;
int retry_descent, retry_bucket, skip_rep;
struct crush_bucket *in = bucket;
int r;
int i;
int item = 0;
int itemtype;
int collide, reject;
dprintk("CHOOSE%s bucket %d x %d outpos %d numrep %d\n", recurse_to_leaf ? "_LEAF" : "",
bucket->id, x, outpos, numrep);
for (rep = outpos; rep < numrep; rep++) {
/* keep trying until we get a non-out, non-colliding item */
ftotal = 0;
skip_rep = 0;
do {
retry_descent = 0;
in = bucket; /* initial bucket */
/* choose through intervening buckets */
flocal = 0;
do {
collide = 0;
retry_bucket = 0;
r = rep;
/* r' = r + f_total */
r += ftotal;
/* bucket choose */
if (in->size == 0) {
reject = 1;
goto reject;
}
if (local_fallback_retries > 0 &&
flocal >= (in->size>>1) &&
flocal > local_fallback_retries)
item = bucket_perm_choose(in, x, r);
else
item = crush_bucket_choose_wrapper(in, x, r, balance_param);
if (item >= map->max_devices) {
dprintk(" bad item %d\n", item);
skip_rep = 1;
break;
}
/* desired type? */
if (item < 0)
itemtype = map->buckets[-1-item]->type;
else
itemtype = 0;
dprintk(" item %d type %d\n", item, itemtype);
/* keep going? */
if (itemtype != type) {
if (item >= 0 ||
(-1-item) >= map->max_buckets) {
dprintk(" bad item type %d\n", type);
skip_rep = 1;
break;
}
in = map->buckets[-1-item];
retry_bucket = 1;
continue;
}
/* collision? */
for (i = 0; i < outpos; i++) {
if (out[i] == item) {
collide = 1;
break;
}
}
reject = 0;
if (!collide && recurse_to_leaf) {
if (item < 0) {
if (crush_choose_firstn_wrapper(map,
map->buckets[-1-item],
weight, weight_max,
x, outpos+1, 0,
out2, outpos,
recurse_tries, 0,
local_retries,
local_fallback_retries,
0,
NULL, balance_param) <= outpos)
/* didn't get leaf */
reject = 1;
} else {
/* we already have a leaf! */
out2[outpos] = item;
}
}
if (!reject) {
/* out? */
if (itemtype == 0)
reject = is_out(map, weight,
weight_max,
item, x);
else
reject = 0;
}
reject:
if (reject || collide) {
ftotal++;
flocal++;
if (collide && flocal <= local_retries)
/* retry locally a few times */
retry_bucket = 1;
else if (local_fallback_retries > 0 &&
flocal <= in->size + local_fallback_retries)
/* exhaustive bucket search */
retry_bucket = 1;
else if (ftotal < tries)
/* then retry descent */
retry_descent = 1;
else
/* else give up */
skip_rep = 1;
dprintk(" reject %d collide %d "
"ftotal %u flocal %u\n",
reject, collide, ftotal,
flocal);
}
} while (retry_bucket);
} while (retry_descent);
if (skip_rep) {
dprintk("skip rep\n");
continue;
}
dprintk("CHOOSE got %d\n", item);
out[outpos] = item;
outpos++;
if (map->choose_tries && ftotal <= map->choose_total_tries)
map->choose_tries[ftotal]++;
}
dprintk("CHOOSE returns %d\n", outpos);
return outpos;
}
/**
* crush_choose_indep: alternative breadth-first positionally stable mapping
......
return result_len;
}
int crush_do_rule_wrapper(const struct crush_map *map,
int ruleno, int x, int *result, int result_max,
const __u32 *weight, int weight_max,
int *scratch,
float balance_param)
{
int result_len;
int *a = scratch;
int *b = scratch + result_max;
int *c = scratch + result_max*2;
int recurse_to_leaf;
int *w;
int wsize = 0;
int *o;
int osize;
int *tmp;
struct crush_rule *rule;
__u32 step;
int i, j;
int numrep;
/*
* the original choose_total_tries value was off by one (it
* counted "retries" and not "tries"). add one.
*/
int choose_tries = map->choose_total_tries + 1;
int choose_leaf_tries = 0;
/*
* the local tries values were counted as "retries", though,
* and need no adjustment
*/
int choose_local_retries = map->choose_local_tries;
int choose_local_fallback_retries = map->choose_local_fallback_tries;
if ((__u32)ruleno >= map->max_rules) {
dprintk(" bad ruleno %d\n", ruleno);
return 0;
}
rule = map->rules[ruleno];
result_len = 0;
w = a;
o = b;
for (step = 0; step < rule->len; step++) {
int firstn = 0;
struct crush_rule_step *curstep = &rule->steps[step];
switch (curstep->op) {
case CRUSH_RULE_TAKE:
w[0] = curstep->arg1;
wsize = 1;
break;
case CRUSH_RULE_SET_CHOOSE_TRIES:
if (curstep->arg1 > 0)
choose_tries = curstep->arg1;
break;
case CRUSH_RULE_SET_CHOOSELEAF_TRIES:
if (curstep->arg1 > 0)
choose_leaf_tries = curstep->arg1;
break;
case CRUSH_RULE_SET_CHOOSE_LOCAL_TRIES:
if (curstep->arg1 >= 0)
choose_local_retries = curstep->arg1;
break;
case CRUSH_RULE_SET_CHOOSE_LOCAL_FALLBACK_TRIES:
if (curstep->arg1 >= 0)
choose_local_fallback_retries = curstep->arg1;
break;
case CRUSH_RULE_CHOOSELEAF_FIRSTN:
case CRUSH_RULE_CHOOSE_FIRSTN:
firstn = 1;
/* fall through */
case CRUSH_RULE_CHOOSELEAF_INDEP:
case CRUSH_RULE_CHOOSE_INDEP:
if (wsize == 0)
break;
recurse_to_leaf =
curstep->op ==
CRUSH_RULE_CHOOSELEAF_FIRSTN ||
curstep->op ==
CRUSH_RULE_CHOOSELEAF_INDEP;
/* reset output */
osize = 0;
for (i = 0; i < wsize; i++) {
/*
* see CRUSH_N, CRUSH_N_MINUS macros.
* basically, numrep <= 0 means relative to
* the provided result_max
*/
numrep = curstep->arg1;
if (numrep <= 0) {
numrep += result_max;
if (numrep <= 0)
continue;
}
j = 0;
if (firstn) {
int recurse_tries;
if (choose_leaf_tries)
recurse_tries =
choose_leaf_tries;
else if (map->chooseleaf_descend_once)
recurse_tries = 1;
else
recurse_tries = choose_tries;
osize += crush_choose_firstn_wrapper(
map,
map->buckets[-1-w[i]],
weight, weight_max,
x, numrep,
curstep->arg2,
o+osize, j,
choose_tries,
recurse_tries,
choose_local_retries,
choose_local_fallback_retries,
recurse_to_leaf,
c+osize,
balance_param);
} else {
crush_choose_indep(
map,
map->buckets[-1-w[i]],
weight, weight_max,
x, numrep, numrep,
curstep->arg2,
o+osize, j,
choose_tries,
choose_leaf_tries ?
choose_leaf_tries : 1,
recurse_to_leaf,
c+osize,
0);
osize += numrep;
}
}
if (recurse_to_leaf)
/* copy final _leaf_ values to output set */
memcpy(o, c, osize*sizeof(*o));
/* swap o and w arrays */
tmp = o;
o = w;
w = tmp;
wsize = osize;
break;
case CRUSH_RULE_EMIT:
for (i = 0; i < wsize && result_len < result_max; i++) {
result[result_len] = w[i];
result_len++;
}
wsize = 0;
break;
default:
dprintk(" unknown op %d at step %d\n",
curstep->op, step);
break;
}
}
return result_len;
}
src/crush/mapper.h
int x, int *result, int result_max,
const __u32 *weights, int weight_max,
int *scratch);
extern int crush_do_rule_wrapper(const struct crush_map *map,
int ruleno,
int x, int *result, int result_max,
const __u32 *weights, int weight_max,
int *scratch,
float balance_param);
#endif
src/mon/OSDMonitor.cc
<< ").osd e" << osdmap.get_epoch() << " ";
}
float cal_average(vector<int> dataset, int length) {
float avg = 0;
float sum = 0;
for(int i = 0; i < length; i++){
sum += dataset[i];
}
avg = sum/length;
return avg;
}
float cal_stdev(vector<int> dataset, int length) {
float avg = cal_average(dataset, length);
float sum = 0;
float stdev;
for(int i = 0; i < length; i++){
sum += pow(dataset[i] - avg, 2);
}
stdev = pow(sum/length, 0.5);
return stdev;
}
bool OSDMonitor::_have_pending_crush()
{
return pending_inc.crush.length();
......
pg_pool_t::TYPE_REPLICATED, 0, ss);
}
void OSDMonitor::prepare_adaptive_balance_param(pg_pool_t *pi, int64_t pool)
{
float min_stdev = 999999;
float balance_param = 1;
for (int k = 1; k < 5; k++) {
map<int, int> osd_total_pgs;
pi->set_balance_param((float)k);
for (int i = 0; i < (int)pi->get_pg_num(); i++) {
pg_t pg(i, pool);
pg.set_pool(pool);
vector<int> acting;
int nrep = osdmap.pg_to_up_osds_adaptive(*pi, pg, acting);
if (nrep) {
for (int j = 0; j < nrep; j++) {
osd_total_pgs[acting[j]]++;
}
}
}
vector<int> pg_num;
for (map<int, int>::iterator ptr = osd_total_pgs.begin();
ptr != osd_total_pgs.end();
++ptr) {
pg_num.push_back(ptr->second);
}
float stdev = cal_stdev(pg_num, pg_num.size());
if (stdev < min_stdev) {
min_stdev = stdev;
balance_param = k;
}
}
pi->set_balance_param(balance_param);
}
int OSDMonitor::crush_ruleset_create_erasure(const string &name,
const string &profile,
int *ruleset,
......
g_conf->osd_pool_default_cache_target_full_ratio * 1000000;
pi->cache_min_flush_age = g_conf->osd_pool_default_cache_min_flush_age;
pi->cache_min_evict_age = g_conf->osd_pool_default_cache_min_evict_age;
prepare_adaptive_balance_param(pi, pool);
pending_inc.new_pool_names[pool] = name;
return 0;
}
src/mon/OSDMonitor.h
bool prepare_pool_op (MPoolOp *m);
bool prepare_pool_op_create (MPoolOp *m);
bool prepare_pool_op_delete(MPoolOp *m);
void prepare_adaptive_balance_param(pg_pool_t *pi, int64_t pool);
int crush_ruleset_create_erasure(const string &name,
const string &profile,
int *ruleset,
src/osd/OSDMap.cc
ps_t *ppps) const
{
// map to osds[]
ps_t pps = pool.raw_pg_to_pps(pg); // placement ps
ps_t pps = pool.raw_pg_to_congruential_pps(pg); // placement ps
unsigned size = pool.get_size();
// what crush rule?
int ruleno = crush->find_rule(pool.get_crush_ruleset(), pool.get_type(), size);
if (ruleno >= 0)
crush->do_rule(ruleno, pps, *osds, size, osd_weight);
crush->do_rule(ruleno, pps, *osds, size, osd_weight, pool.get_balance_param());
_remove_nonexistent_osds(pool, *osds);
......
_raw_to_up_osds(*pool, raw, up, primary);
_apply_primary_affinity(pps, *pool, up, primary);
}
int OSDMap::pg_to_up_osds_adaptive(const pg_pool_t& pool, pg_t pg, vector<int>& up) const
{
int primary;
vector<int> raw;
_pg_to_osds(pool, pg, &raw, &primary, NULL);
_raw_to_up_osds(pool, raw, &up, &primary);
return up.size();
}
void OSDMap::_pg_to_up_acting_osds(const pg_t& pg, vector<int> *up, int *up_primary,
vector<int> *acting, int *acting_primary) const
src/osd/OSDMap.h
int r = pg_to_acting_osds(pg, &acting, &primary);
return r;
}
int pg_to_up_osds_adaptive(const pg_pool_t& pool, pg_t pg, vector<int>& up) const;
/**
* This does not apply temp overrides and should not be used
* by anybody for data mapping purposes. Specify both pointers.
src/osd/osd_types.cc
f->dump_int("min_size", get_min_size());
f->dump_int("crush_ruleset", get_crush_ruleset());
f->dump_int("object_hash", get_object_hash());
f->dump_float("balance_param", get_balance_param());
f->dump_int("pg_num", get_pg_num());
f->dump_int("pg_placement_num", get_pgp_num());
f->dump_unsigned("crash_replay_interval", get_crash_replay_interval());
......
}
}
ps_t pg_pool_t::raw_pg_to_congruential_pps(pg_t pg) const
{
if (flags & FLAG_HASHPSPOOL) {
// Shuffles the original pgid sequence, with poolid being the seed
ps_t stable = ceph_stable_mod(pg.ps(), pgp_num, pgp_num_mask);
ps_t pps = (1033*stable + 2*pg.pool() + 1) % pgp_num_mask + pg.pool();
return pps;
} else {
// Legacy behavior; add ps and pool together. This is not a great
// idea because the PGs from each pool will essentially overlap on
// top of each other: 0.5 == 1.4 == 2.3 == ...
return
ceph_stable_mod(pg.ps(), pgp_num, pgp_num_mask) +
pg.pool();
}
}
uint32_t pg_pool_t::get_random_pg_position(pg_t pg, uint32_t seed) const
{
uint32_t r = crush_hash32_2(CRUSH_HASH_RJENKINS1, seed, 123);
......
::encode(size, bl);
::encode(crush_ruleset, bl);
::encode(object_hash, bl);
::encode(balance_param, bl);
::encode(pg_num, bl);
::encode(pgp_num, bl);
__u32 lpg_num = 0, lpgp_num = 0; // tell old code that there are no localized pgs.
......
::encode(size, bl);
::encode(crush_ruleset, bl);
::encode(object_hash, bl);
::encode(balance_param, bl);
::encode(pg_num, bl);
::encode(pgp_num, bl);
__u32 lpg_num = 0, lpgp_num = 0; // tell old code that there are no localized pgs.
......
::encode(size, bl);
::encode(crush_ruleset, bl);
::encode(object_hash, bl);
::encode(balance_param, bl);
::encode(pg_num, bl);
::encode(pgp_num, bl);
__u32 lpg_num = 0, lpgp_num = 0; // tell old code that there are no localized pgs.
......
::encode(size, bl);
::encode(crush_ruleset, bl);
::encode(object_hash, bl);
::encode(balance_param, bl);
::encode(pg_num, bl);
::encode(pgp_num, bl);
__u32 lpg_num = 0, lpgp_num = 0; // tell old code that there are no localized pgs.
......
::decode(size, bl);
::decode(crush_ruleset, bl);
::decode(object_hash, bl);
::decode(balance_param, bl);
::decode(pg_num, bl);
::decode(pgp_num, bl);
{
......
a.size = 2;
a.crush_ruleset = 3;
a.object_hash = 4;
a.balance_param = 1.0;
a.pg_num = 6;
a.pgp_num = 5;
a.last_change = 9;
......
<< " min_size " << p.get_min_size()
<< " crush_ruleset " << p.get_crush_ruleset()
<< " object_hash " << p.get_object_hash_name()
<< " balance_param " << p.get_balance_param()
<< " pg_num " << p.get_pg_num()
<< " pgp_num " << p.get_pgp_num()
<< " last_change " << p.get_last_change();
src/osd/osd_types.h
__u8 size, min_size; ///< number of osds in each pg
__u8 crush_ruleset; ///< crush placement rule set
__u8 object_hash; ///< hash mapping object name to ps
float balance_param;
private:
__u32 pg_num, pgp_num; ///< number of pgs
......
pg_pool_t()
: flags(0), type(0), size(0), min_size(0),
crush_ruleset(0), object_hash(0),
crush_ruleset(0), object_hash(0), balance_param(1),
pg_num(0), pgp_num(0),
last_change(0),
last_force_op_resend(0),
......
unsigned get_min_size() const { return min_size; }
int get_crush_ruleset() const { return crush_ruleset; }
int get_object_hash() const { return object_hash; }
float get_balance_param() const { return balance_param; }
const char *get_object_hash_name() const {
return ceph_str_hash_name(get_object_hash());
}
......
unsigned get_pg_num_mask() const { return pg_num_mask; }
unsigned get_pgp_num_mask() const { return pgp_num_mask; }
void set_balance_param(float b) { balance_param = b; }
// if pg_num is not a multiple of two, pgs are not equally sized.
// return, for a given pg, the fraction (denominator) of the total
// pool size that it represents.
......
* seeds.
*/
ps_t raw_pg_to_pps(pg_t pg) const;
ps_t raw_pg_to_congruential_pps(pg_t pg) const;
/// choose a random hash position within a pg
uint32_t get_random_pg_position(pg_t pgid, uint32_t seed) const;
src/tools/crushtool.cc
cout << " specify output for for (de)compilation\n";
cout << " --build --num_osds N layer1 ...\n";
cout << " build a new map, where each 'layer' is\n";
cout << " 'name (uniform|straw|list|tree) size'\n";
cout << " 'name (uniform|straw|list|tree|linear) size'\n";
cout << " -i mapfn --test test a range of inputs on the map\n";
cout << " [--min-x x] [--max-x x] [--x x]\n";
cout << " [--min-rule r] [--max-rule r] [--rule r]\n";
......
{ "list", CRUSH_BUCKET_LIST },
{ "straw", CRUSH_BUCKET_STRAW },
{ "tree", CRUSH_BUCKET_TREE },
{ "linear", CRUSH_BUCKET_LINEAR },
{ 0, 0 },
};
(1-1/3)