Actions
Bug #20970
closed: bug in function reweight_by_utilization
% Done:
0%
Source:
Tags:
Backport:
luminous
Regression:
No
Severity:
3 - minor
Reviewed:
Affected Versions:
ceph-qa-suite:
Component(RADOS):
Monitor
Pull request ID:
Crash signature (v1):
Crash signature (v2):
Description
There is a bug in the function OSDMonitor::reweight_by_utilization:
for (ceph::unordered_map<pg_t,pg_stat_t>::const_iterator p =
pgm.pg_stat.begin();
p != pgm.pg_stat.end();
++p) {
if (pools && pools->count(p->first.pool()) == 0)
continue;
for (vector<int>::const_iterator q = p->second.acting.begin();
q != p->second.acting.end();
++q) {
if (*q >= (int)pgs_by_osd.size())
pgs_by_osd.resize(*q);
if (pgs_by_osd[*q] == 0) {
if (osdmap.crush->get_item_weightf(*q) <= 0) {
//skip if we currently can not identify item
continue;
}
weight_sum += osdmap.crush->get_item_weightf(*q);
++num_osds;
}
If the acting set contains the item CRUSH_ITEM_NONE, *q will be 0x7fffffff.
As a result, pgs_by_osd.resize(*q) tries to allocate a huge amount of memory and the monitor aborts with std::bad_alloc:
tcmalloc: large alloc 8589934592 bytes == (nil) @ 0x7f5cc3a0b36c 0x7f5cc3a2abd8 0x7f5cc4c87eb7 0x7f5cc4ca8165 0x7f5cc4ca8974 0x7f5cc4ca9024 0x7f5cc4c5b428 0x7f5cc4c16c3d 0x7f5cc4c1a2cb 0x7f5cc4c1b479 0x7f5cc4c1cd35 0x7f5cc4c1ab22 0x7f5cc4c1b479 0x7f5cc4c41013 0x7f5cc4fbe502 0x7f5cc4e72dcd 0x7f5cc4ef8a25 0x7f5cc2189dc5 0x7f5cc169821d (nil)
terminate called after throwing an instance of 'std::bad_alloc'
what(): std::bad_alloc
*** Caught signal (Aborted) **
in thread 7f5cb5c25700 thread_name:ms_dispatch
ceph version 11.2.0 (f223e27eeb35991352ebc1f67423d4ebc252adb7)
1: (()+0x6fe81a) [0x7f5cc500181a]
2: (()+0xf100) [0x7f5cc2191100]
3: (gsignal()+0x37) [0x7f5cc15d75f7]
4: (abort()+0x148) [0x7f5cc15d8ce8]
5: (__gnu_cxx::__verbose_terminate_handler()+0x165) [0x7f5cc1edb9b5]
6: (()+0x5e926) [0x7f5cc1ed9926]
7: (()+0x5e953) [0x7f5cc1ed9953]
8: (()+0x5eb73) [0x7f5cc1ed9b73]
9: (()+0x18437) [0x7f5cc3a0b437]
10: (tc_new()+0x18) [0x7f5cc3a2abd8]
11: (OSDMonitor::reweight_by_utilization(int, double, int, bool, std::set<long, std::less<long>, std::allocator<long> > const*, bool, bool, std::basic_stringstream<char, std::char_traits<char>, std::allocator<char> >, std::string, ceph::Formatter*)+0x4f7) [0x7f5cc4c87eb7]
12: (OSDMonitor::prepare_command_impl(std::shared_ptr<MonOpRequest>, std::map<std::string, boost::variant<std::string<bool, long, double, std::vector<std::string, std::allocator<std::string> >, std::vector<long, std::allocator<long> > > >, std::less<std::string>, std::allocator<std::pair<std::string const, std::string<bool, long, double, std::vector<std::string, std::allocator<std::string> >, std::vector<long, std::allocator<long> > > > > >&)+0x12d95) [0x7f5cc4ca8165]
13: (OSDMonitor::prepare_command(std::shared_ptr<MonOpRequest>)+0x414) [0x7f5cc4ca8974]
14: (OSDMonitor::prepare_update(std::shared_ptr<MonOpRequest>)+0x394) [0x7f5cc4ca9024]
15: (PaxosService::dispatch(std::shared_ptr<MonOpRequest>)+0xe38) [0x7f5cc4c5b428]
16: (Monitor::handle_command(std::shared_ptr<MonOpRequest>)+0x1d2d) [0x7f5cc4c16c3d]
17: (Monitor::dispatch_op(std::shared_ptr<MonOpRequest>)+0x31b) [0x7f5cc4c1a2cb]
18: (Monitor::_ms_dispatch(Message*)+0x519) [0x7f5cc4c1b479]
19: (Monitor::handle_forward(std::shared_ptr<MonOpRequest>)+0xb65) [0x7f5cc4c1cd35]
20: (Monitor::dispatch_op(std::shared_ptr<MonOpRequest>)+0xb72) [0x7f5cc4c1ab22]
21: (Monitor::_ms_dispatch(Message*)+0x519) [0x7f5cc4c1b479]
22: (Monitor::ms_dispatch(Message*)+0x23) [0x7f5cc4c41013]
23: (DispatchQueue::entry()+0x6b2) [0x7f5cc4fbe502]
24: (DispatchQueue::DispatchThread::entry()+0xd) [0x7f5cc4e72dcd]
25: (Thread::entry_wrapper()+0x75) [0x7f5cc4ef8a25]
26: (()+0x7dc5) [0x7f5cc2189dc5]
27: (clone()+0x6d) [0x7f5cc169821d]
2017-08-10 14:53:39.286961 7f5cb5c25700 -1 *** Caught signal (Aborted) **
in thread 7f5cb5c25700 thread_name:ms_dispatch
ceph version 11.2.0 (f223e27eeb35991352ebc1f67423d4ebc252adb7)
1: (()+0x6fe81a) [0x7f5cc500181a]
2: (()+0xf100) [0x7f5cc2191100]
3: (gsignal()+0x37) [0x7f5cc15d75f7]
4: (abort()+0x148) [0x7f5cc15d8ce8]
5: (__gnu_cxx::__verbose_terminate_handler()+0x165) [0x7f5cc1edb9b5]
6: (()+0x5e926) [0x7f5cc1ed9926]
7: (()+0x5e953) [0x7f5cc1ed9953]
8: (()+0x5eb73) [0x7f5cc1ed9b73]
9: (()+0x18437) [0x7f5cc3a0b437]
10: (tc_new()+0x18) [0x7f5cc3a2abd8]
11: (OSDMonitor::reweight_by_utilization(int, double, int, bool, std::set<long, std::less<long>, std::allocator<long> > const*, bool, bool, std::basic_stringstream<char, std::char_traits<char>, std::allocator<char> >, std::string, ceph::Formatter*)+0x4f7) [0x7f5cc4c87eb7]
12: (OSDMonitor::prepare_command_impl(std::shared_ptr<MonOpRequest>, std::map<std::string, boost::variant<std::string<bool, long, double, std::vector<std::string, std::allocator<std::string> >, std::vector<long, std::allocator<long> > > >, std::less<std::string>, std::allocator<std::pair<std::string const, std::string<bool, long, double, std::vector<std::string, std::allocator<std::string> >, std::vector<long, std::allocator<long> > > > > >&)+0x12d95) [0x7f5cc4ca8165]
13: (OSDMonitor::prepare_command(std::shared_ptr<MonOpRequest>)+0x414) [0x7f5cc4ca8974]
14: (OSDMonitor::prepare_update(std::shared_ptr<MonOpRequest>)+0x394) [0x7f5cc4ca9024]
15: (PaxosService::dispatch(std::shared_ptr<MonOpRequest>)+0xe38) [0x7f5cc4c5b428]
16: (Monitor::handle_command(std::shared_ptr<MonOpRequest>)+0x1d2d) [0x7f5cc4c16c3d]
17: (Monitor::dispatch_op(std::shared_ptr<MonOpRequest>)+0x31b) [0x7f5cc4c1a2cb]
18: (Monitor::_ms_dispatch(Message*)+0x519) [0x7f5cc4c1b479]
19: (Monitor::handle_forward(std::shared_ptr<MonOpRequest>)+0xb65) [0x7f5cc4c1cd35]
20: (Monitor::dispatch_op(std::shared_ptr<MonOpRequest>)+0xb72) [0x7f5cc4c1ab22]
21: (Monitor::_ms_dispatch(Message*)+0x519) [0x7f5cc4c1b479]
22: (Monitor::ms_dispatch(Message*)+0x23) [0x7f5cc4c41013]
23: (DispatchQueue::entry()+0x6b2) [0x7f5cc4fbe502]
24: (DispatchQueue::DispatchThread::entry()+0xd) [0x7f5cc4e72dcd]
25: (Thread::entry_wrapper()+0x75) [0x7f5cc4ef8a25]
26: (()+0x7dc5) [0x7f5cc2189dc5]
27: (clone()+0x6d) [0x7f5cc169821d]
Updated by Kefu Chai over 6 years ago
- Status changed from New to Fix Under Review
- Assignee set to xie xingguo
- Backport set to luminous
Updated by xie xingguo over 6 years ago
- Status changed from Fix Under Review to Pending Backport
Updated by Abhishek Lekshmanan over 6 years ago
- Copied to Backport #21079: bug in function reweight_by_utilization added
Updated by Kefu Chai over 6 years ago
- Status changed from Pending Backport to Resolved
Actions