Bug #54558
Updated by Dan Mick about 2 years ago
When sent with curl from the CLI, an HTTP request containing malformed JSON data (the "caps" value is given as a single string rather than a list) for creating a user and defining its capabilities caused every ceph-mon service to receive an abort signal and get stuck in a restart loop. The malformed request looks like this: <pre> curl -k -H "Authorization: Basic $TOKEN" "https://juju-3b3d82-10-lxd-0:8003/request" -X POST -d '{"prefix":"auth add","entity":"client.testuser02","caps":"mon '\''allow r'\'' osd '\''allow rw pool=testpool01'\''"}' </pre> The request status shows that it is still in the queue: <pre> [ { "failed": [], "finished": [], "has_failed": false, "id": "140576245092648", "is_finished": false, "is_waiting": false, "running": [ { "command": "auth add entity=client.testuser02 caps=mon 'allow r' osd 'allow rw pool=testpool01'", "outb": "", "outs": "" } ], "state": "pending", "waiting": [] } ] </pre> The following request, which uses a list for the "caps" value, works fine: <pre> curl -k -H "Authorization: Basic $TOKEN" "https://juju-3b3d82-10-lxd-0:8003/request" -X POST -d '{"prefix":"auth add","entity":"client.testuser02","caps":["mon", "allow r", "osd", "allow rw pool=testpool01"]}' </pre> The Ceph API should be resilient to badly formatted HTTP requests. As of now, when Ceph aborts the thread, even "ceph -s" hangs for a while. Ceph keeps retrying the request and failing until the request is manually removed, and rbd uploads time out. 
The mon logs show, <pre> -1> 2022-03-14T11:01:37.789+0000 7fbe63d09700 0 mon.juju-8c5f4a-sts-stein-bionic-0@0(leader) e3 handle_command mon_command({"prefix": "auth add", "entity": "client.testuser02", "caps": "mon 'allow r' osd 'allow rw pool=testpool01'"} v 0) v1 0> 2022-03-14T11:01:37.797+0000 7fbe63d09700 -1 *** Caught signal (Aborted) ** in thread 7fbe63d09700 thread_name:ms_dispatch ceph version 15.2.14 (cd3bb7e87a2f62c1b862ff3fd8b1eec13391a5be) octopus (stable) 1: (()+0x12980) [0x7fbe6ec02980] 2: (gsignal()+0xc7) [0x7fbe6e83de87] 3: (abort()+0x141) [0x7fbe6e83f7f1] 4: (()+0x8c957) [0x7fbe6f451957] 5: (()+0x92ae6) [0x7fbe6f457ae6] 6: (()+0x92b21) [0x7fbe6f457b21] 7: (()+0x92d54) [0x7fbe6f457d54] 8: (bool ceph::common::cmd_getval<std::vector<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >, std::allocator<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > > > >(std::map<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >, boost::variant<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >, bool, long, double, std::vector<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >, std::allocator<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > > >, std::vector<long, std::allocator<long> >, std::vector<double, std::allocator<double> > >, std::less<void>, std::allocator<std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const, boost::variant<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >, bool, long, double, std::vector<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >, std::allocator<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > > >, std::vector<long, std::allocator<long> >, std::vector<double, std::allocator<double> > > > > > const&, 
std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const&, std::vector<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >, std::allocator<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > > >&)+0x108) [0x561b5882d218] 9: (Monitor::_generate_command_map(std::map<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >, boost::variant<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >, bool, long, double, std::vector<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >, std::allocator<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > > >, std::vector<long, std::allocator<long> >, std::vector<double, std::allocator<double> > >, std::less<void>, std::allocator<std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const, boost::variant<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >, bool, long, double, std::vector<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >, std::allocator<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > > >, std::vector<long, std::allocator<long> >, std::vector<double, std::allocator<double> > > > > >&, std::map<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >, std::less<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > >, std::allocator<std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > > > >&)+0x10d) [0x561b587d4add] 10: (Monitor::handle_command(boost::intrusive_ptr<MonOpRequest>)+0x11fe) [0x561b58807cee] 11: 
(Monitor::dispatch_op(boost::intrusive_ptr<MonOpRequest>)+0xa5a) [0x561b5880e0ea] 12: (Monitor::_ms_dispatch(Message*)+0x51a) [0x561b5880f2ca] 13: (Dispatcher::ms_dispatch2(boost::intrusive_ptr<Message> const&)+0x58) [0x561b5883dab8] 14: (DispatchQueue::entry()+0x11c2) [0x7fbe70ac6d62] 15: (DispatchQueue::DispatchThread::entry()+0xd) [0x7fbe70b6625d] 16: (()+0x76db) [0x7fbe6ebf76db] 17: (clone()+0x3f) [0x7fbe6e92061f] NOTE: a copy of the executable, or `objdump -rdS <executable>` is needed to interpret this. --- logging levels --- 0/ 5 none 0/ 1 lockdep 0/ 1 context 1/ 1 crush 1/ 5 mds 1/ 5 mds_balancer 1/ 5 mds_locker 1/ 5 mds_log 1/ 5 mds_log_expire 1/ 5 mds_migrator 0/ 1 buffer 0/ 1 timer 0/ 1 filer 0/ 1 striper 0/ 1 objecter 0/ 5 rados 0/ 5 rbd 0/ 5 rbd_mirror 0/ 5 rbd_replay 0/ 5 rbd_rwl 0/ 5 journaler 0/ 5 objectcacher 0/ 5 immutable_obj_cache 0/ 5 client 1/ 5 osd 0/ 5 optracker 0/ 5 objclass 1/ 3 filestore 1/ 3 journal 0/ 0 ms 1/ 5 mon 0/10 monc 1/ 5 paxos 0/ 5 tp 1/ 5 auth 1/ 5 crypto 1/ 1 finisher 1/ 1 reserver 1/ 5 heartbeatmap 1/ 5 perfcounter 1/ 5 rgw 1/ 5 rgw_sync 1/10 civetweb 1/ 5 javaclient 1/ 5 asok 1/ 1 throttle 0/ 0 refs 1/ 5 compressor 1/ 5 bluestore 1/ 5 bluefs 1/ 3 bdev 1/ 5 kstore 4/ 5 rocksdb 4/ 5 leveldb 4/ 5 memdb 1/ 5 fuse 1/ 5 mgr 1/ 5 mgrc 1/ 5 dpdk 1/ 5 eventtrace 1/ 5 prioritycache 0/ 5 test -2/-2 (syslog threshold) -1/-1 (stderr threshold) --- pthread ID / name mapping for recent threads --- 7fbe60502700 / ms_dispatch 7fbe61504700 / rocksdb:dump_st 7fbe62506700 / fn_monstore 7fbe63d09700 / ms_dispatch 7fbe6650e700 / safe_timer 7fbe69d15700 / rocksdb:low0 7fbe6b55e700 / admin_socket 7fbe79673540 / ceph-mon max_recent 10000 max_new 1000 log_file /var/log/ceph/ceph-mon.juju-8c5f4a-sts-stein-bionic-0.log --- end dump of recent events --- </pre> This is the code that leads to the issue, in cmdparse.cc, # git branch * (HEAD detached at v15.2.14) <pre> bool cmd_getval(const cmdmap_t& cmdmap, const std::string& k, bool& val) { /* * 
Specialized getval for booleans. CephBool didn't exist before Nautilus, * so earlier clients are sent a CephChoices argdesc instead, and will * send us a "--foo-bar" value string for boolean arguments. */ if (cmdmap.count(k)) { try { val = boost::get<bool>(cmdmap.find(k)->second); return true; } catch (boost::bad_get&) { try { std::string expected = "--" + k; std::replace(expected.begin(), expected.end(), '_', '-'); std::string v_str = boost::get<std::string>(cmdmap.find(k)->second); if (v_str == expected) { val = true; return true; } else { throw bad_cmd_get(k, cmdmap); } } catch (boost::bad_get&) { throw bad_cmd_get(k, cmdmap); } } } return false; } } </pre>