Backport #19070
Updated by Nathan Cutler about 7 years ago
https://github.com/ceph/ceph/pull/13605 1. Ceph version: ceph version 0.94.10 (b1e0532418e4631af01acbc0cedd426f1905f4af) 2. Test script: [root@ol7 src]# cat test/mon/mon-rebuild-store.sh <pre><code class="c"> #!/bin/bash source test/mon/mon-test-helpers.sh source test/osd/osd-test-helpers.sh fsid=$(uuidgen) function run() { local dir=$1 export CEPH_CONF=$dir/ceph.conf export CEPH_MON="127.0.0.1:7107" export CEPH_ARGS CEPH_ARGS+="--fsid=$fsid --auth-supported=none " CEPH_ARGS+="--mon-host=$CEPH_MON " test_mon_rebuild_store $dir || return 1 } function test_mon_rebuild_store() { local dir=$1 run_mon $dir a --public-addr $CEPH_MON || return 1 #write conf, which is needed in update_monitor cat <<EOF >> $CEPH_CONF [global] fsid = $fsid auth cluster required = none auth service required = none auth client required = none [mon.a] host = localhost mon data = $dir/a mon addr = $CEPH_MON EOF run_osd $dir 0 || return 1 run_osd $dir 1 || return 1 ./rados -p rbd bench 10 write --no-cleanup kill_daemons $dir local ms=$dir/mon-store mkdir $ms # collect the cluster map from OSDs for osd in {0..1}; do ./ceph-objectstore-tool --data-path $dir/$osd --journal-path $dir/$osd/journal --op update-mon-db --mon-store-path $ms || return 1 done ./ceph-monstore-tool $dir/mon-store rebuild || return 1 # backup corrupted store.db just in case mv $dir/a/store.db $dir/a/store.db.corrupted mv $dir/mon-store/store.db $dir/a/store.db #start mon ./ceph-mon \ --id a \ --mon-osd-full-ratio=.99 \ --mon-data-avail-crit=1 \ --paxos-propose-interval=0.1 \ --osd-crush-chooseleaf-type=0 \ --osd-pool-default-erasure-code-directory=.libs \ --debug-mon 20 \ --debug-ms 20 \ --debug-paxos 20 \ --chdir= \ --mon-data=$dir/a \ --log-file=$dir/a/log \ --mon-cluster-log-file=$dir/a/log \ --run-dir=$dir/a \ --pid-file=$dir/a/\$name.pid || return 1 } main mon_rebuild_store # Local Variables: # compile-command: "cd ../.. 
; make -j4 && test/mon/mon-rebuild-store.sh" # End: </code></pre> 3. Stack dump: <pre><code class="c"> test_mon_rebuild_store: 49: ./ceph-monstore-tool testdir/mon_rebuild_store/mon-store rebuild terminate called after throwing an instance of 'std::logic_error' what(): basic_string::_S_construct null not valid *** Caught signal (Aborted) ** in thread 7fd433830780 ceph version 0.94.10 (b1e0532418e4631af01acbc0cedd426f1905f4af) 1: ./ceph-monstore-tool() [0x52c204] 2: (()+0xf100) [0x7fd432dac100] 3: (gsignal()+0x37) [0x7fd43144d5f7] 4: (abort()+0x148) [0x7fd43144ece8] 5: (__gnu_cxx::__verbose_terminate_handler()+0x165) [0x7fd431d4e9d5] 6: (()+0x5e946) [0x7fd431d4c946] 7: (()+0x5e973) [0x7fd431d4c973] 8: (()+0x5eb93) [0x7fd431d4cb93] 9: (std::__throw_logic_error(char const*)+0x77) [0x7fd431da1757] 10: (char* std::string::_S_construct<char const*>(char const*, char const*, std::allocator<char> const&, std::forward_iterator_tag)+0xa1) [0x7fd431dad5a1] 11: (std::basic_string<char, std::char_traits<char>, std::allocator<char> >::basic_string(char const*, std::allocator<char> const&)+0x38) [0x7fd431dad958] 12: (rebuild_monstore(char const*, std::vector<std::string, std::allocator<std::string> >&, MonitorDBStore&)+0x848) [0x5102d8] 13: (main()+0x3e04) [0x50a614] 14: (__libc_start_main()+0xf5) [0x7fd431439b15] 15: ./ceph-monstore-tool() [0x50be39] 2017-02-24 10:13:54.268339 7fd433830780 -1 *** Caught signal (Aborted) ** in thread 7fd433830780 </code></pre> Please see the attached file "test-mon-rebuild-store2.log" for detailed information.