1
|
|
2
|
ceph 12.2.7
|
3
|
|
4
|
1. ceph-mon MON118 have connection with state STATE_CONNECTING_WAIT_BANNER_AND_IDENTIFY;
|
5
|
2. ceph-mon MON119 try to connect MON118
|
6
|
3. ceph-mon MON118 reply MON119 CEPH_MSGR_TAG_WAIT
|
7
|
stuck on 2 3.
|
8
|
|
9
|
---------------------------------------------------------------------------------------------------------------------------------------------------------------------
|
10
|
MON118
|
11
|
10.252.253.118
|
12
|
|
13
|
|
14
|
2018-11-29 16:18:18.071753 7f1c7f921700 10 mon.MON118@0(electing).paxosservice(health 6091..6337) restart
|
15
|
2018-11-29 16:18:18.071897 7f1c7f921700 1 -- 10.252.253.118:6789/0 _send_message--> mon.1 10.252.253.119:6789/0 -- election(bd03b9ee-3273-4a4f-91e0-7ce427fcb388 propose 50777) v7 -- ?+0 0x565167714f00
|
16
|
2018-11-29 16:18:18.071903 7f1c7f921700 1 -- 10.252.253.118:6789/0 --> 10.252.253.119:6789/0 -- election(bd03b9ee-3273-4a4f-91e0-7ce427fcb388 propose 50777) v7 -- 0x565167714f00 con 0
|
17
|
2018-11-29 16:18:18.071913 7f1c7f921700 1 -- 10.252.253.118:6789/0 _send_message--> mon.2 10.252.253.130:6789/0 -- election(bd03b9ee-3273-4a4f-91e0-7ce427fcb388 propose 50777) v7 -- ?+0 0x565159790900
|
18
|
2018-11-29 16:18:18.071914 7f1c7c11a700 10 -- 10.252.253.118:6789/0 >> 10.252.253.119:6789/0 conn(0x565156585800 :-1 s=STATE_CONNECTING_WAIT_BANNER_AND_IDENTIFY pgs=47057 cs=14 l=0).handle_write
|
19
|
2018-11-29 16:18:18.071919 7f1c7f921700 1 -- 10.252.253.118:6789/0 --> 10.252.253.130:6789/0 -- election(bd03b9ee-3273-4a4f-91e0-7ce427fcb388 propose 50777) v7 -- 0x565159790900 con 0
|
20
|
2018-11-29 16:18:18.071921 7f1c7c11a700 10 -- 10.252.253.118:6789/0 >> 10.252.253.119:6789/0 conn(0x565156585800 :-1 s=l pgs=47057 cs=14 l=0)._try_send sent bytes 0 remaining bytes 0
|
21
|
2018-11-29 16:18:18.071927 7f1c7f921700 10 -- 10.252.253.118:6789/0 dispatch_throttle_release 533 to dispatch throttler 533/536870912
|
22
|
2018-11-29 16:18:18.071928 7f1c7c91b700 10 -- 10.252.253.118:6789/0 >> 10.252.253.130:6789/0 conn(0x565156587000 :-1 s=STATE_OPEN pgs=42481 cs=17 l=0).handle_write
|
23
|
2018-11-29 16:18:18.071934 7f1c7f921700 1 -- 10.252.253.118:6789/0 <== mon.0 10.252.253.118:6789/0 0 ==== log(last 50151) v1 ==== 0+0+0 (0 0 0) 0x5651612a72c0 con 0x56515687e000
|
24
|
2018-11-29 16:18:18.071940 7f1c7f921700 20 mon.MON118@0(electing) e3 _ms_dispatch existing session 0x565155fbd980 for mon.0 10.252.253.118:6789/0
|
25
|
2018-11-29 16:18:18.071943 7f1c7f921700 20 mon.MON118@0(electing) e3 caps allow *
|
26
|
2018-11-29 16:18:18.071944 7f1c7f921700 20 is_capable service=mon command= read on cap allow *
|
27
|
|
28
|
|
29
|
2018-11-29 16:18:24.313588 7f1c7b919700 10 Processor -- accept accepted incoming on sd 10
|
30
|
2018-11-29 16:18:24.313593 7f1c7b919700 10 -- 10.252.253.118:6789/0 >> - conn(0x565161da6000 :-1 s=STATE_NONE pgs=0 cs=0 l=0).accept sd=10
|
31
|
2018-11-29 16:18:24.313612 7f1c7b919700 1 -- 10.252.253.118:6789/0 >> - conn(0x565161da6000 :6789 s=STATE_ACCEPTING pgs=0 cs=0 l=0)._process_connection sd=10 -
|
32
|
2018-11-29 16:18:24.313623 7f1c7b919700 10 -- 10.252.253.118:6789/0 >> - conn(0x565161da6000 :6789 s=STATE_ACCEPTING pgs=0 cs=0 l=0)._try_send sent bytes 281 remaining bytes 0
|
33
|
2018-11-29 16:18:24.313626 7f1c7b919700 10 -- 10.252.253.118:6789/0 >> - conn(0x565161da6000 :6789 s=STATE_ACCEPTING_WAIT_BANNER_ADDR pgs=0 cs=0 l=0)._process_connection write banner and addr done: -
|
34
|
2018-11-29 16:18:24.313725 7f1c7b919700 10 -- 10.252.253.118:6789/0 >> - conn(0x565161da6000 :6789 s=STATE_ACCEPTING_WAIT_BANNER_ADDR pgs=0 cs=0 l=0)._process_connection accept peer addr is 10.252.253.119:6789/0
|
35
|
2018-11-29 16:18:24.313764 7f1c7b919700 10 -- 10.252.253.118:6789/0 >> 10.252.253.119:6789/0 conn(0x565161da6000 :6789 s=STATE_ACCEPTING_WAIT_CONNECT_MSG_AUTH pgs=0 cs=0 l=0)._process_connection accept of host_type 1, policy.lossy=0 policy.server=0 policy.standby=1 policy.resetcheck=1
|
36
|
2018-11-29 16:18:24.313773 7f1c7b919700 10 -- 10.252.253.118:6789/0 >> 10.252.253.119:6789/0 conn(0x565161da6000 :6789 s=STATE_ACCEPTING_WAIT_CONNECT_MSG_AUTH pgs=0 cs=0 l=0).handle_connect_msg accept my proto 13, their proto 13
|
37
|
2018-11-29 16:18:24.313776 7f1c7b919700 10 mon.MON118@0(leader) e3 ms_verify_authorizer 10.252.253.119:6789/0 mon protocol 2
|
38
|
2018-11-29 16:18:24.313826 7f1c7b919700 0 mon.MON118@0(leader) e3 ms_verify_authorizer bad authorizer from mon 10.252.253.119:6789/0
|
39
|
2018-11-29 16:18:24.313832 7f1c7b919700 0 -- 10.252.253.118:6789/0 >> 10.252.253.119:6789/0 conn(0x565161da6000 :6789 s=STATE_ACCEPTING_WAIT_CONNECT_MSG_AUTH pgs=0 cs=0 l=0).handle_connect_msg: challenging authorizer
|
40
|
2018-11-29 16:18:24.313838 7f1c7b919700 10 -- 10.252.253.118:6789/0 >> 10.252.253.119:6789/0 conn(0x565161da6000 :6789 s=STATE_ACCEPTING_WAIT_CONNECT_MSG_AUTH pgs=0 cs=0 l=0)._try_send sent bytes 58 remaining bytes 0
|
41
|
2018-11-29 16:18:24.313929 7f1c7b919700 10 -- 10.252.253.118:6789/0 >> 10.252.253.119:6789/0 conn(0x565161da6000 :6789 s=STATE_ACCEPTING_WAIT_CONNECT_MSG_AUTH pgs=0 cs=0 l=0)._process_connection accept of host_type 1, policy.lossy=0 policy.server=0 policy.standby=1 policy.resetcheck=1
|
42
|
2018-11-29 16:18:24.313933 7f1c7b919700 10 -- 10.252.253.118:6789/0 >> 10.252.253.119:6789/0 conn(0x565161da6000 :6789 s=STATE_ACCEPTING_WAIT_CONNECT_MSG_AUTH pgs=0 cs=0 l=0).handle_connect_msg accept my proto 13, their proto 13
|
43
|
2018-11-29 16:18:24.313935 7f1c7b919700 10 mon.MON118@0(leader) e3 ms_verify_authorizer 10.252.253.119:6789/0 mon protocol 2
|
44
|
2018-11-29 16:18:24.313952 7f1c7b919700 10 -- 10.252.253.118:6789/0 >> 10.252.253.119:6789/0 conn(0x565161da6000 :6789 s=STATE_ACCEPTING_WAIT_CONNECT_MSG_AUTH pgs=0 cs=0 l=0).handle_connect_msg accept setting up session_security.
|
45
|
2018-11-29 16:18:24.313957 7f1c7b919700 10 -- 10.252.253.118:6789/0 >> 10.252.253.119:6789/0 conn(0x565161da6000 :6789 s=STATE_ACCEPTING_WAIT_CONNECT_MSG_AUTH pgs=0 cs=0 l=0).handle_connect_msg accept existing 0x565156585800.gseq 47057 <= 62458, looks ok
|
46
|
2018-11-29 16:18:24.313959 7f1c7b919700 0 -- 10.252.253.118:6789/0 >> 10.252.253.119:6789/0 conn(0x565161da6000 :6789 s=STATE_ACCEPTING_WAIT_CONNECT_MSG_AUTH pgs=0 cs=0 l=0).handle_connect_msg accept connect_seq 14 vs existing csq=14 existing_state=STATE_CONNECTING_WAIT_BANNER_AND_IDENTIFY
|
47
|
2018-11-29 16:18:24.313961 7f1c7b919700 10 -- 10.252.253.118:6789/0 >> 10.252.253.119:6789/0 conn(0x565161da6000 :6789 s=STATE_ACCEPTING_WAIT_CONNECT_MSG_AUTH pgs=0 cs=0 l=0).handle_connect_msg accept connection race, existing 0x565156585800.cseq 14 == 14, sending WAIT
|
48
|
2018-11-29 16:18:24.313968 7f1c7b919700 10 -- 10.252.253.118:6789/0 >> 10.252.253.119:6789/0 conn(0x565161da6000 :6789 s=STATE_ACCEPTING_WAIT_CONNECT_MSG_AUTH pgs=0 cs=0 l=0)._try_send sent bytes 62 remaining bytes 0
|
49
|
2018-11-29 16:18:24.314020 7f1c7b919700 1 -- 10.252.253.118:6789/0 >> 10.252.253.119:6789/0 conn(0x565161da6000 :6789 s=STATE_ACCEPTING_WAIT_CONNECT_MSG pgs=0 cs=0 l=0).read_bulk peer close file descriptor 10
|
50
|
2018-11-29 16:18:24.314023 7f1c7b919700 1 -- 10.252.253.118:6789/0 >> 10.252.253.119:6789/0 conn(0x565161da6000 :6789 s=STATE_ACCEPTING_WAIT_CONNECT_MSG pgs=0 cs=0 l=0).read_until read failed
|
51
|
2018-11-29 16:18:24.314026 7f1c7b919700 1 -- 10.252.253.118:6789/0 >> 10.252.253.119:6789/0 conn(0x565161da6000 :6789 s=STATE_ACCEPTING_WAIT_CONNECT_MSG pgs=0 cs=0 l=0)._process_connection read connect msg failed
|
52
|
2018-11-29 16:18:24.314033 7f1c7b919700 10 -- 10.252.253.118:6789/0 >> 10.252.253.119:6789/0 conn(0x565161da6000 :6789 s=STATE_ACCEPTING_WAIT_CONNECT_MSG pgs=0 cs=0 l=0).fault with nothing to send, going to standby
|
53
|
|
54
|
|
55
|
|
56
|
-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
|
57
|
MON119
|
58
|
10.252.253.119
|
59
|
|
60
|
2018-11-29 13:51:39.485325 7f4a114af700 0 -- 10.252.253.119:6789/0 >> 10.252.253.118:6789/0 conn(0x5603245ea000 :-1 s=STATE_OPEN pgs=1799 cs=1 l=0).fault initiating reconnect
|
61
|
2018-11-29 13:51:45.514862 7f4a10cae700 0 -- 10.252.253.119:6789/0 >> 10.252.253.118:6789/0 conn(0x5603245ea000 :-1 s=STATE_ACCEPTING_WAIT_CONNECT_MSG_AUTH pgs=1799 cs=2 l=0).handle_connect_msg: challenging authorizer
|
62
|
2018-11-29 13:52:23.229339 7f4a10cae700 0 -- 10.252.253.119:6789/0 >> 10.252.253.118:6789/0 conn(0x5603245ea000 :-1 s=STATE_OPEN pgs=41615 cs=3 l=0).fault initiating reconnect
|
63
|
2018-11-29 13:52:29.848002 7f4a11cb0700 0 -- 10.252.253.119:6789/0 >> 10.252.253.118:6789/0 conn(0x5603245ea000 :-1 s=STATE_ACCEPTING_WAIT_CONNECT_MSG_AUTH pgs=41615 cs=4 l=0).handle_connect_msg: challenging authorizer
|
64
|
2018-11-29 13:53:04.637339 7f4a11cb0700 0 -- 10.252.253.119:6789/0 >> 10.252.253.118:6789/0 conn(0x5603245ea000 :-1 s=STATE_OPEN pgs=41673 cs=5 l=0).fault initiating reconnect
|
65
|
2018-11-29 13:53:36.776084 7f4a114af700 0 -- 10.252.253.119:6789/0 >> 10.252.253.118:6789/0 conn(0x5603245ea000 :-1 s=STATE_ACCEPTING_WAIT_CONNECT_MSG_AUTH pgs=41673 cs=6 l=0).handle_connect_msg: challenging authorizer
|
66
|
2018-11-29 13:53:59.869327 7f4a114af700 0 -- 10.252.253.119:6789/0 >> 10.252.253.118:6789/0 conn(0x5603245ea000 :-1 s=STATE_OPEN pgs=41887 cs=7 l=0).fault initiating reconnect
|
67
|
2018-11-29 13:54:33.453320 7f4a114af700 0 -- 10.252.253.119:6789/0 >> 10.252.253.118:6789/0 conn(0x5603245ea000 :-1 s=STATE_OPEN pgs=41930 cs=9 l=0).fault initiating reconnect
|
68
|
2018-11-29 13:54:49.272217 7f4a114af700 0 -- 10.252.253.119:6789/0 >> 10.252.253.118:6789/0 conn(0x5603245ea000 :-1 s=STATE_ACCEPTING_WAIT_CONNECT_MSG_AUTH pgs=41930 cs=10 l=0).handle_connect_msg: challenging authorizer
|
69
|
....
|
70
|
|
71
|
2018-11-29 16:18:39.317237 7f4a114af700 10 NetHandler reconnect reconnect: Operation already in progress
|
72
|
2018-11-29 16:18:39.317250 7f4a114af700 10 -- 10.252.253.119:6789/0 >> 10.252.253.118:6789/0 conn(0x5603245ea000 :-1 s=STATE_CONNECTING_RE pgs=43339 cs=14 l=0)._process_connection nonblock connect inprogress
|
73
|
2018-11-29 16:18:39.317458 7f4a114af700 10 -- 10.252.253.119:6789/0 >> 10.252.253.118:6789/0 conn(0x5603245ea000 :-1 s=STATE_CONNECTING_RE pgs=43339 cs=14 l=0)._process_connection connect successfully, ready to send banner
|
74
|
2018-11-29 16:18:39.317472 7f4a114af700 10 -- 10.252.253.119:6789/0 >> 10.252.253.118:6789/0 conn(0x5603245ea000 :-1 s=STATE_CONNECTING_RE pgs=43339 cs=14 l=0)._try_send sent bytes 9 remaining bytes 0
|
75
|
2018-11-29 16:18:39.317478 7f4a114af700 10 -- 10.252.253.119:6789/0 >> 10.252.253.118:6789/0 conn(0x5603245ea000 :-1 s=STATE_CONNECTING_WAIT_BANNER_AND_IDENTIFY pgs=43339 cs=14 l=0)._process_connection connect write banner done: 10.252.253.118:6789/0
|
76
|
2018-11-29 16:18:39.317485 7f4a114af700 10 -- 10.252.253.119:6789/0 >> 10.252.253.118:6789/0 conn(0x5603245ea000 :-1 s=STATE_CONNECTING_WAIT_BANNER_AND_IDENTIFY pgs=43339 cs=14 l=0).handle_write
|
77
|
2018-11-29 16:18:39.317491 7f4a114af700 10 -- 10.252.253.119:6789/0 >> 10.252.253.118:6789/0 conn(0x5603245ea000 :-1 s=STATE_CONNECTING_WAIT_BANNER_AND_IDENTIFY pgs=43339 cs=14 l=0)._try_send sent bytes 0 remaining bytes 0
|
78
|
2018-11-29 16:18:39.317601 7f4a114af700 10 -- 10.252.253.119:6789/0 >> 10.252.253.118:6789/0 conn(0x5603245ea000 :-1 s=STATE_CONNECTING_WAIT_BANNER_AND_IDENTIFY pgs=43339 cs=14 l=0)._try_send sent bytes 136 remaining bytes 0
|
79
|
2018-11-29 16:18:39.317607 7f4a114af700 10 -- 10.252.253.119:6789/0 >> 10.252.253.118:6789/0 conn(0x5603245ea000 :-1 s=STATE_CONNECTING_SEND_CONNECT_MSG pgs=43339 cs=14 l=0)._process_connection connect sent my addr 10.252.253.119:6789/0
|
80
|
2018-11-29 16:18:39.317611 7f4a114af700 10 mon.MON119@1(electing) e3 ms_get_authorizer for mon
|
81
|
2018-11-29 16:18:39.317661 7f4a114af700 10 -- 10.252.253.119:6789/0 >> 10.252.253.118:6789/0 conn(0x5603245ea000 :-1 s=STATE_CONNECTING_SEND_CONNECT_MSG pgs=43339 cs=14 l=0)._process_connection connect_msg.authorizer_len=158 protocol=2
|
82
|
2018-11-29 16:18:39.317666 7f4a114af700 10 -- 10.252.253.119:6789/0 >> 10.252.253.118:6789/0 conn(0x5603245ea000 :-1 s=STATE_CONNECTING_SEND_CONNECT_MSG pgs=43339 cs=14 l=0)._process_connection connect sending gseq=62460 cseq=14 proto=13
|
83
|
2018-11-29 16:18:39.317674 7f4a114af700 10 -- 10.252.253.119:6789/0 >> 10.252.253.118:6789/0 conn(0x5603245ea000 :-1 s=STATE_CONNECTING_SEND_CONNECT_MSG pgs=43339 cs=14 l=0)._try_send sent bytes 191 remaining bytes 0
|
84
|
2018-11-29 16:18:39.317680 7f4a114af700 10 -- 10.252.253.119:6789/0 >> 10.252.253.118:6789/0 conn(0x5603245ea000 :-1 s=STATE_CONNECTING_WAIT_CONNECT_REPLY pgs=43339 cs=14 l=0).handle_write
|
85
|
2018-11-29 16:18:39.317682 7f4a114af700 10 -- 10.252.253.119:6789/0 >> 10.252.253.118:6789/0 conn(0x5603245ea000 :-1 s=STATE_CONNECTING_WAIT_CONNECT_REPLY pgs=43339 cs=14 l=0)._try_send sent bytes 0 remaining bytes 0
|
86
|
2018-11-29 16:18:39.317806 7f4a114af700 10 -- 10.252.253.119:6789/0 >> 10.252.253.118:6789/0 conn(0x5603245ea000 :-1 s=STATE_CONNECTING_WAIT_CONNECT_REPLY_AUTH pgs=43339 cs=14 l=0)._process_connection reply.authorizer_len=32
|
87
|
2018-11-29 16:18:39.317812 7f4a114af700 10 -- 10.252.253.119:6789/0 >> 10.252.253.118:6789/0 conn(0x5603245ea000 :-1 s=STATE_CONNECTING_WAIT_CONNECT_REPLY_AUTH pgs=43339 cs=14 l=0)._process_connection connect got auth challenge
|
88
|
2018-11-29 16:18:39.317821 7f4a114af700 10 -- 10.252.253.119:6789/0 >> 10.252.253.118:6789/0 conn(0x5603245ea000 :-1 s=STATE_CONNECTING_SEND_CONNECT_MSG pgs=43339 cs=14 l=0)._process_connection connect_msg.authorizer_len=158 protocol=2
|
89
|
2018-11-29 16:18:39.317825 7f4a114af700 10 -- 10.252.253.119:6789/0 >> 10.252.253.118:6789/0 conn(0x5603245ea000 :-1 s=STATE_CONNECTING_SEND_CONNECT_MSG pgs=43339 cs=14 l=0)._process_connection connect sending gseq=62460 cseq=14 proto=13
|
90
|
2018-11-29 16:18:39.317832 7f4a114af700 10 -- 10.252.253.119:6789/0 >> 10.252.253.118:6789/0 conn(0x5603245ea000 :-1 s=STATE_CONNECTING_SEND_CONNECT_MSG pgs=43339 cs=14 l=0)._try_send sent bytes 191 remaining bytes 0
|
91
|
2018-11-29 16:18:39.317837 7f4a114af700 10 -- 10.252.253.119:6789/0 >> 10.252.253.118:6789/0 conn(0x5603245ea000 :-1 s=STATE_CONNECTING_WAIT_CONNECT_REPLY pgs=43339 cs=14 l=0).handle_write
|
92
|
2018-11-29 16:18:39.317840 7f4a114af700 10 -- 10.252.253.119:6789/0 >> 10.252.253.118:6789/0 conn(0x5603245ea000 :-1 s=STATE_CONNECTING_WAIT_CONNECT_REPLY pgs=43339 cs=14 l=0)._try_send sent bytes 0 remaining bytes 0
|
93
|
2018-11-29 16:18:39.317934 7f4a114af700 10 -- 10.252.253.119:6789/0 >> 10.252.253.118:6789/0 conn(0x5603245ea000 :-1 s=STATE_CONNECTING_WAIT_CONNECT_REPLY_AUTH pgs=43339 cs=14 l=0)._process_connection reply.authorizer_len=36
|
94
|
2018-11-29 16:18:39.317944 7f4a114af700 1 -- 10.252.253.119:6789/0 >> 10.252.253.118:6789/0 conn(0x5603245ea000 :-1 s=STATE_CONNECTING_WAIT_CONNECT_REPLY_AUTH pgs=43339 cs=14 l=0).handle_connect_reply connect got WAIT (connection race)
|
95
|
2018-11-29 16:18:39.317947 7f4a114af700 1 -- 10.252.253.119:6789/0 >> 10.252.253.118:6789/0 conn(0x5603245ea000 :-1 s=STATE_WAIT pgs=43339 cs=14 l=0).process enter wait state, failing
|
96
|
2018-11-29 16:18:39.317958 7f4a114af700 10 -- 10.252.253.119:6789/0 >> 10.252.253.118:6789/0 conn(0x5603245ea000 :-1 s=STATE_CONNECTING pgs=43339 cs=14 l=0).fault waiting 15.000000
|
97
|
2018-11-29 16:18:39.527127 7f4a174bb700 11 mon.MON119@1(electing) e3 tick
|
98
|
2018-11-29 16:18:39.527144 7f4a174bb700 20 mon.MON119@1(electing) e3 sync_trim_providers
|
99
|
|
100
|
|
101
|
|
102
|
|
103
|
|
104
|
|
105
|
|
106
|
|
107
|
|
108
|
|
109
|
|
110
|
|
111
|
|