Project

General

Profile

Bug #43063

RoCEv2 cannot communicate between machines in different VLANs

Added by yong xing 9 months ago.

Status:
New
Priority:
Normal
Assignee:
-
Category:
AsyncMessenger
Target version:
% Done:

0%

Source:
Community (user)
Tags:
Messengers
Backport:
v14.2.4
Regression:
No
Severity:
1 - critical
Reviewed:
Affected Versions:
ceph-qa-suite:
Pull request ID:
Crash signature:

Description

I used the officially released Docker image ceph/daemon:v4.0.6-stable-4.0-nautilus-centos-7-x86_64,
which corresponds to source code version nautilus 14.2.4 (commit: 75f4de193b3ea58512f204623e6c5a16e6c1e1ba).

I found that servers in different VLANs cannot communicate using the Ceph async+rdma messenger, while servers in the same VLAN can.
The root cause is that the behavior of the binary compiled from the source code depends on the build environment.

Code in ceph/src/msg/async/rdma/Infiniband.cc as follows:

33 Port::Port(CephContext *cct, struct ibv_context* ictxt, uint8_t ipn): ctxt(ictxt), port_num(ipn), port_attr(new ibv_port_attr), gid_idx(0)
34 {
35 #ifdef HAVE_IBV_EXP
36 union ibv_gid cgid;
37 struct ibv_exp_gid_attr gid_attr;
38 bool malformed = false;
39
40 ldout(cct,1) << func << " using experimental verbs for gid" << dendl;
41 int r = ibv_query_port(ctxt, port_num, port_attr);
42 if (r == -1) {
43 lderr(cct) << func << " query port failed " << cpp_strerror(errno) << dendl;
44 ceph_abort();
45 }
46
47 lid = port_attr->lid;
48
49 // search for requested GID in GIDs table
50 ldout(cct, 1) << func << " looking for local GID " << (cct->_conf->ms_async_rdma_local_gid)
51 << " of type " << (cct->_conf->ms_async_rdma_roce_ver) << dendl;
52 r = sscanf(cct->_conf->ms_async_rdma_local_gid.c_str(),
53 "%02hhx%02hhx:%02hhx%02hhx:%02hhx%02hhx:%02hhx%02hhx"
54 ":%02hhx%02hhx:%02hhx%02hhx:%02hhx%02hhx:%02hhx%02hhx",
55 &cgid.raw[ 0], &cgid.raw[ 1],
56 &cgid.raw[ 2], &cgid.raw[ 3],
57 &cgid.raw[ 4], &cgid.raw[ 5],
58 &cgid.raw[ 6], &cgid.raw[ 7],
59 &cgid.raw[ 8], &cgid.raw[ 9],
60 &cgid.raw[10], &cgid.raw[11],
61 &cgid.raw[12], &cgid.raw[13],
62 &cgid.raw[14], &cgid.raw[15]);
63
64 if (r != 16) {
65 ldout(cct, 1) << func << " malformed or no GID supplied, using GID index 0" << dendl;
66 malformed = true;
67 }
68
69 gid_attr.comp_mask = IBV_EXP_QUERY_GID_ATTR_TYPE;
70
71 for (gid_idx = 0; gid_idx < port_attr->gid_tbl_len; gid_idx++) {
72 r = ibv_query_gid(ctxt, port_num, gid_idx, &gid);
73 if (r) {
74 lderr(cct) << func << " query gid of port " << port_num << " index " << gid_idx << " failed " << cpp_strerror(errno) << dendl;
75 ceph_abort();
76 }
77 r = ibv_exp_query_gid_attr(ctxt, port_num, gid_idx, &gid_attr);
78 if (r) {
79 lderr(cct) << func << " query gid attributes of port " << port_num << " index " << gid_idx << " failed " << cpp_strerror(errno) << dendl;
80 ceph_abort();
81 }
82
83 if (malformed) break; // stay with gid_idx=0
84 if ( (gid_attr.type == cct->_conf->ms_async_rdma_roce_ver) &&
85 (memcmp(&gid, &cgid, 16) == 0) ) {
86 ldout(cct, 1) << func << " found at index " << gid_idx << dendl;
87 break;
88 }
89 }
90
91 if (gid_idx == port_attr->gid_tbl_len) {
92 lderr(cct) << func << " Requested local GID was not found in GID table" << dendl;
93 ceph_abort();
94 }
95 #else
96 int r = ibv_query_port(ctxt, port_num, port_attr);
97 if (r == -1) {
98 lderr(cct) << func << " query port failed " << cpp_strerror(errno) << dendl;
99 ceph_abort();
100 }
101
102 lid = port_attr->lid;
103 r = ibv_query_gid(ctxt, port_num, 0, &gid);
104 if (r) {
105 lderr(cct) << func << " query gid failed " << cpp_strerror(errno) << dendl;
106 ceph_abort();
107 }
108 #endif
109 }

The macro is conditionally defined in ceph/cmake/modules/Findverbs.cmake:
16 include(CheckCXXSourceCompiles)
17 CHECK_CXX_SOURCE_COMPILES("
18 #include <infiniband/verbs.h>
19 int main() {
20 struct ibv_context* ctxt;
21 struct ibv_exp_gid_attr gid_attr;
22 ibv_exp_query_gid_attr(ctxt, 1, 0, &gid_attr);
23 return 0;
24 } " HAVE_IBV_EXP)

This test program compiles only against the ibverbs library shipped with the MLNX_OFED driver (version 3.1-1.0.0 or later).
In most cases the OFED driver is not installed, so HAVE_IBV_EXP is not defined and the class member gid_idx stays hard-coded to 0 in Port::Port().
But RoCEv2 communication between machines requires a suitable gid_idx, chosen according to the specific environment.

Also available in: Atom PDF