Actions
Bug #52573
open
CEPHADM_CHECK_PUBLIC_MEMBERSHIP - fails, wrongly includes fe80::/8 addresses
% Done:
0%
Source:
Tags:
Backport:
Regression:
No
Severity:
3 - minor
Reviewed:
Affected Versions:
ceph-qa-suite:
Pull request ID:
Crash signature (v1):
Crash signature (v2):
Description
In an otherwise fully operating cluster in a lab sandbox: the health check regarding hosts having interfaces on public networks fails on ipv6 public networks about half the time -- when 'gather facts' lists the public ipv6 network as the fe80::/8 member instead of the interface matching the cluster public network address.
e.g.
root@noc2:~# ceph health detail HEALTH_WARN Public network(s) is not directly accessible from 2 cluster hosts [WRN] CEPHADM_CHECK_PUBLIC_MEMBERSHIP: Public network(s) is not directly accessible from 2 cluster hosts noc2.1.quietfountain.com does not have an interface on any public network noc3.1.quietfountain.com does not have an interface on any public network root@noc2:~# dig AAAA noc2.1.quietfountain.com ; <<>> DiG 9.16.8-Ubuntu <<>> AAAA noc2.1.quietfountain.com ;; global options: +cmd ;; Got answer: ;; ->>HEADER<<- opcode: QUERY, status: NOERROR, id: 58115 ;; flags: qr rd ra; QUERY: 1, ANSWER: 1, AUTHORITY: 0, ADDITIONAL: 1 ;; OPT PSEUDOSECTION: ; EDNS: version: 0, flags:; udp: 65494 ;; QUESTION SECTION: ;noc2.1.quietfountain.com. IN AAAA ;; ANSWER SECTION: noc2.1.quietfountain.com. 0 IN AAAA fc00:1002:c7::42 ;; Query time: 0 msec ;; SERVER: 127.0.0.53#53(127.0.0.53) ;; WHEN: Fri Sep 10 14:14:39 CDT 2021 ;; MSG SIZE rcvd: 81 root@noc2:~# ip addr show lan0noc0iface 7: lan0noc0iface@lan0noc0port: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc noqueue state UP group default qlen 1000 link/ether 52:54:ea:c2:f7:16 brd ff:ff:ff:ff:ff:ff inet 10.12.112.66/20 brd 10.12.127.255 scope global lan0noc0iface valid_lft forever preferred_lft forever inet6 fc00:1002:c7::42/64 scope global valid_lft forever preferred_lft forever inet6 fe80::5054:eaff:fec2:f716/64 scope link valid_lft forever preferred_lft forever root@noc2:~# grep noc2.1.quietfountain.com /etc/hosts 10.12.112.66 noc2.1.quietfountain.com noc2 fc00:1002:c7::42 noc2.1.quietfountain.com noc2 root@noc2:~# ceph config get osd public_network fc00:1002:c7::/64 root@noc2:~# cephadm gather-facts { ... "interfaces": { ... "lan0noc0iface": { "driver": "", "iftype": "logical", "ipv4_address": "10.12.112.66/20", "ipv6_address": "fe80::5054:eaff:fec2:f716/64", "lower_devs_list": [], "mtu": 1500, "nic_type": "ethernet", "operstate": "up", "speed": 10000, "upper_devs_list": [] }, ...
Updated by Boris B 7 months ago
We have the same issue with `fe80` addresses:
root@0cc47a6df14e:/etc/ceph# ceph health detail HEALTH_WARN Public network(s) is not directly accessible from 1 cluster hosts [WRN] CEPHADM_CHECK_PUBLIC_MEMBERSHIP: Public network(s) is not directly accessible from 1 cluster hosts 0cc47a6df14e does not have an interface on any public network root@0cc47a6df14e:/etc/ceph# ip a | grep inet6 inet6 ::1/128 scope host inet6 fe80::d49d:69ff:fe6d:5126/64 scope link inet6 fd01:1:f00f:443::10/64 scope global inet6 fe80::d49d:69ff:fe6d:5126/64 scope link root@0cc47a6df14e:/etc/ceph# cephadm gather-facts ... "hostname": "0cc47a6df14e", "interfaces": { "bond0": { "driver": "", "iftype": "logical", "ipv4_address": "", "ipv6_address": "fe80::d49d:69ff:fe6d:5126/64", "lower_devs_list": [ "enp4s0f0", "enp4s0f1" ], "mtu": 9100, "nic_type": "bonding", "operstate": "up", "speed": 20000, "upper_devs_list": [ "bond0.443" ] }, "bond0.443": { "driver": "", "iftype": "logical", "ipv4_address": "172.25.13.10/24", "ipv6_address": "fe80::d49d:69ff:fe6d:5126/64", "lower_devs_list": [ "bond0" ], "mtu": 9100, "nic_type": "ethernet", "operstate": "up", "speed": 20000, "upper_devs_list": [] }, ... root@0cc47a6df14e:/etc/ceph# ceph config get osd public_network fd01:1:f00f:443::/64 root@0cc47a6df14e:/etc/ceph# uname -a Linux 0cc47a6df14e 5.15.0-83-generic #92~20.04.1-Ubuntu SMP Mon Aug 21 14:00:49 UTC 2023 x86_64 x86_64 x86_64 GNU/Linux root@0cc47a6df14e:/etc/ceph# lsb_release -a No LSB modules are available. Distributor ID: Ubuntu Description: Ubuntu 20.04.6 LTS Release: 20.04 Codename: focal
Updated by Boris B 7 months ago
It seems to be related to the sorting of `/proc/net/if_inet6`
root@0cc47a6df14e:~# cat /proc/net/if_inet6 fe80000000000000d49d69fffe6d5126 05 40 20 80 bond0.443 fe80000000000000d49d69fffe6d5126 04 40 20 80 bond0 fd010001f00f04430000000000000010 05 40 00 80 bond0.443 00000000000000000000000000000001 01 80 10 80 lo root@0cc47a6df330:~# cat /proc/net/if_inet6 00000000000000000000000000000001 01 80 10 80 lo fd010001f00f04430000000000000011 05 40 00 80 bond0.443 fe800000000000007c4898fffe938928 05 40 20 80 bond0.443 fe800000000000007c4898fffe938928 04 40 20 80 bond0
Top host is shown in the warning, bottom host is fine.
Actions