Bug #52573 (open)

CEPHADM_CHECK_PUBLIC_MEMBERSHIP fails: wrongly includes fe80::/10 link-local addresses

Added by Harry Coin over 2 years ago. Updated 7 months ago.

Status: New
Priority: Normal
Assignee: Paul Cuzner
Category: cephadm
Target version: -
% Done: 0%
Regression: No
Severity: 3 - minor

Description

In an otherwise fully operating cluster in a lab sandbox: the health check that verifies each host has an interface on the public network fails on IPv6 public networks about half the time -- whenever 'gather-facts' reports the interface's fe80::/10 link-local address instead of the global address that matches the cluster's public network.
e.g.

root@noc2:~# ceph health detail
HEALTH_WARN Public network(s) is not directly accessible from 2 cluster hosts
[WRN] CEPHADM_CHECK_PUBLIC_MEMBERSHIP: Public network(s) is not directly accessible from 2 cluster hosts
    noc2.1.quietfountain.com does not have an interface on any public network
    noc3.1.quietfountain.com does not have an interface on any public network
root@noc2:~# dig AAAA noc2.1.quietfountain.com

; <<>> DiG 9.16.8-Ubuntu <<>> AAAA noc2.1.quietfountain.com
;; global options: +cmd
;; Got answer:
;; ->>HEADER<<- opcode: QUERY, status: NOERROR, id: 58115
;; flags: qr rd ra; QUERY: 1, ANSWER: 1, AUTHORITY: 0, ADDITIONAL: 1

;; OPT PSEUDOSECTION:
; EDNS: version: 0, flags:; udp: 65494
;; QUESTION SECTION:
;noc2.1.quietfountain.com.      IN      AAAA

;; ANSWER SECTION:
noc2.1.quietfountain.com. 0     IN      AAAA    fc00:1002:c7::42

;; Query time: 0 msec
;; SERVER: 127.0.0.53#53(127.0.0.53)
;; WHEN: Fri Sep 10 14:14:39 CDT 2021
;; MSG SIZE  rcvd: 81

root@noc2:~# ip addr show lan0noc0iface
7: lan0noc0iface@lan0noc0port: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc noqueue state UP group default qlen 1000
    link/ether 52:54:ea:c2:f7:16 brd ff:ff:ff:ff:ff:ff
    inet 10.12.112.66/20 brd 10.12.127.255 scope global lan0noc0iface
       valid_lft forever preferred_lft forever
    inet6 fc00:1002:c7::42/64 scope global 
       valid_lft forever preferred_lft forever
    inet6 fe80::5054:eaff:fec2:f716/64 scope link 
       valid_lft forever preferred_lft forever
root@noc2:~# grep noc2.1.quietfountain.com /etc/hosts
10.12.112.66             noc2.1.quietfountain.com noc2  
fc00:1002:c7::42         noc2.1.quietfountain.com noc2  

root@noc2:~# ceph config get osd public_network 
fc00:1002:c7::/64

root@noc2:~# cephadm gather-facts
{
...
 "interfaces": {
...
 "lan0noc0iface": {
      "driver": "",
      "iftype": "logical",
      "ipv4_address": "10.12.112.66/20",
      "ipv6_address": "fe80::5054:eaff:fec2:f716/64",
      "lower_devs_list": [],
      "mtu": 1500,
      "nic_type": "ethernet",
      "operstate": "up",
      "speed": 10000,
      "upper_devs_list": []
    },
...
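The failure mode can be reproduced with a few lines of stdlib Python. The membership test below is a minimal sketch of what the check effectively does (not cephadm's actual code), using the addresses from the output above: the link-local address that gather-facts reports can never fall inside the configured public network, so the host is flagged even though its global address matches.

import ipaddress

public_net = ipaddress.ip_network("fc00:1002:c7::/64")

# Address gather-facts reported for lan0noc0iface (link-local):
reported = ipaddress.ip_address("fe80::5054:eaff:fec2:f716")
# Global address actually configured on the same interface:
actual = ipaddress.ip_address("fc00:1002:c7::42")

print(reported in public_net)  # False -> host flagged as having no public interface
print(actual in public_net)    # True  -> check would pass if this address were used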
Actions #1

Updated by Loïc Dachary over 2 years ago

  • Target version deleted (v16.2.6)
Actions #2

Updated by Sebastian Wagner over 2 years ago

  • Description updated (diff)
Actions #3

Updated by Paul Cuzner over 2 years ago

  • Assignee set to Paul Cuzner
Actions #4

Updated by Boris B 7 months ago

We have the same issue with `fe80` addresses:

root@0cc47a6df14e:/etc/ceph# ceph health detail
HEALTH_WARN Public network(s) is not directly accessible from 1 cluster hosts
[WRN] CEPHADM_CHECK_PUBLIC_MEMBERSHIP: Public network(s) is not directly accessible from 1 cluster hosts
    0cc47a6df14e does not have an interface on any public network

root@0cc47a6df14e:/etc/ceph# ip a | grep inet6
    inet6 ::1/128 scope host
    inet6 fe80::d49d:69ff:fe6d:5126/64 scope link
    inet6 fd01:1:f00f:443::10/64 scope global
    inet6 fe80::d49d:69ff:fe6d:5126/64 scope link

root@0cc47a6df14e:/etc/ceph# cephadm gather-facts
...
  "hostname": "0cc47a6df14e",
  "interfaces": {
    "bond0": {
      "driver": "",
      "iftype": "logical",
      "ipv4_address": "",
      "ipv6_address": "fe80::d49d:69ff:fe6d:5126/64",
      "lower_devs_list": [
        "enp4s0f0",
        "enp4s0f1" 
      ],
      "mtu": 9100,
      "nic_type": "bonding",
      "operstate": "up",
      "speed": 20000,
      "upper_devs_list": [
        "bond0.443" 
      ]
    },
    "bond0.443": {
      "driver": "",
      "iftype": "logical",
      "ipv4_address": "172.25.13.10/24",
      "ipv6_address": "fe80::d49d:69ff:fe6d:5126/64",
      "lower_devs_list": [
        "bond0" 
      ],
      "mtu": 9100,
      "nic_type": "ethernet",
      "operstate": "up",
      "speed": 20000,
      "upper_devs_list": []
    },
...
root@0cc47a6df14e:/etc/ceph# ceph config get osd public_network
fd01:1:f00f:443::/64

root@0cc47a6df14e:/etc/ceph# uname -a
Linux 0cc47a6df14e 5.15.0-83-generic #92~20.04.1-Ubuntu SMP Mon Aug 21 14:00:49 UTC 2023 x86_64 x86_64 x86_64 GNU/Linux
root@0cc47a6df14e:/etc/ceph# lsb_release -a
No LSB modules are available.
Distributor ID:    Ubuntu
Description:    Ubuntu 20.04.6 LTS
Release:    20.04
Codename:    focal
Actions #5

Updated by Boris B 7 months ago

It seems to be related to the ordering of entries in `/proc/net/if_inet6`:

root@0cc47a6df14e:~# cat /proc/net/if_inet6
fe80000000000000d49d69fffe6d5126 05 40 20 80 bond0.443
fe80000000000000d49d69fffe6d5126 04 40 20 80    bond0
fd010001f00f04430000000000000010 05 40 00 80 bond0.443
00000000000000000000000000000001 01 80 10 80       lo

root@0cc47a6df330:~# cat /proc/net/if_inet6
00000000000000000000000000000001 01 80 10 80       lo
fd010001f00f04430000000000000011 05 40 00 80 bond0.443
fe800000000000007c4898fffe938928 05 40 20 80 bond0.443
fe800000000000007c4898fffe938928 04 40 20 80    bond0

Top host is shown in the warning, bottom host is fine.
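If gather-facts simply takes the first entry per interface from this file, skipping link-local entries (scope 0x20) when selecting an address would make the result order-independent. Below is a minimal sketch of that selection, assuming the standard if_inet6 field layout (address, ifindex, prefix length, scope, flags, device name, all hex except the name); first_global_ipv6 is a hypothetical helper, not cephadm code.

import ipaddress

IFA_LINK = 0x20  # scope value for link-local entries in /proc/net/if_inet6

def first_global_ipv6(ifname, path="/proc/net/if_inet6"):
    """Return the first non-link-local IPv6 address for ifname,
    regardless of the order the kernel lists entries in."""
    with open(path) as f:
        for line in f:
            hex_addr, ifindex, prefixlen, scope, flags, name = line.split()
            if name != ifname or int(scope, 16) == IFA_LINK:
                continue  # skip other interfaces and link-local entries
            addr = ipaddress.ip_address(bytes.fromhex(hex_addr))
            return f"{addr}/{int(prefixlen, 16)}"
    return None

print(first_global_ipv6("bond0.443"))  # fd01:1:f00f:443::10/64 on either host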
