Actions
Bug #63871
openCephAdm unable to add OSD on system with mixed disk configurations
Status:
New
Priority:
Normal
Assignee:
-
Target version:
-
% Done:
0%
Source:
Community (user)
Tags:
Backport:
Regression:
No
Severity:
3 - minor
Reviewed:
Affected Versions:
ceph-qa-suite:
Pull request ID:
Crash signature (v1):
Crash signature (v2):
Description
Trying to add an OSD with cephadm returns the following:
root@kiki ~
# ceph orch daemon add osd kiki:/dev/sdc
Error EINVAL: Traceback (most recent call last):
File "/usr/share/ceph/mgr/mgr_module.py", line 1757, in _handle_command
return self.handle_command(inbuf, cmd)
File "/usr/share/ceph/mgr/orchestrator/_interface.py", line 171, in handle_command
return dispatch[cmd['prefix']].call(self, cmd, inbuf)
File "/usr/share/ceph/mgr/mgr_module.py", line 462, in call
return self.func(mgr, **kwargs)
File "/usr/share/ceph/mgr/orchestrator/_interface.py", line 107, in <lambda>
wrapper_copy = lambda *l_args, **l_kwargs: wrapper(*l_args, **l_kwargs) # noqa: E731
File "/usr/share/ceph/mgr/orchestrator/_interface.py", line 96, in wrapper
return func(*args, **kwargs)
File "/usr/share/ceph/mgr/orchestrator/module.py", line 965, in _daemon_add_osd
raise_if_exception(completion)
File "/usr/share/ceph/mgr/orchestrator/_interface.py", line 228, in raise_if_exception
raise e
RuntimeError: cephadm exited with an error code: 1, stderr:Inferring config /var/lib/ceph/6d81b46a-9e04-11ee-a87f-2cf05dd3b46e/mon.kiki/config
Non-zero exit code 1 from /usr/bin/podman run --rm --ipc=host --stop-signal=SIGTERM --net=host --entrypoint /usr/sbin/ceph-volume --privileged --group-add=disk --init -e CONTAINER_IMAGE=quay.io/ceph/ceph@sha256:1fb108217b110c01c480e32d0cfea0e19955733537af7bb8cbae165222496e09 -e NODE_NAME=kiki -e CEPH_USE_RANDOM_NONCE=1 -e CEPH_VOLUME_OSDSPEC_AFFINITY=None -e CEPH_VOLUME_SKIP_RESTORECON=yes -e CEPH_VOLUME_DEBUG=1 -v /var/run/ceph/6d81b46a-9e04-11ee-a87f-2cf05dd3b46e:/var/run/ceph:z -v /var/log/ceph/6d81b46a-9e04-11ee-a87f-2cf05dd3b46e:/var/log/ceph:z -v /var/lib/ceph/6d81b46a-9e04-11ee-a87f-2cf05dd3b46e/crash:/var/lib/ceph/crash:z -v /run/systemd/journal:/run/systemd/journal -v /dev:/dev -v /run/udev:/run/udev -v /sys:/sys -v /run/lvm:/run/lvm -v /run/lock/lvm:/run/lock/lvm -v /:/rootfs -v /etc/hosts:/etc/hosts:ro -v /tmp/ceph-tmpv0tfw227:/etc/ceph/ceph.conf:z -v /tmp/ceph-tmp7k5mpbeu:/var/lib/ceph/bootstrap-osd/ceph.keyring:z quay.io/ceph/ceph@sha256:1fb108217b110c01c480e32d0cfea0e19955733537af7bb8cbae165222496e09 lvm batch --no-auto /dev/sdc --yes --no-systemd
/usr/bin/podman: stderr Traceback (most recent call last):
/usr/bin/podman: stderr File "/usr/sbin/ceph-volume", line 11, in <module>
/usr/bin/podman: stderr load_entry_point('ceph-volume==1.0.0', 'console_scripts', 'ceph-volume')()
/usr/bin/podman: stderr File "/usr/lib/python3.6/site-packages/ceph_volume/main.py", line 41, in __init__
/usr/bin/podman: stderr self.main(self.argv)
/usr/bin/podman: stderr File "/usr/lib/python3.6/site-packages/ceph_volume/decorators.py", line 59, in newfunc
/usr/bin/podman: stderr return f(*a, **kw)
/usr/bin/podman: stderr File "/usr/lib/python3.6/site-packages/ceph_volume/main.py", line 153, in main
/usr/bin/podman: stderr terminal.dispatch(self.mapper, subcommand_args)
/usr/bin/podman: stderr File "/usr/lib/python3.6/site-packages/ceph_volume/terminal.py", line 194, in dispatch
/usr/bin/podman: stderr instance.main()
/usr/bin/podman: stderr File "/usr/lib/python3.6/site-packages/ceph_volume/devices/lvm/main.py", line 46, in main
/usr/bin/podman: stderr terminal.dispatch(self.mapper, self.argv)
/usr/bin/podman: stderr File "/usr/lib/python3.6/site-packages/ceph_volume/terminal.py", line 192, in dispatch
/usr/bin/podman: stderr instance = mapper.get(arg)(argv[count:])
/usr/bin/podman: stderr File "/usr/lib/python3.6/site-packages/ceph_volume/devices/lvm/batch.py", line 357, in __init__
/usr/bin/podman: stderr self.args = parser.parse_args(argv)
/usr/bin/podman: stderr File "/usr/lib64/python3.6/argparse.py", line 1734, in parse_args
/usr/bin/podman: stderr args, argv = self.parse_known_args(args, namespace)
/usr/bin/podman: stderr File "/usr/lib64/python3.6/argparse.py", line 1766, in parse_known_args
/usr/bin/podman: stderr namespace, args = self._parse_known_args(args, namespace)
/usr/bin/podman: stderr File "/usr/lib64/python3.6/argparse.py", line 1954, in _parse_known_args
/usr/bin/podman: stderr positionals_end_index = consume_positionals(start_index)
/usr/bin/podman: stderr File "/usr/lib64/python3.6/argparse.py", line 1931, in consume_positionals
/usr/bin/podman: stderr take_action(action, args)
/usr/bin/podman: stderr File "/usr/lib64/python3.6/argparse.py", line 1824, in take_action
/usr/bin/podman: stderr argument_values = self._get_values(action, argument_strings)
/usr/bin/podman: stderr File "/usr/lib64/python3.6/argparse.py", line 2279, in _get_values
/usr/bin/podman: stderr value = [self._get_value(action, v) for v in arg_strings]
/usr/bin/podman: stderr File "/usr/lib64/python3.6/argparse.py", line 2279, in <listcomp>
/usr/bin/podman: stderr value = [self._get_value(action, v) for v in arg_strings]
/usr/bin/podman: stderr File "/usr/lib64/python3.6/argparse.py", line 2294, in _get_value
/usr/bin/podman: stderr result = type_func(arg_string)
/usr/bin/podman: stderr File "/usr/lib/python3.6/site-packages/ceph_volume/util/arg_validators.py", line 116, in __call__
/usr/bin/podman: stderr super().get_device(dev_path)
/usr/bin/podman: stderr File "/usr/lib/python3.6/site-packages/ceph_volume/util/arg_validators.py", line 24, in get_device
/usr/bin/podman: stderr self._device = Device(dev_path)
/usr/bin/podman: stderr File "/usr/lib/python3.6/site-packages/ceph_volume/util/device.py", line 125, in __init__
/usr/bin/podman: stderr sys_info.devices = disk.get_devices(device=self.path)
/usr/bin/podman: stderr File "/usr/lib/python3.6/site-packages/ceph_volume/util/disk.py", line 889, in get_devices
/usr/bin/podman: stderr if device_slaves:
/usr/bin/podman: stderr UnboundLocalError: local variable 'device_slaves' referenced before assignment
Traceback (most recent call last):
File "/var/lib/ceph/6d81b46a-9e04-11ee-a87f-2cf05dd3b46e/cephadm.8b92cafd937eb89681ee011f9e70f85937fd09c4bd61ed4a59981d275a1f255b", line 9679, in <module>
main()
File "/var/lib/ceph/6d81b46a-9e04-11ee-a87f-2cf05dd3b46e/cephadm.8b92cafd937eb89681ee011f9e70f85937fd09c4bd61ed4a59981d275a1f255b", line 9667, in main
r = ctx.func(ctx)
^^^^^^^^^^^^^
File "/var/lib/ceph/6d81b46a-9e04-11ee-a87f-2cf05dd3b46e/cephadm.8b92cafd937eb89681ee011f9e70f85937fd09c4bd61ed4a59981d275a1f255b", line 2116, in _infer_config
return func(ctx)
^^^^^^^^^
File "/var/lib/ceph/6d81b46a-9e04-11ee-a87f-2cf05dd3b46e/cephadm.8b92cafd937eb89681ee011f9e70f85937fd09c4bd61ed4a59981d275a1f255b", line 2032, in _infer_fsid
return func(ctx)
^^^^^^^^^
File "/var/lib/ceph/6d81b46a-9e04-11ee-a87f-2cf05dd3b46e/cephadm.8b92cafd937eb89681ee011f9e70f85937fd09c4bd61ed4a59981d275a1f255b", line 2144, in _infer_image
return func(ctx)
^^^^^^^^^
File "/var/lib/ceph/6d81b46a-9e04-11ee-a87f-2cf05dd3b46e/cephadm.8b92cafd937eb89681ee011f9e70f85937fd09c4bd61ed4a59981d275a1f255b", line 2019, in _validate_fsid
return func(ctx)
^^^^^^^^^
File "/var/lib/ceph/6d81b46a-9e04-11ee-a87f-2cf05dd3b46e/cephadm.8b92cafd937eb89681ee011f9e70f85937fd09c4bd61ed4a59981d275a1f255b", line 6272, in command_ceph_volume
out, err, code = call_throws(ctx, c.run_cmd(), verbosity=CallVerbosity.QUIET_UNLESS_ERROR)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/var/lib/ceph/6d81b46a-9e04-11ee-a87f-2cf05dd3b46e/cephadm.8b92cafd937eb89681ee011f9e70f85937fd09c4bd61ed4a59981d275a1f255b", line 1807, in call_throws
raise RuntimeError('Failed command: %s' % ' '.join(command))
RuntimeError: Failed command: /usr/bin/podman run --rm --ipc=host --stop-signal=SIGTERM --net=host --entrypoint /usr/sbin/ceph-volume --privileged --group-add=disk --init -e CONTAINER_IMAGE=quay.io/ceph/ceph@sha256:1fb108217b110c01c480e32d0cfea0e19955733537af7bb8cbae165222496e09 -e NODE_NAME=kiki -e CEPH_USE_RANDOM_NONCE=1 -e CEPH_VOLUME_OSDSPEC_AFFINITY=None -e CEPH_VOLUME_SKIP_RESTORECON=yes -e CEPH_VOLUME_DEBUG=1 -v /var/run/ceph/6d81b46a-9e04-11ee-a87f-2cf05dd3b46e:/var/run/ceph:z -v /var/log/ceph/6d81b46a-9e04-11ee-a87f-2cf05dd3b46e:/var/log/ceph:z -v /var/lib/ceph/6d81b46a-9e04-11ee-a87f-2cf05dd3b46e/crash:/var/lib/ceph/crash:z -v /run/systemd/journal:/run/systemd/journal -v /dev:/dev -v /run/udev:/run/udev -v /sys:/sys -v /run/lvm:/run/lvm -v /run/lock/lvm:/run/lock/lvm -v /:/rootfs -v /etc/hosts:/etc/hosts:ro -v /tmp/ceph-tmpv0tfw227:/etc/ceph/ceph.conf:z -v /tmp/ceph-tmp7k5mpbeu:/var/lib/ceph/bootstrap-osd/ceph.keyring:z quay.io/ceph/ceph@sha256:1fb108217b110c01c480e32d0cfea0e19955733537af7bb8cbae165222496e09 lvm batch --no-auto /dev/sdc --yes --no-systemd
Looking at the ceph-volume.log
[2023-12-19 19:02:47,601][ceph_volume.main][INFO ] Running command: ceph-volume lvm batch --no-auto /dev/sdc --yes --no-systemd
[2023-12-19 19:02:47,611][ceph_volume][ERROR ] exception caught by decorator
Traceback (most recent call last):
File "/usr/lib/python3.6/site-packages/ceph_volume/decorators.py", line 59, in newfunc
return f(*a, **kw)
File "/usr/lib/python3.6/site-packages/ceph_volume/main.py", line 153, in main
terminal.dispatch(self.mapper, subcommand_args)
File "/usr/lib/python3.6/site-packages/ceph_volume/terminal.py", line 194, in dispatch
instance.main()
File "/usr/lib/python3.6/site-packages/ceph_volume/devices/lvm/main.py", line 46, in main
terminal.dispatch(self.mapper, self.argv)
File "/usr/lib/python3.6/site-packages/ceph_volume/terminal.py", line 192, in dispatch
instance = mapper.get(arg)(argv[count:])
File "/usr/lib/python3.6/site-packages/ceph_volume/devices/lvm/batch.py", line 357, in __init__
self.args = parser.parse_args(argv)
File "/usr/lib64/python3.6/argparse.py", line 1734, in parse_args
args, argv = self.parse_known_args(args, namespace)
File "/usr/lib64/python3.6/argparse.py", line 1766, in parse_known_args
namespace, args = self._parse_known_args(args, namespace)
File "/usr/lib64/python3.6/argparse.py", line 1954, in _parse_known_args
positionals_end_index = consume_positionals(start_index)
File "/usr/lib64/python3.6/argparse.py", line 1931, in consume_positionals
take_action(action, args)
File "/usr/lib64/python3.6/argparse.py", line 1824, in take_action
argument_values = self._get_values(action, argument_strings)
File "/usr/lib64/python3.6/argparse.py", line 2279, in _get_values
value = [self._get_value(action, v) for v in arg_strings]
File "/usr/lib64/python3.6/argparse.py", line 2279, in <listcomp>
value = [self._get_value(action, v) for v in arg_strings]
File "/usr/lib64/python3.6/argparse.py", line 2294, in _get_value
result = type_func(arg_string)
File "/usr/lib/python3.6/site-packages/ceph_volume/util/arg_validators.py", line 116, in __call__
super().get_device(dev_path)
File "/usr/lib/python3.6/site-packages/ceph_volume/util/arg_validators.py", line 24, in get_device
self._device = Device(dev_path)
File "/usr/lib/python3.6/site-packages/ceph_volume/util/device.py", line 125, in __init__
sys_info.devices = disk.get_devices(device=self.path)
File "/usr/lib/python3.6/site-packages/ceph_volume/util/disk.py", line 889, in get_devices
if device_slaves:
UnboundLocalError: local variable 'device_slaves' referenced before assignment
Googling for this issue, I ran across this https://forum.proxmox.com/threads/ceph-osd-creation-error.136839/ which references modifying the source to `ceph_volume/util/disk.py`.
Before modification:
root@kiki ~ 3m 7s
# ceph-volume inventory --format json /dev/sdc
--> UnboundLocalError: cannot access local variable 'device_slaves' where it is not associated with a value
Testing the modification:
root@kiki ~
# diff -u /usr/lib/python3.11/site-packages/ceph_volume/util/disk.py.org /usr/lib/python3.11/site-packages/ceph_volume/util/disk.py
--- /usr/lib/python3.11/site-packages/ceph_volume/util/disk.py.org 2023-12-19 13:06:47.883566748 -0600
+++ /usr/lib/python3.11/site-packages/ceph_volume/util/disk.py 2023-12-19 13:07:06.791956564 -0600
@@ -886,6 +886,8 @@
device_slaves = os.listdir(os.path.join(sysdir, 'slaves'))
metadata['partitions'] = get_partitions_facts(sysdir)
+ device_slaves=False
+
if device_slaves:
metadata['device_nodes'] = ','.join(device_slaves)
else:
root@kiki ~
# ceph-volume inventory --format json /dev/sdc
{"path": "/dev/sdc", "sys_api": {}, "ceph_device": false, "lsm_data": {}, "available": false, "rejected_reasons": ["Insufficient space (<5GB)"], "device_id": "Samsung_SSD_870_QVO_4TB_S5VYNJ0W902090E", "lvs": []}
The problem is that, to make this work with cephadm, I would have to build a modified cephadm container and force my system to use it. Also, this isn't an actual fix for the underlying issue; it's just a workaround.
Version:
# ceph --version
ceph version 17.2.7 (b12291d110049b2f35e32e0de30d70e9a4c060d2) quincy (stable)
# cat /etc/redhat-release
Fedora release 38 (Thirty Eight)
Updated by James Coleman 5 months ago
This is possibly a better solution to what I tried:
--- /usr/lib/python3.11/site-packages/ceph_volume/util/disk.py.org 2023-12-21 09:26:42.446569998 -0600
+++ /usr/lib/python3.11/site-packages/ceph_volume/util/disk.py 2023-12-21 09:27:04.361021649 -0600
@@ -882,6 +882,7 @@
for key, file_ in facts:
metadata[key] = get_file_contents(os.path.join(sysdir, file_))
+ device_slaves=False
if block[2] != 'part':
device_slaves = os.listdir(os.path.join(sysdir, 'slaves'))
metadata['partitions'] = get_partitions_facts(sysdir)
Actions