Bug #24793
ceph-volume fails to zap a device (wipefs problem)
Description
[root@ceph-osd0 /]# ps fauaxf
USER       PID %CPU %MEM    VSZ   RSS TTY      STAT START   TIME COMMAND
ceph      1616  0.1  3.8 845844 19340 ?        Ssl  12:31   0:06 ceph-osd -i 1 --setuser ceph --setgroup disk
root      1229  0.0  0.3  11820  1692 pts/4    Ss   12:04   0:00 bash
root      3355  0.0  0.3  51704  1700 pts/4    R+   13:31   0:00  \_ ps fauaxf
root       836  0.0  0.3  11820  1544 pts/3    Ss+  08:53   0:00 bash
root         5  0.0  0.1  11820   556 pts/1    Ss+  Jul05   0:00 bash
root         1  0.0  0.0   4360    24 ?        Ss   Jul05   0:00 sleep 365d
[root@ceph-osd0 /]# kill 1616
[root@ceph-osd0 /]# umount /var/lib/ceph/osd/ceph-1
[root@ceph-osd0 /]# ceph-volume lvm zap /dev/sdb
--> Zapping: /dev/sdb
Running command: /usr/sbin/wipefs --all /dev/sdb
 stderr: wipefs: error: /dev/sdb: probing initialization failed: Device or resource busy
--> RuntimeError: command returned non-zero exit status: 1
[root@ceph-osd0 /]# df
Filesystem                      1K-blocks    Used Available Use% Mounted on
overlay                          39269648 1541404  37728244   4% /
tmpfs                              250012       0    250012   0% /sys/fs/cgroup
devtmpfs                           241300       0    241300   0% /dev
shm                                 65536       0     65536   0% /dev/shm
/dev/mapper/VolGroup00-LogVol00  39269648 1541404  37728244   4% /etc/ceph
tmpfs                              250012   29424    220588  12% /run/lvm/lvmetad.socket
[root@ceph-osd0 /]# ceph-volume lvm zap /dev/sdb
--> Zapping: /dev/sdb
Running command: /usr/sbin/wipefs --all /dev/sdb
 stderr: wipefs: error: /dev/sdb: probing initialization failed: Device or resource busy
--> RuntimeError: command returned non-zero exit status: 1
[root@ceph-osd0 /]# ps faux
USER       PID %CPU %MEM    VSZ   RSS TTY      STAT START   TIME COMMAND
root      1229  0.0  0.3  11820  1700 pts/4    Ss   12:04   0:00 bash
root      3381  0.0  0.3  51704  1688 pts/4    R+   13:32   0:00  \_ ps faux
root       836  0.0  0.3  11820  1544 pts/3    Ss+  08:53   0:00 bash
root         5  0.0  0.1  11820   556 pts/1    Ss+  Jul05   0:00 bash
root         1  0.0  0.0   4360    24 ?        Ss   Jul05   0:00 sleep 365d
[root@ceph-osd0 /]# fuser /dev/sdb
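Note that fuser comes back empty here because it only reports user-space opens; a device can still be claimed inside the kernel, e.g. by a device-mapper mapping (which comment #2 below confirms is the case). A minimal sketch of how to see the in-kernel holder, using the device name from this report:

# fuser only sees processes; check for kernel-level holders instead
lsblk /dev/sdb               # shows dm children still stacked on the disk
ls /sys/block/sdb/holders/   # dm-* entries here mean device-mapper holds it
dmsetup ls                   # lists the active device-mapper mappings by name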
History
#1 Updated by Sébastien Han over 5 years ago
[2018-07-06 15:00:57,168][ceph_volume.main][INFO ] Running command: ceph-volume lvm zap /dev/sdb
[2018-07-06 15:00:57,169][ceph_volume.process][INFO ] Running command: /usr/sbin/lvs --noheadings --readonly --separator=";" -o lv_tags,lv_path,lv_name,vg_name,lv_uuid
[2018-07-06 15:00:57,395][ceph_volume.process][INFO ] stdout ";"/dev/VolGroup00/LogVol00";"LogVol00";"VolGroup00";"gqFct1-q2eW-5I7a-IQl2-g2Vm-Nsiq-B2CqHo
[2018-07-06 15:00:57,395][ceph_volume.process][INFO ] stdout ";"/dev/VolGroup00/LogVol01";"LogVol01";"VolGroup00";"T8lHub-uZnx-CX9A-354Z-Jd3P-fTPq-dve374
[2018-07-06 15:00:57,395][ceph_volume.process][INFO ] stdout ceph.block_device=/dev/ceph-0118b379-16f4-4a3b-904b-90bc89e706fc/osd-block-4ae961d3-2870-44f7-85f3-49c0a8788789,ceph.block_uuid=u3ozXM-RPcC-yhip-4vdv-nkG0-fsGK-BAaBcZ,ceph.cephx_lockbox_secret=,ceph.cluster_fsid=0a9374ec-2998-47df-9a7f-d030e5b9c261,ceph.cluster_name=ceph,ceph.crush_device_class=None,ceph.encrypted=0,ceph.osd_fsid=4ae961d3-2870-44f7-85f3-49c0a8788789,ceph.osd_id=2,ceph.type=block,ceph.vdo=0";"/dev/ceph-0118b379-16f4-4a3b-904b-90bc89e706fc/osd-block-4ae961d3-2870-44f7-85f3-49c0a8788789";"osd-block-4ae961d3-2870-44f7-85f3-49c0a8788789";"ceph-0118b379-16f4-4a3b-904b-90bc89e706fc";"u3ozXM-RPcC-yhip-4vdv-nkG0-fsGK-BAaBcZ
[2018-07-06 15:00:57,395][ceph_volume.process][INFO ] stdout ceph.block_device=/dev/ceph-4d5af038-df98-437a-b038-a2f3f45d0cd2/osd-block-ea618678-b6d7-489a-9180-a7b67c3b69a0,ceph.block_uuid=BTOKvd-zxmP-58uL-CAy6-coEt-Ir8e-VDdV16,ceph.cephx_lockbox_secret=AQCbbD9btcZ7NxAA1X+fydCuBVkyZZ6yZfymUw==,ceph.cluster_fsid=0a9374ec-2998-47df-9a7f-d030e5b9c261,ceph.cluster_name=ceph,ceph.crush_device_class=None,ceph.encrypted=1,ceph.osd_fsid=ea618678-b6d7-489a-9180-a7b67c3b69a0,ceph.osd_id=0,ceph.type=block,ceph.vdo=0";"/dev/ceph-4d5af038-df98-437a-b038-a2f3f45d0cd2/osd-block-ea618678-b6d7-489a-9180-a7b67c3b69a0";"osd-block-ea618678-b6d7-489a-9180-a7b67c3b69a0";"ceph-4d5af038-df98-437a-b038-a2f3f45d0cd2";"BTOKvd-zxmP-58uL-CAy6-coEt-Ir8e-VDdV16
[2018-07-06 15:00:57,395][ceph_volume.process][INFO ] stderr WARNING: Failed to connect to lvmetad. Falling back to device scanning.
[2018-07-06 15:00:57,395][ceph_volume.devices.lvm.zap][INFO ] Zapping: /dev/sdb
[2018-07-06 15:00:57,395][ceph_volume.process][INFO ] Running command: /usr/sbin/pvs --no-heading --readonly --separator=";" -o pv_name,pv_tags,pv_uuid,vg_name
[2018-07-06 15:00:57,620][ceph_volume.process][INFO ] stdout /dev/sda";"";"ijcyzY-sgjK-cCfq-aeoB-FstK-Z2Ri-tyzNBr";"ceph-4d5af038-df98-437a-b038-a2f3f45d0cd2
[2018-07-06 15:00:57,621][ceph_volume.process][INFO ] stdout /dev/sdb";"";"OjmDl0-KjFI-PMGN-gjBH-nBlP-9s12-Emzuz0";"ceph-0118b379-16f4-4a3b-904b-90bc89e706fc
[2018-07-06 15:00:57,621][ceph_volume.process][INFO ] stdout /dev/vda3";"";"qGTgpA-Amb9-J2GQ-bP3A-2X6n-g5Ud-zPWx6a";"VolGroup00
[2018-07-06 15:00:57,621][ceph_volume.process][INFO ] stderr WARNING: Failed to connect to lvmetad. Falling back to device scanning.
[2018-07-06 15:00:57,621][ceph_volume.process][INFO ] Running command: /usr/sbin/lvs --noheadings --readonly --separator=";" -o lv_tags,lv_path,lv_name,vg_name,lv_uuid
[2018-07-06 15:00:57,846][ceph_volume.process][INFO ] stdout ";"/dev/VolGroup00/LogVol00";"LogVol00";"VolGroup00";"gqFct1-q2eW-5I7a-IQl2-g2Vm-Nsiq-B2CqHo
[2018-07-06 15:00:57,846][ceph_volume.process][INFO ] stdout ";"/dev/VolGroup00/LogVol01";"LogVol01";"VolGroup00";"T8lHub-uZnx-CX9A-354Z-Jd3P-fTPq-dve374
[2018-07-06 15:00:57,847][ceph_volume.process][INFO ] stdout ceph.block_device=/dev/ceph-0118b379-16f4-4a3b-904b-90bc89e706fc/osd-block-4ae961d3-2870-44f7-85f3-49c0a8788789,ceph.block_uuid=u3ozXM-RPcC-yhip-4vdv-nkG0-fsGK-BAaBcZ,ceph.cephx_lockbox_secret=,ceph.cluster_fsid=0a9374ec-2998-47df-9a7f-d030e5b9c261,ceph.cluster_name=ceph,ceph.crush_device_class=None,ceph.encrypted=0,ceph.osd_fsid=4ae961d3-2870-44f7-85f3-49c0a8788789,ceph.osd_id=2,ceph.type=block,ceph.vdo=0";"/dev/ceph-0118b379-16f4-4a3b-904b-90bc89e706fc/osd-block-4ae961d3-2870-44f7-85f3-49c0a8788789";"osd-block-4ae961d3-2870-44f7-85f3-49c0a8788789";"ceph-0118b379-16f4-4a3b-904b-90bc89e706fc";"u3ozXM-RPcC-yhip-4vdv-nkG0-fsGK-BAaBcZ
[2018-07-06 15:00:57,847][ceph_volume.process][INFO ] stdout ceph.block_device=/dev/ceph-4d5af038-df98-437a-b038-a2f3f45d0cd2/osd-block-ea618678-b6d7-489a-9180-a7b67c3b69a0,ceph.block_uuid=BTOKvd-zxmP-58uL-CAy6-coEt-Ir8e-VDdV16,ceph.cephx_lockbox_secret=AQCbbD9btcZ7NxAA1X+fydCuBVkyZZ6yZfymUw==,ceph.cluster_fsid=0a9374ec-2998-47df-9a7f-d030e5b9c261,ceph.cluster_name=ceph,ceph.crush_device_class=None,ceph.encrypted=1,ceph.osd_fsid=ea618678-b6d7-489a-9180-a7b67c3b69a0,ceph.osd_id=0,ceph.type=block,ceph.vdo=0";"/dev/ceph-4d5af038-df98-437a-b038-a2f3f45d0cd2/osd-block-ea618678-b6d7-489a-9180-a7b67c3b69a0";"osd-block-ea618678-b6d7-489a-9180-a7b67c3b69a0";"ceph-4d5af038-df98-437a-b038-a2f3f45d0cd2";"BTOKvd-zxmP-58uL-CAy6-coEt-Ir8e-VDdV16
[2018-07-06 15:00:57,847][ceph_volume.process][INFO ] stderr WARNING: Failed to connect to lvmetad. Falling back to device scanning.
[2018-07-06 15:00:57,848][ceph_volume.process][INFO ] Running command: wipefs --all /dev/sdb
[2018-07-06 15:00:58,057][ceph_volume.process][INFO ] stderr wipefs: error: /dev/sdb: probing initialization failed: Device or resource busy
[2018-07-06 15:00:58,057][ceph_volume][ERROR ] exception caught by decorator
Traceback (most recent call last):
  File "/usr/lib/python2.7/site-packages/ceph_volume/decorators.py", line 59, in newfunc
    return f(*a, **kw)
  File "/usr/lib/python2.7/site-packages/ceph_volume/main.py", line 153, in main
    terminal.dispatch(self.mapper, subcommand_args)
  File "/usr/lib/python2.7/site-packages/ceph_volume/terminal.py", line 182, in dispatch
    instance.main()
  File "/usr/lib/python2.7/site-packages/ceph_volume/devices/lvm/main.py", line 38, in main
    terminal.dispatch(self.mapper, self.argv)
  File "/usr/lib/python2.7/site-packages/ceph_volume/terminal.py", line 182, in dispatch
    instance.main()
  File "/usr/lib/python2.7/site-packages/ceph_volume/devices/lvm/zap.py", line 169, in main
    self.zap(args)
  File "/usr/lib/python2.7/site-packages/ceph_volume/decorators.py", line 16, in is_root
    return func(*a, **kw)
  File "/usr/lib/python2.7/site-packages/ceph_volume/devices/lvm/zap.py", line 102, in zap
    wipefs(path)
  File "/usr/lib/python2.7/site-packages/ceph_volume/devices/lvm/zap.py", line 21, in wipefs
    path
  File "/usr/lib/python2.7/site-packages/ceph_volume/process.py", line 149, in run
    raise RuntimeError(msg)
RuntimeError: command returned non-zero exit status: 1
#2 Updated by Sébastien Han over 5 years ago
Basically, wipefs cannot open the device with O_EXCL, which means the EBUSY ("Device or resource busy") error is valid here.
One option would be to add --force to the wipefs call.
This is obviously really dangerous.
In my case, LVM was still holding the device. --force would remove the LVM header, but the device-mapper mapping would remain, and the only way to unlock this was to run "dmsetup remove ceph--0118b379--16f4--4a3b--904b--90bc89e706fc-osd--block--4ae961d3--2870--44f7--85f3--49c0a8788789"
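Spelled out, the workaround looks like this (the mapping name is taken from this report; on another host, pick the matching ceph-* mapping that dmsetup ls shows for the PV being zapped):

# Find the stale ceph mapping that still holds the disk
dmsetup ls
# Remove it; this releases the kernel's exclusive claim on /dev/sdb
dmsetup remove ceph--0118b379--16f4--4a3b--904b--90bc89e706fc-osd--block--4ae961d3--2870--44f7--85f3--49c0a8788789
# Now wipefs can open the device with O_EXCL and the zap succeeds
ceph-volume lvm zap /dev/sdb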
#3 Updated by Alfredo Deza over 5 years ago
- Assignee set to Andrew Schoen
#4 Updated by Alfredo Deza over 5 years ago
- Status changed from New to 4
This has been solved by the commits in this PR: https://github.com/ceph/ceph/pull/23532
Can you check, Sébastien?
#5 Updated by Sébastien Han over 5 years ago
Sorry, I don't have much time to test this again; I'll let you know once I resume my work.
Thanks for the patches.
#6 Updated by Alfredo Deza over 5 years ago
- Status changed from 4 to Resolved
#7 Updated by Jan Fajerski about 5 years ago
- Status changed from Resolved to 12
- Affected Versions v14.0.0 added
This issue still seems to be present in 14.0.1:
CEPH_VOLUME_DEBUG=1 ceph-volume lvm zap /dev/vdb1
Running command: /usr/sbin/cryptsetup status /dev/mapper/dc44240b-2dc1-4630-808f-e61ce918f211
--> Zapping: /dev/vdb1
Running command: /usr/sbin/wipefs --all /dev/vdb1
stderr: wipefs: error: /dev/vdb1: probing initialization failed: Device or resource busy
Traceback (most recent call last):
  File "/usr/sbin/ceph-volume", line 11, in <module>
    load_entry_point('ceph-volume==1.0.0', 'console_scripts', 'ceph-volume')()
  File "/usr/lib/python3.6/site-packages/ceph_volume/main.py", line 38, in __init__
    self.main(self.argv)
  File "/usr/lib/python3.6/site-packages/ceph_volume/decorators.py", line 59, in newfunc
    return f(*a, **kw)
  File "/usr/lib/python3.6/site-packages/ceph_volume/main.py", line 148, in main
    terminal.dispatch(self.mapper, subcommand_args)
  File "/usr/lib/python3.6/site-packages/ceph_volume/terminal.py", line 182, in dispatch
    instance.main()
  File "/usr/lib/python3.6/site-packages/ceph_volume/devices/lvm/main.py", line 40, in main
    terminal.dispatch(self.mapper, self.argv)
  File "/usr/lib/python3.6/site-packages/ceph_volume/terminal.py", line 182, in dispatch
    instance.main()
  File "/usr/lib/python3.6/site-packages/ceph_volume/devices/lvm/zap.py", line 195, in main
    self.zap(args)
  File "/usr/lib/python3.6/site-packages/ceph_volume/decorators.py", line 16, in is_root
    return func(*a, **kw)
  File "/usr/lib/python3.6/site-packages/ceph_volume/devices/lvm/zap.py", line 107, in zap
    wipefs(path)
  File "/usr/lib/python3.6/site-packages/ceph_volume/devices/lvm/zap.py", line 21, in wipefs
    path
  File "/usr/lib/python3.6/site-packages/ceph_volume/process.py", line 153, in run
    raise RuntimeError(msg)
RuntimeError: command returned non-zero exit status: 1
The workaround via dmsetup still works, however.
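For this case, assuming the dm-crypt mapping from the cryptsetup status line above is what holds /dev/vdb1 (the name is taken from the log; verify with dmsetup ls first), the same idea applies:

# Close the dm-crypt mapping that still claims the partition
cryptsetup close dc44240b-2dc1-4630-808f-e61ce918f211   # or: dmsetup remove <same name>
# Then retry the zap
ceph-volume lvm zap /dev/vdb1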
#8 Updated by Patrick Donnelly over 4 years ago
- Status changed from 12 to New