Bug #1945
closed
Added by Sage Weil over 12 years ago.
Updated about 11 years ago.
Description
17297 ? Sl 14:59 | \_ ./blogbench -d blogtest_in
18392 ? Ss 0:00 \_ sshd: ubuntu [priv]
18406 ? S 0:00 \_ sshd: ubuntu@pts/0
18407 pts/0 Ss 0:00 \_ -bash
18442 pts/0 R+ 0:00 \_ ps axf
988 ? Ss 0:01 /usr/sbin/ntpd -p /var/run/ntpd.pid -g -u 107:116
12978 ? Sl 0:00 rsyslogd -c4
ubuntu@sepia27:~$ sudo ls -al /proc/17297/fd
total 0
dr-x------ 2 ubuntu ubuntu 0 2012-01-17 09:03 .
dr-xr-xr-x 8 ubuntu ubuntu 0 2012-01-17 09:02 ..
lr-x------ 1 ubuntu ubuntu 64 2012-01-17 09:05 0 -> pipe:[23664]
l-wx------ 1 ubuntu ubuntu 64 2012-01-17 09:05 1 -> pipe:[23665]
l-wx------ 1 ubuntu ubuntu 64 2012-01-17 09:05 2 -> pipe:[23666]
l-wx------ 1 ubuntu ubuntu 64 2012-01-17 09:05 22 -> /tmp/cephtest/mnt.0/client.0/tmp/blogbench-1.0/src/blogtest_in/blog-10/comment-29.xml (deleted)
ubuntu@sepia27:~$ sudo cat /proc/17297/task/*/stack
[<ffffffff810a3d2d>] futex_wait_queue_me+0xcd/0x110
[<ffffffff810a3f1b>] futex_wait+0x1ab/0x2b0
[<ffffffff810a4dd1>] do_futex+0x101/0xb20
[<ffffffff810a586b>] sys_futex+0x7b/0x180
[<ffffffff8160e1c2>] system_call_fastpath+0x16/0x1b
[<ffffffffffffffff>] 0xffffffffffffffff
[<ffffffffa049f275>] ceph_get_caps+0x135/0x220 [ceph]
[<ffffffffa04949a5>] ceph_aio_write+0x155/0xab0 [ceph]
[<ffffffff81175e62>] do_sync_write+0xe2/0x120
[<ffffffff811763f8>] vfs_write+0xc8/0x190
[<ffffffff811765b1>] sys_write+0x51/0x90
[<ffffffff8160e1c2>] system_call_fastpath+0x16/0x1b
[<ffffffffffffffff>] 0xffffffffffffffff
job
kernel:
sha1: facddcc70502bda4ef2be9667884a542d6bb0b14
nuke-on-error: true
overrides:
ceph:
btrfs: 1
coverage: true
log-whitelist:
- clocks not synchronized
sha1: 7b2fd45bfe0d2aca4e18ab4efccb9cd3efbb6c43
roles:
- - mon.a
- mon.c
- osd.0
- - mon.b
- mds.a
- osd.1
- - client.0
tasks:
- chef: null
- ceph: null
- kclient: null
- workunit:
all:
- suites/blogbench.sh
/var/lib/teuthworker/archive/nightly_coverage_2012-01-17-a/8028
Happened again in /var/lib/teuthworker/archive/nightly_coverage_2012-02-02-a/10268 (also blogbench)
ubuntu@teuthology:/a/nightly_coverage_2012-02-04-a/10600
- Status changed from New to 12
ubuntu@teuthology:/a/sage-2012-08-16_17:18:36-regression-wip-crypto-testing-basic/2560$ cat config.yaml
kernel: &id001
kdb: true
sha1: 1fe5e9932156f6122c3b1ff6ba7541c27c86718c
nuke-on-error: true
overrides:
ceph:
fs: btrfs
log-whitelist:
- slow request
sha1: 6da267c09b77985db1a22c4d1769650abc7db95d
workunit:
sha1: 6da267c09b77985db1a22c4d1769650abc7db95d
roles:
- - mon.a
- mon.c
- osd.0
- osd.1
- osd.2
- - mon.b
- mds.a
- osd.3
- osd.4
- osd.5
- - client.0
targets:
ubuntu@plana81.front.sepia.ceph.com: ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAABAQCzwyoEUguCYhu0TNdXrk7aWVUvePF6F6coBynLZ73Y/7eqTKAzxnNCiwUQx3kGjK33kliZDk7g/x4FdsiwzknDuGGCXP1pZyIGVtU5wNJ6ZM29XyH2SZvyU0MNfmoMzygxHR53TGcsK3hzwSbaW1woEpJmoqgIFQJ6BJr3nc2foKl79wBQdCNumqJ7sbh26xYVI29vYsJHUTYAdmyE4QrLaLOkZKU5Q/OvUnbKQbURcs7ArxFooObu9h0ENRPK4MKuxBFfgpAYTr/rMeWfVQxvSWsuOMpOdzLaNLt5UBYVVU+wxIFdDwcHb+2Er0rEV49W9xUD6JGXnaFjrxDDocfj
ubuntu@plana85.front.sepia.ceph.com: ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAABAQDCQQz4HH+uZV5duWKnH+ZVOyBzgUIw3K4G31TlhhiDrxLQJIBFeYHacS9+TzSdTSsgDvikBmX43J/VUdEE+W6jL8bZ5cwXcULXdiaO6eFbnZOld76WdI3Rb6+F2cAQUBAFEmy4FvdGVteMYlLZVo+3VvMckM1C09RDlhA8AHyNf6AisZEkV0gu6ba7BKmfj8CxGI+Hs/zFRa0wyLmJgJIgPqvo5yikBAAZmVPyfOt8jUe3sfhf5R8o2hlr8ucBc9jHlpbspc1fJJ0HMOE2cIjCN5vup2CtjTmnxhghAeYujIJL8mkT81RiCPrXxngg8X8CkaH2KevviMElXjK/dtLL
ubuntu@plana86.front.sepia.ceph.com: ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAABAQDUMsCERn/r+eXKzlu9W/n6BFak26ReRaNUaccpQTrqOqhm3M7LiIGAeBo6JsygU9Kdtsm3115P4odiDbQLuNm0gKPw5DY7zDVqV+YRqe3kOrgIL/rxs6l6Y7htSRvzGhz7RsaG1fQH/BuaD18+s2WguKWwgRuuU1bvRYEHu0Y7qYYqUy+xJd4CGX+LMuAJzDYj5R7xIEYlJBln/c47He8q53cUUw5w48Hm47O8xo9ov7CqHCpOTZixeseY+zEm9skoLsUpBDIOmT+xUh7sOyKFhj+CnETpv3DocxGtoUgkx42GtkDhFK/dVV95Q6EOBseyYIUvFoHBKB2WV32xyxB5
tasks:
- internal.lock_machines: 3
- internal.save_config: null
- internal.check_lock: null
- internal.connect: null
- internal.check_conflict: null
- kernel: *id001
- internal.base: null
- internal.archive: null
- internal.coredump: null
- internal.syslog: null
- internal.timer: null
- chef: null
- clock: null
- ceph: null
- kclient: null
- workunit:
clients:
all:
- suites/blogbench.sh
'
root@plana81:/tmp/cephtest# cat /proc/5922/task/5930/stack
[<ffffffffa045a5c5>] ceph_get_caps+0x125/0x210 [ceph]
[<ffffffffa04508d3>] ceph_aio_write+0x143/0xaa0 [ceph]
[<ffffffff8117ba62>] do_sync_write+0xd2/0x110
[<ffffffff8117c373>] vfs_write+0xb3/0x180
[<ffffffff8117c69a>] sys_write+0x4a/0x90
[<ffffffff81634329>] system_call_fastpath+0x16/0x1b
[<ffffffffffffffff>] 0xffffffffffffffff
ubuntu@teuthology:/var/lib/teuthworker/archive/teuthology-2012-08-21_02:00:04-regression-testing-testing-basic/5675
- Project changed from Ceph to CephFS
- Category deleted (1)
- Priority changed from Normal to High
- Assignee set to Sage Weil
this might be fixed by commit:854a78669fe057f2bf1bc5a07308b415461e78d7 ...
- Assignee changed from Sage Weil to Zheng Yan
Yan, would you mind taking a look at this when you have time?
Sorry for the delay, I didn't notice the notification. I fixed several bugs that may cause hangs of this type, but I haven't seen any hang of this type for the last two months.
- Status changed from 12 to Can't reproduce
We haven't seen this in a long time (at least, that's marked here), and there's been a ton of work here over the last several months so I'm closing this out for now.
Also available in: Atom
PDF