Bug #6495
Status: closed — CPU load on cluster nodes is very high whenever there are write requests to the cluster
0%
Description
ceph version 0.61.8
Kernel Ubuntu 12.10 (GNU/Linux 3.5.0-41-generic )
CPU: 24 processors, Ram 64GB
Env: 3 cluster nodes (10 OSDs/node, 2 SSDs in RAID0 as the journal for the 10 OSDs/node), using a dedicated network (private network: InfiniBand over IP)
+ If I used the test tool "fio" to generate write requests to the cluster, high CPU load occurred on my storage cluster (caused by a huge number of kernel threads). But when I generated read requests, the cluster worked fine.
-> I used top, htop, atop, and iostat to check all OSDs, and saw a heavy I/O load. So I think the ceph-osd processes were causing the high CPU load.
ceph --admin-daemon /var/run/ceph/ceph-osd.29.asok perf dump {"filestore":{"journal_queue_max_ops":4096,"journal_queue_ops":0,"journal_ops":2310279,"journal_queue_max_bytes":41943040,"journal_queue_bytes":0,"journal_bytes":783155502587,"journal_latency":{"avgcount":2310279,"sum":29779.007221000},"journal_wr":2195043,"journal_wr_bytes":{"avgcount":2195043,"sum":790773211136},"op_queue_max_ops":10240,"op_queue_ops":0,"ops":2310279,"op_queue_max_bytes":209715200,"op_queue_bytes":0,"bytes":783128135267,"apply_latency":{"avgcount":2310279,"sum":52114.282829000},"committing":0,"commitcycle":13965,"commitcycle_interval":{"avgcount":13965,"sum":421252.623903000},"commitcycle_latency":{"avgcount":13965,"sum":1815.430763000},"journal_full":0},"leveldb":{"leveldb_get":12262171,"leveldb_transaction":7902360,"leveldb_compact":0,"leveldb_compact_range":0,"leveldb_compact_queue_merge":0,"leveldb_compact_queue_len":0},"mutex-FileJournal::completions_lock":{"wait":{"avgcount":0,"sum":0.000000000}},"mutex-FileJournal::finisher_lock":{"wait":{"avgcount":0,"sum":0.000000000}},"mutex-FileJournal::write_lock":{"wait":{"avgcount":0,"sum":0.000000000}},"mutex-FileJournal::writeq_lock":{"wait":{"avgcount":0,"sum":0.000000000}},"mutex-JOS::ApplyManager::apply_lock":{"wait":{"avgcount":0,"sum":0.000000000}},"mutex-JOS::ApplyManager::com_lock":{"wait":{"avgcount":0,"sum":0.000000000}},"mutex-JOS::SubmitManager::lock":{"wait":{"avgcount":0,"sum":0.000000000}},"osd":{"opq":0,"op_wip":2,"op":11475380,"op_in_bytes":451681929752,"op_out_bytes":953634957004,"op_latency":{"avgcount":11475380,"sum":471643.159999000},"op_r":9912039,"op_r_out_bytes":953634957004,"op_r_latency":{"avgcount":9912039,"sum":314610.837718000},"op_w":1563341,"op_w_in_bytes":451681929752,"op_w_rlat":{"avgcount":1563341,"sum":28750.267983000},"op_w_latency":{"avgcount":1563341,"sum":157032.322281000},"op_rw":0,"op_rw_in_bytes":0,"op_rw_out_bytes":0,"op_rw_rlat":{"avgcount":0,"sum":0.000000000},"op_rw_latency":{"avgcount":0,"sum"
:0.000000000},"subop":969850,"subop_in_bytes":349674255306,"subop_latency":{"avgcount":969850,"sum":42226.727569000},"subop_w":0,"subop_w_in_bytes":349674255306,"subop_w_latency":{"avgcount":969780,"sum":42194.859888000},"subop_pull":0,"subop_pull_latency":{"avgcount":4,"sum":0.058636000},"subop_push":0,"subop_push_in_bytes":0,"subop_push_latency":{"avgcount":66,"sum":31.809045000},"pull":33,"push":12,"push_out_bytes":40026224,"push_in":33,"push_in_bytes":137592832,"recovery_ops":28,"loadavg":5847,"buffer_bytes":0,"numpg":1174,"numpg_primary":673,"numpg_replica":501,"numpg_stray":0,"heartbeat_to_peers":20,"heartbeat_from_peers":0,"map_messages":3563,"map_message_epochs":6189,"map_message_epoch_dups":26238},"throttle-filestore_bytes":{"val":132790,"max":41943040,"get":0,"get_sum":0,"get_or_fail_fail":0,"get_or_fail_success":0,"take":2310281,"take_sum":783155899239,"put":2195044,"put_sum":783155766449,"wait":{"avgcount":7,"sum":0.956338000}},"throttle-filestore_ops":{"val":1,"max":4096,"get":0,"get_sum":0,"get_or_fail_fail":0,"get_or_fail_success":0,"take":2310281,"take_sum":2310281,"put":2195044,"put_sum":2310280,"wait":{"avgcount":0,"sum":0.000000000}},"throttle-msgr_dispatch_throttler-client":{"val":131249,"max":1073741824,"get":11285265,"get_sum":431907691785,"get_or_fail_fail":0,"get_or_fail_success":0,"take":0,"take_sum":0,"put":11285264,"put_sum":431907560536,"wait":{"avgcount":0,"sum":0.000000000}},"throttle-msgr_dispatch_throttler-cluster":{"val":0,"max":1073741824,"get":2396528,"get_sum":351465026511,"get_or_fail_fail":0,"get_or_fail_success":0,"take":0,"take_sum":0,"put":2396528,"put_sum":351465026511,"wait":{"avgcount":0,"sum":0.000000000}},"throttle-msgr_dispatch_throttler-hbclient":{"val":0,"max":1073741824,"get":2593222,"get_sum":121881434,"get_or_fail_fail":0,"get_or_fail_success":0,"take":0,"take_sum":0,"put":2593222,"put_sum":121881434,"wait":{"avgcount":0,"sum":0.000000000}},"throttle-msgr_dispatch_throttler-hbserver":{"val":0,"max":1073741824,"get"
:2617576,"get_sum":123026072,"get_or_fail_fail":0,"get_or_fail_success":0,"take":0,"take_sum":0,"put":2617576,"put_sum":123026072,"wait":{"avgcount":0,"sum":0.000000000}},"throttle-osd_client_bytes":{"val":655537,"max":524288000,"get":11195453,"get_sum":431879485602,"get_or_fail_fail":0,"get_or_fail_success":0,"take":0,"take_sum":0,"put":12476385,"put_sum":431878830065,"wait":{"avgcount":0,"sum":0.000000000}},"throttle-osd_client_messages":{"val":24,"max":100,"get":11366090,"get_sum":11366090,"get_or_fail_fail":0,"get_or_fail_success":0,"take":0,"take_sum":0,"put":11366066,"put_sum":11366066,"wait":{"avgcount":39115,"sum":920.334202000}}}