Right, the mgr gets restarted (see the "Stopping mgr.a..." line at the end of this log excerpt):
teuthology.run_tasks:Running task cephfs_test_runner...
teuthology.orchestra.run.smithi003:> true
teuthology.orchestra.run.smithi003:> sudo adjust-ulimits ceph-coverage /home/ubuntu/cephtest/archive/coverage timeout 120 ceph --cluster ceph --log-early osd dump --format=json
ceph.mon.a.smithi003.stdout:May 08 07:53:15 smithi003 bash[25807]: audit 2020-05-08T07:53:15.034316+0000 mon.a (mon.0) 239 : audit [INF] from='mgr.14142 172.21.15.3:0/125380827' entity='mgr.a' cmd=[{"prefix":"config-key set","key":"mgr/cephadm/spec.mds.1","val":"{\"created\": \"2020-05-08T07:53:15.034002\", \"spec\": {\"placement\": {\"count\": 2}, \"service_id\": \"1\", \"service_name\": \"mds.1\", \"service_type\": \"mds\"}}"}]: dispatch
ceph.mon.a.smithi003.stdout:May 08 07:53:15 smithi003 bash[25807]: audit 2020-05-08T07:53:15.036083+0000 mon.a (mon.0) 240 : audit [INF] from='mgr.14142 172.21.15.3:0/125380827' entity='mgr.a' cmd='[{"prefix":"config-key set","key":"mgr/cephadm/spec.mds.1","val":"{\"created\": \"2020-05-08T07:53:15.034002\", \"spec\": {\"placement\": {\"count\": 2}, \"service_id\": \"1\", \"service_name\": \"mds.1\", \"service_type\": \"mds\"}}"}]': finished
ceph.mon.a.smithi003.stdout:May 08 07:53:15 smithi003 bash[25807]: audit 2020-05-08T07:53:15.037441+0000 mon.a (mon.0) 241 : audit [INF] from='mgr.14142 172.21.15.3:0/125380827' entity='mgr.a' cmd=[{"prefix": "config set", "who": "mds.1", "name": "mds_join_fs", "value": "1"}]: dispatch
ceph.mon.a.smithi003.stdout:May 08 07:53:15 smithi003 bash[25807]: audit 2020-05-08T07:53:15.039909+0000 mon.a (mon.0) 242 : audit [INF] from='mgr.14142 172.21.15.3:0/125380827' entity='mgr.a' cmd='[{"prefix": "config set", "who": "mds.1", "name": "mds_join_fs", "value": "1"}]': finished
ceph.mon.a.smithi003.stdout:May 08 07:53:15 smithi003 bash[25807]: audit 2020-05-08T07:53:15.040708+0000 mon.a (mon.0) 243 : audit [INF] from='mgr.14142 172.21.15.3:0/125380827' entity='mgr.a' cmd=[{"prefix": "auth get-or-create", "entity": "mds.1.smithi003.pehqbc", "caps": ["mon", "profile mds", "osd", "allow rwx", "mds", "allow"]}]: dispatch
ceph.mon.a.smithi003.stdout:May 08 07:53:15 smithi003 bash[25807]: audit 2020-05-08T07:53:15.042618+0000 mon.a (mon.0) 244 : audit [INF] from='mgr.14142 172.21.15.3:0/125380827' entity='mgr.a' cmd='[{"prefix": "auth get-or-create", "entity": "mds.1.smithi003.pehqbc", "caps": ["mon", "profile mds", "osd", "allow rwx", "mds", "allow"]}]': finished
ceph.mon.a.smithi003.stdout:May 08 07:53:15 smithi003 bash[25807]: audit 2020-05-08T07:53:15.043260+0000 mon.a (mon.0) 245 : audit [DBG] from='mgr.14142 172.21.15.3:0/125380827' entity='mgr.a' cmd=[{"prefix": "config generate-minimal-conf"}]: dispatch
ceph.mon.a.smithi003.stdout:May 08 07:53:15 smithi003 bash[25807]: audit 2020-05-08T07:53:15.044105+0000 mon.a (mon.0) 246 : audit [DBG] from='mgr.14142 172.21.15.3:0/125380827' entity='mgr.a' cmd=[{"prefix": "config get", "who": "mds.1.smithi003.pehqbc", "key": "container_image"}]: dispatch
teuthology.orchestra.run.smithi003:> true
teuthology.orchestra.run.smithi003:> sudo adjust-ulimits ceph-coverage /home/ubuntu/cephtest/archive/coverage timeout 120 ceph --cluster ceph --log-early osd pool get device_health_metrics pg_num
teuthology.orchestra.run.smithi003.stdout:pg_num: 1
teuthology.orchestra.run.smithi003:> true
teuthology.orchestra.run.smithi003:> sudo adjust-ulimits ceph-coverage /home/ubuntu/cephtest/archive/coverage timeout 120 ceph --cluster ceph --log-early osd dump --format=json
teuthology.orchestra.run.smithi003.stdout:
teuthology.orchestra.run.smithi003.stdout:{"epoch":22,"fsid":"8f4e11de-9100-11ea-a068-001a4aab830c","created":"2020-05-08T07:50:57.493161+0000","modified":"2020-05-08T07:52:42.465961+0000","last_up_change":"2020-05-08T07:52:41.465316+0000","last_in_change":"2020-05-08T07:52:41.465316+0000","flags":"sortbitwise,recovery_deletes,purged_snapdirs,pglog_hardlimit","flags_num":5799936,"flags_set":["pglog_hardlimit","purged_snapdirs","recovery_deletes","sortbitwise"],"crush_version":8,"full_ratio":0.94999998807907104,"backfillfull_ratio":0.89999997615814209,"nearfull_ratio":0.85000002384185791,"cluster_snapshot":"","pool_max":1,"max_osd":3,"require_min_compat_client":"luminous","min_compat_client":"jewel","require_osd_release":"pacific","pools":[{"pool":1,"pool_name":"device_health_metrics","create_time":"2020-05-08T07:51:08.676406+0000","flags":1,"flags_names":"hashpspool","type":1,"size":3,"min_size":2,"crush_rule":0,"object_hash":2,"pg_autoscale_mode":"off","pg_num":1,"pg_placement_num":1,"pg_placement_num_target":1,"pg_num_target":1,"pg_num_pending":1,"last_pg_merge_meta":{"source_pgid":"0.0","ready_epoch":0,"last_epoch_started":0,"last_epoch_clean":0,"source_version":"0'0","target_version":"0'0"},"last_change":"12","last_force_op_resend":"0","last_force_op_resend_prenautilus":"0","last_force_op_resend_preluminous":"0","auid":0,"snap_mode":"selfmanaged","snap_seq":0,"snap_epoch":0,"pool_snaps":[],"removed_snaps":"[]","quota_max_bytes":0,"quota_max_objects":0,"tiers":[],"tier_of":-1,"read_tier":-1,"write_tier":-1,"cache_mode":"none","target_max_bytes":0,"target_max_objects":0,"cache_target_dirty_ratio_micro":400000,"cache_target_dirty_high_ratio_micro":600000,"cache_target_full_ratio_micro":800000,"cache_min_flush_age":0,"cache_min_evict_age":0,"erasure_code_profile":"","hit_set_params":{"type":"none"},"hit_set_period":0,"hit_set_count":0,"use_gmt_hitset":true,"min_read_recency_for_promote":0,"min_write_recency_for_promote":0,"hit_set_grade_decay_rate":0,"hit_set_search
_last_n":0,"grade_table":[],"stripe_width":0,"expected_num_objects":0,"fast_read":false,"options":{"pg_num_min":1},"application_metadata":{"mgr_devicehealth":{}}}],"osds":[{"osd":0,"uuid":"fc7238e9-7c0c-43a8-9bdb-6fbd64a1dd1e","up":1,"in":1,"weight":1,"primary_affinity":1,"last_clean_begin":0,"last_clean_end":0,"up_from":10,"up_thru":10,"down_at":0,"lost_at":0,"public_addrs":{"addrvec":[{"type":"v2","addr":"172.21.15.3:6802","nonce":3721421418},{"type":"v1","addr":"172.21.15.3:6803","nonce":3721421418}]},"cluster_addrs":{"addrvec":[{"type":"v2","addr":"172.21.15.3:6804","nonce":3721421418},{"type":"v1","addr":"172.21.15.3:6805","nonce":3721421418}]},"heartbeat_back_addrs":{"addrvec":[{"type":"v2","addr":"172.21.15.3:6808","nonce":3721421418},{"type":"v1","addr":"172.21.15.3:6809","nonce":3721421418}]},"heartbeat_front_addrs":{"addrvec":[{"type":"v2","addr":"172.21.15.3:6806","nonce":3721421418},{"type":"v1","addr":"172.21.15.3:6807","nonce":3721421418}]},"public_addr":"172.21.15.3:6803/3721421418","cluster_addr":"172.21.15.3:6805/3721421418","heartbeat_back_addr":"172.21.15.3:6809/3721421418","heartbeat_front_addr":"172.21.15.3:6807/3721421418","state":["exists","up"]},{"osd":1,"uuid":"cafa5809-bbde-4125-bd28-c4b36f434f51","up":1,"in":1,"weight":1,"primary_affinity":1,"last_clean_begin":0,"last_clean_end":0,"up_from":16,"up_thru":21,"down_at":0,"lost_at":0,"public_addrs":{"addrvec":[{"type":"v2","addr":"172.21.15.3:6810","nonce":431372452},{"type":"v1","addr":"172.21.15.3:6811","nonce":431372452}]},"cluster_addrs":{"addrvec":[{"type":"v2","addr":"172.21.15.3:6812","nonce":431372452},{"type":"v1","addr":"172.21.15.3:6813","nonce":431372452}]},"heartbeat_back_addrs":{"addrvec":[{"type":"v2","addr":"172.21.15.3:6816","nonce":431372452},{"type":"v1","addr":"172.21.15.3:6817","nonce":431372452}]},"heartbeat_front_addrs":{"addrvec":[{"type":"v2","addr":"172.21.15.3:6814","nonce":431372452},{"type":"v1","addr":"172.21.15.3:6815","nonce":431372452}]},"public_addr":"172.21.1
5.3:6811/431372452","cluster_addr":"172.21.15.3:6813/431372452","heartbeat_back_addr":"172.21.15.3:6817/431372452","heartbeat_front_addr":"172.21.15.3:6815/431372452","state":["exists","up"]},{"osd":2,"uuid":"d490049d-7375-4755-835c-387571dec1b4","up":1,"in":1,"weight":1,"primary_affinity":1,"last_clean_begin":0,"last_clean_end":0,"up_from":21,"up_thru":0,"down_at":0,"lost_at":0,"public_addrs":{"addrvec":[{"type":"v2","addr":"172.21.15.3:6818","nonce":3872854968},{"type":"v1","addr":"172.21.15.3:6819","nonce":3872854968}]},"cluster_addrs":{"addrvec":[{"type":"v2","addr":"172.21.15.3:6820","nonce":3872854968},{"type":"v1","addr":"172.21.15.3:6821","nonce":3872854968}]},"heartbeat_back_addrs":{"addrvec":[{"type":"v2","addr":"172.21.15.3:6824","nonce":3872854968},{"type":"v1","addr":"172.21.15.3:6825","nonce":3872854968}]},"heartbeat_front_addrs":{"addrvec":[{"type":"v2","addr":"172.21.15.3:6822","nonce":3872854968},{"type":"v1","addr":"172.21.15.3:6823","nonce":3872854968}]},"public_addr":"172.21.15.3:6819/3872854968","cluster_addr":"172.21.15.3:6821/3872854968","heartbeat_back_addr":"172.21.15.3:6825/3872854968","heartbeat_front_addr":"172.21.15.3:6823/3872854968","state":["exists","up"]}],"osd_xinfo":[{"osd":0,"down_stamp":"0.000000","laggy_probability":0,"laggy_interval":0,"features":4540138292837744639,"old_weight":0,"last_purged_snaps_scrub":"2020-05-08T07:52:05.414664+0000","dead_epoch":0},{"osd":1,"down_stamp":"0.000000","laggy_probability":0,"laggy_interval":0,"features":4540138292837744639,"old_weight":0,"last_purged_snaps_scrub":"2020-05-08T07:52:22.372890+0000","dead_epoch":0},{"osd":2,"down_stamp":"0.000000","laggy_probability":0,"laggy_interval":0,"features":4540138292837744639,"old_weight":0,"last_purged_snaps_scrub":"2020-05-08T07:52:39.696420+0000","dead_epoch":0}],"pg_upmap":[],"pg_upmap_items":[],"pg_temp":[],"primary_temp":[],"blacklist":{"172.21.15.3:0/4218317686":"2020-05-09T07:51:31.456074+0000","172.21.15.3:6801/747294320":"2020-05-09T07:51:31.4
56074+0000","172.21.15.3:6800/747294320":"2020-05-09T07:51:31.456074+0000","172.21.15.3:0/343613801":"2020-05-09T07:51:31.456074+0000","172.21.15.3:0/321731460":"2020-05-09T07:51:17.451065+0000","172.21.15.3:0/1524061237":"2020-05-09T07:51:17.451065+0000","172.21.15.3:6800/570309461":"2020-05-09T07:51:17.451065+0000","172.21.15.3:6801/570309461":"2020-05-09T07:51:17.451065+0000","172.21.15.3:0/1326871292":"2020-05-09T07:51:31.456074+0000","172.21.15.3:0/2357622241":"2020-05-09T07:51:17.451065+0000"},"erasure_code_profiles":{"default":{"crush-failure-domain":"osd","k":"2","m":"1","plugin":"jerasure","ruleset-failure-domain":"osd","technique":"reed_sol_van"}},"removed_snaps_queue":[],"new_removed_snaps":[],"new_purged_snaps":[],"crush_node_flags":{},"device_class_flags":{}}
ceph.mon.a.smithi003.stdout:May 08 07:53:16 smithi003 bash[25807]: audit 2020-05-08T07:53:15.033546+0000 mgr.a (mgr.14142) 59 : audit [DBG] from='client.14224 -' entity='client.admin' cmd=[{"prefix": "orch apply mds", "fs_name": "1", "target": ["mon-mgr", ""]}]: dispatch
ceph.mon.a.smithi003.stdout:May 08 07:53:16 smithi003 bash[25807]: cephadm 2020-05-08T07:53:15.033991+0000 mgr.a (mgr.14142) 60 : cephadm [INF] Saving service mds.1 spec with placement count:2
ceph.mon.a.smithi003.stdout:May 08 07:53:16 smithi003 bash[25807]: cephadm 2020-05-08T07:53:15.043747+0000 mgr.a (mgr.14142) 61 : cephadm [INF] Deploying daemon mds.1.smithi003.pehqbc on smithi003
ceph.mon.a.smithi003.stdout:May 08 07:53:16 smithi003 bash[25807]: cluster 2020-05-08T07:53:15.527635+0000 mgr.a (mgr.14142) 62 : cluster [DBG] pgmap v62: 1 pgs: 1 active+clean; 0 B data, 576 KiB used, 265 GiB / 268 GiB avail
ceph.mon.a.smithi003.stdout:May 08 07:53:16 smithi003 bash[25807]: audit 2020-05-08T07:53:15.900953+0000 mon.a (mon.0) 247 : audit [DBG] from='client.? 172.21.15.3:0/3739932791' entity='client.admin' cmd=[{"prefix": "osd dump", "format": "json"}]: dispatch
ceph.mon.a.smithi003.stdout:May 08 07:53:16 smithi003 bash[25807]: audit 2020-05-08T07:53:16.252179+0000 mon.a (mon.0) 248 : audit [DBG] from='client.? 172.21.15.3:0/2236254010' entity='client.admin' cmd=[{"prefix": "osd pool get", "pool": "device_health_metrics", "var": "pg_num"}]: dispatch
tasks.cephfs.filesystem.ceph_manager:[{'pool': 1, 'pool_name': 'device_health_metrics', 'create_time': '2020-05-08T07:51:08.676406+0000', 'flags': 1, 'flags_names': 'hashpspool', 'type': 1, 'size': 3, 'min_size': 2, 'crush_rule': 0, 'object_hash': 2, 'pg_autoscale_mode': 'off', 'pg_num': 1, 'pg_placement_num': 1, 'pg_placement_num_target': 1, 'pg_num_target': 1, 'pg_num_pending': 1, 'last_pg_merge_meta': {'source_pgid': '0.0', 'ready_epoch': 0, 'last_epoch_started': 0, 'last_epoch_clean': 0, 'source_version': "0'0", 'target_version': "0'0"}, 'last_change': '12', 'last_force_op_resend': '0', 'last_force_op_resend_prenautilus': '0', 'last_force_op_resend_preluminous': '0', 'auid': 0, 'snap_mode': 'selfmanaged', 'snap_seq': 0, 'snap_epoch': 0, 'pool_snaps': [], 'removed_snaps': '[]', 'quota_max_bytes': 0, 'quota_max_objects': 0, 'tiers': [], 'tier_of': -1, 'read_tier': -1, 'write_tier': -1, 'cache_mode': 'none', 'target_max_bytes': 0, 'target_max_objects': 0, 'cache_target_dirty_ratio_micro': 400000, 'cache_target_dirty_high_ratio_micro': 600000, 'cache_target_full_ratio_micro': 800000, 'cache_min_flush_age': 0, 'cache_min_evict_age': 0, 'erasure_code_profile': '', 'hit_set_params': {'type': 'none'}, 'hit_set_period': 0, 'hit_set_count': 0, 'use_gmt_hitset': True, 'min_read_recency_for_promote': 0, 'min_write_recency_for_promote': 0, 'hit_set_grade_decay_rate': 0, 'hit_set_search_last_n': 0, 'grade_table': [], 'stripe_width': 0, 'expected_num_objects': 0, 'fast_read': False, 'options': {'pg_num_min': 1}, 'application_metadata': {'mgr_devicehealth': {}}}]
teuthology.orchestra.run.smithi003:> true
teuthology.orchestra.run.smithi003:> sudo adjust-ulimits ceph-coverage /home/ubuntu/cephtest/archive/coverage timeout 120 ceph --cluster ceph --log-early osd pool get device_health_metrics pg_num
teuthology.orchestra.run.smithi003.stdout:pg_num: 1
tasks.cephadm.mgr.a:Stopping mgr.a...
Do you have any idea why the mgr gets stopped at this point?