Project

General

Profile

Bug #58120 » kill_9_rep.py

王子敬 wang, 12/12/2022 03:39 AM

 
#!/usr/bin/python3
import os
import time
import sys
import subprocess

def print_count_down(count, interval=1):
    """Show an in-place countdown from *count* down to 1 on standard output.

    Each step rewrites the current terminal line (via a leading carriage
    return) and then sleeps for *interval* seconds.

    Args:
        count: Number of steps to count down. Zero or a negative value
            prints nothing and returns immediately.
        interval: Seconds to sleep after each step (default 1).
    """
    # Pad every frame to the width of the first (widest) one; otherwise a
    # shorter frame such as "9" drawn over "10" leaves a stale character.
    width = len("countdown {} S".format(count))
    for remaining in range(count, 0, -1):
        frame = "countdown {} S".format(remaining).ljust(width)
        print("\r", frame, end="", flush=True)
        time.sleep(interval)
    if count > 0:
        # Finish the rewritten line so later output starts on a fresh line.
        print()

def echotime():
    """Print the current local time formatted as ``YYYY-MM-DD HH:MM:SS``."""
    now = time.localtime()
    stamp = time.strftime("%Y-%m-%d %H:%M:%S", now)
    print(stamp)

def isallranksactive(rankstatus="8 up:active", command="ceph -s", retries=60, wait=3):
    """Poll the cluster status until the expected active-rank marker shows up.

    Runs *command* through the shell and searches its standard output for
    the *rankstatus* substring, retrying with a countdown between attempts.

    Args:
        rankstatus: Substring that must appear in the command output.
            NOTE(review): this is a plain substring match, so e.g.
            "18 up:active" would also satisfy "8 up:active".
        command: Shell command whose output is inspected.
        retries: Maximum number of attempts.
        wait: Seconds to count down between two attempts.

    Returns:
        True as soon as the marker is found, False if it never appears
        within *retries* attempts.
    """
    for attempt in range(1, retries + 1):
        ret = subprocess.run(
            command,
            shell=True,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            encoding="utf-8",
        )
        if rankstatus in ret.stdout:
            print("all ranks is active")
            return True

        print(ret.stdout)
        if ret.returncode != 0 and ret.stderr:
            # Surface the reason the status command itself failed instead
            # of silently discarding it.
            print(ret.stderr)
        print("NOT all ranks are active, try again later")
        if attempt < retries:
            # No point in waiting after the final attempt.
            print_count_down(wait)
    return False



def ismdsrunning(mdsname, command="ceph -s", retries=60, wait=3):
    """Poll the cluster status until the given MDS is reported as running.

    Runs *command* through the shell and searches its standard output for
    the substring ``"<mdsname> = up:running"``. A timestamp is printed
    before every attempt.

    Args:
        mdsname: Name of the MDS daemon to look for.
        command: Shell command whose output is inspected.
        retries: Maximum number of attempts.
        wait: Seconds to count down between two attempts.

    Returns:
        True as soon as the marker is found, False if it never appears
        within *retries* attempts.
    """
    mdsstatus = mdsname + " = up:running"
    for attempt in range(1, retries + 1):
        echotime()
        ret = subprocess.run(
            command,
            shell=True,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            encoding="utf-8",
        )
        if mdsstatus in ret.stdout:
            print("mds {} is running".format(mdsname))
            return True

        print(ret.stdout)
        if ret.returncode != 0 and ret.stderr:
            # Surface the reason the status command itself failed instead
            # of silently discarding it.
            print(ret.stderr)
        print("mds {} is not in running status, try again later".format(mdsname))
        if attempt < retries:
            # No point in waiting after the final attempt.
            print_count_down(wait)
    return False



def isallmdsrunning(mdsnames=None):
    """Check that every MDS daemon is reported as running.

    Every daemon is polled via ``ismdsrunning`` even if an earlier one
    already failed, so the log shows the state of each of them.

    Args:
        mdsnames: Iterable of MDS names to check. Defaults to the
            module-level ``mds`` list.

    Returns:
        True if all daemons are running (also for an empty list),
        False otherwise.
    """
    names = mds if mdsnames is None else mdsnames
    # A list (not a generator) on purpose: all() must not short-circuit
    # the polling of the remaining daemons.
    results = [ismdsrunning(eachmds) for eachmds in names]
    if all(results):
        # Report success before returning; the message used to sit after
        # the return statement and was never printed.
        print(" all mds is running")
        return True
    print("NOT all mds in running status, please check it manualy")
    return False


# MDS daemon names and the hosts they run on; mds[i] is restarted on cluster[i].
mds = ["a", "b", "c"]
cluster = ["node1", "node2", "node3"]
# Seconds the cluster is left in service between two kill rounds.
servicetime = 60


def _kill_mds(indexes):
    """Kill and restart the tfs-rep service on cluster[i] for each i in *indexes*."""
    for i in indexes:
        cmd_kill_9_mds = 'ssh {} -C "pkill -9 tfs-rep; systemctl reset-failed tfs-rep@{}; systemctl start tfs-rep.target"'.format(cluster[i], mds[i])
        print(cmd_kill_9_mds)
        echotime()
        os.system(cmd_kill_9_mds)


def _cluster_recovered(loop):
    """Wait briefly, then report whether all ranks and all MDS daemons are back."""
    print_count_down(3)
    # Logical "and" instead of bitwise "&": the MDS check is skipped when
    # the ranks never became active.
    if isallranksactive() and isallmdsrunning():
        return True
    echotime()
    print("loop : %s, ranks status is not as expected, check maunualy" % loop)
    return False


def main(argv=None):
    """Run the kill -9 stress test.

    Each loop kills the service on the first two nodes, waits for the
    cluster to recover, lets it serve for ``servicetime`` seconds, kills the
    service on the last two nodes, waits for recovery again and lets it
    serve once more. The run stops at the first failed recovery.

    Args:
        argv: Argument vector; defaults to ``sys.argv``. ``argv[1]`` is the
            number of loops to run (e.g. 5000).

    Returns:
        0 when all loops completed, 1 when the cluster did not recover,
        2 when the loop count argument is missing or not an integer.
    """
    argv = sys.argv if argv is None else argv
    try:
        loops = int(argv[1])
    except (IndexError, ValueError):
        print("usage: {} <loops>".format(argv[0] if argv else "kill_9_rep.py"), file=sys.stderr)
        return 2

    # range(1, loops + 1) so that exactly `loops` iterations are executed.
    for loop in range(1, loops + 1):
        print("-------------------------------loop %s---------------------------" % loop)
        _kill_mds(range(0, 2))
        if not _cluster_recovered(loop):
            return 1

        print_count_down(servicetime)
        _kill_mds(range(1, 3))
        if not _cluster_recovered(loop):
            return 1

        time.sleep(servicetime)
    return 0


if __name__ == "__main__":
    sys.exit(main())



(5-5/5)