|
#!/usr/bin/python3
|
|
import os
|
|
import time
|
|
import sys
|
|
import subprocess
|
|
|
|
def print_count_down(count, interval=1):
    """Show a single-line countdown from *count* down to 1.

    Each tick rewrites the current terminal line (carriage return, no
    newline) and then sleeps for *interval* seconds.

    Args:
        count: Number of ticks to display; values <= 0 print nothing.
        interval: Seconds to sleep after each tick (default 1).
    """
    # Pad every tick to the width of the widest one (the first) so that a
    # shorter number fully overwrites the previous text, e.g. 10 -> 9.
    width = len("countdown {} S".format(count))
    for remaining in range(count, 0, -1):
        text = "countdown {} S".format(remaining).ljust(width)
        print("\r", text, end="", flush=True)
        time.sleep(interval)
    if count > 0:
        # Terminate the countdown line so the caller's next message starts
        # on a fresh line instead of being appended to "countdown 1 S".
        print()
|
|
|
|
def echotime():
    """Print the current local time formatted as ``YYYY-MM-DD HH:MM:SS``."""
    now = time.localtime()
    stamp = time.strftime("%Y-%m-%d %H:%M:%S", now)
    print(stamp)
|
|
|
|
def isallranksactive(rankstatus="8 up:active", retries=60, interval=3):
    """Poll ``ceph -s`` until the expected active-rank marker appears.

    Runs ``ceph -s`` up to *retries* times and succeeds as soon as
    *rankstatus* occurs in the command's standard output. Between failed
    attempts a countdown of *interval* seconds is shown.

    Args:
        rankstatus: Substring searched for in the ``ceph -s`` output.
        retries: Maximum number of polls.
        interval: Seconds to count down between two polls.

    Returns:
        True if the marker was found, False once all attempts are used up.
    """
    # NOTE(review): this is a plain substring test, so the default marker
    # would also match e.g. "18 up:active" -- confirm that is acceptable.
    command = "ceph -s"
    for attempt in range(retries):
        ret = subprocess.run(
            command,
            shell=True,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            encoding="utf-8",
        )
        if rankstatus in ret.stdout:
            print("all ranks is active")
            return True

        print(ret.stdout)
        if ret.stderr:
            # stderr is captured above; show it so a failing ceph command
            # is not reported as just an empty status.
            print(ret.stderr)
        print("NOT all ranks are active, try again later")
        if attempt < retries - 1:
            # No point in waiting after the last attempt.
            print_count_down(interval)
    return False
|
|
|
|
|
|
|
|
def ismdsrunning(mdsname, retries=60, interval=3):
    """Poll ``ceph -s`` until the given mds is reported as running.

    Runs ``ceph -s`` up to *retries* times, printing a timestamp before each
    poll, and succeeds as soon as ``"<mdsname> = up:running"`` occurs in the
    command's standard output. Between failed attempts a countdown of
    *interval* seconds is shown.

    Args:
        mdsname: Name of the mds to look for in the status output.
        retries: Maximum number of polls.
        interval: Seconds to count down between two polls.

    Returns:
        True if the marker was found, False once all attempts are used up.
    """
    command = "ceph -s"
    mdsstatus = mdsname + " = up:running"
    for attempt in range(retries):
        echotime()
        ret = subprocess.run(
            command,
            shell=True,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            encoding="utf-8",
        )
        if mdsstatus in ret.stdout:
            print("mds {} is running".format(mdsname))
            return True

        print(ret.stdout)
        if ret.stderr:
            # stderr is captured above; show it so a failing ceph command
            # is not reported as just an empty status.
            print(ret.stderr)
        print("mds {} is not in running status, try again later".format(mdsname))
        if attempt < retries - 1:
            # No point in waiting after the last attempt.
            print_count_down(interval)
    return False
|
|
|
|
|
|
|
|
def isallmdsrunning():
    """Check that every mds named in the module-level ``mds`` list is running.

    Calls ``ismdsrunning`` for each entry of ``mds`` and reports the overall
    result on stdout.

    Returns:
        True if every ``ismdsrunning`` call returned True, otherwise False.
    """
    # Build the complete list first (rather than passing a generator to
    # all()) so that every mds is polled even after one has failed.
    results = [ismdsrunning(eachmds) for eachmds in mds]
    if all(results):
        # The message must be printed before returning; placed after the
        # return it would never be shown.
        print(" all mds is running")
        return True

    print("NOT all mds in running status, please check it manualy")
    return False
|
|
|
|
|
|
# mds names and the hosts they are restarted on; entries are paired by index
# (mds[i] is restarted via ssh on cluster[i]).
mds = ["a", "b", "c"]
cluster = ["node1", "node2", "node3"]

# Seconds to wait after a successful health check before the next kill round.
servicetime = 60


def _restart_mds(index):
    """Kill tfs-rep on cluster[index] over ssh and start it again."""
    cmd_kill_9_mds = 'ssh {} -C "pkill -9 tfs-rep; systemctl reset-failed tfs-rep@{}; systemctl start tfs-rep.target"'.format(cluster[index], mds[index])
    print(cmd_kill_9_mds)
    echotime()
    os.system(cmd_kill_9_mds)


def _cluster_recovered(loop):
    """Wait briefly, then report whether ranks and all mds are healthy again."""
    print_count_down(3)
    # Logical `and` short-circuits: when the rank check has already failed
    # there is no reason to also sit through the per-mds polling.
    if isallranksactive() and isallmdsrunning():
        return True
    echotime()
    print("loop : %s, ranks status is not as expected, check maunualy" % loop)
    return False


def main():
    """Run the kill/restart stress loop.

    Usage: ``<script> LOOPS`` (for example 5000). Each loop restarts the
    first two mds, verifies recovery, waits ``servicetime`` seconds, restarts
    the last two mds, verifies recovery and waits again. The run stops at the
    first failed verification.
    """
    if len(sys.argv) < 2:
        sys.exit("usage: {} LOOPS".format(sys.argv[0]))
    try:
        loops = int(sys.argv[1])
    except ValueError:
        sys.exit("LOOPS must be an integer, got {!r}".format(sys.argv[1]))

    # Inclusive upper bound so that exactly `loops` iterations are executed.
    for loop in range(1, loops + 1):
        print("-------------------------------loop %s---------------------------" % loop)

        # Round 1: restart the mds at indices 0 and 1.
        for i in range(0, 2):
            _restart_mds(i)
        if not _cluster_recovered(loop):
            break
        print_count_down(servicetime)

        # Round 2: restart the mds at indices 1 and 2.
        for i in range(1, 3):
            _restart_mds(i)
        if not _cluster_recovered(loop):
            break
        time.sleep(servicetime)


if __name__ == "__main__":
    main()
|
|
|
|
|
|
|
|
|