|
#!/bin/bash
|
|
#
|
|
# Author: Raimund Sacherer (rsachere@redhat.com)
|
|
#
|
|
# Changes: 2023-10-10 Initial Version V1
|
|
#
|
|
#
|
|
# Please note that this is not to be treated as a Red Hat official binary / software,
|
|
# and feel free to go through the source code to look for what's happening behind the scenes.
|
|
#
|
|
# SPDX-License-Identifier: MIT-0
|
|
# MAINTAINER: rsachere@redhat.com
|
|
#
|
|
# TODO: 2023-10-10 Check if it would make sense to loop over all volumes
|
|
# and subvolume-groups automatically
|
|
# 2023-10-11 Add Creation Time output and if it exists, last accessed timestamps to log files
|
|
#
|
|
|
|
|
|
# With checkwinsize set, bash re-checks the window size after a command
# finishes; the no-op subshell gives it that opportunity so that COLUMNS is
# populated even though the script is not interactive (progressBar pads the
# line to ${COLUMNS}).
shopt -s checkwinsize; (:);

# Remove the scratch files on every exit path.
# TEMPFILE_0 / TEMPFILE_1 are assigned further down in the script, so an
# early exit may fire this trap while they are still empty; empty names are
# skipped. The names are quoted so a TMPDIR containing whitespace is safe.
cleanup_tempfiles () {
    local scratch

    for scratch in "${TEMPFILE_0}" "${TEMPFILE_1}" ; do
        if [ -n "${scratch}" ] ; then
            rm -f -- "${scratch}"
        fi
    done
}
trap cleanup_tempfiles EXIT

OPTIND=1

#
# Defaults
SUDO=sudo                           # prefix for every ceph call; emptied by --no-sudo
LIST_CLONES=1                       # default operation: walk clones
LIST_SNAPSHOTS=0                    # set by --list-snapshots (and then LIST_CLONES=0)
CASE_NUMBER=""                      # set by --case-number=NUMBER
CASE_UPLOAD=0                       # becomes 1 as soon as a case number is given
USER_NAME=""                        # set by --portal-user-name=USER
CEPHFS_GROUP=csi                    # passed to ceph as --group_name
CEPHFS_VOLUME=cephfs                # passed to ceph as --vol_name
DESTRUCTIVE_ACTION=0
CANCEL_IN_PROGRESS=0                # set by --cancel-inprogress-clones
CANCEL_PENDING_CLONES=0             # set by --cancel-pending-clones
REMOVE_CANCELED_SUBVOLUME=0         # set by --remove-canceled-clones
REMOVE_ALL_CANCELED_SUBVOLUMES=0    # set by --remove-all-canceled-clones
|
|
|
|
# Verify that every external program the script relies on can be found.
# Stops with exit status 65 at the first missing one.
for program in jq ceph sudo mktemp ; do
    if ! command -v "${program}" > /dev/null 2>&1 ; then
        # The name is wrapped in single quotes. Backticks inside a
        # double-quoted string are command substitution, so they would try
        # to run the very program that was just found to be missing.
        echo "Please install program '${program}'." >&2
        exit 65
    fi
done
|
|
|
|
# Print the help screen to stdout.
print_usage () {
    printf '%s\n' \
        "$0 [--list-clones] [--list-snapshots]" \
        " [--cancel-pending-clones] [--cancel-inprogress-clones]" \
        " [--remove-canceled-clones] [--remove-all-canceled-clones]" \
        " [--group-name=NAME] [--volume-name=NAME]" \
        " [--case-number=NUMBER] [--portal-user-name=USER]" \
        " [--no-sudo]" \
        " [-h|--help]" \
        "" \
        " ********************************************************************************************" \
        " Please note that this is not to be treated as a Red Hat official binary / software, " \
        " and feel free to go through the source code to look for what's happening behind the scenes. " \
        " ********************************************************************************************" \
        "" \
        " --list-clones List all clones (In-Progress, Pending, Complete). Default Operation" \
        " --list-snapshots List all snapshots and check for pending clone operations" \
        " --cancel-pending-clones Cancel all pending clones." \
        " --cancel-inprogress-clones Cancel all in-progress cloning operations." \
        " --remove-canceled-clones When a clone is canceled, also remove the subvolume." \
        " --remove-all-canceled-clones Remove the subvolumes for clones which have been canceled already." \
        " --group-name=NAME Use <NAME> as cephfs subvolume group name (default 'csi')." \
        " --volume-name=NAME Use <NAME> as cephfs volume name (default 'cephfs')." \
        " --case-number=NUMBER Case number to use to upload the logfiles to." \
        " --portal-user-name=USER access.redhat.com username to upload the logfiles." \
        " --no-sudo Do not use sudo when executing ceph commands." \
        " --help|-h This help screen." \
        "" \
        "Statistics: 'P' = Pending clones, 'In-P' = In progress clones, 'C' = Canceled clones." \
        ""
}

# Translate the command line options into the global option variables.
# Arguments: the script's command line ("$@").
# Exits 0 after printing the help screen for -h/--help, and 66 (after an
# error on stderr plus the help screen) for an unknown option.
parse_arguments () {
    local option

    # Iterating over "$@" already visits every option exactly once, so no
    # shift is needed inside the loop.
    for option in "$@" ; do
        case "${option}" in
            --list-snapshots)
                LIST_SNAPSHOTS=1
                LIST_CLONES=0
                ;;
            --list-clones)
                LIST_SNAPSHOTS=0
                LIST_CLONES=1
                ;;
            --cancel-pending-clones)
                CANCEL_PENDING_CLONES=1
                ;;
            --cancel-inprogress-clones)
                CANCEL_IN_PROGRESS=1
                ;;
            --remove-canceled-clones)
                REMOVE_CANCELED_SUBVOLUME=1
                ;;
            --remove-all-canceled-clones)
                REMOVE_ALL_CANCELED_SUBVOLUMES=1
                ;;
            --group-name=*)
                CEPHFS_GROUP="${option#*=}"
                ;;
            --volume-name=*)
                CEPHFS_VOLUME="${option#*=}"
                ;;
            --case-number=*)
                # Supplying a case number is what switches the upload on.
                CASE_NUMBER="${option#*=}"
                CASE_UPLOAD=1
                ;;
            --portal-user-name=*)
                USER_NAME="${option#*=}"
                ;;
            --no-sudo)
                SUDO=""
                ;;
            -h|--help)
                print_usage
                exit 0
                ;;
            *)
                echo "Command option ${option} unknown." >&2
                echo
                # Print the help directly instead of re-executing ${0}: the
                # unquoted re-exec failed for script paths with whitespace.
                print_usage
                exit 66
                ;;
        esac
    done
}

parse_arguments "$@"
|
|
|
|
|
|
|
|
# Append one tab-separated record to ${LOGFILE}.
# Arguments:
#   $1..$4 - the four record fields; the callers in this script pass
#            action, subvolume name, ceph exit code and a message.
# The record is prefixed with the current ISO-8601 timestamp.
log () {
    # LOGFILE is derived from ${0}; it is quoted so that a script path
    # containing whitespace does not cause an "ambiguous redirect".
    printf '%s\t%s\t%s\t%s\t%s\n' "$(date -Iseconds)" "${1}" "${2}" "${3}" "${4}" >> "${LOGFILE}"
}
|
|
|
|
# Upload the log file and the command file to a support case.
# Does nothing unless USER_NAME and CASE_NUMBER are set and CASE_UPLOAD is 1.
# Globals read: USER_NAME, CASE_UPLOAD, CASE_NUMBER, LOGFILE, CMDFILE.
# Returns: 0 when nothing had to be done or the upload command succeeded,
#          1 when the archive could not be created or curl failed.
case_upload () {
    local upload_file
    local stamp
    local upload_status=0

    if [ -z "${USER_NAME}" ] || [ "${CASE_UPLOAD}" -ne 1 ] || [ -z "${CASE_NUMBER}" ] ; then
        return 0
    fi

    echo "Trying to upload log files to case ${CASE_NUMBER}."

    # Drop the colons from the timestamp: GNU tar can interpret an archive
    # name containing ':' as host:file on a remote machine (see
    # --force-local), which happens when ${0} has no directory part.
    stamp=$(date -Iminutes)
    upload_file="${0}-${stamp//:/}.tgz"

    if ! tar -czf "${upload_file}" "${LOGFILE}" "${CMDFILE}" ; then
        echo "Could not create archive ${upload_file}, nothing uploaded." >&2
        rm -f "${upload_file}"
        return 1
    fi

    # curl prompts for the password of ${USER_NAME} itself.
    if ! curl -u "${USER_NAME}" -X POST -F "description=${LOGFILE}" -F "file=@${upload_file}" "https://attachments.access.redhat.com/hydra/rest/cases/${CASE_NUMBER}/attachments/" ; then
        echo "Upload to case ${CASE_NUMBER} failed." >&2
        upload_status=1
    fi

    rm -f "${upload_file}"
    echo

    return "${upload_status}"
}
|
|
|
|
# Run "ceph fs ..." for the configured volume and subvolume group and record
# the command line in ${CMDFILE}.
# Arguments:
#   $1..$3 - words placed after "ceph fs" (sub-command, action, options).
#            They are word-split on purpose: empty arguments disappear and
#            "--sub_name foo" becomes two words.
#   $4     - extra options appended after --vol_name/--group_name
#            (word-split as well).
#   $5     - where the output goes:
#              STDOUT_WITHERROR  stdout, with stderr merged in
#              STDOUT            stdout, stderr discarded
#              anything else     name of a file receiving stdout+stderr
#              empty/omitted     ${TEMPFILE_1}
# Globals read: SUDO, CEPHFS_VOLUME, CEPHFS_GROUP, CMDFILE, TEMPFILE_1.
# Returns: the exit status of the ceph command.
ceph_command () {
    local output_target="${5:-${TEMPFILE_1}}"
    local cmdline="${SUDO} ceph fs ${1} ${2} ${3} --vol_name ${CEPHFS_VOLUME} --group_name ${CEPHFS_GROUP} ${4}"
    local -a cmd

    # Build the argument vector once. Volume and group name are single
    # words and therefore quoted; the remaining pieces are split on purpose.
    # shellcheck disable=SC2206
    cmd=( ${SUDO} ceph fs ${1} ${2} ${3} --vol_name "${CEPHFS_VOLUME}" --group_name "${CEPHFS_GROUP}" ${4} )

    # The ceph invocation is the last command of each arm, so its exit
    # status becomes the return value of the function.
    case "${output_target}" in
        STDOUT_WITHERROR)
            printf '%s\n' "${cmdline} 2>&1" >> "${CMDFILE}"
            "${cmd[@]}" 2>&1
            ;;
        STDOUT)
            printf '%s\n' "${cmdline} 2>/dev/null" >> "${CMDFILE}"
            "${cmd[@]}" 2>/dev/null
            ;;
        *)
            printf '%s\n' "${cmdline} > ${output_target} 2>&1" >> "${CMDFILE}"
            "${cmd[@]}" > "${output_target}" 2>&1
            ;;
    esac
}
|
|
|
|
# Remove the subvolume of a clone, but only if the clone status stored in
# ${TEMPFILE_1} reports the state "canceled".
# Arguments:
#   $1 - subvolume (clone) name
#   $2 - passed by the callers but not used here
# Globals: reads TEMPFILE_1; writes CEPH_EXIT_CODE; increments
#          stat_clone_removed on success. Every outcome is written via log.
check_and_remove_subvolume () {
    local clone_state

    # Read the state once. The caller must have stored a fresh
    # "clone status" result in TEMPFILE_1 before calling.
    clone_state=$(jq -r '.status.state' < "${TEMPFILE_1}")

    if [ "${clone_state}" != "canceled" ] ; then
        # CEPH_EXIT_CODE is still the value left behind by the caller.
        log "not-removing-canceled-clone" "${1}" "${CEPH_EXIT_CODE}" "Status is not canceled (status: ${clone_state})"
        return
    fi

    # From here on TEMPFILE_1 holds the output of the rm command.
    ceph_command subvolume rm "--sub_name ${1}" "--force"
    CEPH_EXIT_CODE=$?

    if [ "${CEPH_EXIT_CODE}" -eq 0 ] ; then
        log "removing-canceled-clone" "${1}" "${CEPH_EXIT_CODE}" "$(cat "${TEMPFILE_1}")"
        stat_clone_removed=$(( stat_clone_removed + 1 ))
    else
        log "not-removing-canceled-clone" "${1}" "${CEPH_EXIT_CODE}" "$(cat "${TEMPFILE_1}")"
    fi
}
|
|
|
|
# Scratch files: TEMPFILE_0 receives the list of subvolume names,
# TEMPFILE_1 the output of the most recent ceph command.
if ! TEMPFILE_0=$(mktemp) || ! TEMPFILE_1=$(mktemp) ; then
    echo "Could not create temporary files." >&2
    exit 68
fi

# Log and command file are created next to the script. One timestamp is
# used for both so the two names always belong together.
RUN_TIMESTAMP=$(date -Iseconds)
LOGFILE="${0}_${RUN_TIMESTAMP}.log"
CMDFILE="${0}_${RUN_TIMESTAMP}.cmd"

if ! touch "${LOGFILE}" "${CMDFILE}" ; then
    echo "Could not create ${LOGFILE} and ${CMDFILE}." >&2
    exit 68
fi
|
|
|
|
|
|
# Function to draw progress bar
# Arguments:
#   $1 - total number of items
#   $2 - number of items completed so far
#   $3 - free text appended after "<completed> of <total>"
# Globals: global_progressbar_percent holds the last percentage drawn;
#          COLUMNS is used to blank the current line before redrawing.
# The call for the first item ($2 == 1) only resets the remembered
# percentage and draws nothing.
progressBar () {
    local total="${1}"
    local completed="${2}"
    local percent
    local stats

    if [ "${completed}" == "1" ] ; then
        global_progressbar_percent=-1
        return
    fi

    # Nothing sensible can be drawn without a positive total; this also
    # avoids a division by zero below.
    if ! [ "${total:-0}" -gt 0 ] 2>/dev/null ; then
        return
    fi

    # Whole-number percentage. Computing it arithmetically also yields 0 for
    # ratios below 1%, which the former string slicing turned into a blank
    # string that broke the numeric comparison.
    percent=$(( completed * 100 / total ))

    if [ "${percent}" -eq "${global_progressbar_percent:--1}" ] ; then
        # Only update on a integer percentage change
        # This limits screen rewrites to max 100 and helps performance
        # No need to update the screen if we don't modify the % value.
        return
    fi

    printf -v stats "%i of %i %s" "${completed}" "${total}" "${3}"
    printf "\r%${COLUMNS}s" ""
    printf "\r[%3i%%] %s" "${percent}" "${stats}"
    global_progressbar_percent=${percent}

    return
}
|
|
|
|
#
# For Future version: Loop through all ceph fs groups, maybe even ceph fs volumes
#
# Store the names of all subvolumes of the configured volume/group in
# TEMPFILE_0, one per line.
{ ceph_command subvolume ls "" "" "STDOUT_WITHERROR" ; } | jq -r '.[].name' > "${TEMPFILE_0}"
# $? would only report jq, the last stage of the pipeline. PIPESTATUS must
# be copied immediately, before any other command overwrites it.
SUBVOLUME_LS_STATUS=( "${PIPESTATUS[@]}" )

CEPH_EXIT_CODE=${SUBVOLUME_LS_STATUS[0]}
if [ "${CEPH_EXIT_CODE}" -eq 0 ] ; then
    # ceph succeeded; a failure can now only come from jq.
    CEPH_EXIT_CODE=${SUBVOLUME_LS_STATUS[1]}
fi

if [ "${CEPH_EXIT_CODE}" -ne 0 ] ; then
    echo "Error executing ceph command." >&2
    exit 67
fi
|
|
|
|
# Cancel one clone and, when --remove-canceled-clones was given, remove its
# subvolume afterwards.
# Arguments:
#   $1 - clone (subvolume) name
#   $2 - log action used when the cancel succeeds
#   $3 - log action used when the cancel fails
# Globals: writes CEPH_EXIT_CODE; reads REMOVE_CANCELED_SUBVOLUME, TEMPFILE_1.
# Returns: 0 if the clone was canceled, 1 otherwise.
cancel_clone () {
    local clone="${1}"
    local ok_action="${2}"
    local error_action="${3}"

    ceph_command clone cancel "--clone_name ${clone}"
    CEPH_EXIT_CODE=$?

    if [ "${CEPH_EXIT_CODE}" -ne 0 ] ; then
        log "${error_action}" "${clone}" "${CEPH_EXIT_CODE}" "$(cat "${TEMPFILE_1}")"
        return 1
    fi

    log "${ok_action}" "${clone}" "${CEPH_EXIT_CODE}" "$(cat "${TEMPFILE_1}")"

    if [ "${REMOVE_CANCELED_SUBVOLUME}" -eq 1 ] ; then
        # Re-read the status: check_and_remove_subvolume only removes the
        # subvolume when TEMPFILE_1 reports the state "canceled".
        ceph_command clone status "--clone_name ${clone}"
        CEPH_EXIT_CODE=$?

        if [ "${CEPH_EXIT_CODE}" -eq 0 ] ; then
            check_and_remove_subvolume "${clone}" "canceled"
        else
            log "not-removing-canceled-clone" "${clone}" "${CEPH_EXIT_CODE}" "$(cat "${TEMPFILE_1}")"
        fi
    fi

    return 0
}

# Append one tab-separated line per snapshot of a subvolume to ${LOGFILE}.
# Arguments: $1 - subvolume name
list_subvolume_snapshots () {
    local subvolume="${1}"
    local snapshot

    { ceph_command subvolume snapshot ls "--sub_name ${subvolume}" "STDOUT" ; } | jq -r '.[].name' > "${TEMPFILE_1}"

    # The list is read through fd 4 so the commands inside the loop cannot
    # consume it through their stdin.
    while IFS= read -r snapshot <&4 ; do
        if [ -z "${snapshot}" ] ; then
            continue
        fi

        #
        # In newer versions we could extract pending clones from the snapshot info output, but this is very new and the output is not
        # present in older versions of ceph. To be backwards compatible we do not try to use it.
        { ceph_command "subvolume snapshot" info "--sub_name ${subvolume}" "--snap_name ${snapshot}" "STDOUT" ; } | \
            jq -r --arg sv "${subvolume}" --arg ss "${snapshot}" '. += {"volume_name": $sv, "snap_name": $ss} |
                [.volume_name, .snap_name, ("pending-clones:" + .has_pending_clones), .created_at] |
                @tsv' >> "${LOGFILE}"
    done 4< "${TEMPFILE_1}"
}

# Query the clone status of one subvolume, update the statistics and apply
# the requested cancel/remove actions.
# Arguments: $1 - subvolume name
process_clone () {
    local clone="${1}"
    local clone_state

    ceph_command clone status "--clone_name ${clone}"
    CEPH_EXIT_CODE=$?

    if [ "${CEPH_EXIT_CODE}" -ne 0 ] ; then
        # The status query failed; record ceph's output for this subvolume.
        log "not-canceling" "${clone}" "${CEPH_EXIT_CODE}" "$(cat "${TEMPFILE_1}")"
        return
    fi

    clone_state=$(jq -r '.status.state' < "${TEMPFILE_1}")

    case "${clone_state}" in
        pending)
            stat_found_pending=$(( stat_found_pending + 1 ))
            if [ "${CANCEL_PENDING_CLONES}" -eq 1 ] ; then
                if cancel_clone "${clone}" "cancel-pending-clone" "error-cancel-pending-clone" ; then
                    stat_clone_canceled=$(( stat_clone_canceled + 1 ))
                fi
            else
                # Only clones that were left alone are reported as
                # "not-canceling", matching the in-progress branch.
                log "not-canceling" "${clone}" "${CEPH_EXIT_CODE}" "${clone_state}"
            fi
            ;;
        canceled)
            stat_found_canceled=$(( stat_found_canceled + 1 ))
            if [ "${REMOVE_ALL_CANCELED_SUBVOLUMES}" -eq 1 ] ; then
                check_and_remove_subvolume "${clone}" "canceled"
            else
                log "not-canceling" "${clone}" "${CEPH_EXIT_CODE}" "clone already canceled."
            fi
            ;;
        in-progress)
            stat_found_inprogress=$(( stat_found_inprogress + 1 ))
            if [ "${CANCEL_IN_PROGRESS}" -eq 1 ] ; then
                if cancel_clone "${clone}" "cancel-in-progress-clone" "error-cancel-in-progress-clone" ; then
                    stat_canceled_inprogress=$(( stat_canceled_inprogress + 1 ))
                fi
            else
                log "not-canceling" "${clone}" "${CEPH_EXIT_CODE}" "${clone_state}"
            fi
            ;;
        *)
            log "not-canceling" "${clone}" "${CEPH_EXIT_CODE}" "${clone_state}"
            ;;
    esac
}

# Walk over every subvolume name listed in ${TEMPFILE_0} and either list its
# snapshots (LIST_SNAPSHOTS=1) or inspect/cancel/remove it as a clone.
# The statistic counters are reset at the start and stay available to the
# caller afterwards, because the loop runs in the current shell.
process_subvolumes () {
    local subvolume

    subvol_completed=0
    subvol_number=$(wc -l < "${TEMPFILE_0}")
    stat_clone_removed=0
    stat_clone_canceled=0
    stat_found_pending=0
    stat_found_canceled=0
    stat_found_inprogress=0
    stat_canceled_inprogress=0

    # fd 3 keeps the name list away from the stdin of the commands in the
    # loop body.
    while IFS= read -r subvolume <&3 ; do
        if [ "${LIST_SNAPSHOTS}" -eq 1 ] ; then
            list_subvolume_snapshots "${subvolume}"

            subvol_completed=$(( subvol_completed + 1 ))
            progressBar "${subvol_number}" "${subvol_completed}" "subvolumes checked for snapshots (latest: ${subvolume})."
        else
            process_clone "${subvolume}"

            subvol_completed=$(( subvol_completed + 1 ))
            progressBar "${subvol_number}" "${subvol_completed}" "subvolumes. Found: 'P': ${stat_found_pending} 'In-P': ${stat_found_inprogress} 'C': ${stat_found_canceled}. Action taken: ${stat_clone_canceled} 'P' canceled. ${stat_canceled_inprogress} 'In-P' canceled. ${stat_clone_removed} 'C' Removed."
        fi
    done 3< "${TEMPFILE_0}"
}

process_subvolumes
|
|
|
|
echo
echo "Log file: ${LOGFILE}"
echo "Cmd file: ${CMDFILE}"

case_upload

# Print the log lines worth showing: lines containing ENOTSUP are always
# dropped; outside of clone-listing mode lines ending in "complete" are
# dropped as well.
filtered_log () {
    if [ "${LIST_CLONES}" -eq 1 ] ; then
        grep -v ENOTSUP "${LOGFILE}"
    else
        grep -v ENOTSUP "${LOGFILE}" | grep -Ev "complete$"
    fi
}

# Only open the pager when there is something to show.
if [ "$(filtered_log | wc -l)" -ne "0" ] ; then
    filtered_log | column -t | less
fi

echo "Finished."

exit 0
|