check_container_upgrade/check_container_upgrade
2024-04-18 11:16:47 +02:00

305 lines
7.1 KiB
Bash
Executable file

#!/bin/bash
# Monitoring plugin to check if running containers are upgradable
#
# Author: Benjamin Renard <brenard@zionetrix.net>
# Date: Sun, 03 Mar 2024 16:40:19 +0100
# Source: https://gitea.zionetrix.net/bn8/check_container_upgrade
#
# --- Default settings, most of them overridable from the command line ---

# Engine names accepted by -E, and the engine in use ("auto": detect it)
POSSIBLE_ENGINES=( auto docker podman )
ENGINE=auto

# Path of a docker-compose.yml file; empty means plain containers are listed
DOCKERCOMPOSE_FILE=

# Set to 1 to make debug() print its messages
DEBUG=0

# Number of container checks allowed to run at the same time (0=no limit)
MAX_PARALLEL_CHECKS=4

# When not empty, only these containers are checked
ONLY_CONTAINERS=()

# Containers that are never checked
EXCLUDED_CONTAINERS=( "buildx_buildkit_default" )

# Known check plugins: the key is the path probed inside the container,
# the value is the command line run there when that path exists
declare -rA CHECK_PLUGINS=(
  ["/usr/lib/nagios/plugins/check_apt"]="/usr/lib/nagios/plugins/check_apt -u -U -t 60 -l"
  ["/usr/lib/nagios/plugins/check_apk"]="/usr/lib/nagios/plugins/check_apk"
)
# Print a debug message on stderr, only when debug mode is enabled (DEBUG=1).
# Backslash escapes found in the message are interpreted (echo -e).
# Parameters : [message] ...
debug() {
  # Nothing to do (and success) when debug mode is off
  [ $DEBUG -eq 1 ] || return 0
  echo -e "[DEBUG] $@" >&2
}
# Tell whether the argument list is empty.
# Parameters : any number of arguments (typically an expanded array)
# Returns : 0 when called without argument, 1 otherwise
is_empty() {
  (( $# == 0 ))
}
# Tell whether a value is one of the elements that follow it.
# Parameters : [searched value] [element] ...
# Returns : 0 when the value equals one of the elements, 1 otherwise
#           (including when no element is given)
in_array() {
  # Locals, so that callers' variables of the same name are left untouched
  local needle=${1-} item
  (( $# > 0 )) && shift
  for item in "$@"; do
    [[ "$needle" == "$item" ]] && return 0
  done
  return 1
}
# Join items with a delimiter and print the result without trailing newline.
# Parameters : [delimiter] [item] ...
# Prints nothing when no item is given.
implode() {
  local sep=${1-} head=${2-}
  # Fewer than two arguments: there is nothing to join
  shift 2 || return 0
  # Every remaining item is printed prefixed by the delimiter
  printf %s "$head" "${@/#/$sep}"
}
# Print the help text, preceded by an error message when one is given,
# then exit.
# Parameters : [error message] (optional)
# Exit code : 3 (UNKNOWN, like the other failures reported by this plugin)
#             when an error message is given, 0 otherwise
usage() {
  local error=${1-}
  [[ -n "$error" ]] && printf '%s\n' "$error"
  # ${0##*/} is the script name without its directory, whatever the
  # characters found in the path
  cat << EOF
Usage : ${0##*/} [-d] [-E /path/to/engine] [container1,...]
-E [path] Force a specific engine (possible values: ${POSSIBLE_ENGINES[@]}, default: $ENGINE)
-x [container] Exclude specified container (could be repeat)
-M [integer] Max number of container checks to run in parallel (default: $MAX_PARALLEL_CHECKS, 0=no limit)
-f [docker-compose.yml] To check upgrade on docker compose project, specified the path of the docker-compose.yml file
-d Debug mode
-X Enable bash tracing (=set -x)
-h Show this message
EOF
  # A usage error must not be read as a WARNING by the monitoring system
  [[ -n "$error" ]] && exit 3
  exit 0
}
# Parse the command line and store the result in the global settings
# (DEBUG, ENGINE, DOCKERCOMPOSE_FILE, EXCLUDED_CONTAINERS,
# MAX_PARALLEL_CHECKS and ONLY_CONTAINERS).
# Parameters : the command line arguments of the script
# Any invalid argument ends the script through usage().
parse_args() {
  local opt
  while (( $# > 0 )); do
    opt=$1
    shift
    case "$opt" in
      -d)
        DEBUG=1
        ;;
      -h)
        usage
        ;;
      -E)
        (( $# > 0 )) || usage "Missing value for option $opt"
        ENGINE=$1
        shift
        # Either the path of an executable or one of the known engine names
        if [[ ! -x "$ENGINE" ]]; then
          in_array "$ENGINE" "${POSSIBLE_ENGINES[@]}" || usage "Invalid engine $ENGINE"
        fi
        ;;
      -f)
        (( $# > 0 )) || usage "Missing value for option $opt"
        DOCKERCOMPOSE_FILE=$1
        shift
        ;;
      -x)
        (( $# > 0 )) || usage "Missing value for option $opt"
        EXCLUDED_CONTAINERS+=( "$1" )
        shift
        ;;
      -M)
        (( $# > 0 )) || usage "Missing value for option $opt"
        # The value is later used in integer comparisons
        [[ "$1" =~ ^[0-9]+$ ]] || usage "Invalid max parallel checks value $1 (integer expected)"
        MAX_PARALLEL_CHECKS=$1
        shift
        ;;
      -X)
        set -x
        ;;
      -*)
        # Not a container name: most likely a mistyped option
        usage "Unknown option $opt"
        ;;
      *)
        # Anything else is the name of a container to check
        [[ -n "$opt" ]] && ONLY_CONTAINERS+=( "$opt" )
        ;;
    esac
  done
}
parse_args "$@"
if (( ${#ONLY_CONTAINERS[@]} > 0 )); then
  debug "Only containers: ${ONLY_CONTAINERS[*]}"
fi

# Print the name of the first engine of the list that is available as a
# command.
# Parameters : [engine] ... (the "auto" pseudo-engine is skipped)
# Returns : 0 when an engine was found (its name is printed on stdout),
#           1 otherwise
detect_engine() {
  local candidate
  for candidate in "$@"; do
    [[ "$candidate" == "auto" ]] && continue
    if command -v "$candidate" > /dev/null 2>&1; then
      printf '%s\n' "$candidate"
      return 0
    fi
    debug "$candidate not found"
  done
  return 1
}

if [[ "$ENGINE" == "auto" ]]; then
  debug "Auto-detect engine..."
  # ENGINE is still "auto" at this point, so the failure has to be taken
  # from the detection result and not from an empty value
  if ! ENGINE=$(detect_engine "${POSSIBLE_ENGINES[@]}"); then
    echo "UNKNOWN - Fail to auto-detect engine"
    exit 3
  fi
  debug "Auto-detected engine: $ENGINE"
fi
# A compose file was requested: it has to exist
if [[ -n "$DOCKERCOMPOSE_FILE" ]] && [[ ! -e "$DOCKERCOMPOSE_FILE" ]]; then
  echo "UNKNOWN - Docker compose file not found ($DOCKERCOMPOSE_FILE)"
  exit 3
fi
# Exit code of the plugin, raised while the check results are handled
EXIT_CODE=0

# State kept per container, indexed by container name:
# - CONTAINER_STATUS_FILE: temporary file receiving the check output
# - CONTAINER_PID: PID of the background check
# - UP_TO_DATE / ERRORS: check output of the containers whose check
#   returned zero / non-zero
# - UNKNOWNS: containers whose check returned 3 or more
declare -A CONTAINER_STATUS_FILE CONTAINER_PID UP_TO_DATE ERRORS UNKNOWNS

# Names of the containers for which a check was started
CHECKED_CONTAINERS=()
debug "List running containers..."
# The engine is called without pipeline so that its exit code is not lost:
# a listing failure must not be reported as "no running container"
LIST_EXIT_CODE=0
if [[ -n "$DOCKERCOMPOSE_FILE" ]]; then
  RUNNING_CONTAINERS=$("$ENGINE" compose -f "$DOCKERCOMPOSE_FILE" ps --format '{{.Service}}') || LIST_EXIT_CODE=$?
else
  RUNNING_CONTAINERS=$("$ENGINE" ps --format '{{.Names}}') || LIST_EXIT_CODE=$?
fi
if [[ "$LIST_EXIT_CODE" -ne 0 ]]; then
  echo "UNKNOWN - Fail to list running containers (exit code: $LIST_EXIT_CODE)"
  exit 3
fi
# One name per line in the engine output, a space separated list afterwards
RUNNING_CONTAINERS=${RUNNING_CONTAINERS//$'\n'/ }
debug "Running containers: $RUNNING_CONTAINERS"
# Run a command inside a container: through "compose exec" when a
# docker-compose file is in use, through "exec" otherwise.
# Parameters : [container or compose service] [command] [argument] ...
# Returns : the exit code of the engine command
exec_in_container() {
  local container=$1
  shift
  if [[ -n "$DOCKERCOMPOSE_FILE" ]]; then
    "$ENGINE" compose -f "$DOCKERCOMPOSE_FILE" exec "$container" "$@"
    return
  fi
  # "$@" keeps every argument as given, even with spaces or glob characters
  "$ENGINE" exec "$container" "$@"
}
# Check one container with the first plugin of CHECK_PLUGINS found in it.
# Implemented inside a function to allow running it in parallel.
# Parameters : [container] [output file]
# Output : the plugin output, flattened on a single line, is written in
#          the output file
# Returns : the exit code of the plugin, or 3 (UNKNOWN) when no plugin is
#           available in the container
check_container() {
  local container=$1 output_file=$2
  local plugin_path status="" ex=3 plugin_found=0
  local -a plugin_cmd words

  # The keys of CHECK_PLUGINS are the paths to look for, its values are
  # the command lines to run
  for plugin_path in "${!CHECK_PLUGINS[@]}"; do
    if ! exec_in_container "$container" test -e "$plugin_path" > /dev/null 2>&1; then
      debug "$container - Plugin $plugin_path not found"
      continue
    fi
    debug "$container - Plugin $plugin_path found, use it"
    # Split the command line into words, without pathname expansion
    read -r -a plugin_cmd <<< "${CHECK_PLUGINS[$plugin_path]}"
    status=$(exec_in_container "$container" "${plugin_cmd[@]}" 2>&1)
    ex=$?
    plugin_found=1
    debug "$container - Plugin output: $status"
    debug "$container - Plugin exit code: $ex"
    break
  done

  # Decided on whether a plugin ran, so that a silent plugin keeps its own
  # exit code
  if (( plugin_found == 0 )); then
    debug "$container - No check plugin found"
    status="UNKNOWN - No check plugin available"
    ex=3
  fi

  # Write the output on one line with single spaces, as an unquoted echo
  # would, but without expanding glob characters found in the output
  read -r -d '' -a words <<< "$status"
  printf '%s\n' "${words[*]}" > "$output_file"
  return "$ex"
}
debug "Trigger check of all selected containers..."
# Remove the remaining status files however the script ends
trap 'rm -f -- "${CONTAINER_STATUS_FILE[@]}"' EXIT
# RUNNING_CONTAINERS is a space separated list: word splitting is intended
for container in $RUNNING_CONTAINERS; do
  if ! is_empty "${ONLY_CONTAINERS[@]}" && ! in_array "$container" "${ONLY_CONTAINERS[@]}"; then
    debug "$container - Ignored"
    continue
  fi
  if in_array "$container" "${EXCLUDED_CONTAINERS[@]}"; then
    debug "$container - Excluded"
    continue
  fi
  if [ "$MAX_PARALLEL_CHECKS" -gt 0 ]; then
    # Only running jobs are counted, and the count is checked again after
    # each wait since "wait -n" may return for an already finished check
    while [ "$(jobs -pr | wc -l)" -ge "$MAX_PARALLEL_CHECKS" ]; do
      debug "Max parallel checks count reached. Waiting some check ending"
      wait -n
      debug "Some check ended, continue"
    done
  fi
  if ! TMP_STATUS_FILE=$(mktemp); then
    echo "UNKNOWN - Fail to create a temporary file"
    exit 3
  fi
  CHECKED_CONTAINERS+=( "$container" )
  CONTAINER_STATUS_FILE["$container"]=$TMP_STATUS_FILE
  check_container "$container" "$TMP_STATUS_FILE" &
  CONTAINER_PID["$container"]=$!
done
# Record the result of one container check in UP_TO_DATE, ERRORS and
# UNKNOWNS, and raise EXIT_CODE when needed.
# Parameters : [container] [check exit code] [check output]
record_container_result() {
  local name=$1 code=$2 output=$3
  if [[ "$code" -eq 0 ]]; then
    UP_TO_DATE["$name"]=$output
  else
    ERRORS["$name"]=$output
    # UNKNOWNS is an associative array: the container name has to be the
    # subscript, a bare append is rejected by bash before 5.1
    if [[ "$code" -ge 3 ]]; then
      UNKNOWNS["$name"]=1
    fi
  fi
  # EXIT_CODE only goes up; any code above 3 is reported as 3
  [[ "$EXIT_CODE" -ge "$code" ]] && return 0
  [[ "$code" -gt 3 ]] && code=3
  EXIT_CODE=$code
}

debug "Wait for each individual container check and handle their result..."
for container in "${!CONTAINER_PID[@]}"; do
  pid=${CONTAINER_PID[$container]}
  debug "$container - Waiting for PID ${pid}..."
  wait "$pid"
  ex=$?
  debug "$container - Check return ${ex}"
  STATUS=$(< "${CONTAINER_STATUS_FILE[$container]}")
  rm -f -- "${CONTAINER_STATUS_FILE[$container]}"
  record_container_result "$container" "$ex" "$STATUS"
done
# Requested containers for which no check was started are reported as not
# found (nothing to do when no container was requested)
NOTFOUNDS=()
for container in "${ONLY_CONTAINERS[@]}"; do
  if ! in_array "$container" "${CHECKED_CONTAINERS[@]}"; then
    debug "$container - Container not found"
    ERRORS["$container"]="Container not found"
    NOTFOUNDS+=( "$container" )
    # The state of this container is unknown: count it as such, so that
    # the UNKNOWN summary does not announce zero container
    UNKNOWNS["$container"]=1
    EXIT_CODE=3
  fi
done
debug "Final exit code: $EXIT_CODE"
debug "Check containers (${#CHECKED_CONTAINERS[@]}): $( implode ", " "${CHECKED_CONTAINERS[@]}" )"
debug "Containers with errors (${#ERRORS[@]}): $( implode ", " "${!ERRORS[@]}" )"
debug "Not found containers (${#NOTFOUNDS[@]}): $( implode ", " "${NOTFOUNDS[@]}" )"

# Compute performance data: the total is used as maximum of each metric
CONTAINER_COUNTS=$(( ${#CHECKED_CONTAINERS[@]} + ${#NOTFOUNDS[@]} ))
PERF_DATA=()
PERF_DATA+=( "uptodate_containers=${#UP_TO_DATE[@]};;;0;${CONTAINER_COUNTS}" )
PERF_DATA+=( "containers_with_errors=${#ERRORS[@]};1;;0;${CONTAINER_COUNTS}" )
PERF_DATA+=( "unknown_state_containers=${#UNKNOWNS[@]};;;0;${CONTAINER_COUNTS}" )
# Build the check result message matching the exit code
case "$EXIT_CODE" in
  0) SUMMARY="OK - All ${#UP_TO_DATE[@]} container(s) are up-to-date" ;;
  1) SUMMARY="WARNING - ${#ERRORS[@]} container(s) need to be updated" ;;
  2) SUMMARY="CRITICAL - ${#ERRORS[@]} container(s) need to be updated" ;;
  *) SUMMARY="UNKNOWN - fail to retrieve status of ${#UNKNOWNS[@]} container(s)" ;;
esac
# Display it on one line, followed by the performance data
printf '%s |%s\n' "$SUMMARY" "$( implode " " "${PERF_DATA[@]}" )"
# Display details, starting by errors. Values are printed quoted so that
# glob characters found in a plugin output are not expanded.
for container in "${!ERRORS[@]}"; do
  printf '%s - %s\n' "$container" "${ERRORS[$container]}"
done
for container in "${!UP_TO_DATE[@]}"; do
  printf '%s - %s\n' "$container" "${UP_TO_DATE[$container]}"
done
exit "$EXIT_CODE"