#!/usr/bin/env bash
#
# Runs a check script on every host of a slave list and, depending on the
# result, optionally sets the matching Jenkins nodes online or offline.
#
# Usage: <this script> SLAVES_LIST_FILE SLAVES_CHECK SLAVES_REPORT
# Environment: DEBUG=true traces every executed command.

# Strict mode: abort on errors, on unset variables and on failing pipelines.
set -euo pipefail
if [[ ${DEBUG:-} == true ]]; then
  set -x
fi

# Directory containing this script; the helper scripts are invoked from it.
dirname=$(dirname -- "$0")

# Print an error message on stderr and terminate the script with status 1.
# Arguments: $* - message text (all arguments are joined with spaces)
function error() {
  printf 'ERROR: %s\n' "$*" >&2
  exit 1
}
# Print an informational message on stdout.
# Arguments: $* - message text (all arguments are joined with spaces)
function info() {
  printf 'INFO: %s\n' "$*"
}
# Run a command, appending its combined stdout/stderr to a log file while
# still copying that output to stdout.
# Arguments: $1    - log file path (missing parent directories are created)
#            $2... - command to run, with its arguments
# Returns:   tee's status when tee failed, otherwise the command's status,
#            regardless of whether the caller enabled `pipefail`.
function log() {
  local logfile="${1:?log file path required}"
  shift
  mkdir -p -- "$(dirname -- "$logfile")"
  "$@" 2>&1 | tee -a -- "$logfile"
  # Both statuses are expanded in one go, before PIPESTATUS is overwritten.
  local -i cmd_rc=${PIPESTATUS[0]} tee_rc=${PIPESTATUS[1]}
  if ((tee_rc != 0)); then
    return "$tee_rc"
  fi
  return "$cmd_rc"
}

# --- Configuration (every value can be overridden from the environment) ---

# Base URL of the Jenkins instance. When JENKINS_URL is not provided, both
# JENKINS_SERVER and JENKINS_PORT must be set.
JENKINS_URL="${JENKINS_URL:-http://${JENKINS_SERVER}:${JENKINS_PORT}/}"

# Only the literal value "true" enables the corresponding node action.
JOB_SET_NODES_OFFLINE="${JOB_SET_NODES_OFFLINE:-false}"
JOB_SET_NODES_ONLINE="${JOB_SET_NODES_ONLINE:-false}"
# Command (plus options) used to query the Jenkins API.
JENKINS_WGET="${JENKINS_WGET:-wget --auth-no-challenge}"

# --- Positional arguments ---

SLAVES_LIST_FILE="${1:-}"
[[ -n $SLAVES_LIST_FILE ]] || error "missing slave list argument"

SLAVES_CHECK="${2:-}"
[[ -n $SLAVES_CHECK ]] || error "missing slave check script"

SLAVES_REPORT="${3:-}"
[[ -n $SLAVES_REPORT ]] || error "missing slave report name"

# Fail early with a clear message instead of in the middle of the run.
[[ -r $SLAVES_LIST_FILE ]] || error "cannot read slave list: $SLAVES_LIST_FILE"

"$dirname/set-build-description.sh" "Checking all slaves..."

# --- Output locations: start from an empty artifacts directory ---

WORKSPACE="${WORKSPACE:-$PWD/workspace}"
ARTIFACTSDIR="${WORKSPACE}/artifacts/${SLAVES_REPORT}"
LOGSDIR="${ARTIFACTSDIR}/logs"
# The :? guards abort rather than run `rm -rf` on an empty path.
rm -rf -- "${ARTIFACTSDIR:?}" "${LOGSDIR:?}"
mkdir -p -- "$WORKSPACE" "$ARTIFACTSDIR" "$LOGSDIR"

# Counters reported at the end of the run.
declare -i errors=0
declare -i total=0

# Process the slave list one line at a time. Line format: "host,nodes" where
# nodes is a whitespace separated list of Jenkins node names; "#" starts a
# comment. The list is read on fd 3 so that commands started in the loop body
# cannot swallow the remaining lines through stdin (NOTE(review): typical
# for ssh-based helpers; remote-exec.sh is not visible here). The
# `|| [[ -n $line ]]` part keeps a last line lacking a trailing newline.
while read -r -u 3 line || [[ -n $line ]]; do
  status=0

  # Strip the comment (with the blanks before it) and leading blanks.
  line=$(sed -e 's/ *#.*//' -e 's/^ *//' <<<"$line")
  [[ -n $line ]] || continue

  # First comma separated field, trimmed of surrounding whitespace.
  read -r host <<<"${line%%,*}"
  # Second field; a line without any comma yields the whole line, exactly
  # like `cut -f2 -d,` does.
  if [[ $line == *,* ]]; then
    nodes=${line#*,}
    nodes=${nodes%%,*}
  else
    nodes=$line
  fi
  [[ -n $host ]] || continue
  total=$((total + 1))

  # The check runs from the script directory; its output is logged per host.
  pushd "$dirname" >/dev/null
  # ./$SLAVES_CHECK is left unquoted so a check given with arguments keeps
  # working. NOTE(review): quote it once callers are confirmed to pass a
  # bare script name.
  # shellcheck disable=SC2086
  log "$LOGSDIR/check_${host}.log.txt" ./remote-exec.sh "$host" ./$SLAVES_CHECK || status=$?
  popd >/dev/null

  if [[ $status != 0 ]]; then
    errors=$((errors + 1))
    info "$SLAVES_CHECK on $host returned with status $status"
  fi

  # $nodes is deliberately unquoted: it is a whitespace separated list.
  for node in $nodes; do
    # FIXME: We need to authenticate to Jenkins for this to work
    # JENKINS_WGET holds a command plus options, hence no quotes. A failing
    # query is tolerated and leaves offline_cause empty. The trailing slash
    # of JENKINS_URL is dropped to avoid a "//computer" path.
    # shellcheck disable=SC2086
    offline_cause=$($JENKINS_WGET -q -O- "${JENKINS_URL%/}/computer/$node/api/xml/?xpath=//offlineCauseReason" | sed 's|<[^>]*>||g') || true
    # Who set the node offline: "job" or "user", taken from the
    # "Set offline by <who>:" text this script writes into the cause.
    offline_by_job=$(grep "Set offline by" <<<"$offline_cause" | sed 's|^.*Set offline by \([^:]*\):.*$|\1|' || true)
    offline_agent=$(grep -F "This agent is offline because Jenkins failed to launch the agent process on it." <<<"$offline_cause" || true)

    if [[ $status == 0 ]]; then
      # Healthy host: re-enable nodes that a job took offline; any agent or
      # connection failure aborts the whole script through error().
      if [[ -n $offline_cause ]]; then
        if [[ $offline_by_job == job ]]; then
          if [[ $JOB_SET_NODES_ONLINE == true ]]; then
            info "setting node back online: $node"
            "$dirname/set-node-online.sh" "$node"
          else
            info "skip setting of node back online: $node (JOB_SET_NODES_ONLINE=$JOB_SET_NODES_ONLINE)"
          fi
        else
          if [[ -n $offline_agent ]]; then
            error "Node $node is offline, the agent could not start"
          fi
          if [[ $offline_cause == *"Connection was broken"* ]]; then
            error "Node $node is offline, connexion broken"
          fi
        fi
      fi
    else
      # Failing host: take its nodes offline, unless somebody other than a
      # job already did so.
      if [[ $JOB_SET_NODES_OFFLINE == true ]]; then
        if [[ -z $offline_cause || $offline_by_job == job ]]; then
          info "setting node offline: $node"
          message="node offline due to missing capacities."
          if [[ -n ${JENKINS_SERVER_COOKIE:-} ]]; then
            message="$message <br> Set offline by job: <a href=\"$BUILD_URL\">$JOB_NAME / $BUILD_ID</a>. <br> Console: <a href=\"$BUILD_URL/artifact/artifacts/${SLAVES_REPORT}/logs/check_$host.log.txt\"> slave check output log</a>."
          else
            # USER may be unset (which would abort under `set -u`).
            message="$message <br> Set offline by user: ${USER:-$(id -un)}, hostname: $(hostname), pid: $$."
          fi
          "$dirname/set-node-offline.sh" "$node" "$message" || true # Avoid abort if can't be set offline
        fi
      else
        info "skip setting of node offline: $node (JOB_SET_NODES_OFFLINE=$JOB_SET_NODES_OFFLINE)"
      fi
    fi
  done
done 3<"$SLAVES_LIST_FILE"

# Generate the report, then publish the overall result as build description
# and exit status (1 when at least one host check failed).
"$dirname/set-build-description.sh" "Generating report..."
"$dirname/postbuild-report.sh" "$SLAVES_LIST_FILE" "$SLAVES_REPORT"

if ((errors > 0)); then
  summary="FAILURE: $errors / $total hosts KO"
  "$dirname/set-build-description.sh" "<font color=\"red\">$summary</font>"
  echo "$summary"
  exit 1
else
  summary="SUCCESS: $total hosts OK"
  "$dirname/set-build-description.sh" "<font color=\"green\">$summary</font>"
  echo "$summary"
fi
