diff options
Diffstat (limited to 'pgaas/src/stage/opt/app/pgaas/bin/check_cluster')
-rwxr-xr-x | pgaas/src/stage/opt/app/pgaas/bin/check_cluster | 170 |
1 files changed, 0 insertions, 170 deletions
diff --git a/pgaas/src/stage/opt/app/pgaas/bin/check_cluster b/pgaas/src/stage/opt/app/pgaas/bin/check_cluster deleted file mode 100755 index ced7c0a..0000000 --- a/pgaas/src/stage/opt/app/pgaas/bin/check_cluster +++ /dev/null @@ -1,170 +0,0 @@ -#!/bin/bash -# Copyright (C) 2017 AT&T Intellectual Property. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this code except in compliance -# with the License. You may obtain a copy of the License -# at http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or -# implied. See the License for the specific language governing -# permissions and limitations under the License. - - -# NAME -# check_cluster - check the state of the cluster -# -# USAGE -# check_cluster [-v] [-l] [-t timeout] -# -l do not check localhost first (and restarting the service if necessary) -# -t timeout set how long to wait when accessing the servers -# -v verbose -# -# DESCRIPTION -# Loop through the nodes in the cluster, using pgwget to determine how many are masters, secondaries, down for maintenance, or not up. -# Complain about certain situations. -# If there are multiple masters, and this not the first master in the list, then: -# run pg_ctl_restart -# prevent /ro from returning true - -CDF=/opt/app/cdf -if [ -d /opt/app/postgresql-9.5.2 ] -then PGDIR=/opt/app/postgresql-9.5.2 -elif [ -d /usr/lib/postgresql/9.6 ] -then PGDIR=/usr/lib/postgresql/9.6 -elif [ -d /usr/lib/postgresql/9.5 ] -then PGDIR=/usr/lib/postgresql/9.5 -else echo PostgreSQL bin directory not found 1>&2; exit 1 -fi -PATH=$PGDIR/bin:$CDF/bin:/opt/app/pgaas/bin:$PATH - -usage() -{ - exec 1>&2 - [ $# -gt 0 ] && echo "$@" - echo "Usage: $0 [-v] [-l] [-t timeout] [-d file]" - echo -e " -l do not check localhost first (and restarting the service if necessary)" - echo -e " -t timeout set how long to wait when accessing the servers" - echo -e " -v verbose" - echo -e " -d file duplicate the status output to the given file" - exit 1 -} - -VERBOSE=false -TIMEOUT=10 -TESTLOCAL=: -DFILE= -while getopts d:lt:v c -do - case "$c" in - d ) DFILE=$OPTARG ;; - l ) TESTLOCAL=false ;; - t ) TIMEOUT=$OPTARG ;; - v ) VERBOSE=: ;; - \?) usage ;; - esac -done -shift $(($OPTIND - 1)) - -# loop through the nodes in the cluster, using pgwget to determine if any are a master. Save in $@ -master_count=0 -secondary_count=0 -total_count=0 -down_count=0 -maintenance_count=0 - -MASTERS= -SECONDARIES= -MAINTENANCES= -DOWNS= - -MSEP= -SSEP= -BSEP= -DSEP= -HOSTNAME=$(hostname -f) -FOUNDPREVIOUSMASTER= -DOPGCTLSTOP= - -if $TESTLOCAL -then - isrw=`pgwget --tries=1 --read-timeout=$TIMEOUT --quiet -O/dev/stdout http://localhost:8000/isrw` - case "$isrw" in - Master | Secondary | Maintenance ) ;; - * ) - echo "$(date)|WARNING|RESTARTED|Local iDNS-responder not responding. Restarting." - ps -fu postgres | grep "python3 /opt/app/pgaas/lib/iDNS-responder" | grep -v grep | awk '{print "kill " $2}' | sh - sleep 10 - ;; - esac -fi - -for i in $(getpropvalue -n pgnodes | sed 's/|/ /g') -do - $VERBOSE && echo -n "Checking $i" - isrw=`pgwget --tries=1 --read-timeout=10 --quiet -O/dev/stdout http://$i:8000/isrw` - $VERBOSE && echo ": $isrw" - case "$isrw" in - Master ) - (( master_count = master_count + 1 )) - (( total_count = total_count + 1 )) - MASTERS="$MASTERS$MSEP$i" - MSEP=" " - if [ -z "$FOUNDPREVIOUSMASTER" ] - then FOUNDPREVIOUSMASTER=yes - elif [ -n "$FOUNDPREVIOUSMASTER" -a "$i" = $HOSTNAME ] - then DOPGCTLSTOP=yes - fi - ;; - Secondary ) - (( secondary_count = secondary_count + 1 )) - (( total_count = total_count + 1 )) - SECONDARIES="$SECONDARIES$SSEP$i" - SSEP=" " - ;; - Maintenance ) - (( maintenance_count = maintenance_count + 1 )) - (( total_count = total_count + 1 )) - MAINTENANCES="$MAINTENANCES$BSEP$i" - BSEP=" " - ;; - * ) - DOWNS="$DOWNS$DSEP$i" - DSEP=" " - (( down_count = down_count + 1 )) - (( total_count = total_count + 1 )) - ;; - esac -done - -(( up_count = master_count + secondary_count )) - -date=$(date) -output="$date|INFO|masters=$master_count $MASTERS|secondaries=$secondary_count $SECONDARIES|maintenance=$maintenance_count $MAINTENANCES|down=$down_count $DOWNS|" -echo "$output" -if [ -n "$DFILE" ] -then (umask 022; echo "$output" > $DFILE.tmp && mv $DFILE.tmp $DFILE) -fi - -FORCEROOFF=/var/run/postgresql/force-ro-off -if [ $master_count -lt 1 ] -then echo "$date|FATAL|NOMASTER|NO MASTER FOUND|" -elif [ $master_count -gt 1 ] -then echo "$date|FATAL|MULTIPLEMASTERS|TOO MANY MASTERS FOUND|" - if [ -n "$DOPGCTLSTOP" ] - then - touch "$FORCEROOFF" - pg_ctl_stop - fi -fi -if [ -z "$DOPGCTLSTOP" -a -f "$FORCEROOFF" ] -then rm -f "$FORCEROOFF" -fi -if [ $up_count -eq 0 ] -then echo "$date|FATAL|NOREADER|NO SECONDARY FOUND" -fi -if [ $down_count -ne 0 ] -then echo "$date|ERROR|DOWN|One or more systems are down: $DOWNS" -fi |