diff options
Diffstat (limited to 'sampleApp')
-rwxr-xr-x | sampleApp/config.json | 19 | ||||
-rwxr-xr-x | sampleApp/ensureSdncActive.sh | 30 | ||||
-rwxr-xr-x | sampleApp/ensureSdncStandby.sh | 25 | ||||
-rwxr-xr-x | sampleApp/promoverride.py | 141 | ||||
-rwxr-xr-x | sampleApp/sdnc.cluster | 10 | ||||
-rwxr-xr-x | sampleApp/sdnc.cluster.standby | 10 | ||||
-rwxr-xr-x | sampleApp/sdnc.failover | 12 | ||||
-rwxr-xr-x | sampleApp/sdnc.failover.failure | 12 | ||||
-rwxr-xr-x | sampleApp/sdnc.monitor | 10 | ||||
-rwxr-xr-x | sampleApp/sdnc.monitor.failure | 10 | ||||
-rwxr-xr-x | sampleApp/startPromDaemon.sh | 77 |
11 files changed, 356 insertions, 0 deletions
diff --git a/sampleApp/config.json b/sampleApp/config.json new file mode 100755 index 0000000..dd3ac8f --- /dev/null +++ b/sampleApp/config.json @@ -0,0 +1,19 @@ + {
+ "appName":"",
+ "aid":"",
+ "namespace":"",
+ "userid":"",
+ "password":"",
+ "ensure-active-0": "./ensureSdncActive.sh",
+ "ensure-active-1": "./ensureSdncActive.sh",
+ "ensure-passive-0":"./ensureSdncStandby.sh",
+ "ensure-passive-1":"./ensureSdncStandby.sh",
+ "restart-prom-0":"ssh ...",
+ "restart-prom-1":"",
+ "core-monitor-sleep-time":"1000",
+ "prom-timeout":"5000",
+ "noOfRetryAttempts":"3",
+ "replicaIdList":["0", "1"],
+ "musicLocation":"127.0.0.1",
+ "musicVersion":2
+ }
diff --git a/sampleApp/ensureSdncActive.sh b/sampleApp/ensureSdncActive.sh new file mode 100755 index 0000000..70202c8 --- /dev/null +++ b/sampleApp/ensureSdncActive.sh @@ -0,0 +1,30 @@ +#!/bin/bash + +dir=`dirname $0` +# query SDN-C cluster status +clusterStatus=$( $dir/sdnc.cluster ) + +if [ "ACTIVE" = "$clusterStatus" ];then + # perform health-check + health=$( $dir/sdnc.monitor ) + + if [ "HEALTHY" = "$health" ]; then + echo "Cluster is ACTIVE and HEALTHY" + exit 0 + fi + echo "Cluster is ACTIVE and UNHEALTHY" + exit 1 + +elif [ "STANDBY" = "$clusterStatus" ]; then + # perform takeover + echo "Cluster is STANDBY - taking over" + takeoverResult=$( $dir/sdnc.failover ) + if [ "SUCCESS" = "$takeoverResult" ]; then + exit 0 + fi + echo "Cluster takeover failed" + exit 1 +fi + +echo "Unknown cluster status '$clusterStatus'" +exit 1 diff --git a/sampleApp/ensureSdncStandby.sh b/sampleApp/ensureSdncStandby.sh new file mode 100755 index 0000000..b9e9864 --- /dev/null +++ b/sampleApp/ensureSdncStandby.sh @@ -0,0 +1,25 @@ +#!/bin/bash + +dir=`dirname $0` +# query SDN-C cluster status +clusterStatus=$( $dir/sdnc.cluster ) + +if [ "ACTIVE" = "$clusterStatus" ];then + # check that standby cluster is healthy + health=$( $dir/sdnc.monitor ) + if [ "FAILURE" = "$health" ];then + echo "Backup site is unhealthy - can't accept traffic!" + exit 1 + fi + + # assume transient error as other side transitions to ACTIVE + echo "Cluster is ACTIVE but PROM wants STANDBY! Panic!" 
+ exit 0 + +elif [ "STANDBY" = "$clusterStatus" ]; then + echo "Cluster is standing by" + exit 0 +fi + +echo "Unknown cluster status '$clusterStatus'" +exit 1 diff --git a/sampleApp/promoverride.py b/sampleApp/promoverride.py new file mode 100755 index 0000000..ec15590 --- /dev/null +++ b/sampleApp/promoverride.py @@ -0,0 +1,141 @@ +#!/usr/bin/env python2 + +# -*- encoding: utf-8 -*- +# ------------------------------------------------------------------------- +# Copyright (c) 2018 AT&T Intellectual Property +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# +# ------------------------------------------------------------------------- +# + + +import sys +import getopt +import json +import requests + +musicLocation ="" +base_url = "" +keyspaceName = "" +tableName = "replicas" +aid = "" +namespace = "" + + +def parseConfig(config): + global musicLocation, base_url, keyspaceName, aid, namespace + config = json.load(open(config)) + musicLocations = config.get("music-location") or config.get("musicLocation")  # accept both key spellings; sampleApp/config.json ships "musicLocation" + base_url = "http://" + (musicLocations[0] if isinstance(musicLocations, list) else musicLocations) + ":8080/MUSIC/rest/v2"  # allow a bare string or a list of hosts + keyspaceName = "prom_" + (config.get("app-name") or config.get("appName"))  # accept both key spellings; sampleApp/config.json ships "appName" + aid = config["aid"] + namespace = config["namespace"] + +def getHeaders(): + headers = {'aid': aid, 'ns': namespace} + return headers + +def getReplica(id): + response = requests.get(base_url+"/keyspaces/"+keyspaceName+"/tables/"+tableName+"/rows?id="+id, + headers=getHeaders()) + return response.json()["result"]["row 0"] + +def getAllReplicas(): + response = requests.get(base_url+"/keyspaces/"+keyspaceName+"/tables/"+tableName+"/rows", + headers=getHeaders()) + print json.dumps(response.json()["result"], indent=2, sort_keys=True) + +def acquireLock(lockref): + response = requests.get(base_url+"/locks/acquire/"+lockref, + headers=getHeaders()) + return response.json() + +def releaseLock(lockref): + print "releasing lock: " + lockref + response = requests.delete(base_url+"/locks/release/"+lockref, + headers=getHeaders()) + #return response.json() + return + +def getCurrentLockHolder(lockname): + response = requests.get(base_url+"/locks/enquire/"+lockname, + headers=getHeaders()) + return response.json() + +def releaseLocksUntil(lockname, lockref): + """release locks until the lockref passed in is the current lock holder + this essentially forces the lockref to become the active prom site""" + acquire = acquireLock(lockref) + while acquire["status"]=="FAILURE": + if acquire["message"]=="Lockid doesn't exist": + print "[ERROR] Lock" , lockref, "cannot be found." 
+ return False + currentLockHolder = getCurrentLockHolder(lockname) + if currentLockHolder["lock"]["lock-holder"] != lockref: + releaseLock(currentLockHolder["lock"]["lock-holder"]) + acquire = acquireLock(lockref) + return True + +def deleteLock(lockname): + response = requests.delete(base_url + "/locks/delete/"+lockname, + headers=getHeaders()) + return response.json() + + +def usage(): + print "usage: promoverride -c <prom config file> -i <prom_id>" + print " -c, --config <prom config file> OPTIONAL location of the 'config.json' file for prom." \ + " Default location is current directory" + print " -i <prom_id> is the replica site to force to become active" + print " -l, --list to list current prom instances" + print "\n Config file is needed to read information about music location and keyspace information" + +if __name__=="__main__": + try: + opts, args = getopt.getopt(sys.argv[1:], "c:i:l", ["config=", "id=", "list"]) + except getopt.GetoptError as err: + print(err) + usage() + exit(1) + # defaults here + configFile = "config.json" + id = None + listInstances = False + + for opt, args in opts: + if opt in ("-c", "--config"): + configFile = args + elif opt in ("-i", "--id"): + id = args + elif opt in ("-l", "--list"): + listInstances = True + else: + assert False, "unhandled option " + str(opt) + + parseConfig(configFile) + + if listInstances: + # todo list current instances + getAllReplicas() + exit(0) + + if id is None: + print "Mandatory prom id not provided." 
+ usage() + exit(1) + + replicaInfo = getReplica(id) + print "Forcing prom site ", id, " to become active" + if releaseLocksUntil(keyspaceName+".active.lock", replicaInfo["lockref"]) is True: + print "prom site", id, " should now be active" diff --git a/sampleApp/sdnc.cluster b/sampleApp/sdnc.cluster new file mode 100755 index 0000000..1734f9a --- /dev/null +++ b/sampleApp/sdnc.cluster @@ -0,0 +1,10 @@ +#!/bin/sh +# +# SDNC Resiliency project +# SHELL script to determine whether cluster is the active SDNC cluster or the GeoR Standby SDNC cluster. +# The status of the cluster is determined by examining the response data obtained from jolokia calls to the ODL nodes. +# return values: +# ACTIVE - cluster is the active cluster +# STANDBY - cluster is the standby cluster +# +echo "ACTIVE" diff --git a/sampleApp/sdnc.cluster.standby b/sampleApp/sdnc.cluster.standby new file mode 100755 index 0000000..8ed0566 --- /dev/null +++ b/sampleApp/sdnc.cluster.standby @@ -0,0 +1,10 @@ +#!/bin/sh +# +# SDNC Resiliency project +# SHELL script to determine whether cluster is the active SDNC cluster or the GeoR Standby SDNC cluster. +# The status of the cluster is determined by examining the response data obtained from jolokia calls to the ODL nodes. 
+# return values: +# ACTIVE - cluster is the active cluster +# STANDBY - cluster is the standby cluster +# +echo "STANDBY" diff --git a/sampleApp/sdnc.failover b/sampleApp/sdnc.failover new file mode 100755 index 0000000..5a7884f --- /dev/null +++ b/sampleApp/sdnc.failover @@ -0,0 +1,12 @@ +#!/bin/sh + +# +# SDNC Resiliency project +# SHELL script to execute the SDNC cluster failover tasks +# +# return values: +# SUCCESS - failover tasks were executed successfully +# FAILURE - failover tasks failed +# + +echo "SUCCESS" diff --git a/sampleApp/sdnc.failover.failure b/sampleApp/sdnc.failover.failure new file mode 100755 index 0000000..56a4f91 --- /dev/null +++ b/sampleApp/sdnc.failover.failure @@ -0,0 +1,12 @@ +#!/bin/sh + +# +# SDNC Resiliency project +# SHELL script to execute the SDNC cluster failover tasks +# +# return values: +# SUCCESS - failover tasks were executed successfully +# FAILURE - failover tasks failed +# + +echo "FAILURE" diff --git a/sampleApp/sdnc.monitor b/sampleApp/sdnc.monitor new file mode 100755 index 0000000..bc3b73b --- /dev/null +++ b/sampleApp/sdnc.monitor @@ -0,0 +1,10 @@ +#!/bin/sh +# +# SDNC Resiliency project +# SHELL script to query status of the SDNC cluster +# +# return values: +# HEALTHY - the cluster health is determined to be able to successfully process requests +# FAILURE - the cluster is in unhealthy state, the execution of failover is required +# +echo "HEALTHY" diff --git a/sampleApp/sdnc.monitor.failure b/sampleApp/sdnc.monitor.failure new file mode 100755 index 0000000..d6e30ac --- /dev/null +++ b/sampleApp/sdnc.monitor.failure @@ -0,0 +1,10 @@ +#!/bin/sh +# +# SDNC Resiliency project +# SHELL script to query status of the SDNC cluster +# +# return values: +# HEALTHY - the cluster health is determined to be able to successfully process requests +# FAILURE - the cluster is in unhealthy state, the execution of failover is required +# +echo "FAILURE" diff --git a/sampleApp/startPromDaemon.sh b/sampleApp/startPromDaemon.sh new 
file mode 100755 index 0000000..2eb868f --- /dev/null +++ b/sampleApp/startPromDaemon.sh @@ -0,0 +1,77 @@ +#!/bin/bash + +usage () { + echo "Usage: $0 -i <prom id> [-p] [-c <config.json directory>] [-z]" + echo "where" + echo -e "\t -i <prom_id> the identifier of the prom daemon" + echo -e "\t -p specifies whether the daemon must start as passive" + echo -e "\t -c is the directory where the prom config.json resides" + echo -e "\t -z keep std out open after daemon is started (for docker containers only)" + exit 1 +} + + +passive="" #default=can be active or passive +config=$PWD # default config directory is working directory +id_flag=0 #make sure user passes in id +docker_deployment=false + +while getopts ":i:pc:z" o; do + case "${o}" in + i) + id=${OPTARG} + id_flag=1 + ;; + p) + passive="-p" + ;; + c) + config=${OPTARG} + ;; + z) + docker_deployment=true + echo "docker deployment" + ;; + *) + usage + ;; + esac +done + +if [ $id_flag -eq 0 ]; then + echo "ERROR: Required parameter <prom id> not provided." + usage +fi + +echo "config location is $config" +echo "prom id is $id" +echo "passive is $passive" + +if $docker_deployment ; then + echo "Container version detected, keeping syso open" + #keep container running +fi + +dir=$PWD +ps aux > $dir/PromLog$id.out +promId=`grep "prom.jar $id" $dir/PromLog$id.out | awk '{ print $2 }'` +if [ -z "${promId}" ]; then +# echo prom dead + echo "Starting prom $id" + java -jar $dir/prom.jar --id $id $passive --config $config > $dir/prom$id.out & +fi +sleep 3 +ps aux > $dir/PromLog$id.out +promId=`grep "prom.jar" $dir/PromLog$id.out | awk '{ print $2 }'` +if [ -z "${promId}" ]; then + echo "NotRunning" +else + echo $promId +fi +rm $dir/PromLog$id.out + +if $docker_deployment ; then + echo "Container version detected, keeping syso open" + #keep container running + tail -f /dev/null +fi |