summaryrefslogtreecommitdiffstats
path: root/kubernetes/sdnc/sdnc-prom/resources/bin
diff options
context:
space:
mode:
authorBorislav Glozman <Borislav.Glozman@amdocs.com>2018-07-15 12:32:24 +0000
committerGerrit Code Review <gerrit@onap.org>2018-07-15 12:32:24 +0000
commitb8a250d496d7dcdab72b0118164a3f2bb23cb7e4 (patch)
tree160bc55b0bb19203eef6811d0c71032ff2fe7aa3 /kubernetes/sdnc/sdnc-prom/resources/bin
parent310736ba91c4e9f8f8037b2014e3f2ab06112f0b (diff)
parentb642ee553d6dd486fc375755af9da2fdd9c94885 (diff)
Merge "SDN-C Multi-site High-availability - Auto-failover"
Diffstat (limited to 'kubernetes/sdnc/sdnc-prom/resources/bin')
-rwxr-xr-xkubernetes/sdnc/sdnc-prom/resources/bin/ensureSdncActive.sh105
-rwxr-xr-xkubernetes/sdnc/sdnc-prom/resources/bin/ensureSdncStandby.sh58
-rwxr-xr-xkubernetes/sdnc/sdnc-prom/resources/bin/prom.sh31
-rwxr-xr-xkubernetes/sdnc/sdnc-prom/resources/bin/sdnc.cluster61
-rwxr-xr-xkubernetes/sdnc/sdnc-prom/resources/bin/sdnc.dnsswitch22
-rwxr-xr-xkubernetes/sdnc/sdnc-prom/resources/bin/sdnc.failover86
-rwxr-xr-xkubernetes/sdnc/sdnc-prom/resources/bin/sdnc.monitor125
-rwxr-xr-xkubernetes/sdnc/sdnc-prom/resources/bin/switchVoting.sh110
8 files changed, 598 insertions, 0 deletions
diff --git a/kubernetes/sdnc/sdnc-prom/resources/bin/ensureSdncActive.sh b/kubernetes/sdnc/sdnc-prom/resources/bin/ensureSdncActive.sh
new file mode 100755
index 0000000000..fb24653129
--- /dev/null
+++ b/kubernetes/sdnc/sdnc-prom/resources/bin/ensureSdncActive.sh
@@ -0,0 +1,105 @@
+#!/bin/bash
+
+# Copyright © 2018 Amdocs
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+debugLog(){
+ if [ "$enableDebugLogging" == true ]; then
+ if [ $# -eq 0 ]; then
+ echo "" >> $LOGFILE
+ else
+ echo $( date ) $@ >> $LOGFILE
+ fi
+ fi
+}
+
+failover(){
+ lockFile=/tmp/sdnc.failover.lock
+ # make sure that no failover is currently running
+ if [ -e ${lockFile} ] && kill -0 $(cat ${lockFile}) 2> /dev/null; then
+ debugLog "Currently running sdnc and dns failover"
+ return
+ fi
+ trap "rm -f ${lockFile}" INT TERM RETURN
+ echo $BASHPID > ${lockFile}
+
+ # perform takeover
+ debugLog "Started executing sdnc.failover for $SITE_NAME"
+ takeoverResult=$( /app/bin/sdnc.failover )
+ debugLog "Completed executing sdnc.failover. takeoverResult is: $takeoverResult"
+ if [ "success" = "$takeoverResult" ]; then
+ # update CoreDNS upon successful execution of sdnc.failover script
+ debugLog "Executing sdnc.dnsswitch"
+ /app/bin/sdnc.dnsswitch
+ rc=$?
+ debugLog "Completed executing sdnc.dnsswitch for $SITE_NAME. rc=$rc"
+ else
+ debugLog "Cluster takeover current status: $takeoverResult on $SITE_NAME."
+ rc=1
+ fi
+
+ if [ $rc -ne 0 ];then
+ takeoverResult="failure"
+ fi
+
+ data="{\
+\"type\": \"failover\",\
+\"status\": \"$takeoverResult\",\
+\"site\": \"$SITE_NAME\",\
+\"deployment\": \"{{.Values.config.deployment}}\",\
+\"timestamp\": \"$(date '+%F %T')\"\
+}"
+
+ # notifications are best-effort - ignore any failures
+ curl -H "Content-Type: application/json" -X POST --data "$data" http://$message_router/events/$topic >/dev/null 2>&1
+
+}
+
+LOGFILE="/app/geo.log"
+enableDebugLogging=true
+message_router=message-router:3904
+topic={{.Values.config.messageRouterTopic}}
+SITE_NAME="sdnc01"
+if [ "$SDNC_IS_PRIMARY_CLUSTER" = "false" ];then
+ SITE_NAME="sdnc02"
+fi
+
+debugLog
+debugLog "Executing ensureSdncActive"
+
+# query SDN-C cluster status
+debugLog "Started executing sdnc.cluster"
+clusterStatus=$( /app/bin/sdnc.cluster )
+debugLog "Completed executing sdnc.cluster. Cluster status is: $clusterStatus"
+
+if [ "active" = "$clusterStatus" ]; then
+ # peform health-check
+ debugLog "Started excuting sdnc.monitor"
+ health=$( /app/bin/sdnc.monitor )
+ debugLog "Completed executing sdnc.monitor. Cluster is: $health"
+
+ if [ "healthy" = "$health" ]; then
+ # Cluster is ACTIVE and HEALTHY
+ exit 0
+ fi
+ exit 1
+
+elif [ "standby" = "$clusterStatus" ]; then
+ # Run failover in background process and allow PROM to continue
+ ( failover & )
+ exit 0
+fi
+
+# Unknown cluster status
+exit 1
diff --git a/kubernetes/sdnc/sdnc-prom/resources/bin/ensureSdncStandby.sh b/kubernetes/sdnc/sdnc-prom/resources/bin/ensureSdncStandby.sh
new file mode 100755
index 0000000000..8dd84bd3ea
--- /dev/null
+++ b/kubernetes/sdnc/sdnc-prom/resources/bin/ensureSdncStandby.sh
@@ -0,0 +1,58 @@
+#!/bin/bash
+
+# Copyright © 2018 Amdocs
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+debugLog(){
+ if [ "$enableDebugLogging" == true ]; then
+ if [ $# -eq 0 ]; then
+ echo "" >> $LOGFILE
+ else
+ echo $( date ) $@ >> $LOGFILE
+ fi
+ fi
+}
+
+LOGFILE="/app/geo.log"
+enableDebugLogging=true
+
+debugLog
+debugLog "Executing ensureSdncStandby"
+
+# query SDN-C cluster status
+debugLog "Started executing sdnc.cluster"
+clusterStatus=$( /app/bin/sdnc.cluster )
+debugLog "Completed executing sdnc.cluster. Cluster status is: $clusterStatus"
+
+if [ "active" = "$clusterStatus" ]; then
+ # assume transient error as other side transitions to ACTIVE
+ debugLog "Cluster status: $clusterStatus. exit 0"
+ exit 0
+
+elif [ "standby" = "$clusterStatus" ]; then
+ # check that standby cluster is healthy
+ debugLog "Started executing sdnc.monitor. Cluster status is: $clusterStatus"
+ health=$( /app/bin/sdnc.monitor )
+ debugLog "Completed executing sdnc.monitor. Cluster is: $health"
+ if [ "failure" = "$health" ];then
+ # Backup site is unhealthy - can't accept traffic!
+ exit 1
+ fi
+ # Cluster is standing by
+ exit 0
+fi
+
+debugLog "Unknown cluster status: $clusterStatus"
+# Unknown cluster status
+exit 1
diff --git a/kubernetes/sdnc/sdnc-prom/resources/bin/prom.sh b/kubernetes/sdnc/sdnc-prom/resources/bin/prom.sh
new file mode 100755
index 0000000000..c93ba24bd7
--- /dev/null
+++ b/kubernetes/sdnc/sdnc-prom/resources/bin/prom.sh
@@ -0,0 +1,31 @@
+#!/bin/bash
+
+# Copyright © 2018 Amdocs
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+if [ "${SDNC_IS_PRIMARY_CLUSTER:-true}" = "true" ];then
+ id=sdnc01
+else
+ id=sdnc02
+fi
+
+# should PROM start as passive?
+state=$( bin/sdnc.cluster )
+if [ "$state" == "standby" ]; then
+ echo "Starting PROM in passive mode"
+ passive="-p"
+fi
+
+# start PROM as foreground process
+java -jar prom.jar --id $id $passive --config config
diff --git a/kubernetes/sdnc/sdnc-prom/resources/bin/sdnc.cluster b/kubernetes/sdnc/sdnc-prom/resources/bin/sdnc.cluster
new file mode 100755
index 0000000000..76603410d4
--- /dev/null
+++ b/kubernetes/sdnc/sdnc-prom/resources/bin/sdnc.cluster
@@ -0,0 +1,61 @@
+#!/bin/bash
+
+# Copyright © 2018 Amdocs
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# query ODL cluster state
+USERNAME="{{.Values.odl.jolokia.username}}"
+PASSWORD="{{.Values.odl.jolokia.password}}"
+
+count=${SDNC_ODL_COUNT:-1}
+memberStart=0
+if [ "${SDNC_IS_PRIMARY_CLUSTER:-true}" != "true" ];then
+ memberStart=$(( $memberStart + $count ))
+fi
+
+for instance in $(seq $count);do
+ shard=member-$(( $memberStart + $instance ))-shard-default-config
+ mbean=Category=Shards,name=$shard,type=DistributedConfigDatastore
+ url=http://{{.Release.Name}}-sdnc-$(( $instance-1 )).sdnc-cluster.{{.Release.Namespace}}:8181/jolokia/read/org.opendaylight.controller:$mbean
+
+ response=$( curl -s -u $USERNAME:$PASSWORD $url )
+ rc=$?
+ if [ $rc -ne 0 ];then
+ # failed to contact SDN-C instance - try another
+ continue
+ fi
+ status=$( echo -E "$response" | jq -r ".status" )
+ if [ "$status" != "200" ];then
+ # query failed, try another instance
+ continue
+ fi
+
+ voting=$( echo -E "$response" | jq -r ".value.Voting" )
+ case $voting in
+ true)
+ echo "active"
+ exit 0
+ ;;
+ false)
+ echo "standby"
+ exit 0
+ ;;
+ *)
+ echo "Error: Voting status could not be determined."
+ exit 1
+ ;;
+ esac
+done
+echo "Error: Voting status could not be determined."
+exit 1
diff --git a/kubernetes/sdnc/sdnc-prom/resources/bin/sdnc.dnsswitch b/kubernetes/sdnc/sdnc-prom/resources/bin/sdnc.dnsswitch
new file mode 100755
index 0000000000..209352c4e3
--- /dev/null
+++ b/kubernetes/sdnc/sdnc-prom/resources/bin/sdnc.dnsswitch
@@ -0,0 +1,22 @@
+#! /bin/bash
+
+# Copyright © 2018 Amdocs
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+####################################################################################################
+# sdncDnsSwitchWrapper.bash: Wrapper script to invoke SDNC DNS Switch for domain: sdnc.example.com #
+####################################################################################################
+ssh -i {{.Values.coreDNS.sshKeyFile}} -o StrictHostKeyChecking=no {{.Values.coreDNS.sshUser}}@{{.Values.coreDNS.host}} "{{.Values.coreDNS.switchScript}} $SDNC_LOCAL_K8S_CLUSTER_MASTER {{.Values.config.deployment}}"
+
+exit $?
diff --git a/kubernetes/sdnc/sdnc-prom/resources/bin/sdnc.failover b/kubernetes/sdnc/sdnc-prom/resources/bin/sdnc.failover
new file mode 100755
index 0000000000..e78b7eeee3
--- /dev/null
+++ b/kubernetes/sdnc/sdnc-prom/resources/bin/sdnc.failover
@@ -0,0 +1,86 @@
+#!/bin/bash
+
+# Copyright © 2018 Amdocs
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+LOGFILE="/app/geo.log"
+enableDebugLogging=true
+message_router=message-router:3904
+topic={{.Values.config.messageRouterTopic}}
+KEYWORD_success="success"
+KEYWORD_failure="failure"
+SITE_NAME="sdnc01"
+if [ "$SDNC_IS_PRIMARY_CLUSTER" = "false" ];then
+ SITE_NAME="sdnc02"
+fi
+
+APP_BIN=/app/bin
+
+debugLog(){
+ if [ "$enableDebugLogging" == true ]; then
+ if [ $# -eq 0 ]; then
+ echo "" >> $LOGFILE
+ else
+ echo $( date ) $@ >> $LOGFILE
+ fi
+ fi
+}
+
+EXC_SIMPLE_FAILOVER=`${APP_BIN}/switchVoting.sh`
+
+if [ "$EXC_SIMPLE_FAILOVER" == "success" ]; then
+ debugLog "Simple failover success. SDNC failover completed."
+else
+ # Simple failover failed. Trying catastrophic failover ...
+ debugLog "Simple failover failed. Trying catastrophic failover for $SITE_NAME ..."
+
+ # Notify Dmaap before executing catastrophic failover, because all connections will be reset.
+ data="{\
+ \"type\": \"Catastrophic failover\",\
+ \"reason\": \"Simple failover failed\",\
+ \"message_router\": \"$message_router\",\
+ \"topic\": \"$topic\",\
+ \"site\": \"$SITE_NAME\",\
+ \"deployment\": \"{{.Values.config.deployment}}\",\
+ \"timestamp\": \"$(date '+%F %T')\"\
+ }"
+
+ debugLog "$data"
+
+ # notifications to Dmaap
+ curl -H "Content-Type: application/json" -X POST --data "$data" http://$message_router/events/$topic >/dev/null 2>&1
+
+ # We're going to kill prom, so we need to do dnsswitch now
+
+ debugLog "Executing sdnc.dnsswitch"
+
+ /app/bin/sdnc.dnsswitch > /dev/null 2>&1
+ rc=$?
+ if [ $rc -ne 0 ];then
+ debugLog "sdnc.dnsswitch FAILED"
+ echo $KEYWORD_failure
+ exit 0
+ fi
+
+ # Now do catastrophic failure
+
+ debugLog "Catastrophic failover in progress"
+
+ ssh -o StrictHostKeyChecking=no -i /app/config/coredns/master.key root@$SDNC_LOCAL_K8S_CLUSTER_MASTER "su - ubuntu bash -c 'helm upgrade --set sdnc.config.geoEnabled=false dev local/onap --namespace onap; kubectl -n onap delete pods -l app=sdnc'" > /dev/null 2>&1
+
+ # Sleep here so prom can die without us passing control back to ensureSDNCActive
+ sleep 300
+fi
+
+echo $KEYWORD_success
diff --git a/kubernetes/sdnc/sdnc-prom/resources/bin/sdnc.monitor b/kubernetes/sdnc/sdnc-prom/resources/bin/sdnc.monitor
new file mode 100755
index 0000000000..0042ac368a
--- /dev/null
+++ b/kubernetes/sdnc/sdnc-prom/resources/bin/sdnc.monitor
@@ -0,0 +1,125 @@
+#!/usr/bin/env python2
+# encoding: utf-8
+
+# Copyright © 2018 Amdocs
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import sys
+import os
+import json
+import requests
+from datetime import datetime
+
+consul_server = "consul-server:8500"
+message_router = "message-router:3904"
+topic = '{{.Values.config.messageRouterTopic}}'
+log_file='/app/monitor.log'
+status_file='/app/.health'
+logEnabled=False
+
+siteName='sdnc01'
+if os.environ.get('SDNC_IS_PRIMARY_CLUSTER', 'true') == 'false':
+ siteName='sdnc02'
+
+debug=False
+if len(sys.argv) > 1 and sys.argv[1] == '--debug':
+ debug=True
+
+def get_state(healthcheck):
+ response = requests.get("http://" + consul_server + "/v1/health/checks/" + healthcheck)
+ if response.status_code != 200:
+ raise RuntimeError("HTTP " + str(response.status_code))
+ data = response.json()
+ if len(data) == 0:
+ raise RuntimeError(healthcheck + " not found")
+ if len(data) > 1:
+ raise RuntimeError("Multiple states for " + healthcheck + " found")
+
+ return data[0]
+
+
+def log(message):
+ if logEnabled:
+ with open(log_file, 'a') as f:
+ f.write(str(datetime.now()) + " " + message + "\n")
+
+def healthcheck(checks, failFirst=True):
+ if len(checks) == 0:
+ return True
+
+ for check in checks:
+ if type(check) is list:
+ passing = healthcheck(check, False)
+ else:
+ state = get_state(check)
+ status = state['Status']
+ passing = status == "passing" or status == "warning"
+ log(check + " " + status)
+ if debug:
+ if status == "passing":
+ color = "\033[32m" # green
+ elif status == "warning":
+ color = "\033[33m" # yellow
+ else:
+ color = "\033[31m" # red
+ print check, color + status + "\033[0m"
+ if not passing:
+ print "\tCause:", state['Output']
+
+
+ if passing:
+ if not failFirst:
+ # found a passing check so can stop here
+ return True
+ else:
+ if failFirst:
+ # found a failing check so can stop here
+ return False
+
+ return failFirst
+
+
+try:
+ with open("/app/config/healthchecks.json") as f:
+ checks = json.load(f)
+
+ try:
+ with open(status_file) as f:
+ previous_result = f.read()
+ except IOError:
+ # file doesn't exist
+ previous_result = 'unknown'
+
+ if healthcheck(checks):
+ result = "healthy"
+ else:
+ result = "unhealthy"
+
+ print result
+
+ # save current result to file
+ with open(status_file, 'w') as f:
+ f.write(result)
+
+ if previous_result != 'unknown' and result != previous_result:
+ payload = { 'type' : 'health-change', 'status': result, 'site': siteName, 'deployment': '{{.Values.config.deployment}}', 'timestamp': str(datetime.now()) }
+ log("Posting event " + str(payload))
+ try:
+ requests.post("http://" + message_router + "/events/" + topic, data=json.dumps(payload), headers={ 'Content-Type' : 'application/json' } )
+ except Exception:
+ # events are best-effort
+ pass
+
+except Exception as e:
+ sys.exit(str(e))
diff --git a/kubernetes/sdnc/sdnc-prom/resources/bin/switchVoting.sh b/kubernetes/sdnc/sdnc-prom/resources/bin/switchVoting.sh
new file mode 100755
index 0000000000..f13196e7e8
--- /dev/null
+++ b/kubernetes/sdnc/sdnc-prom/resources/bin/switchVoting.sh
@@ -0,0 +1,110 @@
+#/bin/sh
+
+# Copyright © 2018 Amdocs
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+set -e
+primary=${SDNC_IS_PRIMARY_CLUSTER:-true}
+
+url=http://sdnc:8282/restconf/operations/cluster-admin:change-member-voting-states-for-all-shards
+username="${ODL_USERNAME:-{{.Values.odl.restconf.username}}}"
+password="${ODL_PASSWORD:-{{.Values.odl.restconf.password}}}"
+LOGFILE="/app/geo.log"
+enableDebugLogging=true
+
+debugLog(){
+ if [ "$enableDebugLogging" == true ]; then
+ if [ $# -eq 0 ]; then
+ echo "" >> $LOGFILE
+ else
+ echo $( date ) $@ >> $LOGFILE
+ fi
+ fi
+}
+
+
+if [ "$primary" = "true" ]; then
+ votingState='
+{
+ "input": {
+ "member-voting-state": [
+ {
+ "member-name": "member-1",
+ "voting": true
+ },
+ {
+ "member-name": "member-2",
+ "voting": true
+ },
+ {
+ "member-name": "member-3",
+ "voting": true
+ },
+ {
+ "member-name": "member-4",
+ "voting": false
+ },
+ {
+ "member-name": "member-5",
+ "voting": false
+ },
+ {
+ "member-name": "member-6",
+ "voting": false
+ }
+ ]
+ }
+}'
+else
+ votingState='
+{
+ "input": {
+ "member-voting-state": [
+ {
+ "member-name": "member-1",
+ "voting": false
+ },
+ {
+ "member-name": "member-2",
+ "voting": false
+ },
+ {
+ "member-name": "member-3",
+ "voting": false
+ },
+ {
+ "member-name": "member-4",
+ "voting": true
+ },
+ {
+ "member-name": "member-5",
+ "voting": true
+ },
+ {
+ "member-name": "member-6",
+ "voting": true
+ }
+ ]
+ }
+}'
+fi
+
+status=$(curl -s -u $username:$password -o /dev/null -H "Content-Type: application/json" -H "Accept: application/json" -X POST -d "$votingState" -w "%{http_code}\n" $url 2> /dev/null)
+if [ $status -ne 200 ];then
+ debugLog "Switch voting failed. status: $status ,username: $username ,password: $password ,votingState: $votingState ,url:$url "
+ echo "failure"
+else
+ echo "success"
+fi
+