From aae2da91becf5f1f56329d49656c1ad634917cba Mon Sep 17 00:00:00 2001 From: Krzysztof Opasiak Date: Fri, 8 May 2020 18:56:39 +0200 Subject: [Tree-wide] Make chart build process predictable ONAP is built using plain makefile rules. The list of targets is generated using the wildcard function. Based on make changelog: http://git.savannah.gnu.org/cgit/make.git/tree/NEWS since version 3.82 wildcard is not going to sort its results which means that charts are being processed in an arbitrary order which may lead to build failure due to missing dependencies. Since version 4.3 make started sorting the wildcard results once again which may lead to build issues. To avoid that and make our builds predictable independently of the make version let's make sure that we always sort wildcard results. Additionally let's use 'file://' instead of '@local' for charts in common to resolve dependencies between them. Issue-ID: OOM-2399 Signed-off-by: Krzysztof Opasiak Change-Id: Iacb02dcdbd577ce0e9ca1078dd0586d296ec9375 --- .../sdnc-prom/resources/bin/ensureSdncActive.sh | 105 +++++++++++++++++ .../sdnc-prom/resources/bin/ensureSdncStandby.sh | 58 ++++++++++ .../components/sdnc-prom/resources/bin/prom.sh | 31 +++++ .../sdnc-prom/resources/bin/sdnc.cluster | 61 ++++++++++ .../sdnc-prom/resources/bin/sdnc.dnsswitch | 22 ++++ .../sdnc-prom/resources/bin/sdnc.failover | 86 ++++++++++++++ .../sdnc-prom/resources/bin/sdnc.monitor | 125 +++++++++++++++++++++ .../sdnc-prom/resources/bin/switchVoting.sh | 110 ++++++++++++++++++ 8 files changed, 598 insertions(+) create mode 100755 kubernetes/sdnc/components/sdnc-prom/resources/bin/ensureSdncActive.sh create mode 100755 kubernetes/sdnc/components/sdnc-prom/resources/bin/ensureSdncStandby.sh create mode 100755 kubernetes/sdnc/components/sdnc-prom/resources/bin/prom.sh create mode 100755 kubernetes/sdnc/components/sdnc-prom/resources/bin/sdnc.cluster create mode 100755 kubernetes/sdnc/components/sdnc-prom/resources/bin/sdnc.dnsswitch create mode 100755 
kubernetes/sdnc/components/sdnc-prom/resources/bin/sdnc.failover create mode 100755 kubernetes/sdnc/components/sdnc-prom/resources/bin/sdnc.monitor create mode 100755 kubernetes/sdnc/components/sdnc-prom/resources/bin/switchVoting.sh (limited to 'kubernetes/sdnc/components/sdnc-prom/resources/bin') diff --git a/kubernetes/sdnc/components/sdnc-prom/resources/bin/ensureSdncActive.sh b/kubernetes/sdnc/components/sdnc-prom/resources/bin/ensureSdncActive.sh new file mode 100755 index 0000000000..fb24653129 --- /dev/null +++ b/kubernetes/sdnc/components/sdnc-prom/resources/bin/ensureSdncActive.sh @@ -0,0 +1,105 @@ +#!/bin/bash + +# Copyright © 2018 Amdocs +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +debugLog(){ + if [ "$enableDebugLogging" == true ]; then + if [ $# -eq 0 ]; then + echo "" >> $LOGFILE + else + echo $( date ) $@ >> $LOGFILE + fi + fi +} + +failover(){ + lockFile=/tmp/sdnc.failover.lock + # make sure that no failover is currently running + if [ -e ${lockFile} ] && kill -0 $(cat ${lockFile}) 2> /dev/null; then + debugLog "Currently running sdnc and dns failover" + return + fi + trap "rm -f ${lockFile}" INT TERM RETURN + echo $BASHPID > ${lockFile} + + # perform takeover + debugLog "Started executing sdnc.failover for $SITE_NAME" + takeoverResult=$( /app/bin/sdnc.failover ) + debugLog "Completed executing sdnc.failover. 
takeoverResult is: $takeoverResult" + if [ "success" = "$takeoverResult" ]; then + # update CoreDNS upon successful execution of sdnc.failover script + debugLog "Executing sdnc.dnsswitch" + /app/bin/sdnc.dnsswitch + rc=$? + debugLog "Completed executing sdnc.dnsswitch for $SITE_NAME. rc=$rc" + else + debugLog "Cluster takeover current status: $takeoverResult on $SITE_NAME." + rc=1 + fi + + if [ $rc -ne 0 ];then + takeoverResult="failure" + fi + + data="{\ +\"type\": \"failover\",\ +\"status\": \"$takeoverResult\",\ +\"site\": \"$SITE_NAME\",\ +\"deployment\": \"{{.Values.config.deployment}}\",\ +\"timestamp\": \"$(date '+%F %T')\"\ +}" + + # notifications are best-effort - ignore any failures + curl -H "Content-Type: application/json" -X POST --data "$data" http://$message_router/events/$topic >/dev/null 2>&1 + +} + +LOGFILE="/app/geo.log" +enableDebugLogging=true +message_router=message-router:3904 +topic={{.Values.config.messageRouterTopic}} +SITE_NAME="sdnc01" +if [ "$SDNC_IS_PRIMARY_CLUSTER" = "false" ];then + SITE_NAME="sdnc02" +fi + +debugLog +debugLog "Executing ensureSdncActive" + +# query SDN-C cluster status +debugLog "Started executing sdnc.cluster" +clusterStatus=$( /app/bin/sdnc.cluster ) +debugLog "Completed executing sdnc.cluster. Cluster status is: $clusterStatus" + +if [ "active" = "$clusterStatus" ]; then + # peform health-check + debugLog "Started excuting sdnc.monitor" + health=$( /app/bin/sdnc.monitor ) + debugLog "Completed executing sdnc.monitor. 
Cluster is: $health" + + if [ "healthy" = "$health" ]; then + # Cluster is ACTIVE and HEALTHY + exit 0 + fi + exit 1 + +elif [ "standby" = "$clusterStatus" ]; then + # Run failover in background process and allow PROM to continue + ( failover & ) + exit 0 +fi + +# Unknown cluster status +exit 1 diff --git a/kubernetes/sdnc/components/sdnc-prom/resources/bin/ensureSdncStandby.sh b/kubernetes/sdnc/components/sdnc-prom/resources/bin/ensureSdncStandby.sh new file mode 100755 index 0000000000..8dd84bd3ea --- /dev/null +++ b/kubernetes/sdnc/components/sdnc-prom/resources/bin/ensureSdncStandby.sh @@ -0,0 +1,58 @@ +#!/bin/bash + +# Copyright © 2018 Amdocs +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +debugLog(){ + if [ "$enableDebugLogging" == true ]; then + if [ $# -eq 0 ]; then + echo "" >> $LOGFILE + else + echo $( date ) $@ >> $LOGFILE + fi + fi +} + +LOGFILE="/app/geo.log" +enableDebugLogging=true + +debugLog +debugLog "Executing ensureSdncStandby" + +# query SDN-C cluster status +debugLog "Started executing sdnc.cluster" +clusterStatus=$( /app/bin/sdnc.cluster ) +debugLog "Completed executing sdnc.cluster. Cluster status is: $clusterStatus" + +if [ "active" = "$clusterStatus" ]; then + # assume transient error as other side transitions to ACTIVE + debugLog "Cluster status: $clusterStatus. exit 0" + exit 0 + +elif [ "standby" = "$clusterStatus" ]; then + # check that standby cluster is healthy + debugLog "Started executing sdnc.monitor. 
Cluster status is: $clusterStatus" + health=$( /app/bin/sdnc.monitor ) + debugLog "Completed executing sdnc.monitor. Cluster is: $health" + if [ "failure" = "$health" ];then + # Backup site is unhealthy - can't accept traffic! + exit 1 + fi + # Cluster is standing by + exit 0 +fi + +debugLog "Unknown cluster status: $clusterStatus" +# Unknown cluster status +exit 1 diff --git a/kubernetes/sdnc/components/sdnc-prom/resources/bin/prom.sh b/kubernetes/sdnc/components/sdnc-prom/resources/bin/prom.sh new file mode 100755 index 0000000000..c93ba24bd7 --- /dev/null +++ b/kubernetes/sdnc/components/sdnc-prom/resources/bin/prom.sh @@ -0,0 +1,31 @@ +#!/bin/bash + +# Copyright © 2018 Amdocs +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +if [ "${SDNC_IS_PRIMARY_CLUSTER:-true}" = "true" ];then + id=sdnc01 +else + id=sdnc02 +fi + +# should PROM start as passive? 
+state=$( bin/sdnc.cluster ) +if [ "$state" == "standby" ]; then + echo "Starting PROM in passive mode" + passive="-p" +fi + +# start PROM as foreground process +java -jar prom.jar --id $id $passive --config config diff --git a/kubernetes/sdnc/components/sdnc-prom/resources/bin/sdnc.cluster b/kubernetes/sdnc/components/sdnc-prom/resources/bin/sdnc.cluster new file mode 100755 index 0000000000..bdfa1a440b --- /dev/null +++ b/kubernetes/sdnc/components/sdnc-prom/resources/bin/sdnc.cluster @@ -0,0 +1,61 @@ +#!/bin/bash + +# Copyright © 2018 Amdocs +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# query ODL cluster state +USERNAME="{{.Values.odl.jolokia.username}}" +PASSWORD="{{.Values.odl.jolokia.password}}" + +count=${SDNC_ODL_COUNT:-1} +memberStart=0 +if [ "${SDNC_IS_PRIMARY_CLUSTER:-true}" != "true" ];then + memberStart=$(( $memberStart + $count )) +fi + +for instance in $(seq $count);do + shard=member-$(( $memberStart + $instance ))-shard-default-config + mbean=Category=Shards,name=$shard,type=DistributedConfigDatastore + url=http://{{ include "common.release" . }}-sdnc-$(( $instance-1 )).sdnc-cluster.{{.Release.Namespace}}:8181/jolokia/read/org.opendaylight.controller:$mbean + + response=$( curl -s -u $USERNAME:$PASSWORD $url ) + rc=$? 
+ if [ $rc -ne 0 ];then + # failed to contact SDN-C instance - try another + continue + fi + status=$( echo -E "$response" | jq -r ".status" ) + if [ "$status" != "200" ];then + # query failed, try another instance + continue + fi + + voting=$( echo -E "$response" | jq -r ".value.Voting" ) + case $voting in + true) + echo "active" + exit 0 + ;; + false) + echo "standby" + exit 0 + ;; + *) + echo "Error: Voting status could not be determined." + exit 1 + ;; + esac +done +echo "Error: Voting status could not be determined." +exit 1 diff --git a/kubernetes/sdnc/components/sdnc-prom/resources/bin/sdnc.dnsswitch b/kubernetes/sdnc/components/sdnc-prom/resources/bin/sdnc.dnsswitch new file mode 100755 index 0000000000..209352c4e3 --- /dev/null +++ b/kubernetes/sdnc/components/sdnc-prom/resources/bin/sdnc.dnsswitch @@ -0,0 +1,22 @@ +#! /bin/bash + +# Copyright © 2018 Amdocs +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +#################################################################################################### +# sdncDnsSwitchWrapper.bash: Wrapper script to invoke SDNC DNS Switch for domain: sdnc.example.com # +#################################################################################################### +ssh -i {{.Values.coreDNS.sshKeyFile}} -o StrictHostKeyChecking=no {{.Values.coreDNS.sshUser}}@{{.Values.coreDNS.host}} "{{.Values.coreDNS.switchScript}} $SDNC_LOCAL_K8S_CLUSTER_MASTER {{.Values.config.deployment}}" + +exit $? 
diff --git a/kubernetes/sdnc/components/sdnc-prom/resources/bin/sdnc.failover b/kubernetes/sdnc/components/sdnc-prom/resources/bin/sdnc.failover new file mode 100755 index 0000000000..e78b7eeee3 --- /dev/null +++ b/kubernetes/sdnc/components/sdnc-prom/resources/bin/sdnc.failover @@ -0,0 +1,86 @@ +#!/bin/bash + +# Copyright © 2018 Amdocs +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +LOGFILE="/app/geo.log" +enableDebugLogging=true +message_router=message-router:3904 +topic={{.Values.config.messageRouterTopic}} +KEYWORD_success="success" +KEYWORD_failure="failure" +SITE_NAME="sdnc01" +if [ "$SDNC_IS_PRIMARY_CLUSTER" = "false" ];then + SITE_NAME="sdnc02" +fi + +APP_BIN=/app/bin + +debugLog(){ + if [ "$enableDebugLogging" == true ]; then + if [ $# -eq 0 ]; then + echo "" >> $LOGFILE + else + echo $( date ) $@ >> $LOGFILE + fi + fi +} + +EXC_SIMPLE_FAILOVER=`${APP_BIN}/switchVoting.sh` + +if [ "$EXC_SIMPLE_FAILOVER" == "success" ]; then + debugLog "Simple failover success. SDNC failover completed." +else + # Simple failover failed. Trying catastrophic failover ... + debugLog "Simple failover failed. Trying catastrophic failover for $SITE_NAME ..." + + # Notify Dmaap before executing catastrophic failover, because all connections will be reset. 
+ data="{\ + \"type\": \"Catastrophic failover\",\ + \"reason\": \"Simple failover failed\",\ + \"message_router\": \"$message_router\",\ + \"topic\": \"$topic\",\ + \"site\": \"$SITE_NAME\",\ + \"deployment\": \"{{.Values.config.deployment}}\",\ + \"timestamp\": \"$(date '+%F %T')\"\ + }" + + debugLog "$data" + + # notifications to Dmaap + curl -H "Content-Type: application/json" -X POST --data "$data" http://$message_router/events/$topic >/dev/null 2>&1 + + # We're going to kill prom, so we need to do dnsswitch now + + debugLog "Executing sdnc.dnsswitch" + + /app/bin/sdnc.dnsswitch > /dev/null 2>&1 + rc=$? + if [ $rc -ne 0 ];then + debugLog "sdnc.dnsswitch FAILED" + echo $KEYWORD_failure + exit 0 + fi + + # Now do catastrophic failure + + debugLog "Catastrophic failover in progress" + + ssh -o StrictHostKeyChecking=no -i /app/config/coredns/master.key root@$SDNC_LOCAL_K8S_CLUSTER_MASTER "su - ubuntu bash -c 'helm upgrade --set sdnc.config.geoEnabled=false dev local/onap --namespace onap; kubectl -n onap delete pods -l app=sdnc'" > /dev/null 2>&1 + + # Sleep here so prom can die without us passing control back to ensureSDNCActive + sleep 300 +fi + +echo $KEYWORD_success diff --git a/kubernetes/sdnc/components/sdnc-prom/resources/bin/sdnc.monitor b/kubernetes/sdnc/components/sdnc-prom/resources/bin/sdnc.monitor new file mode 100755 index 0000000000..0042ac368a --- /dev/null +++ b/kubernetes/sdnc/components/sdnc-prom/resources/bin/sdnc.monitor @@ -0,0 +1,125 @@ +#!/usr/bin/env python2 +# encoding: utf-8 + +# Copyright © 2018 Amdocs +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import sys +import os +import json +import requests +from datetime import datetime + +consul_server = "consul-server:8500" +message_router = "message-router:3904" +topic = '{{.Values.config.messageRouterTopic}}' +log_file='/app/monitor.log' +status_file='/app/.health' +logEnabled=False + +siteName='sdnc01' +if os.environ.get('SDNC_IS_PRIMARY_CLUSTER', 'true') == 'false': + siteName='sdnc02' + +debug=False +if len(sys.argv) > 1 and sys.argv[1] == '--debug': + debug=True + +def get_state(healthcheck): + response = requests.get("http://" + consul_server + "/v1/health/checks/" + healthcheck) + if response.status_code != 200: + raise RuntimeError("HTTP " + str(response.status_code)) + data = response.json() + if len(data) == 0: + raise RuntimeError(healthcheck + " not found") + if len(data) > 1: + raise RuntimeError("Multiple states for " + healthcheck + " found") + + return data[0] + + +def log(message): + if logEnabled: + with open(log_file, 'a') as f: + f.write(str(datetime.now()) + " " + message + "\n") + +def healthcheck(checks, failFirst=True): + if len(checks) == 0: + return True + + for check in checks: + if type(check) is list: + passing = healthcheck(check, False) + else: + state = get_state(check) + status = state['Status'] + passing = status == "passing" or status == "warning" + log(check + " " + status) + if debug: + if status == "passing": + color = "\033[32m" # green + elif status == "warning": + color = "\033[33m" # yellow + else: + color = "\033[31m" # red + print check, color + status + "\033[0m" + if not passing: + print "\tCause:", 
state['Output'] + + + if passing: + if not failFirst: + # found a passing check so can stop here + return True + else: + if failFirst: + # found a failing check so can stop here + return False + + return failFirst + + +try: + with open("/app/config/healthchecks.json") as f: + checks = json.load(f) + + try: + with open(status_file) as f: + previous_result = f.read() + except IOError: + # file doesn't exist + previous_result = 'unknown' + + if healthcheck(checks): + result = "healthy" + else: + result = "unhealthy" + + print result + + # save current result to file + with open(status_file, 'w') as f: + f.write(result) + + if previous_result != 'unknown' and result != previous_result: + payload = { 'type' : 'health-change', 'status': result, 'site': siteName, 'deployment': '{{.Values.config.deployment}}', 'timestamp': str(datetime.now()) } + log("Posting event " + str(payload)) + try: + requests.post("http://" + message_router + "/events/" + topic, data=json.dumps(payload), headers={ 'Content-Type' : 'application/json' } ) + except Exception: + # events are best-effort + pass + +except Exception as e: + sys.exit(str(e)) diff --git a/kubernetes/sdnc/components/sdnc-prom/resources/bin/switchVoting.sh b/kubernetes/sdnc/components/sdnc-prom/resources/bin/switchVoting.sh new file mode 100755 index 0000000000..f13196e7e8 --- /dev/null +++ b/kubernetes/sdnc/components/sdnc-prom/resources/bin/switchVoting.sh @@ -0,0 +1,110 @@ +#/bin/sh + +# Copyright © 2018 Amdocs +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +set -e +primary=${SDNC_IS_PRIMARY_CLUSTER:-true} + +url=http://sdnc:8282/restconf/operations/cluster-admin:change-member-voting-states-for-all-shards +username="${ODL_USERNAME:-{{.Values.odl.restconf.username}}}" +password="${ODL_PASSWORD:-{{.Values.odl.restconf.password}}}" +LOGFILE="/app/geo.log" +enableDebugLogging=true + +debugLog(){ + if [ "$enableDebugLogging" == true ]; then + if [ $# -eq 0 ]; then + echo "" >> $LOGFILE + else + echo $( date ) $@ >> $LOGFILE + fi + fi +} + + +if [ "$primary" = "true" ]; then + votingState=' +{ + "input": { + "member-voting-state": [ + { + "member-name": "member-1", + "voting": true + }, + { + "member-name": "member-2", + "voting": true + }, + { + "member-name": "member-3", + "voting": true + }, + { + "member-name": "member-4", + "voting": false + }, + { + "member-name": "member-5", + "voting": false + }, + { + "member-name": "member-6", + "voting": false + } + ] + } +}' +else + votingState=' +{ + "input": { + "member-voting-state": [ + { + "member-name": "member-1", + "voting": false + }, + { + "member-name": "member-2", + "voting": false + }, + { + "member-name": "member-3", + "voting": false + }, + { + "member-name": "member-4", + "voting": true + }, + { + "member-name": "member-5", + "voting": true + }, + { + "member-name": "member-6", + "voting": true + } + ] + } +}' +fi + +status=$(curl -s -u $username:$password -o /dev/null -H "Content-Type: application/json" -H "Accept: application/json" -X POST -d "$votingState" -w "%{http_code}\n" $url 2> /dev/null) +if [ $status -ne 200 ];then + debugLog "Switch voting failed. status: $status ,username: $username ,password: $password ,votingState: $votingState ,url:$url " + echo "failure" +else + echo "success" +fi + -- cgit 1.2.3-korg