From e380f6bb14fb0e5fae4e9a6d9b9af40a9340f11a Mon Sep 17 00:00:00 2001 From: Lusheng Ji Date: Mon, 14 May 2018 22:45:56 -0400 Subject: Increase robustness for TCA Enhanced TCA robustness against unprovisioned topics. When the configuration tells TCA to subscribe to a non-existent MR topic, TCA will attempt but stop because subscribing to such topics results in failure. The enhancements implemented here will test for the subscription topic, and if it is non-existent, publish a test message to create the topic. Additional enhancements include: 1. restart TCA if the number of workers is below the expected count (3); 2. allow the MR subscriber group and id to be set via environment variables DMAAPSUBGROUP and DMAAPSUBID. 3. Minor version is bumped. Issue-ID: DCAEGEN2-502 Change-Id: I3414a96706a1b720184cd657324db4d11db12590 Signed-off-by: Lusheng Ji --- tca-cdap-container/Dockerfile | 4 +- tca-cdap-container/get-tca.sh | 2 +- tca-cdap-container/mr-watchdog.sh | 59 +++++++++++++++++++++++++++++ tca-cdap-container/pom.xml | 2 +- tca-cdap-container/restart.sh | 79 +++++++++++++++++++++++++++------------ 5 files changed, 120 insertions(+), 26 deletions(-) create mode 100755 tca-cdap-container/mr-watchdog.sh diff --git a/tca-cdap-container/Dockerfile b/tca-cdap-container/Dockerfile index 5cd1267..2c57ff2 100644 --- a/tca-cdap-container/Dockerfile +++ b/tca-cdap-container/Dockerfile @@ -16,13 +16,15 @@ FROM caskdata/cdap-standalone:4.1.2 -RUN apt-get update && apt-get install -y netcat jq iputils-ping wget vim +RUN apt-get update && apt-get install -y netcat jq iputils-ping wget vim curl COPY get-tca.sh /opt/tca/get-tca.sh RUN /opt/tca/get-tca.sh COPY tca_app_config.json /opt/tca/tca_app_config.json COPY tca_app_preferences.json /opt/tca/tca_app_preferences.json COPY restart.sh /opt/tca/restart.sh RUN chmod 755 /opt/tca/restart.sh +COPY mr-watchdog.sh /opt/tca/mr-watchdog.sh +RUN chmod 755 /opt/tca/mr-watchdog.sh #COPY host.aliases /etc/host.aliases #RUN echo "export HOSTALIASES=/etc/host.aliases" >> /etc/profile diff 
--git a/tca-cdap-container/get-tca.sh b/tca-cdap-container/get-tca.sh index 9b46830..784d914 100755 --- a/tca-cdap-container/get-tca.sh +++ b/tca-cdap-container/get-tca.sh @@ -37,7 +37,7 @@ echo "Getting version $VERSION of $GROUPID.$ARTIFACTID from $REPO repo on $NEXUS if [ "$REPO" == "snapshots" ]; then # SNOTSHOT repo container many snapshots for each version. get the newest among them URL="${PROTO}://${NEXUSREPO}/service/local/repositories/${REPO}/content/${GROUPID//.//}/${ARTIFACTID}/${VERSION}/maven-metadata.xml" - VT=$(wget --no-check-certificate -O- $URL | grep -m 1 \ | sed -e 's/\(.*\)<\/value>/\1/' | sed -e 's/ //g') + VT=$(wget --no-check-certificate -O- "$URL" | grep -m 1 \ | sed -e 's/\(.*\)<\/value>/\1/' | sed -e 's/ //g') else VT=${VERSION} fi diff --git a/tca-cdap-container/mr-watchdog.sh b/tca-cdap-container/mr-watchdog.sh new file mode 100755 index 0000000..fa623a1 --- /dev/null +++ b/tca-cdap-container/mr-watchdog.sh @@ -0,0 +1,59 @@ +#!/bin/bash +# ================================================================================ +# Copyright (c) 2018 AT&T Intellectual Property. All rights reserved. +# ================================================================================ +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============LICENSE_END========================================================= + + + +SUB_TOPIC=${3:-unauthenticated.VES_MEASUREMENT_OUTPUT} +MR_LOCATION=${1:-10.0.11.1} +MR_PORT=${2:-3904} +MR_PROTO='http' + + +TOPIC_LIST_URL="${MR_PROTO}://${MR_LOCATION}:${MR_PORT}/topics" +TEST_PUB_URL="${MR_PROTO}://${MR_LOCATION}:${MR_PORT}/events/${SUB_TOPIC}" + +unset RES +echo "==> Check topic [${SUB_TOPIC}] availbility on ${MR_LOCATION}:${MR_PORT}" +until [ -n "$RES" ]; do + URL="$TOPIC_LIST_URL" + HTTP_RESPONSE=$(curl --silent --write-out "HTTPSTATUS:%{http_code}" "$URL") + HTTP_BODY=$(echo "$HTTP_RESPONSE" | sed -e 's/HTTPSTATUS\:.*//g') + HTTP_STATUS=$(echo "$HTTP_RESPONSE" | tr -d '\n' | sed -e 's/.*HTTPSTATUS://') + if [ "${HTTP_STATUS}" != "200" ]; then + echo " ==> MR topic listing not ready, retry in 30 seconds" + sleep 30 + continue + fi + + echo " ==> MR topic listing received, check topic availbility" + RES=$(echo "${HTTP_BODY}" |jq .topics |grep "\"$SUB_TOPIC\"") + if [ -z "${RES}" ]; then + echo " ==> No topic [${SUB_TOPIC}] found, send test publish" + URL="$TEST_PUB_URL" + HTTP_RESPONSE=$(curl --silent --write-out "HTTPSTATUS:%{http_code}" -H "Content-Type:text/plain" -X POST -d "{}" "$URL") + HTTP_BODY=$(echo "$HTTP_RESPONSE" | sed -e 's/HTTPSTATUS\:.*//g') + HTTP_STATUS=$(echo "$HTTP_RESPONSE" | tr -d '\n' | sed -e 's/.*HTTPSTATUS://') + + if [ "$HTTP_STATUS" != "200" ]; then + echo " ==> Testing MR topic publishing received status $HTTP_STATUS != 200, retesting in 30 seconds" + sleep 30 + else + echo " ==> Testing MR topic publishing received status $HTTP_STATUS, topic [$SUB_TOPIC] created" + fi + fi +done +echo "==> Topic [${SUB_TOPIC}] ready" diff --git a/tca-cdap-container/pom.xml b/tca-cdap-container/pom.xml index cab867b..212feab 100644 --- a/tca-cdap-container/pom.xml +++ b/tca-cdap-container/pom.xml @@ -27,7 +27,7 @@ limitations under the License. 
org.onap.dcaegen2.deployments tca-cdap-container dcaegen2-deployments-tca-cdap-container - 1.0.0 + 1.1.0 http://maven.apache.org UTF-8 diff --git a/tca-cdap-container/restart.sh b/tca-cdap-container/restart.sh index 4f6ed92..6d0c60f 100755 --- a/tca-cdap-container/restart.sh +++ b/tca-cdap-container/restart.sh @@ -36,6 +36,10 @@ TCA_PREF_TEMP='/tmp/tca_preferences.json' TCA_PATH_APP="${CDAP_HOST}:${CDAP_PORT}/v3/namespaces/${TCA_NAMESPACE}/apps/${TCA_APPNAME}" TCA_PATH_ARTIFACT="${CDAP_HOST}:${CDAP_PORT}/v3/namespaces/${TCA_NAMESPACE}/artifacts" +MR_WATCHDOG_PATH="${TCA_FILE_PATH}/mr-watchdog.sh" + + +WORKER_COUNT='0' CONSUL_HOST=${CONSUL_HOST:-consul} CONSUL_PORT=${CONSUL_PORT:-8500} @@ -48,12 +52,14 @@ MY_NAME=${HOSTNAME:-tca} echo "Generting preference file" +DMAAPSUBGROUP=${DMAAPSUBGROUP:-OpenDCAEc12} +DMAAPSUBID=${DMAAPSUBID:=c12} sed -i 's/{{DMAAPHOST}}/'"${DMAAPHOST}"'/g' ${TCA_PREF} sed -i 's/{{DMAAPPORT}}/'"${DMAAPPORT}"'/g' ${TCA_PREF} sed -i 's/{{DMAAPPUBTOPIC}}/'"${DMAAPPUBTOPIC}"'/g' ${TCA_PREF} sed -i 's/{{DMAAPSUBTOPIC}}/'"${DMAAPSUBTOPIC}"'/g' ${TCA_PREF} -sed -i 's/{{DMAAPSUBGROUP}}/OpenDCAEc12/g' ${TCA_PREF} -sed -i 's/{{DMAAPSUBID}}/c12/g' ${TCA_PREF} +sed -i 's/{{DMAAPSUBGROUP}}/'"${DMAAPSUBGROUP}"'/g' ${TCA_PREF} +sed -i 's/{{DMAAPSUBID}}/'"${DMAAPSUBID}"'/g' ${TCA_PREF} sed -i 's/{{AAIHOST}}/'"${AAIHOST}"'/g' ${TCA_PREF} sed -i 's/{{AAIPORT}}/'"${AAIPORT}"'/g' ${TCA_PREF} if [ -z "$REDISHOSTPORT" ]; then @@ -121,19 +127,41 @@ function tca_start { function tca_status { + WORKER_COUNT='0' echo - echo "TCADMaaPMRPublisherWorker status: " - curl -s "http://${TCA_PATH_APP}/workers/TCADMaaPMRPublisherWorker/status" + STATUS=$(curl -s "http://${TCA_PATH_APP}/workers/TCADMaaPMRPublisherWorker/status") + echo "TCADMaaPMRPublisherWorker status: $STATUS" + INC=$(echo "$STATUS" | jq . 
|grep RUNNING |wc -l) + WORKER_COUNT=$((WORKER_COUNT+INC)) + + STATUS=$(curl -s "http://${TCA_PATH_APP}/workers/TCADMaaPMRSubscriberWorker/status") + echo "TCADMaaPMRSubscriberWorker status: $STATUS" + INC=$(echo "$STATUS" | jq . |grep RUNNING |wc -l) + WORKER_COUNT=$((WORKER_COUNT+INC)) + + STATUS=$(curl -s "http://${TCA_PATH_APP}/flows/TCAVESCollectorFlow/status") + echo "TCAVESCollectorFlow status: $STATUS" + INC=$(echo "$STATUS" | jq . |grep RUNNING |wc -l) + WORKER_COUNT=$((WORKER_COUNT+INC)) echo - echo "TCADMaaPMRSubscriberWorker status: " - curl -s "http://${TCA_PATH_APP}/workers/TCADMaaPMRSubscriberWorker/status" - echo - echo "TCAVESCollectorFlow status" - curl -s "http://${TCA_PATH_APP}/flows/TCAVESCollectorFlow/status" - echo; echo } +function tca_restart { + MR_HOST=$(jq .subscriberHostName ${TCA_PREF} |sed -e 's/\"//g') + MR_PORT=$(jq .subscriberHostPort ${TCA_PREF} |sed -e 's/\"//g') + MR_TOPIC=$(jq .subscriberTopicName ${TCA_PREF} |sed -e 's/\"//g') + echo "Verifying DMaaP topic: ${MR_TOPIC}@${MR_HOST}:${MR_PORT} (will block until topic ready)" + "${MR_WATCHDOG_PATH}" "${MR_HOST}" "${MR_PORT}" "${MR_TOPIC}" + tca_stop + tca_delete + tca_load_artifact + tca_load_conf + tca_start + sleep 5 + tca_status +} + function tca_poll_policy { URL0="${CBS_HOST}:${CBS_PORT}/service_component_all/${MY_NAME}" echo "tca_poll_policy: Retrieving all-in-one config at ${URL0}" @@ -246,12 +274,9 @@ function tca_poll_policy { if [[ "$PERF_CHANGED" == "1" || "$CONF_CHANGED" == "1" ]]; then echo "Newly received configuration/preference differ from the running instance's. reload confg" - tca_stop - tca_delete - tca_load_artifact - tca_load_conf - tca_start - tca_status + tca_restart + else + echo "Newly received configuration/preference identical from the running instance's" fi } @@ -264,9 +289,9 @@ echo "Starting TCA-CDAP in standalone mode" # starting CDAP SDK in background cdap sdk start -echo "Started, waiting CDAP ready on port 11015 ..." 
+echo "CDAP Started, waiting CDAP ready on ${CDAP_HOST}:${CDAP_PORT} ..." while ! nc -z ${CDAP_HOST} ${CDAP_PORT}; do - sleep 0.1 # wait for 1/10 of the second before check again + sleep 1 # wait for 1 second before check again done echo "Creating namespace cdap_tca_hi_lo ..." @@ -274,21 +299,18 @@ curl -s -X PUT "http://${CDAP_HOST}:${CDAP_PORT}/v3/namespaces/cdap_tca_hi_lo" # stop programs tca_stop - # delete application tca_delete - # load artifact tca_load_artifact tca_load_conf - # start programs tca_start # get status of programs tca_status -echo "TCA-CDAP standalone mode initialization completed" +echo "TCA-CDAP standalone mode initialization completed, with $WORKER_COUNT / 3 up" @@ -301,7 +323,18 @@ echo "TCA environment: I am ${MY_NAME}, consul at ${CONSUL_HOST}:${CONSUL_PORT}, while echo do - echo "$(date): ======================================================" + echo "======================================================> $(date)" + tca_status + + while [ "$WORKER_COUNT" != "3" ]; do + echo "Status checking: worker count is $WORKER_COUNT, needs a reset" + sleep 5 + + tca_restart + echo "TCA restarted" + done + + if [[ -z "$CBS_HOST" || -z "$CBS_PORT" ]]; then echo "Retrieving host and port for ${CBS_SERVICE_NAME} from ${CONSUL_HOST}:${CONSUL_PORT}" sleep 2 -- cgit 1.2.3-korg