summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Lusheng Ji <lji@research.att.com> 2018-05-14 22:45:56 -0400
committer Lusheng Ji <lji@research.att.com> 2018-05-14 23:02:39 -0400
commit e380f6bb14fb0e5fae4e9a6d9b9af40a9340f11a (patch)
tree 5888b27d73d51e58403ab3873af972b734068cd2
parent 430b6b44fcd1dfa917cb599962ae6ef332581ede (diff)
Increase robustness for TCA
Enhanced TCA robustness against unprovisioned topics. When the configuration tells TCA to subscribe to a non-existent MR topic, TCA attempts the subscription but then stops, because subscribing to such a topic results in failure. The enhancement implemented here tests for the subscription topic and, if it does not exist, makes a test publish to create it. Additional enhancements include: 1. restart TCA if the number of running workers is below the expected count (3); 2. allow the MR subscriber group and id to be set via the environment variables DMAAPSUBGROUP and DMAAPSUBID; 3. the minor version is bumped. Issue-ID: DCAEGEN2-502 Change-Id: I3414a96706a1b720184cd657324db4d11db12590 Signed-off-by: Lusheng Ji <lji@research.att.com>
-rw-r--r-- tca-cdap-container/Dockerfile 4
-rwxr-xr-x tca-cdap-container/get-tca.sh 2
-rwxr-xr-x tca-cdap-container/mr-watchdog.sh 59
-rw-r--r-- tca-cdap-container/pom.xml 2
-rwxr-xr-x tca-cdap-container/restart.sh 79
5 files changed, 120 insertions, 26 deletions
diff --git a/tca-cdap-container/Dockerfile b/tca-cdap-container/Dockerfile
index 5cd1267..2c57ff2 100644
--- a/tca-cdap-container/Dockerfile
+++ b/tca-cdap-container/Dockerfile
@@ -16,13 +16,15 @@
FROM caskdata/cdap-standalone:4.1.2
-RUN apt-get update && apt-get install -y netcat jq iputils-ping wget vim
+RUN apt-get update && apt-get install -y netcat jq iputils-ping wget vim curl
COPY get-tca.sh /opt/tca/get-tca.sh
RUN /opt/tca/get-tca.sh
COPY tca_app_config.json /opt/tca/tca_app_config.json
COPY tca_app_preferences.json /opt/tca/tca_app_preferences.json
COPY restart.sh /opt/tca/restart.sh
RUN chmod 755 /opt/tca/restart.sh
+COPY mr-watchdog.sh /opt/tca/mr-watchdog.sh
+RUN chmod 755 /opt/tca/mr-watchdog.sh
#COPY host.aliases /etc/host.aliases
#RUN echo "export HOSTALIASES=/etc/host.aliases" >> /etc/profile
diff --git a/tca-cdap-container/get-tca.sh b/tca-cdap-container/get-tca.sh
index 9b46830..784d914 100755
--- a/tca-cdap-container/get-tca.sh
+++ b/tca-cdap-container/get-tca.sh
@@ -37,7 +37,7 @@ echo "Getting version $VERSION of $GROUPID.$ARTIFACTID from $REPO repo on $NEXUS
if [ "$REPO" == "snapshots" ]; then
# SNOTSHOT repo container many snapshots for each version. get the newest among them
URL="${PROTO}://${NEXUSREPO}/service/local/repositories/${REPO}/content/${GROUPID//.//}/${ARTIFACTID}/${VERSION}/maven-metadata.xml"
- VT=$(wget --no-check-certificate -O- $URL | grep -m 1 \<value\> | sed -e 's/<value>\(.*\)<\/value>/\1/' | sed -e 's/ //g')
+ VT=$(wget --no-check-certificate -O- "$URL" | grep -m 1 \<value\> | sed -e 's/<value>\(.*\)<\/value>/\1/' | sed -e 's/ //g')
else
VT=${VERSION}
fi
diff --git a/tca-cdap-container/mr-watchdog.sh b/tca-cdap-container/mr-watchdog.sh
new file mode 100755
index 0000000..fa623a1
--- /dev/null
+++ b/tca-cdap-container/mr-watchdog.sh
@@ -0,0 +1,59 @@
+#!/bin/bash
+# ================================================================================
+# Copyright (c) 2018 AT&T Intellectual Property. All rights reserved.
+# ================================================================================
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============LICENSE_END=========================================================
+
+
+
+SUB_TOPIC=${3:-unauthenticated.VES_MEASUREMENT_OUTPUT}
+MR_LOCATION=${1:-10.0.11.1}
+MR_PORT=${2:-3904}
+MR_PROTO='http'
+
+
+TOPIC_LIST_URL="${MR_PROTO}://${MR_LOCATION}:${MR_PORT}/topics"
+TEST_PUB_URL="${MR_PROTO}://${MR_LOCATION}:${MR_PORT}/events/${SUB_TOPIC}"
+
+unset RES
+echo "==> Check topic [${SUB_TOPIC}] availbility on ${MR_LOCATION}:${MR_PORT}"
+until [ -n "$RES" ]; do
+ URL="$TOPIC_LIST_URL"
+ HTTP_RESPONSE=$(curl --silent --write-out "HTTPSTATUS:%{http_code}" "$URL")
+ HTTP_BODY=$(echo "$HTTP_RESPONSE" | sed -e 's/HTTPSTATUS\:.*//g')
+ HTTP_STATUS=$(echo "$HTTP_RESPONSE" | tr -d '\n' | sed -e 's/.*HTTPSTATUS://')
+ if [ "${HTTP_STATUS}" != "200" ]; then
+ echo " ==> MR topic listing not ready, retry in 30 seconds"
+ sleep 30
+ continue
+ fi
+
+ echo " ==> MR topic listing received, check topic availbility"
+ RES=$(echo "${HTTP_BODY}" |jq .topics |grep "\"$SUB_TOPIC\"")
+ if [ -z "${RES}" ]; then
+ echo " ==> No topic [${SUB_TOPIC}] found, send test publish"
+ URL="$TEST_PUB_URL"
+ HTTP_RESPONSE=$(curl --silent --write-out "HTTPSTATUS:%{http_code}" -H "Content-Type:text/plain" -X POST -d "{}" "$URL")
+ HTTP_BODY=$(echo "$HTTP_RESPONSE" | sed -e 's/HTTPSTATUS\:.*//g')
+ HTTP_STATUS=$(echo "$HTTP_RESPONSE" | tr -d '\n' | sed -e 's/.*HTTPSTATUS://')
+
+ if [ "$HTTP_STATUS" != "200" ]; then
+ echo " ==> Testing MR topic publishing received status $HTTP_STATUS != 200, retesting in 30 seconds"
+ sleep 30
+ else
+ echo " ==> Testing MR topic publishing received status $HTTP_STATUS, topic [$SUB_TOPIC] created"
+ fi
+ fi
+done
+echo "==> Topic [${SUB_TOPIC}] ready"
diff --git a/tca-cdap-container/pom.xml b/tca-cdap-container/pom.xml
index cab867b..212feab 100644
--- a/tca-cdap-container/pom.xml
+++ b/tca-cdap-container/pom.xml
@@ -27,7 +27,7 @@ limitations under the License.
<groupId>org.onap.dcaegen2.deployments</groupId>
<artifactId>tca-cdap-container</artifactId>
<name>dcaegen2-deployments-tca-cdap-container</name>
- <version>1.0.0</version>
+ <version>1.1.0</version>
<url>http://maven.apache.org</url>
<properties>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
diff --git a/tca-cdap-container/restart.sh b/tca-cdap-container/restart.sh
index 4f6ed92..6d0c60f 100755
--- a/tca-cdap-container/restart.sh
+++ b/tca-cdap-container/restart.sh
@@ -36,6 +36,10 @@ TCA_PREF_TEMP='/tmp/tca_preferences.json'
TCA_PATH_APP="${CDAP_HOST}:${CDAP_PORT}/v3/namespaces/${TCA_NAMESPACE}/apps/${TCA_APPNAME}"
TCA_PATH_ARTIFACT="${CDAP_HOST}:${CDAP_PORT}/v3/namespaces/${TCA_NAMESPACE}/artifacts"
+MR_WATCHDOG_PATH="${TCA_FILE_PATH}/mr-watchdog.sh"
+
+
+WORKER_COUNT='0'
CONSUL_HOST=${CONSUL_HOST:-consul}
CONSUL_PORT=${CONSUL_PORT:-8500}
@@ -48,12 +52,14 @@ MY_NAME=${HOSTNAME:-tca}
echo "Generting preference file"
+DMAAPSUBGROUP=${DMAAPSUBGROUP:-OpenDCAEc12}
+DMAAPSUBID=${DMAAPSUBID:=c12}
sed -i 's/{{DMAAPHOST}}/'"${DMAAPHOST}"'/g' ${TCA_PREF}
sed -i 's/{{DMAAPPORT}}/'"${DMAAPPORT}"'/g' ${TCA_PREF}
sed -i 's/{{DMAAPPUBTOPIC}}/'"${DMAAPPUBTOPIC}"'/g' ${TCA_PREF}
sed -i 's/{{DMAAPSUBTOPIC}}/'"${DMAAPSUBTOPIC}"'/g' ${TCA_PREF}
-sed -i 's/{{DMAAPSUBGROUP}}/OpenDCAEc12/g' ${TCA_PREF}
-sed -i 's/{{DMAAPSUBID}}/c12/g' ${TCA_PREF}
+sed -i 's/{{DMAAPSUBGROUP}}/'"${DMAAPSUBGROUP}"'/g' ${TCA_PREF}
+sed -i 's/{{DMAAPSUBID}}/'"${DMAAPSUBID}"'/g' ${TCA_PREF}
sed -i 's/{{AAIHOST}}/'"${AAIHOST}"'/g' ${TCA_PREF}
sed -i 's/{{AAIPORT}}/'"${AAIPORT}"'/g' ${TCA_PREF}
if [ -z "$REDISHOSTPORT" ]; then
@@ -121,19 +127,41 @@ function tca_start {
function tca_status {
+ WORKER_COUNT='0'
echo
- echo "TCADMaaPMRPublisherWorker status: "
- curl -s "http://${TCA_PATH_APP}/workers/TCADMaaPMRPublisherWorker/status"
+ STATUS=$(curl -s "http://${TCA_PATH_APP}/workers/TCADMaaPMRPublisherWorker/status")
+ echo "TCADMaaPMRPublisherWorker status: $STATUS"
+ INC=$(echo "$STATUS" | jq . |grep RUNNING |wc -l)
+ WORKER_COUNT=$((WORKER_COUNT+INC))
+
+ STATUS=$(curl -s "http://${TCA_PATH_APP}/workers/TCADMaaPMRSubscriberWorker/status")
+ echo "TCADMaaPMRSubscriberWorker status: $STATUS"
+ INC=$(echo "$STATUS" | jq . |grep RUNNING |wc -l)
+ WORKER_COUNT=$((WORKER_COUNT+INC))
+
+ STATUS=$(curl -s "http://${TCA_PATH_APP}/flows/TCAVESCollectorFlow/status")
+ echo "TCAVESCollectorFlow status: $STATUS"
+ INC=$(echo "$STATUS" | jq . |grep RUNNING |wc -l)
+ WORKER_COUNT=$((WORKER_COUNT+INC))
echo
- echo "TCADMaaPMRSubscriberWorker status: "
- curl -s "http://${TCA_PATH_APP}/workers/TCADMaaPMRSubscriberWorker/status"
- echo
- echo "TCAVESCollectorFlow status"
- curl -s "http://${TCA_PATH_APP}/flows/TCAVESCollectorFlow/status"
- echo; echo
}
+function tca_restart {
+ MR_HOST=$(jq .subscriberHostName ${TCA_PREF} |sed -e 's/\"//g')
+ MR_PORT=$(jq .subscriberHostPort ${TCA_PREF} |sed -e 's/\"//g')
+ MR_TOPIC=$(jq .subscriberTopicName ${TCA_PREF} |sed -e 's/\"//g')
+ echo "Verifying DMaaP topic: ${MR_TOPIC}@${MR_HOST}:${MR_PORT} (will block until topic ready)"
+ "${MR_WATCHDOG_PATH}" "${MR_HOST}" "${MR_PORT}" "${MR_TOPIC}"
+ tca_stop
+ tca_delete
+ tca_load_artifact
+ tca_load_conf
+ tca_start
+ sleep 5
+ tca_status
+}
+
function tca_poll_policy {
URL0="${CBS_HOST}:${CBS_PORT}/service_component_all/${MY_NAME}"
echo "tca_poll_policy: Retrieving all-in-one config at ${URL0}"
@@ -246,12 +274,9 @@ function tca_poll_policy {
if [[ "$PERF_CHANGED" == "1" || "$CONF_CHANGED" == "1" ]]; then
echo "Newly received configuration/preference differ from the running instance's. reload confg"
- tca_stop
- tca_delete
- tca_load_artifact
- tca_load_conf
- tca_start
- tca_status
+ tca_restart
+ else
+ echo "Newly received configuration/preference identical from the running instance's"
fi
}
@@ -264,9 +289,9 @@ echo "Starting TCA-CDAP in standalone mode"
# starting CDAP SDK in background
cdap sdk start
-echo "Started, waiting CDAP ready on port 11015 ..."
+echo "CDAP Started, waiting CDAP ready on ${CDAP_HOST}:${CDAP_PORT} ..."
while ! nc -z ${CDAP_HOST} ${CDAP_PORT}; do
- sleep 0.1 # wait for 1/10 of the second before check again
+ sleep 1 # wait for 1 second before check again
done
echo "Creating namespace cdap_tca_hi_lo ..."
@@ -274,21 +299,18 @@ curl -s -X PUT "http://${CDAP_HOST}:${CDAP_PORT}/v3/namespaces/cdap_tca_hi_lo"
# stop programs
tca_stop
-
# delete application
tca_delete
-
# load artifact
tca_load_artifact
tca_load_conf
-
# start programs
tca_start
# get status of programs
tca_status
-echo "TCA-CDAP standalone mode initialization completed"
+echo "TCA-CDAP standalone mode initialization completed, with $WORKER_COUNT / 3 up"
@@ -301,7 +323,18 @@ echo "TCA environment: I am ${MY_NAME}, consul at ${CONSUL_HOST}:${CONSUL_PORT},
while echo
do
- echo "$(date): ======================================================"
+ echo "======================================================> $(date)"
+ tca_status
+
+ while [ "$WORKER_COUNT" != "3" ]; do
+ echo "Status checking: worker count is $WORKER_COUNT, needs a reset"
+ sleep 5
+
+ tca_restart
+ echo "TCA restarted"
+ done
+
+
if [[ -z "$CBS_HOST" || -z "$CBS_PORT" ]]; then
echo "Retrieving host and port for ${CBS_SERVICE_NAME} from ${CONSUL_HOST}:${CONSUL_PORT}"
sleep 2