From b3350fa9629b558c5bdba5ff8d5e5d670d150204 Mon Sep 17 00:00:00 2001 From: Jack Lucas Date: Fri, 17 Aug 2018 12:58:51 +0000 Subject: Add code to enhance k8s bootstrap - Make bootstrap container continue to run after initial bootstrap done - Do deployments in parallel where feasible - Accommodate deployments with no input files - Test to see if actions (uploads, installs, etc.) are needed before doing them Issue-ID: DCAEGEN2-594 Change-Id: Ie188c1fd69695479593aa82b516e5504a849099c Signed-off-by: Jack Lucas --- k8s-bootstrap-container/00-consul.json | 9 ++ k8s-bootstrap-container/Dockerfile-template | 24 +++- k8s-bootstrap-container/bootstrap.sh | 184 +++++++++++++++++++++++----- k8s-bootstrap-container/build-plugins.sh | 5 +- k8s-bootstrap-container/pom.xml | 2 +- k8s-bootstrap-container/uninstall.sh | 36 ++++++ 6 files changed, 218 insertions(+), 42 deletions(-) create mode 100644 k8s-bootstrap-container/00-consul.json create mode 100644 k8s-bootstrap-container/uninstall.sh diff --git a/k8s-bootstrap-container/00-consul.json b/k8s-bootstrap-container/00-consul.json new file mode 100644 index 0000000..a7d6ff4 --- /dev/null +++ b/k8s-bootstrap-container/00-consul.json @@ -0,0 +1,9 @@ +{ + "datacenter": "dc1", + "data_dir": "/opt/consul", + "log_level": "INFO", + "server": false, + "retry_join": ["consul"], + "node_name": "dcae-bootstrap", + "disable_update_check": true + } \ No newline at end of file diff --git a/k8s-bootstrap-container/Dockerfile-template b/k8s-bootstrap-container/Dockerfile-template index 26bc9a1..55e64f0 100644 --- a/k8s-bootstrap-container/Dockerfile-template +++ b/k8s-bootstrap-container/Dockerfile-template @@ -27,23 +27,39 @@ ENV BP_REPO {{ ONAPTEMPLATE_RAWREPOURL_org_onap_dcaegen2_platform_blueprints_rel # Install gcc RUN yum install -y gcc python-devel +# Install Consul +RUN yum install -y unzip \ + && mkdir -p /opt/consul/bin \ + && mkdir -p /opt/consul/data \ + && mkdir -p /opt/consul/config \ + && curl -Ss 
https://releases.hashicorp.com/consul/1.2.1/consul_1.2.1_linux_amd64.zip > /tmp/consul.zip \ + && unzip -pj /tmp/consul.zip > /opt/consul/bin/consul \ + && chmod +x /opt/consul/bin/consul +COPY 00-consul.json /opt/consul/config/ + +# Install jq +RUN curl -Ss -L "https://github.com/stedolan/jq/releases/download/jq-1.5/jq-linux64" > /bin/jq \ +&& chmod +x /bin/jq + # Install pip RUN curl https://bootstrap.pypa.io/get-pip.py -o get-pip.py \ && python get-pip.py \ && rm get-pip.py \ && pip install cloudify==4.2 -# Get plugin archives and build wagons +# Copy scripts RUN mkdir scripts -COPY build-plugins.sh scripts +COPY build-plugins.sh load-blueprints.sh bootstrap.sh uninstall.sh scripts/ +RUN chmod +x /scripts/*.sh + +# Get plugin archives and build wagons RUN scripts/build-plugins.sh ${DCAE_REPO} ${CCSDK_REPO} \ && rm scripts/build-plugins.sh # Load blueprints and input templates -COPY load-blueprints.sh scripts RUN scripts/load-blueprints.sh ${BP_REPO} \ && rm scripts/load-blueprints.sh + # Set up runtime script -COPY bootstrap.sh scripts ENTRYPOINT exec "/scripts/bootstrap.sh" diff --git a/k8s-bootstrap-container/bootstrap.sh b/k8s-bootstrap-container/bootstrap.sh index 6d39404..fb092ce 100755 --- a/k8s-bootstrap-container/bootstrap.sh +++ b/k8s-bootstrap-container/bootstrap.sh @@ -26,8 +26,99 @@ # Blueprints for components to be installed in /blueprints # Input files for components to be installed in /inputs # Configuration JSON files that need to be loaded into Consul in /dcae-configs +# Consul is installed in /opt/consul/bin/consul, with base config in /opt/consul/config/00-consul.json -set -ex +### FUNCTION DEFINITIONS ### + +# keep_running: Keep running after bootstrap finishes or after error +keep_running() { + echo $1 + sleep infinity & + wait +} + +# cm_hasany: Query Cloudify Manager and return 0 (true) if there are any entities matching the query +# Used to see if something is already present on CM +# $1 -- query fragment, for instance 
"plugins?archive_name=xyz.wgn" to get +# the number of plugins that came from the archive file "xyz.wgn" +function cm_hasany { + # We use _include=id to limit the amount of data the CM sends back + # We rely on the "metadata.pagination.total" field in the response + # for the total number of matching entities + COUNT=$(curl -Ss -H "Tenant: default_tenant" --user admin:${CMPASS} "${CMADDR}/api/v3.1/$1&_include=id" \ + | /bin/jq .metadata.pagination.total) + if (( $COUNT > 0 )) + then + return 0 + else + return 1 + fi +} + +# deploy: Deploy components if they're not already deployed +# $1 -- name (for bp and deployment) +# $2 -- blueprint file name +# $3 -- inputs file name (optional) +function deploy { + # Don't crash the script on error + set +e + + # Upload blueprint if it's not already there + if cm_hasany "blueprints?id=$1" + then + echo blueprint $1 is already installed on ${CMADDR} + else + cfy blueprints upload -b $1 /blueprints/$2 + fi + + # Create deployment if it doesn't already exist + if cm_hasany "deployments?id=$1" + then + echo deployment $1 has already been created on ${CMADDR} + else + INPUTS= + if [ -n "$3" ] + then + INPUTS="-i/inputs/$3" + fi + cfy deployments create -b $1 ${INPUTS} $1 + fi + + # Run the install workflow if it hasn't been run already + # We don't have a completely certain way of determining this. + # We check to see if the deployment has any node instances + # that are in the 'uninitialized' or 'deleted' states. (Note that + # the & in the query acts as a logical OR for the multiple state values.) 
+ # We'll try to install when a deployment has node instances in those states + if cm_hasany "node-instances?deployment_id=$1&state=uninitialized&state=deleted" + then + cfy executions start -d $1 install + else + echo deployment $1 appears to have had an install workflow executed already or is not ready for an install + fi +} + +# Install plugin if it's not already installed +# $1 -- path to wagon file for plugin +function install_plugin { + ARCHIVE=$(basename $1) + # See if it's already installed + if cm_hasany "plugins?archive_name=$ARCHIVE" + then + echo plugin $1 already installed on ${CMADDR} + else + cfy plugin upload $1 + fi +} + +### END FUNCTION DEFINITIONS ### + +set -x + +# Make sure we keep the container alive after an error +trap keep_running ERR + +set -e # Consul service registration data CBS_REG='{"ID": "dcae-cbs0", "Name": "config_binding_service", "Address": "config-binding-service", "Port": 10000}' @@ -48,20 +139,35 @@ then fi PH_REG="${PH_REG}\"}" -# Deploy components -# $1 -- name (for bp and deployment) -# $2 -- blueprint name -# $3 -- inputs file name -function deploy { - cfy install -b $1 -d $1 -i /inputs/$3 /blueprints/$2 -} + + # Set up profile to access Cloudify Manager cfy profiles use -u admin -t default_tenant -p "${CMPASS}" "${CMADDR}" # Output status, for debugging purposes cfy status -# Load configurations into Consul +# Check Consul readiness +# The readiness container waits for a "consul-server" container to be ready, +# but this isn't always enough. We need the Consul API to be up and for +# the cluster to be formed, otherwise our Consul accesses might fail. +# (Note in ONAP R2, we never saw a problem, but occasionally in R3 we +# have seen Consul not be fully ready, so we add these checks, originally +# used in the R1 HEAT-based deployment.) 
+# Wait for Consul API to come up +until curl http://${CONSUL}/v1/agent/services +do + echo Waiting for Consul API + sleep 60 +done +# Wait for a leader to be elected +until [[ "$(curl -Ss http://${CONSUL}/v1/status/leader)" != '""' ]] +do + echo Waiting for leader + sleep 30 +done + +# Load configurations into Consul KV store for config in /dcae-configs/*.json do # The basename of the file is the Consul key @@ -71,47 +177,57 @@ do curl -v -X PUT -H "Content-Type: application/json" --data-binary @/tmp/dcae-upload ${CONSUL}/v1/kv/${key} done -# For backward compatibility, load some platform services into Consul service registry -# Some components still rely on looking up a service in Consul -curl -v -X PUT -H "Content-Type: application/json" --data "${CBS_REG}" ${CONSUL}/v1/agent/service/register -curl -v -X PUT -H "Content-Type: application/json" --data "${CBS_REG1}" ${CONSUL}/v1/agent/service/register -curl -v -X PUT -H "Content-Type: application/json" --data "${CM_REG}" ${CONSUL}/v1/agent/service/register -curl -v -X PUT -H "Content-Type: application/json" --data "${INV_REG}" ${CONSUL}/v1/agent/service/register -curl -v -X PUT -H "Content-Type: application/json" --data "${PH_REG}" ${CONSUL}/v1/agent/service/register -curl -v -X PUT -H "Content-Type: application/json" --data "${HE_REG}" ${CONSUL}/v1/agent/service/register -curl -v -X PUT -H "Content-Type: application/json" --data "${HR_REG}" ${CONSUL}/v1/agent/service/register +# Put service registrations into the local Consul configuration directory +for sr in CBS_REG CBS_REG1 INV_REG HE_REG HR_REG CM_REG PH_REG +do + echo '{"service" : ' ${!sr} ' }'> /opt/consul/config/${sr}.json +done + +# Start the local consul agent instance +/opt/consul/bin/consul agent --config-dir /opt/consul/config 2>&1 | tee /opt/consul/consul.log & # Store the CM password into a Cloudify secret cfy secret create -s ${CMPASS} cmpass # Load plugins onto CM -# Allow "already loaded" error -# (If there are other problems, will -# be caught 
in deployments.) -set +e for wagon in /wagons/*.wgn do - cfy plugins upload ${wagon} + install_plugin ${wagon} done -set -e set +e -# (don't let failure of one stop the script. this is likely due to image pull taking too long) +# (Don't let failure of one stop the script. This is likely due to image pull taking too long.) + # Deploy platform components -deploy config_binding_service k8s-config_binding_service.yaml k8s-config_binding_service-inputs.yaml -deploy inventory k8s-inventory.yaml k8s-inventory-inputs.yaml -deploy deployment_handler k8s-deployment_handler.yaml k8s-deployment_handler-inputs.yaml -deploy policy_handler k8s-policy_handler.yaml k8s-policy_handler-inputs.yaml -deploy pgaas_initdb k8s-pgaas-initdb.yaml k8s-pgaas-initdb-inputs.yaml +# Allow for some parallelism to speed up the process. Probably could be somewhat more aggressive. +# config_binding_service and pgaas_initdb needed by others, but can execute in parallel +deploy config_binding_service k8s-config_binding_service.yaml k8s-config_binding_service-inputs.yaml & +CBS_PID=$! +deploy pgaas_initdb k8s-pgaas-initdb.yaml k8s-pgaas-initdb-inputs.yaml & +PG_PID=$! +wait ${CBS_PID} ${PG_PID} +# inventory, deployment_handler, and policy_handler can be deployed simultaneously +deploy inventory k8s-inventory.yaml k8s-inventory-inputs.yaml & +INV_PID=$! +deploy deployment_handler k8s-deployment_handler.yaml k8s-deployment_handler-inputs.yaml & +DH_PID=$! +deploy policy_handler k8s-policy_handler.yaml k8s-policy_handler-inputs.yaml& +PH_PID=$! 
+wait ${INV_PID} ${DH_PID} ${PH_PID} # Deploy service components -deploy tca k8s-tca.yaml k8s-tca-inputs.yaml -deploy ves k8s-ves.yaml k8s-ves-inputs.yaml -deploy prh k8s-prh.yaml k8s-prh-inputs.yaml -# holmes_rules must be deployed before holmes_engine +# tca, ves, prh can be deployed simultaneously +deploy tca k8s-tca.yaml k8s-tca-inputs.yaml & +deploy ves k8s-ves.yaml k8s-ves-inputs.yaml & +deploy prh k8s-prh.yaml & +# holmes_rules must be deployed before holmes_engine, but holmes_rules can go in parallel with other service components deploy holmes_rules k8s-holmes-rules.yaml k8s-holmes_rules-inputs.yaml deploy holmes_engine k8s-holmes-engine.yaml k8s-holmes_engine-inputs.yaml set -e # Display deployments, for debugging purposes cfy deployments list + +# Continue running +keep_running "Finished bootstrap steps." +echo "Exiting!" \ No newline at end of file diff --git a/k8s-bootstrap-container/build-plugins.sh b/k8s-bootstrap-container/build-plugins.sh index 383e5e9..adf8c7c 100755 --- a/k8s-bootstrap-container/build-plugins.sh +++ b/k8s-bootstrap-container/build-plugins.sh @@ -31,8 +31,7 @@ DEST=wagons # For DCAE, we get zips of the archives and build wagons DCAEPLUGINFILES=\ "\ -k8splugin/1.4.0/k8splugin-1.4.0.tgz -dockerplugin/3.2.0/dockerplugin-3.2.0.tgz +k8splugin/1.4.1/k8splugin-1.4.1.tgz relationshipplugin/1.0.0/relationshipplugin-1.0.0.tgz dcaepolicyplugin/2.3.0/dcaepolicyplugin-2.3.0.tgz \ " @@ -56,7 +55,7 @@ function build { target=$(basename ${plugin}) curl -Ss $1/${plugin} > ${target} tar zxvf ${target} --strip-components=2 -C work - wagon create -t tar.gz -o ${DEST} -r work/requirements.txt --validate ./work + wagon create -t tar.gz -o ${DEST} -r work/requirements.txt --validate ./work rm -rf work done } diff --git a/k8s-bootstrap-container/pom.xml b/k8s-bootstrap-container/pom.xml index 69a7170..33b58ae 100644 --- a/k8s-bootstrap-container/pom.xml +++ b/k8s-bootstrap-container/pom.xml @@ -27,7 +27,7 @@ limitations under the License. 
org.onap.dcaegen2.deployments k8s-bootstrap-container dcaegen2-deployments-k8s-bootstrap-container - 1.3.0 + 1.4.0 http://maven.apache.org UTF-8 diff --git a/k8s-bootstrap-container/uninstall.sh b/k8s-bootstrap-container/uninstall.sh new file mode 100644 index 0000000..71f8161 --- /dev/null +++ b/k8s-bootstrap-container/uninstall.sh @@ -0,0 +1,36 @@ +#!/bin/bash +# ================================================================================ +# Copyright (c) 2018 AT&T Intellectual Property. All rights reserved. +# ================================================================================ +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============LICENSE_END========================================================= + +# Clean up DCAE during ONAP uninstall + +set -x +set +e + +# Leave the Consul cluster +/opt/consul/bin/consul leave + +# Uninstall components managed by Cloudify +# Get the list of deployment ids known to Cloudify via curl to Cloudify API. +# The output of the curl is JSON that looks like {"items" :[{"id": "config_binding_service"}, ...], "metadata" :{...}} +# +# jq gives us the just the deployment ids (e.g., "config_binding_service"), one per line +# +# xargs -I lets us run the cfy uninstall command once for each deployment id extracted by jq + +curl -Ss --user admin:$CMPASS -H "Tenant: default_tenant" "$CMADDR/api/v3.1/deployments?_include=id" \ +| /bin/jq .items[].id \ +| xargs -I % sh -c 'cfy uninstall %' -- cgit 1.2.3-korg