From b0e3650bd4717fe97a5fcd2e8fa8575bc0aa2e23 Mon Sep 17 00:00:00 2001 From: Gary Wu Date: Tue, 16 Oct 2018 11:01:07 -0700 Subject: Add retries to handle intermittent network issues Change-Id: I1fbdbd27b9b480dfafed87b12cdad23e4f3626f6 Issue-ID: INT-586 Signed-off-by: Gary Wu --- deployment/heat/onap-oom/k8s_vm_entrypoint.sh | 27 +++++++++++-------- deployment/heat/onap-oom/rancher_vm_entrypoint.sh | 33 ++++++++++++++--------- deployment/heat/onap-oom/scripts/deploy.sh | 7 ++--- 3 files changed, 41 insertions(+), 26 deletions(-) (limited to 'deployment') diff --git a/deployment/heat/onap-oom/k8s_vm_entrypoint.sh b/deployment/heat/onap-oom/k8s_vm_entrypoint.sh index 54a674125..52f120498 100644 --- a/deployment/heat/onap-oom/k8s_vm_entrypoint.sh +++ b/deployment/heat/onap-oom/k8s_vm_entrypoint.sh @@ -32,29 +32,34 @@ Acquire::http { Proxy "http://__apt_proxy__"; }; Acquire::https::Proxy "DIRECT"; EOF fi -apt-get -y update mkdir -p /dockerdata-nfs echo "__rancher_private_ip_addr__:/dockerdata-nfs /dockerdata-nfs nfs auto,nofail,noatime,nolock,intr,tcp,actimeo=1800 0 0" | tee -a /etc/fstab -apt-get -y install linux-image-extra-$(uname -r) jq nfs-common - -cd ~ - -# install docker 17.03 -curl -s https://releases.rancher.com/install-docker/__docker_version__.sh | sh -usermod -aG docker ubuntu - # Fix virtual memory allocation for onap-log:elasticsearch: echo "vm.max_map_count=262144" >> /etc/sysctl.conf sysctl -p -sleep 100 + +while ! hash jq &> /dev/null; do + apt-get -y update + apt-get -y install linux-image-extra-$(uname -r) jq nfs-common + sleep 10 +done + +# install docker 17.03 +while ! hash docker &> /dev/null; do + curl -s https://releases.rancher.com/install-docker/__docker_version__.sh | sh + usermod -aG docker ubuntu + sleep 10 +done while [ ! -e /dockerdata-nfs/rancher_agent_cmd.sh ]; do mount /dockerdata-nfs - sleep 5 + sleep 10 done + +cd ~ cp /dockerdata-nfs/rancher_agent_cmd.sh . sed -i "s/docker run/docker run -e CATTLE_AGENT_IP=${HOST_IP}/g" rancher_agent_cmd.sh source rancher_agent_cmd.sh diff --git a/deployment/heat/onap-oom/rancher_vm_entrypoint.sh b/deployment/heat/onap-oom/rancher_vm_entrypoint.sh index c1b13a469..9e70da6d1 100644 --- a/deployment/heat/onap-oom/rancher_vm_entrypoint.sh +++ b/deployment/heat/onap-oom/rancher_vm_entrypoint.sh @@ -59,9 +59,12 @@ Acquire::http { Proxy "http://__apt_proxy__"; }; Acquire::https::Proxy "DIRECT"; EOF fi -apt-get -y update -apt-get -y install linux-image-extra-$(uname -r) jq make nfs-kernel-server moreutils +while ! hash jq &> /dev/null; do + apt-get -y update + apt-get -y install linux-image-extra-$(uname -r) jq make nfs-kernel-server moreutils + sleep 10 +done # use RAM disk for /dockerdata-nfs for testing #echo "tmpfs /dockerdata-nfs tmpfs noatime 1 2" >> /etc/fstab @@ -92,8 +95,11 @@ systemctl restart nfs-kernel-server cd ~ # install docker __docker_version__ -curl -s https://releases.rancher.com/install-docker/__docker_version__.sh | sh -usermod -aG docker ubuntu +while ! hash docker &> /dev/null; do + curl -s https://releases.rancher.com/install-docker/__docker_version__.sh | sh + usermod -aG docker ubuntu + sleep 10 +done # install rancher __rancher_version__ docker run --restart unless-stopped -d -p 8080:8080 -e CATTLE_BOOTSTRAP_REQUIRED_IMAGE=__docker_proxy__/rancher/agent:v__rancher_agent_version__ __docker_proxy__/rancher/server:v__rancher_version__ @@ -112,9 +118,8 @@ sudo mv linux-amd64/helm /usr/local/bin/helm echo export RANCHER_IP=__rancher_private_ip_addr__ > api-keys-rc source api-keys-rc -sleep 50 until curl -s -o projects.json -H "Accept: application/json" http://$RANCHER_IP:8080/v2-beta/projects; do - sleep 10 + sleep 30 done OLD_PID=$(jq -r '.data[0].id' projects.json) @@ -195,10 +200,14 @@ EOF export KUBECONFIG=/root/.kube/config kubectl config view +# Enable auto-completion for kubectl +echo "source <(kubectl completion bash)" >> ~/.bashrc + + # wait for kubernetes to initialze -sleep 100 +sleep 3m until [ $(kubectl get pods --namespace kube-system | tail -n +2 | grep -c Running) -ge 6 ]; do - sleep 10 + sleep 1m done @@ -246,7 +255,7 @@ cd ~/oom/kubernetes/ helm init --client-only helm init --upgrade helm serve & -sleep 3 +sleep 10 helm repo add local http://127.0.0.1:8879 helm repo list make all @@ -255,9 +264,9 @@ helm search -l | grep local helm deploy dev local/onap -f ~/integration-override.yaml --namespace onap | tee ~/helm-deploy.log helm list -# Enable auto-completion for kubectl -echo "source <(kubectl completion bash)" >> ~/.bashrc # Check ONAP status: -sleep 3 +sleep 10 kubectl get pods --all-namespaces +kubectl get nodes +kubectl top nodes diff --git a/deployment/heat/onap-oom/scripts/deploy.sh b/deployment/heat/onap-oom/scripts/deploy.sh index e97c5a3f5..459af34bc 100755 --- a/deployment/heat/onap-oom/scripts/deploy.sh +++ b/deployment/heat/onap-oom/scripts/deploy.sh @@ -153,12 +153,13 @@ fi ssh-keygen -R $RANCHER_IP +sleep 2m ssh -o StrictHostKeychecking=no -i $SSH_KEY ubuntu@$RANCHER_IP "sed -u '/Cloud-init.*finished/q' <(tail -n+0 -f /var/log/cloud-init-output.log)" -for n in $(seq 1 6); do - echo "Wait count $n of 6" +for n in $(seq 1 8); do + echo "Wait count $n of 8" sleep 15m - timeout 15m ssh -i $SSH_KEY ubuntu@$RANCHER_IP 'sudo su -l root -c "/root/oom/kubernetes/robot/ete-k8s.sh onap health"' + ssh -i $SSH_KEY ubuntu@$RANCHER_IP 'sudo su -l root -c "/root/oom/kubernetes/robot/ete-k8s.sh onap health"' RESULT=$? if [ $RESULT -eq 0 ]; then break -- cgit 1.2.3-korg