Diffstat (limited to 'tools')
-rw-r--r--  tools/cicdansible/ansible.cfg                                                 |   1
-rw-r--r--  tools/cicdansible/group_vars/all.yml                                          |   4
-rw-r--r--  tools/cicdansible/heat/installer.yaml                                         |   5
-rw-r--r--  tools/cicdansible/roles/configure_instances/templates/daemon.json.j2          |  18
-rw-r--r--  tools/cicdansible/roles/setup_openstack_infrastructure/tasks/deploy/heat.yml  |   1
-rwxr-xr-x  tools/helm-healer.sh                                                          | 190
-rwxr-xr-x  tools/helm_deployment_status.py                                               |  30
-rwxr-xr-x  tools/remove_runtime_images.py                                                |   1
8 files changed, 165 insertions, 85 deletions
diff --git a/tools/cicdansible/ansible.cfg b/tools/cicdansible/ansible.cfg
index e74dda58..8ffd6ee2 100644
--- a/tools/cicdansible/ansible.cfg
+++ b/tools/cicdansible/ansible.cfg
@@ -4,6 +4,7 @@ stdout_callback=debug
 #Default verbosity level, for logging all module outputs.
 verbosity=1
+jinja2_extensions = jinja2.ext.do
 
 [inventory]
 #Fail when inventory parsing fails.
diff --git a/tools/cicdansible/group_vars/all.yml b/tools/cicdansible/group_vars/all.yml
index 3165e374..ef23eb98 100644
--- a/tools/cicdansible/group_vars/all.yml
+++ b/tools/cicdansible/group_vars/all.yml
@@ -37,11 +37,15 @@ subnet_range_start: "10.1.0.4"
 subnet_range_end: "10.1.0.254"
 #Ip address of router used as a gateway to external network.
 router_addr: "10.1.0.1"
+#A list of dns resolvers for all instances
+dns_nameservers: []
 #Cidr of external subnet to allow access to, 0.0.0.0/0 means allow internet access.
 # For offline deployment it is recommended to set this to a cidr of intranet.
 external_subnet_cidr: ""
 #Address of cicd docker registry.
 cicd_docker_registry: ""
+#Address of custom docker registry mirror
+docker_registry_mirror: ""
 #Number of nodes to deploy.
 num_nodes: "3"
 #Stack name to deploy on heat.
diff --git a/tools/cicdansible/heat/installer.yaml b/tools/cicdansible/heat/installer.yaml
index 1f65f73f..b7b03d1a 100644
--- a/tools/cicdansible/heat/installer.yaml
+++ b/tools/cicdansible/heat/installer.yaml
@@ -58,6 +58,10 @@ parameters:
     type: string
     constraints:
       - custom_constraint: ip_addr
+  dns_nameservers:
+    label: "dns resolvers"
+    description: "List of dns resolvers"
+    type: comma_delimited_list
   public_network_name:
     label: "name of the public network"
     description: "Name of the public, internet facing network, also allowing access to company internal hosts"
@@ -140,6 +144,7 @@ resources:
       allocation_pools:
         - { start: { get_param: subnet_range_start }, end: { get_param: subnet_range_end } }
       gateway_ip: { get_param: router_addr }
+      dns_nameservers: { get_param: dns_nameservers }
       ip_version: 4
   #A port connected to the private network, taken by router.
   routerport:
diff --git a/tools/cicdansible/roles/configure_instances/templates/daemon.json.j2 b/tools/cicdansible/roles/configure_instances/templates/daemon.json.j2
index 1c3ca9bb..01e5e512 100644
--- a/tools/cicdansible/roles/configure_instances/templates/daemon.json.j2
+++ b/tools/cicdansible/roles/configure_instances/templates/daemon.json.j2
@@ -1,3 +1,19 @@
+{% set insecure_registries = [] %}
+{% for registry in [cicd_docker_registry, docker_registry_mirror] %}
+{% if registry|length > 0 %}
+{% do insecure_registries.append(registry) %}
+{% endif %}
+{% endfor %}
 {
-"insecure-registries": ["{{ cicd_docker_registry }}"]
+"insecure-registries": [
+{%- for registry in insecure_registries %}
+"{{ registry }}"{% if not loop.last %}, {% else %}]{% endif %}
+{% endfor %}
+{% if docker_registry_mirror|length > 0 %}
+,
+"registry-mirrors": ["http://{{ docker_registry_mirror }}"]
 }
+{% else %}
+
+}
+{% endif %}
diff --git a/tools/cicdansible/roles/setup_openstack_infrastructure/tasks/deploy/heat.yml b/tools/cicdansible/roles/setup_openstack_infrastructure/tasks/deploy/heat.yml
index 25e7ac79..21dfadcf 100644
--- a/tools/cicdansible/roles/setup_openstack_infrastructure/tasks/deploy/heat.yml
+++ b/tools/cicdansible/roles/setup_openstack_infrastructure/tasks/deploy/heat.yml
@@ -24,6 +24,7 @@
       subnet_range_start: "{{ subnet_range_start }}"
       subnet_range_end: "{{ subnet_range_end }}"
       router_addr: "{{ router_addr }}"
+      dns_nameservers: "{{ dns_nameservers }}"
       auth_key: "{{ auth_public_key }}"
       image_name: "{{ image_name }}"
       node_flavor_name: "{{ node_flavor_name }}"
diff --git a/tools/helm-healer.sh b/tools/helm-healer.sh
index a6e5b398..92ddbdb7 100755
--- a/tools/helm-healer.sh
+++ b/tools/helm-healer.sh
@@ -1,4 +1,4 @@
-#!/bin/sh
+#!/bin/bash
 
 PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin
 
@@ -191,6 +191,7 @@ helm_undeploy()
 {
     msg "Undeploy helm release name: ${1}"
     helm undeploy ${1} --purge
+    sleep 15s
 }
 
 # arg: <job name>
@@ -212,41 +213,64 @@ delete_job()
     done
 }
 
-# arg: <resource> <release name>
-delete_resource()
+#arg: <component>
+get_resources_for_component()
 {
-    _resource="$1"
-    _release="$2"
-    msg "Delete ${_resource} for ${_release}..."
-    {
-        kubectl get ${_resource} -n ${NAMESPACE} \
-            --ignore-not-found=true \
-            --selector="release=${_release}" \
-            --no-headers=true
-
-        # this is due to missing "release" label in some pods
-        # grep for the rescue...
-        kubectl get ${_resource} -n ${NAMESPACE} \
-            --no-headers=true | grep "^${_release}[-]"
-    } | awk '{print $1}' | sort -u | while read -r _name _rest ; do
-        echo "Deleting '${_name}'"
-        kubectl delete ${_resource} -n ${NAMESPACE} \
-            --cascade=true \
-            --now=true \
-            --wait=true \
-            ${_name} \
-            2>&1 | grep -iv 'not[[:space:]]*found'
-
-        # wait for resource to be deleted
-        _output=start
-        while [ -n "$_output" ] && sleep 1 ; do
-            _output=$(kubectl get ${_resource} -n ${NAMESPACE} \
-                --ignore-not-found=true \
-                --no-headers=true \
-                --field-selector="metadata.name=${_name}")
-        done
+helm status $1 | awk -f <(cat - <<-'EOD'
+BEGIN {
+    work="no"
+    kind=""
+    a["dummy"]=""
+}
+
+$1 ~ ":" {
+    if ( $1 == "RESOURCES:" ) {
+        work="yes"
+} else {
+        work="no"
+}
+
+}
+
+$1 == "==>" {
+    split($2, a, "[/(]")
+    kind=a[2]
+}
+
+$1 != "NAME" && $1 != "==>" && work == "yes" && $1 !~ ":" && $1 != "" {
+    printf "%s/%s\n", kind, $1
+}
+
+EOD
+)
+}
+
+# arg: <resource>
+delete_resource()
+{
+    local _resource="$1"
+    local _kind="${_resource%/*}"
+    local _name="${_resource#*/}"
+
+
+    if kubectl get ${_resource} >/dev/null 2>&1; then
+        msg "${_resource} has not been removed with helm undeploy, manual removal is required. Proceeding"
+        kubectl delete ${_resource} -n ${NAMESPACE} \
+            --cascade=true \
+            --now=true \
+            --wait=true \
+            2>&1 | grep -iv 'not[[:space:]]*found'
+
+        # wait for resource to be deleted
+        _output=start
+        while [ -n "$_output" ] && sleep 1 ; do
+            _output=$(kubectl get ${_kind} ${_name} -n ${NAMESPACE} \
+                --ignore-not-found=true \
+                --no-headers=true )
         done
+        msg "Done"
+    fi
 }
 
 delete_namespace()
@@ -267,7 +291,18 @@ delete_namespace()
     done
 }
 
-# arg: [optional: subdir]
+delete_persistent_volume()
+{
+    _persistent_volume=$1
+    if kubectl get ${_persistent_volume} >/dev/null 2>&1; then
+        msg "${_persistent_volume} has not been removed with helm undeploy, manual removal is required. Proceeding"
+        #very often k8s hangs on Terminating state for pv due to still active pvc. It is better to delete pvc directly
+        _claim=$(kubectl get ${_persistent_volume} -o jsonpath='{ .spec.claimRef.name}')
+        delete_resource PersistentVolumeClaim/${_claim}
+    fi
+}
+
+# arg: [optional: directory]
 delete_storage()
 {
     _node=$(kubectl get nodes \
@@ -280,15 +315,11 @@ delete_storage()
         error "Could not list kubernetes nodes - SKIPPING DELETION"
     else
         if [ -n "$1" ] ; then
-            msg "Delete directory '${VOLUME_STORAGE}/${1}' on $_node"
-            ssh -T $_node <<EOF
-rm -rf "${VOLUME_STORAGE}/${1}"
-EOF
+            msg "Delete directory '${1}' on $_node"
+            ssh $_node "rm -rf '${1}'"
         else
             msg "Delete directories '${VOLUME_STORAGE}/*' on $_node"
-            ssh -T $_node <<EOF
-find "${VOLUME_STORAGE}" -maxdepth 1 -mindepth 1 -exec rm -rf '{}' \;
-EOF
+            ssh $_node "find '${VOLUME_STORAGE}' -maxdepth 1 -mindepth 1 -exec rm -rf '{}' \;"
         fi
     fi
 }
@@ -308,13 +339,7 @@ docker_cleanup()
 
     for _node in $_nodes ; do
         msg "Docker cleanup on $_node"
-        {
-            ssh -T $_node >/dev/null <<EOF
-if which docker >/dev/null ; then
-    docker system prune --force --all --volumes
-fi
-EOF
-        } &
+        ssh $_node "docker system prune --force --all --volumes" >/dev/null &
    done
 
     msg "We are waiting now for docker cleanup to finish on all nodes..."
@@ -338,28 +363,57 @@ is_helm_serve_running()
 # arg: <release name>
 undeploy_component()
 {
-    _chart=$(echo "$1" | sed 's/[^-]*-//')
-    helm_undeploy ${1}
-
-    # for all kubernetes resources: kubectl api-resources
-    # TODO: does deleted secret per component break something?
-    for x in jobs \
-             deployments \
-             services \
-             replicasets \
-             statefulsets \
-             daemonsets \
-             pods \
-             pvc \
-             pv \
-             ;
-    do
-        delete_resource ${x} ${1}
+    local _component=$1
+
+    #Because Helm undeploy is not reliable: Gathering resources assigned to component to track and remove orphans later
+    _component_resources=($(get_resources_for_component ${_component}))
+
+    declare -a _persistent_volumes
+    declare -a _standard
+    declare -a _unknown_kinds
+
+    for resource in ${_component_resources[@]}; do
+        case $resource in
+            CronJob/* | Job/* | Secret/* | ConfigMap/* | Pod/* | Service/* | Deployment/* | StatefulSet/*)
+                _standard+=(${resource});;
+            #Ignoring PVC, they will be handled along with PV as 'helm' status does not return them for some components
+            PersistentVolumeClaim/*)
+                ;;
+            PersistentVolume/*)
+                _persistent_volumes+=(${resource});;
+            *)
+                _unknown_kinds+=(${resource})
+        esac
     done
-    if [ -n "$VOLUME_STORAGE" ] ; then
-        msg "Persistent volume data deletion in directory: ${VOLUME_STORAGE}/${1}"
-        delete_storage "$1"
+
+    #Gathering physical location of directories for persistent volumes to delete them after undeploy
+    declare -a _physical_locations
+    for volume in ${_persistent_volumes[@]}; do
+        _physical_locations+=($(kubectl get ${volume} -o jsonpath='{ .spec.hostPath.path}' ))
+    done
+
+    helm_undeploy ${_component}
+
+    #Manual items removal
+    for resource in ${_standard[@]}; do
+        delete_resource ${resource}
+    done
+
+    for volume in ${_persistent_volumes[@]}; do
+        delete_persistent_volume ${volume}
+    done
+
+    for subdir in ${_physical_locations[@]}; do
+        delete_storage ${subdir}
+    done
+
+    if [ "${#_unknown_kinds[@]}" -ne 0 ] ; then
+        for resource in ${_unknown_kinds[@]}; do
+            error "Untracked resource kind present: ${resource}, attempting to delete it..."
+            delete_resource ${resource}
+        done
+        return
     fi
 }
diff --git a/tools/helm_deployment_status.py b/tools/helm_deployment_status.py
index 8917e992..f92773db 100755
--- a/tools/helm_deployment_status.py
+++ b/tools/helm_deployment_status.py
@@ -111,20 +111,20 @@ def exec_healthcheck(hp_script, namespace, hp_mode):
     return hc.returncode
 
 def check_readiness(k8s, verbosity):
-    k8s_controllers, not_ready_controllers = get_k8s_controllers(k8s)
-
-    # check pods only when it is explicitly wanted (judging readiness by deployment status)
-    if verbosity > 1:
-        pods = k8s.get_resources('api/v1', 'pods')
-        unready_pods = chain.from_iterable(
-            get_names(not_ready_pods(
-                pods_by_parent(pods, x)))
-            for x in not_ready_controllers)
-    else:
-        unready_pods = []
+    k8s_controllers, not_ready_controllers = get_k8s_controllers(k8s)
+
+    # check pods only when it is explicitly wanted (judging readiness by deployment status)
+    if verbosity > 1:
+        pods = k8s.get_resources('api/v1', 'pods')
+        unready_pods = chain.from_iterable(
+            get_names(not_ready_pods(
+                pods_by_parent(pods, x)))
+            for x in not_ready_controllers)
+    else:
+        unready_pods = []
 
-    print_status(verbosity, k8s_controllers, unready_pods)
-    return not not_ready_controllers
+    print_status(verbosity, k8s_controllers, unready_pods)
+    return not not_ready_controllers
 
 def check_in_loop(k8s, max_time, sleep_time, verbosity):
     max_end_time = datetime.datetime.now() + datetime.timedelta(minutes=max_time)
@@ -224,7 +224,7 @@ class Kubernetes:
                 req = requests.get(url, verify=False)
             else:
                 req = requests.get(url, verify=self.crt_tmp_file.name, cert=self.crt_tmp_file.name)
-        except requests.exceptions.ConnectionError as err:
+        except requests.exceptions.ConnectionError:
             sys.exit('Error: Could not connect to {}'.format(self.url))
         if req.status_code == 200:
             json = req.json()
@@ -264,7 +264,7 @@ class Kubernetes:
                 config['users'][0]['user']['client-certificate-data'])))
             certs.update(dict(client_key=b64decode(
                 config['users'][0]['user']['client-key-data'])))
-        except KeyError as err:
+        except KeyError:
             print('Warning: could not get Kubernetes config for certificates. ' \
                   'Turning off SSL authentication.')
             self.no_ssl_auth = True
diff --git a/tools/remove_runtime_images.py b/tools/remove_runtime_images.py
index 67d732bb..40f38eb1 100755
--- a/tools/remove_runtime_images.py
+++ b/tools/remove_runtime_images.py
@@ -76,4 +76,3 @@ def run_cli():
 
 if __name__ == '__main__':
     run_cli()
-