summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Michal Ptacek <m.ptacek@partner.samsung.com> 2019-06-03 10:41:03 +0000
committer Gerrit Code Review <gerrit@onap.org> 2019-06-03 10:41:03 +0000
commit 82052ddde7b2896c05c7b930d8e09d582025f0b3 (patch)
tree 77c7a36634b35fe82f7795fbdce787181ba33782
parent f45f73f749fffb301d825ec53fae2878c505980d (diff)
parent 32bf2fbf337448d5a43b630677dbd72756f00cc0 (diff)
Merge changes from topic "OOM-1806"
* changes: Add doc for new healthcheck feature in helm_deployment_status.py; Make healthcheck script output unbuffered; Support running healthcheck in any mode supported; Always show healthcheck output if it was run; Support SSL certificate based authentication to kube API; Add Kubernetes class to track API connection settings
-rw-r--r-- docs/InstallGuide.rst 14
-rwxr-xr-x helm_deployment_status.py 167
2 files changed, 122 insertions, 59 deletions
diff --git a/docs/InstallGuide.rst b/docs/InstallGuide.rst
index 7849047d..95b5749f 100644
--- a/docs/InstallGuide.rst
+++ b/docs/InstallGuide.rst
@@ -406,23 +406,23 @@ This will take a while so be patient.
.. _oooi_installguide_postinstall:
-Part 4. Postinstallation and troubleshooting
---------------------------------------------
+Part 4. Post-installation and troubleshooting
+---------------------------------------------
-After all the playbooks are finished, it will still take a lot of time until all pods will be up and running. You can monitor your newly created kubernetes cluster for example like this::
+After all of the playbooks are run successfully, it will still take a lot of time until all pods are up and running. You can monitor your newly created kubernetes cluster for example like this::
$ ssh -i ~/.ssh/offline_ssh_key root@10.8.8.4 # tailor this command to connect to your infra-node
$ watch -d -n 5 'kubectl get pods --all-namespaces'
-Alternatively you can monitor progress with ``helm_deployment_status.py`` script located in offline-installer directory. While on infra-node this can be achieved like this::
+Alternatively, you can monitor progress with the ``helm_deployment_status.py`` script located in the offline-installer directory. Transfer it to the infra-node and run::
$ python helm_deployment_status.py -n <namespace_name> # namespace defaults to onap
-To automatically verify functionality, after deployment becomes ready or after timeout period expires, add path to healthcheck scripts::
+To automatically verify functionality with healthchecks after the deployment becomes ready or after the timeout period expires, append the ``-hp`` switch followed by the full path to the healthcheck script, and optionally the ``--health-mode`` switch with a mode supported by that script (``health`` by default; ``--help`` displays the available modes)::
- $ python helm_deployment_status.py -hp <app_data_path>/<app_name>/helm_charts/robot/ete-k8s.sh
+ $ python helm_deployment_status.py -hp <app_data_path>/<app_name>/helm_charts/robot/ete-k8s.sh --health-mode <healthcheck mode>
-It is strongly recommended to check help for ``helm_deployment_status.py`` to be able to tailor monitoring to your needs since default values might not be what you are looking for.
+It is strongly recommended to tailor ``helm_deployment_status.py`` to your needs since the default values might not be what you'd expect. The defaults can be displayed with the ``--help`` switch.
Final result of installation varies based on number of k8s nodes used and distribution of pods. In some dev envs we quite frequently hit problems with not all pods properly deployed. In successful deployments all jobs should be in successful state.
This can be verified using ::
diff --git a/helm_deployment_status.py b/helm_deployment_status.py
index 448263d5..8f9a931d 100755
--- a/helm_deployment_status.py
+++ b/helm_deployment_status.py
@@ -25,30 +25,21 @@ import sys
import argparse
import yaml
import requests
-import subprocess
+from subprocess import Popen,STDOUT,PIPE
import datetime
from time import sleep
from os.path import expanduser
from itertools import chain
import csv
from requests.packages.urllib3.exceptions import InsecureRequestWarning
-
+from base64 import b64decode
+from tempfile import NamedTemporaryFile
def add_resource_kind(resources, kind):
for item in resources:
item['kind'] = kind
return resources
-def get_resources(server, namespace, api, kind, ssl_verify=False):
- url = '/'.join([server, api, 'namespaces', namespace, kind])
- try:
- req = requests.get(url, verify=ssl_verify)
- except requests.exceptions.ConnectionError as err:
- sys.exit('Could not connect to {}'.format(server))
- json = req.json()
- # kind is <resource>List in response so [:-4] removes 'List' from value
- return add_resource_kind(json['items'], json['kind'][:-4])
-
def pods_by_parent(pods, parent):
for pod in pods:
if pod['metadata']['labels']['app'] == parent:
@@ -87,48 +78,44 @@ def analyze_k8s_controllers(resources_data):
return resources
-def get_k8s_controllers(namespace, k8s_url):
+def get_k8s_controllers(k8s):
k8s_controllers = {}
- k8s_controllers['deployments'] = {'data': get_resources(k8s_url, namespace,
+ k8s_controllers['deployments'] = {'data': k8s.get_resources(
'apis/apps/v1', 'deployments')}
- k8s_controllers['deployments'].update(analyze_k8s_controllers(k8s_controllers['deployments']['data']))
+ k8s_controllers['deployments'].update(analyze_k8s_controllers(
+ k8s_controllers['deployments']['data']))
- k8s_controllers['statefulsets'] = {'data': get_resources(k8s_url, namespace,
+ k8s_controllers['statefulsets'] = {'data': k8s.get_resources(
'apis/apps/v1', 'statefulsets')}
- k8s_controllers['statefulsets'].update(analyze_k8s_controllers(k8s_controllers['statefulsets']['data']))
+ k8s_controllers['statefulsets'].update(analyze_k8s_controllers(
+ k8s_controllers['statefulsets']['data']))
- k8s_controllers['jobs'] = {'data': get_resources(k8s_url, namespace,
+ k8s_controllers['jobs'] = {'data': k8s.get_resources(
'apis/batch/v1', 'jobs')}
- k8s_controllers['jobs'].update(analyze_k8s_controllers(k8s_controllers['jobs']['data']))
+ k8s_controllers['jobs'].update(analyze_k8s_controllers(
+ k8s_controllers['jobs']['data']))
not_ready_controllers = chain.from_iterable(
k8s_controllers[x]['not_ready_list'] for x in k8s_controllers)
return k8s_controllers, list(not_ready_controllers)
-def get_k8s_url(kube_config):
- # TODO: Get login info
- with open(kube_config) as f:
- config = yaml.load(f)
- # TODO: Support cluster by name
- return config['clusters'][0]['cluster']['server']
-
-def exec_healthcheck(hp_script, namespace):
- try:
- hc = subprocess.check_output(
- ['sh', hp_script, namespace, 'health'],
- stderr=subprocess.STDOUT)
- return 0, hc.output
- except subprocess.CalledProcessError as err:
- return err.returncode, err.output
+def exec_healthcheck(hp_script, namespace, hp_mode):
+ # spawn healthcheck script and redirect its stderr to stdout
+ hc = Popen(['sh',hp_script,namespace,hp_mode],stdout=PIPE,stderr=STDOUT)
+ # Trace the output of subprocess until it has finished
+ for line in iter(hc.stdout.readline, ''):
+ print(line.strip())
+ hc.poll() # set returncode in Popen object
+ return hc.returncode
-def check_readiness(k8s_url, namespace, verbosity):
- k8s_controllers, not_ready_controllers = get_k8s_controllers(namespace, k8s_url)
+def check_readiness(k8s, verbosity):
+ k8s_controllers, not_ready_controllers = get_k8s_controllers(k8s)
# check pods only when it is explicitly wanted (judging readiness by deployment status)
if verbosity > 1:
- pods = get_resources(k8s_url, namespace, 'api/v1', 'pods')
+ pods = k8s.get_resources('api/v1', 'pods')
unready_pods = chain.from_iterable(
get_names(not_ready_pods(
pods_by_parent(pods, x)))
@@ -139,11 +126,11 @@ def check_readiness(k8s_url, namespace, verbosity):
print_status(verbosity, k8s_controllers, unready_pods)
return not not_ready_controllers
-def check_in_loop(k8s_url, namespace, max_time, sleep_time, verbosity):
+def check_in_loop(k8s, max_time, sleep_time, verbosity):
max_end_time = datetime.datetime.now() + datetime.timedelta(minutes=max_time)
ready = False
while datetime.datetime.now() < max_end_time:
- ready = check_readiness(k8s_url, namespace, verbosity)
+ ready = check_readiness(k8s, verbosity)
if ready:
return ready
sleep(sleep_time)
@@ -184,7 +171,8 @@ def print_status(verbosity, resources, not_ready_pods):
print('\n'.join(status_strings), '\n')
def parse_args():
- parser = argparse.ArgumentParser(description='Monitor ONAP deployment progress')
+ parser = argparse.ArgumentParser(description='Monitor ONAP deployment progress',
+ formatter_class=argparse.ArgumentDefaultsHelpFormatter)
parser.add_argument('--namespace', '-n', default='onap',
help='Kubernetes namespace of ONAP')
parser.add_argument('--server', '-s', help='address of Kubernetes cluster')
@@ -192,6 +180,9 @@ def parse_args():
default=expanduser('~') + '/.kube/config',
help='path to .kube/config file')
parser.add_argument('--health-path', '-hp', help='path to ONAP robot ete-k8s.sh')
+ parser.add_argument('--health-mode', default='health', help='healthcheck mode',
+ choices=('health','healthdist','distribute','instantiate','instantiateVFWCL',
+ 'instantiateDemoVFWCL','portal'))
parser.add_argument('--no-helm', action='store_true', help='Do not check Helm')
parser.add_argument('--check-frequency', '-w', default=300, type=int,
help='time between readiness checks in seconds')
@@ -201,9 +192,86 @@ def parse_args():
help='run check loop only once')
parser.add_argument('-v', dest='verbosity', action='count', default=0,
help='increase output verbosity, e.g. -vv is more verbose than -v')
+ parser.add_argument('--no-ssl-auth', action='store_true',
+ help='Disable SSL certificate based authentication while connecting to server')
return parser.parse_args()
+class Kubernetes:
+ '''Class exposing get_resources() routine for connecting to kube API.
+ It keeps all attributes required by that call as an internal
+ object state.'''
+
+ requests.packages.urllib3.disable_warnings(InsecureRequestWarning)
+
+ def __init__(self,args):
+
+ self.config = args.kubeconfig
+ self.url = args.server if args.server is not None else \
+ self._get_k8s_url()
+ self.no_ssl_auth = args.no_ssl_auth
+ self.certs = self._get_k8s_certs() if not self.no_ssl_auth else {}
+ self.namespace = args.namespace
+
+ # Setup tmp file with ca chain only if certs were gathered successfully
+ # and --no-ssl-auth wasn't set
+ if self.certs and not self.no_ssl_auth:
+ self._setup_cert_files()
+
+ def get_resources(self, api, kind):
+ '''Performs actual API call'''
+ url = '/'.join([self.url, api, 'namespaces', self.namespace, kind])
+ try:
+ if self.no_ssl_auth:
+ req = requests.get(url, verify=False)
+ else:
+ req = requests.get(url, verify=self.crt_tmp_file.name, cert=self.crt_tmp_file.name)
+ except requests.exceptions.ConnectionError as err:
+ sys.exit('Error: Could not connect to {}'.format(self.url))
+ if req.status_code == 200:
+ json = req.json()
+ # kind is <resource>List in response so [:-4] removes 'List' from value
+ return add_resource_kind(json['items'], json['kind'][:-4])
+ elif (req.status_code == 401):
+ sys.exit('Error: Server replied with "401 Unauthorized" while making connection')
+ else:
+ sys.exit("Error: There's been an unspecified issue while making a request to the API")
+
+ def _setup_cert_files(self):
+ '''Helper function to set up a named file for the requests.get() call
+ in self.get_resources(), which is able to read certificates only
+ from a file'''
+ ca_chain = NamedTemporaryFile()
+ for crt in self.certs.values():
+ ca_chain.write(crt)
+ ca_chain.read() # flush the file buffer
+ self.crt_tmp_file = ca_chain
+
+ def _get_k8s_url(self):
+ # TODO: Get login info
+ with open(self.config) as f:
+ config = yaml.load(f)
+ # TODO: Support cluster by name
+ return config['clusters'][0]['cluster']['server']
+
+ def _get_k8s_certs(self):
+ '''Helper function to read and decode certificates from kube config'''
+ with open(self.config) as f:
+ config = yaml.load(f)
+ certs = {}
+ try:
+ certs.update(dict(ca_cert=b64decode(
+ config['clusters'][0]['cluster']['certificate-authority-data'])))
+ certs.update(dict(client_cert=b64decode(
+ config['users'][0]['user']['client-certificate-data'])))
+ certs.update(dict(client_key=b64decode(
+ config['users'][0]['user']['client-key-data'])))
+ except KeyError as err:
+ print('Warning: could not get Kubernetes config for certificates. ' \
+ 'Turning off SSL authentication.')
+ self.no_ssl_auth = True
+ return certs
+
def main():
args = parse_args()
@@ -218,25 +286,20 @@ def main():
except IOError as err:
sys.exit(err.strerror)
- requests.packages.urllib3.disable_warnings(InsecureRequestWarning)
- k8s_url = args.server if args.server is not None else get_k8s_url(args.kubeconfig)
+ k8s = Kubernetes(args)
ready = False
if args.single_run:
- ready = check_readiness(k8s_url, args.namespace, args.verbosity)
+ ready = check_readiness(k8s, args.verbosity)
else:
- if not check_in_loop(k8s_url, args.namespace, args.max_time, args.check_frequency, args.verbosity):
+ if not check_in_loop(k8s, args.max_time, args.check_frequency, args.verbosity):
# Double-check last 5 minutes and write verbosely in case it is not ready
- ready = check_readiness(k8s_url, args.namespace, 2)
+ ready = check_readiness(k8s, 2)
if args.health_path is not None:
- try:
- hc_rc, hc_output = exec_healthcheck(args.health_path, args.namespace)
- except IOError as err:
- sys.exit(err.strerror)
- if args.verbosity > 1 or hc_rc > 0:
- print(hc_output.decode('utf-8'))
- sys.exit(hc_rc)
+ hc_rc = exec_healthcheck(args.health_path, args.namespace, args.health_mode)
+ if hc_rc:
+ sys.exit(hc_rc)
if not ready:
sys.exit('Deployment is not ready')