Merge changes from topic "OOM-1806"

* changes: Add doc for new healthcheck feature in helm_deployment_status.py Make healthcheck script output unbuffered Support running healthcheck in any mode supported Always show healthcheck output if it was run Support SSL certificate based authentication to kube API Add Kubernetes class to track API connection settings
author: Michal Ptacek <m.ptacek@partner.samsung.com> 2019-06-03 10:41:03 +0000
committer: Gerrit Code Review <gerrit@onap.org> 2019-06-03 10:41:03 +0000
commit: 82052ddde7b2896c05c7b930d8e09d582025f0b3 (patch)
tree: 77c7a36634b35fe82f7795fbdce787181ba33782
parent: f45f73f749fffb301d825ec53fae2878c505980d (diff)
parent: 32bf2fbf337448d5a43b630677dbd72756f00cc0 (diff)
2 files changed, 122 insertions, 59 deletions
diff --git a/docs/InstallGuide.rst b/docs/InstallGuide.rst
index 7849047d..95b5749f 100644
--- a/docs/InstallGuide.rst
+++ b/docs/InstallGuide.rst
@@ -406,23 +406,23 @@ This will take a while so be patient.
 
 .. _oooi_installguide_postinstall:
 
-Part 4. Postinstallation and troubleshooting
---------------------------------------------
+Part 4. Post-installation and troubleshooting
+---------------------------------------------
 
-After all the playbooks are finished, it will still take a lot of time until all pods will be up and running. You can monitor your newly created kubernetes cluster for example like this::
+After all of the playbooks are run successfully, it will still take a lot of time until all pods are up and running. You can monitor your newly created kubernetes cluster for example like this::
 
     $ ssh -i ~/.ssh/offline_ssh_key root@10.8.8.4 # tailor this command to connect to your infra-node
     $ watch -d -n 5 'kubectl get pods --all-namespaces'
 
-Alternatively you can monitor progress with ``helm_deployment_status.py`` script located in offline-installer directory. While on infra-node this can be achieved like this::
+Alternatively you can monitor progress with ``helm_deployment_status.py`` script located in offline-installer directory. Transfer it to infra-node and run::
 
     $ python helm_deployment_status.py -n <namespace_name> # namespace defaults to onap
 
-To automatically verify functionality, after deployment becomes ready or after timeout period expires, add path to healthcheck scripts::
+To automatically verify functionality with healthchecks after deployment becomes ready or after timeout period expires, append ``-hp`` switch followed by the full path to the healthcheck script and ``--health-mode`` optional switch with appropriate mode supported by that script (``health`` by default, ``--help`` displays available modes)::
 
-    $ python helm_deployment_status.py -hp <app_data_path>/<app_name>/helm_charts/robot/ete-k8s.sh
+    $ python helm_deployment_status.py -hp <app_data_path>/<app_name>/helm_charts/robot/ete-k8s.sh --health-mode <healthcheck mode>
 
-It is strongly recommended to check help for ``helm_deployment_status.py`` to be able to tailor monitoring to your needs since default values might not be what you are looking for.
+It is strongly recommended to tailor ``helm_deployment_status.py`` to your needs since default values might not be what you'd expect. The defaults can be displayed with ``--help`` switch.
 
 Final result of installation varies based on number of k8s nodes used and distribution of pods. In some dev envs we quite frequently hit problems with not all pods properly deployed. In successful deployments all jobs should be in successful state.
 This can be verified using ::
diff --git a/helm_deployment_status.py b/helm_deployment_status.py
index 448263d5..8f9a931d 100755
--- a/helm_deployment_status.py
+++ b/helm_deployment_status.py
@@ -25,30 +25,21 @@ import sys
 import argparse
 import yaml
 import requests
-import subprocess
+from subprocess import Popen,STDOUT,PIPE
 import datetime
 from time import sleep
 from os.path import expanduser
 from itertools import chain
 import csv
 from requests.packages.urllib3.exceptions import InsecureRequestWarning
-
+from base64 import b64decode
+from tempfile import NamedTemporaryFile
 
 def add_resource_kind(resources, kind):
     for item in resources:
         item['kind'] = kind
     return resources
 
-def get_resources(server, namespace, api, kind, ssl_verify=False):
-    url = '/'.join([server, api, 'namespaces', namespace, kind])
-    try:
-        req = requests.get(url, verify=ssl_verify)
-    except requests.exceptions.ConnectionError as err:
-        sys.exit('Could not connect to {}'.format(server))
-    json = req.json()
-    # kind is <resource>List in response so [:-4] removes 'List' from value
-    return add_resource_kind(json['items'], json['kind'][:-4])
-
 def pods_by_parent(pods, parent):
     for pod in pods:
         if pod['metadata']['labels']['app'] == parent:
@@ -87,48 +78,44 @@ def analyze_k8s_controllers(resources_data):
 
     return resources
 
-def get_k8s_controllers(namespace, k8s_url):
+def get_k8s_controllers(k8s):
     k8s_controllers = {}
 
-    k8s_controllers['deployments'] = {'data': get_resources(k8s_url, namespace,
+    k8s_controllers['deployments'] = {'data': k8s.get_resources(
         'apis/apps/v1', 'deployments')}
-    k8s_controllers['deployments'].update(analyze_k8s_controllers(k8s_controllers['deployments']['data']))
+    k8s_controllers['deployments'].update(analyze_k8s_controllers(
+        k8s_controllers['deployments']['data']))
 
-    k8s_controllers['statefulsets'] = {'data': get_resources(k8s_url, namespace,
+    k8s_controllers['statefulsets'] = {'data': k8s.get_resources(
         'apis/apps/v1', 'statefulsets')}
-    k8s_controllers['statefulsets'].update(analyze_k8s_controllers(k8s_controllers['statefulsets']['data']))
+    k8s_controllers['statefulsets'].update(analyze_k8s_controllers(
+        k8s_controllers['statefulsets']['data']))
 
-    k8s_controllers['jobs'] = {'data': get_resources(k8s_url, namespace,
+    k8s_controllers['jobs'] = {'data': k8s.get_resources(
         'apis/batch/v1', 'jobs')}
-    k8s_controllers['jobs'].update(analyze_k8s_controllers(k8s_controllers['jobs']['data']))
+    k8s_controllers['jobs'].update(analyze_k8s_controllers(
+        k8s_controllers['jobs']['data']))
 
     not_ready_controllers = chain.from_iterable(
             k8s_controllers[x]['not_ready_list'] for x in k8s_controllers)
 
     return k8s_controllers, list(not_ready_controllers)
 
-def get_k8s_url(kube_config):
-    # TODO: Get login info
-    with open(kube_config) as f:
-        config = yaml.load(f)
-    # TODO: Support cluster by name
-    return config['clusters'][0]['cluster']['server']
-
-def exec_healthcheck(hp_script, namespace):
-    try:
-        hc = subprocess.check_output(
-                ['sh', hp_script, namespace, 'health'],
-                stderr=subprocess.STDOUT)
-        return 0, hc.output
-    except subprocess.CalledProcessError as err:
-        return err.returncode, err.output
+def exec_healthcheck(hp_script, namespace, hp_mode):
+    # spawn healthcheck script and redirect it's stderr to stdout
+    hc = Popen(['sh',hp_script,namespace,hp_mode],stdout=PIPE,stderr=STDOUT)
+    # Trace the output of subprocess until it has finished
+    for line in iter(hc.stdout.readline, ''):
+        print(line.strip())
+    hc.poll() # set returncode in Popen object
+    return hc.returncode
 
-def check_readiness(k8s_url, namespace, verbosity):
-        k8s_controllers, not_ready_controllers = get_k8s_controllers(namespace, k8s_url)
+def check_readiness(k8s, verbosity):
+        k8s_controllers, not_ready_controllers = get_k8s_controllers(k8s)
 
         # check pods only when it is explicitly wanted (judging readiness by deployment status)
         if verbosity > 1:
-            pods = get_resources(k8s_url, namespace, 'api/v1', 'pods')
+            pods = k8s.get_resources('api/v1', 'pods')
             unready_pods = chain.from_iterable(
                    get_names(not_ready_pods(
                        pods_by_parent(pods, x)))
@@ -139,11 +126,11 @@ def check_readiness(k8s_url, namespace, verbosity):
         print_status(verbosity, k8s_controllers, unready_pods)
         return not not_ready_controllers
 
-def check_in_loop(k8s_url, namespace, max_time, sleep_time, verbosity):
+def check_in_loop(k8s, max_time, sleep_time, verbosity):
     max_end_time = datetime.datetime.now() + datetime.timedelta(minutes=max_time)
     ready = False
     while datetime.datetime.now() < max_end_time:
-        ready = check_readiness(k8s_url, namespace, verbosity)
+        ready = check_readiness(k8s, verbosity)
         if ready:
             return ready
         sleep(sleep_time)
@@ -184,7 +171,8 @@ def print_status(verbosity, resources, not_ready_pods):
     print('\n'.join(status_strings), '\n')
 
 def parse_args():
-    parser = argparse.ArgumentParser(description='Monitor ONAP deployment progress')
+    parser = argparse.ArgumentParser(description='Monitor ONAP deployment progress',
+            formatter_class=argparse.ArgumentDefaultsHelpFormatter)
     parser.add_argument('--namespace', '-n', default='onap',
             help='Kubernetes namespace of ONAP')
     parser.add_argument('--server', '-s', help='address of Kubernetes cluster')
@@ -192,6 +180,9 @@ def parse_args():
             default=expanduser('~') + '/.kube/config',
             help='path to .kube/config file')
     parser.add_argument('--health-path', '-hp', help='path to ONAP robot ete-k8s.sh')
+    parser.add_argument('--health-mode', default='health', help='healthcheck mode',
+            choices=('health','healthdist','distribute','instantiate','instantiateVFWCL',
+                     'instantiateDemoVFWCL','portal'))
     parser.add_argument('--no-helm', action='store_true', help='Do not check Helm')
     parser.add_argument('--check-frequency', '-w', default=300, type=int,
             help='time between readiness checks in seconds')
@@ -201,9 +192,86 @@ def parse_args():
             help='run check loop only once')
     parser.add_argument('-v', dest='verbosity', action='count', default=0,
             help='increase output verbosity, e.g. -vv is more verbose than -v')
+    parser.add_argument('--no-ssl-auth', action='store_true',
+            help='Disable SSL certificate based authentication while connecting to server')
 
     return parser.parse_args()
 
+class Kubernetes:
+    '''Class exposing get_resources() routine for connecting to kube API.
+       It keeps all attributes required by that call as an internal
+       object state.'''
+
+    requests.packages.urllib3.disable_warnings(InsecureRequestWarning)
+
+    def __init__(self,args):
+
+        self.config = args.kubeconfig
+        self.url = args.server if args.server is not None else \
+                   self._get_k8s_url()
+        self.no_ssl_auth = args.no_ssl_auth
+        self.certs = self._get_k8s_certs() if not self.no_ssl_auth else {}
+        self.namespace = args.namespace
+
+        # Setup tmp file with ca chain only if certs were gathered successfully
+        # and --no-ssl-auth wasn't set
+        if self.certs and not self.no_ssl_auth:
+            self._setup_cert_files()
+
+    def get_resources(self, api, kind):
+        '''Performs actual API call'''
+        url = '/'.join([self.url, api, 'namespaces', self.namespace, kind])
+        try:
+            if self.no_ssl_auth:
+                req = requests.get(url, verify=False)
+            else:
+                req = requests.get(url, verify=self.crt_tmp_file.name, cert=self.crt_tmp_file.name)
+        except requests.exceptions.ConnectionError as err:
+            sys.exit('Error: Could not connect to {}'.format(self.url))
+        if req.status_code == 200:
+            json = req.json()
+            # kind is <resource>List in response so [:-4] removes 'List' from value
+            return add_resource_kind(json['items'], json['kind'][:-4])
+        elif (req.status_code == 401):
+            sys.exit('Error: Server replied with "401 Unauthorized" while making connection')
+        else:
+            sys.exit("Error: There's been an unspecified issue while making a request to the API")
+
+    def _setup_cert_files(self):
+        '''Helper funtion to setup named file for requests.get() call
+           in self.get_resources() which is able read certificate only
+           from file'''
+        ca_chain = NamedTemporaryFile()
+        for crt in self.certs.values():
+            ca_chain.write(crt)
+        ca_chain.read() # flush the file buffer
+        self.crt_tmp_file = ca_chain
+
+    def _get_k8s_url(self):
+        # TODO: Get login info
+        with open(self.config) as f:
+            config = yaml.load(f)
+        # TODO: Support cluster by name
+        return config['clusters'][0]['cluster']['server']
+
+    def _get_k8s_certs(self):
+        '''Helper function to read and decode certificates from kube config'''
+        with open(self.config) as f:
+            config = yaml.load(f)
+        certs = {}
+        try:
+            certs.update(dict(ca_cert=b64decode(
+              config['clusters'][0]['cluster']['certificate-authority-data'])))
+            certs.update(dict(client_cert=b64decode(
+              config['users'][0]['user']['client-certificate-data'])))
+            certs.update(dict(client_key=b64decode(
+              config['users'][0]['user']['client-key-data'])))
+        except KeyError as err:
+            print('Warning: could not get Kubernetes config for certificates. ' \
+                      'Turning off SSL authentication.')
+            self.no_ssl_auth = True
+        return certs
+
 def main():
     args = parse_args()
 
@@ -218,25 +286,20 @@ def main():
         except IOError as err:
             sys.exit(err.strerror)
 
-    requests.packages.urllib3.disable_warnings(InsecureRequestWarning)
-    k8s_url = args.server if args.server is not None else get_k8s_url(args.kubeconfig)
+    k8s = Kubernetes(args)
 
     ready = False
     if args.single_run:
-        ready = check_readiness(k8s_url, args.namespace, args.verbosity)
+        ready = check_readiness(k8s, args.verbosity)
     else:
-        if not check_in_loop(k8s_url, args.namespace, args.max_time, args.check_frequency, args.verbosity):
+        if not check_in_loop(k8s, args.max_time, args.check_frequency, args.verbosity):
             # Double-check last 5 minutes and write verbosely in case it is not ready
-            ready = check_readiness(k8s_url, args.namespace, 2)
+            ready = check_readiness(k8s, 2)
 
     if args.health_path is not None:
-        try:
-            hc_rc, hc_output = exec_healthcheck(args.health_path, args.namespace)
-        except IOError as err:
-            sys.exit(err.strerror)
-        if args.verbosity > 1 or hc_rc > 0:
-            print(hc_output.decode('utf-8'))
-        sys.exit(hc_rc)
+        hc_rc = exec_healthcheck(args.health_path, args.namespace, args.health_mode)
+        if hc_rc:
+            sys.exit(hc_rc)
 
     if not ready:
         sys.exit('Deployment is not ready')
author	Michal Ptacek <m.ptacek@partner.samsung.com>	2019-06-03 10:41:03 +0000
committer	Gerrit Code Review <gerrit@onap.org>	2019-06-03 10:41:03 +0000
commit	82052ddde7b2896c05c7b930d8e09d582025f0b3 (patch)
tree	77c7a36634b35fe82f7795fbdce787181ba33782
parent	f45f73f749fffb301d825ec53fae2878c505980d (diff)
parent	32bf2fbf337448d5a43b630677dbd72756f00cc0 (diff)