From 1369bea8b3c24ef063799acefbfc01659878f034 Mon Sep 17 00:00:00 2001 From: Alex Shatov Date: Wed, 10 Jan 2018 11:00:50 -0500 Subject: variable collection of policies per component * new feature variable collection of policies per component in DCAE * massive refactoring * dissolved the external PolicyEngine.py into policy_receiver.py - kept only the web-socket communication to PolicyEngine * new /healthcheck - shows some stats of service running * Unit Test coverage 75% Change-Id: I816b7d5713ae0dd88fa73d3656f272b4f3e7946e Issue-ID: DCAEGEN2-249 Signed-off-by: Alex Shatov --- policyhandler/onap/audit.py | 142 ++++++++++++++++++++++++++++++------------- policyhandler/onap/health.py | 104 +++++++++++++++++++++++++++++++ 2 files changed, 205 insertions(+), 41 deletions(-) create mode 100644 policyhandler/onap/health.py (limited to 'policyhandler/onap') diff --git a/policyhandler/onap/audit.py b/policyhandler/onap/audit.py index a1df861..c338b76 100644 --- a/policyhandler/onap/audit.py +++ b/policyhandler/onap/audit.py @@ -27,14 +27,18 @@ # ECOMP is a trademark and service mark of AT&T Intellectual Property. 
import os +import sys import json import uuid import time import copy +from datetime import datetime from threading import Lock from enum import Enum +from pip import utils as pip_utils from .CommonLogger import CommonLogger +from .health import Health REQUEST_X_ECOMP_REQUESTID = "X-ECOMP-RequestID" REQUEST_REMOTE_ADDR = "Remote-Addr" @@ -44,6 +48,7 @@ HOSTNAME = "HOSTNAME" AUDIT_REQUESTID = 'requestID' AUDIT_IPADDRESS = 'IPAddress' AUDIT_SERVER = 'server' +AUDIT_TARGET_ENTITY = 'targetEntity' HEADER_CLIENTAUTH = "clientauth" HEADER_AUTHORIZATION = "authorization" @@ -51,9 +56,10 @@ HEADER_AUTHORIZATION = "authorization" class AuditHttpCode(Enum): """audit http codes""" HTTP_OK = 200 - DATA_NOT_FOUND_ERROR = 400 PERMISSION_UNAUTHORIZED_ERROR = 401 PERMISSION_FORBIDDEN_ERROR = 403 + RESPONSE_ERROR = 400 + DATA_NOT_FOUND_ERROR = 404 SERVER_INTERNAL_ERROR = 500 SERVICE_UNAVAILABLE_ERROR = 503 DATA_ERROR = 1030 @@ -72,23 +78,25 @@ class AuditResponseCode(Enum): @staticmethod def get_response_code(http_status_code): """calculates the response_code from max_http_status_code""" + response_code = AuditResponseCode.UNKNOWN_ERROR if http_status_code <= AuditHttpCode.HTTP_OK.value: - return AuditResponseCode.SUCCESS - - if http_status_code in [AuditHttpCode.PERMISSION_UNAUTHORIZED_ERROR.value, \ - AuditHttpCode.PERMISSION_FORBIDDEN_ERROR.value]: - return AuditResponseCode.PERMISSION_ERROR - if http_status_code == AuditHttpCode.SERVICE_UNAVAILABLE_ERROR.value: - return AuditResponseCode.AVAILABILITY_ERROR - if http_status_code == AuditHttpCode.SERVER_INTERNAL_ERROR.value: - return AuditResponseCode.BUSINESS_PROCESS_ERROR - if http_status_code in [AuditHttpCode.DATA_ERROR.value, \ - AuditHttpCode.DATA_NOT_FOUND_ERROR.value]: - return AuditResponseCode.DATA_ERROR - if http_status_code == AuditHttpCode.SCHEMA_ERROR.value: - return AuditResponseCode.SCHEMA_ERROR - - return AuditResponseCode.UNKNOWN_ERROR + response_code = AuditResponseCode.SUCCESS + + elif http_status_code in 
[AuditHttpCode.PERMISSION_UNAUTHORIZED_ERROR.value, + AuditHttpCode.PERMISSION_FORBIDDEN_ERROR.value]: + response_code = AuditResponseCode.PERMISSION_ERROR + elif http_status_code == AuditHttpCode.SERVICE_UNAVAILABLE_ERROR.value: + response_code = AuditResponseCode.AVAILABILITY_ERROR + elif http_status_code == AuditHttpCode.SERVER_INTERNAL_ERROR.value: + response_code = AuditResponseCode.BUSINESS_PROCESS_ERROR + elif http_status_code in [AuditHttpCode.DATA_ERROR.value, + AuditHttpCode.RESPONSE_ERROR.value, + AuditHttpCode.DATA_NOT_FOUND_ERROR.value]: + response_code = AuditResponseCode.DATA_ERROR + elif http_status_code == AuditHttpCode.SCHEMA_ERROR.value: + response_code = AuditResponseCode.SCHEMA_ERROR + + return response_code @staticmethod def get_human_text(response_code): @@ -109,16 +117,23 @@ class Audit(object): :kwargs: - put any request related params into kwargs """ _service_name = "" + _service_version = "" _service_instance_UUID = str(uuid.uuid4()) + _started = datetime.now() _logger_debug = None _logger_error = None _logger_metrics = None _logger_audit = None + _health = Health() + _py_ver = sys.version.replace("\n", "") + _packages = sorted([pckg.project_name + "==" + pckg.version + for pckg in pip_utils.get_installed_distributions()]) @staticmethod - def init(service_name, config_file_path): + def init(service_name, service_version, config_file_path): """init static invariants and loggers""" Audit._service_name = service_name + Audit._service_version = service_version Audit._logger_debug = CommonLogger(config_file_path, "debug", \ instanceUUID=Audit._service_instance_UUID, serviceName=Audit._service_name) Audit._logger_error = CommonLogger(config_file_path, "error", \ @@ -128,6 +143,22 @@ class Audit(object): Audit._logger_audit = CommonLogger(config_file_path, "audit", \ instanceUUID=Audit._service_instance_UUID, serviceName=Audit._service_name) + @staticmethod + def health(): + """returns json for health check""" + now = datetime.now() + return { + 
"service_name" : Audit._service_name, + "service_version" : Audit._service_version, + "service_instance_UUID" : Audit._service_instance_UUID, + "python" : Audit._py_ver, + "started" : str(Audit._started), + "now" : str(now), + "uptime" : str(now - Audit._started), + "stats" : Audit._health.dump(), + "packages" : Audit._packages + } + def __init__(self, request_id=None, req_message=None, aud_parent=None, **kwargs): """create audit object per each request in the system @@ -193,6 +224,13 @@ class Audit(object): self.max_http_status_code = max(http_status_code, self.max_http_status_code) self._lock.release() + def get_max_http_status_code(self): + """returns the highest(worst) http status code""" + self._lock.acquire() + max_http_status_code = self.max_http_status_code + self._lock.release() + return max_http_status_code + @staticmethod def get_status_code(success): """COMPLETE versus ERROR""" @@ -222,12 +260,24 @@ class Audit(object): return json.dumps(Audit.hide_secrets(copy.deepcopy(obj)), **kwargs) - def get_response_code(self): - """calculates the response_code from max_http_status_code""" - self._lock.acquire() - max_http_status_code = self.max_http_status_code - self._lock.release() - return AuditResponseCode.get_response_code(max_http_status_code) + def is_serious_error(self, status_code): + """returns whether the response_code is success and a human text for response code""" + return AuditResponseCode.PERMISSION_ERROR.value \ + == AuditResponseCode.get_response_code(status_code).value \ + or self.get_max_http_status_code() >= AuditHttpCode.SERVER_INTERNAL_ERROR.value + + def _get_response_status(self): + """calculates the response status fields from max_http_status_code""" + max_http_status_code = self.get_max_http_status_code() + response_code = AuditResponseCode.get_response_code(max_http_status_code) + success = (response_code.value == AuditResponseCode.SUCCESS.value) + response_description = AuditResponseCode.get_human_text(response_code) + return success, 
max_http_status_code, response_code, response_description + + def is_success(self): + """returns whether the response_code is success and a human text for response code""" + success, _, _, _ = self._get_response_status() + return success def debug(self, log_line, **kwargs): """debug - the debug=lowest level of logging""" @@ -275,46 +325,56 @@ class Audit(object): def metrics(self, log_line, **kwargs): """debug+metrics - the metrics=sub-audit level of logging""" all_kwargs = self.merge_all_kwargs(**kwargs) - response_code = self.get_response_code() - success = (response_code.value == AuditResponseCode.SUCCESS.value) + success, max_http_status_code, response_code, response_description = \ + self._get_response_status() metrics_func = None + timer = Audit.get_elapsed_time(self._metrics_started) if success: log_line = "done: {0}".format(log_line) self.info(log_line, **all_kwargs) metrics_func = Audit._logger_metrics.info + Audit._health.success(all_kwargs.get(AUDIT_TARGET_ENTITY, Audit._service_name), timer) else: log_line = "failed: {0}".format(log_line) self.error(log_line, errorCode=response_code.value, \ - errorDescription=AuditResponseCode.get_human_text(response_code), **all_kwargs) + errorDescription=response_description, **all_kwargs) metrics_func = Audit._logger_metrics.error + Audit._health.error(all_kwargs.get(AUDIT_TARGET_ENTITY, Audit._service_name), timer) - metrics_func(log_line, begTime=self._metrics_start_event, \ - timer=Audit.get_elapsed_time(self._metrics_started), \ - statusCode=Audit.get_status_code(success), responseCode=response_code.value, \ - responseDescription=AuditResponseCode.get_human_text(response_code), \ - **all_kwargs) + metrics_func(log_line, begTime=self._metrics_start_event, timer=timer, + statusCode=Audit.get_status_code(success), responseCode=response_code.value, + responseDescription=response_description, + **all_kwargs + ) self.metrics_start() + return (success, max_http_status_code, response_description) def audit_done(self, 
result=None, **kwargs): """debug+audit - the audit=top level of logging""" all_kwargs = self.merge_all_kwargs(**kwargs) - response_code = self.get_response_code() - success = (response_code.value == AuditResponseCode.SUCCESS.value) + success, max_http_status_code, response_code, response_description = \ + self._get_response_status() log_line = "{0} {1}".format(self.req_message, result or "").strip() audit_func = None + timer = Audit.get_elapsed_time(self._started) if success: log_line = "done: {0}".format(log_line) self.info(log_line, **all_kwargs) audit_func = Audit._logger_audit.info + Audit._health.success(all_kwargs.get(AUDIT_TARGET_ENTITY, Audit._service_name), timer) else: log_line = "failed: {0}".format(log_line) - self.error(log_line, errorCode=response_code.value, \ - errorDescription=AuditResponseCode.get_human_text(response_code), **all_kwargs) + self.error(log_line, errorCode=response_code.value, + errorDescription=response_description, **all_kwargs) audit_func = Audit._logger_audit.error + Audit._health.error(all_kwargs.get(AUDIT_TARGET_ENTITY, Audit._service_name), timer) + + audit_func(log_line, begTime=self._start_event, timer=timer, + statusCode=Audit.get_status_code(success), + responseCode=response_code.value, + responseDescription=response_description, + **all_kwargs + ) - audit_func(log_line, begTime=self._start_event, \ - timer=Audit.get_elapsed_time(self._started), \ - statusCode=Audit.get_status_code(success), responseCode=response_code.value, \ - responseDescription=AuditResponseCode.get_human_text(response_code), \ - **all_kwargs) + return (success, max_http_status_code, response_description) diff --git a/policyhandler/onap/health.py b/policyhandler/onap/health.py new file mode 100644 index 0000000..eefa7d2 --- /dev/null +++ b/policyhandler/onap/health.py @@ -0,0 +1,104 @@ +"""generic class to keep track of app health""" + +# org.onap.dcae +# ================================================================================ +# Copyright (c) 
# 2017 AT&T Intellectual Property. All rights reserved.
# ================================================================================
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============LICENSE_END=========================================================
#
# ECOMP is a trademark and service mark of AT&T Intellectual Property.

import uuid
from threading import Lock
from datetime import datetime


class HealthStats(object):
    """Keep track of call statistics for a single named target.

    Every read and write of the counters happens while holding ``self._lock``.
    """

    def __init__(self, name):
        """Create an empty set of counters.

        :param name: label of these stats; when falsy, a unique
            ``"stats_<uuid4>"`` label is generated instead
        """
        self._name = name or "stats_" + str(uuid.uuid4())
        self._lock = Lock()
        self._call_count = 0
        self._error_count = 0
        self._longest_timer = 0
        self._total_timer = 0
        self._last_success = None
        self._last_error = None

    def dump(self):
        """Return a dict snapshot of the stats.

        The timestamps are rendered with ``str()``, so a counter that was never
        hit shows up as the string ``"None"``.  The average is 0 until the
        first call is recorded (avoids dividing by zero).
        """
        with self._lock:
            return {
                "call_count": self._call_count,
                "error_count": self._error_count,
                "last_success": str(self._last_success),
                "last_error": str(self._last_error),
                "longest_timer_millisecs": self._longest_timer,
                # float() keeps true division on Python 2 as well
                "ave_timer_millisecs": (float(self._total_timer) / self._call_count
                                        if self._call_count else 0),
            }

    def _record(self, timer, is_error):
        """Shared bookkeeping behind success() and error().

        :param timer: elapsed time of the call (presumably milliseconds, going
            by the key names in dump() - confirm against the caller)
        :param is_error: True to count the call as failed
        """
        with self._lock:
            self._call_count += 1
            if is_error:
                self._error_count += 1
                self._last_error = datetime.now()
            else:
                self._last_success = datetime.now()
            self._total_timer += timer
            if not self._longest_timer or self._longest_timer < timer:
                self._longest_timer = timer

    def success(self, timer):
        """Record a successful execution that took ``timer``."""
        self._record(timer, is_error=False)

    def error(self, timer):
        """Record a failed execution that took ``timer``."""
        self._record(timer, is_error=True)


class Health(object):
    """Health stats for multiple requests, one HealthStats per stats name."""

    def __init__(self):
        """Start with no stats at all."""
        self._all_stats = {}
        self._lock = Lock()

    def _add_or_get_stats(self, stats_name):
        """Add to or get from the ever growing dict of HealthStats.

        :param stats_name: key of the stats to look up or create
        :returns: the HealthStats registered under ``stats_name``
        """
        with self._lock:
            stats = self._all_stats.get(stats_name)
            if not stats:
                stats = self._all_stats[stats_name] = HealthStats(stats_name)
            return stats

    def success(self, stats_name, timer):
        """Record a successful execution on ``stats_name``."""
        self._add_or_get_stats(stats_name).success(timer)

    def error(self, stats_name, timer):
        """Record a failed execution on ``stats_name``."""
        self._add_or_get_stats(stats_name).error(timer)

    def dump(self):
        """Return a dict of ``{stats_name: HealthStats.dump()}`` for all stats."""
        with self._lock:
            # items() exists on both Python 2 and 3; iteritems() is Python 2 only
            return {name: stats.dump() for name, stats in self._all_stats.items()}

# -- cgit 1.2.3-korg