aboutsummaryrefslogtreecommitdiffstats
path: root/policyhandler/onap/health.py
diff options
context:
space:
mode:
authorAlex Shatov <alexs@att.com>2018-05-10 09:23:16 -0400
committerAlex Shatov <alexs@att.com>2018-05-10 09:23:16 -0400
commitf2d7bef13705812c1bf147c2fb65162fbf385c6b (patch)
treee6efb8e25287576a48952942aacdb3cf84a825ff /policyhandler/onap/health.py
parent50bed534083c96cbf1f8fa4e220cb2b00dff9621 (diff)
2.4.3 policy-handler - try-catch top Exceptions
- added try-except for top level Exception into all threads of policy-handler to avoid losing the thread and tracking the unexpected crashes - rediscover the deployment-handler if not found before and after each catchup - refactored audit - separated metrics from audit - added more stats and runtime info to healthcheck = gc counts and garbage info if any detected = memory usage - to detect the potential memory leaks = request_id to all stats = stats of active requests - avoid reallocating the whole Queue of policy-updates after catchup = clear of the internal queue under proper lock Change-Id: I3fabcaac70419a68bd070ff7d591a75942f37663 Signed-off-by: Alex Shatov <alexs@att.com> Issue-ID: DCAEGEN2-483
Diffstat (limited to 'policyhandler/onap/health.py')
-rw-r--r--policyhandler/onap/health.py93
1 files changed, 75 insertions, 18 deletions
diff --git a/policyhandler/onap/health.py b/policyhandler/onap/health.py
index 485f422..e6a6f69 100644
--- a/policyhandler/onap/health.py
+++ b/policyhandler/onap/health.py
@@ -28,46 +28,95 @@ class HealthStats(object):
"""keep track of stats for metrics calls"""
self._name = name or "stats_" + str(uuid.uuid4())
self._lock = Lock()
+
self._call_count = 0
self._error_count = 0
+ self._active_count = 0
+
self._longest_timer = 0
self._total_timer = 0
+
self._last_success = None
self._last_error = None
+ self._last_start = None
+ self._longest_end_ts = None
+
+ self._last_success_request_id = None
+ self._last_error_request_id = None
+ self._last_started_request_id = None
+ self._longest_request_id = None
+
def dump(self):
"""returns dict of stats"""
dump = None
with self._lock:
dump = {
- "call_count" : self._call_count,
- "error_count" : self._error_count,
- "last_success" : str(self._last_success),
- "last_error" : str(self._last_error),
- "longest_timer_millisecs" : self._longest_timer,
- "ave_timer_millisecs" : (float(self._total_timer)/self._call_count \
- if self._call_count else 0)
+ "total" : {
+ "call_count" : self._call_count,
+ "ave_timer_millisecs" : (float(self._total_timer)/self._call_count
+ if self._call_count else 0)
+ },
+ "success" : {
+ "success_count" : (self._call_count - self._error_count),
+ "last_success" : str(self._last_success),
+ "last_success_request_id" : self._last_success_request_id
+ },
+ "error" : {
+ "error_count" : self._error_count,
+ "last_error" : str(self._last_error),
+ "last_error_request_id" : self._last_error_request_id
+ },
+ "active" : {
+ "active_count" : self._active_count,
+ "last_start" : str(self._last_start),
+ "last_started_request_id" : self._last_started_request_id
+ },
+ "longest" : {
+ "longest_timer_millisecs" : self._longest_timer,
+ "longest_request_id" : self._longest_request_id,
+ "longest_end" : str(self._longest_end_ts)
+ }
}
return dump
- def success(self, timer):
+
+ def start(self, request_id=None):
+ """records the start of active execution"""
+ with self._lock:
+ self._active_count += 1
+ self._last_start = datetime.utcnow()
+ self._last_started_request_id = request_id
+
+
+ def success(self, timer, request_id=None):
"""records the successful execution"""
with self._lock:
+ self._active_count -= 1
self._call_count += 1
- self._last_success = datetime.now()
+ self._last_success = datetime.utcnow()
+ self._last_success_request_id = request_id
self._total_timer += timer
if not self._longest_timer or self._longest_timer < timer:
self._longest_timer = timer
+ self._longest_request_id = request_id
+ self._longest_end_ts = self._last_success
+
- def error(self, timer):
+ def error(self, timer, request_id=None):
"""records the errored execution"""
with self._lock:
+ self._active_count -= 1
self._call_count += 1
self._error_count += 1
- self._last_error = datetime.now()
+ self._last_error = datetime.utcnow()
+ self._last_error_request_id = request_id
self._total_timer += timer
if not self._longest_timer or self._longest_timer < timer:
self._longest_timer = timer
+ self._longest_request_id = request_id
+ self._longest_end_ts = self._last_error
+
class Health(object):
"""Health stats for multiple requests"""
@@ -76,28 +125,36 @@ class Health(object):
self._all_stats = {}
self._lock = Lock()
+
def _add_or_get_stats(self, stats_name):
"""add to or get from the ever growing dict of HealthStats"""
- stats = None
with self._lock:
stats = self._all_stats.get(stats_name)
if not stats:
self._all_stats[stats_name] = stats = HealthStats(stats_name)
- return stats
+ return stats
+
+
+ def start(self, stats_name, request_id=None):
+ """records the start of execution on stats_name"""
+ stats = self._add_or_get_stats(stats_name)
+ stats.start(request_id)
+
- def success(self, stats_name, timer):
+ def success(self, stats_name, timer, request_id=None):
"""records the successful execution on stats_name"""
stats = self._add_or_get_stats(stats_name)
- stats.success(timer)
+ stats.success(timer, request_id)
+
- def error(self, stats_name, timer):
+ def error(self, stats_name, timer, request_id=None):
"""records the error execution on stats_name"""
stats = self._add_or_get_stats(stats_name)
- stats.error(timer)
+ stats.error(timer, request_id)
+
def dump(self):
"""returns dict of stats"""
with self._lock:
stats = dict((k, v.dump()) for (k, v) in self._all_stats.iteritems())
-
return stats