From 1d9c7564c0b1d63ab447a4fc51c7fc601f251773 Mon Sep 17 00:00:00 2001 From: "adheli.tavares" Date: Mon, 9 Jan 2023 11:28:25 +0000 Subject: Adding apex-pdp metrics to SLAs dashboard. Issue-ID: POLICY-4147 Change-Id: I8ae2d5f63fe36a436578c677af3252fbfe151805 Signed-off-by: adheli.tavares --- csit/metrics/dashboards/sla-metrics.json | 585 +++++++++++++++++++++++++++++-- 1 file changed, 553 insertions(+), 32 deletions(-) diff --git a/csit/metrics/dashboards/sla-metrics.json b/csit/metrics/dashboards/sla-metrics.json index 7b96c935..6093b51f 100644 --- a/csit/metrics/dashboards/sla-metrics.json +++ b/csit/metrics/dashboards/sla-metrics.json @@ -23,18 +23,114 @@ "fiscalYearStartMonth": 0, "gnetId": 14430, "graphTooltip": 0, - "id": 3, - "iteration": 1671098389991, + "id": 5, + "iteration": 1673087127172, "links": [], "liveNow": false, "panels": [ + { + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 0 + }, + "id": 135, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "dkSf71fnz" + }, + "exemplar": true, + "expr": "histogram_quantile(0.95, 
sum(rate(http_request_duration_seconds_bucket[5m])) by (le))", + "interval": "", + "legendFormat": "", + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "dkSf71fnz" + }, + "exemplar": true, + "expr": "", + "hide": false, + "interval": "", + "legendFormat": "", + "refId": "B" + } + ], + "title": "Panel Title", + "type": "timeseries" + }, { "collapsed": false, "gridPos": { "h": 1, "w": 24, "x": 0, - "y": 0 + "y": 8 }, "id": 54, "panels": [], @@ -80,7 +176,7 @@ "h": 6, "w": 6, "x": 0, - "y": 1 + "y": 9 }, "id": 52, "links": [], @@ -158,7 +254,7 @@ "h": 6, "w": 6, "x": 6, - "y": 1 + "y": 9 }, "id": 56, "links": [], @@ -185,7 +281,7 @@ "uid": "dkSf71fnz" }, "exemplar": true, - "expr": "process_start_time_seconds{job=~\"api-metrics|acm-metrics|pap-metrics\"}*1000", + "expr": "process_start_time_seconds{job=~\"api-metrics|acm-metrics|pap-metrics|apex-pdp-metrics\"}*1000", "format": "time_series", "interval": "", "intervalFactor": 2, @@ -243,7 +339,7 @@ "h": 6, "w": 6, "x": 0, - "y": 7 + "y": 15 }, "id": 58, "links": [], @@ -268,7 +364,7 @@ "uid": "dkSf71fnz" }, "exemplar": true, - "expr": "sum(jvm_memory_used_bytes{job=~\"api-metrics|acm-metrics|pap-metrics\", area=\"heap\"})*100/sum(jvm_memory_max_bytes{job=~\"api-metrics|acm-metrics|pap-metrics\", area=\"heap\"})", + "expr": "sum(jvm_memory_used_bytes{job=~\"api-metrics|acm-metrics|pap-metrics|apex-pdp-metrics\", area=\"heap\"})*100/sum(jvm_memory_max_bytes{job=~\"api-metrics|acm-metrics|pap-metrics|apex-pdp-metrics\", area=\"heap\"})", "format": "time_series", "interval": "", "intervalFactor": 1, @@ -335,7 +431,7 @@ "h": 6, "w": 6, "x": 6, - "y": 7 + "y": 15 }, "id": 60, "links": [], @@ -360,7 +456,7 @@ "uid": "dkSf71fnz" }, "exemplar": true, - "expr": "sum(jvm_memory_used_bytes{job=~\"api-metrics|acm-metrics|pap-metrics\", area=\"nonheap\"})*100/sum(jvm_memory_max_bytes{job=~\"api-metrics|acm-metrics|pap-metrics\", area=\"nonheap\"})", + "expr": 
"sum(jvm_memory_used_bytes{job=~\"api-metrics|acm-metrics|pap-metrics|apex-pdp-metrics\", area=\"nonheap\"})*100/sum(jvm_memory_max_bytes{job=~\"api-metrics|acm-metrics|pap-metrics|apex-pdp-metrics\", area=\"nonheap\"})", "format": "time_series", "interval": "", "intervalFactor": 2, @@ -429,7 +525,7 @@ "h": 8, "w": 12, "x": 0, - "y": 13 + "y": 21 }, "id": 96, "links": [], @@ -456,7 +552,7 @@ "uid": "dkSf71fnz" }, "exemplar": true, - "expr": "avg(system_load_average_1m{job=~\"api-metrics|acm-metrics|pap-metrics\"})", + "expr": "avg(system_load_average_1m{job=~\"api-metrics|acm-metrics|pap-metrics|apex-pdp-metrics\"})", "format": "time_series", "interval": "", "intervalFactor": 1, @@ -469,7 +565,7 @@ "uid": "dkSf71fnz" }, "exemplar": true, - "expr": "avg(system_cpu_count{job=~\"api-metrics|acm-metrics|pap-metrics\"})", + "expr": "avg(system_cpu_count{job=~\"api-metrics|acm-metrics|pap-metrics|apex-pdp-metrics\"})", "format": "time_series", "interval": "", "intervalFactor": 1, @@ -537,7 +633,7 @@ "h": 7, "w": 12, "x": 0, - "y": 21 + "y": 29 }, "id": 95, "links": [], @@ -564,7 +660,7 @@ "uid": "dkSf71fnz" }, "exemplar": true, - "expr": "avg(system_cpu_usage{job=~\"api-metrics|acm-metrics|pap-metrics\"})", + "expr": "avg(system_cpu_usage{job=~\"api-metrics|acm-metrics|pap-metrics|apex-pdp-metrics\"})", "format": "time_series", "interval": "", "intervalFactor": 1, @@ -577,7 +673,7 @@ "uid": "dkSf71fnz" }, "exemplar": true, - "expr": "avg(process_cpu_usage{job=~\"api-metrics|acm-metrics|pap-metrics\"})", + "expr": "avg(process_cpu_usage{job=~\"api-metrics|acm-metrics|pap-metrics|apex-pdp-metrics\"})", "format": "time_series", "interval": "", "intervalFactor": 1, @@ -644,7 +740,7 @@ "h": 8, "w": 12, "x": 0, - "y": 28 + "y": 36 }, "id": 113, "options": { @@ -668,7 +764,7 @@ "uid": "dkSf71fnz" }, "exemplar": true, - "expr": "avg by (id) (jvm_memory_used_bytes{job=~\"api-metrics|acm-metrics|pap-metrics\"})", + "expr": "avg by (id) 
(jvm_memory_used_bytes{job=~\"api-metrics|acm-metrics|pap-metrics|apex-pdp-metrics\"})", "interval": "", "legendFormat": "{{id}}", "refId": "A" @@ -683,7 +779,7 @@ "h": 1, "w": 24, "x": 0, - "y": 36 + "y": 44 }, "id": 18, "panels": [], @@ -747,7 +843,7 @@ "h": 10, "w": 18, "x": 0, - "y": 37 + "y": 45 }, "id": 4, "links": [], @@ -772,7 +868,7 @@ "uid": "dkSf71fnz" }, "exemplar": true, - "expr": "rate(http_server_requests_seconds_count[5m])", + "expr": "rate(http_server_requests_seconds_count{uri!~\".*metrics.*|.*prometheus.*\"}[5m])", "format": "time_series", "interval": "", "intervalFactor": 1, @@ -840,7 +936,7 @@ "h": 10, "w": 18, "x": 0, - "y": 47 + "y": 55 }, "id": 2, "links": [], @@ -866,7 +962,7 @@ "uid": "dkSf71fnz" }, "exemplar": true, - "expr": "irate(http_server_requests_seconds_sum{job=~\"api-metrics|acm-metrics|pap-metrics\", exception=\"None\", uri!~\".*metrics.*\"}[5m]) / irate(http_server_requests_seconds_count{job=~\"api-metrics|acm-metrics|pap-metrics\", exception=\"None\", uri!~\".*metrics.*\"}[5m])", + "expr": "irate(http_server_requests_seconds_sum{job=~\"api-metrics|acm-metrics|pap-metrics\", exception=\"None\", uri!~\".*metrics.*|.*prometheus.*\"}[5m]) / irate(http_server_requests_seconds_count{job=~\"api-metrics|acm-metrics|pap-metrics\", exception=\"None\", uri!~\".*metrics.*|.*prometheus.*\"}[5m])", "format": "time_series", "interval": "", "intervalFactor": 1, @@ -934,7 +1030,7 @@ "h": 8, "w": 13, "x": 0, - "y": 57 + "y": 65 }, "id": 111, "links": [], @@ -959,7 +1055,7 @@ "uid": "dkSf71fnz" }, "exemplar": true, - "expr": "http_server_requests_seconds_max{job=~\"api-metrics|acm-metrics|pap-metrics\", uri!~\".*metrics.*\"}", + "expr": "http_server_requests_seconds_max{job=~\"api-metrics|acm-metrics|pap-metrics\", uri!~\".*metrics.*|.*prometheus.*\"}", "format": "time_series", "interval": "", "intervalFactor": 1, @@ -976,7 +1072,7 @@ "h": 1, "w": 24, "x": 0, - "y": 65 + "y": 73 }, "id": 115, "panels": [], @@ -1008,7 +1104,7 @@ "h": 6, "w": 3, 
"x": 0, - "y": 66 + "y": 74 }, "id": 117, "options": { @@ -1067,7 +1163,7 @@ "h": 6, "w": 3, "x": 3, - "y": 66 + "y": 74 }, "id": 118, "options": { @@ -1126,7 +1222,7 @@ "h": 6, "w": 3, "x": 6, - "y": 66 + "y": 74 }, "id": 119, "options": { @@ -1185,7 +1281,7 @@ "h": 6, "w": 3, "x": 9, - "y": 66 + "y": 74 }, "id": 120, "options": { @@ -1219,6 +1315,90 @@ "title": "Failed undeployments", "type": "stat" }, + { + "description": "Policy Executions Total for APEX-PDP", + "fieldConfig": { + "defaults": { + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 80 + }, + "id": 128, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "pluginVersion": "8.3.4", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "dkSf71fnz" + }, + "exemplar": true, + "expr": "pdpa_policy_executions_total{job=\"apex-pdp-metrics\", status=\"SUCCESS\"}", + "interval": "", + "legendFormat": "Successful", + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "dkSf71fnz" + }, + "exemplar": true, + "expr": "pdpa_policy_executions_total{job=\"apex-pdp-metrics\", status=\"FAIL\"}", + "hide": false, + "interval": "", + "legendFormat": "Failed", + "refId": "B" + }, + { + "datasource": { + "type": "prometheus", + "uid": "dkSf71fnz" + }, + "exemplar": true, + "expr": "pdpa_policy_executions_total{job=\"apex-pdp-metrics\", status=\"TOTAL\"}", + "hide": false, + "interval": "", + "legendFormat": "Total", + "refId": "C" + } + ], + "title": "Policy Executions", + "type": "stat" + }, { "fieldConfig": { "defaults": { @@ -1275,7 +1455,7 @@ "h": 8, "w": 12, "x": 0, - "y": 72 + "y": 88 }, 
"id": 122, "options": { @@ -1307,6 +1487,347 @@ ], "title": "Deployment timing", "type": "timeseries" + }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 96 + }, + "id": 124, + "panels": [], + "title": "Apex-PDP", + "type": "row" + }, + { + "fieldConfig": { + "defaults": { + "color": { + "mode": "fixed" + }, + "mappings": [ + { + "options": { + "0": { + "color": "red", + "index": 0, + "text": "Undefined" + }, + "1": { + "color": "#ffffff", + "index": 1, + "text": "Stopped" + }, + "2": { + "color": "blue", + "index": 2, + "text": "Ready" + }, + "3": { + "color": "green", + "index": 3, + "text": "Running" + }, + "4": { + "color": "yellow", + "index": 4, + "text": "Stopping" + } + }, + "type": "value" + } + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 5, + "x": 0, + "y": 97 + }, + "id": 126, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "text": { + "titleSize": 20, + "valueSize": 20 + }, + "textMode": "auto" + }, + "pluginVersion": "8.3.4", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "dkSf71fnz" + }, + "exemplar": true, + "expr": "pdpa_engine_state{job=\"apex-pdp-metrics\"}", + "interval": "", + "legendFormat": "{{ engine_instance_id }}", + "refId": "A" + } + ], + "title": "Engines Stats", + "type": "stat" + }, + { + "description": "Uptime per engine for Policy APEX-PDP application calculated in days", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "decimals": 0, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + 
"unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 5, + "x": 5, + "y": 97 + }, + "id": 130, + "options": { + "colorMode": "none", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "text": { + "titleSize": 25, + "valueSize": 25 + }, + "textMode": "auto" + }, + "pluginVersion": "8.3.4", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "dkSf71fnz" + }, + "exemplar": true, + "expr": "avg by(engine_instance_id)((time() - pdpa_engine_last_start_timestamp_epoch{job=\"apex-pdp-metrics\"}/1000)/86400)", + "format": "time_series", + "instant": false, + "interval": "", + "legendFormat": "{{ engine_instance_id }}", + "refId": "A" + } + ], + "title": "Engine Uptime (days)", + "transformations": [], + "type": "stat" + }, + { + "description": "Average APEX event execution time per engine thread", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "ops" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 105 + }, + "id": 132, + "options": { + "legend": { + "calcs": [ + "lastNotNull", + "min", + "max" + ], + "displayMode": "table", + "placement": "right" + }, + "tooltip": { + "mode": "single", +
"sort": "none" + } + }, + "pluginVersion": "8.4.5", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "dkSf71fnz" + }, + "exemplar": true, + "expr": "rate(pdpa_engine_average_execution_time_seconds{job=\"apex-pdp-metrics\"}[5m])", + "format": "time_series", + "interval": "", + "legendFormat": "{{ engine_instance_id }}", + "refId": "A" + } + ], + "title": "Avg Engine Execution Time", + "type": "timeseries" + }, + { + "description": "Number of APEX event executions counted per engine thread", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + } + }, + "mappings": [], + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 113 + }, + "id": 133, + "options": { + "displayLabels": [ + "percent" + ], + "legend": { + "displayMode": "table", + "placement": "right", + "values": [ + "value" + ] + }, + "pieType": "donut", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "tooltip": { + "mode": "single" + } + }, + "pluginVersion": "8.4.5", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "dkSf71fnz" + }, + "exemplar": true, + "expr": "pdpa_engine_event_executions{job=\"apex-pdp-metrics\"}", + "format": "time_series", + "interval": "", + "legendFormat": "{{ engine_instance_id }}", + "refId": "A" + } + ], + "title": "Engine Executions Count", + "type": "piechart" } ], "refresh": "10s", @@ -1378,6 +1899,6 @@ "timezone": "", "title": "SLA's Metrics", "uid": "ySoel0jnk", - "version": 3, + "version": 4, "weekStart": "" } \ No newline at end of file -- cgit 1.2.3-korg