author Dileep Ranganathan <dileep.ranganathan@intel.com> 2019-03-05 10:24:06 -0800
committer Dileep Ranganathan <dileep.ranganathan@intel.com> 2019-03-05 10:38:48 -0800
commit 3dfd3180c0a4d192f4524d74e36d2ba50bffff71 (patch)
tree 7df49d15b185b73af9a902b17323e5fba46b208f /vnfs/DAaaS/collection/charts/prometheus-operator/templates/prometheus/rules/prometheus.rules.yaml
parent 1b81e8f0b51576f761aa8e3329285bfb61e6dd79 (diff)
Collection Service Helm charts package
The packages needed for distributed analytics are separated as collection, messaging, training, inference, and visualization. The collection package consists of the collection agents, the Prometheus Operator, and Prometheus.

Change-Id: I12c6ed0607fbaedf7bbc207562fb5bf2a1950623
Issue-ID: ONAPARC-366
Signed-off-by: Dileep Ranganathan <dileep.ranganathan@intel.com>
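The rules template added below is gated on the prometheus-operator subchart's values (.Values.defaultRules.create and .Values.defaultRules.rules.prometheus). As a rough sketch of how a consumer might toggle these defaults from the parent collection chart, the subchart key nesting and the extra label shown here are assumptions for illustration, not taken from this commit:

# values-override.yaml (hypothetical): enable the default Prometheus rules
# and attach an extra label to the generated PrometheusRule object.
prometheus-operator:
  defaultRules:
    create: true           # checked by the template's outer `if and` guard
    rules:
      prometheus: true     # enables this specific rule group
    labels:
      release: collection  # hypothetical label, merged into metadata.labels via toYaml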
Diffstat (limited to 'vnfs/DAaaS/collection/charts/prometheus-operator/templates/prometheus/rules/prometheus.rules.yaml')
-rw-r--r-- vnfs/DAaaS/collection/charts/prometheus-operator/templates/prometheus/rules/prometheus.rules.yaml 105
1 file changed, 105 insertions(+), 0 deletions(-)
diff --git a/vnfs/DAaaS/collection/charts/prometheus-operator/templates/prometheus/rules/prometheus.rules.yaml b/vnfs/DAaaS/collection/charts/prometheus-operator/templates/prometheus/rules/prometheus.rules.yaml
new file mode 100644
index 00000000..3c9e1490
--- /dev/null
+++ b/vnfs/DAaaS/collection/charts/prometheus-operator/templates/prometheus/rules/prometheus.rules.yaml
@@ -0,0 +1,105 @@
+# Generated from 'prometheus.rules' group from https://raw.githubusercontent.com/coreos/prometheus-operator/master/contrib/kube-prometheus/manifests/prometheus-rules.yaml
+# Do not change in-place! In order to change this file first read following link:
+# https://github.com/helm/charts/tree/master/stable/prometheus-operator/hack
+{{- if and .Values.defaultRules.create .Values.defaultRules.rules.prometheus }}
+{{- $prometheusJob := printf "%s-%s" (include "prometheus-operator.fullname" .) "prometheus" }}
+{{- $namespace := .Release.Namespace }}
+apiVersion: {{ printf "%s/v1" (.Values.prometheusOperator.crdApiGroup | default "monitoring.coreos.com") }}
+kind: PrometheusRule
+metadata:
+ name: {{ printf "%s-%s" (include "prometheus-operator.fullname" .) "prometheus.rules" | trunc 63 | trimSuffix "-" }}
+ labels:
+ app: {{ template "prometheus-operator.name" . }}
+{{ include "prometheus-operator.labels" . | indent 4 }}
+{{- if .Values.defaultRules.labels }}
+{{ toYaml .Values.defaultRules.labels | indent 4 }}
+{{- end }}
+{{- if .Values.defaultRules.annotations }}
+ annotations:
+{{ toYaml .Values.defaultRules.annotations | indent 4 }}
+{{- end }}
+spec:
+ groups:
+ - name: prometheus.rules
+ rules:
+ - alert: PrometheusConfigReloadFailed
+ annotations:
+ description: Reloading Prometheus' configuration has failed for {{`{{$labels.namespace}}`}}/{{`{{$labels.pod}}`}}
+ summary: Reloading Prometheus' configuration failed
+ expr: prometheus_config_last_reload_successful{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"} == 0
+ for: 10m
+ labels:
+ severity: warning
+ - alert: PrometheusNotificationQueueRunningFull
+ annotations:
+ description: Prometheus' alert notification queue is running full for {{`{{$labels.namespace}}`}}/{{`{{ $labels.pod}}`}}
+ summary: Prometheus' alert notification queue is running full
+ expr: predict_linear(prometheus_notifications_queue_length{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"}[5m], 60 * 30) > prometheus_notifications_queue_capacity{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"}
+ for: 10m
+ labels:
+ severity: warning
+ - alert: PrometheusErrorSendingAlerts
+ annotations:
+ description: Errors while sending alerts from Prometheus {{`{{$labels.namespace}}`}}/{{`{{ $labels.pod}}`}} to Alertmanager {{`{{$labels.Alertmanager}}`}}
+ summary: Errors while sending alert from Prometheus
+ expr: rate(prometheus_notifications_errors_total{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"}[5m]) / rate(prometheus_notifications_sent_total{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"}[5m]) > 0.01
+ for: 10m
+ labels:
+ severity: warning
+ - alert: PrometheusErrorSendingAlerts
+ annotations:
+ description: Errors while sending alerts from Prometheus {{`{{$labels.namespace}}`}}/{{`{{ $labels.pod}}`}} to Alertmanager {{`{{$labels.Alertmanager}}`}}
+ summary: Errors while sending alerts from Prometheus
+ expr: rate(prometheus_notifications_errors_total{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"}[5m]) / rate(prometheus_notifications_sent_total{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"}[5m]) > 0.03
+ for: 10m
+ labels:
+ severity: critical
+ - alert: PrometheusNotConnectedToAlertmanagers
+ annotations:
+ description: Prometheus {{`{{ $labels.namespace }}`}}/{{`{{ $labels.pod}}`}} is not connected to any Alertmanagers
+ summary: Prometheus is not connected to any Alertmanagers
+ expr: prometheus_notifications_alertmanagers_discovered{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"} < 1
+ for: 10m
+ labels:
+ severity: warning
+ - alert: PrometheusTSDBReloadsFailing
+ annotations:
+ description: '{{`{{$labels.job}}`}} at {{`{{$labels.instance}}`}} had {{`{{$value | humanize}}`}} reload failures over the last four hours.'
+ summary: Prometheus has issues reloading data blocks from disk
+ expr: increase(prometheus_tsdb_reloads_failures_total{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"}[2h]) > 0
+ for: 12h
+ labels:
+ severity: warning
+ - alert: PrometheusTSDBCompactionsFailing
+ annotations:
+ description: '{{`{{$labels.job}}`}} at {{`{{$labels.instance}}`}} had {{`{{$value | humanize}}`}} compaction failures over the last four hours.'
+ summary: Prometheus has issues compacting sample blocks
+ expr: increase(prometheus_tsdb_compactions_failed_total{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"}[2h]) > 0
+ for: 12h
+ labels:
+ severity: warning
+ - alert: PrometheusTSDBWALCorruptions
+ annotations:
+ description: '{{`{{$labels.job}}`}} at {{`{{$labels.instance}}`}} has a corrupted write-ahead log (WAL).'
+ summary: Prometheus write-ahead log is corrupted
+ expr: tsdb_wal_corruptions_total{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"} > 0
+ for: 4h
+ labels:
+ severity: warning
+ - alert: PrometheusNotIngestingSamples
+ annotations:
+ description: Prometheus {{`{{ $labels.namespace }}`}}/{{`{{ $labels.pod}}`}} isn't ingesting samples.
+ summary: Prometheus isn't ingesting samples
+ expr: rate(prometheus_tsdb_head_samples_appended_total{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"}[5m]) <= 0
+ for: 10m
+ labels:
+ severity: warning
+ - alert: PrometheusTargetScrapesDuplicate
+ annotations:
+ description: '{{`{{$labels.namespace}}`}}/{{`{{$labels.pod}}`}} has many samples rejected due to duplicate timestamps but different values'
+ summary: Prometheus has many samples rejected
+ expr: increase(prometheus_target_scrapes_sample_duplicate_timestamp_total{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"}[5m]) > 0
+ for: 10m
+ labels:
+ severity: warning
+{{- end }}
\ No newline at end of file
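One subtlety in the template above: expressions such as {{`{{$labels.namespace}}`}} use Go-template backtick string literals so that Helm emits the inner {{$labels.namespace}} verbatim, leaving it for Prometheus' own alert templating rather than expanding it at chart-render time. For illustration only, the first rule renders roughly as follows under `helm template`, assuming a release whose fullname resolves to "collection-prometheus-operator" deployed in namespace "default" (both names hypothetical):

# Hypothetical rendered output of the PrometheusConfigReloadFailed rule:
- alert: PrometheusConfigReloadFailed
  annotations:
    description: Reloading Prometheus' configuration has failed for {{$labels.namespace}}/{{$labels.pod}}
    summary: Reloading Prometheus' configuration failed
  expr: prometheus_config_last_reload_successful{job="collection-prometheus-operator-prometheus",namespace="default"} == 0
  for: 10m
  labels:
    severity: warning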