author Dileep Ranganathan <dileep.ranganathan@intel.com> 2019-03-05 10:24:06 -0800
committer Dileep Ranganathan <dileep.ranganathan@intel.com> 2019-03-05 10:38:48 -0800
commit 3dfd3180c0a4d192f4524d74e36d2ba50bffff71 (patch)
tree 7df49d15b185b73af9a902b17323e5fba46b208f /vnfs/DAaaS/collection/charts/prometheus-operator/templates/prometheus/rules/prometheus.rules.yaml
parent 1b81e8f0b51576f761aa8e3329285bfb61e6dd79 (diff)
Collection Service Helm charts package
The packages needed for distributed analytics are separated as collection, messaging, training, inference, and visualization. The collection package consists of the collection agents, the Prometheus Operator, and Prometheus.

Change-Id: I12c6ed0607fbaedf7bbc207562fb5bf2a1950623
Issue-ID: ONAPARC-366
Signed-off-by: Dileep Ranganathan <dileep.ranganathan@intel.com>
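The rules template added below is gated on the prometheus-operator subchart's values (.Values.defaultRules.create and .Values.defaultRules.rules.prometheus). As a rough sketch of how a consumer might toggle these defaults from the parent collection chart, the subchart key nesting and the extra label shown here are assumptions for illustration, not taken from this commit:

# values-override.yaml (hypothetical): enable the default Prometheus rules
# and attach an extra label to the generated PrometheusRule object.
prometheus-operator:
  defaultRules:
    create: true           # checked by the template's outer `if and` guard
    rules:
      prometheus: true     # enables this specific rule group
    labels:
      release: collection  # hypothetical label, merged into metadata.labels via toYaml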
Diffstat (limited to 'vnfs/DAaaS/collection/charts/prometheus-operator/templates/prometheus/rules/prometheus.rules.yaml')
-rw-r--r-- vnfs/DAaaS/collection/charts/prometheus-operator/templates/prometheus/rules/prometheus.rules.yaml 105
1 file changed, 105 insertions(+), 0 deletions(-)
diff --git a/vnfs/DAaaS/collection/charts/prometheus-operator/templates/prometheus/rules/prometheus.rules.yaml b/vnfs/DAaaS/collection/charts/prometheus-operator/templates/prometheus/rules/prometheus.rules.yaml
new file mode 100644
index 00000000..3c9e1490
--- /dev/null
+++ b/vnfs/DAaaS/collection/charts/prometheus-operator/templates/prometheus/rules/prometheus.rules.yaml
@@ -0,0 +1,105 @@
+# Generated from 'prometheus.rules' group from https://raw.githubusercontent.com/coreos/prometheus-operator/master/contrib/kube-prometheus/manifests/prometheus-rules.yaml
+# Do not change in-place! In order to change this file first read following link:
+# https://github.com/helm/charts/tree/master/stable/prometheus-operator/hack
+{{- if and .Values.defaultRules.create .Values.defaultRules.rules.prometheus }}
+{{- $prometheusJob := printf "%s-%s" (include "prometheus-operator.fullname" .) "prometheus" }}
+{{- $namespace := .Release.Namespace }}
+apiVersion: {{ printf "%s/v1" (.Values.prometheusOperator.crdApiGroup | default "monitoring.coreos.com") }}
+kind: PrometheusRule
+metadata:
+ name: {{ printf "%s-%s" (include "prometheus-operator.fullname" .) "prometheus.rules" | trunc 63 | trimSuffix "-" }}
+ labels:
+ app: {{ template "prometheus-operator.name" . }}
+{{ include "prometheus-operator.labels" . | indent 4 }}
+{{- if .Values.defaultRules.labels }}
+{{ toYaml .Values.defaultRules.labels | indent 4 }}
+{{- end }}
+{{- if .Values.defaultRules.annotations }}
+ annotations:
+{{ toYaml .Values.defaultRules.annotations | indent 4 }}
+{{- end }}
+spec:
+ groups:
+ - name: prometheus.rules
+ rules:
+ - alert: PrometheusConfigReloadFailed
+ annotations:
+ description: Reloading Prometheus' configuration has failed for {{`{{$labels.namespace}}`}}/{{`{{$labels.pod}}`}}
+ summary: Reloading Prometheus' configuration failed
+ expr: prometheus_config_last_reload_successful{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"} == 0
+ for: 10m
+ labels:
+ severity: warning
+ - alert: PrometheusNotificationQueueRunningFull
+ annotations:
+ description: Prometheus' alert notification queue is running full for {{`{{$labels.namespace}}`}}/{{`{{ $labels.pod}}`}}
+ summary: Prometheus' alert notification queue is running full
+ expr: predict_linear(prometheus_notifications_queue_length{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"}[5m], 60 * 30) > prometheus_notifications_queue_capacity{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"}
+ for: 10m
+ labels:
+ severity: warning
+ - alert: PrometheusErrorSendingAlerts
+ annotations:
+ description: Errors while sending alerts from Prometheus {{`{{$labels.namespace}}`}}/{{`{{ $labels.pod}}`}} to Alertmanager {{`{{$labels.Alertmanager}}`}}
+ summary: Errors while sending alert from Prometheus
+ expr: rate(prometheus_notifications_errors_total{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"}[5m]) / rate(prometheus_notifications_sent_total{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"}[5m]) > 0.01
+ for: 10m
+ labels:
+ severity: warning
+ - alert: PrometheusErrorSendingAlerts
+ annotations:
+ description: Errors while sending alerts from Prometheus {{`{{$labels.namespace}}`}}/{{`{{ $labels.pod}}`}} to Alertmanager {{`{{$labels.Alertmanager}}`}}
+ summary: Errors while sending alerts from Prometheus
+ expr: rate(prometheus_notifications_errors_total{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"}[5m]) / rate(prometheus_notifications_sent_total{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"}[5m]) > 0.03
+ for: 10m
+ labels:
+ severity: critical
+ - alert: PrometheusNotConnectedToAlertmanagers
+ annotations:
+ description: Prometheus {{`{{ $labels.namespace }}`}}/{{`{{ $labels.pod}}`}} is not connected to any Alertmanagers
+ summary: Prometheus is not connected to any Alertmanagers
+ expr: prometheus_notifications_alertmanagers_discovered{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"} < 1
+ for: 10m
+ labels:
+ severity: warning
+ - alert: PrometheusTSDBReloadsFailing
+ annotations:
+ description: '{{`{{$labels.job}}`}} at {{`{{$labels.instance}}`}} had {{`{{$value | humanize}}`}} reload failures over the last four hours.'
+ summary: Prometheus has issues reloading data blocks from disk
+ expr: increase(prometheus_tsdb_reloads_failures_total{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"}[2h]) > 0
+ for: 12h
+ labels:
+ severity: warning
+ - alert: PrometheusTSDBCompactionsFailing
+ annotations:
+ description: '{{`{{$labels.job}}`}} at {{`{{$labels.instance}}`}} had {{`{{$value | humanize}}`}} compaction failures over the last four hours.'
+ summary: Prometheus has issues compacting sample blocks
+ expr: increase(prometheus_tsdb_compactions_failed_total{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"}[2h]) > 0
+ for: 12h
+ labels:
+ severity: warning
+ - alert: PrometheusTSDBWALCorruptions
+ annotations:
+ description: '{{`{{$labels.job}}`}} at {{`{{$labels.instance}}`}} has a corrupted write-ahead log (WAL).'
+ summary: Prometheus write-ahead log is corrupted
+ expr: tsdb_wal_corruptions_total{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"} > 0
+ for: 4h
+ labels:
+ severity: warning
+ - alert: PrometheusNotIngestingSamples
+ annotations:
+ description: Prometheus {{`{{ $labels.namespace }}`}}/{{`{{ $labels.pod}}`}} isn't ingesting samples.
+ summary: Prometheus isn't ingesting samples
+ expr: rate(prometheus_tsdb_head_samples_appended_total{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"}[5m]) <= 0
+ for: 10m
+ labels:
+ severity: warning
+ - alert: PrometheusTargetScrapesDuplicate
+ annotations:
+ description: '{{`{{$labels.namespace}}`}}/{{`{{$labels.pod}}`}} has many samples rejected due to duplicate timestamps but different values'
+ summary: Prometheus has many samples rejected
+ expr: increase(prometheus_target_scrapes_sample_duplicate_timestamp_total{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"}[5m]) > 0
+ for: 10m
+ labels:
+ severity: warning
+{{- end }}
\ No newline at end of file
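One subtlety in the template above: expressions such as {{`{{$labels.namespace}}`}} use Go-template backtick string literals so that Helm emits the inner {{$labels.namespace}} verbatim, leaving it for Prometheus' own alert templating rather than expanding it at chart-render time. For illustration only, the first rule renders roughly as follows under `helm template`, assuming a release whose fullname resolves to "collection-prometheus-operator" deployed in namespace "default" (both names hypothetical):

# Hypothetical rendered output of the PrometheusConfigReloadFailed rule:
- alert: PrometheusConfigReloadFailed
  annotations:
    description: Reloading Prometheus' configuration has failed for {{$labels.namespace}}/{{$labels.pod}}
    summary: Reloading Prometheus' configuration failed
  expr: prometheus_config_last_reload_successful{job="collection-prometheus-operator-prometheus",namespace="default"} == 0
  for: 10m
  labels:
    severity: warning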