From 3d5a3e06530c1250d48f7d838c619f3bfbcd019d Mon Sep 17 00:00:00 2001 From: Dileep Ranganathan Date: Thu, 30 May 2019 12:38:37 -0700 Subject: Refactor Distributed Analytics project structure Modified the project structure to improve maintainability and to add future CI and integration test support. Change-Id: Id30bfb1f83f23785a6b5f99e81f42f752d59c0f8 Issue-ID: ONAPARC-280 Signed-off-by: Dileep Ranganathan --- .../prometheus/rules/prometheus.rules.yaml | 105 +++++++++++++++++++++ 1 file changed, 105 insertions(+) create mode 100644 vnfs/DAaaS/deploy/operator/charts/prometheus-operator/templates/prometheus/rules/prometheus.rules.yaml (limited to 'vnfs/DAaaS/deploy/operator/charts/prometheus-operator/templates/prometheus/rules/prometheus.rules.yaml') diff --git a/vnfs/DAaaS/deploy/operator/charts/prometheus-operator/templates/prometheus/rules/prometheus.rules.yaml b/vnfs/DAaaS/deploy/operator/charts/prometheus-operator/templates/prometheus/rules/prometheus.rules.yaml new file mode 100644 index 00000000..3c9e1490 --- /dev/null +++ b/vnfs/DAaaS/deploy/operator/charts/prometheus-operator/templates/prometheus/rules/prometheus.rules.yaml @@ -0,0 +1,105 @@ +# Generated from 'prometheus.rules' group from https://raw.githubusercontent.com/coreos/prometheus-operator/master/contrib/kube-prometheus/manifests/prometheus-rules.yaml +# Do not change in-place! In order to change this file first read following link: +# https://github.com/helm/charts/tree/master/stable/prometheus-operator/hack +{{- if and .Values.defaultRules.create .Values.defaultRules.rules.prometheus }} +{{- $prometheusJob := printf "%s-%s" (include "prometheus-operator.fullname" .) "prometheus" }} +{{- $namespace := .Release.Namespace }} +apiVersion: {{ printf "%s/v1" (.Values.prometheusOperator.crdApiGroup | default "monitoring.coreos.com") }} +kind: PrometheusRule +metadata: + name: {{ printf "%s-%s" (include "prometheus-operator.fullname" .) "prometheus.rules" | trunc 63 | trimSuffix "-" }} + labels: + app: {{ template "prometheus-operator.name" . }} +{{ include "prometheus-operator.labels" . | indent 4 }} +{{- if .Values.defaultRules.labels }} +{{ toYaml .Values.defaultRules.labels | indent 4 }} +{{- end }} +{{- if .Values.defaultRules.annotations }} + annotations: +{{ toYaml .Values.defaultRules.annotations | indent 4 }} +{{- end }} +spec: + groups: + - name: prometheus.rules + rules: + - alert: PrometheusConfigReloadFailed + annotations: + description: Reloading Prometheus' configuration has failed for {{`{{$labels.namespace}}`}}/{{`{{$labels.pod}}`}} + summary: Reloading Prometheus' configuration failed + expr: prometheus_config_last_reload_successful{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"} == 0 + for: 10m + labels: + severity: warning + - alert: PrometheusNotificationQueueRunningFull + annotations: + description: Prometheus' alert notification queue is running full for {{`{{$labels.namespace}}`}}/{{`{{ $labels.pod}}`}} + summary: Prometheus' alert notification queue is running full + expr: predict_linear(prometheus_notifications_queue_length{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"}[5m], 60 * 30) > prometheus_notifications_queue_capacity{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"} + for: 10m + labels: + severity: warning + - alert: PrometheusErrorSendingAlerts + annotations: + description: Errors while sending alerts from Prometheus {{`{{$labels.namespace}}`}}/{{`{{ $labels.pod}}`}} to Alertmanager {{`{{$labels.Alertmanager}}`}} + summary: Errors while sending alert from Prometheus + expr: rate(prometheus_notifications_errors_total{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"}[5m]) / rate(prometheus_notifications_sent_total{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"}[5m]) > 0.01 + for: 10m + labels: + severity: warning + - alert: PrometheusErrorSendingAlerts + annotations: + description: Errors while sending alerts from Prometheus {{`{{$labels.namespace}}`}}/{{`{{ $labels.pod}}`}} to Alertmanager {{`{{$labels.Alertmanager}}`}} + summary: Errors while sending alerts from Prometheus + expr: rate(prometheus_notifications_errors_total{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"}[5m]) / rate(prometheus_notifications_sent_total{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"}[5m]) > 0.03 + for: 10m + labels: + severity: critical + - alert: PrometheusNotConnectedToAlertmanagers + annotations: + description: Prometheus {{`{{ $labels.namespace }}`}}/{{`{{ $labels.pod}}`}} is not connected to any Alertmanagers + summary: Prometheus is not connected to any Alertmanagers + expr: prometheus_notifications_alertmanagers_discovered{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"} < 1 + for: 10m + labels: + severity: warning + - alert: PrometheusTSDBReloadsFailing + annotations: + description: '{{`{{$labels.job}}`}} at {{`{{$labels.instance}}`}} had {{`{{$value | humanize}}`}} reload failures over the last four hours.' + summary: Prometheus has issues reloading data blocks from disk + expr: increase(prometheus_tsdb_reloads_failures_total{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"}[2h]) > 0 + for: 12h + labels: + severity: warning + - alert: PrometheusTSDBCompactionsFailing + annotations: + description: '{{`{{$labels.job}}`}} at {{`{{$labels.instance}}`}} had {{`{{$value | humanize}}`}} compaction failures over the last four hours.' + summary: Prometheus has issues compacting sample blocks + expr: increase(prometheus_tsdb_compactions_failed_total{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"}[2h]) > 0 + for: 12h + labels: + severity: warning + - alert: PrometheusTSDBWALCorruptions + annotations: + description: '{{`{{$labels.job}}`}} at {{`{{$labels.instance}}`}} has a corrupted write-ahead log (WAL).' + summary: Prometheus write-ahead log is corrupted + expr: tsdb_wal_corruptions_total{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"} > 0 + for: 4h + labels: + severity: warning + - alert: PrometheusNotIngestingSamples + annotations: + description: Prometheus {{`{{ $labels.namespace }}`}}/{{`{{ $labels.pod}}`}} isn't ingesting samples. + summary: Prometheus isn't ingesting samples + expr: rate(prometheus_tsdb_head_samples_appended_total{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"}[5m]) <= 0 + for: 10m + labels: + severity: warning + - alert: PrometheusTargetScrapesDuplicate + annotations: + description: '{{`{{$labels.namespace}}`}}/{{`{{$labels.pod}}`}} has many samples rejected due to duplicate timestamps but different values' + summary: Prometheus has many samples rejected + expr: increase(prometheus_target_scrapes_sample_duplicate_timestamp_total{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"}[5m]) > 0 + for: 10m + labels: + severity: warning +{{- end }} \ No newline at end of file -- cgit 1.2.3-korg