summaryrefslogtreecommitdiffstats
path: root/kud/tests/vnfs/comp-app/collection/app2/helm/prometheus-operator/templates/prometheus/rules/kubernetes-apps.yaml
diff options
context:
space:
mode:
authorRajamohan Raj <rajamohan.raj@intel.com>2020-06-18 14:55:42 -0700
committerRajamohan Raj <rajamohan.raj@intel.com>2020-06-24 11:57:04 -0700
commit94bfc956f43bcaec29f2fc9844b9ca4c35d72260 (patch)
treebc5050f2e8c6c87672e06f8da319363745f49f8f /kud/tests/vnfs/comp-app/collection/app2/helm/prometheus-operator/templates/prometheus/rules/kubernetes-apps.yaml
parent819a687195ef9d6c8dd9753d366c0120886d7736 (diff)
Integrate collectd, prometheus and grafana.
In this patch, made necessary changes in collectd and prometheus helm charts such that prometheus can pull data from collectd. Prometheus GUI and Grafana GUI are verified as well. Issue-ID: MULTICLOUD-1082 Signed-off-by: Rajamohan Raj <rajamohan.raj@intel.com> Change-Id: I39b7e20f46aa789272be671056a76dd926701068
Diffstat (limited to 'kud/tests/vnfs/comp-app/collection/app2/helm/prometheus-operator/templates/prometheus/rules/kubernetes-apps.yaml')
-rwxr-xr-xkud/tests/vnfs/comp-app/collection/app2/helm/prometheus-operator/templates/prometheus/rules/kubernetes-apps.yaml161
1 files changed, 161 insertions, 0 deletions
diff --git a/kud/tests/vnfs/comp-app/collection/app2/helm/prometheus-operator/templates/prometheus/rules/kubernetes-apps.yaml b/kud/tests/vnfs/comp-app/collection/app2/helm/prometheus-operator/templates/prometheus/rules/kubernetes-apps.yaml
new file mode 100755
index 00000000..fa82f081
--- /dev/null
+++ b/kud/tests/vnfs/comp-app/collection/app2/helm/prometheus-operator/templates/prometheus/rules/kubernetes-apps.yaml
@@ -0,0 +1,161 @@
+{{- /*
+Generated from 'kubernetes-apps' group from https://raw.githubusercontent.com/coreos/kube-prometheus/release-0.1/manifests/prometheus-rules.yaml
+Do not change in-place! In order to change this file first read following link:
+https://github.com/helm/charts/tree/master/stable/prometheus-operator/hack
+*/ -}}
+{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }}
+{{- if and (semverCompare ">=1.10.0-0" $kubeTargetVersion) (semverCompare "<1.14.0-0" $kubeTargetVersion) .Values.defaultRules.create .Values.kubeStateMetrics.enabled .Values.defaultRules.rules.kubernetesApps }}
+{{- $targetNamespace := .Values.defaultRules.appNamespacesTarget }}
+apiVersion: monitoring.coreos.com/v1
+kind: PrometheusRule
+metadata:
+  name: {{ printf "%s-%s" (include "prometheus-operator.fullname" .) "kubernetes-apps" | trunc 63 | trimSuffix "-" }}
+  namespace: {{ template "prometheus-operator.namespace" . }}
+  labels:
+    app: {{ template "prometheus-operator.name" . }}
+{{ include "prometheus-operator.labels" . | indent 4 }}
+{{- if .Values.defaultRules.labels }}
+{{ toYaml .Values.defaultRules.labels | indent 4 }}
+{{- end }}
+{{- if .Values.defaultRules.annotations }}
+  annotations:
+{{ toYaml .Values.defaultRules.annotations | indent 4 }}
+{{- end }}
+spec:
+  groups:
+  - name: kubernetes-apps
+    rules:
+    - alert: KubePodCrashLooping
+      annotations:
+        message: Pod {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.pod {{`}}`}} ({{`{{`}} $labels.container {{`}}`}}) is restarting {{`{{`}} printf "%.2f" $value {{`}}`}} times / 5 minutes.
+        runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-kubepodcrashlooping
+      expr: rate(kube_pod_container_status_restarts_total{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"}[15m]) * 60 * 5 > 0
+      for: 1h
+      labels:
+        severity: critical
+    - alert: KubePodNotReady
+      annotations:
+        message: Pod {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.pod {{`}}`}} has been in a non-ready state for longer than an hour.
+        runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-kubepodnotready
+      expr: sum by (namespace, pod) (kube_pod_status_phase{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}", phase=~"Pending|Unknown"}) > 0
+      for: 1h
+      labels:
+        severity: critical
+    - alert: KubeDeploymentGenerationMismatch
+      annotations:
+        message: Deployment generation for {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.deployment {{`}}`}} does not match, this indicates that the Deployment has failed but has not been rolled back.
+        runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-kubedeploymentgenerationmismatch
+      expr: |-
+        kube_deployment_status_observed_generation{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"}
+          !=
+        kube_deployment_metadata_generation{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"}
+      for: 15m
+      labels:
+        severity: critical
+    - alert: KubeDeploymentReplicasMismatch
+      annotations:
+        message: Deployment {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.deployment {{`}}`}} has not matched the expected number of replicas for longer than an hour.
+        runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-kubedeploymentreplicasmismatch
+      expr: |-
+        kube_deployment_spec_replicas{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"}
+          !=
+        kube_deployment_status_replicas_available{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"}
+      for: 1h
+      labels:
+        severity: critical
+    - alert: KubeStatefulSetReplicasMismatch
+      annotations:
+        message: StatefulSet {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.statefulset {{`}}`}} has not matched the expected number of replicas for longer than 15 minutes.
+        runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-kubestatefulsetreplicasmismatch
+      expr: |-
+        kube_statefulset_status_replicas_ready{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"}
+          !=
+        kube_statefulset_status_replicas{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"}
+      for: 15m
+      labels:
+        severity: critical
+    - alert: KubeStatefulSetGenerationMismatch
+      annotations:
+        message: StatefulSet generation for {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.statefulset {{`}}`}} does not match, this indicates that the StatefulSet has failed but has not been rolled back.
+        runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-kubestatefulsetgenerationmismatch
+      expr: |-
+        kube_statefulset_status_observed_generation{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"}
+          !=
+        kube_statefulset_metadata_generation{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"}
+      for: 15m
+      labels:
+        severity: critical
+    - alert: KubeStatefulSetUpdateNotRolledOut
+      annotations:
+        message: StatefulSet {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.statefulset {{`}}`}} update has not been rolled out.
+        runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-kubestatefulsetupdatenotrolledout
+      expr: |-
+        max without (revision) (
+          kube_statefulset_status_current_revision{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"}
+            unless
+          kube_statefulset_status_update_revision{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"}
+        )
+          *
+        (
+          kube_statefulset_replicas{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"}
+            !=
+          kube_statefulset_status_replicas_updated{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"}
+        )
+      for: 15m
+      labels:
+        severity: critical
+    - alert: KubeDaemonSetRolloutStuck
+      annotations:
+        message: Only {{`{{`}} $value {{`}}`}}% of the desired Pods of DaemonSet {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.daemonset {{`}}`}} are scheduled and ready.
+        runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-kubedaemonsetrolloutstuck
+      expr: |-
+        kube_daemonset_status_number_ready{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"}
+          /
+        kube_daemonset_status_desired_number_scheduled{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"} * 100 < 100
+      for: 15m
+      labels:
+        severity: critical
+    - alert: KubeDaemonSetNotScheduled
+      annotations:
+        message: '{{`{{`}} $value {{`}}`}} Pods of DaemonSet {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.daemonset {{`}}`}} are not scheduled.'
+        runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-kubedaemonsetnotscheduled
+      expr: |-
+        kube_daemonset_status_desired_number_scheduled{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"}
+          -
+        kube_daemonset_status_current_number_scheduled{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"} > 0
+      for: 10m
+      labels:
+        severity: warning
+    - alert: KubeDaemonSetMisScheduled
+      annotations:
+        message: '{{`{{`}} $value {{`}}`}} Pods of DaemonSet {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.daemonset {{`}}`}} are running where they are not supposed to run.'
+        runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-kubedaemonsetmisscheduled
+      expr: kube_daemonset_status_number_misscheduled{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"} > 0
+      for: 10m
+      labels:
+        severity: warning
+    - alert: KubeCronJobRunning
+      annotations:
+        message: CronJob {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.cronjob {{`}}`}} is taking more than 1h to complete.
+        runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-kubecronjobrunning
+      expr: time() - kube_cronjob_next_schedule_time{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"} > 3600
+      for: 1h
+      labels:
+        severity: warning
+    - alert: KubeJobCompletion
+      annotations:
+        message: Job {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.job_name {{`}}`}} is taking more than one hour to complete.
+        runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-kubejobcompletion
+      expr: kube_job_spec_completions{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"} - kube_job_status_succeeded{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"} > 0
+      for: 1h
+      labels:
+        severity: warning
+    - alert: KubeJobFailed
+      annotations:
+        message: Job {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.job_name {{`}}`}} failed to complete.
+        runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-kubejobfailed
+      expr: kube_job_status_failed{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"} > 0
+      for: 1h
+      labels:
+        severity: warning
+{{- end }}
\ No newline at end of file