aboutsummaryrefslogtreecommitdiffstats
path: root/kud/tests/vnfs/comp-app/collection/app2/helm/prometheus-operator/templates/prometheus
diff options
context:
space:
mode:
Diffstat (limited to 'kud/tests/vnfs/comp-app/collection/app2/helm/prometheus-operator/templates/prometheus')
-rwxr-xr-xkud/tests/vnfs/comp-app/collection/app2/helm/prometheus-operator/templates/prometheus/additionalAlertRelabelConfigs.yaml16
-rwxr-xr-xkud/tests/vnfs/comp-app/collection/app2/helm/prometheus-operator/templates/prometheus/additionalAlertmanagerConfigs.yaml16
-rwxr-xr-xkud/tests/vnfs/comp-app/collection/app2/helm/prometheus-operator/templates/prometheus/additionalPrometheusRules.yaml40
-rwxr-xr-xkud/tests/vnfs/comp-app/collection/app2/helm/prometheus-operator/templates/prometheus/additionalScrapeConfigs.yaml16
-rwxr-xr-xkud/tests/vnfs/comp-app/collection/app2/helm/prometheus-operator/templates/prometheus/clusterrole.yaml36
-rwxr-xr-xkud/tests/vnfs/comp-app/collection/app2/helm/prometheus-operator/templates/prometheus/clusterrolebinding.yaml18
-rwxr-xr-xkud/tests/vnfs/comp-app/collection/app2/helm/prometheus-operator/templates/prometheus/ingress.yaml53
-rwxr-xr-xkud/tests/vnfs/comp-app/collection/app2/helm/prometheus-operator/templates/prometheus/ingressperreplica.yaml53
-rwxr-xr-xkud/tests/vnfs/comp-app/collection/app2/helm/prometheus-operator/templates/prometheus/podDisruptionBudget.yaml21
-rwxr-xr-xkud/tests/vnfs/comp-app/collection/app2/helm/prometheus-operator/templates/prometheus/podmonitors.yaml37
-rwxr-xr-xkud/tests/vnfs/comp-app/collection/app2/helm/prometheus-operator/templates/prometheus/prometheus.yaml249
-rwxr-xr-xkud/tests/vnfs/comp-app/collection/app2/helm/prometheus-operator/templates/prometheus/psp-clusterrole.yaml20
-rwxr-xr-xkud/tests/vnfs/comp-app/collection/app2/helm/prometheus-operator/templates/prometheus/psp-clusterrolebinding.yaml18
-rwxr-xr-xkud/tests/vnfs/comp-app/collection/app2/helm/prometheus-operator/templates/prometheus/psp.yaml56
-rwxr-xr-xkud/tests/vnfs/comp-app/collection/app2/helm/prometheus-operator/templates/prometheus/rules-1.14/alertmanager.rules.yaml54
-rwxr-xr-xkud/tests/vnfs/comp-app/collection/app2/helm/prometheus-operator/templates/prometheus/rules-1.14/etcd.yaml155
-rwxr-xr-xkud/tests/vnfs/comp-app/collection/app2/helm/prometheus-operator/templates/prometheus/rules-1.14/general.rules.yaml50
-rwxr-xr-xkud/tests/vnfs/comp-app/collection/app2/helm/prometheus-operator/templates/prometheus/rules-1.14/k8s.rules.yaml121
-rwxr-xr-xkud/tests/vnfs/comp-app/collection/app2/helm/prometheus-operator/templates/prometheus/rules-1.14/kube-apiserver-slos.yaml71
-rwxr-xr-xkud/tests/vnfs/comp-app/collection/app2/helm/prometheus-operator/templates/prometheus/rules-1.14/kube-apiserver.rules.yaml393
-rwxr-xr-xkud/tests/vnfs/comp-app/collection/app2/helm/prometheus-operator/templates/prometheus/rules-1.14/kube-prometheus-general.rules.yaml31
-rwxr-xr-xkud/tests/vnfs/comp-app/collection/app2/helm/prometheus-operator/templates/prometheus/rules-1.14/kube-prometheus-node-recording.rules.yaml41
-rwxr-xr-xkud/tests/vnfs/comp-app/collection/app2/helm/prometheus-operator/templates/prometheus/rules-1.14/kube-scheduler.rules.yaml63
-rwxr-xr-xkud/tests/vnfs/comp-app/collection/app2/helm/prometheus-operator/templates/prometheus/rules-1.14/kube-state-metrics.yaml51
-rwxr-xr-xkud/tests/vnfs/comp-app/collection/app2/helm/prometheus-operator/templates/prometheus/rules-1.14/kubelet.rules.yaml39
-rwxr-xr-xkud/tests/vnfs/comp-app/collection/app2/helm/prometheus-operator/templates/prometheus/rules-1.14/kubernetes-apps.yaml205
-rwxr-xr-xkud/tests/vnfs/comp-app/collection/app2/helm/prometheus-operator/templates/prometheus/rules-1.14/kubernetes-resources.yaml103
-rwxr-xr-xkud/tests/vnfs/comp-app/collection/app2/helm/prometheus-operator/templates/prometheus/rules-1.14/kubernetes-storage.yaml63
-rwxr-xr-xkud/tests/vnfs/comp-app/collection/app2/helm/prometheus-operator/templates/prometheus/rules-1.14/kubernetes-system-apiserver.yaml100
-rwxr-xr-xkud/tests/vnfs/comp-app/collection/app2/helm/prometheus-operator/templates/prometheus/rules-1.14/kubernetes-system-controller-manager.yaml37
-rwxr-xr-xkud/tests/vnfs/comp-app/collection/app2/helm/prometheus-operator/templates/prometheus/rules-1.14/kubernetes-system-kubelet.yaml84
-rwxr-xr-xkud/tests/vnfs/comp-app/collection/app2/helm/prometheus-operator/templates/prometheus/rules-1.14/kubernetes-system-scheduler.yaml37
-rwxr-xr-xkud/tests/vnfs/comp-app/collection/app2/helm/prometheus-operator/templates/prometheus/rules-1.14/kubernetes-system.yaml47
-rwxr-xr-xkud/tests/vnfs/comp-app/collection/app2/helm/prometheus-operator/templates/prometheus/rules-1.14/node-exporter.rules.yaml79
-rwxr-xr-xkud/tests/vnfs/comp-app/collection/app2/helm/prometheus-operator/templates/prometheus/rules-1.14/node-exporter.yaml202
-rwxr-xr-xkud/tests/vnfs/comp-app/collection/app2/helm/prometheus-operator/templates/prometheus/rules-1.14/node-network.yaml34
-rwxr-xr-xkud/tests/vnfs/comp-app/collection/app2/helm/prometheus-operator/templates/prometheus/rules-1.14/node.rules.yaml53
-rwxr-xr-xkud/tests/vnfs/comp-app/collection/app2/helm/prometheus-operator/templates/prometheus/rules-1.14/prometheus-operator.yaml43
-rwxr-xr-xkud/tests/vnfs/comp-app/collection/app2/helm/prometheus-operator/templates/prometheus/rules-1.14/prometheus.yaml202
-rwxr-xr-xkud/tests/vnfs/comp-app/collection/app2/helm/prometheus-operator/templates/prometheus/rules/alertmanager.rules.yaml54
-rwxr-xr-xkud/tests/vnfs/comp-app/collection/app2/helm/prometheus-operator/templates/prometheus/rules/etcd.yaml155
-rwxr-xr-xkud/tests/vnfs/comp-app/collection/app2/helm/prometheus-operator/templates/prometheus/rules/general.rules.yaml50
-rwxr-xr-xkud/tests/vnfs/comp-app/collection/app2/helm/prometheus-operator/templates/prometheus/rules/k8s.rules.yaml83
-rwxr-xr-xkud/tests/vnfs/comp-app/collection/app2/helm/prometheus-operator/templates/prometheus/rules/kube-apiserver.rules.yaml39
-rwxr-xr-xkud/tests/vnfs/comp-app/collection/app2/helm/prometheus-operator/templates/prometheus/rules/kube-prometheus-node-alerting.rules.yaml41
-rwxr-xr-xkud/tests/vnfs/comp-app/collection/app2/helm/prometheus-operator/templates/prometheus/rules/kube-prometheus-node-recording.rules.yaml41
-rwxr-xr-xkud/tests/vnfs/comp-app/collection/app2/helm/prometheus-operator/templates/prometheus/rules/kube-scheduler.rules.yaml63
-rwxr-xr-xkud/tests/vnfs/comp-app/collection/app2/helm/prometheus-operator/templates/prometheus/rules/kubernetes-absent.yaml129
-rwxr-xr-xkud/tests/vnfs/comp-app/collection/app2/helm/prometheus-operator/templates/prometheus/rules/kubernetes-apps.yaml161
-rwxr-xr-xkud/tests/vnfs/comp-app/collection/app2/helm/prometheus-operator/templates/prometheus/rules/kubernetes-resources.yaml103
-rwxr-xr-xkud/tests/vnfs/comp-app/collection/app2/helm/prometheus-operator/templates/prometheus/rules/kubernetes-storage.yaml63
-rwxr-xr-xkud/tests/vnfs/comp-app/collection/app2/helm/prometheus-operator/templates/prometheus/rules/kubernetes-system.yaml145
-rwxr-xr-xkud/tests/vnfs/comp-app/collection/app2/helm/prometheus-operator/templates/prometheus/rules/node-network.yaml48
-rwxr-xr-xkud/tests/vnfs/comp-app/collection/app2/helm/prometheus-operator/templates/prometheus/rules/node-time.yaml34
-rwxr-xr-xkud/tests/vnfs/comp-app/collection/app2/helm/prometheus-operator/templates/prometheus/rules/node.rules.yaml202
-rwxr-xr-xkud/tests/vnfs/comp-app/collection/app2/helm/prometheus-operator/templates/prometheus/rules/prometheus-operator.yaml43
-rwxr-xr-xkud/tests/vnfs/comp-app/collection/app2/helm/prometheus-operator/templates/prometheus/rules/prometheus.rules.yaml109
-rwxr-xr-xkud/tests/vnfs/comp-app/collection/app2/helm/prometheus-operator/templates/prometheus/service.yaml52
-rwxr-xr-xkud/tests/vnfs/comp-app/collection/app2/helm/prometheus-operator/templates/prometheus/serviceaccount.yaml16
-rwxr-xr-xkud/tests/vnfs/comp-app/collection/app2/helm/prometheus-operator/templates/prometheus/servicemonitor.yaml42
-rwxr-xr-xkud/tests/vnfs/comp-app/collection/app2/helm/prometheus-operator/templates/prometheus/servicemonitors.yaml34
-rwxr-xr-xkud/tests/vnfs/comp-app/collection/app2/helm/prometheus-operator/templates/prometheus/serviceperreplica.yaml46
62 files changed, 4760 insertions, 0 deletions
diff --git a/kud/tests/vnfs/comp-app/collection/app2/helm/prometheus-operator/templates/prometheus/additionalAlertRelabelConfigs.yaml b/kud/tests/vnfs/comp-app/collection/app2/helm/prometheus-operator/templates/prometheus/additionalAlertRelabelConfigs.yaml
new file mode 100755
index 00000000..33227dc5
--- /dev/null
+++ b/kud/tests/vnfs/comp-app/collection/app2/helm/prometheus-operator/templates/prometheus/additionalAlertRelabelConfigs.yaml
@@ -0,0 +1,16 @@
+{{- if and .Values.prometheus.enabled .Values.prometheus.prometheusSpec.additionalAlertRelabelConfigs }}
+# Secret that carries prometheusSpec.additionalAlertRelabelConfigs, serialized with toYaml and
+# base64-encoded under the key additional-alert-relabel-configs.yaml.
+# Rendered only when prometheus.enabled and prometheusSpec.additionalAlertRelabelConfigs are set.
+# Annotations are emitted only when prometheusSpec.additionalPrometheusSecretsAnnotations is set.
+apiVersion: v1
+kind: Secret
+metadata:
+ name: {{ template "prometheus-operator.fullname" . }}-prometheus-am-relabel-confg
+ namespace: {{ template "prometheus-operator.namespace" . }}
+{{- if .Values.prometheus.prometheusSpec.additionalPrometheusSecretsAnnotations }}
+ annotations:
+{{ toYaml .Values.prometheus.prometheusSpec.additionalPrometheusSecretsAnnotations | indent 4 }}
+{{- end }}
+ labels:
+ app: {{ template "prometheus-operator.name" . }}-prometheus-am-relabel-confg
+{{ include "prometheus-operator.labels" . | indent 4 }}
+data:
+ additional-alert-relabel-configs.yaml: {{ toYaml .Values.prometheus.prometheusSpec.additionalAlertRelabelConfigs | b64enc | quote }}
+{{- end }}
diff --git a/kud/tests/vnfs/comp-app/collection/app2/helm/prometheus-operator/templates/prometheus/additionalAlertmanagerConfigs.yaml b/kud/tests/vnfs/comp-app/collection/app2/helm/prometheus-operator/templates/prometheus/additionalAlertmanagerConfigs.yaml
new file mode 100755
index 00000000..61ba8c00
--- /dev/null
+++ b/kud/tests/vnfs/comp-app/collection/app2/helm/prometheus-operator/templates/prometheus/additionalAlertmanagerConfigs.yaml
@@ -0,0 +1,16 @@
+{{- if and .Values.prometheus.enabled .Values.prometheus.prometheusSpec.additionalAlertManagerConfigs }}
+# Secret that carries prometheusSpec.additionalAlertManagerConfigs, serialized with toYaml and
+# base64-encoded under the key additional-alertmanager-configs.yaml.
+# Rendered only when prometheus.enabled and prometheusSpec.additionalAlertManagerConfigs are set.
+# Annotations are emitted only when prometheusSpec.additionalPrometheusSecretsAnnotations is set.
+apiVersion: v1
+kind: Secret
+metadata:
+ name: {{ template "prometheus-operator.fullname" . }}-prometheus-am-confg
+ namespace: {{ template "prometheus-operator.namespace" . }}
+{{- if .Values.prometheus.prometheusSpec.additionalPrometheusSecretsAnnotations }}
+ annotations:
+{{ toYaml .Values.prometheus.prometheusSpec.additionalPrometheusSecretsAnnotations | indent 4 }}
+{{- end }}
+ labels:
+ app: {{ template "prometheus-operator.name" . }}-prometheus-am-confg
+{{ include "prometheus-operator.labels" . | indent 4 }}
+data:
+ additional-alertmanager-configs.yaml: {{ toYaml .Values.prometheus.prometheusSpec.additionalAlertManagerConfigs | b64enc | quote }}
+{{- end }}
diff --git a/kud/tests/vnfs/comp-app/collection/app2/helm/prometheus-operator/templates/prometheus/additionalPrometheusRules.yaml b/kud/tests/vnfs/comp-app/collection/app2/helm/prometheus-operator/templates/prometheus/additionalPrometheusRules.yaml
new file mode 100755
index 00000000..bc631fc8
--- /dev/null
+++ b/kud/tests/vnfs/comp-app/collection/app2/helm/prometheus-operator/templates/prometheus/additionalPrometheusRules.yaml
@@ -0,0 +1,40 @@
+{{- if or .Values.additionalPrometheusRules .Values.additionalPrometheusRulesMap}}
+# List of user-supplied PrometheusRule objects.
+# When additionalPrometheusRulesMap is set it is used (one rule per map key) and the
+# additionalPrometheusRules list is ignored; otherwise one rule is emitted per list entry.
+# Inside both range loops "." is the current rule entry, so every helper that needs the chart
+# root (name, namespace, labels) is called with "$".
+apiVersion: v1
+kind: List
+items:
+{{- if .Values.additionalPrometheusRulesMap }}
+{{- range $prometheusRuleName, $prometheusRule := .Values.additionalPrometheusRulesMap }}
+ - apiVersion: monitoring.coreos.com/v1
+ kind: PrometheusRule
+ metadata:
+ name: {{ template "prometheus-operator.name" $ }}-{{ $prometheusRuleName }}
+ namespace: {{ template "prometheus-operator.namespace" $ }}
+ labels:
+ app: {{ template "prometheus-operator.name" $ }}
+{{ include "prometheus-operator.labels" $ | indent 8 }}
+ {{- if $prometheusRule.additionalLabels }}
+{{ toYaml $prometheusRule.additionalLabels | indent 8 }}
+ {{- end }}
+ spec:
+ groups:
+{{ toYaml $prometheusRule.groups| indent 8 }}
+{{- end }}
+{{- else }}
+{{- range .Values.additionalPrometheusRules }}
+ - apiVersion: monitoring.coreos.com/v1
+ kind: PrometheusRule
+ metadata:
+ name: {{ template "prometheus-operator.name" $ }}-{{ .name }}
+ namespace: {{ template "prometheus-operator.namespace" $ }}
+ labels:
+ app: {{ template "prometheus-operator.name" $ }}
+{{ include "prometheus-operator.labels" $ | indent 8 }}
+ {{- if .additionalLabels }}
+{{ toYaml .additionalLabels | indent 8 }}
+ {{- end }}
+ spec:
+ groups:
+{{ toYaml .groups| indent 8 }}
+{{- end }}
+{{- end }}
+{{- end }}
diff --git a/kud/tests/vnfs/comp-app/collection/app2/helm/prometheus-operator/templates/prometheus/additionalScrapeConfigs.yaml b/kud/tests/vnfs/comp-app/collection/app2/helm/prometheus-operator/templates/prometheus/additionalScrapeConfigs.yaml
new file mode 100755
index 00000000..1158b346
--- /dev/null
+++ b/kud/tests/vnfs/comp-app/collection/app2/helm/prometheus-operator/templates/prometheus/additionalScrapeConfigs.yaml
@@ -0,0 +1,16 @@
+{{- if and .Values.prometheus.enabled .Values.prometheus.prometheusSpec.additionalScrapeConfigs }}
+# Secret that carries prometheusSpec.additionalScrapeConfigs, serialized with toYaml and
+# base64-encoded under the key additional-scrape-configs.yaml.
+# Rendered only when prometheus.enabled and prometheusSpec.additionalScrapeConfigs are set.
+# Annotations are emitted only when prometheusSpec.additionalPrometheusSecretsAnnotations is set.
+apiVersion: v1
+kind: Secret
+metadata:
+ name: {{ template "prometheus-operator.fullname" . }}-prometheus-scrape-confg
+ namespace: {{ template "prometheus-operator.namespace" . }}
+{{- if .Values.prometheus.prometheusSpec.additionalPrometheusSecretsAnnotations }}
+ annotations:
+{{ toYaml .Values.prometheus.prometheusSpec.additionalPrometheusSecretsAnnotations | indent 4 }}
+{{- end }}
+ labels:
+ app: {{ template "prometheus-operator.name" . }}-prometheus-scrape-confg
+{{ include "prometheus-operator.labels" . | indent 4 }}
+data:
+ additional-scrape-configs.yaml: {{ toYaml .Values.prometheus.prometheusSpec.additionalScrapeConfigs | b64enc | quote }}
+{{- end }}
diff --git a/kud/tests/vnfs/comp-app/collection/app2/helm/prometheus-operator/templates/prometheus/clusterrole.yaml b/kud/tests/vnfs/comp-app/collection/app2/helm/prometheus-operator/templates/prometheus/clusterrole.yaml
new file mode 100755
index 00000000..4dc3dc18
--- /dev/null
+++ b/kud/tests/vnfs/comp-app/collection/app2/helm/prometheus-operator/templates/prometheus/clusterrole.yaml
@@ -0,0 +1,36 @@
+{{- if and .Values.prometheus.enabled .Values.global.rbac.create }}
+# ClusterRole <fullname>-prometheus, rendered only when prometheus.enabled and
+# global.rbac.create are both set. It grants read-only (get/list/watch) access to the
+# resources listed below plus "get" on the /metrics non-resource URL.
+apiVersion: rbac.authorization.k8s.io/v1
+kind: ClusterRole
+metadata:
+ name: {{ template "prometheus-operator.fullname" . }}-prometheus
+ labels:
+ app: {{ template "prometheus-operator.name" . }}-prometheus
+{{ include "prometheus-operator.labels" . | indent 4 }}
+rules:
+- apiGroups:
+ - ""
+ resources:
+ - nodes/metrics
+ verbs:
+ - get
+ - list
+ - watch
+# These permissions are not in the prometheus-operator repo;
+# they are taken from https://github.com/prometheus/prometheus/blob/master/documentation/examples/rbac-setup.yml
+- apiGroups: [""]
+ resources:
+ - nodes
+ - nodes/proxy
+ - services
+ - endpoints
+ - pods
+ verbs: ["get", "list", "watch"]
+- apiGroups:
+ - extensions
+ - "networking.k8s.io"
+ resources:
+ - ingresses
+ verbs: ["get", "list", "watch"]
+- nonResourceURLs: ["/metrics"]
+ verbs: ["get"]
+{{- end }}
diff --git a/kud/tests/vnfs/comp-app/collection/app2/helm/prometheus-operator/templates/prometheus/clusterrolebinding.yaml b/kud/tests/vnfs/comp-app/collection/app2/helm/prometheus-operator/templates/prometheus/clusterrolebinding.yaml
new file mode 100755
index 00000000..c3195233
--- /dev/null
+++ b/kud/tests/vnfs/comp-app/collection/app2/helm/prometheus-operator/templates/prometheus/clusterrolebinding.yaml
@@ -0,0 +1,18 @@
+{{- if and .Values.prometheus.enabled .Values.global.rbac.create }}
+# Binds the ClusterRole <fullname>-prometheus to the ServiceAccount whose name comes from the
+# "prometheus-operator.prometheus.serviceAccountName" helper, in the chart's target namespace.
+# Rendered under the same condition as the ClusterRole (prometheus.enabled and global.rbac.create).
+apiVersion: rbac.authorization.k8s.io/v1
+kind: ClusterRoleBinding
+metadata:
+ name: {{ template "prometheus-operator.fullname" . }}-prometheus
+ labels:
+ app: {{ template "prometheus-operator.name" . }}-prometheus
+{{ include "prometheus-operator.labels" . | indent 4 }}
+roleRef:
+ apiGroup: rbac.authorization.k8s.io
+ kind: ClusterRole
+ name: {{ template "prometheus-operator.fullname" . }}-prometheus
+subjects:
+ - kind: ServiceAccount
+ name: {{ template "prometheus-operator.prometheus.serviceAccountName" . }}
+ namespace: {{ template "prometheus-operator.namespace" . }}
+{{- end }}
+
diff --git a/kud/tests/vnfs/comp-app/collection/app2/helm/prometheus-operator/templates/prometheus/ingress.yaml b/kud/tests/vnfs/comp-app/collection/app2/helm/prometheus-operator/templates/prometheus/ingress.yaml
new file mode 100755
index 00000000..2a9c4e60
--- /dev/null
+++ b/kud/tests/vnfs/comp-app/collection/app2/helm/prometheus-operator/templates/prometheus/ingress.yaml
@@ -0,0 +1,53 @@
+{{- if and .Values.prometheus.enabled .Values.prometheus.ingress.enabled }}
+# Ingress for the Prometheus service (<fullname>-prometheus on prometheus.service.port).
+# - apiVersion is networking.k8s.io/v1beta1 when the cluster advertises it, else extensions/v1beta1.
+# - paths come from prometheus.ingress.paths, defaulting to a one-element list holding
+#   prometheusSpec.routePrefix.
+# - with prometheus.ingress.hosts set, one rule is emitted per host; otherwise a single rule
+#   without a host. Host and path strings are passed through tpl before being emitted.
+# - the tls section is emitted only when prometheus.ingress.tls is set.
+{{- $serviceName := printf "%s-%s" (include "prometheus-operator.fullname" .) "prometheus" }}
+{{- $servicePort := .Values.prometheus.service.port -}}
+{{- $routePrefix := list .Values.prometheus.prometheusSpec.routePrefix }}
+{{- $paths := .Values.prometheus.ingress.paths | default $routePrefix -}}
+{{- if .Capabilities.APIVersions.Has "networking.k8s.io/v1beta1" }}
+apiVersion: networking.k8s.io/v1beta1
+{{ else }}
+apiVersion: extensions/v1beta1
+{{ end -}}
+kind: Ingress
+metadata:
+{{- if .Values.prometheus.ingress.annotations }}
+ annotations:
+{{ toYaml .Values.prometheus.ingress.annotations | indent 4 }}
+{{- end }}
+ name: {{ $serviceName }}
+ namespace: {{ template "prometheus-operator.namespace" . }}
+ labels:
+ app: {{ template "prometheus-operator.name" . }}-prometheus
+{{ include "prometheus-operator.labels" . | indent 4 }}
+{{- if .Values.prometheus.ingress.labels }}
+{{ toYaml .Values.prometheus.ingress.labels | indent 4 }}
+{{- end }}
+spec:
+ rules:
+ {{- if .Values.prometheus.ingress.hosts }}
+ {{- range $host := .Values.prometheus.ingress.hosts }}
+ - host: {{ tpl $host $ }}
+ http:
+ paths:
+ {{- range $p := $paths }}
+ - path: {{ tpl $p $ }}
+ backend:
+ serviceName: {{ $serviceName }}
+ servicePort: {{ $servicePort }}
+ {{- end -}}
+ {{- end -}}
+ {{- else }}
+ - http:
+ paths:
+ {{- range $p := $paths }}
+ - path: {{ tpl $p $ }}
+ backend:
+ serviceName: {{ $serviceName }}
+ servicePort: {{ $servicePort }}
+ {{- end -}}
+ {{- end -}}
+ {{- if .Values.prometheus.ingress.tls }}
+ tls:
+{{ toYaml .Values.prometheus.ingress.tls | indent 4 }}
+ {{- end -}}
+{{- end -}}
diff --git a/kud/tests/vnfs/comp-app/collection/app2/helm/prometheus-operator/templates/prometheus/ingressperreplica.yaml b/kud/tests/vnfs/comp-app/collection/app2/helm/prometheus-operator/templates/prometheus/ingressperreplica.yaml
new file mode 100755
index 00000000..57283b22
--- /dev/null
+++ b/kud/tests/vnfs/comp-app/collection/app2/helm/prometheus-operator/templates/prometheus/ingressperreplica.yaml
@@ -0,0 +1,53 @@
+{{- if and .Values.prometheus.enabled .Values.prometheus.servicePerReplica.enabled .Values.prometheus.ingressPerReplica.enabled }}
+# List with one Ingress per Prometheus replica (index 0 .. prometheusSpec.replicas - 1).
+# Rendered only when prometheus, servicePerReplica and ingressPerReplica are all enabled.
+# Each Ingress routes <hostPrefix>-<i>.<hostDomain> to service <fullname>-prometheus-<i> on
+# prometheus.servicePerReplica.port. TLS uses <tlsSecretPerReplica.prefix>-<i> when
+# tlsSecretPerReplica.enabled is set, otherwise tlsSecretName.
+{{- $count := .Values.prometheus.prometheusSpec.replicas | int -}}
+{{- $servicePort := .Values.prometheus.servicePerReplica.port -}}
+{{- $ingressValues := .Values.prometheus.ingressPerReplica -}}
+apiVersion: v1
+kind: List
+metadata:
+ name: {{ include "prometheus-operator.fullname" $ }}-prometheus-ingressperreplica
+ namespace: {{ template "prometheus-operator.namespace" . }}
+items:
+# Inside the loop "." is the integer replica index, so helpers that read .Values or .Release
+# must be given the root context "$".
+{{ range $i, $e := until $count }}
+ - kind: Ingress
+ {{- if $.Capabilities.APIVersions.Has "networking.k8s.io/v1beta1" }}
+ apiVersion: networking.k8s.io/v1beta1
+ {{ else }}
+ apiVersion: extensions/v1beta1
+ {{ end -}}
+ metadata:
+ name: {{ include "prometheus-operator.fullname" $ }}-prometheus-{{ $i }}
+ namespace: {{ template "prometheus-operator.namespace" $ }}
+ labels:
+ app: {{ include "prometheus-operator.name" $ }}-prometheus
+{{ include "prometheus-operator.labels" $ | indent 8 }}
+ {{- if $ingressValues.labels }}
+ {{ toYaml $ingressValues.labels | indent 8 }}
+ {{- end }}
+ {{- if $ingressValues.annotations }}
+ annotations:
+{{ toYaml $ingressValues.annotations | indent 8 }}
+ {{- end }}
+ spec:
+ rules:
+ - host: {{ $ingressValues.hostPrefix }}-{{ $i }}.{{ $ingressValues.hostDomain }}
+ http:
+ paths:
+ {{- range $p := $ingressValues.paths }}
+ - path: {{ tpl $p $ }}
+ backend:
+ serviceName: {{ include "prometheus-operator.fullname" $ }}-prometheus-{{ $i }}
+ servicePort: {{ $servicePort }}
+ {{- end -}}
+ {{- if or $ingressValues.tlsSecretName $ingressValues.tlsSecretPerReplica.enabled }}
+ tls:
+ - hosts:
+ - {{ $ingressValues.hostPrefix }}-{{ $i }}.{{ $ingressValues.hostDomain }}
+ {{- if $ingressValues.tlsSecretPerReplica.enabled }}
+ secretName: {{ $ingressValues.tlsSecretPerReplica.prefix }}-{{ $i }}
+ {{- else }}
+ secretName: {{ $ingressValues.tlsSecretName }}
+ {{- end }}
+ {{- end }}
+{{- end -}}
+{{- end -}}
diff --git a/kud/tests/vnfs/comp-app/collection/app2/helm/prometheus-operator/templates/prometheus/podDisruptionBudget.yaml b/kud/tests/vnfs/comp-app/collection/app2/helm/prometheus-operator/templates/prometheus/podDisruptionBudget.yaml
new file mode 100755
index 00000000..2853ac73
--- /dev/null
+++ b/kud/tests/vnfs/comp-app/collection/app2/helm/prometheus-operator/templates/prometheus/podDisruptionBudget.yaml
@@ -0,0 +1,21 @@
+{{- if and .Values.prometheus.enabled .Values.prometheus.podDisruptionBudget.enabled }}
+# PodDisruptionBudget for the Prometheus pods, selected by the labels
+# app=prometheus and prometheus=<fullname>-prometheus.
+# minAvailable / maxUnavailable are each emitted only when the corresponding value under
+# prometheus.podDisruptionBudget is set (a value of 0 counts as unset for the template "if").
+apiVersion: policy/v1beta1
+kind: PodDisruptionBudget
+metadata:
+ name: {{ template "prometheus-operator.fullname" . }}-prometheus
+ namespace: {{ template "prometheus-operator.namespace" . }}
+ labels:
+ app: {{ template "prometheus-operator.name" . }}-prometheus
+{{ include "prometheus-operator.labels" . | indent 4 }}
+spec:
+ {{- if .Values.prometheus.podDisruptionBudget.minAvailable }}
+ minAvailable: {{ .Values.prometheus.podDisruptionBudget.minAvailable }}
+ {{- end }}
+ {{- if .Values.prometheus.podDisruptionBudget.maxUnavailable }}
+ maxUnavailable: {{ .Values.prometheus.podDisruptionBudget.maxUnavailable }}
+ {{- end }}
+ selector:
+ matchLabels:
+ app: prometheus
+ prometheus: {{ template "prometheus-operator.fullname" . }}-prometheus
+{{- end }}
diff --git a/kud/tests/vnfs/comp-app/collection/app2/helm/prometheus-operator/templates/prometheus/podmonitors.yaml b/kud/tests/vnfs/comp-app/collection/app2/helm/prometheus-operator/templates/prometheus/podmonitors.yaml
new file mode 100755
index 00000000..88789557
--- /dev/null
+++ b/kud/tests/vnfs/comp-app/collection/app2/helm/prometheus-operator/templates/prometheus/podmonitors.yaml
@@ -0,0 +1,37 @@
+{{- if and .Values.prometheus.enabled .Values.prometheus.additionalPodMonitors }}
+# List with one PodMonitor per entry of prometheus.additionalPodMonitors.
+# Per entry: name, podMetricsEndpoints and selector are always emitted; additionalLabels,
+# jobLabel, namespaceSelector, podTargetLabels and sampleLimit only when set on the entry.
+# Inside the loop "." is the current entry, so helpers that need the chart root
+# (name, namespace, labels) are called with "$".
+apiVersion: v1
+kind: List
+items:
+{{- range .Values.prometheus.additionalPodMonitors }}
+ - apiVersion: monitoring.coreos.com/v1
+ kind: PodMonitor
+ metadata:
+ name: {{ .name }}
+ namespace: {{ template "prometheus-operator.namespace" $ }}
+ labels:
+ app: {{ template "prometheus-operator.name" $ }}-prometheus
+{{ include "prometheus-operator.labels" $ | indent 8 }}
+ {{- if .additionalLabels }}
+{{ toYaml .additionalLabels | indent 8 }}
+ {{- end }}
+ spec:
+ podMetricsEndpoints:
+{{ toYaml .podMetricsEndpoints | indent 8 }}
+ {{- if .jobLabel }}
+ jobLabel: {{ .jobLabel }}
+ {{- end }}
+ {{- if .namespaceSelector }}
+ namespaceSelector:
+{{ toYaml .namespaceSelector | indent 8 }}
+ {{- end }}
+ selector:
+{{ toYaml .selector | indent 8 }}
+ {{- if .podTargetLabels }}
+ podTargetLabels:
+{{ toYaml .podTargetLabels | indent 8 }}
+ {{- end }}
+ {{- if .sampleLimit }}
+ sampleLimit: {{ .sampleLimit }}
+ {{- end }}
+{{- end }}
+{{- end }}
diff --git a/kud/tests/vnfs/comp-app/collection/app2/helm/prometheus-operator/templates/prometheus/prometheus.yaml b/kud/tests/vnfs/comp-app/collection/app2/helm/prometheus-operator/templates/prometheus/prometheus.yaml
new file mode 100755
index 00000000..894b346d
--- /dev/null
+++ b/kud/tests/vnfs/comp-app/collection/app2/helm/prometheus-operator/templates/prometheus/prometheus.yaml
@@ -0,0 +1,249 @@
+{{- if .Values.prometheus.enabled }}
+# Prometheus custom resource (monitoring.coreos.com/v1) built from prometheus.prometheusSpec.
+# The whole document, including the trailing enforcedNamespaceLabel / volumes / volumeMounts
+# fields, is guarded by prometheus.enabled; the matching end is the last line of this file.
+apiVersion: monitoring.coreos.com/v1
+kind: Prometheus
+metadata:
+ name: {{ template "prometheus-operator.fullname" . }}-prometheus
+ namespace: {{ template "prometheus-operator.namespace" . }}
+ labels:
+ app: {{ template "prometheus-operator.name" . }}-prometheus
+{{ include "prometheus-operator.labels" . | indent 4 }}
+{{- if .Values.prometheus.annotations }}
+ annotations:
+{{ toYaml .Values.prometheus.annotations | indent 4 }}
+{{- end }}
+spec:
+ alerting:
+ alertmanagers:
+{{- if .Values.prometheus.prometheusSpec.alertingEndpoints }}
+{{ toYaml .Values.prometheus.prometheusSpec.alertingEndpoints | indent 6 }}
+{{- else if .Values.alertmanager.enabled }}
+ - namespace: {{ template "prometheus-operator.namespace" . }}
+ name: {{ template "prometheus-operator.fullname" . }}-alertmanager
+ port: {{ .Values.alertmanager.alertmanagerSpec.portName }}
+ {{- if .Values.alertmanager.alertmanagerSpec.routePrefix }}
+ pathPrefix: "{{ .Values.alertmanager.alertmanagerSpec.routePrefix }}"
+ {{- end }}
+ apiVersion: {{ .Values.alertmanager.apiVersion }}
+{{- else }}
+ []
+{{- end }}
+{{- if .Values.prometheus.prometheusSpec.apiserverConfig }}
+ apiserverConfig:
+{{ toYaml .Values.prometheus.prometheusSpec.apiserverConfig | indent 4}}
+{{- end }}
+{{- if .Values.prometheus.prometheusSpec.image }}
+ baseImage: {{ .Values.prometheus.prometheusSpec.image.repository }}
+ version: {{ .Values.prometheus.prometheusSpec.image.tag }}
+{{- end }}
+{{- if .Values.prometheus.prometheusSpec.externalLabels }}
+ externalLabels:
+{{ toYaml .Values.prometheus.prometheusSpec.externalLabels | indent 4}}
+{{- end }}
+{{- if .Values.prometheus.prometheusSpec.prometheusExternalLabelNameClear }}
+ prometheusExternalLabelName: ""
+{{- else if .Values.prometheus.prometheusSpec.prometheusExternalLabelName }}
+ prometheusExternalLabelName: "{{ .Values.prometheus.prometheusSpec.prometheusExternalLabelName }}"
+{{- end }}
+{{- if .Values.prometheus.prometheusSpec.replicaExternalLabelNameClear }}
+ replicaExternalLabelName: ""
+{{- else if .Values.prometheus.prometheusSpec.replicaExternalLabelName }}
+ replicaExternalLabelName: "{{ .Values.prometheus.prometheusSpec.replicaExternalLabelName }}"
+{{- end }}
+# externalUrl precedence: explicit prometheusSpec.externalUrl (passed through tpl), then the
+# first ingress host plus routePrefix when the ingress is enabled, else the in-cluster
+# <fullname>-prometheus.<namespace>:<service port> address.
+{{- if .Values.prometheus.prometheusSpec.externalUrl }}
+ externalUrl: "{{ tpl .Values.prometheus.prometheusSpec.externalUrl . }}"
+{{- else if and .Values.prometheus.ingress.enabled .Values.prometheus.ingress.hosts }}
+ externalUrl: "http://{{ tpl (index .Values.prometheus.ingress.hosts 0) . }}{{ .Values.prometheus.prometheusSpec.routePrefix }}"
+{{- else }}
+ externalUrl: http://{{ template "prometheus-operator.fullname" . }}-prometheus.{{ template "prometheus-operator.namespace" . }}:{{ .Values.prometheus.service.port }}
+{{- end }}
+{{- if .Values.prometheus.prometheusSpec.nodeSelector }}
+ nodeSelector:
+{{ toYaml .Values.prometheus.prometheusSpec.nodeSelector | indent 4 }}
+{{- end }}
+ paused: {{ .Values.prometheus.prometheusSpec.paused }}
+ replicas: {{ .Values.prometheus.prometheusSpec.replicas }}
+ logLevel: {{ .Values.prometheus.prometheusSpec.logLevel }}
+ logFormat: {{ .Values.prometheus.prometheusSpec.logFormat }}
+ listenLocal: {{ .Values.prometheus.prometheusSpec.listenLocal }}
+ enableAdminAPI: {{ .Values.prometheus.prometheusSpec.enableAdminAPI }}
+{{- if .Values.prometheus.prometheusSpec.scrapeInterval }}
+ scrapeInterval: {{ .Values.prometheus.prometheusSpec.scrapeInterval }}
+{{- end }}
+{{- if .Values.prometheus.prometheusSpec.evaluationInterval }}
+ evaluationInterval: {{ .Values.prometheus.prometheusSpec.evaluationInterval }}
+{{- end }}
+{{- if .Values.prometheus.prometheusSpec.resources }}
+ resources:
+{{ toYaml .Values.prometheus.prometheusSpec.resources | indent 4 }}
+{{- end }}
+ retention: {{ .Values.prometheus.prometheusSpec.retention | quote }}
+{{- if .Values.prometheus.prometheusSpec.retentionSize }}
+ retentionSize: {{ .Values.prometheus.prometheusSpec.retentionSize | quote }}
+{{- end }}
+{{- if .Values.prometheus.prometheusSpec.walCompression }}
+ walCompression: {{ .Values.prometheus.prometheusSpec.walCompression }}
+{{- end }}
+{{- if .Values.prometheus.prometheusSpec.routePrefix }}
+ routePrefix: {{ .Values.prometheus.prometheusSpec.routePrefix | quote }}
+{{- end }}
+{{- if .Values.prometheus.prometheusSpec.secrets }}
+ secrets:
+{{ toYaml .Values.prometheus.prometheusSpec.secrets | indent 4 }}
+{{- end }}
+{{- if .Values.prometheus.prometheusSpec.configMaps }}
+ configMaps:
+{{ toYaml .Values.prometheus.prometheusSpec.configMaps | indent 4 }}
+{{- end }}
+ serviceAccountName: {{ template "prometheus-operator.prometheus.serviceAccountName" . }}
+# Selector fallbacks: an explicit selector wins; otherwise the matching
+# *SelectorNilUsesHelmValues flag selects objects labelled with this release; otherwise
+# an empty selector ({}) is emitted.
+{{- if .Values.prometheus.prometheusSpec.serviceMonitorSelector }}
+ serviceMonitorSelector:
+{{ toYaml .Values.prometheus.prometheusSpec.serviceMonitorSelector | indent 4 }}
+{{ else if .Values.prometheus.prometheusSpec.serviceMonitorSelectorNilUsesHelmValues }}
+ serviceMonitorSelector:
+ matchLabels:
+ release: {{ $.Release.Name | quote }}
+{{ else }}
+ serviceMonitorSelector: {}
+{{- end }}
+{{- if .Values.prometheus.prometheusSpec.serviceMonitorNamespaceSelector }}
+ serviceMonitorNamespaceSelector:
+{{ toYaml .Values.prometheus.prometheusSpec.serviceMonitorNamespaceSelector | indent 4 }}
+{{ else }}
+ serviceMonitorNamespaceSelector: {}
+{{- end }}
+{{- if .Values.prometheus.prometheusSpec.podMonitorSelector }}
+ podMonitorSelector:
+{{ toYaml .Values.prometheus.prometheusSpec.podMonitorSelector | indent 4 }}
+{{ else if .Values.prometheus.prometheusSpec.podMonitorSelectorNilUsesHelmValues }}
+ podMonitorSelector:
+ matchLabels:
+ release: {{ $.Release.Name | quote }}
+{{ else }}
+ podMonitorSelector: {}
+{{- end }}
+{{- if .Values.prometheus.prometheusSpec.podMonitorNamespaceSelector }}
+ podMonitorNamespaceSelector:
+{{ toYaml .Values.prometheus.prometheusSpec.podMonitorNamespaceSelector | indent 4 }}
+{{ else }}
+ podMonitorNamespaceSelector: {}
+{{- end }}
+{{- if .Values.prometheus.prometheusSpec.remoteRead }}
+ remoteRead:
+{{ toYaml .Values.prometheus.prometheusSpec.remoteRead | indent 4 }}
+{{- end }}
+{{- if .Values.prometheus.prometheusSpec.remoteWrite }}
+ remoteWrite:
+{{ toYaml .Values.prometheus.prometheusSpec.remoteWrite | indent 4 }}
+{{- end }}
+{{- if .Values.prometheus.prometheusSpec.securityContext }}
+ securityContext:
+{{ toYaml .Values.prometheus.prometheusSpec.securityContext | indent 4 }}
+{{- end }}
+{{- if .Values.prometheus.prometheusSpec.ruleNamespaceSelector }}
+ ruleNamespaceSelector:
+{{ toYaml .Values.prometheus.prometheusSpec.ruleNamespaceSelector | indent 4 }}
+{{ else }}
+ ruleNamespaceSelector: {}
+{{- end }}
+{{- if .Values.prometheus.prometheusSpec.ruleSelector }}
+ ruleSelector:
+{{ toYaml .Values.prometheus.prometheusSpec.ruleSelector | indent 4}}
+{{- else if .Values.prometheus.prometheusSpec.ruleSelectorNilUsesHelmValues }}
+ ruleSelector:
+ matchLabels:
+ app: {{ template "prometheus-operator.name" . }}
+ release: {{ $.Release.Name | quote }}
+{{ else }}
+ ruleSelector: {}
+{{- end }}
+{{- if .Values.prometheus.prometheusSpec.storageSpec }}
+ storage:
+{{ toYaml .Values.prometheus.prometheusSpec.storageSpec | indent 4 }}
+{{- end }}
+{{- if .Values.prometheus.prometheusSpec.podMetadata }}
+ podMetadata:
+{{ toYaml .Values.prometheus.prometheusSpec.podMetadata | indent 4 }}
+{{- end }}
+{{- if .Values.prometheus.prometheusSpec.query }}
+ query:
+{{ toYaml .Values.prometheus.prometheusSpec.query | indent 4}}
+{{- end }}
+{{- if or .Values.prometheus.prometheusSpec.podAntiAffinity .Values.prometheus.prometheusSpec.affinity }}
+ affinity:
+{{- if .Values.prometheus.prometheusSpec.affinity }}
+{{ toYaml .Values.prometheus.prometheusSpec.affinity | indent 4 }}
+{{- end }}
+{{- if eq .Values.prometheus.prometheusSpec.podAntiAffinity "hard" }}
+ podAntiAffinity:
+ requiredDuringSchedulingIgnoredDuringExecution:
+ - topologyKey: {{ .Values.prometheus.prometheusSpec.podAntiAffinityTopologyKey }}
+ labelSelector:
+ matchLabels:
+ app: prometheus
+ prometheus: {{ template "prometheus-operator.fullname" . }}-prometheus
+{{- else if eq .Values.prometheus.prometheusSpec.podAntiAffinity "soft" }}
+ podAntiAffinity:
+ preferredDuringSchedulingIgnoredDuringExecution:
+ - weight: 100
+ podAffinityTerm:
+ topologyKey: {{ .Values.prometheus.prometheusSpec.podAntiAffinityTopologyKey }}
+ labelSelector:
+ matchLabels:
+ app: prometheus
+ prometheus: {{ template "prometheus-operator.fullname" . }}-prometheus
+{{- end }}
+{{- end }}
+{{- if .Values.prometheus.prometheusSpec.tolerations }}
+ tolerations:
+{{ toYaml .Values.prometheus.prometheusSpec.tolerations | indent 4 }}
+{{- end }}
+{{- if .Values.global.imagePullSecrets }}
+ imagePullSecrets:
+{{ toYaml .Values.global.imagePullSecrets | indent 4 }}
+{{- end }}
+{{- if or .Values.prometheus.prometheusSpec.additionalScrapeConfigs .Values.prometheus.prometheusSpec.additionalScrapeConfigsExternal }}
+ additionalScrapeConfigs:
+ name: {{ template "prometheus-operator.fullname" . }}-prometheus-scrape-confg
+ key: additional-scrape-configs.yaml
+{{- end }}
+{{- if .Values.prometheus.prometheusSpec.additionalAlertManagerConfigs }}
+ additionalAlertManagerConfigs:
+ name: {{ template "prometheus-operator.fullname" . }}-prometheus-am-confg
+ key: additional-alertmanager-configs.yaml
+{{- end }}
+{{- if .Values.prometheus.prometheusSpec.additionalAlertRelabelConfigs }}
+ additionalAlertRelabelConfigs:
+ name: {{ template "prometheus-operator.fullname" . }}-prometheus-am-relabel-confg
+ key: additional-alert-relabel-configs.yaml
+{{- end }}
+{{- if .Values.prometheus.prometheusSpec.containers }}
+ containers:
+{{ toYaml .Values.prometheus.prometheusSpec.containers | indent 4 }}
+{{- end }}
+{{- if .Values.prometheus.prometheusSpec.initContainers }}
+ initContainers:
+{{ toYaml .Values.prometheus.prometheusSpec.initContainers | indent 4 }}
+{{- end }}
+{{- if .Values.prometheus.prometheusSpec.priorityClassName }}
+ priorityClassName: {{ .Values.prometheus.prometheusSpec.priorityClassName }}
+{{- end }}
+{{- if .Values.prometheus.prometheusSpec.thanos }}
+ thanos:
+{{ toYaml .Values.prometheus.prometheusSpec.thanos | indent 4 }}
+{{- end }}
+{{- if .Values.prometheus.prometheusSpec.disableCompaction }}
+ disableCompaction: {{ .Values.prometheus.prometheusSpec.disableCompaction }}
+{{- end }}
+ portName: {{ .Values.prometheus.prometheusSpec.portName }}
+{{- if .Values.prometheus.prometheusSpec.enforcedNamespaceLabel }}
+ enforcedNamespaceLabel: {{ .Values.prometheus.prometheusSpec.enforcedNamespaceLabel }}
+{{- end }}
+{{- if .Values.prometheus.prometheusSpec.volumes }}
+ volumes:
+{{ toYaml .Values.prometheus.prometheusSpec.volumes | indent 4 }}
+{{- end }}
+{{- if .Values.prometheus.prometheusSpec.volumeMounts }}
+ volumeMounts:
+{{ toYaml .Values.prometheus.prometheusSpec.volumeMounts | indent 4 }}
+{{- end }}
+{{- end }}
diff --git a/kud/tests/vnfs/comp-app/collection/app2/helm/prometheus-operator/templates/prometheus/psp-clusterrole.yaml b/kud/tests/vnfs/comp-app/collection/app2/helm/prometheus-operator/templates/prometheus/psp-clusterrole.yaml
new file mode 100755
index 00000000..d5523d66
--- /dev/null
+++ b/kud/tests/vnfs/comp-app/collection/app2/helm/prometheus-operator/templates/prometheus/psp-clusterrole.yaml
@@ -0,0 +1,20 @@
+{{- /*
+ClusterRole granting the "use" verb on the Prometheus PodSecurityPolicy
+("<fullname>-prometheus"). Rendered only when prometheus.enabled,
+global.rbac.create and global.rbac.pspEnabled are all set.
+The PSP API group is "policy" when the target Kubernetes version satisfies
+"> 1.15.0-0" and "extensions" otherwise; kubeTargetVersionOverride, when set,
+takes the place of the version reported by the cluster.
+*/ -}}
+{{- if and .Values.prometheus.enabled .Values.global.rbac.create .Values.global.rbac.pspEnabled }}
+{{- $targetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }}
+apiVersion: rbac.authorization.k8s.io/v1
+kind: ClusterRole
+metadata:
+  name: {{ include "prometheus-operator.fullname" . }}-prometheus-psp
+  labels:
+    app: {{ include "prometheus-operator.name" . }}-prometheus
+{{ include "prometheus-operator.labels" . | indent 4 }}
+rules:
+- apiGroups:
+  - {{ if semverCompare "> 1.15.0-0" $targetVersion }}policy{{ else }}extensions{{ end }}
+  resources:
+  - podsecuritypolicies
+  verbs:
+  - use
+  resourceNames:
+  - {{ include "prometheus-operator.fullname" . }}-prometheus
+{{- end }}
diff --git a/kud/tests/vnfs/comp-app/collection/app2/helm/prometheus-operator/templates/prometheus/psp-clusterrolebinding.yaml b/kud/tests/vnfs/comp-app/collection/app2/helm/prometheus-operator/templates/prometheus/psp-clusterrolebinding.yaml
new file mode 100755
index 00000000..cf26f49d
--- /dev/null
+++ b/kud/tests/vnfs/comp-app/collection/app2/helm/prometheus-operator/templates/prometheus/psp-clusterrolebinding.yaml
@@ -0,0 +1,18 @@
+{{- /*
+ClusterRoleBinding that binds the Prometheus service account (in the chart
+namespace) to the "<fullname>-prometheus-psp" ClusterRole. Rendered only when
+prometheus.enabled, global.rbac.create and global.rbac.pspEnabled are all set.
+*/ -}}
+{{- if and .Values.prometheus.enabled .Values.global.rbac.create .Values.global.rbac.pspEnabled }}
+kind: ClusterRoleBinding
+apiVersion: rbac.authorization.k8s.io/v1
+metadata:
+  labels:
+    app: {{ include "prometheus-operator.name" . }}-prometheus
+{{ include "prometheus-operator.labels" . | indent 4 }}
+  name: {{ include "prometheus-operator.fullname" . }}-prometheus-psp
+roleRef:
+  kind: ClusterRole
+  name: {{ include "prometheus-operator.fullname" . }}-prometheus-psp
+  apiGroup: rbac.authorization.k8s.io
+subjects:
+- kind: ServiceAccount
+  namespace: {{ include "prometheus-operator.namespace" . }}
+  name: {{ include "prometheus-operator.prometheus.serviceAccountName" . }}
+{{- end }}
+
diff --git a/kud/tests/vnfs/comp-app/collection/app2/helm/prometheus-operator/templates/prometheus/psp.yaml b/kud/tests/vnfs/comp-app/collection/app2/helm/prometheus-operator/templates/prometheus/psp.yaml
new file mode 100755
index 00000000..6a238f88
--- /dev/null
+++ b/kud/tests/vnfs/comp-app/collection/app2/helm/prometheus-operator/templates/prometheus/psp.yaml
@@ -0,0 +1,56 @@
+{{- /*
+PodSecurityPolicy for the Prometheus pods. Rendered only when
+prometheus.enabled, global.rbac.create and global.rbac.pspEnabled are all set.
+Optional inputs:
+  global.rbac.pspAnnotations                        - map rendered as metadata.annotations
+  prometheus.podSecurityPolicy.allowedCapabilities  - list rendered as spec.allowedCapabilities
+*/ -}}
+{{- if and .Values.prometheus.enabled .Values.global.rbac.create .Values.global.rbac.pspEnabled }}
+apiVersion: policy/v1beta1
+kind: PodSecurityPolicy
+metadata:
+  name: {{ template "prometheus-operator.fullname" . }}-prometheus
+  namespace: {{ template "prometheus-operator.namespace" . }}
+  labels:
+    app: {{ template "prometheus-operator.name" . }}-prometheus
+{{- /* The shared labels must come before "annotations:"; emitted after it they would nest under annotations. */}}
+{{ include "prometheus-operator.labels" . | indent 4 }}
+{{- if .Values.global.rbac.pspAnnotations }}
+  annotations:
+{{ toYaml .Values.global.rbac.pspAnnotations | indent 4 }}
+{{- end }}
+spec:
+  privileged: false
+  # Required to prevent escalations to root.
+  # allowPrivilegeEscalation: false
+  # This is redundant with non-root + disallow privilege escalation,
+  # but we can provide it for defense in depth.
+  #requiredDropCapabilities:
+  #  - ALL
+  # Allow core volume types.
+  volumes:
+    - 'configMap'
+    - 'emptyDir'
+    - 'projected'
+    - 'secret'
+    - 'downwardAPI'
+    - 'persistentVolumeClaim'
+  hostNetwork: false
+  hostIPC: false
+  hostPID: false
+  runAsUser:
+    # Permits the container to run with root privileges as well.
+    rule: 'RunAsAny'
+  seLinux:
+    # This policy assumes the nodes are using AppArmor rather than SELinux.
+    rule: 'RunAsAny'
+  supplementalGroups:
+    rule: 'MustRunAs'
+    ranges:
+      # The range starts at 0, so the root group (GID 0) is permitted.
+      - min: 0
+        max: 65535
+  fsGroup:
+    rule: 'MustRunAs'
+    ranges:
+      # The range starts at 0, so the root group (GID 0) is permitted.
+      - min: 0
+        max: 65535
+  readOnlyRootFilesystem: false
+{{- if .Values.prometheus.podSecurityPolicy.allowedCapabilities }}
+  allowedCapabilities:
+{{ toYaml .Values.prometheus.podSecurityPolicy.allowedCapabilities | indent 4 }}
+{{- end }}
+{{- end }}
diff --git a/kud/tests/vnfs/comp-app/collection/app2/helm/prometheus-operator/templates/prometheus/rules-1.14/alertmanager.rules.yaml b/kud/tests/vnfs/comp-app/collection/app2/helm/prometheus-operator/templates/prometheus/rules-1.14/alertmanager.rules.yaml
new file mode 100755
index 00000000..1c6db409
--- /dev/null
+++ b/kud/tests/vnfs/comp-app/collection/app2/helm/prometheus-operator/templates/prometheus/rules-1.14/alertmanager.rules.yaml
@@ -0,0 +1,54 @@
+{{- /*
+Generated from 'alertmanager.rules' group from https://raw.githubusercontent.com/coreos/kube-prometheus/master/manifests/prometheus-rules.yaml
+Do not change in-place! In order to change this file first read following link:
+https://github.com/helm/charts/tree/master/stable/prometheus-operator/hack
+
+Renders one PrometheusRule containing the 'alertmanager.rules' alert group.
+It is emitted only when the target Kubernetes version (kubeTargetVersionOverride
+if set, otherwise the cluster's GitVersion) is >=1.14.0-0 and <9.9.9-9, and both
+defaultRules.create and defaultRules.rules.alertmanager are set.
+defaultRules.labels / defaultRules.annotations, when set, are appended to the
+object's metadata. The backtick-quoted brace sequences in the messages emit
+literal double braces, so the $labels placeholders reach the rendered manifest
+unexpanded by Helm.
+*/ -}}
+{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }}
+{{- if and (semverCompare ">=1.14.0-0" $kubeTargetVersion) (semverCompare "<9.9.9-9" $kubeTargetVersion) .Values.defaultRules.create .Values.defaultRules.rules.alertmanager }}
+{{- /* "<fullname>-operator", "<fullname>-alertmanager" and the chart namespace, interpolated into the job/namespace matchers of the expressions below. */}}
+{{- $operatorJob := printf "%s-%s" (include "prometheus-operator.fullname" .) "operator" }}
+{{- $alertmanagerJob := printf "%s-%s" (include "prometheus-operator.fullname" .) "alertmanager" }}
+{{- $namespace := printf "%s" (include "prometheus-operator.namespace" .) }}
+apiVersion: monitoring.coreos.com/v1
+kind: PrometheusRule
+metadata:
+ name: {{ printf "%s-%s" (include "prometheus-operator.fullname" .) "alertmanager.rules" | trunc 63 | trimSuffix "-" }}
+ namespace: {{ template "prometheus-operator.namespace" . }}
+ labels:
+ app: {{ template "prometheus-operator.name" . }}
+{{ include "prometheus-operator.labels" . | indent 4 }}
+{{- if .Values.defaultRules.labels }}
+{{ toYaml .Values.defaultRules.labels | indent 4 }}
+{{- end }}
+{{- if .Values.defaultRules.annotations }}
+ annotations:
+{{ toYaml .Values.defaultRules.annotations | indent 4 }}
+{{- end }}
+spec:
+ groups:
+ - name: alertmanager.rules
+ rules:
+ - alert: AlertmanagerConfigInconsistent
+ annotations:
+ message: The configuration of the instances of the Alertmanager cluster `{{`{{`}}$labels.service{{`}}`}}` are out of sync.
+ expr: count_values("config_hash", alertmanager_config_hash{job="{{ $alertmanagerJob }}",namespace="{{ $namespace }}"}) BY (service) / ON(service) GROUP_LEFT() label_replace(max(prometheus_operator_spec_replicas{job="{{ $operatorJob }}",namespace="{{ $namespace }}",controller="alertmanager"}) by (name, job, namespace, controller), "service", "$1", "name", "(.*)") != 1
+ for: 5m
+ labels:
+ severity: critical
+ - alert: AlertmanagerFailedReload
+ annotations:
+ message: Reloading Alertmanager's configuration has failed for {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.pod{{`}}`}}.
+ expr: alertmanager_config_last_reload_successful{job="{{ $alertmanagerJob }}",namespace="{{ $namespace }}"} == 0
+ for: 10m
+ labels:
+ severity: warning
+ - alert: AlertmanagerMembersInconsistent
+ annotations:
+ message: Alertmanager has not found all other members of the cluster.
+ expr: |-
+ alertmanager_cluster_members{job="{{ $alertmanagerJob }}",namespace="{{ $namespace }}"}
+ != on (service) GROUP_LEFT()
+ count by (service) (alertmanager_cluster_members{job="{{ $alertmanagerJob }}",namespace="{{ $namespace }}"})
+ for: 5m
+ labels:
+ severity: critical
+{{- end }} \ No newline at end of file
diff --git a/kud/tests/vnfs/comp-app/collection/app2/helm/prometheus-operator/templates/prometheus/rules-1.14/etcd.yaml b/kud/tests/vnfs/comp-app/collection/app2/helm/prometheus-operator/templates/prometheus/rules-1.14/etcd.yaml
new file mode 100755
index 00000000..97a9825d
--- /dev/null
+++ b/kud/tests/vnfs/comp-app/collection/app2/helm/prometheus-operator/templates/prometheus/rules-1.14/etcd.yaml
@@ -0,0 +1,155 @@
+{{- /*
+Generated from 'etcd' group from https://raw.githubusercontent.com/etcd-io/etcd/master/Documentation/op-guide/etcd3_alert.rules.yml
+Do not change in-place! In order to change this file first read following link:
+https://github.com/helm/charts/tree/master/stable/prometheus-operator/hack
+
+Renders one PrometheusRule containing the 'etcd' alert group. It is emitted
+only when the target Kubernetes version (kubeTargetVersionOverride if set,
+otherwise the cluster's GitVersion) is >=1.14.0-0 and <9.9.9-9, and
+defaultRules.create, kubeEtcd.enabled and defaultRules.rules.etcd are all set.
+Unless noted otherwise below, the expressions select series with the regex
+matcher job=~".*etcd.*". The backtick-quoted brace sequences in the messages
+emit literal double braces, so the $labels / $value placeholders reach the
+rendered manifest unexpanded by Helm.
+*/ -}}
+{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }}
+{{- if and (semverCompare ">=1.14.0-0" $kubeTargetVersion) (semverCompare "<9.9.9-9" $kubeTargetVersion) .Values.defaultRules.create .Values.kubeEtcd.enabled .Values.defaultRules.rules.etcd }}
+apiVersion: monitoring.coreos.com/v1
+kind: PrometheusRule
+metadata:
+ name: {{ printf "%s-%s" (include "prometheus-operator.fullname" .) "etcd" | trunc 63 | trimSuffix "-" }}
+ namespace: {{ template "prometheus-operator.namespace" . }}
+ labels:
+ app: {{ template "prometheus-operator.name" . }}
+{{ include "prometheus-operator.labels" . | indent 4 }}
+{{- if .Values.defaultRules.labels }}
+{{ toYaml .Values.defaultRules.labels | indent 4 }}
+{{- end }}
+{{- if .Values.defaultRules.annotations }}
+ annotations:
+{{ toYaml .Values.defaultRules.annotations | indent 4 }}
+{{- end }}
+spec:
+ groups:
+ - name: etcd
+ rules:
+ - alert: etcdMembersDown
+ annotations:
+ message: 'etcd cluster "{{`{{`}} $labels.job {{`}}`}}": members are down ({{`{{`}} $value {{`}}`}}).'
+ expr: |-
+ max by (job) (
+ sum by (job) (up{job=~".*etcd.*"} == bool 0)
+ or
+ count by (job,endpoint) (
+ sum by (job,endpoint,To) (rate(etcd_network_peer_sent_failures_total{job=~".*etcd.*"}[3m])) > 0.01
+ )
+ )
+ > 0
+ for: 3m
+ labels:
+ severity: critical
+ - alert: etcdInsufficientMembers
+ annotations:
+ message: 'etcd cluster "{{`{{`}} $labels.job {{`}}`}}": insufficient members ({{`{{`}} $value {{`}}`}}).'
+ expr: sum(up{job=~".*etcd.*"} == bool 1) by (job) < ((count(up{job=~".*etcd.*"}) by (job) + 1) / 2)
+ for: 3m
+ labels:
+ severity: critical
+ - alert: etcdNoLeader
+ annotations:
+ message: 'etcd cluster "{{`{{`}} $labels.job {{`}}`}}": member {{`{{`}} $labels.instance {{`}}`}} has no leader.'
+ expr: etcd_server_has_leader{job=~".*etcd.*"} == 0
+ for: 1m
+ labels:
+ severity: critical
+ - alert: etcdHighNumberOfLeaderChanges
+ annotations:
+ message: 'etcd cluster "{{`{{`}} $labels.job {{`}}`}}": {{`{{`}} $value {{`}}`}} leader changes within the last 15 minutes. Frequent elections may be a sign of insufficient resources, high network latency, or disruptions by other components and should be investigated.'
+ expr: increase((max by (job) (etcd_server_leader_changes_seen_total{job=~".*etcd.*"}) or 0*absent(etcd_server_leader_changes_seen_total{job=~".*etcd.*"}))[15m:1m]) >= 3
+ for: 5m
+ labels:
+ severity: warning
+{{- /* The next two alerts share a name and expression; they differ in threshold (1 vs 5), hold time (10m vs 5m) and severity (warning vs critical). */}}
+ - alert: etcdHighNumberOfFailedGRPCRequests
+ annotations:
+ message: 'etcd cluster "{{`{{`}} $labels.job {{`}}`}}": {{`{{`}} $value {{`}}`}}% of requests for {{`{{`}} $labels.grpc_method {{`}}`}} failed on etcd instance {{`{{`}} $labels.instance {{`}}`}}.'
+ expr: |-
+ 100 * sum(rate(grpc_server_handled_total{job=~".*etcd.*", grpc_code!="OK"}[5m])) BY (job, instance, grpc_service, grpc_method)
+ /
+ sum(rate(grpc_server_handled_total{job=~".*etcd.*"}[5m])) BY (job, instance, grpc_service, grpc_method)
+ > 1
+ for: 10m
+ labels:
+ severity: warning
+ - alert: etcdHighNumberOfFailedGRPCRequests
+ annotations:
+ message: 'etcd cluster "{{`{{`}} $labels.job {{`}}`}}": {{`{{`}} $value {{`}}`}}% of requests for {{`{{`}} $labels.grpc_method {{`}}`}} failed on etcd instance {{`{{`}} $labels.instance {{`}}`}}.'
+ expr: |-
+ 100 * sum(rate(grpc_server_handled_total{job=~".*etcd.*", grpc_code!="OK"}[5m])) BY (job, instance, grpc_service, grpc_method)
+ /
+ sum(rate(grpc_server_handled_total{job=~".*etcd.*"}[5m])) BY (job, instance, grpc_service, grpc_method)
+ > 5
+ for: 5m
+ labels:
+ severity: critical
+ - alert: etcdGRPCRequestsSlow
+ annotations:
+ message: 'etcd cluster "{{`{{`}} $labels.job {{`}}`}}": gRPC requests to {{`{{`}} $labels.grpc_method {{`}}`}} are taking {{`{{`}} $value {{`}}`}}s on etcd instance {{`{{`}} $labels.instance {{`}}`}}.'
+ expr: |-
+ histogram_quantile(0.99, sum(rate(grpc_server_handling_seconds_bucket{job=~".*etcd.*", grpc_type="unary"}[5m])) by (job, instance, grpc_service, grpc_method, le))
+ > 0.15
+ for: 10m
+ labels:
+ severity: critical
+ - alert: etcdMemberCommunicationSlow
+ annotations:
+ message: 'etcd cluster "{{`{{`}} $labels.job {{`}}`}}": member communication with {{`{{`}} $labels.To {{`}}`}} is taking {{`{{`}} $value {{`}}`}}s on etcd instance {{`{{`}} $labels.instance {{`}}`}}.'
+ expr: |-
+ histogram_quantile(0.99, rate(etcd_network_peer_round_trip_time_seconds_bucket{job=~".*etcd.*"}[5m]))
+ > 0.15
+ for: 10m
+ labels:
+ severity: warning
+{{- /* NOTE(review): the message says "last 30 minutes" while the expression is a 15m rate held for 15m, and $value is a per-second rate rather than a failure count - confirm the wording at the generator source. */}}
+ - alert: etcdHighNumberOfFailedProposals
+ annotations:
+ message: 'etcd cluster "{{`{{`}} $labels.job {{`}}`}}": {{`{{`}} $value {{`}}`}} proposal failures within the last 30 minutes on etcd instance {{`{{`}} $labels.instance {{`}}`}}.'
+ expr: rate(etcd_server_proposals_failed_total{job=~".*etcd.*"}[15m]) > 5
+ for: 15m
+ labels:
+ severity: warning
+{{- /* NOTE(review): "fync" in the message below is presumably a typo for "fsync"; it is runtime text, so fix it at the generator source. */}}
+ - alert: etcdHighFsyncDurations
+ annotations:
+ message: 'etcd cluster "{{`{{`}} $labels.job {{`}}`}}": 99th percentile fync durations are {{`{{`}} $value {{`}}`}}s on etcd instance {{`{{`}} $labels.instance {{`}}`}}.'
+ expr: |-
+ histogram_quantile(0.99, rate(etcd_disk_wal_fsync_duration_seconds_bucket{job=~".*etcd.*"}[5m]))
+ > 0.5
+ for: 10m
+ labels:
+ severity: warning
+ - alert: etcdHighCommitDurations
+ annotations:
+ message: 'etcd cluster "{{`{{`}} $labels.job {{`}}`}}": 99th percentile commit durations {{`{{`}} $value {{`}}`}}s on etcd instance {{`{{`}} $labels.instance {{`}}`}}.'
+ expr: |-
+ histogram_quantile(0.99, rate(etcd_disk_backend_commit_duration_seconds_bucket{job=~".*etcd.*"}[5m]))
+ > 0.25
+ for: 10m
+ labels:
+ severity: warning
+{{- /* NOTE(review): both etcdHighNumberOfFailedHTTPRequests messages print $value followed by "%", but the expressions compare an unscaled ratio (no "100 *", unlike the gRPC alerts) against 0.01 / 0.05. They also aggregate by method only, so $labels.instance is presumably empty - confirm upstream. */}}
+ - alert: etcdHighNumberOfFailedHTTPRequests
+ annotations:
+ message: '{{`{{`}} $value {{`}}`}}% of requests for {{`{{`}} $labels.method {{`}}`}} failed on etcd instance {{`{{`}} $labels.instance {{`}}`}}'
+ expr: |-
+ sum(rate(etcd_http_failed_total{job=~".*etcd.*", code!="404"}[5m])) BY (method) / sum(rate(etcd_http_received_total{job=~".*etcd.*"}[5m]))
+ BY (method) > 0.01
+ for: 10m
+ labels:
+ severity: warning
+ - alert: etcdHighNumberOfFailedHTTPRequests
+ annotations:
+ message: '{{`{{`}} $value {{`}}`}}% of requests for {{`{{`}} $labels.method {{`}}`}} failed on etcd instance {{`{{`}} $labels.instance {{`}}`}}.'
+ expr: |-
+ sum(rate(etcd_http_failed_total{job=~".*etcd.*", code!="404"}[5m])) BY (method) / sum(rate(etcd_http_received_total{job=~".*etcd.*"}[5m]))
+ BY (method) > 0.05
+ for: 10m
+ labels:
+ severity: critical
+{{- /* NOTE(review): unlike every other rule in this group, the expression below has no job matcher. */}}
+ - alert: etcdHTTPRequestsSlow
+ annotations:
+ message: etcd instance {{`{{`}} $labels.instance {{`}}`}} HTTP requests to {{`{{`}} $labels.method {{`}}`}} are slow.
+ expr: |-
+ histogram_quantile(0.99, rate(etcd_http_successful_duration_seconds_bucket[5m]))
+ > 0.15
+ for: 10m
+ labels:
+ severity: warning
+{{- end }} \ No newline at end of file
diff --git a/kud/tests/vnfs/comp-app/collection/app2/helm/prometheus-operator/templates/prometheus/rules-1.14/general.rules.yaml b/kud/tests/vnfs/comp-app/collection/app2/helm/prometheus-operator/templates/prometheus/rules-1.14/general.rules.yaml
new file mode 100755
index 00000000..4ccd9441
--- /dev/null
+++ b/kud/tests/vnfs/comp-app/collection/app2/helm/prometheus-operator/templates/prometheus/rules-1.14/general.rules.yaml
@@ -0,0 +1,50 @@
+{{- /*
+Generated from 'general.rules' group from https://raw.githubusercontent.com/coreos/kube-prometheus/master/manifests/prometheus-rules.yaml
+Do not change in-place! In order to change this file first read following link:
+https://github.com/helm/charts/tree/master/stable/prometheus-operator/hack
+
+Renders one PrometheusRule containing the 'general.rules' alert group. It is
+emitted only when the target Kubernetes version (kubeTargetVersionOverride if
+set, otherwise the cluster's GitVersion) is >=1.14.0-0 and <9.9.9-9, and both
+defaultRules.create and defaultRules.rules.general are set.
+Alerts:
+  TargetDown - more than 10 percent of the "up" series of a
+               (job, namespace, service) group equal 0, held for 10m.
+  Watchdog   - expression vector(1) with no "for" clause and severity "none";
+               per its message it is meant to fire permanently.
+*/ -}}
+{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }}
+{{- if and (semverCompare ">=1.14.0-0" $kubeTargetVersion) (semverCompare "<9.9.9-9" $kubeTargetVersion) .Values.defaultRules.create .Values.defaultRules.rules.general }}
+apiVersion: monitoring.coreos.com/v1
+kind: PrometheusRule
+metadata:
+ name: {{ printf "%s-%s" (include "prometheus-operator.fullname" .) "general.rules" | trunc 63 | trimSuffix "-" }}
+ namespace: {{ template "prometheus-operator.namespace" . }}
+ labels:
+ app: {{ template "prometheus-operator.name" . }}
+{{ include "prometheus-operator.labels" . | indent 4 }}
+{{- if .Values.defaultRules.labels }}
+{{ toYaml .Values.defaultRules.labels | indent 4 }}
+{{- end }}
+{{- if .Values.defaultRules.annotations }}
+ annotations:
+{{ toYaml .Values.defaultRules.annotations | indent 4 }}
+{{- end }}
+spec:
+ groups:
+ - name: general.rules
+ rules:
+ - alert: TargetDown
+ annotations:
+ message: '{{`{{`}} printf "%.4g" $value {{`}}`}}% of the {{`{{`}} $labels.job {{`}}`}}/{{`{{`}} $labels.service {{`}}`}} targets in {{`{{`}} $labels.namespace {{`}}`}} namespace are down.'
+ expr: 100 * (count(up == 0) BY (job, namespace, service) / count(up) BY (job, namespace, service)) > 10
+ for: 10m
+ labels:
+ severity: warning
+ - alert: Watchdog
+ annotations:
+ message: 'This is an alert meant to ensure that the entire alerting pipeline is functional.
+
+ This alert is always firing, therefore it should always be firing in Alertmanager
+
+ and always fire against a receiver. There are integrations with various notification
+
+ mechanisms that send a notification when this alert is not firing. For example the
+
+ "DeadMansSnitch" integration in PagerDuty.
+
+ '
+ expr: vector(1)
+ labels:
+ severity: none
+{{- end }} \ No newline at end of file
diff --git a/kud/tests/vnfs/comp-app/collection/app2/helm/prometheus-operator/templates/prometheus/rules-1.14/k8s.rules.yaml b/kud/tests/vnfs/comp-app/collection/app2/helm/prometheus-operator/templates/prometheus/rules-1.14/k8s.rules.yaml
new file mode 100755
index 00000000..4bc2cc7d
--- /dev/null
+++ b/kud/tests/vnfs/comp-app/collection/app2/helm/prometheus-operator/templates/prometheus/rules-1.14/k8s.rules.yaml
@@ -0,0 +1,121 @@
+{{- /*
+Generated from 'k8s.rules' group from https://raw.githubusercontent.com/coreos/kube-prometheus/master/manifests/prometheus-rules.yaml
+Do not change in-place! In order to change this file first read following link:
+https://github.com/helm/charts/tree/master/stable/prometheus-operator/hack
+
+Renders one PrometheusRule containing the 'k8s.rules' group, which holds only
+recording rules (every entry has "record:", none has "alert:"). It is emitted
+only when the target Kubernetes version (kubeTargetVersionOverride if set,
+otherwise the cluster's GitVersion) is >=1.14.0-0 and <9.9.9-9, and both
+defaultRules.create and defaultRules.rules.k8s are set.
+The selectors are hard-coded: container metrics are matched with
+job="kubelet", metrics_path="/metrics/cadvisor", and the kube_pod_container_*
+and kube_*_owner metrics with job="kube-state-metrics"; no chart value is
+interpolated into the expressions.
+*/ -}}
+{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }}
+{{- if and (semverCompare ">=1.14.0-0" $kubeTargetVersion) (semverCompare "<9.9.9-9" $kubeTargetVersion) .Values.defaultRules.create .Values.defaultRules.rules.k8s }}
+apiVersion: monitoring.coreos.com/v1
+kind: PrometheusRule
+metadata:
+ name: {{ printf "%s-%s" (include "prometheus-operator.fullname" .) "k8s.rules" | trunc 63 | trimSuffix "-" }}
+ namespace: {{ template "prometheus-operator.namespace" . }}
+ labels:
+ app: {{ template "prometheus-operator.name" . }}
+{{ include "prometheus-operator.labels" . | indent 4 }}
+{{- if .Values.defaultRules.labels }}
+{{ toYaml .Values.defaultRules.labels | indent 4 }}
+{{- end }}
+{{- if .Values.defaultRules.annotations }}
+ annotations:
+{{ toYaml .Values.defaultRules.annotations | indent 4 }}
+{{- end }}
+spec:
+ groups:
+ - name: k8s.rules
+ rules:
+ - expr: sum(rate(container_cpu_usage_seconds_total{job="kubelet", metrics_path="/metrics/cadvisor", image!="", container!="POD"}[5m])) by (namespace)
+ record: namespace:container_cpu_usage_seconds_total:sum_rate
+ - expr: |-
+ sum by (cluster, namespace, pod, container) (
+ rate(container_cpu_usage_seconds_total{job="kubelet", metrics_path="/metrics/cadvisor", image!="", container!="POD"}[5m])
+ ) * on (cluster, namespace, pod) group_left(node) topk by (cluster, namespace, pod) (
+ 1, max by(cluster, namespace, pod, node) (kube_pod_info)
+ )
+ record: node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate
+{{- /* The next four rules share one shape: a cAdvisor memory metric joined on (namespace, pod) with kube_pod_info to attach the node label. */}}
+ - expr: |-
+ container_memory_working_set_bytes{job="kubelet", metrics_path="/metrics/cadvisor", image!=""}
+ * on (namespace, pod) group_left(node) topk by(namespace, pod) (1,
+ max by(namespace, pod, node) (kube_pod_info)
+ )
+ record: node_namespace_pod_container:container_memory_working_set_bytes
+ - expr: |-
+ container_memory_rss{job="kubelet", metrics_path="/metrics/cadvisor", image!=""}
+ * on (namespace, pod) group_left(node) topk by(namespace, pod) (1,
+ max by(namespace, pod, node) (kube_pod_info)
+ )
+ record: node_namespace_pod_container:container_memory_rss
+ - expr: |-
+ container_memory_cache{job="kubelet", metrics_path="/metrics/cadvisor", image!=""}
+ * on (namespace, pod) group_left(node) topk by(namespace, pod) (1,
+ max by(namespace, pod, node) (kube_pod_info)
+ )
+ record: node_namespace_pod_container:container_memory_cache
+ - expr: |-
+ container_memory_swap{job="kubelet", metrics_path="/metrics/cadvisor", image!=""}
+ * on (namespace, pod) group_left(node) topk by(namespace, pod) (1,
+ max by(namespace, pod, node) (kube_pod_info)
+ )
+ record: node_namespace_pod_container:container_memory_swap
+ - expr: sum(container_memory_usage_bytes{job="kubelet", metrics_path="/metrics/cadvisor", image!="", container!="POD"}) by (namespace)
+ record: namespace:container_memory_usage_bytes:sum
+{{- /* The next two rules sum per-namespace memory / CPU requests, counting only pods whose kube_pod_status_phase is Pending or Running. */}}
+ - expr: |-
+ sum by (namespace) (
+ sum by (namespace, pod) (
+ max by (namespace, pod, container) (
+ kube_pod_container_resource_requests_memory_bytes{job="kube-state-metrics"}
+ ) * on(namespace, pod) group_left() max by (namespace, pod) (
+ kube_pod_status_phase{phase=~"Pending|Running"} == 1
+ )
+ )
+ )
+ record: namespace:kube_pod_container_resource_requests_memory_bytes:sum
+ - expr: |-
+ sum by (namespace) (
+ sum by (namespace, pod) (
+ max by (namespace, pod, container) (
+ kube_pod_container_resource_requests_cpu_cores{job="kube-state-metrics"}
+ ) * on(namespace, pod) group_left() max by (namespace, pod) (
+ kube_pod_status_phase{phase=~"Pending|Running"} == 1
+ )
+ )
+ )
+ record: namespace:kube_pod_container_resource_requests_cpu_cores:sum
+{{- /* mixin_pod_workload is recorded three times, told apart by the workload_type label: deployment (pod owned by a ReplicaSet, resolved through kube_replicaset_owner), daemonset and statefulset. */}}
+ - expr: |-
+ max by (cluster, namespace, workload, pod) (
+ label_replace(
+ label_replace(
+ kube_pod_owner{job="kube-state-metrics", owner_kind="ReplicaSet"},
+ "replicaset", "$1", "owner_name", "(.*)"
+ ) * on(replicaset, namespace) group_left(owner_name) topk by(replicaset, namespace) (
+ 1, max by (replicaset, namespace, owner_name) (
+ kube_replicaset_owner{job="kube-state-metrics"}
+ )
+ ),
+ "workload", "$1", "owner_name", "(.*)"
+ )
+ )
+ labels:
+ workload_type: deployment
+ record: mixin_pod_workload
+ - expr: |-
+ max by (cluster, namespace, workload, pod) (
+ label_replace(
+ kube_pod_owner{job="kube-state-metrics", owner_kind="DaemonSet"},
+ "workload", "$1", "owner_name", "(.*)"
+ )
+ )
+ labels:
+ workload_type: daemonset
+ record: mixin_pod_workload
+ - expr: |-
+ max by (cluster, namespace, workload, pod) (
+ label_replace(
+ kube_pod_owner{job="kube-state-metrics", owner_kind="StatefulSet"},
+ "workload", "$1", "owner_name", "(.*)"
+ )
+ )
+ labels:
+ workload_type: statefulset
+ record: mixin_pod_workload
+{{- end }} \ No newline at end of file
diff --git a/kud/tests/vnfs/comp-app/collection/app2/helm/prometheus-operator/templates/prometheus/rules-1.14/kube-apiserver-slos.yaml b/kud/tests/vnfs/comp-app/collection/app2/helm/prometheus-operator/templates/prometheus/rules-1.14/kube-apiserver-slos.yaml
new file mode 100755
index 00000000..010d3446
--- /dev/null
+++ b/kud/tests/vnfs/comp-app/collection/app2/helm/prometheus-operator/templates/prometheus/rules-1.14/kube-apiserver-slos.yaml
@@ -0,0 +1,71 @@
+{{- /*
+Generated from 'kube-apiserver-slos' group from https://raw.githubusercontent.com/coreos/kube-prometheus/master/manifests/prometheus-rules.yaml
+Do not change in-place! In order to change this file first read following link:
+https://github.com/helm/charts/tree/master/stable/prometheus-operator/hack
+
+Renders one PrometheusRule containing the 'kube-apiserver-slos' alert group.
+It is emitted only when the target Kubernetes version
+(kubeTargetVersionOverride if set, otherwise the cluster's GitVersion) is
+>=1.14.0-0 and <9.9.9-9, and defaultRules.create, kubeApiServer.enabled and
+defaultRules.rules.kubeApiserverSlos are all set.
+The group defines KubeAPIErrorBudgetBurn four times. Each variant requires the
+summed apiserver_request:burnrate<window> series of a long and a short window
+to both exceed factor * 0.01000:
+  1h and 5m  - factor 14.40, for 2m,  severity critical
+  6h and 30m - factor 6.00,  for 15m, severity critical
+  1d and 2h  - factor 3.00,  for 1h,  severity warning
+  3d and 6h  - factor 1.00,  for 3h,  severity warning
+runbook_url is defaultRules.runbookUrl with the fixed anchor
+"alert-name-kubeapierrorbudgetburn" appended directly, without a separator.
+*/ -}}
+{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }}
+{{- if and (semverCompare ">=1.14.0-0" $kubeTargetVersion) (semverCompare "<9.9.9-9" $kubeTargetVersion) .Values.defaultRules.create .Values.kubeApiServer.enabled .Values.defaultRules.rules.kubeApiserverSlos }}
+apiVersion: monitoring.coreos.com/v1
+kind: PrometheusRule
+metadata:
+ name: {{ printf "%s-%s" (include "prometheus-operator.fullname" .) "kube-apiserver-slos" | trunc 63 | trimSuffix "-" }}
+ namespace: {{ template "prometheus-operator.namespace" . }}
+ labels:
+ app: {{ template "prometheus-operator.name" . }}
+{{ include "prometheus-operator.labels" . | indent 4 }}
+{{- if .Values.defaultRules.labels }}
+{{ toYaml .Values.defaultRules.labels | indent 4 }}
+{{- end }}
+{{- if .Values.defaultRules.annotations }}
+ annotations:
+{{ toYaml .Values.defaultRules.annotations | indent 4 }}
+{{- end }}
+spec:
+ groups:
+ - name: kube-apiserver-slos
+ rules:
+ - alert: KubeAPIErrorBudgetBurn
+ annotations:
+ message: The API server is burning too much error budget
+ runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-kubeapierrorbudgetburn
+ expr: |-
+ sum(apiserver_request:burnrate1h) > (14.40 * 0.01000)
+ and
+ sum(apiserver_request:burnrate5m) > (14.40 * 0.01000)
+ for: 2m
+ labels:
+ severity: critical
+ - alert: KubeAPIErrorBudgetBurn
+ annotations:
+ message: The API server is burning too much error budget
+ runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-kubeapierrorbudgetburn
+ expr: |-
+ sum(apiserver_request:burnrate6h) > (6.00 * 0.01000)
+ and
+ sum(apiserver_request:burnrate30m) > (6.00 * 0.01000)
+ for: 15m
+ labels:
+ severity: critical
+ - alert: KubeAPIErrorBudgetBurn
+ annotations:
+ message: The API server is burning too much error budget
+ runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-kubeapierrorbudgetburn
+ expr: |-
+ sum(apiserver_request:burnrate1d) > (3.00 * 0.01000)
+ and
+ sum(apiserver_request:burnrate2h) > (3.00 * 0.01000)
+ for: 1h
+ labels:
+ severity: warning
+ - alert: KubeAPIErrorBudgetBurn
+ annotations:
+ message: The API server is burning too much error budget
+ runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-kubeapierrorbudgetburn
+ expr: |-
+ sum(apiserver_request:burnrate3d) > (1.00 * 0.01000)
+ and
+ sum(apiserver_request:burnrate6h) > (1.00 * 0.01000)
+ for: 3h
+ labels:
+ severity: warning
+{{- end }} \ No newline at end of file
diff --git a/kud/tests/vnfs/comp-app/collection/app2/helm/prometheus-operator/templates/prometheus/rules-1.14/kube-apiserver.rules.yaml b/kud/tests/vnfs/comp-app/collection/app2/helm/prometheus-operator/templates/prometheus/rules-1.14/kube-apiserver.rules.yaml
new file mode 100755
index 00000000..1b00134e
--- /dev/null
+++ b/kud/tests/vnfs/comp-app/collection/app2/helm/prometheus-operator/templates/prometheus/rules-1.14/kube-apiserver.rules.yaml
@@ -0,0 +1,393 @@
+{{- /*
+Generated from 'kube-apiserver.rules' group from https://raw.githubusercontent.com/coreos/kube-prometheus/master/manifests/prometheus-rules.yaml
+Do not change in-place! In order to change this file first read following link:
+https://github.com/helm/charts/tree/master/stable/prometheus-operator/hack
+*/ -}}
+{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }}
+{{- if and (semverCompare ">=1.14.0-0" $kubeTargetVersion) (semverCompare "<9.9.9-9" $kubeTargetVersion) .Values.defaultRules.create .Values.kubeApiServer.enabled .Values.defaultRules.rules.kubeApiserver }}
+apiVersion: monitoring.coreos.com/v1
+kind: PrometheusRule
+metadata:
+ name: {{ printf "%s-%s" (include "prometheus-operator.fullname" .) "kube-apiserver.rules" | trunc 63 | trimSuffix "-" }}
+ namespace: {{ template "prometheus-operator.namespace" . }}
+ labels:
+ app: {{ template "prometheus-operator.name" . }}
+{{ include "prometheus-operator.labels" . | indent 4 }}
+{{- if .Values.defaultRules.labels }}
+{{ toYaml .Values.defaultRules.labels | indent 4 }}
+{{- end }}
+{{- if .Values.defaultRules.annotations }}
+ annotations:
+{{ toYaml .Values.defaultRules.annotations | indent 4 }}
+{{- end }}
+spec:
+ groups:
+ - name: kube-apiserver.rules
+ rules:
+ - expr: |-
+ (
+ (
+ # too slow
+ sum(rate(apiserver_request_duration_seconds_count{job="apiserver",verb=~"LIST|GET"}[1d]))
+ -
+ (
+ sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="resource",le="0.1"}[1d])) +
+ sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="namespace",le="0.5"}[1d])) +
+ sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="cluster",le="5"}[1d]))
+ )
+ )
+ +
+ # errors
+ sum(rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET",code=~"5.."}[1d]))
+ )
+ /
+ sum(rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET"}[1d]))
+ labels:
+ verb: read
+ record: apiserver_request:burnrate1d
+ - expr: |-
+ (
+ (
+ # too slow
+ sum(rate(apiserver_request_duration_seconds_count{job="apiserver",verb=~"LIST|GET"}[1h]))
+ -
+ (
+ sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="resource",le="0.1"}[1h])) +
+ sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="namespace",le="0.5"}[1h])) +
+ sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="cluster",le="5"}[1h]))
+ )
+ )
+ +
+ # errors
+ sum(rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET",code=~"5.."}[1h]))
+ )
+ /
+ sum(rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET"}[1h]))
+ labels:
+ verb: read
+ record: apiserver_request:burnrate1h
+ - expr: |-
+ (
+ (
+ # too slow
+ sum(rate(apiserver_request_duration_seconds_count{job="apiserver",verb=~"LIST|GET"}[2h]))
+ -
+ (
+ sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="resource",le="0.1"}[2h])) +
+ sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="namespace",le="0.5"}[2h])) +
+ sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="cluster",le="5"}[2h]))
+ )
+ )
+ +
+ # errors
+ sum(rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET",code=~"5.."}[2h]))
+ )
+ /
+ sum(rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET"}[2h]))
+ labels:
+ verb: read
+ record: apiserver_request:burnrate2h
+ - expr: |-
+ (
+ (
+ # too slow
+ sum(rate(apiserver_request_duration_seconds_count{job="apiserver",verb=~"LIST|GET"}[30m]))
+ -
+ (
+ sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="resource",le="0.1"}[30m])) +
+ sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="namespace",le="0.5"}[30m])) +
+ sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="cluster",le="5"}[30m]))
+ )
+ )
+ +
+ # errors
+ sum(rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET",code=~"5.."}[30m]))
+ )
+ /
+ sum(rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET"}[30m]))
+ labels:
+ verb: read
+ record: apiserver_request:burnrate30m
+ - expr: |-
+ (
+ (
+ # too slow
+ sum(rate(apiserver_request_duration_seconds_count{job="apiserver",verb=~"LIST|GET"}[3d]))
+ -
+ (
+ sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="resource",le="0.1"}[3d])) +
+ sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="namespace",le="0.5"}[3d])) +
+ sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="cluster",le="5"}[3d]))
+ )
+ )
+ +
+ # errors
+ sum(rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET",code=~"5.."}[3d]))
+ )
+ /
+ sum(rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET"}[3d]))
+ labels:
+ verb: read
+ record: apiserver_request:burnrate3d
+ # Read (LIST|GET) burn rate over a 5m window: (requests that missed the per-scope
+ # latency bucket -- 0.1s resource, 0.5s namespace, 5s cluster -- plus requests
+ # answered with a 5xx code) divided by all read requests.
+ - expr: |-
+ (
+ (
+ # too slow
+ sum(rate(apiserver_request_duration_seconds_count{job="apiserver",verb=~"LIST|GET"}[5m]))
+ -
+ (
+ sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="resource",le="0.1"}[5m])) +
+ sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="namespace",le="0.5"}[5m])) +
+ sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="cluster",le="5"}[5m]))
+ )
+ )
+ +
+ # errors
+ sum(rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET",code=~"5.."}[5m]))
+ )
+ /
+ sum(rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET"}[5m]))
+ labels:
+ verb: read
+ record: apiserver_request:burnrate5m
+ # Read (LIST|GET) burn rate over a 6h window: (requests that missed the per-scope
+ # latency bucket -- 0.1s resource, 0.5s namespace, 5s cluster -- plus requests
+ # answered with a 5xx code) divided by all read requests.
+ - expr: |-
+ (
+ (
+ # too slow
+ sum(rate(apiserver_request_duration_seconds_count{job="apiserver",verb=~"LIST|GET"}[6h]))
+ -
+ (
+ sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="resource",le="0.1"}[6h])) +
+ sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="namespace",le="0.5"}[6h])) +
+ sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="cluster",le="5"}[6h]))
+ )
+ )
+ +
+ # errors
+ sum(rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET",code=~"5.."}[6h]))
+ )
+ /
+ sum(rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET"}[6h]))
+ labels:
+ verb: read
+ record: apiserver_request:burnrate6h
+ # Write (POST|PUT|PATCH|DELETE) burn rate over a 1d window: (requests that missed
+ # the le="1" latency bucket plus requests answered with a 5xx code) divided by
+ # all write requests.
+ - expr: |-
+ (
+ (
+ # too slow
+ sum(rate(apiserver_request_duration_seconds_count{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[1d]))
+ -
+ sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",le="1"}[1d]))
+ )
+ +
+ sum(rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",code=~"5.."}[1d]))
+ )
+ /
+ sum(rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[1d]))
+ labels:
+ verb: write
+ record: apiserver_request:burnrate1d
+ # Write (POST|PUT|PATCH|DELETE) burn rate over a 1h window: (requests that missed
+ # the le="1" latency bucket plus requests answered with a 5xx code) divided by
+ # all write requests.
+ - expr: |-
+ (
+ (
+ # too slow
+ sum(rate(apiserver_request_duration_seconds_count{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[1h]))
+ -
+ sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",le="1"}[1h]))
+ )
+ +
+ sum(rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",code=~"5.."}[1h]))
+ )
+ /
+ sum(rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[1h]))
+ labels:
+ verb: write
+ record: apiserver_request:burnrate1h
+ # Write (POST|PUT|PATCH|DELETE) burn rate over a 2h window: (requests that missed
+ # the le="1" latency bucket plus requests answered with a 5xx code) divided by
+ # all write requests.
+ - expr: |-
+ (
+ (
+ # too slow
+ sum(rate(apiserver_request_duration_seconds_count{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[2h]))
+ -
+ sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",le="1"}[2h]))
+ )
+ +
+ sum(rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",code=~"5.."}[2h]))
+ )
+ /
+ sum(rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[2h]))
+ labels:
+ verb: write
+ record: apiserver_request:burnrate2h
+ # Write (POST|PUT|PATCH|DELETE) burn rate over a 30m window: (requests that missed
+ # the le="1" latency bucket plus requests answered with a 5xx code) divided by
+ # all write requests.
+ - expr: |-
+ (
+ (
+ # too slow
+ sum(rate(apiserver_request_duration_seconds_count{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[30m]))
+ -
+ sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",le="1"}[30m]))
+ )
+ +
+ sum(rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",code=~"5.."}[30m]))
+ )
+ /
+ sum(rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[30m]))
+ labels:
+ verb: write
+ record: apiserver_request:burnrate30m
+ # Write (POST|PUT|PATCH|DELETE) burn rate over a 3d window: (requests that missed
+ # the le="1" latency bucket plus requests answered with a 5xx code) divided by
+ # all write requests.
+ - expr: |-
+ (
+ (
+ # too slow
+ sum(rate(apiserver_request_duration_seconds_count{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[3d]))
+ -
+ sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",le="1"}[3d]))
+ )
+ +
+ sum(rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",code=~"5.."}[3d]))
+ )
+ /
+ sum(rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[3d]))
+ labels:
+ verb: write
+ record: apiserver_request:burnrate3d
+ # Write (POST|PUT|PATCH|DELETE) burn rate over a 5m window: (requests that missed
+ # the le="1" latency bucket plus requests answered with a 5xx code) divided by
+ # all write requests.
+ - expr: |-
+ (
+ (
+ # too slow
+ sum(rate(apiserver_request_duration_seconds_count{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[5m]))
+ -
+ sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",le="1"}[5m]))
+ )
+ +
+ sum(rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",code=~"5.."}[5m]))
+ )
+ /
+ sum(rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[5m]))
+ labels:
+ verb: write
+ record: apiserver_request:burnrate5m
+ # Write (POST|PUT|PATCH|DELETE) burn rate over a 6h window: (requests that missed
+ # the le="1" latency bucket plus requests answered with a 5xx code) divided by
+ # all write requests.
+ - expr: |-
+ (
+ (
+ # too slow
+ sum(rate(apiserver_request_duration_seconds_count{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[6h]))
+ -
+ sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",le="1"}[6h]))
+ )
+ +
+ sum(rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",code=~"5.."}[6h]))
+ )
+ /
+ sum(rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[6h]))
+ labels:
+ verb: write
+ record: apiserver_request:burnrate6h
+ # Overall (read + write) availability over 30 days:
+ # 1 - (slow writes + slow reads + 5xx responses) / all requests.
+ # Every raw latency selector is pinned to job="apiserver" so the numerator is drawn
+ # from the same job as code:apiserver_request_total:increase30d, which is derived
+ # from apiserver_request_total{job="apiserver"} and supplies the error and total terms.
+ - expr: |-
+ 1 - (
+ (
+ # write too slow
+ sum(increase(apiserver_request_duration_seconds_count{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[30d]))
+ -
+ sum(increase(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",le="1"}[30d]))
+ ) +
+ (
+ # read too slow
+ sum(increase(apiserver_request_duration_seconds_count{job="apiserver",verb=~"LIST|GET"}[30d]))
+ -
+ (
+ sum(increase(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="resource",le="0.1"}[30d])) +
+ sum(increase(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="namespace",le="0.5"}[30d])) +
+ sum(increase(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="cluster",le="5"}[30d]))
+ )
+ ) +
+ # errors
+ sum(code:apiserver_request_total:increase30d{code=~"5.."})
+ )
+ /
+ sum(code:apiserver_request_total:increase30d)
+ labels:
+ verb: all
+ record: apiserver_request:availability30d
+ # Read (LIST|GET) availability over 30 days: 1 - (requests that missed the per-scope
+ # latency bucket + 5xx read responses) / all read requests. The error and total
+ # terms come from the code:apiserver_request_total:increase30d{verb="read"} series.
+ - expr: |-
+ 1 - (
+ sum(increase(apiserver_request_duration_seconds_count{job="apiserver",verb=~"LIST|GET"}[30d]))
+ -
+ (
+ # too slow
+ sum(increase(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="resource",le="0.1"}[30d])) +
+ sum(increase(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="namespace",le="0.5"}[30d])) +
+ sum(increase(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="cluster",le="5"}[30d]))
+ )
+ +
+ # errors
+ sum(code:apiserver_request_total:increase30d{verb="read",code=~"5.."})
+ )
+ /
+ sum(code:apiserver_request_total:increase30d{verb="read"})
+ labels:
+ verb: read
+ record: apiserver_request:availability30d
+ # Write (POST|PUT|PATCH|DELETE) availability over 30 days:
+ # 1 - (requests that missed the le="1" bucket + 5xx write responses) / all write requests.
+ # The latency selectors are pinned to job="apiserver" so they are drawn from the same
+ # job as code:apiserver_request_total:increase30d{verb="write"}, which is derived from
+ # apiserver_request_total{job="apiserver"}.
+ - expr: |-
+ 1 - (
+ (
+ # too slow
+ sum(increase(apiserver_request_duration_seconds_count{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[30d]))
+ -
+ sum(increase(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",le="1"}[30d]))
+ )
+ +
+ # errors
+ sum(code:apiserver_request_total:increase30d{verb="write",code=~"5.."})
+ )
+ /
+ sum(code:apiserver_request_total:increase30d{verb="write"})
+ labels:
+ verb: write
+ record: apiserver_request:availability30d
+ # 30-day request increase per (code, verb) for job="apiserver"; the two rules below
+ # fold the raw verbs into verb="read" (LIST|GET) and verb="write"
+ # (POST|PUT|PATCH|DELETE) series keyed by response code.
+ - expr: sum by (code, verb) (increase(apiserver_request_total{job="apiserver"}[30d]))
+ record: code_verb:apiserver_request_total:increase30d
+ - expr: sum by (code) (code_verb:apiserver_request_total:increase30d{verb=~"LIST|GET"})
+ labels:
+ verb: read
+ record: code:apiserver_request_total:increase30d
+ - expr: sum by (code) (code_verb:apiserver_request_total:increase30d{verb=~"POST|PUT|PATCH|DELETE"})
+ labels:
+ verb: write
+ record: code:apiserver_request_total:increase30d
+ # 5m request rate per (code, resource), recorded separately for read (LIST|GET)
+ # and write (POST|PUT|PATCH|DELETE) verbs under the same record name.
+ - expr: sum by (code,resource) (rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET"}[5m]))
+ labels:
+ verb: read
+ record: code_resource:apiserver_request_total:rate5m
+ - expr: sum by (code,resource) (rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[5m]))
+ labels:
+ verb: write
+ record: code_resource:apiserver_request_total:rate5m
+ # Per-resource 99th-percentile request duration from 5m bucket rates, for read and
+ # write verbs; the trailing "> 0" drops series whose quantile is not positive.
+ - expr: histogram_quantile(0.99, sum by (le, resource) (rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET"}[5m]))) > 0
+ labels:
+ quantile: '0.99'
+ verb: read
+ record: cluster_quantile:apiserver_request_duration_seconds:histogram_quantile
+ - expr: histogram_quantile(0.99, sum by (le, resource) (rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[5m]))) > 0
+ labels:
+ quantile: '0.99'
+ verb: write
+ record: cluster_quantile:apiserver_request_duration_seconds:histogram_quantile
+ # Mean request duration over 5m (sum of durations / request count), aggregated
+ # without instance and pod; log subresources and the listed long-running verbs
+ # are excluded.
+ # NOTE(review): unlike the neighbouring rules this selector has no job="apiserver"
+ # matcher -- confirm whether that is intentional.
+ - expr: |-
+ sum(rate(apiserver_request_duration_seconds_sum{subresource!="log",verb!~"LIST|WATCH|WATCHLIST|DELETECOLLECTION|PROXY|CONNECT"}[5m])) without(instance, pod)
+ /
+ sum(rate(apiserver_request_duration_seconds_count{subresource!="log",verb!~"LIST|WATCH|WATCHLIST|DELETECOLLECTION|PROXY|CONNECT"}[5m])) without(instance, pod)
+ record: cluster:apiserver_request_duration_seconds:mean5m
+ # p99 / p90 / p50 request duration from 5m bucket rates aggregated without instance
+ # and pod; log subresources and the listed long-running verbs are excluded. The
+ # three rules differ only in the quantile and its matching label value.
+ - expr: histogram_quantile(0.99, sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",subresource!="log",verb!~"LIST|WATCH|WATCHLIST|DELETECOLLECTION|PROXY|CONNECT"}[5m])) without(instance, pod))
+ labels:
+ quantile: '0.99'
+ record: cluster_quantile:apiserver_request_duration_seconds:histogram_quantile
+ - expr: histogram_quantile(0.9, sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",subresource!="log",verb!~"LIST|WATCH|WATCHLIST|DELETECOLLECTION|PROXY|CONNECT"}[5m])) without(instance, pod))
+ labels:
+ quantile: '0.9'
+ record: cluster_quantile:apiserver_request_duration_seconds:histogram_quantile
+ - expr: histogram_quantile(0.5, sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",subresource!="log",verb!~"LIST|WATCH|WATCHLIST|DELETECOLLECTION|PROXY|CONNECT"}[5m])) without(instance, pod))
+ labels:
+ quantile: '0.5'
+ record: cluster_quantile:apiserver_request_duration_seconds:histogram_quantile
+{{- end }} \ No newline at end of file
diff --git a/kud/tests/vnfs/comp-app/collection/app2/helm/prometheus-operator/templates/prometheus/rules-1.14/kube-prometheus-general.rules.yaml b/kud/tests/vnfs/comp-app/collection/app2/helm/prometheus-operator/templates/prometheus/rules-1.14/kube-prometheus-general.rules.yaml
new file mode 100755
index 00000000..0b963276
--- /dev/null
+++ b/kud/tests/vnfs/comp-app/collection/app2/helm/prometheus-operator/templates/prometheus/rules-1.14/kube-prometheus-general.rules.yaml
@@ -0,0 +1,31 @@
+{{- /*
+Generated from 'kube-prometheus-general.rules' group from https://raw.githubusercontent.com/coreos/kube-prometheus/master/manifests/prometheus-rules.yaml
+Do not change in-place! In order to change this file first read following link:
+https://github.com/helm/charts/tree/master/stable/prometheus-operator/hack
+*/ -}}
+{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }}
+{{- if and (semverCompare ">=1.14.0-0" $kubeTargetVersion) (semverCompare "<9.9.9-9" $kubeTargetVersion) .Values.defaultRules.create }}
+# Rendered only when the target Kubernetes version (cluster version, or
+# .Values.kubeTargetVersionOverride when set) is >=1.14.0-0 and <9.9.9-9 and
+# .Values.defaultRules.create is truthy.
+apiVersion: monitoring.coreos.com/v1
+kind: PrometheusRule
+metadata:
+ # "<fullname>-kube-prometheus-general.rules", cut to 63 characters with any trailing "-" removed.
+ name: {{ printf "%s-%s" (include "prometheus-operator.fullname" .) "kube-prometheus-general.rules" | trunc 63 | trimSuffix "-" }}
+ namespace: {{ template "prometheus-operator.namespace" . }}
+ labels:
+ app: {{ template "prometheus-operator.name" . }}
+{{ include "prometheus-operator.labels" . | indent 4 }}
+{{- if .Values.defaultRules.labels }}
+{{ toYaml .Values.defaultRules.labels | indent 4 }}
+{{- end }}
+{{- if .Values.defaultRules.annotations }}
+ annotations:
+{{ toYaml .Values.defaultRules.annotations | indent 4 }}
+{{- end }}
+spec:
+ groups:
+ - name: kube-prometheus-general.rules
+ rules:
+ # Number of "up" series equal to 1 (count:up1) and equal to 0 (count:up0),
+ # counted without the instance, pod and node labels.
+ - expr: count without(instance, pod, node) (up == 1)
+ record: count:up1
+ - expr: count without(instance, pod, node) (up == 0)
+ record: count:up0
+{{- end }} \ No newline at end of file
diff --git a/kud/tests/vnfs/comp-app/collection/app2/helm/prometheus-operator/templates/prometheus/rules-1.14/kube-prometheus-node-recording.rules.yaml b/kud/tests/vnfs/comp-app/collection/app2/helm/prometheus-operator/templates/prometheus/rules-1.14/kube-prometheus-node-recording.rules.yaml
new file mode 100755
index 00000000..1ff4cb07
--- /dev/null
+++ b/kud/tests/vnfs/comp-app/collection/app2/helm/prometheus-operator/templates/prometheus/rules-1.14/kube-prometheus-node-recording.rules.yaml
@@ -0,0 +1,41 @@
+{{- /*
+Generated from 'kube-prometheus-node-recording.rules' group from https://raw.githubusercontent.com/coreos/kube-prometheus/master/manifests/prometheus-rules.yaml
+Do not change in-place! In order to change this file first read following link:
+https://github.com/helm/charts/tree/master/stable/prometheus-operator/hack
+*/ -}}
+{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }}
+{{- if and (semverCompare ">=1.14.0-0" $kubeTargetVersion) (semverCompare "<9.9.9-9" $kubeTargetVersion) .Values.defaultRules.create .Values.defaultRules.rules.kubePrometheusNodeRecording }}
+# Rendered only when the target Kubernetes version (cluster version, or
+# .Values.kubeTargetVersionOverride when set) is >=1.14.0-0 and <9.9.9-9 and both
+# .Values.defaultRules.create and .Values.defaultRules.rules.kubePrometheusNodeRecording are truthy.
+apiVersion: monitoring.coreos.com/v1
+kind: PrometheusRule
+metadata:
+ # "<fullname>-kube-prometheus-node-recording.rules", cut to 63 characters with any trailing "-" removed.
+ name: {{ printf "%s-%s" (include "prometheus-operator.fullname" .) "kube-prometheus-node-recording.rules" | trunc 63 | trimSuffix "-" }}
+ namespace: {{ template "prometheus-operator.namespace" . }}
+ labels:
+ app: {{ template "prometheus-operator.name" . }}
+{{ include "prometheus-operator.labels" . | indent 4 }}
+{{- if .Values.defaultRules.labels }}
+{{ toYaml .Values.defaultRules.labels | indent 4 }}
+{{- end }}
+{{- if .Values.defaultRules.annotations }}
+ annotations:
+{{ toYaml .Values.defaultRules.annotations | indent 4 }}
+{{- end }}
+spec:
+ groups:
+ - name: kube-prometheus-node-recording.rules
+ rules:
+ # Aggregates over node_* series: per-instance non-idle/non-iowait CPU rate (3m),
+ # per-instance bytes used on the "/" mountpoint (size - free), per-instance network
+ # receive and transmit byte rates (3m), per-instance CPU rate divided by that
+ # instance's CPU count (5m), and the cluster-wide non-idle/non-iowait CPU rate (5m).
+ - expr: sum(rate(node_cpu_seconds_total{mode!="idle",mode!="iowait"}[3m])) BY (instance)
+ record: instance:node_cpu:rate:sum
+ - expr: sum((node_filesystem_size_bytes{mountpoint="/"} - node_filesystem_free_bytes{mountpoint="/"})) BY (instance)
+ record: instance:node_filesystem_usage:sum
+ - expr: sum(rate(node_network_receive_bytes_total[3m])) BY (instance)
+ record: instance:node_network_receive_bytes:rate:sum
+ - expr: sum(rate(node_network_transmit_bytes_total[3m])) BY (instance)
+ record: instance:node_network_transmit_bytes:rate:sum
+ - expr: sum(rate(node_cpu_seconds_total{mode!="idle",mode!="iowait"}[5m])) WITHOUT (cpu, mode) / ON(instance) GROUP_LEFT() count(sum(node_cpu_seconds_total) BY (instance, cpu)) BY (instance)
+ record: instance:node_cpu:ratio
+ - expr: sum(rate(node_cpu_seconds_total{mode!="idle",mode!="iowait"}[5m]))
+ record: cluster:node_cpu:sum_rate5m
+ # Cluster-wide CPU utilisation ratio: the cluster:node_cpu:sum_rate5m series recorded
+ # by the rule directly above, divided by the number of (instance, cpu) pairs.
+ # The numerator must name that recorded series; no rule in this group records
+ # cluster:node_cpu_seconds_total:rate5m, so referencing it yields an empty result.
+ - expr: cluster:node_cpu:sum_rate5m / count(sum(node_cpu_seconds_total) BY (instance, cpu))
+ record: cluster:node_cpu:ratio
+{{- end }} \ No newline at end of file
diff --git a/kud/tests/vnfs/comp-app/collection/app2/helm/prometheus-operator/templates/prometheus/rules-1.14/kube-scheduler.rules.yaml b/kud/tests/vnfs/comp-app/collection/app2/helm/prometheus-operator/templates/prometheus/rules-1.14/kube-scheduler.rules.yaml
new file mode 100755
index 00000000..ec718cef
--- /dev/null
+++ b/kud/tests/vnfs/comp-app/collection/app2/helm/prometheus-operator/templates/prometheus/rules-1.14/kube-scheduler.rules.yaml
@@ -0,0 +1,63 @@
+{{- /*
+Generated from 'kube-scheduler.rules' group from https://raw.githubusercontent.com/coreos/kube-prometheus/master/manifests/prometheus-rules.yaml
+Do not change in-place! In order to change this file first read following link:
+https://github.com/helm/charts/tree/master/stable/prometheus-operator/hack
+*/ -}}
+{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }}
+{{- if and (semverCompare ">=1.14.0-0" $kubeTargetVersion) (semverCompare "<9.9.9-9" $kubeTargetVersion) .Values.defaultRules.create .Values.kubeScheduler.enabled .Values.defaultRules.rules.kubeScheduler }}
+# Rendered only when the target Kubernetes version (cluster version, or
+# .Values.kubeTargetVersionOverride when set) is >=1.14.0-0 and <9.9.9-9 and
+# .Values.defaultRules.create, .Values.kubeScheduler.enabled and
+# .Values.defaultRules.rules.kubeScheduler are all truthy.
+apiVersion: monitoring.coreos.com/v1
+kind: PrometheusRule
+metadata:
+ # "<fullname>-kube-scheduler.rules", cut to 63 characters with any trailing "-" removed.
+ name: {{ printf "%s-%s" (include "prometheus-operator.fullname" .) "kube-scheduler.rules" | trunc 63 | trimSuffix "-" }}
+ namespace: {{ template "prometheus-operator.namespace" . }}
+ labels:
+ app: {{ template "prometheus-operator.name" . }}
+{{ include "prometheus-operator.labels" . | indent 4 }}
+{{- if .Values.defaultRules.labels }}
+{{ toYaml .Values.defaultRules.labels | indent 4 }}
+{{- end }}
+{{- if .Values.defaultRules.annotations }}
+ annotations:
+{{ toYaml .Values.defaultRules.annotations | indent 4 }}
+{{- end }}
+spec:
+ groups:
+ - name: kube-scheduler.rules
+ rules:
+ # Nine recording rules: the 0.99, 0.9 and 0.5 quantiles of the e2e scheduling,
+ # scheduling algorithm and binding duration histograms of job="kube-scheduler",
+ # each computed from 5m bucket rates summed without instance and pod. The
+ # "quantile" label on each rule repeats the quantile used in its expression.
+ - expr: histogram_quantile(0.99, sum(rate(scheduler_e2e_scheduling_duration_seconds_bucket{job="kube-scheduler"}[5m])) without(instance, pod))
+ labels:
+ quantile: '0.99'
+ record: cluster_quantile:scheduler_e2e_scheduling_duration_seconds:histogram_quantile
+ - expr: histogram_quantile(0.99, sum(rate(scheduler_scheduling_algorithm_duration_seconds_bucket{job="kube-scheduler"}[5m])) without(instance, pod))
+ labels:
+ quantile: '0.99'
+ record: cluster_quantile:scheduler_scheduling_algorithm_duration_seconds:histogram_quantile
+ - expr: histogram_quantile(0.99, sum(rate(scheduler_binding_duration_seconds_bucket{job="kube-scheduler"}[5m])) without(instance, pod))
+ labels:
+ quantile: '0.99'
+ record: cluster_quantile:scheduler_binding_duration_seconds:histogram_quantile
+ - expr: histogram_quantile(0.9, sum(rate(scheduler_e2e_scheduling_duration_seconds_bucket{job="kube-scheduler"}[5m])) without(instance, pod))
+ labels:
+ quantile: '0.9'
+ record: cluster_quantile:scheduler_e2e_scheduling_duration_seconds:histogram_quantile
+ - expr: histogram_quantile(0.9, sum(rate(scheduler_scheduling_algorithm_duration_seconds_bucket{job="kube-scheduler"}[5m])) without(instance, pod))
+ labels:
+ quantile: '0.9'
+ record: cluster_quantile:scheduler_scheduling_algorithm_duration_seconds:histogram_quantile
+ - expr: histogram_quantile(0.9, sum(rate(scheduler_binding_duration_seconds_bucket{job="kube-scheduler"}[5m])) without(instance, pod))
+ labels:
+ quantile: '0.9'
+ record: cluster_quantile:scheduler_binding_duration_seconds:histogram_quantile
+ - expr: histogram_quantile(0.5, sum(rate(scheduler_e2e_scheduling_duration_seconds_bucket{job="kube-scheduler"}[5m])) without(instance, pod))
+ labels:
+ quantile: '0.5'
+ record: cluster_quantile:scheduler_e2e_scheduling_duration_seconds:histogram_quantile
+ - expr: histogram_quantile(0.5, sum(rate(scheduler_scheduling_algorithm_duration_seconds_bucket{job="kube-scheduler"}[5m])) without(instance, pod))
+ labels:
+ quantile: '0.5'
+ record: cluster_quantile:scheduler_scheduling_algorithm_duration_seconds:histogram_quantile
+ - expr: histogram_quantile(0.5, sum(rate(scheduler_binding_duration_seconds_bucket{job="kube-scheduler"}[5m])) without(instance, pod))
+ labels:
+ quantile: '0.5'
+ record: cluster_quantile:scheduler_binding_duration_seconds:histogram_quantile
+{{- end }} \ No newline at end of file
diff --git a/kud/tests/vnfs/comp-app/collection/app2/helm/prometheus-operator/templates/prometheus/rules-1.14/kube-state-metrics.yaml b/kud/tests/vnfs/comp-app/collection/app2/helm/prometheus-operator/templates/prometheus/rules-1.14/kube-state-metrics.yaml
new file mode 100755
index 00000000..6f281bcb
--- /dev/null
+++ b/kud/tests/vnfs/comp-app/collection/app2/helm/prometheus-operator/templates/prometheus/rules-1.14/kube-state-metrics.yaml
@@ -0,0 +1,51 @@
+{{- /*
+Generated from 'kube-state-metrics' group from https://raw.githubusercontent.com/coreos/kube-prometheus/master/manifests/prometheus-rules.yaml
+Do not change in-place! In order to change this file first read following link:
+https://github.com/helm/charts/tree/master/stable/prometheus-operator/hack
+*/ -}}
+{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }}
+{{- if and (semverCompare ">=1.14.0-0" $kubeTargetVersion) (semverCompare "<9.9.9-9" $kubeTargetVersion) .Values.defaultRules.create }}
+# Rendered only when the target Kubernetes version (cluster version, or
+# .Values.kubeTargetVersionOverride when set) is >=1.14.0-0 and <9.9.9-9 and
+# .Values.defaultRules.create is truthy.
+apiVersion: monitoring.coreos.com/v1
+kind: PrometheusRule
+metadata:
+ # "<fullname>-kube-state-metrics", cut to 63 characters with any trailing "-" removed.
+ name: {{ printf "%s-%s" (include "prometheus-operator.fullname" .) "kube-state-metrics" | trunc 63 | trimSuffix "-" }}
+ namespace: {{ template "prometheus-operator.namespace" . }}
+ labels:
+ app: {{ template "prometheus-operator.name" . }}
+{{ include "prometheus-operator.labels" . | indent 4 }}
+{{- if .Values.defaultRules.labels }}
+{{ toYaml .Values.defaultRules.labels | indent 4 }}
+{{- end }}
+{{- if .Values.defaultRules.annotations }}
+ annotations:
+{{ toYaml .Values.defaultRules.annotations | indent 4 }}
+{{- end }}
+spec:
+ groups:
+ - name: kube-state-metrics
+ rules:
+ # Critical when more than 1% of kube-state-metrics list operations (5m rate)
+ # have result="error", sustained for 15m.
+ - alert: KubeStateMetricsListErrors
+ annotations:
+ message: kube-state-metrics is experiencing errors at an elevated rate in list operations. This is likely causing it to not be able to expose metrics about Kubernetes objects correctly or at all.
+ runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-kubestatemetricslisterrors
+ expr: |-
+ (sum(rate(kube_state_metrics_list_total{job="kube-state-metrics",result="error"}[5m]))
+ /
+ sum(rate(kube_state_metrics_list_total{job="kube-state-metrics"}[5m])))
+ > 0.01
+ for: 15m
+ labels:
+ severity: critical
+ # Critical when more than 1% of kube-state-metrics watch operations (5m rate)
+ # have result="error", sustained for 15m.
+ - alert: KubeStateMetricsWatchErrors
+ annotations:
+ message: kube-state-metrics is experiencing errors at an elevated rate in watch operations. This is likely causing it to not be able to expose metrics about Kubernetes objects correctly or at all.
+ runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-kubestatemetricswatcherrors
+ expr: |-
+ (sum(rate(kube_state_metrics_watch_total{job="kube-state-metrics",result="error"}[5m]))
+ /
+ sum(rate(kube_state_metrics_watch_total{job="kube-state-metrics"}[5m])))
+ > 0.01
+ for: 15m
+ labels:
+ severity: critical
+{{- end }} \ No newline at end of file
diff --git a/kud/tests/vnfs/comp-app/collection/app2/helm/prometheus-operator/templates/prometheus/rules-1.14/kubelet.rules.yaml b/kud/tests/vnfs/comp-app/collection/app2/helm/prometheus-operator/templates/prometheus/rules-1.14/kubelet.rules.yaml
new file mode 100755
index 00000000..9d9fa950
--- /dev/null
+++ b/kud/tests/vnfs/comp-app/collection/app2/helm/prometheus-operator/templates/prometheus/rules-1.14/kubelet.rules.yaml
@@ -0,0 +1,39 @@
+{{- /*
+Generated from 'kubelet.rules' group from https://raw.githubusercontent.com/coreos/kube-prometheus/master/manifests/prometheus-rules.yaml
+Do not change in-place! In order to change this file first read following link:
+https://github.com/helm/charts/tree/master/stable/prometheus-operator/hack
+*/ -}}
+{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }}
+{{- if and (semverCompare ">=1.14.0-0" $kubeTargetVersion) (semverCompare "<9.9.9-9" $kubeTargetVersion) .Values.defaultRules.create }}
+# Rendered only when the target Kubernetes version (cluster version, or
+# .Values.kubeTargetVersionOverride when set) is >=1.14.0-0 and <9.9.9-9 and
+# .Values.defaultRules.create is truthy.
+apiVersion: monitoring.coreos.com/v1
+kind: PrometheusRule
+metadata:
+ # "<fullname>-kubelet.rules", cut to 63 characters with any trailing "-" removed.
+ name: {{ printf "%s-%s" (include "prometheus-operator.fullname" .) "kubelet.rules" | trunc 63 | trimSuffix "-" }}
+ namespace: {{ template "prometheus-operator.namespace" . }}
+ labels:
+ app: {{ template "prometheus-operator.name" . }}
+{{ include "prometheus-operator.labels" . | indent 4 }}
+{{- if .Values.defaultRules.labels }}
+{{ toYaml .Values.defaultRules.labels | indent 4 }}
+{{- end }}
+{{- if .Values.defaultRules.annotations }}
+ annotations:
+{{ toYaml .Values.defaultRules.annotations | indent 4 }}
+{{- end }}
+spec:
+ groups:
+ - name: kubelet.rules
+ rules:
+ # p99 / p90 / p50 of the kubelet PLEG relist duration histogram (5m bucket rates
+ # summed by instance and le), joined on instance with kubelet_node_name to carry
+ # the "node" label over. The three rules differ only in the quantile.
+ - expr: histogram_quantile(0.99, sum(rate(kubelet_pleg_relist_duration_seconds_bucket[5m])) by (instance, le) * on(instance) group_left(node) kubelet_node_name{job="kubelet", metrics_path="/metrics"})
+ labels:
+ quantile: '0.99'
+ record: node_quantile:kubelet_pleg_relist_duration_seconds:histogram_quantile
+ - expr: histogram_quantile(0.9, sum(rate(kubelet_pleg_relist_duration_seconds_bucket[5m])) by (instance, le) * on(instance) group_left(node) kubelet_node_name{job="kubelet", metrics_path="/metrics"})
+ labels:
+ quantile: '0.9'
+ record: node_quantile:kubelet_pleg_relist_duration_seconds:histogram_quantile
+ - expr: histogram_quantile(0.5, sum(rate(kubelet_pleg_relist_duration_seconds_bucket[5m])) by (instance, le) * on(instance) group_left(node) kubelet_node_name{job="kubelet", metrics_path="/metrics"})
+ labels:
+ quantile: '0.5'
+ record: node_quantile:kubelet_pleg_relist_duration_seconds:histogram_quantile
+{{- end }} \ No newline at end of file
diff --git a/kud/tests/vnfs/comp-app/collection/app2/helm/prometheus-operator/templates/prometheus/rules-1.14/kubernetes-apps.yaml b/kud/tests/vnfs/comp-app/collection/app2/helm/prometheus-operator/templates/prometheus/rules-1.14/kubernetes-apps.yaml
new file mode 100755
index 00000000..3ae09119
--- /dev/null
+++ b/kud/tests/vnfs/comp-app/collection/app2/helm/prometheus-operator/templates/prometheus/rules-1.14/kubernetes-apps.yaml
@@ -0,0 +1,205 @@
+{{- /*
+Generated from 'kubernetes-apps' group from https://raw.githubusercontent.com/coreos/kube-prometheus/master/manifests/prometheus-rules.yaml
+Do not change in-place! In order to change this file first read following link:
+https://github.com/helm/charts/tree/master/stable/prometheus-operator/hack
+*/ -}}
+{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }}
+{{- if and (semverCompare ">=1.14.0-0" $kubeTargetVersion) (semverCompare "<9.9.9-9" $kubeTargetVersion) .Values.defaultRules.create .Values.kubeStateMetrics.enabled .Values.defaultRules.rules.kubernetesApps }}
+{{- $targetNamespace := .Values.defaultRules.appNamespacesTarget }}
+# Rendered only when the target Kubernetes version (cluster version, or
+# .Values.kubeTargetVersionOverride when set) is >=1.14.0-0 and <9.9.9-9 and
+# .Values.defaultRules.create, .Values.kubeStateMetrics.enabled and
+# .Values.defaultRules.rules.kubernetesApps are all truthy.
+# $targetNamespace (.Values.defaultRules.appNamespacesTarget) is substituted into the
+# namespace=~"..." regex matchers of the alert expressions in this group.
+apiVersion: monitoring.coreos.com/v1
+kind: PrometheusRule
+metadata:
+ # "<fullname>-kubernetes-apps", cut to 63 characters with any trailing "-" removed.
+ name: {{ printf "%s-%s" (include "prometheus-operator.fullname" .) "kubernetes-apps" | trunc 63 | trimSuffix "-" }}
+ namespace: {{ template "prometheus-operator.namespace" . }}
+ labels:
+ app: {{ template "prometheus-operator.name" . }}
+{{ include "prometheus-operator.labels" . | indent 4 }}
+{{- if .Values.defaultRules.labels }}
+{{ toYaml .Values.defaultRules.labels | indent 4 }}
+{{- end }}
+{{- if .Values.defaultRules.annotations }}
+ annotations:
+{{ toYaml .Values.defaultRules.annotations | indent 4 }}
+{{- end }}
+spec:
+ groups:
+ - name: kubernetes-apps
+ rules:
+ # Critical when a container's restart counter keeps increasing: the 15m per-second
+ # restart rate is scaled by 60 * 5 to "restarts per 5 minutes" and must stay > 0 for 15m.
+ # The {{`{{`}} ... {{`}}`}} pairs make Helm emit literal {{ ... }} for Prometheus to expand.
+ - alert: KubePodCrashLooping
+ annotations:
+ message: Pod {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.pod {{`}}`}} ({{`{{`}} $labels.container {{`}}`}}) is restarting {{`{{`}} printf "%.2f" $value {{`}}`}} times / 5 minutes.
+ runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-kubepodcrashlooping
+ expr: rate(kube_pod_container_status_restarts_total{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"}[15m]) * 60 * 5 > 0
+ for: 15m
+ labels:
+ severity: critical
+ # Critical when a pod stays in phase Pending or Unknown for 15m. The join with
+ # kube_pod_owner{owner_kind!="Job"} drops pods owned by a Job.
+ - alert: KubePodNotReady
+ annotations:
+ message: Pod {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.pod {{`}}`}} has been in a non-ready state for longer than 15 minutes.
+ runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-kubepodnotready
+ expr: sum by (namespace, pod) (max by(namespace, pod) (kube_pod_status_phase{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}", phase=~"Pending|Unknown"}) * on(namespace, pod) group_left(owner_kind) max by(namespace, pod, owner_kind) (kube_pod_owner{owner_kind!="Job"})) > 0
+ for: 15m
+ labels:
+ severity: critical
+ # Critical when a Deployment's observed generation differs from its metadata
+ # generation for 15m.
+ - alert: KubeDeploymentGenerationMismatch
+ annotations:
+ message: Deployment generation for {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.deployment {{`}}`}} does not match, this indicates that the Deployment has failed but has not been rolled back.
+ runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-kubedeploymentgenerationmismatch
+ expr: |-
+ kube_deployment_status_observed_generation{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"}
+ !=
+ kube_deployment_metadata_generation{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"}
+ for: 15m
+ labels:
+ severity: critical
+ # Critical when spec replicas differ from available replicas AND the updated-replica
+ # count has not changed in the last 5m, sustained for 15m.
+ - alert: KubeDeploymentReplicasMismatch
+ annotations:
+ message: Deployment {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.deployment {{`}}`}} has not matched the expected number of replicas for longer than 15 minutes.
+ runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-kubedeploymentreplicasmismatch
+ expr: |-
+ (
+ kube_deployment_spec_replicas{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"}
+ !=
+ kube_deployment_status_replicas_available{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"}
+ ) and (
+ changes(kube_deployment_status_replicas_updated{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"}[5m])
+ ==
+ 0
+ )
+ for: 15m
+ labels:
+ severity: critical
+ # Critical when ready replicas differ from status replicas AND the updated-replica
+ # count has not changed in the last 5m, sustained for 15m.
+ - alert: KubeStatefulSetReplicasMismatch
+ annotations:
+ message: StatefulSet {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.statefulset {{`}}`}} has not matched the expected number of replicas for longer than 15 minutes.
+ runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-kubestatefulsetreplicasmismatch
+ expr: |-
+ (
+ kube_statefulset_status_replicas_ready{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"}
+ !=
+ kube_statefulset_status_replicas{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"}
+ ) and (
+ changes(kube_statefulset_status_replicas_updated{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"}[5m])
+ ==
+ 0
+ )
+ for: 15m
+ labels:
+ severity: critical
+ # Critical when a StatefulSet's observed generation differs from its metadata
+ # generation for 15m.
+ - alert: KubeStatefulSetGenerationMismatch
+ annotations:
+ message: StatefulSet generation for {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.statefulset {{`}}`}} does not match, this indicates that the StatefulSet has failed but has not been rolled back.
+ runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-kubestatefulsetgenerationmismatch
+ expr: |-
+ kube_statefulset_status_observed_generation{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"}
+ !=
+ kube_statefulset_metadata_generation{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"}
+ for: 15m
+ labels:
+ severity: critical
+ # Critical for 15m when a StatefulSet's current revision has no matching update
+ # revision series ("unless") and, for the same StatefulSet, the desired replica
+ # count differs from the updated replica count. The "*" combines both conditions:
+ # a result exists only where both sides produce a series.
+ - alert: KubeStatefulSetUpdateNotRolledOut
+ annotations:
+ message: StatefulSet {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.statefulset {{`}}`}} update has not been rolled out.
+ runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-kubestatefulsetupdatenotrolledout
+ expr: |-
+ max without (revision) (
+ kube_statefulset_status_current_revision{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"}
+ unless
+ kube_statefulset_status_update_revision{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"}
+ )
+ *
+ (
+ kube_statefulset_replicas{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"}
+ !=
+ kube_statefulset_status_replicas_updated{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"}
+ )
+ for: 15m
+ labels:
+ severity: critical
+ # Critical when the ratio of ready pods to desired scheduled pods of a DaemonSet
+ # stays below 1.00 for 15m; the ratio is the $value shown in the message.
+ - alert: KubeDaemonSetRolloutStuck
+ annotations:
+ message: Only {{`{{`}} $value | humanizePercentage {{`}}`}} of the desired Pods of DaemonSet {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.daemonset {{`}}`}} are scheduled and ready.
+ runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-kubedaemonsetrolloutstuck
+ expr: |-
+ kube_daemonset_status_number_ready{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"}
+ /
+ kube_daemonset_status_desired_number_scheduled{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"} < 1.00
+ for: 15m
+ labels:
+ severity: critical
+ # Warning when a container reports any waiting reason (summed per namespace, pod
+ # and container) continuously for 1h.
+ - alert: KubeContainerWaiting
+ annotations:
+ message: Pod {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.pod {{`}}`}} container {{`{{`}} $labels.container{{`}}`}} has been in waiting state for longer than 1 hour.
+ runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-kubecontainerwaiting
+ expr: sum by (namespace, pod, container) (kube_pod_container_status_waiting_reason{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"}) > 0
+ for: 1h
+ labels:
+ severity: warning
+ # Warning when desired scheduled pods minus currently scheduled pods of a DaemonSet
+ # stays above 0 for 10m. The message is single-quoted because it begins with a
+ # template expression.
+ - alert: KubeDaemonSetNotScheduled
+ annotations:
+ message: '{{`{{`}} $value {{`}}`}} Pods of DaemonSet {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.daemonset {{`}}`}} are not scheduled.'
+ runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-kubedaemonsetnotscheduled
+ expr: |-
+ kube_daemonset_status_desired_number_scheduled{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"}
+ -
+ kube_daemonset_status_current_number_scheduled{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"} > 0
+ for: 10m
+ labels:
+ severity: warning
+ # Warning when a DaemonSet reports a mis-scheduled pod count above 0 for 15m.
+ - alert: KubeDaemonSetMisScheduled
+ annotations:
+ message: '{{`{{`}} $value {{`}}`}} Pods of DaemonSet {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.daemonset {{`}}`}} are running where they are not supposed to run.'
+ runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-kubedaemonsetmisscheduled
+ expr: kube_daemonset_status_number_misscheduled{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"} > 0
+ for: 15m
+ labels:
+ severity: warning
+ # Warning when a CronJob's next schedule time lies more than 3600 seconds in the
+ # past (time() - next_schedule_time > 3600), sustained for 1h.
+ - alert: KubeCronJobRunning
+ annotations:
+ message: CronJob {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.cronjob {{`}}`}} is taking more than 1h to complete.
+ runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-kubecronjobrunning
+ expr: time() - kube_cronjob_next_schedule_time{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"} > 3600
+ for: 1h
+ labels:
+ severity: warning
+ # Warning when a Job's spec completions minus its succeeded count stays above 0 for 1h.
+ - alert: KubeJobCompletion
+ annotations:
+ message: Job {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.job_name {{`}}`}} is taking more than one hour to complete.
+ runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-kubejobcompletion
+ expr: kube_job_spec_completions{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"} - kube_job_status_succeeded{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"} > 0
+ for: 1h
+ labels:
+ severity: warning
+ # Warning when kube_job_failed is above 0 for a Job for 15m.
+ - alert: KubeJobFailed
+ annotations:
+ message: Job {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.job_name {{`}}`}} failed to complete.
+ runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-kubejobfailed
+ expr: kube_job_failed{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"} > 0
+ for: 15m
+ labels:
+ severity: warning
+ # Warning when an HPA's desired replica count differs from its current replica count
+ # AND the current replica count has not changed in the last 15m, sustained for 15m.
+ # The changes() operand carries the same job/namespace matchers as the other
+ # selectors so only series from the targeted namespaces are evaluated; because "and"
+ # matches on identical label sets, the set of firing alerts is unchanged.
+ - alert: KubeHpaReplicasMismatch
+ annotations:
+ message: HPA {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.hpa {{`}}`}} has not matched the desired number of replicas for longer than 15 minutes.
+ runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-kubehpareplicasmismatch
+ expr: |-
+ (kube_hpa_status_desired_replicas{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"}
+ !=
+ kube_hpa_status_current_replicas{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"})
+ and
+ changes(kube_hpa_status_current_replicas{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"}[15m]) == 0
+ for: 15m
+ labels:
+ severity: warning
+ # Warning when an HPA's current replica count equals its configured maximum for 15m.
+ - alert: KubeHpaMaxedOut
+ annotations:
+ message: HPA {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.hpa {{`}}`}} has been running at max replicas for longer than 15 minutes.
+ runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-kubehpamaxedout
+ expr: |-
+ kube_hpa_status_current_replicas{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"}
+ ==
+ kube_hpa_spec_max_replicas{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"}
+ for: 15m
+ labels:
+ severity: warning
+{{- end }} \ No newline at end of file
diff --git a/kud/tests/vnfs/comp-app/collection/app2/helm/prometheus-operator/templates/prometheus/rules-1.14/kubernetes-resources.yaml b/kud/tests/vnfs/comp-app/collection/app2/helm/prometheus-operator/templates/prometheus/rules-1.14/kubernetes-resources.yaml
new file mode 100755
index 00000000..0247f5eb
--- /dev/null
+++ b/kud/tests/vnfs/comp-app/collection/app2/helm/prometheus-operator/templates/prometheus/rules-1.14/kubernetes-resources.yaml
@@ -0,0 +1,103 @@
+{{- /*
+Generated from 'kubernetes-resources' group from https://raw.githubusercontent.com/coreos/kube-prometheus/master/manifests/prometheus-rules.yaml
+Do not change in-place! In order to change this file first read following link:
+https://github.com/helm/charts/tree/master/stable/prometheus-operator/hack
+*/ -}}
+{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }}
+{{- if and (semverCompare ">=1.14.0-0" $kubeTargetVersion) (semverCompare "<9.9.9-9" $kubeTargetVersion) .Values.defaultRules.create .Values.defaultRules.rules.kubernetesResources }}
+apiVersion: monitoring.coreos.com/v1
+kind: PrometheusRule  # resource over-commit and quota alerts; rendered only for Kubernetes >= 1.14 with defaultRules.create and defaultRules.rules.kubernetesResources set
+metadata:
+ name: {{ printf "%s-%s" (include "prometheus-operator.fullname" .) "kubernetes-resources" | trunc 63 | trimSuffix "-" }}
+ namespace: {{ template "prometheus-operator.namespace" . }}
+ labels:
+ app: {{ template "prometheus-operator.name" . }}
+{{ include "prometheus-operator.labels" . | indent 4 }}
+{{- if .Values.defaultRules.labels }}
+{{ toYaml .Values.defaultRules.labels | indent 4 }}
+{{- end }}
+{{- if .Values.defaultRules.annotations }}
+ annotations:
+{{ toYaml .Values.defaultRules.annotations | indent 4 }}
+{{- end }}
+spec:
+ groups:
+ - name: kubernetes-resources
+ rules:
+ - alert: KubeCPUOvercommit  # requested CPU / allocatable CPU exceeds (nodes-1)/nodes
+ annotations:
+ message: Cluster has overcommitted CPU resource requests for Pods and cannot tolerate node failure.
+ runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-kubecpuovercommit
+ expr: |-
+ sum(namespace:kube_pod_container_resource_requests_cpu_cores:sum{})
+ /
+ sum(kube_node_status_allocatable_cpu_cores)
+ >
+ (count(kube_node_status_allocatable_cpu_cores)-1) / count(kube_node_status_allocatable_cpu_cores)
+ for: 5m
+ labels:
+ severity: warning
+ - alert: KubeMemoryOvercommit  # requested memory / allocatable memory exceeds (nodes-1)/nodes
+ annotations:
+ message: Cluster has overcommitted memory resource requests for Pods and cannot tolerate node failure.
+ runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-kubememoryovercommit
+ expr: |-
+ sum(namespace:kube_pod_container_resource_requests_memory_bytes:sum{})
+ /
+ sum(kube_node_status_allocatable_memory_bytes)
+ >
+ (count(kube_node_status_allocatable_memory_bytes)-1)
+ /
+ count(kube_node_status_allocatable_memory_bytes)
+ for: 5m
+ labels:
+ severity: warning
+ - alert: KubeCPUQuotaOvercommit  # sum of hard CPU quotas is more than 1.5x the allocatable CPU
+ annotations:
+ message: Cluster has overcommitted CPU resource requests for Namespaces.
+ runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-kubecpuquotaovercommit
+ expr: |-
+ sum(kube_resourcequota{job="kube-state-metrics", type="hard", resource="cpu"})
+ /
+ sum(kube_node_status_allocatable_cpu_cores)
+ > 1.5
+ for: 5m
+ labels:
+ severity: warning
+ - alert: KubeMemoryQuotaOvercommit  # sum of hard memory quotas is more than 1.5x the allocatable memory
+ annotations:
+ message: Cluster has overcommitted memory resource requests for Namespaces.
+ runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-kubememoryquotaovercommit
+ expr: |-  # denominator carries no job selector, like the CPU variant: the allocatable-memory metric is exported by kube-state-metrics, not node-exporter
+ sum(kube_resourcequota{job="kube-state-metrics", type="hard", resource="memory"})
+ /
+ sum(kube_node_status_allocatable_memory_bytes)
+ > 1.5
+ for: 5m
+ labels:
+ severity: warning
+ - alert: KubeQuotaExceeded  # used / hard quota above 90%; hard quotas of 0 are filtered out of the divisor
+ annotations:
+ message: Namespace {{`{{`}} $labels.namespace {{`}}`}} is using {{`{{`}} $value | humanizePercentage {{`}}`}} of its {{`{{`}} $labels.resource {{`}}`}} quota.
+ runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-kubequotaexceeded
+ expr: |-
+ kube_resourcequota{job="kube-state-metrics", type="used"}
+ / ignoring(instance, job, type)
+ (kube_resourcequota{job="kube-state-metrics", type="hard"} > 0)
+ > 0.90
+ for: 15m
+ labels:
+ severity: warning
+ - alert: CPUThrottlingHigh  # share of throttled CFS periods over 5m above 25%, per container/pod/namespace
+ annotations:
+ message: '{{`{{`}} $value | humanizePercentage {{`}}`}} throttling of CPU in namespace {{`{{`}} $labels.namespace {{`}}`}} for container {{`{{`}} $labels.container {{`}}`}} in pod {{`{{`}} $labels.pod {{`}}`}}.'
+ runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-cputhrottlinghigh
+ expr: |-
+ sum(increase(container_cpu_cfs_throttled_periods_total{container!="", }[5m])) by (container, pod, namespace)
+ /
+ sum(increase(container_cpu_cfs_periods_total{}[5m])) by (container, pod, namespace)
+ > ( 25 / 100 )
+ for: 15m
+ labels:
+ severity: warning
+{{- end }} \ No newline at end of file
diff --git a/kud/tests/vnfs/comp-app/collection/app2/helm/prometheus-operator/templates/prometheus/rules-1.14/kubernetes-storage.yaml b/kud/tests/vnfs/comp-app/collection/app2/helm/prometheus-operator/templates/prometheus/rules-1.14/kubernetes-storage.yaml
new file mode 100755
index 00000000..f2573966
--- /dev/null
+++ b/kud/tests/vnfs/comp-app/collection/app2/helm/prometheus-operator/templates/prometheus/rules-1.14/kubernetes-storage.yaml
@@ -0,0 +1,63 @@
+{{- /*
+Generated from 'kubernetes-storage' group from https://raw.githubusercontent.com/coreos/kube-prometheus/master/manifests/prometheus-rules.yaml
+Do not change in-place! In order to change this file first read following link:
+https://github.com/helm/charts/tree/master/stable/prometheus-operator/hack
+*/ -}}
+{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }}
+{{- if and (semverCompare ">=1.14.0-0" $kubeTargetVersion) (semverCompare "<9.9.9-9" $kubeTargetVersion) .Values.defaultRules.create .Values.defaultRules.rules.kubernetesStorage }}
+{{- $targetNamespace := .Values.defaultRules.appNamespacesTarget }}
+apiVersion: monitoring.coreos.com/v1
+kind: PrometheusRule  # persistent-volume alerts; rendered only for Kubernetes >= 1.14 with defaultRules.create and defaultRules.rules.kubernetesStorage set
+metadata:
+ name: {{ printf "%s-%s" (include "prometheus-operator.fullname" .) "kubernetes-storage" | trunc 63 | trimSuffix "-" }}
+ namespace: {{ template "prometheus-operator.namespace" . }}
+ labels:
+ app: {{ template "prometheus-operator.name" . }}
+{{ include "prometheus-operator.labels" . | indent 4 }}
+{{- if .Values.defaultRules.labels }}
+{{ toYaml .Values.defaultRules.labels | indent 4 }}
+{{- end }}
+{{- if .Values.defaultRules.annotations }}
+ annotations:
+{{ toYaml .Values.defaultRules.annotations | indent 4 }}
+{{- end }}
+spec:
+ groups:
+ - name: kubernetes-storage
+ rules:
+ - alert: KubePersistentVolumeFillingUp  # critical variant: less than 3% of the volume is free
+ annotations:
+ message: The PersistentVolume claimed by {{`{{`}} $labels.persistentvolumeclaim {{`}}`}} in Namespace {{`{{`}} $labels.namespace {{`}}`}} is only {{`{{`}} $value | humanizePercentage {{`}}`}} free.
+ runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-kubepersistentvolumefillingup
+ expr: |-  # namespace is regex-matched against defaultRules.appNamespacesTarget
+ kubelet_volume_stats_available_bytes{job="kubelet", namespace=~"{{ $targetNamespace }}", metrics_path="/metrics"}
+ /
+ kubelet_volume_stats_capacity_bytes{job="kubelet", namespace=~"{{ $targetNamespace }}", metrics_path="/metrics"}
+ < 0.03
+ for: 1m
+ labels:
+ severity: critical
+ - alert: KubePersistentVolumeFillingUp  # warning variant (same alert name): under 15% free and a 6h linear trend predicts exhaustion within 4 days
+ annotations:
+ message: Based on recent sampling, the PersistentVolume claimed by {{`{{`}} $labels.persistentvolumeclaim {{`}}`}} in Namespace {{`{{`}} $labels.namespace {{`}}`}} is expected to fill up within four days. Currently {{`{{`}} $value | humanizePercentage {{`}}`}} is available.
+ runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-kubepersistentvolumefillingup
+ expr: |-
+ (
+ kubelet_volume_stats_available_bytes{job="kubelet", namespace=~"{{ $targetNamespace }}", metrics_path="/metrics"}
+ /
+ kubelet_volume_stats_capacity_bytes{job="kubelet", namespace=~"{{ $targetNamespace }}", metrics_path="/metrics"}
+ ) < 0.15
+ and
+ predict_linear(kubelet_volume_stats_available_bytes{job="kubelet", namespace=~"{{ $targetNamespace }}", metrics_path="/metrics"}[6h], 4 * 24 * 3600) < 0
+ for: 1h
+ labels:
+ severity: warning
+ - alert: KubePersistentVolumeErrors  # a PersistentVolume is in phase Failed or Pending
+ annotations:
+ message: The persistent volume {{`{{`}} $labels.persistentvolume {{`}}`}} has status {{`{{`}} $labels.phase {{`}}`}}.
+ runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-kubepersistentvolumeerrors
+ expr: kube_persistentvolume_status_phase{phase=~"Failed|Pending",job="kube-state-metrics"} > 0
+ for: 5m
+ labels:
+ severity: critical
+{{- end }} \ No newline at end of file
diff --git a/kud/tests/vnfs/comp-app/collection/app2/helm/prometheus-operator/templates/prometheus/rules-1.14/kubernetes-system-apiserver.yaml b/kud/tests/vnfs/comp-app/collection/app2/helm/prometheus-operator/templates/prometheus/rules-1.14/kubernetes-system-apiserver.yaml
new file mode 100755
index 00000000..7583a599
--- /dev/null
+++ b/kud/tests/vnfs/comp-app/collection/app2/helm/prometheus-operator/templates/prometheus/rules-1.14/kubernetes-system-apiserver.yaml
@@ -0,0 +1,100 @@
+{{- /*
+Generated from 'kubernetes-system-apiserver' group from https://raw.githubusercontent.com/coreos/kube-prometheus/master/manifests/prometheus-rules.yaml
+Do not change in-place! In order to change this file first read following link:
+https://github.com/helm/charts/tree/master/stable/prometheus-operator/hack
+*/ -}}
+{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }}
+{{- if and (semverCompare ">=1.14.0-0" $kubeTargetVersion) (semverCompare "<9.9.9-9" $kubeTargetVersion) .Values.defaultRules.create .Values.defaultRules.rules.kubernetesSystem }}
+apiVersion: monitoring.coreos.com/v1
+kind: PrometheusRule  # API-server alerts; rendered only for Kubernetes >= 1.14 with defaultRules.create and defaultRules.rules.kubernetesSystem set
+metadata:
+ name: {{ printf "%s-%s" (include "prometheus-operator.fullname" .) "kubernetes-system-apiserver" | trunc 63 | trimSuffix "-" }}
+ namespace: {{ template "prometheus-operator.namespace" . }}
+ labels:
+ app: {{ template "prometheus-operator.name" . }}
+{{ include "prometheus-operator.labels" . | indent 4 }}
+{{- if .Values.defaultRules.labels }}
+{{ toYaml .Values.defaultRules.labels | indent 4 }}
+{{- end }}
+{{- if .Values.defaultRules.annotations }}
+ annotations:
+{{ toYaml .Values.defaultRules.annotations | indent 4 }}
+{{- end }}
+spec:
+ groups:
+ - name: kubernetes-system-apiserver
+ rules:
+ - alert: KubeAPILatencyHigh  # mean latency above avg + 2*stddev and above 1.2*avg for its verb, and the 0.99 quantile above 1
+ annotations:
+ message: The API server has an abnormal latency of {{`{{`}} $value {{`}}`}} seconds for {{`{{`}} $labels.verb {{`}}`}} {{`{{`}} $labels.resource {{`}}`}}.
+ runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-kubeapilatencyhigh
+ expr: |-
+ (
+ cluster:apiserver_request_duration_seconds:mean5m{job="apiserver"}
+ >
+ on (verb) group_left()
+ (
+ avg by (verb) (cluster:apiserver_request_duration_seconds:mean5m{job="apiserver"} >= 0)
+ +
+ 2*stddev by (verb) (cluster:apiserver_request_duration_seconds:mean5m{job="apiserver"} >= 0)
+ )
+ ) > on (verb) group_left()
+ 1.2 * avg by (verb) (cluster:apiserver_request_duration_seconds:mean5m{job="apiserver"} >= 0)
+ and on (verb,resource)
+ cluster_quantile:apiserver_request_duration_seconds:histogram_quantile{job="apiserver",quantile="0.99"}
+ >
+ 1
+ for: 5m
+ labels:
+ severity: warning
+ - alert: KubeAPIErrorsHigh  # share of 5xx responses over 5m above 5%, per resource/subresource/verb
+ annotations:
+ message: API server is returning errors for {{`{{`}} $value | humanizePercentage {{`}}`}} of requests for {{`{{`}} $labels.verb {{`}}`}} {{`{{`}} $labels.resource {{`}}`}} {{`{{`}} $labels.subresource {{`}}`}}.
+ runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-kubeapierrorshigh
+ expr: |-
+ sum(rate(apiserver_request_total{job="apiserver",code=~"5.."}[5m])) by (resource,subresource,verb)
+ /
+ sum(rate(apiserver_request_total{job="apiserver"}[5m])) by (resource,subresource,verb) > 0.05
+ for: 10m
+ labels:
+ severity: warning
+ - alert: KubeClientCertificateExpiration  # warning variant: 0.01 quantile of client-cert expiry below 604800 s (7 days); no "for" delay
+ annotations:
+ message: A client certificate used to authenticate to the apiserver is expiring in less than 7.0 days.
+ runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-kubeclientcertificateexpiration
+ expr: apiserver_client_certificate_expiration_seconds_count{job="apiserver"} > 0 and on(job) histogram_quantile(0.01, sum by (job, le) (rate(apiserver_client_certificate_expiration_seconds_bucket{job="apiserver"}[5m]))) < 604800
+ labels:
+ severity: warning
+ - alert: KubeClientCertificateExpiration  # critical variant (same alert name): threshold 86400 s (24 hours)
+ annotations:
+ message: A client certificate used to authenticate to the apiserver is expiring in less than 24.0 hours.
+ runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-kubeclientcertificateexpiration
+ expr: apiserver_client_certificate_expiration_seconds_count{job="apiserver"} > 0 and on(job) histogram_quantile(0.01, sum by (job, le) (rate(apiserver_client_certificate_expiration_seconds_bucket{job="apiserver"}[5m]))) < 86400
+ labels:
+ severity: critical
+ - alert: AggregatedAPIErrors  # unavailable-apiservice counter rose by more than 2 within 5m, per name/namespace
+ annotations:
+ message: An aggregated API {{`{{`}} $labels.name {{`}}`}}/{{`{{`}} $labels.namespace {{`}}`}} has reported errors. The number of errors have increased for it in the past five minutes. High values indicate that the availability of the service changes too often.
+ runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-aggregatedapierrors
+ expr: sum by(name, namespace)(increase(aggregator_unavailable_apiservice_count[5m])) > 2
+ labels:
+ severity: warning
+ - alert: AggregatedAPIDown  # aggregated API reported unavailable at some point in each 5m window, sustained for 5m
+ annotations:
+ message: An aggregated API {{`{{`}} $labels.name {{`}}`}}/{{`{{`}} $labels.namespace {{`}}`}} is down. It has not been available at least for the past five minutes.
+ runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-aggregatedapidown
+ expr: sum by(name, namespace)(sum_over_time(aggregator_unavailable_apiservice[5m])) > 0
+ for: 5m
+ labels:
+ severity: warning
+{{- if .Values.kubeApiServer.enabled }}
+ - alert: KubeAPIDown  # emitted only when kubeApiServer.enabled; fires when no apiserver target reports up == 1
+ annotations:
+ message: KubeAPI has disappeared from Prometheus target discovery.
+ runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-kubeapidown
+ expr: absent(up{job="apiserver"} == 1)
+ for: 15m
+ labels:
+ severity: critical
+{{- end }}
+{{- end }} \ No newline at end of file
diff --git a/kud/tests/vnfs/comp-app/collection/app2/helm/prometheus-operator/templates/prometheus/rules-1.14/kubernetes-system-controller-manager.yaml b/kud/tests/vnfs/comp-app/collection/app2/helm/prometheus-operator/templates/prometheus/rules-1.14/kubernetes-system-controller-manager.yaml
new file mode 100755
index 00000000..6214d775
--- /dev/null
+++ b/kud/tests/vnfs/comp-app/collection/app2/helm/prometheus-operator/templates/prometheus/rules-1.14/kubernetes-system-controller-manager.yaml
@@ -0,0 +1,37 @@
+{{- /*
+Generated from 'kubernetes-system-controller-manager' group from https://raw.githubusercontent.com/coreos/kube-prometheus/master/manifests/prometheus-rules.yaml
+Do not change in-place! In order to change this file first read following link:
+https://github.com/helm/charts/tree/master/stable/prometheus-operator/hack
+*/ -}}
+{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }}
+{{- if and (semverCompare ">=1.14.0-0" $kubeTargetVersion) (semverCompare "<9.9.9-9" $kubeTargetVersion) .Values.defaultRules.create .Values.kubeControllerManager.enabled }}
+apiVersion: monitoring.coreos.com/v1
+kind: PrometheusRule  # controller-manager availability alert; rendered only for Kubernetes >= 1.14 with defaultRules.create and kubeControllerManager.enabled set
+metadata:
+ name: {{ printf "%s-%s" (include "prometheus-operator.fullname" .) "kubernetes-system-controller-manager" | trunc 63 | trimSuffix "-" }}
+ namespace: {{ template "prometheus-operator.namespace" . }}
+ labels:
+ app: {{ template "prometheus-operator.name" . }}
+{{ include "prometheus-operator.labels" . | indent 4 }}
+{{- if .Values.defaultRules.labels }}
+{{ toYaml .Values.defaultRules.labels | indent 4 }}
+{{- end }}
+{{- if .Values.defaultRules.annotations }}
+ annotations:
+{{ toYaml .Values.defaultRules.annotations | indent 4 }}
+{{- end }}
+spec:
+ groups:
+ - name: kubernetes-system-controller-manager
+ rules:  # NOTE(review): the guard on the next line repeats the kubeControllerManager.enabled check already made by the file-level guard
+{{- if .Values.kubeControllerManager.enabled }}
+ - alert: KubeControllerManagerDown  # fires when no kube-controller-manager target reports up == 1 for 15m
+ annotations:
+ message: KubeControllerManager has disappeared from Prometheus target discovery.
+ runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-kubecontrollermanagerdown
+ expr: absent(up{job="kube-controller-manager"} == 1)
+ for: 15m
+ labels:
+ severity: critical
+{{- end }}
+{{- end }} \ No newline at end of file
diff --git a/kud/tests/vnfs/comp-app/collection/app2/helm/prometheus-operator/templates/prometheus/rules-1.14/kubernetes-system-kubelet.yaml b/kud/tests/vnfs/comp-app/collection/app2/helm/prometheus-operator/templates/prometheus/rules-1.14/kubernetes-system-kubelet.yaml
new file mode 100755
index 00000000..bb7a2838
--- /dev/null
+++ b/kud/tests/vnfs/comp-app/collection/app2/helm/prometheus-operator/templates/prometheus/rules-1.14/kubernetes-system-kubelet.yaml
@@ -0,0 +1,84 @@
+{{- /*
+Generated from 'kubernetes-system-kubelet' group from https://raw.githubusercontent.com/coreos/kube-prometheus/master/manifests/prometheus-rules.yaml
+Do not change in-place! In order to change this file first read following link:
+https://github.com/helm/charts/tree/master/stable/prometheus-operator/hack
+*/ -}}
+{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }}
+{{- if and (semverCompare ">=1.14.0-0" $kubeTargetVersion) (semverCompare "<9.9.9-9" $kubeTargetVersion) .Values.defaultRules.create .Values.defaultRules.rules.kubernetesSystem }}
+apiVersion: monitoring.coreos.com/v1
+kind: PrometheusRule  # node and kubelet alerts; rendered only for Kubernetes >= 1.14 with defaultRules.create and defaultRules.rules.kubernetesSystem set
+metadata:
+ name: {{ printf "%s-%s" (include "prometheus-operator.fullname" .) "kubernetes-system-kubelet" | trunc 63 | trimSuffix "-" }}
+ namespace: {{ template "prometheus-operator.namespace" . }}
+ labels:
+ app: {{ template "prometheus-operator.name" . }}
+{{ include "prometheus-operator.labels" . | indent 4 }}
+{{- if .Values.defaultRules.labels }}
+{{ toYaml .Values.defaultRules.labels | indent 4 }}
+{{- end }}
+{{- if .Values.defaultRules.annotations }}
+ annotations:
+{{ toYaml .Values.defaultRules.annotations | indent 4 }}
+{{- end }}
+spec:
+ groups:
+ - name: kubernetes-system-kubelet
+ rules:
+ - alert: KubeNodeNotReady  # the node's Ready condition (status="true" series) has been 0 for 15m
+ annotations:
+ message: '{{`{{`}} $labels.node {{`}}`}} has been unready for more than 15 minutes.'
+ runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-kubenodenotready
+ expr: kube_node_status_condition{job="kube-state-metrics",condition="Ready",status="true"} == 0
+ for: 15m
+ labels:
+ severity: warning
+ - alert: KubeNodeUnreachable  # unreachable NoSchedule taint present, unless the node also carries the ToBeDeletedByClusterAutoscaler taint; no "for" delay
+ annotations:
+ message: '{{`{{`}} $labels.node {{`}}`}} is unreachable and some workloads may be rescheduled.'
+ runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-kubenodeunreachable
+ expr: (kube_node_spec_taint{job="kube-state-metrics",key="node.kubernetes.io/unreachable",effect="NoSchedule"} unless ignoring(key,value) kube_node_spec_taint{job="kube-state-metrics",key="ToBeDeletedByClusterAutoscaler"}) == 1
+ labels:
+ severity: warning
+ - alert: KubeletTooManyPods  # running pods per node divided by the node's pod capacity exceeds 0.95
+ annotations:
+ message: Kubelet '{{`{{`}} $labels.node {{`}}`}}' is running at {{`{{`}} $value | humanizePercentage {{`}}`}} of its Pod capacity.
+ runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-kubelettoomanypods
+ expr: max(max(kubelet_running_pod_count{job="kubelet", metrics_path="/metrics"}) by(instance) * on(instance) group_left(node) kubelet_node_name{job="kubelet", metrics_path="/metrics"}) by(node) / max(kube_node_status_capacity_pods{job="kube-state-metrics"} != 1) by(node) > 0.95
+ for: 15m
+ labels:
+ severity: warning
+ - alert: KubeNodeReadinessFlapping  # Ready condition changed more than 2 times within 15m, per node
+ annotations:
+ message: The readiness status of node {{`{{`}} $labels.node {{`}}`}} has changed {{`{{`}} $value {{`}}`}} times in the last 15 minutes.
+ runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-kubenodereadinessflapping
+ expr: sum(changes(kube_node_status_condition{status="true",condition="Ready"}[15m])) by (node) > 2
+ for: 15m
+ labels:
+ severity: warning
+ - alert: KubeletPlegDurationHigh  # recorded 0.99 quantile of PLEG relist duration is at least 10
+ annotations:
+ message: The Kubelet Pod Lifecycle Event Generator has a 99th percentile duration of {{`{{`}} $value {{`}}`}} seconds on node {{`{{`}} $labels.node {{`}}`}}.
+ runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-kubeletplegdurationhigh
+ expr: node_quantile:kubelet_pleg_relist_duration_seconds:histogram_quantile{quantile="0.99"} >= 10
+ for: 5m
+ labels:
+ severity: warning
+ - alert: KubeletPodStartUpLatencyHigh  # 0.99 quantile of pod worker duration over 5m exceeds 60, joined to the node name
+ annotations:
+ message: Kubelet Pod startup 99th percentile latency is {{`{{`}} $value {{`}}`}} seconds on node {{`{{`}} $labels.node {{`}}`}}.
+ runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-kubeletpodstartuplatencyhigh
+ expr: histogram_quantile(0.99, sum(rate(kubelet_pod_worker_duration_seconds_bucket{job="kubelet", metrics_path="/metrics"}[5m])) by (instance, le)) * on(instance) group_left(node) kubelet_node_name{job="kubelet", metrics_path="/metrics"} > 60
+ for: 15m
+ labels:
+ severity: warning
+{{- if .Values.prometheusOperator.kubeletService.enabled }}
+ - alert: KubeletDown  # emitted only when prometheusOperator.kubeletService.enabled; fires when no kubelet target reports up == 1
+ annotations:
+ message: Kubelet has disappeared from Prometheus target discovery.
+ runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-kubeletdown
+ expr: absent(up{job="kubelet", metrics_path="/metrics"} == 1)
+ for: 15m
+ labels:
+ severity: critical
+{{- end }}
+{{- end }}
diff --git a/kud/tests/vnfs/comp-app/collection/app2/helm/prometheus-operator/templates/prometheus/rules-1.14/kubernetes-system-scheduler.yaml b/kud/tests/vnfs/comp-app/collection/app2/helm/prometheus-operator/templates/prometheus/rules-1.14/kubernetes-system-scheduler.yaml
new file mode 100755
index 00000000..84e3b1ab
--- /dev/null
+++ b/kud/tests/vnfs/comp-app/collection/app2/helm/prometheus-operator/templates/prometheus/rules-1.14/kubernetes-system-scheduler.yaml
@@ -0,0 +1,37 @@
+{{- /*
+Generated from 'kubernetes-system-scheduler' group from https://raw.githubusercontent.com/coreos/kube-prometheus/master/manifests/prometheus-rules.yaml
+Do not change in-place! In order to change this file first read following link:
+https://github.com/helm/charts/tree/master/stable/prometheus-operator/hack
+*/ -}}
+{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }}
+{{- if and (semverCompare ">=1.14.0-0" $kubeTargetVersion) (semverCompare "<9.9.9-9" $kubeTargetVersion) .Values.defaultRules.create .Values.kubeScheduler.enabled .Values.defaultRules.rules.kubeScheduler }}
+apiVersion: monitoring.coreos.com/v1
+kind: PrometheusRule  # scheduler availability alert; rendered only for Kubernetes >= 1.14 with defaultRules.create, kubeScheduler.enabled and defaultRules.rules.kubeScheduler set
+metadata:
+ name: {{ printf "%s-%s" (include "prometheus-operator.fullname" .) "kubernetes-system-scheduler" | trunc 63 | trimSuffix "-" }}
+ namespace: {{ template "prometheus-operator.namespace" . }}
+ labels:
+ app: {{ template "prometheus-operator.name" . }}
+{{ include "prometheus-operator.labels" . | indent 4 }}
+{{- if .Values.defaultRules.labels }}
+{{ toYaml .Values.defaultRules.labels | indent 4 }}
+{{- end }}
+{{- if .Values.defaultRules.annotations }}
+ annotations:
+{{ toYaml .Values.defaultRules.annotations | indent 4 }}
+{{- end }}
+spec:
+ groups:
+ - name: kubernetes-system-scheduler
+ rules:  # NOTE(review): the guard on the next line repeats the kubeScheduler.enabled check already made by the file-level guard
+{{- if .Values.kubeScheduler.enabled }}
+ - alert: KubeSchedulerDown  # fires when no kube-scheduler target reports up == 1 for 15m
+ annotations:
+ message: KubeScheduler has disappeared from Prometheus target discovery.
+ runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-kubeschedulerdown
+ expr: absent(up{job="kube-scheduler"} == 1)
+ for: 15m
+ labels:
+ severity: critical
+{{- end }}
+{{- end }} \ No newline at end of file
diff --git a/kud/tests/vnfs/comp-app/collection/app2/helm/prometheus-operator/templates/prometheus/rules-1.14/kubernetes-system.yaml b/kud/tests/vnfs/comp-app/collection/app2/helm/prometheus-operator/templates/prometheus/rules-1.14/kubernetes-system.yaml
new file mode 100755
index 00000000..fc455f37
--- /dev/null
+++ b/kud/tests/vnfs/comp-app/collection/app2/helm/prometheus-operator/templates/prometheus/rules-1.14/kubernetes-system.yaml
@@ -0,0 +1,47 @@
+{{- /*
+Generated from 'kubernetes-system' group from https://raw.githubusercontent.com/coreos/kube-prometheus/master/manifests/prometheus-rules.yaml
+Do not change in-place! In order to change this file first read following link:
+https://github.com/helm/charts/tree/master/stable/prometheus-operator/hack
+*/ -}}
+{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }}
+{{- if and (semverCompare ">=1.14.0-0" $kubeTargetVersion) (semverCompare "<9.9.9-9" $kubeTargetVersion) .Values.defaultRules.create .Values.defaultRules.rules.kubernetesSystem }}
+apiVersion: monitoring.coreos.com/v1
+kind: PrometheusRule  # cluster-wide version and API-client alerts; rendered only for Kubernetes >= 1.14 with defaultRules.create and defaultRules.rules.kubernetesSystem set
+metadata:
+ name: {{ printf "%s-%s" (include "prometheus-operator.fullname" .) "kubernetes-system" | trunc 63 | trimSuffix "-" }}
+ namespace: {{ template "prometheus-operator.namespace" . }}
+ labels:
+ app: {{ template "prometheus-operator.name" . }}
+{{ include "prometheus-operator.labels" . | indent 4 }}
+{{- if .Values.defaultRules.labels }}
+{{ toYaml .Values.defaultRules.labels | indent 4 }}
+{{- end }}
+{{- if .Values.defaultRules.annotations }}
+ annotations:
+{{ toYaml .Values.defaultRules.annotations | indent 4 }}
+{{- end }}
+spec:
+ groups:
+ - name: kubernetes-system
+ rules:
+ - alert: KubeVersionMismatch  # more than one distinct gitVersion (rewritten to the prefix captured by the regex) across components, excluding kube-dns/coredns jobs
+ annotations:
+ message: There are {{`{{`}} $value {{`}}`}} different semantic versions of Kubernetes components running.
+ runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-kubeversionmismatch
+ expr: count(count by (gitVersion) (label_replace(kubernetes_build_info{job!~"kube-dns|coredns"},"gitVersion","$1","gitVersion","(v[0-9]*.[0-9]*.[0-9]*).*"))) > 1
+ for: 15m
+ labels:
+ severity: warning
+ - alert: KubeClientErrors  # share of 5xx responses seen by an API client over 5m above 1%, per instance/job
+ annotations:
+ message: Kubernetes API server client '{{`{{`}} $labels.job {{`}}`}}/{{`{{`}} $labels.instance {{`}}`}}' is experiencing {{`{{`}} $value | humanizePercentage {{`}}`}} errors.
+ runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-kubeclienterrors
+ expr: |-
+ (sum(rate(rest_client_requests_total{code=~"5.."}[5m])) by (instance, job)
+ /
+ sum(rate(rest_client_requests_total[5m])) by (instance, job))
+ > 0.01
+ for: 15m
+ labels:
+ severity: warning
+{{- end }} \ No newline at end of file
diff --git a/kud/tests/vnfs/comp-app/collection/app2/helm/prometheus-operator/templates/prometheus/rules-1.14/node-exporter.rules.yaml b/kud/tests/vnfs/comp-app/collection/app2/helm/prometheus-operator/templates/prometheus/rules-1.14/node-exporter.rules.yaml
new file mode 100755
index 00000000..7adf85db
--- /dev/null
+++ b/kud/tests/vnfs/comp-app/collection/app2/helm/prometheus-operator/templates/prometheus/rules-1.14/node-exporter.rules.yaml
@@ -0,0 +1,79 @@
+{{- /*
+Generated from 'node-exporter.rules' group from https://raw.githubusercontent.com/coreos/kube-prometheus/master/manifests/prometheus-rules.yaml
+Do not change in-place! In order to change this file first read following link:
+https://github.com/helm/charts/tree/master/stable/prometheus-operator/hack
+*/ -}}
+{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }}
+{{- if and (semverCompare ">=1.14.0-0" $kubeTargetVersion) (semverCompare "<9.9.9-9" $kubeTargetVersion) .Values.defaultRules.create .Values.nodeExporter.enabled .Values.defaultRules.rules.node }}
+apiVersion: monitoring.coreos.com/v1
+kind: PrometheusRule  # node-exporter recording rules; rendered only for Kubernetes >= 1.14 with defaultRules.create, nodeExporter.enabled and defaultRules.rules.node set
+metadata:
+ name: {{ printf "%s-%s" (include "prometheus-operator.fullname" .) "node-exporter.rules" | trunc 63 | trimSuffix "-" }}
+ namespace: {{ template "prometheus-operator.namespace" . }}
+ labels:
+ app: {{ template "prometheus-operator.name" . }}
+{{ include "prometheus-operator.labels" . | indent 4 }}
+{{- if .Values.defaultRules.labels }}
+{{ toYaml .Values.defaultRules.labels | indent 4 }}
+{{- end }}
+{{- if .Values.defaultRules.annotations }}
+ annotations:
+{{ toYaml .Values.defaultRules.annotations | indent 4 }}
+{{- end }}
+spec:
+ groups:
+ - name: node-exporter.rules
+ rules:
+ - expr: |-
+ count without (cpu) (
+ count without (mode) (
+ node_cpu_seconds_total{job="node-exporter"}
+ )
+ )
+ record: instance:node_num_cpu:sum  # number of CPUs per instance
+ - expr: |-
+ 1 - avg without (cpu, mode) (
+ rate(node_cpu_seconds_total{job="node-exporter", mode="idle"}[1m])
+ )
+ record: instance:node_cpu_utilisation:rate1m  # 1 minus the average idle rate over 1m
+ - expr: |-
+ (
+ node_load1{job="node-exporter"}
+ /
+ instance:node_num_cpu:sum{job="node-exporter"}
+ )
+ record: instance:node_load1_per_cpu:ratio  # 1-minute load divided by the CPU count recorded above
+ - expr: |-
+ 1 - (
+ node_memory_MemAvailable_bytes{job="node-exporter"}
+ /
+ node_memory_MemTotal_bytes{job="node-exporter"}
+ )
+ record: instance:node_memory_utilisation:ratio  # 1 minus available/total memory
+ - expr: rate(node_vmstat_pgmajfault{job="node-exporter"}[1m])
+ record: instance:node_vmstat_pgmajfault:rate1m  # major page-fault rate over 1m
+ - expr: rate(node_disk_io_time_seconds_total{job="node-exporter", device=~"nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+"}[1m])
+ record: instance_device:node_disk_io_time_seconds:rate1m  # disk I/O time rate over 1m, limited to devices matching the regex
+ - expr: rate(node_disk_io_time_weighted_seconds_total{job="node-exporter", device=~"nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+"}[1m])
+ record: instance_device:node_disk_io_time_weighted_seconds:rate1m  # weighted disk I/O time rate over 1m, same device filter
+ - expr: |-
+ sum without (device) (
+ rate(node_network_receive_bytes_total{job="node-exporter", device!="lo"}[1m])
+ )
+ record: instance:node_network_receive_bytes_excluding_lo:rate1m  # received bytes/s summed over all devices except lo
+ - expr: |-
+ sum without (device) (
+ rate(node_network_transmit_bytes_total{job="node-exporter", device!="lo"}[1m])
+ )
+ record: instance:node_network_transmit_bytes_excluding_lo:rate1m  # transmitted bytes/s summed over all devices except lo
+ - expr: |-
+ sum without (device) (
+ rate(node_network_receive_drop_total{job="node-exporter", device!="lo"}[1m])
+ )
+ record: instance:node_network_receive_drop_excluding_lo:rate1m  # receive drops/s summed over all devices except lo
+ - expr: |-
+ sum without (device) (
+ rate(node_network_transmit_drop_total{job="node-exporter", device!="lo"}[1m])
+ )
+ record: instance:node_network_transmit_drop_excluding_lo:rate1m  # transmit drops/s summed over all devices except lo
+{{- end }} \ No newline at end of file
diff --git a/kud/tests/vnfs/comp-app/collection/app2/helm/prometheus-operator/templates/prometheus/rules-1.14/node-exporter.yaml b/kud/tests/vnfs/comp-app/collection/app2/helm/prometheus-operator/templates/prometheus/rules-1.14/node-exporter.yaml
new file mode 100755
index 00000000..7b6c601b
--- /dev/null
+++ b/kud/tests/vnfs/comp-app/collection/app2/helm/prometheus-operator/templates/prometheus/rules-1.14/node-exporter.yaml
@@ -0,0 +1,202 @@
+{{- /*
+Generated from 'node-exporter' group from https://raw.githubusercontent.com/coreos/kube-prometheus/master/manifests/prometheus-rules.yaml
+Do not change in-place! In order to change this file first read following link:
+https://github.com/helm/charts/tree/master/stable/prometheus-operator/hack
+*/ -}}
+{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }}
+{{- if and (semverCompare ">=1.14.0-0" $kubeTargetVersion) (semverCompare "<9.9.9-9" $kubeTargetVersion) .Values.defaultRules.create .Values.nodeExporter.enabled .Values.defaultRules.rules.node }}
+apiVersion: monitoring.coreos.com/v1
+kind: PrometheusRule
+metadata:
+ name: {{ printf "%s-%s" (include "prometheus-operator.fullname" .) "node-exporter" | trunc 63 | trimSuffix "-" }}
+ namespace: {{ template "prometheus-operator.namespace" . }}
+ labels:
+ app: {{ template "prometheus-operator.name" . }}
+{{ include "prometheus-operator.labels" . | indent 4 }}
+{{- if .Values.defaultRules.labels }}
+{{ toYaml .Values.defaultRules.labels | indent 4 }}
+{{- end }}
+{{- if .Values.defaultRules.annotations }}
+ annotations:
+{{ toYaml .Values.defaultRules.annotations | indent 4 }}
+{{- end }}
+spec:
+ groups:
+ - name: node-exporter
+ rules:
+ - alert: NodeFilesystemSpaceFillingUp
+ annotations:
+ description: Filesystem on {{`{{`}} $labels.device {{`}}`}} at {{`{{`}} $labels.instance {{`}}`}} has only {{`{{`}} printf "%.2f" $value {{`}}`}}% available space left and is filling up.
+ runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-nodefilesystemspacefillingup
+ summary: Filesystem is predicted to run out of space within the next 24 hours.
+ expr: |-
+ (
+ node_filesystem_avail_bytes{job="node-exporter",fstype!=""} / node_filesystem_size_bytes{job="node-exporter",fstype!=""} * 100 < 40
+ and
+ predict_linear(node_filesystem_avail_bytes{job="node-exporter",fstype!=""}[6h], 24*60*60) < 0
+ and
+ node_filesystem_readonly{job="node-exporter",fstype!=""} == 0
+ )
+ for: 1h
+ labels:
+ severity: warning
+ - alert: NodeFilesystemSpaceFillingUp
+ annotations:
+ description: Filesystem on {{`{{`}} $labels.device {{`}}`}} at {{`{{`}} $labels.instance {{`}}`}} has only {{`{{`}} printf "%.2f" $value {{`}}`}}% available space left and is filling up fast.
+ runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-nodefilesystemspacefillingup
+ summary: Filesystem is predicted to run out of space within the next 4 hours.
+ expr: |-
+ (
+ node_filesystem_avail_bytes{job="node-exporter",fstype!=""} / node_filesystem_size_bytes{job="node-exporter",fstype!=""} * 100 < 15
+ and
+ predict_linear(node_filesystem_avail_bytes{job="node-exporter",fstype!=""}[6h], 4*60*60) < 0
+ and
+ node_filesystem_readonly{job="node-exporter",fstype!=""} == 0
+ )
+ for: 1h
+ labels:
+ severity: critical
+ - alert: NodeFilesystemAlmostOutOfSpace
+ annotations:
+ description: Filesystem on {{`{{`}} $labels.device {{`}}`}} at {{`{{`}} $labels.instance {{`}}`}} has only {{`{{`}} printf "%.2f" $value {{`}}`}}% available space left.
+ runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-nodefilesystemalmostoutofspace
+ summary: Filesystem has less than 5% space left.
+ expr: |-
+ (
+ node_filesystem_avail_bytes{job="node-exporter",fstype!=""} / node_filesystem_size_bytes{job="node-exporter",fstype!=""} * 100 < 5
+ and
+ node_filesystem_readonly{job="node-exporter",fstype!=""} == 0
+ )
+ for: 1h
+ labels:
+ severity: warning
+ - alert: NodeFilesystemAlmostOutOfSpace
+ annotations:
+ description: Filesystem on {{`{{`}} $labels.device {{`}}`}} at {{`{{`}} $labels.instance {{`}}`}} has only {{`{{`}} printf "%.2f" $value {{`}}`}}% available space left.
+ runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-nodefilesystemalmostoutofspace
+ summary: Filesystem has less than 3% space left.
+ expr: |-
+ (
+ node_filesystem_avail_bytes{job="node-exporter",fstype!=""} / node_filesystem_size_bytes{job="node-exporter",fstype!=""} * 100 < 3
+ and
+ node_filesystem_readonly{job="node-exporter",fstype!=""} == 0
+ )
+ for: 1h
+ labels:
+ severity: critical
+ - alert: NodeFilesystemFilesFillingUp
+ annotations:
+ description: Filesystem on {{`{{`}} $labels.device {{`}}`}} at {{`{{`}} $labels.instance {{`}}`}} has only {{`{{`}} printf "%.2f" $value {{`}}`}}% available inodes left and is filling up.
+ runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-nodefilesystemfilesfillingup
+ summary: Filesystem is predicted to run out of inodes within the next 24 hours.
+ expr: |-
+ (
+ node_filesystem_files_free{job="node-exporter",fstype!=""} / node_filesystem_files{job="node-exporter",fstype!=""} * 100 < 40
+ and
+ predict_linear(node_filesystem_files_free{job="node-exporter",fstype!=""}[6h], 24*60*60) < 0
+ and
+ node_filesystem_readonly{job="node-exporter",fstype!=""} == 0
+ )
+ for: 1h
+ labels:
+ severity: warning
+ - alert: NodeFilesystemFilesFillingUp
+ annotations:
+ description: Filesystem on {{`{{`}} $labels.device {{`}}`}} at {{`{{`}} $labels.instance {{`}}`}} has only {{`{{`}} printf "%.2f" $value {{`}}`}}% available inodes left and is filling up fast.
+ runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-nodefilesystemfilesfillingup
+ summary: Filesystem is predicted to run out of inodes within the next 4 hours.
+ expr: |-
+ (
+ node_filesystem_files_free{job="node-exporter",fstype!=""} / node_filesystem_files{job="node-exporter",fstype!=""} * 100 < 20
+ and
+ predict_linear(node_filesystem_files_free{job="node-exporter",fstype!=""}[6h], 4*60*60) < 0
+ and
+ node_filesystem_readonly{job="node-exporter",fstype!=""} == 0
+ )
+ for: 1h
+ labels:
+ severity: critical
+ - alert: NodeFilesystemAlmostOutOfFiles
+ annotations:
+ description: Filesystem on {{`{{`}} $labels.device {{`}}`}} at {{`{{`}} $labels.instance {{`}}`}} has only {{`{{`}} printf "%.2f" $value {{`}}`}}% available inodes left.
+ runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-nodefilesystemalmostoutoffiles
+ summary: Filesystem has less than 5% inodes left.
+ expr: |-
+ (
+ node_filesystem_files_free{job="node-exporter",fstype!=""} / node_filesystem_files{job="node-exporter",fstype!=""} * 100 < 5
+ and
+ node_filesystem_readonly{job="node-exporter",fstype!=""} == 0
+ )
+ for: 1h
+ labels:
+ severity: warning
+ - alert: NodeFilesystemAlmostOutOfFiles
+ annotations:
+ description: Filesystem on {{`{{`}} $labels.device {{`}}`}} at {{`{{`}} $labels.instance {{`}}`}} has only {{`{{`}} printf "%.2f" $value {{`}}`}}% available inodes left.
+ runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-nodefilesystemalmostoutoffiles
+ summary: Filesystem has less than 3% inodes left.
+ expr: |-
+ (
+ node_filesystem_files_free{job="node-exporter",fstype!=""} / node_filesystem_files{job="node-exporter",fstype!=""} * 100 < 3
+ and
+ node_filesystem_readonly{job="node-exporter",fstype!=""} == 0
+ )
+ for: 1h
+ labels:
+ severity: critical
+ - alert: NodeNetworkReceiveErrs
+ annotations:
+ description: '{{`{{`}} $labels.instance {{`}}`}} interface {{`{{`}} $labels.device {{`}}`}} has encountered {{`{{`}} printf "%.0f" $value {{`}}`}} receive errors in the last two minutes.'
+ runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-nodenetworkreceiveerrs
+ summary: Network interface is reporting many receive errors.
+ expr: increase(node_network_receive_errs_total[2m]) > 10
+ for: 1h
+ labels:
+ severity: warning
+ - alert: NodeNetworkTransmitErrs
+ annotations:
+ description: '{{`{{`}} $labels.instance {{`}}`}} interface {{`{{`}} $labels.device {{`}}`}} has encountered {{`{{`}} printf "%.0f" $value {{`}}`}} transmit errors in the last two minutes.'
+ runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-nodenetworktransmiterrs
+ summary: Network interface is reporting many transmit errors.
+ expr: increase(node_network_transmit_errs_total[2m]) > 10
+ for: 1h
+ labels:
+ severity: warning
+ - alert: NodeHighNumberConntrackEntriesUsed  # entries/limit ratio above 0.75; no 'for' delay is configured
+ annotations:
+ description: '{{`{{`}} $value | humanizePercentage {{`}}`}} of conntrack entries are used'
+ runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-nodehighnumberconntrackentriesused
+ summary: Number of conntrack entries is getting close to the limit
+ expr: (node_nf_conntrack_entries / node_nf_conntrack_entries_limit) > 0.75
+ labels:
+ severity: warning
+ - alert: NodeClockSkewDetected  # offset beyond +/-0.05s while its 5m derivative is not moving it back toward zero
+ annotations:
+ message: Clock on {{`{{`}} $labels.instance {{`}}`}} is out of sync by more than 0.05s. Ensure NTP is configured correctly on this host.
+ runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-nodeclockskewdetected
+ summary: Clock skew detected.
+ expr: |-
+ (
+ node_timex_offset_seconds > 0.05
+ and
+ deriv(node_timex_offset_seconds[5m]) >= 0
+ )
+ or
+ (
+ node_timex_offset_seconds < -0.05
+ and
+ deriv(node_timex_offset_seconds[5m]) <= 0
+ )
+ for: 10m  # condition must hold for 10 minutes before the alert fires
+ labels:
+ severity: warning
+ - alert: NodeClockNotSynchronising
+ annotations:
+ message: Clock on {{`{{`}} $labels.instance {{`}}`}} is not synchronising. Ensure NTP is configured on this host.
+ runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-nodeclocknotsynchronising
+ summary: Clock not synchronising.
+ expr: min_over_time(node_timex_sync_status[5m]) == 0
+ for: 10m
+ labels:
+ severity: warning
+{{- end }} \ No newline at end of file
diff --git a/kud/tests/vnfs/comp-app/collection/app2/helm/prometheus-operator/templates/prometheus/rules-1.14/node-network.yaml b/kud/tests/vnfs/comp-app/collection/app2/helm/prometheus-operator/templates/prometheus/rules-1.14/node-network.yaml
new file mode 100755
index 00000000..b4b206d8
--- /dev/null
+++ b/kud/tests/vnfs/comp-app/collection/app2/helm/prometheus-operator/templates/prometheus/rules-1.14/node-network.yaml
@@ -0,0 +1,34 @@
+{{- /*
+Generated from 'node-network' group from https://raw.githubusercontent.com/coreos/kube-prometheus/master/manifests/prometheus-rules.yaml
+Do not change in-place! In order to change this file first read following link:
+https://github.com/helm/charts/tree/master/stable/prometheus-operator/hack
+*/ -}}
+{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }}
+{{- if and (semverCompare ">=1.14.0-0" $kubeTargetVersion) (semverCompare "<9.9.9-9" $kubeTargetVersion) .Values.defaultRules.create .Values.defaultRules.rules.network }}
+apiVersion: monitoring.coreos.com/v1
+kind: PrometheusRule
+metadata:
+ name: {{ printf "%s-%s" (include "prometheus-operator.fullname" .) "node-network" | trunc 63 | trimSuffix "-" }}
+ namespace: {{ template "prometheus-operator.namespace" . }}
+ labels:
+ app: {{ template "prometheus-operator.name" . }}
+{{ include "prometheus-operator.labels" . | indent 4 }}
+{{- if .Values.defaultRules.labels }}
+{{ toYaml .Values.defaultRules.labels | indent 4 }}
+{{- end }}
+{{- if .Values.defaultRules.annotations }}
+ annotations:
+{{ toYaml .Values.defaultRules.annotations | indent 4 }}
+{{- end }}
+spec:
+ groups:
+ - name: node-network
+ rules:
+ - alert: NodeNetworkInterfaceFlapping  # more than 2 changes of node_network_up within 2m; veth devices are excluded
+ annotations:
+ message: Network interface "{{`{{`}} $labels.device {{`}}`}}" changing its up status often on node-exporter {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.pod {{`}}`}}
+ expr: changes(node_network_up{job="node-exporter",device!~"veth.+"}[2m]) > 2
+ for: 2m  # condition must hold for 2 minutes before the alert fires
+ labels:
+ severity: warning
+{{- end }} \ No newline at end of file
diff --git a/kud/tests/vnfs/comp-app/collection/app2/helm/prometheus-operator/templates/prometheus/rules-1.14/node.rules.yaml b/kud/tests/vnfs/comp-app/collection/app2/helm/prometheus-operator/templates/prometheus/rules-1.14/node.rules.yaml
new file mode 100755
index 00000000..75ab27ed
--- /dev/null
+++ b/kud/tests/vnfs/comp-app/collection/app2/helm/prometheus-operator/templates/prometheus/rules-1.14/node.rules.yaml
@@ -0,0 +1,53 @@
+{{- /*
+Generated from 'node.rules' group from https://raw.githubusercontent.com/coreos/kube-prometheus/master/manifests/prometheus-rules.yaml
+Do not change in-place! In order to change this file first read following link:
+https://github.com/helm/charts/tree/master/stable/prometheus-operator/hack
+*/ -}}
+{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }}
+{{- if and (semverCompare ">=1.14.0-0" $kubeTargetVersion) (semverCompare "<9.9.9-9" $kubeTargetVersion) .Values.defaultRules.create .Values.nodeExporter.enabled .Values.defaultRules.rules.node }}
+apiVersion: monitoring.coreos.com/v1
+kind: PrometheusRule
+metadata:
+ name: {{ printf "%s-%s" (include "prometheus-operator.fullname" .) "node.rules" | trunc 63 | trimSuffix "-" }}
+ namespace: {{ template "prometheus-operator.namespace" . }}
+ labels:
+ app: {{ template "prometheus-operator.name" . }}
+{{ include "prometheus-operator.labels" . | indent 4 }}
+{{- if .Values.defaultRules.labels }}
+{{ toYaml .Values.defaultRules.labels | indent 4 }}
+{{- end }}
+{{- if .Values.defaultRules.annotations }}
+ annotations:
+{{ toYaml .Values.defaultRules.annotations | indent 4 }}
+{{- end }}
+spec:
+ groups:
+ - name: node.rules
+ rules:
+ - expr: sum(min(kube_pod_info) by (cluster, node))  # collapse kube_pod_info to one series per (cluster, node), then add those up
+ record: ':kube_pod_info_node_count:'
+ - expr: |-  # keep a single kube_pod_info series per (namespace, pod); the label_replace copies pod onto itself unchanged
+ topk by(namespace, pod) (1,
+ max by (node, namespace, pod) (
+ label_replace(kube_pod_info{job="kube-state-metrics"}, "pod", "$1", "pod", "(.*)")
+ ))
+ record: 'node_namespace_pod:kube_pod_info:'
+ - expr: |-  # CPUs per node: attach the node label through the pod-info join, then count the (node, cpu) series
+ count by (cluster, node) (sum by (node, cpu) (
+ node_cpu_seconds_total{job="node-exporter"}
+ * on (namespace, pod) group_left(node)
+ node_namespace_pod:kube_pod_info:
+ ))
+ record: node:node_num_cpu:sum  # NOTE(review): the inner sum keeps only node and cpu, so no cluster label reaches the outer count - confirm this is intended
+ - expr: |-  # per-cluster available memory: MemAvailable where present, otherwise Buffers + Cached + MemFree + Slab
+ sum(
+ node_memory_MemAvailable_bytes{job="node-exporter"} or
+ (
+ node_memory_Buffers_bytes{job="node-exporter"} +
+ node_memory_Cached_bytes{job="node-exporter"} +
+ node_memory_MemFree_bytes{job="node-exporter"} +
+ node_memory_Slab_bytes{job="node-exporter"}
+ )
+ ) by (cluster)
+ record: :node_memory_MemAvailable_bytes:sum
+{{- end }} \ No newline at end of file
diff --git a/kud/tests/vnfs/comp-app/collection/app2/helm/prometheus-operator/templates/prometheus/rules-1.14/prometheus-operator.yaml b/kud/tests/vnfs/comp-app/collection/app2/helm/prometheus-operator/templates/prometheus/rules-1.14/prometheus-operator.yaml
new file mode 100755
index 00000000..98f2d3bc
--- /dev/null
+++ b/kud/tests/vnfs/comp-app/collection/app2/helm/prometheus-operator/templates/prometheus/rules-1.14/prometheus-operator.yaml
@@ -0,0 +1,43 @@
+{{- /*
+Generated from 'prometheus-operator' group from https://raw.githubusercontent.com/coreos/kube-prometheus/master/manifests/prometheus-rules.yaml
+Do not change in-place! In order to change this file first read following link:
+https://github.com/helm/charts/tree/master/stable/prometheus-operator/hack
+*/ -}}
+{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }}
+{{- if and (semverCompare ">=1.14.0-0" $kubeTargetVersion) (semverCompare "<9.9.9-9" $kubeTargetVersion) .Values.defaultRules.create .Values.defaultRules.rules.prometheusOperator }}
+{{- $operatorJob := printf "%s-%s" (include "prometheus-operator.fullname" .) "operator" }}
+{{- $namespace := printf "%s" (include "prometheus-operator.namespace" .) }}
+apiVersion: monitoring.coreos.com/v1
+kind: PrometheusRule
+metadata:
+ name: {{ printf "%s-%s" (include "prometheus-operator.fullname" .) "prometheus-operator" | trunc 63 | trimSuffix "-" }}
+ namespace: {{ template "prometheus-operator.namespace" . }}
+ labels:
+ app: {{ template "prometheus-operator.name" . }}
+{{ include "prometheus-operator.labels" . | indent 4 }}
+{{- if .Values.defaultRules.labels }}
+{{ toYaml .Values.defaultRules.labels | indent 4 }}
+{{- end }}
+{{- if .Values.defaultRules.annotations }}
+ annotations:
+{{ toYaml .Values.defaultRules.annotations | indent 4 }}
+{{- end }}
+spec:
+ groups:
+ - name: prometheus-operator
+ rules:
+ - alert: PrometheusOperatorReconcileErrors
+ annotations:
+ message: Errors while reconciling {{`{{`}} $labels.controller {{`}}`}} in {{`{{`}} $labels.namespace {{`}}`}} Namespace.
+ expr: rate(prometheus_operator_reconcile_errors_total{job="{{ $operatorJob }}",namespace="{{ $namespace }}"}[5m]) > 0.1
+ for: 10m
+ labels:
+ severity: warning
+ - alert: PrometheusOperatorNodeLookupErrors  # NOTE(review): expr tracks node address lookup errors, yet the message speaks of reconciling - confirm the wording
+ annotations:
+ message: Errors while reconciling Prometheus in {{`{{`}} $labels.namespace {{`}}`}} Namespace.
+ expr: rate(prometheus_operator_node_address_lookup_errors_total{job="{{ $operatorJob }}",namespace="{{ $namespace }}"}[5m]) > 0.1
+ for: 10m  # the 5m error rate must stay above 0.1 for 10 minutes before the alert fires
+ labels:
+ severity: warning
+{{- end }} \ No newline at end of file
diff --git a/kud/tests/vnfs/comp-app/collection/app2/helm/prometheus-operator/templates/prometheus/rules-1.14/prometheus.yaml b/kud/tests/vnfs/comp-app/collection/app2/helm/prometheus-operator/templates/prometheus/rules-1.14/prometheus.yaml
new file mode 100755
index 00000000..71883f7e
--- /dev/null
+++ b/kud/tests/vnfs/comp-app/collection/app2/helm/prometheus-operator/templates/prometheus/rules-1.14/prometheus.yaml
@@ -0,0 +1,202 @@
+{{- /*
+Generated from 'prometheus' group from https://raw.githubusercontent.com/coreos/kube-prometheus/master/manifests/prometheus-rules.yaml
+Do not change in-place! In order to change this file first read following link:
+https://github.com/helm/charts/tree/master/stable/prometheus-operator/hack
+*/ -}}
+{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }}
+{{- if and (semverCompare ">=1.14.0-0" $kubeTargetVersion) (semverCompare "<9.9.9-9" $kubeTargetVersion) .Values.defaultRules.create .Values.defaultRules.rules.prometheus }}
+{{- $prometheusJob := printf "%s-%s" (include "prometheus-operator.fullname" .) "prometheus" }}
+{{- $namespace := printf "%s" (include "prometheus-operator.namespace" .) }}
+apiVersion: monitoring.coreos.com/v1
+kind: PrometheusRule
+metadata:
+ name: {{ printf "%s-%s" (include "prometheus-operator.fullname" .) "prometheus" | trunc 63 | trimSuffix "-" }}
+ namespace: {{ template "prometheus-operator.namespace" . }}
+ labels:
+ app: {{ template "prometheus-operator.name" . }}
+{{ include "prometheus-operator.labels" . | indent 4 }}
+{{- if .Values.defaultRules.labels }}
+{{ toYaml .Values.defaultRules.labels | indent 4 }}
+{{- end }}
+{{- if .Values.defaultRules.annotations }}
+ annotations:
+{{ toYaml .Values.defaultRules.annotations | indent 4 }}
+{{- end }}
+spec:
+ groups:
+ - name: prometheus
+ rules:
+ - alert: PrometheusBadConfig
+ annotations:
+ description: Prometheus {{`{{`}}$labels.namespace{{`}}`}}/{{`{{`}}$labels.pod{{`}}`}} has failed to reload its configuration.
+ summary: Failed Prometheus configuration reload.
+ expr: |-
+ # Without max_over_time, failed scrapes could create false negatives, see
+ # https://www.robustperception.io/alerting-on-gauges-in-prometheus-2-0 for details.
+ max_over_time(prometheus_config_last_reload_successful{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"}[5m]) == 0
+ for: 10m
+ labels:
+ severity: critical
+ - alert: PrometheusNotificationQueueRunningFull
+ annotations:
+ description: Alert notification queue of Prometheus {{`{{`}}$labels.namespace{{`}}`}}/{{`{{`}}$labels.pod{{`}}`}} is running full.
+ summary: Prometheus alert notification queue predicted to run full in less than 30m.
+ expr: |-
+ # Without min_over_time, failed scrapes could create false negatives, see
+ # https://www.robustperception.io/alerting-on-gauges-in-prometheus-2-0 for details.
+ (
+ predict_linear(prometheus_notifications_queue_length{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"}[5m], 60 * 30)
+ >
+ min_over_time(prometheus_notifications_queue_capacity{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"}[5m])
+ )
+ for: 15m
+ labels:
+ severity: warning
+ - alert: PrometheusErrorSendingAlertsToSomeAlertmanagers
+ annotations:
+ description: '{{`{{`}} printf "%.1f" $value {{`}}`}}% errors while sending alerts from Prometheus {{`{{`}}$labels.namespace{{`}}`}}/{{`{{`}}$labels.pod{{`}}`}} to Alertmanager {{`{{`}}$labels.alertmanager{{`}}`}}.'
+ summary: Prometheus has encountered more than 1% errors sending alerts to a specific Alertmanager.
+ expr: |-
+ (
+ rate(prometheus_notifications_errors_total{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"}[5m])
+ /
+ rate(prometheus_notifications_sent_total{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"}[5m])
+ )
+ * 100
+ > 1
+ for: 15m
+ labels:
+ severity: warning
+ - alert: PrometheusErrorSendingAlertsToAnyAlertmanager
+ annotations:
+ description: '{{`{{`}} printf "%.1f" $value {{`}}`}}% minimum errors while sending alerts from Prometheus {{`{{`}}$labels.namespace{{`}}`}}/{{`{{`}}$labels.pod{{`}}`}} to any Alertmanager.'
+ summary: Prometheus encounters more than 3% errors sending alerts to any Alertmanager.
+ expr: |-
+ min without(alertmanager) (
+ rate(prometheus_notifications_errors_total{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"}[5m])
+ /
+ rate(prometheus_notifications_sent_total{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"}[5m])
+ )
+ * 100
+ > 3
+ for: 15m
+ labels:
+ severity: critical
+ - alert: PrometheusNotConnectedToAlertmanagers
+ annotations:
+ description: Prometheus {{`{{`}}$labels.namespace{{`}}`}}/{{`{{`}}$labels.pod{{`}}`}} is not connected to any Alertmanagers.
+ summary: Prometheus is not connected to any Alertmanagers.
+ expr: |-
+ # Without max_over_time, failed scrapes could create false negatives, see
+ # https://www.robustperception.io/alerting-on-gauges-in-prometheus-2-0 for details.
+ max_over_time(prometheus_notifications_alertmanagers_discovered{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"}[5m]) < 1
+ for: 10m
+ labels:
+ severity: warning
+ - alert: PrometheusTSDBReloadsFailing
+ annotations:
+ description: Prometheus {{`{{`}}$labels.namespace{{`}}`}}/{{`{{`}}$labels.pod{{`}}`}} has detected {{`{{`}}$value | humanize{{`}}`}} reload failures over the last 3h.
+ summary: Prometheus has issues reloading blocks from disk.
+ expr: increase(prometheus_tsdb_reloads_failures_total{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"}[3h]) > 0
+ for: 4h
+ labels:
+ severity: warning
+ - alert: PrometheusTSDBCompactionsFailing
+ annotations:
+ description: Prometheus {{`{{`}}$labels.namespace{{`}}`}}/{{`{{`}}$labels.pod{{`}}`}} has detected {{`{{`}}$value | humanize{{`}}`}} compaction failures over the last 3h.
+ summary: Prometheus has issues compacting blocks.
+ expr: increase(prometheus_tsdb_compactions_failed_total{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"}[3h]) > 0
+ for: 4h
+ labels:
+ severity: warning
+ - alert: PrometheusNotIngestingSamples
+ annotations:
+ description: Prometheus {{`{{`}}$labels.namespace{{`}}`}}/{{`{{`}}$labels.pod{{`}}`}} is not ingesting samples.
+ summary: Prometheus is not ingesting samples.
+ expr: rate(prometheus_tsdb_head_samples_appended_total{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"}[5m]) <= 0
+ for: 10m
+ labels:
+ severity: warning
+ - alert: PrometheusDuplicateTimestamps
+ annotations:
+ description: Prometheus {{`{{`}}$labels.namespace{{`}}`}}/{{`{{`}}$labels.pod{{`}}`}} is dropping {{`{{`}} printf "%.4g" $value {{`}}`}} samples/s with different values but duplicated timestamp.
+ summary: Prometheus is dropping samples with duplicate timestamps.
+ expr: rate(prometheus_target_scrapes_sample_duplicate_timestamp_total{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"}[5m]) > 0
+ for: 10m
+ labels:
+ severity: warning
+ - alert: PrometheusOutOfOrderTimestamps
+ annotations:
+ description: Prometheus {{`{{`}}$labels.namespace{{`}}`}}/{{`{{`}}$labels.pod{{`}}`}} is dropping {{`{{`}} printf "%.4g" $value {{`}}`}} samples/s with timestamps arriving out of order.
+ summary: Prometheus drops samples with out-of-order timestamps.
+ expr: rate(prometheus_target_scrapes_sample_out_of_order_total{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"}[5m]) > 0
+ for: 10m
+ labels:
+ severity: warning
+ - alert: PrometheusRemoteStorageFailures
+ annotations:
+ description: Prometheus {{`{{`}}$labels.namespace{{`}}`}}/{{`{{`}}$labels.pod{{`}}`}} failed to send {{`{{`}} printf "%.1f" $value {{`}}`}}% of the samples to {{`{{`}} $labels.remote_name{{`}}`}}:{{`{{`}} $labels.url {{`}}`}}
+ summary: Prometheus fails to send samples to remote storage.
+ expr: |-
+ (
+ rate(prometheus_remote_storage_failed_samples_total{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"}[5m])
+ /
+ (
+ rate(prometheus_remote_storage_failed_samples_total{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"}[5m])
+ +
+ rate(prometheus_remote_storage_succeeded_samples_total{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"}[5m])
+ )
+ )
+ * 100
+ > 1
+ for: 15m
+ labels:
+ severity: critical
+ - alert: PrometheusRemoteWriteBehind
+ annotations:
+ description: Prometheus {{`{{`}}$labels.namespace{{`}}`}}/{{`{{`}}$labels.pod{{`}}`}} remote write is {{`{{`}} printf "%.1f" $value {{`}}`}}s behind for {{`{{`}} $labels.remote_name{{`}}`}}:{{`{{`}} $labels.url {{`}}`}}.
+ summary: Prometheus remote write is behind.
+ expr: |-
+ # Without max_over_time, failed scrapes could create false negatives, see
+ # https://www.robustperception.io/alerting-on-gauges-in-prometheus-2-0 for details.
+ (
+ max_over_time(prometheus_remote_storage_highest_timestamp_in_seconds{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"}[5m])
+ - on(job, instance) group_right
+ max_over_time(prometheus_remote_storage_queue_highest_sent_timestamp_seconds{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"}[5m])
+ )
+ > 120
+ for: 15m
+ labels:
+ severity: critical
+ - alert: PrometheusRemoteWriteDesiredShards
+ annotations:
+ description: Prometheus {{`{{`}}$labels.namespace{{`}}`}}/{{`{{`}}$labels.pod{{`}}`}} remote write desired shards calculation wants to run {{`{{`}} $value {{`}}`}} shards for queue {{`{{`}} $labels.remote_name{{`}}`}}:{{`{{`}} $labels.url {{`}}`}}, which is more than the max of {{`{{`}} printf `prometheus_remote_storage_shards_max{instance="%s",job="{{ $prometheusJob }}",namespace="{{ $namespace }}"}` $labels.instance | query | first | value {{`}}`}}.
+ summary: Prometheus remote write desired shards calculation wants to run more than configured max shards.
+ expr: |-
+ # Without max_over_time, failed scrapes could create false negatives, see
+ # https://www.robustperception.io/alerting-on-gauges-in-prometheus-2-0 for details.
+ (
+ max_over_time(prometheus_remote_storage_shards_desired{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"}[5m])
+ >
+ max_over_time(prometheus_remote_storage_shards_max{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"}[5m])
+ )
+ for: 15m
+ labels:
+ severity: warning
+ - alert: PrometheusRuleFailures
+ annotations:
+ description: Prometheus {{`{{`}}$labels.namespace{{`}}`}}/{{`{{`}}$labels.pod{{`}}`}} has failed to evaluate {{`{{`}} printf "%.0f" $value {{`}}`}} rules in the last 5m.
+ summary: Prometheus is failing rule evaluations.
+ expr: increase(prometheus_rule_evaluation_failures_total{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"}[5m]) > 0
+ for: 15m
+ labels:
+ severity: critical
+ - alert: PrometheusMissingRuleEvaluations
+ annotations:
+ description: Prometheus {{`{{`}}$labels.namespace{{`}}`}}/{{`{{`}}$labels.pod{{`}}`}} has missed {{`{{`}} printf "%.0f" $value {{`}}`}} rule group evaluations in the last 5m.
+ summary: Prometheus is missing rule evaluations due to slow rule group evaluation.
+ expr: increase(prometheus_rule_group_iterations_missed_total{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"}[5m]) > 0
+ for: 15m
+ labels:
+ severity: warning
+{{- end }} \ No newline at end of file
diff --git a/kud/tests/vnfs/comp-app/collection/app2/helm/prometheus-operator/templates/prometheus/rules/alertmanager.rules.yaml b/kud/tests/vnfs/comp-app/collection/app2/helm/prometheus-operator/templates/prometheus/rules/alertmanager.rules.yaml
new file mode 100755
index 00000000..54440239
--- /dev/null
+++ b/kud/tests/vnfs/comp-app/collection/app2/helm/prometheus-operator/templates/prometheus/rules/alertmanager.rules.yaml
@@ -0,0 +1,54 @@
+{{- /*
+Generated from 'alertmanager.rules' group from https://raw.githubusercontent.com/coreos/kube-prometheus/release-0.1/manifests/prometheus-rules.yaml
+Do not change in-place! In order to change this file first read following link:
+https://github.com/helm/charts/tree/master/stable/prometheus-operator/hack
+*/ -}}
+{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }}
+{{- if and (semverCompare ">=1.10.0-0" $kubeTargetVersion) (semverCompare "<1.14.0-0" $kubeTargetVersion) .Values.defaultRules.create .Values.defaultRules.rules.alertmanager }}
+{{- $operatorJob := printf "%s-%s" (include "prometheus-operator.fullname" .) "operator" }}
+{{- $alertmanagerJob := printf "%s-%s" (include "prometheus-operator.fullname" .) "alertmanager" }}
+{{- $namespace := printf "%s" (include "prometheus-operator.namespace" .) }}
+apiVersion: monitoring.coreos.com/v1
+kind: PrometheusRule
+metadata:
+ name: {{ printf "%s-%s" (include "prometheus-operator.fullname" .) "alertmanager.rules" | trunc 63 | trimSuffix "-" }}
+ namespace: {{ template "prometheus-operator.namespace" . }}
+ labels:
+ app: {{ template "prometheus-operator.name" . }}
+{{ include "prometheus-operator.labels" . | indent 4 }}
+{{- if .Values.defaultRules.labels }}
+{{ toYaml .Values.defaultRules.labels | indent 4 }}
+{{- end }}
+{{- if .Values.defaultRules.annotations }}
+ annotations:
+{{ toYaml .Values.defaultRules.annotations | indent 4 }}
+{{- end }}
+spec:
+ groups:
+ - name: alertmanager.rules
+ rules:
+ - alert: AlertmanagerConfigInconsistent
+ annotations:
+ message: The configuration of the instances of the Alertmanager cluster `{{`{{`}}$labels.service{{`}}`}}` are out of sync.
+ expr: count_values("config_hash", alertmanager_config_hash{job="{{ $alertmanagerJob }}",namespace="{{ $namespace }}"}) BY (service) / ON(service) GROUP_LEFT() label_replace(max(prometheus_operator_spec_replicas{job="{{ $operatorJob }}",namespace="{{ $namespace }}",controller="alertmanager"}) by (name, job, namespace, controller), "service", "$1", "name", "(.*)") != 1
+ for: 5m
+ labels:
+ severity: critical
+ - alert: AlertmanagerFailedReload
+ annotations:
+ message: Reloading Alertmanager's configuration has failed for {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.pod{{`}}`}}.
+ expr: alertmanager_config_last_reload_successful{job="{{ $alertmanagerJob }}",namespace="{{ $namespace }}"} == 0
+ for: 10m
+ labels:
+ severity: warning
+ - alert: AlertmanagerMembersInconsistent
+ annotations:
+ message: Alertmanager has not found all other members of the cluster.
+ expr: |-
+ alertmanager_cluster_members{job="{{ $alertmanagerJob }}",namespace="{{ $namespace }}"}
+ != on (service) GROUP_LEFT()
+ count by (service) (alertmanager_cluster_members{job="{{ $alertmanagerJob }}",namespace="{{ $namespace }}"})
+ for: 5m
+ labels:
+ severity: critical
+{{- end }} \ No newline at end of file
diff --git a/kud/tests/vnfs/comp-app/collection/app2/helm/prometheus-operator/templates/prometheus/rules/etcd.yaml b/kud/tests/vnfs/comp-app/collection/app2/helm/prometheus-operator/templates/prometheus/rules/etcd.yaml
new file mode 100755
index 00000000..6abda2d3
--- /dev/null
+++ b/kud/tests/vnfs/comp-app/collection/app2/helm/prometheus-operator/templates/prometheus/rules/etcd.yaml
@@ -0,0 +1,155 @@
+{{- /*
+Generated from 'etcd' group from https://raw.githubusercontent.com/etcd-io/etcd/master/Documentation/op-guide/etcd3_alert.rules.yml
+Do not change in-place! In order to change this file first read following link:
+https://github.com/helm/charts/tree/master/stable/prometheus-operator/hack
+*/ -}}
+{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }}
+{{- if and (semverCompare ">=1.10.0-0" $kubeTargetVersion) (semverCompare "<1.14.0-0" $kubeTargetVersion) .Values.defaultRules.create .Values.kubeEtcd.enabled .Values.defaultRules.rules.etcd }}
+apiVersion: monitoring.coreos.com/v1
+kind: PrometheusRule
+metadata:
+ name: {{ printf "%s-%s" (include "prometheus-operator.fullname" .) "etcd" | trunc 63 | trimSuffix "-" }}
+ namespace: {{ template "prometheus-operator.namespace" . }}
+ labels:
+ app: {{ template "prometheus-operator.name" . }}
+{{ include "prometheus-operator.labels" . | indent 4 }}
+{{- if .Values.defaultRules.labels }}
+{{ toYaml .Values.defaultRules.labels | indent 4 }}
+{{- end }}
+{{- if .Values.defaultRules.annotations }}
+ annotations:
+{{ toYaml .Values.defaultRules.annotations | indent 4 }}
+{{- end }}
+spec:
+ groups:
+ - name: etcd
+ rules:
+ - alert: etcdMembersDown
+ annotations:
+ message: 'etcd cluster "{{`{{`}} $labels.job {{`}}`}}": members are down ({{`{{`}} $value {{`}}`}}).'
+ expr: |-
+ max by (job) (
+ sum by (job) (up{job=~".*etcd.*"} == bool 0)
+ or
+ count by (job,endpoint) (
+ sum by (job,endpoint,To) (rate(etcd_network_peer_sent_failures_total{job=~".*etcd.*"}[3m])) > 0.01
+ )
+ )
+ > 0
+ for: 3m
+ labels:
+ severity: critical
+ - alert: etcdInsufficientMembers
+ annotations:
+ message: 'etcd cluster "{{`{{`}} $labels.job {{`}}`}}": insufficient members ({{`{{`}} $value {{`}}`}}).'
+ expr: sum(up{job=~".*etcd.*"} == bool 1) by (job) < ((count(up{job=~".*etcd.*"}) by (job) + 1) / 2)
+ for: 3m
+ labels:
+ severity: critical
+ - alert: etcdNoLeader
+ annotations:
+ message: 'etcd cluster "{{`{{`}} $labels.job {{`}}`}}": member {{`{{`}} $labels.instance {{`}}`}} has no leader.'
+ expr: etcd_server_has_leader{job=~".*etcd.*"} == 0
+ for: 1m
+ labels:
+ severity: critical
+ - alert: etcdHighNumberOfLeaderChanges
+ annotations:
+ message: 'etcd cluster "{{`{{`}} $labels.job {{`}}`}}": {{`{{`}} $value {{`}}`}} leader changes within the last 15 minutes. Frequent elections may be a sign of insufficient resources, high network latency, or disruptions by other components and should be investigated.'
+ expr: increase((max by (job) (etcd_server_leader_changes_seen_total{job=~".*etcd.*"}) or 0*absent(etcd_server_leader_changes_seen_total{job=~".*etcd.*"}))[15m:1m]) >= 3
+ for: 5m
+ labels:
+ severity: warning
+ - alert: etcdHighNumberOfFailedGRPCRequests
+ annotations:
+ message: 'etcd cluster "{{`{{`}} $labels.job {{`}}`}}": {{`{{`}} $value {{`}}`}}% of requests for {{`{{`}} $labels.grpc_method {{`}}`}} failed on etcd instance {{`{{`}} $labels.instance {{`}}`}}.'
+ expr: |-
+ 100 * sum(rate(grpc_server_handled_total{job=~".*etcd.*", grpc_code!="OK"}[5m])) BY (job, instance, grpc_service, grpc_method)
+ /
+ sum(rate(grpc_server_handled_total{job=~".*etcd.*"}[5m])) BY (job, instance, grpc_service, grpc_method)
+ > 1
+ for: 10m
+ labels:
+ severity: warning
+ - alert: etcdHighNumberOfFailedGRPCRequests
+ annotations:
+ message: 'etcd cluster "{{`{{`}} $labels.job {{`}}`}}": {{`{{`}} $value {{`}}`}}% of requests for {{`{{`}} $labels.grpc_method {{`}}`}} failed on etcd instance {{`{{`}} $labels.instance {{`}}`}}.'
+ expr: |-
+ 100 * sum(rate(grpc_server_handled_total{job=~".*etcd.*", grpc_code!="OK"}[5m])) BY (job, instance, grpc_service, grpc_method)
+ /
+ sum(rate(grpc_server_handled_total{job=~".*etcd.*"}[5m])) BY (job, instance, grpc_service, grpc_method)
+ > 5
+ for: 5m
+ labels:
+ severity: critical
+ - alert: etcdGRPCRequestsSlow
+ annotations:
+ message: 'etcd cluster "{{`{{`}} $labels.job {{`}}`}}": gRPC requests to {{`{{`}} $labels.grpc_method {{`}}`}} are taking {{`{{`}} $value {{`}}`}}s on etcd instance {{`{{`}} $labels.instance {{`}}`}}.'
+ expr: |-
+ histogram_quantile(0.99, sum(rate(grpc_server_handling_seconds_bucket{job=~".*etcd.*", grpc_type="unary"}[5m])) by (job, instance, grpc_service, grpc_method, le))
+ > 0.15
+ for: 10m
+ labels:
+ severity: critical
+ - alert: etcdMemberCommunicationSlow
+ annotations:
+ message: 'etcd cluster "{{`{{`}} $labels.job {{`}}`}}": member communication with {{`{{`}} $labels.To {{`}}`}} is taking {{`{{`}} $value {{`}}`}}s on etcd instance {{`{{`}} $labels.instance {{`}}`}}.'
+ expr: |-
+ histogram_quantile(0.99, rate(etcd_network_peer_round_trip_time_seconds_bucket{job=~".*etcd.*"}[5m]))
+ > 0.15
+ for: 10m
+ labels:
+ severity: warning
+ - alert: etcdHighNumberOfFailedProposals
+ annotations:
+ message: 'etcd cluster "{{`{{`}} $labels.job {{`}}`}}": {{`{{`}} $value {{`}}`}} proposal failures within the last 30 minutes on etcd instance {{`{{`}} $labels.instance {{`}}`}}.'
+ expr: rate(etcd_server_proposals_failed_total{job=~".*etcd.*"}[15m]) > 5
+ for: 15m
+ labels:
+ severity: warning
+ - alert: etcdHighFsyncDurations # fires when the p99 of etcd_disk_wal_fsync_duration_seconds (5m rate) stays above 0.5s
+ annotations:
+ message: 'etcd cluster "{{`{{`}} $labels.job {{`}}`}}": 99th percentile fsync durations are {{`{{`}} $value {{`}}`}}s on etcd instance {{`{{`}} $labels.instance {{`}}`}}.'
+ expr: |-
+ histogram_quantile(0.99, rate(etcd_disk_wal_fsync_duration_seconds_bucket{job=~".*etcd.*"}[5m]))
+ > 0.5
+ for: 10m
+ labels:
+ severity: warning
+ - alert: etcdHighCommitDurations
+ annotations:
+ message: 'etcd cluster "{{`{{`}} $labels.job {{`}}`}}": 99th percentile commit durations {{`{{`}} $value {{`}}`}}s on etcd instance {{`{{`}} $labels.instance {{`}}`}}.'
+ expr: |-
+ histogram_quantile(0.99, rate(etcd_disk_backend_commit_duration_seconds_bucket{job=~".*etcd.*"}[5m]))
+ > 0.25
+ for: 10m
+ labels:
+ severity: warning
+ - alert: etcdHighNumberOfFailedHTTPRequests # warning tier: more than 1% of HTTP requests per method failed (404s excluded)
+ annotations:
+ message: '{{`{{`}} $value {{`}}`}}% of requests for {{`{{`}} $labels.method {{`}}`}} failed on etcd instance {{`{{`}} $labels.instance {{`}}`}}'
+ expr: |-
+ 100 * sum(rate(etcd_http_failed_total{job=~".*etcd.*", code!="404"}[5m])) BY (method) / sum(rate(etcd_http_received_total{job=~".*etcd.*"}[5m]))
+ BY (method) > 1
+ for: 10m
+ labels:
+ severity: warning
+ - alert: etcdHighNumberOfFailedHTTPRequests # critical tier: more than 5% of HTTP requests per method failed (404s excluded)
+ annotations:
+ message: '{{`{{`}} $value {{`}}`}}% of requests for {{`{{`}} $labels.method {{`}}`}} failed on etcd instance {{`{{`}} $labels.instance {{`}}`}}.'
+ expr: |-
+ 100 * sum(rate(etcd_http_failed_total{job=~".*etcd.*", code!="404"}[5m])) BY (method) / sum(rate(etcd_http_received_total{job=~".*etcd.*"}[5m]))
+ BY (method) > 5
+ for: 10m
+ labels:
+ severity: critical
+ - alert: etcdHTTPRequestsSlow
+ annotations:
+ message: etcd instance {{`{{`}} $labels.instance {{`}}`}} HTTP requests to {{`{{`}} $labels.method {{`}}`}} are slow.
+ expr: |-
+ histogram_quantile(0.99, rate(etcd_http_successful_duration_seconds_bucket[5m]))
+ > 0.15
+ for: 10m
+ labels:
+ severity: warning
+{{- end }} \ No newline at end of file
diff --git a/kud/tests/vnfs/comp-app/collection/app2/helm/prometheus-operator/templates/prometheus/rules/general.rules.yaml b/kud/tests/vnfs/comp-app/collection/app2/helm/prometheus-operator/templates/prometheus/rules/general.rules.yaml
new file mode 100755
index 00000000..d220cb38
--- /dev/null
+++ b/kud/tests/vnfs/comp-app/collection/app2/helm/prometheus-operator/templates/prometheus/rules/general.rules.yaml
@@ -0,0 +1,50 @@
+{{- /*
+Generated from 'general.rules' group from https://raw.githubusercontent.com/coreos/kube-prometheus/release-0.1/manifests/prometheus-rules.yaml
+Do not change in-place! In order to change this file first read following link:
+https://github.com/helm/charts/tree/master/stable/prometheus-operator/hack
+*/ -}}
+{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }}
+{{- if and (semverCompare ">=1.10.0-0" $kubeTargetVersion) (semverCompare "<1.14.0-0" $kubeTargetVersion) .Values.defaultRules.create .Values.defaultRules.rules.general }}
+apiVersion: monitoring.coreos.com/v1
+kind: PrometheusRule
+metadata:
+ name: {{ printf "%s-%s" (include "prometheus-operator.fullname" .) "general.rules" | trunc 63 | trimSuffix "-" }}
+ namespace: {{ template "prometheus-operator.namespace" . }}
+ labels:
+ app: {{ template "prometheus-operator.name" . }}
+{{ include "prometheus-operator.labels" . | indent 4 }}
+{{- if .Values.defaultRules.labels }}
+{{ toYaml .Values.defaultRules.labels | indent 4 }}
+{{- end }}
+{{- if .Values.defaultRules.annotations }}
+ annotations:
+{{ toYaml .Values.defaultRules.annotations | indent 4 }}
+{{- end }}
+spec:
+ groups:
+ - name: general.rules
+ rules:
+ - alert: TargetDown
+ annotations:
+ message: '{{`{{`}} $value {{`}}`}}% of the {{`{{`}} $labels.job {{`}}`}} targets are down.'
+ expr: 100 * (count(up == 0) BY (job) / count(up) BY (job)) > 10
+ for: 10m
+ labels:
+ severity: warning
+ - alert: Watchdog
+ annotations:
+ message: 'This is an alert meant to ensure that the entire alerting pipeline is functional.
+
+ This alert is always firing, therefore it should always be firing in Alertmanager
+
+ and always fire against a receiver. There are integrations with various notification
+
+ mechanisms that send a notification when this alert is not firing. For example the
+
+ "DeadMansSnitch" integration in PagerDuty.
+
+ '
+ expr: vector(1)
+ labels:
+ severity: none
+{{- end }} \ No newline at end of file
diff --git a/kud/tests/vnfs/comp-app/collection/app2/helm/prometheus-operator/templates/prometheus/rules/k8s.rules.yaml b/kud/tests/vnfs/comp-app/collection/app2/helm/prometheus-operator/templates/prometheus/rules/k8s.rules.yaml
new file mode 100755
index 00000000..71c75fcc
--- /dev/null
+++ b/kud/tests/vnfs/comp-app/collection/app2/helm/prometheus-operator/templates/prometheus/rules/k8s.rules.yaml
@@ -0,0 +1,83 @@
+{{- /*
+Generated from 'k8s.rules' group from https://raw.githubusercontent.com/coreos/kube-prometheus/release-0.1/manifests/prometheus-rules.yaml
+Do not change in-place! In order to change this file first read following link:
+https://github.com/helm/charts/tree/master/stable/prometheus-operator/hack
+*/ -}}
+{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }}
+{{- if and (semverCompare ">=1.10.0-0" $kubeTargetVersion) (semverCompare "<1.14.0-0" $kubeTargetVersion) .Values.defaultRules.create .Values.defaultRules.rules.k8s }}
+apiVersion: monitoring.coreos.com/v1
+kind: PrometheusRule
+metadata:
+ name: {{ printf "%s-%s" (include "prometheus-operator.fullname" .) "k8s.rules" | trunc 63 | trimSuffix "-" }}
+ namespace: {{ template "prometheus-operator.namespace" . }}
+ labels:
+ app: {{ template "prometheus-operator.name" . }}
+{{ include "prometheus-operator.labels" . | indent 4 }}
+{{- if .Values.defaultRules.labels }}
+{{ toYaml .Values.defaultRules.labels | indent 4 }}
+{{- end }}
+{{- if .Values.defaultRules.annotations }}
+ annotations:
+{{ toYaml .Values.defaultRules.annotations | indent 4 }}
+{{- end }}
+spec:
+ groups:
+ - name: k8s.rules
+ rules:
+ - expr: sum(rate(container_cpu_usage_seconds_total{job="kubelet", image!="", container_name!=""}[5m])) by (namespace)
+ record: namespace:container_cpu_usage_seconds_total:sum_rate
+ - expr: sum(container_memory_usage_bytes{job="kubelet", image!="", container_name!=""}) by (namespace)
+ record: namespace:container_memory_usage_bytes:sum
+ - expr: |-
+ sum by (namespace, pod_name, container_name) (
+ rate(container_cpu_usage_seconds_total{job="kubelet", image!="", container_name!=""}[5m])
+ )
+ record: namespace_pod_name_container_name:container_cpu_usage_seconds_total:sum_rate
+ - expr: |-
+ sum by(namespace) (
+ kube_pod_container_resource_requests_memory_bytes{job="kube-state-metrics"}
+ * on (endpoint, instance, job, namespace, pod, service)
+ group_left(phase) (kube_pod_status_phase{phase=~"^(Pending|Running)$"} == 1)
+ )
+ record: namespace_name:kube_pod_container_resource_requests_memory_bytes:sum
+ - expr: |-
+ sum by (namespace) (
+ kube_pod_container_resource_requests_cpu_cores{job="kube-state-metrics"}
+ * on (endpoint, instance, job, namespace, pod, service)
+ group_left(phase) (kube_pod_status_phase{phase=~"^(Pending|Running)$"} == 1)
+ )
+ record: namespace_name:kube_pod_container_resource_requests_cpu_cores:sum
+ - expr: |-
+ sum(
+ label_replace(
+ label_replace(
+ kube_pod_owner{job="kube-state-metrics", owner_kind="ReplicaSet"},
+ "replicaset", "$1", "owner_name", "(.*)"
+ ) * on(replicaset, namespace) group_left(owner_name) kube_replicaset_owner{job="kube-state-metrics"},
+ "workload", "$1", "owner_name", "(.*)"
+ )
+ ) by (namespace, workload, pod)
+ labels:
+ workload_type: deployment
+ record: mixin_pod_workload
+ - expr: |-
+ sum(
+ label_replace(
+ kube_pod_owner{job="kube-state-metrics", owner_kind="DaemonSet"},
+ "workload", "$1", "owner_name", "(.*)"
+ )
+ ) by (namespace, workload, pod)
+ labels:
+ workload_type: daemonset
+ record: mixin_pod_workload
+ - expr: |-
+ sum(
+ label_replace(
+ kube_pod_owner{job="kube-state-metrics", owner_kind="StatefulSet"},
+ "workload", "$1", "owner_name", "(.*)"
+ )
+ ) by (namespace, workload, pod)
+ labels:
+ workload_type: statefulset
+ record: mixin_pod_workload
+{{- end }} \ No newline at end of file
diff --git a/kud/tests/vnfs/comp-app/collection/app2/helm/prometheus-operator/templates/prometheus/rules/kube-apiserver.rules.yaml b/kud/tests/vnfs/comp-app/collection/app2/helm/prometheus-operator/templates/prometheus/rules/kube-apiserver.rules.yaml
new file mode 100755
index 00000000..5e565317
--- /dev/null
+++ b/kud/tests/vnfs/comp-app/collection/app2/helm/prometheus-operator/templates/prometheus/rules/kube-apiserver.rules.yaml
@@ -0,0 +1,39 @@
+{{- /*
+Generated from 'kube-apiserver.rules' group from https://raw.githubusercontent.com/coreos/kube-prometheus/release-0.1/manifests/prometheus-rules.yaml
+Do not change in-place! In order to change this file first read following link:
+https://github.com/helm/charts/tree/master/stable/prometheus-operator/hack
+*/ -}}
+{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }}
+{{- if and (semverCompare ">=1.10.0-0" $kubeTargetVersion) (semverCompare "<1.14.0-0" $kubeTargetVersion) .Values.defaultRules.create .Values.kubeApiServer.enabled .Values.defaultRules.rules.kubeApiserver }}
+apiVersion: monitoring.coreos.com/v1
+kind: PrometheusRule
+metadata:
+ name: {{ printf "%s-%s" (include "prometheus-operator.fullname" .) "kube-apiserver.rules" | trunc 63 | trimSuffix "-" }}
+ namespace: {{ template "prometheus-operator.namespace" . }}
+ labels:
+ app: {{ template "prometheus-operator.name" . }}
+{{ include "prometheus-operator.labels" . | indent 4 }}
+{{- if .Values.defaultRules.labels }}
+{{ toYaml .Values.defaultRules.labels | indent 4 }}
+{{- end }}
+{{- if .Values.defaultRules.annotations }}
+ annotations:
+{{ toYaml .Values.defaultRules.annotations | indent 4 }}
+{{- end }}
+spec:
+ groups:
+ - name: kube-apiserver.rules
+ rules:
+ - expr: histogram_quantile(0.99, sum(rate(apiserver_request_latencies_bucket{job="apiserver"}[5m])) without(instance, pod)) / 1e+06
+ labels:
+ quantile: '0.99'
+ record: cluster_quantile:apiserver_request_latencies:histogram_quantile
+ - expr: histogram_quantile(0.9, sum(rate(apiserver_request_latencies_bucket{job="apiserver"}[5m])) without(instance, pod)) / 1e+06
+ labels:
+ quantile: '0.9'
+ record: cluster_quantile:apiserver_request_latencies:histogram_quantile
+ - expr: histogram_quantile(0.5, sum(rate(apiserver_request_latencies_bucket{job="apiserver"}[5m])) without(instance, pod)) / 1e+06
+ labels:
+ quantile: '0.5'
+ record: cluster_quantile:apiserver_request_latencies:histogram_quantile
+{{- end }} \ No newline at end of file
diff --git a/kud/tests/vnfs/comp-app/collection/app2/helm/prometheus-operator/templates/prometheus/rules/kube-prometheus-node-alerting.rules.yaml b/kud/tests/vnfs/comp-app/collection/app2/helm/prometheus-operator/templates/prometheus/rules/kube-prometheus-node-alerting.rules.yaml
new file mode 100755
index 00000000..09a7c754
--- /dev/null
+++ b/kud/tests/vnfs/comp-app/collection/app2/helm/prometheus-operator/templates/prometheus/rules/kube-prometheus-node-alerting.rules.yaml
@@ -0,0 +1,41 @@
+{{- /*
+Generated from 'kube-prometheus-node-alerting.rules' group from https://raw.githubusercontent.com/coreos/kube-prometheus/release-0.1/manifests/prometheus-rules.yaml
+Do not change in-place! In order to change this file first read following link:
+https://github.com/helm/charts/tree/master/stable/prometheus-operator/hack
+*/ -}}
+{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }}
+{{- if and (semverCompare ">=1.10.0-0" $kubeTargetVersion) (semverCompare "<1.14.0-0" $kubeTargetVersion) .Values.defaultRules.create .Values.defaultRules.rules.kubePrometheusNodeAlerting }}
+apiVersion: monitoring.coreos.com/v1
+kind: PrometheusRule
+metadata:
+ name: {{ printf "%s-%s" (include "prometheus-operator.fullname" .) "kube-prometheus-node-alerting.rules" | trunc 63 | trimSuffix "-" }}
+ namespace: {{ template "prometheus-operator.namespace" . }}
+ labels:
+ app: {{ template "prometheus-operator.name" . }}
+{{ include "prometheus-operator.labels" . | indent 4 }}
+{{- if .Values.defaultRules.labels }}
+{{ toYaml .Values.defaultRules.labels | indent 4 }}
+{{- end }}
+{{- if .Values.defaultRules.annotations }}
+ annotations:
+{{ toYaml .Values.defaultRules.annotations | indent 4 }}
+{{- end }}
+spec:
+ groups:
+ - name: kube-prometheus-node-alerting.rules
+ rules:
+ - alert: NodeDiskRunningFull
+ annotations:
+ message: Device {{`{{`}} $labels.device {{`}}`}} of node-exporter {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.pod {{`}}`}} will be full within the next 24 hours.
+ expr: '(node:node_filesystem_usage: > 0.85) and (predict_linear(node:node_filesystem_avail:[6h], 3600 * 24) < 0)'
+ for: 30m
+ labels:
+ severity: warning
+ - alert: NodeDiskRunningFull
+ annotations:
+ message: Device {{`{{`}} $labels.device {{`}}`}} of node-exporter {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.pod {{`}}`}} will be full within the next 2 hours.
+ expr: '(node:node_filesystem_usage: > 0.85) and (predict_linear(node:node_filesystem_avail:[30m], 3600 * 2) < 0)'
+ for: 10m
+ labels:
+ severity: critical
+{{- end }} \ No newline at end of file
diff --git a/kud/tests/vnfs/comp-app/collection/app2/helm/prometheus-operator/templates/prometheus/rules/kube-prometheus-node-recording.rules.yaml b/kud/tests/vnfs/comp-app/collection/app2/helm/prometheus-operator/templates/prometheus/rules/kube-prometheus-node-recording.rules.yaml
new file mode 100755
index 00000000..fc0f4830
--- /dev/null
+++ b/kud/tests/vnfs/comp-app/collection/app2/helm/prometheus-operator/templates/prometheus/rules/kube-prometheus-node-recording.rules.yaml
@@ -0,0 +1,41 @@
+{{- /*
+Generated from 'kube-prometheus-node-recording.rules' group from https://raw.githubusercontent.com/coreos/kube-prometheus/release-0.1/manifests/prometheus-rules.yaml
+Do not change in-place! In order to change this file first read following link:
+https://github.com/helm/charts/tree/master/stable/prometheus-operator/hack
+*/ -}}
+{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }}
+{{- if and (semverCompare ">=1.10.0-0" $kubeTargetVersion) (semverCompare "<1.14.0-0" $kubeTargetVersion) .Values.defaultRules.create .Values.defaultRules.rules.kubePrometheusNodeRecording }}
+apiVersion: monitoring.coreos.com/v1
+kind: PrometheusRule
+metadata:
+ name: {{ printf "%s-%s" (include "prometheus-operator.fullname" .) "kube-prometheus-node-recording.rules" | trunc 63 | trimSuffix "-" }}
+ namespace: {{ template "prometheus-operator.namespace" . }}
+ labels:
+ app: {{ template "prometheus-operator.name" . }}
+{{ include "prometheus-operator.labels" . | indent 4 }}
+{{- if .Values.defaultRules.labels }}
+{{ toYaml .Values.defaultRules.labels | indent 4 }}
+{{- end }}
+{{- if .Values.defaultRules.annotations }}
+ annotations:
+{{ toYaml .Values.defaultRules.annotations | indent 4 }}
+{{- end }}
+spec:
+ groups:
+ - name: kube-prometheus-node-recording.rules
+ rules:
+ - expr: sum(rate(node_cpu_seconds_total{mode!="idle",mode!="iowait"}[3m])) BY (instance)
+ record: instance:node_cpu:rate:sum
+ - expr: sum((node_filesystem_size_bytes{mountpoint="/"} - node_filesystem_free_bytes{mountpoint="/"})) BY (instance)
+ record: instance:node_filesystem_usage:sum
+ - expr: sum(rate(node_network_receive_bytes_total[3m])) BY (instance)
+ record: instance:node_network_receive_bytes:rate:sum
+ - expr: sum(rate(node_network_transmit_bytes_total[3m])) BY (instance)
+ record: instance:node_network_transmit_bytes:rate:sum
+ - expr: sum(rate(node_cpu_seconds_total{mode!="idle",mode!="iowait"}[5m])) WITHOUT (cpu, mode) / ON(instance) GROUP_LEFT() count(sum(node_cpu_seconds_total) BY (instance, cpu)) BY (instance)
+ record: instance:node_cpu:ratio
+ - expr: sum(rate(node_cpu_seconds_total{mode!="idle",mode!="iowait"}[5m]))
+ record: cluster:node_cpu:sum_rate5m
+ - expr: cluster:node_cpu:sum_rate5m / count(sum(node_cpu_seconds_total) BY (instance, cpu))
+ record: cluster:node_cpu:ratio # numerator is the cluster:node_cpu:sum_rate5m series recorded by the preceding rule; denominator counts distinct (instance, cpu) pairs
+{{- end }} \ No newline at end of file
diff --git a/kud/tests/vnfs/comp-app/collection/app2/helm/prometheus-operator/templates/prometheus/rules/kube-scheduler.rules.yaml b/kud/tests/vnfs/comp-app/collection/app2/helm/prometheus-operator/templates/prometheus/rules/kube-scheduler.rules.yaml
new file mode 100755
index 00000000..3861fa63
--- /dev/null
+++ b/kud/tests/vnfs/comp-app/collection/app2/helm/prometheus-operator/templates/prometheus/rules/kube-scheduler.rules.yaml
@@ -0,0 +1,63 @@
+{{- /*
+Generated from 'kube-scheduler.rules' group from https://raw.githubusercontent.com/coreos/kube-prometheus/release-0.1/manifests/prometheus-rules.yaml
+Do not change in-place! In order to change this file first read following link:
+https://github.com/helm/charts/tree/master/stable/prometheus-operator/hack
+*/ -}}
+{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }}
+{{- if and (semverCompare ">=1.10.0-0" $kubeTargetVersion) (semverCompare "<1.14.0-0" $kubeTargetVersion) .Values.defaultRules.create .Values.kubeScheduler.enabled .Values.defaultRules.rules.kubeScheduler }}
+apiVersion: monitoring.coreos.com/v1
+kind: PrometheusRule
+metadata:
+ name: {{ printf "%s-%s" (include "prometheus-operator.fullname" .) "kube-scheduler.rules" | trunc 63 | trimSuffix "-" }}
+ namespace: {{ template "prometheus-operator.namespace" . }}
+ labels:
+ app: {{ template "prometheus-operator.name" . }}
+{{ include "prometheus-operator.labels" . | indent 4 }}
+{{- if .Values.defaultRules.labels }}
+{{ toYaml .Values.defaultRules.labels | indent 4 }}
+{{- end }}
+{{- if .Values.defaultRules.annotations }}
+ annotations:
+{{ toYaml .Values.defaultRules.annotations | indent 4 }}
+{{- end }}
+spec:
+ groups:
+ - name: kube-scheduler.rules
+ rules:
+ - expr: histogram_quantile(0.99, sum(rate(scheduler_e2e_scheduling_latency_microseconds_bucket{job="kube-scheduler"}[5m])) without(instance, pod)) / 1e+06
+ labels:
+ quantile: '0.99'
+ record: cluster_quantile:scheduler_e2e_scheduling_latency:histogram_quantile
+ - expr: histogram_quantile(0.99, sum(rate(scheduler_scheduling_algorithm_latency_microseconds_bucket{job="kube-scheduler"}[5m])) without(instance, pod)) / 1e+06
+ labels:
+ quantile: '0.99'
+ record: cluster_quantile:scheduler_scheduling_algorithm_latency:histogram_quantile
+ - expr: histogram_quantile(0.99, sum(rate(scheduler_binding_latency_microseconds_bucket{job="kube-scheduler"}[5m])) without(instance, pod)) / 1e+06
+ labels:
+ quantile: '0.99'
+ record: cluster_quantile:scheduler_binding_latency:histogram_quantile
+ - expr: histogram_quantile(0.9, sum(rate(scheduler_e2e_scheduling_latency_microseconds_bucket{job="kube-scheduler"}[5m])) without(instance, pod)) / 1e+06
+ labels:
+ quantile: '0.9'
+ record: cluster_quantile:scheduler_e2e_scheduling_latency:histogram_quantile
+ - expr: histogram_quantile(0.9, sum(rate(scheduler_scheduling_algorithm_latency_microseconds_bucket{job="kube-scheduler"}[5m])) without(instance, pod)) / 1e+06
+ labels:
+ quantile: '0.9'
+ record: cluster_quantile:scheduler_scheduling_algorithm_latency:histogram_quantile
+ - expr: histogram_quantile(0.9, sum(rate(scheduler_binding_latency_microseconds_bucket{job="kube-scheduler"}[5m])) without(instance, pod)) / 1e+06
+ labels:
+ quantile: '0.9'
+ record: cluster_quantile:scheduler_binding_latency:histogram_quantile
+ - expr: histogram_quantile(0.5, sum(rate(scheduler_e2e_scheduling_latency_microseconds_bucket{job="kube-scheduler"}[5m])) without(instance, pod)) / 1e+06
+ labels:
+ quantile: '0.5'
+ record: cluster_quantile:scheduler_e2e_scheduling_latency:histogram_quantile
+ - expr: histogram_quantile(0.5, sum(rate(scheduler_scheduling_algorithm_latency_microseconds_bucket{job="kube-scheduler"}[5m])) without(instance, pod)) / 1e+06
+ labels:
+ quantile: '0.5'
+ record: cluster_quantile:scheduler_scheduling_algorithm_latency:histogram_quantile
+ - expr: histogram_quantile(0.5, sum(rate(scheduler_binding_latency_microseconds_bucket{job="kube-scheduler"}[5m])) without(instance, pod)) / 1e+06
+ labels:
+ quantile: '0.5'
+ record: cluster_quantile:scheduler_binding_latency:histogram_quantile
+{{- end }} \ No newline at end of file
diff --git a/kud/tests/vnfs/comp-app/collection/app2/helm/prometheus-operator/templates/prometheus/rules/kubernetes-absent.yaml b/kud/tests/vnfs/comp-app/collection/app2/helm/prometheus-operator/templates/prometheus/rules/kubernetes-absent.yaml
new file mode 100755
index 00000000..7391f16b
--- /dev/null
+++ b/kud/tests/vnfs/comp-app/collection/app2/helm/prometheus-operator/templates/prometheus/rules/kubernetes-absent.yaml
@@ -0,0 +1,129 @@
+{{- /*
+Generated from 'kubernetes-absent' group from https://raw.githubusercontent.com/coreos/kube-prometheus/release-0.1/manifests/prometheus-rules.yaml
+Do not change in-place! In order to change this file first read following link:
+https://github.com/helm/charts/tree/master/stable/prometheus-operator/hack
+*/ -}}
+{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }}
+{{- if and (semverCompare ">=1.10.0-0" $kubeTargetVersion) (semverCompare "<1.14.0-0" $kubeTargetVersion) .Values.defaultRules.create .Values.defaultRules.rules.kubernetesAbsent }}
+{{- $operatorJob := printf "%s-%s" (include "prometheus-operator.fullname" .) "operator" }}
+{{- $prometheusJob := printf "%s-%s" (include "prometheus-operator.fullname" .) "prometheus" }}
+{{- $alertmanagerJob := printf "%s-%s" (include "prometheus-operator.fullname" .) "alertmanager" }}
+{{- $namespace := printf "%s" (include "prometheus-operator.namespace" .) }}
+apiVersion: monitoring.coreos.com/v1
+kind: PrometheusRule
+metadata:
+ name: {{ printf "%s-%s" (include "prometheus-operator.fullname" .) "kubernetes-absent" | trunc 63 | trimSuffix "-" }}
+ namespace: {{ template "prometheus-operator.namespace" . }}
+ labels:
+ app: {{ template "prometheus-operator.name" . }}
+{{ include "prometheus-operator.labels" . | indent 4 }}
+{{- if .Values.defaultRules.labels }}
+{{ toYaml .Values.defaultRules.labels | indent 4 }}
+{{- end }}
+{{- if .Values.defaultRules.annotations }}
+ annotations:
+{{ toYaml .Values.defaultRules.annotations | indent 4 }}
+{{- end }}
+spec:
+ groups:
+ - name: kubernetes-absent
+ rules:
+{{- if .Values.alertmanager.enabled }}
+ - alert: AlertmanagerDown
+ annotations:
+ message: Alertmanager has disappeared from Prometheus target discovery.
+ runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-alertmanagerdown
+ expr: absent(up{job="{{ $alertmanagerJob }}",namespace="{{ $namespace }}"} == 1)
+ for: 15m
+ labels:
+ severity: critical
+{{- end }}
+{{- if .Values.kubeDns.enabled }}
+ - alert: CoreDNSDown
+ annotations:
+ message: CoreDNS has disappeared from Prometheus target discovery.
+ runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-corednsdown
+ expr: absent(up{job="kube-dns"} == 1)
+ for: 15m
+ labels:
+ severity: critical
+{{- end }}
+{{- if .Values.kubeApiServer.enabled }}
+ - alert: KubeAPIDown
+ annotations:
+ message: KubeAPI has disappeared from Prometheus target discovery.
+ runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-kubeapidown
+ expr: absent(up{job="apiserver"} == 1)
+ for: 15m
+ labels:
+ severity: critical
+{{- end }}
+{{- if .Values.kubeControllerManager.enabled }}
+ - alert: KubeControllerManagerDown
+ annotations:
+ message: KubeControllerManager has disappeared from Prometheus target discovery.
+ runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-kubecontrollermanagerdown
+ expr: absent(up{job="kube-controller-manager"} == 1)
+ for: 15m
+ labels:
+ severity: critical
+{{- end }}
+{{- if .Values.kubeScheduler.enabled }}
+ - alert: KubeSchedulerDown
+ annotations:
+ message: KubeScheduler has disappeared from Prometheus target discovery.
+ runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-kubeschedulerdown
+ expr: absent(up{job="kube-scheduler"} == 1)
+ for: 15m
+ labels:
+ severity: critical
+{{- end }}
+{{- if .Values.kubeStateMetrics.enabled }}
+ - alert: KubeStateMetricsDown
+ annotations:
+ message: KubeStateMetrics has disappeared from Prometheus target discovery.
+ runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-kubestatemetricsdown
+ expr: absent(up{job="kube-state-metrics"} == 1)
+ for: 15m
+ labels:
+ severity: critical
+{{- end }}
+{{- if .Values.prometheusOperator.kubeletService.enabled }}
+ - alert: KubeletDown
+ annotations:
+ message: Kubelet has disappeared from Prometheus target discovery.
+ runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-kubeletdown
+ expr: absent(up{job="kubelet"} == 1)
+ for: 15m
+ labels:
+ severity: critical
+{{- end }}
+{{- if .Values.nodeExporter.enabled }}
+ - alert: NodeExporterDown
+ annotations:
+ message: NodeExporter has disappeared from Prometheus target discovery.
+ runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-nodeexporterdown
+ expr: absent(up{job="node-exporter"} == 1)
+ for: 15m
+ labels:
+ severity: critical
+{{- end }}
+ - alert: PrometheusDown
+ annotations:
+ message: Prometheus has disappeared from Prometheus target discovery.
+ runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-prometheusdown
+ expr: absent(up{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"} == 1)
+ for: 15m
+ labels:
+ severity: critical
+{{- if .Values.prometheusOperator.enabled }}
+ - alert: PrometheusOperatorDown
+ annotations:
+ message: PrometheusOperator has disappeared from Prometheus target discovery.
+ runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-prometheusoperatordown
+ expr: absent(up{job="{{ $operatorJob }}",namespace="{{ $namespace }}"} == 1)
+ for: 15m
+ labels:
+ severity: critical
+{{- end }}
+{{- end }} \ No newline at end of file
diff --git a/kud/tests/vnfs/comp-app/collection/app2/helm/prometheus-operator/templates/prometheus/rules/kubernetes-apps.yaml b/kud/tests/vnfs/comp-app/collection/app2/helm/prometheus-operator/templates/prometheus/rules/kubernetes-apps.yaml
new file mode 100755
index 00000000..fa82f081
--- /dev/null
+++ b/kud/tests/vnfs/comp-app/collection/app2/helm/prometheus-operator/templates/prometheus/rules/kubernetes-apps.yaml
@@ -0,0 +1,161 @@
+{{- /*
+Generated from 'kubernetes-apps' group from https://raw.githubusercontent.com/coreos/kube-prometheus/release-0.1/manifests/prometheus-rules.yaml
+Do not change in-place! In order to change this file first read following link:
+https://github.com/helm/charts/tree/master/stable/prometheus-operator/hack
+*/ -}}
+{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }}
+{{- if and (semverCompare ">=1.10.0-0" $kubeTargetVersion) (semverCompare "<1.14.0-0" $kubeTargetVersion) .Values.defaultRules.create .Values.kubeStateMetrics.enabled .Values.defaultRules.rules.kubernetesApps }}
+{{- $targetNamespace := .Values.defaultRules.appNamespacesTarget }}
+apiVersion: monitoring.coreos.com/v1
+kind: PrometheusRule
+metadata:
+ name: {{ printf "%s-%s" (include "prometheus-operator.fullname" .) "kubernetes-apps" | trunc 63 | trimSuffix "-" }}
+ namespace: {{ template "prometheus-operator.namespace" . }}
+ labels:
+ app: {{ template "prometheus-operator.name" . }}
+{{ include "prometheus-operator.labels" . | indent 4 }}
+{{- if .Values.defaultRules.labels }}
+{{ toYaml .Values.defaultRules.labels | indent 4 }}
+{{- end }}
+{{- if .Values.defaultRules.annotations }}
+ annotations:
+{{ toYaml .Values.defaultRules.annotations | indent 4 }}
+{{- end }}
+spec:
+ groups:
+ - name: kubernetes-apps
+ rules:
+ - alert: KubePodCrashLooping
+ annotations:
+ message: Pod {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.pod {{`}}`}} ({{`{{`}} $labels.container {{`}}`}}) is restarting {{`{{`}} printf "%.2f" $value {{`}}`}} times / 5 minutes.
+ runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-kubepodcrashlooping
+ expr: rate(kube_pod_container_status_restarts_total{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"}[15m]) * 60 * 5 > 0
+ for: 1h
+ labels:
+ severity: critical
+ - alert: KubePodNotReady
+ annotations:
+ message: Pod {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.pod {{`}}`}} has been in a non-ready state for longer than an hour.
+ runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-kubepodnotready
+ expr: sum by (namespace, pod) (kube_pod_status_phase{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}", phase=~"Pending|Unknown"}) > 0
+ for: 1h
+ labels:
+ severity: critical
+ - alert: KubeDeploymentGenerationMismatch
+ annotations:
+ message: Deployment generation for {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.deployment {{`}}`}} does not match, this indicates that the Deployment has failed but has not been rolled back.
+ runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-kubedeploymentgenerationmismatch
+ expr: |-
+ kube_deployment_status_observed_generation{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"}
+ !=
+ kube_deployment_metadata_generation{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"}
+ for: 15m
+ labels:
+ severity: critical
+ - alert: KubeDeploymentReplicasMismatch
+ annotations:
+ message: Deployment {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.deployment {{`}}`}} has not matched the expected number of replicas for longer than an hour.
+ runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-kubedeploymentreplicasmismatch
+ expr: |-
+ kube_deployment_spec_replicas{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"}
+ !=
+ kube_deployment_status_replicas_available{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"}
+ for: 1h
+ labels:
+ severity: critical
+ - alert: KubeStatefulSetReplicasMismatch
+ annotations:
+ message: StatefulSet {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.statefulset {{`}}`}} has not matched the expected number of replicas for longer than 15 minutes.
+ runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-kubestatefulsetreplicasmismatch
+ expr: |-
+ kube_statefulset_status_replicas_ready{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"}
+ !=
+ kube_statefulset_status_replicas{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"}
+ for: 15m
+ labels:
+ severity: critical
+ - alert: KubeStatefulSetGenerationMismatch
+ annotations:
+ message: StatefulSet generation for {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.statefulset {{`}}`}} does not match, this indicates that the StatefulSet has failed but has not been rolled back.
+ runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-kubestatefulsetgenerationmismatch
+ expr: |-
+ kube_statefulset_status_observed_generation{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"}
+ !=
+ kube_statefulset_metadata_generation{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"}
+ for: 15m
+ labels:
+ severity: critical
+ - alert: KubeStatefulSetUpdateNotRolledOut
+ annotations:
+ message: StatefulSet {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.statefulset {{`}}`}} update has not been rolled out.
+ runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-kubestatefulsetupdatenotrolledout
+ expr: |-
+ max without (revision) (
+ kube_statefulset_status_current_revision{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"}
+ unless
+ kube_statefulset_status_update_revision{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"}
+ )
+ *
+ (
+ kube_statefulset_replicas{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"}
+ !=
+ kube_statefulset_status_replicas_updated{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"}
+ )
+ for: 15m
+ labels:
+ severity: critical
+ - alert: KubeDaemonSetRolloutStuck
+ annotations:
+ message: Only {{`{{`}} $value {{`}}`}}% of the desired Pods of DaemonSet {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.daemonset {{`}}`}} are scheduled and ready.
+ runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-kubedaemonsetrolloutstuck
+ expr: |-
+ kube_daemonset_status_number_ready{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"}
+ /
+ kube_daemonset_status_desired_number_scheduled{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"} * 100 < 100
+ for: 15m
+ labels:
+ severity: critical
+ - alert: KubeDaemonSetNotScheduled
+ annotations:
+ message: '{{`{{`}} $value {{`}}`}} Pods of DaemonSet {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.daemonset {{`}}`}} are not scheduled.'
+ runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-kubedaemonsetnotscheduled
+ expr: |-
+ kube_daemonset_status_desired_number_scheduled{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"}
+ -
+ kube_daemonset_status_current_number_scheduled{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"} > 0
+ for: 10m
+ labels:
+ severity: warning
+ - alert: KubeDaemonSetMisScheduled
+ annotations:
+ message: '{{`{{`}} $value {{`}}`}} Pods of DaemonSet {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.daemonset {{`}}`}} are running where they are not supposed to run.'
+ runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-kubedaemonsetmisscheduled
+ expr: kube_daemonset_status_number_misscheduled{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"} > 0
+ for: 10m
+ labels:
+ severity: warning
+ - alert: KubeCronJobRunning
+ annotations:
+ message: CronJob {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.cronjob {{`}}`}} is taking more than 1h to complete.
+ runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-kubecronjobrunning
+ expr: time() - kube_cronjob_next_schedule_time{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"} > 3600
+ for: 1h
+ labels:
+ severity: warning
+ - alert: KubeJobCompletion
+ annotations:
+ message: Job {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.job_name {{`}}`}} is taking more than one hour to complete.
+ runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-kubejobcompletion
+ expr: kube_job_spec_completions{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"} - kube_job_status_succeeded{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"} > 0
+ for: 1h
+ labels:
+ severity: warning
+ - alert: KubeJobFailed
+ annotations:
+ message: Job {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.job_name {{`}}`}} failed to complete.
+ runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-kubejobfailed
+ expr: kube_job_status_failed{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"} > 0
+ for: 1h
+ labels:
+ severity: warning
+{{- end }} \ No newline at end of file
diff --git a/kud/tests/vnfs/comp-app/collection/app2/helm/prometheus-operator/templates/prometheus/rules/kubernetes-resources.yaml b/kud/tests/vnfs/comp-app/collection/app2/helm/prometheus-operator/templates/prometheus/rules/kubernetes-resources.yaml
new file mode 100755
index 00000000..ee51ebd0
--- /dev/null
+++ b/kud/tests/vnfs/comp-app/collection/app2/helm/prometheus-operator/templates/prometheus/rules/kubernetes-resources.yaml
@@ -0,0 +1,103 @@
+{{- /*
+Generated from 'kubernetes-resources' group from https://raw.githubusercontent.com/coreos/kube-prometheus/release-0.1/manifests/prometheus-rules.yaml
+Do not change in-place! In order to change this file first read following link:
+https://github.com/helm/charts/tree/master/stable/prometheus-operator/hack
+*/ -}}
+{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }}
+{{- if and (semverCompare ">=1.10.0-0" $kubeTargetVersion) (semverCompare "<1.14.0-0" $kubeTargetVersion) .Values.defaultRules.create .Values.defaultRules.rules.kubernetesResources }}
+apiVersion: monitoring.coreos.com/v1
+kind: PrometheusRule
+metadata:
+ name: {{ printf "%s-%s" (include "prometheus-operator.fullname" .) "kubernetes-resources" | trunc 63 | trimSuffix "-" }}
+ namespace: {{ template "prometheus-operator.namespace" . }}
+ labels:
+ app: {{ template "prometheus-operator.name" . }}
+{{ include "prometheus-operator.labels" . | indent 4 }}
+{{- if .Values.defaultRules.labels }}
+{{ toYaml .Values.defaultRules.labels | indent 4 }}
+{{- end }}
+{{- if .Values.defaultRules.annotations }}
+ annotations:
+{{ toYaml .Values.defaultRules.annotations | indent 4 }}
+{{- end }}
+spec:
+ groups:
+ - name: kubernetes-resources
+ rules:
+ - alert: KubeCPUOvercommit
+ annotations:
+ message: Cluster has overcommitted CPU resource requests for Pods and cannot tolerate node failure.
+ runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-kubecpuovercommit
+ expr: |-
+ sum(namespace_name:kube_pod_container_resource_requests_cpu_cores:sum)
+ /
+ sum(node:node_num_cpu:sum)
+ >
+ (count(node:node_num_cpu:sum)-1) / count(node:node_num_cpu:sum)
+ for: 5m
+ labels:
+ severity: warning
+ - alert: KubeMemOvercommit
+ annotations:
+ message: Cluster has overcommitted memory resource requests for Pods and cannot tolerate node failure.
+ runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-kubememovercommit
+ expr: |-
+ sum(namespace_name:kube_pod_container_resource_requests_memory_bytes:sum)
+ /
+ sum(node_memory_MemTotal_bytes)
+ >
+ (count(node:node_num_cpu:sum)-1)
+ /
+ count(node:node_num_cpu:sum)
+ for: 5m
+ labels:
+ severity: warning
+ - alert: KubeCPUOvercommit
+ annotations:
+ message: Cluster has overcommitted CPU resource requests for Namespaces.
+ runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-kubecpuovercommit
+ expr: |-
+ sum(kube_resourcequota{job="kube-state-metrics", type="hard", resource="cpu"})
+ /
+ sum(node:node_num_cpu:sum)
+ > 1.5
+ for: 5m
+ labels:
+ severity: warning
+ - alert: KubeMemOvercommit
+ annotations:
+ message: Cluster has overcommitted memory resource requests for Namespaces.
+ runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-kubememovercommit
+ expr: |-
+ sum(kube_resourcequota{job="kube-state-metrics", type="hard", resource="memory"})
+ /
+ sum(node_memory_MemTotal_bytes{job="node-exporter"})
+ > 1.5
+ for: 5m
+ labels:
+ severity: warning
+ - alert: KubeQuotaExceeded
+ annotations:
+ message: Namespace {{`{{`}} $labels.namespace {{`}}`}} is using {{`{{`}} printf "%0.0f" $value {{`}}`}}% of its {{`{{`}} $labels.resource {{`}}`}} quota.
+ runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-kubequotaexceeded
+ expr: |-
+ 100 * kube_resourcequota{job="kube-state-metrics", type="used"}
+ / ignoring(instance, job, type)
+ (kube_resourcequota{job="kube-state-metrics", type="hard"} > 0)
+ > 90
+ for: 15m
+ labels:
+ severity: warning
+ - alert: CPUThrottlingHigh
+ annotations:
+ message: '{{`{{`}} printf "%0.0f" $value {{`}}`}}% throttling of CPU in namespace {{`{{`}} $labels.namespace {{`}}`}} for container {{`{{`}} $labels.container_name {{`}}`}} in pod {{`{{`}} $labels.pod_name {{`}}`}}.'
+ runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-cputhrottlinghigh
+ expr: |-
+ 100 * sum(increase(container_cpu_cfs_throttled_periods_total{container_name!="", }[5m])) by (container_name, pod_name, namespace)
+ /
+ sum(increase(container_cpu_cfs_periods_total{}[5m])) by (container_name, pod_name, namespace)
+ > 25
+ for: 15m
+ labels:
+ severity: warning
+{{- end }} \ No newline at end of file
diff --git a/kud/tests/vnfs/comp-app/collection/app2/helm/prometheus-operator/templates/prometheus/rules/kubernetes-storage.yaml b/kud/tests/vnfs/comp-app/collection/app2/helm/prometheus-operator/templates/prometheus/rules/kubernetes-storage.yaml
new file mode 100755
index 00000000..715924b8
--- /dev/null
+++ b/kud/tests/vnfs/comp-app/collection/app2/helm/prometheus-operator/templates/prometheus/rules/kubernetes-storage.yaml
@@ -0,0 +1,63 @@
+{{- /*
+Generated from 'kubernetes-storage' group from https://raw.githubusercontent.com/coreos/kube-prometheus/release-0.1/manifests/prometheus-rules.yaml
+Do not change in-place! In order to change this file first read following link:
+https://github.com/helm/charts/tree/master/stable/prometheus-operator/hack
+*/ -}}
+{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }}
+{{- if and (semverCompare ">=1.10.0-0" $kubeTargetVersion) (semverCompare "<1.14.0-0" $kubeTargetVersion) .Values.defaultRules.create .Values.defaultRules.rules.kubernetesStorage }}
+{{- $targetNamespace := .Values.defaultRules.appNamespacesTarget }}
+apiVersion: monitoring.coreos.com/v1
+kind: PrometheusRule
+metadata:
+ name: {{ printf "%s-%s" (include "prometheus-operator.fullname" .) "kubernetes-storage" | trunc 63 | trimSuffix "-" }}
+ namespace: {{ template "prometheus-operator.namespace" . }}
+ labels:
+ app: {{ template "prometheus-operator.name" . }}
+{{ include "prometheus-operator.labels" . | indent 4 }}
+{{- if .Values.defaultRules.labels }}
+{{ toYaml .Values.defaultRules.labels | indent 4 }}
+{{- end }}
+{{- if .Values.defaultRules.annotations }}
+ annotations:
+{{ toYaml .Values.defaultRules.annotations | indent 4 }}
+{{- end }}
+spec:
+ groups:
+ - name: kubernetes-storage
+ rules:
+ - alert: KubePersistentVolumeUsageCritical
+ annotations:
+ message: The PersistentVolume claimed by {{`{{`}} $labels.persistentvolumeclaim {{`}}`}} in Namespace {{`{{`}} $labels.namespace {{`}}`}} is only {{`{{`}} printf "%0.2f" $value {{`}}`}}% free.
+ runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-kubepersistentvolumeusagecritical
+ expr: |-
+ 100 * kubelet_volume_stats_available_bytes{job="kubelet", namespace=~"{{ $targetNamespace }}"}
+ /
+ kubelet_volume_stats_capacity_bytes{job="kubelet", namespace=~"{{ $targetNamespace }}"}
+ < 3
+ for: 1m
+ labels:
+ severity: critical
+ - alert: KubePersistentVolumeFullInFourDays
+ annotations:
+ message: Based on recent sampling, the PersistentVolume claimed by {{`{{`}} $labels.persistentvolumeclaim {{`}}`}} in Namespace {{`{{`}} $labels.namespace {{`}}`}} is expected to fill up within four days. Currently {{`{{`}} printf "%0.2f" $value {{`}}`}}% is available.
+ runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-kubepersistentvolumefullinfourdays
+ expr: |-
+ 100 * (
+ kubelet_volume_stats_available_bytes{job="kubelet", namespace=~"{{ $targetNamespace }}"}
+ /
+ kubelet_volume_stats_capacity_bytes{job="kubelet", namespace=~"{{ $targetNamespace }}"}
+ ) < 15
+ and
+ predict_linear(kubelet_volume_stats_available_bytes{job="kubelet", namespace=~"{{ $targetNamespace }}"}[6h], 4 * 24 * 3600) < 0
+ for: 5m
+ labels:
+ severity: critical
+ - alert: KubePersistentVolumeErrors
+ annotations:
+ message: The persistent volume {{`{{`}} $labels.persistentvolume {{`}}`}} has status {{`{{`}} $labels.phase {{`}}`}}.
+ runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-kubepersistentvolumeerrors
+ expr: kube_persistentvolume_status_phase{phase=~"Failed|Pending",job="kube-state-metrics"} > 0
+ for: 5m
+ labels:
+ severity: critical
+{{- end }} \ No newline at end of file
diff --git a/kud/tests/vnfs/comp-app/collection/app2/helm/prometheus-operator/templates/prometheus/rules/kubernetes-system.yaml b/kud/tests/vnfs/comp-app/collection/app2/helm/prometheus-operator/templates/prometheus/rules/kubernetes-system.yaml
new file mode 100755
index 00000000..36a11931
--- /dev/null
+++ b/kud/tests/vnfs/comp-app/collection/app2/helm/prometheus-operator/templates/prometheus/rules/kubernetes-system.yaml
@@ -0,0 +1,145 @@
+{{- /*
+Generated from 'kubernetes-system' group from https://raw.githubusercontent.com/coreos/kube-prometheus/release-0.1/manifests/prometheus-rules.yaml
+Do not change in-place! In order to change this file first read following link:
+https://github.com/helm/charts/tree/master/stable/prometheus-operator/hack
+*/ -}}
+{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }}
+{{- if and (semverCompare ">=1.10.0-0" $kubeTargetVersion) (semverCompare "<1.14.0-0" $kubeTargetVersion) .Values.defaultRules.create .Values.defaultRules.rules.kubernetesSystem }}
+apiVersion: monitoring.coreos.com/v1
+kind: PrometheusRule
+metadata:
+ name: {{ printf "%s-%s" (include "prometheus-operator.fullname" .) "kubernetes-system" | trunc 63 | trimSuffix "-" }}
+ namespace: {{ template "prometheus-operator.namespace" . }}
+ labels:
+ app: {{ template "prometheus-operator.name" . }}
+{{ include "prometheus-operator.labels" . | indent 4 }}
+{{- if .Values.defaultRules.labels }}
+{{ toYaml .Values.defaultRules.labels | indent 4 }}
+{{- end }}
+{{- if .Values.defaultRules.annotations }}
+ annotations:
+{{ toYaml .Values.defaultRules.annotations | indent 4 }}
+{{- end }}
+spec:
+ groups:
+ - name: kubernetes-system
+ rules:
+ - alert: KubeNodeNotReady
+ annotations:
+ message: '{{`{{`}} $labels.node {{`}}`}} has been unready for more than an hour.'
+ runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-kubenodenotready
+ expr: kube_node_status_condition{job="kube-state-metrics",condition="Ready",status="true"} == 0
+ for: 1h
+ labels:
+ severity: warning
+ - alert: KubeVersionMismatch
+ annotations:
+ message: There are {{`{{`}} $value {{`}}`}} different semantic versions of Kubernetes components running.
+ runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-kubeversionmismatch
+ expr: count(count by (gitVersion) (label_replace(kubernetes_build_info{job!~"kube-dns|coredns"},"gitVersion","$1","gitVersion","(v[0-9]*.[0-9]*.[0-9]*).*"))) > 1
+ for: 1h
+ labels:
+ severity: warning
+ - alert: KubeClientErrors
+ annotations:
+ message: Kubernetes API server client '{{`{{`}} $labels.job {{`}}`}}/{{`{{`}} $labels.instance {{`}}`}}' is experiencing {{`{{`}} printf "%0.0f" $value {{`}}`}}% errors.
+ runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-kubeclienterrors
+ expr: |-
+ (sum(rate(rest_client_requests_total{code=~"5.."}[5m])) by (instance, job)
+ /
+ sum(rate(rest_client_requests_total[5m])) by (instance, job))
+ * 100 > 1
+ for: 15m
+ labels:
+ severity: warning
+ - alert: KubeClientErrors
+ annotations:
+ message: Kubernetes API server client '{{`{{`}} $labels.job {{`}}`}}/{{`{{`}} $labels.instance {{`}}`}}' is experiencing {{`{{`}} printf "%0.0f" $value {{`}}`}} errors / second.
+ runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-kubeclienterrors
+ expr: sum(rate(ksm_scrape_error_total{job="kube-state-metrics"}[5m])) by (instance, job) > 0.1
+ for: 15m
+ labels:
+ severity: warning
+ - alert: KubeletTooManyPods
+ annotations:
+ message: Kubelet {{`{{`}} $labels.instance {{`}}`}} is running {{`{{`}} $value {{`}}`}} Pods, close to the limit of 110.
+ runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-kubelettoomanypods
+ expr: kubelet_running_pod_count{job="kubelet"} > 110 * 0.9
+ for: 15m
+ labels:
+ severity: warning
+ - alert: KubeAPILatencyHigh
+ annotations:
+ message: The API server has a 99th percentile latency of {{`{{`}} $value {{`}}`}} seconds for {{`{{`}} $labels.verb {{`}}`}} {{`{{`}} $labels.resource {{`}}`}}.
+ runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-kubeapilatencyhigh
+ expr: cluster_quantile:apiserver_request_latencies:histogram_quantile{job="apiserver",quantile="0.99",subresource!="log",verb!~"^(?:LIST|WATCH|WATCHLIST|PROXY|CONNECT)$"} > 1
+ for: 10m
+ labels:
+ severity: warning
+ - alert: KubeAPILatencyHigh
+ annotations:
+ message: The API server has a 99th percentile latency of {{`{{`}} $value {{`}}`}} seconds for {{`{{`}} $labels.verb {{`}}`}} {{`{{`}} $labels.resource {{`}}`}}.
+ runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-kubeapilatencyhigh
+ expr: cluster_quantile:apiserver_request_latencies:histogram_quantile{job="apiserver",quantile="0.99",subresource!="log",verb!~"^(?:LIST|WATCH|WATCHLIST|PROXY|CONNECT)$"} > 4
+ for: 10m
+ labels:
+ severity: critical
+ - alert: KubeAPIErrorsHigh
+ annotations:
+ message: API server is returning errors for {{`{{`}} $value {{`}}`}}% of requests.
+ runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-kubeapierrorshigh
+ expr: |-
+ sum(rate(apiserver_request_count{job="apiserver",code=~"^(?:5..)$"}[5m]))
+ /
+ sum(rate(apiserver_request_count{job="apiserver"}[5m])) * 100 > 3
+ for: 10m
+ labels:
+ severity: critical
+ - alert: KubeAPIErrorsHigh
+ annotations:
+ message: API server is returning errors for {{`{{`}} $value {{`}}`}}% of requests.
+ runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-kubeapierrorshigh
+ expr: |-
+ sum(rate(apiserver_request_count{job="apiserver",code=~"^(?:5..)$"}[5m]))
+ /
+ sum(rate(apiserver_request_count{job="apiserver"}[5m])) * 100 > 1
+ for: 10m
+ labels:
+ severity: warning
+ - alert: KubeAPIErrorsHigh
+ annotations:
+ message: API server is returning errors for {{`{{`}} $value {{`}}`}}% of requests for {{`{{`}} $labels.verb {{`}}`}} {{`{{`}} $labels.resource {{`}}`}} {{`{{`}} $labels.subresource {{`}}`}}.
+ runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-kubeapierrorshigh
+ expr: |-
+ sum(rate(apiserver_request_count{job="apiserver",code=~"^(?:5..)$"}[5m])) by (resource,subresource,verb)
+ /
+ sum(rate(apiserver_request_count{job="apiserver"}[5m])) by (resource,subresource,verb) * 100 > 10
+ for: 10m
+ labels:
+ severity: critical
+ - alert: KubeAPIErrorsHigh
+ annotations:
+ message: API server is returning errors for {{`{{`}} $value {{`}}`}}% of requests for {{`{{`}} $labels.verb {{`}}`}} {{`{{`}} $labels.resource {{`}}`}} {{`{{`}} $labels.subresource {{`}}`}}.
+ runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-kubeapierrorshigh
+ expr: |-
+ sum(rate(apiserver_request_count{job="apiserver",code=~"^(?:5..)$"}[5m])) by (resource,subresource,verb)
+ /
+ sum(rate(apiserver_request_count{job="apiserver"}[5m])) by (resource,subresource,verb) * 100 > 5
+ for: 10m
+ labels:
+ severity: warning
+ - alert: KubeClientCertificateExpiration
+ annotations:
+ message: A client certificate used to authenticate to the apiserver is expiring in less than 7.0 days.
+ runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-kubeclientcertificateexpiration
+ expr: apiserver_client_certificate_expiration_seconds_count{job="apiserver"} > 0 and histogram_quantile(0.01, sum by (job, le) (rate(apiserver_client_certificate_expiration_seconds_bucket{job="apiserver"}[5m]))) < 604800
+ labels:
+ severity: warning
+ - alert: KubeClientCertificateExpiration
+ annotations:
+ message: A client certificate used to authenticate to the apiserver is expiring in less than 24.0 hours.
+ runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-kubeclientcertificateexpiration
+ expr: apiserver_client_certificate_expiration_seconds_count{job="apiserver"} > 0 and histogram_quantile(0.01, sum by (job, le) (rate(apiserver_client_certificate_expiration_seconds_bucket{job="apiserver"}[5m]))) < 86400
+ labels:
+ severity: critical
+{{- end }} \ No newline at end of file
diff --git a/kud/tests/vnfs/comp-app/collection/app2/helm/prometheus-operator/templates/prometheus/rules/node-network.yaml b/kud/tests/vnfs/comp-app/collection/app2/helm/prometheus-operator/templates/prometheus/rules/node-network.yaml
new file mode 100755
index 00000000..1de2a621
--- /dev/null
+++ b/kud/tests/vnfs/comp-app/collection/app2/helm/prometheus-operator/templates/prometheus/rules/node-network.yaml
@@ -0,0 +1,48 @@
+{{- /*
+Generated from 'node-network' group from https://raw.githubusercontent.com/coreos/kube-prometheus/release-0.1/manifests/prometheus-rules.yaml
+Do not change in-place! In order to change this file first read following link:
+https://github.com/helm/charts/tree/master/stable/prometheus-operator/hack
+*/ -}}
+{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }}
+{{- if and (semverCompare ">=1.10.0-0" $kubeTargetVersion) (semverCompare "<1.14.0-0" $kubeTargetVersion) .Values.defaultRules.create .Values.defaultRules.rules.network }}
+apiVersion: monitoring.coreos.com/v1
+kind: PrometheusRule
+metadata:
+ name: {{ printf "%s-%s" (include "prometheus-operator.fullname" .) "node-network" | trunc 63 | trimSuffix "-" }}
+ namespace: {{ template "prometheus-operator.namespace" . }}
+ labels:
+ app: {{ template "prometheus-operator.name" . }}
+{{ include "prometheus-operator.labels" . | indent 4 }}
+{{- if .Values.defaultRules.labels }}
+{{ toYaml .Values.defaultRules.labels | indent 4 }}
+{{- end }}
+{{- if .Values.defaultRules.annotations }}
+ annotations:
+{{ toYaml .Values.defaultRules.annotations | indent 4 }}
+{{- end }}
+spec:
+ groups:
+ - name: node-network
+ rules:
+ - alert: NetworkReceiveErrors
+ annotations:
+ message: Network interface "{{`{{`}} $labels.device {{`}}`}}" showing receive errors on node-exporter {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.pod {{`}}`}}
+ expr: rate(node_network_receive_errs_total{job="node-exporter",device!~"veth.+"}[2m]) > 0
+ for: 2m
+ labels:
+ severity: warning
+ - alert: NetworkTransmitErrors
+ annotations:
+ message: Network interface "{{`{{`}} $labels.device {{`}}`}}" showing transmit errors on node-exporter {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.pod {{`}}`}}
+ expr: rate(node_network_transmit_errs_total{job="node-exporter",device!~"veth.+"}[2m]) > 0
+ for: 2m
+ labels:
+ severity: warning
+ - alert: NodeNetworkInterfaceFlapping
+ annotations:
+ message: Network interface "{{`{{`}} $labels.device {{`}}`}}" changing its up status often on node-exporter {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.pod {{`}}`}}
+ expr: changes(node_network_up{job="node-exporter",device!~"veth.+"}[2m]) > 2
+ for: 2m
+ labels:
+ severity: warning
+{{- end }} \ No newline at end of file
diff --git a/kud/tests/vnfs/comp-app/collection/app2/helm/prometheus-operator/templates/prometheus/rules/node-time.yaml b/kud/tests/vnfs/comp-app/collection/app2/helm/prometheus-operator/templates/prometheus/rules/node-time.yaml
new file mode 100755
index 00000000..b53a6af2
--- /dev/null
+++ b/kud/tests/vnfs/comp-app/collection/app2/helm/prometheus-operator/templates/prometheus/rules/node-time.yaml
@@ -0,0 +1,34 @@
+{{- /*
+Generated from 'node-time' group from https://raw.githubusercontent.com/coreos/kube-prometheus/release-0.1/manifests/prometheus-rules.yaml
+Do not change in-place! In order to change this file first read following link:
+https://github.com/helm/charts/tree/master/stable/prometheus-operator/hack
+*/ -}}
+{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }}
+{{- if and (semverCompare ">=1.10.0-0" $kubeTargetVersion) (semverCompare "<1.14.0-0" $kubeTargetVersion) .Values.defaultRules.create .Values.defaultRules.rules.time }}
+apiVersion: monitoring.coreos.com/v1
+kind: PrometheusRule
+metadata:
+ name: {{ printf "%s-%s" (include "prometheus-operator.fullname" .) "node-time" | trunc 63 | trimSuffix "-" }}
+ namespace: {{ template "prometheus-operator.namespace" . }}
+ labels:
+ app: {{ template "prometheus-operator.name" . }}
+{{ include "prometheus-operator.labels" . | indent 4 }}
+{{- if .Values.defaultRules.labels }}
+{{ toYaml .Values.defaultRules.labels | indent 4 }}
+{{- end }}
+{{- if .Values.defaultRules.annotations }}
+ annotations:
+{{ toYaml .Values.defaultRules.annotations | indent 4 }}
+{{- end }}
+spec:
+ groups:
+ - name: node-time
+ rules:
+ - alert: ClockSkewDetected
+ annotations:
+ message: Clock skew detected on node-exporter {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.pod {{`}}`}}. Ensure NTP is configured correctly on this host.
+ expr: abs(node_timex_offset_seconds{job="node-exporter"}) > 0.03
+ for: 2m
+ labels:
+ severity: warning
+{{- end }} \ No newline at end of file
diff --git a/kud/tests/vnfs/comp-app/collection/app2/helm/prometheus-operator/templates/prometheus/rules/node.rules.yaml b/kud/tests/vnfs/comp-app/collection/app2/helm/prometheus-operator/templates/prometheus/rules/node.rules.yaml
new file mode 100755
index 00000000..bd2c50fe
--- /dev/null
+++ b/kud/tests/vnfs/comp-app/collection/app2/helm/prometheus-operator/templates/prometheus/rules/node.rules.yaml
@@ -0,0 +1,202 @@
+{{- /*
+Generated from 'node.rules' group from https://raw.githubusercontent.com/coreos/kube-prometheus/release-0.1/manifests/prometheus-rules.yaml
+Do not change in-place! In order to change this file first read following link:
+https://github.com/helm/charts/tree/master/stable/prometheus-operator/hack
+*/ -}}
+{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }}
+{{- if and (semverCompare ">=1.10.0-0" $kubeTargetVersion) (semverCompare "<1.14.0-0" $kubeTargetVersion) .Values.defaultRules.create .Values.nodeExporter.enabled .Values.defaultRules.rules.node }}
+apiVersion: monitoring.coreos.com/v1  # emitted only when the target version (kubeTargetVersionOverride if set, else the cluster GitVersion) is >=1.10.0-0 and <1.14.0-0 and defaultRules.create, nodeExporter.enabled and defaultRules.rules.node are all set
+kind: PrometheusRule  # this group holds recording rules only, no alerts
+metadata:
+ name: {{ printf "%s-%s" (include "prometheus-operator.fullname" .) "node.rules" | trunc 63 | trimSuffix "-" }}  # fullname plus -node.rules, cut to 63 characters with any trailing dash removed
+ namespace: {{ template "prometheus-operator.namespace" . }}
+ labels:
+ app: {{ template "prometheus-operator.name" . }}
+{{ include "prometheus-operator.labels" . | indent 4 }}
+{{- if .Values.defaultRules.labels }}
+{{ toYaml .Values.defaultRules.labels | indent 4 }}
+{{- end }}
+{{- if .Values.defaultRules.annotations }}
+ annotations:  # optional block, present only when defaultRules.annotations is non-empty
+{{ toYaml .Values.defaultRules.annotations | indent 4 }}
+{{- end }}
+spec:
+ groups:
+ - name: node.rules
+ rules:
+ - expr: sum(min(kube_pod_info) by (node))
+ record: ':kube_pod_info_node_count:'  # cluster-wide sum of the per-node minimum of kube_pod_info
+ - expr: max(label_replace(kube_pod_info{job="kube-state-metrics"}, "pod", "$1", "pod", "(.*)")) by (node, namespace, pod)
+ record: 'node_namespace_pod:kube_pod_info:'  # join helper: rules below multiply by this on (namespace, pod) with group_left(node) to pick up the node label
+ - expr: |-
+ count by (node) (sum by (node, cpu) (
+ node_cpu_seconds_total{job="node-exporter"}
+ * on (namespace, pod) group_left(node)
+ node_namespace_pod:kube_pod_info:
+ ))
+ record: node:node_num_cpu:sum  # per node: number of distinct cpu label values seen by node-exporter
+ - expr: 1 - avg(rate(node_cpu_seconds_total{job="node-exporter",mode="idle"}[1m]))
+ record: :node_cpu_utilisation:avg1m  # cluster-wide: one minus the average idle-mode rate over 1m
+ - expr: |-
+ 1 - avg by (node) (
+ rate(node_cpu_seconds_total{job="node-exporter",mode="idle"}[1m])
+ * on (namespace, pod) group_left(node)
+ node_namespace_pod:kube_pod_info:)
+ record: node:node_cpu_utilisation:avg1m  # same calculation, averaged per node
+ - expr: |-
+ node:node_cpu_utilisation:avg1m
+ *
+ node:node_num_cpu:sum
+ /
+ scalar(sum(node:node_num_cpu:sum))
+ record: node:cluster_cpu_utilisation:ratio  # per-node utilisation scaled by that node's share of all CPUs
+ - expr: |-
+ sum(node_load1{job="node-exporter"})
+ /
+ sum(node:node_num_cpu:sum)
+ record: ':node_cpu_saturation_load1:'  # cluster-wide load1 divided by total CPU count
+ - expr: |-
+ sum by (node) (
+ node_load1{job="node-exporter"}
+ * on (namespace, pod) group_left(node)
+ node_namespace_pod:kube_pod_info:
+ )
+ /
+ node:node_num_cpu:sum
+ record: 'node:node_cpu_saturation_load1:'  # per node: load1 divided by that node's CPU count
+ - expr: |-
+ 1 -
+ sum(node_memory_MemFree_bytes{job="node-exporter"} + node_memory_Cached_bytes{job="node-exporter"} + node_memory_Buffers_bytes{job="node-exporter"})
+ /
+ sum(node_memory_MemTotal_bytes{job="node-exporter"})
+ record: ':node_memory_utilisation:'  # cluster-wide: one minus (free + cached + buffers) over total
+ - expr: sum(node_memory_MemFree_bytes{job="node-exporter"} + node_memory_Cached_bytes{job="node-exporter"} + node_memory_Buffers_bytes{job="node-exporter"})
+ record: :node_memory_MemFreeCachedBuffers_bytes:sum
+ - expr: sum(node_memory_MemTotal_bytes{job="node-exporter"})
+ record: :node_memory_MemTotal_bytes:sum
+ - expr: |-
+ sum by (node) (
+ (node_memory_MemFree_bytes{job="node-exporter"} + node_memory_Cached_bytes{job="node-exporter"} + node_memory_Buffers_bytes{job="node-exporter"})
+ * on (namespace, pod) group_left(node)
+ node_namespace_pod:kube_pod_info:
+ )
+ record: node:node_memory_bytes_available:sum  # per node: free + cached + buffers
+ - expr: |-
+ sum by (node) (
+ node_memory_MemTotal_bytes{job="node-exporter"}
+ * on (namespace, pod) group_left(node)
+ node_namespace_pod:kube_pod_info:
+ )
+ record: node:node_memory_bytes_total:sum
+ - expr: |-
+ (node:node_memory_bytes_total:sum - node:node_memory_bytes_available:sum)
+ /
+ node:node_memory_bytes_total:sum
+ record: node:node_memory_utilisation:ratio  # per node: used bytes over that node's total
+ - expr: |-
+ (node:node_memory_bytes_total:sum - node:node_memory_bytes_available:sum)
+ /
+ scalar(sum(node:node_memory_bytes_total:sum))
+ record: node:cluster_memory_utilisation:ratio  # per node: used bytes over the total of all nodes
+ - expr: |-
+ 1e3 * sum(
+ (rate(node_vmstat_pgpgin{job="node-exporter"}[1m])
+ + rate(node_vmstat_pgpgout{job="node-exporter"}[1m]))
+ )
+ record: :node_memory_swap_io_bytes:sum_rate  # cluster-wide: 1e3 times the summed pgpgin and pgpgout rates over 1m
+ - expr: |-
+ 1 -
+ sum by (node) (
+ (node_memory_MemFree_bytes{job="node-exporter"} + node_memory_Cached_bytes{job="node-exporter"} + node_memory_Buffers_bytes{job="node-exporter"})
+ * on (namespace, pod) group_left(node)
+ node_namespace_pod:kube_pod_info:
+ )
+ /
+ sum by (node) (
+ node_memory_MemTotal_bytes{job="node-exporter"}
+ * on (namespace, pod) group_left(node)
+ node_namespace_pod:kube_pod_info:
+ )
+ record: 'node:node_memory_utilisation:'  # per node, computed directly from the raw node-exporter series
+ - expr: 1 - (node:node_memory_bytes_available:sum / node:node_memory_bytes_total:sum)
+ record: 'node:node_memory_utilisation_2:'  # same ratio, built from the two recorded per-node sums above
+ - expr: |-
+ 1e3 * sum by (node) (
+ (rate(node_vmstat_pgpgin{job="node-exporter"}[1m])
+ + rate(node_vmstat_pgpgout{job="node-exporter"}[1m]))
+ * on (namespace, pod) group_left(node)
+ node_namespace_pod:kube_pod_info:
+ )
+ record: node:node_memory_swap_io_bytes:sum_rate
+ - expr: avg(irate(node_disk_io_time_seconds_total{job="node-exporter",device=~"nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+"}[1m]))
+ record: :node_disk_utilisation:avg_irate  # cluster-wide; only devices whose name matches the nvme, rbd, sd, vd, xvd or dm- patterns
+ - expr: |-
+ avg by (node) (
+ irate(node_disk_io_time_seconds_total{job="node-exporter",device=~"nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+"}[1m])
+ * on (namespace, pod) group_left(node)
+ node_namespace_pod:kube_pod_info:
+ )
+ record: node:node_disk_utilisation:avg_irate
+ - expr: avg(irate(node_disk_io_time_weighted_seconds_total{job="node-exporter",device=~"nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+"}[1m]))
+ record: :node_disk_saturation:avg_irate  # same device filter, using the weighted io-time counter
+ - expr: |-
+ avg by (node) (
+ irate(node_disk_io_time_weighted_seconds_total{job="node-exporter",device=~"nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+"}[1m])
+ * on (namespace, pod) group_left(node)
+ node_namespace_pod:kube_pod_info:
+ )
+ record: node:node_disk_saturation:avg_irate
+ - expr: |-
+ max by (instance, namespace, pod, device) ((node_filesystem_size_bytes{fstype=~"ext[234]|btrfs|xfs|zfs"}
+ - node_filesystem_avail_bytes{fstype=~"ext[234]|btrfs|xfs|zfs"})
+ / node_filesystem_size_bytes{fstype=~"ext[234]|btrfs|xfs|zfs"})
+ record: 'node:node_filesystem_usage:'  # (size - avail) / size per (instance, namespace, pod, device); ext2/3/4, btrfs, xfs and zfs only
+ - expr: max by (instance, namespace, pod, device) (node_filesystem_avail_bytes{fstype=~"ext[234]|btrfs|xfs|zfs"} / node_filesystem_size_bytes{fstype=~"ext[234]|btrfs|xfs|zfs"})
+ record: 'node:node_filesystem_avail:'  # avail / size with the same grouping and filesystem filter
+ - expr: |-
+ sum(irate(node_network_receive_bytes_total{job="node-exporter",device!~"veth.+"}[1m])) +
+ sum(irate(node_network_transmit_bytes_total{job="node-exporter",device!~"veth.+"}[1m]))
+ record: :node_net_utilisation:sum_irate  # cluster-wide receive plus transmit byte rate; veth devices excluded
+ - expr: |-
+ sum by (node) (
+ (irate(node_network_receive_bytes_total{job="node-exporter",device!~"veth.+"}[1m]) +
+ irate(node_network_transmit_bytes_total{job="node-exporter",device!~"veth.+"}[1m]))
+ * on (namespace, pod) group_left(node)
+ node_namespace_pod:kube_pod_info:
+ )
+ record: node:node_net_utilisation:sum_irate
+ - expr: |-
+ sum(irate(node_network_receive_drop_total{job="node-exporter",device!~"veth.+"}[1m])) +
+ sum(irate(node_network_transmit_drop_total{job="node-exporter",device!~"veth.+"}[1m]))
+ record: :node_net_saturation:sum_irate  # cluster-wide receive plus transmit drop rate; veth devices excluded
+ - expr: |-
+ sum by (node) (
+ (irate(node_network_receive_drop_total{job="node-exporter",device!~"veth.+"}[1m]) +
+ irate(node_network_transmit_drop_total{job="node-exporter",device!~"veth.+"}[1m]))
+ * on (namespace, pod) group_left(node)
+ node_namespace_pod:kube_pod_info:
+ )
+ record: node:node_net_saturation:sum_irate
+ - expr: |-
+ max(
+ max(
+ kube_pod_info{job="kube-state-metrics", host_ip!=""}
+ ) by (node, host_ip)
+ * on (host_ip) group_right (node)
+ label_replace(
+ (max(node_filesystem_files{job="node-exporter", mountpoint="/"}) by (instance)), "host_ip", "$1", "instance", "(.*):.*"
+ )
+ ) by (node)
+ record: 'node:node_inodes_total:'  # root-mount inode count per node; host_ip is taken from the instance label minus its :port suffix and joined to kube_pod_info
+ - expr: |-
+ max(
+ max(
+ kube_pod_info{job="kube-state-metrics", host_ip!=""}
+ ) by (node, host_ip)
+ * on (host_ip) group_right (node)
+ label_replace(
+ (max(node_filesystem_files_free{job="node-exporter", mountpoint="/"}) by (instance)), "host_ip", "$1", "instance", "(.*):.*"
+ )
+ ) by (node)
+ record: 'node:node_inodes_free:'  # same join as above, using the free-inode series
+{{- end }} \ No newline at end of file
diff --git a/kud/tests/vnfs/comp-app/collection/app2/helm/prometheus-operator/templates/prometheus/rules/prometheus-operator.yaml b/kud/tests/vnfs/comp-app/collection/app2/helm/prometheus-operator/templates/prometheus/rules/prometheus-operator.yaml
new file mode 100755
index 00000000..9975be36
--- /dev/null
+++ b/kud/tests/vnfs/comp-app/collection/app2/helm/prometheus-operator/templates/prometheus/rules/prometheus-operator.yaml
@@ -0,0 +1,43 @@
+{{- /*
+Generated from 'prometheus-operator' group from https://raw.githubusercontent.com/coreos/kube-prometheus/release-0.1/manifests/prometheus-rules.yaml
+Do not change in-place! In order to change this file first read following link:
+https://github.com/helm/charts/tree/master/stable/prometheus-operator/hack
+*/ -}}
+{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }}
+{{- if and (semverCompare ">=1.10.0-0" $kubeTargetVersion) (semverCompare "<1.14.0-0" $kubeTargetVersion) .Values.defaultRules.create .Values.defaultRules.rules.prometheusOperator }}
+{{- $operatorJob := printf "%s-%s" (include "prometheus-operator.fullname" .) "operator" }}
+{{- $namespace := printf "%s" (include "prometheus-operator.namespace" .) }}
+apiVersion: monitoring.coreos.com/v1  # emitted only when the target version (kubeTargetVersionOverride if set, else the cluster GitVersion) is >=1.10.0-0 and <1.14.0-0 and both defaultRules.create and defaultRules.rules.prometheusOperator are set
+kind: PrometheusRule  # alerts on the operator itself; the exprs filter on job = fullname-operator and on the chart namespace
+metadata:
+ name: {{ printf "%s-%s" (include "prometheus-operator.fullname" .) "prometheus-operator" | trunc 63 | trimSuffix "-" }}  # fullname plus -prometheus-operator, cut to 63 characters with any trailing dash removed
+ namespace: {{ template "prometheus-operator.namespace" . }}
+ labels:
+ app: {{ template "prometheus-operator.name" . }}
+{{ include "prometheus-operator.labels" . | indent 4 }}
+{{- if .Values.defaultRules.labels }}
+{{ toYaml .Values.defaultRules.labels | indent 4 }}
+{{- end }}
+{{- if .Values.defaultRules.annotations }}
+ annotations:  # optional block, present only when defaultRules.annotations is non-empty
+{{ toYaml .Values.defaultRules.annotations | indent 4 }}
+{{- end }}
+spec:
+ groups:
+ - name: prometheus-operator
+ rules:
+ - alert: PrometheusOperatorReconcileErrors  # 5m rate of prometheus_operator_reconcile_errors_total above 0.1, held for 10m
+ annotations:  # the backtick-quoted braces below are printed verbatim by Helm, leaving $labels for Prometheus to expand
+ message: Errors while reconciling {{`{{`}} $labels.controller {{`}}`}} in {{`{{`}} $labels.namespace {{`}}`}} Namespace.
+ expr: rate(prometheus_operator_reconcile_errors_total{job="{{ $operatorJob }}",namespace="{{ $namespace }}"}[5m]) > 0.1
+ for: 10m
+ labels:
+ severity: warning
+ - alert: PrometheusOperatorNodeLookupErrors  # 5m rate of prometheus_operator_node_address_lookup_errors_total above 0.1, held for 10m
+ annotations:  # NOTE(review): the message speaks of reconciling while the expr counts node address lookup errors - confirm the wording against upstream
+ message: Errors while reconciling Prometheus in {{`{{`}} $labels.namespace {{`}}`}} Namespace.
+ expr: rate(prometheus_operator_node_address_lookup_errors_total{job="{{ $operatorJob }}",namespace="{{ $namespace }}"}[5m]) > 0.1
+ for: 10m
+ labels:
+ severity: warning
+{{- end }} \ No newline at end of file
diff --git a/kud/tests/vnfs/comp-app/collection/app2/helm/prometheus-operator/templates/prometheus/rules/prometheus.rules.yaml b/kud/tests/vnfs/comp-app/collection/app2/helm/prometheus-operator/templates/prometheus/rules/prometheus.rules.yaml
new file mode 100755
index 00000000..9cd2eea0
--- /dev/null
+++ b/kud/tests/vnfs/comp-app/collection/app2/helm/prometheus-operator/templates/prometheus/rules/prometheus.rules.yaml
@@ -0,0 +1,109 @@
+{{- /*
+Generated from 'prometheus.rules' group from https://raw.githubusercontent.com/coreos/kube-prometheus/release-0.1/manifests/prometheus-rules.yaml
+Do not change in-place! In order to change this file first read following link:
+https://github.com/helm/charts/tree/master/stable/prometheus-operator/hack
+*/ -}}
+{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }}
+{{- if and (semverCompare ">=1.10.0-0" $kubeTargetVersion) (semverCompare "<1.14.0-0" $kubeTargetVersion) .Values.defaultRules.create .Values.defaultRules.rules.prometheus }}
+{{- $prometheusJob := printf "%s-%s" (include "prometheus-operator.fullname" .) "prometheus" }}
+{{- $namespace := printf "%s" (include "prometheus-operator.namespace" .) }}
+apiVersion: monitoring.coreos.com/v1  # emitted only when the target version (kubeTargetVersionOverride if set, else the cluster GitVersion) is >=1.10.0-0 and <1.14.0-0 and both defaultRules.create and defaultRules.rules.prometheus are set
+kind: PrometheusRule  # alerts on Prometheus itself; every expr filters on job = fullname-prometheus and on the chart namespace
+metadata:
+ name: {{ printf "%s-%s" (include "prometheus-operator.fullname" .) "prometheus.rules" | trunc 63 | trimSuffix "-" }}  # fullname plus -prometheus.rules, cut to 63 characters with any trailing dash removed
+ namespace: {{ template "prometheus-operator.namespace" . }}
+ labels:
+ app: {{ template "prometheus-operator.name" . }}
+{{ include "prometheus-operator.labels" . | indent 4 }}
+{{- if .Values.defaultRules.labels }}
+{{ toYaml .Values.defaultRules.labels | indent 4 }}
+{{- end }}
+{{- if .Values.defaultRules.annotations }}
+ annotations:  # optional block, present only when defaultRules.annotations is non-empty
+{{ toYaml .Values.defaultRules.annotations | indent 4 }}
+{{- end }}
+spec:
+ groups:
+ - name: prometheus.rules
+ rules:
+ - alert: PrometheusConfigReloadFailed  # last reload flag equal to 0 for 10m
+ annotations:  # the backtick-quoted braces in this file are printed verbatim by Helm, leaving $labels and $value for Prometheus to expand
+ description: Reloading Prometheus' configuration has failed for {{`{{`}}$labels.namespace{{`}}`}}/{{`{{`}}$labels.pod{{`}}`}}
+ summary: Reloading Prometheus' configuration failed
+ expr: prometheus_config_last_reload_successful{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"} == 0
+ for: 10m
+ labels:
+ severity: warning
+ - alert: PrometheusNotificationQueueRunningFull  # queue length extrapolated 60 * 30 seconds ahead from the last 5m exceeds the queue capacity
+ annotations:
+ description: Prometheus' alert notification queue is running full for {{`{{`}}$labels.namespace{{`}}`}}/{{`{{`}} $labels.pod{{`}}`}}
+ summary: Prometheus' alert notification queue is running full
+ expr: predict_linear(prometheus_notifications_queue_length{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"}[5m], 60 * 30) > prometheus_notifications_queue_capacity{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"}
+ for: 10m
+ labels:
+ severity: warning
+ - alert: PrometheusErrorSendingAlerts  # warning tier: errors / sent ratio over 5m above 0.01
+ annotations:  # NOTE(review): $labels.Alertmanager is capitalised and label names are case-sensitive - confirm the series really carries a label spelled that way
+ description: Errors while sending alerts from Prometheus {{`{{`}}$labels.namespace{{`}}`}}/{{`{{`}} $labels.pod{{`}}`}} to Alertmanager {{`{{`}}$labels.Alertmanager{{`}}`}}
+ summary: Errors while sending alert from Prometheus
+ expr: rate(prometheus_notifications_errors_total{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"}[5m]) / rate(prometheus_notifications_sent_total{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"}[5m]) > 0.01
+ for: 10m
+ labels:
+ severity: warning
+ - alert: PrometheusErrorSendingAlerts  # critical tier: same ratio with a 0.03 threshold; the alert name is intentionally shared with the warning tier above
+ annotations:  # NOTE(review): same capitalised $labels.Alertmanager as in the warning tier - confirm
+ description: Errors while sending alerts from Prometheus {{`{{`}}$labels.namespace{{`}}`}}/{{`{{`}} $labels.pod{{`}}`}} to Alertmanager {{`{{`}}$labels.Alertmanager{{`}}`}}
+ summary: Errors while sending alerts from Prometheus
+ expr: rate(prometheus_notifications_errors_total{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"}[5m]) / rate(prometheus_notifications_sent_total{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"}[5m]) > 0.03
+ for: 10m
+ labels:
+ severity: critical
+ - alert: PrometheusNotConnectedToAlertmanagers  # fewer than one discovered Alertmanager for 10m
+ annotations:
+ description: Prometheus {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.pod{{`}}`}} is not connected to any Alertmanagers
+ summary: Prometheus is not connected to any Alertmanagers
+ expr: prometheus_notifications_alertmanagers_discovered{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"} < 1
+ for: 10m
+ labels:
+ severity: warning
+ - alert: PrometheusTSDBReloadsFailing  # any increase in reload failures within a 2h window, held for 12h
+ annotations:  # NOTE(review): the text says four hours while the expr uses a 2h window - confirm against upstream
+ description: '{{`{{`}}$labels.job{{`}}`}} at {{`{{`}}$labels.instance{{`}}`}} had {{`{{`}}$value | humanize{{`}}`}} reload failures over the last four hours.'
+ summary: Prometheus has issues reloading data blocks from disk
+ expr: increase(prometheus_tsdb_reloads_failures_total{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"}[2h]) > 0
+ for: 12h
+ labels:
+ severity: warning
+ - alert: PrometheusTSDBCompactionsFailing  # any increase in failed compactions within a 2h window, held for 12h
+ annotations:  # NOTE(review): the text says four hours while the expr uses a 2h window - confirm against upstream
+ description: '{{`{{`}}$labels.job{{`}}`}} at {{`{{`}}$labels.instance{{`}}`}} had {{`{{`}}$value | humanize{{`}}`}} compaction failures over the last four hours.'
+ summary: Prometheus has issues compacting sample blocks
+ expr: increase(prometheus_tsdb_compactions_failed_total{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"}[2h]) > 0
+ for: 12h
+ labels:
+ severity: warning
+ - alert: PrometheusTSDBWALCorruptions  # WAL corruption counter above zero for 4h
+ annotations:
+ description: '{{`{{`}}$labels.job{{`}}`}} at {{`{{`}}$labels.instance{{`}}`}} has a corrupted write-ahead log (WAL).'
+ summary: Prometheus write-ahead log is corrupted
+ expr: prometheus_tsdb_wal_corruptions_total{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"} > 0
+ for: 4h
+ labels:
+ severity: warning
+ - alert: PrometheusNotIngestingSamples  # 5m rate of appended head samples at or below zero for 10m
+ annotations:
+ description: Prometheus {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.pod{{`}}`}} isn't ingesting samples.
+ summary: Prometheus isn't ingesting samples
+ expr: rate(prometheus_tsdb_head_samples_appended_total{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"}[5m]) <= 0
+ for: 10m
+ labels:
+ severity: warning
+ - alert: PrometheusTargetScrapesDuplicate  # any increase in duplicate-timestamp rejections over 5m, held for 10m
+ annotations:
+ description: '{{`{{`}}$labels.namespace{{`}}`}}/{{`{{`}}$labels.pod{{`}}`}} has many samples rejected due to duplicate timestamps but different values'
+ summary: Prometheus has many samples rejected
+ expr: increase(prometheus_target_scrapes_sample_duplicate_timestamp_total{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"}[5m]) > 0
+ for: 10m
+ labels:
+ severity: warning
+{{- end }} \ No newline at end of file
diff --git a/kud/tests/vnfs/comp-app/collection/app2/helm/prometheus-operator/templates/prometheus/service.yaml b/kud/tests/vnfs/comp-app/collection/app2/helm/prometheus-operator/templates/prometheus/service.yaml
new file mode 100755
index 00000000..e38b4813
--- /dev/null
+++ b/kud/tests/vnfs/comp-app/collection/app2/helm/prometheus-operator/templates/prometheus/service.yaml
@@ -0,0 +1,58 @@
+{{- /*
+Service in front of the chart-managed Prometheus pods.
+Rendered only when prometheus.enabled is set. Each optional field below is
+emitted only when its value under prometheus.service is non-empty, and
+nodePort only when the service type is NodePort.
+*/ -}}
+{{- if .Values.prometheus.enabled }}
+{{- $svc := .Values.prometheus.service }}
+apiVersion: v1
+kind: Service
+metadata:
+  name: {{ include "prometheus-operator.fullname" . }}-prometheus
+  namespace: {{ include "prometheus-operator.namespace" . }}
+  labels:
+    app: {{ include "prometheus-operator.name" . }}-prometheus
+    self-monitor: {{ .Values.prometheus.serviceMonitor.selfMonitor | quote }}
+    {{- include "prometheus-operator.labels" . | nindent 4 }}
+    {{- with $svc.labels }}
+    {{- toYaml . | nindent 4 }}
+    {{- end }}
+  {{- with $svc.annotations }}
+  annotations:
+    {{- toYaml . | nindent 4 }}
+  {{- end }}
+spec:
+  {{- with $svc.clusterIP }}
+  clusterIP: {{ . }}
+  {{- end }}
+  {{- with $svc.externalIPs }}
+  externalIPs:
+    {{- toYaml . | nindent 4 }}
+  {{- end }}
+  {{- with $svc.loadBalancerIP }}
+  loadBalancerIP: {{ . }}
+  {{- end }}
+  {{- with $svc.loadBalancerSourceRanges }}
+  loadBalancerSourceRanges:
+    {{- range . }}
+    - {{ . }}
+    {{- end }}
+  {{- end }}
+  ports:
+  - name: {{ .Values.prometheus.prometheusSpec.portName }}
+    {{- if eq $svc.type "NodePort" }}
+    nodePort: {{ $svc.nodePort }}
+    {{- end }}
+    port: {{ $svc.port }}
+    targetPort: {{ $svc.targetPort }}
+  {{- with $svc.additionalPorts }}
+  {{- toYaml . | nindent 2 }}
+  {{- end }}
+  selector:
+    app: prometheus
+    prometheus: {{ include "prometheus-operator.fullname" . }}-prometheus
+  {{- with $svc.sessionAffinity }}
+  sessionAffinity: {{ . }}
+  {{- end }}
+  type: "{{ $svc.type }}"
+{{- end }}
diff --git a/kud/tests/vnfs/comp-app/collection/app2/helm/prometheus-operator/templates/prometheus/serviceaccount.yaml b/kud/tests/vnfs/comp-app/collection/app2/helm/prometheus-operator/templates/prometheus/serviceaccount.yaml
new file mode 100755
index 00000000..273e79f6
--- /dev/null
+++ b/kud/tests/vnfs/comp-app/collection/app2/helm/prometheus-operator/templates/prometheus/serviceaccount.yaml
@@ -0,0 +1,24 @@
+{{- /*
+ServiceAccount for the chart-managed Prometheus.
+Rendered only when both prometheus.enabled and prometheus.serviceAccount.create
+are set. annotations and imagePullSecrets are each emitted only when non-empty,
+so an unconfigured chart does not render an empty imagePullSecrets field.
+*/ -}}
+{{- if and .Values.prometheus.enabled .Values.prometheus.serviceAccount.create }}
+apiVersion: v1
+kind: ServiceAccount
+metadata:
+  name: {{ template "prometheus-operator.prometheus.serviceAccountName" . }}
+  namespace: {{ template "prometheus-operator.namespace" . }}
+  labels:
+    app: {{ template "prometheus-operator.name" . }}-prometheus
+{{ include "prometheus-operator.labels" . | indent 4 }}
+{{- if .Values.prometheus.serviceAccount.annotations }}
+  annotations:
+{{ toYaml .Values.prometheus.serviceAccount.annotations | indent 4 }}
+{{- end }}
+{{- if .Values.global.imagePullSecrets }}
+imagePullSecrets:
+{{ toYaml .Values.global.imagePullSecrets | indent 2 }}
+{{- end }}
+{{- end }}
diff --git a/kud/tests/vnfs/comp-app/collection/app2/helm/prometheus-operator/templates/prometheus/servicemonitor.yaml b/kud/tests/vnfs/comp-app/collection/app2/helm/prometheus-operator/templates/prometheus/servicemonitor.yaml
new file mode 100755
index 00000000..1d328f98
--- /dev/null
+++ b/kud/tests/vnfs/comp-app/collection/app2/helm/prometheus-operator/templates/prometheus/servicemonitor.yaml
@@ -0,0 +1,49 @@
+{{- /*
+ServiceMonitor that lets the chart-managed Prometheus scrape its own Service.
+Rendered only when prometheus.enabled and prometheus.serviceMonitor.selfMonitor
+are both set. namespaceSelector uses the same namespace helper as
+metadata.namespace, so the monitor looks in the namespace the chart renders its
+resources into instead of assuming that it equals .Release.Namespace.
+*/ -}}
+{{- if and .Values.prometheus.enabled .Values.prometheus.serviceMonitor.selfMonitor }}
+apiVersion: monitoring.coreos.com/v1
+kind: ServiceMonitor
+metadata:
+  name: {{ template "prometheus-operator.fullname" . }}-prometheus
+  namespace: {{ template "prometheus-operator.namespace" . }}
+  labels:
+    app: {{ template "prometheus-operator.name" . }}-prometheus
+{{ include "prometheus-operator.labels" . | indent 4 }}
+spec:
+  selector:
+    matchLabels:
+      app: {{ template "prometheus-operator.name" . }}-prometheus
+      release: {{ $.Release.Name | quote }}
+      self-monitor: "true"
+  namespaceSelector:
+    matchNames:
+      - {{ include "prometheus-operator.namespace" . | quote }}
+  endpoints:
+  - port: {{ .Values.prometheus.prometheusSpec.portName }}
+    {{- if .Values.prometheus.serviceMonitor.interval }}
+    interval: {{ .Values.prometheus.serviceMonitor.interval }}
+    {{- end }}
+    {{- if .Values.prometheus.serviceMonitor.scheme }}
+    scheme: {{ .Values.prometheus.serviceMonitor.scheme }}
+    {{- end }}
+    {{- if .Values.prometheus.serviceMonitor.tlsConfig }}
+    tlsConfig: {{ toYaml .Values.prometheus.serviceMonitor.tlsConfig | nindent 6 }}
+    {{- end }}
+    {{- if .Values.prometheus.serviceMonitor.bearerTokenFile }}
+    bearerTokenFile: {{ .Values.prometheus.serviceMonitor.bearerTokenFile }}
+    {{- end }}
+    path: "{{ trimSuffix "/" .Values.prometheus.prometheusSpec.routePrefix }}/metrics"
+{{- if .Values.prometheus.serviceMonitor.metricRelabelings }}
+    metricRelabelings:
+{{ tpl (toYaml .Values.prometheus.serviceMonitor.metricRelabelings | indent 6) . }}
+{{- end }}
+{{- if .Values.prometheus.serviceMonitor.relabelings }}
+    relabelings:
+{{ toYaml .Values.prometheus.serviceMonitor.relabelings | indent 6 }}
+{{- end }}
+{{- end }}
diff --git a/kud/tests/vnfs/comp-app/collection/app2/helm/prometheus-operator/templates/prometheus/servicemonitors.yaml b/kud/tests/vnfs/comp-app/collection/app2/helm/prometheus-operator/templates/prometheus/servicemonitors.yaml
new file mode 100755
index 00000000..4da752ff
--- /dev/null
+++ b/kud/tests/vnfs/comp-app/collection/app2/helm/prometheus-operator/templates/prometheus/servicemonitors.yaml
@@ -0,0 +1,42 @@
+{{- /*
+Extra ServiceMonitors declared under prometheus.additionalServiceMonitors,
+wrapped in a v1 List. Rendered only when prometheus.enabled is set and the list
+is non-empty. Inside the range "." is the current list entry, so chart helpers
+are called with the root context "$". Each monitor is placed in the namespace
+returned by the prometheus-operator.namespace helper, like the other resources
+in this directory.
+*/ -}}
+{{- if and .Values.prometheus.enabled .Values.prometheus.additionalServiceMonitors }}
+apiVersion: v1
+kind: List
+items:
+{{- range .Values.prometheus.additionalServiceMonitors }}
+  - apiVersion: monitoring.coreos.com/v1
+    kind: ServiceMonitor
+    metadata:
+      name: {{ .name }}
+      namespace: {{ include "prometheus-operator.namespace" $ | quote }}
+      labels:
+        app: {{ template "prometheus-operator.name" $ }}-prometheus
+{{ include "prometheus-operator.labels" $ | indent 8 }}
+        {{- if .additionalLabels }}
+{{ toYaml .additionalLabels | indent 8 }}
+        {{- end }}
+    spec:
+      endpoints:
+{{ toYaml .endpoints | indent 8 }}
+    {{- if .jobLabel }}
+      jobLabel: {{ .jobLabel }}
+    {{- end }}
+    {{- if .namespaceSelector }}
+      namespaceSelector:
+{{ toYaml .namespaceSelector | indent 8 }}
+    {{- end }}
+      selector:
+{{ toYaml .selector | indent 8 }}
+    {{- if .targetLabels }}
+      targetLabels:
+{{ toYaml .targetLabels | indent 8 }}
+    {{- end }}
+{{- end }}
+{{- end }}
diff --git a/kud/tests/vnfs/comp-app/collection/app2/helm/prometheus-operator/templates/prometheus/serviceperreplica.yaml b/kud/tests/vnfs/comp-app/collection/app2/helm/prometheus-operator/templates/prometheus/serviceperreplica.yaml
new file mode 100755
index 00000000..791057b7
--- /dev/null
+++ b/kud/tests/vnfs/comp-app/collection/app2/helm/prometheus-operator/templates/prometheus/serviceperreplica.yaml
@@ -0,0 +1,54 @@
+{{- /*
+One Service per Prometheus replica, wrapped in a v1 List.
+Rendered only when prometheus.enabled and prometheus.servicePerReplica.enabled
+are both set; the number of Services comes from
+prometheus.prometheusSpec.replicas. Each Service selects a single pod through
+its statefulset.kubernetes.io/pod-name label.
+*/ -}}
+{{- if and .Values.prometheus.enabled .Values.prometheus.servicePerReplica.enabled }}
+{{- $count := .Values.prometheus.prometheusSpec.replicas | int -}}
+{{- $serviceValues := .Values.prometheus.servicePerReplica -}}
+apiVersion: v1
+kind: List
+metadata:
+  name: {{ include "prometheus-operator.fullname" $ }}-prometheus-serviceperreplica
+  namespace: {{ template "prometheus-operator.namespace" . }}
+items:
+{{- range $i, $e := until $count }}
+  - apiVersion: v1
+    kind: Service
+    metadata:
+      name: {{ include "prometheus-operator.fullname" $ }}-prometheus-{{ $i }}
+      {{- /* Inside range "." is the loop element (an integer), so helpers must receive the root context "$". */}}
+      namespace: {{ template "prometheus-operator.namespace" $ }}
+      labels:
+        app: {{ include "prometheus-operator.name" $ }}-prometheus
+{{ include "prometheus-operator.labels" $ | indent 8 }}
+      {{- if $serviceValues.annotations }}
+      annotations:
+{{ toYaml $serviceValues.annotations | indent 8 }}
+      {{- end }}
+    spec:
+      {{- if $serviceValues.clusterIP }}
+      clusterIP: {{ $serviceValues.clusterIP }}
+      {{- end }}
+      {{- if $serviceValues.loadBalancerSourceRanges }}
+      loadBalancerSourceRanges:
+      {{- range $cidr := $serviceValues.loadBalancerSourceRanges }}
+        - {{ $cidr }}
+      {{- end }}
+      {{- end }}
+      ports:
+        - name: {{ $.Values.prometheus.prometheusSpec.portName }}
+          {{- if eq $serviceValues.type "NodePort" }}
+          nodePort: {{ $serviceValues.nodePort }}
+          {{- end }}
+          port: {{ $serviceValues.port }}
+          targetPort: {{ $serviceValues.targetPort }}
+      selector:
+        app: prometheus
+        prometheus: {{ include "prometheus-operator.fullname" $ }}-prometheus
+        statefulset.kubernetes.io/pod-name: prometheus-{{ include "prometheus-operator.fullname" $ }}-prometheus-{{ $i }}
+      type: "{{ $serviceValues.type }}"
+{{- end }}
+{{- end }}