summaryrefslogtreecommitdiffstats
path: root/vnfs/DAaaS/prometheus-operator/templates/alertmanager
diff options
context:
space:
mode:
Diffstat (limited to 'vnfs/DAaaS/prometheus-operator/templates/alertmanager')
-rw-r--r--vnfs/DAaaS/prometheus-operator/templates/alertmanager/alertmanager.yaml100
-rw-r--r--vnfs/DAaaS/prometheus-operator/templates/alertmanager/ingress.yaml33
-rw-r--r--vnfs/DAaaS/prometheus-operator/templates/alertmanager/podDisruptionBudget.yaml20
-rw-r--r--vnfs/DAaaS/prometheus-operator/templates/alertmanager/psp-clusterrole.yaml15
-rw-r--r--vnfs/DAaaS/prometheus-operator/templates/alertmanager/psp-clusterrolebinding.yaml17
-rw-r--r--vnfs/DAaaS/prometheus-operator/templates/alertmanager/psp.yaml48
-rw-r--r--vnfs/DAaaS/prometheus-operator/templates/alertmanager/rules/alertmanager.rules.yaml47
-rw-r--r--vnfs/DAaaS/prometheus-operator/templates/alertmanager/rules/etcd.yaml134
-rw-r--r--vnfs/DAaaS/prometheus-operator/templates/alertmanager/rules/general.rules.yaml34
-rw-r--r--vnfs/DAaaS/prometheus-operator/templates/alertmanager/rules/k8s.rules.yaml58
-rw-r--r--vnfs/DAaaS/prometheus-operator/templates/alertmanager/rules/kube-apiserver.rules.yaml33
-rw-r--r--vnfs/DAaaS/prometheus-operator/templates/alertmanager/rules/kube-prometheus-node-alerting.rules.yaml35
-rw-r--r--vnfs/DAaaS/prometheus-operator/templates/alertmanager/rules/kube-prometheus-node-recording.rules.yaml35
-rw-r--r--vnfs/DAaaS/prometheus-operator/templates/alertmanager/rules/kube-scheduler.rules.yaml57
-rw-r--r--vnfs/DAaaS/prometheus-operator/templates/alertmanager/rules/kubernetes-absent.yaml120
-rw-r--r--vnfs/DAaaS/prometheus-operator/templates/alertmanager/rules/kubernetes-apps.yaml154
-rw-r--r--vnfs/DAaaS/prometheus-operator/templates/alertmanager/rules/kubernetes-resources.yaml93
-rw-r--r--vnfs/DAaaS/prometheus-operator/templates/alertmanager/rules/kubernetes-storage.yaml56
-rw-r--r--vnfs/DAaaS/prometheus-operator/templates/alertmanager/rules/kubernetes-system.yaml117
-rw-r--r--vnfs/DAaaS/prometheus-operator/templates/alertmanager/rules/node.rules.yaml184
-rw-r--r--vnfs/DAaaS/prometheus-operator/templates/alertmanager/rules/prometheus-operator.yaml36
-rw-r--r--vnfs/DAaaS/prometheus-operator/templates/alertmanager/rules/prometheus.rules.yaml102
-rw-r--r--vnfs/DAaaS/prometheus-operator/templates/alertmanager/secret.yaml14
-rw-r--r--vnfs/DAaaS/prometheus-operator/templates/alertmanager/service.yaml42
-rw-r--r--vnfs/DAaaS/prometheus-operator/templates/alertmanager/serviceaccount.yaml11
-rw-r--r--vnfs/DAaaS/prometheus-operator/templates/alertmanager/servicemonitor.yaml21
26 files changed, 0 insertions, 1616 deletions
diff --git a/vnfs/DAaaS/prometheus-operator/templates/alertmanager/alertmanager.yaml b/vnfs/DAaaS/prometheus-operator/templates/alertmanager/alertmanager.yaml
deleted file mode 100644
index 64466466..00000000
--- a/vnfs/DAaaS/prometheus-operator/templates/alertmanager/alertmanager.yaml
+++ /dev/null
@@ -1,100 +0,0 @@
-{{- if .Values.alertmanager.enabled }}
-apiVersion: {{ printf "%s/v1" (.Values.prometheusOperator.crdApiGroup | default "monitoring.coreos.com") }}
-kind: Alertmanager
-metadata:
- name: {{ template "prometheus-operator.fullname" . }}-alertmanager
- labels:
- app: {{ template "prometheus-operator.name" . }}-alertmanager
-{{ include "prometheus-operator.labels" . | indent 4 }}
-spec:
-{{- if .Values.alertmanager.alertmanagerSpec.image }}
- baseImage: {{ .Values.alertmanager.alertmanagerSpec.image.repository }}
- version: {{ .Values.alertmanager.alertmanagerSpec.image.tag }}
-{{- end }}
- replicas: {{ .Values.alertmanager.alertmanagerSpec.replicas }}
- listenLocal: {{ .Values.alertmanager.alertmanagerSpec.listenLocal }}
- serviceAccountName: {{ template "prometheus-operator.alertmanager.serviceAccountName" . }}
-{{- /*
- externalUrl is rendered exactly once. Precedence: the explicit alertmanagerSpec.externalUrl, then the first ingress host plus routePrefix, then the in-cluster service address on port 9093.
-*/}}
-{{- if .Values.alertmanager.alertmanagerSpec.externalUrl }}
- externalUrl: "{{ .Values.alertmanager.alertmanagerSpec.externalUrl }}"
-{{- else if .Values.alertmanager.ingress.enabled }}
- externalUrl: "http://{{ index .Values.alertmanager.ingress.hosts 0 }}{{ .Values.alertmanager.alertmanagerSpec.routePrefix }}"
-{{- else }}
- externalUrl: http://{{ template "prometheus-operator.fullname" . }}-alertmanager.{{ .Release.Namespace }}:9093
-{{- end }}
-{{- if .Values.alertmanager.alertmanagerSpec.nodeSelector }}
- nodeSelector:
-{{ toYaml .Values.alertmanager.alertmanagerSpec.nodeSelector | indent 4 }}
-{{- end }}
- paused: {{ .Values.alertmanager.alertmanagerSpec.paused }}
- logLevel: {{ .Values.alertmanager.alertmanagerSpec.logLevel | quote }}
- retention: {{ .Values.alertmanager.alertmanagerSpec.retention | quote }}
-{{- if .Values.alertmanager.alertmanagerSpec.secrets }}
- secrets:
-{{ toYaml .Values.alertmanager.alertmanagerSpec.secrets | indent 4 }}
-{{- end }}
-{{- if .Values.alertmanager.alertmanagerSpec.configMaps }}
- configMaps:
-{{ toYaml .Values.alertmanager.alertmanagerSpec.configMaps | indent 4 }}
-{{- end }}
-{{- if .Values.alertmanager.alertmanagerSpec.resources }}
- resources:
-{{ toYaml .Values.alertmanager.alertmanagerSpec.resources | indent 4 }}
-{{- end }}
-{{- if .Values.alertmanager.alertmanagerSpec.routePrefix }}
- routePrefix: "{{ .Values.alertmanager.alertmanagerSpec.routePrefix }}"
-{{- end }}
-{{- if .Values.alertmanager.alertmanagerSpec.securityContext }}
- securityContext:
-{{ toYaml .Values.alertmanager.alertmanagerSpec.securityContext | indent 4 }}
-{{- end }}
-{{- if .Values.alertmanager.alertmanagerSpec.storage }}
- storage:
-{{ toYaml .Values.alertmanager.alertmanagerSpec.storage | indent 4 }}
-{{- end }}
-{{- if .Values.alertmanager.alertmanagerSpec.podMetadata }}
- podMetadata:
-{{ toYaml .Values.alertmanager.alertmanagerSpec.podMetadata | indent 4 }}
-{{- end }}
-{{- if eq .Values.alertmanager.alertmanagerSpec.podAntiAffinity "hard" }}
- affinity:
- podAntiAffinity:
- requiredDuringSchedulingIgnoredDuringExecution:
- - topologyKey: kubernetes.io/hostname
- labelSelector:
- matchLabels:
- app: alertmanager
- alertmanager: {{ template "prometheus-operator.fullname" . }}-alertmanager
-{{- else if eq .Values.alertmanager.alertmanagerSpec.podAntiAffinity "soft" }}
- affinity:
- podAntiAffinity:
- preferredDuringSchedulingIgnoredDuringExecution:
- - weight: 100
- podAffinityTerm:
- topologyKey: kubernetes.io/hostname
- labelSelector:
- matchLabels:
- app: alertmanager
- alertmanager: {{ template "prometheus-operator.fullname" . }}-alertmanager
-{{- end }}
-{{- if .Values.alertmanager.alertmanagerSpec.tolerations }}
- tolerations:
-{{ toYaml .Values.alertmanager.alertmanagerSpec.tolerations | indent 4 }}
-{{- end }}
-{{- if .Values.global.imagePullSecrets }}
- imagePullSecrets:
-{{ toYaml .Values.global.imagePullSecrets | indent 4 }}
-{{- end }}
-{{- if .Values.alertmanager.alertmanagerSpec.containers }}
- containers:
-{{ toYaml .Values.alertmanager.alertmanagerSpec.containers | indent 4 }}
-{{- end }}
-{{- if .Values.alertmanager.alertmanagerSpec.priorityClassName }}
- priorityClassName: {{.Values.alertmanager.alertmanagerSpec.priorityClassName }}
-{{- end }}
-{{- /* additionalPeers is a list, so it is serialized with toYaml (like tolerations and secrets in this template) instead of the default Go slice formatting. */}}
-{{- if .Values.alertmanager.alertmanagerSpec.additionalPeers }}
- additionalPeers:
-{{ toYaml .Values.alertmanager.alertmanagerSpec.additionalPeers | indent 4 }}
-{{- end }}
-{{- end }}
diff --git a/vnfs/DAaaS/prometheus-operator/templates/alertmanager/ingress.yaml b/vnfs/DAaaS/prometheus-operator/templates/alertmanager/ingress.yaml
deleted file mode 100644
index fd657f71..00000000
--- a/vnfs/DAaaS/prometheus-operator/templates/alertmanager/ingress.yaml
+++ /dev/null
@@ -1,33 +0,0 @@
-{{- if and .Values.alertmanager.enabled .Values.alertmanager.ingress.enabled }}
-{{- $routePrefix := .Values.alertmanager.alertmanagerSpec.routePrefix }}
-{{- $serviceName := printf "%s-%s" (include "prometheus-operator.fullname" .) "alertmanager" }}
-apiVersion: extensions/v1beta1
-kind: Ingress
-metadata:
- name: {{ $serviceName }}
-{{- if .Values.alertmanager.ingress.annotations }}
- annotations:
-{{ toYaml .Values.alertmanager.ingress.annotations | indent 4 }}
-{{- end }}
- labels:
- app: {{ template "prometheus-operator.name" . }}-alertmanager
-{{- if .Values.alertmanager.ingress.labels }}
-{{ toYaml .Values.alertmanager.ingress.labels | indent 4 }}
-{{- end }}
-{{ include "prometheus-operator.labels" . | indent 4 }}
-spec:
- rules:
- {{- range $host := .Values.alertmanager.ingress.hosts }}
- - host: {{ . }}
- http:
- paths:
- - path: "{{ $routePrefix }}"
- backend:
- serviceName: {{ $serviceName }}
- servicePort: 9093
- {{- end }}
-{{- if .Values.alertmanager.ingress.tls }}
- tls:
-{{ toYaml .Values.alertmanager.ingress.tls | indent 4 }}
-{{- end }}
-{{- end }} \ No newline at end of file
diff --git a/vnfs/DAaaS/prometheus-operator/templates/alertmanager/podDisruptionBudget.yaml b/vnfs/DAaaS/prometheus-operator/templates/alertmanager/podDisruptionBudget.yaml
deleted file mode 100644
index f240fe76..00000000
--- a/vnfs/DAaaS/prometheus-operator/templates/alertmanager/podDisruptionBudget.yaml
+++ /dev/null
@@ -1,20 +0,0 @@
-{{- if and .Values.alertmanager.enabled .Values.alertmanager.podDisruptionBudget.enabled }}
-apiVersion: policy/v1beta1
-kind: PodDisruptionBudget
-metadata:
- name: {{ template "prometheus-operator.fullname" . }}-alertmanager
- labels:
- app: {{ template "prometheus-operator.name" . }}-alertmanager
-{{ include "prometheus-operator.labels" . | indent 4 }}
-spec:
- {{- if .Values.alertmanager.podDisruptionBudget.minAvailable }}
- minAvailable: {{ .Values.alertmanager.podDisruptionBudget.minAvailable }}
- {{- end }}
- {{- if .Values.alertmanager.podDisruptionBudget.maxUnavailable }}
- maxUnavailable: {{ .Values.alertmanager.podDisruptionBudget.maxUnavailable }}
- {{- end }}
- selector:
- matchLabels:
- app: alertmanager
- alertmanager: {{ template "prometheus-operator.fullname" . }}-alertmanager
-{{- end }} \ No newline at end of file
diff --git a/vnfs/DAaaS/prometheus-operator/templates/alertmanager/psp-clusterrole.yaml b/vnfs/DAaaS/prometheus-operator/templates/alertmanager/psp-clusterrole.yaml
deleted file mode 100644
index e83d8bc7..00000000
--- a/vnfs/DAaaS/prometheus-operator/templates/alertmanager/psp-clusterrole.yaml
+++ /dev/null
@@ -1,15 +0,0 @@
-{{- if and .Values.alertmanager.enabled .Values.global.rbac.create .Values.global.rbac.pspEnabled }}
-kind: ClusterRole
-apiVersion: rbac.authorization.k8s.io/v1
-metadata:
- name: {{ template "prometheus-operator.fullname" . }}-alertmanager
- labels:
- app: {{ template "prometheus-operator.name" . }}-alertmanager
-{{ include "prometheus-operator.labels" . | indent 4 }}
-rules:
-- apiGroups: ['extensions']
- resources: ['podsecuritypolicies']
- verbs: ['use']
- resourceNames:
- - {{ template "prometheus-operator.fullname" . }}-alertmanager
-{{- end }}
diff --git a/vnfs/DAaaS/prometheus-operator/templates/alertmanager/psp-clusterrolebinding.yaml b/vnfs/DAaaS/prometheus-operator/templates/alertmanager/psp-clusterrolebinding.yaml
deleted file mode 100644
index e1d06ab4..00000000
--- a/vnfs/DAaaS/prometheus-operator/templates/alertmanager/psp-clusterrolebinding.yaml
+++ /dev/null
@@ -1,17 +0,0 @@
-{{- if and .Values.alertmanager.enabled .Values.global.rbac.create .Values.global.rbac.pspEnabled }}
-apiVersion: rbac.authorization.k8s.io/v1
-kind: ClusterRoleBinding
-metadata:
- name: {{ template "prometheus-operator.fullname" . }}-alertmanager
- labels:
- app: {{ template "prometheus-operator.name" . }}-alertmanager
-{{ include "prometheus-operator.labels" . | indent 4 }}
-roleRef:
- apiGroup: rbac.authorization.k8s.io
- kind: ClusterRole
- name: {{ template "prometheus-operator.fullname" . }}-alertmanager
-subjects:
- - kind: ServiceAccount
- name: {{ template "prometheus-operator.alertmanager.serviceAccountName" . }}
- namespace: {{ .Release.Namespace }}
-{{- end }}
diff --git a/vnfs/DAaaS/prometheus-operator/templates/alertmanager/psp.yaml b/vnfs/DAaaS/prometheus-operator/templates/alertmanager/psp.yaml
deleted file mode 100644
index 01eda240..00000000
--- a/vnfs/DAaaS/prometheus-operator/templates/alertmanager/psp.yaml
+++ /dev/null
@@ -1,48 +0,0 @@
-{{- if and .Values.alertmanager.enabled .Values.global.rbac.create .Values.global.rbac.pspEnabled }}
-apiVersion: policy/v1beta1
-kind: PodSecurityPolicy
-metadata:
- name: {{ template "prometheus-operator.fullname" . }}-alertmanager
- labels:
- app: {{ template "prometheus-operator.name" . }}-alertmanager
-{{ include "prometheus-operator.labels" . | indent 4 }}
-spec:
- privileged: false
- # Required to prevent escalations to root.
- # allowPrivilegeEscalation: false
- # This is redundant with non-root + disallow privilege escalation,
- # but we can provide it for defense in depth.
- #requiredDropCapabilities:
- # - ALL
- # Allow core volume types.
- volumes:
- - 'configMap'
- - 'emptyDir'
- - 'projected'
- - 'secret'
- - 'downwardAPI'
- - 'persistentVolumeClaim'
- hostNetwork: false
- hostIPC: false
- hostPID: false
- runAsUser:
- # Permits the container to run with root privileges as well.
- rule: 'RunAsAny'
- seLinux:
- # This policy assumes the nodes are using AppArmor rather than SELinux.
- rule: 'RunAsAny'
- supplementalGroups:
- rule: 'MustRunAs'
- ranges:
- # NOTE(review): the range below starts at 0, so the root group (GID 0) is permitted, not forbidden.
- - min: 0
- max: 65535
- fsGroup:
- rule: 'MustRunAs'
- ranges:
- # NOTE(review): the range below starts at 0, so the root group (GID 0) is permitted, not forbidden.
- - min: 0
- max: 65535
- readOnlyRootFilesystem: false
-{{- end }}
-
diff --git a/vnfs/DAaaS/prometheus-operator/templates/alertmanager/rules/alertmanager.rules.yaml b/vnfs/DAaaS/prometheus-operator/templates/alertmanager/rules/alertmanager.rules.yaml
deleted file mode 100644
index f196db48..00000000
--- a/vnfs/DAaaS/prometheus-operator/templates/alertmanager/rules/alertmanager.rules.yaml
+++ /dev/null
@@ -1,47 +0,0 @@
-# Generated from 'alertmanager.rules' group from https://raw.githubusercontent.com/coreos/prometheus-operator/master/contrib/kube-prometheus/manifests/prometheus-rules.yaml
-{{- if and .Values.defaultRules.create }}
-{{- $operatorJob := printf "%s-%s" (include "prometheus-operator.fullname" .) "operator" }}
-{{- $alertmanagerJob := printf "%s-%s" (include "prometheus-operator.fullname" .) "alertmanager" }}
-apiVersion: {{ printf "%s/v1" (.Values.prometheusOperator.crdApiGroup | default "monitoring.coreos.com") }}
-kind: PrometheusRule
-metadata:
- name: {{ printf "%s-%s" (include "prometheus-operator.fullname" .) "alertmanager.rules" | trunc 63 | trimSuffix "-" }}
- labels:
- app: {{ template "prometheus-operator.name" . }}
-{{ include "prometheus-operator.labels" . | indent 4 }}
-{{- if .Values.defaultRules.labels }}
-{{ toYaml .Values.defaultRules.labels | indent 4 }}
-{{- end }}
-{{- if .Values.defaultRules.annotations }}
- annotations:
-{{ toYaml .Values.defaultRules.annotations | indent 4 }}
-{{- end }}
-spec:
- groups:
- - name: alertmanager.rules
- rules:
- - alert: AlertmanagerConfigInconsistent
- annotations:
- message: The configuration of the instances of the Alertmanager cluster `{{`{{$labels.service}}`}}` are out of sync.
- expr: count_values("config_hash", alertmanager_config_hash{job="{{ $alertmanagerJob }}"}) BY (service) / ON(service) GROUP_LEFT() label_replace(prometheus_operator_spec_replicas{job="{{ $operatorJob }}",controller="alertmanager"}, "service", "alertmanager-$1", "name", "(.*)") != 1
- for: 5m
- labels:
- severity: critical
- - alert: AlertmanagerFailedReload
- annotations:
- message: Reloading Alertmanager's configuration has failed for {{`{{ $labels.namespace }}`}}/{{`{{ $labels.pod}}`}}.
- expr: alertmanager_config_last_reload_successful{job="{{ $alertmanagerJob }}"} == 0
- for: 10m
- labels:
- severity: warning
- - alert: AlertmanagerMembersInconsistent
- annotations:
- message: Alertmanager has not found all other members of the cluster.
- expr: |-
- alertmanager_cluster_members{job="{{ $alertmanagerJob }}"}
- != on (service) GROUP_LEFT()
- count by (service) (alertmanager_cluster_members{job="{{ $alertmanagerJob }}"})
- for: 5m
- labels:
- severity: critical
-{{- end }} \ No newline at end of file
diff --git a/vnfs/DAaaS/prometheus-operator/templates/alertmanager/rules/etcd.yaml b/vnfs/DAaaS/prometheus-operator/templates/alertmanager/rules/etcd.yaml
deleted file mode 100644
index dd0140db..00000000
--- a/vnfs/DAaaS/prometheus-operator/templates/alertmanager/rules/etcd.yaml
+++ /dev/null
@@ -1,134 +0,0 @@
-# Generated from 'etcd' group from https://raw.githubusercontent.com/paskal/etcd/master/Documentation/op-guide/etcd3_alert.rules.yml
-{{- if and .Values.defaultRules.create .Values.kubeEtcd.enabled }}
-apiVersion: {{ printf "%s/v1" (.Values.prometheusOperator.crdApiGroup | default "monitoring.coreos.com") }}
-kind: PrometheusRule
-metadata:
- name: {{ printf "%s-%s" (include "prometheus-operator.fullname" .) "etcd" | trunc 63 | trimSuffix "-" }}
- labels:
- app: {{ template "prometheus-operator.name" . }}
-{{ include "prometheus-operator.labels" . | indent 4 }}
-{{- if .Values.defaultRules.labels }}
-{{ toYaml .Values.defaultRules.labels | indent 4 }}
-{{- end }}
-{{- if .Values.defaultRules.annotations }}
- annotations:
-{{ toYaml .Values.defaultRules.annotations | indent 4 }}
-{{- end }}
-spec:
- groups:
- - name: etcd
- rules:
- - alert: etcdInsufficientMembers
- annotations:
- message: 'etcd cluster "{{`{{ $labels.job }}`}}": insufficient members ({{`{{ $value }}`}}).'
- expr: sum(up{job=~".*etcd.*"} == bool 1) by (job) < ((count(up{job=~".*etcd.*"}) by (job) + 1) / 2)
- for: 3m
- labels:
- severity: critical
- - alert: etcdNoLeader
- annotations:
- message: 'etcd cluster "{{`{{ $labels.job }}`}}": member {{`{{ $labels.instance }}`}} has no leader.'
- expr: etcd_server_has_leader{job=~".*etcd.*"} == 0
- for: 1m
- labels:
- severity: critical
- # Warns when a member has seen more than 3 leader changes over the last hour.
- # increase() over 1h yields the count the message reports; rate() would yield a per-second value.
- - alert: etcdHighNumberOfLeaderChanges
- annotations:
- message: 'etcd cluster "{{`{{ $labels.job }}`}}": instance {{`{{ $labels.instance }}`}} has seen {{`{{ $value }}`}} leader changes within the last hour.'
- expr: increase(etcd_server_leader_changes_seen_total{job=~".*etcd.*"}[1h]) > 3
- for: 15m
- labels:
- severity: warning
- - alert: etcdHighNumberOfFailedGRPCRequests
- annotations:
- message: 'etcd cluster "{{`{{ $labels.job }}`}}": {{`{{ $value }}`}}% of requests for {{`{{ $labels.grpc_method }}`}} failed on etcd instance {{`{{ $labels.instance }}`}}.'
- expr: |-
- 100 * sum(rate(grpc_server_handled_total{job=~".*etcd.*", grpc_code!="OK"}[5m])) BY (job, instance, grpc_service, grpc_method)
- /
- sum(rate(grpc_server_handled_total{job=~".*etcd.*"}[5m])) BY (job, instance, grpc_service, grpc_method)
- > 1
- for: 10m
- labels:
- severity: warning
- - alert: etcdHighNumberOfFailedGRPCRequests
- annotations:
- message: 'etcd cluster "{{`{{ $labels.job }}`}}": {{`{{ $value }}`}}% of requests for {{`{{ $labels.grpc_method }}`}} failed on etcd instance {{`{{ $labels.instance }}`}}.'
- expr: |-
- 100 * sum(rate(grpc_server_handled_total{job=~".*etcd.*", grpc_code!="OK"}[5m])) BY (job, instance, grpc_service, grpc_method)
- /
- sum(rate(grpc_server_handled_total{job=~".*etcd.*"}[5m])) BY (job, instance, grpc_service, grpc_method)
- > 5
- for: 5m
- labels:
- severity: critical
- - alert: etcdGRPCRequestsSlow
- annotations:
- message: 'etcd cluster "{{`{{ $labels.job }}`}}": gRPC requests to {{`{{ $labels.grpc_method }}`}} are taking {{`{{ $value }}`}}s on etcd instance {{`{{ $labels.instance }}`}}.'
- expr: |-
- histogram_quantile(0.99, sum(rate(grpc_server_handling_seconds_bucket{job=~".*etcd.*", grpc_type="unary"}[5m])) by (job, instance, grpc_service, grpc_method, le))
- > 0.15
- for: 10m
- labels:
- severity: critical
- - alert: etcdMemberCommunicationSlow
- annotations:
- message: 'etcd cluster "{{`{{ $labels.job }}`}}": member communication with {{`{{ $labels.To }}`}} is taking {{`{{ $value }}`}}s on etcd instance {{`{{ $labels.instance }}`}}.'
- expr: |-
- histogram_quantile(0.99, rate(etcd_network_peer_round_trip_time_seconds_bucket{job=~".*etcd.*"}[5m]))
- > 0.15
- for: 10m
- labels:
- severity: warning
- # Warns when a member has had more than 5 failed proposals over the last hour.
- # increase() over 1h yields the count the message reports; rate() would yield a per-second value.
- - alert: etcdHighNumberOfFailedProposals
- annotations:
- message: 'etcd cluster "{{`{{ $labels.job }}`}}": {{`{{ $value }}`}} proposal failures within the last hour on etcd instance {{`{{ $labels.instance }}`}}.'
- expr: increase(etcd_server_proposals_failed_total{job=~".*etcd.*"}[1h]) > 5
- for: 15m
- labels:
- severity: warning
- # Warns when the 99th percentile WAL fsync duration stays above 0.5s for 10 minutes.
- - alert: etcdHighFsyncDurations
- annotations:
- message: 'etcd cluster "{{`{{ $labels.job }}`}}": 99th percentile fsync durations are {{`{{ $value }}`}}s on etcd instance {{`{{ $labels.instance }}`}}.'
- expr: |-
- histogram_quantile(0.99, rate(etcd_disk_wal_fsync_duration_seconds_bucket{job=~".*etcd.*"}[5m]))
- > 0.5
- for: 10m
- labels:
- severity: warning
- - alert: etcdHighCommitDurations
- annotations:
- message: 'etcd cluster "{{`{{ $labels.job }}`}}": 99th percentile commit durations {{`{{ $value }}`}}s on etcd instance {{`{{ $labels.instance }}`}}.'
- expr: |-
- histogram_quantile(0.99, rate(etcd_disk_backend_commit_duration_seconds_bucket{job=~".*etcd.*"}[5m]))
- > 0.25
- for: 10m
- labels:
- severity: warning
- - alert: etcdHighNumberOfFailedHTTPRequests
- annotations:
- message: '{{`{{ $value }}`}}% of requests for {{`{{ $labels.method }}`}} failed on etcd instance {{`{{ $labels.instance }}`}}'
- expr: |-
- sum(rate(etcd_http_failed_total{job=~".*etcd.*", code!="404"}[5m])) BY (method) / sum(rate(etcd_http_received_total{job=~".*etcd.*"}[5m]))
- BY (method) > 0.01
- for: 10m
- labels:
- severity: warning
- - alert: etcdHighNumberOfFailedHTTPRequests
- annotations:
- message: '{{`{{ $value }}`}}% of requests for {{`{{ $labels.method }}`}} failed on etcd instance {{`{{ $labels.instance }}`}}.'
- expr: |-
- sum(rate(etcd_http_failed_total{job=~".*etcd.*", code!="404"}[5m])) BY (method) / sum(rate(etcd_http_received_total{job=~".*etcd.*"}[5m]))
- BY (method) > 0.05
- for: 10m
- labels:
- severity: critical
- - alert: etcdHTTPRequestsSlow
- annotations:
- message: etcd instance {{`{{ $labels.instance }}`}} HTTP requests to {{`{{ $labels.method }}`}} are slow.
- expr: |-
- histogram_quantile(0.99, rate(etcd_http_successful_duration_seconds_bucket[5m]))
- > 0.15
- for: 10m
- labels:
- severity: warning
-{{- end }} \ No newline at end of file
diff --git a/vnfs/DAaaS/prometheus-operator/templates/alertmanager/rules/general.rules.yaml b/vnfs/DAaaS/prometheus-operator/templates/alertmanager/rules/general.rules.yaml
deleted file mode 100644
index 020c29eb..00000000
--- a/vnfs/DAaaS/prometheus-operator/templates/alertmanager/rules/general.rules.yaml
+++ /dev/null
@@ -1,34 +0,0 @@
-# Generated from 'general.rules' group from https://raw.githubusercontent.com/coreos/prometheus-operator/master/contrib/kube-prometheus/manifests/prometheus-rules.yaml
-{{- if and .Values.defaultRules.create }}
-apiVersion: {{ printf "%s/v1" (.Values.prometheusOperator.crdApiGroup | default "monitoring.coreos.com") }}
-kind: PrometheusRule
-metadata:
- name: {{ printf "%s-%s" (include "prometheus-operator.fullname" .) "general.rules" | trunc 63 | trimSuffix "-" }}
- labels:
- app: {{ template "prometheus-operator.name" . }}
-{{ include "prometheus-operator.labels" . | indent 4 }}
-{{- if .Values.defaultRules.labels }}
-{{ toYaml .Values.defaultRules.labels | indent 4 }}
-{{- end }}
-{{- if .Values.defaultRules.annotations }}
- annotations:
-{{ toYaml .Values.defaultRules.annotations | indent 4 }}
-{{- end }}
-spec:
- groups:
- - name: general.rules
- rules:
- - alert: TargetDown
- annotations:
- message: '{{`{{ $value }}`}}% of the {{`{{ $labels.job }}`}} targets are down.'
- expr: 100 * (count(up == 0) BY (job) / count(up) BY (job)) > 10
- for: 10m
- labels:
- severity: warning
- - alert: DeadMansSwitch
- annotations:
- message: This is a DeadMansSwitch meant to ensure that the entire alerting pipeline is functional.
- expr: vector(1)
- labels:
- severity: none
-{{- end }} \ No newline at end of file
diff --git a/vnfs/DAaaS/prometheus-operator/templates/alertmanager/rules/k8s.rules.yaml b/vnfs/DAaaS/prometheus-operator/templates/alertmanager/rules/k8s.rules.yaml
deleted file mode 100644
index 620bd15b..00000000
--- a/vnfs/DAaaS/prometheus-operator/templates/alertmanager/rules/k8s.rules.yaml
+++ /dev/null
@@ -1,58 +0,0 @@
-# Generated from 'k8s.rules' group from https://raw.githubusercontent.com/coreos/prometheus-operator/master/contrib/kube-prometheus/manifests/prometheus-rules.yaml
-{{- if and .Values.defaultRules.create }}
-apiVersion: {{ printf "%s/v1" (.Values.prometheusOperator.crdApiGroup | default "monitoring.coreos.com") }}
-kind: PrometheusRule
-metadata:
- name: {{ printf "%s-%s" (include "prometheus-operator.fullname" .) "k8s.rules" | trunc 63 | trimSuffix "-" }}
- labels:
- app: {{ template "prometheus-operator.name" . }}
-{{ include "prometheus-operator.labels" . | indent 4 }}
-{{- if .Values.defaultRules.labels }}
-{{ toYaml .Values.defaultRules.labels | indent 4 }}
-{{- end }}
-{{- if .Values.defaultRules.annotations }}
- annotations:
-{{ toYaml .Values.defaultRules.annotations | indent 4 }}
-{{- end }}
-spec:
- groups:
- - name: k8s.rules
- rules:
- - expr: sum(rate(container_cpu_usage_seconds_total{job="kubelet", image!="", container_name!=""}[5m])) by (namespace)
- record: namespace:container_cpu_usage_seconds_total:sum_rate
- - expr: |-
- sum by (namespace, pod_name, container_name) (
- rate(container_cpu_usage_seconds_total{job="kubelet", image!="", container_name!=""}[5m])
- )
- record: namespace_pod_name_container_name:container_cpu_usage_seconds_total:sum_rate
- - expr: sum(container_memory_usage_bytes{job="kubelet", image!="", container_name!=""}) by (namespace)
- record: namespace:container_memory_usage_bytes:sum
- - expr: |-
- sum by (namespace, label_name) (
- sum(rate(container_cpu_usage_seconds_total{job="kubelet", image!="", container_name!=""}[5m])) by (namespace, pod_name)
- * on (namespace, pod_name) group_left(label_name)
- label_replace(kube_pod_labels{job="kube-state-metrics"}, "pod_name", "$1", "pod", "(.*)")
- )
- record: namespace_name:container_cpu_usage_seconds_total:sum_rate
- - expr: |-
- sum by (namespace, label_name) (
- sum(container_memory_usage_bytes{job="kubelet",image!="", container_name!=""}) by (pod_name, namespace)
- * on (namespace, pod_name) group_left(label_name)
- label_replace(kube_pod_labels{job="kube-state-metrics"}, "pod_name", "$1", "pod", "(.*)")
- )
- record: namespace_name:container_memory_usage_bytes:sum
- - expr: |-
- sum by (namespace, label_name) (
- sum(kube_pod_container_resource_requests_memory_bytes{job="kube-state-metrics"}) by (namespace, pod)
- * on (namespace, pod) group_left(label_name)
- label_replace(kube_pod_labels{job="kube-state-metrics"}, "pod_name", "$1", "pod", "(.*)")
- )
- record: namespace_name:kube_pod_container_resource_requests_memory_bytes:sum
- - expr: |-
- sum by (namespace, label_name) (
- sum(kube_pod_container_resource_requests_cpu_cores{job="kube-state-metrics"} and on(pod) kube_pod_status_scheduled{condition="true"}) by (namespace, pod)
- * on (namespace, pod) group_left(label_name)
- label_replace(kube_pod_labels{job="kube-state-metrics"}, "pod_name", "$1", "pod", "(.*)")
- )
- record: namespace_name:kube_pod_container_resource_requests_cpu_cores:sum
-{{- end }} \ No newline at end of file
diff --git a/vnfs/DAaaS/prometheus-operator/templates/alertmanager/rules/kube-apiserver.rules.yaml b/vnfs/DAaaS/prometheus-operator/templates/alertmanager/rules/kube-apiserver.rules.yaml
deleted file mode 100644
index d1db5296..00000000
--- a/vnfs/DAaaS/prometheus-operator/templates/alertmanager/rules/kube-apiserver.rules.yaml
+++ /dev/null
@@ -1,33 +0,0 @@
-# Generated from 'kube-apiserver.rules' group from https://raw.githubusercontent.com/coreos/prometheus-operator/master/contrib/kube-prometheus/manifests/prometheus-rules.yaml
-{{- if and .Values.defaultRules.create .Values.kubeApiServer.enabled }}
-apiVersion: {{ printf "%s/v1" (.Values.prometheusOperator.crdApiGroup | default "monitoring.coreos.com") }}
-kind: PrometheusRule
-metadata:
- name: {{ printf "%s-%s" (include "prometheus-operator.fullname" .) "kube-apiserver.rules" | trunc 63 | trimSuffix "-" }}
- labels:
- app: {{ template "prometheus-operator.name" . }}
-{{ include "prometheus-operator.labels" . | indent 4 }}
-{{- if .Values.defaultRules.labels }}
-{{ toYaml .Values.defaultRules.labels | indent 4 }}
-{{- end }}
-{{- if .Values.defaultRules.annotations }}
- annotations:
-{{ toYaml .Values.defaultRules.annotations | indent 4 }}
-{{- end }}
-spec:
- groups:
- - name: kube-apiserver.rules
- rules:
- - expr: histogram_quantile(0.99, sum(rate(apiserver_request_latencies_bucket{job="apiserver"}[5m])) without(instance, pod)) / 1e+06
- labels:
- quantile: '0.99'
- record: cluster_quantile:apiserver_request_latencies:histogram_quantile
- - expr: histogram_quantile(0.9, sum(rate(apiserver_request_latencies_bucket{job="apiserver"}[5m])) without(instance, pod)) / 1e+06
- labels:
- quantile: '0.9'
- record: cluster_quantile:apiserver_request_latencies:histogram_quantile
- - expr: histogram_quantile(0.5, sum(rate(apiserver_request_latencies_bucket{job="apiserver"}[5m])) without(instance, pod)) / 1e+06
- labels:
- quantile: '0.5'
- record: cluster_quantile:apiserver_request_latencies:histogram_quantile
-{{- end }} \ No newline at end of file
diff --git a/vnfs/DAaaS/prometheus-operator/templates/alertmanager/rules/kube-prometheus-node-alerting.rules.yaml b/vnfs/DAaaS/prometheus-operator/templates/alertmanager/rules/kube-prometheus-node-alerting.rules.yaml
deleted file mode 100644
index d0a643b7..00000000
--- a/vnfs/DAaaS/prometheus-operator/templates/alertmanager/rules/kube-prometheus-node-alerting.rules.yaml
+++ /dev/null
@@ -1,35 +0,0 @@
-# Generated from 'kube-prometheus-node-alerting.rules' group from https://raw.githubusercontent.com/coreos/prometheus-operator/master/contrib/kube-prometheus/manifests/prometheus-rules.yaml
-{{- if and .Values.defaultRules.create }}
-apiVersion: {{ printf "%s/v1" (.Values.prometheusOperator.crdApiGroup | default "monitoring.coreos.com") }}
-kind: PrometheusRule
-metadata:
- name: {{ printf "%s-%s" (include "prometheus-operator.fullname" .) "kube-prometheus-node-alerting.rules" | trunc 63 | trimSuffix "-" }}
- labels:
- app: {{ template "prometheus-operator.name" . }}
-{{ include "prometheus-operator.labels" . | indent 4 }}
-{{- if .Values.defaultRules.labels }}
-{{ toYaml .Values.defaultRules.labels | indent 4 }}
-{{- end }}
-{{- if .Values.defaultRules.annotations }}
- annotations:
-{{ toYaml .Values.defaultRules.annotations | indent 4 }}
-{{- end }}
-spec:
- groups:
- - name: kube-prometheus-node-alerting.rules
- rules:
- - alert: NodeDiskRunningFull
- annotations:
- message: Device {{`{{ $labels.device }}`}} of node-exporter {{`{{ $labels.namespace }}`}}/{{`{{ $labels.pod }}`}} will be full within the next 24 hours.
- expr: '(node:node_filesystem_usage: > 0.85) and (predict_linear(node:node_filesystem_avail:[6h], 3600 * 24) < 0)'
- for: 30m
- labels:
- severity: warning
- - alert: NodeDiskRunningFull
- annotations:
- message: Device {{`{{ $labels.device }}`}} of node-exporter {{`{{ $labels.namespace }}`}}/{{`{{ $labels.pod }}`}} will be full within the next 2 hours.
- expr: '(node:node_filesystem_usage: > 0.85) and (predict_linear(node:node_filesystem_avail:[30m], 3600 * 2) < 0)'
- for: 10m
- labels:
- severity: critical
-{{- end }} \ No newline at end of file
diff --git a/vnfs/DAaaS/prometheus-operator/templates/alertmanager/rules/kube-prometheus-node-recording.rules.yaml b/vnfs/DAaaS/prometheus-operator/templates/alertmanager/rules/kube-prometheus-node-recording.rules.yaml
deleted file mode 100644
index 87d3556a..00000000
--- a/vnfs/DAaaS/prometheus-operator/templates/alertmanager/rules/kube-prometheus-node-recording.rules.yaml
+++ /dev/null
@@ -1,35 +0,0 @@
-# Generated from 'kube-prometheus-node-recording.rules' group from https://raw.githubusercontent.com/coreos/prometheus-operator/master/contrib/kube-prometheus/manifests/prometheus-rules.yaml
-{{- if and .Values.defaultRules.create }}
-apiVersion: {{ printf "%s/v1" (.Values.prometheusOperator.crdApiGroup | default "monitoring.coreos.com") }}
-kind: PrometheusRule
-metadata:
- name: {{ printf "%s-%s" (include "prometheus-operator.fullname" .) "kube-prometheus-node-recording.rules" | trunc 63 | trimSuffix "-" }}
- labels:
- app: {{ template "prometheus-operator.name" . }}
-{{ include "prometheus-operator.labels" . | indent 4 }}
-{{- if .Values.defaultRules.labels }}
-{{ toYaml .Values.defaultRules.labels | indent 4 }}
-{{- end }}
-{{- if .Values.defaultRules.annotations }}
- annotations:
-{{ toYaml .Values.defaultRules.annotations | indent 4 }}
-{{- end }}
-spec:
- groups:
- - name: kube-prometheus-node-recording.rules
- rules:
- - expr: sum(rate(node_cpu{mode!="idle",mode!="iowait"}[3m])) BY (instance)
- record: instance:node_cpu:rate:sum
- - expr: sum((node_filesystem_size{mountpoint="/"} - node_filesystem_free{mountpoint="/"})) BY (instance)
- record: instance:node_filesystem_usage:sum
- - expr: sum(rate(node_network_receive_bytes[3m])) BY (instance)
- record: instance:node_network_receive_bytes:rate:sum
- - expr: sum(rate(node_network_transmit_bytes[3m])) BY (instance)
- record: instance:node_network_transmit_bytes:rate:sum
- - expr: sum(rate(node_cpu{mode!="idle",mode!="iowait"}[5m])) WITHOUT (cpu, mode) / ON(instance) GROUP_LEFT() count(sum(node_cpu) BY (instance, cpu)) BY (instance)
- record: instance:node_cpu:ratio
- - expr: sum(rate(node_cpu{mode!="idle",mode!="iowait"}[5m]))
- record: cluster:node_cpu:sum_rate5m
- - expr: cluster:node_cpu:rate5m / count(sum(node_cpu) BY (instance, cpu))
- record: cluster:node_cpu:ratio
-{{- end }} \ No newline at end of file
diff --git a/vnfs/DAaaS/prometheus-operator/templates/alertmanager/rules/kube-scheduler.rules.yaml b/vnfs/DAaaS/prometheus-operator/templates/alertmanager/rules/kube-scheduler.rules.yaml
deleted file mode 100644
index 3a279661..00000000
--- a/vnfs/DAaaS/prometheus-operator/templates/alertmanager/rules/kube-scheduler.rules.yaml
+++ /dev/null
@@ -1,57 +0,0 @@
-# Generated from 'kube-scheduler.rules' group from https://raw.githubusercontent.com/coreos/prometheus-operator/master/contrib/kube-prometheus/manifests/prometheus-rules.yaml
-{{- if and .Values.defaultRules.create .Values.kubeScheduler.enabled }}
-apiVersion: {{ printf "%s/v1" (.Values.prometheusOperator.crdApiGroup | default "monitoring.coreos.com") }}
-kind: PrometheusRule
-metadata:
- name: {{ printf "%s-%s" (include "prometheus-operator.fullname" .) "kube-scheduler.rules" | trunc 63 | trimSuffix "-" }}
- labels:
- app: {{ template "prometheus-operator.name" . }}
-{{ include "prometheus-operator.labels" . | indent 4 }}
-{{- if .Values.defaultRules.labels }}
-{{ toYaml .Values.defaultRules.labels | indent 4 }}
-{{- end }}
-{{- if .Values.defaultRules.annotations }}
- annotations:
-{{ toYaml .Values.defaultRules.annotations | indent 4 }}
-{{- end }}
-spec:
- groups:
- - name: kube-scheduler.rules
- rules:
- - expr: histogram_quantile(0.99, sum(rate(scheduler_e2e_scheduling_latency_microseconds_bucket{job="kube-scheduler"}[5m])) without(instance, pod)) / 1e+06
- labels:
- quantile: '0.99'
- record: cluster_quantile:scheduler_e2e_scheduling_latency:histogram_quantile
- - expr: histogram_quantile(0.99, sum(rate(scheduler_scheduling_algorithm_latency_microseconds_bucket{job="kube-scheduler"}[5m])) without(instance, pod)) / 1e+06
- labels:
- quantile: '0.99'
- record: cluster_quantile:scheduler_scheduling_algorithm_latency:histogram_quantile
- - expr: histogram_quantile(0.99, sum(rate(scheduler_binding_latency_microseconds_bucket{job="kube-scheduler"}[5m])) without(instance, pod)) / 1e+06
- labels:
- quantile: '0.99'
- record: cluster_quantile:scheduler_binding_latency:histogram_quantile
- - expr: histogram_quantile(0.9, sum(rate(scheduler_e2e_scheduling_latency_microseconds_bucket{job="kube-scheduler"}[5m])) without(instance, pod)) / 1e+06
- labels:
- quantile: '0.9'
- record: cluster_quantile:scheduler_e2e_scheduling_latency:histogram_quantile
- - expr: histogram_quantile(0.9, sum(rate(scheduler_scheduling_algorithm_latency_microseconds_bucket{job="kube-scheduler"}[5m])) without(instance, pod)) / 1e+06
- labels:
- quantile: '0.9'
- record: cluster_quantile:scheduler_scheduling_algorithm_latency:histogram_quantile
- - expr: histogram_quantile(0.9, sum(rate(scheduler_binding_latency_microseconds_bucket{job="kube-scheduler"}[5m])) without(instance, pod)) / 1e+06
- labels:
- quantile: '0.9'
- record: cluster_quantile:scheduler_binding_latency:histogram_quantile
- - expr: histogram_quantile(0.5, sum(rate(scheduler_e2e_scheduling_latency_microseconds_bucket{job="kube-scheduler"}[5m])) without(instance, pod)) / 1e+06
- labels:
- quantile: '0.5'
- record: cluster_quantile:scheduler_e2e_scheduling_latency:histogram_quantile
- - expr: histogram_quantile(0.5, sum(rate(scheduler_scheduling_algorithm_latency_microseconds_bucket{job="kube-scheduler"}[5m])) without(instance, pod)) / 1e+06
- labels:
- quantile: '0.5'
- record: cluster_quantile:scheduler_scheduling_algorithm_latency:histogram_quantile
- - expr: histogram_quantile(0.5, sum(rate(scheduler_binding_latency_microseconds_bucket{job="kube-scheduler"}[5m])) without(instance, pod)) / 1e+06
- labels:
- quantile: '0.5'
- record: cluster_quantile:scheduler_binding_latency:histogram_quantile
-{{- end }} \ No newline at end of file
diff --git a/vnfs/DAaaS/prometheus-operator/templates/alertmanager/rules/kubernetes-absent.yaml b/vnfs/DAaaS/prometheus-operator/templates/alertmanager/rules/kubernetes-absent.yaml
deleted file mode 100644
index 37fc5465..00000000
--- a/vnfs/DAaaS/prometheus-operator/templates/alertmanager/rules/kubernetes-absent.yaml
+++ /dev/null
@@ -1,120 +0,0 @@
-# Generated from 'kubernetes-absent' group from https://raw.githubusercontent.com/coreos/prometheus-operator/master/contrib/kube-prometheus/manifests/prometheus-rules.yaml
-{{- if and .Values.defaultRules.create }}
-{{- $operatorJob := printf "%s-%s" (include "prometheus-operator.fullname" .) "operator" }}
-{{- $prometheusJob := printf "%s-%s" (include "prometheus-operator.fullname" .) "prometheus" }}
-{{- $alertmanagerJob := printf "%s-%s" (include "prometheus-operator.fullname" .) "alertmanager" }}
-apiVersion: {{ printf "%s/v1" (.Values.prometheusOperator.crdApiGroup | default "monitoring.coreos.com") }}
-kind: PrometheusRule
-metadata:
- name: {{ printf "%s-%s" (include "prometheus-operator.fullname" .) "kubernetes-absent" | trunc 63 | trimSuffix "-" }}
- labels:
- app: {{ template "prometheus-operator.name" . }}
-{{ include "prometheus-operator.labels" . | indent 4 }}
-{{- if .Values.defaultRules.labels }}
-{{ toYaml .Values.defaultRules.labels | indent 4 }}
-{{- end }}
-{{- if .Values.defaultRules.annotations }}
- annotations:
-{{ toYaml .Values.defaultRules.annotations | indent 4 }}
-{{- end }}
-spec:
- groups:
- - name: kubernetes-absent
- rules:
- - alert: AlertmanagerDown
- annotations:
- message: Alertmanager has disappeared from Prometheus target discovery.
- runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-alertmanagerdown
- expr: absent(up{job="{{ $alertmanagerJob }}"} == 1)
- for: 15m
- labels:
- severity: critical
-{{- if .Values.kubeDns.enabled }}
- - alert: CoreDNSDown
- annotations:
- message: CoreDNS has disappeared from Prometheus target discovery.
- runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-corednsdown
- expr: absent(up{job="kube-dns"} == 1)
- for: 15m
- labels:
- severity: critical
-{{- if .Values.kubeApiServer.enabled }}
-{{- end }}
- - alert: KubeAPIDown
- annotations:
- message: KubeAPI has disappeared from Prometheus target discovery.
- runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeapidown
- expr: absent(up{job="apiserver"} == 1)
- for: 15m
- labels:
- severity: critical
-{{- end }}
-{{- if .Values.kubeControllerManager.enabled }}
- - alert: KubeControllerManagerDown
- annotations:
- message: KubeControllerManager has disappeared from Prometheus target discovery.
- runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubecontrollermanagerdown
- expr: absent(up{job="kube-controller-manager"} == 1)
- for: 15m
- labels:
- severity: critical
-{{- end }}
-{{- if .Values.kubeScheduler.enabled }}
- - alert: KubeSchedulerDown
- annotations:
- message: KubeScheduler has disappeared from Prometheus target discovery.
- runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeschedulerdown
- expr: absent(up{job="kube-scheduler"} == 1)
- for: 15m
- labels:
- severity: critical
-{{- end }}
-{{- if .Values.kubeStateMetrics.enabled }}
- - alert: KubeStateMetricsDown
- annotations:
- message: KubeStateMetrics has disappeared from Prometheus target discovery.
- runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubestatemetricsdown
- expr: absent(up{job="kube-state-metrics"} == 1)
- for: 15m
- labels:
- severity: critical
-{{- end }}
-{{- if .Values.prometheusOperator.kubeletService.enabled }}
- - alert: KubeletDown
- annotations:
- message: Kubelet has disappeared from Prometheus target discovery.
- runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeletdown
- expr: absent(up{job="kubelet"} == 1)
- for: 15m
- labels:
- severity: critical
-{{- end }}
-{{- if .Values.nodeExporter.enabled }}
- - alert: NodeExporterDown
- annotations:
- message: NodeExporter has disappeared from Prometheus target discovery.
- runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-nodeexporterdown
- expr: absent(up{job="node-exporter"} == 1)
- for: 15m
- labels:
- severity: critical
-{{- end }}
- - alert: PrometheusDown
- annotations:
- message: Prometheus has disappeared from Prometheus target discovery.
- runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-prometheusdown
- expr: absent(up{job="{{ $prometheusJob }}"} == 1)
- for: 15m
- labels:
- severity: critical
-{{- if .Values.prometheusOperator.enabled }}
- - alert: PrometheusOperatorDown
- annotations:
- message: PrometheusOperator has disappeared from Prometheus target discovery.
- runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-prometheusoperatordown
- expr: absent(up{job="{{ $operatorJob }}"} == 1)
- for: 15m
- labels:
- severity: critical
-{{- end }}
-{{- end }} \ No newline at end of file
diff --git a/vnfs/DAaaS/prometheus-operator/templates/alertmanager/rules/kubernetes-apps.yaml b/vnfs/DAaaS/prometheus-operator/templates/alertmanager/rules/kubernetes-apps.yaml
deleted file mode 100644
index 21549c23..00000000
--- a/vnfs/DAaaS/prometheus-operator/templates/alertmanager/rules/kubernetes-apps.yaml
+++ /dev/null
@@ -1,154 +0,0 @@
-# Generated from 'kubernetes-apps' group from https://raw.githubusercontent.com/coreos/prometheus-operator/master/contrib/kube-prometheus/manifests/prometheus-rules.yaml
-{{- if and .Values.defaultRules.create .Values.kubeStateMetrics.enabled }}
-apiVersion: {{ printf "%s/v1" (.Values.prometheusOperator.crdApiGroup | default "monitoring.coreos.com") }}
-kind: PrometheusRule
-metadata:
- name: {{ printf "%s-%s" (include "prometheus-operator.fullname" .) "kubernetes-apps" | trunc 63 | trimSuffix "-" }}
- labels:
- app: {{ template "prometheus-operator.name" . }}
-{{ include "prometheus-operator.labels" . | indent 4 }}
-{{- if .Values.defaultRules.labels }}
-{{ toYaml .Values.defaultRules.labels | indent 4 }}
-{{- end }}
-{{- if .Values.defaultRules.annotations }}
- annotations:
-{{ toYaml .Values.defaultRules.annotations | indent 4 }}
-{{- end }}
-spec:
- groups:
- - name: kubernetes-apps
- rules:
- - alert: KubePodCrashLooping
- annotations:
- message: Pod {{`{{ $labels.namespace }}`}}/{{`{{ $labels.pod }}`}} ({{`{{ $labels.container }}`}}) is restarting {{`{{ printf "%.2f" $value }}`}} times / 5 minutes.
- runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubepodcrashlooping
- expr: rate(kube_pod_container_status_restarts_total{job="kube-state-metrics"}[15m]) * 60 * 5 > 0
- for: 1h
- labels:
- severity: critical
- - alert: KubePodNotReady
- annotations:
- message: Pod {{`{{ $labels.namespace }}`}}/{{`{{ $labels.pod }}`}} has been in a non-ready state for longer than an hour.
- runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubepodnotready
- expr: sum by (namespace, pod) (kube_pod_status_phase{job="kube-state-metrics", phase=~"Pending|Unknown"}) > 0
- for: 1h
- labels:
- severity: critical
- - alert: KubeDeploymentGenerationMismatch
- annotations:
- message: Deployment generation for {{`{{ $labels.namespace }}`}}/{{`{{ $labels.deployment }}`}} does not match, this indicates that the Deployment has failed but has not been rolled back.
- runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubedeploymentgenerationmismatch
- expr: |-
- kube_deployment_status_observed_generation{job="kube-state-metrics"}
- !=
- kube_deployment_metadata_generation{job="kube-state-metrics"}
- for: 15m
- labels:
- severity: critical
- - alert: KubeDeploymentReplicasMismatch
- annotations:
- message: Deployment {{`{{ $labels.namespace }}`}}/{{`{{ $labels.deployment }}`}} has not matched the expected number of replicas for longer than an hour.
- runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubedeploymentreplicasmismatch
- expr: |-
- kube_deployment_spec_replicas{job="kube-state-metrics"}
- !=
- kube_deployment_status_replicas_available{job="kube-state-metrics"}
- for: 1h
- labels:
- severity: critical
- - alert: KubeStatefulSetReplicasMismatch
- annotations:
- message: StatefulSet {{`{{ $labels.namespace }}`}}/{{`{{ $labels.statefulset }}`}} has not matched the expected number of replicas for longer than 15 minutes.
- runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubestatefulsetreplicasmismatch
- expr: |-
- kube_statefulset_status_replicas_ready{job="kube-state-metrics"}
- !=
- kube_statefulset_status_replicas{job="kube-state-metrics"}
- for: 15m
- labels:
- severity: critical
- - alert: KubeStatefulSetGenerationMismatch
- annotations:
- message: StatefulSet generation for {{`{{ $labels.namespace }}`}}/{{`{{ $labels.statefulset }}`}} does not match, this indicates that the StatefulSet has failed but has not been rolled back.
- runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubestatefulsetgenerationmismatch
- expr: |-
- kube_statefulset_status_observed_generation{job="kube-state-metrics"}
- !=
- kube_statefulset_metadata_generation{job="kube-state-metrics"}
- for: 15m
- labels:
- severity: critical
- - alert: KubeStatefulSetUpdateNotRolledOut
- annotations:
- message: StatefulSet {{`{{ $labels.namespace }}`}}/{{`{{ $labels.statefulset }}`}} update has not been rolled out.
- runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubestatefulsetupdatenotrolledout
- expr: |-
- max without (revision) (
- kube_statefulset_status_current_revision{job="kube-state-metrics"}
- unless
- kube_statefulset_status_update_revision{job="kube-state-metrics"}
- )
- *
- (
- kube_statefulset_replicas{job="kube-state-metrics"}
- !=
- kube_statefulset_status_replicas_updated{job="kube-state-metrics"}
- )
- for: 15m
- labels:
- severity: critical
- - alert: KubeDaemonSetRolloutStuck
- annotations:
- message: Only {{`{{ $value }}`}}% of the desired Pods of DaemonSet {{`{{ $labels.namespace }}`}}/{{`{{ $labels.daemonset }}`}} are scheduled and ready.
- runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubedaemonsetrolloutstuck
- expr: |-
- kube_daemonset_status_number_ready{job="kube-state-metrics"}
- /
- kube_daemonset_status_desired_number_scheduled{job="kube-state-metrics"} * 100 < 100
- for: 15m
- labels:
- severity: critical
- - alert: KubeDaemonSetNotScheduled
- annotations:
- message: '{{`{{ $value }}`}} Pods of DaemonSet {{`{{ $labels.namespace }}`}}/{{`{{ $labels.daemonset }}`}} are not scheduled.'
- runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubedaemonsetnotscheduled
- expr: |-
- kube_daemonset_status_desired_number_scheduled{job="kube-state-metrics"}
- -
- kube_daemonset_status_current_number_scheduled{job="kube-state-metrics"} > 0
- for: 10m
- labels:
- severity: warning
- - alert: KubeDaemonSetMisScheduled
- annotations:
- message: '{{`{{ $value }}`}} Pods of DaemonSet {{`{{ $labels.namespace }}`}}/{{`{{ $labels.daemonset }}`}} are running where they are not supposed to run.'
- runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubedaemonsetmisscheduled
- expr: kube_daemonset_status_number_misscheduled{job="kube-state-metrics"} > 0
- for: 10m
- labels:
- severity: warning
- - alert: KubeCronJobRunning
- annotations:
- message: CronJob {{`{{ $labels.namespace }}`}}/{{`{{ $labels.cronjob }}`}} is taking more than 1h to complete.
- runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubecronjobrunning
- expr: time() - kube_cronjob_next_schedule_time{job="kube-state-metrics"} > 3600
- for: 1h
- labels:
- severity: warning
- - alert: KubeJobCompletion
- annotations:
- message: Job {{`{{ $labels.namespace }}`}}/{{`{{ $labels.job_name }}`}} is taking more than one hour to complete.
- runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubejobcompletion
- expr: kube_job_spec_completions{job="kube-state-metrics"} - kube_job_status_succeeded{job="kube-state-metrics"} > 0
- for: 1h
- labels:
- severity: warning
- - alert: KubeJobFailed
- annotations:
- message: Job {{`{{ $labels.namespace }}`}}/{{`{{ $labels.job_name }}`}} failed to complete.
- runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubejobfailed
- expr: kube_job_status_failed{job="kube-state-metrics"} > 0
- for: 1h
- labels:
- severity: warning
-{{- end }} \ No newline at end of file
diff --git a/vnfs/DAaaS/prometheus-operator/templates/alertmanager/rules/kubernetes-resources.yaml b/vnfs/DAaaS/prometheus-operator/templates/alertmanager/rules/kubernetes-resources.yaml
deleted file mode 100644
index 4a7b9f95..00000000
--- a/vnfs/DAaaS/prometheus-operator/templates/alertmanager/rules/kubernetes-resources.yaml
+++ /dev/null
@@ -1,93 +0,0 @@
-# Generated from 'kubernetes-resources' group from https://raw.githubusercontent.com/coreos/prometheus-operator/master/contrib/kube-prometheus/manifests/prometheus-rules.yaml
-{{- if and .Values.defaultRules.create }}
-apiVersion: {{ printf "%s/v1" (.Values.prometheusOperator.crdApiGroup | default "monitoring.coreos.com") }}
-kind: PrometheusRule
-metadata:
- name: {{ printf "%s-%s" (include "prometheus-operator.fullname" .) "kubernetes-resources" | trunc 63 | trimSuffix "-" }}
- labels:
- app: {{ template "prometheus-operator.name" . }}
-{{ include "prometheus-operator.labels" . | indent 4 }}
-{{- if .Values.defaultRules.labels }}
-{{ toYaml .Values.defaultRules.labels | indent 4 }}
-{{- end }}
-{{- if .Values.defaultRules.annotations }}
- annotations:
-{{ toYaml .Values.defaultRules.annotations | indent 4 }}
-{{- end }}
-spec:
- groups:
- - name: kubernetes-resources
- rules:
- - alert: KubeCPUOvercommit
- annotations:
- message: Cluster has overcommitted CPU resource requests for Pods and cannot tolerate node failure.
- runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubecpuovercommit
- expr: |-
- sum(namespace_name:kube_pod_container_resource_requests_cpu_cores:sum)
- /
- sum(node:node_num_cpu:sum)
- >
- (count(node:node_num_cpu:sum)-1) / count(node:node_num_cpu:sum)
- for: 5m
- labels:
- severity: warning
- - alert: KubeMemOvercommit
- annotations:
- message: Cluster has overcommitted memory resource requests for Pods and cannot tolerate node failure.
- runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubememovercommit
- expr: |-
- sum(namespace_name:kube_pod_container_resource_requests_memory_bytes:sum)
- /
- sum(node_memory_MemTotal_bytes)
- >
- (count(node:node_num_cpu:sum)-1)
- /
- count(node:node_num_cpu:sum)
- for: 5m
- labels:
- severity: warning
- - alert: KubeCPUOvercommit
- annotations:
- message: Cluster has overcommitted CPU resource requests for Namespaces.
- runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubecpuovercommit
- expr: |-
- sum(kube_resourcequota{job="kube-state-metrics", type="hard", resource="requests.cpu"})
- /
- sum(node:node_num_cpu:sum)
- > 1.5
- for: 5m
- labels:
- severity: warning
- - alert: KubeMemOvercommit
- annotations:
- message: Cluster has overcommitted memory resource requests for Namespaces.
- runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubememovercommit
- expr: |-
- sum(kube_resourcequota{job="kube-state-metrics", type="hard", resource="requests.memory"})
- /
- sum(node_memory_MemTotal_bytes{job="node-exporter"})
- > 1.5
- for: 5m
- labels:
- severity: warning
- - alert: KubeQuotaExceeded
- annotations:
- message: Namespace {{`{{ $labels.namespace }}`}} is using {{`{{ printf "%0.0f" $value }}`}}% of its {{`{{ $labels.resource }}`}} quota.
- runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubequotaexceeded
- expr: |-
- 100 * kube_resourcequota{job="kube-state-metrics", type="used"}
- / ignoring(instance, job, type)
- (kube_resourcequota{job="kube-state-metrics", type="hard"} > 0)
- > 90
- for: 15m
- labels:
- severity: warning
- - alert: CPUThrottlingHigh
- annotations:
- message: '{{`{{ printf "%0.0f" $value }}`}}% throttling of CPU in namespace {{`{{ $labels.namespace }}`}} for container {{`{{ $labels.container_name }}`}} in pod {{`{{ $labels.pod_name }}`}}.'
- runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-cputhrottlinghigh
- expr: "100 * sum(increase(container_cpu_cfs_throttled_periods_total{}[5m])) by (container_name, pod_name, namespace) \n / \nsum(increase(container_cpu_cfs_periods_total{}[5m])) by (container_name, pod_name, namespace)\n > 25"
- for: 15m
- labels:
- severity: warning
-{{- end }} \ No newline at end of file
diff --git a/vnfs/DAaaS/prometheus-operator/templates/alertmanager/rules/kubernetes-storage.yaml b/vnfs/DAaaS/prometheus-operator/templates/alertmanager/rules/kubernetes-storage.yaml
deleted file mode 100644
index d290f0cf..00000000
--- a/vnfs/DAaaS/prometheus-operator/templates/alertmanager/rules/kubernetes-storage.yaml
+++ /dev/null
@@ -1,56 +0,0 @@
-# Generated from 'kubernetes-storage' group from https://raw.githubusercontent.com/coreos/prometheus-operator/master/contrib/kube-prometheus/manifests/prometheus-rules.yaml
-{{- if and .Values.defaultRules.create }}
-apiVersion: {{ printf "%s/v1" (.Values.prometheusOperator.crdApiGroup | default "monitoring.coreos.com") }}
-kind: PrometheusRule
-metadata:
- name: {{ printf "%s-%s" (include "prometheus-operator.fullname" .) "kubernetes-storage" | trunc 63 | trimSuffix "-" }}
- labels:
- app: {{ template "prometheus-operator.name" . }}
-{{ include "prometheus-operator.labels" . | indent 4 }}
-{{- if .Values.defaultRules.labels }}
-{{ toYaml .Values.defaultRules.labels | indent 4 }}
-{{- end }}
-{{- if .Values.defaultRules.annotations }}
- annotations:
-{{ toYaml .Values.defaultRules.annotations | indent 4 }}
-{{- end }}
-spec:
- groups:
- - name: kubernetes-storage
- rules:
- - alert: KubePersistentVolumeUsageCritical
- annotations:
- message: The PersistentVolume claimed by {{`{{ $labels.persistentvolumeclaim }}`}} in Namespace {{`{{ $labels.namespace }}`}} is only {{`{{ printf "%0.2f" $value }}`}}% free.
- runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubepersistentvolumeusagecritical
- expr: |-
- 100 * kubelet_volume_stats_available_bytes{job="kubelet"}
- /
- kubelet_volume_stats_capacity_bytes{job="kubelet"}
- < 3
- for: 1m
- labels:
- severity: critical
- - alert: KubePersistentVolumeFullInFourDays
- annotations:
- message: Based on recent sampling, the PersistentVolume claimed by {{`{{ $labels.persistentvolumeclaim }}`}} in Namespace {{`{{ $labels.namespace }}`}} is expected to fill up within four days. Currently {{`{{ printf "%0.2f" $value }}`}}% is available.
- runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubepersistentvolumefullinfourdays
- expr: |-
- 100 * (
- kubelet_volume_stats_available_bytes{job="kubelet"}
- /
- kubelet_volume_stats_capacity_bytes{job="kubelet"}
- ) < 15
- and
- predict_linear(kubelet_volume_stats_available_bytes{job="kubelet"}[6h], 4 * 24 * 3600) < 0
- for: 5m
- labels:
- severity: critical
- - alert: KubePersistentVolumeErrors
- annotations:
- message: The persistent volume {{`{{ $labels.persistentvolume }}`}} has status {{`{{ $labels.phase }}`}}.
- runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubepersistentvolumeerrors
- expr: kube_persistentvolume_status_phase{phase=~"Failed|Pending",job="kube-state-metrics"} > 0
- for: 5m
- labels:
- severity: critical
-{{- end }} \ No newline at end of file
diff --git a/vnfs/DAaaS/prometheus-operator/templates/alertmanager/rules/kubernetes-system.yaml b/vnfs/DAaaS/prometheus-operator/templates/alertmanager/rules/kubernetes-system.yaml
deleted file mode 100644
index 78f90b79..00000000
--- a/vnfs/DAaaS/prometheus-operator/templates/alertmanager/rules/kubernetes-system.yaml
+++ /dev/null
@@ -1,117 +0,0 @@
-# Generated from 'kubernetes-system' group from https://raw.githubusercontent.com/coreos/prometheus-operator/master/contrib/kube-prometheus/manifests/prometheus-rules.yaml
-{{- if and .Values.defaultRules.create }}
-apiVersion: {{ printf "%s/v1" (.Values.prometheusOperator.crdApiGroup | default "monitoring.coreos.com") }}
-kind: PrometheusRule
-metadata:
- name: {{ printf "%s-%s" (include "prometheus-operator.fullname" .) "kubernetes-system" | trunc 63 | trimSuffix "-" }}
- labels:
- app: {{ template "prometheus-operator.name" . }}
-{{ include "prometheus-operator.labels" . | indent 4 }}
-{{- if .Values.defaultRules.labels }}
-{{ toYaml .Values.defaultRules.labels | indent 4 }}
-{{- end }}
-{{- if .Values.defaultRules.annotations }}
- annotations:
-{{ toYaml .Values.defaultRules.annotations | indent 4 }}
-{{- end }}
-spec:
- groups:
- - name: kubernetes-system
- rules:
- - alert: KubeNodeNotReady
- annotations:
- message: '{{`{{ $labels.node }}`}} has been unready for more than an hour.'
- runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubenodenotready
- expr: kube_node_status_condition{job="kube-state-metrics",condition="Ready",status="true"} == 0
- for: 1h
- labels:
- severity: warning
- - alert: KubeVersionMismatch
- annotations:
- message: There are {{`{{ $value }}`}} different versions of Kubernetes components running.
- runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeversionmismatch
- expr: count(count(kubernetes_build_info{job!="kube-dns"}) by (gitVersion)) > 1
- for: 1h
- labels:
- severity: warning
- - alert: KubeClientErrors
- annotations:
- message: Kubernetes API server client '{{`{{ $labels.job }}`}}/{{`{{ $labels.instance }}`}}' is experiencing {{`{{ printf "%0.0f" $value }}`}}% errors.'
- runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeclienterrors
- expr: |-
- (sum(rate(rest_client_requests_total{code=~"5.."}[5m])) by (instance, job)
- /
- sum(rate(rest_client_requests_total[5m])) by (instance, job))
- * 100 > 1
- for: 15m
- labels:
- severity: warning
- - alert: KubeClientErrors
- annotations:
- message: Kubernetes API server client '{{`{{ $labels.job }}`}}/{{`{{ $labels.instance }}`}}' is experiencing {{`{{ printf "%0.0f" $value }}`}} errors / second.
- runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeclienterrors
- expr: sum(rate(ksm_scrape_error_total{job="kube-state-metrics"}[5m])) by (instance, job) > 0.1
- for: 15m
- labels:
- severity: warning
- - alert: KubeletTooManyPods
- annotations:
- message: Kubelet {{`{{ $labels.instance }}`}} is running {{`{{ $value }}`}} Pods, close to the limit of 110.
- runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubelettoomanypods
- expr: kubelet_running_pod_count{job="kubelet"} > 110 * 0.9
- for: 15m
- labels:
- severity: warning
- - alert: KubeAPILatencyHigh
- annotations:
- message: The API server has a 99th percentile latency of {{`{{ $value }}`}} seconds for {{`{{ $labels.verb }}`}} {{`{{ $labels.resource }}`}}.
- runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeapilatencyhigh
- expr: cluster_quantile:apiserver_request_latencies:histogram_quantile{job="apiserver",quantile="0.99",subresource!="log",verb!~"^(?:LIST|WATCH|WATCHLIST|PROXY|CONNECT)$"} > 1
- for: 10m
- labels:
- severity: warning
- - alert: KubeAPILatencyHigh
- annotations:
- message: The API server has a 99th percentile latency of {{`{{ $value }}`}} seconds for {{`{{ $labels.verb }}`}} {{`{{ $labels.resource }}`}}.
- runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeapilatencyhigh
- expr: cluster_quantile:apiserver_request_latencies:histogram_quantile{job="apiserver",quantile="0.99",subresource!="log",verb!~"^(?:LIST|WATCH|WATCHLIST|PROXY|CONNECT)$"} > 4
- for: 10m
- labels:
- severity: critical
- - alert: KubeAPIErrorsHigh
- annotations:
- message: API server is returning errors for {{`{{ $value }}`}}% of requests.
- runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeapierrorshigh
- expr: |-
- sum(rate(apiserver_request_count{job="apiserver",code=~"^(?:5..)$"}[5m])) without(instance, pod)
- /
- sum(rate(apiserver_request_count{job="apiserver"}[5m])) without(instance, pod) * 100 > 10
- for: 10m
- labels:
- severity: critical
- - alert: KubeAPIErrorsHigh
- annotations:
- message: API server is returning errors for {{`{{ $value }}`}}% of requests.
- runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeapierrorshigh
- expr: |-
- sum(rate(apiserver_request_count{job="apiserver",code=~"^(?:5..)$"}[5m])) without(instance, pod)
- /
- sum(rate(apiserver_request_count{job="apiserver"}[5m])) without(instance, pod) * 100 > 5
- for: 10m
- labels:
- severity: warning
- - alert: KubeClientCertificateExpiration
- annotations:
- message: Kubernetes API certificate is expiring in less than 7 days.
- runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeclientcertificateexpiration
- expr: histogram_quantile(0.01, sum by (job, le) (rate(apiserver_client_certificate_expiration_seconds_bucket{job="apiserver"}[5m]))) < 604800
- labels:
- severity: warning
- - alert: KubeClientCertificateExpiration
- annotations:
- message: Kubernetes API certificate is expiring in less than 24 hours.
- runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeclientcertificateexpiration
- expr: histogram_quantile(0.01, sum by (job, le) (rate(apiserver_client_certificate_expiration_seconds_bucket{job="apiserver"}[5m]))) < 86400
- labels:
- severity: critical
-{{- end }} \ No newline at end of file
diff --git a/vnfs/DAaaS/prometheus-operator/templates/alertmanager/rules/node.rules.yaml b/vnfs/DAaaS/prometheus-operator/templates/alertmanager/rules/node.rules.yaml
deleted file mode 100644
index b039d1ac..00000000
--- a/vnfs/DAaaS/prometheus-operator/templates/alertmanager/rules/node.rules.yaml
+++ /dev/null
@@ -1,184 +0,0 @@
-# Generated from 'node.rules' group from https://raw.githubusercontent.com/coreos/prometheus-operator/master/contrib/kube-prometheus/manifests/prometheus-rules.yaml
-{{- if and .Values.defaultRules.create .Values.nodeExporter.enabled }}
-apiVersion: {{ printf "%s/v1" (.Values.prometheusOperator.crdApiGroup | default "monitoring.coreos.com") }}
-kind: PrometheusRule
-metadata:
- name: {{ printf "%s-%s" (include "prometheus-operator.fullname" .) "node.rules" | trunc 63 | trimSuffix "-" }}
- labels:
- app: {{ template "prometheus-operator.name" . }}
-{{ include "prometheus-operator.labels" . | indent 4 }}
-{{- if .Values.defaultRules.labels }}
-{{ toYaml .Values.defaultRules.labels | indent 4 }}
-{{- end }}
-{{- if .Values.defaultRules.annotations }}
- annotations:
-{{ toYaml .Values.defaultRules.annotations | indent 4 }}
-{{- end }}
-spec:
- groups:
- - name: node.rules
- rules:
- - expr: sum(min(kube_pod_info) by (node))
- record: ':kube_pod_info_node_count:'
- - expr: max(label_replace(kube_pod_info{job="kube-state-metrics"}, "pod", "$1", "pod", "(.*)")) by (node, namespace, pod)
- record: 'node_namespace_pod:kube_pod_info:'
- - expr: |-
- count by (node) (sum by (node, cpu) (
- node_cpu_seconds_total{job="node-exporter"}
- * on (namespace, pod) group_left(node)
- node_namespace_pod:kube_pod_info:
- ))
- record: node:node_num_cpu:sum
- - expr: 1 - avg(rate(node_cpu_seconds_total{job="node-exporter",mode="idle"}[1m]))
- record: :node_cpu_utilisation:avg1m
- - expr: |-
- 1 - avg by (node) (
- rate(node_cpu_seconds_total{job="node-exporter",mode="idle"}[1m])
- * on (namespace, pod) group_left(node)
- node_namespace_pod:kube_pod_info:)
- record: node:node_cpu_utilisation:avg1m
- - expr: |-
- sum(node_load1{job="node-exporter"})
- /
- sum(node:node_num_cpu:sum)
- record: ':node_cpu_saturation_load1:'
- - expr: |-
- sum by (node) (
- node_load1{job="node-exporter"}
- * on (namespace, pod) group_left(node)
- node_namespace_pod:kube_pod_info:
- )
- /
- node:node_num_cpu:sum
- record: 'node:node_cpu_saturation_load1:'
- - expr: |-
- 1 -
- sum(node_memory_MemFree_bytes{job="node-exporter"} + node_memory_Cached_bytes{job="node-exporter"} + node_memory_Buffers_bytes{job="node-exporter"})
- /
- sum(node_memory_MemTotal_bytes{job="node-exporter"})
- record: ':node_memory_utilisation:'
- - expr: sum(node_memory_MemFree_bytes{job="node-exporter"} + node_memory_Cached_bytes{job="node-exporter"} + node_memory_Buffers_bytes{job="node-exporter"})
- record: :node_memory_MemFreeCachedBuffers_bytes:sum
- - expr: sum(node_memory_MemTotal_bytes{job="node-exporter"})
- record: :node_memory_MemTotal_bytes:sum
- - expr: |-
- sum by (node) (
- (node_memory_MemFree_bytes{job="node-exporter"} + node_memory_Cached_bytes{job="node-exporter"} + node_memory_Buffers_bytes{job="node-exporter"})
- * on (namespace, pod) group_left(node)
- node_namespace_pod:kube_pod_info:
- )
- record: node:node_memory_bytes_available:sum
- - expr: |-
- sum by (node) (
- node_memory_MemTotal_bytes{job="node-exporter"}
- * on (namespace, pod) group_left(node)
- node_namespace_pod:kube_pod_info:
- )
- record: node:node_memory_bytes_total:sum
- - expr: |-
- (node:node_memory_bytes_total:sum - node:node_memory_bytes_available:sum)
- /
- scalar(sum(node:node_memory_bytes_total:sum))
- record: node:node_memory_utilisation:ratio
- - expr: |-
- 1e3 * sum(
- (rate(node_vmstat_pgpgin{job="node-exporter"}[1m])
- + rate(node_vmstat_pgpgout{job="node-exporter"}[1m]))
- )
- record: :node_memory_swap_io_bytes:sum_rate
- - expr: |-
- 1 -
- sum by (node) (
- (node_memory_MemFree_bytes{job="node-exporter"} + node_memory_Cached_bytes{job="node-exporter"} + node_memory_Buffers_bytes{job="node-exporter"})
- * on (namespace, pod) group_left(node)
- node_namespace_pod:kube_pod_info:
- )
- /
- sum by (node) (
- node_memory_MemTotal_bytes{job="node-exporter"}
- * on (namespace, pod) group_left(node)
- node_namespace_pod:kube_pod_info:
- )
- record: 'node:node_memory_utilisation:'
- - expr: 1 - (node:node_memory_bytes_available:sum / node:node_memory_bytes_total:sum)
- record: 'node:node_memory_utilisation_2:'
- - expr: |-
- 1e3 * sum by (node) (
- (rate(node_vmstat_pgpgin{job="node-exporter"}[1m])
- + rate(node_vmstat_pgpgout{job="node-exporter"}[1m]))
- * on (namespace, pod) group_left(node)
- node_namespace_pod:kube_pod_info:
- )
- record: node:node_memory_swap_io_bytes:sum_rate
- - expr: avg(irate(node_disk_io_time_seconds_total{job="node-exporter",device=~"nvme.+|rbd.+|sd.+|vd.+|xvd.+"}[1m]))
- record: :node_disk_utilisation:avg_irate
- - expr: |-
- avg by (node) (
- irate(node_disk_io_time_seconds_total{job="node-exporter",device=~"nvme.+|rbd.+|sd.+|vd.+|xvd.+"}[1m])
- * on (namespace, pod) group_left(node)
- node_namespace_pod:kube_pod_info:
- )
- record: node:node_disk_utilisation:avg_irate
- - expr: avg(irate(node_disk_io_time_weighted_seconds_total{job="node-exporter",device=~"nvme.+|rbd.+|sd.+|vd.+|xvd.+"}[1m]) / 1e3)
- record: :node_disk_saturation:avg_irate
- - expr: |-
- avg by (node) (
- irate(node_disk_io_time_weighted_seconds_total{job="node-exporter",device=~"nvme.+|rbd.+|sd.+|vd.+|xvd.+"}[1m]) / 1e3
- * on (namespace, pod) group_left(node)
- node_namespace_pod:kube_pod_info:
- )
- record: node:node_disk_saturation:avg_irate
- - expr: |-
- max by (namespace, pod, device) ((node_filesystem_size_bytes{fstype=~"ext[234]|btrfs|xfs|zfs"}
- - node_filesystem_avail_bytes{fstype=~"ext[234]|btrfs|xfs|zfs"})
- / node_filesystem_size_bytes{fstype=~"ext[234]|btrfs|xfs|zfs"})
- record: 'node:node_filesystem_usage:'
- - expr: max by (namespace, pod, device) (node_filesystem_avail_bytes{fstype=~"ext[234]|btrfs|xfs|zfs"} / node_filesystem_size_bytes{fstype=~"ext[234]|btrfs|xfs|zfs"})
- record: 'node:node_filesystem_avail:'
- - expr: |-
- sum(irate(node_network_receive_bytes_total{job="node-exporter",device="eth0"}[1m])) +
- sum(irate(node_network_transmit_bytes_total{job="node-exporter",device="eth0"}[1m]))
- record: :node_net_utilisation:sum_irate
- - expr: |-
- sum by (node) (
- (irate(node_network_receive_bytes_total{job="node-exporter",device="eth0"}[1m]) +
- irate(node_network_transmit_bytes_total{job="node-exporter",device="eth0"}[1m]))
- * on (namespace, pod) group_left(node)
- node_namespace_pod:kube_pod_info:
- )
- record: node:node_net_utilisation:sum_irate
- - expr: |-
- sum(irate(node_network_receive_drop_total{job="node-exporter",device="eth0"}[1m])) +
- sum(irate(node_network_transmit_drop_total{job="node-exporter",device="eth0"}[1m]))
- record: :node_net_saturation:sum_irate
- - expr: |-
- sum by (node) (
- (irate(node_network_receive_drop_total{job="node-exporter",device="eth0"}[1m]) +
- irate(node_network_transmit_drop_total{job="node-exporter",device="eth0"}[1m]))
- * on (namespace, pod) group_left(node)
- node_namespace_pod:kube_pod_info:
- )
- record: node:node_net_saturation:sum_irate
- - expr: |-
- max(
- max(
- kube_pod_info{job="kube-state-metrics", host_ip!=""}
- ) by (node, host_ip)
- * on (host_ip) group_right (node)
- label_replace(
- (max(node_filesystem_files{job="node-exporter", mountpoint="/"}) by (instance)), "host_ip", "$1", "instance", "(.*):.*"
- )
- ) by (node)
- record: 'node:node_inodes_total:'
- - expr: |-
- max(
- max(
- kube_pod_info{job="kube-state-metrics", host_ip!=""}
- ) by (node, host_ip)
- * on (host_ip) group_right (node)
- label_replace(
- (max(node_filesystem_files_free{job="node-exporter", mountpoint="/"}) by (instance)), "host_ip", "$1", "instance", "(.*):.*"
- )
- ) by (node)
- record: 'node:node_inodes_free:'
-{{- end }} \ No newline at end of file
diff --git a/vnfs/DAaaS/prometheus-operator/templates/alertmanager/rules/prometheus-operator.yaml b/vnfs/DAaaS/prometheus-operator/templates/alertmanager/rules/prometheus-operator.yaml
deleted file mode 100644
index 7f19763e..00000000
--- a/vnfs/DAaaS/prometheus-operator/templates/alertmanager/rules/prometheus-operator.yaml
+++ /dev/null
@@ -1,36 +0,0 @@
-# Generated from 'prometheus-operator' group from https://raw.githubusercontent.com/coreos/prometheus-operator/master/contrib/kube-prometheus/manifests/prometheus-rules.yaml
-{{- if and .Values.defaultRules.create }}
-{{- $operatorJob := printf "%s-%s" (include "prometheus-operator.fullname" .) "operator" }}
-apiVersion: {{ printf "%s/v1" (.Values.prometheusOperator.crdApiGroup | default "monitoring.coreos.com") }}
-kind: PrometheusRule
-metadata:
- name: {{ printf "%s-%s" (include "prometheus-operator.fullname" .) "prometheus-operator" | trunc 63 | trimSuffix "-" }}
- labels:
- app: {{ template "prometheus-operator.name" . }}
-{{ include "prometheus-operator.labels" . | indent 4 }}
-{{- if .Values.defaultRules.labels }}
-{{ toYaml .Values.defaultRules.labels | indent 4 }}
-{{- end }}
-{{- if .Values.defaultRules.annotations }}
- annotations:
-{{ toYaml .Values.defaultRules.annotations | indent 4 }}
-{{- end }}
-spec:
- groups:
- - name: prometheus-operator
- rules:
- - alert: PrometheusOperatorReconcileErrors
- annotations:
- message: Errors while reconciling {{`{{ $labels.controller }}`}} in {{`{{ $labels.namespace }}`}} Namespace.
- expr: rate(prometheus_operator_reconcile_errors_total{job="{{ $operatorJob }}"}[5m]) > 0.1
- for: 10m
- labels:
- severity: warning
- - alert: PrometheusOperatorNodeLookupErrors
- annotations:
- message: Errors while reconciling Prometheus in {{`{{ $labels.namespace }}`}} Namespace.
- expr: rate(prometheus_operator_node_address_lookup_errors_total{job="{{ $operatorJob }}"}[5m]) > 0.1
- for: 10m
- labels:
- severity: warning
-{{- end }} \ No newline at end of file
diff --git a/vnfs/DAaaS/prometheus-operator/templates/alertmanager/rules/prometheus.rules.yaml b/vnfs/DAaaS/prometheus-operator/templates/alertmanager/rules/prometheus.rules.yaml
deleted file mode 100644
index e2d8a68d..00000000
--- a/vnfs/DAaaS/prometheus-operator/templates/alertmanager/rules/prometheus.rules.yaml
+++ /dev/null
@@ -1,102 +0,0 @@
-# Generated from 'prometheus.rules' group from https://raw.githubusercontent.com/coreos/prometheus-operator/master/contrib/kube-prometheus/manifests/prometheus-rules.yaml
-{{- if and .Values.defaultRules.create }}
-{{- $prometheusJob := printf "%s-%s" (include "prometheus-operator.fullname" .) "prometheus" }}
-apiVersion: {{ printf "%s/v1" (.Values.prometheusOperator.crdApiGroup | default "monitoring.coreos.com") }}
-kind: PrometheusRule
-metadata:
- name: {{ printf "%s-%s" (include "prometheus-operator.fullname" .) "prometheus.rules" | trunc 63 | trimSuffix "-" }}
- labels:
- app: {{ template "prometheus-operator.name" . }}
-{{ include "prometheus-operator.labels" . | indent 4 }}
-{{- if .Values.defaultRules.labels }}
-{{ toYaml .Values.defaultRules.labels | indent 4 }}
-{{- end }}
-{{- if .Values.defaultRules.annotations }}
- annotations:
-{{ toYaml .Values.defaultRules.annotations | indent 4 }}
-{{- end }}
-spec:
- groups:
- - name: prometheus.rules
- rules:
- - alert: PrometheusConfigReloadFailed
- annotations:
- description: Reloading Prometheus' configuration has failed for {{`{{$labels.namespace}}`}}/{{`{{$labels.pod}}`}}
- summary: Reloading Prometheus' configuration failed
- expr: prometheus_config_last_reload_successful{job="{{ $prometheusJob }}"} == 0
- for: 10m
- labels:
- severity: warning
- - alert: PrometheusNotificationQueueRunningFull
- annotations:
- description: Prometheus' alert notification queue is running full for {{`{{$labels.namespace}}`}}/{{`{{ $labels.pod}}`}}
- summary: Prometheus' alert notification queue is running full
- expr: predict_linear(prometheus_notifications_queue_length{job="{{ $prometheusJob }}"}[5m], 60 * 30) > prometheus_notifications_queue_capacity{job="{{ $prometheusJob }}"}
- for: 10m
- labels:
- severity: warning
- - alert: PrometheusErrorSendingAlerts
- annotations:
- description: Errors while sending alerts from Prometheus {{`{{$labels.namespace}}`}}/{{`{{ $labels.pod}}`}} to Alertmanager {{`{{$labels.Alertmanager}}`}}
- summary: Errors while sending alert from Prometheus
- expr: rate(prometheus_notifications_errors_total{job="{{ $prometheusJob }}"}[5m]) / rate(prometheus_notifications_sent_total{job="{{ $prometheusJob }}"}[5m]) > 0.01
- for: 10m
- labels:
- severity: warning
- - alert: PrometheusErrorSendingAlerts
- annotations:
- description: Errors while sending alerts from Prometheus {{`{{$labels.namespace}}`}}/{{`{{ $labels.pod}}`}} to Alertmanager {{`{{$labels.Alertmanager}}`}}
- summary: Errors while sending alerts from Prometheus
- expr: rate(prometheus_notifications_errors_total{job="{{ $prometheusJob }}"}[5m]) / rate(prometheus_notifications_sent_total{job="{{ $prometheusJob }}"}[5m]) > 0.03
- for: 10m
- labels:
- severity: critical
- - alert: PrometheusNotConnectedToAlertmanagers
- annotations:
- description: Prometheus {{`{{ $labels.namespace }}`}}/{{`{{ $labels.pod}}`}} is not connected to any Alertmanagers
- summary: Prometheus is not connected to any Alertmanagers
- expr: prometheus_notifications_alertmanagers_discovered{job="{{ $prometheusJob }}"} < 1
- for: 10m
- labels:
- severity: warning
- - alert: PrometheusTSDBReloadsFailing
- annotations:
- description: '{{`{{$labels.job}}`}} at {{`{{$labels.instance}}`}} had {{`{{$value | humanize}}`}} reload failures over the last four hours.'
- summary: Prometheus has issues reloading data blocks from disk
- expr: increase(prometheus_tsdb_reloads_failures_total{job="{{ $prometheusJob }}"}[2h]) > 0
- for: 12h
- labels:
- severity: warning
- - alert: PrometheusTSDBCompactionsFailing
- annotations:
- description: '{{`{{$labels.job}}`}} at {{`{{$labels.instance}}`}} had {{`{{$value | humanize}}`}} compaction failures over the last four hours.'
- summary: Prometheus has issues compacting sample blocks
- expr: increase(prometheus_tsdb_compactions_failed_total{job="{{ $prometheusJob }}"}[2h]) > 0
- for: 12h
- labels:
- severity: warning
- - alert: PrometheusTSDBWALCorruptions
- annotations:
- description: '{{`{{$labels.job}}`}} at {{`{{$labels.instance}}`}} has a corrupted write-ahead log (WAL).'
- summary: Prometheus write-ahead log is corrupted
- expr: tsdb_wal_corruptions_total{job="{{ $prometheusJob }}"} > 0
- for: 4h
- labels:
- severity: warning
- - alert: PrometheusNotIngestingSamples
- annotations:
- description: Prometheus {{`{{ $labels.namespace }}`}}/{{`{{ $labels.pod}}`}} isn't ingesting samples.
- summary: Prometheus isn't ingesting samples
- expr: rate(prometheus_tsdb_head_samples_appended_total{job="{{ $prometheusJob }}"}[5m]) <= 0
- for: 10m
- labels:
- severity: warning
- - alert: PrometheusTargetScrapesDuplicate
- annotations:
- description: '{{`{{$labels.namespace}}`}}/{{`{{$labels.pod}}`}} has many samples rejected due to duplicate timestamps but different values'
- summary: Prometheus has many samples rejected
- expr: increase(prometheus_target_scrapes_sample_duplicate_timestamp_total{job="{{ $prometheusJob }}"}[5m]) > 0
- for: 10m
- labels:
- severity: warning
-{{- end }} \ No newline at end of file
diff --git a/vnfs/DAaaS/prometheus-operator/templates/alertmanager/secret.yaml b/vnfs/DAaaS/prometheus-operator/templates/alertmanager/secret.yaml
deleted file mode 100644
index e73c465f..00000000
--- a/vnfs/DAaaS/prometheus-operator/templates/alertmanager/secret.yaml
+++ /dev/null
@@ -1,14 +0,0 @@
-{{- if and .Values.alertmanager.enabled }}
-apiVersion: v1
-kind: Secret
-metadata:
- name: alertmanager-{{ template "prometheus-operator.fullname" . }}-alertmanager
- labels:
- app: {{ template "prometheus-operator.name" . }}-alertmanager
-{{ include "prometheus-operator.labels" . | indent 4 }}
-data:
- alertmanager.yaml: {{ toYaml .Values.alertmanager.config | b64enc | quote }}
-{{- range $key, $val := .Values.alertmanager.templateFiles }}
- {{ $key }}: {{ $val | b64enc | quote }}
-{{- end }}
-{{- end }} \ No newline at end of file
diff --git a/vnfs/DAaaS/prometheus-operator/templates/alertmanager/service.yaml b/vnfs/DAaaS/prometheus-operator/templates/alertmanager/service.yaml
deleted file mode 100644
index d10bf745..00000000
--- a/vnfs/DAaaS/prometheus-operator/templates/alertmanager/service.yaml
+++ /dev/null
@@ -1,42 +0,0 @@
-{{- if .Values.alertmanager.enabled }}
-apiVersion: v1
-kind: Service
-metadata:
- name: {{ template "prometheus-operator.fullname" . }}-alertmanager
- labels:
- app: {{ template "prometheus-operator.name" . }}-alertmanager
-{{ include "prometheus-operator.labels" . | indent 4 }}
-{{- if .Values.alertmanager.service.annotations }}
- annotations:
-{{ toYaml .Values.alertmanager.service.annotations | indent 4 }}
-{{- end }}
-spec:
-{{- if .Values.alertmanager.service.clusterIP }}
- clusterIP: {{ .Values.alertmanager.service.clusterIP }}
-{{- end }}
-{{- if .Values.alertmanager.service.externalIPs }}
- externalIPs:
-{{ toYaml .Values.alertmanager.service.externalIPs | indent 4 }}
-{{- end }}
-{{- if .Values.alertmanager.service.loadBalancerIP }}
- loadBalancerIP: {{ .Values.alertmanager.service.loadBalancerIP }}
-{{- end }}
-{{- if .Values.alertmanager.service.loadBalancerSourceRanges }}
- loadBalancerSourceRanges:
- {{- range $cidr := .Values.alertmanager.service.loadBalancerSourceRanges }}
- - {{ $cidr }}
- {{- end }}
-{{- end }}
- ports:
- - name: web
- {{- if eq .Values.alertmanager.service.type "NodePort" }}
- nodePort: {{ .Values.alertmanager.service.nodePort }}
- {{- end }}
- port: 9093
- targetPort: 9093
- protocol: TCP
- selector:
- app: alertmanager
- alertmanager: {{ template "prometheus-operator.fullname" . }}-alertmanager
- type: "{{ .Values.alertmanager.service.type }}"
-{{- end }}
diff --git a/vnfs/DAaaS/prometheus-operator/templates/alertmanager/serviceaccount.yaml b/vnfs/DAaaS/prometheus-operator/templates/alertmanager/serviceaccount.yaml
deleted file mode 100644
index bbed0287..00000000
--- a/vnfs/DAaaS/prometheus-operator/templates/alertmanager/serviceaccount.yaml
+++ /dev/null
@@ -1,11 +0,0 @@
-{{- if and .Values.alertmanager.enabled .Values.global.rbac.create .Values.alertmanager.serviceAccount.create }}
-apiVersion: v1
-kind: ServiceAccount
-metadata:
- name: {{ template "prometheus-operator.alertmanager.serviceAccountName" . }}
- labels:
- app: {{ template "prometheus-operator.name" . }}-alertmanager
-{{ include "prometheus-operator.labels" . | indent 4 }}
-imagePullSecrets:
-{{ toYaml .Values.global.imagePullSecrets | indent 2 }}
-{{- end }}
diff --git a/vnfs/DAaaS/prometheus-operator/templates/alertmanager/servicemonitor.yaml b/vnfs/DAaaS/prometheus-operator/templates/alertmanager/servicemonitor.yaml
deleted file mode 100644
index 5c8cab90..00000000
--- a/vnfs/DAaaS/prometheus-operator/templates/alertmanager/servicemonitor.yaml
+++ /dev/null
@@ -1,21 +0,0 @@
-{{- if and .Values.alertmanager.enabled .Values.alertmanager.serviceMonitor.selfMonitor }}
-apiVersion: {{ printf "%s/v1" (.Values.prometheusOperator.crdApiGroup | default "monitoring.coreos.com") }}
-kind: ServiceMonitor
-metadata:
- name: {{ template "prometheus-operator.fullname" . }}-alertmanager
- labels:
- app: {{ template "prometheus-operator.name" . }}-alertmanager
-{{ include "prometheus-operator.labels" . | indent 4 }}
-spec:
- selector:
- matchLabels:
- app: {{ template "prometheus-operator.name" . }}-alertmanager
- release: {{ .Release.Name | quote }}
- namespaceSelector:
- matchNames:
- - {{ .Release.Namespace | quote }}
- endpoints:
- - port: web
- interval: 30s
- path: "{{ trimSuffix "/" .Values.alertmanager.alertmanagerSpec.routePrefix }}/metrics"
-{{- end }}