diff options
author | Rajamohan Raj <rajamohan.raj@intel.com> | 2020-06-18 14:55:42 -0700 |
---|---|---|
committer | Rajamohan Raj <rajamohan.raj@intel.com> | 2020-06-24 11:57:04 -0700 |
commit | 94bfc956f43bcaec29f2fc9844b9ca4c35d72260 (patch) | |
tree | bc5050f2e8c6c87672e06f8da319363745f49f8f /kud/tests/vnfs/comp-app/collection/app2/helm/prometheus-operator/templates/prometheus/rules/kubernetes-resources.yaml | |
parent | 819a687195ef9d6c8dd9753d366c0120886d7736 (diff) |
Integrate collectd, prometheus and grafana.
In this patch, made neccessary changes in collectd and prometheus
helm charts such that prometheus can pull data from collectd.
Prometheus GUI and Grafana GUI are verified as well.
Issue-ID: MULTICLOUD-1082
Signed-off-by: Rajamohan Raj <rajamohan.raj@intel.com>
Change-Id: I39b7e20f46aa789272be671056a76dd926701068
Diffstat (limited to 'kud/tests/vnfs/comp-app/collection/app2/helm/prometheus-operator/templates/prometheus/rules/kubernetes-resources.yaml')
-rwxr-xr-x | kud/tests/vnfs/comp-app/collection/app2/helm/prometheus-operator/templates/prometheus/rules/kubernetes-resources.yaml | 103 |
1 files changed, 103 insertions, 0 deletions
diff --git a/kud/tests/vnfs/comp-app/collection/app2/helm/prometheus-operator/templates/prometheus/rules/kubernetes-resources.yaml b/kud/tests/vnfs/comp-app/collection/app2/helm/prometheus-operator/templates/prometheus/rules/kubernetes-resources.yaml new file mode 100755 index 00000000..ee51ebd0 --- /dev/null +++ b/kud/tests/vnfs/comp-app/collection/app2/helm/prometheus-operator/templates/prometheus/rules/kubernetes-resources.yaml @@ -0,0 +1,103 @@ +{{- /* +Generated from 'kubernetes-resources' group from https://raw.githubusercontent.com/coreos/kube-prometheus/release-0.1/manifests/prometheus-rules.yaml +Do not change in-place! In order to change this file first read following link: +https://github.com/helm/charts/tree/master/stable/prometheus-operator/hack +*/ -}} +{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }} +{{- if and (semverCompare ">=1.10.0-0" $kubeTargetVersion) (semverCompare "<1.14.0-0" $kubeTargetVersion) .Values.defaultRules.create .Values.defaultRules.rules.kubernetesResources }} +apiVersion: monitoring.coreos.com/v1 +kind: PrometheusRule +metadata: + name: {{ printf "%s-%s" (include "prometheus-operator.fullname" .) "kubernetes-resources" | trunc 63 | trimSuffix "-" }} + namespace: {{ template "prometheus-operator.namespace" . }} + labels: + app: {{ template "prometheus-operator.name" . }} +{{ include "prometheus-operator.labels" . | indent 4 }} +{{- if .Values.defaultRules.labels }} +{{ toYaml .Values.defaultRules.labels | indent 4 }} +{{- end }} +{{- if .Values.defaultRules.annotations }} + annotations: +{{ toYaml .Values.defaultRules.annotations | indent 4 }} +{{- end }} +spec: + groups: + - name: kubernetes-resources + rules: + - alert: KubeCPUOvercommit + annotations: + message: Cluster has overcommitted CPU resource requests for Pods and cannot tolerate node failure. + runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-kubecpuovercommit + expr: |- + sum(namespace_name:kube_pod_container_resource_requests_cpu_cores:sum) + / + sum(node:node_num_cpu:sum) + > + (count(node:node_num_cpu:sum)-1) / count(node:node_num_cpu:sum) + for: 5m + labels: + severity: warning + - alert: KubeMemOvercommit + annotations: + message: Cluster has overcommitted memory resource requests for Pods and cannot tolerate node failure. + runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-kubememovercommit + expr: |- + sum(namespace_name:kube_pod_container_resource_requests_memory_bytes:sum) + / + sum(node_memory_MemTotal_bytes) + > + (count(node:node_num_cpu:sum)-1) + / + count(node:node_num_cpu:sum) + for: 5m + labels: + severity: warning + - alert: KubeCPUOvercommit + annotations: + message: Cluster has overcommitted CPU resource requests for Namespaces. + runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-kubecpuovercommit + expr: |- + sum(kube_resourcequota{job="kube-state-metrics", type="hard", resource="cpu"}) + / + sum(node:node_num_cpu:sum) + > 1.5 + for: 5m + labels: + severity: warning + - alert: KubeMemOvercommit + annotations: + message: Cluster has overcommitted memory resource requests for Namespaces. + runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-kubememovercommit + expr: |- + sum(kube_resourcequota{job="kube-state-metrics", type="hard", resource="memory"}) + / + sum(node_memory_MemTotal_bytes{job="node-exporter"}) + > 1.5 + for: 5m + labels: + severity: warning + - alert: KubeQuotaExceeded + annotations: + message: Namespace {{`{{`}} $labels.namespace {{`}}`}} is using {{`{{`}} printf "%0.0f" $value {{`}}`}}% of its {{`{{`}} $labels.resource {{`}}`}} quota. + runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-kubequotaexceeded + expr: |- + 100 * kube_resourcequota{job="kube-state-metrics", type="used"} + / ignoring(instance, job, type) + (kube_resourcequota{job="kube-state-metrics", type="hard"} > 0) + > 90 + for: 15m + labels: + severity: warning + - alert: CPUThrottlingHigh + annotations: + message: '{{`{{`}} printf "%0.0f" $value {{`}}`}}% throttling of CPU in namespace {{`{{`}} $labels.namespace {{`}}`}} for container {{`{{`}} $labels.container_name {{`}}`}} in pod {{`{{`}} $labels.pod_name {{`}}`}}.' + runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-cputhrottlinghigh + expr: |- + 100 * sum(increase(container_cpu_cfs_throttled_periods_total{container_name!="", }[5m])) by (container_name, pod_name, namespace) + / + sum(increase(container_cpu_cfs_periods_total{}[5m])) by (container_name, pod_name, namespace) + > 25 + for: 15m + labels: + severity: warning +{{- end }}
\ No newline at end of file |