diff options
author | miroslavmasaryk <miroslav.masaryk@telekom.com> | 2023-03-20 10:21:36 +0100 |
---|---|---|
committer | samrim96 <miroslav.masaryk@telekom.com> | 2023-04-03 16:07:39 +0200 |
commit | aa5f0fa05ac774865691e2c17cd0c452f28d2492 (patch) | |
tree | 975076f6bf6c41c8ddd8a4678a45dfb798918e7a /kubernetes | |
parent | 1e6ad11acdc684bfba3ad2ba99a7a20a1b813fce (diff) |
[STRIMZI] Monitoring chart improvement
Add Monitoring into charts of Strimzi
Issue-ID: OOM-3150
Signed-off-by: miroslavmasaryk <miroslav.masaryk@telekom.com>
Change-Id: I0621399f5f555f40f96d52f6c64e404bd91f119b
Diffstat (limited to 'kubernetes')
-rw-r--r-- | kubernetes/strimzi/resources/metrics/cruisecontrol-metrics-config.yml | 20 | ||||
-rw-r--r-- | kubernetes/strimzi/resources/metrics/kafka-metrics-config.yml | 137 | ||||
-rw-r--r-- | kubernetes/strimzi/resources/metrics/zookeeper-metrics-config.yml | 44 | ||||
-rw-r--r-- | kubernetes/strimzi/templates/configmap.yaml | 21 | ||||
-rw-r--r-- | kubernetes/strimzi/templates/kafka-rebalance.yaml | 24 | ||||
-rw-r--r-- | kubernetes/strimzi/templates/pod-monitor.yaml | 45 | ||||
-rw-r--r-- | kubernetes/strimzi/templates/strimzi-kafka.yaml | 46 | ||||
-rw-r--r-- | kubernetes/strimzi/values.yaml | 48 |
8 files changed, 384 insertions, 1 deletions
diff --git a/kubernetes/strimzi/resources/metrics/cruisecontrol-metrics-config.yml b/kubernetes/strimzi/resources/metrics/cruisecontrol-metrics-config.yml new file mode 100644 index 0000000000..12c742ef35 --- /dev/null +++ b/kubernetes/strimzi/resources/metrics/cruisecontrol-metrics-config.yml @@ -0,0 +1,20 @@ +{{/* +# Copyright (c) 2023 Deutsche Telekom +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License.. +*/}} +lowercaseOutputName: true +rules: + - pattern: kafka.cruisecontrol<name=(.+)><>(\w+) + name: kafka_cruisecontrol_$1_$2 + type: GAUGE
\ No newline at end of file diff --git a/kubernetes/strimzi/resources/metrics/kafka-metrics-config.yml b/kubernetes/strimzi/resources/metrics/kafka-metrics-config.yml new file mode 100644 index 0000000000..7ad971fc16 --- /dev/null +++ b/kubernetes/strimzi/resources/metrics/kafka-metrics-config.yml @@ -0,0 +1,137 @@ +{{/* +# Copyright (c) 2023 Deutsche Telekom +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License.. +*/}} +lowercaseOutputName: true +rules: + # Special cases and very specific rules + - pattern: kafka.server<type=(.+), name=(.+), clientId=(.+), topic=(.+), partition=(.*)><>Value + name: kafka_server_$1_$2 + type: GAUGE + labels: + clientId: "$3" + topic: "$4" + partition: "$5" + - pattern: kafka.server<type=(.+), name=(.+), clientId=(.+), brokerHost=(.+), brokerPort=(.+)><>Value + name: kafka_server_$1_$2 + type: GAUGE + labels: + clientId: "$3" + broker: "$4:$5" + - pattern: kafka.server<type=(.+), cipher=(.+), protocol=(.+), listener=(.+), networkProcessor=(.+)><>connections + name: kafka_server_$1_connections_tls_info + type: GAUGE + labels: + cipher: "$2" + protocol: "$3" + listener: "$4" + networkProcessor: "$5" + - pattern: kafka.server<type=(.+), clientSoftwareName=(.+), clientSoftwareVersion=(.+), listener=(.+), networkProcessor=(.+)><>connections + name: kafka_server_$1_connections_software + type: GAUGE + labels: + clientSoftwareName: "$2" + clientSoftwareVersion: "$3" + listener: "$4" + networkProcessor: "$5" + - pattern: "kafka.server<type=(.+), listener=(.+), networkProcessor=(.+)><>(.+):" + name: kafka_server_$1_$4 + type: GAUGE + labels: + listener: "$2" + networkProcessor: "$3" + - pattern: kafka.server<type=(.+), listener=(.+), networkProcessor=(.+)><>(.+) + name: kafka_server_$1_$4 + type: GAUGE + labels: + listener: "$2" + networkProcessor: "$3" + # Some percent metrics use MeanRate attribute + # Ex) kafka.server<type=(KafkaRequestHandlerPool), name=(RequestHandlerAvgIdlePercent)><>MeanRate + - pattern: kafka.(\w+)<type=(.+), name=(.+)Percent\w*><>MeanRate + name: kafka_$1_$2_$3_percent + type: GAUGE + # Generic gauges for percents + - pattern: kafka.(\w+)<type=(.+), name=(.+)Percent\w*><>Value + name: kafka_$1_$2_$3_percent + type: GAUGE + - pattern: kafka.(\w+)<type=(.+), name=(.+)Percent\w*, (.+)=(.+)><>Value + name: kafka_$1_$2_$3_percent + type: GAUGE + labels: + "$4": "$5" + # Generic per-second counters with 0-2 key/value pairs + - pattern: kafka.(\w+)<type=(.+), name=(.+)PerSec\w*, (.+)=(.+), (.+)=(.+)><>Count + name: kafka_$1_$2_$3_total + type: COUNTER + labels: + "$4": "$5" + "$6": "$7" + - pattern: kafka.(\w+)<type=(.+), name=(.+)PerSec\w*, (.+)=(.+)><>Count + name: kafka_$1_$2_$3_total + type: COUNTER + labels: + "$4": "$5" + - pattern: kafka.(\w+)<type=(.+), name=(.+)PerSec\w*><>Count + name: kafka_$1_$2_$3_total + type: COUNTER + # Generic gauges with 0-2 key/value pairs + - pattern: kafka.(\w+)<type=(.+), name=(.+), (.+)=(.+), (.+)=(.+)><>Value + name: kafka_$1_$2_$3 + type: GAUGE + labels: + "$4": "$5" + "$6": "$7" + - pattern: kafka.(\w+)<type=(.+), name=(.+), (.+)=(.+)><>Value + name: kafka_$1_$2_$3 + type: GAUGE + labels: + "$4": "$5" + - pattern: kafka.(\w+)<type=(.+), name=(.+)><>Value + name: kafka_$1_$2_$3 + type: GAUGE + # Emulate Prometheus 'Summary' metrics for the exported 'Histogram's. + # Note that these are missing the '_sum' metric! + - pattern: kafka.(\w+)<type=(.+), name=(.+), (.+)=(.+), (.+)=(.+)><>Count + name: kafka_$1_$2_$3_count + type: COUNTER + labels: + "$4": "$5" + "$6": "$7" + - pattern: kafka.(\w+)<type=(.+), name=(.+), (.+)=(.*), (.+)=(.+)><>(\d+)thPercentile + name: kafka_$1_$2_$3 + type: GAUGE + labels: + "$4": "$5" + "$6": "$7" + quantile: "0.$8" + - pattern: kafka.(\w+)<type=(.+), name=(.+), (.+)=(.+)><>Count + name: kafka_$1_$2_$3_count + type: COUNTER + labels: + "$4": "$5" + - pattern: kafka.(\w+)<type=(.+), name=(.+), (.+)=(.*)><>(\d+)thPercentile + name: kafka_$1_$2_$3 + type: GAUGE + labels: + "$4": "$5" + quantile: "0.$6" + - pattern: kafka.(\w+)<type=(.+), name=(.+)><>Count + name: kafka_$1_$2_$3_count + type: COUNTER + - pattern: kafka.(\w+)<type=(.+), name=(.+)><>(\d+)thPercentile + name: kafka_$1_$2_$3 + type: GAUGE + labels: + quantile: "0.$4"
\ No newline at end of file diff --git a/kubernetes/strimzi/resources/metrics/zookeeper-metrics-config.yml b/kubernetes/strimzi/resources/metrics/zookeeper-metrics-config.yml new file mode 100644 index 0000000000..6a1eab7825 --- /dev/null +++ b/kubernetes/strimzi/resources/metrics/zookeeper-metrics-config.yml @@ -0,0 +1,44 @@ +{{/* +# Copyright (c) 2023 Deutsche Telekom +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License.. +*/}} +lowercaseOutputName: true +rules: + # replicated Zookeeper + - pattern: "org.apache.ZooKeeperService<name0=ReplicatedServer_id(\\d+)><>(\\w+)" + name: "zookeeper_$2" + type: GAUGE + - pattern: "org.apache.ZooKeeperService<name0=ReplicatedServer_id(\\d+), name1=replica.(\\d+)><>(\\w+)" + name: "zookeeper_$3" + type: GAUGE + labels: + replicaId: "$2" + - pattern: "org.apache.ZooKeeperService<name0=ReplicatedServer_id(\\d+), name1=replica.(\\d+), name2=(\\w+)><>(Packets\\w+)" + name: "zookeeper_$4" + type: COUNTER + labels: + replicaId: "$2" + memberType: "$3" + - pattern: "org.apache.ZooKeeperService<name0=ReplicatedServer_id(\\d+), name1=replica.(\\d+), name2=(\\w+)><>(\\w+)" + name: "zookeeper_$4" + type: GAUGE + labels: + replicaId: "$2" + memberType: "$3" + - pattern: "org.apache.ZooKeeperService<name0=ReplicatedServer_id(\\d+), name1=replica.(\\d+), name2=(\\w+), name3=(\\w+)><>(\\w+)" + name: "zookeeper_$4_$5" + type: GAUGE + labels: + replicaId: "$2" + memberType: "$3"
\ No newline at end of file diff --git a/kubernetes/strimzi/templates/configmap.yaml b/kubernetes/strimzi/templates/configmap.yaml new file mode 100644 index 0000000000..ace51f78ba --- /dev/null +++ b/kubernetes/strimzi/templates/configmap.yaml @@ -0,0 +1,21 @@ +{{/* +# Copyright (c) 2023 Deutsche Telekom +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License.. +*/}} +{{- if .Values.metrics.enabled }} +apiVersion: v1 +kind: ConfigMap +metadata: {{- include "common.resourceMetadata" . | nindent 2 }} +data: {{ tpl (.Files.Glob "resources/metrics/*").AsConfig . | nindent 2 }} +{{ end }} diff --git a/kubernetes/strimzi/templates/kafka-rebalance.yaml b/kubernetes/strimzi/templates/kafka-rebalance.yaml new file mode 100644 index 0000000000..6d5f143220 --- /dev/null +++ b/kubernetes/strimzi/templates/kafka-rebalance.yaml @@ -0,0 +1,24 @@ +{{/* +# Copyright (c) 2023 Deutsche Telekom +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License.. +*/}} +{{- if .Values.cruiseControl.kafkaRebalance.enabled }} +apiVersion: kafka.strimzi.io/v1beta2 +kind: KafkaRebalance +metadata: + name: {{ include "common.fullname" . }}-kafka-rebalance + labels: + strimzi.io/cluster: {{ include "common.release" . }}-strimzi +spec: {} +{{- end }} diff --git a/kubernetes/strimzi/templates/pod-monitor.yaml b/kubernetes/strimzi/templates/pod-monitor.yaml new file mode 100644 index 0000000000..be288a4d75 --- /dev/null +++ b/kubernetes/strimzi/templates/pod-monitor.yaml @@ -0,0 +1,45 @@ +{{/* +# Copyright (c) 2023 Deutsche Telekom +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License.. +*/}} +{{- if .Values.metrics.podMonitor.enabled }} +apiVersion: monitoring.coreos.com/v1 +kind: PodMonitor +metadata: + name: {{ include "common.fullname" . }}-podmonitor + ## podMonitor labels for prometheus to pick up the podMonitor + ## ref: https://github.com/prometheus-operator/prometheus-operator/blob/main/Documentation/api.md#monitoring.coreos.com/v1.PodMonitor + ## + # labels: + # prometheus: kube-prometheus + labels: {{- toYaml $.Values.metrics.podMonitor.labels | nindent 4 }} +spec: + selector: + matchLabels: + strimzi.io/cluster: {{ include "common.release" . }}-strimzi + podMetricsEndpoints: + - port: {{ .Values.metrics.podMonitor.port }} + {{- if .Values.metrics.podMonitor.relabelings }} + ## RelabelConfigs to apply to samples before scraping + ## ref: https://github.com/coreos/prometheus-operator/blob/master/Documentation/api.md#relabelconfig + ## Value is evalued as a template + relabelings: {{- toYaml .Values.metrics.podMonitor.relabelings | nindent 6 }} + {{- end }} + {{- if .Values.metrics.podMonitor.metricRelabelings }} + metricRelabelings: {{- toYaml .Values.metrics.podMonitor.metricRelabelings | nindent 6 }} + ## MetricRelabelConfigs to apply to samples before ingestion + ## ref: https://github.com/coreos/prometheus-operator/blob/master/Documentation/api.md#relabelconfig + ## Value is evalued as a template + {{- end }} +{{- end }} diff --git a/kubernetes/strimzi/templates/strimzi-kafka.yaml b/kubernetes/strimzi/templates/strimzi-kafka.yaml index 3ce7b1d627..421d93a6cb 100644 --- a/kubernetes/strimzi/templates/strimzi-kafka.yaml +++ b/kubernetes/strimzi/templates/strimzi-kafka.yaml @@ -89,6 +89,14 @@ spec: size: {{ .Values.persistence.kafka.size }} deleteClaim: true class: {{ include "common.storageClass" (dict "dot" . "suffix" "kafka" "persistenceInfos" .Values.persistence.kafka) }} + {{- if .Values.metrics.kafkaExporter.enabled }} + metricsConfig: + type: {{ .Values.metrics.kafkaExporter.metricsConfig.type }} + valueFrom: + configMapKeyRef: + name: {{ include "common.fullname" . }} + key: kafka-metrics-config.yml + {{- end }} zookeeper: template: pod: @@ -107,7 +115,43 @@ spec: size: {{ .Values.persistence.zookeeper.size }} deleteClaim: true class: {{ include "common.storageClass" (dict "dot" . "suffix" "zk" "persistenceInfos" .Values.persistence.zookeeper) }} + {{- if .Values.metrics.kafkaExporter.enabled }} + metricsConfig: + type: {{ .Values.metrics.kafkaExporter.metricsConfig.type }} + valueFrom: + configMapKeyRef: + name: {{ include "common.fullname" . }} + key: zookeeper-metrics-config.yml + {{- end }} entityOperator: topicOperator: {} userOperator: {} - + {{- if .Values.cruiseControl.enabled }} + cruiseControl: + metricsConfig: + type: {{ .Values.cruiseControl.metricsConfig.type }} + valueFrom: + configMapKeyRef: + name: {{ include "common.fullname" . }} + key: cruisecontrol-metrics-config.yml + {{- end }} + {{- if .Values.metrics.kafkaExporter.enabled }} + kafkaExporter: + topicRegex: {{ .Values.metrics.kafkaExporter.topicRegex }} + groupRegex: {{ .Values.metrics.kafkaExporter.groupRegex }} + resources: + requests: + cpu: {{ .Values.metrics.kafkaExporter.resources.requests.cpu }} + memory: {{ .Values.metrics.kafkaExporter.resources.requests.memory }} + limits: + cpu: {{ .Values.metrics.kafkaExporter.resources.limits.cpu }} + memory: {{ .Values.metrics.kafkaExporter.resources.limits.memory }} + logging: {{ .Values.metrics.kafkaExporter.logging }} + enableSaramaLogging: {{ .Values.metrics.kafkaExporter.enableSaramaLogging }} + readinessProbe: + initialDelaySeconds: {{ .Values.metrics.kafkaExporter.readinessProbe.initialDelaySeconds }} + timeoutSeconds: {{ .Values.metrics.kafkaExporter.readinessProbe.timeoutSeconds }} + livenessProbe: + initialDelaySeconds: {{ .Values.metrics.kafkaExporter.livenessProbe.initialDelaySeconds }} + timeoutSeconds: {{ .Values.metrics.kafkaExporter.livenessProbe.timeoutSeconds }} + {{- end }} diff --git a/kubernetes/strimzi/values.yaml b/kubernetes/strimzi/values.yaml index 057f2003c7..8963cf3cda 100644 --- a/kubernetes/strimzi/values.yaml +++ b/kubernetes/strimzi/values.yaml @@ -90,6 +90,54 @@ ingress: exposedPort: *advertizedPortBroker2 exposedProtocol: TLS +# Kafka Exporter for metrics +metrics: + enabled: false + kafkaExporter: + enabled: false + metricsConfig: + type: jmxPrometheusExporter + topicRegex: ".*" + groupRegex: ".*" + resources: + requests: + cpu: 2000m + memory: 640Mi + limits: + cpu: 5000m + memory: 1280Mi + logging: debug + enableSaramaLogging: true + readinessProbe: + initialDelaySeconds: 15 + timeoutSeconds: 5 + livenessProbe: + initialDelaySeconds: 15 + timeoutSeconds: 5 + podMonitor: + # Prometheus pre requisite. Currently an optional addon in the OOM docs + enabled: false + # default port for strimzi metrics + port: "tcp-prometheus" + # podMonitor labels for prometheus to pick up the podMonitor + # dummy value + labels: + release: dummy + relabelings: [] + metricRelabelings: [] + +cruiseControl: +## Cruise Control provides a Kafka metrics reporter implementation +## once installed into the Kafka brokers, filters and records a wide range of metrics provided by the brokers themselves. +## pre requisite is having 2 or more broker nodes + enabled: false + metricsConfig: + type: jmxPrometheusExporter + ## Custom resource for Kafka that can rebalance your cluster + # ref. https://strimzi.io/blog/2020/06/15/cruise-control/ + kafkaRebalance: + enabled: false + ###################### # Component overrides ###################### |