{{/*
# Copyright © 2019 Intel Corporation Inc
# Modifications Copyright © 2024 Deutsche Telekom
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#       http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
*/}}
# StatefulSet that runs one etcd member per pod.
#
# The first .Values.replicaCount pods (ordinals 0..N-1) bootstrap a new
# cluster from a static peer list. Pods with a higher ordinal register
# themselves with "etcdctl member add" on start-up and are removed again by
# the preStop hook. Every pod records its member id under /var/run/etcd so
# that a restarted pod can re-join with its existing data directory.
apiVersion: apps/v1
kind: StatefulSet
metadata: {{- include "common.resourceMetadata" . | nindent 2 }}
spec:
  selector: {{- include "common.selectors" . | nindent 4 }}
  serviceName: {{ include "common.servicename" . }}
  replicas: {{ .Values.replicaCount }}
  template:
    metadata: {{- include "common.templateMetadata" . | nindent 6 }}
    spec:
    {{- if .Values.affinity }}
      affinity:
{{ toYaml .Values.affinity | indent 8 }}
    {{- end }}
    {{- if .Values.nodeSelector }}
      nodeSelector:
{{ toYaml .Values.nodeSelector | indent 8 }}
    {{- end }}
    {{- if .Values.tolerations }}
      tolerations:
{{ toYaml .Values.tolerations | indent 8 }}
    {{- end }}
      {{- include "common.imagePullSecrets" . | nindent 6 }}
      {{ include "common.podSecurityContext" . | indent 6 | trim }}
      containers:
      - name: {{ include "common.name" . }}
        image: {{ include "repositoryGenerator.googleK8sRepository" . }}/{{ .Values.image }}
        imagePullPolicy: "{{ .Values.pullPolicy }}"
        {{ include "common.containerSecurityContext" . | indent 10 | trim }}
        ports:
        - containerPort: {{ .Values.service.peerInternalPort }}
          name: {{ .Values.service.peerPortName }}
        - containerPort: {{ .Values.service.clientInternalPort }}
          name: {{ .Values.service.clientPortName }}
        {{- /*
          The closing action must be "{{- end }}" and not "{{ end -}}":
          combined with the left-trimmed "if", a right-trimmed "end" glues
          "resources:" onto the previous line when the probe is disabled.
        */}}
        {{- if eq .Values.liveness.enabled true }}
        livenessProbe:
          tcpSocket:
            port: {{ .Values.service.clientInternalPort }}
          initialDelaySeconds: {{ .Values.liveness.initialDelaySeconds }}
          periodSeconds: {{ .Values.liveness.periodSeconds }}
          timeoutSeconds: {{ .Values.liveness.timeoutSeconds }}
        {{- end }}
        resources: {{ include "common.resources" . | nindent 10 }}
        # These variables are consumed by the start-up and preStop scripts below.
        env:
        - name: INITIAL_CLUSTER_SIZE
          value: {{ .Values.replicaCount | quote }}
        - name: SET_NAME
          value: {{ include "common.fullname" . }}
        - name: SERVICE_NAME
          value: {{ include "common.servicename" . }}.{{ include "common.namespace" . }}.svc.{{ .Values.global.clusterName }}
{{- if .Values.extraEnv }}
{{ toYaml .Values.extraEnv | indent 8 }}
{{- end }}
        lifecycle:
          preStop:
            exec:
              # On shutdown, a pod whose ordinal is >= INITIAL_CLUSTER_SIZE
              # removes itself from the cluster and wipes its data directory.
              command:
                - "/bin/sh"
                - "-ec"
                - |
                  # Client endpoints of the initial members
                  EPS=""
                  for i in $(seq 0 $((${INITIAL_CLUSTER_SIZE} - 1))); do
                      EPS="${EPS}${EPS:+,}http://${SET_NAME}-${i}.${SERVICE_NAME}:2379"
                  done

                  HOSTNAME=$(hostname)

                  # Print the id of the member whose peer URL belongs to this pod
                  member_hash() {
                      etcdctl member list | grep http://${HOSTNAME}.${SERVICE_NAME}:2380 | cut -d':' -f1 | cut -d'[' -f1
                  }

                  # Trailing digits of the hostname, i.e. the StatefulSet ordinal
                  SET_ID=${HOSTNAME##*[^0-9]}

                  if [ "${SET_ID}" -ge ${INITIAL_CLUSTER_SIZE} ]; then
                      echo "Removing ${HOSTNAME} from etcd cluster"
                      # The shell runs with -e, so a failing removal aborts the
                      # hook here and the data directory is left untouched.
                      ETCDCTL_ENDPOINT=${EPS} etcdctl member remove $(member_hash)
                      if [ $? -eq 0 ]; then
                          # Remove everything otherwise the cluster will no longer scale-up
                          rm -rf /var/run/etcd/*
                      fi
                  fi
        command:
          - "/bin/sh"
          - "-ec"
          - |
            HOSTNAME=$(hostname)

            # store member id into PVC for later member replacement
            # (meant to be run in the background: it ends with "exit 0")
            collect_member() {
                while ! etcdctl member list >/dev/null 2>&1; do sleep 1; done
                etcdctl member list | grep http://${HOSTNAME}.${SERVICE_NAME}:2380 | cut -d':' -f1 | cut -d'[' -f1 > /var/run/etcd/member_id
                exit 0
            }

            # Print the comma-separated client endpoints of the initial members
            eps() {
                EPS=""
                for i in $(seq 0 $((${INITIAL_CLUSTER_SIZE} - 1))); do
                    EPS="${EPS}${EPS:+,}http://${SET_NAME}-${i}.${SERVICE_NAME}:2379"
                done
                echo ${EPS}
            }

            # Print the id of the member whose peer URL belongs to this pod
            member_hash() {
                etcdctl member list | grep http://${HOSTNAME}.${SERVICE_NAME}:2380 | cut -d':' -f1 | cut -d'[' -f1
            }

            # we should wait for other pods to be up before trying to join
            # otherwise we got "no such host" errors when trying to resolve other members
            for i in $(seq 0 $((${INITIAL_CLUSTER_SIZE} - 1))); do
                if [ "${SET_NAME}-${i}" = "${HOSTNAME}" ]; then
                    echo "Skipping self-checking"
                    continue
                fi
                while true; do
                    echo "Waiting for ${SET_NAME}-${i}.${SERVICE_NAME} to come up"
                    ping -W 1 -c 1 ${SET_NAME}-${i}.${SERVICE_NAME} > /dev/null && break
                    sleep 1s
                done
            done

            # re-joining after failure?
            if [ -e /var/run/etcd/default.etcd ] && [ -f /var/run/etcd/member_id ]; then
                echo "Re-joining etcd member"
                member_id=$(cat /var/run/etcd/member_id)

                # re-join member; a failing update is tolerated on purpose
                ETCDCTL_ENDPOINT=$(eps) etcdctl member update ${member_id} http://${HOSTNAME}.${SERVICE_NAME}:2380 || true
                exec etcd --name ${HOSTNAME} \
                    --listen-peer-urls http://0.0.0.0:2380 \
                    --listen-client-urls http://0.0.0.0:2379 \
                    --advertise-client-urls http://${HOSTNAME}.${SERVICE_NAME}:2379 \
                    --data-dir /var/run/etcd/default.etcd
            fi

            # etcd-SET_ID
            SET_ID=${HOSTNAME##*[^0-9]}

            # adding a new member to existing cluster (assuming all initial pods are available)
            if [ "${SET_ID}" -ge ${INITIAL_CLUSTER_SIZE} ]; then
                export ETCDCTL_ENDPOINT=$(eps)

                # member already added?
                MEMBER_HASH=$(member_hash)
                if [ -n "${MEMBER_HASH}" ]; then
                    # the member hash exists but for some reason etcd failed
                    # as the datadir has not been created, we can remove the member
                    # and retrieve new hash
                    etcdctl member remove ${MEMBER_HASH}
                fi

                echo "Adding new member"
                # The pipeline is tested directly by "if" because the shell runs
                # with -e: a separate "$?" check would never be reached on failure.
                if ! etcdctl member add ${HOSTNAME} http://${HOSTNAME}.${SERVICE_NAME}:2380 | grep "^ETCD_" > /var/run/etcd/new_member_envs; then
                    echo "Exiting"
                    rm -f /var/run/etcd/new_member_envs
                    exit 1
                fi

                # Load the ETCD_* settings printed by "member add"
                cat /var/run/etcd/new_member_envs
                . /var/run/etcd/new_member_envs

                collect_member &

                exec etcd --name ${HOSTNAME} \
                    --listen-peer-urls http://0.0.0.0:2380 \
                    --listen-client-urls http://0.0.0.0:2379 \
                    --advertise-client-urls http://${HOSTNAME}.${SERVICE_NAME}:2379 \
                    --data-dir /var/run/etcd/default.etcd \
                    --initial-advertise-peer-urls http://${HOSTNAME}.${SERVICE_NAME}:2380 \
                    --initial-cluster ${ETCD_INITIAL_CLUSTER} \
                    --initial-cluster-state ${ETCD_INITIAL_CLUSTER_STATE}
            fi

            # Initial member: bootstrap a new cluster from the static peer list
            PEERS=""
            for i in $(seq 0 $((${INITIAL_CLUSTER_SIZE} - 1))); do
                PEERS="${PEERS}${PEERS:+,}${SET_NAME}-${i}=http://${SET_NAME}-${i}.${SERVICE_NAME}:2380"
            done

            collect_member &

            # join member
            exec etcd --name ${HOSTNAME} \
                --initial-advertise-peer-urls http://${HOSTNAME}.${SERVICE_NAME}:2380 \
                --listen-peer-urls http://0.0.0.0:2380 \
                --listen-client-urls http://0.0.0.0:2379 \
                --advertise-client-urls http://${HOSTNAME}.${SERVICE_NAME}:2379 \
                --initial-cluster-token etcd-cluster-1 \
                --initial-cluster ${PEERS} \
                --initial-cluster-state new \
                --data-dir /var/run/etcd/default.etcd
        volumeMounts:
        - name: {{ include "common.fullname" . }}-data
          mountPath: /var/run/etcd
  {{- if .Values.persistence.enabled }}
  # Persistent storage: one claim per pod, mounted at /var/run/etcd
  volumeClaimTemplates:
  - metadata:
      name: {{ include "common.fullname" . }}-data
      labels:
        name: {{ include "common.fullname" . }}
        chart: "{{ .Chart.Name }}-{{ .Chart.Version }}"
        release: "{{ include "common.release" . }}"
        heritage: "{{ .Release.Service }}"
    spec:
      accessModes:
      - "{{ .Values.persistence.accessMode }}"
      storageClassName: {{ include "common.storageClass" . }}
      resources:
        requests:
          # upstream recommended max is 700M
          storage: "{{ .Values.persistence.storage }}"
  {{- else }}
      # No persistence: pod-level emptyDir volume (this key belongs to the pod spec)
      volumes:
      - name: {{ include "common.fullname" . }}-data
      {{- if .Values.memoryMode }}
        emptyDir:
          medium: Memory
      {{- else }}
        emptyDir: {}
      {{- end }}
  {{- end }}