From b679d7bfa1ff3bdba9d70fee6beecdbf9f45d015 Mon Sep 17 00:00:00 2001 From: Sebastien Premont-Tendland Date: Mon, 17 Feb 2020 11:32:15 -0500 Subject: Cluster Distributed lock service integration with OOM. Disabled by default. In order to enable cluster replicaCount should be higher than 2 and useScriptCompileCache is set to false. We need to disable script compile cache otherwise there is issue with updating CBA when running multiple replicas of blueprint processor. Issue-ID: CCSDK-2011 Signed-off-by: Sebastien Premont-Tendland Change-Id: I6f6071556eb499832f9a765ba4c27100497c6e88 --- .../resources/config/hazelcast.yaml | 35 ++++++++++++++++++++++ .../templates/deployment.yaml | 31 +++++++++++++++++++ .../templates/service.yaml | 24 +++++++++++++++ .../charts/cds-blueprints-processor/values.yaml | 17 +++++++++++ 4 files changed, 107 insertions(+) create mode 100755 kubernetes/cds/charts/cds-blueprints-processor/resources/config/hazelcast.yaml (limited to 'kubernetes/cds/charts/cds-blueprints-processor') diff --git a/kubernetes/cds/charts/cds-blueprints-processor/resources/config/hazelcast.yaml b/kubernetes/cds/charts/cds-blueprints-processor/resources/config/hazelcast.yaml new file mode 100755 index 0000000000..3a3a1ce095 --- /dev/null +++ b/kubernetes/cds/charts/cds-blueprints-processor/resources/config/hazelcast.yaml @@ -0,0 +1,35 @@ +hazelcast: + cp-subsystem: + cp-member-count: {{ .Values.replicaCount }} + group-size: {{ .Values.cluster.groupSize }} + session-time-to-live-seconds: 10 + session-heartbeat-interval-seconds: 5 + missing-cp-member-auto-removal-seconds: 120 + fail-on-indeterminate-operation-state: false + raft-algorithm: + leader-election-timeout-in-millis: 2000 + leader-heartbeat-period-in-millis: 5000 + max-missed-leader-heartbeat-count: 5 + append-request-max-entry-count: 50 + commit-index-advance-count-to-snapshot: 1000 + uncommitted-entry-count-to-reject-new-appends: 100 + append-request-backoff-timeout-in-millis: 100 + network: + enabled: true + rest-api: + enabled: true + endpoint-groups: + HEALTH_CHECK: + enabled: true + CP: + enabled: true + join: + multicast: + enabled: false + kubernetes: + enabled: true + namespace: {{ include "common.namespace" . }} + service-name: {{ include "common.servicename" . }}-cluster + resolve-not-ready-addresses: true + # service-label-name: MY-SERVICE-LABEL-NAME + # service-label-value: MY-SERVICE-LABEL-VALUE diff --git a/kubernetes/cds/charts/cds-blueprints-processor/templates/deployment.yaml b/kubernetes/cds/charts/cds-blueprints-processor/templates/deployment.yaml index a90e4d7b68..749e9a4637 100755 --- a/kubernetes/cds/charts/cds-blueprints-processor/templates/deployment.yaml +++ b/kubernetes/cds/charts/cds-blueprints-processor/templates/deployment.yaml @@ -24,6 +24,18 @@ metadata: heritage: {{ .Release.Service }} spec: replicas: {{ .Values.replicaCount }} + strategy: + type: RollingUpdate + rollingUpdate: + # This allow a new pod to be ready before terminating the old one + # causing no downtime when replicas is set to 1 + maxUnavailable: 0 + + # maxSurge to 1 is very important for the hazelcast integration + # we only want one pod at a time to restart not multiple + # and break the hazelcast cluster. We should not use % maxSurge value + # ref : https://hazelcast.com/blog/rolling-upgrade-hazelcast-imdg-on-kubernetes/ + maxSurge: 1 template: metadata: labels: @@ -56,9 +68,23 @@ spec: env: - name: APP_CONFIG_HOME value: {{ .Values.config.appConfigDir }} + - name: USE_SCRIPT_COMPILE_CACHE + value: {{ .Values.config.useScriptCompileCache | quote }} + # Cluster should only be enabled when replicaCount is more than 2 and useScriptCompileCache is set to false otherwise it won't work properly + - name: CLUSTER_ENABLED + value: {{ if and (gt (int (.Values.replicaCount)) 2) (not .Values.config.useScriptCompileCache) }} {{ .Values.cluster.enabled | quote }} {{ else }} "false" {{ end }} + - name: CLUSTER_ID + value: {{ .Values.cluster.clusterName }} + - name: CLUSTER_NODE_ID + valueFrom: + fieldRef: + fieldPath: metadata.name + - name: CLUSTER_CONFIG_FILE + value: {{ .Values.config.appConfigDir }}/hazelcast.yaml ports: - containerPort: {{ .Values.service.http.internalPort }} - containerPort: {{ .Values.service.grpc.internalPort }} + - containerPort: {{ .Values.service.cluster.internalPort }} # disable liveness probe when breakpoints set in debugger # so K8s doesn't restart unresponsive container {{ if .Values.liveness.enabled }} @@ -93,6 +119,9 @@ spec: - mountPath: {{ .Values.config.appConfigDir }}/logback.xml name: {{ include "common.fullname" . }}-config subPath: logback.xml + - mountPath: {{ .Values.config.appConfigDir }}/hazelcast.yaml + name: {{ include "common.fullname" . }}-config + subPath: hazelcast.yaml - mountPath: {{ .Values.config.appConfigDir }}/ONAP_RootCA.cer name: {{ include "common.fullname" . }}-config @@ -122,6 +151,8 @@ spec: path: application.properties - key: logback.xml path: logback.xml + - key: hazelcast.yaml + path: hazelcast.yaml - key: ONAP_RootCA.cer path: ONAP_RootCA.cer - name: {{ include "common.fullname" . }}-blueprints diff --git a/kubernetes/cds/charts/cds-blueprints-processor/templates/service.yaml b/kubernetes/cds/charts/cds-blueprints-processor/templates/service.yaml index 411df8631c..a6a21a6a01 100755 --- a/kubernetes/cds/charts/cds-blueprints-processor/templates/service.yaml +++ b/kubernetes/cds/charts/cds-blueprints-processor/templates/service.yaml @@ -56,3 +56,27 @@ spec: selector: app: {{ include "common.name" . }} release: {{ include "common.release" . }} +--- +apiVersion: v1 +kind: Service +metadata: + name: {{ include "common.servicename" . }}-cluster + namespace: {{ include "common.namespace" . }} + labels: + app: {{ include "common.name" . }} + chart: {{ .Chart.Name }}-{{ .Chart.Version | replace "+" "_" }} + release: {{ .Release.Name }} + heritage: {{ .Release.Service }} + annotations: +spec: + type: {{ .Values.service.cluster.type }} + ports: + - port: {{ .Values.service.cluster.externalPort }} + targetPort: {{ .Values.service.cluster.internalPort }} + {{- if eq .Values.service.cluster.type "NodePort"}} + nodePort: {{ .Values.global.nodePortPrefixExt | default .Values.nodePortPrefixExt }}{{ .Values.service.cluster.nodePort }} + {{- end}} + name: {{ .Values.service.cluster.portName | default "cluster" }} + selector: + app: {{ include "common.name" . }} + release: {{ .Release.Name }} diff --git a/kubernetes/cds/charts/cds-blueprints-processor/values.yaml b/kubernetes/cds/charts/cds-blueprints-processor/values.yaml index 781b5383fe..35661c29e9 100755 --- a/kubernetes/cds/charts/cds-blueprints-processor/values.yaml +++ b/kubernetes/cds/charts/cds-blueprints-processor/values.yaml @@ -47,6 +47,7 @@ debugEnabled: false # application configuration config: appConfigDir: /opt/app/onap/config + useScriptCompileCache: true # default number of instances replicaCount: 1 @@ -85,6 +86,11 @@ service: portName: blueprints-processor-grpc internalPort: 9111 externalPort: 9111 + cluster: + type: ClusterIP + portName: blueprints-processor-cluster + internalPort: 5701 + externalPort: 5701 persistence: volumeReclaimPolicy: Retain @@ -94,6 +100,17 @@ persistence: mountSubPath: cds/blueprints/deploy deployedBlueprint: /opt/app/onap/blueprints/deploy +cluster: + # Cannot have cluster enabled if the replicaCount is not at least 3 + # AND config value useScriptCompileCache is not set to false + enabled: false + + clusterName: cds-cluster + + # Defines the number of node to be part of the CP subsystem/raft algorithm. This value should be + # between 3 and 7 only. + groupSize: 3 + ingress: enabled: false service: -- cgit 1.2.3-korg