From 58774777260192181015ad388fa2b446fd7ca31b Mon Sep 17 00:00:00 2001 From: "Timoney, Dan (dt5972)" Date: Wed, 21 Aug 2019 16:50:54 -0400 Subject: Tune OpenDaylight parameters Add files to tune akka settings in OpenDaylight. Also, added setenv to tune Java settings. Change-Id: I2071fceb48f990929cd9f4c885bd0c9f7db0263c Issue-ID: SDNC-858 Signed-off-by: Timoney, Dan (dt5972) --- kubernetes/sdnc/resources/config/conf/akka.conf | 80 +++++++++++++++ ...g.opendaylight.controller.cluster.datastore.cfg | 107 +++++++++++++++++++++ kubernetes/sdnc/resources/config/conf/setenv | 61 ++++++++++++ kubernetes/sdnc/templates/statefulset.yaml | 9 ++ kubernetes/sdnc/values.yaml | 26 +++++ 5 files changed, 283 insertions(+) create mode 100644 kubernetes/sdnc/resources/config/conf/akka.conf create mode 100644 kubernetes/sdnc/resources/config/conf/org.opendaylight.controller.cluster.datastore.cfg create mode 100644 kubernetes/sdnc/resources/config/conf/setenv (limited to 'kubernetes') diff --git a/kubernetes/sdnc/resources/config/conf/akka.conf b/kubernetes/sdnc/resources/config/conf/akka.conf new file mode 100644 index 0000000000..8440a9de56 --- /dev/null +++ b/kubernetes/sdnc/resources/config/conf/akka.conf @@ -0,0 +1,80 @@ + +odl-cluster-data { + akka { + remote { + artery { + enabled = off + canonical.hostname = "127.0.0.1" + canonical.port = 2550 + } + netty.tcp { + hostname = "127.0.0.1" + port = 2550 + } + + use-passive-connections = off + # when under load we might trip a false positive on the failure detector + # transport-failure-detector { + # heartbeat-interval = 4 s + # acceptable-heartbeat-pause = 16s + # } + } + + actor { + debug { + autoreceive = on + lifecycle = on + unhandled = on + fsm = on + event-stream = on + } + } + + cluster { + # Remove ".tcp" when using artery. 
+ seed-nodes = ["akka.tcp://opendaylight-cluster-data@127.0.0.1:2550"] + + seed-node-timeout = {{.Values.config.odl.akka.seedNodeTimeout}} + + roles = ["member-1"] + + } + + persistence { + # By default the snapshots/journal directories live in KARAF_HOME. You can choose to put it somewhere else by + # modifying the following two properties. The directory location specified may be a relative or absolute path. + # The relative path is always relative to KARAF_HOME. + + # snapshot-store.local.dir = "target/snapshots" + # journal.leveldb.dir = "target/journal" + + journal { + leveldb { + # Set native = off to use a Java-only implementation of leveldb. + # Note that the Java-only version is not currently considered by Akka to be production quality. + + # native = off + } + + journal-plugin-fallback { + circuit-breaker { + max-failures = {{.Values.config.odl.akka.circuitBreaker.maxFailures}} + call-timeout = {{.Values.config.odl.akka.circuitBreaker.callTimeout}} + reset-timeout = {{.Values.config.odl.akka.circuitBreaker.resetTimeout}} + } + recovery-event-timeout = {{.Values.config.odl.akka.recoveryEventTimeout}} + } + + snapshot-store-plugin-fallback { + circuit-breaker { + max-failures = {{.Values.config.odl.akka.circuitBreaker.maxFailures}} + call-timeout = {{.Values.config.odl.akka.circuitBreaker.callTimeout}} + reset-timeout = {{.Values.config.odl.akka.circuitBreaker.resetTimeout}} + } + recovery-event-timeout = {{.Values.config.odl.akka.recoveryEventTimeout}} + } + } + } + } +} + diff --git a/kubernetes/sdnc/resources/config/conf/org.opendaylight.controller.cluster.datastore.cfg b/kubernetes/sdnc/resources/config/conf/org.opendaylight.controller.cluster.datastore.cfg new file mode 100644 index 0000000000..29dd0e54dd --- /dev/null +++ b/kubernetes/sdnc/resources/config/conf/org.opendaylight.controller.cluster.datastore.cfg @@ -0,0 +1,107 @@ +# This file specifies property settings for the clustered data store to control its behavior. 
A +# property may be applied to every data store type ("config" and "operational") or can be customized +# differently for each data store type by prefixing the data store type + '.'. For example, specifying +# the "shard-election-timeout-factor" property would be applied to both data stores whereas specifying +# "operational.shard-election-timeout-factor" would only apply to the "operational" data store. Similarly, +# specifying "config.shard-election-timeout-factor" would only apply to the "config" data store. + +# The multiplication factor to be used to determine shard election timeout. The shard election timeout +# is determined by multiplying shardHeartbeatIntervalInMillis with the shardElectionTimeoutFactor. +shard-election-timeout-factor=20 + +# The interval at which a shard will send a heart beat message to its remote shard. +#shard-heartbeat-interval-in-millis=500 + +# The amount by which to divide election timeout in case of a candidate. This serves as a counter-balance +# to shard-election-timeout-factor. The default value is 1, i.e. election timeout is the same in all +# situations. +#shard-candidate-election-timeout-divisor=1 + +# The maximum amount of time to wait for a shard to elect a leader before failing an operation (eg transaction create). +#shard-leader-election-timeout-in-seconds=30 + +# Enable or disable data persistence. +#persistent=true + +# Disable persistence for the operational data store by default. +operational.persistent=false + +# The maximum amount of time a shard transaction can be idle without receiving any messages before it self-destructs. +#shard-transaction-idle-timeout-in-minutes=10 + +# The maximum amount of time a shard transaction three-phase commit can be idle without receiving the +# next messages before it aborts the transaction. +#shard-transaction-commit-timeout-in-seconds=30 + +# The maximum allowed capacity for each shard's transaction commit queue. 
+#shard-transaction-commit-queue-capacity=20000 + +# The maximum amount of time to wait for a shard to initialize from persistence on startup before +# failing an operation (e.g. transaction create and change listener registration). +#shard-initialization-timeout-in-seconds=300 + +# The minimum number of entries to be present in the in-memory journal log before a snapshot is to be taken. +#shard-snapshot-batch-count=20000 + +# The percentage of Runtime.totalMemory() used by the in-memory journal log before a snapshot is to be taken. +#shard-snapshot-data-threshold-percentage=12 + +# The interval at which the leader of the shard will check if the majority of its followers are active and, +# if not, term itself as isolated. +#shard-isolated-leader-check-interval-in-millis=5000 + +# The number of transaction modification operations (put, merge, delete) to batch before sending to the +# shard transaction actor. Batching improves performance as fewer modification messages are sent to the +# actor and thus lessens the chance that the transaction actor's mailbox queue could get full. +#shard-batched-modification-count=1000 + +# The maximum amount of time for akka operations (remote or local) to complete before failing. +#operation-timeout-in-seconds=5 + +# The initial number of transactions per second that are allowed before the data store should begin +# applying back pressure. This number is only used as an initial guidance; subsequently the datastore +# measures the latency for a commit and auto-adjusts the rate limit. +#transaction-creation-initial-rate-limit=100 + +# The maximum thread pool size for each shard's data store data change notification executor. +#max-shard-data-change-executor-pool-size=20 + +# The maximum queue size for each shard's data store data change notification executor. +#max-shard-data-change-executor-queue-size=1000 + +# The maximum queue size for each shard's data store data change listener.
+#max-shard-data-change-listener-queue-size=1000 + +# The maximum queue size for each shard's data store executor. +#max-shard-data-store-executor-queue-size=5000 + +# A fully qualified Java class name. The class should implement +# org.opendaylight.controller.cluster.raft.policy.RaftPolicy. This Java class should be +# accessible to the distributed data store OSGi module so that it can be dynamically loaded via +# reflection. For now let's assume that these classes to customize raft behaviors should be +# present in the distributed data store module itself. If this property is set to a class which +# cannot be found then the default raft policy will be applied. +#custom-raft-policy-implementation= + +# When fragmenting messages through the akka remoting framework, this is the maximum size in bytes +# for a message slice. +#maximum-message-slice-size=20480000 + +# Enable tell-based protocol between frontend (applications) and backend (shards). Using this protocol +# should avoid AskTimeoutExceptions seen under heavy load. Defaults to false (use ask-based protocol). +#use-tell-based-protocol=true + +# Tune the maximum number of entries a follower is allowed to lag behind the leader before it is +# considered out-of-sync. This flag may require tuning in the face of a large number of small transactions. +#sync-index-threshold=10 + +# Record new transaction allocation stack trace, useful for debugging. This makes the log include +# the stack trace of the creator of the Tx when there is an exception when the transaction is submitted +# (e.g. for a failed validation). Defaults to false due to performance impact.
+#transaction-debug-context-enabled=true +persistent-actor-restart-min-backoff-in-seconds={{.Values.config.odl.datastore.persistentActorRestartMinBackoffInSeconds}} +persistent-actor-restart-max-backoff-in-seconds={{.Values.config.odl.datastore.persistentActorRestartMaxBackoffInSeconds}} +persistent-actor-restart-reset-backoff-in-seconds={{.Values.config.odl.datastore.persistentActorRestartResetBackoffInSeconds}} +shard-transaction-commit-timeout-in-seconds={{.Values.config.odl.datastore.shardTransactionCommitTimeoutInSeconds}} +shard-isolated-leader-check-interval-in-millis={{.Values.config.odl.datastore.shardIsolatedLeaderCheckIntervalInMillis}} +operation-timeout-in-seconds={{.Values.config.odl.datastore.operationTimeoutInSeconds}} diff --git a/kubernetes/sdnc/resources/config/conf/setenv b/kubernetes/sdnc/resources/config/conf/setenv new file mode 100644 index 0000000000..7476e6849a --- /dev/null +++ b/kubernetes/sdnc/resources/config/conf/setenv @@ -0,0 +1,61 @@ +#!/bin/sh +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+#
+
+#
+# handle specific scripts; the SCRIPT_NAME is exactly the name of the Karaf
+# script: client, instance, shell, start, status, stop, karaf
+#
+# if [ "${KARAF_SCRIPT}" == "SCRIPT_NAME" ]; then
+#   Actions go here...
+# fi
+
+#
+# general settings which should be applied for all scripts go here; please keep
+# in mind that it is possible that scripts might be executed more than once, e.g.
+# in example of the start script where the start script is executed first and the
+# karaf script afterwards.
+#
+
+#
+# The following section shows the possible configuration options for the default
+# karaf scripts
+#
+# export JAVA_HOME # Location of Java installation
+# export JAVA_MIN_MEM # Minimum memory for the JVM
+# export JAVA_MAX_MEM # Maximum memory for the JVM
+# export JAVA_PERM_MEM # Minimum perm memory for the JVM
+# export JAVA_MAX_PERM_MEM # Maximum perm memory for the JVM
+# export EXTRA_JAVA_OPTS # Additional JVM options
+# export KARAF_HOME # Karaf home folder
+# export KARAF_DATA # Karaf data folder
+# export KARAF_BASE # Karaf base folder
+# export KARAF_ETC # Karaf etc folder
+# export KARAF_SYSTEM_OPTS # First citizen Karaf options
+# export KARAF_OPTS # Additional available Karaf options
+# export KARAF_DEBUG # Enable debug mode
+# export KARAF_REDIRECT # Enable/set the std/err redirection when using bin/start
+# export KARAF_NOROOT # Prevent execution as root if set to true
+if [ "x$JAVA_MAX_MEM" = "x" ]; then
+    export JAVA_MAX_MEM="2048m"
+fi
+# G1 GC tuning and GC logging for the JVM; templated values come from config.odl.javaOptions in values.yaml.
+export EXTRA_JAVA_OPTS="-XX:+UseG1GC -XX:MaxGCPauseMillis={{.Values.config.odl.javaOptions.maxGCPauseMillis}} \
+    -XX:ParallelGCThreads={{.Values.config.odl.javaOptions.parallelGCThreads}} -XX:+ParallelRefProcEnabled \
+    -XX:+UseStringDeduplication -XX:+PrintGC -XX:+PrintGCDateStamps -XX:+PrintGCDetails \
+    -XX:+PrintGCTimeStamps -XX:+UseGCLogFileRotation \
+    -XX:NumberOfGCLogFiles={{.Values.config.odl.javaOptions.numberGGLogFiles}} -Xloggc:/var/log/onap/sdnc/gc-%t.log"
diff --git
a/kubernetes/sdnc/templates/statefulset.yaml b/kubernetes/sdnc/templates/statefulset.yaml index d60319fa1c..e3ac50fc61 100644 --- a/kubernetes/sdnc/templates/statefulset.yaml +++ b/kubernetes/sdnc/templates/statefulset.yaml @@ -145,6 +145,15 @@ spec: name: logs - mountPath: {{ .Values.certpersistence.certPath }} name: {{ include "common.fullname" . }}-certs + - mountPath: {{ .Values.config.odl.salConfigDir }}/{{ .Values.config.odl.salConfigVersion}}/sal-clustering-config-{{ .Values.config.odl.salConfigVersion}}-akkaconf.xml + name: properties + subPath: akka.conf + - mountPath: {{ .Values.config.odl.etcDir }}/org.opendaylight.controller.cluster.datastore.cfg + name: properties + subPath: org.opendaylight.controller.cluster.datastore.cfg + - mountPath: {{ .Values.config.odl.binDir }}/setenv + name: properties + subPath: setenv resources: {{ include "common.resources" . | indent 12 }} {{- if .Values.nodeSelector }} diff --git a/kubernetes/sdnc/values.yaml b/kubernetes/sdnc/values.yaml index 76608d73de..dfad46ac79 100644 --- a/kubernetes/sdnc/values.yaml +++ b/kubernetes/sdnc/values.yaml @@ -66,6 +66,32 @@ config: ansiblePort: 8000 javaHome: /usr/lib/jvm/java-1.8-openjdk + odl: + etcDir: /opt/opendaylight/etc + binDir: /opt/opendaylight/bin + salConfigDir: /opt/opendaylight/system/org/opendaylight/controller/sal-clustering-config + salConfigVersion: 1.8.2 + akka: + seedNodeTimeout: 15s + circuitBreaker: + maxFailures: 10 + callTimeout: 90s + resetTimeout: 30s + recoveryEventTimeout: 90s + datastore: + persistentActorRestartMinBackoffInSeconds: 10 + persistentActorRestartMaxBackoffInSeconds: 40 + persistentActorRestartResetBackoffInSeconds: 20 + shardTransactionCommitTimeoutInSeconds: 120 + shardIsolatedLeaderCheckIntervalInMillis: 30000 + operationTimeoutInSeconds: 120 + javaOptions: + maxGCPauseMillis: 100 + parallelGCThreads : 3 + numberGGLogFiles: 10 + + + #local Mariadb-galera cluster localDBCluster: false -- cgit 1.2.3-korg