Fix issue with etcd pod startup

Etcd pod fails to start with a file-not-found error. The error occurs because the pod is stuck in an infinite loop, as the readiness probe never succeeds. Since we are checking the pod status anyway, the readiness probe is not needed, so we removed it. Bumped up the version of etcd to 3.2.24. This fixes a known issue: https://github.com/etcd-io/etcd/pull/4861 Issue-ID: MULTICLOUD-660 Change-Id: I815766b4a8f187d88bb2fcdb71e9d6e24b277d25 Signed-off-by: Kiran Kamineni <kiran.k.kamineni@intel.com>
author: Kiran Kamineni <kiran.k.kamineni@intel.com> 2019-05-28 13:19:32 -0700
committer: Kiran Kamineni <kiran.k.kamineni@intel.com> 2019-06-04 12:08:07 -0700
commit: f02245c6cd6a5cf32e75d4671e923afa3e08b651 (patch)
tree: 2e6b826bfeb905a0ff8f3a67641adda8a4bb871c /kubernetes/common/etcd/templates
parent: 34af0b56bfa82db322848ba50ff7539f6d085a69 (diff)
1 files changed, 7 insertions, 13 deletions
diff --git a/kubernetes/common/etcd/templates/statefulset.yaml b/kubernetes/common/etcd/templates/statefulset.yaml
index 8b6a53454e..7190c5bca0 100644
--- a/kubernetes/common/etcd/templates/statefulset.yaml
+++ b/kubernetes/common/etcd/templates/statefulset.yaml
@@ -11,7 +11,6 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-
 apiVersion: apps/v1beta1
 kind: StatefulSet
 metadata:
@@ -55,17 +54,12 @@ spec:
           name: {{ .Values.service.clientPortName }}
         {{- if eq .Values.liveness.enabled true }}
         livenessProbe:
-          exec:
-            command: ["/bin/sh", "-c", "etcdctl cluster-health | grep -w healthy" ]
-            initialDelaySeconds: {{ .Values.liveness.initialDelaySeconds }}
-            periodSeconds: {{ .Values.liveness.periodSeconds }}
-            timeoutSeconds: {{ .Values.liveness.timeoutSeconds }}
-          {{ end -}}
-        readinessProbe:
-          exec:
-            command: ["/bin/sh", "-c", "etcdctl cluster-health | grep -w healthy" ]
-            initialDelaySeconds: {{ .Values.readiness.initialDelaySeconds }}
-            periodSeconds: {{ .Values.readiness.periodSeconds }}
+          tcpSocket:
+            port: {{ .Values.service.clientInternalPort }}
+          initialDelaySeconds: {{ .Values.liveness.initialDelaySeconds }}
+          periodSeconds: {{ .Values.liveness.periodSeconds }}
+          timeoutSeconds: {{ .Values.liveness.timeoutSeconds }}
+        {{ end -}}
         resources:
 {{ include "common.resources" . | indent 10 }}
         env:
@@ -142,7 +136,7 @@ spec:
             done
 
             # re-joining after failure?
-            if [ -e /var/run/etcd/default.etcd ]; then
+            if [[ -e /var/run/etcd/default.etcd && -f /var/run/etcd/member_id ]]; then
                 echo "Re-joining etcd member"
                 member_id=$(cat /var/run/etcd/member_id)
author	Kiran Kamineni <kiran.k.kamineni@intel.com>	2019-05-28 13:19:32 -0700
committer	Kiran Kamineni <kiran.k.kamineni@intel.com>	2019-06-04 12:08:07 -0700
commit	f02245c6cd6a5cf32e75d4671e923afa3e08b651 (patch)
tree	2e6b826bfeb905a0ff8f3a67641adda8a4bb871c /kubernetes/common/etcd/templates
parent	34af0b56bfa82db322848ba50ff7539f6d085a69 (diff)