From 9d98267fe7046c304a71f98e89de6774d09f094e Mon Sep 17 00:00:00 2001 From: Rajamohan Raj Date: Thu, 11 Apr 2019 00:36:20 +0000 Subject: Add helm charts for horovod based app Tasks in this patch: Create sample Tensorflow MNIST application using horovod: https://jira.onap.org/browse/ONAPARC-450 Change-Id: I50827a47f7c48574944cde28fca32f265b649b59 Issue-ID: ONAPARC-450 Signed-off-by: Rajamohan Raj --- .../charts/sample-horovod-app/templates/NOTES.txt | 5 + .../sample-horovod-app/templates/_helpers.tpl | 32 +++++ .../sample-horovod-app/templates/config.yaml | 130 +++++++++++++++++++++ .../sample-horovod-app/templates/job-service.yaml | 19 +++ .../charts/sample-horovod-app/templates/job.yaml | 126 ++++++++++++++++++++ .../sample-horovod-app/templates/secrets.yaml | 15 +++ .../templates/statefulset-service.yaml | 19 +++ .../sample-horovod-app/templates/statefulset.yaml | 115 ++++++++++++++++++ 8 files changed, 461 insertions(+) create mode 100644 vnfs/DAaaS/applications/charts/sample-horovod-app/templates/NOTES.txt create mode 100644 vnfs/DAaaS/applications/charts/sample-horovod-app/templates/_helpers.tpl create mode 100644 vnfs/DAaaS/applications/charts/sample-horovod-app/templates/config.yaml create mode 100644 vnfs/DAaaS/applications/charts/sample-horovod-app/templates/job-service.yaml create mode 100644 vnfs/DAaaS/applications/charts/sample-horovod-app/templates/job.yaml create mode 100644 vnfs/DAaaS/applications/charts/sample-horovod-app/templates/secrets.yaml create mode 100644 vnfs/DAaaS/applications/charts/sample-horovod-app/templates/statefulset-service.yaml create mode 100644 vnfs/DAaaS/applications/charts/sample-horovod-app/templates/statefulset.yaml (limited to 'vnfs/DAaaS/applications/charts/sample-horovod-app/templates') diff --git a/vnfs/DAaaS/applications/charts/sample-horovod-app/templates/NOTES.txt b/vnfs/DAaaS/applications/charts/sample-horovod-app/templates/NOTES.txt new file mode 100644 index 00000000..774555ae --- /dev/null +++ b/vnfs/DAaaS/applications/charts/sample-horovod-app/templates/NOTES.txt @@ -0,0 +1,5 @@ +1. Get the application URL by running these commands: + +*** NOTE: It may take a few minutes for the statefulset to be available + +*** you can watch the status of statefulset by running 'kubectl get sts --namespace {{ .Release.Namespace }} -w {{ template "horovod.fullname" . }}' *** \ No newline at end of file diff --git a/vnfs/DAaaS/applications/charts/sample-horovod-app/templates/_helpers.tpl b/vnfs/DAaaS/applications/charts/sample-horovod-app/templates/_helpers.tpl new file mode 100644 index 00000000..02071c0f --- /dev/null +++ b/vnfs/DAaaS/applications/charts/sample-horovod-app/templates/_helpers.tpl @@ -0,0 +1,32 @@ +{{/* vim: set filetype=mustache: */}} +{{/* +Expand the name of the chart. +*/}} +{{- define "horovod.name" -}} +{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" -}} +{{- end -}} + +{{/* +Create a default fully qualified app name. +We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec). +If release name contains chart name it will be used as a full name. +*/}} +{{- define "horovod.fullname" -}} +{{- if .Values.fullnameOverride -}} +{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" -}} +{{- else -}} +{{- $name := default .Chart.Name .Values.nameOverride -}} +{{- if contains $name .Release.Name -}} +{{- .Release.Name | trunc 63 | trimSuffix "-" -}} +{{- else -}} +{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" -}} +{{- end -}} +{{- end -}} +{{- end -}} + +{{/* +Create chart name and version as used by the chart label. +*/}} +{{- define "horovod.chart" -}} +{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" -}} +{{- end -}} diff --git a/vnfs/DAaaS/applications/charts/sample-horovod-app/templates/config.yaml b/vnfs/DAaaS/applications/charts/sample-horovod-app/templates/config.yaml new file mode 100644 index 00000000..ae93c445 --- /dev/null +++ b/vnfs/DAaaS/applications/charts/sample-horovod-app/templates/config.yaml @@ -0,0 +1,130 @@ +{{- $workerNum := .Values.worker.number -}} +{{- $name := include "horovod.fullname" . }} +{{- $slots := 1 }} +{{- if index .Values.resources "nvidia.com/gpu" }} +{{- $slots := index .Values.resources "nvidia.com/gpu" }} +{{- end }} +apiVersion: v1 +kind: ConfigMap +metadata: + name: {{ template "horovod.fullname" . }} + labels: + heritage: {{ .Release.Service | quote }} + release: {{ .Release.Name | quote }} + chart: {{ template "horovod.chart" . }} + app: {{ template "horovod.fullname" . }} +data: + hostfile.config: | + {{ $name }}-master slots={{ $slots }} + {{- range $i, $none := until (int $workerNum) }} + {{ $name }}-{{ $i }}.{{ $name }} slots={{ $slots }} + {{- end }} + ssh.readiness: | + #!/bin/bash + set -xev + ssh localhost ls + master.run: | + #!/bin/bash + set -x + sleep 5 + + mkdir -p /root/.ssh + rm -f /root/.ssh/config + touch /root/.ssh/config + + if [ "$USESECRETS" == "true" ];then + set +e + yes | cp /etc/secret-volume/id_rsa /root/.ssh/id_rsa + yes | cp /etc/secret-volume/authorized_keys /root/.ssh/authorized_keys + set -e + fi + + if [ -n "$SSHPORT" ]; then + echo "Port $SSHPORT" > /root/.ssh/config + sed -i "s/^Port.*/Port $SSHPORT /g" /etc/ssh/sshd_config + fi + echo "StrictHostKeyChecking no" >> /root/.ssh/config + /usr/sbin/sshd + + if [ $# -eq 0 ]; then + sleep infinity + else + bash -c "$*" + fi + sleep 300 + master.waitWorkerReady: | + #!/bin/bash + set -xev + function updateSSHPort() { + mkdir -p /root/.ssh + rm -f /root/.ssh/config + touch /root/.ssh/config + + if [ -n "$SSHPORT" ]; then + echo "Port $SSHPORT" > /root/.ssh/config + echo "StrictHostKeyChecking no" >> /root/.ssh/config + fi + } + + function runCheckSSH() { + if [[ "$USESECRETS" == "true" ]];then + set +e + yes | cp /etc/secret-volume/id_rsa /root/.ssh/id_rsa + yes | cp /etc/secret-volume/authorized_keys /root/.ssh/authorized_keys + set -e + fi + + for i in `cat $1 | awk '{print $(1)}'`;do + if [[ "$i" != *"master" ]];then + retry 30 ssh -o ConnectTimeout=2 -q $i exit + fi + done + } + + function retry() + { + local n=0;local try=$1 + local cmd="${@: 2}" + [[ $# -le 1 ]] && { + echo "Usage $0 "; + } + set +e + until [[ $n -ge $try ]] + do + $cmd && break || { + echo "Command Fail.." + ((n++)) + echo "retry $n :: [$cmd]" + sleep 1; + } + done + $cmd + if [ $? -ne 0 ]; then + exit 1 + fi + set -e + } + updateSSHPort + runCheckSSH $1 + worker.run: | + #!/bin/bash + set -x + + mkdir -p /root/.ssh + rm -f /root/.ssh/config + touch /root/.ssh/config + + if [[ "$USESECRETS" == "true" ]];then + set +e + yes | cp /etc/secret-volume/id_rsa /root/.ssh/id_rsa + yes | cp /etc/secret-volume/authorized_keys /root/.ssh/authorized_keys + set -e + fi + + if [ -n "$SSHPORT" ]; then + echo "Port $SSHPORT" > /root/.ssh/config + sed -i "s/^Port.*/Port $SSHPORT /g" /etc/ssh/sshd_config + fi + echo "StrictHostKeyChecking no" >> /root/.ssh/config + + /usr/sbin/sshd -D diff --git a/vnfs/DAaaS/applications/charts/sample-horovod-app/templates/job-service.yaml b/vnfs/DAaaS/applications/charts/sample-horovod-app/templates/job-service.yaml new file mode 100644 index 00000000..e7b05c26 --- /dev/null +++ b/vnfs/DAaaS/applications/charts/sample-horovod-app/templates/job-service.yaml @@ -0,0 +1,19 @@ +apiVersion: v1 +kind: Service +metadata: + name: {{ template "horovod.fullname" . }}-master + labels: + app: {{ template "horovod.name" . }} + chart: {{ template "horovod.chart" . }} + release: {{ .Release.Name }} + heritage: {{ .Release.Service }} +spec: + clusterIP: None + ports: + - name: ssh + port: {{ .Values.ssh.port }} + targetPort: {{ .Values.ssh.port }} + selector: + app: {{ template "horovod.name" . }} + release: {{ .Release.Name }} + role: master diff --git a/vnfs/DAaaS/applications/charts/sample-horovod-app/templates/job.yaml b/vnfs/DAaaS/applications/charts/sample-horovod-app/templates/job.yaml new file mode 100644 index 00000000..4e59b277 --- /dev/null +++ b/vnfs/DAaaS/applications/charts/sample-horovod-app/templates/job.yaml @@ -0,0 +1,126 @@ +--- +apiVersion: batch/v1 +kind: Job +metadata: + name: {{ template "horovod.fullname" . }} + labels: + app: {{ template "horovod.name" . }} + chart: {{ template "horovod.chart" . }} + release: {{ .Release.Name }} + heritage: {{ .Release.Service }} + role: master +spec: + template: + metadata: + labels: + app: {{ template "horovod.name" . }} + release: {{ .Release.Name }} + role: master + spec: + {{- if .Values.useHostNetwork }} + hostNetwork: {{ .Values.useHostNetwork }} + dnsPolicy: ClusterFirstWithHostNet + {{- end }} + {{- if .Values.useHostPID }} + hostPID: {{ .Values.useHostPID }} + {{- end }} + restartPolicy: OnFailure + volumes: + - name: {{ template "horovod.fullname" . }}-cm + configMap: + name: {{ template "horovod.fullname" . }} + items: + - key: hostfile.config + path: hostfile + mode: 438 + - key: master.waitWorkerReady + path: waitWorkersReady.sh + mode: 365 + - key: master.run + path: run.sh + mode: 365 + {{- if .Values.ssh.useSecrets }} + - name: {{ template "horovod.fullname" . }}-secret + secret: + secretName: {{ template "horovod.fullname" . }} + defaultMode: 448 + items: + - key: host-key + path: id_rsa + - key: host-key-pub + path: authorized_keys + {{- end }} +{{- if .Values.volumes }} +{{ toYaml .Values.volumes | indent 6 }} +{{- end }} + containers: + - name: horovod-master + image: "{{ .Values.master.image.repository }}:{{ .Values.master.image.tag }}" + imagePullPolicy: {{ .Values.master.image.pullPolicy }} + env: + - name: SSHPORT + value: "{{ .Values.ssh.port }}" + {{- if .Values.ssh.useSecrets }} + - name: USESECRETS + value: "{{ .Values.ssh.useSecrets }}" + {{- end }} + {{- if .Values.master.env }} + {{- range $key, $value := .Values.master.env }} + - name: "{{ $key }}" + value: "{{ $value }}" + {{- end }} + {{- end }} +{{- if .Values.master.privileged }} + securityContext: + privileged: true +{{- end }} + ports: + - containerPort: {{ .Values.ssh.port }} + volumeMounts: + - name: {{ template "horovod.fullname" . }}-cm + mountPath: /horovod/generated + {{- if .Values.ssh.useSecrets }} + - name: {{ template "horovod.fullname" . }}-secret + readOnly: true + mountPath: "/etc/secret-volume" + {{- end }} +{{- if .Values.volumeMounts }} +{{ toYaml .Values.volumeMounts | indent 8 }} +{{- end }} + command: + - /horovod/generated/run.sh + args: +{{ toYaml .Values.master.args | indent 10 }} + resources: +{{ toYaml .Values.resources | indent 10 }} +{{- if .Values.ssh.useSecrets }} + initContainers: + - name: wait-workers + image: "{{ .Values.master.image.repository }}:{{ .Values.master.image.tag }}" + imagePullPolicy: {{ .Values.master.image.pullPolicy }} + env: + - name: SSHPORT + value: "{{ .Values.ssh.port }}" + {{- if .Values.ssh.useSecrets }} + - name: USESECRETS + value: "{{ .Values.ssh.useSecrets }}" + {{- end }} + {{- if .Values.master.env }} + {{- range $key, $value := .Values.master.env }} + - name: "{{ $key }}" + value: "{{ $value }}" + {{- end }} + {{- end }} + command: + - /horovod/generated/waitWorkersReady.sh + args: + - /horovod/generated/hostfile + volumeMounts: + - name: {{ template "horovod.fullname" . }}-cm + mountPath: /horovod/generated + {{- if .Values.ssh.useSecrets }} + - name: {{ template "horovod.fullname" . }}-secret + readOnly: true + mountPath: "/etc/secret-volume" + {{- end }} +{{- end }} diff --git a/vnfs/DAaaS/applications/charts/sample-horovod-app/templates/secrets.yaml b/vnfs/DAaaS/applications/charts/sample-horovod-app/templates/secrets.yaml new file mode 100644 index 00000000..c9853ed0 --- /dev/null +++ b/vnfs/DAaaS/applications/charts/sample-horovod-app/templates/secrets.yaml @@ -0,0 +1,15 @@ +{{- if .Values.ssh.useSecrets }} +apiVersion: v1 +kind: Secret +metadata: + name: {{ template "horovod.fullname" . }} + labels: + app: {{ template "horovod.name" . }} + chart: {{ template "horovod.chart" . }} + release: {{ .Release.Name }} + heritage: {{ .Release.Service }} +type: Opaque +data: + host-key: {{ .Values.ssh.hostKey | b64enc | quote }} + host-key-pub: {{ .Values.ssh.hostKeyPub | b64enc | quote }} +{{- end }} \ No newline at end of file diff --git a/vnfs/DAaaS/applications/charts/sample-horovod-app/templates/statefulset-service.yaml b/vnfs/DAaaS/applications/charts/sample-horovod-app/templates/statefulset-service.yaml new file mode 100644 index 00000000..d0216a86 --- /dev/null +++ b/vnfs/DAaaS/applications/charts/sample-horovod-app/templates/statefulset-service.yaml @@ -0,0 +1,19 @@ +apiVersion: v1 +kind: Service +metadata: + name: {{ template "horovod.fullname" . }} + labels: + app: {{ template "horovod.name" . }} + chart: {{ template "horovod.chart" . }} + release: {{ .Release.Name }} + heritage: {{ .Release.Service }} +spec: + clusterIP: None + ports: + - name: ssh + port: {{ .Values.ssh.port }} + targetPort: {{ .Values.ssh.port }} + selector: + app: {{ template "horovod.name" . }} + release: {{ .Release.Name }} + role: worker diff --git a/vnfs/DAaaS/applications/charts/sample-horovod-app/templates/statefulset.yaml b/vnfs/DAaaS/applications/charts/sample-horovod-app/templates/statefulset.yaml new file mode 100644 index 00000000..1d3f7577 --- /dev/null +++ b/vnfs/DAaaS/applications/charts/sample-horovod-app/templates/statefulset.yaml @@ -0,0 +1,115 @@ +apiVersion: apps/v1beta2 +kind: StatefulSet +metadata: + name: {{ template "horovod.fullname" . }} + labels: + app: {{ template "horovod.name" . }} + chart: {{ template "horovod.chart" . }} + release: {{ .Release.Name }} + heritage: {{ .Release.Service }} + role: worker +spec: + selector: + matchLabels: + app: {{ template "horovod.name" . }} + release: {{ .Release.Name }} + heritage: {{ .Release.Service }} + role: worker + serviceName: {{ template "horovod.fullname" . }} + podManagementPolicy: {{ .Values.worker.podManagementPolicy }} + replicas: {{.Values.worker.number}} + template: + metadata: + labels: + app: {{ template "horovod.name" . }} + chart: {{ template "horovod.chart" . }} + release: {{ .Release.Name }} + heritage: {{ .Release.Service }} + role: worker + spec: + selector: + matchLabels: + app: {{ template "horovod.name" . }} + release: {{ .Release.Name }} + role: worker + {{- if .Values.useHostNetwork }} + hostNetwork: {{ .Values.useHostNetwork }} + dnsPolicy: ClusterFirstWithHostNet + {{- end }} + {{- if .Values.useHostPID }} + hostPID: {{ .Values.useHostPID }} + {{- end }} + volumes: + - name: {{ template "horovod.fullname" . }}-cm + configMap: + name: {{ template "horovod.fullname" . }} + items: + - key: hostfile.config + path: hostfile + mode: 438 + - key: ssh.readiness + path: check.sh + mode: 365 + - key: worker.run + path: run.sh + mode: 365 + {{- if .Values.ssh.useSecrets }} + - name: {{ template "horovod.fullname" . }}-secret + secret: + secretName: {{ template "horovod.fullname" . }} + defaultMode: 448 + items: + - key: host-key + path: id_rsa + - key: host-key-pub + path: authorized_keys + {{- end }} +{{- if .Values.volumes }} +{{ toYaml .Values.volumes | indent 6 }} +{{- end }} + containers: + - name: worker + image: "{{ .Values.worker.image.repository }}:{{ .Values.worker.image.tag }}" + imagePullPolicy: {{ .Values.worker.image.pullPolicy }} + env: + - name: SSHPORT + value: "{{ .Values.ssh.port }}" + {{- if .Values.ssh.useSecrets }} + - name: USESECRETS + value: "{{ .Values.ssh.useSecrets }}" + {{- end }} + {{- if .Values.master.env }} + {{- range $key, $value := .Values.master.env }} + - name: "{{ $key }}" + value: "{{ $value }}" + {{- end }} + {{- end }} +{{- if .Values.worker.privileged }} + securityContext: + privileged: true +{{- end }} + ports: + - containerPort: {{ .Values.ssh.port }} + volumeMounts: + - name: {{ template "horovod.fullname" . }}-cm + mountPath: /horovod/generated + {{- if .Values.ssh.useSecrets }} + - name: {{ template "horovod.fullname" . }}-secret + readOnly: true + mountPath: "/etc/secret-volume" + {{- end }} +{{- if .Values.volumeMounts }} +{{ toYaml .Values.volumeMounts | indent 8 }} +{{- end }} + command: + - /horovod/generated/run.sh +{{- if .Values.ssh.useSecrets }} + readinessProbe: + exec: + command: + - /horovod/generated/check.sh + initialDelaySeconds: 1 + periodSeconds: 2 +{{- end }} + resources: +{{ toYaml .Values.resources | indent 10 }} -- cgit 1.2.3-korg