diff options
author | Rajamohan Raj <rajamohan.raj@intel.com> | 2019-03-11 23:53:41 +0000 |
---|---|---|
committer | Rajamohan Mohan Raj <rajamohan.raj@intel.com> | 2019-03-13 23:40:21 +0000 |
commit | b94b8b3ff5f403d9460f97acb7c2a553a42498f7 (patch) | |
tree | b0fd552f7a24bb6c2ff912fe338369cdd73be4a3 /vnfs/DAaaS | |
parent | e8f7e027283f8630733fb423d834e7d828d0db11 (diff) |
Helm charts for spark and hdfs
Tasks accomplished in this patch:
https://jira.onap.org/browse/ONAPARC-445 - Create helm chart for Spark on K8S operator and add it to operator.
https://jira.onap.org/browse/ONAPARC-446 - Create helm charts for HDFS
https://jira.onap.org/browse/ONAPARC-447 - Create Spark application helm chart as part of application package
https://jira.onap.org/browse/ONAPARC-448 - Add Anaconda with tensorflow,
keras, horovod support to Spark image
Change-Id: Icb4adeaa8a0aa445614f91203d7793e4e4f304c1
Issue-ID: ONAPARC-391
Signed-off-by: Rajamohan Raj <rajamohan.raj@intel.com>
Diffstat (limited to 'vnfs/DAaaS')
49 files changed, 3177 insertions, 0 deletions
diff --git a/vnfs/DAaaS/applications/Charts.yaml b/vnfs/DAaaS/applications/Charts.yaml new file mode 100644 index 00000000..803e19aa --- /dev/null +++ b/vnfs/DAaaS/applications/Charts.yaml @@ -0,0 +1,5 @@ + apiVersion: v1 + appVersion: "1.0" + description: Helm chart for sample applications that use the components that the training-core framework deploys. + name: sample-applications + version: 0.1.0 diff --git a/vnfs/DAaaS/applications/charts/sample-spark-app/.helmignore b/vnfs/DAaaS/applications/charts/sample-spark-app/.helmignore new file mode 100644 index 00000000..50af0317 --- /dev/null +++ b/vnfs/DAaaS/applications/charts/sample-spark-app/.helmignore @@ -0,0 +1,22 @@ +# Patterns to ignore when building packages. +# This supports shell glob matching, relative path matching, and +# negation (prefixed with !). Only one pattern per line. +.DS_Store +# Common VCS dirs +.git/ +.gitignore +.bzr/ +.bzrignore +.hg/ +.hgignore +.svn/ +# Common backup files +*.swp +*.bak +*.tmp +*~ +# Various IDEs +.project +.idea/ +*.tmproj +.vscode/ diff --git a/vnfs/DAaaS/applications/charts/sample-spark-app/Chart.yaml b/vnfs/DAaaS/applications/charts/sample-spark-app/Chart.yaml new file mode 100644 index 00000000..42ed0400 --- /dev/null +++ b/vnfs/DAaaS/applications/charts/sample-spark-app/Chart.yaml @@ -0,0 +1,5 @@ +apiVersion: v1 +appVersion: "1.0" +description: A sample spark application which finds the top users from the apache logs which is stored in the remote hdfs-k8s cluster +name: sample-spark-app-apache-log-analysis +version: 0.1.0 diff --git a/vnfs/DAaaS/applications/charts/sample-spark-app/Dockerfile b/vnfs/DAaaS/applications/charts/sample-spark-app/Dockerfile new file mode 100644 index 00000000..cd42d4c7 --- /dev/null +++ b/vnfs/DAaaS/applications/charts/sample-spark-app/Dockerfile @@ -0,0 +1,133 @@ +# Copyright (c) 2019 Intel Corporation +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. 
See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +# Ported kubernetes spark image to Ubuntu + +FROM ubuntu:18.04 + +# Install jdk +RUN apt update -yqq +RUN apt install -y locales openjdk-8-jdk && rm -rf /var/lib/apt/lists/* \ + && localedef -i en_US -c -f UTF-8 -A /usr/share/locale/locale.alias en_US.UTF-8 + +# Install all the essentials +RUN apt-get update --fix-missing && \ + apt-get install -y numactl wget curl bzip2 ca-certificates libglib2.0-0 libxext6 libsm6 libxrender1 \ + git mercurial subversion build-essential openssh-server openssh-client net-tools && \ + apt-get clean && \ + rm -rf /var/lib/apt/lists/* + +ENV LANG en_US.utf8 +ENV JAVA_HOME /usr/lib/jvm/java-8-openjdk-amd64 +ENV PATH $JAVA_HOME/bin:$PATH +ENV PATH /opt/conda/bin:$PATH +ENV OPENMPI_VERSION 3.1 + +# Install openMPI +RUN mkdir /tmp/openmpi && \ + cd /tmp/openmpi && \ + wget --quiet https://www.open-mpi.org/software/ompi/v${OPENMPI_VERSION}/downloads/openmpi-${OPENMPI_VERSION}.2.tar.gz -O openmpi.tar.gz && \ + tar zxf openmpi.tar.gz && \ + cd openmpi-3.1.2 && \ + ./configure --enable-orterun-prefix-by-default && \ + make -j $(nproc) all && \ + make install && \ + ldconfig && \ + rm -rf /tmp/openmpi + +# Install miniconda +RUN wget --quiet https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh -O ~/miniconda.sh && \ + /bin/bash ~/miniconda.sh -b -p 
/opt/conda && \ + rm ~/miniconda.sh && \ + ln -s /opt/conda/etc/profile.d/conda.sh /etc/profile.d/conda.sh && \ + echo ". /opt/conda/etc/profile.d/conda.sh" >> ~/.bashrc && \ + echo "conda activate base" >> ~/.bashrc + +# Install tf & keras using conda in the virtual_environment:tf_env +SHELL ["/bin/bash", "-c"] +RUN conda update -n base -c defaults conda && \ + conda create -n tf_env +RUN conda install -n tf_env -y -c anaconda \ + pip tensorflow keras nltk + +RUN echo "conda activate tf_env" >> ~/.bashrc && \ + conda install -n tf_env -y -c conda-forge clangdev + +RUN source ~/.bashrc +RUN HOROVOD_WITH_TENSORFLOW=1 /opt/conda/envs/tf_env/bin/pip install --no-cache-dir horovod + +# openMPI sane defaults: +RUN echo "hwloc_base_binding_policy = none" >> /usr/local/etc/openmpi-mca-params.conf && \ + echo "rmaps_base_mapping_policy = slot" >> /usr/local/etc/openmpi-mca-params.conf && \ + echo "btl_tcp_if_exclude = lo,docker0" >> /usr/local/etc/openmpi-mca-params.conf + +# Allow OpenSSH to talk to containers without asking for confirmation +RUN cat /etc/ssh/ssh_config | grep -v StrictHostKeyChecking > /etc/ssh/ssh_config.new && \ + echo " StrictHostKeyChecking no" >> /etc/ssh/ssh_config.new && \ + mv /etc/ssh/ssh_config.new /etc/ssh/ssh_config + +# Install tini +RUN apt-get install -y curl grep sed dpkg && \ + TINI_VERSION=`curl https://github.com/krallin/tini/releases/latest | grep -o "/v.*\"" | sed 's:^..\(.*\).$:\1:'` && echo ${TINI_VERSION} && \ + curl -L "https://github.com/krallin/tini/releases/download/v${TINI_VERSION}/tini_${TINI_VERSION}.deb" > tini.deb && \ + dpkg -i tini.deb && \ + rm tini.deb && \ + apt clean + +# This is needed to match the original entrypoint.sh file. 
+RUN cp /usr/bin/tini /sbin + +# Begin: Installing spark +ARG spark_jars=jars +ARG img_path=kubernetes/dockerfiles +ARG k8s_tests=kubernetes/tests + +# Before building the docker image, first build and make a Spark distribution following +# the instructions in http://spark.apache.org/docs/latest/building-spark.html. +# If this docker file is being used in the context of building your images from a Spark +# distribution, the docker build command should be invoked from the top level directory +# of the Spark distribution. E.g.: +# docker build -t spark:latest -f kubernetes/dockerfiles/spark/ubuntu18.04/Dockerfile . + +RUN mkdir -p /opt/spark && \ + mkdir -p /opt/spark/work-dir && \ + touch /opt/spark/RELEASE && \ + rm /bin/sh && \ + ln -sv /bin/bash /bin/sh && \ + echo "auth required pam_wheel.so use_uid" >> /etc/pam.d/su && \ + chgrp root /etc/passwd && chmod ug+rw /etc/passwd + + +COPY ${spark_jars} /opt/spark/jars +COPY bin /opt/spark/bin +COPY sbin /opt/spark/sbin +COPY ${img_path}/spark/entrypoint.sh /opt/ +COPY examples /opt/spark/examples +COPY ${k8s_tests} /opt/spark/tests +COPY data /opt/spark/data +ENV SPARK_HOME /opt/spark + +RUN mkdir /opt/spark/python +COPY python/pyspark /opt/spark/python/pyspark +COPY python/lib /opt/spark/python/lib +ENV PYTHONPATH /opt/spark/python/lib/pyspark.zip:/opt/spark/python/lib/py4j-*.zip + +WORKDIR /opt/spark/work-dir + +ENTRYPOINT [ "/opt/entrypoint.sh" ] + +# End: Installing spark diff --git a/vnfs/DAaaS/applications/charts/sample-spark-app/templates/SampleSparkApp.yaml b/vnfs/DAaaS/applications/charts/sample-spark-app/templates/SampleSparkApp.yaml new file mode 100644 index 00000000..f728f82e --- /dev/null +++ b/vnfs/DAaaS/applications/charts/sample-spark-app/templates/SampleSparkApp.yaml @@ -0,0 +1,43 @@ +apiVersion: "sparkoperator.k8s.io/v1beta1" +kind: SparkApplication +metadata: + name: {{ .Values.nameOfTheSparkApp }} + namespace: {{ .Release.Namespace }} +spec: + type: {{ .Values.programmingLanguageType }} + mode: {{ 
.Values.modeOfSparkApp | default "cluster" }} + image: {{ quote .Values.image }} + imagePullPolicy: {{ .Values.imagePullPolicy | default "IfNotPresent" }} + mainClass: {{ .Values.mainClassOfTheSparkApp }} + mainApplicationFile: {{ .Values.mainApplicationFileOfTheSparkApp }} + arguments: + {{- range .Values.argumentsOfTheSparkProgram }} + - {{ . }} + {{ end }} + hadoopConfigMap: {{ .Values.hadoopConfigMap }} + restartPolicy: + type: {{ .Values.restartPolicy | default "Never" }} + volumes: + - name: {{ quote .Values.volumesName | default "test-volume" }} + hostpath: + path: {{ quote .Values.hostpath | default "/tmp" }} + type: {{ .Values.hostpathType | default "Directory" }} + driver: + cores: {{ .Values.driverCores | default 0.1 }} + coreLimit: {{ quote .Values.driverCoreLimit | default "200m" }} + memory: {{ quote .Values.driverMemory | default "1024m" }} + labels: + version: 2.4.0 + serviceAccount: spark + volumeMounts: + - name: {{ quote .Values.driverVolumeMountsName | default "test-volume" }} + mountPath: {{ quote .Values.driverVolumeMountPath | default "/tmp" }} + executor: + cores: {{ .Values.executorCores | default 1 }} + instances: {{ .Values.executorInstances | default 1 }} + memory: {{ quote .Values.executorMemory | default "512m" }} + labels: + version: 2.4.0 + volumeMounts: + - name: {{ quote .Values.executorVolumeMountsName | default "test-volume" }} + mountPath: {{ quote .Values.executorVolumeMountPath | default "/tmp" }} diff --git a/vnfs/DAaaS/applications/charts/sample-spark-app/templates/_helpers.tpl b/vnfs/DAaaS/applications/charts/sample-spark-app/templates/_helpers.tpl new file mode 100644 index 00000000..6f51811d --- /dev/null +++ b/vnfs/DAaaS/applications/charts/sample-spark-app/templates/_helpers.tpl @@ -0,0 +1,32 @@ +{{/* vim: set filetype=mustache: */}} +{{/* +Expand the name of the chart. 
+*/}} +{{- define "sample-spark-app.name" -}} +{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" -}} +{{- end -}} + +{{/* +Create a default fully qualified app name. +We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec). +If release name contains chart name it will be used as a full name. +*/}} +{{- define "sample-spark-app.fullname" -}} +{{- if .Values.fullnameOverride -}} +{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" -}} +{{- else -}} +{{- $name := default .Chart.Name .Values.nameOverride -}} +{{- if contains $name .Release.Name -}} +{{- .Release.Name | trunc 63 | trimSuffix "-" -}} +{{- else -}} +{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" -}} +{{- end -}} +{{- end -}} +{{- end -}} + +{{/* +Create chart name and version as used by the chart label. +*/}} +{{- define "sample-spark-app.chart" -}} +{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" -}} +{{- end -}} diff --git a/vnfs/DAaaS/applications/charts/sample-spark-app/values.yaml b/vnfs/DAaaS/applications/charts/sample-spark-app/values.yaml new file mode 100644 index 00000000..afb48d67 --- /dev/null +++ b/vnfs/DAaaS/applications/charts/sample-spark-app/values.yaml @@ -0,0 +1,57 @@ +# Default values for sample-spark-app. +# This is a YAML-formatted file. +# Declare variables to be passed into your templates. + + +#===========================KUBERNETES POD RELATED CONFIGs======================== +image: spark-tf-keras-horo:latest +imagePullPolicy: Never +restartPolicy: Never +volumesName: test-volume +hostpath: /tmp +hostpathType: Directory + + + +#============================SPARK APP RELATED CONFIGs============================= + +nameOfTheSparkApp: spark-apache-logs2 +# Python or Scala supported. 
+programmingLanguageType: Scala +modeOfSparkApp: cluster +mainClassOfTheSparkApp: ApacheLogAnalysis +# can be http path, s3 path, minio path +mainApplicationFileOfTheSparkApp: https://github.com/mohanraj1311/ApacheLogAnalysisJar/raw/master/analysisofapachelogs_2.11-0.1.jar +argumentsOfTheSparkProgram: + - hdfs://hdfs-1-namenode-1.hdfs-1-namenode.hdfs1.svc.cluster.local:8020/data/apache-logs + + + +#============================SPARK DRIVER RELATED CONFIGs========================= +driverCores: 0.1 +driverCoreLimit: 200m +driverMemory: 1024m +driverVolumeMountsName: test-volume +driverVolumeMountPath: /tmp + + + +#============================SPARK EXECUTOR RELATED CONFIGs======================= +executorCores: 1 +executorInstances: 1 +executorMemory: 512m +executorVolumeMountsName: test-volume +executorVolumeMountPath: /tmp + + + +#===========================HADOOP RELATED CONFIGs=============================== +# config map of the hdfs +hadoopConfigMap: hdfs-1-config + + +################################################################################### + + + + diff --git a/vnfs/DAaaS/applications/values.yaml b/vnfs/DAaaS/applications/values.yaml new file mode 100644 index 00000000..fd98eb36 --- /dev/null +++ b/vnfs/DAaaS/applications/values.yaml @@ -0,0 +1,29 @@ +# Copyright © 2019 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +################################################################# +# Global configuration defaults. 
+################################################################# +global: + nodePortPrefix: 310 + repository: nexus3.onap.org:10001 + readinessRepository: oomk8s + readinessImage: readiness-check:2.0.0 + loggingRepository: docker.elastic.co + loggingImage: beats/filebeat:5.5.0 + +################################################################# +# k8s Operator Day-0 configuration defaults. +################################################################# + diff --git a/vnfs/DAaaS/operator/charts/sparkoperator/.helmignore b/vnfs/DAaaS/operator/charts/sparkoperator/.helmignore new file mode 100644 index 00000000..b7f6f9f1 --- /dev/null +++ b/vnfs/DAaaS/operator/charts/sparkoperator/.helmignore @@ -0,0 +1 @@ +OWNERS diff --git a/vnfs/DAaaS/operator/charts/sparkoperator/Chart.yaml b/vnfs/DAaaS/operator/charts/sparkoperator/Chart.yaml new file mode 100644 index 00000000..86d0c3ab --- /dev/null +++ b/vnfs/DAaaS/operator/charts/sparkoperator/Chart.yaml @@ -0,0 +1,5 @@ +apiVersion: v1 +appVersion: "1.0" +description: A Helm chart for Kubernetes +name: sparkoperator +version: 0.1.0 diff --git a/vnfs/DAaaS/operator/charts/sparkoperator/README.md b/vnfs/DAaaS/operator/charts/sparkoperator/README.md new file mode 100755 index 00000000..ba0f05bc --- /dev/null +++ b/vnfs/DAaaS/operator/charts/sparkoperator/README.md @@ -0,0 +1,42 @@ +### Helm Chart for Spark Operator + +This is the Helm chart for the [Spark-on-Kubernetes Operator](https://github.com/GoogleCloudPlatform/spark-on-k8s-operator). + +#### Prerequisites + +The Operator requires Kubernetes version 1.8 and above because it relies on garbage collection of custom resources. If customization of driver and executor pods (through mounting custom ConfigMaps and volumes) is desired, then the [Mutating Admission Webhook](https://github.com/GoogleCloudPlatform/spark-on-k8s-operator/blob/master/docs/quick-start-guide.md#using-the-mutating-admission-webhook) needs to be enabled and it only became beta in Kubernetes 1.9. 
+ +#### Installing the chart + +The chart can be installed by running: + +```bash +$ helm repo add incubator http://storage.googleapis.com/kubernetes-charts-incubator +$ helm install incubator/sparkoperator --namespace spark-operator +``` + +Note that you need to use the `--namespace` flag during `helm install` to specify in which namespace you want to install the operator. The namespace can be existing or not. When it's not available, Helm would take care of creating the namespace. Note that this namespace has no relation to the namespace where you would like to deploy Spark jobs (i.e. the setting `sparkJobNamespace` shown in the table below). They can be the same namespace or different ones. + +#### Configuration + +The following table lists the configurable parameters of the Spark operator chart and their default values. + +| Parameter | Description | Default | +| ------------------------- | ------------------------------------------------------------ | -------------------------------------- | +| `operatorImageName` | The name of the operator image | `gcr.io/spark-operator/spark-operator` | +| `operatorVersion` | The version of the operator to install | `v2.4.0-v1beta1-latest` | +| `imagePullPolicy` | Docker image pull policy | `IfNotPresent` | +| `sparkJobNamespace` | K8s namespace where Spark jobs are to be deployed | `default` | +| `enableWebhook` | Whether to enable mutating admission webhook | false | +| `enableMetrics` | Whether to expose metrics to be scraped by Premetheus | true | +| `controllerThreads` | Number of worker threads used by the SparkApplication controller | 10 | +| `ingressUrlFormat` | Ingress URL format | "" | +| `installCrds` | Whether to install CRDs | true | +| `metricsPort` | Port for the metrics endpoint | 10254 | +| `metricsEndpoint` | Metrics endpoint | "/metrics" | +| `metricsPrefix` | Prefix for the metrics | "" | +| `resyncInterval` | Informer resync interval in seconds | 30 | +| `webhookPort` | Service port of the webhook server 
| 8080 | + +Specify each parameter using the `--set key=value[,key=value]` argument to `helm install`. + diff --git a/vnfs/DAaaS/operator/charts/sparkoperator/templates/_helpers.tpl b/vnfs/DAaaS/operator/charts/sparkoperator/templates/_helpers.tpl new file mode 100644 index 00000000..741b500d --- /dev/null +++ b/vnfs/DAaaS/operator/charts/sparkoperator/templates/_helpers.tpl @@ -0,0 +1,48 @@ +{{/* vim: set filetype=mustache: */}} +{{/* +Expand the name of the chart. +*/}} +{{- define "sparkoperator.name" -}} +{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" -}} +{{- end -}} + {{/* +Create a default fully qualified app name. +We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec). +If release name contains chart name it will be used as a full name. +*/}} +{{- define "sparkoperator.fullname" -}} +{{- if .Values.fullnameOverride -}} +{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" -}} +{{- else -}} +{{- $name := default .Chart.Name .Values.nameOverride -}} +{{- if contains $name .Release.Name -}} +{{- .Release.Name | trunc 63 | trimSuffix "-" -}} +{{- else -}} +{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" -}} +{{- end -}} +{{- end -}} +{{- end -}} + {{/* +Create chart name and version as used by the chart label. +*/}} +{{- define "sparkoperator.chart" -}} +{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" -}} +{{- end -}} + {{/* +Create the name of the service account to use +*/}} +{{- define "sparkoperator.serviceAccountName" -}} +{{- if .Values.serviceAccounts.sparkoperator.create -}} + {{ default (include "sparkoperator.fullname" .) 
.Values.serviceAccounts.sparkoperator.name }} +{{- else -}} + {{ default "default" .Values.serviceAccounts.sparkoperator.name }} +{{- end -}} +{{- end -}} +{{- define "spark.serviceAccountName" -}} +{{- if .Values.serviceAccounts.spark.create -}} + {{ $sparkServiceaccount := printf "%s-%s" .Release.Name "spark" }} + {{ default $sparkServiceaccount .Values.serviceAccounts.spark.name }} +{{- else -}} + {{ default "default" .Values.serviceAccounts.spark.name }} +{{- end -}} +{{- end -}} diff --git a/vnfs/DAaaS/operator/charts/sparkoperator/templates/spark-operator-deployment.yaml b/vnfs/DAaaS/operator/charts/sparkoperator/templates/spark-operator-deployment.yaml new file mode 100755 index 00000000..fdfc51a2 --- /dev/null +++ b/vnfs/DAaaS/operator/charts/sparkoperator/templates/spark-operator-deployment.yaml @@ -0,0 +1,79 @@ +# If the admission webhook is enabled, then a post-install step is required +# to generate and install the secret in the operator namespace. + +# In the post-install hook, the token corresponding to the operator service account +# is used to authenticate with the Kubernetes API server to install the secret bundle. + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: {{ include "sparkoperator.fullname" . }} + labels: + app.kubernetes.io/name: {{ include "sparkoperator.name" . }} + helm.sh/chart: {{ include "sparkoperator.chart" . }} + app.kubernetes.io/instance: {{ .Release.Name }} + app.kubernetes.io/managed-by: {{ .Release.Service }} +spec: + replicas: 1 + selector: + matchLabels: + app.kubernetes.io/name: {{ include "sparkoperator.name" . }} + app.kubernetes.io/version: {{ .Values.operatorVersion }} + strategy: + type: Recreate + template: + metadata: + {{- if .Values.enableMetrics }} + annotations: + prometheus.io/scrape: "true" + prometheus.io/port: "{{ .Values.metricsPort }}" + prometheus.io/path: {{ .Values.metricsEndpoint }} + {{- end }} + labels: + app.kubernetes.io/name: {{ include "sparkoperator.name" . 
}} + app.kubernetes.io/version: {{ .Values.operatorVersion }} + initializers: + pending: [] + spec: + serviceAccountName: {{ include "sparkoperator.serviceAccountName" . }} + {{- if .Values.enableWebhook }} + volumes: + - name: webhook-certs + secret: + secretName: spark-webhook-certs + {{- end }} + containers: + - name: sparkoperator + image: {{ .Values.operatorImageName }}:{{ .Values.operatorVersion }} + imagePullPolicy: {{ .Values.imagePullPolicy }} + {{- if .Values.enableWebhook }} + volumeMounts: + - name: webhook-certs + mountPath: /etc/webhook-certs + {{- end }} + {{- if .Values.enableMetrics }} + ports: + - containerPort: {{ .Values.metricsPort }} + {{ end }} + args: + - -v=2 + - -namespace={{ .Values.sparkJobNamespace }} + - -ingress-url-format={{ .Values.ingressUrlFormat }} + - -install-crds={{ .Values.installCrds }} + - -controller-threads={{ .Values.controllerThreads }} + - -resync-interval={{ .Values.resyncInterval }} + - -logtostderr + {{- if .Values.enableMetrics }} + - -enable-metrics=true + - -metrics-labels=app_type + - -metrics-port={{ .Values.metricsPort }} + - -metrics-endpoint={{ .Values.metricsEndpoint }} + - -metrics-prefix={{ .Values.metricsPrefix }} + {{- end }} + {{- if .Values.enableWebhook }} + - -enable-webhook=true + - -webhook-svc-namespace={{ .Release.Namespace }} + - -webhook-port={{ .Values.webhookPort }} + - -webhook-svc-name={{ .Release.Name }}-webhook + - -webhook-config-name={{ include "sparkoperator.fullname" . }}-webhook-config + {{- end }} diff --git a/vnfs/DAaaS/operator/charts/sparkoperator/templates/spark-operator-rbac.yaml b/vnfs/DAaaS/operator/charts/sparkoperator/templates/spark-operator-rbac.yaml new file mode 100755 index 00000000..bd5fd3fe --- /dev/null +++ b/vnfs/DAaaS/operator/charts/sparkoperator/templates/spark-operator-rbac.yaml @@ -0,0 +1,55 @@ +{{- if .Values.rbac.create }} +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: {{ include "sparkoperator.fullname" . 
}}-cr + labels: + app.kubernetes.io/name: {{ include "sparkoperator.name" . }} + helm.sh/chart: {{ include "sparkoperator.chart" . }} + app.kubernetes.io/instance: {{ .Release.Name }} + app.kubernetes.io/managed-by: {{ .Release.Service }} +rules: +- apiGroups: [""] + resources: ["pods"] + verbs: ["*"] +- apiGroups: [""] + resources: ["services", "configmaps", "secrets"] + verbs: ["create", "get", "delete"] +- apiGroups: ["extensions"] + resources: ["ingresses"] + verbs: ["create", "get", "delete"] +- apiGroups: [""] + resources: ["nodes"] + verbs: ["get"] +- apiGroups: [""] + resources: ["events"] + verbs: ["create", "update", "patch"] +- apiGroups: ["apiextensions.k8s.io"] + resources: ["customresourcedefinitions"] + verbs: ["create", "get", "update", "delete"] +- apiGroups: ["admissionregistration.k8s.io"] + resources: ["mutatingwebhookconfigurations"] + verbs: ["create", "get", "update", "delete"] +- apiGroups: ["sparkoperator.k8s.io"] + resources: ["sparkapplications", "scheduledsparkapplications"] + verbs: ["*"] +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: {{ include "sparkoperator.fullname" . }}-crb + namespace: {{ .Release.Namespace }} + labels: + app.kubernetes.io/name: {{ include "sparkoperator.name" . }} + helm.sh/chart: {{ include "sparkoperator.chart" . }} + app.kubernetes.io/instance: {{ .Release.Name }} + app.kubernetes.io/managed-by: {{ .Release.Service }} +subjects: + - kind: ServiceAccount + name: {{ include "sparkoperator.serviceAccountName" . }} + namespace: {{ .Release.Namespace }} +roleRef: + kind: ClusterRole + name: {{ include "sparkoperator.fullname" . 
}}-cr + apiGroup: rbac.authorization.k8s.io +{{- end }} diff --git a/vnfs/DAaaS/operator/charts/sparkoperator/templates/spark-operator-serviceaccount.yaml b/vnfs/DAaaS/operator/charts/sparkoperator/templates/spark-operator-serviceaccount.yaml new file mode 100755 index 00000000..5216f8dd --- /dev/null +++ b/vnfs/DAaaS/operator/charts/sparkoperator/templates/spark-operator-serviceaccount.yaml @@ -0,0 +1,11 @@ +{{- if .Values.serviceAccounts.sparkoperator.create }} +apiVersion: v1 +kind: ServiceAccount +metadata: + name: {{ include "sparkoperator.serviceAccountName" . }} + labels: + app.kubernetes.io/name: {{ include "sparkoperator.name" . }} + helm.sh/chart: {{ include "sparkoperator.chart" . }} + app.kubernetes.io/instance: {{ .Release.Name }} + app.kubernetes.io/managed-by: {{ .Release.Service }} +{{- end }} diff --git a/vnfs/DAaaS/operator/charts/sparkoperator/templates/spark-rbac.yaml b/vnfs/DAaaS/operator/charts/sparkoperator/templates/spark-rbac.yaml new file mode 100755 index 00000000..fa066053 --- /dev/null +++ b/vnfs/DAaaS/operator/charts/sparkoperator/templates/spark-rbac.yaml @@ -0,0 +1,44 @@ +{{- if and (.Values.rbac.create) (ne .Values.sparkJobNamespace "") }} +apiVersion: rbac.authorization.k8s.io/v1 +kind: Role +metadata: + namespace: {{ .Values.sparkJobNamespace }} + name: spark-role + labels: + app.kubernetes.io/name: {{ include "sparkoperator.name" . }} + helm.sh/chart: {{ include "sparkoperator.chart" . }} + app.kubernetes.io/instance: {{ .Release.Name }} + app.kubernetes.io/managed-by: {{ .Release.Service }} +rules: +- apiGroups: + - "" # "" indicates the core API group + resources: + - "pods" + verbs: + - "*" +- apiGroups: + - "" # "" indicates the core API group + resources: + - "services" + verbs: + - "*" +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: RoleBinding +metadata: + name: spark-role-binding + namespace: {{ .Values.sparkJobNamespace }} + labels: + app.kubernetes.io/name: {{ include "sparkoperator.name" . 
}} + helm.sh/chart: {{ include "sparkoperator.chart" . }} + app.kubernetes.io/instance: {{ .Release.Name }} + app.kubernetes.io/managed-by: {{ .Release.Service }} +subjects: +- kind: ServiceAccount + name: {{ include "spark.serviceAccountName" . }} + namespace: {{ .Values.sparkJobNamespace }} +roleRef: + kind: Role + name: spark-role + apiGroup: rbac.authorization.k8s.io +{{- end }} diff --git a/vnfs/DAaaS/operator/charts/sparkoperator/templates/spark-serviceaccount.yaml b/vnfs/DAaaS/operator/charts/sparkoperator/templates/spark-serviceaccount.yaml new file mode 100755 index 00000000..bb0e55ea --- /dev/null +++ b/vnfs/DAaaS/operator/charts/sparkoperator/templates/spark-serviceaccount.yaml @@ -0,0 +1,12 @@ +{{- if .Values.serviceAccounts.spark.create }} +apiVersion: v1 +kind: ServiceAccount +metadata: + name: {{ include "spark.serviceAccountName" . }} + namespace: {{ .Values.sparkJobNamespace }} + labels: + app.kubernetes.io/name: {{ include "sparkoperator.name" . }} + helm.sh/chart: {{ include "sparkoperator.chart" . }} + app.kubernetes.io/instance: {{ .Release.Name }} + app.kubernetes.io/managed-by: {{ .Release.Service }} +{{- end }} diff --git a/vnfs/DAaaS/operator/charts/sparkoperator/templates/webhook-cleanup-job.yaml b/vnfs/DAaaS/operator/charts/sparkoperator/templates/webhook-cleanup-job.yaml new file mode 100755 index 00000000..d6d9df7c --- /dev/null +++ b/vnfs/DAaaS/operator/charts/sparkoperator/templates/webhook-cleanup-job.yaml @@ -0,0 +1,32 @@ +{{ if .Values.enableWebhook }} +apiVersion: batch/v1 +kind: Job +metadata: + name: {{ include "sparkoperator.fullname" . }}-cleanup + annotations: + "helm.sh/hook": pre-delete, pre-upgrade + "helm.sh/hook-delete-policy": hook-succeeded + labels: + app.kubernetes.io/name: {{ include "sparkoperator.name" . }} + helm.sh/chart: {{ include "sparkoperator.chart" . 
}} + app.kubernetes.io/instance: {{ .Release.Name }} + app.kubernetes.io/managed-by: {{ .Release.Service }} +spec: + template: + spec: + serviceAccountName: {{ include "sparkoperator.serviceAccountName" . }} + restartPolicy: OnFailure + containers: + - name: main + image: {{ .Values.operatorImageName }}:{{ .Values.operatorVersion }} + imagePullPolicy: {{ .Values.imagePullPolicy }} + command: + - "/bin/sh" + - "-c" + - "curl -ik \ + -X DELETE \ + -H \"Authorization: Bearer $(cat /var/run/secrets/kubernetes.io/serviceaccount/token)\" \ + -H \"Accept: application/json\" \ + -H \"Content-Type: application/json\" \ + https://kubernetes.default.svc/api/v1/namespaces/{{ .Release.Namespace }}/secrets/spark-webhook-certs" +{{ end }} diff --git a/vnfs/DAaaS/operator/charts/sparkoperator/templates/webhook-init-job.yaml b/vnfs/DAaaS/operator/charts/sparkoperator/templates/webhook-init-job.yaml new file mode 100755 index 00000000..a42c3097 --- /dev/null +++ b/vnfs/DAaaS/operator/charts/sparkoperator/templates/webhook-init-job.yaml @@ -0,0 +1,24 @@ +{{ if .Values.enableWebhook }} +apiVersion: batch/v1 +kind: Job +metadata: + name: {{ include "sparkoperator.fullname" . }}-init + annotations: + "helm.sh/hook": post-install, post-upgrade + "helm.sh/hook-delete-policy": hook-succeeded + labels: + app.kubernetes.io/name: {{ include "sparkoperator.name" . }} + helm.sh/chart: {{ include "sparkoperator.chart" . }} + app.kubernetes.io/instance: {{ .Release.Name }} + app.kubernetes.io/managed-by: {{ .Release.Service }} +spec: + template: + spec: + serviceAccountName: {{ include "sparkoperator.serviceAccountName" . 
}} + restartPolicy: OnFailure + containers: + - name: main + image: {{ .Values.operatorImageName }}:{{ .Values.operatorVersion }} + imagePullPolicy: {{ .Values.imagePullPolicy }} + command: ["/usr/bin/gencerts.sh", "-n", "{{ .Release.Namespace }}", "-s", "{{ .Release.Name }}-webhook", "-p"] +{{ end }} diff --git a/vnfs/DAaaS/operator/charts/sparkoperator/templates/webhook-service.yaml b/vnfs/DAaaS/operator/charts/sparkoperator/templates/webhook-service.yaml new file mode 100755 index 00000000..42c5bc62 --- /dev/null +++ b/vnfs/DAaaS/operator/charts/sparkoperator/templates/webhook-service.yaml @@ -0,0 +1,19 @@ +{{ if .Values.enableWebhook }} +kind: Service +apiVersion: v1 +metadata: + name: {{ .Release.Name }}-webhook + labels: + app.kubernetes.io/name: {{ include "sparkoperator.name" . }} + helm.sh/chart: {{ include "sparkoperator.chart" . }} + app.kubernetes.io/instance: {{ .Release.Name }} + app.kubernetes.io/managed-by: {{ .Release.Service }} +spec: + ports: + - port: 443 + targetPort: 8080 + name: webhook + selector: + app.kubernetes.io/name: {{ include "sparkoperator.name" . 
}} + app.kubernetes.io/version: {{ .Values.operatorVersion }} +{{ end }} diff --git a/vnfs/DAaaS/operator/charts/sparkoperator/values.yaml b/vnfs/DAaaS/operator/charts/sparkoperator/values.yaml new file mode 100644 index 00000000..bfb03eab --- /dev/null +++ b/vnfs/DAaaS/operator/charts/sparkoperator/values.yaml @@ -0,0 +1,28 @@ +operatorImageName: gcr.io/spark-operator/spark-operator +operatorVersion: v2.4.0-v1beta1-latest +imagePullPolicy: IfNotPresent + +rbac: + create: true + +serviceAccounts: + spark: + create: true + name: + sparkoperator: + create: true + name: + +sparkJobNamespace: "" + +enableWebhook: false +enableMetrics: true + +controllerThreads: 10 +ingressUrlFormat: "" +installCrds: true +metricsPort: 10254 +metricsEndpoint: "/metrics" +metricsPrefix: "" +resyncInterval: 30 +webhookPort: 8080 diff --git a/vnfs/DAaaS/training-core/charts/kubernetes-HDFS/.gitignore b/vnfs/DAaaS/training-core/charts/kubernetes-HDFS/.gitignore new file mode 100644 index 00000000..fc82fcb5 --- /dev/null +++ b/vnfs/DAaaS/training-core/charts/kubernetes-HDFS/.gitignore @@ -0,0 +1,2 @@ +tests/bin +tests/tmp diff --git a/vnfs/DAaaS/training-core/charts/kubernetes-HDFS/.travis.yml b/vnfs/DAaaS/training-core/charts/kubernetes-HDFS/.travis.yml new file mode 100644 index 00000000..1d3351da --- /dev/null +++ b/vnfs/DAaaS/training-core/charts/kubernetes-HDFS/.travis.yml @@ -0,0 +1,20 @@ +sudo: required + +env: + - CASES=_basic.sh + - CASES=_basic-subcharts.sh + - CASES=_kerberos.sh + - CASES=_single-namenode.sh + +before_script: +# Required for K8s v1.10.x. 
See +# https://github.com/kubernetes/kubernetes/issues/61058#issuecomment-372764783 +- sudo mount --make-shared / && sudo service docker restart +- USE_MINIKUBE_DRIVER_NONE=true USE_SUDO_MINIKUBE=true tests/setup.sh + +script: +- tests/run.sh + +after_script: +- tests/cleanup.sh +- tests/teardown.sh diff --git a/vnfs/DAaaS/training-core/charts/kubernetes-HDFS/LICENSE b/vnfs/DAaaS/training-core/charts/kubernetes-HDFS/LICENSE new file mode 100644 index 00000000..8dada3ed --- /dev/null +++ b/vnfs/DAaaS/training-core/charts/kubernetes-HDFS/LICENSE @@ -0,0 +1,201 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. 
+ + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. 
You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. 
+ + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. 
In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "{}" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. 
+ + Copyright {yyyy} {name of copyright owner} + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/vnfs/DAaaS/training-core/charts/kubernetes-HDFS/README.md b/vnfs/DAaaS/training-core/charts/kubernetes-HDFS/README.md new file mode 100644 index 00000000..ca694a19 --- /dev/null +++ b/vnfs/DAaaS/training-core/charts/kubernetes-HDFS/README.md @@ -0,0 +1,12 @@ +--- +layout: global +title: HDFS on Kubernetes +--- +# HDFS on Kubernetes +Repository holding helm charts for running Hadoop Distributed File System (HDFS) +on Kubernetes. + +See [charts/README.md](charts/README.md) for how to run the charts. + +See [tests/README.md](tests/README.md) for how to run integration tests for +HDFS on Kubernetes. diff --git a/vnfs/DAaaS/training-core/charts/kubernetes-HDFS/charts/README.md b/vnfs/DAaaS/training-core/charts/kubernetes-HDFS/charts/README.md new file mode 100644 index 00000000..15ee8867 --- /dev/null +++ b/vnfs/DAaaS/training-core/charts/kubernetes-HDFS/charts/README.md @@ -0,0 +1,390 @@ +--- +layout: global +title: HDFS charts +--- + +# HDFS charts + +Helm charts for launching HDFS daemons in a K8s cluster. The main entry-point +chart is `hdfs-k8s`, which is a uber-chart that specifies other charts as +dependency subcharts. This means you can launch all HDFS components using +`hdfs-k8s`. + +Note that the HDFS charts are currently in pre-alpha quality. They are also +being heavily revised and are subject to change. 
+ +HDFS on K8s supports the following features: + - namenode high availability (HA): HDFS namenode daemons are in charge of + maintaining file system metadata concerning which directories have which + files and where are the file data. Namenode crash will cause service outage. + HDFS can run two namenodes in active/standby setup. HDFS on K8s supports HA. + - K8s persistent volumes (PV) for metadata: Namenode crash will cause service + outage. Losing namenode metadata can lead to loss of file system. HDFS on + K8s can store the metadata in remote K8s persistent volumes so that metdata + can remain intact even if both namenode daemons are lost or restarted. + - K8s HostPath volumes for file data: HDFS datanodes daemons store actual + file data. File data should also survive datanode crash or restart. HDFS on + K8s stores the file data on the local disks of the K8s cluster nodes using + K8s HostPath volumes. (We plan to switch to a better mechanism, K8s + persistent local volumes) + - Kerberos: Vanilla HDFS is not secure. Intruders can easily write custom + client code, put a fake user name in requests and steal data. Production + HDFS often secure itself using Kerberos. HDFS on K8s supports Kerberos. + +Here is the list of all charts. + + - hdfs-k8s: main uber-chart. Launches other charts. + - hdfs-namenode-k8s: a statefulset and other K8s components for launching HDFS + namenode daemons, which maintains file system metadata. The chart supports + namenode high availability (HA). + - hdfs-datanode-k8s: a daemonset and other K8s components for launching HDFS + datanode daemons, which are responsible for storing file data. + - hdfs-config-k8s: a configmap containing Hadoop config files for HDFS. + - zookeeper: This chart is NOT in this repo. But hdfs-k8s pulls the zookeeper + chart in the incubator remote repo + (https://kubernetes-charts-incubator.storage.googleapis.com/) + as a dependency and launhces zookeeper daemons. 
Zookeeper makes sure + only one namenode is active in the HA setup, while the other namenode + becomes standby. By default, we will launch three zookeeper servers. + - hdfs-journalnode-k8s: a statefulset and other K8s components for launching + HDFS journalnode quorums, which ensures the file system metadata are + properly shared among the two namenode daemons in the HA setup. + By default, we will launch three journalnode servers. + - hdfs-client-k8s: a pod that is configured to run Hadoop client commands + for accessing HDFS. + - hdfs-krb5-k8s: a size-1 statefulset and other K8s components for launching + a Kerberos server, which can be used to secure HDFS. Disabled by default. + - hdfs-simple-namenode-k8s: Disabled by default. A simpler setup of the + namenode that launches only one namenode. i.e. This does not support HA. It + does not support Kerberos nor persistent volumes either. As it does not + support HA, we also don't need zookeeper nor journal nodes. You may prefer + this if you want the simplest possible setup. + +# Prerequisite + +Requires Kubernetes 1.6+ as the `namenode` and `datanodes` are using +`ClusterFirstWithHostNet`, which was introduced in Kubernetes 1.6 + +# Usage + +## Basic + +The HDFS daemons can be launched using the main `hdfs-k8s` chart. First, build +the main chart using: + +``` + $ helm repo add incubator \ + https://kubernetes-charts-incubator.storage.googleapis.com/ + $ helm dependency build charts/hdfs-k8s +``` + +Zookeeper, journalnodes and namenodes need persistent volumes for storing +metadata. By default, the helm charts do not set the storage class name for +dynamically provisioned volumes, nor does it use persistent volume selectors for +static persistent volumes. + +This means it will rely on a provisioner for default storage volume class for +dynamic volumes. Or if your cluster has statically provisioned volumes, the +chart will match existing volumes entirely based on the size requirements. 
To +override this default behavior, you can specify storage volume classes for +dynamic volumes, or volume selectors for static volumes. See below for how to +set these options. + + - namenodes: Each of the two namenodes needs at least a 100 GB volume. i.e. + Yon need two 100 GB volumes. This can be overridden by the + `hdfs-namenode-k8s.persistence.size` option. + You can also override the storage class or the selector using + `hdfs-namenode-k8s.persistence.storageClass`, or + `hdfs-namenode-k8s.persistence.selector` respectively. For details, see the + values.yaml file inside `hdfs-namenode-k8s` chart dir. + - zookeeper: You need three > 5 GB volumes. i.e. Each of the two zookeeper + servers will need at least 5 GB in the volume. Can be overridden by + the `zookeeper.persistence.size` option. You can also override + the storage class using `zookeeper.persistence.storageClass`. + - journalnodes: Each of the three journalnodes will need at least 20 GB in + the volume. The size can be overridden by the + `hdfs-journalnode-k8s.persistence.size` option. + You can also override the storage class or the selector using + `hdfs-journalnode-k8s.persistence.storageClass`, or + `hdfs-journalnode-k8s.persistence.selector` respectively. For details, see the + values.yaml file inside `hdfs-journalnode-k8s` chart dir. + - kerberos: The single Kerberos server will need at least 20 GB in the volume. + The size can be overridden by the `hdfs-krb5-k8s.persistence.size` option. + You can also override the storage class or the selector using + `hdfs-krb5-k8s.persistence.storageClass`, or + `hdfs-krb5-k8s.persistence.selector` respectively. For details, see the + values.yaml file inside `hdfs-krb5-k8s` chart dir. + +Then launch the main chart. Specify the chart release name say "my-hdfs", +which will be the prefix of the K8s resource names for the HDFS components. + +``` + $ helm install -n my-hdfs charts/hdfs-k8s +``` + +Wait for all daemons to be ready. 
Note some daemons may restart themselves +a few times before they become ready. + +``` + $ kubectl get pod -l release=my-hdfs + + NAME READY STATUS RESTARTS AGE + my-hdfs-client-c749d9f8f-d5pvk 1/1 Running 0 2m + my-hdfs-datanode-o7jia 1/1 Running 3 2m + my-hdfs-datanode-p5kch 1/1 Running 3 2m + my-hdfs-datanode-r3kjo 1/1 Running 3 2m + my-hdfs-journalnode-0 1/1 Running 0 2m + my-hdfs-journalnode-1 1/1 Running 0 2m + my-hdfs-journalnode-2 1/1 Running 0 1m + my-hdfs-namenode-0 1/1 Running 3 2m + my-hdfs-namenode-1 1/1 Running 3 2m + my-hdfs-zookeeper-0 1/1 Running 0 2m + my-hdfs-zookeeper-1 1/1 Running 0 2m + my-hdfs-zookeeper-2 1/1 Running 0 2m +``` + +Namenodes and datanodes are currently using the K8s `hostNetwork` so they can +see physical IPs of each other. If they are not using `hostNetowrk`, +overlay K8s network providers such as weave-net may mask the physical IPs, +which will confuse the data locality later inside namenodes. + +Finally, test with the client pod: + +``` + $ _CLIENT=$(kubectl get pods -l app=hdfs-client,release=my-hdfs -o name | \ + cut -d/ -f 2) + $ kubectl exec $_CLIENT -- hdfs dfsadmin -report + $ kubectl exec $_CLIENT -- hdfs haadmin -getServiceState nn0 + $ kubectl exec $_CLIENT -- hdfs haadmin -getServiceState nn1 + + $ kubectl exec $_CLIENT -- hadoop fs -rm -r -f /tmp + $ kubectl exec $_CLIENT -- hadoop fs -mkdir /tmp + $ kubectl exec $_CLIENT -- sh -c \ + "(head -c 100M < /dev/urandom > /tmp/random-100M)" + $ kubectl exec $_CLIENT -- hadoop fs -copyFromLocal /tmp/random-100M /tmp +``` + +## Kerberos + +Kerberos can be enabled by setting a few related options: + +``` + $ helm install -n my-hdfs charts/hdfs-k8s \ + --set global.kerberosEnabled=true \ + --set global.kerberosRealm=MYCOMPANY.COM \ + --set tags.kerberos=true +``` + +This will launch all charts including the Kerberos server, which will become +ready pretty soon. However, HDFS daemon charts will be blocked as the deamons +require Kerberos service principals to be available. 
So we need to unblock +them by creating those principals. + +First, create a configmap containing the common Kerberos config file: + +``` + _MY_DIR=~/krb5 + mkdir -p $_MY_DIR + _KDC=$(kubectl get pod -l app=hdfs-krb5,release=my-hdfs --no-headers \ + -o name | cut -d/ -f2) + _run kubectl cp $_KDC:/etc/krb5.conf $_MY_DIR/tmp/krb5.conf + _run kubectl create configmap my-hdfs-krb5-config \ + --from-file=$_MY_DIR/tmp/krb5.conf +``` + +Second, create the service principals and passwords. Kerberos requires service +principals to be host specific. Some HDFS daemons are associated with your K8s +cluster nodes' physical host names say kube-n1.mycompany.com, while others are +associated with Kubernetes virtual service names, for instance +my-hdfs-namenode-0.my-hdfs-namenode.default.svc.cluster.local. You can get +the list of these host names like: + +``` + $ _HOSTS=$(kubectl get nodes \ + -o=jsonpath='{.items[*].status.addresses[?(@.type == "Hostname")].address}') + + $ _HOSTS+=$(kubectl describe configmap my-hdfs-config | \ + grep -A 1 -e dfs.namenode.rpc-address.hdfs-k8s \ + -e dfs.namenode.shared.edits.dir | + grep "<value>" | + sed -e "s/<value>//" \ + -e "s/<\/value>//" \ + -e "s/:8020//" \ + -e "s/qjournal:\/\///" \ + -e "s/:8485;/ /g" \ + -e "s/:8485\/hdfs-k8s//") +``` + +Then generate per-host principal accounts and password keytab files. 
+ +``` + $ _SECRET_CMD="kubectl create secret generic my-hdfs-krb5-keytabs" + $ for _HOST in $_HOSTS; do + kubectl exec $_KDC -- kadmin.local -q \ + "addprinc -randkey hdfs/$_HOST@MYCOMPANY.COM" + kubectl exec $_KDC -- kadmin.local -q \ + "addprinc -randkey HTTP/$_HOST@MYCOMPANY.COM" + kubectl exec $_KDC -- kadmin.local -q \ + "ktadd -norandkey -k /tmp/$_HOST.keytab hdfs/$_HOST@MYCOMPANY.COM HTTP/$_HOST@MYCOMPANY.COM" + kubectl cp $_KDC:/tmp/$_HOST.keytab $_MY_DIR/tmp/$_HOST.keytab + _SECRET_CMD+=" --from-file=$_MY_DIR/tmp/$_HOST.keytab" + done +``` + +The above was building a command using a shell variable `SECRET_CMD` for +creating a K8s secret that contains all keytab files. Run the command to create +the secret. + +``` + $ $_SECRET_CMD +``` + +This will unblock all HDFS daemon pods. Wait until they become ready. + +Finally, test the setup using the following commands: + +``` + $ _NN0=$(kubectl get pods -l app=hdfs-namenode,release=my-hdfs -o name | \ + head -1 | \ + cut -d/ -f2) + $ kubectl exec $_NN0 -- sh -c "(apt install -y krb5-user > /dev/null)" \ + || true + $ kubectl exec $_NN0 -- \ + kinit -kt /etc/security/hdfs.keytab \ + hdfs/my-hdfs-namenode-0.my-hdfs-namenode.default.svc.cluster.local@MYCOMPANY.COM + $ kubectl exec $_NN0 -- hdfs dfsadmin -report + $ kubectl exec $_NN0 -- hdfs haadmin -getServiceState nn0 + $ kubectl exec $_NN0 -- hdfs haadmin -getServiceState nn1 + $ kubectl exec $_NN0 -- hadoop fs -rm -r -f /tmp + $ kubectl exec $_NN0 -- hadoop fs -mkdir /tmp + $ kubectl exec $_NN0 -- hadoop fs -chmod 0777 /tmp + $ kubectl exec $_KDC -- kadmin.local -q \ + "addprinc -randkey user1@MYCOMPANY.COM" + $ kubectl exec $_KDC -- kadmin.local -q \ + "ktadd -norandkey -k /tmp/user1.keytab user1@MYCOMPANY.COM" + $ kubectl cp $_KDC:/tmp/user1.keytab $_MY_DIR/tmp/user1.keytab + $ kubectl cp $_MY_DIR/tmp/user1.keytab $_CLIENT:/tmp/user1.keytab + + $ kubectl exec $_CLIENT -- sh -c "(apt install -y krb5-user > /dev/null)" \ + || true + + $ kubectl exec $_CLIENT -- 
kinit -kt /tmp/user1.keytab user1@MYCOMPANY.COM + $ kubectl exec $_CLIENT -- sh -c \ + "(head -c 100M < /dev/urandom > /tmp/random-100M)" + $ kubectl exec $_CLIENT -- hadoop fs -ls / + $ kubectl exec $_CLIENT -- hadoop fs -copyFromLocal /tmp/random-100M /tmp +``` + +## Advanced options + +### Setting HostPath volume locations for datanodes + +HDFS on K8s stores the file data on the local disks of the K8s cluster nodes +using K8s HostPath volumes. You may want to change the default locations. Set +global.dataNodeHostPath to override the default value. Note the option +takes a list in case you want to use multiple disks. + +``` + $ helm install -n my-hdfs charts/hdfs-k8s \ + --set "global.dataNodeHostPath={/mnt/sda1/hdfs-data0,/mnt/sda1/hdfs-data1}" +``` + +### Using an existing zookeeper quorum + +By default, HDFS on K8s pulls in the zookeeper chart in the incubator remote +repo (https://kubernetes-charts-incubator.storage.googleapis.com/) as a +dependency and launhces zookeeper daemons. But your K8s cluster may already +have a zookeeper quorum. + +It is possible to use the existing zookeeper. We just need set a few options +in the helm install command line. It should be something like: + +``` + $helm install -n my-hdfs charts/hdfs-k8s \ + --set condition.subchart.zookeeper=false \ + --set global.zookeeperQuorumOverride=zk-0.zk-svc.default.svc.cluster.local:2181,zk-1.zk-svc.default.svc.cluster.local:2181,zk-2.zk-svc.default.svc.cluster.local:2181 +``` + +Setting `condition.subchart.zookeeper` to false prevents the uber-chart from +bringing in zookeeper as sub-chart. And the `global.zookeeperQuorumOverride` +option specifies the custom address for a zookeeper quorum. Use your +zookeeper address here. + +### Pinning namenodes to specific K8s cluster nodes + +Optionally, you can attach labels to some of your k8s cluster nodes so that +namenodes will always run on those cluster nodes. 
This can allow your HDFS +client outside the Kubernetes cluster to expect stable IP addresses. When used +by those outside clients, Kerberos expects the namenode addresses to be stable. + +``` + $ kubectl label nodes YOUR-HOST-1 hdfs-namenode-selector=hdfs-namenode + $ kubectl label nodes YOUR-HOST-2 hdfs-namenode-selector=hdfs-namenode +``` + +You should add the nodeSelector option to the helm chart command: + +``` + $ helm install -n my-hdfs charts/hdfs-k8s \ + --set hdfs-namenode-k8s.nodeSelector.hdfs-namenode-selector=hdfs-namenode \ + ... +``` + +### Excluding datanodes from some K8s cluster nodes + +You may want to exclude some K8s cluster nodes from datanodes launch target. +For instance, some K8s clusters may let the K8s cluster master node launch +a datanode. To prevent this, label the cluster nodes with +`hdfs-datanode-exclude`. + +``` + $ kubectl label node YOUR-CLUSTER-NODE hdfs-datanode-exclude=yes +``` + +### Launching with a non-HA namenode + +You may want non-HA namenode since it is the simplest possible setup. +Note this won't launch zookeepers nor journalnodes. + +The single namenode is supposed to be pinned to a cluster host using a node +label. Attach a label to one of your K8s cluster node. + +``` + $ kubectl label nodes YOUR-CLUSTER-NODE hdfs-namenode-selector=hdfs-namenode-0 +``` + +The non-HA setup does not even use persistent vlumes. So you don't even +need to prepare persistent volumes. Instead, it is using hostPath volume +of the pinned cluster node. So, just launch the chart while +setting options to turn off HA. You should add the nodeSelector option +so that the single namenode would find the hostPath volume of the same cluster +node when the pod restarts. 
+ +``` + $ helm install -n my-hdfs charts/hdfs-k8s \ + --set tags.ha=false \ + --set tags.simple=true \ + --set global.namenodeHAEnabled=false \ + --set hdfs-simple-namenode-k8s.nodeSelector.hdfs-namenode-selector=hdfs-namenode-0 +``` + +# Security + +## K8s secret containing Kerberos keytab files + +The Kerberos setup creates a K8s secret containing all the keytab files of HDFS +daemon service princialps. This will be mounted onto HDFS daemon pods. You may +want to restrict access to this secret using k8s +[RBAC](https://kubernetes.io/docs/admin/authorization/rbac/), to minimize +exposure of the keytab files. + +## HostPath volumes +`Datanode` daemons run on every cluster node. They also mount k8s `hostPath` +local disk volumes. You may want to restrict access of `hostPath` +using `pod security policy`. +See [reference](https://github.com/kubernetes/examples/blob/master/staging/podsecuritypolicy/rbac/README.md)) + +## Credits + +Many charts are using public Hadoop docker images hosted by +[uhopper](https://hub.docker.com/u/uhopper/). diff --git a/vnfs/DAaaS/training-core/charts/kubernetes-HDFS/charts/hdfs-client-k8s/Chart.yaml b/vnfs/DAaaS/training-core/charts/kubernetes-HDFS/charts/hdfs-client-k8s/Chart.yaml new file mode 100644 index 00000000..00d6f47d --- /dev/null +++ b/vnfs/DAaaS/training-core/charts/kubernetes-HDFS/charts/hdfs-client-k8s/Chart.yaml @@ -0,0 +1,4 @@ +apiVersion: v1 +name: hdfs-client-k8s +version: 0.1.0 +description: A client for HDFS on Kubernetes. 
diff --git a/vnfs/DAaaS/training-core/charts/kubernetes-HDFS/charts/hdfs-client-k8s/templates/client-deployment.yaml b/vnfs/DAaaS/training-core/charts/kubernetes-HDFS/charts/hdfs-client-k8s/templates/client-deployment.yaml new file mode 100644 index 00000000..afffedfd --- /dev/null +++ b/vnfs/DAaaS/training-core/charts/kubernetes-HDFS/charts/hdfs-client-k8s/templates/client-deployment.yaml @@ -0,0 +1,56 @@ +apiVersion: apps/v1 +apiVersion: extensions/v1beta1 +kind: Deployment +metadata: + name: {{ template "hdfs-k8s.client.fullname" . }} + labels: + app: {{ template "hdfs-k8s.client.name" . }} + chart: {{ template "hdfs-k8s.subchart" . }} + release: {{ .Release.Name }} +spec: + replicas: 1 + selector: + matchLabels: + app: {{ template "hdfs-k8s.client.name" . }} + release: {{ .Release.Name }} + template: + metadata: + labels: + app: {{ template "hdfs-k8s.client.name" . }} + release: {{ .Release.Name }} + {{- if .Values.podAnnotations }} + annotations: +{{ toYaml .Values.podAnnotations | indent 8 }} + {{- end }} + spec: + containers: + - name: hdfs-client + image: uhopper/hadoop:2.7.2 + env: + - name: HADOOP_CUSTOM_CONF_DIR + value: /etc/hadoop-custom-conf + - name: MULTIHOMED_NETWORK + value: "0" + command: ['/bin/sh', '-c'] + args: + - /entrypoint.sh /usr/bin/tail -f /var/log/dmesg + volumeMounts: + - name: hdfs-config + mountPath: /etc/hadoop-custom-conf + readOnly: true + {{- if .Values.global.kerberosEnabled }} + - name: kerberos-config + mountPath: /etc/krb5.conf + subPath: {{ .Values.global.kerberosConfigFileName }} + readOnly: true + {{- end }} + restartPolicy: Always + volumes: + - name: hdfs-config + configMap: + name: {{ template "hdfs-k8s.config.fullname" . }} + {{- if .Values.global.kerberosEnabled }} + - name: kerberos-config + configMap: + name: {{ template "krb5-configmap" . 
}} + {{- end }} diff --git a/vnfs/DAaaS/training-core/charts/kubernetes-HDFS/charts/hdfs-config-k8s/.helmignore b/vnfs/DAaaS/training-core/charts/kubernetes-HDFS/charts/hdfs-config-k8s/.helmignore new file mode 100644 index 00000000..f0c13194 --- /dev/null +++ b/vnfs/DAaaS/training-core/charts/kubernetes-HDFS/charts/hdfs-config-k8s/.helmignore @@ -0,0 +1,21 @@ +# Patterns to ignore when building packages. +# This supports shell glob matching, relative path matching, and +# negation (prefixed with !). Only one pattern per line. +.DS_Store +# Common VCS dirs +.git/ +.gitignore +.bzr/ +.bzrignore +.hg/ +.hgignore +.svn/ +# Common backup files +*.swp +*.bak +*.tmp +*~ +# Various IDEs +.project +.idea/ +*.tmproj diff --git a/vnfs/DAaaS/training-core/charts/kubernetes-HDFS/charts/hdfs-config-k8s/Chart.yaml b/vnfs/DAaaS/training-core/charts/kubernetes-HDFS/charts/hdfs-config-k8s/Chart.yaml new file mode 100644 index 00000000..229c4344 --- /dev/null +++ b/vnfs/DAaaS/training-core/charts/kubernetes-HDFS/charts/hdfs-config-k8s/Chart.yaml @@ -0,0 +1,5 @@ +apiVersion: v1 +appVersion: "1.0" +description: A Helm chart for configuring HDFS on Kubernetes +name: hdfs-config-k8s +version: 0.1.0 diff --git a/vnfs/DAaaS/training-core/charts/kubernetes-HDFS/charts/hdfs-config-k8s/templates/_helpers.tpl b/vnfs/DAaaS/training-core/charts/kubernetes-HDFS/charts/hdfs-config-k8s/templates/_helpers.tpl new file mode 100644 index 00000000..cd2ff083 --- /dev/null +++ b/vnfs/DAaaS/training-core/charts/kubernetes-HDFS/charts/hdfs-config-k8s/templates/_helpers.tpl @@ -0,0 +1,64 @@ +{{/* vim: set filetype=mustache: */}} +{{/* +Expand the name of the chart. +*/}} +{{- define "hdfs-config-k8s.name" -}} +{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" -}} +{{- end -}} + +{{/* +Create a default fully qualified app name. +We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec). 
+If release name contains chart name it will be used as a full name. +*/}} +{{- define "hdfs-config-k8s.fullname" -}} +{{- if .Values.fullnameOverride -}} +{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" -}} +{{- else -}} +{{- $name := default .Chart.Name .Values.nameOverride -}} +{{- if contains $name .Release.Name -}} +{{- .Release.Name | trunc 63 | trimSuffix "-" -}} +{{- else -}} +{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" -}} +{{- end -}} +{{- end -}} +{{- end -}} + +{{/* +Create chart name and version as used by the chart label. +*/}} +{{- define "hdfs-config-k8s.chart" -}} +{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" -}} +{{- end -}} + +{{/* +Create the kerberos principal suffix for core HDFS services +*/}} +{{- define "hdfs-principal" -}} +{{- printf "hdfs/_HOST@%s" .Values.kerberosRealm -}} +{{- end -}} + +{{/* +Create the kerberos principal for HTTP services +*/}} +{{- define "http-principal" -}} +{{- printf "HTTP/_HOST@%s" .Values.kerberosRealm -}} +{{- end -}} + +{{/* +Create the datanode data dir list. The below uses two loops to make sure the +last item does not have comma. It uses index 0 for the last item since that is +the only special index that helm template gives us. 
+*/}} +{{- define "datanode-data-dirs" -}} +{{- range $index, $path := .Values.global.dataNodeHostPath -}} + {{- if ne $index 0 -}} + /hadoop/dfs/data/{{ $index }}, + {{- end -}} +{{- end -}} +{{- range $index, $path := .Values.global.dataNodeHostPath -}} + {{- if eq $index 0 -}} + /hadoop/dfs/data/{{ $index }} + {{- end -}} +{{- end -}} +{{- end -}} diff --git a/vnfs/DAaaS/training-core/charts/kubernetes-HDFS/charts/hdfs-config-k8s/templates/configmap.yaml b/vnfs/DAaaS/training-core/charts/kubernetes-HDFS/charts/hdfs-config-k8s/templates/configmap.yaml new file mode 100644 index 00000000..379dab8f --- /dev/null +++ b/vnfs/DAaaS/training-core/charts/kubernetes-HDFS/charts/hdfs-config-k8s/templates/configmap.yaml @@ -0,0 +1,197 @@ +apiVersion: v1 +kind: ConfigMap +metadata: + name: {{ template "hdfs-k8s.config.fullname" . }} + labels: + app: {{ template "hdfs-k8s.client.name" . }} + chart: {{ template "hdfs-k8s.subchart" . }} + release: {{ .Release.Name }} +data: + core-site.xml: | + <?xml version="1.0"?> + <?xml-stylesheet type="text/xsl" href="configuration.xsl"?> + <configuration> + {{- if .Values.global.kerberosEnabled }} + <property> + <name>hadoop.security.authentication</name> + <value>kerberos</value> + </property> + <!-- + This is service level RPC authorization, which is separate from HDFS file + level ACLs. This concerns who can talk to HDFS daemons including + datanodes talking to namenode. As part of the authorization, namenode + tries to validate that DNS can uniquely traslate the datanode IP to the + hostname in the datanode Kerberos principal. (i.e. The client IP is what + Kerberos has authenticated). This does not work well when both namenode + and datanodes are using the Kubernetes HostNetwork and namenode is using + the StatefulSet. The same cluster node IP can be mapped to two different + DNS names. So we disable this. Again this is only service level RPC + authorization and does not affect HDFS file level permission ACLs. 
+ --> + <property> + <name>hadoop.security.authorization</name> + <value>false</value> + </property> + <property> + <name>hadoop.rpc.protection</name> + <value>privacy</value> + </property> + <property> + <name>hadoop.user.group.static.mapping.overrides</name> + <value>hdfs=root;</value> + </property> + {{- end }} + {{- range $key, $value := .Values.customHadoopConfig.coreSite }} + <property> + <name>{{ $key }}</name> + <value>{{ $value }}</value> + </property> + {{- end }} + {{- if .Values.global.namenodeHAEnabled }} + <property> + <name>fs.defaultFS</name> + <value>hdfs://hdfs-k8s</value> + </property> + <property> + <name>ha.zookeeper.quorum</name> + <value>{{ template "zookeeper-quorum" . }}</value> + </property> + {{- else }} + <property> + <name>fs.defaultFS</name> + <value>hdfs://{{ template "namenode-svc-0" . }}:8020</value> + </property> + {{- end }} + </configuration> + hdfs-site.xml: | + <?xml version="1.0"?> + <?xml-stylesheet type="text/xsl" href="configuration.xsl"?> + <configuration> + {{- if .Values.global.kerberosEnabled }} + <property> + <name>dfs.block.access.token.enable</name> + <value>true</value> + </property> + <property> + <name>dfs.encrypt.data.transfer</name> + <value>true</value> + </property> + <property> + <name>dfs.namenode.kerberos.principal</name> + <value>{{ template "hdfs-principal" . }}</value> + </property> + {{/* + TODO: Check if the https principal is no longer needed in newer Hadoop version. + */}} + <property> + <name>dfs.namenode.kerberos.https.principal</name> + <value>{{ template "http-principal" . }}</value> + </property> + <property> + <name>dfs.web.authentication.kerberos.principal</name> + <value>{{ template "http-principal" . }}</value> + </property> + <property> + <name>dfs.namenode.keytab.file</name> + <value>/etc/security/hdfs.keytab</value> + </property> + <property> + <name>dfs.journalnode.kerberos.principal</name> + <value>{{ template "hdfs-principal" . 
}}</value> + </property> + <property> + <name>dfs.journalnode.kerberos.internal.spnego.principal</name> + <value>{{ template "http-principal" . }}</value> + </property> + <property> + <name>dfs.journalnode.keytab.file</name> + <value>/etc/security/hdfs.keytab</value> + </property> + <property> + <name>dfs.datanode.kerberos.principal</name> + <value>{{ template "hdfs-principal" . }}</value> + </property> + <property> + <name>dfs.datanode.kerberos.https.principal</name> + <value>{{ template "http-principal" . }}</value> + </property> + <property> + <name>dfs.datanode.keytab.file</name> + <value>/etc/security/hdfs.keytab</value> + </property> + {{- if .Values.global.jsvcEnabled }} + <property> + <name>dfs.datanode.address</name> + <value>0.0.0.0:1004</value> + </property> + <property> + <name>dfs.datanode.http.address</name> + <value>0.0.0.0:1006</value> + </property> + {{- end }} + {{- end }} + {{- range $key, $value := .Values.customHadoopConfig.hdfsSite }} + <property> + <name>{{ $key }}</name> + <value>{{ $value }}</value> + </property> + {{- end }} + {{- if .Values.global.namenodeHAEnabled }} + <property> + <name>dfs.nameservices</name> + <value>hdfs-k8s</value> + </property> + <property> + <name>dfs.ha.namenodes.hdfs-k8s</name> + <value>nn0,nn1</value> + </property> + <property> + <name>dfs.namenode.rpc-address.hdfs-k8s.nn0</name> + <value>{{ template "namenode-svc-0" . }}:8020</value> + </property> + <property> + <name>dfs.namenode.rpc-address.hdfs-k8s.nn1</name> + <value>{{ template "namenode-svc-1" . }}:8020</value> + </property> + <property> + <name>dfs.namenode.http-address.hdfs-k8s.nn0</name> + <value>{{ template "namenode-svc-0" . }}:50070</value> + </property> + <property> + <name>dfs.namenode.http-address.hdfs-k8s.nn1</name> + <value>{{ template "namenode-svc-1" . }}:50070</value> + </property> + <property> + <name>dfs.namenode.shared.edits.dir</name> + <value>qjournal://{{ template "journalnode-quorum" . 
}}/hdfs-k8s</value> + </property> + <property> + <name>dfs.ha.automatic-failover.enabled</name> + <value>true</value> + </property> + <property> + <name>dfs.ha.fencing.methods</name> + <value>shell(/bin/true)</value> + </property> + <property> + <name>dfs.journalnode.edits.dir</name> + <value>/hadoop/dfs/journal</value> + </property> + <property> + <name>dfs.client.failover.proxy.provider.hdfs-k8s</name> + <value>org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider</value> + </property> + {{- end }} + <property> + <name>dfs.namenode.name.dir</name> + <value>file:///hadoop/dfs/name</value> + </property> + <property> + <name>dfs.namenode.datanode.registration.ip-hostname-check</name> + <value>false</value> + </property> + <property> + <name>dfs.datanode.data.dir</name> + <value>{{ template "datanode-data-dirs" . }}</value> + </property> + </configuration> diff --git a/vnfs/DAaaS/training-core/charts/kubernetes-HDFS/charts/hdfs-datanode-k8s/Chart.yaml b/vnfs/DAaaS/training-core/charts/kubernetes-HDFS/charts/hdfs-datanode-k8s/Chart.yaml new file mode 100644 index 00000000..ec837254 --- /dev/null +++ b/vnfs/DAaaS/training-core/charts/kubernetes-HDFS/charts/hdfs-datanode-k8s/Chart.yaml @@ -0,0 +1,4 @@ +apiVersion: v1 +name: hdfs-datanode-k8s +version: 0.1.0 +description: Datanodes for HDFS on Kubernetes. diff --git a/vnfs/DAaaS/training-core/charts/kubernetes-HDFS/charts/hdfs-datanode-k8s/templates/datanode-daemonset.yaml b/vnfs/DAaaS/training-core/charts/kubernetes-HDFS/charts/hdfs-datanode-k8s/templates/datanode-daemonset.yaml new file mode 100644 index 00000000..09445ed0 --- /dev/null +++ b/vnfs/DAaaS/training-core/charts/kubernetes-HDFS/charts/hdfs-datanode-k8s/templates/datanode-daemonset.yaml @@ -0,0 +1,191 @@ +# Provides datanode helper scripts. +apiVersion: v1 +kind: ConfigMap +metadata: + name: {{ template "hdfs-k8s.datanode.fullname" . }}-scripts + labels: + app: {{ template "hdfs-k8s.datanode.name" . 
}} + chart: {{ template "hdfs-k8s.subchart" . }} + release: {{ .Release.Name }} +data: + check-status.sh: | + #!/usr/bin/env bash + # Exit on error. Append "|| true" if you expect an error. + set -o errexit + # Exit on error inside any functions or subshells. + set -o errtrace + # Do not allow use of undefined vars. Use ${VAR:-} to use an undefined VAR + set -o nounset + # Catch an error in command pipes. e.g. mysqldump fails (but gzip succeeds) + # in `mysqldump |gzip` + set -o pipefail + # Turn on traces, useful while debugging. + set -o xtrace + + # Check if datanode registered with the namenode and got non-null cluster ID. + _PORTS="50075 1006" + _URL_PATH="jmx?qry=Hadoop:service=DataNode,name=DataNodeInfo" + _CLUSTER_ID="" + for _PORT in $_PORTS; do + _CLUSTER_ID+=$(curl -s http://localhost:${_PORT}/$_URL_PATH | \ + grep ClusterId) || true + done + echo $_CLUSTER_ID | grep -q -v null +--- +# Deleting a daemonset may need some trick. See +# https://github.com/kubernetes/kubernetes/issues/33245#issuecomment-261250489 +apiVersion: extensions/v1beta1 +kind: DaemonSet +metadata: + name: {{ template "hdfs-k8s.datanode.fullname" . }} + labels: + app: {{ template "hdfs-k8s.datanode.name" . }} + chart: {{ template "hdfs-k8s.subchart" . }} + release: {{ .Release.Name }} +spec: + template: + metadata: + labels: + app: {{ template "hdfs-k8s.datanode.name" . }} + release: {{ .Release.Name }} + {{- if .Values.podAnnotations }} + annotations: +{{ toYaml .Values.podAnnotations | indent 8 }} + {{- end }} + spec: + {{- if .Values.affinity }} + affinity: +{{ toYaml .Values.affinity | indent 8 }} + {{- else if .Values.global.defaultAffinityEnabled }} + affinity: + nodeAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + nodeSelectorTerms: + - matchExpressions: + - key: {{ template "hdfs-k8s.datanode.fullname" . 
}}-exclude + operator: DoesNotExist + {{- end }} + {{- if .Values.nodeSelector }} + nodeSelector: +{{ toYaml .Values.nodeSelector | indent 8 }} + {{- end }} + {{- if .Values.tolerations }} + tolerations: +{{ toYaml .Values.tolerations | indent 8 }} + {{- end }} + hostNetwork: true + hostPID: true + dnsPolicy: ClusterFirstWithHostNet + containers: + - name: datanode + image: uhopper/hadoop-datanode:2.7.2 + env: + - name: HADOOP_CUSTOM_CONF_DIR + value: /etc/hadoop-custom-conf + - name: MULTIHOMED_NETWORK + value: "0" + {{- if and .Values.global.kerberosEnabled .Values.global.jsvcEnabled }} + - name: HADOOP_SECURE_DN_USER + value: root + - name: JSVC_OUTFILE + value: /dev/stdout + - name: JSVC_ERRFILE + value: /dev/stderr + - name: JSVC_HOME + value: /jsvc-home + {{- end }} + livenessProbe: + exec: + command: + - /dn-scripts/check-status.sh + initialDelaySeconds: 60 + periodSeconds: 30 + readinessProbe: + exec: + command: + - /dn-scripts/check-status.sh + initialDelaySeconds: 60 + periodSeconds: 30 + securityContext: + privileged: true + volumeMounts: + - name: dn-scripts + mountPath: /dn-scripts + readOnly: true + - name: hdfs-config + mountPath: /etc/hadoop-custom-conf + readOnly: true + {{- range $index, $path := .Values.global.dataNodeHostPath }} + - name: hdfs-data-{{ $index }} + mountPath: /hadoop/dfs/data/{{ $index }} + {{- end }} + {{- if .Values.global.kerberosEnabled }} + - name: kerberos-config + mountPath: /etc/krb5.conf + subPath: {{ .Values.global.kerberosConfigFileName }} + readOnly: true + - name: kerberos-keytab-copy + mountPath: /etc/security/ + readOnly: true + {{- if .Values.global.jsvcEnabled }} + - name: jsvc-home + mountPath: /jsvc-home + {{- end }} + {{- end }} + {{- if .Values.global.kerberosEnabled }} + initContainers: + - name: copy-kerberos-keytab + image: busybox:1.27.1 + command: ['sh', '-c'] + args: + - cp /kerberos-keytabs/$MY_NODE_NAME.keytab /kerberos-keytab-copy/hdfs.keytab + env: + - name: MY_NODE_NAME + valueFrom: + fieldRef: + 
fieldPath: spec.nodeName + volumeMounts: + - name: kerberos-keytabs + mountPath: /kerberos-keytabs + - name: kerberos-keytab-copy + mountPath: /kerberos-keytab-copy + {{- if .Values.global.jsvcEnabled }} + - name: copy-jsvc + # Pull by digest because the image doesn't have tags to pin. + image: mschlimb/jsvc@sha256:bf20eb9a319e9a2f87473d8da7418d21503a97528b932800b6b8417cd31e30ef + command: ['sh', '-c'] + args: + - cp /usr/bin/jsvc /jsvc-home/jsvc + volumeMounts: + - name: jsvc-home + mountPath: /jsvc-home + {{- end }} + {{- end }} + restartPolicy: Always + volumes: + - name: dn-scripts + configMap: + name: {{ template "hdfs-k8s.datanode.fullname" . }}-scripts + defaultMode: 0744 + {{- range $index, $path := .Values.global.dataNodeHostPath }} + - name: hdfs-data-{{ $index }} + hostPath: + path: {{ $path }} + {{- end }} + - name: hdfs-config + configMap: + name: {{ template "hdfs-k8s.config.fullname" . }} + {{- if .Values.global.kerberosEnabled }} + - name: kerberos-config + configMap: + name: {{ template "krb5-configmap" . }} + - name: kerberos-keytabs + secret: + secretName: {{ template "krb5-keytabs-secret" . }} + - name: kerberos-keytab-copy + emptyDir: {} + {{- if .Values.global.jsvcEnabled }} + - name: jsvc-home + emptyDir: {} + {{- end }} + {{- end }} diff --git a/vnfs/DAaaS/training-core/charts/kubernetes-HDFS/charts/hdfs-journalnode-k8s/Chart.yaml b/vnfs/DAaaS/training-core/charts/kubernetes-HDFS/charts/hdfs-journalnode-k8s/Chart.yaml new file mode 100644 index 00000000..a7ea6c8f --- /dev/null +++ b/vnfs/DAaaS/training-core/charts/kubernetes-HDFS/charts/hdfs-journalnode-k8s/Chart.yaml @@ -0,0 +1,4 @@ +apiVersion: v1 +name: hdfs-journalnode-k8s +version: 0.1.0 +description: Journalnode quorum used by HDFS on Kubernetes. 
diff --git a/vnfs/DAaaS/training-core/charts/kubernetes-HDFS/charts/hdfs-journalnode-k8s/templates/journalnode-statefulset.yaml b/vnfs/DAaaS/training-core/charts/kubernetes-HDFS/charts/hdfs-journalnode-k8s/templates/journalnode-statefulset.yaml new file mode 100644 index 00000000..22a4a2b4 --- /dev/null +++ b/vnfs/DAaaS/training-core/charts/kubernetes-HDFS/charts/hdfs-journalnode-k8s/templates/journalnode-statefulset.yaml @@ -0,0 +1,180 @@ +# A headless service to create DNS records. +apiVersion: v1 +kind: Service +metadata: + name: {{ template "hdfs-k8s.journalnode.fullname" . }} + labels: + app: {{ template "hdfs-k8s.journalnode.name" . }} + chart: {{ template "hdfs-k8s.subchart" . }} + release: {{ .Release.Name }} + annotations: + # TODO: Deprecated. Replace tolerate-unready-endpoints with + # v1.Service.PublishNotReadyAddresses. + service.alpha.kubernetes.io/tolerate-unready-endpoints: "true" +spec: + ports: + - port: 8485 + name: jn + - port: 8480 + name: http + clusterIP: None + selector: + app: {{ template "hdfs-k8s.journalnode.name" . }} + release: {{ .Release.Name }} +--- +apiVersion: policy/v1beta1 +kind: PodDisruptionBudget +metadata: + name: {{ template "hdfs-k8s.journalnode.fullname" . }} + labels: + app: {{ template "hdfs-k8s.journalnode.name" . }} + chart: {{ template "hdfs-k8s.subchart" . }} + release: {{ .Release.Name }} +spec: + selector: + matchLabels: + app: {{ template "hdfs-k8s.journalnode.name" . }} + release: {{ .Release.Name }} + minAvailable: {{ div .Values.global.journalnodeQuorumSize 2 | add1 }} +--- +apiVersion: apps/v1beta1 +kind: StatefulSet +metadata: + name: {{ template "hdfs-k8s.journalnode.fullname" . }} + labels: + app: {{ template "hdfs-k8s.journalnode.name" . }} + chart: {{ template "hdfs-k8s.subchart" . }} + release: {{ .Release.Name }} +spec: + serviceName: {{ template "hdfs-k8s.journalnode.fullname" . 
}} + replicas: {{ .Values.global.journalnodeQuorumSize }} + template: + metadata: + labels: + app: {{ template "hdfs-k8s.journalnode.name" . }} + release: {{ .Release.Name }} + {{- if .Values.podAnnotations }} + annotations: +{{ toYaml .Values.podAnnotations | indent 8 }} + {{- end }} + spec: + {{- if .Values.affinity }} + affinity: +{{ toYaml .Values.affinity | indent 8 }} + {{- else if .Values.global.defaultAffinityEnabled }} + affinity: + podAntiAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + - labelSelector: + matchExpressions: + - key: "app" + operator: In + values: + - {{ template "hdfs-k8s.journalnode.name" . }} + - key: "release" + operator: In + values: + - {{ .Release.Name }} + topologyKey: "kubernetes.io/hostname" + {{- end }} + {{- if .Values.nodeSelector }} + nodeSelector: +{{ toYaml .Values.nodeSelector | indent 8 }} + {{- end }} + {{- if .Values.tolerations }} + tolerations: +{{ toYaml .Values.tolerations | indent 8 }} + {{- end }} + containers: + - name: hdfs-journalnode + image: uhopper/hadoop-namenode:2.7.2 + env: + - name: HADOOP_CUSTOM_CONF_DIR + value: /etc/hadoop-custom-conf + command: ["/entrypoint.sh"] + args: ["/opt/hadoop-2.7.2/bin/hdfs", "--config", "/etc/hadoop", "journalnode"] + ports: + - containerPort: 8485 + name: jn + - containerPort: 8480 + name: http + volumeMounts: + # Mount a subpath of the volume so that the journal subdir would be + # a brand new empty dir. This way, we won't get affected by + # existing files in the volume top dir. 
+ - name: editdir + mountPath: /hadoop/dfs/journal + subPath: journal + - name: editdir + mountPath: /hadoop/dfs/name + subPath: name + - name: hdfs-config + mountPath: /etc/hadoop-custom-conf + readOnly: true + {{- if .Values.global.kerberosEnabled }} + - name: kerberos-config + mountPath: /etc/krb5.conf + subPath: {{ .Values.global.kerberosConfigFileName }} + readOnly: true + - name: kerberos-keytab-copy + mountPath: /etc/security/ + readOnly: true + {{- end }} + {{- if .Values.global.kerberosEnabled }} + initContainers: + - name: copy-kerberos-keytab + image: busybox:1.27.1 + command: ['sh', '-c'] + args: + - cp /kerberos-keytabs/${MY_KERBEROS_NAME}*.keytab /kerberos-keytab-copy/hdfs.keytab + env: + - name: MY_KERBEROS_NAME + valueFrom: + fieldRef: + fieldPath: metadata.name + volumeMounts: + - name: kerberos-keytabs + mountPath: /kerberos-keytabs + - name: kerberos-keytab-copy + mountPath: /kerberos-keytab-copy + {{- end }} + restartPolicy: Always + volumes: + - name: hdfs-config + configMap: + name: {{ template "hdfs-k8s.config.fullname" . }} + {{- if .Values.global.kerberosEnabled }} + - name: kerberos-config + configMap: + name: {{ template "krb5-configmap" . }} + - name: kerberos-keytabs + secret: + secretName: {{ template "krb5-keytabs-secret" . 
}} + - name: kerberos-keytab-copy + emptyDir: {} + {{- end }} + {{- if .Values.global.podSecurityContext.enabled }} + securityContext: + runAsUser: {{ .Values.global.podSecurityContext.runAsUser }} + fsGroup: {{ .Values.global.podSecurityContext.fsGroup }} + {{- end }} + volumeClaimTemplates: + - metadata: + name: editdir + spec: + accessModes: + - {{ .Values.persistence.accessMode | quote }} + resources: + requests: + storage: {{ .Values.persistence.size | quote }} + {{- if .Values.persistence.storageClass }} + {{- if (eq "-" .Values.persistence.storageClass) }} + storageClassName: "" + {{- else }} + storageClassName: "{{ .Values.persistence.storageClass }}" + {{- end }} + {{- end }} + {{- if .Values.persistence.selector }} + selector: +{{ toYaml .Values.persistence.selector | indent 10 }} + {{- end }} diff --git a/vnfs/DAaaS/training-core/charts/kubernetes-HDFS/charts/hdfs-k8s/.gitignore b/vnfs/DAaaS/training-core/charts/kubernetes-HDFS/charts/hdfs-k8s/.gitignore new file mode 100644 index 00000000..28ebd32d --- /dev/null +++ b/vnfs/DAaaS/training-core/charts/kubernetes-HDFS/charts/hdfs-k8s/.gitignore @@ -0,0 +1,2 @@ +charts +requirements.lock diff --git a/vnfs/DAaaS/training-core/charts/kubernetes-HDFS/charts/hdfs-k8s/.helmignore b/vnfs/DAaaS/training-core/charts/kubernetes-HDFS/charts/hdfs-k8s/.helmignore new file mode 100644 index 00000000..f0c13194 --- /dev/null +++ b/vnfs/DAaaS/training-core/charts/kubernetes-HDFS/charts/hdfs-k8s/.helmignore @@ -0,0 +1,21 @@ +# Patterns to ignore when building packages. +# This supports shell glob matching, relative path matching, and +# negation (prefixed with !). Only one pattern per line. 
+.DS_Store +# Common VCS dirs +.git/ +.gitignore +.bzr/ +.bzrignore +.hg/ +.hgignore +.svn/ +# Common backup files +*.swp +*.bak +*.tmp +*~ +# Various IDEs +.project +.idea/ +*.tmproj diff --git a/vnfs/DAaaS/training-core/charts/kubernetes-HDFS/charts/hdfs-k8s/Chart.yaml b/vnfs/DAaaS/training-core/charts/kubernetes-HDFS/charts/hdfs-k8s/Chart.yaml new file mode 100644 index 00000000..ec58ffb6 --- /dev/null +++ b/vnfs/DAaaS/training-core/charts/kubernetes-HDFS/charts/hdfs-k8s/Chart.yaml @@ -0,0 +1,5 @@ +apiVersion: v1 +appVersion: "1.0" +description: An entry-point Helm chart for launching HDFS on Kubernetes +name: hdfs +version: 0.1.0 diff --git a/vnfs/DAaaS/training-core/charts/kubernetes-HDFS/charts/hdfs-k8s/requirements.yaml b/vnfs/DAaaS/training-core/charts/kubernetes-HDFS/charts/hdfs-k8s/requirements.yaml new file mode 100644 index 00000000..7f803fdc --- /dev/null +++ b/vnfs/DAaaS/training-core/charts/kubernetes-HDFS/charts/hdfs-k8s/requirements.yaml @@ -0,0 +1,59 @@ +dependencies: + - name: zookeeper + version: "1.0.0" + repository: https://kubernetes-charts-incubator.storage.googleapis.com/ + condition: condition.subchart.zookeeper + tags: + - ha + - kerberos + - name: hdfs-config-k8s + version: "0.1.0" + repository: "file://../hdfs-config-k8s" + condition: condition.subchart.config + tags: + - ha + - kerberos + - simple + - name: hdfs-krb5-k8s + version: "0.1.0" + repository: "file://../hdfs-krb5-k8s" + condition: condition.subchart.kerberos + tags: + - kerberos + - name: hdfs-journalnode-k8s + version: "0.1.0" + repository: "file://../hdfs-journalnode-k8s" + condition: condition.subchart.journalnode + tags: + - ha + - kerberos + - name: hdfs-namenode-k8s + version: "0.1.0" + repository: "file://../hdfs-namenode-k8s" + condition: condition.subchart.namenode + tags: + - ha + - kerberos + # Non-HA namenode. 
Disabled by default + - name: hdfs-simple-namenode-k8s + version: "0.1.0" + repository: "file://../hdfs-simple-namenode-k8s" + condition: condition.subchart.simple-namenode + tags: + - simple + - name: hdfs-datanode-k8s + version: "0.1.0" + repository: "file://../hdfs-datanode-k8s" + condition: condition.subchart.datanode + tags: + - ha + - kerberos + - simple + - name: hdfs-client-k8s + version: "0.1.0" + repository: "file://../hdfs-client-k8s" + condition: condition.subchart.client + tags: + - ha + - kerberos + - simple diff --git a/vnfs/DAaaS/training-core/charts/kubernetes-HDFS/charts/hdfs-k8s/templates/_helpers.tpl b/vnfs/DAaaS/training-core/charts/kubernetes-HDFS/charts/hdfs-k8s/templates/_helpers.tpl new file mode 100644 index 00000000..9d03c4d2 --- /dev/null +++ b/vnfs/DAaaS/training-core/charts/kubernetes-HDFS/charts/hdfs-k8s/templates/_helpers.tpl @@ -0,0 +1,264 @@ +{{/* vim: set filetype=mustache: */}} +{{/* +Create a short app name. +*/}} +{{- define "hdfs-k8s.name" -}} +hdfs +{{- end -}} + +{{/* +Create a fully qualified app name. +We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec). +If release name contains chart name it will be used as a full name. +*/}} +{{- define "hdfs-k8s.fullname" -}} +{{- if .Values.global.fullnameOverride -}} +{{- .Values.global.fullnameOverride | trunc 63 | trimSuffix "-" -}} +{{- else -}} +{{- $name := include "hdfs-k8s.name" . -}} +{{- if contains $name .Release.Name -}} +{{- .Release.Name | trunc 63 | trimSuffix "-" -}} +{{- else -}} +{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" -}} +{{- end -}} +{{- end -}} +{{- end -}} + +{{/* +Create chart name and version as used by the subchart label. +*/}} +{{- define "hdfs-k8s.subchart" -}} +{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" -}} +{{- end -}} + +{{- define "zookeeper-fullname" -}} +{{- $fullname := include "hdfs-k8s.fullname" . 
-}} +{{- if contains "zookeeper" $fullname -}} +{{- printf "%s" $fullname -}} +{{- else -}} +{{- printf "%s-zookeeper" $fullname | trunc 63 | trimSuffix "-" -}} +{{- end -}} +{{- end -}} + +{{- define "hdfs-k8s.config.name" -}} +{{- template "hdfs-k8s.name" . -}}-config +{{- end -}} + +{{- define "hdfs-k8s.config.fullname" -}} +{{- $fullname := include "hdfs-k8s.fullname" . -}} +{{- if contains "config" $fullname -}} +{{- printf "%s" $fullname -}} +{{- else -}} +{{- printf "%s-config" $fullname | trunc 63 | trimSuffix "-" -}} +{{- end -}} +{{- end -}} + +{{- define "hdfs-k8s.krb5.name" -}} +{{- template "hdfs-k8s.name" . -}}-krb5 +{{- end -}} + +{{- define "hdfs-k8s.krb5.fullname" -}} +{{- $fullname := include "hdfs-k8s.fullname" . -}} +{{- if contains "config" $fullname -}} +{{- printf "%s" $fullname -}} +{{- else -}} +{{- printf "%s-krb5" $fullname | trunc 63 | trimSuffix "-" -}} +{{- end -}} +{{- end -}} + +{{- define "hdfs-k8s.journalnode.name" -}} +{{- template "hdfs-k8s.name" . -}}-journalnode +{{- end -}} + +{{- define "hdfs-k8s.journalnode.fullname" -}} +{{- $fullname := include "hdfs-k8s.fullname" . -}} +{{- if contains "journalnode" $fullname -}} +{{- printf "%s" $fullname -}} +{{- else -}} +{{- printf "%s-journalnode" $fullname | trunc 63 | trimSuffix "-" -}} +{{- end -}} +{{- end -}} + +{{- define "hdfs-k8s.namenode.name" -}} +{{- template "hdfs-k8s.name" . -}}-namenode +{{- end -}} + +{{- define "hdfs-k8s.namenode.fullname" -}} +{{- $fullname := include "hdfs-k8s.fullname" . -}} +{{- if contains "namenode" $fullname -}} +{{- printf "%s" $fullname -}} +{{- else -}} +{{- printf "%s-namenode" $fullname | trunc 63 | trimSuffix "-" -}} +{{- end -}} +{{- end -}} + +{{- define "hdfs-k8s.datanode.name" -}} +{{- template "hdfs-k8s.name" . -}}-datanode +{{- end -}} + +{{- define "hdfs-k8s.datanode.fullname" -}} +{{- $fullname := include "hdfs-k8s.fullname" . 
-}} +{{- if contains "datanode" $fullname -}} +{{- printf "%s" $fullname -}} +{{- else -}} +{{- printf "%s-datanode" $fullname | trunc 63 | trimSuffix "-" -}} +{{- end -}} +{{- end -}} + +{{- define "hdfs-k8s.client.name" -}} +{{- template "hdfs-k8s.name" . -}}-client +{{- end -}} + +{{- define "hdfs-k8s.client.fullname" -}} +{{- $fullname := include "hdfs-k8s.fullname" . -}} +{{- if contains "client" $fullname -}} +{{- printf "%s" $fullname -}} +{{- else -}} +{{- printf "%s-client" $fullname | trunc 63 | trimSuffix "-" -}} +{{- end -}} +{{- end -}} + +{{/* +Create the kerberos principal suffix for core HDFS services +*/}} +{{- define "hdfs-principal" -}} +{{- printf "hdfs/_HOST@%s" .Values.global.kerberosRealm -}} +{{- end -}} + +{{/* +Create the kerberos principal for HTTP services +*/}} +{{- define "http-principal" -}} +{{- printf "HTTP/_HOST@%s" .Values.global.kerberosRealm -}} +{{- end -}} + +{{/* +Create the name for a Kubernetes Configmap containing a Kerberos config file. +*/}} +{{- define "krb5-configmap" -}} +{{- if .Values.global.kerberosConfigMapOverride -}} +{{- .Values.global.kerberosConfigMapOverride | trunc 63 | trimSuffix "-" -}} +{{- else -}} +{{- $name := include "hdfs-k8s.krb5.fullname" . -}} +{{- printf "%s-config" $name | trunc 63 | trimSuffix "-" -}} +{{- end -}} +{{- end -}} + +{{/* +Create the name for a Kubernetes Secret containing Kerberos keytabs. +*/}} +{{- define "krb5-keytabs-secret" -}} +{{- if .Values.global.kerberosKeytabsSecretOverride -}} +{{- .Values.global.kerberosKeytabsSecretOverride | trunc 63 | trimSuffix "-" -}} +{{- else -}} +{{- $name := include "hdfs-k8s.krb5.fullname" . -}} +{{- printf "%s-keytabs" $name | trunc 63 | trimSuffix "-" -}} +{{- end -}} +{{- end -}} + + +{{/* +Create the domain name part of services. +The HDFS config file should specify FQDN of services. Otherwise, Kerberos +login may fail. 
+*/}} +{{- define "svc-domain" -}} +{{- printf "%s.svc.cluster.local" .Release.Namespace -}} +{{- end -}} + +{{/* +Create the zookeeper quorum server list. The below uses two loops to make +sure the last item does not have comma. It uses index 0 for the last item +since that is the only special index that helm template gives us. +*/}} +{{- define "zookeeper-quorum" -}} +{{- if .Values.global.zookeeperQuorumOverride -}} +{{- .Values.global.zookeeperQuorumOverride -}} +{{- else -}} +{{- $service := include "zookeeper-fullname" . -}} +{{- $domain := include "svc-domain" . -}} +{{- $replicas := .Values.global.zookeeperQuorumSize | int -}} +{{- range $i, $e := until $replicas -}} + {{- if ne $i 0 -}} + {{- printf "%s-%d.%s-headless.%s:2181," $service $i $service $domain -}} + {{- end -}} +{{- end -}} +{{- range $i, $e := until $replicas -}} + {{- if eq $i 0 -}} + {{- printf "%s-%d.%s-headless.%s:2181" $service $i $service $domain -}} + {{- end -}} +{{- end -}} +{{- end -}} +{{- end -}} + +{{/* +Construct the name of the Kerberos KDC pod 0. +*/}} +{{- define "krb5-pod-0" -}} +{{- template "hdfs-k8s.krb5.fullname" . -}}-0 +{{- end -}} + +{{/* +Construct the full name of the Kerberos KDC statefulset member 0. +*/}} +{{- define "krb5-svc-0" -}} +{{- $pod := include "krb5-pod-0" . -}} +{{- $service := include "hdfs-k8s.krb5.fullname" . -}} +{{- $domain := include "svc-domain" . -}} +{{- printf "%s.%s.%s" $pod $service $domain -}} +{{- end -}} + +{{/* +Create the journalnode quorum server list. The below uses two loops to make +sure the last item does not have the delimiter. It uses index 0 for the last +item since that is the only special index that helm template gives us. +*/}} +{{- define "journalnode-quorum" -}} +{{- $service := include "hdfs-k8s.journalnode.fullname" . -}} +{{- $domain := include "svc-domain" . 
-}} +{{- $replicas := .Values.global.journalnodeQuorumSize | int -}} +{{- range $i, $e := until $replicas -}} + {{- if ne $i 0 -}} + {{- printf "%s-%d.%s.%s:8485;" $service $i $service $domain -}} + {{- end -}} +{{- end -}} +{{- range $i, $e := until $replicas -}} + {{- if eq $i 0 -}} + {{- printf "%s-%d.%s.%s:8485" $service $i $service $domain -}} + {{- end -}} +{{- end -}} +{{- end -}} + +{{/* +Construct the name of the namenode pod 0. +*/}} +{{- define "namenode-pod-0" -}} +{{- template "hdfs-k8s.namenode.fullname" . -}}-0 +{{- end -}} + +{{/* +Construct the full name of the namenode statefulset member 0. +*/}} +{{- define "namenode-svc-0" -}} +{{- $pod := include "namenode-pod-0" . -}} +{{- $service := include "hdfs-k8s.namenode.fullname" . -}} +{{- $domain := include "svc-domain" . -}} +{{- printf "%s.%s.%s" $pod $service $domain -}} +{{- end -}} + +{{/* +Construct the name of the namenode pod 1. +*/}} +{{- define "namenode-pod-1" -}} +{{- template "hdfs-k8s.namenode.fullname" . -}}-1 +{{- end -}} + +{{/* +Construct the full name of the namenode statefulset member 1. +*/}} +{{- define "namenode-svc-1" -}} +{{- $pod := include "namenode-pod-1" . -}} +{{- $service := include "hdfs-k8s.namenode.fullname" . -}} +{{- $domain := include "svc-domain" . 
-}} +{{- printf "%s.%s.%s" $pod $service $domain -}} +{{- end -}} diff --git a/vnfs/DAaaS/training-core/charts/kubernetes-HDFS/charts/hdfs-k8s/values.yaml b/vnfs/DAaaS/training-core/charts/kubernetes-HDFS/charts/hdfs-k8s/values.yaml new file mode 100644 index 00000000..77ca3fe0 --- /dev/null +++ b/vnfs/DAaaS/training-core/charts/kubernetes-HDFS/charts/hdfs-k8s/values.yaml @@ -0,0 +1,248 @@ +## ------------------------------------------------------------------------------ +## zookeeper: +## ------------------------------------------------------------------------------ +zookeeper: + ## Configure Zookeeper resource requests and limits + ## ref: http://kubernetes.io/docs/user-guide/compute-resources/ + resources: ~ + + ## The JVM heap size to allocate to Zookeeper + env: + ZK_HEAP_SIZE: 1G + + ## The number of zookeeper server to have in the quorum. + replicaCount: 3 + +## ------------------------------------------------------------------------------ +## hdfs-config-k8s: +## ------------------------------------------------------------------------------ +hdfs-config-k8s: + ## Custom hadoop config keys passed to the hdfs configmap as extra keys. + customHadoopConfig: + coreSite: {} + ## Set config key and value pairs, e.g. + # hadoop.http.authentication.type: kerberos + + hdfsSite: {} + ## Set config key and value pairs, e.g. + # dfs.datanode.use.datanode.hostname: "false" + +## ------------------------------------------------------------------------------ +## hdfs-journalnode-k8s: +## ------------------------------------------------------------------------------ +hdfs-journalnode-k8s: + persistence: + ## Persistent Volume Storage Class + ## If defined, storageClassName: <storageClass> + ## If set to "-", storageClassName: "", which disables dynamic provisioning + ## If undefined (the default) or set to null, no storageClassName spec is + ## set, choosing the default provisioner. 
(gp2 on AWS, standard on + ## GKE, AWS & OpenStack) + ## + # storageClass: "-" + ## To choose a suitable persistent volume from available static volumes, selectors + ## are used. + # selector: + # matchLabels: + # volume-type: hdfs-ssd + accessMode: ReadWriteOnce + size: 20Gi + + ## Node labels and tolerations for pod assignment + nodeSelector: {} + tolerations: [] + affinity: {} + +## ------------------------------------------------------------------------------ +## hdfs-namenode-k8s: +## ------------------------------------------------------------------------------ +hdfs-namenode-k8s: + ## Name of the namenode start script in the config map. + namenodeStartScript: format-and-run.sh + + ## A namenode start script that can have user specified content. + ## Can be used to conduct ad-hoc operation as specified by a user. + ## To use this, also set the namenodeStartScript variable above + ## to custom-run.sh. + customRunScript: | + #!/bin/bash -x + echo Write your own script content! + echo This message will disappear in 10 seconds. + sleep 10 + + persistence: + ## Persistent Volume Storage Class + ## If defined, storageClassName: <storageClass> + ## If set to "-", storageClassName: "", which disables dynamic provisioning + ## If undefined (the default) or set to null, no storageClassName spec is + ## set, choosing the default provisioner. (gp2 on AWS, standard on + ## GKE, AWS & OpenStack) + ## + # storageClass: "-" + + ## To choose a suitable persistent volume from available static volumes, selectors + ## are used. + # selector: + # matchLabels: + # volume-type: hdfs-ssd + + accessMode: ReadWriteOnce + + size: 100Gi + + ## Whether or not to use hostNetwork in namenode pods. Disabling this will break + ## data locality as namenode will see pod virtual IPs and fails to equate them with + ## cluster node physical IPs associated with data nodes. + ## We currently disable this only for CI on minikube. 
+ hostNetworkEnabled: true + + ## Node labels and tolerations for pod assignment + nodeSelector: {} + tolerations: [] + affinity: {} + +## ------------------------------------------------------------------------------ +## hdfs-simple-namenode-k8s: +## ------------------------------------------------------------------------------ +hdfs-simple-namenode-k8s: + ## Path of the local disk directory on a cluster node that will contain the namenode + ## fsimage and edit logs. This will be mounted to the namenode as a k8s HostPath + ## volume. + nameNodeHostPath: /hdfs-name + + ## Node labels and tolerations for pod assignment + nodeSelector: {} + tolerations: [] + affinity: {} + +## ------------------------------------------------------------------------------ +## hdfs-datanode-k8s: +## ------------------------------------------------------------------------------ +hdfs-datanode-k8s: + ## Node labels and tolerations for pod assignment + nodeSelector: {} + tolerations: [] + affinity: {} + +## ------------------------------------------------------------------------------ +## hdfs-krb5-k8s: +## ------------------------------------------------------------------------------ +hdfs-krb5-k8s: + persistence: + ## Persistent Volume Storage Class + ## If defined, storageClassName: <storageClass> + ## If set to "-", storageClassName: "", which disables dynamic provisioning + ## If undefined (the default) or set to null, no storageClassName spec is + ## set, choosing the default provisioner. (gp2 on AWS, standard on + ## GKE, AWS & OpenStack) + ## + # storageClass: "-" + + ## To choose a suitable persistent volume from available static volumes, selectors + ## are used. + # selector: + # matchLabels: + # volume-type: hdfs-ssd + + accessMode: ReadWriteOnce + + size: 20Gi + + ## We use a 3rd party image built from https://github.com/gcavalcante8808/docker-krb5-server. + ## TODO: The pod currently prints out the admin account in plain text. 
+ ## Supply an admin account password using a k8s secret. + ## TODO: The auto-generated passwords might be weak due to low entropy. + ## Increase entropy by running rngd or haveged. + ## TODO: Using latest tag is not desirable. The current image does not have specific tags. + ## Find a way to fix it. + image: + repository: gcavalcante8808/krb5-server + + tag: latest + + pullPolicy: IfNotPresent + + service: + type: ClusterIP + + port: 88 +## ------------------------------------------------------------------------------ +## Global values affecting all sub-charts: +## ------------------------------------------------------------------------------ +global: + ## A list of the local disk directories on cluster nodes that will contain the datanode + ## blocks. These paths will be mounted to the datanode as K8s HostPath volumes. + ## In a command line, the list should be enclosed in '{' and '}'. + ## e.g. --set "dataNodeHostPath={/hdfs-data,/hdfs-data1}" + dataNodeHostPath: + - /hdfs-data + + ## Parameters for determining which Unix user and group IDs to use in pods. + ## Persistent volume permission may need to match these. + podSecurityContext: + enabled: false + runAsUser: 0 + fsGroup: 1000 + + ## Whether or not to expect namenodes in the HA setup. + namenodeHAEnabled: true + + ## The number of zookeeper server to have in the quorum. + ## This should match zookeeper.replicaCount above. Used only when + ## namenodeHAEnabled is set. + zookeeperQuorumSize: 3 + + ## Override zookeeper quorum address. Zookeeper is used for determining which namenode + ## instance is active. Separated by the comma character. Used only when + ## namenodeHAEnabled is set. + ## + # zookeeperQuorumOverride: zk-0.zk-svc.default.svc.cluster.local:2181,zk-1.zk-svc.default.svc.cluster.local:2181,zk-2.zk-svc.default.svc.cluster.local:2181 + + ## How many journal nodes to launch as a quorum. Used only when + ## namenodeHAEnabled is set. 
+ journalnodeQuorumSize: 3 + + ## Whether or not to enable default affinity setting. + defaultAffinityEnabled: true + + ## Whether or not Kerberos support is enabled. + kerberosEnabled: false + + ## Effective only if Kerberos is enabled. Override th name of the k8s + ## ConfigMap containing the kerberos config file. + ## + # kerberosConfigMapOverride: kerberos-config + + ## Effective only if Kerberos is enabled. Name of the kerberos config file inside + ## the config map. + kerberosConfigFileName: krb5.conf + + ## Effective only if Kerberos is enabled. Override the name of the k8s Secret + ## containing the kerberos keytab files of per-host HDFS principals. + ## The secret should have multiple data items. Each data item name + ## should be formatted as: + ## `HOST-NAME.keytab` + ## where HOST-NAME should match the cluster node + ## host name that each per-host hdfs principal is associated with. + ## + # kerberosKeytabsSecretOverride: hdfs-kerberos-keytabs + + ## Required to be non-empty if Kerberos is enabled. Specify your Kerberos realm name. + ## This should match the realm name in your Kerberos config file. + kerberosRealm: MYCOMPANY.COM + + ## Effective only if Kerberos is enabled. Enable protection of datanodes using + ## the jsvc utility. See the reference doc at + ## https://hadoop.apache.org/docs/r2.7.2/hadoop-project-dist/hadoop-common/SecureMode.html#Secure_DataNode + jsvcEnabled: true + +## Tags and conditions for triggering a group of relevant subcharts. +tags: + ## Trigger all subcharts required for high availability. Enabled by default. + ha: true + + ## Trigger all subcharts required for using Kerberos. Disabled by default. + kerberos: false + + ## Trigger all subcharts required for non-HA setup. Disabled by default. 
+ simple: false diff --git a/vnfs/DAaaS/training-core/charts/kubernetes-HDFS/charts/hdfs-krb5-k8s/.helmignore b/vnfs/DAaaS/training-core/charts/kubernetes-HDFS/charts/hdfs-krb5-k8s/.helmignore new file mode 100644 index 00000000..f0c13194 --- /dev/null +++ b/vnfs/DAaaS/training-core/charts/kubernetes-HDFS/charts/hdfs-krb5-k8s/.helmignore @@ -0,0 +1,21 @@ +# Patterns to ignore when building packages. +# This supports shell glob matching, relative path matching, and +# negation (prefixed with !). Only one pattern per line. +.DS_Store +# Common VCS dirs +.git/ +.gitignore +.bzr/ +.bzrignore +.hg/ +.hgignore +.svn/ +# Common backup files +*.swp +*.bak +*.tmp +*~ +# Various IDEs +.project +.idea/ +*.tmproj diff --git a/vnfs/DAaaS/training-core/charts/kubernetes-HDFS/charts/hdfs-krb5-k8s/Chart.yaml b/vnfs/DAaaS/training-core/charts/kubernetes-HDFS/charts/hdfs-krb5-k8s/Chart.yaml new file mode 100644 index 00000000..f8c301f1 --- /dev/null +++ b/vnfs/DAaaS/training-core/charts/kubernetes-HDFS/charts/hdfs-krb5-k8s/Chart.yaml @@ -0,0 +1,4 @@ +apiVersion: v1 +name: hdfs-krb5-k8s +version: 0.1.0 +description: Kerberos server that can be used for HDFS on Kubernetes. diff --git a/vnfs/DAaaS/training-core/charts/kubernetes-HDFS/charts/hdfs-krb5-k8s/templates/statefulset.yaml b/vnfs/DAaaS/training-core/charts/kubernetes-HDFS/charts/hdfs-krb5-k8s/templates/statefulset.yaml new file mode 100644 index 00000000..15be4b2f --- /dev/null +++ b/vnfs/DAaaS/training-core/charts/kubernetes-HDFS/charts/hdfs-krb5-k8s/templates/statefulset.yaml @@ -0,0 +1,99 @@ +# A headless service to create DNS records. +apiVersion: v1 +kind: Service +metadata: + name: {{ template "hdfs-k8s.krb5.fullname" . }} + labels: + app: {{ template "hdfs-k8s.krb5.name" . }} + chart: {{ template "hdfs-k8s.subchart" . }} + release: {{ .Release.Name }} + annotations: + # TODO: Deprecated. Replace tolerate-unready-endpoints with + # v1.Service.PublishNotReadyAddresses. 
+ service.alpha.kubernetes.io/tolerate-unready-endpoints: "true" +spec: + ports: + - port: {{ .Values.service.port }} + protocol: TCP + name: kdc-tcp + - port: {{ .Values.service.port }} + protocol: UDP + name: kdc-udp + clusterIP: None + selector: + app: {{ template "hdfs-k8s.krb5.name" . }} + release: {{ .Release.Name }} +--- +apiVersion: apps/v1beta1 +kind: StatefulSet +metadata: + name: {{ template "hdfs-k8s.krb5.fullname" . }} + labels: + app: {{ template "hdfs-k8s.krb5.name" . }} + chart: {{ template "hdfs-k8s.subchart" . }} + release: {{ .Release.Name }} +spec: + serviceName: {{ template "hdfs-k8s.krb5.fullname" . }} + replicas: {{ .Values.replicaCount }} + selector: + matchLabels: + app: {{ template "hdfs-k8s.krb5.name" . }} + release: {{ .Release.Name }} + template: + metadata: + labels: + app: {{ template "hdfs-k8s.krb5.name" . }} + release: {{ .Release.Name }} + {{- if .Values.podAnnotations }} + annotations: +{{ toYaml .Values.podAnnotations | indent 8 }} + {{- end }} + spec: + containers: + - name: {{ .Chart.Name }} + image: "{{ .Values.image.repository }}:{{ .Values.image.tag }}" + imagePullPolicy: {{ .Values.image.pullPolicy }} + env: + - name: KRB5_REALM + value: {{ .Values.global.kerberosRealm }} + - name: KRB5_KDC + value: {{ template "krb5-svc-0" . 
}} + ports: + - name: kdc-tcp + containerPort: 88 + protocol: TCP + - name: kdc-udp + containerPort: 88 + protocol: UDP + livenessProbe: + tcpSocket: + port: kdc-tcp + readinessProbe: + tcpSocket: + port: kdc-tcp + restartPolicy: Always + {{- if .Values.global.podSecurityContext.enabled }} + securityContext: + runAsUser: {{ .Values.global.podSecurityContext.runAsUser }} + fsGroup: {{ .Values.global.podSecurityContext.fsGroup }} + {{- end }} + volumeClaimTemplates: + - metadata: + name: datadir + spec: + accessModes: + - {{ .Values.persistence.accessMode | quote }} + resources: + requests: + storage: {{ .Values.persistence.size | quote }} + {{- if .Values.persistence.storageClass }} + {{- if (eq "-" .Values.persistence.storageClass) }} + storageClassName: "" + {{- else }} + storageClassName: "{{ .Values.persistence.storageClass }}" + {{- end }} + {{- end }} + {{- if .Values.persistence.selector }} + selector: +{{ toYaml .Values.persistence.selector | indent 10 }} + {{- end }} diff --git a/vnfs/DAaaS/training-core/charts/kubernetes-HDFS/charts/hdfs-namenode-k8s/Chart.yaml b/vnfs/DAaaS/training-core/charts/kubernetes-HDFS/charts/hdfs-namenode-k8s/Chart.yaml new file mode 100644 index 00000000..f45655f5 --- /dev/null +++ b/vnfs/DAaaS/training-core/charts/kubernetes-HDFS/charts/hdfs-namenode-k8s/Chart.yaml @@ -0,0 +1,4 @@ +apiVersion: v1 +name: hdfs-namenode-k8s +version: 0.1.0 +description: namenodes in HDFS on Kubernetes. diff --git a/vnfs/DAaaS/training-core/charts/kubernetes-HDFS/charts/hdfs-namenode-k8s/templates/namenode-statefulset.yaml b/vnfs/DAaaS/training-core/charts/kubernetes-HDFS/charts/hdfs-namenode-k8s/templates/namenode-statefulset.yaml new file mode 100644 index 00000000..44e8fc60 --- /dev/null +++ b/vnfs/DAaaS/training-core/charts/kubernetes-HDFS/charts/hdfs-namenode-k8s/templates/namenode-statefulset.yaml @@ -0,0 +1,287 @@ +# A headless service to create DNS records. 
+apiVersion: v1 +kind: Service +metadata: + name: {{ template "hdfs-k8s.namenode.fullname" . }} + labels: + app: {{ template "hdfs-k8s.namenode.name" . }} + chart: {{ template "hdfs-k8s.subchart" . }} + release: {{ .Release.Name }} + annotations: + # TODO: Deprecated. Replace tolerate-unready-endpoints with + # v1.Service.PublishNotReadyAddresses. + service.alpha.kubernetes.io/tolerate-unready-endpoints: "true" +spec: + ports: + - port: 8020 + name: fs + - port: 50070 + name: http + clusterIP: None + selector: + app: {{ template "hdfs-k8s.namenode.name" . }} + release: {{ .Release.Name }} +--- +apiVersion: policy/v1beta1 +kind: PodDisruptionBudget +metadata: + name: {{ template "hdfs-k8s.namenode.fullname" . }} + labels: + app: {{ template "hdfs-k8s.namenode.name" . }} + chart: {{ template "hdfs-k8s.subchart" . }} + release: {{ .Release.Name }} +spec: + selector: + matchLabels: + app: {{ template "hdfs-k8s.namenode.name" . }} + release: {{ .Release.Name }} + minAvailable: 1 +--- +# Provides namenode helper scripts. Most of them are start scripts +# that meet different needs. +# TODO: Support upgrade of metadata in case a new Hadoop version requires it. +apiVersion: v1 +kind: ConfigMap +metadata: + name: {{ template "hdfs-k8s.namenode.fullname" . }}-scripts + labels: + app: {{ template "hdfs-k8s.namenode.name" . }} + chart: {{ template "hdfs-k8s.subchart" . }} + release: {{ .Release.Name }} +data: + # A bootstrap script which will start namenode daemons after conducting + # optional metadata initialization steps. The metadata initialization + # steps will take place in case the metadata dir is empty, + # which will be the case only for the very first run. The specific steps + # will differ depending on whether the namenode is active or standby. + # We also assume, for the very first run, namenode-0 will be active and + # namenode-1 will be standby as StatefulSet will launch namenode-0 first + # and zookeeper will determine the sole namenode to be the active one. 
+ # For active namenode, the initialization steps will format the metadata, + # zookeeper dir and journal node data entries. + # For standby namenode, the initialization steps will simply receieve + # the first batch of metadata updates from the journal node. + format-and-run.sh: | + #!/usr/bin/env bash + # Exit on error. Append "|| true" if you expect an error. + set -o errexit + # Exit on error inside any functions or subshells. + set -o errtrace + # Do not allow use of undefined vars. Use ${VAR:-} to use an undefined VAR + set -o nounset + # Catch an error in command pipes. e.g. mysqldump fails (but gzip succeeds) + # in `mysqldump |gzip` + set -o pipefail + # Turn on traces, useful while debugging. + set -o xtrace + + _HDFS_BIN=$HADOOP_PREFIX/bin/hdfs + _METADATA_DIR=/hadoop/dfs/name/current + if [[ "$MY_POD" = "$NAMENODE_POD_0" ]]; then + if [[ ! -d $_METADATA_DIR ]]; then + $_HDFS_BIN --config $HADOOP_CONF_DIR namenode -format \ + -nonInteractive hdfs-k8s || + (rm -rf $_METADATA_DIR; exit 1) + fi + _ZKFC_FORMATTED=/hadoop/dfs/name/current/.hdfs-k8s-zkfc-formatted + if [[ ! -f $_ZKFC_FORMATTED ]]; then + _OUT=$($_HDFS_BIN --config $HADOOP_CONF_DIR zkfc -formatZK -nonInteractive 2>&1) + # zkfc masks fatal exceptions and returns exit code 0 + (echo $_OUT | grep -q "FATAL") && exit 1 + touch $_ZKFC_FORMATTED + fi + elif [[ "$MY_POD" = "$NAMENODE_POD_1" ]]; then + if [[ ! -d $_METADATA_DIR ]]; then + $_HDFS_BIN --config $HADOOP_CONF_DIR namenode -bootstrapStandby \ + -nonInteractive || \ + (rm -rf $_METADATA_DIR; exit 1) + fi + fi + $HADOOP_PREFIX/sbin/hadoop-daemon.sh --config $HADOOP_CONF_DIR start zkfc + $_HDFS_BIN --config $HADOOP_CONF_DIR namenode + + # A start script that will just hang indefinitely. A user can then get + # inside the pod and debug. Or a user can conduct a custom manual operations. + do-nothing.sh: | + #!/usr/bin/env bash + tail -f /var/log/dmesg + + # A start script that has user specified content. 
Can be used to conduct + # ad-hoc operation as specified by a user. + custom-run.sh: {{ .Values.customRunScript | quote }} +--- +apiVersion: apps/v1beta1 +kind: StatefulSet +metadata: + name: {{ template "hdfs-k8s.namenode.fullname" . }} + labels: + app: {{ template "hdfs-k8s.namenode.name" . }} + chart: {{ template "hdfs-k8s.subchart" . }} + release: {{ .Release.Name }} +spec: + serviceName: {{ template "hdfs-k8s.namenode.fullname" . }} + replicas: 2 + template: + metadata: + labels: + app: {{ template "hdfs-k8s.namenode.name" . }} + release: {{ .Release.Name }} + {{- if .Values.podAnnotations }} + annotations: +{{ toYaml .Values.podAnnotations | indent 8 }} + {{- end }} + spec: + {{- if .Values.hostNetworkEnabled }} + # Use hostNetwork so datanodes connect to namenode without going through an overlay network + # like weave. Otherwise, namenode fails to see physical IP address of datanodes. + # Disabling this will break data locality as namenode will see pod virtual IPs and fails to + # equate them with cluster node physical IPs associated with data nodes. + # We currently disable this only for CI on minikube. + hostNetwork: true + hostPID: true + dnsPolicy: ClusterFirstWithHostNet + {{- else }} + dnsPolicy: ClusterFirst + {{- end }} + {{- if .Values.affinity }} + affinity: +{{ toYaml .Values.affinity | indent 8 }} + {{- else if .Values.global.defaultAffinityEnabled }} + affinity: + podAntiAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + - labelSelector: + matchExpressions: + - key: "app" + operator: In + values: + - {{ template "hdfs-k8s.namenode.name" . }} + - key: "release" + operator: In + values: + - {{ .Release.Name }} + topologyKey: "kubernetes.io/hostname" + {{- end }} + {{- if .Values.nodeSelector }} + nodeSelector: +{{ toYaml .Values.nodeSelector | indent 8 }} + {{- end }} + {{- if .Values.tolerations }} + tolerations: +{{ toYaml .Values.tolerations | indent 8 }} + {{- end }} + containers: + # TODO: Support hadoop version as option. 
+ - name: hdfs-namenode + image: uhopper/hadoop-namenode:2.7.2 + env: + - name: HADOOP_CUSTOM_CONF_DIR + value: /etc/hadoop-custom-conf + - name: MULTIHOMED_NETWORK + value: "0" + # Used by the start script below. + - name: MY_POD + valueFrom: + fieldRef: + fieldPath: metadata.name + - name: NAMENODE_POD_0 + value: {{ template "namenode-pod-0" . }} + - name: NAMENODE_POD_1 + value: {{ template "namenode-pod-1" . }} + command: ['/bin/sh', '-c'] + # The start script is provided by a config map. + args: + - /entrypoint.sh "/nn-scripts/{{ .Values.namenodeStartScript }}" + ports: + - containerPort: 8020 + name: fs + - containerPort: 50070 + name: http + volumeMounts: + - name: nn-scripts + mountPath: /nn-scripts + readOnly: true + # Mount a subpath of the volume so that the name subdir would be a + # brand new empty dir. This way, we won't get affected by existing + # files in the volume top dir. + - name: metadatadir + mountPath: /hadoop/dfs/name + subPath: name + - name: hdfs-config + mountPath: /etc/hadoop-custom-conf + readOnly: true + {{- if .Values.global.kerberosEnabled }} + - name: kerberos-config + mountPath: /etc/krb5.conf + subPath: {{ .Values.global.kerberosConfigFileName }} + readOnly: true + - name: kerberos-keytab-copy + mountPath: /etc/security/ + readOnly: true + {{- end }} + {{- if .Values.global.kerberosEnabled }} + initContainers: + - name: copy-kerberos-keytab + image: busybox:1.27.1 + command: ['sh', '-c'] + args: + - cp /kerberos-keytabs/${MY_KERBEROS_NAME}*.keytab /kerberos-keytab-copy/hdfs.keytab + env: + - name: MY_KERBEROS_NAME + valueFrom: + fieldRef: + {{- if .Values.hostNetworkEnabled }} + fieldPath: spec.nodeName + {{- else }} + fieldPath: metadata.name + {{- end }} + volumeMounts: + - name: kerberos-keytabs + mountPath: /kerberos-keytabs + - name: kerberos-keytab-copy + mountPath: /kerberos-keytab-copy + {{- end }} + restartPolicy: Always + volumes: + - name: nn-scripts + configMap: + name: {{ template "hdfs-k8s.namenode.fullname" . 
}}-scripts + defaultMode: 0744 + - name: hdfs-config + configMap: + name: {{ template "hdfs-k8s.config.fullname" . }} + {{- if .Values.global.kerberosEnabled }} + - name: kerberos-config + configMap: + name: {{ template "krb5-configmap" . }} + - name: kerberos-keytabs + secret: + secretName: {{ template "krb5-keytabs-secret" . }} + - name: kerberos-keytab-copy + emptyDir: {} + {{- end }} + {{- if .Values.global.podSecurityContext.enabled }} + securityContext: + runAsUser: {{ .Values.global.podSecurityContext.runAsUser }} + fsGroup: {{ .Values.global.podSecurityContext.fsGroup }} + {{- end }} + volumeClaimTemplates: + - metadata: + name: metadatadir + spec: + accessModes: + - {{ .Values.persistence.accessMode | quote }} + resources: + requests: + storage: {{ .Values.persistence.size | quote }} + {{- if .Values.persistence.storageClass }} + {{- if (eq "-" .Values.persistence.storageClass) }} + storageClassName: "" + {{- else }} + storageClassName: "{{ .Values.persistence.storageClass }}" + {{- end }} + {{- end }} + {{- if .Values.persistence.selector }} + selector: +{{ toYaml .Values.persistence.selector | indent 10 }} + {{- end }} diff --git a/vnfs/DAaaS/training-core/charts/kubernetes-HDFS/charts/hdfs-simple-namenode-k8s/Chart.yaml b/vnfs/DAaaS/training-core/charts/kubernetes-HDFS/charts/hdfs-simple-namenode-k8s/Chart.yaml new file mode 100644 index 00000000..bcf6f5b0 --- /dev/null +++ b/vnfs/DAaaS/training-core/charts/kubernetes-HDFS/charts/hdfs-simple-namenode-k8s/Chart.yaml @@ -0,0 +1,4 @@ +apiVersion: v1 +name: hdfs-simple-namenode-k8s +version: 0.1.0 +description: Non-HA namenode for HDFS on Kubernetes. 
diff --git a/vnfs/DAaaS/training-core/charts/kubernetes-HDFS/charts/hdfs-simple-namenode-k8s/templates/namenode-statefulset.yaml b/vnfs/DAaaS/training-core/charts/kubernetes-HDFS/charts/hdfs-simple-namenode-k8s/templates/namenode-statefulset.yaml new file mode 100644 index 00000000..ab92efa9 --- /dev/null +++ b/vnfs/DAaaS/training-core/charts/kubernetes-HDFS/charts/hdfs-simple-namenode-k8s/templates/namenode-statefulset.yaml @@ -0,0 +1,82 @@ +# A headless service to create DNS records. +apiVersion: v1 +kind: Service +metadata: + name: {{ template "hdfs-k8s.namenode.fullname" . }} + labels: + app: {{ template "hdfs-k8s.namenode.name" . }} + chart: {{ template "hdfs-k8s.subchart" . }} + release: {{ .Release.Name }} +spec: + ports: + - port: 8020 + name: fs + clusterIP: None + selector: + app: {{ template "hdfs-k8s.namenode.name" . }} + release: {{ .Release.Name }} +--- +apiVersion: apps/v1beta1 +kind: StatefulSet +metadata: + name: {{ template "hdfs-k8s.namenode.fullname" . }} + labels: + app: {{ template "hdfs-k8s.namenode.name" . }} + chart: {{ template "hdfs-k8s.subchart" . }} + release: {{ .Release.Name }} +spec: + serviceName: {{ template "hdfs-k8s.namenode.fullname" . }} + # Create a size-1 set. + replicas: 1 + template: + metadata: + labels: + app: {{ template "hdfs-k8s.namenode.name" . }} + release: {{ .Release.Name }} + {{- if .Values.podAnnotations }} + annotations: +{{ toYaml .Values.podAnnotations | indent 8 }} + {{- end }} + spec: + {{- if .Values.affinity }} + affinity: +{{ toYaml .Values.affinity | indent 8 }} + {{- end }} + {{- if .Values.nodeSelector }} + nodeSelector: +{{ toYaml .Values.nodeSelector | indent 8 }} + {{- end }} + {{- if .Values.tolerations }} + tolerations: +{{ toYaml .Values.tolerations | indent 8 }} + {{- end }} + # Use hostNetwork so datanodes connect to namenode without going through an overlay network + # like weave. Otherwise, namenode fails to see physical IP address of datanodes. 
+ hostNetwork: true + hostPID: true + dnsPolicy: ClusterFirstWithHostNet + containers: + - name: hdfs-namenode + image: uhopper/hadoop-namenode:2.7.2 + env: + - name: HADOOP_CUSTOM_CONF_DIR + value: /etc/hadoop-custom-conf + - name: CLUSTER_NAME + value: hdfs-k8s + ports: + - containerPort: 8020 + name: fs + volumeMounts: + - name: hdfs-name + mountPath: /hadoop/dfs/name + - name: hdfs-config + mountPath: /etc/hadoop-custom-conf + readOnly: true + restartPolicy: Always + volumes: + - name: hdfs-name + hostPath: + path: {{ .Values.nameNodeHostPath }} + - name: hdfs-config + configMap: + name: {{ template "hdfs-k8s.config.fullname" . }} |