aboutsummaryrefslogtreecommitdiffstats
path: root/vnfs/DAaaS/training-core/charts/kubernetes-HDFS
diff options
context:
space:
mode:
Diffstat (limited to 'vnfs/DAaaS/training-core/charts/kubernetes-HDFS')
-rw-r--r--vnfs/DAaaS/training-core/charts/kubernetes-HDFS/.gitignore2
-rw-r--r--vnfs/DAaaS/training-core/charts/kubernetes-HDFS/.travis.yml20
-rw-r--r--vnfs/DAaaS/training-core/charts/kubernetes-HDFS/LICENSE201
-rw-r--r--vnfs/DAaaS/training-core/charts/kubernetes-HDFS/README.md12
-rw-r--r--vnfs/DAaaS/training-core/charts/kubernetes-HDFS/charts/README.md390
-rw-r--r--vnfs/DAaaS/training-core/charts/kubernetes-HDFS/charts/hdfs-client-k8s/Chart.yaml4
-rw-r--r--vnfs/DAaaS/training-core/charts/kubernetes-HDFS/charts/hdfs-client-k8s/templates/client-deployment.yaml56
-rw-r--r--vnfs/DAaaS/training-core/charts/kubernetes-HDFS/charts/hdfs-config-k8s/.helmignore21
-rw-r--r--vnfs/DAaaS/training-core/charts/kubernetes-HDFS/charts/hdfs-config-k8s/Chart.yaml5
-rw-r--r--vnfs/DAaaS/training-core/charts/kubernetes-HDFS/charts/hdfs-config-k8s/templates/_helpers.tpl64
-rw-r--r--vnfs/DAaaS/training-core/charts/kubernetes-HDFS/charts/hdfs-config-k8s/templates/configmap.yaml197
-rw-r--r--vnfs/DAaaS/training-core/charts/kubernetes-HDFS/charts/hdfs-datanode-k8s/Chart.yaml4
-rw-r--r--vnfs/DAaaS/training-core/charts/kubernetes-HDFS/charts/hdfs-datanode-k8s/templates/datanode-daemonset.yaml191
-rw-r--r--vnfs/DAaaS/training-core/charts/kubernetes-HDFS/charts/hdfs-journalnode-k8s/Chart.yaml4
-rw-r--r--vnfs/DAaaS/training-core/charts/kubernetes-HDFS/charts/hdfs-journalnode-k8s/templates/journalnode-statefulset.yaml180
-rw-r--r--vnfs/DAaaS/training-core/charts/kubernetes-HDFS/charts/hdfs-k8s/.gitignore2
-rw-r--r--vnfs/DAaaS/training-core/charts/kubernetes-HDFS/charts/hdfs-k8s/.helmignore21
-rw-r--r--vnfs/DAaaS/training-core/charts/kubernetes-HDFS/charts/hdfs-k8s/Chart.yaml5
-rw-r--r--vnfs/DAaaS/training-core/charts/kubernetes-HDFS/charts/hdfs-k8s/requirements.yaml59
-rw-r--r--vnfs/DAaaS/training-core/charts/kubernetes-HDFS/charts/hdfs-k8s/templates/_helpers.tpl264
-rw-r--r--vnfs/DAaaS/training-core/charts/kubernetes-HDFS/charts/hdfs-k8s/values.yaml248
-rw-r--r--vnfs/DAaaS/training-core/charts/kubernetes-HDFS/charts/hdfs-krb5-k8s/.helmignore21
-rw-r--r--vnfs/DAaaS/training-core/charts/kubernetes-HDFS/charts/hdfs-krb5-k8s/Chart.yaml4
-rw-r--r--vnfs/DAaaS/training-core/charts/kubernetes-HDFS/charts/hdfs-krb5-k8s/templates/statefulset.yaml99
-rw-r--r--vnfs/DAaaS/training-core/charts/kubernetes-HDFS/charts/hdfs-namenode-k8s/Chart.yaml4
-rw-r--r--vnfs/DAaaS/training-core/charts/kubernetes-HDFS/charts/hdfs-namenode-k8s/templates/namenode-statefulset.yaml287
-rw-r--r--vnfs/DAaaS/training-core/charts/kubernetes-HDFS/charts/hdfs-simple-namenode-k8s/Chart.yaml4
-rw-r--r--vnfs/DAaaS/training-core/charts/kubernetes-HDFS/charts/hdfs-simple-namenode-k8s/templates/namenode-statefulset.yaml82
28 files changed, 2451 insertions, 0 deletions
diff --git a/vnfs/DAaaS/training-core/charts/kubernetes-HDFS/.gitignore b/vnfs/DAaaS/training-core/charts/kubernetes-HDFS/.gitignore
new file mode 100644
index 00000000..fc82fcb5
--- /dev/null
+++ b/vnfs/DAaaS/training-core/charts/kubernetes-HDFS/.gitignore
@@ -0,0 +1,2 @@
+tests/bin
+tests/tmp
diff --git a/vnfs/DAaaS/training-core/charts/kubernetes-HDFS/.travis.yml b/vnfs/DAaaS/training-core/charts/kubernetes-HDFS/.travis.yml
new file mode 100644
index 00000000..1d3351da
--- /dev/null
+++ b/vnfs/DAaaS/training-core/charts/kubernetes-HDFS/.travis.yml
@@ -0,0 +1,20 @@
+sudo: required
+
+env:
+ - CASES=_basic.sh
+ - CASES=_basic-subcharts.sh
+ - CASES=_kerberos.sh
+ - CASES=_single-namenode.sh
+
+before_script:
+# Required for K8s v1.10.x. See
+# https://github.com/kubernetes/kubernetes/issues/61058#issuecomment-372764783
+- sudo mount --make-shared / && sudo service docker restart
+- USE_MINIKUBE_DRIVER_NONE=true USE_SUDO_MINIKUBE=true tests/setup.sh
+
+script:
+- tests/run.sh
+
+after_script:
+- tests/cleanup.sh
+- tests/teardown.sh
diff --git a/vnfs/DAaaS/training-core/charts/kubernetes-HDFS/LICENSE b/vnfs/DAaaS/training-core/charts/kubernetes-HDFS/LICENSE
new file mode 100644
index 00000000..8dada3ed
--- /dev/null
+++ b/vnfs/DAaaS/training-core/charts/kubernetes-HDFS/LICENSE
@@ -0,0 +1,201 @@
+ Apache License
+ Version 2.0, January 2004
+ http://www.apache.org/licenses/
+
+ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+ 1. Definitions.
+
+ "License" shall mean the terms and conditions for use, reproduction,
+ and distribution as defined by Sections 1 through 9 of this document.
+
+ "Licensor" shall mean the copyright owner or entity authorized by
+ the copyright owner that is granting the License.
+
+ "Legal Entity" shall mean the union of the acting entity and all
+ other entities that control, are controlled by, or are under common
+ control with that entity. For the purposes of this definition,
+ "control" means (i) the power, direct or indirect, to cause the
+ direction or management of such entity, whether by contract or
+ otherwise, or (ii) ownership of fifty percent (50%) or more of the
+ outstanding shares, or (iii) beneficial ownership of such entity.
+
+ "You" (or "Your") shall mean an individual or Legal Entity
+ exercising permissions granted by this License.
+
+ "Source" form shall mean the preferred form for making modifications,
+ including but not limited to software source code, documentation
+ source, and configuration files.
+
+ "Object" form shall mean any form resulting from mechanical
+ transformation or translation of a Source form, including but
+ not limited to compiled object code, generated documentation,
+ and conversions to other media types.
+
+ "Work" shall mean the work of authorship, whether in Source or
+ Object form, made available under the License, as indicated by a
+ copyright notice that is included in or attached to the work
+ (an example is provided in the Appendix below).
+
+ "Derivative Works" shall mean any work, whether in Source or Object
+ form, that is based on (or derived from) the Work and for which the
+ editorial revisions, annotations, elaborations, or other modifications
+ represent, as a whole, an original work of authorship. For the purposes
+ of this License, Derivative Works shall not include works that remain
+ separable from, or merely link (or bind by name) to the interfaces of,
+ the Work and Derivative Works thereof.
+
+ "Contribution" shall mean any work of authorship, including
+ the original version of the Work and any modifications or additions
+ to that Work or Derivative Works thereof, that is intentionally
+ submitted to Licensor for inclusion in the Work by the copyright owner
+ or by an individual or Legal Entity authorized to submit on behalf of
+ the copyright owner. For the purposes of this definition, "submitted"
+ means any form of electronic, verbal, or written communication sent
+ to the Licensor or its representatives, including but not limited to
+ communication on electronic mailing lists, source code control systems,
+ and issue tracking systems that are managed by, or on behalf of, the
+ Licensor for the purpose of discussing and improving the Work, but
+ excluding communication that is conspicuously marked or otherwise
+ designated in writing by the copyright owner as "Not a Contribution."
+
+ "Contributor" shall mean Licensor and any individual or Legal Entity
+ on behalf of whom a Contribution has been received by Licensor and
+ subsequently incorporated within the Work.
+
+ 2. Grant of Copyright License. Subject to the terms and conditions of
+ this License, each Contributor hereby grants to You a perpetual,
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+ copyright license to reproduce, prepare Derivative Works of,
+ publicly display, publicly perform, sublicense, and distribute the
+ Work and such Derivative Works in Source or Object form.
+
+ 3. Grant of Patent License. Subject to the terms and conditions of
+ this License, each Contributor hereby grants to You a perpetual,
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+ (except as stated in this section) patent license to make, have made,
+ use, offer to sell, sell, import, and otherwise transfer the Work,
+ where such license applies only to those patent claims licensable
+ by such Contributor that are necessarily infringed by their
+ Contribution(s) alone or by combination of their Contribution(s)
+ with the Work to which such Contribution(s) was submitted. If You
+ institute patent litigation against any entity (including a
+ cross-claim or counterclaim in a lawsuit) alleging that the Work
+ or a Contribution incorporated within the Work constitutes direct
+ or contributory patent infringement, then any patent licenses
+ granted to You under this License for that Work shall terminate
+ as of the date such litigation is filed.
+
+ 4. Redistribution. You may reproduce and distribute copies of the
+ Work or Derivative Works thereof in any medium, with or without
+ modifications, and in Source or Object form, provided that You
+ meet the following conditions:
+
+ (a) You must give any other recipients of the Work or
+ Derivative Works a copy of this License; and
+
+ (b) You must cause any modified files to carry prominent notices
+ stating that You changed the files; and
+
+ (c) You must retain, in the Source form of any Derivative Works
+ that You distribute, all copyright, patent, trademark, and
+ attribution notices from the Source form of the Work,
+ excluding those notices that do not pertain to any part of
+ the Derivative Works; and
+
+ (d) If the Work includes a "NOTICE" text file as part of its
+ distribution, then any Derivative Works that You distribute must
+ include a readable copy of the attribution notices contained
+ within such NOTICE file, excluding those notices that do not
+ pertain to any part of the Derivative Works, in at least one
+ of the following places: within a NOTICE text file distributed
+ as part of the Derivative Works; within the Source form or
+ documentation, if provided along with the Derivative Works; or,
+ within a display generated by the Derivative Works, if and
+ wherever such third-party notices normally appear. The contents
+ of the NOTICE file are for informational purposes only and
+ do not modify the License. You may add Your own attribution
+ notices within Derivative Works that You distribute, alongside
+ or as an addendum to the NOTICE text from the Work, provided
+ that such additional attribution notices cannot be construed
+ as modifying the License.
+
+ You may add Your own copyright statement to Your modifications and
+ may provide additional or different license terms and conditions
+ for use, reproduction, or distribution of Your modifications, or
+ for any such Derivative Works as a whole, provided Your use,
+ reproduction, and distribution of the Work otherwise complies with
+ the conditions stated in this License.
+
+ 5. Submission of Contributions. Unless You explicitly state otherwise,
+ any Contribution intentionally submitted for inclusion in the Work
+ by You to the Licensor shall be under the terms and conditions of
+ this License, without any additional terms or conditions.
+ Notwithstanding the above, nothing herein shall supersede or modify
+ the terms of any separate license agreement you may have executed
+ with Licensor regarding such Contributions.
+
+ 6. Trademarks. This License does not grant permission to use the trade
+ names, trademarks, service marks, or product names of the Licensor,
+ except as required for reasonable and customary use in describing the
+ origin of the Work and reproducing the content of the NOTICE file.
+
+ 7. Disclaimer of Warranty. Unless required by applicable law or
+ agreed to in writing, Licensor provides the Work (and each
+ Contributor provides its Contributions) on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ implied, including, without limitation, any warranties or conditions
+ of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+ PARTICULAR PURPOSE. You are solely responsible for determining the
+ appropriateness of using or redistributing the Work and assume any
+ risks associated with Your exercise of permissions under this License.
+
+ 8. Limitation of Liability. In no event and under no legal theory,
+ whether in tort (including negligence), contract, or otherwise,
+ unless required by applicable law (such as deliberate and grossly
+ negligent acts) or agreed to in writing, shall any Contributor be
+ liable to You for damages, including any direct, indirect, special,
+ incidental, or consequential damages of any character arising as a
+ result of this License or out of the use or inability to use the
+ Work (including but not limited to damages for loss of goodwill,
+ work stoppage, computer failure or malfunction, or any and all
+ other commercial damages or losses), even if such Contributor
+ has been advised of the possibility of such damages.
+
+ 9. Accepting Warranty or Additional Liability. While redistributing
+ the Work or Derivative Works thereof, You may choose to offer,
+ and charge a fee for, acceptance of support, warranty, indemnity,
+ or other liability obligations and/or rights consistent with this
+ License. However, in accepting such obligations, You may act only
+ on Your own behalf and on Your sole responsibility, not on behalf
+ of any other Contributor, and only if You agree to indemnify,
+ defend, and hold each Contributor harmless for any liability
+ incurred by, or claims asserted against, such Contributor by reason
+ of your accepting any such warranty or additional liability.
+
+ END OF TERMS AND CONDITIONS
+
+ APPENDIX: How to apply the Apache License to your work.
+
+ To apply the Apache License to your work, attach the following
+ boilerplate notice, with the fields enclosed by brackets "{}"
+ replaced with your own identifying information. (Don't include
+ the brackets!) The text should be enclosed in the appropriate
+ comment syntax for the file format. We also recommend that a
+ file or class name and description of purpose be included on the
+ same "printed page" as the copyright notice for easier
+ identification within third-party archives.
+
+ Copyright {yyyy} {name of copyright owner}
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
diff --git a/vnfs/DAaaS/training-core/charts/kubernetes-HDFS/README.md b/vnfs/DAaaS/training-core/charts/kubernetes-HDFS/README.md
new file mode 100644
index 00000000..ca694a19
--- /dev/null
+++ b/vnfs/DAaaS/training-core/charts/kubernetes-HDFS/README.md
@@ -0,0 +1,12 @@
+---
+layout: global
+title: HDFS on Kubernetes
+---
+# HDFS on Kubernetes
+Repository holding helm charts for running Hadoop Distributed File System (HDFS)
+on Kubernetes.
+
+See [charts/README.md](charts/README.md) for how to run the charts.
+
+See [tests/README.md](tests/README.md) for how to run integration tests for
+HDFS on Kubernetes.
diff --git a/vnfs/DAaaS/training-core/charts/kubernetes-HDFS/charts/README.md b/vnfs/DAaaS/training-core/charts/kubernetes-HDFS/charts/README.md
new file mode 100644
index 00000000..15ee8867
--- /dev/null
+++ b/vnfs/DAaaS/training-core/charts/kubernetes-HDFS/charts/README.md
@@ -0,0 +1,390 @@
+---
+layout: global
+title: HDFS charts
+---
+
+# HDFS charts
+
+Helm charts for launching HDFS daemons in a K8s cluster. The main entry-point
+chart is `hdfs-k8s`, which is a uber-chart that specifies other charts as
+dependency subcharts. This means you can launch all HDFS components using
+`hdfs-k8s`.
+
+Note that the HDFS charts are currently in pre-alpha quality. They are also
+being heavily revised and are subject to change.
+
+HDFS on K8s supports the following features:
+ - namenode high availability (HA): HDFS namenode daemons are in charge of
+ maintaining file system metadata concerning which directories have which
+ files and where are the file data. Namenode crash will cause service outage.
+ HDFS can run two namenodes in active/standby setup. HDFS on K8s supports HA.
+ - K8s persistent volumes (PV) for metadata: Namenode crash will cause service
+ outage. Losing namenode metadata can lead to loss of file system. HDFS on
+ K8s can store the metadata in remote K8s persistent volumes so that metdata
+ can remain intact even if both namenode daemons are lost or restarted.
+ - K8s HostPath volumes for file data: HDFS datanodes daemons store actual
+ file data. File data should also survive datanode crash or restart. HDFS on
+ K8s stores the file data on the local disks of the K8s cluster nodes using
+ K8s HostPath volumes. (We plan to switch to a better mechanism, K8s
+ persistent local volumes)
+ - Kerberos: Vanilla HDFS is not secure. Intruders can easily write custom
+ client code, put a fake user name in requests and steal data. Production
+ HDFS often secure itself using Kerberos. HDFS on K8s supports Kerberos.
+
+Here is the list of all charts.
+
+ - hdfs-k8s: main uber-chart. Launches other charts.
+ - hdfs-namenode-k8s: a statefulset and other K8s components for launching HDFS
+ namenode daemons, which maintains file system metadata. The chart supports
+ namenode high availability (HA).
+ - hdfs-datanode-k8s: a daemonset and other K8s components for launching HDFS
+ datanode daemons, which are responsible for storing file data.
+ - hdfs-config-k8s: a configmap containing Hadoop config files for HDFS.
+ - zookeeper: This chart is NOT in this repo. But hdfs-k8s pulls the zookeeper
+ chart in the incubator remote repo
+ (https://kubernetes-charts-incubator.storage.googleapis.com/)
+ as a dependency and launhces zookeeper daemons. Zookeeper makes sure
+ only one namenode is active in the HA setup, while the other namenode
+ becomes standby. By default, we will launch three zookeeper servers.
+ - hdfs-journalnode-k8s: a statefulset and other K8s components for launching
+ HDFS journalnode quorums, which ensures the file system metadata are
+ properly shared among the two namenode daemons in the HA setup.
+ By default, we will launch three journalnode servers.
+ - hdfs-client-k8s: a pod that is configured to run Hadoop client commands
+ for accessing HDFS.
+ - hdfs-krb5-k8s: a size-1 statefulset and other K8s components for launching
+ a Kerberos server, which can be used to secure HDFS. Disabled by default.
+ - hdfs-simple-namenode-k8s: Disabled by default. A simpler setup of the
+ namenode that launches only one namenode. i.e. This does not support HA. It
+ does not support Kerberos nor persistent volumes either. As it does not
+ support HA, we also don't need zookeeper nor journal nodes. You may prefer
+ this if you want the simplest possible setup.
+
+# Prerequisite
+
+Requires Kubernetes 1.6+ as the `namenode` and `datanodes` are using
+`ClusterFirstWithHostNet`, which was introduced in Kubernetes 1.6
+
+# Usage
+
+## Basic
+
+The HDFS daemons can be launched using the main `hdfs-k8s` chart. First, build
+the main chart using:
+
+```
+ $ helm repo add incubator \
+ https://kubernetes-charts-incubator.storage.googleapis.com/
+ $ helm dependency build charts/hdfs-k8s
+```
+
+Zookeeper, journalnodes and namenodes need persistent volumes for storing
+metadata. By default, the helm charts do not set the storage class name for
+dynamically provisioned volumes, nor does it use persistent volume selectors for
+static persistent volumes.
+
+This means it will rely on a provisioner for default storage volume class for
+dynamic volumes. Or if your cluster has statically provisioned volumes, the
+chart will match existing volumes entirely based on the size requirements. To
+override this default behavior, you can specify storage volume classes for
+dynamic volumes, or volume selectors for static volumes. See below for how to
+set these options.
+
+ - namenodes: Each of the two namenodes needs at least a 100 GB volume. i.e.
+ Yon need two 100 GB volumes. This can be overridden by the
+ `hdfs-namenode-k8s.persistence.size` option.
+ You can also override the storage class or the selector using
+ `hdfs-namenode-k8s.persistence.storageClass`, or
+ `hdfs-namenode-k8s.persistence.selector` respectively. For details, see the
+ values.yaml file inside `hdfs-namenode-k8s` chart dir.
+ - zookeeper: You need three > 5 GB volumes. i.e. Each of the two zookeeper
+ servers will need at least 5 GB in the volume. Can be overridden by
+ the `zookeeper.persistence.size` option. You can also override
+ the storage class using `zookeeper.persistence.storageClass`.
+ - journalnodes: Each of the three journalnodes will need at least 20 GB in
+ the volume. The size can be overridden by the
+ `hdfs-journalnode-k8s.persistence.size` option.
+ You can also override the storage class or the selector using
+ `hdfs-journalnode-k8s.persistence.storageClass`, or
+ `hdfs-journalnode-k8s.persistence.selector` respectively. For details, see the
+ values.yaml file inside `hdfs-journalnode-k8s` chart dir.
+ - kerberos: The single Kerberos server will need at least 20 GB in the volume.
+ The size can be overridden by the `hdfs-krb5-k8s.persistence.size` option.
+ You can also override the storage class or the selector using
+ `hdfs-krb5-k8s.persistence.storageClass`, or
+ `hdfs-krb5-k8s.persistence.selector` respectively. For details, see the
+ values.yaml file inside `hdfs-krb5-k8s` chart dir.
+
+Then launch the main chart. Specify the chart release name say "my-hdfs",
+which will be the prefix of the K8s resource names for the HDFS components.
+
+```
+ $ helm install -n my-hdfs charts/hdfs-k8s
+```
+
+Wait for all daemons to be ready. Note some daemons may restart themselves
+a few times before they become ready.
+
+```
+ $ kubectl get pod -l release=my-hdfs
+
+ NAME READY STATUS RESTARTS AGE
+ my-hdfs-client-c749d9f8f-d5pvk 1/1 Running 0 2m
+ my-hdfs-datanode-o7jia 1/1 Running 3 2m
+ my-hdfs-datanode-p5kch 1/1 Running 3 2m
+ my-hdfs-datanode-r3kjo 1/1 Running 3 2m
+ my-hdfs-journalnode-0 1/1 Running 0 2m
+ my-hdfs-journalnode-1 1/1 Running 0 2m
+ my-hdfs-journalnode-2 1/1 Running 0 1m
+ my-hdfs-namenode-0 1/1 Running 3 2m
+ my-hdfs-namenode-1 1/1 Running 3 2m
+ my-hdfs-zookeeper-0 1/1 Running 0 2m
+ my-hdfs-zookeeper-1 1/1 Running 0 2m
+ my-hdfs-zookeeper-2 1/1 Running 0 2m
+```
+
+Namenodes and datanodes are currently using the K8s `hostNetwork` so they can
+see physical IPs of each other. If they are not using `hostNetowrk`,
+overlay K8s network providers such as weave-net may mask the physical IPs,
+which will confuse the data locality later inside namenodes.
+
+Finally, test with the client pod:
+
+```
+ $ _CLIENT=$(kubectl get pods -l app=hdfs-client,release=my-hdfs -o name | \
+ cut -d/ -f 2)
+ $ kubectl exec $_CLIENT -- hdfs dfsadmin -report
+ $ kubectl exec $_CLIENT -- hdfs haadmin -getServiceState nn0
+ $ kubectl exec $_CLIENT -- hdfs haadmin -getServiceState nn1
+
+ $ kubectl exec $_CLIENT -- hadoop fs -rm -r -f /tmp
+ $ kubectl exec $_CLIENT -- hadoop fs -mkdir /tmp
+ $ kubectl exec $_CLIENT -- sh -c \
+ "(head -c 100M < /dev/urandom > /tmp/random-100M)"
+ $ kubectl exec $_CLIENT -- hadoop fs -copyFromLocal /tmp/random-100M /tmp
+```
+
+## Kerberos
+
+Kerberos can be enabled by setting a few related options:
+
+```
+ $ helm install -n my-hdfs charts/hdfs-k8s \
+ --set global.kerberosEnabled=true \
+ --set global.kerberosRealm=MYCOMPANY.COM \
+ --set tags.kerberos=true
+```
+
+This will launch all charts including the Kerberos server, which will become
+ready pretty soon. However, HDFS daemon charts will be blocked as the deamons
+require Kerberos service principals to be available. So we need to unblock
+them by creating those principals.
+
+First, create a configmap containing the common Kerberos config file:
+
+```
+ _MY_DIR=~/krb5
+ mkdir -p $_MY_DIR
+ _KDC=$(kubectl get pod -l app=hdfs-krb5,release=my-hdfs --no-headers \
+ -o name | cut -d/ -f2)
+ _run kubectl cp $_KDC:/etc/krb5.conf $_MY_DIR/tmp/krb5.conf
+ _run kubectl create configmap my-hdfs-krb5-config \
+ --from-file=$_MY_DIR/tmp/krb5.conf
+```
+
+Second, create the service principals and passwords. Kerberos requires service
+principals to be host specific. Some HDFS daemons are associated with your K8s
+cluster nodes' physical host names say kube-n1.mycompany.com, while others are
+associated with Kubernetes virtual service names, for instance
+my-hdfs-namenode-0.my-hdfs-namenode.default.svc.cluster.local. You can get
+the list of these host names like:
+
+```
+ $ _HOSTS=$(kubectl get nodes \
+ -o=jsonpath='{.items[*].status.addresses[?(@.type == "Hostname")].address}')
+
+ $ _HOSTS+=$(kubectl describe configmap my-hdfs-config | \
+ grep -A 1 -e dfs.namenode.rpc-address.hdfs-k8s \
+ -e dfs.namenode.shared.edits.dir |
+ grep "<value>" |
+ sed -e "s/<value>//" \
+ -e "s/<\/value>//" \
+ -e "s/:8020//" \
+ -e "s/qjournal:\/\///" \
+ -e "s/:8485;/ /g" \
+ -e "s/:8485\/hdfs-k8s//")
+```
+
+Then generate per-host principal accounts and password keytab files.
+
+```
+ $ _SECRET_CMD="kubectl create secret generic my-hdfs-krb5-keytabs"
+ $ for _HOST in $_HOSTS; do
+ kubectl exec $_KDC -- kadmin.local -q \
+ "addprinc -randkey hdfs/$_HOST@MYCOMPANY.COM"
+ kubectl exec $_KDC -- kadmin.local -q \
+ "addprinc -randkey HTTP/$_HOST@MYCOMPANY.COM"
+ kubectl exec $_KDC -- kadmin.local -q \
+ "ktadd -norandkey -k /tmp/$_HOST.keytab hdfs/$_HOST@MYCOMPANY.COM HTTP/$_HOST@MYCOMPANY.COM"
+ kubectl cp $_KDC:/tmp/$_HOST.keytab $_MY_DIR/tmp/$_HOST.keytab
+ _SECRET_CMD+=" --from-file=$_MY_DIR/tmp/$_HOST.keytab"
+ done
+```
+
+The above was building a command using a shell variable `SECRET_CMD` for
+creating a K8s secret that contains all keytab files. Run the command to create
+the secret.
+
+```
+ $ $_SECRET_CMD
+```
+
+This will unblock all HDFS daemon pods. Wait until they become ready.
+
+Finally, test the setup using the following commands:
+
+```
+ $ _NN0=$(kubectl get pods -l app=hdfs-namenode,release=my-hdfs -o name | \
+ head -1 | \
+ cut -d/ -f2)
+ $ kubectl exec $_NN0 -- sh -c "(apt install -y krb5-user > /dev/null)" \
+ || true
+ $ kubectl exec $_NN0 -- \
+ kinit -kt /etc/security/hdfs.keytab \
+ hdfs/my-hdfs-namenode-0.my-hdfs-namenode.default.svc.cluster.local@MYCOMPANY.COM
+ $ kubectl exec $_NN0 -- hdfs dfsadmin -report
+ $ kubectl exec $_NN0 -- hdfs haadmin -getServiceState nn0
+ $ kubectl exec $_NN0 -- hdfs haadmin -getServiceState nn1
+ $ kubectl exec $_NN0 -- hadoop fs -rm -r -f /tmp
+ $ kubectl exec $_NN0 -- hadoop fs -mkdir /tmp
+ $ kubectl exec $_NN0 -- hadoop fs -chmod 0777 /tmp
+ $ kubectl exec $_KDC -- kadmin.local -q \
+ "addprinc -randkey user1@MYCOMPANY.COM"
+ $ kubectl exec $_KDC -- kadmin.local -q \
+ "ktadd -norandkey -k /tmp/user1.keytab user1@MYCOMPANY.COM"
+ $ kubectl cp $_KDC:/tmp/user1.keytab $_MY_DIR/tmp/user1.keytab
+ $ kubectl cp $_MY_DIR/tmp/user1.keytab $_CLIENT:/tmp/user1.keytab
+
+ $ kubectl exec $_CLIENT -- sh -c "(apt install -y krb5-user > /dev/null)" \
+ || true
+
+ $ kubectl exec $_CLIENT -- kinit -kt /tmp/user1.keytab user1@MYCOMPANY.COM
+ $ kubectl exec $_CLIENT -- sh -c \
+ "(head -c 100M < /dev/urandom > /tmp/random-100M)"
+ $ kubectl exec $_CLIENT -- hadoop fs -ls /
+ $ kubectl exec $_CLIENT -- hadoop fs -copyFromLocal /tmp/random-100M /tmp
+```
+
+## Advanced options
+
+### Setting HostPath volume locations for datanodes
+
+HDFS on K8s stores the file data on the local disks of the K8s cluster nodes
+using K8s HostPath volumes. You may want to change the default locations. Set
+global.dataNodeHostPath to override the default value. Note the option
+takes a list in case you want to use multiple disks.
+
+```
+ $ helm install -n my-hdfs charts/hdfs-k8s \
+ --set "global.dataNodeHostPath={/mnt/sda1/hdfs-data0,/mnt/sda1/hdfs-data1}"
+```
+
+### Using an existing zookeeper quorum
+
+By default, HDFS on K8s pulls in the zookeeper chart in the incubator remote
+repo (https://kubernetes-charts-incubator.storage.googleapis.com/) as a
+dependency and launhces zookeeper daemons. But your K8s cluster may already
+have a zookeeper quorum.
+
+It is possible to use the existing zookeeper. We just need set a few options
+in the helm install command line. It should be something like:
+
+```
+ $helm install -n my-hdfs charts/hdfs-k8s \
+ --set condition.subchart.zookeeper=false \
+ --set global.zookeeperQuorumOverride=zk-0.zk-svc.default.svc.cluster.local:2181,zk-1.zk-svc.default.svc.cluster.local:2181,zk-2.zk-svc.default.svc.cluster.local:2181
+```
+
+Setting `condition.subchart.zookeeper` to false prevents the uber-chart from
+bringing in zookeeper as sub-chart. And the `global.zookeeperQuorumOverride`
+option specifies the custom address for a zookeeper quorum. Use your
+zookeeper address here.
+
+### Pinning namenodes to specific K8s cluster nodes
+
+Optionally, you can attach labels to some of your k8s cluster nodes so that
+namenodes will always run on those cluster nodes. This can allow your HDFS
+client outside the Kubernetes cluster to expect stable IP addresses. When used
+by those outside clients, Kerberos expects the namenode addresses to be stable.
+
+```
+ $ kubectl label nodes YOUR-HOST-1 hdfs-namenode-selector=hdfs-namenode
+ $ kubectl label nodes YOUR-HOST-2 hdfs-namenode-selector=hdfs-namenode
+```
+
+You should add the nodeSelector option to the helm chart command:
+
+```
+ $ helm install -n my-hdfs charts/hdfs-k8s \
+ --set hdfs-namenode-k8s.nodeSelector.hdfs-namenode-selector=hdfs-namenode \
+ ...
+```
+
+### Excluding datanodes from some K8s cluster nodes
+
+You may want to exclude some K8s cluster nodes from datanodes launch target.
+For instance, some K8s clusters may let the K8s cluster master node launch
+a datanode. To prevent this, label the cluster nodes with
+`hdfs-datanode-exclude`.
+
+```
+ $ kubectl label node YOUR-CLUSTER-NODE hdfs-datanode-exclude=yes
+```
+
+### Launching with a non-HA namenode
+
+You may want non-HA namenode since it is the simplest possible setup.
+Note this won't launch zookeepers nor journalnodes.
+
+The single namenode is supposed to be pinned to a cluster host using a node
+label. Attach a label to one of your K8s cluster node.
+
+```
+ $ kubectl label nodes YOUR-CLUSTER-NODE hdfs-namenode-selector=hdfs-namenode-0
+```
+
+The non-HA setup does not even use persistent vlumes. So you don't even
+need to prepare persistent volumes. Instead, it is using hostPath volume
+of the pinned cluster node. So, just launch the chart while
+setting options to turn off HA. You should add the nodeSelector option
+so that the single namenode would find the hostPath volume of the same cluster
+node when the pod restarts.
+
+```
+ $ helm install -n my-hdfs charts/hdfs-k8s \
+ --set tags.ha=false \
+ --set tags.simple=true \
+ --set global.namenodeHAEnabled=false \
+ --set hdfs-simple-namenode-k8s.nodeSelector.hdfs-namenode-selector=hdfs-namenode-0
+```
+
+# Security
+
+## K8s secret containing Kerberos keytab files
+
+The Kerberos setup creates a K8s secret containing all the keytab files of HDFS
+daemon service princialps. This will be mounted onto HDFS daemon pods. You may
+want to restrict access to this secret using k8s
+[RBAC](https://kubernetes.io/docs/admin/authorization/rbac/), to minimize
+exposure of the keytab files.
+
+## HostPath volumes
+`Datanode` daemons run on every cluster node. They also mount k8s `hostPath`
+local disk volumes. You may want to restrict access of `hostPath`
+using `pod security policy`.
+See [reference](https://github.com/kubernetes/examples/blob/master/staging/podsecuritypolicy/rbac/README.md))
+
+## Credits
+
+Many charts are using public Hadoop docker images hosted by
+[uhopper](https://hub.docker.com/u/uhopper/).
diff --git a/vnfs/DAaaS/training-core/charts/kubernetes-HDFS/charts/hdfs-client-k8s/Chart.yaml b/vnfs/DAaaS/training-core/charts/kubernetes-HDFS/charts/hdfs-client-k8s/Chart.yaml
new file mode 100644
index 00000000..00d6f47d
--- /dev/null
+++ b/vnfs/DAaaS/training-core/charts/kubernetes-HDFS/charts/hdfs-client-k8s/Chart.yaml
@@ -0,0 +1,4 @@
+apiVersion: v1
+name: hdfs-client-k8s
+version: 0.1.0
+description: A client for HDFS on Kubernetes.
diff --git a/vnfs/DAaaS/training-core/charts/kubernetes-HDFS/charts/hdfs-client-k8s/templates/client-deployment.yaml b/vnfs/DAaaS/training-core/charts/kubernetes-HDFS/charts/hdfs-client-k8s/templates/client-deployment.yaml
new file mode 100644
index 00000000..afffedfd
--- /dev/null
+++ b/vnfs/DAaaS/training-core/charts/kubernetes-HDFS/charts/hdfs-client-k8s/templates/client-deployment.yaml
@@ -0,0 +1,56 @@
+apiVersion: apps/v1
+apiVersion: extensions/v1beta1
+kind: Deployment
+metadata:
+ name: {{ template "hdfs-k8s.client.fullname" . }}
+ labels:
+ app: {{ template "hdfs-k8s.client.name" . }}
+ chart: {{ template "hdfs-k8s.subchart" . }}
+ release: {{ .Release.Name }}
+spec:
+ replicas: 1
+ selector:
+ matchLabels:
+ app: {{ template "hdfs-k8s.client.name" . }}
+ release: {{ .Release.Name }}
+ template:
+ metadata:
+ labels:
+ app: {{ template "hdfs-k8s.client.name" . }}
+ release: {{ .Release.Name }}
+ {{- if .Values.podAnnotations }}
+ annotations:
+{{ toYaml .Values.podAnnotations | indent 8 }}
+ {{- end }}
+ spec:
+ containers:
+ - name: hdfs-client
+ image: uhopper/hadoop:2.7.2
+ env:
+ - name: HADOOP_CUSTOM_CONF_DIR
+ value: /etc/hadoop-custom-conf
+ - name: MULTIHOMED_NETWORK
+ value: "0"
+ command: ['/bin/sh', '-c']
+ args:
+ - /entrypoint.sh /usr/bin/tail -f /var/log/dmesg
+ volumeMounts:
+ - name: hdfs-config
+ mountPath: /etc/hadoop-custom-conf
+ readOnly: true
+ {{- if .Values.global.kerberosEnabled }}
+ - name: kerberos-config
+ mountPath: /etc/krb5.conf
+ subPath: {{ .Values.global.kerberosConfigFileName }}
+ readOnly: true
+ {{- end }}
+ restartPolicy: Always
+ volumes:
+ - name: hdfs-config
+ configMap:
+ name: {{ template "hdfs-k8s.config.fullname" . }}
+ {{- if .Values.global.kerberosEnabled }}
+ - name: kerberos-config
+ configMap:
+ name: {{ template "krb5-configmap" . }}
+ {{- end }}
diff --git a/vnfs/DAaaS/training-core/charts/kubernetes-HDFS/charts/hdfs-config-k8s/.helmignore b/vnfs/DAaaS/training-core/charts/kubernetes-HDFS/charts/hdfs-config-k8s/.helmignore
new file mode 100644
index 00000000..f0c13194
--- /dev/null
+++ b/vnfs/DAaaS/training-core/charts/kubernetes-HDFS/charts/hdfs-config-k8s/.helmignore
@@ -0,0 +1,21 @@
+# Patterns to ignore when building packages.
+# This supports shell glob matching, relative path matching, and
+# negation (prefixed with !). Only one pattern per line.
+.DS_Store
+# Common VCS dirs
+.git/
+.gitignore
+.bzr/
+.bzrignore
+.hg/
+.hgignore
+.svn/
+# Common backup files
+*.swp
+*.bak
+*.tmp
+*~
+# Various IDEs
+.project
+.idea/
+*.tmproj
diff --git a/vnfs/DAaaS/training-core/charts/kubernetes-HDFS/charts/hdfs-config-k8s/Chart.yaml b/vnfs/DAaaS/training-core/charts/kubernetes-HDFS/charts/hdfs-config-k8s/Chart.yaml
new file mode 100644
index 00000000..229c4344
--- /dev/null
+++ b/vnfs/DAaaS/training-core/charts/kubernetes-HDFS/charts/hdfs-config-k8s/Chart.yaml
@@ -0,0 +1,5 @@
+apiVersion: v1
+appVersion: "1.0"
+description: A Helm chart for configuring HDFS on Kubernetes
+name: hdfs-config-k8s
+version: 0.1.0
diff --git a/vnfs/DAaaS/training-core/charts/kubernetes-HDFS/charts/hdfs-config-k8s/templates/_helpers.tpl b/vnfs/DAaaS/training-core/charts/kubernetes-HDFS/charts/hdfs-config-k8s/templates/_helpers.tpl
new file mode 100644
index 00000000..cd2ff083
--- /dev/null
+++ b/vnfs/DAaaS/training-core/charts/kubernetes-HDFS/charts/hdfs-config-k8s/templates/_helpers.tpl
@@ -0,0 +1,64 @@
+{{/* vim: set filetype=mustache: */}}
+{{/*
+Expand the name of the chart.
+*/}}
+{{- define "hdfs-config-k8s.name" -}}
+{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" -}}
+{{- end -}}
+
+{{/*
+Create a default fully qualified app name.
+We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec).
+If release name contains chart name it will be used as a full name.
+*/}}
+{{- define "hdfs-config-k8s.fullname" -}}
+{{- if .Values.fullnameOverride -}}
+{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" -}}
+{{- else -}}
+{{- $name := default .Chart.Name .Values.nameOverride -}}
+{{- if contains $name .Release.Name -}}
+{{- .Release.Name | trunc 63 | trimSuffix "-" -}}
+{{- else -}}
+{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" -}}
+{{- end -}}
+{{- end -}}
+{{- end -}}
+
+{{/*
+Create chart name and version as used by the chart label.
+*/}}
+{{- define "hdfs-config-k8s.chart" -}}
+{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" -}}
+{{- end -}}
+
+{{/*
+Create the kerberos principal suffix for core HDFS services
+*/}}
+{{- define "hdfs-principal" -}}
+{{- printf "hdfs/_HOST@%s" .Values.kerberosRealm -}}
+{{- end -}}
+
+{{/*
+Create the kerberos principal for HTTP services
+*/}}
+{{- define "http-principal" -}}
+{{- printf "HTTP/_HOST@%s" .Values.kerberosRealm -}}
+{{- end -}}
+
+{{/*
+Create the datanode data dir list. The below uses two loops to make sure the
+last item does not have comma. It uses index 0 for the last item since that is
+the only special index that helm template gives us.
+*/}}
+{{- define "datanode-data-dirs" -}}
+{{- range $index, $path := .Values.global.dataNodeHostPath -}}
+ {{- if ne $index 0 -}}
+ /hadoop/dfs/data/{{ $index }},
+ {{- end -}}
+{{- end -}}
+{{- range $index, $path := .Values.global.dataNodeHostPath -}}
+ {{- if eq $index 0 -}}
+ /hadoop/dfs/data/{{ $index }}
+ {{- end -}}
+{{- end -}}
+{{- end -}}
diff --git a/vnfs/DAaaS/training-core/charts/kubernetes-HDFS/charts/hdfs-config-k8s/templates/configmap.yaml b/vnfs/DAaaS/training-core/charts/kubernetes-HDFS/charts/hdfs-config-k8s/templates/configmap.yaml
new file mode 100644
index 00000000..379dab8f
--- /dev/null
+++ b/vnfs/DAaaS/training-core/charts/kubernetes-HDFS/charts/hdfs-config-k8s/templates/configmap.yaml
@@ -0,0 +1,197 @@
+apiVersion: v1
+kind: ConfigMap
+metadata:
+ name: {{ template "hdfs-k8s.config.fullname" . }}
+ labels:
+ app: {{ template "hdfs-k8s.client.name" . }}
+ chart: {{ template "hdfs-k8s.subchart" . }}
+ release: {{ .Release.Name }}
+data:
+ core-site.xml: |
+ <?xml version="1.0"?>
+ <?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
+ <configuration>
+ {{- if .Values.global.kerberosEnabled }}
+ <property>
+ <name>hadoop.security.authentication</name>
+ <value>kerberos</value>
+ </property>
+ <!--
+ This is service level RPC authorization, which is separate from HDFS file
+ level ACLs. This concerns who can talk to HDFS daemons including
+ datanodes talking to namenode. As part of the authorization, namenode
+ tries to validate that DNS can uniquely traslate the datanode IP to the
+ hostname in the datanode Kerberos principal. (i.e. The client IP is what
+ Kerberos has authenticated). This does not work well when both namenode
+ and datanodes are using the Kubernetes HostNetwork and namenode is using
+ the StatefulSet. The same cluster node IP can be mapped to two different
+ DNS names. So we disable this. Again this is only service level RPC
+ authorization and does not affect HDFS file level permission ACLs.
+ -->
+ <property>
+ <name>hadoop.security.authorization</name>
+ <value>false</value>
+ </property>
+ <property>
+ <name>hadoop.rpc.protection</name>
+ <value>privacy</value>
+ </property>
+ <property>
+ <name>hadoop.user.group.static.mapping.overrides</name>
+ <value>hdfs=root;</value>
+ </property>
+ {{- end }}
+ {{- range $key, $value := .Values.customHadoopConfig.coreSite }}
+ <property>
+ <name>{{ $key }}</name>
+ <value>{{ $value }}</value>
+ </property>
+ {{- end }}
+ {{- if .Values.global.namenodeHAEnabled }}
+ <property>
+ <name>fs.defaultFS</name>
+ <value>hdfs://hdfs-k8s</value>
+ </property>
+ <property>
+ <name>ha.zookeeper.quorum</name>
+ <value>{{ template "zookeeper-quorum" . }}</value>
+ </property>
+ {{- else }}
+ <property>
+ <name>fs.defaultFS</name>
+ <value>hdfs://{{ template "namenode-svc-0" . }}:8020</value>
+ </property>
+ {{- end }}
+ </configuration>
+ hdfs-site.xml: |
+ <?xml version="1.0"?>
+ <?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
+ <configuration>
+ {{- if .Values.global.kerberosEnabled }}
+ <property>
+ <name>dfs.block.access.token.enable</name>
+ <value>true</value>
+ </property>
+ <property>
+ <name>dfs.encrypt.data.transfer</name>
+ <value>true</value>
+ </property>
+ <property>
+ <name>dfs.namenode.kerberos.principal</name>
+ <value>{{ template "hdfs-principal" . }}</value>
+ </property>
+ {{/*
+ TODO: Check if the https principal is no longer needed in newer Hadoop version.
+ */}}
+ <property>
+ <name>dfs.namenode.kerberos.https.principal</name>
+ <value>{{ template "http-principal" . }}</value>
+ </property>
+ <property>
+ <name>dfs.web.authentication.kerberos.principal</name>
+ <value>{{ template "http-principal" . }}</value>
+ </property>
+ <property>
+ <name>dfs.namenode.keytab.file</name>
+ <value>/etc/security/hdfs.keytab</value>
+ </property>
+ <property>
+ <name>dfs.journalnode.kerberos.principal</name>
+ <value>{{ template "hdfs-principal" . }}</value>
+ </property>
+ <property>
+ <name>dfs.journalnode.kerberos.internal.spnego.principal</name>
+ <value>{{ template "http-principal" . }}</value>
+ </property>
+ <property>
+ <name>dfs.journalnode.keytab.file</name>
+ <value>/etc/security/hdfs.keytab</value>
+ </property>
+ <property>
+ <name>dfs.datanode.kerberos.principal</name>
+ <value>{{ template "hdfs-principal" . }}</value>
+ </property>
+ <property>
+ <name>dfs.datanode.kerberos.https.principal</name>
+ <value>{{ template "http-principal" . }}</value>
+ </property>
+ <property>
+ <name>dfs.datanode.keytab.file</name>
+ <value>/etc/security/hdfs.keytab</value>
+ </property>
+ {{- if .Values.global.jsvcEnabled }}
+ <property>
+ <name>dfs.datanode.address</name>
+ <value>0.0.0.0:1004</value>
+ </property>
+ <property>
+ <name>dfs.datanode.http.address</name>
+ <value>0.0.0.0:1006</value>
+ </property>
+ {{- end }}
+ {{- end }}
+ {{- range $key, $value := .Values.customHadoopConfig.hdfsSite }}
+ <property>
+ <name>{{ $key }}</name>
+ <value>{{ $value }}</value>
+ </property>
+ {{- end }}
+ {{- if .Values.global.namenodeHAEnabled }}
+ <property>
+ <name>dfs.nameservices</name>
+ <value>hdfs-k8s</value>
+ </property>
+ <property>
+ <name>dfs.ha.namenodes.hdfs-k8s</name>
+ <value>nn0,nn1</value>
+ </property>
+ <property>
+ <name>dfs.namenode.rpc-address.hdfs-k8s.nn0</name>
+ <value>{{ template "namenode-svc-0" . }}:8020</value>
+ </property>
+ <property>
+ <name>dfs.namenode.rpc-address.hdfs-k8s.nn1</name>
+ <value>{{ template "namenode-svc-1" . }}:8020</value>
+ </property>
+ <property>
+ <name>dfs.namenode.http-address.hdfs-k8s.nn0</name>
+ <value>{{ template "namenode-svc-0" . }}:50070</value>
+ </property>
+ <property>
+ <name>dfs.namenode.http-address.hdfs-k8s.nn1</name>
+ <value>{{ template "namenode-svc-1" . }}:50070</value>
+ </property>
+ <property>
+ <name>dfs.namenode.shared.edits.dir</name>
+ <value>qjournal://{{ template "journalnode-quorum" . }}/hdfs-k8s</value>
+ </property>
+ <property>
+ <name>dfs.ha.automatic-failover.enabled</name>
+ <value>true</value>
+ </property>
+ <property>
+ <name>dfs.ha.fencing.methods</name>
+ <value>shell(/bin/true)</value>
+ </property>
+ <property>
+ <name>dfs.journalnode.edits.dir</name>
+ <value>/hadoop/dfs/journal</value>
+ </property>
+ <property>
+ <name>dfs.client.failover.proxy.provider.hdfs-k8s</name>
+ <value>org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider</value>
+ </property>
+ {{- end }}
+ <property>
+ <name>dfs.namenode.name.dir</name>
+ <value>file:///hadoop/dfs/name</value>
+ </property>
+ <property>
+ <name>dfs.namenode.datanode.registration.ip-hostname-check</name>
+ <value>false</value>
+ </property>
+ <property>
+ <name>dfs.datanode.data.dir</name>
+ <value>{{ template "datanode-data-dirs" . }}</value>
+ </property>
+ </configuration>
diff --git a/vnfs/DAaaS/training-core/charts/kubernetes-HDFS/charts/hdfs-datanode-k8s/Chart.yaml b/vnfs/DAaaS/training-core/charts/kubernetes-HDFS/charts/hdfs-datanode-k8s/Chart.yaml
new file mode 100644
index 00000000..ec837254
--- /dev/null
+++ b/vnfs/DAaaS/training-core/charts/kubernetes-HDFS/charts/hdfs-datanode-k8s/Chart.yaml
@@ -0,0 +1,4 @@
+apiVersion: v1
+name: hdfs-datanode-k8s
+version: 0.1.0
+description: Datanodes for HDFS on Kubernetes.
diff --git a/vnfs/DAaaS/training-core/charts/kubernetes-HDFS/charts/hdfs-datanode-k8s/templates/datanode-daemonset.yaml b/vnfs/DAaaS/training-core/charts/kubernetes-HDFS/charts/hdfs-datanode-k8s/templates/datanode-daemonset.yaml
new file mode 100644
index 00000000..09445ed0
--- /dev/null
+++ b/vnfs/DAaaS/training-core/charts/kubernetes-HDFS/charts/hdfs-datanode-k8s/templates/datanode-daemonset.yaml
@@ -0,0 +1,191 @@
+# Provides datanode helper scripts.
+apiVersion: v1
+kind: ConfigMap
+metadata:
+ name: {{ template "hdfs-k8s.datanode.fullname" . }}-scripts
+ labels:
+ app: {{ template "hdfs-k8s.datanode.name" . }}
+ chart: {{ template "hdfs-k8s.subchart" . }}
+ release: {{ .Release.Name }}
+data:
+ check-status.sh: |
+ #!/usr/bin/env bash
+ # Exit on error. Append "|| true" if you expect an error.
+ set -o errexit
+ # Exit on error inside any functions or subshells.
+ set -o errtrace
+ # Do not allow use of undefined vars. Use ${VAR:-} to use an undefined VAR
+ set -o nounset
+ # Catch an error in command pipes. e.g. mysqldump fails (but gzip succeeds)
+ # in `mysqldump |gzip`
+ set -o pipefail
+ # Turn on traces, useful while debugging.
+ set -o xtrace
+
+ # Check if datanode registered with the namenode and got non-null cluster ID.
+ _PORTS="50075 1006"
+ _URL_PATH="jmx?qry=Hadoop:service=DataNode,name=DataNodeInfo"
+ _CLUSTER_ID=""
+ for _PORT in $_PORTS; do
+ _CLUSTER_ID+=$(curl -s http://localhost:${_PORT}/$_URL_PATH | \
+ grep ClusterId) || true
+ done
+ echo $_CLUSTER_ID | grep -q -v null
+---
+# Deleting a daemonset may need some trick. See
+# https://github.com/kubernetes/kubernetes/issues/33245#issuecomment-261250489
+apiVersion: extensions/v1beta1
+kind: DaemonSet
+metadata:
+ name: {{ template "hdfs-k8s.datanode.fullname" . }}
+ labels:
+ app: {{ template "hdfs-k8s.datanode.name" . }}
+ chart: {{ template "hdfs-k8s.subchart" . }}
+ release: {{ .Release.Name }}
+spec:
+ template:
+ metadata:
+ labels:
+ app: {{ template "hdfs-k8s.datanode.name" . }}
+ release: {{ .Release.Name }}
+ {{- if .Values.podAnnotations }}
+ annotations:
+{{ toYaml .Values.podAnnotations | indent 8 }}
+ {{- end }}
+ spec:
+ {{- if .Values.affinity }}
+ affinity:
+{{ toYaml .Values.affinity | indent 8 }}
+ {{- else if .Values.global.defaultAffinityEnabled }}
+ affinity:
+ nodeAffinity:
+ requiredDuringSchedulingIgnoredDuringExecution:
+ nodeSelectorTerms:
+ - matchExpressions:
+ - key: {{ template "hdfs-k8s.datanode.fullname" . }}-exclude
+ operator: DoesNotExist
+ {{- end }}
+ {{- if .Values.nodeSelector }}
+ nodeSelector:
+{{ toYaml .Values.nodeSelector | indent 8 }}
+ {{- end }}
+ {{- if .Values.tolerations }}
+ tolerations:
+{{ toYaml .Values.tolerations | indent 8 }}
+ {{- end }}
+ hostNetwork: true
+ hostPID: true
+ dnsPolicy: ClusterFirstWithHostNet
+ containers:
+ - name: datanode
+ image: uhopper/hadoop-datanode:2.7.2
+ env:
+ - name: HADOOP_CUSTOM_CONF_DIR
+ value: /etc/hadoop-custom-conf
+ - name: MULTIHOMED_NETWORK
+ value: "0"
+ {{- if and .Values.global.kerberosEnabled .Values.global.jsvcEnabled }}
+ - name: HADOOP_SECURE_DN_USER
+ value: root
+ - name: JSVC_OUTFILE
+ value: /dev/stdout
+ - name: JSVC_ERRFILE
+ value: /dev/stderr
+ - name: JSVC_HOME
+ value: /jsvc-home
+ {{- end }}
+ livenessProbe:
+ exec:
+ command:
+ - /dn-scripts/check-status.sh
+ initialDelaySeconds: 60
+ periodSeconds: 30
+ readinessProbe:
+ exec:
+ command:
+ - /dn-scripts/check-status.sh
+ initialDelaySeconds: 60
+ periodSeconds: 30
+ securityContext:
+ privileged: true
+ volumeMounts:
+ - name: dn-scripts
+ mountPath: /dn-scripts
+ readOnly: true
+ - name: hdfs-config
+ mountPath: /etc/hadoop-custom-conf
+ readOnly: true
+ {{- range $index, $path := .Values.global.dataNodeHostPath }}
+ - name: hdfs-data-{{ $index }}
+ mountPath: /hadoop/dfs/data/{{ $index }}
+ {{- end }}
+ {{- if .Values.global.kerberosEnabled }}
+ - name: kerberos-config
+ mountPath: /etc/krb5.conf
+ subPath: {{ .Values.global.kerberosConfigFileName }}
+ readOnly: true
+ - name: kerberos-keytab-copy
+ mountPath: /etc/security/
+ readOnly: true
+ {{- if .Values.global.jsvcEnabled }}
+ - name: jsvc-home
+ mountPath: /jsvc-home
+ {{- end }}
+ {{- end }}
+ {{- if .Values.global.kerberosEnabled }}
+ initContainers:
+ - name: copy-kerberos-keytab
+ image: busybox:1.27.1
+ command: ['sh', '-c']
+ args:
+ - cp /kerberos-keytabs/$MY_NODE_NAME.keytab /kerberos-keytab-copy/hdfs.keytab
+ env:
+ - name: MY_NODE_NAME
+ valueFrom:
+ fieldRef:
+ fieldPath: spec.nodeName
+ volumeMounts:
+ - name: kerberos-keytabs
+ mountPath: /kerberos-keytabs
+ - name: kerberos-keytab-copy
+ mountPath: /kerberos-keytab-copy
+ {{- if .Values.global.jsvcEnabled }}
+ - name: copy-jsvc
+ # Pull by digest because the image doesn't have tags to pin.
+ image: mschlimb/jsvc@sha256:bf20eb9a319e9a2f87473d8da7418d21503a97528b932800b6b8417cd31e30ef
+ command: ['sh', '-c']
+ args:
+ - cp /usr/bin/jsvc /jsvc-home/jsvc
+ volumeMounts:
+ - name: jsvc-home
+ mountPath: /jsvc-home
+ {{- end }}
+ {{- end }}
+ restartPolicy: Always
+ volumes:
+ - name: dn-scripts
+ configMap:
+ name: {{ template "hdfs-k8s.datanode.fullname" . }}-scripts
+ defaultMode: 0744
+ {{- range $index, $path := .Values.global.dataNodeHostPath }}
+ - name: hdfs-data-{{ $index }}
+ hostPath:
+ path: {{ $path }}
+ {{- end }}
+ - name: hdfs-config
+ configMap:
+ name: {{ template "hdfs-k8s.config.fullname" . }}
+ {{- if .Values.global.kerberosEnabled }}
+ - name: kerberos-config
+ configMap:
+ name: {{ template "krb5-configmap" . }}
+ - name: kerberos-keytabs
+ secret:
+ secretName: {{ template "krb5-keytabs-secret" . }}
+ - name: kerberos-keytab-copy
+ emptyDir: {}
+ {{- if .Values.global.jsvcEnabled }}
+ - name: jsvc-home
+ emptyDir: {}
+ {{- end }}
+ {{- end }}
diff --git a/vnfs/DAaaS/training-core/charts/kubernetes-HDFS/charts/hdfs-journalnode-k8s/Chart.yaml b/vnfs/DAaaS/training-core/charts/kubernetes-HDFS/charts/hdfs-journalnode-k8s/Chart.yaml
new file mode 100644
index 00000000..a7ea6c8f
--- /dev/null
+++ b/vnfs/DAaaS/training-core/charts/kubernetes-HDFS/charts/hdfs-journalnode-k8s/Chart.yaml
@@ -0,0 +1,4 @@
+apiVersion: v1
+name: hdfs-journalnode-k8s
+version: 0.1.0
+description: Journalnode quorum used by HDFS on Kubernetes.
diff --git a/vnfs/DAaaS/training-core/charts/kubernetes-HDFS/charts/hdfs-journalnode-k8s/templates/journalnode-statefulset.yaml b/vnfs/DAaaS/training-core/charts/kubernetes-HDFS/charts/hdfs-journalnode-k8s/templates/journalnode-statefulset.yaml
new file mode 100644
index 00000000..22a4a2b4
--- /dev/null
+++ b/vnfs/DAaaS/training-core/charts/kubernetes-HDFS/charts/hdfs-journalnode-k8s/templates/journalnode-statefulset.yaml
@@ -0,0 +1,180 @@
+# A headless service to create DNS records.
+apiVersion: v1
+kind: Service
+metadata:
+ name: {{ template "hdfs-k8s.journalnode.fullname" . }}
+ labels:
+ app: {{ template "hdfs-k8s.journalnode.name" . }}
+ chart: {{ template "hdfs-k8s.subchart" . }}
+ release: {{ .Release.Name }}
+ annotations:
+ # TODO: Deprecated. Replace tolerate-unready-endpoints with
+ # v1.Service.PublishNotReadyAddresses.
+ service.alpha.kubernetes.io/tolerate-unready-endpoints: "true"
+spec:
+ ports:
+ - port: 8485
+ name: jn
+ - port: 8480
+ name: http
+ clusterIP: None
+ selector:
+ app: {{ template "hdfs-k8s.journalnode.name" . }}
+ release: {{ .Release.Name }}
+---
+apiVersion: policy/v1beta1
+kind: PodDisruptionBudget
+metadata:
+ name: {{ template "hdfs-k8s.journalnode.fullname" . }}
+ labels:
+ app: {{ template "hdfs-k8s.journalnode.name" . }}
+ chart: {{ template "hdfs-k8s.subchart" . }}
+ release: {{ .Release.Name }}
+spec:
+ selector:
+ matchLabels:
+ app: {{ template "hdfs-k8s.journalnode.name" . }}
+ release: {{ .Release.Name }}
+ minAvailable: {{ div .Values.global.journalnodeQuorumSize 2 | add1 }}
+---
+apiVersion: apps/v1beta1
+kind: StatefulSet
+metadata:
+ name: {{ template "hdfs-k8s.journalnode.fullname" . }}
+ labels:
+ app: {{ template "hdfs-k8s.journalnode.name" . }}
+ chart: {{ template "hdfs-k8s.subchart" . }}
+ release: {{ .Release.Name }}
+spec:
+ serviceName: {{ template "hdfs-k8s.journalnode.fullname" . }}
+ replicas: {{ .Values.global.journalnodeQuorumSize }}
+ template:
+ metadata:
+ labels:
+ app: {{ template "hdfs-k8s.journalnode.name" . }}
+ release: {{ .Release.Name }}
+ {{- if .Values.podAnnotations }}
+ annotations:
+{{ toYaml .Values.podAnnotations | indent 8 }}
+ {{- end }}
+ spec:
+ {{- if .Values.affinity }}
+ affinity:
+{{ toYaml .Values.affinity | indent 8 }}
+ {{- else if .Values.global.defaultAffinityEnabled }}
+ affinity:
+ podAntiAffinity:
+ requiredDuringSchedulingIgnoredDuringExecution:
+ - labelSelector:
+ matchExpressions:
+ - key: "app"
+ operator: In
+ values:
+ - {{ template "hdfs-k8s.journalnode.name" . }}
+ - key: "release"
+ operator: In
+ values:
+ - {{ .Release.Name }}
+ topologyKey: "kubernetes.io/hostname"
+ {{- end }}
+ {{- if .Values.nodeSelector }}
+ nodeSelector:
+{{ toYaml .Values.nodeSelector | indent 8 }}
+ {{- end }}
+ {{- if .Values.tolerations }}
+ tolerations:
+{{ toYaml .Values.tolerations | indent 8 }}
+ {{- end }}
+ containers:
+ - name: hdfs-journalnode
+ image: uhopper/hadoop-namenode:2.7.2
+ env:
+ - name: HADOOP_CUSTOM_CONF_DIR
+ value: /etc/hadoop-custom-conf
+ command: ["/entrypoint.sh"]
+ args: ["/opt/hadoop-2.7.2/bin/hdfs", "--config", "/etc/hadoop", "journalnode"]
+ ports:
+ - containerPort: 8485
+ name: jn
+ - containerPort: 8480
+ name: http
+ volumeMounts:
+ # Mount a subpath of the volume so that the journal subdir would be
+ # a brand new empty dir. This way, we won't get affected by
+ # existing files in the volume top dir.
+ - name: editdir
+ mountPath: /hadoop/dfs/journal
+ subPath: journal
+ - name: editdir
+ mountPath: /hadoop/dfs/name
+ subPath: name
+ - name: hdfs-config
+ mountPath: /etc/hadoop-custom-conf
+ readOnly: true
+ {{- if .Values.global.kerberosEnabled }}
+ - name: kerberos-config
+ mountPath: /etc/krb5.conf
+ subPath: {{ .Values.global.kerberosConfigFileName }}
+ readOnly: true
+ - name: kerberos-keytab-copy
+ mountPath: /etc/security/
+ readOnly: true
+ {{- end }}
+ {{- if .Values.global.kerberosEnabled }}
+ initContainers:
+ - name: copy-kerberos-keytab
+ image: busybox:1.27.1
+ command: ['sh', '-c']
+ args:
+ - cp /kerberos-keytabs/${MY_KERBEROS_NAME}*.keytab /kerberos-keytab-copy/hdfs.keytab
+ env:
+ - name: MY_KERBEROS_NAME
+ valueFrom:
+ fieldRef:
+ fieldPath: metadata.name
+ volumeMounts:
+ - name: kerberos-keytabs
+ mountPath: /kerberos-keytabs
+ - name: kerberos-keytab-copy
+ mountPath: /kerberos-keytab-copy
+ {{- end }}
+ restartPolicy: Always
+ volumes:
+ - name: hdfs-config
+ configMap:
+ name: {{ template "hdfs-k8s.config.fullname" . }}
+ {{- if .Values.global.kerberosEnabled }}
+ - name: kerberos-config
+ configMap:
+ name: {{ template "krb5-configmap" . }}
+ - name: kerberos-keytabs
+ secret:
+ secretName: {{ template "krb5-keytabs-secret" . }}
+ - name: kerberos-keytab-copy
+ emptyDir: {}
+ {{- end }}
+ {{- if .Values.global.podSecurityContext.enabled }}
+ securityContext:
+ runAsUser: {{ .Values.global.podSecurityContext.runAsUser }}
+ fsGroup: {{ .Values.global.podSecurityContext.fsGroup }}
+ {{- end }}
+ volumeClaimTemplates:
+ - metadata:
+ name: editdir
+ spec:
+ accessModes:
+ - {{ .Values.persistence.accessMode | quote }}
+ resources:
+ requests:
+ storage: {{ .Values.persistence.size | quote }}
+ {{- if .Values.persistence.storageClass }}
+ {{- if (eq "-" .Values.persistence.storageClass) }}
+ storageClassName: ""
+ {{- else }}
+ storageClassName: "{{ .Values.persistence.storageClass }}"
+ {{- end }}
+ {{- end }}
+ {{- if .Values.persistence.selector }}
+ selector:
+{{ toYaml .Values.persistence.selector | indent 10 }}
+ {{- end }}
diff --git a/vnfs/DAaaS/training-core/charts/kubernetes-HDFS/charts/hdfs-k8s/.gitignore b/vnfs/DAaaS/training-core/charts/kubernetes-HDFS/charts/hdfs-k8s/.gitignore
new file mode 100644
index 00000000..28ebd32d
--- /dev/null
+++ b/vnfs/DAaaS/training-core/charts/kubernetes-HDFS/charts/hdfs-k8s/.gitignore
@@ -0,0 +1,2 @@
+charts
+requirements.lock
diff --git a/vnfs/DAaaS/training-core/charts/kubernetes-HDFS/charts/hdfs-k8s/.helmignore b/vnfs/DAaaS/training-core/charts/kubernetes-HDFS/charts/hdfs-k8s/.helmignore
new file mode 100644
index 00000000..f0c13194
--- /dev/null
+++ b/vnfs/DAaaS/training-core/charts/kubernetes-HDFS/charts/hdfs-k8s/.helmignore
@@ -0,0 +1,21 @@
+# Patterns to ignore when building packages.
+# This supports shell glob matching, relative path matching, and
+# negation (prefixed with !). Only one pattern per line.
+.DS_Store
+# Common VCS dirs
+.git/
+.gitignore
+.bzr/
+.bzrignore
+.hg/
+.hgignore
+.svn/
+# Common backup files
+*.swp
+*.bak
+*.tmp
+*~
+# Various IDEs
+.project
+.idea/
+*.tmproj
diff --git a/vnfs/DAaaS/training-core/charts/kubernetes-HDFS/charts/hdfs-k8s/Chart.yaml b/vnfs/DAaaS/training-core/charts/kubernetes-HDFS/charts/hdfs-k8s/Chart.yaml
new file mode 100644
index 00000000..ec58ffb6
--- /dev/null
+++ b/vnfs/DAaaS/training-core/charts/kubernetes-HDFS/charts/hdfs-k8s/Chart.yaml
@@ -0,0 +1,5 @@
+apiVersion: v1
+appVersion: "1.0"
+description: An entry-point Helm chart for launching HDFS on Kubernetes
+name: hdfs
+version: 0.1.0
diff --git a/vnfs/DAaaS/training-core/charts/kubernetes-HDFS/charts/hdfs-k8s/requirements.yaml b/vnfs/DAaaS/training-core/charts/kubernetes-HDFS/charts/hdfs-k8s/requirements.yaml
new file mode 100644
index 00000000..7f803fdc
--- /dev/null
+++ b/vnfs/DAaaS/training-core/charts/kubernetes-HDFS/charts/hdfs-k8s/requirements.yaml
@@ -0,0 +1,59 @@
+dependencies:
+ - name: zookeeper
+ version: "1.0.0"
+ repository: https://kubernetes-charts-incubator.storage.googleapis.com/
+ condition: condition.subchart.zookeeper
+ tags:
+ - ha
+ - kerberos
+ - name: hdfs-config-k8s
+ version: "0.1.0"
+ repository: "file://../hdfs-config-k8s"
+ condition: condition.subchart.config
+ tags:
+ - ha
+ - kerberos
+ - simple
+ - name: hdfs-krb5-k8s
+ version: "0.1.0"
+ repository: "file://../hdfs-krb5-k8s"
+ condition: condition.subchart.kerberos
+ tags:
+ - kerberos
+ - name: hdfs-journalnode-k8s
+ version: "0.1.0"
+ repository: "file://../hdfs-journalnode-k8s"
+ condition: condition.subchart.journalnode
+ tags:
+ - ha
+ - kerberos
+ - name: hdfs-namenode-k8s
+ version: "0.1.0"
+ repository: "file://../hdfs-namenode-k8s"
+ condition: condition.subchart.namenode
+ tags:
+ - ha
+ - kerberos
+ # Non-HA namenode. Disabled by default
+ - name: hdfs-simple-namenode-k8s
+ version: "0.1.0"
+ repository: "file://../hdfs-simple-namenode-k8s"
+ condition: condition.subchart.simple-namenode
+ tags:
+ - simple
+ - name: hdfs-datanode-k8s
+ version: "0.1.0"
+ repository: "file://../hdfs-datanode-k8s"
+ condition: condition.subchart.datanode
+ tags:
+ - ha
+ - kerberos
+ - simple
+ - name: hdfs-client-k8s
+ version: "0.1.0"
+ repository: "file://../hdfs-client-k8s"
+ condition: condition.subchart.client
+ tags:
+ - ha
+ - kerberos
+ - simple
diff --git a/vnfs/DAaaS/training-core/charts/kubernetes-HDFS/charts/hdfs-k8s/templates/_helpers.tpl b/vnfs/DAaaS/training-core/charts/kubernetes-HDFS/charts/hdfs-k8s/templates/_helpers.tpl
new file mode 100644
index 00000000..9d03c4d2
--- /dev/null
+++ b/vnfs/DAaaS/training-core/charts/kubernetes-HDFS/charts/hdfs-k8s/templates/_helpers.tpl
@@ -0,0 +1,264 @@
+{{/* vim: set filetype=mustache: */}}
+{{/*
+Create a short app name.
+*/}}
+{{- define "hdfs-k8s.name" -}}
+hdfs
+{{- end -}}
+
+{{/*
+Create a fully qualified app name.
+We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec).
+If release name contains chart name it will be used as a full name.
+*/}}
+{{- define "hdfs-k8s.fullname" -}}
+{{- if .Values.global.fullnameOverride -}}
+{{- .Values.global.fullnameOverride | trunc 63 | trimSuffix "-" -}}
+{{- else -}}
+{{- $name := include "hdfs-k8s.name" . -}}
+{{- if contains $name .Release.Name -}}
+{{- .Release.Name | trunc 63 | trimSuffix "-" -}}
+{{- else -}}
+{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" -}}
+{{- end -}}
+{{- end -}}
+{{- end -}}
+
+{{/*
+Create chart name and version as used by the subchart label.
+*/}}
+{{- define "hdfs-k8s.subchart" -}}
+{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" -}}
+{{- end -}}
+
+{{- define "zookeeper-fullname" -}}
+{{- $fullname := include "hdfs-k8s.fullname" . -}}
+{{- if contains "zookeeper" $fullname -}}
+{{- printf "%s" $fullname -}}
+{{- else -}}
+{{- printf "%s-zookeeper" $fullname | trunc 63 | trimSuffix "-" -}}
+{{- end -}}
+{{- end -}}
+
+{{- define "hdfs-k8s.config.name" -}}
+{{- template "hdfs-k8s.name" . -}}-config
+{{- end -}}
+
+{{- define "hdfs-k8s.config.fullname" -}}
+{{- $fullname := include "hdfs-k8s.fullname" . -}}
+{{- if contains "config" $fullname -}}
+{{- printf "%s" $fullname -}}
+{{- else -}}
+{{- printf "%s-config" $fullname | trunc 63 | trimSuffix "-" -}}
+{{- end -}}
+{{- end -}}
+
+{{- define "hdfs-k8s.krb5.name" -}}
+{{- template "hdfs-k8s.name" . -}}-krb5
+{{- end -}}
+
+{{- define "hdfs-k8s.krb5.fullname" -}}
+{{- $fullname := include "hdfs-k8s.fullname" . -}}
+{{- if contains "config" $fullname -}}
+{{- printf "%s" $fullname -}}
+{{- else -}}
+{{- printf "%s-krb5" $fullname | trunc 63 | trimSuffix "-" -}}
+{{- end -}}
+{{- end -}}
+
+{{- define "hdfs-k8s.journalnode.name" -}}
+{{- template "hdfs-k8s.name" . -}}-journalnode
+{{- end -}}
+
+{{- define "hdfs-k8s.journalnode.fullname" -}}
+{{- $fullname := include "hdfs-k8s.fullname" . -}}
+{{- if contains "journalnode" $fullname -}}
+{{- printf "%s" $fullname -}}
+{{- else -}}
+{{- printf "%s-journalnode" $fullname | trunc 63 | trimSuffix "-" -}}
+{{- end -}}
+{{- end -}}
+
+{{- define "hdfs-k8s.namenode.name" -}}
+{{- template "hdfs-k8s.name" . -}}-namenode
+{{- end -}}
+
+{{- define "hdfs-k8s.namenode.fullname" -}}
+{{- $fullname := include "hdfs-k8s.fullname" . -}}
+{{- if contains "namenode" $fullname -}}
+{{- printf "%s" $fullname -}}
+{{- else -}}
+{{- printf "%s-namenode" $fullname | trunc 63 | trimSuffix "-" -}}
+{{- end -}}
+{{- end -}}
+
+{{- define "hdfs-k8s.datanode.name" -}}
+{{- template "hdfs-k8s.name" . -}}-datanode
+{{- end -}}
+
+{{- define "hdfs-k8s.datanode.fullname" -}}
+{{- $fullname := include "hdfs-k8s.fullname" . -}}
+{{- if contains "datanode" $fullname -}}
+{{- printf "%s" $fullname -}}
+{{- else -}}
+{{- printf "%s-datanode" $fullname | trunc 63 | trimSuffix "-" -}}
+{{- end -}}
+{{- end -}}
+
+{{- define "hdfs-k8s.client.name" -}}
+{{- template "hdfs-k8s.name" . -}}-client
+{{- end -}}
+
+{{- define "hdfs-k8s.client.fullname" -}}
+{{- $fullname := include "hdfs-k8s.fullname" . -}}
+{{- if contains "client" $fullname -}}
+{{- printf "%s" $fullname -}}
+{{- else -}}
+{{- printf "%s-client" $fullname | trunc 63 | trimSuffix "-" -}}
+{{- end -}}
+{{- end -}}
+
+{{/*
+Create the kerberos principal suffix for core HDFS services
+*/}}
+{{- define "hdfs-principal" -}}
+{{- printf "hdfs/_HOST@%s" .Values.global.kerberosRealm -}}
+{{- end -}}
+
+{{/*
+Create the kerberos principal for HTTP services
+*/}}
+{{- define "http-principal" -}}
+{{- printf "HTTP/_HOST@%s" .Values.global.kerberosRealm -}}
+{{- end -}}
+
+{{/*
+Create the name for a Kubernetes Configmap containing a Kerberos config file.
+*/}}
+{{- define "krb5-configmap" -}}
+{{- if .Values.global.kerberosConfigMapOverride -}}
+{{- .Values.global.kerberosConfigMapOverride | trunc 63 | trimSuffix "-" -}}
+{{- else -}}
+{{- $name := include "hdfs-k8s.krb5.fullname" . -}}
+{{- printf "%s-config" $name | trunc 63 | trimSuffix "-" -}}
+{{- end -}}
+{{- end -}}
+
+{{/*
+Create the name for a Kubernetes Secret containing Kerberos keytabs.
+*/}}
+{{- define "krb5-keytabs-secret" -}}
+{{- if .Values.global.kerberosKeytabsSecretOverride -}}
+{{- .Values.global.kerberosKeytabsSecretOverride | trunc 63 | trimSuffix "-" -}}
+{{- else -}}
+{{- $name := include "hdfs-k8s.krb5.fullname" . -}}
+{{- printf "%s-keytabs" $name | trunc 63 | trimSuffix "-" -}}
+{{- end -}}
+{{- end -}}
+
+
+{{/*
+Create the domain name part of services.
+The HDFS config file should specify FQDN of services. Otherwise, Kerberos
+login may fail.
+*/}}
+{{- define "svc-domain" -}}
+{{- printf "%s.svc.cluster.local" .Release.Namespace -}}
+{{- end -}}
+
+{{/*
+Create the zookeeper quorum server list. The below uses two loops to make
+sure the last item does not have comma. It uses index 0 for the last item
+since that is the only special index that helm template gives us.
+*/}}
+{{- define "zookeeper-quorum" -}}
+{{- if .Values.global.zookeeperQuorumOverride -}}
+{{- .Values.global.zookeeperQuorumOverride -}}
+{{- else -}}
+{{- $service := include "zookeeper-fullname" . -}}
+{{- $domain := include "svc-domain" . -}}
+{{- $replicas := .Values.global.zookeeperQuorumSize | int -}}
+{{- range $i, $e := until $replicas -}}
+ {{- if ne $i 0 -}}
+ {{- printf "%s-%d.%s-headless.%s:2181," $service $i $service $domain -}}
+ {{- end -}}
+{{- end -}}
+{{- range $i, $e := until $replicas -}}
+ {{- if eq $i 0 -}}
+ {{- printf "%s-%d.%s-headless.%s:2181" $service $i $service $domain -}}
+ {{- end -}}
+{{- end -}}
+{{- end -}}
+{{- end -}}
+
+{{/*
+Construct the name of the Kerberos KDC pod 0.
+*/}}
+{{- define "krb5-pod-0" -}}
+{{- template "hdfs-k8s.krb5.fullname" . -}}-0
+{{- end -}}
+
+{{/*
+Construct the full name of the Kerberos KDC statefulset member 0.
+*/}}
+{{- define "krb5-svc-0" -}}
+{{- $pod := include "krb5-pod-0" . -}}
+{{- $service := include "hdfs-k8s.krb5.fullname" . -}}
+{{- $domain := include "svc-domain" . -}}
+{{- printf "%s.%s.%s" $pod $service $domain -}}
+{{- end -}}
+
+{{/*
+Create the journalnode quorum server list. The below uses two loops to make
+sure the last item does not have the delimiter. It uses index 0 for the last
+item since that is the only special index that helm template gives us.
+*/}}
+{{- define "journalnode-quorum" -}}
+{{- $service := include "hdfs-k8s.journalnode.fullname" . -}}
+{{- $domain := include "svc-domain" . -}}
+{{- $replicas := .Values.global.journalnodeQuorumSize | int -}}
+{{- range $i, $e := until $replicas -}}
+ {{- if ne $i 0 -}}
+ {{- printf "%s-%d.%s.%s:8485;" $service $i $service $domain -}}
+ {{- end -}}
+{{- end -}}
+{{- range $i, $e := until $replicas -}}
+ {{- if eq $i 0 -}}
+ {{- printf "%s-%d.%s.%s:8485" $service $i $service $domain -}}
+ {{- end -}}
+{{- end -}}
+{{- end -}}
+
+{{/*
+Construct the name of the namenode pod 0.
+*/}}
+{{- define "namenode-pod-0" -}}
+{{- template "hdfs-k8s.namenode.fullname" . -}}-0
+{{- end -}}
+
+{{/*
+Construct the full name of the namenode statefulset member 0.
+*/}}
+{{- define "namenode-svc-0" -}}
+{{- $pod := include "namenode-pod-0" . -}}
+{{- $service := include "hdfs-k8s.namenode.fullname" . -}}
+{{- $domain := include "svc-domain" . -}}
+{{- printf "%s.%s.%s" $pod $service $domain -}}
+{{- end -}}
+
+{{/*
+Construct the name of the namenode pod 1.
+*/}}
+{{- define "namenode-pod-1" -}}
+{{- template "hdfs-k8s.namenode.fullname" . -}}-1
+{{- end -}}
+
+{{/*
+Construct the full name of the namenode statefulset member 1.
+*/}}
+{{- define "namenode-svc-1" -}}
+{{- $pod := include "namenode-pod-1" . -}}
+{{- $service := include "hdfs-k8s.namenode.fullname" . -}}
+{{- $domain := include "svc-domain" . -}}
+{{- printf "%s.%s.%s" $pod $service $domain -}}
+{{- end -}}
diff --git a/vnfs/DAaaS/training-core/charts/kubernetes-HDFS/charts/hdfs-k8s/values.yaml b/vnfs/DAaaS/training-core/charts/kubernetes-HDFS/charts/hdfs-k8s/values.yaml
new file mode 100644
index 00000000..77ca3fe0
--- /dev/null
+++ b/vnfs/DAaaS/training-core/charts/kubernetes-HDFS/charts/hdfs-k8s/values.yaml
@@ -0,0 +1,248 @@
+## ------------------------------------------------------------------------------
+## zookeeper:
+## ------------------------------------------------------------------------------
+zookeeper:
+ ## Configure Zookeeper resource requests and limits
+ ## ref: http://kubernetes.io/docs/user-guide/compute-resources/
+ resources: ~
+
+ ## The JVM heap size to allocate to Zookeeper
+ env:
+ ZK_HEAP_SIZE: 1G
+
+ ## The number of zookeeper server to have in the quorum.
+ replicaCount: 3
+
+## ------------------------------------------------------------------------------
+## hdfs-config-k8s:
+## ------------------------------------------------------------------------------
+hdfs-config-k8s:
+ ## Custom hadoop config keys passed to the hdfs configmap as extra keys.
+ customHadoopConfig:
+ coreSite: {}
+ ## Set config key and value pairs, e.g.
+ # hadoop.http.authentication.type: kerberos
+
+ hdfsSite: {}
+ ## Set config key and value pairs, e.g.
+ # dfs.datanode.use.datanode.hostname: "false"
+
+## ------------------------------------------------------------------------------
+## hdfs-journalnode-k8s:
+## ------------------------------------------------------------------------------
+hdfs-journalnode-k8s:
+ persistence:
+ ## Persistent Volume Storage Class
+ ## If defined, storageClassName: <storageClass>
+ ## If set to "-", storageClassName: "", which disables dynamic provisioning
+ ## If undefined (the default) or set to null, no storageClassName spec is
+ ## set, choosing the default provisioner. (gp2 on AWS, standard on
+ ## GKE, AWS & OpenStack)
+ ##
+ # storageClass: "-"
+ ## To choose a suitable persistent volume from available static volumes, selectors
+ ## are used.
+ # selector:
+ # matchLabels:
+ # volume-type: hdfs-ssd
+ accessMode: ReadWriteOnce
+ size: 20Gi
+
+ ## Node labels and tolerations for pod assignment
+ nodeSelector: {}
+ tolerations: []
+ affinity: {}
+
+## ------------------------------------------------------------------------------
+## hdfs-namenode-k8s:
+## ------------------------------------------------------------------------------
+hdfs-namenode-k8s:
+ ## Name of the namenode start script in the config map.
+ namenodeStartScript: format-and-run.sh
+
+ ## A namenode start script that can have user specified content.
+ ## Can be used to conduct ad-hoc operation as specified by a user.
+ ## To use this, also set the namenodeStartScript variable above
+ ## to custom-run.sh.
+ customRunScript: |
+ #!/bin/bash -x
+ echo Write your own script content!
+ echo This message will disappear in 10 seconds.
+ sleep 10
+
+ persistence:
+ ## Persistent Volume Storage Class
+ ## If defined, storageClassName: <storageClass>
+ ## If set to "-", storageClassName: "", which disables dynamic provisioning
+ ## If undefined (the default) or set to null, no storageClassName spec is
+ ## set, choosing the default provisioner. (gp2 on AWS, standard on
+ ## GKE, AWS & OpenStack)
+ ##
+ # storageClass: "-"
+
+ ## To choose a suitable persistent volume from available static volumes, selectors
+ ## are used.
+ # selector:
+ # matchLabels:
+ # volume-type: hdfs-ssd
+
+ accessMode: ReadWriteOnce
+
+ size: 100Gi
+
+ ## Whether or not to use hostNetwork in namenode pods. Disabling this will break
+ ## data locality as namenode will see pod virtual IPs and fails to equate them with
+ ## cluster node physical IPs associated with data nodes.
+ ## We currently disable this only for CI on minikube.
+ hostNetworkEnabled: true
+
+ ## Node labels and tolerations for pod assignment
+ nodeSelector: {}
+ tolerations: []
+ affinity: {}
+
+## ------------------------------------------------------------------------------
+## hdfs-simple-namenode-k8s:
+## ------------------------------------------------------------------------------
+hdfs-simple-namenode-k8s:
+ ## Path of the local disk directory on a cluster node that will contain the namenode
+ ## fsimage and edit logs. This will be mounted to the namenode as a k8s HostPath
+ ## volume.
+ nameNodeHostPath: /hdfs-name
+
+ ## Node labels and tolerations for pod assignment
+ nodeSelector: {}
+ tolerations: []
+ affinity: {}
+
+## ------------------------------------------------------------------------------
+## hdfs-datanode-k8s:
+## ------------------------------------------------------------------------------
+hdfs-datanode-k8s:
+ ## Node labels and tolerations for pod assignment
+ nodeSelector: {}
+ tolerations: []
+ affinity: {}
+
+## ------------------------------------------------------------------------------
+## hdfs-krb5-k8s:
+## ------------------------------------------------------------------------------
+hdfs-krb5-k8s:
+ persistence:
+ ## Persistent Volume Storage Class
+ ## If defined, storageClassName: <storageClass>
+ ## If set to "-", storageClassName: "", which disables dynamic provisioning
+ ## If undefined (the default) or set to null, no storageClassName spec is
+ ## set, choosing the default provisioner. (gp2 on AWS, standard on
+ ## GKE, AWS & OpenStack)
+ ##
+ # storageClass: "-"
+
+ ## To choose a suitable persistent volume from available static volumes, selectors
+ ## are used.
+ # selector:
+ # matchLabels:
+ # volume-type: hdfs-ssd
+
+ accessMode: ReadWriteOnce
+
+ size: 20Gi
+
+ ## We use a 3rd party image built from https://github.com/gcavalcante8808/docker-krb5-server.
+ ## TODO: The pod currently prints out the admin account in plain text.
+ ## Supply an admin account password using a k8s secret.
+ ## TODO: The auto-generated passwords might be weak due to low entropy.
+ ## Increase entropy by running rngd or haveged.
+ ## TODO: Using latest tag is not desirable. The current image does not have specific tags.
+ ## Find a way to fix it.
+ image:
+ repository: gcavalcante8808/krb5-server
+
+ tag: latest
+
+ pullPolicy: IfNotPresent
+
+ service:
+ type: ClusterIP
+
+ port: 88
+## ------------------------------------------------------------------------------
+## Global values affecting all sub-charts:
+## ------------------------------------------------------------------------------
+global:
+ ## A list of the local disk directories on cluster nodes that will contain the datanode
+ ## blocks. These paths will be mounted to the datanode as K8s HostPath volumes.
+ ## In a command line, the list should be enclosed in '{' and '}'.
+ ## e.g. --set "dataNodeHostPath={/hdfs-data,/hdfs-data1}"
+ dataNodeHostPath:
+ - /hdfs-data
+
+ ## Parameters for determining which Unix user and group IDs to use in pods.
+ ## Persistent volume permission may need to match these.
+ podSecurityContext:
+ enabled: false
+ runAsUser: 0
+ fsGroup: 1000
+
+ ## Whether or not to expect namenodes in the HA setup.
+ namenodeHAEnabled: true
+
+ ## The number of zookeeper server to have in the quorum.
+ ## This should match zookeeper.replicaCount above. Used only when
+ ## namenodeHAEnabled is set.
+ zookeeperQuorumSize: 3
+
+ ## Override zookeeper quorum address. Zookeeper is used for determining which namenode
+ ## instance is active. Separated by the comma character. Used only when
+ ## namenodeHAEnabled is set.
+ ##
+ # zookeeperQuorumOverride: zk-0.zk-svc.default.svc.cluster.local:2181,zk-1.zk-svc.default.svc.cluster.local:2181,zk-2.zk-svc.default.svc.cluster.local:2181
+
+ ## How many journal nodes to launch as a quorum. Used only when
+ ## namenodeHAEnabled is set.
+ journalnodeQuorumSize: 3
+
+ ## Whether or not to enable default affinity setting.
+ defaultAffinityEnabled: true
+
+ ## Whether or not Kerberos support is enabled.
+ kerberosEnabled: false
+
+ ## Effective only if Kerberos is enabled. Override th name of the k8s
+ ## ConfigMap containing the kerberos config file.
+ ##
+ # kerberosConfigMapOverride: kerberos-config
+
+ ## Effective only if Kerberos is enabled. Name of the kerberos config file inside
+ ## the config map.
+ kerberosConfigFileName: krb5.conf
+
+ ## Effective only if Kerberos is enabled. Override the name of the k8s Secret
+ ## containing the kerberos keytab files of per-host HDFS principals.
+ ## The secret should have multiple data items. Each data item name
+ ## should be formatted as:
+ ## `HOST-NAME.keytab`
+ ## where HOST-NAME should match the cluster node
+ ## host name that each per-host hdfs principal is associated with.
+ ##
+ # kerberosKeytabsSecretOverride: hdfs-kerberos-keytabs
+
+ ## Required to be non-empty if Kerberos is enabled. Specify your Kerberos realm name.
+ ## This should match the realm name in your Kerberos config file.
+ kerberosRealm: MYCOMPANY.COM
+
+ ## Effective only if Kerberos is enabled. Enable protection of datanodes using
+ ## the jsvc utility. See the reference doc at
+ ## https://hadoop.apache.org/docs/r2.7.2/hadoop-project-dist/hadoop-common/SecureMode.html#Secure_DataNode
+ jsvcEnabled: true
+
+## Tags and conditions for triggering a group of relevant subcharts.
+tags:
+ ## Trigger all subcharts required for high availability. Enabled by default.
+ ha: true
+
+ ## Trigger all subcharts required for using Kerberos. Disabled by default.
+ kerberos: false
+
+ ## Trigger all subcharts required for non-HA setup. Disabled by default.
+ simple: false
diff --git a/vnfs/DAaaS/training-core/charts/kubernetes-HDFS/charts/hdfs-krb5-k8s/.helmignore b/vnfs/DAaaS/training-core/charts/kubernetes-HDFS/charts/hdfs-krb5-k8s/.helmignore
new file mode 100644
index 00000000..f0c13194
--- /dev/null
+++ b/vnfs/DAaaS/training-core/charts/kubernetes-HDFS/charts/hdfs-krb5-k8s/.helmignore
@@ -0,0 +1,21 @@
+# Patterns to ignore when building packages.
+# This supports shell glob matching, relative path matching, and
+# negation (prefixed with !). Only one pattern per line.
+.DS_Store
+# Common VCS dirs
+.git/
+.gitignore
+.bzr/
+.bzrignore
+.hg/
+.hgignore
+.svn/
+# Common backup files
+*.swp
+*.bak
+*.tmp
+*~
+# Various IDEs
+.project
+.idea/
+*.tmproj
diff --git a/vnfs/DAaaS/training-core/charts/kubernetes-HDFS/charts/hdfs-krb5-k8s/Chart.yaml b/vnfs/DAaaS/training-core/charts/kubernetes-HDFS/charts/hdfs-krb5-k8s/Chart.yaml
new file mode 100644
index 00000000..f8c301f1
--- /dev/null
+++ b/vnfs/DAaaS/training-core/charts/kubernetes-HDFS/charts/hdfs-krb5-k8s/Chart.yaml
@@ -0,0 +1,4 @@
+apiVersion: v1
+name: hdfs-krb5-k8s
+version: 0.1.0
+description: Kerberos server that can be used for HDFS on Kubernetes.
diff --git a/vnfs/DAaaS/training-core/charts/kubernetes-HDFS/charts/hdfs-krb5-k8s/templates/statefulset.yaml b/vnfs/DAaaS/training-core/charts/kubernetes-HDFS/charts/hdfs-krb5-k8s/templates/statefulset.yaml
new file mode 100644
index 00000000..15be4b2f
--- /dev/null
+++ b/vnfs/DAaaS/training-core/charts/kubernetes-HDFS/charts/hdfs-krb5-k8s/templates/statefulset.yaml
@@ -0,0 +1,99 @@
+# A headless service to create DNS records.
+apiVersion: v1
+kind: Service
+metadata:
+ name: {{ template "hdfs-k8s.krb5.fullname" . }}
+ labels:
+ app: {{ template "hdfs-k8s.krb5.name" . }}
+ chart: {{ template "hdfs-k8s.subchart" . }}
+ release: {{ .Release.Name }}
+ annotations:
+ # TODO: Deprecated. Replace tolerate-unready-endpoints with
+ # v1.Service.PublishNotReadyAddresses.
+ service.alpha.kubernetes.io/tolerate-unready-endpoints: "true"
+spec:
+ ports:
+ - port: {{ .Values.service.port }}
+ protocol: TCP
+ name: kdc-tcp
+ - port: {{ .Values.service.port }}
+ protocol: UDP
+ name: kdc-udp
+ clusterIP: None
+ selector:
+ app: {{ template "hdfs-k8s.krb5.name" . }}
+ release: {{ .Release.Name }}
+---
+apiVersion: apps/v1beta1
+kind: StatefulSet
+metadata:
+ name: {{ template "hdfs-k8s.krb5.fullname" . }}
+ labels:
+ app: {{ template "hdfs-k8s.krb5.name" . }}
+ chart: {{ template "hdfs-k8s.subchart" . }}
+ release: {{ .Release.Name }}
+spec:
+ serviceName: {{ template "hdfs-k8s.krb5.fullname" . }}
+ replicas: {{ .Values.replicaCount }}
+ selector:
+ matchLabels:
+ app: {{ template "hdfs-k8s.krb5.name" . }}
+ release: {{ .Release.Name }}
+ template:
+ metadata:
+ labels:
+ app: {{ template "hdfs-k8s.krb5.name" . }}
+ release: {{ .Release.Name }}
+ {{- if .Values.podAnnotations }}
+ annotations:
+{{ toYaml .Values.podAnnotations | indent 8 }}
+ {{- end }}
+ spec:
+ containers:
+ - name: {{ .Chart.Name }}
+ image: "{{ .Values.image.repository }}:{{ .Values.image.tag }}"
+ imagePullPolicy: {{ .Values.image.pullPolicy }}
+ env:
+ - name: KRB5_REALM
+ value: {{ .Values.global.kerberosRealm }}
+ - name: KRB5_KDC
+ value: {{ template "krb5-svc-0" . }}
+ ports:
+ - name: kdc-tcp
+ containerPort: 88
+ protocol: TCP
+ - name: kdc-udp
+ containerPort: 88
+ protocol: UDP
+ livenessProbe:
+ tcpSocket:
+ port: kdc-tcp
+ readinessProbe:
+ tcpSocket:
+ port: kdc-tcp
+ restartPolicy: Always
+ {{- if .Values.global.podSecurityContext.enabled }}
+ securityContext:
+ runAsUser: {{ .Values.global.podSecurityContext.runAsUser }}
+ fsGroup: {{ .Values.global.podSecurityContext.fsGroup }}
+ {{- end }}
+ volumeClaimTemplates:
+ - metadata:
+ name: datadir
+ spec:
+ accessModes:
+ - {{ .Values.persistence.accessMode | quote }}
+ resources:
+ requests:
+ storage: {{ .Values.persistence.size | quote }}
+ {{- if .Values.persistence.storageClass }}
+ {{- if (eq "-" .Values.persistence.storageClass) }}
+ storageClassName: ""
+ {{- else }}
+ storageClassName: "{{ .Values.persistence.storageClass }}"
+ {{- end }}
+ {{- end }}
+ {{- if .Values.persistence.selector }}
+ selector:
+{{ toYaml .Values.persistence.selector | indent 10 }}
+ {{- end }}
diff --git a/vnfs/DAaaS/training-core/charts/kubernetes-HDFS/charts/hdfs-namenode-k8s/Chart.yaml b/vnfs/DAaaS/training-core/charts/kubernetes-HDFS/charts/hdfs-namenode-k8s/Chart.yaml
new file mode 100644
index 00000000..f45655f5
--- /dev/null
+++ b/vnfs/DAaaS/training-core/charts/kubernetes-HDFS/charts/hdfs-namenode-k8s/Chart.yaml
@@ -0,0 +1,4 @@
+apiVersion: v1
+name: hdfs-namenode-k8s
+version: 0.1.0
+description: namenodes in HDFS on Kubernetes.
diff --git a/vnfs/DAaaS/training-core/charts/kubernetes-HDFS/charts/hdfs-namenode-k8s/templates/namenode-statefulset.yaml b/vnfs/DAaaS/training-core/charts/kubernetes-HDFS/charts/hdfs-namenode-k8s/templates/namenode-statefulset.yaml
new file mode 100644
index 00000000..44e8fc60
--- /dev/null
+++ b/vnfs/DAaaS/training-core/charts/kubernetes-HDFS/charts/hdfs-namenode-k8s/templates/namenode-statefulset.yaml
@@ -0,0 +1,287 @@
+# A headless service to create DNS records.
+apiVersion: v1
+kind: Service
+metadata:
+ name: {{ template "hdfs-k8s.namenode.fullname" . }}
+ labels:
+ app: {{ template "hdfs-k8s.namenode.name" . }}
+ chart: {{ template "hdfs-k8s.subchart" . }}
+ release: {{ .Release.Name }}
+ annotations:
+ # TODO: Deprecated. Replace tolerate-unready-endpoints with
+ # v1.Service.PublishNotReadyAddresses.
+ service.alpha.kubernetes.io/tolerate-unready-endpoints: "true"
+spec:
+ ports:
+ - port: 8020
+ name: fs
+ - port: 50070
+ name: http
+ clusterIP: None
+ selector:
+ app: {{ template "hdfs-k8s.namenode.name" . }}
+ release: {{ .Release.Name }}
+---
+apiVersion: policy/v1beta1
+kind: PodDisruptionBudget
+metadata:
+ name: {{ template "hdfs-k8s.namenode.fullname" . }}
+ labels:
+ app: {{ template "hdfs-k8s.namenode.name" . }}
+ chart: {{ template "hdfs-k8s.subchart" . }}
+ release: {{ .Release.Name }}
+spec:
+ selector:
+ matchLabels:
+ app: {{ template "hdfs-k8s.namenode.name" . }}
+ release: {{ .Release.Name }}
+ minAvailable: 1
+---
+# Provides namenode helper scripts. Most of them are start scripts
+# that meet different needs.
+# TODO: Support upgrade of metadata in case a new Hadoop version requires it.
+apiVersion: v1
+kind: ConfigMap
+metadata:
+ name: {{ template "hdfs-k8s.namenode.fullname" . }}-scripts
+ labels:
+ app: {{ template "hdfs-k8s.namenode.name" . }}
+ chart: {{ template "hdfs-k8s.subchart" . }}
+ release: {{ .Release.Name }}
+data:
+ # A bootstrap script which will start namenode daemons after conducting
+ # optional metadata initialization steps. The metadata initialization
+ # steps will take place in case the metadata dir is empty,
+ # which will be the case only for the very first run. The specific steps
+ # will differ depending on whether the namenode is active or standby.
+ # We also assume, for the very first run, namenode-0 will be active and
+ # namenode-1 will be standby as StatefulSet will launch namenode-0 first
+ # and zookeeper will determine the sole namenode to be the active one.
+ # For active namenode, the initialization steps will format the metadata,
+ # zookeeper dir and journal node data entries.
+ # For standby namenode, the initialization steps will simply receieve
+ # the first batch of metadata updates from the journal node.
+ format-and-run.sh: |
+ #!/usr/bin/env bash
+ # Exit on error. Append "|| true" if you expect an error.
+ set -o errexit
+ # Exit on error inside any functions or subshells.
+ set -o errtrace
+ # Do not allow use of undefined vars. Use ${VAR:-} to use an undefined VAR
+ set -o nounset
+ # Catch an error in command pipes. e.g. mysqldump fails (but gzip succeeds)
+ # in `mysqldump |gzip`
+ set -o pipefail
+ # Turn on traces, useful while debugging.
+ set -o xtrace
+
+ _HDFS_BIN=$HADOOP_PREFIX/bin/hdfs
+ _METADATA_DIR=/hadoop/dfs/name/current
+ if [[ "$MY_POD" = "$NAMENODE_POD_0" ]]; then
+ if [[ ! -d $_METADATA_DIR ]]; then
+ $_HDFS_BIN --config $HADOOP_CONF_DIR namenode -format \
+ -nonInteractive hdfs-k8s ||
+ (rm -rf $_METADATA_DIR; exit 1)
+ fi
+ _ZKFC_FORMATTED=/hadoop/dfs/name/current/.hdfs-k8s-zkfc-formatted
+ if [[ ! -f $_ZKFC_FORMATTED ]]; then
+ _OUT=$($_HDFS_BIN --config $HADOOP_CONF_DIR zkfc -formatZK -nonInteractive 2>&1)
+ # zkfc masks fatal exceptions and returns exit code 0
+ (echo $_OUT | grep -q "FATAL") && exit 1
+ touch $_ZKFC_FORMATTED
+ fi
+ elif [[ "$MY_POD" = "$NAMENODE_POD_1" ]]; then
+ if [[ ! -d $_METADATA_DIR ]]; then
+ $_HDFS_BIN --config $HADOOP_CONF_DIR namenode -bootstrapStandby \
+ -nonInteractive || \
+ (rm -rf $_METADATA_DIR; exit 1)
+ fi
+ fi
+ $HADOOP_PREFIX/sbin/hadoop-daemon.sh --config $HADOOP_CONF_DIR start zkfc
+ $_HDFS_BIN --config $HADOOP_CONF_DIR namenode
+
+ # A start script that will just hang indefinitely. A user can then get
+ # inside the pod and debug. Or a user can conduct a custom manual operations.
+ do-nothing.sh: |
+ #!/usr/bin/env bash
+ tail -f /var/log/dmesg
+
+ # A start script that has user specified content. Can be used to conduct
+ # ad-hoc operation as specified by a user.
+ custom-run.sh: {{ .Values.customRunScript | quote }}
+---
+apiVersion: apps/v1beta1
+kind: StatefulSet
+metadata:
+ name: {{ template "hdfs-k8s.namenode.fullname" . }}
+ labels:
+ app: {{ template "hdfs-k8s.namenode.name" . }}
+ chart: {{ template "hdfs-k8s.subchart" . }}
+ release: {{ .Release.Name }}
+spec:
+ serviceName: {{ template "hdfs-k8s.namenode.fullname" . }}
+ replicas: 2
+ template:
+ metadata:
+ labels:
+ app: {{ template "hdfs-k8s.namenode.name" . }}
+ release: {{ .Release.Name }}
+ {{- if .Values.podAnnotations }}
+ annotations:
+{{ toYaml .Values.podAnnotations | indent 8 }}
+ {{- end }}
+ spec:
+ {{- if .Values.hostNetworkEnabled }}
+ # Use hostNetwork so datanodes connect to namenode without going through an overlay network
+ # like weave. Otherwise, namenode fails to see physical IP address of datanodes.
+ # Disabling this will break data locality as namenode will see pod virtual IPs and fails to
+ # equate them with cluster node physical IPs associated with data nodes.
+ # We currently disable this only for CI on minikube.
+ hostNetwork: true
+ hostPID: true
+ dnsPolicy: ClusterFirstWithHostNet
+ {{- else }}
+ dnsPolicy: ClusterFirst
+ {{- end }}
+ {{- if .Values.affinity }}
+ affinity:
+{{ toYaml .Values.affinity | indent 8 }}
+ {{- else if .Values.global.defaultAffinityEnabled }}
+ affinity:
+ podAntiAffinity:
+ requiredDuringSchedulingIgnoredDuringExecution:
+ - labelSelector:
+ matchExpressions:
+ - key: "app"
+ operator: In
+ values:
+ - {{ template "hdfs-k8s.namenode.name" . }}
+ - key: "release"
+ operator: In
+ values:
+ - {{ .Release.Name }}
+ topologyKey: "kubernetes.io/hostname"
+ {{- end }}
+ {{- if .Values.nodeSelector }}
+ nodeSelector:
+{{ toYaml .Values.nodeSelector | indent 8 }}
+ {{- end }}
+ {{- if .Values.tolerations }}
+ tolerations:
+{{ toYaml .Values.tolerations | indent 8 }}
+ {{- end }}
+ containers:
+ # TODO: Support hadoop version as option.
+ - name: hdfs-namenode
+ image: uhopper/hadoop-namenode:2.7.2
+ env:
+ - name: HADOOP_CUSTOM_CONF_DIR
+ value: /etc/hadoop-custom-conf
+ - name: MULTIHOMED_NETWORK
+ value: "0"
+ # Used by the start script below.
+ - name: MY_POD
+ valueFrom:
+ fieldRef:
+ fieldPath: metadata.name
+ - name: NAMENODE_POD_0
+ value: {{ template "namenode-pod-0" . }}
+ - name: NAMENODE_POD_1
+ value: {{ template "namenode-pod-1" . }}
+ command: ['/bin/sh', '-c']
+ # The start script is provided by a config map.
+ args:
+ - /entrypoint.sh "/nn-scripts/{{ .Values.namenodeStartScript }}"
+ ports:
+ - containerPort: 8020
+ name: fs
+ - containerPort: 50070
+ name: http
+ volumeMounts:
+ - name: nn-scripts
+ mountPath: /nn-scripts
+ readOnly: true
+ # Mount a subpath of the volume so that the name subdir would be a
+ # brand new empty dir. This way, we won't get affected by existing
+ # files in the volume top dir.
+ - name: metadatadir
+ mountPath: /hadoop/dfs/name
+ subPath: name
+ - name: hdfs-config
+ mountPath: /etc/hadoop-custom-conf
+ readOnly: true
+ {{- if .Values.global.kerberosEnabled }}
+ - name: kerberos-config
+ mountPath: /etc/krb5.conf
+ subPath: {{ .Values.global.kerberosConfigFileName }}
+ readOnly: true
+ - name: kerberos-keytab-copy
+ mountPath: /etc/security/
+ readOnly: true
+ {{- end }}
+ {{- if .Values.global.kerberosEnabled }}
+ initContainers:
+ - name: copy-kerberos-keytab
+ image: busybox:1.27.1
+ command: ['sh', '-c']
+ args:
+ - cp /kerberos-keytabs/${MY_KERBEROS_NAME}*.keytab /kerberos-keytab-copy/hdfs.keytab
+ env:
+ - name: MY_KERBEROS_NAME
+ valueFrom:
+ fieldRef:
+ {{- if .Values.hostNetworkEnabled }}
+ fieldPath: spec.nodeName
+ {{- else }}
+ fieldPath: metadata.name
+ {{- end }}
+ volumeMounts:
+ - name: kerberos-keytabs
+ mountPath: /kerberos-keytabs
+ - name: kerberos-keytab-copy
+ mountPath: /kerberos-keytab-copy
+ {{- end }}
+ restartPolicy: Always
+ volumes:
+ - name: nn-scripts
+ configMap:
+ name: {{ template "hdfs-k8s.namenode.fullname" . }}-scripts
+ defaultMode: 0744
+ - name: hdfs-config
+ configMap:
+ name: {{ template "hdfs-k8s.config.fullname" . }}
+ {{- if .Values.global.kerberosEnabled }}
+ - name: kerberos-config
+ configMap:
+ name: {{ template "krb5-configmap" . }}
+ - name: kerberos-keytabs
+ secret:
+ secretName: {{ template "krb5-keytabs-secret" . }}
+ - name: kerberos-keytab-copy
+ emptyDir: {}
+ {{- end }}
+ {{- if .Values.global.podSecurityContext.enabled }}
+ securityContext:
+ runAsUser: {{ .Values.global.podSecurityContext.runAsUser }}
+ fsGroup: {{ .Values.global.podSecurityContext.fsGroup }}
+ {{- end }}
+ volumeClaimTemplates:
+ - metadata:
+ name: metadatadir
+ spec:
+ accessModes:
+ - {{ .Values.persistence.accessMode | quote }}
+ resources:
+ requests:
+ storage: {{ .Values.persistence.size | quote }}
+ {{- if .Values.persistence.storageClass }}
+ {{- if (eq "-" .Values.persistence.storageClass) }}
+ storageClassName: ""
+ {{- else }}
+ storageClassName: "{{ .Values.persistence.storageClass }}"
+ {{- end }}
+ {{- end }}
+ {{- if .Values.persistence.selector }}
+ selector:
+{{ toYaml .Values.persistence.selector | indent 10 }}
+ {{- end }}
diff --git a/vnfs/DAaaS/training-core/charts/kubernetes-HDFS/charts/hdfs-simple-namenode-k8s/Chart.yaml b/vnfs/DAaaS/training-core/charts/kubernetes-HDFS/charts/hdfs-simple-namenode-k8s/Chart.yaml
new file mode 100644
index 00000000..bcf6f5b0
--- /dev/null
+++ b/vnfs/DAaaS/training-core/charts/kubernetes-HDFS/charts/hdfs-simple-namenode-k8s/Chart.yaml
@@ -0,0 +1,4 @@
+apiVersion: v1
+name: hdfs-simple-namenode-k8s
+version: 0.1.0
+description: Non-HA namenode for HDFS on Kubernetes.
diff --git a/vnfs/DAaaS/training-core/charts/kubernetes-HDFS/charts/hdfs-simple-namenode-k8s/templates/namenode-statefulset.yaml b/vnfs/DAaaS/training-core/charts/kubernetes-HDFS/charts/hdfs-simple-namenode-k8s/templates/namenode-statefulset.yaml
new file mode 100644
index 00000000..ab92efa9
--- /dev/null
+++ b/vnfs/DAaaS/training-core/charts/kubernetes-HDFS/charts/hdfs-simple-namenode-k8s/templates/namenode-statefulset.yaml
@@ -0,0 +1,82 @@
+# A headless service to create DNS records.
+apiVersion: v1
+kind: Service
+metadata:
+ name: {{ template "hdfs-k8s.namenode.fullname" . }}
+ labels:
+ app: {{ template "hdfs-k8s.namenode.name" . }}
+ chart: {{ template "hdfs-k8s.subchart" . }}
+ release: {{ .Release.Name }}
+spec:
+ ports:
+ - port: 8020
+ name: fs
+ clusterIP: None
+ selector:
+ app: {{ template "hdfs-k8s.namenode.name" . }}
+ release: {{ .Release.Name }}
+---
+apiVersion: apps/v1beta1
+kind: StatefulSet
+metadata:
+ name: {{ template "hdfs-k8s.namenode.fullname" . }}
+ labels:
+ app: {{ template "hdfs-k8s.namenode.name" . }}
+ chart: {{ template "hdfs-k8s.subchart" . }}
+ release: {{ .Release.Name }}
+spec:
+ serviceName: {{ template "hdfs-k8s.namenode.fullname" . }}
+ # Create a size-1 set.
+ replicas: 1
+ template:
+ metadata:
+ labels:
+ app: {{ template "hdfs-k8s.namenode.name" . }}
+ release: {{ .Release.Name }}
+ {{- if .Values.podAnnotations }}
+ annotations:
+{{ toYaml .Values.podAnnotations | indent 8 }}
+ {{- end }}
+ spec:
+ {{- if .Values.affinity }}
+ affinity:
+{{ toYaml .Values.affinity | indent 8 }}
+ {{- end }}
+ {{- if .Values.nodeSelector }}
+ nodeSelector:
+{{ toYaml .Values.nodeSelector | indent 8 }}
+ {{- end }}
+ {{- if .Values.tolerations }}
+ tolerations:
+{{ toYaml .Values.tolerations | indent 8 }}
+ {{- end }}
+ # Use hostNetwork so datanodes connect to namenode without going through an overlay network
+ # like weave. Otherwise, namenode fails to see physical IP address of datanodes.
+ hostNetwork: true
+ hostPID: true
+ dnsPolicy: ClusterFirstWithHostNet
+ containers:
+ - name: hdfs-namenode
+ image: uhopper/hadoop-namenode:2.7.2
+ env:
+ - name: HADOOP_CUSTOM_CONF_DIR
+ value: /etc/hadoop-custom-conf
+ - name: CLUSTER_NAME
+ value: hdfs-k8s
+ ports:
+ - containerPort: 8020
+ name: fs
+ volumeMounts:
+ - name: hdfs-name
+ mountPath: /hadoop/dfs/name
+ - name: hdfs-config
+ mountPath: /etc/hadoop-custom-conf
+ readOnly: true
+ restartPolicy: Always
+ volumes:
+ - name: hdfs-name
+ hostPath:
+ path: {{ .Values.nameNodeHostPath }}
+ - name: hdfs-config
+ configMap:
+ name: {{ template "hdfs-k8s.config.fullname" . }}