diff options
author | Dileep Ranganathan <dileep.ranganathan@intel.com> | 2019-05-30 12:38:37 -0700 |
---|---|---|
committer | Dileep Ranganathan <dileep.ranganathan@intel.com> | 2019-05-30 21:11:52 +0000 |
commit | 3d5a3e06530c1250d48f7d838c619f3bfbcd019d (patch) | |
tree | 349e370c43ce7318b3f7eb7736345de6872cbef2 /vnfs/DAaaS/deploy/training-core | |
parent | 31802660dfe74a8671ae29789f0018f0f887ea1a (diff) |
Refactor Distributed Analytics project structure
Modified the project structure to improve maintainability and to add future CI and
integration test support.
Change-Id: Id30bfb1f83f23785a6b5f99e81f42f752d59c0f8
Issue-ID: ONAPARC-280
Signed-off-by: Dileep Ranganathan <dileep.ranganathan@intel.com>
Diffstat (limited to 'vnfs/DAaaS/deploy/training-core')
47 files changed, 3231 insertions, 0 deletions
diff --git a/vnfs/DAaaS/deploy/training-core/.helmignore b/vnfs/DAaaS/deploy/training-core/.helmignore new file mode 100644 index 00000000..ef839191 --- /dev/null +++ b/vnfs/DAaaS/deploy/training-core/.helmignore @@ -0,0 +1,23 @@ +# Patterns to ignore when building packages. +# This supports shell glob matching, relative path matching, and +# negation (prefixed with !). Only one pattern per line. +.DS_Store +# Common VCS dirs +.git/ +.gitignore +*.label* +.bzr/ +.bzrignore +.hg/ +.hgignore +.svn/ +# Common backup files +*.swp +*.bak +*.tmp +*~ +# Various IDEs +.project +.idea/ +*.tmproj +.vscode/ diff --git a/vnfs/DAaaS/deploy/training-core/Chart.yaml b/vnfs/DAaaS/deploy/training-core/Chart.yaml new file mode 100644 index 00000000..9057c590 --- /dev/null +++ b/vnfs/DAaaS/deploy/training-core/Chart.yaml @@ -0,0 +1,5 @@ +apiVersion: v1 +appVersion: "1.0" +description: Helm chart for training framework components +name: training-core +version: 0.1.0 diff --git a/vnfs/DAaaS/deploy/training-core/charts/kubernetes-HDFS/.gitignore b/vnfs/DAaaS/deploy/training-core/charts/kubernetes-HDFS/.gitignore new file mode 100644 index 00000000..fc82fcb5 --- /dev/null +++ b/vnfs/DAaaS/deploy/training-core/charts/kubernetes-HDFS/.gitignore @@ -0,0 +1,2 @@ +tests/bin +tests/tmp diff --git a/vnfs/DAaaS/deploy/training-core/charts/kubernetes-HDFS/.travis.yml b/vnfs/DAaaS/deploy/training-core/charts/kubernetes-HDFS/.travis.yml new file mode 100644 index 00000000..1d3351da --- /dev/null +++ b/vnfs/DAaaS/deploy/training-core/charts/kubernetes-HDFS/.travis.yml @@ -0,0 +1,20 @@ +sudo: required + +env: + - CASES=_basic.sh + - CASES=_basic-subcharts.sh + - CASES=_kerberos.sh + - CASES=_single-namenode.sh + +before_script: +# Required for K8s v1.10.x. 
See +# https://github.com/kubernetes/kubernetes/issues/61058#issuecomment-372764783 +- sudo mount --make-shared / && sudo service docker restart +- USE_MINIKUBE_DRIVER_NONE=true USE_SUDO_MINIKUBE=true tests/setup.sh + +script: +- tests/run.sh + +after_script: +- tests/cleanup.sh +- tests/teardown.sh diff --git a/vnfs/DAaaS/deploy/training-core/charts/kubernetes-HDFS/LICENSE b/vnfs/DAaaS/deploy/training-core/charts/kubernetes-HDFS/LICENSE new file mode 100644 index 00000000..8dada3ed --- /dev/null +++ b/vnfs/DAaaS/deploy/training-core/charts/kubernetes-HDFS/LICENSE @@ -0,0 +1,201 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. 
+ + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." 
+ + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. 
You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. 
+ + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. 
In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "{}" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. 
+ + Copyright {yyyy} {name of copyright owner} + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/vnfs/DAaaS/deploy/training-core/charts/kubernetes-HDFS/README.md b/vnfs/DAaaS/deploy/training-core/charts/kubernetes-HDFS/README.md new file mode 100644 index 00000000..ca694a19 --- /dev/null +++ b/vnfs/DAaaS/deploy/training-core/charts/kubernetes-HDFS/README.md @@ -0,0 +1,12 @@ +--- +layout: global +title: HDFS on Kubernetes +--- +# HDFS on Kubernetes +Repository holding helm charts for running Hadoop Distributed File System (HDFS) +on Kubernetes. + +See [charts/README.md](charts/README.md) for how to run the charts. + +See [tests/README.md](tests/README.md) for how to run integration tests for +HDFS on Kubernetes. diff --git a/vnfs/DAaaS/deploy/training-core/charts/kubernetes-HDFS/charts/README.md b/vnfs/DAaaS/deploy/training-core/charts/kubernetes-HDFS/charts/README.md new file mode 100644 index 00000000..15ee8867 --- /dev/null +++ b/vnfs/DAaaS/deploy/training-core/charts/kubernetes-HDFS/charts/README.md @@ -0,0 +1,390 @@ +--- +layout: global +title: HDFS charts +--- + +# HDFS charts + +Helm charts for launching HDFS daemons in a K8s cluster. The main entry-point +chart is `hdfs-k8s`, which is a uber-chart that specifies other charts as +dependency subcharts. This means you can launch all HDFS components using +`hdfs-k8s`. + +Note that the HDFS charts are currently in pre-alpha quality. They are also +being heavily revised and are subject to change. 
+ +HDFS on K8s supports the following features: + - namenode high availability (HA): HDFS namenode daemons are in charge of + maintaining file system metadata concerning which directories have which + files and where the file data is. Namenode crash will cause service outage. + HDFS can run two namenodes in active/standby setup. HDFS on K8s supports HA. + - K8s persistent volumes (PV) for metadata: Namenode crash will cause service + outage. Losing namenode metadata can lead to loss of file system. HDFS on + K8s can store the metadata in remote K8s persistent volumes so that metadata + can remain intact even if both namenode daemons are lost or restarted. + - K8s HostPath volumes for file data: HDFS datanode daemons store actual + file data. File data should also survive datanode crash or restart. HDFS on + K8s stores the file data on the local disks of the K8s cluster nodes using + K8s HostPath volumes. (We plan to switch to a better mechanism, K8s + persistent local volumes) + - Kerberos: Vanilla HDFS is not secure. Intruders can easily write custom + client code, put a fake user name in requests and steal data. Production + HDFS often secures itself using Kerberos. HDFS on K8s supports Kerberos. + +Here is the list of all charts. + + - hdfs-k8s: main uber-chart. Launches other charts. + - hdfs-namenode-k8s: a statefulset and other K8s components for launching HDFS + namenode daemons, which maintain file system metadata. The chart supports + namenode high availability (HA). + - hdfs-datanode-k8s: a daemonset and other K8s components for launching HDFS + datanode daemons, which are responsible for storing file data. + - hdfs-config-k8s: a configmap containing Hadoop config files for HDFS. + - zookeeper: This chart is NOT in this repo. But hdfs-k8s pulls the zookeeper + chart in the incubator remote repo + (https://kubernetes-charts-incubator.storage.googleapis.com/) + as a dependency and launches zookeeper daemons.
Zookeeper makes sure + only one namenode is active in the HA setup, while the other namenode + becomes standby. By default, we will launch three zookeeper servers. + - hdfs-journalnode-k8s: a statefulset and other K8s components for launching + HDFS journalnode quorums, which ensures the file system metadata are + properly shared among the two namenode daemons in the HA setup. + By default, we will launch three journalnode servers. + - hdfs-client-k8s: a pod that is configured to run Hadoop client commands + for accessing HDFS. + - hdfs-krb5-k8s: a size-1 statefulset and other K8s components for launching + a Kerberos server, which can be used to secure HDFS. Disabled by default. + - hdfs-simple-namenode-k8s: Disabled by default. A simpler setup of the + namenode that launches only one namenode. i.e. This does not support HA. It + does not support Kerberos nor persistent volumes either. As it does not + support HA, we also don't need zookeeper nor journal nodes. You may prefer + this if you want the simplest possible setup. + +# Prerequisite + +Requires Kubernetes 1.6+ as the `namenode` and `datanodes` are using +`ClusterFirstWithHostNet`, which was introduced in Kubernetes 1.6 + +# Usage + +## Basic + +The HDFS daemons can be launched using the main `hdfs-k8s` chart. First, build +the main chart using: + +``` + $ helm repo add incubator \ + https://kubernetes-charts-incubator.storage.googleapis.com/ + $ helm dependency build charts/hdfs-k8s +``` + +Zookeeper, journalnodes and namenodes need persistent volumes for storing +metadata. By default, the helm charts do not set the storage class name for +dynamically provisioned volumes, nor does it use persistent volume selectors for +static persistent volumes. + +This means it will rely on a provisioner for default storage volume class for +dynamic volumes. Or if your cluster has statically provisioned volumes, the +chart will match existing volumes entirely based on the size requirements. 
To +override this default behavior, you can specify storage volume classes for +dynamic volumes, or volume selectors for static volumes. See below for how to +set these options. + + - namenodes: Each of the two namenodes needs at least a 100 GB volume. i.e. + You need two 100 GB volumes. This can be overridden by the + `hdfs-namenode-k8s.persistence.size` option. + You can also override the storage class or the selector using + `hdfs-namenode-k8s.persistence.storageClass`, or + `hdfs-namenode-k8s.persistence.selector` respectively. For details, see the + values.yaml file inside `hdfs-namenode-k8s` chart dir. + - zookeeper: You need three > 5 GB volumes. i.e. Each of the three zookeeper + servers will need at least 5 GB in the volume. Can be overridden by + the `zookeeper.persistence.size` option. You can also override + the storage class using `zookeeper.persistence.storageClass`. + - journalnodes: Each of the three journalnodes will need at least 20 GB in + the volume. The size can be overridden by the + `hdfs-journalnode-k8s.persistence.size` option. + You can also override the storage class or the selector using + `hdfs-journalnode-k8s.persistence.storageClass`, or + `hdfs-journalnode-k8s.persistence.selector` respectively. For details, see the + values.yaml file inside `hdfs-journalnode-k8s` chart dir. + - kerberos: The single Kerberos server will need at least 20 GB in the volume. + The size can be overridden by the `hdfs-krb5-k8s.persistence.size` option. + You can also override the storage class or the selector using + `hdfs-krb5-k8s.persistence.storageClass`, or + `hdfs-krb5-k8s.persistence.selector` respectively. For details, see the + values.yaml file inside `hdfs-krb5-k8s` chart dir. + +Then launch the main chart. Specify the chart release name say "my-hdfs", +which will be the prefix of the K8s resource names for the HDFS components. + +``` + $ helm install -n my-hdfs charts/hdfs-k8s +``` + +Wait for all daemons to be ready.
Note some daemons may restart themselves +a few times before they become ready. + +``` + $ kubectl get pod -l release=my-hdfs + + NAME READY STATUS RESTARTS AGE + my-hdfs-client-c749d9f8f-d5pvk 1/1 Running 0 2m + my-hdfs-datanode-o7jia 1/1 Running 3 2m + my-hdfs-datanode-p5kch 1/1 Running 3 2m + my-hdfs-datanode-r3kjo 1/1 Running 3 2m + my-hdfs-journalnode-0 1/1 Running 0 2m + my-hdfs-journalnode-1 1/1 Running 0 2m + my-hdfs-journalnode-2 1/1 Running 0 1m + my-hdfs-namenode-0 1/1 Running 3 2m + my-hdfs-namenode-1 1/1 Running 3 2m + my-hdfs-zookeeper-0 1/1 Running 0 2m + my-hdfs-zookeeper-1 1/1 Running 0 2m + my-hdfs-zookeeper-2 1/1 Running 0 2m +``` + +Namenodes and datanodes are currently using the K8s `hostNetwork` so they can +see physical IPs of each other. If they are not using `hostNetwork`, +overlay K8s network providers such as weave-net may mask the physical IPs, +which will confuse the data locality later inside namenodes. + +Finally, test with the client pod: + +``` + $ _CLIENT=$(kubectl get pods -l app=hdfs-client,release=my-hdfs -o name | \ + cut -d/ -f 2) + $ kubectl exec $_CLIENT -- hdfs dfsadmin -report + $ kubectl exec $_CLIENT -- hdfs haadmin -getServiceState nn0 + $ kubectl exec $_CLIENT -- hdfs haadmin -getServiceState nn1 + + $ kubectl exec $_CLIENT -- hadoop fs -rm -r -f /tmp + $ kubectl exec $_CLIENT -- hadoop fs -mkdir /tmp + $ kubectl exec $_CLIENT -- sh -c \ + "(head -c 100M < /dev/urandom > /tmp/random-100M)" + $ kubectl exec $_CLIENT -- hadoop fs -copyFromLocal /tmp/random-100M /tmp +``` + +## Kerberos + +Kerberos can be enabled by setting a few related options: + +``` + $ helm install -n my-hdfs charts/hdfs-k8s \ + --set global.kerberosEnabled=true \ + --set global.kerberosRealm=MYCOMPANY.COM \ + --set tags.kerberos=true +``` + +This will launch all charts including the Kerberos server, which will become +ready pretty soon. However, HDFS daemon charts will be blocked as the daemons +require Kerberos service principals to be available.
So we need to unblock +them by creating those principals. + +First, create a configmap containing the common Kerberos config file: + +``` + $ _MY_DIR=~/krb5 + $ mkdir -p $_MY_DIR/tmp + $ _KDC=$(kubectl get pod -l app=hdfs-krb5,release=my-hdfs --no-headers \ + -o name | cut -d/ -f2) + $ kubectl cp $_KDC:/etc/krb5.conf $_MY_DIR/tmp/krb5.conf + $ kubectl create configmap my-hdfs-krb5-config \ + --from-file=$_MY_DIR/tmp/krb5.conf +``` + +Second, create the service principals and passwords. Kerberos requires service +principals to be host specific. Some HDFS daemons are associated with your K8s +cluster nodes' physical host names say kube-n1.mycompany.com, while others are +associated with Kubernetes virtual service names, for instance +my-hdfs-namenode-0.my-hdfs-namenode.default.svc.cluster.local. You can get +the list of these host names like: + +``` + $ _HOSTS=$(kubectl get nodes \ + -o=jsonpath='{.items[*].status.addresses[?(@.type == "Hostname")].address}') + + $ _HOSTS+=$(kubectl describe configmap my-hdfs-config | \ + grep -A 1 -e dfs.namenode.rpc-address.hdfs-k8s \ + -e dfs.namenode.shared.edits.dir | + grep "<value>" | + sed -e "s/<value>//" \ + -e "s/<\/value>//" \ + -e "s/:8020//" \ + -e "s/qjournal:\/\///" \ + -e "s/:8485;/ /g" \ + -e "s/:8485\/hdfs-k8s//") +``` + +Then generate per-host principal accounts and password keytab files.
+ +``` + $ _SECRET_CMD="kubectl create secret generic my-hdfs-krb5-keytabs" + $ for _HOST in $_HOSTS; do + kubectl exec $_KDC -- kadmin.local -q \ + "addprinc -randkey hdfs/$_HOST@MYCOMPANY.COM" + kubectl exec $_KDC -- kadmin.local -q \ + "addprinc -randkey HTTP/$_HOST@MYCOMPANY.COM" + kubectl exec $_KDC -- kadmin.local -q \ + "ktadd -norandkey -k /tmp/$_HOST.keytab hdfs/$_HOST@MYCOMPANY.COM HTTP/$_HOST@MYCOMPANY.COM" + kubectl cp $_KDC:/tmp/$_HOST.keytab $_MY_DIR/tmp/$_HOST.keytab + _SECRET_CMD+=" --from-file=$_MY_DIR/tmp/$_HOST.keytab" + done +``` + +The above builds a command in the shell variable `_SECRET_CMD` for +creating a K8s secret that contains all keytab files. Run the command to create +the secret. + +``` + $ $_SECRET_CMD +``` + +This will unblock all HDFS daemon pods. Wait until they become ready. + +Finally, test the setup using the following commands: + +``` + $ _NN0=$(kubectl get pods -l app=hdfs-namenode,release=my-hdfs -o name | \ + head -1 | \ + cut -d/ -f2) + $ kubectl exec $_NN0 -- sh -c "(apt install -y krb5-user > /dev/null)" \ + || true + $ kubectl exec $_NN0 -- \ + kinit -kt /etc/security/hdfs.keytab \ + hdfs/my-hdfs-namenode-0.my-hdfs-namenode.default.svc.cluster.local@MYCOMPANY.COM + $ kubectl exec $_NN0 -- hdfs dfsadmin -report + $ kubectl exec $_NN0 -- hdfs haadmin -getServiceState nn0 + $ kubectl exec $_NN0 -- hdfs haadmin -getServiceState nn1 + $ kubectl exec $_NN0 -- hadoop fs -rm -r -f /tmp + $ kubectl exec $_NN0 -- hadoop fs -mkdir /tmp + $ kubectl exec $_NN0 -- hadoop fs -chmod 0777 /tmp + $ kubectl exec $_KDC -- kadmin.local -q \ + "addprinc -randkey user1@MYCOMPANY.COM" + $ kubectl exec $_KDC -- kadmin.local -q \ + "ktadd -norandkey -k /tmp/user1.keytab user1@MYCOMPANY.COM" + $ kubectl cp $_KDC:/tmp/user1.keytab $_MY_DIR/tmp/user1.keytab + $ kubectl cp $_MY_DIR/tmp/user1.keytab $_CLIENT:/tmp/user1.keytab + + $ kubectl exec $_CLIENT -- sh -c "(apt install -y krb5-user > /dev/null)" \ + || true + + $ kubectl exec $_CLIENT --
kinit -kt /tmp/user1.keytab user1@MYCOMPANY.COM + $ kubectl exec $_CLIENT -- sh -c \ + "(head -c 100M < /dev/urandom > /tmp/random-100M)" + $ kubectl exec $_CLIENT -- hadoop fs -ls / + $ kubectl exec $_CLIENT -- hadoop fs -copyFromLocal /tmp/random-100M /tmp +``` + +## Advanced options + +### Setting HostPath volume locations for datanodes + +HDFS on K8s stores the file data on the local disks of the K8s cluster nodes +using K8s HostPath volumes. You may want to change the default locations. Set +global.dataNodeHostPath to override the default value. Note the option +takes a list in case you want to use multiple disks. + +``` + $ helm install -n my-hdfs charts/hdfs-k8s \ + --set "global.dataNodeHostPath={/mnt/sda1/hdfs-data0,/mnt/sda1/hdfs-data1}" +``` + +### Using an existing zookeeper quorum + +By default, HDFS on K8s pulls in the zookeeper chart in the incubator remote +repo (https://kubernetes-charts-incubator.storage.googleapis.com/) as a +dependency and launches zookeeper daemons. But your K8s cluster may already +have a zookeeper quorum. + +It is possible to use the existing zookeeper. We just need to set a few options +in the helm install command line. It should be something like: + +``` + $ helm install -n my-hdfs charts/hdfs-k8s \ + --set condition.subchart.zookeeper=false \ + --set global.zookeeperQuorumOverride=zk-0.zk-svc.default.svc.cluster.local:2181,zk-1.zk-svc.default.svc.cluster.local:2181,zk-2.zk-svc.default.svc.cluster.local:2181 +``` + +Setting `condition.subchart.zookeeper` to false prevents the uber-chart from +bringing in zookeeper as sub-chart. And the `global.zookeeperQuorumOverride` +option specifies the custom address for a zookeeper quorum. Use your +zookeeper address here. + +### Pinning namenodes to specific K8s cluster nodes + +Optionally, you can attach labels to some of your k8s cluster nodes so that +namenodes will always run on those cluster nodes.
This can allow your HDFS +client outside the Kubernetes cluster to expect stable IP addresses. When used +by those outside clients, Kerberos expects the namenode addresses to be stable. + +``` + $ kubectl label nodes YOUR-HOST-1 hdfs-namenode-selector=hdfs-namenode + $ kubectl label nodes YOUR-HOST-2 hdfs-namenode-selector=hdfs-namenode +``` + +You should add the nodeSelector option to the helm chart command: + +``` + $ helm install -n my-hdfs charts/hdfs-k8s \ + --set hdfs-namenode-k8s.nodeSelector.hdfs-namenode-selector=hdfs-namenode \ + ... +``` + +### Excluding datanodes from some K8s cluster nodes + +You may want to exclude some K8s cluster nodes from datanodes launch target. +For instance, some K8s clusters may let the K8s cluster master node launch +a datanode. To prevent this, label the cluster nodes with +`hdfs-datanode-exclude`. + +``` + $ kubectl label node YOUR-CLUSTER-NODE hdfs-datanode-exclude=yes +``` + +### Launching with a non-HA namenode + +You may want a non-HA namenode since it is the simplest possible setup. +Note this won't launch zookeepers nor journalnodes. + +The single namenode is supposed to be pinned to a cluster host using a node +label. Attach a label to one of your K8s cluster nodes. + +``` + $ kubectl label nodes YOUR-CLUSTER-NODE hdfs-namenode-selector=hdfs-namenode-0 +``` + +The non-HA setup does not even use persistent volumes. So you don't even +need to prepare persistent volumes. Instead, it is using hostPath volume +of the pinned cluster node. So, just launch the chart while +setting options to turn off HA. You should add the nodeSelector option +so that the single namenode would find the hostPath volume of the same cluster +node when the pod restarts.
+ +``` + $ helm install -n my-hdfs charts/hdfs-k8s \ + --set tags.ha=false \ + --set tags.simple=true \ + --set global.namenodeHAEnabled=false \ + --set hdfs-simple-namenode-k8s.nodeSelector.hdfs-namenode-selector=hdfs-namenode-0 +``` + +# Security + +## K8s secret containing Kerberos keytab files + +The Kerberos setup creates a K8s secret containing all the keytab files of HDFS +daemon service principals. This will be mounted onto HDFS daemon pods. You may +want to restrict access to this secret using k8s +[RBAC](https://kubernetes.io/docs/admin/authorization/rbac/), to minimize +exposure of the keytab files. + +## HostPath volumes +`Datanode` daemons run on every cluster node. They also mount k8s `hostPath` +local disk volumes. You may want to restrict access of `hostPath` +using `pod security policy`. +See [reference](https://github.com/kubernetes/examples/blob/master/staging/podsecuritypolicy/rbac/README.md) + +## Credits + +Many charts are using public Hadoop docker images hosted by +[uhopper](https://hub.docker.com/u/uhopper/). diff --git a/vnfs/DAaaS/deploy/training-core/charts/kubernetes-HDFS/charts/hdfs-client-k8s/Chart.yaml b/vnfs/DAaaS/deploy/training-core/charts/kubernetes-HDFS/charts/hdfs-client-k8s/Chart.yaml new file mode 100644 index 00000000..00d6f47d --- /dev/null +++ b/vnfs/DAaaS/deploy/training-core/charts/kubernetes-HDFS/charts/hdfs-client-k8s/Chart.yaml @@ -0,0 +1,4 @@ +apiVersion: v1 +name: hdfs-client-k8s +version: 0.1.0 +description: A client for HDFS on Kubernetes.
diff --git a/vnfs/DAaaS/deploy/training-core/charts/kubernetes-HDFS/charts/hdfs-client-k8s/templates/client-deployment.yaml b/vnfs/DAaaS/deploy/training-core/charts/kubernetes-HDFS/charts/hdfs-client-k8s/templates/client-deployment.yaml new file mode 100644 index 00000000..afffedfd --- /dev/null +++ b/vnfs/DAaaS/deploy/training-core/charts/kubernetes-HDFS/charts/hdfs-client-k8s/templates/client-deployment.yaml @@ -0,0 +1,56 @@ +apiVersion: apps/v1 +apiVersion: extensions/v1beta1 +kind: Deployment +metadata: + name: {{ template "hdfs-k8s.client.fullname" . }} + labels: + app: {{ template "hdfs-k8s.client.name" . }} + chart: {{ template "hdfs-k8s.subchart" . }} + release: {{ .Release.Name }} +spec: + replicas: 1 + selector: + matchLabels: + app: {{ template "hdfs-k8s.client.name" . }} + release: {{ .Release.Name }} + template: + metadata: + labels: + app: {{ template "hdfs-k8s.client.name" . }} + release: {{ .Release.Name }} + {{- if .Values.podAnnotations }} + annotations: +{{ toYaml .Values.podAnnotations | indent 8 }} + {{- end }} + spec: + containers: + - name: hdfs-client + image: uhopper/hadoop:2.7.2 + env: + - name: HADOOP_CUSTOM_CONF_DIR + value: /etc/hadoop-custom-conf + - name: MULTIHOMED_NETWORK + value: "0" + command: ['/bin/sh', '-c'] + args: + - /entrypoint.sh /usr/bin/tail -f /var/log/dmesg + volumeMounts: + - name: hdfs-config + mountPath: /etc/hadoop-custom-conf + readOnly: true + {{- if .Values.global.kerberosEnabled }} + - name: kerberos-config + mountPath: /etc/krb5.conf + subPath: {{ .Values.global.kerberosConfigFileName }} + readOnly: true + {{- end }} + restartPolicy: Always + volumes: + - name: hdfs-config + configMap: + name: {{ template "hdfs-k8s.config.fullname" . }} + {{- if .Values.global.kerberosEnabled }} + - name: kerberos-config + configMap: + name: {{ template "krb5-configmap" . 
}} + {{- end }} diff --git a/vnfs/DAaaS/deploy/training-core/charts/kubernetes-HDFS/charts/hdfs-config-k8s/.helmignore b/vnfs/DAaaS/deploy/training-core/charts/kubernetes-HDFS/charts/hdfs-config-k8s/.helmignore new file mode 100644 index 00000000..f0c13194 --- /dev/null +++ b/vnfs/DAaaS/deploy/training-core/charts/kubernetes-HDFS/charts/hdfs-config-k8s/.helmignore @@ -0,0 +1,21 @@ +# Patterns to ignore when building packages. +# This supports shell glob matching, relative path matching, and +# negation (prefixed with !). Only one pattern per line. +.DS_Store +# Common VCS dirs +.git/ +.gitignore +.bzr/ +.bzrignore +.hg/ +.hgignore +.svn/ +# Common backup files +*.swp +*.bak +*.tmp +*~ +# Various IDEs +.project +.idea/ +*.tmproj diff --git a/vnfs/DAaaS/deploy/training-core/charts/kubernetes-HDFS/charts/hdfs-config-k8s/Chart.yaml b/vnfs/DAaaS/deploy/training-core/charts/kubernetes-HDFS/charts/hdfs-config-k8s/Chart.yaml new file mode 100644 index 00000000..229c4344 --- /dev/null +++ b/vnfs/DAaaS/deploy/training-core/charts/kubernetes-HDFS/charts/hdfs-config-k8s/Chart.yaml @@ -0,0 +1,5 @@ +apiVersion: v1 +appVersion: "1.0" +description: A Helm chart for configuring HDFS on Kubernetes +name: hdfs-config-k8s +version: 0.1.0 diff --git a/vnfs/DAaaS/deploy/training-core/charts/kubernetes-HDFS/charts/hdfs-config-k8s/templates/_helpers.tpl b/vnfs/DAaaS/deploy/training-core/charts/kubernetes-HDFS/charts/hdfs-config-k8s/templates/_helpers.tpl new file mode 100644 index 00000000..cd2ff083 --- /dev/null +++ b/vnfs/DAaaS/deploy/training-core/charts/kubernetes-HDFS/charts/hdfs-config-k8s/templates/_helpers.tpl @@ -0,0 +1,64 @@ +{{/* vim: set filetype=mustache: */}} +{{/* +Expand the name of the chart. +*/}} +{{- define "hdfs-config-k8s.name" -}} +{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" -}} +{{- end -}} + +{{/* +Create a default fully qualified app name. 
+We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec). +If release name contains chart name it will be used as a full name. +*/}} +{{- define "hdfs-config-k8s.fullname" -}} +{{- if .Values.fullnameOverride -}} +{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" -}} +{{- else -}} +{{- $name := default .Chart.Name .Values.nameOverride -}} +{{- if contains $name .Release.Name -}} +{{- .Release.Name | trunc 63 | trimSuffix "-" -}} +{{- else -}} +{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" -}} +{{- end -}} +{{- end -}} +{{- end -}} + +{{/* +Create chart name and version as used by the chart label. +*/}} +{{- define "hdfs-config-k8s.chart" -}} +{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" -}} +{{- end -}} + +{{/* +Create the kerberos principal suffix for core HDFS services +*/}} +{{- define "hdfs-principal" -}} +{{- printf "hdfs/_HOST@%s" .Values.kerberosRealm -}} +{{- end -}} + +{{/* +Create the kerberos principal for HTTP services +*/}} +{{- define "http-principal" -}} +{{- printf "HTTP/_HOST@%s" .Values.kerberosRealm -}} +{{- end -}} + +{{/* +Create the datanode data dir list. The below uses two loops to make sure the +last item does not have comma. It uses index 0 for the last item since that is +the only special index that helm template gives us. 
+*/}} +{{- define "datanode-data-dirs" -}} +{{- range $index, $path := .Values.global.dataNodeHostPath -}} + {{- if ne $index 0 -}} + /hadoop/dfs/data/{{ $index }}, + {{- end -}} +{{- end -}} +{{- range $index, $path := .Values.global.dataNodeHostPath -}} + {{- if eq $index 0 -}} + /hadoop/dfs/data/{{ $index }} + {{- end -}} +{{- end -}} +{{- end -}} diff --git a/vnfs/DAaaS/deploy/training-core/charts/kubernetes-HDFS/charts/hdfs-config-k8s/templates/configmap.yaml b/vnfs/DAaaS/deploy/training-core/charts/kubernetes-HDFS/charts/hdfs-config-k8s/templates/configmap.yaml new file mode 100644 index 00000000..379dab8f --- /dev/null +++ b/vnfs/DAaaS/deploy/training-core/charts/kubernetes-HDFS/charts/hdfs-config-k8s/templates/configmap.yaml @@ -0,0 +1,197 @@ +apiVersion: v1 +kind: ConfigMap +metadata: + name: {{ template "hdfs-k8s.config.fullname" . }} + labels: + app: {{ template "hdfs-k8s.client.name" . }} + chart: {{ template "hdfs-k8s.subchart" . }} + release: {{ .Release.Name }} +data: + core-site.xml: | + <?xml version="1.0"?> + <?xml-stylesheet type="text/xsl" href="configuration.xsl"?> + <configuration> + {{- if .Values.global.kerberosEnabled }} + <property> + <name>hadoop.security.authentication</name> + <value>kerberos</value> + </property> + <!-- + This is service level RPC authorization, which is separate from HDFS file + level ACLs. This concerns who can talk to HDFS daemons including + datanodes talking to namenode. As part of the authorization, namenode + tries to validate that DNS can uniquely translate the datanode IP to the + hostname in the datanode Kerberos principal. (i.e. The client IP is what + Kerberos has authenticated). This does not work well when both namenode + and datanodes are using the Kubernetes HostNetwork and namenode is using + the StatefulSet. The same cluster node IP can be mapped to two different + DNS names. So we disable this. Again this is only service level RPC + authorization and does not affect HDFS file level permission ACLs. 
+ --> + <property> + <name>hadoop.security.authorization</name> + <value>false</value> + </property> + <property> + <name>hadoop.rpc.protection</name> + <value>privacy</value> + </property> + <property> + <name>hadoop.user.group.static.mapping.overrides</name> + <value>hdfs=root;</value> + </property> + {{- end }} + {{- range $key, $value := .Values.customHadoopConfig.coreSite }} + <property> + <name>{{ $key }}</name> + <value>{{ $value }}</value> + </property> + {{- end }} + {{- if .Values.global.namenodeHAEnabled }} + <property> + <name>fs.defaultFS</name> + <value>hdfs://hdfs-k8s</value> + </property> + <property> + <name>ha.zookeeper.quorum</name> + <value>{{ template "zookeeper-quorum" . }}</value> + </property> + {{- else }} + <property> + <name>fs.defaultFS</name> + <value>hdfs://{{ template "namenode-svc-0" . }}:8020</value> + </property> + {{- end }} + </configuration> + hdfs-site.xml: | + <?xml version="1.0"?> + <?xml-stylesheet type="text/xsl" href="configuration.xsl"?> + <configuration> + {{- if .Values.global.kerberosEnabled }} + <property> + <name>dfs.block.access.token.enable</name> + <value>true</value> + </property> + <property> + <name>dfs.encrypt.data.transfer</name> + <value>true</value> + </property> + <property> + <name>dfs.namenode.kerberos.principal</name> + <value>{{ template "hdfs-principal" . }}</value> + </property> + {{/* + TODO: Check if the https principal is no longer needed in newer Hadoop version. + */}} + <property> + <name>dfs.namenode.kerberos.https.principal</name> + <value>{{ template "http-principal" . }}</value> + </property> + <property> + <name>dfs.web.authentication.kerberos.principal</name> + <value>{{ template "http-principal" . }}</value> + </property> + <property> + <name>dfs.namenode.keytab.file</name> + <value>/etc/security/hdfs.keytab</value> + </property> + <property> + <name>dfs.journalnode.kerberos.principal</name> + <value>{{ template "hdfs-principal" . 
}}</value> + </property> + <property> + <name>dfs.journalnode.kerberos.internal.spnego.principal</name> + <value>{{ template "http-principal" . }}</value> + </property> + <property> + <name>dfs.journalnode.keytab.file</name> + <value>/etc/security/hdfs.keytab</value> + </property> + <property> + <name>dfs.datanode.kerberos.principal</name> + <value>{{ template "hdfs-principal" . }}</value> + </property> + <property> + <name>dfs.datanode.kerberos.https.principal</name> + <value>{{ template "http-principal" . }}</value> + </property> + <property> + <name>dfs.datanode.keytab.file</name> + <value>/etc/security/hdfs.keytab</value> + </property> + {{- if .Values.global.jsvcEnabled }} + <property> + <name>dfs.datanode.address</name> + <value>0.0.0.0:1004</value> + </property> + <property> + <name>dfs.datanode.http.address</name> + <value>0.0.0.0:1006</value> + </property> + {{- end }} + {{- end }} + {{- range $key, $value := .Values.customHadoopConfig.hdfsSite }} + <property> + <name>{{ $key }}</name> + <value>{{ $value }}</value> + </property> + {{- end }} + {{- if .Values.global.namenodeHAEnabled }} + <property> + <name>dfs.nameservices</name> + <value>hdfs-k8s</value> + </property> + <property> + <name>dfs.ha.namenodes.hdfs-k8s</name> + <value>nn0,nn1</value> + </property> + <property> + <name>dfs.namenode.rpc-address.hdfs-k8s.nn0</name> + <value>{{ template "namenode-svc-0" . }}:8020</value> + </property> + <property> + <name>dfs.namenode.rpc-address.hdfs-k8s.nn1</name> + <value>{{ template "namenode-svc-1" . }}:8020</value> + </property> + <property> + <name>dfs.namenode.http-address.hdfs-k8s.nn0</name> + <value>{{ template "namenode-svc-0" . }}:50070</value> + </property> + <property> + <name>dfs.namenode.http-address.hdfs-k8s.nn1</name> + <value>{{ template "namenode-svc-1" . }}:50070</value> + </property> + <property> + <name>dfs.namenode.shared.edits.dir</name> + <value>qjournal://{{ template "journalnode-quorum" . 
}}/hdfs-k8s</value> + </property> + <property> + <name>dfs.ha.automatic-failover.enabled</name> + <value>true</value> + </property> + <property> + <name>dfs.ha.fencing.methods</name> + <value>shell(/bin/true)</value> + </property> + <property> + <name>dfs.journalnode.edits.dir</name> + <value>/hadoop/dfs/journal</value> + </property> + <property> + <name>dfs.client.failover.proxy.provider.hdfs-k8s</name> + <value>org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider</value> + </property> + {{- end }} + <property> + <name>dfs.namenode.name.dir</name> + <value>file:///hadoop/dfs/name</value> + </property> + <property> + <name>dfs.namenode.datanode.registration.ip-hostname-check</name> + <value>false</value> + </property> + <property> + <name>dfs.datanode.data.dir</name> + <value>{{ template "datanode-data-dirs" . }}</value> + </property> + </configuration> diff --git a/vnfs/DAaaS/deploy/training-core/charts/kubernetes-HDFS/charts/hdfs-datanode-k8s/Chart.yaml b/vnfs/DAaaS/deploy/training-core/charts/kubernetes-HDFS/charts/hdfs-datanode-k8s/Chart.yaml new file mode 100644 index 00000000..ec837254 --- /dev/null +++ b/vnfs/DAaaS/deploy/training-core/charts/kubernetes-HDFS/charts/hdfs-datanode-k8s/Chart.yaml @@ -0,0 +1,4 @@ +apiVersion: v1 +name: hdfs-datanode-k8s +version: 0.1.0 +description: Datanodes for HDFS on Kubernetes. diff --git a/vnfs/DAaaS/deploy/training-core/charts/kubernetes-HDFS/charts/hdfs-datanode-k8s/templates/datanode-daemonset.yaml b/vnfs/DAaaS/deploy/training-core/charts/kubernetes-HDFS/charts/hdfs-datanode-k8s/templates/datanode-daemonset.yaml new file mode 100644 index 00000000..09445ed0 --- /dev/null +++ b/vnfs/DAaaS/deploy/training-core/charts/kubernetes-HDFS/charts/hdfs-datanode-k8s/templates/datanode-daemonset.yaml @@ -0,0 +1,191 @@ +# Provides datanode helper scripts. +apiVersion: v1 +kind: ConfigMap +metadata: + name: {{ template "hdfs-k8s.datanode.fullname" . 
}}-scripts + labels: + app: {{ template "hdfs-k8s.datanode.name" . }} + chart: {{ template "hdfs-k8s.subchart" . }} + release: {{ .Release.Name }} +data: + check-status.sh: | + #!/usr/bin/env bash + # Exit on error. Append "|| true" if you expect an error. + set -o errexit + # Exit on error inside any functions or subshells. + set -o errtrace + # Do not allow use of undefined vars. Use ${VAR:-} to use an undefined VAR + set -o nounset + # Catch an error in command pipes. e.g. mysqldump fails (but gzip succeeds) + # in `mysqldump |gzip` + set -o pipefail + # Turn on traces, useful while debugging. + set -o xtrace + + # Check if datanode registered with the namenode and got non-null cluster ID. + _PORTS="50075 1006" + _URL_PATH="jmx?qry=Hadoop:service=DataNode,name=DataNodeInfo" + _CLUSTER_ID="" + for _PORT in $_PORTS; do + _CLUSTER_ID+=$(curl -s http://localhost:${_PORT}/$_URL_PATH | \ + grep ClusterId) || true + done + echo $_CLUSTER_ID | grep -q -v null +--- +# Deleting a daemonset may need some trick. See +# https://github.com/kubernetes/kubernetes/issues/33245#issuecomment-261250489 +apiVersion: extensions/v1beta1 +kind: DaemonSet +metadata: + name: {{ template "hdfs-k8s.datanode.fullname" . }} + labels: + app: {{ template "hdfs-k8s.datanode.name" . }} + chart: {{ template "hdfs-k8s.subchart" . }} + release: {{ .Release.Name }} +spec: + template: + metadata: + labels: + app: {{ template "hdfs-k8s.datanode.name" . }} + release: {{ .Release.Name }} + {{- if .Values.podAnnotations }} + annotations: +{{ toYaml .Values.podAnnotations | indent 8 }} + {{- end }} + spec: + {{- if .Values.affinity }} + affinity: +{{ toYaml .Values.affinity | indent 8 }} + {{- else if .Values.global.defaultAffinityEnabled }} + affinity: + nodeAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + nodeSelectorTerms: + - matchExpressions: + - key: {{ template "hdfs-k8s.datanode.fullname" . 
}}-exclude + operator: DoesNotExist + {{- end }} + {{- if .Values.nodeSelector }} + nodeSelector: +{{ toYaml .Values.nodeSelector | indent 8 }} + {{- end }} + {{- if .Values.tolerations }} + tolerations: +{{ toYaml .Values.tolerations | indent 8 }} + {{- end }} + hostNetwork: true + hostPID: true + dnsPolicy: ClusterFirstWithHostNet + containers: + - name: datanode + image: uhopper/hadoop-datanode:2.7.2 + env: + - name: HADOOP_CUSTOM_CONF_DIR + value: /etc/hadoop-custom-conf + - name: MULTIHOMED_NETWORK + value: "0" + {{- if and .Values.global.kerberosEnabled .Values.global.jsvcEnabled }} + - name: HADOOP_SECURE_DN_USER + value: root + - name: JSVC_OUTFILE + value: /dev/stdout + - name: JSVC_ERRFILE + value: /dev/stderr + - name: JSVC_HOME + value: /jsvc-home + {{- end }} + livenessProbe: + exec: + command: + - /dn-scripts/check-status.sh + initialDelaySeconds: 60 + periodSeconds: 30 + readinessProbe: + exec: + command: + - /dn-scripts/check-status.sh + initialDelaySeconds: 60 + periodSeconds: 30 + securityContext: + privileged: true + volumeMounts: + - name: dn-scripts + mountPath: /dn-scripts + readOnly: true + - name: hdfs-config + mountPath: /etc/hadoop-custom-conf + readOnly: true + {{- range $index, $path := .Values.global.dataNodeHostPath }} + - name: hdfs-data-{{ $index }} + mountPath: /hadoop/dfs/data/{{ $index }} + {{- end }} + {{- if .Values.global.kerberosEnabled }} + - name: kerberos-config + mountPath: /etc/krb5.conf + subPath: {{ .Values.global.kerberosConfigFileName }} + readOnly: true + - name: kerberos-keytab-copy + mountPath: /etc/security/ + readOnly: true + {{- if .Values.global.jsvcEnabled }} + - name: jsvc-home + mountPath: /jsvc-home + {{- end }} + {{- end }} + {{- if .Values.global.kerberosEnabled }} + initContainers: + - name: copy-kerberos-keytab + image: busybox:1.27.1 + command: ['sh', '-c'] + args: + - cp /kerberos-keytabs/$MY_NODE_NAME.keytab /kerberos-keytab-copy/hdfs.keytab + env: + - name: MY_NODE_NAME + valueFrom: + fieldRef: + 
fieldPath: spec.nodeName + volumeMounts: + - name: kerberos-keytabs + mountPath: /kerberos-keytabs + - name: kerberos-keytab-copy + mountPath: /kerberos-keytab-copy + {{- if .Values.global.jsvcEnabled }} + - name: copy-jsvc + # Pull by digest because the image doesn't have tags to pin. + image: mschlimb/jsvc@sha256:bf20eb9a319e9a2f87473d8da7418d21503a97528b932800b6b8417cd31e30ef + command: ['sh', '-c'] + args: + - cp /usr/bin/jsvc /jsvc-home/jsvc + volumeMounts: + - name: jsvc-home + mountPath: /jsvc-home + {{- end }} + {{- end }} + restartPolicy: Always + volumes: + - name: dn-scripts + configMap: + name: {{ template "hdfs-k8s.datanode.fullname" . }}-scripts + defaultMode: 0744 + {{- range $index, $path := .Values.global.dataNodeHostPath }} + - name: hdfs-data-{{ $index }} + hostPath: + path: {{ $path }} + {{- end }} + - name: hdfs-config + configMap: + name: {{ template "hdfs-k8s.config.fullname" . }} + {{- if .Values.global.kerberosEnabled }} + - name: kerberos-config + configMap: + name: {{ template "krb5-configmap" . }} + - name: kerberos-keytabs + secret: + secretName: {{ template "krb5-keytabs-secret" . }} + - name: kerberos-keytab-copy + emptyDir: {} + {{- if .Values.global.jsvcEnabled }} + - name: jsvc-home + emptyDir: {} + {{- end }} + {{- end }} diff --git a/vnfs/DAaaS/deploy/training-core/charts/kubernetes-HDFS/charts/hdfs-journalnode-k8s/Chart.yaml b/vnfs/DAaaS/deploy/training-core/charts/kubernetes-HDFS/charts/hdfs-journalnode-k8s/Chart.yaml new file mode 100644 index 00000000..a7ea6c8f --- /dev/null +++ b/vnfs/DAaaS/deploy/training-core/charts/kubernetes-HDFS/charts/hdfs-journalnode-k8s/Chart.yaml @@ -0,0 +1,4 @@ +apiVersion: v1 +name: hdfs-journalnode-k8s +version: 0.1.0 +description: Journalnode quorum used by HDFS on Kubernetes. 
diff --git a/vnfs/DAaaS/deploy/training-core/charts/kubernetes-HDFS/charts/hdfs-journalnode-k8s/templates/journalnode-statefulset.yaml b/vnfs/DAaaS/deploy/training-core/charts/kubernetes-HDFS/charts/hdfs-journalnode-k8s/templates/journalnode-statefulset.yaml new file mode 100644 index 00000000..22a4a2b4 --- /dev/null +++ b/vnfs/DAaaS/deploy/training-core/charts/kubernetes-HDFS/charts/hdfs-journalnode-k8s/templates/journalnode-statefulset.yaml @@ -0,0 +1,180 @@ +# A headless service to create DNS records. +apiVersion: v1 +kind: Service +metadata: + name: {{ template "hdfs-k8s.journalnode.fullname" . }} + labels: + app: {{ template "hdfs-k8s.journalnode.name" . }} + chart: {{ template "hdfs-k8s.subchart" . }} + release: {{ .Release.Name }} + annotations: + # TODO: Deprecated. Replace tolerate-unready-endpoints with + # v1.Service.PublishNotReadyAddresses. + service.alpha.kubernetes.io/tolerate-unready-endpoints: "true" +spec: + ports: + - port: 8485 + name: jn + - port: 8480 + name: http + clusterIP: None + selector: + app: {{ template "hdfs-k8s.journalnode.name" . }} + release: {{ .Release.Name }} +--- +apiVersion: policy/v1beta1 +kind: PodDisruptionBudget +metadata: + name: {{ template "hdfs-k8s.journalnode.fullname" . }} + labels: + app: {{ template "hdfs-k8s.journalnode.name" . }} + chart: {{ template "hdfs-k8s.subchart" . }} + release: {{ .Release.Name }} +spec: + selector: + matchLabels: + app: {{ template "hdfs-k8s.journalnode.name" . }} + release: {{ .Release.Name }} + minAvailable: {{ div .Values.global.journalnodeQuorumSize 2 | add1 }} +--- +apiVersion: apps/v1beta1 +kind: StatefulSet +metadata: + name: {{ template "hdfs-k8s.journalnode.fullname" . }} + labels: + app: {{ template "hdfs-k8s.journalnode.name" . }} + chart: {{ template "hdfs-k8s.subchart" . }} + release: {{ .Release.Name }} +spec: + serviceName: {{ template "hdfs-k8s.journalnode.fullname" . 
}} + replicas: {{ .Values.global.journalnodeQuorumSize }} + template: + metadata: + labels: + app: {{ template "hdfs-k8s.journalnode.name" . }} + release: {{ .Release.Name }} + {{- if .Values.podAnnotations }} + annotations: +{{ toYaml .Values.podAnnotations | indent 8 }} + {{- end }} + spec: + {{- if .Values.affinity }} + affinity: +{{ toYaml .Values.affinity | indent 8 }} + {{- else if .Values.global.defaultAffinityEnabled }} + affinity: + podAntiAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + - labelSelector: + matchExpressions: + - key: "app" + operator: In + values: + - {{ template "hdfs-k8s.journalnode.name" . }} + - key: "release" + operator: In + values: + - {{ .Release.Name }} + topologyKey: "kubernetes.io/hostname" + {{- end }} + {{- if .Values.nodeSelector }} + nodeSelector: +{{ toYaml .Values.nodeSelector | indent 8 }} + {{- end }} + {{- if .Values.tolerations }} + tolerations: +{{ toYaml .Values.tolerations | indent 8 }} + {{- end }} + containers: + - name: hdfs-journalnode + image: uhopper/hadoop-namenode:2.7.2 + env: + - name: HADOOP_CUSTOM_CONF_DIR + value: /etc/hadoop-custom-conf + command: ["/entrypoint.sh"] + args: ["/opt/hadoop-2.7.2/bin/hdfs", "--config", "/etc/hadoop", "journalnode"] + ports: + - containerPort: 8485 + name: jn + - containerPort: 8480 + name: http + volumeMounts: + # Mount a subpath of the volume so that the journal subdir would be + # a brand new empty dir. This way, we won't get affected by + # existing files in the volume top dir. 
+ - name: editdir + mountPath: /hadoop/dfs/journal + subPath: journal + - name: editdir + mountPath: /hadoop/dfs/name + subPath: name + - name: hdfs-config + mountPath: /etc/hadoop-custom-conf + readOnly: true + {{- if .Values.global.kerberosEnabled }} + - name: kerberos-config + mountPath: /etc/krb5.conf + subPath: {{ .Values.global.kerberosConfigFileName }} + readOnly: true + - name: kerberos-keytab-copy + mountPath: /etc/security/ + readOnly: true + {{- end }} + {{- if .Values.global.kerberosEnabled }} + initContainers: + - name: copy-kerberos-keytab + image: busybox:1.27.1 + command: ['sh', '-c'] + args: + - cp /kerberos-keytabs/${MY_KERBEROS_NAME}*.keytab /kerberos-keytab-copy/hdfs.keytab + env: + - name: MY_KERBEROS_NAME + valueFrom: + fieldRef: + fieldPath: metadata.name + volumeMounts: + - name: kerberos-keytabs + mountPath: /kerberos-keytabs + - name: kerberos-keytab-copy + mountPath: /kerberos-keytab-copy + {{- end }} + restartPolicy: Always + volumes: + - name: hdfs-config + configMap: + name: {{ template "hdfs-k8s.config.fullname" . }} + {{- if .Values.global.kerberosEnabled }} + - name: kerberos-config + configMap: + name: {{ template "krb5-configmap" . }} + - name: kerberos-keytabs + secret: + secretName: {{ template "krb5-keytabs-secret" . 
}} + - name: kerberos-keytab-copy + emptyDir: {} + {{- end }} + {{- if .Values.global.podSecurityContext.enabled }} + securityContext: + runAsUser: {{ .Values.global.podSecurityContext.runAsUser }} + fsGroup: {{ .Values.global.podSecurityContext.fsGroup }} + {{- end }} + volumeClaimTemplates: + - metadata: + name: editdir + spec: + accessModes: + - {{ .Values.persistence.accessMode | quote }} + resources: + requests: + storage: {{ .Values.persistence.size | quote }} + {{- if .Values.persistence.storageClass }} + {{- if (eq "-" .Values.persistence.storageClass) }} + storageClassName: "" + {{- else }} + storageClassName: "{{ .Values.persistence.storageClass }}" + {{- end }} + {{- end }} + {{- if .Values.persistence.selector }} + selector: +{{ toYaml .Values.persistence.selector | indent 10 }} + {{- end }} diff --git a/vnfs/DAaaS/deploy/training-core/charts/kubernetes-HDFS/charts/hdfs-k8s/.gitignore b/vnfs/DAaaS/deploy/training-core/charts/kubernetes-HDFS/charts/hdfs-k8s/.gitignore new file mode 100644 index 00000000..28ebd32d --- /dev/null +++ b/vnfs/DAaaS/deploy/training-core/charts/kubernetes-HDFS/charts/hdfs-k8s/.gitignore @@ -0,0 +1,2 @@ +charts +requirements.lock diff --git a/vnfs/DAaaS/deploy/training-core/charts/kubernetes-HDFS/charts/hdfs-k8s/.helmignore b/vnfs/DAaaS/deploy/training-core/charts/kubernetes-HDFS/charts/hdfs-k8s/.helmignore new file mode 100644 index 00000000..f0c13194 --- /dev/null +++ b/vnfs/DAaaS/deploy/training-core/charts/kubernetes-HDFS/charts/hdfs-k8s/.helmignore @@ -0,0 +1,21 @@ +# Patterns to ignore when building packages. +# This supports shell glob matching, relative path matching, and +# negation (prefixed with !). Only one pattern per line. 
+.DS_Store +# Common VCS dirs +.git/ +.gitignore +.bzr/ +.bzrignore +.hg/ +.hgignore +.svn/ +# Common backup files +*.swp +*.bak +*.tmp +*~ +# Various IDEs +.project +.idea/ +*.tmproj diff --git a/vnfs/DAaaS/deploy/training-core/charts/kubernetes-HDFS/charts/hdfs-k8s/Chart.yaml b/vnfs/DAaaS/deploy/training-core/charts/kubernetes-HDFS/charts/hdfs-k8s/Chart.yaml new file mode 100644 index 00000000..ec58ffb6 --- /dev/null +++ b/vnfs/DAaaS/deploy/training-core/charts/kubernetes-HDFS/charts/hdfs-k8s/Chart.yaml @@ -0,0 +1,5 @@ +apiVersion: v1 +appVersion: "1.0" +description: An entry-point Helm chart for launching HDFS on Kubernetes +name: hdfs +version: 0.1.0 diff --git a/vnfs/DAaaS/deploy/training-core/charts/kubernetes-HDFS/charts/hdfs-k8s/requirements.yaml b/vnfs/DAaaS/deploy/training-core/charts/kubernetes-HDFS/charts/hdfs-k8s/requirements.yaml new file mode 100644 index 00000000..7f803fdc --- /dev/null +++ b/vnfs/DAaaS/deploy/training-core/charts/kubernetes-HDFS/charts/hdfs-k8s/requirements.yaml @@ -0,0 +1,59 @@ +dependencies: + - name: zookeeper + version: "1.0.0" + repository: https://kubernetes-charts-incubator.storage.googleapis.com/ + condition: condition.subchart.zookeeper + tags: + - ha + - kerberos + - name: hdfs-config-k8s + version: "0.1.0" + repository: "file://../hdfs-config-k8s" + condition: condition.subchart.config + tags: + - ha + - kerberos + - simple + - name: hdfs-krb5-k8s + version: "0.1.0" + repository: "file://../hdfs-krb5-k8s" + condition: condition.subchart.kerberos + tags: + - kerberos + - name: hdfs-journalnode-k8s + version: "0.1.0" + repository: "file://../hdfs-journalnode-k8s" + condition: condition.subchart.journalnode + tags: + - ha + - kerberos + - name: hdfs-namenode-k8s + version: "0.1.0" + repository: "file://../hdfs-namenode-k8s" + condition: condition.subchart.namenode + tags: + - ha + - kerberos + # Non-HA namenode. 
Disabled by default + - name: hdfs-simple-namenode-k8s + version: "0.1.0" + repository: "file://../hdfs-simple-namenode-k8s" + condition: condition.subchart.simple-namenode + tags: + - simple + - name: hdfs-datanode-k8s + version: "0.1.0" + repository: "file://../hdfs-datanode-k8s" + condition: condition.subchart.datanode + tags: + - ha + - kerberos + - simple + - name: hdfs-client-k8s + version: "0.1.0" + repository: "file://../hdfs-client-k8s" + condition: condition.subchart.client + tags: + - ha + - kerberos + - simple diff --git a/vnfs/DAaaS/deploy/training-core/charts/kubernetes-HDFS/charts/hdfs-k8s/templates/_helpers.tpl b/vnfs/DAaaS/deploy/training-core/charts/kubernetes-HDFS/charts/hdfs-k8s/templates/_helpers.tpl new file mode 100644 index 00000000..9d03c4d2 --- /dev/null +++ b/vnfs/DAaaS/deploy/training-core/charts/kubernetes-HDFS/charts/hdfs-k8s/templates/_helpers.tpl @@ -0,0 +1,264 @@ +{{/* vim: set filetype=mustache: */}} +{{/* +Create a short app name. +*/}} +{{- define "hdfs-k8s.name" -}} +hdfs +{{- end -}} + +{{/* +Create a fully qualified app name. +We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec). +If release name contains chart name it will be used as a full name. +*/}} +{{- define "hdfs-k8s.fullname" -}} +{{- if .Values.global.fullnameOverride -}} +{{- .Values.global.fullnameOverride | trunc 63 | trimSuffix "-" -}} +{{- else -}} +{{- $name := include "hdfs-k8s.name" . -}} +{{- if contains $name .Release.Name -}} +{{- .Release.Name | trunc 63 | trimSuffix "-" -}} +{{- else -}} +{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" -}} +{{- end -}} +{{- end -}} +{{- end -}} + +{{/* +Create chart name and version as used by the subchart label. +*/}} +{{- define "hdfs-k8s.subchart" -}} +{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" -}} +{{- end -}} + +{{- define "zookeeper-fullname" -}} +{{- $fullname := include "hdfs-k8s.fullname" . 
-}} +{{- if contains "zookeeper" $fullname -}} +{{- printf "%s" $fullname -}} +{{- else -}} +{{- printf "%s-zookeeper" $fullname | trunc 63 | trimSuffix "-" -}} +{{- end -}} +{{- end -}} + +{{- define "hdfs-k8s.config.name" -}} +{{- template "hdfs-k8s.name" . -}}-config +{{- end -}} + +{{- define "hdfs-k8s.config.fullname" -}} +{{- $fullname := include "hdfs-k8s.fullname" . -}} +{{- if contains "config" $fullname -}} +{{- printf "%s" $fullname -}} +{{- else -}} +{{- printf "%s-config" $fullname | trunc 63 | trimSuffix "-" -}} +{{- end -}} +{{- end -}} + +{{- define "hdfs-k8s.krb5.name" -}} +{{- template "hdfs-k8s.name" . -}}-krb5 +{{- end -}} + +{{- define "hdfs-k8s.krb5.fullname" -}} +{{- $fullname := include "hdfs-k8s.fullname" . -}} +{{- if contains "config" $fullname -}} +{{- printf "%s" $fullname -}} +{{- else -}} +{{- printf "%s-krb5" $fullname | trunc 63 | trimSuffix "-" -}} +{{- end -}} +{{- end -}} + +{{- define "hdfs-k8s.journalnode.name" -}} +{{- template "hdfs-k8s.name" . -}}-journalnode +{{- end -}} + +{{- define "hdfs-k8s.journalnode.fullname" -}} +{{- $fullname := include "hdfs-k8s.fullname" . -}} +{{- if contains "journalnode" $fullname -}} +{{- printf "%s" $fullname -}} +{{- else -}} +{{- printf "%s-journalnode" $fullname | trunc 63 | trimSuffix "-" -}} +{{- end -}} +{{- end -}} + +{{- define "hdfs-k8s.namenode.name" -}} +{{- template "hdfs-k8s.name" . -}}-namenode +{{- end -}} + +{{- define "hdfs-k8s.namenode.fullname" -}} +{{- $fullname := include "hdfs-k8s.fullname" . -}} +{{- if contains "namenode" $fullname -}} +{{- printf "%s" $fullname -}} +{{- else -}} +{{- printf "%s-namenode" $fullname | trunc 63 | trimSuffix "-" -}} +{{- end -}} +{{- end -}} + +{{- define "hdfs-k8s.datanode.name" -}} +{{- template "hdfs-k8s.name" . -}}-datanode +{{- end -}} + +{{- define "hdfs-k8s.datanode.fullname" -}} +{{- $fullname := include "hdfs-k8s.fullname" . 
-}} +{{- if contains "datanode" $fullname -}} +{{- printf "%s" $fullname -}} +{{- else -}} +{{- printf "%s-datanode" $fullname | trunc 63 | trimSuffix "-" -}} +{{- end -}} +{{- end -}} + +{{- define "hdfs-k8s.client.name" -}} +{{- template "hdfs-k8s.name" . -}}-client +{{- end -}} + +{{- define "hdfs-k8s.client.fullname" -}} +{{- $fullname := include "hdfs-k8s.fullname" . -}} +{{- if contains "client" $fullname -}} +{{- printf "%s" $fullname -}} +{{- else -}} +{{- printf "%s-client" $fullname | trunc 63 | trimSuffix "-" -}} +{{- end -}} +{{- end -}} + +{{/* +Create the kerberos principal suffix for core HDFS services +*/}} +{{- define "hdfs-principal" -}} +{{- printf "hdfs/_HOST@%s" .Values.global.kerberosRealm -}} +{{- end -}} + +{{/* +Create the kerberos principal for HTTP services +*/}} +{{- define "http-principal" -}} +{{- printf "HTTP/_HOST@%s" .Values.global.kerberosRealm -}} +{{- end -}} + +{{/* +Create the name for a Kubernetes Configmap containing a Kerberos config file. +*/}} +{{- define "krb5-configmap" -}} +{{- if .Values.global.kerberosConfigMapOverride -}} +{{- .Values.global.kerberosConfigMapOverride | trunc 63 | trimSuffix "-" -}} +{{- else -}} +{{- $name := include "hdfs-k8s.krb5.fullname" . -}} +{{- printf "%s-config" $name | trunc 63 | trimSuffix "-" -}} +{{- end -}} +{{- end -}} + +{{/* +Create the name for a Kubernetes Secret containing Kerberos keytabs. +*/}} +{{- define "krb5-keytabs-secret" -}} +{{- if .Values.global.kerberosKeytabsSecretOverride -}} +{{- .Values.global.kerberosKeytabsSecretOverride | trunc 63 | trimSuffix "-" -}} +{{- else -}} +{{- $name := include "hdfs-k8s.krb5.fullname" . -}} +{{- printf "%s-keytabs" $name | trunc 63 | trimSuffix "-" -}} +{{- end -}} +{{- end -}} + + +{{/* +Create the domain name part of services. +The HDFS config file should specify FQDN of services. Otherwise, Kerberos +login may fail. 
+*/}} +{{- define "svc-domain" -}} +{{- printf "%s.svc.cluster.local" .Release.Namespace -}} +{{- end -}} + +{{/* +Create the zookeeper quorum server list. The below uses two loops to make +sure the last item does not have comma. It uses index 0 for the last item +since that is the only special index that helm template gives us. +*/}} +{{- define "zookeeper-quorum" -}} +{{- if .Values.global.zookeeperQuorumOverride -}} +{{- .Values.global.zookeeperQuorumOverride -}} +{{- else -}} +{{- $service := include "zookeeper-fullname" . -}} +{{- $domain := include "svc-domain" . -}} +{{- $replicas := .Values.global.zookeeperQuorumSize | int -}} +{{- range $i, $e := until $replicas -}} + {{- if ne $i 0 -}} + {{- printf "%s-%d.%s-headless.%s:2181," $service $i $service $domain -}} + {{- end -}} +{{- end -}} +{{- range $i, $e := until $replicas -}} + {{- if eq $i 0 -}} + {{- printf "%s-%d.%s-headless.%s:2181" $service $i $service $domain -}} + {{- end -}} +{{- end -}} +{{- end -}} +{{- end -}} + +{{/* +Construct the name of the Kerberos KDC pod 0. +*/}} +{{- define "krb5-pod-0" -}} +{{- template "hdfs-k8s.krb5.fullname" . -}}-0 +{{- end -}} + +{{/* +Construct the full name of the Kerberos KDC statefulset member 0. +*/}} +{{- define "krb5-svc-0" -}} +{{- $pod := include "krb5-pod-0" . -}} +{{- $service := include "hdfs-k8s.krb5.fullname" . -}} +{{- $domain := include "svc-domain" . -}} +{{- printf "%s.%s.%s" $pod $service $domain -}} +{{- end -}} + +{{/* +Create the journalnode quorum server list. The below uses two loops to make +sure the last item does not have the delimiter. It uses index 0 for the last +item since that is the only special index that helm template gives us. +*/}} +{{- define "journalnode-quorum" -}} +{{- $service := include "hdfs-k8s.journalnode.fullname" . -}} +{{- $domain := include "svc-domain" . 
-}} +{{- $replicas := .Values.global.journalnodeQuorumSize | int -}} +{{- range $i, $e := until $replicas -}} + {{- if ne $i 0 -}} + {{- printf "%s-%d.%s.%s:8485;" $service $i $service $domain -}} + {{- end -}} +{{- end -}} +{{- range $i, $e := until $replicas -}} + {{- if eq $i 0 -}} + {{- printf "%s-%d.%s.%s:8485" $service $i $service $domain -}} + {{- end -}} +{{- end -}} +{{- end -}} + +{{/* +Construct the name of the namenode pod 0. +*/}} +{{- define "namenode-pod-0" -}} +{{- template "hdfs-k8s.namenode.fullname" . -}}-0 +{{- end -}} + +{{/* +Construct the full name of the namenode statefulset member 0. +*/}} +{{- define "namenode-svc-0" -}} +{{- $pod := include "namenode-pod-0" . -}} +{{- $service := include "hdfs-k8s.namenode.fullname" . -}} +{{- $domain := include "svc-domain" . -}} +{{- printf "%s.%s.%s" $pod $service $domain -}} +{{- end -}} + +{{/* +Construct the name of the namenode pod 1. +*/}} +{{- define "namenode-pod-1" -}} +{{- template "hdfs-k8s.namenode.fullname" . -}}-1 +{{- end -}} + +{{/* +Construct the full name of the namenode statefulset member 1. +*/}} +{{- define "namenode-svc-1" -}} +{{- $pod := include "namenode-pod-1" . -}} +{{- $service := include "hdfs-k8s.namenode.fullname" . -}} +{{- $domain := include "svc-domain" . 
-}} +{{- printf "%s.%s.%s" $pod $service $domain -}} +{{- end -}} diff --git a/vnfs/DAaaS/deploy/training-core/charts/kubernetes-HDFS/charts/hdfs-k8s/values.yaml b/vnfs/DAaaS/deploy/training-core/charts/kubernetes-HDFS/charts/hdfs-k8s/values.yaml new file mode 100644 index 00000000..77ca3fe0 --- /dev/null +++ b/vnfs/DAaaS/deploy/training-core/charts/kubernetes-HDFS/charts/hdfs-k8s/values.yaml @@ -0,0 +1,248 @@ +## ------------------------------------------------------------------------------ +## zookeeper: +## ------------------------------------------------------------------------------ +zookeeper: + ## Configure Zookeeper resource requests and limits + ## ref: http://kubernetes.io/docs/user-guide/compute-resources/ + resources: ~ + + ## The JVM heap size to allocate to Zookeeper + env: + ZK_HEAP_SIZE: 1G + + ## The number of zookeeper server to have in the quorum. + replicaCount: 3 + +## ------------------------------------------------------------------------------ +## hdfs-config-k8s: +## ------------------------------------------------------------------------------ +hdfs-config-k8s: + ## Custom hadoop config keys passed to the hdfs configmap as extra keys. + customHadoopConfig: + coreSite: {} + ## Set config key and value pairs, e.g. + # hadoop.http.authentication.type: kerberos + + hdfsSite: {} + ## Set config key and value pairs, e.g. + # dfs.datanode.use.datanode.hostname: "false" + +## ------------------------------------------------------------------------------ +## hdfs-journalnode-k8s: +## ------------------------------------------------------------------------------ +hdfs-journalnode-k8s: + persistence: + ## Persistent Volume Storage Class + ## If defined, storageClassName: <storageClass> + ## If set to "-", storageClassName: "", which disables dynamic provisioning + ## If undefined (the default) or set to null, no storageClassName spec is + ## set, choosing the default provisioner. 
(gp2 on AWS, standard on + ## GKE, AWS & OpenStack) + ## + # storageClass: "-" + ## To choose a suitable persistent volume from available static volumes, selectors + ## are used. + # selector: + # matchLabels: + # volume-type: hdfs-ssd + accessMode: ReadWriteOnce + size: 20Gi + + ## Node labels and tolerations for pod assignment + nodeSelector: {} + tolerations: [] + affinity: {} + +## ------------------------------------------------------------------------------ +## hdfs-namenode-k8s: +## ------------------------------------------------------------------------------ +hdfs-namenode-k8s: + ## Name of the namenode start script in the config map. + namenodeStartScript: format-and-run.sh + + ## A namenode start script that can have user specified content. + ## Can be used to conduct ad-hoc operation as specified by a user. + ## To use this, also set the namenodeStartScript variable above + ## to custom-run.sh. + customRunScript: | + #!/bin/bash -x + echo Write your own script content! + echo This message will disappear in 10 seconds. + sleep 10 + + persistence: + ## Persistent Volume Storage Class + ## If defined, storageClassName: <storageClass> + ## If set to "-", storageClassName: "", which disables dynamic provisioning + ## If undefined (the default) or set to null, no storageClassName spec is + ## set, choosing the default provisioner. (gp2 on AWS, standard on + ## GKE, AWS & OpenStack) + ## + # storageClass: "-" + + ## To choose a suitable persistent volume from available static volumes, selectors + ## are used. + # selector: + # matchLabels: + # volume-type: hdfs-ssd + + accessMode: ReadWriteOnce + + size: 100Gi + + ## Whether or not to use hostNetwork in namenode pods. Disabling this will break + ## data locality as namenode will see pod virtual IPs and fails to equate them with + ## cluster node physical IPs associated with data nodes. + ## We currently disable this only for CI on minikube. 
+ hostNetworkEnabled: true + + ## Node labels and tolerations for pod assignment + nodeSelector: {} + tolerations: [] + affinity: {} + +## ------------------------------------------------------------------------------ +## hdfs-simple-namenode-k8s: +## ------------------------------------------------------------------------------ +hdfs-simple-namenode-k8s: + ## Path of the local disk directory on a cluster node that will contain the namenode + ## fsimage and edit logs. This will be mounted to the namenode as a k8s HostPath + ## volume. + nameNodeHostPath: /hdfs-name + + ## Node labels and tolerations for pod assignment + nodeSelector: {} + tolerations: [] + affinity: {} + +## ------------------------------------------------------------------------------ +## hdfs-datanode-k8s: +## ------------------------------------------------------------------------------ +hdfs-datanode-k8s: + ## Node labels and tolerations for pod assignment + nodeSelector: {} + tolerations: [] + affinity: {} + +## ------------------------------------------------------------------------------ +## hdfs-krb5-k8s: +## ------------------------------------------------------------------------------ +hdfs-krb5-k8s: + persistence: + ## Persistent Volume Storage Class + ## If defined, storageClassName: <storageClass> + ## If set to "-", storageClassName: "", which disables dynamic provisioning + ## If undefined (the default) or set to null, no storageClassName spec is + ## set, choosing the default provisioner. (gp2 on AWS, standard on + ## GKE, AWS & OpenStack) + ## + # storageClass: "-" + + ## To choose a suitable persistent volume from available static volumes, selectors + ## are used. + # selector: + # matchLabels: + # volume-type: hdfs-ssd + + accessMode: ReadWriteOnce + + size: 20Gi + + ## We use a 3rd party image built from https://github.com/gcavalcante8808/docker-krb5-server. + ## TODO: The pod currently prints out the admin account in plain text. 
+ ## Supply an admin account password using a k8s secret. + ## TODO: The auto-generated passwords might be weak due to low entropy. + ## Increase entropy by running rngd or haveged. + ## TODO: Using latest tag is not desirable. The current image does not have specific tags. + ## Find a way to fix it. + image: + repository: gcavalcante8808/krb5-server + + tag: latest + + pullPolicy: IfNotPresent + + service: + type: ClusterIP + + port: 88 +## ------------------------------------------------------------------------------ +## Global values affecting all sub-charts: +## ------------------------------------------------------------------------------ +global: + ## A list of the local disk directories on cluster nodes that will contain the datanode + ## blocks. These paths will be mounted to the datanode as K8s HostPath volumes. + ## In a command line, the list should be enclosed in '{' and '}'. + ## e.g. --set "global.dataNodeHostPath={/hdfs-data,/hdfs-data1}" + dataNodeHostPath: + - /hdfs-data + + ## Parameters for determining which Unix user and group IDs to use in pods. + ## Persistent volume permission may need to match these. + podSecurityContext: + enabled: false + runAsUser: 0 + fsGroup: 1000 + + ## Whether or not to expect namenodes in the HA setup. + namenodeHAEnabled: true + + ## The number of zookeeper servers to have in the quorum. + ## This should match zookeeper.replicaCount above. Used only when + ## namenodeHAEnabled is set. + zookeeperQuorumSize: 3 + + ## Override zookeeper quorum address. Zookeeper is used for determining which namenode + ## instance is active. Separated by the comma character. Used only when + ## namenodeHAEnabled is set. + ## + # zookeeperQuorumOverride: zk-0.zk-svc.default.svc.cluster.local:2181,zk-1.zk-svc.default.svc.cluster.local:2181,zk-2.zk-svc.default.svc.cluster.local:2181 + + ## How many journal nodes to launch as a quorum. Used only when + ## namenodeHAEnabled is set. 
+ journalnodeQuorumSize: 3 + + ## Whether or not to enable default affinity setting. + defaultAffinityEnabled: true + + ## Whether or not Kerberos support is enabled. + kerberosEnabled: false + + ## Effective only if Kerberos is enabled. Override the name of the k8s + ## ConfigMap containing the kerberos config file. + ## + # kerberosConfigMapOverride: kerberos-config + + ## Effective only if Kerberos is enabled. Name of the kerberos config file inside + ## the config map. + kerberosConfigFileName: krb5.conf + + ## Effective only if Kerberos is enabled. Override the name of the k8s Secret + ## containing the kerberos keytab files of per-host HDFS principals. + ## The secret should have multiple data items. Each data item name + ## should be formatted as: + ## `HOST-NAME.keytab` + ## where HOST-NAME should match the cluster node + ## host name that each per-host hdfs principal is associated with. + ## + # kerberosKeytabsSecretOverride: hdfs-kerberos-keytabs + + ## Required to be non-empty if Kerberos is enabled. Specify your Kerberos realm name. + ## This should match the realm name in your Kerberos config file. + kerberosRealm: MYCOMPANY.COM + + ## Effective only if Kerberos is enabled. Enable protection of datanodes using + ## the jsvc utility. See the reference doc at + ## https://hadoop.apache.org/docs/r2.7.2/hadoop-project-dist/hadoop-common/SecureMode.html#Secure_DataNode + jsvcEnabled: true + +## Tags and conditions for triggering a group of relevant subcharts. +tags: + ## Trigger all subcharts required for high availability. Enabled by default. + ha: true + + ## Trigger all subcharts required for using Kerberos. Disabled by default. + kerberos: false + + ## Trigger all subcharts required for non-HA setup. Disabled by default. 
+ simple: false diff --git a/vnfs/DAaaS/deploy/training-core/charts/kubernetes-HDFS/charts/hdfs-krb5-k8s/.helmignore b/vnfs/DAaaS/deploy/training-core/charts/kubernetes-HDFS/charts/hdfs-krb5-k8s/.helmignore new file mode 100644 index 00000000..f0c13194 --- /dev/null +++ b/vnfs/DAaaS/deploy/training-core/charts/kubernetes-HDFS/charts/hdfs-krb5-k8s/.helmignore @@ -0,0 +1,21 @@ +# Patterns to ignore when building packages. +# This supports shell glob matching, relative path matching, and +# negation (prefixed with !). Only one pattern per line. +.DS_Store +# Common VCS dirs +.git/ +.gitignore +.bzr/ +.bzrignore +.hg/ +.hgignore +.svn/ +# Common backup files +*.swp +*.bak +*.tmp +*~ +# Various IDEs +.project +.idea/ +*.tmproj diff --git a/vnfs/DAaaS/deploy/training-core/charts/kubernetes-HDFS/charts/hdfs-krb5-k8s/Chart.yaml b/vnfs/DAaaS/deploy/training-core/charts/kubernetes-HDFS/charts/hdfs-krb5-k8s/Chart.yaml new file mode 100644 index 00000000..f8c301f1 --- /dev/null +++ b/vnfs/DAaaS/deploy/training-core/charts/kubernetes-HDFS/charts/hdfs-krb5-k8s/Chart.yaml @@ -0,0 +1,4 @@ +apiVersion: v1 +name: hdfs-krb5-k8s +version: 0.1.0 +description: Kerberos server that can be used for HDFS on Kubernetes. diff --git a/vnfs/DAaaS/deploy/training-core/charts/kubernetes-HDFS/charts/hdfs-krb5-k8s/templates/statefulset.yaml b/vnfs/DAaaS/deploy/training-core/charts/kubernetes-HDFS/charts/hdfs-krb5-k8s/templates/statefulset.yaml new file mode 100644 index 00000000..15be4b2f --- /dev/null +++ b/vnfs/DAaaS/deploy/training-core/charts/kubernetes-HDFS/charts/hdfs-krb5-k8s/templates/statefulset.yaml @@ -0,0 +1,99 @@ +# A headless service to create DNS records. +apiVersion: v1 +kind: Service +metadata: + name: {{ template "hdfs-k8s.krb5.fullname" . }} + labels: + app: {{ template "hdfs-k8s.krb5.name" . }} + chart: {{ template "hdfs-k8s.subchart" . }} + release: {{ .Release.Name }} + annotations: + # TODO: Deprecated. 
Replace tolerate-unready-endpoints with + # v1.Service.PublishNotReadyAddresses. + service.alpha.kubernetes.io/tolerate-unready-endpoints: "true" +spec: + ports: + - port: {{ .Values.service.port }} + protocol: TCP + name: kdc-tcp + - port: {{ .Values.service.port }} + protocol: UDP + name: kdc-udp + clusterIP: None + selector: + app: {{ template "hdfs-k8s.krb5.name" . }} + release: {{ .Release.Name }} +--- +apiVersion: apps/v1beta1 +kind: StatefulSet +metadata: + name: {{ template "hdfs-k8s.krb5.fullname" . }} + labels: + app: {{ template "hdfs-k8s.krb5.name" . }} + chart: {{ template "hdfs-k8s.subchart" . }} + release: {{ .Release.Name }} +spec: + serviceName: {{ template "hdfs-k8s.krb5.fullname" . }} + replicas: {{ .Values.replicaCount }} + selector: + matchLabels: + app: {{ template "hdfs-k8s.krb5.name" . }} + release: {{ .Release.Name }} + template: + metadata: + labels: + app: {{ template "hdfs-k8s.krb5.name" . }} + release: {{ .Release.Name }} + {{- if .Values.podAnnotations }} + annotations: +{{ toYaml .Values.podAnnotations | indent 8 }} + {{- end }} + spec: + containers: + - name: {{ .Chart.Name }} + image: "{{ .Values.image.repository }}:{{ .Values.image.tag }}" + imagePullPolicy: {{ .Values.image.pullPolicy }} + env: + - name: KRB5_REALM + value: {{ .Values.global.kerberosRealm }} + - name: KRB5_KDC + value: {{ template "krb5-svc-0" . 
}} + ports: + - name: kdc-tcp + containerPort: 88 + protocol: TCP + - name: kdc-udp + containerPort: 88 + protocol: UDP + livenessProbe: + tcpSocket: + port: kdc-tcp + readinessProbe: + tcpSocket: + port: kdc-tcp + restartPolicy: Always + {{- if .Values.global.podSecurityContext.enabled }} + securityContext: + runAsUser: {{ .Values.global.podSecurityContext.runAsUser }} + fsGroup: {{ .Values.global.podSecurityContext.fsGroup }} + {{- end }} + volumeClaimTemplates: + - metadata: + name: datadir + spec: + accessModes: + - {{ .Values.persistence.accessMode | quote }} + resources: + requests: + storage: {{ .Values.persistence.size | quote }} + {{- if .Values.persistence.storageClass }} + {{- if (eq "-" .Values.persistence.storageClass) }} + storageClassName: "" + {{- else }} + storageClassName: "{{ .Values.persistence.storageClass }}" + {{- end }} + {{- end }} + {{- if .Values.persistence.selector }} + selector: +{{ toYaml .Values.persistence.selector | indent 10 }} + {{- end }} diff --git a/vnfs/DAaaS/deploy/training-core/charts/kubernetes-HDFS/charts/hdfs-namenode-k8s/Chart.yaml b/vnfs/DAaaS/deploy/training-core/charts/kubernetes-HDFS/charts/hdfs-namenode-k8s/Chart.yaml new file mode 100644 index 00000000..f45655f5 --- /dev/null +++ b/vnfs/DAaaS/deploy/training-core/charts/kubernetes-HDFS/charts/hdfs-namenode-k8s/Chart.yaml @@ -0,0 +1,4 @@ +apiVersion: v1 +name: hdfs-namenode-k8s +version: 0.1.0 +description: namenodes in HDFS on Kubernetes. diff --git a/vnfs/DAaaS/deploy/training-core/charts/kubernetes-HDFS/charts/hdfs-namenode-k8s/templates/namenode-statefulset.yaml b/vnfs/DAaaS/deploy/training-core/charts/kubernetes-HDFS/charts/hdfs-namenode-k8s/templates/namenode-statefulset.yaml new file mode 100644 index 00000000..44e8fc60 --- /dev/null +++ b/vnfs/DAaaS/deploy/training-core/charts/kubernetes-HDFS/charts/hdfs-namenode-k8s/templates/namenode-statefulset.yaml @@ -0,0 +1,287 @@ +# A headless service to create DNS records. 
+apiVersion: v1 +kind: Service +metadata: + name: {{ template "hdfs-k8s.namenode.fullname" . }} + labels: + app: {{ template "hdfs-k8s.namenode.name" . }} + chart: {{ template "hdfs-k8s.subchart" . }} + release: {{ .Release.Name }} + annotations: + # TODO: Deprecated. Replace tolerate-unready-endpoints with + # v1.Service.PublishNotReadyAddresses. + service.alpha.kubernetes.io/tolerate-unready-endpoints: "true" +spec: + ports: + - port: 8020 + name: fs + - port: 50070 + name: http + clusterIP: None + selector: + app: {{ template "hdfs-k8s.namenode.name" . }} + release: {{ .Release.Name }} +--- +apiVersion: policy/v1beta1 +kind: PodDisruptionBudget +metadata: + name: {{ template "hdfs-k8s.namenode.fullname" . }} + labels: + app: {{ template "hdfs-k8s.namenode.name" . }} + chart: {{ template "hdfs-k8s.subchart" . }} + release: {{ .Release.Name }} +spec: + selector: + matchLabels: + app: {{ template "hdfs-k8s.namenode.name" . }} + release: {{ .Release.Name }} + minAvailable: 1 +--- +# Provides namenode helper scripts. Most of them are start scripts +# that meet different needs. +# TODO: Support upgrade of metadata in case a new Hadoop version requires it. +apiVersion: v1 +kind: ConfigMap +metadata: + name: {{ template "hdfs-k8s.namenode.fullname" . }}-scripts + labels: + app: {{ template "hdfs-k8s.namenode.name" . }} + chart: {{ template "hdfs-k8s.subchart" . }} + release: {{ .Release.Name }} +data: + # A bootstrap script which will start namenode daemons after conducting + # optional metadata initialization steps. The metadata initialization + # steps will take place in case the metadata dir is empty, + # which will be the case only for the very first run. The specific steps + # will differ depending on whether the namenode is active or standby. + # We also assume, for the very first run, namenode-0 will be active and + # namenode-1 will be standby as StatefulSet will launch namenode-0 first + # and zookeeper will determine the sole namenode to be the active one. 
+ # For active namenode, the initialization steps will format the metadata, + # zookeeper dir and journal node data entries. + # For standby namenode, the initialization steps will simply receive + # the first batch of metadata updates from the journal node. + format-and-run.sh: | + #!/usr/bin/env bash + # Exit on error. Append "|| true" if you expect an error. + set -o errexit + # Exit on error inside any functions or subshells. + set -o errtrace + # Do not allow use of undefined vars. Use ${VAR:-} to use an undefined VAR + set -o nounset + # Catch an error in command pipes. e.g. mysqldump fails (but gzip succeeds) + # in `mysqldump |gzip` + set -o pipefail + # Turn on traces, useful while debugging. + set -o xtrace + + _HDFS_BIN=$HADOOP_PREFIX/bin/hdfs + _METADATA_DIR=/hadoop/dfs/name/current + if [[ "$MY_POD" = "$NAMENODE_POD_0" ]]; then + if [[ ! -d $_METADATA_DIR ]]; then + $_HDFS_BIN --config $HADOOP_CONF_DIR namenode -format \ + -nonInteractive hdfs-k8s || + (rm -rf $_METADATA_DIR; exit 1) + fi + _ZKFC_FORMATTED=/hadoop/dfs/name/current/.hdfs-k8s-zkfc-formatted + if [[ ! -f $_ZKFC_FORMATTED ]]; then + _OUT=$($_HDFS_BIN --config $HADOOP_CONF_DIR zkfc -formatZK -nonInteractive 2>&1) + # zkfc masks fatal exceptions and returns exit code 0 + (echo $_OUT | grep -q "FATAL") && exit 1 + touch $_ZKFC_FORMATTED + fi + elif [[ "$MY_POD" = "$NAMENODE_POD_1" ]]; then + if [[ ! -d $_METADATA_DIR ]]; then + $_HDFS_BIN --config $HADOOP_CONF_DIR namenode -bootstrapStandby \ + -nonInteractive || \ + (rm -rf $_METADATA_DIR; exit 1) + fi + fi + $HADOOP_PREFIX/sbin/hadoop-daemon.sh --config $HADOOP_CONF_DIR start zkfc + $_HDFS_BIN --config $HADOOP_CONF_DIR namenode + + # A start script that will just hang indefinitely. A user can then get + # inside the pod and debug. Or a user can conduct custom manual operations. + do-nothing.sh: | + #!/usr/bin/env bash + tail -f /var/log/dmesg + + # A start script that has user specified content. 
Can be used to conduct + # ad-hoc operation as specified by a user. + custom-run.sh: {{ .Values.customRunScript | quote }} +--- +apiVersion: apps/v1beta1 +kind: StatefulSet +metadata: + name: {{ template "hdfs-k8s.namenode.fullname" . }} + labels: + app: {{ template "hdfs-k8s.namenode.name" . }} + chart: {{ template "hdfs-k8s.subchart" . }} + release: {{ .Release.Name }} +spec: + serviceName: {{ template "hdfs-k8s.namenode.fullname" . }} + replicas: 2 + template: + metadata: + labels: + app: {{ template "hdfs-k8s.namenode.name" . }} + release: {{ .Release.Name }} + {{- if .Values.podAnnotations }} + annotations: +{{ toYaml .Values.podAnnotations | indent 8 }} + {{- end }} + spec: + {{- if .Values.hostNetworkEnabled }} + # Use hostNetwork so datanodes connect to namenode without going through an overlay network + # like weave. Otherwise, namenode fails to see physical IP address of datanodes. + # Disabling this will break data locality as namenode will see pod virtual IPs and fails to + # equate them with cluster node physical IPs associated with data nodes. + # We currently disable this only for CI on minikube. + hostNetwork: true + hostPID: true + dnsPolicy: ClusterFirstWithHostNet + {{- else }} + dnsPolicy: ClusterFirst + {{- end }} + {{- if .Values.affinity }} + affinity: +{{ toYaml .Values.affinity | indent 8 }} + {{- else if .Values.global.defaultAffinityEnabled }} + affinity: + podAntiAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + - labelSelector: + matchExpressions: + - key: "app" + operator: In + values: + - {{ template "hdfs-k8s.namenode.name" . }} + - key: "release" + operator: In + values: + - {{ .Release.Name }} + topologyKey: "kubernetes.io/hostname" + {{- end }} + {{- if .Values.nodeSelector }} + nodeSelector: +{{ toYaml .Values.nodeSelector | indent 8 }} + {{- end }} + {{- if .Values.tolerations }} + tolerations: +{{ toYaml .Values.tolerations | indent 8 }} + {{- end }} + containers: + # TODO: Support hadoop version as option. 
+ - name: hdfs-namenode + image: uhopper/hadoop-namenode:2.7.2 + env: + - name: HADOOP_CUSTOM_CONF_DIR + value: /etc/hadoop-custom-conf + - name: MULTIHOMED_NETWORK + value: "0" + # Used by the start script below. + - name: MY_POD + valueFrom: + fieldRef: + fieldPath: metadata.name + - name: NAMENODE_POD_0 + value: {{ template "namenode-pod-0" . }} + - name: NAMENODE_POD_1 + value: {{ template "namenode-pod-1" . }} + command: ['/bin/sh', '-c'] + # The start script is provided by a config map. + args: + - /entrypoint.sh "/nn-scripts/{{ .Values.namenodeStartScript }}" + ports: + - containerPort: 8020 + name: fs + - containerPort: 50070 + name: http + volumeMounts: + - name: nn-scripts + mountPath: /nn-scripts + readOnly: true + # Mount a subpath of the volume so that the name subdir would be a + # brand new empty dir. This way, we won't get affected by existing + # files in the volume top dir. + - name: metadatadir + mountPath: /hadoop/dfs/name + subPath: name + - name: hdfs-config + mountPath: /etc/hadoop-custom-conf + readOnly: true + {{- if .Values.global.kerberosEnabled }} + - name: kerberos-config + mountPath: /etc/krb5.conf + subPath: {{ .Values.global.kerberosConfigFileName }} + readOnly: true + - name: kerberos-keytab-copy + mountPath: /etc/security/ + readOnly: true + {{- end }} + {{- if .Values.global.kerberosEnabled }} + initContainers: + - name: copy-kerberos-keytab + image: busybox:1.27.1 + command: ['sh', '-c'] + args: + - cp /kerberos-keytabs/${MY_KERBEROS_NAME}*.keytab /kerberos-keytab-copy/hdfs.keytab + env: + - name: MY_KERBEROS_NAME + valueFrom: + fieldRef: + {{- if .Values.hostNetworkEnabled }} + fieldPath: spec.nodeName + {{- else }} + fieldPath: metadata.name + {{- end }} + volumeMounts: + - name: kerberos-keytabs + mountPath: /kerberos-keytabs + - name: kerberos-keytab-copy + mountPath: /kerberos-keytab-copy + {{- end }} + restartPolicy: Always + volumes: + - name: nn-scripts + configMap: + name: {{ template "hdfs-k8s.namenode.fullname" . 
}}-scripts + defaultMode: 0744 + - name: hdfs-config + configMap: + name: {{ template "hdfs-k8s.config.fullname" . }} + {{- if .Values.global.kerberosEnabled }} + - name: kerberos-config + configMap: + name: {{ template "krb5-configmap" . }} + - name: kerberos-keytabs + secret: + secretName: {{ template "krb5-keytabs-secret" . }} + - name: kerberos-keytab-copy + emptyDir: {} + {{- end }} + {{- if .Values.global.podSecurityContext.enabled }} + securityContext: + runAsUser: {{ .Values.global.podSecurityContext.runAsUser }} + fsGroup: {{ .Values.global.podSecurityContext.fsGroup }} + {{- end }} + volumeClaimTemplates: + - metadata: + name: metadatadir + spec: + accessModes: + - {{ .Values.persistence.accessMode | quote }} + resources: + requests: + storage: {{ .Values.persistence.size | quote }} + {{- if .Values.persistence.storageClass }} + {{- if (eq "-" .Values.persistence.storageClass) }} + storageClassName: "" + {{- else }} + storageClassName: "{{ .Values.persistence.storageClass }}" + {{- end }} + {{- end }} + {{- if .Values.persistence.selector }} + selector: +{{ toYaml .Values.persistence.selector | indent 10 }} + {{- end }} diff --git a/vnfs/DAaaS/deploy/training-core/charts/kubernetes-HDFS/charts/hdfs-simple-namenode-k8s/Chart.yaml b/vnfs/DAaaS/deploy/training-core/charts/kubernetes-HDFS/charts/hdfs-simple-namenode-k8s/Chart.yaml new file mode 100644 index 00000000..bcf6f5b0 --- /dev/null +++ b/vnfs/DAaaS/deploy/training-core/charts/kubernetes-HDFS/charts/hdfs-simple-namenode-k8s/Chart.yaml @@ -0,0 +1,4 @@ +apiVersion: v1 +name: hdfs-simple-namenode-k8s +version: 0.1.0 +description: Non-HA namenode for HDFS on Kubernetes. 
diff --git a/vnfs/DAaaS/deploy/training-core/charts/kubernetes-HDFS/charts/hdfs-simple-namenode-k8s/templates/namenode-statefulset.yaml b/vnfs/DAaaS/deploy/training-core/charts/kubernetes-HDFS/charts/hdfs-simple-namenode-k8s/templates/namenode-statefulset.yaml new file mode 100644 index 00000000..ab92efa9 --- /dev/null +++ b/vnfs/DAaaS/deploy/training-core/charts/kubernetes-HDFS/charts/hdfs-simple-namenode-k8s/templates/namenode-statefulset.yaml @@ -0,0 +1,82 @@ +# A headless service to create DNS records. +apiVersion: v1 +kind: Service +metadata: + name: {{ template "hdfs-k8s.namenode.fullname" . }} + labels: + app: {{ template "hdfs-k8s.namenode.name" . }} + chart: {{ template "hdfs-k8s.subchart" . }} + release: {{ .Release.Name }} +spec: + ports: + - port: 8020 + name: fs + clusterIP: None + selector: + app: {{ template "hdfs-k8s.namenode.name" . }} + release: {{ .Release.Name }} +--- +apiVersion: apps/v1beta1 +kind: StatefulSet +metadata: + name: {{ template "hdfs-k8s.namenode.fullname" . }} + labels: + app: {{ template "hdfs-k8s.namenode.name" . }} + chart: {{ template "hdfs-k8s.subchart" . }} + release: {{ .Release.Name }} +spec: + serviceName: {{ template "hdfs-k8s.namenode.fullname" . }} + # Create a size-1 set. + replicas: 1 + template: + metadata: + labels: + app: {{ template "hdfs-k8s.namenode.name" . }} + release: {{ .Release.Name }} + {{- if .Values.podAnnotations }} + annotations: +{{ toYaml .Values.podAnnotations | indent 8 }} + {{- end }} + spec: + {{- if .Values.affinity }} + affinity: +{{ toYaml .Values.affinity | indent 8 }} + {{- end }} + {{- if .Values.nodeSelector }} + nodeSelector: +{{ toYaml .Values.nodeSelector | indent 8 }} + {{- end }} + {{- if .Values.tolerations }} + tolerations: +{{ toYaml .Values.tolerations | indent 8 }} + {{- end }} + # Use hostNetwork so datanodes connect to namenode without going through an overlay network + # like weave. Otherwise, namenode fails to see physical IP address of datanodes. 
+ hostNetwork: true + hostPID: true + dnsPolicy: ClusterFirstWithHostNet + containers: + - name: hdfs-namenode + image: uhopper/hadoop-namenode:2.7.2 + env: + - name: HADOOP_CUSTOM_CONF_DIR + value: /etc/hadoop-custom-conf + - name: CLUSTER_NAME + value: hdfs-k8s + ports: + - containerPort: 8020 + name: fs + volumeMounts: + - name: hdfs-name + mountPath: /hadoop/dfs/name + - name: hdfs-config + mountPath: /etc/hadoop-custom-conf + readOnly: true + restartPolicy: Always + volumes: + - name: hdfs-name + hostPath: + path: {{ .Values.nameNodeHostPath }} + - name: hdfs-config + configMap: + name: {{ template "hdfs-k8s.config.fullname" . }} diff --git a/vnfs/DAaaS/deploy/training-core/charts/m3db/.helmignore b/vnfs/DAaaS/deploy/training-core/charts/m3db/.helmignore new file mode 100644 index 00000000..50af0317 --- /dev/null +++ b/vnfs/DAaaS/deploy/training-core/charts/m3db/.helmignore @@ -0,0 +1,22 @@ +# Patterns to ignore when building packages. +# This supports shell glob matching, relative path matching, and +# negation (prefixed with !). Only one pattern per line. +.DS_Store +# Common VCS dirs +.git/ +.gitignore +.bzr/ +.bzrignore +.hg/ +.hgignore +.svn/ +# Common backup files +*.swp +*.bak +*.tmp +*~ +# Various IDEs +.project +.idea/ +*.tmproj +.vscode/ diff --git a/vnfs/DAaaS/deploy/training-core/charts/m3db/Chart.yaml b/vnfs/DAaaS/deploy/training-core/charts/m3db/Chart.yaml new file mode 100644 index 00000000..10d9d542 --- /dev/null +++ b/vnfs/DAaaS/deploy/training-core/charts/m3db/Chart.yaml @@ -0,0 +1,3 @@ +apiVersion: v1 +name: m3db +version: 0.1.1 diff --git a/vnfs/DAaaS/deploy/training-core/charts/m3db/templates/NOTES.txt b/vnfs/DAaaS/deploy/training-core/charts/m3db/templates/NOTES.txt new file mode 100644 index 00000000..ee7ee3d7 --- /dev/null +++ b/vnfs/DAaaS/deploy/training-core/charts/m3db/templates/NOTES.txt @@ -0,0 +1 @@ +M3DB Cluster {{ .Values.m3dbCluster.name }} has been created
\ No newline at end of file diff --git a/vnfs/DAaaS/deploy/training-core/charts/m3db/templates/_helpers.tpl b/vnfs/DAaaS/deploy/training-core/charts/m3db/templates/_helpers.tpl new file mode 100644 index 00000000..36544b12 --- /dev/null +++ b/vnfs/DAaaS/deploy/training-core/charts/m3db/templates/_helpers.tpl @@ -0,0 +1,32 @@ +{{/* vim: set filetype=mustache: */}} +{{/* +Expand the name of the chart. +*/}} +{{- define "m3db.name" -}} +{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" -}} +{{- end -}} + +{{/* +Create a default fully qualified app name. +We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec). +If release name contains chart name it will be used as a full name. +*/}} +{{- define "m3db.fullname" -}} +{{- if .Values.fullnameOverride -}} +{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" -}} +{{- else -}} +{{- $name := default .Chart.Name .Values.nameOverride -}} +{{- if contains $name .Release.Name -}} +{{- .Release.Name | trunc 63 | trimSuffix "-" -}} +{{- else -}} +{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" -}} +{{- end -}} +{{- end -}} +{{- end -}} + +{{/* +Create chart name and version as used by the chart label. 
+*/}} +{{- define "m3db.chart" -}} +{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" -}} +{{- end -}} diff --git a/vnfs/DAaaS/deploy/training-core/charts/m3db/templates/configmap.yaml b/vnfs/DAaaS/deploy/training-core/charts/m3db/templates/configmap.yaml new file mode 100644 index 00000000..d7197ae9 --- /dev/null +++ b/vnfs/DAaaS/deploy/training-core/charts/m3db/templates/configmap.yaml @@ -0,0 +1,216 @@ +apiVersion: v1 +kind: ConfigMap +metadata: + name: {{ .Values.m3dbCluster.configMapName }} +data: + m3.yml: |+ + coordinator: + listenAddress: + type: "config" + value: "0.0.0.0:7201" + metrics: + scope: + prefix: "coordinator" + prometheus: + handlerPath: /metrics + listenAddress: 0.0.0.0:7203 + sanitization: prometheus + samplingRate: 1.0 + extended: none + tagOptions: + idScheme: quoted + local: + namespaces: + - namespace: "collectd" + type: unaggregated + retention: 48h + db: + logging: + level: info + + metrics: + prometheus: + handlerPath: /metrics + sanitization: prometheus + samplingRate: 1.0 + extended: detailed + + listenAddress: 0.0.0.0:9000 + clusterListenAddress: 0.0.0.0:9001 + httpNodeListenAddress: 0.0.0.0:9002 + httpClusterListenAddress: 0.0.0.0:9003 + debugListenAddress: 0.0.0.0:9004 + + hostID: + resolver: file + file: + path: /etc/m3db/pod-identity/identity + timeout: 5m + + client: + writeConsistencyLevel: majority + readConsistencyLevel: unstrict_majority + writeTimeout: 10s + fetchTimeout: 15s + connectTimeout: 20s + writeRetry: + initialBackoff: 500ms + backoffFactor: 3 + maxRetries: 2 + jitter: true + fetchRetry: + initialBackoff: 500ms + backoffFactor: 2 + maxRetries: 3 + jitter: true + backgroundHealthCheckFailLimit: 4 + backgroundHealthCheckFailThrottleFactor: 0.5 + + gcPercentage: 100 + + writeNewSeriesAsync: true + writeNewSeriesLimitPerSecond: 1048576 + writeNewSeriesBackoffDuration: 2ms + + bootstrap: + bootstrappers: + - filesystem + - commitlog + - peers + - uninitialized_topology + fs: + 
numProcessorsPerCPU: 0.125 + + commitlog: + flushMaxBytes: 524288 + flushEvery: 1s + queue: + calculationType: fixed + size: 2097152 + blockSize: 10m + + fs: + filePathPrefix: /var/lib/m3db + writeBufferSize: 65536 + dataReadBufferSize: 65536 + infoReadBufferSize: 128 + seekReadBufferSize: 4096 + throughputLimitMbps: 100.0 + throughputCheckEvery: 128 + + repair: + enabled: false + interval: 2h + offset: 30m + jitter: 1h + throttle: 2m + checkInterval: 1m + + pooling: + blockAllocSize: 16 + type: simple + seriesPool: + size: 262144 + lowWatermark: 0.7 + highWatermark: 1.0 + blockPool: + size: 262144 + lowWatermark: 0.7 + highWatermark: 1.0 + encoderPool: + size: 262144 + lowWatermark: 0.7 + highWatermark: 1.0 + closersPool: + size: 104857 + lowWatermark: 0.7 + highWatermark: 1.0 + contextPool: + size: 262144 + lowWatermark: 0.7 + highWatermark: 1.0 + segmentReaderPool: + size: 16384 + lowWatermark: 0.7 + highWatermark: 1.0 + iteratorPool: + size: 2048 + lowWatermark: 0.7 + highWatermark: 1.0 + fetchBlockMetadataResultsPool: + size: 65536 + capacity: 32 + lowWatermark: 0.7 + highWatermark: 1.0 + fetchBlocksMetadataResultsPool: + size: 32 + capacity: 4096 + lowWatermark: 0.7 + highWatermark: 1.0 + hostBlockMetadataSlicePool: + size: 131072 + capacity: 3 + lowWatermark: 0.7 + highWatermark: 1.0 + blockMetadataPool: + size: 65536 + lowWatermark: 0.7 + highWatermark: 1.0 + blockMetadataSlicePool: + size: 65536 + capacity: 32 + lowWatermark: 0.7 + highWatermark: 1.0 + blocksMetadataPool: + size: 65536 + lowWatermark: 0.7 + highWatermark: 1.0 + blocksMetadataSlicePool: + size: 32 + capacity: 4096 + lowWatermark: 0.7 + highWatermark: 1.0 + identifierPool: + size: 262144 + lowWatermark: 0.7 + highWatermark: 1.0 + bytesPool: + buckets: + - capacity: 16 + size: 524288 + lowWatermark: 0.7 + highWatermark: 1.0 + - capacity: 32 + size: 262144 + lowWatermark: 0.7 + highWatermark: 1.0 + - capacity: 64 + size: 131072 + lowWatermark: 0.7 + highWatermark: 1.0 + - capacity: 128 + size: 
65536 + lowWatermark: 0.7 + highWatermark: 1.0 + - capacity: 256 + size: 65536 + lowWatermark: 0.7 + highWatermark: 1.0 + - capacity: 1440 + size: 16384 + lowWatermark: 0.7 + highWatermark: 1.0 + - capacity: 4096 + size: 8192 + lowWatermark: 0.7 + highWatermark: 1.0 + config: + service: + env: default_env + zone: embedded + service: m3db + cacheDir: /var/lib/m3kv + etcdClusters: + - zone: embedded + endpoints: + - http://{{ .Release.Name }}-{{ .Values.etcdCluster.name }}:2379 diff --git a/vnfs/DAaaS/deploy/training-core/charts/m3db/templates/etcd-cluster.yaml b/vnfs/DAaaS/deploy/training-core/charts/m3db/templates/etcd-cluster.yaml new file mode 100644 index 00000000..fcf44256 --- /dev/null +++ b/vnfs/DAaaS/deploy/training-core/charts/m3db/templates/etcd-cluster.yaml @@ -0,0 +1,20 @@ +apiVersion: "etcd.database.coreos.com/v1beta2" +kind: "EtcdCluster" +metadata: + name: {{ .Release.Name }}-{{ .Values.etcdCluster.name }} + labels: + chart: "{{ .Chart.Name }}-{{ .Chart.Version }}" + app: {{ template "m3db.name" . 
}} + heritage: {{ .Release.Service }} + release: {{ .Release.Name }} + annotations: + etcd.database.coreos.com/scope: clusterwide +spec: + size: {{ .Values.etcdCluster.size }} + version: "{{ .Values.etcdCluster.version }}" + pod: +{{ toYaml .Values.etcdCluster.pod | indent 4 }} + {{- if .Values.etcdCluster.enableTLS }} + TLS: +{{ toYaml .Values.etcdCluster.tls | indent 4 }} + {{- end }} diff --git a/vnfs/DAaaS/deploy/training-core/charts/m3db/templates/m3dbcluster.yaml b/vnfs/DAaaS/deploy/training-core/charts/m3db/templates/m3dbcluster.yaml new file mode 100644 index 00000000..5e804351 --- /dev/null +++ b/vnfs/DAaaS/deploy/training-core/charts/m3db/templates/m3dbcluster.yaml @@ -0,0 +1,22 @@ +apiVersion: operator.m3db.io/v1alpha1 +kind: M3DBCluster +metadata: + name: {{ .Values.m3dbCluster.name }} +spec: + image: {{ .Values.m3dbCluster.image.repository }}:{{ .Values.m3dbCluster.image.tag }} + replicationFactor: {{ .Values.m3dbCluster.replicationFactor }} + numberOfShards: {{ .Values.m3dbCluster.numberOfShards }} + isolationGroups: +{{ toYaml .Values.m3dbCluster.isolationGroups | indent 4 }} + namespaces: +{{ toYaml .Values.m3dbCluster.namespaces | indent 4 }} + configMapName: {{ .Values.m3dbCluster.configMapName }} + resources: + requests: + memory: 4Gi + cpu: '1' + limits: + memory: 12Gi + cpu: '4' + + diff --git a/vnfs/DAaaS/deploy/training-core/charts/m3db/values.yaml b/vnfs/DAaaS/deploy/training-core/charts/m3db/values.yaml new file mode 100644 index 00000000..ab365cfa --- /dev/null +++ b/vnfs/DAaaS/deploy/training-core/charts/m3db/values.yaml @@ -0,0 +1,51 @@ +m3dbCluster: + name: m3db-cluster + image: + repository: quay.io/m3db/m3dbnode + tag: latest + replicationFactor: 3 + numberOfShards: 256 + isolationGroups: + - name: us-west1-a + numInstances: 1 + - name: us-west1-b + numInstances: 1 + - name: us-west1-c + numInstances: 1 + namespaces: + - name: collectd + preset: 10s:2d + configMapName: m3-configuration + +etcdCluster: + name: etcd + size: 3 + version: 
3.3.3 + image: + repository: quay.io/coreos/etcd + tag: v3.3.3 + pullPolicy: Always + enableTLS: false + # TLS configs + tls: + static: + member: + peerSecret: etcd-peer-tls + serverSecret: etcd-server-tls + operatorSecret: etcd-client-tls + ## etcd cluster pod specific values + ## Ref: https://github.com/coreos/etcd-operator/blob/master/doc/user/spec_examples.md#three-members-cluster-with-resource-requirement + pod: + ## Antiaffinity for etcd pod assignment + ## Ref: https://kubernetes.io/docs/concepts/configuration/assign-pod-node/#affinity-and-anti-affinity + antiAffinity: false + resources: + limits: + cpu: 100m + memory: 128Mi + requests: + cpu: 100m + memory: 128Mi + ## Node labels for etcd pod assignment + ## Ref: https://kubernetes.io/docs/user-guide/node-selection/ + nodeSelector: {} diff --git a/vnfs/DAaaS/deploy/training-core/hdfs-writer-source-code/hdfs-writer/README.md b/vnfs/DAaaS/deploy/training-core/hdfs-writer-source-code/hdfs-writer/README.md new file mode 100644 index 00000000..4de7d0f9 --- /dev/null +++ b/vnfs/DAaaS/deploy/training-core/hdfs-writer-source-code/hdfs-writer/README.md @@ -0,0 +1,11 @@ +# HDFS-writer + +HDFS writer reads messages from a Kafka topic and persists them in the +given HDFS file system. This is a work in progress and shall be moved +to a separate source code repo later. + +## Usage + +## Config items + +## Troubleshooting
\ No newline at end of file
diff --git a/vnfs/DAaaS/deploy/training-core/hdfs-writer-source-code/hdfs-writer/pom.xml b/vnfs/DAaaS/deploy/training-core/hdfs-writer-source-code/hdfs-writer/pom.xml
new file mode 100644
index 00000000..20c11fea
--- /dev/null
+++ b/vnfs/DAaaS/deploy/training-core/hdfs-writer-source-code/hdfs-writer/pom.xml
@@ -0,0 +1,111 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project xmlns="http://maven.apache.org/POM/4.0.0"
+         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+    <modelVersion>4.0.0</modelVersion>
+
+    <groupId>com.intel.onap</groupId>
+    <artifactId>hdfs-writer</artifactId>
+    <version>1.0</version>
+
+    <!--Begin: compile and build the fat jar -->
+    <build>
+        <plugins>
+            <plugin>
+                <groupId>org.apache.maven.plugins</groupId>
+                <artifactId>maven-compiler-plugin</artifactId>
+                <version>3.8.1</version>
+                <configuration>
+                    <source>1.8</source>
+                    <target>1.8</target>
+                </configuration>
+            </plugin>
+            <plugin>
+                <groupId>org.apache.maven.plugins</groupId>
+                <artifactId>maven-shade-plugin</artifactId>
+                <version>2.3</version>
+                <executions>
+                    <execution>
+                        <phase>package</phase>
+                        <goals>
+                            <goal>shade</goal>
+                        </goals>
+                        <configuration>
+                            <transformers>
+                                <!-- merges META-INF/services files so Hadoop FileSystem providers survive shading -->
+                                <transformer implementation="org.apache.maven.plugins.shade.resource.ServicesResourceTransformer"/>
+                            </transformers>
+                            <filters>
+                                <filter>
+                                    <artifact>*:*</artifact>
+                                    <excludes>
+                                        <!-- strip dependency signatures; they no longer match the repackaged jar -->
+                                        <exclude>META-INF/*.SF</exclude>
+                                        <exclude>META-INF/*.DSA</exclude>
+                                        <exclude>META-INF/*.RSA</exclude>
+                                    </excludes>
+                                </filter>
+                            </filters>
+                        </configuration>
+                    </execution>
+                </executions>
+            </plugin>
+            <plugin>
+                <artifactId>maven-assembly-plugin</artifactId>
+                <configuration>
+                    <archive>
+                        <manifest>
+                            <mainClass>kafka2hdfsApp</mainClass>
+                        </manifest>
+                    </archive>
+                    <descriptorRefs>
+                        <descriptorRef>jar-with-dependencies</descriptorRef>
+                    </descriptorRefs>
+                </configuration>
+                <executions>
+                    <execution>
+                        <id>make-assembly</id> <!-- this is used for inheritance merges -->
+                        <phase>package</phase> <!-- bind to the packaging phase -->
+                        <goals>
+                            <goal>single</goal>
+                        </goals>
+                    </execution>
+                </executions>
+            </plugin>
+        </plugins>
+    </build>
+    <!--End: compile and build the fat jar -->
+
+    <dependencies>
+        <!--
+          All Hadoop artifacts are kept on one release line. The legacy monolithic
+          hadoop-core 1.2.1 artifact was dropped: it ships its own copies of
+          org.apache.hadoop.fs.* / org.apache.hadoop.conf.*, which collide with the
+          classes pulled in by hadoop-client 3.2.0 once everything is merged into the fat jar.
+        -->
+        <dependency>
+            <groupId>org.apache.hadoop</groupId>
+            <artifactId>hadoop-client</artifactId>
+            <version>3.2.0</version>
+        </dependency>
+        <dependency>
+            <groupId>org.apache.hadoop</groupId>
+            <artifactId>hadoop-hdfs</artifactId>
+            <version>3.2.0</version>
+        </dependency>
+        <dependency>
+            <groupId>org.apache.kafka</groupId>
+            <artifactId>kafka-clients</artifactId>
+            <version>2.2.0</version>
+        </dependency>
+        <dependency>
+            <groupId>com.fasterxml.jackson.dataformat</groupId>
+            <artifactId>jackson-dataformat-yaml</artifactId>
+            <version>2.9.8</version>
+        </dependency>
+        <!-- jackson-databind must match jackson-dataformat-yaml; a direct declaration overrides the transitive one -->
+        <dependency>
+            <groupId>com.fasterxml.jackson.core</groupId>
+            <artifactId>jackson-databind</artifactId>
+            <version>2.9.8</version>
+        </dependency>
+
+    </dependencies>
+
+</project>
\ No newline at end of file
diff --git a/vnfs/DAaaS/deploy/training-core/hdfs-writer-source-code/hdfs-writer/src/main/java/CreateKafkaConsumer.java b/vnfs/DAaaS/deploy/training-core/hdfs-writer-source-code/hdfs-writer/src/main/java/CreateKafkaConsumer.java
new file mode 100644
index 00000000..2042a146
--- /dev/null
+++ b/vnfs/DAaaS/deploy/training-core/hdfs-writer-source-code/hdfs-writer/src/main/java/CreateKafkaConsumer.java
@@ -0,0 +1,81 @@
+import config.Configuration;
+import org.apache.hadoop.fs.FSDataOutputStream;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.kafka.clients.consumer.ConsumerConfig;
+import org.apache.kafka.clients.consumer.ConsumerRecord;
+import org.apache.kafka.clients.consumer.ConsumerRecords;
+import org.apache.kafka.clients.consumer.KafkaConsumer;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.IOException;
+import java.time.Duration;
+import java.util.*;
+
+/**
+ * Subscribes to a single Kafka topic and appends the value of every received
+ * record to one HDFS file.
+ *
+ * All connection settings are read from the static {@link Configuration} settings
+ * map ("kafka" and "hdfs" sections), so the configuration must have been
+ * initialised before an instance is created.
+ */
+public class CreateKafkaConsumer {
+
+
+    private static Logger log = LoggerFactory.getLogger(CreateKafkaConsumer.class);
+
+    // Upper bound for one blocking poll(); same 100000 ms the code used to pass as a raw long.
+    private static final Duration POLL_TIMEOUT = Duration.ofMillis(100000);
+
+    private final String BOOTSTRAP_SERVERS = (String) Configuration.getSettings().get("kafka").get("bootStrapServers");
+    private final String GROUP_ID_CONFIG = (String) Configuration.getSettings().get("kafka").get("group_id");
+    private final String KEY_DESERIALIZER = (String) Configuration.getSettings().get("kafka").get("key_deserialize_class");
+    private final String VAL_DESERIALIZER = (String) Configuration.getSettings().get("kafka").get("value_deserialize_class");
+    private final String KAFKA_TOPIC = (String) Configuration.getSettings().get("kafka").get("topic");
+
+    private final String HDFS_URL= (String) Configuration.getSettings().get("hdfs").get("hdfsURL");
+    private final String HDFS_REMOTE_FILE = (String) Configuration.getSettings().get("hdfs").get("hdfs_remote_file");
+
+    private KafkaConsumer<String, String> kafkaConsumer;
+    private Properties properties = new Properties();
+    private HdfsWriter hdfsWriter;
+    private FileSystem hdfsFileSystem;
+
+
+
+    /**
+     * Creates the Kafka consumer, subscribes it to the configured topic and opens
+     * the HDFS file system the records will be written to.
+     *
+     * @throws IOException if the HDFS file system cannot be created
+     */
+    public CreateKafkaConsumer() throws IOException{
+        setKafkaProperties();
+        kafkaConsumer = new KafkaConsumer<>(properties);
+        try {
+            kafkaConsumer.subscribe(Collections.singletonList(KAFKA_TOPIC));
+            hdfsWriter = new HdfsWriter();
+            hdfsFileSystem = hdfsWriter.createHdfsFileSystem(HDFS_URL);
+        } catch (IOException | RuntimeException e) {
+            // The consumer already holds network resources; release them before
+            // propagating the failure, since the caller never gets a reference to close.
+            kafkaConsumer.close();
+            throw e;
+        }
+        log.info(":::Created kafkaConsumer:::");
+    }
+
+    /** Copies the Kafka connection settings into the consumer properties. */
+    private void setKafkaProperties(){
+
+        properties.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, BOOTSTRAP_SERVERS);
+        properties.put(ConsumerConfig.GROUP_ID_CONFIG, GROUP_ID_CONFIG);
+        properties.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, KEY_DESERIALIZER);
+        properties.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, VAL_DESERIALIZER);
+        log.info(":::Set kafka properties:::");
+    }
+
+
+    /**
+     * Polls the topic forever and appends each record value to the configured HDFS file.
+     * The loop only ends when an exception escapes; the consumer is closed on the way out.
+     *
+     * @throws IOException if opening, writing or closing the HDFS stream fails
+     */
+    public void processKafkaMessage() throws IOException{
+        try{
+            while(true){
+                ConsumerRecords<String, String> recordsPerPartition = kafkaConsumer.poll(POLL_TIMEOUT);
+                if(recordsPerPartition.isEmpty())
+                    log.info(":::recordsPerPartition is NULL:::");
+                else
+                    log.info(":::size of recordsPerPartition: "+recordsPerPartition.count()+" :::");
+
+                for(ConsumerRecord<String, String> record:recordsPerPartition){
+                    log.info("Topic: "+record.topic());
+                    log.info("partition: "+record.partition());
+                    log.info("ReceivedKey: "+record.key()+" ReceivedValue: "+record.value());
+                    // try-with-resources: the stream is closed even when the write throws,
+                    // so a failed write no longer leaks the open HDFS stream.
+                    try (FSDataOutputStream fsDataOutputStream = hdfsWriter.invokeHdfsWriter(hdfsFileSystem, HDFS_REMOTE_FILE)) {
+                        hdfsWriter.writeMessageToHdfs(fsDataOutputStream, record.value());
+                    }
+                }
+
+            }
+        }
+
+        finally {
+            log.info(":::Closing kafkaConsumer:::");
+            kafkaConsumer.close();
+        }
+    }
+}
diff --git a/vnfs/DAaaS/deploy/training-core/hdfs-writer-source-code/hdfs-writer/src/main/java/HdfsWriter.java b/vnfs/DAaaS/deploy/training-core/hdfs-writer-source-code/hdfs-writer/src/main/java/HdfsWriter.java
new file mode 100644
index 00000000..cd5b6635
--- /dev/null
+++ 
b/vnfs/DAaaS/deploy/training-core/hdfs-writer-source-code/hdfs-writer/src/main/java/HdfsWriter.java
@@ -0,0 +1,40 @@
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FSDataOutputStream;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.IOException;
+import java.net.URI;
+import java.nio.charset.StandardCharsets;
+
+/**
+ * Small helper around the Hadoop FileSystem API: opens a file system, opens an
+ * output stream on a file below the root directory, and writes message text to it.
+ */
+public class HdfsWriter {
+
+    // Register the logger under this class so log lines are attributed correctly.
+    private static Logger log = LoggerFactory.getLogger(HdfsWriter.class);
+
+
+    /**
+     * Opens the file system identified by the given URI using a default Hadoop configuration.
+     *
+     * @param hdfsDestination URI of the target file system
+     * @return the file system handle
+     * @throws IOException if the file system cannot be obtained
+     */
+    public FileSystem createHdfsFileSystem(String hdfsDestination) throws IOException {
+        Configuration hdfsConfiguration = new Configuration();
+        FileSystem hdfsFileSystem = FileSystem.get(URI.create(hdfsDestination), hdfsConfiguration);
+        log.info(":::Created hdfsFileSystem:::");
+        return hdfsFileSystem;
+    }
+
+
+    /**
+     * Writes one message to the given stream, encoded as UTF-8.
+     *
+     * @param fsDataOutputStream open stream to write to; not closed by this method
+     * @param bytesFromKafka     message text; a null message is skipped
+     * @throws IOException if the write fails
+     */
+    public void writeMessageToHdfs(FSDataOutputStream fsDataOutputStream, String bytesFromKafka) throws IOException {
+        if (bytesFromKafka == null) {
+            // A Kafka record may carry a null value; there is nothing to persist for it.
+            log.warn(":::Skipping null message:::");
+            return;
+        }
+        // DataOutputStream.writeBytes(String) keeps only the low byte of each char and
+        // would corrupt any non-ASCII text; encode explicitly instead. ASCII output is unchanged.
+        fsDataOutputStream.write(bytesFromKafka.getBytes(StandardCharsets.UTF_8));
+        log.info(":::Wrote to HDFS:::");
+    }
+
+
+    /**
+     * Opens an output stream on "/" + hdfsFile: the file is created when it does not
+     * exist yet, otherwise it is opened for append.
+     *
+     * @param hdfsFileSystem file system to open the file on
+     * @param hdfsFile       file name, resolved directly below the root directory
+     * @return the open stream; the caller is responsible for closing it
+     * @throws IOException if the existence check, create or append fails
+     */
+    public FSDataOutputStream invokeHdfsWriter(FileSystem hdfsFileSystem, String hdfsFile) throws IOException {
+        Path target = new Path("/"+hdfsFile);
+        FSDataOutputStream fsDataOutputStream = hdfsFileSystem.exists(target)
+                ? hdfsFileSystem.append(target)
+                : hdfsFileSystem.create(target);
+        log.info(":::HDFSWriter invoked:::");
+        return fsDataOutputStream;
+    }
+
+}
diff --git a/vnfs/DAaaS/deploy/training-core/hdfs-writer-source-code/hdfs-writer/src/main/java/Orchestrator.java b/vnfs/DAaaS/deploy/training-core/hdfs-writer-source-code/hdfs-writer/src/main/java/Orchestrator.java
new file mode 100644
index 00000000..b4daf2d1
--- /dev/null
+++ b/vnfs/DAaaS/deploy/training-core/hdfs-writer-source-code/hdfs-writer/src/main/java/Orchestrator.java
@@ -0,0 +1,51 @@
+import com.fasterxml.jackson.core.type.TypeReference;
+import com.fasterxml.jackson.databind.ObjectMapper;
+import 
com.fasterxml.jackson.dataformat.yaml.YAMLFactory;
+import config.Configuration;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.InputStream;
+import java.net.URL;
+import java.util.HashMap;
+import java.util.Map;
+
+
+/**
+ * Loads the application's YAML configuration from the classpath and hands the
+ * parsed content to {@link Configuration}.
+ */
+public class Orchestrator {
+
+    private static Logger logger = LoggerFactory.getLogger(Orchestrator.class);
+
+    /**
+     * Parses the given YAML classpath resource and initialises the configuration.
+     *
+     * @param configYamlFile resource name of the YAML file, with or without a leading "/"
+     */
+    public void init(String configYamlFile){
+
+        parseConfigYaml(configYamlFile);
+    }
+
+    /**
+     * Reads the YAML resource into a map and passes it to Configuration.init.
+     * A missing resource or a parse failure is logged; the configuration is then left untouched.
+     */
+    private void parseConfigYaml(String configYaml) {
+
+        // Normalise once and do a single lookup, instead of probing with the raw
+        // name first and then opening a differently-spelled name.
+        String realConfigYaml = configYaml.startsWith("/") ? configYaml : "/" + configYaml;
+
+        // A null resource is legal in try-with-resources; it is simply not closed.
+        try (InputStream is = getClass().getResourceAsStream(realConfigYaml)) {
+            if (is == null) {
+                logger.error("::: Config file missing!!! :::");
+                return;
+            }
+            ObjectMapper mapper = new ObjectMapper(new YAMLFactory());
+            TypeReference<HashMap<String, Object>> typeRef
+                    = new TypeReference<HashMap<String, Object>>() {
+            };
+            Map<String, Object> configs = mapper.readValue(is, typeRef);
+            new Configuration().init(configs);
+
+        } catch (Exception e) {
+            // Pass the exception itself so the stack trace is kept; getMessage() may even be null.
+            logger.error("Failed to load configuration from " + realConfigYaml, e);
+        }
+    }
+}
+
diff --git a/vnfs/DAaaS/deploy/training-core/hdfs-writer-source-code/hdfs-writer/src/main/java/config/Configuration.java b/vnfs/DAaaS/deploy/training-core/hdfs-writer-source-code/hdfs-writer/src/main/java/config/Configuration.java
new file mode 100644
index 00000000..c7de131b
--- /dev/null
+++ b/vnfs/DAaaS/deploy/training-core/hdfs-writer-source-code/hdfs-writer/src/main/java/config/Configuration.java
@@ -0,0 +1,38 @@
+package config;
+
+import java.util.HashMap;
+import java.util.Iterator;
+import java.util.Map;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+
+/**
+ * Process-wide holder for the parsed YAML settings, keyed by top-level section
+ * name (for example "kafka" or "hdfs"), each mapping to that section's key/value pairs.
+ */
+public class Configuration{
+
+    private static Logger log = LoggerFactory.getLogger(Configuration.class);
+    private static Map<String, Map<String, Object>> settings;
+
+    /**
+     * Replaces the stored settings with the map-valued top-level entries of the
+     * given YAML content. A null argument results in empty settings.
+     *
+     * @param yamlConfigs parsed YAML document, may be null
+     */
+    @SuppressWarnings("unchecked")
+    public void init(Map<String, Object> yamlConfigs){
+        // Fill a local map and publish it with one assignment, so getSettings()
+        // never hands out a map that is still being populated.
+        Map<String, Map<String, Object>> parsed = new HashMap<>();
+
+        if(yamlConfigs!=null){
+            for(Map.Entry<String, Object> entry : yamlConfigs.entrySet()){
+                Object value = entry.getValue();
+
+                if(value instanceof Map){
+                    parsed.put(entry.getKey(), (Map<String, Object>) value);
+                } else {
+                    // Only sections are stored; say so instead of dropping the key silently.
+                    log.warn(":::Ignoring top-level key '" + entry.getKey() + "': not a section :::");
+                }
+            }
+        }
+        settings = parsed;
+        log.info(":::Settings initiated :::");
+    }
+
+    /**
+     * @return the settings stored by the last init call, or null when init was never called
+     */
+    public static Map<String, Map<String, Object>> getSettings() {
+        return settings;
+    }
+}
\ No newline at end of file
diff --git a/vnfs/DAaaS/deploy/training-core/hdfs-writer-source-code/hdfs-writer/src/main/java/kafka2hdfsApp.java b/vnfs/DAaaS/deploy/training-core/hdfs-writer-source-code/hdfs-writer/src/main/java/kafka2hdfsApp.java
new file mode 100644
index 00000000..5c041134
--- /dev/null
+++ b/vnfs/DAaaS/deploy/training-core/hdfs-writer-source-code/hdfs-writer/src/main/java/kafka2hdfsApp.java
@@ -0,0 +1,14 @@
+import java.io.IOException;
+
+/**
+ * Entry point: loads the YAML configuration named on the command line, then
+ * runs the Kafka-to-HDFS copy loop.
+ */
+public class kafka2hdfsApp {
+
+    /**
+     * @param args the config YAML resource name; taken from the second argument when
+     *             two or more are given (the historical position), otherwise from the only argument
+     * @throws IOException if HDFS setup or writing fails
+     */
+    public static void main(String[] args) throws IOException {
+        System.out.println("Begin::: kafka2hdfsApp");
+        if (args.length == 0) {
+            System.err.println("Usage: kafka2hdfsApp <config-yaml-resource>");
+            System.exit(1);
+        }
+        // The code used to read args[1] unconditionally, which throws
+        // ArrayIndexOutOfBoundsException for a single-argument invocation.
+        // Existing two-argument invocations keep working unchanged.
+        String configYaml = args.length > 1 ? args[1] : args[0];
+
+        Orchestrator orchestrator = new Orchestrator();
+        orchestrator.init(configYaml);
+
+        CreateKafkaConsumer createKafkaConsumer = new CreateKafkaConsumer();
+        createKafkaConsumer.processKafkaMessage();
+
+    }
+}
diff --git a/vnfs/DAaaS/deploy/training-core/hdfs-writer-source-code/hdfs-writer/src/main/resources/configs.yaml b/vnfs/DAaaS/deploy/training-core/hdfs-writer-source-code/hdfs-writer/src/main/resources/configs.yaml
new file mode 100644
index 00000000..8955c304
--- /dev/null
+++ b/vnfs/DAaaS/deploy/training-core/hdfs-writer-source-code/hdfs-writer/src/main/resources/configs.yaml
@@ -0,0 +1,10 @@
+kafka:
+  bootStrapServers:
+  group_id:
+  key_deserialize_class:
+  value_deserialize_class:
+  topic:
+
+hdfs:
+  hdfsURL:
+  hdfs_remote_file:
diff --git a/vnfs/DAaaS/deploy/training-core/values.yaml b/vnfs/DAaaS/deploy/training-core/values.yaml
new file mode 100644
index 00000000..fd98eb36
--- /dev/null
+++ b/vnfs/DAaaS/deploy/training-core/values.yaml
@@ -0,0 +1,29 @@
+# Copyright © 2019 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +################################################################# +# Global configuration defaults. +################################################################# +global: + nodePortPrefix: 310 + repository: nexus3.onap.org:10001 + readinessRepository: oomk8s + readinessImage: readiness-check:2.0.0 + loggingRepository: docker.elastic.co + loggingImage: beats/filebeat:5.5.0 + +################################################################# +# k8s Operator Day-0 configuration defaults. +################################################################# + |