Refactor Distributed Analytics project structure

Modified the project structure to improve maintainability and to add future CI and integration test support. Change-Id: Id30bfb1f83f23785a6b5f99e81f42f752d59c0f8 Issue-ID: ONAPARC-280 Signed-off-by: Dileep Ranganathan <dileep.ranganathan@intel.com>
author: Dileep Ranganathan <dileep.ranganathan@intel.com> 2019-05-30 12:38:37 -0700
committer: Dileep Ranganathan <dileep.ranganathan@intel.com> 2019-05-30 21:11:52 +0000
commit: 3d5a3e06530c1250d48f7d838c619f3bfbcd019d (patch)
tree: 349e370c43ce7318b3f7eb7736345de6872cbef2 /vnfs/DAaaS/sample-apps/training/sample-spark-app
parent: 31802660dfe74a8671ae29789f0018f0f887ea1a (diff)
6 files changed, 292 insertions, 0 deletions
diff --git a/vnfs/DAaaS/sample-apps/training/sample-spark-app/.helmignore b/vnfs/DAaaS/sample-apps/training/sample-spark-app/.helmignore
new file mode 100644
index 00000000..50af0317
--- /dev/null
+++ b/vnfs/DAaaS/sample-apps/training/sample-spark-app/.helmignore
@@ -0,0 +1,22 @@
+# Patterns to ignore when building packages.
+# This supports shell glob matching, relative path matching, and
+# negation (prefixed with !). Only one pattern per line.
+.DS_Store
+# Common VCS dirs
+.git/
+.gitignore
+.bzr/
+.bzrignore
+.hg/
+.hgignore
+.svn/
+# Common backup files
+*.swp
+*.bak
+*.tmp
+*~
+# Various IDEs
+.project
+.idea/
+*.tmproj
+.vscode/
diff --git a/vnfs/DAaaS/sample-apps/training/sample-spark-app/Chart.yaml b/vnfs/DAaaS/sample-apps/training/sample-spark-app/Chart.yaml
new file mode 100644
index 00000000..42ed0400
--- /dev/null
+++ b/vnfs/DAaaS/sample-apps/training/sample-spark-app/Chart.yaml
@@ -0,0 +1,5 @@
+apiVersion: v1
+appVersion: "1.0"
+description: A sample Spark application that finds the top users in Apache logs stored in a remote hdfs-k8s cluster
+name: sample-spark-app-apache-log-analysis
+version: 0.1.0
diff --git a/vnfs/DAaaS/sample-apps/training/sample-spark-app/Dockerfile b/vnfs/DAaaS/sample-apps/training/sample-spark-app/Dockerfile
new file mode 100644
index 00000000..cd42d4c7
--- /dev/null
+++ b/vnfs/DAaaS/sample-apps/training/sample-spark-app/Dockerfile
@@ -0,0 +1,133 @@
+# Copyright (c) 2019 Intel Corporation
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+# Ported kubernetes spark image to Ubuntu
+
+FROM ubuntu:18.04
+
+# Install the JDK and generate the en_US.UTF-8 locale; update and install share one layer so a cached, stale package index is never reused
+RUN apt-get update -yqq && \
+    apt-get install -y locales openjdk-8-jdk && rm -rf /var/lib/apt/lists/* \
+    && localedef -i en_US -c -f UTF-8 -A /usr/share/locale/locale.alias en_US.UTF-8
+
+# Base tooling (package list kept in alphabetical order); the apt cache and package lists are removed in the same layer
+RUN apt-get update --fix-missing && \
+    apt-get install -y build-essential bzip2 ca-certificates curl git libglib2.0-0 libsm6 libxext6 \
+                       libxrender1 mercurial net-tools numactl openssh-client openssh-server subversion wget && \
+    apt-get clean && \
+    rm -rf /var/lib/apt/lists/*
+
+# Locale, JDK location, PATH (conda first, then the JDK) and the Open MPI release series used by the build step below
+ENV LANG=en_US.utf8
+ENV JAVA_HOME=/usr/lib/jvm/java-8-openjdk-amd64
+ENV PATH=/opt/conda/bin:$JAVA_HOME/bin:$PATH
+ENV OPENMPI_VERSION=3.1
+
+# Build and install Open MPI ${OPENMPI_VERSION}.2 from source; the extracted directory name is derived from OPENMPI_VERSION as well
+RUN mkdir /tmp/openmpi && \
+    cd /tmp/openmpi && \
+    wget --quiet https://www.open-mpi.org/software/ompi/v${OPENMPI_VERSION}/downloads/openmpi-${OPENMPI_VERSION}.2.tar.gz -O openmpi.tar.gz && \
+    tar zxf openmpi.tar.gz && \
+    cd openmpi-${OPENMPI_VERSION}.2 && \
+    ./configure --enable-orterun-prefix-by-default && \
+    make -j $(nproc) all && \
+    make install && \
+    ldconfig && \
+    rm -rf /tmp/openmpi
+
+# Install miniconda into /opt/conda, link conda.sh into /etc/profile.d, and make ~/.bashrc load conda and activate "base"
+RUN wget --quiet -O ~/miniconda.sh https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh && \
+    /bin/bash ~/miniconda.sh -b -p /opt/conda && \
+    rm ~/miniconda.sh && \
+    ln -s /opt/conda/etc/profile.d/conda.sh /etc/profile.d/conda.sh && \
+    echo '. /opt/conda/etc/profile.d/conda.sh' >> ~/.bashrc && \
+    echo 'conda activate base' >> ~/.bashrc
+
+# Install tf & keras using conda in the virtual_environment:tf_env (-y keeps every conda step non-interactive)
+SHELL ["/bin/bash", "-c"]
+RUN conda update -y -n base -c defaults conda && \
+    conda create -y -n tf_env
+RUN conda install -n tf_env -y -c anaconda \
+    pip tensorflow keras nltk
+
+RUN echo "conda activate tf_env" >> ~/.bashrc && \
+    conda install -n tf_env -y -c conda-forge clangdev
+
+# "RUN source ~/.bashrc" was dropped: every RUN starts a fresh shell, so it changed nothing; pip is invoked by its tf_env path instead
+RUN HOROVOD_WITH_TENSORFLOW=1 /opt/conda/envs/tf_env/bin/pip install --no-cache-dir horovod
+
+# openMPI sane defaults, appended to the system-wide MCA parameter file in a single write
+RUN printf '%s\n' "hwloc_base_binding_policy = none" \
+                  "rmaps_base_mapping_policy = slot" \
+                  "btl_tcp_if_exclude = lo,docker0" >> /usr/local/etc/openmpi-mca-params.conf
+
+# Let OpenSSH reach other containers without a confirmation prompt: drop existing StrictHostKeyChecking lines, then append "no"
+RUN grep -v StrictHostKeyChecking /etc/ssh/ssh_config > /etc/ssh/ssh_config.new && \
+    echo "    StrictHostKeyChecking no" >> /etc/ssh/ssh_config.new && \
+    mv /etc/ssh/ssh_config.new /etc/ssh/ssh_config
+
+# Install tini from a pinned release instead of scraping the "latest" release page, so the build is reproducible; curl -f fails on HTTP errors
+RUN apt-get update && apt-get install -y curl dpkg && \
+    TINI_VERSION=0.18.0 && echo ${TINI_VERSION} && \
+    curl -fsSL "https://github.com/krallin/tini/releases/download/v${TINI_VERSION}/tini_${TINI_VERSION}.deb" > tini.deb && \
+    dpkg -i tini.deb && \
+    rm tini.deb && \
+    apt-get clean && rm -rf /var/lib/apt/lists/*
+
+# This is needed to match the original entrypoint.sh file.
+RUN cp /usr/bin/tini /sbin
+
+# Begin: Installing spark
+ARG spark_jars=jars
+ARG img_path=kubernetes/dockerfiles
+ARG k8s_tests=kubernetes/tests
+
+# Build a Spark distribution first, following http://spark.apache.org/docs/latest/building-spark.html,
+# then invoke docker build from the top-level directory of that distribution so the relative
+# paths used by the COPY instructions (and the ARG defaults above) resolve. E.g.:
+# docker build -t spark:latest -f kubernetes/dockerfiles/spark/ubuntu18.04/Dockerfile .
+# The RUN below creates /opt/spark with its work-dir and RELEASE marker, points /bin/sh at bash,
+# appends a pam_wheel rule to /etc/pam.d/su, and makes /etc/passwd writable by the root group.
+
+RUN mkdir -p /opt/spark/work-dir && \
+    touch /opt/spark/RELEASE && \
+    rm /bin/sh && \
+    ln -sv /bin/bash /bin/sh && \
+    echo "auth required pam_wheel.so use_uid" >> /etc/pam.d/su && \
+    chgrp root /etc/passwd && \
+    chmod ug+rw /etc/passwd
+
+
+COPY ${spark_jars} /opt/spark/jars
+COPY bin /opt/spark/bin
+COPY sbin /opt/spark/sbin
+COPY ${img_path}/spark/entrypoint.sh /opt/
+COPY examples /opt/spark/examples
+COPY ${k8s_tests} /opt/spark/tests
+COPY data /opt/spark/data
+ENV SPARK_HOME=/opt/spark
+# PySpark sources and their bundled libraries
+RUN mkdir /opt/spark/python
+COPY python/pyspark /opt/spark/python/pyspark
+COPY python/lib /opt/spark/python/lib
+ENV PYTHONPATH=/opt/spark/python/lib/pyspark.zip:/opt/spark/python/lib/py4j-*.zip
+# NOTE(review): ENV does not expand the py4j-*.zip glob above - confirm something resolves it at runtime
+WORKDIR /opt/spark/work-dir
+
+ENTRYPOINT [ "/opt/entrypoint.sh" ]
+
+# End: Installing spark
diff --git a/vnfs/DAaaS/sample-apps/training/sample-spark-app/templates/SampleSparkApp.yaml b/vnfs/DAaaS/sample-apps/training/sample-spark-app/templates/SampleSparkApp.yaml
new file mode 100644
index 00000000..f728f82e
--- /dev/null
+++ b/vnfs/DAaaS/sample-apps/training/sample-spark-app/templates/SampleSparkApp.yaml
@@ -0,0 +1,43 @@
+apiVersion: "sparkoperator.k8s.io/v1beta1"
+kind: SparkApplication  # templated fields below are read from values.yaml; the namespace comes from the release
+metadata:
+  name: {{ .Values.nameOfTheSparkApp }}
+  namespace: {{ .Release.Namespace }}
+spec:
+  type: {{ .Values.programmingLanguageType }}
+  mode: {{ .Values.modeOfSparkApp | default "cluster" }}
+  image: {{ .Values.image | quote }}
+  imagePullPolicy: {{ .Values.imagePullPolicy | default "IfNotPresent" }}
+  mainClass: {{ .Values.mainClassOfTheSparkApp }}
+  mainApplicationFile: {{ .Values.mainApplicationFileOfTheSparkApp }}
+  arguments:  # one quoted list item per entry, so numeric-looking arguments stay strings
+    {{- range .Values.argumentsOfTheSparkProgram }}
+    - {{ . | quote }}
+    {{- end }}
+  hadoopConfigMap: {{ .Values.hadoopConfigMap }}
+  restartPolicy:
+    type: {{ .Values.restartPolicy | default "Never" }}
+  volumes:  # "default" runs before "quote" everywhere below, so an unset value still yields a quoted fallback
+    - name: {{ .Values.volumesName | default "test-volume" | quote }}
+      hostPath:  # Kubernetes field names are case-sensitive: hostPath, not hostpath
+        path: {{ .Values.hostpath | default "/tmp" | quote }}
+        type: {{ .Values.hostpathType | default "Directory" }}
+  driver:
+    cores: {{ .Values.driverCores | default 0.1 }}
+    coreLimit: {{ .Values.driverCoreLimit | default "200m" | quote }}
+    memory: {{ .Values.driverMemory | default "1024m" | quote }}
+    labels:
+      version: 2.4.0
+    serviceAccount: spark
+    volumeMounts:
+      - name: {{ .Values.driverVolumeMountsName | default "test-volume" | quote }}
+        mountPath: {{ .Values.driverVolumeMountPath | default "/tmp" | quote }}
+  executor:
+    cores: {{ .Values.executorCores | default 1 }}
+    instances: {{ .Values.executorInstances | default 1 }}
+    memory: {{ .Values.executorMemory | default "512m" | quote }}
+    labels:
+      version: 2.4.0
+    volumeMounts:
+      - name: {{ .Values.executorVolumeMountsName | default "test-volume" | quote }}
+        mountPath: {{ .Values.executorVolumeMountPath | default "/tmp" | quote }}
diff --git a/vnfs/DAaaS/sample-apps/training/sample-spark-app/templates/_helpers.tpl b/vnfs/DAaaS/sample-apps/training/sample-spark-app/templates/_helpers.tpl
new file mode 100644
index 00000000..6f51811d
--- /dev/null
+++ b/vnfs/DAaaS/sample-apps/training/sample-spark-app/templates/_helpers.tpl
@@ -0,0 +1,32 @@
+{{/* vim: set filetype=mustache: */}}
+{{/*
+Chart name: .Values.nameOverride when set, otherwise .Chart.Name; cut to 63 characters with a trailing "-" removed.
+*/}}
+{{- define "sample-spark-app.name" -}}
+{{- .Values.nameOverride | default .Chart.Name | trunc 63 | trimSuffix "-" -}}
+{{- end -}}
+
+{{/*
+Fully qualified app name, cut to 63 characters because some Kubernetes name fields are limited
+to this (by the DNS naming spec). Precedence: .Values.fullnameOverride, then the release name
+alone when it already contains the chart name, otherwise "<release name>-<chart name>".
+*/}}
+{{- define "sample-spark-app.fullname" -}}
+{{- if .Values.fullnameOverride -}}
+{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" -}}
+{{- else -}}
+{{- $chartName := .Values.nameOverride | default .Chart.Name -}}
+{{- if .Release.Name | contains $chartName -}}
+{{- .Release.Name | trunc 63 | trimSuffix "-" -}}
+{{- else -}}
+{{- printf "%s-%s" .Release.Name $chartName | trunc 63 | trimSuffix "-" -}}
+{{- end -}}
+{{- end -}}
+{{- end -}}
+
+{{/*
+Chart label value: "<chart name>-<chart version>" with "+" replaced by "_", cut to 63 characters, trailing "-" removed.
+*/}}
+{{- define "sample-spark-app.chart" -}}
+{{- list .Chart.Name .Chart.Version | join "-" | replace "+" "_" | trunc 63 | trimSuffix "-" -}}
+{{- end -}}
diff --git a/vnfs/DAaaS/sample-apps/training/sample-spark-app/values.yaml b/vnfs/DAaaS/sample-apps/training/sample-spark-app/values.yaml
new file mode 100644
index 00000000..afb48d67
--- /dev/null
+++ b/vnfs/DAaaS/sample-apps/training/sample-spark-app/values.yaml
@@ -0,0 +1,57 @@
+# Default values for sample-spark-app.
+# This is a YAML-formatted file.
+# Declare variables to be passed into your templates.
+
+
+#===========================KUBERNETES POD RELATED CONFIGs========================
+image: spark-tf-keras-horo:latest
+imagePullPolicy: Never
+restartPolicy: Never
+volumesName: test-volume
+hostpath: /tmp
+hostpathType: Directory
+
+
+
+#============================SPARK APP RELATED CONFIGs=============================
+
+nameOfTheSparkApp: spark-apache-logs2 
+# Supported values: Python or Scala.
+programmingLanguageType: Scala
+modeOfSparkApp: cluster
+mainClassOfTheSparkApp: ApacheLogAnalysis
+# Can be an HTTP(S) URL, an S3 path, or a MinIO path.
+mainApplicationFileOfTheSparkApp: https://github.com/mohanraj1311/ApacheLogAnalysisJar/raw/master/analysisofapachelogs_2.11-0.1.jar 
+argumentsOfTheSparkProgram:
+    - hdfs://hdfs-1-namenode-1.hdfs-1-namenode.hdfs1.svc.cluster.local:8020/data/apache-logs 
+
+
+
+#============================SPARK DRIVER RELATED CONFIGs=========================
+driverCores: 0.1
+driverCoreLimit: 200m
+driverMemory: 1024m
+driverVolumeMountsName: test-volume
+driverVolumeMountPath: /tmp 
+
+
+
+#============================SPARK EXECUTOR RELATED CONFIGs=======================
+executorCores: 1 
+executorInstances: 1 
+executorMemory: 512m
+executorVolumeMountsName: test-volume
+executorVolumeMountPath: /tmp
+
+
+
+#===========================HADOOP RELATED CONFIGs===============================
+# Name of the ConfigMap that holds the HDFS configuration.
+hadoopConfigMap: hdfs-1-config
+
+
+###################################################################################
+
+
+
+
author	Dileep Ranganathan <dileep.ranganathan@intel.com>	2019-05-30 12:38:37 -0700
committer	Dileep Ranganathan <dileep.ranganathan@intel.com>	2019-05-30 21:11:52 +0000
commit	3d5a3e06530c1250d48f7d838c619f3bfbcd019d (patch)
tree	349e370c43ce7318b3f7eb7736345de6872cbef2 /vnfs/DAaaS/sample-apps/training/sample-spark-app
parent	31802660dfe74a8671ae29789f0018f0f887ea1a (diff)