diff options
author | Dileep Ranganathan <dileep.ranganathan@intel.com> | 2019-05-30 12:38:37 -0700 |
---|---|---|
committer | Dileep Ranganathan <dileep.ranganathan@intel.com> | 2019-05-30 21:11:52 +0000 |
commit | 3d5a3e06530c1250d48f7d838c619f3bfbcd019d (patch) | |
tree | 349e370c43ce7318b3f7eb7736345de6872cbef2 /vnfs/DAaaS/sample-apps/training/sample-spark-app | |
parent | 31802660dfe74a8671ae29789f0018f0f887ea1a (diff) |
Refactor Distributed Analytics project structure
Modified the project structure to improve maintainability and to add future CI and
integration test support.
Change-Id: Id30bfb1f83f23785a6b5f99e81f42f752d59c0f8
Issue-ID: ONAPARC-280
Signed-off-by: Dileep Ranganathan <dileep.ranganathan@intel.com>
Diffstat (limited to 'vnfs/DAaaS/sample-apps/training/sample-spark-app')
6 files changed, 292 insertions, 0 deletions
diff --git a/vnfs/DAaaS/sample-apps/training/sample-spark-app/.helmignore b/vnfs/DAaaS/sample-apps/training/sample-spark-app/.helmignore
new file mode 100644
index 00000000..50af0317
--- /dev/null
+++ b/vnfs/DAaaS/sample-apps/training/sample-spark-app/.helmignore
@@ -0,0 +1,22 @@
+# Patterns to ignore when building packages.
+# This supports shell glob matching, relative path matching, and
+# negation (prefixed with !). Only one pattern per line.
+.DS_Store
+# Common VCS dirs
+.git/
+.gitignore
+.bzr/
+.bzrignore
+.hg/
+.hgignore
+.svn/
+# Common backup files
+*.swp
+*.bak
+*.tmp
+*~
+# Various IDEs
+.project
+.idea/
+*.tmproj
+.vscode/
diff --git a/vnfs/DAaaS/sample-apps/training/sample-spark-app/Chart.yaml b/vnfs/DAaaS/sample-apps/training/sample-spark-app/Chart.yaml
new file mode 100644
index 00000000..42ed0400
--- /dev/null
+++ b/vnfs/DAaaS/sample-apps/training/sample-spark-app/Chart.yaml
@@ -0,0 +1,5 @@
+apiVersion: v1
+appVersion: "1.0"
+description: A sample spark application which finds the top users from the apache logs which is stored in the remote hdfs-k8s cluster
+name: sample-spark-app-apache-log-analysis
+version: 0.1.0
diff --git a/vnfs/DAaaS/sample-apps/training/sample-spark-app/Dockerfile b/vnfs/DAaaS/sample-apps/training/sample-spark-app/Dockerfile
new file mode 100644
index 00000000..cd42d4c7
--- /dev/null
+++ b/vnfs/DAaaS/sample-apps/training/sample-spark-app/Dockerfile
@@ -0,0 +1,133 @@
+# Copyright (c) 2019 Intel Corporation
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+# Ported kubernetes spark image to Ubuntu
+
+FROM ubuntu:18.04
+
+# Install jdk
+RUN apt-get update -yqq && \
+    apt-get install -y locales openjdk-8-jdk && rm -rf /var/lib/apt/lists/* \
+    && localedef -i en_US -c -f UTF-8 -A /usr/share/locale/locale.alias en_US.UTF-8
+
+# Install all the essentials
+RUN apt-get update --fix-missing && \
+    apt-get install -y numactl wget curl bzip2 ca-certificates libglib2.0-0 libxext6 libsm6 libxrender1 \
+    git mercurial subversion build-essential openssh-server openssh-client net-tools && \
+    apt-get clean && \
+    rm -rf /var/lib/apt/lists/*
+
+ENV LANG en_US.utf8
+ENV JAVA_HOME /usr/lib/jvm/java-8-openjdk-amd64
+ENV PATH $JAVA_HOME/bin:$PATH
+ENV PATH /opt/conda/bin:$PATH
+ENV OPENMPI_VERSION 3.1
+
+# Install openMPI
+RUN mkdir /tmp/openmpi && \
+    cd /tmp/openmpi && \
+    wget --quiet https://www.open-mpi.org/software/ompi/v${OPENMPI_VERSION}/downloads/openmpi-${OPENMPI_VERSION}.2.tar.gz -O openmpi.tar.gz && \
+    tar zxf openmpi.tar.gz && \
+    cd openmpi-${OPENMPI_VERSION}.2 && \
+    ./configure --enable-orterun-prefix-by-default && \
+    make -j $(nproc) all && \
+    make install && \
+    ldconfig && \
+    rm -rf /tmp/openmpi
+
+# Install miniconda
+RUN wget --quiet https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh -O ~/miniconda.sh && \
+    /bin/bash ~/miniconda.sh -b -p /opt/conda && \
+    rm ~/miniconda.sh && \
+    ln -s /opt/conda/etc/profile.d/conda.sh /etc/profile.d/conda.sh && \
+    echo ". /opt/conda/etc/profile.d/conda.sh" >> ~/.bashrc && \
+    echo "conda activate base" >> ~/.bashrc
+
+# Install tf & keras using conda in the virtual_environment:tf_env
+SHELL ["/bin/bash", "-c"]
+RUN conda update -n base -c defaults conda && \
+    conda create -n tf_env
+RUN conda install -n tf_env -y -c anaconda \
+    pip tensorflow keras nltk
+
+RUN echo "conda activate tf_env" >> ~/.bashrc && \
+    conda install -n tf_env -y -c conda-forge clangdev
+
+# Shell state does not persist across RUN layers, so the tf_env pip is invoked by absolute path.
+RUN HOROVOD_WITH_TENSORFLOW=1 /opt/conda/envs/tf_env/bin/pip install --no-cache-dir horovod
+
+# openMPI sane defaults:
+RUN echo "hwloc_base_binding_policy = none" >> /usr/local/etc/openmpi-mca-params.conf && \
+    echo "rmaps_base_mapping_policy = slot" >> /usr/local/etc/openmpi-mca-params.conf && \
+    echo "btl_tcp_if_exclude = lo,docker0" >> /usr/local/etc/openmpi-mca-params.conf
+
+# Allow OpenSSH to talk to containers without asking for confirmation
+RUN cat /etc/ssh/ssh_config | grep -v StrictHostKeyChecking > /etc/ssh/ssh_config.new && \
+    echo "    StrictHostKeyChecking no" >> /etc/ssh/ssh_config.new && \
+    mv /etc/ssh/ssh_config.new /etc/ssh/ssh_config
+
+# Install tini. The release is pinned rather than scraped from the GitHub
+# "latest" page so that rebuilds are reproducible; curl and dpkg are already
+# present from the "essentials" layer above.
+RUN TINI_VERSION=0.18.0 && \
+    curl -fsSL "https://github.com/krallin/tini/releases/download/v${TINI_VERSION}/tini_${TINI_VERSION}.deb" -o tini.deb && \
+    dpkg -i tini.deb && \
+    rm tini.deb
+
+# This is needed to match the original entrypoint.sh file.
+RUN cp /usr/bin/tini /sbin
+
+# Begin: Installing spark
+ARG spark_jars=jars
+ARG img_path=kubernetes/dockerfiles
+ARG k8s_tests=kubernetes/tests
+
+# Before building the docker image, first build and make a Spark distribution following
+# the instructions in http://spark.apache.org/docs/latest/building-spark.html.
+# If this docker file is being used in the context of building your images from a Spark
+# distribution, the docker build command should be invoked from the top level directory
+# of the Spark distribution. E.g.:
+# docker build -t spark:latest -f kubernetes/dockerfiles/spark/ubuntu18.04/Dockerfile .
+
+RUN mkdir -p /opt/spark && \
+    mkdir -p /opt/spark/work-dir && \
+    touch /opt/spark/RELEASE && \
+    rm /bin/sh && \
+    ln -sv /bin/bash /bin/sh && \
+    echo "auth required pam_wheel.so use_uid" >> /etc/pam.d/su && \
+    chgrp root /etc/passwd && chmod ug+rw /etc/passwd
+
+
+COPY ${spark_jars} /opt/spark/jars
+COPY bin /opt/spark/bin
+COPY sbin /opt/spark/sbin
+COPY ${img_path}/spark/entrypoint.sh /opt/
+COPY examples /opt/spark/examples
+COPY ${k8s_tests} /opt/spark/tests
+COPY data /opt/spark/data
+ENV SPARK_HOME /opt/spark
+
+RUN mkdir /opt/spark/python
+COPY python/pyspark /opt/spark/python/pyspark
+COPY python/lib /opt/spark/python/lib
+ENV PYTHONPATH /opt/spark/python/lib/pyspark.zip:/opt/spark/python/lib/py4j-*.zip
+
+WORKDIR /opt/spark/work-dir
+
+ENTRYPOINT [ "/opt/entrypoint.sh" ]
+
+# End: Installing spark
diff --git a/vnfs/DAaaS/sample-apps/training/sample-spark-app/templates/SampleSparkApp.yaml b/vnfs/DAaaS/sample-apps/training/sample-spark-app/templates/SampleSparkApp.yaml
new file mode 100644
index 00000000..f728f82e
--- /dev/null
+++ b/vnfs/DAaaS/sample-apps/training/sample-spark-app/templates/SampleSparkApp.yaml
@@ -0,0 +1,43 @@
+apiVersion: "sparkoperator.k8s.io/v1beta1"
+kind: SparkApplication
+metadata:
+  name: {{ .Values.nameOfTheSparkApp }}
+  namespace: {{ .Release.Namespace }}
+spec:
+  type: {{ .Values.programmingLanguageType }}
+  mode: {{ .Values.modeOfSparkApp | default "cluster" }}
+  image: {{ quote .Values.image }}
+  imagePullPolicy: {{ .Values.imagePullPolicy | default "IfNotPresent" }}
+  mainClass: {{ .Values.mainClassOfTheSparkApp }}
+  mainApplicationFile: {{ .Values.mainApplicationFileOfTheSparkApp }}
+  arguments:
+  {{- range .Values.argumentsOfTheSparkProgram }}
+    - {{ . }}
+  {{- end }}
+  hadoopConfigMap: {{ .Values.hadoopConfigMap }}
+  restartPolicy:
+    type: {{ .Values.restartPolicy | default "Never" }}
+  volumes:
+    - name: {{ .Values.volumesName | default "test-volume" | quote }}
+      hostPath:  # Kubernetes Volume field names are case-sensitive
+        path: {{ .Values.hostpath | default "/tmp" | quote }}
+        type: {{ .Values.hostpathType | default "Directory" }}
+  driver:
+    cores: {{ .Values.driverCores | default 0.1 }}
+    coreLimit: {{ .Values.driverCoreLimit | default "200m" | quote }}  # default first, then quote
+    memory: {{ .Values.driverMemory | default "1024m" | quote }}
+    labels:
+      version: 2.4.0
+    serviceAccount: spark
+    volumeMounts:
+      - name: {{ .Values.driverVolumeMountsName | default "test-volume" | quote }}
+        mountPath: {{ .Values.driverVolumeMountPath | default "/tmp" | quote }}
+  executor:
+    cores: {{ .Values.executorCores | default 1 }}
+    instances: {{ .Values.executorInstances | default 1 }}
+    memory: {{ .Values.executorMemory | default "512m" | quote }}
+    labels:
+      version: 2.4.0
+    volumeMounts:
+      - name: {{ .Values.executorVolumeMountsName | default "test-volume" | quote }}
+        mountPath: {{ .Values.executorVolumeMountPath | default "/tmp" | quote }}
diff --git a/vnfs/DAaaS/sample-apps/training/sample-spark-app/templates/_helpers.tpl b/vnfs/DAaaS/sample-apps/training/sample-spark-app/templates/_helpers.tpl
new file mode 100644
index 00000000..6f51811d
--- /dev/null
+++ b/vnfs/DAaaS/sample-apps/training/sample-spark-app/templates/_helpers.tpl
@@ -0,0 +1,32 @@
+{{/* vim: set filetype=mustache: */}}
+{{/*
+Expand the name of the chart.
+*/}}
+{{- define "sample-spark-app.name" -}}
+{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" -}}
+{{- end -}}
+
+{{/*
+Create a default fully qualified app name.
+We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec).
+If release name contains chart name it will be used as a full name.
+*/}}
+{{- define "sample-spark-app.fullname" -}}
+{{- if .Values.fullnameOverride -}}
+{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" -}}
+{{- else -}}
+{{- $name := default .Chart.Name .Values.nameOverride -}}
+{{- if contains $name .Release.Name -}}
+{{- .Release.Name | trunc 63 | trimSuffix "-" -}}
+{{- else -}}
+{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" -}}
+{{- end -}}
+{{- end -}}
+{{- end -}}
+
+{{/*
+Create chart name and version as used by the chart label.
+*/}}
+{{- define "sample-spark-app.chart" -}}
+{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" -}}
+{{- end -}}
diff --git a/vnfs/DAaaS/sample-apps/training/sample-spark-app/values.yaml b/vnfs/DAaaS/sample-apps/training/sample-spark-app/values.yaml
new file mode 100644
index 00000000..afb48d67
--- /dev/null
+++ b/vnfs/DAaaS/sample-apps/training/sample-spark-app/values.yaml
@@ -0,0 +1,57 @@
+# Default values for sample-spark-app.
+# This is a YAML-formatted file.
+# Declare variables to be passed into your templates.
+
+
+#===========================KUBERNETES POD RELATED CONFIGs========================
+image: spark-tf-keras-horo:latest
+imagePullPolicy: Never
+restartPolicy: Never
+volumesName: test-volume
+hostpath: /tmp
+hostpathType: Directory
+
+
+
+#============================SPARK APP RELATED CONFIGs=============================
+
+nameOfTheSparkApp: spark-apache-logs2
+# Python or Scala supported.
+programmingLanguageType: Scala
+modeOfSparkApp: cluster
+mainClassOfTheSparkApp: ApacheLogAnalysis
+# can be http path, s3 path, minio path
+mainApplicationFileOfTheSparkApp: https://github.com/mohanraj1311/ApacheLogAnalysisJar/raw/master/analysisofapachelogs_2.11-0.1.jar
+argumentsOfTheSparkProgram:
+  - hdfs://hdfs-1-namenode-1.hdfs-1-namenode.hdfs1.svc.cluster.local:8020/data/apache-logs
+
+
+
+#============================SPARK DRIVER RELATED CONFIGs=========================
+driverCores: 0.1
+driverCoreLimit: 200m
+driverMemory: 1024m
+driverVolumeMountsName: test-volume
+driverVolumeMountPath: /tmp
+
+
+
+#============================SPARK EXECUTOR RELATED CONFIGs=======================
+executorCores: 1
+executorInstances: 1
+executorMemory: 512m
+executorVolumeMountsName: test-volume
+executorVolumeMountPath: /tmp
+
+
+
+#===========================HADOOP RELATED CONFIGs===============================
+# config map of the hdfs
+hadoopConfigMap: hdfs-1-config
+
+
+###################################################################################
+
+
+
+