summary | refs | log | tree | commit | diff | stats
path: root/vnfs/DAaaS/applications/sample-horovod-app
diff options
context:
space:
mode:
Diffstat (limited to 'vnfs/DAaaS/applications/sample-horovod-app')
-rw-r--r-- vnfs/DAaaS/applications/sample-horovod-app/Dockerfile 3
-rw-r--r-- vnfs/DAaaS/applications/sample-horovod-app/keras_mnist_advanced_modified.py 70
-rw-r--r-- vnfs/DAaaS/applications/sample-horovod-app/sample_values.yaml 18
-rw-r--r-- vnfs/DAaaS/applications/sample-horovod-app/templates/job.yaml 14
-rw-r--r-- vnfs/DAaaS/applications/sample-horovod-app/templates/minio-secrets.yaml 31
5 files changed, 121 insertions, 15 deletions
diff --git a/vnfs/DAaaS/applications/sample-horovod-app/Dockerfile b/vnfs/DAaaS/applications/sample-horovod-app/Dockerfile
index 8bdcf5b6..5b8f5636 100644
--- a/vnfs/DAaaS/applications/sample-horovod-app/Dockerfile
+++ b/vnfs/DAaaS/applications/sample-horovod-app/Dockerfile
@@ -121,7 +121,7 @@ COPY ${spark_jars} /opt/spark/jars
COPY bin /opt/spark/bin
COPY sbin /opt/spark/sbin
COPY ${img_path}/spark/entrypoint.sh /opt/
-COPY examples /opt/spark/examples
+
COPY ${k8s_tests} /opt/spark/tests
COPY data /opt/spark/data
ENV SPARK_HOME /opt/spark
@@ -135,6 +135,7 @@ ENV PATH /opt/conda/envs/tf_env/bin:$PATH
RUN echo "export PATH=/opt/conda/envs/tf_env/bin:$PATH" >> ~/.bashrc
# echo "activate tf_env\n" >> ~/.bashrc
RUN pip install petastorm
+COPY examples /opt/spark/examples
WORKDIR /opt/spark/work-dir
ENTRYPOINT [ "/opt/entrypoint.sh" ]
diff --git a/vnfs/DAaaS/applications/sample-horovod-app/keras_mnist_advanced_modified.py b/vnfs/DAaaS/applications/sample-horovod-app/keras_mnist_advanced_modified.py
index 03425ff7..fa39cb6a 100644
--- a/vnfs/DAaaS/applications/sample-horovod-app/keras_mnist_advanced_modified.py
+++ b/vnfs/DAaaS/applications/sample-horovod-app/keras_mnist_advanced_modified.py
@@ -1,13 +1,19 @@
from __future__ import print_function
import keras
-from keras.datasets import mnist
-from keras.models import Sequential
-from keras.layers import Dense, Dropout, Flatten
-from keras.layers import Conv2D, MaxPooling2D
-from keras.preprocessing.image import ImageDataGenerator
-from keras import backend as K
+import os
+from tensorflow.keras.datasets import mnist
+from tensorflow.keras.models import Sequential
+from tensorflow.keras.layers import Dense, Dropout, Flatten
+from tensorflow.keras.layers import Conv2D, MaxPooling2D
+from tensorflow.keras.preprocessing.image import ImageDataGenerator
+from tensorflow.keras import backend as K
+from tensorflow_estimator.python.estimator.export import export as export_helpers
+from tensorflow.python.saved_model import builder as saved_model_builder
+from tensorflow.python.saved_model import tag_constants, signature_constants
+from tensorflow.python.saved_model.signature_def_utils_impl import predict_signature_def
import tensorflow as tf
-import horovod.keras as hvd
+import horovod.tensorflow.keras as hvd
+
# Horovod: initialize Horovod.
hvd.init()
@@ -53,8 +59,8 @@ print(x_train.shape[0], 'train samples')
print(x_test.shape[0], 'test samples')
# Convert class vectors to binary class matrices
-y_train = keras.utils.to_categorical(y_train, num_classes)
-y_test = keras.utils.to_categorical(y_test, num_classes)
+y_train = tf.keras.utils.to_categorical(y_train, num_classes)
+y_test = tf.keras.utils.to_categorical(y_test, num_classes)
model = Sequential()
model.add(Conv2D(32, kernel_size=(3, 3),
@@ -69,12 +75,12 @@ model.add(Dropout(0.5))
model.add(Dense(num_classes, activation='softmax'))
# Horovod: adjust learning rate based on number of GPUs.
-opt = keras.optimizers.Adadelta(lr=1.0 * hvd.size())
+opt = tf.keras.optimizers.Adadelta(lr=1.0 * hvd.size())
# Horovod: add Horovod Distributed Optimizer.
opt = hvd.DistributedOptimizer(opt)
-model.compile(loss=keras.losses.categorical_crossentropy,
+model.compile(loss=tf.keras.losses.categorical_crossentropy,
optimizer=opt,
metrics=['accuracy'])
@@ -96,12 +102,13 @@ callbacks = [
hvd.callbacks.LearningRateWarmupCallback(warmup_epochs=5, verbose=1),
# Reduce the learning rate if training plateaues.
- keras.callbacks.ReduceLROnPlateau(patience=10, verbose=1),
+ tf.keras.callbacks.ReduceLROnPlateau(patience=10, verbose=1),
]
# Horovod: save checkpoints only on worker 0 to prevent other workers from corrupting them.
if hvd.rank() == 0:
- callbacks.append(keras.callbacks.ModelCheckpoint('./checkpoint-{epoch}.h5'))
+ callbacks.append(tf.keras.callbacks.ModelCheckpoint(
+ './checkpoint-{epoch}.h5'))
# Set up ImageDataGenerators to do data augmentation for the training images.
train_gen = ImageDataGenerator(rotation_range=8, width_shift_range=0.08, shear_range=0.3,
@@ -118,10 +125,45 @@ model.fit_generator(train_gen.flow(x_train, y_train, batch_size=batch_size),
callbacks=callbacks,
epochs=epochs,
verbose=1,
- validation_data=test_gen.flow(x_test, y_test, batch_size=batch_size),
+ validation_data=test_gen.flow(
+ x_test, y_test, batch_size=batch_size),
validation_steps=3 * test_batches // hvd.size())
# Evaluate the model on the full data set.
score = model.evaluate(x_test, y_test, verbose=0)
print('Test loss:', score[0])
print('Test accuracy:', score[1])
+
+# Save Model to Minio
+if hvd.rank() == 0:
+ print('Model Summary')
+ model.summary()
+ print('Exporting trained model to Minio Model Repo')
+ base_path = os.environ['MODEL_BASE_PATH']
+
+ # Option 1(Preferred) - Using Keras api and Tensorflow v1.13 version
+ saved_model_path = tf.contrib.saved_model.save_keras_model(model, base_path)
+ print('Model Saved to {} Using new Keras API!!!'.format(saved_model_path))
+ # Option 2 - Tensorflow v1.13+ Builder saved_model api.
+ # builder = saved_model_builder.SavedModelBuilder(base_path)
+
+ # print(model.input)
+ # print(model.outputs)
+
+ # signature = predict_signature_def(inputs={"inputs": model.input},
+ # outputs={t.name:t for t in model.outputs})
+ # print(signature)
+ # K.set_learning_phase(0)
+ # with K.get_session() as sess:
+ # builder.add_meta_graph_and_variables(sess=sess,
+ # tags=[tag_constants.SERVING],
+ # signature_def_map={'predict': signature})
+ # builder.save()
+ # print('Model Saved to S3 Using Builder!!!')
+
+ # Option 3 - Tensorflow v1.13 Will be deprecated in Tensorflow v2
+ # tf.saved_model.simple_save(
+ # keras.backend.get_session(),
+ # export_path,
+ # inputs={'input_image': model.input},
+ # outputs={t.name: t for t in model.outputs})
diff --git a/vnfs/DAaaS/applications/sample-horovod-app/sample_values.yaml b/vnfs/DAaaS/applications/sample-horovod-app/sample_values.yaml
index 6ac31359..7030dd24 100644
--- a/vnfs/DAaaS/applications/sample-horovod-app/sample_values.yaml
+++ b/vnfs/DAaaS/applications/sample-horovod-app/sample_values.yaml
@@ -3,6 +3,7 @@
ssh:
useSecrets: true
+ port: 22
hostKey: |-
-----BEGIN RSA PRIVATE KEY-----
ThisIsPrivateKeyThisIsPrivateKeyThisIsPrivateKeyThisIsPrivateKey
@@ -42,3 +43,20 @@ master:
pullPolicy: Never
args:
- "mpirun -np 3 --hostfile /horovod/generated/hostfile --mca orte_keep_fqdn_hostnames t --allow-run-as-root --display-map --tag-output --timestamp-output sh -c '/opt/conda/envs/tf_env/bin/python /opt/spark/examples/src/main/python/tensorflow/keras_mnist_advanced_modified.py'"
+
+## Model repository information (Minio)
+minio:
+ existingSecret: ""
+ accessKey: "onapdaas"
+ secretKey: "onapsecretdaas"
+ environment:
+ AWS_REGION: "us-west-1"
+ S3_REGION: "us-west-1"
+ S3_ENDPOINT: "minio.edge1.svc.cluster.local:9000"
+ AWS_ENDPOINT_URL: "http://minio.edge1.svc.cluster.local:9000"
+ S3_USE_HTTPS: 0
+ S3_VERIFY_SSL: 0
+ AWS_LOG_LEVEL: 3
+ TF_CPP_MIN_LOG_LEVEL: 3
+ MODEL_NAME: "mnist"
+ MODEL_BASE_PATH: "s3://models/mnist/export/"
diff --git a/vnfs/DAaaS/applications/sample-horovod-app/templates/job.yaml b/vnfs/DAaaS/applications/sample-horovod-app/templates/job.yaml
index 4e59b277..da42ded8 100644
--- a/vnfs/DAaaS/applications/sample-horovod-app/templates/job.yaml
+++ b/vnfs/DAaaS/applications/sample-horovod-app/templates/job.yaml
@@ -70,6 +70,20 @@ spec:
value: "{{ $value }}"
{{- end }}
{{- end }}
+ - name: AWS_ACCESS_KEY_ID
+ valueFrom:
+ secretKeyRef:
+ name: {{ if .Values.minio.existingSecret }}{{ .Values.minio.existingSecret }}{{ else }}{{ template "horovod.fullname" . }}-minio{{ end }}
+ key: accesskey
+ - name: AWS_SECRET_ACCESS_KEY
+ valueFrom:
+ secretKeyRef:
+ name: {{ if .Values.minio.existingSecret }}{{ .Values.minio.existingSecret }}{{ else }}{{ template "horovod.fullname" . }}-minio{{ end }}
+ key: secretkey
+ {{- range $key, $val := .Values.minio.environment }}
+ - name: {{ $key }}
+ value: {{ $val | quote }}
+ {{- end}}
{{- if .Values.master.privileged }}
securityContext:
privileged: true
diff --git a/vnfs/DAaaS/applications/sample-horovod-app/templates/minio-secrets.yaml b/vnfs/DAaaS/applications/sample-horovod-app/templates/minio-secrets.yaml
new file mode 100644
index 00000000..c99abe67
--- /dev/null
+++ b/vnfs/DAaaS/applications/sample-horovod-app/templates/minio-secrets.yaml
@@ -0,0 +1,31 @@
+{{/*
+# Copyright 2019 Intel Corporation, Inc
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+*/}}
+
+{{- if not .Values.minio.existingSecret }}
+apiVersion: v1
+kind: Secret
+metadata:
+ name: {{ template "horovod.fullname" . }}-minio
+ labels:
+ app: {{ template "horovod.name" . }}
+ chart: {{ template "horovod.chart" . }}
+ release: {{ .Release.Name }}
+ heritage: {{ .Release.Service }}
+type: Opaque
+data:
+ accesskey: {{ .Values.minio.accessKey | b64enc }}
+ secretkey: {{ .Values.minio.secretKey | b64enc }}
+{{- end }}