diff options
Diffstat (limited to 'vnfs/DAaaS/applications/sample-horovod-app')
5 files changed, 121 insertions, 15 deletions
diff --git a/vnfs/DAaaS/applications/sample-horovod-app/Dockerfile b/vnfs/DAaaS/applications/sample-horovod-app/Dockerfile index 8bdcf5b6..5b8f5636 100644 --- a/vnfs/DAaaS/applications/sample-horovod-app/Dockerfile +++ b/vnfs/DAaaS/applications/sample-horovod-app/Dockerfile @@ -121,7 +121,7 @@ COPY ${spark_jars} /opt/spark/jars COPY bin /opt/spark/bin COPY sbin /opt/spark/sbin COPY ${img_path}/spark/entrypoint.sh /opt/ -COPY examples /opt/spark/examples + COPY ${k8s_tests} /opt/spark/tests COPY data /opt/spark/data ENV SPARK_HOME /opt/spark @@ -135,6 +135,7 @@ ENV PATH /opt/conda/envs/tf_env/bin:$PATH RUN echo "export PATH=/opt/conda/envs/tf_env/bin:$PATH" >> ~/.bashrc # echo "activate tf_env\n" >> ~/.bashrc RUN pip install petastorm +COPY examples /opt/spark/examples WORKDIR /opt/spark/work-dir ENTRYPOINT [ "/opt/entrypoint.sh" ] diff --git a/vnfs/DAaaS/applications/sample-horovod-app/keras_mnist_advanced_modified.py b/vnfs/DAaaS/applications/sample-horovod-app/keras_mnist_advanced_modified.py index 03425ff7..fa39cb6a 100644 --- a/vnfs/DAaaS/applications/sample-horovod-app/keras_mnist_advanced_modified.py +++ b/vnfs/DAaaS/applications/sample-horovod-app/keras_mnist_advanced_modified.py @@ -1,13 +1,19 @@ from __future__ import print_function import keras -from keras.datasets import mnist -from keras.models import Sequential -from keras.layers import Dense, Dropout, Flatten -from keras.layers import Conv2D, MaxPooling2D -from keras.preprocessing.image import ImageDataGenerator -from keras import backend as K +import os +from tensorflow.keras.datasets import mnist +from tensorflow.keras.models import Sequential +from tensorflow.keras.layers import Dense, Dropout, Flatten +from tensorflow.keras.layers import Conv2D, MaxPooling2D +from tensorflow.keras.preprocessing.image import ImageDataGenerator +from tensorflow.keras import backend as K +from tensorflow_estimator.python.estimator.export import export as export_helpers +from tensorflow.python.saved_model import builder as saved_model_builder +from tensorflow.python.saved_model import tag_constants, signature_constants +from tensorflow.python.saved_model.signature_def_utils_impl import predict_signature_def import tensorflow as tf -import horovod.keras as hvd +import horovod.tensorflow.keras as hvd + # Horovod: initialize Horovod. hvd.init() @@ -53,8 +59,8 @@ print(x_train.shape[0], 'train samples') print(x_test.shape[0], 'test samples') # Convert class vectors to binary class matrices -y_train = keras.utils.to_categorical(y_train, num_classes) -y_test = keras.utils.to_categorical(y_test, num_classes) +y_train = tf.keras.utils.to_categorical(y_train, num_classes) +y_test = tf.keras.utils.to_categorical(y_test, num_classes) model = Sequential() model.add(Conv2D(32, kernel_size=(3, 3), @@ -69,12 +75,12 @@ model.add(Dropout(0.5)) model.add(Dense(num_classes, activation='softmax')) # Horovod: adjust learning rate based on number of GPUs. -opt = keras.optimizers.Adadelta(lr=1.0 * hvd.size()) +opt = tf.keras.optimizers.Adadelta(lr=1.0 * hvd.size()) # Horovod: add Horovod Distributed Optimizer. opt = hvd.DistributedOptimizer(opt) -model.compile(loss=keras.losses.categorical_crossentropy, +model.compile(loss=tf.keras.losses.categorical_crossentropy, optimizer=opt, metrics=['accuracy']) @@ -96,12 +102,13 @@ callbacks = [ hvd.callbacks.LearningRateWarmupCallback(warmup_epochs=5, verbose=1), # Reduce the learning rate if training plateaues. - keras.callbacks.ReduceLROnPlateau(patience=10, verbose=1), + tf.keras.callbacks.ReduceLROnPlateau(patience=10, verbose=1), ] # Horovod: save checkpoints only on worker 0 to prevent other workers from corrupting them. if hvd.rank() == 0: - callbacks.append(keras.callbacks.ModelCheckpoint('./checkpoint-{epoch}.h5')) + callbacks.append(tf.keras.callbacks.ModelCheckpoint( + './checkpoint-{epoch}.h5')) # Set up ImageDataGenerators to do data augmentation for the training images. train_gen = ImageDataGenerator(rotation_range=8, width_shift_range=0.08, shear_range=0.3, @@ -118,10 +125,45 @@ model.fit_generator(train_gen.flow(x_train, y_train, batch_size=batch_size), callbacks=callbacks, epochs=epochs, verbose=1, - validation_data=test_gen.flow(x_test, y_test, batch_size=batch_size), + validation_data=test_gen.flow( + x_test, y_test, batch_size=batch_size), validation_steps=3 * test_batches // hvd.size()) # Evaluate the model on the full data set. score = model.evaluate(x_test, y_test, verbose=0) print('Test loss:', score[0]) print('Test accuracy:', score[1]) + +# Save Model to Minio +if hvd.rank() == 0: + print('Model Summary') + model.summary() + print('Exporting trained model to Minio Model Repo') + base_path = os.environ['MODEL_BASE_PATH'] + + # Option 1(Preferred) - Using Keras api and Tensorflow v1.13 version + saved_model_path = tf.contrib.saved_model.save_keras_model(model, base_path) + print('Model Saved to {} Using new Keras API!!!'.format(saved_model_path)) + # Option 2 - Tensorflow v1.13+ Builder saved_model api. + # builder = saved_model_builder.SavedModelBuilder(base_path) + + # print(model.input) + # print(model.outputs) + + # signature = predict_signature_def(inputs={"inputs": model.input}, + # outputs={t.name:t for t in model.outputs}) + # print(signature) + # K.set_learning_phase(0) + # with K.get_session() as sess: + # builder.add_meta_graph_and_variables(sess=sess, + # tags=[tag_constants.SERVING], + # signature_def_map={'predict': signature}) + # builder.save() + # print('Model Saved to S3 Using Builder!!!') + + # Option 3 - Tensorflow v1.13 Will be deprecated in Tensorflow v2 + # tf.saved_model.simple_save( + # keras.backend.get_session(), + # export_path, + # inputs={'input_image': model.input}, + # outputs={t.name: t for t in model.outputs}) diff --git a/vnfs/DAaaS/applications/sample-horovod-app/sample_values.yaml b/vnfs/DAaaS/applications/sample-horovod-app/sample_values.yaml index 6ac31359..7030dd24 100644 --- a/vnfs/DAaaS/applications/sample-horovod-app/sample_values.yaml +++ b/vnfs/DAaaS/applications/sample-horovod-app/sample_values.yaml @@ -3,6 +3,7 @@ ssh: useSecrets: true + port: 22 hostKey: |- -----BEGIN RSA PRIVATE KEY----- ThisIsPrivateKeyThisIsPrivateKeyThisIsPrivateKeyThisIsPrivateKey @@ -42,3 +43,20 @@ master: pullPolicy: Never args: - "mpirun -np 3 --hostfile /horovod/generated/hostfile --mca orte_keep_fqdn_hostnames t --allow-run-as-root --display-map --tag-output --timestamp-output sh -c '/opt/conda/envs/tf_env/bin/python /opt/spark/examples/src/main/python/tensorflow/keras_mnist_advanced_modified.py'" + +## Model repository information (Minio) +minio: + existingSecret: "" + accessKey: "onapdaas" + secretKey: "onapsecretdaas" + environment: + AWS_REGION: "us-west-1" + S3_REGION: "us-west-1" + S3_ENDPOINT: "minio.edge1.svc.cluster.local:9000" + AWS_ENDPOINT_URL: "http://minio.edge1.svc.cluster.local:9000" + S3_USE_HTTPS: 0 + S3_VERIFY_SSL: 0 + AWS_LOG_LEVEL: 3 + TF_CPP_MIN_LOG_LEVEL: 3 + MODEL_NAME: "mnist" + MODEL_BASE_PATH: "s3://models/mnist/export/" diff --git a/vnfs/DAaaS/applications/sample-horovod-app/templates/job.yaml b/vnfs/DAaaS/applications/sample-horovod-app/templates/job.yaml index 4e59b277..da42ded8 100644 --- a/vnfs/DAaaS/applications/sample-horovod-app/templates/job.yaml +++ b/vnfs/DAaaS/applications/sample-horovod-app/templates/job.yaml @@ -70,6 +70,20 @@ spec: value: "{{ $value }}" {{- end }} {{- end }} + - name: AWS_ACCESS_KEY_ID + valueFrom: + secretKeyRef: + name: {{ if .Values.minio.existingSecret }}{{ .Values.minio.existingSecret }}{{ else }}{{ template "horovod.fullname" . }}-minio{{ end }} + key: accesskey + - name: AWS_SECRET_ACCESS_KEY + valueFrom: + secretKeyRef: + name: {{ if .Values.minio.existingSecret }}{{ .Values.minio.existingSecret }}{{ else }}{{ template "horovod.fullname" . }}-minio{{ end }} + key: secretkey + {{- range $key, $val := .Values.minio.environment }} + - name: {{ $key }} + value: {{ $val | quote }} + {{- end}} {{- if .Values.master.privileged }} securityContext: privileged: true diff --git a/vnfs/DAaaS/applications/sample-horovod-app/templates/minio-secrets.yaml b/vnfs/DAaaS/applications/sample-horovod-app/templates/minio-secrets.yaml new file mode 100644 index 00000000..c99abe67 --- /dev/null +++ b/vnfs/DAaaS/applications/sample-horovod-app/templates/minio-secrets.yaml @@ -0,0 +1,31 @@ +{{/* +# Copyright 2019 Intel Corporation, Inc +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +*/}} + +{{- if not .Values.minio.existingSecret }} +apiVersion: v1 +kind: Secret +metadata: + name: {{ template "horovod.fullname" . }}-minio + labels: + app: {{ template "horovod.name" . }} + chart: {{ template "horovod.chart" . }} + release: {{ .Release.Name }} + heritage: {{ .Release.Service }} +type: Opaque +data: + accesskey: {{ .Values.minio.accessKey | b64enc }} + secretkey: {{ .Values.minio.secretKey | b64enc }} +{{- end }} |