diff options
author | Dileep Ranganathan <dileep.ranganathan@intel.com> | 2019-05-20 11:29:01 -0700 |
---|---|---|
committer | Dileep Ranganathan <dileep.ranganathan@intel.com> | 2019-05-20 11:29:01 -0700 |
commit | 08d610ca498dc89997fd138d6b7fd4eb341c21ff (patch) | |
tree | b8bb7ded841080d912218277ac52dcb08cdc00f0 | |
parent | 592f615ffd3c7452d534b82e48c58535ecc4199c (diff) |
Save Tensorflow model to Minio repository
Export and save the TensorFlow model using the Keras API.
Added support for a Minio model repo by injecting the credentials into
Horovod pods. The model can then be served using TensorFlow Serving.
Change-Id: Id1e0b6696bc2bb1699786b08651c4d3bc353976c
Issue-ID: ONAPARC-460
Signed-off-by: Dileep Ranganathan <dileep.ranganathan@intel.com>
5 files changed, 121 insertions, 15 deletions
diff --git a/vnfs/DAaaS/applications/sample-horovod-app/Dockerfile b/vnfs/DAaaS/applications/sample-horovod-app/Dockerfile index 8bdcf5b6..5b8f5636 100644 --- a/vnfs/DAaaS/applications/sample-horovod-app/Dockerfile +++ b/vnfs/DAaaS/applications/sample-horovod-app/Dockerfile @@ -121,7 +121,7 @@ COPY ${spark_jars} /opt/spark/jars COPY bin /opt/spark/bin COPY sbin /opt/spark/sbin COPY ${img_path}/spark/entrypoint.sh /opt/ -COPY examples /opt/spark/examples + COPY ${k8s_tests} /opt/spark/tests COPY data /opt/spark/data ENV SPARK_HOME /opt/spark @@ -135,6 +135,7 @@ ENV PATH /opt/conda/envs/tf_env/bin:$PATH RUN echo "export PATH=/opt/conda/envs/tf_env/bin:$PATH" >> ~/.bashrc # echo "activate tf_env\n" >> ~/.bashrc RUN pip install petastorm +COPY examples /opt/spark/examples WORKDIR /opt/spark/work-dir ENTRYPOINT [ "/opt/entrypoint.sh" ] diff --git a/vnfs/DAaaS/applications/sample-horovod-app/keras_mnist_advanced_modified.py b/vnfs/DAaaS/applications/sample-horovod-app/keras_mnist_advanced_modified.py index 03425ff7..fa39cb6a 100644 --- a/vnfs/DAaaS/applications/sample-horovod-app/keras_mnist_advanced_modified.py +++ b/vnfs/DAaaS/applications/sample-horovod-app/keras_mnist_advanced_modified.py @@ -1,13 +1,19 @@ from __future__ import print_function import keras -from keras.datasets import mnist -from keras.models import Sequential -from keras.layers import Dense, Dropout, Flatten -from keras.layers import Conv2D, MaxPooling2D -from keras.preprocessing.image import ImageDataGenerator -from keras import backend as K +import os +from tensorflow.keras.datasets import mnist +from tensorflow.keras.models import Sequential +from tensorflow.keras.layers import Dense, Dropout, Flatten +from tensorflow.keras.layers import Conv2D, MaxPooling2D +from tensorflow.keras.preprocessing.image import ImageDataGenerator +from tensorflow.keras import backend as K +from tensorflow_estimator.python.estimator.export import export as export_helpers +from tensorflow.python.saved_model import 
builder as saved_model_builder +from tensorflow.python.saved_model import tag_constants, signature_constants +from tensorflow.python.saved_model.signature_def_utils_impl import predict_signature_def import tensorflow as tf -import horovod.keras as hvd +import horovod.tensorflow.keras as hvd + # Horovod: initialize Horovod. hvd.init() @@ -53,8 +59,8 @@ print(x_train.shape[0], 'train samples') print(x_test.shape[0], 'test samples') # Convert class vectors to binary class matrices -y_train = keras.utils.to_categorical(y_train, num_classes) -y_test = keras.utils.to_categorical(y_test, num_classes) +y_train = tf.keras.utils.to_categorical(y_train, num_classes) +y_test = tf.keras.utils.to_categorical(y_test, num_classes) model = Sequential() model.add(Conv2D(32, kernel_size=(3, 3), @@ -69,12 +75,12 @@ model.add(Dropout(0.5)) model.add(Dense(num_classes, activation='softmax')) # Horovod: adjust learning rate based on number of GPUs. -opt = keras.optimizers.Adadelta(lr=1.0 * hvd.size()) +opt = tf.keras.optimizers.Adadelta(lr=1.0 * hvd.size()) # Horovod: add Horovod Distributed Optimizer. opt = hvd.DistributedOptimizer(opt) -model.compile(loss=keras.losses.categorical_crossentropy, +model.compile(loss=tf.keras.losses.categorical_crossentropy, optimizer=opt, metrics=['accuracy']) @@ -96,12 +102,13 @@ callbacks = [ hvd.callbacks.LearningRateWarmupCallback(warmup_epochs=5, verbose=1), # Reduce the learning rate if training plateaues. - keras.callbacks.ReduceLROnPlateau(patience=10, verbose=1), + tf.keras.callbacks.ReduceLROnPlateau(patience=10, verbose=1), ] # Horovod: save checkpoints only on worker 0 to prevent other workers from corrupting them. if hvd.rank() == 0: - callbacks.append(keras.callbacks.ModelCheckpoint('./checkpoint-{epoch}.h5')) + callbacks.append(tf.keras.callbacks.ModelCheckpoint( + './checkpoint-{epoch}.h5')) # Set up ImageDataGenerators to do data augmentation for the training images. 
train_gen = ImageDataGenerator(rotation_range=8, width_shift_range=0.08, shear_range=0.3, @@ -118,10 +125,45 @@ model.fit_generator(train_gen.flow(x_train, y_train, batch_size=batch_size), callbacks=callbacks, epochs=epochs, verbose=1, - validation_data=test_gen.flow(x_test, y_test, batch_size=batch_size), + validation_data=test_gen.flow( + x_test, y_test, batch_size=batch_size), validation_steps=3 * test_batches // hvd.size()) # Evaluate the model on the full data set. score = model.evaluate(x_test, y_test, verbose=0) print('Test loss:', score[0]) print('Test accuracy:', score[1]) + +# Save Model to Minio +if hvd.rank() == 0: + print('Model Summary') + model.summary() + print('Exporting trained model to Minio Model Repo') + base_path = os.environ['MODEL_BASE_PATH'] + + # Option 1(Preferred) - Using Keras api and Tensorflow v1.13 version + saved_model_path = tf.contrib.saved_model.save_keras_model(model, base_path) + print('Model Saved to {} Using new Keras API!!!'.format(saved_model_path)) + # Option 2 - Tensorflow v1.13+ Builder saved_model api. 
+ # builder = saved_model_builder.SavedModelBuilder(base_path) + + # print(model.input) + # print(model.outputs) + + # signature = predict_signature_def(inputs={"inputs": model.input}, + # outputs={t.name:t for t in model.outputs}) + # print(signature) + # K.set_learning_phase(0) + # with K.get_session() as sess: + # builder.add_meta_graph_and_variables(sess=sess, + # tags=[tag_constants.SERVING], + # signature_def_map={'predict': signature}) + # builder.save() + # print('Model Saved to S3 Using Builder!!!') + + # Option 3 - Tensorflow v1.13 Will be deprecated in Tensorflow v2 + # tf.saved_model.simple_save( + # keras.backend.get_session(), + # export_path, + # inputs={'input_image': model.input}, + # outputs={t.name: t for t in model.outputs}) diff --git a/vnfs/DAaaS/applications/sample-horovod-app/sample_values.yaml b/vnfs/DAaaS/applications/sample-horovod-app/sample_values.yaml index 6ac31359..7030dd24 100644 --- a/vnfs/DAaaS/applications/sample-horovod-app/sample_values.yaml +++ b/vnfs/DAaaS/applications/sample-horovod-app/sample_values.yaml @@ -3,6 +3,7 @@ ssh: useSecrets: true + port: 22 hostKey: |- -----BEGIN RSA PRIVATE KEY----- ThisIsPrivateKeyThisIsPrivateKeyThisIsPrivateKeyThisIsPrivateKey @@ -42,3 +43,20 @@ master: pullPolicy: Never args: - "mpirun -np 3 --hostfile /horovod/generated/hostfile --mca orte_keep_fqdn_hostnames t --allow-run-as-root --display-map --tag-output --timestamp-output sh -c '/opt/conda/envs/tf_env/bin/python /opt/spark/examples/src/main/python/tensorflow/keras_mnist_advanced_modified.py'" + +## Model repository information (Minio) +minio: + existingSecret: "" + accessKey: "onapdaas" + secretKey: "onapsecretdaas" + environment: + AWS_REGION: "us-west-1" + S3_REGION: "us-west-1" + S3_ENDPOINT: "minio.edge1.svc.cluster.local:9000" + AWS_ENDPOINT_URL: "http://minio.edge1.svc.cluster.local:9000" + S3_USE_HTTPS: 0 + S3_VERIFY_SSL: 0 + AWS_LOG_LEVEL: 3 + TF_CPP_MIN_LOG_LEVEL: 3 + MODEL_NAME: "mnist" + MODEL_BASE_PATH: 
"s3://models/mnist/export/" diff --git a/vnfs/DAaaS/applications/sample-horovod-app/templates/job.yaml b/vnfs/DAaaS/applications/sample-horovod-app/templates/job.yaml index 4e59b277..da42ded8 100644 --- a/vnfs/DAaaS/applications/sample-horovod-app/templates/job.yaml +++ b/vnfs/DAaaS/applications/sample-horovod-app/templates/job.yaml @@ -70,6 +70,20 @@ spec: value: "{{ $value }}" {{- end }} {{- end }} + - name: AWS_ACCESS_KEY_ID + valueFrom: + secretKeyRef: + name: {{ if .Values.minio.existingSecret }}{{ .Values.minio.existingSecret }}{{ else }}{{ template "horovod.fullname" . }}-minio{{ end }} + key: accesskey + - name: AWS_SECRET_ACCESS_KEY + valueFrom: + secretKeyRef: + name: {{ if .Values.minio.existingSecret }}{{ .Values.minio.existingSecret }}{{ else }}{{ template "horovod.fullname" . }}-minio{{ end }} + key: secretkey + {{- range $key, $val := .Values.minio.environment }} + - name: {{ $key }} + value: {{ $val | quote }} + {{- end}} {{- if .Values.master.privileged }} securityContext: privileged: true diff --git a/vnfs/DAaaS/applications/sample-horovod-app/templates/minio-secrets.yaml b/vnfs/DAaaS/applications/sample-horovod-app/templates/minio-secrets.yaml new file mode 100644 index 00000000..c99abe67 --- /dev/null +++ b/vnfs/DAaaS/applications/sample-horovod-app/templates/minio-secrets.yaml @@ -0,0 +1,31 @@ +{{/* +# Copyright 2019 Intel Corporation, Inc +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+*/}} + +{{- if not .Values.minio.existingSecret }} +apiVersion: v1 +kind: Secret +metadata: + name: {{ template "horovod.fullname" . }}-minio + labels: + app: {{ template "horovod.name" . }} + chart: {{ template "horovod.chart" . }} + release: {{ .Release.Name }} + heritage: {{ .Release.Service }} +type: Opaque +data: + accesskey: {{ .Values.minio.accessKey | b64enc }} + secretkey: {{ .Values.minio.secretKey | b64enc }} +{{- end }} |