diff options
author | Rajamohan Raj <rajamohan.raj@intel.com> | 2019-04-20 00:54:45 +0000 |
---|---|---|
committer | Rajamohan Raj <rajamohan.raj@intel.com> | 2019-04-20 00:56:28 +0000 |
commit | 2fd13e2728c1b2e1c290ff2df32b6420d1f1c45f (patch) | |
tree | 24fa5890c1eb3d0308dedde987c813d4ab275409 /vnfs/DAaaS/applications/sample-horovod-app/keras_mnist_advanced_modified.py | |
parent | aa92ee02641b8f24615736130fa2d7d63abd9934 (diff) |
Fixing some helm lint issues.
Change-Id: I6d62bcd10c60c422aaeb146078aee1b162838926
Issue-ID: ONAPARC-450
Signed-off-by: Rajamohan Raj <rajamohan.raj@intel.com>
Diffstat (limited to 'vnfs/DAaaS/applications/sample-horovod-app/keras_mnist_advanced_modified.py')
-rw-r--r-- | vnfs/DAaaS/applications/sample-horovod-app/keras_mnist_advanced_modified.py | 127 |
1 files changed, 127 insertions, 0 deletions
# vnfs/DAaaS/applications/sample-horovod-app/keras_mnist_advanced_modified.py
# (new file, mode 100644, 127 lines added)
"""Train a small convolutional network on MNIST with Keras and Horovod.

Every Horovod process runs this script: it loads MNIST, builds the same
model, trains on augmented images with a distributed optimizer, and finally
evaluates on the full test set. Only rank 0 writes checkpoints.
"""
from __future__ import print_function
import keras
from keras.datasets import mnist
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten
from keras.layers import Conv2D, MaxPooling2D
from keras.preprocessing.image import ImageDataGenerator
from keras import backend as K
import tensorflow as tf
import horovod.keras as hvd

# Horovod: must be initialized before any rank()/size() query below.
hvd.init()

# Session setup. The per-process GPU options are left disabled in this
# modified sample; to pin one GPU per process (by local rank), enable:
#   session_config.gpu_options.allow_growth = True
#   session_config.gpu_options.visible_device_list = str(hvd.local_rank())
session_config = tf.ConfigProto()
K.set_session(tf.Session(config=session_config))

batch_size = 128
num_classes = 10

# Enough epochs to demonstrate learning rate warmup and the reduction of
# learning rate when training plateaus.
epochs = 24

# Input image dimensions.
img_rows, img_cols = 28, 28

# The data, shuffled and split between train and test sets.
(x_train, y_train), (x_test, y_test) = mnist.load_data()

# Number of whole batches available in the train and test sets.
train_batches = len(x_train) // batch_size
test_batches = len(x_test) // batch_size

# Add the single channel axis where the Keras backend expects it.
if K.image_data_format() == 'channels_first':
    input_shape = (1, img_rows, img_cols)
else:
    input_shape = (img_rows, img_cols, 1)
x_train = x_train.reshape((x_train.shape[0],) + input_shape)
x_test = x_test.reshape((x_test.shape[0],) + input_shape)

# Convert pixels to float32 and divide by 255.
x_train = x_train.astype('float32') / 255
x_test = x_test.astype('float32') / 255
print('x_train shape:', x_train.shape)
print(x_train.shape[0], 'train samples')
print(x_test.shape[0], 'test samples')

# Convert class vectors to binary class matrices.
y_train = keras.utils.to_categorical(y_train, num_classes)
y_test = keras.utils.to_categorical(y_test, num_classes)

# Two convolutions, max-pooling, then a dense classifier with dropout.
model = Sequential([
    Conv2D(32, kernel_size=(3, 3),
           activation='relu',
           input_shape=input_shape),
    Conv2D(64, (3, 3), activation='relu'),
    MaxPooling2D(pool_size=(2, 2)),
    Dropout(0.25),
    Flatten(),
    Dense(128, activation='relu'),
    Dropout(0.5),
    Dense(num_classes, activation='softmax'),
])

# Horovod: scale the learning rate by the number of workers, then wrap the
# optimizer in the Horovod distributed optimizer.
opt = hvd.DistributedOptimizer(
    keras.optimizers.Adadelta(lr=1.0 * hvd.size()))

model.compile(loss=keras.losses.categorical_crossentropy,
              optimizer=opt,
              metrics=['accuracy'])

callbacks = [
    # Horovod: broadcast initial variable states from rank 0 to all other
    # processes, so every worker starts from the same weights whether they
    # were randomly initialized or restored from a checkpoint.
    hvd.callbacks.BroadcastGlobalVariablesCallback(0),

    # Horovod: average metrics among workers at the end of every epoch.
    # NOTE: this must come before ReduceLROnPlateau, TensorBoard or any
    # other metrics-based callback in the list.
    hvd.callbacks.MetricAverageCallback(),

    # Horovod: starting at `lr = 1.0 * hvd.size()` leads to worse final
    # accuracy, so ramp `lr = 1.0` ---> `lr = 1.0 * hvd.size()` over the
    # first five epochs. See https://arxiv.org/abs/1706.02677 for details.
    hvd.callbacks.LearningRateWarmupCallback(warmup_epochs=5, verbose=1),

    # Reduce the learning rate if training plateaus.
    keras.callbacks.ReduceLROnPlateau(patience=10, verbose=1),
]

# Horovod: save checkpoints only on worker 0 to prevent other workers from
# corrupting them.
if hvd.rank() == 0:
    checkpoint_cb = keras.callbacks.ModelCheckpoint('./checkpoint-{epoch}.h5')
    callbacks.append(checkpoint_cb)

# Image generators: augmentation for the training images, none for test.
train_gen = ImageDataGenerator(rotation_range=8, width_shift_range=0.08,
                               shear_range=0.3, height_shift_range=0.08,
                               zoom_range=0.08)
test_gen = ImageDataGenerator()
train_flow = train_gen.flow(x_train, y_train, batch_size=batch_size)
test_flow = test_gen.flow(x_test, y_test, batch_size=batch_size)

# Train the model.
# Horovod: each worker draws 1 / N of the training batches and 3 / N of the
# validation batches per epoch, where N is the number of workers.
# Over-sampling the validation data raises the probability that every
# validation example is evaluated.
model.fit_generator(train_flow,
                    steps_per_epoch=train_batches // hvd.size(),
                    callbacks=callbacks,
                    epochs=epochs,
                    verbose=1,
                    validation_data=test_flow,
                    validation_steps=3 * test_batches // hvd.size())

# Evaluate the model on the full test set.
test_loss, test_accuracy = model.evaluate(x_test, y_test, verbose=0)
print('Test loss:', test_loss)
print('Test accuracy:', test_accuracy)