diff options
Diffstat (limited to 'vnfs/DAaaS/applications/charts/sample-horovod-app/keras_mnist_advanced_modified.py')
-rw-r--r-- | vnfs/DAaaS/applications/charts/sample-horovod-app/keras_mnist_advanced_modified.py | 127 |
1 files changed, 0 insertions, 127 deletions
diff --git a/vnfs/DAaaS/applications/charts/sample-horovod-app/keras_mnist_advanced_modified.py b/vnfs/DAaaS/applications/charts/sample-horovod-app/keras_mnist_advanced_modified.py
deleted file mode 100644
index 03425ff7..00000000
--- a/vnfs/DAaaS/applications/charts/sample-horovod-app/keras_mnist_advanced_modified.py
+++ /dev/null
@@ -1,127 +0,0 @@
-from __future__ import print_function
-import keras
-from keras.datasets import mnist
-from keras.models import Sequential
-from keras.layers import Dense, Dropout, Flatten
-from keras.layers import Conv2D, MaxPooling2D
-from keras.preprocessing.image import ImageDataGenerator
-from keras import backend as K
-import tensorflow as tf
-import horovod.keras as hvd
-
-# Horovod: initialize Horovod.
-hvd.init()
-
-# Horovod: pin GPU to be used to process local rank (one GPU per process)
-config = tf.ConfigProto()
-#config.gpu_options.allow_growth = True
-#config.gpu_options.visible_device_list = str(hvd.local_rank())
-K.set_session(tf.Session(config=config))
-
-batch_size = 128
-num_classes = 10
-
-# Enough epochs to demonstrate learning rate warmup and the reduction of
-# learning rate when training plateaues.
-epochs = 24
-
-# Input image dimensions
-img_rows, img_cols = 28, 28
-
-# The data, shuffled and split between train and test sets
-(x_train, y_train), (x_test, y_test) = mnist.load_data()
-
-# Determine how many batches are there in train and test sets
-train_batches = len(x_train) // batch_size
-test_batches = len(x_test) // batch_size
-
-if K.image_data_format() == 'channels_first':
-    x_train = x_train.reshape(x_train.shape[0], 1, img_rows, img_cols)
-    x_test = x_test.reshape(x_test.shape[0], 1, img_rows, img_cols)
-    input_shape = (1, img_rows, img_cols)
-else:
-    x_train = x_train.reshape(x_train.shape[0], img_rows, img_cols, 1)
-    x_test = x_test.reshape(x_test.shape[0], img_rows, img_cols, 1)
-    input_shape = (img_rows, img_cols, 1)
-
-x_train = x_train.astype('float32')
-x_test = x_test.astype('float32')
-x_train /= 255
-x_test /= 255
-print('x_train shape:', x_train.shape)
-print(x_train.shape[0], 'train samples')
-print(x_test.shape[0], 'test samples')
-
-# Convert class vectors to binary class matrices
-y_train = keras.utils.to_categorical(y_train, num_classes)
-y_test = keras.utils.to_categorical(y_test, num_classes)
-
-model = Sequential()
-model.add(Conv2D(32, kernel_size=(3, 3),
-                 activation='relu',
-                 input_shape=input_shape))
-model.add(Conv2D(64, (3, 3), activation='relu'))
-model.add(MaxPooling2D(pool_size=(2, 2)))
-model.add(Dropout(0.25))
-model.add(Flatten())
-model.add(Dense(128, activation='relu'))
-model.add(Dropout(0.5))
-model.add(Dense(num_classes, activation='softmax'))
-
-# Horovod: adjust learning rate based on number of GPUs.
-opt = keras.optimizers.Adadelta(lr=1.0 * hvd.size())
-
-# Horovod: add Horovod Distributed Optimizer.
-opt = hvd.DistributedOptimizer(opt)
-
-model.compile(loss=keras.losses.categorical_crossentropy,
-              optimizer=opt,
-              metrics=['accuracy'])
-
-callbacks = [
-    # Horovod: broadcast initial variable states from rank 0 to all other processes.
-    # This is necessary to ensure consistent initialization of all workers when
-    # training is started with random weights or restored from a checkpoint.
-    hvd.callbacks.BroadcastGlobalVariablesCallback(0),
-
-    # Horovod: average metrics among workers at the end of every epoch.
-    #
-    # Note: This callback must be in the list before the ReduceLROnPlateau,
-    # TensorBoard or other metrics-based callbacks.
-    hvd.callbacks.MetricAverageCallback(),
-
-    # Horovod: using `lr = 1.0 * hvd.size()` from the very beginning leads to worse final
-    # accuracy. Scale the learning rate `lr = 1.0` ---> `lr = 1.0 * hvd.size()` during
-    # the first five epochs. See https://arxiv.org/abs/1706.02677 for details.
-    hvd.callbacks.LearningRateWarmupCallback(warmup_epochs=5, verbose=1),
-
-    # Reduce the learning rate if training plateaues.
-    keras.callbacks.ReduceLROnPlateau(patience=10, verbose=1),
-]
-
-# Horovod: save checkpoints only on worker 0 to prevent other workers from corrupting them.
-if hvd.rank() == 0:
-    callbacks.append(keras.callbacks.ModelCheckpoint('./checkpoint-{epoch}.h5'))
-
-# Set up ImageDataGenerators to do data augmentation for the training images.
-train_gen = ImageDataGenerator(rotation_range=8, width_shift_range=0.08, shear_range=0.3,
-                               height_shift_range=0.08, zoom_range=0.08)
-test_gen = ImageDataGenerator()
-
-# Train the model.
-# Horovod: the training will randomly sample 1 / N batches of training data and
-# 3 / N batches of validation data on every worker, where N is the number of workers.
-# Over-sampling of validation data helps to increase probability that every validation
-# example will be evaluated.
-model.fit_generator(train_gen.flow(x_train, y_train, batch_size=batch_size),
-                    steps_per_epoch=train_batches // hvd.size(),
-                    callbacks=callbacks,
-                    epochs=epochs,
-                    verbose=1,
-                    validation_data=test_gen.flow(x_test, y_test, batch_size=batch_size),
-                    validation_steps=3 * test_batches // hvd.size())
-
-# Evaluate the model on the full data set.
-score = model.evaluate(x_test, y_test, verbose=0)
-print('Test loss:', score[0])
-print('Test accuracy:', score[1])