diff options
Diffstat (limited to 'components/ml-prediction-ms/train')
-rwxr-xr-x | components/ml-prediction-ms/train/ExampleSample_train_data_s1.xlsx | bin | 0 -> 3773850 bytes | |||
-rwxr-xr-x | components/ml-prediction-ms/train/ExampleSample_train_data_s1_smaller.xlsx | bin | 0 -> 5921 bytes | |||
-rwxr-xr-x | components/ml-prediction-ms/train/ParseTrain.py | 336 |
3 files changed, 336 insertions, 0 deletions
diff --git a/components/ml-prediction-ms/train/ExampleSample_train_data_s1.xlsx b/components/ml-prediction-ms/train/ExampleSample_train_data_s1.xlsx Binary files differnew file mode 100755 index 00000000..ffac8c80 --- /dev/null +++ b/components/ml-prediction-ms/train/ExampleSample_train_data_s1.xlsx diff --git a/components/ml-prediction-ms/train/ExampleSample_train_data_s1_smaller.xlsx b/components/ml-prediction-ms/train/ExampleSample_train_data_s1_smaller.xlsx Binary files differnew file mode 100755 index 00000000..2ca30a17 --- /dev/null +++ b/components/ml-prediction-ms/train/ExampleSample_train_data_s1_smaller.xlsx diff --git a/components/ml-prediction-ms/train/ParseTrain.py b/components/ml-prediction-ms/train/ParseTrain.py new file mode 100755 index 00000000..d3aa321c --- /dev/null +++ b/components/ml-prediction-ms/train/ParseTrain.py @@ -0,0 +1,336 @@ +#!/usr/bin/env python3 +# LICENSE_START======================================================= +# ml-prediction-ms +# ================================================================================ +# Copyright (C) 2023 Wipro Limited +# ================================================================================ +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============LICENSE_END========================================================= + + + + +import json +import time +import requests +import logging +import numpy as np +import pandas as pd +import configparser + +from requests.auth import HTTPBasicAuth +from math import sqrt +from numpy import concatenate +from matplotlib import pyplot +from pandas import DataFrame, read_csv, read_excel +from pandas import concat +from sklearn.preprocessing import MinMaxScaler +from sklearn.preprocessing import LabelEncoder +from sklearn.metrics import mean_squared_error +from keras.layers import Dense, LSTM +from keras.models import Sequential +from keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau + + +# Create and configure logger +logging.basicConfig(filename="ml-prediction-ms-Training.log", + format='%(asctime)s %(message)s', + filemode='w') +# Creating an object +logger = logging.getLogger() + + +config = configparser.ConfigParser() +config.readfp(open(r'ml-prediction-ms.config')) + +pathToStartPMData = config.get('PM DATA TOPICS', 'PathToStartPMData') +pathToStopPMData = config.get('PM DATA TOPICS', 'PathToStopPMData') +pathToGetData = config.get('PM DATA TOPICS', 'PathToGetData') + +pathToGetConfigData = config.get('CONFIG DATA TOPICS', 'PathToGetConfigData') + +def Parser(cells_data_list,data_dic,features, slice_name): + + """ + Perform Data Parser + READ THE ACTUAL PARAMETERS FROM THE topic MESSAGE AND ADDS IT INTO A DICTIONARY + + Args: + cells_data_list: Cell data list object + data_dic: The Parsed data on cell data contained in dictionary + features: Data featurs + slice_name: Slice Name + + Returns: + data_dic: none + + Raises: + RuntimeError: Error while Process Slices Cells Data. + """ + try: + + for cells_data in cells_data_list: + cell_id =cells_data['measObjInstId'] + response = requests.get(pathToGetConfigData + cell_id + '/snssai/'+ slice_name) + config_data=response.json()['maxNumberOfConns'] + print(config_data) + results= cells_data['measResults'] + + for result in results: + p=int(result['p']) + value=int(result['sValue']) + key = cell_id +'_'+features[p-1].split('-')[0] + if key not in data_dic: + data_dic[key]=[value] + else: + data_dic[key].append(value) + + for j in range(3,5): + key = cell_id +features[j] + if key not in data_dic: + data_dic[key]=[config_data] + elif j==3: + data_dic[key].append(config_data) + elif j==4: + change = ( + data_dic[cell_id + "_SM.PDUSessionSetupFail.0"][-1] + / data_dic[cell_id + "_SM.PDUSessionSetupReq.01"][-1] + - data_dic[cell_id + "_SM.PDUSessionSetupFail.0"][-2] + / data_dic[cell_id + "_SM.PDUSessionSetupReq.01"][-2] + ) + data_dic[key].append(change*config_data+config_data) + except Exception as e: + logger.error("Error in Parser Slices Cells Data:\n%s" % e) + + return data_dic + + + + +def GetSlicesCellsData(): + """ + Start the data simulation topic for configured slices and cells + The process waits for the data to be simulated. Waits for 2 hour period. + The topic to stop the data simulation are called. + The process waits for the data to be simulated. Waits for 2 hour period. + + Args: + none: none. + + Returns: + none: none + + Raises: + RuntimeError: if Topics cannot be retrived. + """ + + try: + #Start topic to generated data + response = requests.post(pathToStartPMData, verify=False ) + print(response) + except Exception as e: + logger.error("Error while Start topic to generated data:\n%s" % e) + + + #Wait for 2 hrs for data generation + time.sleep(3600*2) + + try: + #stop topic to generated data + response = requests.post(pathToStopPMData, verify=False) + print(response) + except Exception as e: + logger.error("Error while Stop topic on Data Generation:\n%s" % e) + + time.sleep(10) + +def ProcessSlicesCellsData(): + """ + Start Process Slices Cells Data + + Args: + none: none. + + Returns: + none: none + + Raises: + RuntimeError: Error while Process Slices Cells Data. + """ + + data_dic={} # Final data will be stored here + + try: + + #infinite loop, looking for input messages from dmaap, it will run till the topic returns a null and then create csv + while True: + response = requests.get(pathToGetData, verify=False) + data = response.json() + print(data) + + if data==[]: + break + + for datum in data: + sub_data=json.loads(datum) + sub_data = sub_data['event']['perf3gppFields']['measDataCollection']['measInfoList'][0] + features=sub_data['measTypes']['sMeasTypesList'] + features.extend(['_maxNumberOfConns.configured', '_maxNumberOfConns.predicted']) + cells_data_list= sub_data['measValuesList'] + slice_name = features[0].split('.')[2] + data_dic = Parser(cells_data_list, data_dic, features, slice_name) #calling the parser method + + time.sleep(1) + + data_df=pd.DataFrame(data_dic) + + #data_df.to_excel('PM_train_data.xlsx', index=0) + #df=pd.read_excel('Final_train_data_s1.xlsx',engine='openpyxl') + + data_df.columns = [i.split('_')[1] for i in data_df.columns] + print(data_df.columns) + + #To append all the cells one after the other for training + for x in range(0,len(data_df.columns),5): + temp_df = data_df.iloc[:,x:x+5] + if x==0: + result= pd.DataFrame(columns=temp_df.columns) + result = result.append(temp_df, ignore_index=True) + + test_data= result.iloc[int(0.9*len(result)):,:] + train_data=result.iloc[:int(0.9*len(result)),:] + train_data.to_csv('Train_Data.csv', index=0) + test_data.to_csv('Test_Data.csv',index=0) + except Exception as e: + logger.error("Error while Process Slices Cells Data:\n%s" % e) + + + +def series_to_supervised(data, n_in=1, n_out=1, dropnan=True): + """ + Convert the timeseries into Forecast series + + Args: + n_in: Input window size for time series to carry previous nth time instance value. + n_out: output future n time instance value to be forecasted against the timeseries + Returns: + agg (list): Aggregated list of past time series inputs as per the input window and the time series of the predicted future time instance. + + Raises: + RuntimeError: Error Pre Processing Slices Cells Data. + """ + + try: + n_vars = 1 if type(data) is list else data.shape[1] + df = DataFrame(data) + cols, names = [],[] + + # input sequence (t-n, ... t-1) + for i in range(n_in, 0, -1): + cols.append(df.shift(i)) + names += [('var%d(t-%d)' % (j+1, i)) for j in range(n_vars)] + + # forecast sequence (t, t+1, ... t+n) + for i in range(0, n_out): + cols.append(df.shift(-i)) + if i == 0: + names += [('var%d(t)' % (j+1)) for j in range(n_vars)] + else: + names += [('var%d(t+%d)' % (j+1, i)) for j in range(n_vars)] + + # put it all together + agg = concat(cols, axis=1) + agg.columns = names + + # drop rows with NaN values + if dropnan: + agg.dropna(inplace=True) + except Exception as e: + logger.error("Error Pre Processing Slices Cells Data f:\n%s" % e) + + return agg + +def Train(): + """ + Train the data against the Machine learning model. + + Args: + none: none. + + Returns: + none: none + + Raises: + RuntimeError: if AAI Network Function data cannot be retrieved. + """ + try: + + #Training data will be read here and will be concatenated here to form 5 columns only. + #(Will depend on how they will provide training data) + file_name= "Train_Data.csv" + value = pd.read_csv(file_name) + print(value) + + scaler = MinMaxScaler(feature_range=(-1, 1)) + scaled = scaler.fit_transform(value) + window_size=4 + reframed = series_to_supervised(scaled,window_size, 1) + value=reframed.values + + + train = value[:int(0.75*len(value)), :] #set the range of train, validation and test data + val= value[int(0.75*len(value)):int(0.90*len(value)),:] + test = value[int(0.90*len(value)):, :] + columns=scaled.shape[1] + + #Divide the data into input and output for the model to learn + train_X, train_y = train[:, :window_size*scaled.shape[1] + scaled.shape[1]-1], train[:, -1] + val_X, val_y = val[:,:window_size*scaled.shape[1] + scaled.shape[1]-1], val[:,-1] + test_X, test_y = test[:, :window_size*scaled.shape[1] + scaled.shape[1]-1], test[:, -1] + + #Reshaping the data to feed lstm model + train_X = train_X.reshape((train_X.shape[0], 1, train_X.shape[1])) + val_X = val_X.reshape((val_X.shape[0], 1, val_X.shape[1])) + test_X = test_X.reshape((test_X.shape[0], 1, test_X.shape[1])) + + print(val_X.shape,val_y.shape, test_y.shape) + #Defining the model block here + model = Sequential() + model.add(LSTM(20, input_shape=(train_X.shape[1], train_X.shape[2]))) + model.add(Dense(1)) + + checkpoint = ModelCheckpoint('best_model.h5', verbose=1, monitor='val_loss',save_best_only=True, mode='auto') + model.compile(optimizer='adam', loss='mean_squared_error') + + history = model.fit(train_X, train_y, epochs=250, batch_size=512, callbacks=[checkpoint], validation_data=(val_X, val_y), verbose=2, shuffle=False) #epochs>2k + # plot history + best_model.save("best_model.h5") + #print(history.history.keys()) + pyplot.plot(history.history['loss'], label='train') + pyplot.plot(history.history['val_loss'], label='test') + pyplot.legend() + pyplot.show() + + + except Exception as e: + logger.error("Error during ML Training Process:\n%s" % e) + +if __name__ == "__main__": + try: + logger.info(": starting to get the Simulated slices cells data") + GetSlicesCellsData() + ProcessSlicesCellsData() + Train() + except Exception as e: + logger.error("Error while starting to get data for ML Training Process:\n%s" % e) + |