| import pandas |
| import keras |
| from keras.layers import Dense, Dropout, BatchNormalization |
| import csv,math |
| import numpy as np |
| import matplotlib.pyplot as plt |
| from sklearn import preprocessing |
| from sklearn.preprocessing import MinMaxScaler |
| from sklearn.metrics import mean_squared_error |
| |
# Fix NumPy's global random seed so NumPy-drawn random numbers repeat
# from one run to the next.
seed = 420
np.random.seed(seed)

# Shared scaler that maps every column to the range [0, 1]. matrix() fits it
# on the loaded table, and the reporting code at the bottom of the script
# uses the same instance to undo the scaling.
scaler = MinMaxScaler(feature_range=(0, 1))
| |
# model function
def base_model():
    """Build and compile the fully connected regression network.

    The stack accepts inputs of shape (1, 4), narrows through ReLU layers of
    256, 128, 64, 32, 16, 8 and 4 units, and ends in a single sigmoid unit.
    It is compiled with mean-squared-error loss and the Adam optimizer.

    Returns:
        The compiled, untrained ``keras.Sequential`` model.
    """
    hidden_widths = (128, 64, 32, 16, 8, 4)

    network = keras.Sequential()
    # Only the first layer declares the input shape.
    network.add(Dense(256, activation='relu', input_shape=(1, 4)))
    for width in hidden_widths:
        network.add(Dense(width, activation='relu'))
    # Sigmoid bounds the output to (0, 1), the same range the target column
    # is scaled to before training.
    network.add(Dense(1, activation='sigmoid'))

    network.compile(loss='mean_squared_error', optimizer='adam')
    return network
| |
| |
# data function
def matrix(path='input.csv', train_fraction=0.92, drop_row=25):
    """Load the CSV, normalise it and split it into train and test arrays.

    The first line of the file is treated as a header and skipped. From every
    remaining row whose column 12 is non-empty, columns 4, 5, 7 and 8 are
    taken as inputs and column 12 as the target. Rows too short to have a
    column 12 (for example blank lines, which ``csv.reader`` yields as empty
    lists) are skipped instead of raising ``IndexError``.

    The selected values are converted to float64, row ``drop_row`` is removed,
    and the table is scaled with the module-level ``scaler``, which is fitted
    by this call.

    Args:
        path: CSV file to read. Defaults to ``'input.csv'``.
        train_fraction: Share of the rows, counted from the top, used for
            training; the remaining rows form the test set.
        drop_row: Index of a row to delete before scaling, or ``None`` to keep
            every row. The default of 25 is specific to the original data set
            (the original note says that row holds a zero value).

    Returns:
        A tuple ``(table, train_input, train_output, test_input, test_output)``:
        ``table`` is the scaled 2-D array with five columns (four inputs, then
        the target); the inputs have shape ``(n, 1, 4)`` and the outputs
        ``(n, 1, 1)``.

    Raises:
        IndexError: If ``drop_row`` is out of range for the loaded table.
        ValueError: If a selected cell cannot be converted to a float.
    """
    with open(path, 'rt', encoding="ascii") as csvfile:
        reader = csv.reader(csvfile, delimiter=',', quotechar='|')
        next(reader, None)  # skip the header line
        selected = [
            [row[4], row[5], row[7], row[8], row[12]]
            for row in reader
            if len(row) > 12 and row[12] != ''
        ]

    table = np.array(selected, dtype=np.float64)
    if drop_row is not None:
        table = np.delete(table, drop_row, axis=0)

    # NOTE(review): the scaler is fitted on all rows, so the test rows
    # influence the scaling that is applied to the training rows.
    table = scaler.fit_transform(table)

    split = int(len(table) * train_fraction)

    # Copy the slices so the returned arrays do not alias ``table``, then add
    # a length-1 axis to get (n, 1, 4) inputs and (n, 1, 1) outputs.
    train_input = np.expand_dims(table[:split, :4].copy(), 1)
    train_output = np.expand_dims(table[:split, 4:5].copy(), 1)
    test_input = np.expand_dims(table[split:, :4].copy(), 1)
    test_output = np.expand_dims(table[split:, 4:5].copy(), 1)

    return table, train_input, train_output, test_input, test_output
| |
# Build the network and load the data.
model = base_model()
model.summary()  # prints the summary itself; wrapping it in print() adds a stray "None"
table, tx, ty, dx, dy = matrix()

model.fit(tx, ty, validation_data=(dx, dy), epochs=300, batch_size=12, verbose=2)

# define predictions and true data
test_predict = model.predict(dx.copy())   # predictions for testing data
train_predict = model.predict(tx.copy())  # predictions for training data
# table[:, 4] - true results

# format predicted data
# Copy the scaled table and overwrite its target column with the predictions.
# matrix() puts the training rows first and the test rows after them, so the
# predictions are written back in that order. Slice assignment fills every
# row, including the last test row, and ravel() flattens the (n, 1, 1)
# prediction arrays to match the 1-D column.
n_train = len(train_predict)
table_predicted = table.copy()
table_predicted[:n_train, 4] = np.ravel(train_predict)
table_predicted[n_train:, 4] = np.ravel(test_predict)

# inverse transform normalized data
table = scaler.inverse_transform(table.copy())
table_predicted = scaler.inverse_transform(table_predicted.copy())

true_values = table[:, 4]
predicted_values = table_predicted[:, 4]

# calculate root mean squared error (over training and test rows together)
score = math.sqrt(mean_squared_error(true_values, predicted_values))
print('RMSE:')
print(score)

# graph data
x = np.arange(len(table)).astype(float)  # x axis is just the row number of the result
plt.scatter(x, true_values, label='True values')
plt.scatter(x, predicted_values, alpha=0.6, marker=11, label='Predicted values')

# Add a 10 % margin outward on both ends of the y axis. Multiplying the
# minimum by 1.1 would move the lower limit above the data when it is positive.
y_low = min(true_values.min(), predicted_values.min())
y_high = max(true_values.max(), predicted_values.max())
plt.axis([0, len(table),                 # x axis
          y_low - 0.1 * abs(y_low),      # y axis min
          y_high + 0.1 * abs(y_high)])   # y axis max

# dashed line separating train predictions (left) from test predictions (right)
plt.axvline(x=n_train - 0.5, color='r', linestyle='--')
plt.text(n_train, y_high / 2,
         "Test data", rotation=90, verticalalignment='center')
plt.text(n_train - 2, y_high / 2,
         "Train data", rotation=90, verticalalignment='center')

# axis and legend
plt.xlabel('Result number')
plt.legend(loc='upper left')
plt.ylabel('Result value')
plt.show()
| |