Data_Invaders - Space Apps Challenge

Data_Invaders | Chasers of the Lost Data

Team Updates

	import pandas
	import keras
	from keras.layers import Dense, Dropout, BatchNormalization
	import csv,math
	import numpy as np
	import matplotlib.pyplot as plt
	from sklearn import preprocessing
	from sklearn.preprocessing import MinMaxScaler
	from sklearn.metrics import mean_squared_error

	# Min-max normalizer mapping every column into [0, 1]; the data loader fits
	# it and the evaluation code uses it again to undo the scaling.
	scaler = MinMaxScaler(feature_range=(0, 1))

	# Seed NumPy's global random generator with a fixed value.
	seed = 420
	np.random.seed(seed)

	#model function
	def base_model():
	    """Build and compile the fully connected regression network.

	    The network accepts inputs of shape (1, 4), narrows through ReLU layers
	    of 256, 128, 64, 32, 16, 8 and 4 units, and ends in a single sigmoid
	    unit. It is compiled with mean squared error loss and the Adam
	    optimizer.

	    Returns:
	        The compiled ``keras.Sequential`` model.
	    """
	    # Widths of the hidden layers that follow the 256-unit input layer.
	    hidden_widths = (128, 64, 32, 16, 8, 4)

	    layers = [Dense(256, activation='relu', input_shape=(1, 4))]
	    layers.extend(Dense(width, activation='relu') for width in hidden_widths)
	    layers.append(Dense(1, activation='sigmoid'))

	    net = keras.Sequential(layers)
	    net.compile(loss='mean_squared_error', optimizer='adam')
	    return net


	#data function
	def matrix():
	    """Load ``input.csv`` and split it into scaled training and test sets.

	    Reads every row after the header, keeps those whose column 12 is not
	    empty, and selects columns 4, 5, 7 and 8 as inputs and column 12 as the
	    output. The selection is converted to float64, row 25 is removed, and
	    all five columns are normalized with the module-level ``scaler`` (which
	    is fitted here as a side effect). The first 92% of the rows form the
	    training set and the remaining rows the test set.

	    Returns:
	        A tuple ``(table, train_input, train_output, test_input,
	        test_output)``: the scaled 2-D table, followed by inputs of shape
	        ``(rows, 1, 4)`` and outputs of shape ``(rows, 1, 1)``.
	    """
	    # newline='' is what the csv module asks for when given a file object.
	    # quotechar must be exactly one character, so use a bare pipe; a
	    # backslash-pipe literal is two characters and csv.reader rejects it.
	    with open('input.csv', 'rt', encoding="ascii", newline='') as csvfile:
	        reader = csv.reader(csvfile, delimiter=',', quotechar='|')
	        results = list(reader)  # whole sheet, header row included

	    # Skip the header and keep only the rows that carry an output value.
	    table = [
	        [row[4], row[5], row[7], row[8], row[12]]
	        for row in results[1:]
	        if row[12] != ''
	    ]
	    table = np.float64(table)  # convert to a float matrix
	    table = np.delete(table, 25, axis=0)  # delete one zero value
	    table = scaler.fit_transform(table)  # normalize every column

	    precentage_of_train_data = 0.92  # share of rows used for training
	    split = int(len(table) * precentage_of_train_data)

	    # Columns 0-3 are the inputs and column 4 is the output. The added axis
	    # produces the (rows, 1, features) layout the model is built for.
	    train_input = np.expand_dims(table[:split, :4], 1)
	    train_output = np.expand_dims(table[:split, 4:5], 1)
	    test_input = np.expand_dims(table[split:, :4], 1)
	    test_output = np.expand_dims(table[split:, 4:5], 1)

	    return table, train_input, train_output, test_input, test_output

	# Build the network, load the data, then train with the held-out rows as
	# validation data.
	model = base_model()
	model.summary()  # summary() prints the table itself and returns None
	table, tx, ty, dx, dy = matrix()

	model.fit(tx, ty, validation_data=(dx, dy), epochs=300, batch_size=12,
	          verbose=2)

	#define predictions and true data
	# The model output carries singleton axes; flatten to one value per row so
	# the predictions can be written into a table column directly.
	test_predict = model.predict(dx.copy()).reshape(-1)  # testing data
	train_predict = model.predict(tx.copy()).reshape(-1)  # training data
	#table[:,4] - true results

	#format predicted data
	# Copy the table and overwrite its output column with the predictions:
	# training rows come first, test rows fill everything after them. Slicing
	# to the end of the table makes sure the final test row is replaced too.
	table_predicted = table.copy()
	n_train = len(train_predict)
	table_predicted[:n_train, 4] = train_predict
	table_predicted[n_train:, 4] = test_predict

	#inverse transform normalized data
	table = scaler.inverse_transform(table.copy())
	table_predicted = scaler.inverse_transform(table_predicted.copy())

	#calculate root mean squared error
	# Computed over all rows (training and test together).
	score = math.sqrt(mean_squared_error(table_predicted[:, 4], table[:, 4]))
	print('RMSE:')
	print(score)

	#graph data
	true_values = table[:, 4]
	predicted_values = table_predicted[:, 4]
	y_low = min([min(true_values), min(predicted_values)])
	y_high = max([max(true_values), max(predicted_values)])

	x = np.arange(len(table)).astype(float)  # x axis is just the row number
	plt.scatter(x, true_values, label='True values')
	plt.scatter(x, predicted_values, alpha=0.6, marker=11,
	            label='Predicted values')
	plt.axis([0, len(table),  # x axis
	          y_low * 1.1,  # y axis min
	          y_high * 1.1])  # y axis max

	# Dashed line separating training predictions from test predictions, with
	# a vertical caption on either side of it.
	plt.axvline(x=n_train - 0.5, color='r', linestyle='--')
	plt.text(n_train, y_high / 2,
	         "Test data", rotation=90, verticalalignment='center')
	plt.text(n_train - 2, y_high / 2,
	         "Train data", rotation=90, verticalalignment='center')

	#axis and legend
	plt.xlabel('Result number')
	plt.legend(loc='upper left')
	plt.ylabel('Result value')
	plt.show()

view raw data_invaders hosted with ❤ by GitHub

Mihajlo Trajković

New poster

Mihajlo Trajković

Working

Mihajlo Trajković

We are on!!

Mihajlo Trajković