| # -*- coding: utf-8 -*- |
| """TrillApp.ipynb |
| |
| Automatically generated by Colaboratory. |
| |
| Original file is located at |
| https://colab.research.google.com/drive/1b0qU_SkDafVEvhYmLhRgYzO_B-cu3KwQ |
| """ |
| |
# Tasks!
## Look at the Data
## Graph some portion of it
## Decide what portions we want to predict
## Research what kinds of prediction options we have
## Pick two and see which one seems to have the best results immediately
## Work on perfecting the solution

# CSV export of the NASA open-data set used below
src = 'https://data.nasa.gov/api/views/mc52-syum/rows.csv?accessType=DOWNLOAD'
| |
# Space Apps Challenge
# Lost Data Chasers
import pandas as pd
import numpy as np
| |
# Read data into variable
data = pd.read_csv(src)

# Keep a copy with incomplete rows removed
cleanData = data.dropna()

print(data.columns)

with pd.option_context('display.max_rows', None, 'display.max_columns', None):  # more options can be specified also
    print(data)
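# A quick structural summary also helps with the "Look at the Data" task; this is
# just a sketch using standard pandas introspection, with no assumptions about
# which columns the CSV actually contains.
data.info()
print(data.describe(include='all'))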
| |
# Pull fireball atmospheric-impact records from the JPL SSD fireball API
# (vel-comp=true adds the velocity components vx, vy, vz).
import requests

response = requests.get('https://ssd-api.jpl.nasa.gov/fireball.api?limit=2000&vel-comp=true')
x = response.json()
print(x.keys())

# Build a DataFrame from the raw rows and label the columns with the field names
# returned by the API (pandas.json_normalize would be an alternative here).
df = pd.DataFrame(x['data'])
df.columns = x['fields']
print(x['fields'])
print(x['count'])
print(x['signature'])

with pd.option_context('display.max_rows', None, 'display.max_columns', None):
    print(df[:40])
| |
# Drop incomplete fireball records and convert the columns we plot to numbers
# (values arrive from the API as strings).
df_na_free = df.dropna().copy()
print(df_na_free[:6]['vel'].to_list())
df_na_free['vel'] = pd.to_numeric(df_na_free['vel'])
df_na_free['impact-e'] = pd.to_numeric(df_na_free['impact-e'])
df_na_free.plot(x='impact-e', y='vel', kind='scatter')
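# The per-column pd.to_numeric calls above can be generalised; a minimal sketch
# that converts every field except the obviously non-numeric ones in one pass.
# It assumes the field names 'date', 'lat-dir', and 'lon-dir' reported by the
# fireball API; errors='coerce' turns anything unparseable into NaN.
numeric_cols = [c for c in df_na_free.columns if c not in ('date', 'lat-dir', 'lon-dir')]
df_na_free[numeric_cols] = df_na_free[numeric_cols].apply(pd.to_numeric, errors='coerce')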
| |
# 3D scatter of the velocity components
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D  # noqa: F401 (registers the 3d projection)

fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')

df_clean = df.dropna().copy()
for col in ('vx', 'vy', 'vz'):
    df_clean[col] = pd.to_numeric(df_clean[col])

ax.scatter(df_clean['vx'], df_clean['vy'], df_clean['vz'])
ax.set_ylabel('Datapoints: ' + str(df_clean.shape[0]))
plt.show()
| |
| |
| """# Machine Learning""" |
| |
from sklearn.datasets import make_regression       # only used by the commented-out call below
from sklearn.linear_model import LinearRegression  # imported but not used below

# Work on a copy so df stays untouched: drop the date string and encode the
# hemisphere letters as signed integers so that every remaining column is numeric.
temp = df.copy()
temp.drop(['date'], axis=1, inplace=True)

temp.loc[df['lat-dir'] == 'S', 'lat-dir'] = -1
temp.loc[df['lat-dir'] == 'N', 'lat-dir'] = 1
temp.loc[df['lon-dir'] == 'E', 'lon-dir'] = 1
temp.loc[df['lon-dir'] == 'W', 'lon-dir'] = -1

# Convert the remaining string values to numbers
temp = temp.apply(pd.to_numeric)
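# With the hemisphere flags encoded as +/-1, signed coordinates are easy to derive
# if they are ever needed for mapping; a minimal illustrative sketch, not used by
# the model below.
lat_signed = pd.to_numeric(temp['lat'], errors='coerce') * pd.to_numeric(temp['lat-dir'], errors='coerce')
lon_signed = pd.to_numeric(temp['lon'], errors='coerce') * pd.to_numeric(temp['lon-dir'], errors='coerce')
print(lat_signed.head())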
| |
| #X,y = make_regression(n_samples=len(df.dropna()), n_features=9, n_informative=3, n_targets=9, tail_strength=0.5, noise=0.02, shuffle=False, coef=False, random_state=0) |
| |
# Features and targets are the same set of columns here: the model is trained to
# map the full table of fireball quantities back onto itself.
X = temp
y = temp

# Give the target copy suffixed names so the concatenated frame has no duplicate
# column labels (otherwise selecting by icols would pull both copies).
icols = temp.columns
jcols = [c + '_target' for c in icols]
ML = pd.concat([pd.DataFrame(X.values, columns=icols),
                pd.DataFrame(y.values, columns=jcols)], axis=1)

ML.head()
| |
from sklearn.ensemble import RandomForestRegressor
from sklearn.multioutput import MultiOutputRegressor
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt

# Keep only rows with no missing values for training
df_notnans = ML.dropna()

from warnings import simplefilter
simplefilter(action='ignore', category=FutureWarning)

X_train, X_test, y_train, y_test = train_test_split(
    df_notnans[icols], df_notnans[jcols], train_size=0.81, random_state=4)
| |
max_depth = 30

# Multi-output wrapper: fits one random forest per target column
regr_multirf = MultiOutputRegressor(RandomForestRegressor(max_depth=max_depth,
                                                          random_state=0))
regr_multirf.fit(X_train, y_train)

# A single random forest also supports multiple outputs natively, for comparison
regr_rf = RandomForestRegressor(max_depth=max_depth, random_state=2)
regr_rf.fit(X_train, y_train)

# Predict on the held-out test split
y_multirf = regr_multirf.predict(X_test)
y_rf = regr_rf.predict(X_test)

# Check the prediction (R^2) score of the multi-output model
scores = regr_multirf.score(X_test, y_test)
print("The prediction score on the test data is {:.2f}%".format(scores * 100))
| |
plt.figure()
s = 50
a = 0.4
plt.scatter(y_test.iloc[:, 0], y_test.iloc[:, 1],
            c="navy", s=s, marker="s", alpha=a, label="Data")
plt.scatter(y_multirf[:, 0], y_multirf[:, 1],
            c="cornflowerblue", s=s, alpha=a,
            label="Multi RF score=%.2f" % regr_multirf.score(X_test, y_test))
plt.scatter(y_rf[:, 0], y_rf[:, 1],
            c="c", s=s, marker="^", alpha=a,
            label="RF score=%.2f" % regr_rf.score(X_test, y_test))
# Fixed limits from the sklearn multi-output example would clip this data,
# so let matplotlib autoscale instead.
# plt.xlim([-6, 6])
# plt.ylim([-6, 6])

plt.xlabel("X Testing", color='white')   # white labels for dark notebook themes
plt.ylabel("Y Testing", color='white')
plt.title("Comparing random forests and the multi-output meta estimator")
plt.legend()
plt.show()