import pandas as pd
pd.core.common.is_list_like = pd.api.types.is_list_like
from pandas_datareader import data, wb
import numpy as np
import matplotlib.pyplot as plt
import datetime
%matplotlib inline
import plotly
import cufflinks as cf
cf.go_offline()
# Date window requested for every ticker's price history.
start = datetime.date(year=2006, month=1, day=1)
end = datetime.date(year=2016, month=1, day=1)

# Bank symbols used for the downloads here and for the return columns later.
tickers = ['BAC','C','GS','JPM','MS','WFC']

# NOTE(review): pandas-datareader deprecated the 'robinhood' source in 0.8.0;
# confirm it is still available in the installed version before relying on it.
_downloads = {
    symbol: data.DataReader(name=symbol, data_source='robinhood', start=start, end=end)
    for symbol in tickers
}

# Each download is reduced to the rows stored under its own ticker key on the
# row axis, with that index level dropped.
BAC, C, GS, JPM, MS, WFC = (
    _downloads[symbol].xs(key=symbol, axis=0, drop_level=True)
    for symbol in tickers
)
# Load the combined price table; the code below indexes its columns first by
# ticker and then by field (e.g. 'Close').
# NOTE: unpickling can execute arbitrary code, so only load 'all_banks' from a
# trusted source.
bank_stocks = pd.read_pickle('all_banks')
bank_stocks.head()

# Row-over-row percentage change of each bank's closing price, one
# '<ticker> Returns' column per ticker. The first row of every column is NaN
# because there is no previous close to compare against.
returns = pd.DataFrame()
for tick in tickers:
    # The loop body must be indented; without it the script does not parse.
    returns[tick + ' Returns'] = bank_stocks[tick]['Close'].pct_change()
returns.head()
returns.describe()
# Closing prices of every bank, selected from the 'Stock Info' column level.
_close_prices = bank_stocks.xs(key='Close', axis=1, level='Stock Info')
_close_prices.iplot()

# Pairwise correlation of the closing prices, drawn as a heatmap.
close_corr = _close_prices.corr()
close_corr.iplot(kind='heatmap', colorscale='rdylbu')

# JPM rows between 2015-01-01 and 2016-01-01, reused by the three charts below.
_jpm_window = bank_stocks.loc['2015-01-01':'2016-01-01']['JPM']
_jpm_window[['Open','High', 'Low', 'Close']].iplot(kind='candle')

_jpm_close = _jpm_window['Close']
_jpm_close.ta_plot(study='sma')
_jpm_close.ta_plot(study='boll', color='red')
import plotly.plotly as py
import plotly.figure_factory as ff
# Rows of `returns` whose index falls in calendar year 2015.
_returns_2015 = returns[returns.index.year == 2015]

# Distribution of JPM returns for that year (missing values removed first),
# followed by a box plot of every return column.
_returns_2015['JPM Returns'].dropna().iplot(kind='histogram')
_returns_2015.iplot(kind='box')
# Fit a linear regression of JPM returns on JPM's price and volume columns.
#
# `returns` is built with pct_change(), so its first 'JPM Returns' value is
# NaN. LinearRegression.fit() rejects NaN targets, so if that row lands in the
# training split the fit raises. Drop rows with a missing return from both X
# and y before splitting so the fit cannot depend on where that row falls.
_has_return = returns['JPM Returns'].notna()
y = returns.loc[_has_return, 'JPM Returns']
X = bank_stocks['JPM'].loc[_has_return, ['Open','Close','High','Low','Volume']]

from sklearn.model_selection import train_test_split
# 70/30 split with a fixed seed so the partition is reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=101)

from sklearn.linear_model import LinearRegression
lm = LinearRegression()
lm.fit(X_train,y_train)

# Predicted returns for the held-out rows, in the same order as X_test.
predictions = lm.predict(X_test)
import plotly.graph_objs as go
# Pair each held-out actual return with the model's prediction so both can be
# plotted against each other and scored.
test_vs_predicted = pd.DataFrame({'Y Test':y_test, 'Predicted Y':predictions})
test_vs_predicted.iplot(
    kind='scatter',
    x='Y Test',
    y='Predicted Y',
    mode='markers',
    size=8,
    title='JPM Predicted Returns vs Actual Returns',
    xTitle='JPM Actual Returns',
    yTitle='JPM Predicted Returns',
)

from sklearn import metrics

# Rows with a missing value are removed before computing the error metrics.
test_vs_predicted = test_vs_predicted.dropna()
_actual = test_vs_predicted['Y Test']
_predicted = test_vs_predicted['Predicted Y']

print('MAE:', metrics.mean_absolute_error(_actual, _predicted))
# The MSE is computed once and reused for the RMSE.
_mse = metrics.mean_squared_error(_actual, _predicted)
print('MSE:', _mse)
print('RMSE:', np.sqrt(_mse))
# Table of fitted regression coefficients, one row per feature column of X.
coeffecients = pd.DataFrame(lm.coef_, index=X.columns, columns=['Coeffecient'])
coeffecients