from pykalman import KalmanFilter
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy import poly1d
import seaborn as sns
import statsmodels
import statsmodels.api as sm
from statsmodels.tsa.stattools import coint, adfuller
# import plotly
# import cufflinks as cf
# cf.go_offline()
# Tickers screened pairwise by find_cointegrated_pairs below.
symbol_list = ['IBM', 'INTC', 'GE', 'MSFT', 'CSCO', 'AAPL', 'GOOG', 'AMZN']
securities_panel = get_pricing(
    symbol_list,
    fields=['price'],
    start_date='2013-01-01',
    end_date='2014-01-01',
)
# Relabel the minor axis with each entry's ``.symbol`` attribute. A list
# comprehension (instead of ``map``) produces a concrete list on both
# Python 2 and Python 3, where ``map`` would return a lazy iterator.
securities_panel.minor_axis = [
    asset.symbol for asset in securities_panel.minor_axis
]
scores, pvalues, pairs = find_cointegrated_pairs(securities_panel)
# Heatmap of ``pvalues``; cells where the value is >= 0.05 are masked out,
# so only entries below that threshold are drawn.
sns.heatmap(
    pvalues,
    xticklabels=symbol_list,
    yticklabels=symbol_list,
    cmap='RdYlGn_r',
    mask=(pvalues >= 0.05),
)
# Parenthesised single-argument form: prints the same on Python 2 and is
# valid syntax on Python 3 (the bare ``print pairs`` statement is not).
print(pairs)
# Narrow the analysis to a single pair and plot both price series.
symbol_list = ['MSFT', 'INTC']
prices = get_pricing(
    symbol_list,
    fields=['price'],
    start_date='2013-01-01',
    end_date='2014-01-01',
)['price']
# Use plain ticker strings as column labels. A list comprehension keeps this
# a concrete list on Python 3, where ``map`` would hand back an iterator.
prices.columns = [asset.symbol for asset in prices.columns]
X1 = prices[symbol_list[0]]
X2 = prices[symbol_list[1]]
# Plot each series against its own index (both columns come from the same
# frame, so the indexes coincide).
plt.plot(X1.index, X1.values)
plt.plot(X2.index, X2.values)
plt.xlabel('Time')
plt.ylabel('Series Value')
plt.legend([X1.name, X2.name]);
# Load a stored backtest by id and render its returns tear sheet.
bt = get_backtest('5bb1966437e5f1426a9febce')
bt.create_returns_tear_sheet()
# Pull prices for the pair over 2014-2018
start = '2014-01-01'
end = '2018-01-01'
pricing = get_pricing(
    ['MSFT', 'INTC'],
    fields='price',
    start_date=start,
    end_date=end,
)
# Series under study: first column minus second column
x = pricing.iloc[:, 0] - pricing.iloc[:, 1]

# Create the Kalman filter by specifying its input matrices
kf = KalmanFilter(
    transition_matrices=[1],
    observation_matrices=[1],
    initial_state_mean=0,
    initial_state_covariance=1,
    observation_covariance=1,
    transition_covariance=.01,
)

# Run the filter over the historical series; keep only the state means and
# re-index them by date so they line up with ``x`` when plotted
raw_means, _ = kf.filter(x.values)
state_means = pd.Series(raw_means.flatten(), index=x.index)

# Simple moving averages over 30, 60 and 90 observations for comparison
mean30, mean60, mean90 = (
    x.rolling(window=days).mean() for days in (30, 60, 90)
)

# Draw the curves in the same order as the legend entries below
for curve in (state_means, x, mean30, mean60, mean90):
    plt.plot(curve)
plt.title('Kalman filter estimate of average')
plt.legend([
    'Kalman Estimate',
    'X',
    '30-day Moving Average',
    '60-day Moving Average',
    '90-day Moving Average',
])
plt.xlabel('Day')
plt.ylabel('Price');

# Load a stored backtest by id and render its returns tear sheet
bt = get_backtest('5bb195d2096b314238402073')
bt.create_returns_tear_sheet()
# Regress X2 on X1 with an intercept term added to the regressors.
regressors = sm.add_constant(X1)
results = sm.OLS(X2, regressors).fit()
# Keep X1 as the bare series again, without the constant column.
X1 = regressors[symbol_list[0]]
# Fitted coefficient on X1 (see results.params for the full set).
b = results.params[symbol_list[0]]

# Residual series after removing b * X1 from X2.
Z = X2 - b * X1
Z.name = 'Spread'

plt.plot(Z.index, Z.values)
plt.xlabel('Time')
plt.ylabel('Series Value')
plt.legend([Z.name]);

check_for_stationarity(Z);