# Import a Kalman filter and other useful libraries
from pykalman import KalmanFilter
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy import poly1d
import seaborn as sns
Effect of prior
# Load daily closing prices for the two assets over the study window.
start, end = '2012-01-01', '2015-01-01'
spy, amzn = (get_pricing(symbol, fields='price', start_date=start, end_date=end)
             for symbol in ('SPY', 'AMZN'))
def do_plots(x_data, y_data, prior_mean, prior_covariance):
    """Kalman-filter a rolling linear regression of y_data on x_data and plot it.

    The hidden 2-d state is (slope, intercept); observations are 1-d prices.
    Plots the running slope/intercept estimates, a KDE of samples drawn from
    the prior, and prints the final fitted relationship.

    Parameters
    ----------
    x_data, y_data : pandas Series of prices (must share an index)
    prior_mean : length-2 sequence, prior mean for (slope, intercept)
    prior_covariance : 2x2 array, prior covariance for (slope, intercept)
    """
    delta = 1e-3
    # Transition covariance: how much the (slope, intercept) state is allowed
    # to random-walk between observations.
    trans_cov = delta / (1 - delta) * np.eye(2)
    # Observation matrix pairs each x value with a constant 1, so
    # y ≈ slope * x + intercept.
    obs_mat = np.expand_dims(np.vstack([[x_data], [np.ones(len(x_data))]]).T, axis=1)
    kf = KalmanFilter(n_dim_obs=1, n_dim_state=2,  # y is 1-d, (slope, intercept) is 2-d
                      initial_state_mean=prior_mean,
                      initial_state_covariance=prior_covariance,
                      transition_matrices=np.eye(2),
                      observation_matrices=obs_mat,
                      observation_covariance=0,
                      transition_covariance=trans_cov)
    # Use the observations y to get running estimates and errors for the state parameters
    state_means, state_covs = kf.filter(y_data.values)
    _, axarr = plt.subplots(2, sharex=True)
    # BUG FIX: original referenced undefined name `x`; the intended series
    # is the x_data argument.
    axarr[0].plot(x_data.index, state_means[:, 0], label='slope')
    axarr[0].legend()
    axarr[1].plot(x_data.index, state_means[:, 1], label='intercept')
    axarr[1].legend()
    plt.tight_layout()
    # Visualize the prior by sampling N(prior_mean, prior_covariance).
    xx, yy = np.random.multivariate_normal(prior_mean, prior_covariance, 5000).T
    sns.jointplot(xx, yy, kind="kde")
    # Parenthesized form prints identically under Python 2 and 3.
    print("final state %f = %f * %f + %f" % (y_data[-1], state_means[-1][0], x_data[-1], state_means[-1][1]))
Test from the original notebook. Note how close the estimated slope and intercept values are to each other.
do_plots(spy,amzn, [0,0], np.ones((2,2)))
Use a diagonal prior with large variance (less confidence in the prior mean).
do_plots(spy,amzn, [0,0], np.eye(2)*1e3)
OLS regression on the last 30 values. The slope and intercept are close to test 2.
# OLS of AMZN on SPY over the trailing window, including an intercept term.
window_size = 30
import statsmodels.api as sm
# add_constant prepends a column of ones so the fit estimates an intercept.
X = sm.add_constant(spy.values[-window_size:])
# FIX: the original passed formula="A ~ B + C" to sm.OLS. The array-based OLS
# API has no `formula` keyword (formulas belong to OLS.from_formula); the
# argument was ignored at best and raises in newer statsmodels, so drop it.
model = sm.OLS(amzn[-window_size:].values, X)
results = model.fit()
results.params
OLS regression over the last 30 values, with no intercept. The slope is close to test 1.
# OLS of AMZN on SPY over the trailing window, with no intercept
# (no constant column, so the fit is forced through the origin).
window_size = 30
import statsmodels.api as sm
# FIX: dropped the spurious formula="A ~ B + C" keyword — the array-based
# OLS API does not accept it (formulas belong to OLS.from_formula).
model = sm.OLS(amzn[-window_size:].values, spy.values[-window_size:])
results = model.fit()
results.params
Initial test with y=x and covariance matrix $\begin{pmatrix} 1 & 1 \\ 1 & 1 \end{pmatrix}$
do_plots(spy,spy, [0,0], np.ones((2,2)))
Test with y=x and covariance matrix $\begin{pmatrix} 1 & 0 \\ 0 & 1 \end{pmatrix}$
do_plots(spy,spy, [0,0], np.eye(2))
Less confidence in prior mean
do_plots(spy,spy, [0,0], np.eye(2) * 1e5)
Test with non zero intercept
# x is shifted down by 30, so the true relationship y = x + 30 has a non-zero intercept.
do_plots(spy-30,spy, [0,0],np.ones((2,2)))
# Same data with a diffuse diagonal prior.
do_plots(spy-30,spy, [0,0], np.eye(2) * 1e5)
Tests with large slope
# True relationship y = 25 * x: large slope, zero intercept.
do_plots(spy,spy*25, [0,0], np.ones((2,2)))
# Same data with a diffuse diagonal prior.
do_plots(spy,spy*25, [0,0], np.eye(2) * 1e3)
Test with slope and intercept
# True relationship y = 25 * x - 200: vary the prior across the runs below.
do_plots(spy,spy*25-200, [0,0], np.eye(2) * 1e3)
do_plots(spy,spy*25-200, [0,0], np.ones((2,2)))
# Extremely diffuse prior.
do_plots(spy,spy*25-200, [0,0], np.eye(2) * 1e8)
# Zero prior covariance: full confidence in the (wrong) prior mean [0, 0];
# the state can still drift via the transition covariance.
do_plots(spy,spy*25-200, [0,0], np.zeros((2,2)))
# Wrong prior mean (intercept sign flipped) held with zero prior uncertainty.
do_plots(spy,spy*25-200, [20,190], np.zeros((2,2)))
# Near-correct prior mean with moderate uncertainty.
do_plots(spy,spy*25-200, [20,-198], np.eye(2)*1000)