Correct on the 390 sets of m's and b's to predict for the next day. Below is the code up until the regression so that you can see the error:
import pandas as pd
import numpy as np
import math as m
from itertools import repeat
from datetime import datetime
import statsmodels.api as sm
x=2
y=3
z=4
rw=30 #Regression Rolling Window
def initialize(context):
context.stocks = symbol('SPY')
context.position_closed = {sid(8554) : True}
context.order_size = 25
context.position_cost = 0
set_commission(commission.PerTrade(cost=0.0))
set_slippage(slippage.FixedSlippage(spread=0.00))
def handle_data(context, data):
#for stock in context.stocks:
close_price = data[context.stocks].close_price
current_price = data[context.stocks].price
a_p = history(bar_count=x*30, frequency='1d', field='price')
a = a_p.pct_change()
a.columns=['a']
b_p = history(bar_count=y*30, frequency='1d', field='price')
b = b_p.pct_change(periods=y)
b.columns=['b']
c_p = history(bar_count=z*30, frequency='1d', field='price')
c = c_p.pct_change(periods=z)
c.columns=['c']
#RV-a
def avol(a):
a_ret=a.fillna(0)
a_log=np.log1p(a_ret).fillna(0)
a_log_mean=pd.rolling_mean(a_log, 30).fillna(0)
sqdev_a=(a_log-a_log_mean)**2.
avg_sqdev_a=pd.rolling_sum(sqdev_a, window=x)/x
a_vol=np.sqrt(avg_sqdev_a).shift().fillna(0)
return a_vol
# RV-a, 1 day ahead - independent variable for regression ols
def indavol(a):
ia_ret=a.fillna(0)
ia_log=np.log1p(ia_ret).fillna(0)
ia_log_mean=pd.rolling_mean(ia_log, 30).fillna(0)
sqdev_ia=(ia_log-ia_log_mean)**2.
avg_sqdev_ia=pd.rolling_sum(sqdev_ia, window=x)/x
ind_a_vol=np.sqrt(avg_sqdev_ia).fillna(0)
return ind_a_vol
#RV-b
def bvol(b):
b_ret=b.fillna(0)
b_log=np.log1p(b_ret).fillna(0)
b_log_mean=pd.rolling_mean(b_log, 30).fillna(0)
sqdev_b=(b_log-b_log_mean)**2.
avg_sqdev_b=pd.rolling_sum(sqdev_b, window=y)/y
b_vol=np.sqrt(avg_sqdev_b).shift().fillna(0)
return b_vol
#RV-c
def cvol(c):
c_ret=c.fillna(0)
c_log=np.log1p(c_ret).fillna(0)
c_log_mean=pd.rolling_mean(c_log, 30).fillna(0)
sqdev_c=(c_log-c_log_mean)**2.
avg_sqdev_c=pd.rolling_sum(sqdev_c, window=z)/z
c_vol=np.sqrt(avg_sqdev_c).shift().fillna(0)
return c_vol
a_output=avol(a).tail(1)
a_output=pd.Series(a_output['a'][-1])
inda_output=indavol(a).tail(1)
inda_output1=pd.Series(inda_output['a'][-1])
ind=inda_output.index
b_output=bvol(b).tail(1)
b_output=pd.Series(b_output['b'][-1])
c_output=cvol(c).tail(1)
c_output=pd.Series(c_output['c'][-1])
vol_table=pd.DataFrame({'a':a_output,'b':b_output,'c':c_output}, index=ind)
ind_table=pd.DataFrame({'ind':inda_output1}, index=ind)
#OLS Regression Setup
X = vol_table
Y = ind_table
model=pd.ols(y=Y, x=X, window=rw)