# NOTE: This is still a work in progress, but if anyone else is facing this problem, perhaps this will help.
import datetime
import functools
import re

import pandas as pd
from pandas import DataFrame,Series
from zipline.utils import tradingcalendar
# Location of the CBOE "vixcurrent" CSV; handed to fetch_csv in initialize().
vixUrl = 'http://www.cboe.com/publish/scheduledtask/mktdata/datahouse/vixcurrent.csv'
# Number of consecutive rows packed into each PrevCloses / PrevDates window
# (the window length passed to my_rolling_apply_series in shift_data()).
AdaptationWindow = 250
def initialize(context):
    """Register the VIX CSV feed under the symbol ``'VIX'``.

    ``fetch_csv`` is not defined in this file (presumably supplied by the
    hosting backtest platform -- confirm). ``addFieldsVIX`` is passed as the
    ``pre_func`` hook and ``shift_data`` as the ``post_func`` hook.

    Args:
        context: Algorithm state object; not touched here.
    """
    fetch_options = dict(
        symbol='VIX',
        skiprows=1,
        date_column='Date',
        pre_func=addFieldsVIX,
        post_func=shift_data,
    )
    fetch_csv(vixUrl, **fetch_options)
def handle_data(context, data):
    """Store the unpacked VIX history for the current bar on ``context``.

    Args:
        context: Algorithm state object; ``context.vix_vals`` is overwritten.
        data: Per-bar data container that is forwarded to
            ``unpack_from_data``.
    """
    vix_history = unpack_from_data(data, 'VIX')
    context.vix_vals = vix_history
def fix_close(df, closeField):
    """Rename the close column, parse the ``Date`` column and sort by date.

    Args:
        df: DataFrame with a ``Date`` column of ``%m/%d/%Y`` strings (which
            may carry stray ``*`` characters) and a close-price column.
        closeField: Name of the column that holds the closing price.

    Returns:
        A new DataFrame in which ``closeField`` is renamed to ``Close``, the
        ``Date`` column holds pandas timestamps, and rows are sorted by
        ``Date`` in ascending order. The frame passed in is not modified.
    """
    # rename() returns a new frame, so the assignments below never touch the
    # caller's object.
    df = df.rename(columns={closeField: 'Close'})
    # remove spurious asterisks (plain str.replace: no regex escaping needed)
    cleaned_dates = df['Date'].apply(lambda dt: dt.replace('*', ''))
    # convert date column to timestamps in one vectorised call
    df['Date'] = pd.to_datetime(cleaned_dates, format='%m/%d/%Y')
    # DataFrame.sort(columns=...) no longer exists in pandas; sort_values is
    # its replacement.
    return df.sort_values(by='Date', ascending=True)
def subsequent_trading_date(date):
    """Return the first entry of the trading calendar strictly after ``date``.

    Args:
        date: Anything ``pd.to_datetime`` accepts.

    Returns:
        The element of ``tradingcalendar.trading_days`` that follows ``date``.

    Raises:
        IndexError: If ``date`` is on or after the last calendar entry, since
            the computed position then falls outside ``trading_days``.
    """
    tdays = tradingcalendar.trading_days
    # canonicalize_datetime is applied so the value is comparable with the
    # calendar entries (its exact normalisation lives in zipline -- confirm).
    last_dt = tradingcalendar.canonicalize_datetime(pd.to_datetime(date))
    # side='right' gives the position of the first entry greater than last_dt.
    # The earlier `searchsorted(last_dt) + 1` was only right when last_dt was
    # itself in the calendar; for a weekend or holiday it skipped one session.
    next_pos = tdays.searchsorted(last_dt, side='right')
    return tdays[next_pos]
def add_last_bar(df):
    """Append a row for the next trading day and shift all data down by one.

    Args:
        df: DataFrame whose index holds the bar dates.

    Returns:
        A DataFrame extended by one row labelled with the trading day after
        ``df.index[-1]``, with every row shifted one position later and any
        rows that are entirely NaN removed.
    """
    next_session = subsequent_trading_date(df.index[-1])
    placeholder = Series({}, index=df.columns, name=next_session)
    # add today, and shift all previous data up to today. This
    # should result in the same data frames as in backtest
    extended = df.append(placeholder)
    shifted = extended.shift(1)
    return shifted.dropna(how='all')
def shift_data(df):
    """Shift the fetched data by one bar and attach rolling history columns.

    Args:
        df: DataFrame with a ``Close`` column, indexed by bar date.

    Returns:
        The shifted, forward-filled frame with two extra columns:
        ``PrevCloses`` and ``PrevDates``. Each holds a comma-separated string
        encoding of the trailing ``AdaptationWindow`` closes / index values.
    """
    log.info("Pre-Shift")
    df = add_last_bar(df)
    # ffill() returns a new frame. The previous fillna(method='ffill') call
    # dropped its result, so gaps were never actually filled.
    df = df.ffill()
    df['PrevCloses'] = my_rolling_apply_series(df['Close'], to_csv_str, AdaptationWindow)
    # A Series of the index values, labelled by that same index, so the
    # rolling helper can window over the dates themselves.
    dates = Series(df.index)
    dates.index = df.index
    df['PrevDates'] = my_rolling_apply_series(dates, to_csv_str, AdaptationWindow)
    return df
def unpack_from_data(data, sym):
    """Rebuild the trailing history Series for ``sym`` from its packed fields.

    Args:
        data: Mapping-like container keyed by symbol.
        sym: Key whose ``PrevCloses`` / ``PrevDates`` strings are decoded.

    Returns:
        A Series of floats indexed by timestamps when both packed fields are
        present; otherwise ``None`` after a warning has been logged.
    """
    has_history = (
        sym in data
        and 'PrevCloses' in data[sym]
        and 'PrevDates' in data[sym]
    )
    if not has_history:
        log.warn("Unable to unpack historical {s} data.".format(s=sym))
        return None

    encoded_values = data[sym]['PrevCloses']
    encoded_dates = data[sym]['PrevDates']
    decoded = from_csv_strs(encoded_dates, encoded_values, True)
    return decoded.apply(float)
def addFieldsVIX(df):
    """Normalise the raw VIX frame via ``fix_close``, logging before and after.

    Args:
        df: Raw VIX DataFrame containing a ``'VIX Close'`` column.

    Returns:
        The frame returned by ``fix_close(df, 'VIX Close')``.
    """
    log.info("VIX: Pre-Massage")
    massaged = fix_close(df, 'VIX Close')
    log.info("VIX: Post-Massage")
    return massaged
def to_csv_str(s):
    """Convert a series of values to a comma-separated string of said values.

    Args:
        s: Any sequence accepted by the ``Series`` constructor.

    Returns:
        Each element converted with ``str`` and joined by ``','``; an empty
        string when ``s`` has no elements.
    """
    # str.join runs in a single pass and, unlike reduce() without an initial
    # value, does not raise on an empty sequence.
    return ','.join(Series(s).apply(str))
def my_rolling_apply_series(s_in, f, n):
    """Apply ``f`` to every run of ``n`` consecutive elements of ``s_in``.

    A specific instance of rolling apply for Series of any type (not just
    numeric, a la ``pandas.rolling_apply``). Each result is labelled with the
    index of the last element of its window.

    Args:
        s_in: Input Series.
        f: Callable that receives one window (a Series of length ``n``).
        n: Window length; must be at least 1.

    Returns:
        A Series with ``len(s_in) - n + 1`` entries, indexed by
        ``s_in.index[n - 1:]``; empty when ``s_in`` is shorter than ``n``.

    Raises:
        ValueError: If ``n`` is smaller than 1.
    """
    if n < 1:
        raise ValueError("window length n must be at least 1")
    window_count = max(len(s_in) - n + 1, 0)
    # .iloc keeps the windows positional whatever the index dtype is; plain
    # [] slicing with integers is label-based on a float index.
    s_out = Series([f(s_in.iloc[start:start + n]) for start in range(window_count)])
    s_out.index = s_in.index[n - 1:]
    return s_out
def from_csv_strs(x, y, idx_is_date):
    """Reconstitute a Series from two csv-encoded strings.

    Args:
        x: Comma-separated string holding the index labels.
        y: Comma-separated string holding the values.
        idx_is_date: When truthy, each index label is converted with
            ``pd.Timestamp``.

    Returns:
        A Series of the (string) values from ``y`` labelled by the entries of
        ``x``, with a timestamp index when ``idx_is_date`` is truthy.
    """
    labels = x.split(',')
    values = y.split(',')
    series = Series(values, index=labels)
    if not idx_is_date:
        return series
    series.index = series.index.map(pd.Timestamp)
    return series