# Some imports that we'll use later on
import pandas as pd
import numpy as np
import seaborn as sns
from datetime import timedelta
import matplotlib.pyplot as plt
# For use in Quantopian Research
# To try out the free sample
# from quantopian.interactive.data.eventvestor import earnings_calendar_free as earnings_calendar
# Free version
# study_date_ranges = range(2011, 2014)
# For using the full premium version (available for $5/ mo) which this study is based off of
from quantopian.interactive.data.eventvestor import _13d_filings as dataset
# Premium Version
study_date_ranges = range(2011, 2016)
# Next import Blaze and odo (which allows us to work between Blaze and Pandas)
from odo import odo
import blaze as bz
# Inspect the dataset's datashape (its column names and types). In a notebook
# this bare expression is rendered as the cell output; it has no other effect.
dataset.dshape
#: Name of the DATE column in the dataset that is used as the event date
date_column = 'asof_date'
#: Name of the SYMBOL column in the dataset that contains the symbols being studied
symbol_column = 'symbol'
#: Pricing field requested from get_pricing when building the return windows
pricing_type = 'close_price'
# Map every study year to a [first day, last day] pair of timestamps.
earnings_ranges = {}
for date in study_date_ranges:
    first_day = pd.to_datetime('%s-01-01' % date)
    last_day = pd.to_datetime("%s-12-31" % date)
    earnings_ranges[date] = [first_day, last_day]
# Split the dataset into one pandas DataFrame per study year.
#
# The year boundaries are timestamps at midnight, so strict ``>`` / ``<``
# comparisons would silently drop every event dated exactly on Jan 1 or
# Dec 31. The lower bound is therefore inclusive, and the upper bound is
# "strictly before the day after the year end", which keeps the whole of
# the final day regardless of any time-of-day component.
dataset_calendars = {}
for date, date_ranges in earnings_ranges.items():
    year_start = min(date_ranges)
    year_end_exclusive = max(date_ranges) + pd.Timedelta(days=1)
    temp_dataset = dataset[dataset[date_column] >= year_start]
    temp_dataset = temp_dataset[temp_dataset[date_column] < year_end_exclusive]
    # odo materialises the filtered Blaze expression as a pandas DataFrame.
    dataset_calendars[date] = odo(temp_dataset, pd.DataFrame)
# Number of price bars requested on each side of the event date. It is passed
# to get_windows_of_returns as window_side_length and later subtracted from the
# resulting index to re-base the window positions.
starting_point = 10
def get_windows_of_returns(data, event_type, window_side_length=2):
    """Build a window of daily returns around every event in ``data``.

    For each row, prices are requested from ``get_pricing`` (using the
    module-level ``pricing_type`` field) for a padded calendar range around
    the event date. Up to ``window_side_length`` bars before the event, the
    event bar itself and up to ``window_side_length`` bars after it are kept
    and converted to period-over-period percentage changes.

    Parameters
    ----------
    data : pandas.DataFrame
        One row per event. Must contain the column named by ``event_type``,
        an ``'event_id'`` column and the column named by the module-level
        ``symbol_column``.
    event_type : str
        Name of the column in ``data`` that holds the event date.
    window_side_length : int, optional
        Number of price bars to keep on each side of the event date.

    Returns
    -------
    pandas.DataFrame
        One column per ``event_id``; rows are positional offsets into the
        window (0 is the earliest bar). Windows of different lengths are
        aligned on position, so shorter ones are padded with NaN.

    Notes
    -----
    Events whose prices cannot be fetched, or whose window contains no
    non-zero return (e.g. no data for a delisted security), are skipped.
    The number of events kept is printed before returning.
    """
    # Extra calendar days requested on each side so that, after weekends and
    # holidays are removed, enough bars remain to fill the window.
    buffer_days = pd.Timedelta(days=window_side_length + 20)

    change_windows = {}
    observed = 0
    for _, event in data.iterrows():
        event_date = event[event_type]
        event_id = event['event_id']
        window_start = event_date - buffer_days
        window_end = event_date + buffer_days
        symbol = event[symbol_column]
        try:
            prev_prices = get_pricing(symbol, start_date=window_start, end_date=event_date,
                                      fields=pricing_type).ffill()[-window_side_length - 1:]
            after_prices = get_pricing(symbol, start_date=event_date, end_date=window_end,
                                       fields=pricing_type).ffill()[:window_side_length + 1]
        except Exception:
            # Best effort: an event whose prices cannot be loaded is skipped.
            # ``Exception`` (not a bare except) so that KeyboardInterrupt and
            # SystemExit still stop a long-running study.
            continue

        prices = pd.concat([prev_prices, after_prices])
        # The event-date bar is present in both slices. Remove the repeat by
        # index label; de-duplicating by *value* would also delete every
        # legitimate bar whose price happens to equal an earlier one.
        prices = prices[~prices.index.duplicated()]
        asset_returns = prices.pct_change().ffill().fillna(0)

        # No usable data (empty window or no price movement at all): skip.
        if (asset_returns == 0).all():
            continue

        # .tolist() strips the time index so windows align by position.
        change_windows[event_id] = asset_returns.tolist()
        observed += 1

    print("Observed %s events" % observed)
    return pd.DataFrame({k: pd.Series(v) for k, v in change_windows.items()})
# Compute the event-window returns for every study year.
returns_range = {}
for date in dataset_calendars:
    df = dataset_calendars[date]
    print("Processing data for %s" % date)
    returns = get_windows_of_returns(
        df,
        date_column,
        window_side_length=starting_point,
    )
    # Re-base the positional index by the window side length so that the
    # rows are numbered relative to the event rather than from zero.
    shifted_index = returns.index - starting_point
    returns.index = shifted_index
    returns_range[date] = returns

# Notebook display: show the frame for the last year processed above.
pd.DataFrame(dict((k, pd.Series(v)) for k, v in returns.iteritems()))
# Plot the average cumulative return path around the event, one line per
# study year. Years are visited in sorted order so the legend is chronological
# instead of following arbitrary dict ordering.
for year in sorted(returns_range):
    returns = returns_range[year]
    # Compound each event's daily returns into a cumulative return path.
    data = (returns + 1).cumprod() - 1
    # NOTE(review): only the first 19 index positions are kept, so the tail
    # of the window is dropped from the plot. The 19 is hard-coded and does
    # not follow starting_point -- confirm this truncation is intended.
    avg_returns = pd.Series(index=returns.index[:19],
                            data=data.mean(axis=1),
                            name="%s, N=%s" % (year, len(data.columns)))
    avg_returns.plot()

# Style the axes the lines were drawn on. plt.gca() returns the current axes;
# plt.axes() would create a new, empty axes on top of the plot in current
# Matplotlib releases, leaving the styling and legend on the wrong axes.
ax = plt.gca()
ax.spines['top'].set_color('none')
ax.spines['bottom'].set_color('none')
ax.yaxis.grid(color="none")
plt.legend(loc='best')
plt.title("Average Daily Percent Change in Price During 13-D Filings (Close-to-Close)")
plt.xlabel("Number of days after 13-D Filing")
plt.ylabel("Percent Change")