import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from datetime import datetime
from datetime import timedelta
import seaborn as sns
# date range and bar frequency for the price history
start = '2002-01-01'
end = '2016-11-01'
freq = 'minute'
# minutely prices from the research environment (get_pricing is platform-provided)
aapl_minute_price = get_pricing('AAPL', fields='price', frequency=freq, start_date=start, end_date=end)
# downsample to 15-minute bars, keeping the first price of each bucket
# ('15min' instead of the deprecated '15T' alias)
aapl_minute_price = aapl_minute_price.resample('15min').first()
# convert timestamps to US/Eastern
# BUG FIX: assigning `index.tz = ...` directly is unsupported (DatetimeIndex.tz
# is read-only on modern pandas); tz_convert relabels the tz-aware index
aapl_minute_price = aapl_minute_price.tz_convert('US/Eastern')
# drop the empty 15-minute buckets the resample created outside trading hours
aapl_minute_price = aapl_minute_price.dropna()
aapl_minute_price.head()
# sanity check: there are 27 fifteen-minute observations per trading day
aapl_minute_price.loc['2002-01-02'].shape
# moving on to regression for each day
# create function that returns beta, volatility (std),
# and close to close returns (between last price t and last price t+1)
def beta_std_returns(data, start_date, end_date):
# creating empty dataframe which will contain everything later
columns = ['date', 'beta', 'volatility', 'gap', 'open_to_close', 'close_to_close']
df1 = pd.DataFrame(columns=columns)
# a variable that will be used for skipping the calculation of returns for the first day
i = 1
# main loop
while start_date != end_date:
# try-except in order to skip non-trading days
try:
# getting event date as a string
event_date = (datetime.strptime(start_date, "%Y-%m-%d") + timedelta(days=1)).strftime("%Y-%m-%d")
# creating dataframe for that specific date only
whole_day = data.loc[event_date]
# getting returns and cumprod
whole_day = (whole_day.pct_change()[1:] + 1)
whole_day = np.cumprod(whole_day)
# preparing dataframe for time series regression
whole_day = whole_day.reset_index()
whole_day.columns = ['index', 'AAPL']
# regressing (on hour)
model = pd.ols(y=whole_day['AAPL'], x=pd.to_datetime(whole_day["index"]).dt.hour)
# if event_date is not a trading day, code will return error and jump to except
# if the first trading day, skip calculation of gap, o2c, c2c returns
if i == 1:
# creating a dataframe with regression results (date, beta, std, gap, o2c, c2c)
df2 = pd.DataFrame([[event_date, model.beta[0], model.std_err[0], 0, 0, 0]], columns=columns)
i = 0
# else, calculate gap, open2close, close2close
else:
whole_day = data.loc[event_date]
# gap
new_open_price = float(whole_day.head(1).values)
last_close_price = float(last_whole_day.tail(1).values)
temp_series1 = pd.Series([last_close_price, new_open_price], index=(1,2))
gap = float(temp_series1.pct_change()[1:])
# open to close
new_open_price = float(whole_day.head(1).values)
new_close_price = float(whole_day.tail(1).values)
temp_series2 = pd.Series([new_open_price, new_close_price], index=(1,2))
open_to_close = float(temp_series2.pct_change()[1:])
# close to close
new_close_price = float(whole_day.tail(1).values)
last_close_price = float(last_whole_day.tail(1).values)
temp_series3 = pd.Series([last_close_price, new_close_price], index=(1,2))
close_to_close = float(temp_series3.pct_change()[1:])
# creating a dataframe with regression results (date, beta, std, gap, o2c, c2c)
# note, returns columns (gap, o2c, c2c) need to be shifted afterwards
# calculated returns correspond to the previous trading day
df2 = pd.DataFrame([[event_date, model.beta[0], model.std_err[0], \
gap, open_to_close, close_to_close]], columns=columns)
# appending results to main dataframe
df1 = df1.append(df2, ignore_index=True)
# generating dataframe used in calculation of subsequent returns
last_whole_day = data.loc[event_date]
# updating counter
start_date = event_date
except:
# if try returned an error, then add another day to counter and continue
event_date = (datetime.strptime(start_date, "%Y-%m-%d") + timedelta(days=1)).strftime("%Y-%m-%d")
start_date = event_date
# shift gap, open to close, close to close columns up by one row
df1.gap = df1.gap.shift(-1)
df1.open_to_close = df1.open_to_close.shift(-1)
df1.close_to_close = df1.close_to_close.shift(-1)
# drop the last row with NaN
df1 = df1.dropna()
# return the main dataframe
return df1
start = '2002-01-01'
end = '2016-11-01'
result = beta_std_returns(aapl_minute_price, start, end)
result.describe()


def _beta_vol_scatter(betas, vols, colors=None):
    """Scatter betas vs. volatility on fixed axes, optionally colored by returns."""
    if colors is None:
        plt.scatter(betas, vols)
    else:
        plt.scatter(betas, vols, c=colors, s=10, cmap='OrRd')
    plt.xlabel('Betas')
    plt.ylabel('Volatility')
    plt.axis([-0.02, 0.02, 0, 0.0035])
    # BUG FIX: the original first plot used `plt.show;` which references the
    # function without calling it, so nothing was displayed
    plt.show()


x, y = result['beta'], result['volatility']
# scatter plot of all observations
_beta_vol_scatter(x, y)
# observations colored by gap returns
_beta_vol_scatter(x, y, result['gap'])
# observations colored by open-to-close returns
_beta_vol_scatter(x, y, result['open_to_close'])
# observations colored by close-to-close returns
_beta_vol_scatter(x, y, result['close_to_close'])
# Heat map axes: volatility (10 deciles) on the y-axis, slope/beta (10 deciles)
# on the x-axis. Each cell holds the mean return of the observations falling in
# that (volatility, beta) decile pair for the period. Note that deciles are not
# cumulative: the 40% decile contains observations above the 30% quantile and
# below the 40% quantile.
# create table with means for corresponding betas and volatilities
# gap returns
def beta_std_gap(data, column='gap'):
    """Bucket observations into a 10x10 (volatility decile, beta decile) grid
    and return the mean of ``column`` within each cell.

    Rows are volatility deciles (index 10..100, 10 = lowest), columns are beta
    deciles. Deciles are non-cumulative: decile d covers values in
    [quantile((d-1)/10), quantile(d/10)); the lowest bucket is open below and
    the top bucket includes the maximum so no observation is dropped.

    Fixes vs. the original: ``DataFrame.set_value`` (removed from pandas) is
    replaced with ``.at``; the float ``np.arange(0.1, 1.1, 0.1)`` loop whose
    last value exceeds 1.0 (making ``quantile`` raise) is replaced with an
    integer decile loop; quantile edges are computed once instead of inside
    every cell; the top volatility decile no longer excludes the maximum.

    Parameters
    ----------
    data : pd.DataFrame
        Must have 'beta', 'volatility' and ``column`` columns (the output of
        beta_std_returns).
    column : str, default 'gap'
        Name of the returns column to average (backward-compatible addition).

    Returns
    -------
    pd.DataFrame (10x10) of means; cells with no observations are NaN.
    """
    # quantile edges, computed once: edges[d] = d-th decile boundary
    beta_edges = [data.beta.quantile(d / 10.0) for d in range(11)]
    vol_edges = [data.volatility.quantile(d / 10.0) for d in range(11)]

    def decile_mask(series, edges, d):
        # membership mask for decile d (1..10) of `series`
        if d == 1:
            return series < edges[1]
        if d == 10:
            # no upper bound: include the maximum observation
            return series >= edges[9]
        return (series >= edges[d - 1]) & (series < edges[d])

    deciles = np.arange(10, 110, 10)
    main_df = pd.DataFrame(index=deciles, columns=deciles)
    for v in range(1, 11):          # volatility decile -> row
        vol_mask = decile_mask(data.volatility, vol_edges, v)
        for b in range(1, 11):      # beta decile -> column
            cell = data[vol_mask & decile_mask(data.beta, beta_edges, b)]
            # .at replaces the removed DataFrame.set_value
            main_df.at[10 * v, 10 * b] = cell[column].mean()
    # return the dataframe with results
    return main_df
# Gap-return heatmap: rows are volatility deciles, columns are beta deciles.
gap_table = beta_std_gap(result).fillna(np.nan)
gap_table
# mask the decile cells that contain no observations so seaborn leaves them blank
mask = gap_table.isnull()
gap_heatmap = sns.heatmap(gap_table, mask=mask, annot=True)
# flip the y-axis so the lowest volatility decile sits at the bottom
gap_heatmap.invert_yaxis()
gap_heatmap.set_xlabel('Betas')
gap_heatmap.set_ylabel('Volatility')
gap_heatmap.set_title('Gap heatmap');
# for open to close returns, logic is identical to beta_std_gap above
def beta_std_O2C(data, column='open_to_close'):
    """Bucket observations into a 10x10 (volatility decile, beta decile) grid
    and return the mean of ``column`` within each cell.

    Rows are volatility deciles (index 10..100, 10 = lowest), columns are beta
    deciles. Deciles are non-cumulative: decile d covers values in
    [quantile((d-1)/10), quantile(d/10)); the lowest bucket is open below and
    the top bucket includes the maximum so no observation is dropped.

    Fixes vs. the original: ``DataFrame.set_value`` (removed from pandas) is
    replaced with ``.at``; the float ``np.arange(0.1, 1.1, 0.1)`` loop whose
    last value exceeds 1.0 (making ``quantile`` raise) is replaced with an
    integer decile loop; quantile edges are computed once instead of inside
    every cell; the top volatility decile no longer excludes the maximum.

    Parameters
    ----------
    data : pd.DataFrame
        Must have 'beta', 'volatility' and ``column`` columns (the output of
        beta_std_returns).
    column : str, default 'open_to_close'
        Name of the returns column to average (backward-compatible addition).

    Returns
    -------
    pd.DataFrame (10x10) of means; cells with no observations are NaN.
    """
    # quantile edges, computed once: edges[d] = d-th decile boundary
    beta_edges = [data.beta.quantile(d / 10.0) for d in range(11)]
    vol_edges = [data.volatility.quantile(d / 10.0) for d in range(11)]

    def decile_mask(series, edges, d):
        # membership mask for decile d (1..10) of `series`
        if d == 1:
            return series < edges[1]
        if d == 10:
            # no upper bound: include the maximum observation
            return series >= edges[9]
        return (series >= edges[d - 1]) & (series < edges[d])

    deciles = np.arange(10, 110, 10)
    main_df = pd.DataFrame(index=deciles, columns=deciles)
    for v in range(1, 11):          # volatility decile -> row
        vol_mask = decile_mask(data.volatility, vol_edges, v)
        for b in range(1, 11):      # beta decile -> column
            cell = data[vol_mask & decile_mask(data.beta, beta_edges, b)]
            # .at replaces the removed DataFrame.set_value
            main_df.at[10 * v, 10 * b] = cell[column].mean()
    return main_df
# Open-to-close heatmap: rows are volatility deciles, columns are beta deciles.
open2close_table = beta_std_O2C(result).fillna(np.nan)
open2close_table
# mask the decile cells that contain no observations so seaborn leaves them blank
mask = open2close_table.isnull()
open2close_heatmap = sns.heatmap(open2close_table, mask=mask, annot=True)
# flip the y-axis so the lowest volatility decile sits at the bottom
open2close_heatmap.invert_yaxis()
open2close_heatmap.set_xlabel('Betas')
open2close_heatmap.set_ylabel('Volatility')
open2close_heatmap.set_title('Open to close heatmap');
# for close to close returns, logic is identical to beta_std_gap above
def beta_std_C2C(data, column='close_to_close'):
    """Bucket observations into a 10x10 (volatility decile, beta decile) grid
    and return the mean of ``column`` within each cell.

    Rows are volatility deciles (index 10..100, 10 = lowest), columns are beta
    deciles. Deciles are non-cumulative: decile d covers values in
    [quantile((d-1)/10), quantile(d/10)); the lowest bucket is open below and
    the top bucket includes the maximum so no observation is dropped.

    Fixes vs. the original: ``DataFrame.set_value`` (removed from pandas) is
    replaced with ``.at``; the float ``np.arange(0.1, 1.1, 0.1)`` loop whose
    last value exceeds 1.0 (making ``quantile`` raise) is replaced with an
    integer decile loop; quantile edges are computed once instead of inside
    every cell; the top volatility decile no longer excludes the maximum.

    Parameters
    ----------
    data : pd.DataFrame
        Must have 'beta', 'volatility' and ``column`` columns (the output of
        beta_std_returns).
    column : str, default 'close_to_close'
        Name of the returns column to average (backward-compatible addition).

    Returns
    -------
    pd.DataFrame (10x10) of means; cells with no observations are NaN.
    """
    # quantile edges, computed once: edges[d] = d-th decile boundary
    beta_edges = [data.beta.quantile(d / 10.0) for d in range(11)]
    vol_edges = [data.volatility.quantile(d / 10.0) for d in range(11)]

    def decile_mask(series, edges, d):
        # membership mask for decile d (1..10) of `series`
        if d == 1:
            return series < edges[1]
        if d == 10:
            # no upper bound: include the maximum observation
            return series >= edges[9]
        return (series >= edges[d - 1]) & (series < edges[d])

    deciles = np.arange(10, 110, 10)
    main_df = pd.DataFrame(index=deciles, columns=deciles)
    for v in range(1, 11):          # volatility decile -> row
        vol_mask = decile_mask(data.volatility, vol_edges, v)
        for b in range(1, 11):      # beta decile -> column
            cell = data[vol_mask & decile_mask(data.beta, beta_edges, b)]
            # .at replaces the removed DataFrame.set_value
            main_df.at[10 * v, 10 * b] = cell[column].mean()
    return main_df
# Close-to-close heatmap: rows are volatility deciles, columns are beta deciles.
close2close_table = beta_std_C2C(result).fillna(np.nan)
close2close_table
# mask the decile cells that contain no observations so seaborn leaves them blank
mask = close2close_table.isnull()
close2close_heatmap = sns.heatmap(close2close_table, mask=mask, annot=True)
# flip the y-axis so the lowest volatility decile sits at the bottom
close2close_heatmap.invert_yaxis()
close2close_heatmap.set_xlabel('Betas')
close2close_heatmap.set_ylabel('Volatility')
close2close_heatmap.set_title('Close to close heatmap');