Notebook

Main Project

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from datetime import datetime
from datetime import timedelta
import seaborn as sns
In [2]:
start = '2002-01-01'
end = '2016-11-01'
freq = 'minute'
In [3]:
#getting minutely prices
aapl_minute_price = get_pricing('AAPL', fields='price', frequency=freq, start_date=start, end_date=end)
In [4]:
#resampling to 15-minute bars, keeping the first price in each bin
aapl_minute_price = aapl_minute_price.resample('15T').first()
In [5]:
#resetting the index timezone to US/Eastern
#(newer pandas disallows assigning .tz directly; .tz_convert('US/Eastern') is the equivalent there)
aapl_minute_price.index.tz = 'US/Eastern'
In [6]:
#dropping NaN values
aapl_minute_price = aapl_minute_price.dropna()
aapl_minute_price.head()
Out[6]:
2002-01-02 09:30:00-05:00    1.579
2002-01-02 09:45:00-05:00    1.598
2002-01-02 10:00:00-05:00    1.582
2002-01-02 10:15:00-05:00    1.589
2002-01-02 10:30:00-05:00    1.586
Name: Equity(24 [AAPL]), dtype: float64
In [7]:
#there are 27 observations per day: 6.5 trading hours give 26 fifteen-minute bars plus the 16:00 close bar
aapl_minute_price.loc['2002-01-02'].shape
Out[7]:
(27,)
In [9]:
# moving on to a regression for each day
# create a function that returns beta (intraday slope), volatility (the slope's standard error),
# and gap, open-to-close and close-to-close returns (close-to-close: between the last price on day t and the last price on day t+1)

def beta_std_returns(data, start_date, end_date):
    
    # creating empty dataframe which will contain everything later
    columns = ['date', 'beta', 'volatility', 'gap', 'open_to_close', 'close_to_close']
    df1 = pd.DataFrame(columns=columns)
    
    # flag used to skip the return calculations on the first trading day
    i = 1
    
    
    # main loop
    while start_date != end_date:
        
        # try-except in order to skip non-trading days
        try:

            # getting event date as a string
            event_date = (datetime.strptime(start_date, "%Y-%m-%d") + timedelta(days=1)).strftime("%Y-%m-%d")

            # creating dataframe for that specific date only
            whole_day = data.loc[event_date]
            
            # getting returns and cumprod
            whole_day = (whole_day.pct_change()[1:] + 1)
            whole_day = np.cumprod(whole_day)
            
            # preparing dataframe for time series regression
            whole_day = whole_day.reset_index()
            whole_day.columns = ['index', 'AAPL']

            # regressing the cumulative return on the hour of day
            # (pd.ols is the old pandas OLS, removed in pandas 0.20; statsmodels is the usual replacement now)
            model = pd.ols(y=whole_day['AAPL'], x=pd.to_datetime(whole_day["index"]).dt.hour)
            
            # if event_date is not a trading day, data.loc[event_date] above raises an error and execution jumps to the except block
            
            
            # if the first trading day, skip calculation of gap, o2c, c2c returns
            if i == 1:
                # creating a dataframe with regression results (date, beta, std, gap, o2c, c2c)
                df2 = pd.DataFrame([[event_date, model.beta[0], model.std_err[0], 0, 0, 0]], columns=columns)
                i = 0
            
            # else, calculate gap, open2close, close2close
            else:
                whole_day = data.loc[event_date]
                
                # gap
                new_open_price = float(whole_day.head(1).values)
                last_close_price = float(last_whole_day.tail(1).values)
                temp_series1 = pd.Series([last_close_price, new_open_price], index=(1,2))
                gap = float(temp_series1.pct_change()[1:])
                
                # open to close
                new_open_price = float(whole_day.head(1).values)
                new_close_price = float(whole_day.tail(1).values)
                temp_series2 = pd.Series([new_open_price, new_close_price], index=(1,2))
                open_to_close = float(temp_series2.pct_change()[1:])
                
                # close to close
                new_close_price = float(whole_day.tail(1).values)
                last_close_price = float(last_whole_day.tail(1).values)
                temp_series3 = pd.Series([last_close_price, new_close_price], index=(1,2))
                close_to_close = float(temp_series3.pct_change()[1:])
                
                # creating a dataframe with regression results (date, beta, std, gap, o2c, c2c)
                # note: the returns columns (gap, o2c, c2c) are shifted up afterwards so that
                # each row pairs a day's beta and volatility with the following day's returns
                df2 = pd.DataFrame([[event_date, model.beta[0], model.std_err[0], \
                                     gap, open_to_close, close_to_close]], columns=columns)
            
            
            # appending results to main dataframe
            df1 = df1.append(df2, ignore_index=True)
            
            
            # generating dataframe used in calculation of subsequent returns
            last_whole_day = data.loc[event_date]

            # updating counter
            start_date = event_date

        except:

            # if try returned an error, then add another day to counter and continue
            event_date = (datetime.strptime(start_date, "%Y-%m-%d") + timedelta(days=1)).strftime("%Y-%m-%d")
            start_date = event_date


    # shift gap, open-to-close and close-to-close up by one row, so each row pairs day t's beta and volatility with day t+1's returns
    df1.gap = df1.gap.shift(-1)
    df1.open_to_close = df1.open_to_close.shift(-1)
    df1.close_to_close = df1.close_to_close.shift(-1)
    
    # drop the last row with NaN
    df1 = df1.dropna()
    
    # return the main dataframe
    return df1
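As a side note (not part of the original run): the day-by-day while loop above could also be expressed as a groupby over calendar dates. The sketch below is a minimal alternative under that assumption; daily_beta_std is a hypothetical helper, the input is assumed to be a tz-aware 15-minute price series like aapl_minute_price, and the slope and its standard error come from np.polyfit rather than pd.ols, so numbers may differ slightly from beta_std_returns.

import numpy as np
import pandas as pd

def daily_beta_std(prices):
    rows = []
    for day, day_prices in prices.groupby(prices.index.date):
        # cumulative intraday return, as in beta_std_returns
        cum_ret = (day_prices.pct_change().dropna() + 1).cumprod()
        hours = np.asarray(cum_ret.index.hour, dtype=float)
        if len(cum_ret) < 3 or np.unique(hours).size < 2:
            continue
        # OLS slope of cumulative return on hour of day, plus the slope's standard error
        slope, intercept = np.polyfit(hours, cum_ret.values, 1)
        resid = cum_ret.values - (slope * hours + intercept)
        s_xx = ((hours - hours.mean()) ** 2).sum()
        std_err = np.sqrt((resid ** 2).sum() / (len(cum_ret) - 2) / s_xx)
        rows.append({'date': day, 'beta': slope, 'volatility': std_err,
                     'open': day_prices.iloc[0], 'close': day_prices.iloc[-1]})
    df = pd.DataFrame(rows).set_index('date')
    # next-day returns, matching the shift(-1) alignment used in beta_std_returns
    df['gap'] = df['open'].shift(-1) / df['close'] - 1
    df['open_to_close'] = df['close'].shift(-1) / df['open'].shift(-1) - 1
    df['close_to_close'] = df['close'].shift(-1) / df['close'] - 1
    return df.drop(['open', 'close'], axis=1).dropna()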
In [68]:
start = '2002-01-01'
end = '2016-11-01'
result = beta_std_returns(aapl_minute_price, start, end)
result.head()
Out[68]:
date beta volatility gap open_to_close close_to_close
0 2002-01-02 0.005331 0.000761 -0.015051 0.028117 0.012643
1 2002-01-03 0.002561 0.000584 -0.001784 0.007743 0.005945
2 2002-01-04 0.001673 0.000853 0.008865 -0.041593 -0.033097
3 2002-01-07 -0.003100 0.000758 -0.006724 -0.004923 -0.011614
4 2002-01-08 -0.000218 0.000550 0.004329 -0.047414 -0.043290
In [69]:
result.describe()
Out[69]:
beta volatility gap open_to_close close_to_close
count 3735.000000 3735.000000 3735.000000 3735.000000 3735.000000
mean 0.000024 0.000394 0.001151 0.000248 0.001383
std 0.002482 0.000262 0.013728 0.018854 0.022648
min -0.015771 0.000050 -0.145240 -0.113711 -0.173894
25% -0.001220 0.000219 -0.004065 -0.009546 -0.010107
50% -0.000011 0.000328 0.001422 0.000186 0.000944
75% 0.001236 0.000493 0.006463 0.010195 0.012998
max 0.013250 0.003054 0.130370 0.118733 0.136177

Scatter plots

In [11]:
x,y = result['beta'], result['volatility']
In [76]:
# scatter plot of all observations

plt.scatter(x,y)
plt.xlabel('Betas')
plt.ylabel('Volatility')
plt.axis([-0.02,0.02,0,0.0035])
plt.show();
In [77]:
# scatter plot with observations colored depending on gap returns

plt.scatter(x, y, c=result['gap'], s=10, cmap='OrRd')
plt.xlabel('Betas')
plt.ylabel('Volatility')
plt.axis([-0.02,0.02,0,0.0035])
plt.show();
In [78]:
# scatter plot with observations colored depending on open to close returns

plt.scatter(x, y, c=result['open_to_close'], s=10, cmap='OrRd')
plt.xlabel('Betas')
plt.ylabel('Volatility')
plt.axis([-0.02,0.02,0,0.0035])
plt.show();
In [79]:
# scatter plot with observations colored depending on close to close returns

plt.scatter(x, y, c=result['close_to_close'], s=10, cmap='OrRd')
plt.xlabel('Betas')
plt.ylabel('Volatility')
plt.axis([-0.02,0.02,0,0.0035])
plt.show();

Heatmaps

Heat map axes: volatility (10 deciles, rows) by beta/slope (10 deciles, columns). Each cell shows the mean return of the observations in that decile pair over the sample period.
Note that the deciles are not cumulative: the 40% decile contains observations above the 30th percentile and below the 40th percentile.
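Before the cell-by-cell construction below, here is a compact alternative sketch (not from the original notebook) for building the same 10x10 table with pd.qcut and a groupby. decile_table is a hypothetical helper and result is the dataframe returned by beta_std_returns; because qcut assigns boundary observations slightly differently from the explicit quantile filters used below, values right at decile edges may land in neighbouring cells.

import numpy as np
import pandas as pd

def decile_table(data, value_col):
    # decile labels 10, 20, ..., 100, matching the tables below
    labels = np.arange(10, 110, 10)
    beta_decile = pd.qcut(data['beta'], 10, labels=labels)
    vol_decile = pd.qcut(data['volatility'], 10, labels=labels)
    # mean of the chosen return within each volatility-by-beta decile bucket
    return data[value_col].groupby([vol_decile, beta_decile]).mean().unstack()

# e.g. decile_table(result, 'gap') approximately reproduces the gap table below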

In [80]:
# create a 10x10 table of mean gap returns,
# with volatility deciles as rows and beta deciles as columns

def beta_std_gap(data):
    
    # empty (10x10) dataframe that will contain means
    main_df = pd.DataFrame(index = np.arange(10, 110, 10), columns = np.arange(10, 110, 10))
    
    for i in np.arange(0.1, 1.1, 0.1):
        
        # the lowest row corresponding to <10% volatility across all beta deciles
        if i == 0.1:
        
            # 10% beta by 10% volatility
            q10 = data[(data.beta < data.beta.quantile(0.1)) & \
                         (data.volatility < data.volatility.quantile(0.1))].gap.mean()
            
            # 10% beta by 20% volatility, etc.
            q20 = data[(data.beta >= data.beta.quantile(0.1)) & \
                         (data.beta < data.beta.quantile(0.2)) & \
                         (data.volatility < data.volatility.quantile(0.1))].gap.mean()

            q30 = data[(data.beta >= data.beta.quantile(0.2)) & \
                         (data.beta < data.beta.quantile(0.3)) & \
                         (data.volatility < data.volatility.quantile(0.1))].gap.mean()
        
            q40 = data[(data.beta >= data.beta.quantile(0.3)) & \
                         (data.beta < data.beta.quantile(0.4)) & \
                         (data.volatility < data.volatility.quantile(0.1))].gap.mean()

            q50 = data[(data.beta >= data.beta.quantile(0.4)) & \
                         (data.beta < data.beta.quantile(0.5)) & \
                         (data.volatility < data.volatility.quantile(0.1))].gap.mean()

            q60 = data[(data.beta >= data.beta.quantile(0.5)) & \
                         (data.beta < data.beta.quantile(0.6)) & \
                         (data.volatility < data.volatility.quantile(0.1))].gap.mean()

            q70 = data[(data.beta >= data.beta.quantile(0.6)) & \
                         (data.beta < data.beta.quantile(0.7)) & \
                         (data.volatility < data.volatility.quantile(0.1))].gap.mean()

            q80 = data[(data.beta >= data.beta.quantile(0.7)) & \
                         (data.beta < data.beta.quantile(0.8)) & \
                         (data.volatility < data.volatility.quantile(0.1))].gap.mean()

            q90 = data[(data.beta >= data.beta.quantile(0.8)) & \
                         (data.beta < data.beta.quantile(0.9)) & \
                         (data.volatility < data.volatility.quantile(0.1))].gap.mean()

            q100 = data[(data.beta >= data.beta.quantile(0.9)) & \
                         (data.volatility < data.volatility.quantile(0.1))].gap.mean()
        
            # assigning elements
            main_df.set_value(10,10,q10)
            main_df.set_value(10,20,q20)
            main_df.set_value(10,30,q30)
            main_df.set_value(10,40,q40)
            main_df.set_value(10,50,q50)
            main_df.set_value(10,60,q60)
            main_df.set_value(10,70,q70)
            main_df.set_value(10,80,q80)
            main_df.set_value(10,90,q90)
            main_df.set_value(10,100,q100)
            
        # remaining observations (row by row)
        else:
            
            # 10% beta by 20% volatility (when i=0.2)
            q10 = data[(data.beta < data.beta.quantile(0.1)) & \
                         (data.volatility >= data.volatility.quantile(i-0.1)) & \
                         (data.volatility < data.volatility.quantile(i))].gap.mean()
            
            # 20% beta by 20% volatility (when i=0.2), etc.
            q20 = data[(data.beta >= data.beta.quantile(0.1)) & \
                         (data.beta < data.beta.quantile(0.2)) & \
                         (data.volatility >= data.volatility.quantile(i-0.1)) & \
                         (data.volatility < data.volatility.quantile(i))].gap.mean()
            
            q30 = data[(data.beta >= data.beta.quantile(0.2)) & \
                         (data.beta < data.beta.quantile(0.3)) & \
                         (data.volatility >= data.volatility.quantile(i-0.1)) & \
                         (data.volatility < data.volatility.quantile(i))].gap.mean()
            
            q40 = data[(data.beta >= data.beta.quantile(0.3)) & \
                         (data.beta < data.beta.quantile(0.4)) & \
                         (data.volatility >= data.volatility.quantile(i-0.1)) & \
                         (data.volatility < data.volatility.quantile(i))].gap.mean()
            
            q50 = data[(data.beta >= data.beta.quantile(0.4)) & \
                         (data.beta < data.beta.quantile(0.5)) & \
                         (data.volatility >= data.volatility.quantile(i-0.1)) & \
                         (data.volatility < data.volatility.quantile(i))].gap.mean()
            
            q60 = data[(data.beta >= data.beta.quantile(0.5)) & \
                         (data.beta < data.beta.quantile(0.6)) & \
                         (data.volatility >= data.volatility.quantile(i-0.1)) & \
                         (data.volatility < data.volatility.quantile(i))].gap.mean()
            
            q70 = data[(data.beta >= data.beta.quantile(0.6)) & \
                         (data.beta < data.beta.quantile(0.7)) & \
                         (data.volatility >= data.volatility.quantile(i-0.1)) & \
                         (data.volatility < data.volatility.quantile(i))].gap.mean()
            
            q80 = data[(data.beta >= data.beta.quantile(0.7)) & \
                         (data.beta < data.beta.quantile(0.8)) & \
                         (data.volatility >= data.volatility.quantile(i-0.1)) & \
                         (data.volatility < data.volatility.quantile(i))].gap.mean()
            
            q90 = data[(data.beta >= data.beta.quantile(0.8)) & \
                         (data.beta < data.beta.quantile(0.9)) & \
                         (data.volatility >= data.volatility.quantile(i-0.1)) & \
                         (data.volatility < data.volatility.quantile(i))].gap.mean()
            
            q100 = data[(data.beta >= data.beta.quantile(0.9)) & \
                         (data.volatility >= data.volatility.quantile(i-0.1)) & \
                         (data.volatility < data.volatility.quantile(i))].gap.mean()
            
            # assigning values
            main_df.set_value(int(100*i),10,q10)
            main_df.set_value(int(100*i),20,q20)
            main_df.set_value(int(100*i),30,q30)
            main_df.set_value(int(100*i),40,q40)
            main_df.set_value(int(100*i),50,q50)
            main_df.set_value(int(100*i),60,q60)
            main_df.set_value(int(100*i),70,q70)
            main_df.set_value(int(100*i),80,q80)
            main_df.set_value(int(100*i),90,q90)
            main_df.set_value(int(100*i),100,q100)
    
    # return the dataframe with results
    return main_df
In [82]:
# create the gap-return table
# fill empty cells with np.nan so they can be masked in the heatmap
gap_table = beta_std_gap(result).fillna(np.nan)
gap_table
Out[82]:
10 20 30 40 50 60 70 80 90 100
10 NaN -0.007381 0.007333 0.000907 -0.000280 0.001784 0.002704 -0.002097 0.001750 NaN
20 0.003131 -0.001260 0.002237 0.000458 0.000222 0.000997 0.000967 0.002230 0.008835 0.002386
30 0.003057 0.003278 -0.000551 0.001596 -0.002794 0.002409 0.002435 0.002562 0.000989 0.006638
40 -0.000766 0.001374 0.002081 0.000681 0.000590 0.001401 -0.000580 -0.001290 0.001594 0.000021
50 -0.001052 -0.001043 0.005256 -0.000035 0.001552 0.005235 0.003021 -0.001007 0.000586 0.001004
60 0.002654 -0.004527 0.002705 0.002705 0.000526 0.002018 0.000980 0.001453 0.000300 0.000537
70 0.002913 0.002509 0.003006 0.002440 0.002653 0.000198 0.004244 0.004939 -0.000437 0.003146
80 0.000258 0.001257 0.002853 0.006433 0.004337 0.003069 -0.006816 0.006756 -0.001468 -0.000455
90 -0.001874 0.001652 0.000011 0.000947 -0.002201 -0.001152 0.001128 0.002343 0.001035 0.000569
100 0.001541 0.002742 -0.001701 0.001597 0.001547 -0.004087 0.003998 -0.004064 -0.003676 0.004620
In [83]:
# create mask to exclude missing values
mask = gap_table.isnull()

# generate heatmap using seaborn
gap_heatmap = sns.heatmap(gap_table, annot=True, mask=mask)
gap_heatmap.invert_yaxis()
gap_heatmap.set_ylabel('Volatility')
gap_heatmap.set_xlabel('Betas')
gap_heatmap.set_title('Gap heatmap');
In [84]:
# for open-to-close returns; the logic is the same as in beta_std_gap above

def beta_std_O2C(data):
    
    main_df = pd.DataFrame(index = np.arange(10, 110, 10), columns = np.arange(10, 110, 10))
    
    for i in np.arange(0.1, 1.1, 0.1):
        
        if i == 0.1:
        
            q10 = data[(data.beta < data.beta.quantile(0.1)) & \
                         (data.volatility < data.volatility.quantile(0.1))].open_to_close.mean()
            
            q20 = data[(data.beta >= data.beta.quantile(0.1)) & \
                         (data.beta < data.beta.quantile(0.2)) & \
                         (data.volatility < data.volatility.quantile(0.1))].open_to_close.mean()

            q30 = data[(data.beta >= data.beta.quantile(0.2)) & \
                         (data.beta < data.beta.quantile(0.3)) & \
                         (data.volatility < data.volatility.quantile(0.1))].open_to_close.mean()
        
            q40 = data[(data.beta >= data.beta.quantile(0.3)) & \
                         (data.beta < data.beta.quantile(0.4)) & \
                         (data.volatility < data.volatility.quantile(0.1))].open_to_close.mean()

            q50 = data[(data.beta >= data.beta.quantile(0.4)) & \
                         (data.beta < data.beta.quantile(0.5)) & \
                         (data.volatility < data.volatility.quantile(0.1))].open_to_close.mean()

            q60 = data[(data.beta >= data.beta.quantile(0.5)) & \
                         (data.beta < data.beta.quantile(0.6)) & \
                         (data.volatility < data.volatility.quantile(0.1))].open_to_close.mean()

            q70 = data[(data.beta >= data.beta.quantile(0.6)) & \
                         (data.beta < data.beta.quantile(0.7)) & \
                         (data.volatility < data.volatility.quantile(0.1))].open_to_close.mean()

            q80 = data[(data.beta >= data.beta.quantile(0.7)) & \
                         (data.beta < data.beta.quantile(0.8)) & \
                         (data.volatility < data.volatility.quantile(0.1))].open_to_close.mean()

            q90 = data[(data.beta >= data.beta.quantile(0.8)) & \
                         (data.beta < data.beta.quantile(0.9)) & \
                         (data.volatility < data.volatility.quantile(0.1))].open_to_close.mean()

            q100 = data[(data.beta >= data.beta.quantile(0.9)) & \
                         (data.volatility < data.volatility.quantile(0.1))].open_to_close.mean()
        
            # assigning elements
            main_df.set_value(10,10,q10)
            main_df.set_value(10,20,q20)
            main_df.set_value(10,30,q30)
            main_df.set_value(10,40,q40)
            main_df.set_value(10,50,q50)
            main_df.set_value(10,60,q60)
            main_df.set_value(10,70,q70)
            main_df.set_value(10,80,q80)
            main_df.set_value(10,90,q90)
            main_df.set_value(10,100,q100)
            
        
        else:
        
            q10 = data[(data.beta < data.beta.quantile(0.1)) & \
                         (data.volatility >= data.volatility.quantile(i-0.1)) & \
                         (data.volatility < data.volatility.quantile(i))].open_to_close.mean()

            q20 = data[(data.beta >= data.beta.quantile(0.1)) & \
                         (data.beta < data.beta.quantile(0.2)) & \
                         (data.volatility >= data.volatility.quantile(i-0.1)) & \
                         (data.volatility < data.volatility.quantile(i))].open_to_close.mean()
            
            q30 = data[(data.beta >= data.beta.quantile(0.2)) & \
                         (data.beta < data.beta.quantile(0.3)) & \
                         (data.volatility >= data.volatility.quantile(i-0.1)) & \
                         (data.volatility < data.volatility.quantile(i))].open_to_close.mean()
            
            q40 = data[(data.beta >= data.beta.quantile(0.3)) & \
                         (data.beta < data.beta.quantile(0.4)) & \
                         (data.volatility >= data.volatility.quantile(i-0.1)) & \
                         (data.volatility < data.volatility.quantile(i))].open_to_close.mean()
            
            q50 = data[(data.beta >= data.beta.quantile(0.4)) & \
                         (data.beta < data.beta.quantile(0.5)) & \
                         (data.volatility >= data.volatility.quantile(i-0.1)) & \
                         (data.volatility < data.volatility.quantile(i))].open_to_close.mean()
            
            q60 = data[(data.beta >= data.beta.quantile(0.5)) & \
                         (data.beta < data.beta.quantile(0.6)) & \
                         (data.volatility >= data.volatility.quantile(i-0.1)) & \
                         (data.volatility < data.volatility.quantile(i))].open_to_close.mean()
            
            q70 = data[(data.beta >= data.beta.quantile(0.6)) & \
                         (data.beta < data.beta.quantile(0.7)) & \
                         (data.volatility >= data.volatility.quantile(i-0.1)) & \
                         (data.volatility < data.volatility.quantile(i))].open_to_close.mean()
            
            q80 = data[(data.beta >= data.beta.quantile(0.7)) & \
                         (data.beta < data.beta.quantile(0.8)) & \
                         (data.volatility >= data.volatility.quantile(i-0.1)) & \
                         (data.volatility < data.volatility.quantile(i))].open_to_close.mean()
            
            q90 = data[(data.beta >= data.beta.quantile(0.8)) & \
                         (data.beta < data.beta.quantile(0.9)) & \
                         (data.volatility >= data.volatility.quantile(i-0.1)) & \
                         (data.volatility < data.volatility.quantile(i))].open_to_close.mean()
            
            q100 = data[(data.beta >= data.beta.quantile(0.9)) & \
                         (data.volatility >= data.volatility.quantile(i-0.1)) & \
                         (data.volatility < data.volatility.quantile(i))].open_to_close.mean()
            
            # assign values
            main_df.set_value(int(100*i),10,q10)
            main_df.set_value(int(100*i),20,q20)
            main_df.set_value(int(100*i),30,q30)
            main_df.set_value(int(100*i),40,q40)
            main_df.set_value(int(100*i),50,q50)
            main_df.set_value(int(100*i),60,q60)
            main_df.set_value(int(100*i),70,q70)
            main_df.set_value(int(100*i),80,q80)
            main_df.set_value(int(100*i),90,q90)
            main_df.set_value(int(100*i),100,q100)
            
    return main_df
In [85]:
open2close_table = beta_std_O2C(result).fillna(np.nan)
open2close_table
Out[85]:
10 20 30 40 50 60 70 80 90 100
10 NaN 0.007200 -0.002328 0.000953 -0.000647 0.001209 0.004135 0.003032 0.005275 NaN
20 0.003783 0.004181 0.000197 -0.001462 -0.000135 0.000026 0.000281 -0.001481 -0.005712 0.006124
30 -0.002735 -0.002782 -0.000336 0.000888 0.000034 0.002513 0.004906 -0.002466 0.001346 -0.002931
40 0.009850 -0.001313 0.001197 0.000405 0.000642 0.000891 -0.000108 -0.000118 -0.000277 -0.004619
50 0.001067 0.004433 -0.000938 -0.001368 0.002286 0.005010 -0.003345 -0.001320 0.001995 -0.001307
60 0.001979 0.003482 -0.001359 0.002587 -0.003579 -0.001718 -0.001705 -0.006390 -0.001598 0.001541
70 0.002834 -0.000941 -0.000865 0.000011 0.000490 -0.000699 -0.002148 -0.008162 -0.001041 -0.002685
80 0.002205 0.002875 0.001526 0.008032 0.001761 -0.001609 -0.004666 -0.004652 0.001340 0.002255
90 0.006351 -0.000393 0.001354 -0.003833 -0.004386 0.001939 0.003809 0.000296 -0.002236 -0.000758
100 0.007521 0.000788 -0.003491 -0.009680 -0.002607 0.001771 -0.009843 -0.005904 -0.002314 -0.002087
In [86]:
mask = open2close_table.isnull()
open2close_heatmap = sns.heatmap(open2close_table, annot=True, mask=mask)
open2close_heatmap.invert_yaxis()
open2close_heatmap.set_ylabel('Volatility')
open2close_heatmap.set_xlabel('Betas')
open2close_heatmap.set_title('Open to close heatmap');
In [87]:
# for close-to-close returns; the logic is the same as in beta_std_gap above

def beta_std_C2C(data):
    
    main_df = pd.DataFrame(index = np.arange(10, 110, 10), columns = np.arange(10, 110, 10))
    
    for i in np.arange(0.1, 1.1, 0.1):
        
        if i == 0.1:
        
            q10 = data[(data.beta < data.beta.quantile(0.1)) & \
                         (data.volatility < data.volatility.quantile(0.1))].close_to_close.mean()
            
            q20 = data[(data.beta >= data.beta.quantile(0.1)) & \
                         (data.beta < data.beta.quantile(0.2)) & \
                         (data.volatility < data.volatility.quantile(0.1))].close_to_close.mean()

            q30 = data[(data.beta >= data.beta.quantile(0.2)) & \
                         (data.beta < data.beta.quantile(0.3)) & \
                         (data.volatility < data.volatility.quantile(0.1))].close_to_close.mean()
        
            q40 = data[(data.beta >= data.beta.quantile(0.3)) & \
                         (data.beta < data.beta.quantile(0.4)) & \
                         (data.volatility < data.volatility.quantile(0.1))].close_to_close.mean()

            q50 = data[(data.beta >= data.beta.quantile(0.4)) & \
                         (data.beta < data.beta.quantile(0.5)) & \
                         (data.volatility < data.volatility.quantile(0.1))].close_to_close.mean()

            q60 = data[(data.beta >= data.beta.quantile(0.5)) & \
                         (data.beta < data.beta.quantile(0.6)) & \
                         (data.volatility < data.volatility.quantile(0.1))].close_to_close.mean()

            q70 = data[(data.beta >= data.beta.quantile(0.6)) & \
                         (data.beta < data.beta.quantile(0.7)) & \
                         (data.volatility < data.volatility.quantile(0.1))].close_to_close.mean()

            q80 = data[(data.beta >= data.beta.quantile(0.7)) & \
                         (data.beta < data.beta.quantile(0.8)) & \
                         (data.volatility < data.volatility.quantile(0.1))].close_to_close.mean()

            q90 = data[(data.beta >= data.beta.quantile(0.8)) & \
                         (data.beta < data.beta.quantile(0.9)) & \
                         (data.volatility < data.volatility.quantile(0.1))].close_to_close.mean()

            q100 = data[(data.beta >= data.beta.quantile(0.9)) & \
                         (data.volatility < data.volatility.quantile(0.1))].close_to_close.mean()
        
            # assigning elements
            main_df.set_value(10,10,q10)
            main_df.set_value(10,20,q20)
            main_df.set_value(10,30,q30)
            main_df.set_value(10,40,q40)
            main_df.set_value(10,50,q50)
            main_df.set_value(10,60,q60)
            main_df.set_value(10,70,q70)
            main_df.set_value(10,80,q80)
            main_df.set_value(10,90,q90)
            main_df.set_value(10,100,q100)
            
        
        else:
        
            q10 = data[(data.beta < data.beta.quantile(0.1)) & \
                         (data.volatility >= data.volatility.quantile(i-0.1)) & \
                         (data.volatility < data.volatility.quantile(i))].close_to_close.mean()

            q20 = data[(data.beta >= data.beta.quantile(0.1)) & \
                         (data.beta < data.beta.quantile(0.2)) & \
                         (data.volatility >= data.volatility.quantile(i-0.1)) & \
                         (data.volatility < data.volatility.quantile(i))].close_to_close.mean()
            
            q30 = data[(data.beta >= data.beta.quantile(0.2)) & \
                         (data.beta < data.beta.quantile(0.3)) & \
                         (data.volatility >= data.volatility.quantile(i-0.1)) & \
                         (data.volatility < data.volatility.quantile(i))].close_to_close.mean()
            
            q40 = data[(data.beta >= data.beta.quantile(0.3)) & \
                         (data.beta < data.beta.quantile(0.4)) & \
                         (data.volatility >= data.volatility.quantile(i-0.1)) & \
                         (data.volatility < data.volatility.quantile(i))].close_to_close.mean()
            
            q50 = data[(data.beta >= data.beta.quantile(0.4)) & \
                         (data.beta < data.beta.quantile(0.5)) & \
                         (data.volatility >= data.volatility.quantile(i-0.1)) & \
                         (data.volatility < data.volatility.quantile(i))].close_to_close.mean()
            
            q60 = data[(data.beta >= data.beta.quantile(0.5)) & \
                         (data.beta < data.beta.quantile(0.6)) & \
                         (data.volatility >= data.volatility.quantile(i-0.1)) & \
                         (data.volatility < data.volatility.quantile(i))].close_to_close.mean()
            
            q70 = data[(data.beta >= data.beta.quantile(0.6)) & \
                         (data.beta < data.beta.quantile(0.7)) & \
                         (data.volatility >= data.volatility.quantile(i-0.1)) & \
                         (data.volatility < data.volatility.quantile(i))].close_to_close.mean()
            
            q80 = data[(data.beta >= data.beta.quantile(0.7)) & \
                         (data.beta < data.beta.quantile(0.8)) & \
                         (data.volatility >= data.volatility.quantile(i-0.1)) & \
                         (data.volatility < data.volatility.quantile(i))].close_to_close.mean()
            
            q90 = data[(data.beta >= data.beta.quantile(0.8)) & \
                         (data.beta < data.beta.quantile(0.9)) & \
                         (data.volatility >= data.volatility.quantile(i-0.1)) & \
                         (data.volatility < data.volatility.quantile(i))].close_to_close.mean()
            
            q100 = data[(data.beta >= data.beta.quantile(0.9)) & \
                         (data.volatility >= data.volatility.quantile(i-0.1)) & \
                         (data.volatility < data.volatility.quantile(i))].close_to_close.mean()
            
            # assign values
            main_df.set_value(int(100*i),10,q10)
            main_df.set_value(int(100*i),20,q20)
            main_df.set_value(int(100*i),30,q30)
            main_df.set_value(int(100*i),40,q40)
            main_df.set_value(int(100*i),50,q50)
            main_df.set_value(int(100*i),60,q60)
            main_df.set_value(int(100*i),70,q70)
            main_df.set_value(int(100*i),80,q80)
            main_df.set_value(int(100*i),90,q90)
            main_df.set_value(int(100*i),100,q100)
            
    return main_df
In [88]:
close2close_table = beta_std_C2C(result).fillna(np.nan)
close2close_table
Out[88]:
10 20 30 40 50 60 70 80 90 100
10 NaN -0.000363 0.005040 0.001846 -0.000930 0.003002 0.006857 0.000897 0.007029 NaN
20 0.006899 0.002904 0.002420 -0.000998 0.000102 0.001035 0.001241 0.000761 0.002953 0.008524
30 0.000302 0.000457 -0.000861 0.002492 -0.002772 0.004923 0.007342 0.000072 0.002344 0.003665
40 0.009097 0.000067 0.003384 0.001092 0.001164 0.002274 -0.000668 -0.001362 0.001356 -0.004563
50 -0.000024 0.003356 0.004314 -0.001407 0.003775 0.010284 -0.000325 -0.002228 0.002551 -0.000324
60 0.004624 -0.001041 0.001299 0.005280 -0.003119 0.000229 -0.000707 -0.004963 -0.001322 0.002039
70 0.005773 0.001559 0.002129 0.002492 0.003216 -0.000533 0.002048 -0.003296 -0.001452 0.000481
80 0.002478 0.004128 0.004371 0.014726 0.006055 0.001435 -0.011520 0.001907 -0.000187 0.001769
90 0.004278 0.001184 0.001325 -0.002886 -0.006616 0.000944 0.005025 0.002677 -0.001235 -0.000223
100 0.008965 0.003689 -0.005272 -0.008199 -0.001262 -0.003142 -0.005927 -0.009617 -0.006179 0.002547
In [89]:
mask = close2close_table.isnull()
close2close_heatmap = sns.heatmap(close2close_table, annot=True, mask=mask)
close2close_heatmap.invert_yaxis()
close2close_heatmap.set_ylabel('Volatility')
close2close_heatmap.set_xlabel('Betas')
close2close_heatmap.set_title('Close to close heatmap');