Notebook
In [1]:
import pandas as pd
import numpy as np
import scipy as sp
import matplotlib.pyplot as plt
import seaborn as sns
from scipy import stats

Exercise 2.1

In [2]:
# Study window for the SPY minute bars.  pd.Timestamp is used instead of the
# `pd.datetime` alias, which is deprecated and absent from newer pandas releases.
start_date = pd.Timestamp('2016-08-01')
end_date = pd.Timestamp('2018-08-23')

# NOTE(review): get_pricing is not defined or imported in this notebook; it is
# presumably injected by the hosting research environment -- confirm.
# The returned index is converted to US/Eastern time.
spy_prices = (get_pricing('SPY', start_date, end_date, frequency='minute')
              .tz_convert('US/Eastern'))
In [26]:
# Per-minute traded volume, one dot per bar, logarithmic y axis.
spy_prices['volume'].plot(style='.', logy=True)

# Second figure: volume multiplied by the bar's opening price, same styling.
plt.figure()
spy_prices['open_price'].mul(spy_prices['volume']).plot(style='.', logy=True)
Out[26]:
<matplotlib.axes._subplots.AxesSubplot at 0x7f4bb0283f10>
In [27]:
# Volume-bar threshold: the largest volume seen in any single minute bar.
tgt_volume = spy_prices['volume'].max()
# Parenthesised form prints the same text on Python 2 and is valid on Python 3.
print(tgt_volume)
6835676.0
In [8]:
def volume_bars(ohlcv, threshold):
    """Aggregate time bars into bars holding roughly ``threshold`` units of volume.

    Rows are consumed in order while a running volume total is kept.  A row
    opens a new bar when the running total has passed ``threshold`` and the
    total *before* adding that row was strictly closer to ``threshold`` than
    the total after adding it; the running total then restarts from that
    row's volume.

    Parameters
    ----------
    ohlcv : pandas.DataFrame
        Must contain the columns 'open_price', 'high', 'low', 'close_price'
        and 'volume'.  The index may be named or unnamed.
    threshold : number
        Target volume per bar.

    Returns
    -------
    pandas.DataFrame
        One row per bar with columns 'open_price' (first), 'high' (max),
        'low' (min), 'close_price' (last) and 'volume' (sum).  Each bar is
        labelled with the index value of its last input row and the index is
        named 'dates'.
    """
    columns = ['open_price', 'high', 'low', 'close_price', 'volume']
    n_rows = ohlcv.shape[0]
    if n_rows == 0:
        # Nothing to aggregate: hand back an empty frame with the output layout.
        empty = ohlcv.loc[:, columns].iloc[:0].copy()
        empty.index.name = 'dates'
        return empty

    # A plain list of floats iterates much faster than DataFrame.iterrows(),
    # which builds a Series object for every row.
    volumes = np.asarray(ohlcv['volume'], dtype=float).tolist()

    group_ids = np.zeros(n_rows, dtype=np.int64)
    running = 0
    current = 0
    for pos, vol in enumerate(volumes):
        running += vol
        # Start a new bar only if stopping before this row lands closer to the target.
        if running > threshold and abs(running - vol - threshold) < abs(running - threshold):
            current += 1
            running = vol
        group_ids[pos] = current

    bars = ohlcv.groupby(group_ids).agg({'open_price': 'first', 'high': 'max',
                                        'low': 'min', 'close_price': 'last',
                                        'volume': 'sum'})
    bars = bars[columns]

    # Group ids never decrease, so a bar ends wherever the id changes, plus on
    # the final row.  Taking labels straight from the input index avoids
    # relying on reset_index() producing a column literally called 'index',
    # which only happens when the input index is unnamed.
    last_pos = np.append(np.flatnonzero(np.diff(group_ids)), n_rows - 1)
    bars.index = ohlcv.index[last_pos]
    bars.index.name = 'dates'
    return bars
In [9]:
def dollar_bars(ohlcv, threshold):
    """Aggregate time bars into bars holding roughly ``threshold`` dollars traded.

    The dollar value of a row is ``volume * (high + low) / 2``; rows whose
    volume is zero count as zero dollars without looking at high/low.  Rows
    are consumed in order while a running dollar total is kept.  A row opens
    a new bar when the running total has passed ``threshold`` and the total
    *before* adding that row was strictly closer to ``threshold`` than the
    total after adding it; the running total then restarts from that row's
    dollar value.

    Parameters
    ----------
    ohlcv : pandas.DataFrame
        Must contain the columns 'open_price', 'high', 'low', 'close_price'
        and 'volume'.  The index may be named or unnamed.
    threshold : number
        Target dollar value per bar.

    Returns
    -------
    pandas.DataFrame
        One row per bar with columns 'open_price' (first), 'high' (max),
        'low' (min), 'close_price' (last) and 'volume' (sum).  Each bar is
        labelled with the index value of its last input row and the index is
        named 'dates'.
    """
    columns = ['open_price', 'high', 'low', 'close_price', 'volume']
    n_rows = ohlcv.shape[0]
    if n_rows == 0:
        # Nothing to aggregate: hand back an empty frame with the output layout.
        empty = ohlcv.loc[:, columns].iloc[:0].copy()
        empty.index.name = 'dates'
        return empty

    volume = np.asarray(ohlcv['volume'], dtype=float)
    high = np.asarray(ohlcv['high'], dtype=float)
    low = np.asarray(ohlcv['low'], dtype=float)
    # Zero-volume rows are forced to 0 dollars (presumably because high/low
    # are NaN on such rows -- confirm against the data source).
    dollar_values = np.where(volume != 0, volume * (high + low) / 2, 0.0).tolist()

    group_ids = np.zeros(n_rows, dtype=np.int64)
    running = 0
    current = 0
    # Iterating a float list avoids the per-row Series built by iterrows().
    for pos, dollars in enumerate(dollar_values):
        running += dollars
        # Start a new bar only if stopping before this row lands closer to the target.
        if running > threshold and abs(running - dollars - threshold) < abs(running - threshold):
            current += 1
            running = dollars
        group_ids[pos] = current

    bars = ohlcv.groupby(group_ids).agg({'open_price': 'first', 'high': 'max',
                                        'low': 'min', 'close_price': 'last',
                                        'volume': 'sum'})
    bars = bars[columns]

    # Group ids never decrease, so a bar ends wherever the id changes, plus on
    # the final row.  Taking labels straight from the input index avoids
    # relying on reset_index() producing a column literally called 'index',
    # which only happens when the input index is unnamed.
    last_pos = np.append(np.flatnonzero(np.diff(group_ids)), n_rows - 1)
    bars.index = ohlcv.index[last_pos]
    bars.index.name = 'dates'
    return bars
In [28]:
# Build volume bars from the minute data, using tgt_volume (the largest
# single-minute volume, computed in an earlier cell) as the per-bar target.
volBars = volume_bars(spy_prices, tgt_volume)
In [11]:
# Build dollar bars from the minute data with a target of 3e8 per bar.
# NOTE(review): the notebook does not record why 3e8 was chosen -- confirm.
dollarBars = dollar_bars(spy_prices, 3e8)
In [12]:
def _bars_per_week(bars):
    """Count the bars that fall in each week of ``bars``' datetime index.

    The grouping key is ``year * 100 + week`` taken from the index; the
    resulting index is converted to strings and named 'weekID', and the
    single output column is called 'count'.

    NOTE(review): ``.year`` is the calendar year while ``.week`` looks like an
    ISO week number, so days around a year boundary could get a key that
    belongs to a different week -- confirm whether that matters for the
    date range in use.
    """
    week_key = bars.index.year * 100 + bars.index.week
    per_week = bars.groupby(week_key).agg({'low': 'count'})
    per_week = per_week.rename(index=str, columns={"low": "count"})
    per_week.index.name = 'weekID'
    return per_week


# Weekly bar counts for the volume bars and for the dollar bars.
wc_vol = _bars_per_week(volBars)
wc_dolar = _bars_per_week(dollarBars)
In [13]:
# Both frames carry a single column named "count", so plotting the frames
# directly yields two identical legend entries.  Plot the column as a Series
# with an explicit label so the two lines can be told apart.
ax = wc_vol['count'].plot(label='volume bars', legend=True)
wc_dolar['count'].plot(ax=ax, label='dollar bars', legend=True)
Out[13]:
<matplotlib.axes._subplots.AxesSubplot at 0x7f4bc4190250>
In [14]:
# Open-to-close move of every bar, expressed as a fraction of the bar's close,
# stored in a one-column frame named 'returns'.
# NOTE(review): the denominator is the close price, not the open -- confirm
# that this is the intended return definition.
returns_vol = (
    (volBars['close_price'] - volBars['open_price']) / volBars['close_price']
).to_frame('returns')
returns_dollar = (
    (dollarBars['close_price'] - dollarBars['open_price']) / dollarBars['close_price']
).to_frame('returns')
In [15]:
def autocorrSP(x,lags):
    """Normalised autocorrelation of ``x`` via ``scipy.signal.correlate``.

    The series is mean-centred, correlated with itself in 'full' mode, and the
    non-negative lags are divided by ``np.var(x)`` and by ``len(x)`` (the same
    divisor at every lag), so lag 0 equals 1.

    Parameters
    ----------
    x : numpy.ndarray
        One-dimensional series (must support ``.mean()``).
    lags : sequence
        Only its length is used: the first ``len(lags)`` lags are returned.

    Returns
    -------
    numpy.ndarray
        Autocorrelation at lags ``0 .. len(lags) - 1``.
    """
    # `import scipy as sp` does not guarantee the `signal` submodule is loaded,
    # so import it explicitly instead of reaching through `sp.signal`.
    from scipy import signal

    n = len(x)
    centered = x - x.mean()
    acov = signal.correlate(centered, centered, 'full')[n - 1:]
    corr = acov / np.var(x) / n
    return corr[:len(lags)]

def autocorrFFT(x,lags):
    """Normalised autocorrelation of ``x`` computed with the FFT.

    The zero-frequency coefficient is cleared (which removes the mean), the
    power spectrum is inverse-transformed, and the result is divided by
    ``np.var(x)`` and ``len(x)`` so lag 0 equals 1.  No zero padding is
    applied, so this is the *circular* autocorrelation: products wrap around
    the end of the series.

    Parameters
    ----------
    x : numpy.ndarray
        One-dimensional series.
    lags : sequence
        Only its length is used: the first ``len(lags)`` lags are returned.

    Returns
    -------
    numpy.ndarray
        Autocorrelation at lags ``0 .. len(lags) - 1``.
    """
    variance = np.var(x)
    spectrum = np.fft.fft(x)
    # Drop the DC term.  A plain 0 is used because the `np.complex` alias is
    # deprecated and no longer exists in recent NumPy releases.
    spectrum[0] = 0
    power = spectrum.conjugate() * spectrum
    corr = np.fft.ifft(power).real / variance / len(x)
    return corr[:len(lags)]
In [16]:
# Autocorrelation of the bar returns at lags 0..99 for both bar types.
# autocorrFFT only uses len(lags); the lag values themselves serve as the
# x axis when the curves are plotted.
lags = range(100)
rvol_corrFFT = autocorrFFT(returns_vol.returns.values,lags)
rdollar_corrFFT = autocorrFFT(returns_dollar.returns.values,lags)
In [17]:
# Overlay both autocorrelation curves on one axes; the y range is clamped to
# +/-0.05 to zoom in on small correlations.
fig, ax = plt.subplots()
ax.set_ylim(-0.05, 0.05)
ax.plot(lags, rvol_corrFFT, label="rvol_corrFFT")
ax.plot(lags, rdollar_corrFFT, label="rdollar_corrFFT")
ax.legend()
Out[17]:
<matplotlib.legend.Legend at 0x7f4bcdd91890>
In [20]:
def _monthly_return_std(returns_frame):
    """Standard deviation of the 'returns' column within each calendar month.

    Rows are grouped by ``year * 100 + month`` of the datetime index; the
    resulting index is converted to strings and the column is renamed to
    'monthly_std'.
    """
    month_key = returns_frame.index.year * 100 + returns_frame.index.month
    by_month = returns_frame.groupby(month_key).agg({'returns': 'std'})
    return by_month.rename(index=str, columns={"returns": "monthly_std"})


# Month-by-month return volatility for volume bars and for dollar bars.
rvol_monthly_std = _monthly_return_std(returns_vol)
rdollar_monthly_std = _monthly_return_std(returns_dollar)
In [21]:
# DataFrame.plot labels each line with its column name, so the `label`
# argument was ignored and both lines showed up as "monthly_std".  Plotting
# the column as a Series makes the intended legend labels take effect.
ax = rvol_monthly_std['monthly_std'].plot(label="volume", legend=True)
rdollar_monthly_std['monthly_std'].plot(label="price", ax=ax, legend=True)
Out[21]:
<matplotlib.axes._subplots.AxesSubplot at 0x7f4bcd799810>
In [22]:
# Dispersion of the monthly standard deviations for each bar type.
# The scalar is taken from the 'monthly_std' column so that "%f" formats a
# number rather than relying on a one-element Series being convertible to
# float; the parenthesised print works on both Python 2 and Python 3.
print("volume std: %f" % rvol_monthly_std['monthly_std'].std())
print("dolar std: %f" % rdollar_monthly_std['monthly_std'].std())
volume std: 0.000384
dolar std: 0.000231
In [23]:
# Jarque-Bera normality test on the returns of each bar type.
# Parenthesised print: same output on Python 2, valid syntax on Python 3.
print(sp.stats.jarque_bera(returns_vol.returns.values))
print(sp.stats.jarque_bera(returns_dollar.returns.values))
(10231.881081405694, 0.0)
(11631.471699091884, 0.0)

Exercise 2.2

In [302]:
def dollar_imbalance_bars(ohlcv, threshold):
    """Aggregate time bars into bars holding roughly ``threshold`` dollars traded.

    NOTE(review): despite the name, no signed imbalance is computed here; the
    logic is the same unsigned dollar accumulation used by ``dollar_bars``.
    This looks like a starting point for the exercise -- confirm the intended
    imbalance rule before relying on it.

    The dollar value of a row is ``volume * (high + low) / 2``; rows whose
    volume is zero count as zero dollars without looking at high/low.  Rows
    are consumed in order while a running dollar total is kept.  A row opens
    a new bar when the running total has passed ``threshold`` and the total
    *before* adding that row was strictly closer to ``threshold`` than the
    total after adding it; the running total then restarts from that row's
    dollar value.

    Parameters
    ----------
    ohlcv : pandas.DataFrame
        Must contain the columns 'open_price', 'high', 'low', 'close_price'
        and 'volume'.  The index may be named or unnamed.
    threshold : number
        Target dollar value per bar.

    Returns
    -------
    pandas.DataFrame
        One row per bar with columns 'open_price' (first), 'high' (max),
        'low' (min), 'close_price' (last) and 'volume' (sum).  Each bar is
        labelled with the index value of its last input row and the index is
        named 'dates'.
    """
    columns = ['open_price', 'high', 'low', 'close_price', 'volume']
    n_rows = ohlcv.shape[0]
    if n_rows == 0:
        # Nothing to aggregate: hand back an empty frame with the output layout.
        empty = ohlcv.loc[:, columns].iloc[:0].copy()
        empty.index.name = 'dates'
        return empty

    volume = np.asarray(ohlcv['volume'], dtype=float)
    high = np.asarray(ohlcv['high'], dtype=float)
    low = np.asarray(ohlcv['low'], dtype=float)
    # Zero-volume rows are forced to 0 dollars (presumably because high/low
    # are NaN on such rows -- confirm against the data source).
    dollar_values = np.where(volume != 0, volume * (high + low) / 2, 0.0).tolist()

    group_ids = np.zeros(n_rows, dtype=np.int64)
    running = 0
    current = 0
    # Iterating a float list avoids the per-row Series built by iterrows().
    for pos, dollars in enumerate(dollar_values):
        running += dollars
        # Start a new bar only if stopping before this row lands closer to the target.
        if running > threshold and abs(running - dollars - threshold) < abs(running - threshold):
            current += 1
            running = dollars
        group_ids[pos] = current

    bars = ohlcv.groupby(group_ids).agg({'open_price': 'first', 'high': 'max',
                                        'low': 'min', 'close_price': 'last',
                                        'volume': 'sum'})
    bars = bars[columns]

    # Group ids never decrease, so a bar ends wherever the id changes, plus on
    # the final row.  Taking labels straight from the input index avoids
    # relying on reset_index() producing a column literally called 'index',
    # which only happens when the input index is unnamed.
    last_pos = np.append(np.flatnonzero(np.diff(group_ids)), n_rows - 1)
    bars.index = ohlcv.index[last_pos]
    bars.index.name = 'dates'
    return bars
In [ ]: