import pandas as pd
import numpy as np
import scipy as sp
import matplotlib.pyplot as plt
import seaborn as sns
from scipy import stats
# Load two years of minute bars for SPY and take a first look at the traded
# volume and the traded dollar value per minute.
# NOTE(review): `get_pricing` is not defined or imported in this file; it is
# presumably injected by the hosting research environment -- confirm.
# `pd.datetime` was only an alias of `datetime.datetime` and has been removed
# from recent pandas releases; `pd.Timestamp` is a datetime subclass that is
# available in every pandas version.
start_date = pd.Timestamp('2016-08-01')
end_date = pd.Timestamp('2018-08-23')
spy_prices = get_pricing(
    'SPY', start_date, end_date, frequency='minute'
).tz_convert('US/Eastern')

# Per-minute volume on a log scale.
spy_prices.volume.plot(logy=True, style='.')

# Per-minute dollar value (open price times volume) on its own figure.
plt.figure()
(spy_prices.open_price * spy_prices.volume).plot(logy=True, style='.')

# The largest single-minute volume is used later as the volume-bar threshold.
tgt_volume = spy_prices['volume'].max()
print(tgt_volume)
def volume_bars(ohlcv, threshold):
    """Aggregate consecutive rows into bars of roughly ``threshold`` volume.

    Rows are accumulated in order. A new bar is opened at the current row
    when the running volume including that row exceeds ``threshold`` and the
    running volume *before* the row was strictly closer to ``threshold``
    than the running volume *after* it. Otherwise the row is kept in the
    current bar.

    Parameters
    ----------
    ohlcv : pandas.DataFrame
        Must contain the columns ``open_price``, ``high``, ``low``,
        ``close_price`` and ``volume``. The index may carry any name (or
        none).
    threshold : number
        Target volume per bar.

    Returns
    -------
    pandas.DataFrame
        One row per bar with columns ``open_price`` (first), ``high`` (max),
        ``low`` (min), ``close_price`` (last) and ``volume`` (sum). Each bar
        is labelled with the index value of its last input row and the
        index is named ``dates``. An empty input yields an empty frame.
    """
    columns = ['open_price', 'high', 'low', 'close_price', 'volume']
    n_rows = ohlcv.shape[0]
    if n_rows == 0:
        empty = ohlcv[columns].copy()
        empty.index = empty.index.rename('dates')
        return empty

    # Only the volume column drives the grouping, so walk a plain array
    # instead of building a Series per row with iterrows().
    groups = np.empty(n_rows, dtype=np.int64)
    tot = 0
    group = 0
    for pos, volume in enumerate(ohlcv['volume'].values):
        tot += volume
        if tot > threshold and abs(tot - volume - threshold) < abs(tot - threshold):
            # Stopping before this row lands closer to the target, so the
            # row becomes the first one of the next bar.
            group += 1
            tot = volume
        groups[pos] = group

    bars = ohlcv.groupby(groups).agg({
        'open_price': 'first',
        'high': 'max',
        'low': 'min',
        'close_price': 'last',
        'volume': 'sum',
    })
    # Dict ordering is not guaranteed on every interpreter; fix the layout.
    bars = bars[columns]

    # Group labels never decrease, so a bar ends wherever the label changes
    # and at the final row. Taking the labels straight from the input index
    # avoids depending on the column name reset_index() would generate,
    # which is only 'index' when the input index is unnamed.
    last_rows = np.flatnonzero(np.append(groups[1:] != groups[:-1], True))
    bars.index = ohlcv.index[last_rows].rename('dates')
    return bars
def dollar_bars(ohlcv, threshold):
    """Aggregate consecutive rows into bars of roughly ``threshold`` dollars.

    The dollar value of a row is ``volume * (high + low) / 2``; rows with
    zero volume count as 0. Rows are accumulated in order. A new bar is
    opened at the current row when the running dollar value including that
    row exceeds ``threshold`` and the running value *before* the row was
    strictly closer to ``threshold`` than the running value *after* it.
    Otherwise the row is kept in the current bar.

    Parameters
    ----------
    ohlcv : pandas.DataFrame
        Must contain the columns ``open_price``, ``high``, ``low``,
        ``close_price`` and ``volume``. The index may carry any name (or
        none).
    threshold : number
        Target dollar value per bar.

    Returns
    -------
    pandas.DataFrame
        One row per bar with columns ``open_price`` (first), ``high`` (max),
        ``low`` (min), ``close_price`` (last) and ``volume`` (sum). Each bar
        is labelled with the index value of its last input row and the
        index is named ``dates``. An empty input yields an empty frame.
    """
    columns = ['open_price', 'high', 'low', 'close_price', 'volume']
    n_rows = ohlcv.shape[0]
    if n_rows == 0:
        empty = ohlcv[columns].copy()
        empty.index = empty.index.rename('dates')
        return empty

    volume = ohlcv['volume'].values
    price_sum = ohlcv['high'].values + ohlcv['low'].values
    # Zero-volume rows contribute exactly 0 whatever their prices are, which
    # keeps a missing price on an untraded row from turning the running
    # total into NaN.
    dollars_per_row = np.where(volume != 0, volume * price_sum / 2.0, 0)

    groups = np.empty(n_rows, dtype=np.int64)
    tot = 0
    group = 0
    for pos, dollars in enumerate(dollars_per_row):
        tot += dollars
        if tot > threshold and abs(tot - dollars - threshold) < abs(tot - threshold):
            # Stopping before this row lands closer to the target, so the
            # row becomes the first one of the next bar.
            group += 1
            tot = dollars
        groups[pos] = group

    bars = ohlcv.groupby(groups).agg({
        'open_price': 'first',
        'high': 'max',
        'low': 'min',
        'close_price': 'last',
        'volume': 'sum',
    })
    # Dict ordering is not guaranteed on every interpreter; fix the layout.
    bars = bars[columns]

    # Group labels never decrease, so a bar ends wherever the label changes
    # and at the final row. Taking the labels straight from the input index
    # avoids depending on the column name reset_index() would generate,
    # which is only 'index' when the input index is unnamed.
    last_rows = np.flatnonzero(np.append(groups[1:] != groups[:-1], True))
    bars.index = ohlcv.index[last_rows].rename('dates')
    return bars
def _week_id(index):
    """Return an integer ISO ``year * 100 + week`` label per timestamp."""
    # The ISO year and week of a date are those of the Thursday in the same
    # Monday-to-Sunday week. Deriving both from that Thursday keeps the days
    # around New Year in the right year (calendar year * 100 + ISO week
    # would file e.g. 31 Dec 2018 under week 1 of 2018), and it avoids the
    # `.week` accessor that recent pandas releases no longer provide.
    naive = index.tz_localize(None) if index.tz is not None else index
    thursday = naive + pd.to_timedelta(3 - naive.dayofweek, unit='D')
    return np.asarray(thursday.year * 100 + (thursday.dayofyear - 1) // 7 + 1)


def _weekly_bar_counts(bars):
    """Count bars per ISO week; result has one ``count`` column."""
    counts = bars.groupby(_week_id(bars.index)).agg({'low': 'count'})
    counts = counts.rename(index=str, columns={"low": "count"})
    counts.index.name = 'weekID'
    return counts


# Build both bar types from the minute data.
volBars = volume_bars(spy_prices, tgt_volume)
dollarBars = dollar_bars(spy_prices, 3e8)

# Number of bars produced per week, for each bar type, on shared axes.
wc_vol = _weekly_bar_counts(volBars)
wc_dolar = _weekly_bar_counts(dollarBars)
ax = wc_vol.plot()
wc_dolar.plot(ax=ax)
# Both frames expose a single column called "count"; name the lines so the
# legend tells them apart.
ax.legend(['volume bars', 'dollar bars'])

# Per-bar return, measured from open to close and scaled by the close.
# NOTE(review): returns are more commonly scaled by the opening price;
# confirm that dividing by the close is intended.
returns_vol = (
    (volBars.close_price - volBars.open_price) / volBars.close_price
).to_frame('returns')
returns_dollar = (
    (dollarBars.close_price - dollarBars.open_price) / dollarBars.close_price
).to_frame('returns')
def autocorrSP(x, lags):
    """Return the sample autocorrelation of ``x`` via direct correlation.

    The series is centred on its mean, correlated with itself, and the
    non-negative lags are divided by ``np.var(x) * len(x)``, so lag 0 is 1.

    Parameters
    ----------
    x : array-like
        One-dimensional series of observations.
    lags : sized sequence
        Only its length is used: the first ``len(lags)`` lags (0, 1, 2, ...)
        are returned.

    Returns
    -------
    numpy.ndarray
        Autocorrelation at lags ``0 .. len(lags) - 1`` (fewer if ``x`` is
        shorter than that).
    """
    # Import the submodule explicitly: `import scipy as sp` on its own does
    # not guarantee that `sp.signal` has been loaded.
    from scipy import signal

    x = np.asarray(x, dtype=float)
    n = len(x)
    centered = x - x.mean()
    # 'full' output has 2n - 1 entries; index n - 1 is lag 0.
    acov = signal.correlate(centered, centered, 'full')[n - 1:]
    corr = acov / np.var(x) / n
    return corr[:len(lags)]
def autocorrFFT(x, lags):
    """Return the sample autocorrelation of ``x`` computed with the FFT.

    The series is centred on its mean and zero-padded to twice its length
    before transforming. Without the padding the inverse transform of the
    power spectrum is the *circular* autocorrelation, in which the end of
    the series wraps around onto its start; with it the result is the
    ordinary (linear) autocorrelation, i.e. the same quantity
    ``autocorrSP`` computes by direct correlation.

    Parameters
    ----------
    x : array-like
        One-dimensional, non-empty series of observations.
    lags : sized sequence
        Only its length is used: the first ``len(lags)`` lags (0, 1, 2, ...)
        are returned.

    Returns
    -------
    numpy.ndarray
        Autocorrelation at lags ``0 .. len(lags) - 1`` (fewer if ``x`` is
        shorter than that), normalised by ``np.var(x) * len(x)`` so that
        lag 0 is 1.
    """
    x = np.asarray(x, dtype=float)
    n = len(x)
    # Removing the mean up front replaces zeroing the DC bin afterwards
    # (which used the removed `np.complex` alias) and stays exact once the
    # series is zero-padded.
    centered = x - x.mean()
    spectrum = np.fft.fft(centered, n=2 * n)
    acov = np.fft.ifft(spectrum.conjugate() * spectrum).real[:n]
    corr = acov / np.var(x) / n
    return corr[:len(lags)]
def _monthly_std(returns):
    """Return the per-calendar-month std of the ``returns`` column."""
    month_id = np.asarray(returns.index.year * 100 + returns.index.month)
    monthly = returns.groupby(month_id).agg({'returns': 'std'})
    return monthly.rename(index=str, columns={"returns": "monthly_std"})


# Autocorrelation of bar returns for the first 100 lags, both bar types.
lags = range(100)
rvol_corrFFT = autocorrFFT(returns_vol['returns'].values, lags)
rdollar_corrFFT = autocorrFFT(returns_dollar['returns'].values, lags)

fig = plt.figure()
ax = plt.gca()
# Narrow y-range so the small non-zero lags are visible (lag 0 is cut off).
ax.set_ylim(-0.05, 0.05)
plt.plot(lags, rvol_corrFFT, label="rvol_corrFFT")
plt.plot(lags, rdollar_corrFFT, label="rdollar_corrFFT")
plt.legend()

# Month-by-month standard deviation of returns for both bar types.
rvol_monthly_std = _monthly_std(returns_vol)
rdollar_monthly_std = _monthly_std(returns_dollar)

ax = rvol_monthly_std.plot()
rdollar_monthly_std.plot(ax=ax)
# Both frames expose a single column called "monthly_std"; set the legend
# entries explicitly so the two lines can be told apart.
ax.legend(["volume", "price"])

# How much the monthly std itself varies. Select the column so that a scalar
# (not a one-element Series) is handed to the %f format.
print("volume std: %f" % rvol_monthly_std['monthly_std'].std())
print("dolar std: %f" % rdollar_monthly_std['monthly_std'].std())

# Jarque-Bera normality test on the returns of each bar type.
print(stats.jarque_bera(returns_vol['returns'].values))
print(stats.jarque_bera(returns_dollar['returns'].values))
def dollar_imbalance_bars(ohlcv, threshold):
    """Aggregate consecutive rows into bars of roughly ``threshold`` dollars.

    NOTE(review): despite the name, no signed imbalance is computed here.
    The grouping rule is the same as the one in ``dollar_bars``, so this
    looks like an unfinished copy -- confirm the intended behaviour before
    relying on it.

    The dollar value of a row is ``volume * (high + low) / 2``; rows with
    zero volume count as 0. Rows are accumulated in order. A new bar is
    opened at the current row when the running dollar value including that
    row exceeds ``threshold`` and the running value *before* the row was
    strictly closer to ``threshold`` than the running value *after* it.
    Otherwise the row is kept in the current bar.

    Parameters
    ----------
    ohlcv : pandas.DataFrame
        Must contain the columns ``open_price``, ``high``, ``low``,
        ``close_price`` and ``volume``. The index may carry any name (or
        none).
    threshold : number
        Target dollar value per bar.

    Returns
    -------
    pandas.DataFrame
        One row per bar with columns ``open_price`` (first), ``high`` (max),
        ``low`` (min), ``close_price`` (last) and ``volume`` (sum). Each bar
        is labelled with the index value of its last input row and the
        index is named ``dates``. An empty input yields an empty frame.
    """
    columns = ['open_price', 'high', 'low', 'close_price', 'volume']
    n_rows = ohlcv.shape[0]
    if n_rows == 0:
        empty = ohlcv[columns].copy()
        empty.index = empty.index.rename('dates')
        return empty

    volume = ohlcv['volume'].values
    price_sum = ohlcv['high'].values + ohlcv['low'].values
    # Zero-volume rows contribute exactly 0 whatever their prices are, which
    # keeps a missing price on an untraded row from turning the running
    # total into NaN.
    dollars_per_row = np.where(volume != 0, volume * price_sum / 2.0, 0)

    groups = np.empty(n_rows, dtype=np.int64)
    tot = 0
    group = 0
    for pos, dollars in enumerate(dollars_per_row):
        tot += dollars
        if tot > threshold and abs(tot - dollars - threshold) < abs(tot - threshold):
            # Stopping before this row lands closer to the target, so the
            # row becomes the first one of the next bar.
            group += 1
            tot = dollars
        groups[pos] = group

    bars = ohlcv.groupby(groups).agg({
        'open_price': 'first',
        'high': 'max',
        'low': 'min',
        'close_price': 'last',
        'volume': 'sum',
    })
    # Dict ordering is not guaranteed on every interpreter; fix the layout.
    bars = bars[columns]

    # Group labels never decrease, so a bar ends wherever the label changes
    # and at the final row. Taking the labels straight from the input index
    # avoids depending on the column name reset_index() would generate,
    # which is only 'index' when the input index is unnamed.
    last_rows = np.flatnonzero(np.append(groups[1:] != groups[:-1], True))
    bars.index = ohlcv.index[last_rows].rename('dates')
    return bars