Inspired by Grant Kiehne's example. Differences here include:
-- Michael Van Kleeck, 2015-04-16
import numpy as np
import pandas as pd
from scipy import stats
from pytz import timezone
import matplotlib.pyplot as plt
import seaborn as sns
# Fetch minute-frequency volume for SPY and SH over 2014-03-27..2015-03-27 and
# discard missing entries. NOTE(review): `get_pricing` is not defined in this
# file; presumably it is injected by the hosting research environment -- confirm.
data_cached = get_pricing(
    ['SPY', 'SH'],
    start_date='2014-03-27',
    end_date='2015-03-27',
    fields='volume',
    frequency='minute'
).dropna()

# Work on a deep copy so the fetched frame is never mutated and the analysis
# below can be re-run without fetching again.
data = data_cached.copy(deep=True)
def day_of_year(dt):
    """Return the day of the year (1-366) for a date or datetime *dt*."""
    calendar_fields = dt.timetuple()
    return calendar_fields.tm_yday
def minute_of_day(dt):
    """Return the minute of the day (0-1439) for a time or datetime *dt*.

    Only the ``hour`` and ``minute`` attributes of *dt* are read; any seconds
    are ignored.
    """
    whole_hours, extra_minutes = dt.hour, dt.minute
    return 60 * whole_hours + extra_minutes
# Tag every row with the two coordinates used later to pivot the data.
# Day-of-year is taken from the index's dates as stored (no timezone
# conversion is applied on this line).
data['day_of_year'] = np.vectorize(day_of_year)(data.index.date)

# Minute-of-day is taken after converting the index to US/Eastern, so the
# values are expressed in that timezone's wall-clock time.
data['minute_of_day'] = np.vectorize(minute_of_day)(
    data.index.tz_convert(timezone('US/Eastern')).time
)

# Bare expression: presumably here so an interactive session displays the
# last rows; it does not modify `data`.
data.tail()
# Rolling-window length in rows. 390 = 6.5 hours of one-minute bars;
# presumably one regular trading session -- TODO confirm.
MINUTES_PER_MARKET_DAY = 390

# The first two columns hold the per-symbol volume series; columns appended
# afterwards (day_of_year, minute_of_day) are deliberately left out.
volumes = data.iloc[:, 0:2]

# Trailing mean and standard deviation over the last MINUTES_PER_MARKET_DAY
# rows. DataFrame.rolling replaces pd.rolling_mean / pd.rolling_std, which were
# deprecated in pandas 0.18 and removed in 0.23; the defaults (min_periods equal
# to the window, sample std with ddof=1) are the same, so the results match.
# Rows before the first full window come out as NaN.
means = volumes.rolling(window=MINUTES_PER_MARKET_DAY).mean()
sds = volumes.rolling(window=MINUTES_PER_MARKET_DAY).std()

# Z-score of each minute's volume relative to its own trailing window.
zs = (volumes - means) / sds

# Second column's z-score minus the first column's.
data['z_diff'] = zs.iloc[:, 1] - zs.iloc[:, 0]

# Bare expression: presumably here so an interactive session displays the
# last rows; it does not modify `data`.
data.tail()
# Reshape z_diff into a grid with one row per day_of_year and one column per
# minute_of_day. Values that share a cell are combined by pivot_table's
# default aggregation, and cells with no data are filled with 0.
ht_map = data.pivot_table(
    values='z_diff',
    index=data['day_of_year'],
    columns=data['minute_of_day'],
    fill_value=0
)

# Draw the grid with a diverging colour map and attach a colour bar.
plt.pcolor(ht_map, cmap='coolwarm')
plt.colorbar()

# Pin the colour limits to the smallest and largest values in the grid.
plt.clim(ht_map.min().min(), ht_map.max().max())
def reduce_tick_labels(labels, n):
    """Keep every n-th label and blank out the rest.

    Returns a new list with the same length as *labels*. Positions selected
    by the slice ``[::n]`` keep their original label; every other position
    holds the empty string ``''``. *labels* must support ``len()`` and slicing.
    """
    sparse = [''] * len(labels)
    sparse[::n] = labels[::n]
    return sparse
# Label spacing for the heat map axes, counted in columns / rows of ht_map.
MINUTES_PER_HOUR = 60
# Approximate. NOTE(review): every 30th *row* is labelled, so this equals a
# month only if ht_map has one row per calendar day -- confirm.
DAYS_PER_MONTH = 30

# Blank out all but every n-th tick label so the axes stay readable.
xticks = reduce_tick_labels(labels=ht_map.columns, n=MINUTES_PER_HOUR)
yticks = reduce_tick_labels(labels=ht_map.index, n=DAYS_PER_MONTH)

# Draw the same grid with seaborn, without cell borders, using the thinned
# tick labels.
sns.heatmap(
    ht_map,
    linewidths=0,
    xticklabels=xticks,
    yticklabels=yticks
)