Notebook

Heatmap Example

Inspired by Grant Kiehne's example. Differences here include:

  • Vectorized z-score calculations (rolling mean and standard deviation computed over whole columns)
  • Seaborn heatmap with day_of_year on one axis and minute_of_day on the other

-- Michael Van Kleeck, 2015-04-16

In [193]:
import numpy as np
import pandas as pd
from scipy import stats
from pytz import timezone
import matplotlib.pyplot as plt
import seaborn as sns
In [194]:
# Fetch minute-frequency volume for SPY and SH over the one-year window
# below, then drop every row that contains a missing value.
data_cached = get_pricing(
    ['SPY', 'SH'],
    start_date='2014-03-27',
    end_date='2015-03-27',
    fields='volume',
    frequency='minute'
).dropna()
In [195]:
# Work on a (deep, the default) copy so that columns added to `data` further
# down never touch the cached download.
data = data_cached.copy()
In [196]:
def day_of_year(dt):
    """Return the ordinal day within the year (1-366) for a date or datetime."""
    first_of_january = dt.replace(month=1, day=1)
    # Difference of proleptic ordinals is zero on Jan 1, hence the + 1
    return dt.toordinal() - first_of_january.toordinal() + 1

def minute_of_day(dt):
    """Return the number of whole minutes since midnight (0-1439).

    Works for any object exposing ``hour`` and ``minute`` attributes, such as
    ``datetime.time`` or ``datetime.datetime``.
    """
    hours, minutes = dt.hour, dt.minute
    return 60 * hours + minutes

# Convert the index to exchange-local time once, so that day_of_year and
# minute_of_day are both read off the same clock (previously the day came
# from the UTC date while the minute came from US/Eastern).
index_eastern = data.index.tz_convert(timezone('US/Eastern'))

# DatetimeIndex exposes these fields for the whole index at once, which
# avoids the per-element Python calls that np.vectorize performs.
data['day_of_year'] = index_eastern.dayofyear
data['minute_of_day'] = index_eastern.hour * 60 + index_eastern.minute

data.tail()
Out[196]:
Security(8554 [SPY]) Security(32268 [SH]) day_of_year minute_of_day
2015-03-27 19:56:00+00:00 1637914 29900 86 956
2015-03-27 19:57:00+00:00 635225 25600 86 957
2015-03-27 19:58:00+00:00 922990 33090 86 958
2015-03-27 19:59:00+00:00 1840869 31764 86 959
2015-03-27 20:00:00+00:00 3394906 120455 86 960
In [197]:
# Rolling window length, counted in one-minute bars
MINUTES_PER_MARKET_DAY = 390

# The first two columns hold the per-security volumes
volumes = data.iloc[:, 0:2]

# DataFrame.rolling replaces the removed pd.rolling_mean / pd.rolling_std
# helpers and works on every column at once; defaults are unchanged
# (a full window is required, std uses ddof=1), so the first
# MINUTES_PER_MARKET_DAY - 1 rows are NaN.
rolling_volumes = volumes.rolling(window=MINUTES_PER_MARKET_DAY)
means = rolling_volumes.mean()
sds = rolling_volumes.std()

# Per-security z-score of each bar against its own trailing window
zs = (volumes - means) / sds

# Second column's z-score minus the first column's
data['z_diff'] = zs.iloc[:, 1] - zs.iloc[:, 0]
data.tail()
Out[197]:
Security(8554 [SPY]) Security(32268 [SH]) day_of_year minute_of_day z_diff
2015-03-27 19:56:00+00:00 1637914 29900 86 956 -4.596030
2015-03-27 19:57:00+00:00 635225 25600 86 957 -0.448697
2015-03-27 19:58:00+00:00 922990 33090 86 958 -1.211515
2015-03-27 19:59:00+00:00 1840869 31764 86 959 -4.897227
2015-03-27 20:00:00+00:00 3394906 120455 86 960 -4.060298
In [198]:
# One row per day_of_year, one column per minute_of_day; each cell is the
# mean z_diff of the bars that fall in it (pivot_table's default aggregation)
# and cells with no data are filled with 0.
ht_map = pd.pivot_table(data, values='z_diff', index='day_of_year',
                        columns='minute_of_day', fill_value=0)

# Use colour limits symmetric about zero so that the neutral midpoint of the
# diverging 'coolwarm' map corresponds to z_diff == 0 rather than to the
# midpoint of an arbitrary min/max range.
z_limit = np.abs(ht_map.values).max()

fig, ax = plt.subplots()
mesh = ax.pcolor(ht_map.values, cmap='coolwarm', vmin=-z_limit, vmax=z_limit)
colorbar = fig.colorbar(mesh, ax=ax)
colorbar.set_label('z_diff')

# pcolor is given a bare array, so the axes count row/column positions of
# ht_map, not the day/minute values themselves.
ax.set_xlabel('minute_of_day (column position in ht_map)')
ax.set_ylabel('day_of_year (row position in ht_map)')
ax.set_title('z_diff by day of year and minute of day');
In [199]:
def reduce_tick_labels(labels, n):
    """Thin out a sequence of tick labels, keeping one label per ``n``.

    Returns a list of the same length as ``labels``. For positive ``n`` the
    positions 0, n, 2n, ... carry the original label and every other
    position holds an empty string.
    """
    sparse = [''] * len(labels)
    # Extended-slice assignment drops the surviving labels back into the
    # same positions they were taken from.
    sparse[::n] = labels[::n]
    return sparse

# Label spacing: one x label per MINUTES_PER_HOUR columns and one y label per
# DAYS_PER_MONTH rows of ht_map; everything in between is left blank.
MINUTES_PER_HOUR = 60
DAYS_PER_MONTH = 30  # approximately

xticks = reduce_tick_labels(ht_map.columns, MINUTES_PER_HOUR)
yticks = reduce_tick_labels(ht_map.index, DAYS_PER_MONTH)

# linewidths=0 draws the cells without separating lines
heatmap_options = dict(linewidths=0, xticklabels=xticks, yticklabels=yticks)
sns.heatmap(ht_map, **heatmap_options)
Out[199]:
<matplotlib.axes._subplots.AxesSubplot at 0x7f2024d7fa90>