
101 Alphas Project: Pipeline Factor Information Coefficient

This analysis computes the Spearman rank correlation between an alpha factor and 1-, 5-, and 10-day forward price movements. We disaggregate by sector to isolate sector-specific trends.
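
As a quick illustration (a minimal sketch on made-up numbers, separate from the pipeline below): the information coefficient for a single day is just the Spearman rank correlation between that day's factor values and the subsequent returns.

In [ ]:
import scipy as sp
import scipy.stats
# toy factor values and forward returns for five hypothetical assets
factor_values = [0.9, 0.1, -0.3, 0.5, -0.7]
fwd_returns = [0.02, 0.00, -0.01, 0.01, -0.02]
ic, p_value = sp.stats.spearmanr(factor_values, fwd_returns)
print(ic)  # 1.0 here, since the two rank orderings agree perfectly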

In [188]:
from __future__ import division
from quantopian.pipeline import Pipeline
from quantopian.pipeline import CustomFactor
from quantopian.research import run_pipeline
from quantopian.pipeline.data import morningstar
from quantopian.pipeline.data.builtin import USEquityPricing
from quantopian.pipeline.factors import Latest
import numpy as np
import pandas as pd
import scipy as sp
import pyfolio as pf
import matplotlib.pyplot as plt
In [162]:
start_date = pd.to_datetime('2014-10-1')
end_date = pd.to_datetime('2015-2-1')

Here's the alpha we are looking to analyze:

In [163]:
alpha_name = "alpha101"
class Alpha101(CustomFactor):
    inputs = [USEquityPricing.close, USEquityPricing.high, USEquityPricing.low]
    window_length = 2
    
    def compute(self, today, assets, out, close, high, low):
        # Using the close of t-1 as the open of t; workaround for a known bug in the open field.
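        # Daily change scaled by the day's high-low range; the .001 guards against a zero range.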
        out[:] = (close[-1] - close[-2]) / ((high[-1] - low[-1]) + .001)

We'll also use a five-day liquidity factor to ensure we only deal with liquid names:

In [164]:
class Liquidity(CustomFactor):   
    inputs = [USEquityPricing.volume, USEquityPricing.close] 
    window_length = 5

    def compute(self, today, assets, out, volume, close): 
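        # Average daily dollar volume (close * volume) over the trailing window.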
        out[:] = (volume * close).mean(axis=0)

Pull in sector codes for sector analysis:

In [165]:
class Sector(CustomFactor):
    inputs = [morningstar.asset_classification.morningstar_sector_code]
    window_length = 1
    def compute(self, today, assets, out, msc):
        out[:] = msc[-1]
In [166]:
alpha101 = Alpha101()
sector = Sector()
liquidity = Liquidity()

liquidity_rank = liquidity.rank(ascending=False)
# the alpha101.eq(alpha101) bit is a handy trick to filter out rows containing NaNs.
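# (1000 > liquidity_rank) keeps roughly the 1,000 most liquid names (rank 1 = highest dollar volume).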
ok_universe = (1000 > liquidity_rank)  & alpha101.eq(alpha101) & sector.eq(sector)

alpha101_rank = alpha101.rank(mask=ok_universe,  ascending=False)
In [167]:
pipe = Pipeline()
pipe.add(alpha101, alpha_name)
pipe.add(sector, 'sector_code')
pipe.set_screen(ok_universe)
In [168]:
pipe.show_graph(format='png')
Out[168]:
In [169]:
daily_alpha = run_pipeline(pipe, start_date=start_date, end_date=end_date)
daily_alpha = daily_alpha.reset_index().rename(columns={'level_0': 'date', 'level_1':'sid'})
daily_alpha.head()
Out[169]:
date sid alpha101 sector_code
0 2014-10-01 00:00:00+00:00 Equity(2 [AA]) 0.788382 101
1 2014-10-01 00:00:00+00:00 Equity(24 [AAPL]) 0.809102 311
2 2014-10-01 00:00:00+00:00 Equity(62 [ABT]) -0.705596 206
3 2014-10-01 00:00:00+00:00 Equity(64 [ABX]) -0.996979 101
4 2014-10-01 00:00:00+00:00 Equity(67 [ADSK]) 0.177936 311
In [170]:
len(equities)
Out[170]:
1685

Now pull in pricing data and compute the 1-, 5-, and 10-day forward price movements.

In [210]:
equities = daily_alpha.sid.unique()
equities = map(lambda x: x.sid, equities)
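# Pad the pricing window past end_date so 10-day forward returns exist for the last alpha dates.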
td = pd.Timedelta(days=25)
prices = get_pricing(equities, start_date=start_date, end_date=end_date+td, fields='open_price')
In [211]:
col_n = '%s_day_fwd_price_pct_change'

daily_alpha = daily_alpha.set_index(['date', 'sid'])
for i in [1, 5, 10]:
    delta = prices.pct_change(i).shift(-i)
    daily_alpha[col_n%i] = delta.stack()
daily_alpha = daily_alpha.reset_index()
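
To see why pct_change(i).shift(-i) gives the i-day forward return aligned with today's row, here is a minimal sketch on a toy price series (separate from the pipeline data):

In [ ]:
toy = pd.Series([100., 101., 103., 102., 105.])
# pct_change(2) is the trailing 2-day return; shifting it back 2 rows aligns each
# value with the row where the 2-day holding period starts
print(toy.pct_change(2).shift(-2))
# row 0 -> (103 - 100) / 100 = 0.03, row 1 -> (102 - 101) / 101, and NaN at the end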
In [212]:
daily_alpha.head()
Out[212]:
date sid alpha101 sector_code 1_day_fwd_price_pct_change 5_day_fwd_price_pct_change 10_day_fwd_price_pct_change y_m
0 2014-10-01 00:00:00+00:00 Equity(2 [AA]) 0.788382 101 -0.025418 -0.009919 -0.132052 2014-10-01 00:00:00+00:00
1 2014-10-01 00:00:00+00:00 Equity(24 [AAPL]) 0.809102 311 -0.012327 -0.018193 -0.026046 2014-10-01 00:00:00+00:00
2 2014-10-01 00:00:00+00:00 Equity(62 [ABT]) -0.705596 206 -0.019738 -0.014507 -0.039715 2014-10-01 00:00:00+00:00
3 2014-10-01 00:00:00+00:00 Equity(64 [ABX]) -0.996979 101 0.005420 -0.058266 -0.077236 2014-10-01 00:00:00+00:00
4 2014-10-01 00:00:00+00:00 Equity(67 [ADSK]) 0.177936 311 0.062699 0.056148 -0.082725 2014-10-01 00:00:00+00:00

Now compute the Spearman rank correlation within each day and sector.

In [213]:
cn = "%s_day_IC"
src = pd.DataFrame()
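# In the agg() call below, x is one forward-return column within a (date, sector) group;
# x.name is that column's label and x.index are the group's rows, so each horizon's
# returns are rank-correlated with the factor values for the same rows.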
src_fn = lambda x: sp.stats.spearmanr(daily_alpha.loc[x.index, alpha_name],
                                   daily_alpha.loc[x.index, x.name])[0]

src = daily_alpha.groupby(['date', 'sector_code']).agg({col_n%1: src_fn,
                                                        col_n%5: src_fn,
                                                        col_n%10: src_fn})

src = src.rename(columns={col_n%1: cn%1, col_n%5: cn%5,col_n%10: cn%10})
src = src[[cn%1, cn%5, cn%10]]
src = src.reset_index()
In [214]:
sector_names = {
 101: 'Basic Materials',
 102: 'Consumer Cyclical',
 103: 'Financial Services',
 104: 'Real Estate',
 205: 'Consumer Defensive',
 206: 'Healthcare',
 207: 'Utilities',
 308: 'Communication Services',
 309: 'Energy',
 310: 'Industrials',
 311: 'Technology' ,
}
In [215]:
for sc, cor in src.groupby('sector_code'):
    t = alpha_name + ": " + sector_names[sc] + " Information Coefficient"
    cor.drop('sector_code', axis=1).set_index('date').plot(title=t)

Now calculate the monthly IC by sector, with standard errors for the error bars.

In [216]:
def src_std_error(rho, n):
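    # Approximate standard error of a rank correlation estimated from n observations.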
    return (1-rho**2)/np.sqrt(n-1)

z = lambda x: x.replace(day=1)
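# y_m: each row's date snapped to the first of its month, used as the monthly grouping key.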
c = daily_alpha.reset_index()['date'].apply(z)
c.index = daily_alpha.index
daily_alpha['y_m'] = c
monthly_src = pd.DataFrame()
src_fn = lambda x: sp.stats.spearmanr(daily_alpha.loc[x.index, alpha_name],
                                   daily_alpha.loc[x.index, x.name])[0]

monthly_src = daily_alpha.groupby(['y_m', 'sector_code']).agg({col_n%1: src_fn,
                                                               col_n%5: src_fn,
                                                               col_n%10: src_fn})
obs_count = daily_alpha.groupby(['y_m', 'sector_code']).count().drop(alpha_name, axis=1)
src_std_error_df = src_std_error(monthly_src, obs_count)

monthly_src = monthly_src.rename(columns={col_n%1: cn%1, col_n%5: cn%5,col_n%10: cn%10})
monthly_src = monthly_src[[cn%1, cn%5, cn%10]]
monthly_src = monthly_src.reset_index()

src_std_error_df = src_std_error_df.rename(columns={col_n%1: cn%1, col_n%5: cn%5,col_n%10: cn%10})
src_std_error_df = src_std_error_df[[cn%1, cn%5, cn%10]]
src_std_error_df = src_std_error_df.reset_index()
In [229]:
f, axes = plt.subplots(6,2, sharex=True, sharey=True, figsize=(20,45))
axes = axes.flatten()
i = 0
for sc, cor in monthly_src.groupby(['sector_code']):
    err = src_std_error_df[src_std_error_df.sector_code == sc].set_index('y_m')
    cor.drop('sector_code', axis=1).set_index('y_m').plot(kind='bar', 
                                                          title=sector_names[sc], 
                                                          ax=axes[i],
                                                          yerr=err)
    i+=1
fig = plt.gcf()
fig.suptitle(alpha_name + " Monthly Information Coefficient by Sector", fontsize=24, x=.5, y=.93)
Out[229]:
<matplotlib.text.Text at 0x7f240530dbd0>
In [235]:
all_src = pd.DataFrame()
src_fn = lambda x: sp.stats.spearmanr(daily_alpha.loc[x.index, alpha_name],
                                      daily_alpha.loc[x.index, x.name])[0]

all_src = daily_alpha.groupby(['sector_code']).agg({col_n%1: src_fn,
                                                    col_n%5: src_fn,
                                                    col_n%10: src_fn})
obs_count = daily_alpha.groupby(['sector_code']).count().drop(alpha_name, axis=1)
src_std_error_df = src_std_error(all_src, obs_count)

all_src = all_src.rename(columns={col_n%1: cn%1, col_n%5: cn%5,col_n%10: cn%10})
all_src = all_src[[cn%1, cn%5, cn%10]]
all_src = all_src.reset_index()

src_std_error_df = src_std_error_df.rename(columns={col_n%1: cn%1, col_n%5: cn%5,col_n%10: cn%10})
src_std_error_df = src_std_error_df[[cn%1, cn%5, cn%10]]
src_std_error_df = src_std_error_df.reset_index()
all_src.sector_code = all_src.sector_code.apply(lambda x: sector_names[x])
src_std_error_df.sector_code = src_std_error_df.sector_code.apply(lambda x: sector_names[x])
In [245]:
t =  "{} Information Coefficient ({} - {})".format(alpha_name, 
                                                   start_date.strftime('%m/%d/%Y'),
                                                   end_date.strftime('%m/%d/%Y'))
all_src.set_index('sector_code').plot(kind='bar',
                                      title=t, 
                                      yerr=src_std_error_df.set_index('sector_code'))
Out[245]:
<matplotlib.axes._subplots.AxesSubplot at 0x7f23ffb51790>
In [320]:
# pd.qcut(x, 5, labels=False): couldn't get this working (see the qcut sketch after this cell), so bucket by hand instead.
def quint_bucket(x):
    if x > .8:
        return 5
    elif x > .6:
        return 4
    elif x > .4:
        return 3
    elif x > .2:
        return 2
    else:
        return 1

daily_alpha = daily_alpha.sort(['date', 'sector_code']).set_index(['date', 'sector_code'])
daily_alpha['alpha_percentile'] = daily_alpha.alpha101.groupby(
            level=['date', 'sector_code']).transform(lambda x: x.rank(pct=True))
daily_alpha  = daily_alpha.reset_index()
daily_alpha['quint_bucket'] = daily_alpha.alpha_percentile.apply(quint_bucket)

mean_ret_by_q = daily_alpha.groupby(['sector_code', 'quint_bucket']).mean()[[col_n%1, col_n%5, col_n%10]]
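
As an aside, here is a minimal sketch (on made-up numbers) of how pd.qcut can assign quintile labels; ranking first sidesteps the duplicate-bin-edge errors that raw factor values can trigger, which may be why the qcut attempt above failed.

In [ ]:
x = pd.Series([0.1, 0.1, 0.2, 0.5, 0.9, -0.3, 0.0, 0.4, 0.7, 0.3])
# rank first so ties don't collapse bin edges, then cut into 5 equal-count buckets labeled 1..5
print(pd.qcut(x.rank(method='first'), 5, labels=False) + 1)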
In [330]:
f, axes = plt.subplots(6,2, sharex=False, sharey=True, figsize=(20,45))
axes = axes.flatten()
i = 0
for sc, cor in mean_ret_by_q.groupby(level='sector_code'):
    cor = cor.reset_index().drop('sector_code', axis=1).set_index('quint_bucket')
    cor.plot(kind='bar', title=sector_names[sc], ax=axes[i])
    axes[i].set_xlabel('factor quintile')
    axes[i].set_ylabel('mean price % change')
    i+=1
fig = plt.gcf()
fig.suptitle(alpha_name + ": Mean Return By Factor Quintile", fontsize=24, x=.5, y=.93)
Out[330]:
<matplotlib.text.Text at 0x7f23fa308b10>
In [ ]: