Notebook

Alphalens + Quantopian | How To

In [1]:
from quantopian.research import run_pipeline
from quantopian.pipeline import Pipeline
from quantopian.pipeline import factors, filters, classifiers
from quantopian.pipeline.factors import CustomFactor, Returns, AverageDollarVolume, SimpleMovingAverage,VWAP
from quantopian.pipeline.classifiers.morningstar import Sector
from quantopian.pipeline.filters.morningstar import IsPrimaryShare
from quantopian.pipeline.data import morningstar
from quantopian.pipeline.data.builtin import USEquityPricing

import math
import datetime
import numpy as np
import pandas as pd
import scipy as sp
import matplotlib
import matplotlib.pyplot as plt
import matplotlib.cm
import seaborn as sns
import scipy.stats as stats
In [2]:
# Lookup table: Morningstar sector code -> display name used for the
# sector labels in the tear sheet. -1 is the catch-all bucket.
MORNINGSTAR_SECTOR_CODES = dict([
    (-1,  'Misc'),
    (101, 'Basic Materials'),
    (102, 'Consumer Cyclical'),
    (103, 'Financial Services'),
    (104, 'Real Estate'),
    (205, 'Consumer Defensive'),
    (206, 'Healthcare'),
    (207, 'Utilities'),
    (308, 'Communication Services'),
    (309, 'Energy'),
    (310, 'Industrials'),
    (311, 'Technology'),
])

Helper functions

In [3]:
def high_volume_universe(top_liquid, min_price = None, min_volume = None):  
    """
    Build a pipeline filter selecting a universe of liquid stocks while
    filtering out hard to trade ones.

    Parameters
    ----------
    top_liquid : int
        Target size of the universe (passed as ``target_size``).
    min_price : number, optional
        If given and positive, additionally require the 21-day average
        close price to be at least this value.
    min_volume : number, optional
        If given and positive, additionally require the 21-day average
        volume to be at least this value.

    Returns
    -------
    high_volume_tradable - zipline.pipeline.filter
    """
    
    full_filter = filters.make_us_equity_universe(
        target_size=top_liquid,
        rankby=factors.AverageDollarVolume(window_length=200),
        mask=filters.default_us_equity_universe_mask(),
        groupby=classifiers.morningstar.Sector(),
        max_group_weight=0.3,
        smoothing_func=lambda f: f.downsample('month_start'),
    )
    
    # Compare against None explicitly: `None > 0` only evaluates (to False)
    # by accident on Python 2 and raises TypeError on Python 3.
    if min_price is not None and min_price > 0:
        price = SimpleMovingAverage(inputs=[USEquityPricing.close],
                                    window_length=21, mask=full_filter)
        full_filter &= (price >= min_price)
        
    if min_volume is not None and min_volume > 0:
        volume = SimpleMovingAverage(inputs=[USEquityPricing.volume],
                                     window_length=21, mask=full_filter)
        full_filter &= (volume >= min_volume)
        
    return full_filter

def run_pipeline_chunks(pipe, start_date, end_date, chunks_len = None):
    """
    Drop-in replacement for run_pipeline.
    run_pipeline fails over a very long period of time (memory usage),
    so we need to split in chunks the pipeline and concatenate the results

    Parameters
    ----------
    pipe : pipeline object handed unchanged to run_pipeline
    start_date, end_date : anything accepted by pd.Timestamp
    chunks_len : pd.Timedelta, optional
        Length of each chunk; defaults to 26 weeks.

    Returns
    -------
    The per-chunk results concatenated with pd.concat.
    """
    chunks  = []
    current = pd.Timestamp(start_date)
    end     = pd.Timestamp(end_date)
    step    = pd.Timedelta(weeks=26) if chunks_len is None else chunks_len
    one_day = pd.Timedelta(days=1)
    
    while current <= end:
        
        # never request past the overall end date
        current_end = min(current + step, end)
        
        # single-argument form: same text as before, valid on Python 2 and 3
        print('Running pipeline: %s  -  %s' % (current, current_end))
        results = run_pipeline(pipe, current.strftime("%Y-%m-%d"), current_end.strftime("%Y-%m-%d"))
        chunks.append(results)
        
        # pipeline returns more days than requested (if no trading day), so get last date from the results.
        # An empty chunk has no last date: keep the requested chunk end so the loop still advances.
        if len(results.index) > 0:
            current_end = results.index.get_level_values(0)[-1].tz_localize(None)
        current = current_end + one_day

    return pd.concat(chunks)
       
def construct_factor_history(factor_cls, start_date='2015-10-1', end_date='2016-2-1', 
                             factor_name='factor', top_liquid=500,
                             sector_column=None, filter_universe=True):
    """
    Run a pipeline over [start_date, end_date] and return a DataFrame holding
    the daily factor values (column `factor_name`) and, when `sector_column`
    is given, the sector codes (column `sector_column`) for a liquidity
    constrained universe. Rows containing NaN are dropped. The returned
    DataFrame can be used in the factor tear sheet.

    `factor_cls` is called as `factor_cls(mask=universe)` and must return a
    pipeline term.
    """
    # Pick the universe: the full tradable-universe filter is very slow,
    # the fallback is simply the top names by 20-day average dollar volume.
    ok_universe = (
        high_volume_universe(top_liquid)
        if filter_universe
        else AverageDollarVolume(window_length=20).top(top_liquid)
    )

    masked_factor = factor_cls(mask=ok_universe)
    masked_sector = Sector(mask=ok_universe)

    pipe = Pipeline()
    pipe.add(masked_factor, factor_name)
    # the sector column is optional because it is very slow too
    if sector_column is not None:
        pipe.add(masked_sector, sector_column)
    pipe.set_screen(ok_universe)

    # chunked run instead of a single run_pipeline call
    history = run_pipeline_chunks(pipe, start_date=start_date, end_date=end_date)
    return history.dropna()

def get_daily_price(sid_universe, start_date, end_date, extra_days_before=0, extra_days_after=0):
    """
    Fetch 'open_price' pricing via get_pricing for `sid_universe` over a
    widened date range.

    `start_date` / `end_date` are "%Y-%m-%d" strings. The range is extended
    backwards by `extra_days_before` and forwards by `extra_days_after`,
    each scaled by 365/252 and rounded up, plus 3 extra days as a safety
    margin. Returns whatever get_pricing returns.
    """
    date_format = "%Y-%m-%d"

    def _padding(day_count):
        # scale by 365/252, round up, then add 3 days just to be sure
        return datetime.timedelta(days=math.ceil(day_count * 365.0/252.0) + 3)

    padded_start = datetime.datetime.strptime(start_date, date_format) - _padding(extra_days_before)
    padded_end = datetime.datetime.strptime(end_date, date_format) + _padding(extra_days_after)

    return get_pricing(sid_universe,
                       start_date=padded_start.strftime(date_format),
                       end_date=padded_end.strftime(date_format),
                       fields='open_price')

run_tear_sheet glues all the functions together

In [11]:
import alphalens
import alphalens.performance as perf 
import alphalens.utils as utils

def run_tear_sheet( factor,
                    factor_name     = 'factor',
                    sector_names    = None,
                    start_date      = '2015-06-30',
                    end_date        = '2016-05-30',
                    top_liquid      = 500,
                    filter_universe = False,
                    avgretplot      = (10, 50),
                    periods         = (1, 5, 10),
                    show_sector_plots = True,
                    quantiles       = 5,
                    filter_zscore   = 10):
    """
    Run the factor pipeline, fetch pricing and render the alphalens tear sheet.

    Parameters
    ----------
    factor : callable
        Called as `factor(mask=universe)`; must return the pipeline term to analyse.
    factor_name : str
        Column name used for the factor in the pipeline output.
    sector_names : dict, optional
        Passed to alphalens as `groupby_labels`.
    start_date, end_date : str
        "%Y-%m-%d" strings delimiting the analysis period.
    top_liquid : int
        Universe size.
    filter_universe : bool
        Use the (slow) tradable-universe filter instead of a plain top-N filter.
    avgretplot : tuple (days_before, days_after) or None
        Passed to alphalens; also determines how far pricing is padded
        around the analysis period.
    periods : tuple of int
        Passed to alphalens as `periods`.
    show_sector_plots : bool
        Load the sector column and pass it to alphalens as `groupby`.
    quantiles, filter_zscore
        Passed through to alphalens.
    """
          
    sector_column = 'sector_code' if show_sector_plots else None

    # Pricing padding is derived from the arguments instead of notebook
    # globals, so the function works on a fresh kernel and honours the
    # avgretplot that was actually passed in. The forward padding also has
    # to cover the longest entry in `periods`.
    days_before, days_after = avgretplot if avgretplot is not None else (0, 0)
    days_after = max([days_after] + list(periods))
    
    ## Run the Pipeline
    print('construct factor history')
    factor = construct_factor_history(factor, start_date=start_date, end_date=end_date, 
                                      factor_name=factor_name, top_liquid=top_liquid,
                                      sector_column=sector_column, filter_universe=filter_universe)
    
    ## Get pricing
    sid_universe = list( factor.index.levels[1].unique() )
    print('Get pricing, universe %d entries' % len(sid_universe))
    pricing = get_daily_price(sid_universe, start_date=start_date, end_date=end_date, 
                              extra_days_before=days_before, extra_days_after=days_after)
        
    ## Use Alphalens to create a factor tear sheet
    print('alphalens')
    sectors_series = factor[sector_column] if show_sector_plots else None
    alphalens.tears.create_factor_tear_sheet(factor=factor[factor_name],
                                             prices=pricing,
                                             groupby=sectors_series,
                                             show_groupby_plots=show_sector_plots,
                                             periods=periods,
                                             quantiles=quantiles,
                                             filter_zscore=filter_zscore,
                                             groupby_labels=sector_names,
                                             long_short=True,
                                             avgretplot=avgretplot,
                                             turnover_for_all_periods=True)

Define our factor

In [16]:
#sid_universe
In [42]:
class ROE(CustomFactor):
    """Custom factor exposing the most recent value of
    morningstar.operation_ratios.roe for each asset."""
    inputs = [morningstar.operation_ratios.roe]
    window_length = 1

    def compute(self, today, assets, out, roe):
        # The single input is the roe column (previously misnamed `close`);
        # take the last row of the window.
        out[:] = roe[-1]

class Alpha41(CustomFactor):
    """Custom factor computing high * low from the first row of the window
    (the only row with the default window_length of 1)."""
    # Only the columns actually used are requested; close was never read.
    inputs = [USEquityPricing.low, USEquityPricing.high]
    window_length = 1

    def compute(self, today, assets, out, low, high):
        out[:] = high[0]*low[0]
        
class p_high(CustomFactor):
    """Custom factor exposing USEquityPricing.high from the first row of the
    window (the only row with the default window_length of 1)."""
    # Only the column actually used is requested; low and close were never read.
    inputs = [USEquityPricing.high]
    window_length = 1

    def compute(self, today, assets, out, high):
        out[:] = high[0]
        
class p_low(CustomFactor):
    """Custom factor exposing USEquityPricing.low from the first row of the
    window (the only row with the default window_length of 1)."""
    # Only the column actually used is requested; high and close were never read.
    inputs = [USEquityPricing.low]
    window_length = 1

    def compute(self, today, assets, out, low):
        out[:] = low[0]
        
class p_close(CustomFactor):
    """Custom factor exposing USEquityPricing.close from the first row of the
    window (the only row with the default window_length of 1)."""
    # Only the column actually used is requested; low and high were never read.
    inputs = [USEquityPricing.close]
    window_length = 1

    def compute(self, today, assets, out, close):
        out[:] = close[0]
        
        
        

Define settings

In [53]:
def factor41(mask):
    """
    Build the Alpha41 pipeline term: (low * high) ** 0.5 - vwap,
    with every input term restricted to `mask`.
    """
    high = p_high(mask=mask)
    low = p_low(mask=mask)
    vwap = VWAP(window_length=1, mask=mask)
    # geometric mean of low and high, minus the 1-day VWAP
    return (low * high) ** .5 - vwap

def factor42(mask):
    """
    Build the Alpha42 pipeline term: rank(vwap - close) / rank(vwap + close),
    with every input term restricted to `mask`.
    """
    last_close = p_close(mask=mask)
    day_vwap = VWAP(window_length=1, mask=mask)

    numerator = (day_vwap - last_close).rank()
    denominator = (day_vwap + last_close).rank()
    return numerator / denominator

# Tear sheet settings: every value below is passed to run_tear_sheet by the
# cell that runs the tear sheet.

# Analysis period ("%Y-%m-%d" strings) and universe size
start_date  = '2016-05-01'# alternative start left in the original: 2005-04-30 (TODO confirm or remove)
end_date    = '2016-05-30'
top_liquid  = 500
filter_universe = False  # very slow, filter out untradable stocks

# Cumulative average returns plot by quantile
days_before = 10
days_after  = 50
avgretplot  = (days_before, days_after) # None to avoid plotting

# alphalens specific
periods = (1, 5, 10)
show_sector_plots = False # very slow to load the sector column in pipeline
quantiles = 5
filter_zscore = 10

Run the tear sheet

In [54]:
# Render the tear sheet for factor42 (column name 'Alpha42') using the
# settings defined in the settings cell; sector labels come from
# MORNINGSTAR_SECTOR_CODES.
run_tear_sheet( factor = factor42,
                factor_name  = 'Alpha42',
                sector_names = MORNINGSTAR_SECTOR_CODES,
                start_date  = start_date,
                end_date    = end_date,
                top_liquid  = top_liquid,
                filter_universe = filter_universe,
                avgretplot  = avgretplot,
                periods     = periods,
                show_sector_plots = show_sector_plots,
                quantiles = quantiles,
                filter_zscore = filter_zscore)
construct factor history
Running pipeline: 2016-05-01 00:00:00  -  2016-05-30 00:00:00
Get pricing, universe 587 entries
alphalens
Returns Analysis
1 5 10
Ann. alpha -0.222 -0.324 -0.031
beta -0.771 -0.331 0.241
Mean Period Wise Return Top Quantile (bps) -13.665 -7.569 1.147
Mean Period Wise Return Bottom Quantile (bps) 0.314 -1.115 -3.713
Mean Period Wise Spread (bps) -13.963 -6.432 4.866
Information Analysis
1 5 10
IC Mean -0.026 0.004 0.051
IC Std. 0.162 0.125 0.100
t-stat(IC) -0.735 0.150 2.365
p-value(IC) 0.471 0.882 0.028
IC Skew -0.006 -0.229 -0.532
IC Kurtosis -1.051 -0.594 0.434
Ann. IR -2.548 0.520 8.193
Turnover Analysis
1 5 10
Quantile 1 Mean Turnover 0.228 0.248 0.264
Quantile 2 Mean Turnover 0.156 0.197 0.231
Quantile 3 Mean Turnover 0.175 0.221 0.285
Quantile 4 Mean Turnover 0.199 0.252 0.279
Quantile 5 Mean Turnover 0.090 0.141 0.181
1 5 10
Mean Factor Rank Autocorrelation 0.826 0.825 0.828
<matplotlib.figure.Figure at 0x7efe23d57750>
In [ ]: