from quantopian.pipeline import Pipeline, CustomFactor
from quantopian.pipeline.data import EquityPricing, factset
from quantopian.pipeline.factors import Returns, SimpleMovingAverage, AverageDollarVolume
from quantopian.pipeline.domain import (
AT_EQUITIES, # Austria
AU_EQUITIES, # Australia
BE_EQUITIES, # Belgium
CA_EQUITIES, # Canada
CH_EQUITIES, # Switzerland
CN_EQUITIES, # China
DE_EQUITIES, # Germany
DK_EQUITIES, # Denmark
ES_EQUITIES, # Spain
FI_EQUITIES, # Finland
FR_EQUITIES, # France
GB_EQUITIES, # Great Britain
HK_EQUITIES, # Hong Kong
IE_EQUITIES, # Ireland
IN_EQUITIES, # India
IT_EQUITIES, # Italy
JP_EQUITIES, # Japan
NL_EQUITIES, # Netherlands
NO_EQUITIES, # Norway
NZ_EQUITIES, # New Zealand
PT_EQUITIES, # Portugal
SE_EQUITIES, # Sweden
SG_EQUITIES, # Singapore
US_EQUITIES, # United States
)
from quantopian.research import run_pipeline
from quantopian.pipeline.filters import Q500US
# from quantopian.pipeline.classifiers.fundamentals import Sector
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
# import talib as ta
The helper function below produces Alphalens-formatted factor and forward-returns data from a pipeline factor, a domain, and date bounds.
def evaluate_factor(factor,
                    domain,
                    start_date,
                    end_date,
                    factor_screen=None,
                    quantiles=5,
                    returns_lengths=(1, 5, 10),
                    session='Overnight',
                    chunksize=None,
                    ):
    """Analyze a Pipeline Factor using Alphalens.

    Parameters
    ----------
    factor : quantopian.pipeline.factors.Factor
        Factor producing scores to be evaluated.
    domain : quantopian.pipeline.domain.Domain
        Domain on which the factor should be evaluated.
    start_date : str or pd.Timestamp
        Start date for evaluation period.
    end_date : str or pd.Timestamp
        End date for evaluation period.
    factor_screen : quantopian.pipeline.filters.Filter, optional
        Filter defining which assets ``factor`` should be evaluated on.
        Default is ``factor.notnull()``.
    quantiles : int, optional
        Number of buckets to use for quantile groups. Default is 5.
    returns_lengths : sequence[int], optional
        Forward-returns horizons to use when evaluating ``factor``.
        Default is 1-day, 5-day, and 10-day returns.
    session : str, optional
        Which part of the trading day the forward returns cover:
        one of 'Overnight', 'Intraday', or 'Daily'. Default is 'Overnight'.
    chunksize : int, optional
        Chunk size (in sessions) passed to ``run_pipeline`` for the factor
        pipeline. Use ~252-504 if you run into memory problems.

    Returns
    -------
    factor_data : pd.DataFrame
        A (date, asset)-indexed DataFrame with the following columns:
        'factor': float64
            Values produced by ``factor``.
        'factor_quantile': int64
            Daily quantile label for each factor value.
        One '{length}D' float64 column of forward returns per entry in
        ``returns_lengths``.

    Raises
    ------
    ValueError
        If ``session`` is not one of 'Overnight', 'Intraday', 'Daily'.
    """
    # Validate up front so a bad `session` fails immediately. The original
    # check lived inside the returns loop (so it was skipped entirely for an
    # empty `returns_lengths`) and raised SystemExit, which callers cannot
    # catch as an ordinary error.
    if session not in ('Overnight', 'Intraday', 'Daily'):
        raise ValueError("session should be one of 'Overnight', 'Intraday', 'Daily'")

    calendar = domain.calendar
    # Roll input dates to the next trading session.
    start_date = calendar.minute_to_session_label(pd.Timestamp(start_date, tz='UTC'))
    end_date = calendar.minute_to_session_label(pd.Timestamp(end_date, tz='UTC'))

    if factor_screen is None:
        factor_screen = factor.notnull()

    # Run pipeline to get factor values and quantiles.
    factor_pipe = Pipeline(
        {'factor': factor,
         'factor_quantile': factor.quantiles(quantiles, mask=factor_screen)},
        screen=factor_screen,
        domain=domain,
    )
    # Put chunksize ~252-504 if you run into memory problems
    factor_results = run_pipeline(factor_pipe, start_date, end_date, chunksize=chunksize)

    # Trailing close-to-close return over the whole window.
    class Daily(CustomFactor):
        inputs = [EquityPricing.close]

        def compute(self, today, assets, out, close):
            out[:] = close[-1] / close[0] - 1

    # Compounded close-to-next-open returns (gains earned outside market hours).
    class Overnight(CustomFactor):
        inputs = [EquityPricing.close, EquityPricing.open]

        def compute(self, today, assets, out, close, open):
            out[:] = np.cumprod(open[1:] / close[:-1], axis=0)[-1] - 1

    # Compounded open-to-close returns (gains earned during market hours).
    class Intraday(CustomFactor):
        inputs = [EquityPricing.close, EquityPricing.open]

        def compute(self, today, assets, out, close, open):
            out[:] = np.cumprod(close / open, axis=0)[-1] - 1

    session_factors = {'Overnight': Overnight, 'Intraday': Intraday, 'Daily': Daily}

    column_order = []
    returns_cols = {}
    for length in returns_lengths:
        colname = '{}D'.format(length)
        column_order.append(colname)
        # Add 1 because "1-day" returns need 2 price observations.
        # Not relevant for Intraday (open and close come from the same bar).
        window_length = length if session == 'Intraday' else length + 1
        # Winsorize returns to handle data glitches.
        # Example: get_pricing("BRK_A", start_date='2014-11-03', end_date='2014-11-08')
        # 0.002 * ~500 (companies per day) = 1 (from each side)
        returns_cols[colname] = (
            session_factors[session](window_length=window_length)
            .winsorize(.002, .998)
        )
    returns_pipe = Pipeline(returns_cols, domain=domain)

    # Compute returns for the period after the factor pipeline, then
    # shift the results back to align with our factor values.
    returns_start_date = start_date
    returns_end_date = end_date + domain.calendar.day * max(returns_lengths)
    # The returns pipeline is always chunked: it spans extra sessions and
    # holds one column per horizon.
    raw_returns = run_pipeline(returns_pipe, returns_start_date, returns_end_date, chunksize=252)

    shifted_returns = {}
    for name, length in zip(column_order, returns_lengths):
        # Shift 1-day returns back by a day, 5-day returns back by 5 days, etc.
        raw = raw_returns[name]
        shifted_returns[name] = backshift_returns_series(raw, length)

    # Merge backshifted returns into a single frame indexed like our desired output.
    merged_returns = pd.DataFrame(
        data=shifted_returns,
        index=factor_results.index,
        columns=column_order,
    )

    # Concat factor results and forward returns column-wise.
    merged = pd.concat([factor_results, merged_returns], axis=1)
    merged.index.set_names(['date', 'asset'], inplace=True)
    return merged.dropna(how='any')
def backshift_returns_series(series, N):
    """Shift a multi-indexed series backwards by N observations in the first level.

    This can be used to convert backward-looking returns into a
    forward-returns series.

    Parameters
    ----------
    series : pd.Series
        Series indexed by a two-level (date, sid) MultiIndex, sorted by date.
    N : int
        Number of leading dates to shift by. Must satisfy ``0 < N < n_dates``.

    Returns
    -------
    pd.Series
        ``series`` with each value re-labeled to the date N observations
        earlier; the last N dates are dropped from the output index.
    """
    ix = series.index
    dates, sids = ix.levels
    # `MultiIndex.labels` was renamed to `codes` in pandas 0.24 and removed
    # in 1.0; support both old and new pandas.
    has_codes = hasattr(ix, 'codes')
    raw_codes = ix.codes if has_codes else ix.labels
    date_labels, sid_labels = map(np.array, raw_codes)

    # Output date labels will contain all but the last N dates.
    new_dates = dates[:-N]

    # Output data will remove the first M rows, where M is the index of the
    # last record with one of the first N dates.
    cutoff = date_labels.searchsorted(N)
    new_date_labels = date_labels[cutoff:] - N
    new_sid_labels = sid_labels[cutoff:]
    new_values = series.values[cutoff:]
    # Sanity check: after the shift, output must start at the first new date.
    assert new_date_labels[0] == 0

    new_index = pd.MultiIndex(
        levels=[new_dates, sids],
        sortorder=1,
        names=ix.names,
        # Pass the integer positions under whichever keyword this pandas
        # version expects.
        **{('codes' if has_codes else 'labels'): [new_date_labels, new_sid_labels]}
    )
    return pd.Series(data=new_values, index=new_index)
# Low Volatility factor
class MyFactor(CustomFactor):
    """Negated trailing volatility: low-volatility names get the highest scores."""
    # Daily returns (each observation spans two consecutive sessions).
    inputs = [Returns(window_length=2)]
    # One trading year of daily observations.
    window_length = 252

    def compute(self, today, assets, out, returns):
        # Per-asset standard deviation of daily returns, ignoring NaNs,
        # negated so that lower volatility ranks higher.
        daily_vol = np.nanstd(returns, axis=0)
        out[:] = -daily_vol
# Yield Factor
from quantopian.pipeline.data import morningstar


class Yield(CustomFactor):
    """Most recent total yield from Morningstar valuation ratios."""
    inputs = [morningstar.valuation_ratios.total_yield]
    window_length = 1

    def compute(self, today, assets, out, syield):
        # Emit the latest available observation in the window.
        out[:] = syield[-1]
Define your factors and filters here:
# Combine a yield factor and a low-volatility factor; each is z-scored and
# winsorized so neither leg dominates via outliers.
yield_factor = Yield().zscore().winsorize(.005, .995)
low_vol_factor = MyFactor().zscore().winsorize(.005, .995)
my_factor = low_vol_factor + yield_factor

# Create a volume filter that filters for stocks in the top 10% companies based on Average Dollar Volume.
avg_dollar_vol = AverageDollarVolume(window_length=63)
volume_filter = avg_dollar_vol.percentile_between(90, 100, mask=(avg_dollar_vol > 0))

# Call evaluate_factor on your factor to get Alphalens-formatted data.
al_data = evaluate_factor(
    factor=my_factor,
    domain=US_EQUITIES,
    start_date='2010-01-01',
    end_date='2017-11-06',
    factor_screen=volume_filter & Q500US(),  # Remove Q500US() if using non-US market
    session="Intraday",  # Can be "Overnight", "Intraday", "Daily"
    quantiles=5,
    returns_lengths=(1, 5, 10),
    chunksize=None,  # Put chunksize ~252-504 if you run into memory problems
)
Results index reference: t(0) is the date level of the results index — the session on which each factor value was computed; forward returns are aligned back to that date.
# Import Alphalens and run our factor data through a tear sheet.
from alphalens.tears import create_full_tear_sheet
create_full_tear_sheet(al_data)
# Display the Alphalens-formatted frame (notebook cell output).
al_data
Plot the number of companies per day.
Pyfolio analysis:
from alphalens.performance import create_pyfolio_input
import alphalens
import pyfolio

# Convert the Alphalens factor data into pyfolio-compatible returns,
# positions, and benchmark series for a simulated long/short portfolio.
pf_returns, pf_positions, pf_benchmark = \
    create_pyfolio_input(al_data,
                         period='1D',
                         capital=1000000,
                         long_short=True,
                         group_neutral=False,
                         equal_weight=False,  # Equal weight vs weight based on alpha factor
                         quantiles=[0,4],  # Choose the "best" quantiles to trade based on your analysis above
                         groups=None,
                         benchmark_period='1D')

# NOTE(review): this import shadows alphalens.tears.create_full_tear_sheet
# imported earlier — fine across sequential notebook cells, confusing in a
# single module.
from pyfolio.tears import create_full_tear_sheet
create_full_tear_sheet(pf_returns,
                       positions=pf_positions,
                       benchmark_rets=pf_benchmark,
                       round_trips=True)