
Alphalens - Open Source Factor Analysis¶

In [1]:
import numpy as np
import pandas as pd
import alphalens
from quantopian.research import run_pipeline
from quantopian.pipeline import Pipeline
from import USEquityPricing
from import morningstar
from quantopian.pipeline.factors import CustomFactor, Latest, Returns, RSI, EWMA as ewma, SimpleMovingAverage
from quantopian.pipeline.filters import Q1500US

Define the Factors¶

In [2]:
#Create the classes for many rav variables and factors

class LowVol(CustomFactor):
    inputs = [Returns(window_length=2)]
    window_length = 25
    def compute(self, today, assets, out, close):
        out[:] = -np.nanstd(close, axis=0)

class growth(CustomFactor):
    window_length = 1
    inputs = [morningstar.operation_ratios.revenue_growth]
    def compute(self, today, assets, out, growth):
        out[:] = growth
class sus_growth(CustomFactor):
    window_length = 1
    inputs = [morningstar.valuation_ratios.sustainable_growth_rate]
    def compute(self, today, assets, out, sustainable_growth_rate):
        out[:] = sustainable_growth_rate

class turns(CustomFactor):
    window_length = 1
    inputs = [morningstar.operation_ratios.assets_turnover]
    def compute(self, today, assets, out, assets_turnover):
        out[:] = assets_turnover

class roic(CustomFactor):
    window_length = 1
    inputs = [morningstar.operation_ratios.roic]
    def compute(self, today, assets, out, roic):
        out[:] = roic

class margins(CustomFactor):
    window_length = 1
    inputs = [morningstar.operation_ratios.ebitda_margin]
    def compute(self, today, assets, out, ebitda_margin):
        out[:] = ebitda_margin

class peg_ratio(CustomFactor):
    window_length = 1
    inputs = [morningstar.valuation_ratios.peg_ratio]
    def compute(self, today, assets, out, peg_ratio):
        out[:] = np.where(peg_ratio <= 0, 0, 1/peg_ratio)
class pe_ratio(CustomFactor):
    window_length = 1
    inputs = [morningstar.valuation_ratios.pe_ratio]
    def compute(self, today, assets, out, pe_ratio):
        out[:] = np.where(pe_ratio <= 0, 0, 1/pe_ratio)

class pb_ratio(CustomFactor):
    window_length = 1
    inputs = [morningstar.valuation_ratios.pb_ratio]
    def compute(self, today, assets, out, pb_ratio):
        out[:] = np.where(pb_ratio <= 0, 0, 1/pb_ratio)

class total_yield(CustomFactor):
    window_length = 1
    inputs = [morningstar.valuation_ratios.total_yield]
    def compute(self, today, assets, out, total_yield):
        out[:] = total_yield
class ev_ebitda(CustomFactor):
    window_length = 1
    inputs = [morningstar.valuation_ratios.ev_to_ebitda]
    def compute(self, today, assets, out, ev_to_ebitda):
        out[:] = np.where(ev_to_ebitda <= 0, 0, 1/ev_to_ebitda)

class liquidity(CustomFactor):   
    inputs = [USEquityPricing.volume, USEquityPricing.close] 
    window_length = 21

    def compute(self, today, assets, out, volume, close): 
        out[:] = (volume * close).mean(axis=0)
class sector(CustomFactor):
    inputs = [morningstar.asset_classification.morningstar_sector_code]
    window_length = 1
    def compute(self, today, assets, out, morningstar_sector_code):
        out[:] = morningstar_sector_code[-1]
class AvgDailyDollarVolumeTraded(CustomFactor):
    inputs = [USEquityPricing.close, USEquityPricing.volume]
    window_length = 20
    def compute(self, today, assets, out, close_price, volume):
        out[:] = np.mean(close_price * volume, axis=0)

class PriceReturn(CustomFactor):  
    inputs = [USEquityPricing.close]
    window_length = 2
    def compute(self, today, assets, out, close):
        out[:] = close[-1]/close[0]

Set Universe¶

In [3]:
universe = Q1500US()

Create the Pipeline¶

In [4]:
pipe = Pipeline(
        #'LowVol' : LowVol(mask=universe),
        #'ROIC': roic(mask=universe),
        'PE': pe_ratio(mask=universe),
        #'Growth': growth(mask=universe),
        #'SusGrowth': sus_growth(mask=universe),
        #'Turns': turns(mask=universe),
        #'Margins': margins(mask=universe),
        #'PEG': peg_ratio(mask=universe),
        #'PB': pb_ratio(mask=universe),
        #'TotalYield': total_yield(mask=universe),
        'EV_EBITDA': ev_ebitda(mask=universe),
        'PriceReturn': PriceReturn(mask=universe),
        'Sector': sector(mask=universe)

Run the Pipeline¶

In [5]:
results = run_pipeline(pipe, '2012-06-30', '2016-06-30')
In [6]:
EV_EBITDA PriceReturn Sector
2012-07-02 00:00:00+00:00 Equity(2 [ARNC]) 0.067388 1.028235 101.0
Equity(24 [AAPL]) 0.099307 1.026795 311.0
Equity(52 [ABM]) 0.121308 0.985880 310.0
Equity(53 [ABMD]) 0.008095 1.041534 206.0
Equity(62 [ABT]) 0.171444 1.024622 206.0
In [8]:
#Develop Factors for Testing
factor_name = 'EV_EBITDA'

results['factor'] = results.groupby([results.index.get_level_values(0),'Sector'])[factor_name].apply(lambda x: x.fillna(x.median()))

zscore = lambda x: (x - x.mean()) / x.std()
results['factor_score'] = results.groupby([results.index.get_level_values(0),'Sector'])['factor'].transform(zscore)

#Reset Factor to mean where abs(factor_score) > 3
results['factor_adj'] = np.where(abs(results['factor_score']) > 3,np.nan,results['factor'])
results['factor_adj'] = results.groupby([results.index.get_level_values(0),'Sector'])['factor_adj'].apply(lambda x: x.fillna(x.median()))
results['factor_score_adj'] = results.groupby([results.index.get_level_values(0),'Sector'])['factor_adj'].transform(zscore)

#Look ahead forward return
results['forward_return_1'] = results.groupby(results.index.get_level_values(1))['PriceReturn'].apply(lambda x: x.shift(-1))

Get pricing¶

In [12]:
assets = results.index.levels[1].unique()
# Make sure to extend the pricing a bit to get a full set of returns.
pricing = get_pricing(assets, start_date='2012-06-30', end_date='2016-07-31', fields='open_price')
In [21]:
EV_EBITDA PriceReturn Sector factor factor_score factor_adj factor_score_adj forward_return_1
2012-07-02 00:00:00+00:00 Equity(2 [ARNC]) 0.067388 1.028235 101.0 0.067388 -0.664975 0.067388 -0.664975 0.987414
Equity(24 [AAPL]) 0.099307 1.026795 311.0 0.099307 -0.090975 0.099307 -0.032519 1.014914
Equity(52 [ABM]) 0.121308 0.985880 310.0 0.121308 -0.082475 0.121308 -0.021833 0.992012
Equity(53 [ABMD]) 0.008095 1.041534 206.0 0.008095 -1.035993 0.008095 -1.186339 1.028046
Equity(62 [ABT]) 0.171444 1.024622 206.0 0.171444 0.901986 0.171444 1.221839 1.002172

Build Tearsheet¶

AlphaLens builds a single factor tearsheet.

In [20]:
#First, let's look at our standard factor
factor_data = alphalens.utils.get_clean_factor_and_forward_returns(results['EV_EBITDA'],

Quantiles Statistics
min max mean std count count %
1 0.115815 1.008523 0.969218 0.028089 150204 10.033386
2 0.919959 1.016150 0.987489 0.010797 149678 9.998250
3 0.933965 1.020658 0.992675 0.009579 149602 9.993173
4 0.944708 1.025122 0.996145 0.009031 149720 10.001055
5 0.951937 1.028907 0.998994 0.008774 149739 10.002325
6 0.959253 1.033234 1.001705 0.008692 149476 9.984757
7 0.964623 1.038395 1.004591 0.008810 149394 9.979279
8 0.968305 1.044556 1.008080 0.009186 149654 9.996647
9 0.973426 1.056765 1.013330 0.010129 149513 9.987228
10 0.981000 8.983420 1.032713 0.039007 150062 10.023900
Returns Analysis
1 5 10
Ann. alpha 235.079 1.963 0.734
beta -0.058 -0.037 -0.028
Mean Period Wise Return Top Quantile (bps) 258.031 247.757 245.605
Mean Period Wise Return Bottom Quantile (bps) -252.796 -254.882 -262.501
Mean Period Wise Spread (bps) 510.936 100.644 50.926
Information Analysis
1 5 10
IC Mean 0.822 0.339 0.236
IC Std. 0.059 0.104 0.107
t-stat(IC) 437.814 103.106 70.102
p-value(IC) 0.000 0.000 0.000
IC Skew -3.815 -0.466 -0.320
IC Kurtosis 40.775 1.836 1.058
Ann. IR 219.233 51.630 35.103
Turnover Analysis
1 5 10
Quantile 1 Mean Turnover 0.825 0.836 0.840
Quantile 2 Mean Turnover 0.896 0.896 0.895
Quantile 3 Mean Turnover 0.896 0.897 0.897
Quantile 4 Mean Turnover 0.889 0.892 0.893
Quantile 5 Mean Turnover 0.881 0.887 0.888
Quantile 6 Mean Turnover 0.884 0.887 0.888
Quantile 7 Mean Turnover 0.890 0.891 0.891
Quantile 8 Mean Turnover 0.896 0.896 0.897
Quantile 9 Mean Turnover 0.896 0.896 0.898
Quantile 10 Mean Turnover 0.841 0.852 0.854
1 5 10
Mean Factor Rank Autocorrelation -0.008 0.001 0.005
<matplotlib.figure.Figure at 0x7f2e38a96450>
In [ ]:
#We can improve this factor a bit simply by removing outliers.
#Lets use the z-score adjusted factor we built earlier
#This can be a pattern for how we can quickly iterate and improve factors
factor_data = alphalens.utils.get_clean_factor_and_forward_returns(results['factor_score_adj'],

In [23]:
#But what do good charts look like?
#Let's use the look ahead or forward return. The 1 period charts should come out looking really good. (Too good)
#Some things (like the distribution charts) aren't really useful and turnover is really high.
#However, this shows what the charts could look like.
factor_data = alphalens.utils.get_clean_factor_and_forward_returns(results['forward_return_1'],

Quantiles Statistics
min max mean std count count %
1 0.115815 1.008523 0.969218 0.028089 150204 10.033386
2 0.919959 1.016150 0.987489 0.010797 149678 9.998250
3 0.933965 1.020658 0.992675 0.009579 149602 9.993173
4 0.944708 1.025122 0.996145 0.009031 149720 10.001055
5 0.951937 1.028907 0.998994 0.008774 149739 10.002325
6 0.959253 1.033234 1.001705 0.008692 149476 9.984757
7 0.964623 1.038395 1.004591 0.008810 149394 9.979279
8 0.968305 1.044556 1.008080 0.009186 149654 9.996647
9 0.973426 1.056765 1.013330 0.010129 149513 9.987228
10 0.981000 8.983420 1.032713 0.039007 150062 10.023900
Returns Analysis
1 5 10
Ann. alpha 235.079 1.963 0.734
beta -0.058 -0.037 -0.028
Mean Period Wise Return Top Quantile (bps) 258.031 247.757 245.605
Mean Period Wise Return Bottom Quantile (bps) -252.796 -254.882 -262.501
Mean Period Wise Spread (bps) 510.936 100.644 50.926
Information Analysis
1 5 10
IC Mean 0.822 0.339 0.236
IC Std. 0.059 0.104 0.107
t-stat(IC) 437.814 103.106 70.102
p-value(IC) 0.000 0.000 0.000
IC Skew -3.815 -0.466 -0.320
IC Kurtosis 40.775 1.836 1.058
Ann. IR 219.233 51.630 35.103
Turnover Analysis
1 5 10
Quantile 1 Mean Turnover 0.825 0.836 0.840
Quantile 2 Mean Turnover 0.896 0.896 0.895
Quantile 3 Mean Turnover 0.896 0.897 0.897
Quantile 4 Mean Turnover 0.889 0.892 0.893
Quantile 5 Mean Turnover 0.881 0.887 0.888
Quantile 6 Mean Turnover 0.884 0.887 0.888
Quantile 7 Mean Turnover 0.890 0.891 0.891
Quantile 8 Mean Turnover 0.896 0.896 0.897
Quantile 9 Mean Turnover 0.896 0.896 0.898
Quantile 10 Mean Turnover 0.841 0.852 0.854
1 5 10
Mean Factor Rank Autocorrelation -0.008 0.001 0.005
<matplotlib.figure.Figure at 0x7f2e3ca9f210>
In [24]:
#We can also build multiple datasets to do multifactor analysis.
#By digging around in the source ( we can find some return streams to compare

factor_data_2 = alphalens.utils.get_clean_factor_and_forward_returns(results['EV_EBITDA'],
In [29]:
factor_returns_1 = alphalens.performance.factor_returns(factor_data)#[['1']]
factor_returns_2 = alphalens.performance.factor_returns(factor_data_2)#[['1']]
In [36]:
factor_returns_composite = factor_returns_1.merge(factor_returns_2)
In [37]:
1 5 10
In [34]:
1 5 10
2012-07-02 00:00:00+00:00 0.024634 0.026867 0.032220
2012-07-03 00:00:00+00:00 0.017486 0.000177 0.001114
2012-07-05 00:00:00+00:00 0.020987 0.020025 0.021010
2012-07-06 00:00:00+00:00 0.018582 0.032391 0.019544
2012-07-09 00:00:00+00:00 0.020265 0.034153 0.035926
In [ ]: