Alphalens - Open Source Factor Analysis

import numpy as np
import pandas as pd
import alphalens
from quantopian.research import run_pipeline
from quantopian.pipeline import Pipeline
from quantopian.pipeline.data.builtin import USEquityPricing
from quantopian.pipeline.data import morningstar
from quantopian.pipeline.factors import CustomFactor, Latest, Returns, RSI, EWMA as ewma, SimpleMovingAverage
from quantopian.pipeline.filters import Q1500US

Define the Factors

#Create the classes for the raw variables and factors

class LowVol(CustomFactor):
    """Low-volatility factor: negated standard deviation of recent returns.

    The input is ``Returns(window_length=2)`` observed over a 25-row window.
    The sign is flipped so that a smaller standard deviation gives a larger
    factor value.
    """

    window_length = 25
    inputs = [Returns(window_length=2)]

    def compute(self, today, assets, out, close):
        # NOTE: despite its name, ``close`` receives the Returns input declared
        # above, not closing prices.
        # NaN-aware standard deviation along axis 0 (the window axis).
        volatility = np.nanstd(close, axis=0)
        out[:] = -volatility

class growth(CustomFactor):
    """Pass-through factor for Morningstar ``operation_ratios.revenue_growth``."""

    inputs = [morningstar.operation_ratios.revenue_growth]
    window_length = 1

    def compute(self, today, assets, out, growth):
        # One-row window: the input is written to the output unchanged.
        out[...] = growth
        
class sus_growth(CustomFactor):
    """Pass-through factor for Morningstar ``valuation_ratios.sustainable_growth_rate``."""

    inputs = [morningstar.valuation_ratios.sustainable_growth_rate]
    window_length = 1

    def compute(self, today, assets, out, sustainable_growth_rate):
        # One-row window: the input is written to the output unchanged.
        out[...] = sustainable_growth_rate

#Quality
class turns(CustomFactor):
    """Pass-through factor for Morningstar ``operation_ratios.assets_turnover``."""

    inputs = [morningstar.operation_ratios.assets_turnover]
    window_length = 1

    def compute(self, today, assets, out, assets_turnover):
        # One-row window: the input is written to the output unchanged.
        out[...] = assets_turnover

class roic(CustomFactor):
    """Pass-through factor for Morningstar ``operation_ratios.roic``."""

    inputs = [morningstar.operation_ratios.roic]
    window_length = 1

    def compute(self, today, assets, out, roic):
        # One-row window: the input is written to the output unchanged.
        out[...] = roic

class margins(CustomFactor):
    """Pass-through factor for Morningstar ``operation_ratios.ebitda_margin``."""

    inputs = [morningstar.operation_ratios.ebitda_margin]
    window_length = 1

    def compute(self, today, assets, out, ebitda_margin):
        # One-row window: the input is written to the output unchanged.
        out[...] = ebitda_margin

#Valuation
class peg_ratio(CustomFactor):
    """Inverse of Morningstar ``valuation_ratios.peg_ratio``.

    Values of the input that are <= 0 map to 0; positive values map to their
    reciprocal. NaN inputs fail the ``<= 0`` test and therefore stay NaN.
    """
    window_length = 1
    inputs = [morningstar.valuation_ratios.peg_ratio]

    def compute(self, today, assets, out, peg_ratio):
        # np.where evaluates both branches for every element, so 1/peg_ratio
        # is also computed where peg_ratio == 0 and the comparison may see
        # NaNs. Silence those floating-point warnings; the selected values
        # are exactly the same as without the guard.
        with np.errstate(divide='ignore', invalid='ignore'):
            out[:] = np.where(peg_ratio <= 0, 0, 1/peg_ratio)
        
class pe_ratio(CustomFactor):
    """Inverse of Morningstar ``valuation_ratios.pe_ratio``.

    Values of the input that are <= 0 map to 0; positive values map to their
    reciprocal. NaN inputs fail the ``<= 0`` test and therefore stay NaN.
    """
    window_length = 1
    inputs = [morningstar.valuation_ratios.pe_ratio]

    def compute(self, today, assets, out, pe_ratio):
        # np.where evaluates both branches for every element, so 1/pe_ratio
        # is also computed where pe_ratio == 0 and the comparison may see
        # NaNs. Silence those floating-point warnings; the selected values
        # are exactly the same as without the guard.
        with np.errstate(divide='ignore', invalid='ignore'):
            out[:] = np.where(pe_ratio <= 0, 0, 1/pe_ratio)

class pb_ratio(CustomFactor):
    """Inverse of Morningstar ``valuation_ratios.pb_ratio``.

    Values of the input that are <= 0 map to 0; positive values map to their
    reciprocal. NaN inputs fail the ``<= 0`` test and therefore stay NaN.
    """
    window_length = 1
    inputs = [morningstar.valuation_ratios.pb_ratio]

    def compute(self, today, assets, out, pb_ratio):
        # np.where evaluates both branches for every element, so 1/pb_ratio
        # is also computed where pb_ratio == 0 and the comparison may see
        # NaNs. Silence those floating-point warnings; the selected values
        # are exactly the same as without the guard.
        with np.errstate(divide='ignore', invalid='ignore'):
            out[:] = np.where(pb_ratio <= 0, 0, 1/pb_ratio)

class total_yield(CustomFactor):
    """Pass-through factor for Morningstar ``valuation_ratios.total_yield``."""

    inputs = [morningstar.valuation_ratios.total_yield]
    window_length = 1

    def compute(self, today, assets, out, total_yield):
        # One-row window: the input is written to the output unchanged.
        out[...] = total_yield
        
class ev_ebitda(CustomFactor):
    """Inverse of Morningstar ``valuation_ratios.ev_to_ebitda``.

    Values of the input that are <= 0 map to 0; positive values map to their
    reciprocal. NaN inputs fail the ``<= 0`` test and therefore stay NaN.
    """
    window_length = 1
    inputs = [morningstar.valuation_ratios.ev_to_ebitda]

    def compute(self, today, assets, out, ev_to_ebitda):
        # np.where evaluates both branches for every element, so
        # 1/ev_to_ebitda is also computed where ev_to_ebitda == 0 and the
        # comparison may see NaNs. Silence those floating-point warnings; the
        # selected values are exactly the same as without the guard.
        with np.errstate(divide='ignore', invalid='ignore'):
            out[:] = np.where(ev_to_ebitda <= 0, 0, 1/ev_to_ebitda)

class liquidity(CustomFactor):
    """Mean of ``volume * close`` over a 21-row window."""

    window_length = 21
    inputs = [USEquityPricing.volume, USEquityPricing.close]

    def compute(self, today, assets, out, volume, close):
        # Element-wise product, then averaged along axis 0 (the window axis).
        dollar_volume = volume * close
        out[:] = np.mean(dollar_volume, axis=0)
        
class sector(CustomFactor):
    """Factor exposing the last row of ``morningstar_sector_code``."""

    window_length = 1
    inputs = [morningstar.asset_classification.morningstar_sector_code]

    def compute(self, today, assets, out, morningstar_sector_code):
        # Take the last row of the window (the only row, as window_length is 1).
        latest_codes = morningstar_sector_code[-1]
        out[:] = latest_codes
        
class AvgDailyDollarVolumeTraded(CustomFactor):
    """Mean of ``close * volume`` over a 20-row window."""

    window_length = 20
    inputs = [USEquityPricing.close, USEquityPricing.volume]

    def compute(self, today, assets, out, close_price, volume):
        # Element-wise product, then averaged along axis 0 (the window axis).
        dollar_volume = close_price * volume
        out[:] = dollar_volume.mean(axis=0)

class PriceReturn(CustomFactor):
    """Ratio of the last to the first close in a two-row window.

    The value is a price ratio (e.g. 1.02), not a percentage change.
    """

    window_length = 2
    inputs = [USEquityPricing.close]

    def compute(self, today, assets, out, close):
        first_close = close[0]
        last_close = close[-1]
        out[:] = last_close / first_close

Set Universe

# Trading universe: the Q1500US built-in filter. It is used below both as the
# mask passed to each factor and as the pipeline screen.
universe = Q1500US()

Create the Pipeline

# Assemble the pipeline. Every factor is masked with the universe and the
# universe is also the screen. Commented-out entries are alternative factors
# that can be switched on by uncommenting them.
pipe = Pipeline(
    screen=universe,
    columns=dict(
        #LowVol=LowVol(mask=universe),
        #ROIC=roic(mask=universe),
        PE=pe_ratio(mask=universe),
        #Growth=growth(mask=universe),
        #SusGrowth=sus_growth(mask=universe),
        #Turns=turns(mask=universe),
        #Margins=margins(mask=universe),
        #PEG=peg_ratio(mask=universe),
        #PB=pb_ratio(mask=universe),
        #TotalYield=total_yield(mask=universe),
        EV_EBITDA=ev_ebitda(mask=universe),
        PriceReturn=PriceReturn(mask=universe),
        Sector=sector(mask=universe),
    ),
)

Run the Pipeline

# Run the pipeline for '2012-06-30' through '2016-06-30'. The resulting frame
# is indexed by two levels, which the cells below use as (date, asset).
results = run_pipeline(pipe, '2012-06-30', '2016-06-30')

# Preview the first rows of the pipeline output.
results.head()

#Develop Factors for Testing
factor_name = 'EV_EBITDA'

# Every cross-sectional step below groups by (date, sector): the first index
# level of ``results`` together with its 'Sector' column.
date_and_sector = [results.index.get_level_values(0), 'Sector']


def zscore(x):
    """Standardize ``x``: subtract its mean and divide by its standard deviation."""
    return (x - x.mean()) / x.std()


def fill_with_median(x):
    """Replace missing values in ``x`` with the median of ``x``."""
    return x.fillna(x.median())


# ``transform`` (rather than ``apply``) is used for these same-shape operations:
# its result always carries the original row index, so each column assignment
# lines up with ``results`` no matter how ``apply`` would label group results.
results['factor'] = results.groupby(date_and_sector)[factor_name].transform(fill_with_median)
results['factor_score'] = results.groupby(date_and_sector)['factor'].transform(zscore)

#Blank out outliers (abs(factor_score) > 3), refill them with the group median, then re-score
# Rows whose score is NaN do not satisfy "> 3" and therefore keep their factor value.
results['factor_adj'] = results['factor'].mask(results['factor_score'].abs() > 3)
results['factor_adj'] = results.groupby(date_and_sector)['factor_adj'].transform(fill_with_median)
results['factor_score_adj'] = results.groupby(date_and_sector)['factor_adj'].transform(zscore)

#Look ahead forward return
# Per asset (second index level), pull the NEXT row's PriceReturn onto the
# current row. This deliberately uses future information; the last row of each
# asset has no successor and becomes NaN.
results['forward_return_1'] = results.groupby(results.index.get_level_values(1))['PriceReturn'].transform(lambda x: x.shift(-1))

Get pricing

# Distinct values of the second index level of ``results`` (the assets).
assets = results.index.levels[1].unique()
# Make sure to extend the pricing a bit to get a full set of returns.
# (The end date here is 2016-07-31, one month past the pipeline end date.)
# NOTE(review): get_pricing is not imported in this file -- presumably a
# built-in of the research environment; confirm.
pricing = get_pricing(assets, start_date='2012-06-30', end_date='2016-07-31', fields='open_price')

# Preview the frame again, now including the derived factor columns.
results.head()

Build Tearsheet

AlphaLens builds a single factor tearsheet.

#Start with the standard factor: the raw EV_EBITDA column,
#bucketed into 10 quantiles with forward returns over 1, 5 and 10 periods
factor_data = alphalens.utils.get_clean_factor_and_forward_returns(
    results['EV_EBITDA'],
    pricing,
    periods=(1, 5, 10),
    quantiles=10,
)

alphalens.tears.create_full_tear_sheet(factor_data)

Quantiles Statistics

Returns Analysis

Information Analysis

Turnover Analysis

<matplotlib.figure.Figure at 0x7f2e38a96450>

#Removing outliers is a simple way to improve the factor a bit.
#Here we use the z-score adjusted factor built earlier (factor_score_adj).
#The same pattern can be reused to iterate on and improve factors quickly.
factor_data = alphalens.utils.get_clean_factor_and_forward_returns(
    results['factor_score_adj'],
    pricing,
    periods=(1, 5, 10),
    quantiles=10,
)

alphalens.tears.create_full_tear_sheet(factor_data)

#So what do good charts look like?
#Feed in the look-ahead (forward) return as the "factor": the 1 period charts should look really good. (Too good)
#Some outputs (like the distribution charts) aren't really useful and turnover is really high,
#but this shows what the charts could look like.
factor_data = alphalens.utils.get_clean_factor_and_forward_returns(
    results['forward_return_1'],
    pricing,
    periods=(1, 5, 10),
    quantiles=10,
)

alphalens.tears.create_full_tear_sheet(factor_data)

Quantiles Statistics

Returns Analysis

Information Analysis

Turnover Analysis

<matplotlib.figure.Figure at 0x7f2e3ca9f210>

#We can also build multiple datasets to do multifactor analysis.
#By digging around in the source (https://github.com/quantopian/alphalens) we can find some return streams to compare

factor_data_2 = alphalens.utils.get_clean_factor_and_forward_returns(results['EV_EBITDA'],
                                                                   pricing,
                                                                   quantiles=10,
                                                                   periods=(1,5,10))

# NOTE: ``factor_data`` here is whatever the previous cell left in it -- in this
# notebook's order, the look-ahead ``forward_return_1`` factor.
factor_returns_1 = alphalens.performance.factor_returns(factor_data)#[['1']]
factor_returns_2 = alphalens.performance.factor_returns(factor_data_2)#[['1']]

# Combine the two return streams side by side, aligned on their date index.
# A bare ``merge`` would instead join on the shared period columns (i.e. on
# equal float return values) and discard the date index, so the join keys are
# stated explicitly and the overlapping column names get a per-factor suffix.
factor_returns_composite = factor_returns_1.merge(
    factor_returns_2,
    left_index=True,
    right_index=True,
    suffixes=('_1', '_2'),
)

factor_returns_composite.head()

factor_returns_1.head()

		EV_EBITDA	PriceReturn	Sector
2012-07-02 00:00:00+00:00	Equity(2 [ARNC])	0.067388	1.028235	101.0
	Equity(24 [AAPL])	0.099307	1.026795	311.0
	Equity(52 [ABM])	0.121308	0.985880	310.0
	Equity(53 [ABMD])	0.008095	1.041534	206.0
	Equity(62 [ABT])	0.171444	1.024622	206.0

		EV_EBITDA	PriceReturn	Sector	factor	factor_score	factor_adj	factor_score_adj	forward_return_1
2012-07-02 00:00:00+00:00	Equity(2 [ARNC])	0.067388	1.028235	101.0	0.067388	-0.664975	0.067388	-0.664975	0.987414
	Equity(24 [AAPL])	0.099307	1.026795	311.0	0.099307	-0.090975	0.099307	-0.032519	1.014914
	Equity(52 [ABM])	0.121308	0.985880	310.0	0.121308	-0.082475	0.121308	-0.021833	0.992012
	Equity(53 [ABMD])	0.008095	1.041534	206.0	0.008095	-1.035993	0.008095	-1.186339	1.028046
	Equity(62 [ABT])	0.171444	1.024622	206.0	0.171444	0.901986	0.171444	1.221839	1.002172

	min	max	mean	std	count	count %
factor_quantile
1	0.115815	1.008523	0.969218	0.028089	150204	10.033386
2	0.919959	1.016150	0.987489	0.010797	149678	9.998250
3	0.933965	1.020658	0.992675	0.009579	149602	9.993173
4	0.944708	1.025122	0.996145	0.009031	149720	10.001055
5	0.951937	1.028907	0.998994	0.008774	149739	10.002325
6	0.959253	1.033234	1.001705	0.008692	149476	9.984757
7	0.964623	1.038395	1.004591	0.008810	149394	9.979279
8	0.968305	1.044556	1.008080	0.009186	149654	9.996647
9	0.973426	1.056765	1.013330	0.010129	149513	9.987228
10	0.981000	8.983420	1.032713	0.039007	150062	10.023900

	1	5	10
Ann. alpha	235.079	1.963	0.734
beta	-0.058	-0.037	-0.028
Mean Period Wise Return Top Quantile (bps)	258.031	247.757	245.605
Mean Period Wise Return Bottom Quantile (bps)	-252.796	-254.882	-262.501
Mean Period Wise Spread (bps)	510.936	100.644	50.926

	1	5	10
IC Mean	0.822	0.339	0.236
IC Std.	0.059	0.104	0.107
t-stat(IC)	437.814	103.106	70.102
p-value(IC)	0.000	0.000	0.000
IC Skew	-3.815	-0.466	-0.320
IC Kurtosis	40.775	1.836	1.058
Ann. IR	219.233	51.630	35.103

	1	5	10
Quantile 1 Mean Turnover	0.825	0.836	0.840
Quantile 2 Mean Turnover	0.896	0.896	0.895
Quantile 3 Mean Turnover	0.896	0.897	0.897
Quantile 4 Mean Turnover	0.889	0.892	0.893
Quantile 5 Mean Turnover	0.881	0.887	0.888
Quantile 6 Mean Turnover	0.884	0.887	0.888
Quantile 7 Mean Turnover	0.890	0.891	0.891
Quantile 8 Mean Turnover	0.896	0.896	0.897
Quantile 9 Mean Turnover	0.896	0.896	0.898
Quantile 10 Mean Turnover	0.841	0.852	0.854

	1	5	10
date
2012-07-02 00:00:00+00:00	0.024634	0.026867	0.032220
2012-07-03 00:00:00+00:00	0.017486	0.000177	0.001114
2012-07-05 00:00:00+00:00	0.020987	0.020025	0.021010
2012-07-06 00:00:00+00:00	0.018582	0.032391	0.019544
2012-07-09 00:00:00+00:00	0.020265	0.034153	0.035926