import numpy as np
import pandas as pd
import alphalens
from quantopian.research import run_pipeline
from quantopian.pipeline import Pipeline
from quantopian.pipeline.data.builtin import USEquityPricing
from quantopian.pipeline.data import morningstar
from quantopian.pipeline.factors import CustomFactor, Latest, Returns, RSI, EWMA as ewma, SimpleMovingAverage
from quantopian.pipeline.filters import Q1500US
#Create the classes for the raw variables and factors
class LowVol(CustomFactor):
    """Low-volatility score over a 25-row window.

    The score is the negated, NaN-ignoring standard deviation of the
    ``Returns(window_length=2)`` input, so less volatile assets score higher.
    """

    window_length = 25
    inputs = [Returns(window_length=2)]

    def compute(self, today, assets, out, close):
        # NOTE: despite its name, `close` receives the Returns input declared
        # above, not closing prices.
        dispersion = np.nanstd(close, axis=0)
        out[:] = -dispersion
class growth(CustomFactor):
    """Expose Morningstar ``operation_ratios.revenue_growth`` as a factor."""

    window_length = 1
    inputs = [morningstar.operation_ratios.revenue_growth]

    def compute(self, today, assets, out, growth):
        # Select the newest row explicitly (as `sector` does) instead of
        # relying on NumPy dropping the leading length-1 window axis when
        # assigning into `out`.
        out[:] = growth[-1]
class sus_growth(CustomFactor):
    """Expose Morningstar ``valuation_ratios.sustainable_growth_rate`` as a factor."""

    window_length = 1
    inputs = [morningstar.valuation_ratios.sustainable_growth_rate]

    def compute(self, today, assets, out, sustainable_growth_rate):
        # Select the newest row explicitly (as `sector` does) instead of
        # relying on NumPy dropping the leading length-1 window axis when
        # assigning into `out`.
        out[:] = sustainable_growth_rate[-1]
#Quality
class turns(CustomFactor):
    """Expose Morningstar ``operation_ratios.assets_turnover`` as a factor."""

    window_length = 1
    inputs = [morningstar.operation_ratios.assets_turnover]

    def compute(self, today, assets, out, assets_turnover):
        # Select the newest row explicitly (as `sector` does) instead of
        # relying on NumPy dropping the leading length-1 window axis when
        # assigning into `out`.
        out[:] = assets_turnover[-1]
class roic(CustomFactor):
    """Expose Morningstar ``operation_ratios.roic`` as a factor."""

    window_length = 1
    inputs = [morningstar.operation_ratios.roic]

    def compute(self, today, assets, out, roic):
        # Select the newest row explicitly (as `sector` does) instead of
        # relying on NumPy dropping the leading length-1 window axis when
        # assigning into `out`.
        out[:] = roic[-1]
class margins(CustomFactor):
    """Expose Morningstar ``operation_ratios.ebitda_margin`` as a factor."""

    window_length = 1
    inputs = [morningstar.operation_ratios.ebitda_margin]

    def compute(self, today, assets, out, ebitda_margin):
        # Select the newest row explicitly (as `sector` does) instead of
        # relying on NumPy dropping the leading length-1 window axis when
        # assigning into `out`.
        out[:] = ebitda_margin[-1]
#Valuation
class peg_ratio(CustomFactor):
    """Reciprocal of Morningstar ``valuation_ratios.peg_ratio``.

    Non-positive ratios map to 0; missing (NaN) ratios stay NaN.
    """

    window_length = 1
    inputs = [morningstar.valuation_ratios.peg_ratio]

    def compute(self, today, assets, out, peg_ratio):
        # Work on the newest row explicitly, as `sector` does.
        latest = peg_ratio[-1]
        # np.where evaluates both branches for every element, so the
        # reciprocal is also taken of the zero entries that the mask then
        # discards. Silence the resulting divide/invalid warnings.
        with np.errstate(divide='ignore', invalid='ignore'):
            out[:] = np.where(latest <= 0, 0, 1.0 / latest)
class pe_ratio(CustomFactor):
    """Reciprocal of Morningstar ``valuation_ratios.pe_ratio``.

    Non-positive ratios map to 0; missing (NaN) ratios stay NaN.
    """

    window_length = 1
    inputs = [morningstar.valuation_ratios.pe_ratio]

    def compute(self, today, assets, out, pe_ratio):
        # Work on the newest row explicitly, as `sector` does.
        latest = pe_ratio[-1]
        # np.where evaluates both branches for every element, so the
        # reciprocal is also taken of the zero entries that the mask then
        # discards. Silence the resulting divide/invalid warnings.
        with np.errstate(divide='ignore', invalid='ignore'):
            out[:] = np.where(latest <= 0, 0, 1.0 / latest)
class pb_ratio(CustomFactor):
    """Reciprocal of Morningstar ``valuation_ratios.pb_ratio``.

    Non-positive ratios map to 0; missing (NaN) ratios stay NaN.
    """

    window_length = 1
    inputs = [morningstar.valuation_ratios.pb_ratio]

    def compute(self, today, assets, out, pb_ratio):
        # Work on the newest row explicitly, as `sector` does.
        latest = pb_ratio[-1]
        # np.where evaluates both branches for every element, so the
        # reciprocal is also taken of the zero entries that the mask then
        # discards. Silence the resulting divide/invalid warnings.
        with np.errstate(divide='ignore', invalid='ignore'):
            out[:] = np.where(latest <= 0, 0, 1.0 / latest)
class total_yield(CustomFactor):
    """Expose Morningstar ``valuation_ratios.total_yield`` as a factor."""

    window_length = 1
    inputs = [morningstar.valuation_ratios.total_yield]

    def compute(self, today, assets, out, total_yield):
        # Select the newest row explicitly (as `sector` does) instead of
        # relying on NumPy dropping the leading length-1 window axis when
        # assigning into `out`.
        out[:] = total_yield[-1]
class ev_ebitda(CustomFactor):
    """Reciprocal of Morningstar ``valuation_ratios.ev_to_ebitda``.

    Non-positive ratios map to 0; missing (NaN) ratios stay NaN.
    """

    window_length = 1
    inputs = [morningstar.valuation_ratios.ev_to_ebitda]

    def compute(self, today, assets, out, ev_to_ebitda):
        # Work on the newest row explicitly, as `sector` does.
        latest = ev_to_ebitda[-1]
        # np.where evaluates both branches for every element, so the
        # reciprocal is also taken of the zero entries that the mask then
        # discards. Silence the resulting divide/invalid warnings.
        with np.errstate(divide='ignore', invalid='ignore'):
            out[:] = np.where(latest <= 0, 0, 1.0 / latest)
class liquidity(CustomFactor):
    """Mean of volume times close over a 21-row window."""

    window_length = 21
    inputs = [USEquityPricing.volume, USEquityPricing.close]

    def compute(self, today, assets, out, volume, close):
        dollar_volume = volume * close
        # Average down the window axis, leaving one value per asset column.
        out[:] = dollar_volume.mean(axis=0)
class sector(CustomFactor):
    """Morningstar sector code, taken from the newest row of the window."""

    window_length = 1
    inputs = [morningstar.asset_classification.morningstar_sector_code]

    def compute(self, today, assets, out, morningstar_sector_code):
        newest_row = morningstar_sector_code[-1]
        out[:] = newest_row
class AvgDailyDollarVolumeTraded(CustomFactor):
    """Mean of close times volume over a 20-row window."""

    window_length = 20
    inputs = [USEquityPricing.close, USEquityPricing.volume]

    def compute(self, today, assets, out, close_price, volume):
        traded_value = close_price * volume
        # Average down the window axis, leaving one value per asset column.
        out[:] = np.mean(traded_value, axis=0)
class PriceReturn(CustomFactor):
    """Gross price relative over a 2-row window of closes.

    The value is the last close divided by the first, so 1.0 means the
    price is unchanged (this is not a percentage change).
    """

    window_length = 2
    inputs = [USEquityPricing.close]

    def compute(self, today, assets, out, close):
        first_close, last_close = close[0], close[-1]
        out[:] = last_close / first_close
# Every factor is masked to, and the output screened by, the Q1500US universe.
universe = Q1500US()

# Commented-out entries are optional columns; uncomment to include them.
pipe = Pipeline(
    screen=universe,
    columns={
        # 'LowVol': LowVol(mask=universe),
        # 'ROIC': roic(mask=universe),
        'PE': pe_ratio(mask=universe),
        # 'Growth': growth(mask=universe),
        # 'SusGrowth': sus_growth(mask=universe),
        # 'Turns': turns(mask=universe),
        # 'Margins': margins(mask=universe),
        # 'PEG': peg_ratio(mask=universe),
        # 'PB': pb_ratio(mask=universe),
        # 'TotalYield': total_yield(mask=universe),
        'EV_EBITDA': ev_ebitda(mask=universe),
        'PriceReturn': PriceReturn(mask=universe),
        'Sector': sector(mask=universe),
    },
)

# Run the pipeline over the four-year study window and preview the output.
results = run_pipeline(pipe, '2012-06-30', '2016-06-30')
results.head()
# Develop factors for testing
factor_name = 'EV_EBITDA'

# Every statistic below is computed within a (date, sector) bucket.
date_sector_keys = [results.index.get_level_values(0), 'Sector']


def fill_with_median(x):
    """Return ``x`` with missing values replaced by the median of ``x``."""
    return x.fillna(x.median())


def zscore(x):
    """Return ``x`` centred on its mean and scaled by its standard deviation."""
    return (x - x.mean()) / x.std()


# `transform` (rather than `apply`) always returns a result aligned with the
# original index, so the column assignment cannot be misaligned by the group
# keys that some pandas versions prepend to `apply` output.
results['factor'] = results.groupby(date_sector_keys)[factor_name].transform(fill_with_median)
results['factor_score'] = results.groupby(date_sector_keys)['factor'].transform(zscore)

# Blank out observations with abs(factor_score) > 3, refill them with the
# bucket median, then re-standardise.
results['factor_adj'] = np.where(abs(results['factor_score']) > 3, np.nan, results['factor'])
results['factor_adj'] = results.groupby(date_sector_keys)['factor_adj'].transform(fill_with_median)
results['factor_score_adj'] = results.groupby(date_sector_keys)['factor_adj'].transform(zscore)

# Look-ahead forward return: each asset's PriceReturn from its next row.
results['forward_return_1'] = (
    results.groupby(results.index.get_level_values(1))['PriceReturn']
    .transform(lambda x: x.shift(-1))
)

assets = results.index.levels[1].unique()
# Make sure to extend the pricing a bit to get a full set of returns.
pricing = get_pricing(assets, start_date='2012-06-30', end_date='2016-07-31', fields='open_price')
results.head()
#AlphaLens builds a single-factor tear sheet.
# Start with the unadjusted 'EV_EBITDA' column as the factor.
factor_data = alphalens.utils.get_clean_factor_and_forward_returns(
    results['EV_EBITDA'],
    pricing,
    periods=(1, 5, 10),
    quantiles=10,
)
alphalens.tears.create_full_tear_sheet(factor_data)
# Removing outliers alone can improve the factor a little, so rerun the tear
# sheet on the z-score adjusted factor built earlier. This is a pattern for
# quickly iterating on and improving factors.
factor_data = alphalens.utils.get_clean_factor_and_forward_returns(
    results['factor_score_adj'],
    pricing,
    periods=(1, 5, 10),
    quantiles=10,
)
alphalens.tears.create_full_tear_sheet(factor_data)
# What do good charts look like? Feed in the look-ahead (forward) return as
# the "factor": the 1-period charts should come out looking really good
# (too good). Some outputs, such as the distribution charts, aren't really
# useful here and turnover is really high, but this shows what the charts
# could look like.
factor_data = alphalens.utils.get_clean_factor_and_forward_returns(
    results['forward_return_1'],
    pricing,
    periods=(1, 5, 10),
    quantiles=10,
)
alphalens.tears.create_full_tear_sheet(factor_data)
# We can also build multiple datasets to do multifactor analysis.
# By digging around in the source (https://github.com/quantopian/alphalens)
# we can find some return streams to compare.
factor_data_2 = alphalens.utils.get_clean_factor_and_forward_returns(
    results['EV_EBITDA'],
    pricing,
    quantiles=10,
    periods=(1, 5, 10),
)

# Append e.g. [['1']] to either call to keep only a single period column.
factor_returns_1 = alphalens.performance.factor_returns(factor_data)
factor_returns_2 = alphalens.performance.factor_returns(factor_data_2)

# Align the two return streams on their shared index. A bare merge() joins on
# the common column names (the period labels), which keeps only rows whose
# return values happen to be equal in both frames. The suffixes keep the
# identically named period columns apart.
factor_returns_composite = factor_returns_1.merge(
    factor_returns_2,
    left_index=True,
    right_index=True,
    suffixes=('_1', '_2'),
)
factor_returns_composite.head()
factor_returns_1.head()