from quantopian.pipeline import Pipeline
from quantopian.pipeline import CustomFactor
from quantopian.research import run_pipeline
from quantopian.pipeline.data.builtin import USEquityPricing
from quantopian.pipeline.factors import SimpleMovingAverage
from quantopian.pipeline.factors import AverageDollarVolume
from quantopian.pipeline.data import morningstar
from quantopian.pipeline.data import morningstar as mstar
from quantopian.pipeline.filters.morningstar import IsPrimaryShare
from quantopian.pipeline.classifiers.morningstar import Sector
import numpy as np
from scipy.stats.mstats import gmean
from quantopian.research import run_pipeline
def filter_universe():
    """Build the tradable-universe filter used to seed every pipeline.

    A security is tradable when it passes all of these checks:

    1. common stock (security type ``ST00000001``)
    2. name does not end in an "LP"-style suffix
    3. limited-partnership balance-sheet field is null
    4. has a market cap (knocks out ETFs, which have no fundamental data)
    5. exchange id does not start with ``OTC`` (not over the counter)
    6. symbol does not end in ``.WI`` (not when issued)
    7. not a depositary receipt
    8. primary share
    9. sector code is not 103 (the ``not_financial`` check)

    Tradable securities are then ranked by 21-day average dollar volume
    (highest first) and only ranks 1-1000 are kept.

    Check Scott's notebook for more details.

    Returns:
        The filter selecting the high-dollar-volume tradable securities.
    """
    share_class = mstar.share_class_reference

    common_stock = share_class.security_type.latest.eq('ST00000001')
    # Raw string: exactly the same pattern as before, but no longer relying
    # on Python passing the unrecognised "\." escape through unchanged.
    not_lp_name = ~mstar.company_reference.standard_name.latest.matches(
        r'.* L[\. ]?P\.?$'
    )
    not_lp_balance_sheet = (
        mstar.balance_sheet.limited_partnership.latest.isnull()
    )
    have_data = mstar.valuation.market_cap.latest.notnull()
    not_otc = ~share_class.exchange_id.latest.startswith('OTC')
    not_wi = ~share_class.symbol.latest.endswith('.WI')
    not_depository = ~share_class.is_depositary_receipt.latest
    primary_share = IsPrimaryShare()
    not_financial = ~Sector().eq(103)

    tradable_filter = (
        common_stock & not_lp_name & not_lp_balance_sheet & have_data &
        not_otc & not_wi & not_depository & primary_share & not_financial
    )

    # Dollar volume is only computed for tradable names, so everything else
    # is left out of the ranking.
    return AverageDollarVolume(
        window_length=21,
        mask=tradable_filter
    ).rank(ascending=False) < 1001
def make_pipeline1():
    """Return a pipeline reporting dollar volume and sector for the universe.

    Columns:
        avg_dollar_volume: 21-day average dollar volume.
        sector: the ``Sector()`` classifier.

    Only securities passing ``filter_universe()`` are included.
    """
    columns = {
        'avg_dollar_volume': AverageDollarVolume(window_length=21),
        'sector': Sector(),
    }
    return Pipeline(columns=columns, screen=filter_universe())
# Run the universe pipeline for a single day and report how many names pass.
my_pipe = make_pipeline1()
result = run_pipeline(my_pipe, '2016-09-07', '2016-09-07')
# Parenthesised single-argument form prints the same text on Python 2 and 3.
print('Number of securities that passed the filter: %d' % len(result))
# Bare expression: only displayed when it is the last statement of a
# notebook cell.
result.head(5)
class STA(CustomFactor):
    """Forensic measure ``|net income - operating cash flow| / total assets``.

    (Presumably "scaled total accruals" -- confirm against the strategy
    write-up.)  A higher value is treated as worse by the pipelines in this
    file.

    Missing net income or operating cash flow counts as zero.  When total
    assets is missing or zero the ratio is undefined and the output is NaN,
    so ``rank()`` leaves the security unranked instead of seeing the ``inf``
    that dividing by a zero-filled denominator used to produce.
    """
    inputs = [
        morningstar.cash_flow_statement.operating_cash_flow,
        morningstar.cash_flow_statement.net_income,
        morningstar.balance_sheet.total_assets,
    ]
    window_length = 1

    def compute(self, today, assets, out, ocf, ni, ta):
        """Write the latest STA value for every asset into ``out``."""
        latest_ocf = np.where(np.isnan(ocf[-1]), 0, ocf[-1])
        latest_ni = np.where(np.isnan(ni[-1]), 0, ni[-1])
        # Keep missing total assets as NaN and turn an explicit zero into
        # NaN as well: x / NaN is NaN, whereas x / 0 would be inf.
        latest_ta = np.where(ta[-1] == 0, np.nan, ta[-1])
        out[:] = abs(latest_ni - latest_ocf) / latest_ta
class SNOA(CustomFactor):
    """Forensic balance-sheet measure scaled by total assets.

    Computes ``((ta - cash) - (ta - cash - ltd - cd - ps - mi)) / ta`` on the
    latest row, with missing inputs counted as zero.  A higher value is
    treated as worse by the pipelines in this file.

    When total assets is missing or zero the ratio is undefined and the
    output is NaN so that ``rank()`` skips the security.  Previously such
    rows came out as ``inf`` (non-zero numerator) or were forced to 0
    (0 / 0), and 0 is the *best* possible score under an ascending rank.

    NOTE(review): algebraically the numerator reduces to
    ``ltd + cd + ps + mi``; confirm this is the intended SNOA definition.
    """
    inputs = [
        morningstar.balance_sheet.total_assets,
        morningstar.balance_sheet.cash_and_cash_equivalents,
        morningstar.balance_sheet.current_debt,  # TODO: same as short-term debt?
        morningstar.balance_sheet.minority_interest,
        morningstar.balance_sheet.long_term_debt,  # TODO: check same?
        morningstar.balance_sheet.preferred_stock,  # TODO: check same?
    ]
    window_length = 1

    def compute(self, today, assets, out, ta, cace, cd, mi, ltd, ps):
        """Write the latest SNOA value for every asset into ``out``."""
        # Only the most recent row is used; missing values count as zero.
        ta, cace, cd, mi, ltd, ps = [
            np.where(np.isnan(field[-1]), 0, field[-1])
            for field in (ta, cace, cd, mi, ltd, ps)
        ]
        # Same operation order as the original expression so that rows with
        # valid total assets produce bit-identical results.
        numerator = (ta - cace) - (ta - cace - ltd - cd - ps - mi)
        # Zero here means "missing or genuinely zero": undefined ratio.
        denominator = np.where(ta == 0, np.nan, ta)
        out[:] = numerator / denominator
def make_pipeline2():
    """Return the forensic-screen pipeline.

    STA and SNOA are each ranked in ascending order inside the
    ``filter_universe()`` mask (a high value is bad, so ranks run from lowest
    to highest value).  The two ranks are averaged and re-ranked ascending,
    and the screen keeps the 0-90 percentile band of that combined rank,
    i.e. the 10% with the highest (worst) combined rank are dropped.
    """
    universe = filter_universe()

    sta = STA()
    snoa = SNOA()
    sta_rank = sta.rank(mask=universe, ascending=True)
    snoa_rank = snoa.rank(mask=universe, ascending=True)

    mean_rank = (sta_rank + snoa_rank) / 2.0
    combined_forensic_rank = mean_rank.rank(ascending=True)
    passes_forensic = combined_forensic_rank.percentile_between(0, 90)

    columns = {
        'sta': sta,
        'sta_rank': sta_rank,
        'snoa': snoa,
        'snoa_rank': snoa_rank,
        'comb_forensic_rank': combined_forensic_rank,
    }
    return Pipeline(columns=columns, screen=passes_forensic)
# Run the forensic-screen pipeline for a single day and report the count.
my_pipe = make_pipeline2()
result = run_pipeline(my_pipe, '2016-09-07', '2016-09-07')
# Parenthesised single-argument form prints the same text on Python 2 and 3.
print('Number of securities that passed the filter: %d' % len(result))
# Bare expression: only displayed when it is the last statement of a
# notebook cell.
result.head(50)
class Value(CustomFactor):
    """EBITDA / enterprise value, arranged so that a higher score is better.

    Both inputs are floored at 1.0 before dividing, so negative or zero
    values cannot flip the sign of the score or cause a division by zero.
    Missing EBITDA counts as zero (and is therefore floored to 1.0).

    A missing enterprise value scores 0, which is below every real score
    because both floored operands are at least 1.0.  Previously a missing
    EV was filled with 0, floored to 1.0 and used as the divisor, so the
    score was roughly EBITDA itself and the security topped the ranking.
    """
    inputs = [
        morningstar.income_statement.ebitda,
        morningstar.valuation.enterprise_value,
    ]
    window_length = 1

    def compute(self, today, assets, out, ebitda, ev):
        """Write the latest EBITDA/EV score for every asset into ``out``."""
        latest_ebitda = np.where(np.isnan(ebitda[-1]), 0, ebitda[-1])
        latest_ebitda = np.clip(latest_ebitda, a_min=1.0, a_max=None)

        latest_ev = ev[-1]
        ev_missing = np.isnan(latest_ev)
        # Placeholder of 1.0 keeps the division well defined; those rows are
        # overwritten with 0 below.
        floored_ev = np.clip(
            np.where(ev_missing, 1.0, latest_ev), a_min=1.0, a_max=None
        )

        # EBITDA over EV (not EV over EBITDA) so that bigger means cheaper.
        result = latest_ebitda / floored_ev
        out[:] = np.where(ev_missing | np.isnan(result), 0, result)
def make_pipeline3():
    """Return the forensic-plus-value pipeline.

    Stage 1 (forensic): STA and SNOA are ranked ascending within
    ``filter_universe()``, averaged, re-ranked, and the 0-90 percentile band
    of the combined rank is kept.

    Stage 2 (value): ``Value`` is ranked descending among the forensic
    survivors (a higher score is better) and the 0-10 percentile band of
    that rank becomes the pipeline screen.

    The raw ``ebitda`` and ``ev`` fundamentals are reported alongside the
    ranks.
    """
    universe = filter_universe()

    # --- forensic screen ---
    sta_rank = STA().rank(mask=universe, ascending=True)
    snoa_rank = SNOA().rank(mask=universe, ascending=True)
    combined_forensic_rank = ((sta_rank + snoa_rank) / 2.0).rank(
        ascending=True
    )
    passes_forensic = combined_forensic_rank.percentile_between(0, 90)

    # --- value screen ---
    value = Value()
    value_pass_rank = value.rank(mask=passes_forensic, ascending=False)
    passes_value = value_pass_rank.percentile_between(0, 10)

    columns = {
        'comb_forensic_rank': combined_forensic_rank,
        'value': value,
        'value_rank': value_pass_rank,
        'ev': morningstar.valuation.enterprise_value.latest,
        'ebitda': morningstar.income_statement.ebitda.latest,
    }
    return Pipeline(columns=columns, screen=passes_value)
# Run the forensic-plus-value pipeline for a single day and report the count.
my_pipe = make_pipeline3()
result = run_pipeline(my_pipe, '2016-09-07', '2016-09-07')
# Parenthesised single-argument form prints the same text on Python 2 and 3.
print('Number of securities that passed the filter: %d' % len(result))
# Bare expression: only displayed when it is the last statement of a
# notebook cell.
result.head(5)
class ROA(CustomFactor):
    """F-score element: 1 when the latest ROA is positive, otherwise 0."""
    inputs = [morningstar.operation_ratios.roa]
    window_length = 1

    def compute(self, today, assets, out, roa):
        """Write the 0/1 flag for every asset into ``out``."""
        latest_roa = roa[-1]
        is_positive = latest_roa > 0
        out[:] = np.where(is_positive, 1, 0)
class FCFTA(CustomFactor):
    """F-score element: 1 when free cash flow / total assets is positive."""
    inputs = [
        morningstar.cash_flow_statement.free_cash_flow,
        morningstar.balance_sheet.total_assets,
    ]
    window_length = 1

    def compute(self, today, assets, out, fcf, ta):
        """Write the 0/1 flag for every asset into ``out``."""
        fcf_to_assets = fcf[-1] / ta[-1]
        out[:] = np.where(fcf_to_assets > 0, 1, 0)
class ROA_GROWTH(CustomFactor):
    """F-score element: 1 when ROA is above its value 252 rows back."""
    inputs = [morningstar.operation_ratios.roa]
    window_length = 252

    def compute(self, today, assets, out, roa):
        """Write the 0/1 flag for every asset into ``out``."""
        latest, earlier = roa[-1], roa[-252]
        out[:] = np.where(latest > earlier, 1, 0)
class FCFTA_ROA(CustomFactor):
    """F-score element: 1 when free cash flow / total assets exceeds ROA."""
    inputs = [
        morningstar.cash_flow_statement.free_cash_flow,
        morningstar.balance_sheet.total_assets,
        morningstar.operation_ratios.roa,
    ]
    window_length = 1

    def compute(self, today, assets, out, fcf, ta, roa):
        """Write the 0/1 flag for every asset into ``out``."""
        fcf_to_assets = fcf[-1] / ta[-1]
        beats_roa = fcf_to_assets > roa[-1]
        out[:] = np.where(beats_roa, 1, 0)
class FCFTA_GROWTH(CustomFactor):
    """F-score element: 1 when FCF / total assets is above 252 rows back."""
    inputs = [
        morningstar.cash_flow_statement.free_cash_flow,
        morningstar.balance_sheet.total_assets,
    ]
    window_length = 252

    def compute(self, today, assets, out, fcf, ta):
        """Write the 0/1 flag for every asset into ``out``."""
        latest_ratio = fcf[-1] / ta[-1]
        earlier_ratio = fcf[-252] / ta[-252]
        out[:] = np.where(latest_ratio > earlier_ratio, 1, 0)
class LTD_GROWTH(CustomFactor):
    """F-score element: 1 when long-term debt / total assets has fallen.

    Compares the latest ratio with the ratio 252 rows back.
    """
    inputs = [
        morningstar.balance_sheet.total_assets,
        morningstar.balance_sheet.long_term_debt,
    ]
    window_length = 252

    def compute(self, today, assets, out, ta, ltd):
        """Write the 0/1 flag for every asset into ``out``."""
        latest_ratio = ltd[-1] / ta[-1]
        earlier_ratio = ltd[-252] / ta[-252]
        out[:] = np.where(latest_ratio < earlier_ratio, 1, 0)
class CR_GROWTH(CustomFactor):
    """F-score element: 1 when current ratio is above its value 252 rows back."""
    inputs = [morningstar.operation_ratios.current_ratio]
    window_length = 252

    def compute(self, today, assets, out, cr):
        """Write the 0/1 flag for every asset into ``out``."""
        latest, earlier = cr[-1], cr[-252]
        out[:] = np.where(latest > earlier, 1, 0)
class GM_GROWTH(CustomFactor):
    """F-score element: 1 when gross margin is above its value 252 rows back."""
    inputs = [morningstar.operation_ratios.gross_margin]
    window_length = 252

    def compute(self, today, assets, out, gm):
        """Write the 0/1 flag for every asset into ``out``."""
        latest, earlier = gm[-1], gm[-252]
        out[:] = np.where(latest > earlier, 1, 0)
class ATR_GROWTH(CustomFactor):
    """F-score element: 1 when asset turnover is above 252 rows back."""
    inputs = [morningstar.operation_ratios.assets_turnover]
    window_length = 252

    def compute(self, today, assets, out, atr):
        """Write the 0/1 flag for every asset into ``out``."""
        latest, earlier = atr[-1], atr[-252]
        out[:] = np.where(latest > earlier, 1, 0)
class NEQISS(CustomFactor):
    """F-score element: 1 when shares outstanding grew by less than 1.

    The change is measured between the latest row and the row 252 back.
    """
    inputs = [morningstar.valuation.shares_outstanding]
    window_length = 252

    def compute(self, today, assets, out, so):
        """Write the 0/1 flag for every asset into ``out``."""
        shares_added = so[-1] - so[-252]
        out[:] = np.where(shares_added < 1, 1, 0)
def make_pipeline4():
    """Return the forensic + value + financial-strength pipeline.

    Stage 1 (forensic): STA and SNOA ranked ascending within
    ``filter_universe()``, averaged, re-ranked; the 0-90 percentile band of
    the combined rank is kept.

    Stage 2 (value): ``Value`` ranked descending among the forensic
    survivors; the 0-10 percentile band of that rank is kept.

    Stage 3 (financial strength): the ten 0/1 f-score elements are summed,
    the total is ranked descending among the value survivors (a higher
    f-score is better), and the 0-50 percentile band of that rank becomes the
    pipeline screen.
    """
    universe = filter_universe()

    # --- forensic screen ---
    sta_rank = STA().rank(mask=universe, ascending=True)
    snoa_rank = SNOA().rank(mask=universe, ascending=True)
    combined_forensic_rank = ((sta_rank + snoa_rank) / 2.0).rank(
        ascending=True
    )
    passes_forensic = combined_forensic_rank.percentile_between(0, 90)

    # --- value screen ---
    value_pass_rank = Value().rank(mask=passes_forensic, ascending=False)
    passes_value = value_pass_rank.percentile_between(0, 10)

    # --- f-score: sum of the ten elements, added in this order ---
    elements = [
        ('roa', ROA()),
        ('fcfta', FCFTA()),
        ('roa_growth', ROA_GROWTH()),
        ('fcfta_roa', FCFTA_ROA()),
        ('fcfta_growth', FCFTA_GROWTH()),
        ('ltd_growth', LTD_GROWTH()),
        ('cr_growth', CR_GROWTH()),
        ('gm_growth', GM_GROWTH()),
        ('atr_growth', ATR_GROWTH()),
        ('neqiss', NEQISS()),
    ]
    f_score = elements[0][1]
    for _, element in elements[1:]:
        f_score = f_score + element

    f_score_rank = f_score.rank(mask=passes_value, ascending=False)
    passes_fin_strength = f_score_rank.percentile_between(0, 50)

    columns = dict(elements)
    columns['f_score'] = f_score
    columns['f_score_rank'] = f_score_rank
    return Pipeline(columns=columns, screen=passes_fin_strength)
# Run the financial-strength pipeline for a single day and report the count.
my_pipe = make_pipeline4()
result = run_pipeline(my_pipe, '2016-09-07', '2016-09-07')
# Parenthesised single-argument form prints the same text on Python 2 and 3.
print('Number of securities that passed the filter: %d' % len(result))
# Bare expression: only displayed when it is the last statement of a
# notebook cell.
result.head(45)
class GM_GROWTH_2YR(CustomFactor):
    """Geometric mean of ``1 + gross margin`` at three points, minus 1.

    The three samples are the latest row and the rows 252 and 504 back.
    """
    inputs = [morningstar.operation_ratios.gross_margin]
    window_length = 504

    def compute(self, today, assets, out, gm):
        """Write the per-asset geometric-mean figure into ``out``."""
        samples = [gm[-lag] + 1 for lag in (1, 252, 504)]
        out[:] = gmean(samples) - 1
class GM_STABILITY_2YR(CustomFactor):
    """Standard deviation of the two most recent 252-row gross-margin changes.

    The changes are ``gm[-1] - gm[-252]`` and ``gm[-252] - gm[-504]``.
    """
    inputs = [morningstar.operation_ratios.gross_margin]
    window_length = 504

    def compute(self, today, assets, out, gm):
        """Write the per-asset standard deviation into ``out``."""
        recent_change = gm[-1] - gm[-252]
        prior_change = gm[-252] - gm[-504]
        # axis=0 reduces across the two changes, leaving one value per asset.
        out[:] = np.std([recent_change, prior_change], axis=0)
class ROA_GROWTH_2YR(CustomFactor):
    """Geometric mean of ``1 + ROA`` at three points, minus 1.

    The three samples are the latest row and the rows 252 and 504 back.
    """
    inputs = [morningstar.operation_ratios.roa]
    window_length = 504

    def compute(self, today, assets, out, roa):
        """Write the per-asset geometric-mean figure into ``out``."""
        samples = [roa[-lag] + 1 for lag in (1, 252, 504)]
        out[:] = gmean(samples) - 1
class ROIC_GROWTH_2YR(CustomFactor):
    """Geometric mean of ``1 + ROIC`` at three points, minus 1.

    The three samples are the latest row and the rows 252 and 504 back.
    """
    inputs = [morningstar.operation_ratios.roic]
    window_length = 504

    def compute(self, today, assets, out, roic):
        """Write the per-asset geometric-mean figure into ``out``."""
        samples = [roic[-lag] + 1 for lag in (1, 252, 504)]
        out[:] = gmean(samples) - 1
def make_pipeline5():
    """Return the forensic + value + quality pipeline.

    Stage 1 (forensic): STA and SNOA ranked ascending within
    ``filter_universe()``, averaged, re-ranked; the 0-90 percentile band of
    the combined rank is kept.

    Stage 2 (value): ``Value`` ranked descending among the forensic
    survivors; the 0-10 percentile band of that rank is kept.

    Stage 3 (quality), all ranked among the value survivors:
      * financial strength: the ten 0/1 f-score elements are summed and the
        total is ranked descending;
      * franchise power: the 2-year gross-margin, ROA and ROIC figures are
        ranked descending, gross-margin stability is ranked ascending, and
        the four ranks are averaged and re-ranked ascending.
    The two ranks are averaged and re-ranked ascending, and the 0-50
    percentile band of that combined quality rank becomes the screen.
    """
    universe = filter_universe()

    # --- forensic screen ---
    sta_rank = STA().rank(mask=universe, ascending=True)
    snoa_rank = SNOA().rank(mask=universe, ascending=True)
    combined_forensic_rank = ((sta_rank + snoa_rank) / 2.0).rank(
        ascending=True
    )
    passes_forensic = combined_forensic_rank.percentile_between(0, 90)

    # --- value screen ---
    value_pass_rank = Value().rank(mask=passes_forensic, ascending=False)
    passes_value = value_pass_rank.percentile_between(0, 10)

    # --- financial strength: sum of the ten elements, added in this order ---
    f_score_elements = [
        ROA(), FCFTA(), ROA_GROWTH(), FCFTA_ROA(), FCFTA_GROWTH(),
        LTD_GROWTH(), CR_GROWTH(), GM_GROWTH(), ATR_GROWTH(), NEQISS(),
    ]
    f_score = f_score_elements[0]
    for element in f_score_elements[1:]:
        f_score = f_score + element
    f_score_rank = f_score.rank(mask=passes_value, ascending=False)

    # --- franchise power ---
    gm_growth_2yr = GM_GROWTH_2YR()
    gm_stability_2yr = GM_STABILITY_2YR()
    roa_growth_2yr = ROA_GROWTH_2YR()
    roic_growth_2yr = ROIC_GROWTH_2YR()

    gm_growth_2yr_rank = gm_growth_2yr.rank(
        ascending=False, mask=passes_value
    )
    gm_stability_2yr_rank = gm_stability_2yr.rank(
        ascending=True, mask=passes_value
    )
    roa_growth_2yr_rank = roa_growth_2yr.rank(
        ascending=False, mask=passes_value
    )
    roic_growth_2yr_rank = roic_growth_2yr.rank(
        ascending=False, mask=passes_value
    )

    franchise_power = (
        gm_growth_2yr_rank + gm_stability_2yr_rank +
        roa_growth_2yr_rank + roic_growth_2yr_rank
    ) / 4.0
    franchise_power_rank = franchise_power.rank(ascending=True)

    # --- combined quality ---
    combined_quality = (f_score_rank + franchise_power_rank) / 2.0
    combined_quality_rank = combined_quality.rank(ascending=True)
    passes_quality = combined_quality_rank.percentile_between(0, 50)

    columns = {
        'f_score': f_score,
        'f_score_rank': f_score_rank,
        'franchise_power_rank': franchise_power_rank,
        'combined_quality_rank': combined_quality_rank,
        'gm_growth': gm_growth_2yr,
        'gm_growth_rank': gm_growth_2yr_rank,
        'gm_stability': gm_stability_2yr,
        'gm_stability_rank': gm_stability_2yr_rank,
        'roa_growth': roa_growth_2yr,
        'roa_growth_rank': roa_growth_2yr_rank,
        'roic_growth': roic_growth_2yr,
        'roic_growth_rank': roic_growth_2yr_rank,
    }
    return Pipeline(columns=columns, screen=passes_quality)
# Run the full quality pipeline for a single day and report the count.
my_pipe = make_pipeline5()
result = run_pipeline(my_pipe, '2016-09-07', '2016-09-07')
# Parenthesised single-argument form prints the same text on Python 2 and 3.
print('Number of securities that passed the filter: %d' % len(result))
# Bare expression: only displayed when it is the last statement of a
# notebook cell.
result.head(45)
# TODO: Find out how many years of fundamental data Pipeline can handle
# before it falls over or becomes too slow to be practical to run.
class GM_GROWTH_8YR(CustomFactor):
    """Geometric mean of ``1 + gross margin`` over the last 8 rows, minus 1.

    NOTE(review): the window is 8 consecutive rows, whereas the 2YR factors
    in this file step back 252 rows per year -- confirm whether this is
    meant to sample eight yearly points instead.
    """
    inputs = [morningstar.operation_ratios.gross_margin]
    window_length = 8

    def compute(self, today, assets, out, gm):
        """Write the per-asset geometric-mean figure into ``out``."""
        # Newest row first: gm[-1], gm[-2], ..., gm[-8].
        samples = [gm[-back] + 1 for back in range(1, 9)]
        out[:] = gmean(samples) - 1
class GM_STABILITY_8YR(CustomFactor):
    """Currently outputs the gross margin from 8 rows back, unchanged.

    NOTE(review): this looks like a debugging placeholder.  A standard
    deviation of successive gross-margin differences was sketched here but
    left disabled; confirm the intended calculation before relying on it.
    """
    inputs = [morningstar.operation_ratios.gross_margin]
    window_length = 9

    def compute(self, today, assets, out, gm):
        """Copy the row 8 back from the end of the window into ``out``."""
        eighth_from_end = gm[-8]
        out[:] = eighth_from_end
class ROA_GROWTH_8YR(CustomFactor):
    """Geometric mean of ``1 + ROA / 100`` over the last 8 rows, minus 1.

    NOTE(review): the samples are 8 consecutive rows of a 9-row window,
    whereas the 2YR factors in this file step back 252 rows per year, and
    ``ROA_GROWTH_2YR`` does not divide by 100 -- confirm both the sampling
    interval and the units.
    """
    inputs = [morningstar.operation_ratios.roa]
    window_length = 9

    def compute(self, today, assets, out, roa):
        """Write the per-asset geometric-mean figure into ``out``."""
        # Newest row first: roa[-1], roa[-2], ..., roa[-8].
        samples = [roa[-back] / 100 + 1 for back in range(1, 9)]
        out[:] = gmean(samples) - 1
class ROIC_GROWTH_8YR(CustomFactor):
    """Geometric mean of ``1 + ROIC / 100`` over the last 8 rows, minus 1.

    NOTE(review): the samples are 8 consecutive rows of a 9-row window,
    whereas the 2YR factors in this file step back 252 rows per year, and
    ``ROIC_GROWTH_2YR`` does not divide by 100 -- confirm both the sampling
    interval and the units.
    """
    inputs = [morningstar.operation_ratios.roic]
    window_length = 9

    def compute(self, today, assets, out, roic):
        """Write the per-asset geometric-mean figure into ``out``."""
        # Newest row first: roic[-1], roic[-2], ..., roic[-8].
        samples = [roic[-back] / 100 + 1 for back in range(1, 9)]
        out[:] = gmean(samples) - 1
def make_pipeline2():
    """Return a diagnostic pipeline exposing forensic, value and franchise data.

    NOTE: this reuses the name ``make_pipeline2`` and therefore replaces the
    earlier function of that name from the point where it is defined.

    The screen is the forensic filter only; ``passes_value`` and
    ``passes_forensic`` are reported as columns so the later stages can be
    inspected without filtering on them.

    Ranking conventions:
      * STA and SNOA are ranked ascending (a high value is bad), so keeping
        the 0-90 percentile band of the combined rank drops the worst 10%.
        They were previously ranked descending, which made the same band
        drop the *best* 10% instead.
      * ``Value`` is ranked descending among the forensic survivors.
      * The 2-year growth figures are ranked descending and gross-margin
        stability is ranked ascending (previously descending), matching
        ``make_pipeline5``.  These four ranks are taken over every asset
        rather than being masked to a screen.
    """
    initial_screen = filter_universe()

    # --- forensic screen ---
    sta_rank = STA().rank(mask=initial_screen, ascending=True)
    snoa_rank = SNOA().rank(mask=initial_screen, ascending=True)
    combined_forensic = (sta_rank + snoa_rank) / 2.0
    combined_forensic_rank = combined_forensic.rank(ascending=True)
    passes_forensic = combined_forensic_rank.percentile_between(0, 90)

    # --- value screen (reported, not applied) ---
    value = Value()
    value_pass_rank = value.rank(mask=passes_forensic, ascending=False)
    passes_value = value_pass_rank.percentile_between(0, 10)

    # --- franchise-power inputs ---
    gm_growth_2yr = GM_GROWTH_2YR()
    gm_stability_2yr = GM_STABILITY_2YR()
    roa_growth_2yr = ROA_GROWTH_2YR()
    roic_growth_2yr = ROIC_GROWTH_2YR()

    columns = {
        'combined_forensic': combined_forensic,
        'comb_forensic_rank': combined_forensic_rank,
        'roa': ROA(),
        'value': value,
        'value_rank': value_pass_rank,
        'passes_value': passes_value,
        'passes_forensic': passes_forensic,
        'gm_growth': gm_growth_2yr,
        'gm_growth_rank': gm_growth_2yr.rank(ascending=False),
        'gm_stability': gm_stability_2yr,
        # Lower variability is better, so the steadiest margin gets rank 1.
        'gm_stability_rank': gm_stability_2yr.rank(ascending=True),
        'roa_growth': roa_growth_2yr,
        'roa_growth_rank': roa_growth_2yr.rank(ascending=False),
        'roic_growth': roic_growth_2yr,
        'roic_growth_rank': roic_growth_2yr.rank(ascending=False),
    }
    return Pipeline(columns=columns, screen=passes_forensic)