Quantopian's community platform is shutting down. Please read this post for more information and download your code.
Back to Community
Help with pipeline performance

Hi,

I am new to Quantopian (and Python to a certain extent), and after going through some of the tutorials I am trying to create a first algorithm. The algorithm in question is Greenblatt's Magic Formula Investing. In a notebook, I was able to compute the rank properly, but when moving over to an algorithm it ends up timing out :(

Would any more experienced Quantopian user be able to review my code and suggest optimizations? My guess is I'm not properly using the filter functions. Below is the "work in progress" code:

"""
This is an algorithm for calculating Joel Greenblatt's Magic Formula.
"""

# +------------------------+  
# |                        |  
# |      LIBRARIES         |  
# |                        |  
# +------------------------+

# Python Imports

import math

# Quantopian Imports

from quantopian.algorithm import attach_pipeline, pipeline_output

# Pipeline Imports

from quantopian.pipeline import Pipeline  
from quantopian.pipeline.data.morningstar import valuation  
from quantopian.pipeline.data.morningstar import income_statement  
from quantopian.pipeline.data.morningstar import balance_sheet  
from quantopian.pipeline.data.morningstar import asset_classification  
from quantopian.pipeline.data.morningstar import share_class_reference  
from quantopian.pipeline.data.morningstar import company_reference  
from quantopian.pipeline.data.morningstar import operation_ratios  
from quantopian.pipeline.filters.morningstar import IsPrimaryShare  
from quantopian.pipeline.factors import AverageDollarVolume, SimpleMovingAverage

# +------------------------+  
# |                        |  
# |      CONSTANTS         |  
# |                        |  
# +------------------------+

# Units

MILLION_DOLLAR = 1000000
WEEKS_IN_YEAR = 52

# Strategy settings

MINIMUM_MARKET_CAP = 500 * MILLION_DOLLAR
REBALANCE_WEEK_FREQUENCY = 4  # 52 weeks / 4 = 13 rebalance events per year
MAX_HOLDING_COUNT = 26        # 2 positions every 4 weeks
# Number of new positions to open at each rebalance. Floor division keeps this
# an integer count on both Python 2 and Python 3 (plain "/" would produce the
# float 2.0 under Python 3's true division).
NEW_HOLDING_PER_REBALANCE = MAX_HOLDING_COUNT // (WEEKS_IN_YEAR // REBALANCE_WEEK_FREQUENCY)

SHORT_SPY = False

# Filters

MINIMUM_DOLLAR_VOLUME = 10 * MILLION_DOLLAR

# Other Constants

SECURITY_TYPE_COMMON_STOCK = 'ST00000001'
SECTOR_CODE_UTILITIES = 207
SECTOR_CODE_FINANCIALS = 103

EXCHANGE_ID_NASDAQ = 'NAS'
EXCHANGE_ID_NYSE = 'NYS'

# +------------------------+  
# |                        |  
# |      HELPERS           |  
# |                        |  
# +------------------------+

def earning_yield():
    """
    Build the earnings-yield term: latest EBIT divided by latest
    enterprise value.
    """
    return income_statement.ebit.latest / valuation.enterprise_value.latest

def return_on_capital():
    """
    Build the return-on-capital term from the latest ROIC operation ratio.
    """
    roic = operation_ratios.roic.latest
    return roic

def high_market_cap():
    """
    Build a filter that passes when the latest market cap is strictly
    above MINIMUM_MARKET_CAP.
    """
    return valuation.market_cap.latest > MINIMUM_MARKET_CAP

def high_trading_volume():
    """
    Build a filter that passes when the 30-bar average dollar volume is
    strictly above MINIMUM_DOLLAR_VOLUME.
    """
    return AverageDollarVolume(window_length=30) > MINIMUM_DOLLAR_VOLUME
def allowed_industries_filter():
    """
    Build a filter that rejects the utilities and financials sectors.

    The two comparisons must be combined with the ``&`` operator. Python's
    ``and`` cannot be overloaded: it simply returns one of its two operands,
    so it never produces a filter that enforces both conditions.
    """
    sector_code = asset_classification.morningstar_sector_code.latest
    # Named intermediates avoid precedence surprises: "&" binds tighter
    # than "!=" in Python.
    not_utilities = sector_code != SECTOR_CODE_UTILITIES
    not_financials = sector_code != SECTOR_CODE_FINANCIALS
    return not_utilities & not_financials

def is_tradeable():
    """
    Build the combined "tradeable security" filter.

    Every condition listed below must hold; they are AND-ed together from
    left to right.
    """
    conditions = [
        # Primary share class only.
        IsPrimaryShare(),
        # Security type equals the common-stock code.
        share_class_reference.security_type.latest.eq(SECURITY_TYPE_COMMON_STOCK),
        # Not flagged as a depositary receipt.
        ~share_class_reference.is_depositary_receipt.latest,
        # Exchange id does not start with 'OTC'.
        ~share_class_reference.exchange_id.latest.startswith('OTC'),
        # Symbol does not end with '.WI'.
        ~share_class_reference.symbol.latest.endswith('.WI'),
        # Company name does not match the "LP" / "L.P." suffix pattern.
        ~company_reference.standard_name.latest.matches('.* L[. ]?P.?$'),
        # No limited-partnership entry on the balance sheet.
        balance_sheet.limited_partnership.latest.isnull(),
        # A market cap value is available.
        valuation.market_cap.latest.notnull(),
    ]

    tradeable = conditions[0]
    for condition in conditions[1:]:
        tradeable = tradeable & condition
    return tradeable
def reputable_exchanges():
    """
    Build a filter that passes when the primary exchange id equals either
    EXCHANGE_ID_NASDAQ or EXCHANGE_ID_NYSE.
    """
    primary_exchange = company_reference.primary_exchange_id.latest
    on_nasdaq = primary_exchange.eq(EXCHANGE_ID_NASDAQ)
    on_nyse = primary_exchange.eq(EXCHANGE_ID_NYSE)
    return on_nasdaq | on_nyse
def universe_filter():
    """
    Build the pipeline screen by AND-ing together the tradeable, exchange,
    market-cap, dollar-volume and sector filters.
    """
    tradeable = is_tradeable()
    listed = reputable_exchanges()
    large_cap = high_market_cap()
    liquid = high_trading_volume()
    allowed_sector = allowed_industries_filter()
    return tradeable & listed & large_cap & liquid & allowed_sector

# +------------------------+  
# |                        |  
# |      ALGORITHM         |  
# |                        |  
# +------------------------+

#  
# TODO:  
#   -> Exclude securities that had earnings within the week
#   -> Exclude securities with an unexpectedly low P/E (<5)
#   -> Implement SPY shorting  
#

def initialize(context):
    """
    One-time setup, run when the algorithm starts.

    Schedules the weekly check, attaches the stock-selection pipeline under
    the name 'magic_formula', and resets the bookkeeping kept on `context`.
    """
    # Run weekly_check at the start of each week, one hour after the open.
    schedule_function(
        weekly_check,
        date_rules.week_start(),
        time_rules.market_open(hours=1),
    )

    # Register the magic formula stock selector.
    selector = magic_formula_pipeline()
    attach_pipeline(selector, 'magic_formula')

    # Strategy state
    context.current_week = 0
    context.position_weeks = {}
def magic_formula_pipeline():
    """
    Create the pipeline with the 'earning_yield' and 'return_on_capital'
    columns, screened by universe_filter().
    """
    columns = {
        'earning_yield': earning_yield(),
        'return_on_capital': return_on_capital(),
    }
    screen = universe_filter()
    return Pipeline(columns=columns, screen=screen)
def weekly_check(context, data):
    """
    Weekly entry point invoked through schedule_function().

    Updates existing positions, enters new ones on every
    REBALANCE_WEEK_FREQUENCY-th week, logs stats, then advances the
    week counter.
    """
    # Give existing positions a chance to be exited first.
    update_positions(context, data)

    # Only open new positions on rebalance weeks.
    is_rebalance_week = context.current_week % REBALANCE_WEEK_FREQUENCY == 0
    if is_rebalance_week:
        enter_positions(context, data)

    log_stats(context, data)

    # Advance the week counter.
    context.current_week = context.current_week + 1
def update_positions(context, data):
    """
    Placeholder: currently performs no work and returns None.
    """
    return None
def enter_positions(context, data):
    """
    Fetch the top-ranked securities when the portfolio has room.

    Does nothing once the number of open positions reaches
    MAX_HOLDING_COUNT. The fetched list is not used yet (no orders are
    placed here).
    """
    has_room = len(context.portfolio.positions) < MAX_HOLDING_COUNT
    if has_room:
        top_securities = magic_formula_securities()

def magic_formula_securities(result=None, count=30):
    """
    Return the best-ranked securities by combined magic formula rank.

    Each security is ranked separately by 'earning_yield' and by
    'return_on_capital' (highest value gets rank 1); the two ranks are summed
    and the securities with the smallest sums are returned, best first.

    Rows where either column is NaN are dropped before ranking. The previous
    implementation used a bare ``next`` expression (a no-op) where
    ``continue`` was intended, so such rows were never actually skipped.

    Args:
        result: DataFrame indexed by security with 'earning_yield' and
            'return_on_capital' columns. Defaults to
            pipeline_output('magic_formula').
        count: Maximum number of securities to return (default 30).

    Returns:
        List of index labels (securities), ordered by ascending combined rank.
    """
    if result is None:
        result = pipeline_output('magic_formula')

    # Only rank securities that have both metrics available.
    valid = result.dropna(subset=['earning_yield', 'return_on_capital'])

    # method='first' hands out distinct consecutive ranks, like counting
    # positions in a sorted list.
    yield_rank = valid['earning_yield'].rank(ascending=False, method='first')
    capital_rank = valid['return_on_capital'].rank(ascending=False, method='first')
    combined_rank = yield_rank + capital_rank

    # Vectorized selection replaces the per-row Python loops.
    return combined_rank.nsmallest(count).index.tolist()

def log_stats(context, data):
    """
    Log the current week counter and the number of open positions.
    """
    week_message = 'Current week: {0}'.format(context.current_week)
    log.info(week_message)

    position_count = len(context.portfolio.positions)
    log.info('Active positions: {0}'.format(position_count))
1 response

Looks like you're iterating over all the securities our pipeline selects, adding them to output vectors one at a time. Consider ranking and selecting positions from factors directly. Something like this would work for selecting top N values from a factor. I am doing this from memory, please consult the documentation, tutorials, forum posts for precise details. Your pipeline looks simple enough that it shouldn't be timing out, you aren't using all that many fundamental factors and a relatively small window.

factor.top(N, mask=factor.isfinite())  

That way you will get N values out for the factor. If you want to get the top N ranks, you could do this instead:

ranked = factor.rank(ascending=False, mask=factor.isfinite())  

And when defining the pipeline, include

ranked <= N  

as one of the screen filters.

Hope that helps.

Sunil