Dear All,
When testing the speed of my local zipline environment against the Quantopian environment, I found the local zipline environment to be about 35 times faster than the Q server.
Lesson learnt: handle_data runs every minute. To increase speed when backtesting daily data, a new function must be created and scheduled (as Dan mentioned below); otherwise the backtest is very slow.
Example of creating scheduled function:
def initialize(context):
    """Set up the backtest: register the pipeline and schedule the daily rebalance."""
    attach_pipeline(make_pipeline(context), 'my_pipeline')
    # Run the renamed `my_handle_data` exactly once per trading day at the
    # close, instead of letting a `handle_data` hook fire every minute.
    schedule_function(my_handle_data, date_rules.every_day(), time_rules.market_close())
# Rename `handle data` so it doesn't run every minute
def my_handle_data(context, data):
- The offline zipline test below takes 2 min 20 s to backtest 10 years of data (2006-01-01 to 2016-01-01):
# Record the wall-clock start time before the (slow) zipline imports so the
# duration printed at the end of the script covers the whole run, including
# import time.
from datetime import datetime
st = datetime.now()
from zipline import run_algorithm
from zipline.api import *
from zipline.pipeline import CustomFactor, Pipeline
from zipline.pipeline.data import USEquityPricing
from zipline.pipeline.factors import Returns, SimpleMovingAverage
import zipline.pipeline.filters as Filters
import pytz
import pandas as pd
import numpy as np
def initialize(context):
    """Called once at the start of the backtest: attach the trading pipeline."""
    pipe = make_pipeline(context)
    attach_pipeline(pipe, 'my_pipeline')
def make_pipeline(context):
    """Build a pipeline over a static Dow-style universe.

    Columns: the 100-day simple moving average of the close and the most
    recent close. The symbol list is stored on `context` so `handle_data`
    can equal-weight across it.
    """
    context.symbols = symbols(
        "AAPL", "AXP", "BA", "CAT", "CSCO", "CVX", "DIS", "GS", "HD",
        "IBM", "INTC", "JNJ", "JPM", "KO", "MCD", "MMM", "MRK", "MSFT",
        "NKE", "PFE", "PG", "TRV", "UNH", "UTX", "VZ", "WMT", "XOM",
    )
    sma_100 = SimpleMovingAverage(inputs=[USEquityPricing.close], window_length=100)
    latest_close = USEquityPricing.close.latest
    return Pipeline(
        columns={
            'mean_close_100': sma_100,
            'yesterday_close': latest_close,
        },
        screen=Filters.StaticAssets(context.symbols),
    )
def handle_data(context, data):
    """Daily rebalance: equal-weight stocks trading above their 100-day SMA.

    A stock gets weight 1/N when yesterday's close is above its 100-day
    moving average, and 0 otherwise; targets are placed with
    order_target_percent for every tradable asset in the pipeline output.
    """
    context.output = pipeline_output('my_pipeline')
    # FIX: use true division. Under Python 2, 1/len(...) is integer
    # division and evaluates to 0 for any universe larger than one stock,
    # so every weight would be zero and the strategy would never trade.
    # 1.0/len(...) behaves identically under Python 2 and 3.
    context.output['weight'] = np.where(
        context.output['yesterday_close'] > context.output['mean_close_100'],
        1.0 / len(context.symbols),
        0.0,
    )
    for stock, analytics in context.output.iterrows():
        if data.can_trade(stock):
            order_target_percent(stock, analytics['weight'])
# Timezone-aware (UTC) bounds for the 10-year backtest window.
start = datetime(2006, 1, 1, tzinfo=pytz.UTC)
end = datetime(2016, 1, 1, tzinfo=pytz.UTC)
# Run the backtest against the local 'quandl' bundle. Because
# data_frequency is 'daily', handle_data fires once per daily bar here
# (the every-minute cost only applies to minute-frequency runs).
results = run_algorithm(start=start, end=end,
initialize=initialize,
handle_data=handle_data,
capital_base=10000,
data_frequency = 'daily', bundle='quandl' )
# Print total elapsed wall-clock time, measured from before the imports.
ed = datetime.now()
print(ed-st)
- The online Quantopian server test below takes 2 min 20 s to backtest 3.5 months of data (2006-01-01 to 2006-03-15):
from quantopian.pipeline.data.builtin import USEquityPricing
from quantopian.algorithm import attach_pipeline, pipeline_output
from quantopian.pipeline import Pipeline
import quantopian.pipeline.filters as Filters
from quantopian.pipeline.factors import Returns, SimpleMovingAverage
import pandas as pd
import numpy as np
def initialize(context):
    """Backtest setup: register the pipeline under the name 'my_pipeline'."""
    attach_pipeline(make_pipeline(context), 'my_pipeline')
def make_pipeline(context):
    """Construct the screening pipeline for a fixed Dow-style stock list.

    Exposes two columns — the 100-day SMA of the close and the latest
    close — restricted to the static universe saved on `context.symbols`.
    """
    universe_symbols = symbols(
        "AAPL", "AXP", "BA", "CAT", "CSCO", "CVX", "DIS", "GS", "HD",
        "IBM", "INTC", "JNJ", "JPM", "KO", "MCD", "MMM", "MRK", "MSFT",
        "NKE", "PFE", "PG", "TRV", "UNH", "UTX", "VZ", "WMT", "XOM",
    )
    context.symbols = universe_symbols
    pipeline_columns = {
        'mean_close_100': SimpleMovingAverage(
            inputs=[USEquityPricing.close], window_length=100
        ),
        'yesterday_close': USEquityPricing.close.latest,
    }
    return Pipeline(
        columns=pipeline_columns,
        screen=Filters.StaticAssets(universe_symbols),
    )
def handle_data(context, data):
    """Daily rebalance: equal-weight stocks trading above their 100-day SMA.

    A stock gets weight 1/N when yesterday's close is above its 100-day
    moving average, and 0 otherwise; targets are placed with
    order_target_percent for every tradable asset in the pipeline output.
    """
    context.output = pipeline_output('my_pipeline')
    # FIX: use true division. Quantopian's runtime is Python 2, where
    # 1/len(...) is integer division and evaluates to 0 for any universe
    # larger than one stock — every weight would be zero and the strategy
    # would never trade. 1.0/len(...) is correct under Python 2 and 3.
    context.output['weight'] = np.where(
        context.output['yesterday_close'] > context.output['mean_close_100'],
        1.0 / len(context.symbols),
        0.0,
    )
    for stock, analytics in context.output.iterrows():
        if data.can_trade(stock):
            order_target_percent(stock, analytics['weight'])