Dear All,
When testing the speed of my local zipline environment against the Quantopian environment, I found the local zipline environment to be about 35 times faster than the Q server.
Lesson learnt: handle_data runs every minute. To increase speed when backtesting daily data, a new function must be created and scheduled (as Dan mentioned below); otherwise the backtest is very slow.
Example of creating scheduled function:
def initialize(context):
    """Set up the backtest: register the pipeline and schedule the daily rebalance."""
    attach_pipeline(make_pipeline(context), 'my_pipeline')
    # Run the renamed `my_handle_data` exactly once per trading day at the
    # close, instead of letting a `handle_data` hook fire every minute.
    schedule_function(my_handle_data, date_rules.every_day(), time_rules.market_close())
# Rename `handle data` so it doesn't run every minute
def my_handle_data(context, data):
- The offline zipline test below takes 2 min 20 s to backtest 10 years of data (2006-01-01 to 2016-01-01):
# Record the wall-clock start time before the (slow) zipline imports so the
# duration printed at the end of the script covers the whole run, including
# import time.
from datetime import datetime
st = datetime.now()
from zipline import run_algorithm
from zipline.api import *
from zipline.pipeline import CustomFactor, Pipeline
from zipline.pipeline.data import USEquityPricing
from zipline.pipeline.factors import Returns, SimpleMovingAverage
import zipline.pipeline.filters as Filters
import pytz
import pandas as pd
import numpy as np
def initialize(context):
    """Called once at the start of the backtest: attach the trading pipeline."""
    pipe = make_pipeline(context)
    attach_pipeline(pipe, 'my_pipeline')
def make_pipeline(context):
    """Build a pipeline over a static Dow-style universe.

    Columns: the 100-day simple moving average of the close and the most
    recent close. The symbol list is stored on `context` so `handle_data`
    can equal-weight across it.
    """
    context.symbols = symbols(
        "AAPL", "AXP", "BA", "CAT", "CSCO", "CVX", "DIS", "GS", "HD",
        "IBM", "INTC", "JNJ", "JPM", "KO", "MCD", "MMM", "MRK", "MSFT",
        "NKE", "PFE", "PG", "TRV", "UNH", "UTX", "VZ", "WMT", "XOM",
    )
    sma_100 = SimpleMovingAverage(inputs=[USEquityPricing.close], window_length=100)
    latest_close = USEquityPricing.close.latest
    return Pipeline(
        columns={
            'mean_close_100': sma_100,
            'yesterday_close': latest_close,
        },
        screen=Filters.StaticAssets(context.symbols),
    )
def handle_data(context, data):
    """Daily rebalance: equal-weight stocks trading above their 100-day SMA.

    A stock gets weight 1/N when yesterday's close is above its 100-day
    moving average, and 0 otherwise; targets are placed with
    order_target_percent for every tradable asset in the pipeline output.
    """
    context.output = pipeline_output('my_pipeline')
    # FIX: use true division. Under Python 2, 1/len(...) is integer
    # division and evaluates to 0 for any universe larger than one stock,
    # so every weight would be zero and the strategy would never trade.
    # 1.0/len(...) behaves identically under Python 2 and 3.
    context.output['weight'] = np.where(
        context.output['yesterday_close'] > context.output['mean_close_100'],
        1.0 / len(context.symbols),
        0.0,
    )
    for stock, analytics in context.output.iterrows():
        if data.can_trade(stock):
            order_target_percent(stock, analytics['weight'])
# Timezone-aware (UTC) bounds for the 10-year backtest window.
start = datetime(2006, 1, 1, tzinfo=pytz.UTC)
end = datetime(2016, 1, 1, tzinfo=pytz.UTC)
# Run the backtest against the local 'quandl' bundle. Because
# data_frequency is 'daily', handle_data fires once per daily bar here
# (the every-minute cost only applies to minute-frequency runs).
results = run_algorithm(start=start, end=end,
initialize=initialize,
handle_data=handle_data,
capital_base=10000,
data_frequency = 'daily', bundle='quandl' )
# Print total elapsed wall-clock time, measured from before the imports.
ed = datetime.now()
print(ed-st)
- The online Quantopian server test below takes 2 min 20 s to backtest 3.5 months of data (2006-01-01 to 2006-03-15):
from quantopian.pipeline.data.builtin import USEquityPricing
from quantopian.algorithm import attach_pipeline, pipeline_output
from quantopian.pipeline import Pipeline
import quantopian.pipeline.filters as Filters
from quantopian.pipeline.factors import Returns, SimpleMovingAverage
import pandas as pd
import numpy as np
def initialize(context):
    """Backtest setup: register the pipeline under the name 'my_pipeline'."""
    attach_pipeline(make_pipeline(context), 'my_pipeline')
def make_pipeline(context):
    """Construct the screening pipeline for a fixed Dow-style stock list.

    Exposes two columns — the 100-day SMA of the close and the latest
    close — restricted to the static universe saved on `context.symbols`.
    """
    universe_symbols = symbols(
        "AAPL", "AXP", "BA", "CAT", "CSCO", "CVX", "DIS", "GS", "HD",
        "IBM", "INTC", "JNJ", "JPM", "KO", "MCD", "MMM", "MRK", "MSFT",
        "NKE", "PFE", "PG", "TRV", "UNH", "UTX", "VZ", "WMT", "XOM",
    )
    context.symbols = universe_symbols
    pipeline_columns = {
        'mean_close_100': SimpleMovingAverage(
            inputs=[USEquityPricing.close], window_length=100
        ),
        'yesterday_close': USEquityPricing.close.latest,
    }
    return Pipeline(
        columns=pipeline_columns,
        screen=Filters.StaticAssets(universe_symbols),
    )
def handle_data(context, data):
    """Daily rebalance: equal-weight stocks trading above their 100-day SMA.

    A stock gets weight 1/N when yesterday's close is above its 100-day
    moving average, and 0 otherwise; targets are placed with
    order_target_percent for every tradable asset in the pipeline output.
    """
    context.output = pipeline_output('my_pipeline')
    # FIX: use true division. Quantopian's runtime is Python 2, where
    # 1/len(...) is integer division and evaluates to 0 for any universe
    # larger than one stock — every weight would be zero and the strategy
    # would never trade. 1.0/len(...) is correct under Python 2 and 3.
    context.output['weight'] = np.where(
        context.output['yesterday_close'] > context.output['mean_close_100'],
        1.0 / len(context.symbols),
        0.0,
    )
    for stock, analytics in context.output.iterrows():
        if data.can_trade(stock):
            order_target_percent(stock, analytics['weight'])