Notebook

Backtesting a Moving Average Crossover Strategy

In this example we use get_pricing to load 10 years' worth of historical financial data for Apple's stock (i.e. the ticker symbol AAPL).

We then define a Dual Moving Average Crossover algorithm with zipline, the open source backtesting library that powers Quantopian.

Finally, we backtest our strategy against our loaded trade data and create visualization of our entry- and exitpoints.

In [1]:
# Import Zipline, the open source backester, and a few other libraries that we will use
import zipline
from zipline import TradingAlgorithm
from zipline.api import order_target, record, symbol, history, add_history

import pytz
from datetime import datetime
import matplotlib.pyplot as pyplot
import numpy as np
In [16]:
# Load data from get_trades for AAPL
data = get_pricing(
#    ['AAPL'],
    [symbols(24)],
    start_date='2002-01-01',
    end_date = '2015-02-15',
    frequency='daily'
)
data.price.plot(use_index=False)
Out[16]:
<matplotlib.axes._subplots.AxesSubplot at 0x7f6030ebf410>
In [17]:
data
Out[17]:
<class 'pandas.core.panel.Panel'>
Dimensions: 6 (items) x 3303 (major_axis) x 1 (minor_axis)
Items axis: open_price to price
Major_axis axis: 2002-01-02 00:00:00+00:00 to 2015-02-13 00:00:00+00:00
Minor_axis axis: Equity(24 [AAPL]) to Equity(24 [AAPL])
In [18]:
# Define the algorithm - this should look familiar from the Quantopian IDE
# For more information on writing algorithms for Quantopian
# and these functions, see https://www.quantopian.com/help

def initialize(context):
    # Register 2 histories that track daily prices,
    # one with a 100 window and one with a 300 day window
    add_history(100, '1d', 'price')
    add_history(300, '1d', 'price')

    context.i = 0
    context.aapl = symbol('AAPL')

def handle_data(context, data):
    # Skip first 300 days to get full windows
    context.i += 1
    if context.i < 300:
        return

    # Compute averages
    # history() has to be called with the same params
    # from above and returns a pandas dataframe.
    short_mavg = history(100, '1d', 'price').mean()
    long_mavg = history(300, '1d', 'price').mean()

    # Trading logic
    if short_mavg[context.aapl] > long_mavg[context.aapl]:
        # order_target orders as many shares as needed to
        # achieve the desired number of shares.
        order_target(context.aapl, 100)
    elif short_mavg[context.aapl] < long_mavg[context.aapl]:
        order_target(context.aapl, 0)

    # Save values for later inspection
    record(AAPL=data[context.aapl].price,
           short_mavg=short_mavg[context.aapl],
           long_mavg=long_mavg[context.aapl])
In [19]:
# Analyze is a post-hoc analysis method available on Zipline. 
# It accepts the context object and 'perf' which is the output 
# of a Zipline backtest.  This API is currently experimental, 
# and will likely change before release.

def analyze(context, perf):
    fig = pyplot.figure()
    
    # Make a subplot for portfolio value.
    ax1 = fig.add_subplot(211)
    perf.portfolio_value.plot(ax=ax1, figsize=(16,12))
    ax1.set_ylabel('portfolio value in $')

    # Make another subplot showing our trades.
    ax2 = fig.add_subplot(212)
    perf['AAPL'].plot(ax=ax2, figsize=(16, 12))
    perf[['short_mavg', 'long_mavg']].plot(ax=ax2)

    perf_trans = perf.ix[[t != [] for t in perf.transactions]]
    buys = perf_trans.ix[[t[0]['amount'] > 0 for t in perf_trans.transactions]]
    sells = perf_trans.ix[
        [t[0]['amount'] < 0 for t in perf_trans.transactions]]

    # Add buy/sell markers to the second plot
    ax2.plot(buys.index, perf.short_mavg.ix[buys.index],
             '^', markersize=10, color='m')
    ax2.plot(sells.index, perf.short_mavg.ix[sells.index],
             'v', markersize=10, color='k')
    
    # Set figure metadata
    ax2.set_ylabel('price in $')
    pyplot.legend(loc=0)
    pyplot.show()
In [20]:
# NOTE: This cell will take a few minutes to run.

# Create algorithm object passing in initialize and
# handle_data functions
algo_obj = TradingAlgorithm(
    initialize=initialize, 
    handle_data=handle_data
)

# HACK: Analyze isn't supported by the parameter-based API, so
# tack it directly onto the object.
algo_obj._analyze = analyze

# Run algorithm
perf_manual = algo_obj.run(data.transpose(2,1,0))
In [21]:
import pyfolio as pf
In [23]:
returns, positions, transactions, gross_lev = pf.utils.extract_rets_pos_txn_from_zipline(perf_manual)
In [24]:
pf.create_full_tear_sheet(returns, positions=positions, transactions=transactions,
                          gross_lev=gross_lev, live_start_date='2000-01-01')
Entire data start date: 2002-01-04
Entire data end date: 2015-02-13


Out-of-Sample Months: 157
Backtest Months: 0
                   Backtest  Out_of_Sample  All_History
annual_return           NaN           0.01         0.01
annual_volatility       NaN           0.01         0.01
sharpe_ratio            NaN           0.68         0.68
calmar_ratio            NaN           0.18         0.18
stability               NaN           0.83         0.83
max_drawdown            NaN          -0.04        -0.04
omega_ratio             NaN           1.19         1.19
sortino_ratio           NaN           0.99         0.99
skewness                NaN          -0.15        -0.15
kurtosis                NaN          21.21        21.21
information_ratio         0          -0.02        -0.02
alpha                   NaN           0.01         0.01
beta                    NaN           0.02         0.02
/usr/local/lib/python2.7/dist-packages/numpy/lib/nanfunctions.py:598: RuntimeWarning: Mean of empty slice
  warnings.warn("Mean of empty slice", RuntimeWarning)
/usr/local/lib/python2.7/dist-packages/numpy/core/_methods.py:59: RuntimeWarning: Mean of empty slice.
  warnings.warn("Mean of empty slice.", RuntimeWarning)
/usr/local/lib/python2.7/dist-packages/numpy/lib/function_base.py:1890: RuntimeWarning: Degrees of freedom <= 0 for slice
  warnings.warn("Degrees of freedom <= 0 for slice", RuntimeWarning)
---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
<ipython-input-24-f15a0a2e7356> in <module>()
      1 pf.create_full_tear_sheet(returns, positions=positions, transactions=transactions,
----> 2                           gross_lev=gross_lev, live_start_date='2000-01-01')

/usr/local/lib/python2.7/dist-packages/pyfolio/tears.pyc in create_full_tear_sheet(returns, positions, transactions, benchmark_rets, gross_lev, slippage, live_start_date, sector_mappings, bayesian, round_trips, hide_positions, cone_std, set_context)
    157         cone_std=cone_std,
    158         benchmark_rets=benchmark_rets,
--> 159         set_context=set_context)
    160 
    161     create_interesting_times_tear_sheet(returns,

/usr/local/lib/python2.7/dist-packages/pyfolio/plotting.pyc in call_w_context(*args, **kwargs)
     44         if set_context:
     45             with context():
---> 46                 return func(*args, **kwargs)
     47         else:
     48             return func(*args, **kwargs)

/usr/local/lib/python2.7/dist-packages/pyfolio/tears.pyc in create_returns_tear_sheet(returns, live_start_date, cone_std, benchmark_rets, return_fig)
    274         live_start_date=live_start_date,
    275         cone_std=cone_std,
--> 276         ax=ax_rolling_returns)
    277     ax_rolling_returns.set_title(
    278         'Cumulative Returns')

/usr/local/lib/python2.7/dist-packages/pyfolio/plotting.pyc in plot_rolling_returns(returns, factor_returns, live_start_date, cone_std, legend_loc, volatility_match, cone_function, ax, **kwargs)
    613 
    614     is_cum_returns.plot(lw=3, color='forestgreen', alpha=0.6,
--> 615                         label='Backtest', ax=ax, **kwargs)
    616 
    617     if len(oos_cum_returns) > 0:

/usr/local/lib/python2.7/dist-packages/pandas/tools/plotting.pyc in plot_series(data, kind, ax, figsize, use_index, title, grid, legend, style, logx, logy, loglog, xticks, yticks, xlim, ylim, rot, fontsize, colormap, table, yerr, xerr, label, secondary_y, **kwds)
   2517                  yerr=yerr, xerr=xerr,
   2518                  label=label, secondary_y=secondary_y,
-> 2519                  **kwds)
   2520 
   2521 

/usr/local/lib/python2.7/dist-packages/pandas/tools/plotting.pyc in _plot(data, x, y, subplots, ax, kind, **kwds)
   2322         plot_obj = klass(data, subplots=subplots, ax=ax, kind=kind, **kwds)
   2323 
-> 2324     plot_obj.generate()
   2325     plot_obj.draw()
   2326     return plot_obj.result

/usr/local/lib/python2.7/dist-packages/pandas/tools/plotting.pyc in generate(self)
    910     def generate(self):
    911         self._args_adjust()
--> 912         self._compute_plot_data()
    913         self._setup_subplots()
    914         self._make_plot()

/usr/local/lib/python2.7/dist-packages/pandas/tools/plotting.pyc in _compute_plot_data(self)
   1015         if is_empty:
   1016             raise TypeError('Empty {0!r}: no numeric data to '
-> 1017                             'plot'.format(numeric_data.__class__.__name__))
   1018 
   1019         self.data = numeric_data

TypeError: Empty 'DataFrame': no numeric data to plot
In [ ]: