Notebook
In [1]:
bt = get_backtest('56a8157633749711029e987b')
100% Time: 0:00:45|###########################################################|
In [42]:
from __future__ import division
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import scipy as sp
import numpy as np
import pyfolio as pf
from collections import OrderedDict, defaultdict, deque

def groupby_directional(txn):
    out = []
    for sym, t in txn.groupby('symbol'):
        t = t.sort_index()
        t['order_sign'] = t.amount > 0
        t['block'] = (t.order_sign.shift(1) != t.order_sign).astype(int).cumsum()
        t.index.name = 'dt'
        grouped = t.reset_index()\
                   .groupby('block')\
                   .agg({'price': 'mean',
                         'amount': 'sum',
                         'symbol': 'first',
                         'dt': 'first'})
        out.append(grouped)
    
    out = pd.concat(out)
    out = out.set_index('dt')
    return out
    
    
def groupby_consecutive(txn, min_gap=pd.Timedelta('1m')):
    out = []
    for sym, t in txn.groupby('symbol'):
        t = t.sort_index()
        t.index.name = 'dt'
        t = t.reset_index()
        
        t['order_sign'] = t.amount > 0
        t['block_dir'] = (t.order_sign.shift(1) != t.order_sign).astype(int).cumsum()
        t['block_time'] = ((t.dt - t.dt.shift(1)) > min_gap).astype(int).cumsum()
        grouped = t.groupby(['block_dir', 'block_time'])\
                   .agg({'price': 'mean',
                         'amount': 'sum',
                         'symbol': 'first',
                         'dt': 'first'})
        out.append(grouped)
    
    out = pd.concat(out)
    out = out.set_index('dt')
    return out
    
    
def extract_round_trips_stack(transactions, groupby=groupby_consecutive):
    #transactions = pf.round_trips.split_trades(transactions)
    
    transactions = groupby(transactions)
    port_stack = defaultdict(deque)
    roundtrips = []

    for sym, trans_sym in transactions.groupby('symbol'):
        trans_sym = trans_sym.sort_index()
        sym_stack = port_stack[sym]
        
        # Create list of 
        for dt, t in trans_sym.iterrows():
            signed_price = t.price * np.sign(t.amount)
            abs_amount = int(abs(t.amount))
            indiv_prices = [signed_price] * abs_amount
            if (len(sym_stack) == 0) or \
                (np.sign(sym_stack[-1][1]) == np.sign(t.amount)):
                for price in indiv_prices:
                    sym_stack.append((dt, price))
            else:
                # Close round-trip
                pnl = 0
                invested = 0
                cur_durations = []
                cur_open_dts = []
                cur_close_dts = []

                for price in indiv_prices:
                    if len(sym_stack) != 0:
                        # Retrieve last dt, stock-price pair from stack
                        prev_dt, prev_price = sym_stack.pop()

                        pnl += -(price + prev_price)
                        cur_durations.append(dt - prev_dt)
                        cur_open_dts.append(prev_dt)
                        invested += np.abs(prev_price)

                    else:
                        # Push additional stock-prices onto stack
                        sym_stack.append((dt, price))


                roundtrips.append({'pnl': pnl,
                                   'duration': np.median(cur_durations),
                                   'open_dt': cur_open_dts[len(cur_open_dts) // 2],
                                   'close_dt': dt,
                                   'long': price < 0,
                                   'returns': pnl / invested,
                                   'symbol': sym,
                                  })

    roundtrips = pd.DataFrame(roundtrips)

    return roundtrips
In [17]:
# Add symbols
transactions = bt.transactions
transactions['symbol'] = map(lambda x: x.symbol, symbols(transactions.sid))
In [43]:
trades = extract_round_trips_stack(transactions)
In [44]:
trades.head()
Out[44]:
close_dt duration long open_dt pnl returns symbol
0 2011-12-01 30 days True 2011-11-01 156.823966 0.072501 A
1 2011-03-01 28 days True 2011-02-01 -916.024008 -0.035666 AAI
2 2011-03-01 28 days True 2011-02-01 -472.261430 -0.020697 AAP
3 2011-09-01 31 days True 2011-08-01 257.028264 0.117430 AAP
4 2011-11-01 123 days True 2011-07-01 338.766246 0.099569 AAP
In [45]:
trade_stats = OrderedDict([('Total profit', lambda x: x.sum()),
                          ('Gross profit', lambda x: x[x>0].sum()),
                          ('Gross loss', lambda x: x[x<0].sum()),
                          ('Profit factor', lambda x: x[x>0].sum() / x[x<0].abs().sum()),
                          ('Total number of trades', 'count'),
                          ('Percent profitable', lambda x: len(x[x>0]) / len(x)),
                          ('Winning trades', lambda x: len(x[x>0])),
                          ('Losing trades', lambda x: len(x[x<0])),
                          ('Even trades', lambda x: len(x[x==0])),
                          ('Avg. trade net profit', 'mean'),
                          ('Avg. winning trade', lambda x: x[x>0].mean()),
                          ('Avg. losing trade', lambda x: x[x<0].mean()),
                          ('Ratio Avg. Win:Avg. Loss', lambda x: x[x>0].mean() / x[x<0].abs().mean()),
                          ('Largest winning trade', 'max'),
                          ('Largest losing trade', 'min')])


stats = trades.assign(ones=np.ones(len(trades))).groupby('ones')['pnl'].agg(trade_stats).T.rename_axis({1.0: 'All trades'}, axis='columns')
stats2 = trades.groupby('long')['pnl'].agg(trade_stats).T.rename_axis({False: 'Short trades', True: 'Long trades'}, axis='columns')
stats = stats.join(stats2)
#pd.set_option('display.float_format', lambda x: '$%.2f' % x)
stats[['All trades', 'Long trades', 'Short trades']]
Out[45]:
All trades Long trades Short trades
Total profit -69584.662210 975513.212704 -1045097.874914
Gross profit 2510523.350874 1550297.922011 960225.428863
Gross loss -2580108.013083 -574784.709306 -2005323.303777
Profit factor 0.973030 2.697180 0.478838
Total number of trades 2329.000000 1114.000000 1215.000000
Percent profitable 0.576642 0.780969 0.389300
Winning trades 1343.000000 870.000000 473.000000
Losing trades 986.000000 244.000000 742.000000
Even trades 0.000000 0.000000 0.000000
Avg. trade net profit -29.877485 875.685110 -860.162860
Avg. winning trade 1869.339800 1781.951634 2030.074902
Avg. losing trade -2616.742407 -2355.675038 -2702.592054
Ratio Avg. Win:Avg. Loss 0.714377 0.756451 0.751158
Largest winning trade 65310.107568 65310.107568 22894.903235
Largest losing trade -51904.020233 -14280.127609 -51904.020233
In [35]:
def create_round_trip_tear_sheet(trades, ndays):
    #transactions_closed = pf.round_trips.add_closing_transactions(positions,
    #                                                              transactions)
    #trades = round_trips.extract_round_trips(transactions_closed)
    #trades = extract_round_trips_stack(transactions)

    if len(trades) < 5:
        warnings.warn(
            """Fewer than 5 round-trip trades made.
               Skipping round trip tearsheet.""", UserWarning)
        return

    #ndays = len(positions)

    print(trades.drop(['open_dt', 'close_dt', 'symbol'],
                      axis='columns').describe())
    print('Percent of round trips profitable = {:.4}%'.format(
          (trades.pnl > 0).mean() * 100))

    winning_round_trips = trades[trades.pnl > 0]
    losing_round_trips = trades[trades.pnl < 0]
    print('Mean return per winning round trip = {:.4}'.format(
        winning_round_trips.returns.mean()))
    print('Mean return per losing round trip = {:.4}'.format(
        losing_round_trips.returns.mean()))

    print('A decision is made every {:.4} days.'.format(float(ndays) / len(trades)))
    print('{:.4} trading decisions per day.'.format(len(trades) * 1. / ndays))
    print('{:.4} trading decisions per month.'.format(
        len(trades) * 1. / (ndays / 21)))
In [36]:
create_round_trip_tear_sheet(trades, len(bt.positions))
                        duration       long           pnl      returns
count                       2329       2329   2329.000000  2329.000000
mean   -28 days +21:37:29.689261  0.4783169    -29.877485    -0.000590
std      53 days 21:25:31.374712  0.4996369   5835.378202     0.328026
min              0 days 00:00:00      False -51904.020233    -2.540072
25%             30 days 00:00:00          0   -304.623502    -0.063682
50%             32 days 00:00:00          0     25.611326     0.020453
75%             90 days 00:00:00          1    426.406668     0.111431
max            303 days 00:00:00       True  65310.107568     1.999213
Percent of round trips profitable = 57.66%
Mean return per winning round trip = 0.1471
Mean return per losing round trip = -0.2018
A decision is made every 41.21 days.
0.02426 trading decisions per day.
0.5096 trading decisions per month.