# Copyright 2016 Quantopian, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import division
from math import copysign
import warnings
from collections import deque, OrderedDict
import pandas as pd
import numpy as np
from .utils import print_table, format_asset
# Aggregations over a pd.Series of per-round-trip PnL, keyed by display
# name. Values are either pandas aggregation strings (e.g. 'mean') or
# callables taking the series. Ratio-style stats guard against division
# by zero by returning NaN.
PNL_STATS = OrderedDict(
    [('Total profit', lambda x: x.sum()),
     ('Gross profit', lambda x: x[x > 0].sum()),
     ('Gross loss', lambda x: x[x < 0].sum()),
     ('Profit factor', lambda x: x[x > 0].sum() / x[x < 0].abs().sum()
      if x[x < 0].abs().sum() != 0 else np.nan),
     ('Avg. trade net profit', 'mean'),
     ('Avg. winning trade', lambda x: x[x > 0].mean()),
     ('Avg. losing trade', lambda x: x[x < 0].mean()),
     ('Ratio Avg. Win:Avg. Loss', lambda x: x[x > 0].mean() /
      x[x < 0].abs().mean() if x[x < 0].abs().mean() != 0 else np.nan),
     ('Largest winning trade', 'max'),
     ('Largest losing trade', 'min'),
     ])

# Trade-count statistics over per-round-trip PnL.
SUMMARY_STATS = OrderedDict(
    [('Total number of round_trips', 'count'),
     ('Percent profitable', lambda x: len(x[x > 0]) / float(len(x))),
     ('Winning round_trips', lambda x: len(x[x > 0])),
     ('Losing round_trips', lambda x: len(x[x < 0])),
     ('Even round_trips', lambda x: len(x[x == 0])),
     ])

# Statistics over per-round-trip returns (fractions, not percent).
RETURN_STATS = OrderedDict(
    [('Avg returns all round_trips', lambda x: x.mean()),
     ('Avg returns winning', lambda x: x[x > 0].mean()),
     ('Avg returns losing', lambda x: x[x < 0].mean()),
     ('Median returns all round_trips', lambda x: x.median()),
     ('Median returns winning', lambda x: x[x > 0].median()),
     ('Median returns losing', lambda x: x[x < 0].median()),
     ('Largest winning trade', 'max'),
     ('Largest losing trade', 'min'),
     ])

# Statistics over per-round-trip durations (pd.Timedelta values).
DURATION_STATS = OrderedDict(
    [('Avg duration', lambda x: x.mean()),
     ('Median duration', lambda x: x.median()),
     ('Longest duration', lambda x: x.max()),
     ('Shortest duration', lambda x: x.min())
     # FIXME: Instead of x.max() - x.min() this should be
     # rts.close_dt.max() - rts.open_dt.min() which is not
     # available here. As it would require a new approach here
     # that passes in multiple fields we disable these measures
     # for now.
     # ('Avg # round_trips per day', lambda x: float(len(x)) /
     #  (x.max() - x.min()).days),
     # ('Avg # round_trips per month', lambda x: float(len(x)) /
     #  (((x.max() - x.min()).days) / APPROX_BDAYS_PER_MONTH)),
     ])
def _apply_stats(grouped, stats_dict):
    # pandas >= 1.0 raises SpecificationError for dict-based renaming on
    # SeriesGroupBy.agg, so apply each statistic individually and
    # assemble the frame ourselves (index: group keys, columns: stats).
    return pd.DataFrame(
        OrderedDict([(name,
                      grouped.agg(stat) if isinstance(stat, str)
                      else grouped.apply(stat))
                     for name, stat in stats_dict.items()]))


def agg_all_long_short(round_trips, col, stats_dict):
    """Aggregate a round-trips column over all trades and by direction.

    Parameters
    ----------
    round_trips : pd.DataFrame
        Round trips with at least column `col` and a boolean 'long'
        column (True for long trades, False for shorts).
    col : str
        Name of the column of `round_trips` to aggregate.
    stats_dict : dict
        Mapping of statistic display name -> aggregation, where each
        aggregation is either a pandas aggregation string (e.g. 'mean')
        or a callable taking a pd.Series.

    Returns
    -------
    pd.DataFrame
        One row per statistic; columns 'All trades', 'Short trades'
        and 'Long trades'.
    """
    # Constant 'ones' column groups every row together for the
    # all-trades aggregate.
    stats_all = (_apply_stats(round_trips
                              .assign(ones=1)
                              .groupby('ones')[col],
                              stats_dict)
                 .T
                 .rename(columns={1.0: 'All trades'}))
    stats_long_short = (_apply_stats(round_trips.groupby('long')[col],
                                     stats_dict)
                        .T
                        .rename(columns={False: 'Short trades',
                                         True: 'Long trades'}))
    return stats_all.join(stats_long_short)
def _groupby_consecutive(txn, max_delta=pd.Timedelta('8h')):
    """Merge transactions in the same direction separated by less than
    max_delta into single blocks priced at the volume-weighted average
    price (VWAP).

    Parameters
    ----------
    txn : pd.DataFrame
        Transactions with 'symbol', 'amount' and 'price' columns,
        indexed by timestamp.
    max_delta : pd.Timedelta, optional
        Transactions further apart than this are never merged, even if
        they share a direction.

    Returns
    -------
    pd.DataFrame
        One row per merged block with summed amount, symbol, VWAP
        price, indexed by the block's first timestamp ('dt').
    """
    def vwap(transaction):
        # Volume-weighted average price of one block of transactions.
        if transaction.amount.sum() == 0:
            warnings.warn('Zero transacted shares, setting vwap to nan.')
            return np.nan
        return (transaction.amount * transaction.price).sum() / \
            transaction.amount.sum()
    out = []
    for _, t in txn.groupby('symbol'):
        t = t.sort_index()
        t.index.name = 'dt'
        t = t.reset_index()
        # True for buys (amount > 0), False for sells.
        t['order_sign'] = t.amount > 0
        # Increments whenever the trade direction flips, so each run of
        # same-direction transactions shares one block id.
        t['block_dir'] = (t.order_sign.shift(
            1) != t.order_sign).astype(int).cumsum()
        # Increments whenever the gap to the previous transaction
        # exceeds max_delta, splitting runs that are too spread out.
        t['block_time'] = ((t.dt.sub(t.dt.shift(1))) >
                           max_delta).astype(int).cumsum()
        # VWAP per (direction, time) block.
        grouped_price = (t.groupby(['block_dir',
                                    'block_time'])
                         .apply(vwap))
        grouped_price.name = 'price'
        # Total shares, symbol and first timestamp of each block.
        grouped_rest = t.groupby(['block_dir', 'block_time']).agg({
            'amount': 'sum',
            'symbol': 'first',
            'dt': 'first'})
        grouped = grouped_rest.join(grouped_price)
        out.append(grouped)
    out = pd.concat(out)
    out = out.set_index('dt')
    return out
def extract_round_trips(transactions,portfolio_value=None):
    """Group transactions into round trips.

    A round trip is a closed cycle in one symbol: shares opened (bought
    for a long, sold for a short) and subsequently closed. Shares are
    matched first-in-first-out via per-share price/time stacks.
    Consecutive same-direction transactions are first merged by
    _groupby_consecutive.

    Parameters
    ----------
    transactions : pd.DataFrame
        Transactions with 'symbol', 'amount' and 'price' columns,
        indexed by timestamp.
    portfolio_value : pd.Series, optional
        Portfolio values indexed by timestamp (assumed daily — the
        join normalizes close timestamps to midnight; TODO confirm
        against callers). When given, a 'returns' column of
        pnl / portfolio_value at the close date is added.

    Returns
    -------
    pd.DataFrame
        One row per round trip with 'pnl', 'open_dt', 'close_dt',
        'long', 'rt_returns', 'symbol' and 'duration' (and 'returns'
        if portfolio_value was given).
    """
    transactions = _groupby_consecutive(transactions)
    roundtrips = []
    for sym, trans_sym in transactions.groupby('symbol'):
        trans_sym = trans_sym.sort_index()
        # Per-share stacks of open signed prices and their open times;
        # shares are matched FIFO via popleft().
        price_stack = deque()
        dt_stack = deque()
        # Signing by trade direction makes opening and closing prices
        # of the same trip carry opposite signs (long: buy > 0,
        # closing sell < 0), which drives the matching below.
        trans_sym['signed_price'] = trans_sym.price * \
            np.sign(trans_sym.amount)
        trans_sym['abs_amount'] = trans_sym.amount.abs().astype(int)
        for dt, t in trans_sym.iterrows():
            if t.price < 0:
                warnings.warn('Negative price detected, ignoring for'
                              'round-trip.')
                continue
            # Expand the block into one price per share so partial
            # closes can be matched share-by-share.
            indiv_prices = [t.signed_price] * t.abs_amount
            if (len(price_stack) == 0) or \
                    (copysign(1, price_stack[-1]) == copysign(1, t.amount)):
                # Same direction as the open position (or flat):
                # extend the position.
                price_stack.extend(indiv_prices)
                dt_stack.extend([dt] * len(indiv_prices))
            else:
                # Close round-trip
                pnl = 0
                invested = 0
                cur_open_dts = []
                for price in indiv_prices:
                    if len(price_stack) != 0 and \
                            (copysign(1, price_stack[-1]) != copysign(1, price)):
                        # Retrieve first dt, stock-price pair from
                        # stack
                        prev_price = price_stack.popleft()
                        prev_dt = dt_stack.popleft()
                        # Opposite signs, so this is close - open for
                        # longs and open - close for shorts.
                        pnl += -(price + prev_price)
                        cur_open_dts.append(prev_dt)
                        invested += abs(prev_price)
                    else:
                        # Push additional stock-prices onto stack
                        price_stack.append(price)
                        dt_stack.append(dt)
                roundtrips.append({'pnl': pnl,
                                   'open_dt': cur_open_dts[0],
                                   'close_dt': dt,
                                   # Closing price is negative (a sell)
                                   # iff the trip was long.
                                   'long': price < 0,
                                   'rt_returns': pnl / invested,
                                   'symbol': sym,
                                   })
    roundtrips = pd.DataFrame(roundtrips)
    roundtrips['duration'] = roundtrips['close_dt'].sub(roundtrips['open_dt'])
    if portfolio_value is not None:
        # Need to normalize so that we can join
        pv = pd.DataFrame(portfolio_value,
                          columns=['portfolio_value'])\
            .assign(date=portfolio_value.index)
        # Truncate close timestamps to midnight so they line up with
        # the portfolio-value dates.
        roundtrips['date'] = roundtrips.close_dt.apply(lambda x:
                                                       x.replace(hour=0,
                                                                 minute=0,
                                                                 second=0))
        tmp = (roundtrips.set_index('date')
               .join(pv.set_index('date'), lsuffix='_')
               .reset_index())
        roundtrips['returns'] = tmp.pnl / tmp.portfolio_value
        roundtrips = roundtrips.drop('date', axis='columns')
    return roundtrips
def add_closing_transactions(positions, transactions):
    """Append synthetic transactions that close out all open positions.

    Parameters
    ----------
    positions : pd.DataFrame
        Position values per symbol plus a 'cash' column, indexed by
        timestamp; the last row defines the still-open positions.
    transactions : pd.DataFrame
        Executed transactions with 'symbol', 'amount' and 'price'
        columns, indexed by timestamp.

    Returns
    -------
    pd.DataFrame
        `transactions` (with zero-amount rows dropped) plus one closing
        transaction per symbol still open at the end.
    """
    closed_txns = transactions[['symbol', 'amount', 'price']]

    pos_at_end = positions.drop('cash', axis=1).iloc[-1]
    open_pos = pos_at_end.replace(0, np.nan).dropna()
    # Add closing round_trips one second after the close to be sure
    # they don't conflict with other round_trips executed at that time.
    end_dt = open_pos.name + pd.Timedelta(seconds=1)

    # Series.iteritems() was removed in pandas 2.0; use items().
    closing_rows = []
    for sym, ending_val in open_pos.items():
        txn_sym = transactions[transactions.symbol == sym]
        ending_amount = txn_sym.amount.sum()
        # Implied per-share price of the remaining position.
        ending_price = ending_val / ending_amount
        closing_rows.append(pd.DataFrame(
            OrderedDict([('amount', -ending_amount),
                         ('price', ending_price),
                         ('symbol', sym)]),
            index=[end_dt]))

    # DataFrame.append() was removed in pandas 2.0; concatenate once
    # instead of appending (quadratically) inside the loop.
    if closing_rows:
        closed_txns = pd.concat([closed_txns] + closing_rows)

    closed_txns = closed_txns[closed_txns.amount != 0]

    return closed_txns
def gen_round_trip_stats(round_trips):
    """Generate various round-trip statistics.

    Parameters
    ----------
    round_trips : pd.DataFrame
        Round trips as produced by extract_round_trips, with 'pnl',
        'duration', 'returns', 'long' and 'symbol' columns.

    Returns
    -------
    dict of pd.DataFrame
        Keys 'pnl', 'summary', 'duration', 'returns' (each aggregated
        over all/long/short trades) and 'symbols' (return statistics
        per symbol, one column per symbol).
    """
    stats = {}
    stats['pnl'] = agg_all_long_short(round_trips, 'pnl', PNL_STATS)
    stats['summary'] = agg_all_long_short(round_trips, 'pnl',
                                          SUMMARY_STATS)
    stats['duration'] = agg_all_long_short(round_trips, 'duration',
                                           DURATION_STATS)
    stats['returns'] = agg_all_long_short(round_trips, 'returns',
                                          RETURN_STATS)

    # pandas >= 1.0 raises SpecificationError for dict-based renaming
    # on SeriesGroupBy.agg, so apply each statistic individually.
    grouped = round_trips.groupby('symbol')['returns']
    stats['symbols'] = pd.DataFrame(
        OrderedDict([(name,
                      grouped.agg(stat) if isinstance(stat, str)
                      else grouped.apply(stat))
                     for name, stat in RETURN_STATS.items()])).T

    return stats
def print_round_trip_stats(round_trips, hide_pos=True):
    """Print a summary table of round-trip statistics.

    Only the summary table is currently printed: the PnL, duration,
    return and per-symbol tables had been disabled by wrapping them in
    a no-op string literal; that dead code is removed here without
    changing behavior.

    Parameters
    ----------
    round_trips : pd.DataFrame
        Round trips as produced by extract_round_trips.
    hide_pos : bool, optional
        Kept for backward compatibility; has no effect while the
        per-symbol table is disabled.
    """
    stats = gen_round_trip_stats(round_trips)

    print_table(stats['summary'], float_format='{:.2f}'.format,
                name='Summary stats')