Purpose: identify slow areas of the code so they can be optimized.
Example output:
1970-01-01 timing:90 INFO 0.8800 avg 0.8800 lo 0.8800 hi 0.87998 init new max
2015-10-01 timing:90 INFO 0.0013 avg 0.0013 lo 0.0013 hi 0.00133 volume new max
2015-10-01 timing:90 INFO 0.0013 avg 0.0013 lo 0.0013 hi 0.00125 close new max
2015-10-01 timing:90 INFO 0.1267 avg 0.1267 lo 0.1267 hi 0.12674 order new max
2015-12-28 timing:112 INFO Timing by highs descending:
2015-12-28 timing:114 INFO avg 0.879977 lo 0.879977 hi 0.879977 init
2015-12-28 timing:114 INFO avg 0.066348 lo 0.037155 hi 0.126741 order
2015-12-28 timing:114 INFO avg 0.001009 lo 0.000823 hi 0.001328 volume
2015-12-28 timing:114 INFO avg 0.000973 lo 0.000787 hi 0.001250 close
End of logs.
Same as 'Source Code' tab in the backtest below.
''' https://www.quantopian.com/posts/timing-code
Timing code to determine elapsed time for any given section of code.
--garyha
Start: timing(context, 'some string', 'reset')
End: timing(context, 'some string')
... and the function def timing(), those are all you need.
There can be multiple instances with different identifiers ('some string', whatever you set).
Four instances in this working example below.
Options (found within the function):
log_level = 1 # -1 Off
# 0 Only summary at end of run
# 1 Each new maximum
# 2 Everything always
# 3 Only if beyond a certain threshold. Use warn_threshold
'''
def initialize(context):
    """Runs once at backtest start; measures how long universe selection takes.

    Wraps the set_universe() call in a timing reset/stop pair under the
    'init' identifier so its cost shows up in the timing log and summary.
    """
    timing(context, 'init', 'reset')
    set_universe(universe.DollarVolumeUniverse(floor_percentile=90, ceiling_percentile=100))
    timing(context, 'init')
def handle_data(context, data):
    """Per-bar demo: times two history() fetches and the ordering loop.

    Each timed section is bracketed by timing(ctx, id, 'reset') to zero the
    clock and timing(ctx, id) to log the elapsed time for that identifier.
    """
    ctx = context  # shorthand

    # --- Section 1: daily volume history -----------------------------------
    timing(ctx, 'volume', 'reset')           # start the clock for 'volume'
    volume_frame = history(100, '1d', 'volume')
    timing(ctx, 'volume')                    # stop the clock; logs elapsed
    _ = volume_frame                         # silence used-once warning in this demo

    # --- Section 2: daily close-price history ------------------------------
    timing(ctx, 'close', 'reset')
    close_frame = history(100, '1d', 'close_price')
    timing(ctx, 'close')
    _ = close_frame                          # silence used-once warning

    # --- Section 3: placing orders across the universe ---------------------
    timing(ctx, 'order', 'reset')
    for security in data:
        order_target_percent(security, .005)
    timing(ctx, 'order')
# identifier
def timing(c, identifier, reset=None):
    ''' Log elapsed wall-clock time between a 'reset' call and the next plain call.

    Usage:
        timing(context, 'some_id', 'reset')   # zero the clock for 'some_id'
        ... code to measure ...
        timing(context, 'some_id')            # log elapsed seconds for 'some_id'

    Parameters:
        c          -- the algorithm context; per-identifier state is stored on
                      c.times, summary latch on c.times_summary_done.
        identifier -- any string naming the timed section; each identifier
                      tracks its own hi/lo/average independently.
        reset      -- any truthy value restarts the clock instead of logging.

    Times are in seconds. On the backtest's last bar the first timing() call
    also logs a summary of all identifiers, sorted by highest observed time.
    '''
    log_level = 1  # -1 Off
    #               0 Only summary at end of run
    #               1 Each new maximum
    #               2 Everything always
    #               3 Only if beyond a certain threshold. Use warn_threshold
    warn_threshold = 0.15  # With log_level 3, only log if elapsed exceeds this (seconds).
    if log_level == -1:
        return
    import time  # Local import keeps this snippet copy-paste portable; Python caches
    #              the module, so repeated calls only pay a dict lookup. Move to the
    #              top of the algo for slightly better efficiency.
    t = time.time()
    if 'times' not in c:  # Could also move this one-time setup into initialize().
        c.times = {}      # Per-identifier timing state.
    if identifier not in c.times:
        # First sighting of this identifier: record the starting point and return.
        c.times[identifier] = {
            'point': t,       # reference point in time, acting as zero
            'hi'   : 0,       # highest elapsed seen
            'lo'   : 1000,    # lowest elapsed seen (sentinel start)
            'avg'  : [0, 0],  # [running average, sample count]
        }
        return
    if reset:
        c.times[identifier]['point'] = t  # Re-zero the clock for this identifier.
        return
    elapsed = t - c.times[identifier]['point']           # Elapsed time since reset.
    avg, num = c.times[identifier]['avg']                # Unpack running average state.
    avg_new = ((avg * num) + elapsed) / (num + 1)        # Incremental mean update.
    # Decide 'new max' BEFORE updating hi, otherwise it would never trigger.
    new_max_str = 'new max' if elapsed > c.times[identifier]['hi'] else ''
    c.times[identifier]['avg'] = [avg_new, num + 1]
    c.times[identifier]['hi'] = max(elapsed, c.times[identifier]['hi'])
    c.times[identifier]['lo'] = min(elapsed, c.times[identifier]['lo'])
    prnt_str = '{} avg {} lo {} hi {} {} {}'.format('%.4f' % elapsed, '%.4f' % avg_new,
        '%.4f' % c.times[identifier]['lo'], '%.5f' % c.times[identifier]['hi'], identifier, new_max_str)
    if (log_level == 1 and new_max_str) or log_level == 2:
        log.info(prnt_str)
    if log_level == 3 and elapsed > warn_threshold:
        log.warn(prnt_str)
    # Summary on last day/frame, sorted by highest high value first.
    # If using schedule_function(), backtest last day/time may need to match for this to execute.
    if 'times_summary_done' not in c:
        c.times_summary_done = 0  # Latch so the summary is only logged once.
    log_summary = 0
    mode = get_environment('data_frequency')
    if mode == 'daily' and get_datetime().date() == get_environment('end').date():
        log_summary = 1
    elif mode == 'minute' and get_datetime() == get_environment('end'):
        log_summary = 1
    if log_summary and not c.times_summary_done:
        # BUG FIX: sort numerically. The original sorted '%.6f'-formatted STRINGS,
        # which orders lexicographically and misranks any value >= 10 seconds
        # (e.g. '12.000000' sorts below '9.000000'). Sort float tuples instead
        # and format only when printing.
        stats = sorted(
            ((v['hi'], v['lo'], v['avg'][0], ident) for ident, v in c.times.items()),
            reverse=True)
        log.info('Timing by highs descending:')
        for hi, lo, av, ident in stats:
            log.info('avg {} lo {} hi {} {}'.format(
                '%.6f' % av, '%.6f' % lo, '%.6f' % hi, ident))
        c.times_summary_done = 1
# The summary code will be vastly improved when the thing mentioned by Jamie McCorriston as
# passed along to the engineers is implemented, like finalize() suggested by Andre Pajak at
# https://www.quantopian.com/posts/logging-something-on-the-last-day-of-backtest
# Right now the summary runs last day/minute on the *first* timing(). If there is more than one,
# then the summary will not include any new highs in those that come after it.
# If a new high on last minute, unlikely yet bad.
# I would call it wrapup(), fewer chars and less ominous. It should be optional.
# If we want to add it, it runs when the backtest is done so we can log anything from context.
# From the software development point of view, highly beneficial.