from quantopian.pipeline import Pipeline, CustomFactor
from quantopian.pipeline.data import EquityPricing, factset
from quantopian.pipeline.factors import Returns, SimpleMovingAverage
from quantopian.pipeline.domain import (
AT_EQUITIES, # Austria
AU_EQUITIES, # Australia
BE_EQUITIES, # Belgium
BR_EQUITIES, # Brazil
CA_EQUITIES, # Canada
CH_EQUITIES, # Switzerland
CN_EQUITIES, # China
DE_EQUITIES, # Germany
DK_EQUITIES, # Denmark
ES_EQUITIES, # Spain
FI_EQUITIES, # Finland
FR_EQUITIES, # France
GB_EQUITIES, # Great Britain
HK_EQUITIES, # Hong Kong
IE_EQUITIES, # Ireland
IN_EQUITIES, # India
IT_EQUITIES, # Italy
JP_EQUITIES, # Japan
KR_EQUITIES, # South Korea
NL_EQUITIES, # Netherlands
NO_EQUITIES, # Norway
NZ_EQUITIES, # New Zealand
PT_EQUITIES, # Portugal
SE_EQUITIES, # Sweden
SG_EQUITIES, # Singapore
US_EQUITIES, # United States
)
from quantopian.research import run_pipeline
import pandas as pd
import numpy as np
import time
import matplotlib.pyplot as plt
import seaborn as sns
import empyrical as ep
import alphalens as al
import pyfolio as pf
Helper functions
# Extends a boolean filter forward in time: emits 1 if the input filter was
# True on any day in the trailing window. Used below so the returns pipeline
# keeps computing returns for assets after their last screened factor date.
class FilterExtend(CustomFactor):
    def compute(self, today, asset_ids, out, values):
        # Column-wise max of a boolean window: True if True on any day.
        out[:] = np.max(values, axis=0)
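A minimal sketch (toy data, not from the notebook) of what FilterExtend computes: the column-wise max of a boolean window keeps an asset screened in for the whole window after it last passed the filter.

toy_window = np.array([[True, False],
                       [False, False],
                       [False, False]])
# Asset 0 passed the filter once in the window, so it stays in the screen.
print(np.max(toy_window, axis=0))  # [ True False]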
def evaluate_factor(factor,
domain,
start_date,
end_date,
factor_screen=None,
quantiles=5,
returns_lengths=(1, 5, 10)):
"""Analyze a Pipeline Factor using Alphalens.
Parameters
----------
factor : quantopian.pipeline.factors.Factor
Factor producing scores to be evaluated.
domain : quantopian.pipeline.domain.Domain
Domain on which the factor should be evaluated.
start_date : str or pd.Timestamp
Start date for evaluation period.
end_date : str or pd.Timestamp
End date for evaluation period.
factor_screen : quantopian.pipeline.filters.Filter, optional
    Filter defining which assets ``factor`` should be evaluated on.
    Default is ``factor.notnull()``.
quantiles : int, optional
    Number of buckets to use for quantile groups. Default is 5.
returns_lengths : sequence[int]
Forward-returns horizons to use when evaluating ``factor``.
Default is 1-day, 5-day, and 10-day returns.
Returns
-------
factor_data : pd.DataFrame
    A (date, asset)-indexed DataFrame with the following columns:
    'factor': float64
        Values produced by ``factor``.
    'factor_quantile': int64
        Daily quantile label for each factor value.
    One float64 forward-returns column per entry in ``returns_lengths``
    (e.g. '1D', '5D', '10D').
"""
calendar = domain.calendar
# Roll input dates to the next trading session.
start_date = calendar.minute_to_session_label(pd.Timestamp(start_date, tz='UTC'))
end_date = calendar.minute_to_session_label(pd.Timestamp(end_date, tz='UTC'))
if factor_screen is None:
factor_screen = factor.notnull()
# Run pipeline to get factor values and quantiles.
factor_pipe = Pipeline(
{'factor': factor,
'factor_quantile': factor.quantiles(quantiles, mask=factor_screen)},
screen=factor_screen,
domain=domain,
)
factor_results = run_pipeline(factor_pipe, start_date, end_date, chunksize=250)
returns_pipe = Pipeline(
columns={'daily_returns': Returns(window_length=2)},
domain=domain,
screen=FilterExtend(
inputs=[factor_screen],
window_length=max(returns_lengths)+1
).eq(1)
)
# Run the returns pipeline over a window extended past ``end_date`` so that
# forward returns exist for the final factor dates, then shift the results
# back to align with our factor values.
returns_start_date = start_date
returns_end_date = end_date + domain.calendar.day * max(returns_lengths)
raw_returns = run_pipeline(returns_pipe, returns_start_date, returns_end_date, chunksize=500)
shifted_returns = {}
column_order = []
daily_returns = raw_returns['daily_returns']
for length in returns_lengths:
# Shift 1-day returns back by a day, 5-day returns back by 5 days, etc.
colname = '{}D'.format(length)
column_order.append(colname)
shifted_returns[colname] = backshift_returns_series(daily_returns, length)
# Merge backshifted returns into a single frame indexed like our desired output.
merged_returns = pd.DataFrame(
data=shifted_returns,
index=factor_results.index,
columns=column_order,
)
# Concat factor results and forward returns column-wise.
merged = pd.concat([factor_results, merged_returns], axis=1)
merged.index.set_names(['date', 'asset'], inplace=True)
# Drop NaNs
merged = merged.dropna(how='any')
# Add a Business Day Offset to the DateTimeIndex
merged.index.levels[0].freq = pd.tseries.offsets.BDay()
return merged
def backshift_returns_series(series, N):
"""Shift a multi-indexed series backwards by N observations in the first level.
This can be used to convert backward-looking returns into a forward-returns series.
"""
ix = series.index
dates, sids = ix.levels
date_labels, sid_labels = map(np.array, ix.labels)
# Output date labels will contain all but the last N dates.
new_dates = dates[:-N]
# Output data will remove the first M rows, where M is the number of
# rows whose date falls among the first N dates.
cutoff = date_labels.searchsorted(N)
new_date_labels = date_labels[cutoff:] - N
new_sid_labels = sid_labels[cutoff:]
new_values = series.values[cutoff:]
assert new_date_labels[0] == 0
new_index = pd.MultiIndex(
levels=[new_dates, sids],
labels=[new_date_labels, new_sid_labels],
sortorder=1,
names=ix.names,
)
return pd.Series(data=new_values, index=new_index)
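Note that backshift_returns_series relies on the legacy pandas MultiIndex API (``.labels`` and the ``labels=`` constructor argument) available in the Quantopian research environment; modern pandas renames these to ``.codes``. A minimal sketch on hypothetical toy data of the equivalent per-asset shift:

idx = pd.MultiIndex.from_product(
    [pd.bdate_range('2018-01-01', periods=4), ['AAA', 'BBB']],
    names=['date', 'asset'],
)
toy_rets = pd.Series(np.arange(8, dtype=float), index=idx)
# Equivalent (up to the trailing NaN rows) to backshift_returns_series(toy_rets, 1):
# the return observed on day t+1 becomes the forward return recorded on day t.
print(toy_rets.groupby(level='asset').shift(-1).head())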
def compute_turnover(df):
    # Daily turnover: pivot to a date-by-asset frame of weights, difference
    # consecutive days, and sum the absolute weight changes across assets.
    return df.dropna().unstack().dropna(how='all').fillna(0).diff().abs().sum(1)
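A quick sketch of compute_turnover on hypothetical two-day, two-asset weights; the first day has nothing to difference against, so its turnover shows as 0.

toy_weights = pd.Series(
    [0.5, -0.5, 0.25, -0.75],
    index=pd.MultiIndex.from_product(
        [pd.to_datetime(['2018-01-02', '2018-01-03']), ['AAA', 'BBB']],
        names=['date', 'asset'],
    ),
)
# Day-two turnover: |0.25 - 0.5| + |-0.75 - (-0.5)| = 0.5
print(compute_turnover(toy_weights))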
def get_max_median_position_concentration(expos):
    # Distribution of position weights on each day: the 5th, 25th, 50th,
    # 75th, and 95th percentiles of exposures across assets.
    return expos.groupby(level=0).quantile([.05, .25, .5, .75, .95]).unstack()
def compute_factor_stats(factor_data_total, periods=range(1, 15)):
    portfolio_returns_total = al.performance.factor_returns(factor_data_total)
    # Convert column labels like '1D' to integer day counts, then shift each
    # return series by its own horizon to simulate trading the signal with
    # a matching implementation delay.
    portfolio_returns_total.columns = portfolio_returns_total.columns.map(lambda x: int(x[:-1]))
    for i in portfolio_returns_total.columns:
        portfolio_returns_total[i] = portfolio_returns_total[i].shift(i)
delay_sharpes_total = portfolio_returns_total.apply(ep.sharpe_ratio)
factor = factor_data_total.factor
turnover = compute_turnover(factor)
n_holdings = factor.groupby(level=0).count()
perc_holdings = get_max_median_position_concentration(factor)
return {'factor_data_total': factor_data_total,
'portfolio_returns_total': portfolio_returns_total,
'delay_sharpes_total': delay_sharpes_total,
'turnover': turnover,
'n_holdings': n_holdings,
'perc_holdings': perc_holdings,
}
def plot_overview_tear_sheet(factor_data, periods=range(1, 15)):
    # Treat factor scores as portfolio weights: rescale each date so the
    # absolute weights sum to 1.
    factor_data['factor'] = factor_data.factor.div(factor_data.abs().groupby(level='date').sum()['factor'])
fig = plt.figure(figsize=(16, 16))
gs = plt.GridSpec(3, 4)
ax1 = plt.subplot(gs[0:2, 0:2])
factor_stats = compute_factor_stats(factor_data, periods=periods)
pd.DataFrame({'total': factor_stats['delay_sharpes_total']}).plot.bar(ax=ax1)
ax1.set(xlabel='delay', ylabel='IR')
ax2a = plt.subplot(gs[0:2, 2:4])
delay_cum_rets_total = factor_stats['portfolio_returns_total'][list(range(1, 5))].apply(ep.cum_returns)
delay_cum_rets_total.plot(ax=ax2a)
ax2a.set(title='Total returns', ylabel='Cumulative returns')
ax6 = plt.subplot(gs[-1, 0:2])
factor_stats['n_holdings'].plot(color='b', ax=ax6)
ax6.set_ylabel('# holdings', color='b')
ax6.tick_params(axis='y', labelcolor='b')
ax62 = ax6.twinx()
factor_stats['turnover'].plot(color='r', ax=ax62)
ax62.set_ylabel('turnover', color='r')
ax62.tick_params(axis='y', labelcolor='r')
ax7 = plt.subplot(gs[-1, 2:4])
factor_stats['perc_holdings'].plot(ax=ax7)
ax7.set(ylabel='Long/short perc holdings')
gs.tight_layout(fig)
return fig, factor_stats
Universe definition
# Custom factor that gets the minimum volume traded over the last two weeks.
class MinVolume(CustomFactor):
inputs=[EquityPricing.volume]
window_length=10
    def compute(self, today, asset_ids, out, values):
        # Column-wise minimum volume over the window (np.min propagates NaNs,
        # so any missing day marks the asset's minimum as NaN).
        out[:] = np.min(values, axis=0)
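A toy illustration (hypothetical volumes) of why the minimum is a strict liquidity screen: a single zero-volume day inside the window drives the column minimum to 0, and the (volume_min > 0) mask below then drops the asset.

toy_vols = np.array([[1e6, 2e5],
                     [1e6, 0.0]])
print(np.min(toy_vols, axis=0))  # [1000000.       0.]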
# Create a volume and price filter that filters for stocks in the top 30%.
# We multiply by price to rule out penny stocks that trade in huge volume.
volume_min = MinVolume()
price = EquityPricing.close.latest
univ_filter = ((price * volume_min).percentile_between(70, 100, mask=(volume_min > 0)))
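Roughly, percentile_between(70, 100) keeps the top 30% of assets by the price-times-volume score each day; a numpy sketch of that cutoff on made-up numbers:

toy_dollar_vol = np.array([5e6, 1e7, 2e5, 8e6, 3e6])
cutoff = np.percentile(toy_dollar_vol, 70)
print(toy_dollar_vol >= cutoff)  # keeps the two most liquid names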
Enter your alpha factor here. Make sure to delete the following cell before making your submission!
# Our alpha factor is a size factor: the negative log of market cap, so
# smaller companies receive higher scores.
alpha_factor = -factset.Fundamentals.mkt_val.latest.log1p()
alpha_winsorized = alpha_factor.winsorize(min_percentile=0.05,
max_percentile=0.95,
mask=univ_filter)
# Zscore to get long and short (positive and negative) alphas to use as weights
alpha_zscore = alpha_winsorized.zscore()
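A small numpy sketch (toy scores, not Pipeline code) of what winsorize followed by zscore does to the raw factor: clip the tails at the 5th/95th percentiles, then standardize so positive values become long weights and negative values short weights.

raw = np.array([-3.0, -1.0, 0.0, 1.0, 10.0])
lo, hi = np.percentile(raw, [5, 95])      # clip extreme outliers
clipped = np.clip(raw, lo, hi)
z = (clipped - clipped.mean()) / clipped.std()
print(z)  # the 10.0 outlier no longer dominates the weights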
# Call evaluate_factor on our factor to get Alphalens-formatted data.
al_data = evaluate_factor(
alpha_zscore,
JP_EQUITIES,
    '2015-06-01',
    '2018-10-01',
factor_screen=univ_filter,
returns_lengths=range(1, 15),
)
plot_overview_tear_sheet(al_data);