from quantopian.research import run_pipeline
import quantopian.optimize as opt
from quantopian.pipeline import Pipeline
from quantopian.pipeline.factors import CustomFactor, SimpleMovingAverage
from quantopian.pipeline.data.builtin import EquityPricing, USEquityPricing
from quantopian.pipeline.data.factset import RBICSFocus
from quantopian.pipeline.data.factset.estimates import Actuals, PeriodicConsensus
from quantopian.pipeline.data import Fundamentals
from quantopian.pipeline.data import morningstar
from quantopian.pipeline.filters import QTradableStocksUS
from zipline.utils.tradingcalendar import trading_day
import numpy as np
import pandas as pd
# Pipeline parameters
USE_SECTORS = True
PIPE_NORMALIZE = True
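# USE_SECTORS: when True, standardize() z-scores within RBICS level-2 sector groups
# rather than across the whole cross-section.
# PIPE_NORMALIZE: when True, each factor is rescaled so its absolute weights sum to 1
# (useful when inspecting a factor on its own, per the note in compute()).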
def clip(data, threshold=0.025, drop=False):
data = pd.Series(data)
data_notnull = data[data.notnull()]
if data_notnull.shape[0] > 0:
low_cutoff = data_notnull.quantile(threshold)
high_cutoff = data_notnull.quantile(1 - threshold)
if not drop:
data = data.clip(lower=low_cutoff, upper=high_cutoff).values
        else:
            # Drop the outliers rather than clipping them to the cutoffs
            data = data[(data >= low_cutoff) & (data <= high_cutoff)]
return data
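# Illustrative sketch with made-up values: clip() winsorizes a series at the given
# quantiles of its non-null data, so with threshold=0.25 the extremes are pulled in
# to the 25th/75th percentile cutoffs.
# toy = pd.Series([1., 2., 3., 4., 100.])
# clip(toy, threshold=0.25)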
def standardize(data, winsorize=True, sectors=None, threshold=0.025):
data = pd.Series(data)
if winsorize:
data = clip(data, threshold=threshold)
# Prepare the data
dfData = pd.DataFrame({'data': data})
if USE_SECTORS and sectors is not None:
dfData['sector'] = sectors
else:
dfData['sector'] = ''
# Standardize the data
    zscore = lambda x: (x - x.mean()) / (x.std() if x.std() != 0 else 1)  # guard against zero std
data = dfData.groupby(['sector'])['data'].transform(zscore)
return data
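# Illustrative sketch with made-up values and sector labels: with USE_SECTORS = True,
# each value is z-scored against its own sector's mean and standard deviation rather
# than the whole cross-section.
# toy_vals = pd.Series([1., 2., 3., 10., 20., 30.])
# toy_sectors = ['Tech', 'Tech', 'Tech', 'Energy', 'Energy', 'Energy']
# standardize(toy_vals, winsorize=False, sectors=toy_sectors)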
def normalize(data, demean=False):
data = pd.Series(data)
if demean:
data = data - data.mean()
denom = data.abs().sum()
if denom == 0:
denom = 1
return data / denom
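# Illustrative sketch with made-up scores: normalize() rescales a vector so its
# absolute values sum to 1; with demean=True the weights are also centered, giving a
# rough dollar-neutral long/short weighting.
# toy_scores = pd.Series([2., -1., 1., -2.])
# normalize(toy_scores, demean=True).abs().sum()  # -> 1.0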
class EPSGrowth(CustomFactor):
qn1_eps = Actuals.slice('EPS', 'qf', -1)
q0_eps = Actuals.slice('EPS', 'qf', 0)
inputs = [qn1_eps.actual_value, q0_eps.actual_value, RBICSFocus().l2_name]
window_length = 1
window_safe = True
outputs = ['surprise','prev_q']
def compute(self, today, assets, out, prev_q, curr_q, sectors):
        # Calculate quarter-over-quarter EPS growth (reported in the 'surprise' output)
prev_q = prev_q[-1, :]
curr_q = curr_q[-1, :]
surprise = (curr_q - prev_q) / np.abs(prev_q)
# Replace inf w/ NaN
surprise[np.isinf(surprise)] = np.nan
# Standardize the data
surprise = standardize(surprise, sectors=sectors.as_string_array()[-1, :])
# Normalize the data (NOTE: only include if looking at factor individually)
if PIPE_NORMALIZE:
surprise = normalize(surprise)
out.surprise[:] = surprise
out.prev_q[:] = prev_q
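# A CustomFactor with `outputs` defined unpacks into one factor term per output name
# when instantiated, e.g.: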
# x,y = EPSGrowth()
# class EPSGrowthYr(CustomFactor):
# qn1_eps = Actuals.slice('EPS', 'af', -1)
# q0_eps = Actuals.slice('EPS', 'af', 0)
# inputs = [qn1_eps.actual_value, q0_eps.actual_value, RBICSFocus().l2_name]
# window_length = 1
# window_safe = True
# def compute(self, today, assets, out, prev_q, curr_q, sectors):
# # Calculate surprise
# prev_q = prev_q[-1, :]
# curr_q = curr_q[-1, :]
# surprise = (curr_q - prev_q) / np.abs(prev_q)
# # Replace inf w/ NaN
# surprise[np.isinf(surprise)] = np.nan
# # Standardize the data
# surprise = standardize(surprise, sectors=sectors.as_string_array()[-1, :])
# # Normalize the data (NOTE: only include if looking at factor individually)
# if PIPE_NORMALIZE:
# surprise = normalize(surprise)
# out[:] = surprise
# class TrendyEPS(CustomFactor):
# # Get EPS values
# qn3_eps = PeriodicConsensus.slice('EPS', 'qf', -3)
# qn2_eps = PeriodicConsensus.slice('EPS', 'qf', -2)
# qn1_eps = PeriodicConsensus.slice('EPS', 'qf', -1)
# q0_eps = PeriodicConsensus.slice('EPS', 'qf', 0)
# an3_eps = Actuals.slice('EPS', 'qf', -3)
# an2_eps = Actuals.slice('EPS', 'qf', -2)
# an1_eps = Actuals.slice('EPS', 'qf', -1)
# a0_eps = Actuals.slice('EPS', 'qf', 0)
# inputs = [qn3_eps.mean, qn2_eps.mean, qn1_eps.mean, q0_eps.mean, an3_eps.actual_value, an2_eps.actual_value, an1_eps.actual_value, a0_eps.actual_value, RBICSFocus().l2_name]
# window_length = 1
# window_safe = True
# def compute(self, today, assets, out, qn3_eps, qn2_eps, qn1_eps, q0_eps, an3_eps, an2_eps, an1_eps, a0_eps, sectors):
# # Calculate surprise
# surprise_n3 = (an3_eps[-1, :] - qn3_eps[-1, :]) / np.abs(qn3_eps[-1, :])
# surprise_n2 = (an2_eps[-1, :] - qn2_eps[-1, :]) / np.abs(qn2_eps[-1, :])
# surprise_n1 = (an1_eps[-1, :] - qn1_eps[-1, :]) / np.abs(qn1_eps[-1, :])
# surprise_0 = (a0_eps[-1, :] - q0_eps[-1, :]) / np.abs(q0_eps[-1, :])
# # Add all surprises
# surprise = np.nan_to_num(surprise_n3) + np.nan_to_num(surprise_n2) + np.nan_to_num(surprise_n1) + np.nan_to_num(surprise_0)
# # Replace inf w/ NaN
# surprise[np.isinf(surprise)] = np.nan
# # Standardize the data
# surprise = standardize(surprise, sectors=sectors.as_string_array()[-1, :])
# # Normalize the data (NOTE: only include if looking at factor individually)
# if PIPE_NORMALIZE:
# surprise = normalize(surprise)
# out[:] = surprise
def make_factors():
surprise, prev_q = EPSGrowth()
factors = {}
factors['EPSGrowth'] = surprise
factors['EPSGrowth_prev_q'] = prev_q
# factors['EPSGrowth'] = EPSGrowth
# factors['EPSGrowthYr'] = EPSGrowthYr
# factors['TrendyEPS'] = TrendyEPS
return factors
# Define the universe
universe = QTradableStocksUS()
def factor_pipeline(universe):
    # make_factors() already returns instantiated factor terms (the unpacked outputs
    # of EPSGrowth()), so they can be passed to the Pipeline columns directly
    factors = make_factors()
    pipe = Pipeline(columns=factors, screen=universe)
    return pipe
start_date = '2018-01-04'
end_date = '2018-08-29'
factor_pipe = run_pipeline(factor_pipeline(universe), start_date=start_date, end_date=end_date).dropna(how='all')
factor_pipe.head()
# factor_pipe.hist();
factor_pipe.fillna(0).corr() # Filling NaNs with 0 assumes empty values are the mean (z-score/rank of 0)
# factor_pipe.corr() # Drops NaNs; results are not much different
# Get the alphas
alphas = factor_pipe.copy()
# Replace infs and NaNs
alphas[np.isinf(alphas)] = np.nan
alphas.fillna(0, inplace=True)
from datetime import datetime
from dateutil.relativedelta import relativedelta
# Get pricing data (extends 6 months to minimize dropping in Alphalens)
new_start_date = (datetime.strptime(start_date, '%Y-%m-%d') - relativedelta(months=6)).strftime('%Y-%m-%d')
new_end_date = (datetime.strptime(end_date, '%Y-%m-%d') + relativedelta(months=6)).strftime('%Y-%m-%d')
assets = factor_pipe.reset_index()['level_1'].unique()
dates = factor_pipe.reset_index()['level_0'].unique()
prices = get_pricing(assets, start_date=new_start_date, end_date=new_end_date, fields='close_price')
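# Note: with a single field, get_pricing returns a dates x assets DataFrame of close
# prices, which is the wide format alphalens expects when computing forward returns.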
import alphalens as al
from scipy import stats
def get_ic_table(ic_data):
ic_summary_table = pd.DataFrame()
ic_summary_table["IC Mean"] = ic_data.mean()
ic_summary_table["IC Std."] = ic_data.std()
ic_summary_table["Risk-Adjusted IC"] = \
ic_data.mean() / ic_data.std()
t_stat, p_value = stats.ttest_1samp(ic_data, 0)
ic_summary_table["t-stat(IC)"] = t_stat
ic_summary_table["p-value(IC)"] = p_value
ic_summary_table["IC Skew"] = stats.skew(ic_data)
ic_summary_table["IC Kurtosis"] = stats.kurtosis(ic_data)
return ic_summary_table.apply(lambda x: x.round(3)).T
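# Illustrative sketch with random toy data: get_ic_table() expects a DataFrame of
# daily ICs with one column per forward-return horizon, as produced by
# al.performance.factor_information_coefficient below.
# toy_ic = pd.DataFrame(np.random.normal(0.02, 0.1, size=(250, 2)), columns=['1D', '5D'])
# get_ic_table(toy_ic)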
results = None
for i, col in enumerate(sorted(alphas.columns)):
if i > 0:
print('')
print(col)
# Get the factor data
data = alphas[col]
data = data[data != 0].dropna()
try:
factor_data = al.utils.get_clean_factor_and_forward_returns(data,
prices,
quantiles=None,
bins=(-np.inf, 0, np.inf),
periods=[1, 3, 7, 14],
max_loss=1.
)
# Output the results
# al.tears.create_full_tear_sheet(factor_data)
# al.tears.create_returns_tear_sheet(factor_data)
ic = al.performance.factor_information_coefficient(factor_data)
ic = get_ic_table(ic)
ic.columns = pd.MultiIndex.from_product([[col], ic.columns])
if results is None:
results = ic
else:
results = pd.concat([results, ic], axis=1)
except Exception as e:
print('Error: {}'.format(e))
continue
temp = None
i = 0
unique_vals = results.columns.get_level_values(0).unique()
for j, factor in enumerate(sorted(unique_vals)):
i += 1
res = results.xs(factor, axis=1, level=0, drop_level=False)
if temp is None:
temp = res
else:
temp = pd.concat([temp, res], axis=1)
if i > 4 or j == len(unique_vals) - 1:
display(temp)
temp = None
i = 0
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
import empyrical as ep
import alphalens as al
import pyfolio as pf
from quantopian.research.experimental import get_factor_returns, get_factor_loadings
# Load risk factor loadings and returns
factor_loadings = get_factor_loadings(assets, start_date, new_end_date)
factor_returns = get_factor_returns(start_date, new_end_date)
# Work around a bad data point in the risk factor returns by zeroing out the single
# largest 'value' return
factor_returns.loc[factor_returns.value.idxmax(), 'value'] = 0
def calc_perf_attrib(portfolio_returns, portfolio_pos, factor_returns, factor_loadings):
start = portfolio_returns.index[0]
end = portfolio_returns.index[-1]
factor_loadings.index = factor_loadings.index.set_names(['dt', 'ticker'])
portfolio_pos.index = portfolio_pos.index.set_names(['dt'])
portfolio_pos = portfolio_pos.drop('cash', axis=1)
portfolio_pos.columns.name = 'ticker'
portfolio_pos.columns = portfolio_pos.columns.astype('int')
return ep.perf_attrib(
portfolio_returns,
portfolio_pos.stack().dropna(),
factor_returns.loc[start:end],
factor_loadings.loc[start:end])
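# ep.perf_attrib returns a (risk_exposures, perf_attribution) pair: daily portfolio
# exposures to each risk factor, and returns decomposed into common (factor) and
# specific components; both are consumed by compute_factor_stats below.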
def plot_exposures(risk_exposures, ax=None):
rep = risk_exposures.stack().reset_index()
rep.columns = ['dt', 'factor', 'exposure']
sns.boxplot(x='exposure', y='factor', data=rep, orient='h', ax=ax, order=risk_exposures.columns[::-1])
def compute_turnover(df):
return df.dropna().unstack().dropna(how='all').fillna(0).diff().abs().sum(1)
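# Illustrative sketch with made-up weights: compute_turnover() expects a Series
# indexed by (date, asset); unstacking gives a dates x assets weight matrix, and the
# summed absolute day-over-day change is a simple turnover proxy.
# toy_idx = pd.MultiIndex.from_product([pd.date_range('2018-01-02', periods=3), ['A', 'B']])
# toy_weights = pd.Series([0.5, -0.5, 0.25, -0.25, 0.5, -0.5], index=toy_idx)
# compute_turnover(toy_weights)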
def get_max_median_position_concentration(expos):
longs = expos.loc[expos > 0]
shorts = expos.loc[expos < 0]
return expos.groupby(level=0).quantile([.05, .25, .5, .75, .95]).unstack()
def compute_factor_stats(factor, pricing, factor_returns, factor_loadings, periods=range(1, 15), view=None):
factor_data_total = al.utils.get_clean_factor_and_forward_returns(
factor,
pricing,
quantiles=None,
bins=(-np.inf, 0, np.inf),
periods=periods,
cumulative_returns=False,
)
portfolio_returns_total = al.performance.factor_returns(factor_data_total)
portfolio_returns_total.columns = portfolio_returns_total.columns.map(lambda x: int(x[:-1]))
for i in portfolio_returns_total.columns:
portfolio_returns_total[i] = portfolio_returns_total[i].shift(i)
portfolio_returns_specific = pd.DataFrame(columns=portfolio_returns_total.columns, index=portfolio_returns_total.index)
    # Closure over the data so each delay's perf attribution can be mapped
    # (serially, or in parallel via view.map_sync)
def calc_perf_attrib_c(i, portfolio_returns_total=portfolio_returns_total,
factor_data_total=factor_data_total, factor_returns=factor_returns,
factor_loadings=factor_loadings):
return calc_perf_attrib(portfolio_returns_total[i],
factor_data_total['factor'].unstack().assign(cash=0).shift(i),
factor_returns, factor_loadings)
if view is None:
perf_attrib = map(calc_perf_attrib_c, portfolio_returns_total.columns)
else:
perf_attrib = view.map_sync(calc_perf_attrib_c, portfolio_returns_total.columns)
for i, pa in enumerate(perf_attrib):
if i == 0:
risk_exposures_portfolio = pa[0]
perf_attribution = pa[1]
portfolio_returns_specific[i + 1] = pa[1]['specific_returns']
delay_sharpes_total = portfolio_returns_total.apply(ep.sharpe_ratio)
delay_sharpes_specific = portfolio_returns_specific.apply(ep.sharpe_ratio)
turnover = compute_turnover(factor)
n_holdings = factor.groupby(level=0).count()
perc_holdings = get_max_median_position_concentration(factor)
return {'factor_data_total': factor_data_total,
'portfolio_returns_total': portfolio_returns_total,
'portfolio_returns_specific': portfolio_returns_specific,
'risk_exposures_portfolio': risk_exposures_portfolio,
'perf_attribution': perf_attribution,
'delay_sharpes_total': delay_sharpes_total,
'delay_sharpes_specific': delay_sharpes_specific,
'turnover': turnover,
'n_holdings': n_holdings,
'perc_holdings': perc_holdings,
}
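# compute_factor_stats() is driven by plot_overview_tear_sheet() below; pass a view
# (e.g. an ipyparallel view) to fan the per-delay perf-attribution calls out with
# map_sync, or leave view=None to run them serially.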
def plot_overview_tear_sheet(factor, pricing, factor_returns, factor_loadings, periods=range(1, 15), view=None):
fig = plt.figure(figsize=(16, 16))
gs = plt.GridSpec(5, 4)
ax1 = plt.subplot(gs[0:2, 0:2])
factor_stats = compute_factor_stats(factor, pricing, factor_returns, factor_loadings, periods=periods, view=view)
sharpes = pd.DataFrame({'specific': factor_stats['delay_sharpes_specific'],
'total': factor_stats['delay_sharpes_total']})
# display(sharpes)
sharpes.plot.bar(ax=ax1)
ax1.set(xlabel='delay', ylabel='IR')
ax2a = plt.subplot(gs[0, 2:4])
delay_cum_rets_total = factor_stats['portfolio_returns_total'][list(range(1, 5))].apply(ep.cum_returns)
delay_cum_rets_total.plot(ax=ax2a)
ax2a.set(title='Total returns', ylabel='Cumulative returns')
ax2b = plt.subplot(gs[1, 2:4])
delay_cum_rets_specific = factor_stats['portfolio_returns_specific'][list(range(1, 5))].apply(ep.cum_returns)
delay_cum_rets_specific.plot(ax=ax2b)
ax2b.set(title='Specific returns', ylabel='Cumulative returns')
ax3 = plt.subplot(gs[2:4, 0:2])
plot_exposures(factor_stats['risk_exposures_portfolio'].reindex(columns=factor_stats['perf_attribution'].columns),
ax=ax3)
ax4 = plt.subplot(gs[2:4, 2])
ep.cum_returns_final(factor_stats['perf_attribution']).plot.barh(ax=ax4)
ax4.set(xlabel='Cumulative returns')
ax5 = plt.subplot(gs[2:4, 3], sharey=ax4)
factor_stats['perf_attribution'].apply(ep.annual_volatility).plot.barh(ax=ax5)
ax5.set(xlabel='Ann. volatility')
ax6 = plt.subplot(gs[-1, 0:2])
factor_stats['n_holdings'].plot(color='b', ax=ax6)
ax6.set_ylabel('# holdings', color='b')
ax6.tick_params(axis='y', labelcolor='b')
ax62 = ax6.twinx()
factor_stats['turnover'].plot(color='r', ax=ax62)
ax62.set_ylabel('turnover', color='r')
ax62.tick_params(axis='y', labelcolor='r')
ax7 = plt.subplot(gs[-1, 2:4])
factor_stats['perc_holdings'].plot(ax=ax7)
ax7.set(ylabel='Long/short perc holdings')
gs.tight_layout(fig)
return fig, factor_stats, sharpes
# Loop through all columns
results = None
for i, col in enumerate(sorted(alphas.columns)):
if i > 0:
print('')
print(col)
# Get the factor data
try:
data = alphas[col]
data = data[data != 0].dropna()
fig, factor_stats, sharpes = plot_overview_tear_sheet(data,
prices,
factor_returns,
factor_loadings);
plt.show()
sharpes.columns = pd.MultiIndex.from_product([[col], sharpes.columns])
if results is None:
results = sharpes
else:
results = pd.concat([results, sharpes], axis=1)
except Exception as e:
print('Error: {}'.format(e))
continue
# results
temp = None
i = 0
unique_vals = results.columns.get_level_values(0).unique()
for j, factor in enumerate(unique_vals):
i += 1
res = results.xs(factor, axis=1, level=0, drop_level=False)
if temp is None:
temp = res
else:
temp = pd.concat([temp, res], axis=1)
if i > 4 or j == len(unique_vals) - 1:
display(temp)
temp = None
i = 0