Hi all,
I'm trying to input some data into alphalens. Since I can't run the pipeline over a longer time period I call the pipeline every 6 months, after which I concatenate the pipeline outputs to obtain one single dataframe. I then get the prices for the securities from the concatenated dataframe, but when I plug in the the factor and pricing data into the alphalens functions I get an error.
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import scipy.stats
import numpy as np
from quantopian.pipeline import Pipeline
from quantopian.pipeline.data import morningstar
from quantopian.pipeline.data.builtin import USEquityPricing
from quantopian.pipeline.factors import CustomFactor
from quantopian.pipeline.filters import Q1500US
class Quality(CustomFactor):
inputs = [morningstar.income_statement.ebit, morningstar.balance_sheet.working_capital, morningstar.balance_sheet.gross_ppe]
window_length = 1
def compute(self, today, assets, out, ebit, wc, ppe):
out[:] = ebit / (wc + ppe)
def make_pipeline():
universe = Q1500US()
pipe = Pipeline()
quality = Quality()
pipe.add(quality, 'quality')
pipe.set_screen(universe)
return pipe
from quantopian.research import run_pipeline
start_dates = pd.date_range(start='2011-01-01', periods=4, freq='2QS')
end_dates = pd.date_range(start='2011-07-01', periods=4, freq='2QS')
pipes = range(0,len(start_dates)-1)
prices = range(0,len(start_dates)-1)
for i in range(0,len(start_dates)-1):
pipe = make_pipeline()
results = run_pipeline(pipe, start_dates[i], end_dates[i])
pipes[i] = results
print("Iteration " + str(i+1) +" of " + str(len(start_dates)-1) + " complete.")
Here I get the error message:
import alphalens
results = pd.concat(pipes)
securities = results['quality'].index.levels[1].unique()
pricing = get_pricing(securities,
start_date='2010-11-01',
end_date='2016-11-01',
frequency='daily',
fields='price')
factor_data = alphalens.utils.get_clean_factor_and_forward_returns(
results['quality'],
pricing,
quantiles=5,
periods=(15,20,30))
alphalens.tears.create_full_tear_sheet(factor_data)
ExceptionTraceback (most recent call last)
<ipython-input-23-6d8e42bfbdcb> in <module>()
12 pricing_test,
13 quantiles=5,
---> 14 periods=(15,20,30))
15 alphalens.tears.create_full_tear_sheet(factor_data)
/usr/local/lib/python2.7/dist-packages/alphalens/utils.pyc in get_clean_factor_and_forward_returns(factor, prices, groupby, by_group, quantiles, bins, periods, filter_zscore, groupby_labels)
301 "the pandas methods tz_localize and tz_convert.")
302
--> 303 merged_data = compute_forward_returns(prices, periods, filter_zscore)
304
305 factor = factor.copy()
/usr/local/lib/python2.7/dist-packages/alphalens/utils.pyc in compute_forward_returns(prices, periods, filter_zscore)
136 delta[mask] = np.nan
137
--> 138 forward_returns[period] = delta.stack()
139
140 forward_returns.index = forward_returns.index.rename(['date', 'asset'])
/usr/local/lib/python2.7/dist-packages/pandas/core/frame.pyc in __setitem__(self, key, value)
2355 else:
2356 # set column
-> 2357 self._set_item(key, value)
2358
2359 def _setitem_slice(self, key, value):
/usr/local/lib/python2.7/dist-packages/pandas/core/frame.pyc in _set_item(self, key, value)
2421
2422 self._ensure_valid_index(value)
-> 2423 value = self._sanitize_column(key, value)
2424 NDFrame._set_item(self, key, value)
2425
/usr/local/lib/python2.7/dist-packages/pandas/core/frame.pyc in _sanitize_column(self, key, value)
2555
2556 if isinstance(value, Series):
-> 2557 value = reindexer(value)
2558
2559 elif isinstance(value, DataFrame):
/usr/local/lib/python2.7/dist-packages/pandas/core/frame.pyc in reindexer(value)
2547 # duplicate axis
2548 if not value.index.is_unique:
-> 2549 raise e
2550
2551 # other
Exception: cannot handle a non-unique multi-index!
Does anyone have an idea what I'm doing wrong here? Any help would be appreciated!
Thanks,
Michael