Notebook

$Alpha_{5}$ : $(rank((open - (sum(vwap, 10) / 10))) * (-1 * abs(rank((close - vwap)))))$

$Alpha_{8}$ : $(-1 * rank(((sum(open, 5) * sum(returns, 5)) - delay((sum(open, 5) * sum(returns, 5)), 10))))$

$Alpha_{9}$ : $((0 < ts_min(delta(close, 1), 5)) ? delta(close, 1) : ((ts_max(delta(close, 1), 5) < 0) ? delta(close, 1) : (-1 * delta(close, 1))))$

Imports

In [1]:
from quantopian.research import run_pipeline
from quantopian.pipeline import Pipeline
from quantopian.pipeline.factors import Latest
from quantopian.pipeline.data.builtin import USEquityPricing
from quantopian.pipeline.data import morningstar
from quantopian.pipeline.factors import CustomFactor, SimpleMovingAverage, AverageDollarVolume, Returns, RSI
from quantopian.pipeline.classifiers.morningstar import Sector
from quantopian.pipeline.filters import Q500US, Q1500US
from quantopian.pipeline.data.quandl import fred_usdontd156n as libor
from quantopian.pipeline.data.zacks import EarningsSurprises
from quantopian.pipeline import CustomFilter
from quantopian.pipeline.factors import VWAP

import statsmodels.api as sm

import talib
import pandas as pd
import numpy as np
from time import time

import alphalens as al
import pyfolio as pf
from scipy import stats
import matplotlib.pyplot as plt
from sklearn import linear_model, decomposition, ensemble, preprocessing, isotonic, metrics

Run pipeline in chunks

In [2]:
def run_pipeline_chunks(pipe, start_date, end_date, chunks_len = None):
    """
    Drop-in replacement for run_pipeline.

    run_pipeline fails over a very long period of time (memory usage),
    so split the date range into chunks, run the pipeline on each chunk
    and concatenate the results.

    Parameters
    ----------
    pipe : Pipeline
        The pipeline to run.
    start_date, end_date : str or pd.Timestamp
        Inclusive date range over which to run the pipeline.
    chunks_len : pd.Timedelta, optional
        Length of each chunk; defaults to 26 weeks.

    Returns
    -------
    pd.DataFrame
        Concatenation of the per-chunk run_pipeline results (indexed by
        date and asset, as run_pipeline returns them).
    """
    chunks  = []
    current = pd.Timestamp(start_date)
    end     = pd.Timestamp(end_date)
    step    = pd.Timedelta(weeks=26) if chunks_len is None else chunks_len

    start_pipeline_timer = time()

    while current <= end:
        # Cap the chunk at the requested end date.
        current_end = min(current + step, end)

        start_timer = time()
        # Single-argument print() is valid in both Python 2 and 3.
        print('Running pipeline: %s - %s' % (current, current_end))
        results = run_pipeline(pipe, current.strftime("%Y-%m-%d"), current_end.strftime("%Y-%m-%d"))
        chunks.append(results)

        # The pipeline can return more days than requested (when the requested
        # end is not a trading day), so resume from the last date actually
        # present in the results to avoid duplicated days between chunks.
        current_end = results.index.get_level_values(0)[-1].tz_localize(None)
        current = current_end + pd.Timedelta(days=1)

        end_timer = time()
        print("Time to run this chunk of the pipeline %.2f secs" % (end_timer - start_timer))

    end_pipeline_timer = time()
    print("Time to run the entire pipeline %.2f secs" % (end_pipeline_timer - start_pipeline_timer))
    return pd.concat(chunks)

Sector Codes

In [3]:
# Morningstar sector code -> human-readable sector name.
# -1 marks assets with no sector classification.
MORNINGSTAR_SECTOR_CODES = dict([
    (-1,  'Misc'),
    (101, 'Basic Materials'),
    (102, 'Consumer Cyclical'),
    (103, 'Financial Services'),
    (104, 'Real Estate'),
    (205, 'Consumer Defensive'),
    (206, 'Healthcare'),
    (207, 'Utilities'),
    (308, 'Communication Services'),
    (309, 'Energy'),
    (310, 'Industrials'),
    (311, 'Technology'),
])

`make_factors` builds the Alpha factor classes used by the pipeline

In [4]:
def make_factors():
    """
    Build the alpha factors (from the "101 Formulaic Alphas" formulas shown
    above) as Pipeline CustomFactors and return them in a dict keyed by name.

    The compute() bodies are machine-generated from the formulas, hence the
    v0/v1/... naming: each numbered intermediate mirrors a node of the
    expression tree, and negative window indices count back from the most
    recent bar (index -1 is the latest value in the lookback window).
    """

    class Alpha5(CustomFactor):
        # Alpha#5: (rank((open - (sum(vwap, 10) / 10))) *
        #           (-1 * abs(rank((close - vwap)))))
        vwap_in = VWAP(window_length=2)
        # Mark the VWAP term window-safe so it can be used as an input
        # to this CustomFactor.
        vwap_in.window_safe = True
        inputs = [USEquityPricing.close, USEquityPricing.open, vwap_in]
        window_length = 10

        def compute(self, today, assets, out, close, open, vwap):
            # v0 = rank(open - sum(vwap, 10) / 10)
            v000 = open[-1]
            v00100 = np.empty((10, out.shape[0]))
            for i0 in range(1, 11):
                v00100[-i0] = vwap[-i0]
            v0010 = v00100.sum(axis=0)
            v0011 = np.full(out.shape[0], 10.0)
            v001 = v0010 / v0011
            v00 = v000 - v001
            # cross-sectional rank across all assets
            v0 = stats.rankdata(v00)
            # v1 = -1 * abs(rank(close - vwap))
            v10 = np.full(out.shape[0], -1.0)
            v11000 = close[-1]
            v11001 = vwap[-1]
            v1100 = v11000 - v11001
            v110 = stats.rankdata(v1100)
            v11 = np.abs(v110)
            v1 = v10 * v11
            out[:] = v0 * v1
            
    class Alpha8(CustomFactor):
        # Alpha#8: (-1 * rank(((sum(open, 5) * sum(returns, 5)) -
        #                      delay((sum(open, 5) * sum(returns, 5)), 10))))
        inputs = [Returns(window_length=2), USEquityPricing.open]
        # 5-day sums delayed by 10 days reach back 15 bars; 16 covers that.
        window_length = 16

        def compute(self, today, assets, out, returns, open):
            v0 = np.full(out.shape[0], -1.0)
            # sum(open, 5) over the most recent 5 bars
            v10000 = np.empty((5, out.shape[0]))
            for i0 in range(1, 6):
                v10000[-i0] = open[-i0]
            v1000 = v10000.sum(axis=0)
            # sum(returns, 5) over the most recent 5 bars
            v10010 = np.empty((5, out.shape[0]))
            for i0 in range(1, 6):
                v10010[-i0] = returns[-i0]
            v1001 = v10010.sum(axis=0)
            v100 = v1000 * v1001
            # the same sums computed 10 bars earlier (the delay(..., 10) term)
            v101000 = np.empty((5, out.shape[0]))
            for i0 in range(11, 16):
                v101000[10-i0] = open[-i0]
            v10100 = v101000.sum(axis=0)
            v101010 = np.empty((5, out.shape[0]))
            for i0 in range(11, 16):
                v101010[10-i0] = returns[-i0]
            v10101 = v101010.sum(axis=0)
            v1010 = v10100 * v10101
            v101 = v1010 # delay
            v10 = v100 - v101
            v1 = stats.rankdata(v10)
            out[:] = v0 * v1
            
    class Alpha9(CustomFactor):
        # Alpha#9: ((0 < ts_min(delta(close, 1), 5)) ? delta(close, 1)
        #           : ((ts_max(delta(close, 1), 5) < 0) ? delta(close, 1)
        #              : (-1 * delta(close, 1))))
        inputs = [USEquityPricing.close]
        window_length = 7

        def compute(self, today, assets, out, close):
            # v0 = (0 < ts_min(delta(close, 1), 5)) — boolean per asset
            v00 = np.full(out.shape[0], 0.0)
            v010 = np.empty((5, out.shape[0]))
            for i0 in range(1, 6):
                v0100 = np.empty((2, out.shape[0]))
                for i1 in range(1, 3):
                    v0100[-i1] = close[-i0-i1]
                v010[-i0] = v0100[-1] - v0100[-2]
            v01 = np.min(v010, axis=0)
            v0 = v00 < v01
            # v1 = delta(close, 1) — the most recent one-day close change
            v10 = np.empty((2, out.shape[0]))
            for i0 in range(1, 3):
                v10[-i0] = close[-i0]
            v1 = v10[-1] - v10[-2]
            # v20 = (ts_max(delta(close, 1), 5) < 0) — boolean per asset
            v2000 = np.empty((5, out.shape[0]))
            for i0 in range(1, 6):
                v20000 = np.empty((2, out.shape[0]))
                for i1 in range(1, 3):
                    v20000[-i1] = close[-i0-i1]
                v2000[-i0] = v20000[-1] - v20000[-2]
            v200 = np.max(v2000, axis=0)
            v201 = np.full(out.shape[0], 0.0)
            v20 = v200 < v201
            # v21 = delta(close, 1) (recomputed for the inner branch)
            v210 = np.empty((2, out.shape[0]))
            for i0 in range(1, 3):
                v210[-i0] = close[-i0]
            v21 = v210[-1] - v210[-2]
            # v22 = -1 * delta(close, 1)
            v220 = np.full(out.shape[0], -1.0)
            v2210 = np.empty((2, out.shape[0]))
            for i0 in range(1, 3):
                v2210[-i0] = close[-i0]
            v221 = v2210[-1] - v2210[-2]
            v22 = v220 * v221
            # inner ternary: v2 = v20 ? v21 : v22, applied element-wise
            v2lgcl = np.empty(out.shape[0])
            v2lgcl[v20] = v21[v20]
            v2lgcl[~v20] = v22[~v20]
            v2 = v2lgcl
            # outer ternary: out = v0 ? v1 : v2, applied element-wise
            vlgcl = np.empty(out.shape[0])
            vlgcl[v0] = v1[v0]
            vlgcl[~v0] = v2[~v0]
            out[:] = vlgcl

               
    all_factors = {
        'Alpha5' : Alpha5,
        'Alpha8' : Alpha8,
        'Alpha9' : Alpha9,
    }     
    return all_factors

Make the Pipeline

In [8]:
def make_pipeline(price_filter = 2000, min_price_filter = 1):
    """
    Build the test Pipeline: the three alpha factors, their combined rank
    signal, and the Morningstar sector, restricted to a screened universe.

    Parameters
    ----------
    price_filter : float
        Maximum latest close price for an asset to be included.
    min_price_filter : float
        Minimum latest close price for an asset to be included.

    Returns
    -------
    Pipeline
        Columns: Alpha5, Alpha8, Alpha9, test_alpha (sum of the alphas'
        cross-sectional ranks) and Sector; screened to Q500US members
        within the price bounds.
    """
    # Keep only assets whose latest close lies in [min_price_filter, price_filter].
    stock_price_filter = USEquityPricing.close.latest <= price_filter
    stock_price_min = USEquityPricing.close.latest >= min_price_filter
    
    # Before we do any other ranking, we want to throw away the bad assets.
    initial_screen = (stock_price_min & stock_price_filter & Q500US())
    
    # Instantiate each alpha factor, masked to the screened universe.
    # (.items() works on both Python 2 and 3, unlike .iteritems().)
    factors = make_factors()
    pipe_cols = {name: f(mask=initial_screen) for name, f in factors.items()}
    
    # Combined signal: sum of the alphas' cross-sectional ranks.
    pipe_cols["test_alpha"] = \
    (pipe_cols['Alpha5'].rank(mask=initial_screen) + 
     pipe_cols['Alpha8'].rank(mask=initial_screen) + 
     pipe_cols['Alpha9'].rank(mask=initial_screen))
    
    # Sector data for group-wise analysis in alphalens.
    pipe_cols['Sector'] = Sector(mask=initial_screen)
    
    ## Return the new pipeline
    return Pipeline(columns=pipe_cols, screen=initial_screen)

Set the Timeframe for this test

In [9]:
# Date range over which the pipeline is run.
start_date, end_date = '2014-01-01', '2015-12-01'

Get the Pipeline data

In [10]:
# Run the pipeline over the full test period in memory-friendly chunks.
result = run_pipeline_chunks(make_pipeline(), 
                             start_date=start_date, 
                             end_date=end_date)
Running pipeline: 2014-01-01 00:00:00  -  2014-07-02 00:00:00
Time to run this chunk of the pipeline 38.35 secs
Running pipeline: 2014-07-03 00:00:00  -  2015-01-01 00:00:00
Time to run this chunk of the pipeline 37.32 secs
Running pipeline: 2015-01-03 00:00:00  -  2015-07-04 00:00:00
Time to run this chunk of the pipeline 34.56 secs
Running pipeline: 2015-07-07 00:00:00  -  2015-12-01 00:00:00
Time to run this chunk of the pipeline 32.29 secs
Time to run the entire pipeline 142.52 secs
In [12]:
result.head()
Out[12]:
Alpha5 Alpha8 Alpha9 Sector test_alpha
2014-01-02 00:00:00+00:00 Equity(2 [ARNC]) -36504.0 -165.0 -0.10 101 885.0
Equity(24 [AAPL]) -1992.0 -132.0 -6.66 311 851.0
Equity(62 [ABT]) -18190.0 -181.0 0.06 206 1049.0
Equity(67 [ADSK]) -102595.0 -166.0 -0.74 311 553.0
Equity(114 [ADBE]) -66560.0 -16.0 -0.33 311 846.0

Get the dates for the Alphalens test that are 1 month in the past and 1 month in the future

In [13]:
# Pricing window for alphalens: pad the factor period by roughly one month
# on each side so forward returns can be computed at the edges.
start_date_alphalens, end_date_alphalens = '2013-12-01', '2016-01-01'
In [14]:
# All assets that were returned in the pipeline result.
# All assets that were returned in the pipeline result.
assets = result.index.levels[1].unique()

# We need to get a little more pricing data than the length of our factor so we 
# can compare forward returns. We'll tack on another month in this example.
# NOTE: get_pricing is a Quantopian research-environment global.
pricing = get_pricing(assets, 
                      start_date=start_date_alphalens, 
                      end_date=end_date_alphalens, 
                      fields='open_price')
In [15]:
# alphalens is already imported as `al` at the top of the notebook; reuse it
# instead of re-importing it mid-notebook under a second name.
# Produce the full factor tear sheet for the combined rank signal, grouped
# by Morningstar sector, over 1/5/10/20/30-day forward-return horizons.
al.tears.create_factor_tear_sheet(factor=result['test_alpha'],
                                  prices=pricing,
                                  groupby=result['Sector'],
                                  show_groupby_plots=True,
                                  periods=(1, 5, 10, 20, 30),
                                  quantiles=3,
                                  groupby_labels=MORNINGSTAR_SECTOR_CODES)
Returns Analysis
1 5 10 20 30
Ann. alpha -0.005 0.001 0.002 0.002 -0.004
beta 0.079 0.092 0.085 0.036 0.085
Mean Period Wise Return Top Quantile (bps) 0.016 0.045 0.279 0.118 -0.036
Mean Period Wise Return Bottom Quantile (bps) 0.108 -0.051 -0.047 -0.081 0.041
Mean Period Wise Spread (bps) -0.081 0.087 0.314 0.193 -0.085
Information Analysis
1 5 10 20 30
IC Mean 0.006 0.003 0.006 0.001 -0.000
IC Std. 0.126 0.122 0.118 0.113 0.117
t-stat(IC) 1.021 0.590 1.185 0.129 -0.083
p-value(IC) 0.308 0.555 0.237 0.898 0.934
IC Skew -0.154 -0.194 -0.282 -0.097 -0.325
IC Kurtosis 0.271 0.498 0.531 0.091 0.178
Ann. IR 0.744 0.431 0.864 0.094 -0.061
Turnover Analysis
1
Quantile 1 Mean Turnover 0.475
Quantile 2 Mean Turnover 0.611
Quantile 3 Mean Turnover 0.467
1
Mean Factor Rank Autocorrelation 0.426
/usr/local/lib/python2.7/dist-packages/matplotlib/axes/_axes.py:2790: MatplotlibDeprecationWarning: Use of None object as fmt keyword argument to suppress plotting of data values is deprecated since 1.4; use the string "none" instead.
  warnings.warn(msg, mplDeprecation, stacklevel=1)
<matplotlib.figure.Figure at 0x7f91e86f13d0>
In [ ]:
 
In [ ]: