Notebook
In [24]:
from quantopian.pipeline import CustomFactor
from quantopian.pipeline import Pipeline
from quantopian.research import run_pipeline
from quantopian.pipeline.filters import QTradableStocksUS
from quantopian.pipeline.data.builtin import USEquityPricing
from quantopian.pipeline.factors import AverageDollarVolume, SimpleBeta, MarketCap
from quantopian.pipeline.data import morningstar
from quantopian.pipeline.data import Fundamentals
from sklearn import preprocessing
from quantopian.pipeline.classifiers.morningstar import Sector
from scipy import stats
import numpy as np
import pandas as pd
In [25]:
def preprocess(a):
    
    a = np.nan_to_num(a - np.nanmean(a))
    return preprocessing.scale(a)
In [26]:
def make_factors():
        
    class CGO(CustomFactor):    
        inputs = [USEquityPricing.close, USEquityPricing.volume, morningstar.valuation.shares_outstanding]
        window_length=1260
        
        def compute(self, today, assets, out, close, volume, share):
            mon = 60
            unit = 21
            df_price = close   
            df_hsl = volume/share 
            copy = 1 - df_hsl
            copy[-1,:] = 1.0
            copy = copy.prod(axis=0)/copy.cumprod(axis=0)*copy
            copy = copy * df_hsl
            k = copy.sum(axis=0)
            ref_pr = ((df_price*(copy)).sum(axis=0)/k)
            if unit > 1:
                df_price = df_price[::unit,]
            CGO = ((df_price[-1,:] - ref_pr)/df_price[-1,:])
            
            out[:] = preprocess(CGO)

            
    return {
            'CGO': CGO,
            }    
In [27]:
start = pd.Timestamp("2014-01-05")
end = pd.Timestamp("2019-03-30")
from quantopian.pipeline.filters import StaticAssets
        
def make_pipeline():
    pipe = Pipeline() 
    my_etfs = StaticAssets(symbols(['SPY']))
    beta = SimpleBeta(target=symbols(8554),regression_length=120,
                      allowed_missing_percentage=1.0
                     )
    
    universe = QTradableStocksUS() & Sector().notnull() & beta.notnull()#& ~StaticAssets([sid(8340), sid(34648), sid(32430)])
    
    dollar_volume = AverageDollarVolume(window_length=5, mask = universe)
    high_dollar_volume = dollar_volume.top(500)
    universe &= high_dollar_volume 
    mktcap = MarketCap(mask =universe)
    universe &= mktcap.bottom(200)
            
    factors = make_factors()    
    combined_alpha = None
    for name, f in factors.iteritems():        
        fac = f(mask=universe)
        if combined_alpha == None:
            combined_alpha = fac
        else:
            combined_alpha += fac

    pipe.add(combined_alpha, 'combined_alpha')        
    pipe.set_screen(universe & combined_alpha.notnull())
    return pipe

results = run_pipeline(pipeline=make_pipeline(), start_date=start, end_date=end)

# Show the first 5 rows of factor data
results.head(5)

Pipeline Execution Time: 2 Minutes, 5.80 Seconds
Out[27]:
combined_alpha
2014-01-06 00:00:00+00:00 Equity(2 [ARNC]) -0.144378
Equity(67 [ADSK]) 0.826941
Equity(154 [AEM]) -1.789000
Equity(197 [AGCO]) 0.140815
Equity(328 [ALTR]) -0.977182
In [28]:
assets = results.index.levels[1]
pricing = get_pricing(assets, start, end, fields='open_price' # Generally, you should use open pricing. Explained more in lesson 4
)

# Show the first 5 rows of pricing_data
pricing.head(5)
Out[28]:
Equity(2 [ARNC]) Equity(39 [DDC]) Equity(53 [ABMD]) Equity(64 [GOLD]) Equity(67 [ADSK]) Equity(76 [TAP]) Equity(110 [RAMP]) Equity(122 [ADI]) Equity(154 [AEM]) Equity(166 [AES]) ... Equity(51917 [ZUO]) Equity(51937 [DOCU]) Equity(51942 [PVTL]) Equity(51955 [CDAY]) Equity(51961 [SMAR]) Equity(51992 [PS]) Equity(51994 [EQH]) Equity(52038 [CGC]) Equity(52064 [EVRG]) Equity(52165 [DOMO])
2014-01-06 00:00:00+00:00 30.085 12.960 27.23 17.488 48.98 49.421 36.53 43.323 25.972 11.993 ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
2014-01-07 00:00:00+00:00 29.858 12.750 27.36 17.241 48.89 49.286 36.08 43.446 25.697 12.010 ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
2014-01-08 00:00:00+00:00 29.717 12.678 27.54 17.166 49.50 49.734 36.25 43.639 25.574 12.160 ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
2014-01-09 00:00:00+00:00 30.651 12.596 28.22 16.976 50.82 49.645 36.47 43.411 25.024 12.010 ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
2014-01-10 00:00:00+00:00 28.245 12.523 29.67 17.080 51.14 49.672 36.31 43.393 25.062 12.127 ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN

5 rows × 1486 columns

In [29]:
from alphalens.utils import get_clean_factor_and_forward_returns

merged_data = get_clean_factor_and_forward_returns(
  factor=results, 
  prices=pricing,  
)


# Show the first 5 rows of merged_data
merged_data.head(5)
Dropped 3.5% entries from factor data: 0.9% in forward returns computation and 2.6% in binning phase (set max_loss=0 to see potentially suppressed Exceptions).
max_loss is 35.0%, not exceeded: OK!
Out[29]:
1D 5D 10D factor factor_quantile
date asset
2014-01-06 00:00:00+00:00 Equity(2 [ARNC]) -0.007545 -0.041416 0.108160 -0.144378 2.0
Equity(67 [ADSK]) -0.001837 0.061658 0.090649 0.826941 4.0
Equity(154 [AEM]) -0.010588 0.001810 0.062798 -1.789000 1.0
Equity(197 [AGCO]) -0.018826 -0.022764 -0.046714 0.140815 4.0
Equity(328 [ALTR]) -0.003115 -0.028807 0.036226 -0.977182 1.0
In [30]:
from alphalens.tears import create_full_tear_sheet

#merged_data.sort_values(by=['factor'], ascending=False, inplace=True)
create_full_tear_sheet(merged_data)
Quantiles Statistics
min max mean std count count %
factor_quantile
1.0 -11.861954 -2.371435e-01 -1.424878 0.816081 50920 20.014150
2.0 -0.892109 1.801319e-16 -0.208021 0.223743 73697 28.966669
3.0 -0.208714 3.548901e-01 0.025576 0.055837 28231 11.096219
4.0 0.000003 9.990097e-01 0.417997 0.193283 50652 19.908812
5.0 0.357350 8.181045e+00 1.293705 0.518549 50920 20.014150
Returns Analysis
1D 5D 10D
Ann. alpha 0.076 0.065 0.054
beta -0.087 -0.098 -0.119
Mean Period Wise Return Top Quantile (bps) 2.193 1.897 1.273
Mean Period Wise Return Bottom Quantile (bps) -3.926 -2.816 -2.362
Mean Period Wise Spread (bps) 6.120 4.783 3.709
<matplotlib.figure.Figure at 0x7efbb48f2450>
Information Analysis
1D 5D 10D
IC Mean 0.011 0.016 0.016
IC Std. 0.157 0.157 0.153
Risk-Adjusted IC 0.070 0.101 0.104
t-stat(IC) 2.484 3.608 3.705
p-value(IC) 0.013 0.000 0.000
IC Skew 0.046 -0.052 -0.118
IC Kurtosis 0.067 -0.127 -0.301
Turnover Analysis
10D 1D 5D
Quantile 1 Mean Turnover 0.504 0.165 0.404
Quantile 2 Mean Turnover 0.661 0.275 0.568
Quantile 3 Mean Turnover 0.847 0.560 0.789
Quantile 4 Mean Turnover 0.672 0.270 0.573
Quantile 5 Mean Turnover 0.578 0.186 0.472
1D 5D 10D
Mean Factor Rank Autocorrelation 0.966 0.859 0.748
In [ ]: