Notebook
In [1]:
from quantopian.pipeline.data import Fundamentals
from quantopian.pipeline import Pipeline
from quantopian.research import run_pipeline
from quantopian.pipeline.factors import SimpleMovingAverage,Latest,RSI, MACDSignal, SimpleBeta
from quantopian.pipeline.filters import Q1500US, QTradableStocksUS, Q500US, Q3000US
from quantopian.research import get_pricing
from quantopian.pipeline.data import USEquityPricing
from quantopian.pipeline.factors import CustomFactor
from alphalens.utils import get_clean_factor_and_forward_returns
from alphalens.tears import create_full_tear_sheet
from quantopian.pipeline import CustomFilter
import pandas as pd
import numpy as np

import talib
In [2]:
# Custom filter flagging a bullish engulfing price-action pattern:
# today's bar swallows yesterday's bar (both its range and its body)
# and closes up on the day.
class engulf(CustomFilter):

    inputs = [USEquityPricing.close, USEquityPricing.low, USEquityPricing.open, USEquityPricing.high]

    # Two daily bars: index [-2] is yesterday, [-1] is today.
    window_length = 2

    def compute(self, today, assets, out, close, low, open1, high):
        # Today's range extends beyond yesterday's on both sides (outside bar).
        wider_range = (high[-1] > high[-2]) & (low[-1] < low[-2])

        # Today closed higher and opened lower than yesterday did.
        stronger_close_weaker_open = (close[-1] > close[-2]) & (open1[-1] < open1[-2])

        # Today's candle is bullish: close above open.
        bullish_candle = close[-1] > open1[-1]

        # Today's body engulfs yesterday's body.
        body_engulfs = (close[-1] > open1[-2]) & (open1[-1] < close[-2])

        out[:] = wider_range & stronger_close_weaker_open & bullish_candle & body_engulfs
        
        
class volume(CustomFilter):
    """Filter passing assets whose most recent daily volume is at least
    29% of their trailing 6-day average volume."""

    inputs = [USEquityPricing.volume]
    window_length = 6

    def compute(self, today, assets, out, vol):
        # Most recent day's volume relative to the 6-day mean
        # (note: the mean window includes the most recent day itself).
        relative_volume = vol[-1] / np.mean(vol, axis=0)

        # NOTE(review): the original code also computed an unused upper
        # bound (relative_volume < 1.54) that never fed into the output.
        # The dead expression is removed here; only the lower bound is
        # applied, exactly as before. Re-add `& (relative_volume < 1.54)`
        # if the upper bound was actually intended.
        out[:] = relative_volume > 0.29

        
        


def filtro():
    """Build a Pipeline whose screen selects tradable US stocks showing a
    bullish price-action setup (hammer-with-volume or engulfing candle),
    and whose single 'filter' column flags names that additionally meet
    either an oversold technical condition or a moving-average condition.

    Returns:
        Pipeline with one boolean column, "filter", screened to the
        universe intersected with the price-action filter.
    """
    # Alternative sector-restricted universe, kept for reference:
    #   sector = Fundamentals.morningstar_sector_code.latest
    #   universe = Q3000US() & sector.element_of([206])
    universe = Q3000US() & QTradableStocksUS()

    # Trailing simple moving averages of the close.
    sma20 = SimpleMovingAverage(inputs=[USEquityPricing.close], window_length=20)
    sma50 = SimpleMovingAverage(inputs=[USEquityPricing.close], window_length=50, mask=universe)
    sma200 = SimpleMovingAverage(inputs=[USEquityPricing.close], window_length=200)

    # window_length=1 SMAs are just the latest daily OHLC values.
    price = SimpleMovingAverage(inputs=[USEquityPricing.close], window_length=1, mask=universe)
    high = SimpleMovingAverage(inputs=[USEquityPricing.high], window_length=1, mask=universe)
    low = SimpleMovingAverage(inputs=[USEquityPricing.low], window_length=1, mask=universe)
    close = SimpleMovingAverage(inputs=[USEquityPricing.close], window_length=1, mask=universe)
    open1 = SimpleMovingAverage(inputs=[USEquityPricing.open], window_length=1, mask=universe)

    rsi = RSI()
    macd = MACDSignal()

    # Oversold momentum: weak RSI and clearly negative MACD signal.
    technical_condition = (rsi < 40) & (macd < -1.5)

    # Price above the 20-day SMA but below the 50- and 200-day SMAs,
    # and trading in the $5-$120 range.
    # BUG FIX: the original used the chained comparison `5 < price < 120`,
    # which Python evaluates as `(5 < price) and (price < 120)`; the
    # intermediate Filter object is truthy, so the lower bound was
    # silently dropped. Filters must be combined with `&`.
    in_price_range = (price > 5) & (price < 120)
    sma_condition = (price > sma20) & (price < sma50) & (price < sma200) & in_price_range

    # Hammer-style candle: open and close both in the upper half of the
    # day's range, with a small real body (<= 25% of the range).
    mid_price = (high + low) / 2
    box = (close - open1).abs()
    limit_box = (high - low) * 0.25
    hammer = (close > mid_price) & (open1 > mid_price) & (box <= limit_box)

    # Price-action entry: hammer confirmed by volume, or an engulfing bar.
    price_action = (hammer & volume()) | engulf()

    filter1 = technical_condition | sma_condition

    return Pipeline(columns={"filter": filter1}, screen=universe & price_action)
 
factor = run_pipeline(filtro(), start_date="2019-05-5", end_date="2020-06-11")

Pipeline Execution Time: 23.03 Seconds
In [3]:
asset_list = factor.index.levels[1].unique()
pricing = get_pricing(asset_list,start_date="2019-4-5", end_date="2020-6-10", fields= "close_price" )
merged_data = get_clean_factor_and_forward_returns(factor, pricing, periods=[1,2,3,4,5], quantiles=None, bins=[-1, 0.5, 2])
Dropped 1.8% entries from factor data: 1.8% in forward returns computation and 0.0% in binning phase (set max_loss=0 to see potentially suppressed Exceptions).
max_loss is 35.0%, not exceeded: OK!

Create a full tear sheet to give us some statistics about this factor. Let's specifically look at the Mean Period Wise Return Top Quantile (bps).

In [4]:
create_full_tear_sheet(merged_data)
Quantiles Statistics
min max mean std count count %
factor_quantile
1 False False False 0.0 58471 84.600804
2 True True True 0.0 10643 15.399196
Returns Analysis
1D 2D 3D 4D 5D
Ann. alpha 0.071 0.074 0.065 0.065 0.066
beta 0.088 0.116 0.084 0.079 0.068
Mean Period Wise Return Top Quantile (bps) 3.485 3.549 2.894 3.386 3.685
Mean Period Wise Return Bottom Quantile (bps) -1.719 -1.677 -2.132 -1.853 -1.657
Mean Period Wise Spread (bps) 5.203 5.061 4.874 5.075 5.150
<matplotlib.figure.Figure at 0x7f98a922cd30>
Information Analysis
1D 2D 3D 4D 5D
IC Mean 0.001 0.001 0.002 0.006 0.007
IC Std. 0.116 0.123 0.121 0.118 0.118
Risk-Adjusted IC 0.011 0.007 0.019 0.048 0.055
t-stat(IC) 0.178 0.113 0.310 0.795 0.916
p-value(IC) 0.859 0.910 0.757 0.428 0.361
IC Skew 0.377 -0.057 0.332 0.239 0.167
IC Kurtosis 1.784 2.165 1.745 1.739 1.607
/venvs/py35/lib/python3.5/site-packages/statsmodels/nonparametric/kdetools.py:20: VisibleDeprecationWarning: using a non-integer number instead of an integer will result in an error in the future
  y = X[:m/2+1] + np.r_[0,X[m/2+1:],0]*1j
/venvs/py35/lib/python3.5/site-packages/alphalens/utils.py:912: UserWarning: Skipping return periods that aren't exact multiples of days.
  + " of days."
Turnover Analysis
1D 2D 3D 4D 5D
Quantile 1 Mean Turnover 0.870 0.876 0.876 0.878 0.881
Quantile 2 Mean Turnover 0.887 0.910 0.922 0.927 0.940
1D 2D 3D 4D 5D
Mean Factor Rank Autocorrelation 0.844 0.683 0.556 0.489 0.429

Let's see if we can get the same numbers for Mean Period Wise Return Top Quantile (bps) if we calculate it manually. This is calculated as the mean of daily means. So, we need to do two mean calculations. First, group by day to get the mean of each day. Second, group by factor quantile to get the mean of those means for each quantile.

In [5]:
# First create a 'grouper' to group by factor quantile and date
day_grouper = ['factor_quantile', merged_data.index.get_level_values('date')]

# Now use that grouper and get the mean of each returns column.
# FIX: select the columns with a list — `groupby(...)['1D','2D',...]`
# tuple indexing is deprecated in pandas and removed in later versions.
daily_means = merged_data.groupby(day_grouper)[['1D', '2D', '3D', '4D', '5D']].mean()

# Print to see what we have
daily_means
Out[5]:
1D 2D 3D 4D 5D
factor_quantile date
1 2019-05-06 00:00:00+00:00 -0.021295 -0.022079 -0.021987 -0.016898 -0.045441
2019-05-07 00:00:00+00:00 -0.003800 -0.007184 -0.002781 -0.034016 -0.022369
2019-05-08 00:00:00+00:00 -0.002121 -0.000027 -0.031230 -0.018295 -0.015283
2019-05-09 00:00:00+00:00 0.000523 -0.031925 -0.020343 -0.013535 -0.005556
2019-05-10 00:00:00+00:00 -0.032031 -0.018185 -0.013347 -0.005465 -0.018368
2019-05-13 00:00:00+00:00 0.013511 0.017503 0.024396 0.009394 0.001621
2019-05-14 00:00:00+00:00 0.004980 0.010391 0.000078 -0.006467 0.006496
2019-05-15 00:00:00+00:00 0.002740 -0.007400 -0.014918 0.001151 -0.012285
2019-05-16 00:00:00+00:00 -0.012466 -0.019249 -0.006135 -0.016173 -0.034058
2019-05-17 00:00:00+00:00 -0.008318 -0.000298 -0.002821 -0.011289 -0.008498
2019-05-20 00:00:00+00:00 0.014386 0.007852 -0.005001 -0.001604 -0.008068
2019-05-21 00:00:00+00:00 -0.007240 -0.026609 -0.021943 -0.027367 -0.037586
2019-05-22 00:00:00+00:00 -0.015883 -0.011464 -0.021326 -0.030385 -0.031001
2019-05-23 00:00:00+00:00 0.005855 -0.001033 -0.008431 -0.010720 -0.019129
2019-05-24 00:00:00+00:00 -0.007253 -0.015559 -0.016224 -0.026773 -0.022563
2019-05-28 00:00:00+00:00 -0.004991 -0.010169 -0.025891 -0.016475 0.008245
2019-05-29 00:00:00+00:00 -0.003454 -0.018031 -0.009941 0.015020 0.012857
2019-05-30 00:00:00+00:00 -0.013632 -0.009111 0.019528 0.016498 0.014629
2019-05-31 00:00:00+00:00 -0.006472 0.017397 0.019794 0.019758 0.029783
2019-06-03 00:00:00+00:00 0.020706 0.024555 0.024213 0.030951 0.035334
2019-06-04 00:00:00+00:00 0.001196 -0.001658 0.001252 0.001717 0.001609
2019-06-05 00:00:00+00:00 0.001269 0.010114 0.010641 0.006935 0.010599
2019-06-06 00:00:00+00:00 0.006954 0.013353 0.009783 0.009527 0.018692
2019-06-07 00:00:00+00:00 0.006003 0.004892 0.002929 0.012830 0.006495
2019-06-10 00:00:00+00:00 -0.000456 -0.010738 0.004324 -0.008862 0.001598
2019-06-11 00:00:00+00:00 0.002346 0.008850 0.009609 0.012699 0.017112
2019-06-12 00:00:00+00:00 0.007384 0.001027 0.005565 0.012643 0.018462
2019-06-13 00:00:00+00:00 -0.006644 -0.000990 0.011169 0.017089 0.021161
2019-06-14 00:00:00+00:00 0.007881 0.018396 0.022396 0.032699 0.025742
2019-06-17 00:00:00+00:00 0.012850 0.015481 0.022152 0.017586 0.010016
... ... ... ... ... ... ...
2 2020-04-22 00:00:00+00:00 0.011236 0.021183 0.059502 0.077632 0.129062
2020-04-23 00:00:00+00:00 0.017434 0.058133 0.074573 0.124010 0.085249
2020-04-24 00:00:00+00:00 0.039833 0.056732 0.110532 0.081770 0.033207
2020-04-27 00:00:00+00:00 0.020092 0.074241 0.041339 -0.007173 -0.008708
2020-04-28 00:00:00+00:00 0.076536 0.054801 -0.005084 0.004472 0.012406
2020-04-29 00:00:00+00:00 -0.032112 -0.088987 -0.087577 -0.087481 -0.109010
2020-04-30 00:00:00+00:00 -0.049528 -0.046174 -0.000668 -0.028462 -0.002616
2020-05-01 00:00:00+00:00 -0.002106 -0.005101 -0.036307 -0.008530 0.032615
2020-05-04 00:00:00+00:00 0.024944 0.008608 0.036244 0.077785 0.054335
2020-05-05 00:00:00+00:00 -0.025976 0.000162 0.053308 0.020569 -0.019806
2020-05-06 00:00:00+00:00 0.011120 0.077814 0.072187 0.032391 -0.027712
2020-05-07 00:00:00+00:00 0.043539 0.032026 -0.017626 -0.061449 -0.061577
2020-05-08 00:00:00+00:00 -0.013701 -0.038024 -0.069678 -0.064006 -0.045586
2020-05-11 00:00:00+00:00 -0.044495 -0.105026 -0.089371 -0.092022 0.015972
2020-05-12 00:00:00+00:00 -0.049298 -0.034594 -0.032144 0.053545 0.044705
2020-05-13 00:00:00+00:00 0.007701 -0.017027 0.016728 0.002348 0.008936
2020-05-14 00:00:00+00:00 0.003795 0.064744 0.041323 0.055612 0.059051
2020-05-15 00:00:00+00:00 0.062108 0.042244 0.075337 0.072674 0.072986
2020-05-18 00:00:00+00:00 -0.023882 -0.000496 -0.002092 0.003723 0.038818
2020-05-19 00:00:00+00:00 0.039308 0.042857 0.047997 0.091562 0.130879
2020-05-20 00:00:00+00:00 0.022233 0.019742 0.074652 0.102493 0.079815
2020-05-21 00:00:00+00:00 0.001684 0.042133 0.068232 0.071683 0.054872
2020-05-22 00:00:00+00:00 0.077327 0.159051 0.110215 0.073566 0.097959
2020-05-26 00:00:00+00:00 0.039302 0.017973 0.008710 0.022740 0.027735
2020-05-27 00:00:00+00:00 -0.000806 0.004948 0.009461 0.001474 0.002423
2020-05-28 00:00:00+00:00 -0.025818 0.005538 0.004398 0.037807 0.036591
2020-05-29 00:00:00+00:00 0.008092 -0.001486 0.017836 0.018497 0.097110
2020-06-01 00:00:00+00:00 0.004007 0.039068 0.035656 0.084415 0.110748
2020-06-02 00:00:00+00:00 0.023132 0.021977 0.044960 0.097473 0.071345
2020-06-03 00:00:00+00:00 -0.009855 0.018535 0.043487 0.020507 -0.001296

546 rows × 5 columns

In [6]:
# Average the per-day means within each factor quantile. Grouping on the
# 'factor_quantile' index level collapses the date level, leaving one row
# of mean returns per quantile (the "mean of daily means").
quantile_grouper = [daily_means.index.get_level_values('factor_quantile')]
quantile_means = daily_means.groupby(quantile_grouper).mean()

# Display the result
quantile_means
Out[6]:
1D 2D 3D 4D 5D
factor_quantile
1 -0.000321 -0.000704 -0.000634 -0.000171 0.000279
2 0.000199 0.000341 0.000874 0.001925 0.002951

Hmm, these don't match the returns in the tear sheet output. Why is that? The create_full_tear_sheet method, by default, assumes a long short portfolio. It assumes one will short stocks in the lower half of the quantiles and long the stocks in the upper half. The mean returns we calculated above just took the returns and didn't account for long and short. The short returns would need to be reversed.

We won't do that here. Rather, one can use the long_short parameter to force create_full_tear_sheet to assume an all long portfolio. The returns should then be the returns we calculated. Let's check that.

In [7]:
# Show our manually-computed quantile means alongside the tear sheet
# produced with long_short=False (all-long portfolio assumption).
display(quantile_means)
create_full_tear_sheet(merged_data, long_short=False)
1D 2D 3D 4D 5D
factor_quantile
1 -0.000321 -0.000704 -0.000634 -0.000171 0.000279
2 0.000199 0.000341 0.000874 0.001925 0.002951
Quantiles Statistics
min max mean std count count %
factor_quantile
1 False False False 0.0 58471 84.600804
2 True True True 0.0 10643 15.399196
Returns Analysis
1D 2D 3D 4D 5D
Ann. alpha 0.096 0.101 0.076 0.086 0.093
beta 1.101 1.141 1.089 1.081 1.069
Mean Period Wise Return Top Quantile (bps) 1.991 1.706 2.913 4.809 5.895
Mean Period Wise Return Bottom Quantile (bps) -3.212 -3.520 -2.113 -0.427 0.558
Mean Period Wise Spread (bps) 5.203 4.597 4.387 4.501 4.637
<matplotlib.figure.Figure at 0x7f98b40d3748>
Information Analysis
1D 2D 3D 4D 5D
IC Mean 0.001 0.001 0.002 0.006 0.007
IC Std. 0.116 0.123 0.121 0.118 0.118
Risk-Adjusted IC 0.011 0.007 0.019 0.048 0.055
t-stat(IC) 0.178 0.113 0.310 0.795 0.916
p-value(IC) 0.859 0.910 0.757 0.428 0.361
IC Skew 0.377 -0.057 0.332 0.239 0.167
IC Kurtosis 1.784 2.165 1.745 1.739 1.607
Turnover Analysis
1D 2D 3D 4D 5D
Quantile 1 Mean Turnover 0.870 0.876 0.876 0.878 0.881
Quantile 2 Mean Turnover 0.887 0.910 0.922 0.927 0.940
1D 2D 3D 4D 5D
Mean Factor Rank Autocorrelation 0.844 0.683 0.556 0.489 0.429

First it seems the numbers are off by a factor of 10,000. This makes sense since create_full_tear_sheet displays results in basis points (bps), and one basis point is 1/10,000. However, the 1D values are correct but the others seem to be off.

The create_full_tear_sheet method displays all the returns normalized to the 1 day compounded rate. In other words, the returns shown in create_full_tear_sheet, if compounded, will equal the total returns we calculated. Another way to look at it is the nth root of the day n returns will equal the create_full_tear_sheet returns.

Let's check that out.

In [8]:
# Isolate the total-return row for quantile 2 (the top quantile).
quantile_2_total_returns = quantile_means.query('factor_quantile==2')
quantile_2_total_returns
Out[8]:
1D 2D 3D 4D 5D
factor_quantile
2 0.000199 0.000341 0.000874 0.001925 0.002951
In [9]:
# Make a new dataframe for our daily compounded returns, in bps.
# Each n-day total return is normalized to its per-day compounded rate:
#   daily = (1 + total) ** (1/n) - 1
# then scaled by 10,000 (to basis points) and rounded to 3 decimals.
# Replaces five copy-pasted statements with one loop; n=1 is kept as the
# raw return (mathematically identical, avoids needless float round-trip).
quantile_2_daily_returns = pd.DataFrame()
for n, col in enumerate(['1D', '2D', '3D', '4D', '5D'], start=1):
    total = quantile_2_total_returns[col]
    daily = total if n == 1 else total.add(1).pow(1 / n).sub(1)
    quantile_2_daily_returns[col] = daily.mul(10000).round(3)

quantile_2_daily_returns
Out[9]:
1D 2D 3D 4D 5D
factor_quantile
2 1.991 1.706 2.913 4.809 5.895

Finally, let's compare this calculated value with those returned by create_full_tear_sheet.

In [10]:
# Display our hand-computed daily-compounded bps returns next to the tear
# sheet so the "Mean Period Wise Return Top Quantile (bps)" row can be
# compared directly.
display(quantile_2_daily_returns)
create_full_tear_sheet(merged_data, long_short=False)
1D 2D 3D 4D 5D
factor_quantile
2 1.991 1.706 2.913 4.809 5.895
Quantiles Statistics
min max mean std count count %
factor_quantile
1 False False False 0.0 58471 84.600804
2 True True True 0.0 10643 15.399196
Returns Analysis
1D 2D 3D 4D 5D
Ann. alpha 0.096 0.101 0.076 0.086 0.093
beta 1.101 1.141 1.089 1.081 1.069
Mean Period Wise Return Top Quantile (bps) 1.991 1.706 2.913 4.809 5.895
Mean Period Wise Return Bottom Quantile (bps) -3.212 -3.520 -2.113 -0.427 0.558
Mean Period Wise Spread (bps) 5.203 4.597 4.387 4.501 4.637
<matplotlib.figure.Figure at 0x7f98a825e780>
Information Analysis
1D 2D 3D 4D 5D
IC Mean 0.001 0.001 0.002 0.006 0.007
IC Std. 0.116 0.123 0.121 0.118 0.118
Risk-Adjusted IC 0.011 0.007 0.019 0.048 0.055
t-stat(IC) 0.178 0.113 0.310 0.795 0.916
p-value(IC) 0.859 0.910 0.757 0.428 0.361
IC Skew 0.377 -0.057 0.332 0.239 0.167
IC Kurtosis 1.784 2.165 1.745 1.739 1.607
Turnover Analysis
1D 2D 3D 4D 5D
Quantile 1 Mean Turnover 0.870 0.876 0.876 0.878 0.881
Quantile 2 Mean Turnover 0.887 0.910 0.922 0.927 0.940
1D 2D 3D 4D 5D
Mean Factor Rank Autocorrelation 0.844 0.683 0.556 0.489 0.429

That's it. The tear sheet's mean period wise returns for the top quantile match our calculated returns.

In [ ]: