Notebook

Recursive factors - Three ways to do EWMA

Calculate EMWA of price three ways.

  • explicitly calling previous factor
  • pandas EWMA
  • custom factor and a for loop
In [4]:
# First, we need to import the basic pipeline methods
from quantopian.pipeline import Pipeline, CustomFactor
from quantopian.research import run_pipeline

# Also get the built-in filters and/or factors to use
from quantopian.pipeline.filters import QTradableStocksUS, Q500US, StaticAssets

# Finally get any data we want to use
# Not using any so pass
from quantopian.pipeline.data.builtin import USEquityPricing  

# Import numpy and pandas because they rock
import numpy as np
import pandas as pd
In [ ]:
 
In [5]:
# Get previous factor data for our explicit version 
class Factor_N_Days_Ago(CustomFactor):  
    def compute(self, today, assets, out, input_factor):  
        out[:] = input_factor[0]
        
# Custom factor for EWMA using pandas version
class PriceEWMA_Pandas(CustomFactor):
    inputs = [USEquityPricing.close]
    params = {'ewma_span'}
    
    def compute(self, today, assets, out, price, ewma_span):
        ewma = pd.ewma(price, span=ewma_span, adjust=False)
        out[:] = ewma[-1]

#Custom factor using a for loop and a store for previous factor data
class PriceEMA2_For_Loop(CustomFactor):
    inputs = [USEquityPricing.close]
    params = {'ewma_span'}
    
    def compute(self, today, assets, out, price, ewma_span):
        # Create our function
        def my_ema(previous_factor_value, current_input_value):
            alpha = 2.0 / (1.0+ewma_span)
            current_factor_value = ((1-alpha) * previous_factor_value) + (alpha * current_input_value)
            return current_factor_value
        
        # Create an array to put our historical factor values into
        factor_values = np.empty(price.shape)
        
        # Set our initial value
        factor_values[0] = price[0]

        # Fill our factor value array
        for i in range(1, len(factor_values)):
            factor_values[i] = my_ema(factor_values[i-1], price[i])
            
        out[:] = factor_values[-1]
In [6]:
# Now use our single factor. 
my_universe = Q500US()

# Set our span
EWMA_SPAN = 4

alpha = 2.0 / (1.0+EWMA_SPAN)

# Start with a hardcoded EWMA
factor_0 = Factor_N_Days_Ago(inputs=[USEquityPricing.close], window_length=5)
factor_1 =((1-alpha) * factor_0) + (alpha * Factor_N_Days_Ago(inputs=[USEquityPricing.close], window_length=4))
factor_2 =((1-alpha) * factor_1) + (alpha * Factor_N_Days_Ago(inputs=[USEquityPricing.close], window_length=3))
factor_3 =((1-alpha) * factor_2) + (alpha * Factor_N_Days_Ago(inputs=[USEquityPricing.close], window_length=2))
factor_4 =((1-alpha) * factor_3) + (alpha * Factor_N_Days_Ago(inputs=[USEquityPricing.close], window_length=1))

ewma_calculated = factor_4

# Now our pandas EWMA
ewma_pandas = PriceEWMA_Pandas(window_length=5, mask=my_universe, ewma_span=EWMA_SPAN)

# Now our pandas EWMA
ewma_for_loop = PriceEMA2_For_Loop(window_length=5, mask=my_universe, ewma_span=EWMA_SPAN)
In [7]:
# set some start and end dates
start = '2019-1-28'
end = '2019-10-28'
In [8]:
# let's run a pipeline comparing the 3 different approaches
my_pipe = Pipeline(
    columns={
             'ewma_calculated': ewma_calculated,
             'ewma_pandas': ewma_pandas,
             'ewma_for_loop': ewma_for_loop,
            },
    screen = Q500US()
    )

results = run_pipeline(my_pipe, start, end)
results.head()

/venvs/py35/lib/python3.5/site-packages/ipykernel_launcher.py:12: FutureWarning: pd.ewm_mean is deprecated for ndarrays and will be removed in a future version
  if sys.path[0] == '':
Pipeline Execution Time: 20.15 Seconds
Out[8]:
ewma_calculated ewma_for_loop ewma_pandas
2019-01-28 00:00:00+00:00 Equity(24 [AAPL]) 155.512768 155.512768 155.512768
Equity(53 [ABMD]) 344.164224 344.164224 344.164224
Equity(62 [ABT]) 70.959376 70.959376 70.959376
Equity(67 [ADSK]) 139.668896 139.668896 139.668896
Equity(76 [TAP]) 64.028896 64.028896 64.028896

Great! The 3 approaches give identical results.

Let's check how long each approach takes.

In [9]:
# let's run a pipeline comparing the 3 different approaches
my_pipe = Pipeline(
    columns={
             'ewma_calculated': ewma_calculated,
            },
    screen = Q500US()
    )

results = run_pipeline(my_pipe, start, end)
results.head()

Pipeline Execution Time: 1.45 Seconds
Out[9]:
ewma_calculated
2019-01-28 00:00:00+00:00 Equity(24 [AAPL]) 155.512768
Equity(53 [ABMD]) 344.164224
Equity(62 [ABT]) 70.959376
Equity(67 [ADSK]) 139.668896
Equity(76 [TAP]) 64.028896
In [10]:
# let's run a pipeline comparing the 3 different approaches
my_pipe = Pipeline(
    columns={
             'ewma_pandas': ewma_pandas,
            },
    screen = Q500US()
    )

results = run_pipeline(my_pipe, start, end)
results.head()

/venvs/py35/lib/python3.5/site-packages/ipykernel_launcher.py:12: FutureWarning: pd.ewm_mean is deprecated for ndarrays and will be removed in a future version
  if sys.path[0] == '':
Pipeline Execution Time: 1.98 Seconds
Out[10]:
ewma_pandas
2019-01-28 00:00:00+00:00 Equity(24 [AAPL]) 155.512768
Equity(53 [ABMD]) 344.164224
Equity(62 [ABT]) 70.959376
Equity(67 [ADSK]) 139.668896
Equity(76 [TAP]) 64.028896
In [11]:
# let's run a pipeline comparing the 3 different approaches
my_pipe = Pipeline(
    columns={
             'ewma_for_loop': ewma_for_loop,
            },
    screen = Q500US()
    )

results = run_pipeline(my_pipe, start, end)
results.head()

Pipeline Execution Time: 0.88 Seconds
Out[11]:
ewma_for_loop
2019-01-28 00:00:00+00:00 Equity(24 [AAPL]) 155.512768
Equity(53 [ABMD]) 344.164224
Equity(62 [ABT]) 70.959376
Equity(67 [ADSK]) 139.668896
Equity(76 [TAP]) 64.028896

Looks like the pandas version takes the longest and our for loop custom factor is the fastest.

In [ ]: