Recursive factors - Three ways to do EWMA

Calculate the EWMA of price three ways:

explicitly calling previous factor
pandas EWMA
custom factor and a for loop

# First, we need to import the basic pipeline methods
from quantopian.pipeline import Pipeline, CustomFactor
from quantopian.research import run_pipeline

# Also get the built-in filters and/or factors to use
from quantopian.pipeline.filters import QTradableStocksUS, Q500US, StaticAssets

# Finally get any data we want to use
# We only need daily pricing (the close) for this example
from quantopian.pipeline.data.builtin import USEquityPricing  

# Import numpy and pandas because they rock
import numpy as np
import pandas as pd

# Get previous factor data for our explicit version 
class Factor_N_Days_Ago(CustomFactor):
    """Output the first row of the input window.

    The caller picks how far back to look through ``window_length``: the
    notebook uses ``window_length=N`` to obtain the input's value from N
    rows back (assumes the window is ordered oldest-first, as in the
    pipeline API -- confirm against the CustomFactor documentation).
    """

    def compute(self, today, assets, out, input_factor):
        # Row 0 is the earliest row of the lookback window.
        earliest_row = input_factor[0]
        out[:] = earliest_row
        
# Custom factor for EWMA using pandas version
class PriceEWMA_Pandas(CustomFactor):
    """EWMA of the closing price, computed with pandas.

    Parameters (pipeline ``params``)
    --------------------------------
    ewma_span : number
        Span of the exponential window; pandas derives the smoothing
        weight from it as ``alpha = 2 / (span + 1)``.

    The output is the last row of the smoothed window, i.e. the EWMA as of
    the most recent bar in the lookback window.
    """
    inputs = [USEquityPricing.close]
    params = {'ewma_span'}

    def compute(self, today, assets, out, price, ewma_span):
        # ``pd.ewma`` is deprecated (it raises a FutureWarning on ndarrays)
        # and is removed in later pandas releases.  The ``.ewm(...).mean()``
        # accessor is the supported equivalent; it smooths down the rows
        # (axis 0), one column per asset, exactly as ``pd.ewma`` did.
        # ``adjust=False`` selects the recursive form
        #   y[t] = (1 - alpha) * y[t-1] + alpha * x[t],  y[0] = x[0].
        smoothed = pd.DataFrame(price).ewm(span=ewma_span, adjust=False).mean()
        out[:] = smoothed.values[-1]

#Custom factor using a for loop and a store for previous factor data
class PriceEMA2_For_Loop(CustomFactor):
    """EWMA of the closing price, computed with an explicit recursion.

    Parameters (pipeline ``params``)
    --------------------------------
    ewma_span : number
        Span of the exponential window; the smoothing weight is
        ``alpha = 2 / (1 + ewma_span)``.

    The recursion is seeded with the first row of the window and then
    folds in each later row:

        ema = (1 - alpha) * ema + alpha * price_row

    The value after the last row is written to ``out``.
    """
    inputs = [USEquityPricing.close]
    params = {'ewma_span'}

    def compute(self, today, assets, out, price, ewma_span):
        # The smoothing weight is the same for every row, so compute it once
        # instead of on every step of the loop.
        alpha = 2.0 / (1.0 + ewma_span)

        # Only the previous factor value is needed to produce the next one,
        # so carry a single running row rather than a full history array.
        # Seed the recursion with the first row of the window.
        ema = price[0]

        # Each step builds a new array, so ``price`` itself is never mutated.
        for current_price in price[1:]:
            ema = ((1 - alpha) * ema) + (alpha * current_price)

        out[:] = ema

# Universe used as the mask for the custom factors defined further down.
my_universe = Q500US()

# Span of the EWMA and the smoothing weight it implies
EWMA_SPAN = 4
alpha = 2.0 / (1.0+EWMA_SPAN)


def _close_n_days_ago(n):
    """Build the factor that returns the close from the first row of an n-row window."""
    return Factor_N_Days_Ago(inputs=[USEquityPricing.close], window_length=n)


# Hard-coded EWMA: seed with the close from a 5-row window, then fold in each
# shorter (more recent) lookback in turn using
#   new = (1 - alpha) * previous + alpha * close
factor_0 = _close_n_days_ago(5)
factor_1 = ((1-alpha) * factor_0) + (alpha * _close_n_days_ago(4))
factor_2 = ((1-alpha) * factor_1) + (alpha * _close_n_days_ago(3))
factor_3 = ((1-alpha) * factor_2) + (alpha * _close_n_days_ago(2))
factor_4 = ((1-alpha) * factor_3) + (alpha * _close_n_days_ago(1))

# The last link of the chain is the finished EWMA
ewma_calculated = factor_4

# Both custom factors use the same window, universe mask and span
ewma_factor_kwargs = dict(window_length=5, mask=my_universe, ewma_span=EWMA_SPAN)

# EWMA computed by pandas
ewma_pandas = PriceEWMA_Pandas(**ewma_factor_kwargs)

# EWMA computed by the explicit for loop
ewma_for_loop = PriceEMA2_For_Loop(**ewma_factor_kwargs)

# Date range for the pipeline run
start, end = '2019-1-28', '2019-10-28'

# One column per approach so the three results can be compared side by side
comparison_columns = {
    'ewma_calculated': ewma_calculated,
    'ewma_pandas': ewma_pandas,
    'ewma_for_loop': ewma_for_loop,
}
my_pipe = Pipeline(columns=comparison_columns, screen=Q500US())

results = run_pipeline(my_pipe, start, end)
results.head()

/venvs/py35/lib/python3.5/site-packages/ipykernel_launcher.py:12: FutureWarning: pd.ewm_mean is deprecated for ndarrays and will be removed in a future version
  if sys.path[0] == '':

Great! The 3 approaches give identical results.

Let's check how long each approach takes.

# Run a pipeline with only the hard-coded EWMA column, so this cell's
# run time reflects that approach alone
my_pipe = Pipeline(columns={'ewma_calculated': ewma_calculated}, screen=Q500US())

results = run_pipeline(my_pipe, start, end)
results.head()

# Run a pipeline with only the pandas EWMA column, so this cell's
# run time reflects that approach alone
my_pipe = Pipeline(columns={'ewma_pandas': ewma_pandas}, screen=Q500US())

results = run_pipeline(my_pipe, start, end)
results.head()

/venvs/py35/lib/python3.5/site-packages/ipykernel_launcher.py:12: FutureWarning: pd.ewm_mean is deprecated for ndarrays and will be removed in a future version
  if sys.path[0] == '':

# Run a pipeline with only the for-loop EWMA column, so this cell's
# run time reflects that approach alone
my_pipe = Pipeline(columns={'ewma_for_loop': ewma_for_loop}, screen=Q500US())

results = run_pipeline(my_pipe, start, end)
results.head()

Looks like the pandas version takes the longest and our for loop custom factor is the fastest.

		ewma_calculated	ewma_for_loop	ewma_pandas
2019-01-28 00:00:00+00:00	Equity(24 [AAPL])	155.512768	155.512768	155.512768
	Equity(53 [ABMD])	344.164224	344.164224	344.164224
	Equity(62 [ABT])	70.959376	70.959376	70.959376
	Equity(67 [ADSK])	139.668896	139.668896	139.668896
	Equity(76 [TAP])	64.028896	64.028896	64.028896

		ewma_calculated
2019-01-28 00:00:00+00:00	Equity(24 [AAPL])	155.512768
	Equity(53 [ABMD])	344.164224
	Equity(62 [ABT])	70.959376
	Equity(67 [ADSK])	139.668896
	Equity(76 [TAP])	64.028896

		ewma_pandas
2019-01-28 00:00:00+00:00	Equity(24 [AAPL])	155.512768
	Equity(53 [ABMD])	344.164224
	Equity(62 [ABT])	70.959376
	Equity(67 [ADSK])	139.668896
	Equity(76 [TAP])	64.028896

		ewma_for_loop
2019-01-28 00:00:00+00:00	Equity(24 [AAPL])	155.512768
	Equity(53 [ABMD])	344.164224
	Equity(62 [ABT])	70.959376
	Equity(67 [ADSK])	139.668896
	Equity(76 [TAP])	64.028896