I'm working with a custom factor that is defined inside a function, so that both the window length and the timestamp can be set dynamically. Please see the example below.
from quantopian.pipeline.data.builtin import USEquityPricing
from quantopian.pipeline.factors import CustomFactor
from quantopian.pipeline import Pipeline
from quantopian.research import run_pipeline
import numpy as np
import pandas as pd
def StdDev(offset=365, nbars=5, s_input=USEquityPricing.close):
    """Build a factor that computes a lagged, NaN-ignoring standard deviation.

    The returned factor requests ``nbars + offset`` rows of ``s_input`` and
    reports the column-wise standard deviation of the oldest ``nbars`` rows
    of that window, i.e. the ``nbars``-row span that ends ``offset`` rows
    before the most recent row.

    Parameters
    ----------
    offset : int
        Number of most-recent rows to skip before the measured span.
        ``0`` means no lag. Must be non-negative.
    nbars : int
        Number of rows the standard deviation is computed over. Must be
        at least 1.
    s_input
        Pipeline input used as the factor's single entry in ``inputs``.

    Returns
    -------
    StdDevFact
        An instance of the ``CustomFactor`` subclass defined in this function.

    Raises
    ------
    ValueError
        If ``nbars`` is smaller than 1 or ``offset`` is negative.
    """
    if nbars < 1:
        raise ValueError("nbars must be at least 1, got %r" % (nbars,))
    if offset < 0:
        raise ValueError("offset must be non-negative, got %r" % (offset,))

    class StdDevFact(CustomFactor):
        inputs = [s_input]
        # The window has to reach back far enough to contain the lagged span.
        window_length = nbars + offset

        def compute(self, today, asset_ids, out, values):
            # NOTE(review): relies on `values` having `window_length` rows
            # ordered oldest -> newest (see the CustomFactor documentation).
            # Under that layout the first `nbars` rows are exactly the span
            # that ends `offset` rows before the newest row.
            #
            # Slicing from the front (rather than with two negative bounds)
            # keeps start < stop and stays correct for offset == 0, where a
            # stop index of `-offset` would be 0 and yield an empty slice.
            lagged_rows = values[:nbars]

            # Column-wise standard deviation, ignoring NaNs.
            out[:] = np.nanstd(lagged_rows, axis=0)

    return StdDevFact()
def make_pipeline():
    """Return a Pipeline whose only column, 'std_dev', is a default StdDev()."""
    pipeline_columns = {'std_dev': StdDev()}
    return Pipeline(columns=pipeline_columns)
# Run the pipeline from 2015-01-01 through 2016-01-01, then call head(10)
# on the result as the final expression.
result = run_pipeline(
    make_pipeline(),
    start_date='2015-01-01',
    end_date='2016-01-01',
)
result.head(10)
The problem with the above code, correct me if I'm wrong, is that the offset parameter makes it computationally inefficient by drastically increasing the window length. I'm hoping someone has a better suggestion, such that the compute function receives only M=nbars rows of input instead of M=nbars+offset, and the offset parameter instead adjusts the timestamp directly. I've attempted the following, to no avail:
Attempt 1: yields no error, but the result is unchanged irrespective of the value of offset.
def StdDev(offset=365, nbars=5, s_input=USEquityPricing.close):
    """Attempt 1: request only ``nbars`` rows and try to shift ``today``.

    NOTE: ``offset`` has no influence on the output of this version. Its only
    use is to rebind the local name ``today`` inside ``compute``, and that
    rebound value is never read; the result is computed from ``values`` alone.
    """
    class StdDevFact(CustomFactor):
        inputs=[s_input]
        window_length=nbars
        def compute(self, today, asset_ids, out, values):
            # Calculates the column-wise standard deviation, ignoring NaNs
            # This assignment only rebinds the local parameter `today`. Nothing
            # below reads it, and `values` has already been passed in, so the
            # shift cannot change which rows are used.
            today = today - pd.DateOffset(days=offset)
            # Depends solely on `values`, hence identical for every `offset`.
            out[:] = np.nanstd(values, axis=0)
    return StdDevFact()
Attempt 2: yields an error, "today is undefined"
def StdDev(offset=365, nbars=5, s_input=USEquityPricing.close):
    """Attempt 2: try to shift ``today`` at class-definition level.

    NOTE: this version fails when called. The class body runs as soon as the
    ``class`` statement executes, and at that point no name ``today`` exists:
    it is only a parameter of ``compute``, not of ``StdDev``.
    """
    class StdDevFact(CustomFactor):
        inputs=[s_input]
        window_length=nbars
        # `today` is read here before anything in the visible scopes defines
        # it, which is the "today is undefined" error reported for this attempt.
        today = today - pd.DateOffset(days=offset)
        def compute(self, today, asset_ids, out, values):
            # Calculates the column-wise standard deviation, ignoring NaNs
            # The `today` parameter here is unrelated to the class-level name
            # above, and it is not used in the computation.
            out[:] = np.nanstd(values, axis=0)
    return StdDevFact()