# Imports needed to use pipeline and custom factors
from quantopian.pipeline import Pipeline, CustomFactor
from quantopian.research import run_pipeline
# Import to use static assets so we can test
from quantopian.pipeline.filters import StaticAssets
# Import some data
from quantopian.pipeline.data.builtin import USEquityPricing
# Define our Custom Factor
class PreviousClose(CustomFactor):
    """Custom factor that reports a close price from earlier in the lookback window.

    ``compute`` copies the first row of the ``close`` window into ``out``,
    so the value returned is the close from ``window_length`` days back.
    """

    # The only input is the daily close price.
    inputs = [USEquityPricing.close]

    # Number of days to look back. The class default is 2 (the close from
    # 2 days ago). It can be overridden per instance, for example:
    #     my_close_10 = PreviousClose(window_length=10)
    # which would return the close from 10 days ago.
    window_length = 2

    def compute(self, today, assets, out, close):
        """Write the first row of the ``close`` window into ``out``."""
        # Index 0 is the start of the lookback window, i.e. the value from
        # ``window_length`` days back.
        out[:] = close[0]
# The single asset we want data for.
my_asset = symbols('SPY')

# A filter that passes only that asset.
my_filter = StaticAssets([my_asset])

# One PreviousClose factor per lookback of 1 through 5 days.
# Passing the filter as a mask limits the computation to the chosen asset.
(
    close_yesterday,
    close_2_day_ago,
    close_3_day_ago,
    close_4_day_ago,
    close_5_day_ago,
) = (
    PreviousClose(window_length=lookback, mask=my_filter)
    for lookback in range(1, 6)
)
# Build the pipeline object, which does the heavy lifting of fetching the data.
# Each entry in `columns` becomes a column of the returned dataframe.
# The `screen` restricts the rows to our asset; without one, every asset in
# the Q database would be returned.
my_pipe = Pipeline(
    columns=dict(
        close_yesterday=close_yesterday,
        close_2_day_ago=close_2_day_ago,
        close_3_day_ago=close_3_day_ago,
        close_4_day_ago=close_4_day_ago,
        close_5_day_ago=close_5_day_ago,
    ),
    screen=my_filter,
)
# With the factors and pipeline defined above, we can now run it.
# We want the data the pipeline would have returned from 1-5-2017 to 2-5-2017.
# NOTE(review): these 'M-D-YYYY' strings are presumably parsed month-first
# (Jan 5 to Feb 5, 2017) -- confirm, or switch to ISO 'YYYY-MM-DD' dates.
start_date = '1-5-2017'
end_date = '2-5-2017'
# Pass the variables rather than repeating the literals, so the pipeline run
# and the later get_pricing check always cover the same date range.
results = run_pipeline(my_pipe, start_date, end_date)
# Display the dataframe to see what we have.
results
# Sanity check: pull the actual daily close prices for the same asset and
# date range with get_pricing, so they can be compared with the pipeline
# output. The result is a pandas dataframe.
my_prices = get_pricing(
    symbols=my_asset,
    fields=['close_price'],
    frequency='daily',
    start_date=start_date,
    end_date=end_date,
)
# Display the prices.
my_prices
It may not immediately seem correct, but remember that the pipeline returns data as of the day it was run. In other words, if it's run today, it will return yesterday's close (not today's). The get_pricing method, however, returns the data as of the dates requested.