Notebook

N Days Ago Custom Factor

In [1]:
# Imports needed to use pipeline and custom factors
from quantopian.pipeline import Pipeline, CustomFactor
from quantopian.research import run_pipeline

# Import to use static assets so we can test 
from quantopian.pipeline.filters import StaticAssets

# Import some data
from quantopian.pipeline.data.builtin import USEquityPricing
In [2]:
# Define our Custom Factor

class PreviousClose(CustomFactor):
    """Close price taken from the first row of the lookback window.

    With ``window_length = N`` the factor reports the close from N days
    back (N = 1 is the most recent close available to the pipeline).
    """

    # The only input column handed to compute().
    inputs = [USEquityPricing.close]

    # Default lookback is 2 days. Override it per instance, e.g.
    #     my_close_10 = PreviousClose(window_length=10)
    # gives the close from 10 days ago.
    window_length = 2

    def compute(self, today, assets, out, close):
        # `close` is presumably a (window_length, n_assets) array -- TODO confirm.
        # Its first row is copied to every slot of `out`.
        first_row = close[0]
        out[:] = first_row
In [3]:
# The single security we want to look at.
my_asset = symbols('SPY')

# A filter containing only that security.
my_filter = StaticAssets([my_asset])

# One PreviousClose factor per lookback of 1..5 days.
# Masking each factor with the filter limits its computation to SPY.
(close_yesterday,
 close_2_day_ago,
 close_3_day_ago,
 close_4_day_ago,
 close_5_day_ago) = (
    PreviousClose(window_length=days_back, mask=my_filter)
    for days_back in range(1, 6)
)

# The pipeline definition is just the columns we want in the returned
# dataframe, plus a screen that limits the rows to our filter. Without the
# screen every asset in the Q database would be returned.
my_pipe = Pipeline(
    columns={
        'close_yesterday': close_yesterday,
        'close_2_day_ago': close_2_day_ago,
        'close_3_day_ago': close_3_day_ago,
        'close_4_day_ago': close_4_day_ago,
        'close_5_day_ago': close_5_day_ago,
    },
    screen=my_filter,
)
In [4]:
# With the factors and pipeline defined in the previous cell we can now run it.
# In this case we want the data that the pipeline would have returned
# from 1-5-2017 to 2-5-2017. The output of this run starts at 2017-01-05,
# so these strings are read month-first.
start_date = '1-5-2017'
end_date = '2-5-2017'

# Pass the variables instead of repeating the date literals, so that the
# pipeline run and the later get_pricing call (which uses start_date and
# end_date) can never drift apart when the dates are edited.
results = run_pipeline(my_pipe, start_date, end_date)
In [5]:
# Display the pipeline output: one row per (date, asset) pair and one column
# per factor that was added to the pipeline.
results
Out[5]:
close_2_day_ago close_3_day_ago close_4_day_ago close_5_day_ago close_yesterday
2017-01-05 00:00:00+00:00 Equity(8554 [SPY]) 225.170 223.550 224.360 224.420 226.520
2017-01-06 00:00:00+00:00 Equity(8554 [SPY]) 226.520 225.170 223.550 224.360 226.360
2017-01-09 00:00:00+00:00 Equity(8554 [SPY]) 226.360 226.520 225.170 223.550 227.230
2017-01-10 00:00:00+00:00 Equity(8554 [SPY]) 227.230 226.360 226.520 225.170 226.500
2017-01-11 00:00:00+00:00 Equity(8554 [SPY]) 226.500 227.230 226.360 226.520 226.450
2017-01-12 00:00:00+00:00 Equity(8554 [SPY]) 226.450 226.500 227.230 226.360 227.070
2017-01-13 00:00:00+00:00 Equity(8554 [SPY]) 227.070 226.450 226.500 227.230 226.570
2017-01-17 00:00:00+00:00 Equity(8554 [SPY]) 226.570 227.070 226.450 226.500 227.020
2017-01-18 00:00:00+00:00 Equity(8554 [SPY]) 227.020 226.570 227.070 226.450 226.290
2017-01-19 00:00:00+00:00 Equity(8554 [SPY]) 226.290 227.020 226.570 227.070 226.740
2017-01-20 00:00:00+00:00 Equity(8554 [SPY]) 226.740 226.290 227.020 226.570 225.950
2017-01-23 00:00:00+00:00 Equity(8554 [SPY]) 225.950 226.740 226.290 227.020 226.690
2017-01-24 00:00:00+00:00 Equity(8554 [SPY]) 226.690 225.950 226.740 226.290 226.180
2017-01-25 00:00:00+00:00 Equity(8554 [SPY]) 226.180 226.690 225.950 226.740 227.590
2017-01-26 00:00:00+00:00 Equity(8554 [SPY]) 227.590 226.180 226.690 225.950 229.480
2017-01-27 00:00:00+00:00 Equity(8554 [SPY]) 229.480 227.590 226.180 226.690 229.295
2017-01-30 00:00:00+00:00 Equity(8554 [SPY]) 229.295 229.480 227.590 226.180 228.990
2017-01-31 00:00:00+00:00 Equity(8554 [SPY]) 228.990 229.295 229.480 227.590 227.630
2017-02-01 00:00:00+00:00 Equity(8554 [SPY]) 227.630 228.990 229.295 229.480 227.560
2017-02-02 00:00:00+00:00 Equity(8554 [SPY]) 227.560 227.630 228.990 229.295 227.630
2017-02-03 00:00:00+00:00 Equity(8554 [SPY]) 227.630 227.560 227.630 228.990 227.770
2017-02-06 00:00:00+00:00 Equity(8554 [SPY]) 227.770 227.630 227.560 227.630 229.380
In [6]:
# Sanity check: pull the actual daily close prices for the same asset and
# the same date range, so they can be compared with the pipeline output.
# get_pricing returns a Pandas dataframe.
my_prices = get_pricing(
    symbols=my_asset,
    fields=['close_price'],
    frequency='daily',
    start_date=start_date,
    end_date=end_date,
)
In [7]:
# Now let's display the pricing dataframe so it can be compared with the
# pipeline output.
my_prices
Out[7]:
close_price
2017-01-05 00:00:00+00:00 226.360
2017-01-06 00:00:00+00:00 227.230
2017-01-09 00:00:00+00:00 226.500
2017-01-10 00:00:00+00:00 226.450
2017-01-11 00:00:00+00:00 227.070
2017-01-12 00:00:00+00:00 226.570
2017-01-13 00:00:00+00:00 227.020
2017-01-17 00:00:00+00:00 226.290
2017-01-18 00:00:00+00:00 226.740
2017-01-19 00:00:00+00:00 225.950
2017-01-20 00:00:00+00:00 226.690
2017-01-23 00:00:00+00:00 226.180
2017-01-24 00:00:00+00:00 227.590
2017-01-25 00:00:00+00:00 229.480
2017-01-26 00:00:00+00:00 229.295
2017-01-27 00:00:00+00:00 228.990
2017-01-30 00:00:00+00:00 227.630
2017-01-31 00:00:00+00:00 227.560
2017-02-01 00:00:00+00:00 227.630
2017-02-02 00:00:00+00:00 227.770
2017-02-03 00:00:00+00:00 229.380

That looks like it all matches. Great!

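It may not immediately seem correct, but remember that the pipeline returns data as of the day it was run. In other words, if it's run today, it will return yesterday's close (not today's). For example, the pipeline row dated 2017-01-06 shows 226.360 for close_yesterday, which is the close_price that get_pricing reports for 2017-01-05. The get_pricing method, however, returns the data as of the dates requested.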