Notebook

Factor Manipulation Using Numpy Arrays

Factors can be placed in NumPy arrays and manipulated to produce other factors, as long as you keep in mind that the arrays hold factor objects and not the underlying data. The example below builds a new factor that is the weighted sum of several other factors.

In [1]:
from quantopian.pipeline import Pipeline
from quantopian.research import run_pipeline

from quantopian.pipeline.data.zacks import broker_ratings 

import numpy as np
In [16]:
# Build one factor per broker-rating count column of the Zacks dataset.
# Each `.latest` attribute is a factor object (see the type check below),
# not the rating counts themselves.
# Note: `rating_buys` / `rating_sells` come from the `mod_buys` / `mod_sells`
# columns (presumably "moderate" ratings -- confirm against the dataset docs).
rating_strong_buys = broker_ratings.rating_cnt_strong_buys.latest
rating_buys = broker_ratings.rating_cnt_mod_buys.latest
rating_holds = broker_ratings.rating_cnt_holds.latest
rating_sells = broker_ratings.rating_cnt_mod_sells.latest
rating_strong_sells = broker_ratings.rating_cnt_strong_sells.latest

# Show the type of one of the factors
type(rating_buys)
Out[16]:
<class 'zipline.pipeline.factors.factor.Latest'>
In [17]:
# Collect the five factors into a NumPy array so they can be combined with
# array arithmetic. The elements are the factor objects themselves, not data.
# The order here must match the order of the weights applied to this array.
ratings = np.array([
    rating_strong_buys,
    rating_buys,
    rating_holds,
    rating_sells,
    rating_strong_sells,
])
In [18]:
# Weight applied to each factor. There must be exactly one weight per entry of
# the ratings array, listed in the same order, so the two arrays multiply
# element by element.
rating_weights = np.array([
    0.025,   # strong buys
    0.015,   # buys
    0.005,   # holds
    -0.005,  # sells
    -0.015,  # strong sells
])
In [22]:
# Element-wise multiply: each factor object is multiplied by its weight,
# giving a new array of "weighted" factors.
weighted_factors = np.multiply(ratings, rating_weights)

# weighted_factors is still an array of factors, not a single new factor
type(weighted_factors)
Out[22]:
<type 'numpy.ndarray'>
In [23]:
# Each element of the array is itself a factor object, not computed data
first_weighted_factor = weighted_factors[0]
type(first_weighted_factor)
Out[23]:
<class 'zipline.pipeline.factors.factor.NumExprFactor'>
In [24]:
# Reduce the array of weighted factors to a single factor with NumPy's sum.
# This works because summing an array of objects just adds the elements
# together, and adding factors yields another factor (see the type below).
sum_factor = np.sum(weighted_factors)

type(sum_factor)
Out[24]:
<class 'zipline.pipeline.factors.factor.NumExprFactor'>
In [25]:
# Make an empty pipeline instance called pipe; columns are added to it below
pipe = Pipeline()
In [26]:
# Add every raw and every weighted factor to the pipe as its own column.
# Columns are named after the array index of the factor, e.g. "(0,)" for the
# raw factors and "weighted(0,)" for the weighted ones.
# overwrite=True allows this cell to be re-run without a duplicate-name error.
for prefix, factor_array in (("", ratings), ("weighted", weighted_factors)):
    for position, term in np.ndenumerate(factor_array):
        pipe.add(term, prefix + str(position), overwrite=True)

# The combined weighted-sum factor gets a descriptive column name
pipe.add(sum_factor, "sum_factor", overwrite=True)
In [27]:
# Run the pipeline to return a dataframe with data from the desired date(s).
# Start and end date are the same, so a single day is returned.
results = run_pipeline(pipe, '2016-05-25', '2016-05-25')

# Parenthesised single-argument form prints the same text on Python 2 and 3
print("Total Asset Count: %d" % len(results))
results.head(20)
Total Asset Count: 8355
Out[27]:
(0,) (1,) (2,) (3,) (4,) sum_factor weighted(0,) weighted(1,) weighted(2,) weighted(3,) weighted(4,)
2016-05-25 00:00:00+00:00 Equity(2 [ARNC]) 6.0 1.0 4.0 0.0 1.0 0.170 0.150 0.015 0.020 -0.000 -0.015
Equity(21 [AAME]) NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
Equity(24 [AAPL]) 21.0 3.0 4.0 0.0 1.0 0.575 0.525 0.045 0.020 -0.000 -0.015
Equity(25 [ARNC_PR]) NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
Equity(31 [ABAX]) 1.0 0.0 4.0 0.0 0.0 0.045 0.025 0.000 0.020 -0.000 -0.000
Equity(39 [DDC]) 2.0 0.0 1.0 0.0 0.0 0.055 0.050 0.000 0.005 -0.000 -0.000
Equity(41 [ARCB]) 3.0 0.0 5.0 1.0 0.0 0.095 0.075 0.000 0.025 -0.005 -0.000
Equity(52 [ABM]) NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
Equity(53 [ABMD]) 5.0 1.0 2.0 0.0 0.0 0.150 0.125 0.015 0.010 -0.000 -0.000
Equity(62 [ABT]) 9.0 2.0 4.0 0.0 0.0 0.275 0.225 0.030 0.020 -0.000 -0.000
Equity(64 [ABX]) 4.0 0.0 11.0 0.0 1.0 0.140 0.100 0.000 0.055 -0.000 -0.015
Equity(66 [AB]) 1.0 0.0 2.0 0.0 0.0 0.035 0.025 0.000 0.010 -0.000 -0.000
Equity(67 [ADSK]) 7.0 0.0 5.0 1.0 1.0 0.180 0.175 0.000 0.025 -0.005 -0.015
Equity(69 [ACAT]) 3.0 0.0 5.0 0.0 1.0 0.085 0.075 0.000 0.025 -0.000 -0.015
Equity(70 [VBF]) NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
Equity(76 [TAP]) 3.0 0.0 1.0 0.0 0.0 0.080 0.075 0.000 0.005 -0.000 -0.000
Equity(84 [ACET]) 2.0 0.0 0.0 0.0 0.0 0.050 0.050 0.000 0.000 -0.000 -0.000
Equity(100 [IEP]) 1.0 0.0 0.0 0.0 0.0 0.025 0.025 0.000 0.000 -0.000 -0.000
Equity(106 [ACU]) 0.0 0.0 0.0 0.0 0.0 0.000 0.000 0.000 0.000 -0.000 -0.000
Equity(110 [ACXM]) 2.0 1.0 0.0 0.0 0.0 0.065 0.050 0.015 0.000 -0.000 -0.000
In [15]:
# If one doesn't want the NaNs, filter them out by making a filter with the
# factor's isfinite method and setting it as the screen of the pipe.
# NOTE: set_screen changes `pipe` itself, so every later run of `pipe`
# (including re-running the unscreened cell above) returns screened results.
finite_sum = sum_factor.isfinite()
pipe.set_screen(finite_sum)

# Run the pipeline again for the same date, now with the screen applied
results = run_pipeline(pipe, '2016-05-25', '2016-05-25')

# Parenthesised single-argument form prints the same text on Python 2 and 3
print("Total Asset Count: %d" % len(results))
results.head(20)
Total Asset Count: 3672
Out[15]:
(0,) (1,) (2,) (3,) (4,) sum_factor weighted(0,) weighted(1,) weighted(2,) weighted(3,) weighted(4,)
2016-05-25 00:00:00+00:00 Equity(2 [ARNC]) 6.0 1.0 4.0 0.0 1.0 0.170 0.150 0.015 0.020 -0.000 -0.015
Equity(24 [AAPL]) 21.0 3.0 4.0 0.0 1.0 0.575 0.525 0.045 0.020 -0.000 -0.015
Equity(31 [ABAX]) 1.0 0.0 4.0 0.0 0.0 0.045 0.025 0.000 0.020 -0.000 -0.000
Equity(39 [DDC]) 2.0 0.0 1.0 0.0 0.0 0.055 0.050 0.000 0.005 -0.000 -0.000
Equity(41 [ARCB]) 3.0 0.0 5.0 1.0 0.0 0.095 0.075 0.000 0.025 -0.005 -0.000
Equity(53 [ABMD]) 5.0 1.0 2.0 0.0 0.0 0.150 0.125 0.015 0.010 -0.000 -0.000
Equity(62 [ABT]) 9.0 2.0 4.0 0.0 0.0 0.275 0.225 0.030 0.020 -0.000 -0.000
Equity(64 [ABX]) 4.0 0.0 11.0 0.0 1.0 0.140 0.100 0.000 0.055 -0.000 -0.015
Equity(66 [AB]) 1.0 0.0 2.0 0.0 0.0 0.035 0.025 0.000 0.010 -0.000 -0.000
Equity(67 [ADSK]) 7.0 0.0 5.0 1.0 1.0 0.180 0.175 0.000 0.025 -0.005 -0.015
Equity(69 [ACAT]) 3.0 0.0 5.0 0.0 1.0 0.085 0.075 0.000 0.025 -0.000 -0.015
Equity(76 [TAP]) 3.0 0.0 1.0 0.0 0.0 0.080 0.075 0.000 0.005 -0.000 -0.000
Equity(84 [ACET]) 2.0 0.0 0.0 0.0 0.0 0.050 0.050 0.000 0.000 -0.000 -0.000
Equity(100 [IEP]) 1.0 0.0 0.0 0.0 0.0 0.025 0.025 0.000 0.000 -0.000 -0.000
Equity(106 [ACU]) 0.0 0.0 0.0 0.0 0.0 0.000 0.000 0.000 0.000 -0.000 -0.000
Equity(110 [ACXM]) 2.0 1.0 0.0 0.0 0.0 0.065 0.050 0.015 0.000 -0.000 -0.000
Equity(114 [ADBE]) 8.0 2.0 3.0 0.0 0.0 0.245 0.200 0.030 0.015 -0.000 -0.000
Equity(122 [ADI]) 10.0 2.0 8.0 0.0 0.0 0.320 0.250 0.030 0.040 -0.000 -0.000
Equity(128 [ADM]) 1.0 1.0 5.0 0.0 1.0 0.050 0.025 0.015 0.025 -0.000 -0.015
Equity(154 [AEM]) 6.0 1.0 7.0 0.0 0.0 0.200 0.150 0.015 0.035 -0.000 -0.000
In [ ]: