# Factors can be stored in NumPy arrays and, with some care, manipulated there to produce other factors.
# The example below builds a new factor that is the weighted sum of several existing factors.
from quantopian.pipeline import Pipeline
from quantopian.research import run_pipeline
from quantopian.pipeline.data.zacks import broker_ratings
import numpy as np
# Pull the analyst-rating counts that serve as inputs.
# Each name below is bound to a factor object (a description of a computation),
# not to the underlying data.
rating_strong_buys = broker_ratings.rating_cnt_strong_buys.latest
rating_buys = broker_ratings.rating_cnt_mod_buys.latest
rating_holds = broker_ratings.rating_cnt_holds.latest
rating_sells = broker_ratings.rating_cnt_mod_sells.latest
rating_strong_sells = broker_ratings.rating_cnt_strong_sells.latest

# Inspect the type of one of the factors
type(rating_buys)

# Collect the factors in an array so NumPy operations can be applied to them.
# The array holds the factor objects themselves, still not any data.
ratings = np.array([
    rating_strong_buys,
    rating_buys,
    rating_holds,
    rating_sells,
    rating_strong_sells,
])

# One weight per factor, listed in the same order as (and with the same length as) ratings
rating_weights = np.array([
    0.025,
    0.015,
    0.005,
    -0.005,
    -0.015,
])

# Element-wise product: each factor is multiplied by its own weight,
# giving a new array of "weighted" factors
weighted_factors = np.multiply(ratings, rating_weights)

# weighted_factors is an array of factors, not a single new factor
type(weighted_factors)

# ...and each element of it is a factor, not data
type(weighted_factors[0])

# Summing the array adds its elements together, which collapses the weighted
# factors into one new factor equal to their total.
# (A little surprising that this works.)
sum_factor = np.sum(weighted_factors)
type(sum_factor)
# Make a pipeline instance called pipe
pipe = Pipeline()

# Add every raw and every weighted factor to the pipe as a column.
# np.ndenumerate yields (index_tuple, element) pairs, so str(index) is the tuple's
# text form: on these flat arrays the column names come out as "(0,)", "weighted(0,)", ...
# overwrite=True allows this cell to be re-run after the columns have already been added.
for index, factor in np.ndenumerate(ratings):
    pipe.add(factor, str(index), overwrite=True)
for index, factor in np.ndenumerate(weighted_factors):
    pipe.add(factor, "weighted" + str(index), overwrite=True)
pipe.add(sum_factor, "sum_factor", overwrite=True)

# Run the pipeline to return a dataframe with data from the desired date(s)
results = run_pipeline(pipe, '2016-05-25', '2016-05-25')
# A parenthesised single-argument print behaves the same on Python 2 and Python 3
print("Total Asset Count: %d" % len(results))
results.head(20)
# If one doesn't want the NaNs, one can filter them out: make a filter with the
# factor's isfinite method and apply it to the pipe as its screen.
finite_sum = sum_factor.isfinite()
# overwrite=True mirrors the pipe.add calls so this cell can be re-run once a
# screen has already been set on pipe.
pipe.set_screen(finite_sum, overwrite=True)

# Run the screened pipeline to return a dataframe with data from the desired date(s)
results = run_pipeline(pipe, '2016-05-25', '2016-05-25')
# A parenthesised single-argument print behaves the same on Python 2 and Python 3
print("Total Asset Count: %d" % len(results))
results.head(20)