from quantopian.pipeline import Pipeline
from quantopian.pipeline.data.builtin import USEquityPricing
from quantopian.pipeline.filters import QTradableStocksUS
from quantopian.pipeline.data.morningstar import Fundamentals
from quantopian.pipeline.domain import US_EQUITIES
import pandas as pd
from quantopian.research import run_pipeline
from quantopian.pipeline.filters import StaticAssets
from matplotlib import pyplot as plt
def make_pipeline():
    """Build a pipeline of the latest open, close and volume for a fixed ticker list.

    The universe is a static set of six tickers (INTC, AMD, NXPI, AVGO, QCOM,
    QRVO) which is also used as the pipeline screen.

    Returns:
        Pipeline: a pipeline with the columns 'open', 'close' and 'volume'.
    """
    universe = StaticAssets(symbols(['INTC', 'AMD', 'NXPI', 'AVGO', 'QCOM', 'QRVO']))

    # Only the columns that are actually output are built; the previously
    # computed market-cap factor was never added to the pipeline.
    return Pipeline(
        columns={
            'open': USEquityPricing.open.latest,
            'close': USEquityPricing.close.latest,
            'volume': USEquityPricing.volume.latest,
        },
        screen=universe,
    )
# Date range passed to run_pipeline for the multi-stock example.
start = '2018-01-01'
end = '2018-01-05'
# Run the pipeline between start and end and keep the resulting frame in df.
df = run_pipeline(make_pipeline(), start, end)
# Preview the first six rows, then display the whole result.
df.head(6)
df
# One easy way to select a single stock is with the 'xs' (cross-section) method.
# To get only the data for INTC one could do this. The date is the level 0 index and the symbol is level 1.
df.xs(symbols('INTC'), level=1)
# To select a single column such as volume one can use 'dot' notation.
# This returns a pandas Series holding one column of data for just one stock.
df.xs(symbols('INTC'), level=1).volume
# Pandas has very simple and powerful plotting capabilities based upon matplotlib.
# Plotting is as easy as adding the 'plot' method.
df.xs(symbols('INTC'), level=1).volume.plot()
# Maybe one wants to compare all the volumes.
# Plot really would like to see data in rows and columns. It doesn't know how to interpret
# the multi-index very well. No problem. One can easily turn an index level into columns using 'unstack'.
df.volume.unstack(level=1)
# That looks good. We have a column of volume data for each security.
# The plot method will now plot each column separately.
df.volume.unstack(level=1).plot()
# Sometimes it's easier to read on separate plots.
# Pandas plot can do that too. The layout is sized for the six securities (3 rows x 2 columns)
# so the figure is not padded with empty subplot slots.
df.volume.unstack(level=1).plot(subplots=True, figsize=(15, 12), layout=(3, 2));
# The above plots could be a bit misleading since they use different scales.
# To force them all to have the same scale use the 'sharey' parameter.
df.volume.unstack(level=1).plot(subplots=True, figsize=(15, 12), layout=(3, 2), sharey=True);
Pipeline may not be the best way to get data for plotting. The issue is that pipeline returns data that is 'split and dividend adjusted as of each day'. In other words, for open, close, and volume data, these are the values that would actually have been seen on each day.
Let's look at AAPL data around the time of its big 7:1 stock split on 6-9-2014.
First make a new pipeline.
def make_aapl_pipeline():
    """Build a pipeline of the latest open, close and volume for AAPL only.

    The universe is a static set containing the single ticker AAPL, which is
    also used as the pipeline screen.

    Returns:
        Pipeline: a pipeline with the columns 'open', 'close' and 'volume'.
    """
    universe = StaticAssets(symbols(['AAPL']))

    # Only the columns that are actually output are built; the previously
    # computed market-cap factor was never added to the pipeline.
    return Pipeline(
        columns={
            'open': USEquityPricing.open.latest,
            'close': USEquityPricing.close.latest,
            'volume': USEquityPricing.volume.latest,
        },
        screen=universe,
    )
# Date range for the AAPL example: June 2014.
start = '2014-06-01'
end = '2014-06-30'
# NOTE: this rebinds df, replacing the earlier multi-stock result.
df = run_pipeline(make_aapl_pipeline(), start, end)
df
# Big drop on 6-9-2014 when a stock split happened.
# These are the actual values people would have seen on those days.
# Let's see what the plot looks like.
df.close.plot();
# A big 7x drop at the time of the stock split.
# However, one typically wants prices adjusted as of a single common end date (not as of each date).
# The above graph makes it falsely appear there was a huge drop in price.
# So, rather than pipeline data, use the get_pricing method.
# This adjusts prices and volumes 'as of the end date of the method'.
# The following will adjust data as of 6-30-2014 (the value of 'end' set above).
df_adjusted = get_pricing(symbols('AAPL'), start, end)
df_adjusted
# Let's plot this pricing data
df_adjusted.close_price.plot();
Both pipeline and get_pricing approaches are valid but have different use cases.