Notebook
In [1]:
from quantopian.pipeline import Pipeline
In [2]:
from quantopian.pipeline.data.builtin import USEquityPricing
In [3]:
from quantopian.pipeline.filters import QTradableStocksUS
In [4]:
from quantopian.pipeline.data.morningstar import Fundamentals
In [5]:
from quantopian.pipeline.domain import US_EQUITIES
In [6]:
import pandas as pd
from quantopian.research import run_pipeline
from quantopian.pipeline.filters import StaticAssets
from matplotlib import pyplot as plt
In [7]:
def make_pipeline():
    """Build a pipeline of the latest open, close and volume for a fixed stock list.

    The universe is a static set of six tickers (INTC, AMD, NXPI, AVGO, QCOM,
    QRVO) and is applied as the pipeline screen, so only those assets appear
    in the output.

    Returns:
        Pipeline: a pipeline with the columns 'open', 'close' and 'volume'.
    """
    universe = StaticAssets(symbols(['INTC', 'AMD', 'NXPI', 'AVGO', 'QCOM', 'QRVO']))

    # Each column is the most recent daily value known to the pipeline.
    columns = {
        'open': USEquityPricing.open.latest,
        'close': USEquityPricing.close.latest,
        'volume': USEquityPricing.volume.latest,
    }

    return Pipeline(columns=columns, screen=universe)
In [8]:
# Run the pipeline over a short date range.
# The result is stored in `df` and used by the cells that follow.
start = '2018-01-01'
end = '2018-01-05'
df = run_pipeline(make_pipeline(), start, end)

Pipeline Execution Time: 3.55 Seconds
In [9]:
# Preview the first six rows of the pipeline output
df.head(6)
Out[9]:
close open volume
2018-01-02 00:00:00+00:00 Equity(351 [AMD]) 10.290 10.570 23435082.0
Equity(3951 [INTC]) 46.180 46.210 10118422.0
Equity(6295 [QCOM]) 64.069 64.360 3038214.0
Equity(38650 [AVGO]) 257.320 259.769 1138916.0
Equity(39994 [NXPI]) 117.080 116.930 1084999.0
Equity(48384 [QRVO]) 66.610 67.360 484524.0
In [10]:
# Display the full pipeline output
df
Out[10]:
close open volume
2018-01-02 00:00:00+00:00 Equity(351 [AMD]) 10.290 10.570 23435082.0
Equity(3951 [INTC]) 46.180 46.210 10118422.0
Equity(6295 [QCOM]) 64.069 64.360 3038214.0
Equity(38650 [AVGO]) 257.320 259.769 1138916.0
Equity(39994 [NXPI]) 117.080 116.930 1084999.0
Equity(48384 [QRVO]) 66.610 67.360 484524.0
2018-01-03 00:00:00+00:00 Equity(351 [AMD]) 10.990 10.420 39531765.0
Equity(3951 [INTC]) 46.830 46.380 14091350.0
Equity(6295 [QCOM]) 65.190 64.379 4472162.0
Equity(38650 [AVGO]) 267.010 259.769 1997342.0
Equity(39994 [NXPI]) 117.980 117.000 1532380.0
Equity(48384 [QRVO]) 68.880 67.060 908961.0
2018-01-04 00:00:00+00:00 Equity(351 [AMD]) 11.550 11.610 130934639.0
Equity(3951 [INTC]) 45.250 45.470 102911469.0
Equity(6295 [QCOM]) 65.940 65.220 5302155.0
Equity(38650 [AVGO]) 269.680 267.285 2213719.0
Equity(39994 [NXPI]) 118.150 118.000 2377471.0
Equity(48384 [QRVO]) 68.390 69.080 1207263.0
2018-01-05 00:00:00+00:00 Equity(351 [AMD]) 12.115 12.095 102192904.0
Equity(3951 [INTC]) 44.430 43.520 79694202.0
Equity(6295 [QCOM]) 66.060 66.180 3603491.0
Equity(38650 [AVGO]) 270.020 271.590 1363846.0
Equity(39994 [NXPI]) 117.900 118.090 1663124.0
Equity(48384 [QRVO]) 68.710 68.690 1042256.0
In [11]:
# One easy way to select a single stock is with the 'xs' (cross-section) method.
# To get only the data for INTC one could do this. The date is the level 0 index
# and the symbol is the level 1 index.
df.xs(symbols('INTC'), level=1)
Out[11]:
close open volume
2018-01-02 00:00:00+00:00 46.18 46.21 10118422.0
2018-01-03 00:00:00+00:00 46.83 46.38 14091350.0
2018-01-04 00:00:00+00:00 45.25 45.47 102911469.0
2018-01-05 00:00:00+00:00 44.43 43.52 79694202.0
In [12]:
# To select a single column such as volume one can use 'dot' notation.
# This returns a pandas Series holding one column of data for just one stock.
df.xs(symbols('INTC'), level=1).volume
Out[12]:
2018-01-02 00:00:00+00:00     10118422.0
2018-01-03 00:00:00+00:00     14091350.0
2018-01-04 00:00:00+00:00    102911469.0
2018-01-05 00:00:00+00:00     79694202.0
Name: volume, dtype: float64
In [13]:
# Pandas has very simple and powerful plotting capabilities based upon matplotlib.
# Plotting is as easy as adding the 'plot' method to the selected Series.
df.xs(symbols('INTC'), level=1).volume.plot()
Out[13]:
<matplotlib.axes._subplots.AxesSubplot at 0x7f1529f4dc10>
In [14]:
# Maybe one wants to compare all the volumes.
# Plot really would like to see data in rows and columns. It doesn't really know how to interpret
# the multi-index very well. No problem. One can easily turn an index level into columns using 'unstack'.
df.volume.unstack(level=1)
Out[14]:
Equity(351 [AMD]) Equity(3951 [INTC]) Equity(6295 [QCOM]) Equity(38650 [AVGO]) Equity(39994 [NXPI]) Equity(48384 [QRVO])
2018-01-02 00:00:00+00:00 23435082.0 10118422.0 3038214.0 1138916.0 1084999.0 484524.0
2018-01-03 00:00:00+00:00 39531765.0 14091350.0 4472162.0 1997342.0 1532380.0 908961.0
2018-01-04 00:00:00+00:00 130934639.0 102911469.0 5302155.0 2213719.0 2377471.0 1207263.0
2018-01-05 00:00:00+00:00 102192904.0 79694202.0 3603491.0 1363846.0 1663124.0 1042256.0
In [15]:
# That looks good. We have a column of volume data for each security.
# The plot method will now plot each column as its own line.
df.volume.unstack(level=1).plot()
Out[15]:
<matplotlib.axes._subplots.AxesSubplot at 0x7f152a4a9510>
In [16]:
# Sometimes it's easier to read on separate plots.
# Pandas plot can do that too. The universe has six securities, so a 3x2 grid
# gives each one its own panel without leaving most of the figure empty.
df.volume.unstack(level=1).plot(subplots=True, figsize=(15, 12), layout=(3, 2));
In [17]:
# The above plots could be a bit misleading since they are different scales.
# To force them all to have the same scale use the 'sharey' parameter.
# A 3x2 grid is enough for the six securities in the universe.
df.volume.unstack(level=1).plot(subplots=True, figsize=(15, 12), layout=(3, 2), sharey=True);

Those are the basics to plotting pandas dataframes.

However...

Pipeline may not be the best way to get data for plotting. The issue is that pipeline returns data which is 'split and dividend adjusted as of each day'. In other words, in the case of open, close, and volume data, these are the values that would actually have been seen on each day.

Let's look at AAPL data around the time of their big 7:1 stock split on 6-9-2014.

First make a new pipeline.

In [18]:
def make_aapl_pipeline():
    """Build a pipeline of the latest open, close and volume for AAPL only.

    The universe is a static set containing just AAPL and is applied as the
    pipeline screen, so AAPL is the only asset in the output.

    Returns:
        Pipeline: a pipeline with the columns 'open', 'close' and 'volume'.
    """
    universe = StaticAssets(symbols(['AAPL']))

    # Each column is the most recent daily value known to the pipeline.
    columns = {
        'open': USEquityPricing.open.latest,
        'close': USEquityPricing.close.latest,
        'volume': USEquityPricing.volume.latest,
    }

    return Pipeline(columns=columns, screen=universe)
    
In [19]:
# Run the AAPL pipeline for June 2014.
# NOTE: this rebinds `start`, `end` and `df` from the earlier cells.
start = '2014-06-01'
end = '2014-06-30'
df = run_pipeline(make_aapl_pipeline(), start, end)
df

Pipeline Execution Time: 0.71 Seconds
Out[19]:
close open volume
2014-06-02 00:00:00+00:00 Equity(24 [AAPL]) 633.00000 637.980000 1.473668e+07
2014-06-03 00:00:00+00:00 Equity(24 [AAPL]) 628.50000 633.960000 1.036449e+07
2014-06-04 00:00:00+00:00 Equity(24 [AAPL]) 637.54000 628.460000 8.121157e+06
2014-06-05 00:00:00+00:00 Equity(24 [AAPL]) 644.82000 637.440000 8.964095e+06
2014-06-06 00:00:00+00:00 Equity(24 [AAPL]) 647.35000 646.200000 7.841964e+06
2014-06-09 00:00:00+00:00 Equity(24 [AAPL]) 92.22613 92.844714 6.736860e+07
2014-06-10 00:00:00+00:00 Equity(24 [AAPL]) 93.70000 92.700000 6.438021e+07
2014-06-11 00:00:00+00:00 Equity(24 [AAPL]) 94.25000 94.730000 5.447028e+07
2014-06-12 00:00:00+00:00 Equity(24 [AAPL]) 93.87000 94.130000 3.942720e+07
2014-06-13 00:00:00+00:00 Equity(24 [AAPL]) 92.26000 94.040000 4.659990e+07
2014-06-16 00:00:00+00:00 Equity(24 [AAPL]) 91.26000 92.200000 4.751786e+07
2014-06-17 00:00:00+00:00 Equity(24 [AAPL]) 92.17000 91.510000 3.102466e+07
2014-06-18 00:00:00+00:00 Equity(24 [AAPL]) 92.07000 92.310000 2.498375e+07
2014-06-19 00:00:00+00:00 Equity(24 [AAPL]) 92.18000 92.270000 2.919784e+07
2014-06-20 00:00:00+00:00 Equity(24 [AAPL]) 91.84000 92.290000 3.013044e+07
2014-06-23 00:00:00+00:00 Equity(24 [AAPL]) 91.16000 91.850000 5.602332e+07
2014-06-24 00:00:00+00:00 Equity(24 [AAPL]) 90.82000 91.320000 3.241792e+07
2014-06-25 00:00:00+00:00 Equity(24 [AAPL]) 90.28000 90.750000 3.510258e+07
2014-06-26 00:00:00+00:00 Equity(24 [AAPL]) 90.36000 90.210000 3.057503e+07
2014-06-27 00:00:00+00:00 Equity(24 [AAPL]) 90.92000 90.370000 2.915607e+07
2014-06-30 00:00:00+00:00 Equity(24 [AAPL]) 91.97000 90.820000 3.344918e+07
In [20]:
# Big drop on 6-9-2014 when a stock split happened.
# These are the actual values people would have seen on those days.
# Let's see what the plot looks like.
df.close.plot();
In [21]:
# A big 7x drop at the time of the stock split.
# However, one typically wants prices adjusted as of a single common end date (not as of each date).
# The above graph makes it falsely appear there was a huge drop in price.
# So, rather than pipeline data, use the get_pricing method.
# This adjusts prices and volumes 'as of the end date of the method'.
# The following will adjust data as of 6-30-2014 (the value of `end`).
df_adjusted = get_pricing(symbols('AAPL'), start, end)
df_adjusted
Out[21]:
open_price high low close_price volume price
2014-06-02 00:00:00+00:00 90.568 90.692 88.930 89.788 7.254995e+07 89.788
2014-06-03 00:00:00+00:00 89.782 91.250 89.752 91.079 5.684696e+07 91.079
2014-06-04 00:00:00+00:00 91.065 92.558 90.875 92.119 6.274741e+07 92.119
2014-06-05 00:00:00+00:00 92.316 92.769 91.803 92.480 5.489265e+07 92.480
2014-06-06 00:00:00+00:00 92.845 93.039 92.069 92.226 6.736860e+07 92.226
2014-06-09 00:00:00+00:00 92.700 93.880 91.750 93.700 6.438021e+07 93.700
2014-06-10 00:00:00+00:00 94.730 95.050 93.570 94.250 5.447028e+07 94.250
2014-06-11 00:00:00+00:00 94.130 94.760 93.470 93.870 3.942720e+07 93.870
2014-06-12 00:00:00+00:00 94.040 94.120 91.900 92.260 4.659990e+07 92.260
2014-06-13 00:00:00+00:00 92.200 92.440 90.880 91.260 4.751786e+07 91.260
2014-06-16 00:00:00+00:00 91.510 92.750 91.450 92.170 3.102466e+07 92.170
2014-06-17 00:00:00+00:00 92.310 92.700 91.800 92.070 2.498375e+07 92.070
2014-06-18 00:00:00+00:00 92.270 92.290 91.350 92.180 2.919784e+07 92.180
2014-06-19 00:00:00+00:00 92.290 92.300 91.370 91.840 3.013044e+07 91.840
2014-06-20 00:00:00+00:00 91.850 92.550 90.910 91.160 5.602332e+07 91.160
2014-06-23 00:00:00+00:00 91.320 91.620 90.600 90.820 3.241792e+07 90.820
2014-06-24 00:00:00+00:00 90.750 91.740 90.190 90.280 3.510258e+07 90.280
2014-06-25 00:00:00+00:00 90.210 90.700 89.650 90.360 3.057503e+07 90.360
2014-06-26 00:00:00+00:00 90.370 91.050 89.800 90.920 2.915607e+07 90.920
2014-06-27 00:00:00+00:00 90.820 92.000 90.770 91.970 3.344918e+07 91.970
2014-06-30 00:00:00+00:00 92.100 93.725 92.090 92.960 4.033112e+07 92.960
In [22]:
# Let's plot the close prices returned by get_pricing
df_adjusted.close_price.plot();

Both pipeline and get_pricing approaches are valid but have different use cases.

In [ ]: