Compare the speed and results of two custom factors, each calculating trailing-twelve-month (TTM) values. The first is a numpy implementation by Doug Baldwin (https://www.quantopian.com/posts/trailing-twelve-months-ttm-with-as-of-date). The second is a pandas implementation.
# First, we need to import the basic pipeline methods
from quantopian.pipeline import Pipeline, CustomFactor
from quantopian.research import run_pipeline
# Also get the built-in filters and/or factors to use
from quantopian.pipeline.filters import QTradableStocksUS, Q500US, StaticAssets
# Finally get any data we want to use
from quantopian.pipeline.data.morningstar import Fundamentals as ms
from quantopian.pipeline.domain import US_EQUITIES
# Import numpy and pandas because they rock
import numpy as np
import pandas as pd
# First the numpy version
class TrailingTwelveMonths(CustomFactor):
    """
    Sum the trailing twelve months (TTM) of reported values using numpy.

    Inputs (in this order):
        1. a fundamentals value field
        2. the associated asof_date field

    For each asset column, keeps only rows whose asof_date falls within 52
    weeks of the most recent asof_date in the window, de-duplicates by
    asof_date (pipeline forward-fills, so each asof_date repeats the same
    value), and sums one value per unique date.

    NOTE(review): if fewer than 4 quarters were reported inside the window,
    this still sums whatever is available (and yields 0 when nothing is).
    The pandas version below returns NaN in that case instead.
    """
    # ~315 trading days ≈ 5 quarters (252/4 x 5) to be sure 4 quarters fit
    window_length = 315
    # Output is a plain sum of raw fundamentals, safe to window over again
    window_safe = True
    # NOTE: removed invalid class attribute `mask=symbols` -- `symbols` is
    # undefined at module scope, and `mask` is passed at instantiation anyway.

    def compute(self, today, assets, out, values, dates):
        out[:] = [
            # values whose asof_date lies within 52 weeks of the latest date,
            # indexed at the first occurrence of each unique asof_date
            (v[d + np.timedelta64(52, 'W') > d[-1]])[
                np.unique(
                    d[d + np.timedelta64(52, 'W') > d[-1]],
                    return_index=True
                )[1]
            ].sum()
            for v, d in zip(values.T, dates.T)
        ]
# Next a version which uses pandas methods
# A little easier to understand but probably less efficient
class TrailingTwelveMonths_Pandas(CustomFactor):
    """
    Gets the sum of the last 4 reported values using pandas.

    Uses a window length of ~5 quarters (252/4 x 5) to ensure we have at
    least 4 quarters of data in the window.

    Requires 2 inputs, in this order: the dataset values and the associated
    asof_dates.

    Outputs:
        value: sum of the 4 most recent uniquely-dated values per asset,
            or NaN when fewer than 4 unique asof_dates are in the window
        unique_values: count of unique asof_dates found (capped at 4)
    """
    window_length = 315
    outputs = ['value', 'unique_values']

    def compute(self, today, assets, out, values, asof_date):
        # Make a single multi-index dataframe with asof_date and values as columns
        # The days are the index level=0 and securities are level=1
        date_df = pd.DataFrame(asof_date).stack().rename('date')
        value_df = pd.DataFrame(values).stack().rename('value')
        df = pd.concat([date_df, value_df], axis=1)
        # Add a column with the asset to make drop_duplicates easier
        df['asset'] = df.index.get_level_values(level=1)
        # Drop duplicate dates, keep only the latest value for each date
        df_dropped = df.drop_duplicates(['date', 'asset'], keep='last')
        # Take only the latest 4 dates. Then, filter out any groups with less than 4 qtrs of data
        df_top_4 = df_dropped.groupby(level=1).tail(4)
        df_exactly_4 = df_top_4.groupby(level=1).filter(lambda group: group.date.size == 4)
        # Sum the values where there are exactly 4 unique asof_dates to get TTM. Get counts.
        value_sum = df_exactly_4.groupby(level=1).value.sum()
        counts = df_top_4.groupby(level=1).date.count()
        # Make a series of nans to hold our output values (will default to nan if no TTM value)
        output_sum = pd.Series(np.nan, index=range(len(assets)))
        output_counts = pd.Series(np.nan, index=range(len(assets)))
        # Put our TTM values into the series
        # (the groupby index is the level=1 integer asset position, so it
        # aligns with the positional range index created above)
        output_sum.loc[value_sum.index] = value_sum
        output_counts.loc[counts.index] = counts
        out.value[:] = output_sum
        out.unique_values[:] = output_counts
Let's first run a pipeline using the numpy version and check the time it takes to complete. Use the dividend field to get the TTM dividends.
# Build the numpy TTM factor from the dividend field and its asof_date.
# (Inputs must be passed in this order: values first, asof_dates second.)
# NOTE(review): removed the unused `etrn_filter = StaticAssets(symbols(['ETRN']))`
# line -- `symbols` is never imported and the filter was never referenced.
dividends_ttm_np = TrailingTwelveMonths(
    inputs=[
        ms.dividend_per_share_earnings_reports,
        ms.dividend_per_share_earnings_reports_asof_date
    ],
    mask=QTradableStocksUS()
)
pipeline_numpy = Pipeline(
    columns={'dividends_ttm_np': dividends_ttm_np},
    screen=QTradableStocksUS()
)
# Single-day run so the timing comparison between versions is apples-to-apples
start = '2019-10-28'
end = '2019-10-28'
results = run_pipeline(pipeline_numpy, start, end)
results.head()
Second, run a pipeline using the pandas version and check the time it takes to complete. Use the same dividend field to get the TTM dividends.
# Build the pandas TTM factor with the same inputs as the numpy version.
dividends_ttm_pd = TrailingTwelveMonths_Pandas(
    inputs=[
        ms.dividend_per_share_earnings_reports,
        ms.dividend_per_share_earnings_reports_asof_date
    ],
    mask=QTradableStocksUS()
)
# Renamed from the copy-pasted `pipeline_numpy` -- this one holds the pandas
# factor (plus its unique-date count output).
pipeline_pandas = Pipeline(
    columns={'dividends_ttm_pd': dividends_ttm_pd.value,
             'count': dividends_ttm_pd.unique_values},
    screen=QTradableStocksUS()
)
# Same single day as the numpy run so the timings are comparable
start = '2019-10-28'
end = '2019-10-28'
results = run_pipeline(pipeline_pandas, start, end)
results.head()
This is roughly what was expected: the pandas version takes over 50% more time, though it's still fast. Next, let's run both factors in the same pipeline and compare their results in a single dataframe.
# Re-create both factors and run them side by side in one pipeline so the
# outputs land in a single dataframe for comparison.
dividends_ttm_np = TrailingTwelveMonths(
    inputs=[
        ms.dividend_per_share_earnings_reports,
        ms.dividend_per_share_earnings_reports_asof_date
    ],
    mask=QTradableStocksUS()
)
dividends_ttm_pd = TrailingTwelveMonths_Pandas(
    inputs=[
        ms.dividend_per_share_earnings_reports,
        ms.dividend_per_share_earnings_reports_asof_date
    ],
    mask=QTradableStocksUS()
)
# Renamed from the copy-pasted `pipeline_numpy` -- this pipeline holds both
# implementations plus the pandas version's unique-date count.
pipeline_combined = Pipeline(
    columns={'dividends_ttm_np': dividends_ttm_np,
             'dividends_ttm_pd': dividends_ttm_pd.value,
             'count': dividends_ttm_pd.unique_values,
             },
    screen=QTradableStocksUS()
)
start = '2019-10-28'
end = '2019-10-28'
results = run_pipeline(pipeline_combined, start, end)
results.head()
# Find every row where the two implementations disagree
# (boolean-mask indexing; NaN != NaN, so all-NaN pandas rows show up too)
diffs = results[results['dividends_ttm_np'] != results['dividends_ttm_pd']]
diffs
# The differences look like securities with no reported dividends and those
# with fewer than 4 qtrs of data. Select rows with a non-null count != 4;
# .notna() is equivalent to the `count == count` nan-trick in query syntax.
have_count = results['count'].notna()
not_four_qtrs = results['count'] != 4
diffs = results[not_four_qtrs & have_count]
diffs
# Check for rows where a full 4 qtrs of data exist yet the versions disagree.
# (count == 4) already excludes NaN, since NaN compares False to everything.
# BUG FIX: the original also required (count != count), i.e. count is NaN,
# which contradicts (count == 4) and made this query always return empty.
diffs = results.query('(count == 4) and (dividends_ttm_np != dividends_ttm_pd)')
diffs
It appears the two versions give the same results if there are 4 or more qtrs of data. The numpy version outputs 0 for securities without any quarters of data while the pandas version outputs nan.
However, there is an issue with the numpy version. It returns a value even if there are less than 4 qtrs of data. It will just sum what it has. This probably could be fixed.
So, the numpy version is concise and fast (and has a little bug) while the pandas version is a little more verbose and slower. The pandas version may be optimized a bit but will probably still be slower than numpy.