Notebook
In [1]:
from quantopian.pipeline.data import Fundamentals
from quantopian.pipeline.data import morningstar
from quantopian.research import run_pipeline
from quantopian.pipeline import Pipeline
from quantopian.pipeline.factors import Returns
from quantopian.pipeline.factors import SimpleMovingAverage
from quantopian.pipeline.factors import Latest
from quantopian.pipeline.factors import CustomFactor, AverageDollarVolume, RSI
from quantopian.pipeline.data.psychsignal import stocktwits
from quantopian.pipeline.data import USEquityPricing
from quantopian.pipeline.experimental import QTradableStocksUS
from quantopian.pipeline import CustomFactor
from quantopian.pipeline.factors.morningstar import MarketCap
from quantopian.pipeline.classifiers.morningstar import Sector
from quantopian.pipeline.data.psychsignal import aggregated_twitter_withretweets_stocktwits
from quantopian.research import symbols
from quantopian.pipeline.data.zacks import EarningsSurprises
from collections import OrderedDict

import alphalens as al
from quantopian.research import prices
import pandas as pd
import numpy as np
import talib
import matplotlib.pyplot as plt
import matplotlib as mpl
from math import isnan
import math
from scipy import stats
from statsmodels.tsa.stattools import adfuller
from time import time
from sklearn import linear_model, decomposition, ensemble, preprocessing, isotonic, metrics
In [2]:
class make_pipeline():
    '''Builds and runs a Pipeline whose columns are the latest values of a
    fixed list of ``Fundamentals`` fields.

    The class defines no ``__init__``. ``make_ml_pipeline`` reads
    ``self.universe`` and ``run_pipeline`` reads ``self.start`` / ``self.end``,
    so those attributes must be set (for example by a subclass) before the
    corresponding method is called.

    Expected call order: ``make_factors`` -> ``make_ml_pipeline`` ->
    ``run_pipeline``.
    '''

    # Names of the Fundamentals fields that become pipeline columns. The order
    # here is the order of the columns in the resulting pipeline.
    FACTOR_NAMES = (
        'total_revenue',
        'selling_and_marketing_expense',
        'research_and_development',
        'operating_income',
        'interest_expense',
        'tax_provision',
        'normalized_income',
        'accounts_receivable',
        'allowance_for_doubtful_accounts_receivable',
        'inventory',
        'inventories_adjustments_allowances',
        'prepaid_assets',
        'other_current_assets',
        'net_ppe',
        'goodwill_and_other_intangible_assets',
        'deferred_tax',
        'accounts_payable',
        'accrued_liabilities_total',
        'income_tax_payable',
        'other_liabilities',
        'current_assets',
        'total_non_current_assets',
        'current_liabilities',
        'total_non_current_liabilities_net_minority_interest',
        'stockholders_equity',
        'enterprise_value',
        'shares_outstanding',
    )

    @staticmethod
    def _latest_getter(field_name):
        '''Return a zero-argument callable that evaluates
        ``Fundamentals.<field_name>.latest`` when it is called.'''
        def factor():
            return getattr(Fundamentals, field_name).latest
        # Keep the field name on the callable, as the hand-written functions had.
        factor.__name__ = field_name
        return factor

    def make_factors(self):
        '''Populate ``self.factors``: an ordered mapping from each name in
        ``FACTOR_NAMES`` to a zero-argument factory for that field's
        ``.latest`` term.

        The factories are not called here; ``make_ml_pipeline`` calls them.
        '''
        # An OrderedDict (rather than a plain dict) keeps the column order
        # deterministic on interpreters whose dicts are unordered.
        self.factors = OrderedDict(
            (name, self._latest_getter(name)) for name in self.FACTOR_NAMES)

    def make_ml_pipeline(self):
        '''Create ``self.ml_pipe``, a Pipeline screened by ``self.universe``
        with one column per entry of ``self.factors``.

        Requires ``make_factors`` to have been called and ``self.universe``
        to be set.
        '''
        # .items() exists on both Python 2 and Python 3; .iteritems() is
        # Python 2 only.
        factors_pipe = OrderedDict(
            (name, build()) for name, build in self.factors.items())
        self.ml_pipe = Pipeline(
            screen=(self.universe),
            columns=factors_pipe)

    def run_pipeline(self):
        '''Runs the pipeline engine.

        Executes ``self.ml_pipe`` between ``self.start`` and ``self.end``,
        stores the result in ``self.pipeline_output`` and the wall-clock
        duration (seconds) in ``self.pipeline_seconds``.

        Returns:
            The pipeline result (the same object as ``self.pipeline_output``).
        '''
        start_timer = time()
        # Inside a method body the bare name resolves to the module-level
        # run_pipeline function, not to this method.
        self.pipeline_output = run_pipeline(
            self.ml_pipe, start_date=self.start, end_date=self.end)
        self.pipeline_seconds = time() - start_timer
        return self.pipeline_output
In [3]:
class Dataload(make_pipeline):
    '''Stores the universe and date range and drives the inherited pipeline steps.'''

    def __init__(self, universe, start, end):
        # These three attributes are read later by the inherited
        # make_ml_pipeline (universe) and run_pipeline (start, end).
        self.universe, self.start, self.end = universe, start, end

    def execute_transform_pipeline(self):
        '''Build the factors, assemble the pipeline and run it, in that order.

        The result is left on ``self.pipeline_output`` by ``run_pipeline``.
        '''
        for step in (self.make_factors, self.make_ml_pipeline, self.run_pipeline):
            step()
In [6]:
# Loader over the QTradableStocksUS universe for 2002-12-31 through 2018-12-31.
dataload = Dataload(
    universe=QTradableStocksUS(),
    start=pd.Timestamp("2002-12-31"),
    end=pd.Timestamp("2018-12-31"),
)
In [7]:
# Build the factor factories, assemble the Pipeline and run it; the result is
# stored on dataload.pipeline_output. The recorded output of this cell shows
# the run took about 2 minutes 22 seconds.
dataload.execute_transform_pipeline()

Pipeline Execution Time: 2 Minutes, 22.12 Seconds
In [12]:
# Short alias for the pipeline result stored by execute_transform_pipeline().
df=dataload.pipeline_output
In [13]:
# Start from an empty frame; the following cell adds the per-column
# null-count statistics to it.
dfnew=pd.DataFrame()
In [14]:
# Per-column missing-data summary of the pipeline output: one row per column of df.
dfnew["null_records"] = df.isnull().sum()
dfnew["notnull_records"] = df.notnull().sum()
# Null and non-null counts together give the number of rows examined per column.
dfnew["total_records"] = dfnew["null_records"] + dfnew["notnull_records"]
# Fraction of each column that is missing.
dfnew["null_records/total_records"] = dfnew["null_records"].div(dfnew["total_records"])
In [15]:
# Display the summary ordered from the highest null fraction to the lowest.
dfnew.sort_values(by="null_records/total_records",ascending=False)
Out[15]:
null_records notnull_records total_records null_records/total_records
accrued_liabilities_total 7746361 3896 7750257 0.999497
allowance_for_doubtful_accounts_receivable 7503318 246939 7750257 0.968138
other_liabilities 6907826 842431 7750257 0.891303
inventories_adjustments_allowances 6608211 1142046 7750257 0.852644
selling_and_marketing_expense 6168626 1581631 7750257 0.795925
research_and_development 5154773 2595484 7750257 0.665110
income_tax_payable 4957454 2792803 7750257 0.639650
total_non_current_liabilities_net_minority_interest 3545291 4204966 7750257 0.457442
prepaid_assets 2383936 5366321 7750257 0.307594
inventory 2353582 5396675 7750257 0.303678
deferred_tax 1583594 6166663 7750257 0.204328
other_current_assets 1471091 6279166 7750257 0.189812
accounts_payable 1062378 6687879 7750257 0.137076
current_liabilities 1061565 6688692 7750257 0.136972
total_non_current_assets 1059922 6690335 7750257 0.136760
current_assets 1059922 6690335 7750257 0.136760
goodwill_and_other_intangible_assets 1058822 6691435 7750257 0.136618
accounts_receivable 938188 6812069 7750257 0.121053
interest_expense 536250 7214007 7750257 0.069191
tax_provision 473369 7276888 7750257 0.061078
shares_outstanding 416122 7334135 7750257 0.053691
net_ppe 321126 7429131 7750257 0.041434
operating_income 182578 7567679 7750257 0.023558
total_revenue 152698 7597559 7750257 0.019702
normalized_income 149086 7601171 7750257 0.019236
stockholders_equity 145918 7604339 7750257 0.018828
enterprise_value 3196 7747061 7750257 0.000412