Notebook
In [1]:
from quantopian.pipeline import Pipeline
from quantopian.pipeline.data import USEquityPricing
from quantopian.pipeline.experimental import QTradableStocksUS
from quantopian.research import run_pipeline
from quantopian.pipeline.data import Fundamentals
In [15]:
def make_pipeline():
    base_universe = QTradableStocksUS()
    close_price = USEquityPricing.close.latest
    
    #Profitability Ratios
    earning_yield = Fundamentals.earning_yield.latest
    return_on_equity = Fundamentals.roe.latest
    return_on_assets = Fundamentals.roa.latest
    net_profit = Fundamentals.net_margin.latest
    
    #Liquidity Ratios
    quick_ratio = Fundamentals.quick_ratio.latest
    current_ratio = Fundamentals.current_ratio.latest
    debt_to_equity_ratio = (Fundamentals.long_term_debt_equity_ratio.latest)**-1
    interest_coverage = Fundamentals.interest_coverage.latest
    
    #Efficiency Ratios
    assets_turnover = Fundamentals.assets_turnover.latest
    change_in_working_capital = Fundamentals.change_in_working_capital.latest
    
    #Investment Ratios
    book_value_yield = Fundamentals.book_value_yield.latest
    fcf_yield = Fundamentals.fcf_yield.latest
    trailing_dividend_yield = Fundamentals.trailing_dividend_yield.latest
    total_yield = Fundamentals.total_yield.latest
    
    #Winsorising
    earning_yield_winsorized = earning_yield.winsorize(min_percentile=0.01, max_percentile=0.99)
    return_on_equity_winsorized = return_on_equity.winsorize(min_percentile=0.01, max_percentile=0.99)
    return_on_assets_winsorized = return_on_assets.winsorize(min_percentile=0.01, max_percentile=0.99)
    net_profit_winsorized = net_profit.winsorize(min_percentile=0.01, max_percentile=0.99)
    quick_ratio_winsorized = quick_ratio.winsorize(min_percentile=0.01, max_percentile=0.99)
    current_ratio_winsorized = current_ratio.winsorize(min_percentile=0.01, max_percentile=0.99)
    debt_to_equity_ratio_winsorized = debt_to_equity_ratio.winsorize(min_percentile=0.01, max_percentile=0.99)
    interest_coverage_winsorized = interest_coverage.winsorize(min_percentile=0.01, max_percentile=0.99)
    assets_turnover_winsorized = assets_turnover.winsorize(min_percentile=0.01, max_percentile=0.99)
    change_in_working_capital_winsorized = change_in_working_capital.winsorize(min_percentile=0.01, max_percentile=0.99)
    book_value_yield_winsorized = book_value_yield.winsorize(min_percentile=0.01, max_percentile=0.99)
    fcf_yield_winsorized = fcf_yield.winsorize(min_percentile=0.01, max_percentile=0.99)
    trailing_dividend_yield_winsorized = trailing_dividend_yield.winsorize(min_percentile=0.01, max_percentile=0.99)
    total_yield_winsorized = total_yield.winsorize(min_percentile=0.01, max_percentile=0.99)
    
    #Creating a combined factor


    return Pipeline(
    columns = {
        'close price': close_price,
        'earning_yield': earning_yield_winsorized.zscore(),
        'return_on_equity': return_on_equity_winsorized.zscore(),
        'return_on_assets': return_on_assets_winsorized.zscore(),
        'net_profit': net_profit_winsorized.zscore(),
        'quick_ratio': quick_ratio_winsorized.zscore(),
        'current_ratio': current_ratio_winsorized.zscore(),
        'debt_to_equity_ratio': debt_to_equity_ratio_winsorized.zscore(),
        'interest_coverage': interest_coverage_winsorized.zscore(),
        'assets_turnover': assets_turnover_winsorized.zscore(),
        'change_in_working_capital': change_in_working_capital_winsorized.zscore(),
        'book_value_yield': book_value_yield_winsorized.zscore(),
        'fcf_yield': fcf_yield_winsorized.zscore(),
        'trailing_dividend_yield': trailing_dividend_yield_winsorized.zscore(),
        'total_yield': total_yield_winsorized.zscore(),
    },
    screen = (base_universe),)
    
In [16]:
period_start = '2009-01-01'
period_end = '2014-01-01'

pipeline_output = run_pipeline(
    make_pipeline(),
    start_date = period_start,
    end_date = period_end)
In [17]:
pipeline_output.info()
<class 'pandas.core.frame.DataFrame'>
MultiIndex: 2299567 entries, (2009-01-02 00:00:00+00:00, Equity(2 [ARNC])) to (2014-01-02 00:00:00+00:00, Equity(44510 [FWM]))
Data columns (total 15 columns):
assets_turnover              float64
book_value_yield             float64
change_in_working_capital    float64
close price                  float64
current_ratio                float64
debt_to_equity_ratio         float64
earning_yield                float64
fcf_yield                    float64
interest_coverage            float64
net_profit                   float64
quick_ratio                  float64
return_on_assets             float64
return_on_equity             float64
total_yield                  float64
trailing_dividend_yield      float64
dtypes: float64(15)
memory usage: 280.7+ MB
In [19]:
pipeline_output.dropna(inplace=True)
In [22]:
pipeline_output.info()
<class 'pandas.core.frame.DataFrame'>
MultiIndex: 939317 entries, (2009-01-02 00:00:00+00:00, Equity(41 [ARCB])) to (2014-01-02 00:00:00+00:00, Equity(44508 [CST]))
Data columns (total 15 columns):
assets_turnover              939317 non-null float64
book_value_yield             939317 non-null float64
change_in_working_capital    939317 non-null float64
close price                  939317 non-null float64
current_ratio                939317 non-null float64
debt_to_equity_ratio         939317 non-null float64
earning_yield                939317 non-null float64
fcf_yield                    939317 non-null float64
interest_coverage            939317 non-null float64
net_profit                   939317 non-null float64
quick_ratio                  939317 non-null float64
return_on_assets             939317 non-null float64
return_on_equity             939317 non-null float64
total_yield                  939317 non-null float64
trailing_dividend_yield      939317 non-null float64
dtypes: float64(15)
memory usage: 114.7+ MB
In [23]:
pipeline_output['30d_future_close'] = pipeline_output['close price'].shift(-30)
pipeline_output['30d_close_future_pct'] = pipeline_output['30d_future_close'].pct_change(30)
In [24]:
pipeline_output.dropna(inplace=True)
In [25]:
pipeline_output.head()
Out[25]:
assets_turnover book_value_yield change_in_working_capital close price current_ratio debt_to_equity_ratio earning_yield fcf_yield interest_coverage net_profit quick_ratio return_on_assets return_on_equity total_yield trailing_dividend_yield 30d_future_close 30d_close_future_pct
2009-01-02 00:00:00+00:00 Equity(3490 [HCP]) -0.906487 -0.299722 -0.073550 27.78 0.255053 -0.181476 0.290604 0.279296 -0.127925 0.217826 0.462359 0.285918 0.223411 -0.122917 -0.150549 26.59 -0.042837
Equity(3519 [HEI]) 0.187917 -0.471460 -0.105106 38.82 0.029198 -0.176972 0.298925 0.282467 0.264632 0.176252 -0.325887 0.433042 0.238756 -0.639812 -0.462101 47.86 0.232870
Equity(3596 [HMA]) 0.172728 -0.469821 -0.365603 1.82 -0.240964 -0.181870 0.506210 0.650853 -0.132968 0.166823 -0.172285 0.165487 0.226685 -0.661178 -0.431391 53.60 28.450549
Equity(3668 [HRL]) 1.666380 -0.428789 -0.157392 31.03 -0.232483 -0.178946 0.308525 0.259000 -0.052672 0.170366 -0.443837 0.412780 0.240023 -0.329182 -0.357094 18.06 -0.417983
Equity(3676 [HRS]) 0.511444 -0.465372 -0.349073 38.04 -0.285473 -0.180500 0.305599 0.306674 -0.036738 0.176215 -0.315335 0.508437 0.276552 -0.184550 -0.383841 24.66 -0.351735
In [26]:
pipeline_output.info()
<class 'pandas.core.frame.DataFrame'>
MultiIndex: 939257 entries, (2009-01-02 00:00:00+00:00, Equity(3490 [HCP])) to (2014-01-02 00:00:00+00:00, Equity(41242 [ARCO]))
Data columns (total 17 columns):
assets_turnover              939257 non-null float64
book_value_yield             939257 non-null float64
change_in_working_capital    939257 non-null float64
close price                  939257 non-null float64
current_ratio                939257 non-null float64
debt_to_equity_ratio         939257 non-null float64
earning_yield                939257 non-null float64
fcf_yield                    939257 non-null float64
interest_coverage            939257 non-null float64
net_profit                   939257 non-null float64
quick_ratio                  939257 non-null float64
return_on_assets             939257 non-null float64
return_on_equity             939257 non-null float64
total_yield                  939257 non-null float64
trailing_dividend_yield      939257 non-null float64
30d_future_close             939257 non-null float64
30d_close_future_pct         939257 non-null float64
dtypes: float64(17)
memory usage: 129.0+ MB
In [70]:
# Import Lasso
from sklearn.linear_model import Lasso
import matplotlib.pyplot as plt

# Instantiate a lasso regressor: lasso
lasso = Lasso(alpha=0.4,normalize=True)

X = pipeline_output['30d_close_future_pct']
y = pipeline_output[pipeline_output.columns[~pipeline_output.columns.isin(['30d_future_close','30d_close_future_pct','close price'])]]
y = y.values
X = X.values
In [77]:
lasso.fit(X,y)

lasso_coef = lasso.coef_

ValueErrorTraceback (most recent call last)
<ipython-input-77-75f11c6f87ae> in <module>()
----> 1 lasso.fit(X,y)
      2 
      3 lasso_coef = lasso.coef_

/usr/local/lib/python2.7/dist-packages/sklearn/linear_model/coordinate_descent.pyc in fit(self, X, y)
    628         X, y = check_X_y(X, y, accept_sparse='csc', dtype=np.float64,
    629                          order='F', copy=self.copy_X and self.fit_intercept,
--> 630                          multi_output=True, y_numeric=True)
    631 
    632         X, y, X_mean, y_mean, X_std, precompute, Xy = \

/usr/local/lib/python2.7/dist-packages/sklearn/utils/validation.pyc in check_X_y(X, y, accept_sparse, dtype, order, copy, force_all_finite, ensure_2d, allow_nd, multi_output, ensure_min_samples, ensure_min_features, y_numeric)
    452         y = y.astype(np.float64)
    453 
--> 454     check_consistent_length(X, y)
    455 
    456     return X, y

/usr/local/lib/python2.7/dist-packages/sklearn/utils/validation.pyc in check_consistent_length(*arrays)
    172     if len(uniques) > 1:
    173         raise ValueError("Found arrays with inconsistent numbers of samples: "
--> 174                          "%s" % str(uniques))
    175 
    176 

ValueError: Found arrays with inconsistent numbers of samples: [     1 939257]
In [ ]:
 
In [ ]: