In a 2008 paper called "In Search of Distress Risk", John Campbell, Jens Hilscher, and Jan Szilagyi comprehensively explore the determinants of corporate failure.
The logit model takes a binary dependent variable (failure or no failure) and yields a logit value, the “logit probability of financial distress” (LPFD), calculated as follows:
LPFD = −20.26 × NIMTAAVG +1.42 × TLMTA −7.13 × EXRETAVG +1.41 × SIGMA −0.045 × RSIZE −2.13 × CASHMTA +0.075 × MB −0.058 × PRICE −9.16
The paper was updated in January 2010 with new weights.
Link to the Paper: http://scholar.harvard.edu/campbell/publications/search-distress-risk
OLD: https://www.quantopian.com/posts/campbell-hilscher-szilagyi-chs-model-probability-of-corporate-failure NEW: https://www.quantopian.com/posts/campbell-hilscher-szilagyi-chs-model-probability-of-corporate-failure-update-version
# Initialisation
import pandas as pd
import numpy as np
import datetime
from collections import OrderedDict
# Handle used below to name fundamentals fields inside query(...).
# init_fundamentals is not defined or imported in the visible code --
# presumably it is supplied by the hosting research environment (confirm).
fundamentals = init_fundamentals()
from zipline.utils.tradingcalendar import get_trading_days
# NOTE: this rebinds the name `datetime` from the module imported above to the
# datetime class, which is what the `datetime(y, m, 1)` call further down uses.
from datetime import date, datetime, timedelta
def three_month_ago(t):
    """Return the first trading day of the month three months before ``t``.

    Args:
        t: object exposing ``year`` and ``month`` attributes (for example a
            ``date`` or ``datetime``).

    Returns:
        A ``datetime.date`` built from the first entry that
        ``get_trading_days`` returns for the window starting on the 1st of
        the target month and ending seven days later.

    Raises:
        IndexError: if ``get_trading_days`` returns nothing for that window.
    """
    m = t.month - 3
    y = t.year
    if m < 1:
        # Stepped back past January: wrap into the previous year.
        m += 12
        y -= 1
    s = datetime(y, m, 1)
    e = s + timedelta(days=7)
    # Query the trading calendar once and reuse the result; the original
    # repeated the same lookup for year, month and day.
    first_session = get_trading_days(s, e)[0]
    return date(first_session.year, first_session.month, first_session.day)
# Quarter boundaries, newest first: t0 is the hard-coded ending date and every
# later entry is three_month_ago() applied to the one before it.
# Alternative kept from the original notebook (anchor on the current month):
#   lag = 0
#   today = datetime.now()
#   t0 = date(today.year - lag, today.month, 1)
_quarter_dates = [date(2015, 9, 1)]
while len(_quarter_dates) < 5:
    _quarter_dates.append(three_month_ago(_quarter_dates[-1]))
t0, t1, t2, t3, t4 = _quarter_dates
# Single pre-formatted string so the output is the same space-separated line
# the comma-separated print statement produced.
print("%s %s %s %s %s" % (t0, t1, t2, t3, t4))
def fund_df(t):
    """Fetch the screened fundamentals snapshot for date ``t``.

    Builds a ``query(...)`` over seven fundamentals fields, applies the
    universe and sanity filters below, and hands the query together with
    ``t`` to ``get_fundamentals``.

    Args:
        t: the as-of date, passed through unchanged as the second argument
            of ``get_fundamentals``.

    Returns:
        Whatever ``get_fundamentals`` returns for the query. Later code in
        this file indexes it with ``.loc['market_cap']`` and feeds it to
        ``pd.Panel``, so presumably a DataFrame with one row per requested
        field -- confirm against the platform documentation.
    """
    return get_fundamentals(
        query(
            fundamentals.valuation.market_cap,
            fundamentals.valuation.shares_outstanding,
            fundamentals.balance_sheet.cash_and_cash_equivalents,
            fundamentals.balance_sheet.stockholders_equity,
            fundamentals.balance_sheet.total_assets,
            fundamentals.balance_sheet.total_debt,
            fundamentals.income_statement.net_income
        )
        # No Financials (103), Real Estate (104), Utilities (207) and ADR
        .filter(fundamentals.company_reference.industry_template_code != 'B')
        .filter(fundamentals.company_reference.industry_template_code != 'I')
        .filter(fundamentals.company_reference.industry_template_code != 'F')
        .filter(fundamentals.asset_classification.morningstar_sector_code != 103)
        .filter(fundamentals.asset_classification.morningstar_sector_code != 104)
        .filter(fundamentals.asset_classification.morningstar_sector_code != 207)
        .filter(fundamentals.share_class_reference.is_depositary_receipt == False)
        .filter(fundamentals.share_class_reference.is_primary_share == True)
        # Only pick active common stocks
        .filter(fundamentals.share_class_reference.share_class_status == "A")
        .filter(fundamentals.share_class_reference.security_type == "ST00000001")
        # Exclude When Distributed (WD), When Issued (WI) and VJ - usually companies in bankruptcy
        .filter(~fundamentals.share_class_reference.symbol.like('%\_WI'))
        .filter(~fundamentals.share_class_reference.symbol.like('%\_WD'))
        .filter(~fundamentals.share_class_reference.symbol.like('%\_VJ'))
        # Exclude Halted stocks
        .filter(~fundamentals.share_class_reference.symbol.like('%\_V'))
        .filter(~fundamentals.share_class_reference.symbol.like('%\_H'))
        # Only NYSE, AMEX and Nasdaq
        .filter(fundamentals.company_reference.primary_exchange_id.in_(['NYSE', 'NAS', 'AMEX']))
        # Sanity check
        # TODO better None or > 0 ?
        .filter(fundamentals.valuation.market_cap > 0)
        .filter(fundamentals.valuation.shares_outstanding > 0)
        .filter(fundamentals.balance_sheet.invested_capital > 0)
        .filter(fundamentals.balance_sheet.cash_and_cash_equivalents > 0)
        .filter(fundamentals.balance_sheet.current_assets > 0)
        .filter(fundamentals.balance_sheet.total_assets > 0)
        # `!= None` is deliberate: it is turned into a column expression for
        # the query. The previous `is not None` was evaluated by Python
        # itself to plain True, so that filter never excluded anything.
        .filter(fundamentals.cash_flow_statement.free_cash_flow != None)  # noqa: E711
        .filter(fundamentals.valuation.enterprise_value > 0),
        t)
# One screened fundamentals snapshot per quarter boundary, fetched newest first.
fund_df0, fund_df1, fund_df2, fund_df3, fund_df4 = map(
    fund_df, (t0, t1, t2, t3, t4))

# YYYY-MM-DD labels used as the Panel item keys.
qtr0, qtr1, qtr2, qtr3, qtr4 = (
    _day.isoformat() for _day in (t0, t1, t2, t3, t4))

# Insert oldest -> newest so negative item slices pick the latest quarters.
fundamental_dict = OrderedDict([
    (qtr4, fund_df4),
    (qtr3, fund_df3),
    (qtr2, fund_df2),
    (qtr1, fund_df1),
    (qtr0, fund_df0),
])
fundamental_data = pd.Panel(fundamental_dict)
items = fundamental_data.items
fundamental_data.minor_axis
symbols = fundamental_data.minor_axis
# Daily closes for every screened symbol between t1 and t0.
price_history = get_pricing(symbols, fields='close_price', start_date=t1, end_date=t0)
price_history.head()

# SPY closes between t4 and t0, re-indexed onto every calendar day with
# forward fill so a value exists for each of the five boundary dates.
prices_sp_all = get_pricing('SPY', fields='close_price', start_date=t4, end_date=t0)
prices_sp_index = pd.date_range(prices_sp_all.index[0], prices_sp_all.index[-1])
prices_sp_all = prices_sp_all.reindex(prices_sp_index, method='ffill')

# Boundary dates, oldest first.
dates = list(map(pd.Timestamp, (t4, t3, t2, t1, t0)))

# SPY scaled by 10 -- presumably to approximate the S&P 500 index level
# (confirm).
prices_sp = 10.0*prices_sp_all.loc[dates]
print(prices_sp)

# Simple period-over-period return; the first row has no predecessor and is
# dropped.
_previous_sp = prices_sp.shift(1)
returns_sp = ((prices_sp - _previous_sp) / _previous_sp)[1:]
returns_sp
# Ratios over the four most recent snapshots. MTA is computed here as
# total_debt + market_cap; the other three series are scaled by it.
_recent = fundamental_data[-4:]
_debt = _recent.loc[:,'total_debt']
mta = _debt + _recent.loc[:,'market_cap']
nimta = _recent.loc[:,'net_income'] / mta
tlmta = _debt / mta
cashmta = _recent.loc[:,'cash_and_cash_equivalents'] / mta
for _ratio in (nimta, tlmta, cashmta):
    print(_ratio.head())
# Implied share price per snapshot: market cap divided by shares outstanding,
# over the five most recent snapshots.
_latest_five = fundamental_data[-5:]
prices = _latest_five.loc[:,'market_cap'] / _latest_five.loc[:,'shares_outstanding']
prices.head()
# Sample row noted in the original notebook:
#Equity(24 [AAPL]) 643.86 100.750000 112.5200 126.37 126.750000
# (100.750000 - 643.86) / 643.86

# Snapshot-over-snapshot simple return; the first column has no predecessor
# and is dropped.
_prior_prices = prices.shift(1, axis=1)
returns = ((prices - _prior_prices) / _prior_prices).iloc[:,1:]
returns.head()

# Log return of each stock minus the log return of the S&P series.
_log_stock = np.log(returns.add(1))
_log_market = np.log(returns_sp.add(1))
exret = _log_stock.sub(_log_market)
exret.head()
# Daily simple returns from the close history; the first row has no previous
# close and is dropped.
_prev_close = price_history.shift(1, axis=0)
returns_daily = ((price_history - _prev_close) / _prev_close).iloc[1:]
n = len(returns_daily)
# Square root of: sum of squared deviations from the mean daily return,
# scaled by 252 / (n - 1).
_squared_dev = returns_daily.sub(returns_daily.mean()).pow(2)
sigma = _squared_dev.sum().multiply(252.0/(n-1.0)).pow(0.5)
sigma.describe()
# Hard-coded S&P 500 divisor used to turn an index level into a market value.
sp500_divisor = 9350070273.0
# prices_sp was already multiplied by 10.0 when it was built from the SPY
# closes, so its last value is used as-is. Multiplying by 10 again (as the
# previous version did) overstated the market value tenfold and shifted
# RSIZE by -ln(10).
sp500_market_value = sp500_divisor * prices_sp.iloc[-1]
# RSIZE: log of each firm's latest market cap relative to that market value.
rsize = np.log(fund_df0.loc['market_cap'] / sp500_market_value)
rsize.head()
# Book value proxy from the latest snapshot: total assets moved 10% of the
# way towards market cap.
# NOTE(review): fund_df also fetches stockholders_equity, which is never used
# in this file -- confirm that total_assets is the intended base here.
_latest_assets = fund_df0.loc['total_assets']
_latest_cap = fund_df0.loc['market_cap']
adjusted_book_value= _latest_assets + 0.1*(_latest_cap - _latest_assets)
# Divide every column of mta by that per-symbol value (row-wise alignment).
mb = mta.div(adjusted_book_value, axis=0)
mb.head()
# Latest implied price per symbol, capped at 15 before taking the log.
# clip() returns a new Series; the previous in-place masked assignment on a
# column selection could write the cap back into `prices` itself.
capped_prices = prices.iloc[:,-1].clip(upper=15)
log_prices = np.log(capped_prices)
log_prices.head()
def _recency_weighted(frame):
    """Blend the last four columns of ``frame``, newest weighted highest."""
    return (0.5333*frame.iloc[:,-1] + 0.2666*frame.iloc[:,-2]
            + 0.1333*frame.iloc[:,-3] + 0.0666*frame.iloc[:,-4])


# The same weighting is applied to profitability (NIMTA) and to excess
# return (EXRET).
nimtaavg = _recency_weighted(nimta)
nimtaavg.head()
exretavg = _recency_weighted(exret)
exretavg.head()
# Descriptive Statistics
def _labelled_summary(values, label):
    """Return ``values.describe()`` with its ``name`` set to ``label``."""
    summary = values.describe()
    summary.name = label
    return summary


# Panel-derived frames contribute their latest column; rsize, sigma and
# log_prices are summarised as they are.
nimta_stats = _labelled_summary(nimta.iloc[:,-1], 'NIMTA')
tlmta_stats = _labelled_summary(tlmta.iloc[:,-1], 'TLMTA')
exret_stats = _labelled_summary(exret.iloc[:,-1], 'EXRET')
rsize_stats = _labelled_summary(rsize, 'RSIZE')
sigma_stats = _labelled_summary(sigma, 'SIGMA')
cashmta_stats = _labelled_summary(cashmta.iloc[:,-1], 'CASHMTA')
mb_stats = _labelled_summary(mb.iloc[:,-1], 'MB')
log_prices_stats = _labelled_summary(log_prices, 'PRICE')
# Side-by-side table, one column per variable.
pd.concat(
    [nimta_stats, tlmta_stats, exret_stats, rsize_stats,
     sigma_stats, cashmta_stats, mb_stats, log_prices_stats],
    axis=1)
# For comparison, summary statistics as reported in the paper (Table II):
#          NIMTA  TLMTA  EXRET   RSIZE    SIGMA  CASHMTA  MB     PRICE
# Mean     0.000  0.445  -0.011  -10.456  0.562  0.084    2.041  2.019
# Median   0.006  0.427  -0.009  -10.570  0.471  0.045    1.557  2.474
# Logit score: a linear combination of the eight explanatory variables plus
# an intercept. Terms are kept in the original order so the floating-point
# result is unchanged.
lpfd = (
    -20.12*nimtaavg
    + 1.60*tlmta.iloc[:,-1]
    - 7.88*exretavg
    + 1.55*sigma
    - 0.005*rsize
    - 2.27*cashmta.iloc[:,-1]
    + 0.07*mb.iloc[:,-1]
    - 0.09*log_prices
    - 8.87
)
nimtaavg
lpfd.head()
# PFD (Probability of Financial Distress): the logistic transform of the
# logit score, so it lies between 0 and 1 (0% to 100%). Per the original
# notes, zero means no probability of financial distress in the next 12
# months and 100 percent means certain financial distress.
pfd = 1.0 / (1.0 + np.exp(-lpfd))

# Keep only names above 50%, highest probability first.
_above_half = pfd > 0.50
distressed_companies = pfd[_above_half].order(ascending=False)

# One report line per company: symbol, name, probability in percent.
for equity in distressed_companies.index:
    print("%-5s %-40s %.2f%%" % (
        equity.symbol,
        equity.asset_name,
        100.0*distressed_companies[equity],
    ))