Here are my functions for grabbing 3 trailing years of fundamental data, annualizing IS and CFS 3mth ending numbers (trailing twelve months) from pandas dataframes. I welcome any suggestions on how to speed up/simplify the code.
def _add_4qtrs(stk, idx, asof_dTbl, asof_grp, src_df):
''' annualizes IS and CFS from quarterlies '''
FS = None
for i in idx:
if FS is None: FS = src_df.loc[asof_dTbl[asof_grp[i]]].loc[stk].fillna(0)
else: FS = FS + src_df.loc[asof_dTbl[asof_grp[i]]].loc[stk].fillna(0)
if src_df.columns.equals(CFS_f_df.columns):
# set beg cash to value at the beg of first quarter of TTM instead of summing across 4 qtrs
FS.CFS_beginning_cash_position = \
src_df.loc[asof_dTbl[asof_grp[idx[0]]]].loc[stk].CFS_beginning_cash_position
return FS
def Hist_FS(BS_f_df, IS_f_df, CFS_f_df, nfo_df, TGT_endDate, num = 4):
''' Returns 3 years of trailing quarterly financial statements and annualizes IS & CFS
PARAMETERS---------------------------------------
BS_f_df, IS_f_df, CFS_f_df = balance sheet, income statement, cash flow statement source dataframes.
TGT_endDate = timestamp of pipeline date to retrieve trailing data for.
num = number of historical years to retrieve.
RETURNS---------------------------------------
3 BS/IS/CFS dataframes corresponding to each year; balance sheet BS_3yAgo
represents the beg of yr 3 BS for ratios using BS averages.
'''
# timestamp for start of 4 year slice of data ending in tgt_endDate
TTM_begDate = TGT_endDate - MonthBegin((12*num))
min_qtrs = num * 4 + 1 # min qtrs of data for 3 yrs trailing
# create 3mth sequences of past asof dates for debugging purposes and to test consistency of quaterly
# asof date sequences: sometimes companies change quarter ends.
# for n in range(num):
# nfo_df['asof_'+str(n + 1)+'QSub'] = nfo_df['max_asof'] - MonthEnd(3 * (n + 1))
# initialize empty dataframes with same symbol and column indexes as last rpt'd qtr
BS_0yAgo = pd.DataFrame().reindex_like(BS_f_df.xs(TGT_endDate))
BS_1yAgo = pd.DataFrame().reindex_like(BS_f_df.xs(TGT_endDate))
BS_2yAgo = pd.DataFrame().reindex_like(BS_f_df.xs(TGT_endDate))
BS_3yAgo = pd.DataFrame().reindex_like(BS_f_df.xs(TGT_endDate))
IS_0yAgo_TTM = pd.DataFrame().reindex_like(IS_f_df.xs(TGT_endDate))
IS_1yAgo_TTM = pd.DataFrame().reindex_like(IS_f_df.xs(TGT_endDate))
IS_2yAgo_TTM = pd.DataFrame().reindex_like(IS_f_df.xs(TGT_endDate))
CFS_0yAgo_TTM = pd.DataFrame().reindex_like(CFS_f_df.xs(TGT_endDate))
CFS_1yAgo_TTM = pd.DataFrame().reindex_like(CFS_f_df.xs(TGT_endDate))
CFS_2yAgo_TTM = pd.DataFrame().reindex_like(CFS_f_df.xs(TGT_endDate))
insufficient_data = {} # init dict for logging stks with less than 3 yrs of data
# 3-year of slice of nfo_df data grouped by symbol
stk_grpby = nfo_df[TTM_begDate:TGT_endDate].reset_index()
stk_grpby = stk_grpby.loc[:,('level_0','level_1','max_asof')].groupby('level_1')
for stk in nfo_df.xs(TGT_endDate).index:
# filter out unique asof dates from nfo_df for each stock
asof_grp = pd.unique(stk_grpby.get_group(stk).max_asof)
# asof_grp = np.unique(stk_grpby.get_group(stk).max_asof.values)
asof_dTbl = {} # stores qtr end dates and the pipeline dates when that historic data was most recent
if len(asof_grp) >= min_qtrs: # filter out firms w/ less than 13 qtrs of trailing data
for n in xrange(min_qtrs):
# this loop finds the date when each historic quarterly was the most recent
# available; associated timestamps will be used to annualize IS & CFS 10Qs (3 mth ending)
asof_dateIdx = -(n + 1)
# data_date = last pipeline date when old asof dates were the most recent data available;
# using most recent pipeline dates for each qtr end rpt'g data to reflect updated
# info in amended filings
data_date = stk_grpby.get_group(stk).level_0.loc[\
stk_grpby.get_group(stk).max_asof == \
asof_grp[asof_dateIdx]].max().tz_localize(None)
# asof_grp[asof_dateIdx] = quarter end asof date
asof_dTbl[asof_grp[asof_dateIdx]] = data_date
# position idx of asof dates associated with historic 10Qs in asof_grp
y0_Qidx = [-(i+1) for i in xrange(4)]
y1_Qidx = [-(i+5) for i in xrange(4)]
y2_Qidx = [-(i+9) for i in xrange(4)]
# calc trailing TTM data for BS, IS, CFS over the past 3 years
BS_0yAgo.loc[stk] = BS_f_df.loc[asof_dTbl[asof_grp[-1]]].loc[stk]
BS_1yAgo.loc[stk] = BS_f_df.loc[asof_dTbl[asof_grp[-5]]].loc[stk]
BS_2yAgo.loc[stk] = BS_f_df.loc[asof_dTbl[asof_grp[-9]]].loc[stk]
BS_3yAgo.loc[stk] = BS_f_df.loc[asof_dTbl[asof_grp[-13]]].loc[stk]
IS_0yAgo_TTM.loc[stk] = _add_4qtrs(stk, y0_Qidx, asof_dTbl, asof_grp, IS_f_df)
IS_1yAgo_TTM.loc[stk] = _add_4qtrs(stk, y1_Qidx, asof_dTbl, asof_grp, IS_f_df)
IS_2yAgo_TTM.loc[stk] = _add_4qtrs(stk, y2_Qidx, asof_dTbl, asof_grp, IS_f_df)
CFS_0yAgo_TTM.loc[stk] = _add_4qtrs(stk, y0_Qidx, asof_dTbl, asof_grp, CFS_f_df)
CFS_1yAgo_TTM.loc[stk] = _add_4qtrs(stk, y1_Qidx, asof_dTbl, asof_grp, CFS_f_df)
CFS_2yAgo_TTM.loc[stk] = _add_4qtrs(stk, y2_Qidx, asof_dTbl, asof_grp, CFS_f_df)
else: insufficient_data[stk] = len(asof_grp)
return BS_0yAgo, BS_1yAgo, BS_2yAgo, BS_3yAgo,\
IS_0yAgo_TTM, IS_1yAgo_TTM, IS_2yAgo_TTM,\
CFS_0yAgo_TTM, CFS_1yAgo_TTM, CFS_2yAgo_TTM