This analysis loads the country-level COVID-19 confirmed cases and deaths sourced from ECDC and Our World in Data. We then load FactSet's Geographic Revenue Exposure (GeoRev) data, which tells us how much revenue companies make from different regions such as Europe. From this data we build regional long/short factors over the QTU (the QTradableStocksUS universe) and see how the returns of these factors line up with COVID-19 infection numbers in the corresponding regions.
This should just serve as an example of the type of analysis we are looking for here; by no means should you feel limited to GeoRev.
A couple of notes on what a good analysis looks like:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import empyrical as ep
import alphalens as al
from quantopian.pipeline import Pipeline
from quantopian.pipeline.data.factset import GeoRev
from quantopian.pipeline.domain import US_EQUITIES
from quantopian.research import run_pipeline
from quantopian.pipeline.filters import QTradableStocksUS
from quantopian.pipeline.data.factset import RBICSFocus
# Read the COVID-19 case data. The copy in your research folder is
# refreshed periodically; a newer file can also be downloaded from
# https://ourworldindata.org/coronavirus-source-data and placed in
# your research data directory.
covid = local_csv(
    'covid19_cases.csv',
    date_column='date',
)
# Selected countries, listed under the region they are assigned to.
_countries_by_region = (
    ('NORTH AMERICA', (
        'United States',
        'Canada',
    )),
    ('EUROPE', (
        'Austria',
        'Bulgaria',
        'Croatia',
        'Denmark',
        'Estonia',
        'Finland',
        'France',
        'Germany',
        'Greece',
        'Iceland',
        'Hungary',
        'Italy',
        'Netherlands',
        'Poland',
        'Portugal',
        'Romania',
        'Spain',
        'Serbia',
        'Switzerland',
        'Sweden',
        'United Kingdom',
    )),
    ('ASIA-PACIFIC', (
        'Australia',
        'Bangladesh',
        'Bhutan',
        'China',
        'Indonesia',
        'Japan',
        'Malaysia',
        'Myanmar',
        'New Zealand',
        'Singapore',
        'South Korea',
        'Taiwan',
        'Vietnam',
    )),
)

# Flatten the listing into a country -> region lookup.
region_country = {
    country: region
    for region, countries in _countries_by_region
    for country in countries
}
# Tag every row with its region; countries missing from region_country
# get no region.
covid['region'] = covid['location'].map(region_country)

# Sum the country rows into one row per (date, region), then move
# 'region' out of the index so only 'date' remains as the index.
by_date_and_region = covid.reset_index().groupby(['date', 'region'])
covid_region = by_date_and_region.sum().reset_index(level='region')
covid_region.head()
# Threshold on total confirmed cases; drawn below as a dashed line.
thresh = 25000

# One total-cases curve per region, plus the threshold line.
covid_region.groupby('region')['total_cases'].plot(legend=True);
plt.axhline(thresh, ls='--', color='0.5', label='Chosen threshold');
plt.ylabel('# confirmed cases');
plt.title('COVID-19 growth in different regions');
plt.legend(loc=0);
# For each region, take the date of the first row whose total case count
# exceeds the threshold. Regions that never exceed it are absent.
above_thresh = covid_region[covid_region.total_cases > thresh]
covid_region_date = above_thresh.reset_index().groupby('region').first()['date']
covid_region_date
# For each region: slice the GeoRev dataset, then take the most recent
# estimated revenue-exposure percentage.

# North America
GeoRevNA = GeoRev.slice('NORTH AMERICA')
rev_exposure_NA = GeoRevNA.est_pct.latest

# Asia / Pacific
GeoRevAP = GeoRev.slice('ASIA-PACIFIC')
rev_exposure_AP = GeoRevAP.est_pct.latest

# Europe
GeoRevEU = GeoRev.slice('EUROPE')
rev_exposure_EU = GeoRevEU.est_pct.latest

# RBICS level-1 sector name. Not used in this analysis, but kept
# available in case you want to use it.
sector = RBICSFocus.l1_name.latest
# Keep only QTradableStocksUS members that have an exposure value for
# all three regions.
has_all_exposures = (
    rev_exposure_NA.notnull()
    & rev_exposure_AP.notnull()
    & rev_exposure_EU.notnull()
)
universe = QTradableStocksUS() & has_all_exposures

# Each exposure is ranked and then z-scored; the sector column is passed
# through unchanged.
pipeline_columns = {
    'rev_exposure_NA': rev_exposure_NA.rank().zscore(),
    'rev_exposure_AP': rev_exposure_AP.rank().zscore(),
    'rev_exposure_EU': rev_exposure_EU.rank().zscore(),
    'sector': sector,
}
pipe = Pipeline(
    columns=pipeline_columns,
    domain=US_EQUITIES,
    screen=universe,
)

# Run the pipeline for a single day: georev is only updated yearly, so
# the most recent available date is enough.
df = run_pipeline(pipe, '2018-01-01', '2018-01-01')

# The date level holds just that one day, so drop it from the index.
df = df.reset_index(level=0, drop=True)
print(df.head())

# The sector column is not needed from here on.
df = df.drop('sector', axis='columns')
df.head()
# Get close prices for every asset returned by the pipeline, covering the
# full date range of the COVID data.
# The window uses the earliest and latest dates in the index rather than
# the first and last rows, so it stays correct when the CSV rows are not
# in chronological order (e.g. sorted by location).
prices = get_pricing(
    symbols=df.index,
    start_date=covid.index.min(),
    end_date=covid.index.max(),
    fields='close_price',
)
# Repeat the single-day factor frame once for every date in the price
# index and stack the copies under those dates. This effectively
# forward-fills the 2018 data across the whole price window.
daily_copies = {day: df for day in prices.index}
factor = pd.concat(daily_copies)
factor.head()
# Use alphalens to compute the 1-day factor returns for each factor column
factor_returns = {}
for col in factor.columns:
    factor_data = al.utils.get_clean_factor_and_forward_returns(
        factor[col], prices, periods=[1])
    factor_returns[col] = al.performance.factor_returns(factor_data)['1D']
factor_returns = pd.DataFrame(factor_returns)
factor_returns.head()

# Relabel by column name, not by position: the column order of a DataFrame
# built from a dict differs between pandas versions (sorted keys vs.
# insertion order), so positional labels could land on the wrong region.
factor_labels = [
    ('rev_exposure_AP', 'Asia/Pacific factor returns'),
    ('rev_exposure_EU', 'Europe factor returns'),
    ('rev_exposure_NA', 'North America factor returns'),
]
# Select the columns in a fixed order first so the output order is stable.
factor_returns = factor_returns[[name for name, _ in factor_labels]]
factor_returns = factor_returns.rename(columns=dict(factor_labels))
Ideally, you should summarize your findings in one succinct and largely self-explanatory plot.
# Two stacked panels sharing the date axis: regional case counts on top,
# cumulative factor returns below.
fig, axs = plt.subplots(nrows=2, sharex=True);

# Top panel: total confirmed cases per region plus the threshold line.
covid_region.groupby('region')['total_cases'].plot(ax=axs[0], legend=True);
axs[0].axhline(thresh, ls='--', color='0.5', label='Chosen threshold');
axs[0].set(ylabel='# confirmed cases', title='COVID-19 growth in different regions');
axs[0].legend(loc=0);

# Bottom panel: cumulative returns of each regional factor.
ep.cum_returns(factor_returns).plot(ax=axs[1])

# Mark the date each region crossed the threshold. Colors are looked up by
# region name so that a region which never crossed the threshold (and is
# therefore missing from covid_region_date) does not shift the colors of
# the remaining regions.
region_colors = {
    'ASIA-PACIFIC': 'b',
    'EUROPE': 'g',
    'NORTH AMERICA': 'r',
}
# .items() replaces .iteritems(), which was removed in pandas 2.0.
for region, date in covid_region_date.items():
    axs[1].axvline(date, label=region, ls='--', color=region_colors[region])
axs[1].legend(loc=2);
axs[1].set(title='GeoRev-weighted factor returns in response to COVID-19 cases crossing {} cases'.format(thresh),
           ylabel='Cumulative returns');
As you can see, there does not seem to be an obvious influence of confirmed COVID-19 cases on the returns of companies exposed to the region where the confirmed cases crossed our chosen threshold.