
Searching for a signal in CEO change and news sentiment data

In [1]:
from quantopian.pipeline.data.accern import alphaone_free as alphaone
from quantopian.pipeline.data.eventvestor import CEOChangeAnnouncements as cc
from quantopian.pipeline.data.builtin import USEquityPricing
from quantopian.pipeline import Pipeline
from quantopian.research import run_pipeline
import pandas as pd

# Number of days after CEO change that articles are considered.
SINCE_CHANGE = 5

# Number of trading days since article that we will keep track of price data.
SINCE_ARTICLE = 40

Let's set up a pipeline to get some data to work with. This takes a minute to run.

In [2]:
announcement_date = cc.asof_date.latest
pipe = Pipeline(
    columns={
        'sentiment': alphaone.article_sentiment.latest,
        'impact_score': alphaone.impact_score.latest,
        'announcement_date': announcement_date,
        'price': USEquityPricing.close.latest
    },
    screen=announcement_date.notnull()
)
factor = run_pipeline(pipe, start_date='2012-08-26', end_date='2014-12-31')
factor.head(5)
Out[2]:
announcement_date impact_score price sentiment
2012-08-27 00:00:00+00:00 Equity(2 [AA]) 2008-05-08 NaN 8.63 NaN
Equity(24 [AAPL]) 2011-08-24 70.086 663.18 -0.126
Equity(64 [ABX]) 2008-12-23 NaN 37.77 NaN
Equity(66 [AB]) 2008-12-19 NaN 13.59 NaN
Equity(69 [ACAT]) 2010-10-28 NaN 42.93 NaN
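
Many rows carry no recent article, as the NaN sentiment values above suggest. A quick coverage check on the DataFrame we just built (a small sketch, not part of the original analysis):

# Fraction of (date, equity) rows that have a sentiment reading at all.
print(factor['sentiment'].notnull().mean())

# Number of distinct equities with a CEO change on record.
print(len(factor.index.get_level_values(1).unique()))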

Now we'll filter out the data points that aren't close enough to a CEO change.

In [3]:
data = factor

# Constructs a new column with values from the first index, stripped of the time information.
data['asof_date'] = map(lambda x: pd.Timestamp(x.date()), factor.index.get_level_values(0))

# Filters out data points too long after the last CEO change.
# The 1.6 multiplier is a rough conversion of the trading-day windows above
#     into calendar days, with a little slack.
data = data[data['asof_date'] - data['announcement_date'] <=
            pd.Timedelta((SINCE_CHANGE + SINCE_ARTICLE) * 1.6, unit='d')]
data.head(5)
Out[3]:
announcement_date impact_score price sentiment asof_date
2012-08-27 00:00:00+00:00 Equity(754 [BBY]) 2012-08-20 79.143 17.31 -0.714 2012-08-27
Equity(780 [BCS]) 2012-07-03 87.000 11.85 -0.667 2012-08-27
Equity(1331 [CCC]) 2012-06-27 NaN 13.39 NaN 2012-08-27
Equity(1979 [CUB]) 2012-06-25 NaN 48.73 NaN 2012-08-27
Equity(2351 [DUK]) 2012-07-03 80.000 65.48 0.000 2012-08-27
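
Note that the map call above relies on map returning a list, which is Python 2 behavior; in Python 3 it returns an iterator. An equivalent line that avoids map, run in place of it before the filtering step (a sketch that should work in either version):

# Same column, built directly from the DatetimeIndex: drop the time zone and time of day.
data['asof_date'] = factor.index.get_level_values(0).tz_localize(None).normalize()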

Here we make a new DataFrame with a row for each article published fewer than 5 days after a CEO change.

In [4]:
# Filter for a non-null sentiment value and a date within 5 days of the last CEO change.
sentiment = data[pd.notnull(data['sentiment']) &
                 (data['asof_date'] - data['announcement_date'] < pd.Timedelta(SINCE_CHANGE, unit='d'))]
sentiment.head(10)
Out[4]:
announcement_date impact_score price sentiment asof_date
2012-08-28 00:00:00+00:00 Equity(5199 [NAV]) 2012-08-27 87.071 23.331 -0.051 2012-08-28
Equity(9430 [STRL]) 2012-08-27 99.545 9.750 0.000 2012-08-28
2012-08-29 00:00:00+00:00 Equity(5199 [NAV]) 2012-08-27 88.075 22.750 -0.164 2012-08-29
Equity(6769 [SEE]) 2012-08-28 90.250 12.990 0.583 2012-08-29
Equity(9430 [STRL]) 2012-08-27 99.545 9.800 0.000 2012-08-29
Equity(42037 [ZLTQ]) 2012-08-28 100.000 4.910 0.500 2012-08-29
2012-08-30 00:00:00+00:00 Equity(5199 [NAV]) 2012-08-27 85.571 22.500 0.000 2012-08-30
Equity(6769 [SEE]) 2012-08-28 95.789 14.580 0.281 2012-08-30
Equity(9268 [SHLO]) 2012-08-29 100.000 10.420 0.143 2012-08-30
Equity(9430 [STRL]) 2012-08-27 99.545 9.910 0.000 2012-08-30

Now we append columns containing the relative price difference from the day of article publication.

In [5]:
# This function is applied to our sentiment DataFrame right below its definition.
# Given an event (equity and article's date of publication),
#     it returns a Series containing pricing data for the equity following the event.
def get_prices(s):
    
    # Drops all data before the article's publication date, using a slice.
    data_after_article = data.loc[s['asof_date']:]
    
    # Isolates the data for the specific equity.
    # The equity is stored in the series name automatically by the .apply function below.
    equity_data = data_after_article.loc(axis=0)[:, s.name[1]]
    
    # Gets the equity's prices for a specified number of days after the event,
    #     by dropping pricing data that is too old.
    equity_pricing = equity_data['price'].head(SINCE_ARTICLE + 1)
    
    # Returns the equity's relative price differences from the day of the event,
    #     with the indices of the series replaced by numbers to indicate days after the event.
    return ((equity_pricing - s['price']) / s['price']).reset_index(drop=True)

# Concatenates the price data for all stocks in the DataFrame as new columns.
matches = pd.concat([sentiment, sentiment.apply(get_prices, axis=1)], axis=1)
matches.head(5)
Out[5]:
announcement_date impact_score price sentiment asof_date 0 1 2 3 4 ... 31 32 33 34 35 36 37 38 39 40
2012-08-28 00:00:00+00:00 Equity(5199 [NAV]) 2012-08-27 87.071 23.331 -0.051 2012-08-28 0 -0.024902 -0.035618 -0.080194 -0.057906 ... -0.057049 -0.030046 -0.044190 -0.072050 -0.117912 -0.093909 -0.119626 -0.183104 -0.148986 -0.165017
Equity(9430 [STRL]) 2012-08-27 99.545 9.750 0.000 2012-08-28 0 0.005128 0.016410 0.005128 -0.006154 ... 0.008205 0.001026 -0.015385 -0.009231 -0.011282 -0.008205 -0.025641 -0.033846 -0.056410 -0.066667
2012-08-29 00:00:00+00:00 Equity(5199 [NAV]) 2012-08-27 88.075 22.750 -0.164 2012-08-29 0 -0.010989 -0.056703 -0.033846 -0.121758 ... -0.005275 -0.019780 -0.048352 -0.095385 -0.070769 -0.097143 -0.162242 -0.127253 -0.143692 -0.160879
Equity(6769 [SEE]) 2012-08-28 90.250 12.990 0.583 2012-08-29 0 0.122402 0.101848 0.098537 0.087375 ... 0.190916 0.180139 0.192456 0.227868 0.253272 0.270978 0.227868 0.234796 0.210931 0.210931
Equity(9430 [STRL]) 2012-08-27 99.545 9.800 0.000 2012-08-29 0 0.011224 0.000000 -0.011224 0.006122 ... -0.004082 -0.020408 -0.014286 -0.016327 -0.013265 -0.030612 -0.038776 -0.061224 -0.071429 -0.081633

5 rows × 46 columns
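
The trickiest line in get_prices is the .loc(axis=0)[:, equity] slice, which picks out a single equity from the (date, equity) MultiIndex. A minimal illustration with a toy frame (the symbols and prices here are made up):

toy = pd.DataFrame(
    {'price': [1.0, 2.0, 3.0, 4.0]},
    index=pd.MultiIndex.from_product(
        [pd.to_datetime(['2014-01-02', '2014-01-03']), ['AAA', 'BBB']]))

# Selects every date for symbol 'BBB' only, mirroring the equity slice in get_prices.
print(toy.loc(axis=0)[:, 'BBB'])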

In [6]:
print(len(matches))
2984
In [7]:
import scipy.stats as stats
import matplotlib.pyplot as plt
import numpy as np

Here's a plot of the rank correlation between the sentiment rating and the relative price change at each horizon after the article. Over the first 12 trading days, the stock price tends to move in the direction opposite the article's sentiment. A possible strategy would be to go long or short a stock for 12 days after an article, taking the side opposite the sentiment.

In [8]:
scores = np.zeros(SINCE_ARTICLE + 1)
for i in range(1, SINCE_ARTICLE + 1):
    score, pvalue = stats.spearmanr(matches['sentiment'],
                                    matches[i])
    scores[i] = score
    
plt.bar(range(1, SINCE_ARTICLE + 1), scores[1:])
plt.xlabel('Trading days afterward')
plt.xlim((1, SINCE_ARTICLE + 1))
plt.ylabel('Rank correlation between article sentiment and returns');
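
As a rough sanity check of that contrarian idea, we can compute the average 12-day return from always trading against the sign of the sentiment (a sketch only; it ignores transaction costs, position sizing, and overlapping event windows):

# Long when sentiment is negative, short when positive; zero-sentiment events contribute nothing.
contrarian_12d = -np.sign(matches['sentiment']) * matches[12]
print(contrarian_12d.mean())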

Here's the spread of 12-day returns, with equities sorted into baskets by sentiment score. This shows how the signal might be usable in a long-short strategy. It might be wise to exclude sentiment scores under -0.2 to get a smoother spread.

In [9]:
num_baskets = 10
delta = 2. / num_baskets
avgs = np.zeros(num_baskets + 1)
for i in range(num_baskets):
    basket = matches[(matches['sentiment'] >= -1 + i * delta) &
                     (matches['sentiment'] < -1 + (i + 1) * delta)]
    
    avgs[i] = basket[12].mean()
avgs[num_baskets] = matches[matches['sentiment'] == 1][12].mean()

plt.bar(range(num_baskets + 1), avgs)
plt.xticks(range(num_baskets + 1), np.around(np.arange(-1, 1.2, 0.2), decimals=1))
plt.xlabel('Article sentiment')
plt.ylabel('Mean 12-day returns');
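
To act on that suggestion, the cutoff is a one-line filter; -0.2 is just the threshold eyeballed from the plot above, and smooth_matches is a new name introduced here:

# Keep only events whose article sentiment is at least -0.2.
smooth_matches = matches[matches['sentiment'] >= -0.2]
print(len(smooth_matches))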

We can try to incorporate Accern's impact score, but it doesn't make much difference.

In [10]:
multi_signal = matches['sentiment'] * matches['impact_score']
scores = np.zeros(SINCE_ARTICLE + 1)
pvalues = np.zeros(SINCE_ARTICLE + 1)
for i in range(1, SINCE_ARTICLE + 1):
    score, pvalue = stats.spearmanr(multi_signal,
                                    matches[i])
    pvalues[i] = pvalue
    scores[i] = score
    
plt.bar(range(1, SINCE_ARTICLE + 1), scores[1:])
plt.xlabel('Trading days afterward')
plt.xlim((1, SINCE_ARTICLE + 1))
plt.ylabel('Rank correlation between article sentiment times impact score and returns');
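
The loop above also collects the Spearman p-values, though they go unused; a quick look at the first 12 horizons is a reasonable sanity check:

# p-values for the combined signal over trading days 1 through 12.
print(pvalues[1:13])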
In [11]:
num_baskets = 10
delta = 200. / num_baskets
avgs = np.zeros(num_baskets + 1)
for i in range(num_baskets):
    basket = matches[(multi_signal >= -100 + i * delta) &
                     (multi_signal < -100 + (i + 1) * delta)]
    
    avgs[i] = basket[12].mean()
avgs[num_baskets] = matches[multi_signal == 100][12].mean()

plt.bar(range(num_baskets + 1), avgs)
plt.xticks(range(num_baskets + 1), np.arange(-100, 120, 20))
plt.xlabel('Article sentiment times impact score')
plt.ylabel('Mean 12-day returns');