-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathsentiment_return_regression.py
145 lines (123 loc) · 6.18 KB
/
sentiment_return_regression.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Tue Apr 16 12:58:19 2019
@author: mschnaubelt
"""
import pandas as pd
import statsmodels.api as sm
from util.prepare_data import prepare_data, clean_data
from config import TARGETS, FEATURES
# Load the project data set and run it through the cleaning step in one go.
data = clean_data(prepare_data())

# Uncomment to work on a random 5000-row sample while debugging:
# data = data.sample(5000)

# Spearman correlation matrix over all configured target and feature columns.
C = data[TARGETS + FEATURES].corr(method='spearman')
# Reusable groups of regressor column names. Jobs are assembled from these
# groups so that each specification below reads as a combination of blocks.
_CONTROLS = ['earnings_surprise', 'log_length', 'nr_analysts',
             'nr_executives', 'pays_dividend']
_CONTROLS_NO_LENGTH = ['earnings_surprise', 'nr_analysts',
                       'nr_executives', 'pays_dividend']
_LM_SENTIMENT = ['general_SentimentLM', 'general_RatioUncertaintyLM',
                 'qanda_SentimentLM', 'qanda_RatioUncertaintyLM']
_LM_POLARITY = ['general_PositivityLM', 'general_NegativityLM',
                'general_RatioUncertaintyLM']
_WHOLE_CALL = ['whole_general_neg', 'whole_general_pos', 'whole_general_unc']
_CALL_RETURN = ['abnormal_call_return']
# Thirty numbered columns named '<i>_b'.
_NUMBERED_B = [f'{i}_b' for i in range(30)]
# Thirty numbered column pairs named '<i>_pos', '<i>_neg' (pos before neg).
_NUMBERED_POS_NEG = [f'{i}_{d}' for i in range(30) for d in ('pos', 'neg')]
_FUNDAMENTALS = [
    'MV_log', 'BM_ratio', 'EP_ratio', 'SP_ratio',
    # 'CP_ratio', 'ACCRUAL_ratio',
    'DY_ratio', 'dividend_payout_ratio',
    'BM_surprise', 'EP_surprise', 'SP_surprise',
    'DY_surprise',  # 'CP_surprise',
    'EP_surprise_mean_std', 'EP_surprise_std',
    'EP_surprise_revisions', 'EP_surprise_estimates',
    'SP_surprise_std', 'SP_surprise_estimates',
    'DY_surprise_std', 'log_length', 'nr_analysts',
    'general_PositivityLM', 'general_NegativityLM',
    'qanda_PositivityLM', 'qanda_NegativityLM',
]


def _job(subset, target, *feature_groups):
    """Build one regression spec; features are the groups concatenated in order."""
    return {
        'subset': subset,
        'target': target,
        'features': [name for group in feature_groups for name in group],
    }


# One entry per regression: the index subset to filter on (None = all rows),
# the dependent variable, and the ordered list of regressor columns.
jobs = [
    _job('SP1500', 'abnormal_cont_return', _CONTROLS, _LM_SENTIMENT),
    _job('SP1500', 'abnormal_return', _CONTROLS, _LM_SENTIMENT, _CALL_RETURN),
    _job('SP1500', 'abnormal_5d_drift', _CONTROLS, _LM_SENTIMENT, _CALL_RETURN),
    _job('SP1500', 'abnormal_5d_drift', _CONTROLS, _LM_POLARITY, _CALL_RETURN),
    _job('SP1500', 'abnormal_5d_drift', _CONTROLS, _LM_POLARITY, _CALL_RETURN,
         _NUMBERED_B),
    _job('SP1500', 'abnormal_5d_drift', _CONTROLS, _WHOLE_CALL, _CALL_RETURN),
    _job('SP500TR', 'abnormal_5d_drift', _CONTROLS, _LM_SENTIMENT, _CALL_RETURN),
    _job('SP600TR', 'abnormal_5d_drift', _CONTROLS, _LM_SENTIMENT, _CALL_RETURN),
    _job(None, 'abnormal_5d_drift', _CONTROLS, _LM_SENTIMENT, _CALL_RETURN),
    _job('SP1500', 'abnormal_return', _CONTROLS_NO_LENGTH, _WHOLE_CALL,
         _CALL_RETURN, _NUMBERED_POS_NEG),
    _job('SP1500', 'abnormal_5d_drift', _CONTROLS_NO_LENGTH, _WHOLE_CALL,
         _CALL_RETURN, _NUMBERED_POS_NEG),
    _job('SP1500', 'ff5_abnormal_5d_drift', _FUNDAMENTALS),
]
# Fit one OLS regression per job specification and print its summary.
# Each job provides a 'subset' (market-index filter, or None for all rows),
# a 'target' column (dependent variable) and a list of 'features' (regressors).
for job in jobs:
    # 'SP1500' is expanded into three index names for the `mkt_index` filter;
    # any other subset value is matched against `mkt_index` as-is.
    index_names = ['SP500TR', 'SP400TR', 'SP600TR'] if job['subset'] == 'SP1500' \
        else [job['subset']]

    if job['subset'] is not None:
        reg_data = data.loc[data.mkt_index.isin(index_names)]
    else:
        reg_data = data

    regr_columns = job['features']

    # Jobs whose feature columns are not all present in the data set cannot
    # be fitted. Report them instead of dropping them without a trace, so a
    # missing regression in the output can be explained.
    missing_columns = [col for col in regr_columns if col not in data.columns]
    if missing_columns:
        print("\n\n\n*** Skipping regression of %s on indices %s: "
              "missing feature columns %s ***\n"
              % (job['target'], index_names, missing_columns))
        continue

    # NOTE(review): regressors enter the model unscaled. An earlier revision
    # computed a z-scored copy of the features here but never passed it to
    # the model; that dead computation has been removed. If standardized
    # coefficients are wanted, the scaled frame must be given to add_constant.
    # The intercept column is appended after the features (prepend=False).
    x = sm.add_constant(reg_data[regr_columns], prepend=False)

    mod = sm.OLS(reg_data[job['target']], x)
    # HAC covariance estimate with up to 10 lags for the standard errors.
    res = mod.fit(cov_type='HAC', cov_kwds={'maxlags': 10})

    print("\n\n\n*** Using data subset from indices %s ***\n" % index_names)
    print(res.summary2())