
Commit 33b8d06

Add files via upload
0 parents  commit 33b8d06

43 files changed, +4120 −0 lines changed


Corporate_Governance_Risk_Measure.py

+210
@@ -0,0 +1,210 @@
import pandas as pd
import yfinance as yf
import datetime as dt
import matplotlib.pyplot as plt
import statsmodels.api as sm
import numpy as np
from sklearn.covariance import EllipticEnvelope


def load_raw_data(ticker, start_date, end_date):
    crash_data = pd.DataFrame()
    for i in ticker:
        raw_data = yf.download(i, start_date, end_date)
        crash_df = pd.DataFrame()
        crash_df['RET'] = (raw_data['Adj Close'] / raw_data['Adj Close'].shift(1)) - 1
        crash_df.index = raw_data.index
        crash_df['BIDLO'] = raw_data['Low']
        crash_df['ASKHI'] = raw_data['High']
        crash_df['PRC'] = raw_data['Close']
        crash_df['VOL'] = raw_data['Volume']
        typical_price = (raw_data['High'] + raw_data['Low'] + raw_data['Close']) / 3
        crash_df['VWAP'] = (typical_price * raw_data['Volume']).cumsum() / raw_data['Volume'].cumsum()
        crash_df['vwretx'] = (crash_df['VWAP'] / crash_df['VWAP'].shift(1)) - 1
        crash_df['TICKER'] = i
        crash_df.dropna(inplace=True)
        crash_data = pd.concat([crash_data, crash_df])

    return crash_data


def weekly_hist_gram(crash_data):
    crash_dataw = crash_data.groupby('TICKER').resample('W').agg({'RET': 'mean', 'vwretx': 'mean', 'VOL': 'mean',
                                                                   'BIDLO': 'mean', 'ASKHI': 'mean', 'PRC': 'mean'})
    crash_dataw = crash_dataw.reset_index()
    crash_dataw.dropna(inplace=True)
    stocks = crash_data.TICKER.unique()
    plt.figure(figsize=(12, 8))
    k = 1
    for i in stocks[:4]:
        plt.subplot(2, 2, k)
        plt.hist(crash_dataw[crash_dataw.TICKER == i]['RET'])
        plt.title('Histogram of ' + i)
        k += 1
    plt.show()
    return crash_dataw, stocks


def firm_specific_weekly_return(crash_dataw, stocks):
    residuals_dict = {}  # residuals of the expanded market model, stored per ticker

    for i in stocks:
        Y = crash_dataw.loc[crash_dataw['TICKER'] == i]['RET'].values
        X = crash_dataw.loc[crash_dataw['TICKER'] == i]['vwretx'].values
        X = sm.add_constant(X)

        # Expanded market model: the weekly return is regressed on the market
        # return together with its two leads and two lags (summed here).
        X_transformed = X[2:-2] + X[1:-3] + X[0:-4] + X[3:-1] + X[4:]
        ols = sm.OLS(Y[2:-2], X_transformed).fit()

        residuals_stock = ols.resid
        # Firm-specific weekly return: log(1 + residual)
        residuals_dict[i] = list(map(lambda x: np.log(1 + x), residuals_stock))

    crash_data_sliced = pd.DataFrame([])
    for i in stocks:
        crash_data_sliced = pd.concat([crash_data_sliced, crash_dataw.loc[crash_dataw.TICKER == i][2:-2]],
                                      ignore_index=True)
    print(crash_data_sliced.head())

    # Flag outliers in the residual distribution with an elliptic envelope
    envelope = EllipticEnvelope(contamination=0.02, support_fraction=1)
    ee_predictions = {}

    for i in stocks:
        stock_residuals = np.array(residuals_dict[i]).reshape(-1, 1)
        if stock_residuals.shape[0] < 2:
            print(f"Skipping stock {i} due to insufficient residuals.")
            continue  # skip the current stock and move to the next one
        envelope.fit(stock_residuals)
        ee_predictions[i] = envelope.predict(stock_residuals)

    transform = []
    for i in stocks:
        if i in ee_predictions:  # only process stocks that were not skipped
            for j in range(len(ee_predictions[i])):
                transform.append(np.where(ee_predictions[i][j] == 1, 0, -1))

    crash_data_sliced = crash_data_sliced.reset_index()
    crash_data_sliced['residuals'] = np.concatenate(list(residuals_dict.values()))
    crash_data_sliced['neg_outliers'] = np.where((np.array(transform)) == -1, 1, 0)
    # Keep only downside outliers: positive-residual outliers are not crash weeks
    crash_data_sliced.loc[(crash_data_sliced.neg_outliers == 1) & (crash_data_sliced.residuals > 0), 'neg_outliers'] = 0

    plt.figure(figsize=(12, 8))
    k = 1

    for i in stocks[8:12]:
        plt.subplot(2, 2, k)
        crash_data_sliced['residuals'][crash_data_sliced.TICKER == i].hist(label='normal', bins=30, color='gray')
        outliers = crash_data_sliced['residuals'][
            (crash_data_sliced.TICKER == i) & (crash_data_sliced.neg_outliers > 0)]
        outliers.hist(color='black', label='anomaly')
        plt.title(i)
        plt.legend()
        k += 1
    plt.show()

    return crash_data_sliced


def weekly_to_annual_data(crash_data_sliced, crash_data, crash_dataw):
    crash_data_sliced = crash_data_sliced.set_index('Date')
    crash_data_sliced.index = pd.to_datetime(crash_data_sliced.index)

    std = crash_data.groupby('TICKER')['RET'].resample('W').std().reset_index()
    crash_dataw['std'] = pd.DataFrame(std['RET'])

    yearly_data = crash_data_sliced.groupby('TICKER').resample('Y')['residuals'].agg(['mean', 'std']).reset_index()
    print(yearly_data.head())

    merge_crash = pd.merge(crash_data_sliced.reset_index(), yearly_data, how='outer', on=['TICKER', 'Date'])
    merge_crash[['annual_mean', 'annual_std']] = merge_crash.sort_values(by=['TICKER', 'Date']).iloc[:, -2:].fillna(
        method='bfill')
    merge_crash['residuals'] = merge_crash.sort_values(by=['TICKER', 'Date'])['residuals'].fillna(method='ffill')
    merge_crash = merge_crash.drop(merge_crash.iloc[:, -4:-2], axis=1)

    return merge_crash


def crash_risk_measure(merge_crash, stocks):
    # A week is a crash week when its firm-specific return falls more than
    # 3.09 standard deviations below the annual mean (~0.1% left tail).
    crash_risk_out = []
    for j in stocks:
        for k in range(len(merge_crash[merge_crash.TICKER == j])):
            if merge_crash[merge_crash.TICKER == j]['residuals'].iloc[k] < \
                    merge_crash[merge_crash.TICKER == j]['annual_mean'].iloc[k] - \
                    3.09 * merge_crash[merge_crash.TICKER == j]['annual_std'].iloc[k]:
                crash_risk_out.append(1)
            else:
                crash_risk_out.append(0)
    merge_crash['crash_risk'] = crash_risk_out
    print(merge_crash['crash_risk'].value_counts())

    merge_crash = merge_crash.set_index('Date')
    merge_crash_annual = merge_crash.groupby('TICKER').resample('1Y')['crash_risk'].sum().reset_index()

    down = []
    for j in range(len(merge_crash)):
        if merge_crash['residuals'].iloc[j] < merge_crash['annual_mean'].iloc[j]:
            down.append(1)
        else:
            down.append(0)

    merge_crash = merge_crash.reset_index()
    merge_crash['down'] = pd.DataFrame(down)
    merge_crash['up'] = 1 - merge_crash['down']
    down_residuals = merge_crash[merge_crash.down == 1][['residuals', 'TICKER', 'Date']]
    up_residuals = merge_crash[merge_crash.up == 1][['residuals', 'TICKER', 'Date']]

    down_residuals['residuals_down_sq'] = down_residuals['residuals'] ** 2
    down_residuals['residuals_down_cubic'] = down_residuals['residuals'] ** 3
    up_residuals['residuals_up_sq'] = up_residuals['residuals'] ** 2
    up_residuals['residuals_up_cubic'] = up_residuals['residuals'] ** 3
    down_residuals['down_residuals'] = down_residuals['residuals']
    up_residuals['up_residuals'] = up_residuals['residuals']
    del down_residuals['residuals']
    del up_residuals['residuals']
    merge_crash['residuals_sq'] = merge_crash['residuals'] ** 2
    merge_crash['residuals_cubic'] = merge_crash['residuals'] ** 3

    merge_crash_all = merge_crash.merge(down_residuals, on=['TICKER', 'Date'], how='outer')
    merge_crash_all = merge_crash_all.merge(up_residuals, on=['TICKER', 'Date'], how='outer')
    cols = ['BIDLO', 'ASKHI', 'residuals', 'annual_std', 'residuals_sq', 'residuals_cubic', 'down', 'up',
            'residuals_up_sq', 'residuals_down_sq', 'neg_outliers']
    merge_crash_all = merge_crash_all.set_index('Date')
    merge_grouped = merge_crash_all.groupby('TICKER')[cols].resample('1Y').sum().reset_index()
    merge_grouped['neg_outliers'] = np.where(merge_grouped.neg_outliers >= 1, 1, 0)

    merge_grouped = merge_grouped.set_index('Date')
    merge_all = merge_grouped.groupby('TICKER').resample('1Y').agg({'down': ['sum', 'count'],
                                                                    'up': ['sum', 'count']}).reset_index()
    print(merge_all.head())

    merge_grouped['down'] = merge_all['down']['sum'].values
    merge_grouped['up'] = merge_all['up']['sum'].values
    merge_grouped['count'] = merge_grouped['down'] + merge_grouped['up']

    merge_grouped = merge_grouped.reset_index()
    # Down-to-up volatility (DUVOL)
    merge_grouped['duvol'] = np.log(((merge_grouped['up'] - 1) * merge_grouped['residuals_down_sq']) /
                                    ((merge_grouped['down'] - 1) * merge_grouped['residuals_up_sq']))
    print(merge_grouped.groupby('TICKER')['duvol'].mean())

    # Negative coefficient of skewness (NCSKEW)
    merge_grouped['ncskew'] = - (((merge_grouped['count'] * (merge_grouped['count'] - 1) ** (3 / 2)) *
                                  merge_grouped['residuals_cubic']) / (((merge_grouped['count'] - 1) *
                                  (merge_grouped['count'] - 2)) * merge_grouped['residuals_sq'] ** (3 / 2)))
    print(merge_grouped.groupby('TICKER')['ncskew'].mean())

    merge_grouped['crash_risk'] = merge_crash_annual['crash_risk']
    merge_grouped['crash_risk'] = np.where(merge_grouped.crash_risk >= 1, 1, 0)
    merge_crash_all_grouped2 = merge_crash_all.groupby('TICKER')[['VOL', 'PRC']].resample('1Y').mean().reset_index()
    merge_grouped[['VOL', 'PRC']] = merge_crash_all_grouped2[['VOL', 'PRC']]
    print(merge_grouped[['ncskew', 'duvol']].corr())
    return merge_grouped


if __name__ == '__main__':
    ticker = ['ABBV', 'GOOGL', 'JNJ', 'DLTR', 'HLT', 'JPM', 'DEO', 'PG', 'ALB', 'BA', 'NVDA', 'LUV', 'PEP', 'TSM',
              'SPY', '^VIX', 'GLD']
    start_date = dt.datetime(2010, 1, 1)
    end_date = dt.datetime(2023, 1, 1)
    crash_data_ = load_raw_data(ticker, start_date, end_date)
    crash_dataw_, stocks_ = weekly_hist_gram(crash_data_)
    crash_data_sliced_ = firm_specific_weekly_return(crash_dataw_, stocks_)
    merge_crash_ = weekly_to_annual_data(crash_data_sliced_, crash_data_, crash_dataw_)
    merge_grouped_ = crash_risk_measure(merge_crash_, stocks_)
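
For reference, the annual crash-risk measures computed in crash_risk_measure correspond to the following expressions, written here to mirror the code (W is the firm-specific weekly return, n the number of weeks in the year, and n_up, n_down the counts of up and down weeks):

\text{CRASH}_t = 1 \ \text{if}\ W < \bar{W}_t - 3.09\,\sigma_t \ \text{in at least one week of year } t

\text{DUVOL}_t = \log\frac{(n_{up}-1)\sum_{down} W^2}{(n_{down}-1)\sum_{up} W^2}

\text{NCSKEW}_t = -\,\frac{n\,(n-1)^{3/2}\sum W^3}{(n-1)(n-2)\left(\sum W^2\right)^{3/2}}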
@@ -0,0 +1,97 @@
1+
import numpy as np
2+
import pandas as pd
3+
import matplotlib.pyplot as plt
4+
from sklearn.preprocessing import StandardScaler
5+
from sklearn.model_selection import train_test_split
6+
from sklearn.metrics import roc_auc_score, roc_curve
7+
from tensorflow import keras
8+
from keras.wrappers.scikit_learn import KerasClassifier
9+
from keras.layers import Dense, Dropout
10+
from sklearn.model_selection import GridSearchCV
11+
import tensorflow as tf
12+
import logging
13+
tf.get_logger().setLevel(logging.ERROR)
14+
15+
def read_original_files(file_path):
16+
credit = pd.read_csv(file_path)
17+
print(credit.head())
18+
del credit['Unnamed: 0']
19+
return credit
20+
21+
22+
def data_conversion(credit):
23+
print(credit.describe())
24+
numerical_credit = credit.select_dtypes(include=[np.number])
25+
'''obtain all numerical variables'''
26+
plt.figure(figsize=(10, 8))
27+
k = 0
28+
cols = numerical_credit.columns
29+
for i, j in zip(range(len(cols)), cols):
30+
k += 1
31+
plt.subplot(2, 2, k)
32+
plt.hist(numerical_credit.iloc[:, i])
33+
plt.title(j)
34+
plt.show()
35+
36+
scaler = StandardScaler()
37+
scaled_credit = scaler.fit_transform(numerical_credit)
38+
scaled_credit = pd.DataFrame(scaled_credit, columns=numerical_credit.columns)
39+
40+
non_numerical_credit = credit.select_dtypes(include=['object'])
41+
dummies_credit = pd.get_dummies(non_numerical_credit, drop_first=True)
42+
dummies_credit = dummies_credit.astype(int)
43+
print(dummies_credit.head())
44+
45+
combined_credit = pd.concat([scaled_credit, dummies_credit], axis=1)
46+
47+
return numerical_credit, scaled_credit, dummies_credit, combined_credit
48+
49+
50+
def data_preparation(combined_credit):
51+
X = combined_credit.drop("Risk_good", axis=1)
52+
y = combined_credit["Risk_good"]
53+
54+
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
55+
56+
return X_train, X_test, y_train, y_test
57+
58+
59+
def DL_risk(dropout_rate, verbose=0):
60+
model = keras.Sequential()
61+
model.add(Dense(128,kernel_initializer='normal', activation='relu', input_dim=21))
62+
model.add(Dense(64, kernel_initializer='normal', activation='relu'))
63+
model.add(Dense(8, kernel_initializer='normal', activation='relu'))
64+
model.add(Dropout(dropout_rate))
65+
model.add(Dense(1, activation='sigmoid'))
66+
model.compile(loss='binary_crossentropy', optimizer='rmsprop')
67+
return model
68+
69+
70+
def training_model(X_train, X_test, y_train, y_test):
71+
parameters = {'batch_size': [10, 50, 100],
72+
'epochs': [50, 100, 150],
73+
'dropout_rate': [0.2, 0.4]}
74+
model = KerasClassifier(build_fn=DL_risk)
75+
gs = GridSearchCV(estimator=model, param_grid=parameters, scoring='roc_auc', error_score='raise')
76+
77+
gs.fit(X_train, y_train, verbose=0)
78+
print('Best hyperparameters for first cluster in DL are {}'.format(gs.best_params_))
79+
80+
model = KerasClassifier(build_fn=DL_risk,
81+
dropout_rate=gs.best_params_['dropout_rate'],
82+
verbose=0,
83+
batch_size=gs.best_params_['batch_size'],
84+
epochs=gs.best_params_['epochs'])
85+
model.fit(X_train, y_train)
86+
DL_predict = model.predict(X_test)
87+
DL_ROC_AUC = roc_auc_score(y_test, pd.DataFrame(DL_predict.flatten()))
88+
print('DL_ROC_AUC is {:.4f}'.format(DL_ROC_AUC))
89+
return model, DL_predict
90+
91+
92+
if __name__ == '__main__':
93+
file_path = 'D:/PyCharm Community Edition 2023.1.2/Python_Project/Finance/py4frm/german_credit_data.csv'
94+
credit_ = read_original_files(file_path)
95+
numerical_credit_, scaled_credit_, dummies_credit_, combined_credit_ = data_conversion(credit_)
96+
X_train_, X_test_, y_train_, y_test_ = data_preparation(combined_credit_)
97+
model_, DL_pred = training_model( X_train_, X_test_, y_train_, y_test_)
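
The roc_curve import above is never exercised in training_model. A minimal sketch of how it could be used to plot the test-set ROC curve, assuming the names created in the __main__ block (y_test_, DL_pred):

# Hypothetical usage sketch, not part of the committed file.
fpr, tpr, _ = roc_curve(y_test_, DL_pred.flatten())
plt.plot(fpr, tpr, label='DL model')
plt.plot([0, 1], [0, 1], linestyle='--', label='chance')
plt.xlabel('False positive rate')
plt.ylabel('True positive rate')
plt.legend()
plt.show()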
@@ -0,0 +1,76 @@
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPClassifier
from sklearn.experimental import enable_halving_search_cv  # noqa: F401, enables HalvingRandomSearchCV
from sklearn.model_selection import HalvingRandomSearchCV
from sklearn.metrics import roc_auc_score, roc_curve


def read_original_files(file_path):
    credit = pd.read_csv(file_path)
    print(credit.head())
    del credit['Unnamed: 0']  # drop the unnamed index column written by the CSV export
    return credit


def data_conversion(credit):
    print(credit.describe())
    numerical_credit = credit.select_dtypes(include=[np.number])  # numerical variables only
    plt.figure(figsize=(10, 8))
    k = 0
    cols = numerical_credit.columns
    for i, j in zip(range(len(cols)), cols):
        k += 1
        plt.subplot(2, 2, k)
        plt.hist(numerical_credit.iloc[:, i])
        plt.title(j)
    plt.show()

    # Standardize the numerical features
    scaler = StandardScaler()
    scaled_credit = scaler.fit_transform(numerical_credit)
    scaled_credit = pd.DataFrame(scaled_credit, columns=numerical_credit.columns)

    # One-hot encode the categorical features
    non_numerical_credit = credit.select_dtypes(include=['object'])
    dummies_credit = pd.get_dummies(non_numerical_credit, drop_first=True)
    dummies_credit = dummies_credit.astype(int)
    print(dummies_credit.head())

    combined_credit = pd.concat([scaled_credit, dummies_credit], axis=1)

    return numerical_credit, scaled_credit, dummies_credit, combined_credit


def data_preparation(combined_credit):
    X = combined_credit.drop("Risk_good", axis=1)
    y = combined_credit["Risk_good"]

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    return X_train, X_test, y_train, y_test


def training_model(X_train, X_test, y_train, y_test):
    param_NN = {"hidden_layer_sizes": [(100, 50), (50, 50), (10, 100)],
                "solver": ["lbfgs", "sgd", "adam"],
                "learning_rate_init": [0.001, 0.05]}
    MLP = MLPClassifier(random_state=42)

    # Successive-halving random search over the MLP hyperparameters
    param_halve_NN = HalvingRandomSearchCV(MLP, param_NN, scoring='roc_auc')
    param_halve_NN.fit(X_train, y_train)

    y_pred_NN = param_halve_NN.predict(X_test)
    print('The ROC AUC score of the MLP is {:.4f}'.format(roc_auc_score(y_test, y_pred_NN)))

    return param_halve_NN, y_pred_NN


if __name__ == '__main__':
    file_path = 'D:/PyCharm Community Edition 2023.1.2/Python_Project/Finance/py4frm/german_credit_data.csv'
    credit_ = read_original_files(file_path)
    numerical_credit_, scaled_credit_, dummies_credit_, combined_credit_ = data_conversion(credit_)
    X_train_, X_test_, y_train_, y_test_ = data_preparation(combined_credit_)
    model, y_pred = training_model(X_train_, X_test_, y_train_, y_test_)
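
After the search has run, the fitted HalvingRandomSearchCV object returned by training_model exposes the usual search attributes. A minimal sketch, assuming the model name from the __main__ block:

# Hypothetical usage sketch, not part of the committed file.
print('Best MLP hyperparameters:', model.best_params_)
print('Best cross-validated ROC AUC: {:.4f}'.format(model.best_score_))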
