|
| 1 | +import pandas as pd |
| 2 | +import yfinance as yf |
| 3 | +import datetime as dt |
| 4 | +import matplotlib.pyplot as plt |
| 5 | +import statsmodels.api as sm |
| 6 | +import numpy as np |
| 7 | +from sklearn.covariance import EllipticEnvelope |
| 8 | + |
| 9 | + |
def load_raw_data(ticker, start_date, end_date):
    """Download daily OHLCV data for each ticker and build one tidy panel.

    Parameters
    ----------
    ticker : list[str]
        Ticker symbols understood by Yahoo Finance.
    start_date, end_date : datetime-like
        Start (inclusive) and end (exclusive) of the download window.

    Returns
    -------
    pd.DataFrame
        One row per trading day per ticker, indexed by date, with columns
        RET, BIDLO, ASKHI, PRC, VOL, VWAP, vwretx and TICKER.
    """
    crash_data = pd.DataFrame()
    for symbol in ticker:
        # auto_adjust=False keeps the 'Adj Close' column: recent yfinance
        # releases default to auto_adjust=True, which drops it and would
        # make the RET computation below raise a KeyError.
        raw_data = yf.download(symbol, start=start_date, end=end_date,
                               auto_adjust=False)
        crash_df = pd.DataFrame(index=raw_data.index)
        # Simple daily return from the adjusted close.
        crash_df['RET'] = raw_data['Adj Close'].pct_change()
        crash_df['BIDLO'] = raw_data['Low']
        crash_df['ASKHI'] = raw_data['High']
        crash_df['PRC'] = raw_data['Close']
        crash_df['VOL'] = raw_data['Volume']
        # Anchored (cumulative-from-start) VWAP, not a rolling/daily VWAP.
        typical_price = (raw_data['High'] + raw_data['Low'] + raw_data['Close']) / 3
        crash_df['VWAP'] = ((typical_price * raw_data['Volume']).cumsum()
                            / raw_data['Volume'].cumsum())
        crash_df['vwretx'] = crash_df['VWAP'].pct_change()
        crash_df['TICKER'] = symbol
        # First row has NaN returns; drop it (and any other NaN rows).
        crash_df.dropna(inplace=True)
        crash_data = pd.concat([crash_data, crash_df])

    return crash_data
| 29 | + |
| 30 | + |
def weekly_hist_gram(crash_data):
    """Aggregate the daily panel to weekly means per ticker and plot the
    weekly-return histogram for the first four tickers.

    Returns
    -------
    (pd.DataFrame, np.ndarray)
        The weekly panel and the array of unique ticker symbols.
    """
    agg_spec = {'RET': 'mean', 'vwretx': 'mean', 'VOL': 'mean',
                'BIDLO': 'mean', 'ASKHI': 'mean', 'PRC': 'mean'}
    crash_dataw = (crash_data.groupby('TICKER')
                   .resample('W')
                   .agg(agg_spec)
                   .reset_index())
    crash_dataw.dropna(inplace=True)

    stocks = crash_data.TICKER.unique()
    plt.figure(figsize=(12, 8))
    # 2x2 grid for the first four tickers only.
    for slot, symbol in enumerate(stocks[:4], start=1):
        plt.subplot(2, 2, slot)
        plt.hist(crash_dataw[crash_dataw.TICKER == symbol]['RET'])
        plt.title('Histogram of ' + symbol)
    plt.show()
    return crash_dataw, stocks
| 46 | + |
| 47 | + |
def firm_specific_weekly_return(crash_dataw, stocks):
    """Compute firm-specific weekly returns as log(1 + OLS residual) from a
    market-model regression, then flag negative outlier weeks with an
    elliptic-envelope anomaly detector.

    Parameters
    ----------
    crash_dataw : weekly panel with 'TICKER', 'RET' and 'vwretx' columns.
    stocks : iterable of ticker symbols present in crash_dataw.

    Returns
    -------
    crash_data_sliced : crash_dataw trimmed by two weeks at each end per
        ticker, with added 'residuals' and binary 'neg_outliers' columns.
    """
    residuals_dict = {}  # We will store residuals for each stock in this dictionary

    for i in stocks:
        # Regress the stock's weekly return on the value-weighted market
        # return proxy; add_constant supplies the intercept column.
        Y = crash_dataw.loc[crash_dataw['TICKER'] == i]['RET'].values
        X = crash_dataw.loc[crash_dataw['TICKER'] == i]['vwretx'].values
        X = sm.add_constant(X)

        # NOTE(review): this element-wise SUM of the lead/lag market slices
        # collapses the five lead/lag terms into one regressor (and the
        # constant column becomes 5); the usual expanded market model keeps
        # them as separate columns — confirm this is intended.
        X_transformed = X[2:-2] + X[1:-3] + X[0:-4] + X[3:-1] + X[4:]
        ols = sm.OLS(Y[2:-2], X_transformed).fit()

        residuals_stock = ols.resid
        # Firm-specific weekly return: log(1 + residual).
        residuals_dict[i] = list(map(lambda x: np.log(1 + x), residuals_stock))

    # Trim two weeks at each end of every ticker so the sliced panel lines
    # up row-for-row with the residuals computed above.
    crash_data_sliced = pd.DataFrame([])
    for i in stocks:
        crash_data_sliced = pd.concat([crash_data_sliced, crash_dataw.loc[crash_dataw.TICKER == i][2:-2]],
                                      ignore_index=True)
    print(crash_data_sliced.head())

    # Robust-covariance outlier detector; ~2% of points are labeled -1.
    envelope = EllipticEnvelope(contamination=0.02, support_fraction=1)
    ee_predictions = {}

    for i in stocks:
        stock_residuals = np.array(residuals_dict[i]).reshape(-1, 1)
        if stock_residuals.shape[0] < 2:
            print(f"Skipping stock {i} due to insufficient residuals.")
            continue  # Skip the current iteration and move to the next stock
        envelope.fit(stock_residuals)
        ee_predictions[i] = envelope.predict(stock_residuals)

    # Map envelope labels (1 = inlier, -1 = outlier) to 0 / -1 flags,
    # in the same ticker order used to build crash_data_sliced.
    transform = []
    for i in stocks:
        if i in ee_predictions:  # Ensure we only process stocks that were not skipped
            for j in range(len(ee_predictions[i])):
                transform.append(np.where(ee_predictions[i][j] == 1, 0, -1))

    crash_data_sliced = crash_data_sliced.reset_index()
    crash_data_sliced['residuals'] = np.concatenate(list(residuals_dict.values()))
    # Keep only NEGATIVE outliers: any anomaly with a positive residual is
    # reset to 0 on the next line.
    crash_data_sliced['neg_outliers'] = np.where((np.array(transform)) == -1, 1, 0)
    crash_data_sliced.loc[(crash_data_sliced.neg_outliers == 1) & (crash_data_sliced.residuals > 0), 'neg_outliers'] = 0

    # Visual check: residual distribution vs. flagged anomalies for the
    # 9th-12th tickers.
    plt.figure(figsize=(12, 8))
    k = 1

    for i in stocks[8:12]:
        plt.subplot(2, 2, k)
        crash_data_sliced['residuals'][crash_data_sliced.TICKER == i].hist(label='normal', bins=30, color='gray')
        outliers = crash_data_sliced['residuals'][
            (crash_data_sliced.TICKER == i) & (crash_data_sliced.neg_outliers > 0)]
        outliers.hist(color='black', label='anomaly')
        plt.title(i)
        plt.legend()
        k += 1
    plt.show()

    return crash_data_sliced
| 105 | + |
| 106 | + |
def weekly_to_annual_data(crash_data_sliced, crash_data, crash_dataw):
    """Attach weekly volatility and year-end residual statistics.

    Parameters
    ----------
    crash_data_sliced : pd.DataFrame
        Weekly residual panel with 'Date', 'TICKER' and 'residuals' columns.
    crash_data : pd.DataFrame
        Daily panel (DatetimeIndex) with 'TICKER' and 'RET' columns.
    crash_dataw : pd.DataFrame
        Weekly panel; gains a 'std' column as a side effect.

    Returns
    -------
    pd.DataFrame
        crash_data_sliced plus 'annual_mean' / 'annual_std' columns filled
        back from the appended year-end aggregate rows.
    """
    crash_data_sliced = crash_data_sliced.set_index('Date')
    crash_data_sliced.index = pd.to_datetime(crash_data_sliced.index)

    # Weekly return volatility per ticker.
    # NOTE(review): this assignment aligns purely by row position
    # (RangeIndex) with crash_dataw — verify both frames share ordering.
    std = crash_data.groupby('TICKER')['RET'].resample('W').std().reset_index()
    crash_dataw['std'] = pd.DataFrame(std['RET'])

    # Year-end mean / std of the weekly residuals for each ticker.
    yearly_data = crash_data_sliced.groupby('TICKER').resample('Y')['residuals'].agg(['mean', 'std']).reset_index()
    print(yearly_data.head())

    # Outer merge appends the year-end rows (their Dates rarely coincide
    # with week-end Dates). Back-fill the annual stats so each week carries
    # its own year's mean/std, and forward-fill residuals into the appended
    # year-end rows. Fills are computed on a (TICKER, Date)-sorted view and
    # assigned back via index alignment, preserving the original row order.
    merge_crash = pd.merge(crash_data_sliced.reset_index(), yearly_data,
                           how='outer', on=['TICKER', 'Date'])
    sorted_view = merge_crash.sort_values(by=['TICKER', 'Date'])
    # .bfill()/.ffill() replace fillna(method=...), which is deprecated and
    # removed in pandas 3.
    merge_crash['annual_mean'] = sorted_view['mean'].bfill()
    merge_crash['annual_std'] = sorted_view['std'].bfill()
    merge_crash['residuals'] = sorted_view['residuals'].ffill()
    # Drop the raw aggregate columns by name; the original positional
    # iloc-based drop passed a DataFrame as labels, which is fragile.
    merge_crash = merge_crash.drop(columns=['mean', 'std'])

    return merge_crash
| 124 | + |
| 125 | + |
def crash_risk_measure(merge_crash, stocks):
    """Compute annual crash-risk measures from the weekly residual panel:
    a crash dummy, DUVOL (down-to-up volatility) and NCSKEW (negative
    skewness coefficient), plus annual mean VOL / PRC.

    Parameters
    ----------
    merge_crash : weekly panel with 'Date', 'TICKER', 'residuals',
        'annual_mean', 'annual_std' (and BIDLO/ASKHI/VOL/PRC/neg_outliers).
    stocks : iterable of ticker symbols present in merge_crash.

    Returns
    -------
    merge_grouped : one row per ticker-year with the crash-risk measures.
    """
    # Crash dummy: a week counts as a crash when its residual falls more
    # than 3.09 annual standard deviations below the annual mean.
    crash_risk_out = []
    for j in stocks:
        for k in range(len(merge_crash[merge_crash.TICKER == j])):
            if merge_crash[merge_crash.TICKER == j]['residuals'].iloc[k] < \
                    merge_crash[merge_crash.TICKER == j]['annual_mean'].iloc[k] - \
                    3.09 * merge_crash[merge_crash.TICKER == j]['annual_std'].iloc[k]:
                crash_risk_out.append(1)
            else:
                crash_risk_out.append(0)
    merge_crash['crash_risk'] = crash_risk_out
    print(merge_crash['crash_risk'].value_counts())

    merge_crash = merge_crash.set_index('Date')
    # Number of crash weeks per ticker-year.
    merge_crash_annual = merge_crash.groupby('TICKER').resample('1Y')['crash_risk'].sum().reset_index()

    # Down week: residual strictly below the annual mean; up = 1 - down.
    down = []
    for j in range(len(merge_crash)):
        if merge_crash['residuals'].iloc[j] < merge_crash['annual_mean'].iloc[j]:
            down.append(1)
        else:
            down.append(0)

    merge_crash = merge_crash.reset_index()
    merge_crash['down'] = pd.DataFrame(down)
    merge_crash['up'] = 1 - merge_crash['down']
    down_residuals = merge_crash[merge_crash.down == 1][['residuals', 'TICKER', 'Date']]
    up_residuals = merge_crash[merge_crash.up == 1][['residuals', 'TICKER', 'Date']]

    # Squared / cubed residuals of down and up weeks feed the DUVOL and
    # NCSKEW formulas below.
    down_residuals['residuals_down_sq'] = down_residuals['residuals'] ** 2
    down_residuals['residuals_down_cubic'] = down_residuals['residuals'] ** 3
    up_residuals['residuals_up_sq'] = up_residuals['residuals'] ** 2
    up_residuals['residuals_up_cubic'] = up_residuals['residuals'] ** 3
    down_residuals['down_residuals'] = down_residuals['residuals']
    up_residuals['up_residuals'] = up_residuals['residuals']
    del down_residuals['residuals']
    del up_residuals['residuals']
    merge_crash['residuals_sq'] = merge_crash['residuals'] ** 2
    merge_crash['residuals_cubic'] = merge_crash['residuals'] ** 3

    merge_crash_all = merge_crash.merge(down_residuals, on=['TICKER', 'Date'], how='outer')
    merge_crash_all = merge_crash_all.merge(up_residuals, on=['TICKER', 'Date'], how='outer')
    cols = ['BIDLO', 'ASKHI', 'residuals', 'annual_std', 'residuals_sq', 'residuals_cubic', 'down', 'up',
            'residuals_up_sq', 'residuals_down_sq', 'neg_outliers']
    merge_crash_all = merge_crash_all.set_index('Date')
    # Annual sums of the weekly quantities per ticker.
    merge_grouped = merge_crash_all.groupby('TICKER')[cols].resample('1Y').sum().reset_index()
    # Collapse the summed outlier count back to a 0/1 flag per year.
    merge_grouped['neg_outliers'] = np.where(merge_grouped.neg_outliers >= 1, 1, 0)

    merge_grouped = merge_grouped.set_index('Date')
    # Down/up week counts per ticker-year.
    merge_all = merge_grouped.groupby('TICKER').resample('1Y').agg({'down': ['sum', 'count'],
                                                                    'up': ['sum', 'count']}).reset_index()
    print(merge_all.head())

    # NOTE(review): the .values assignments below align purely by row
    # position; they assume merge_all (and later merge_crash_annual) have
    # exactly the same ticker-year row order as merge_grouped — verify.
    merge_grouped['down'] = merge_all['down']['sum'].values
    merge_grouped['up'] = merge_all['up']['sum'].values
    merge_grouped['count'] = merge_grouped['down'] + merge_grouped['up']

    merge_grouped = merge_grouped.reset_index()
    # DUVOL: log ratio of down-week to up-week residual volatility.
    merge_grouped['duvol'] = np.log(((merge_grouped['up'] - 1) * merge_grouped['residuals_down_sq']) /
                                    ((merge_grouped['down'] - 1) * merge_grouped['residuals_up_sq']))
    print(merge_grouped.groupby('TICKER')['duvol'].mean())

    # NCSKEW: negative of the sample skewness coefficient of the weekly
    # residuals within each ticker-year.
    merge_grouped['ncskew'] = - (((merge_grouped['count'] * (merge_grouped['count'] - 1) ** (3 / 2)) *
                                  merge_grouped['residuals_cubic']) / (((merge_grouped['count'] - 1) *
                                                                       (merge_grouped['count'] - 2)) * merge_grouped['residuals_sq'] ** (3 / 2)))
    print(merge_grouped.groupby('TICKER')['ncskew'].mean())

    # Annual crash dummy: 1 if the year contained at least one crash week.
    merge_grouped['crash_risk'] = merge_crash_annual['crash_risk']
    merge_grouped['crash_risk'] = np.where(merge_grouped.crash_risk >= 1, 1, 0)
    merge_crash_all_grouped2 = merge_crash_all.groupby('TICKER')[['VOL', 'PRC']].resample('1Y').mean().reset_index()
    merge_grouped[['VOL', 'PRC']] = merge_crash_all_grouped2[['VOL', 'PRC']]
    print(merge_grouped[['ncskew', 'duvol']].corr())
    return merge_grouped
| 199 | + |
| 200 | + |
if __name__ == '__main__':
    # Ticker universe: large caps across sectors plus SPY, VIX and gold.
    tickers = [
        'ABBV', 'GOOGL', 'JNJ', 'DLTR', 'HLT', 'JPM', 'DEO', 'PG', 'ALB',
        'BA', 'NVDA', 'LUV', 'PEP', 'TSM', 'SPY', '^VIX', 'GLD',
    ]
    start_date = dt.datetime(2010, 1, 1)
    end_date = dt.datetime(2023, 1, 1)

    # Pipeline: download -> weekly aggregation -> firm-specific residuals
    # -> annual statistics -> crash-risk measures.
    crash_data_ = load_raw_data(tickers, start_date, end_date)
    crash_dataw_, stocks_ = weekly_hist_gram(crash_data_)
    crash_data_sliced_ = firm_specific_weekly_return(crash_dataw_, stocks_)
    merge_crash_ = weekly_to_annual_data(crash_data_sliced_, crash_data_, crash_dataw_)
    merge_grouped_ = crash_risk_measure(merge_crash_, stocks_)
0 commit comments