diff --git a/ffm_regression.ipynb b/ffm_regression.ipynb index 39a9a3b..dd88016 100644 --- a/ffm_regression.ipynb +++ b/ffm_regression.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "code", - "execution_count": 48, + "execution_count": 119, "metadata": {}, "outputs": [], "source": [ @@ -14,114 +14,501 @@ }, { "cell_type": "code", - "execution_count": 79, + "execution_count": 23, "metadata": {}, "outputs": [], "source": [ "stock_csv_path = Path('monthly_data.csv')\n", "monthly_df = pd.read_csv(stock_csv_path, index_col = 'Date')\n", - "#monthly_df_daily_returns = monthly_df.pct_change()" + "monthly_df_daily_returns = monthly_df.pct_change()" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 135, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
DXCBBYAMGCNPWMTMRKNEMVFCMDTHST...UNHHSYFBEIXSBUXMCOHIIRLLNTAXP
Date
3/31/2015-0.072857-0.002622-0.007578-0.018278-0.014131-0.010162-0.174545-0.0132020.009122-0.029476...0.044493-0.0276540.041155-0.0211370.0129420.070765-0.005552-0.039418-0.009434-0.039329
4/30/2015-0.012714-0.0830910.0528450.027437-0.0510640.0361860.220175-0.038242-0.045391-0.001982...-0.058247-0.089090-0.041961-0.0244920.0470960.035838-0.0610770.014524-0.031611-0.008577
5/31/20150.0000000.0000000.0000000.0000000.0000000.0000000.0000000.0000000.0000000.000000...0.0000000.0000000.0000000.0000000.0000000.0000000.0000000.0000000.0000000.000000
6/30/20150.025611-0.052519-0.033299-0.081165-0.085489-0.036887-0.117304-0.0324770.000349-0.005595...0.099791-0.0281470.088866-0.0811050.0849250.007206-0.141581-0.004303-0.0454770.007241
7/31/2015-0.003199-0.009813-0.0489480.0162900.0148040.035658-0.2649830.1053920.057895-0.022693...-0.0049180.0457050.0960710.0797050.0803810.0228790.042810-0.0488820.075432-0.021358
\n", + "

5 rows × 100 columns

\n", + "
" + ], + "text/plain": [ + " DXC BBY AMG CNP WMT MRK \\\n", + "Date \n", + "3/31/2015 -0.072857 -0.002622 -0.007578 -0.018278 -0.014131 -0.010162 \n", + "4/30/2015 -0.012714 -0.083091 0.052845 0.027437 -0.051064 0.036186 \n", + "5/31/2015 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 \n", + "6/30/2015 0.025611 -0.052519 -0.033299 -0.081165 -0.085489 -0.036887 \n", + "7/31/2015 -0.003199 -0.009813 -0.048948 0.016290 0.014804 0.035658 \n", + "\n", + " NEM VFC MDT HST ... UNH HSY \\\n", + "Date ... \n", + "3/31/2015 -0.174545 -0.013202 0.009122 -0.029476 ... 0.044493 -0.027654 \n", + "4/30/2015 0.220175 -0.038242 -0.045391 -0.001982 ... -0.058247 -0.089090 \n", + "5/31/2015 0.000000 0.000000 0.000000 0.000000 ... 0.000000 0.000000 \n", + "6/30/2015 -0.117304 -0.032477 0.000349 -0.005595 ... 0.099791 -0.028147 \n", + "7/31/2015 -0.264983 0.105392 0.057895 -0.022693 ... -0.004918 0.045705 \n", + "\n", + " FB EIX SBUX MCO HII RL \\\n", + "Date \n", + "3/31/2015 0.041155 -0.021137 0.012942 0.070765 -0.005552 -0.039418 \n", + "4/30/2015 -0.041961 -0.024492 0.047096 0.035838 -0.061077 0.014524 \n", + "5/31/2015 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 \n", + "6/30/2015 0.088866 -0.081105 0.084925 0.007206 -0.141581 -0.004303 \n", + "7/31/2015 0.096071 0.079705 0.080381 0.022879 0.042810 -0.048882 \n", + "\n", + " LNT AXP \n", + "Date \n", + "3/31/2015 -0.009434 -0.039329 \n", + "4/30/2015 -0.031611 -0.008577 \n", + "5/31/2015 0.000000 0.000000 \n", + "6/30/2015 -0.045477 0.007241 \n", + "7/31/2015 0.075432 -0.021358 \n", + "\n", + "[5 rows x 100 columns]" + ] + }, + "execution_count": 135, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "monthly_df_daily_returns" + "monthly_df_daily_returns.head()" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 25, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "(59, 100)" + ] + }, + "execution_count": 25, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "monthly_df_daily_returns" + "monthly_df_daily_returns.shape" ] }, { "cell_type": "code", - "execution_count": 1, + "execution_count": 26, "metadata": {}, "outputs": [ { - "ename": "NameError", - "evalue": "name 'Path' is not defined", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)", - "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mffm_csv_path\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mPath\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'F-F_Research_Data_Factors.csv'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 2\u001b[0m \u001b[0mffm_data\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mpd\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mread_csv\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mffm_csv_path\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mindex_col\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m'Date'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;31mNameError\u001b[0m: name 'Path' is not defined" - ] + "data": { + "text/plain": [ + "(58, 100)" + ] + }, + "execution_count": 26, + "metadata": {}, + "output_type": "execute_result" } ], "source": [ - "ffm_csv_path = Path('F-F_Research_Data_Factors.csv')\n", - "ffm_data = pd.read_csv(ffm_csv_path, index_col='Date')\n" + "monthly_df_daily_returns.dropna(inplace = True)\n", + "monthly_df_daily_returns.shape" ] }, { "cell_type": "code", - "execution_count": 85, + "execution_count": 136, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None, normalize=False)" + "(58, 4)" ] }, - "execution_count": 85, + "execution_count": 136, "metadata": {}, "output_type": "execute_result" } ], + "source": [ + "ffm_csv_path = Path('F-F_Research_Data_Factors.csv')\n", + "ffm_data = pd.read_csv(ffm_csv_path, index_col='Date')\n", + "ffm_data.shape" + ] + }, + { + "cell_type": "code", + "execution_count": 38, + "metadata": {}, + "outputs": [], + "source": [ + "reg = linear_model.LinearRegression()" + ] + }, + { + "cell_type": "code", + "execution_count": 121, + "metadata": {}, + "outputs": [], "source": [ "reg = linear_model.LinearRegression()\n", - "reg.fit(ffm_data[['SMB','HML','RF']],monthly_df.AAPL)" + "fit = reg.fit(X=ffm_data[['SMB','HML','RF']],\n", + " y=monthly_df_daily_returns.AAPL)" + ] + }, + { + "cell_type": "code", + "execution_count": 107, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([ 0.00493159, -0.00134568, 0.15507276])" + ] + }, + "execution_count": 107, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "fit.coef_" + ] + }, + { + "cell_type": "code", + "execution_count": 150, + "metadata": {}, + "outputs": [], + "source": [ + "import statsmodels.api as sm\n", + "from statsmodels.regression import linear_model as lm" + ] + }, + { + "cell_type": "code", + "execution_count": 168, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "C:\\Users\\nikki\\Anaconda3\\lib\\site-packages\\numpy\\core\\fromnumeric.py:2495: FutureWarning: Method .ptp is deprecated and will be removed in a future version. Use numpy.ptp instead.\n", + " return ptp(axis=axis, out=out, **kwargs)\n" + ] + } + ], + "source": [ + "results_dict = {}\n", + "params = {}\n", + "\n", + "for ticker in monthly_df_daily_returns.columns:\n", + " results_dict[ticker] = lm.OLS(endog=monthly_df_daily_returns.loc[:,ticker],\n", + " exog=sm.add_constant(ffm_data[['SMB','HML','RF']])).fit()\n", + " \n", + " params[ticker] = results_dict[ticker].params" + ] + }, + { + "cell_type": "code", + "execution_count": 163, + "metadata": {}, + "outputs": [], + "source": [ + "temp_var = results_dict['AAPL']" + ] + }, + { + "cell_type": "code", + "execution_count": 170, + "metadata": {}, + "outputs": [], + "source": [ + "# params" + ] + }, + { + "cell_type": "code", + "execution_count": 137, + "metadata": {}, + "outputs": [], + "source": [ + "ffm_data.index = pd.DatetimeIndex(ffm_data.index)" + ] + }, + { + "cell_type": "code", + "execution_count": 141, + "metadata": {}, + "outputs": [], + "source": [ + "monthly_df_daily_returns.index = pd.DatetimeIndex(monthly_df_daily_returns.index)" ] }, { "cell_type": "code", - "execution_count": 86, + "execution_count": 144, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "array([ 2.32782370e-01, -2.62080002e-02, 5.28665463e+02])" + "DatetimeIndex(['2015-03-31', '2015-04-30', '2015-05-31', '2015-06-30',\n", + " '2015-07-31', '2015-08-31', '2015-09-30', '2015-10-31',\n", + " '2015-11-30', '2015-12-31', '2016-01-31', '2016-02-29',\n", + " '2016-03-31', '2016-04-30', '2016-05-31', '2016-06-30',\n", + " '2016-07-31', '2016-08-31', '2016-09-30', '2016-10-31',\n", + " '2016-11-30', '2016-12-31', '2017-01-31', '2017-02-28',\n", + " '2017-03-31', '2017-04-30', '2017-05-31', '2017-06-30',\n", + " '2017-07-31', '2017-08-31', '2017-09-30', '2017-10-31',\n", + " '2017-11-30', '2017-12-31', '2018-01-31', '2018-02-28',\n", + " '2018-03-31', '2018-04-30', '2018-05-31', '2018-06-30',\n", + " '2018-07-31', '2018-08-31', '2018-09-30', '2018-10-31',\n", + " '2018-11-30', '2018-12-31', '2019-01-31', '2019-02-28',\n", + " '2019-03-31', '2019-04-30', '2019-05-31', '2019-06-30',\n", + " '2019-07-31', '2019-08-31', '2019-09-30', '2019-10-31',\n", + " '2019-11-30', '2019-12-31'],\n", + " dtype='datetime64[ns]', name='Date', freq=None)" ] }, - "execution_count": 86, + "execution_count": 144, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "reg.coef_" + "monthly_df_daily_returns.index" ] }, { "cell_type": "code", - "execution_count": 87, + "execution_count": 145, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "105.87697212512981" + "DatetimeIndex(['2015-03-31', '2015-04-30', '2015-05-31', '2015-06-30',\n", + " '2015-07-31', '2015-08-31', '2015-09-30', '2015-10-31',\n", + " '2015-11-30', '2015-12-31', '2016-01-31', '2016-02-29',\n", + " '2016-03-31', '2016-04-30', '2016-05-31', '2016-06-30',\n", + " '2016-07-31', '2016-08-31', '2016-09-30', '2016-10-31',\n", + " '2016-11-30', '2016-12-31', '2017-01-31', '2017-02-28',\n", + " '2017-03-31', '2017-04-30', '2017-05-31', '2017-06-30',\n", + " '2017-07-31', '2017-08-31', '2017-09-30', '2017-10-31',\n", + " '2017-11-30', '2017-12-31', '2018-01-31', '2018-02-28',\n", + " '2018-03-31', '2018-04-30', '2018-05-31', '2018-06-30',\n", + " '2018-07-31', '2018-08-31', '2018-09-30', '2018-10-31',\n", + " '2018-11-30', '2018-12-31', '2019-01-31', '2019-02-28',\n", + " '2019-03-31', '2019-04-30', '2019-05-31', '2019-06-30',\n", + " '2019-07-31', '2019-08-31', '2019-09-30', '2019-10-31',\n", + " '2019-11-30', '2019-12-31'],\n", + " dtype='datetime64[ns]', name='Date', freq=None)" ] }, - "execution_count": 87, + "execution_count": 145, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "reg.intercept_" + "ffm_data.index" ] }, { @@ -148,7 +535,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.7.5" + "version": "3.7.4" } }, "nbformat": 4,