diff --git a/EDA.ipynb b/EDA.ipynb
new file mode 100644
index 0000000..d9a9764
--- /dev/null
+++ b/EDA.ipynb
@@ -0,0 +1,373 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "8c27d092",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "%matplotlib inline\n",
+    "import pandas as pd\n",
+    "import numpy as np\n",
+    "import matplotlib.pyplot as plt"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "594e37d2",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "df_train = pd.read_csv('data/returns_train.csv', index_col='month_end')\n",
+    "df_train = df_train.sort_index()  # sort_index returns a new frame; labels are strings, so this assumes ISO dates\n",
+    "df_test = pd.read_csv('data/returns_test.csv', index_col='month_end')\n",
+    "df_test = df_test.sort_index()  # keep the sorted copy here as well"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "ab9a7f05",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "df_all = pd.concat([df_train, df_test])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "23429d59",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "df_test"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "f59d07e2",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "df_all"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "f45c8e11",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "stocks = df_train.columns"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "0c19773d",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "(1 + df_train.Stock1).cumprod().plot()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "e173c714",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "len(df_all.columns)\n",
+    "1/54"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "adeead11",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "for stock in stocks:\n",
+    "    ax = (1 + df_all[stock]).cumprod().plot()\n",
+    "ax.axvline(len(df_train), c='r')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "876417ed",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "(1 + df_all.Stock66).cumprod().plot()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "2604ffcd",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "(1 + df_all.Stock54).cumprod().plot()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "3a5291f5",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "df_test"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "d1e14611",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "(1 + df_all).cumprod().loc[['2017-09-30']]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "a079265d",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "df_all.Stock54.iloc[-13:]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "602b26ea",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from pypfopt.expected_returns import mean_historical_return\n",
+    "from pypfopt.risk_models import CovarianceShrinkage\n",
+    "df_train_cum = (df_train + 1).cumprod()\n",
+    "mu = mean_historical_return(df_train_cum)\n",
+    "S = CovarianceShrinkage(df_train_cum).ledoit_wolf()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "893784c5",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "df_train"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "22396233",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from pypfopt.efficient_frontier import EfficientFrontier\n",
+    "ef = EfficientFrontier(mu, S, weight_bounds=(0,0.1))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "e2ba7e90",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "weights = ef.max_sharpe()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "9524682f",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "list(weights.keys())"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "fc685705",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def dict_to_df(dic):\n",
+    "    '''Return a one-row DataFrame with one column per key of dic.'''\n",
+    "    # one-element lists give the frame exactly one row\n",
+    "    single_row = {name: [dic[name]] for name in dic}\n",
+    "    return pd.DataFrame(single_row)\n",
+    "\n",
+    "dict_to_df(weights)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "f4a2cf47",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "(1.5)**(1/(4*12))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "d147a818",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "(1.07)**(1/12) - 1"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "b2e99242",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from scipy.stats import normaltest\n",
+    "\n",
+    "for stock in stocks:  # one p-value per stock column, not one per row of df_all\n",
+    "    print(normaltest(df_all[stock])[1])\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "6f64a8ba",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "df_all[stocks[7]].hist(bins=30)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "cc03e60d",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "len(df_train)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "7e13378b",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "scatter_data = []\n",
+    "for stock in stocks:\n",
+    "    values = df_all[stock].to_numpy()  # plain array: positional access without label lookups\n",
+    "    # pair every return with the return on the following row\n",
+    "    scatter_data.extend([a, b] for a, b in zip(values[:-1], values[1:]))\n",
+    "x = [pair[0] for pair in scatter_data]\n",
+    "y = [pair[1] for pair in scatter_data]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "636e23ca",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from scipy.stats import linregress\n",
+    "linregress(x, y)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "fe3791a9",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from scipy.stats import pearsonr\n",
+    "pearsonr(x, y)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "874962a9",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "len({'a': 1, 'b': 5})"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "4fd8af38",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "plt.scatter([i[0] for i in scatter_data], [i[1] for i in scatter_data])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "78714014",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from pypfopt.risk_models import CovarianceShrinkage, semicovariance, risk_matrix\n",
+    "risk_df = risk_matrix((1 + df_train).cumprod(), method='sample_cov')\n",
+    "\n",
+    "# Scale covariance to correlation. The standard deviations are read from the\n",
+    "# diagonal once, up front: dividing in place would overwrite them mid-loop.\n",
+    "std = np.sqrt(np.diag(risk_df.to_numpy()))\n",
+    "risk_df = risk_df / np.outer(std, std)\n",
+    "risk_df"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.11.3"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/Final_notebook.ipynb b/Final_notebook.ipynb
new file mode 100644
index 0000000..706f2d7
--- /dev/null
+++ b/Final_notebook.ipynb
@@ -0,0 +1,230 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "ec2209b7",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# %%\n",
+    "import numpy as np\n",
+    "import pandas as pd\n",
+    "import datetime\n",
+    "import plotly.express as px\n",
+    "from pypfopt.efficient_frontier import EfficientCVaR\n",
+    "from pypfopt.risk_models import risk_matrix\n",
+    "from pypfopt.expected_returns import mean_historical_return\n",
+    "\n",
+    "\n",
+    "\n",
+    "\n",
+    "print('---Python script Start---', str(datetime.datetime.now()))\n",
+    "\n",
+    "# %%\n",
+    "\n",
+    "# data reads\n",
+    "df_returns_train = pd.read_csv('data/returns_train.csv')\n",
+    "df_returns_test = pd.read_csv('data/returns_test.csv')\n",
+    "df_returns_train['month_end'] = pd.to_datetime(arg=df_returns_train['month_end']).apply(lambda d: d.date())\n",
+    "df_returns_test['month_end'] = pd.to_datetime(arg=df_returns_test['month_end']).apply(lambda d: d.date())\n",
+    "\n",
+    "# %%\n",
+    "\n",
+    "def equalise_weights(df: pd.DataFrame):\n",
+    "\n",
+    "    '''\n",
+    "        Function to generate the equal weights, i.e. 1/p for each active stock within a month\n",
+    "\n",
+    "        Args:\n",
+    "            df: A return data frame. First column is month end and remaining columns are stocks\n",
+    "\n",
+    "        Returns:\n",
+    "            A dataframe of the same dimension but with values 1/p on active funds within a month\n",
+    "\n",
+    "    '''\n",
+    "\n",
+    "    # create df to house weights\n",
+    "    n_length = len(df)\n",
+    "    df_returns = df\n",
+    "    df_weights = df_returns[:n_length].copy()\n",
+    "    df_weights.set_index('month_end', inplace=True)\n",
+    "\n",
+    "    # list of stock names\n",
+    "    list_stocks = list(df_returns.columns)\n",
+    "    list_stocks.remove('month_end')\n",
+    "\n",
+    "    # assign 1/p\n",
+    "    df_weights[list_stocks] = 1/len(list_stocks)\n",
+    "\n",
+    "    return df_weights\n",
+    "\n",
+    "\n",
+    "# %%\n",
+    "\n",
+    "def generate_portfolio(df_train: pd.DataFrame, df_test: pd.DataFrame):\n",
+    "\n",
+    "    '''\n",
+    "        Function to generate stocks weight allocation for time t+1 using historic data. Initial weights generated as 1/p for active stock within a month\n",
+    "\n",
+    "        Args:\n",
+    "            df_train: The training set of returns. First column is month end and remaining columns are stocks\n",
+    "            df_test: The testing set of returns. First column is month end and remaining columns are stocks\n",
+    "\n",
+    "        Returns:\n",
+    "            The returns dataframe and the weights\n",
+    "    '''\n",
+    "\n",
+    "    print('---> training set spans', df_train['month_end'].min(), df_train['month_end'].max())\n",
+    "    print('---> testing set spans', df_test['month_end'].min(), df_test['month_end'].max())\n",
+    "\n",
+    "    # initialise data\n",
+    "    n_train = len(df_train)\n",
+    "    df_returns = pd.concat(objs=[df_train, df_test], ignore_index=True)\n",
+    "\n",
+    "    df_weights = equalise_weights(df_returns[:n_train]) # df to store weights and create initial\n",
+    "\n",
+    "    # list of stock names\n",
+    "    list_stocks = list(df_returns.columns)\n",
+    "    list_stocks.remove('month_end')\n",
+    "\n",
+    "    # <<--------------------- YOUR CODE GOES BELOW THIS LINE --------------------->>\n",
+    "\n",
+    "    # This is your playground. Delete/modify any of the code here and replace with \n",
+    "    # your methodology. Below we provide a simple, naive estimation to illustrate \n",
+    "    # how we think you should go about structuring your submission and your comments:\n",
+    "\n",
+    "    # We use a static Inverse Volatility Weighting (https://en.wikipedia.org/wiki/Inverse-variance_weighting) \n",
+    "    # strategy to generate portfolio weights.\n",
+    "    # Use the latest available data at that point in time\n",
+    "    \n",
+    "    # Each weight is wrapped in a one-element list so that the resulting\n",
+    "    # frame has exactly one row, with one column per key.\n",
+    "    def dict_to_df(dic):\n",
+    "        '''Return a one-row DataFrame with one column per key of dic.'''\n",
+    "        single_row = {\n",
+    "            name: [dic[name]]\n",
+    "            for name in dic\n",
+    "        }\n",
+    "        return pd.DataFrame(single_row)\n",
+    "    \n",
+    "    for i in range(len(df_test)):\n",
+    "\n",
+    "        # latest data at this point\n",
+    "        df_latest = df_returns[(df_returns['month_end'] < df_test.loc[i, 'month_end'])]\n",
+    "                \n",
+    "        # Estimate from the trailing 5 years (60 monthly rows) only\n",
+    "        df_window = df_latest.set_index('month_end').iloc[-5*12:]\n",
+    "        df_cum = (1 + df_window).cumprod() # cumulative growth of 1 unit, used as a price series\n",
+    "\n",
+    "        # constants\n",
+    "        TARGET_RETURN = 0.006 # minimum expected return demanded from the optimiser\n",
+    "        LOWER_BOUND = 0 # long-only: no negative (short) weights\n",
+    "        UPPER_BOUND = 0.1 # per-stock cap, matching the 10% limit check below\n",
+    "\n",
+    "        # NOTE(review): mean_historical_return annualises with frequency=252 by default, while\n",
+    "        # these rows are monthly and TARGET_RETURN looks monthly - confirm the intended scale.\n",
+    "        mu = mean_historical_return(df_cum)\n",
+    "        # EfficientCVaR takes per-period returns as its second argument, not cumulative levels\n",
+    "        ef = EfficientCVaR(mu, df_window, weight_bounds=(LOWER_BOUND, UPPER_BOUND))\n",
+    "        weights = ef.efficient_return(TARGET_RETURN)\n",
+    "        df_w = dict_to_df(weights)\n",
+    "\n",
+    "        # add to all weights\n",
+    "        df_weights = pd.concat(objs=[df_weights, df_w], ignore_index=True)\n",
+    "    \n",
+    "    # <<--------------------- YOUR CODE GOES ABOVE THIS LINE --------------------->>\n",
+    "    \n",
+    "    # 10% limit check\n",
+    "    if len(np.array(df_weights[list_stocks])[np.array(df_weights[list_stocks]) > 0.101]):\n",
+    "\n",
+    "        raise Exception(r'---> 10% limit exceeded')\n",
+    "\n",
+    "    return df_returns, df_weights\n",
+    "\n",
+    "\n",
+    "# %%\n",
+    "\n",
+    "\n",
+    "def plot_total_return(df_returns: pd.DataFrame, df_weights_index: pd.DataFrame, df_weights_portfolio: pd.DataFrame):\n",
+    "\n",
+    "    '''\n",
+    "        Function to generate the two total return indices.\n",
+    "\n",
+    "        Args:\n",
+    "            df_returns: Ascending date ordered combined training and test returns data.\n",
+    "            df_weights_index: Index weights. Equally weighted\n",
+    "            df_weights_portfolio: Portfolio weights. Your portfolio should be equally weighted over the training date range. If blank will be ignored\n",
+    "\n",
+    "        Returns:\n",
+    "            A plot of the two total return indices and the total return indices as a dataframe\n",
+    "    '''\n",
+    "\n",
+    "    # list of stock names\n",
+    "    list_stocks = list(df_returns.columns)\n",
+    "    list_stocks.remove('month_end')\n",
+    "\n",
+    "    # replace nans with 0 in return array\n",
+    "    ar_returns = np.array(df_returns[list_stocks])\n",
+    "    np.nan_to_num(x=ar_returns, copy=False, nan=0)\n",
+    "\n",
+    "    # calc index\n",
+    "    ar_rtn_index = np.array(df_weights_index[list_stocks])*ar_returns\n",
+    "    ar_rtn_port = np.array(df_weights_portfolio[list_stocks])*ar_returns\n",
+    "\n",
+    "    v_rtn_index = np.sum(ar_rtn_index, axis=1)\n",
+    "    v_rtn_port = np.sum(ar_rtn_port, axis=1)\n",
+    "\n",
+    "    # add return series to dataframe\n",
+    "    df_rtn = pd.DataFrame(data=df_returns['month_end'], columns=['month_end'])\n",
+    "    df_rtn['index'] = v_rtn_index\n",
+    "    df_rtn['portfolio'] = v_rtn_port\n",
+    "    df_rtn\n",
+    "\n",
+    "    # create total return\n",
+    "    base_price = 100\n",
+    "    df_rtn.sort_values(by = 'month_end', inplace = True)\n",
+    "    df_rtn['index_tr'] = ((1 + df_rtn['index']).cumprod()) * base_price\n",
+    "    df_rtn['portfolio_tr'] = ((1 + df_rtn['portfolio']).cumprod()) * base_price\n",
+    "    df_rtn\n",
+    "\n",
+    "    df_rtn_long = df_rtn[['month_end', 'index_tr', 'portfolio_tr']].melt(id_vars='month_end', var_name='series', value_name='Total Return')\n",
+    "\n",
+    "    # plot\n",
+    "    fig1 = px.line(data_frame=df_rtn_long, x='month_end', y='Total Return', color='series')\n",
+    "\n",
+    "    return fig1, df_rtn\n",
+    "\n",
+    "# %%\n",
+    "\n",
+    "# running solution\n",
+    "df_returns = pd.concat(objs=[df_returns_train, df_returns_test], ignore_index=True)\n",
+    "df_weights_index = equalise_weights(df_returns)\n",
+    "df_returns, df_weights_portfolio = generate_portfolio(df_returns_train, df_returns_test)\n",
+    "fig1, df_rtn = plot_total_return(df_returns, df_weights_index=df_weights_index, df_weights_portfolio=df_weights_portfolio)\n",
+    "fig1\n"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.11.3"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/Write up.pdf b/Write up.pdf
new file mode 100644
index 0000000..07c9566
Binary files /dev/null and b/Write up.pdf differ
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..852e649
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,2 @@
+pyportfolioopt==1.5.5
+
diff --git a/solution_skeleton.py b/solution_skeleton.py
index 7fc2fa5..a791924 100644
--- a/solution_skeleton.py
+++ b/solution_skeleton.py
@@ -4,6 +4,13 @@
 import datetime
 import plotly.express as px
 
+# Use the pypfopt library
+from pypfopt.efficient_frontier import EfficientCVaR
+from pypfopt.risk_models import risk_matrix
+from pypfopt.expected_returns import mean_historical_return
+
+
+
 
 print('---Python script Start---', str(datetime.datetime.now()))
 
@@ -84,22 +91,40 @@ def generate_portfolio(df_train: pd.DataFrame, df_test: pd.DataFrame):
     # strategy to generate portfolio weights.
     # Use the latest available data at that point in time
     
+    # It's simpler to write our own than figure out what's going
+    # wrong with pandas's inbuilt dict_to_df
+    def dict_to_df(dic):
+        new_dic = {}
+        
+        for key in dic:
+            new_dic[key] = [dic[key]]
+
+        return pd.DataFrame(new_dic)
+    
     for i in range(len(df_test)):
 
         # latest data at this point
         df_latest = df_returns[(df_returns['month_end'] < df_test.loc[i, 'month_end'])]
                 
-        # vol calc
-        df_w = pd.DataFrame()
-        df_w['vol'] = df_latest.std(numeric_only=True)          # calculate stock volatility
-        df_w['inv_vol'] = 1/df_w['vol']                         # calculate the inverse volatility
-        df_w['tot_inv_vol'] = df_w['inv_vol'].sum()             # calculate the total inverse volatility
-        df_w['weight'] = df_w['inv_vol']/df_w['tot_inv_vol']    # calculate weight based on inverse volatility
-        df_w.reset_index(inplace=True, names='name')
+        df_window = df_latest.set_index('month_end').iloc[-5*12:] # We only use the last 5 years in our analysis
+        
+        df_cum = (1 + df_window).cumprod() # cumulative returns
+        
+        # constants
+        TARGET_RETURN = 0.006
+        LOWER_BOUND = 0 # No diversification. divesification bad!
+        UPPER_BOUND = 0.1 # Forced diversification. Me angry!
+        
+        mu = mean_historical_return(df_cum)
+        S = risk_matrix(df_cum, method='sample_cov') # Simple sample covariance works best
+        
+        ef = EfficientCVaR(mu, df_cum, weight_bounds=(LOWER_BOUND, UPPER_BOUND))
+        
+        weights = ef.efficient_return(TARGET_RETURN)
+        df_w =  dict_to_df(weights)
 
         # add to all weights
-        df_this = pd.DataFrame(data=[[df_test.loc[i, 'month_end']] + df_w['weight'].to_list()], columns=df_latest.columns)
-        df_weights = pd.concat(objs=[df_weights, df_this], ignore_index=True)
+        df_weights = pd.concat(objs=[df_weights, df_w], ignore_index=True)
     
     # <<--------------------- YOUR CODE GOES ABOVE THIS LINE --------------------->>