dashboard update

Signed-off-by: ivelin <[email protected]>
ivelin · Feb 21, 2024 · f27428e · f27428e
1 parent 7b518b6
commit f27428e
Show file tree

Hide file tree

Showing 10 changed files with 54 additions and 107 deletions.
diff --git a/.gitignore b/.gitignore
@@ -7,3 +7,4 @@ tmp/**
 .vscode
 canswim_model.pt*
 build/**
+dist/**
diff --git a/forecast.sh b/forecast.sh
@@ -0,0 +1,11 @@
+#!/usr/bin/bash
+echo "Running forecast for multiple periods"
+# -e: stop at first failure, -x: trace commands, -v: echo lines as read. The dash is required: a bare "set exv" only assigns $1.
+set -exv
+
+./canswim.sh forecast
+./canswim.sh forecast --forecast_start_date "2023-11-18"
+./canswim.sh forecast --forecast_start_date "2023-12-02"
+./canswim.sh forecast --forecast_start_date "2023-12-16"
+./canswim.sh forecast --forecast_start_date "2024-01-13"
+./canswim.sh forecast --forecast_start_date "2024-01-27"
diff --git a/gather_data.ipynb b/gather_data.ipynb
@@ -240,10 +240,10 @@
    "source": [
     "all_stock_set = set()\n",
     "stock_files = [\n",
-    "    'IBD50.csv', \n",
-    "    'IBD250.csv', \n",
-    "    'ibdlive_picks.csv', \n",
-    "    'russell2000_iwm_holdings.csv', \n",
+    "    'IBD50.csv',\n",
+    "    'IBD250.csv',\n",
+    "    'ibdlive_picks.csv',\n",
+    "    'russell2000_iwm_holdings.csv',\n",
     "    'sp500_ivv_holdings.csv',\n",
     "    'nasdaq100_cndx_holdings.csv',\n",
     "    all_stocks_file\n",
@@ -1407,7 +1407,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "# Capture S&P500, NASDAQ100 and Russell 200 indecies and their equal weighted counter parts\n",
+    "# Capture S&P500, NASDAQ100 and Russell 2000 indexes and their equal weighted counterparts\n",
     "# As well as VIX volatility index, DXY US Dollar index, IRX 13 Week Treasury Bill Yield, FVX 5 Year Treasury Yield and TNX 10 Year Treasury Yield\n",
     "broad_market_indicies = '^SPX ^SPXEW ^NDX ^NDXE ^RUT ^R2ESC ^VIX DX-Y.NYB ^IRX ^FVX ^TNX'"
    ]
@@ -1844,7 +1844,7 @@
     }
    ],
    "source": [
-    "broad_market = yf.download(broad_market_indicies, period='max', group_by='tickers') \n",
+    "broad_market = yf.download(broad_market_indicies, period='max', group_by='tickers')\n",
     "broad_market"
    ]
   },
@@ -2768,7 +2768,7 @@
    ],
    "source": [
     "import yfinance as yf\n",
-    "sectors = yf.download(sector_indicies, period='max', group_by='tickers') \n",
+    "sectors = yf.download(sector_indicies, period='max', group_by='tickers')\n",
     "sectors"
    ]
   },
@@ -3727,7 +3727,7 @@
     }
    ],
    "source": [
-    "stock_price_data = yf.download(all_stock_set, period='max', group_by='tickers', interval=price_interval) \n",
+    "stock_price_data = yf.download(all_stock_set, period='max', group_by='tickers', interval=price_interval)\n",
     "stock_price_data"
    ]
   },
@@ -6491,7 +6491,7 @@
    ],
    "source": [
     "earnings_all_df = pd.DataFrame()\n",
-    "for ticker in stocks_ticker_set: # ['AAON']: # \n",
+    "for ticker in stocks_ticker_set: # ['AAON']: #\n",
     "    earnings = fmpsdk.historical_earning_calendar(apikey=FMP_API_KEY, symbol=ticker, limit=-1)\n",
     "    if earnings is not None and len(earnings) > 0:\n",
     "        edf = pd.DataFrame(earnings)\n",
@@ -11685,8 +11685,8 @@
     "\n",
     "\n",
     "def institutional_symbol_ownership(\n",
-    "    apikey: str, \n",
-    "    symbol: str, \n",
+    "    apikey: str,\n",
+    "    symbol: str,\n",
     "    limit: int,\n",
     "    includeCurrentQuarter: bool = False,\n",
     ") -> typing.Optional[typing.List[typing.Dict]]:\n",
@@ -16132,8 +16132,8 @@
     "\n",
     "\n",
     "def analyst_estimates(\n",
-    "    apikey: str, \n",
-    "    symbol: str, \n",
+    "    apikey: str,\n",
+    "    symbol: str,\n",
     "    period: str = \"annual\",\n",
     "    limit: int = DEFAULT_LIMIT\n",
     ") -> typing.Optional[typing.List[typing.Dict]]:\n",
@@ -16153,8 +16153,7 @@
     "        \"period\": __validate_period(value=period),\n",
     "        \"limit\": limit,\n",
     "    }\n",
-    "    return __return_json_v3(path=path, query_vars=query_vars)\n",
-    "\n"
+    "    return __return_json_v3(path=path, query_vars=query_vars)\n"
    ]
   },
   {
@@ -16166,7 +16165,7 @@
     "def fetch_estimates(period=None):\n",
     "    assert period in ['quarter', 'annual']\n",
     "    estimates_all_df = pd.DataFrame()\n",
-    "    for ticker in stocks_ticker_set: # ['ALTR']: \n",
+    "    for ticker in stocks_ticker_set: # ['ALTR']:\n",
     "        est = analyst_estimates(apikey=FMP_API_KEY, symbol=ticker, period=period, limit=-1)\n",
     "        # print('est:', est)\n",
     "        if est is not None and len(est) > 0:\n",
@@ -16181,8 +16180,7 @@
     "        else:\n",
     "            print(f\"No {ticker} {period} analyst estimates reports: est={est}\")\n",
     "\n",
-    "    return estimates_all_df\n",
-    "\n"
+    "    return estimates_all_df\n"
    ]
   },
   {
@@ -16214,11 +16212,10 @@
     "    estimates_all_df = fetch_estimates(p)\n",
     "    estimates_all_df = estimates_all_df.sort_index()\n",
     "    estimates_all_df.index.names = [\"Symbol\", \"Date\"]\n",
-    "    # est_file_name= f'data/analyst_estimates_{p}.csv.bz2'    \n",
+    "    # est_file_name= f'data/analyst_estimates_{p}.csv.bz2'\n",
     "    # estimates_all_df.to_csv(est_file_name)\n",
     "    estimates_all_df.to_parquet(est_file_name)\n",
-    "    print(f'all {p} estimates count:', len(estimates_all_df.index))\n",
-    "    "
+    "    print(f'all {p} estimates count:', len(estimates_all_df.index))\n"
    ]
   },
   {

diff --git a/publish.sh b/publish.sh
@@ -0,0 +1,13 @@
+#!/usr/bin/bash
+echo "Packaging canswim and publishing to PyPI repo"
+# -e: stop at first failure, -x: trace commands, -v: echo lines as read. The dash is required: a bare "set exv" only assigns $1.
+set -exv
+rm -rf dist/  # -f: do not fail (and abort under -e) when dist/ does not exist yet
+
+# test repo publish
+python3 -m build
+python3 -m twine upload --verbose --repository testpypi dist/*
+python3 -m pip install --index-url https://test.pypi.org/simple/ --no-deps canswim
+
+# proper repo publish
+python3 -m twine upload --verbose dist/*
diff --git a/setup.cfg b/setup.cfg
@@ -1,11 +1,11 @@
 [metadata]
 name = canswim
-version = 0.0.2
+version = 0.0.4
 author = Ivelin Ivanov
 author_email = [email protected]
-description = Developer toolkit for IBD CANSLIM style investors
-long_description = file: README.md, LICENSE.txt
-keywords = one, two
+description = "Developer toolkit for IBD CANSLIM style investors"
+long_description = file: README.md
+keywords = stock market, analytics
 license = Apache-2.0
 classifiers =
     # Framework :: Django
@@ -35,7 +35,7 @@ install_requires =
 
 
 [options.package_data]
-* = *.txt, *.rst
+* = *.txt, *.rst, *.md
 # hello = *.msg
 
 [options.entry_points]

diff --git a/setup.py b/setup.py
@@ -60,7 +60,7 @@
     #
     # This field corresponds to the "Description-Content-Type" metadata field:
     # https://packaging.python.org/specifications/core-metadata/#description-content-type-optional
-    # long_description_content_type="text/markdown",  # Optional (see note above)
+    long_description_content_type="text/markdown",  # Optional (see note above)
     # This should be a valid link to your project's main homepage.
     #
     # This field corresponds to the "Home-Page" metadata field:

diff --git a/src/canswim/__init__.py b/src/canswim/__init__.py
diff --git a/src/canswim/covariates.py b/src/canswim/covariates.py
@@ -327,7 +327,7 @@ def prepare_key_metrics(self, stock_price_series=None):
         return t_kms_series
 
     def prepare_broad_market_series(self, train_date_start=None):
-        logger.info("preparing past covariates: broad market indecies")
+        logger.info("preparing past covariates: broad market indexes")
         broad_market_df = self.broad_market_df.copy()
         # flatten column hierarchy so Darts can use as covariate series
         broad_market_df.columns = [f"{i}_{j}" for i, j in broad_market_df.columns]

diff --git a/src/canswim/dashboard.py b/src/canswim/dashboard.py
@@ -3,13 +3,10 @@
 from canswim.model import CanswimModel
 import pandas as pd
 import gradio as gr
-from darts.models import ExponentialSmoothing
-from darts import TimeSeries
 from canswim.hfhub import HFHub
 import matplotlib.pyplot as plt
 import random
 import pandas as pd
-from pandas.tseries.offsets import BDay
 from loguru import logger
 from typing import Union, Optional, Sequence
 import matplotlib
@@ -228,88 +225,17 @@ def plot_quantiles_df(
 
     def plot_forecast(self, ticker: str = None, lowq: int = 0.2):
         target = self.get_target(ticker)
-        # baseline_forecast, canswim_forecast = self.get_forecast(ticker)
-        # backtest_forecasts = self.backtest(ticker)
         saved_forecast_df_list = self.get_saved_forecast(ticker=ticker)
         lq = lowq / 100
         fig, axes = plt.subplots(figsize=(20, 12))
         target.plot(label=f"{ticker} Close actual")
-        # baseline_forecast.plot(label=f"{ticker} Close baseline forecast")
-        # canswim_forecast.plot(
-        #     label=f"{ticker} Close CANSWIM forecast",
-        #     low_quantile=lq,
-        #     high_quantile=0.95,
-        # )
         logger.info(f"Plotting saved forecast: {saved_forecast_df_list}")
         if saved_forecast_df_list is not None and len(saved_forecast_df_list) > 0:
             for forecast in saved_forecast_df_list:
                 self.plot_quantiles_df(df=forecast, low_quantile=lq, high_quantile=0.95, label=f"{ticker} Close forecast")
-        # for b in backtest_forecasts:
-        #     b.plot(
-        #         label=f"{ticker} Close CANSWIM backtest",
-        #         low_quantile=lq,
-        #         high_quantile=0.95,
-        #     )
         plt.legend()
         return fig
 
-    def get_forecast(self, ticker: str = None):
-        target = self.get_target(ticker)
-        past_covariates = self.get_past_covariates(ticker)
-        future_covariates = self.get_future_covariates(ticker)
-        baseline_model = ExponentialSmoothing()
-        baseline_model.fit(target)
-        baseline_forecast = baseline_model.predict(
-            self.canswim_model.pred_horizon, num_samples=500
-        )
-        cached_canswim_pred = self.forecast_cache.get(ticker)
-        if cached_canswim_pred is not None:
-            logger.info(f"{ticker} forecast found in cache.")
-            canswim_forecast = cached_canswim_pred
-        else:
-            logger.info(f"{ticker} forecast not in cache. Running model predict().")
-            canswim_forecast = self.canswim_model.predict(
-                target=[target],
-                past_covariates=[past_covariates],
-                future_covariates=[future_covariates],
-            )[0]
-            self.forecast_cache[ticker] = canswim_forecast
-        logger.info(f"{ticker} get_forecast() finished.")
-        return baseline_forecast, canswim_forecast
-
-    def backtest(self, ticker: str = None):
-        cached_backtest = self.backtest_cache.get(ticker)
-        if cached_backtest is not None:
-            logger.info(f"{ticker} backtest found in cache.")
-            backtest_forecasts = cached_backtest
-        else:
-            logger.info(f"{ticker} backtest not in cache. Running model predict().")
-            target = self.get_target(ticker)
-            past_covariates = self.get_past_covariates(ticker)
-            future_covariates = self.get_future_covariates(ticker)
-            end_date = target.end_time()
-            earnings_df = self.canswim_model.covariates.earnings_loaded_df
-            logger.info("earnings_df.columns", earnings_df.columns)
-            mask = (earnings_df.index.get_level_values("Symbol") == ticker) & (
-                earnings_df.index.get_level_values("Date") < end_date - BDay(n=10)
-            )
-            earnings_dates = earnings_df.loc[mask]
-            logger.info(f"{ticker} earnings dates: {earnings_dates}")
-            earnings_dates_unique = earnings_dates.index.get_level_values(
-                "Date"
-            ).unique()
-            assert len(earnings_dates_unique) >= 2
-            target1 = target.drop_after(earnings_dates_unique[-1])
-            target2 = target.drop_after(earnings_dates_unique[-2])
-            backtest_forecasts = self.canswim_model.predict(
-                target=[target1, target2],
-                past_covariates=[past_covariates, past_covariates],
-                future_covariates=[future_covariates, future_covariates],
-            )
-            logger.info(f"{ticker} backtest finished.\n", backtest_forecasts)
-            self.backtest_cache[ticker] = backtest_forecasts
-        return backtest_forecasts
-
     def get_saved_forecast(self, ticker: str = None):
         """Load forecasts from storage to a list of individual forecast series with quantile sampling"""
         # load parquet partition for stock
@@ -334,6 +260,7 @@ def get_saved_forecast(self, ticker: str = None):
                         "forecast_start_year",
                         "forecast_start_month",
                         "forecast_start_day",])
+                    single_forecast = single_forecast.sort_index()
                     df_list.append(single_forecast)
         return df_list
 
@@ -362,7 +289,6 @@ def main():
                     label="Stock Symbol",
                     value=random.sample(sorted_tickers, 1)[0],
                 )
-                ## time = gr.Dropdown(["3 months", "6 months", "9 months", "12 months"], label="Downloads over the last...", value="12 months")
                 lowq = gr.Slider(
                     5,
                     80,
@@ -388,7 +314,6 @@ def main():
             outputs=[plotComponent],
             queue=False,
         )
-        ## time.change(get_forecast, [lib, time], plt, queue=False)
         demo.load(
             fn=canswim_playground.plot_forecast,
             inputs=[tickerDropdown, lowq],

diff --git a/src/canswim/gather_data.py b/src/canswim/gather_data.py
@@ -203,7 +203,7 @@ def _gather_yfdata_date_index(self, data_file: str = None, tickers: str = None):
 
     def gather_broad_market_data(self):
         ## Prepare data for broad market indexes
-        # Capture S&P500, NASDAQ100 and Russell 200 indecies and their equal weighted counter parts
+        # Capture S&P500, NASDAQ100 and Russell 2000 indexes and their equal weighted counterparts
         # As well as VIX volatility index, DXY US Dollar index, IRX 13 Week Treasury Bill Yield, FVX 5 Year Treasury Yield and TNX 10 Year Treasury Yield
         broad_market_indicies = (
             "^SPX ^SPXEW ^NDX ^NDXE ^RUT ^R2ESC ^VIX DX-Y.NYB ^IRX ^FVX ^TNX"
@@ -227,9 +227,9 @@ def gather_subindustries_data(self):
         """
         return  # See warning message above.
         """
-        Gather historic price and volume data for S&P 1500 GICS subindustries indecies.
+        Gather historic price and volume data for S&P 1500 GICS subindustries indexes.
         S&P 1500 includes S&P 400, S&P 500, S&P 600 and overall about 90% of the US stock market capitalization.
-        The dataset has 163 GICS sub-industry indecies active as of 2023 plus 7 that were removed in 2023.
+        The dataset has 163 GICS sub-industry indexes active as of 2023 plus 7 that were removed in 2023.
         https://www.msci.com/documents/1296102/11185224/GICS+Map+2023.xlsx/82cc6504-9919-29e5-9789-a24fc039d0a5?t=1679087572540
         The goal of these covariates is to provide the model with a more granular breakdown of stock grouping by industry.
         Since stocks usually move together with their group, the model can learn the patterns how an individual stock trend
-Original file line number
+Diff line change
@@ Expand Up / @@ -7,3 +7,4 @@ tmp/** @@
     .vscode
     canswim_model.pt*
     build/**
+    dist/**