Skip to content

Commit

Permalink
dashboard update
Browse files Browse the repository at this point in the history
Signed-off-by: ivelin <[email protected]>
  • Loading branch information
ivelin committed Feb 21, 2024
1 parent 7b518b6 commit f27428e
Show file tree
Hide file tree
Showing 10 changed files with 54 additions and 107 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -7,3 +7,4 @@ tmp/**
.vscode
canswim_model.pt*
build/**
dist/**
11 changes: 11 additions & 0 deletions forecast.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
#!/usr/bin/bash
# Run the canswim forecast once without an explicit start date, then once for
# each of the forecast start dates listed below.
echo "Running forecast for multiple periods"
# -e: stop at the first failing command, -x: trace executed commands,
# -v: echo script lines as they are read.
# NOTE: a bare `set exv` (no dash) only assigns "exv" to $1 and enables none
# of these options.
set -exv

./canswim.sh forecast

for start_date in "2023-11-18" "2023-12-02" "2023-12-16" "2024-01-13" "2024-01-27"; do
    ./canswim.sh forecast --forecast_start_date "$start_date"
done
39 changes: 18 additions & 21 deletions gather_data.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -240,10 +240,10 @@
"source": [
"all_stock_set = set()\n",
"stock_files = [\n",
" 'IBD50.csv', \n",
" 'IBD250.csv', \n",
" 'ibdlive_picks.csv', \n",
" 'russell2000_iwm_holdings.csv', \n",
" 'IBD50.csv',\n",
" 'IBD250.csv',\n",
" 'ibdlive_picks.csv',\n",
" 'russell2000_iwm_holdings.csv',\n",
" 'sp500_ivv_holdings.csv',\n",
" 'nasdaq100_cndx_holdings.csv',\n",
" all_stocks_file\n",
Expand Down Expand Up @@ -1407,7 +1407,7 @@
"metadata": {},
"outputs": [],
"source": [
"# Capture S&P500, NASDAQ100 and Russell 200 indecies and their equal weighted counter parts\n",
"# Capture S&P500, NASDAQ100 and Russell 200 indexes and their equal weighted counter parts\n",
"# As well as VIX volatility index, DYX US Dollar index, TNX US 12 Weeks Treasury Yield, 5 Years Treasury Yield and 10 Year Treasuries Yield\n",
"broad_market_indicies = '^SPX ^SPXEW ^NDX ^NDXE ^RUT ^R2ESC ^VIX DX-Y.NYB ^IRX ^FVX ^TNX'"
]
Expand Down Expand Up @@ -1844,7 +1844,7 @@
}
],
"source": [
"broad_market = yf.download(broad_market_indicies, period='max', group_by='tickers') \n",
"broad_market = yf.download(broad_market_indicies, period='max', group_by='tickers')\n",
"broad_market"
]
},
Expand Down Expand Up @@ -2768,7 +2768,7 @@
],
"source": [
"import yfinance as yf\n",
"sectors = yf.download(sector_indicies, period='max', group_by='tickers') \n",
"sectors = yf.download(sector_indicies, period='max', group_by='tickers')\n",
"sectors"
]
},
Expand Down Expand Up @@ -3727,7 +3727,7 @@
}
],
"source": [
"stock_price_data = yf.download(all_stock_set, period='max', group_by='tickers', interval=price_interval) \n",
"stock_price_data = yf.download(all_stock_set, period='max', group_by='tickers', interval=price_interval)\n",
"stock_price_data"
]
},
Expand Down Expand Up @@ -6491,7 +6491,7 @@
],
"source": [
"earnings_all_df = pd.DataFrame()\n",
"for ticker in stocks_ticker_set: # ['AAON']: # \n",
"for ticker in stocks_ticker_set: # ['AAON']: #\n",
" earnings = fmpsdk.historical_earning_calendar(apikey=FMP_API_KEY, symbol=ticker, limit=-1)\n",
" if earnings is not None and len(earnings) > 0:\n",
" edf = pd.DataFrame(earnings)\n",
Expand Down Expand Up @@ -11685,8 +11685,8 @@
"\n",
"\n",
"def institutional_symbol_ownership(\n",
" apikey: str, \n",
" symbol: str, \n",
" apikey: str,\n",
" symbol: str,\n",
" limit: int,\n",
" includeCurrentQuarter: bool = False,\n",
") -> typing.Optional[typing.List[typing.Dict]]:\n",
Expand Down Expand Up @@ -16132,8 +16132,8 @@
"\n",
"\n",
"def analyst_estimates(\n",
" apikey: str, \n",
" symbol: str, \n",
" apikey: str,\n",
" symbol: str,\n",
" period: str = \"annual\",\n",
" limit: int = DEFAULT_LIMIT\n",
") -> typing.Optional[typing.List[typing.Dict]]:\n",
Expand All @@ -16153,8 +16153,7 @@
" \"period\": __validate_period(value=period),\n",
" \"limit\": limit,\n",
" }\n",
" return __return_json_v3(path=path, query_vars=query_vars)\n",
"\n"
" return __return_json_v3(path=path, query_vars=query_vars)\n"
]
},
{
Expand All @@ -16166,7 +16165,7 @@
"def fetch_estimates(period=None):\n",
" assert period in ['quarter', 'annual']\n",
" estimates_all_df = pd.DataFrame()\n",
" for ticker in stocks_ticker_set: # ['ALTR']: \n",
" for ticker in stocks_ticker_set: # ['ALTR']:\n",
" est = analyst_estimates(apikey=FMP_API_KEY, symbol=ticker, period=period, limit=-1)\n",
" # print('est:', est)\n",
" if est is not None and len(est) > 0:\n",
Expand All @@ -16181,8 +16180,7 @@
" else:\n",
" print(f\"No {ticker} {period} analyst estimates reports: est={est}\")\n",
"\n",
" return estimates_all_df\n",
"\n"
" return estimates_all_df\n"
]
},
{
Expand Down Expand Up @@ -16214,11 +16212,10 @@
" estimates_all_df = fetch_estimates(p)\n",
" estimates_all_df = estimates_all_df.sort_index()\n",
" estimates_all_df.index.names = [\"Symbol\", \"Date\"]\n",
" # est_file_name= f'data/analyst_estimates_{p}.csv.bz2' \n",
" # est_file_name= f'data/analyst_estimates_{p}.csv.bz2'\n",
" # estimates_all_df.to_csv(est_file_name)\n",
" estimates_all_df.to_parquet(est_file_name)\n",
" print(f'all {p} estimates count:', len(estimates_all_df.index))\n",
" "
" print(f'all {p} estimates count:', len(estimates_all_df.index))\n"
]
},
{
Expand Down
13 changes: 13 additions & 0 deletions publish.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
#!/usr/bin/bash
# Build the canswim package, upload it to the test repository, install it from
# there, and finally upload the same artifacts to the main repository.
echo "Packaging canswim and publishing to PyPI repo"
# -e: stop at the first failing command so a failed test upload/install never
# reaches the final publish step, -x: trace commands, -v: echo script lines.
# NOTE: a bare `set exv` (no dash) only assigns "exv" to $1 and enables none
# of these options.
set -exv

# Remove artifacts of earlier builds; -f keeps a missing dist/ directory from
# aborting the script now that `set -e` is in effect.
rm -rf dist/

# test repo publish
python3 -m build
python3 -m twine upload --verbose --repository testpypi dist/*
python3 -m pip install --index-url https://test.pypi.org/simple/ --no-deps canswim

# proper repo publish
python3 -m twine upload --verbose dist/*
10 changes: 5 additions & 5 deletions setup.cfg
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
[metadata]
name = canswim
version = 0.0.2
version = 0.0.4
author = Ivelin Ivanov
author_email = [email protected]
description = Developer toolkit for IBD CANSLIM style investors
long_description = file: README.md, LICENSE.txt
keywords = one, two
description = "Developer toolkit for IBD CANSLIM style investors"
long_description = file: README.md
keywords = stock market, analytics
license = Apache-2.0
classifiers =
# Framework :: Django
Expand Down Expand Up @@ -35,7 +35,7 @@ install_requires =


[options.package_data]
* = *.txt, *.rst
* = *.txt, *.rst, *.md
# hello = *.msg

[options.entry_points]
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@
#
# This field corresponds to the "Description-Content-Type" metadata field:
# https://packaging.python.org/specifications/core-metadata/#description-content-type-optional
# long_description_content_type="text/markdown", # Optional (see note above)
long_description_content_type="text/markdown", # Optional (see note above)
# This should be a valid link to your project's main homepage.
#
# This field corresponds to the "Home-Page" metadata field:
Expand Down
Empty file added src/canswim/__init__.py
Empty file.
2 changes: 1 addition & 1 deletion src/canswim/covariates.py
Original file line number Diff line number Diff line change
Expand Up @@ -327,7 +327,7 @@ def prepare_key_metrics(self, stock_price_series=None):
return t_kms_series

def prepare_broad_market_series(self, train_date_start=None):
logger.info("preparing past covariates: broad market indecies")
logger.info("preparing past covariates: broad market indexes")
broad_market_df = self.broad_market_df.copy()
# flatten column hierarchy so Darts can use as covariate series
broad_market_df.columns = [f"{i}_{j}" for i, j in broad_market_df.columns]
Expand Down
77 changes: 1 addition & 76 deletions src/canswim/dashboard.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,10 @@
from canswim.model import CanswimModel
import pandas as pd
import gradio as gr
from darts.models import ExponentialSmoothing
from darts import TimeSeries
from canswim.hfhub import HFHub
import matplotlib.pyplot as plt
import random
import pandas as pd
from pandas.tseries.offsets import BDay
from loguru import logger
from typing import Union, Optional, Sequence
import matplotlib
Expand Down Expand Up @@ -228,88 +225,17 @@ def plot_quantiles_df(

def plot_forecast(self, ticker: str = None, lowq: float = 0.2):
    """Plot the actual close series for ``ticker`` with saved forecasts overlaid.

    Args:
        ticker: Stock symbol passed to ``get_target`` and ``get_saved_forecast``.
        lowq: Low quantile given as a percentage; it is divided by 100 before
            being passed on as ``low_quantile``. The high quantile is fixed
            at 0.95.

    Returns:
        The matplotlib figure created for the plot.
    """
    target = self.get_target(ticker)
    saved_forecast_df_list = self.get_saved_forecast(ticker=ticker)
    lq = lowq / 100
    # Only the figure is returned; the axes object is not used directly here.
    fig, _ = plt.subplots(figsize=(20, 12))
    target.plot(label=f"{ticker} Close actual")
    logger.info(f"Plotting saved forecast: {saved_forecast_df_list}")
    # A missing (None) or empty forecast list simply draws no forecast bands.
    for forecast in saved_forecast_df_list or []:
        self.plot_quantiles_df(
            df=forecast,
            low_quantile=lq,
            high_quantile=0.95,
            label=f"{ticker} Close forecast",
        )
    plt.legend()
    return fig

def get_forecast(self, ticker: str = None):
    """Return a ``(baseline_forecast, canswim_forecast)`` pair for ``ticker``.

    The baseline comes from an ``ExponentialSmoothing`` model fitted on the
    target series on every call. The CANSWIM forecast is taken from
    ``self.forecast_cache`` when present; otherwise it is produced by
    ``self.canswim_model.predict`` and stored in the cache.
    """
    target = self.get_target(ticker)
    past_covariates = self.get_past_covariates(ticker)
    future_covariates = self.get_future_covariates(ticker)

    # Baseline: always refit and sample over the model's prediction horizon.
    smoother = ExponentialSmoothing()
    smoother.fit(target)
    horizon = self.canswim_model.pred_horizon
    baseline_forecast = smoother.predict(horizon, num_samples=500)

    canswim_forecast = self.forecast_cache.get(ticker)
    if canswim_forecast is None:
        logger.info(f"{ticker} forecast not in cache. Running model predict().")
        predictions = self.canswim_model.predict(
            target=[target],
            past_covariates=[past_covariates],
            future_covariates=[future_covariates],
        )
        # predict() is called with a single-series list; keep its only result.
        canswim_forecast = predictions[0]
        self.forecast_cache[ticker] = canswim_forecast
    else:
        logger.info(f"{ticker} forecast found in cache.")

    logger.info(f"{ticker} get_forecast() finished.")
    return baseline_forecast, canswim_forecast

def backtest(self, ticker: str = None):
    """Return backtest forecasts for ``ticker`` cut at its two latest earnings dates.

    Results are cached in ``self.backtest_cache``. On a cache miss the target
    series is truncated after each of the last two unique earnings dates that
    fall more than 10 business days before the end of the target series, and
    the model predicts from both truncated series.

    Raises:
        AssertionError: if fewer than two qualifying earnings dates exist.
    """
    cached_backtest = self.backtest_cache.get(ticker)
    if cached_backtest is not None:
        logger.info(f"{ticker} backtest found in cache.")
        return cached_backtest

    logger.info(f"{ticker} backtest not in cache. Running model predict().")
    target = self.get_target(ticker)
    past_covariates = self.get_past_covariates(ticker)
    future_covariates = self.get_future_covariates(ticker)
    end_date = target.end_time()
    earnings_df = self.canswim_model.covariates.earnings_loaded_df
    # loguru formats messages with str.format(); without a "{}" placeholder
    # the extra positional argument is silently dropped from the log line.
    logger.info("earnings_df.columns: {}", earnings_df.columns)
    mask = (earnings_df.index.get_level_values("Symbol") == ticker) & (
        earnings_df.index.get_level_values("Date") < end_date - BDay(n=10)
    )
    earnings_dates = earnings_df.loc[mask]
    logger.info(f"{ticker} earnings dates: {earnings_dates}")
    earnings_dates_unique = earnings_dates.index.get_level_values("Date").unique()
    assert (
        len(earnings_dates_unique) >= 2
    ), f"{ticker} needs at least 2 past earnings dates for backtest"
    # NOTE(review): indexing [-1] / [-2] assumes the earnings index is sorted
    # by date in ascending order; confirm against how earnings_loaded_df is built.
    target1 = target.drop_after(earnings_dates_unique[-1])
    target2 = target.drop_after(earnings_dates_unique[-2])
    backtest_forecasts = self.canswim_model.predict(
        target=[target1, target2],
        past_covariates=[past_covariates, past_covariates],
        future_covariates=[future_covariates, future_covariates],
    )
    logger.info("{} backtest finished.\n{}", ticker, backtest_forecasts)
    self.backtest_cache[ticker] = backtest_forecasts
    return backtest_forecasts

def get_saved_forecast(self, ticker: str = None):
"""Load forecasts from storage to a list of individual forecast series with quantile sampling"""
# load parquet partition for stock
Expand All @@ -334,6 +260,7 @@ def get_saved_forecast(self, ticker: str = None):
"forecast_start_year",
"forecast_start_month",
"forecast_start_day",])
single_forecast = single_forecast.sort_index()
df_list.append(single_forecast)
return df_list

Expand Down Expand Up @@ -362,7 +289,6 @@ def main():
label="Stock Symbol",
value=random.sample(sorted_tickers, 1)[0],
)
## time = gr.Dropdown(["3 months", "6 months", "9 months", "12 months"], label="Downloads over the last...", value="12 months")
lowq = gr.Slider(
5,
80,
Expand All @@ -388,7 +314,6 @@ def main():
outputs=[plotComponent],
queue=False,
)
## time.change(get_forecast, [lib, time], plt, queue=False)
demo.load(
fn=canswim_playground.plot_forecast,
inputs=[tickerDropdown, lowq],
Expand Down
6 changes: 3 additions & 3 deletions src/canswim/gather_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -203,7 +203,7 @@ def _gather_yfdata_date_index(self, data_file: str = None, tickers: str = None):

def gather_broad_market_data(self):
## Prepare data for broad market indices
# Capture S&P500, NASDAQ100 and Russell 200 indecies and their equal weighted counter parts
# Capture S&P500, NASDAQ100 and Russell 2000 indexes and their equal-weighted counterparts
# As well as VIX volatility index, DXY US Dollar index, IRX 13-week Treasury bill yield, FVX 5-year Treasury yield and TNX 10-year Treasury yield
broad_market_indicies = (
"^SPX ^SPXEW ^NDX ^NDXE ^RUT ^R2ESC ^VIX DX-Y.NYB ^IRX ^FVX ^TNX"
Expand All @@ -227,9 +227,9 @@ def gather_subindustries_data(self):
"""
return # See warning message above.
"""
Gather historic price and volume data for S&P 1500 GICS subindustries indecies.
Gather historic price and volume data for S&P 1500 GICS subindustries indexes.
S&P 1500 includes S&P 400, S&P 500, S&P 600 and overall about 90% of the US stock market capitalization.
The dataset has 163 GICS sub-industry indecies active as of 2023 plus 7 that were removed in 2023.
The dataset has 163 GICS sub-industry indexes active as of 2023 plus 7 that were removed in 2023.
https://www.msci.com/documents/1296102/11185224/GICS+Map+2023.xlsx/82cc6504-9919-29e5-9789-a24fc039d0a5?t=1679087572540
The goal of these covariates is to provide the model with a more granular breakdown of stock grouping by industry.
Since stocks usually move together with their group, the model can learn the patterns how an individual stock trend
Expand Down

0 comments on commit f27428e

Please sign in to comment.