Skip to content

Commit

Permalink
draft streamlit dashboard
Browse files: browse the repository at this point in the history
Signed-off-by: ivelin <[email protected]>
  • Loading branch information
ivelin committed Feb 6, 2024
2 parents b974fb8 + 23cb488 commit 5e026b3
Show file tree
Hide file tree
Showing 12 changed files with 460 additions and 414 deletions.
1 change: 1 addition & 0 deletions .python-version
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
3.11
255 changes: 86 additions & 169 deletions model_sandbox.ipynb

Large diffs are not rendered by default.

113 changes: 107 additions & 6 deletions prepare_data.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -16700,7 +16700,7 @@
},
{
"cell_type": "code",
"execution_count": 124,
"execution_count": 6,
"metadata": {},
"outputs": [
{
Expand All @@ -16712,6 +16712,9 @@
}
],
"source": [
"import os\n",
"from pathlib import Path\n",
"from dotenv import load_dotenv\n",
"from huggingface_hub import snapshot_download, upload_folder, create_repo\n",
"from canswim.hfhub import HFHub\n",
"\n",
Expand All @@ -16728,7 +16731,7 @@
},
{
"cell_type": "code",
"execution_count": 129,
"execution_count": 7,
"metadata": {},
"outputs": [
{
Expand All @@ -16738,13 +16741,111 @@
"repo_info: https://huggingface.co/datasets/ivelin/canswim\n"
]
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "3ca9cffe200149f794687b4463cf861b",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"analyst_estimates_annual.parquet: 0%| | 0.00/6.43M [00:00<?, ?B/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "25a9635df9ea4bd3a1796e1813fd55e2",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"earnings_calendar.parquet: 0%| | 0.00/3.97M [00:00<?, ?B/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "501e4047846448d68eaaf91dd05b123d",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"all_stocks_price_hist_1d.parquet: 0%| | 0.00/337M [00:00<?, ?B/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "089919302969411d87903b15baf1ab6a",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"analyst_estimates_quarter.parquet: 0%| | 0.00/20.3M [00:00<?, ?B/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "b1c7531f650d4d40a889163c4e04e3c2",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Upload 6 LFS files: 0%| | 0/6 [00:00<?, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "13856ae3f48641808d964cb225ad8f0e",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"institutional_symbol_ownership.parquet: 0%| | 0.00/15.2M [00:00<?, ?B/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "0580cfce57f148e2a8753a7c2d9d0444",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"keymetrics_history.parquet: 0%| | 0.00/95.5M [00:00<?, ?B/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/plain": [
"'https://huggingface.co/datasets/ivelin/canswim/tree/main/data-3rd-party'"
"'https://huggingface.co/datasets/ivelin/canswim/tree/main/'"
]
},
"execution_count": 129,
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
Expand All @@ -16755,10 +16856,10 @@
" repo_id=repo_id, repo_type=\"dataset\", private=private, exist_ok=True, token=HF_TOKEN\n",
")\n",
"print(f\"repo_info: \", repo_info)\n",
"data_path = Path(\"data/data-3rd-party\")\n",
"data_path = Path(\"data\")\n",
"upload_folder(\n",
" repo_id=repo_id,\n",
" path_in_repo=\"data-3rd-party\",\n",
" # path_in_repo=\"data-3rd-party\",\n",
" repo_type=\"dataset\",\n",
" folder_path=data_path,\n",
" token=HF_TOKEN,\n",
Expand Down
4 changes: 2 additions & 2 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,9 @@

# Always prefer setuptools over distutils
from setuptools import setup # , find_packages
import pathlib
# import pathlib

here = pathlib.Path(__file__).parent.resolve()
# here = pathlib.Path(__file__).parent.resolve()

# Get the long description from the README file
# long_description = (here / "README.md").read_text(encoding="utf-8")
Expand Down
1 change: 0 additions & 1 deletion src/canswim/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,4 +13,3 @@
See the License for the specific language governing permissions and
limitations under the License.
"""

7 changes: 5 additions & 2 deletions src/canswim/covariates.py
Original file line number Diff line number Diff line change
Expand Up @@ -298,6 +298,7 @@ def prepare_key_metrics(self, stock_price_series=None):
kms_df = kms_df.dropna()
# print("kms_df\n", kms_df[kms_df.isnull()])
assert not kms_df.isnull().values.any()
assert len(kms_df) > 0, f"No key metrics available for {t}"
# print(f'{t} earnings: \n{t_kms.columns}')
kms_df = self.df_index_to_biz_days(kms_df)
tkms_series_tmp = TimeSeries.from_dataframe(
Expand All @@ -315,6 +316,8 @@ def prepare_key_metrics(self, stock_price_series=None):
t_kms_series[t] = kms_ser_padded
except KeyError as e:
print(f"Skipping {t} due to error: ", e)
except AssertionError as e:
print(f"Skipping {t} due to error: ", e)
# print("t_kms_series:", t_kms_series)
return t_kms_series

Expand Down Expand Up @@ -449,8 +452,8 @@ def load_estimates(self):
assert est_loaded_df.index.is_unique
# print(f'{period} estimates loaded: \n{est_loaded_df}')
# est_loaded_df["date"] = pd.to_datetime(est_loaded_df["date"])
# est_unique = est_loaded_df.drop_duplicates(subset=["symbol", "date"])
assert not est_loaded_df.duplicated().any()
est_unique = est_loaded_df.drop_duplicates() # subset=["symbol", "date"])
assert not est_unique.duplicated().any()
# est_unique = est_unique.set_index(keys=["symbol", "date"])
assert est_loaded_df.index.has_duplicates == False
assert est_loaded_df.index.is_unique == True
Expand Down
Loading

0 comments on commit 5e026b3

Please sign in to comment.