draft streamlit dashboard

Signed-off-by: ivelin <[email protected]>
ivelin · Feb 6, 2024 · 5e026b3 · 5e026b3
2 parents b974fb8 + 23cb488
commit 5e026b3
Show file tree

Hide file tree

Showing 12 changed files with 460 additions and 414 deletions.
diff --git a/.python-version b/.python-version
@@ -0,0 +1 @@
+3.11
diff --git a/model_sandbox.ipynb b/model_sandbox.ipynb
diff --git a/prepare_data.ipynb b/prepare_data.ipynb
@@ -16700,7 +16700,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 124,
+   "execution_count": 6,
    "metadata": {},
    "outputs": [
     {
@@ -16712,6 +16712,9 @@
     }
    ],
    "source": [
+    "import os\n",
+    "from pathlib import Path\n",
+    "from dotenv import load_dotenv\n",
     "from huggingface_hub import snapshot_download, upload_folder, create_repo\n",
     "from canswim.hfhub import HFHub\n",
     "\n",
@@ -16728,7 +16731,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 129,
+   "execution_count": 7,
    "metadata": {},
    "outputs": [
     {
@@ -16738,13 +16741,111 @@
       "repo_info:  https://huggingface.co/datasets/ivelin/canswim\n"
      ]
     },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "3ca9cffe200149f794687b4463cf861b",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "analyst_estimates_annual.parquet:   0%|          | 0.00/6.43M [00:00<?, ?B/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "25a9635df9ea4bd3a1796e1813fd55e2",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "earnings_calendar.parquet:   0%|          | 0.00/3.97M [00:00<?, ?B/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "501e4047846448d68eaaf91dd05b123d",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "all_stocks_price_hist_1d.parquet:   0%|          | 0.00/337M [00:00<?, ?B/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "089919302969411d87903b15baf1ab6a",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "analyst_estimates_quarter.parquet:   0%|          | 0.00/20.3M [00:00<?, ?B/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "b1c7531f650d4d40a889163c4e04e3c2",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "Upload 6 LFS files:   0%|          | 0/6 [00:00<?, ?it/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "13856ae3f48641808d964cb225ad8f0e",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "institutional_symbol_ownership.parquet:   0%|          | 0.00/15.2M [00:00<?, ?B/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "0580cfce57f148e2a8753a7c2d9d0444",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "keymetrics_history.parquet:   0%|          | 0.00/95.5M [00:00<?, ?B/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
     {
      "data": {
       "text/plain": [
-       "'https://huggingface.co/datasets/ivelin/canswim/tree/main/data-3rd-party'"
+       "'https://huggingface.co/datasets/ivelin/canswim/tree/main/'"
       ]
      },
-     "execution_count": 129,
+     "execution_count": 7,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -16755,10 +16856,10 @@
     "    repo_id=repo_id, repo_type=\"dataset\", private=private, exist_ok=True, token=HF_TOKEN\n",
     ")\n",
     "print(f\"repo_info: \", repo_info)\n",
-    "data_path = Path(\"data/data-3rd-party\")\n",
+    "data_path = Path(\"data\")\n",
     "upload_folder(\n",
     "    repo_id=repo_id,\n",
-    "    path_in_repo=\"data-3rd-party\",\n",
+    "    # path_in_repo=\"data-3rd-party\",\n",
     "    repo_type=\"dataset\",\n",
     "    folder_path=data_path,\n",
     "    token=HF_TOKEN,\n",

diff --git a/setup.py b/setup.py
@@ -7,9 +7,9 @@
 
 # Always prefer setuptools over distutils
 from setuptools import setup  # , find_packages
-import pathlib
+# import pathlib
 
-here = pathlib.Path(__file__).parent.resolve()
+# here = pathlib.Path(__file__).parent.resolve()
 
 # Get the long description from the README file
 # long_description = (here / "README.md").read_text(encoding="utf-8")

diff --git a/src/canswim/__init__.py b/src/canswim/__init__.py
@@ -13,4 +13,3 @@
    See the License for the specific language governing permissions and
    limitations under the License.
 """
-
diff --git a/src/canswim/covariates.py b/src/canswim/covariates.py
@@ -298,6 +298,7 @@ def prepare_key_metrics(self, stock_price_series=None):
                 kms_df = kms_df.dropna()
                 # print("kms_df\n", kms_df[kms_df.isnull()])
                 assert not kms_df.isnull().values.any()
+                assert len(kms_df) > 0, f"No key metrics available for {t}"
                 # print(f'{t} earnings: \n{t_kms.columns}')
                 kms_df = self.df_index_to_biz_days(kms_df)
                 tkms_series_tmp = TimeSeries.from_dataframe(
@@ -315,6 +316,8 @@ def prepare_key_metrics(self, stock_price_series=None):
                 t_kms_series[t] = kms_ser_padded
             except KeyError as e:
                 print(f"Skipping {t} due to error: ", e)
+            except AssertionError as e:
+                print(f"Skipping {t} due to error: ", e)
         # print("t_kms_series:", t_kms_series)
         return t_kms_series
 
@@ -449,8 +452,8 @@ def load_estimates(self):
             assert est_loaded_df.index.is_unique
             # print(f'{period} estimates loaded: \n{est_loaded_df}')
             # est_loaded_df["date"] = pd.to_datetime(est_loaded_df["date"])
-            # est_unique = est_loaded_df.drop_duplicates(subset=["symbol", "date"])
-            assert not est_loaded_df.duplicated().any()
+            est_unique = est_loaded_df.drop_duplicates()  # subset=["symbol", "date"])
+            assert not est_unique.duplicated().any()
             # est_unique = est_unique.set_index(keys=["symbol", "date"])
             assert est_loaded_df.index.has_duplicates == False
             assert est_loaded_df.index.is_unique == True
Original file line number	Diff line number	Diff line change
Expand Up		@@ -13,4 +13,3 @@
		See the License for the specific language governing permissions and
		limitations under the License.
		"""