BobSheehan23 · BobSheehan23 · Nov 11, 2025 · Nov 13, 2025
diff --git a/README.md b/README.md
@@ -29,7 +29,8 @@ LHM/
 ### Prerequisites
 - Python 3.8+
 - Jupyter Lab/Notebook
-- Required packages: pandas, numpy, matplotlib
+- Required packages: pandas, numpy, matplotlib, PyYAML
+- Optional for Parquet export: `pyarrow` or `fastparquet`
 
 ### Installation
 ```bash
@@ -73,3 +74,30 @@ This is an internal repository. Follow the established coding standards and ensu
 
 ## License
 Internal use only. All rights reserved.
+
+## FRED Data Platform
+
+The monorepo now ships with an end-to-end ingestion stack capable of
+maintaining hundreds of FRED indicators on a rolling basis.
+
+Key entry points:
+
+- `configs/fred_catalog_sources.yaml` – tag-based recipe for generating
+  category-specific indicator catalogs directly from FRED.
+- `configs/fred_series_catalog.yaml` – materialised catalog consumed by
+  the ingestion pipeline. Regenerate with `python -m lhm.catalog.generate`
+  after supplying a FRED API key.
+- `src/lhm/config/series_catalog.py` – loader utilities for the catalog
+  format.
+- `src/lhm/clients/fred_client.py` – fully functional HTTP client with
+  rate limiting, metadata retrieval, tag search, and observation pulls.
+- `src/lhm/pipelines/daily_refresh.py` – orchestrates refresh cycles,
+  persists results, and records metadata for each series.
+- `src/lhm/storage/filesystem.py` – pluggable storage backend capable of
+  writing Parquet, CSV, or JSON datasets to disk.
+- `src/lhm/cli.py` – command line interface for triggering the pipeline.
+- `src/lhm/catalog/generate.py` – helper CLI for expanding the catalog
+  using the tag configuration.
+
+Refer to `docs/fred_data_platform.md` for the detailed architecture and
+operational guidance.
diff --git a/configs/fred_catalog_sources.yaml b/configs/fred_catalog_sources.yaml
@@ -0,0 +1,39 @@
+# Mapping from stakeholder categories to FRED tags used for catalog generation.
+#
+# Each category lists one or more FRED tags plus a `limit` (default 75). The
+# generator pulls up to `limit` of the most popular series associated with the
+# intersection of those tags to build the curated indicator list.
+categories:
+  gdp:
+    tags: [gdp]
+    limit: 80
+  labor:
+    tags: [employment, unemployment]
+    limit: 80
+  prices:
+    tags: [inflation]
+    limit: 80
+  health:
+    tags: [health]
+    limit: 80
+  money:
+    tags: [money, monetary]
+    limit: 80
+  trade:
+    tags: [trade]
+    limit: 80
+  government:
+    tags: [government]
+    limit: 80
+  business:
+    tags: [business]
+    limit: 80
+  consumer:
+    tags: [consumer]
+    limit: 80
+  housing:
+    tags: [housing]
+    limit: 80
+  taxes:
+    tags: [taxes]
+    limit: 80
diff --git a/configs/fred_series_catalog.template.yaml b/configs/fred_series_catalog.template.yaml
@@ -0,0 +1,24 @@
+# Catalog of FRED series grouped by stakeholder-defined categories.
+#
+# This template illustrates the schema produced by the automated catalog
+# generator (`python -m lhm.catalog.generate`). Regenerate
+# `configs/fred_series_catalog.yaml` from live FRED data once the tag
+# recipes in `configs/fred_catalog_sources.yaml` are finalised.
+categories:
+  gdp:
+    - series_id: GDP
+      title: Gross Domestic Product, Billions of Dollars, Quarterly, SAAR
+      frequency: Quarterly
+      units: Billions of Dollars
+      seasonal_adjustment: Seasonally Adjusted Annual Rate
+      notes: Placeholder entry demonstrating the schema; the full catalog will be curated next.
+  labor: []
+  prices: []
+  health: []
+  money: []
+  trade: []
+  government: []
+  business: []
+  consumer: []
+  housing: []
+  taxes: []
diff --git a/configs/fred_series_catalog.yaml b/configs/fred_series_catalog.yaml
@@ -0,0 +1,21 @@
+# Generated catalog placeholder. Run `python -m lhm.catalog.generate` to
+# populate this file using the tag configuration in
+# `configs/fred_catalog_sources.yaml` once a FRED API key is available.
+categories:
+  gdp:
+    - series_id: GDP
+      title: Gross Domestic Product, Billions of Dollars, Quarterly, SAAR
+      frequency: Quarterly
+      units: Billions of Dollars
+      seasonal_adjustment: Seasonally Adjusted Annual Rate
+      notes: Placeholder entry demonstrating the schema; regenerate via the catalog generator for the full list.
+  labor: []
+  prices: []
+  health: []
+  money: []
+  trade: []
+  government: []
+  business: []
+  consumer: []
+  housing: []
+  taxes: []
diff --git a/docs/fred_data_platform.md b/docs/fred_data_platform.md
@@ -0,0 +1,60 @@
+# FRED Data Platform
+
+The Lighthouse Macro ingestion stack now provides a complete workflow for
+curating, downloading, and persisting large collections of FRED series on
+an automated cadence.
+
+## Objectives
+
+1. Provide a clearly structured configuration layer where each category
+   maps to a curated list of FRED series (50-100 per category).
+2. Separate the concerns of configuration, data acquisition, storage,
+   and orchestration so that each layer can evolve independently.
+3. Maintain an automated daily refresh cadence that rehydrates recent
+   observations while respecting FRED's rate limits and terms of use.
+
+## High-Level Architecture
+
+| Layer        | Responsibility                                                      |
+| ------------ | ------------------------------------------------------------------- |
+| Config       | Defines series metadata, API credentials, and storage preferences. |
+| Client       | Handles authenticated calls to FRED and response validation.        |
+| Pipelines    | Orchestrates recurring refresh jobs and error handling.             |
+| Storage      | Persists data in the agreed upon analytics format.                  |
+
+## Key Modules
+
+- `lhm.catalog.generate`: Generates the 50-100 indicators per category by
+  querying FRED tag endpoints based on the recipes stored in
+  `configs/fred_catalog_sources.yaml`.
+- `lhm.config.series_catalog.SeriesCatalog`: Loads and validates the
+  materialised catalog consumed by downstream components.
+- `lhm.clients.fred_client.FREDClient`: Provides metadata lookups, tag
+  searches, and observation downloads with built-in rate limiting.
+- `lhm.pipelines.daily_refresh.DailyRefreshPipeline`: Coordinates the
+  refresh cycle, including window resolution, data collection, and
+  persistence.
+- `lhm.storage.filesystem.FilesystemStorageBackend`: Writes Parquet, CSV,
+  or JSON datasets alongside metadata manifests for each series.
+- `lhm.cli`: Convenience CLI for executing the pipeline from the command
+  line or a scheduler.
+
+## Operational Workflow
+
+1. **Generate the catalog**: `python -m lhm.catalog.generate --sources configs/fred_catalog_sources.yaml --output configs/fred_series_catalog.yaml --api-key $FRED_API_KEY`
+   produces the per-category inventory. Adjust the tag recipes or limits
+   as desired.
+2. **Run the ingestion pipeline**: `python -m lhm.cli --catalog configs/fred_series_catalog.yaml --storage-root data/raw/fred --storage-format parquet --full-refresh --api-key $FRED_API_KEY`
+   performs either a full backfill or rolling refresh depending on the
+   flags supplied.
+3. **Schedule recurring updates**: integrate the CLI command into your
+   preferred scheduler (cron, Airflow, Dagster, etc.) to rehydrate the
+   desired window daily.
+
+## Next Steps
+
+- Expand automated testing (unit and integration) once API credentials
+  are available in CI.
+- Add additional storage backends (e.g., DuckDB, cloud object stores) as
+  production requirements evolve.
+- Layer in monitoring/alerting once deployment targets are defined.
diff --git a/docs/fred_series_catalog_outline.md b/docs/fred_series_catalog_outline.md
@@ -0,0 +1,23 @@
+# FRED Series Catalog Outline
+
+The following table captures the eleven macroeconomic categories requested
+by the stakeholder. Each category will contain 50-100 vetted FRED series
+selected via the automated tag-based catalog generator.
+
+| Category  | Description | Status | Next Actions |
+| --------- | ----------- | ------ | ------------ |
+| GDP       | Aggregate output measures such as GDP, GDI, and potential GDP. | Tag recipe defined; generator fetches top series under the `gdp` tag. | Review generated catalog and pin any mandatory inclusions/exclusions. |
+| Labor     | Employment, unemployment, hours worked, participation rates. | Tag recipe defined (`employment` + `unemployment`). | Validate coverage of BLS headline indicators; refine tags if needed. |
+| Prices    | Inflation, price indices, producer prices, deflators. | Tag recipe defined (`inflation`). | Evaluate inclusion of survey expectations; adjust generator filters. |
+| Health    | Healthcare expenditure, insurance coverage, health outcomes. | Tag recipe defined (`health`). | Augment with priority health expenditure series if missing. |
+| Money     | Monetary aggregates, interest rates, credit measures. | Tag recipe defined (`money`, `monetary`). | Ensure inclusion of Fed balance sheet aggregates; tweak tags as required. |
+| Trade     | Exports, imports, balance of trade, exchange rates. | Tag recipe defined (`trade`). | Review for key bilateral balances and trade-weighted indexes. |
+| Government| Fiscal revenue/expenditure, debt, budget balances. | Tag recipe defined (`government`). | Confirm Treasury receipts/outlays presence; expand tags if gaps remain. |
+| Business  | Business sentiment, production, investment, inventories. | Tag recipe defined (`business`). | Incorporate targeted survey series (e.g., ISM) via supplemental tags. |
+| Consumer  | Spending, confidence, credit, income distribution metrics. | Tag recipe defined (`consumer`). | Verify consumption, income, credit coverage; refine as necessary. |
+| Housing   | Construction, sales, prices, mortgage data. | Tag recipe defined (`housing`). | Add explicit mortgage rate series if excluded. |
+| Taxes     | Federal, state, and local tax receipts and rates. | Tag recipe defined (`taxes`). | Review generated list for revenue vs. rate balance; adjust accordingly. |
+
+After reviewing the automatically generated catalog, update
+`configs/fred_catalog_sources.yaml` with additional tags or overrides to
+lock in the final 50-100 indicators per category.
diff --git a/src/lhm/__init__.py b/src/lhm/__init__.py
@@ -0,0 +1,20 @@
+"""Core package for the LHM data platform."""
+
+from importlib import resources
+
+
+def get_version() -> str:
+    """Return the package version, falling back to ``"0.0.0"``.
+
+    The version is read from a ``VERSION`` resource stored inside this
+    package. The skeleton package does not yet ship with a build system
+    that automatically injects version metadata, so a missing or blank
+    ``VERSION`` resource yields ``"0.0.0"``. Downstream modules therefore
+    always receive a non-empty version string during early development.
+    """
+
+    try:
+        version_resource = resources.files(__package__).joinpath("VERSION")
+        with version_resource.open("r", encoding="utf-8") as handle:
+            version = handle.read().strip()
+    except FileNotFoundError:
+        return "0.0.0"
+    # A blank VERSION file is treated like a missing one so callers never
+    # receive an empty string.
+    return version or "0.0.0"
diff --git a/src/lhm/catalog/__init__.py b/src/lhm/catalog/__init__.py
@@ -0,0 +1,5 @@
+"""Utilities for building and maintaining FRED series catalogs."""
+
+from .generate import CatalogSourceConfig, CatalogSources, build_catalog_from_sources
+
+__all__ = ["CatalogSourceConfig", "CatalogSources", "build_catalog_from_sources"]
diff --git a/src/lhm/catalog/generate.py b/src/lhm/catalog/generate.py
@@ -0,0 +1,102 @@
+"""Build series catalogs automatically from FRED tag sources."""
+
+from __future__ import annotations
+
+import argparse
+from dataclasses import dataclass
+import json
+from pathlib import Path
+
+import yaml
+
+from ..clients import FREDClient
+from ..config import Category, SeriesCatalog, SeriesDefinition
+
+
+@dataclass(slots=True)
+class CatalogSourceConfig:
+    """Describe how to fetch series for a specific category.
+
+    Instances are mutable: the CLI in this module overwrites ``limit`` in
+    place when a ``--limit`` override is supplied.
+    """
+
+    # FRED tag names handed to ``FREDClient.search_series_by_tags``.
+    tags: tuple[str, ...]
+    # Value passed as ``limit=`` to the tag search; defaults to 75.
+    limit: int = 75
+
+
+class CatalogSources(dict[Category, CatalogSourceConfig]):
+    """Mapping of :class:`Category` to its :class:`CatalogSourceConfig`.
+
+    Instances are normally created with :meth:`load`, which parses the
+    ``categories`` section of a YAML sources file into dataclasses.
+    """
+
+    @classmethod
+    def load(cls, path: Path) -> "CatalogSources":
+        """Parse the YAML file at ``path`` into a :class:`CatalogSources`.
+
+        Each key under the top-level ``categories`` mapping is converted
+        with ``Category(...)``; its ``tags`` and ``limit`` entries become a
+        :class:`CatalogSourceConfig`. An empty document, a missing
+        ``categories`` section, or a category without a body produce
+        empty/default values instead of an error. ``limit`` defaults to
+        75 when absent, and a scalar ``tags`` value is treated as a
+        single tag.
+        """
+
+        with path.open("r", encoding="utf-8") as handle:
+            payload = yaml.safe_load(handle) or {}
+
+        categories: dict[Category, CatalogSourceConfig] = {}
+        for raw_category, spec in (payload.get("categories") or {}).items():
+            category = Category(raw_category)
+            # ``gdp:`` with no body parses to ``None``; treat it as an empty spec.
+            spec = spec or {}
+            raw_tags = spec.get("tags") or []
+            # A scalar ``tags: gdp`` must stay one tag rather than being
+            # split into individual characters by ``tuple()``.
+            if isinstance(raw_tags, str):
+                raw_tags = [raw_tags]
+            categories[category] = CatalogSourceConfig(
+                tags=tuple(raw_tags),
+                limit=int(spec.get("limit", 75)),
+            )
+        return cls(categories)
+
+
+def build_catalog_from_sources(client: FREDClient, sources: CatalogSources) -> SeriesCatalog:
+    """Assemble a :class:`SeriesCatalog` from per-category tag searches.
+
+    For every category in ``sources`` the client is queried with that
+    category's tags and limit. Results keep the order in which they are
+    returned, and a series ID that repeats within one category is
+    recorded only once (the first occurrence wins).
+    """
+
+    catalog_entries: dict[Category, list[SeriesDefinition]] = {}
+    for category, source in sources.items():
+        # The insertion-ordered dict doubles as the de-duplication index.
+        unique_by_id: dict[str, SeriesDefinition] = {}
+        matches = client.search_series_by_tags(source.tags, limit=source.limit)
+        for match in matches:
+            if match.series_id in unique_by_id:
+                continue
+            unique_by_id[match.series_id] = SeriesDefinition(
+                series_id=match.series_id,
+                title=match.title,
+                frequency=match.frequency,
+                units=match.units,
+                seasonal_adjustment=match.seasonal_adjustment,
+            )
+        catalog_entries[category] = list(unique_by_id.values())
+    return SeriesCatalog(categories=catalog_entries)
+
+
+def build_argument_parser() -> argparse.ArgumentParser:
+    """Create the command-line parser for the catalog generator.
+
+    ``--sources`` and ``--output`` are required paths; the remaining
+    options (``--api-key``, ``--base-url``, ``--limit``, ``--dry-run``)
+    are optional.
+    """
+
+    cli = argparse.ArgumentParser(description="Generate a FRED series catalog from tag sources")
+
+    # Required filesystem locations.
+    required_paths = (
+        ("--sources", "YAML file describing tag sources per category"),
+        ("--output", "Destination path for the generated catalog"),
+    )
+    for flag, description in required_paths:
+        cli.add_argument(flag, type=Path, required=True, help=description)
+
+    # Optional connection settings and behaviour switches.
+    cli.add_argument("--api-key", help="FRED API key", default=None, type=str)
+    cli.add_argument(
+        "--base-url",
+        help="Base URL for the FRED API",
+        default="https://api.stlouisfed.org",
+        type=str,
+    )
+    cli.add_argument("--limit", help="Override the per-category series limit", default=None, type=int)
+    cli.add_argument("--dry-run", help="Print the catalog instead of writing to disk", action="store_true")
+    return cli
+
+
+def main(argv: list[str] | None = None) -> None:
+    """Run the catalog generator command-line interface.
+
+    Loads the tag sources, applies the optional ``--limit`` override to
+    every category, builds the catalog through a :class:`FREDClient`,
+    and then either prints it as JSON (``--dry-run``) or writes it to
+    the ``--output`` path, creating parent directories as needed.
+    """
+
+    args = build_argument_parser().parse_args(argv)
+
+    sources = CatalogSources.load(args.sources)
+    limit_override = args.limit
+    if limit_override is not None:
+        for source_config in sources.values():
+            source_config.limit = limit_override
+
+    catalog = build_catalog_from_sources(FREDClient(args.api_key, args.base_url), sources)
+
+    if not args.dry_run:
+        args.output.parent.mkdir(parents=True, exist_ok=True)
+        catalog.dump(args.output)
+        return
+
+    print(json.dumps(catalog.to_yaml_dict(), indent=2))
+
+
+# Script entry point so the generator can be run as
+# ``python -m lhm.catalog.generate``.
+if __name__ == "__main__":  # pragma: no cover
+    main()