Skip to content

Commit e38fed9

Browse files
Ambient Code Bot and claude committed
fix: resolve CodeQL, LIMIT clause, and ruff format issues from PR review
- Sanitize exception output in Jira fetch to prevent credential leakage in tracebacks (CodeQL clear-text logging fix)
- Add presence-only credential status logging (set/not set, never values)
- Add limit parameter and LIMIT clause to search_github_repos
- Replace N+1 per-repo language queries with single batched query
- Auto-format all scripts with ruff format

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent 43dc833 commit e38fed9

4 files changed

Lines changed: 81 additions & 136 deletions

File tree

mcp_server.py

Lines changed: 33 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -117,9 +117,7 @@ def _attached_db_schema(db_name: str, build_hint: str) -> str:
117117
count = conn.execute(f"SELECT COUNT(*) FROM {db_name}.[{t['name']}]").fetchone()[0]
118118
lines.append(f"-- {t['name']}: {count:,} rows")
119119
lines.append(t["sql"] + ";\n")
120-
views = conn.execute(
121-
f"SELECT name, sql FROM {db_name}.sqlite_master WHERE type='view' ORDER BY name"
122-
).fetchall()
120+
views = conn.execute(f"SELECT name, sql FROM {db_name}.sqlite_master WHERE type='view' ORDER BY name").fetchall()
123121
if views:
124122
lines.append(f"\n-- {db_name.upper()} VIEWS\n")
125123
for v in views:
@@ -795,9 +793,7 @@ def cloud_pricing_lookup(
795793
"""
796794
conn = _get_conn()
797795
if not _db_attached(conn, "pricing"):
798-
return json.dumps(
799-
{"error": "pricing.db not attached. Run: uv run scripts/fetch_pricing.py"}
800-
)
796+
return json.dumps({"error": "pricing.db not attached. Run: uv run scripts/fetch_pricing.py"})
801797

802798
if not any([provider, service, instance_type, region, model_name, usage_type]):
803799
return json.dumps({"error": "Provide at least one filter"})
@@ -850,17 +846,11 @@ def rosa_cluster_costs() -> str:
850846
"""
851847
conn = _get_conn()
852848
if not _db_attached(conn, "pricing"):
853-
return json.dumps(
854-
{"error": "pricing.db not attached. Run: uv run scripts/fetch_pricing.py"}
855-
)
849+
return json.dumps({"error": "pricing.db not attached. Run: uv run scripts/fetch_pricing.py"})
856850

857-
rows = conn.execute(
858-
"SELECT * FROM pricing.v_rosa_estimated_cost ORDER BY estimated_monthly_cost DESC"
859-
).fetchall()
851+
rows = conn.execute("SELECT * FROM pricing.v_rosa_estimated_cost ORDER BY estimated_monthly_cost DESC").fetchall()
860852
if not rows:
861-
return json.dumps(
862-
{"error": "No ROSA cluster data. Run: uv run scripts/fetch_pricing.py (requires oc access)"}
863-
)
853+
return json.dumps({"error": "No ROSA cluster data. Run: uv run scripts/fetch_pricing.py (requires oc access)"})
864854
return json.dumps({"clusters": _rows_to_dicts(rows), "count": len(rows)}, default=str)
865855

866856

@@ -879,9 +869,7 @@ def github_org_summary() -> str:
879869
"""
880870
conn = _get_conn()
881871
if not _db_attached(conn, "github"):
882-
return json.dumps(
883-
{"error": "github.db not attached. Run: uv run scripts/fetch_github.py --org YOUR_ORG"}
884-
)
872+
return json.dumps({"error": "github.db not attached. Run: uv run scripts/fetch_github.py --org YOUR_ORG"})
885873

886874
stats = conn.execute("SELECT * FROM github.v_gh_org_stats").fetchone()
887875
result = dict(stats)
@@ -913,20 +901,21 @@ def search_github_repos(
913901
name: str | None = None,
914902
language: str | None = None,
915903
topic: str | None = None,
904+
limit: int = 50,
916905
) -> str:
917906
"""Search GitHub repos by name, language, or topic. All filters are partial match.
918907
919908
Provide at least one parameter. Returns repo details with language breakdown.
909+
Returns up to `limit` repos (default 50, max 200).
920910
"""
921911
if not any([name, language, topic]):
922912
return json.dumps({"error": "Provide at least one of: name, language, topic"})
923913

924914
conn = _get_conn()
925915
if not _db_attached(conn, "github"):
926-
return json.dumps(
927-
{"error": "github.db not attached. Run: uv run scripts/fetch_github.py --org YOUR_ORG"}
928-
)
916+
return json.dumps({"error": "github.db not attached. Run: uv run scripts/fetch_github.py --org YOUR_ORG"})
929917

918+
limit = min(limit, MAX_QUERY_ROWS)
930919
conditions, params = [], []
931920
if name:
932921
conditions.append("r.name LIKE ?")
@@ -944,28 +933,35 @@ def search_github_repos(
944933

945934
where = " AND ".join(conditions)
946935
repos = conn.execute(
947-
f"""SELECT r.name, r.description, r.html_url, r.default_branch,
936+
f"""SELECT r.repo_id, r.name, r.description, r.html_url, r.default_branch,
948937
r.is_fork, r.is_archived, r.stars, r.forks, r.open_issues,
949938
r.size_kb, r.created_at, r.pushed_at,
950939
r.commit_count, r.pr_count, r.merged_pr_count, r.issue_count,
951940
r.contributor_count, r.topics, r.total_language_bytes
952941
FROM github.v_gh_repo_summary r
953942
WHERE {where}
954-
ORDER BY r.commit_count DESC""",
955-
params,
943+
ORDER BY r.commit_count DESC
944+
LIMIT ?""",
945+
[*params, limit],
956946
).fetchall()
957947

948+
# Batch-load languages for all repos to avoid N+1 queries
949+
repo_ids = [repo["repo_id"] for repo in repos]
950+
langs_by_repo: dict[int, list[dict]] = {}
951+
if repo_ids:
952+
ph = ",".join("?" * len(repo_ids))
953+
lang_rows = conn.execute(
954+
f"SELECT repo_id, language, bytes FROM github.gh_repo_language WHERE repo_id IN ({ph}) ORDER BY bytes DESC",
955+
repo_ids,
956+
).fetchall()
957+
for lr in lang_rows:
958+
langs_by_repo.setdefault(lr["repo_id"], []).append({"language": lr["language"], "bytes": lr["bytes"]})
959+
958960
results = []
959961
for repo in repos:
960962
r = dict(repo)
961-
# Add language breakdown
962-
langs = conn.execute(
963-
"SELECT language, bytes FROM github.gh_repo_language "
964-
"WHERE repo_id = (SELECT repo_id FROM github.gh_repo WHERE name = ?) "
965-
"ORDER BY bytes DESC",
966-
(r["name"],),
967-
).fetchall()
968-
r["languages"] = _rows_to_dicts(langs)
963+
rid = r.pop("repo_id")
964+
r["languages"] = langs_by_repo.get(rid, [])
969965
results.append(r)
970966

971967
return json.dumps({"repos": results, "count": len(results)}, default=str)
@@ -991,9 +987,7 @@ def search_github_commits(
991987

992988
conn = _get_conn()
993989
if not _db_attached(conn, "github"):
994-
return json.dumps(
995-
{"error": "github.db not attached. Run: uv run scripts/fetch_github.py --org YOUR_ORG"}
996-
)
990+
return json.dumps({"error": "github.db not attached. Run: uv run scripts/fetch_github.py --org YOUR_ORG"})
997991

998992
limit = min(limit, MAX_QUERY_ROWS)
999993
conditions, params = [], []
@@ -1024,9 +1018,7 @@ def search_github_commits(
10241018
LIMIT ?""",
10251019
[*params, limit],
10261020
).fetchall()
1027-
return json.dumps(
1028-
{"commits": _rows_to_dicts(rows), "count": len(rows)}, default=str
1029-
)
1021+
return json.dumps({"commits": _rows_to_dicts(rows), "count": len(rows)}, default=str)
10301022

10311023

10321024
@mcp.tool(
@@ -1048,9 +1040,7 @@ def search_github_prs(
10481040

10491041
conn = _get_conn()
10501042
if not _db_attached(conn, "github"):
1051-
return json.dumps(
1052-
{"error": "github.db not attached. Run: uv run scripts/fetch_github.py --org YOUR_ORG"}
1053-
)
1043+
return json.dumps({"error": "github.db not attached. Run: uv run scripts/fetch_github.py --org YOUR_ORG"})
10541044

10551045
limit = min(limit, MAX_QUERY_ROWS)
10561046
conditions, params = [], []
@@ -1109,9 +1099,7 @@ def search_github_issues(
11091099

11101100
conn = _get_conn()
11111101
if not _db_attached(conn, "github"):
1112-
return json.dumps(
1113-
{"error": "github.db not attached. Run: uv run scripts/fetch_github.py --org YOUR_ORG"}
1114-
)
1102+
return json.dumps({"error": "github.db not attached. Run: uv run scripts/fetch_github.py --org YOUR_ORG"})
11151103

11161104
limit = min(limit, MAX_QUERY_ROWS)
11171105
conditions, params = [], []
@@ -1155,9 +1143,7 @@ def github_code_stats(repo: str | None = None) -> str:
11551143
"""
11561144
conn = _get_conn()
11571145
if not _db_attached(conn, "github"):
1158-
return json.dumps(
1159-
{"error": "github.db not attached. Run: uv run scripts/fetch_github.py --org YOUR_ORG"}
1160-
)
1146+
return json.dumps({"error": "github.db not attached. Run: uv run scripts/fetch_github.py --org YOUR_ORG"})
11611147

11621148
result: dict = {}
11631149

scripts/fetch_github.py

Lines changed: 12 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -551,9 +551,7 @@ def fetch_repos(conn: sqlite3.Connection, org: str, now: str) -> list[dict]:
551551
return repos
552552

553553

554-
def fetch_languages(
555-
conn: sqlite3.Connection, repo_id: int, name: str, org: str, now: str
556-
) -> None:
554+
def fetch_languages(conn: sqlite3.Connection, repo_id: int, name: str, org: str, now: str) -> None:
557555
"""Fetch language breakdown for a repo."""
558556
langs = gh_api_simple(f"/repos/{org}/{name}/languages")
559557
if not langs or not isinstance(langs, dict):
@@ -566,13 +564,9 @@ def fetch_languages(
566564
)
567565

568566

569-
def fetch_contributors(
570-
conn: sqlite3.Connection, repo_id: int, name: str, org: str, now: str
571-
) -> None:
567+
def fetch_contributors(conn: sqlite3.Connection, repo_id: int, name: str, org: str, now: str) -> None:
572568
"""Fetch contributors for a repo."""
573-
contribs = gh_api_simple(
574-
f"/repos/{org}/{name}/contributors?per_page=100", paginate=True
575-
)
569+
contribs = gh_api_simple(f"/repos/{org}/{name}/contributors?per_page=100", paginate=True)
576570
if not contribs or not isinstance(contribs, list):
577571
return
578572
for c in contribs:
@@ -639,9 +633,7 @@ def fetch_prs(
639633
since: str | None = None,
640634
) -> None:
641635
"""Fetch pull requests for a repo."""
642-
endpoint = (
643-
f"/repos/{org}/{name}/pulls?per_page=100&state=all&sort=updated&direction=desc"
644-
)
636+
endpoint = f"/repos/{org}/{name}/pulls?per_page=100&state=all&sort=updated&direction=desc"
645637
prs = gh_api_simple(endpoint, paginate=True)
646638
if not prs or not isinstance(prs, list):
647639
return
@@ -711,9 +703,7 @@ def fetch_prs(
711703
print(f" {fetched} PRs")
712704

713705

714-
def fetch_pr_reviews(
715-
conn: sqlite3.Connection, repo_id: int, name: str, org: str, now: str
716-
) -> None:
706+
def fetch_pr_reviews(conn: sqlite3.Connection, repo_id: int, name: str, org: str, now: str) -> None:
717707
"""Fetch reviews for all PRs in a repo that don't have reviews yet."""
718708
# Get PRs that have no reviews stored
719709
prs = conn.execute(
@@ -726,9 +716,7 @@ def fetch_pr_reviews(
726716

727717
review_count = 0
728718
for pr in prs:
729-
reviews = gh_api_simple(
730-
f"/repos/{org}/{name}/pulls/{pr['number']}/reviews?per_page=100"
731-
)
719+
reviews = gh_api_simple(f"/repos/{org}/{name}/pulls/{pr['number']}/reviews?per_page=100")
732720
if not reviews or not isinstance(reviews, list):
733721
continue
734722
for rv in reviews:
@@ -758,9 +746,7 @@ def fetch_issues(
758746
since: str | None = None,
759747
) -> None:
760748
"""Fetch issues (excluding PRs) for a repo."""
761-
endpoint = (
762-
f"/repos/{org}/{name}/issues?per_page=100&state=all&sort=updated&direction=desc"
763-
)
749+
endpoint = f"/repos/{org}/{name}/issues?per_page=100&state=all&sort=updated&direction=desc"
764750
if since:
765751
endpoint += f"&since={since}"
766752

@@ -868,9 +854,7 @@ def fetch_releases(conn: sqlite3.Connection, repo_id: int, name: str, org: str,
868854
print(f" {len(releases)} releases")
869855

870856

871-
def fetch_code_frequency(
872-
conn: sqlite3.Connection, repo_id: int, name: str, org: str, now: str
873-
) -> None:
857+
def fetch_code_frequency(conn: sqlite3.Connection, repo_id: int, name: str, org: str, now: str) -> None:
874858
"""Fetch weekly code frequency stats for a repo."""
875859
for attempt in range(5):
876860
data = gh_api_simple(f"/repos/{org}/{name}/stats/code_frequency")
@@ -917,9 +901,7 @@ def process_repo(
917901
# Determine incremental since for this repo
918902
repo_since = since
919903
if not full and not since:
920-
row = conn.execute(
921-
"SELECT value FROM _meta WHERE key = ?", (f"last_fetch_{name}",)
922-
).fetchone()
904+
row = conn.execute("SELECT value FROM _meta WHERE key = ?", (f"last_fetch_{name}",)).fetchone()
923905
if row:
924906
repo_since = row[0]
925907
print(f" Incremental since {repo_since}")
@@ -963,9 +945,7 @@ def main() -> None:
963945
parser = argparse.ArgumentParser(description="Fetch GitHub org data into github.db")
964946
parser.add_argument("--repo", help="Fetch a single repo by name")
965947
parser.add_argument("--since", help="Fetch from date (ISO 8601, e.g. 2026-03-01)")
966-
parser.add_argument(
967-
"--full", action="store_true", help="Ignore _meta, fetch all history"
968-
)
948+
parser.add_argument("--full", action="store_true", help="Ignore _meta, fetch all history")
969949
parser.add_argument(
970950
"--skip-commits",
971951
action="store_true",
@@ -994,19 +974,15 @@ def main() -> None:
994974
break
995975
if not target:
996976
target = repo_data
997-
skipped = process_repo(
998-
conn, target, args.org, now, args.since, args.skip_commits, args.full
999-
)
977+
skipped = process_repo(conn, target, args.org, now, args.since, args.skip_commits, args.full)
1000978
all_skipped.extend(skipped)
1001979
else:
1002980
# Fetch all repos
1003981
repos = fetch_repos(conn, args.org, now)
1004982
conn.commit()
1005983
for repo in repos:
1006984
try:
1007-
skipped = process_repo(
1008-
conn, repo, args.org, now, args.since, args.skip_commits, args.full
1009-
)
985+
skipped = process_repo(conn, repo, args.org, now, args.since, args.skip_commits, args.full)
1010986
all_skipped.extend(skipped)
1011987
except KeyboardInterrupt:
1012988
print("\nInterrupted — committing pending data...")

scripts/fetch_pricing.py

Lines changed: 6 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -239,9 +239,7 @@ def _extract_ondemand_prices(terms: dict, sku: str) -> list[dict]:
239239
"""Extract OnDemand price dimensions for a given SKU."""
240240
results = []
241241
on_demand = terms.get("OnDemand", {}).get(sku, {})
242-
for _term_key, term_details in (
243-
on_demand.items() if isinstance(on_demand, dict) else []
244-
):
242+
for _term_key, term_details in on_demand.items() if isinstance(on_demand, dict) else []:
245243
for _dim_key, dim in term_details.get("priceDimensions", {}).items():
246244
price_str = dim.get("pricePerUnit", {}).get("USD", "0")
247245
try:
@@ -286,9 +284,7 @@ def fetch_aws_pricing(regions: list[str], now: str) -> list[dict]:
286284
try:
287285
# Download to temp file to handle large files (EC2 ~150MB/region)
288286
with tempfile.NamedTemporaryFile(suffix=".json", delete=True) as tmp:
289-
with httpx.stream(
290-
"GET", url, timeout=300, follow_redirects=True
291-
) as resp:
287+
with httpx.stream("GET", url, timeout=300, follow_redirects=True) as resp:
292288
resp.raise_for_status()
293289
total = 0
294290
for chunk in resp.iter_bytes(chunk_size=1024 * 1024):
@@ -321,9 +317,7 @@ def fetch_aws_pricing(regions: list[str], now: str) -> list[dict]:
321317
continue
322318

323319
product_fields = parse_product(product)
324-
product_desc = (
325-
attrs.get("usagetype", "") + " " + attrs.get("operation", "")
326-
).strip()
320+
product_desc = (attrs.get("usagetype", "") + " " + attrs.get("operation", "")).strip()
327321

328322
for dim in _extract_ondemand_prices(terms, sku):
329323
row = {
@@ -495,9 +489,7 @@ def fetch_rosa_instances(now: str) -> list[dict]:
495489
for node in nodes_data.get("items", []):
496490
labels = node.get("metadata", {}).get("labels", {})
497491
node_name = node.get("metadata", {}).get("name", "")
498-
instance_type = labels.get(
499-
"node.kubernetes.io/instance-type", "unknown"
500-
)
492+
instance_type = labels.get("node.kubernetes.io/instance-type", "unknown")
501493
az = labels.get("topology.kubernetes.io/zone", "")
502494

503495
role = "worker"
@@ -603,12 +595,8 @@ def upsert_rosa_instances(conn: sqlite3.Connection, rows: list[dict]) -> int:
603595

604596
def main() -> None:
605597
parser = argparse.ArgumentParser(description="Fetch cloud pricing into pricing.db")
606-
parser.add_argument(
607-
"--aws-only", action="store_true", help="Fetch AWS pricing only"
608-
)
609-
parser.add_argument(
610-
"--claude-only", action="store_true", help="Fetch Claude pricing only"
611-
)
598+
parser.add_argument("--aws-only", action="store_true", help="Fetch AWS pricing only")
599+
parser.add_argument("--claude-only", action="store_true", help="Fetch Claude pricing only")
612600
parser.add_argument("--rosa-only", action="store_true", help="ROSA discovery only")
613601
parser.add_argument("--skip-rosa", action="store_true", help="Skip ROSA discovery")
614602
parser.add_argument(

0 commit comments

Comments
 (0)