verygoodplugins · jack-arturo · Mar 2, 2026 · Mar 2, 2026 · Mar 2, 2026 · Mar 2, 2026
diff --git a/.github/workflows/docs-dispatch.yml b/.github/workflows/docs-dispatch.yml
@@ -0,0 +1,74 @@
+name: Docs Dispatch
+on:
+  push:
+    branches: [main]
+
+jobs:
+  check-docs:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v5
+        with:
+          fetch-depth: 0
+
+      - name: Get changed files
+        id: changed
+        run: |
+          BASE_SHA="${{ github.event.before }}"
+          HEAD_SHA="${{ github.event.after }}"
+
+          if [ "$BASE_SHA" = "0000000000000000000000000000000000000000" ]; then
+            BASE_SHA=$(git rev-list --max-parents=0 "$HEAD_SHA")
+          fi
+
+          FILES=$(git diff --name-only "$BASE_SHA" "$HEAD_SHA" | jq -R -s -c 'split("\n") | map(select(. != ""))')
+          echo "files=$FILES" >> $GITHUB_OUTPUT
+
+      - name: Check file-doc mapping
+        id: check
+        env:  # step inputs go through env, not inline expressions, so a crafted file name cannot inject shell code
+          REPO_KEY: ${{ github.event.repository.name }}
+          CHANGED: ${{ steps.changed.outputs.files }}
+        run: |
+          if ! MAP=$(curl -sf "https://raw.githubusercontent.com/verygoodplugins/automem-website/main/scripts/file-doc-map.json"); then
+            echo "Failed to fetch file-doc-map.json, skipping"
+            echo "affected=none" >> $GITHUB_OUTPUT
+            exit 0
+          fi
+
+          AFFECTED=$(echo "$MAP" | jq -r --arg repo "$REPO_KEY" --argjson changed "$CHANGED" '
+            def matches_pattern($file; $pattern):
+              if ($pattern | endswith("/**")) then
+                ($file | startswith($pattern[0:-3]))
+              else
+                $file == $pattern
+              end;
+
+            .[$repo] // {} | to_entries | map(
+              select(.key as $pattern | $changed | any(. as $file | matches_pattern($file; $pattern)))
+            ) | map(.value) | flatten | unique | .[]
+          ')
+
+          if [ -z "$AFFECTED" ]; then
+            echo "affected=none" >> $GITHUB_OUTPUT
+          else
+            AFFECTED_JSON=$(echo "$AFFECTED" | jq -R -s -c 'split("\n") | map(select(. != ""))')
+            echo "affected=$AFFECTED_JSON" >> $GITHUB_OUTPUT
+          fi
+
+      - name: Dispatch to automem-website
+        if: steps.check.outputs.affected != 'none'
+        uses: peter-evans/repository-dispatch@v3
+        with:
+          token: ${{ secrets.RELEASE_PLEASE_TOKEN }}
+          repository: verygoodplugins/automem-website
+          event-type: docs-update
+          client-payload: |
+            {
+              "source_repo": "${{ github.event.repository.full_name }}",
+              "source_sha": "${{ github.event.after }}",
+              "changed_files": ${{ steps.changed.outputs.files }},
+              "affected_docs": ${{ steps.check.outputs.affected }},
+              "commit_url": "${{ github.event.head_commit.url }}",
+              "compare_url": "${{ github.event.compare }}"
+            }
diff --git a/.gitignore b/.gitignore
@@ -40,3 +40,4 @@ node_modules/
 .env.bench
 /benchmarks/snapshots/
 /benchmarks/results/
+benchmarks/baselines/locomo_baseline.json
diff --git a/automem/api/recall.py b/automem/api/recall.py
@@ -8,7 +8,13 @@
 
 from flask import Blueprint, abort, jsonify, request
 
-from automem.config import ALLOWED_RELATIONS, RECALL_EXPANSION_LIMIT, RECALL_RELATION_LIMIT
+from automem.config import (
+    ALLOWED_RELATIONS,
+    RECALL_ADAPTIVE_FLOOR,
+    RECALL_EXPANSION_LIMIT,
+    RECALL_MIN_SCORE,
+    RECALL_RELATION_LIMIT,
+)
 from automem.utils.graph import _serialize_node
 
 DEFAULT_STYLE_PRIORITY_TAGS: Set[str] = {
@@ -1084,6 +1090,10 @@ def _parse_threshold(param_name: str) -> Optional[float]:
     expand_min_importance = _parse_threshold("expand_min_importance")
     expand_min_strength = _parse_threshold("expand_min_strength")
 
+    min_score_param = _parse_threshold("min_score")
+    min_score = min_score_param if min_score_param is not None else (RECALL_MIN_SCORE or None)
+    adaptive_floor = _parse_bool_param(request.args.get("adaptive_floor"), RECALL_ADAPTIVE_FLOOR)
+
     context_label = (request.args.get("context") or "").strip().lower()
     active_path = (
         request.args.get("active_path")
@@ -1242,6 +1252,11 @@ def _run_single_query(
             )
         ]
 
+        if min_score is not None and min_score > 0:
+            local_results = [
+                res for res in local_results if float(res.get("final_score", 0.0)) >= min_score
+            ]
+
         if sort_param == "score":
             local_results.sort(
                 key=lambda r: (
@@ -1412,6 +1427,32 @@ def _run_single_query(
             ]
         results = seed_results + expansion_results + entity_expansion_results
 
+    pre_filter_count = len(results)
+
+    # Apply adaptive score floor: detect steep dropoff and cut low-quality tail
+    score_floor_applied = None
+    if sort_param == "score" and adaptive_floor and len(results) > 3:
+        scores = sorted([float(r.get("final_score", 0.0)) for r in results], reverse=True)
+        # Find the largest gap between consecutive scores in the top half (never fewer than the top 3)
+        max_gap = 0.0
+        gap_idx = -1
+        halfway = max(3, len(scores) // 2)
+        for i in range(1, halfway):
+            gap = scores[i - 1] - scores[i]
+            if gap > max_gap:
+                max_gap = gap
+                gap_idx = i
+        # If there's a steep dropoff (>15% of max score), cut below it
+        if max_gap > 0.15 * scores[0] and gap_idx > 0:
+            score_floor_applied = scores[gap_idx]
+            results = [
+                r for r in results if float(r.get("final_score", 0.0)) >= score_floor_applied
+            ]
+
+    # Apply explicit min_score on final assembled results (catches expansions)
+    if min_score is not None and min_score > 0:
+        results = [r for r in results if float(r.get("final_score", 0.0)) >= min_score]
+
     # JIT-enrich unenriched memories inline (cheap: entities + summary ~50ms each)
     jit_enriched_count = 0
     if jit_enrich_fn is not None:
@@ -1467,6 +1508,12 @@ def _run_single_query(
     response["tag_match"] = tag_match
     if jit_enriched_count:
         response["jit_enriched_count"] = jit_enriched_count
+    if min_score or score_floor_applied:
+        response["score_filter"] = {
+            "min_score": min_score,
+            "adaptive_floor": score_floor_applied,
+            "filtered_count": pre_filter_count - len(results),
+        }
     response["query_time_ms"] = round((time.perf_counter() - query_start) * 1000, 2)
     if any_context_profile:
         response["context_priority"] = {

diff --git a/automem/config.py b/automem/config.py
@@ -92,6 +92,8 @@
 
 RECALL_RELATION_LIMIT = int(os.getenv("RECALL_RELATION_LIMIT", "5"))
 RECALL_EXPANSION_LIMIT = int(os.getenv("RECALL_EXPANSION_LIMIT", "25"))
+RECALL_MIN_SCORE = float(os.getenv("RECALL_MIN_SCORE", "0.0"))
+RECALL_ADAPTIVE_FLOOR = os.getenv("RECALL_ADAPTIVE_FLOOR", "true").lower() in ("true", "1", "yes")
 
 # Memory content size limits (governs auto-summarization on store)
 # Soft limit: Content above this triggers auto-summarization

diff --git a/benchmarks/EXPERIMENT_LOG.md b/benchmarks/EXPERIMENT_LOG.md
@@ -10,7 +10,7 @@ on the snapshot-based bench infrastructure (PR #97, merged 2026-03-02).
 | Tier | Benchmark | Runtime | Cost | When to use |
 |------|-----------|---------|------|-------------|
 | 0 | `make test` (unit) | 30s | free | Every change |
-| 1 | `locomo-mini` (2 convos, 198 Qs) | 2-3 min | free | Rapid iteration |
+| 1 | `locomo-mini` (2 convos, 304 Qs) | 2-3 min | free | Rapid iteration |
 | 2 | `locomo` (10 convos, 1986 Qs) | 5-10 min | free | Before merge |
 | 3 | `longmemeval-mini` (20 Qs) | 15 min | ~$1 | Scoring/entity changes |
 | 4 | `longmemeval` (500 Qs) | 1-2 hr | ~$10 | Milestones only |

diff --git a/scripts/bench/health_check.py b/scripts/bench/health_check.py
@@ -121,12 +121,24 @@ def check_score_distribution(base_url: str, api_token: Optional[str] = None) ->
             "min": round(min(all_scores), 4) if all_scores else 0,
             "max": round(max(all_scores), 4) if all_scores else 0,
             "mean": round(statistics.mean(all_scores), 4) if all_scores else 0,
-            "stddev": round(statistics.stdev(all_scores), 4) if len(all_scores) > 1 else 0,
+            "stddev": (round(statistics.stdev(all_scores), 4) if len(all_scores) > 1 else 0),
             "spread": round(spread, 4),
         },
         "latency": {
             "p50_ms": round(statistics.median(latencies), 1) if latencies else 0,
-            "p95_ms": round(sorted(latencies)[int(len(latencies) * 0.95)] if latencies else 0, 1),
+            "p95_ms": (
+                round(
+                    sorted(latencies)[
+                        max(
+                            0,
+                            min(len(latencies) - 1, math.ceil(0.95 * len(latencies)) - 1),
+                        )
+                    ],
+                    1,
+                )
+                if latencies
+                else 0
+            ),
             "mean_ms": round(statistics.mean(latencies), 1) if latencies else 0,
         },
         "per_query": per_query,
@@ -232,7 +244,10 @@ def check_cross_query_overlap(base_url: str, api_token: Optional[str] = None) ->
         query_results[query[:40]] = ids
 
     if len(query_results) < 2:
-        return {"check": "cross_query_overlap", "verdict": "SKIP: not enough queries succeeded"}
+        return {
+            "check": "cross_query_overlap",
+            "verdict": "SKIP: not enough queries succeeded",
+        }
 
     id_lists = list(query_results.values())
     overlap_pairs = 0

diff --git a/tests/benchmarks/test_locomo.py b/tests/benchmarks/test_locomo.py
@@ -575,12 +575,20 @@ def match_dates_fuzzy(
         if not question_dates or not memory_dates:
             return False
 
-        # Check for matches within tolerance (strip tz for safe comparison)
+        # Check for matches within tolerance (normalize to UTC before comparing)
         for q_date in question_dates:
-            q_naive = q_date.replace(tzinfo=None)
+            q_utc = (
+                q_date.astimezone(timezone.utc)
+                if q_date.tzinfo is not None
+                else q_date.replace(tzinfo=timezone.utc)
+            )
             for m_date in memory_dates:
-                m_naive = m_date.replace(tzinfo=None)
-                days_diff = abs((q_naive - m_naive).days)
+                m_utc = (
+                    m_date.astimezone(timezone.utc)
+                    if m_date.tzinfo is not None
+                    else m_date.replace(tzinfo=timezone.utc)
+                )
+                days_diff = abs((q_utc - m_utc).total_seconds()) / 86400
                 if days_diff <= tolerance_days:
                     return True