Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
74 changes: 74 additions & 0 deletions .github/workflows/docs-dispatch.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
# Docs Dispatch
#
# On every push to main: work out which files changed, look them up in the
# file-to-doc map published by the automem-website repository, and, when any
# documentation page is affected, send a `docs-update` repository_dispatch
# event to that repository.
name: Docs Dispatch
on:
  push:
    branches: [main]

# Least privilege: the job only needs to read this repository. The cross-repo
# dispatch authenticates with its own token secret, not with GITHUB_TOKEN.
permissions:
  contents: read

jobs:
  check-docs:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v5
        with:
          # Full history so the "before" commit of the push is available to diff against.
          fetch-depth: 0

      - name: Get changed files
        id: changed
        # Event data is handed over through the environment instead of being
        # interpolated into the script text, so it can never be parsed as shell.
        env:
          BASE_SHA: ${{ github.event.before }}
          HEAD_SHA: ${{ github.event.after }}
        run: |
          # An all-zero "before" SHA means there is no previous commit to compare with.
          # Diff against the empty tree so every file in the push is reported; diffing
          # against the root commit would omit the root commit's own files and would
          # break when the history has more than one root.
          if [ "$BASE_SHA" = "0000000000000000000000000000000000000000" ]; then
            BASE_SHA=$(git hash-object -t tree /dev/null)
          fi

          # Emit the changed paths as a compact JSON array of strings.
          FILES=$(git diff --name-only "$BASE_SHA" "$HEAD_SHA" | jq -R -s -c 'split("\n") | map(select(. != ""))')
          echo "files=$FILES" >> "$GITHUB_OUTPUT"

      - name: Check file-doc mapping
        id: check
        # The changed-file list contains repository-controlled file names. Passing it
        # via env (rather than pasting it between single quotes in the script) keeps
        # a file name containing a quote from breaking or injecting into the shell.
        env:
          REPO_KEY: ${{ github.event.repository.name }}
          CHANGED: ${{ steps.changed.outputs.files }}
        run: |
          # Best effort: if the map cannot be fetched, report "none" and succeed.
          if ! MAP=$(curl -sf "https://raw.githubusercontent.com/verygoodplugins/automem-website/main/scripts/file-doc-map.json"); then
            echo "Failed to fetch file-doc-map.json, skipping"
            echo "affected=none" >> "$GITHUB_OUTPUT"
            exit 0
          fi

          # Map keys are either exact file paths or directory globs ending in "/**".
          # For a glob, only the trailing "**" is stripped so the prefix keeps its "/":
          # "src/**" matches "src/a.py" but not "src2/a.py".
          AFFECTED=$(printf '%s\n' "$MAP" | jq -r --arg repo "$REPO_KEY" --argjson changed "$CHANGED" '
            def matches_pattern($file; $pattern):
              if ($pattern | endswith("/**")) then
                ($file | startswith($pattern[0:-2]))
              else
                $file == $pattern
              end;

            .[$repo] // {} | to_entries | map(
              select(.key as $pattern | $changed | any(. as $file | matches_pattern($file; $pattern)))
            ) | map(.value) | flatten | unique | .[]
          ')

          # "none" is the sentinel the dispatch step tests for; otherwise emit a JSON array.
          if [ -z "$AFFECTED" ]; then
            echo "affected=none" >> "$GITHUB_OUTPUT"
          else
            AFFECTED_JSON=$(printf '%s\n' "$AFFECTED" | jq -R -s -c 'split("\n") | map(select(. != ""))')
            echo "affected=$AFFECTED_JSON" >> "$GITHUB_OUTPUT"
          fi

      - name: Dispatch to automem-website
        if: steps.check.outputs.affected != 'none'
        uses: peter-evans/repository-dispatch@v3
        with:
          token: ${{ secrets.RELEASE_PLEASE_TOKEN }}
          repository: verygoodplugins/automem-website
          event-type: docs-update
          # changed_files and affected_docs are already JSON arrays, so they are
          # inserted unquoted; the remaining values are plain strings.
          client-payload: |
            {
              "source_repo": "${{ github.event.repository.full_name }}",
              "source_sha": "${{ github.event.after }}",
              "changed_files": ${{ steps.changed.outputs.files }},
              "affected_docs": ${{ steps.check.outputs.affected }},
              "commit_url": "${{ github.event.head_commit.url }}",
              "compare_url": "${{ github.event.compare }}"
            }
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -40,3 +40,4 @@ node_modules/
.env.bench
/benchmarks/snapshots/
/benchmarks/results/
benchmarks/baselines/locomo_baseline.json
49 changes: 48 additions & 1 deletion automem/api/recall.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,13 @@

from flask import Blueprint, abort, jsonify, request

from automem.config import ALLOWED_RELATIONS, RECALL_EXPANSION_LIMIT, RECALL_RELATION_LIMIT
from automem.config import (
ALLOWED_RELATIONS,
RECALL_ADAPTIVE_FLOOR,
RECALL_EXPANSION_LIMIT,
RECALL_MIN_SCORE,
RECALL_RELATION_LIMIT,
)
from automem.utils.graph import _serialize_node

DEFAULT_STYLE_PRIORITY_TAGS: Set[str] = {
Expand Down Expand Up @@ -1084,6 +1090,10 @@ def _parse_threshold(param_name: str) -> Optional[float]:
expand_min_importance = _parse_threshold("expand_min_importance")
expand_min_strength = _parse_threshold("expand_min_strength")

min_score_param = _parse_threshold("min_score")
min_score = min_score_param if min_score_param is not None else (RECALL_MIN_SCORE or None)
adaptive_floor = _parse_bool_param(request.args.get("adaptive_floor"), RECALL_ADAPTIVE_FLOOR)

context_label = (request.args.get("context") or "").strip().lower()
active_path = (
request.args.get("active_path")
Expand Down Expand Up @@ -1242,6 +1252,11 @@ def _run_single_query(
)
]

if min_score is not None and min_score > 0:
local_results = [
res for res in local_results if float(res.get("final_score", 0.0)) >= min_score
]

if sort_param == "score":
local_results.sort(
key=lambda r: (
Expand Down Expand Up @@ -1412,6 +1427,32 @@ def _run_single_query(
]
results = seed_results + expansion_results + entity_expansion_results

pre_filter_count = len(results)

# Apply adaptive score floor: detect steep dropoff and cut low-quality tail
score_floor_applied = None
if sort_param == "score" and adaptive_floor and len(results) > 3:
scores = sorted([float(r.get("final_score", 0.0)) for r in results], reverse=True)
# Find the largest gap between consecutive scores in the top half
max_gap = 0.0
gap_idx = -1
halfway = max(3, len(scores) // 2)
for i in range(1, halfway):
gap = scores[i - 1] - scores[i]
if gap > max_gap:
max_gap = gap
gap_idx = i
# If there's a steep dropoff (>15% of max score), cut below it
if max_gap > 0.15 * scores[0] and gap_idx > 0:
score_floor_applied = scores[gap_idx]
results = [
r for r in results if float(r.get("final_score", 0.0)) >= score_floor_applied
]

# Apply explicit min_score on final assembled results (catches expansions)
if min_score is not None and min_score > 0:
results = [r for r in results if float(r.get("final_score", 0.0)) >= min_score]

# JIT-enrich unenriched memories inline (cheap: entities + summary ~50ms each)
jit_enriched_count = 0
if jit_enrich_fn is not None:
Expand Down Expand Up @@ -1467,6 +1508,12 @@ def _run_single_query(
response["tag_match"] = tag_match
if jit_enriched_count:
response["jit_enriched_count"] = jit_enriched_count
if min_score or score_floor_applied:
response["score_filter"] = {
"min_score": min_score,
"adaptive_floor": score_floor_applied,
"filtered_count": pre_filter_count - len(results),
}
response["query_time_ms"] = round((time.perf_counter() - query_start) * 1000, 2)
if any_context_profile:
response["context_priority"] = {
Expand Down
2 changes: 2 additions & 0 deletions automem/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,8 @@

# Recall settings; each one can be overridden by an environment variable of the same name.
RECALL_RELATION_LIMIT = int(os.environ.get("RECALL_RELATION_LIMIT", "5"))
RECALL_EXPANSION_LIMIT = int(os.environ.get("RECALL_EXPANSION_LIMIT", "25"))
# Default minimum final score for recall results; the recall API treats 0.0 as "no floor".
RECALL_MIN_SCORE = float(os.environ.get("RECALL_MIN_SCORE", "0.0"))
# Default for the adaptive score floor: enabled only for "true", "1" or "yes" (any letter case).
RECALL_ADAPTIVE_FLOOR = os.environ.get("RECALL_ADAPTIVE_FLOOR", "true").lower() in {
    "true",
    "1",
    "yes",
}

# Memory content size limits (governs auto-summarization on store)
# Soft limit: Content above this triggers auto-summarization
Expand Down
2 changes: 1 addition & 1 deletion benchmarks/EXPERIMENT_LOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ on the snapshot-based bench infrastructure (PR #97, merged 2026-03-02).
| Tier | Benchmark | Runtime | Cost | When to use |
|------|-----------|---------|------|-------------|
| 0 | `make test` (unit) | 30s | free | Every change |
| 1 | `locomo-mini` (2 convos, 198 Qs) | 2-3 min | free | Rapid iteration |
| 1 | `locomo-mini` (2 convos, 304 Qs) | 2-3 min | free | Rapid iteration |
| 2 | `locomo` (10 convos, 1986 Qs) | 5-10 min | free | Before merge |
| 3 | `longmemeval-mini` (20 Qs) | 15 min | ~$1 | Scoring/entity changes |
| 4 | `longmemeval` (500 Qs) | 1-2 hr | ~$10 | Milestones only |
Expand Down
21 changes: 18 additions & 3 deletions scripts/bench/health_check.py
Original file line number Diff line number Diff line change
Expand Up @@ -121,12 +121,24 @@ def check_score_distribution(base_url: str, api_token: Optional[str] = None) ->
"min": round(min(all_scores), 4) if all_scores else 0,
"max": round(max(all_scores), 4) if all_scores else 0,
"mean": round(statistics.mean(all_scores), 4) if all_scores else 0,
"stddev": round(statistics.stdev(all_scores), 4) if len(all_scores) > 1 else 0,
"stddev": (round(statistics.stdev(all_scores), 4) if len(all_scores) > 1 else 0),
"spread": round(spread, 4),
},
"latency": {
"p50_ms": round(statistics.median(latencies), 1) if latencies else 0,
"p95_ms": round(sorted(latencies)[int(len(latencies) * 0.95)] if latencies else 0, 1),
"p95_ms": (
round(
sorted(latencies)[
max(
0,
min(len(latencies) - 1, math.ceil(0.95 * len(latencies)) - 1),
)
],
1,
)
if latencies
else 0
),
"mean_ms": round(statistics.mean(latencies), 1) if latencies else 0,
},
"per_query": per_query,
Expand Down Expand Up @@ -232,7 +244,10 @@ def check_cross_query_overlap(base_url: str, api_token: Optional[str] = None) ->
query_results[query[:40]] = ids

if len(query_results) < 2:
return {"check": "cross_query_overlap", "verdict": "SKIP: not enough queries succeeded"}
return {
"check": "cross_query_overlap",
"verdict": "SKIP: not enough queries succeeded",
}

id_lists = list(query_results.values())
overlap_pairs = 0
Expand Down
16 changes: 12 additions & 4 deletions tests/benchmarks/test_locomo.py
Original file line number Diff line number Diff line change
Expand Up @@ -575,12 +575,20 @@ def match_dates_fuzzy(
if not question_dates or not memory_dates:
return False

# Check for matches within tolerance (strip tz for safe comparison)
# Check for matches within tolerance (normalize to UTC before comparing)
for q_date in question_dates:
q_naive = q_date.replace(tzinfo=None)
q_utc = (
q_date.astimezone(timezone.utc)
if q_date.tzinfo is not None
else q_date.replace(tzinfo=timezone.utc)
)
for m_date in memory_dates:
m_naive = m_date.replace(tzinfo=None)
days_diff = abs((q_naive - m_naive).days)
m_utc = (
m_date.astimezone(timezone.utc)
if m_date.tzinfo is not None
else m_date.replace(tzinfo=timezone.utc)
)
days_diff = abs((q_utc - m_utc).total_seconds()) / 86400
if days_diff <= tolerance_days:
return True

Expand Down