From ba4344deb0f26df770883546e59692b80462666a Mon Sep 17 00:00:00 2001 From: Taras Yaroshko Date: Mon, 25 May 2026 21:37:56 +0300 Subject: [PATCH 01/53] test: add long-term memory test infrastructure --- tests/unit/memory/long_term/__init__.py | 0 tests/unit/memory/long_term/conftest.py | 52 +++++++++++++++++++++++++ 2 files changed, 52 insertions(+) create mode 100644 tests/unit/memory/long_term/__init__.py create mode 100644 tests/unit/memory/long_term/conftest.py diff --git a/tests/unit/memory/long_term/__init__.py b/tests/unit/memory/long_term/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/tests/unit/memory/long_term/conftest.py b/tests/unit/memory/long_term/conftest.py new file mode 100644 index 000000000..e41b141ff --- /dev/null +++ b/tests/unit/memory/long_term/conftest.py @@ -0,0 +1,52 @@ +"""Shared fixtures for long-term memory unit tests.""" +import hashlib +from typing import Any + +import pytest + + +class FakeTextEmbedder: + """Deterministic text embedder for tests. + + Maps text to a fixed-length unit vector derived from its sha256 digest. + Same text → same vector. Different texts → near-orthogonal vectors + (good enough for cosine ranking in unit tests). 
+ """ + + DIM = 16 + + def execute(self, input_data: Any, **kwargs) -> dict: + # Mirror TextEmbedder.execute output shape: {"query": ..., "embedding": ...} + if hasattr(input_data, "query"): + text = input_data.query + elif isinstance(input_data, dict): + text = input_data["query"] + else: + text = str(input_data) + return {"query": text, "embedding": self._embed(text)} + + def embed(self, text: str) -> list[float]: + """Convenience helper for tests that want a raw vector.""" + return self._embed(text) + + @classmethod + def _embed(cls, text: str) -> list[float]: + digest = hashlib.sha256(text.encode("utf-8")).digest() + raw = [(b / 127.5) - 1.0 for b in digest[: cls.DIM]] + norm = sum(x * x for x in raw) ** 0.5 or 1.0 + return [x / norm for x in raw] + + +@pytest.fixture +def fake_embedder() -> FakeTextEmbedder: + return FakeTextEmbedder() + + +@pytest.fixture +def user_id() -> str: + return "user-test-123" + + +@pytest.fixture +def other_user_id() -> str: + return "user-other-456" From fac5d5390b51bbc8c8f2529a37531f6adfbc7134 Mon Sep 17 00:00:00 2001 From: Taras Yaroshko Date: Mon, 25 May 2026 21:38:19 +0300 Subject: [PATCH 02/53] feat: add Fact data model for long-term memory --- dynamiq/memory/long_term/__init__.py | 8 +++++++ dynamiq/memory/long_term/schemas.py | 22 +++++++++++++++++ tests/unit/memory/long_term/test_schemas.py | 26 +++++++++++++++++++++ 3 files changed, 56 insertions(+) create mode 100644 dynamiq/memory/long_term/__init__.py create mode 100644 dynamiq/memory/long_term/schemas.py create mode 100644 tests/unit/memory/long_term/test_schemas.py diff --git a/dynamiq/memory/long_term/__init__.py b/dynamiq/memory/long_term/__init__.py new file mode 100644 index 000000000..28db7be6f --- /dev/null +++ b/dynamiq/memory/long_term/__init__.py @@ -0,0 +1,8 @@ +"""Long-term, fact-shaped, user-scoped memory for Dynamiq agents. + +See docs/superpowers/specs/2026-05-25-long-term-memory-design.md. 
+""" + +from dynamiq.memory.long_term.schemas import Fact + +__all__ = ["Fact"] diff --git a/dynamiq/memory/long_term/schemas.py b/dynamiq/memory/long_term/schemas.py new file mode 100644 index 000000000..7b1d3b6fc --- /dev/null +++ b/dynamiq/memory/long_term/schemas.py @@ -0,0 +1,22 @@ +"""Pydantic schemas for long-term memory.""" +from datetime import datetime +from typing import Any + +from pydantic import BaseModel, Field + + +class Fact(BaseModel): + """A single long-term memory fact, scoped to a user. + + `hash` is md5(f"{user_id}:{content.strip().lower()}") and is used to + short-circuit exact duplicates in `LongTermMemory.remember()` before + any embedder call. + """ + + id: str + content: str + hash: str + user_id: str + metadata: dict[str, Any] = Field(default_factory=dict) + created_at: datetime + updated_at: datetime diff --git a/tests/unit/memory/long_term/test_schemas.py b/tests/unit/memory/long_term/test_schemas.py new file mode 100644 index 000000000..5182611de --- /dev/null +++ b/tests/unit/memory/long_term/test_schemas.py @@ -0,0 +1,26 @@ +"""Tests for long-term memory pydantic schemas.""" +from datetime import UTC, datetime + +from dynamiq.memory.long_term.schemas import Fact + + +def test_fact_round_trip(): + now = datetime.now(UTC) + fact = Fact( + id="f1", + content="User prefers terse responses", + hash="abcd1234", + user_id="u1", + metadata={"category": "preference"}, + created_at=now, + updated_at=now, + ) + dumped = fact.model_dump() + assert Fact(**dumped) == fact + + +def test_fact_metadata_defaults_to_empty_dict(): + now = datetime.now(UTC) + fact = Fact(id="f1", content="x", hash="h", user_id="u", + created_at=now, updated_at=now) + assert fact.metadata == {} From 958384b15b41bb780b819f64c692012e489cc3b8 Mon Sep 17 00:00:00 2001 From: Taras Yaroshko Date: Mon, 25 May 2026 21:38:29 +0300 Subject: [PATCH 03/53] feat: add LongTermMemoryBackend abstract base class --- dynamiq/memory/long_term/__init__.py | 6 ++- 
dynamiq/memory/long_term/base.py | 58 ++++++++++++++++++++++++ tests/unit/memory/long_term/test_base.py | 26 +++++++++++ 3 files changed, 89 insertions(+), 1 deletion(-) create mode 100644 dynamiq/memory/long_term/base.py create mode 100644 tests/unit/memory/long_term/test_base.py diff --git a/dynamiq/memory/long_term/__init__.py b/dynamiq/memory/long_term/__init__.py index 28db7be6f..fd2853e21 100644 --- a/dynamiq/memory/long_term/__init__.py +++ b/dynamiq/memory/long_term/__init__.py @@ -3,6 +3,10 @@ See docs/superpowers/specs/2026-05-25-long-term-memory-design.md. """ +from dynamiq.memory.long_term.base import LongTermMemoryBackend from dynamiq.memory.long_term.schemas import Fact -__all__ = ["Fact"] +__all__ = [ + "Fact", + "LongTermMemoryBackend", +] diff --git a/dynamiq/memory/long_term/base.py b/dynamiq/memory/long_term/base.py new file mode 100644 index 000000000..ca45171b1 --- /dev/null +++ b/dynamiq/memory/long_term/base.py @@ -0,0 +1,58 @@ +"""Backend ABC for long-term memory. + +Concrete backends wrap `dynamiq.storages.vector.*` infrastructure. +Independent of `dynamiq.memory.backends.MemoryBackend` (no shared base) — +see spec sections "Fork 3" and "Fork 4" for the rationale. +""" +from abc import ABC, abstractmethod + +from pydantic import BaseModel, ConfigDict + +from dynamiq.memory.long_term.schemas import Fact + + +class LongTermMemoryBackend(ABC, BaseModel): + """Fact-shaped, scope-filtered storage backend for `LongTermMemory`.""" + + model_config = ConfigDict(arbitrary_types_allowed=True) + + @abstractmethod + def insert(self, fact: Fact, embedding: list[float]) -> None: + """Insert a new fact. Caller has already deduped via `get_by_hash`.""" + + @abstractmethod + def get(self, fact_id: str) -> Fact | None: ... + + @abstractmethod + def get_by_hash(self, *, user_id: str, content_hash: str) -> Fact | None: + """Exact-content dedup gate. 
Returns the existing Fact or None.""" + + @abstractmethod + def delete(self, fact_id: str) -> None: + """Hard-delete a single fact. No-op if not present.""" + + @abstractmethod + def search( + self, *, query_embedding: list[float], + scope: dict[str, str], limit: int, + ) -> list[tuple[Fact, float]]: + """Vector similarity search filtered by scope. Returns (fact, score) tuples, + most relevant first. No threshold filtering — caller decides.""" + + @abstractmethod + def list_by_scope( + self, scope: dict[str, str], limit: int = 100, + ) -> list[Fact]: + """Non-semantic listing for admin / introspection.""" + + @abstractmethod + def delete_scope(self, scope: dict[str, str]) -> int: + """Hard-delete every fact matching `scope`. Returns count deleted.""" + + # Phase 2 reservation — see spec Appendix A. + # In v1, `update()` is NOT @abstractmethod and the default raises. + def update(self, fact_id: str, content: str, embedding: list[float]) -> None: + raise NotImplementedError( + "update() lands in Phase 2 with the auto-extractor. " + "In v1, correct a fact via delete() + insert()." + ) diff --git a/tests/unit/memory/long_term/test_base.py b/tests/unit/memory/long_term/test_base.py new file mode 100644 index 000000000..5182aa15a --- /dev/null +++ b/tests/unit/memory/long_term/test_base.py @@ -0,0 +1,26 @@ +"""Tests for LongTermMemoryBackend ABC.""" +import pytest + +from dynamiq.memory.long_term.base import LongTermMemoryBackend + + +def test_long_term_memory_backend_is_abstract(): + with pytest.raises(TypeError): + LongTermMemoryBackend() + + +def test_long_term_memory_backend_update_default_raises(): + """Phase 2 reserves `update`; v1 default raises NotImplementedError.""" + + class TinyBackend(LongTermMemoryBackend): + def insert(self, fact, embedding): ... + def get(self, fact_id): return None + def get_by_hash(self, *, user_id, content_hash): return None + def delete(self, fact_id): ... 
+ def search(self, *, query_embedding, scope, limit): return [] + def list_by_scope(self, scope, limit=100): return [] + def delete_scope(self, scope): return 0 + + backend = TinyBackend() + with pytest.raises(NotImplementedError, match="Phase 2"): + backend.update("f1", "x", [0.0]) From add1c8965db5f8ff2ef82b60120d7aa0fc05224d Mon Sep 17 00:00:00 2001 From: Taras Yaroshko Date: Mon, 25 May 2026 21:38:36 +0300 Subject: [PATCH 04/53] feat: add in-memory long-term memory backend --- dynamiq/memory/long_term/backends/__init__.py | 5 + .../memory/long_term/backends/in_memory.py | 76 ++++++++++ .../long_term/test_in_memory_backend.py | 142 ++++++++++++++++++ 3 files changed, 223 insertions(+) create mode 100644 dynamiq/memory/long_term/backends/__init__.py create mode 100644 dynamiq/memory/long_term/backends/in_memory.py create mode 100644 tests/unit/memory/long_term/test_in_memory_backend.py diff --git a/dynamiq/memory/long_term/backends/__init__.py b/dynamiq/memory/long_term/backends/__init__.py new file mode 100644 index 000000000..2de17ec00 --- /dev/null +++ b/dynamiq/memory/long_term/backends/__init__.py @@ -0,0 +1,5 @@ +"""Concrete LongTermMemoryBackend implementations.""" + +from dynamiq.memory.long_term.backends.in_memory import InMemoryFactBackend + +__all__ = ["InMemoryFactBackend"] diff --git a/dynamiq/memory/long_term/backends/in_memory.py b/dynamiq/memory/long_term/backends/in_memory.py new file mode 100644 index 000000000..c7b67d48f --- /dev/null +++ b/dynamiq/memory/long_term/backends/in_memory.py @@ -0,0 +1,76 @@ +"""In-process fact backend for tests and light use. + +Storage is a dict by fact_id. Search uses numpy cosine over all in-scope +vectors — fine for hundreds of facts, not intended for production scale. 
+""" +import numpy as np +from pydantic import PrivateAttr + +from dynamiq.memory.long_term.base import LongTermMemoryBackend +from dynamiq.memory.long_term.schemas import Fact + + +class InMemoryFactBackend(LongTermMemoryBackend): + """Dict + numpy-cosine backend. Loses data on restart.""" + + _facts: dict[str, Fact] = PrivateAttr(default_factory=dict) + _vectors: dict[str, list[float]] = PrivateAttr(default_factory=dict) + + def insert(self, fact: Fact, embedding: list[float]) -> None: + self._facts[fact.id] = fact + self._vectors[fact.id] = list(embedding) + + def get(self, fact_id: str) -> Fact | None: + return self._facts.get(fact_id) + + def get_by_hash(self, *, user_id: str, content_hash: str) -> Fact | None: + for fact in self._facts.values(): + if fact.user_id == user_id and fact.hash == content_hash: + return fact + return None + + def delete(self, fact_id: str) -> None: + self._facts.pop(fact_id, None) + self._vectors.pop(fact_id, None) + + def search( + self, *, query_embedding: list[float], + scope: dict[str, str], limit: int, + ) -> list[tuple[Fact, float]]: + if not self._facts: + return [] + + query = np.asarray(query_embedding, dtype=np.float64) + query_norm = np.linalg.norm(query) or 1.0 + + scored: list[tuple[Fact, float]] = [] + for fact_id, fact in self._facts.items(): + if not _matches_scope(fact, scope): + continue + vec = np.asarray(self._vectors[fact_id], dtype=np.float64) + vec_norm = np.linalg.norm(vec) or 1.0 + cosine = float(np.dot(query, vec) / (query_norm * vec_norm)) + scored.append((fact, cosine)) + + scored.sort(key=lambda pair: pair[1], reverse=True) + return scored[:limit] + + def list_by_scope( + self, scope: dict[str, str], limit: int = 100, + ) -> list[Fact]: + matched = [f for f in self._facts.values() if _matches_scope(f, scope)] + return matched[:limit] + + def delete_scope(self, scope: dict[str, str]) -> int: + to_delete = [fid for fid, f in self._facts.items() if _matches_scope(f, scope)] + for fid in to_delete: + 
self.delete(fid) + return len(to_delete) + + +def _matches_scope(fact: Fact, scope: dict[str, str]) -> bool: + """Return True iff every key in scope matches the corresponding Fact attribute.""" + for key, value in scope.items(): + if getattr(fact, key, None) != value: + return False + return True diff --git a/tests/unit/memory/long_term/test_in_memory_backend.py b/tests/unit/memory/long_term/test_in_memory_backend.py new file mode 100644 index 000000000..5883cb917 --- /dev/null +++ b/tests/unit/memory/long_term/test_in_memory_backend.py @@ -0,0 +1,142 @@ +"""Tests for InMemoryFactBackend.""" +from datetime import UTC, datetime + +from dynamiq.memory.long_term.backends.in_memory import InMemoryFactBackend +from dynamiq.memory.long_term.schemas import Fact + + +def _fact(fact_id: str, user_id: str, content: str, + content_hash: str | None = None) -> Fact: + now = datetime.now(UTC) + return Fact( + id=fact_id, content=content, + hash=content_hash or f"h-{fact_id}", + user_id=user_id, metadata={}, + created_at=now, updated_at=now, + ) + + +# --- insert / get / get_by_hash --- + +def test_insert_then_get(fake_embedder): + backend = InMemoryFactBackend() + fact = _fact("f1", "u1", "hello") + backend.insert(fact, fake_embedder.embed("hello")) + assert backend.get("f1") == fact + + +def test_get_unknown_returns_none(): + backend = InMemoryFactBackend() + assert backend.get("does-not-exist") is None + + +def test_get_by_hash_returns_match(fake_embedder): + backend = InMemoryFactBackend() + fact = _fact("f1", "u1", "hello", content_hash="h-shared") + backend.insert(fact, fake_embedder.embed("hello")) + assert backend.get_by_hash(user_id="u1", content_hash="h-shared") == fact + + +def test_get_by_hash_isolates_users(fake_embedder): + backend = InMemoryFactBackend() + backend.insert(_fact("f1", "u1", "hello", "h-shared"), + fake_embedder.embed("hello")) + assert backend.get_by_hash(user_id="u2", content_hash="h-shared") is None + + +def test_get_by_hash_unknown_returns_none(): 
+ backend = InMemoryFactBackend() + assert backend.get_by_hash(user_id="u1", content_hash="nope") is None + + +# --- search --- + +def test_search_returns_relevance_ordered(fake_embedder): + backend = InMemoryFactBackend() + backend.insert(_fact("f1", "u1", "alpha"), fake_embedder.embed("alpha")) + backend.insert(_fact("f2", "u1", "alpha-2"), fake_embedder.embed("alpha-2")) + backend.insert(_fact("f3", "u1", "zulu"), fake_embedder.embed("zulu")) + + hits = backend.search( + query_embedding=fake_embedder.embed("alpha"), + scope={"user_id": "u1"}, + limit=3, + ) + assert hits[0][0].id == "f1" + scores = [score for _, score in hits] + assert scores == sorted(scores, reverse=True) + + +def test_search_filters_by_scope(fake_embedder): + backend = InMemoryFactBackend() + backend.insert(_fact("f1", "u1", "alpha"), fake_embedder.embed("alpha")) + backend.insert(_fact("f2", "u2", "alpha"), fake_embedder.embed("alpha")) + hits = backend.search( + query_embedding=fake_embedder.embed("alpha"), + scope={"user_id": "u1"}, + limit=10, + ) + assert [f.id for f, _ in hits] == ["f1"] + + +def test_search_respects_limit(fake_embedder): + backend = InMemoryFactBackend() + for i in range(5): + backend.insert(_fact(f"f{i}", "u1", f"text{i}"), + fake_embedder.embed(f"text{i}")) + hits = backend.search( + query_embedding=fake_embedder.embed("text0"), + scope={"user_id": "u1"}, limit=2, + ) + assert len(hits) == 2 + + +def test_search_empty_store_returns_empty(fake_embedder): + backend = InMemoryFactBackend() + hits = backend.search( + query_embedding=fake_embedder.embed("anything"), + scope={"user_id": "u1"}, limit=5, + ) + assert hits == [] + + +# --- delete / list_by_scope / delete_scope --- + +def test_delete_removes_fact(fake_embedder): + backend = InMemoryFactBackend() + backend.insert(_fact("f1", "u1", "x"), fake_embedder.embed("x")) + backend.delete("f1") + assert backend.get("f1") is None + + +def test_delete_unknown_is_noop(): + backend = InMemoryFactBackend() + 
backend.delete("does-not-exist") # must not raise + + +def test_list_by_scope_returns_in_scope_facts(fake_embedder): + backend = InMemoryFactBackend() + backend.insert(_fact("f1", "u1", "a"), fake_embedder.embed("a")) + backend.insert(_fact("f2", "u1", "b"), fake_embedder.embed("b")) + backend.insert(_fact("f3", "u2", "c"), fake_embedder.embed("c")) + listed = backend.list_by_scope({"user_id": "u1"}) + assert {f.id for f in listed} == {"f1", "f2"} + + +def test_list_by_scope_respects_limit(fake_embedder): + backend = InMemoryFactBackend() + for i in range(5): + backend.insert(_fact(f"f{i}", "u1", f"x{i}"), + fake_embedder.embed(f"x{i}")) + assert len(backend.list_by_scope({"user_id": "u1"}, limit=2)) == 2 + + +def test_delete_scope_removes_all_in_scope(fake_embedder): + backend = InMemoryFactBackend() + backend.insert(_fact("f1", "u1", "a"), fake_embedder.embed("a")) + backend.insert(_fact("f2", "u1", "b"), fake_embedder.embed("b")) + backend.insert(_fact("f3", "u2", "c"), fake_embedder.embed("c")) + deleted = backend.delete_scope({"user_id": "u1"}) + assert deleted == 2 + assert backend.list_by_scope({"user_id": "u1"}) == [] + assert len(backend.list_by_scope({"user_id": "u2"})) == 1 From 763abe71cdf605f47f2f7e10319ba9e7ed2df192 Mon Sep 17 00:00:00 2001 From: Taras Yaroshko Date: Mon, 25 May 2026 21:38:46 +0300 Subject: [PATCH 05/53] feat: add LongTermMemory facade with remember, recall, forget --- dynamiq/memory/long_term/__init__.py | 2 + dynamiq/memory/long_term/long_term_memory.py | 110 ++++++++++++++ .../memory/long_term/test_long_term_memory.py | 136 ++++++++++++++++++ 3 files changed, 248 insertions(+) create mode 100644 dynamiq/memory/long_term/long_term_memory.py create mode 100644 tests/unit/memory/long_term/test_long_term_memory.py diff --git a/dynamiq/memory/long_term/__init__.py b/dynamiq/memory/long_term/__init__.py index fd2853e21..663a0284a 100644 --- a/dynamiq/memory/long_term/__init__.py +++ b/dynamiq/memory/long_term/__init__.py @@ -4,9 +4,11 @@ 
""" from dynamiq.memory.long_term.base import LongTermMemoryBackend +from dynamiq.memory.long_term.long_term_memory import LongTermMemory from dynamiq.memory.long_term.schemas import Fact __all__ = [ "Fact", + "LongTermMemory", "LongTermMemoryBackend", ] diff --git a/dynamiq/memory/long_term/long_term_memory.py b/dynamiq/memory/long_term/long_term_memory.py new file mode 100644 index 000000000..822f481c2 --- /dev/null +++ b/dynamiq/memory/long_term/long_term_memory.py @@ -0,0 +1,110 @@ +"""LongTermMemory facade. + +User-facing API for fact-shaped, user-scoped, cross-session memory. +Wraps a `LongTermMemoryBackend` and an embedder. +""" +from datetime import UTC, datetime +from hashlib import md5 +from typing import Any +from uuid import uuid4 + +from pydantic import BaseModel, ConfigDict + +from dynamiq.memory.long_term.base import LongTermMemoryBackend +from dynamiq.memory.long_term.schemas import Fact + + +def _content_hash(user_id: str, content: str) -> str: + """Stable hash for exact-duplicate dedup. Scoped per-user.""" + normalised = content.strip().lower() + return md5(f"{user_id}:{normalised}".encode("utf-8")).hexdigest() + + +def _embed(embedder: Any, text: str) -> list[float]: + """Call the embedder's `.execute({"query": text})` and pull the vector. + + Matches dynamiq's `TextEmbedder` contract: input is a dict (or + `TextEmbedderInputSchema`) with `query`; output is a dict-like with + `embedding`. + """ + result = embedder.execute({"query": text}) + return list(result["embedding"]) + + +class LongTermMemory(BaseModel): + """Tool-driven, user-scoped, fact-shaped memory.""" + + model_config = ConfigDict(arbitrary_types_allowed=True) + + backend: LongTermMemoryBackend + embedder: Any # TextEmbedder in production; FakeTextEmbedder in tests. + + def remember( + self, *, content: str, user_id: str, + metadata: dict[str, Any] | None = None, + ) -> Fact: + """Add a fact. 
Idempotent on (user_id, normalised content).""" + if not content or not content.strip(): + raise ValueError("Fact content cannot be empty") + + normalised = content.strip() + content_hash = _content_hash(user_id, normalised) + + existing = self.backend.get_by_hash(user_id=user_id, content_hash=content_hash) + if existing is not None: + return existing + + now = datetime.now(UTC) + embedding = _embed(self.embedder, normalised) + fact = Fact( + id=str(uuid4()), + content=normalised, + hash=content_hash, + user_id=user_id, + metadata=metadata or {}, + created_at=now, + updated_at=now, + ) + self.backend.insert(fact, embedding) + return fact + + def recall( + self, *, query: str, user_id: str, limit: int = 5, + ) -> list[tuple[Fact, float]]: + """Semantic search for facts relevant to `query`, scoped to `user_id`. + + No similarity threshold — caller (or model) decides what's relevant. + """ + stripped = query.strip() if query else "" + if not stripped: + raise ValueError("recall query cannot be empty") + embedding = _embed(self.embedder, stripped) + return self.backend.search( + query_embedding=embedding, + scope={"user_id": user_id}, + limit=limit, + ) + + def forget(self, *, fact_id: str, user_id: str) -> str: + """Delete a fact by id, with cross-user guard. + + Returns one of: 'deleted', 'not_found', 'forbidden'. + Never raises on user mismatch — defence in depth above the + construction-time user_id binding on the tool. 
+ """ + fact = self.backend.get(fact_id) + if fact is None: + return "not_found" + if fact.user_id != user_id: + return "forbidden" + self.backend.delete(fact_id) + return "deleted" + + def list_all(self, *, user_id: str, limit: int = 100) -> list[Fact]: + return self.backend.list_by_scope({"user_id": user_id}, limit=limit) + + def get(self, fact_id: str) -> Fact | None: + return self.backend.get(fact_id) + + def clear_user(self, *, user_id: str) -> int: + return self.backend.delete_scope({"user_id": user_id}) diff --git a/tests/unit/memory/long_term/test_long_term_memory.py b/tests/unit/memory/long_term/test_long_term_memory.py new file mode 100644 index 000000000..630f16748 --- /dev/null +++ b/tests/unit/memory/long_term/test_long_term_memory.py @@ -0,0 +1,136 @@ +"""Tests for the LongTermMemory facade.""" +import pytest + +from dynamiq.memory.long_term import LongTermMemory +from dynamiq.memory.long_term.backends.in_memory import InMemoryFactBackend + + +@pytest.fixture +def ltm(fake_embedder): + return LongTermMemory( + backend=InMemoryFactBackend(), + embedder=fake_embedder, + ) + + +# --- remember --- + +def test_remember_returns_a_fact_and_persists_it(ltm, user_id): + fact = ltm.remember(content="User likes pizza", user_id=user_id) + assert fact.id + assert fact.content == "User likes pizza" + assert fact.user_id == user_id + assert ltm.backend.get(fact.id) == fact + + +def test_remember_dedups_exact_duplicate_in_same_user(ltm, user_id): + first = ltm.remember(content="User likes pizza", user_id=user_id) + second = ltm.remember(content="User likes pizza", user_id=user_id) + assert first.id == second.id + + +def test_remember_does_not_dedup_across_users(ltm, user_id, other_user_id): + a = ltm.remember(content="User likes pizza", user_id=user_id) + b = ltm.remember(content="User likes pizza", user_id=other_user_id) + assert a.id != b.id + assert a.user_id != b.user_id + + +def test_remember_normalises_whitespace_for_dedup(ltm, user_id): + a = 
ltm.remember(content=" User likes pizza ", user_id=user_id) + b = ltm.remember(content="USER LIKES PIZZA", user_id=user_id) + assert a.id == b.id + + +def test_remember_rejects_empty_content(ltm, user_id): + with pytest.raises(ValueError): + ltm.remember(content=" ", user_id=user_id) + + +def test_remember_stores_metadata(ltm, user_id): + fact = ltm.remember(content="x", user_id=user_id, + metadata={"category": "preference"}) + assert ltm.backend.get(fact.id).metadata == {"category": "preference"} + + +# --- recall --- + +def test_recall_returns_scored_facts(ltm, user_id): + ltm.remember(content="User likes pizza", user_id=user_id) + ltm.remember(content="User dislikes mushrooms", user_id=user_id) + hits = ltm.recall(query="pizza preferences", user_id=user_id, limit=2) + assert len(hits) == 2 + fact, score = hits[0] + assert fact.content + assert isinstance(score, float) + + +def test_recall_isolates_users(ltm, user_id, other_user_id): + ltm.remember(content="A's fact", user_id=user_id) + ltm.remember(content="B's fact", user_id=other_user_id) + hits = ltm.recall(query="fact", user_id=user_id, limit=5) + assert all(f.user_id == user_id for f, _ in hits) + + +def test_recall_respects_limit(ltm, user_id): + for i in range(5): + ltm.remember(content=f"fact-{i}", user_id=user_id) + hits = ltm.recall(query="fact", user_id=user_id, limit=2) + assert len(hits) == 2 + + +def test_recall_empty_store_returns_empty(ltm, user_id): + assert ltm.recall(query="anything", user_id=user_id, limit=5) == [] + + +def test_recall_rejects_empty_query(ltm, user_id): + with pytest.raises(ValueError): + ltm.recall(query=" ", user_id=user_id, limit=5) + + +# --- forget --- + +def test_forget_deletes_known_fact(ltm, user_id): + fact = ltm.remember(content="x", user_id=user_id) + assert ltm.forget(fact_id=fact.id, user_id=user_id) == "deleted" + assert ltm.backend.get(fact.id) is None + + +def test_forget_unknown_returns_not_found(ltm, user_id): + assert ltm.forget(fact_id="does-not-exist", 
user_id=user_id) == "not_found" + + +def test_forget_cross_user_returns_forbidden(ltm, user_id, other_user_id): + fact = ltm.remember(content="x", user_id=user_id) + result = ltm.forget(fact_id=fact.id, user_id=other_user_id) + assert result == "forbidden" + assert ltm.backend.get(fact.id) is not None + + +# --- admin / introspection --- + +def test_list_all_returns_user_facts(ltm, user_id, other_user_id): + ltm.remember(content="a", user_id=user_id) + ltm.remember(content="b", user_id=user_id) + ltm.remember(content="c", user_id=other_user_id) + facts = ltm.list_all(user_id=user_id) + assert {f.content for f in facts} == {"a", "b"} + + +def test_get_returns_fact_by_id(ltm, user_id): + fact = ltm.remember(content="x", user_id=user_id) + assert ltm.get(fact.id) == fact + + +def test_get_unknown_returns_none(ltm): + assert ltm.get("nope") is None + + +def test_clear_user_deletes_all_user_facts(ltm, user_id, other_user_id): + ltm.remember(content="a", user_id=user_id) + ltm.remember(content="b", user_id=user_id) + ltm.remember(content="c", user_id=other_user_id) + deleted = ltm.clear_user(user_id=user_id) + assert deleted == 2 + assert ltm.list_all(user_id=user_id) == [] + assert len(ltm.list_all(user_id=other_user_id)) == 1 From 16354ea41127fac9a943a77b787d123cf9b5524a Mon Sep 17 00:00:00 2001 From: Taras Yaroshko Date: Mon, 25 May 2026 21:41:23 +0300 Subject: [PATCH 06/53] feat: add long-term memory tools and factory --- dynamiq/nodes/tools/long_term_memory.py | 156 ++++++++++++++++++++++ tests/unit/memory/long_term/test_tools.py | 154 +++++++++++++++++++++ 2 files changed, 310 insertions(+) create mode 100644 dynamiq/nodes/tools/long_term_memory.py create mode 100644 tests/unit/memory/long_term/test_tools.py diff --git a/dynamiq/nodes/tools/long_term_memory.py b/dynamiq/nodes/tools/long_term_memory.py new file mode 100644 index 000000000..256d3b129 --- /dev/null +++ b/dynamiq/nodes/tools/long_term_memory.py @@ -0,0 +1,156 @@ +"""Tools giving an agent access to a 
`LongTermMemory` instance. + +The three tools (`remember_fact`, `recall_facts`, `forget_fact`) bind +`user_id` at construction. `user_id` never appears in `InputSchema`, so +the model has no slot to address another user's memory. See spec §9.1. +""" +from typing import Any, ClassVar, Literal + +from pydantic import BaseModel, ConfigDict, Field + +from dynamiq.memory.long_term import LongTermMemory +from dynamiq.nodes.node import Node +from dynamiq.nodes.types import NodeGroup + + +REMEMBER_DESCRIPTION = ( + "Record a durable fact about the current user that should persist across " + "conversations (preferences, constraints, recurring context, biographical info). " + "Use only when you've learned something that will matter in future sessions — " + "not for ephemeral turn-level state. Returns {fact_id: }. Calling twice " + "with the same content returns the same fact_id." +) + +RECALL_DESCRIPTION = ( + "Search the user's long-term memory for facts relevant to a query. " + "Use BEFORE answering questions where prior context (preferences, past " + "decisions, biographical info) would change the response. Returns a list of " + "{fact_id, content, score} entries, most relevant first." +) + +FORGET_DESCRIPTION = ( + "Delete a fact from the user's long-term memory by id. Use when the user " + "explicitly asks to be forgotten on something, or when a fact is wrong and " + "you have no replacement. Get the fact_id from a prior recall_facts call. " + "Returns {status: 'deleted'|'not_found'|'forbidden'}." +) + + +class RememberFactInputSchema(BaseModel): + """LLM-visible input for remember_fact. Note: no `user_id`.""" + + content: str = Field(..., min_length=1, max_length=1000, + description="The fact to remember, as a short statement.") + metadata: dict[str, Any] | None = Field( + default=None, + description="Optional free-form metadata (e.g. {'category': 'preference'}).", + ) + + +class RecallFactsInputSchema(BaseModel): + """LLM-visible input for recall_facts. 
Note: no `user_id`.""" + + query: str = Field(..., min_length=1, max_length=500, + description="What to search for.") + limit: int = Field(default=5, ge=1, le=20, + description="Max facts to return.") + + +class ForgetFactInputSchema(BaseModel): + """LLM-visible input for forget_fact. Note: no `user_id`.""" + + fact_id: str = Field(..., description="The id returned by recall_facts or remember_fact.") + + +class _LongTermMemoryTool(Node): + """Shared base for the three long-term memory tools. + + Holds the `LongTermMemory` reference and the construction-bound `user_id`. + Concrete subclasses set `name`, `description`, `input_schema`, and `execute`. + """ + + model_config = ConfigDict(arbitrary_types_allowed=True) + + group: Literal[NodeGroup.TOOLS] = NodeGroup.TOOLS + long_term_memory: LongTermMemory + user_id: str + + +class RememberFactTool(_LongTermMemoryTool): + """Write a fact to long-term memory, scoped to the bound user_id.""" + + name: str = "remember_fact" + description: str = REMEMBER_DESCRIPTION + input_schema: ClassVar[type[RememberFactInputSchema]] = RememberFactInputSchema + + def execute(self, input_data: RememberFactInputSchema, config=None, **kwargs) -> dict: + fact = self.long_term_memory.remember( + content=input_data.content, + user_id=self.user_id, + metadata=input_data.metadata, + ) + return {"content": {"fact_id": fact.id}} + + +class RecallFactsTool(_LongTermMemoryTool): + """Search long-term memory for facts relevant to a query, scoped to user_id.""" + + name: str = "recall_facts" + description: str = RECALL_DESCRIPTION + input_schema: ClassVar[type[RecallFactsInputSchema]] = RecallFactsInputSchema + + def execute(self, input_data: RecallFactsInputSchema, config=None, **kwargs) -> dict: + hits = self.long_term_memory.recall( + query=input_data.query, + user_id=self.user_id, + limit=input_data.limit, + ) + return { + "content": [ + {"fact_id": fact.id, "content": fact.content, + "score": round(score, 4)} + for fact, score in hits + ] + } + + 
+class ForgetFactTool(_LongTermMemoryTool): + """Delete a fact by id, with cross-user guard.""" + + name: str = "forget_fact" + description: str = FORGET_DESCRIPTION + input_schema: ClassVar[type[ForgetFactInputSchema]] = ForgetFactInputSchema + + def execute(self, input_data: ForgetFactInputSchema, config=None, **kwargs) -> dict: + status = self.long_term_memory.forget( + fact_id=input_data.fact_id, + user_id=self.user_id, + ) + return {"content": {"status": status}} + + +_TOOL_BUILDERS: dict[str, type[_LongTermMemoryTool]] = { + "remember": RememberFactTool, + "recall": RecallFactsTool, + "forget": ForgetFactTool, +} + + +def build_long_term_memory_tools( + *, + long_term_memory: LongTermMemory, + user_id: str, + include: tuple[str, ...] = ("remember", "recall", "forget"), +) -> list[Node]: + """Build the long-term-memory tools with `user_id` baked in. + + `include` selects which tools to return — sub-agents commonly use + `include=("recall",)` for read-only inheritance. Unknown keys are ignored. + """ + tools: list[Node] = [] + for kind in include: + cls = _TOOL_BUILDERS.get(kind) + if cls is None: + continue + tools.append(cls(long_term_memory=long_term_memory, user_id=user_id)) + return tools diff --git a/tests/unit/memory/long_term/test_tools.py b/tests/unit/memory/long_term/test_tools.py new file mode 100644 index 000000000..365c234a6 --- /dev/null +++ b/tests/unit/memory/long_term/test_tools.py @@ -0,0 +1,154 @@ +"""Tests for the three long-term memory tools and the factory. + +These tests do not invoke an LLM — they exercise the Node `execute()` +method directly, treating the tool the same way Agent's tool-use loop +would after the model emits a tool call. 
+""" +import pytest + +from dynamiq.memory.long_term import LongTermMemory +from dynamiq.memory.long_term.backends.in_memory import InMemoryFactBackend +from dynamiq.nodes.tools.long_term_memory import ( + ForgetFactTool, + RecallFactsTool, + RememberFactTool, + build_long_term_memory_tools, +) + + +@pytest.fixture +def ltm(fake_embedder): + return LongTermMemory(backend=InMemoryFactBackend(), embedder=fake_embedder) + + +# --- RememberFactTool --- + + +def test_remember_tool_persists_a_fact(ltm, user_id): + tool = RememberFactTool(long_term_memory=ltm, user_id=user_id) + result = tool.execute(tool.input_schema(content="User likes pizza")) + fact_id = result["content"]["fact_id"] + assert ltm.get(fact_id).content == "User likes pizza" + + +def test_remember_tool_input_schema_has_no_user_id(): + """LLM-visible signature must not contain user_id — it's instance state.""" + assert "user_id" not in RememberFactTool.input_schema.model_fields + assert {"content", "metadata"} <= set(RememberFactTool.input_schema.model_fields) + + +def test_remember_tool_uses_construction_user_id(ltm, user_id): + tool = RememberFactTool(long_term_memory=ltm, user_id=user_id) + result = tool.execute(tool.input_schema(content="x")) + fact = ltm.get(result["content"]["fact_id"]) + assert fact.user_id == user_id + + +def test_remember_tool_idempotent_on_duplicate(ltm, user_id): + tool = RememberFactTool(long_term_memory=ltm, user_id=user_id) + a = tool.execute(tool.input_schema(content="x")) + b = tool.execute(tool.input_schema(content="x")) + assert a["content"]["fact_id"] == b["content"]["fact_id"] + + +def test_remember_tool_accepts_metadata(ltm, user_id): + tool = RememberFactTool(long_term_memory=ltm, user_id=user_id) + result = tool.execute(tool.input_schema( + content="x", metadata={"category": "preference"})) + fact = ltm.get(result["content"]["fact_id"]) + assert fact.metadata == {"category": "preference"} + + +# --- RecallFactsTool --- + + +def test_recall_tool_returns_hits(ltm, 
user_id): + ltm.remember(content="User likes pizza", user_id=user_id) + ltm.remember(content="User likes Python", user_id=user_id) + tool = RecallFactsTool(long_term_memory=ltm, user_id=user_id) + result = tool.execute(tool.input_schema(query="pizza", limit=2)) + items = result["content"] + assert len(items) == 2 + for item in items: + assert {"fact_id", "content", "score"} <= set(item.keys()) + scores = [it["score"] for it in items] + assert scores == sorted(scores, reverse=True) + + +def test_recall_tool_input_schema_has_no_user_id(): + assert "user_id" not in RecallFactsTool.input_schema.model_fields + assert {"query", "limit"} <= set(RecallFactsTool.input_schema.model_fields) + + +def test_recall_tool_isolates_users(ltm, user_id, other_user_id): + ltm.remember(content="A's fact", user_id=user_id) + ltm.remember(content="B's fact", user_id=other_user_id) + tool = RecallFactsTool(long_term_memory=ltm, user_id=user_id) + result = tool.execute(tool.input_schema(query="fact", limit=5)) + contents = {item["content"] for item in result["content"]} + assert contents == {"A's fact"} + + +def test_recall_tool_empty_store_returns_empty(ltm, user_id): + tool = RecallFactsTool(long_term_memory=ltm, user_id=user_id) + result = tool.execute(tool.input_schema(query="anything")) + assert result["content"] == [] + + +# --- ForgetFactTool --- + + +def test_forget_tool_deletes_owned_fact(ltm, user_id): + fact = ltm.remember(content="x", user_id=user_id) + tool = ForgetFactTool(long_term_memory=ltm, user_id=user_id) + result = tool.execute(tool.input_schema(fact_id=fact.id)) + assert result["content"]["status"] == "deleted" + assert ltm.get(fact.id) is None + + +def test_forget_tool_returns_not_found_for_unknown_id(ltm, user_id): + tool = ForgetFactTool(long_term_memory=ltm, user_id=user_id) + result = tool.execute(tool.input_schema(fact_id="does-not-exist")) + assert result["content"]["status"] == "not_found" + + +def test_forget_tool_returns_forbidden_on_cross_user(ltm, user_id, 
other_user_id): + fact = ltm.remember(content="x", user_id=user_id) + attacker = ForgetFactTool(long_term_memory=ltm, user_id=other_user_id) + result = attacker.execute(attacker.input_schema(fact_id=fact.id)) + assert result["content"]["status"] == "forbidden" + assert ltm.get(fact.id) is not None + + +def test_forget_tool_input_schema_has_no_user_id(): + assert "user_id" not in ForgetFactTool.input_schema.model_fields + assert "fact_id" in ForgetFactTool.input_schema.model_fields + + +# --- factory --- + + +def test_factory_builds_all_three_by_default(ltm, user_id): + tools = build_long_term_memory_tools(long_term_memory=ltm, user_id=user_id) + assert {t.name for t in tools} == {"remember_fact", "recall_facts", "forget_fact"} + + +def test_factory_respects_include(ltm, user_id): + tools = build_long_term_memory_tools( + long_term_memory=ltm, user_id=user_id, include=("recall",), + ) + assert [t.name for t in tools] == ["recall_facts"] + + +def test_factory_bakes_user_id_into_each_tool(ltm, user_id): + tools = build_long_term_memory_tools(long_term_memory=ltm, user_id=user_id) + for tool in tools: + assert tool.user_id == user_id + + +def test_factory_ignores_unknown_include_keys(ltm, user_id): + tools = build_long_term_memory_tools( + long_term_memory=ltm, user_id=user_id, + include=("recall", "unknown"), + ) + assert [t.name for t in tools] == ["recall_facts"] From 1feee3768b9b3b975dc7489cf75b3e2d295173ba Mon Sep 17 00:00:00 2001 From: Taras Yaroshko Date: Mon, 25 May 2026 21:50:20 +0300 Subject: [PATCH 07/53] feat: integrate long-term memory into Agent execution --- dynamiq/memory/long_term/__init__.py | 3 +- dynamiq/memory/long_term/long_term_memory.py | 17 +- dynamiq/nodes/agents/base.py | 40 ++++ .../test_long_term_memory_integration.py | 176 ++++++++++++++++++ 4 files changed, 234 insertions(+), 2 deletions(-) create mode 100644 tests/unit/nodes/agents/test_long_term_memory_integration.py diff --git a/dynamiq/memory/long_term/__init__.py 
b/dynamiq/memory/long_term/__init__.py index 663a0284a..15e3d00ac 100644 --- a/dynamiq/memory/long_term/__init__.py +++ b/dynamiq/memory/long_term/__init__.py @@ -4,11 +4,12 @@ """ from dynamiq.memory.long_term.base import LongTermMemoryBackend -from dynamiq.memory.long_term.long_term_memory import LongTermMemory +from dynamiq.memory.long_term.long_term_memory import LongTermMemory, LongTermMemoryConfig from dynamiq.memory.long_term.schemas import Fact __all__ = [ "Fact", "LongTermMemory", "LongTermMemoryBackend", + "LongTermMemoryConfig", ] diff --git a/dynamiq/memory/long_term/long_term_memory.py b/dynamiq/memory/long_term/long_term_memory.py index 822f481c2..a6e1b3d2f 100644 --- a/dynamiq/memory/long_term/long_term_memory.py +++ b/dynamiq/memory/long_term/long_term_memory.py @@ -5,7 +5,7 @@ """ from datetime import UTC, datetime from hashlib import md5 -from typing import Any +from typing import Any, Literal from uuid import uuid4 from pydantic import BaseModel, ConfigDict @@ -108,3 +108,18 @@ def get(self, fact_id: str) -> Fact | None: def clear_user(self, *, user_id: str) -> int: return self.backend.delete_scope({"user_id": user_id}) + + +class LongTermMemoryConfig(BaseModel): + """Per-agent configuration for long-term memory. + + `tools` controls which of the three tools the agent is given access to. + Sub-agents typically use `("recall",)` for read-only inheritance; + parent agents use the default `("remember", "recall", "forget")`. + """ + + tools: tuple[Literal["remember", "recall", "forget"], ...] 
= ( + "remember", + "recall", + "forget", + ) diff --git a/dynamiq/nodes/agents/base.py b/dynamiq/nodes/agents/base.py index 8997a4980..41c8b09f1 100644 --- a/dynamiq/nodes/agents/base.py +++ b/dynamiq/nodes/agents/base.py @@ -10,6 +10,7 @@ from dynamiq.connections.managers import ConnectionManager from dynamiq.memory import Memory, MemoryRetrievalStrategy, MemorySaveMode +from dynamiq.memory.long_term import LongTermMemory, LongTermMemoryConfig from dynamiq.nodes import ErrorHandling, Node, NodeGroup from dynamiq.nodes.agents.checkpoint import DEFAULT_HISTORY_OFFSET, AgentIterativeCheckpointMixin from dynamiq.nodes.agents.exceptions import AgentUnknownToolException, InvalidActionException, ToolExecutionException @@ -222,6 +223,17 @@ class Agent(AgentIterativeCheckpointMixin, Node): memory: Memory | None = Field(None, description="Memory node for the agent.") memory_limit: int = Field(100, description="Maximum number of messages to retrieve from memory") memory_retrieval_strategy: MemoryRetrievalStrategy | None = MemoryRetrievalStrategy.ALL + long_term_memory: LongTermMemory | None = Field( + default=None, + description=( + "Long-term, fact-shaped, user-scoped memory accessed via remember/recall/forget " + "tools. Independent of `memory` (short-term messages)." 
+ ), + ) + long_term_memory_config: LongTermMemoryConfig = Field( + default_factory=LongTermMemoryConfig, + description="Which long-term-memory tools to expose on this agent.", + ) verbose: bool = Field(False, description="Whether to print verbose logs.") file_store: FileStoreConfig = Field( default_factory=lambda: FileStoreConfig(enabled=False, backend=InMemoryFileStore()), @@ -618,6 +630,11 @@ def execute( use_memory = self.memory and (input_data.user_id or input_data.session_id) + ltm_tools = self._build_long_term_memory_tools(input_data) + _tools_before_ltm = self.tools + if ltm_tools: + self.tools = list(_tools_before_ltm) + ltm_tools + if use_memory: history_messages = self._retrieve_memory(input_data) if len(history_messages) > 0: @@ -687,6 +704,8 @@ def execute( finally: self._current_call_context = None self._clear_todos_file() + if ltm_tools: + self.tools = _tools_before_ltm if use_memory: try: @@ -798,6 +817,27 @@ def _retrieve_memory(self, input_data: AgentInputSchema) -> list[Message]: logger.info("Agent %s - %s: retrieved %d messages from memory", self.name, self.id, len(history_messages)) return history_messages + def _build_long_term_memory_tools(self, input_data: "AgentInputSchema") -> list[Node]: + """Build per-run long-term-memory tools, or [] if not applicable. + + Returns an empty list when `long_term_memory` is unset or `user_id` + is absent. The caller attaches the returned tools to `self.tools` + for the duration of the run. + """ + if self.long_term_memory is None: + return [] + user_id = getattr(input_data, "user_id", None) + if not user_id: + return [] + # Imported locally to avoid circular imports at module load time. 
+ from dynamiq.nodes.tools.long_term_memory import build_long_term_memory_tools + + return build_long_term_memory_tools( + long_term_memory=self.long_term_memory, + user_id=user_id, + include=self.long_term_memory_config.tools, + ) + def _is_input_output_trace_message(self, message: Message) -> bool: """Return True when a message is an internal ReAct/tool-trace entry.""" content = message.content.strip() diff --git a/tests/unit/nodes/agents/test_long_term_memory_integration.py b/tests/unit/nodes/agents/test_long_term_memory_integration.py new file mode 100644 index 000000000..2c1557fb7 --- /dev/null +++ b/tests/unit/nodes/agents/test_long_term_memory_integration.py @@ -0,0 +1,176 @@ +"""Tests for Agent long-term memory integration. + +Covers `_build_long_term_memory_tools` (the per-run tool-construction +helper) and the snapshot/restore behavior of `self.tools` across an +`execute()` call. + +The execute-level tests mock `_run_agent` so we don't need a real LLM +backend response — we only verify the agent-loop bookkeeping. 
+""" +import hashlib +from types import SimpleNamespace +from unittest.mock import patch + +import pytest + +from dynamiq.connections import OpenAI as OpenAIConnection +from dynamiq.memory.long_term import LongTermMemory, LongTermMemoryConfig +from dynamiq.memory.long_term.backends.in_memory import InMemoryFactBackend +from dynamiq.nodes.agents.base import Agent +from dynamiq.nodes.llms import OpenAI + + +class _FakeEmbedder: + DIM = 16 + + def execute(self, input_data, **kwargs): + text = input_data["query"] if isinstance(input_data, dict) else input_data.query + digest = hashlib.sha256(text.encode("utf-8")).digest() + raw = [(b / 127.5) - 1.0 for b in digest[: self.DIM]] + norm = sum(x * x for x in raw) ** 0.5 or 1.0 + return {"query": text, "embedding": [x / norm for x in raw]} + + +@pytest.fixture +def ltm(): + return LongTermMemory(backend=InMemoryFactBackend(), embedder=_FakeEmbedder()) + + +@pytest.fixture +def llm(): + """Real OpenAI LLM object — never executed in these tests. Constructed + only to satisfy Agent's pydantic validation.""" + return OpenAI( + connection=OpenAIConnection(api_key="test-key"), + model="gpt-4o", + ) + + +def _make_agent(llm, *, ltm=None, ltm_config=None) -> Agent: + kwargs = {"name": "test", "llm": llm, "tools": []} + if ltm is not None: + kwargs["long_term_memory"] = ltm + if ltm_config is not None: + kwargs["long_term_memory_config"] = ltm_config + return Agent(**kwargs) + + +def _input(user_id=None, session_id=None): + return SimpleNamespace(user_id=user_id, session_id=session_id, input="hi") + + +# --- LongTermMemoryConfig --- + + +def test_config_default_includes_all_three_tools(): + assert LongTermMemoryConfig().tools == ("remember", "recall", "forget") + + +def test_config_can_restrict_to_read_only(): + assert LongTermMemoryConfig(tools=("recall",)).tools == ("recall",) + + +# --- Agent field declarations --- + + +def test_agent_has_long_term_memory_fields(): + fields = Agent.model_fields + assert "long_term_memory" in 
fields + assert "long_term_memory_config" in fields + assert fields["long_term_memory"].default is None + + +def test_agent_long_term_memory_defaults_to_none(llm): + agent = _make_agent(llm) + assert agent.long_term_memory is None + assert agent.long_term_memory_config.tools == ("remember", "recall", "forget") + + +# --- _build_long_term_memory_tools --- + + +def test_build_returns_three_tools_when_ltm_and_user_id_present(llm, ltm): + agent = _make_agent(llm, ltm=ltm) + tools = agent._build_long_term_memory_tools(_input(user_id="u1")) + assert {t.name for t in tools} == {"remember_fact", "recall_facts", "forget_fact"} + + +def test_build_returns_empty_when_no_user_id(llm, ltm): + agent = _make_agent(llm, ltm=ltm) + assert agent._build_long_term_memory_tools(_input(session_id="s1")) == [] + + +def test_build_returns_empty_when_no_long_term_memory(llm): + agent = _make_agent(llm) + assert agent._build_long_term_memory_tools(_input(user_id="u1")) == [] + + +def test_build_respects_config_include(llm, ltm): + agent = _make_agent(llm, ltm=ltm, ltm_config=LongTermMemoryConfig(tools=("recall",))) + tools = agent._build_long_term_memory_tools(_input(user_id="u1")) + assert [t.name for t in tools] == ["recall_facts"] + + +def test_build_bakes_user_id_into_each_tool(llm, ltm): + agent = _make_agent(llm, ltm=ltm) + tools = agent._build_long_term_memory_tools(_input(user_id="u1")) + for tool in tools: + assert tool.user_id == "u1" + + +# --- execute() splice: snapshot/restore self.tools --- + + +def _patch_run_agent_capture_tools(agent, captured): + def fake_run(*args, **kwargs): + captured.extend(agent.tools) + return "ok" + + return patch.object(agent, "_run_agent", side_effect=fake_run) + + +def test_execute_attaches_ltm_tools_during_run_and_restores_after(llm, ltm): + agent = _make_agent(llm, ltm=ltm) + original_tools = list(agent.tools) + captured: list = [] + + with _patch_run_agent_capture_tools(agent, captured): + agent.run_sync(input_data={"input": "hi", "user_id": 
"u1"}) + + assert {"remember_fact", "recall_facts", "forget_fact"} <= {t.name for t in captured} + assert agent.tools == original_tools + + +def test_execute_restores_tools_even_when_run_raises(llm, ltm): + agent = _make_agent(llm, ltm=ltm) + original_tools = list(agent.tools) + + with patch.object(agent, "_run_agent", side_effect=RuntimeError("boom")): + # run_sync wraps exceptions in a failed RunnableResult; check tools after. + agent.run_sync(input_data={"input": "hi", "user_id": "u1"}) + + assert agent.tools == original_tools + + +def test_execute_does_not_mutate_tools_when_no_user_id(llm, ltm): + agent = _make_agent(llm, ltm=ltm) + original_tools = list(agent.tools) + captured: list = [] + + with _patch_run_agent_capture_tools(agent, captured): + agent.run_sync(input_data={"input": "hi"}) + + assert {t.name for t in captured} == {t.name for t in original_tools} + assert agent.tools == original_tools + + +def test_execute_does_not_mutate_tools_when_no_long_term_memory(llm): + agent = _make_agent(llm) + original_tools = list(agent.tools) + captured: list = [] + + with _patch_run_agent_capture_tools(agent, captured): + agent.run_sync(input_data={"input": "hi", "user_id": "u1"}) + + assert {t.name for t in captured} == {t.name for t in original_tools} + assert agent.tools == original_tools From b8252e614215d1b3427db668bbb0ec170c46bf40 Mon Sep 17 00:00:00 2001 From: Taras Yaroshko Date: Mon, 25 May 2026 21:59:33 +0300 Subject: [PATCH 08/53] feat: add pgvector-backed long-term memory backend --- dynamiq/memory/long_term/backends/__init__.py | 3 +- dynamiq/memory/long_term/backends/pgvector.py | 177 ++++++++++++++++++ .../integration_with_creds/memory/conftest.py | 29 +++ .../memory/test_pgvector_fact_backend.py | 139 ++++++++++++++ 4 files changed, 347 insertions(+), 1 deletion(-) create mode 100644 dynamiq/memory/long_term/backends/pgvector.py create mode 100644 tests/integration_with_creds/memory/conftest.py create mode 100644 
tests/integration_with_creds/memory/test_pgvector_fact_backend.py diff --git a/dynamiq/memory/long_term/backends/__init__.py b/dynamiq/memory/long_term/backends/__init__.py index 2de17ec00..54643789b 100644 --- a/dynamiq/memory/long_term/backends/__init__.py +++ b/dynamiq/memory/long_term/backends/__init__.py @@ -1,5 +1,6 @@ """Concrete LongTermMemoryBackend implementations.""" from dynamiq.memory.long_term.backends.in_memory import InMemoryFactBackend +from dynamiq.memory.long_term.backends.pgvector import PgvectorFactBackend -__all__ = ["InMemoryFactBackend"] +__all__ = ["InMemoryFactBackend", "PgvectorFactBackend"] diff --git a/dynamiq/memory/long_term/backends/pgvector.py b/dynamiq/memory/long_term/backends/pgvector.py new file mode 100644 index 000000000..1821a4a7b --- /dev/null +++ b/dynamiq/memory/long_term/backends/pgvector.py @@ -0,0 +1,177 @@ +"""pgvector-backed long-term memory backend. + +Uses psycopg (v3) + the pgvector extension. Stores facts in a single +table with a vector column for embeddings, a JSONB column for metadata, +and (user_id, hash) uniqueness for dedup. 
+""" +import psycopg +from pgvector.psycopg import register_vector +from psycopg.rows import dict_row +from psycopg.types.json import Jsonb +from pydantic import ConfigDict, PrivateAttr + +from dynamiq.memory.long_term.base import LongTermMemoryBackend +from dynamiq.memory.long_term.schemas import Fact + + +_SCHEMA_SQL = """ +CREATE EXTENSION IF NOT EXISTS vector; + +CREATE TABLE IF NOT EXISTS {table} ( + id TEXT PRIMARY KEY, + content TEXT NOT NULL, + hash TEXT NOT NULL, + user_id TEXT NOT NULL, + metadata JSONB NOT NULL DEFAULT '{{}}'::jsonb, + embedding vector({dim}) NOT NULL, + created_at TIMESTAMPTZ NOT NULL, + updated_at TIMESTAMPTZ NOT NULL +); + +CREATE INDEX IF NOT EXISTS {table}_user_id_idx ON {table} (user_id); +CREATE UNIQUE INDEX IF NOT EXISTS {table}_user_hash_uidx ON {table} (user_id, hash); +""" + + +def _scope_to_where(scope: dict[str, str]) -> tuple[str, list]: + """Translate a scope dict into a parameterised SQL WHERE clause. + + `scope` is always `{"user_id": ...}` in v1; the loop is shaped so + forward extensions (agent_id, run_id) drop in without rewriting. 
+ """ + if not scope: + return "TRUE", [] + clauses = [f"{key} = %s" for key in scope.keys()] + return " AND ".join(clauses), list(scope.values()) + + +def _row_to_fact(row) -> Fact: + return Fact( + id=row["id"], + content=row["content"], + hash=row["hash"], + user_id=row["user_id"], + metadata=row["metadata"] or {}, + created_at=row["created_at"], + updated_at=row["updated_at"], + ) + + +class PgvectorFactBackend(LongTermMemoryBackend): + """Long-term memory backend backed by Postgres + pgvector.""" + + model_config = ConfigDict(arbitrary_types_allowed=True) + + dsn: str + table_name: str = "user_facts" + dimension: int = 1536 + + _conn: psycopg.Connection | None = PrivateAttr(default=None) + + def model_post_init(self, __context) -> None: + self._conn = psycopg.connect(self.dsn, autocommit=True) + register_vector(self._conn) + + # --- schema management (test/admin helpers, not part of the ABC) --- + + def ensure_table(self) -> None: + """Create the facts table and indexes if absent. Safe to call repeatedly.""" + with self._conn.cursor() as cur: + cur.execute(_SCHEMA_SQL.format(table=self.table_name, dim=self.dimension)) + + def recreate_table(self) -> None: + """Drop and re-create the facts table. 
For tests only.""" + with self._conn.cursor() as cur: + cur.execute(f"DROP TABLE IF EXISTS {self.table_name}") + self.ensure_table() + + def drop_table(self) -> None: + with self._conn.cursor() as cur: + cur.execute(f"DROP TABLE IF EXISTS {self.table_name}") + + # --- LongTermMemoryBackend implementation --- + + def insert(self, fact: Fact, embedding: list[float]) -> None: + with self._conn.cursor() as cur: + cur.execute( + f""" + INSERT INTO {self.table_name} + (id, content, hash, user_id, metadata, embedding, created_at, updated_at) + VALUES (%s, %s, %s, %s, %s, %s, %s, %s) + """, + ( + fact.id, + fact.content, + fact.hash, + fact.user_id, + Jsonb(fact.metadata), + embedding, + fact.created_at, + fact.updated_at, + ), + ) + + def get(self, fact_id: str) -> Fact | None: + with self._conn.cursor(row_factory=dict_row) as cur: + cur.execute( + f"SELECT id, content, hash, user_id, metadata, created_at, updated_at " + f"FROM {self.table_name} WHERE id = %s", + (fact_id,), + ) + row = cur.fetchone() + return _row_to_fact(row) if row else None + + def get_by_hash(self, *, user_id: str, content_hash: str) -> Fact | None: + with self._conn.cursor(row_factory=dict_row) as cur: + cur.execute( + f"SELECT id, content, hash, user_id, metadata, created_at, updated_at " + f"FROM {self.table_name} WHERE user_id = %s AND hash = %s", + (user_id, content_hash), + ) + row = cur.fetchone() + return _row_to_fact(row) if row else None + + def delete(self, fact_id: str) -> None: + with self._conn.cursor() as cur: + cur.execute(f"DELETE FROM {self.table_name} WHERE id = %s", (fact_id,)) + + def search( + self, + *, + query_embedding: list[float], + scope: dict[str, str], + limit: int, + ) -> list[tuple[Fact, float]]: + where, params = _scope_to_where(scope) + with self._conn.cursor(row_factory=dict_row) as cur: + cur.execute( + f""" + SELECT id, content, hash, user_id, metadata, created_at, updated_at, + 1 - (embedding <=> %s::vector) AS score + FROM {self.table_name} + WHERE {where} + 
ORDER BY embedding <=> %s::vector + LIMIT %s + """, + [query_embedding] + params + [query_embedding, limit], + ) + rows = cur.fetchall() + return [(_row_to_fact(row), float(row["score"])) for row in rows] + + def list_by_scope(self, scope: dict[str, str], limit: int = 100) -> list[Fact]: + where, params = _scope_to_where(scope) + with self._conn.cursor(row_factory=dict_row) as cur: + cur.execute( + f"SELECT id, content, hash, user_id, metadata, created_at, updated_at " + f"FROM {self.table_name} WHERE {where} " + f"ORDER BY created_at DESC LIMIT %s", + params + [limit], + ) + rows = cur.fetchall() + return [_row_to_fact(row) for row in rows] + + def delete_scope(self, scope: dict[str, str]) -> int: + where, params = _scope_to_where(scope) + with self._conn.cursor() as cur: + cur.execute(f"DELETE FROM {self.table_name} WHERE {where}", params) + return cur.rowcount diff --git a/tests/integration_with_creds/memory/conftest.py b/tests/integration_with_creds/memory/conftest.py new file mode 100644 index 000000000..1d43b5dd7 --- /dev/null +++ b/tests/integration_with_creds/memory/conftest.py @@ -0,0 +1,29 @@ +"""Shared fixtures for long-term memory integration tests.""" +import hashlib + +import pytest + + +class FakeTextEmbedder: + """Deterministic 16-dim embedder for integration tests against real backends.""" + + DIM = 16 + + def execute(self, input_data, **kwargs): + text = input_data["query"] if isinstance(input_data, dict) else input_data.query + return {"query": text, "embedding": self._embed(text)} + + def embed(self, text: str) -> list[float]: + return self._embed(text) + + @classmethod + def _embed(cls, text: str) -> list[float]: + digest = hashlib.sha256(text.encode("utf-8")).digest() + raw = [(b / 127.5) - 1.0 for b in digest[: cls.DIM]] + norm = sum(x * x for x in raw) ** 0.5 or 1.0 + return [x / norm for x in raw] + + +@pytest.fixture +def fake_embedder() -> FakeTextEmbedder: + return FakeTextEmbedder() diff --git 
a/tests/integration_with_creds/memory/test_pgvector_fact_backend.py b/tests/integration_with_creds/memory/test_pgvector_fact_backend.py new file mode 100644 index 000000000..93eb29110 --- /dev/null +++ b/tests/integration_with_creds/memory/test_pgvector_fact_backend.py @@ -0,0 +1,139 @@ +"""Integration tests for PgvectorFactBackend. + +Requires `POSTGRES_DSN` to point at a live Postgres with the pgvector +extension installed. Without it, this whole module skips. +""" +import os +from datetime import UTC, datetime + +import pytest + +from dynamiq.memory.long_term.backends.pgvector import PgvectorFactBackend +from dynamiq.memory.long_term.schemas import Fact + +DSN = os.getenv("POSTGRES_DSN") +pytestmark = pytest.mark.skipif(DSN is None, reason="POSTGRES_DSN not set") + + +@pytest.fixture +def backend(): + b = PgvectorFactBackend(dsn=DSN, table_name="test_user_facts", dimension=16) + b.recreate_table() + yield b + b.drop_table() + + +def _fact(fact_id, user_id, content, content_hash=None): + now = datetime.now(UTC) + return Fact( + id=fact_id, + content=content, + hash=content_hash or f"h-{fact_id}", + user_id=user_id, + metadata={}, + created_at=now, + updated_at=now, + ) + + +# --- insert / get / get_by_hash --- + + +def test_pgvector_insert_then_get(backend, fake_embedder): + fact = _fact("f1", "u1", "hello") + backend.insert(fact, fake_embedder.embed("hello")) + fetched = backend.get("f1") + assert fetched.id == "f1" + assert fetched.content == "hello" + assert fetched.user_id == "u1" + + +def test_pgvector_get_unknown_returns_none(backend): + assert backend.get("does-not-exist") is None + + +def test_pgvector_get_by_hash(backend, fake_embedder): + backend.insert(_fact("f1", "u1", "x", "h-shared"), fake_embedder.embed("x")) + found = backend.get_by_hash(user_id="u1", content_hash="h-shared") + assert found is not None and found.id == "f1" + + +def test_pgvector_get_by_hash_isolates_users(backend, fake_embedder): + backend.insert(_fact("f1", "u1", "x", 
"h-shared"), fake_embedder.embed("x")) + assert backend.get_by_hash(user_id="u2", content_hash="h-shared") is None + + +def test_pgvector_metadata_round_trip(backend, fake_embedder): + fact = _fact("f1", "u1", "x") + fact = fact.model_copy(update={"metadata": {"category": "preference", "score": 0.8}}) + backend.insert(fact, fake_embedder.embed("x")) + fetched = backend.get("f1") + assert fetched.metadata == {"category": "preference", "score": 0.8} + + +# --- delete / list_by_scope / delete_scope --- + + +def test_pgvector_delete(backend, fake_embedder): + backend.insert(_fact("f1", "u1", "x"), fake_embedder.embed("x")) + backend.delete("f1") + assert backend.get("f1") is None + + +def test_pgvector_delete_unknown_is_noop(backend): + backend.delete("does-not-exist") + + +def test_pgvector_list_by_scope(backend, fake_embedder): + backend.insert(_fact("f1", "u1", "a"), fake_embedder.embed("a")) + backend.insert(_fact("f2", "u1", "b"), fake_embedder.embed("b")) + backend.insert(_fact("f3", "u2", "c"), fake_embedder.embed("c")) + listed = backend.list_by_scope({"user_id": "u1"}) + assert {f.id for f in listed} == {"f1", "f2"} + + +def test_pgvector_delete_scope(backend, fake_embedder): + backend.insert(_fact("f1", "u1", "a"), fake_embedder.embed("a")) + backend.insert(_fact("f2", "u1", "b"), fake_embedder.embed("b")) + backend.insert(_fact("f3", "u2", "c"), fake_embedder.embed("c")) + deleted = backend.delete_scope({"user_id": "u1"}) + assert deleted == 2 + assert backend.list_by_scope({"user_id": "u1"}) == [] + assert len(backend.list_by_scope({"user_id": "u2"})) == 1 + + +# --- search --- + + +def test_pgvector_search_relevance_ordered(backend, fake_embedder): + backend.insert(_fact("f1", "u1", "alpha"), fake_embedder.embed("alpha")) + backend.insert(_fact("f2", "u1", "alpha-2"), fake_embedder.embed("alpha-2")) + backend.insert(_fact("f3", "u1", "zulu"), fake_embedder.embed("zulu")) + hits = backend.search( + query_embedding=fake_embedder.embed("alpha"), + 
scope={"user_id": "u1"}, + limit=3, + ) + assert hits[0][0].id == "f1" + scores = [s for _, s in hits] + assert scores == sorted(scores, reverse=True) + + +def test_pgvector_search_isolates_users(backend, fake_embedder): + backend.insert(_fact("f1", "u1", "alpha"), fake_embedder.embed("alpha")) + backend.insert(_fact("f2", "u2", "alpha"), fake_embedder.embed("alpha")) + hits = backend.search( + query_embedding=fake_embedder.embed("alpha"), + scope={"user_id": "u1"}, + limit=5, + ) + assert [f.id for f, _ in hits] == ["f1"] + + +def test_pgvector_search_empty_returns_empty(backend, fake_embedder): + hits = backend.search( + query_embedding=fake_embedder.embed("x"), + scope={"user_id": "u1"}, + limit=5, + ) + assert hits == [] From 91e8efeeeb864757070bd75ad7d65129ef1fd082 Mon Sep 17 00:00:00 2001 From: Taras Yaroshko Date: Mon, 25 May 2026 22:04:23 +0300 Subject: [PATCH 09/53] feat: add Qdrant-backed long-term memory backend --- dynamiq/memory/long_term/backends/__init__.py | 3 +- dynamiq/memory/long_term/backends/qdrant.py | 195 ++++++++++++++++++ .../memory/test_qdrant_fact_backend.py | 154 ++++++++++++++ 3 files changed, 351 insertions(+), 1 deletion(-) create mode 100644 dynamiq/memory/long_term/backends/qdrant.py create mode 100644 tests/integration_with_creds/memory/test_qdrant_fact_backend.py diff --git a/dynamiq/memory/long_term/backends/__init__.py b/dynamiq/memory/long_term/backends/__init__.py index 54643789b..1eb7be58b 100644 --- a/dynamiq/memory/long_term/backends/__init__.py +++ b/dynamiq/memory/long_term/backends/__init__.py @@ -2,5 +2,6 @@ from dynamiq.memory.long_term.backends.in_memory import InMemoryFactBackend from dynamiq.memory.long_term.backends.pgvector import PgvectorFactBackend +from dynamiq.memory.long_term.backends.qdrant import QdrantFactBackend -__all__ = ["InMemoryFactBackend", "PgvectorFactBackend"] +__all__ = ["InMemoryFactBackend", "PgvectorFactBackend", "QdrantFactBackend"] diff --git a/dynamiq/memory/long_term/backends/qdrant.py 
b/dynamiq/memory/long_term/backends/qdrant.py new file mode 100644 index 000000000..8f50233f0 --- /dev/null +++ b/dynamiq/memory/long_term/backends/qdrant.py @@ -0,0 +1,195 @@ +"""Qdrant-backed long-term memory backend. + +Qdrant point IDs must be UUIDs or unsigned ints. To allow any string for +`Fact.id`, this backend maps `fact.id -> deterministic UUID` for the +Qdrant point and keeps the original `fact.id` in the payload. +""" +import uuid +from datetime import datetime + +from pydantic import ConfigDict, PrivateAttr +from qdrant_client import QdrantClient +from qdrant_client.http.models import ( + Distance, + FieldCondition, + Filter, + MatchValue, + PointIdsList, + PointStruct, + VectorParams, +) + +from dynamiq.memory.long_term.base import LongTermMemoryBackend +from dynamiq.memory.long_term.schemas import Fact + + +_UUID_NAMESPACE = uuid.UUID("00000000-0000-0000-0000-000000000000") + + +def _to_point_id(fact_id: str) -> str: + """Map an arbitrary `fact_id` string to a deterministic Qdrant UUID.""" + return uuid.uuid5(_UUID_NAMESPACE, fact_id).hex + + +def _scope_to_filter(scope: dict[str, str]) -> Filter | None: + """Translate scope to Qdrant Filter. 
Returns None when scope is empty.""" + if not scope: + return None + return Filter( + must=[ + FieldCondition(key=key, match=MatchValue(value=value)) + for key, value in scope.items() + ] + ) + + +def _fact_to_payload(fact: Fact) -> dict: + return { + "fact_id": fact.id, + "content": fact.content, + "hash": fact.hash, + "user_id": fact.user_id, + "metadata": fact.metadata, + "created_at": fact.created_at.isoformat(), + "updated_at": fact.updated_at.isoformat(), + } + + +def _payload_to_fact(payload: dict) -> Fact: + return Fact( + id=payload["fact_id"], + content=payload["content"], + hash=payload["hash"], + user_id=payload["user_id"], + metadata=payload.get("metadata", {}), + created_at=datetime.fromisoformat(payload["created_at"]), + updated_at=datetime.fromisoformat(payload["updated_at"]), + ) + + +class QdrantFactBackend(LongTermMemoryBackend): + """Long-term memory backend backed by Qdrant.""" + + model_config = ConfigDict(arbitrary_types_allowed=True) + + url: str = "http://localhost:6333" + api_key: str | None = None + collection_name: str = "user_facts" + dimension: int = 1536 + + _client: QdrantClient | None = PrivateAttr(default=None) + + def model_post_init(self, __context) -> None: + self._client = QdrantClient(url=self.url, api_key=self.api_key) + + # --- collection management (test/admin helpers, not part of the ABC) --- + + def ensure_collection(self) -> None: + """Create the facts collection and payload indexes if absent.""" + if not self._client.collection_exists(self.collection_name): + self._client.create_collection( + collection_name=self.collection_name, + vectors_config=VectorParams(size=self.dimension, distance=Distance.COSINE), + ) + for key in ("user_id", "hash"): + self._client.create_payload_index( + collection_name=self.collection_name, + field_name=key, + field_schema="keyword", + ) + + def recreate_collection(self) -> None: + """Drop and re-create. 
For tests only.""" + if self._client.collection_exists(self.collection_name): + self._client.delete_collection(self.collection_name) + self.ensure_collection() + + def drop_collection(self) -> None: + if self._client.collection_exists(self.collection_name): + self._client.delete_collection(self.collection_name) + + # --- LongTermMemoryBackend implementation --- + + def insert(self, fact: Fact, embedding: list[float]) -> None: + self._client.upsert( + collection_name=self.collection_name, + points=[ + PointStruct( + id=_to_point_id(fact.id), + vector=list(embedding), + payload=_fact_to_payload(fact), + ) + ], + ) + + def get(self, fact_id: str) -> Fact | None: + results = self._client.retrieve( + collection_name=self.collection_name, + ids=[_to_point_id(fact_id)], + with_payload=True, + with_vectors=False, + ) + if not results: + return None + return _payload_to_fact(results[0].payload) + + def get_by_hash(self, *, user_id: str, content_hash: str) -> Fact | None: + points, _ = self._client.scroll( + collection_name=self.collection_name, + scroll_filter=Filter( + must=[ + FieldCondition(key="user_id", match=MatchValue(value=user_id)), + FieldCondition(key="hash", match=MatchValue(value=content_hash)), + ] + ), + limit=1, + with_payload=True, + ) + if not points: + return None + return _payload_to_fact(points[0].payload) + + def delete(self, fact_id: str) -> None: + self._client.delete( + collection_name=self.collection_name, + points_selector=PointIdsList(points=[_to_point_id(fact_id)]), + ) + + def search( + self, + *, + query_embedding: list[float], + scope: dict[str, str], + limit: int, + ) -> list[tuple[Fact, float]]: + results = self._client.search( + collection_name=self.collection_name, + query_vector=list(query_embedding), + query_filter=_scope_to_filter(scope), + limit=limit, + with_payload=True, + with_vectors=False, + ) + return [(_payload_to_fact(point.payload), float(point.score)) for point in results] + + def list_by_scope(self, scope: dict[str, str], 
limit: int = 100) -> list[Fact]: + points, _ = self._client.scroll( + collection_name=self.collection_name, + scroll_filter=_scope_to_filter(scope), + limit=limit, + with_payload=True, + ) + return [_payload_to_fact(p.payload) for p in points] + + def delete_scope(self, scope: dict[str, str]) -> int: + # Qdrant's delete-by-filter does not return a count; scroll first. + in_scope = self.list_by_scope(scope, limit=10_000) + if not in_scope: + return 0 + self._client.delete( + collection_name=self.collection_name, + points_selector=PointIdsList( + points=[_to_point_id(f.id) for f in in_scope] + ), + ) + return len(in_scope) diff --git a/tests/integration_with_creds/memory/test_qdrant_fact_backend.py b/tests/integration_with_creds/memory/test_qdrant_fact_backend.py new file mode 100644 index 000000000..566494556 --- /dev/null +++ b/tests/integration_with_creds/memory/test_qdrant_fact_backend.py @@ -0,0 +1,154 @@ +"""Integration tests for QdrantFactBackend. + +Requires a reachable Qdrant. Set `QDRANT_URL` (defaults to +`http://localhost:6333`). Without one, this whole module skips. + +Run a local Qdrant with: `docker run -d -p 6333:6333 qdrant/qdrant`. 
+""" +import os +from datetime import UTC, datetime + +import pytest + +QDRANT_URL = os.getenv("QDRANT_URL", "http://localhost:6333") + +try: # pragma: no cover - environment probe + import requests as _requests + + _requests.get(QDRANT_URL, timeout=1) + QDRANT_AVAILABLE = True +except Exception: + QDRANT_AVAILABLE = False + +pytestmark = pytest.mark.skipif( + not QDRANT_AVAILABLE, reason=f"Qdrant not reachable at {QDRANT_URL}" +) + + +from dynamiq.memory.long_term.backends.qdrant import QdrantFactBackend # noqa: E402 +from dynamiq.memory.long_term.schemas import Fact # noqa: E402 + + +@pytest.fixture +def backend(): + b = QdrantFactBackend( + url=QDRANT_URL, + collection_name="test_user_facts", + dimension=16, + ) + b.recreate_collection() + yield b + b.drop_collection() + + +def _fact(fact_id, user_id, content, content_hash=None): + now = datetime.now(UTC) + return Fact( + id=fact_id, + content=content, + hash=content_hash or f"h-{fact_id}", + user_id=user_id, + metadata={}, + created_at=now, + updated_at=now, + ) + + +# --- insert / get / get_by_hash --- + + +def test_qdrant_insert_then_get(backend, fake_embedder): + fact = _fact("f1", "u1", "hello") + backend.insert(fact, fake_embedder.embed("hello")) + fetched = backend.get("f1") + assert fetched is not None + assert fetched.id == "f1" + assert fetched.content == "hello" + + +def test_qdrant_get_unknown_returns_none(backend): + assert backend.get("does-not-exist") is None + + +def test_qdrant_get_by_hash(backend, fake_embedder): + backend.insert(_fact("f1", "u1", "x", "h-shared"), fake_embedder.embed("x")) + found = backend.get_by_hash(user_id="u1", content_hash="h-shared") + assert found is not None and found.id == "f1" + + +def test_qdrant_get_by_hash_isolates_users(backend, fake_embedder): + backend.insert(_fact("f1", "u1", "x", "h-shared"), fake_embedder.embed("x")) + assert backend.get_by_hash(user_id="u2", content_hash="h-shared") is None + + +def test_qdrant_metadata_round_trip(backend, fake_embedder): 
+ fact = _fact("f1", "u1", "x").model_copy( + update={"metadata": {"category": "preference", "score": 0.8}} + ) + backend.insert(fact, fake_embedder.embed("x")) + fetched = backend.get("f1") + assert fetched.metadata == {"category": "preference", "score": 0.8} + + +# --- delete / list_by_scope / delete_scope --- + + +def test_qdrant_delete(backend, fake_embedder): + backend.insert(_fact("f1", "u1", "x"), fake_embedder.embed("x")) + backend.delete("f1") + assert backend.get("f1") is None + + +def test_qdrant_list_by_scope(backend, fake_embedder): + backend.insert(_fact("f1", "u1", "a"), fake_embedder.embed("a")) + backend.insert(_fact("f2", "u1", "b"), fake_embedder.embed("b")) + backend.insert(_fact("f3", "u2", "c"), fake_embedder.embed("c")) + listed = backend.list_by_scope({"user_id": "u1"}) + assert {f.id for f in listed} == {"f1", "f2"} + + +def test_qdrant_delete_scope(backend, fake_embedder): + backend.insert(_fact("f1", "u1", "a"), fake_embedder.embed("a")) + backend.insert(_fact("f2", "u1", "b"), fake_embedder.embed("b")) + backend.insert(_fact("f3", "u2", "c"), fake_embedder.embed("c")) + deleted = backend.delete_scope({"user_id": "u1"}) + assert deleted == 2 + assert backend.list_by_scope({"user_id": "u1"}) == [] + assert len(backend.list_by_scope({"user_id": "u2"})) == 1 + + +# --- search --- + + +def test_qdrant_search_relevance_ordered(backend, fake_embedder): + backend.insert(_fact("f1", "u1", "alpha"), fake_embedder.embed("alpha")) + backend.insert(_fact("f2", "u1", "alpha-2"), fake_embedder.embed("alpha-2")) + backend.insert(_fact("f3", "u1", "zulu"), fake_embedder.embed("zulu")) + hits = backend.search( + query_embedding=fake_embedder.embed("alpha"), + scope={"user_id": "u1"}, + limit=3, + ) + assert hits[0][0].id == "f1" + scores = [s for _, s in hits] + assert scores == sorted(scores, reverse=True) + + +def test_qdrant_search_isolates_users(backend, fake_embedder): + backend.insert(_fact("f1", "u1", "alpha"), fake_embedder.embed("alpha")) + 
backend.insert(_fact("f2", "u2", "alpha"), fake_embedder.embed("alpha")) + hits = backend.search( + query_embedding=fake_embedder.embed("alpha"), + scope={"user_id": "u1"}, + limit=5, + ) + assert [f.id for f, _ in hits] == ["f1"] + + +def test_qdrant_search_empty_returns_empty(backend, fake_embedder): + hits = backend.search( + query_embedding=fake_embedder.embed("x"), + scope={"user_id": "u1"}, + limit=5, + ) + assert hits == [] From 12013bda638b8b8bf26efcab7a684c83f62e7c7d Mon Sep 17 00:00:00 2001 From: Taras Yaroshko Date: Mon, 25 May 2026 22:09:47 +0300 Subject: [PATCH 10/53] fix: harden long-term memory against bandit findings --- dynamiq/memory/long_term/backends/pgvector.py | 136 +++++++++++------- dynamiq/memory/long_term/long_term_memory.py | 7 +- 2 files changed, 90 insertions(+), 53 deletions(-) diff --git a/dynamiq/memory/long_term/backends/pgvector.py b/dynamiq/memory/long_term/backends/pgvector.py index 1821a4a7b..5c7b17d0a 100644 --- a/dynamiq/memory/long_term/backends/pgvector.py +++ b/dynamiq/memory/long_term/backends/pgvector.py @@ -3,46 +3,53 @@ Uses psycopg (v3) + the pgvector extension. Stores facts in a single table with a vector column for embeddings, a JSONB column for metadata, and (user_id, hash) uniqueness for dedup. + +Table and column identifiers are interpolated via `psycopg.sql.SQL` / +`Identifier`, never raw f-strings, so an attacker-controlled table_name +cannot inject SQL. 
""" import psycopg from pgvector.psycopg import register_vector from psycopg.rows import dict_row +from psycopg.sql import SQL, Composed, Identifier from psycopg.types.json import Jsonb from pydantic import ConfigDict, PrivateAttr from dynamiq.memory.long_term.base import LongTermMemoryBackend from dynamiq.memory.long_term.schemas import Fact +_CREATE_EXTENSION_SQL = SQL("CREATE EXTENSION IF NOT EXISTS vector") -_SCHEMA_SQL = """ -CREATE EXTENSION IF NOT EXISTS vector; +_CREATE_TABLE_TEMPLATE = SQL( + """ + CREATE TABLE IF NOT EXISTS {table} ( + id TEXT PRIMARY KEY, + content TEXT NOT NULL, + hash TEXT NOT NULL, + user_id TEXT NOT NULL, + metadata JSONB NOT NULL DEFAULT '{{}}'::jsonb, + embedding vector({dim}) NOT NULL, + created_at TIMESTAMPTZ NOT NULL, + updated_at TIMESTAMPTZ NOT NULL + ) + """ +) -CREATE TABLE IF NOT EXISTS {table} ( - id TEXT PRIMARY KEY, - content TEXT NOT NULL, - hash TEXT NOT NULL, - user_id TEXT NOT NULL, - metadata JSONB NOT NULL DEFAULT '{{}}'::jsonb, - embedding vector({dim}) NOT NULL, - created_at TIMESTAMPTZ NOT NULL, - updated_at TIMESTAMPTZ NOT NULL -); +_CREATE_USER_ID_INDEX_TEMPLATE = SQL("CREATE INDEX IF NOT EXISTS {idx} ON {table} (user_id)") -CREATE INDEX IF NOT EXISTS {table}_user_id_idx ON {table} (user_id); -CREATE UNIQUE INDEX IF NOT EXISTS {table}_user_hash_uidx ON {table} (user_id, hash); -""" +_CREATE_USER_HASH_INDEX_TEMPLATE = SQL("CREATE UNIQUE INDEX IF NOT EXISTS {idx} ON {table} (user_id, hash)") -def _scope_to_where(scope: dict[str, str]) -> tuple[str, list]: - """Translate a scope dict into a parameterised SQL WHERE clause. +def _scope_where_clause(scope: dict[str, str]) -> tuple[Composed, list]: + """Build a parameterised WHERE clause from a scope dict. - `scope` is always `{"user_id": ...}` in v1; the loop is shaped so - forward extensions (agent_id, run_id) drop in without rewriting. + Returns the SQL fragment and its parameter list. 
Keys are interpolated + as Identifiers (safe); values stay as `%s` placeholders for the driver. """ if not scope: - return "TRUE", [] - clauses = [f"{key} = %s" for key in scope.keys()] - return " AND ".join(clauses), list(scope.values()) + return SQL("TRUE"), [] + clauses = [SQL("{key} = %s").format(key=Identifier(key)) for key in scope.keys()] + return SQL(" AND ").join(clauses), list(scope.values()) def _row_to_fact(row) -> Fact: @@ -57,6 +64,9 @@ def _row_to_fact(row) -> Fact: ) +_FACT_COLUMNS = SQL("id, content, hash, user_id, metadata, created_at, updated_at") + + class PgvectorFactBackend(LongTermMemoryBackend): """Long-term memory backend backed by Postgres + pgvector.""" @@ -74,48 +84,65 @@ def model_post_init(self, __context) -> None: # --- schema management (test/admin helpers, not part of the ABC) --- + @property + def _table(self) -> Identifier: + return Identifier(self.table_name) + def ensure_table(self) -> None: """Create the facts table and indexes if absent. Safe to call repeatedly.""" with self._conn.cursor() as cur: - cur.execute(_SCHEMA_SQL.format(table=self.table_name, dim=self.dimension)) + cur.execute(_CREATE_EXTENSION_SQL) + cur.execute(_CREATE_TABLE_TEMPLATE.format(table=self._table, dim=SQL(str(self.dimension)))) + cur.execute( + _CREATE_USER_ID_INDEX_TEMPLATE.format( + idx=Identifier(f"{self.table_name}_user_id_idx"), + table=self._table, + ) + ) + cur.execute( + _CREATE_USER_HASH_INDEX_TEMPLATE.format( + idx=Identifier(f"{self.table_name}_user_hash_uidx"), + table=self._table, + ) + ) def recreate_table(self) -> None: """Drop and re-create the facts table. 
For tests only.""" with self._conn.cursor() as cur: - cur.execute(f"DROP TABLE IF EXISTS {self.table_name}") + cur.execute(SQL("DROP TABLE IF EXISTS {table}").format(table=self._table)) self.ensure_table() def drop_table(self) -> None: with self._conn.cursor() as cur: - cur.execute(f"DROP TABLE IF EXISTS {self.table_name}") + cur.execute(SQL("DROP TABLE IF EXISTS {table}").format(table=self._table)) # --- LongTermMemoryBackend implementation --- def insert(self, fact: Fact, embedding: list[float]) -> None: with self._conn.cursor() as cur: cur.execute( - f""" - INSERT INTO {self.table_name} - (id, content, hash, user_id, metadata, embedding, created_at, updated_at) - VALUES (%s, %s, %s, %s, %s, %s, %s, %s) - """, + SQL("INSERT INTO {table} ({cols}, embedding) " "VALUES (%s, %s, %s, %s, %s, %s, %s, %s)").format( + table=self._table, cols=_FACT_COLUMNS + ), ( fact.id, fact.content, fact.hash, fact.user_id, Jsonb(fact.metadata), - embedding, fact.created_at, fact.updated_at, + embedding, ), ) def get(self, fact_id: str) -> Fact | None: with self._conn.cursor(row_factory=dict_row) as cur: cur.execute( - f"SELECT id, content, hash, user_id, metadata, created_at, updated_at " - f"FROM {self.table_name} WHERE id = %s", + SQL("SELECT {cols} FROM {table} WHERE id = %s").format( + cols=_FACT_COLUMNS, + table=self._table, + ), (fact_id,), ) row = cur.fetchone() @@ -124,8 +151,9 @@ def get(self, fact_id: str) -> Fact | None: def get_by_hash(self, *, user_id: str, content_hash: str) -> Fact | None: with self._conn.cursor(row_factory=dict_row) as cur: cur.execute( - f"SELECT id, content, hash, user_id, metadata, created_at, updated_at " - f"FROM {self.table_name} WHERE user_id = %s AND hash = %s", + SQL("SELECT {cols} FROM {table} WHERE user_id = %s AND hash = %s").format( + cols=_FACT_COLUMNS, table=self._table + ), (user_id, content_hash), ) row = cur.fetchone() @@ -133,7 +161,10 @@ def get_by_hash(self, *, user_id: str, content_hash: str) -> Fact | None: def delete(self, 
fact_id: str) -> None: with self._conn.cursor() as cur: - cur.execute(f"DELETE FROM {self.table_name} WHERE id = %s", (fact_id,)) + cur.execute( + SQL("DELETE FROM {table} WHERE id = %s").format(table=self._table), + (fact_id,), + ) def search( self, @@ -142,36 +173,39 @@ def search( scope: dict[str, str], limit: int, ) -> list[tuple[Fact, float]]: - where, params = _scope_to_where(scope) + where, params = _scope_where_clause(scope) with self._conn.cursor(row_factory=dict_row) as cur: cur.execute( - f""" - SELECT id, content, hash, user_id, metadata, created_at, updated_at, - 1 - (embedding <=> %s::vector) AS score - FROM {self.table_name} - WHERE {where} - ORDER BY embedding <=> %s::vector - LIMIT %s - """, + SQL( + "SELECT {cols}, 1 - (embedding <=> %s::vector) AS score " + "FROM {table} WHERE {where} " + "ORDER BY embedding <=> %s::vector LIMIT %s" + ).format(cols=_FACT_COLUMNS, table=self._table, where=where), [query_embedding] + params + [query_embedding, limit], ) rows = cur.fetchall() return [(_row_to_fact(row), float(row["score"])) for row in rows] def list_by_scope(self, scope: dict[str, str], limit: int = 100) -> list[Fact]: - where, params = _scope_to_where(scope) + where, params = _scope_where_clause(scope) with self._conn.cursor(row_factory=dict_row) as cur: cur.execute( - f"SELECT id, content, hash, user_id, metadata, created_at, updated_at " - f"FROM {self.table_name} WHERE {where} " - f"ORDER BY created_at DESC LIMIT %s", + SQL("SELECT {cols} FROM {table} WHERE {where} " "ORDER BY created_at DESC LIMIT %s").format( + cols=_FACT_COLUMNS, table=self._table, where=where + ), params + [limit], ) rows = cur.fetchall() return [_row_to_fact(row) for row in rows] def delete_scope(self, scope: dict[str, str]) -> int: - where, params = _scope_to_where(scope) + where, params = _scope_where_clause(scope) with self._conn.cursor() as cur: - cur.execute(f"DELETE FROM {self.table_name} WHERE {where}", params) + cur.execute( + SQL("DELETE FROM {table} WHERE 
{where}").format( + table=self._table, + where=where, + ), + params, + ) return cur.rowcount diff --git a/dynamiq/memory/long_term/long_term_memory.py b/dynamiq/memory/long_term/long_term_memory.py index a6e1b3d2f..76e4a6518 100644 --- a/dynamiq/memory/long_term/long_term_memory.py +++ b/dynamiq/memory/long_term/long_term_memory.py @@ -15,9 +15,12 @@ def _content_hash(user_id: str, content: str) -> str: - """Stable hash for exact-duplicate dedup. Scoped per-user.""" + """Stable hash for exact-duplicate dedup. Scoped per-user. + + MD5 is used only as a dedup key, never as a security primitive. + """ normalised = content.strip().lower() - return md5(f"{user_id}:{normalised}".encode("utf-8")).hexdigest() + return md5(f"{user_id}:{normalised}".encode(), usedforsecurity=False).hexdigest() def _embed(embedder: Any, text: str) -> list[float]: From 3e1a62e69c0ee36aa9552c6c28f6b8cf6169a89f Mon Sep 17 00:00:00 2001 From: Taras Yaroshko Date: Mon, 25 May 2026 22:47:22 +0300 Subject: [PATCH 11/53] refactor: drop module docstrings and dead comments from long-term memory --- dynamiq/memory/long_term/__init__.py | 5 --- dynamiq/memory/long_term/backends/__init__.py | 2 -- .../memory/long_term/backends/in_memory.py | 6 ---- dynamiq/memory/long_term/backends/pgvector.py | 18 ++-------- dynamiq/memory/long_term/backends/qdrant.py | 19 ++++------ dynamiq/memory/long_term/base.py | 9 +---- dynamiq/memory/long_term/long_term_memory.py | 35 ++++--------------- dynamiq/memory/long_term/schemas.py | 8 +---- dynamiq/nodes/agents/base.py | 8 +---- dynamiq/nodes/tools/long_term_memory.py | 24 +++---------- 10 files changed, 22 insertions(+), 112 deletions(-) diff --git a/dynamiq/memory/long_term/__init__.py b/dynamiq/memory/long_term/__init__.py index 15e3d00ac..45b989717 100644 --- a/dynamiq/memory/long_term/__init__.py +++ b/dynamiq/memory/long_term/__init__.py @@ -1,8 +1,3 @@ -"""Long-term, fact-shaped, user-scoped memory for Dynamiq agents. 
- -See docs/superpowers/specs/2026-05-25-long-term-memory-design.md. -""" - from dynamiq.memory.long_term.base import LongTermMemoryBackend from dynamiq.memory.long_term.long_term_memory import LongTermMemory, LongTermMemoryConfig from dynamiq.memory.long_term.schemas import Fact diff --git a/dynamiq/memory/long_term/backends/__init__.py b/dynamiq/memory/long_term/backends/__init__.py index 1eb7be58b..1f1360879 100644 --- a/dynamiq/memory/long_term/backends/__init__.py +++ b/dynamiq/memory/long_term/backends/__init__.py @@ -1,5 +1,3 @@ -"""Concrete LongTermMemoryBackend implementations.""" - from dynamiq.memory.long_term.backends.in_memory import InMemoryFactBackend from dynamiq.memory.long_term.backends.pgvector import PgvectorFactBackend from dynamiq.memory.long_term.backends.qdrant import QdrantFactBackend diff --git a/dynamiq/memory/long_term/backends/in_memory.py b/dynamiq/memory/long_term/backends/in_memory.py index c7b67d48f..662a62519 100644 --- a/dynamiq/memory/long_term/backends/in_memory.py +++ b/dynamiq/memory/long_term/backends/in_memory.py @@ -1,8 +1,3 @@ -"""In-process fact backend for tests and light use. - -Storage is a dict by fact_id. Search uses numpy cosine over all in-scope -vectors — fine for hundreds of facts, not intended for production scale. -""" import numpy as np from pydantic import PrivateAttr @@ -69,7 +64,6 @@ def delete_scope(self, scope: dict[str, str]) -> int: def _matches_scope(fact: Fact, scope: dict[str, str]) -> bool: - """Return True iff every key in scope matches the corresponding Fact attribute.""" for key, value in scope.items(): if getattr(fact, key, None) != value: return False diff --git a/dynamiq/memory/long_term/backends/pgvector.py b/dynamiq/memory/long_term/backends/pgvector.py index 5c7b17d0a..d5426c453 100644 --- a/dynamiq/memory/long_term/backends/pgvector.py +++ b/dynamiq/memory/long_term/backends/pgvector.py @@ -1,13 +1,3 @@ -"""pgvector-backed long-term memory backend. 
- -Uses psycopg (v3) + the pgvector extension. Stores facts in a single -table with a vector column for embeddings, a JSONB column for metadata, -and (user_id, hash) uniqueness for dedup. - -Table and column identifiers are interpolated via `psycopg.sql.SQL` / -`Identifier`, never raw f-strings, so an attacker-controlled table_name -cannot inject SQL. -""" import psycopg from pgvector.psycopg import register_vector from psycopg.rows import dict_row @@ -43,8 +33,8 @@ def _scope_where_clause(scope: dict[str, str]) -> tuple[Composed, list]: """Build a parameterised WHERE clause from a scope dict. - Returns the SQL fragment and its parameter list. Keys are interpolated - as Identifiers (safe); values stay as `%s` placeholders for the driver. + Keys are interpolated as `Identifier` (safe); values stay as `%s` placeholders + for the driver — never an f-string substitution. """ if not scope: return SQL("TRUE"), [] @@ -82,8 +72,6 @@ def model_post_init(self, __context) -> None: self._conn = psycopg.connect(self.dsn, autocommit=True) register_vector(self._conn) - # --- schema management (test/admin helpers, not part of the ABC) --- - @property def _table(self) -> Identifier: return Identifier(self.table_name) @@ -116,8 +104,6 @@ def drop_table(self) -> None: with self._conn.cursor() as cur: cur.execute(SQL("DROP TABLE IF EXISTS {table}").format(table=self._table)) - # --- LongTermMemoryBackend implementation --- - def insert(self, fact: Fact, embedding: list[float]) -> None: with self._conn.cursor() as cur: cur.execute( diff --git a/dynamiq/memory/long_term/backends/qdrant.py b/dynamiq/memory/long_term/backends/qdrant.py index 8f50233f0..7659b95c5 100644 --- a/dynamiq/memory/long_term/backends/qdrant.py +++ b/dynamiq/memory/long_term/backends/qdrant.py @@ -1,9 +1,3 @@ -"""Qdrant-backed long-term memory backend. - -Qdrant point IDs must be UUIDs or unsigned ints. 
To allow any string for -`Fact.id`, this backend maps `fact.id -> deterministic UUID` for the -Qdrant point and keeps the original `fact.id` in the payload. -""" import uuid from datetime import datetime @@ -27,12 +21,15 @@ def _to_point_id(fact_id: str) -> str: - """Map an arbitrary `fact_id` string to a deterministic Qdrant UUID.""" + """Map an arbitrary `fact_id` string to a deterministic Qdrant UUID. + + Qdrant requires UUID or unsigned-int point IDs; the original `fact_id` + is kept in the payload so lookups round-trip. + """ return uuid.uuid5(_UUID_NAMESPACE, fact_id).hex def _scope_to_filter(scope: dict[str, str]) -> Filter | None: - """Translate scope to Qdrant Filter. Returns None when scope is empty.""" if not scope: return None return Filter( @@ -82,8 +79,6 @@ class QdrantFactBackend(LongTermMemoryBackend): def model_post_init(self, __context) -> None: self._client = QdrantClient(url=self.url, api_key=self.api_key) - # --- collection management (test/admin helpers, not part of the ABC) --- - def ensure_collection(self) -> None: """Create the facts collection and payload indexes if absent.""" if not self._client.collection_exists(self.collection_name): @@ -108,8 +103,6 @@ def drop_collection(self) -> None: if self._client.collection_exists(self.collection_name): self._client.delete_collection(self.collection_name) - # --- LongTermMemoryBackend implementation --- - def insert(self, fact: Fact, embedding: list[float]) -> None: self._client.upsert( collection_name=self.collection_name, @@ -182,7 +175,7 @@ def list_by_scope(self, scope: dict[str, str], limit: int = 100) -> list[Fact]: return [_payload_to_fact(p.payload) for p in points] def delete_scope(self, scope: dict[str, str]) -> int: - # Qdrant's delete-by-filter does not return a count; scroll first. + # Qdrant delete-by-filter returns no count, so enumerate ids first. 
in_scope = self.list_by_scope(scope, limit=10_000) if not in_scope: return 0 diff --git a/dynamiq/memory/long_term/base.py b/dynamiq/memory/long_term/base.py index ca45171b1..bca2559e2 100644 --- a/dynamiq/memory/long_term/base.py +++ b/dynamiq/memory/long_term/base.py @@ -1,9 +1,3 @@ -"""Backend ABC for long-term memory. - -Concrete backends wrap `dynamiq.storages.vector.*` infrastructure. -Independent of `dynamiq.memory.backends.MemoryBackend` (no shared base) — -see spec sections "Fork 3" and "Fork 4" for the rationale. -""" from abc import ABC, abstractmethod from pydantic import BaseModel, ConfigDict @@ -49,9 +43,8 @@ def list_by_scope( def delete_scope(self, scope: dict[str, str]) -> int: """Hard-delete every fact matching `scope`. Returns count deleted.""" - # Phase 2 reservation — see spec Appendix A. - # In v1, `update()` is NOT @abstractmethod and the default raises. def update(self, fact_id: str, content: str, embedding: list[float]) -> None: + """Replace an existing fact in-place (Phase 2). Use delete + insert in v1.""" raise NotImplementedError( "update() lands in Phase 2 with the auto-extractor. " "In v1, correct a fact via delete() + insert()." diff --git a/dynamiq/memory/long_term/long_term_memory.py b/dynamiq/memory/long_term/long_term_memory.py index 76e4a6518..40c6a4a2b 100644 --- a/dynamiq/memory/long_term/long_term_memory.py +++ b/dynamiq/memory/long_term/long_term_memory.py @@ -1,8 +1,3 @@ -"""LongTermMemory facade. - -User-facing API for fact-shaped, user-scoped, cross-session memory. -Wraps a `LongTermMemoryBackend` and an embedder. -""" from datetime import UTC, datetime from hashlib import md5 from typing import Any, Literal @@ -15,21 +10,12 @@ def _content_hash(user_id: str, content: str) -> str: - """Stable hash for exact-duplicate dedup. Scoped per-user. - - MD5 is used only as a dedup key, never as a security primitive. 
- """ + """Per-user stable hash used only as a dedup key, never as a security primitive.""" normalised = content.strip().lower() return md5(f"{user_id}:{normalised}".encode(), usedforsecurity=False).hexdigest() def _embed(embedder: Any, text: str) -> list[float]: - """Call the embedder's `.execute({"query": text})` and pull the vector. - - Matches dynamiq's `TextEmbedder` contract: input is a dict (or - `TextEmbedderInputSchema`) with `query`; output is a dict-like with - `embedding`. - """ result = embedder.execute({"query": text}) return list(result["embedding"]) @@ -40,7 +26,7 @@ class LongTermMemory(BaseModel): model_config = ConfigDict(arbitrary_types_allowed=True) backend: LongTermMemoryBackend - embedder: Any # TextEmbedder in production; FakeTextEmbedder in tests. + embedder: Any def remember( self, *, content: str, user_id: str, @@ -74,10 +60,7 @@ def remember( def recall( self, *, query: str, user_id: str, limit: int = 5, ) -> list[tuple[Fact, float]]: - """Semantic search for facts relevant to `query`, scoped to `user_id`. - - No similarity threshold — caller (or model) decides what's relevant. - """ + """Semantic search for facts relevant to `query`, scoped to `user_id`.""" stripped = query.strip() if query else "" if not stripped: raise ValueError("recall query cannot be empty") @@ -89,11 +72,10 @@ def recall( ) def forget(self, *, fact_id: str, user_id: str) -> str: - """Delete a fact by id, with cross-user guard. + """Delete a fact by id, returning 'deleted' | 'not_found' | 'forbidden'. - Returns one of: 'deleted', 'not_found', 'forbidden'. Never raises on user mismatch — defence in depth above the - construction-time user_id binding on the tool. + construction-time `user_id` binding on the tool. """ fact = self.backend.get(fact_id) if fact is None: @@ -114,12 +96,7 @@ def clear_user(self, *, user_id: str) -> int: class LongTermMemoryConfig(BaseModel): - """Per-agent configuration for long-term memory. 
- - `tools` controls which of the three tools the agent is given access to. - Sub-agents typically use `("recall",)` for read-only inheritance; - parent agents use the default `("remember", "recall", "forget")`. - """ + """Per-agent configuration for long-term memory tool exposure.""" tools: tuple[Literal["remember", "recall", "forget"], ...] = ( "remember", diff --git a/dynamiq/memory/long_term/schemas.py b/dynamiq/memory/long_term/schemas.py index 7b1d3b6fc..6b239f2f8 100644 --- a/dynamiq/memory/long_term/schemas.py +++ b/dynamiq/memory/long_term/schemas.py @@ -1,4 +1,3 @@ -"""Pydantic schemas for long-term memory.""" from datetime import datetime from typing import Any @@ -6,12 +5,7 @@ class Fact(BaseModel): - """A single long-term memory fact, scoped to a user. - - `hash` is md5(f"{user_id}:{content.strip().lower()}") and is used to - short-circuit exact duplicates in `LongTermMemory.remember()` before - any embedder call. - """ + """A single long-term memory fact, scoped to a user.""" id: str content: str diff --git a/dynamiq/nodes/agents/base.py b/dynamiq/nodes/agents/base.py index 41c8b09f1..248b1032b 100644 --- a/dynamiq/nodes/agents/base.py +++ b/dynamiq/nodes/agents/base.py @@ -818,18 +818,12 @@ def _retrieve_memory(self, input_data: AgentInputSchema) -> list[Message]: return history_messages def _build_long_term_memory_tools(self, input_data: "AgentInputSchema") -> list[Node]: - """Build per-run long-term-memory tools, or [] if not applicable. - - Returns an empty list when `long_term_memory` is unset or `user_id` - is absent. The caller attaches the returned tools to `self.tools` - for the duration of the run. - """ + """Construct per-run long-term-memory tools, or [] when LTM or user_id is absent.""" if self.long_term_memory is None: return [] user_id = getattr(input_data, "user_id", None) if not user_id: return [] - # Imported locally to avoid circular imports at module load time. 
from dynamiq.nodes.tools.long_term_memory import build_long_term_memory_tools return build_long_term_memory_tools( diff --git a/dynamiq/nodes/tools/long_term_memory.py b/dynamiq/nodes/tools/long_term_memory.py index 256d3b129..3d4b03210 100644 --- a/dynamiq/nodes/tools/long_term_memory.py +++ b/dynamiq/nodes/tools/long_term_memory.py @@ -1,9 +1,3 @@ -"""Tools giving an agent access to a `LongTermMemory` instance. - -The three tools (`remember_fact`, `recall_facts`, `forget_fact`) bind -`user_id` at construction. `user_id` never appears in `InputSchema`, so -the model has no slot to address another user's memory. See spec §9.1. -""" from typing import Any, ClassVar, Literal from pydantic import BaseModel, ConfigDict, Field @@ -37,7 +31,7 @@ class RememberFactInputSchema(BaseModel): - """LLM-visible input for remember_fact. Note: no `user_id`.""" + """LLM-visible input for `remember_fact`. `user_id` is bound at construction.""" content: str = Field(..., min_length=1, max_length=1000, description="The fact to remember, as a short statement.") @@ -48,7 +42,7 @@ class RememberFactInputSchema(BaseModel): class RecallFactsInputSchema(BaseModel): - """LLM-visible input for recall_facts. Note: no `user_id`.""" + """LLM-visible input for `recall_facts`. `user_id` is bound at construction.""" query: str = Field(..., min_length=1, max_length=500, description="What to search for.") @@ -57,17 +51,13 @@ class RecallFactsInputSchema(BaseModel): class ForgetFactInputSchema(BaseModel): - """LLM-visible input for forget_fact. Note: no `user_id`.""" + """LLM-visible input for `forget_fact`. `user_id` is bound at construction.""" fact_id: str = Field(..., description="The id returned by recall_facts or remember_fact.") class _LongTermMemoryTool(Node): - """Shared base for the three long-term memory tools. - - Holds the `LongTermMemory` reference and the construction-bound `user_id`. - Concrete subclasses set `name`, `description`, `input_schema`, and `execute`. 
- """ + """Shared base for the long-term memory tools.""" model_config = ConfigDict(arbitrary_types_allowed=True) @@ -142,11 +132,7 @@ def build_long_term_memory_tools( user_id: str, include: tuple[str, ...] = ("remember", "recall", "forget"), ) -> list[Node]: - """Build the long-term-memory tools with `user_id` baked in. - - `include` selects which tools to return — sub-agents commonly use - `include=("recall",)` for read-only inheritance. Unknown keys are ignored. - """ + """Construct long-term-memory tools with `user_id` baked in. Unknown keys in `include` are ignored.""" tools: list[Node] = [] for kind in include: cls = _TOOL_BUILDERS.get(kind) From 82db48d400e1667f448ae4319769f8c09f908322 Mon Sep 17 00:00:00 2001 From: Taras Yaroshko Date: Mon, 25 May 2026 22:50:43 +0300 Subject: [PATCH 12/53] docs: normalize long-term memory docstrings --- dynamiq/memory/long_term/backends/pgvector.py | 4 +++- dynamiq/memory/long_term/backends/qdrant.py | 5 +++-- dynamiq/memory/long_term/base.py | 16 ++++++++-------- dynamiq/memory/long_term/long_term_memory.py | 3 +++ 4 files changed, 17 insertions(+), 11 deletions(-) diff --git a/dynamiq/memory/long_term/backends/pgvector.py b/dynamiq/memory/long_term/backends/pgvector.py index d5426c453..fafd68ef1 100644 --- a/dynamiq/memory/long_term/backends/pgvector.py +++ b/dynamiq/memory/long_term/backends/pgvector.py @@ -74,6 +74,7 @@ def model_post_init(self, __context) -> None: @property def _table(self) -> Identifier: + """Return the table name wrapped as a safe SQL identifier.""" return Identifier(self.table_name) def ensure_table(self) -> None: @@ -95,12 +96,13 @@ def ensure_table(self) -> None: ) def recreate_table(self) -> None: - """Drop and re-create the facts table. For tests only.""" + """Drop and re-create the facts table. 
Test-only helper.""" with self._conn.cursor() as cur: cur.execute(SQL("DROP TABLE IF EXISTS {table}").format(table=self._table)) self.ensure_table() def drop_table(self) -> None: + """Drop the facts table if it exists. Test-only helper.""" with self._conn.cursor() as cur: cur.execute(SQL("DROP TABLE IF EXISTS {table}").format(table=self._table)) diff --git a/dynamiq/memory/long_term/backends/qdrant.py b/dynamiq/memory/long_term/backends/qdrant.py index 7659b95c5..b30ebc2f8 100644 --- a/dynamiq/memory/long_term/backends/qdrant.py +++ b/dynamiq/memory/long_term/backends/qdrant.py @@ -80,7 +80,7 @@ def model_post_init(self, __context) -> None: self._client = QdrantClient(url=self.url, api_key=self.api_key) def ensure_collection(self) -> None: - """Create the facts collection and payload indexes if absent.""" + """Create the facts collection and payload indexes if absent. Safe to call repeatedly.""" if not self._client.collection_exists(self.collection_name): self._client.create_collection( collection_name=self.collection_name, @@ -94,12 +94,13 @@ def ensure_collection(self) -> None: ) def recreate_collection(self) -> None: - """Drop and re-create. For tests only.""" + """Drop and re-create the facts collection. Test-only helper.""" if self._client.collection_exists(self.collection_name): self._client.delete_collection(self.collection_name) self.ensure_collection() def drop_collection(self) -> None: + """Drop the facts collection if it exists. Test-only helper.""" if self._client.collection_exists(self.collection_name): self._client.delete_collection(self.collection_name) diff --git a/dynamiq/memory/long_term/base.py b/dynamiq/memory/long_term/base.py index bca2559e2..d3d6e5fe3 100644 --- a/dynamiq/memory/long_term/base.py +++ b/dynamiq/memory/long_term/base.py @@ -12,14 +12,15 @@ class LongTermMemoryBackend(ABC, BaseModel): @abstractmethod def insert(self, fact: Fact, embedding: list[float]) -> None: - """Insert a new fact. 
Caller has already deduped via `get_by_hash`.""" + """Insert a new fact and its embedding. Caller has already deduped via `get_by_hash`.""" @abstractmethod - def get(self, fact_id: str) -> Fact | None: ... + def get(self, fact_id: str) -> Fact | None: + """Fetch a fact by id, or `None` if it does not exist.""" @abstractmethod def get_by_hash(self, *, user_id: str, content_hash: str) -> Fact | None: - """Exact-content dedup gate. Returns the existing Fact or None.""" + """Fetch the fact matching `(user_id, content_hash)`, or `None`.""" @abstractmethod def delete(self, fact_id: str) -> None: @@ -30,21 +31,20 @@ def search( self, *, query_embedding: list[float], scope: dict[str, str], limit: int, ) -> list[tuple[Fact, float]]: - """Vector similarity search filtered by scope. Returns (fact, score) tuples, - most relevant first. No threshold filtering — caller decides.""" + """Return up to `limit` `(fact, score)` tuples matching `scope`, most relevant first.""" @abstractmethod def list_by_scope( self, scope: dict[str, str], limit: int = 100, ) -> list[Fact]: - """Non-semantic listing for admin / introspection.""" + """Return up to `limit` facts matching `scope`, non-semantically.""" @abstractmethod def delete_scope(self, scope: dict[str, str]) -> int: - """Hard-delete every fact matching `scope`. Returns count deleted.""" + """Hard-delete every fact matching `scope` and return the count deleted.""" def update(self, fact_id: str, content: str, embedding: list[float]) -> None: - """Replace an existing fact in-place (Phase 2). Use delete + insert in v1.""" + """Replace an existing fact in-place (Phase 2). Use `delete` + `insert` in v1.""" raise NotImplementedError( "update() lands in Phase 2 with the auto-extractor. " "In v1, correct a fact via delete() + insert()." 
diff --git a/dynamiq/memory/long_term/long_term_memory.py b/dynamiq/memory/long_term/long_term_memory.py index 40c6a4a2b..c6d61fd16 100644 --- a/dynamiq/memory/long_term/long_term_memory.py +++ b/dynamiq/memory/long_term/long_term_memory.py @@ -86,12 +86,15 @@ def forget(self, *, fact_id: str, user_id: str) -> str: return "deleted" def list_all(self, *, user_id: str, limit: int = 100) -> list[Fact]: + """Return up to `limit` facts for `user_id`, most recent first (admin/introspection).""" return self.backend.list_by_scope({"user_id": user_id}, limit=limit) def get(self, fact_id: str) -> Fact | None: + """Fetch a fact by id, or `None` if it does not exist.""" return self.backend.get(fact_id) def clear_user(self, *, user_id: str) -> int: + """Hard-delete every fact owned by `user_id` and return the count deleted.""" return self.backend.delete_scope({"user_id": user_id}) From 3d1593d7ec17126021ba2fe3e4dae7859fbfbce3 Mon Sep 17 00:00:00 2001 From: Taras Yaroshko Date: Mon, 25 May 2026 22:58:36 +0300 Subject: [PATCH 13/53] refactor: replace string literals with ForgetStatus and MemoryToolKind enums --- dynamiq/memory/long_term/__init__.py | 3 +++ dynamiq/memory/long_term/long_term_memory.py | 21 +++++++++-------- dynamiq/memory/long_term/types.py | 17 ++++++++++++++ dynamiq/nodes/tools/long_term_memory.py | 24 ++++++++++++-------- 4 files changed, 45 insertions(+), 20 deletions(-) create mode 100644 dynamiq/memory/long_term/types.py diff --git a/dynamiq/memory/long_term/__init__.py b/dynamiq/memory/long_term/__init__.py index 45b989717..b27a04377 100644 --- a/dynamiq/memory/long_term/__init__.py +++ b/dynamiq/memory/long_term/__init__.py @@ -1,10 +1,13 @@ from dynamiq.memory.long_term.base import LongTermMemoryBackend from dynamiq.memory.long_term.long_term_memory import LongTermMemory, LongTermMemoryConfig from dynamiq.memory.long_term.schemas import Fact +from dynamiq.memory.long_term.types import ForgetStatus, MemoryToolKind __all__ = [ "Fact", + "ForgetStatus", 
"LongTermMemory", "LongTermMemoryBackend", "LongTermMemoryConfig", + "MemoryToolKind", ] diff --git a/dynamiq/memory/long_term/long_term_memory.py b/dynamiq/memory/long_term/long_term_memory.py index c6d61fd16..f4717c343 100644 --- a/dynamiq/memory/long_term/long_term_memory.py +++ b/dynamiq/memory/long_term/long_term_memory.py @@ -1,12 +1,13 @@ from datetime import UTC, datetime from hashlib import md5 -from typing import Any, Literal +from typing import Any from uuid import uuid4 from pydantic import BaseModel, ConfigDict from dynamiq.memory.long_term.base import LongTermMemoryBackend from dynamiq.memory.long_term.schemas import Fact +from dynamiq.memory.long_term.types import ForgetStatus, MemoryToolKind def _content_hash(user_id: str, content: str) -> str: @@ -71,19 +72,19 @@ def recall( limit=limit, ) - def forget(self, *, fact_id: str, user_id: str) -> str: - """Delete a fact by id, returning 'deleted' | 'not_found' | 'forbidden'. + def forget(self, *, fact_id: str, user_id: str) -> ForgetStatus: + """Delete a fact by id and return a `ForgetStatus`. Never raises on user mismatch — defence in depth above the construction-time `user_id` binding on the tool. """ fact = self.backend.get(fact_id) if fact is None: - return "not_found" + return ForgetStatus.NOT_FOUND if fact.user_id != user_id: - return "forbidden" + return ForgetStatus.FORBIDDEN self.backend.delete(fact_id) - return "deleted" + return ForgetStatus.DELETED def list_all(self, *, user_id: str, limit: int = 100) -> list[Fact]: """Return up to `limit` facts for `user_id`, most recent first (admin/introspection).""" @@ -101,8 +102,8 @@ def clear_user(self, *, user_id: str) -> int: class LongTermMemoryConfig(BaseModel): """Per-agent configuration for long-term memory tool exposure.""" - tools: tuple[Literal["remember", "recall", "forget"], ...] = ( - "remember", - "recall", - "forget", + tools: tuple[MemoryToolKind, ...] 
= ( + MemoryToolKind.REMEMBER, + MemoryToolKind.RECALL, + MemoryToolKind.FORGET, ) diff --git a/dynamiq/memory/long_term/types.py b/dynamiq/memory/long_term/types.py new file mode 100644 index 000000000..2a9ca4c8e --- /dev/null +++ b/dynamiq/memory/long_term/types.py @@ -0,0 +1,17 @@ +from enum import Enum + + +class ForgetStatus(str, Enum): + """Outcome of `LongTermMemory.forget()`.""" + + DELETED = "deleted" + NOT_FOUND = "not_found" + FORBIDDEN = "forbidden" + + +class MemoryToolKind(str, Enum): + """Kinds of long-term-memory tools exposed to an agent.""" + + REMEMBER = "remember" + RECALL = "recall" + FORGET = "forget" diff --git a/dynamiq/nodes/tools/long_term_memory.py b/dynamiq/nodes/tools/long_term_memory.py index 3d4b03210..ed4efb23d 100644 --- a/dynamiq/nodes/tools/long_term_memory.py +++ b/dynamiq/nodes/tools/long_term_memory.py @@ -2,11 +2,10 @@ from pydantic import BaseModel, ConfigDict, Field -from dynamiq.memory.long_term import LongTermMemory +from dynamiq.memory.long_term import LongTermMemory, MemoryToolKind from dynamiq.nodes.node import Node from dynamiq.nodes.types import NodeGroup - REMEMBER_DESCRIPTION = ( "Record a durable fact about the current user that should persist across " "conversations (preferences, constraints, recurring context, biographical info). " @@ -119,10 +118,10 @@ def execute(self, input_data: ForgetFactInputSchema, config=None, **kwargs) -> d return {"content": {"status": status}} -_TOOL_BUILDERS: dict[str, type[_LongTermMemoryTool]] = { - "remember": RememberFactTool, - "recall": RecallFactsTool, - "forget": ForgetFactTool, +_TOOL_BUILDERS: dict[MemoryToolKind, type[_LongTermMemoryTool]] = { + MemoryToolKind.REMEMBER: RememberFactTool, + MemoryToolKind.RECALL: RecallFactsTool, + MemoryToolKind.FORGET: ForgetFactTool, } @@ -130,13 +129,18 @@ def build_long_term_memory_tools( *, long_term_memory: LongTermMemory, user_id: str, - include: tuple[str, ...] 
= ("remember", "recall", "forget"), + include: tuple[MemoryToolKind | str, ...] = ( + MemoryToolKind.REMEMBER, + MemoryToolKind.RECALL, + MemoryToolKind.FORGET, + ), ) -> list[Node]: """Construct long-term-memory tools with `user_id` baked in. Unknown keys in `include` are ignored.""" tools: list[Node] = [] for kind in include: - cls = _TOOL_BUILDERS.get(kind) - if cls is None: + try: + tool_kind = MemoryToolKind(kind) + except ValueError: continue - tools.append(cls(long_term_memory=long_term_memory, user_id=user_id)) + tools.append(_TOOL_BUILDERS[tool_kind](long_term_memory=long_term_memory, user_id=user_id)) return tools From 5470ef74fab587eff0c7cb7756436748965a59d6 Mon Sep 17 00:00:00 2001 From: Taras Yaroshko Date: Wed, 27 May 2026 01:36:48 +0300 Subject: [PATCH 14/53] refactor: use typed connection fields in long-term memory backends --- .../memory/long_term/backends/in_memory.py | 2 ++ dynamiq/memory/long_term/backends/pgvector.py | 25 ++++++++++++++++--- dynamiq/memory/long_term/backends/qdrant.py | 19 +++++++++++--- .../memory/test_pgvector_fact_backend.py | 19 +++++++++++++- .../memory/test_qdrant_fact_backend.py | 3 ++- 5 files changed, 59 insertions(+), 9 deletions(-) diff --git a/dynamiq/memory/long_term/backends/in_memory.py b/dynamiq/memory/long_term/backends/in_memory.py index 662a62519..e07fec868 100644 --- a/dynamiq/memory/long_term/backends/in_memory.py +++ b/dynamiq/memory/long_term/backends/in_memory.py @@ -8,6 +8,8 @@ class InMemoryFactBackend(LongTermMemoryBackend): """Dict + numpy-cosine backend. 
Loses data on restart.""" + name: str = "InMemoryFactBackend" + _facts: dict[str, Fact] = PrivateAttr(default_factory=dict) _vectors: dict[str, list[float]] = PrivateAttr(default_factory=dict) diff --git a/dynamiq/memory/long_term/backends/pgvector.py b/dynamiq/memory/long_term/backends/pgvector.py index fafd68ef1..f1066802d 100644 --- a/dynamiq/memory/long_term/backends/pgvector.py +++ b/dynamiq/memory/long_term/backends/pgvector.py @@ -1,10 +1,13 @@ +from typing import Any + import psycopg from pgvector.psycopg import register_vector from psycopg.rows import dict_row from psycopg.sql import SQL, Composed, Identifier from psycopg.types.json import Jsonb -from pydantic import ConfigDict, PrivateAttr +from pydantic import ConfigDict, Field, PrivateAttr +from dynamiq.connections import PostgreSQL as PostgreSQLConnection from dynamiq.memory.long_term.base import LongTermMemoryBackend from dynamiq.memory.long_term.schemas import Fact @@ -62,14 +65,30 @@ class PgvectorFactBackend(LongTermMemoryBackend): model_config = ConfigDict(arbitrary_types_allowed=True) - dsn: str + name: str = "PgvectorFactBackend" + connection: PostgreSQLConnection = Field(default_factory=PostgreSQLConnection) table_name: str = "user_facts" dimension: int = 1536 _conn: psycopg.Connection | None = PrivateAttr(default=None) + @property + def to_dict_exclude_params(self) -> dict[str, bool]: + return super().to_dict_exclude_params | {"_conn": True, "connection": True} + + def to_dict(self, include_secure_params: bool = False, for_tracing: bool = False, **kwargs) -> dict[str, Any]: + exclude = kwargs.pop("exclude", self.to_dict_exclude_params.copy()) + data = self.model_dump(exclude=exclude, **kwargs) + data["connection"] = self.connection.to_dict(for_tracing=for_tracing) + return data + def model_post_init(self, __context) -> None: - self._conn = psycopg.connect(self.dsn, autocommit=True) + self._conn = self.connection.connect() + self._conn.autocommit = True + # CREATE EXTENSION must run BEFORE 
register_vector, otherwise the + # type adapter has nothing to bind to ("vector type not found"). + with self._conn.cursor() as cur: + cur.execute(_CREATE_EXTENSION_SQL) register_vector(self._conn) @property diff --git a/dynamiq/memory/long_term/backends/qdrant.py b/dynamiq/memory/long_term/backends/qdrant.py index b30ebc2f8..340ec60de 100644 --- a/dynamiq/memory/long_term/backends/qdrant.py +++ b/dynamiq/memory/long_term/backends/qdrant.py @@ -1,5 +1,6 @@ import uuid from datetime import datetime +from typing import Any from pydantic import ConfigDict, PrivateAttr from qdrant_client import QdrantClient @@ -13,10 +14,10 @@ VectorParams, ) +from dynamiq.connections import Qdrant as QdrantConnection from dynamiq.memory.long_term.base import LongTermMemoryBackend from dynamiq.memory.long_term.schemas import Fact - _UUID_NAMESPACE = uuid.UUID("00000000-0000-0000-0000-000000000000") @@ -69,15 +70,25 @@ class QdrantFactBackend(LongTermMemoryBackend): model_config = ConfigDict(arbitrary_types_allowed=True) - url: str = "http://localhost:6333" - api_key: str | None = None + name: str = "QdrantFactBackend" + connection: QdrantConnection collection_name: str = "user_facts" dimension: int = 1536 _client: QdrantClient | None = PrivateAttr(default=None) + @property + def to_dict_exclude_params(self) -> dict[str, bool]: + return super().to_dict_exclude_params | {"_client": True, "connection": True} + + def to_dict(self, include_secure_params: bool = False, for_tracing: bool = False, **kwargs) -> dict[str, Any]: + exclude = kwargs.pop("exclude", self.to_dict_exclude_params.copy()) + data = self.model_dump(exclude=exclude, **kwargs) + data["connection"] = self.connection.to_dict(for_tracing=for_tracing) + return data + def model_post_init(self, __context) -> None: - self._client = QdrantClient(url=self.url, api_key=self.api_key) + self._client = self.connection.connect() def ensure_collection(self) -> None: """Create the facts collection and payload indexes if absent. 
Safe to call repeatedly.""" diff --git a/tests/integration_with_creds/memory/test_pgvector_fact_backend.py b/tests/integration_with_creds/memory/test_pgvector_fact_backend.py index 93eb29110..f4518fd05 100644 --- a/tests/integration_with_creds/memory/test_pgvector_fact_backend.py +++ b/tests/integration_with_creds/memory/test_pgvector_fact_backend.py @@ -5,9 +5,11 @@ """ import os from datetime import UTC, datetime +from urllib.parse import urlparse import pytest +from dynamiq.connections import PostgreSQL as PostgreSQLConnection from dynamiq.memory.long_term.backends.pgvector import PgvectorFactBackend from dynamiq.memory.long_term.schemas import Fact @@ -15,9 +17,24 @@ pytestmark = pytest.mark.skipif(DSN is None, reason="POSTGRES_DSN not set") +def _connection_from_dsn(dsn: str) -> PostgreSQLConnection: + parsed = urlparse(dsn) + return PostgreSQLConnection( + host=parsed.hostname or "localhost", + port=parsed.port or 5432, + database=(parsed.path or "/postgres").lstrip("/"), + user=parsed.username or "postgres", + password=parsed.password or "", + ) + + @pytest.fixture def backend(): - b = PgvectorFactBackend(dsn=DSN, table_name="test_user_facts", dimension=16) + b = PgvectorFactBackend( + connection=_connection_from_dsn(DSN), + table_name="test_user_facts", + dimension=16, + ) b.recreate_table() yield b b.drop_table() diff --git a/tests/integration_with_creds/memory/test_qdrant_fact_backend.py b/tests/integration_with_creds/memory/test_qdrant_fact_backend.py index 566494556..377f8ab9b 100644 --- a/tests/integration_with_creds/memory/test_qdrant_fact_backend.py +++ b/tests/integration_with_creds/memory/test_qdrant_fact_backend.py @@ -25,6 +25,7 @@ ) +from dynamiq.connections import Qdrant as QdrantConnection # noqa: E402 from dynamiq.memory.long_term.backends.qdrant import QdrantFactBackend # noqa: E402 from dynamiq.memory.long_term.schemas import Fact # noqa: E402 @@ -32,7 +33,7 @@ @pytest.fixture def backend(): b = QdrantFactBackend( - url=QDRANT_URL, + 
connection=QdrantConnection(url=QDRANT_URL, api_key=""), collection_name="test_user_facts", dimension=16, ) From e556184be6c6e75f29001dc38ddb8af6ca338bf2 Mon Sep 17 00:00:00 2001 From: Taras Yaroshko Date: Wed, 27 May 2026 01:38:28 +0300 Subject: [PATCH 15/53] refactor: align LongTermMemory facade and ABC with dynamiq Memory conventions --- dynamiq/memory/long_term/base.py | 25 ++- dynamiq/memory/long_term/long_term_memory.py | 190 +++++++++++++----- tests/unit/memory/long_term/conftest.py | 35 ++-- .../memory/long_term/test_long_term_memory.py | 5 +- .../test_long_term_memory_integration.py | 18 +- 5 files changed, 204 insertions(+), 69 deletions(-) diff --git a/dynamiq/memory/long_term/base.py b/dynamiq/memory/long_term/base.py index d3d6e5fe3..d443f0520 100644 --- a/dynamiq/memory/long_term/base.py +++ b/dynamiq/memory/long_term/base.py @@ -1,8 +1,11 @@ from abc import ABC, abstractmethod +from functools import cached_property +from typing import Any -from pydantic import BaseModel, ConfigDict +from pydantic import BaseModel, ConfigDict, Field, computed_field from dynamiq.memory.long_term.schemas import Fact +from dynamiq.utils import generate_uuid class LongTermMemoryBackend(ABC, BaseModel): @@ -10,6 +13,26 @@ class LongTermMemoryBackend(ABC, BaseModel): model_config = ConfigDict(arbitrary_types_allowed=True) + name: str = "LongTermMemoryBackend" + id: str = Field(default_factory=generate_uuid) + + @computed_field + @cached_property + def type(self) -> str: + """Fully-qualified class id used by the YAML loader for polymorphic reconstruction.""" + return f"{self.__module__.rsplit('.', 1)[0]}.{self.__class__.__name__}" + + @property + def to_dict_exclude_params(self) -> dict[str, bool]: + """Field names to exclude from serialization (overridden by subclasses).""" + return {} + + def to_dict(self, include_secure_params: bool = False, **kwargs) -> dict[str, Any]: + """Serialize the backend to a dict for workflow YAML round-trip.""" + 
kwargs.pop("include_secure_params", None) + kwargs.pop("for_tracing", None) + return self.model_dump(exclude=kwargs.pop("exclude", self.to_dict_exclude_params), **kwargs) + @abstractmethod def insert(self, fact: Fact, embedding: list[float]) -> None: """Insert a new fact and its embedding. Caller has already deduped via `get_by_hash`.""" diff --git a/dynamiq/memory/long_term/long_term_memory.py b/dynamiq/memory/long_term/long_term_memory.py index f4717c343..2c3ebc14d 100644 --- a/dynamiq/memory/long_term/long_term_memory.py +++ b/dynamiq/memory/long_term/long_term_memory.py @@ -1,13 +1,23 @@ from datetime import UTC, datetime +from functools import cached_property from hashlib import md5 from typing import Any from uuid import uuid4 -from pydantic import BaseModel, ConfigDict +from pydantic import BaseModel, ConfigDict, Field, computed_field +from dynamiq.memory.long_term.backends import InMemoryFactBackend from dynamiq.memory.long_term.base import LongTermMemoryBackend from dynamiq.memory.long_term.schemas import Fact from dynamiq.memory.long_term.types import ForgetStatus, MemoryToolKind +from dynamiq.nodes.embedders.base import TextEmbedder, TextEmbedderInputSchema +from dynamiq.utils.logger import logger + + +class LongTermMemoryError(Exception): + """Base exception for `LongTermMemory` operations.""" + + pass def _content_hash(user_id: str, content: str) -> str: @@ -16,87 +26,171 @@ def _content_hash(user_id: str, content: str) -> str: return md5(f"{user_id}:{normalised}".encode(), usedforsecurity=False).hexdigest() -def _embed(embedder: Any, text: str) -> list[float]: - result = embedder.execute({"query": text}) +def _embed(embedder: TextEmbedder, text: str) -> list[float]: + result = embedder.execute(input_data=TextEmbedderInputSchema(query=text)) return list(result["embedding"]) class LongTermMemory(BaseModel): - """Tool-driven, user-scoped, fact-shaped memory.""" + """Tool-driven, user-scoped, fact-shaped memory that persists across sessions.""" 
model_config = ConfigDict(arbitrary_types_allowed=True) - backend: LongTermMemoryBackend - embedder: Any + backend: LongTermMemoryBackend = Field( + default_factory=InMemoryFactBackend, + description="Backend storage implementation for facts and their embeddings.", + ) + embedder: TextEmbedder = Field( + ..., + description="Text embedder used to vectorize facts on write and queries on read.", + ) + + @computed_field + @cached_property + def type(self) -> str: + """Fully-qualified class id used by the YAML loader for reconstruction.""" + return f"{self.__module__.rsplit('.', 1)[0]}.{self.__class__.__name__}" + + @property + def to_dict_exclude_params(self) -> dict[str, bool]: + """Fields excluded from default model_dump; re-added by `to_dict`.""" + return {"backend": True, "embedder": True} + + def to_dict(self, include_secure_params: bool = False, **kwargs) -> dict[str, Any]: + """Serialize so backend and embedder round-trip via their own `to_dict`.""" + for_tracing = kwargs.pop("for_tracing", False) + data = self.model_dump(exclude=kwargs.pop("exclude", self.to_dict_exclude_params), **kwargs) + data["backend"] = self.backend.to_dict( + include_secure_params=include_secure_params, for_tracing=for_tracing, **kwargs + ) + data["embedder"] = self.embedder.to_dict( + include_secure_params=include_secure_params, for_tracing=for_tracing, **kwargs + ) + return data def remember( self, *, content: str, user_id: str, metadata: dict[str, Any] | None = None, ) -> Fact: - """Add a fact. Idempotent on (user_id, normalised content).""" + """Add a fact for `user_id`. Idempotent on the (user_id, normalised content) pair. + + Args: + content: The fact text. + user_id: Owner of the fact. Required. + metadata: Optional free-form metadata stored alongside the fact. + + Raises: + LongTermMemoryError: If content is empty or storage fails. 
+ """ if not content or not content.strip(): - raise ValueError("Fact content cannot be empty") - - normalised = content.strip() - content_hash = _content_hash(user_id, normalised) - - existing = self.backend.get_by_hash(user_id=user_id, content_hash=content_hash) - if existing is not None: - return existing - - now = datetime.now(UTC) - embedding = _embed(self.embedder, normalised) - fact = Fact( - id=str(uuid4()), - content=normalised, - hash=content_hash, - user_id=user_id, - metadata=metadata or {}, - created_at=now, - updated_at=now, - ) - self.backend.insert(fact, embedding) - return fact + raise LongTermMemoryError("Fact content cannot be empty") + try: + normalised = content.strip() + content_hash = _content_hash(user_id, normalised) + + existing = self.backend.get_by_hash(user_id=user_id, content_hash=content_hash) + if existing is not None: + logger.debug(f"LongTermMemory: dedup hit for user={user_id}, returning existing fact {existing.id}") + return existing + + now = datetime.now(UTC) + embedding = _embed(self.embedder, normalised) + fact = Fact( + id=str(uuid4()), + content=normalised, + hash=content_hash, + user_id=user_id, + metadata=metadata or {}, + created_at=now, + updated_at=now, + ) + self.backend.insert(fact, embedding) + logger.debug(f"LongTermMemory: stored fact {fact.id} for user={user_id}") + return fact + except Exception as e: + logger.error(f"LongTermMemory.remember failed for user={user_id}: {e}") + raise LongTermMemoryError(f"Failed to remember fact: {e}") from e def recall( self, *, query: str, user_id: str, limit: int = 5, ) -> list[tuple[Fact, float]]: - """Semantic search for facts relevant to `query`, scoped to `user_id`.""" + """Semantic search for facts relevant to `query`, scoped to `user_id`. + + Args: + query: Natural-language query string. + user_id: Owner whose facts to search. + limit: Maximum number of (fact, score) tuples to return. + + Raises: + LongTermMemoryError: If the query is empty or search fails. 
+ """ stripped = query.strip() if query else "" if not stripped: - raise ValueError("recall query cannot be empty") - embedding = _embed(self.embedder, stripped) - return self.backend.search( - query_embedding=embedding, - scope={"user_id": user_id}, - limit=limit, - ) + raise LongTermMemoryError("Recall query cannot be empty") + try: + embedding = _embed(self.embedder, stripped) + results = self.backend.search( + query_embedding=embedding, + scope={"user_id": user_id}, + limit=limit, + ) + logger.debug(f"LongTermMemory: recall for user={user_id} returned {len(results)} facts") + return results + except Exception as e: + logger.error(f"LongTermMemory.recall failed for user={user_id}: {e}") + raise LongTermMemoryError(f"Failed to recall facts: {e}") from e def forget(self, *, fact_id: str, user_id: str) -> ForgetStatus: - """Delete a fact by id and return a `ForgetStatus`. + """Delete a fact by id, returning a `ForgetStatus`. Never raises on user mismatch — defence in depth above the construction-time `user_id` binding on the tool. + + Raises: + LongTermMemoryError: If the storage delete fails for any other reason. 
""" - fact = self.backend.get(fact_id) - if fact is None: - return ForgetStatus.NOT_FOUND - if fact.user_id != user_id: - return ForgetStatus.FORBIDDEN - self.backend.delete(fact_id) - return ForgetStatus.DELETED + try: + fact = self.backend.get(fact_id) + if fact is None: + return ForgetStatus.NOT_FOUND + if fact.user_id != user_id: + logger.warning( + f"LongTermMemory.forget: cross-user delete blocked " + f"(owner={fact.user_id}, caller={user_id}, fact={fact_id})" + ) + return ForgetStatus.FORBIDDEN + self.backend.delete(fact_id) + logger.debug(f"LongTermMemory: deleted fact {fact_id} for user={user_id}") + return ForgetStatus.DELETED + except Exception as e: + logger.error(f"LongTermMemory.forget failed for fact={fact_id}, user={user_id}: {e}") + raise LongTermMemoryError(f"Failed to forget fact: {e}") from e def list_all(self, *, user_id: str, limit: int = 100) -> list[Fact]: """Return up to `limit` facts for `user_id`, most recent first (admin/introspection).""" - return self.backend.list_by_scope({"user_id": user_id}, limit=limit) + try: + return self.backend.list_by_scope({"user_id": user_id}, limit=limit) + except Exception as e: + logger.error(f"LongTermMemory.list_all failed for user={user_id}: {e}") + raise LongTermMemoryError(f"Failed to list facts: {e}") from e def get(self, fact_id: str) -> Fact | None: """Fetch a fact by id, or `None` if it does not exist.""" - return self.backend.get(fact_id) + try: + return self.backend.get(fact_id) + except Exception as e: + logger.error(f"LongTermMemory.get failed for fact={fact_id}: {e}") + raise LongTermMemoryError(f"Failed to fetch fact: {e}") from e def clear_user(self, *, user_id: str) -> int: """Hard-delete every fact owned by `user_id` and return the count deleted.""" - return self.backend.delete_scope({"user_id": user_id}) + try: + deleted = self.backend.delete_scope({"user_id": user_id}) + logger.debug(f"LongTermMemory: cleared {deleted} facts for user={user_id}") + return deleted + except Exception as 
e: + logger.error(f"LongTermMemory.clear_user failed for user={user_id}: {e}") + raise LongTermMemoryError(f"Failed to clear user facts: {e}") from e class LongTermMemoryConfig(BaseModel): diff --git a/tests/unit/memory/long_term/conftest.py b/tests/unit/memory/long_term/conftest.py index e41b141ff..4afb56296 100644 --- a/tests/unit/memory/long_term/conftest.py +++ b/tests/unit/memory/long_term/conftest.py @@ -1,32 +1,39 @@ """Shared fixtures for long-term memory unit tests.""" import hashlib -from typing import Any +from typing import ClassVar import pytest +from dynamiq.connections import BaseConnection +from dynamiq.nodes.embedders.base import TextEmbedder, TextEmbedderInputSchema -class FakeTextEmbedder: - """Deterministic text embedder for tests. + +class _StubConnection(BaseConnection): + """No-op connection used only to satisfy ConnectionNode's connection/client validator.""" + + def connect(self) -> None: + return None + + +class FakeTextEmbedder(TextEmbedder): + """Deterministic `TextEmbedder` subclass for tests. Maps text to a fixed-length unit vector derived from its sha256 digest. Same text → same vector. Different texts → near-orthogonal vectors - (good enough for cosine ranking in unit tests). + (good enough for cosine ranking in unit tests). Bypasses any real + `text_embedder` component. 
""" - DIM = 16 + name: str = "fake-text-embedder" + connection: BaseConnection = _StubConnection() + DIM: ClassVar[int] = 16 - def execute(self, input_data: Any, **kwargs) -> dict: - # Mirror TextEmbedder.execute output shape: {"query": ..., "embedding": ...} - if hasattr(input_data, "query"): - text = input_data.query - elif isinstance(input_data, dict): - text = input_data["query"] - else: - text = str(input_data) + def execute(self, input_data: TextEmbedderInputSchema, config=None, **kwargs) -> dict: + text = input_data.query if hasattr(input_data, "query") else input_data["query"] return {"query": text, "embedding": self._embed(text)} def embed(self, text: str) -> list[float]: - """Convenience helper for tests that want a raw vector.""" + """Convenience helper for tests that want a raw vector without an InputSchema.""" return self._embed(text) @classmethod diff --git a/tests/unit/memory/long_term/test_long_term_memory.py b/tests/unit/memory/long_term/test_long_term_memory.py index 630f16748..0b7f203c6 100644 --- a/tests/unit/memory/long_term/test_long_term_memory.py +++ b/tests/unit/memory/long_term/test_long_term_memory.py @@ -3,6 +3,7 @@ from dynamiq.memory.long_term import LongTermMemory from dynamiq.memory.long_term.backends.in_memory import InMemoryFactBackend +from dynamiq.memory.long_term.long_term_memory import LongTermMemoryError @pytest.fixture @@ -43,7 +44,7 @@ def test_remember_normalises_whitespace_for_dedup(ltm, user_id): def test_remember_rejects_empty_content(ltm, user_id): - with pytest.raises(ValueError): + with pytest.raises(LongTermMemoryError): ltm.remember(content=" ", user_id=user_id) @@ -84,7 +85,7 @@ def test_recall_empty_store_returns_empty(ltm, user_id): def test_recall_rejects_empty_query(ltm, user_id): - with pytest.raises(ValueError): + with pytest.raises(LongTermMemoryError): ltm.recall(query=" ", user_id=user_id, limit=5) diff --git a/tests/unit/nodes/agents/test_long_term_memory_integration.py 
b/tests/unit/nodes/agents/test_long_term_memory_integration.py index 2c1557fb7..6e2a878ed 100644 --- a/tests/unit/nodes/agents/test_long_term_memory_integration.py +++ b/tests/unit/nodes/agents/test_long_term_memory_integration.py @@ -9,22 +9,32 @@ """ import hashlib from types import SimpleNamespace +from typing import ClassVar from unittest.mock import patch import pytest +from dynamiq.connections import BaseConnection from dynamiq.connections import OpenAI as OpenAIConnection from dynamiq.memory.long_term import LongTermMemory, LongTermMemoryConfig from dynamiq.memory.long_term.backends.in_memory import InMemoryFactBackend from dynamiq.nodes.agents.base import Agent +from dynamiq.nodes.embedders.base import TextEmbedder, TextEmbedderInputSchema from dynamiq.nodes.llms import OpenAI -class _FakeEmbedder: - DIM = 16 +class _StubConnection(BaseConnection): + def connect(self) -> None: + return None - def execute(self, input_data, **kwargs): - text = input_data["query"] if isinstance(input_data, dict) else input_data.query + +class _FakeEmbedder(TextEmbedder): + name: str = "fake-text-embedder" + connection: BaseConnection = _StubConnection() + DIM: ClassVar[int] = 16 + + def execute(self, input_data: TextEmbedderInputSchema, config=None, **kwargs): + text = input_data.query if hasattr(input_data, "query") else input_data["query"] digest = hashlib.sha256(text.encode("utf-8")).digest() raw = [(b / 127.5) - 1.0 for b in digest[: self.DIM]] norm = sum(x * x for x in raw) ** 0.5 or 1.0 From bcc026785dbdaa01dc2fc644702db31762f63fc2 Mon Sep 17 00:00:00 2001 From: Taras Yaroshko Date: Wed, 27 May 2026 01:39:54 +0300 Subject: [PATCH 16/53] feat: serialize Agent.long_term_memory through workflow YAML round-trip --- dynamiq/nodes/agents/base.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/dynamiq/nodes/agents/base.py b/dynamiq/nodes/agents/base.py index 248b1032b..ec1f9c431 100644 --- a/dynamiq/nodes/agents/base.py +++ b/dynamiq/nodes/agents/base.py @@ -396,6 +396,8 
@@ def to_dict_exclude_params(self): "llm": True, "tools": True, "memory": True, + "long_term_memory": True, + "long_term_memory_config": True, "files": True, "images": True, "file_store": True, @@ -414,6 +416,8 @@ def to_dict(self, **kwargs) -> dict: data["tools"] = data["tools"] + [mcp_server.to_dict(**kwargs) for mcp_server in self._mcp_servers] data["memory"] = self.memory.to_dict(**kwargs) if self.memory else None + data["long_term_memory"] = self.long_term_memory.to_dict(**kwargs) if self.long_term_memory else None + data["long_term_memory_config"] = self.long_term_memory_config.model_dump() if self.files: data["files"] = [{"name": getattr(f, "name", f"file_{i}")} for i, f in enumerate(self.files)] if self.images: From 620e1c90e5c3bceed89dd179a91a5008625384c7 Mon Sep 17 00:00:00 2001 From: Taras Yaroshko Date: Wed, 27 May 2026 01:40:22 +0300 Subject: [PATCH 17/53] refactor: adopt canonical node lifecycle and richer descriptions in long-term memory tools --- dynamiq/nodes/tools/long_term_memory.py | 128 ++++++++++++++++++------ 1 file changed, 97 insertions(+), 31 deletions(-) diff --git a/dynamiq/nodes/tools/long_term_memory.py b/dynamiq/nodes/tools/long_term_memory.py index ed4efb23d..f9cdddae3 100644 --- a/dynamiq/nodes/tools/long_term_memory.py +++ b/dynamiq/nodes/tools/long_term_memory.py @@ -3,30 +3,79 @@ from pydantic import BaseModel, ConfigDict, Field from dynamiq.memory.long_term import LongTermMemory, MemoryToolKind -from dynamiq.nodes.node import Node +from dynamiq.nodes.node import Node, ensure_config from dynamiq.nodes.types import NodeGroup - -REMEMBER_DESCRIPTION = ( - "Record a durable fact about the current user that should persist across " - "conversations (preferences, constraints, recurring context, biographical info). " - "Use only when you've learned something that will matter in future sessions — " - "not for ephemeral turn-level state. Returns {fact_id: }. Calling twice " - "with the same content returns the same fact_id." 
-) - -RECALL_DESCRIPTION = ( - "Search the user's long-term memory for facts relevant to a query. " - "Use BEFORE answering questions where prior context (preferences, past " - "decisions, biographical info) would change the response. Returns a list of " - "{fact_id, content, score} entries, most relevant first." -) - -FORGET_DESCRIPTION = ( - "Delete a fact from the user's long-term memory by id. Use when the user " - "explicitly asks to be forgotten on something, or when a fact is wrong and " - "you have no replacement. Get the fact_id from a prior recall_facts call. " - "Returns {status: 'deleted'|'not_found'|'forbidden'}." -) +from dynamiq.runnables import RunnableConfig +from dynamiq.types.cancellation import check_cancellation +from dynamiq.utils.logger import logger + +REMEMBER_DESCRIPTION = """Persist a durable fact about the current user to long-term memory. + +Key capabilities: +- Survives across conversations and sessions — not just this chat +- Idempotent on identical content (same text returns the same fact_id, no duplicates) +- Optional structured metadata (e.g. category, source) for later filtering + +Usage strategy: +- Call when the user explicitly says "remember…", "save this…", "keep in mind for next time…" +- Call when you have learned something that will clearly matter in future sessions + (a stable preference, a constraint, a recurring context, biographical info) +- Do NOT use for ephemeral turn-level state — that is what the conversation history is for +- Do NOT use to remember tool outputs, file paths, or anything tied to this run +- The fact is scoped to the current user automatically; never pass or invent a user id + +Returns: {"fact_id": ""} — store the id if you may want to forget it later. 
+ +Examples: +- {"content": "Prefers dogs over cats"} +- {"content": "Allergic to peanuts", "metadata": {"category": "health"}} +- {"content": "Works in EST timezone", "metadata": {"category": "context"}} +""" + +RECALL_DESCRIPTION = """Search the user's long-term memory for facts relevant to a query. + +Key capabilities: +- Semantic search (not keyword) — matches meaning, paraphrases, synonyms +- Returns ranked results with similarity scores (1.0 = perfect match) +- Scoped to the current user automatically — never crosses users + +Usage strategy: +- Call PROACTIVELY at the start of a turn when the request hints at something personal + (preferences, past decisions, biographical info, recurring context) +- Call BEFORE answering questions where prior context would change your response +- A low score (< ~0.3) means weakly related — use judgment, do not blindly include +- Each result has a `fact_id` — keep it if you may want to forget that fact later +- Skip when the question is purely factual or has no user-specific component + +Returns: list of {"fact_id": "", "content": "", "score": }, most relevant first. + +Examples: +- {"query": "food preferences"} +- {"query": "what does the user do for work?", "limit": 3} +- {"query": "timezone or schedule constraints", "limit": 10} +""" + +FORGET_DESCRIPTION = """Delete a single fact from the user's long-term memory by id. 
+ +Key capabilities: +- Hard delete — the fact is gone, not just hidden +- Cross-user safety: returns "forbidden" if the fact belongs to another user +- Returns "not_found" if the id does not exist (already gone, or never existed) + +Usage strategy: +- Call ONLY when the user explicitly asks to forget something, or you've discovered + a fact is wrong and have no replacement to write +- ALWAYS get the fact_id from a prior `recall_facts` (or the original `remember_fact` + response) — do NOT guess or fabricate an id +- To CORRECT a wrong fact, prefer `remember_fact` with the corrected content first + (the old one stays, but new searches surface the correction); only call `forget_fact` + if the user wants the stale fact removed + +Returns: {"status": "deleted" | "not_found" | "forbidden"}. + +Examples: +- {"fact_id": "8f1c2b40-9d3e-4a8c-9c1f-0d2e3a4b5c6d"} +""" class RememberFactInputSchema(BaseModel): @@ -72,7 +121,14 @@ class RememberFactTool(_LongTermMemoryTool): description: str = REMEMBER_DESCRIPTION input_schema: ClassVar[type[RememberFactInputSchema]] = RememberFactInputSchema - def execute(self, input_data: RememberFactInputSchema, config=None, **kwargs) -> dict: + def execute( + self, input_data: RememberFactInputSchema, config: RunnableConfig | None = None, **kwargs + ) -> dict[str, Any]: + logger.debug(f"Tool {self.name} - {self.id}: started") + config = ensure_config(config) + check_cancellation(config) + self.run_on_node_execute_run(config.callbacks, **kwargs) + fact = self.long_term_memory.remember( content=input_data.content, user_id=self.user_id, @@ -88,18 +144,21 @@ class RecallFactsTool(_LongTermMemoryTool): description: str = RECALL_DESCRIPTION input_schema: ClassVar[type[RecallFactsInputSchema]] = RecallFactsInputSchema - def execute(self, input_data: RecallFactsInputSchema, config=None, **kwargs) -> dict: + def execute( + self, input_data: RecallFactsInputSchema, config: RunnableConfig | None = None, **kwargs + ) -> dict[str, Any]: + 
logger.debug(f"Tool {self.name} - {self.id}: started") + config = ensure_config(config) + check_cancellation(config) + self.run_on_node_execute_run(config.callbacks, **kwargs) + hits = self.long_term_memory.recall( query=input_data.query, user_id=self.user_id, limit=input_data.limit, ) return { - "content": [ - {"fact_id": fact.id, "content": fact.content, - "score": round(score, 4)} - for fact, score in hits - ] + "content": [{"fact_id": fact.id, "content": fact.content, "score": round(score, 4)} for fact, score in hits] } @@ -110,7 +169,14 @@ class ForgetFactTool(_LongTermMemoryTool): description: str = FORGET_DESCRIPTION input_schema: ClassVar[type[ForgetFactInputSchema]] = ForgetFactInputSchema - def execute(self, input_data: ForgetFactInputSchema, config=None, **kwargs) -> dict: + def execute( + self, input_data: ForgetFactInputSchema, config: RunnableConfig | None = None, **kwargs + ) -> dict[str, Any]: + logger.debug(f"Tool {self.name} - {self.id}: started") + config = ensure_config(config) + check_cancellation(config) + self.run_on_node_execute_run(config.callbacks, **kwargs) + status = self.long_term_memory.forget( fact_id=input_data.fact_id, user_id=self.user_id, From 58fbd7c3672d3397f6fe627c1adc74d37f604375 Mon Sep 17 00:00:00 2001 From: Taras Yaroshko Date: Wed, 27 May 2026 01:40:27 +0300 Subject: [PATCH 18/53] feat: log when long-term memory tools attach to an agent run --- dynamiq/nodes/agents/base.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/dynamiq/nodes/agents/base.py b/dynamiq/nodes/agents/base.py index ec1f9c431..290fa8e77 100644 --- a/dynamiq/nodes/agents/base.py +++ b/dynamiq/nodes/agents/base.py @@ -638,6 +638,13 @@ def execute( _tools_before_ltm = self.tools if ltm_tools: self.tools = list(_tools_before_ltm) + ltm_tools + logger.info( + "Agent %s - %s: attached %d long-term memory tools (%s)", + self.name, + self.id, + len(ltm_tools), + ", ".join(t.name for t in ltm_tools), + ) if use_memory: history_messages = 
self._retrieve_memory(input_data) From f61ac31507cb7f4cd8e4c0dc95d6fbff5d41e3ac Mon Sep 17 00:00:00 2001 From: Taras Yaroshko Date: Wed, 27 May 2026 12:32:07 +0300 Subject: [PATCH 19/53] fix: rename memory backends --- dynamiq/memory/long_term/backends/__init__.py | 6 +++--- dynamiq/memory/long_term/backends/pgvector.py | 4 ++-- dynamiq/memory/long_term/backends/qdrant.py | 4 ++-- .../memory/test_pgvector_fact_backend.py | 6 +++--- .../memory/test_qdrant_fact_backend.py | 6 +++--- 5 files changed, 13 insertions(+), 13 deletions(-) diff --git a/dynamiq/memory/long_term/backends/__init__.py b/dynamiq/memory/long_term/backends/__init__.py index 1f1360879..259a4087c 100644 --- a/dynamiq/memory/long_term/backends/__init__.py +++ b/dynamiq/memory/long_term/backends/__init__.py @@ -1,5 +1,5 @@ from dynamiq.memory.long_term.backends.in_memory import InMemoryFactBackend -from dynamiq.memory.long_term.backends.pgvector import PgvectorFactBackend -from dynamiq.memory.long_term.backends.qdrant import QdrantFactBackend +from dynamiq.memory.long_term.backends.pgvector import PostgresLongTermMemoryBackend +from dynamiq.memory.long_term.backends.qdrant import QdrantLongTermMemoryBackend -__all__ = ["InMemoryFactBackend", "PgvectorFactBackend", "QdrantFactBackend"] +__all__ = ["InMemoryFactBackend", "PostgresLongTermMemoryBackend", "QdrantLongTermMemoryBackend"] diff --git a/dynamiq/memory/long_term/backends/pgvector.py b/dynamiq/memory/long_term/backends/pgvector.py index f1066802d..74b3afe44 100644 --- a/dynamiq/memory/long_term/backends/pgvector.py +++ b/dynamiq/memory/long_term/backends/pgvector.py @@ -60,12 +60,12 @@ def _row_to_fact(row) -> Fact: _FACT_COLUMNS = SQL("id, content, hash, user_id, metadata, created_at, updated_at") -class PgvectorFactBackend(LongTermMemoryBackend): +class PostgresLongTermMemoryBackend(LongTermMemoryBackend): """Long-term memory backend backed by Postgres + pgvector.""" model_config = ConfigDict(arbitrary_types_allowed=True) - name: str = 
"PgvectorFactBackend" + name: str = "PostgresLongTermMemoryBackend" connection: PostgreSQLConnection = Field(default_factory=PostgreSQLConnection) table_name: str = "user_facts" dimension: int = 1536 diff --git a/dynamiq/memory/long_term/backends/qdrant.py b/dynamiq/memory/long_term/backends/qdrant.py index 340ec60de..799571df3 100644 --- a/dynamiq/memory/long_term/backends/qdrant.py +++ b/dynamiq/memory/long_term/backends/qdrant.py @@ -65,12 +65,12 @@ def _payload_to_fact(payload: dict) -> Fact: ) -class QdrantFactBackend(LongTermMemoryBackend): +class QdrantLongTermMemoryBackend(LongTermMemoryBackend): """Long-term memory backend backed by Qdrant.""" model_config = ConfigDict(arbitrary_types_allowed=True) - name: str = "QdrantFactBackend" + name: str = "QdrantLongTermMemoryBackend" connection: QdrantConnection collection_name: str = "user_facts" dimension: int = 1536 diff --git a/tests/integration_with_creds/memory/test_pgvector_fact_backend.py b/tests/integration_with_creds/memory/test_pgvector_fact_backend.py index f4518fd05..518b5ab57 100644 --- a/tests/integration_with_creds/memory/test_pgvector_fact_backend.py +++ b/tests/integration_with_creds/memory/test_pgvector_fact_backend.py @@ -1,4 +1,4 @@ -"""Integration tests for PgvectorFactBackend. +"""Integration tests for PostgresLongTermMemoryBackend. Requires `POSTGRES_DSN` to point at a live Postgres with the pgvector extension installed. Without it, this whole module skips. 
@@ -10,7 +10,7 @@ import pytest from dynamiq.connections import PostgreSQL as PostgreSQLConnection -from dynamiq.memory.long_term.backends.pgvector import PgvectorFactBackend +from dynamiq.memory.long_term.backends.pgvector import PostgresLongTermMemoryBackend from dynamiq.memory.long_term.schemas import Fact DSN = os.getenv("POSTGRES_DSN") @@ -30,7 +30,7 @@ def _connection_from_dsn(dsn: str) -> PostgreSQLConnection: @pytest.fixture def backend(): - b = PgvectorFactBackend( + b = PostgresLongTermMemoryBackend( connection=_connection_from_dsn(DSN), table_name="test_user_facts", dimension=16, diff --git a/tests/integration_with_creds/memory/test_qdrant_fact_backend.py b/tests/integration_with_creds/memory/test_qdrant_fact_backend.py index 377f8ab9b..a6459af2a 100644 --- a/tests/integration_with_creds/memory/test_qdrant_fact_backend.py +++ b/tests/integration_with_creds/memory/test_qdrant_fact_backend.py @@ -1,4 +1,4 @@ -"""Integration tests for QdrantFactBackend. +"""Integration tests for QdrantLongTermMemoryBackend. Requires a reachable Qdrant. Set `QDRANT_URL` (defaults to `http://localhost:6333`). Without one, this whole module skips. 
@@ -26,13 +26,13 @@ from dynamiq.connections import Qdrant as QdrantConnection # noqa: E402 -from dynamiq.memory.long_term.backends.qdrant import QdrantFactBackend # noqa: E402 +from dynamiq.memory.long_term.backends.qdrant import QdrantLongTermMemoryBackend # noqa: E402 from dynamiq.memory.long_term.schemas import Fact # noqa: E402 @pytest.fixture def backend(): - b = QdrantFactBackend( + b = QdrantLongTermMemoryBackend( connection=QdrantConnection(url=QDRANT_URL, api_key=""), collection_name="test_user_facts", dimension=16, From 6bf75fff3a2db9ce0961d257940a1011a035ef6d Mon Sep 17 00:00:00 2001 From: Taras Yaroshko Date: Wed, 27 May 2026 12:36:18 +0300 Subject: [PATCH 20/53] chore: update node names for memory backends --- dynamiq/memory/long_term/backends/__init__.py | 4 +-- .../memory/long_term/backends/in_memory.py | 4 +-- dynamiq/memory/long_term/backends/pgvector.py | 2 +- dynamiq/memory/long_term/backends/qdrant.py | 2 +- dynamiq/memory/long_term/base.py | 2 +- dynamiq/memory/long_term/long_term_memory.py | 4 +-- .../long_term/test_in_memory_backend.py | 32 +++++++++---------- .../memory/long_term/test_long_term_memory.py | 4 +-- tests/unit/memory/long_term/test_tools.py | 4 +-- .../test_long_term_memory_integration.py | 4 +-- 10 files changed, 31 insertions(+), 31 deletions(-) diff --git a/dynamiq/memory/long_term/backends/__init__.py b/dynamiq/memory/long_term/backends/__init__.py index 259a4087c..1d8eff316 100644 --- a/dynamiq/memory/long_term/backends/__init__.py +++ b/dynamiq/memory/long_term/backends/__init__.py @@ -1,5 +1,5 @@ -from dynamiq.memory.long_term.backends.in_memory import InMemoryFactBackend +from dynamiq.memory.long_term.backends.in_memory import InMemoryLongTermMemoryBackend from dynamiq.memory.long_term.backends.pgvector import PostgresLongTermMemoryBackend from dynamiq.memory.long_term.backends.qdrant import QdrantLongTermMemoryBackend -__all__ = ["InMemoryFactBackend", "PostgresLongTermMemoryBackend", "QdrantLongTermMemoryBackend"] 
+__all__ = ["InMemoryLongTermMemoryBackend", "PostgresLongTermMemoryBackend", "QdrantLongTermMemoryBackend"] diff --git a/dynamiq/memory/long_term/backends/in_memory.py b/dynamiq/memory/long_term/backends/in_memory.py index e07fec868..14d6bd702 100644 --- a/dynamiq/memory/long_term/backends/in_memory.py +++ b/dynamiq/memory/long_term/backends/in_memory.py @@ -5,10 +5,10 @@ from dynamiq.memory.long_term.schemas import Fact -class InMemoryFactBackend(LongTermMemoryBackend): +class InMemoryLongTermMemoryBackend(LongTermMemoryBackend): """Dict + numpy-cosine backend. Loses data on restart.""" - name: str = "InMemoryFactBackend" + name: str = "in-memory-long-term-memory-backend" _facts: dict[str, Fact] = PrivateAttr(default_factory=dict) _vectors: dict[str, list[float]] = PrivateAttr(default_factory=dict) diff --git a/dynamiq/memory/long_term/backends/pgvector.py b/dynamiq/memory/long_term/backends/pgvector.py index 74b3afe44..beb21c2e9 100644 --- a/dynamiq/memory/long_term/backends/pgvector.py +++ b/dynamiq/memory/long_term/backends/pgvector.py @@ -65,7 +65,7 @@ class PostgresLongTermMemoryBackend(LongTermMemoryBackend): model_config = ConfigDict(arbitrary_types_allowed=True) - name: str = "PostgresLongTermMemoryBackend" + name: str = "postgres-long-term-memory-backend" connection: PostgreSQLConnection = Field(default_factory=PostgreSQLConnection) table_name: str = "user_facts" dimension: int = 1536 diff --git a/dynamiq/memory/long_term/backends/qdrant.py b/dynamiq/memory/long_term/backends/qdrant.py index 799571df3..35d663e48 100644 --- a/dynamiq/memory/long_term/backends/qdrant.py +++ b/dynamiq/memory/long_term/backends/qdrant.py @@ -70,7 +70,7 @@ class QdrantLongTermMemoryBackend(LongTermMemoryBackend): model_config = ConfigDict(arbitrary_types_allowed=True) - name: str = "QdrantLongTermMemoryBackend" + name: str = "qdrant-long-term-memory-backend" connection: QdrantConnection collection_name: str = "user_facts" dimension: int = 1536 diff --git 
a/dynamiq/memory/long_term/base.py b/dynamiq/memory/long_term/base.py index d443f0520..d7a789093 100644 --- a/dynamiq/memory/long_term/base.py +++ b/dynamiq/memory/long_term/base.py @@ -13,7 +13,7 @@ class LongTermMemoryBackend(ABC, BaseModel): model_config = ConfigDict(arbitrary_types_allowed=True) - name: str = "LongTermMemoryBackend" + name: str = "long-term-memory-backend" id: str = Field(default_factory=generate_uuid) @computed_field diff --git a/dynamiq/memory/long_term/long_term_memory.py b/dynamiq/memory/long_term/long_term_memory.py index 2c3ebc14d..437b7edda 100644 --- a/dynamiq/memory/long_term/long_term_memory.py +++ b/dynamiq/memory/long_term/long_term_memory.py @@ -6,7 +6,7 @@ from pydantic import BaseModel, ConfigDict, Field, computed_field -from dynamiq.memory.long_term.backends import InMemoryFactBackend +from dynamiq.memory.long_term.backends import InMemoryLongTermMemoryBackend from dynamiq.memory.long_term.base import LongTermMemoryBackend from dynamiq.memory.long_term.schemas import Fact from dynamiq.memory.long_term.types import ForgetStatus, MemoryToolKind @@ -37,7 +37,7 @@ class LongTermMemory(BaseModel): model_config = ConfigDict(arbitrary_types_allowed=True) backend: LongTermMemoryBackend = Field( - default_factory=InMemoryFactBackend, + default_factory=InMemoryLongTermMemoryBackend, description="Backend storage implementation for facts and their embeddings.", ) embedder: TextEmbedder = Field( diff --git a/tests/unit/memory/long_term/test_in_memory_backend.py b/tests/unit/memory/long_term/test_in_memory_backend.py index 5883cb917..1fb6c2599 100644 --- a/tests/unit/memory/long_term/test_in_memory_backend.py +++ b/tests/unit/memory/long_term/test_in_memory_backend.py @@ -1,7 +1,7 @@ -"""Tests for InMemoryFactBackend.""" +"""Tests for InMemoryLongTermMemoryBackend.""" from datetime import UTC, datetime -from dynamiq.memory.long_term.backends.in_memory import InMemoryFactBackend +from dynamiq.memory.long_term.backends.in_memory import 
InMemoryLongTermMemoryBackend from dynamiq.memory.long_term.schemas import Fact @@ -19,40 +19,40 @@ def _fact(fact_id: str, user_id: str, content: str, # --- insert / get / get_by_hash --- def test_insert_then_get(fake_embedder): - backend = InMemoryFactBackend() + backend = InMemoryLongTermMemoryBackend() fact = _fact("f1", "u1", "hello") backend.insert(fact, fake_embedder.embed("hello")) assert backend.get("f1") == fact def test_get_unknown_returns_none(): - backend = InMemoryFactBackend() + backend = InMemoryLongTermMemoryBackend() assert backend.get("does-not-exist") is None def test_get_by_hash_returns_match(fake_embedder): - backend = InMemoryFactBackend() + backend = InMemoryLongTermMemoryBackend() fact = _fact("f1", "u1", "hello", content_hash="h-shared") backend.insert(fact, fake_embedder.embed("hello")) assert backend.get_by_hash(user_id="u1", content_hash="h-shared") == fact def test_get_by_hash_isolates_users(fake_embedder): - backend = InMemoryFactBackend() + backend = InMemoryLongTermMemoryBackend() backend.insert(_fact("f1", "u1", "hello", "h-shared"), fake_embedder.embed("hello")) assert backend.get_by_hash(user_id="u2", content_hash="h-shared") is None def test_get_by_hash_unknown_returns_none(): - backend = InMemoryFactBackend() + backend = InMemoryLongTermMemoryBackend() assert backend.get_by_hash(user_id="u1", content_hash="nope") is None # --- search --- def test_search_returns_relevance_ordered(fake_embedder): - backend = InMemoryFactBackend() + backend = InMemoryLongTermMemoryBackend() backend.insert(_fact("f1", "u1", "alpha"), fake_embedder.embed("alpha")) backend.insert(_fact("f2", "u1", "alpha-2"), fake_embedder.embed("alpha-2")) backend.insert(_fact("f3", "u1", "zulu"), fake_embedder.embed("zulu")) @@ -68,7 +68,7 @@ def test_search_returns_relevance_ordered(fake_embedder): def test_search_filters_by_scope(fake_embedder): - backend = InMemoryFactBackend() + backend = InMemoryLongTermMemoryBackend() backend.insert(_fact("f1", "u1", 
"alpha"), fake_embedder.embed("alpha")) backend.insert(_fact("f2", "u2", "alpha"), fake_embedder.embed("alpha")) hits = backend.search( @@ -80,7 +80,7 @@ def test_search_filters_by_scope(fake_embedder): def test_search_respects_limit(fake_embedder): - backend = InMemoryFactBackend() + backend = InMemoryLongTermMemoryBackend() for i in range(5): backend.insert(_fact(f"f{i}", "u1", f"text{i}"), fake_embedder.embed(f"text{i}")) @@ -92,7 +92,7 @@ def test_search_respects_limit(fake_embedder): def test_search_empty_store_returns_empty(fake_embedder): - backend = InMemoryFactBackend() + backend = InMemoryLongTermMemoryBackend() hits = backend.search( query_embedding=fake_embedder.embed("anything"), scope={"user_id": "u1"}, limit=5, @@ -103,19 +103,19 @@ def test_search_empty_store_returns_empty(fake_embedder): # --- delete / list_by_scope / delete_scope --- def test_delete_removes_fact(fake_embedder): - backend = InMemoryFactBackend() + backend = InMemoryLongTermMemoryBackend() backend.insert(_fact("f1", "u1", "x"), fake_embedder.embed("x")) backend.delete("f1") assert backend.get("f1") is None def test_delete_unknown_is_noop(): - backend = InMemoryFactBackend() + backend = InMemoryLongTermMemoryBackend() backend.delete("does-not-exist") # must not raise def test_list_by_scope_returns_in_scope_facts(fake_embedder): - backend = InMemoryFactBackend() + backend = InMemoryLongTermMemoryBackend() backend.insert(_fact("f1", "u1", "a"), fake_embedder.embed("a")) backend.insert(_fact("f2", "u1", "b"), fake_embedder.embed("b")) backend.insert(_fact("f3", "u2", "c"), fake_embedder.embed("c")) @@ -124,7 +124,7 @@ def test_list_by_scope_returns_in_scope_facts(fake_embedder): def test_list_by_scope_respects_limit(fake_embedder): - backend = InMemoryFactBackend() + backend = InMemoryLongTermMemoryBackend() for i in range(5): backend.insert(_fact(f"f{i}", "u1", f"x{i}"), fake_embedder.embed(f"x{i}")) @@ -132,7 +132,7 @@ def test_list_by_scope_respects_limit(fake_embedder): def 
test_delete_scope_removes_all_in_scope(fake_embedder): - backend = InMemoryFactBackend() + backend = InMemoryLongTermMemoryBackend() backend.insert(_fact("f1", "u1", "a"), fake_embedder.embed("a")) backend.insert(_fact("f2", "u1", "b"), fake_embedder.embed("b")) backend.insert(_fact("f3", "u2", "c"), fake_embedder.embed("c")) diff --git a/tests/unit/memory/long_term/test_long_term_memory.py b/tests/unit/memory/long_term/test_long_term_memory.py index 0b7f203c6..7dd5d9b52 100644 --- a/tests/unit/memory/long_term/test_long_term_memory.py +++ b/tests/unit/memory/long_term/test_long_term_memory.py @@ -2,14 +2,14 @@ import pytest from dynamiq.memory.long_term import LongTermMemory -from dynamiq.memory.long_term.backends.in_memory import InMemoryFactBackend +from dynamiq.memory.long_term.backends.in_memory import InMemoryLongTermMemoryBackend from dynamiq.memory.long_term.long_term_memory import LongTermMemoryError @pytest.fixture def ltm(fake_embedder): return LongTermMemory( - backend=InMemoryFactBackend(), + backend=InMemoryLongTermMemoryBackend(), embedder=fake_embedder, ) diff --git a/tests/unit/memory/long_term/test_tools.py b/tests/unit/memory/long_term/test_tools.py index 365c234a6..3ec7a9ad3 100644 --- a/tests/unit/memory/long_term/test_tools.py +++ b/tests/unit/memory/long_term/test_tools.py @@ -7,7 +7,7 @@ import pytest from dynamiq.memory.long_term import LongTermMemory -from dynamiq.memory.long_term.backends.in_memory import InMemoryFactBackend +from dynamiq.memory.long_term.backends.in_memory import InMemoryLongTermMemoryBackend from dynamiq.nodes.tools.long_term_memory import ( ForgetFactTool, RecallFactsTool, @@ -18,7 +18,7 @@ @pytest.fixture def ltm(fake_embedder): - return LongTermMemory(backend=InMemoryFactBackend(), embedder=fake_embedder) + return LongTermMemory(backend=InMemoryLongTermMemoryBackend(), embedder=fake_embedder) # --- RememberFactTool --- diff --git a/tests/unit/nodes/agents/test_long_term_memory_integration.py 
b/tests/unit/nodes/agents/test_long_term_memory_integration.py index 6e2a878ed..82722196f 100644 --- a/tests/unit/nodes/agents/test_long_term_memory_integration.py +++ b/tests/unit/nodes/agents/test_long_term_memory_integration.py @@ -17,7 +17,7 @@ from dynamiq.connections import BaseConnection from dynamiq.connections import OpenAI as OpenAIConnection from dynamiq.memory.long_term import LongTermMemory, LongTermMemoryConfig -from dynamiq.memory.long_term.backends.in_memory import InMemoryFactBackend +from dynamiq.memory.long_term.backends.in_memory import InMemoryLongTermMemoryBackend from dynamiq.nodes.agents.base import Agent from dynamiq.nodes.embedders.base import TextEmbedder, TextEmbedderInputSchema from dynamiq.nodes.llms import OpenAI @@ -43,7 +43,7 @@ def execute(self, input_data: TextEmbedderInputSchema, config=None, **kwargs): @pytest.fixture def ltm(): - return LongTermMemory(backend=InMemoryFactBackend(), embedder=_FakeEmbedder()) + return LongTermMemory(backend=InMemoryLongTermMemoryBackend(), embedder=_FakeEmbedder()) @pytest.fixture From 3659a664a4ad8c70060e203c2eca46334aa916ac Mon Sep 17 00:00:00 2001 From: Taras Yaroshko Date: Wed, 27 May 2026 13:31:56 +0300 Subject: [PATCH 21/53] feat: implement update() across long-term memory backends --- .../memory/long_term/backends/in_memory.py | 19 +++++++++++++++ dynamiq/memory/long_term/backends/pgvector.py | 18 ++++++++++++++ dynamiq/memory/long_term/backends/qdrant.py | 24 +++++++++++++++++++ dynamiq/memory/long_term/base.py | 22 ++++++++++++----- 4 files changed, 77 insertions(+), 6 deletions(-) diff --git a/dynamiq/memory/long_term/backends/in_memory.py b/dynamiq/memory/long_term/backends/in_memory.py index 14d6bd702..7926c3a70 100644 --- a/dynamiq/memory/long_term/backends/in_memory.py +++ b/dynamiq/memory/long_term/backends/in_memory.py @@ -1,3 +1,5 @@ +from datetime import datetime + import numpy as np from pydantic import PrivateAttr @@ -30,6 +32,23 @@ def delete(self, fact_id: str) -> None: 
self._facts.pop(fact_id, None) self._vectors.pop(fact_id, None) + def update( + self, + fact_id: str, + *, + content: str, + content_hash: str, + embedding: list[float], + updated_at: datetime, + ) -> None: + existing = self._facts.get(fact_id) + if existing is None: + return + self._facts[fact_id] = existing.model_copy( + update={"content": content, "hash": content_hash, "updated_at": updated_at} + ) + self._vectors[fact_id] = list(embedding) + def search( self, *, query_embedding: list[float], scope: dict[str, str], limit: int, diff --git a/dynamiq/memory/long_term/backends/pgvector.py b/dynamiq/memory/long_term/backends/pgvector.py index beb21c2e9..d35a18620 100644 --- a/dynamiq/memory/long_term/backends/pgvector.py +++ b/dynamiq/memory/long_term/backends/pgvector.py @@ -1,3 +1,4 @@ +from datetime import datetime from typing import Any import psycopg @@ -173,6 +174,23 @@ def delete(self, fact_id: str) -> None: (fact_id,), ) + def update( + self, + fact_id: str, + *, + content: str, + content_hash: str, + embedding: list[float], + updated_at: datetime, + ) -> None: + with self._conn.cursor() as cur: + cur.execute( + SQL( + "UPDATE {table} SET content = %s, hash = %s, " "embedding = %s, updated_at = %s WHERE id = %s" + ).format(table=self._table), + (content, content_hash, embedding, updated_at, fact_id), + ) + def search( self, *, diff --git a/dynamiq/memory/long_term/backends/qdrant.py b/dynamiq/memory/long_term/backends/qdrant.py index 35d663e48..84805d802 100644 --- a/dynamiq/memory/long_term/backends/qdrant.py +++ b/dynamiq/memory/long_term/backends/qdrant.py @@ -160,6 +160,30 @@ def delete(self, fact_id: str) -> None: points_selector=PointIdsList(points=[_to_point_id(fact_id)]), ) + def update( + self, + fact_id: str, + *, + content: str, + content_hash: str, + embedding: list[float], + updated_at: datetime, + ) -> None: + existing = self.get(fact_id) + if existing is None: + return + new_fact = existing.model_copy(update={"content": content, "hash": 
content_hash, "updated_at": updated_at}) + self._client.upsert( + collection_name=self.collection_name, + points=[ + PointStruct( + id=_to_point_id(fact_id), + vector=list(embedding), + payload=_fact_to_payload(new_fact), + ) + ], + ) + def search( self, *, diff --git a/dynamiq/memory/long_term/base.py b/dynamiq/memory/long_term/base.py index d7a789093..f3e8ae224 100644 --- a/dynamiq/memory/long_term/base.py +++ b/dynamiq/memory/long_term/base.py @@ -1,4 +1,5 @@ from abc import ABC, abstractmethod +from datetime import datetime from functools import cached_property from typing import Any @@ -66,9 +67,18 @@ def list_by_scope( def delete_scope(self, scope: dict[str, str]) -> int: """Hard-delete every fact matching `scope` and return the count deleted.""" - def update(self, fact_id: str, content: str, embedding: list[float]) -> None: - """Replace an existing fact in-place (Phase 2). Use `delete` + `insert` in v1.""" - raise NotImplementedError( - "update() lands in Phase 2 with the auto-extractor. " - "In v1, correct a fact via delete() + insert()." - ) + @abstractmethod + def update( + self, + fact_id: str, + *, + content: str, + content_hash: str, + embedding: list[float], + updated_at: datetime, + ) -> None: + """Replace content/hash/embedding/updated_at for an existing fact in place. + + Preserves `id`, `user_id`, `metadata`, and `created_at`. Used by the + semantic-upsert path in `LongTermMemory.remember`. 
+ """ From 30f8eea62602ec98d99196ba74e3e4a84b68568c Mon Sep 17 00:00:00 2001 From: Taras Yaroshko Date: Wed, 27 May 2026 13:32:08 +0300 Subject: [PATCH 22/53] feat: add Letta-style semantic upsert to LongTermMemory.remember --- dynamiq/memory/long_term/__init__.py | 3 +- dynamiq/memory/long_term/long_term_memory.py | 59 +++++++++++++++++--- dynamiq/memory/long_term/types.py | 11 +++- 3 files changed, 62 insertions(+), 11 deletions(-) diff --git a/dynamiq/memory/long_term/__init__.py b/dynamiq/memory/long_term/__init__.py index b27a04377..181b99f72 100644 --- a/dynamiq/memory/long_term/__init__.py +++ b/dynamiq/memory/long_term/__init__.py @@ -1,7 +1,7 @@ from dynamiq.memory.long_term.base import LongTermMemoryBackend from dynamiq.memory.long_term.long_term_memory import LongTermMemory, LongTermMemoryConfig from dynamiq.memory.long_term.schemas import Fact -from dynamiq.memory.long_term.types import ForgetStatus, MemoryToolKind +from dynamiq.memory.long_term.types import ForgetStatus, MemoryToolKind, RememberOutcome __all__ = [ "Fact", @@ -10,4 +10,5 @@ "LongTermMemoryBackend", "LongTermMemoryConfig", "MemoryToolKind", + "RememberOutcome", ] diff --git a/dynamiq/memory/long_term/long_term_memory.py b/dynamiq/memory/long_term/long_term_memory.py index 437b7edda..7c4bca891 100644 --- a/dynamiq/memory/long_term/long_term_memory.py +++ b/dynamiq/memory/long_term/long_term_memory.py @@ -9,7 +9,7 @@ from dynamiq.memory.long_term.backends import InMemoryLongTermMemoryBackend from dynamiq.memory.long_term.base import LongTermMemoryBackend from dynamiq.memory.long_term.schemas import Fact -from dynamiq.memory.long_term.types import ForgetStatus, MemoryToolKind +from dynamiq.memory.long_term.types import ForgetStatus, MemoryToolKind, RememberOutcome from dynamiq.nodes.embedders.base import TextEmbedder, TextEmbedderInputSchema from dynamiq.utils.logger import logger @@ -44,6 +44,16 @@ class LongTermMemory(BaseModel): ..., description="Text embedder used to vectorize facts on 
write and queries on read.", ) + upsert_threshold: float = Field( + default=0.85, + ge=0.0, + le=1.0, + description=( + "Cosine similarity above which a new `remember()` call replaces " + "the nearest existing fact in place instead " + "of inserting a new row. Set to 1.0 to disable upsert (insert-only)." + ), + ) @computed_field @cached_property @@ -71,8 +81,19 @@ def to_dict(self, include_secure_params: bool = False, **kwargs) -> dict[str, An def remember( self, *, content: str, user_id: str, metadata: dict[str, Any] | None = None, - ) -> Fact: - """Add a fact for `user_id`. Idempotent on the (user_id, normalised content) pair. + ) -> tuple[Fact, RememberOutcome]: + """Add or upsert a fact for `user_id`. Returns the fact and a `RememberOutcome`. + + Semantics (no explicit forget tool): + + 1. Exact-duplicate guard: if `(user_id, normalised content)` already exists, + return it with `UNCHANGED` (no embed cost). + 2. Otherwise embed once and search the user's facts for the nearest neighbour. + If the top match's cosine score exceeds `upsert_threshold`, replace that + fact's content/hash/embedding in place (preserving id, created_at, metadata) + and return `UPDATED`. This is how an agent "corrects" or "deletes" a fact: + re-state it. + 3. Otherwise insert a brand-new fact and return `CREATED`. Args: content: The fact text. 
@@ -90,11 +111,34 @@ def remember( existing = self.backend.get_by_hash(user_id=user_id, content_hash=content_hash) if existing is not None: - logger.debug(f"LongTermMemory: dedup hit for user={user_id}, returning existing fact {existing.id}") - return existing + logger.debug(f"LongTermMemory: exact-dedup hit for user={user_id}, fact {existing.id}") + return existing, RememberOutcome.UNCHANGED - now = datetime.now(UTC) embedding = _embed(self.embedder, normalised) + + nearest = self.backend.search( + query_embedding=embedding, + scope={"user_id": user_id}, + limit=1, + ) + if nearest and nearest[0][1] >= self.upsert_threshold: + old_fact, score = nearest[0] + now = datetime.now(UTC) + self.backend.update( + old_fact.id, + content=normalised, + content_hash=content_hash, + embedding=embedding, + updated_at=now, + ) + logger.debug( + f"LongTermMemory: upsert hit (score={score:.3f}) — " + f"updated fact {old_fact.id} for user={user_id}" + ) + updated = old_fact.model_copy(update={"content": normalised, "hash": content_hash, "updated_at": now}) + return updated, RememberOutcome.UPDATED + + now = datetime.now(UTC) fact = Fact( id=str(uuid4()), content=normalised, @@ -106,7 +150,7 @@ def remember( ) self.backend.insert(fact, embedding) logger.debug(f"LongTermMemory: stored fact {fact.id} for user={user_id}") - return fact + return fact, RememberOutcome.CREATED except Exception as e: logger.error(f"LongTermMemory.remember failed for user={user_id}: {e}") raise LongTermMemoryError(f"Failed to remember fact: {e}") from e @@ -199,5 +243,4 @@ class LongTermMemoryConfig(BaseModel): tools: tuple[MemoryToolKind, ...] 
= ( MemoryToolKind.REMEMBER, MemoryToolKind.RECALL, - MemoryToolKind.FORGET, ) diff --git a/dynamiq/memory/long_term/types.py b/dynamiq/memory/long_term/types.py index 2a9ca4c8e..9e6a646f0 100644 --- a/dynamiq/memory/long_term/types.py +++ b/dynamiq/memory/long_term/types.py @@ -2,16 +2,23 @@ class ForgetStatus(str, Enum): - """Outcome of `LongTermMemory.forget()`.""" + """Outcome of `LongTermMemory.forget()` (programmatic API only).""" DELETED = "deleted" NOT_FOUND = "not_found" FORBIDDEN = "forbidden" +class RememberOutcome(str, Enum): + """Outcome of `LongTermMemory.remember()` — distinguishes insert from upsert.""" + + CREATED = "created" + UPDATED = "updated" + UNCHANGED = "unchanged" + + class MemoryToolKind(str, Enum): """Kinds of long-term-memory tools exposed to an agent.""" REMEMBER = "remember" RECALL = "recall" - FORGET = "forget" From 14d5822445771607e2e3c0e150125c4b0e4865a4 Mon Sep 17 00:00:00 2001 From: Taras Yaroshko Date: Wed, 27 May 2026 13:32:29 +0300 Subject: [PATCH 23/53] refactor: drop forget tool and agent-optimize remember/recall outputs --- dynamiq/nodes/tools/long_term_memory.py | 91 ++++++++----------------- 1 file changed, 27 insertions(+), 64 deletions(-) diff --git a/dynamiq/nodes/tools/long_term_memory.py b/dynamiq/nodes/tools/long_term_memory.py index f9cdddae3..1358884c5 100644 --- a/dynamiq/nodes/tools/long_term_memory.py +++ b/dynamiq/nodes/tools/long_term_memory.py @@ -2,7 +2,7 @@ from pydantic import BaseModel, ConfigDict, Field -from dynamiq.memory.long_term import LongTermMemory, MemoryToolKind +from dynamiq.memory.long_term import LongTermMemory, MemoryToolKind, RememberOutcome from dynamiq.nodes.node import Node, ensure_config from dynamiq.nodes.types import NodeGroup from dynamiq.runnables import RunnableConfig @@ -13,18 +13,22 @@ Key capabilities: - Survives across conversations and sessions — not just this chat -- Idempotent on identical content (same text returns the same fact_id, no duplicates) +- Idempotent on 
identical content (re-stating the same fact is a no-op) +- Semantic upsert: re-stating a near-paraphrase REPLACES the older version in place, + which is how you correct or update what you previously remembered - Optional structured metadata (e.g. category, source) for later filtering Usage strategy: - Call when the user explicitly says "remember…", "save this…", "keep in mind for next time…" - Call when you have learned something that will clearly matter in future sessions (a stable preference, a constraint, a recurring context, biographical info) +- To CORRECT a previously-saved fact, just call this tool again with the corrected + statement — the older paraphrase is replaced automatically. - Do NOT use for ephemeral turn-level state — that is what the conversation history is for - Do NOT use to remember tool outputs, file paths, or anything tied to this run - The fact is scoped to the current user automatically; never pass or invent a user id -Returns: {"fact_id": ""} — store the id if you may want to forget it later. +Returns: a short status line — "Fact saved.", "Fact updated.", or "Already remembered." 
Examples: - {"content": "Prefers dogs over cats"} @@ -36,18 +40,18 @@ Key capabilities: - Semantic search (not keyword) — matches meaning, paraphrases, synonyms -- Returns ranked results with similarity scores (1.0 = perfect match) - Scoped to the current user automatically — never crosses users +- Returns the most relevant facts first, as plain text Usage strategy: - Call PROACTIVELY at the start of a turn when the request hints at something personal (preferences, past decisions, biographical info, recurring context) - Call BEFORE answering questions where prior context would change your response -- A low score (< ~0.3) means weakly related — use judgment, do not blindly include -- Each result has a `fact_id` — keep it if you may want to forget that fact later +- If no relevant facts are found, just proceed without them — no need to retry with + different phrasings unless the user's request makes the prior context essential - Skip when the question is purely factual or has no user-specific component -Returns: list of {"fact_id": "", "content": "", "score": }, most relevant first. +Returns: a bullet list of relevant facts (most relevant first), or "No relevant facts." Examples: - {"query": "food preferences"} @@ -55,28 +59,6 @@ - {"query": "timezone or schedule constraints", "limit": 10} """ -FORGET_DESCRIPTION = """Delete a single fact from the user's long-term memory by id. 
- -Key capabilities: -- Hard delete — the fact is gone, not just hidden -- Cross-user safety: returns "forbidden" if the fact belongs to another user -- Returns "not_found" if the id does not exist (already gone, or never existed) - -Usage strategy: -- Call ONLY when the user explicitly asks to forget something, or you've discovered - a fact is wrong and have no replacement to write -- ALWAYS get the fact_id from a prior `recall_facts` (or the original `remember_fact` - response) — do NOT guess or fabricate an id -- To CORRECT a wrong fact, prefer `remember_fact` with the corrected content first - (the old one stays, but new searches surface the correction); only call `forget_fact` - if the user wants the stale fact removed - -Returns: {"status": "deleted" | "not_found" | "forbidden"}. - -Examples: -- {"fact_id": "8f1c2b40-9d3e-4a8c-9c1f-0d2e3a4b5c6d"} -""" - class RememberFactInputSchema(BaseModel): """LLM-visible input for `remember_fact`. `user_id` is bound at construction.""" @@ -98,12 +80,6 @@ class RecallFactsInputSchema(BaseModel): description="Max facts to return.") -class ForgetFactInputSchema(BaseModel): - """LLM-visible input for `forget_fact`. 
`user_id` is bound at construction.""" - - fact_id: str = Field(..., description="The id returned by recall_facts or remember_fact.") - - class _LongTermMemoryTool(Node): """Shared base for the long-term memory tools.""" @@ -114,6 +90,13 @@ class _LongTermMemoryTool(Node): user_id: str +_OUTCOME_MESSAGES: dict[RememberOutcome, str] = { + RememberOutcome.CREATED: "Fact saved.", + RememberOutcome.UPDATED: "Fact updated.", + RememberOutcome.UNCHANGED: "Already remembered.", +} + + class RememberFactTool(_LongTermMemoryTool): """Write a fact to long-term memory, scoped to the bound user_id.""" @@ -129,12 +112,14 @@ def execute( check_cancellation(config) self.run_on_node_execute_run(config.callbacks, **kwargs) - fact = self.long_term_memory.remember( + fact, outcome = self.long_term_memory.remember( content=input_data.content, user_id=self.user_id, metadata=input_data.metadata, ) - return {"content": {"fact_id": fact.id}} + if self.is_optimized_for_agents: + return {"content": _OUTCOME_MESSAGES[outcome]} + return {"content": {"fact_id": fact.id, "outcome": outcome.value}} class RecallFactsTool(_LongTermMemoryTool): @@ -157,37 +142,16 @@ def execute( user_id=self.user_id, limit=input_data.limit, ) - return { - "content": [{"fact_id": fact.id, "content": fact.content, "score": round(score, 4)} for fact, score in hits] - } - - -class ForgetFactTool(_LongTermMemoryTool): - """Delete a fact by id, with cross-user guard.""" - - name: str = "forget_fact" - description: str = FORGET_DESCRIPTION - input_schema: ClassVar[type[ForgetFactInputSchema]] = ForgetFactInputSchema - - def execute( - self, input_data: ForgetFactInputSchema, config: RunnableConfig | None = None, **kwargs - ) -> dict[str, Any]: - logger.debug(f"Tool {self.name} - {self.id}: started") - config = ensure_config(config) - check_cancellation(config) - self.run_on_node_execute_run(config.callbacks, **kwargs) - - status = self.long_term_memory.forget( - fact_id=input_data.fact_id, - user_id=self.user_id, - ) - 
return {"content": {"status": status}} + if self.is_optimized_for_agents: + if not hits: + return {"content": "No relevant facts."} + return {"content": "\n".join(f"- {fact.content}" for fact, _ in hits)} + return {"content": [{"content": fact.content, "score": round(score, 4)} for fact, score in hits]} _TOOL_BUILDERS: dict[MemoryToolKind, type[_LongTermMemoryTool]] = { MemoryToolKind.REMEMBER: RememberFactTool, MemoryToolKind.RECALL: RecallFactsTool, - MemoryToolKind.FORGET: ForgetFactTool, } @@ -198,7 +162,6 @@ def build_long_term_memory_tools( include: tuple[MemoryToolKind | str, ...] = ( MemoryToolKind.REMEMBER, MemoryToolKind.RECALL, - MemoryToolKind.FORGET, ), ) -> list[Node]: """Construct long-term-memory tools with `user_id` baked in. Unknown keys in `include` are ignored.""" From 4c4ffc0d0f4bb3673c76d8740b880aca39211ea8 Mon Sep 17 00:00:00 2001 From: Taras Yaroshko Date: Wed, 27 May 2026 13:33:28 +0300 Subject: [PATCH 24/53] test: cover semantic upsert, agent-optimized outputs, and removed forget tool --- tests/unit/memory/long_term/test_base.py | 11 ++- .../long_term/test_in_memory_backend.py | 47 +++++++++- .../memory/long_term/test_long_term_memory.py | 60 +++++++++---- tests/unit/memory/long_term/test_tools.py | 86 +++++++++++-------- .../test_long_term_memory_integration.py | 12 +-- 5 files changed, 151 insertions(+), 65 deletions(-) diff --git a/tests/unit/memory/long_term/test_base.py b/tests/unit/memory/long_term/test_base.py index 5182aa15a..b97436ab5 100644 --- a/tests/unit/memory/long_term/test_base.py +++ b/tests/unit/memory/long_term/test_base.py @@ -9,10 +9,10 @@ def test_long_term_memory_backend_is_abstract(): LongTermMemoryBackend() -def test_long_term_memory_backend_update_default_raises(): - """Phase 2 reserves `update`; v1 default raises NotImplementedError.""" +def test_long_term_memory_backend_update_is_abstract(): + """Subclasses must implement `update` — semantic upsert depends on it.""" - class TinyBackend(LongTermMemoryBackend): + 
class MissingUpdate(LongTermMemoryBackend): def insert(self, fact, embedding): ... def get(self, fact_id): return None def get_by_hash(self, *, user_id, content_hash): return None @@ -21,6 +21,5 @@ def search(self, *, query_embedding, scope, limit): return [] def list_by_scope(self, scope, limit=100): return [] def delete_scope(self, scope): return 0 - backend = TinyBackend() - with pytest.raises(NotImplementedError, match="Phase 2"): - backend.update("f1", "x", [0.0]) + with pytest.raises(TypeError, match="abstract"): + MissingUpdate() diff --git a/tests/unit/memory/long_term/test_in_memory_backend.py b/tests/unit/memory/long_term/test_in_memory_backend.py index 1fb6c2599..2c8fdc572 100644 --- a/tests/unit/memory/long_term/test_in_memory_backend.py +++ b/tests/unit/memory/long_term/test_in_memory_backend.py @@ -1,5 +1,6 @@ """Tests for InMemoryLongTermMemoryBackend.""" -from datetime import UTC, datetime + +from datetime import UTC, datetime, timedelta from dynamiq.memory.long_term.backends.in_memory import InMemoryLongTermMemoryBackend from dynamiq.memory.long_term.schemas import Fact @@ -131,6 +132,50 @@ def test_list_by_scope_respects_limit(fake_embedder): assert len(backend.list_by_scope({"user_id": "u1"}, limit=2)) == 2 +# --- update --- + + +def test_update_replaces_content_hash_embedding_and_timestamp(fake_embedder): + backend = InMemoryLongTermMemoryBackend() + original = _fact("f1", "u1", "hello", content_hash="h-old") + backend.insert(original, fake_embedder.embed("hello")) + + new_time = original.updated_at + timedelta(seconds=5) + backend.update( + "f1", + content="hello world", + content_hash="h-new", + embedding=fake_embedder.embed("hello world"), + updated_at=new_time, + ) + + updated = backend.get("f1") + assert updated.content == "hello world" + assert updated.hash == "h-new" + assert updated.updated_at == new_time + assert updated.id == original.id + assert updated.created_at == original.created_at + + hits = backend.search( + 
query_embedding=fake_embedder.embed("hello world"), + scope={"user_id": "u1"}, + limit=1, + ) + assert hits[0][0].content == "hello world" + + +def test_update_unknown_is_noop(fake_embedder): + backend = InMemoryLongTermMemoryBackend() + backend.update( + "does-not-exist", + content="x", + content_hash="h", + embedding=fake_embedder.embed("x"), + updated_at=datetime.now(UTC), + ) # must not raise + assert backend.get("does-not-exist") is None + + def test_delete_scope_removes_all_in_scope(fake_embedder): backend = InMemoryLongTermMemoryBackend() backend.insert(_fact("f1", "u1", "a"), fake_embedder.embed("a")) diff --git a/tests/unit/memory/long_term/test_long_term_memory.py b/tests/unit/memory/long_term/test_long_term_memory.py index 7dd5d9b52..63e6f4859 100644 --- a/tests/unit/memory/long_term/test_long_term_memory.py +++ b/tests/unit/memory/long_term/test_long_term_memory.py @@ -1,7 +1,7 @@ """Tests for the LongTermMemory facade.""" import pytest -from dynamiq.memory.long_term import LongTermMemory +from dynamiq.memory.long_term import LongTermMemory, RememberOutcome from dynamiq.memory.long_term.backends.in_memory import InMemoryLongTermMemoryBackend from dynamiq.memory.long_term.long_term_memory import LongTermMemoryError @@ -17,40 +17,70 @@ def ltm(fake_embedder): # --- remember --- def test_remember_returns_a_fact_and_persists_it(ltm, user_id): - fact = ltm.remember(content="User likes pizza", user_id=user_id) + fact, outcome = ltm.remember(content="User likes pizza", user_id=user_id) + assert outcome == RememberOutcome.CREATED assert fact.id assert fact.content == "User likes pizza" assert fact.user_id == user_id assert ltm.backend.get(fact.id) == fact -def test_remember_dedups_exact_duplicate_in_same_user(ltm, user_id): - first = ltm.remember(content="User likes pizza", user_id=user_id) - second = ltm.remember(content="User likes pizza", user_id=user_id) +def test_remember_exact_duplicate_returns_unchanged(ltm, user_id): + first, first_outcome = 
ltm.remember(content="User likes pizza", user_id=user_id) + second, second_outcome = ltm.remember(content="User likes pizza", user_id=user_id) + assert first_outcome == RememberOutcome.CREATED + assert second_outcome == RememberOutcome.UNCHANGED assert first.id == second.id def test_remember_does_not_dedup_across_users(ltm, user_id, other_user_id): - a = ltm.remember(content="User likes pizza", user_id=user_id) - b = ltm.remember(content="User likes pizza", user_id=other_user_id) + a, _ = ltm.remember(content="User likes pizza", user_id=user_id) + b, b_outcome = ltm.remember(content="User likes pizza", user_id=other_user_id) + assert b_outcome == RememberOutcome.CREATED assert a.id != b.id assert a.user_id != b.user_id def test_remember_normalises_whitespace_for_dedup(ltm, user_id): - a = ltm.remember(content=" User likes pizza ", user_id=user_id) - b = ltm.remember(content="USER LIKES PIZZA", user_id=user_id) + a, _ = ltm.remember(content=" User likes pizza ", user_id=user_id) + b, b_outcome = ltm.remember(content="USER LIKES PIZZA", user_id=user_id) + assert b_outcome == RememberOutcome.UNCHANGED assert a.id == b.id +def test_remember_paraphrase_upserts_existing(fake_embedder, user_id): + """With a low threshold, a near-similar fact replaces the earlier one in place.""" + ltm = LongTermMemory( + backend=InMemoryLongTermMemoryBackend(), + embedder=fake_embedder, + upsert_threshold=0.0, + ) + original, _ = ltm.remember(content="User likes pizza", user_id=user_id) + updated, outcome = ltm.remember(content="User loves pizza", user_id=user_id) + + assert outcome == RememberOutcome.UPDATED + assert updated.id == original.id + assert updated.content == "User loves pizza" + assert ltm.backend.get(original.id).content == "User loves pizza" + assert len(ltm.list_all(user_id=user_id)) == 1 + + +def test_remember_distinct_content_inserts_new_when_threshold_high(ltm, user_id): + """Default high threshold (0.85) keeps unrelated facts separate.""" + a, _ = 
ltm.remember(content="User likes pizza", user_id=user_id) + b, outcome = ltm.remember(content="User dislikes mushrooms", user_id=user_id) + assert outcome == RememberOutcome.CREATED + assert a.id != b.id + assert len(ltm.list_all(user_id=user_id)) == 2 + + def test_remember_rejects_empty_content(ltm, user_id): with pytest.raises(LongTermMemoryError): ltm.remember(content=" ", user_id=user_id) def test_remember_stores_metadata(ltm, user_id): - fact = ltm.remember(content="x", user_id=user_id, - metadata={"category": "preference"}) + fact, _ = ltm.remember(content="x", user_id=user_id, metadata={"category": "preference"}) assert ltm.backend.get(fact.id).metadata == {"category": "preference"} @@ -89,10 +119,10 @@ def test_recall_rejects_empty_query(ltm, user_id): ltm.recall(query=" ", user_id=user_id, limit=5) -# --- forget --- +# --- forget (programmatic API; not exposed to agents) --- def test_forget_deletes_known_fact(ltm, user_id): - fact = ltm.remember(content="x", user_id=user_id) + fact, _ = ltm.remember(content="x", user_id=user_id) assert ltm.forget(fact_id=fact.id, user_id=user_id) == "deleted" assert ltm.backend.get(fact.id) is None @@ -102,7 +132,7 @@ def test_forget_unknown_returns_not_found(ltm, user_id): def test_forget_cross_user_returns_forbidden(ltm, user_id, other_user_id): - fact = ltm.remember(content="x", user_id=user_id) + fact, _ = ltm.remember(content="x", user_id=user_id) result = ltm.forget(fact_id=fact.id, user_id=other_user_id) assert result == "forbidden" assert ltm.backend.get(fact.id) is not None @@ -119,7 +149,7 @@ def test_list_all_returns_user_facts(ltm, user_id, other_user_id): def test_get_returns_fact_by_id(ltm, user_id): - fact = ltm.remember(content="x", user_id=user_id) + fact, _ = ltm.remember(content="x", user_id=user_id) assert ltm.get(fact.id) == fact diff --git a/tests/unit/memory/long_term/test_tools.py b/tests/unit/memory/long_term/test_tools.py index 3ec7a9ad3..b86ce5144 100644 --- 
a/tests/unit/memory/long_term/test_tools.py +++ b/tests/unit/memory/long_term/test_tools.py @@ -1,4 +1,4 @@ -"""Tests for the three long-term memory tools and the factory. +"""Tests for the long-term memory tools and the factory. These tests do not invoke an LLM — they exercise the Node `execute()` method directly, treating the tool the same way Agent's tool-use loop @@ -8,12 +8,7 @@ from dynamiq.memory.long_term import LongTermMemory from dynamiq.memory.long_term.backends.in_memory import InMemoryLongTermMemoryBackend -from dynamiq.nodes.tools.long_term_memory import ( - ForgetFactTool, - RecallFactsTool, - RememberFactTool, - build_long_term_memory_tools, -) +from dynamiq.nodes.tools.long_term_memory import RecallFactsTool, RememberFactTool, build_long_term_memory_tools @pytest.fixture @@ -28,6 +23,7 @@ def test_remember_tool_persists_a_fact(ltm, user_id): tool = RememberFactTool(long_term_memory=ltm, user_id=user_id) result = tool.execute(tool.input_schema(content="User likes pizza")) fact_id = result["content"]["fact_id"] + assert result["content"]["outcome"] == "created" assert ltm.get(fact_id).content == "User likes pizza" @@ -49,6 +45,7 @@ def test_remember_tool_idempotent_on_duplicate(ltm, user_id): a = tool.execute(tool.input_schema(content="x")) b = tool.execute(tool.input_schema(content="x")) assert a["content"]["fact_id"] == b["content"]["fact_id"] + assert b["content"]["outcome"] == "unchanged" def test_remember_tool_accepts_metadata(ltm, user_id): @@ -59,6 +56,33 @@ def test_remember_tool_accepts_metadata(ltm, user_id): assert fact.metadata == {"category": "preference"} +def test_remember_tool_agent_optimized_returns_status_string(ltm, user_id): + """Agent-mode output is a short human-readable status, not a dict.""" + tool = RememberFactTool(long_term_memory=ltm, user_id=user_id) + tool.is_optimized_for_agents = True + + created = tool.execute(tool.input_schema(content="User likes pizza")) + assert created["content"] == "Fact saved." 
+ + unchanged = tool.execute(tool.input_schema(content="User likes pizza")) + assert unchanged["content"] == "Already remembered." + + +def test_remember_tool_agent_optimized_reports_update(fake_embedder, user_id): + """Agent-mode upsert renders as 'Fact updated.'""" + ltm = LongTermMemory( + backend=InMemoryLongTermMemoryBackend(), + embedder=fake_embedder, + upsert_threshold=0.0, + ) + tool = RememberFactTool(long_term_memory=ltm, user_id=user_id) + tool.is_optimized_for_agents = True + + tool.execute(tool.input_schema(content="User likes pizza")) + updated = tool.execute(tool.input_schema(content="User loves pizza")) + assert updated["content"] == "Fact updated." + + # --- RecallFactsTool --- @@ -70,7 +94,7 @@ def test_recall_tool_returns_hits(ltm, user_id): items = result["content"] assert len(items) == 2 for item in items: - assert {"fact_id", "content", "score"} <= set(item.keys()) + assert {"content", "score"} <= set(item.keys()) scores = [it["score"] for it in items] assert scores == sorted(scores, reverse=True) @@ -95,42 +119,30 @@ def test_recall_tool_empty_store_returns_empty(ltm, user_id): assert result["content"] == [] -# --- ForgetFactTool --- - - -def test_forget_tool_deletes_owned_fact(ltm, user_id): - fact = ltm.remember(content="x", user_id=user_id) - tool = ForgetFactTool(long_term_memory=ltm, user_id=user_id) - result = tool.execute(tool.input_schema(fact_id=fact.id)) - assert result["content"]["status"] == "deleted" - assert ltm.get(fact.id) is None - - -def test_forget_tool_returns_not_found_for_unknown_id(ltm, user_id): - tool = ForgetFactTool(long_term_memory=ltm, user_id=user_id) - result = tool.execute(tool.input_schema(fact_id="does-not-exist")) - assert result["content"]["status"] == "not_found" - - -def test_forget_tool_returns_forbidden_on_cross_user(ltm, user_id, other_user_id): - fact = ltm.remember(content="x", user_id=user_id) - attacker = ForgetFactTool(long_term_memory=ltm, user_id=other_user_id) - result = 
attacker.execute(attacker.input_schema(fact_id=fact.id)) - assert result["content"]["status"] == "forbidden" - assert ltm.get(fact.id) is not None +def test_recall_tool_agent_optimized_returns_bullet_list(ltm, user_id): + ltm.remember(content="User likes pizza", user_id=user_id) + ltm.remember(content="User likes Python", user_id=user_id) + tool = RecallFactsTool(long_term_memory=ltm, user_id=user_id) + tool.is_optimized_for_agents = True + result = tool.execute(tool.input_schema(query="pizza", limit=2)) + assert isinstance(result["content"], str) + assert "- User likes pizza" in result["content"] + assert "- User likes Python" in result["content"] -def test_forget_tool_input_schema_has_no_user_id(): - assert "user_id" not in ForgetFactTool.input_schema.model_fields - assert "fact_id" in ForgetFactTool.input_schema.model_fields +def test_recall_tool_agent_optimized_empty_message(ltm, user_id): + tool = RecallFactsTool(long_term_memory=ltm, user_id=user_id) + tool.is_optimized_for_agents = True + result = tool.execute(tool.input_schema(query="anything")) + assert result["content"] == "No relevant facts." 
# --- factory --- -def test_factory_builds_all_three_by_default(ltm, user_id): +def test_factory_builds_default_two_tools(ltm, user_id): tools = build_long_term_memory_tools(long_term_memory=ltm, user_id=user_id) - assert {t.name for t in tools} == {"remember_fact", "recall_facts", "forget_fact"} + assert {t.name for t in tools} == {"remember_fact", "recall_facts"} def test_factory_respects_include(ltm, user_id): @@ -149,6 +161,6 @@ def test_factory_bakes_user_id_into_each_tool(ltm, user_id): def test_factory_ignores_unknown_include_keys(ltm, user_id): tools = build_long_term_memory_tools( long_term_memory=ltm, user_id=user_id, - include=("recall", "unknown"), + include=("recall", "unknown", "forget"), ) assert [t.name for t in tools] == ["recall_facts"] diff --git a/tests/unit/nodes/agents/test_long_term_memory_integration.py b/tests/unit/nodes/agents/test_long_term_memory_integration.py index 82722196f..7f576b272 100644 --- a/tests/unit/nodes/agents/test_long_term_memory_integration.py +++ b/tests/unit/nodes/agents/test_long_term_memory_integration.py @@ -72,8 +72,8 @@ def _input(user_id=None, session_id=None): # --- LongTermMemoryConfig --- -def test_config_default_includes_all_three_tools(): - assert LongTermMemoryConfig().tools == ("remember", "recall", "forget") +def test_config_default_includes_remember_and_recall(): + assert LongTermMemoryConfig().tools == ("remember", "recall") def test_config_can_restrict_to_read_only(): @@ -93,16 +93,16 @@ def test_agent_has_long_term_memory_fields(): def test_agent_long_term_memory_defaults_to_none(llm): agent = _make_agent(llm) assert agent.long_term_memory is None - assert agent.long_term_memory_config.tools == ("remember", "recall", "forget") + assert agent.long_term_memory_config.tools == ("remember", "recall") # --- _build_long_term_memory_tools --- -def test_build_returns_three_tools_when_ltm_and_user_id_present(llm, ltm): +def test_build_returns_default_tools_when_ltm_and_user_id_present(llm, ltm): agent = 
_make_agent(llm, ltm=ltm) tools = agent._build_long_term_memory_tools(_input(user_id="u1")) - assert {t.name for t in tools} == {"remember_fact", "recall_facts", "forget_fact"} + assert {t.name for t in tools} == {"remember_fact", "recall_facts"} def test_build_returns_empty_when_no_user_id(llm, ltm): @@ -147,7 +147,7 @@ def test_execute_attaches_ltm_tools_during_run_and_restores_after(llm, ltm): with _patch_run_agent_capture_tools(agent, captured): agent.run_sync(input_data={"input": "hi", "user_id": "u1"}) - assert {"remember_fact", "recall_facts", "forget_fact"} <= {t.name for t in captured} + assert {"remember_fact", "recall_facts"} <= {t.name for t in captured} assert agent.tools == original_tools From c415579de200d8a6280c1bb70553e49e9d0bf1be Mon Sep 17 00:00:00 2001 From: Taras Yaroshko Date: Wed, 27 May 2026 14:02:16 +0300 Subject: [PATCH 25/53] chore: remove redundant module-level docstrings --- tests/integration_with_creds/memory/conftest.py | 1 - .../memory/test_pgvector_fact_backend.py | 5 ----- .../memory/test_qdrant_fact_backend.py | 7 ------- tests/unit/memory/long_term/test_base.py | 1 - tests/unit/memory/long_term/test_in_memory_backend.py | 2 -- tests/unit/memory/long_term/test_long_term_memory.py | 1 - tests/unit/memory/long_term/test_schemas.py | 1 - tests/unit/memory/long_term/test_tools.py | 6 ------ .../nodes/agents/test_long_term_memory_integration.py | 9 --------- 9 files changed, 33 deletions(-) diff --git a/tests/integration_with_creds/memory/conftest.py b/tests/integration_with_creds/memory/conftest.py index 1d43b5dd7..291441f7c 100644 --- a/tests/integration_with_creds/memory/conftest.py +++ b/tests/integration_with_creds/memory/conftest.py @@ -1,4 +1,3 @@ -"""Shared fixtures for long-term memory integration tests.""" import hashlib import pytest diff --git a/tests/integration_with_creds/memory/test_pgvector_fact_backend.py b/tests/integration_with_creds/memory/test_pgvector_fact_backend.py index 518b5ab57..7096fe57f 100644 --- 
a/tests/integration_with_creds/memory/test_pgvector_fact_backend.py +++ b/tests/integration_with_creds/memory/test_pgvector_fact_backend.py @@ -1,8 +1,3 @@ -"""Integration tests for PostgresLongTermMemoryBackend. - -Requires `POSTGRES_DSN` to point at a live Postgres with the pgvector -extension installed. Without it, this whole module skips. -""" import os from datetime import UTC, datetime from urllib.parse import urlparse diff --git a/tests/integration_with_creds/memory/test_qdrant_fact_backend.py b/tests/integration_with_creds/memory/test_qdrant_fact_backend.py index a6459af2a..4642f79aa 100644 --- a/tests/integration_with_creds/memory/test_qdrant_fact_backend.py +++ b/tests/integration_with_creds/memory/test_qdrant_fact_backend.py @@ -1,10 +1,3 @@ -"""Integration tests for QdrantLongTermMemoryBackend. - -Requires a reachable Qdrant. Set `QDRANT_URL` (defaults to -`http://localhost:6333`). Without one, this whole module skips. - -Run a local Qdrant with: `docker run -d -p 6333:6333 qdrant/qdrant`. 
-""" import os from datetime import UTC, datetime diff --git a/tests/unit/memory/long_term/test_base.py b/tests/unit/memory/long_term/test_base.py index b97436ab5..5240525f8 100644 --- a/tests/unit/memory/long_term/test_base.py +++ b/tests/unit/memory/long_term/test_base.py @@ -1,4 +1,3 @@ -"""Tests for LongTermMemoryBackend ABC.""" import pytest from dynamiq.memory.long_term.base import LongTermMemoryBackend diff --git a/tests/unit/memory/long_term/test_in_memory_backend.py b/tests/unit/memory/long_term/test_in_memory_backend.py index 2c8fdc572..6b5ecae2c 100644 --- a/tests/unit/memory/long_term/test_in_memory_backend.py +++ b/tests/unit/memory/long_term/test_in_memory_backend.py @@ -1,5 +1,3 @@ -"""Tests for InMemoryLongTermMemoryBackend.""" - from datetime import UTC, datetime, timedelta from dynamiq.memory.long_term.backends.in_memory import InMemoryLongTermMemoryBackend diff --git a/tests/unit/memory/long_term/test_long_term_memory.py b/tests/unit/memory/long_term/test_long_term_memory.py index 63e6f4859..f455cb182 100644 --- a/tests/unit/memory/long_term/test_long_term_memory.py +++ b/tests/unit/memory/long_term/test_long_term_memory.py @@ -1,4 +1,3 @@ -"""Tests for the LongTermMemory facade.""" import pytest from dynamiq.memory.long_term import LongTermMemory, RememberOutcome diff --git a/tests/unit/memory/long_term/test_schemas.py b/tests/unit/memory/long_term/test_schemas.py index 5182611de..820cddef8 100644 --- a/tests/unit/memory/long_term/test_schemas.py +++ b/tests/unit/memory/long_term/test_schemas.py @@ -1,4 +1,3 @@ -"""Tests for long-term memory pydantic schemas.""" from datetime import UTC, datetime from dynamiq.memory.long_term.schemas import Fact diff --git a/tests/unit/memory/long_term/test_tools.py b/tests/unit/memory/long_term/test_tools.py index b86ce5144..a22611d80 100644 --- a/tests/unit/memory/long_term/test_tools.py +++ b/tests/unit/memory/long_term/test_tools.py @@ -1,9 +1,3 @@ -"""Tests for the long-term memory tools and the factory. 
- -These tests do not invoke an LLM — they exercise the Node `execute()` -method directly, treating the tool the same way Agent's tool-use loop -would after the model emits a tool call. -""" import pytest from dynamiq.memory.long_term import LongTermMemory diff --git a/tests/unit/nodes/agents/test_long_term_memory_integration.py b/tests/unit/nodes/agents/test_long_term_memory_integration.py index 7f576b272..a0c402dd7 100644 --- a/tests/unit/nodes/agents/test_long_term_memory_integration.py +++ b/tests/unit/nodes/agents/test_long_term_memory_integration.py @@ -1,12 +1,3 @@ -"""Tests for Agent long-term memory integration. - -Covers `_build_long_term_memory_tools` (the per-run tool-construction -helper) and the snapshot/restore behavior of `self.tools` across an -`execute()` call. - -The execute-level tests mock `_run_agent` so we don't need a real LLM -backend response — we only verify the agent-loop bookkeeping. -""" import hashlib from types import SimpleNamespace from typing import ClassVar From 9112388031aac349257b4c676450df5a257b80ee Mon Sep 17 00:00:00 2001 From: Taras Yaroshko Date: Wed, 27 May 2026 14:49:21 +0300 Subject: [PATCH 26/53] fix: exclude long_term_memory from LTM tool to_dict to avoid serializing live clients --- dynamiq/nodes/tools/long_term_memory.py | 9 +++++++++ tests/unit/memory/long_term/test_tools.py | 19 +++++++++++++++++++ 2 files changed, 28 insertions(+) diff --git a/dynamiq/nodes/tools/long_term_memory.py b/dynamiq/nodes/tools/long_term_memory.py index 1358884c5..b7dea02f9 100644 --- a/dynamiq/nodes/tools/long_term_memory.py +++ b/dynamiq/nodes/tools/long_term_memory.py @@ -89,6 +89,15 @@ class _LongTermMemoryTool(Node): long_term_memory: LongTermMemory user_id: str + @property + def to_dict_exclude_params(self) -> dict[str, Any]: + return super().to_dict_exclude_params | {"long_term_memory": True} + + def to_dict(self, include_secure_params: bool = False, **kwargs) -> dict[str, Any]: + data = 
super().to_dict(include_secure_params=include_secure_params, **kwargs) + data["long_term_memory"] = self.long_term_memory.to_dict(include_secure_params=include_secure_params, **kwargs) + return data + _OUTCOME_MESSAGES: dict[RememberOutcome, str] = { RememberOutcome.CREATED: "Fact saved.", diff --git a/tests/unit/memory/long_term/test_tools.py b/tests/unit/memory/long_term/test_tools.py index a22611d80..441d929e8 100644 --- a/tests/unit/memory/long_term/test_tools.py +++ b/tests/unit/memory/long_term/test_tools.py @@ -158,3 +158,22 @@ def test_factory_ignores_unknown_include_keys(ltm, user_id): include=("recall", "unknown", "forget"), ) assert [t.name for t in tools] == ["recall_facts"] + + +# --- serialization --- + + +def test_remember_tool_to_dict_round_trips_long_term_memory(ltm, user_id): + """`to_dict` must not auto-dump `long_term_memory` (it holds runtime clients). + + The default `model_dump` would try to JSON-encode the embedder's connection + and the backend's live client, blowing up tracing callbacks. The tool base + excludes the field and re-adds it via `LongTermMemory.to_dict()`. 
+ """ + tool = RememberFactTool(long_term_memory=ltm, user_id=user_id) + data = tool.to_dict() + assert "long_term_memory" in data + ltm_dump = data["long_term_memory"] + assert isinstance(ltm_dump, dict) + assert "backend" in ltm_dump and isinstance(ltm_dump["backend"], dict) + assert "embedder" in ltm_dump and isinstance(ltm_dump["embedder"], dict) From 537fba4ff04528a56979254a41ad5cdd3cf446fb Mon Sep 17 00:00:00 2001 From: Taras Yaroshko Date: Wed, 27 May 2026 14:49:28 +0300 Subject: [PATCH 27/53] fix: drop 10k cap in qdrant delete_scope by using count + delete-by-filter --- dynamiq/memory/long_term/backends/qdrant.py | 22 ++++++++++++++------- 1 file changed, 15 insertions(+), 7 deletions(-) diff --git a/dynamiq/memory/long_term/backends/qdrant.py b/dynamiq/memory/long_term/backends/qdrant.py index 84805d802..e81049328 100644 --- a/dynamiq/memory/long_term/backends/qdrant.py +++ b/dynamiq/memory/long_term/backends/qdrant.py @@ -8,6 +8,7 @@ Distance, FieldCondition, Filter, + FilterSelector, MatchValue, PointIdsList, PointStruct, @@ -211,14 +212,21 @@ def list_by_scope(self, scope: dict[str, str], limit: int = 100) -> list[Fact]: return [_payload_to_fact(p.payload) for p in points] def delete_scope(self, scope: dict[str, str]) -> int: - # Qdrant delete-by-filter returns no count, so enumerate ids first. - in_scope = self.list_by_scope(scope, limit=10_000) - if not in_scope: + # Qdrant delete-by-filter is atomic but returns no count, so count + # first then delete. `count(exact=True)` is one round-trip and avoids + # the 10k cap a paginated scroll-then-delete would silently hit. 
+ scope_filter = _scope_to_filter(scope) + if scope_filter is None: + return 0 + total = self._client.count( + collection_name=self.collection_name, + count_filter=scope_filter, + exact=True, + ).count + if total == 0: return 0 self._client.delete( collection_name=self.collection_name, - points_selector=PointIdsList( - points=[_to_point_id(f.id) for f in in_scope] - ), + points_selector=FilterSelector(filter=scope_filter), ) - return len(in_scope) + return total From a4ff448ad6b061544e48aae961bccd418da20f68 Mon Sep 17 00:00:00 2001 From: Taras Yaroshko Date: Wed, 27 May 2026 15:10:09 +0300 Subject: [PATCH 28/53] fix: serialize LongTermMemoryConfig.tools as strings for yaml round-trip --- dynamiq/memory/long_term/long_term_memory.py | 8 +++++++- .../agents/test_long_term_memory_integration.py | 12 ++++++++++++ 2 files changed, 19 insertions(+), 1 deletion(-) diff --git a/dynamiq/memory/long_term/long_term_memory.py b/dynamiq/memory/long_term/long_term_memory.py index 7c4bca891..c85a86c12 100644 --- a/dynamiq/memory/long_term/long_term_memory.py +++ b/dynamiq/memory/long_term/long_term_memory.py @@ -4,7 +4,7 @@ from typing import Any from uuid import uuid4 -from pydantic import BaseModel, ConfigDict, Field, computed_field +from pydantic import BaseModel, ConfigDict, Field, computed_field, field_serializer from dynamiq.memory.long_term.backends import InMemoryLongTermMemoryBackend from dynamiq.memory.long_term.base import LongTermMemoryBackend @@ -244,3 +244,9 @@ class LongTermMemoryConfig(BaseModel): MemoryToolKind.REMEMBER, MemoryToolKind.RECALL, ) + + @field_serializer("tools") + def _serialize_tools(self, tools: tuple[MemoryToolKind, ...]) -> tuple[str, ...]: + # Emit plain string values so YAML round-trip and tracing work; pydantic + # default-mode dump returns enum members which yaml.safe_dump cannot render. 
+ return tuple(t.value for t in tools) diff --git a/tests/unit/nodes/agents/test_long_term_memory_integration.py b/tests/unit/nodes/agents/test_long_term_memory_integration.py index a0c402dd7..b50c80c41 100644 --- a/tests/unit/nodes/agents/test_long_term_memory_integration.py +++ b/tests/unit/nodes/agents/test_long_term_memory_integration.py @@ -71,6 +71,18 @@ def test_config_can_restrict_to_read_only(): assert LongTermMemoryConfig(tools=("recall",)).tools == ("recall",) +def test_config_model_dump_emits_plain_strings_not_enums(): + """YAML round-trip relies on tool kinds being dumped as their string values, + not as enum members (which yaml.safe_dump cannot represent and which would + round-trip back as the enum *name* — 'REMEMBER' — failing validation).""" + import yaml + + dumped = LongTermMemoryConfig().model_dump() + assert dumped == {"tools": ("remember", "recall")} + assert all(isinstance(t, str) and not hasattr(t, "value") for t in dumped["tools"]) + yaml.safe_dump(dumped) # must not raise + + # --- Agent field declarations --- From d98ef291f40e6a0ad9bd66c7c2b3fbdf15bf1ae0 Mon Sep 17 00:00:00 2001 From: Taras Yaroshko Date: Wed, 27 May 2026 15:25:37 +0300 Subject: [PATCH 29/53] fix: restore Agent.tools even when prep steps before _run_agent raise --- dynamiq/nodes/agents/base.py | 205 +++++++++--------- .../test_long_term_memory_integration.py | 13 ++ 2 files changed, 118 insertions(+), 100 deletions(-) diff --git a/dynamiq/nodes/agents/base.py b/dynamiq/nodes/agents/base.py index 290fa8e77..b60f2a91e 100644 --- a/dynamiq/nodes/agents/base.py +++ b/dynamiq/nodes/agents/base.py @@ -646,125 +646,130 @@ def execute( ", ".join(t.name for t in ltm_tools), ) - if use_memory: - history_messages = self._retrieve_memory(input_data) - if len(history_messages) > 0: - history_messages.insert( - 0, - Message( - role=MessageRole.SYSTEM, - content="Below is the previous conversation history. 
" - "Use this context to inform your response.", - static=True, - ), - ) - else: - history_messages = None - - files = input_data.files - if files: - normalized_files = self._ensure_named_files(files) - file_paths = [] - if self.sandbox_backend: - file_paths = self._upload_files_to_sandbox(normalized_files) + # Wrap everything after the mutation in try/finally so `self.tools` is + # always restored — including when prep steps (memory retrieval, file + # upload, prompt-variable update) raise before reaching `_run_agent`. + try: + if use_memory: + history_messages = self._retrieve_memory(input_data) + if len(history_messages) > 0: + history_messages.insert( + 0, + Message( + role=MessageRole.SYSTEM, + content="Below is the previous conversation history. " + "Use this context to inform your response.", + static=True, + ), + ) else: - if not self.file_store_backend: - self._setup_in_memory_file_store_and_tools() - if self.file_store_backend: - file_paths = self._upload_files_to_file_store(normalized_files) - input_message = self._inject_attached_files_into_message( - input_message, normalized_files, file_paths=file_paths - ) + history_messages = None - if input_data.tool_params: - kwargs["tool_params"] = input_data.tool_params + files = input_data.files + if files: + normalized_files = self._ensure_named_files(files) + file_paths = [] + if self.sandbox_backend: + file_paths = self._upload_files_to_sandbox(normalized_files) + else: + if not self.file_store_backend: + self._setup_in_memory_file_store_and_tools() + if self.file_store_backend: + file_paths = self._upload_files_to_file_store(normalized_files) + input_message = self._inject_attached_files_into_message( + input_message, normalized_files, file_paths=file_paths + ) - self.system_prompt_manager.update_variables(dict(input_data)) - kwargs = kwargs | {"parent_run_id": kwargs.get("run_id")} - kwargs.pop("run_depends", None) + if input_data.tool_params: + kwargs["tool_params"] = input_data.tool_params - try: - 
result = self._run_agent(input_message, history_messages, config=config, **kwargs) - except CanceledException: - if use_memory: - try: - self._save_history_to_memory(custom_metadata) - except Exception as save_error: - logger.error( - f"Agent {self.name} - {self.id}: failed to save history to memory " - f"after cancel: {save_error}", - ) + self.system_prompt_manager.update_variables(dict(input_data)) + kwargs = kwargs | {"parent_run_id": kwargs.get("run_id")} + kwargs.pop("run_depends", None) + + try: + result = self._run_agent(input_message, history_messages, config=config, **kwargs) + except CanceledException: + if use_memory: + try: + self._save_history_to_memory(custom_metadata) + except Exception as save_error: + logger.error( + f"Agent {self.name} - {self.id}: failed to save history to memory " + f"after cancel: {save_error}", + ) + try: + self._append_user_input_to_memory(custom_metadata) + except Exception as save_error2: + logger.error( + f"Agent {self.name} - {self.id}: also failed to save user input " + f"after cancel: {save_error2}", + ) + raise + except Exception: + if use_memory: try: self._append_user_input_to_memory(custom_metadata) - except Exception as save_error2: + except Exception as save_error: logger.error( - f"Agent {self.name} - {self.id}: also failed to save user input " - f"after cancel: {save_error2}", + f"Agent {self.name} - {self.id}: failed to save user input to memory " + f"after agent error: {save_error}", ) - raise - except Exception: + raise + finally: + self._current_call_context = None + self._clear_todos_file() + if use_memory: try: - self._append_user_input_to_memory(custom_metadata) + self._save_history_to_memory(custom_metadata, final_output=result) except Exception as save_error: logger.error( - f"Agent {self.name} - {self.id}: failed to save user input to memory " - f"after agent error: {save_error}", + "Agent %s - %s: failed to save history to memory: %s", + self.name, + self.id, + save_error, ) - raise - finally: - 
self._current_call_context = None - self._clear_todos_file() - if ltm_tools: - self.tools = _tools_before_ltm - - if use_memory: - try: - self._save_history_to_memory(custom_metadata, final_output=result) - except Exception as save_error: - logger.error( - "Agent %s - %s: failed to save history to memory: %s", - self.name, - self.id, - save_error, - ) - execution_result = { - "content": result, - } + execution_result = { + "content": result, + } - requested_paths = getattr(self, "_requested_output_files", None) + requested_paths = getattr(self, "_requested_output_files", None) - if self.file_store_backend and requested_paths: - try: - stored_files = self.file_store_backend.list_files_bytes(requested_paths) - except Exception as e: - logger.warning(f"Agent {self.name} - {self.id}: failed to collect files from file store: {e}") - stored_files = [] - if stored_files: - execution_result["files"] = stored_files - logger.info( - f"Agent {self.name} - {self.id}: " - f"returning {len(stored_files)} requested file(s) from file store" - ) + if self.file_store_backend and requested_paths: + try: + stored_files = self.file_store_backend.list_files_bytes(requested_paths) + except Exception as e: + logger.warning(f"Agent {self.name} - {self.id}: failed to collect files from file store: {e}") + stored_files = [] + if stored_files: + execution_result["files"] = stored_files + logger.info( + f"Agent {self.name} - {self.id}: " + f"returning {len(stored_files)} requested file(s) from file store" + ) - if self.sandbox_backend and requested_paths: - try: - sandbox_files = self.sandbox_backend.collect_files(file_paths=requested_paths) - except Exception as e: - logger.warning(f"Agent {self.name} - {self.id}: failed to collect files from sandbox: {e}") - sandbox_files = [] - if sandbox_files: - existing_files = execution_result.get("files", []) - execution_result["files"] = existing_files + sandbox_files - logger.info( - f"Agent {self.name} - {self.id}: " - f"returning 
{len(sandbox_files)} requested file(s) from sandbox" - ) + if self.sandbox_backend and requested_paths: + try: + sandbox_files = self.sandbox_backend.collect_files(file_paths=requested_paths) + except Exception as e: + logger.warning(f"Agent {self.name} - {self.id}: failed to collect files from sandbox: {e}") + sandbox_files = [] + if sandbox_files: + existing_files = execution_result.get("files", []) + execution_result["files"] = existing_files + sandbox_files + logger.info( + f"Agent {self.name} - {self.id}: " + f"returning {len(sandbox_files)} requested file(s) from sandbox" + ) - logger.info(f"Node {self.name} - {self.id}: finished with RESULT:\n{str(result)[:200]}...") + logger.info(f"Node {self.name} - {self.id}: finished with RESULT:\n{str(result)[:200]}...") - return execution_result + return execution_result + finally: + if ltm_tools: + self.tools = _tools_before_ltm def retrieve_conversation_history( self, diff --git a/tests/unit/nodes/agents/test_long_term_memory_integration.py b/tests/unit/nodes/agents/test_long_term_memory_integration.py index b50c80c41..8c0a6e4ac 100644 --- a/tests/unit/nodes/agents/test_long_term_memory_integration.py +++ b/tests/unit/nodes/agents/test_long_term_memory_integration.py @@ -187,3 +187,16 @@ def test_execute_does_not_mutate_tools_when_no_long_term_memory(llm): assert {t.name for t in captured} == {t.name for t in original_tools} assert agent.tools == original_tools + + +def test_execute_restores_tools_when_prep_step_raises_before_run(llm, ltm): + """Regression: prep code between the LTM mutation and the inner try block + (memory retrieval, file upload, prompt-variable update) used to leak + appended tools if it raised. 
The outer try/finally must catch that path.""" + agent = _make_agent(llm, ltm=ltm) + original_tools = list(agent.tools) + + with patch.object(agent.system_prompt_manager, "update_variables", side_effect=RuntimeError("prep boom")): + agent.run_sync(input_data={"input": "hi", "user_id": "u1"}) + + assert agent.tools == original_tools From d6b14f3c17aa15562047bd16c2bbd3ff11ca6ad2 Mon Sep 17 00:00:00 2001 From: Taras Yaroshko Date: Wed, 27 May 2026 15:28:05 +0300 Subject: [PATCH 30/53] fix: remove comments --- dynamiq/nodes/agents/base.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/dynamiq/nodes/agents/base.py b/dynamiq/nodes/agents/base.py index b60f2a91e..cee09b3ff 100644 --- a/dynamiq/nodes/agents/base.py +++ b/dynamiq/nodes/agents/base.py @@ -646,9 +646,6 @@ def execute( ", ".join(t.name for t in ltm_tools), ) - # Wrap everything after the mutation in try/finally so `self.tools` is - # always restored — including when prep steps (memory retrieval, file - # upload, prompt-variable update) raise before reaching `_run_agent`. try: if use_memory: history_messages = self._retrieve_memory(input_data) From ff9a5c09c23792a25edad76a79aeaaeeef2c998a Mon Sep 17 00:00:00 2001 From: Taras Yaroshko Date: Wed, 27 May 2026 15:44:21 +0300 Subject: [PATCH 31/53] fix: qdrant delete_scope deletes all on empty scope --- dynamiq/memory/long_term/backends/qdrant.py | 6 +++--- tests/unit/memory/long_term/test_in_memory_backend.py | 10 ++++++++++ 2 files changed, 13 insertions(+), 3 deletions(-) diff --git a/dynamiq/memory/long_term/backends/qdrant.py b/dynamiq/memory/long_term/backends/qdrant.py index e81049328..ededcc2ff 100644 --- a/dynamiq/memory/long_term/backends/qdrant.py +++ b/dynamiq/memory/long_term/backends/qdrant.py @@ -215,9 +215,9 @@ def delete_scope(self, scope: dict[str, str]) -> int: # Qdrant delete-by-filter is atomic but returns no count, so count # first then delete. 
`count(exact=True)` is one round-trip and avoids # the 10k cap a paginated scroll-then-delete would silently hit. - scope_filter = _scope_to_filter(scope) - if scope_filter is None: - return 0 + # Empty scope = "match everything" — same contract as the in-memory + # and pgvector backends — so we use an empty Filter() rather than None. + scope_filter = _scope_to_filter(scope) or Filter() total = self._client.count( collection_name=self.collection_name, count_filter=scope_filter, diff --git a/tests/unit/memory/long_term/test_in_memory_backend.py b/tests/unit/memory/long_term/test_in_memory_backend.py index 6b5ecae2c..8c2705529 100644 --- a/tests/unit/memory/long_term/test_in_memory_backend.py +++ b/tests/unit/memory/long_term/test_in_memory_backend.py @@ -183,3 +183,13 @@ def test_delete_scope_removes_all_in_scope(fake_embedder): assert deleted == 2 assert backend.list_by_scope({"user_id": "u1"}) == [] assert len(backend.list_by_scope({"user_id": "u2"})) == 1 + + +def test_delete_scope_empty_scope_deletes_everything(fake_embedder): + """Contract: empty scope = "match every fact" — same for all backends.""" + backend = InMemoryLongTermMemoryBackend() + backend.insert(_fact("f1", "u1", "a"), fake_embedder.embed("a")) + backend.insert(_fact("f2", "u2", "b"), fake_embedder.embed("b")) + deleted = backend.delete_scope({}) + assert deleted == 2 + assert backend.list_by_scope({}) == [] From b83b5c97ffce08b5f79d98d409d7e22d4c52cadd Mon Sep 17 00:00:00 2001 From: Taras Yaroshko Date: Wed, 27 May 2026 20:09:27 +0300 Subject: [PATCH 32/53] fix: update comments --- dynamiq/memory/long_term/backends/pgvector.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/dynamiq/memory/long_term/backends/pgvector.py b/dynamiq/memory/long_term/backends/pgvector.py index d35a18620..25bf6aab8 100644 --- a/dynamiq/memory/long_term/backends/pgvector.py +++ b/dynamiq/memory/long_term/backends/pgvector.py @@ -86,8 +86,6 @@ def to_dict(self, include_secure_params: bool = False, for_tracing: bool = 
False def model_post_init(self, __context) -> None: self._conn = self.connection.connect() self._conn.autocommit = True - # CREATE EXTENSION must run BEFORE register_vector, otherwise the - # type adapter has nothing to bind to ("vector type not found"). with self._conn.cursor() as cur: cur.execute(_CREATE_EXTENSION_SQL) register_vector(self._conn) From 2fe0c560bb9fbf214da3282911de677573a2a4b4 Mon Sep 17 00:00:00 2001 From: Taras Yaroshko Date: Wed, 27 May 2026 23:36:41 +0300 Subject: [PATCH 33/53] fix: preserve tools added mid-run when removing LTM tools --- dynamiq/nodes/agents/base.py | 9 ++++-- .../test_long_term_memory_integration.py | 31 +++++++++++++++++++ 2 files changed, 37 insertions(+), 3 deletions(-) diff --git a/dynamiq/nodes/agents/base.py b/dynamiq/nodes/agents/base.py index cee09b3ff..d16115c4c 100644 --- a/dynamiq/nodes/agents/base.py +++ b/dynamiq/nodes/agents/base.py @@ -635,9 +635,8 @@ def execute( use_memory = self.memory and (input_data.user_id or input_data.session_id) ltm_tools = self._build_long_term_memory_tools(input_data) - _tools_before_ltm = self.tools if ltm_tools: - self.tools = list(_tools_before_ltm) + ltm_tools + self.tools = list(self.tools) + ltm_tools logger.info( "Agent %s - %s: attached %d long-term memory tools (%s)", self.name, @@ -766,7 +765,11 @@ def execute( return execution_result finally: if ltm_tools: - self.tools = _tools_before_ltm + # Remove by identity rather than restoring a snapshot so any tools + # appended mid-run (e.g. by `_setup_in_memory_file_store_and_tools`) + # are preserved for subsequent calls. 
+ ltm_ids = {id(t) for t in ltm_tools} + self.tools = [t for t in self.tools if id(t) not in ltm_ids] def retrieve_conversation_history( self, diff --git a/tests/unit/nodes/agents/test_long_term_memory_integration.py b/tests/unit/nodes/agents/test_long_term_memory_integration.py index 8c0a6e4ac..7487e5882 100644 --- a/tests/unit/nodes/agents/test_long_term_memory_integration.py +++ b/tests/unit/nodes/agents/test_long_term_memory_integration.py @@ -189,6 +189,37 @@ def test_execute_does_not_mutate_tools_when_no_long_term_memory(llm): assert agent.tools == original_tools +def test_execute_preserves_tools_added_mid_run(llm, ltm): + """Tools appended during execution (e.g. by `_setup_in_memory_file_store_and_tools`) + must survive LTM cleanup — we remove LTM tools by identity, not by snapshot restore.""" + from dynamiq.nodes.node import Node + from dynamiq.nodes.types import NodeGroup + + class _FakeFileTool(Node): + group: ClassVar = NodeGroup.TOOLS + name: str = "fake_file_tool" + + def execute(self, input_data=None, config=None, **kwargs): + return {"content": "ok"} + + agent = _make_agent(llm, ltm=ltm) + original_tools = list(agent.tools) + injected = _FakeFileTool() + + def fake_run(*args, **kwargs): + # Simulate `_setup_in_memory_file_store_and_tools` mutating self.tools + # during the run window — same pattern as the real file-store setup. 
+ agent.tools = list(agent.tools) + [injected] + return "ok" + + with patch.object(agent, "_run_agent", side_effect=fake_run): + agent.run_sync(input_data={"input": "hi", "user_id": "u1"}) + + assert injected in agent.tools + assert all(t.name not in {"remember_fact", "recall_facts"} for t in agent.tools) + assert agent.tools == original_tools + [injected] + + def test_execute_restores_tools_when_prep_step_raises_before_run(llm, ltm): """Regression: prep code between the LTM mutation and the inner try block (memory retrieval, file upload, prompt-variable update) used to leak From 446a94a2d85d2134ce68c4212c0c4a0faa6e542d Mon Sep 17 00:00:00 2001 From: Taras Yaroshko Date: Wed, 27 May 2026 23:44:51 +0300 Subject: [PATCH 34/53] fix: qdrant delete_scope returns exact deleted count via scroll+delete-by-ids --- dynamiq/memory/long_term/backends/qdrant.py | 33 +++++++++++++-------- 1 file changed, 21 insertions(+), 12 deletions(-) diff --git a/dynamiq/memory/long_term/backends/qdrant.py b/dynamiq/memory/long_term/backends/qdrant.py index ededcc2ff..f1eff8d80 100644 --- a/dynamiq/memory/long_term/backends/qdrant.py +++ b/dynamiq/memory/long_term/backends/qdrant.py @@ -8,7 +8,6 @@ Distance, FieldCondition, Filter, - FilterSelector, MatchValue, PointIdsList, PointStruct, @@ -212,21 +211,31 @@ def list_by_scope(self, scope: dict[str, str], limit: int = 100) -> list[Fact]: return [_payload_to_fact(p.payload) for p in points] def delete_scope(self, scope: dict[str, str]) -> int: - # Qdrant delete-by-filter is atomic but returns no count, so count - # first then delete. `count(exact=True)` is one round-trip and avoids - # the 10k cap a paginated scroll-then-delete would silently hit. + # Scroll for all matching point ids (paginated, no 10k cap), then delete + # them in one call. Compared to count-then-delete-by-filter this trades + # an extra round-trip for an accurate count of what we actually removed + # — the count+delete variant could diverge under concurrent writes. 
# Empty scope = "match everything" — same contract as the in-memory # and pgvector backends — so we use an empty Filter() rather than None. scope_filter = _scope_to_filter(scope) or Filter() - total = self._client.count( - collection_name=self.collection_name, - count_filter=scope_filter, - exact=True, - ).count - if total == 0: + ids: list = [] + offset = None + while True: + points, offset = self._client.scroll( + collection_name=self.collection_name, + scroll_filter=scope_filter, + limit=1000, + offset=offset, + with_payload=False, + with_vectors=False, + ) + ids.extend(p.id for p in points) + if offset is None: + break + if not ids: return 0 self._client.delete( collection_name=self.collection_name, - points_selector=FilterSelector(filter=scope_filter), + points_selector=PointIdsList(points=ids), ) - return total + return len(ids) From f8955a6389e0b20a8e5f4d0cb713579dac2f1fe7 Mon Sep 17 00:00:00 2001 From: Taras Yaroshko Date: Thu, 28 May 2026 01:37:17 +0300 Subject: [PATCH 35/53] fix: propagate include_secure_params through LTM backends to connection --- dynamiq/connections/connections.py | 5 +++++ dynamiq/memory/long_term/backends/pgvector.py | 4 +++- dynamiq/memory/long_term/backends/qdrant.py | 4 +++- tests/unit/memory/long_term/test_tools.py | 9 +++++++++ 4 files changed, 20 insertions(+), 2 deletions(-) diff --git a/dynamiq/connections/connections.py b/dynamiq/connections/connections.py index 746aa0df6..1b1157670 100644 --- a/dynamiq/connections/connections.py +++ b/dynamiq/connections/connections.py @@ -69,6 +69,11 @@ def to_dict(self, for_tracing: bool = False, **kwargs) -> dict: Returns: dict: A dictionary representation of the connection instance. """ + # Swallow `include_secure_params` if a caller forwards it down — the + # connection always serializes its credential fields (subject only to + # `for_tracing`), so the flag has no effect here but must not leak to + # `model_dump`, which raises on unknown kwargs. 
+ kwargs.pop("include_secure_params", None) if for_tracing: return {"id": self.id, "type": self.type} return self.model_dump(**kwargs) diff --git a/dynamiq/memory/long_term/backends/pgvector.py b/dynamiq/memory/long_term/backends/pgvector.py index 25bf6aab8..5b446b07b 100644 --- a/dynamiq/memory/long_term/backends/pgvector.py +++ b/dynamiq/memory/long_term/backends/pgvector.py @@ -80,7 +80,9 @@ def to_dict_exclude_params(self) -> dict[str, bool]: def to_dict(self, include_secure_params: bool = False, for_tracing: bool = False, **kwargs) -> dict[str, Any]: exclude = kwargs.pop("exclude", self.to_dict_exclude_params.copy()) data = self.model_dump(exclude=exclude, **kwargs) - data["connection"] = self.connection.to_dict(for_tracing=for_tracing) + data["connection"] = self.connection.to_dict( + for_tracing=for_tracing, include_secure_params=include_secure_params, **kwargs + ) return data def model_post_init(self, __context) -> None: diff --git a/dynamiq/memory/long_term/backends/qdrant.py b/dynamiq/memory/long_term/backends/qdrant.py index f1eff8d80..8abf6b125 100644 --- a/dynamiq/memory/long_term/backends/qdrant.py +++ b/dynamiq/memory/long_term/backends/qdrant.py @@ -84,7 +84,9 @@ def to_dict_exclude_params(self) -> dict[str, bool]: def to_dict(self, include_secure_params: bool = False, for_tracing: bool = False, **kwargs) -> dict[str, Any]: exclude = kwargs.pop("exclude", self.to_dict_exclude_params.copy()) data = self.model_dump(exclude=exclude, **kwargs) - data["connection"] = self.connection.to_dict(for_tracing=for_tracing) + data["connection"] = self.connection.to_dict( + for_tracing=for_tracing, include_secure_params=include_secure_params, **kwargs + ) return data def model_post_init(self, __context) -> None: diff --git a/tests/unit/memory/long_term/test_tools.py b/tests/unit/memory/long_term/test_tools.py index 441d929e8..0c7c75144 100644 --- a/tests/unit/memory/long_term/test_tools.py +++ b/tests/unit/memory/long_term/test_tools.py @@ -177,3 +177,12 @@ def 
test_remember_tool_to_dict_round_trips_long_term_memory(ltm, user_id): assert isinstance(ltm_dump, dict) assert "backend" in ltm_dump and isinstance(ltm_dump["backend"], dict) assert "embedder" in ltm_dump and isinstance(ltm_dump["embedder"], dict) + + +def test_remember_tool_to_dict_accepts_include_secure_params(ltm, user_id): + """`include_secure_params=True` must propagate through tool → LTM → backend → connection + without raising. Connection.to_dict swallows the kwarg; backends pass it through.""" + tool = RememberFactTool(long_term_memory=ltm, user_id=user_id) + data = tool.to_dict(include_secure_params=True) + assert "long_term_memory" in data + assert "backend" in data["long_term_memory"] From bbf083a9d6f4c38c560acaefc205b4a0f057f148 Mon Sep 17 00:00:00 2001 From: Taras Yaroshko Date: Thu, 28 May 2026 01:57:43 +0300 Subject: [PATCH 36/53] fix: set is_optimized_for_agents on per-run LTM tools --- dynamiq/nodes/agents/base.py | 8 +++++++- .../nodes/agents/test_long_term_memory_integration.py | 9 +++++++++ 2 files changed, 16 insertions(+), 1 deletion(-) diff --git a/dynamiq/nodes/agents/base.py b/dynamiq/nodes/agents/base.py index d16115c4c..5d0bf63e5 100644 --- a/dynamiq/nodes/agents/base.py +++ b/dynamiq/nodes/agents/base.py @@ -842,11 +842,17 @@ def _build_long_term_memory_tools(self, input_data: "AgentInputSchema") -> list[ return [] from dynamiq.nodes.tools.long_term_memory import build_long_term_memory_tools - return build_long_term_memory_tools( + tools = build_long_term_memory_tools( long_term_memory=self.long_term_memory, user_id=user_id, include=self.long_term_memory_config.tools, ) + # `init_components` set this on every tool that existed at agent build + # time; LTM tools are constructed lazily per-run and must match so the + # remember/recall outputs render as friendly strings rather than raw dicts. 
+ for tool in tools: + tool.is_optimized_for_agents = True + return tools def _is_input_output_trace_message(self, message: Message) -> bool: """Return True when a message is an internal ReAct/tool-trace entry.""" diff --git a/tests/unit/nodes/agents/test_long_term_memory_integration.py b/tests/unit/nodes/agents/test_long_term_memory_integration.py index 7487e5882..4ee049b3d 100644 --- a/tests/unit/nodes/agents/test_long_term_memory_integration.py +++ b/tests/unit/nodes/agents/test_long_term_memory_integration.py @@ -131,6 +131,15 @@ def test_build_bakes_user_id_into_each_tool(llm, ltm): assert tool.user_id == "u1" +def test_build_sets_is_optimized_for_agents_on_each_tool(llm, ltm): + """LTM tools are built per-run, after `init_components` has run, so the agent + must flip `is_optimized_for_agents` itself — otherwise remember/recall would + return raw dicts instead of the friendly status strings the LLM expects.""" + agent = _make_agent(llm, ltm=ltm) + tools = agent._build_long_term_memory_tools(_input(user_id="u1")) + assert tools and all(t.is_optimized_for_agents for t in tools) + + # --- execute() splice: snapshot/restore self.tools --- From 0eade8fc351ccb687824573f5683a6f7bcc12002 Mon Sep 17 00:00:00 2001 From: Taras Yaroshko Date: Thu, 28 May 2026 02:21:38 +0300 Subject: [PATCH 37/53] fix: init LTM embedder and serialize concurrent LTM tool windows --- dynamiq/nodes/agents/base.py | 26 +++++++-- .../test_long_term_memory_integration.py | 56 +++++++++++++++++++ 2 files changed, 77 insertions(+), 5 deletions(-) diff --git a/dynamiq/nodes/agents/base.py b/dynamiq/nodes/agents/base.py index 5d0bf63e5..81ad5c27e 100644 --- a/dynamiq/nodes/agents/base.py +++ b/dynamiq/nodes/agents/base.py @@ -3,6 +3,7 @@ import re from copy import deepcopy from enum import Enum +from threading import RLock from typing import Any, Callable, ClassVar, Union from uuid import uuid4 @@ -267,6 +268,9 @@ class Agent(AgentIterativeCheckpointMixin, Node): _pinned_input: Message | 
VisionMessage | None = PrivateAttr(default=None) system_prompt_manager: AgentPromptManager = Field(default_factory=AgentPromptManager) _current_call_context: dict[str, Any] | None = PrivateAttr(default=None) + # Serialises the LTM-tool window in `execute` so concurrent calls on the + # same agent instance don't see each other's per-call tools on `self.tools`. + _ltm_tools_lock: RLock = PrivateAttr(default_factory=RLock) # Loop progress and pending-tool-call state are declared on AgentIterativeCheckpointMixin. model_config = ConfigDict(arbitrary_types_allowed=True) @@ -446,6 +450,12 @@ def init_components(self, connection_manager: ConnectionManager | None = None): tool.init_components(connection_manager) tool.is_optimized_for_agents = True + # The LTM embedder is a ConnectionNode that needs its text_embedder + # client built before the first recall/remember call, otherwise it + # AttributeErrors on a `None` client during `execute`. + if self.long_term_memory and self.long_term_memory.embedder.is_postponed_component_init: + self.long_term_memory.embedder.init_components(connection_manager) + self._ensure_skills_ingested_for_sandbox() def _ensure_skills_ingested_for_sandbox(self) -> None: @@ -636,6 +646,9 @@ def execute( ltm_tools = self._build_long_term_memory_tools(input_data) if ltm_tools: + # Lock acquired here, released in the matching finally below — see + # `_ltm_tools_lock` declaration for the concurrency rationale. + self._ltm_tools_lock.acquire() self.tools = list(self.tools) + ltm_tools logger.info( "Agent %s - %s: attached %d long-term memory tools (%s)", @@ -765,11 +778,14 @@ def execute( return execution_result finally: if ltm_tools: - # Remove by identity rather than restoring a snapshot so any tools - # appended mid-run (e.g. by `_setup_in_memory_file_store_and_tools`) - # are preserved for subsequent calls. 
- ltm_ids = {id(t) for t in ltm_tools} - self.tools = [t for t in self.tools if id(t) not in ltm_ids] + try: + # Remove by identity rather than restoring a snapshot so any + # tools appended mid-run (e.g. by + # `_setup_in_memory_file_store_and_tools`) are preserved. + ltm_ids = {id(t) for t in ltm_tools} + self.tools = [t for t in self.tools if id(t) not in ltm_ids] + finally: + self._ltm_tools_lock.release() def retrieve_conversation_history( self, diff --git a/tests/unit/nodes/agents/test_long_term_memory_integration.py b/tests/unit/nodes/agents/test_long_term_memory_integration.py index 4ee049b3d..5b23d59e2 100644 --- a/tests/unit/nodes/agents/test_long_term_memory_integration.py +++ b/tests/unit/nodes/agents/test_long_term_memory_integration.py @@ -140,6 +140,27 @@ def test_build_sets_is_optimized_for_agents_on_each_tool(llm, ltm): assert tools and all(t.is_optimized_for_agents for t in tools) +def test_init_components_initializes_ltm_embedder(llm): + """The embedder is a ConnectionNode whose `text_embedder` client is built + during `init_components`; without that, the first recall AttributeErrors + on a None client.""" + init_calls: list = [] + + class _RecordingEmbedder(_FakeEmbedder): + is_postponed_component_init: bool = True + + def init_components(self, connection_manager=None): + init_calls.append(connection_manager) + + ltm_with_postponed = LongTermMemory(backend=InMemoryLongTermMemoryBackend(), embedder=_RecordingEmbedder()) + agent = _make_agent(llm, ltm=ltm_with_postponed) + # Node.__init__ already invokes init_components on construction; clear and + # assert the explicit call also propagates to the embedder. 
+ init_calls.clear() + agent.init_components() + assert len(init_calls) == 1 + + # --- execute() splice: snapshot/restore self.tools --- @@ -229,6 +250,41 @@ def fake_run(*args, **kwargs): assert agent.tools == original_tools + [injected] +def test_concurrent_execute_calls_isolate_per_user_ltm_tools(llm, ltm): + """Two concurrent execute calls with different user_ids must each observe + only their own LTM tools — the per-agent `_ltm_tools_lock` serialises the + mutation window so the user-scoped tools never leak across calls.""" + import threading + from concurrent.futures import ThreadPoolExecutor + + agent = _make_agent(llm, ltm=ltm) + original_tools = list(agent.tools) + snapshots: dict[str, set[str]] = {} + snapshots_lock = threading.Lock() + + def fake_run(*args, **kwargs): + bound_user_ids = {t.user_id for t in agent.tools if hasattr(t, "user_id")} + assert len(bound_user_ids) == 1, f"cross-user leakage: {bound_user_ids}" + (uid,) = bound_user_ids + with snapshots_lock: + snapshots[uid] = {t.name for t in agent.tools if hasattr(t, "user_id")} + return "ok" + + with patch.object(agent, "_run_agent", side_effect=fake_run): + with ThreadPoolExecutor(max_workers=2) as pool: + futures = [ + pool.submit(agent.run_sync, input_data={"input": "hi", "user_id": "u1"}), + pool.submit(agent.run_sync, input_data={"input": "hi", "user_id": "u2"}), + ] + for f in futures: + f.result(timeout=10) + + assert set(snapshots.keys()) == {"u1", "u2"} + for tool_names in snapshots.values(): + assert tool_names == {"remember_fact", "recall_facts"} + assert agent.tools == original_tools + + def test_execute_restores_tools_when_prep_step_raises_before_run(llm, ltm): """Regression: prep code between the LTM mutation and the inner try block (memory retrieval, file upload, prompt-variable update) used to leak From c7ce37d9f220827b3c6c2f565dce2a8055633863 Mon Sep 17 00:00:00 2001 From: Taras Yaroshko Date: Thu, 28 May 2026 03:37:43 +0300 Subject: [PATCH 38/53] fix: acquire LTM lock directly 
before try block to prevent leak on raise --- dynamiq/nodes/agents/base.py | 24 ++++++++++--------- .../test_long_term_memory_integration.py | 24 +++++++++++++++++++ 2 files changed, 37 insertions(+), 11 deletions(-) diff --git a/dynamiq/nodes/agents/base.py b/dynamiq/nodes/agents/base.py index 81ad5c27e..1f03e6460 100644 --- a/dynamiq/nodes/agents/base.py +++ b/dynamiq/nodes/agents/base.py @@ -645,20 +645,22 @@ def execute( use_memory = self.memory and (input_data.user_id or input_data.session_id) ltm_tools = self._build_long_term_memory_tools(input_data) + # Acquire on the line immediately before `try:` so nothing between the + # acquire and the matching finally can raise and leak the lock — see + # `_ltm_tools_lock` declaration for the concurrency rationale. if ltm_tools: - # Lock acquired here, released in the matching finally below — see - # `_ltm_tools_lock` declaration for the concurrency rationale. self._ltm_tools_lock.acquire() - self.tools = list(self.tools) + ltm_tools - logger.info( - "Agent %s - %s: attached %d long-term memory tools (%s)", - self.name, - self.id, - len(ltm_tools), - ", ".join(t.name for t in ltm_tools), - ) - try: + if ltm_tools: + self.tools = list(self.tools) + ltm_tools + logger.info( + "Agent %s - %s: attached %d long-term memory tools (%s)", + self.name, + self.id, + len(ltm_tools), + ", ".join(t.name for t in ltm_tools), + ) + if use_memory: history_messages = self._retrieve_memory(input_data) if len(history_messages) > 0: diff --git a/tests/unit/nodes/agents/test_long_term_memory_integration.py b/tests/unit/nodes/agents/test_long_term_memory_integration.py index 5b23d59e2..fdf2564b0 100644 --- a/tests/unit/nodes/agents/test_long_term_memory_integration.py +++ b/tests/unit/nodes/agents/test_long_term_memory_integration.py @@ -250,6 +250,30 @@ def fake_run(*args, **kwargs): assert agent.tools == original_tools + [injected] +def test_ltm_lock_released_when_post_acquire_mutation_raises(llm, ltm): + """Anything between lock-acquire 
and run can raise (list creation, logger + call). It must still release the lock — otherwise the next LTM-enabled + execute on this agent would block forever waiting on it.""" + from dynamiq.nodes.agents.base import logger as base_logger + + agent = _make_agent(llm, ltm=ltm) + real_info = base_logger.info + + def fail_on_ltm_log(msg, *args, **kwargs): + # Only blow up on the LTM-attach log line so we hit the post-acquire + # window specifically; let other logger.info calls in execute pass. + if "long-term memory tools" in str(msg): + raise RuntimeError("log boom") + return real_info(msg, *args, **kwargs) + + with patch.object(base_logger, "info", side_effect=fail_on_ltm_log): + agent.run_sync(input_data={"input": "hi", "user_id": "u1"}) + + # Lock must be free now; non-blocking acquire should succeed immediately. + assert agent._ltm_tools_lock.acquire(blocking=False), "lock was leaked" + agent._ltm_tools_lock.release() + + def test_concurrent_execute_calls_isolate_per_user_ltm_tools(llm, ltm): """Two concurrent execute calls with different user_ids must each observe only their own LTM tools — the per-agent `_ltm_tools_lock` serialises the From 7e36980bef639348f694f7bec7f561579645ba32 Mon Sep 17 00:00:00 2001 From: Taras Yaroshko Date: Thu, 28 May 2026 03:59:40 +0300 Subject: [PATCH 39/53] fix: skip enum members without a builder in build_long_term_memory_tools --- dynamiq/nodes/tools/long_term_memory.py | 12 ++++++++++-- tests/unit/memory/long_term/test_tools.py | 16 ++++++++++++++++ 2 files changed, 26 insertions(+), 2 deletions(-) diff --git a/dynamiq/nodes/tools/long_term_memory.py b/dynamiq/nodes/tools/long_term_memory.py index b7dea02f9..91be76a3a 100644 --- a/dynamiq/nodes/tools/long_term_memory.py +++ b/dynamiq/nodes/tools/long_term_memory.py @@ -173,12 +173,20 @@ def build_long_term_memory_tools( MemoryToolKind.RECALL, ), ) -> list[Node]: - """Construct long-term-memory tools with `user_id` baked in. 
Unknown keys in `include` are ignored.""" + """Construct long-term-memory tools with `user_id` baked in. Unknown keys in `include` are ignored. + + Skips both invalid kind strings (ValueError on enum coercion) and valid + enum members without a corresponding builder (e.g. an enum value added + here but not yet wired into `_TOOL_BUILDERS`). + """ tools: list[Node] = [] for kind in include: try: tool_kind = MemoryToolKind(kind) except ValueError: continue - tools.append(_TOOL_BUILDERS[tool_kind](long_term_memory=long_term_memory, user_id=user_id)) + builder = _TOOL_BUILDERS.get(tool_kind) + if builder is None: + continue + tools.append(builder(long_term_memory=long_term_memory, user_id=user_id)) return tools diff --git a/tests/unit/memory/long_term/test_tools.py b/tests/unit/memory/long_term/test_tools.py index 0c7c75144..b167b921b 100644 --- a/tests/unit/memory/long_term/test_tools.py +++ b/tests/unit/memory/long_term/test_tools.py @@ -160,6 +160,22 @@ def test_factory_ignores_unknown_include_keys(ltm, user_id): assert [t.name for t in tools] == ["recall_facts"] +def test_factory_skips_enum_members_missing_from_builders(ltm, user_id, monkeypatch): + """Valid `MemoryToolKind` values without a `_TOOL_BUILDERS` entry must be + silently skipped, not KeyError. 
Mirrors the unknown-string branch so the + docstring's "unknown keys are ignored" promise actually holds.""" + from dynamiq.memory.long_term.types import MemoryToolKind + from dynamiq.nodes.tools import long_term_memory as ltm_tools_module + + monkeypatch.setattr(ltm_tools_module, "_TOOL_BUILDERS", {MemoryToolKind.RECALL: ltm_tools_module.RecallFactsTool}) + tools = build_long_term_memory_tools( + long_term_memory=ltm, + user_id=user_id, + include=(MemoryToolKind.REMEMBER, MemoryToolKind.RECALL), + ) + assert [t.name for t in tools] == ["recall_facts"] + + # --- serialization --- From 46552b0cf6b6b7bbbc1a8ed3194d2c23ed739287 Mon Sep 17 00:00:00 2001 From: Taras Yaroshko Date: Thu, 28 May 2026 03:59:46 +0300 Subject: [PATCH 40/53] fix: hold LTM lock for every execute when LTM is configured --- dynamiq/nodes/agents/base.py | 14 +++++--- .../test_long_term_memory_integration.py | 32 +++++++++++++++++++ 2 files changed, 42 insertions(+), 4 deletions(-) diff --git a/dynamiq/nodes/agents/base.py b/dynamiq/nodes/agents/base.py index 1f03e6460..260db1e79 100644 --- a/dynamiq/nodes/agents/base.py +++ b/dynamiq/nodes/agents/base.py @@ -648,7 +648,12 @@ def execute( # Acquire on the line immediately before `try:` so nothing between the # acquire and the matching finally can raise and leak the lock — see # `_ltm_tools_lock` declaration for the concurrency rationale. - if ltm_tools: + # The lock is taken whenever LTM is configured on this agent (not only + # when *this* call attaches tools) so a concurrent no-user-id call + # cannot read `self.tools` mid-mutation and see another user's + # user-scoped remember/recall tools. + ltm_locked = self.long_term_memory is not None + if ltm_locked: self._ltm_tools_lock.acquire() try: if ltm_tools: @@ -779,14 +784,15 @@ def execute( return execution_result finally: - if ltm_tools: - try: + try: + if ltm_tools: # Remove by identity rather than restoring a snapshot so any # tools appended mid-run (e.g. 
by # `_setup_in_memory_file_store_and_tools`) are preserved. ltm_ids = {id(t) for t in ltm_tools} self.tools = [t for t in self.tools if id(t) not in ltm_ids] - finally: + finally: + if ltm_locked: self._ltm_tools_lock.release() def retrieve_conversation_history( diff --git a/tests/unit/nodes/agents/test_long_term_memory_integration.py b/tests/unit/nodes/agents/test_long_term_memory_integration.py index fdf2564b0..885c5561c 100644 --- a/tests/unit/nodes/agents/test_long_term_memory_integration.py +++ b/tests/unit/nodes/agents/test_long_term_memory_integration.py @@ -274,6 +274,38 @@ def fail_on_ltm_log(msg, *args, **kwargs): agent._ltm_tools_lock.release() +def test_concurrent_no_user_id_call_does_not_see_other_users_ltm_tools(llm, ltm): + """The lock fires whenever LTM is configured on the agent — even for calls + without a user_id — so a concurrent no-user_id execute cannot observe + another call's user-scoped LTM tools mid-mutation.""" + import threading + from concurrent.futures import ThreadPoolExecutor + + agent = _make_agent(llm, ltm=ltm) + snapshots: dict[str, set] = {} + snapshots_lock = threading.Lock() + + def fake_run(*args, **kwargs): + bound = {getattr(t, "user_id", None) for t in agent.tools if hasattr(t, "user_id")} + with snapshots_lock: + key = next(iter(bound), "none") + snapshots[key] = bound + return "ok" + + with patch.object(agent, "_run_agent", side_effect=fake_run): + with ThreadPoolExecutor(max_workers=2) as pool: + futures = [ + pool.submit(agent.run_sync, input_data={"input": "hi", "user_id": "u1"}), + pool.submit(agent.run_sync, input_data={"input": "hi"}), # no user_id + ] + for f in futures: + f.result(timeout=10) + + assert snapshots.get("u1") == {"u1"} + # The no-user_id call must not see any user-scoped tools at all. 
+ assert snapshots.get("none", set()) == set() + + def test_concurrent_execute_calls_isolate_per_user_ltm_tools(llm, ltm): """Two concurrent execute calls with different user_ids must each observe only their own LTM tools — the per-agent `_ltm_tools_lock` serialises the From c5f667cd28b64b2d467aac6afadf77bb1401c926 Mon Sep 17 00:00:00 2001 From: Taras Yaroshko Date: Thu, 28 May 2026 04:24:13 +0300 Subject: [PATCH 41/53] refactor: use ContextVar overlay for per-call LTM tools instead of locked mutation --- dynamiq/nodes/agents/base.py | 78 ++++---- .../test_long_term_memory_integration.py | 174 +++++++----------- 2 files changed, 108 insertions(+), 144 deletions(-) diff --git a/dynamiq/nodes/agents/base.py b/dynamiq/nodes/agents/base.py index 260db1e79..64d57d70b 100644 --- a/dynamiq/nodes/agents/base.py +++ b/dynamiq/nodes/agents/base.py @@ -1,9 +1,9 @@ import io import json import re +from contextvars import ContextVar from copy import deepcopy from enum import Enum -from threading import RLock from typing import Any, Callable, ClassVar, Union from uuid import uuid4 @@ -49,6 +49,14 @@ from dynamiq.utils.logger import logger from dynamiq.utils.utils import deep_merge +# Per-call overlay of tools visible to a single `Agent.execute` invocation — +# currently used for long-term-memory tools that bind a request's `user_id` +# at construction. ContextVar gives per-thread and per-asyncio-task isolation +# without mutating shared agent state, so concurrent execute() calls on the +# same agent instance never see each other's user-scoped tools and never +# block on a lock. 
+_run_extra_tools: ContextVar[list["Node"]] = ContextVar("dynamiq_agent_run_extra_tools", default=[]) + class StreamChunkChoiceDelta(BaseModel): """Delta model for content chunks.""" @@ -268,9 +276,6 @@ class Agent(AgentIterativeCheckpointMixin, Node): _pinned_input: Message | VisionMessage | None = PrivateAttr(default=None) system_prompt_manager: AgentPromptManager = Field(default_factory=AgentPromptManager) _current_call_context: dict[str, Any] | None = PrivateAttr(default=None) - # Serialises the LTM-tool window in `execute` so concurrent calls on the - # same agent instance don't see each other's per-call tools on `self.tools`. - _ltm_tools_lock: RLock = PrivateAttr(default_factory=RLock) # Loop progress and pending-tool-call state are declared on AgentIterativeCheckpointMixin. model_config = ConfigDict(arbitrary_types_allowed=True) @@ -645,27 +650,23 @@ def execute( use_memory = self.memory and (input_data.user_id or input_data.session_id) ltm_tools = self._build_long_term_memory_tools(input_data) - # Acquire on the line immediately before `try:` so nothing between the - # acquire and the matching finally can raise and leak the lock — see - # `_ltm_tools_lock` declaration for the concurrency rationale. - # The lock is taken whenever LTM is configured on this agent (not only - # when *this* call attaches tools) so a concurrent no-user-id call - # cannot read `self.tools` mid-mutation and see another user's - # user-scoped remember/recall tools. - ltm_locked = self.long_term_memory is not None - if ltm_locked: - self._ltm_tools_lock.acquire() - try: - if ltm_tools: - self.tools = list(self.tools) + ltm_tools - logger.info( - "Agent %s - %s: attached %d long-term memory tools (%s)", - self.name, - self.id, - len(ltm_tools), - ", ".join(t.name for t in ltm_tools), - ) + # Publish the per-call LTM tools via the module-level ContextVar; the + # tool-resolution properties (`tool_description`, `tool_names`, + # `tool_by_names`) read it. 
Setting a ContextVar is cheap, isolated + # per thread / per asyncio task, and never mutates shared state — so + # concurrent execute() calls don't see each other's user-scoped tools + # and don't need a lock. + ltm_token = _run_extra_tools.set(ltm_tools) if ltm_tools else None + if ltm_tools: + logger.info( + "Agent %s - %s: attached %d long-term memory tools (%s)", + self.name, + self.id, + len(ltm_tools), + ", ".join(t.name for t in ltm_tools), + ) + try: if use_memory: history_messages = self._retrieve_memory(input_data) if len(history_messages) > 0: @@ -784,16 +785,8 @@ def execute( return execution_result finally: - try: - if ltm_tools: - # Remove by identity rather than restoring a snapshot so any - # tools appended mid-run (e.g. by - # `_setup_in_memory_file_store_and_tools`) are preserved. - ltm_ids = {id(t) for t in ltm_tools} - self.tools = [t for t in self.tools if id(t) not in ltm_ids] - finally: - if ltm_locked: - self._ltm_tools_lock.release() + if ltm_token is not None: + _run_extra_tools.reset(ltm_token) def retrieve_conversation_history( self, @@ -1875,20 +1868,31 @@ def sandbox_backend(self) -> Sandbox | None: """Get the sandbox backend from the configuration if enabled.""" return self.sandbox.backend if self.sandbox and self.sandbox.enabled else None + @property + def _runtime_tools(self) -> list[Node]: + """Tools the LLM should see for the current call: instance tools + any + per-call overlay (e.g. long-term-memory tools bound to a request's + user_id). 
The overlay is read from a `ContextVar` and is isolated per + thread / per asyncio task — concurrent execute() calls never see each + other's user-scoped tools.""" + extra = _run_extra_tools.get() + return self.tools + extra if extra else self.tools + @property def tool_description(self) -> str: """Returns a description of the tools available to the agent.""" - return "\n".join([f"- {tool.name}: {tool.description.strip()}" for tool in self.tools]) if self.tools else "" + tools = self._runtime_tools + return "\n".join([f"- {tool.name}: {tool.description.strip()}" for tool in tools]) if tools else "" @property def tool_names(self) -> str: """Returns a comma-separated list of tool names available to the agent.""" - return ",".join([self.sanitize_tool_name(tool.name) for tool in self.tools]) + return ",".join([self.sanitize_tool_name(tool.name) for tool in self._runtime_tools]) @property def tool_by_names(self) -> dict[str, Node]: """Returns a dictionary mapping tool names to their corresponding Node objects.""" - return {self.sanitize_tool_name(tool.name): tool for tool in self.tools} + return {self.sanitize_tool_name(tool.name): tool for tool in self._runtime_tools} def reset_run_state(self): """Resets the agent's run state. 
diff --git a/tests/unit/nodes/agents/test_long_term_memory_integration.py b/tests/unit/nodes/agents/test_long_term_memory_integration.py index 885c5561c..551919589 100644 --- a/tests/unit/nodes/agents/test_long_term_memory_integration.py +++ b/tests/unit/nodes/agents/test_long_term_memory_integration.py @@ -161,169 +161,110 @@ def init_components(self, connection_manager=None): assert len(init_calls) == 1 -# --- execute() splice: snapshot/restore self.tools --- +# --- per-call ContextVar overlay: LTM tools never mutate self.tools --- -def _patch_run_agent_capture_tools(agent, captured): +def _patch_run_agent_capture_runtime_tools(agent, captured): + """Capture what the LLM-facing `tool_by_names` resolution sees mid-run.""" + def fake_run(*args, **kwargs): - captured.extend(agent.tools) + captured.append(set(agent.tool_by_names.keys())) return "ok" return patch.object(agent, "_run_agent", side_effect=fake_run) -def test_execute_attaches_ltm_tools_during_run_and_restores_after(llm, ltm): +def test_execute_exposes_ltm_tools_during_run_only(llm, ltm): + """LTM tools must be visible to the tool-resolution properties during the + run, and absent from both `self.tools` and the properties after.""" agent = _make_agent(llm, ltm=ltm) original_tools = list(agent.tools) - captured: list = [] + captured: list[set[str]] = [] - with _patch_run_agent_capture_tools(agent, captured): + with _patch_run_agent_capture_runtime_tools(agent, captured): agent.run_sync(input_data={"input": "hi", "user_id": "u1"}) - assert {"remember_fact", "recall_facts"} <= {t.name for t in captured} + assert {"remember_fact", "recall_facts"} <= captured[0] assert agent.tools == original_tools + assert {"remember_fact", "recall_facts"}.isdisjoint(agent.tool_by_names.keys()) -def test_execute_restores_tools_even_when_run_raises(llm, ltm): +def test_execute_clears_ltm_overlay_even_when_run_raises(llm, ltm): agent = _make_agent(llm, ltm=ltm) original_tools = list(agent.tools) with patch.object(agent, 
"_run_agent", side_effect=RuntimeError("boom")): - # run_sync wraps exceptions in a failed RunnableResult; check tools after. agent.run_sync(input_data={"input": "hi", "user_id": "u1"}) assert agent.tools == original_tools + assert {"remember_fact", "recall_facts"}.isdisjoint(agent.tool_by_names.keys()) -def test_execute_does_not_mutate_tools_when_no_user_id(llm, ltm): +def test_execute_no_ltm_overlay_when_no_user_id(llm, ltm): agent = _make_agent(llm, ltm=ltm) - original_tools = list(agent.tools) - captured: list = [] + captured: list[set[str]] = [] - with _patch_run_agent_capture_tools(agent, captured): + with _patch_run_agent_capture_runtime_tools(agent, captured): agent.run_sync(input_data={"input": "hi"}) - assert {t.name for t in captured} == {t.name for t in original_tools} - assert agent.tools == original_tools + assert {"remember_fact", "recall_facts"}.isdisjoint(captured[0]) -def test_execute_does_not_mutate_tools_when_no_long_term_memory(llm): +def test_execute_no_ltm_overlay_when_no_long_term_memory(llm): agent = _make_agent(llm) - original_tools = list(agent.tools) - captured: list = [] - - with _patch_run_agent_capture_tools(agent, captured): - agent.run_sync(input_data={"input": "hi", "user_id": "u1"}) - - assert {t.name for t in captured} == {t.name for t in original_tools} - assert agent.tools == original_tools - - -def test_execute_preserves_tools_added_mid_run(llm, ltm): - """Tools appended during execution (e.g. 
by `_setup_in_memory_file_store_and_tools`) - must survive LTM cleanup — we remove LTM tools by identity, not by snapshot restore.""" - from dynamiq.nodes.node import Node - from dynamiq.nodes.types import NodeGroup - - class _FakeFileTool(Node): - group: ClassVar = NodeGroup.TOOLS - name: str = "fake_file_tool" - - def execute(self, input_data=None, config=None, **kwargs): - return {"content": "ok"} - - agent = _make_agent(llm, ltm=ltm) - original_tools = list(agent.tools) - injected = _FakeFileTool() - - def fake_run(*args, **kwargs): - # Simulate `_setup_in_memory_file_store_and_tools` mutating self.tools - # during the run window — same pattern as the real file-store setup. - agent.tools = list(agent.tools) + [injected] - return "ok" - - with patch.object(agent, "_run_agent", side_effect=fake_run): - agent.run_sync(input_data={"input": "hi", "user_id": "u1"}) - - assert injected in agent.tools - assert all(t.name not in {"remember_fact", "recall_facts"} for t in agent.tools) - assert agent.tools == original_tools + [injected] - - -def test_ltm_lock_released_when_post_acquire_mutation_raises(llm, ltm): - """Anything between lock-acquire and run can raise (list creation, logger - call). It must still release the lock — otherwise the next LTM-enabled - execute on this agent would block forever waiting on it.""" - from dynamiq.nodes.agents.base import logger as base_logger + captured: list[set[str]] = [] - agent = _make_agent(llm, ltm=ltm) - real_info = base_logger.info - - def fail_on_ltm_log(msg, *args, **kwargs): - # Only blow up on the LTM-attach log line so we hit the post-acquire - # window specifically; let other logger.info calls in execute pass. 
- if "long-term memory tools" in str(msg): - raise RuntimeError("log boom") - return real_info(msg, *args, **kwargs) - - with patch.object(base_logger, "info", side_effect=fail_on_ltm_log): + with _patch_run_agent_capture_runtime_tools(agent, captured): agent.run_sync(input_data={"input": "hi", "user_id": "u1"}) - # Lock must be free now; non-blocking acquire should succeed immediately. - assert agent._ltm_tools_lock.acquire(blocking=False), "lock was leaked" - agent._ltm_tools_lock.release() + assert {"remember_fact", "recall_facts"}.isdisjoint(captured[0]) -def test_concurrent_no_user_id_call_does_not_see_other_users_ltm_tools(llm, ltm): - """The lock fires whenever LTM is configured on the agent — even for calls - without a user_id — so a concurrent no-user_id execute cannot observe - another call's user-scoped LTM tools mid-mutation.""" +def test_execute_does_not_serialize_concurrent_calls_when_ltm_configured(llm, ltm): + """With the ContextVar overlay, concurrent execute() calls on the same + LTM-configured agent must run truly in parallel — no shared lock.""" import threading from concurrent.futures import ThreadPoolExecutor agent = _make_agent(llm, ltm=ltm) - snapshots: dict[str, set] = {} - snapshots_lock = threading.Lock() + barrier = threading.Barrier(2, timeout=5) def fake_run(*args, **kwargs): - bound = {getattr(t, "user_id", None) for t in agent.tools if hasattr(t, "user_id")} - with snapshots_lock: - key = next(iter(bound), "none") - snapshots[key] = bound + # If a lock was serialising us, the second thread would never reach + # the barrier and we'd time out — barrier verifies true concurrency. 
+ barrier.wait() return "ok" with patch.object(agent, "_run_agent", side_effect=fake_run): with ThreadPoolExecutor(max_workers=2) as pool: futures = [ pool.submit(agent.run_sync, input_data={"input": "hi", "user_id": "u1"}), - pool.submit(agent.run_sync, input_data={"input": "hi"}), # no user_id + pool.submit(agent.run_sync, input_data={"input": "hi", "user_id": "u2"}), ] for f in futures: f.result(timeout=10) - assert snapshots.get("u1") == {"u1"} - # The no-user_id call must not see any user-scoped tools at all. - assert snapshots.get("none", set()) == set() - -def test_concurrent_execute_calls_isolate_per_user_ltm_tools(llm, ltm): - """Two concurrent execute calls with different user_ids must each observe - only their own LTM tools — the per-agent `_ltm_tools_lock` serialises the - mutation window so the user-scoped tools never leak across calls.""" +def test_concurrent_calls_isolate_per_user_ltm_tools(llm, ltm): + """Two concurrent execute() calls with different user_ids must each see + only their own LTM tools via the per-task ContextVar overlay.""" import threading from concurrent.futures import ThreadPoolExecutor agent = _make_agent(llm, ltm=ltm) - original_tools = list(agent.tools) snapshots: dict[str, set[str]] = {} snapshots_lock = threading.Lock() + barrier = threading.Barrier(2, timeout=5) def fake_run(*args, **kwargs): - bound_user_ids = {t.user_id for t in agent.tools if hasattr(t, "user_id")} + # Wait so both threads are inside _run_agent simultaneously. 
+ barrier.wait() + resolved = agent.tool_by_names + bound_user_ids = {t.user_id for t in resolved.values() if hasattr(t, "user_id")} assert len(bound_user_ids) == 1, f"cross-user leakage: {bound_user_ids}" (uid,) = bound_user_ids with snapshots_lock: - snapshots[uid] = {t.name for t in agent.tools if hasattr(t, "user_id")} + snapshots[uid] = {name for name, t in resolved.items() if hasattr(t, "user_id")} return "ok" with patch.object(agent, "_run_agent", side_effect=fake_run): @@ -338,17 +279,36 @@ def fake_run(*args, **kwargs): assert set(snapshots.keys()) == {"u1", "u2"} for tool_names in snapshots.values(): assert tool_names == {"remember_fact", "recall_facts"} - assert agent.tools == original_tools -def test_execute_restores_tools_when_prep_step_raises_before_run(llm, ltm): - """Regression: prep code between the LTM mutation and the inner try block - (memory retrieval, file upload, prompt-variable update) used to leak - appended tools if it raised. The outer try/finally must catch that path.""" +def test_concurrent_no_user_id_call_does_not_see_other_users_ltm_tools(llm, ltm): + """A concurrent no-user_id execute must not observe another call's + user-scoped tools — ContextVar isolation guarantees this without a lock.""" + import threading + from concurrent.futures import ThreadPoolExecutor + agent = _make_agent(llm, ltm=ltm) - original_tools = list(agent.tools) + snapshots: dict[str, set] = {} + snapshots_lock = threading.Lock() + barrier = threading.Barrier(2, timeout=5) - with patch.object(agent.system_prompt_manager, "update_variables", side_effect=RuntimeError("prep boom")): - agent.run_sync(input_data={"input": "hi", "user_id": "u1"}) + def fake_run(*args, **kwargs): + barrier.wait() + resolved = agent.tool_by_names + bound = {getattr(t, "user_id", None) for t in resolved.values() if hasattr(t, "user_id")} + with snapshots_lock: + key = next(iter(bound), "none") + snapshots[key] = bound + return "ok" - assert agent.tools == original_tools + with 
patch.object(agent, "_run_agent", side_effect=fake_run): + with ThreadPoolExecutor(max_workers=2) as pool: + futures = [ + pool.submit(agent.run_sync, input_data={"input": "hi", "user_id": "u1"}), + pool.submit(agent.run_sync, input_data={"input": "hi"}), + ] + for f in futures: + f.result(timeout=10) + + assert snapshots.get("u1") == {"u1"} + assert snapshots.get("none", set()) == set() From 611ea39e9801149ffa7981a8df743058a7d20ed1 Mon Sep 17 00:00:00 2001 From: Taras Yaroshko Date: Sat, 30 May 2026 01:11:14 +0300 Subject: [PATCH 42/53] refactor: move LTM ops onto backend and rename LongTermMemory to LongTermMemoryConfig --- dynamiq/memory/long_term/__init__.py | 6 +- .../memory/long_term/backends/in_memory.py | 3 +- dynamiq/memory/long_term/backends/pgvector.py | 10 +- dynamiq/memory/long_term/backends/qdrant.py | 9 +- dynamiq/memory/long_term/base.py | 202 +++++++++++++++- dynamiq/memory/long_term/long_term_memory.py | 226 ++---------------- dynamiq/memory/long_term/types.py | 4 +- dynamiq/nodes/agents/agent.py | 42 +++- dynamiq/nodes/agents/base.py | 43 ++-- dynamiq/nodes/tools/long_term_memory.py | 16 +- .../integration_with_creds/memory/conftest.py | 21 +- .../memory/test_pgvector_fact_backend.py | 3 +- .../memory/test_qdrant_fact_backend.py | 3 +- tests/unit/memory/long_term/conftest.py | 8 + .../long_term/test_in_memory_backend.py | 65 ++--- .../memory/long_term/test_long_term_memory.py | 179 +++++++------- tests/unit/memory/long_term/test_tools.py | 129 +++++----- .../test_long_term_memory_integration.py | 66 +++-- 18 files changed, 546 insertions(+), 489 deletions(-) diff --git a/dynamiq/memory/long_term/__init__.py b/dynamiq/memory/long_term/__init__.py index 181b99f72..fd3be3e01 100644 --- a/dynamiq/memory/long_term/__init__.py +++ b/dynamiq/memory/long_term/__init__.py @@ -1,14 +1,14 @@ -from dynamiq.memory.long_term.base import LongTermMemoryBackend -from dynamiq.memory.long_term.long_term_memory import LongTermMemory, LongTermMemoryConfig +from 
dynamiq.memory.long_term.base import LongTermMemoryBackend, LongTermMemoryError +from dynamiq.memory.long_term.long_term_memory import LongTermMemoryConfig from dynamiq.memory.long_term.schemas import Fact from dynamiq.memory.long_term.types import ForgetStatus, MemoryToolKind, RememberOutcome __all__ = [ "Fact", "ForgetStatus", - "LongTermMemory", "LongTermMemoryBackend", "LongTermMemoryConfig", + "LongTermMemoryError", "MemoryToolKind", "RememberOutcome", ] diff --git a/dynamiq/memory/long_term/backends/in_memory.py b/dynamiq/memory/long_term/backends/in_memory.py index 7926c3a70..61123a722 100644 --- a/dynamiq/memory/long_term/backends/in_memory.py +++ b/dynamiq/memory/long_term/backends/in_memory.py @@ -39,13 +39,14 @@ def update( content: str, content_hash: str, embedding: list[float], + metadata: dict, updated_at: datetime, ) -> None: existing = self._facts.get(fact_id) if existing is None: return self._facts[fact_id] = existing.model_copy( - update={"content": content, "hash": content_hash, "updated_at": updated_at} + update={"content": content, "hash": content_hash, "metadata": metadata, "updated_at": updated_at} ) self._vectors[fact_id] = list(embedding) diff --git a/dynamiq/memory/long_term/backends/pgvector.py b/dynamiq/memory/long_term/backends/pgvector.py index 5b446b07b..e8bbaac77 100644 --- a/dynamiq/memory/long_term/backends/pgvector.py +++ b/dynamiq/memory/long_term/backends/pgvector.py @@ -78,8 +78,8 @@ def to_dict_exclude_params(self) -> dict[str, bool]: return super().to_dict_exclude_params | {"_conn": True, "connection": True} def to_dict(self, include_secure_params: bool = False, for_tracing: bool = False, **kwargs) -> dict[str, Any]: - exclude = kwargs.pop("exclude", self.to_dict_exclude_params.copy()) - data = self.model_dump(exclude=exclude, **kwargs) + # super() re-adds the embedder; we add the connection on top. 
+ data = super().to_dict(include_secure_params=include_secure_params, for_tracing=for_tracing, **kwargs) data["connection"] = self.connection.to_dict( for_tracing=for_tracing, include_secure_params=include_secure_params, **kwargs ) @@ -181,14 +181,16 @@ def update( content: str, content_hash: str, embedding: list[float], + metadata: dict, updated_at: datetime, ) -> None: with self._conn.cursor() as cur: cur.execute( SQL( - "UPDATE {table} SET content = %s, hash = %s, " "embedding = %s, updated_at = %s WHERE id = %s" + "UPDATE {table} SET content = %s, hash = %s, embedding = %s, " + "metadata = %s, updated_at = %s WHERE id = %s" ).format(table=self._table), - (content, content_hash, embedding, updated_at, fact_id), + (content, content_hash, embedding, Jsonb(metadata), updated_at, fact_id), ) def search( diff --git a/dynamiq/memory/long_term/backends/qdrant.py b/dynamiq/memory/long_term/backends/qdrant.py index 8abf6b125..a8f5f06ce 100644 --- a/dynamiq/memory/long_term/backends/qdrant.py +++ b/dynamiq/memory/long_term/backends/qdrant.py @@ -82,8 +82,8 @@ def to_dict_exclude_params(self) -> dict[str, bool]: return super().to_dict_exclude_params | {"_client": True, "connection": True} def to_dict(self, include_secure_params: bool = False, for_tracing: bool = False, **kwargs) -> dict[str, Any]: - exclude = kwargs.pop("exclude", self.to_dict_exclude_params.copy()) - data = self.model_dump(exclude=exclude, **kwargs) + # super() re-adds the embedder; we add the connection on top. 
+ data = super().to_dict(include_secure_params=include_secure_params, for_tracing=for_tracing, **kwargs) data["connection"] = self.connection.to_dict( for_tracing=for_tracing, include_secure_params=include_secure_params, **kwargs ) @@ -169,12 +169,15 @@ def update( content: str, content_hash: str, embedding: list[float], + metadata: dict, updated_at: datetime, ) -> None: existing = self.get(fact_id) if existing is None: return - new_fact = existing.model_copy(update={"content": content, "hash": content_hash, "updated_at": updated_at}) + new_fact = existing.model_copy( + update={"content": content, "hash": content_hash, "metadata": metadata, "updated_at": updated_at} + ) self._client.upsert( collection_name=self.collection_name, points=[ diff --git a/dynamiq/memory/long_term/base.py b/dynamiq/memory/long_term/base.py index f3e8ae224..9979e7b13 100644 --- a/dynamiq/memory/long_term/base.py +++ b/dynamiq/memory/long_term/base.py @@ -1,21 +1,59 @@ from abc import ABC, abstractmethod -from datetime import datetime +from datetime import UTC, datetime from functools import cached_property +from hashlib import md5 from typing import Any +from uuid import uuid4 from pydantic import BaseModel, ConfigDict, Field, computed_field from dynamiq.memory.long_term.schemas import Fact +from dynamiq.memory.long_term.types import ForgetStatus, RememberOutcome +from dynamiq.nodes.embedders.base import TextEmbedder, TextEmbedderInputSchema from dynamiq.utils import generate_uuid +from dynamiq.utils.logger import logger + + +class LongTermMemoryError(Exception): + """Base exception for long-term memory operations.""" + + pass + + +def _content_hash(user_id: str, content: str) -> str: + """Per-user stable hash used only as a dedup key, never as a security primitive.""" + normalised = content.strip().lower() + return md5(f"{user_id}:{normalised}".encode(), usedforsecurity=False).hexdigest() class LongTermMemoryBackend(ABC, BaseModel): - """Fact-shaped, scope-filtered storage backend for 
`LongTermMemory`.""" + """Fact-shaped, user-scoped storage + embedding engine for long-term memory. + + Subclasses implement the abstract storage primitives (`insert`, `get`, + `search`, `update`, ...). The high-level operations the agent tools call — + `remember`, `recall`, `forget`, `list_all`, `clear_user` — are concrete here + (Template Method): they orchestrate embedding, dedup, and semantic upsert in + terms of those primitives, so every backend gets them for free. + """ model_config = ConfigDict(arbitrary_types_allowed=True) name: str = "long-term-memory-backend" id: str = Field(default_factory=generate_uuid) + embedder: TextEmbedder = Field( + ..., + description="Text embedder used to vectorize facts on write and queries on read.", + ) + upsert_threshold: float = Field( + default=0.85, + ge=0.0, + le=1.0, + description=( + "Cosine similarity above which a new `remember()` call replaces the " + "nearest existing fact in place instead of inserting a new row. " + "Set to 1.0 to disable upsert (insert-only)." 
+ ), + ) @computed_field @cached_property @@ -26,13 +64,156 @@ def type(self) -> str: @property def to_dict_exclude_params(self) -> dict[str, bool]: """Field names to exclude from serialization (overridden by subclasses).""" - return {} + return {"embedder": True} - def to_dict(self, include_secure_params: bool = False, **kwargs) -> dict[str, Any]: + def to_dict(self, include_secure_params: bool = False, for_tracing: bool = False, **kwargs) -> dict[str, Any]: """Serialize the backend to a dict for workflow YAML round-trip.""" - kwargs.pop("include_secure_params", None) - kwargs.pop("for_tracing", None) - return self.model_dump(exclude=kwargs.pop("exclude", self.to_dict_exclude_params), **kwargs) + data = self.model_dump(exclude=kwargs.pop("exclude", self.to_dict_exclude_params), **kwargs) + data["embedder"] = self.embedder.to_dict( + include_secure_params=include_secure_params, for_tracing=for_tracing, **kwargs + ) + return data + + def _embed(self, text: str) -> list[float]: + result = self.embedder.execute(input_data=TextEmbedderInputSchema(query=text)) + return list(result["embedding"]) + + # --- high-level operations (Template Method over the storage primitives) --- + + def remember( + self, *, content: str, user_id: str, metadata: dict[str, Any] | None = None + ) -> tuple[Fact, RememberOutcome]: + """Add or upsert a fact for `user_id`. Returns the fact and a `RememberOutcome`. + + 1. Exact-duplicate guard: if `(user_id, normalised content)` already exists, + return it with `UNCHANGED` (no embed cost). + 2. Otherwise embed once and search the user's facts for the nearest neighbour. + If the top match's cosine score exceeds `upsert_threshold`, replace that + fact's content/hash/embedding/metadata in place (preserving id, created_at) + and return `UPDATED`. This is how an agent "corrects" a fact: re-state it. + 3. Otherwise insert a brand-new fact and return `CREATED`. + + Raises: + LongTermMemoryError: If content is empty or storage fails. 
+ """ + if not content or not content.strip(): + raise LongTermMemoryError("Fact content cannot be empty") + try: + normalised = content.strip() + content_hash = _content_hash(user_id, normalised) + + existing = self.get_by_hash(user_id=user_id, content_hash=content_hash) + if existing is not None: + logger.debug(f"LongTermMemory: exact-dedup hit for user={user_id}, fact {existing.id}") + return existing, RememberOutcome.UNCHANGED + + embedding = self._embed(normalised) + + nearest = self.search(query_embedding=embedding, scope={"user_id": user_id}, limit=1) + if nearest and nearest[0][1] >= self.upsert_threshold: + old_fact, score = nearest[0] + now = datetime.now(UTC) + # New metadata replaces the old when the caller supplies it; + # otherwise the existing metadata is preserved. + new_metadata = metadata if metadata is not None else old_fact.metadata + self.update( + old_fact.id, + content=normalised, + content_hash=content_hash, + embedding=embedding, + metadata=new_metadata, + updated_at=now, + ) + logger.debug( + f"LongTermMemory: upsert hit (score={score:.3f}) — updated fact {old_fact.id} for user={user_id}" + ) + updated = old_fact.model_copy( + update={ + "content": normalised, + "hash": content_hash, + "metadata": new_metadata, + "updated_at": now, + } + ) + return updated, RememberOutcome.UPDATED + + now = datetime.now(UTC) + fact = Fact( + id=str(uuid4()), + content=normalised, + hash=content_hash, + user_id=user_id, + metadata=metadata or {}, + created_at=now, + updated_at=now, + ) + self.insert(fact, embedding) + logger.debug(f"LongTermMemory: stored fact {fact.id} for user={user_id}") + return fact, RememberOutcome.CREATED + except Exception as e: + logger.error(f"LongTermMemory.remember failed for user={user_id}: {e}") + raise LongTermMemoryError(f"Failed to remember fact: {e}") from e + + def recall(self, *, query: str, user_id: str, limit: int = 5) -> list[tuple[Fact, float]]: + """Semantic search for facts relevant to `query`, scoped to `user_id`. 
+ + Raises: + LongTermMemoryError: If the query is empty or search fails. + """ + stripped = query.strip() if query else "" + if not stripped: + raise LongTermMemoryError("Recall query cannot be empty") + try: + embedding = self._embed(stripped) + results = self.search(query_embedding=embedding, scope={"user_id": user_id}, limit=limit) + logger.debug(f"LongTermMemory: recall for user={user_id} returned {len(results)} facts") + return results + except Exception as e: + logger.error(f"LongTermMemory.recall failed for user={user_id}: {e}") + raise LongTermMemoryError(f"Failed to recall facts: {e}") from e + + def forget(self, *, fact_id: str, user_id: str) -> ForgetStatus: + """Delete a fact by id, returning a `ForgetStatus`. Never raises on user mismatch. + + Raises: + LongTermMemoryError: If the storage delete fails for any other reason. + """ + try: + fact = self.get(fact_id) + if fact is None: + return ForgetStatus.NOT_FOUND + if fact.user_id != user_id: + logger.warning( + f"LongTermMemory.forget: cross-user delete blocked " + f"(owner={fact.user_id}, caller={user_id}, fact={fact_id})" + ) + return ForgetStatus.FORBIDDEN + self.delete(fact_id) + logger.debug(f"LongTermMemory: deleted fact {fact_id} for user={user_id}") + return ForgetStatus.DELETED + except Exception as e: + logger.error(f"LongTermMemory.forget failed for fact={fact_id}, user={user_id}: {e}") + raise LongTermMemoryError(f"Failed to forget fact: {e}") from e + + def list_all(self, *, user_id: str, limit: int = 100) -> list[Fact]: + """Return up to `limit` facts for `user_id` (admin/introspection).""" + try: + return self.list_by_scope({"user_id": user_id}, limit=limit) + except Exception as e: + logger.error(f"LongTermMemory.list_all failed for user={user_id}: {e}") + raise LongTermMemoryError(f"Failed to list facts: {e}") from e + + def clear_user(self, *, user_id: str) -> int: + """Hard-delete every fact owned by `user_id` and return the count deleted.""" + try: + deleted = 
self.delete_scope({"user_id": user_id}) + logger.debug(f"LongTermMemory: cleared {deleted} facts for user={user_id}") + return deleted + except Exception as e: + logger.error(f"LongTermMemory.clear_user failed for user={user_id}: {e}") + raise LongTermMemoryError(f"Failed to clear user facts: {e}") from e + + # --- storage primitives (implemented per backend) --- @abstractmethod def insert(self, fact: Fact, embedding: list[float]) -> None: @@ -75,10 +256,11 @@ def update( content: str, content_hash: str, embedding: list[float], + metadata: dict[str, Any], updated_at: datetime, ) -> None: - """Replace content/hash/embedding/updated_at for an existing fact in place. + """Replace content/hash/embedding/metadata/updated_at for a fact in place. - Preserves `id`, `user_id`, `metadata`, and `created_at`. Used by the - semantic-upsert path in `LongTermMemory.remember`. + Preserves `id`, `user_id`, and `created_at`. Used by the semantic-upsert + path in `remember`. """ diff --git a/dynamiq/memory/long_term/long_term_memory.py b/dynamiq/memory/long_term/long_term_memory.py index c85a86c12..a0e956e7e 100644 --- a/dynamiq/memory/long_term/long_term_memory.py +++ b/dynamiq/memory/long_term/long_term_memory.py @@ -1,58 +1,30 @@ -from datetime import UTC, datetime from functools import cached_property -from hashlib import md5 from typing import Any -from uuid import uuid4 from pydantic import BaseModel, ConfigDict, Field, computed_field, field_serializer -from dynamiq.memory.long_term.backends import InMemoryLongTermMemoryBackend from dynamiq.memory.long_term.base import LongTermMemoryBackend -from dynamiq.memory.long_term.schemas import Fact -from dynamiq.memory.long_term.types import ForgetStatus, MemoryToolKind, RememberOutcome -from dynamiq.nodes.embedders.base import TextEmbedder, TextEmbedderInputSchema -from dynamiq.utils.logger import logger +from dynamiq.memory.long_term.types import MemoryToolKind -class LongTermMemoryError(Exception): - """Base exception for 
`LongTermMemory` operations.""" - - pass - - -def _content_hash(user_id: str, content: str) -> str: - """Per-user stable hash used only as a dedup key, never as a security primitive.""" - normalised = content.strip().lower() - return md5(f"{user_id}:{normalised}".encode(), usedforsecurity=False).hexdigest() - - -def _embed(embedder: TextEmbedder, text: str) -> list[float]: - result = embedder.execute(input_data=TextEmbedderInputSchema(query=text)) - return list(result["embedding"]) - +class LongTermMemoryConfig(BaseModel): + """Agent-level configuration for long-term memory. -class LongTermMemory(BaseModel): - """Tool-driven, user-scoped, fact-shaped memory that persists across sessions.""" + Mirrors `SandboxConfig` / `SkillsConfig`: an on/off switch plus the backend + that does the work, plus which memory tools to expose to the LLM. All + operations (remember/recall/forget) live on `backend`. + """ model_config = ConfigDict(arbitrary_types_allowed=True) + enabled: bool = True backend: LongTermMemoryBackend = Field( - default_factory=InMemoryLongTermMemoryBackend, - description="Backend storage implementation for facts and their embeddings.", - ) - embedder: TextEmbedder = Field( ..., - description="Text embedder used to vectorize facts on write and queries on read.", + description="Backend engine that stores facts, embeds text, and serves remember/recall/forget.", ) - upsert_threshold: float = Field( - default=0.85, - ge=0.0, - le=1.0, - description=( - "Cosine similarity above which a new `remember()` call replaces " - "the nearest existing fact in place instead " - "of inserting a new row. Set to 1.0 to disable upsert (insert-only)." - ), + tools: tuple[MemoryToolKind, ...] 
= Field( + default=(MemoryToolKind.REMEMBER, MemoryToolKind.RECALL), + description="Which long-term-memory tools to expose to the agent's LLM.", ) @computed_field @@ -64,187 +36,17 @@ def type(self) -> str: @property def to_dict_exclude_params(self) -> dict[str, bool]: """Fields excluded from default model_dump; re-added by `to_dict`.""" - return {"backend": True, "embedder": True} + return {"backend": True} def to_dict(self, include_secure_params: bool = False, **kwargs) -> dict[str, Any]: - """Serialize so backend and embedder round-trip via their own `to_dict`.""" + """Serialize so the backend round-trips via its own `to_dict`.""" for_tracing = kwargs.pop("for_tracing", False) data = self.model_dump(exclude=kwargs.pop("exclude", self.to_dict_exclude_params), **kwargs) data["backend"] = self.backend.to_dict( include_secure_params=include_secure_params, for_tracing=for_tracing, **kwargs ) - data["embedder"] = self.embedder.to_dict( - include_secure_params=include_secure_params, for_tracing=for_tracing, **kwargs - ) return data - def remember( - self, *, content: str, user_id: str, - metadata: dict[str, Any] | None = None, - ) -> tuple[Fact, RememberOutcome]: - """Add or upsert a fact for `user_id`. Returns the fact and a `RememberOutcome`. - - Semantics (no explicit forget tool): - - 1. Exact-duplicate guard: if `(user_id, normalised content)` already exists, - return it with `UNCHANGED` (no embed cost). - 2. Otherwise embed once and search the user's facts for the nearest neighbour. - If the top match's cosine score exceeds `upsert_threshold`, replace that - fact's content/hash/embedding in place (preserving id, created_at, metadata) - and return `UPDATED`. This is how an agent "corrects" or "deletes" a fact: - re-state it. - 3. Otherwise insert a brand-new fact and return `CREATED`. - - Args: - content: The fact text. - user_id: Owner of the fact. Required. - metadata: Optional free-form metadata stored alongside the fact. 
- - Raises: - LongTermMemoryError: If content is empty or storage fails. - """ - if not content or not content.strip(): - raise LongTermMemoryError("Fact content cannot be empty") - try: - normalised = content.strip() - content_hash = _content_hash(user_id, normalised) - - existing = self.backend.get_by_hash(user_id=user_id, content_hash=content_hash) - if existing is not None: - logger.debug(f"LongTermMemory: exact-dedup hit for user={user_id}, fact {existing.id}") - return existing, RememberOutcome.UNCHANGED - - embedding = _embed(self.embedder, normalised) - - nearest = self.backend.search( - query_embedding=embedding, - scope={"user_id": user_id}, - limit=1, - ) - if nearest and nearest[0][1] >= self.upsert_threshold: - old_fact, score = nearest[0] - now = datetime.now(UTC) - self.backend.update( - old_fact.id, - content=normalised, - content_hash=content_hash, - embedding=embedding, - updated_at=now, - ) - logger.debug( - f"LongTermMemory: upsert hit (score={score:.3f}) — " - f"updated fact {old_fact.id} for user={user_id}" - ) - updated = old_fact.model_copy(update={"content": normalised, "hash": content_hash, "updated_at": now}) - return updated, RememberOutcome.UPDATED - - now = datetime.now(UTC) - fact = Fact( - id=str(uuid4()), - content=normalised, - hash=content_hash, - user_id=user_id, - metadata=metadata or {}, - created_at=now, - updated_at=now, - ) - self.backend.insert(fact, embedding) - logger.debug(f"LongTermMemory: stored fact {fact.id} for user={user_id}") - return fact, RememberOutcome.CREATED - except Exception as e: - logger.error(f"LongTermMemory.remember failed for user={user_id}: {e}") - raise LongTermMemoryError(f"Failed to remember fact: {e}") from e - - def recall( - self, *, query: str, user_id: str, limit: int = 5, - ) -> list[tuple[Fact, float]]: - """Semantic search for facts relevant to `query`, scoped to `user_id`. - - Args: - query: Natural-language query string. - user_id: Owner whose facts to search. 
- limit: Maximum number of (fact, score) tuples to return. - - Raises: - LongTermMemoryError: If the query is empty or search fails. - """ - stripped = query.strip() if query else "" - if not stripped: - raise LongTermMemoryError("Recall query cannot be empty") - try: - embedding = _embed(self.embedder, stripped) - results = self.backend.search( - query_embedding=embedding, - scope={"user_id": user_id}, - limit=limit, - ) - logger.debug(f"LongTermMemory: recall for user={user_id} returned {len(results)} facts") - return results - except Exception as e: - logger.error(f"LongTermMemory.recall failed for user={user_id}: {e}") - raise LongTermMemoryError(f"Failed to recall facts: {e}") from e - - def forget(self, *, fact_id: str, user_id: str) -> ForgetStatus: - """Delete a fact by id, returning a `ForgetStatus`. - - Never raises on user mismatch — defence in depth above the - construction-time `user_id` binding on the tool. - - Raises: - LongTermMemoryError: If the storage delete fails for any other reason. 
- """ - try: - fact = self.backend.get(fact_id) - if fact is None: - return ForgetStatus.NOT_FOUND - if fact.user_id != user_id: - logger.warning( - f"LongTermMemory.forget: cross-user delete blocked " - f"(owner={fact.user_id}, caller={user_id}, fact={fact_id})" - ) - return ForgetStatus.FORBIDDEN - self.backend.delete(fact_id) - logger.debug(f"LongTermMemory: deleted fact {fact_id} for user={user_id}") - return ForgetStatus.DELETED - except Exception as e: - logger.error(f"LongTermMemory.forget failed for fact={fact_id}, user={user_id}: {e}") - raise LongTermMemoryError(f"Failed to forget fact: {e}") from e - - def list_all(self, *, user_id: str, limit: int = 100) -> list[Fact]: - """Return up to `limit` facts for `user_id`, most recent first (admin/introspection).""" - try: - return self.backend.list_by_scope({"user_id": user_id}, limit=limit) - except Exception as e: - logger.error(f"LongTermMemory.list_all failed for user={user_id}: {e}") - raise LongTermMemoryError(f"Failed to list facts: {e}") from e - - def get(self, fact_id: str) -> Fact | None: - """Fetch a fact by id, or `None` if it does not exist.""" - try: - return self.backend.get(fact_id) - except Exception as e: - logger.error(f"LongTermMemory.get failed for fact={fact_id}: {e}") - raise LongTermMemoryError(f"Failed to fetch fact: {e}") from e - - def clear_user(self, *, user_id: str) -> int: - """Hard-delete every fact owned by `user_id` and return the count deleted.""" - try: - deleted = self.backend.delete_scope({"user_id": user_id}) - logger.debug(f"LongTermMemory: cleared {deleted} facts for user={user_id}") - return deleted - except Exception as e: - logger.error(f"LongTermMemory.clear_user failed for user={user_id}: {e}") - raise LongTermMemoryError(f"Failed to clear user facts: {e}") from e - - -class LongTermMemoryConfig(BaseModel): - """Per-agent configuration for long-term memory tool exposure.""" - - tools: tuple[MemoryToolKind, ...] 
= ( - MemoryToolKind.REMEMBER, - MemoryToolKind.RECALL, - ) - @field_serializer("tools") def _serialize_tools(self, tools: tuple[MemoryToolKind, ...]) -> tuple[str, ...]: # Emit plain string values so YAML round-trip and tracing work; pydantic diff --git a/dynamiq/memory/long_term/types.py b/dynamiq/memory/long_term/types.py index 9e6a646f0..95e1ae74b 100644 --- a/dynamiq/memory/long_term/types.py +++ b/dynamiq/memory/long_term/types.py @@ -2,7 +2,7 @@ class ForgetStatus(str, Enum): - """Outcome of `LongTermMemory.forget()` (programmatic API only).""" + """Outcome of `LongTermMemoryBackend.forget()` (programmatic API only).""" DELETED = "deleted" NOT_FOUND = "not_found" @@ -10,7 +10,7 @@ class ForgetStatus(str, Enum): class RememberOutcome(str, Enum): - """Outcome of `LongTermMemory.remember()` — distinguishes insert from upsert.""" + """Outcome of `LongTermMemoryBackend.remember()` — distinguishes insert from upsert.""" CREATED = "created" UPDATED = "updated" diff --git a/dynamiq/nodes/agents/agent.py b/dynamiq/nodes/agents/agent.py index 50eef5530..d7d043f35 100644 --- a/dynamiq/nodes/agents/agent.py +++ b/dynamiq/nodes/agents/agent.py @@ -8,6 +8,7 @@ from dynamiq.callbacks import AgentStreamingParserCallback, StreamingQueueCallbackHandler from dynamiq.executors.context import ContextAwareThreadPoolExecutor from dynamiq.nodes.agents.base import Agent as BaseAgent +from dynamiq.nodes.agents.base import _run_extra_tools from dynamiq.nodes.agents.components import parser, schema_generator from dynamiq.nodes.agents.components.history_manager import HistoryManagerMixin from dynamiq.nodes.agents.exceptions import ( @@ -972,11 +973,15 @@ def _setup_prompt_and_stop_sequences( input_message: The user's input message history_messages: Optional conversation history """ + # Pass overlay-aware tool variables so per-call LTM tools appear in the + # system prompt (relevant for XML/ReAct mode, where the model learns + # about tools from the prompt rather than function-calling 
schemas). system_message = Message( role=MessageRole.SYSTEM, content=self.generate_prompt( tools_name=self.tool_names, - input_formats=schema_generator.generate_input_formats(self.tools, self.sanitize_tool_name), + tool_description=self.tool_description, + input_formats=schema_generator.generate_input_formats(self._runtime_tools, self.sanitize_tool_name), ), static=True, ) @@ -1504,10 +1509,11 @@ def _run_react_llm_step(self, config: RunnableConfig | None, loop_num: int, **kw try: native_parallel = self.parallel_tool_calls_enabled and self.inference_mode == InferenceMode.FUNCTION_CALLING + fc_tools, response_format = self._effective_inference_schemas() llm_result = self._run_llm( messages=messages, - tools=self._tools, - response_format=self._response_format, + tools=fc_tools, + response_format=response_format, config=llm_config, parallel_tool_calls=True if native_parallel else None, **kwargs, @@ -1914,6 +1920,36 @@ def _refresh_agent_state(self, loop_num: int) -> None: except Exception as e: logger.debug("Failed to load todo state (none or invalid): %s", e) + def _build_inference_schemas(self, tools: list) -> tuple: + """Build (function_calling_tools, response_format) for the given tool list. + + Returns the init-time defaults for modes that don't apply, so callers + can substitute whichever value the current inference mode produces. 
+ """ + fc_tools = self._tools + response_format = self._response_format + if self.inference_mode == InferenceMode.FUNCTION_CALLING: + fc_tools = schema_generator.generate_function_calling_schemas( + tools, + self.delegation_allowed, + self.sanitize_tool_name, + response_format=self.response_format, + ) + elif self.inference_mode == InferenceMode.STRUCTURED_OUTPUT: + response_format = schema_generator.generate_structured_output_schemas( + tools, self.sanitize_tool_name, self.delegation_allowed + ) + return fc_tools, response_format + + def _effective_inference_schemas(self) -> tuple: + """Inference schemas for the current call, including any per-call LTM + overlay. When no overlay is set this is the init-time cache; when LTM + tools are attached they're regenerated so remember/recall are visible + to the LLM in FUNCTION_CALLING and STRUCTURED_OUTPUT modes.""" + if not _run_extra_tools.get(): + return self._tools, self._response_format + return self._build_inference_schemas(self._runtime_tools) + def _init_prompt_blocks(self): """Initialize the prompt blocks required for the ReAct strategy.""" # Generate inference-mode schemas diff --git a/dynamiq/nodes/agents/base.py b/dynamiq/nodes/agents/base.py index 64d57d70b..2293d5d99 100644 --- a/dynamiq/nodes/agents/base.py +++ b/dynamiq/nodes/agents/base.py @@ -11,7 +11,7 @@ from dynamiq.connections.managers import ConnectionManager from dynamiq.memory import Memory, MemoryRetrievalStrategy, MemorySaveMode -from dynamiq.memory.long_term import LongTermMemory, LongTermMemoryConfig +from dynamiq.memory.long_term import LongTermMemoryConfig from dynamiq.nodes import ErrorHandling, Node, NodeGroup from dynamiq.nodes.agents.checkpoint import DEFAULT_HISTORY_OFFSET, AgentIterativeCheckpointMixin from dynamiq.nodes.agents.exceptions import AgentUnknownToolException, InvalidActionException, ToolExecutionException @@ -232,17 +232,13 @@ class Agent(AgentIterativeCheckpointMixin, Node): memory: Memory | None = Field(None, 
description="Memory node for the agent.") memory_limit: int = Field(100, description="Maximum number of messages to retrieve from memory") memory_retrieval_strategy: MemoryRetrievalStrategy | None = MemoryRetrievalStrategy.ALL - long_term_memory: LongTermMemory | None = Field( + long_term_memory: LongTermMemoryConfig | None = Field( default=None, description=( - "Long-term, fact-shaped, user-scoped memory accessed via remember/recall/forget " - "tools. Independent of `memory` (short-term messages)." + "Long-term, fact-shaped, user-scoped memory config (enabled + backend + tools). " + "Accessed via remember/recall tools. Independent of `memory` (short-term messages)." ), ) - long_term_memory_config: LongTermMemoryConfig = Field( - default_factory=LongTermMemoryConfig, - description="Which long-term-memory tools to expose on this agent.", - ) verbose: bool = Field(False, description="Whether to print verbose logs.") file_store: FileStoreConfig = Field( default_factory=lambda: FileStoreConfig(enabled=False, backend=InMemoryFileStore()), @@ -406,7 +402,6 @@ def to_dict_exclude_params(self): "tools": True, "memory": True, "long_term_memory": True, - "long_term_memory_config": True, "files": True, "images": True, "file_store": True, @@ -426,7 +421,6 @@ def to_dict(self, **kwargs) -> dict: data["memory"] = self.memory.to_dict(**kwargs) if self.memory else None data["long_term_memory"] = self.long_term_memory.to_dict(**kwargs) if self.long_term_memory else None - data["long_term_memory_config"] = self.long_term_memory_config.model_dump() if self.files: data["files"] = [{"name": getattr(f, "name", f"file_{i}")} for i, f in enumerate(self.files)] if self.images: @@ -458,8 +452,8 @@ def init_components(self, connection_manager: ConnectionManager | None = None): # The LTM embedder is a ConnectionNode that needs its text_embedder # client built before the first recall/remember call, otherwise it # AttributeErrors on a `None` client during `execute`. 
- if self.long_term_memory and self.long_term_memory.embedder.is_postponed_component_init: - self.long_term_memory.embedder.init_components(connection_manager) + if self.long_term_memory and self.long_term_memory.backend.embedder.is_postponed_component_init: + self.long_term_memory.backend.embedder.init_components(connection_manager) self._ensure_skills_ingested_for_sandbox() @@ -650,13 +644,6 @@ def execute( use_memory = self.memory and (input_data.user_id or input_data.session_id) ltm_tools = self._build_long_term_memory_tools(input_data) - # Publish the per-call LTM tools via the module-level ContextVar; the - # tool-resolution properties (`tool_description`, `tool_names`, - # `tool_by_names`) read it. Setting a ContextVar is cheap, isolated - # per thread / per asyncio task, and never mutates shared state — so - # concurrent execute() calls don't see each other's user-scoped tools - # and don't need a lock. - ltm_token = _run_extra_tools.set(ltm_tools) if ltm_tools else None if ltm_tools: logger.info( "Agent %s - %s: attached %d long-term memory tools (%s)", @@ -665,7 +652,15 @@ def execute( len(ltm_tools), ", ".join(t.name for t in ltm_tools), ) - + # Publish the per-call LTM tools via the module-level ContextVar; the + # tool-resolution properties (`tool_description`, `tool_names`, + # `tool_by_names`) and inference-schema generation read it. Setting a + # ContextVar is cheap, isolated per thread / per asyncio task, and never + # mutates shared state — so concurrent execute() calls don't see each + # other's user-scoped tools and don't need a lock. The set() is the last + # statement before `try:` so nothing can raise between it and the + # matching reset() in finally. 
+ ltm_token = _run_extra_tools.set(ltm_tools) if ltm_tools else None try: if use_memory: history_messages = self._retrieve_memory(input_data) @@ -851,8 +846,8 @@ def _retrieve_memory(self, input_data: AgentInputSchema) -> list[Message]: return history_messages def _build_long_term_memory_tools(self, input_data: "AgentInputSchema") -> list[Node]: - """Construct per-run long-term-memory tools, or [] when LTM or user_id is absent.""" - if self.long_term_memory is None: + """Construct per-run long-term-memory tools, or [] when LTM is off/absent or user_id is missing.""" + if self.long_term_memory is None or not self.long_term_memory.enabled: return [] user_id = getattr(input_data, "user_id", None) if not user_id: @@ -860,9 +855,9 @@ def _build_long_term_memory_tools(self, input_data: "AgentInputSchema") -> list[ from dynamiq.nodes.tools.long_term_memory import build_long_term_memory_tools tools = build_long_term_memory_tools( - long_term_memory=self.long_term_memory, + backend=self.long_term_memory.backend, user_id=user_id, - include=self.long_term_memory_config.tools, + include=self.long_term_memory.tools, ) # `init_components` set this on every tool that existed at agent build # time; LTM tools are constructed lazily per-run and must match so the diff --git a/dynamiq/nodes/tools/long_term_memory.py b/dynamiq/nodes/tools/long_term_memory.py index 91be76a3a..3524e243e 100644 --- a/dynamiq/nodes/tools/long_term_memory.py +++ b/dynamiq/nodes/tools/long_term_memory.py @@ -2,7 +2,7 @@ from pydantic import BaseModel, ConfigDict, Field -from dynamiq.memory.long_term import LongTermMemory, MemoryToolKind, RememberOutcome +from dynamiq.memory.long_term import LongTermMemoryBackend, MemoryToolKind, RememberOutcome from dynamiq.nodes.node import Node, ensure_config from dynamiq.nodes.types import NodeGroup from dynamiq.runnables import RunnableConfig @@ -86,16 +86,16 @@ class _LongTermMemoryTool(Node): model_config = ConfigDict(arbitrary_types_allowed=True) group: 
Literal[NodeGroup.TOOLS] = NodeGroup.TOOLS - long_term_memory: LongTermMemory + backend: LongTermMemoryBackend user_id: str @property def to_dict_exclude_params(self) -> dict[str, Any]: - return super().to_dict_exclude_params | {"long_term_memory": True} + return super().to_dict_exclude_params | {"backend": True} def to_dict(self, include_secure_params: bool = False, **kwargs) -> dict[str, Any]: data = super().to_dict(include_secure_params=include_secure_params, **kwargs) - data["long_term_memory"] = self.long_term_memory.to_dict(include_secure_params=include_secure_params, **kwargs) + data["backend"] = self.backend.to_dict(include_secure_params=include_secure_params, **kwargs) return data @@ -121,7 +121,7 @@ def execute( check_cancellation(config) self.run_on_node_execute_run(config.callbacks, **kwargs) - fact, outcome = self.long_term_memory.remember( + fact, outcome = self.backend.remember( content=input_data.content, user_id=self.user_id, metadata=input_data.metadata, @@ -146,7 +146,7 @@ def execute( check_cancellation(config) self.run_on_node_execute_run(config.callbacks, **kwargs) - hits = self.long_term_memory.recall( + hits = self.backend.recall( query=input_data.query, user_id=self.user_id, limit=input_data.limit, @@ -166,7 +166,7 @@ def execute( def build_long_term_memory_tools( *, - long_term_memory: LongTermMemory, + backend: LongTermMemoryBackend, user_id: str, include: tuple[MemoryToolKind | str, ...] 
= ( MemoryToolKind.REMEMBER, @@ -188,5 +188,5 @@ def build_long_term_memory_tools( builder = _TOOL_BUILDERS.get(tool_kind) if builder is None: continue - tools.append(builder(long_term_memory=long_term_memory, user_id=user_id)) + tools.append(builder(backend=backend, user_id=user_id)) return tools diff --git a/tests/integration_with_creds/memory/conftest.py b/tests/integration_with_creds/memory/conftest.py index 291441f7c..2f86d3677 100644 --- a/tests/integration_with_creds/memory/conftest.py +++ b/tests/integration_with_creds/memory/conftest.py @@ -1,15 +1,28 @@ import hashlib +from typing import ClassVar import pytest +from dynamiq.connections import BaseConnection +from dynamiq.nodes.embedders.base import TextEmbedder, TextEmbedderInputSchema -class FakeTextEmbedder: + +class _StubConnection(BaseConnection): + """No-op connection to satisfy ConnectionNode's connection/client validator.""" + + def connect(self) -> None: + return None + + +class FakeTextEmbedder(TextEmbedder): """Deterministic 16-dim embedder for integration tests against real backends.""" - DIM = 16 + name: str = "fake-text-embedder" + connection: BaseConnection = _StubConnection() + DIM: ClassVar[int] = 16 - def execute(self, input_data, **kwargs): - text = input_data["query"] if isinstance(input_data, dict) else input_data.query + def execute(self, input_data: TextEmbedderInputSchema, config=None, **kwargs) -> dict: + text = input_data.query if hasattr(input_data, "query") else input_data["query"] return {"query": text, "embedding": self._embed(text)} def embed(self, text: str) -> list[float]: diff --git a/tests/integration_with_creds/memory/test_pgvector_fact_backend.py b/tests/integration_with_creds/memory/test_pgvector_fact_backend.py index 7096fe57f..f17fbdb66 100644 --- a/tests/integration_with_creds/memory/test_pgvector_fact_backend.py +++ b/tests/integration_with_creds/memory/test_pgvector_fact_backend.py @@ -24,9 +24,10 @@ def _connection_from_dsn(dsn: str) -> PostgreSQLConnection: 
@pytest.fixture -def backend(): +def backend(fake_embedder): b = PostgresLongTermMemoryBackend( connection=_connection_from_dsn(DSN), + embedder=fake_embedder, table_name="test_user_facts", dimension=16, ) diff --git a/tests/integration_with_creds/memory/test_qdrant_fact_backend.py b/tests/integration_with_creds/memory/test_qdrant_fact_backend.py index 4642f79aa..abe111846 100644 --- a/tests/integration_with_creds/memory/test_qdrant_fact_backend.py +++ b/tests/integration_with_creds/memory/test_qdrant_fact_backend.py @@ -24,9 +24,10 @@ @pytest.fixture -def backend(): +def backend(fake_embedder): b = QdrantLongTermMemoryBackend( connection=QdrantConnection(url=QDRANT_URL, api_key=""), + embedder=fake_embedder, collection_name="test_user_facts", dimension=16, ) diff --git a/tests/unit/memory/long_term/conftest.py b/tests/unit/memory/long_term/conftest.py index 4afb56296..ae6019452 100644 --- a/tests/unit/memory/long_term/conftest.py +++ b/tests/unit/memory/long_term/conftest.py @@ -49,6 +49,14 @@ def fake_embedder() -> FakeTextEmbedder: return FakeTextEmbedder() +@pytest.fixture +def backend(fake_embedder): + """A fresh in-memory backend wired with the deterministic fake embedder.""" + from dynamiq.memory.long_term.backends.in_memory import InMemoryLongTermMemoryBackend + + return InMemoryLongTermMemoryBackend(embedder=fake_embedder) + + @pytest.fixture def user_id() -> str: return "user-test-123" diff --git a/tests/unit/memory/long_term/test_in_memory_backend.py b/tests/unit/memory/long_term/test_in_memory_backend.py index 8c2705529..76eda682b 100644 --- a/tests/unit/memory/long_term/test_in_memory_backend.py +++ b/tests/unit/memory/long_term/test_in_memory_backend.py @@ -1,6 +1,6 @@ +"""Tests for InMemoryLongTermMemoryBackend storage primitives.""" from datetime import UTC, datetime, timedelta -from dynamiq.memory.long_term.backends.in_memory import InMemoryLongTermMemoryBackend from dynamiq.memory.long_term.schemas import Fact @@ -17,41 +17,34 @@ def 
_fact(fact_id: str, user_id: str, content: str, # --- insert / get / get_by_hash --- -def test_insert_then_get(fake_embedder): - backend = InMemoryLongTermMemoryBackend() +def test_insert_then_get(backend, fake_embedder): fact = _fact("f1", "u1", "hello") backend.insert(fact, fake_embedder.embed("hello")) assert backend.get("f1") == fact -def test_get_unknown_returns_none(): - backend = InMemoryLongTermMemoryBackend() +def test_get_unknown_returns_none(backend): assert backend.get("does-not-exist") is None -def test_get_by_hash_returns_match(fake_embedder): - backend = InMemoryLongTermMemoryBackend() +def test_get_by_hash_returns_match(backend, fake_embedder): fact = _fact("f1", "u1", "hello", content_hash="h-shared") backend.insert(fact, fake_embedder.embed("hello")) assert backend.get_by_hash(user_id="u1", content_hash="h-shared") == fact -def test_get_by_hash_isolates_users(fake_embedder): - backend = InMemoryLongTermMemoryBackend() - backend.insert(_fact("f1", "u1", "hello", "h-shared"), - fake_embedder.embed("hello")) +def test_get_by_hash_isolates_users(backend, fake_embedder): + backend.insert(_fact("f1", "u1", "hello", "h-shared"), fake_embedder.embed("hello")) assert backend.get_by_hash(user_id="u2", content_hash="h-shared") is None -def test_get_by_hash_unknown_returns_none(): - backend = InMemoryLongTermMemoryBackend() +def test_get_by_hash_unknown_returns_none(backend): assert backend.get_by_hash(user_id="u1", content_hash="nope") is None # --- search --- -def test_search_returns_relevance_ordered(fake_embedder): - backend = InMemoryLongTermMemoryBackend() +def test_search_returns_relevance_ordered(backend, fake_embedder): backend.insert(_fact("f1", "u1", "alpha"), fake_embedder.embed("alpha")) backend.insert(_fact("f2", "u1", "alpha-2"), fake_embedder.embed("alpha-2")) backend.insert(_fact("f3", "u1", "zulu"), fake_embedder.embed("zulu")) @@ -66,8 +59,7 @@ def test_search_returns_relevance_ordered(fake_embedder): assert scores == sorted(scores, 
reverse=True) -def test_search_filters_by_scope(fake_embedder): - backend = InMemoryLongTermMemoryBackend() +def test_search_filters_by_scope(backend, fake_embedder): backend.insert(_fact("f1", "u1", "alpha"), fake_embedder.embed("alpha")) backend.insert(_fact("f2", "u2", "alpha"), fake_embedder.embed("alpha")) hits = backend.search( @@ -78,11 +70,9 @@ def test_search_filters_by_scope(fake_embedder): assert [f.id for f, _ in hits] == ["f1"] -def test_search_respects_limit(fake_embedder): - backend = InMemoryLongTermMemoryBackend() +def test_search_respects_limit(backend, fake_embedder): for i in range(5): - backend.insert(_fact(f"f{i}", "u1", f"text{i}"), - fake_embedder.embed(f"text{i}")) + backend.insert(_fact(f"f{i}", "u1", f"text{i}"), fake_embedder.embed(f"text{i}")) hits = backend.search( query_embedding=fake_embedder.embed("text0"), scope={"user_id": "u1"}, limit=2, @@ -90,8 +80,7 @@ def test_search_respects_limit(fake_embedder): assert len(hits) == 2 -def test_search_empty_store_returns_empty(fake_embedder): - backend = InMemoryLongTermMemoryBackend() +def test_search_empty_store_returns_empty(backend, fake_embedder): hits = backend.search( query_embedding=fake_embedder.embed("anything"), scope={"user_id": "u1"}, limit=5, @@ -101,20 +90,17 @@ def test_search_empty_store_returns_empty(fake_embedder): # --- delete / list_by_scope / delete_scope --- -def test_delete_removes_fact(fake_embedder): - backend = InMemoryLongTermMemoryBackend() +def test_delete_removes_fact(backend, fake_embedder): backend.insert(_fact("f1", "u1", "x"), fake_embedder.embed("x")) backend.delete("f1") assert backend.get("f1") is None -def test_delete_unknown_is_noop(): - backend = InMemoryLongTermMemoryBackend() +def test_delete_unknown_is_noop(backend): backend.delete("does-not-exist") # must not raise -def test_list_by_scope_returns_in_scope_facts(fake_embedder): - backend = InMemoryLongTermMemoryBackend() +def test_list_by_scope_returns_in_scope_facts(backend, fake_embedder): 
backend.insert(_fact("f1", "u1", "a"), fake_embedder.embed("a")) backend.insert(_fact("f2", "u1", "b"), fake_embedder.embed("b")) backend.insert(_fact("f3", "u2", "c"), fake_embedder.embed("c")) @@ -122,19 +108,16 @@ def test_list_by_scope_returns_in_scope_facts(fake_embedder): assert {f.id for f in listed} == {"f1", "f2"} -def test_list_by_scope_respects_limit(fake_embedder): - backend = InMemoryLongTermMemoryBackend() +def test_list_by_scope_respects_limit(backend, fake_embedder): for i in range(5): - backend.insert(_fact(f"f{i}", "u1", f"x{i}"), - fake_embedder.embed(f"x{i}")) + backend.insert(_fact(f"f{i}", "u1", f"x{i}"), fake_embedder.embed(f"x{i}")) assert len(backend.list_by_scope({"user_id": "u1"}, limit=2)) == 2 # --- update --- -def test_update_replaces_content_hash_embedding_and_timestamp(fake_embedder): - backend = InMemoryLongTermMemoryBackend() +def test_update_replaces_content_hash_embedding_and_timestamp(backend, fake_embedder): original = _fact("f1", "u1", "hello", content_hash="h-old") backend.insert(original, fake_embedder.embed("hello")) @@ -144,12 +127,14 @@ def test_update_replaces_content_hash_embedding_and_timestamp(fake_embedder): content="hello world", content_hash="h-new", embedding=fake_embedder.embed("hello world"), + metadata={"category": "greeting"}, updated_at=new_time, ) updated = backend.get("f1") assert updated.content == "hello world" assert updated.hash == "h-new" + assert updated.metadata == {"category": "greeting"} assert updated.updated_at == new_time assert updated.id == original.id assert updated.created_at == original.created_at @@ -162,20 +147,19 @@ def test_update_replaces_content_hash_embedding_and_timestamp(fake_embedder): assert hits[0][0].content == "hello world" -def test_update_unknown_is_noop(fake_embedder): - backend = InMemoryLongTermMemoryBackend() +def test_update_unknown_is_noop(backend, fake_embedder): backend.update( "does-not-exist", content="x", content_hash="h", embedding=fake_embedder.embed("x"), + 
metadata={}, updated_at=datetime.now(UTC), ) # must not raise assert backend.get("does-not-exist") is None -def test_delete_scope_removes_all_in_scope(fake_embedder): - backend = InMemoryLongTermMemoryBackend() +def test_delete_scope_removes_all_in_scope(backend, fake_embedder): backend.insert(_fact("f1", "u1", "a"), fake_embedder.embed("a")) backend.insert(_fact("f2", "u1", "b"), fake_embedder.embed("b")) backend.insert(_fact("f3", "u2", "c"), fake_embedder.embed("c")) @@ -185,9 +169,8 @@ def test_delete_scope_removes_all_in_scope(fake_embedder): assert len(backend.list_by_scope({"user_id": "u2"})) == 1 -def test_delete_scope_empty_scope_deletes_everything(fake_embedder): +def test_delete_scope_empty_scope_deletes_everything(backend, fake_embedder): """Contract: empty scope = "match every fact" — same for all backends.""" - backend = InMemoryLongTermMemoryBackend() backend.insert(_fact("f1", "u1", "a"), fake_embedder.embed("a")) backend.insert(_fact("f2", "u2", "b"), fake_embedder.embed("b")) deleted = backend.delete_scope({}) diff --git a/tests/unit/memory/long_term/test_long_term_memory.py b/tests/unit/memory/long_term/test_long_term_memory.py index f455cb182..436f86cf2 100644 --- a/tests/unit/memory/long_term/test_long_term_memory.py +++ b/tests/unit/memory/long_term/test_long_term_memory.py @@ -1,166 +1,179 @@ +"""Tests for the long-term memory backend operations (remember/recall/forget/...).""" import pytest -from dynamiq.memory.long_term import LongTermMemory, RememberOutcome +from dynamiq.memory.long_term import LongTermMemoryError, RememberOutcome from dynamiq.memory.long_term.backends.in_memory import InMemoryLongTermMemoryBackend -from dynamiq.memory.long_term.long_term_memory import LongTermMemoryError - - -@pytest.fixture -def ltm(fake_embedder): - return LongTermMemory( - backend=InMemoryLongTermMemoryBackend(), - embedder=fake_embedder, - ) - # --- remember --- -def test_remember_returns_a_fact_and_persists_it(ltm, user_id): - fact, outcome = 
ltm.remember(content="User likes pizza", user_id=user_id) + +def test_remember_returns_a_fact_and_persists_it(backend, user_id): + fact, outcome = backend.remember(content="User likes pizza", user_id=user_id) assert outcome == RememberOutcome.CREATED assert fact.id assert fact.content == "User likes pizza" assert fact.user_id == user_id - assert ltm.backend.get(fact.id) == fact + assert backend.get(fact.id) == fact -def test_remember_exact_duplicate_returns_unchanged(ltm, user_id): - first, first_outcome = ltm.remember(content="User likes pizza", user_id=user_id) - second, second_outcome = ltm.remember(content="User likes pizza", user_id=user_id) +def test_remember_exact_duplicate_returns_unchanged(backend, user_id): + first, first_outcome = backend.remember(content="User likes pizza", user_id=user_id) + second, second_outcome = backend.remember(content="User likes pizza", user_id=user_id) assert first_outcome == RememberOutcome.CREATED assert second_outcome == RememberOutcome.UNCHANGED assert first.id == second.id -def test_remember_does_not_dedup_across_users(ltm, user_id, other_user_id): - a, _ = ltm.remember(content="User likes pizza", user_id=user_id) - b, b_outcome = ltm.remember(content="User likes pizza", user_id=other_user_id) +def test_remember_does_not_dedup_across_users(backend, user_id, other_user_id): + a, _ = backend.remember(content="User likes pizza", user_id=user_id) + b, b_outcome = backend.remember(content="User likes pizza", user_id=other_user_id) assert b_outcome == RememberOutcome.CREATED assert a.id != b.id assert a.user_id != b.user_id -def test_remember_normalises_whitespace_for_dedup(ltm, user_id): - a, _ = ltm.remember(content=" User likes pizza ", user_id=user_id) - b, b_outcome = ltm.remember(content="USER LIKES PIZZA", user_id=user_id) +def test_remember_normalises_whitespace_for_dedup(backend, user_id): + a, _ = backend.remember(content=" User likes pizza ", user_id=user_id) + b, b_outcome = backend.remember(content="USER LIKES 
PIZZA", user_id=user_id) assert b_outcome == RememberOutcome.UNCHANGED assert a.id == b.id def test_remember_paraphrase_upserts_existing(fake_embedder, user_id): """With a low threshold, a near-similar fact replaces the earlier one in place.""" - ltm = LongTermMemory( - backend=InMemoryLongTermMemoryBackend(), - embedder=fake_embedder, - upsert_threshold=0.0, - ) - original, _ = ltm.remember(content="User likes pizza", user_id=user_id) - updated, outcome = ltm.remember(content="User loves pizza", user_id=user_id) + backend = InMemoryLongTermMemoryBackend(embedder=fake_embedder, upsert_threshold=0.0) + original, _ = backend.remember(content="User likes pizza", user_id=user_id) + updated, outcome = backend.remember(content="User loves pizza", user_id=user_id) assert outcome == RememberOutcome.UPDATED assert updated.id == original.id assert updated.content == "User loves pizza" - assert ltm.backend.get(original.id).content == "User loves pizza" - assert len(ltm.list_all(user_id=user_id)) == 1 + assert backend.get(original.id).content == "User loves pizza" + assert len(backend.list_all(user_id=user_id)) == 1 -def test_remember_distinct_content_inserts_new_when_threshold_high(ltm, user_id): +def test_remember_distinct_content_inserts_new_when_threshold_high(backend, user_id): """Default high threshold (0.85) keeps unrelated facts separate.""" - a, _ = ltm.remember(content="User likes pizza", user_id=user_id) - b, outcome = ltm.remember(content="User dislikes mushrooms", user_id=user_id) + a, _ = backend.remember(content="User likes pizza", user_id=user_id) + b, outcome = backend.remember(content="User dislikes mushrooms", user_id=user_id) assert outcome == RememberOutcome.CREATED assert a.id != b.id - assert len(ltm.list_all(user_id=user_id)) == 2 + assert len(backend.list_all(user_id=user_id)) == 2 -def test_remember_rejects_empty_content(ltm, user_id): +def test_upsert_replaces_metadata_when_provided(fake_embedder, user_id): + """A corrected fact's new metadata must 
overwrite the old fact's metadata.""" + backend = InMemoryLongTermMemoryBackend(embedder=fake_embedder, upsert_threshold=0.0) + original, _ = backend.remember(content="User likes pizza", user_id=user_id, metadata={"category": "food"}) + updated, outcome = backend.remember( + content="User loves pizza", user_id=user_id, metadata={"category": "preference"} + ) + assert outcome == RememberOutcome.UPDATED + assert updated.id == original.id + assert updated.metadata == {"category": "preference"} + assert backend.get(original.id).metadata == {"category": "preference"} + + +def test_upsert_preserves_metadata_when_omitted(fake_embedder, user_id): + """When the corrected call passes no metadata, the old metadata is kept.""" + backend = InMemoryLongTermMemoryBackend(embedder=fake_embedder, upsert_threshold=0.0) + original, _ = backend.remember(content="User likes pizza", user_id=user_id, metadata={"category": "food"}) + updated, _ = backend.remember(content="User loves pizza", user_id=user_id) + assert updated.id == original.id + assert updated.metadata == {"category": "food"} + + +def test_remember_rejects_empty_content(backend, user_id): with pytest.raises(LongTermMemoryError): - ltm.remember(content=" ", user_id=user_id) + backend.remember(content=" ", user_id=user_id) -def test_remember_stores_metadata(ltm, user_id): - fact, _ = ltm.remember(content="x", user_id=user_id, metadata={"category": "preference"}) - assert ltm.backend.get(fact.id).metadata == {"category": "preference"} +def test_remember_stores_metadata(backend, user_id): + fact, _ = backend.remember(content="x", user_id=user_id, metadata={"category": "preference"}) + assert backend.get(fact.id).metadata == {"category": "preference"} # --- recall --- -def test_recall_returns_scored_facts(ltm, user_id): - ltm.remember(content="User likes pizza", user_id=user_id) - ltm.remember(content="User dislikes mushrooms", user_id=user_id) - hits = ltm.recall(query="pizza preferences", user_id=user_id, limit=2) + +def 
test_recall_returns_scored_facts(backend, user_id): + backend.remember(content="User likes pizza", user_id=user_id) + backend.remember(content="User dislikes mushrooms", user_id=user_id) + hits = backend.recall(query="pizza preferences", user_id=user_id, limit=2) assert len(hits) == 2 fact, score = hits[0] assert fact.content assert isinstance(score, float) -def test_recall_isolates_users(ltm, user_id, other_user_id): - ltm.remember(content="A's fact", user_id=user_id) - ltm.remember(content="B's fact", user_id=other_user_id) - hits = ltm.recall(query="fact", user_id=user_id, limit=5) +def test_recall_isolates_users(backend, user_id, other_user_id): + backend.remember(content="A's fact", user_id=user_id) + backend.remember(content="B's fact", user_id=other_user_id) + hits = backend.recall(query="fact", user_id=user_id, limit=5) assert all(f.user_id == user_id for f, _ in hits) -def test_recall_respects_limit(ltm, user_id): +def test_recall_respects_limit(backend, user_id): for i in range(5): - ltm.remember(content=f"fact-{i}", user_id=user_id) - hits = ltm.recall(query="fact", user_id=user_id, limit=2) + backend.remember(content=f"fact-{i}", user_id=user_id) + hits = backend.recall(query="fact", user_id=user_id, limit=2) assert len(hits) == 2 -def test_recall_empty_store_returns_empty(ltm, user_id): - assert ltm.recall(query="anything", user_id=user_id, limit=5) == [] +def test_recall_empty_store_returns_empty(backend, user_id): + assert backend.recall(query="anything", user_id=user_id, limit=5) == [] -def test_recall_rejects_empty_query(ltm, user_id): +def test_recall_rejects_empty_query(backend, user_id): with pytest.raises(LongTermMemoryError): - ltm.recall(query=" ", user_id=user_id, limit=5) + backend.recall(query=" ", user_id=user_id, limit=5) # --- forget (programmatic API; not exposed to agents) --- -def test_forget_deletes_known_fact(ltm, user_id): - fact, _ = ltm.remember(content="x", user_id=user_id) - assert ltm.forget(fact_id=fact.id, user_id=user_id) 
== "deleted" - assert ltm.backend.get(fact.id) is None +def test_forget_deletes_known_fact(backend, user_id): + fact, _ = backend.remember(content="x", user_id=user_id) + assert backend.forget(fact_id=fact.id, user_id=user_id) == "deleted" + assert backend.get(fact.id) is None -def test_forget_unknown_returns_not_found(ltm, user_id): - assert ltm.forget(fact_id="does-not-exist", user_id=user_id) == "not_found" +def test_forget_unknown_returns_not_found(backend, user_id): + assert backend.forget(fact_id="does-not-exist", user_id=user_id) == "not_found" -def test_forget_cross_user_returns_forbidden(ltm, user_id, other_user_id): - fact, _ = ltm.remember(content="x", user_id=user_id) - result = ltm.forget(fact_id=fact.id, user_id=other_user_id) + +def test_forget_cross_user_returns_forbidden(backend, user_id, other_user_id): + fact, _ = backend.remember(content="x", user_id=user_id) + result = backend.forget(fact_id=fact.id, user_id=other_user_id) assert result == "forbidden" - assert ltm.backend.get(fact.id) is not None + assert backend.get(fact.id) is not None # --- admin / introspection --- -def test_list_all_returns_user_facts(ltm, user_id, other_user_id): - ltm.remember(content="a", user_id=user_id) - ltm.remember(content="b", user_id=user_id) - ltm.remember(content="c", user_id=other_user_id) - facts = ltm.list_all(user_id=user_id) + +def test_list_all_returns_user_facts(backend, user_id, other_user_id): + backend.remember(content="a", user_id=user_id) + backend.remember(content="b", user_id=user_id) + backend.remember(content="c", user_id=other_user_id) + facts = backend.list_all(user_id=user_id) assert {f.content for f in facts} == {"a", "b"} -def test_get_returns_fact_by_id(ltm, user_id): - fact, _ = ltm.remember(content="x", user_id=user_id) - assert ltm.get(fact.id) == fact +def test_get_returns_fact_by_id(backend, user_id): + fact, _ = backend.remember(content="x", user_id=user_id) + assert backend.get(fact.id) == fact -def 
test_get_unknown_returns_none(ltm): - assert ltm.get("nope") is None +def test_get_unknown_returns_none(backend): + assert backend.get("nope") is None -def test_clear_user_deletes_all_user_facts(ltm, user_id, other_user_id): - ltm.remember(content="a", user_id=user_id) - ltm.remember(content="b", user_id=user_id) - ltm.remember(content="c", user_id=other_user_id) - deleted = ltm.clear_user(user_id=user_id) +def test_clear_user_deletes_all_user_facts(backend, user_id, other_user_id): + backend.remember(content="a", user_id=user_id) + backend.remember(content="b", user_id=user_id) + backend.remember(content="c", user_id=other_user_id) + deleted = backend.clear_user(user_id=user_id) assert deleted == 2 - assert ltm.list_all(user_id=user_id) == [] - assert len(ltm.list_all(user_id=other_user_id)) == 1 + assert backend.list_all(user_id=user_id) == [] + assert len(backend.list_all(user_id=other_user_id)) == 1 diff --git a/tests/unit/memory/long_term/test_tools.py b/tests/unit/memory/long_term/test_tools.py index b167b921b..90a143485 100644 --- a/tests/unit/memory/long_term/test_tools.py +++ b/tests/unit/memory/long_term/test_tools.py @@ -1,24 +1,16 @@ -import pytest - -from dynamiq.memory.long_term import LongTermMemory from dynamiq.memory.long_term.backends.in_memory import InMemoryLongTermMemoryBackend from dynamiq.nodes.tools.long_term_memory import RecallFactsTool, RememberFactTool, build_long_term_memory_tools -@pytest.fixture -def ltm(fake_embedder): - return LongTermMemory(backend=InMemoryLongTermMemoryBackend(), embedder=fake_embedder) - - # --- RememberFactTool --- -def test_remember_tool_persists_a_fact(ltm, user_id): - tool = RememberFactTool(long_term_memory=ltm, user_id=user_id) +def test_remember_tool_persists_a_fact(backend, user_id): + tool = RememberFactTool(backend=backend, user_id=user_id) result = tool.execute(tool.input_schema(content="User likes pizza")) fact_id = result["content"]["fact_id"] assert result["content"]["outcome"] == "created" - assert 
ltm.get(fact_id).content == "User likes pizza" + assert backend.get(fact_id).content == "User likes pizza" def test_remember_tool_input_schema_has_no_user_id(): @@ -27,32 +19,31 @@ def test_remember_tool_input_schema_has_no_user_id(): assert {"content", "metadata"} <= set(RememberFactTool.input_schema.model_fields) -def test_remember_tool_uses_construction_user_id(ltm, user_id): - tool = RememberFactTool(long_term_memory=ltm, user_id=user_id) +def test_remember_tool_uses_construction_user_id(backend, user_id): + tool = RememberFactTool(backend=backend, user_id=user_id) result = tool.execute(tool.input_schema(content="x")) - fact = ltm.get(result["content"]["fact_id"]) + fact = backend.get(result["content"]["fact_id"]) assert fact.user_id == user_id -def test_remember_tool_idempotent_on_duplicate(ltm, user_id): - tool = RememberFactTool(long_term_memory=ltm, user_id=user_id) +def test_remember_tool_idempotent_on_duplicate(backend, user_id): + tool = RememberFactTool(backend=backend, user_id=user_id) a = tool.execute(tool.input_schema(content="x")) b = tool.execute(tool.input_schema(content="x")) assert a["content"]["fact_id"] == b["content"]["fact_id"] assert b["content"]["outcome"] == "unchanged" -def test_remember_tool_accepts_metadata(ltm, user_id): - tool = RememberFactTool(long_term_memory=ltm, user_id=user_id) - result = tool.execute(tool.input_schema( - content="x", metadata={"category": "preference"})) - fact = ltm.get(result["content"]["fact_id"]) +def test_remember_tool_accepts_metadata(backend, user_id): + tool = RememberFactTool(backend=backend, user_id=user_id) + result = tool.execute(tool.input_schema(content="x", metadata={"category": "preference"})) + fact = backend.get(result["content"]["fact_id"]) assert fact.metadata == {"category": "preference"} -def test_remember_tool_agent_optimized_returns_status_string(ltm, user_id): +def test_remember_tool_agent_optimized_returns_status_string(backend, user_id): """Agent-mode output is a short human-readable 
status, not a dict.""" - tool = RememberFactTool(long_term_memory=ltm, user_id=user_id) + tool = RememberFactTool(backend=backend, user_id=user_id) tool.is_optimized_for_agents = True created = tool.execute(tool.input_schema(content="User likes pizza")) @@ -64,12 +55,8 @@ def test_remember_tool_agent_optimized_returns_status_string(ltm, user_id): def test_remember_tool_agent_optimized_reports_update(fake_embedder, user_id): """Agent-mode upsert renders as 'Fact updated.'""" - ltm = LongTermMemory( - backend=InMemoryLongTermMemoryBackend(), - embedder=fake_embedder, - upsert_threshold=0.0, - ) - tool = RememberFactTool(long_term_memory=ltm, user_id=user_id) + backend = InMemoryLongTermMemoryBackend(embedder=fake_embedder, upsert_threshold=0.0) + tool = RememberFactTool(backend=backend, user_id=user_id) tool.is_optimized_for_agents = True tool.execute(tool.input_schema(content="User likes pizza")) @@ -80,10 +67,10 @@ def test_remember_tool_agent_optimized_reports_update(fake_embedder, user_id): # --- RecallFactsTool --- -def test_recall_tool_returns_hits(ltm, user_id): - ltm.remember(content="User likes pizza", user_id=user_id) - ltm.remember(content="User likes Python", user_id=user_id) - tool = RecallFactsTool(long_term_memory=ltm, user_id=user_id) +def test_recall_tool_returns_hits(backend, user_id): + backend.remember(content="User likes pizza", user_id=user_id) + backend.remember(content="User likes Python", user_id=user_id) + tool = RecallFactsTool(backend=backend, user_id=user_id) result = tool.execute(tool.input_schema(query="pizza", limit=2)) items = result["content"] assert len(items) == 2 @@ -98,25 +85,25 @@ def test_recall_tool_input_schema_has_no_user_id(): assert {"query", "limit"} <= set(RecallFactsTool.input_schema.model_fields) -def test_recall_tool_isolates_users(ltm, user_id, other_user_id): - ltm.remember(content="A's fact", user_id=user_id) - ltm.remember(content="B's fact", user_id=other_user_id) - tool = RecallFactsTool(long_term_memory=ltm, 
user_id=user_id) +def test_recall_tool_isolates_users(backend, user_id, other_user_id): + backend.remember(content="A's fact", user_id=user_id) + backend.remember(content="B's fact", user_id=other_user_id) + tool = RecallFactsTool(backend=backend, user_id=user_id) result = tool.execute(tool.input_schema(query="fact", limit=5)) contents = {item["content"] for item in result["content"]} assert contents == {"A's fact"} -def test_recall_tool_empty_store_returns_empty(ltm, user_id): - tool = RecallFactsTool(long_term_memory=ltm, user_id=user_id) +def test_recall_tool_empty_store_returns_empty(backend, user_id): + tool = RecallFactsTool(backend=backend, user_id=user_id) result = tool.execute(tool.input_schema(query="anything")) assert result["content"] == [] -def test_recall_tool_agent_optimized_returns_bullet_list(ltm, user_id): - ltm.remember(content="User likes pizza", user_id=user_id) - ltm.remember(content="User likes Python", user_id=user_id) - tool = RecallFactsTool(long_term_memory=ltm, user_id=user_id) +def test_recall_tool_agent_optimized_returns_bullet_list(backend, user_id): + backend.remember(content="User likes pizza", user_id=user_id) + backend.remember(content="User likes Python", user_id=user_id) + tool = RecallFactsTool(backend=backend, user_id=user_id) tool.is_optimized_for_agents = True result = tool.execute(tool.input_schema(query="pizza", limit=2)) assert isinstance(result["content"], str) @@ -124,8 +111,8 @@ def test_recall_tool_agent_optimized_returns_bullet_list(ltm, user_id): assert "- User likes Python" in result["content"] -def test_recall_tool_agent_optimized_empty_message(ltm, user_id): - tool = RecallFactsTool(long_term_memory=ltm, user_id=user_id) +def test_recall_tool_agent_optimized_empty_message(backend, user_id): + tool = RecallFactsTool(backend=backend, user_id=user_id) tool.is_optimized_for_agents = True result = tool.execute(tool.input_schema(query="anything")) assert result["content"] == "No relevant facts." 
@@ -134,33 +121,28 @@ def test_recall_tool_agent_optimized_empty_message(ltm, user_id): # --- factory --- -def test_factory_builds_default_two_tools(ltm, user_id): - tools = build_long_term_memory_tools(long_term_memory=ltm, user_id=user_id) +def test_factory_builds_default_two_tools(backend, user_id): + tools = build_long_term_memory_tools(backend=backend, user_id=user_id) assert {t.name for t in tools} == {"remember_fact", "recall_facts"} -def test_factory_respects_include(ltm, user_id): - tools = build_long_term_memory_tools( - long_term_memory=ltm, user_id=user_id, include=("recall",), - ) +def test_factory_respects_include(backend, user_id): + tools = build_long_term_memory_tools(backend=backend, user_id=user_id, include=("recall",)) assert [t.name for t in tools] == ["recall_facts"] -def test_factory_bakes_user_id_into_each_tool(ltm, user_id): - tools = build_long_term_memory_tools(long_term_memory=ltm, user_id=user_id) +def test_factory_bakes_user_id_into_each_tool(backend, user_id): + tools = build_long_term_memory_tools(backend=backend, user_id=user_id) for tool in tools: assert tool.user_id == user_id -def test_factory_ignores_unknown_include_keys(ltm, user_id): - tools = build_long_term_memory_tools( - long_term_memory=ltm, user_id=user_id, - include=("recall", "unknown", "forget"), - ) +def test_factory_ignores_unknown_include_keys(backend, user_id): + tools = build_long_term_memory_tools(backend=backend, user_id=user_id, include=("recall", "unknown", "forget")) assert [t.name for t in tools] == ["recall_facts"] -def test_factory_skips_enum_members_missing_from_builders(ltm, user_id, monkeypatch): +def test_factory_skips_enum_members_missing_from_builders(backend, user_id, monkeypatch): """Valid `MemoryToolKind` values without a `_TOOL_BUILDERS` entry must be silently skipped, not KeyError. 
Mirrors the unknown-string branch so the docstring's "unknown keys are ignored" promise actually holds.""" @@ -169,7 +151,7 @@ def test_factory_skips_enum_members_missing_from_builders(ltm, user_id, monkeypa monkeypatch.setattr(ltm_tools_module, "_TOOL_BUILDERS", {MemoryToolKind.RECALL: ltm_tools_module.RecallFactsTool}) tools = build_long_term_memory_tools( - long_term_memory=ltm, + backend=backend, user_id=user_id, include=(MemoryToolKind.REMEMBER, MemoryToolKind.RECALL), ) @@ -179,26 +161,25 @@ def test_factory_skips_enum_members_missing_from_builders(ltm, user_id, monkeypa # --- serialization --- -def test_remember_tool_to_dict_round_trips_long_term_memory(ltm, user_id): - """`to_dict` must not auto-dump `long_term_memory` (it holds runtime clients). +def test_remember_tool_to_dict_excludes_live_backend(backend, user_id): + """`to_dict` must not auto-dump `backend` (it holds runtime clients + embedder). The default `model_dump` would try to JSON-encode the embedder's connection and the backend's live client, blowing up tracing callbacks. The tool base - excludes the field and re-adds it via `LongTermMemory.to_dict()`. + excludes the field and re-adds it via `LongTermMemoryBackend.to_dict()`. 
""" - tool = RememberFactTool(long_term_memory=ltm, user_id=user_id) + tool = RememberFactTool(backend=backend, user_id=user_id) data = tool.to_dict() - assert "long_term_memory" in data - ltm_dump = data["long_term_memory"] - assert isinstance(ltm_dump, dict) - assert "backend" in ltm_dump and isinstance(ltm_dump["backend"], dict) - assert "embedder" in ltm_dump and isinstance(ltm_dump["embedder"], dict) + assert "backend" in data + backend_dump = data["backend"] + assert isinstance(backend_dump, dict) + assert "embedder" in backend_dump and isinstance(backend_dump["embedder"], dict) -def test_remember_tool_to_dict_accepts_include_secure_params(ltm, user_id): - """`include_secure_params=True` must propagate through tool → LTM → backend → connection +def test_remember_tool_to_dict_accepts_include_secure_params(backend, user_id): + """`include_secure_params=True` must propagate through tool → backend → connection without raising. Connection.to_dict swallows the kwarg; backends pass it through.""" - tool = RememberFactTool(long_term_memory=ltm, user_id=user_id) + tool = RememberFactTool(backend=backend, user_id=user_id) data = tool.to_dict(include_secure_params=True) - assert "long_term_memory" in data - assert "backend" in data["long_term_memory"] + assert "backend" in data + assert "embedder" in data["backend"] diff --git a/tests/unit/nodes/agents/test_long_term_memory_integration.py b/tests/unit/nodes/agents/test_long_term_memory_integration.py index 551919589..9b987bd28 100644 --- a/tests/unit/nodes/agents/test_long_term_memory_integration.py +++ b/tests/unit/nodes/agents/test_long_term_memory_integration.py @@ -7,7 +7,7 @@ from dynamiq.connections import BaseConnection from dynamiq.connections import OpenAI as OpenAIConnection -from dynamiq.memory.long_term import LongTermMemory, LongTermMemoryConfig +from dynamiq.memory.long_term import LongTermMemoryConfig from dynamiq.memory.long_term.backends.in_memory import InMemoryLongTermMemoryBackend from 
dynamiq.nodes.agents.base import Agent from dynamiq.nodes.embedders.base import TextEmbedder, TextEmbedderInputSchema @@ -34,7 +34,7 @@ def execute(self, input_data: TextEmbedderInputSchema, config=None, **kwargs): @pytest.fixture def ltm(): - return LongTermMemory(backend=InMemoryLongTermMemoryBackend(), embedder=_FakeEmbedder()) + return LongTermMemoryConfig(backend=InMemoryLongTermMemoryBackend(embedder=_FakeEmbedder())) @pytest.fixture @@ -47,12 +47,18 @@ def llm(): ) -def _make_agent(llm, *, ltm=None, ltm_config=None) -> Agent: +def _ltm_config(*, tools=None, enabled=True) -> LongTermMemoryConfig: + backend = InMemoryLongTermMemoryBackend(embedder=_FakeEmbedder()) + kwargs = {"backend": backend, "enabled": enabled} + if tools is not None: + kwargs["tools"] = tools + return LongTermMemoryConfig(**kwargs) + + +def _make_agent(llm, *, ltm=None) -> Agent: kwargs = {"name": "test", "llm": llm, "tools": []} if ltm is not None: kwargs["long_term_memory"] = ltm - if ltm_config is not None: - kwargs["long_term_memory_config"] = ltm_config return Agent(**kwargs) @@ -64,11 +70,15 @@ def _input(user_id=None, session_id=None): def test_config_default_includes_remember_and_recall(): - assert LongTermMemoryConfig().tools == ("remember", "recall") + assert _ltm_config().tools == ("remember", "recall") def test_config_can_restrict_to_read_only(): - assert LongTermMemoryConfig(tools=("recall",)).tools == ("recall",) + assert _ltm_config(tools=("recall",)).tools == ("recall",) + + +def test_config_defaults_to_enabled(): + assert _ltm_config().enabled is True def test_config_model_dump_emits_plain_strings_not_enums(): @@ -77,8 +87,8 @@ def test_config_model_dump_emits_plain_strings_not_enums(): round-trip back as the enum *name* — 'REMEMBER' — failing validation).""" import yaml - dumped = LongTermMemoryConfig().model_dump() - assert dumped == {"tools": ("remember", "recall")} + dumped = _ltm_config().model_dump(exclude={"backend"}) + assert dumped["tools"] == ("remember", 
"recall") assert all(isinstance(t, str) and not hasattr(t, "value") for t in dumped["tools"]) yaml.safe_dump(dumped) # must not raise @@ -86,17 +96,15 @@ def test_config_model_dump_emits_plain_strings_not_enums(): # --- Agent field declarations --- -def test_agent_has_long_term_memory_fields(): +def test_agent_has_long_term_memory_field(): fields = Agent.model_fields assert "long_term_memory" in fields - assert "long_term_memory_config" in fields assert fields["long_term_memory"].default is None def test_agent_long_term_memory_defaults_to_none(llm): agent = _make_agent(llm) assert agent.long_term_memory is None - assert agent.long_term_memory_config.tools == ("remember", "recall") # --- _build_long_term_memory_tools --- @@ -118,12 +126,17 @@ def test_build_returns_empty_when_no_long_term_memory(llm): assert agent._build_long_term_memory_tools(_input(user_id="u1")) == [] -def test_build_respects_config_include(llm, ltm): - agent = _make_agent(llm, ltm=ltm, ltm_config=LongTermMemoryConfig(tools=("recall",))) +def test_build_respects_config_include(llm): + agent = _make_agent(llm, ltm=_ltm_config(tools=("recall",))) tools = agent._build_long_term_memory_tools(_input(user_id="u1")) assert [t.name for t in tools] == ["recall_facts"] +def test_build_returns_empty_when_disabled(llm): + agent = _make_agent(llm, ltm=_ltm_config(enabled=False)) + assert agent._build_long_term_memory_tools(_input(user_id="u1")) == [] + + def test_build_bakes_user_id_into_each_tool(llm, ltm): agent = _make_agent(llm, ltm=ltm) tools = agent._build_long_term_memory_tools(_input(user_id="u1")) @@ -140,6 +153,29 @@ def test_build_sets_is_optimized_for_agents_on_each_tool(llm, ltm): assert tools and all(t.is_optimized_for_agents for t in tools) +def test_function_calling_schemas_include_ltm_overlay(llm, ltm): + """In FUNCTION_CALLING mode the per-call LTM tools must appear in the + generated tool schemas, otherwise the LLM can never call remember/recall.""" + from dynamiq.nodes.agents.agent import 
Agent as ReActAgent + from dynamiq.nodes.agents.base import _run_extra_tools + from dynamiq.nodes.types import InferenceMode + + agent = ReActAgent(name="t", llm=llm, tools=[], long_term_memory=ltm, inference_mode=InferenceMode.FUNCTION_CALLING) + base_tools, _ = agent._effective_inference_schemas() + base_names = {schema["function"]["name"] for schema in (base_tools or [])} + assert "remember_fact" not in base_names # not present without an overlay + + ltm_tools = agent._build_long_term_memory_tools(_input(user_id="u1")) + token = _run_extra_tools.set(ltm_tools) + try: + fc_tools, _ = agent._effective_inference_schemas() + finally: + _run_extra_tools.reset(token) + + names = {schema["function"]["name"] for schema in fc_tools} + assert {"remember_fact", "recall_facts"} <= names + + def test_init_components_initializes_ltm_embedder(llm): """The embedder is a ConnectionNode whose `text_embedder` client is built during `init_components`; without that, the first recall AttributeErrors @@ -152,7 +188,7 @@ class _RecordingEmbedder(_FakeEmbedder): def init_components(self, connection_manager=None): init_calls.append(connection_manager) - ltm_with_postponed = LongTermMemory(backend=InMemoryLongTermMemoryBackend(), embedder=_RecordingEmbedder()) + ltm_with_postponed = LongTermMemoryConfig(backend=InMemoryLongTermMemoryBackend(embedder=_RecordingEmbedder())) agent = _make_agent(llm, ltm=ltm_with_postponed) # Node.__init__ already invokes init_components on construction; clear and # assert the explicit call also propagates to the embedder. 
From adc7e5e5bb7e94e33995332cb35a729957fa0932 Mon Sep 17 00:00:00 2001 From: Taras Yaroshko Date: Mon, 1 Jun 2026 11:41:53 +0300 Subject: [PATCH 43/53] fix: include LTM in has_tools so XML prompt template reserves tool blocks --- dynamiq/nodes/agents/agent.py | 9 +++++- .../test_long_term_memory_integration.py | 28 +++++++++++++++++++ 2 files changed, 36 insertions(+), 1 deletion(-) diff --git a/dynamiq/nodes/agents/agent.py b/dynamiq/nodes/agents/agent.py index d7d043f35..f771fa1f0 100644 --- a/dynamiq/nodes/agents/agent.py +++ b/dynamiq/nodes/agents/agent.py @@ -1976,10 +1976,17 @@ def _init_prompt_blocks(self): response_format_schema=response_format_schema, ) + # `has_tools` decides whether the XML/ReAct template emits tool-related + # blocks. Long-term memory injects per-call tools that aren't visible at + # init time, so we must opt in here too — otherwise the template has no + # placeholder for them and they stay invisible to the LLM. + ltm_enabled = self.long_term_memory is not None and self.long_term_memory.enabled self.system_prompt_manager.build_react_prompt( ReactPromptConfig( inference_mode=self.inference_mode, - has_tools=bool(self.tools) or (self.skills.enabled and self.skills.source is not None), + has_tools=bool(self.tools) + or (self.skills.enabled and self.skills.source is not None) + or ltm_enabled, parallel_tool_calls_enabled=self.parallel_tool_calls_enabled, delegation_allowed=self.delegation_allowed, context_compaction_enabled=self.summarization_config.enabled, diff --git a/tests/unit/nodes/agents/test_long_term_memory_integration.py b/tests/unit/nodes/agents/test_long_term_memory_integration.py index 9b987bd28..9b0e7ac5a 100644 --- a/tests/unit/nodes/agents/test_long_term_memory_integration.py +++ b/tests/unit/nodes/agents/test_long_term_memory_integration.py @@ -176,6 +176,34 @@ def test_function_calling_schemas_include_ltm_overlay(llm, ltm): assert {"remember_fact", "recall_facts"} <= names +def 
test_xml_prompt_includes_tool_blocks_when_only_ltm_configured(llm, ltm): + """In XML/ReAct mode the system prompt template must reserve tool blocks + when LTM is the only source of tools — otherwise the per-call tool + description has no placeholder and remember/recall stay invisible.""" + from dynamiq.nodes.agents.agent import Agent as ReActAgent + from dynamiq.nodes.types import InferenceMode + + agent = ReActAgent(name="t", llm=llm, tools=[], long_term_memory=ltm, inference_mode=InferenceMode.XML) + tools_block = agent.system_prompt_manager._prompt_blocks.get("tools", "") + assert "{{ tool_description }}" in tools_block + + +def test_xml_prompt_omits_tool_blocks_when_ltm_disabled(llm): + """Disabled LTM must not flip `has_tools` on — the template should still + render the no-tools instructions when nothing else provides tools.""" + from dynamiq.nodes.agents.agent import Agent as ReActAgent + from dynamiq.nodes.types import InferenceMode + + agent = ReActAgent( + name="t", + llm=llm, + tools=[], + long_term_memory=_ltm_config(enabled=False), + inference_mode=InferenceMode.XML, + ) + assert agent.system_prompt_manager._prompt_blocks.get("tools", "") == "" + + def test_init_components_initializes_ltm_embedder(llm): """The embedder is a ConnectionNode whose `text_embedder` client is built during `init_components`; without that, the first recall AttributeErrors From de0663e56a546c1c20b87e18d262c52b92d15669 Mon Sep 17 00:00:00 2001 From: Taras Yaroshko Date: Mon, 1 Jun 2026 11:43:08 +0300 Subject: [PATCH 44/53] perf: vectorize InMemory LTM cosine search across all matched facts --- .../memory/long_term/backends/in_memory.py | 27 +++++++++++-------- 1 file changed, 16 insertions(+), 11 deletions(-) diff --git a/dynamiq/memory/long_term/backends/in_memory.py b/dynamiq/memory/long_term/backends/in_memory.py index 61123a722..7fbd38532 100644 --- a/dynamiq/memory/long_term/backends/in_memory.py +++ b/dynamiq/memory/long_term/backends/in_memory.py @@ -57,20 +57,25 @@ def 
search( if not self._facts: return [] + matched_facts = [f for f in self._facts.values() if _matches_scope(f, scope)] + if not matched_facts: + return [] + + matrix = np.asarray([self._vectors[f.id] for f in matched_facts], dtype=np.float64) query = np.asarray(query_embedding, dtype=np.float64) + + # Cosine = (M @ q) / (||rows|| * ||q||); zero-norm rows fall back to 1 + # to avoid div-by-zero (the dot product is 0 anyway, so the score is 0). + row_norms = np.linalg.norm(matrix, axis=1) + row_norms[row_norms == 0] = 1.0 query_norm = np.linalg.norm(query) or 1.0 + scores = (matrix @ query) / (row_norms * query_norm) - scored: list[tuple[Fact, float]] = [] - for fact_id, fact in self._facts.items(): - if not _matches_scope(fact, scope): - continue - vec = np.asarray(self._vectors[fact_id], dtype=np.float64) - vec_norm = np.linalg.norm(vec) or 1.0 - cosine = float(np.dot(query, vec) / (query_norm * vec_norm)) - scored.append((fact, cosine)) - - scored.sort(key=lambda pair: pair[1], reverse=True) - return scored[:limit] + k = min(limit, len(matched_facts)) + # argpartition gives the top-k unsorted; sort just that slice. 
+ top_idx = np.argpartition(-scores, k - 1)[:k] + top_idx = top_idx[np.argsort(-scores[top_idx])] + return [(matched_facts[i], float(scores[i])) for i in top_idx] def list_by_scope( self, scope: dict[str, str], limit: int = 100, From f335b14fcadaf5504833334d834a8fca367003d3 Mon Sep 17 00:00:00 2001 From: Taras Yaroshko Date: Mon, 1 Jun 2026 11:45:13 +0300 Subject: [PATCH 45/53] feat: let RecallFactsTool accept multiple queries and merge results by best score --- dynamiq/nodes/tools/long_term_memory.py | 46 ++++++++++++++++------- tests/unit/memory/long_term/test_tools.py | 33 +++++++++++++--- 2 files changed, 59 insertions(+), 20 deletions(-) diff --git a/dynamiq/nodes/tools/long_term_memory.py b/dynamiq/nodes/tools/long_term_memory.py index 3524e243e..32752f293 100644 --- a/dynamiq/nodes/tools/long_term_memory.py +++ b/dynamiq/nodes/tools/long_term_memory.py @@ -36,27 +36,30 @@ - {"content": "Works in EST timezone", "metadata": {"category": "context"}} """ -RECALL_DESCRIPTION = """Search the user's long-term memory for facts relevant to a query. +RECALL_DESCRIPTION = """Search the user's long-term memory for facts relevant to one or more queries. Key capabilities: - Semantic search (not keyword) — matches meaning, paraphrases, synonyms - Scoped to the current user automatically — never crosses users +- Multi-query: pass several phrasings in one call; results are merged and de-duplicated. + Because matches are sensitive to phrasing, supplying 2–4 angles per recall typically + improves recall over a single query without an extra round-trip. 
- Returns the most relevant facts first, as plain text Usage strategy: - Call PROACTIVELY at the start of a turn when the request hints at something personal (preferences, past decisions, biographical info, recurring context) - Call BEFORE answering questions where prior context would change your response -- If no relevant facts are found, just proceed without them — no need to retry with - different phrasings unless the user's request makes the prior context essential +- Prefer 2–4 distinct phrasings over a single query when the topic is fuzzy +- If no relevant facts are found, just proceed without them - Skip when the question is purely factual or has no user-specific component Returns: a bullet list of relevant facts (most relevant first), or "No relevant facts." Examples: -- {"query": "food preferences"} -- {"query": "what does the user do for work?", "limit": 3} -- {"query": "timezone or schedule constraints", "limit": 10} +- {"queries": ["food preferences"]} +- {"queries": ["what does the user do for work?", "user profession", "user job role"], "limit": 3} +- {"queries": ["timezone", "working hours", "schedule constraints"], "limit": 10} """ @@ -74,10 +77,17 @@ class RememberFactInputSchema(BaseModel): class RecallFactsInputSchema(BaseModel): """LLM-visible input for `recall_facts`. `user_id` is bound at construction.""" - query: str = Field(..., min_length=1, max_length=500, - description="What to search for.") + queries: list[str] = Field( + ..., + min_length=1, + max_length=5, + description=( + "One or more search phrasings. Semantic search is phrasing-sensitive, " + "so 2–4 distinct angles usually beat a single query for fuzzy topics." 
+ ), + ) limit: int = Field(default=5, ge=1, le=20, - description="Max facts to return.") + description="Max facts to return after merging across queries.") class _LongTermMemoryTool(Node): @@ -146,11 +156,19 @@ def execute( check_cancellation(config) self.run_on_node_execute_run(config.callbacks, **kwargs) - hits = self.backend.recall( - query=input_data.query, - user_id=self.user_id, - limit=input_data.limit, - ) + # Recall once per query; merge by fact id keeping the best score so a + # paraphrase that scores higher under one phrasing isn't penalised by + # another phrasing's weaker hit. Ask each backend call for `limit` to + # let the union be re-ranked at the end. + best: dict[str, tuple[Any, float]] = {} + for query in input_data.queries: + for fact, score in self.backend.recall(query=query, user_id=self.user_id, limit=input_data.limit): + prev = best.get(fact.id) + if prev is None or score > prev[1]: + best[fact.id] = (fact, score) + + hits = sorted(best.values(), key=lambda pair: pair[1], reverse=True)[: input_data.limit] + if self.is_optimized_for_agents: if not hits: return {"content": "No relevant facts."} diff --git a/tests/unit/memory/long_term/test_tools.py b/tests/unit/memory/long_term/test_tools.py index 90a143485..8ccad5f7e 100644 --- a/tests/unit/memory/long_term/test_tools.py +++ b/tests/unit/memory/long_term/test_tools.py @@ -71,7 +71,7 @@ def test_recall_tool_returns_hits(backend, user_id): backend.remember(content="User likes pizza", user_id=user_id) backend.remember(content="User likes Python", user_id=user_id) tool = RecallFactsTool(backend=backend, user_id=user_id) - result = tool.execute(tool.input_schema(query="pizza", limit=2)) + result = tool.execute(tool.input_schema(queries=["pizza"], limit=2)) items = result["content"] assert len(items) == 2 for item in items: @@ -82,21 +82,21 @@ def test_recall_tool_returns_hits(backend, user_id): def test_recall_tool_input_schema_has_no_user_id(): assert "user_id" not in 
RecallFactsTool.input_schema.model_fields - assert {"query", "limit"} <= set(RecallFactsTool.input_schema.model_fields) + assert {"queries", "limit"} <= set(RecallFactsTool.input_schema.model_fields) def test_recall_tool_isolates_users(backend, user_id, other_user_id): backend.remember(content="A's fact", user_id=user_id) backend.remember(content="B's fact", user_id=other_user_id) tool = RecallFactsTool(backend=backend, user_id=user_id) - result = tool.execute(tool.input_schema(query="fact", limit=5)) + result = tool.execute(tool.input_schema(queries=["fact"], limit=5)) contents = {item["content"] for item in result["content"]} assert contents == {"A's fact"} def test_recall_tool_empty_store_returns_empty(backend, user_id): tool = RecallFactsTool(backend=backend, user_id=user_id) - result = tool.execute(tool.input_schema(query="anything")) + result = tool.execute(tool.input_schema(queries=["anything"])) assert result["content"] == [] @@ -105,7 +105,7 @@ def test_recall_tool_agent_optimized_returns_bullet_list(backend, user_id): backend.remember(content="User likes Python", user_id=user_id) tool = RecallFactsTool(backend=backend, user_id=user_id) tool.is_optimized_for_agents = True - result = tool.execute(tool.input_schema(query="pizza", limit=2)) + result = tool.execute(tool.input_schema(queries=["pizza"], limit=2)) assert isinstance(result["content"], str) assert "- User likes pizza" in result["content"] assert "- User likes Python" in result["content"] @@ -114,10 +114,31 @@ def test_recall_tool_agent_optimized_returns_bullet_list(backend, user_id): def test_recall_tool_agent_optimized_empty_message(backend, user_id): tool = RecallFactsTool(backend=backend, user_id=user_id) tool.is_optimized_for_agents = True - result = tool.execute(tool.input_schema(query="anything")) + result = tool.execute(tool.input_schema(queries=["anything"])) assert result["content"] == "No relevant facts." 
+def test_recall_tool_merges_multiple_queries_and_dedupes(backend, user_id): + """Multiple phrasings hitting the same fact must yield one entry, not duplicates.""" + backend.remember(content="User likes pizza", user_id=user_id) + backend.remember(content="User likes Python", user_id=user_id) + tool = RecallFactsTool(backend=backend, user_id=user_id) + result = tool.execute( + tool.input_schema(queries=["pizza", "User likes pizza", "favourite food"], limit=5) + ) + contents = [it["content"] for it in result["content"]] + assert len(contents) == len(set(contents)) + assert "User likes pizza" in contents + + +def test_recall_tool_rejects_empty_queries_list(): + """min_length=1 on `queries` must reject an empty list at schema validation.""" + import pytest as _pytest + + with _pytest.raises(Exception): + RecallFactsTool.input_schema(queries=[]) + + # --- factory --- From 602ead9e1726e44790d5f43ab5e7327cadfb9e3e Mon Sep 17 00:00:00 2001 From: Taras Yaroshko Date: Mon, 1 Jun 2026 11:54:35 +0300 Subject: [PATCH 46/53] style: shorten include_secure_params swallow comment in BaseConnection.to_dict --- dynamiq/connections/connections.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/dynamiq/connections/connections.py b/dynamiq/connections/connections.py index 1b1157670..83430b975 100644 --- a/dynamiq/connections/connections.py +++ b/dynamiq/connections/connections.py @@ -69,10 +69,8 @@ def to_dict(self, for_tracing: bool = False, **kwargs) -> dict: Returns: dict: A dictionary representation of the connection instance. """ - # Swallow `include_secure_params` if a caller forwards it down — the - # connection always serializes its credential fields (subject only to - # `for_tracing`), so the flag has no effect here but must not leak to - # `model_dump`, which raises on unknown kwargs. + # Drop forwarded `include_secure_params` — has no effect here and + # `model_dump` would reject it as an unknown kwarg. 
kwargs.pop("include_secure_params", None) if for_tracing: return {"id": self.id, "type": self.type} From c96fabae3f50b1fd82b8e20a8dca0d627ce305ff Mon Sep 17 00:00:00 2001 From: Taras Yaroshko Date: Mon, 1 Jun 2026 12:19:24 +0300 Subject: [PATCH 47/53] feat: add PineconeLongTermMemoryBackend with mocked-client unit tests --- dynamiq/memory/long_term/backends/__init__.py | 8 +- dynamiq/memory/long_term/backends/pinecone.py | 205 ++++++++++++++ .../memory/long_term/test_pinecone_backend.py | 263 ++++++++++++++++++ 3 files changed, 475 insertions(+), 1 deletion(-) create mode 100644 dynamiq/memory/long_term/backends/pinecone.py create mode 100644 tests/unit/memory/long_term/test_pinecone_backend.py diff --git a/dynamiq/memory/long_term/backends/__init__.py b/dynamiq/memory/long_term/backends/__init__.py index 1d8eff316..ce8629f13 100644 --- a/dynamiq/memory/long_term/backends/__init__.py +++ b/dynamiq/memory/long_term/backends/__init__.py @@ -1,5 +1,11 @@ from dynamiq.memory.long_term.backends.in_memory import InMemoryLongTermMemoryBackend from dynamiq.memory.long_term.backends.pgvector import PostgresLongTermMemoryBackend +from dynamiq.memory.long_term.backends.pinecone import PineconeLongTermMemoryBackend from dynamiq.memory.long_term.backends.qdrant import QdrantLongTermMemoryBackend -__all__ = ["InMemoryLongTermMemoryBackend", "PostgresLongTermMemoryBackend", "QdrantLongTermMemoryBackend"] +__all__ = [ + "InMemoryLongTermMemoryBackend", + "PineconeLongTermMemoryBackend", + "PostgresLongTermMemoryBackend", + "QdrantLongTermMemoryBackend", +] diff --git a/dynamiq/memory/long_term/backends/pinecone.py b/dynamiq/memory/long_term/backends/pinecone.py new file mode 100644 index 000000000..92ae7a2cc --- /dev/null +++ b/dynamiq/memory/long_term/backends/pinecone.py @@ -0,0 +1,205 @@ +import json +from datetime import datetime +from typing import TYPE_CHECKING, Any + +from pydantic import ConfigDict, Field, PrivateAttr + +from dynamiq.connections import Pinecone as 
PineconeConnection +from dynamiq.memory.long_term.base import LongTermMemoryBackend +from dynamiq.memory.long_term.schemas import Fact + +if TYPE_CHECKING: + from pinecone import Pinecone as PineconeClient + +# Pinecone metadata doesn't accept nested dicts; we JSON-encode the Fact's +# `metadata` into this single string field and decode on read. +_METADATA_JSON_KEY = "metadata_json" + + +def _scope_to_filter(scope: dict[str, str]) -> dict | None: + """Translate `{key: value, ...}` into a Pinecone metadata filter. + + Single-key scopes become a flat `{key: {"$eq": value}}`; multi-key scopes + are wrapped in `$and` since Pinecone treats sibling keys as implicit-AND + only when each is a leaf. + """ + if not scope: + return None + if len(scope) == 1: + (key, value), = scope.items() + return {key: {"$eq": value}} + return {"$and": [{key: {"$eq": value}} for key, value in scope.items()]} + + +def _fact_to_metadata(fact: Fact) -> dict[str, Any]: + return { + "fact_id": fact.id, + "content": fact.content, + "hash": fact.hash, + "user_id": fact.user_id, + _METADATA_JSON_KEY: json.dumps(fact.metadata or {}), + "created_at": fact.created_at.isoformat(), + "updated_at": fact.updated_at.isoformat(), + } + + +def _metadata_to_fact(meta: dict[str, Any]) -> Fact: + raw_meta = meta.get(_METADATA_JSON_KEY) or "{}" + return Fact( + id=meta["fact_id"], + content=meta["content"], + hash=meta["hash"], + user_id=meta["user_id"], + metadata=json.loads(raw_meta) if isinstance(raw_meta, str) else raw_meta, + created_at=datetime.fromisoformat(meta["created_at"]), + updated_at=datetime.fromisoformat(meta["updated_at"]), + ) + + +class PineconeLongTermMemoryBackend(LongTermMemoryBackend): + """Long-term memory backend backed by Pinecone. + + Facts are stored as Pinecone vectors keyed by the original `fact_id` (Pinecone + accepts arbitrary string ids). 
Fact payload lives in the vector's metadata; the + free-form `Fact.metadata` dict is JSON-encoded into a single string field to + avoid Pinecone's no-nested-dicts restriction. + """ + + model_config = ConfigDict(arbitrary_types_allowed=True) + + name: str = "pinecone-long-term-memory-backend" + connection: PineconeConnection = Field(default_factory=PineconeConnection) + index_name: str = "user_facts" + namespace: str = "default" + dimension: int = 1536 + # Scroll/list pagination cap. Pinecone's max top_k is 10000 per query. + _LIST_PAGE_SIZE: int = 10_000 + + _client: "PineconeClient | None" = PrivateAttr(default=None) + _index: Any = PrivateAttr(default=None) + + @property + def to_dict_exclude_params(self) -> dict[str, bool]: + return super().to_dict_exclude_params | {"_client": True, "_index": True, "connection": True} + + def to_dict(self, include_secure_params: bool = False, for_tracing: bool = False, **kwargs) -> dict[str, Any]: + data = super().to_dict(include_secure_params=include_secure_params, for_tracing=for_tracing, **kwargs) + data["connection"] = self.connection.to_dict( + for_tracing=for_tracing, include_secure_params=include_secure_params, **kwargs + ) + return data + + def model_post_init(self, __context) -> None: + self._client = self.connection.connect() + self._index = self._client.Index(name=self.index_name) + + def insert(self, fact: Fact, embedding: list[float]) -> None: + self._index.upsert( + vectors=[{"id": fact.id, "values": list(embedding), "metadata": _fact_to_metadata(fact)}], + namespace=self.namespace, + ) + + def get(self, fact_id: str) -> Fact | None: + result = self._index.fetch(ids=[fact_id], namespace=self.namespace) + vectors = result.get("vectors") if isinstance(result, dict) else getattr(result, "vectors", {}) + if not vectors or fact_id not in vectors: + return None + vec = vectors[fact_id] + meta = vec["metadata"] if isinstance(vec, dict) else vec.metadata + return _metadata_to_fact(meta) + + def get_by_hash(self, *, 
user_id: str, content_hash: str) -> Fact | None: + # Pinecone's metadata-only filter goes through `query`; we send a zero + # vector since the score is irrelevant for a hash lookup. + result = self._index.query( + vector=[0.0] * self.dimension, + top_k=1, + namespace=self.namespace, + filter=_scope_to_filter({"user_id": user_id, "hash": content_hash}), + include_metadata=True, + ) + matches = result.get("matches") if isinstance(result, dict) else getattr(result, "matches", []) + if not matches: + return None + match = matches[0] + meta = match["metadata"] if isinstance(match, dict) else match.metadata + return _metadata_to_fact(meta) + + def delete(self, fact_id: str) -> None: + self._index.delete(ids=[fact_id], namespace=self.namespace) + + def update( + self, + fact_id: str, + *, + content: str, + content_hash: str, + embedding: list[float], + metadata: dict, + updated_at: datetime, + ) -> None: + existing = self.get(fact_id) + if existing is None: + return + new_fact = existing.model_copy( + update={"content": content, "hash": content_hash, "metadata": metadata, "updated_at": updated_at} + ) + # `upsert` overwrites both the vector and the metadata payload in one call. 
+ self._index.upsert( + vectors=[{"id": fact_id, "values": list(embedding), "metadata": _fact_to_metadata(new_fact)}], + namespace=self.namespace, + ) + + def search( + self, + *, + query_embedding: list[float], + scope: dict[str, str], + limit: int, + ) -> list[tuple[Fact, float]]: + result = self._index.query( + vector=list(query_embedding), + top_k=limit, + namespace=self.namespace, + filter=_scope_to_filter(scope), + include_metadata=True, + ) + matches = result.get("matches") if isinstance(result, dict) else getattr(result, "matches", []) + out: list[tuple[Fact, float]] = [] + for match in matches: + meta = match["metadata"] if isinstance(match, dict) else match.metadata + score = match["score"] if isinstance(match, dict) else match.score + out.append((_metadata_to_fact(meta), float(score))) + return out + + def list_by_scope(self, scope: dict[str, str], limit: int = 100) -> list[Fact]: + # Pinecone has no "scan" primitive — the documented pattern is a query + # with a zero vector + filter. Capped at top_k=10000 (Pinecone's max). + top_k = min(max(limit, 1), self._LIST_PAGE_SIZE) + result = self._index.query( + vector=[0.0] * self.dimension, + top_k=top_k, + namespace=self.namespace, + filter=_scope_to_filter(scope), + include_metadata=True, + ) + matches = result.get("matches") if isinstance(result, dict) else getattr(result, "matches", []) + return [_metadata_to_fact(match["metadata"] if isinstance(match, dict) else match.metadata) for match in matches] + + def delete_scope(self, scope: dict[str, str]) -> int: + # Pinecone Serverless does NOT support delete-by-filter — only delete-by-id. + # To stay portable across serverless and pod, we collect ids via query + filter, + # then delete by ids. This also gives us an accurate return count. 
+ result = self._index.query( + vector=[0.0] * self.dimension, + top_k=self._LIST_PAGE_SIZE, + namespace=self.namespace, + filter=_scope_to_filter(scope), + include_metadata=False, + ) + matches = result.get("matches") if isinstance(result, dict) else getattr(result, "matches", []) + ids = [match["id"] if isinstance(match, dict) else match.id for match in matches] + if not ids: + return 0 + self._index.delete(ids=ids, namespace=self.namespace) + return len(ids) diff --git a/tests/unit/memory/long_term/test_pinecone_backend.py b/tests/unit/memory/long_term/test_pinecone_backend.py new file mode 100644 index 000000000..089a0c5f9 --- /dev/null +++ b/tests/unit/memory/long_term/test_pinecone_backend.py @@ -0,0 +1,263 @@ +"""Tests for PineconeLongTermMemoryBackend. + +These exercise the backend's storage primitives against an in-process fake +Pinecone index that mimics the v3 client API used by the backend +(`upsert`, `fetch`, `query`, `delete`). No live Pinecone calls are made. +""" +import math +from datetime import UTC, datetime + +import pytest + +from dynamiq.connections import Pinecone as PineconeConnection +from dynamiq.memory.long_term.backends.pinecone import PineconeLongTermMemoryBackend +from dynamiq.memory.long_term.schemas import Fact + +# --- Fake Pinecone client / index ------------------------------------------ + +# Pinecone metadata filter is MongoDB-style. We support the subset the backend +# emits: `{key: {"$eq": value}}` and `{"$and": [..., ...]}`. 
+ + +def _matches_filter(metadata: dict, flt: dict | None) -> bool: + if not flt: + return True + if "$and" in flt: + return all(_matches_filter(metadata, sub) for sub in flt["$and"]) + for key, predicate in flt.items(): + if isinstance(predicate, dict) and "$eq" in predicate: + if metadata.get(key) != predicate["$eq"]: + return False + else: + if metadata.get(key) != predicate: + return False + return True + + +def _cosine(a: list[float], b: list[float]) -> float: + dot = sum(x * y for x, y in zip(a, b)) + na = math.sqrt(sum(x * x for x in a)) or 1.0 + nb = math.sqrt(sum(x * x for x in b)) or 1.0 + return dot / (na * nb) + + +class _FakeIndex: + def __init__(self) -> None: + # namespace -> id -> {"id", "values", "metadata"} + self.store: dict[str, dict[str, dict]] = {} + + def _ns(self, namespace: str) -> dict[str, dict]: + return self.store.setdefault(namespace, {}) + + def upsert(self, vectors, namespace="default"): + ns = self._ns(namespace) + for vec in vectors: + ns[vec["id"]] = {"id": vec["id"], "values": list(vec["values"]), "metadata": dict(vec["metadata"])} + return {"upserted_count": len(vectors)} + + def fetch(self, ids, namespace="default"): + ns = self._ns(namespace) + return {"vectors": {i: ns[i] for i in ids if i in ns}} + + def delete(self, ids=None, namespace="default", filter=None): + ns = self._ns(namespace) + if ids is not None: + for i in ids: + ns.pop(i, None) + elif filter is not None: + for i, item in list(ns.items()): + if _matches_filter(item["metadata"], filter): + ns.pop(i, None) + return {} + + def query(self, vector, top_k, namespace="default", filter=None, include_metadata=True, **_): + ns = self._ns(namespace) + candidates = [item for item in ns.values() if _matches_filter(item["metadata"], filter)] + scored = [(item, _cosine(vector, item["values"])) for item in candidates] + scored.sort(key=lambda pair: pair[1], reverse=True) + matches = [] + for item, score in scored[:top_k]: + entry = {"id": item["id"], "score": score} + if 
include_metadata: + entry["metadata"] = item["metadata"] + matches.append(entry) + return {"matches": matches} + + +class _FakeClient: + def __init__(self) -> None: + self.indexes: dict[str, _FakeIndex] = {} + + def Index(self, name): # noqa: N802 — mirrors Pinecone client API + return self.indexes.setdefault(name, _FakeIndex()) + + +# --- Fixtures --------------------------------------------------------------- + + +@pytest.fixture +def fake_pinecone_client(monkeypatch): + client = _FakeClient() + monkeypatch.setattr(PineconeConnection, "connect", lambda self: client) + return client + + +@pytest.fixture +def backend(fake_embedder, fake_pinecone_client): + return PineconeLongTermMemoryBackend( + connection=PineconeConnection(api_key="test-key"), + embedder=fake_embedder, + index_name="user_facts", + namespace="test", + dimension=fake_embedder.DIM, + ) + + +def _fact(fact_id: str, user_id: str, content: str, content_hash: str | None = None) -> Fact: + now = datetime.now(UTC) + return Fact( + id=fact_id, + content=content, + hash=content_hash or f"h-{fact_id}", + user_id=user_id, + metadata={}, + created_at=now, + updated_at=now, + ) + + +# --- insert / get / get_by_hash -------------------------------------------- + + +def test_pinecone_insert_then_get(backend, fake_embedder): + fact = _fact("f1", "u1", "hello") + backend.insert(fact, fake_embedder.embed("hello")) + fetched = backend.get("f1") + assert fetched is not None and fetched.id == "f1" and fetched.content == "hello" + + +def test_pinecone_get_unknown_returns_none(backend): + assert backend.get("does-not-exist") is None + + +def test_pinecone_get_by_hash(backend, fake_embedder): + backend.insert(_fact("f1", "u1", "x", "h-shared"), fake_embedder.embed("x")) + found = backend.get_by_hash(user_id="u1", content_hash="h-shared") + assert found is not None and found.id == "f1" + + +def test_pinecone_get_by_hash_isolates_users(backend, fake_embedder): + backend.insert(_fact("f1", "u1", "x", "h-shared"), 
fake_embedder.embed("x")) + assert backend.get_by_hash(user_id="u2", content_hash="h-shared") is None + + +def test_pinecone_metadata_round_trip(backend, fake_embedder): + """Free-form metadata must survive Pinecone's flat-schema constraint via JSON encoding.""" + fact = _fact("f1", "u1", "x").model_copy( + update={"metadata": {"category": "preference", "score": 0.8}} + ) + backend.insert(fact, fake_embedder.embed("x")) + fetched = backend.get("f1") + assert fetched.metadata == {"category": "preference", "score": 0.8} + + +# --- delete / list_by_scope / delete_scope --------------------------------- + + +def test_pinecone_delete(backend, fake_embedder): + backend.insert(_fact("f1", "u1", "x"), fake_embedder.embed("x")) + backend.delete("f1") + assert backend.get("f1") is None + + +def test_pinecone_list_by_scope(backend, fake_embedder): + backend.insert(_fact("f1", "u1", "a"), fake_embedder.embed("a")) + backend.insert(_fact("f2", "u1", "b"), fake_embedder.embed("b")) + backend.insert(_fact("f3", "u2", "c"), fake_embedder.embed("c")) + listed = backend.list_by_scope({"user_id": "u1"}) + assert {f.id for f in listed} == {"f1", "f2"} + + +def test_pinecone_delete_scope_returns_accurate_count(backend, fake_embedder): + backend.insert(_fact("f1", "u1", "a"), fake_embedder.embed("a")) + backend.insert(_fact("f2", "u1", "b"), fake_embedder.embed("b")) + backend.insert(_fact("f3", "u2", "c"), fake_embedder.embed("c")) + assert backend.delete_scope({"user_id": "u1"}) == 2 + assert backend.list_by_scope({"user_id": "u1"}) == [] + assert len(backend.list_by_scope({"user_id": "u2"})) == 1 + + +def test_pinecone_delete_scope_empty_returns_zero(backend): + assert backend.delete_scope({"user_id": "nobody"}) == 0 + + +# --- search ---------------------------------------------------------------- + + +def test_pinecone_search_relevance_ordered(backend, fake_embedder): + backend.insert(_fact("f1", "u1", "alpha"), fake_embedder.embed("alpha")) + backend.insert(_fact("f2", "u1", 
"alpha-2"), fake_embedder.embed("alpha-2")) + backend.insert(_fact("f3", "u1", "zulu"), fake_embedder.embed("zulu")) + hits = backend.search( + query_embedding=fake_embedder.embed("alpha"), + scope={"user_id": "u1"}, + limit=3, + ) + assert hits[0][0].id == "f1" + scores = [score for _, score in hits] + assert scores == sorted(scores, reverse=True) + + +def test_pinecone_search_isolates_users(backend, fake_embedder): + backend.insert(_fact("f1", "u1", "alpha"), fake_embedder.embed("alpha")) + backend.insert(_fact("f2", "u2", "alpha"), fake_embedder.embed("alpha")) + hits = backend.search( + query_embedding=fake_embedder.embed("alpha"), + scope={"user_id": "u1"}, + limit=5, + ) + assert [fact.id for fact, _ in hits] == ["f1"] + + +def test_pinecone_search_empty_returns_empty(backend, fake_embedder): + hits = backend.search( + query_embedding=fake_embedder.embed("x"), + scope={"user_id": "u1"}, + limit=5, + ) + assert hits == [] + + +# --- high-level operations via Template Method ------------------------------ + + +def test_pinecone_remember_and_recall_through_backend(backend, fake_embedder): + """End-to-end remember/recall must work through the backend's high-level API, + confirming the storage primitives are wired correctly.""" + backend.remember(content="User likes pizza", user_id="u1") + backend.remember(content="User likes Python", user_id="u1") + hits = backend.recall(query="pizza preferences", user_id="u1", limit=5) + contents = {fact.content for fact, _ in hits} + assert {"User likes pizza", "User likes Python"} <= contents + + +# --- serialization --------------------------------------------------------- + + +def test_pinecone_to_dict_excludes_live_clients_and_includes_connection(backend): + """`to_dict` must drop the runtime client/index but emit connection + embedder + so the YAML round-trip rebuilds an equivalent backend.""" + data = backend.to_dict() + assert "_client" not in data and "_index" not in data + assert isinstance(data["connection"], dict) + 
assert isinstance(data["embedder"], dict) + # Persistent backend identity must survive serialization. + assert data["index_name"] == "user_facts" + assert data["namespace"] == "test" + + +def test_pinecone_to_dict_accepts_include_secure_params(backend): + """`include_secure_params=True` must propagate through backend → connection + without raising.""" + data = backend.to_dict(include_secure_params=True) + assert "connection" in data and "embedder" in data From a0e4e67416ee8cc9d12a9c9c4b6f9ceb3b046d31 Mon Sep 17 00:00:00 2001 From: Taras Yaroshko Date: Mon, 1 Jun 2026 12:25:51 +0300 Subject: [PATCH 48/53] feat: add WeaviateLongTermMemoryBackend with mocked-client unit tests --- dynamiq/memory/long_term/backends/__init__.py | 2 + dynamiq/memory/long_term/backends/weaviate.py | 239 +++++++++++++ .../memory/long_term/test_weaviate_backend.py | 323 ++++++++++++++++++ 3 files changed, 564 insertions(+) create mode 100644 dynamiq/memory/long_term/backends/weaviate.py create mode 100644 tests/unit/memory/long_term/test_weaviate_backend.py diff --git a/dynamiq/memory/long_term/backends/__init__.py b/dynamiq/memory/long_term/backends/__init__.py index ce8629f13..cdde9417e 100644 --- a/dynamiq/memory/long_term/backends/__init__.py +++ b/dynamiq/memory/long_term/backends/__init__.py @@ -2,10 +2,12 @@ from dynamiq.memory.long_term.backends.pgvector import PostgresLongTermMemoryBackend from dynamiq.memory.long_term.backends.pinecone import PineconeLongTermMemoryBackend from dynamiq.memory.long_term.backends.qdrant import QdrantLongTermMemoryBackend +from dynamiq.memory.long_term.backends.weaviate import WeaviateLongTermMemoryBackend __all__ = [ "InMemoryLongTermMemoryBackend", "PineconeLongTermMemoryBackend", "PostgresLongTermMemoryBackend", "QdrantLongTermMemoryBackend", + "WeaviateLongTermMemoryBackend", ] diff --git a/dynamiq/memory/long_term/backends/weaviate.py b/dynamiq/memory/long_term/backends/weaviate.py new file mode 100644 index 000000000..b38ef7017 --- /dev/null +++ 
b/dynamiq/memory/long_term/backends/weaviate.py @@ -0,0 +1,239 @@ +import json +import uuid +from datetime import datetime +from typing import TYPE_CHECKING, Any + +from pydantic import ConfigDict, Field, PrivateAttr + +from dynamiq.connections import Weaviate as WeaviateConnection +from dynamiq.memory.long_term.base import LongTermMemoryBackend +from dynamiq.memory.long_term.schemas import Fact + +if TYPE_CHECKING: + from weaviate import WeaviateClient + +# Weaviate properties are strictly typed and reject nested objects, so we +# JSON-encode the Fact's `metadata` dict into a single TEXT property. +_METADATA_JSON_KEY = "metadata_json" +# Deterministic namespace so two backends pointing at the same collection +# resolve a given `fact_id` to the same UUID — required for delete/update +# round-trips when the original fact_id is not itself a UUID. +_UUID_NAMESPACE = uuid.UUID("00000000-0000-0000-0000-000000000000") + + +def _to_weaviate_uuid(fact_id: str) -> str: + return str(uuid.uuid5(_UUID_NAMESPACE, fact_id)) + + +def _fact_to_properties(fact: Fact) -> dict[str, Any]: + return { + "fact_id": fact.id, + "content": fact.content, + "hash": fact.hash, + "user_id": fact.user_id, + _METADATA_JSON_KEY: json.dumps(fact.metadata or {}), + "created_at": fact.created_at.isoformat(), + "updated_at": fact.updated_at.isoformat(), + } + + +def _properties_to_fact(props: dict[str, Any]) -> Fact: + raw_meta = props.get(_METADATA_JSON_KEY) or "{}" + return Fact( + id=props["fact_id"], + content=props["content"], + hash=props["hash"], + user_id=props["user_id"], + metadata=json.loads(raw_meta) if isinstance(raw_meta, str) else raw_meta, + created_at=datetime.fromisoformat(props["created_at"]), + updated_at=datetime.fromisoformat(props["updated_at"]), + ) + + +def _scope_to_filter(scope: dict[str, str]): + """Translate `{key: value, ...}` to a weaviate v4 `Filter` expression, AND-ing + multiple keys. 
Imported lazily so the module load doesn't require weaviate.""" + if not scope: + return None + from weaviate.classes.query import Filter + + items = list(scope.items()) + expr = Filter.by_property(items[0][0]).equal(items[0][1]) + for key, value in items[1:]: + expr = expr & Filter.by_property(key).equal(value) + return expr + + +class WeaviateLongTermMemoryBackend(LongTermMemoryBackend): + """Long-term memory backend backed by Weaviate (client v4). + + Each fact is one Weaviate object whose UUID is derived deterministically + from the original `fact_id` (UUID5 over a fixed namespace) so id-based + operations round-trip cleanly. Free-form `Fact.metadata` is JSON-encoded + into a single TEXT property to dodge Weaviate's strict-schema constraint. + """ + + model_config = ConfigDict(arbitrary_types_allowed=True) + + name: str = "weaviate-long-term-memory-backend" + connection: WeaviateConnection = Field(default_factory=WeaviateConnection) + collection_name: str = "UserFacts" + dimension: int = 1536 + + _client: "WeaviateClient | None" = PrivateAttr(default=None) + _collection: Any = PrivateAttr(default=None) + + @property + def to_dict_exclude_params(self) -> dict[str, bool]: + return super().to_dict_exclude_params | {"_client": True, "_collection": True, "connection": True} + + def to_dict(self, include_secure_params: bool = False, for_tracing: bool = False, **kwargs) -> dict[str, Any]: + data = super().to_dict(include_secure_params=include_secure_params, for_tracing=for_tracing, **kwargs) + data["connection"] = self.connection.to_dict( + for_tracing=for_tracing, include_secure_params=include_secure_params, **kwargs + ) + return data + + def model_post_init(self, __context) -> None: + self._client = self.connection.connect() + self._collection = self._client.collections.get(self.collection_name) + + def ensure_collection(self) -> None: + """Create the facts collection if absent. 
Safe to call repeatedly.""" + from weaviate.classes.config import Configure, DataType, Property + + if self._client.collections.exists(self.collection_name): + return + self._client.collections.create( + name=self.collection_name, + vectorizer_config=Configure.Vectorizer.none(), + vector_index_config=Configure.VectorIndex.hnsw( + distance_metric=Configure.VectorDistances.COSINE, + ), + properties=[ + Property(name="fact_id", data_type=DataType.TEXT), + Property(name="content", data_type=DataType.TEXT), + Property(name="hash", data_type=DataType.TEXT), + Property(name="user_id", data_type=DataType.TEXT), + Property(name=_METADATA_JSON_KEY, data_type=DataType.TEXT), + Property(name="created_at", data_type=DataType.TEXT), + Property(name="updated_at", data_type=DataType.TEXT), + ], + ) + self._collection = self._client.collections.get(self.collection_name) + + def recreate_collection(self) -> None: + """Drop and re-create the facts collection. Test-only helper.""" + if self._client.collections.exists(self.collection_name): + self._client.collections.delete(self.collection_name) + self.ensure_collection() + + def drop_collection(self) -> None: + """Drop the facts collection if it exists. 
Test-only helper.""" + if self._client.collections.exists(self.collection_name): + self._client.collections.delete(self.collection_name) + + def insert(self, fact: Fact, embedding: list[float]) -> None: + self._collection.data.insert( + uuid=_to_weaviate_uuid(fact.id), + properties=_fact_to_properties(fact), + vector=list(embedding), + ) + + def get(self, fact_id: str) -> Fact | None: + obj = self._collection.query.fetch_object_by_id(uuid=_to_weaviate_uuid(fact_id)) + if obj is None: + return None + return _properties_to_fact(obj.properties) + + def get_by_hash(self, *, user_id: str, content_hash: str) -> Fact | None: + result = self._collection.query.fetch_objects( + filters=_scope_to_filter({"user_id": user_id, "hash": content_hash}), + limit=1, + ) + objects = getattr(result, "objects", []) or [] + if not objects: + return None + return _properties_to_fact(objects[0].properties) + + def delete(self, fact_id: str) -> None: + self._collection.data.delete_by_id(uuid=_to_weaviate_uuid(fact_id)) + + def update( + self, + fact_id: str, + *, + content: str, + content_hash: str, + embedding: list[float], + metadata: dict, + updated_at: datetime, + ) -> None: + existing = self.get(fact_id) + if existing is None: + return + new_fact = existing.model_copy( + update={"content": content, "hash": content_hash, "metadata": metadata, "updated_at": updated_at} + ) + # `replace` overwrites properties + vector while preserving the uuid. 
+ self._collection.data.replace( + uuid=_to_weaviate_uuid(fact_id), + properties=_fact_to_properties(new_fact), + vector=list(embedding), + ) + + def search( + self, + *, + query_embedding: list[float], + scope: dict[str, str], + limit: int, + ) -> list[tuple[Fact, float]]: + from weaviate.classes.query import MetadataQuery + + result = self._collection.query.near_vector( + near_vector=list(query_embedding), + limit=limit, + filters=_scope_to_filter(scope), + return_metadata=MetadataQuery(distance=True), + ) + objects = getattr(result, "objects", []) or [] + out: list[tuple[Fact, float]] = [] + for obj in objects: + distance = getattr(obj.metadata, "distance", None) if obj.metadata is not None else None + # Weaviate returns cosine *distance* in [0, 2]; convert to similarity + # so callers see the same shape as Qdrant/Pinecone (`1.0 = best`). + score = 1.0 - float(distance) if distance is not None else 0.0 + out.append((_properties_to_fact(obj.properties), score)) + return out + + def list_by_scope(self, scope: dict[str, str], limit: int = 100) -> list[Fact]: + result = self._collection.query.fetch_objects( + filters=_scope_to_filter(scope), + limit=limit, + ) + objects = getattr(result, "objects", []) or [] + return [_properties_to_fact(obj.properties) for obj in objects] + + def delete_scope(self, scope: dict[str, str]) -> int: + # Weaviate has a native delete-by-filter, but it returns no count. + # Count first via the same filter, then delete — same trade-off as + # Qdrant: an extra round-trip for an accurate return value. + flt = _scope_to_filter(scope) + if flt is None: + # Empty scope = match everything — match the Qdrant contract. 
+ result = self._collection.query.fetch_objects(limit=10_000) + objects = getattr(result, "objects", []) or [] + count = len(objects) + if count == 0: + return 0 + from weaviate.classes.query import Filter + + self._collection.data.delete_many(where=Filter.by_id().contains_any([str(o.uuid) for o in objects])) + return count + + listed = self.list_by_scope(scope, limit=10_000) + if not listed: + return 0 + self._collection.data.delete_many(where=flt) + return len(listed) diff --git a/tests/unit/memory/long_term/test_weaviate_backend.py b/tests/unit/memory/long_term/test_weaviate_backend.py new file mode 100644 index 000000000..5627888e7 --- /dev/null +++ b/tests/unit/memory/long_term/test_weaviate_backend.py @@ -0,0 +1,323 @@ +"""Tests for WeaviateLongTermMemoryBackend. + +These exercise the backend's storage primitives against an in-process fake +Weaviate v4 collection that mimics the subset of the API the backend uses. +No live Weaviate calls are made. +""" +import math +from datetime import UTC, datetime +from types import SimpleNamespace + +import pytest + +from dynamiq.connections import Weaviate as WeaviateConnection +from dynamiq.memory.long_term.backends.weaviate import ( + WeaviateLongTermMemoryBackend, + _to_weaviate_uuid, +) +from dynamiq.memory.long_term.schemas import Fact + +# --- Fake Weaviate client / collection ------------------------------------- + + +def _cosine_distance(a: list[float], b: list[float]) -> float: + dot = sum(x * y for x, y in zip(a, b)) + na = math.sqrt(sum(x * x for x in a)) or 1.0 + nb = math.sqrt(sum(x * x for x in b)) or 1.0 + return 1.0 - dot / (na * nb) + + +class _FakeData: + def __init__(self, store: dict) -> None: + self.store = store + + def insert(self, *, uuid, properties, vector): + self.store[uuid] = {"uuid": uuid, "properties": dict(properties), "vector": list(vector)} + + def replace(self, *, uuid, properties, vector): + self.store[uuid] = {"uuid": uuid, "properties": dict(properties), "vector": list(vector)} + + 
def delete_by_id(self, *, uuid): + self.store.pop(uuid, None) + + def delete_many(self, *, where): + for uid, item in list(self.store.items()): + if where.matches(item): + self.store.pop(uid, None) + + +class _FakeQuery: + def __init__(self, store: dict) -> None: + self.store = store + + def fetch_object_by_id(self, *, uuid): + item = self.store.get(uuid) + if item is None: + return None + return SimpleNamespace(uuid=uuid, properties=item["properties"], metadata=None) + + def fetch_objects(self, *, filters=None, limit=10): + candidates = [ + SimpleNamespace(uuid=uid, properties=item["properties"], metadata=None) + for uid, item in self.store.items() + if filters is None or filters.matches(item) + ] + return SimpleNamespace(objects=candidates[:limit]) + + def near_vector(self, *, near_vector, limit=10, filters=None, return_metadata=None): + candidates = [ + (uid, item, _cosine_distance(near_vector, item["vector"])) + for uid, item in self.store.items() + if filters is None or filters.matches(item) + ] + candidates.sort(key=lambda t: t[2]) + return SimpleNamespace( + objects=[ + SimpleNamespace( + uuid=uid, + properties=item["properties"], + metadata=SimpleNamespace(distance=dist), + ) + for uid, item, dist in candidates[:limit] + ] + ) + + +class _FakeCollection: + def __init__(self) -> None: + self.store: dict[str, dict] = {} + self.data = _FakeData(self.store) + self.query = _FakeQuery(self.store) + + +class _FakeCollections: + def __init__(self) -> None: + self.collections: dict[str, _FakeCollection] = {} + + def get(self, name): + return self.collections.setdefault(name, _FakeCollection()) + + def exists(self, name): + return name in self.collections + + def create(self, *, name, **_): + self.collections.setdefault(name, _FakeCollection()) + + def delete(self, name): + self.collections.pop(name, None) + + +class _FakeClient: + def __init__(self) -> None: + self.collections = _FakeCollections() + + +# We bypass the real weaviate `Filter` objects entirely — the 
backend's +# `_scope_to_filter` builds them via `Filter.by_property(...).equal(...)`, +# which calls into the weaviate library. For mock tests we monkeypatch the +# scope-to-filter helper to return a callable predicate the fakes can evaluate. + + +class _PredicateFilter: + def __init__(self, predicate) -> None: + self._predicate = predicate + + def matches(self, item) -> bool: + return self._predicate(item) + + def __and__(self, other): + return _PredicateFilter(lambda item: self._predicate(item) and other._predicate(item)) + + +def _fake_scope_to_filter(scope: dict): + if not scope: + return None + return _PredicateFilter(lambda item: all(item["properties"].get(k) == v for k, v in scope.items())) + + +# --- Fixtures --------------------------------------------------------------- + + +@pytest.fixture +def fake_weaviate_client(monkeypatch): + client = _FakeClient() + monkeypatch.setattr(WeaviateConnection, "connect", lambda self: client) + # Swap in a fake scope_to_filter so the backend uses our predicate fakes + # instead of real weaviate Filter objects (which the fake store can't evaluate). + import dynamiq.memory.long_term.backends.weaviate as weaviate_backend + + monkeypatch.setattr(weaviate_backend, "_scope_to_filter", _fake_scope_to_filter) + return client + + +@pytest.fixture +def backend(fake_embedder, fake_weaviate_client): + backend = WeaviateLongTermMemoryBackend( + connection=WeaviateConnection(api_key="test-key", url="http://localhost"), + embedder=fake_embedder, + collection_name="UserFacts", + dimension=fake_embedder.DIM, + ) + # The real backend's `model_post_init` already called `collections.get`, + # which our fake auto-creates on access — so the collection is ready. 
+ return backend + + +def _fact(fact_id: str, user_id: str, content: str, content_hash: str | None = None) -> Fact: + now = datetime.now(UTC) + return Fact( + id=fact_id, + content=content, + hash=content_hash or f"h-{fact_id}", + user_id=user_id, + metadata={}, + created_at=now, + updated_at=now, + ) + + +# --- insert / get / get_by_hash -------------------------------------------- + + +def test_weaviate_insert_then_get(backend, fake_embedder): + fact = _fact("f1", "u1", "hello") + backend.insert(fact, fake_embedder.embed("hello")) + fetched = backend.get("f1") + assert fetched is not None and fetched.id == "f1" and fetched.content == "hello" + + +def test_weaviate_get_unknown_returns_none(backend): + assert backend.get("does-not-exist") is None + + +def test_weaviate_get_by_hash(backend, fake_embedder): + backend.insert(_fact("f1", "u1", "x", "h-shared"), fake_embedder.embed("x")) + found = backend.get_by_hash(user_id="u1", content_hash="h-shared") + assert found is not None and found.id == "f1" + + +def test_weaviate_get_by_hash_isolates_users(backend, fake_embedder): + backend.insert(_fact("f1", "u1", "x", "h-shared"), fake_embedder.embed("x")) + assert backend.get_by_hash(user_id="u2", content_hash="h-shared") is None + + +def test_weaviate_metadata_round_trip(backend, fake_embedder): + """Free-form metadata must round-trip through the JSON-encoded property.""" + fact = _fact("f1", "u1", "x").model_copy(update={"metadata": {"category": "preference", "score": 0.8}}) + backend.insert(fact, fake_embedder.embed("x")) + assert backend.get("f1").metadata == {"category": "preference", "score": 0.8} + + +def test_weaviate_fact_id_maps_to_deterministic_uuid(): + """Two backends must resolve the same fact_id to the same UUID — so a fact + inserted by one process can be deleted by another via the original id.""" + assert _to_weaviate_uuid("fact-1") == _to_weaviate_uuid("fact-1") + assert _to_weaviate_uuid("fact-1") != _to_weaviate_uuid("fact-2") + + +# --- delete / 
list_by_scope / delete_scope --------------------------------- + + +def test_weaviate_delete(backend, fake_embedder): + backend.insert(_fact("f1", "u1", "x"), fake_embedder.embed("x")) + backend.delete("f1") + assert backend.get("f1") is None + + +def test_weaviate_update_replaces_content_and_vector(backend, fake_embedder): + backend.insert(_fact("f1", "u1", "old"), fake_embedder.embed("old")) + backend.update( + "f1", + content="new", + content_hash="h-new", + embedding=fake_embedder.embed("new"), + metadata={"k": "v"}, + updated_at=datetime.now(UTC), + ) + fetched = backend.get("f1") + assert fetched.content == "new" and fetched.hash == "h-new" and fetched.metadata == {"k": "v"} + + +def test_weaviate_list_by_scope(backend, fake_embedder): + backend.insert(_fact("f1", "u1", "a"), fake_embedder.embed("a")) + backend.insert(_fact("f2", "u1", "b"), fake_embedder.embed("b")) + backend.insert(_fact("f3", "u2", "c"), fake_embedder.embed("c")) + listed = backend.list_by_scope({"user_id": "u1"}) + assert {f.id for f in listed} == {"f1", "f2"} + + +def test_weaviate_delete_scope_returns_accurate_count(backend, fake_embedder): + backend.insert(_fact("f1", "u1", "a"), fake_embedder.embed("a")) + backend.insert(_fact("f2", "u1", "b"), fake_embedder.embed("b")) + backend.insert(_fact("f3", "u2", "c"), fake_embedder.embed("c")) + assert backend.delete_scope({"user_id": "u1"}) == 2 + assert backend.list_by_scope({"user_id": "u1"}) == [] + assert len(backend.list_by_scope({"user_id": "u2"})) == 1 + + +def test_weaviate_delete_scope_empty_returns_zero(backend): + assert backend.delete_scope({"user_id": "nobody"}) == 0 + + +# --- search ---------------------------------------------------------------- + + +def test_weaviate_search_relevance_ordered(backend, fake_embedder): + backend.insert(_fact("f1", "u1", "alpha"), fake_embedder.embed("alpha")) + backend.insert(_fact("f2", "u1", "alpha-2"), fake_embedder.embed("alpha-2")) + backend.insert(_fact("f3", "u1", "zulu"), 
fake_embedder.embed("zulu")) + hits = backend.search( + query_embedding=fake_embedder.embed("alpha"), + scope={"user_id": "u1"}, + limit=3, + ) + assert hits[0][0].id == "f1" + scores = [score for _, score in hits] + assert scores == sorted(scores, reverse=True) + + +def test_weaviate_search_isolates_users(backend, fake_embedder): + backend.insert(_fact("f1", "u1", "alpha"), fake_embedder.embed("alpha")) + backend.insert(_fact("f2", "u2", "alpha"), fake_embedder.embed("alpha")) + hits = backend.search( + query_embedding=fake_embedder.embed("alpha"), + scope={"user_id": "u1"}, + limit=5, + ) + assert [fact.id for fact, _ in hits] == ["f1"] + + +def test_weaviate_search_empty_returns_empty(backend, fake_embedder): + hits = backend.search( + query_embedding=fake_embedder.embed("x"), + scope={"user_id": "u1"}, + limit=5, + ) + assert hits == [] + + +# --- high-level operations via Template Method ------------------------------ + + +def test_weaviate_remember_and_recall_through_backend(backend): + backend.remember(content="User likes pizza", user_id="u1") + backend.remember(content="User likes Python", user_id="u1") + hits = backend.recall(query="pizza preferences", user_id="u1", limit=5) + contents = {fact.content for fact, _ in hits} + assert {"User likes pizza", "User likes Python"} <= contents + + +# --- serialization --------------------------------------------------------- + + +def test_weaviate_to_dict_excludes_live_clients_and_includes_connection(backend): + data = backend.to_dict() + assert "_client" not in data and "_collection" not in data + assert isinstance(data["connection"], dict) + assert isinstance(data["embedder"], dict) + assert data["collection_name"] == "UserFacts" + + +def test_weaviate_to_dict_accepts_include_secure_params(backend): + data = backend.to_dict(include_secure_params=True) + assert "connection" in data and "embedder" in data From ed79e77f2621b57ab238f5a2374f3f5f3fea201c Mon Sep 17 00:00:00 2001 From: Taras Yaroshko Date: Mon, 1 Jun 
2026 14:28:41 +0300 Subject: [PATCH 49/53] fix: paginate Pinecone LTM delete_scope so users with >10k facts are fully cleared --- dynamiq/memory/long_term/backends/pinecone.py | 35 +++++++++++-------- .../memory/long_term/test_pinecone_backend.py | 10 ++++++ 2 files changed, 30 insertions(+), 15 deletions(-) diff --git a/dynamiq/memory/long_term/backends/pinecone.py b/dynamiq/memory/long_term/backends/pinecone.py index 92ae7a2cc..940589793 100644 --- a/dynamiq/memory/long_term/backends/pinecone.py +++ b/dynamiq/memory/long_term/backends/pinecone.py @@ -188,18 +188,23 @@ def list_by_scope(self, scope: dict[str, str], limit: int = 100) -> list[Fact]: def delete_scope(self, scope: dict[str, str]) -> int: # Pinecone Serverless does NOT support delete-by-filter — only delete-by-id. - # To stay portable across serverless and pod, we collect ids via query + filter, - # then delete by ids. This also gives us an accurate return count. - result = self._index.query( - vector=[0.0] * self.dimension, - top_k=self._LIST_PAGE_SIZE, - namespace=self.namespace, - filter=_scope_to_filter(scope), - include_metadata=False, - ) - matches = result.get("matches") if isinstance(result, dict) else getattr(result, "matches", []) - ids = [match["id"] if isinstance(match, dict) else match.id for match in matches] - if not ids: - return 0 - self._index.delete(ids=ids, namespace=self.namespace) - return len(ids) + # And Pinecone's query API has no cursor, so we loop: query → delete the + # matched ids → query again until the page comes back empty. Without the + # loop, scopes with >`_LIST_PAGE_SIZE` facts (10k) would silently leak. 
+ total = 0 + flt = _scope_to_filter(scope) + while True: + result = self._index.query( + vector=[0.0] * self.dimension, + top_k=self._LIST_PAGE_SIZE, + namespace=self.namespace, + filter=flt, + include_metadata=False, + ) + matches = result.get("matches") if isinstance(result, dict) else getattr(result, "matches", []) + ids = [match["id"] if isinstance(match, dict) else match.id for match in matches] + if not ids: + break + self._index.delete(ids=ids, namespace=self.namespace) + total += len(ids) + return total diff --git a/tests/unit/memory/long_term/test_pinecone_backend.py b/tests/unit/memory/long_term/test_pinecone_backend.py index 089a0c5f9..36f51797c 100644 --- a/tests/unit/memory/long_term/test_pinecone_backend.py +++ b/tests/unit/memory/long_term/test_pinecone_backend.py @@ -191,6 +191,16 @@ def test_pinecone_delete_scope_empty_returns_zero(backend): assert backend.delete_scope({"user_id": "nobody"}) == 0 +def test_pinecone_delete_scope_paginates_beyond_single_page(backend, fake_embedder, monkeypatch): + """clear_user on users with more facts than fit in one query page must still + delete everything and report the true count — not silently cap at one page.""" + monkeypatch.setattr(backend, "_LIST_PAGE_SIZE", 2) + for i in range(5): + backend.insert(_fact(f"f{i}", "u1", f"c{i}"), fake_embedder.embed(f"c{i}")) + assert backend.delete_scope({"user_id": "u1"}) == 5 + assert backend.list_by_scope({"user_id": "u1"}) == [] + + # --- search ---------------------------------------------------------------- From cc1bf0e6daec6d8a1d876fc65820ec0d2ceb4ad4 Mon Sep 17 00:00:00 2001 From: Taras Yaroshko Date: Mon, 1 Jun 2026 14:31:25 +0300 Subject: [PATCH 50/53] fix: defer Weaviate collection lookup so construction works on a fresh instance --- dynamiq/memory/long_term/backends/weaviate.py | 15 +++++++--- .../memory/long_term/test_weaviate_backend.py | 29 +++++++++++++++++++ 2 files changed, 40 insertions(+), 4 deletions(-) diff --git 
a/dynamiq/memory/long_term/backends/weaviate.py b/dynamiq/memory/long_term/backends/weaviate.py index b38ef7017..daa55fda3 100644 --- a/dynamiq/memory/long_term/backends/weaviate.py +++ b/dynamiq/memory/long_term/backends/weaviate.py @@ -81,11 +81,10 @@ class WeaviateLongTermMemoryBackend(LongTermMemoryBackend): dimension: int = 1536 _client: "WeaviateClient | None" = PrivateAttr(default=None) - _collection: Any = PrivateAttr(default=None) @property def to_dict_exclude_params(self) -> dict[str, bool]: - return super().to_dict_exclude_params | {"_client": True, "_collection": True, "connection": True} + return super().to_dict_exclude_params | {"_client": True, "connection": True} def to_dict(self, include_secure_params: bool = False, for_tracing: bool = False, **kwargs) -> dict[str, Any]: data = super().to_dict(include_secure_params=include_secure_params, for_tracing=for_tracing, **kwargs) @@ -95,8 +94,17 @@ def to_dict(self, include_secure_params: bool = False, for_tracing: bool = False return data def model_post_init(self, __context) -> None: + # Only resolve the client here; the collection proxy is fetched lazily + # so backend construction does not depend on the collection already + # existing — callers can construct, call ensure_collection(), then use. self._client = self.connection.connect() - self._collection = self._client.collections.get(self.collection_name) + + @property + def _collection(self): + """Lazy collection proxy. Re-fetched per access — the call is local to + the weaviate client (no network) and avoids stale state if the + collection is dropped/recreated between operations.""" + return self._client.collections.get(self.collection_name) def ensure_collection(self) -> None: """Create the facts collection if absent. 
Safe to call repeatedly.""" @@ -120,7 +128,6 @@ def ensure_collection(self) -> None: Property(name="updated_at", data_type=DataType.TEXT), ], ) - self._collection = self._client.collections.get(self.collection_name) def recreate_collection(self) -> None: """Drop and re-create the facts collection. Test-only helper.""" diff --git a/tests/unit/memory/long_term/test_weaviate_backend.py b/tests/unit/memory/long_term/test_weaviate_backend.py index 5627888e7..396e3abf8 100644 --- a/tests/unit/memory/long_term/test_weaviate_backend.py +++ b/tests/unit/memory/long_term/test_weaviate_backend.py @@ -208,6 +208,35 @@ def test_weaviate_metadata_round_trip(backend, fake_embedder): assert backend.get("f1").metadata == {"category": "preference", "score": 0.8} +def test_weaviate_construction_does_not_touch_collection(fake_embedder, monkeypatch): + """A fresh backend must construct cleanly without resolving the collection — + that lookup is deferred to first use so `ensure_collection()` can run after.""" + + class _StrictCollections: + def __init__(self) -> None: + self.get_called_with: list = [] + + def get(self, name): + self.get_called_with.append(name) + return _FakeCollection() + + class _StrictClient: + def __init__(self) -> None: + self.collections = _StrictCollections() + + client = _StrictClient() + monkeypatch.setattr(WeaviateConnection, "connect", lambda self: client) + backend = WeaviateLongTermMemoryBackend( + connection=WeaviateConnection(api_key="k", url="http://localhost"), + embedder=fake_embedder, + collection_name="UserFacts", + dimension=fake_embedder.DIM, + ) + assert client.collections.get_called_with == [] # not yet resolved + _ = backend._collection # first access resolves + assert client.collections.get_called_with == ["UserFacts"] + + def test_weaviate_fact_id_maps_to_deterministic_uuid(): """Two backends must resolve the same fact_id to the same UUID — so a fact inserted by one process can be deleted by another via the original id.""" From 
a1f52e1c5a68b3c7e31ee887bd9e6c03a0fd9171 Mon Sep 17 00:00:00 2001 From: Taras Yaroshko Date: Mon, 1 Jun 2026 14:36:10 +0300 Subject: [PATCH 51/53] fix: paginate Weaviate LTM delete_scope so empty and scoped wipes are unbounded --- dynamiq/memory/long_term/backends/weaviate.py | 47 +++++++++++-------- .../memory/long_term/test_weaviate_backend.py | 24 ++++++++++ 2 files changed, 52 insertions(+), 19 deletions(-) diff --git a/dynamiq/memory/long_term/backends/weaviate.py b/dynamiq/memory/long_term/backends/weaviate.py index daa55fda3..50383ae07 100644 --- a/dynamiq/memory/long_term/backends/weaviate.py +++ b/dynamiq/memory/long_term/backends/weaviate.py @@ -64,6 +64,13 @@ def _scope_to_filter(scope: dict[str, str]): return expr +def _id_in_filter(uuids: list[str]): + """`Filter.by_id().contains_any(...)` factored out so tests can stub it.""" + from weaviate.classes.query import Filter + + return Filter.by_id().contains_any(uuids) + + class WeaviateLongTermMemoryBackend(LongTermMemoryBackend): """Long-term memory backend backed by Weaviate (client v4). @@ -79,6 +86,9 @@ class WeaviateLongTermMemoryBackend(LongTermMemoryBackend): connection: WeaviateConnection = Field(default_factory=WeaviateConnection) collection_name: str = "UserFacts" dimension: int = 1536 + # Page size for scoped scans (list/delete). Capped at Weaviate's default + # `QUERY_MAXIMUM_RESULTS` so a single fetch never exceeds server limits. + _SCOPE_PAGE_SIZE: int = 10_000 _client: "WeaviateClient | None" = PrivateAttr(default=None) @@ -223,24 +233,23 @@ def list_by_scope(self, scope: dict[str, str], limit: int = 100) -> list[Fact]: return [_properties_to_fact(obj.properties) for obj in objects] def delete_scope(self, scope: dict[str, str]) -> int: - # Weaviate has a native delete-by-filter, but it returns no count. - # Count first via the same filter, then delete — same trade-off as - # Qdrant: an extra round-trip for an accurate return value. 
+ # Weaviate's `delete_many(where=...)` is server-capped (default ~10k per + # call) and doesn't report a count we can rely on across versions. Loop + # fetch-then-delete-by-uuid batches until the page comes back empty — + # this is unbounded and gives an accurate count of what we actually + # removed. Empty scope = match everything, same contract as Qdrant / + # in-memory; we drive that with `fetch_objects(limit=...)` (no filter). flt = _scope_to_filter(scope) - if flt is None: - # Empty scope = match everything — match the Qdrant contract. - result = self._collection.query.fetch_objects(limit=10_000) + total = 0 + while True: + if flt is None: + result = self._collection.query.fetch_objects(limit=self._SCOPE_PAGE_SIZE) + else: + result = self._collection.query.fetch_objects(filters=flt, limit=self._SCOPE_PAGE_SIZE) objects = getattr(result, "objects", []) or [] - count = len(objects) - if count == 0: - return 0 - from weaviate.classes.query import Filter - - self._collection.data.delete_many(where=Filter.by_id().contains_any([str(o.uuid) for o in objects])) - return count - - listed = self.list_by_scope(scope, limit=10_000) - if not listed: - return 0 - self._collection.data.delete_many(where=flt) - return len(listed) + if not objects: + break + uuids = [str(o.uuid) for o in objects] + self._collection.data.delete_many(where=_id_in_filter(uuids)) + total += len(uuids) + return total diff --git a/tests/unit/memory/long_term/test_weaviate_backend.py b/tests/unit/memory/long_term/test_weaviate_backend.py index 396e3abf8..d2ec3f9f4 100644 --- a/tests/unit/memory/long_term/test_weaviate_backend.py +++ b/tests/unit/memory/long_term/test_weaviate_backend.py @@ -135,6 +135,11 @@ def _fake_scope_to_filter(scope: dict): return _PredicateFilter(lambda item: all(item["properties"].get(k) == v for k, v in scope.items())) +def _fake_id_in_filter(uuids): + uuid_set = set(uuids) + return _PredicateFilter(lambda item: item["uuid"] in uuid_set) + + # --- Fixtures 
--------------------------------------------------------------- @@ -147,6 +152,7 @@ def fake_weaviate_client(monkeypatch): import dynamiq.memory.long_term.backends.weaviate as weaviate_backend monkeypatch.setattr(weaviate_backend, "_scope_to_filter", _fake_scope_to_filter) + monkeypatch.setattr(weaviate_backend, "_id_in_filter", _fake_id_in_filter) return client @@ -288,6 +294,24 @@ def test_weaviate_delete_scope_empty_returns_zero(backend): assert backend.delete_scope({"user_id": "nobody"}) == 0 +def test_weaviate_delete_scope_paginates_beyond_single_page_with_scope(backend, fake_embedder, monkeypatch): + """A scoped delete must remove every match and return the true count even + when the matched set exceeds Weaviate's per-call fetch cap.""" + monkeypatch.setattr(type(backend), "_SCOPE_PAGE_SIZE", 2) + for i in range(5): + backend.insert(_fact(f"f{i}", "u1", f"c{i}"), fake_embedder.embed(f"c{i}")) + assert backend.delete_scope({"user_id": "u1"}) == 5 + assert backend.list_by_scope({"user_id": "u1"}) == [] + + +def test_weaviate_delete_scope_empty_paginates_unbounded(backend, fake_embedder, monkeypatch): + """Empty scope must clear the entire collection — not just the first page.""" + monkeypatch.setattr(type(backend), "_SCOPE_PAGE_SIZE", 2) + for i in range(5): + backend.insert(_fact(f"f{i}", f"u{i % 2}", f"c{i}"), fake_embedder.embed(f"c{i}")) + assert backend.delete_scope({}) == 5 + + # --- search ---------------------------------------------------------------- From ed5a7ab28ced80b77072dcc8bf1e601e5c735a0b Mon Sep 17 00:00:00 2001 From: Taras Yaroshko Date: Mon, 1 Jun 2026 14:38:16 +0300 Subject: [PATCH 52/53] fix: reject whitespace-only entries in RecallFactsTool queries at validation time --- dynamiq/nodes/tools/long_term_memory.py | 12 +++++++++++- tests/unit/memory/long_term/test_tools.py | 20 ++++++++++++++++++++ 2 files changed, 31 insertions(+), 1 deletion(-) diff --git a/dynamiq/nodes/tools/long_term_memory.py b/dynamiq/nodes/tools/long_term_memory.py 
index 32752f293..6b8cae14f 100644 --- a/dynamiq/nodes/tools/long_term_memory.py +++ b/dynamiq/nodes/tools/long_term_memory.py @@ -1,6 +1,6 @@ from typing import Any, ClassVar, Literal -from pydantic import BaseModel, ConfigDict, Field +from pydantic import BaseModel, ConfigDict, Field, field_validator from dynamiq.memory.long_term import LongTermMemoryBackend, MemoryToolKind, RememberOutcome from dynamiq.nodes.node import Node, ensure_config @@ -89,6 +89,16 @@ class RecallFactsInputSchema(BaseModel): limit: int = Field(default=5, ge=1, le=20, description="Max facts to return after merging across queries.") + @field_validator("queries", mode="after") + @classmethod + def _strip_and_require_nonblank(cls, queries: list[str]) -> list[str]: + """Reject whitespace-only entries here so the model sees a clean + validation error, instead of the backend raising at recall time.""" + cleaned = [q.strip() for q in queries] + if any(not q for q in cleaned): + raise ValueError("`queries` must not contain empty or whitespace-only strings") + return cleaned + class _LongTermMemoryTool(Node): """Shared base for the long-term memory tools.""" diff --git a/tests/unit/memory/long_term/test_tools.py b/tests/unit/memory/long_term/test_tools.py index 8ccad5f7e..073cb13dd 100644 --- a/tests/unit/memory/long_term/test_tools.py +++ b/tests/unit/memory/long_term/test_tools.py @@ -139,6 +139,26 @@ def test_recall_tool_rejects_empty_queries_list(): RecallFactsTool.input_schema(queries=[]) +def test_recall_tool_rejects_whitespace_only_query(): + """A blank or whitespace-only entry must be caught at validation time, not + when the backend raises mid-execute.""" + import pytest as _pytest + + with _pytest.raises(Exception): + RecallFactsTool.input_schema(queries=[" "]) + with _pytest.raises(Exception): + RecallFactsTool.input_schema(queries=["valid", ""]) + + +def test_recall_tool_strips_query_whitespace(backend, user_id): + """Surrounding whitespace must be stripped so leading/trailing spaces 
don't + affect the embedding (or cause spurious cache misses).""" + backend.remember(content="User likes pizza", user_id=user_id) + tool = RecallFactsTool(backend=backend, user_id=user_id) + result = tool.execute(tool.input_schema(queries=[" pizza "])) + assert result["content"], "stripped query should still match the stored fact" + + # --- factory --- From b197342232613b7af412db8ec1617ff29b535bdd Mon Sep 17 00:00:00 2001 From: Taras Yaroshko Date: Mon, 1 Jun 2026 14:41:16 +0300 Subject: [PATCH 53/53] chore: fix linting --- dynamiq/memory/long_term/backends/pinecone.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/dynamiq/memory/long_term/backends/pinecone.py b/dynamiq/memory/long_term/backends/pinecone.py index 940589793..01ddc1a7e 100644 --- a/dynamiq/memory/long_term/backends/pinecone.py +++ b/dynamiq/memory/long_term/backends/pinecone.py @@ -184,7 +184,9 @@ def list_by_scope(self, scope: dict[str, str], limit: int = 100) -> list[Fact]: include_metadata=True, ) matches = result.get("matches") if isinstance(result, dict) else getattr(result, "matches", []) - return [_metadata_to_fact(match["metadata"] if isinstance(match, dict) else match.metadata) for match in matches] + return [ + _metadata_to_fact(match["metadata"] if isinstance(match, dict) else match.metadata) for match in matches + ] def delete_scope(self, scope: dict[str, str]) -> int: # Pinecone Serverless does NOT support delete-by-filter — only delete-by-id.