Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -157,3 +157,4 @@ Thumbs.db
# Common editor files
*~
*.swp
.aider*
23 changes: 23 additions & 0 deletions AGENTS.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
# OpenAI Codex

## Testing requirements

Install all dependencies required for tests, linting and docs building by using
the `all` optional dependency group defined in `pyproject.toml`. With pip, run
`pip install -e '.[all]'`.

Make sure the `bin` directory where executables are installed by pip is added to
the `PATH` environment variable.

There are many dependencies, so it could take a while to install them. Please
wait, do not interrupt the install process.

## Testing

Run `nox -s docs --non-interactive` to build the docs, `nox -s lint` for
linting, and `nox -s tests` for the tests.

## Submitting a Pull Request

Make sure to include a concise description of the changes and link the relevant
pull requests or issues.
13 changes: 13 additions & 0 deletions src/dbetto/attrsdict.py
Original file line number Diff line number Diff line change
Expand Up @@ -262,3 +262,16 @@ def __or__(self, other: dict | AttrsDict) -> AttrsDict:
def reset(self) -> None:
    """Discard all cached data by installing a fresh, empty remap cache."""
    empty_cache: dict = {}
    # Go through the parent's __setattr__, exactly as the cache is written
    # elsewhere in this class.
    super().__setattr__("__cached_remaps__", empty_cache)

# Make pickling safe by serializing only the internal cached state as attributes.
def __getstate__(self) -> dict:
    """Return the instance-specific state for pickling.

    The state consists of a single entry, ``"__cached_remaps__"``, holding
    a shallow copy of the remap cache. If the attribute has not been set
    on this instance, an empty cache is exported instead.
    """
    try:
        cached = super().__getattribute__("__cached_remaps__")
    except AttributeError:
        cached = {}
    # Export a shallow copy rather than the live cache: the state may be
    # handed directly to another instance's __setstate__ (for example when
    # copy.copy() falls back to the reduce protocol), and the two instances
    # must not end up sharing one mutable cache dict.
    return {"__cached_remaps__": dict(cached)}

def __setstate__(self, state: dict) -> None:
    """Reinstall the remap cache from *state* during unpickling.

    A missing ``"__cached_remaps__"`` entry results in an empty cache.
    """
    restored_cache = state.get("__cached_remaps__", {})
    super().__setattr__("__cached_remaps__", restored_cache)
27 changes: 27 additions & 0 deletions src/dbetto/textdb.py
Original file line number Diff line number Diff line change
Expand Up @@ -375,6 +375,33 @@ def __or__(self, other: TextDB) -> AttrsDict:

return self.__store__ | other

# Ensure pickling safety by explicitly controlling serialized state.
def __getstate__(self) -> dict:
    """Collect the internal attributes that make up the pickled state.

    The returned mapping is keyed by attribute name and contains
    ``__path__``, ``__lazy__``, ``__hidden__``, ``__ftypes__`` and
    ``__store__``, in that order.
    """
    exported = ("__path__", "__lazy__", "__hidden__", "__ftypes__", "__store__")
    return {attr: getattr(self, attr) for attr in exported}

def __setstate__(self, state: dict) -> None:
    """Rebuild the instance attributes from a pickled *state* mapping.

    ``__path__`` is wrapped in ``Path`` and ``__ftypes__`` in ``set`` when
    the stored values are not already of those types; the remaining
    entries are assigned unchanged.
    """
    if isinstance(state["__path__"], Path):
        self.__path__ = state["__path__"]
    else:
        self.__path__ = Path(state["__path__"])

    self.__lazy__ = state["__lazy__"]
    self.__hidden__ = state["__hidden__"]

    if isinstance(state["__ftypes__"], set):
        self.__ftypes__ = state["__ftypes__"]
    else:
        self.__ftypes__ = set(state["__ftypes__"])

    self.__store__ = state["__store__"]

def __contains__(self, value: str) -> bool:
    """Delegate the membership check for *value* to the internal store."""
    store = self.__store__
    return store.__contains__(value)

Expand Down
69 changes: 69 additions & 0 deletions tests/test_pickle.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
from __future__ import annotations

import pickle
from pathlib import Path

from dbetto import AttrsDict, TextDB


def test_attrsdict_pickle_roundtrip():
    """An AttrsDict with a populated map() cache survives a pickle round trip."""
    records = {
        "a": {"id": 1, "group": {"id": 3}, "data": "x"},
        "b": {"id": 2, "group": {"id": 4}, "data": "y"},
    }
    original = AttrsDict(records)

    # Call map() before pickling so the round trip is exercised after a
    # remapping has already been requested.
    by_id = original.map("id")
    assert by_id[1].data == "x"
    assert by_id[2].data == "y"

    restored = pickle.loads(pickle.dumps(original))

    # Type, nested structure and attribute-style access are preserved.
    assert isinstance(restored, AttrsDict)
    assert restored.a.data == "x"
    assert restored.b.group.id == 4

    # Remapping keeps working on the restored object.
    restored_by_id = restored.map("id")
    assert set(restored_by_id.keys()) == {1, 2}


def test_textdb_pickle_roundtrip(tmp_path: Path):
    """A non-lazy TextDB over a small directory tree survives a pickle round trip."""
    # Lay out the database on disk: two top-level files and one nested file.
    json_file = tmp_path / "file1.json"
    json_file.write_text('{"id": 1, "name": "alpha"}', encoding="utf-8")
    yaml_file = tmp_path / "file2.yaml"
    yaml_file.write_text("id: 2\nname: beta\n", encoding="utf-8")
    subdir = tmp_path / "dir1"
    subdir.mkdir()
    nested_file = subdir / "file3.yaml"
    nested_file.write_text("value: 42\n", encoding="utf-8")

    original = TextDB(tmp_path, lazy=False)

    # Check that the database is readable before it is pickled.
    first = original["file1"]
    assert isinstance(first, AttrsDict)
    assert first.id == 1
    assert first.name == "alpha"

    restored = pickle.loads(pickle.dumps(original))

    # The restored object has the right type and its path is a Path again.
    assert isinstance(restored, TextDB)
    assert isinstance(restored.__path__, Path)

    # Item access still works after the round trip.
    first_restored = restored["file1"]
    assert first_restored.id == 1
    assert first_restored.name == "alpha"

    # Attribute access still works after the round trip.
    second = restored.file2
    assert second.id == 2
    assert second.name == "beta"

    # Sub-directories are still exposed as nested TextDB objects.
    nested_db = restored["dir1"]
    assert isinstance(nested_db, TextDB)
    third = nested_db["file3"]
    assert third.value == 42