Skip to content

Commit

Permalink
Add and document ChangeTrackerType protocol. lemon24#323
Browse files Browse the repository at this point in the history
  • Loading branch information
lemon24 committed Feb 28, 2024
1 parent 055e69b commit 313e5ca
Show file tree
Hide file tree
Showing 7 changed files with 190 additions and 15 deletions.
5 changes: 4 additions & 1 deletion CHANGES.rst
Original file line number Diff line number Diff line change
Expand Up @@ -14,14 +14,17 @@ Unreleased
* Split the :ref:`full-text search <fts>` index into a separate,
attached database.
(:issue:`323`)
* Require at least SQLite 3.18 (released 2017-03-30).
* Require at least SQLite 3.18.
Previously, *reader* core required 3.15,
and only :meth:`~Reader.update_search` required 3.18.
(:issue:`323`)
* Enable `write-ahead logging`_ only once, when the database is created,
instead of every time it is opened.
(:issue:`323`)
* Vacuum the main database after migrations. (:issue:`323`)
* Add an internal :ref:`change tracking API <changes>`
to formalize how search keeps in sync with storage.
(:issue:`323`)


Version 3.11
Expand Down
29 changes: 26 additions & 3 deletions docs/internal.rst
Original file line number Diff line number Diff line change
Expand Up @@ -143,9 +143,11 @@ but at least one other implementation needs to exist before that.

Currently, search is tightly-bound to a storage implementation
(see :meth:`~.BoundSearchStorageType.make_search`).
In the future, it may be possible to mix and match
main storage and search implementations,
but :class:`StorageType` will likely need
While the :ref:`change tracking API <changes>` allows
search implementations to keep in sync with text content changes,
there is no convenient way for :meth:`SearchType.search_entries`
to filter/sort results without storage cooperation;
:class:`StorageType` will need
additional capabilities to support this.


Expand All @@ -167,6 +169,27 @@ but at least one other implementation needs to exist before that.
:members:


.. _changes:

.. autoclass:: ChangeTrackingStorageType()
:members:
:show-inheritance:

.. autoclass:: ChangeTrackerType()
:members:

.. autoclass:: Change
:members:

.. autoclass:: Action
:members:

.. autoattribute:: reader.Entry._sequence

.. autoexception:: reader.exceptions.ChangeTrackingNotEnabledError
:show-inheritance:


Data objects
~~~~~~~~~~~~

Expand Down
4 changes: 2 additions & 2 deletions src/reader/_storage/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

from typing import Any

from .._types import ChangeTrackerType
from .._types import SearchType
from ._base import StorageBase
from ._changes import Changes
Expand Down Expand Up @@ -30,9 +31,8 @@ class Storage(FeedsMixin, EntriesMixin, TagsMixin, StorageBase):

def __init__(self, *args: Any, **kwargs: Any):
    """Initialize the storage and attach its change tracker.

    All positional and keyword arguments are passed through unchanged
    to the parent class initializer.
    """
    # FIXME: types
    super().__init__(*args, **kwargs)
    # Annotate with the protocol rather than the concrete class, so the
    # attribute is checked against the change tracking interface.
    # The tracker is constructed exactly once.
    self.changes: ChangeTrackerType = Changes(self)

def make_search(self) -> SearchType:
from ._search import Search
Expand Down
2 changes: 0 additions & 2 deletions src/reader/_storage/_changes.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,6 @@


class Changes:
# FIXME: protocol

def __init__(self, storage: StorageBase) -> None:
    """Keep a reference to the storage this object operates on.

    Args:
        storage: The storage instance; saved as ``self.storage``.

    """
    self.storage = storage

Expand Down
153 changes: 148 additions & 5 deletions src/reader/_types.py
Original file line number Diff line number Diff line change
Expand Up @@ -1264,17 +1264,160 @@ def search_entry_counts(
"""


class Action(Enum):
    """Action to take for a change."""

    #: Insert action.
    INSERT = 1
    #: Delete action.
    DELETE = 2
@runtime_checkable
class ChangeTrackingStorageType(StorageType, Protocol):
    """A storage that can track changes to the text content of resources.

    The protocol is runtime-checkable, so :func:`isinstance` can be used
    to test whether a storage object exposes a change tracker;
    note that such a check only verifies that the ``changes`` attribute
    exists, not its type.
    """

    @property
    def changes(self) -> ChangeTrackerType:
        """The change tracker associated with this storage."""


class ChangeTrackerType(Protocol): # pragma: no cover
    """Storage API used to keep the full-text search index in sync.

    The sync model works as follows.

    Each resource to be indexed has a sequence that changes
    every time its text content changes.
    The sequence can be a global counter, a random number,
    or a high-precision timestamp;
    the only requirement is that it won't be used again
    (or it's extremely unlikely that will happen).

    Each sequence change gets recorded.
    Updates are recorded as pairs of
    :attr:`~Action.DELETE` + :attr:`~Action.INSERT` changes
    with the old / new sequences, respectively.

    :meth:`SearchType.update` gets changes and processes them.
    For :attr:`~Action.INSERT`,
    the resource is indexed only if the change sequence
    matches the current main storage sequence;
    otherwise, the change is ignored.
    For :attr:`~Action.DELETE`,
    the resource is deleted only if the change sequence
    matches the search index sequence.
    (This means that, during updates,
    multiple versions of a resource may appear in the index,
    with different sequences.)
    Processed changes are marked as done,
    regardless of the action taken. Pseudocode::

        def update(self):
            while True:
                changes = self.storage.changes.get()
                if not changes:
                    break
                self._process_changes(changes)
                self.storage.changes.done(changes)

    Enabling change tracking sets the sequence of all resources
    and adds matching :attr:`~Action.INSERT` changes
    to allow backfilling the search index.
    The sequence may be :const:`None` when change tracking is disabled.
    There is no guarantee the sequence of a resource is the same
    if change tracking is disabled and then enabled again.

    The entry sequence is exposed as :attr:`.Entry._sequence`,
    and should change when
    the entry :attr:`~.Entry.title`, :attr:`~.Entry.summary`,
    or :attr:`~.Entry.content` change,
    or when its feed's :attr:`~.Feed.title` or :attr:`~.Feed.user_title` change.

    As of version |version|, only entry changes are tracked,
    but the API supports tracking feeds and tags in the future;
    search implementations should ignore
    changes to resources they do not support
    (but still mark them as done!).

    Any method can raise :exc:`.StorageError`.

    """

    def enable(self) -> None:
        """Enable change tracking.

        A no-op and reasonably fast if change tracking is already enabled.

        """

    def disable(self) -> None:
        """Disable change tracking.

        A no-op if change tracking is already disabled.

        """

    def get(
        self, action: Action | None = None, limit: int | None = None
    ) -> list[Change]:
        """Return the next batch of changes, if any.

        Args:
            action: Only return changes of this type.
            limit: Return at most this many changes;
                may return fewer, depending on storage internal limits.
                If None, a reasonable limit should be used (hundreds).

        Returns:
            A batch of changes.

        Raises:
            ChangeTrackingNotEnabledError

        """

    def done(self, changes: list[Change]) -> None:
        """Mark changes as done. Ignore unknown changes.

        Args:
            changes: The changes to mark as done.

        Raises:
            ChangeTrackingNotEnabledError
            ValueError: If more changes than :meth:`get` returns are passed;
                ``done(get())`` should always work.

        """


@dataclass(frozen=True)
class Change:
    """A change to be applied to the search index.

    The change can be of an entry, a feed, or a resource tag,
    with identifiers set accordingly:

    ===========  ===========  ===========  ===========
    subject      feed_url     entry_id     tag_key
    ===========  ===========  ===========  ===========
    feed         set
    entry        set          set
    global tag                             set
    feed tag     set                       set
    entry tag    set          set          set
    ===========  ===========  ===========  ===========

    """

    #: Action to take.
    action: Action
    #: Resource/tag sequence.
    sequence: bytes
    #: Feed URL.
    feed_url: str | None = None
    #: Entry id.
    entry_id: str | None = None
    #: Tag key.
    tag_key: str | None = None


class Action(Enum):
    """Action to take."""

    #: The resource needs to be added to the search index.
    INSERT = 1
    #: The resource needs to be deleted from the search index.
    DELETE = 2
3 changes: 2 additions & 1 deletion src/reader/exceptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -470,7 +470,8 @@ class ChangeTrackingNotEnabledError(StorageError):
"""A change tracking method was called when change tracking was not enabled.
.. admonition:: Unstable
This exception is part of the unstable :doc:`internal API <internal>`.
This exception is part of the unstable :ref:`change tracking API <changes>`.
"""

Expand Down
9 changes: 8 additions & 1 deletion src/reader/types.py
Original file line number Diff line number Diff line change
Expand Up @@ -296,7 +296,14 @@ def feed_url(self) -> str:
#: .. versionadded:: 1.8
original_feed_url: str = cast(str, None)

# FIXME: docstring
#: Change sequence.
#:
#: May be :const:`None` when change tracking is disabled.
#:
#: .. admonition:: Unstable
#:
#: This field is part of the unstable :ref:`change tracking API <changes>`.
#:
_sequence: bytes | None = None

# feed should not have a default, but I'd prefer objects that aren't
Expand Down

0 comments on commit 313e5ca

Please sign in to comment.