Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: syncindex can handle large queryset, allow for nonstandard pk models #10

Merged
merged 5 commits into from
Oct 28, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,7 @@ MEILISEARCH = {
'DEBUG': DEBUG, # Whether to throw exceptions on failed creation of documents
'SYNC': False, # Whether to execute operations to meilisearch in a synchronous manner (waiting for each rather than letting the task queue operate)
'OFFLINE': False, # Whether to make any http requests for the application.
'DEFAULT_BATCH_SIZE': 1000, # Default number of records the syncindex command sends to meilisearch per batch
}
```

Expand Down Expand Up @@ -110,6 +111,13 @@ It attempts to mimic the django queryset API, but differs in 2 notable ways:
1. To do geo-filtering, you pass a positional argument
2. Not all queryset operations are implemented.

## Development

1. Clone the repo
2. Run `./bin/setup.sh` to create the virtualenv and install the requirements
3. Run `./bin/test.sh` to run the test suite
4. Develop

## Contact
If you run into any problems, please feel free to open an issue.
If you have suggested improvements, please open an issue so we can discuss them.
3 changes: 3 additions & 0 deletions bin/setup.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
#!/bin/bash
# Create the project's virtualenv in .venv and install the requirements into it.

# Stop at the first failing command (and on unset variables / failed pipes) so
# a failed venv creation does not fall through to a confusing pip error.
set -euo pipefail

python3.12 -m venv .venv
.venv/bin/pip install -r requirements.txt
2 changes: 2 additions & 0 deletions bin/test.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
#!/bin/bash
# Run the Django test suite with the interpreter from the .venv virtualenv.
.venv/bin/python manage.py test
15 changes: 13 additions & 2 deletions demo/settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
For the full list of settings and their values, see
https://docs.djangoproject.com/en/4.2/ref/settings/
"""

import os
from pathlib import Path

# Build paths inside the project like this: BASE_DIR / 'subdir'.
Expand Down Expand Up @@ -124,4 +124,15 @@

DEFAULT_AUTO_FIELD = "django.db.models.BigAutoField"

# Connection and behaviour settings consumed by django_meili.
MEILISEARCH = {
    'HTTPS': False,  # Whether HTTPS is enabled for the meilisearch server
    'HOST': 'localhost',  # The host for the meilisearch server
    # The master key for meilisearch. See https://www.meilisearch.com/docs/learn/security/basic_security for more detail
    # The correctly spelled variable is preferred; the historical misspelling
    # ("MELISEARCH_...") is still honoured so existing environments keep working.
    'MASTER_KEY': os.getenv("MEILISEARCH_MASTER_KEY", os.getenv("MELISEARCH_MASTER_KEY")),
    'PORT': 7700,  # The port for the meilisearch server
    'TIMEOUT': None,  # The timeout to wait for when using sync meilisearch server
    'CLIENT_AGENTS': None,  # The client agents for the meilisearch server
    'DEBUG': DEBUG,  # Whether to throw exceptions on failed creation of documents
    'SYNC': False,  # Whether to execute operations to meilisearch in a synchronous manner (waiting for each rather than letting the task queue operate)
    'OFFLINE': False,  # Whether to make any http requests for the application.
    'DEFAULT_BATCH_SIZE': 1000,  # Default number of records the syncindex command sends per batch
}
3 changes: 3 additions & 0 deletions django_meili/_settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ class DjangoMeiliSettings(TypedDict):
DEBUG: bool | None
SYNC: bool | None
OFFLINE: bool | None
DEFAULT_BATCH_SIZE: int = 1000


@dataclass(frozen=True, slots=True)
Expand All @@ -52,6 +53,7 @@ class _DjangoMeiliSettings:
debug: bool
sync: bool
offline: bool
batch_size: int

@classmethod
def from_settings(cls) -> "_DjangoMeiliSettings":
Expand All @@ -67,4 +69,5 @@ def from_settings(cls) -> "_DjangoMeiliSettings":
debug=settings.MEILISEARCH.get("DEBUG", settings.DEBUG),
sync=settings.MEILISEARCH.get("SYNC", False),
offline=settings.MEILISEARCH.get("OFFLINE", False),
batch_size=settings.MEILISEARCH.get("DEFAULT_BATCH_SIZE", 1000),
)
46 changes: 39 additions & 7 deletions django_meili/management/commands/syncindex.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,31 +6,63 @@
"""

from typing import Type
from django.conf import settings
from django.core.management.base import BaseCommand
from django.apps import apps
from django_meili._client import client as _client
from django_meili.models import IndexMixin

# Default batch size for this module, read once at import time from the
# MEILISEARCH Django setting; falls back to 1000 when the key is absent.
DEFAULT_BATCH_SIZE = settings.MEILISEARCH.get("DEFAULT_BATCH_SIZE", 1000)


def batch_qs(qs, batch_size=None):
    """
    Yield consecutive slices of the given queryset, each holding at most
    ``batch_size`` rows.

    Args:
        qs: The queryset to walk. It must support ``count()`` and slicing.
        batch_size: Maximum number of rows per slice. When omitted (or
            ``None``) the module-level ``DEFAULT_BATCH_SIZE`` is used.

    Yields:
        One sliced queryset per batch, in order. Nothing is yielded when the
        queryset is empty.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1.

    Usage:
        article_qs = Article.objects.order_by("id")
        for batch in batch_qs(article_qs, batch_size=500):
            for article in batch:
                print(article.body)
    """
    if batch_size is None:
        batch_size = DEFAULT_BATCH_SIZE
    if batch_size < 1:
        raise ValueError(f"batch_size must be a positive integer, got {batch_size!r}")

    # Offset-based slicing is only stable on an ordered queryset; without an
    # ordering the database may return rows in a different order per query,
    # so batches could overlap or miss rows. Objects without an ``ordered``
    # attribute are taken as-is.
    if not getattr(qs, "ordered", True):
        qs = qs.order_by("pk")

    total = qs.count()
    for start in range(0, total, batch_size):
        end = min(start + batch_size, total)
        yield qs[start:end]


class Command(BaseCommand):
help = "Syncs the MeiliSearch index for the given model."

def add_arguments(self, parser):
    """
    Register the command-line arguments of the command.

    Args:
        parser: The argument parser supplied by Django's BaseCommand.

    Arguments registered:
        model: Positional, ``<app_name>.<model_name>`` of the model to sync.
        --batch_size: Optional integer number of rows to import per batch;
            defaults to ``DEFAULT_BATCH_SIZE``.
    """
    parser.add_argument(
        "model",
        type=str,
        help="The model to sync the index for. This should be in the format <app_name>.<model_name>",
    )
    parser.add_argument(
        "--batch_size",
        # Must take a value: with action="store_true" the option could only
        # ever become True, which is then used as a batch size of 1.
        type=int,
        default=DEFAULT_BATCH_SIZE,
        # %(default)s keeps the help text in sync with the configured default.
        help="The batch size you want to import in (default: %(default)s)",
    )

def handle(self, *args, **options):
    """
    Sync the instances of the requested model to its MeiliSearch index.

    The model's rows are read in batches of ``options["batch_size"]`` rather
    than all at once, so large tables are never fully loaded into memory.
    Instances whose ``meili_filter()`` is falsy are left out. Each batch is
    submitted and awaited before the next one is read.

    Args:
        options: Parsed command options; reads ``model`` and ``batch_size``.

    Raises:
        SystemExit: With status 1 as soon as MeiliSearch reports a failed task.
    """
    Model = self._resolve_model(options["model"])
    # Use the index name the model was registered under rather than the class
    # name, so a custom MeiliMeta.index_name is respected.
    # NOTE(review): assumes every IndexMixin model exposes
    # `_meilisearch["index_name"]` — confirm.
    index = _client.get_index(Model._meilisearch["index_name"])
    synced = 0

    for qs in batch_qs(Model.objects.all(), options["batch_size"]):
        documents = [self._serialize(m) for m in qs if m.meili_filter()]
        if not documents:
            # Every row of this batch was filtered out; nothing to submit.
            continue
        task = index.add_documents(documents)
        finished = _client.wait_for_task(task.task_uid)
        if finished.status == "failed":
            # Coerce to str so the write does not depend on the error's type.
            # NOTE(review): presumably a dict from the client — confirm.
            self.stderr.write(self.style.ERROR(str(finished.error)))
            # Same effect as exit(1), without relying on the site-provided helper.
            raise SystemExit(1)
        synced += len(documents)

    if synced == 0:
        self.stdout.write(self.style.WARNING(f"No documents to sync for {options['model']}"))
        return
    self.stdout.write(self.style.SUCCESS(f"Synced index for {options['model']}"))


def _serialize(self, model) -> dict:
"""
Serialize the model instance into a dictionary.
Expand Down
9 changes: 9 additions & 0 deletions django_meili/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@ class IndexMixin(models.Model):
- supports_geo: Whether the model supports geolocation.
- index_name: The name of the index in Meilisearch.
- primary_key: The primary key for the model.
- include_pk_in_search: include the pk in the search results

This mixin also defines a few methods that can be overridden:
- meili_filter: A function to decide if the model should be added to meilisearch.
Expand Down Expand Up @@ -85,6 +86,7 @@ class MeiliMeta:
supports_geo: bool = False
index_name: str = None
primary_key: str = "pk"
include_pk_in_search: bool = False

def __init_subclass__(cls) -> None:
index_name = getattr(cls.MeiliMeta, "index_name", cls.__name__)
Expand All @@ -94,6 +96,7 @@ def __init_subclass__(cls) -> None:
filterable_fields = getattr(cls.MeiliMeta, "filterable_fields", None)
sortable_fields = getattr(cls.MeiliMeta, "sortable_fields", None)
supports_geo = getattr(cls.MeiliMeta, "supports_geo", False)
include_pk_in_search = getattr(cls.MeiliMeta, "include_pk_in_search", False)

if supports_geo:
filterable_fields = ("_geo",) + (filterable_fields or ())
Expand All @@ -108,6 +111,7 @@ def __init_subclass__(cls) -> None:
filterable_fields=filterable_fields,
sortable_fields=sortable_fields,
supports_geo=supports_geo,
include_pk_in_search=include_pk_in_search,
tasks=[],
)
else:
Expand All @@ -127,6 +131,7 @@ def __init_subclass__(cls) -> None:
filterable_fields=filterable_fields,
sortable_fields=sortable_fields,
supports_geo=supports_geo,
include_pk_in_search=include_pk_in_search,
tasks=[task for task in _client.tasks],
)
_client.flush_tasks()
Expand Down Expand Up @@ -165,6 +170,10 @@ def meili_serialize(self):
)
)[0]

if getattr(self.MeiliMeta, "include_pk_in_search", False):
rosscdh-tpg marked this conversation as resolved.
Show resolved Hide resolved
serialized_model["fields"][self.MeiliMeta.primary_key] = getattr(
self, self.MeiliMeta.primary_key
)
return serialized_model["fields"]

def meili_geo(self) -> MeiliGeo:
Expand Down
6 changes: 4 additions & 2 deletions django_meili/querysets.py
Original file line number Diff line number Diff line change
Expand Up @@ -259,5 +259,7 @@ def search(self, q: str = ""):
"attributesToSearchOn": self.__attributes_to_search_on,
},
)
hits = results["hits"]
return self.model.objects.filter(pk__in=[hit["id"] for hit in hits])
id_field = getattr(self.model.MeiliMeta, "primary_key", "id")
return self.model.objects.filter(
pk__in=[hit[id_field] for hit in results.get("hits", [])]
)
51 changes: 50 additions & 1 deletion django_meili/tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@

from django_meili.models import IndexMixin, MeiliGeo
from django_meili.querysets import Radius
from posts.models import Post, PostNoGeo
from posts.models import Post, PostNoGeo, NonStandardIdPost

# Create your tests here.

Expand Down Expand Up @@ -174,3 +174,52 @@ def test_django_meili_does_not_sync_when_offline(self):

self.assertEqual(PostNoGeo.meilisearch.count(), post_no_geo_original_count)
self.assertEqual(Post.meilisearch.count(), post_updated_count)



@override_settings(MEILISEARCH={"SYNC": True}, DEBUG=True)
class DjangoMeiliNonStandardIdTestCase(TestCase):
    """Indexing and search tests for a model whose primary key is not ``id``."""

    target_model = NonStandardIdPost

    @classmethod
    def setUpTestData(cls) -> None:
        """Create the single post shared by every test in this case."""
        post_fields = {
            "title": "Hello World",
            "body": "This is a test post",
        }
        cls.post = cls.target_model.objects.create(**post_fields)

        return super().setUpTestData()

    @classmethod
    def tearDownClass(cls) -> None:
        """Delete the model's index once the whole case has run."""
        from django_meili._client import client

        index_name = cls.target_model._meilisearch["index_name"]
        client.client.delete_index(index_name)
        return super().tearDownClass()

    def test_post_created(self):
        created = self.post
        self.assertEqual(created.title, "Hello World")
        self.assertEqual(created.body, "This is a test post")

    def test_post_was_indexed(self):
        indexed_count = self.target_model.meilisearch.count()
        self.assertNotEqual(indexed_count, 0)

    def test_post_search_returns_post(self):
        first_hit = self.target_model.meilisearch.search("Hello World").first()
        self.assertEqual(first_hit.title, "Hello World")

    def test_crazy_id_present_in_serializer(self):
        # Expected shape, e.g.:
        # {'title': 'Hello World', 'body': 'This is a test post', 'crazy_id': 'WqzyCvZF'}
        first_hit = self.target_model.meilisearch.search("Hello World").first()
        serialized = first_hit.meili_serialize()
        self.assertEqual(list(serialized.keys()), ['title', 'body', 'crazy_id'])

    def test_bad_search_returns_nothing(self):
        no_hits = self.target_model.meilisearch.search("al;kdfja;lsdkfj")
        self.assertEqual(no_hits.count(), 0)

    def test_post_search_can_be_filtered(self):
        filtered = self.target_model.meilisearch.filter(title="Hello World")
        first_hit = filtered.search().first()
        self.assertEqual(first_hit.title, "Hello World")
33 changes: 33 additions & 0 deletions posts/migrations/0004_nonstandardidpost.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
# Generated by Django 4.2.6 on 2024-10-24 02:20

from django.db import migrations, models
import posts.models


class Migration(migrations.Migration):
    """Create the ``NonStandardIdPost`` model, keyed by the ``crazy_id`` field."""

    # Applied after migration 0003_postnogeo of the posts app.
    dependencies = [
        ("posts", "0003_postnogeo"),
    ]

    operations = [
        migrations.CreateModel(
            name="NonStandardIdPost",
            fields=[
                (
                    # String primary key; posts.models.rand_id supplies the
                    # value when none is given.
                    "crazy_id",
                    models.CharField(
                        default=posts.models.rand_id,
                        max_length=128,
                        primary_key=True,
                        serialize=False,
                    ),
                ),
                ("title", models.CharField(max_length=255)),
                ("body", models.TextField()),
            ],
            options={
                "verbose_name": "NonStandard Id Post",
                "verbose_name_plural": "NonStandard IdPosts",
            },
        ),
    ]
27 changes: 27 additions & 0 deletions posts/models.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
import string
import random
from django.db import models

from django_meili.models import IndexMixin, MeiliGeo
Expand Down Expand Up @@ -55,3 +57,28 @@ class MeiliMeta:

def __str__(self):
return self.title


def rand_id(length=8):
    """
    Return a random string made of ASCII letters.

    Args:
        length: Number of characters to generate. Defaults to 8, which is
            what a call without arguments has always produced.

    Returns:
        A ``str`` of ``length`` characters drawn from ``string.ascii_letters``.

    The value comes from the ``random`` module, so it is neither guaranteed
    to be unique nor suitable for anything security-sensitive.
    """
    return "".join(random.choices(string.ascii_letters, k=length))

class NonStandardIdPost(IndexMixin, models.Model):
    """Post model whose primary key is the string field ``crazy_id`` instead of ``id``."""

    # Primary key; rand_id supplies a value when none is given.
    crazy_id = models.CharField(max_length=128, default=rand_id, primary_key=True)
    title = models.CharField(max_length=255)
    body = models.TextField()

    class Meta:
        """Meta definition for NonStandardIdPost."""

        verbose_name = "NonStandard Id Post"
        verbose_name_plural = "NonStandard IdPosts"

    class MeiliMeta:
        # Name of the field used as the primary key for this model's index.
        primary_key = "crazy_id"  # test focus
        # Ask for the primary key to be added to the serialized fields.
        include_pk_in_search = True  # test focus
        filterable_fields = ("title",)
        searchable_fields = ("crazy_id", "title", "body")
        displayed_fields = ("crazy_id", "title", "body")

    def __str__(self):
        # Display the post by its title.
        return self.title