Skip to content

Commit f8f5937

Browse files
committed
Add README, verify examples from README works, bump version
1 parent fa04b84 commit f8f5937

File tree

10 files changed

+468
-1
lines changed

10 files changed

+468
-1
lines changed

README.md

Lines changed: 222 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,222 @@
1+
# django-memoized-prefetch
2+
3+
A Django package that provides efficient memoized prefetching for processing data in chunks, reducing database queries through intelligent caching.
4+
In some cases it can be useful even when not processing data in chunks, for example, when there are multiple foreign keys to the same table.
5+
6+
## Overview
7+
8+
`django-memoized-prefetch` optimizes Django ORM queries when processing large datasets by:
9+
- **Reusing previously fetched objects** across chunks
10+
- **Memoizing prefetched objects** using LRU (Least Recently Used) cache
11+
- **Supporting both foreign key and many-to-many relationships**
12+
- **Minimizing database queries** across chunk processing operations
13+
14+
## Installation
15+
16+
```bash
17+
pip install django-memoized-prefetch
18+
```
19+
20+
## Requirements
21+
22+
- Python 3.9+
23+
- Django 4.2+
24+
- lru-dict 1.3.0+
25+
26+
## Usage Examples
27+
28+
<details>
29+
<summary>Models used in examples, click to expand</summary>
30+
31+
```python
32+
from django.db import models
33+
34+
class Author(models.Model):
35+
name = models.CharField(max_length=255)
36+
email = models.EmailField()
37+
38+
class Publisher(models.Model):
39+
name = models.CharField(max_length=255)
40+
country = models.CharField(max_length=100)
41+
42+
class Category(models.Model):
43+
name = models.CharField(max_length=100)
44+
45+
class Book(models.Model):
46+
title = models.CharField(max_length=255)
47+
isbn = models.CharField(max_length=13)
48+
author = models.ForeignKey(Author, on_delete=models.CASCADE, related_name="books")
49+
translator = models.ForeignKey(Author, on_delete=models.CASCADE, related_name="translations", null=True)
50+
publisher = models.ForeignKey(Publisher, on_delete=models.CASCADE, related_name="books")
51+
categories = models.ManyToManyField(Category, related_name="books")
52+
53+
class Review(models.Model):
54+
book = models.ForeignKey(Book, on_delete=models.CASCADE, related_name="reviews")
55+
rating = models.IntegerField()
56+
comment = models.TextField()
57+
```
58+
59+
</details>
60+
61+
### Basic Usage
62+
63+
Imagine you want to process all books, but there are too many of them to load them all into memory at once.
64+
You therefore need to process them in chunks.
65+
66+
If you use only native Django, it will look something like this:
67+
68+
```python
69+
from chunkator import chunkator_page
70+
71+
for chunk in chunkator_page(Book.objects.all().prefetch_related("author", "translator", "publisher"), 10_000):
72+
for book in chunk:
73+
print(book.author.name, book.translator.name if book.translator is not None else None)
74+
print(book.publisher.name)
75+
```
76+
77+
This will work, with two caveats:
78+
1. For each chunk, Django will make separate queries to fetch the authors and the translators
79+
2. The author, translator and publisher objects will be fetched from the database for each chunk
80+
81+
This is the primary use case for this package. When used like this:
82+
83+
```python
84+
from django_memoized_prefetch import MemoizedPrefetch, MemoizedPrefetchConfig
85+
from chunkator import chunkator_page
86+
87+
memoized_prefetch = MemoizedPrefetch(
88+
MemoizedPrefetchConfig(Author, ["author", "translator"]),
89+
MemoizedPrefetchConfig(Publisher, ["publisher"], prefetch_all=True),
90+
)
91+
92+
for chunk in chunkator_page(Book.objects.all(), 10_000):
93+
memoized_prefetch.process_chunk(chunk)
94+
95+
for book in chunk:
96+
print(book.author.name, book.translator.name if book.translator is not None else None)
97+
print(book.publisher.name)
98+
```
99+
100+
The processing will be more efficient, because:
101+
1. All publishers will get fetched before processing any chunks, and they will be reused across all chunks
102+
2. The author and translator objects will be fetched using one query
103+
3. Any authors and translators that appeared in previous chunks will not be fetched again
104+
105+
#### Nested attributes
106+
107+
You can also prefetch nested attributes using either dotted notation or underscore notation; in the following example, both would work.
108+
109+
```python
110+
memoized_prefetch = MemoizedPrefetch(
111+
MemoizedPrefetchConfig(Publisher, ["book.publisher"]),
112+
MemoizedPrefetchConfig(Author, ["book__author"]),
113+
)
114+
115+
for chunk in chunkator_page(Review.objects.all(), 10000):
116+
memoized_prefetch.process_chunk(chunk)
117+
...
118+
```
119+
120+
### Many-to-Many Relationships
121+
122+
Many-to-many relationships are supported as well: the target model objects are cached, while the through model rows are fetched for each chunk.
123+
124+
```python
125+
from django_memoized_prefetch import MemoizedPrefetch, MemoizedPrefetchConfig
126+
from chunkator import chunkator_page
127+
128+
# Configure for many-to-many relationships
129+
memoized_prefetch = MemoizedPrefetch(
130+
MemoizedPrefetchConfig(
131+
model=Category,
132+
attributes=["categories"],
133+
is_many_to_many=True,
134+
through_model=Book.categories.through,
135+
source_field="book_id",
136+
target_field="category_id",
137+
)
138+
)
139+
140+
# Process books with their categories
141+
for chunk in chunkator_page(Book.objects.all(), 10000):
142+
memoized_prefetch.process_chunk(chunk)
143+
144+
for book in chunk:
145+
# Categories are prefetched and available
146+
category_names = [cat.name for cat in book.categories.all()]
147+
print(f"Book: {book.title}, Categories: {', '.join(category_names)}")
148+
```
149+
150+
### Usage outside chunked processing
151+
152+
If you have multiple foreign keys to the same table, this package can be used to optimise the database queries even when not processing data in chunks.
153+
154+
## Configuration Options
155+
156+
### MemoizedPrefetchConfig Parameters
157+
158+
- **`model`** (required): The Django model class to prefetch
159+
- **`attributes`** (required): List of attribute names to prefetch on your objects
160+
- **`queryset`** (optional): Custom queryset for the model (for additional select_related/prefetch_related)
161+
- **`prefetch_all`** (optional, default: False): Whether to prefetch all objects at initialisation
162+
- **`lru_cache_size`** (optional, default: 10,000): Maximum number of objects to keep in cache
163+
- **`is_many_to_many`** (optional, default: False): Set to True for many-to-many relationships
164+
- **`through_model`** (optional): Through model for many-to-many relationships
165+
- **`source_field`** (optional): Source field name in the through model
166+
- **`target_field`** (optional): Target field name in the through model
167+
168+
### Advanced Configuration
169+
170+
```python
171+
from django.db import models
172+
173+
# Custom queryset with select_related
174+
config = MemoizedPrefetchConfig(
175+
model=Author,
176+
attributes=["author"],
177+
queryset=Author.objects.select_related(...),
178+
lru_cache_size=5000,
179+
)
180+
181+
# Prefetch all objects at startup (useful for small, frequently accessed tables)
182+
config = MemoizedPrefetchConfig(
183+
model=Publisher,
184+
attributes=["publisher"],
185+
prefetch_all=True,
186+
)
187+
```
188+
189+
## Integrations with other packages
190+
191+
The package automatically supports `django-seal` when it is available: all sealable querysets will be automatically sealed.
192+
193+
This package works when using `django-tenants`.
194+
195+
## Best Practices
196+
197+
1. **Use appropriate cache sizes**: Set `lru_cache_size` based on your expected data volume and available memory
198+
2. **Prefetch related objects**: Use custom querysets with `select_related` or `prefetch_related` for nested relationships
199+
3. **Consider prefetch_all**: Use `prefetch_all=True` for small, frequently accessed reference tables
200+
4. **Process in reasonable chunks**: Balance memory usage with query efficiency when choosing chunk sizes
201+
5. **Monitor cache hit rates**: Ensure your cache size is appropriate for your data access patterns
202+
203+
## Testing
204+
205+
Run the test suite:
206+
207+
```bash
208+
uv run pytest
209+
```
210+
211+
## License
212+
213+
This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details.
214+
215+
## Contributing
216+
217+
Contributions are welcome! Please feel free to submit a Pull Request.
218+
219+
## Authors
220+
221+
- Mikuláš Poul ([email protected])
222+
- Cameron Hobbs ([email protected])

pyproject.toml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[project]
22
name = "django-memoized-prefetch"
3-
version = "0.1.0"
3+
version = "0.1.1"
44
description = "A memoized prefetch for Django."
55
authors = [
66
{name = "Mikuláš Poul", email = "[email protected]"},
@@ -154,6 +154,7 @@ parametrize-names-type = "list"
154154
[dependency-groups]
155155
dev = [
156156
"dirty-equals>=0.9.0",
157+
"django-chunkator>=2.0.0",
157158
"django-seal>=1.7.1",
158159
"factory-boy>=3.3.3",
159160
"pytest-cov>=6.2.1",

tests/test_project/bookshop/__init__.py

Whitespace-only changes.
Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,67 @@
1+
import random
2+
3+
import factory
4+
5+
from tests.test_project.bookshop.models import (
6+
Author,
7+
Book,
8+
Category,
9+
Publisher,
10+
Review,
11+
)
12+
13+
14+
class AuthorFactory(factory.django.DjangoModelFactory):
    """Build ``Author`` instances with a Faker-generated name and email."""

    class Meta:
        model = Author

    name = factory.Faker("name")
    email = factory.Faker("email")
21+
22+
class PublisherFactory(factory.django.DjangoModelFactory):
    """Build ``Publisher`` instances with a Faker-generated company name and country."""

    class Meta:
        model = Publisher

    name = factory.Faker("company")
    country = factory.Faker("country")
28+
29+
30+
class CategoryFactory(factory.django.DjangoModelFactory):
    """Build ``Category`` instances named with a single Faker-generated word."""

    class Meta:
        model = Category

    name = factory.Faker("word")
35+
36+
37+
class BookFactory(factory.django.DjangoModelFactory):
    """Build ``Book`` instances together with their related objects.

    The author, translator and publisher are each created through their own
    sub-factory. Categories are attached after the book has been created; see
    the ``categories`` hook.
    """

    title = factory.Faker("sentence", nb_words=4)
    isbn = factory.Faker("isbn13")
    author = factory.SubFactory(AuthorFactory)
    translator = factory.SubFactory(AuthorFactory)
    publisher = factory.SubFactory(PublisherFactory)

    @factory.post_generation
    def categories(self, create: bool, extracted: "list[Category] | None") -> None:
        """Attach categories to the freshly generated book.

        Args:
            create: False for the build strategy; nothing is attached then,
                because the many-to-many relation cannot be written.
            extracted: Value passed as ``categories=...`` to the factory, or
                ``None`` when the caller did not pass anything.
        """
        if not create:
            return

        if extracted is None:
            # Create 1-3 random categories if none provided
            extracted = CategoryFactory.create_batch(random.randint(1, 3))

        # An explicitly passed empty list is honoured: the book gets no
        # categories instead of silently receiving random ones.
        self.categories.set(extracted)

    class Meta:
        model = Book
        # The hook only touches the m2m relation, so no extra save is needed.
        skip_postgeneration_save = True
59+
60+
61+
class ReviewFactory(factory.django.DjangoModelFactory):
    """Build ``Review`` instances for a book created through ``BookFactory``."""

    class Meta:
        model = Review

    book = factory.SubFactory(BookFactory)
    rating = factory.Faker("random_int", min=1, max=5)
    comment = factory.Faker("text", max_nb_chars=500)
Lines changed: 80 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,80 @@
1+
# Generated by Django 5.2.5 on 2025-08-26 14:29
2+
3+
import django.db.models.deletion
4+
from django.db import migrations, models
5+
6+
7+
class Migration(migrations.Migration):
    # Initial migration of the ``bookshop`` test app: it creates the Author,
    # Category, Publisher, Book and Review tables.
    initial = True

    # No dependencies on other migrations are declared.
    dependencies = []

    operations = [
        migrations.CreateModel(
            name="Author",
            fields=[
                ("id", models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name="ID")),
                ("name", models.CharField(max_length=255)),
                ("email", models.EmailField(max_length=254)),
            ],
        ),
        migrations.CreateModel(
            name="Category",
            fields=[
                ("id", models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name="ID")),
                ("name", models.CharField(max_length=100)),
            ],
        ),
        migrations.CreateModel(
            name="Publisher",
            fields=[
                ("id", models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name="ID")),
                ("name", models.CharField(max_length=255)),
                ("country", models.CharField(max_length=100)),
            ],
        ),
        # Book is created after Author, Category and Publisher, which it references.
        migrations.CreateModel(
            name="Book",
            fields=[
                ("id", models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name="ID")),
                ("title", models.CharField(max_length=255)),
                ("isbn", models.CharField(max_length=13)),
                (
                    "author",
                    models.ForeignKey(
                        on_delete=django.db.models.deletion.CASCADE, related_name="books", to="bookshop.author"
                    ),
                ),
                # Second foreign key to the same Author table; nullable.
                (
                    "translator",
                    models.ForeignKey(
                        null=True,
                        on_delete=django.db.models.deletion.CASCADE,
                        related_name="translations",
                        to="bookshop.author",
                    ),
                ),
                ("categories", models.ManyToManyField(related_name="books", to="bookshop.category")),
                (
                    "publisher",
                    models.ForeignKey(
                        on_delete=django.db.models.deletion.CASCADE, related_name="books", to="bookshop.publisher"
                    ),
                ),
            ],
        ),
        # Review references Book, so it is created last.
        migrations.CreateModel(
            name="Review",
            fields=[
                ("id", models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name="ID")),
                ("rating", models.IntegerField()),
                ("comment", models.TextField()),
                (
                    "book",
                    models.ForeignKey(
                        on_delete=django.db.models.deletion.CASCADE, related_name="reviews", to="bookshop.book"
                    ),
                ),
            ],
        ),
    ]

tests/test_project/bookshop/migrations/__init__.py

Whitespace-only changes.

0 commit comments

Comments
 (0)