From 844e394bca34e7d67c88588032955d58fd3c7383 Mon Sep 17 00:00:00 2001 From: palchrb Date: Mon, 3 Nov 2025 13:33:06 +0100 Subject: [PATCH 1/9] Enhance AvatarManager with etag and fetched_at tracking Include etag tracking for avatars, and 1 hr TTL to recheck etags. If etags does not match, new avatar is downloaded and used --- github/avatar_manager.py | 53 ++++++++++++++++++++++++++++------------ 1 file changed, 38 insertions(+), 15 deletions(-) diff --git a/github/avatar_manager.py b/github/avatar_manager.py index 5ef699e..241507a 100644 --- a/github/avatar_manager.py +++ b/github/avatar_manager.py @@ -1,8 +1,6 @@ -from typing import TYPE_CHECKING +from typing import TYPE_CHECKING, Optional import asyncio - -from sqlalchemy import Column, MetaData, Table, Text -from sqlalchemy.engine.base import Engine +import time from mautrix.types import ContentURI @@ -15,6 +13,8 @@ class AvatarManager: bot: "GitHubBot" _avatars: dict[str, ContentURI] + _etag: dict[str, Optional[str]] + _fetched_at: dict[str, int] _db: DBManager _lock: asyncio.Lock @@ -23,26 +23,49 @@ def __init__(self, bot: "GitHubBot") -> None: self._db = bot.db self._lock = asyncio.Lock() self._avatars = {} + self._etag = {} + self._fetched_at = {} async def load_db(self) -> None: - self._avatars = { - avatar.url: ContentURI(avatar.mxc) for avatar in await self._db.get_avatars() + rows = await self._db.get_avatars() + self._avatars = {row.url: ContentURI(row.mxc) for row in rows} + self._etag = {row.url: getattr(row, "etag", None) for row in rows} + self._fetched_at = { + row.url: int(getattr(row, "fetched_at", 0) or 0) for row in rows } async def get_mxc(self, url: str) -> ContentURI: - try: + now = int(time.time()) + if url in self._avatars and (now - self._fetched_at.get(url, 0)) < 3600: return self._avatars[url] - except KeyError: - pass - async with self.bot.http.get(url) as resp: + + headers = {} + etag = self._etag.get(url) + if etag: + headers["If-None-Match"] = etag + + async with self.bot.http.get(url, headers=headers) as resp: + if resp.status == 304 and url in self._avatars: + self._fetched_at[url] = now + await self._db.put_avatar( + url, + self._avatars[url], + etag=self._etag.get(url), + fetched_at=now, + ) + return self._avatars[url] + resp.raise_for_status() data = await resp.read() + new_etag = resp.headers.get("ETag") + async with self._lock: - try: + if url in self._avatars and (now - self._fetched_at.get(url, 0)) < 3600: return self._avatars[url] - except KeyError: - pass + mxc = await self.bot.client.upload_media(data) self._avatars[url] = mxc - await self._db.put_avatar(url, mxc) - return mxc + self._etag[url] = new_etag + self._fetched_at[url] = now + await self._db.put_avatar(url, mxc, etag=new_etag, fetched_at=now) + return mxc From b3385095f446f41f8a421e2101bd66c029077622 Mon Sep 17 00:00:00 2001 From: palchrb Date: Mon, 3 Nov 2025 13:33:38 +0100 Subject: [PATCH 2/9] Update migration functions and add new schema changes --- github/migrations.py | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/github/migrations.py b/github/migrations.py index 50ad2c6..055ccc6 100644 --- a/github/migrations.py +++ b/github/migrations.py @@ -13,12 +13,13 @@ # # You should have received a copy of the GNU Affero General Public License # along with this program. If not, see . + from mautrix.util.async_db import Connection, Scheme, UpgradeTable upgrade_table = UpgradeTable() -@upgrade_table.register(description="Latest revision", upgrades_to=1) +@upgrade_table.register(description="Initial schema", upgrades_to=1) async def upgrade_latest(conn: Connection, scheme: Scheme) -> None: needs_migration = False if await conn.table_exists("webhook"): @@ -26,8 +27,9 @@ async def upgrade_latest(conn: Connection, scheme: Scheme) -> None: await conn.execute("ALTER TABLE webhook RENAME TO webhook_old;") await conn.execute("ALTER TABLE client RENAME TO client_old;") await conn.execute("ALTER TABLE matrix_message RENAME TO matrix_message_old;") + await conn.execute( - f"""CREATE TABLE client ( + """CREATE TABLE client ( user_id TEXT NOT NULL, token TEXT NOT NULL, PRIMARY KEY (user_id) @@ -60,6 +62,7 @@ async def upgrade_latest(conn: Connection, scheme: Scheme) -> None: PRIMARY KEY (url) )""" ) + if needs_migration: await migrate_legacy_to_v1(conn) @@ -67,6 +70,13 @@ async def upgrade_latest(conn: Connection, scheme: Scheme) -> None: async def migrate_legacy_to_v1(conn: Connection) -> None: await conn.execute("INSERT INTO client (user_id, token) SELECT user_id, token FROM client_old") await conn.execute( - "INSERT INTO matrix_message (message_id, room_id, event_id) SELECT message_id, room_id, event_id FROM matrix_message_old" + "INSERT INTO matrix_message (message_id, room_id, event_id) " + "SELECT message_id, room_id, event_id FROM matrix_message_old" ) await conn.execute("CREATE TABLE needs_post_migration(noop INTEGER PRIMARY KEY)") + + +@upgrade_table.register(description="Add etag and fetched_at to avatar table", upgrades_to=2) +async def upgrade_v2(conn: Connection) -> None: + await conn.execute("ALTER TABLE avatar ADD COLUMN etag TEXT") + await conn.execute("ALTER TABLE avatar ADD COLUMN fetched_at INTEGER") From 8ebcb9ee581e429f06760cd753248a8d84628d27 Mon Sep 17 00:00:00 2001 From: palchrb Date: Mon, 3 Nov 2025 13:34:55 +0100 Subject: [PATCH 3/9] Include necessary changes for avatar updates --- github/db.py | 87 ++++++++++++++++++++++++---------------------------- 1 file changed, 40 insertions(+), 47 deletions(-) diff --git a/github/db.py b/github/db.py index 40345c8..091969e 100644 --- a/github/db.py +++ b/github/db.py @@ -14,6 +14,7 @@ # # You should have received a copy of the GNU Affero General Public License # along with this program. If not, see . + from typing import Optional import hashlib import hmac @@ -35,28 +36,25 @@ class Client: def from_row(cls, row: Record | None) -> Optional["Client"]: if not row: return None - user_id = row["user_id"] - token = row["token"] - return cls( - user_id=user_id, - token=token, - ) + return cls(user_id=row["user_id"], token=row["token"]) @dataclass(frozen=True) class Avatar: url: str mxc: ContentURI + etag: Optional[str] = None + fetched_at: int = 0 @classmethod def from_row(cls, row: Record | None) -> Optional["Avatar"]: if not row: return None - url = row["url"] - mxc = row["mxc"] return cls( - url=url, - mxc=mxc, + url=row["url"], + mxc=row["mxc"], + etag=row.get("etag"), + fetched_at=int(row.get("fetched_at") or 0), ) @@ -74,18 +72,13 @@ def from_row(cls, row: Record | None) -> Optional["WebhookInfo"]: if not row: return None id = row["id"] - repo = row["repo"] - user_id = row["user_id"] - room_id = row["room_id"] - github_id = row["github_id"] - secret = row["secret"] return cls( id=id if isinstance(id, uuid.UUID) else uuid.UUID(id), - repo=repo, - user_id=user_id, - room_id=room_id, - github_id=github_id, - secret=secret, + repo=row["repo"], + user_id=row["user_id"], + room_id=row["room_id"], + github_id=row["github_id"], + secret=row["secret"], ) def __str__(self) -> str: @@ -108,15 +101,11 @@ async def get_event(self, message_id: str, room_id: RoomID) -> EventID | None: room_id, ) - async def put_event( - self, - message_id: str, - room_id: RoomID, - event_id: EventID, - ) -> None: + async def put_event(self, message_id: str, room_id: RoomID, event_id: EventID) -> None: await self.db.execute( """ - INSERT INTO matrix_message (message_id, room_id, event_id) VALUES ($1, $2, $3) + INSERT INTO matrix_message (message_id, room_id, event_id) + VALUES ($1, $2, $3) ON CONFLICT (message_id, room_id) DO UPDATE SET event_id = excluded.event_id """, message_id, @@ -131,7 +120,8 @@ async def get_clients(self) -> list[Client]: async def put_client(self, user_id: UserID, token: str) -> None: await self.db.execute( """ - INSERT INTO client (user_id, token) VALUES ($1, $2) + INSERT INTO client (user_id, token) + VALUES ($1, $2) ON CONFLICT (user_id) DO UPDATE SET token = excluded.token """, user_id, @@ -139,33 +129,43 @@ async def put_client(self, user_id: UserID, token: str) -> None: ) async def delete_client(self, user_id: UserID) -> None: - await self.db.execute( - "DELETE FROM client WHERE user_id = $1", - user_id, - ) + await self.db.execute("DELETE FROM client WHERE user_id = $1", user_id) async def get_avatars(self) -> list[Avatar]: - rows = await self.db.fetch("SELECT url, mxc FROM avatar") + rows = await self.db.fetch("SELECT url, mxc, etag, fetched_at FROM avatar") return [Avatar.from_row(row) for row in rows] - async def put_avatar(self, url: str, mxc: ContentURI) -> None: + async def put_avatar( + self, + url: str, + mxc: ContentURI, + *, + etag: Optional[str] = None, + fetched_at: Optional[int] = None, + ) -> None: await self.db.execute( """ - INSERT INTO avatar (url, mxc) VALUES ($1, $2) - ON CONFLICT (url) DO NOTHING + INSERT INTO avatar (url, mxc, etag, fetched_at) + VALUES ($1, $2, $3, $4) + ON CONFLICT (url) DO UPDATE + SET mxc = excluded.mxc, + etag = excluded.etag, + fetched_at = excluded.fetched_at """, url, mxc, + etag, + fetched_at, ) - async def get_webhook_by_id(self, id: uuid.UUID) -> WebhookInfo | None: + async def get_webhook_by_id(self, id: uuid.UUID) -> Optional[WebhookInfo]: row = await self.db.fetchrow( "SELECT id, repo, user_id, room_id, github_id, secret FROM webhook WHERE id = $1", str(id), ) return WebhookInfo.from_row(row) - async def get_webhook_by_repo(self, room_id: RoomID, repo: str) -> WebhookInfo | None: + async def get_webhook_by_repo(self, room_id: RoomID, repo: str) -> Optional[WebhookInfo]: row = await self.db.fetchrow( "SELECT id, repo, user_id, room_id, github_id, secret FROM webhook WHERE room_id = $1 AND repo = $2", room_id, @@ -181,10 +181,7 @@ async def get_webhooks_in_room(self, room_id: RoomID) -> list[WebhookInfo]: return [WebhookInfo.from_row(row) for row in rows] async def delete_webhook(self, id: uuid.UUID) -> None: - await self.db.execute( - "DELETE FROM webhook WHERE id = $1", - str(id), - ) + await self.db.execute("DELETE FROM webhook WHERE id = $1", str(id)) async def insert_webhook( self, webhook: WebhookInfo, *, _conn: Connection | None = None @@ -210,11 +207,7 @@ async def set_webhook_github_id(self, id: uuid.UUID, github_id: int) -> None: ) async def transfer_webhook_repo(self, id: uuid.UUID, new_repo: str) -> None: - await self.db.execute( - "UPDATE webhook SET repo = $1 WHERE id = $2", - new_repo, - str(id), - ) + await self.db.execute("UPDATE webhook SET repo = $1 WHERE id = $2", new_repo, str(id)) async def transfer_webhook_rooms(self, old_room: RoomID, new_room: RoomID) -> None: await self.db.execute( From 36372d2a45be5eb102bcae962b20f471f2977d6d Mon Sep 17 00:00:00 2001 From: palchrb Date: Mon, 3 Nov 2025 13:57:12 +0100 Subject: [PATCH 4/9] Decrease avatar cache timeout to 5 minutes Reduce avatar cache expiration time from 3600 seconds to 300 seconds. --- github/avatar_manager.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/github/avatar_manager.py b/github/avatar_manager.py index 241507a..60087ee 100644 --- a/github/avatar_manager.py +++ b/github/avatar_manager.py @@ -36,7 +36,7 @@ async def load_db(self) -> None: async def get_mxc(self, url: str) -> ContentURI: now = int(time.time()) - if url in self._avatars and (now - self._fetched_at.get(url, 0)) < 3600: + if url in self._avatars and (now - self._fetched_at.get(url, 0)) < 300: return self._avatars[url] headers = {} @@ -60,7 +60,7 @@ async def get_mxc(self, url: str) -> ContentURI: new_etag = resp.headers.get("ETag") async with self._lock: - if url in self._avatars and (now - self._fetched_at.get(url, 0)) < 3600: + if url in self._avatars and (now - self._fetched_at.get(url, 0)) < 300: return self._avatars[url] mxc = await self.bot.client.upload_media(data) From cdf82cee1222c341c3a07bb9f6e973d079d91427 Mon Sep 17 00:00:00 2001 From: palchrb Date: Mon, 3 Nov 2025 14:15:43 +0100 Subject: [PATCH 5/9] Refactor avatar loading and fetching logic --- github/avatar_manager.py | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/github/avatar_manager.py b/github/avatar_manager.py index 60087ee..fa2fd47 100644 --- a/github/avatar_manager.py +++ b/github/avatar_manager.py @@ -1,7 +1,11 @@ -from typing import TYPE_CHECKING, Optional +from typing import TYPE_CHECKING +from typing import Optional import asyncio import time +from sqlalchemy import Column, MetaData, Table, Text +from sqlalchemy.engine.base import Engine + from mautrix.types import ContentURI from .db import DBManager @@ -28,24 +32,24 @@ def __init__(self, bot: "GitHubBot") -> None: async def load_db(self) -> None: rows = await self._db.get_avatars() - self._avatars = {row.url: ContentURI(row.mxc) for row in rows} - self._etag = {row.url: getattr(row, "etag", None) for row in rows} - self._fetched_at = { - row.url: int(getattr(row, "fetched_at", 0) or 0) for row in rows - } + self._avatars = {avatar.url: ContentURI(avatar.mxc) for avatar in rows} + self._etag = {avatar.url: avatar.etag for avatar in rows} + self._fetched_at = {avatar.url: int(avatar.fetched_at or 0) for avatar in rows} async def get_mxc(self, url: str) -> ContentURI: now = int(time.time()) + # 5 min TTL if url in self._avatars and (now - self._fetched_at.get(url, 0)) < 300: return self._avatars[url] - headers = {} + headers: dict[str, str] = {} etag = self._etag.get(url) if etag: headers["If-None-Match"] = etag async with self.bot.http.get(url, headers=headers) as resp: if resp.status == 304 and url in self._avatars: + # Unchanged: bump fetched_at and persist self._fetched_at[url] = now await self._db.put_avatar( url, @@ -60,6 +64,7 @@ async def get_mxc(self, url: str) -> ContentURI: new_etag = resp.headers.get("ETag") async with self._lock: + # Race guard with same TTL inside the lock if url in self._avatars and (now - self._fetched_at.get(url, 0)) < 300: return self._avatars[url] From 71dad8da175ee589a71768040e6b0686ba0f0067 Mon Sep 17 00:00:00 2001 From: palchrb Date: Mon, 3 Nov 2025 14:16:07 +0100 Subject: [PATCH 6/9] Refactor database row handling for clarity --- github/db.py | 87 ++++++++++++++++++++++++++++++++-------------------- 1 file changed, 53 insertions(+), 34 deletions(-) diff --git a/github/db.py b/github/db.py index 091969e..ef9e019 100644 --- a/github/db.py +++ b/github/db.py @@ -36,7 +36,12 @@ class Client: def from_row(cls, row: Record | None) -> Optional["Client"]: if not row: return None - return cls(user_id=row["user_id"], token=row["token"]) + user_id = row["user_id"] + token = row["token"] + return cls( + user_id=user_id, + token=token, + ) @dataclass(frozen=True) @@ -50,11 +55,15 @@ class Avatar: def from_row(cls, row: Record | None) -> Optional["Avatar"]: if not row: return None + url = row["url"] + mxc = row["mxc"] + etag = row.get("etag") + fetched_at = int(row.get("fetched_at") or 0) return cls( - url=row["url"], - mxc=row["mxc"], - etag=row.get("etag"), - fetched_at=int(row.get("fetched_at") or 0), + url=url, + mxc=mxc, + etag=etag, + fetched_at=fetched_at, ) @@ -72,13 +81,18 @@ def from_row(cls, row: Record | None) -> Optional["WebhookInfo"]: if not row: return None id = row["id"] + repo = row["repo"] + user_id = row["user_id"] + room_id = row["room_id"] + github_id = row["github_id"] + secret = row["secret"] return cls( id=id if isinstance(id, uuid.UUID) else uuid.UUID(id), - repo=row["repo"], - user_id=row["user_id"], - room_id=row["room_id"], - github_id=row["github_id"], - secret=row["secret"], + repo=repo, + user_id=user_id, + room_id=room_id, + github_id=github_id, + secret=secret, ) def __str__(self) -> str: @@ -101,11 +115,15 @@ async def get_event(self, message_id: str, room_id: RoomID) -> EventID | None: room_id, ) - async def put_event(self, message_id: str, room_id: RoomID, event_id: EventID) -> None: + async def put_event( + self, + message_id: str, + room_id: RoomID, + event_id: EventID, + ) -> None: await self.db.execute( """ - INSERT INTO matrix_message (message_id, room_id, event_id) - VALUES ($1, $2, $3) + INSERT INTO matrix_message (message_id, room_id, event_id) VALUES ($1, $2, $3) ON CONFLICT (message_id, room_id) DO UPDATE SET event_id = excluded.event_id """, message_id, @@ -120,8 +138,7 @@ async def get_clients(self) -> list[Client]: async def put_client(self, user_id: UserID, token: str) -> None: await self.db.execute( """ - INSERT INTO client (user_id, token) - VALUES ($1, $2) + INSERT INTO client (user_id, token) VALUES ($1, $2) ON CONFLICT (user_id) DO UPDATE SET token = excluded.token """, user_id, @@ -129,28 +146,23 @@ async def put_client(self, user_id: UserID, token: str) -> None: ) async def delete_client(self, user_id: UserID) -> None: - await self.db.execute("DELETE FROM client WHERE user_id = $1", user_id) + await self.db.execute( + "DELETE FROM client WHERE user_id = $1", + user_id, + ) async def get_avatars(self) -> list[Avatar]: rows = await self.db.fetch("SELECT url, mxc, etag, fetched_at FROM avatar") return [Avatar.from_row(row) for row in rows] - async def put_avatar( - self, - url: str, - mxc: ContentURI, - *, - etag: Optional[str] = None, - fetched_at: Optional[int] = None, - ) -> None: + async def put_avatar(self, url: str, mxc: ContentURI, *, etag: Optional[str] = None, fetched_at: Optional[int] = None) -> None: await self.db.execute( """ - INSERT INTO avatar (url, mxc, etag, fetched_at) - VALUES ($1, $2, $3, $4) - ON CONFLICT (url) DO UPDATE - SET mxc = excluded.mxc, - etag = excluded.etag, - fetched_at = excluded.fetched_at + INSERT INTO avatar (url, mxc, etag, fetched_at) VALUES ($1, $2, $3, $4) + ON CONFLICT (url) DO UPDATE SET + mxc = excluded.mxc, + etag = excluded.etag, + fetched_at = excluded.fetched_at """, url, mxc, @@ -158,14 +170,14 @@ async def put_avatar( fetched_at, ) - async def get_webhook_by_id(self, id: uuid.UUID) -> Optional[WebhookInfo]: + async def get_webhook_by_id(self, id: uuid.UUID) -> WebhookInfo | None: row = await self.db.fetchrow( "SELECT id, repo, user_id, room_id, github_id, secret FROM webhook WHERE id = $1", str(id), ) return WebhookInfo.from_row(row) - async def get_webhook_by_repo(self, room_id: RoomID, repo: str) -> Optional[WebhookInfo]: + async def get_webhook_by_repo(self, room_id: RoomID, repo: str) -> WebhookInfo | None: row = await self.db.fetchrow( "SELECT id, repo, user_id, room_id, github_id, secret FROM webhook WHERE room_id = $1 AND repo = $2", room_id, @@ -181,7 +193,10 @@ async def get_webhooks_in_room(self, room_id: RoomID) -> list[WebhookInfo]: return [WebhookInfo.from_row(row) for row in rows] async def delete_webhook(self, id: uuid.UUID) -> None: - await self.db.execute("DELETE FROM webhook WHERE id = $1", str(id)) + await self.db.execute( + "DELETE FROM webhook WHERE id = $1", + str(id), + ) async def insert_webhook( self, webhook: WebhookInfo, *, _conn: Connection | None = None @@ -207,7 +222,11 @@ async def set_webhook_github_id(self, id: uuid.UUID, github_id: int) -> None: ) async def transfer_webhook_repo(self, id: uuid.UUID, new_repo: str) -> None: - await self.db.execute("UPDATE webhook SET repo = $1 WHERE id = $2", new_repo, str(id)) + await self.db.execute( + "UPDATE webhook SET repo = $1 WHERE id = $2", + new_repo, + str(id), + ) async def transfer_webhook_rooms(self, old_room: RoomID, new_room: RoomID) -> None: await self.db.execute( From 2ab3e5d1270f7049edea0d5b8716f1a4e97fae66 Mon Sep 17 00:00:00 2001 From: palchrb Date: Wed, 5 Nov 2025 09:27:01 +0100 Subject: [PATCH 7/9] Combine import statements for clarity --- github/avatar_manager.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/github/avatar_manager.py b/github/avatar_manager.py index fa2fd47..cc9b418 100644 --- a/github/avatar_manager.py +++ b/github/avatar_manager.py @@ -1,5 +1,4 @@ -from typing import TYPE_CHECKING -from typing import Optional +from typing import TYPE_CHECKING, Optional import asyncio import time From ac75905cf3d9d84853f926e559a43805733df2b1 Mon Sep 17 00:00:00 2001 From: palchrb Date: Thu, 6 Nov 2025 12:29:22 +0100 Subject: [PATCH 8/9] Refactor put_avatar method for improved readability --- github/db.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/github/db.py b/github/db.py index ef9e019..9ba3ca3 100644 --- a/github/db.py +++ b/github/db.py @@ -155,7 +155,14 @@ async def get_avatars(self) -> list[Avatar]: rows = await self.db.fetch("SELECT url, mxc, etag, fetched_at FROM avatar") return [Avatar.from_row(row) for row in rows] - async def put_avatar(self, url: str, mxc: ContentURI, *, etag: Optional[str] = None, fetched_at: Optional[int] = None) -> None: + async def put_avatar( + self, + url: str, + mxc: ContentURI, + *, + etag: Optional[str] = None, + fetched_at: Optional[int] = None, + ) -> None: await self.db.execute( """ INSERT INTO avatar (url, mxc, etag, fetched_at) VALUES ($1, $2, $3, $4) @@ -170,6 +177,7 @@ async def put_avatar(self, url: str, mxc: ContentURI, *, etag: Optional[str] = N fetched_at, ) + async def get_webhook_by_id(self, id: uuid.UUID) -> WebhookInfo | None: row = await self.db.fetchrow( "SELECT id, repo, user_id, room_id, github_id, secret FROM webhook WHERE id = $1", From 1986cfc4b78f5c3e54188e45518933744f88c333 Mon Sep 17 00:00:00 2001 From: palchrb Date: Thu, 6 Nov 2025 21:01:58 +0100 Subject: [PATCH 9/9] Remove unnecessary blank line in db.py --- github/db.py | 1 - 1 file changed, 1 deletion(-) diff --git a/github/db.py b/github/db.py index 9ba3ca3..b1d72f5 100644 --- a/github/db.py +++ b/github/db.py @@ -177,7 +177,6 @@ async def put_avatar( fetched_at, ) - async def get_webhook_by_id(self, id: uuid.UUID) -> WebhookInfo | None: row = await self.db.fetchrow( "SELECT id, repo, user_id, room_id, github_id, secret FROM webhook WHERE id = $1",