Monadical-SAS · tito · Sep 18, 2025 · Sep 18, 2025 · Sep 18, 2025 · Sep 18, 2025
diff --git a/server/asyncio_loop_analysis.md b/server/asyncio_loop_analysis.md
@@ -0,0 +1,118 @@
+# AsyncIO Event Loop Analysis for test_attendee_parsing_bug.py
+
+## Problem Summary
+The test itself passes, but an error is raised during fixture teardown because asyncpg tries to use a different or already-closed event loop, resulting in:
+- `RuntimeError: Task got Future attached to a different loop`
+- `RuntimeError: Event loop is closed`
+
+## Root Cause Analysis
+
+### 1. Multiple Event Loop Creation Points
+
+The test environment creates event loops at different scopes:
+
+1. **Session-scoped loop** (conftest.py:27-34):
+   - Created once per test session
+   - Used by session-scoped fixtures
+   - Closed after all tests complete
+
+2. **Function-scoped loop** (pytest-asyncio default):
+   - Created for each async test function
+   - This is the loop that runs the actual test
+   - Closed immediately after test completes
+
+3. **AsyncPG loop binding** (not a separate loop):
+   - AsyncPG connections store a reference to the loop they were created with
+   - Used for connection lifecycle management
+
+### 2. Event Loop Lifecycle Mismatch
+
+The issue occurs because:
+
+1. **Session fixture creates database connection** on session-scoped loop
+2. **Test runs** on function-scoped loop (different from session loop)
+3. **During teardown**, the session fixture tries to rollback/close using the original session loop
+4. **AsyncPG connection** still references the function-scoped loop which is now closed
+5. **Conflict**: SQLAlchemy tries to use session loop, but asyncpg Future is attached to the closed function loop
+
+### 3. Configuration Issues
+
+Current pytest configuration:
+- `asyncio_mode = "auto"` in pyproject.toml
+- `asyncio_default_fixture_loop_scope=session` (shown in test output)
+- `asyncio_default_test_loop_scope=function` (shown in test output)
+
+This mismatch between fixture loop scope (session) and test loop scope (function) causes the problem.
+
+## Solutions
+
+### Option 1: Align Loop Scopes (Recommended)
+Change pytest-asyncio configuration to use consistent loop scopes:
+
+```toml
+# pyproject.toml
+[tool.pytest.ini_options]
+asyncio_mode = "auto"
+asyncio_default_fixture_loop_scope = "function"  # Change from session to function
+```
+
+### Option 2: Use Function-Scoped Database Fixture
+Change the `session` fixture scope from session to function:
+
+```python
+@pytest_asyncio.fixture  # Remove scope="session"
+async def session(setup_database):
+    # ... existing code ...
+```
+
+### Option 3: Explicit Loop Management
+Ensure all async operations use the same loop (note: the `event_loop` fixture shown here exists only in pytest-asyncio < 1.0):
+
+```python
+@pytest_asyncio.fixture
+async def session(setup_database, event_loop):
+    # Force using the current event loop
+    engine = create_async_engine(
+        settings.DATABASE_URL,
+        echo=False,
+        poolclass=NullPool,
+        connect_args={"loop": event_loop}  # Pass explicit loop
+    )
+    # ... rest of fixture ...
+```
+
+### Option 4: Upgrade pytest-asyncio
+The current version (1.1.0) has known issues with loop management. Consider upgrading to the latest version which has better loop scope handling.
+
+## Immediate Workaround
+
+For the test to run cleanly without the teardown error, you can:
+
+1. Add explicit cleanup in the test:
+```python
+@pytest.mark.asyncio
+async def test_attendee_parsing_bug(session):
+    # ... existing test code ...
+
+    # Explicit cleanup before fixture teardown
+    await session.commit()  # or await session.close()
+```
+
+2. Or suppress the teardown error (not recommended for production):
+```python
+@pytest.fixture
+async def session(setup_database):
+    # ... existing setup ...
+    try:
+        yield session
+        await session.rollback()
+    except RuntimeError as e:
+        if "Event loop is closed" not in str(e):
+            raise
+    finally:
+        await session.close()
+```
+
+## Recommendation
+
+The cleanest solution is to align the loop scopes by setting both the fixture and test loop scopes to "function". This ensures each test gets its own clean event loop and avoids cross-contamination between tests.
diff --git a/server/migrations/env.py b/server/migrations/env.py
@@ -3,7 +3,7 @@
 from alembic import context
 from sqlalchemy import engine_from_config, pool
 
-from reflector.db import metadata
+from reflector.db.base import metadata
 from reflector.settings import settings
 
 # this is the Alembic Config object, which provides

diff --git a/server/migrations/versions/38a927dcb099_rename_back_text_to_transcript.py b/server/migrations/versions/38a927dcb099_rename_back_text_to_transcript.py
@@ -28,7 +28,7 @@ def upgrade() -> None:
     transcript = table("transcript", column("id", sa.String), column("topics", sa.JSON))
 
     # Select all rows from the transcript table
-    results = bind.execute(select([transcript.c.id, transcript.c.topics]))
+    results = bind.execute(select(transcript.c.id, transcript.c.topics))
 
     for row in results:
         transcript_id = row["id"]
@@ -58,7 +58,7 @@ def downgrade() -> None:
     transcript = table("transcript", column("id", sa.String), column("topics", sa.JSON))
 
     # Select all rows from the transcript table
-    results = bind.execute(select([transcript.c.id, transcript.c.topics]))
+    results = bind.execute(select(transcript.c.id, transcript.c.topics))
 
     for row in results:
         transcript_id = row["id"]

diff --git a/server/migrations/versions/4814901632bc_fix_duration.py b/server/migrations/versions/4814901632bc_fix_duration.py
@@ -36,9 +36,7 @@ def upgrade() -> None:
 
     # select only the one with duration = 0
     results = bind.execute(
-        select([transcript.c.id, transcript.c.duration]).where(
-            transcript.c.duration == 0
-        )
+        select(transcript.c.id, transcript.c.duration).where(transcript.c.duration == 0)
     )
 
     data_dir = Path(settings.DATA_DIR)

diff --git a/server/migrations/versions/9920ecfe2735_rename_transcript_to_text.py b/server/migrations/versions/9920ecfe2735_rename_transcript_to_text.py
@@ -28,7 +28,7 @@ def upgrade() -> None:
     transcript = table("transcript", column("id", sa.String), column("topics", sa.JSON))
 
     # Select all rows from the transcript table
-    results = bind.execute(select([transcript.c.id, transcript.c.topics]))
+    results = bind.execute(select(transcript.c.id, transcript.c.topics))
 
     for row in results:
         transcript_id = row["id"]
@@ -58,7 +58,7 @@ def downgrade() -> None:
     transcript = table("transcript", column("id", sa.String), column("topics", sa.JSON))
 
     # Select all rows from the transcript table
-    results = bind.execute(select([transcript.c.id, transcript.c.topics]))
+    results = bind.execute(select(transcript.c.id, transcript.c.topics))
 
     for row in results:
         transcript_id = row["id"]

diff --git a/server/pyproject.toml b/server/pyproject.toml
@@ -19,8 +19,8 @@ dependencies = [
     "sentry-sdk[fastapi]>=1.29.2",
     "httpx>=0.24.1",
     "fastapi-pagination>=0.12.6",
-    "databases[aiosqlite, asyncpg]>=0.7.0",
-    "sqlalchemy<1.5",
+    "sqlalchemy>=2.0.0",
+    "asyncpg>=0.29.0",
     "alembic>=1.11.3",
     "nltk>=3.8.1",
     "prometheus-fastapi-instrumentator>=6.1.0",
@@ -46,6 +46,7 @@ dev = [
     "black>=24.1.1",
     "stamina>=23.1.0",
     "pyinstrument>=4.6.1",
+    "pytest-async-sqlalchemy>=0.2.0",
 ]
 tests = [
     "pytest-cov>=4.1.0",
@@ -111,12 +112,15 @@ source = ["reflector"]
 
 [tool.pytest_env]
 ENVIRONMENT = "pytest"
-DATABASE_URL = "postgresql://test_user:test_password@localhost:15432/reflector_test"
+DATABASE_URL = "postgresql+asyncpg://test_user:test_password@localhost:15432/reflector_test"
 
 [tool.pytest.ini_options]
 addopts = "-ra -q --disable-pytest-warnings --cov --cov-report html -v"
 testpaths = ["tests"]
 asyncio_mode = "auto"
+asyncio_debug = true
+asyncio_default_fixture_loop_scope = "session"
+asyncio_default_test_loop_scope = "session"
 markers = [
     "model_api: tests for the unified model-serving HTTP API (backend- and hardware-agnostic)",
 ]

diff --git a/server/reflector/asynctask.py b/server/reflector/asynctask.py
@@ -1,21 +1,14 @@
 import asyncio
 import functools
 
-from reflector.db import get_database
-
 
 def asynctask(f):
     @functools.wraps(f)
     def wrapper(*args, **kwargs):
-        async def run_with_db():
-            database = get_database()
-            await database.connect()
-            try:
-                return await f(*args, **kwargs)
-            finally:
-                await database.disconnect()
+        async def run_async():
+            return await f(*args, **kwargs)
 
-        coro = run_with_db()
+        coro = run_async()
         try:
             loop = asyncio.get_running_loop()
         except RuntimeError:

diff --git a/server/reflector/db/__init__.py b/server/reflector/db/__init__.py
@@ -1,48 +1,69 @@
-import contextvars
-from typing import Optional
+from typing import AsyncGenerator
 
-import databases
-import sqlalchemy
+from sqlalchemy.ext.asyncio import (
+    AsyncEngine,
+    AsyncSession,
+    async_sessionmaker,
+    create_async_engine,
+)
 
+from reflector.db.base import Base as Base
+from reflector.db.base import metadata as metadata
 from reflector.events import subscribers_shutdown, subscribers_startup
 from reflector.settings import settings
 
-metadata = sqlalchemy.MetaData()
+_engine: AsyncEngine | None = None
+_session_factory: async_sessionmaker[AsyncSession] | None = None
+
+
+def get_engine() -> AsyncEngine:
+    global _engine
+    if _engine is None:
+        _engine = create_async_engine(
+            settings.DATABASE_URL,
+            echo=False,
+            pool_pre_ping=True,
+        )
+    return _engine
 
-_database_context: contextvars.ContextVar[Optional[databases.Database]] = (
-    contextvars.ContextVar("database", default=None)
-)
 
+def get_session_factory() -> async_sessionmaker[AsyncSession]:
+    global _session_factory
+    if _session_factory is None:
+        _session_factory = async_sessionmaker(
+            get_engine(),
+            class_=AsyncSession,
+            expire_on_commit=False,
+        )
+    return _session_factory
 
-def get_database() -> databases.Database:
-    """Get database instance for current asyncio context"""
-    db = _database_context.get()
-    if db is None:
-        db = databases.Database(settings.DATABASE_URL)
-        _database_context.set(db)
-    return db
+
+async def _get_session() -> AsyncGenerator[AsyncSession, None]:
+    # necessary implementation to ease mocking on pytest
+    async with get_session_factory()() as session:
+        yield session
+
+
+async def get_session() -> AsyncGenerator[AsyncSession, None]:
+    async for session in _get_session():
+        yield session
 
 
-# import models
 import reflector.db.calendar_events  # noqa
 import reflector.db.meetings  # noqa
 import reflector.db.recordings  # noqa
 import reflector.db.rooms  # noqa
 import reflector.db.transcripts  # noqa
 
-kwargs = {}
-if "postgres" not in settings.DATABASE_URL:
-    raise Exception("Only postgres database is supported in reflector")
-engine = sqlalchemy.create_engine(settings.DATABASE_URL, **kwargs)
-
 
 @subscribers_startup.append
 async def database_connect(_):
-    database = get_database()
-    await database.connect()
+    get_engine()
 
 
 @subscribers_shutdown.append
 async def database_disconnect(_):
-    database = get_database()
-    await database.disconnect()
+    global _engine
+    if _engine:
+        await _engine.dispose()
+        _engine = None