diff --git a/.env.example b/.env.example index 0f4dcd4499..f1293e2b5d 100644 --- a/.env.example +++ b/.env.example @@ -55,6 +55,14 @@ SEARXNG_INSTANCE=http://localhost:8080 # SQLite database path (default: sqlite:///./data/app.db) # DATABASE_URL=sqlite:///./data/app.db +# +# PostgreSQL (optional, opt-in). SQLite is the zero-config default; only set +# this to run the main app database on Postgres. Needs the driver: +# pip install -r requirements-optional.txt +# v1 supports FRESH Postgres installs only — there is no SQLite->Postgres data +# copy. The email side-DBs (scheduled_emails.db / email_cache.db) stay local +# SQLite caches regardless of this setting. +# DATABASE_URL=postgresql://user:pass@localhost:5432/odysseus # ============================================================ # Data directory diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 787bd9dea0..8b9b921270 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -92,3 +92,40 @@ jobs: if: steps.docs-check.outputs.docs_only != 'true' - run: python -m pytest -q if: steps.docs-check.outputs.docs_only != 'true' + + python-tests-postgres: + name: Python tests (Postgres leg) + runs-on: ubuntu-latest + # Informational until a maintainer promotes it: proves the ORM schema + + # round-trip work on Postgres (tests/test_postgres_compat.py). The SQLite + # leg of that test already runs inside the python-tests job above. 
def init_db():
    """
    Bring the application database up to the current schema.

    Meant to be called once while the application is starting.

    The work happens in four ordered phases:

    1. SQLite only: ``_migrate_model_endpoints()`` runs *before* schema
       creation. It drops a stale-schema ``model_endpoints`` table (legacy
       ``url`` column, no ``base_url``) so the next phase rebuilds it in the
       same boot. It relies on raw sqlite3 + PRAGMA, hence the dialect gate.
    2. Every dialect: ``Base.metadata.create_all`` emits the full current
       schema. The ORM uses portable column types, so a fresh SQLite *or*
       Postgres database ends up complete after this step.
    3. SQLite only: the legacy ``_migrate_*`` upgraders patch PRE-EXISTING
       SQLite files via raw sqlite3 / PRAGMA / ALTER. A fresh Postgres
       database never had an old schema, so they are skipped there.
    4. Every dialect: the encryption-at-rest migrations. They use portable
       SELECT/UPDATE and must stay outside the SQLite gate, otherwise an
       opt-in Postgres install would silently lose the at-rest encryption
       that SQLite users get.

    Note for future contributors: new model columns reach Postgres through
    ``create_all`` automatically. A data backfill that Postgres also needs
    must NOT go in the SQLite-only list; write it with the SQLAlchemy
    Inspector (not PRAGMA) and run it outside the gate.
    """
    # Phase 1 - must precede create_all so the dropped table is recreated.
    if IS_SQLITE:
        _migrate_model_endpoints()

    # Phase 2 - portable schema creation.
    Base.metadata.create_all(bind=engine)

    # Phase 3 - legacy SQLite upgraders. Order is significant: keep it.
    if IS_SQLITE:
        sqlite_only_upgraders = (
            _migrate_add_hidden_models_column,
            _migrate_add_cached_models_column,
            _migrate_add_pinned_models_column,
            _migrate_add_notes_sort_order,
            _migrate_add_model_type_column,
            _migrate_add_model_endpoint_refresh_columns,
            _migrate_add_model_endpoint_owner_column,
            _migrate_add_provider_auth_id_column,
            _migrate_add_supports_tools_column,
            _migrate_add_task_run_model_column,
            _migrate_add_owner_column,
            _migrate_add_document_archived_column,
            _migrate_add_last_message_at_column,
            _migrate_add_folder_column,
            _migrate_add_token_columns,
            _migrate_add_mode_column,
            _migrate_add_multiuser_owner_columns,
            _migrate_add_api_token_scopes_column,
            _migrate_backfill_document_owner_from_session,
            _migrate_assign_legacy_owner,
            _migrate_add_tidy_verdict,
            _migrate_add_doc_source_email_cols,
            _migrate_add_oauth_config,
            _migrate_add_email_oauth_columns,
            _migrate_add_task_automation_columns,
            _migrate_add_disabled_tools,
            _migrate_add_mcp_oauth_tokens_column,
            _migrate_add_task_v2_columns,
            _migrate_add_notifications_enabled,
            _migrate_drop_ping_notes_tasks,
            _migrate_add_crew_member_id,
            _migrate_add_assistant_columns,
            _migrate_add_email_smtp_security,
            _migrate_seed_email_account,
            _migrate_add_calendar_metadata,
            _migrate_add_calendar_is_utc,
            _migrate_add_calendar_origin,
            _migrate_add_calendar_account_id,
            _migrate_add_caldav_sync_columns,
            _migrate_chat_messages_fts,
            _migrate_backfill_task_folders,
        )
        for run_upgrade in sqlite_only_upgraders:
            run_upgrade()

    # Phase 4 - encryption-at-rest, portable, runs on BOTH dialects.
    encryption_migrations = (
        _migrate_encrypt_email_passwords,
        _migrate_encrypt_signatures,
        _migrate_encrypt_endpoint_keys,
    )
    for run_encryption in encryption_migrations:
        run_encryption()
Install the driver (it is intentionally not in `requirements.txt`): + ```bash + pip install -r requirements-optional.txt + ``` +2. Point `DATABASE_URL` at your database before first boot: + ```bash + DATABASE_URL=postgresql://user:pass@localhost:5432/odysseus + ``` + +On startup Odysseus creates the full schema on the empty database automatically. + +**v1 behaviour (by design, not bugs):** + +- **Fresh installs only.** There is no SQLite→PostgreSQL data-copy tool yet, so + switching starts from an empty database. The legacy SQLite schema-upgrade and + data-backfill/seed migrations are skipped on Postgres (a fresh DB has no old + data to patch); only the portable schema creation and the encryption-at-rest + migrations run. +- **Email side-DBs stay local SQLite.** `scheduled_emails.db` and + `email_cache.db` remain on-disk SQLite caches regardless of `DATABASE_URL`. +- **Chat full-text search falls back to `LIKE`.** The SQLite FTS5 index is not + created on Postgres; transcript search still works via a `LIKE` scan. + ### Faster, reproducible installs with uv (optional) [uv](https://docs.astral.sh/uv/) works as a drop-in replacement for the venv + pip steps in the native install guides, no project changes are needed but this change results in faster installs along with a lockfile for reproducible environments. 
After [installing `uv`](https://docs.astral.sh/uv/getting-started/installation/), use: diff --git a/mcp_servers/email_server.py b/mcp_servers/email_server.py index 2611491ae7..b4f19569ff 100644 --- a/mcp_servers/email_server.py +++ b/mcp_servers/email_server.py @@ -33,7 +33,7 @@ server = Server("email") EMAIL_SOCKET_TIMEOUT = float(os.environ.get("EMAIL_SOCKET_TIMEOUT", "20")) -from src.constants import DATA_DIR as _DATA_DIR, APP_DB, EMAIL_CACHE_DB, SETTINGS_FILE as _SETTINGS_FILE, MAIL_ATTACHMENTS_DIR +from src.constants import DATA_DIR as _DATA_DIR, EMAIL_CACHE_DB, SETTINGS_FILE as _SETTINGS_FILE, MAIL_ATTACHMENTS_DIR DATA_DIR = Path(_DATA_DIR) @@ -67,10 +67,6 @@ def _clean_header_value(value) -> str: return re.sub(r"[\r\n]+[ \t]*", " ", str(value)).strip() -def _db_path() -> Path: - return Path(APP_DB) - - def _current_owner() -> str: owner = _CURRENT_OWNER.get() return str(owner or "").strip() @@ -164,27 +160,47 @@ def _default_document_owner() -> str | None: def _read_accounts_from_db() -> list: - """Return all enabled email account rows. Empty list if missing. Never raises.""" - path = _db_path() - if not path.exists(): - return [] + """Return all enabled email account rows. Empty list if missing. Never raises. + + Routed through the app's SQLAlchemy engine (core.database) instead of opening + the SQLite file directly, so it follows DATABASE_URL — SQLite by default, + Postgres when configured. Lazy import keeps MCP module load light and mirrors + the document-tool import pattern elsewhere in this file. EmailAccount's + imap/smtp password columns are plain String (encryption is applied by the + startup migration, not the ORM type), so this returns the same stored values + the old raw-sqlite3 read did. Inspector-style optional-column probing is no + longer needed: create_all + the startup migrations guarantee the columns. 
+ """ try: - conn = sqlite3.connect(str(path)) - conn.row_factory = sqlite3.Row - columns = {r[1] for r in conn.execute("PRAGMA table_info(email_accounts)").fetchall()} - owner_select = "owner" if "owner" in columns else "NULL AS owner" - smtp_security_select = "smtp_security" if "smtp_security" in columns else "'' AS smtp_security" - rows = conn.execute(f""" - SELECT id, {owner_select}, name, is_default, enabled, - imap_host, imap_port, imap_user, imap_password, imap_starttls, - smtp_host, smtp_port, {smtp_security_select}, smtp_user, smtp_password, from_address - FROM email_accounts WHERE enabled = 1 - ORDER BY is_default DESC, created_at ASC - """).fetchall() - conn.close() - return [dict(r) for r in rows] - except sqlite3.OperationalError: - return [] + from core.database import SessionLocal, EmailAccount + s = SessionLocal() + try: + rows = ( + s.query(EmailAccount) + .filter(EmailAccount.enabled.is_(True)) + .order_by(EmailAccount.is_default.desc(), EmailAccount.created_at.asc()) + .all() + ) + return [{ + "id": r.id, + "owner": r.owner, + "name": r.name, + "is_default": r.is_default, + "enabled": r.enabled, + "imap_host": r.imap_host, + "imap_port": r.imap_port, + "imap_user": r.imap_user, + "imap_password": r.imap_password, + "imap_starttls": r.imap_starttls, + "smtp_host": r.smtp_host, + "smtp_port": r.smtp_port, + "smtp_security": r.smtp_security or "", + "smtp_user": r.smtp_user, + "smtp_password": r.smtp_password, + "from_address": r.from_address, + } for r in rows] + finally: + s.close() except Exception: return [] diff --git a/requirements-optional.txt b/requirements-optional.txt index ab21e81ee1..8205708aac 100644 --- a/requirements-optional.txt +++ b/requirements-optional.txt @@ -34,3 +34,11 @@ PyMuPDF # [all]/Azure/audio extras (cloud + heavy). Pinned to a release >30 days old per # the dependency-age discussion in issue #485. 
def _init_accounts_db(db_path, monkeypatch):
    """Build a temp SQLite accounts DB and route the MCP reader to it.

    ``_read_accounts_from_db()`` goes through ``core.database.SessionLocal``
    (so it follows DATABASE_URL, incl. Postgres). The fixture therefore
    patches that session factory - not a raw-sqlite3 ``APP_DB`` path - to one
    bound to a temporary database seeded through the ORM.

    Two enabled default accounts are inserted, owned by ``alice`` and ``bob``.
    """
    from datetime import datetime

    from sqlalchemy import create_engine
    from sqlalchemy.orm import sessionmaker

    import core.database as db_mod

    fixture_engine = create_engine(
        f"sqlite:///{db_path}",
        connect_args={"check_same_thread": False},
    )
    db_mod.EmailAccount.__table__.create(fixture_engine, checkfirst=True)
    session_factory = sessionmaker(bind=fixture_engine)

    # Only these values differ between the two seeded accounts.
    seeds = (
        ("acct-alice", "alice", "Alice Mail", "alice@example.com", datetime(2026, 1, 1)),
        ("acct-bob", "bob", "Bob Mail", "bob@example.com", datetime(2026, 1, 2)),
    )
    accounts = [
        db_mod.EmailAccount(
            id=account_id,
            owner=owner,
            name=label,
            is_default=True,
            enabled=True,
            imap_host="imap.example.com",
            imap_port=993,
            imap_user=address,
            imap_password="",
            imap_starttls=True,
            smtp_host="smtp.example.com",
            smtp_port=465,
            smtp_security="ssl",
            smtp_user=address,
            smtp_password="",
            from_address=address,
            created_at=created,
        )
        for account_id, owner, label, address, created in seeds
    ]

    session = session_factory()
    try:
        session.add_all(accounts)
        session.commit()
    finally:
        session.close()

    monkeypatch.setattr(db_mod, "SessionLocal", session_factory)
def test_empty_header(): @pytest.mark.asyncio async def test_mcp_email_accounts_are_filtered_by_hidden_owner(tmp_path, monkeypatch): db_path = tmp_path / "app.db" - _init_accounts_db(db_path) - monkeypatch.setattr(es, "APP_DB", str(db_path)) + _init_accounts_db(db_path, monkeypatch) es._ACCOUNT_CACHE.clear() out = await es.call_tool("list_email_accounts", {"_odysseus_owner": "alice"}) @@ -97,8 +98,7 @@ async def test_mcp_email_accounts_are_filtered_by_hidden_owner(tmp_path, monkeyp @pytest.mark.asyncio async def test_mcp_email_requires_owner_when_multiple_account_owners_exist(tmp_path, monkeypatch): db_path = tmp_path / "app.db" - _init_accounts_db(db_path) - monkeypatch.setattr(es, "APP_DB", str(db_path)) + _init_accounts_db(db_path, monkeypatch) es._ACCOUNT_CACHE.clear() out = await es.call_tool("list_email_accounts", {}) @@ -109,7 +109,7 @@ async def test_mcp_email_requires_owner_when_multiple_account_owners_exist(tmp_p def test_mcp_email_scoped_owner_without_visible_account_skips_legacy_fallback(tmp_path, monkeypatch): db_path = tmp_path / "app.db" settings_path = tmp_path / "settings.json" - _init_accounts_db(db_path) + _init_accounts_db(db_path, monkeypatch) settings_path.write_text( json.dumps( { @@ -124,7 +124,6 @@ def test_mcp_email_scoped_owner_without_visible_account_skips_legacy_fallback(tm ), encoding="utf-8", ) - monkeypatch.setattr(es, "APP_DB", str(db_path)) monkeypatch.setattr(es, "_SETTINGS_FILE", str(settings_path)) es._ACCOUNT_CACHE.clear() diff --git a/tests/test_postgres_compat.py b/tests/test_postgres_compat.py new file mode 100644 index 0000000000..26722d1317 --- /dev/null +++ b/tests/test_postgres_compat.py @@ -0,0 +1,95 @@ +"""Dual-dialect smoke test for the optional PostgreSQL support. + +The SQLite leg always runs; the Postgres leg runs only when TEST_DATABASE_URL is +set (e.g. TEST_DATABASE_URL=postgresql://postgres:postgres@localhost:5432/odysseus_test). 
+It proves the ORM schema + a real round-trip work identically on both dialects, +so a future change can't silently break Postgres. Values are asserted (not just +"no exception") to catch representation drift: JSON (json type on Postgres), +Boolean, EncryptedText decrypt, and Integer autoincrement PKs. + +Uses a fresh engine per leg (not the import-time singleton) so it does not depend +on the process-wide DATABASE_URL. +""" +import os +import types + +import pytest + +sqlalchemy = pytest.importorskip("sqlalchemy") +if not isinstance(sqlalchemy, types.ModuleType): + pytest.skip("sqlalchemy is stubbed in this environment", allow_module_level=True) + +from sqlalchemy import create_engine +from sqlalchemy.orm import sessionmaker + +from core.database import Base, ModelEndpoint, UserTool, UserToolData +from core.database import Session as DBSession + + +def _roundtrip(url: str) -> None: + eng = create_engine( + url, + connect_args={"check_same_thread": False} if url.startswith("sqlite") else {}, + ) + Base.metadata.create_all(bind=eng) + Make = sessionmaker(bind=eng) + try: + # ---- write ---- + s = Make() + s.add(DBSession( + id="pgcompat-session", + name="compat", + endpoint_url="http://example/v1", + model="test-model", + rag=True, # Boolean + headers={"a": 1, "b": "two"}, # generic JSON column (json type on Postgres) + )) + s.add(ModelEndpoint( + id="pgcompat-endpoint", + name="compat-ep", + base_url="http://example/v1", + api_key="super-secret-plaintext", # EncryptedText (encrypts on write) + )) + # UserToolData.tool_id is a NOT NULL FK -> insert the parent tool first. + s.add(UserTool( + id="pgcompat-tool", + name="compat-tool", + html_content="