diff --git a/.env.example b/.env.example index 018d2cd..ff178fa 100644 --- a/.env.example +++ b/.env.example @@ -1,17 +1,20 @@ TMDB_API_KEY= -PORT=8000 -ADDON_ID=com.bimal.watchly -ADDON_NAME=Watchly REDIS_URL=redis://redis:6379/0 -TOKEN_TTL_SECONDS=0 -ANNOUNCEMENT_HTML= +TOKEN_SALT=change-me # generate secure salt... + +# app setup +APP_ENV="development" # available values are ["development", "production"] HOST_NAME= -RECOMMENDATION_SOURCE_ITEMS_LIMIT=10 # fetches recent watched/loved 10 movies and series to recommend based on those -TOKEN_SALT=change-me -# generate some very long random string preferrably using cryptography libraries +PORT=8000 + +# redis +REDIS_MAX_CONNECTIONS=20 +REDIS_CONNECTIONS_THRESHOLD=100 + # UPDATER -CATALOG_UPDATE_MODE=cron # Available options: cron, interval -# cron updates catalogs at specified times -# interval updates in specific intervals -CATALOG_UPDATE_CRON_SCHEDULES=[{"hour": 12, "minute": 0, "id": "catalog_refresh_noon"},{"hour": 0, "minute": 0, "id": "catalog_refresh_midnight"}] +AUTO_UPDATE_CATALOGS=True CATALOG_REFRESH_INTERVAL_SECONDS=6*60*60 + +# AI Catalog name generation +GEMINI_API_KEY= +DEFAULT_GEMINI_MODEL="gemma-3-27b-it" diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 1394663..126b570 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -1,9 +1,7 @@ name: Build and Push Docker Image - env: DOCKER_BUILDKIT: 1 COMPOSE_DOCKER_CLI_BUILD: 1 - on: push: branches: @@ -11,33 +9,31 @@ on: paths: - 'app/core/version.py' - 'pyproject.toml' - concurrency: group: ${{ github.head_ref || github.run_id }} cancel-in-progress: true - jobs: build-and-push: runs-on: ubuntu-latest permissions: id-token: write contents: write - steps: - name: Checkout code uses: actions/checkout@v5 - - uses: docker/login-action@v3 with: registry: ghcr.io username: ${{ github.actor }} password: ${{ secrets.CR_TOKEN }} - + - name: Set up QEMU + uses: docker/setup-qemu-action@v3 + - name: Set up Docker Buildx + 
uses: docker/setup-buildx-action@v3 - name: Set up Python uses: actions/setup-python@v6 with: python-version: '3.11' - - name: Read version from version.py id: get-version run: | @@ -49,14 +45,12 @@ jobs: fi echo "VERSION=${VERSION}" >> $GITHUB_OUTPUT echo "Read version: ${VERSION}" - - name: Set Docker image tag id: set-tag run: | VERSION="${{ steps.get-version.outputs.VERSION }}" echo "IMAGE_TAG=${VERSION}" >> $GITHUB_OUTPUT echo "Building Docker image with version: ${VERSION}" - - name: Build and Push Docker image working-directory: "./" run: | @@ -68,7 +62,6 @@ jobs: # Also tag as latest docker tag ghcr.io/${REPO_NAME}:${IMAGE_TAG} ghcr.io/${REPO_NAME}:latest docker push ghcr.io/${REPO_NAME}:latest - - name: Create and Push Git Tag run: | VERSION="${{ steps.get-version.outputs.VERSION }}" diff --git a/.github/workflows/linter.yml b/.github/workflows/linter.yml index ad6af93..b15b3c0 100644 --- a/.github/workflows/linter.yml +++ b/.github/workflows/linter.yml @@ -1,29 +1,23 @@ name: Linter - # Enable Buildkit and let compose use it to speed up image building env: DOCKER_BUILDKIT: 1 COMPOSE_DOCKER_CLI_BUILD: 1 - on: pull_request: push: - concurrency: group: ${{ github.head_ref || github.run_id }} cancel-in-progress: true - jobs: linter: runs-on: ubuntu-latest steps: - name: Checkout Code Repository uses: actions/checkout@v5 - - name: Set up Python uses: actions/setup-python@v6 with: python-version: '3.11' - - name: Run pre-commit uses: pre-commit/action@v3.0.0 diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 7340975..e076961 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -1,5 +1,4 @@ name: Create GitHub Release - on: workflow_run: workflows: ["Build and Push Docker Image"] @@ -9,37 +8,30 @@ on: - main push: tags: - - '*' # Also trigger on manual tag pushes - - + - '*' # Also trigger on manual tag pushes jobs: release: runs-on: ubuntu-latest # Only run if the triggering workflow succeeded if: ${{ 
github.event_name == 'push' || github.event.workflow_run.conclusion == 'success' }} - permissions: packages: write contents: write - steps: - name: Checkout repository uses: actions/checkout@v5 with: - fetch-depth: 0 # Fetch all history for all tags and branches - fetch-tags: true # Fetch all tags + fetch-depth: 0 # Fetch all history for all tags and branches + fetch-tags: true # Fetch all tags ref: ${{ github.event.workflow_run.head_branch || github.ref }} - - name: Set up Python uses: actions/setup-python@v6 with: python-version: '3.11' - - name: Install dependencies run: | python -m pip install --upgrade pip pip install openai pydantic - - name: Get current tag id: get-tag run: | @@ -63,13 +55,11 @@ jobs: echo "TAG_NAME=${TAG_NAME}" >> $GITHUB_OUTPUT echo "Current tag from push: ${TAG_NAME}" fi - - name: Checkout tag commit run: | TAG_NAME="${{ steps.get-tag.outputs.TAG_NAME }}" git checkout ${TAG_NAME} || git checkout -b temp-${TAG_NAME} ${TAG_NAME} echo "Checked out tag: ${TAG_NAME}" - - name: Run Python script to generate release notes id: generate_release_notes env: @@ -80,12 +70,10 @@ jobs: echo "Running generate_release_notes.py" python scripts/generate_release_notes.py echo "Script completed" - - name: Debug Outputs run: | echo "Version: ${{ steps.generate_release_notes.outputs.version }}" echo "Release Notes: ${{ steps.generate_release_notes.outputs.release_notes }}" - - name: Create GitHub Release uses: actions/create-release@v1 env: diff --git a/.gitignore b/.gitignore index 458649e..4b70484 100644 --- a/.gitignore +++ b/.gitignore @@ -46,3 +46,4 @@ logs/ # python notebooks */ipynb_checkpoints/ *.ipynb +.vercel diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index dbaaa51..5f57aea 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,6 +1,5 @@ default_stages: [pre-commit] exclude: '^misc/|^data/|^docs/' - repos: - repo: https://github.com/pre-commit/pre-commit-hooks rev: v4.6.0 @@ -16,28 +15,27 @@ repos: - id: 
check-case-conflict - id: check-docstring-first - id: detect-private-key - - repo: https://github.com/asottile/pyupgrade rev: v3.15.2 hooks: - id: pyupgrade args: [--py311-plus] - - repo: https://github.com/psf/black rev: 24.4.0 hooks: - id: black - - repo: https://github.com/PyCQA/isort rev: 5.13.2 hooks: - id: isort - - repo: https://github.com/PyCQA/flake8 rev: 7.0.0 hooks: - id: flake8 - + - repo: https://github.com/google/yamlfmt + rev: v0.11.0 + hooks: + - id: yamlfmt # sets up .pre-commit-ci.yaml to ensure pre-commit dependencies stay up to date ci: autoupdate_schedule: weekly diff --git a/app/api/endpoints/catalogs.py b/app/api/endpoints/catalogs.py index 8e3c46f..00b72d4 100644 --- a/app/api/endpoints/catalogs.py +++ b/app/api/endpoints/catalogs.py @@ -3,19 +3,46 @@ from fastapi import APIRouter, HTTPException, Response from loguru import logger +from app.api.endpoints.manifest import get_config_id +from app.core.config import settings from app.core.security import redact_token from app.core.settings import UserSettings, get_default_settings -from app.services.catalog_updater import refresh_catalogs_for_credentials -from app.services.recommendation_service import RecommendationService -from app.services.stremio_service import StremioService +from app.services.catalog_updater import catalog_updater +from app.services.recommendation.engine import RecommendationEngine +from app.services.stremio.service import StremioBundle from app.services.token_store import token_store MAX_RESULTS = 50 +DEFAULT_MIN_ITEMS = 20 +DEFAULT_MAX_ITEMS = 32 SOURCE_ITEMS_LIMIT = 10 router = APIRouter() +def _clean_meta(meta: dict) -> dict: + """Return a sanitized Stremio meta object without internal fields. + + Keeps only public keys and drops internal scoring/IDs/keywords/cast, etc. 
+ """ + allowed = { + "id", + "type", + "name", + "poster", + "background", + "description", + "releaseInfo", + "imdbRating", + "genres", + "runtime", + } + cleaned = {k: v for k, v in meta.items() if k in allowed} + # Drop empty values + cleaned = {k: v for k, v in cleaned.items() if v not in (None, "", [], {}, ())} + return cleaned + + @router.get("/{token}/catalog/{type}/{id}.json") async def get_catalog(type: str, id: str, response: Response, token: str): if not token: @@ -30,7 +57,14 @@ async def get_catalog(type: str, id: str, response: Response, token: str): # Supported IDs now include dynamic themes and item-based rows if id != "watchly.rec" and not any( - id.startswith(p) for p in ("tt", "watchly.theme.", "watchly.item.", "watchly.loved.", "watchly.watched.") + id.startswith(p) + for p in ( + "tt", + "watchly.theme.", + "watchly.item.", + "watchly.loved.", + "watchly.watched.", + ) ): logger.warning(f"Invalid id: {id}") raise HTTPException( @@ -46,68 +80,128 @@ async def get_catalog(type: str, id: str, response: Response, token: str): credentials = await token_store.get_user_data(token) if not credentials: raise HTTPException(status_code=401, detail="Invalid or expired token. Please reconfigure the addon.") + + # Trigger lazy update if needed + if settings.AUTO_UPDATE_CATALOGS: + await catalog_updater.trigger_update(token, credentials) + + bundle = StremioBundle() try: - # Extract settings from credentials + # 1. 
Resolve Auth Key (with potential fallback to login) + auth_key = credentials.get("authKey") + email = credentials.get("email") + password = credentials.get("password") + + is_valid = False + if auth_key: + try: + await bundle.auth.get_user_info(auth_key) + is_valid = True + except Exception: + pass + + if not is_valid and email and password: + try: + auth_key = await bundle.auth.login(email, password) + credentials["authKey"] = auth_key + await token_store.update_user_data(token, credentials) + except Exception as e: + logger.error(f"Failed to refresh auth key during catalog fetch: {e}") + + if not auth_key: + raise HTTPException(status_code=401, detail="Stremio session expired. Please reconfigure.") + + # 2. Extract settings from credentials settings_dict = credentials.get("settings", {}) user_settings = UserSettings(**settings_dict) if settings_dict else get_default_settings() language = user_settings.language if user_settings else "en-US" - # Create services with credentials - stremio_service = StremioService(auth_key=credentials.get("authKey")) - # Fetch library once per request and reuse across recommendation paths - library_items = await stremio_service.get_library_items() - recommendation_service = RecommendationService( - stremio_service=stremio_service, + # 3. 
Fetch library once per request and reuse across recommendation paths + library_items = await bundle.library.get_library_items(auth_key) + engine = RecommendationEngine( + stremio_service=bundle, language=language, user_settings=user_settings, token=token, library_data=library_items, ) + # Resolve per-catalog limits (min/max) + def _get_limits() -> tuple[int, int]: + try: + cfg_id = get_config_id({"id": id}) + except Exception: + cfg_id = id + try: + cfg = next((c for c in user_settings.catalogs if c.id == cfg_id), None) + if cfg and hasattr(cfg, "min_items") and hasattr(cfg, "max_items"): + return int(cfg.min_items or DEFAULT_MIN_ITEMS), int(cfg.max_items or DEFAULT_MAX_ITEMS) + except Exception: + pass + return DEFAULT_MIN_ITEMS, DEFAULT_MAX_ITEMS + + min_items, max_items = _get_limits() + # Enforce caps: min_items <= 20, max_items <= 32 and max >= min + try: + min_items = max(1, min(DEFAULT_MIN_ITEMS, int(min_items))) + max_items = max(min_items, min(DEFAULT_MAX_ITEMS, int(max_items))) + except (ValueError, TypeError): + logger.warning( + "Invalid min/max items values. Falling back to defaults. " + f"min_items={min_items}, max_items={max_items}" + ) + min_items, max_items = DEFAULT_MIN_ITEMS, DEFAULT_MAX_ITEMS + # Handle item-based recommendations if id.startswith("tt"): - recommendations = await recommendation_service.get_recommendations_for_item(item_id=id) + engine.per_item_limit = max_items + recommendations = await engine.get_recommendations_for_item(item_id=id, media_type=type) + if len(recommendations) < min_items: + recommendations = await engine.pad_to_min(type, recommendations, min_items) logger.info(f"Found {len(recommendations)} recommendations for {id}") - elif id.startswith("watchly.item.") or id.startswith("watchly.loved.") or id.startswith("watchly.watched."): + elif any( + id.startswith(p) + for p in ( + "watchly.item.", + "watchly.loved.", + "watchly.watched.", + ) + ): # Extract actual item ID (tt... or tmdb:...) 
item_id = re.sub(r"^watchly\.(item|loved|watched)\.", "", id) - recommendations = await recommendation_service.get_recommendations_for_item(item_id=item_id) + engine.per_item_limit = max_items + recommendations = await engine.get_recommendations_for_item(item_id=item_id, media_type=type) + if len(recommendations) < min_items: + recommendations = await engine.pad_to_min(type, recommendations, min_items) logger.info(f"Found {len(recommendations)} recommendations for item {item_id}") elif id.startswith("watchly.theme."): - recommendations = await recommendation_service.get_recommendations_for_theme(theme_id=id, content_type=type) + recommendations = await engine.get_recommendations_for_theme( + theme_id=id, content_type=type, limit=max_items + ) + if len(recommendations) < min_items: + recommendations = await engine.pad_to_min(type, recommendations, min_items) logger.info(f"Found {len(recommendations)} recommendations for theme {id}") else: - recommendations = await recommendation_service.get_recommendations( - content_type=type, source_items_limit=SOURCE_ITEMS_LIMIT, max_results=MAX_RESULTS + recommendations = await engine.get_recommendations( + content_type=type, source_items_limit=SOURCE_ITEMS_LIMIT, max_results=max_items ) + if len(recommendations) < min_items: + recommendations = await engine.pad_to_min(type, recommendations, min_items) logger.info(f"Found {len(recommendations)} recommendations for {type}") logger.info(f"Returning {len(recommendations)} items for {type}") - # Cache catalog responses for 4 hours - response.headers["Cache-Control"] = ( - "public, max-age=14400" if len(recommendations) > 0 else "public, max-age=7200" - ) - return {"metas": recommendations} + # Avoid serving stale results; revalidate on each request + response.headers["Cache-Control"] = "no-cache" + cleaned = [_clean_meta(m) for m in recommendations] + return {"metas": cleaned} except HTTPException: raise except Exception as e: logger.exception(f"[{redact_token(token)}] Error 
fetching catalog for {type}/{id}: {e}") raise HTTPException(status_code=500, detail=str(e)) - - -@router.get("/{token}/catalog/update") -async def update_catalogs(token: str): - """ - Update the catalogs for the addon. This is a manual endpoint to update the catalogs. - """ - # Decode credentials from path - credentials = await token_store.get_user_data(token) - - logger.info(f"[{redact_token(token)}] Updating catalogs in response to manual request") - updated = await refresh_catalogs_for_credentials(token, credentials) - logger.info(f"Manual catalog update completed: {updated}") - return {"success": updated} + finally: + await bundle.close() diff --git a/app/api/endpoints/manifest.py b/app/api/endpoints/manifest.py index 0eb42e6..fa59eb8 100644 --- a/app/api/endpoints/manifest.py +++ b/app/api/endpoints/manifest.py @@ -1,11 +1,13 @@ from fastapi import HTTPException, Response from fastapi.routing import APIRouter +from loguru import logger from app.core.config import settings from app.core.settings import UserSettings, get_default_settings from app.core.version import __version__ from app.services.catalog import DynamicCatalogService -from app.services.stremio_service import StremioService +from app.services.catalog_updater import get_config_id +from app.services.stremio.service import StremioBundle from app.services.token_store import token_store from app.services.translation import translation_service @@ -46,41 +48,30 @@ def get_base_manifest(user_settings: UserSettings | None = None): "name": settings.ADDON_NAME, "description": "Movie and series recommendations based on your Stremio library", "logo": "https://raw.githubusercontent.com/TimilsinaBimal/Watchly/refs/heads/main/app/static/logo.png", - "resources": [{"name": "catalog", "types": ["movie", "series"], "idPrefixes": ["tt"]}], + "background": "https://raw.githubusercontent.com/TimilsinaBimal/Watchly/refs/heads/main/app/static/cover.png", + "resources": ["catalog"], "types": ["movie", "series"], 
"idPrefixes": ["tt"], "catalogs": catalogs, "behaviorHints": {"configurable": True, "configurationRequired": False}, + "stremioAddonsConfig": { + "issuer": "https://stremio-addons.net", + "signature": "eyJhbGciOiJkaXIiLCJlbmMiOiJBMTI4Q0JDLUhTMjU2In0..ycLGL5WUjggv7PxKPqMLYQ.Y_cD-8wqoXqENdXbFmR1-Si39NtqBlsxEDdrEO0deciilBsWWAlPIglx85XFE4ScSfSqzNxrCZUjHjWWIb2LdcFuvE1RVBrFsUBXgbs5eQknnEL617pFtCWNh0bi37Xv.zYhJ87ZqcYZMRfxLY0bSGQ", # noqa + }, } -async def build_dynamic_catalogs(stremio_service: StremioService, user_settings: UserSettings) -> list[dict]: - # Note: get_library_items is the heavy call; StremioService has its own short cache. - library_items = await stremio_service.get_library_items() +async def build_dynamic_catalogs(bundle: StremioBundle, auth_key: str, user_settings: UserSettings) -> list[dict]: + # Fetch library using bundle directly + library_items = await bundle.library.get_library_items(auth_key) dynamic_catalog_service = DynamicCatalogService( - stremio_service=stremio_service, language=user_settings.language, ) return await dynamic_catalog_service.get_dynamic_catalogs(library_items, user_settings) -def get_config_id(catalog) -> str | None: - catalog_id = catalog.get("id", "") - if catalog_id.startswith("watchly.theme."): - return "watchly.theme" - if catalog_id.startswith("watchly.loved."): - return "watchly.loved" - if catalog_id.startswith("watchly.watched."): - return "watchly.watched" - if catalog_id.startswith("watchly.item."): - return "watchly.item" - if catalog_id.startswith("watchly.rec"): - return "watchly.rec" - return catalog_id - - async def _manifest_handler(response: Response, token: str): - response.headers["Cache-Control"] = "no-cache" + response.headers["Cache-Control"] = "public, max-age=300" # 5 minutes if not token: raise HTTPException(status_code=401, detail="Missing token. 
Please reconfigure the addon.") @@ -88,16 +79,54 @@ async def _manifest_handler(response: Response, token: str): user_settings = None try: creds = await token_store.get_user_data(token) - if creds.get("settings"): + if creds and creds.get("settings"): user_settings = UserSettings(**creds["settings"]) - except Exception: - raise HTTPException(status_code=401, detail="Invalid or expired token. Please reconfigure the addon.") + except Exception as e: + logger.error(f"[{token}] Error loading user data from token store: {e}") + raise HTTPException(status_code=401, detail="Invalid token session. Please reconfigure.") + + if not creds: + raise HTTPException(status_code=401, detail="Token not found. Please reconfigure the addon.") base_manifest = get_base_manifest(user_settings) - # Build dynamic catalogs using the already-fetched credentials - stremio_service = StremioService(auth_key=creds.get("authKey")) - fetched_catalogs = await build_dynamic_catalogs(stremio_service, user_settings or get_default_settings()) + bundle = StremioBundle() + fetched_catalogs = [] + try: + # Resolve Auth Key (with potential fallback to login) + auth_key = creds.get("authKey") + email = creds.get("email") + password = creds.get("password") + + is_valid = False + if auth_key: + try: + await bundle.auth.get_user_info(auth_key) + is_valid = True + except Exception as e: + logger.debug(f"Auth key check failed for {email or 'unknown'}: {e}") + pass + + if not is_valid and email and password: + try: + auth_key = await bundle.auth.login(email, password) + # Update store + creds["authKey"] = auth_key + await token_store.update_user_data(token, creds) + except Exception as e: + logger.error(f"Failed to refresh auth key during manifest fetch: {e}") + + if auth_key: + fetched_catalogs = await build_dynamic_catalogs( + bundle, + auth_key, + user_settings or get_default_settings(), + ) + except Exception as e: + logger.exception(f"[{token}] Dynamic catalog build failed: {e}") + fetched_catalogs = [] + 
finally: + await bundle.close() all_catalogs = [c.copy() for c in base_manifest["catalogs"]] + [c.copy() for c in fetched_catalogs] @@ -107,7 +136,10 @@ async def _manifest_handler(response: Response, token: str): if user_settings and user_settings.language: for cat in all_catalogs: if cat.get("name"): - cat["name"] = await translation_service.translate(cat["name"], user_settings.language) + try: + cat["name"] = await translation_service.translate(cat["name"], user_settings.language) + except Exception as e: + logger.warning(f"Failed to translate catalog name '{cat.get('name')}': {e}") translated_catalogs.append(cat) else: translated_catalogs = all_catalogs @@ -116,7 +148,8 @@ async def _manifest_handler(response: Response, token: str): order_map = {c.id: i for i, c in enumerate(user_settings.catalogs)} translated_catalogs.sort(key=lambda x: order_map.get(get_config_id(x), 999)) - base_manifest["catalogs"] = translated_catalogs + if translated_catalogs: + base_manifest["catalogs"] = translated_catalogs return base_manifest diff --git a/app/api/endpoints/meta.py b/app/api/endpoints/meta.py index ea5579e..8bea561 100644 --- a/app/api/endpoints/meta.py +++ b/app/api/endpoints/meta.py @@ -1,31 +1,22 @@ -from async_lru import alru_cache from fastapi import APIRouter, HTTPException from loguru import logger -from app.services.tmdb_service import get_tmdb_service +from app.services.tmdb.service import get_tmdb_service router = APIRouter() -@alru_cache(maxsize=1, ttl=24 * 60 * 60) -async def _cached_languages(): - tmdb = get_tmdb_service() - return await tmdb._make_request("/configuration/languages") - - @router.get("/api/languages") async def get_languages(): """ Proxy endpoint to fetch languages from TMDB. 
""" try: - languages = await _cached_languages() + tmdb = get_tmdb_service() + languages = await tmdb.get_languages() if not languages: return [] return languages except Exception as e: logger.error(f"Failed to fetch languages: {e}") raise HTTPException(status_code=502, detail="Failed to fetch languages from TMDB") - finally: - # shared client: no explicit close - pass diff --git a/app/api/endpoints/stats.py b/app/api/endpoints/stats.py new file mode 100644 index 0000000..9adfdcb --- /dev/null +++ b/app/api/endpoints/stats.py @@ -0,0 +1,20 @@ +from fastapi import APIRouter +from loguru import logger + +from app.services.token_store import token_store + +router = APIRouter() + + +@router.get("/stats") +async def get_stats() -> dict: + """Return lightweight public stats for the homepage. + + Total users is cached for 12 hours inside TokenStore to avoid heavy scans. + """ + try: + total = await token_store.count_users() + except Exception as exc: + logger.warning(f"Failed to get total users: {exc}") + total = 0 + return {"total_users": total} diff --git a/app/api/endpoints/tokens.py b/app/api/endpoints/tokens.py index 53cdef5..7272589 100644 --- a/app/api/endpoints/tokens.py +++ b/app/api/endpoints/tokens.py @@ -1,13 +1,13 @@ -import httpx +from datetime import datetime, timezone + from fastapi import APIRouter, HTTPException, Request from loguru import logger from pydantic import BaseModel, Field -from redis import exceptions as redis_exceptions from app.core.config import settings from app.core.security import redact_token from app.core.settings import CatalogConfig, UserSettings, get_default_settings -from app.services.stremio_service import StremioService +from app.services.stremio.service import StremioBundle from app.services.token_store import token_store router = APIRouter(prefix="/tokens", tags=["tokens"]) @@ -15,6 +15,8 @@ class TokenRequest(BaseModel): authKey: str | None = Field(default=None, description="Stremio auth key") + email: str | None = 
Field(default=None, description="Stremio account email") + password: str | None = Field(default=None, description="Stremio account password (stored securely)") catalogs: list[CatalogConfig] | None = Field(default=None, description="Optional catalog configuration") language: str = Field(default="en-US", description="Language for TMDB API") rpdb_key: str | None = Field(default=None, description="Optional RPDB API Key") @@ -31,65 +33,43 @@ class TokenResponse(BaseModel): ) -async def _verify_credentials_or_raise(payload: dict) -> str: - """Ensure the supplied credentials/auth key are valid before issuing tokens.""" - stremio_service = StremioService(auth_key=payload.get("authKey")) - +async def _verify_credentials_or_raise(bundle: StremioBundle, auth_key: str) -> str: + """Ensure the supplied auth key is valid.""" try: - if payload.get("authKey"): - await stremio_service.get_addons(auth_key=payload["authKey"]) - return payload["authKey"] - raise ValueError("Please Login using stremio account to continue!") - except ValueError as exc: + await bundle.auth.get_user_info(auth_key) + return auth_key + except Exception as exc: raise HTTPException( status_code=400, - detail=str(exc) or "Invalid Stremio credentials or auth key.", - ) from exc - except httpx.HTTPStatusError as exc: # pragma: no cover - depends on remote API - status_code = exc.response.status_code - logger.warning("Credential validation failed with status %s", status_code) - if status_code in {401, 403}: - raise HTTPException( - status_code=400, - detail="Invalid Stremio credentials or auth key. Please double-check and try again.", - ) from exc - raise HTTPException( - status_code=502, - detail="Stremio returned an unexpected response. 
Please try again shortly.", + detail="Invalid Stremio auth key.", ) from exc - except Exception as exc: # pragma: no cover - defensive - logger.error("Unexpected error while validating credentials: {}", exc, exc_info=True) - raise HTTPException( - status_code=502, - detail="Unable to reach Stremio right now. Please try again later.", - ) from exc - finally: - await stremio_service.close() @router.post("/", response_model=TokenResponse) async def create_token(payload: TokenRequest, request: Request) -> TokenResponse: - stremio_auth_key = payload.authKey.strip() if payload.authKey else None + # Prefer email+password if provided; else require authKey + email = (payload.email or "").strip() or None + password = (payload.password or "").strip() or None + stremio_auth_key = (payload.authKey or "").strip() or None - if not stremio_auth_key: - raise HTTPException(status_code=400, detail="Stremio auth key is required.") + if not (email and password) and not stremio_auth_key: + raise HTTPException(status_code=400, detail="Provide email+password or a valid Stremio auth key.") - # Remove quotes if present - if stremio_auth_key.startswith('"') and stremio_auth_key.endswith('"'): + # Remove quotes if present for authKey + if stremio_auth_key and stremio_auth_key.startswith('"') and stremio_auth_key.endswith('"'): stremio_auth_key = stremio_auth_key[1:-1].strip() - rpdb_key = payload.rpdb_key.strip() if payload.rpdb_key else None + bundle = StremioBundle() + # 1. Establish a valid auth key and fetch user info + if email and password: + stremio_auth_key = await bundle.auth.login(email, password) - # 1. 
Fetch user info from Stremio (user_id and email) - stremio_service = StremioService(auth_key=stremio_auth_key) try: - user_info = await stremio_service.get_user_info() + user_info = await bundle.auth.get_user_info(stremio_auth_key) user_id = user_info["user_id"] - email = user_info.get("email", "") + resolved_email = user_info.get("email", "") except Exception as e: raise HTTPException(status_code=400, detail=f"Failed to verify Stremio identity: {e}") - finally: - await stremio_service.close() # 2. Check if user already exists token = token_store.get_token_from_user_id(user_id) @@ -97,40 +77,39 @@ async def create_token(payload: TokenRequest, request: Request) -> TokenResponse # 3. Construct Settings default_settings = get_default_settings() - user_settings = UserSettings( language=payload.language or default_settings.language, catalogs=payload.catalogs if payload.catalogs else default_settings.catalogs, - rpdb_key=rpdb_key, + rpdb_key=payload.rpdb_key.strip() if payload.rpdb_key else None, excluded_movie_genres=payload.excluded_movie_genres, excluded_series_genres=payload.excluded_series_genres, ) - is_new_account = not existing_data - - # 4. Verify Stremio connection - verified_auth_key = await _verify_credentials_or_raise({"authKey": stremio_auth_key}) - - # 5. Prepare payload to store + # 4. Prepare payload to store payload_to_store = { - "authKey": verified_auth_key, - "email": email, + "authKey": stremio_auth_key, + "email": resolved_email or email or "", "settings": user_settings.model_dump(), } + if existing_data: + payload_to_store["last_updated"] = existing_data.get("last_updated") + else: + payload_to_store["last_updated"] = datetime.now(timezone.utc).isoformat() - # 6. 
Store user data - try: - token = await token_store.store_user_data(user_id, payload_to_store) - logger.info(f"[{redact_token(token)}] Account {'created' if is_new_account else 'updated'} for user {user_id}") - except RuntimeError as exc: - raise HTTPException(status_code=500, detail="Server configuration error.") from exc - except (redis_exceptions.RedisError, OSError) as exc: - raise HTTPException(status_code=503, detail="Storage temporarily unavailable.") from exc + if email and password: + payload_to_store["password"] = password + + # 5. Store user data + token = await token_store.store_user_data(user_id, payload_to_store) + logger.info(f"[{redact_token(token)}] Account {'updated' if existing_data else 'created'} for user {user_id}") base_url = settings.HOST_NAME manifest_url = f"{base_url}/{token}/manifest.json" + # Maybe generate manifest and check if catalogs exist and if not raise error? expires_in = settings.TOKEN_TTL_SECONDS if settings.TOKEN_TTL_SECONDS > 0 else None + await bundle.close() + return TokenResponse( token=token, manifestUrl=manifest_url, @@ -139,27 +118,33 @@ async def create_token(payload: TokenRequest, request: Request) -> TokenResponse async def get_stremio_user_data(payload: TokenRequest) -> tuple[str, str]: - auth_key = payload.authKey.strip() if payload.authKey else None - - if not auth_key: - raise HTTPException(status_code=400, detail="Auth Key required.") - - if auth_key.startswith('"') and auth_key.endswith('"'): - auth_key = auth_key[1:-1].strip() - - stremio_service = StremioService(auth_key=auth_key) + bundle = StremioBundle() try: - user_info = await stremio_service.get_user_info() - user_id = user_info["user_id"] - email = user_info.get("email", "") - return user_id, email - except Exception as e: - logger.error(f"Stremio identity check failed: {e}") - raise HTTPException( - status_code=400, detail="Failed to verify Stremio identity. Your auth key might be invalid or expired." 
- ) + email = (payload.email or "").strip() or None + password = (payload.password or "").strip() or None + auth_key = (payload.authKey or "").strip() or None + + if email and password: + try: + auth_key = await bundle.auth.login(email, password) + user_info = await bundle.auth.get_user_info(auth_key) + return user_info["user_id"], user_info.get("email", email) + except Exception as e: + logger.error(f"Stremio identity check failed: {e}") + raise HTTPException(status_code=400, detail="Failed to verify Stremio identity.") + elif auth_key: + if auth_key.startswith('"') and auth_key.endswith('"'): + auth_key = auth_key[1:-1].strip() + try: + user_info = await bundle.auth.get_user_info(auth_key) + return user_info["user_id"], user_info.get("email", "") + except Exception as e: + logger.error(f"Stremio identity check failed: {e}") + raise HTTPException(status_code=400, detail="Invalid Stremio auth key.") + else: + raise HTTPException(status_code=400, detail="Credentials required.") finally: - await stremio_service.close() + await bundle.close() @router.post("/stremio-identity", status_code=200) @@ -167,44 +152,34 @@ async def check_stremio_identity(payload: TokenRequest): """Fetch user info from Stremio and check if account exists.""" user_id, email = await get_stremio_user_data(payload) try: - # Check existence token = token_store.get_token_from_user_id(user_id) user_data = await token_store.get_user_data(token) exists = bool(user_data) - except ValueError: + except Exception: exists = False user_data = None response = {"user_id": user_id, "email": email, "exists": exists} if exists and user_data: response["settings"] = user_data.get("settings") - return response @router.delete("/", status_code=200) -async def delete_token(payload: TokenRequest): - """Delete a token based on Stremio auth key.""" +async def delete_redis_token(payload: TokenRequest): + """Delete a token based on Stremio credentials.""" try: user_id, _ = await get_stremio_user_data(payload) - - # Get 
token from user_id token = token_store.get_token_from_user_id(user_id) - - # Verify account exists existing_data = await token_store.get_user_data(token) if not existing_data: raise HTTPException(status_code=404, detail="Account not found.") - # Delete the token await token_store.delete_token(token) logger.info(f"[{redact_token(token)}] Token deleted for user {user_id}") return {"detail": "Settings deleted successfully"} except HTTPException: raise - except (redis_exceptions.RedisError, OSError) as exc: - logger.error("Token deletion failed: {}", exc) - raise HTTPException( - status_code=503, - detail="Token deletion is temporarily unavailable. Please try again once Redis is reachable.", - ) from exc + except Exception as exc: + logger.error(f"Token deletion failed: {exc}") + raise HTTPException(status_code=503, detail="Storage temporarily unavailable.") diff --git a/app/api/main.py b/app/api/main.py index c0fd953..489b4ec 100644 --- a/app/api/main.py +++ b/app/api/main.py @@ -5,6 +5,7 @@ from .endpoints.health import router as health_router from .endpoints.manifest import router as manifest_router from .endpoints.meta import router as meta_router +from .endpoints.stats import router as stats_router from .endpoints.tokens import router as tokens_router api_router = APIRouter() @@ -21,3 +22,4 @@ async def root(): api_router.include_router(health_router) api_router.include_router(meta_router) api_router.include_router(announcement_router) +api_router.include_router(stats_router) diff --git a/app/core/app.py b/app/core/app.py index 11270b6..e31d770 100644 --- a/app/core/app.py +++ b/app/core/app.py @@ -1,4 +1,3 @@ -import asyncio import os from contextlib import asynccontextmanager from pathlib import Path @@ -11,60 +10,18 @@ from loguru import logger from app.api.main import api_router -from app.services.catalog_updater import BackgroundCatalogUpdater from app.services.token_store import token_store -from app.startup.migration import migrate_tokens from .config 
import settings from .version import __version__ -# class InterceptHandler(logging.Handler): -# def emit(self, record): -# try: -# level = logger.level(record.levelname).name -# except Exception: -# level = record.levelno - -# logger.opt(depth=6, exception=record.exc_info).log(level, record.getMessage()) - - -# logging.basicConfig(handlers=[InterceptHandler()], level=logging.INFO, force=True) - -# Global catalog updater instance -catalog_updater: BackgroundCatalogUpdater | None = None - @asynccontextmanager async def lifespan(app: FastAPI): """ Manage application lifespan events (startup/shutdown). """ - global catalog_updater - - if settings.HOST_NAME.lower() != "https://1ccea4301587-watchly.baby-beamup.club": - task = asyncio.create_task(migrate_tokens()) - - # Ensure background exceptions are surfaced in logs - def _on_done(t: asyncio.Task): - try: - t.result() - except Exception as exc: - logger.error(f"migrate_tokens background task failed: {exc}") - - task.add_done_callback(_on_done) - - # Startup - if settings.AUTO_UPDATE_CATALOGS: - catalog_updater = BackgroundCatalogUpdater() - catalog_updater.start() yield - - # Shutdown - if catalog_updater: - await catalog_updater.stop() - catalog_updater = None - logger.info("Background catalog updates stopped") - # Close shared token store Redis client try: await token_store.close() logger.info("TokenStore Redis client closed") @@ -89,7 +46,7 @@ def _on_done(t: asyncio.Task): app.add_middleware( CORSMiddleware, allow_origins=["*"], - allow_credentials=True, + allow_credentials=False, allow_methods=["*"], allow_headers=["*"], ) diff --git a/app/core/base_client.py b/app/core/base_client.py new file mode 100644 index 0000000..25a4ade --- /dev/null +++ b/app/core/base_client.py @@ -0,0 +1,71 @@ +import asyncio +from typing import Any + +import httpx +from loguru import logger + + +class BaseClient: + """ + Base asynchronous HTTP client with built-in retry logic and logging. 
+ """ + + def __init__( + self, base_url: str = "", timeout: float = 10.0, max_retries: int = 3, headers: dict[str, str] | None = None + ): + self.base_url = base_url + self.timeout = timeout + self.max_retries = max_retries + self.headers = headers or {} + self._client: httpx.AsyncClient | None = None + + async def get_client(self) -> httpx.AsyncClient: + """Get or create the httpx.AsyncClient instance.""" + if self._client is None or self._client.is_closed: + self._client = httpx.AsyncClient( + base_url=self.base_url, timeout=self.timeout, headers=self.headers, follow_redirects=True + ) + return self._client + + async def close(self): + """Close the underlying HTTP client.""" + if self._client and not self._client.is_closed: + await self._client.aclose() + self._client = None + + async def _request(self, method: str, url: str, max_tries: int | None = None, **kwargs) -> httpx.Response: + """Internal request handler with retry logic.""" + client = await self.get_client() + tries = max_tries or self.max_retries + last_exception = None + + for attempt in range(1, tries + 1): + try: + response = await client.request(method, url, **kwargs) + response.raise_for_status() + return response + except (httpx.HTTPStatusError, httpx.RequestError) as e: + last_exception = e + if attempt < tries: + wait_time = 0.5 * (2 ** (attempt - 1)) # Exponential backoff + logger.warning( + f"Request failed ({method} {url}): {str(e)}. " + f"Retrying in {wait_time}s... 
(Attempt {attempt}/{tries})" + ) + await asyncio.sleep(wait_time) + else: + logger.error(f"Request failed after {tries} attempts: {str(e)}") + + if last_exception: + raise last_exception + raise httpx.RequestError("Request failed for unknown reasons") + + async def get(self, url: str, params: dict[str, Any] | None = None, **kwargs) -> dict[str, Any]: + """Perform a GET request and return the JSON response.""" + response = await self._request("GET", url, params=params, **kwargs) + return response.json() + + async def post(self, url: str, json: dict[str, Any] | None = None, **kwargs) -> dict[str, Any]: + """Perform a POST request and return the JSON response.""" + response = await self._request("POST", url, json=json, **kwargs) + return response.json() diff --git a/app/core/config.py b/app/core/config.py index ebd4b85..f6ed97d 100644 --- a/app/core/config.py +++ b/app/core/config.py @@ -31,13 +31,12 @@ class Settings(BaseSettings): TOKEN_TTL_SECONDS: int = 0 # 0 = never expire ANNOUNCEMENT_HTML: str = "" AUTO_UPDATE_CATALOGS: bool = True - CATALOG_UPDATE_MODE: Literal["cron", "interval"] = "cron" # "cron" for fixed times, "interval" for periodic - CATALOG_UPDATE_CRON_SCHEDULES: list[dict] = ({"hour": 0, "minute": 0, "id": "catalog_refresh_midnight"},) - CATALOG_REFRESH_INTERVAL_SECONDS: int = 6 * 60 * 60 # 6 hours (used when CATALOG_UPDATE_MODE="interval") + CATALOG_REFRESH_INTERVAL_SECONDS: int = 43200 # 12 hours APP_ENV: Literal["development", "production", "vercel"] = "development" HOST_NAME: str = "https://1ccea4301587-watchly.baby-beamup.club" RECOMMENDATION_SOURCE_ITEMS_LIMIT: int = 10 + LIBRARY_ITEMS_LIMIT: int = 20 # AI DEFAULT_GEMINI_MODEL: str = "gemma-3-27b-it" diff --git a/app/core/settings.py b/app/core/settings.py index b246215..464b694 100644 --- a/app/core/settings.py +++ b/app/core/settings.py @@ -5,6 +5,8 @@ class CatalogConfig(BaseModel): id: str # "watchly.rec", "watchly.theme", "watchly.item" name: str | None = None enabled: bool = True + 
min_items: int = Field(default=20, ge=1, le=20) + max_items: int = Field(default=24, ge=1, le=32) class UserSettings(BaseModel): diff --git a/app/core/version.py b/app/core/version.py index c72e379..7b1e312 100644 --- a/app/core/version.py +++ b/app/core/version.py @@ -1 +1 @@ -__version__ = "1.1.4" +__version__ = "1.3.3" diff --git a/app/services/catalog.py b/app/services/catalog.py index 7525f5c..fca58d9 100644 --- a/app/services/catalog.py +++ b/app/services/catalog.py @@ -1,11 +1,15 @@ +import asyncio +import random from datetime import datetime, timezone +from typing import Any + +from loguru import logger from app.core.settings import CatalogConfig, UserSettings +from app.services.profile.service import UserProfileService from app.services.row_generator import RowGeneratorService from app.services.scoring import ScoringService -from app.services.stremio_service import StremioService -from app.services.tmdb_service import get_tmdb_service -from app.services.user_profile import UserProfileService +from app.services.tmdb.service import get_tmdb_service class DynamicCatalogService: @@ -13,12 +17,12 @@ class DynamicCatalogService: Generates dynamic catalog rows based on user library and preferences. """ - def __init__(self, stremio_service: StremioService, language: str = "en-US"): - self.stremio_service = stremio_service + def __init__(self, language: str = "en-US"): self.tmdb_service = get_tmdb_service(language=language) self.scoring_service = ScoringService() self.user_profile_service = UserProfileService(language=language) self.row_generator = RowGeneratorService(tmdb_service=self.tmdb_service) + self.HISTORY_LIMIT = 30 @staticmethod def normalize_type(type_): @@ -45,149 +49,143 @@ def build_catalog_entry(self, item, label, config_id): } async def get_theme_based_catalogs( - self, library_items: list[dict], user_settings: UserSettings | None = None + self, library_items: dict, user_settings: UserSettings | None = None ) -> list[dict]: - catalogs = [] - - # 1. 
Build User Profile - # Combine loved and watched - all_items = library_items.get("loved", []) + library_items.get("watched", []) - - # Deduplicate + """Build thematic catalogs by profiling recently watched items.""" + # 1. Prepare Scored History + all_items = library_items.get("loved", []) + library_items.get("watched", []) + library_items.get("liked", []) unique_items = {item["_id"]: item for item in all_items} - # Score items - scored_objects = [] - - # Use only recent history for freshness - sorted_history = sorted(unique_items.values(), key=lambda x: x.get("_mtime", ""), reverse=True) - recent_history = sorted_history[:30] + sorted_history = sorted( + unique_items.values(), + key=lambda x: self._parse_item_last_watched(x), + reverse=True, + ) + recent_history = sorted_history[: self.HISTORY_LIMIT] - for item_data in recent_history: - scored_obj = self.scoring_service.process_item(item_data) - scored_objects.append(scored_obj) + scored_objects = [self.scoring_service.process_item(item) for item in recent_history] - # Get excluded genres + # 2. Extract Genre Filters excluded_movie_genres = [] excluded_series_genres = [] if user_settings: excluded_movie_genres = [int(g) for g in user_settings.excluded_movie_genres] excluded_series_genres = [int(g) for g in user_settings.excluded_series_genres] - # 2. Generate Thematic Rows with Type-Specific Profiles - # Generate for Movies - movie_profile = await self.user_profile_service.build_user_profile( - scored_objects, content_type="movie", excluded_genres=excluded_movie_genres - ) - movie_rows = await self.row_generator.generate_rows(movie_profile, "movie") - - for row in movie_rows: - # translated_title = await translation_service.translate(row.title, lang) - catalogs.append({"type": "movie", "id": row.id, "name": row.title, "extra": []}) - - # Generate for Series - series_profile = await self.user_profile_service.build_user_profile( - scored_objects, content_type="series", excluded_genres=excluded_series_genres + # 3. 
Generate Rows + async def _generate_for_type(media_type: str, genres: list[int]): + profile = await self.user_profile_service.build_user_profile( + scored_objects, content_type=media_type, excluded_genres=genres + ) + return await self.row_generator.generate_rows(profile, media_type) + + results = await asyncio.gather( + _generate_for_type("movie", excluded_movie_genres), + _generate_for_type("series", excluded_series_genres), + return_exceptions=True, ) - series_rows = await self.row_generator.generate_rows(series_profile, "series") - for row in series_rows: - # translated_title = await translation_service.translate(row.title, lang) - catalogs.append({"type": "series", "id": row.id, "name": row.title, "extra": []}) + # 4. Assembly with error handling + catalogs = [] + for idx, media_type in enumerate(["movie", "series"]): + res = results[idx] + if isinstance(res, Exception): + logger.error(f"Failed to generate thematic rows for {media_type}: {res}") + continue + for row in res: + catalogs.append({"type": media_type, "id": row.id, "name": row.title, "extra": []}) return catalogs - async def get_dynamic_catalogs( - self, library_items: list[dict], user_settings: UserSettings | None = None - ) -> list[dict]: - """ - Generate all dynamic catalog rows. - """ + async def get_dynamic_catalogs(self, library_items: dict, user_settings: UserSettings | None = None) -> list[dict]: + """Generate all dynamic catalog rows based on enabled configurations.""" catalogs = [] - lang = user_settings.language if user_settings else "en-US" + if not user_settings: + return catalogs - # Theme Based - theme_config = next((c for c in user_settings.catalogs if c.id == "watchly.theme"), None) + # 1. Resolve Configs + theme_cfg, loved_cfg, watched_cfg = self._resolve_catalog_configs(user_settings) - if theme_config and theme_config.enabled: + # 2. 
Add Thematic Catalogs + if theme_cfg and theme_cfg.enabled: catalogs.extend(await self.get_theme_based_catalogs(library_items, user_settings)) - # Item Based (Loved/Watched) - loved_config = next((c for c in user_settings.catalogs if c.id == "watchly.loved"), None) - watched_config = next((c for c in user_settings.catalogs if c.id == "watchly.watched"), None) - - # Fallback for old settings (watchly.item) - if not loved_config and not watched_config: - old_config = next((c for c in user_settings.catalogs if c.id == "watchly.item"), None) - if old_config and old_config.enabled: - # Create temporary configs - loved_config = CatalogConfig(id="watchly.loved", name=None, enabled=True) - watched_config = CatalogConfig(id="watchly.watched", name=None, enabled=True) - - # Movies - await self._add_item_based_rows(catalogs, library_items, "movie", lang, loved_config, watched_config) - # Series - await self._add_item_based_rows(catalogs, library_items, "series", lang, loved_config, watched_config) + # 3. 
Add Item-Based Catalogs (Movies & Series) + for mtype in ["movie", "series"]: + await self._add_item_based_rows(catalogs, library_items, mtype, loved_cfg, watched_cfg) return catalogs + def _resolve_catalog_configs(self, user_settings: UserSettings) -> tuple[Any, Any, Any]: + """Extract and fallback catalog configurations from user settings.""" + cfg_map = {c.id: c for c in user_settings.catalogs} + + theme = cfg_map.get("watchly.theme") + loved = cfg_map.get("watchly.loved") + watched = cfg_map.get("watchly.watched") + + # Fallback for old settings format (watchly.item) + if not loved and not watched: + old_item = cfg_map.get("watchly.item") + if old_item and old_item.enabled: + loved = CatalogConfig(id="watchly.loved", name=None, enabled=True) + watched = CatalogConfig(id="watchly.watched", name=None, enabled=True) + + return theme, loved, watched + + def _parse_item_last_watched(self, item: dict) -> datetime: + """Helper to extract and parse the most relevant activity date for an item.""" + val = item.get("state", {}).get("lastWatched") + if val: + try: + if isinstance(val, str): + return datetime.fromisoformat(val.replace("Z", "+00:00")) + return val + except (ValueError, TypeError): + pass + + # Fallback to mtime + val = item.get("_mtime") + if val: + try: + return datetime.fromisoformat(str(val).replace("Z", "+00:00")) + except (ValueError, TypeError): + pass + return datetime.min.replace(tzinfo=timezone.utc) + async def _add_item_based_rows( self, catalogs: list, library_items: dict, content_type: str, - language: str, loved_config, watched_config, ): - """Helper to add 'Because you watched' and 'More like' rows.""" - - # Helper to parse date - def get_date(item): - - val = item.get("state", {}).get("lastWatched") - if val: - try: - if isinstance(val, str): - return datetime.fromisoformat(val.replace("Z", "+00:00")) - return val - except (ValueError, TypeError): - pass - # Fallback to mtime - val = item.get("_mtime") - if val: - try: - return 
datetime.fromisoformat(str(val).replace("Z", "+00:00")) - except (ValueError, TypeError): - pass - return datetime.min.replace(tzinfo=timezone.utc) # 1. More Like last_loved = None # Initialize for the watched check if loved_config and loved_config.enabled: loved = [i for i in library_items.get("loved", []) if i.get("type") == content_type] - loved.sort(key=get_date, reverse=True) + loved.sort(key=self._parse_item_last_watched, reverse=True) - last_loved = loved[0] if loved else None + # gather random last loved from last 3 items + last_loved = random.choice(loved[:3]) if loved else None if last_loved: - label = loved_config.name - + label = loved_config.name if loved_config.name else "More like" catalogs.append(self.build_catalog_entry(last_loved, label, "watchly.loved")) # 2. Because you watched if watched_config and watched_config.enabled: watched = [i for i in library_items.get("watched", []) if i.get("type") == content_type] - watched.sort(key=get_date, reverse=True) + watched.sort(key=self._parse_item_last_watched, reverse=True) + + # watched cannot be similar to loved + if last_loved: + watched = [i for i in watched if i.get("_id") != last_loved.get("_id")] - last_watched = None - for item in watched: - # Avoid duplicate row if it's the same item as 'More like' - if last_loved and item.get("_id") == last_loved.get("_id"): - continue - last_watched = item - break + # gather random last watched from last 3 items + last_watched = random.choice(watched[:3]) if watched else None if last_watched: - label = watched_config.name - + label = watched_config.name if watched_config.name else "Because you watched" catalogs.append(self.build_catalog_entry(last_watched, label, "watchly.watched")) diff --git a/app/services/catalog_updater.py b/app/services/catalog_updater.py index b925d75..acb82f5 100644 --- a/app/services/catalog_updater.py +++ b/app/services/catalog_updater.py @@ -1,9 +1,7 @@ import asyncio +from datetime import datetime, timezone from typing import Any 
-from apscheduler.schedulers.asyncio import AsyncIOScheduler -from apscheduler.triggers.cron import CronTrigger -from apscheduler.triggers.interval import IntervalTrigger from fastapi import HTTPException from loguru import logger @@ -11,165 +9,205 @@ from app.core.security import redact_token from app.core.settings import UserSettings, get_default_settings from app.services.catalog import DynamicCatalogService -from app.services.stremio_service import StremioService +from app.services.stremio.service import StremioBundle from app.services.token_store import token_store from app.services.translation import translation_service -# Max number of concurrent updates to prevent overwhelming external APIs -MAX_CONCURRENT_UPDATES = 5 +def get_config_id(catalog) -> str | None: + catalog_id = catalog.get("id", "") + if catalog_id.startswith("watchly.theme."): + return "watchly.theme" + if catalog_id.startswith("watchly.loved."): + return "watchly.loved" + if catalog_id.startswith("watchly.watched."): + return "watchly.watched" + if catalog_id.startswith("watchly.item."): + return "watchly.item" + if catalog_id.startswith("watchly.rec"): + return "watchly.rec" + return catalog_id -async def refresh_catalogs_for_credentials(token: str, credentials: dict[str, Any]) -> bool: - if not credentials: - logger.warning(f"[{redact_token(token)}] Attempted to refresh catalogs with no credentials.") - raise HTTPException(status_code=401, detail="Invalid or expired token. Please reconfigure the addon.") - auth_key = credentials.get("authKey") - stremio_service = StremioService(auth_key=auth_key) - # check if user has addon installed or not - try: - addon_installed = await stremio_service.is_addon_installed(auth_key) - if not addon_installed: - logger.info(f"[{redact_token(token)}] User has not installed addon. 
Removing token from redis") - # Ensure we delete by token, not by raw Redis key - await token_store.delete_token(token=token) +class CatalogUpdater: + """ + Catalog updater that triggers updates on-demand when users request catalogs. + Uses in-memory locking to prevent duplicate concurrent updates. + """ + + def __init__(self): + # In-memory lock to prevent duplicate updates for the same token + self._updating_tokens: set[str] = set() + + def _needs_update(self, credentials: dict[str, Any]) -> bool: + """Check if catalog update is needed based on last_updated timestamp.""" + if not credentials: + return False + + last_updated = credentials.get("last_updated") + if not last_updated: + # No timestamp means never updated, needs update return True - except Exception as e: - logger.exception(f"[{redact_token(token)}] Failed to check if addon is installed: {e}") - try: - # Ensure user_settings is available - user_settings = get_default_settings() - if credentials.get("settings"): + try: + # Parse ISO format timestamp + if isinstance(last_updated, str): + last_update_time = datetime.fromisoformat(last_updated.replace("Z", "+00:00")) + else: + last_update_time = last_updated + + # Check if more than 11 hours have passed (update if less than 1 hour remaining) + now = datetime.now(timezone.utc) + if last_update_time.tzinfo is None: + last_update_time = last_update_time.replace(tzinfo=timezone.utc) + + time_since_update = (now - last_update_time).total_seconds() + # Update if less than 1 hour remaining until next update + return time_since_update >= (settings.CATALOG_REFRESH_INTERVAL_SECONDS - 3600) + except (ValueError, TypeError, AttributeError) as e: + logger.warning(f"Failed to parse last_updated timestamp: {e}. Treating as needs update.") + return True + + async def refresh_catalogs_for_credentials( + self, token: str, credentials: dict[str, Any], update_timestamp: bool = True + ) -> bool: + """ + Refresh catalogs for a user's credentials. 
+ + Args: + token: User token + credentials: User credentials dict + update_timestamp: Whether to update last_updated timestamp on success + + Returns: + True if update was successful, False otherwise + """ + if not credentials: + logger.warning(f"[{redact_token(token)}] Attempted to refresh catalogs with no credentials.") + raise HTTPException(status_code=401, detail="Invalid or expired token. Please reconfigure the addon.") + + auth_key = credentials.get("authKey") + # check if auth key is valid + bundle = StremioBundle() + try: try: - user_settings = UserSettings(**credentials["settings"]) + await bundle.auth.get_user_info(auth_key) except Exception as e: - user_settings = get_default_settings() - logger.warning(f"[{redact_token(token)}] Failed to parse user settings from credentials: {e}") - # force fresh library for background refresh - library_items = await stremio_service.get_library_items(use_cache=False) - dynamic_catalog_service = DynamicCatalogService( - stremio_service=stremio_service, - language=(user_settings.language if user_settings else "en-US"), - ) - catalogs = await dynamic_catalog_service.get_dynamic_catalogs( - library_items=library_items, user_settings=user_settings - ) - - if user_settings and user_settings.language: - for cat in catalogs: - if name := cat.get("name"): - cat["name"] = await translation_service.translate(name, user_settings.language) - logger.info(f"[{redact_token(token)}] Prepared {len(catalogs)} catalogs") - return await stremio_service.update_catalogs(catalogs, auth_key) - except Exception as e: - logger.exception(f"[{redact_token(token)}] Failed to update catalogs: {e}", exc_info=True) - raise e - finally: - await stremio_service.close() - - -class BackgroundCatalogUpdater: - """Periodic job that refreshes catalogs for every stored credential token. 
- - Supports two modes: - - "cron": Runs twice daily at 12:00 PM UTC and 00:00 UTC (midnight) - - "interval": Runs every CATALOG_REFRESH_INTERVAL_SECONDS - """ + logger.exception(f"[{redact_token(token)}] Invalid auth key. Falling back to login: {e}") + email = credentials.get("email") + password = credentials.get("password") + if email and password: + auth_key = await bundle.auth.login(email, password) + credentials["authKey"] = auth_key + await token_store.update_user_data(token, credentials) + else: + return True # true since we won't be able to update it again. so no need to try again. + + # 1. Check if addon is still installed + try: + addon_installed = await bundle.addons.is_addon_installed(auth_key) + if not addon_installed: + logger.info(f"[{redact_token(token)}] User has not installed addon. Removing token from redis") + return True + except Exception as e: + logger.exception(f"[{redact_token(token)}] Failed to check if addon is installed: {e}") + return False - def __init__(self) -> None: - self.scheduler = AsyncIOScheduler(timezone="UTC") - self.update_mode = settings.CATALOG_UPDATE_MODE + # 2. Extract settings and refresh + user_settings = get_default_settings() + if credentials.get("settings"): + try: + user_settings = UserSettings(**credentials["settings"]) + except Exception as e: + logger.exception(f"[{redact_token(token)}] Failed to parse user settings: {e}") - def start(self) -> None: - if self.scheduler.running: - return + # Fetch fresh library + library_items = await bundle.library.get_library_items(auth_key) - if self.update_mode == "cron": - logger.info(f"Starting background catalog updater. 
Schedule: {settings.CATALOG_UPDATE_CRON_SCHEDULES}") - job_defaults = { - "func": self.refresh_all_tokens, - "replace_existing": True, - "max_instances": 1, - "coalesce": True, - } - for schedule in settings.CATALOG_UPDATE_CRON_SCHEDULES: - self.scheduler.add_job( - trigger=CronTrigger(hour=schedule["hour"], minute=schedule["minute"], timezone="UTC"), - id=schedule["id"], - **job_defaults, - ) - else: # interval mode - interval_seconds = max(3600, settings.CATALOG_REFRESH_INTERVAL_SECONDS) # minimum 1 hour - interval_hours = interval_seconds // 3600 - logger.info(f"Starting background catalog updater. Interval: {interval_seconds}s ({interval_hours} hours)") - - self.scheduler.add_job( - self.refresh_all_tokens, - trigger=IntervalTrigger(seconds=interval_seconds), - id="catalog_refresh", - replace_existing=True, - max_instances=1, # Prevent new job from starting if previous one is still running - coalesce=True, # If multiple runs are missed, only run once + dynamic_catalog_service = DynamicCatalogService( + language=(user_settings.language if user_settings else "en-US"), ) - self.scheduler.start() + catalogs = await dynamic_catalog_service.get_dynamic_catalogs( + library_items=library_items, user_settings=user_settings + ) - async def stop(self) -> None: - if self.scheduler.running: - logger.info("Stopping background catalog updater...") - self.scheduler.shutdown(wait=True) # Wait for running jobs to complete - logger.info("Background catalog updater stopped.") + # Translate catalogs + if user_settings and user_settings.language: + for cat in catalogs: + if name := cat.get("name"): + try: + cat["name"] = await translation_service.translate(name, user_settings.language) + except Exception as e: + logger.warning(f"Failed to translate catalog name '{name}': {e}") + continue + + logger.info(f"[{redact_token(token)}] Prepared {len(catalogs)} catalogs for background refresh") + # sort catalogs by order in user settings + if user_settings: + order_map = {c.id: i for i, c 
in enumerate(user_settings.catalogs)} + catalogs.sort(key=lambda x: order_map.get(get_config_id(x), 999)) + + success = await bundle.addons.update_catalogs(auth_key, catalogs) + + # Update timestamp and invalidate cache only on success + if success and update_timestamp: + try: + # Update last_updated timestamp to current time + # This represents when the update completed successfully + now = datetime.now(timezone.utc) + last_updated_str = now.replace(microsecond=0).isoformat() + credentials["last_updated"] = last_updated_str + await token_store.update_user_data(token, credentials) + logger.debug(f"[{redact_token(token)}] Updated last_updated timestamp to {last_updated_str}") + except Exception as e: + logger.warning(f"[{redact_token(token)}] Failed to update last_updated timestamp: {e}") + + return success + + except Exception as e: + logger.exception(f"[{redact_token(token)}] Failed to update catalogs in background: {e}") + return False + finally: + await bundle.close() + + async def trigger_update(self, token: str, credentials: dict[str, Any]) -> None: + """ + Trigger a catalog update if needed. + This function checks if update is needed and fires a background task. + Uses in-memory lock to prevent duplicate updates. 
+ """ + # Check if already updating + if token in self._updating_tokens: + logger.debug(f"[{redact_token(token)}] Update already in progress, skipping") + return - async def refresh_all_tokens(self) -> None: - """Refresh catalogs for all tokens concurrently with a semaphore.""" - tasks = [] - sem = asyncio.Semaphore(MAX_CONCURRENT_UPDATES) + # Check if update is needed + if not self._needs_update(credentials): + logger.debug(f"[{redact_token(token)}] Catalog update not needed yet") + return - async def _update_safe(key: str, payload: dict[str, Any]) -> None: - if not payload.get("authKey"): - logger.debug( - f"Skipping token {redact_token(key)} with incomplete credentials", - ) - return + # Add to lock and fire background update + self._updating_tokens.add(token) + logger.info(f"[{redact_token(token)}] Triggering catalog update") - async with sem: - try: - updated = await refresh_catalogs_for_credentials(key, payload) - logger.info( - f"Background refresh for {redact_token(key)} completed (updated={updated})", - ) - except Exception as exc: - logger.error(f"Background refresh failed for {redact_token(key)}: {exc}", exc_info=True) + # Fire and forget background task + asyncio.create_task(self._update_task(token, credentials)) + async def _update_task(self, token: str, credentials: dict[str, Any]) -> None: + """Background task that performs the actual catalog update.""" try: - # Check Redis connected clients and back off if overloaded - try: - client = await token_store._get_client() - info = await client.info(section="clients") - connected = int(info.get("connected_clients", 0)) - threshold = getattr(settings, "REDIS_CONNECTIONS_THRESHOLD", 1000) - if connected > threshold: - logger.warning( - f"Redis connected clients {connected} exceed threshold {threshold}; skipping" - "background refresh." 
- ) - return - except Exception as exc: - logger.warning(f"Failed to check Redis client info before refresh: {exc}") - - async for key, payload in token_store.iter_payloads(): - # Extract token from redis key prefix - prefix = token_store.KEY_PREFIX - tok = key[len(prefix) :] if key.startswith(prefix) else key # noqa - tasks.append(asyncio.create_task(_update_safe(tok, payload))) - - if tasks: - logger.info(f"Starting background refresh for {len(tasks)} tokens...") - await asyncio.gather(*tasks) - logger.info(f"Completed background refresh for {len(tasks)} tokens.") + success = await self.refresh_catalogs_for_credentials(token, credentials, update_timestamp=True) + if success: + logger.info(f"[{redact_token(token)}] Catalog update completed successfully") else: - logger.info("No tokens found to refresh.") + logger.warning(f"[{redact_token(token)}] Catalog update completed with failure") + except Exception as e: + logger.exception(f"[{redact_token(token)}] Catalog update task failed: {e}") + finally: + # Always remove from lock + self._updating_tokens.discard(token) + - except Exception as exc: - logger.error(f"Catalog refresh scan failed: {exc}", exc_info=True) +logger.info(f"Catalog updater initialized with refresh interval of {settings.CATALOG_REFRESH_INTERVAL_SECONDS} seconds") +catalog_updater = CatalogUpdater() diff --git a/app/services/discovery.py b/app/services/discovery.py index 6f0d09c..7fda484 100644 --- a/app/services/discovery.py +++ b/app/services/discovery.py @@ -1,7 +1,10 @@ import asyncio +from typing import Any + +from loguru import logger from app.models.profile import UserTasteProfile -from app.services.tmdb_service import get_tmdb_service +from app.services.tmdb.service import get_tmdb_service class DiscoveryEngine: @@ -19,7 +22,6 @@ async def discover_recommendations( self, profile: UserTasteProfile, content_type: str, - limit: int = 20, excluded_genres: list[int] | None = None, *, use_genres: bool = False, @@ -30,138 +32,28 @@ async def 
discover_recommendations( use_year: bool = False, ) -> list[dict]: """ - Find content that matches the user's taste profile. - Strategy: - 1. Extract top weighted Genres, Keywords, Actors, Director. - 2. Build specific 'Discovery Queries' for each category. - 3. Fetch results in parallel. - 4. Return the combined candidate set (B). + Find content that matches the user's taste profile using multi-phase TMDB discovery. """ - # 1. Extract Top Features - top_genres = profile.get_top_genres(limit=3) if use_genres else [] # e.g. [(28, 1.0), (878, 0.8)] - top_keywords = profile.get_top_keywords(limit=3) if use_keywords else [] # e.g. [(123, 0.9)] - # Need to add get_top_cast to UserTasteProfile model first, assuming it exists or using profile.cast directly - # Based on previous step, profile.cast exists. - top_cast = profile.cast.get_top_features(limit=2) if use_cast else [] - top_crew = profile.get_top_crew(limit=1) if use_director else [] # e.g. [(555, 1.0)] - Director - - top_countries = profile.get_top_countries(limit=2) if use_countries else [] - top_year = profile.get_top_year(limit=1) if use_year else [] - - if not top_genres and not top_keywords and not top_cast: - # Fallback if profile is empty + # 1. 
Build Phase 1 Tasks + tasks = self._build_discovery_tasks_phase1( + profile, + content_type, + excluded_genres, + use_genres=use_genres, + use_keywords=use_keywords, + use_cast=use_cast, + use_director=use_director, + use_countries=use_countries, + use_year=use_year, + ) + + if not tasks: return [] - tasks = [] - base_params = {} - if excluded_genres: - base_params["without_genres"] = "|".join([str(g) for g in excluded_genres]) - - # Phase 1: build first-page tasks only - if top_genres: - genre_ids = "|".join([str(g[0]) for g in top_genres]) - params_popular = { - "with_genres": genre_ids, - "sort_by": "popularity.desc", - "vote_count.gte": 500, - **base_params, - } - tasks.append(self._fetch_discovery(content_type, params_popular)) - params_rating = { - "with_genres": genre_ids, - "sort_by": "vote_average.desc", - "vote_count.gte": 500, - **base_params, - } - tasks.append(self._fetch_discovery(content_type, params_rating)) - - # Query 2: Top Keywords - if top_keywords: - keyword_ids = "|".join([str(k[0]) for k in top_keywords]) - params_keywords = { - "with_keywords": keyword_ids, - "sort_by": "popularity.desc", - "vote_count.gte": 500, - **base_params, - } - tasks.append(self._fetch_discovery(content_type, params_keywords)) - - for page in range(1, 3): - params_rating_kw = { - "with_keywords": keyword_ids, - "sort_by": "vote_average.desc", - "vote_count.gte": 500, - "page": page, - **base_params, - } - tasks.append(self._fetch_discovery(content_type, params_rating_kw)) - - # Query 3: Top Actors - for actor in top_cast: - actor_id = actor[0] - params_actor = { - "with_cast": str(actor_id), - "sort_by": "popularity.desc", - "vote_count.gte": 500, - **base_params, - } - tasks.append(self._fetch_discovery(content_type, params_actor)) - # params_rating = { - # "with_cast": str(actor_id), - # "sort_by": "vote_average.desc", - # "vote_count.gte": 500, - # **base_params, - # } - # tasks.append(self._fetch_discovery(content_type, params_rating)) - - # Query 4: Top 
Director - if top_crew: - director_id = top_crew[0][0] - params_director = { - "with_crew": str(director_id), - "sort_by": "vote_average.desc", # Directors imply quality preference - "vote_count.gte": 500, - **base_params, - } - tasks.append(self._fetch_discovery(content_type, params_director)) - - # Query 5: Top Countries - if top_countries: - country_ids = "|".join([str(c[0]) for c in top_countries]) - params_country = { - "with_origin_country": country_ids, - "sort_by": "popularity.desc", - "vote_count.gte": 100, - **base_params, - } - tasks.append(self._fetch_discovery(content_type, params_country)) - # params_rating = { - # "with_origin_country": country_ids, - # "sort_by": "vote_average.desc", - # "vote_count.gte": 300, - # **base_params, - # } - # tasks.append(self._fetch_discovery(content_type, params_rating)) - - # query 6: Top year - if top_year: - year = top_year[0][0] - # we store year in 10 years bucket - start_year = f"{year}-01-01" - end_year = f"{int(year) + 9}-12-31" - params_rating = { - "primary_release_date.gte": start_year, - "primary_release_date.lte": end_year, - "sort_by": "vote_average.desc", - "vote_count.gte": 500, - **base_params, - } - tasks.append(self._fetch_discovery(content_type, params_rating)) - - # 3. Execute Phase 1 + # 2. Execute Phase 1 results_batches = await asyncio.gather(*tasks, return_exceptions=True) - # 4. Aggregate and Deduplicate + # 3. Aggregate Candidates all_candidates = {} for batch in results_batches: if isinstance(batch, Exception) or not batch: @@ -170,62 +62,174 @@ async def discover_recommendations( if item["id"] not in all_candidates: all_candidates[item["id"]] = item - # Conditional Phase 2: fetch page 2 if pool is thin + # 4. 
Phase 2 (If pool is thin) if len(all_candidates) < 120: - tasks2 = [] - if top_genres: - genre_ids = "|".join([str(g[0]) for g in top_genres]) - tasks2.append( - self._fetch_discovery( - content_type, - { - "with_genres": genre_ids, - "sort_by": "vote_average.desc", - "vote_count.gte": 400, - "page": 2, - **base_params, - }, - ) + tasks2 = self._build_discovery_tasks_phase2( + profile, + content_type, + excluded_genres, + use_genres=use_genres, + use_keywords=use_keywords, + use_cast=use_cast, + ) + if tasks2: + results_batches2 = await asyncio.gather(*tasks2, return_exceptions=True) + for batch in results_batches2: + if isinstance(batch, Exception) or not batch: + continue + for item in batch: + if item["id"] not in all_candidates: + all_candidates[item["id"]] = item + + return list(all_candidates.values()) + + def _build_discovery_tasks_phase1( + self, + profile: UserTasteProfile, + content_type: str, + excluded_genres: list[int] | None = None, + **opts, + ) -> list[Any]: + """Construct the initial set of discovery tasks based on top profile features.""" + top_genres = profile.get_top_genres(limit=3) if opts.get("use_genres") else [] + top_keywords = profile.get_top_keywords(limit=3) if opts.get("use_keywords") else [] + top_cast = profile.cast.get_top_features(limit=2) if opts.get("use_cast") else [] + top_crew = profile.get_top_crew(limit=1) if opts.get("use_director") else [] + top_countries = profile.get_top_countries(limit=2) if opts.get("use_countries") else [] + top_year = profile.get_top_year(limit=1) if opts.get("use_year") else [] + + if not any([top_genres, top_keywords, top_cast, top_crew]): + return [] + + tasks = [] + base_params = {} + if excluded_genres: + base_params["without_genres"] = "|".join([str(g) for g in excluded_genres]) + + # Query 1: Top Genres + if top_genres: + genre_ids = "|".join([str(g[0]) for g in top_genres]) + tasks.append( + self._fetch_discovery( + content_type, + {"with_genres": genre_ids, "sort_by": "popularity.desc", 
"vote_count.gte": 500, **base_params}, ) - if top_keywords: - keyword_ids = "|".join([str(k[0]) for k in top_keywords]) - tasks2.append( + ) + tasks.append( + self._fetch_discovery( + content_type, + {"with_genres": genre_ids, "sort_by": "vote_average.desc", "vote_count.gte": 500, **base_params}, + ) + ) + + # Query 2: Top Keywords + if top_keywords: + keyword_ids = "|".join([str(k[0]) for k in top_keywords]) + tasks.append( + self._fetch_discovery( + content_type, + {"with_keywords": keyword_ids, "sort_by": "popularity.desc", "vote_count.gte": 500, **base_params}, + ) + ) + for page in range(1, 3): + tasks.append( self._fetch_discovery( content_type, { "with_keywords": keyword_ids, "sort_by": "vote_average.desc", - "vote_count.gte": 400, - "page": 2, + "vote_count.gte": 500, + "page": page, **base_params, }, ) ) - for actor in top_cast[:1]: - actor_id = actor[0] - tasks2.append( - self._fetch_discovery( - content_type, - { - "with_cast": str(actor_id), - "sort_by": "vote_average.desc", - "vote_count.gte": 400, - "page": 2, - **base_params, - }, - ) + + # Query 3: Cast & Crew + is_tv = content_type in ("tv", "series") + for actor in top_cast: + p = {"sort_by": "popularity.desc", "vote_count.gte": 500, **base_params} + p["with_people" if is_tv else "with_cast"] = str(actor[0]) + tasks.append(self._fetch_discovery(content_type, p)) + + if top_crew: + p = {"sort_by": "vote_average.desc", "vote_count.gte": 500, **base_params} + p["with_people" if is_tv else "with_crew"] = str(top_crew[0][0]) + tasks.append(self._fetch_discovery(content_type, p)) + + # Query 4: Countries & Year + if top_countries: + country_ids = "|".join([str(c[0]) for c in top_countries]) + tasks.append( + self._fetch_discovery( + content_type, + { + "with_origin_country": country_ids, + "sort_by": "popularity.desc", + "vote_count.gte": 100, + **base_params, + }, ) + ) - if tasks2: - results_batches2 = await asyncio.gather(*tasks2, return_exceptions=True) - for batch in results_batches2: - if 
isinstance(batch, Exception) or not batch: - continue - for item in batch: - if item["id"] not in all_candidates: - all_candidates[item["id"]] = item + if top_year: + year = top_year[0][0] + prefix = "first_air_date" if is_tv else "primary_release_date" + tasks.append( + self._fetch_discovery( + content_type, + { + "sort_by": "vote_average.desc", + "vote_count.gte": 500, + f"{prefix}.gte": f"{year}-01-01", + f"{prefix}.lte": f"{int(year)+9}-12-31", + **base_params, + }, + ) + ) + return tasks - return list(all_candidates.values()) + def _build_discovery_tasks_phase2( + self, + profile: UserTasteProfile, + content_type: str, + excluded_genres: list[int] | None = None, + **opts, + ) -> list[Any]: + """Construct additional discovery tasks with lower thresholds to fill out candidate pool.""" + top_genres = profile.get_top_genres(limit=3) if opts.get("use_genres") else [] + top_keywords = profile.get_top_keywords(limit=3) if opts.get("use_keywords") else [] + top_cast = profile.cast.get_top_features(limit=1) if opts.get("use_cast") else [] + + tasks = [] + base_params = {"vote_count.gte": 400, "page": 2} + if excluded_genres: + base_params["without_genres"] = "|".join([str(g) for g in excluded_genres]) + + if top_genres: + genre_ids = "|".join([str(g[0]) for g in top_genres]) + tasks.append( + self._fetch_discovery( + content_type, {"with_genres": genre_ids, "sort_by": "vote_average.desc", **base_params} + ) + ) + + if top_keywords: + keyword_ids = "|".join([str(k[0]) for k in top_keywords]) + tasks.append( + self._fetch_discovery( + content_type, {"with_keywords": keyword_ids, "sort_by": "vote_average.desc", **base_params} + ) + ) + + if top_cast: + actor_id = top_cast[0][0] + is_tv = content_type in ("tv", "series") + p = {"sort_by": "vote_average.desc", **base_params} + p["with_people" if is_tv else "with_cast"] = str(actor_id) + tasks.append(self._fetch_discovery(content_type, p)) + + return tasks async def _fetch_discovery(self, media_type: str, params: dict) -> 
list[dict]: """Helper to call TMDB discovery.""" @@ -233,5 +237,6 @@ async def _fetch_discovery(self, media_type: str, params: dict) -> list[dict]: async with self._sem: data = await self.tmdb_service.get_discover(media_type, **params) return data.get("results", []) - except Exception: + except Exception as e: + logger.exception(f"TMDB Discovery failed with params {params}: {e}") return [] diff --git a/app/services/gemini.py b/app/services/gemini.py index 8a96f7d..f54849b 100644 --- a/app/services/gemini.py +++ b/app/services/gemini.py @@ -51,7 +51,7 @@ def generate_content(self, prompt: str) -> str: ) return response.text.strip() except Exception as e: - logger.error(f"Error generating content: {e}") + logger.exception(f"Error generating content with Gemini: {e}") return "" async def generate_content_async(self, prompt: str) -> str: diff --git a/app/services/profile/__init__.py b/app/services/profile/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/app/services/profile/service.py b/app/services/profile/service.py new file mode 100644 index 0000000..b45fac1 --- /dev/null +++ b/app/services/profile/service.py @@ -0,0 +1,172 @@ +import asyncio +from collections import defaultdict +from typing import Any + +from loguru import logger + +from app.models.profile import UserTasteProfile +from app.models.scoring import ScoredItem +from app.services.profile.similarity import ( + CAST_WEIGHT, + COUNTRIES_WEIGHT, + CREW_WEIGHT, + GENRES_WEIGHT, + KEYWORDS_WEIGHT, + TOPICS_WEIGHT, + YEAR_WEIGHT, + calculate_similarity_breakdown, + calculate_simple_overlap_breakdown, +) +from app.services.profile.vectorizer import ProfileVectorizer +from app.services.tmdb.service import get_tmdb_service + + +class UserProfileService: + """ + Service for building and managing User Taste Profiles. 
+ """ + + def __init__(self, language: str = "en-US"): + self.tmdb_service = get_tmdb_service(language=language) + + async def build_user_profile( + self, + scored_items: list[ScoredItem], + content_type: str | None = None, + excluded_genres: list[int] | None = None, + ) -> UserTasteProfile: + """ + Build a comprehensive taste profile from a list of scored items. + """ + profile_data = { + "genres": defaultdict(float), + "keywords": defaultdict(float), + "cast": defaultdict(float), + "crew": defaultdict(float), + "years": defaultdict(float), + "countries": defaultdict(float), + "topics": defaultdict(float), + } + + async def _process_item(item: ScoredItem): + try: + # 1. Filter by content type (movie/series) + if content_type and item.item.type != content_type: + return None + + # 2. Resolve TMDB ID + tmdb_id = await self._resolve_tmdb_id(item.item.id) + if not tmdb_id: + return None + + # 3. Fetch detailed metadata + meta = await self._fetch_full_metadata(tmdb_id, item.item.type) + if not meta: + return None + + # 4. Vectorize item + item_vector = ProfileVectorizer.vectorize_item(meta) + if not item_vector: + return None + + # 5. 
Scale by interest score + interest_weight = item.score / 100.0 + + return item_vector, interest_weight + except Exception as e: + from loguru import logger + + logger.exception(f"Failed to process profile item {item.item.id}: {e}") + return None + + # Process all items in parallel + tasks = [_process_item(item) for item in scored_items] + results = await asyncio.gather(*tasks, return_exceptions=True) + + # Merge results into the profile + for res in results: + if res and not isinstance(res, Exception): + item_vector, weight = res + self._merge_vector(profile_data, item_vector, weight, excluded_genres) + + # Build and normalize Pydantic model + profile = UserTasteProfile( + genres={"values": dict(profile_data["genres"])}, + keywords={"values": dict(profile_data["keywords"])}, + cast={"values": dict(profile_data["cast"])}, + crew={"values": dict(profile_data["crew"])}, + years={"values": dict(profile_data["years"])}, + countries={"values": dict(profile_data["countries"])}, + topics={"values": dict(profile_data["topics"])}, + ) + profile.normalize_all() + return profile + + def _merge_vector( + self, + profile_data: dict[str, Any], + item_vector: dict[str, Any], + weight: float, + excluded_genres: list[int] | None = None, + ): + """Merge an item's vector into the profile with weighted scoring.""" + WEIGHT_MAP = { + "genres": GENRES_WEIGHT, + "keywords": KEYWORDS_WEIGHT, + "cast": CAST_WEIGHT, + "crew": CREW_WEIGHT, + "year": YEAR_WEIGHT, + "countries": COUNTRIES_WEIGHT, + "topics": TOPICS_WEIGHT, + } + + for dim, values in item_vector.items(): + dim_weight = WEIGHT_MAP.get(dim, 1.0) + final_weight = weight * dim_weight + + if dim == "year": + if values is not None: + profile_data["years"][values] += final_weight + elif values: + for feature_id in values: + if dim == "genres" and excluded_genres and feature_id in excluded_genres: + continue + profile_data[dim][feature_id] += final_weight + + async def _resolve_tmdb_id(self, stremio_id: str) -> int | None: + """Resolve 
various Stremio ID formats to a TMDB integer ID.""" + if stremio_id.startswith("tmdb:"): + try: + return int(stremio_id.split(":")[1]) + except (ValueError, IndexError): + return None + + if stremio_id.startswith("tt"): + tmdb_id, _ = await self.tmdb_service.find_by_imdb_id(stremio_id) + return tmdb_id + return None + + async def _fetch_full_metadata(self, tmdb_id: int, type_: str) -> dict | None: + """Fetch full metadata from TMDB based on media type.""" + try: + if type_ == "movie": + return await self.tmdb_service.get_movie_details(tmdb_id) + return await self.tmdb_service.get_tv_details(tmdb_id) + except Exception as e: + logger.error(f"Failed to fetch full metadata for TMDB {tmdb_id} ({type_}): {e}") + return None + + def calculate_similarity(self, profile: UserTasteProfile, item_meta: dict) -> float: + """Get total similarity score between profile and item.""" + score, _ = calculate_similarity_breakdown(profile, item_meta) + return score + + def calculate_similarity_with_breakdown(self, profile: UserTasteProfile, item_meta: dict) -> tuple[float, dict]: + """Get similarity score and dimensional breakdown.""" + return calculate_similarity_breakdown(profile, item_meta) + + def calculate_simple_overlap_with_breakdown( + self, profile: UserTasteProfile, item_meta: dict, **kwargs + ) -> tuple[float, dict]: + """Get simple overlap similarity and breakdown.""" + return calculate_simple_overlap_breakdown(profile, item_meta, **kwargs) diff --git a/app/services/profile/similarity.py b/app/services/profile/similarity.py new file mode 100644 index 0000000..f46f19d --- /dev/null +++ b/app/services/profile/similarity.py @@ -0,0 +1,114 @@ +from typing import Any + +from app.models.profile import UserTasteProfile +from app.services.profile.vectorizer import ProfileVectorizer + +# Weights for different dimensions +GENRES_WEIGHT = 0.20 +KEYWORDS_WEIGHT = 0.30 +CAST_WEIGHT = 0.12 +CREW_WEIGHT = 0.08 +YEAR_WEIGHT = 0.05 +COUNTRIES_WEIGHT = 0.05 +TOPICS_WEIGHT = 0.20 + + +def 
jaccard_similarity(set_a: set[Any], set_b: set[Any]) -> float: + """Calculate Jaccard similarity between two sets.""" + if not set_a and not set_b: + return 0.0 + if not set_a or not set_b: + return 0.0 + + intersection = len(set_a & set_b) + union = len(set_a | set_b) + return intersection / union if union > 0 else 0.0 + + +def calculate_similarity_breakdown( + profile: UserTasteProfile, item_meta: dict[str, Any] +) -> tuple[float, dict[str, float]]: + """ + Calculate similarity between a user profile and an item, + returning both the total score and a dimensional breakdown. + """ + item_vec = ProfileVectorizer.vectorize_item(item_meta) + + def avg_feature_preference(features: list[Any], weight_map: dict[Any, float]) -> float: + if not features: + return 0.0 + total_pref = sum(weight_map.get(f, 0.0) for f in features) + return total_pref / max(1, len(features)) + + # Calculate scores per dimension + g_score = avg_feature_preference(item_vec.get("genres", []), profile.genres.values) * GENRES_WEIGHT + k_score = avg_feature_preference(item_vec.get("keywords", []), profile.keywords.values) * KEYWORDS_WEIGHT + c_score = avg_feature_preference(item_vec.get("cast", []), profile.cast.values) * CAST_WEIGHT + t_score = avg_feature_preference(item_vec.get("topics", []), profile.topics.values) * TOPICS_WEIGHT + crew_score = avg_feature_preference(item_vec.get("crew", []), profile.crew.values) * CREW_WEIGHT + country_score = avg_feature_preference(item_vec.get("countries", []), profile.countries.values) * COUNTRIES_WEIGHT + + year_val = item_vec.get("year") + year_score = 0.0 + if year_val is not None: + year_score = profile.years.values.get(year_val, 0.0) * YEAR_WEIGHT + + total_score = g_score + k_score + c_score + t_score + crew_score + country_score + year_score + + breakdown = { + "genres": float(g_score), + "keywords": float(k_score), + "cast": float(c_score), + "topics": float(t_score), + "crew": float(crew_score), + "countries": float(country_score), + "year": 
float(year_score), + "total": float(total_score), + } + + return float(total_score), breakdown + + +def calculate_simple_overlap_breakdown( + profile: UserTasteProfile, + item_meta: dict[str, Any], + top_topic_tokens: int = 300, + top_genres: int = 20, + top_keyword_ids: int = 200, +) -> tuple[float, dict[str, float]]: + """ + Calculate similarity using simple set overlaps (Jaccard). + """ + # Item sets + item_vec = ProfileVectorizer.vectorize_item(item_meta) + item_topic_tokens = set(item_vec.get("topics") or []) + item_genres = {int(g) for g in (item_vec.get("genres") or [])} + item_keyword_ids = {int(k) for k in (item_vec.get("keywords") or [])} + + # Helper to get top features from profile + def get_top_features(weight_map: dict[Any, float], limit: int) -> set[Any]: + sorted_features = sorted(weight_map.items(), key=lambda x: x[1], reverse=True) + return {k for k, _ in sorted_features[:limit]} + + # Profile preference sets + pref_topic_tokens = get_top_features(profile.topics.values, top_topic_tokens) + pref_genres = {int(g) for g in get_top_features(profile.genres.values, top_genres)} + pref_keyword_ids = {int(k) for k in get_top_features(profile.keywords.values, top_keyword_ids)} + + # Jaccard similarities + topics_j = jaccard_similarity(item_topic_tokens, pref_topic_tokens) + genres_j = jaccard_similarity(item_genres, pref_genres) + kw_j = jaccard_similarity(item_keyword_ids, pref_keyword_ids) + + # Weighted sum + w_topics, w_genres, w_kw = 0.6, 0.25, 0.15 + total_score = (topics_j * w_topics) + (genres_j * w_genres) + (kw_j * w_kw) + + breakdown = { + "topics_jaccard": float(topics_j), + "genres_jaccard": float(genres_j), + "keywords_jaccard": float(kw_j), + "total": float(total_score), + } + + return float(total_score), breakdown diff --git a/app/services/profile/vectorizer.py b/app/services/profile/vectorizer.py new file mode 100644 index 0000000..6fcc56e --- /dev/null +++ b/app/services/profile/vectorizer.py @@ -0,0 +1,177 @@ +from typing import Any + + 
+class ProfileVectorizer: + """ + Handles tokenization and conversion of TMDB metadata into sparse vectors. + """ + + STOPWORDS = { + "a", + "an", + "and", + "the", + "of", + "to", + "in", + "on", + "for", + "with", + "by", + "from", + "at", + "as", + "is", + "it", + "this", + "that", + "be", + "or", + "are", + "was", + "were", + "has", + "have", + "had", + "into", + "their", + "his", + "her", + "its", + "but", + "not", + "no", + "so", + "about", + "over", + "under", + "after", + "before", + "than", + "then", + "out", + "up", + "down", + "off", + "only", + "more", + "most", + "some", + "any", + } + + @staticmethod + def normalize_token(tok: str) -> str: + """Lowercases, removes non-alphanumeric, and performs lightweight stemming.""" + t = tok.lower() + t = "".join(ch for ch in t if ch.isalnum()) + if len(t) <= 2: + return "" + + # Lightweight stemming + for suf in ("ing", "ers", "ies", "ment", "tion", "s", "ed"): + if t.endswith(suf) and len(t) - len(suf) >= 3: + t = t[: -len(suf)] + break + return t + + @classmethod + def tokenize(cls, text: str) -> list[str]: + """Split text into normalized tokens, removing stopwords and duplicates.""" + if not text: + return [] + + raw = text.replace("-", " ").replace("_", " ") + tokens = [] + for part in raw.split(): + t = cls.normalize_token(part) + if not t or t in cls.STOPWORDS: + continue + tokens.append(t) + + # De-duplicate while preserving order + seen = set() + dedup = [] + for t in tokens: + if t not in seen: + seen.add(t) + dedup.append(t) + return dedup + + @classmethod + def vectorize_item(cls, meta: dict[str, Any]) -> dict[str, Any]: + """ + Converts raw TMDB metadata into a sparse vector format. + """ + if not meta or not isinstance(meta, dict): + return {} + + # 1. 
Robust Keyword Extraction (Movies use 'keywords', TV uses 'results' key) + keywords_obj = meta.get("keywords") + raw_keywords = [] + if isinstance(keywords_obj, dict): + raw_keywords = keywords_obj.get("keywords") or keywords_obj.get("results") or [] + elif isinstance(keywords_obj, list): + raw_keywords = keywords_obj + + # 2. Extract countries + countries = [] + prod_countries = meta.get("production_countries") + if isinstance(prod_countries, list): + countries = [c.get("iso_3166_1") for c in prod_countries if isinstance(c, dict) and c.get("iso_3166_1")] + + if not countries: + origin = meta.get("origin_country") + if isinstance(origin, list): + countries = origin + elif isinstance(origin, str): + countries = [origin] + + # 3. Genres + genre_ids = meta.get("genre_ids") or [] + if not genre_ids: + genres_src = meta.get("genres") + if isinstance(genres_src, list): + genre_ids = [g.get("id") for g in genres_src if isinstance(g, dict) and g.get("id") is not None] + + # 4. Topics (Title + Overview + Keyword Names) + title_text = meta.get("name") or meta.get("title") or meta.get("original_title") or "" + overview_text = meta.get("description") or meta.get("overview") or "" + + kw_names = [] + if isinstance(raw_keywords, list): + kw_names = [k.get("name") for k in raw_keywords if isinstance(k, dict) and k.get("name")] + + topics_tokens: list[str] = [] + if title_text: + topics_tokens.extend(cls.tokenize(title_text)) + if overview_text: + topics_tokens.extend(cls.tokenize(overview_text)) + for nm in kw_names: + topics_tokens.extend(cls.tokenize(nm)) + + # 5. 
Build Final Vector + credits = meta.get("credits") or {} + cast = credits.get("cast") or [] + crew = credits.get("crew") or [] + + vector = { + "genres": [int(g) for g in genre_ids if g is not None], + "keywords": [int(k["id"]) for k in raw_keywords if isinstance(k, dict) and k.get("id") is not None], + "cast": [int(c["id"]) for c in cast[:5] if isinstance(c, dict) and c.get("id") is not None], + "crew": [int(c["id"]) for c in crew if isinstance(c, dict) and c.get("job") == "Director"], + "year": None, + "countries": countries, + "topics": topics_tokens, + } + + # Year Bucket (Decades) + date_str = meta.get("release_date") or meta.get("first_air_date") + if date_str and isinstance(date_str, str): + try: + year = int(date_str[:4]) + vector["year"] = (year // 10) * 10 + except (ValueError, TypeError): + pass + + return vector diff --git a/app/services/recommendation/__init__.py b/app/services/recommendation/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/app/services/recommendation/engine.py b/app/services/recommendation/engine.py new file mode 100644 index 0000000..74ad456 --- /dev/null +++ b/app/services/recommendation/engine.py @@ -0,0 +1,769 @@ +import asyncio +from collections import defaultdict +from typing import Any + +from loguru import logger + +from app.core.config import settings +from app.core.settings import UserSettings +from app.services.discovery import DiscoveryEngine +from app.services.profile.service import UserProfileService +from app.services.recommendation.filtering import RecommendationFiltering +from app.services.recommendation.metadata import RecommendationMetadata +from app.services.recommendation.scoring import RecommendationScoring +from app.services.scoring import ScoringService +from app.services.stremio.service import StremioBundle as StremioService +from app.services.tmdb.service import get_tmdb_service + +TOP_GENRE_WHITELIST_LIMIT = 5 + +PER_GENRE_MAX_SHARE = 0.4 + + +class RecommendationEngine: + """ + Main 
orchestration logic for generating hybrid recommendations. + """ + + def __init__( + self, + stremio_service: StremioService, + language: str = "en-US", + user_settings: UserSettings | None = None, + token: str | None = None, + library_data: dict | None = None, + auth_key: str | None = None, + ): + self.tmdb_service = get_tmdb_service(language=language) + self.stremio_service = stremio_service + self.user_settings = user_settings + self.stable_seed = token or "" + self._library_data = library_data + self.auth_key = auth_key + + self.scoring_service = ScoringService() + self.user_profile_service = UserProfileService(language=language) + self.discovery_engine = DiscoveryEngine(language=language) + + self.per_item_limit = 20 + self._whitelist_cache: dict[str, set[int]] = {} + + async def get_recommendations( + self, + content_type: str, + source_items_limit: int = 5, + max_results: int = 20, + ) -> list[dict[str, Any]]: + """Smart Hybrid Recommendation Pipeline.""" + logger.info(f"Starting Hybrid Recommendation Pipeline for {content_type}") + + # 1. Fetch & Prep History (Use larger window for a stable profile) + profile_history_limit = 20 + scored_objects, watched_tmdb, watched_imdb = await self._get_scored_library_items( + content_type, limit=profile_history_limit + ) + + # 2. Candidate Generation (Discovery + Similarity) + excluded_ids = RecommendationFiltering.get_excluded_genre_ids(self.user_settings, content_type) + candidate_pool, user_profile, whitelist = await self._collect_hybrid_candidates( + content_type, scored_objects, set(excluded_ids), source_items_limit + ) + + # 3. Freshness Injection + await self._inject_freshness( + candidate_pool, content_type, watched_tmdb, set(excluded_ids), whitelist, max_results + ) + + # 4. 
Initial Ranking + # Recency setup + candidate_decades = { + (RecommendationMetadata.extract_year(it) // 10) * 10 + for it in candidate_pool.values() + if RecommendationMetadata.extract_year(it) + } + recency_fn, recency_alpha = RecommendationScoring.get_recency_multiplier_fn(user_profile, candidate_decades) + + ranked = self._rank_initial_pool( + candidate_pool, content_type, user_profile, whitelist, watched_tmdb, recency_fn, recency_alpha + ) + + # 5. Metadata Enrichment (Top items) + buffer = [it for _, it in ranked[: max_results * 2]] + enriched = await RecommendationMetadata.fetch_batch( + self.tmdb_service, buffer, content_type, max_results * 2, self.user_settings + ) + + # 6. Final Re-ranking and Diversification + final_items = self._diversify( + enriched, user_profile, whitelist, recency_fn, recency_alpha, watched_imdb, watched_tmdb, max_results + ) + + return final_items + + async def _get_scored_library_items(self, content_type: str, limit: int) -> tuple[list[Any], set[int], set[str]]: + """Fetch library, compute exclusion sets, and score top history items.""" + if self._library_data is None: + if not self.auth_key: + raise ValueError("auth_key is required to fetch library data") + self._library_data = await self.stremio_service.library.get_library_items(self.auth_key) + + lib = self._library_data + watched_imdb, watched_tmdb = await RecommendationFiltering.get_exclusion_sets( + self.stremio_service, lib, self.auth_key + ) + + all_lib_items = lib.get("loved", []) + lib.get("watched", []) + lib.get("added", []) + typed_items = {it["_id"]: it for it in all_lib_items if it.get("type") == content_type} + + sorted_history = sorted( + typed_items.values(), + key=lambda x: x.get("state", {}).get("lastWatched") or "", + reverse=True, + ) + + scored_objects = [] + top_sources = [] + for it in sorted_history[:limit]: + scored = self.scoring_service.process_item(it) + scored_objects.append(scored) + it["_interest_score"] = scored.score + top_sources.append(it) + + 
top_sources.sort(key=lambda x: x["_interest_score"], reverse=True) + # Store top sources in self if needed, but returning scored objects is enough for profile + return scored_objects, watched_tmdb, watched_imdb + + async def _collect_hybrid_candidates( + self, content_type: str, scored_objects: list, excluded_genres: set[int], max_results: int + ) -> tuple[dict[int, dict], Any, set[int]]: + """Run TMDB Similarity and Discovery in parallel to populate initial candidate pool.""" + # 1. Profile Building + user_profile = await self.user_profile_service.build_user_profile( + scored_objects, content_type=content_type, excluded_genres=list(excluded_genres) + ) + + # 2. Similarity Candidates (Candidate Set A) + # Assuming we use top 5 scored items for similarity sources + top_ids = [obj.item.id for obj in scored_objects[:5]] + tasks = [self._fetch_raw_recommendations(tid, content_type, limit=10) for tid in top_ids] + sim_batches = await asyncio.gather(*tasks, return_exceptions=True) + + sim_candidates = [] + for batch in sim_batches: + if isinstance(batch, Exception): + continue + for it in batch: + gids = it.get("genre_ids") or [] + if not excluded_genres.intersection(gids): + it["_ranked_candidate"] = True + sim_candidates.append(it) + + # 3. Discovery Candidates (Candidate Set B) + whitelist = await self._get_genre_whitelist(content_type, scored_objects) + discovery_candidates = await self.discovery_engine.discover_recommendations( + user_profile, + content_type, + excluded_genres=list(excluded_genres), + use_genres=False, + use_keywords=True, + use_cast=True, + use_director=True, + use_countries=False, + use_year=False, + ) + + # 4. 
Merge (No hard filtering here, leave it for the ranker) + candidate_pool = {} + for it in discovery_candidates: + candidate_pool[it["id"]] = it + for it in sim_candidates: + candidate_pool[it["id"]] = it + + return candidate_pool, user_profile, whitelist + + def _rank_initial_pool( + self, + pool: dict[int, dict], + content_type: str, + profile: Any, + whitelist: set[int], + watched_tmdb: set[int], + recency_fn: Any, + recency_alpha: float, + ) -> list[tuple[float, dict]]: + """Apply base ranking, similarity, and quality adjustments to the initial candidate pool.""" + ranked = [] + for tid, it in pool.items(): + if tid in watched_tmdb: + continue + + sim_score, bd = self.user_profile_service.calculate_simple_overlap_with_breakdown(profile, it) + if float(bd.get("topics_jaccard", 0.0)) + float(bd.get("keywords_jaccard", 0.0)) <= 0.0001: + sim_score *= 0.8 # Penalty for genre-only match + + wr = RecommendationScoring.weighted_rating( + it.get("vote_average"), + it.get("vote_count"), + C=7.2 if content_type in ("tv", "series") else 6.8, + ) + v_score = RecommendationScoring.normalize(wr) + p_score = RecommendationScoring.normalize(float(it.get("popularity") or 0.0), max_v=1000.0) + + final_score = (sim_score * 0.55) + (v_score * 0.35) + (p_score * 0.10) + + # Apply Genre Bias (Preference Boost/Penalty) + final_score *= RecommendationFiltering.get_genre_multiplier(it.get("genre_ids"), whitelist) + + year = RecommendationMetadata.extract_year(it) + final_score *= (1.0 - recency_alpha) + (recency_alpha * recency_fn(year)) + final_score += RecommendationScoring.stable_epsilon(tid, self.stable_seed) + + final_score = RecommendationScoring.apply_quality_adjustments( + final_score, + wr, + int(it.get("vote_count") or 0), + bool(it.get("_ranked_candidate")), + bool(it.get("_fresh_boost")), + ) + ranked.append((final_score, it)) + + ranked.sort(key=lambda x: x[0], reverse=True) + return ranked + + async def get_recommendations_for_item(self, item_id: str, media_type: str = 
"movie") -> list[dict[str, Any]]: + """Get recommendations for a specific item, strictly excluding watched content.""" + watched_imdb, watched_tmdb = await RecommendationFiltering.get_exclusion_sets( + self.stremio_service, self._library_data, self.auth_key + ) + + # Explicitly exclude the source item + if item_id.startswith("tt"): + watched_imdb.add(item_id) + elif item_id.startswith("tmdb:"): + try: + watched_tmdb.add(int(item_id.split(":")[1])) + except Exception: + pass + + # Use the provided media_type (series/movie -> tv/movie) + mtype = "tv" if media_type in ("tv", "series") else "movie" + + # Fetch candidates using the known type + candidates = await self._fetch_raw_recommendations(item_id, mtype, limit=40) + + # Build whitelist + stremio_mtype = "series" if mtype == "tv" else "movie" + whitelist = await self._get_genre_whitelist(stremio_mtype) + + # Process candidates + filtered = [] + for it in candidates: + if it.get("id") in watched_tmdb: + continue + gids = it.get("genre_ids") or [] + if not RecommendationFiltering.passes_top_genre_whitelist(gids, whitelist): + continue + filtered.append(it) + + # Freshness injection for related items (threshold 40) + excluded_ids = RecommendationFiltering.get_excluded_genre_ids(self.user_settings, stremio_mtype) + if len(filtered) < 40: + tmp_pool = {it["id"]: it for it in filtered} + await self._inject_freshness(tmp_pool, mtype, watched_tmdb, set(excluded_ids), whitelist, 20) + filtered = list(tmp_pool.values()) + + # Enrichment + enriched = await RecommendationMetadata.fetch_batch( + self.tmdb_service, filtered, stremio_mtype, target_count=20, user_settings=self.user_settings + ) + + # Strict final filtering + final = [] + for it in enriched: + if it["id"] in watched_imdb: + continue + if it.get("_external_ids", {}).get("imdb_id") in watched_imdb: + continue + it.pop("_external_ids", None) + final.append(it) + if len(final) >= 20: + break + + return final + + async def _fetch_raw_recommendations(self, item_id: str, 
media_type: str, limit: int) -> list[dict[str, Any]]: + """Fetch raw recommendations from TMDB (multiple pages).""" + mtype = "tv" if media_type in ("tv", "series") else "movie" + tmdb_id = None + + if item_id.startswith("tt"): + tmdb_id, _ = await self.tmdb_service.find_by_imdb_id(item_id) + elif item_id.startswith("tmdb:"): + try: + tmdb_id = int(item_id.split(":")[1]) + except Exception: + pass + else: + try: + tmdb_id = int(item_id) + except Exception: + pass + + if not tmdb_id: + return [] + + combined = {} + # fetch two pages of data + for action in ["recommendations", "similar"]: + method = getattr(self.tmdb_service, f"get_{action}") + results = await asyncio.gather(*[method(tmdb_id, mtype, page=p) for p in [1, 2]], return_exceptions=True) + + for res in results: + if isinstance(res, Exception): + logger.error(f"Error fetching {action} for {tmdb_id}: {res}") + continue + for it in res.get("results", []): + if it.get("id"): + combined[it["id"]] = it + + if len(combined) >= max(20, limit // 2): + break + + return list(combined.values()) + + async def _get_genre_whitelist(self, content_type: str, scored_objects: list | None = None) -> set[int]: + if content_type in self._whitelist_cache: + return self._whitelist_cache[content_type] + + try: + if scored_objects is None: + if self._library_data is None: + if not self.auth_key: + return set() + self._library_data = await self.stremio_service.library.get_library_items(self.auth_key) + + all_lib = ( + self._library_data.get("loved", []) + + self._library_data.get("watched", []) + + self._library_data.get("added", []) + + self._library_data.get("liked", []) + ) + typed = [ + it + for it in all_lib + if it.get("type") == content_type or (content_type == "series" and it.get("type") == "tv") + ] + sorted_hist = sorted( + {it["_id"]: it for it in typed}.values(), + key=lambda x: x.get("state", {}).get("lastWatched") or "", + reverse=True, + ) + scored_objects = [ + self.scoring_service.process_item(it) for it in 
sorted_hist[: settings.LIBRARY_ITEMS_LIMIT] + ] + + prof_type = "series" if content_type in ("tv", "series") else "movie" + temp_profile = await self.user_profile_service.build_user_profile(scored_objects, content_type=prof_type) + top_pairs = temp_profile.get_top_genres(limit=TOP_GENRE_WHITELIST_LIMIT) + whitelist = {int(gid) for gid, _ in top_pairs} + except Exception as e: + logger.warning(f"Failed to build whitelist for {content_type}: {e}") + whitelist = set() + + self._whitelist_cache[content_type] = whitelist + return whitelist + + async def _inject_freshness( + self, pool: dict, media_type: str, watched_tmdb: set, excluded_ids: set, whitelist: set, max_results: int + ): + mtype = "tv" if media_type in ("tv", "series") else "movie" + try: + trending = (await self.tmdb_service.get_trending(mtype)).get("results", []) + top_rated = (await self.tmdb_service.get_top_rated(mtype)).get("results", []) + fresh_pool = trending[:40] + top_rated[:40] + + cap = max(1, int(max_results * PER_GENRE_MAX_SHARE)) + genre_counts = defaultdict(int) + fresh_added = 0 + + for it in fresh_pool: + tid = it.get("id") + if not tid or tid in pool or tid in watched_tmdb: + continue + gids = it.get("genre_ids") or [] + if excluded_ids.intersection(gids): + continue + if not RecommendationFiltering.passes_top_genre_whitelist(gids, whitelist): + continue + + wr = RecommendationScoring.weighted_rating(it.get("vote_average"), it.get("vote_count")) + if int(it.get("vote_count") or 0) < 300 or wr < 7.0: + continue + if any(genre_counts[g] >= cap for g in gids): + continue + + it["_fresh_boost"] = True + pool[tid] = it + for g in gids: + genre_counts[g] += 1 + fresh_added += 1 + if fresh_added >= max_results * 2: + break + except Exception as e: + logger.warning(f"Freshness injection failed: {e}") + + def _diversify( + self, + enriched: list, + profile: Any, + whitelist: set, + rec_fn: callable, + rec_alpha: float, + watched_imdb: set, + watched_tmdb: set, + max_results: int, + ) -> list: + 
"""Final re-ranking and diversification with strict filtering.""" + # 1. Filter and compute base scores + pool = self._filter_and_re_rank_enriched( + enriched, profile, whitelist, rec_fn, rec_alpha, watched_imdb, watched_tmdb + ) + + # 2. Compute Target Distribution (Decades) + targets = self._compute_decade_targets(pool, profile, max_results) + + # 3. Final Selection with Diversity Constraints + return self._apply_diversification(pool, targets, max_results) + + def _filter_and_re_rank_enriched( + self, + enriched: list, + profile: Any, + whitelist: set, + rec_fn: callable, + rec_alpha: float, + watched_imdb: set, + watched_tmdb: set, + ) -> list: + """Process enriched items, applying strict filters and final weighted scoring.""" + final_pool = [] + used_collections = set() + used_cast = set() + + for it in enriched: + # STRICT FILTER + tid = it.get("_tmdb_id") + if tid and tid in watched_tmdb: + continue + + sid = it.get("id") + if sid in watched_imdb: + continue + if sid and sid.startswith("tmdb:"): + try: + if int(sid.split(":")[1]) in watched_tmdb: + continue + except Exception: + pass + + external_imdb = it.get("_external_ids", {}).get("imdb_id") + if external_imdb and external_imdb in watched_imdb: + continue + + if not RecommendationFiltering.passes_top_genre_whitelist(it.get("genre_ids"), whitelist): + continue + + sim_score, bd = self.user_profile_service.calculate_simple_overlap_with_breakdown(profile, it) + it["_sim_breakdown"] = bd + + wr = RecommendationScoring.weighted_rating( + it.get("vote_average"), + it.get("vote_count"), + C=7.2 if it.get("type") == "series" else 6.8, + ) + v_score = RecommendationScoring.normalize(wr) + p_score = RecommendationScoring.normalize(float(it.get("popularity") or 0.0), max_v=1000.0) + + base = (sim_score * 0.55) + (v_score * 0.35) + (p_score * 0.10) + # Apply Genre Bias (Preference Boost/Penalty) + base *= RecommendationFiltering.get_genre_multiplier(it.get("genre_ids"), whitelist) + + year = 
RecommendationMetadata.extract_year(it) + q_mult = (1.0 - rec_alpha) + (rec_alpha * rec_fn(year)) + + # Final Pass: Consistent quality adjustments (Enhanced: preserve source boosts) + score = base + RecommendationScoring.stable_epsilon(it.get("_tmdb_id", 0), self.stable_seed) + score = RecommendationScoring.apply_quality_adjustments( + score * q_mult, + wr, + int(it.get("vote_count") or 0), + is_ranked=bool(it.get("_ranked_candidate")), + is_fresh=bool(it.get("_fresh_boost")), + ) + + # Static suppression. NOTE(review): used_* sets are never populated; penalty is always 0.0 + penalty = 0.0 + if it.get("_collection_id") in used_collections: + penalty += 0.05 + cast_overlap = len(set(it.get("_top_cast_ids", [])) & used_cast) + if cast_overlap: + penalty += min(0.03 * cast_overlap, 0.09) + + it["_adjusted_score"] = score * (1.0 - penalty) + final_pool.append(it) + + final_pool.sort(key=lambda x: x.get("_adjusted_score", 0.0), reverse=True) + return final_pool + + def _compute_decade_targets(self, pool: list, profile: Any, max_results: int) -> dict[int | None, int]: + """Calculate how many slots each decade should occupy based on profile distribution.""" + decades_in_results = [] + for it in pool: + y = RecommendationMetadata.extract_year(it) + decades_in_results.append((int(y) // 10) * 10 if y else None) + + try: + years_map = getattr(profile.years, "values", {}) or {} + decade_weights = {int(k): float(v) for k, v in years_map.items() if isinstance(k, int)} + total_w = sum(decade_weights.values()) + except Exception: + decade_weights, total_w = {}, 0.0 + + support = {d for d in decades_in_results if d is not None} + if total_w > 0 and support: + p_user = {d: (decade_weights.get(d, 0.0) / total_w) for d in support} + s = sum(p_user.values()) + if s > 0: + for d in p_user: + p_user[d] /= s + else: + p_user = {d: 1.0 / len(support) for d in support} + else: + p_user = {d: 1.0 / len(support) for d in support} if support else {} + + targets = defaultdict(int) + remainders = [] + slots = max_results + for d, p in p_user.items(): + tgt
= p * slots + base = int(tgt) + targets[d] = base + remainders.append((tgt - base, d)) + + assigned = sum(targets.values()) + remaining = max(0, slots - assigned) + if remaining > 0 and remainders: + remainders.sort(key=lambda x: x[0], reverse=True) + for _, d in remainders[:remaining]: + targets[d] += 1 + return targets + + def _apply_diversification(self, pool: list, targets: dict, max_results: int) -> list: + """Perform final selection loop ensuring genre caps and decade distribution are respected.""" + genre_counts = defaultdict(int) + cap = max(1, int(max_results * PER_GENRE_MAX_SHARE)) + decade_counts = defaultdict(int) + result = [] + + for it in pool: + if len(result) >= max_results: + break + + gids = it.get("genre_ids") or [] + if any(genre_counts[g] >= cap for g in gids): + continue + + y = RecommendationMetadata.extract_year(it) + d = (int(y) // 10) * 10 if y else None + # If d is None, we don't apply decade cap (acts as fallback) + if d is not None and d in targets and decade_counts[d] >= targets[d]: + continue + + result.append(it) + for g in gids: + genre_counts[g] += 1 + if d is not None: + decade_counts[d] += 1 + + # Clean internal fields + it.pop("_external_ids", None) + it.pop("_tmdb_id", None) + it.pop("_adjusted_score", None) + it.pop("_top_cast_ids", None) + it.pop("_collection_id", None) + + return result + + async def get_recommendations_for_theme(self, theme_id: str, content_type: str, limit: int = 20) -> list[dict]: + """Parse theme and fetch recommendations with strict filtering.""" + params = {} + parts = theme_id.replace("watchly.theme.", "").split(".") + + for part in parts: + if part.startswith("g"): + genre_str = part[1:].replace("-", ",") + params["with_genres"] = genre_str.replace(",", "|") + elif part.startswith("k"): + kw_str = part[1:].replace("-", "|") + params["with_keywords"] = kw_str + elif part.startswith("ct"): + params["with_origin_country"] = part[2:] + elif part.startswith("y"): + try: + year = int(part[1:]) + is_tv = 
content_type in ("tv", "series") + prefix = "first_air_date" if is_tv else "primary_release_date" + params[f"{prefix}.gte"] = f"{year}-01-01" + params[f"{prefix}.lte"] = f"{year+9}-12-31" + except Exception: + pass + elif part == "sort-vote": + params["sort_by"] = "vote_average.desc" + params["vote_count.gte"] = 200 + + if "sort_by" not in params: + params["sort_by"] = "popularity.desc" + + excluded_ids = RecommendationFiltering.get_excluded_genre_ids(self.user_settings, content_type) + if excluded_ids: + try: + with_ids = {int(g) for g in (params.get("with_genres", "").replace("|", ",").split(",")) if g} + except Exception: + with_ids = set() + final_without = [g for g in excluded_ids if g not in with_ids] + if final_without: + params["without_genres"] = "|".join(str(g) for g in final_without) + + whitelist = await self._get_genre_whitelist(content_type) + candidates = [] + try: + discover_tasks = [self.tmdb_service.get_discover(content_type, page=p, **params) for p in [1, 2, 3]] + discover_results = await asyncio.gather(*discover_tasks, return_exceptions=True) + for res in discover_results: + if isinstance(res, Exception): + logger.error(f"Error fetching discover for {content_type}: {res}") + continue + candidates.extend(res.get("results", [])) + except Exception: + pass + + watched_imdb, watched_tmdb = await RecommendationFiltering.get_exclusion_sets( + self.stremio_service, self._library_data, self.auth_key + ) + + # Initial filter + filtered = [] + for it in candidates: + if it.get("id") in watched_tmdb: + continue + if not RecommendationFiltering.passes_top_genre_whitelist(it.get("genre_ids"), whitelist): + continue + filtered.append(it) + + if len(filtered) < limit * 2: + tmp_pool = {it["id"]: it for it in filtered} + await self._inject_freshness(tmp_pool, content_type, watched_tmdb, set(excluded_ids), whitelist, limit) + filtered = list(tmp_pool.values()) + + meta = await RecommendationMetadata.fetch_batch( + self.tmdb_service, filtered, content_type, 
target_count=limit * 2, user_settings=self.user_settings + ) + + final = [] + for it in meta: + if it["id"] in watched_imdb: + continue + if it.get("_external_ids", {}).get("imdb_id") in watched_imdb: + continue + if not RecommendationFiltering.passes_top_genre_whitelist(it.get("genre_ids"), whitelist): + continue + it.pop("_external_ids", None) + final.append(it) + if len(final) >= limit: + break + + return final + + async def pad_to_min(self, content_type: str, existing: list[dict], min_items: int) -> list[dict]: + """Pad results with trending/top-rated items, ensuring strict exclusion.""" + need = max(0, int(min_items) - len(existing)) + if need <= 0: + return existing + + watched_imdb, watched_tmdb = await RecommendationFiltering.get_exclusion_sets( + self.stremio_service, self._library_data, self.auth_key + ) + excluded_ids = set(RecommendationFiltering.get_excluded_genre_ids(self.user_settings, content_type)) + whitelist = await self._get_genre_whitelist(content_type) + + mtype = "tv" if content_type in ("tv", "series") else "movie" + pool = [] + try: + tr = await self.tmdb_service.get_trending(mtype, time_window="week") + pool.extend(tr.get("results", [])[:60]) + tr2 = await self.tmdb_service.get_top_rated(mtype) + pool.extend(tr2.get("results", [])[:60]) + except Exception: + pass + + existing_tmdb = set() + for it in existing: + tid = it.get("_tmdb_id") or it.get("tmdb_id") or it.get("id") + try: + if isinstance(tid, str) and tid.startswith("tmdb:"): + tid = int(tid.split(":")[1]) + existing_tmdb.add(int(tid)) + except Exception: + pass + + dedup = {} + for it in pool: + tid = it.get("id") + if not tid or tid in existing_tmdb or tid in watched_tmdb: + continue + gids = it.get("genre_ids") or [] + if excluded_ids.intersection(gids): + continue + if not RecommendationFiltering.passes_top_genre_whitelist(gids, whitelist): + continue + + va, vc = float(it.get("vote_average") or 0.0), int(it.get("vote_count") or 0) + if vc < 100 or va < 6.2: + continue + 
dedup[tid] = it + if len(dedup) >= need * 3: + break + + if not dedup: + return existing + + meta = await RecommendationMetadata.fetch_batch( + self.tmdb_service, + list(dedup.values()), + content_type, + target_count=need * 2, + user_settings=self.user_settings, + ) + + extra = [] + for it in meta: + if it.get("id") in watched_imdb: + continue + if it.get("_external_ids", {}).get("imdb_id") in watched_imdb: + continue + + # Final check against existing + is_dup = False + for e in existing: + if e.get("id") == it.get("id"): + is_dup = True + break + if is_dup: + continue + + it.pop("_external_ids", None) + extra.append(it) + if len(extra) >= need: + break + + return existing + extra diff --git a/app/services/recommendation/filtering.py b/app/services/recommendation/filtering.py new file mode 100644 index 0000000..947ae54 --- /dev/null +++ b/app/services/recommendation/filtering.py @@ -0,0 +1,168 @@ +from typing import Any +from urllib.parse import unquote + + +def parse_identifier(identifier: str) -> tuple[str | None, int | None]: + """Parse Stremio identifier to extract IMDB ID and TMDB ID.""" + if not identifier: + return None, None + + decoded = unquote(identifier) + imdb_id: str | None = None + tmdb_id: int | None = None + + for token in decoded.split(","): + token = token.strip() + if not token: + continue + if token.startswith("tt") and imdb_id is None: + imdb_id = token + elif token.startswith("tmdb:") and tmdb_id is None: + try: + tmdb_id = int(token.split(":", 1)[1]) + except (ValueError, IndexError): + continue + if imdb_id and tmdb_id is not None: + break + + return imdb_id, tmdb_id + + +class RecommendationFiltering: + """ + Handles exclusion sets, genre whitelists, and item filtering. + """ + + @staticmethod + async def get_exclusion_sets( + stremio_service: Any, library_data: dict | None = None, auth_key: str | None = None + ) -> tuple[set[str], set[int]]: + """ + Fetch library items and build exclusion sets for watched/loved content. 
+ """ + if library_data is None: + if not auth_key: + return set(), set() + library_data = await stremio_service.library.get_library_items(auth_key) + + all_items = ( + library_data.get("loved", []) + + library_data.get("watched", []) + + library_data.get("removed", []) + library_data.get("liked", []) + ) + + imdb_ids = set() + tmdb_ids = set() + + for item in all_items: + item_id = item.get("_id", "") + if not item_id: + continue + + imdb_id, tmdb_id = parse_identifier(item_id) + + if imdb_id: + imdb_ids.add(imdb_id) + if tmdb_id: + tmdb_ids.add(tmdb_id) + + # Fallback parsing for common Stremio/Watchly patterns + if item_id.startswith("tt"): + # Keep only the base IMDb ID: tt123 stays tt123, tt123:1:1 becomes tt123 + base_imdb = item_id.split(":")[0] + imdb_ids.add(base_imdb) + elif item_id.startswith("tmdb:"): + try: + tid = int(item_id.split(":")[1]) + tmdb_ids.add(tid) + except Exception: + pass + + return imdb_ids, tmdb_ids + + @staticmethod + def filter_candidates( + candidates: list[dict[str, Any]], watched_imdb: set[str], watched_tmdb: set[int] + ) -> list[dict[str, Any]]: + """ + Filter candidates against watched sets. + Matches both TMDB (int) and IMDB (str). + """ + filtered = [] + for item in candidates: + tid = item.get("id") + # 1. Check TMDB ID (integer) + if tid and isinstance(tid, int) and tid in watched_tmdb: + continue + + # 2. Check Stremio ID (string) if present as 'id' + if tid and isinstance(tid, str): + if tid in watched_imdb: + continue + if tid.startswith("tmdb:"): + try: + if int(tid.split(":")[1]) in watched_tmdb: + continue + except Exception: + pass + + # 3. Check External IDs + ext = item.get("external_ids", {}) or item.get("_external_ids", {}) + imdb = ext.get("imdb_id") + if imdb and imdb in watched_imdb: + continue + + # 4.
Handle cases where TMDB ID is in 'id' but it's a string + try: + if tid and int(tid) in watched_tmdb: + continue + except Exception: + pass + + filtered.append(item) + return filtered + + @staticmethod + def get_excluded_genre_ids(user_settings: Any, content_type: str) -> list[int]: + """Get genre IDs to exclude based on user settings.""" + if not user_settings: + return [] + if content_type == "movie": + return [int(g) for g in user_settings.excluded_movie_genres] + elif content_type in ["series", "tv"]: + return [int(g) for g in user_settings.excluded_series_genres] + return [] + + @staticmethod + def get_genre_multiplier(genre_ids: list[int] | None, whitelist: set[int]) -> float: + """Calculate a score multiplier based on genre preference. Blocks animation if not preferred.""" + if not whitelist: + return 1.0 + + gids = set(genre_ids or []) + if not gids: + return 1.0 + + # Special handling for Animation (16): Heavy penalty if not in whitelist + if 16 in gids and 16 not in whitelist: + return 0.1 + + # If it has at least one preferred genre, full score + if gids & whitelist: + return 1.0 + + # Otherwise, soft penalty to prioritize whitelist items without blocking variety + return 0.4 + + @staticmethod + def passes_top_genre_whitelist(genre_ids: list[int] | None, whitelist: set[int]) -> bool: + """Check if an item's genres match the user's top genre whitelist (Softened).""" + if not whitelist: + return True + gids = set(genre_ids or []) + if not gids: + return True + # If it's animation and not in whitelist, we still block it to prevent 'Anime Takeover' + if 16 in gids and 16 not in whitelist: + return False + return True diff --git a/app/services/recommendation/metadata.py b/app/services/recommendation/metadata.py new file mode 100644 index 0000000..71576b7 --- /dev/null +++ b/app/services/recommendation/metadata.py @@ -0,0 +1,159 @@ +import asyncio +from typing import Any + +from app.services.rpdb import RPDBService + + +class RecommendationMetadata: + """ + 
Handles fetching and formatting metadata for Stremio. + """ + + @staticmethod + def extract_year(item: dict[str, Any]) -> int | None: + """Extract year from TMDB item.""" + date_str = item.get("release_date") or item.get("first_air_date") + if not date_str: + ri = item.get("releaseInfo") + if isinstance(ri, str) and len(ri) >= 4 and ri[:4].isdigit(): + return int(ri[:4]) + return None + try: + return int(date_str[:4]) + except Exception: + return None + + @classmethod + async def format_for_stremio( + cls, details: dict[str, Any], media_type: str, user_settings: Any = None + ) -> dict[str, Any] | None: + """Format TMDB details into Stremio metadata object.""" + external_ids = details.get("external_ids", {}) + imdb_id = external_ids.get("imdb_id") + tmdb_id_raw = details.get("id") + + if imdb_id: + stremio_id = imdb_id + elif tmdb_id_raw: + stremio_id = f"tmdb:{tmdb_id_raw}" + else: + return None + + title = details.get("title") or details.get("name") + if not title: + return None + + # Base Fields + genres_full = details.get("genres", []) or [] + release_date = details.get("release_date") or details.get("first_air_date") or "" + + meta_data = { + "id": stremio_id, + "imdb_id": imdb_id, + "type": "series" if media_type in ["tv", "series"] else "movie", + "name": title, + "poster": cls._get_poster_url(details, stremio_id, user_settings), + "background": cls._get_backdrop_url(details), + "description": details.get("overview"), + "releaseInfo": release_date[:4] if release_date else None, + "imdbRating": str(details.get("vote_average", "")), + "genres": [g.get("name") for g in genres_full if isinstance(g, dict)], + "vote_average": details.get("vote_average"), + "vote_count": details.get("vote_count"), + "popularity": details.get("popularity"), + "original_language": details.get("original_language"), + "_external_ids": external_ids, + "_tmdb_id": tmdb_id_raw, + "genre_ids": [g.get("id") for g in genres_full if isinstance(g, dict) and g.get("id") is not None], + } + + # 
Extensions + runtime_str = cls._extract_runtime_string(details) + if runtime_str: + meta_data["runtime"] = runtime_str + + if media_type == "movie": + coll = details.get("belongs_to_collection") + if isinstance(coll, dict): + meta_data["_collection_id"] = coll.get("id") + + # Cast & Crew + cast = details.get("credits", {}).get("cast", []) or [] + meta_data["_top_cast_ids"] = [c.get("id") for c in cast[:3] if isinstance(c, dict) and c.get("id")] + + # Keywords & Credits for similarity re-ranking + if details.get("keywords"): + meta_data["keywords"] = details.get("keywords") + if details.get("credits"): + meta_data["credits"] = details.get("credits") + + return meta_data + + @staticmethod + def _get_poster_url(details: dict, stremio_id: str, user_settings: Any) -> str | None: + """Resolve poster URL using RPDB if configured, otherwise TMDB.""" + if user_settings and user_settings.rpdb_key: + return RPDBService.get_poster_url(user_settings.rpdb_key, stremio_id) + path = details.get("poster_path") + return f"https://image.tmdb.org/t/p/w500{path}" if path else None + + @staticmethod + def _get_backdrop_url(details: dict) -> str | None: + """Construct full TMDB backdrop URL.""" + path = details.get("backdrop_path") + return f"https://image.tmdb.org/t/p/original{path}" if path else None + + @staticmethod + def _extract_runtime_string(details: dict) -> str | None: + """Extract and format runtime from either movie or TV format.""" + runtime = details.get("runtime") + if not runtime and details.get("episode_run_time"): + runtime = details.get("episode_run_time")[0] + return f"{runtime} min" if runtime else None + + @classmethod + async def fetch_batch( + cls, + tmdb_service: Any, + items: list[dict[str, Any]], + media_type: str, + target_count: int, + user_settings: Any = None, + ) -> list[dict[str, Any]]: + """Fetch details for a batch of items in parallel with target-based short-circuiting.""" + final_results = [] + valid_items = [it for it in items if it.get("id")] + 
query_type = "movie" if media_type == "movie" else "tv" + sem = asyncio.Semaphore(30) + + async def _fetch_one(tid: int): + async with sem: + try: + if query_type == "movie": + return await tmdb_service.get_movie_details(tid) + return await tmdb_service.get_tv_details(tid) + except Exception: + return None + + # Process in chunks to allow early exit once target_count is reached + batch_size = 20 + for i in range(0, len(valid_items), batch_size): + if len(final_results) >= target_count: + break + + chunk = valid_items[i : i + batch_size] # noqa + tasks = [_fetch_one(it["id"]) for it in chunk] + details_list = await asyncio.gather(*tasks) + + for details in details_list: + if not details: + continue + + formatted = await cls.format_for_stremio(details, media_type, user_settings) + if formatted: + final_results.append(formatted) + + if len(final_results) >= target_count: + break + + return final_results diff --git a/app/services/recommendation/scoring.py b/app/services/recommendation/scoring.py new file mode 100644 index 0000000..630478b --- /dev/null +++ b/app/services/recommendation/scoring.py @@ -0,0 +1,116 @@ +import hashlib +import math +from collections.abc import Callable +from typing import Any + + +class RecommendationScoring: + """ + Handles ranking, recency multipliers, and score normalization. 
+ """ + + @staticmethod + def weighted_rating(vote_avg: float | None, vote_count: int | None, C: float = 6.8, m: int = 300) -> float: + """IMDb-style weighted rating on 0-10 scale.""" + try: + R = float(vote_avg or 0.0) + v = int(vote_count or 0) + except Exception: + R, v = 0.0, 0 + return ((v / (v + m)) * R) + ((m / (v + m)) * C) + + @staticmethod + def normalize(value: float, min_v: float = 0.0, max_v: float = 10.0) -> float: + """Normalize score to 0-1 range.""" + if max_v == min_v: + return 0.0 + return max(0.0, min(1.0, (value - min_v) / (max_v - min_v))) + + @staticmethod + def stable_epsilon(tmdb_id: int, seed: str) -> float: + """Generate a stable tiny epsilon to break ties deterministically.""" + if not seed: + return 0.0 + h = hashlib.md5(f"{seed}:{tmdb_id}".encode()).hexdigest() + eps = int(h[-6:], 16) % 1000 + return eps / 1_000_000.0 + + @staticmethod + def get_recency_multiplier_fn( + profile: Any, candidate_decades: set[int] | None = None + ) -> tuple[Callable[[int | None], float], float]: + """ + Build a multiplier function m(year) based on user's decade preferences. 
+ """ + try: + years_map = getattr(profile.years, "values", {}) or {} + decade_weights = {int(k): float(v) for k, v in years_map.items() if isinstance(k, int)} + total_w = sum(decade_weights.values()) + except Exception: + decade_weights = {} + total_w = 0.0 + + recent_w = sum(w for d, w in decade_weights.items() if d >= 2010) + classic_w = sum(w for d, w in decade_weights.items() if d < 2000) + total_rc = recent_w + classic_w + + if total_rc <= 0: + return (lambda _y: 1.0), 0.0 + + score = (recent_w - classic_w) / (total_rc + 1e-6) + k = 2.0 + intensity_raw = 1.0 / (1.0 + math.exp(-k * score)) + intensity = 2.0 * (intensity_raw - 0.5) # [-1, 1] + alpha = abs(intensity) + + if candidate_decades: + support = {int(d) for d in candidate_decades if isinstance(d, int)} | set(decade_weights.keys()) + else: + support = set(decade_weights.keys()) + + if not support: + return (lambda _y: 1.0), 0.0 + + if total_w > 0: + p_user = {d: (decade_weights.get(d, 0.0) / total_w) for d in support} + else: + p_user = {d: 0.0 for d in support} + + D = max(1, len(support)) + uniform = 1.0 / D + + def m_raw(year: int | None) -> float: + if year is None: + return 1.0 + decade = (int(year) // 10) * 10 + pu = p_user.get(decade, 0.0) + return 1.0 + intensity * (pu - uniform) + + return m_raw, alpha + + @staticmethod + def apply_quality_adjustments(score: float, wr: float, vote_count: int, is_ranked: bool, is_fresh: bool) -> float: + """Apply multiplicative adjustments based on item quality and source.""" + q_mult = 1.0 + if vote_count < 50: + q_mult *= 0.6 + elif vote_count < 150: + q_mult *= 0.85 + + if wr < 5.5: + q_mult *= 0.5 + elif wr < 6.0: + q_mult *= 0.7 + elif wr >= 7.0 and vote_count >= 500: + q_mult *= 1.10 + + if is_ranked: + if wr >= 6.5 and vote_count >= 200: + q_mult *= 1.25 + elif wr >= 6.0 and vote_count >= 100: + q_mult *= 1.10 + + if is_fresh and wr >= 7.0 and vote_count >= 300: + q_mult *= 1.10 + + return score * q_mult diff --git a/app/services/recommendation_service.py 
b/app/services/recommendation_service.py deleted file mode 100644 index e032de6..0000000 --- a/app/services/recommendation_service.py +++ /dev/null @@ -1,1291 +0,0 @@ -import asyncio -import hashlib -import math -from urllib.parse import unquote - -from loguru import logger - -from app.core.settings import UserSettings -from app.services.discovery import DiscoveryEngine -from app.services.rpdb import RPDBService -from app.services.scoring import ScoringService -from app.services.stremio_service import StremioService -from app.services.tmdb_service import get_tmdb_service -from app.services.user_profile import TOP_GENRE_WHITELIST_LIMIT, UserProfileService - -# Diversification: cap per-genre share in final results (e.g., 0.4 => max 40% per genre) -PER_GENRE_MAX_SHARE = 0.4 - - -def _parse_identifier(identifier: str) -> tuple[str | None, int | None]: - """Parse Stremio identifier to extract IMDB ID and TMDB ID.""" - if not identifier: - return None, None - - decoded = unquote(identifier) - imdb_id: str | None = None - tmdb_id: int | None = None - - for token in decoded.split(","): - token = token.strip() - if not token: - continue - if token.startswith("tt") and imdb_id is None: - imdb_id = token - elif token.startswith("tmdb:") and tmdb_id is None: - try: - tmdb_id = int(token.split(":", 1)[1]) - except (ValueError, IndexError): - continue - if imdb_id and tmdb_id is not None: - break - - return imdb_id, tmdb_id - - -class RecommendationService: - """ - Service for generating recommendations based on user's Stremio library. - Implements a Hybrid Recommendation System (Similarity + Discovery). 
- """ - - def __init__( - self, - stremio_service: StremioService | None = None, - language: str = "en-US", - user_settings: UserSettings | None = None, - token: str | None = None, - library_data: dict | None = None, - ): - if stremio_service is None: - raise ValueError("StremioService instance is required for personalized recommendations") - self.tmdb_service = get_tmdb_service(language=language) - self.stremio_service = stremio_service - self.scoring_service = ScoringService() - self.user_profile_service = UserProfileService(language=language) - self.discovery_engine = DiscoveryEngine(language=language) - self.per_item_limit = 20 - self.user_settings = user_settings - # Stable seed for tie-breaking and per-token caching - self.stable_seed = token or "" - # Optional pre-fetched library payload (reuse within the request) - self._library_data: dict | None = library_data - # cache: content_type -> set of top genre IDs - self._whitelist_cache: dict[str, set[int]] = {} - - def _stable_epsilon(self, tmdb_id: int) -> float: - if not self.stable_seed: - return 0.0 - h = hashlib.md5(f"{self.stable_seed}:{tmdb_id}".encode()).hexdigest() - # Use last 6 hex digits for tiny epsilon - eps = int(h[-6:], 16) % 1000 - return eps / 1_000_000.0 - - @staticmethod - def _normalize(value: float, min_v: float = 0.0, max_v: float = 10.0) -> float: - if max_v == min_v: - return 0.0 - return max(0.0, min(1.0, (value - min_v) / (max_v - min_v))) - - @staticmethod - def _weighted_rating(vote_avg: float | None, vote_count: int | None, C: float = 6.8, m: int = 300) -> float: - """ - IMDb-style weighted rating. Returns value on 0-10 scale. - C = global mean; m = minimum votes for full weight. 
- """ - try: - R = float(vote_avg or 0.0) - v = int(vote_count or 0) - except Exception: - R, v = 0.0, 0 - return ((v / (v + m)) * R) + ((m / (v + m)) * C) - - # ---------------- Recency preference (AUTO, sigmoid intensity) ---------------- - def _get_recency_multiplier_fn(self, profile, candidate_decades: set[int] | None = None): - """ - Build a multiplier function m(year) using a sigmoid-scaled intensity of the user's - recent vs classic preference derived from profile.years. - - Compute score in [-1,1] from recent (>=2015) vs classic (<2000) weights - - intensity = 2*(sigmoid(k*score)-0.5) in [-1,1] - - Apply per-year-bin deltas scaled by intensity, clamped to [0.85, 1.15] - """ - try: - years_map = getattr(profile.years, "values", {}) or {} - # Build user decade weights (keys are decades like 1990, 2000, ...) - decade_weights = {int(k): float(v) for k, v in years_map.items() if isinstance(k, int)} - total_w = sum(decade_weights.values()) - except Exception: - decade_weights = {} - total_w = 0.0 - - # Recent vs classic signal for intensity - recent_w = sum(w for d, w in decade_weights.items() if d >= 2010) - classic_w = sum(w for d, w in decade_weights.items() if d < 2000) - total_rc = recent_w + classic_w - if total_rc <= 0: - # No signal → neutral function with zero intensity - return (lambda _y: 1.0), 0.0 - - score = (recent_w - classic_w) / (total_rc + 1e-6) - k = 2.0 - intensity_raw = 1.0 / (1.0 + math.exp(-k * score)) - intensity = 2.0 * (intensity_raw - 0.5) # [-1, 1] - alpha = abs(intensity) - - # Build p_user over the support set of decades (union of profile and candidate decades) - if candidate_decades: - support = {int(d) for d in candidate_decades if isinstance(d, int)} | set(decade_weights.keys()) - else: - support = set(decade_weights.keys()) - if not support: - return (lambda _y: 1.0), 0.0 - - # Normalize user distribution over support (zero for unseen decades) - # If total_w is zero, return neutral - if total_w > 0: - p_user = {d: 
(decade_weights.get(d, 0.0) / total_w) for d in support} - else: - p_user = {d: 0.0 for d in support} - D = max(1, len(support)) - uniform = 1.0 / D - - def m_raw(year: int | None) -> float: - if year is None: - return 1.0 - try: - y = int(year) - except Exception: - return 1.0 - decade = (y // 10) * 10 - pu = p_user.get(decade, 0.0) - return 1.0 + intensity * (pu - uniform) - - return m_raw, alpha - - @staticmethod - def _extract_year_from_item(item: dict) -> int | None: - """Extract year from a TMDB item dict (raw or enriched).""" - date_str = item.get("release_date") or item.get("first_air_date") - if not date_str: - ri = item.get("releaseInfo") - if isinstance(ri, str) and len(ri) >= 4 and ri[:4].isdigit(): - try: - return int(ri[:4]) - except Exception: - return None - return None - try: - return int(date_str[:4]) - except Exception: - return None - - @staticmethod - def _recency_multiplier(year: int | None) -> float: - """Prefer recent titles. Softly dampen very old titles.""" - if not year: - return 1.0 - try: - y = int(year) - except Exception: - return 1.0 - if y >= 2021: - return 1.12 - if y >= 2015: - return 1.06 - if y >= 2010: - return 1.00 - if y >= 2000: - return 0.92 - if y >= 1990: - return 0.82 - return 0.70 - - async def _get_exclusion_sets(self, content_type: str | None = None) -> tuple[set[str], set[int]]: - """ - Fetch library items and build strict exclusion sets for watched content. - Excludes watched and loved items (and items user explicitly removed). - Note: We no longer exclude 'added' items to avoid over-thinning the pool. 
- Returns (watched_imdb_ids, watched_tmdb_ids) - """ - # Use cached/pre-fetched library data when available - if self._library_data is None: - self._library_data = await self.stremio_service.get_library_items() - library_data = self._library_data - # Combine loved, watched, added, and removed (added/removed treated as exclude-only) - all_items = library_data.get("loved", []) + library_data.get("watched", []) + library_data.get("removed", []) - - imdb_ids = set() - tmdb_ids = set() - - for item in all_items: - # Optional: filter by type if provided, but safer to exclude all types to avoid cross-contamination - # if content_type and item.get("type") != content_type: continue - - item_id = item.get("_id", "") - imdb_id, tmdb_id = _parse_identifier(item_id) - - if imdb_id: - imdb_ids.add(imdb_id) - if tmdb_id: - tmdb_ids.add(tmdb_id) - - # Also handle raw IDs if parse failed but it looks like one - if item_id.startswith("tt"): - imdb_ids.add(item_id) - elif item_id.startswith("tmdb:"): - try: - tmdb_ids.add(int(item_id.split(":")[1])) - except Exception: - pass - - return imdb_ids, tmdb_ids - - async def _get_top_genre_whitelist(self, content_type: str) -> set[int]: - """Compute and cache user's top-genre whitelist for the given content type.""" - if content_type in self._whitelist_cache: - return self._whitelist_cache[content_type] - - try: - if self._library_data is None: - self._library_data = await self.stremio_service.get_library_items() - all_items = ( - self._library_data.get("loved", []) - + self._library_data.get("watched", []) - + self._library_data.get("added", []) - ) - typed = [ - it - for it in all_items - if it.get("type") == content_type or (content_type in ("tv", "series") and it.get("type") == "series") - ] - unique_items = {it["_id"]: it for it in typed} - scored_objects = [] - sorted_history = sorted( - unique_items.values(), key=lambda x: x.get("state", {}).get("lastWatched"), reverse=True - ) - for it in sorted_history[:10]: - 
scored_objects.append(self.scoring_service.process_item(it)) - # UserProfileService expects 'movie' or 'series' - prof_content_type = "series" if content_type in ("tv", "series") else "movie" - user_profile = await self.user_profile_service.build_user_profile( - scored_objects, content_type=prof_content_type - ) - top_gen_pairs = user_profile.get_top_genres(limit=TOP_GENRE_WHITELIST_LIMIT) - whitelist = {int(gid) for gid, _ in top_gen_pairs} - except Exception: - whitelist = set() - - self._whitelist_cache[content_type] = whitelist - return whitelist - - async def _passes_top_genre(self, genre_ids: list[int] | None, content_type: str) -> bool: - whitelist = await self._get_top_genre_whitelist(content_type) - if not whitelist: - return True - gids = set(genre_ids or []) - if not gids: - return True - if 16 in gids and 16 not in whitelist: - return False - return bool(gids & whitelist) - - async def _inject_freshness( - self, - pool: list[dict], - media_type: str, - watched_tmdb: set[int], - excluded_ids: set[int], - cap_injection: int, - target_capacity: int, - ) -> list[dict]: - try: - mtype = "tv" if media_type in ("tv", "series") else "movie" - trending_resp = await self.tmdb_service.get_trending(mtype, time_window="week") - trending = trending_resp.get("results", []) if trending_resp else [] - top_rated_resp = await self.tmdb_service.get_top_rated(mtype) - top_rated = top_rated_resp.get("results", []) if top_rated_resp else [] - fresh_pool = [] - fresh_pool.extend(trending[:40]) - fresh_pool.extend(top_rated[:40]) - - from collections import defaultdict - - existing_ids = {it.get("id") for it in pool if it.get("id") is not None} - fresh_genre_counts = defaultdict(int) - fresh_added = 0 - for it in fresh_pool: - tid = it.get("id") - if not tid or tid in existing_ids or tid in watched_tmdb: - continue - gids = it.get("genre_ids") or [] - if excluded_ids and excluded_ids.intersection(set(gids)): - continue - if not await self._passes_top_genre(gids, media_type): - 
continue - if gids and any(fresh_genre_counts[g] >= cap_injection for g in gids): - continue - va = float(it.get("vote_average") or 0.0) - vc = int(it.get("vote_count") or 0) - if vc < 300 or va < 7.0: - continue - pool.append(it) - existing_ids.add(tid) - for g in gids: - fresh_genre_counts[g] += 1 - fresh_added += 1 - if len(pool) >= target_capacity: - break - if fresh_added: - logger.info(f"Freshness injection added {fresh_added} items") - except Exception as e: - logger.warning(f"Freshness injection failed: {e}") - return pool - - async def _filter_candidates( - self, candidates: list[dict], watched_imdb_ids: set[str], watched_tmdb_ids: set[int] - ) -> list[dict]: - """ - Filter candidates against watched sets using TMDB ID first, then IMDB ID (if available). - """ - filtered = [] - for item in candidates: - tmdb_id = item.get("id") - # 1. Check TMDB ID (Fast) - if tmdb_id and tmdb_id in watched_tmdb_ids: - continue - - # 2. Check external IDs (if present in candidate) - external_ids = item.get("external_ids", {}) - imdb_id = external_ids.get("imdb_id") - if imdb_id and imdb_id in watched_imdb_ids: - continue - - filtered.append(item) - return filtered - - async def _fetch_metadata_for_items( - self, items: list[dict], media_type: str, target_count: int | None = None, batch_size: int = 20 - ) -> list[dict]: - """ - Fetch detailed metadata for items directly from TMDB API and format for Stremio. 
- """ - final_results = [] - # Ensure media_type is correct - query_media_type = "movie" if media_type == "movie" else "tv" - - sem = asyncio.Semaphore(30) - - async def _fetch_details(tmdb_id: int): - try: - async with sem: - if query_media_type == "movie": - return await self.tmdb_service.get_movie_details(tmdb_id) - else: - return await self.tmdb_service.get_tv_details(tmdb_id) - except Exception as e: - logger.warning(f"Failed to fetch details for TMDB ID {tmdb_id}: {e}") - return None - - # Filter out items without ID and process in batches for early stop - valid_items = [item for item in items if item.get("id")] - if not valid_items: - return [] - - # Decide target_count if not provided - if target_count is None: - # Aim to collect up to 2x of typical need but not exceed total - target_count = min(len(valid_items), 40) - - for i in range(0, len(valid_items), batch_size): - if len(final_results) >= target_count: - break - chunk = valid_items[i : i + batch_size] # noqa - tasks = [_fetch_details(item["id"]) for item in chunk] - details_results = await asyncio.gather(*tasks) - for details in details_results: - if not details: - continue - - # Extract IMDB ID from external_ids - external_ids = details.get("external_ids", {}) - imdb_id = external_ids.get("imdb_id") - - # Prefer IMDB ID, fallback to TMDB ID (as stremio:tmdb:) to avoid losing candidates - if imdb_id: - stremio_id = imdb_id - else: - tmdb_fallback = details.get("id") - if tmdb_fallback: - stremio_id = f"tmdb:{tmdb_fallback}" - else: - continue - - # Construct Stremio meta object - title = details.get("title") or details.get("name") - if not title: - continue - - # Image paths - poster_path = details.get("poster_path") - backdrop_path = details.get("backdrop_path") - - release_date = details.get("release_date") or details.get("first_air_date") or "" - year = release_date[:4] if release_date else None - - if self.user_settings and self.user_settings.rpdb_key: - poster_url = 
RPDBService.get_poster_url(self.user_settings.rpdb_key, stremio_id) - else: - poster_url = f"https://image.tmdb.org/t/p/w500{poster_path}" if poster_path else None - - genres_full = details.get("genres", []) or [] - genre_ids = [g.get("id") for g in genres_full if isinstance(g, dict) and g.get("id") is not None] - - meta_data = { - "id": stremio_id, - "imdb_id": imdb_id, - "type": "series" if media_type in ["tv", "series"] else "movie", - "name": title, - "poster": poster_url, - "background": f"https://image.tmdb.org/t/p/original{backdrop_path}" if backdrop_path else None, - "description": details.get("overview"), - "releaseInfo": year, - "imdbRating": str(details.get("vote_average", "")), - # Display genres (names) but keep full ids separately - "genres": [g.get("name") for g in genres_full], - # Keep fields for ranking and post-processing - "vote_average": details.get("vote_average"), - "vote_count": details.get("vote_count"), - "popularity": details.get("popularity"), - "original_language": details.get("original_language"), - # pass internal external_ids for post-filtering if needed - "_external_ids": external_ids, - # internal fields for suppression/rerank - "_tmdb_id": details.get("id"), - "genre_ids": genre_ids, - } - - # Add runtime if available (Movie) or episode run time (TV) - runtime = details.get("runtime") - if not runtime and details.get("episode_run_time"): - runtime = details.get("episode_run_time")[0] - - if runtime: - meta_data["runtime"] = f"{runtime} min" - - # internal fields for collection and cast (movies only for collection) - if query_media_type == "movie": - coll = details.get("belongs_to_collection") or {} - if isinstance(coll, dict): - meta_data["_collection_id"] = coll.get("id") - - # top 3 cast ids - cast = details.get("credits", {}).get("cast", []) or [] - meta_data["_top_cast_ids"] = [c.get("id") for c in cast[:3] if c.get("id") is not None] - - # Attach minimal structures for similarity to use keywords/credits later - if 
details.get("keywords"): - meta_data["keywords"] = details.get("keywords") - if details.get("credits"): - meta_data["credits"] = details.get("credits") - - final_results.append(meta_data) - - if len(final_results) >= target_count: - break - - return final_results - - async def get_recommendations_for_item(self, item_id: str) -> list[dict]: - """ - Get recommendations for a specific item by IMDB ID. - STRICT FILTERING: Excludes watched items. - """ - # Fetch Exclusion Sets first - watched_imdb, watched_tmdb = await self._get_exclusion_sets() - - # Ensure the source item itself is excluded - if item_id.startswith("tt"): - watched_imdb.add(item_id) - elif item_id.startswith("tmdb:"): - watched_tmdb.add(int(item_id.split(":")[1])) - - # Convert IMDB ID to TMDB ID - if item_id.startswith("tt"): - tmdb_id, media_type = await self.tmdb_service.find_by_imdb_id(item_id) - if not tmdb_id: - logger.warning(f"No TMDB ID found for {item_id}") - return [] - else: - tmdb_id = item_id.split(":")[1] - media_type = "movie" # Default - - if not media_type: - media_type = "movie" - - # Build top-genre whitelist for this type - _whitelist = await self._get_top_genre_whitelist(media_type) - - def _passes_top_genre(item_genre_ids: list[int] | None) -> bool: - if not _whitelist: - return True - gids = set(item_genre_ids or []) - if not gids: - return True - if 16 in gids and 16 not in _whitelist: - return False - return bool(gids & _whitelist) - - # Fetch more candidates to account for filtering - # We want 20 final, so fetch 40 - buffer_limit = self.per_item_limit * 2 - recommendations = await self._fetch_recommendations_from_tmdb(str(tmdb_id), media_type, buffer_limit) - - if not recommendations: - return [] - - # 1. Filter by TMDB ID - recommendations = await self._filter_candidates(recommendations, watched_imdb, watched_tmdb) - - # 1.5 Filter by Excluded Genres - # We need to detect content_type from item_id or media_type to know which exclusion list to use. 
- # media_type is already resolved above. - excluded_ids = set(self._get_excluded_genre_ids(media_type)) - - if excluded_ids: - recommendations = [ - item for item in recommendations if not excluded_ids.intersection(item.get("genre_ids") or []) - ] - # Top-genre whitelist filter - recommendations = [it for it in recommendations if _passes_top_genre(it.get("genre_ids"))] - - # 1.6 Freshness: inject trending/top-rated within whitelist to expand pool - if len(recommendations) < buffer_limit: - recommendations = await self._inject_freshness( - recommendations, - media_type, - watched_tmdb, - excluded_ids, - max(1, int(self.per_item_limit * PER_GENRE_MAX_SHARE)), - buffer_limit, - ) - - # 2. Fetch Metadata (gets IMDB IDs) - meta_items = await self._fetch_metadata_for_items( - recommendations, media_type, target_count=self.per_item_limit * 2 - ) - - # 3. Strict Filter by IMDB ID (using metadata) - final_items = [] - for item in meta_items: - # check ID (stremio_id) which is usually imdb_id - if item["id"] in watched_imdb: - continue - # check hidden external_ids if available - ext_ids = item.get("_external_ids", {}) - if ext_ids.get("imdb_id") in watched_imdb: - continue - # Apply top-genre whitelist with enriched genre_ids - if not _passes_top_genre(item.get("genre_ids")): - continue - - # Clean up internal fields - item.pop("_external_ids", None) - final_items.append(item) - - if len(final_items) >= self.per_item_limit: - break - - logger.info(f"Found {len(final_items)} valid recommendations for {item_id}") - return final_items - - def _get_excluded_genre_ids(self, content_type: str) -> list[int]: - if not self.user_settings: - return [] - if content_type == "movie": - return [int(g) for g in self.user_settings.excluded_movie_genres] - elif content_type in ["series", "tv"]: - return [int(g) for g in self.user_settings.excluded_series_genres] - return [] - - async def get_recommendations_for_theme(self, theme_id: str, content_type: str, limit: int = 20) -> list[dict]: - 
""" - Parse a dynamic theme ID and fetch recommendations. - Format: watchly.theme.g[-].k[-].ct... - """ - # Parse params from ID - params = {} - parts = theme_id.replace("watchly.theme.", "").split(".") - - for part in parts: - if part.startswith("g"): - # Genres: g878-53 -> 878,53 - genre_str = part[1:].replace("-", ",") - params["with_genres"] = genre_str.replace(",", "|") - elif part.startswith("k"): - # Keywords: k123-456 - kw_str = part[1:].replace("-", "|") - params["with_keywords"] = kw_str - elif part.startswith("ct"): - # Country: ctUS - params["with_origin_country"] = part[2:] - elif part.startswith("y"): - # Year/Decade: y1990 -> 1990-01-01 to 1999-12-31 - try: - year = int(part[1:]) - params["primary_release_date.gte"] = f"{year}-01-01" - params["primary_release_date.lte"] = f"{year+9}-12-31" - except ValueError: - pass - elif part == "sort-vote": - params["sort_by"] = "vote_average.desc" - params["vote_count.gte"] = 200 - - # Default Sort - if "sort_by" not in params: - params["sort_by"] = "popularity.desc" - - # Apply Excluded Genres but don't conflict with explicit with_genres from theme - excluded_ids = self._get_excluded_genre_ids(content_type) - if excluded_ids: - try: - with_ids = { - int(g) - for g in ( - params.get("with_genres", "").replace("|", ",").split(",") if params.get("with_genres") else [] - ) - if g - } - except Exception: - with_ids = set() - final_without = [g for g in excluded_ids if g not in with_ids] - if final_without: - params["without_genres"] = "|".join(str(g) for g in final_without) - - # Build whitelist via helper - _whitelist = await self._get_top_genre_whitelist(content_type) - - def _passes_top_genre(item_genre_ids: list[int] | None) -> bool: - if not _whitelist: - return True - gids = set(item_genre_ids or []) - if not gids: - return True - if 16 in gids and 16 not in _whitelist: - return False - return bool(gids & _whitelist) - - # Fetch (with simple multi-page fallback to increase pool) - candidates: list[dict] = [] - 
try: - first = await self.tmdb_service.get_discover(content_type, **params) - candidates.extend(first.get("results", [])) - # If we have too few, try page 2 (and 3) to increase pool size - if len(candidates) < limit * 2: - second = await self.tmdb_service.get_discover(content_type, page=2, **params) - candidates.extend(second.get("results", [])) - if len(candidates) < limit * 2: - third = await self.tmdb_service.get_discover(content_type, page=3, **params) - candidates.extend(third.get("results", [])) - except Exception: - candidates = [] - - # Apply top-genre whitelist on raw candidates - if candidates: - candidates = [it for it in candidates if _passes_top_genre(it.get("genre_ids"))] - - # Strict Filtering - watched_imdb, watched_tmdb = await self._get_exclusion_sets() - filtered = await self._filter_candidates(candidates, watched_imdb, watched_tmdb) - - # Freshness injection: add trending/popular/top-rated (within whitelist) if pool thin - if len(filtered) < limit * 2: - filtered = await self._inject_freshness( - filtered, - content_type, - watched_tmdb, - set(excluded_ids), - max(1, int(limit * PER_GENRE_MAX_SHARE)), - limit * 3, - ) - - # Meta - meta_items = await self._fetch_metadata_for_items(filtered, content_type, target_count=limit * 3) - - final_items = [] - for item in meta_items: - if item["id"] in watched_imdb: - continue - if item.get("_external_ids", {}).get("imdb_id") in watched_imdb: - continue - # Apply whitelist again on enriched metadata - if not _passes_top_genre(item.get("genre_ids")): - continue - item.pop("_external_ids", None) - final_items.append(item) - - # Enforce limit - if len(final_items) > limit: - final_items = final_items[:limit] - - return final_items - - async def _fetch_recommendations_from_tmdb(self, item_id: str, media_type: str, limit: int) -> list[dict]: - """ - Fetch recommendations from TMDB for a given TMDB ID. 
- """ - if isinstance(item_id, int): - item_id = str(item_id) - - if item_id.startswith("tt"): - tmdb_id, detected_type = await self.tmdb_service.find_by_imdb_id(item_id) - if not tmdb_id: - return [] - if detected_type: - media_type = detected_type - elif item_id.startswith("tmdb:"): - tmdb_id = int(item_id.split(":")[1]) - # Detect media_type if unknown or invalid - if media_type not in ("movie", "tv", "series"): - detected_type = None - try: - details = await self.tmdb_service.get_movie_details(tmdb_id) - if details: - detected_type = "movie" - except Exception: - pass - if not detected_type: - try: - details = await self.tmdb_service.get_tv_details(tmdb_id) - if details: - detected_type = "tv" - except Exception: - pass - if detected_type: - media_type = detected_type - else: - tmdb_id = item_id - - # Normalize series alias - mtype = "tv" if media_type in ("tv", "series") else "movie" - # Try multiple pages to increase pool - combined: dict[int, dict] = {} - try: - rec1 = await self.tmdb_service.get_recommendations(tmdb_id, mtype, page=1) - for it in rec1.get("results", []): - if it.get("id") is not None: - combined[it["id"]] = it - if len(combined) < limit: - rec2 = await self.tmdb_service.get_recommendations(tmdb_id, mtype, page=2) - for it in rec2.get("results", []): - if it.get("id") is not None: - combined[it["id"]] = it - if len(combined) < limit: - rec3 = await self.tmdb_service.get_recommendations(tmdb_id, mtype, page=3) - for it in rec3.get("results", []): - if it.get("id") is not None: - combined[it["id"]] = it - except Exception: - pass - - # If still thin, use similar as fallback - if len(combined) < max(20, limit // 2): - try: - sim1 = await self.tmdb_service.get_similar(tmdb_id, mtype, page=1) - for it in sim1.get("results", []): - if it.get("id") is not None: - combined[it["id"]] = it - if len(combined) < limit: - sim2 = await self.tmdb_service.get_similar(tmdb_id, mtype, page=2) - for it in sim2.get("results", []): - if it.get("id") is not None: 
- combined[it["id"]] = it - except Exception: - pass - - return list(combined.values()) - - async def get_recommendations( - self, - content_type: str | None = None, - source_items_limit: int = 5, - max_results: int = 20, - ) -> list[dict]: - """ - Get Smart Hybrid Recommendations. - """ - if not content_type: - logger.warning("content_type must be specified (movie or series)") - return [] - - logger.info(f"Starting Hybrid Recommendation Pipeline for {content_type}") - - # Step 1: Fetch & Score User Library - if self._library_data is None: - self._library_data = await self.stremio_service.get_library_items() - library_data = self._library_data - all_items = library_data.get("loved", []) + library_data.get("watched", []) + library_data.get("added", []) - logger.info(f"processing {len(all_items)} Items.") - # Cold-start fallback remains (redundant safety) - if not all_items: - all_items = library_data.get("added", []) - - # Build Exclusion Sets explicitly - watched_imdb_ids, watched_tmdb_ids = await self._get_exclusion_sets() - - # Deduplicate and Filter by Type - unique_items = {item["_id"]: item for item in all_items if item.get("type") == content_type} - processed_items = [] - scored_objects = [] - - sorted_history = sorted( - unique_items.values(), key=lambda x: x.get("state", {}).get("lastWatched"), reverse=True - ) - recent_history = sorted_history[:source_items_limit] - - for item_data in recent_history: - scored_obj = self.scoring_service.process_item(item_data) - scored_objects.append(scored_obj) - item_data["_interest_score"] = scored_obj.score - processed_items.append(item_data) - - processed_items.sort(key=lambda x: x["_interest_score"], reverse=True) - top_source_items = processed_items[:source_items_limit] - - # --- Candidate Set A: Item-based Similarity --- - tasks_a = [] - for source in top_source_items: - tasks_a.append(self._fetch_recommendations_from_tmdb(source.get("_id"), source.get("type"), limit=10)) - similarity_candidates = [] - 
similarity_recommendations = await asyncio.gather(*tasks_a, return_exceptions=True) - - excluded_ids = set(self._get_excluded_genre_ids(content_type)) - - similarity_recommendations = [item for item in similarity_recommendations if not isinstance(item, Exception)] - # Apply excluded-genre filter for similarity candidates (whitelist will be applied after profile build) - for batch in similarity_recommendations: - for item in batch: - gids = item.get("genre_ids") or [] - if excluded_ids.intersection(gids): - continue - similarity_candidates.append(item) - - # Quality gate for similarity candidates: keep higher-quality when we have enough - def _qual(item: dict) -> bool: - try: - vc = int(item.get("vote_count") or 0) - va = float(item.get("vote_average") or 0.0) - wr = self._weighted_rating(va, vc) - return (vc >= 150 and wr >= 6.0) or (vc >= 500 and wr >= 5.6) - except Exception: - return False - - # filtered_sim = [it for it in similarity_candidates if _qual(it)] - # if len(filtered_sim) >= 40: - # similarity_candidates = filtered_sim - - # --- Candidate Set B: Profile-based Discovery --- - # Extract excluded genres - excluded_genres = list(excluded_ids) # Convert back to list for consistency - - # Use typed profile based on content_type - user_profile = await self.user_profile_service.build_user_profile( - scored_objects, content_type=content_type, excluded_genres=excluded_genres - ) - # AUTO recency preference function based on profile years - # recency_fn = self._get_recency_multiplier_fn(user_profile) - # Build per-user top-genre whitelist - try: - top_gen_pairs = user_profile.get_top_genres(limit=TOP_GENRE_WHITELIST_LIMIT) - top_genre_whitelist: set[int] = {int(gid) for gid, _ in top_gen_pairs} - except Exception: - top_genre_whitelist = set() - - def _passes_top_genre(item_genre_ids: list[int] | None) -> bool: - if not top_genre_whitelist: - return True - gids = set(item_genre_ids or []) - if not gids: - return True - if 16 in gids and 16 not in 
top_genre_whitelist: - return False - return bool(gids & top_genre_whitelist) - - # Always include discovery, but bias to keywords/cast (avoid genre-heavy discovery) - try: - discovery_candidates = await self.discovery_engine.discover_recommendations( - user_profile, - content_type, - limit=max_results * 3, - excluded_genres=excluded_genres, - use_genres=False, - use_keywords=True, - use_cast=True, - use_director=True, - use_countries=False, - use_year=False, - ) - except Exception as e: - logger.warning(f"Discovery fetch failed: {e}") - discovery_candidates = [] - - # --- Combine & Deduplicate --- - candidate_pool = {} # tmdb_id -> item_dict - - for item in discovery_candidates: - gids = item.get("genre_ids") or [] - if not _passes_top_genre(gids): - continue - candidate_pool[item["id"]] = item - - for item in similarity_candidates: - # add score to boost similarity candidates - item["_ranked_candidate"] = True - candidate_pool[item["id"]] = item - - logger.info(f"Similarity candidates collected: {len(similarity_candidates)}; pool size: {len(candidate_pool)}") - - # Build recency blend function (m_raw, alpha) based on profile and candidate decades - try: - candidate_decades = set() - for it in candidate_pool.values(): - y = self._extract_year_from_item(it) - if y: - candidate_decades.add((int(y) // 10) * 10) - recency_m_raw, recency_alpha = self._get_recency_multiplier_fn(user_profile, candidate_decades) - except Exception: - recency_m_raw, recency_alpha = (lambda _y: 1.0), 0.0 - - # Freshness injection: trending/highly rated items to broaden taste - try: - fresh_added = 0 - from collections import defaultdict - - fresh_genre_counts = defaultdict(int) - cap_injection = max(1, int(max_results * PER_GENRE_MAX_SHARE)) - mtype = "tv" if content_type in ("tv", "series") else "movie" - trending_resp = await self.tmdb_service.get_trending(mtype, time_window="week") - trending = trending_resp.get("results", []) if trending_resp else [] - # Mix in top-rated - 
top_rated_resp = await self.tmdb_service.get_top_rated(mtype) - top_rated = top_rated_resp.get("results", []) if top_rated_resp else [] - fresh_pool = [] - fresh_pool.extend(trending[:40]) - fresh_pool.extend(top_rated[:40]) - # Filter by excluded genres and quality threshold - for it in fresh_pool: - tid = it.get("id") - if not tid or tid in candidate_pool: - continue - # Exclude already watched by TMDB id - if tid in watched_tmdb_ids: - continue - # Excluded genres - gids = it.get("genre_ids") or [] - if excluded_ids and excluded_ids.intersection(set(gids)): - continue - # Respect top-genre whitelist - if not _passes_top_genre(gids): - continue - # Quality: prefer strong audience signal - va = float(it.get("vote_average") or 0.0) - vc = int(it.get("vote_count") or 0) - if vc < 300 or va < 7.0: - continue - # Genre diversity inside freshness injection - if gids and any(fresh_genre_counts[g] >= cap_injection for g in gids): - continue - # Mark as freshness candidate - it["_fresh_boost"] = True - candidate_pool[tid] = it - for g in gids: - fresh_genre_counts[g] += 1 - fresh_added += 1 - if fresh_added >= max_results * 2: - break - if fresh_added: - logger.info(f"Freshness injection added {fresh_added} trending/top-rated candidates") - except Exception as e: - logger.warning(f"Freshness injection failed: {e}") - - # --- Re-Ranking & Filtering --- - ranked_candidates = [] - - for tmdb_id, item in candidate_pool.items(): - # 1. 
Strict Filter by TMDB ID - if tmdb_id in watched_tmdb_ids or f"tmdb:{tmdb_id}" in watched_imdb_ids: - continue - - # Use simple overlap similarity (Jaccard on tokens/genres/keywords) - try: - sim_score, sim_breakdown = self.user_profile_service.calculate_simple_overlap_with_breakdown( - user_profile, item - ) - except Exception: - sim_score = 0.0 - sim_breakdown = {} - # attach breakdown to item for later inspection - item["_sim_breakdown"] = sim_breakdown - - # If we only matched on genres (topics/keywords near zero), slightly penalize - try: - non_gen_relevance = float(sim_breakdown.get("topics_jaccard", 0.0)) + float( - sim_breakdown.get("keywords_jaccard", 0.0) - ) - if non_gen_relevance <= 0.0001: - sim_score *= 0.8 - item["_sim_penalty"] = True - item["_sim_penalty_reason"] = "genre_only_match" - except Exception: - pass - vote_avg = item.get("vote_average", 0.0) - vote_count = item.get("vote_count", 0) - popularity = float(item.get("popularity", 0.0)) - - # Weighted rating then normalize to 0-1 - wr = self._weighted_rating(vote_avg, vote_count) - vote_score = self._normalize(wr, 0.0, 10.0) - pop_score = self._normalize(popularity, 0.0, 1000.0) - - # Increase weight on quality to avoid low-rated picks - final_score = (sim_score * 0.55) + (vote_score * 0.35) + (pop_score * 0.10) - # AUTO recency (blend): final *= (1 - alpha) + alpha * m_raw - try: - y = self._extract_year_from_item(item) - m = recency_m_raw(y) - final_score *= (1.0 - recency_alpha) + (recency_alpha * m) - except Exception: - pass - # Stable tiny epsilon to break ties deterministically - final_score += self._stable_epsilon(tmdb_id) - - # Quality-aware multiplicative adjustments - q_mult = 1.0 - if vote_count < 50: - q_mult *= 0.6 - elif vote_count < 150: - q_mult *= 0.85 - if wr < 5.5: - q_mult *= 0.5 - elif wr < 6.0: - q_mult *= 0.7 - elif wr >= 7.0 and vote_count >= 500: - q_mult *= 1.10 - - # Boost candidate if from TMDB collaborative recommendations, but only if quality is decent - if 
item.get("_ranked_candidate"): - if wr >= 6.5 and vote_count >= 200: - q_mult *= 1.25 - elif wr >= 6.0 and vote_count >= 100: - q_mult *= 1.10 - # else no boost - - # Mild boost for freshness-injected trending/top-rated picks to keep feed fresh - if item.get("_fresh_boost") and wr >= 7.0 and vote_count >= 300: - q_mult *= 1.10 - - final_score *= q_mult - ranked_candidates.append((final_score, item)) - - # Sort by Final Score and cache score on item for diversification - ranked_candidates.sort(key=lambda x: x[0], reverse=True) - for score, item in ranked_candidates: - item["_final_score"] = score - - # Lightweight logging: show top 5 ranked candidates with similarity breakdown - try: - top_n = ranked_candidates[:5] - if top_n: - logger.info("Top similarity-ranked candidates (pre-meta):") - for sc, it in top_n: - name = it.get("title") or it.get("name") or it.get("original_title") or it.get("id") - bd = it.get("_sim_breakdown") or {} - logger.info(f"- {name} (tmdb:{it.get('id')}): score={sc:.4f} breakdown={bd}") - except Exception: - pass - - # Simplified selection: take top-ranked items directly (no MMR diversification) - top_ranked_items = [item for _, item in ranked_candidates] - # Buffer selection size is 2x requested results to allow final filtering - buffer_selection = top_ranked_items[: max_results * 2] - - # Fetch Full Metadata - meta_items = await self._fetch_metadata_for_items(buffer_selection, content_type, target_count=max_results * 2) - - # Recompute similarity with enriched metadata (keywords, credits) - final_items = [] - used_collections: set[int] = set() - used_cast: set[int] = set() - for item in meta_items: - if item["id"] in watched_imdb_ids: - continue - ext_ids = item.get("_external_ids", {}) - if ext_ids.get("imdb_id") in watched_imdb_ids: - continue - # Apply top-genre whitelist again using enriched genre_ids if present - if not _passes_top_genre(item.get("genre_ids")): - continue - - try: - sim_score, sim_breakdown = 
self.user_profile_service.calculate_simple_overlap_with_breakdown( - user_profile, item - ) - except Exception: - sim_score = 0.0 - sim_breakdown = {} - item["_sim_breakdown"] = sim_breakdown - wr = self._weighted_rating(item.get("vote_average"), item.get("vote_count")) - vote_score = self._normalize(wr, 0.0, 10.0) - pop_score = self._normalize(float(item.get("popularity") or 0.0), 0.0, 1000.0) - - base = (sim_score * 0.55) + (vote_score * 0.35) + (pop_score * 0.10) - base += self._stable_epsilon(item.get("_tmdb_id") or 0) - - # Quality-aware adjustment - vc = int(item.get("vote_count") or 0) - q_mult = 1.0 - if vc < 50: - q_mult *= 0.6 - elif vc < 150: - q_mult *= 0.85 - if wr < 5.5: - q_mult *= 0.5 - elif wr < 6.0: - q_mult *= 0.7 - elif wr >= 7.0 and vc >= 500: - q_mult *= 1.10 - - # AUTO recency (blend) in post-metadata stage as well - try: - y = self._extract_year_from_item(item) - m = recency_m_raw(y) - q_mult *= (1.0 - recency_alpha) + (recency_alpha * m) - except Exception: - pass - - score = base * q_mult - - # Collection/cast suppression - penalty = 0.0 - coll_id = item.get("_collection_id") - if isinstance(coll_id, int) and coll_id in used_collections: - penalty += 0.05 - cast_ids = set(item.get("_top_cast_ids", []) or []) - overlap = len(cast_ids & used_cast) - if overlap: - penalty += min(0.03 * overlap, 0.09) - score *= 1.0 - penalty - item["_adjusted_score"] = score - final_items.append(item) - - # Sort by adjusted score descending - final_items.sort(key=lambda x: x.get("_adjusted_score", 0.0), reverse=True) - - # Diversified selection: per-genre cap AND proportional decade apportionment - from collections import defaultdict - - genre_take_counts = defaultdict(int) - cap_per_genre = max(1, int(max_results * PER_GENRE_MAX_SHARE)) - - # Build decade targets from user profile distribution over decades present in final_items - decades_in_results = [] - for it in final_items: - y = self._extract_year_from_item(it) - if y: - 
decades_in_results.append((int(y) // 10) * 10) - else: - decades_in_results.append(None) - - # User decade prefs - try: - years_map = getattr(user_profile.years, "values", {}) or {} - decade_weights = {int(k): float(v) for k, v in years_map.items() if isinstance(k, int)} - total_w = sum(decade_weights.values()) - except Exception: - decade_weights = {} - total_w = 0.0 - - support = {d for d in decades_in_results if d is not None} - if total_w > 0 and support: - p_user = {d: (decade_weights.get(d, 0.0) / total_w) for d in support} - # Normalize to sum 1 over support - s = sum(p_user.values()) - if s > 0: - for d in list(p_user.keys()): - p_user[d] = p_user[d] / s - else: - # fallback to uniform over support - p_user = {d: 1.0 / len(support) for d in support} - else: - # Neutral: uniform over decades present - p_user = {d: 1.0 / len(support) for d in support} if support else {} - - # Largest remainder apportionment - targets = defaultdict(int) - remainders = [] - slots = max_results - for d, p in p_user.items(): - tgt = p * slots - base = int(tgt) - targets[d] = base - remainders.append((tgt - base, d)) - assigned = sum(targets.values()) - remaining = max(0, slots - assigned) - if remaining > 0 and remainders: - remainders.sort(key=lambda x: x[0], reverse=True) - for _, d in remainders[:remaining]: - targets[d] += 1 - - # First pass: honor decade targets and genre caps - decade_counts = defaultdict(int) - diversified = [] - for it in final_items: - if len(diversified) >= max_results * 2: - break - gids = list(it.get("genre_ids") or []) - if gids and any(genre_take_counts[g] >= cap_per_genre for g in gids): - continue - y = self._extract_year_from_item(it) - d = (int(y) // 10) * 10 if y else None - if d is not None and d in targets and decade_counts[d] >= targets[d]: - continue - diversified.append(it) - for g in gids: - genre_take_counts[g] += 1 - if d is not None: - decade_counts[d] += 1 - - # Second pass: fill remaining up to max_results ignoring decade targets but 
keeping genre caps - if len(diversified) < max_results: - for it in final_items: - if it in diversified: - continue - if len(diversified) >= max_results * 2: - break - gids = list(it.get("genre_ids") or []) - if gids and any(genre_take_counts[g] >= cap_per_genre for g in gids): - continue - diversified.append(it) - for g in gids: - genre_take_counts[g] += 1 - - # Update used sets for next requests (implicit) and cleanup internal fields - ordered = [] - for it in diversified: - coll = it.pop("_collection_id", None) - if isinstance(coll, int): - used_collections.add(coll) - for cid in it.pop("_top_cast_ids", []) or []: - try: - used_cast.add(int(cid)) - except Exception: - pass - it.pop("_external_ids", None) - it.pop("_tmdb_id", None) - it.pop("_adjusted_score", None) - ordered.append(it) - - # Enforce max_results limit - if len(ordered) > max_results: - ordered = ordered[:max_results] - - return ordered diff --git a/app/services/row_generator.py b/app/services/row_generator.py index 706c040..0736fb8 100644 --- a/app/services/row_generator.py +++ b/app/services/row_generator.py @@ -1,12 +1,14 @@ +import asyncio import random +from loguru import logger from pydantic import BaseModel from app.models.profile import UserTasteProfile from app.services.gemini import gemini_service from app.services.tmdb.countries import COUNTRY_ADJECTIVES from app.services.tmdb.genre import movie_genres, series_genres -from app.services.tmdb_service import TMDBService, get_tmdb_service +from app.services.tmdb.service import TMDBService, get_tmdb_service def normalize_keyword(kw): @@ -40,133 +42,167 @@ def __init__(self, tmdb_service: TMDBService | None = None): async def generate_rows(self, profile: UserTasteProfile, content_type: str = "movie") -> list[RowDefinition]: """ - Generate a diverse set of 3-5 thematic rows. - Async to allow fetching names for keywords. + Generate a diverse set of 3-5 thematic rows in parallel. """ - rows = [] + # 1. 
Extract features + top_genres = profile.get_top_genres(limit=3) + top_keywords = profile.get_top_keywords(limit=6) + top_countries = profile.get_top_countries(limit=1) + top_years = profile.years.get_top_features(limit=1) - # Extract features - top_genres = profile.get_top_genres(limit=3) # [(id, score), ...] - top_keywords = profile.get_top_keywords(limit=4) # [(id, score), ...] - top_countries = profile.get_top_countries(limit=1) # [(code, score)] - top_years = profile.years.get_top_features(limit=1) # [(decade_start, score)] + # 2. Fetch all keyword names in parallel + kw_ids = [k_id for k_id, _ in top_keywords] + kw_names = await asyncio.gather(*[self._get_keyword_name(kid) for kid in kw_ids], return_exceptions=True) + keyword_map = {kid: name for kid, name in zip(kw_ids, kw_names) if name and not isinstance(name, Exception)} genre_map = movie_genres if content_type == "movie" else series_genres - # Helper to get genre name safely def get_gname(gid): return genre_map.get(gid, "Movies") def get_cname(code): adjectives = COUNTRY_ADJECTIVES.get(code, []) - if adjectives: - return random.choice(adjectives) - return "" + return random.choice(adjectives) if adjectives else "" - # Strategy 1: Combined Keyword Row (Top Priority) + # 3. 
Define Strategy Candidates & Gemini Tasks + gemini_tasks = [] + rows_to_build = [] # List of (builder_func, prompt_index_or_none) + + # Strategy 1: Keywords if top_keywords: k_id1 = top_keywords[0][0] - kw_name1 = await self._get_keyword_name(k_id1) + kw_name1 = keyword_map.get(k_id1) - use_single_keyword_row = True if len(top_keywords) >= 2: k_id2 = top_keywords[1][0] - kw_name2 = await self._get_keyword_name(k_id2) - title = "" + kw_name2 = keyword_map.get(k_id2) if kw_name1 and kw_name2: - title = await gemini_service.generate_content_async(f"Keywords: {kw_name1} + {kw_name2}") - - if title: - rows.append( - RowDefinition( - title=title, - id=f"watchly.theme.k{k_id1}.k{k_id2}", - keywords=[k_id1, k_id2], - ) + prompt = f"Keywords: {kw_name1} + {kw_name2}" + gemini_tasks.append(gemini_service.generate_content_async(prompt)) + rows_to_build.append( + { + "id": f"watchly.theme.k{k_id1}.k{k_id2}", + "keywords": [k_id1, k_id2], + "prompt_idx": len(gemini_tasks) - 1, + "fallback": None, # Will use Strategy 1.1 if this fails + } ) - use_single_keyword_row = False - - if use_single_keyword_row and kw_name1: - rows.append( - RowDefinition( - title=normalize_keyword(kw_name1), - id=f"watchly.theme.k{k_id1}", - keywords=[k_id1], + elif kw_name1: + rows_to_build.append( + {"id": f"watchly.theme.k{k_id1}", "keywords": [k_id1], "title": normalize_keyword(kw_name1)} ) + elif kw_name1: + rows_to_build.append( + {"id": f"watchly.theme.k{k_id1}", "keywords": [k_id1], "title": normalize_keyword(kw_name1)} ) - # Strategy 2: Keyword + Genre (Specific Niche) + # Strategy 2: Genre + Keyword if top_genres and len(top_keywords) > 2: g_id = top_genres[0][0] - # get random keywords: Just to surprise user in every refresh k_id = random.choice(top_keywords[2:])[0] + kw_name = keyword_map.get(k_id) + if kw_name: + prompt = f"Genre: {get_gname(g_id)} + Keyword: {normalize_keyword(kw_name)}" + gemini_tasks.append(gemini_service.generate_content_async(prompt)) + rows_to_build.append( + { + 
"id": f"watchly.theme.g{g_id}.k{k_id}", + "genres": [g_id], + "keywords": [k_id], + "prompt_idx": len(gemini_tasks) - 1, + "fallback": f"{normalize_keyword(kw_name)} {get_gname(g_id)}", + } + ) - if k_id: - kw_name = await self._get_keyword_name(k_id) - if kw_name: - title = await gemini_service.generate_content_async( - f"Genre: {get_gname(g_id)} + Keyword: {normalize_keyword(kw_name)}" - ) - if not title: - title = f"{normalize_keyword(kw_name)} {get_gname(g_id)}" - # keyword and genre can have same name sometimes, remove if so - title = " ".join(dict.fromkeys(title.split())) - - rows.append( - RowDefinition( - title=title, - id=f"watchly.theme.g{g_id}.k{k_id}", - genres=[g_id], - keywords=[k_id], - ) - ) - - # Strategy 3: Genre + Country (e.g. "Bollywood Action") + # Strategy 3: Genre + Country if top_countries and len(top_genres) > 0: g_id = top_genres[0][0] if len(top_genres) == 1 else top_genres[1][0] c_code = top_countries[0][0] c_adj = get_cname(c_code) if c_adj: - title = await gemini_service.generate_content_async(f"Genre: {get_gname(g_id)} + Country: {c_adj}") - if not title: - title = f"{c_adj} {get_gname(g_id)}" - rows.append( - RowDefinition( - title=title, - id=f"watchly.theme.g{g_id}.ct{c_code}", # ct for country - genres=[g_id], - country=c_code, - ) + prompt = f"Genre: {get_gname(g_id)} + Country: {c_adj}" + gemini_tasks.append(gemini_service.generate_content_async(prompt)) + rows_to_build.append( + { + "id": f"watchly.theme.g{g_id}.ct{c_code}", + "genres": [g_id], + "country": c_code, + "prompt_idx": len(gemini_tasks) - 1, + "fallback": f"{c_adj} {get_gname(g_id)}", + } ) - # Strategy 4: Genre + Era ("90s Action") + # Strategy 4: Genre + Era if len(top_genres) > 0 and top_years: - # Use 3rd genre if available for diversity, else 1st - g_id = top_genres[0][0] - if len(top_genres) > 2: - g_id = top_genres[2][0] - + g_id = top_genres[2][0] if len(top_genres) > 2 else top_genres[0][0] decade_start = top_years[0][0] - # # Only do this if decade is 
valid and somewhat old (nostalgia factor) if 1970 <= decade_start <= 2010: - decade_str = str(decade_start)[2:] + "s" # "90s" - title = await gemini_service.generate_content_async(f"Genre: {get_gname(g_id)} + Era: {decade_str}") - if not title: - title = f"{decade_str} {get_gname(g_id)}" - rows.append( + decade_str = f"{str(decade_start)[2:]}s" + prompt = f"Genre: {get_gname(g_id)} + Era: {decade_str}" + gemini_tasks.append(gemini_service.generate_content_async(prompt)) + rows_to_build.append( + { + "id": f"watchly.theme.g{g_id}.y{decade_start}", + "genres": [g_id], + "year_range": (decade_start, decade_start + 9), + "prompt_idx": len(gemini_tasks) - 1, + "fallback": f"{decade_str} {get_gname(g_id)}", + } + ) + + # 4. Execute all Gemini tasks in parallel + gemini_results = await asyncio.gather(*gemini_tasks, return_exceptions=True) + + # 5. Build Final Rows + final_rows = [] + # Support for Strategy 1 fallback (single keyword if dual fails) + strategy1_success = False + + for r in rows_to_build: + title = r.get("title") + idx = r.get("prompt_idx") + + if title is None and idx is not None: + res = gemini_results[idx] + if not isinstance(res, Exception) and res: + title = res + if "k" in r["id"] and "." 
in r["id"]: # Strategy 1 (dual) + strategy1_success = True + else: + if isinstance(res, Exception): + logger.warning(f"Gemini failed for strategy {r['id']}: {res}") + title = r.get("fallback") + + if title: + # Cleanup title + title = " ".join(dict.fromkeys(title.split())) if r.get("genres") and r.get("keywords") else title + final_rows.append( RowDefinition( title=title, - id=f"watchly.theme.g{g_id}.y{decade_start}", - genres=[g_id], - year_range=(decade_start, decade_start + 9), + id=r["id"], + genres=r.get("genres", []), + keywords=r.get("keywords", []), + country=r.get("country"), + year_range=r.get("year_range"), ) ) - return rows + # Handle Strategy 1 fallback if dual keyword failed to generate or was never added + if top_keywords and not strategy1_success: + k1 = top_keywords[0][0] + name1 = keyword_map.get(k1) + # Only add if it's not already in final_rows (it might be there if dual wasn't possible) + if name1 and not any(row.id == f"watchly.theme.k{k1}" for row in final_rows): + final_rows.insert( + 0, RowDefinition(title=normalize_keyword(name1), id=f"watchly.theme.k{k1}", keywords=[k1]) + ) + + return final_rows async def _get_keyword_name(self, keyword_id: int) -> str | None: try: - data = await self.tmdb_service._make_request(f"/keyword/{keyword_id}") + data = await self.tmdb_service.get_keyword_details(keyword_id) return data.get("name") - except Exception: + except Exception as e: + logger.exception(f"Failed to fetch keyword name: {e}", exc_info=True) return None diff --git a/app/services/scoring.py b/app/services/scoring.py index a01770c..f271322 100644 --- a/app/services/scoring.py +++ b/app/services/scoring.py @@ -1,5 +1,7 @@ from datetime import datetime, timezone +from loguru import logger + from app.models.scoring import ScoredItem, StremioLibraryItem @@ -69,7 +71,8 @@ def _calculate_score_components(self, item: StremioLibraryItem) -> dict: if state.duration and state.duration > 0: try: ratio = min(float(state.timeWatched) / 
float(state.duration), 1.0) - except Exception: + except Exception as e: + logger.debug(f"Math error in completion ratio calculation for {item.state}: {e}") ratio = 0.0 completion_rate = ratio completion_score = ratio * 100.0 @@ -117,7 +120,10 @@ def _calculate_score_components(self, item: StremioLibraryItem) -> dict: ratio_component = max((ratio_est - 1.0) * 100.0, 0.0) else: ratio_component = max((float(state.timesWatched) - 1.0) * 20.0, 0.0) - except Exception: + except Exception as e: + from loguru import logger + + logger.debug(f"Math error in rewatch score calculation for {item.item.id}: {e}") ratio_component = 0.0 # Combine components but clamp to reasonable bounds diff --git a/app/services/stremio/__init__.py b/app/services/stremio/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/app/services/stremio/addons.py b/app/services/stremio/addons.py new file mode 100644 index 0000000..d9af230 --- /dev/null +++ b/app/services/stremio/addons.py @@ -0,0 +1,102 @@ +from typing import Any +from urllib.parse import urlparse + +from loguru import logger + +from app.services.stremio.client import StremioClient + + +def match_hostname(url: str, hostname: str) -> bool: + """Return True if the URL host matches the target host (scheme-agnostic).""" + try: + url_host = urlparse(url if "://" in url else f"https://{url}").hostname + target_host = urlparse(hostname if "://" in hostname else f"https://{hostname}").hostname + return bool(url_host and target_host and url_host.lower() == target_host.lower()) + except Exception as e: + logger.debug(f"Failed to parse or match hostname for URL {url} against {hostname}: {e}") + return False + + +class StremioAddonService: + """ + Handles fetching and updating Stremio addon collections. 
+ """ + + def __init__(self, client: StremioClient): + self.client = client + + async def get_addons(self, auth_key: str) -> list[dict[str, Any]]: + """Fetch the user's addon collection.""" + payload = { + "type": "AddonCollectionGet", + "authKey": auth_key, + "update": True, + } + try: + data = await self.client.post("/api/addonCollectionGet", json=payload) + + if "error" in data: + error = data["error"] + message = error.get("message") if isinstance(error, dict) else str(error) + raise ValueError(f"Stremio Addon Error: {message}") + + return data.get("result", {}).get("addons", []) + except Exception as e: + logger.exception(f"Failed to fetch addons: {e}") + raise + + async def update_addon_collection(self, auth_key: str, addons: list[dict[str, Any]]) -> bool: + """Update the user's entire addon collection.""" + payload = { + "type": "AddonCollectionSet", + "authKey": auth_key, + "addons": addons, + } + try: + data = await self.client.post("/api/addonCollectionSet", json=payload) + return data.get("result", {}).get("success", False) + except Exception as e: + logger.exception(f"Failed to update addon collection: {e}") + return False + + async def update_catalogs(self, auth_key: str, catalogs: list[dict[str, Any]]) -> bool: + """ + Inject dynamic catalogs into the installed Watchly addon. 
+ """ + from app.core.config import settings + + # Base catalogs that are always present + BASE_CATALOGS = [ + {"type": "movie", "id": "watchly.rec", "name": "Top Picks for You", "extra": []}, + {"type": "series", "id": "watchly.rec", "name": "Top Picks for You", "extra": []}, + ] + + addons = await self.get_addons(auth_key) + full_catalogs = BASE_CATALOGS + catalogs + + found = False + for addon in addons: + if addon.get("manifest", {}).get("id") == settings.ADDON_ID and match_hostname( + addon.get("transportUrl"), settings.HOST_NAME + ): + addon["manifest"]["catalogs"] = full_catalogs + found = True + break + + if not found: + logger.warning(f"Addon {settings.ADDON_ID} not found in user collection; cannot update catalogs.") + return False + + return await self.update_addon_collection(auth_key, addons) + + async def is_addon_installed(self, auth_key: str) -> bool: + """Check if the Watchly addon is present in the user's collection.""" + from app.core.config import settings + + addons = await self.get_addons(auth_key) + for addon in addons: + if addon.get("manifest", {}).get("id") == settings.ADDON_ID and match_hostname( + addon.get("transportUrl"), settings.HOST_NAME + ): + return True + return False diff --git a/app/services/stremio/auth.py b/app/services/stremio/auth.py new file mode 100644 index 0000000..b87984c --- /dev/null +++ b/app/services/stremio/auth.py @@ -0,0 +1,70 @@ +from loguru import logger + +from app.services.stremio.client import StremioClient + + +class StremioAuthService: + """ + Handles authentication and user information retrieval from Stremio. + """ + + def __init__(self, client: StremioClient): + self.client = client + + async def login(self, email: str, password: str) -> str: + """ + Authenticate with Stremio using email and password. + Returns the authKey. 
+ """ + payload = { + "email": email, + "password": password, + "type": "Login", + "facebook": False, + } + + try: + data = await self.client.post("/api/login", json=payload) + auth_key = data.get("result", {}).get("authKey") + + if not auth_key: + error_obj = data.get("error") or data + error_message = "Invalid Stremio credentials" + if isinstance(error_obj, dict): + error_message = error_obj.get("message") or error_message + raise ValueError(f"Stremio Auth Error: {error_message}") + + return auth_key + except Exception as e: + logger.exception(f"Failed to login to Stremio: {e}") + raise + + async def get_user_info(self, auth_key: str) -> dict[str, str]: + """ + Fetch user information (ID and Email) using an auth key. + """ + payload = { + "type": "GetUser", + "authKey": auth_key, + } + + try: + data = await self.client.post("/api/getUser", json=payload) + + if "error" in data: + error_msg = data["error"] + if isinstance(error_msg, dict): + error_msg = error_msg.get("message", "Unknown error") + raise ValueError(f"Stremio API Error: {error_msg}") + + result = data.get("result", {}) + user_id = result.get("_id") + email = result.get("email") + + if not user_id: + raise ValueError("User ID missing in Stremio profile response") + + return {"user_id": user_id, "email": email} + except Exception as e: + logger.exception(f"Failed to fetch Stremio user info: {e}") + raise diff --git a/app/services/stremio/client.py b/app/services/stremio/client.py new file mode 100644 index 0000000..32fcabc --- /dev/null +++ b/app/services/stremio/client.py @@ -0,0 +1,29 @@ +from app.core.base_client import BaseClient + + +class StremioClient(BaseClient): + """ + Client for interacting with the main Stremio API. 
+ """ + + def __init__(self, timeout: float = 10.0, max_retries: int = 3): + headers = { + "User-Agent": "Watchly/Client", + "Accept": "application/json", + } + super().__init__(base_url="https://api.strem.io", timeout=timeout, max_retries=max_retries, headers=headers) + + +class StremioLikesClient(BaseClient): + """ + Client for interacting with the Stremio Likes API. + """ + + def __init__(self, timeout: float = 10.0, max_retries: int = 3): + headers = { + "User-Agent": "Watchly/Client", + "Accept": "application/json", + } + super().__init__( + base_url="https://likes.stremio.com", timeout=timeout, max_retries=max_retries, headers=headers + ) diff --git a/app/services/stremio/library.py b/app/services/stremio/library.py new file mode 100644 index 0000000..4d6b5f8 --- /dev/null +++ b/app/services/stremio/library.py @@ -0,0 +1,142 @@ +import asyncio +from typing import Any + +from async_lru import alru_cache +from loguru import logger + +from app.services.stremio.client import StremioClient, StremioLikesClient + + +class StremioLibraryService: + """ + Handles fetching and processing of user's Stremio library and likes. + """ + + def __init__(self, client: StremioClient, likes_client: StremioLikesClient): + self.client = client + self.likes_client = likes_client + + @alru_cache(maxsize=100, ttl=3600) + async def get_likes_by_type(self, auth_token: str, media_type: str, status: str = "loved") -> list[str]: + """ + Fetch IDs of items liked or loved by the user. 
+ status: 'loved' or 'liked' + """ + path = f"/addons/{status}/movies-shows/{auth_token}/catalog/{media_type}/stremio-{status}-{media_type}.json" + try: + data = await self.likes_client.get(path) + metas = data.get("metas", []) + return [meta.get("id") for meta in metas if meta.get("id")] + except Exception as e: + logger.exception(f"Failed to fetch {status} {media_type} items: {e}") + return [] + + async def get_library_items(self, auth_key: str) -> dict[str, list[dict[str, Any]]]: + """ + Fetch all library items and categorize them (watched, loved, added, removed). + """ + try: + # 1. Fetch raw library from datastore + payload = { + "authKey": auth_key, + "collection": "libraryItem", + "all": True, + } + data = await self.client.post("/api/datastoreGet", json=payload) + all_raw_items = data.get("result", []) + + # 2. Fetch loved/liked IDs in parallel + loved_movies_task = self.get_likes_by_type(auth_key, "movie", "loved") + loved_series_task = self.get_likes_by_type(auth_key, "series", "loved") + liked_movies_task = self.get_likes_by_type(auth_key, "movie", "liked") + liked_series_task = self.get_likes_by_type(auth_key, "series", "liked") + + loved_movies, loved_series, liked_movies, liked_series = await asyncio.gather( + loved_movies_task, loved_series_task, liked_movies_task, liked_series_task + ) + + all_loved_ids = set(loved_movies + loved_series + liked_movies + liked_series) + + # 3. 
Categorize items + watched: list[dict] = [] + loved: list[dict] = [] + added: list[dict] = [] + removed: list[dict] = [] + liked: list[dict] = [] + + # Create sets for faster lookup + loved_set = set(loved_movies + loved_series) + liked_set = set(liked_movies + liked_series) + all_loved_ids = loved_set.union(liked_set) + + for item in all_raw_items: + # Basic validation + if item.get("type") not in ["movie", "series"]: + continue + item_id = item.get("_id", "") + if not item_id.startswith("tt"): + continue + + # Check Watched status + state = item.get("state", {}) or {} + times_watched = int(state.get("timesWatched") or 0) + flagged_watched = int(state.get("flaggedWatched") or 0) + duration = int(state.get("duration") or 0) + time_watched = int(state.get("timeWatched") or 0) + + is_completion_high = duration > 0 and (time_watched / duration) >= 0.7 + is_watched = times_watched > 0 or flagged_watched > 0 or is_completion_high + + if is_watched: + # Set flags for recommendation engine compatibility + is_item_loved = False + is_item_liked = False + if item_id in loved_set: + item["_is_loved"] = True + is_item_loved = True + if item_id in liked_set: + item["_is_liked"] = True + is_item_liked = True + + watched.append(item) + if is_item_loved: + loved.append(item) + if is_item_liked: + liked.append(item) + else: + if item_id in all_loved_ids: + # if item is loved but not watched, do nothing + pass + elif not item.get("removed") and not item.get("temp"): + # item has not removed and item is not temporary meaning item is not + # added by stremio itself on user watch + added.append(item) + elif item.get("removed"): + removed.append(item) + + # 4. 
Sort watched items by recency + def sort_by_recency(x: dict): + state = x.get("state", {}) or {} + return (str(state.get("lastWatched") or ""), str(x.get("_mtime") or "")) + + watched.sort(key=sort_by_recency, reverse=True) + loved.sort(key=sort_by_recency, reverse=True) + liked.sort(key=sort_by_recency, reverse=True) + added.sort(key=sort_by_recency, reverse=True) + removed.sort(key=sort_by_recency, reverse=True) + + logger.info( + f"Processed {len(watched)} watched items, {len(loved)} loved items," + f"{len(liked)} liked items, {len(added)} added items, {len(removed)} removed items" + ) + + return { + "watched": watched, + "loved": loved, + "liked": liked, + "added": added, + "removed": removed, + } + except Exception as e: + logger.exception(f"Error processing library items: {e}") + return {"watched": [], "loved": [], "liked": [], "added": [], "removed": []} diff --git a/app/services/stremio/service.py b/app/services/stremio/service.py new file mode 100644 index 0000000..a34ac6b --- /dev/null +++ b/app/services/stremio/service.py @@ -0,0 +1,24 @@ +from app.services.stremio.addons import StremioAddonService +from app.services.stremio.auth import StremioAuthService +from app.services.stremio.client import StremioClient, StremioLikesClient +from app.services.stremio.library import StremioLibraryService + + +class StremioBundle: + """ + A unified bundle for all Stremio-related services. + Provides a clean interface for the rest of the application. 
+ """ + + def __init__(self): + self._client = StremioClient() + self._likes_client = StremioLikesClient() + + self.auth = StremioAuthService(self._client) + self.library = StremioLibraryService(self._client, self._likes_client) + self.addons = StremioAddonService(self._client) + + async def close(self): + """Close all underlying HTTP clients.""" + await self._client.close() + await self._likes_client.close() diff --git a/app/services/stremio_service.py b/app/services/stremio_service.py deleted file mode 100644 index fe7bf99..0000000 --- a/app/services/stremio_service.py +++ /dev/null @@ -1,464 +0,0 @@ -import asyncio -import random -from urllib.parse import urlparse - -import httpx -from async_lru import alru_cache -from loguru import logger - -from app.core.config import settings - -BASE_CATALOGS = [ - {"type": "movie", "id": "watchly.rec", "name": "Top Picks for You", "extra": []}, - {"type": "series", "id": "watchly.rec", "name": "Top Picks for You", "extra": []}, -] - - -def match_hostname(url: str, hostname: str) -> bool: - """Return True if the URL host matches the target host (scheme-agnostic). - - Accepts `hostname` as either a naked host (example.com) or full URL (https://example.com). 
- """ - try: - url_host = urlparse(url if "://" in url else f"https://{url}").hostname - target_host = urlparse(hostname if "://" in hostname else f"https://{hostname}").hostname - return bool(url_host and target_host and url_host.lower() == target_host.lower()) - except Exception: - return False - - -class StremioService: - """Service for interacting with Stremio API to fetch user library.""" - - def __init__( - self, - username: str = "", - password: str = "", - auth_key: str | None = None, - ): - self.base_url = "https://api.strem.io" - self.username = username - self.password = password - self._auth_key: str | None = auth_key - if not self._auth_key and (not self.username or not self.password): - raise ValueError("Username/password or auth key are required") - # Reuse HTTP client for connection pooling and better performance - self._client: httpx.AsyncClient | None = None - self._likes_client: httpx.AsyncClient | None = None - # lightweight per-instance cache for library fetch - self._library_cache: dict | None = None - self._library_cache_expiry: float = 0.0 - - async def _get_client(self) -> httpx.AsyncClient: - """Get or create the main Stremio API client.""" - if self._client is None: - self._client = httpx.AsyncClient( - timeout=10.0, - limits=httpx.Limits(max_keepalive_connections=10, max_connections=50), - http2=True, - headers={ - "User-Agent": "Watchly/Client", - "Accept": "application/json", - }, - ) - return self._client - - async def _get_likes_client(self) -> httpx.AsyncClient: - """Get or create the likes API client.""" - if self._likes_client is None: - self._likes_client = httpx.AsyncClient( - timeout=10.0, - limits=httpx.Limits(max_keepalive_connections=10, max_connections=50), - http2=True, - headers={ - "User-Agent": "Watchly/Client", - "Accept": "application/json", - }, - ) - return self._likes_client - - async def close(self): - """Close HTTP clients.""" - if self._client: - await self._client.aclose() - self._client = None - if 
self._likes_client: - await self._likes_client.aclose() - self._likes_client = None - - async def _login_for_auth_key(self) -> str: - """Login with username/password and fetch a fresh auth key.""" - if not self.username or not self.password: - raise ValueError("Username and password are required to fetch an auth key") - url = f"{self.base_url}/api/login" - payload = { - "email": self.username, - "password": self.password, - "type": "Login", - "facebook": False, - } - - try: - client = await self._get_client() - result = await self._post_with_retries(client, url, json=payload) - data = result - auth_key = data.get("result", {}).get("authKey", "") - if auth_key: - logger.info("Successfully authenticated with Stremio") - self._auth_key = auth_key - else: - error_obj = data.get("error") or data - error_message = "Invalid Stremio username/password." - if isinstance(error_obj, dict): - error_message = error_obj.get("message") or error_message - elif isinstance(error_obj, str): - error_message = error_obj or error_message - logger.warning(error_obj) - raise ValueError(f"Stremio: {error_message}") - return auth_key - except Exception as e: - logger.error(f"Error authenticating with Stremio: {e}", exc_info=True) - raise - - async def get_auth_key(self) -> str: - """Return the cached auth key.""" - if not self._auth_key: - raise ValueError("Stremio auth key is missing.") - return self._auth_key - - async def is_loved(self, auth_key: str, imdb_id: str, media_type: str) -> tuple[bool, bool]: - """ - Check if user has loved or liked a movie or series. 
- Returns: (is_loved, is_liked) - """ - if not imdb_id.startswith("tt"): - return False, False - url = "https://likes.stremio.com/api/get_status" - params = { - "authToken": auth_key, - "mediaType": media_type, - "mediaId": imdb_id, - } - - try: - client = await self._get_likes_client() - result = await self._get_with_retries(client, url, params=params) - status = result.get("status", "") - return (status == "loved", status == "liked") - except Exception as e: - logger.error( - f"Error checking if user has loved a movie or series: {e}", - exc_info=True, - ) - return False, False - - @alru_cache(maxsize=1000, ttl=3600) - async def get_loved_items(self, auth_token: str, media_type: str) -> list[str]: - url = f"https://likes.stremio.com/addons/loved/movies-shows/{auth_token}/catalog/{media_type}/stremio-loved-{media_type.lower()}.json" # noqa - try: - client = await self._get_likes_client() - data = await self._get_with_retries(client, url) - metas = data.get("metas", []) - return [meta.get("id") for meta in metas] - except Exception as e: - logger.warning(f"Failed to fetch loved items: {e}") - return [] - - @alru_cache(maxsize=1000, ttl=3600) - async def get_liked_items(self, auth_token: str, media_type: str) -> list[str]: - url = f"https://likes.stremio.com/addons/liked/movies-shows/{auth_token}/catalog/{media_type}/stremio-liked-{media_type.lower()}.json" # noqa - try: - client = await self._get_likes_client() - data = await self._get_with_retries(client, url) - metas = data.get("metas", []) - return [meta.get("id") for meta in metas] - except Exception as e: - logger.warning(f"Failed to fetch liked items: {e}") - return [] - - async def get_user_info(self) -> dict[str, str]: - """Fetch user ID and email using the auth key.""" - if not self._auth_key: - raise ValueError("Stremio auth key is missing.") - - url = f"{self.base_url}/api/getUser" - payload = { - "type": "GetUser", - "authKey": self._auth_key, - } - - try: - client = await self._get_client() - data = 
await self._post_with_retries(client, url, json=payload) - - if "error" in data: - error_msg = data["error"] - if isinstance(error_msg, dict): - error_msg = error_msg.get("message", "Unknown error") - raise ValueError(f"Stremio Error: {error_msg}") - - # Structure: { result: { _id, email, ... } } - res = data.get("result", {}) - user_id = res.get("_id", "") - email = res.get("email", "") - - if not user_id: - raise ValueError("Could not retrieve user ID from Stremio profile.") - - return {"user_id": user_id, "email": email} - except Exception as e: - logger.error(f"Error fetching user profile: {e}") - raise - - async def get_user_email(self) -> str: - """Fetch user email using the auth key.""" - user_info = await self.get_user_info() - return user_info.get("email", "") - - async def get_library_items(self) -> dict[str, list[dict]]: - """ - Fetch library items from Stremio once and return both watched and loved items. - Returns a dict with 'watched' and 'loved' keys. - """ - - if not self._auth_key: - logger.warning("Stremio auth key not configured") - return {"watched": [], "loved": []} - - try: - # Get auth token - auth_key = await self.get_auth_key() - if not auth_key: - logger.error("Failed to get Stremio auth token") - return {"watched": [], "loved": []} - - # Fetch library items once - url = f"{self.base_url}/api/datastoreGet" - payload = { - "authKey": auth_key, - "collection": "libraryItem", - "all": True, - } - - client = await self._get_client() - data = await self._post_with_retries(client, url, json=payload) - items = data.get("result", []) - logger.info(f"Fetched {len(items)} library items from Stremio") - - # Filter items considered watched: explicit timesWatched/flaggedWatched OR high completion ratio - watched_items = [] - for item in items: - if item.get("type") not in ["movie", "series"]: - continue - item_id = item.get("_id", "") - if not item_id.startswith("tt"): - continue - state = item.get("state", {}) or {} - times_watched = 
int(state.get("timesWatched") or 0) - flagged_watched = int(state.get("flaggedWatched") or 0) - duration = int(state.get("duration") or 0) - time_watched = int(state.get("timeWatched") or 0) - ratio_ok = duration > 0 and (time_watched / duration) >= 0.7 - if times_watched > 0 or flagged_watched > 0 or ratio_ok: - watched_items.append(item) - logger.info(f"Filtered {len(watched_items)} watched library items") - - # Sort watched items by lastWatched, fallback to _mtime (most recent first) - def _sort_key(x: dict): - state = x.get("state", {}) or {} - return ( - str(state.get("lastWatched") or ""), - str(x.get("_mtime") or ""), - ) - - watched_items.sort(key=_sort_key, reverse=True) - - loved_items = [] - added_items = [] - removed_items = [] - - # fetch loved and liked items - - loved_movies, loved_series, liked_movies, liked_series = await asyncio.gather( - self.get_loved_items(auth_key, "movie"), - self.get_loved_items(auth_key, "series"), - self.get_liked_items(auth_key, "movie"), - self.get_liked_items(auth_key, "series"), - ) - - watched_ids = {i.get("_id") for i in watched_items} - - for item in watched_items: - loved = False - if item.get("_id") in loved_movies or item.get("_id") in loved_series: - item["_is_loved"] = True - loved = True - if item.get("_id") in liked_movies or item.get("_id") in liked_series: - item["_is_liked"] = True - loved = True - - if loved: - loved_items.append(item) - - logger.info(f"Found {len(loved_items)} loved library items") - - # Build added-only items: in library, type movie/series, imdb id, not watched, not loved/liked - for item in items: - if item.get("type") not in ["movie", "series"]: - continue - iid = item.get("_id", "") - if not iid.startswith("tt"): - continue - if iid in watched_ids: - continue - if iid in loved_movies or iid in loved_series or iid in liked_movies or iid in liked_series: - continue - if item.get("temp"): - continue - if item.get("removed"): - removed_items.append(item) - continue - - 
added_items.append(item) - - logger.info(f"Found {len(added_items)} added (unwatched) and {len(removed_items)} removed library items") - # Prepare result - result = { - "watched": watched_items, - "loved": loved_items, - "added": added_items, - "removed": removed_items, - } - return result - except Exception as e: - logger.error(f"Error fetching library items: {e}", exc_info=True) - return {"watched": [], "loved": []} - - async def get_addons(self, auth_key: str | None = None) -> list[dict]: - """Get addons from Stremio.""" - url = f"{self.base_url}/api/addonCollectionGet" - payload = { - "type": "AddonCollectionGet", - "authKey": auth_key or await self.get_auth_key(), - "update": True, - } - client = await self._get_client() - data = await self._post_with_retries(client, url, json=payload) - error_payload = data.get("error") - if not error_payload and (data.get("code") and data.get("message")): - error_payload = data - - if error_payload: - message = "Invalid Stremio auth key." - if isinstance(error_payload, dict): - message = error_payload.get("message") or message - elif isinstance(error_payload, str): - message = error_payload or message - logger.warning(f"Addon collection request failed: {error_payload}") - raise ValueError(f"Stremio: {message}") - addons = data.get("result", {}).get("addons", []) - logger.info(f"Found {len(addons)} addons") - return addons - - async def update_addon(self, addons: list[dict], auth_key: str | None = None): - """Update an addon in Stremio.""" - url = f"{self.base_url}/api/addonCollectionSet" - payload = { - "type": "AddonCollectionSet", - "authKey": auth_key or await self.get_auth_key(), - "addons": addons, - } - - client = await self._get_client() - data = await self._post_with_retries(client, url, json=payload) - logger.info("Updated addons") - return data.get("result", {}).get("success", False) - - async def update_catalogs(self, catalogs: list[dict], auth_key: str | None = None): - auth_key = auth_key or await 
self.get_auth_key() - addons = await self.get_addons(auth_key) - catalogs = BASE_CATALOGS + catalogs - logger.info(f"Found {len(addons)} addons") - # find addon with id "com.watchly" - for addon in addons: - if addon.get("manifest", {}).get("id") == settings.ADDON_ID and match_hostname( - addon.get("transportUrl"), settings.HOST_NAME - ): - logger.info(f"Found addon with id {settings.ADDON_ID}") - addon["manifest"]["catalogs"] = catalogs - break - return await self.update_addon(addons, auth_key) - - async def is_addon_installed(self, auth_key: str | None = None): - auth_key = auth_key or await self.get_auth_key() - addons = await self.get_addons(auth_key) - for addon in addons: - if addon.get("manifest", {}).get("id") == settings.ADDON_ID and match_hostname( - addon.get("transportUrl"), settings.HOST_NAME - ): - return True - return False - - async def _post_with_retries(self, client: httpx.AsyncClient, url: str, json: dict, max_tries: int = 3) -> dict: - attempts = 0 - last_exc: Exception | None = None - while attempts < max_tries: - try: - resp = await client.post(url, json=json) - resp.raise_for_status() - return resp.json() - except httpx.HTTPStatusError as e: - status = e.response.status_code - if status == 429 or 500 <= status < 600: - attempts += 1 - backoff = (2 ** (attempts - 1)) + random.uniform(0, 0.25) - logger.warning( - f"Stremio POST {url} failed with {status}; retry {attempts}/{max_tries} in" f" {backoff:.2f}s" - ) - await asyncio.sleep(backoff) - last_exc = e - continue - raise - except httpx.RequestError as e: - attempts += 1 - backoff = (2 ** (attempts - 1)) + random.uniform(0, 0.25) - logger.warning(f"Stremio POST {url} request error: {e}; retry {attempts}/{max_tries} in {backoff:.2f}s") - await asyncio.sleep(backoff) - last_exc = e - continue - if last_exc: - raise last_exc - return {} - - async def _get_with_retries( - self, client: httpx.AsyncClient, url: str, params: dict | None = None, max_tries: int = 3 - ) -> dict: - attempts = 0 - 
last_exc: Exception | None = None - while attempts < max_tries: - try: - resp = await client.get(url, params=params) - resp.raise_for_status() - return resp.json() - except httpx.HTTPStatusError as e: - status = e.response.status_code - if status == 429 or 500 <= status < 600: - attempts += 1 - backoff = (2 ** (attempts - 1)) + random.uniform(0, 0.25) - logger.warning( - f"Stremio GET {url} failed with {status}; retry {attempts}/{max_tries} in" f" {backoff:.2f}s" - ) - await asyncio.sleep(backoff) - last_exc = e - continue - raise - except httpx.RequestError as e: - attempts += 1 - backoff = (2 ** (attempts - 1)) + random.uniform(0, 0.25) - logger.warning(f"Stremio GET {url} request error: {e}; retry {attempts}/{max_tries} in {backoff:.2f}s") - await asyncio.sleep(backoff) - last_exc = e - continue - if last_exc: - raise last_exc - return {} diff --git a/app/services/tmdb/__init__.py b/app/services/tmdb/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/app/services/tmdb/client.py b/app/services/tmdb/client.py new file mode 100644 index 0000000..99d758d --- /dev/null +++ b/app/services/tmdb/client.py @@ -0,0 +1,31 @@ +from typing import Any + +from app.core.base_client import BaseClient +from app.core.version import __version__ + + +class TMDBClient(BaseClient): + """ + Client for interacting with the TMDB API. 
+ """ + + def __init__(self, api_key: str, language: str = "en-US", timeout: float = 10.0, max_retries: int = 3): + headers = { + "User-Agent": f"Watchly/{__version__} (+https://github.com/TimilsinaBimal/Watchly)", + "Accept": "application/json", + } + super().__init__( + base_url="https://api.themoviedb.org/3", timeout=timeout, max_retries=max_retries, headers=headers + ) + self.api_key = api_key + self.language = language + + async def _request(self, method: str, url: str, **kwargs) -> Any: + """Override request to always include API key and language.""" + params = kwargs.get("params", {}) + if params is None: + params = {} + params["api_key"] = self.api_key + params["language"] = self.language + kwargs["params"] = params + return await super()._request(method, url, **kwargs) diff --git a/app/services/tmdb/service.py b/app/services/tmdb/service.py new file mode 100644 index 0000000..723bdb1 --- /dev/null +++ b/app/services/tmdb/service.py @@ -0,0 +1,119 @@ +import functools +from typing import Any + +from async_lru import alru_cache +from loguru import logger + +from app.services.tmdb.client import TMDBClient + + +class TMDBService: + """ + Service for interacting with The Movie Database (TMDB) API. + Refactored to use TMDBClient for better resilience and maintainability. 
+ """ + + def __init__(self, api_key: str, language: str = "en-US"): + self.client = TMDBClient(api_key=api_key, language=language) + + async def close(self): + """Close the underlying HTTP client.""" + await self.client.close() + + @alru_cache(maxsize=2000) + async def find_by_imdb_id(self, imdb_id: str) -> tuple[int | None, str | None]: + """Find TMDB ID and type by IMDB ID.""" + try: + params = {"external_source": "imdb_id"} + data = await self.client.get(f"/find/{imdb_id}", params=params) + + if not data or not isinstance(data, dict): + return None, None + + # Check movie results + movie_results = data.get("movie_results", []) + if movie_results: + tmdb_id = movie_results[0].get("id") + if tmdb_id: + return tmdb_id, "movie" + + # Check TV results + tv_results = data.get("tv_results", []) + if tv_results: + tmdb_id = tv_results[0].get("id") + if tmdb_id: + return tmdb_id, "tv" + + return None, None + except Exception as e: + logger.exception(f"Error finding TMDB ID for IMDB {imdb_id}: {e}") + return None, None + + @alru_cache(maxsize=5000) + async def get_movie_details(self, movie_id: int) -> dict[str, Any]: + """Get details of a specific movie with credits and keywords.""" + params = {"append_to_response": "credits,external_ids,keywords"} + return await self.client.get(f"/movie/{movie_id}", params=params) + + @alru_cache(maxsize=5000) + async def get_tv_details(self, tv_id: int) -> dict[str, Any]: + """Get details of a specific TV series with credits and keywords.""" + params = {"append_to_response": "credits,external_ids,keywords"} + return await self.client.get(f"/tv/{tv_id}", params=params) + + async def get_recommendations(self, tmdb_id: int, media_type: str, page: int = 1) -> dict[str, Any]: + """Get recommendations based on TMDB ID and media type.""" + logger.info(f"Fetching recommendations for: {tmdb_id}") + params = {"page": page} + return await self.client.get(f"/{media_type}/{tmdb_id}/recommendations", params=params) + + async def get_similar(self, 
tmdb_id: int, media_type: str, page: int = 1) -> dict[str, Any]: + """Get similar content based on TMDB ID and media type.""" + logger.info(f"Fetching similar for {tmdb_id}") + params = {"page": page} + return await self.client.get(f"/{media_type}/{tmdb_id}/similar", params=params) + + async def get_discover( + self, + media_type: str, + with_genres: str | None = None, + sort_by: str = "popularity.desc", + page: int = 1, + **kwargs, + ) -> dict[str, Any]: + """Get discover content based on params.""" + mt = "movie" if media_type == "movie" else "tv" + params = {"page": page, "sort_by": sort_by} + if with_genres: + params["with_genres"] = with_genres + params.update(kwargs) + return await self.client.get(f"/discover/{mt}", params=params) + + @alru_cache(maxsize=1000) + async def get_keyword_details(self, keyword_id: int) -> dict[str, Any]: + """Get details of a specific keyword.""" + return await self.client.get(f"/keyword/{keyword_id}") + + async def get_trending(self, media_type: str, time_window: str = "week", page: int = 1) -> dict[str, Any]: + """Get trending content.""" + mt = "movie" if media_type == "movie" else "tv" + params = {"page": page} + return await self.client.get(f"/trending/{mt}/{time_window}", params=params) + + async def get_top_rated(self, media_type: str, page: int = 1) -> dict[str, Any]: + """Get top-rated content list.""" + mt = "movie" if media_type == "movie" else "tv" + params = {"page": page} + return await self.client.get(f"/{mt}/top_rated", params=params) + + @alru_cache(maxsize=1, ttl=86400) + async def get_languages(self) -> list[dict[str, Any]]: + """Fetch supported languages from TMDB.""" + return await self.client.get("/configuration/languages") + + +@functools.lru_cache(maxsize=16) +def get_tmdb_service(language: str = "en-US") -> TMDBService: + from app.core.config import settings + + return TMDBService(api_key=settings.TMDB_API_KEY, language=language) diff --git a/app/services/tmdb_service.py b/app/services/tmdb_service.py 
deleted file mode 100644 index 3f6836e..0000000 --- a/app/services/tmdb_service.py +++ /dev/null @@ -1,203 +0,0 @@ -import asyncio -import functools -import random - -import httpx -from async_lru import alru_cache -from loguru import logger - -from app.core.config import settings -from app.core.version import __version__ - - -class TMDBService: - """Service for interacting with The Movie Database (TMDB) API.""" - - def __init__(self, language: str = "en-US"): - self.api_key = settings.TMDB_API_KEY - self.base_url = "https://api.themoviedb.org/3" - self.language = language - # Reuse HTTP client for connection pooling and better performance - self._client: httpx.AsyncClient | None = None - if not self.api_key: - logger.warning("TMDB_API_KEY is not configured. Catalog endpoints will fail until the key is provided.") - - async def _get_client(self) -> httpx.AsyncClient: - """Get or create the main TMDB API client.""" - if self._client is None: - self._client = httpx.AsyncClient( - timeout=10.0, - limits=httpx.Limits(max_keepalive_connections=20, max_connections=100), - http2=True, - headers={ - "User-Agent": f"Watchly/{__version__} (+https://github.com/TimilsinaBimal/Watchly)", - "Accept": "application/json", - }, - ) - return self._client - - async def close(self): - """Close HTTP clients.""" - if self._client: - await self._client.aclose() - self._client = None - - async def _make_request(self, endpoint: str, params: dict | None = None) -> dict: - """Make a request to the TMDB API.""" - if not self.api_key: - raise RuntimeError("TMDB_API_KEY is not configured. 
Set the environment variable to enable TMDB requests.") - url = f"{self.base_url}{endpoint}" - default_params = {"api_key": self.api_key, "language": self.language} - - if params: - default_params.update(params) - - attempts = 0 - last_exc: Exception | None = None - while attempts < 3: - try: - client = await self._get_client() - response = await client.get(url, params=default_params) - response.raise_for_status() - - if not response.text: - logger.warning(f"TMDB API returned empty response for {endpoint}") - return {} - - try: - return response.json() - except ValueError as e: - logger.error(f"TMDB API returned invalid JSON for {endpoint}: {e}. Response: {response.text[:200]}") - return {} - except httpx.HTTPStatusError as e: - status = e.response.status_code - # Retry on 429 or 5xx - if status == 429 or 500 <= status < 600: - attempts += 1 - backoff = (2 ** (attempts - 1)) + random.uniform(0, 0.25) - logger.warning(f"TMDB {endpoint} failed with {status}; retry {attempts}/3 in {backoff:.2f}s") - await asyncio.sleep(backoff) - last_exc = e - continue - logger.error(f"TMDB API error for {endpoint}: {status} - {e.response.text[:200]}") - raise - except httpx.RequestError as e: - attempts += 1 - backoff = (2 ** (attempts - 1)) + random.uniform(0, 0.25) - logger.warning(f"TMDB request error for {endpoint}: {e}; retry {attempts}/3 in {backoff:.2f}s") - await asyncio.sleep(backoff) - last_exc = e - continue - - # Exhausted retries - if last_exc: - raise last_exc - return {} - - @alru_cache(maxsize=2000) - async def find_by_imdb_id(self, imdb_id: str) -> tuple[int | None, str | None]: - """Find TMDB ID and type by IMDB ID.""" - try: - endpoint = f"/find/{imdb_id}" - params = {"external_source": "imdb_id"} - data = await self._make_request(endpoint, params) - - # Check if we got valid data - if not data or not isinstance(data, dict): - logger.info(f"Invalid response data for IMDB {imdb_id}") - return None, None - - # Check movie results first - movie_results = 
data.get("movie_results", []) - if movie_results and len(movie_results) > 0: - tmdb_id = movie_results[0].get("id") - if tmdb_id: - logger.info(f"Found TMDB movie {tmdb_id} for IMDB {imdb_id}") - return tmdb_id, "movie" - - # Check TV results - tv_results = data.get("tv_results", []) - if tv_results and len(tv_results) > 0: - tmdb_id = tv_results[0].get("id") - if tmdb_id: - logger.info(f"Found TMDB TV {tmdb_id} for IMDB {imdb_id}") - return tmdb_id, "tv" - - logger.info(f"No TMDB result found for IMDB {imdb_id}") - return None, None - except httpx.HTTPStatusError: - # Already logged in _make_request - return None, None - except httpx.RequestError: - # Already logged in _make_request - return None, None - except Exception as e: - logger.warning(f"Unexpected error finding TMDB ID for IMDB {imdb_id}: {e}") - return None, None - - @alru_cache(maxsize=5000) - async def get_movie_details(self, movie_id: int) -> dict: - """Get details of a specific movie with credits and external IDs.""" - params = {"append_to_response": "credits,external_ids,keywords"} - return await self._make_request(f"/movie/{movie_id}", params=params) - - @alru_cache(maxsize=5000) - async def get_tv_details(self, tv_id: int) -> dict: - """Get details of a specific TV series with credits and external IDs.""" - params = {"append_to_response": "credits,external_ids,keywords"} - return await self._make_request(f"/tv/{tv_id}", params=params) - - @alru_cache(maxsize=1000, ttl=6 * 60 * 60) - async def get_recommendations(self, tmdb_id: int, media_type: str, page: int = 1) -> dict: - """Get recommendations based on TMDB ID and media type.""" - params = {"page": page} - endpoint = f"/{media_type}/{tmdb_id}/recommendations" - return await self._make_request(endpoint, params=params) - - @alru_cache(maxsize=1000, ttl=6 * 60 * 60) - async def get_similar(self, tmdb_id: int, media_type: str, page: int = 1) -> dict: - """Get similar content based on TMDB ID and media type.""" - params = {"page": page} - endpoint = 
f"/{media_type}/{tmdb_id}/similar" - return await self._make_request(endpoint, params=params) - - @alru_cache(maxsize=1000, ttl=30 * 60) - async def get_discover( - self, - media_type: str, - with_genres: str | None = None, - sort_by: str = "popularity.desc", - page: int = 1, - **kwargs, - ) -> dict: - """Get discover content based on params.""" - media_type = "movie" if media_type == "movie" else "tv" - params = {"page": page, "sort_by": sort_by} - if with_genres: - params["with_genres"] = with_genres - if kwargs: - params.update(kwargs) - endpoint = f"/discover/{media_type}" - return await self._make_request(endpoint, params=params) - - @alru_cache(maxsize=500, ttl=60 * 60) - async def get_trending(self, media_type: str, time_window: str = "week", page: int = 1) -> dict: - """Get trending content. media_type: 'movie' or 'tv'. time_window: 'day' or 'week'""" - mt = "movie" if media_type == "movie" else "tv" - params = {"page": page} - endpoint = f"/trending/{mt}/{time_window}" - return await self._make_request(endpoint, params=params) - - @alru_cache(maxsize=500, ttl=60 * 60) - async def get_top_rated(self, media_type: str, page: int = 1) -> dict: - """Get top-rated content list.""" - mt = "movie" if media_type == "movie" else "tv" - params = {"page": page} - endpoint = f"/{mt}/top_rated" - return await self._make_request(endpoint, params=params) - - -# Singleton factory to reuse clients and async caches per language -@functools.lru_cache(maxsize=16) -def get_tmdb_service(language: str = "en-US") -> TMDBService: - return TMDBService(language=language) diff --git a/app/services/token_store.py b/app/services/token_store.py index e632383..ad2802c 100644 --- a/app/services/token_store.py +++ b/app/services/token_store.py @@ -1,12 +1,11 @@ import base64 import json -from collections.abc import AsyncIterator from typing import Any import redis.asyncio as redis from async_lru import alru_cache from cachetools import TTLCache -from cryptography.fernet import Fernet, 
InvalidToken +from cryptography.fernet import Fernet from cryptography.hazmat.primitives import hashes from cryptography.hazmat.primitives.kdf.pbkdf2 import PBKDF2HMAC from loguru import logger @@ -97,8 +96,8 @@ async def close(self) -> None: # Close client and disconnect underlying pool try: await self._client.close() - except Exception: - pass + except Exception as e: + logger.debug(f"Silent failure closing redis client: {e}") try: pool = getattr(self._client, "connection_pool", None) if pool is not None: @@ -108,8 +107,8 @@ async def close(self) -> None: res = disconnect() if hasattr(res, "__await__"): await res - except Exception: - pass + except Exception as e: + logger.debug(f"Silent failure disconnecting redis pool: {e}") finally: self._client = None @@ -137,6 +136,15 @@ async def store_user_data(self, user_id: str, payload: dict[str, Any]) -> str: if storage_data.get("authKey"): storage_data["authKey"] = self.encrypt_token(storage_data["authKey"]) + # Securely store password if provided (primary login mode) + if storage_data.get("password"): + try: + storage_data["password"] = self.encrypt_token(storage_data["password"]) + except Exception as exc: + logger.error(f"Password encryption failed for {redact_token(user_id)}: {exc}") + # Do not store plaintext passwords + raise RuntimeError("PASSWORD_ENCRYPT_FAILED") + client = await self._get_client() json_str = json.dumps(storage_data) @@ -163,20 +171,25 @@ async def store_user_data(self, user_id: str, payload: dict[str, Any]) -> str: try: if token in self._missing_tokens: del self._missing_tokens[token] - except Exception: - pass + except Exception as e: + logger.debug(f"Failed to clear negative cache for {token}: {e}") return token - @alru_cache(maxsize=10000, ttl=43200) + async def update_user_data(self, token: str, payload: dict[str, Any]) -> str: + """Update user data by token. 
This is a convenience wrapper around store_user_data.""" + user_id = self.get_user_id_from_token(token) + return await self.store_user_data(user_id, payload) + + @alru_cache(maxsize=2000, ttl=43200) async def get_user_data(self, token: str) -> dict[str, Any] | None: # Short-circuit for tokens known to be missing try: if token in self._missing_tokens: logger.debug(f"[REDIS] Negative cache hit for missing token {token}") return None - except Exception: - pass + except Exception as e: + logger.debug(f"Failed to check negative cache for {token}: {e}") logger.debug(f"[REDIS] Cache miss. Fetching data from redis for {token}") key = self._format_key(token) @@ -187,18 +200,32 @@ async def get_user_data(self, token: str) -> dict[str, Any] | None: # remember negative result briefly try: self._missing_tokens[token] = True - except Exception: - pass + except Exception as e: + logger.debug(f"Failed to set negative cache for missing token {token}: {e}") return None try: data = json.loads(data_raw) - if data.get("authKey"): - data["authKey"] = self.decrypt_token(data["authKey"]) - return data - except (json.JSONDecodeError, InvalidToken): + except json.JSONDecodeError: return None + # Decrypt fields individually; do not fail entire record on decryption errors + if data.get("authKey"): + try: + data["authKey"] = self.decrypt_token(data["authKey"]) + except Exception as e: + logger.warning(f"Decryption failed for authKey associated with {redact_token(token)}: {e}") + # Leave as-is (legacy plaintext or previous failure) + pass + if data.get("password"): + try: + data["password"] = self.decrypt_token(data["password"]) + except Exception as e: + logger.warning(f"Decryption failed for password associated with {redact_token(token)}: {e}") + # require re-login path when needed + data["password"] = None + return data + async def delete_token(self, token: str = None, key: str = None) -> None: if not token and not key: raise ValueError("Either token or key must be provided") @@ -225,72 
+252,29 @@ async def delete_token(self, token: str = None, key: str = None) -> None: try: if token and token in self._missing_tokens: del self._missing_tokens[token] - except Exception: - pass + except Exception as e: + logger.debug(f"Failed to clear negative cache during deletion: {e}") + + async def count_users(self) -> int: + """Count total users by scanning Redis keys with the configured prefix. - async def iter_payloads(self, batch_size: int = 200) -> AsyncIterator[tuple[str, dict[str, Any]]]: + Cached for 12 hours to avoid frequent Redis scans. + """ try: client = await self._get_client() except (redis.RedisError, OSError) as exc: - logger.warning(f"Skipping credential iteration; Redis unavailable: {exc}") - return + logger.warning(f"Cannot count users; Redis unavailable: {exc}") + return 0 pattern = f"{self.KEY_PREFIX}*" - + total = 0 try: - buffer: list[str] = [] - async for key in client.scan_iter(match=pattern, count=batch_size): - buffer.append(key) - if len(buffer) >= batch_size: - try: - self._incr_calls() - values = await client.mget(buffer) - except (redis.RedisError, OSError) as exc: - logger.warning(f"Failed batch fetch for {len(buffer)} keys: {exc}") - values = [None] * len(buffer) - for k, data_raw in zip(buffer, values): - if not data_raw: - continue - try: - payload = json.loads(data_raw) - except json.JSONDecodeError: - logger.warning(f"Failed to decode payload for key {redact_token(k)}. 
Skipping.") - continue - # Decrypt authKey for downstream consumers - try: - if payload.get("authKey"): - payload["authKey"] = self.decrypt_token(payload["authKey"]) - except Exception: - pass - # Token payload ready for consumer - tok = k[len(self.KEY_PREFIX) :] if k.startswith(self.KEY_PREFIX) else k # noqa - yield k, payload - buffer.clear() - - # Flush remainder - if buffer: - try: - values = await client.mget(buffer) - except (redis.RedisError, OSError) as exc: - logger.warning(f"Failed batch fetch for {len(buffer)} keys: {exc}") - values = [None] * len(buffer) - for k, data_raw in zip(buffer, values): - if not data_raw: - continue - try: - payload = json.loads(data_raw) - except json.JSONDecodeError: - logger.warning(f"Failed to decode payload for key {redact_token(k)}. Skipping.") - continue - try: - if payload.get("authKey"): - payload["authKey"] = self.decrypt_token(payload["authKey"]) - except Exception: - pass - tok = k[len(self.KEY_PREFIX) :] if k.startswith(self.KEY_PREFIX) else k # noqa - yield k, payload + async for _ in client.scan_iter(match=pattern, count=500): + total += 1 except (redis.RedisError, OSError) as exc: - logger.warning(f"Failed to scan credential tokens: {exc}") + logger.warning(f"Failed to scan for user count: {exc}") + return 0 + return total token_store = TokenStore() diff --git a/app/services/translation.py b/app/services/translation.py index 80deb9c..020e885 100644 --- a/app/services/translation.py +++ b/app/services/translation.py @@ -24,7 +24,7 @@ async def translate(self, text: str, target_lang: str | None) -> str: ) return translated if translated else text except Exception as e: - logger.warning(f"Translation failed for '{text}' to '{lang}': {e}") + logger.exception(f"Translation failed for '{text}' to '{lang}': {e}") return text diff --git a/app/services/user_profile.py b/app/services/user_profile.py deleted file mode 100644 index e5a0334..0000000 --- a/app/services/user_profile.py +++ /dev/null @@ -1,466 +0,0 @@ -import 
asyncio -from collections import defaultdict - -from app.models.profile import UserTasteProfile -from app.models.scoring import ScoredItem -from app.services.tmdb_service import get_tmdb_service - -# TODO: Make these weights dynamic based on user's preferences. -GENRES_WEIGHT = 0.20 -KEYWORDS_WEIGHT = 0.30 -CAST_WEIGHT = 0.12 -CREW_WEIGHT = 0.08 -YEAR_WEIGHT = 0.05 -COUNTRIES_WEIGHT = 0.05 -BASE_GENRE_WEIGHT = 0.05 -TOPICS_WEIGHT = 0.20 - -# Global constant to control size of user's top-genre whitelist used in filtering -TOP_GENRE_WHITELIST_LIMIT = 5 - - -def emphasis(x: float) -> float: - """ - Non-linear boost for strong preferences. - """ - return x**1.25 - - -def safe_div(a, b): - return a / b if b else 0.0 - - -class UserProfileService: - """ - Service to build a User Taste Profile using Sparse Vectors. - - It converts user's watched/loved items into high-dimensional sparse vectors - based on metadata (genres, keywords, cast, crew) and aggregates them into - a single 'User Vector' representing their taste. - """ - - def __init__(self, language: str = "en-US"): - self.tmdb_service = get_tmdb_service(language=language) - - async def build_user_profile( - self, - scored_items: list[ScoredItem], - content_type: str | None = None, - excluded_genres: list[int] | None = None, - ) -> UserTasteProfile: - """ - Aggregates multiple item vectors into a single User Taste Profile. - Optionally filters by content_type (movie/series) to build specific profiles. 
- """ - # Use internal dicts for aggregation first, then convert to Pydantic - profile_data = { - "genres": defaultdict(float), - "keywords": defaultdict(float), - "cast": defaultdict(float), - "crew": defaultdict(float), - "years": defaultdict(float), - "countries": defaultdict(float), - "topics": defaultdict(float), - } - - async def _process(item): - # Filter by content type if specified - if content_type and item.item.type != content_type: - return None - - # Resolve ID - tmdb_id = await self._resolve_tmdb_id(item.item.id) - if not tmdb_id: - return None - - # Fetch full details including keywords and credits - meta = await self._fetch_full_metadata(tmdb_id, item.item.type) - if not meta: - return None - - # Vectorize this single item - item_vector = self._vectorize_item(meta) - - # Scale by Interest Score (0.0 - 1.0) - interest_weight = item.score / 100.0 - - return item_vector, interest_weight - - # Launch all item processing coroutines in parallel - tasks = [_process(item) for item in scored_items] - results = await asyncio.gather(*tasks) - - # Merge results sequentially to avoid interleaved writes - for res in results: - if res is None: - continue - item_vector, interest_weight = res - self._merge_vector(profile_data, item_vector, interest_weight, excluded_genres) - - # Convert to Pydantic Model - profile = UserTasteProfile( - genres={"values": dict(profile_data["genres"])}, - keywords={"values": dict(profile_data["keywords"])}, - cast={"values": dict(profile_data["cast"])}, - crew={"values": dict(profile_data["crew"])}, - years={"values": dict(profile_data["years"])}, - countries={"values": dict(profile_data["countries"])}, - topics={"values": dict(profile_data["topics"])}, - ) - - # Normalize all vectors to 0-1 range - profile.normalize_all() - - return profile - - def calculate_similarity(self, profile: UserTasteProfile, item_meta: dict) -> float: - """ - Final improved similarity scoring function. 
- Simplified similarity: linear weighted sum across core dimensions. - """ - item_vec = self._vectorize_item(item_meta) - - # Linear weighted sum across selected dimensions - # For each dimension we average per-feature match to avoid bias from many features - def avg_pref(features, mapping): - if not features: - return 0.0 - s = 0.0 - for f in features: - s += mapping.get(f, 0.0) - return s / max(1, len(features)) - - g_score = avg_pref(item_vec.get("genres", []), profile.genres.values) * GENRES_WEIGHT - k_score = avg_pref(item_vec.get("keywords", []), profile.keywords.values) * KEYWORDS_WEIGHT - c_score = avg_pref(item_vec.get("cast", []), profile.cast.values) * CAST_WEIGHT - t_score = avg_pref(item_vec.get("topics", []), profile.topics.values) * TOPICS_WEIGHT - - # Optional extras with small weights - crew_score = avg_pref(item_vec.get("crew", []), profile.crew.values) * CREW_WEIGHT - country_score = avg_pref(item_vec.get("countries", []), profile.countries.values) * COUNTRIES_WEIGHT - year_val = item_vec.get("year") - year_score = 0.0 - if year_val is not None: - year_score = profile.years.values.get(year_val, 0.0) * YEAR_WEIGHT - - score = g_score + k_score + c_score + t_score + crew_score + country_score + year_score - - return float(score) - - def calculate_similarity_with_breakdown(self, profile: UserTasteProfile, item_meta: dict) -> tuple[float, dict]: - """ - Compute similarity and also return a per-dimension breakdown for logging/tuning. 
- Returns (score, breakdown_dict) - """ - item_vec = self._vectorize_item(item_meta) - - def avg_pref(features, mapping): - if not features: - return 0.0 - s = 0.0 - for f in features: - s += mapping.get(f, 0.0) - return s / max(1, len(features)) - - g_score = avg_pref(item_vec.get("genres", []), profile.genres.values) * GENRES_WEIGHT - k_score = avg_pref(item_vec.get("keywords", []), profile.keywords.values) * KEYWORDS_WEIGHT - c_score = avg_pref(item_vec.get("cast", []), profile.cast.values) * CAST_WEIGHT - t_score = avg_pref(item_vec.get("topics", []), profile.topics.values) * TOPICS_WEIGHT - crew_score = avg_pref(item_vec.get("crew", []), profile.crew.values) * CREW_WEIGHT - country_score = avg_pref(item_vec.get("countries", []), profile.countries.values) * COUNTRIES_WEIGHT - year_val = item_vec.get("year") - year_score = 0.0 - if year_val is not None: - year_score = profile.years.values.get(year_val, 0.0) * YEAR_WEIGHT - - score = g_score + k_score + c_score + t_score + crew_score + country_score + year_score - - breakdown = { - "genres": float(g_score), - "keywords": float(k_score), - "cast": float(c_score), - "topics": float(t_score), - "crew": float(crew_score), - "countries": float(country_score), - "year": float(year_score), - "total": float(score), - } - - return float(score), breakdown - - # ---------------- Super-simple overlap similarity ---------------- - @staticmethod - def _jaccard(a: set, b: set) -> float: - if not a and not b: - return 0.0 - if not a or not b: - return 0.0 - inter = len(a & b) - union = len(a | b) - if union == 0: - return 0.0 - return inter / union - - def calculate_simple_overlap_with_breakdown( - self, - profile: UserTasteProfile, - item_meta: dict, - *, - top_topic_tokens: int = 300, - top_genres: int = 20, - top_keyword_ids: int = 200, - ) -> tuple[float, dict]: - """ - Very simple, explainable similarity using plain set overlaps: - - Jaccard of token-level "topics" (title/overview/keyword-names tokens) - - Jaccard of genre 
ids - - Jaccard of TMDB keyword ids (optional, small weight) - - No embeddings; robust to partial-word matching via lightweight tokenization - and heuristic stemming in _tokenize(). - """ - # Preference sets from profile (take top-N by weight to reduce noise) - pref_topics_sorted = sorted(profile.topics.values.items(), key=lambda kv: kv[1], reverse=True) - pref_topic_tokens = {k for k, _ in pref_topics_sorted[:top_topic_tokens]} - - pref_genres_sorted = sorted(profile.genres.values.items(), key=lambda kv: kv[1], reverse=True) - pref_genres = {int(k) for k, _ in pref_genres_sorted[:top_genres]} - - pref_keywords_sorted = sorted(profile.keywords.values.items(), key=lambda kv: kv[1], reverse=True) - pref_keyword_ids = {int(k) for k, _ in pref_keywords_sorted[:top_keyword_ids]} - - # Item sets - vec = self._vectorize_item(item_meta) - item_topic_tokens = set(vec.get("topics") or []) - item_genres = {int(g) for g in (vec.get("genres") or [])} - item_keyword_ids = {int(k) for k in (vec.get("keywords") or [])} - - # Jaccard components - topics_j = self._jaccard(item_topic_tokens, pref_topic_tokens) - genres_j = self._jaccard(item_genres, pref_genres) - kw_j = self._jaccard(item_keyword_ids, pref_keyword_ids) - - # Simple weighted sum; emphasize token overlap - w_topics, w_genres, w_kw = 0.6, 0.25, 0.15 - score = (topics_j * w_topics) + (genres_j * w_genres) + (kw_j * w_kw) - - breakdown = { - "topics_jaccard": float(topics_j), - "genres_jaccard": float(genres_j), - "keywords_jaccard": float(kw_j), - "total": float(score), - } - - return float(score), breakdown - - def _vectorize_item(self, meta: dict) -> dict[str, list[int] | int | list[str] | None]: - """ - Converts raw TMDB metadata into a sparse vector format. - Returns lists of IDs or values. 
- """ - # extract keywords - keywords = meta.get("keywords", {}).get("keywords", []) - if not keywords: - keywords = meta.get("keywords", {}).get("results", []) - - # extract countries (origin_country is list of strings like ["US", "GB"]) - # In details response, it might be production_countries list of dicts - countries = [] - if "production_countries" in meta: - countries = [c.get("iso_3166_1") for c in meta.get("production_countries", []) if c.get("iso_3166_1")] - elif "origin_country" in meta: - countries = meta.get("origin_country", []) - - # genres: prefer explicit genre_ids; fallback to dict list if present - genre_ids = meta.get("genre_ids") or [] - if not genre_ids: - genres_src = meta.get("genres") or [] - if genres_src and isinstance(genres_src, list) and genres_src and isinstance(genres_src[0], dict): - genre_ids = [g.get("id") for g in genres_src if isinstance(g, dict) and g.get("id") is not None] - - # Build topics tokens from title/overview and keyword names - # Handle both our enriched meta format and raw TMDB payloads - title_text = meta.get("name") or meta.get("title") or meta.get("original_title") or "" - overview_text = meta.get("description") or meta.get("overview") or "" - kw_names = [k.get("name") for k in keywords if isinstance(k, dict) and k.get("name")] - topics_tokens: list[str] = [] - topics_tokens.extend(self._tokenize(title_text)) - topics_tokens.extend(self._tokenize(overview_text)) - for nm in kw_names: - topics_tokens.extend(self._tokenize(nm)) - - vector = { - "genres": genre_ids, - "keywords": [k["id"] for k in keywords], - "cast": [], - "crew": [], - "year": None, - "countries": countries, - "topics": topics_tokens, - } - - # Cast (Top 3 only to reduce noise) - cast = meta.get("credits", {}).get("cast", []) - if not cast: - pass - - vector["cast"] = [c["id"] for c in cast[:3]] - - # Crew (Directors only) - crew = meta.get("credits", {}).get("crew", []) - vector["crew"] = [c["id"] for c in crew if c["job"] == "Director"] - - # 
Year Bucket (Decades: 2010, 2020, etc.) - date_str = meta.get("release_date") or meta.get("first_air_date") - if date_str: - try: - year = int(date_str[:4]) - vector["year"] = (year // 10) * 10 - except (ValueError, TypeError): - pass - - return vector - - def _merge_vector( - self, - profile: dict, - item_vector: dict, - weight: float, - excluded_genres: list[int] | None = None, - ): - """Merges an item's sparse vector into the main profile with a weight.""" - - # Weights for specific dimensions (Feature Importance) - DIM_WEIGHTS = { - "genres": GENRES_WEIGHT, - "keywords": KEYWORDS_WEIGHT, - "cast": CAST_WEIGHT, - "crew": CREW_WEIGHT, - "year": YEAR_WEIGHT, - "countries": COUNTRIES_WEIGHT, - "topics": TOPICS_WEIGHT, - } - - for dim, ids in item_vector.items(): - dim_weight = DIM_WEIGHTS.get(dim, 1.0) - final_weight = weight * dim_weight - - if dim == "year": - if ids is not None: # ids is a single int for year - profile["years"][ids] += final_weight - elif ids: - for feature_id in ids: - if dim == "genres" and excluded_genres and feature_id in excluded_genres: - continue - profile[dim][feature_id] += final_weight - - # ---------------- Tokenization helpers ---------------- - _STOPWORDS = { - "a", - "an", - "and", - "the", - "of", - "to", - "in", - "on", - "for", - "with", - "by", - "from", - "at", - "as", - "is", - "it", - "this", - "that", - "be", - "or", - "are", - "was", - "were", - "has", - "have", - "had", - "into", - "their", - "his", - "her", - "its", - "but", - "not", - "no", - "so", - "about", - "over", - "under", - "after", - "before", - "than", - "then", - "out", - "up", - "down", - "off", - "only", - "more", - "most", - "some", - "any", - } - - @staticmethod - def _normalize_token(tok: str) -> str: - t = tok.lower() - t = "".join(ch for ch in t if ch.isalnum()) - if len(t) <= 2: - return "" - for suf in ("ing", "ers", "ies", "ment", "tion", "s", "ed"): - if t.endswith(suf) and len(t) - len(suf) >= 3: - t = t[: -len(suf)] - break - return t - - def 
_tokenize(self, text: str) -> list[str]: - if not text: - return [] - raw = text.replace("-", " ").replace("_", " ") - tokens = [] - for part in raw.split(): - t = self._normalize_token(part) - if not t or t in self._STOPWORDS: - continue - tokens.append(t) - # de-duplicate while preserving order - seen = set() - dedup = [] - for t in tokens: - if t in seen: - continue - seen.add(t) - dedup.append(t) - return dedup - - async def _fetch_full_metadata(self, tmdb_id: int, type_: str) -> dict | None: - """Helper to fetch deep metadata.""" - try: - if type_ == "movie": - return await self.tmdb_service.get_movie_details(tmdb_id) - else: - return await self.tmdb_service.get_tv_details(tmdb_id) - except Exception: - return None - - async def _resolve_tmdb_id(self, stremio_id: str) -> int | None: - """Resolve Stremio ID (tt... or tmdb:...) to TMDB ID.""" - if stremio_id.startswith("tmdb:"): - try: - return int(stremio_id.split(":")[1]) - except (ValueError, IndexError): - return None - - if stremio_id.startswith("tt"): - tmdb_id, _ = await self.tmdb_service.find_by_imdb_id(stremio_id) - return tmdb_id - - return None diff --git a/app/static/cover.png b/app/static/cover.png new file mode 100644 index 0000000..629db7b Binary files /dev/null and b/app/static/cover.png differ diff --git a/app/static/index.html b/app/static/index.html index 2d4b4f1..11a3250 100644 --- a/app/static/index.html +++ b/app/static/index.html @@ -3,7 +3,7 @@ - + Watchly - Personalized Stremio Recommendations @@ -21,7 +21,6 @@ 900: '#0f172a', 950: '#020617' }, - }, stremio: { DEFAULT: '#3b2667', hover: '#4e3286', @@ -74,11 +73,15 @@ } .nav-item.active { - @apply bg-white/10 text-white border-l-2 border-white; + background-color: rgba(255, 255, 255, 0.1); + color: #ffffff; + border-left: 2px solid #ffffff; } .nav-item.disabled { - @apply opacity-50 cursor-not-allowed pointer-events-none; + opacity: 0.5; + cursor: not-allowed; + pointer-events: none; } /* Announcement link styling to ensure visibility 
(neutral theme) */ @@ -91,27 +94,93 @@ color: #ffffff; text-decoration: underline; } + + /* Improve mobile scrolling behavior */ + main { + -webkit-overflow-scrolling: touch; + overscroll-behavior: contain; + } + + /* High-contrast text selection to avoid blending with blue OS highlight */ + ::selection { + background: rgba(59, 130, 246, 0.35); /* blue-500 @ 35% */ + color: #ffffff; + } + ::-moz-selection { + background: rgba(59, 130, 246, 0.35); + color: #ffffff; + } + input::selection, textarea::selection { + background: rgba(59, 130, 246, 0.45); + color: #ffffff; + } + input::-moz-selection, textarea::-moz-selection { + background: rgba(59, 130, 246, 0.45); + color: #ffffff; + } + /* Ensure caret is visible on dark inputs */ + input, textarea { caret-color: #ffffff; } + + /* Animated hamburger icon */ + .hamburger { + position: relative; + width: 40px; + height: 40px; + } + .hamburger .bar { + position: absolute; + left: 9px; + right: 9px; + height: 2px; + background: #e5e7eb; /* slate-200 */ + border-radius: 2px; + transform-origin: center; + transition: transform 200ms ease, opacity 180ms ease; + } + .hamburger .bar.top { top: 12px; } + .hamburger .bar.middle { top: 19px; } + .hamburger .bar.bottom { top: 26px; } + + .hamburger.is-active .bar.top { transform: translateY(7px) rotate(45deg); } + .hamburger.is-active .bar.middle { opacity: 0; } + .hamburger.is-active .bar.bottom { transform: translateY(-7px) rotate(-45deg); } + + @media (prefers-reduced-motion: reduce) { + .hamburger .bar { transition: none; } + } + + /* Number input: hide native spinners; we provide custom +/- */ + input.stepper-input[type=number]::-webkit-outer-spin-button, + input.stepper-input[type=number]::-webkit-inner-spin-button { + -webkit-appearance: none; + margin: 0; + } + input.stepper-input[type=number] { + -moz-appearance: textfield; + } - + -
- Watchly -

Watchly

+

Watchly