Skip to content
2 changes: 2 additions & 0 deletions src/sentry/features/temporary.py
Original file line number Diff line number Diff line change
Expand Up @@ -291,6 +291,8 @@ def register_temporary_features(manager: FeatureManager) -> None:
manager.add("organizations:seer-explorer-streaming", OrganizationFeature, FeatureHandlerStrategy.FLAGPOLE, api_expose=True)
# Enable context engine for Seer Explorer
manager.add("organizations:seer-explorer-context-engine", OrganizationFeature, FeatureHandlerStrategy.FLAGPOLE, api_expose=True)
# Enable context engine experimental contexts
manager.add("organizations:context-engine-experiments", OrganizationFeature, FeatureHandlerStrategy.FLAGPOLE, api_expose=False)
# Enable frontend override for context engine (only for AI/ML/Reasoning platform team)
manager.add("organizations:seer-explorer-context-engine-allow-fe-override", OrganizationFeature, FeatureHandlerStrategy.FLAGPOLE, api_expose=True)
# Enable frontend override UI component for context engine (only for AI/ML/Reasoning platform team)
Expand Down
1 change: 1 addition & 0 deletions src/sentry/seer/autofix/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -768,6 +768,7 @@ def get_autofix_repos_from_project_code_mappings(
"owner": repo_name_sections[0],
"name": "/".join(repo_name_sections[1:]),
"external_id": repo.external_id,
"languages": repo.languages or [],
}
repo_key = (repo_dict["provider"], repo_dict["owner"], repo_dict["name"])

Expand Down
29 changes: 29 additions & 0 deletions src/sentry/seer/signed_seer_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -152,6 +152,35 @@ class LlmGenerateRequest(TypedDict):
response_schema: NotRequired[dict[str, Any]]


class RepoDetails(TypedDict):
project_ids: list[int]
provider: str
owner: str
name: str
external_id: str
languages: list[str]
integration_id: NotRequired[str | None]


class ExplorerIndexOrgRepoRequest(TypedDict):
    """Request body for the org-repo-knowledge indexing endpoint."""

    # ID of the organization whose repositories are being indexed.
    org_id: int
    # Repository entries to index for that organization.
    repos: list[RepoDetails]


def make_org_repo_knowledge_index_request(
    body: ExplorerIndexOrgRepoRequest,
    timeout: int | float | None = None,
    viewer_context: SeerViewerContext | None = None,
):
    """
    Send a signed org-repo-knowledge indexing request to Seer.

    :param body: Request payload; serialized with ``orjson`` before sending.
    :param timeout: Forwarded unchanged to ``make_signed_seer_api_request``.
    :param viewer_context: Forwarded unchanged to ``make_signed_seer_api_request``.
    :returns: Whatever ``make_signed_seer_api_request`` returns for this call.
    """
    serialized_body = orjson.dumps(body)
    endpoint = "/v1/automation/explorer/index/org-repo-knowledge"
    return make_signed_seer_api_request(
        seer_autofix_default_connection_pool,
        endpoint,
        body=serialized_body,
        timeout=timeout,
        viewer_context=viewer_context,
    )
Comment thread
cursor[bot] marked this conversation as resolved.


def make_org_project_knowledge_index_request(
body: OrgProjectKnowledgeIndexRequest,
timeout: int | float | None = None,
Expand Down
89 changes: 89 additions & 0 deletions src/sentry/tasks/seer/context_engine_index.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,10 @@
from sentry.models.organization import Organization
from sentry.models.project import Project
from sentry.search.events.types import SnubaParams
from sentry.seer.autofix.utils import (
bulk_get_project_preferences,
get_autofix_repos_from_project_code_mappings,
)
from sentry.seer.explorer.context_engine_utils import (
EVENT_COUNT_LOOKBACK_DAYS,
ProjectEventCounts,
Expand All @@ -30,12 +34,15 @@
)
from sentry.seer.models import SeerApiError
from sentry.seer.signed_seer_api import (
ExplorerIndexOrgRepoRequest,
ExplorerIndexSentryKnowledgeRequest,
OrgProjectKnowledgeIndexRequest,
OrgProjectKnowledgeProjectData,
RepoDetails,
SeerViewerContext,
make_index_sentry_knowledge_request,
make_org_project_knowledge_index_request,
make_org_repo_knowledge_index_request,
)
from sentry.tasks.base import instrumented_task
from sentry.taskworker.namespaces import seer_tasks
Expand Down Expand Up @@ -213,6 +220,83 @@
raise


@instrumented_task(
    name="sentry.tasks.seer.context_engine_index.index_repos",
    namespace=seer_tasks,
    processing_deadline_duration=10 * 60,  # 10 minutes
)
def index_repos(organization_id: int, *args, **kwargs) -> None:
    """
    Send an organization's repositories to Seer for repo-knowledge indexing.

    Repositories are gathered per active project: the repositories listed in
    the project's Seer preferences when there are any, otherwise the ones
    derived from the project's code mappings. Entries are deduplicated across
    projects on ``(provider, owner, name)`` and each entry accumulates the IDs
    of every project that references it.

    The task returns without calling Seer when:

    - the ``explorer.context_engine_indexing.enable`` option is off,
    - the organization does not exist,
    - the ``organizations:context-engine-experiments`` feature is off,
    - the organization has no active projects, or
    - no repositories were collected.

    :param organization_id: ID of the organization to index.
    :raises SeerApiError: if Seer answers with an HTTP status >= 400.
    """
    if not options.get("explorer.context_engine_indexing.enable"):
        logger.info("explorer.context_engine_indexing.enable flag is disabled")
        return

    try:
        organization = Organization.objects.get(id=organization_id)
    except Organization.DoesNotExist:
        logger.error("Organization not found", extra={"org_id": organization_id})
        return

    if not features.has("organizations:context-engine-experiments", organization):
        logger.info("organizations:context-engine-experiments flag is disabled")
        return

    logger.info(
        "Starting repo index task",
        extra={"org_id": organization_id},
    )

    project_map = {
        project.id: project
        for project in Project.objects.filter(
            organization_id=organization_id, status=ObjectStatus.ACTIVE
        )
    }

    if not project_map:
        logger.info("No projects found for organization", extra={"org_id": organization_id})
        return

    org_repo_definitions: dict[tuple[str, str, str], RepoDetails] = {}

    preferences_by_id = bulk_get_project_preferences(organization_id, list(project_map))

    for project_id, project in project_map.items():
        # A project may have no entry (or a None entry) in the preferences
        # mapping; treat that as "no preferred repositories" instead of
        # crashing on ``None.get``.
        # NOTE(review): a reviewer suggested skipping such projects entirely;
        # the code-mapping fallback is kept here because it already applies to
        # projects whose preferences list no repositories -- confirm intent.
        project_pref = preferences_by_id.get(str(project_id)) or {}
        project_pref_repos = project_pref.get("repositories") or []

        autofix_repos = get_autofix_repos_from_project_code_mappings(project)

        # Languages are only known for code-mapping repos, so index them by
        # repo key to enrich preference repos that point at the same repo.
        language_map: dict[tuple[str, str, str], list[str]] = {}
        for autofix_repo in autofix_repos:
            key = (autofix_repo["provider"], autofix_repo["owner"], autofix_repo["name"])
            # ``languages`` may be missing or None on some repo dicts.
            language_map[key] = autofix_repo.get("languages") or []

        for repo in project_pref_repos or autofix_repos:
            try:
                key = (repo["provider"], repo["owner"], repo["name"])
                external_id = repo["external_id"]
            except KeyError:
                # One malformed entry must not abort indexing for the whole org.
                logger.warning(
                    "Skipping repo with missing required fields",
                    extra={"org_id": organization_id, "project_id": project_id},
                )
                continue

            languages = language_map.get(key, [])
            repo_definition = org_repo_definitions.get(key)

            if repo_definition is None:
                org_repo_definitions[key] = {
                    "project_ids": [project_id],
                    "provider": key[0],
                    "owner": key[1],
                    "name": key[2],
                    "external_id": external_id,
                    # Copy so later backfills never alias ``language_map`` values.
                    "languages": list(languages),
                    # ``integration_id`` is optional on a repo definition.
                    "integration_id": repo.get("integration_id"),
                }
                continue

            if project_id not in repo_definition["project_ids"]:
                repo_definition["project_ids"].append(project_id)
            # The first project to register a repo may not have known its
            # languages; backfill them once a later project does.
            if languages and not repo_definition["languages"]:
                repo_definition["languages"] = list(languages)

    if not org_repo_definitions:
        # Nothing to index: avoid a pointless Seer round-trip.
        logger.info("No repos found for organization", extra={"org_id": organization_id})
        return

    response = make_org_repo_knowledge_index_request(
        ExplorerIndexOrgRepoRequest(
            org_id=organization.id, repos=list(org_repo_definitions.values())
        )
    )

    if response.status >= 400:
        raise SeerApiError("Seer request failed", response.status)

    logger.info("Successfully indexed repos for org", extra={"org_id": organization_id})


def get_allowed_org_ids_context_engine_indexing() -> list[int]:
"""
Get the list of allowed organizations for context engine indexing.
Expand Down Expand Up @@ -283,12 +367,17 @@
return

allowed_org_ids = get_allowed_org_ids_context_engine_indexing()
now = datetime.now(UTC)

dispatched = 0
for org_id in allowed_org_ids:
try:
index_org_project_knowledge.apply_async(args=[org_id])
build_service_map.apply_async(args=[org_id])

if now.weekday() == 6: # Sunday
index_repos.apply_async(args=[org_id])

dispatched += 1
except Exception:
logger.exception(
Expand Down
1 change: 1 addition & 0 deletions tests/sentry/autofix/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ def test_get_repos_from_project_code_mappings_with_data(self) -> None:
"owner": "getsentry",
"name": "sentry",
"external_id": "123",
"languages": [],
}
]
assert repos == expected_repos
Expand Down
121 changes: 121 additions & 0 deletions tests/sentry/tasks/seer/test_context_engine_index.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
from sentry.tasks.seer.context_engine_index import (
get_allowed_org_ids_context_engine_indexing,
index_org_project_knowledge,
index_repos,
schedule_context_engine_indexing_tasks,
)
from sentry.testutils.cases import TestCase
Expand Down Expand Up @@ -207,6 +208,126 @@ def feature_enabled_for_all(_flag_name: str, org, *args, **kwargs) -> bool:
assert org_without_github.id not in eligible


@django_db_all
class TestIndexRepos(TestCase):
    """Tests for the ``index_repos`` task: gating, request payload, and repo deduplication."""

    def setUp(self) -> None:
        # Fixture: one org with a GitHub integration and two projects, each
        # code-mapped to its own repository with known languages.
        super().setUp()
        self.org = self.create_organization()
        self.integration, self.org_integration = self.create_provider_integration_for(
            organization=self.org,
            user=None,
            provider="github",
            external_id=f"github:{self.org.id}",
        )
        self.project1 = self.create_project(organization=self.org)
        self.project2 = self.create_project(organization=self.org)

        self.repo1 = self.create_repo(
            project=self.project1,
            name="getsentry/sentry",
            provider="integrations:github",
            external_id="123",
            integration_id=self.integration.id,
        )
        self.repo1.languages = ["python", "javascript"]
        self.repo1.save()

        self.repo2 = self.create_repo(
            project=self.project2,
            name="getsentry/relay",
            provider="integrations:github",
            external_id="456",
            integration_id=self.integration.id,
        )
        self.repo2.languages = ["rust"]
        self.repo2.save()

        # One code mapping per project, created in project order.
        for mapped_project, mapped_repo in (
            (self.project1, self.repo1),
            (self.project2, self.repo2),
        ):
            self.create_code_mapping(
                project=mapped_project,
                repo=mapped_repo,
                organization_integration=self.org_integration,
            )

    @mock.patch("sentry.tasks.seer.context_engine_index.make_org_repo_knowledge_index_request")
    def test_returns_early_when_option_disabled(self, mock_request) -> None:
        """With the indexing option off, Seer is never called."""
        with override_options({"explorer.context_engine_indexing.enable": False}):
            index_repos(self.org.id)

        mock_request.assert_not_called()

    @mock.patch("sentry.tasks.seer.context_engine_index.make_org_repo_knowledge_index_request")
    def test_returns_early_when_feature_flag_disabled(self, mock_request) -> None:
        """With the option on but the feature not enabled, Seer is never called."""
        with override_options({"explorer.context_engine_indexing.enable": True}):
            index_repos(self.org.id)

        mock_request.assert_not_called()

    @mock.patch("sentry.tasks.seer.context_engine_index.make_org_repo_knowledge_index_request")
    def test_returns_early_when_no_projects(self, mock_request) -> None:
        """An organization without projects results in no Seer call."""
        empty_org = self.create_organization()

        with (
            override_options({"explorer.context_engine_indexing.enable": True}),
            self.feature({"organizations:context-engine-experiments": True}),
        ):
            index_repos(empty_org.id)

        mock_request.assert_not_called()

    @mock.patch("sentry.tasks.seer.context_engine_index.make_org_repo_knowledge_index_request")
    def test_calls_seer_with_correct_org_and_repos(self, mock_request) -> None:
        """The request carries the org ID and one fully populated entry per repo."""
        mock_request.return_value.status = 200

        with (
            override_options({"explorer.context_engine_indexing.enable": True}),
            self.feature({"organizations:context-engine-experiments": True}),
        ):
            index_repos(self.org.id)

        mock_request.assert_called_once()
        payload = mock_request.call_args.args[0]
        assert payload["org_id"] == self.org.id

        indexed = payload["repos"]
        assert len(indexed) == 2
        by_name = {entry["name"]: entry for entry in indexed}

        # (repo name, external id, languages, owning project id)
        expectations = (
            ("sentry", "123", ["python", "javascript"], self.project1.id),
            ("relay", "456", ["rust"], self.project2.id),
        )
        for repo_name, external_id, languages, project_id in expectations:
            entry = by_name[repo_name]
            assert entry["provider"] == "integrations:github"
            assert entry["owner"] == "getsentry"
            assert entry["external_id"] == external_id
            assert entry["languages"] == languages
            assert entry["project_ids"] == [project_id]
            assert entry["integration_id"] == str(self.integration.id)

    @mock.patch("sentry.tasks.seer.context_engine_index.make_org_repo_knowledge_index_request")
    def test_deduplicates_repos_across_projects(self, mock_request) -> None:
        """A repo mapped from two projects appears once, listing both project IDs."""
        mock_request.return_value.status = 200
        # Map project2 to the same repo as project1
        self.create_code_mapping(
            project=self.project2,
            repo=self.repo1,
            organization_integration=self.org_integration,
            stack_root="src/",
            source_root="src/",
        )

        with (
            override_options({"explorer.context_engine_indexing.enable": True}),
            self.feature({"organizations:context-engine-experiments": True}),
        ):
            index_repos(self.org.id)

        mock_request.assert_called_once()
        payload = mock_request.call_args.args[0]
        by_name = {entry["name"]: entry for entry in payload["repos"]}

        expected_project_ids = sorted([self.project1.id, self.project2.id])
        assert sorted(by_name["sentry"]["project_ids"]) == expected_project_ids


Comment thread
cursor[bot] marked this conversation as resolved.
@django_db_all
class TestScheduleContextEngineIndexingTasks(TestCase):
@mock.patch("sentry.tasks.seer.context_engine_index.build_service_map.apply_async")
Expand Down
Loading