Skip to content
2 changes: 2 additions & 0 deletions src/sentry/features/temporary.py
Original file line number Diff line number Diff line change
Expand Up @@ -291,6 +291,8 @@ def register_temporary_features(manager: FeatureManager) -> None:
manager.add("organizations:seer-explorer-streaming", OrganizationFeature, FeatureHandlerStrategy.FLAGPOLE, api_expose=True)
# Enable context engine for Seer Explorer
manager.add("organizations:seer-explorer-context-engine", OrganizationFeature, FeatureHandlerStrategy.FLAGPOLE, api_expose=True)
# Enable context engine experimental contexts
manager.add("organizations:context-engine-experiments", OrganizationFeature, FeatureHandlerStrategy.FLAGPOLE, api_expose=False)
# Enable frontend override for context engine (only for AI/ML/Reasoning platform team)
manager.add("organizations:seer-explorer-context-engine-allow-fe-override", OrganizationFeature, FeatureHandlerStrategy.FLAGPOLE, api_expose=True)
# Enable frontend override UI component for context engine (only for AI/ML/Reasoning platform team)
Expand Down
1 change: 1 addition & 0 deletions src/sentry/seer/autofix/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -768,6 +768,7 @@ def get_autofix_repos_from_project_code_mappings(
"owner": repo_name_sections[0],
"name": "/".join(repo_name_sections[1:]),
"external_id": repo.external_id,
"languages": repo.languages or [],
}
repo_key = (repo_dict["provider"], repo_dict["owner"], repo_dict["name"])

Expand Down
29 changes: 29 additions & 0 deletions src/sentry/seer/signed_seer_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -152,6 +152,35 @@ class LlmGenerateRequest(TypedDict):
response_schema: NotRequired[dict[str, Any]]


class RepoDetails(TypedDict):
    """A single repository entry, as listed in ``ExplorerIndexOrgRepoRequest.repos``."""

    # IDs of the projects linked to this repository.
    project_ids: list[int]
    # Provider, owner and name together identify the repository.
    provider: str
    owner: str
    name: str
    # Identifier of the repository on the provider's side.
    external_id: str
    languages: list[str]
    # Optional key; when present the value may still be None.
    integration_id: NotRequired[str | None]


class ExplorerIndexOrgRepoRequest(TypedDict):
    """Request body accepted by ``make_org_repo_knowledge_index_request``."""

    # ID of the organization the repositories belong to.
    org_id: int
    # One entry per repository to include in the request.
    repos: list[RepoDetails]


def make_org_repo_knowledge_index_request(
    body: ExplorerIndexOrgRepoRequest,
    timeout: int | float | None = None,
    viewer_context: SeerViewerContext | None = None,
):
    """Send an org-repo knowledge index request to Seer.

    The body is serialized with ``orjson`` and forwarded through
    ``make_signed_seer_api_request`` on the autofix connection pool.

    Args:
        body: Organization ID plus the repositories to index.
        timeout: Optional request timeout, passed through unchanged.
        viewer_context: Optional viewer context, passed through unchanged.

    Returns:
        Whatever ``make_signed_seer_api_request`` returns for the call.
    """
    serialized_body = orjson.dumps(body)
    return make_signed_seer_api_request(
        seer_autofix_default_connection_pool,
        "/v1/automation/explorer/index/org-repo-knowledge",
        body=serialized_body,
        timeout=timeout,
        viewer_context=viewer_context,
    )
Comment thread
cursor[bot] marked this conversation as resolved.


def make_org_project_knowledge_index_request(
body: OrgProjectKnowledgeIndexRequest,
timeout: int | float | None = None,
Expand Down
74 changes: 74 additions & 0 deletions src/sentry/tasks/seer/context_engine_index.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
from sentry.models.organization import Organization
from sentry.models.project import Project
from sentry.search.events.types import SnubaParams
from sentry.seer.autofix.utils import get_autofix_repos_from_project_code_mappings
from sentry.seer.explorer.context_engine_utils import (
EVENT_COUNT_LOOKBACK_DAYS,
ProjectEventCounts,
Expand All @@ -30,12 +31,15 @@
)
from sentry.seer.models import SeerApiError
from sentry.seer.signed_seer_api import (
ExplorerIndexOrgRepoRequest,
ExplorerIndexSentryKnowledgeRequest,
OrgProjectKnowledgeIndexRequest,
OrgProjectKnowledgeProjectData,
RepoDetails,
SeerViewerContext,
make_index_sentry_knowledge_request,
make_org_project_knowledge_index_request,
make_org_repo_knowledge_index_request,
)
from sentry.tasks.base import instrumented_task
from sentry.taskworker.namespaces import seer_tasks
Expand Down Expand Up @@ -213,6 +217,71 @@ def build_service_map(organization_id: int, *args, **kwargs) -> None:
raise


@instrumented_task(
    name="sentry.tasks.seer.context_engine_index.index_repos",
    namespace=seer_tasks,
    processing_deadline_duration=10 * 60,  # 10 minutes
)
def index_repos(organization_id: int, *args, **kwargs) -> None:
    """Send the organization's code-mapped repositories to Seer for indexing.

    Repositories are collected from the code mappings of every active project
    in the organization and de-duplicated on ``(provider, owner, name)``; each
    resulting entry lists the IDs of all projects that map to it.

    The task returns without calling Seer when:
      * the ``explorer.context_engine_indexing.enable`` option is off,
      * the organization does not exist,
      * the ``organizations:context-engine-experiments`` feature is off,
      * the organization has no active projects, or
      * no repositories were found across its projects.

    Args:
        organization_id: ID of the organization whose repositories are indexed.

    Raises:
        SeerApiError: If Seer responds with a status code of 400 or above.
    """
    if not options.get("explorer.context_engine_indexing.enable"):
        logger.info("explorer.context_engine_indexing.enable flag is disabled")
        return

    try:
        organization = Organization.objects.get(id=organization_id)
    except Organization.DoesNotExist:
        logger.error("Organization not found", extra={"org_id": organization_id})
        return

    if not features.has("organizations:context-engine-experiments", organization):
        logger.info("organizations:context-engine-experiments flag is disabled")
        return

    logger.info(
        "Starting repo index task",
        extra={"org_id": organization_id},
    )

    projects = list(
        Project.objects.filter(organization_id=organization_id, status=ObjectStatus.ACTIVE)
    )

    if not projects:
        logger.info("No projects found for organization", extra={"org_id": organization_id})
        return

    org_repo_definitions: dict[tuple[str, str, str], RepoDetails] = {}

    for project in projects:
        for repo in get_autofix_repos_from_project_code_mappings(project):
            key = (repo["provider"], repo["owner"], repo["name"])
            existing = org_repo_definitions.get(key)
            if existing is not None:
                # Same repo reached through another project: record the project only.
                existing["project_ids"].append(project.id)
                continue

            org_repo_definitions[key] = {
                "project_ids": [project.id],
                "provider": repo["provider"],
                "owner": repo["owner"],
                "name": repo["name"],
                "external_id": repo["external_id"],
                # `languages` and `integration_id` are not part of the repo's
                # identity, so a missing key must not crash the whole task.
                "languages": repo.get("languages") or [],
                "integration_id": repo.get("integration_id"),
            }

    if not org_repo_definitions:
        # Nothing to index: skip the Seer round trip, which adds up because
        # this task is dispatched for many organizations.
        logger.info("No repos found for organization", extra={"org_id": organization_id})
        return

    response = make_org_repo_knowledge_index_request(
        ExplorerIndexOrgRepoRequest(
            org_id=organization.id, repos=list(org_repo_definitions.values())
        )
    )

    if response.status >= 400:
        raise SeerApiError("Seer request failed", response.status)

    logger.info("Successfully indexed repos for org", extra={"org_id": organization_id})


def get_allowed_org_ids_context_engine_indexing() -> list[int]:
"""
Get the list of allowed organizations for context engine indexing.
Expand Down Expand Up @@ -283,12 +352,17 @@ def schedule_context_engine_indexing_tasks() -> None:
return

allowed_org_ids = get_allowed_org_ids_context_engine_indexing()
now = datetime.now(UTC)

dispatched = 0
for org_id in allowed_org_ids:
try:
index_org_project_knowledge.apply_async(args=[org_id])
build_service_map.apply_async(args=[org_id])

if now.weekday() == 6: # Sunday
index_repos.apply_async(args=[org_id])

dispatched += 1
except Exception:
logger.exception(
Expand Down
1 change: 1 addition & 0 deletions tests/sentry/autofix/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ def test_get_repos_from_project_code_mappings_with_data(self) -> None:
"owner": "getsentry",
"name": "sentry",
"external_id": "123",
"languages": [],
}
]
assert repos == expected_repos
Expand Down
121 changes: 121 additions & 0 deletions tests/sentry/tasks/seer/test_context_engine_index.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
from sentry.tasks.seer.context_engine_index import (
get_allowed_org_ids_context_engine_indexing,
index_org_project_knowledge,
index_repos,
schedule_context_engine_indexing_tasks,
)
from sentry.testutils.cases import TestCase
Expand Down Expand Up @@ -207,6 +208,126 @@ def feature_enabled_for_all(_flag_name: str, org, *args, **kwargs) -> bool:
assert org_without_github.id not in eligible


@django_db_all
class TestIndexRepos(TestCase):
    """Tests for the ``index_repos`` task.

    The fixture organization has a GitHub integration and two projects, each
    code-mapped to its own repository (``getsentry/sentry`` and
    ``getsentry/relay``).
    """

    def setUp(self) -> None:
        super().setUp()
        self.org = self.create_organization()
        self.integration, self.org_integration = self.create_provider_integration_for(
            organization=self.org,
            user=None,
            provider="github",
            external_id=f"github:{self.org.id}",
        )
        self.project1 = self.create_project(organization=self.org)
        self.project2 = self.create_project(organization=self.org)

        self.repo1 = self._create_repo_with_languages(
            self.project1, "getsentry/sentry", "123", ["python", "javascript"]
        )
        self.repo2 = self._create_repo_with_languages(
            self.project2, "getsentry/relay", "456", ["rust"]
        )

        for mapped_project, mapped_repo in (
            (self.project1, self.repo1),
            (self.project2, self.repo2),
        ):
            self.create_code_mapping(
                project=mapped_project,
                repo=mapped_repo,
                organization_integration=self.org_integration,
            )

    def _create_repo_with_languages(self, project, name, external_id, languages):
        """Create a GitHub repo for ``project`` and persist its ``languages``."""
        created = self.create_repo(
            project=project,
            name=name,
            provider="integrations:github",
            external_id=external_id,
            integration_id=self.integration.id,
        )
        created.languages = languages
        created.save()
        return created

    @mock.patch("sentry.tasks.seer.context_engine_index.make_org_repo_knowledge_index_request")
    def test_returns_early_when_option_disabled(self, mock_request) -> None:
        with override_options({"explorer.context_engine_indexing.enable": False}):
            index_repos(self.org.id)

        mock_request.assert_not_called()

    @mock.patch("sentry.tasks.seer.context_engine_index.make_org_repo_knowledge_index_request")
    def test_returns_early_when_feature_flag_disabled(self, mock_request) -> None:
        # Only the option is enabled here; the feature flag is left untouched.
        with override_options({"explorer.context_engine_indexing.enable": True}):
            index_repos(self.org.id)

        mock_request.assert_not_called()

    @mock.patch("sentry.tasks.seer.context_engine_index.make_org_repo_knowledge_index_request")
    def test_returns_early_when_no_projects(self, mock_request) -> None:
        empty_org = self.create_organization()

        with (
            override_options({"explorer.context_engine_indexing.enable": True}),
            self.feature({"organizations:context-engine-experiments": True}),
        ):
            index_repos(empty_org.id)

        mock_request.assert_not_called()

    @mock.patch("sentry.tasks.seer.context_engine_index.make_org_repo_knowledge_index_request")
    def test_calls_seer_with_correct_org_and_repos(self, mock_request) -> None:
        mock_request.return_value.status = 200

        with (
            override_options({"explorer.context_engine_indexing.enable": True}),
            self.feature({"organizations:context-engine-experiments": True}),
        ):
            index_repos(self.org.id)

        mock_request.assert_called_once()
        payload = mock_request.call_args.args[0]
        assert payload["org_id"] == self.org.id

        sent_repos = payload["repos"]
        assert len(sent_repos) == 2
        by_name = {entry["name"]: entry for entry in sent_repos}

        # repo name -> (external_id, languages, project_ids)
        expected = {
            "sentry": ("123", ["python", "javascript"], [self.project1.id]),
            "relay": ("456", ["rust"], [self.project2.id]),
        }
        for repo_name, (external_id, languages, project_ids) in expected.items():
            entry = by_name[repo_name]
            assert entry["provider"] == "integrations:github"
            assert entry["owner"] == "getsentry"
            assert entry["external_id"] == external_id
            assert entry["languages"] == languages
            assert entry["project_ids"] == project_ids
            assert entry["integration_id"] == str(self.integration.id)

    @mock.patch("sentry.tasks.seer.context_engine_index.make_org_repo_knowledge_index_request")
    def test_deduplicates_repos_across_projects(self, mock_request) -> None:
        mock_request.return_value.status = 200
        # Map project2 to the same repo as project1
        self.create_code_mapping(
            project=self.project2,
            repo=self.repo1,
            organization_integration=self.org_integration,
            stack_root="src/",
            source_root="src/",
        )

        with (
            override_options({"explorer.context_engine_indexing.enable": True}),
            self.feature({"organizations:context-engine-experiments": True}),
        ):
            index_repos(self.org.id)

        mock_request.assert_called_once()
        payload = mock_request.call_args.args[0]
        by_name = {entry["name"]: entry for entry in payload["repos"]}

        shared_repo = by_name["sentry"]
        expected_project_ids = sorted([self.project1.id, self.project2.id])
        assert sorted(shared_repo["project_ids"]) == expected_project_ids


Comment thread
cursor[bot] marked this conversation as resolved.
@django_db_all
class TestScheduleContextEngineIndexingTasks(TestCase):
@mock.patch("sentry.tasks.seer.context_engine_index.build_service_map.apply_async")
Expand Down
Loading