-
-
Notifications
You must be signed in to change notification settings - Fork 4.7k
feat: Add repo indexing job #112136
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
feat: Add repo indexing job #112136
Changes from 6 commits
ebc8938
c6b7523
8ece1bf
b7850b5
a5ed0a4
1d53649
3243fdb
4abf5aa
f8a59de
58b0a70
e1ab127
f63942d
f3b33d2
cf5c5a4
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -14,6 +14,7 @@ | |
| from sentry.models.organization import Organization | ||
| from sentry.models.project import Project | ||
| from sentry.search.events.types import SnubaParams | ||
| from sentry.seer.autofix.utils import get_autofix_repos_from_project_code_mappings | ||
| from sentry.seer.explorer.context_engine_utils import ( | ||
| EVENT_COUNT_LOOKBACK_DAYS, | ||
| ProjectEventCounts, | ||
|
|
@@ -30,12 +31,15 @@ | |
| ) | ||
| from sentry.seer.models import SeerApiError | ||
| from sentry.seer.signed_seer_api import ( | ||
| ExplorerIndexOrgRepoRequest, | ||
| ExplorerIndexSentryKnowledgeRequest, | ||
| OrgProjectKnowledgeIndexRequest, | ||
| OrgProjectKnowledgeProjectData, | ||
| RepoDetails, | ||
| SeerViewerContext, | ||
| make_index_sentry_knowledge_request, | ||
| make_org_project_knowledge_index_request, | ||
| make_org_repo_knowledge_index_request, | ||
| ) | ||
| from sentry.tasks.base import instrumented_task | ||
| from sentry.taskworker.namespaces import seer_tasks | ||
|
|
@@ -213,6 +217,71 @@ def build_service_map(organization_id: int, *args, **kwargs) -> None: | |
| raise | ||
|
|
||
|
|
||
| @instrumented_task( | ||
| name="sentry.tasks.seer.context_engine_index.index_repos", | ||
| namespace=seer_tasks, | ||
| processing_deadline_duration=10 * 60, # 10 minutes | ||
| ) | ||
| def index_repos(organization_id: int, *args, **kwargs) -> None: | ||
| if not options.get("explorer.context_engine_indexing.enable"): | ||
| logger.info("explorer.context_engine_indexing.enable flag is disabled") | ||
| return | ||
|
|
||
| try: | ||
| organization = Organization.objects.get(id=organization_id) | ||
| except Organization.DoesNotExist: | ||
| logger.error("Organization not found", extra={"org_id": organization_id}) | ||
| return | ||
|
|
||
| if not features.has("organizations:context-engine-experiments", organization): | ||
| logger.info("organizations:context-engine-experiments flag is disabled") | ||
| return | ||
|
|
||
| logger.info( | ||
| "Starting repo index task", | ||
| extra={"org_id": organization_id}, | ||
| ) | ||
|
|
||
| projects = list( | ||
| Project.objects.filter(organization_id=organization_id, status=ObjectStatus.ACTIVE) | ||
| ) | ||
|
|
||
| if not projects: | ||
| logger.info("No projects found for organization", extra={"org_id": organization_id}) | ||
| return | ||
|
|
||
| org_repo_definitions: dict[tuple[str, str, str], RepoDetails] = {} | ||
|
|
||
| for project in projects: | ||
| repos = get_autofix_repos_from_project_code_mappings(project) | ||
|
Mihir-Mavalankar marked this conversation as resolved.
Outdated
|
||
| for repo in repos: | ||
| key = (repo["provider"], repo["owner"], repo["name"]) | ||
| if key in org_repo_definitions: | ||
| repo_definition = org_repo_definitions[key] | ||
| repo_definition["project_ids"].append(project.id) | ||
| else: | ||
| org_repo_definitions[key] = { | ||
| "project_ids": [project.id], | ||
| "provider": repo["provider"], | ||
| "owner": repo["owner"], | ||
| "name": repo["name"], | ||
| "external_id": repo["external_id"], | ||
|
Contributor
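There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Bug: The code unsafely accesses keys on a raw dictionary from an API response (for example `repo["external_id"]`, `repo["languages"]`, and `repo["integration_id"]`), which will raise a `KeyError` if any of those keys is missing. Suggested Fix: Use the safe `.get()` accessor for keys that may be absent. Prompt for AI Agent |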
||
| "languages": repo["languages"], | ||
|
sentry[bot] marked this conversation as resolved.
Outdated
|
||
| "integration_id": repo["integration_id"], | ||
|
sentry[bot] marked this conversation as resolved.
Outdated
|
||
| } | ||
|
|
||
| response = make_org_repo_knowledge_index_request( | ||
| ExplorerIndexOrgRepoRequest( | ||
| org_id=organization.id, repos=list(org_repo_definitions.values()) | ||
| ) | ||
| ) | ||
|
cursor[bot] marked this conversation as resolved.
|
||
|
|
||
| if response.status >= 400: | ||
| raise SeerApiError("Seer request failed", response.status) | ||
|
Contributor
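There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Missing early return when no repos are collected. Low Severity. The `index_repos` task returns early when the organization has no projects, but not when none of its projects yields a repo: in that case `org_repo_definitions` stays empty and `make_org_repo_knowledge_index_request` is still called with an empty `repos` list. Reviewed by Cursor Bugbot for commit cf5c5a4. Configure here. |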
||
|
|
||
| logger.info("Successfully indexed repos for org", extra={"org_id": organization_id}) | ||
|
|
||
|
|
||
| def get_allowed_org_ids_context_engine_indexing() -> list[int]: | ||
| """ | ||
| Get the list of allowed organizations for context engine indexing. | ||
|
|
@@ -283,12 +352,17 @@ def schedule_context_engine_indexing_tasks() -> None: | |
| return | ||
|
|
||
| allowed_org_ids = get_allowed_org_ids_context_engine_indexing() | ||
| now = datetime.now(UTC) | ||
|
|
||
| dispatched = 0 | ||
| for org_id in allowed_org_ids: | ||
| try: | ||
| index_org_project_knowledge.apply_async(args=[org_id]) | ||
| build_service_map.apply_async(args=[org_id]) | ||
|
|
||
| if now.weekday() == 6: # Sunday | ||
| index_repos.apply_async(args=[org_id]) | ||
|
|
||
| dispatched += 1 | ||
| except Exception: | ||
| logger.exception( | ||
|
|
||


Uh oh!
There was an error while loading. Please reload this page.