Skip to content
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion src/sentry/seer/agent/tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@
get_retention_boundary,
)
from sentry.seer.autofix.autofix import get_all_tags_overview
from sentry.seer.autofix.utils import get_repo_url_path
from sentry.seer.seer_setup import get_supported_scm_providers
from sentry.seer.sentry_data_models import (
BaselineTagDistributionEntry,
Expand Down Expand Up @@ -938,7 +939,9 @@ def get_repository_definition(
return None

# Use the actual repo name from the database, not the requested name.
repo_name_parts = repo.name.split("/")

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

GitLab path name lookup mismatch

Medium Severity

get_repository_definition still resolves repositories by exact Repository.name, while this change makes GitLab owner/name come from get_repo_url_path (config["path"]). Callers that rebuild repo_full_name from those slug fields (without external_id) no longer match GitLab rows stored under name_with_namespace display names.

Additional Locations (1)
Fix in Cursor Fix in Web

Reviewed by Cursor Bugbot for commit bb78554. Configure here.

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Clanker checked (including seer):

I checked the three things that could break, and traced each through the actual Seer code (../seer):

  1. GitLab API access — unaffected. Seer addresses GitLab repos exclusively by external_id (parsed as {netloc}:{project_id} at
    scm/providers/gitlab/provider.py:238), never by owner/name. external_id is unchanged by this PR.

  2. Web/PR/commit URLs — fixed. web_repository_path does self.repository["name"].replace(" ", "") (provider.py:247). The old display name "My Group / My
    Project" collapsed to "MyGroup/MyProject" — wrong case, no dashes → the exact 404 in your branch name. The new slug "my-group/my-project" makes that
    .replace() a no-op and the URL correct.

  3. external_id discovery — improved. Seer's get_external_id_from_user_org_context (explorer/tools/utils.py:46-78) matches context repos against the LLM's
    clean slug via _normalize_repo_full_name, which only trims whitespace around / (not case/dashes). Before, context held display names → "My Group/My Project"
    ≠ "my-group/my-project" → match often failed. Now the context repos come from the same get_autofix_repos_from_project_code_mappings you changed, so both
    sides are clean slugs → they match → external_id is reliably found.

# For GitLab, repo.name is the display name (name_with_namespace, may contain spaces);
# get_repo_url_path() returns the URL-safe path_with_namespace instead.
repo_name_parts = get_repo_url_path(repo).split("/")
owner = repo_name_parts[0]
name = "/".join(repo_name_parts[1:])

Expand Down
22 changes: 20 additions & 2 deletions src/sentry/seer/autofix/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -498,14 +498,32 @@ def clear_preference_automation_handoff(project: Project) -> None:
).delete()


def get_repo_url_path(repo: Repository) -> str:
"""Return the URL-safe owner/name path for a repository.

For GitLab, ``repo.name`` is ``name_with_namespace`` (the human-readable
display name, e.g. ``"My Group / My Project"`` — with spaces). The
URL-safe equivalent is stored in ``repo.config["path"]``
(``path_with_namespace``, e.g. ``"my-group/my-project"``).

For GitHub and all other providers, ``repo.name`` is already the
URL-safe ``owner/repo`` string, so we return it unchanged.
"""
if repo.provider == "integrations:gitlab":
path = repo.config.get("path")

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Would we rather raise if there's no path, since that's unexpected?
Also, maybe this deserves a metric or log, just so we know how often we fall back to repo.name in the GitLab case.

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, good point — I'll change it to raise. I don't think we need a metric: I've looked at the DB and there are no cases where path doesn't exist.

if path:
return path
return repo.name


def build_repo_definition_from_project_repo(
seer_project_repo: SeerProjectRepository,
) -> SeerRepoDefinition | None:
"""Build a SeerRepoDefinition from a SeerProjectRepository with its joined Repository.

Returns None if Repository name is invalid."""
repo = seer_project_repo.project_repository.repository
repo_name_sections = repo.name.split("/")

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This has downstream (Seer) implications, since the repo definition is used in Seer, but I believe we already have utils in Seer that strip whitespace, etc.

repo_name_sections = get_repo_url_path(repo).split("/")
Comment thread
sentry[bot] marked this conversation as resolved.
if len(repo_name_sections) < 2:
sentry_sdk.capture_exception(ValueError(f"Invalid repository name format: {repo.name}"))
return None
Expand Down Expand Up @@ -839,7 +857,7 @@ def get_autofix_repos_from_project_code_mappings(
repos: dict[tuple, dict] = {}
for code_mapping in code_mappings:
repo: Repository = code_mapping.project_repository.repository
repo_name_sections = repo.name.split("/")
repo_name_sections = get_repo_url_path(repo).split("/")
Comment thread
cursor[bot] marked this conversation as resolved.

if (
# We expect a repository name to be in the format of "owner/name" for now.
Expand Down
5 changes: 3 additions & 2 deletions src/sentry/seer/endpoints/project_seer_repos.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
from sentry.models.repository import Repository
from sentry.seer.autofix.utils import (
add_seer_project_repos,
get_repo_url_path,
replace_all_seer_project_repos,
)
from sentry.seer.models.project_repository import (
Expand Down Expand Up @@ -69,9 +70,9 @@ class ProjectRepoResponse(TypedDict):

def _serialize_project_repo(project_repo: SeerProjectRepository) -> ProjectRepoResponse:
repo = project_repo.project_repository.repository
name_parts = repo.name.split("/", 1)
name_parts = get_repo_url_path(repo).split("/", 1)
owner = name_parts[0] if len(name_parts) > 1 else ""
name = name_parts[1] if len(name_parts) > 1 else repo.name
name = name_parts[1] if len(name_parts) > 1 else get_repo_url_path(repo)

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This changes our API response, which could be unexpected if users expect to see their display name. It is more correct, though, since we assume this value is used as a "slug", as we do for GitHub.


return ProjectRepoResponse(
id=str(project_repo.id),
Expand Down
25 changes: 25 additions & 0 deletions tests/sentry/seer/endpoints/test_project_seer_repos.py
Original file line number Diff line number Diff line change
Expand Up @@ -383,6 +383,31 @@ def test_returns_connected_repos(self):
assert project_repo_relay["branchName"] is None
assert project_repo_relay["instructions"] is None

def test_gitlab_repo_uses_path_not_display_name(self):
    """Serialized ``owner``/``name`` for a GitLab repo come from ``config["path"]``.

    The repository row is stored with a spaced display name
    (``name_with_namespace``) while its config carries the URL-safe
    ``path_with_namespace``. The endpoint response must expose the latter so
    that SCM links built from owner/name do not 404.
    """
    integration = self.create_integration(
        organization=self.organization, provider="gitlab", external_id="gl123"
    )
    repo_fields = {
        "organization_id": self.organization.id,
        # Human-readable display name, deliberately containing spaces.
        "name": "My Group / My Project",
        "provider": "integrations:gitlab",
        "external_id": "gl-999",
        "integration_id": integration.id,
        # URL-safe slug the serializer is expected to use instead of `name`.
        "config": {"path": "my-group/my-project"},
    }
    repo = Repository.objects.create(**repo_fields)
    self.create_seer_project_repository(self.project, repository=repo)

    payload = self.get_success_response().data

    # Exactly one connected repo, reported by its slug parts.
    assert len(payload) == 1
    entry = payload[0]
    assert entry["owner"] == "my-group"
    assert entry["name"] == "my-project"

def test_excludes_inactive_repos(self):
self.create_seer_project_repository(self.project, repository=self.repo1)
self.repo1.status = ObjectStatus.HIDDEN
Expand Down
Loading