Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion migrations_lockfile.txt
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ replays: 0007_organizationmember_replay_access

seer: 0017_drop_old_fk_columns

sentry: 1101_remove_email_model_pending
sentry: 1102_backfill_auto_link_repos_by_name

social_auth: 0003_social_auth_json_field

Expand Down
160 changes: 160 additions & 0 deletions src/sentry/migrations/1102_backfill_auto_link_repos_by_name.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,160 @@
"""
Backfill ProjectRepository rows by matching repo name suffix to project slug.

This is a one-time backfill for the auto-link-repos-by-name feature. It
iterates all active organizations, and for each one matches unlinked repos
to unlinked projects by name. Respects the dry-run option
(repository.auto-link-by-name-dry-run) read directly from sentry_option.

Safe to re-run: uses get_or_create and skips already-linked pairs.
"""

import logging
from typing import Any

from django.db import migrations
from django.db.models import Exists, OuterRef
from django.utils.text import slugify

from sentry.new_migrations.migrations import CheckedMigration
from sentry.utils.query import RangeQuerySetWrapperWithProgressBar

logger = logging.getLogger(__name__)

# source enum: AUTO_NAME_MATCH = 3
AUTO_NAME_MATCH = 3
ACTIVE = 0


def _get_dry_run(apps: Any) -> bool:
Option = apps.get_model("sentry", "Option")
try:
opt = Option.objects.get(key="repository.auto-link-by-name-dry-run")
return bool(opt.value)
except Option.DoesNotExist:
return True


def _get_repo_name_candidates(repo_name: str) -> list[str]:
parts = [slugify(part.strip()) for part in repo_name.split("/") if part.strip()]
parts = [p for p in parts if p]
if not parts:
return []
candidates = [parts[-1]]
if len(parts) > 1:
candidates.append("-".join(parts))
return candidates


def backfill_auto_link_repos(apps: Any, schema_editor: Any) -> None:
Organization = apps.get_model("sentry", "Organization")
Project = apps.get_model("sentry", "Project")
Repository = apps.get_model("sentry", "Repository")
ProjectRepository = apps.get_model("sentry", "ProjectRepository")

dry_run = _get_dry_run(apps)
total_matched = 0
total_created = 0

for org in RangeQuerySetWrapperWithProgressBar(Organization.objects.filter(status=ACTIVE)):
repos = Repository.objects.filter(
organization_id=org.id,
status=ACTIVE,
).exclude(Exists(ProjectRepository.objects.filter(repository_id=OuterRef("id"))))
if not repos.exists():
continue

unlinked_projects_by_slug = {}
for project_id, slug in (
Project.objects.filter(
organization_id=org.id,
status=ACTIVE,
)
.exclude(Exists(ProjectRepository.objects.filter(project_id=OuterRef("id"))))
.values_list("id", "slug")
):
unlinked_projects_by_slug[slug] = (project_id, slug)

if not unlinked_projects_by_slug:
continue

for repo in repos:
project_id = None
project_slug = None
for candidate in _get_repo_name_candidates(repo.name):
if candidate in unlinked_projects_by_slug:
project_id, project_slug = unlinked_projects_by_slug.pop(candidate)
break
if project_id is None:
continue

total_matched += 1

if dry_run:
logger.info(
"backfill_auto_link_repos.dry_run_match",
extra={
"organization_id": org.id,
"repository_id": repo.id,
"repository_name": repo.name,
"project_id": project_id,
"project_slug": project_slug,
},
)
else:
_, was_created = ProjectRepository.objects.get_or_create(
project_id=project_id,
repository=repo,
defaults={"source": AUTO_NAME_MATCH},
)
if was_created:
total_created += 1
logger.info(
"backfill_auto_link_repos.linked",
extra={
"organization_id": org.id,
"repository_id": repo.id,
"repository_name": repo.name,
"project_id": project_id,
"project_slug": project_slug,
},
)

if dry_run:
logger.info(
"backfill_auto_link_repos.dry_run_complete",
extra={"total_matched": total_matched},
)
else:
logger.info(
"backfill_auto_link_repos.complete",
extra={"total_matched": total_matched, "total_created": total_created},
)


class Migration(CheckedMigration):
# This flag is used to mark that a migration shouldn't be automatically run in production.
# This should only be used for operations where it's safe to run the migration after your
# code has deployed. So this should not be used for most operations that alter the schema
# of a table.
# Here are some things that make sense to mark as post deployment:
# - Large data migrations. Typically we want these to be run manually so that they can be
# monitored and not block the deploy for a long period of time while they run.
# - Adding indexes to large tables. Since this can take a long time, we'd generally prefer to
# run this outside deployments so that we don't block them. Note that while adding an index
# is a schema change, it's completely safe to run the operation after the code has deployed.
# Once deployed, run these manually via: https://develop.sentry.dev/database-migrations/#migration-deployment

is_post_deployment = True

dependencies = [
("sentry", "1101_remove_email_model_pending"),
]

operations = [
migrations.RunPython(
backfill_auto_link_repos,
migrations.RunPython.noop,
hints={"tables": ["sentry_projectrepository"]},
),
]
Loading