Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
90 changes: 63 additions & 27 deletions src/sentry/integrations/gitlab/webhooks.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,40 @@
) -> None: ...


def _extract_payload_repo_info(request) -> dict[str, Any]:
"""
Best-effort identifiers pulled from the webhook body.

The token (HTTP_X_GITLAB_TOKEN) is what we'd normally use to resolve the
integration/org, but when it's missing or malformed we can't. The payload
body is independent of the token, and GitLab events carry a ``project``
object that identifies the repo/owner — enough to track down which customer
a bad webhook is coming from. Returns {} if anything is off.
"""
try:
payload = orjson.loads(request.body)
except (orjson.JSONDecodeError, TypeError, AttributeError):
return {}
if not isinstance(payload, dict):
return {}

project = payload.get("project")
project = project if isinstance(project, dict) else {}
info = {
# e.g. "cool-group/sentry" — the owning group/namespace plus repo
"webhook.repo.path": project.get("path_with_namespace"),
# e.g. "Cool Group" — the owning group
"webhook.repo.namespace": project.get("namespace"),
"webhook.repo.web_url": project.get("web_url"),
"webhook.repo.project_id": project.get("id"),
"webhook.object_kind": payload.get("object_kind"),
# who triggered the event, when present
"webhook.user.username": payload.get("user_username"),

Check warning on line 97 in src/sentry/integrations/gitlab/webhooks.py

View check run for this annotation

@sentry/warden / warden: wrdn-pii

GitLab `user_username` (personal identifier) forwarded to durable warning logs

The new `_extract_payload_repo_info` function copies `user_username` from the raw webhook payload into `extra` and then into `logger.warning('gitlab.webhook.malformed-gitlab-token', extra=extra)`. This logs the identity of the specific person who triggered the GitLab event to Sentry's durable Logs explorer — unnecessary for the stated goal of identifying *which customer* sent a malformed token. Replace with a non-personal fallback or drop the field; repo path and project ID already identify the customer.
Comment thread
sentry-warden[bot] marked this conversation as resolved.
Outdated
}
# Drop missing keys so the log attributes stay clean.
return {k: v for k, v in info.items() if v is not None}


def get_gitlab_external_id(request, extra) -> tuple[str, str] | HttpResponse:
token = "<unknown>"
try:
Expand All @@ -78,17 +112,20 @@
external_id = f"{instance}:{group_path}"
return (external_id, secret)
except KeyError:
extra["reason"] = "The customer needs to set a Secret Token in their webhook."
extra["webhook.reason"] = "The customer needs to set a Secret Token in their webhook."
logger.warning("gitlab.webhook.missing-gitlab-token", extra=extra)
return HttpResponse(status=400, reason=extra["reason"])
return HttpResponse(status=400, reason=extra["webhook.reason"])
except ValueError:
extra["reason"] = "The customer's Secret Token is malformed."
# The token is malformed so we can't resolve the integration/org from it.
# Fall back to identifiers in the payload body to find the source repo.
extra = {**extra, **_extract_payload_repo_info(request)}
extra["webhook.reason"] = "The customer's Secret Token is malformed."
logger.warning("gitlab.webhook.malformed-gitlab-token", extra=extra)
return HttpResponse(status=400, reason=extra["reason"])
return HttpResponse(status=400, reason=extra["webhook.reason"])
except Exception:
extra["reason"] = "Generic catch-all error."
extra["webhook.reason"] = "Generic catch-all error."
logger.warning("gitlab.webhook.invalid-token", extra=extra)
return HttpResponse(status=400, reason=extra["reason"])
return HttpResponse(status=400, reason=extra["webhook.reason"])


class GitlabWebhook(SCMWebhook, ABC):
Expand Down Expand Up @@ -644,10 +681,10 @@
clear_organization_info()
extra = {
# This tells us the Gitlab version being used (e.g. current gitlab.com version -> GitLab/15.4.0-pre)
"user-agent": request.META.get("HTTP_USER_AGENT"),
"webhook.user_agent": request.META.get("HTTP_USER_AGENT"),
# Gitlab does not seem to be the only host sending events
# AppPlatformEvents also hit this API
"event-type": request.META.get("HTTP_X_GITLAB_EVENT"),
"webhook.event_type": request.META.get("HTTP_X_GITLAB_EVENT"),
}
result = get_gitlab_external_id(request=request, extra=extra)
if isinstance(result, HttpResponse):
Expand All @@ -661,49 +698,48 @@
installs = org_contexts.organization_integrations
if integration is None:
logger.info("gitlab.webhook.invalid-organization", extra=extra)
extra["reason"] = "There is no integration that matches your organization."
logger.warning(extra["reason"])
return HttpResponse(status=409, reason=extra["reason"])
extra["webhook.reason"] = "There is no integration that matches your organization."
logger.warning(extra["webhook.reason"])
return HttpResponse(status=409, reason=extra["webhook.reason"])

extra = {
**extra,
**{
"integration": {
# The metadata could be useful to debug
# domain_name -> gitlab.com/getsentry-ecosystem/foo'
# scopes -> ['api']
"metadata": integration.metadata,
"id": integration.id, # This is useful to query via Redash
"status": integration.status, # 0 seems to be active
},
Comment on lines -671 to -678

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

need to flatten this

"org_ids": [install.organization_id for install in installs],

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

not supported

},
# The metadata could be useful to debug
# domain_name -> gitlab.com/getsentry-ecosystem/foo'
# scopes -> ['api']
"webhook.integration.metadata": integration.metadata,
"webhook.integration.id": integration.id, # This is useful to query via Redash
"webhook.integration.status": integration.status, # 0 seems to be active
# Logs/EAP attributes are scalar-first; a list serializes as an
# "array" attribute that the Logs explorer won't expose as a
# queryable column. Join to a string so it's filterable.
"webhook.org_ids": ",".join(str(install.organization_id) for install in installs),
}

if not constant_time_compare(secret, integration.metadata["webhook_secret"]):
# Summary and potential workaround mentioned here:
# https://github.com/getsentry/sentry/issues/34903#issuecomment-1262754478
extra["reason"] = GITLAB_WEBHOOK_SECRET_INVALID_ERROR
extra["webhook.reason"] = GITLAB_WEBHOOK_SECRET_INVALID_ERROR
logger.info("gitlab.webhook.invalid-token-secret", extra=extra)
return HttpResponse(status=409, reason=GITLAB_WEBHOOK_SECRET_INVALID_ERROR)

try:
event = orjson.loads(request.body)
except orjson.JSONDecodeError:
extra["reason"] = "Data received is not JSON."
extra["webhook.reason"] = "Data received is not JSON."
logger.warning("gitlab.webhook.invalid-json", extra=extra)
return HttpResponse(status=400, reason=extra["reason"])
return HttpResponse(status=400, reason=extra["webhook.reason"])

try:
handler = self._handlers[request.META["HTTP_X_GITLAB_EVENT"]]
except KeyError:
supported_events = ", ".join(sorted(self._handlers.keys()))
extra["reason"] = (
extra["webhook.reason"] = (
"The customer has edited the webhook in Gitlab to include other types of events. We only support these kinds of events: %s"
% supported_events
)
logger.warning("gitlab.webhook.wrong-event-type", extra=extra)
return HttpResponse(status=400, reason=extra["reason"])
return HttpResponse(status=400, reason=extra["webhook.reason"])

for install in installs:
org_context = organization_service.get_organization_by_id(
Expand Down
12 changes: 11 additions & 1 deletion tests/sentry/integrations/gitlab/test_webhook.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,8 @@ def test_unknown_event(self) -> None:
== "The customer has edited the webhook in Gitlab to include other types of events. We only support these kinds of events: Issue Hook, Merge Request Hook, Note Hook, Push Hook"
)

def test_invalid_token(self) -> None:
@patch("sentry.integrations.gitlab.webhooks.logger")
def test_invalid_token(self, mock_logger: MagicMock) -> None:
response = self.client.post(
self.url,
data=PUSH_EVENT,
Expand All @@ -91,6 +92,15 @@ def test_invalid_token(self) -> None:
assert response.status_code == 400
assert response.reason_phrase == "The customer's Secret Token is malformed."

# The token is malformed so we can't resolve the org, but the payload
# body still identifies the source repo/owner — attach it to the log.
mock_logger.warning.assert_called_once()
extra = mock_logger.warning.call_args.kwargs["extra"]
assert extra["webhook.repo.path"] == "cool-group/sentry"
assert extra["webhook.repo.namespace"] == "Cool Group"
assert extra["webhook.repo.web_url"] == "http://example.com/cool-group/sentry"
assert extra["webhook.object_kind"] == "push"

def test_valid_id_invalid_secret(self) -> None:
response = self.client.post(
self.url,
Expand Down
Loading