Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 19 additions & 8 deletions src/sentry/workflow_engine/tasks/actions.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,15 @@

from django.db.models import Value
from taskbroker_client.retry import Retry
from taskbroker_client.worker.workerchild import ProcessingDeadlineExceeded

from sentry.eventstream.base import GroupState
from sentry.models.activity import Activity
from sentry.models.group import Group
from sentry.models.project import Project
from sentry.services.eventstore.models import GroupEvent
from sentry.silo.base import SiloMode
from sentry.tasks.base import instrumented_task, retry
from sentry.tasks.base import instrumented_task
from sentry.taskworker import namespaces
from sentry.utils import metrics
from sentry.utils.exceptions import timeout_grouping_context
Expand Down Expand Up @@ -74,14 +75,24 @@ def build_trigger_action_task_params(
name="sentry.workflow_engine.tasks.trigger_action",
namespace=namespaces.workflow_engine_tasks,
processing_deadline_duration=30,
retry=Retry(times=3, delay=5),
retry=Retry(
times=3,
delay=5,
on=(Exception, ProcessingDeadlineExceeded),
ignore=(
Action.DoesNotExist,
Group.DoesNotExist,
Project.DoesNotExist,
ProjectNotActiveError,
Workflow.DoesNotExist,
),
Comment thread
sentry[bot] marked this conversation as resolved.
),
Comment on lines +79 to +89
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Bug: The `trigger_action` task will now fail loudly after exhausting its retries instead of silently completing, because the new `Retry` configuration is missing a `times_exceeded` parameter.
Severity: MEDIUM

Suggested Fix

To restore the previous behavior of silently discarding the task after all retries are exhausted, add `times_exceeded=LastAction.Discard` to the `Retry` object in the `trigger_action` task.

Prompt for AI Agent
Review the code at the location below. A potential bug has been identified by an AI
agent. Verify if this is a real issue. If it is, propose a fix; if not, explain why it's
not valid.

Location: src/sentry/workflow_engine/tasks/actions.py#L78-L89

Potential issue: The `trigger_action` task's behavior upon exhausting its retries has
changed. Previously, it used `raise_on_no_retries=False` to silently complete without
error if all retries failed. The new implementation using the `Retry` object lacks an
equivalent configuration, such as `times_exceeded=LastAction.Discard`. As a result,
after exhausting its retries, the task will now raise an exception and be marked as a
failure, which is a change from its previous behavior.

Did we get this right? 👍 / 👎 to inform future reviews.

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Missing `times_exceeded=LastAction.Discard` for exhausted retries

Medium Severity

The old `@retry` decorator for `trigger_action` had `raise_on_no_retries=False`, which meant that when all retries were exhausted, the task would complete successfully instead of failing. The new `Retry` configuration doesn't include the equivalent `times_exceeded=LastAction.Discard` parameter (which is used elsewhere in the codebase, e.g., `scheduled.py`). This means the task will now fail with an unhandled exception after retries are exhausted, instead of silently succeeding as before.

Fix in Cursor Fix in Web

Reviewed by Cursor Bugbot for commit 33269fc. Configure here.

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

silo_mode=SiloMode.CELL,
)
@retry(
timeouts=True,
raise_on_no_retries=False,
ignore_and_capture=(Action.DoesNotExist, Group.DoesNotExist),
ignore=(Project.DoesNotExist, ProjectNotActiveError, Workflow.DoesNotExist),
silenced_exceptions=(
Project.DoesNotExist,
ProjectNotActiveError,
Workflow.DoesNotExist,
),
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Lost Sentry capture for `ignore_and_capture` exceptions

Medium Severity

The old `@retry` decorator distinguished between two cases: `ignore_and_capture=(Action.DoesNotExist, Group.DoesNotExist)` (no retry, task succeeds, but the exception is explicitly captured to Sentry at "info" level) and `ignore=(Project.DoesNotExist, ...)` (no retry, task succeeds, no Sentry report). The new code places all five exceptions into `Retry.ignore`, losing the intentional `sentry_sdk.capture_exception(level="info")` call for `Action.DoesNotExist` and `Group.DoesNotExist`. The deliberate exclusion of these two from `silenced_exceptions` suggests the author wanted them reported, but if `Retry.ignore` silently consumes the exception, it never reaches the reporting layer.

Fix in Cursor Fix in Web

Reviewed by Cursor Bugbot for commit f649020. Configure here.

)
def trigger_action(
action_id: int,
Expand Down
13 changes: 10 additions & 3 deletions src/sentry/workflow_engine/tasks/delayed_workflows.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,10 @@
from typing import Any

from taskbroker_client.retry import Retry
from taskbroker_client.worker.workerchild import ProcessingDeadlineExceeded

from sentry.silo.base import SiloMode
from sentry.tasks.base import instrumented_task, retry
from sentry.tasks.base import instrumented_task
from sentry.taskworker.namespaces import workflow_engine_tasks
from sentry.utils.exceptions import quiet_redis_noise
from sentry.workflow_engine.utils import log_context
Expand All @@ -17,10 +18,16 @@
name="sentry.workflow_engine.tasks.delayed_workflows",
namespace=workflow_engine_tasks,
processing_deadline_duration=60,
retry=Retry(times=5, delay=5),
retry=Retry(
times=5,
delay=5,
on=(
Exception,
ProcessingDeadlineExceeded,
),
),
silo_mode=SiloMode.CELL,
)
@retry(timeouts=True)
@log_context.root()
def process_delayed_workflows(
project_id: int, batch_key: str | None = None, *args: Any, **kwargs: Any
Expand Down
31 changes: 21 additions & 10 deletions src/sentry/workflow_engine/tasks/workflows.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
from django.db import router, transaction
from google.api_core.exceptions import RetryError
from taskbroker_client.retry import Retry, retry_task
from taskbroker_client.worker.workerchild import ProcessingDeadlineExceeded

from sentry.eventstream.base import GroupState
from sentry.locks import locks
Expand All @@ -15,7 +16,7 @@
from sentry.sentry_apps.tasks.service_hooks import kick_off_service_hooks
from sentry.services.eventstore.models import GroupEvent
from sentry.silo.base import SiloMode
from sentry.tasks.base import instrumented_task, retry
from sentry.tasks.base import instrumented_task
from sentry.taskworker import namespaces
from sentry.utils import metrics
from sentry.utils.exceptions import quiet_redis_noise
Expand All @@ -37,10 +38,9 @@
name="sentry.workflow_engine.tasks.process_workflow_activity",
namespace=namespaces.workflow_engine_tasks,
processing_deadline_duration=60,
retry=Retry(times=3, delay=5),
retry=Retry(times=3, delay=5, on=(Exception,)),
silo_mode=SiloMode.CELL,
)
@retry
def process_workflow_activity(activity_id: int, group_id: int, detector_id: int) -> None:
"""
Process a workflow task identified by the given activity, group, and detector.
Expand Down Expand Up @@ -89,14 +89,25 @@ def process_workflow_activity(activity_id: int, group_id: int, detector_id: int)
name="sentry.workflow_engine.tasks.process_workflows_event",
namespace=namespaces.workflow_engine_tasks,
processing_deadline_duration=60,
retry=Retry(times=3, delay=5),
retry=Retry(
times=3,
delay=5,
on=(Exception, ProcessingDeadlineExceeded),
ignore=(
EventNotFoundError,
Group.DoesNotExist,
Project.DoesNotExist,
ProjectNotActiveError,
),
),
silo_mode=SiloMode.CELL,
)
@retry(
timeouts=True,
exclude=EventNotFoundError,
ignore=(Group.DoesNotExist, Project.DoesNotExist, ProjectNotActiveError),
on_silent=DataConditionGroup.DoesNotExist,
silenced_exceptions=(
EventNotFoundError,
DataConditionGroup.DoesNotExist,
Group.DoesNotExist,
Project.DoesNotExist,
ProjectNotActiveError,
),
)
def process_workflows_event(
event_id: str,
Expand Down
Loading