Skip to content

roachtest: clearrange/dense failed #49165

roachtest: clearrange/dense failed

roachtest: clearrange/dense failed #49165

Workflow file for this run

# Investigate Test Failure
#
# Triggers when a collaborator comments `/investigate` on a test failure
# issue. Invokes Claude to autonomously analyze the failure and post
# findings as a comment.
#
# Manual testing via workflow_dispatch is described below.
#
# Changes to this workflow (especially to permissions, allowed tools,
# or the agent prompt) should be reviewed by SecEng before testing or
# merging, as public-facing AI workflows require sign-off.
#
# Use --ref to point at a branch containing the workflow file:
#
# gh workflow run investigate.yml \
# --repo cockroachdb/cockroach \
# --ref your-branch-name \
# -f issue_number=163542
#
# When triggered via dispatch, findings are uploaded as a workflow
# artifact (visible in the run's "Artifacts" section) but not posted
# as a comment. The artifact is uploaded regardless of trigger type.
#
# To test on a personal fork (where Vertex AI OIDC is unavailable):
#
# 1. Add an ANTHROPIC_API_KEY repository secret to the fork. The
# workflow detects this and uses the API key directly instead of
# Vertex.
#
# 2. Copy the test failure issue to your fork (the agent reads the
# issue by number from the workflow's own repo):
#
# BODY=$(gh issue view 163542 --repo cockroachdb/cockroach --json body -q .body)
# gh issue create --repo <you>/cockroach --title "..." --body "$BODY"
#
# 3. The checkout is a blobless clone with full history, so git log
# and git blame work without deepening. The failure SHA must still
# be reachable from the fork's remote. Push it to a throwaway
# branch if needed:
#
# git push <your-fork-remote> <failure-sha>:refs/heads/investigate-sha
#
# 4. Trigger the workflow. Dispatch defaults to a cheaper model
# (Sonnet 4.5); add -f cheap=false for Opus 4.6, or
# -f smoke_test=true to run the tool smoke test instead of a
# real investigation:
#
# gh workflow run investigate.yml \
# --repo <you>/cockroach \
# --ref agent-workflow-investigate \
# -f issue_number=<fork-issue-number>
name: Investigate Test Failure

# Two entry points:
#   - issue_comment: a newly created comment (the job-level `if`
#     decides whether the comment is an authorized `/investigate`).
#   - workflow_dispatch: a manual run against a given issue number.
on:
  issue_comment:
    types:
      - created
  workflow_dispatch:
    inputs:
      # Issue to analyze; stands in for the issue of a comment event.
      issue_number:
        description: 'Issue number to investigate'
        required: true
      # Text passed to the agent as the trigger comment.
      comment_body:
        description: 'Simulated trigger comment'
        default: '/investigate'
      # Manual runs use the cheaper model unless this is set to false.
      cheap:
        type: boolean
        description: 'Use a cheaper model (claude-sonnet-4-5)'
        default: true
      # Selects the smoke-test prompt instead of the real one.
      smoke_test:
        type: boolean
        description: 'Run a tool smoke test instead of a real investigation'
        default: false
jobs:
investigate:
# Always run for a manual dispatch. For a comment trigger, require all
# of the following:
# - the comment is on an issue, not a pull request;
# - the body is exactly `/investigate` or starts with `/investigate `
#   (with the trailing space, so e.g. `/investigated` does not match);
# - the author's association is COLLABORATOR, MEMBER, or OWNER.
if: >-
github.event_name == 'workflow_dispatch' ||
(github.event.issue.pull_request == null &&
(github.event.comment.body == '/investigate' ||
startsWith(github.event.comment.body, '/investigate ')) &&
(github.event.comment.author_association == 'COLLABORATOR' ||
github.event.comment.author_association == 'MEMBER' ||
github.event.comment.author_association == 'OWNER'))
runs-on: ubuntu-latest
timeout-minutes: 60
# Token scopes for this job. `issues: write` covers the reaction and
# comment steps below. NOTE(review): `id-token: write` is presumably
# needed for the OIDC exchange in the Google Cloud auth step — confirm.
permissions:
contents: read
issues: write
id-token: write
# Job-wide environment shared by every step.
env:
# A dispatch input takes precedence; otherwise use the value from the
# triggering comment event.
ISSUE_NUMBER: ${{ inputs.issue_number || github.event.issue.number }}
COMMENT_BODY: ${{ inputs.comment_body || github.event.comment.body }}
# 'true' when the ANTHROPIC_API_KEY secret is non-empty. Later steps
# compare against the string 'true' to choose between the direct API
# key path and the Vertex AI path.
HAS_API_KEY: ${{ secrets.ANTHROPIC_API_KEY != '' }}
# Repository to check out the code from. Issues and comments use github.repository.
CODE_REPO: ${{ secrets.CODE_REPO }}
steps:
# Add an "eyes" reaction to the triggering comment so the commenter can
# see the request was picked up. Only comment triggers have a comment
# to react to, so manual dispatches skip this step.
- name: Acknowledge trigger
  if: github.event_name == 'issue_comment'
  # The reaction is purely cosmetic: a failed API call here must not
  # abort the investigation itself.
  continue-on-error: true
  env:
    GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
    # Passed through the environment instead of being expanded inline
    # in the script, so the shell never parses expression output.
    COMMENT_ID: ${{ github.event.comment.id }}
  run: |
    gh api "repos/${GITHUB_REPOSITORY}/issues/comments/${COMMENT_ID}/reactions" \
      -f content=eyes
# Fetch the source tree the agent will inspect.
- name: Checkout repository
  uses: actions/checkout@v5
  with:
    # Blobless clone with full history: the entire commit graph is
    # fetched (so git log, git blame, etc. work right away), while file
    # contents are downloaded lazily when first accessed. This is much
    # faster than a full clone of the cockroach repo and needs no
    # manual deepening.
    fetch-depth: 0
    filter: blob:none
    # With an API key (e.g. on a personal fork) check out this
    # repository with the built-in token; otherwise check out CODE_REPO
    # using INVESTIGATE_PAT.
    # NOTE(review): persist-credentials is not set, so the action's
    # default applies. Confirm that leaving the token available to later
    # git commands is intended, given the agent is allowed `Bash(git:*)`.
    repository: ${{ env.HAS_API_KEY == 'true' && github.repository || env.CODE_REPO }}
    token: ${{ env.HAS_API_KEY == 'true' && secrets.GITHUB_TOKEN || secrets.INVESTIGATE_PAT }}
# Install a small `fetch-url URL [OUTPUT_FILE]` helper on PATH. The
# agent is allowed to call it (`Bash(fetch-url:*)`) rather than curl
# directly. With OUTPUT_FILE the response is written to that file;
# without it the response goes to stdout.
- name: Create fetch-url wrapper
  run: |
    cat > /usr/local/bin/fetch-url <<'WRAPPER'
    #!/bin/bash
    set -euo pipefail
    url="${1:?Usage: fetch-url URL [OUTPUT_FILE]}"
    # `--` ends curl's option parsing, so an argument that starts with
    # `-` is always treated as the URL and can never smuggle in extra
    # curl flags (for example `-K` to load an arbitrary config file).
    if [ -n "${2:-}" ]; then
      exec curl -fsSL -o "$2" -- "$url"
    else
      exec curl -fsSL -- "$url"
    fi
    WRAPPER
    chmod +x /usr/local/bin/fetch-url
# Google Cloud login used for Vertex AI on cockroachdb/cockroach. When
# an ANTHROPIC_API_KEY secret is present (e.g. on a personal fork) the
# API key is used instead and this step is skipped.
- name: Authenticate to Google Cloud
  if: env.HAS_API_KEY != 'true'
  uses: google-github-actions/auth@7c6bc770dae815cd3e89ee6cdf493a5fab2cc093 # v3
  with:
    workload_identity_provider: projects/72497726731/locations/global/workloadIdentityPools/ai-review/providers/ai-review
    service_account: ai-review@dev-inf-prod.iam.gserviceaccount.com
    project_id: vertex-model-runners
# Best-effort download of the EngFlow client key and certificate into a
# fresh temporary directory. On success the file paths are exported via
# GITHUB_ENV and the step output `has_engflow` is 'true'. On any failure
# a warning is emitted, `has_engflow` is 'false', and the directory is
# removed. The step itself does not fail in either case.
- name: Retrieve EngFlow certificates
  id: engflow-certs
  if: env.HAS_API_KEY != 'true'
  run: |
    # Write version 2 of the named secret to the given file, hiding
    # gcloud's stderr; the exit status reports success or failure.
    fetch_secret() {
      gcloud secrets versions access 2 --secret="$1" --project=crl-github-actions > "$2" 2>/dev/null
    }

    cert_dir=$(mktemp -d)
    key_file="$cert_dir/engflow.key"
    crt_file="$cert_dir/engflow.crt"

    # The certificate is only requested if the key was retrieved.
    if fetch_secret engflow-mesolite-key "$key_file" &&
       fetch_secret engflow-mesolite-crt "$crt_file"; then
      chmod 600 "$key_file" "$crt_file"
      {
        echo "ENGFLOW_CERT_FILE=$crt_file"
        echo "ENGFLOW_KEY_FILE=$key_file"
      } >> "$GITHUB_ENV"
      echo "has_engflow=true" >> "$GITHUB_OUTPUT"
    else
      echo "::warning::Could not retrieve EngFlow certificates — EngFlow artifact access will be unavailable"
      echo "has_engflow=false" >> "$GITHUB_OUTPUT"
      rm -rf "$cert_dir"
    fi
# Run the Claude Code agent. The prompt points it at a prompt file under
# .github/prompts/ (`investigate-smoke.md` when the `smoke_test` input
# is true, `investigate.md` otherwise) and appends the run's context:
# issue repo, code repo, issue number, trigger comment, and a link to
# this workflow run.
- name: Investigate
uses: cockroachdb/claude-code-action@v1
env:
# Vertex settings are populated only when no API key is configured;
# with an API key both values are empty strings.
ANTHROPIC_VERTEX_PROJECT_ID: ${{ env.HAS_API_KEY != 'true' && 'vertex-model-runners' || '' }}
CLOUD_ML_REGION: ${{ env.HAS_API_KEY != 'true' && 'global' || '' }}
# The checkout may be a different repo than this one (CODE_REPO);
# point the agent's gh commands at this repo's issue tracker.
GH_REPO: ${{ github.repository }}
with:
# `use_vertex` is 'true' exactly when no API key is configured.
anthropic_api_key: ${{ secrets.ANTHROPIC_API_KEY }}
github_token: ${{ secrets.GITHUB_TOKEN }}
use_vertex: ${{ env.HAS_API_KEY != 'true' && 'true' || 'false' }}
# Permissions are passed via --allowedTools using the colon
# format (Bash(cmd:args)) because cockroachdb/claude-code-action@v1
# (Claude Code 2.0.1) ignores permissions set via the `settings`
# input — tools end up denied even though settings.json is written
# correctly. The newer space format (Bash(cmd args)) and settings-
# based permissions may work after upgrading the action.
#
# The model is claude-sonnet-4-5 only when the `cheap` dispatch input
# is true; in every other case it is claude-opus-4-6.
claude_args: |
--model ${{ inputs.cheap == true && 'claude-sonnet-4-5' || 'claude-opus-4-6' }}
--allowedTools "Write,Read,Grep,Glob,WebFetch,Bash(cat:*),Bash(head:*),Bash(tail:*),Bash(grep:*),Bash(rg:*),Bash(awk:*),Bash(cut:*),Bash(tr:*),Bash(sort:*),Bash(uniq:*),Bash(wc:*),Bash(tee:*),Bash(diff:*),Bash(file:*),Bash(strings:*),Bash(jq:*),Bash(ls:*),Bash(find:*),Bash(tree:*),Bash(stat:*),Bash(du:*),Bash(mkdir:*),Bash(git:*),Bash(gh issue view:*),Bash(gh issue list:*),Bash(gh pr view:*),Bash(gh pr list:*),Bash(gh pr diff:*),Bash(gh search:*),Bash(fetch-url:*),Bash(unzip:*),Bash(tar x*),Bash(tar -x*),Bash(tar --extract:*),Bash(go mod download:*),Bash(go env:*),Bash(.claude/skills/engflow-artifacts/run.sh:*),Bash(go tool pprof:*),Bash(go run ./pkg/cmd/tsdump2duck:*),Bash(duckdb:*)"
prompt: |
Read and follow the instructions in the prompt file
`.github/prompts/${{ inputs.smoke_test == true && 'investigate-smoke' || 'investigate' }}.md`.
ISSUE_REPO: ${{ github.repository }}
CODE_REPO: ${{ env.HAS_API_KEY == 'true' && github.repository || env.CODE_REPO }}
ISSUE NUMBER: ${{ env.ISSUE_NUMBER }}
TRIGGER COMMENT: ${{ env.COMMENT_BODY }}
WORKFLOW RUN: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
Use ISSUE_REPO for all gh issue/pr/search commands. Use
CODE_REPO when building source links (blob/permalink URLs).
# Publish artifacts/findings.md as the `investigation-findings`
# artifact. `always()` runs this even after an earlier step failed, and
# a missing findings file is ignored rather than reported.
- name: Upload findings
  uses: actions/upload-artifact@v4
  if: always()
  with:
    path: artifacts/findings.md
    name: investigation-findings
    if-no-files-found: ignore
# Report back on the issue for comment-triggered runs (manual dispatches
# only get the artifact). `always()` ensures a comment is posted even
# when an earlier step failed.
- name: Post findings
  if: always() && github.event_name == 'issue_comment'
  env:
    GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
  run: |
    # Choose the comment body: the findings file when it exists and is
    # non-empty, otherwise a fallback message linking to this run.
    if [ -s artifacts/findings.md ]; then
      body_args=(--body-file artifacts/findings.md)
    else
      body_args=(--body "Investigation did not produce findings. Check the [workflow run](${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}) for details.")
    fi
    gh issue comment "$ISSUE_NUMBER" \
      --repo ${{ github.repository }} \
      "${body_args[@]}"
# Remove the EngFlow key and certificate from the runner. Runs only when
# the retrieval step reported success, and runs even if an earlier step
# failed.
- name: Clean up EngFlow certificates
  if: always() && steps.engflow-certs.outputs.has_engflow == 'true'
  run: |
    cert_dir=$(dirname "$ENGFLOW_CERT_FILE")
    rm -f "$ENGFLOW_CERT_FILE" "$ENGFLOW_KEY_FILE"
    # Best effort: the directory is removed only if it is now empty.
    rmdir "$cert_dir" 2>/dev/null || true