cockroachdb · fantapop · Mar 27, 2026 · Apr 3, 2026 · Apr 3, 2026 · Apr 3, 2026
diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
@@ -8,4 +8,10 @@ jobs:
     runs-on: ubuntu-latest
     steps:
       - uses: actions/checkout@v6
-      - run: ./test.sh
+      - uses: actions/setup-go@v6
+        with:
+          go-version-file: autosolve/go.mod
+      - name: Run shell tests
+        run: ./test.sh
+      - name: Run Go tests
+        run: cd autosolve && go test ./... -count=1
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -8,6 +8,10 @@ Breaking changes are prefixed with "Breaking Change: ".
 
 ### Added
 
+- `autosolve/assess` and `autosolve/implement` actions: evaluate tasks for
+  automated resolution and autonomously implement solutions using Claude.
+  Includes AI security review, token usage tracking, structured log levels
+  (error/info/debug), and fast-fail when the target branch already exists.
 - `create-release-pr` reusable workflow: automates version bump PRs by checking for
   unreleased changes in CHANGELOG, extracting the next version, updating
   the CHANGELOG with new version and release date, optionally running custom update

diff --git a/README.md b/README.md
@@ -108,6 +108,156 @@ determine whether a major, minor, or patch version bump is needed.
 - Returns empty `bump_type` when there are no unreleased changes
 - Follows semantic versioning principles
 
+### autosolve/assess
+
+Runs Claude in read-only mode to assess whether a task is suitable for automated
+resolution. Claude evaluates the task against configurable criteria and returns a
+PROCEED or SKIP decision with reasoning.
+
+**Usage:**
+
+```yaml
+- uses: cockroachdb/actions/autosolve/assess@v0
+  with:
+    system_prompt: "Assess whether this issue can be resolved automatically."
+    context_vars: "ISSUE_TITLE,ISSUE_BODY"
+  env:
+    ISSUE_TITLE: ${{ github.event.issue.title }}
+    ISSUE_BODY: ${{ github.event.issue.body }}
+```
+
+**Inputs:**
+
+| Name                  | Default              | Description                                                                                                                                           |
+| --------------------- | -------------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------- |
+| `claude_cli_version`  | `2.1.79`             | Claude CLI version to install (e.g. `2.1.79` or `latest`)                                                                                            |
+| `system_prompt`       | **one required**     | Trusted instructions for Claude describing the task to assess. Do not embed untrusted user input here — use `context_vars` instead. At least one of `system_prompt` or `skill` is required. |
+| `skill`               | **one required**     | Path to a skill/prompt file relative to `GITHUB_WORKSPACE`. At least one of `system_prompt` or `skill` is required.                                  |
+| `context_vars`        | `""`                 | Comma-separated list of environment variable names to pass through to Claude for untrusted user input (e.g., issue titles/bodies)                     |
+| `assessment_criteria` | [see default](autosolve/internal/prompt/templates/default-assessment-criteria.md) | Trusted criteria for the assessment. Do not embed untrusted user input. |
+| `model`               | `claude-opus-4-6`    | Claude model ID                                                                                                                                       |
+| `blocked_paths`       | `""`                 | Comma-separated path prefixes that cannot be modified (case-sensitive). `.github/` is always blocked.                                                                 |
+| `log_level`           | `error`              | Controls Claude output in the step log: `error` (status only), `info` (result summary, permission denial warnings), `debug` (stream everything).     |
+| `working_directory`   | `.`                  | Directory to run in (relative to workspace root)                                                                                                      |
+
+**Outputs:**
+
+| Name         | Description                        |
+| ------------ | ---------------------------------- |
+| `assessment` | `PROCEED` or `SKIP`                |
+| `summary`    | Human-readable assessment reasoning |
+| `result`     | Full Claude result text             |
+
+**Features:**
+
+- Runs Claude in read-only mode (Read, Grep, Glob only) — no file modifications
+- Passes untrusted user input via environment variables rather than embedding it in the prompt, reducing prompt-injection risk
+- Supports custom assessment criteria or skill files
+- Designed to gate the more expensive `autosolve/implement` step
+
+### autosolve/implement
+
+Runs Claude to implement a solution, validates changes with a security review,
+pushes to a fork, and creates a pull request. Includes retry logic, blocked-path
+enforcement, sensitive file detection, and token usage tracking.
+
+**Usage:**
+
+```yaml
+- uses: cockroachdb/actions/autosolve/implement@v0
+  with:
+    system_prompt: "Fix the issue described in the environment variables."
+    context_vars: "ISSUE_TITLE,ISSUE_BODY"
+    fork_owner: my-bot
+    fork_repo: my-repo-fork
+    fork_push_token: ${{ secrets.FORK_PAT }}
+    pr_create_token: ${{ secrets.PR_PAT }}
+  env:
+    ISSUE_TITLE: ${{ github.event.issue.title }}
+    ISSUE_BODY: ${{ github.event.issue.body }}
+```
+
+**Inputs:**
+
+| Name                 | Default                          | Description                                                                                                     |
+| -------------------- | -------------------------------- | --------------------------------------------------------------------------------------------------------------- |
+| `claude_cli_version` | `2.1.79`                         | Claude CLI version to install (e.g. `2.1.79` or `latest`)                                                      |
+| `system_prompt`      | **one required**                 | Trusted instructions for Claude describing the task. Do not embed untrusted user input — use `context_vars`. At least one of `system_prompt` or `skill` is required. |
+| `skill`              | **one required**                 | Path to a skill/prompt file relative to `GITHUB_WORKSPACE`. At least one of `system_prompt` or `skill` is required. |
+| `context_vars`       | `""`                             | Comma-separated list of environment variable names to pass through to Claude for untrusted user input           |
+| `allowed_tools`      | [see below](#allowed_tools-default) | Claude `--allowedTools` string                                                                               |
+| `model`              | `claude-opus-4-6`                | Claude model ID                                                                                                  |
+| `max_retries`        | `3`                              | Maximum implementation attempts                                                                                  |
+| `pr_target_repo`     | `${{ github.repository }}`       | Repository where the PR is created (`owner/repo`). Set this when the PR should target a different repo than the one running the workflow. |
+| `pr_base_branch`     | `main`                           | Base branch for the PR                                                                                           |
+| `pr_labels`          | `autosolve`                      | Comma-separated labels to apply to the PR                                                                        |
+| `pr_draft`           | `true`                           | Whether to create the PR as a draft                                                                              |
+| `fork_owner`         | **required**                     | GitHub username or org that owns the fork                                                                        |
+| `fork_repo`          | **required**                     | Repository name of the fork                                                                                      |
+| `fork_push_token`    | **required**                     | PAT with `contents: write` on the fork repository                                                                |
+| `pr_create_token`    | **required**                     | PAT with `pull_requests: write` on the target repo (see [Token permissions](#token-permissions))                 |
+| `blocked_paths`      | `""`                             | Comma-separated path prefixes that cannot be modified (case-sensitive). `.github/` is always blocked.                             |
+| `git_user_name`      | `autosolve[bot]`                 | Git author/committer name                                                                                        |
+| `git_user_email`     | `autosolve[bot]@users.noreply.github.com` | Git author/committer email                                                                            |
+| `branch_prefix`      | `autosolve/`                     | Prefix for the branch name                                                                                       |
+| `branch_suffix`      | `""`                             | Suffix for the branch name. When empty, a timestamp is used.                                                     |
+| `commit_signature`   | `Co-Authored-By: Claude <noreply@anthropic.com>` | Signature line appended to commit messages                                                        |
+| `pr_footer`          | [see below](#pr_footer-default)  | Footer appended to the PR body                                                                                   |
+| `log_level`          | `error`                          | Controls Claude output in the step log: `error` (status only), `info` (result summary, permission denial warnings), `debug` (stream everything). |
+| `working_directory`  | `.`                              | Directory to run in (relative to workspace root)                                                                 |
+
+<a id="allowed_tools-default"></a>
+> Default `allowed_tools`:
+> ```
+> Read,Write,Edit,Grep,Glob,
+> Bash(git add:*),Bash(git status:*),Bash(git diff:*),Bash(git log:*),Bash(git show:*),
+> Bash(go build:*),Bash(go test:*),Bash(go vet:*),Bash(make:*)
+> ```
+
+<a id="pr_footer-default"></a>
+> Default `pr_footer`:
+> ```
+> ---
+>
+> *This PR was auto-generated by [claude-autosolve-action](https://github.com/cockroachdb/actions) using Claude Code.*
+> *Please review carefully before approving.*
+> ```
+
+**Outputs:**
+
+| Name          | Description                                  |
+| ------------- | -------------------------------------------- |
+| `status`      | `SUCCESS` or `FAILED`                        |
+| `pr_url`      | URL of the created PR                        |
+| `summary`     | Human-readable summary                       |
+| `result`      | Full Claude result text                      |
+| `branch_name` | Name of the branch pushed to the fork        |
+
+**Features:**
+
+- Retries implementation up to `max_retries` times on failure
+- Enforces blocked-path restrictions (`.github/` is always blocked)
+- Detects and rejects sensitive files (credentials, keys, `.env`)
+- Runs an AI-powered security review on all changes before committing
+- Pushes changes to a fork and creates a PR on the upstream repository
+- Tracks Claude token usage
+
+<a id="token-permissions"></a>
+**Token permissions:**
+
+| Token              | Fine-grained                                | Classic |
+| ------------------ | ------------------------------------------- | ------- |
+| `fork_push_token`  | `contents: write` on the fork repository    | `repo`  |
+| `pr_create_token`  | `pull_requests: write` on the target repository | `repo`  |
+
+Applying labels (`pr_labels`) requires `issues: write` on the target repo
+(already covered by `repo` for classic tokens). If the token lacks this
+permission, the action logs a warning and creates the PR without labels.
+
+For organizations using SAML/SSO, the PAT must be authorized for the
+organization that owns the target repository. See
+[GitHub docs on SSO authorization](https://docs.github.com/en/enterprise-cloud@latest/authentication/authenticating-with-saml-single-sign-on/authorizing-a-personal-access-token-for-use-with-saml-single-sign-on).
+
 ### get-workflow-ref
 
 Resolves the git ref that a caller used to invoke a reusable workflow by parsing

diff --git a/autosolve/Makefile b/autosolve/Makefile
@@ -0,0 +1,11 @@
+.PHONY: build test clean
+
+# Build the local development binary (./autosolve) from ./cmd/autosolve.
+build:
+	go build -o autosolve ./cmd/autosolve
+
+test: # Run the Go tests for all packages under this directory; -count=1 bypasses the test cache.
+	go test ./... -count=1
+
+clean: # Remove the binary produced by the build target.
+	rm -f autosolve
diff --git a/autosolve/assess/action.yml b/autosolve/assess/action.yml
@@ -0,0 +1,127 @@
+name: Autosolve Assess
+description: Run Claude in read-only mode to assess whether a task is suitable for automated resolution.
+
+inputs:  # Descriptions mirror the autosolve/assess input table in README.md; keep the two in sync.
+  claude_cli_version:
+    description: "Claude CLI version to install (e.g. '2.1.79' or 'latest')."
+    required: false
+    default: "2.1.79"
+  system_prompt:
+    description: >
+      Trusted instructions for Claude describing the task to assess. At least one of
+      system_prompt or skill is required. Do not embed untrusted user input (e.g., issue
+      titles/bodies) here. Pass user-supplied data via environment variables and list them in context_vars.
+    required: false
+    default: ""
+  skill:
+    description: Path to a skill/prompt file relative to GITHUB_WORKSPACE. At least one of system_prompt or skill is required.
+    required: false
+    default: ""
+  context_vars:
+    description: >
+      Comma-separated list of environment variable names to pass through to Claude.
+      Use this to provide untrusted user input (e.g., issue titles/bodies) safely.
+      Claude is automatically told which variables are available and instructed to
+      read them — you do not need to reference them in system_prompt.
+      Claude will only have access to these variables plus a baseline set of
+      system and authentication variables (PATH, HOME, etc.).
+    required: false
+    default: ""
+  assessment_criteria:
+    description: Trusted custom criteria for the assessment. Do not embed untrusted user input. If not provided, uses default criteria.
+    required: false
+    default: ""
+  model:
+    description: Claude model ID.
+    required: false
+    default: "claude-opus-4-6"
+  blocked_paths:
+    description: >
+      Comma-separated path prefixes that cannot be modified (case-sensitive).
+      .github/ is always blocked and cannot be removed.
+    required: false
+    default: ""
+  log_level:
+    description: >
+      Controls how much Claude output streams to the step log.
+      "error" (default) logs only errors and final status (token counts, result).
+      "info" adds the result summary (turns, duration, cost) and warns on
+      permission denials.
+      "debug" streams everything including all tool calls, assistant text,
+      and tool I/O.
+      Info and debug may contain source code snippets or environment
+      variable values. Security review output is never logged regardless
+      of this setting.
+    required: false
+    default: "error"
+  working_directory:
+    description: Directory to run in (relative to workspace root). Defaults to workspace root.
+    required: false
+    default: "."
+
+outputs:  # Every value below is forwarded from the composite step whose id is "assess".
+  assessment:
+    description: PROCEED or SKIP
+    value: ${{ steps.assess.outputs.assessment }}
+  summary:
+    description: Human-readable assessment reasoning.
+    value: ${{ steps.assess.outputs.summary }}
+  result:
+    description: Full Claude result text.
+    value: ${{ steps.assess.outputs.result }}
+
+runs:
+  using: "composite"
+  steps:
+    - name: Set up Claude CLI  # Wrap roachdev when it is on PATH; otherwise run the upstream installer.
+      shell: bash
+      run: |
+        if command -v roachdev >/dev/null; then
+          printf '#!/bin/sh\nexec roachdev claude -- "$@"\n' > /usr/local/bin/claude
+          chmod +x /usr/local/bin/claude
+          echo "Claude CLI: using roachdev wrapper ($(roachdev version))"
+        else
+          curl --fail --silent --show-error --location https://claude.ai/install.sh | bash -s -- "$CLAUDE_CLI_VERSION"
+          echo "Claude CLI installed: $(claude --version)"
+        fi
+      env:
+        CLAUDE_CLI_VERSION: ${{ inputs.claude_cli_version }}
+
+    - name: Check for existing build  # Sets skip_build / skip_go so the later steps can be skipped.
+      id: check-build
+      shell: bash
+      run: |
+        if [ -x "$RUNNER_TEMP/autosolve" ]; then
+          echo "skip_build=true" >> "$GITHUB_OUTPUT"
+          echo "autosolve binary already available, skipping Go setup and build"
+        elif command -v go >/dev/null; then
+          echo "skip_go=true" >> "$GITHUB_OUTPUT"
+          echo "Go already available ($(go version)), skipping setup-go"
+        fi
+
+    - name: Set up Go  # Runs only when neither a prebuilt binary nor a Go toolchain was found.
+      if: steps.check-build.outputs.skip_build != 'true' && steps.check-build.outputs.skip_go != 'true'
+      uses: actions/setup-go@v6
+      with:
+        go-version-file: ${{ github.action_path }}/../go.mod
+        cache: false
+
+    - name: Build autosolve  # Builds ./cmd/autosolve from the action's parent directory into $RUNNER_TEMP.
+      if: steps.check-build.outputs.skip_build != 'true'
+      shell: bash
+      run: go build -trimpath -o "$RUNNER_TEMP/autosolve" ./cmd/autosolve
+      working-directory: ${{ github.action_path }}/..
+
+    - name: Run assessment  # Action inputs are handed to the binary as INPUT_* environment variables.
+      id: assess
+      shell: bash
+      working-directory: ${{ inputs.working_directory }}
+      run: '"$RUNNER_TEMP/autosolve" assess'  # Path quoted (as in the check/build steps) so spaces in RUNNER_TEMP do not word-split.
+      env:
+        INPUT_SYSTEM_PROMPT: ${{ inputs.system_prompt }}
+        INPUT_SKILL: ${{ inputs.skill }}
+        INPUT_CONTEXT_VARS: ${{ inputs.context_vars }}
+        INPUT_ASSESSMENT_CRITERIA: ${{ inputs.assessment_criteria }}
+        INPUT_MODEL: ${{ inputs.model }}
+        INPUT_BLOCKED_PATHS: ${{ inputs.blocked_paths }}
+        INPUT_LOG_LEVEL: ${{ inputs.log_level }}