cockroachdb · fantapop · Mar 27, 2026 · Apr 3, 2026 · Apr 3, 2026 · Apr 3, 2026
diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
@@ -8,4 +8,10 @@ jobs:
     runs-on: ubuntu-latest
     steps:
       - uses: actions/checkout@v6
-      - run: ./test.sh
+      - uses: actions/setup-go@v6
+        with:
+          go-version-file: autosolve/go.mod
+      - name: Run shell tests
+        run: ./test.sh
+      - name: Run Go tests
+        run: cd autosolve && go test ./... -count=1
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -36,6 +36,12 @@ Breaking changes are prefixed with "Breaking Change: ".
 - `autotag-from-changelog` now exposes `tag_created` and `tag` outputs so
   callers can react to whether a new tag was pushed.
 - `expect_step_output` test helper for asserting GitHub Actions step outputs.
+- `autosolve/assess` action: evaluate tasks for automated resolution suitability
+  using Claude in read-only mode.
+- `autosolve/implement` action: autonomously implement solutions, validate
+  security, push to fork, and create PRs using Claude. Includes AI security
+  review, token usage tracking, per-file batched diff analysis, and structured
+  log levels (error/info/debug) with permission denial warnings.
 - `get-workflow-ref` action: resolve the ref a caller used to invoke a reusable
   workflow by parsing the caller's workflow file — no API calls or extra
   permissions needed.

diff --git a/README.md b/README.md
@@ -108,6 +108,126 @@ determine whether a major, minor, or patch version bump is needed.
 - Returns empty `bump_type` when there are no unreleased changes
 - Follows semantic versioning principles
 
+### autosolve/assess
+
+Runs Claude in read-only mode to assess whether a task is suitable for automated
+resolution. Claude evaluates the task against configurable criteria and returns a
+PROCEED or SKIP decision with reasoning.
+
+**Usage:**
+
+```yaml
+- uses: cockroachdb/actions/autosolve/assess@v0
+  with:
+    system_prompt: "Assess whether this issue can be resolved automatically."
+    context_vars: "ISSUE_TITLE,ISSUE_BODY"
+  env:
+    ISSUE_TITLE: ${{ github.event.issue.title }}
+    ISSUE_BODY: ${{ github.event.issue.body }}
+```
+
+**Inputs:**
+
+| Name                  | Default              | Description                                                                                                                                           |
+| --------------------- | -------------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------- |
+| `claude_cli_version`  | `2.1.79`             | Claude CLI version to install (e.g. `2.1.79` or `latest`)                                                                                            |
+| `system_prompt`       | `""`                 | Trusted instructions for Claude describing the task to assess. Do not embed untrusted user input here — use `context_vars` instead.                   |
+| `skill`               | `""`                 | Path to a skill/prompt file relative to the repo root                                                                                                 |
+| `context_vars`        | `""`                 | Comma-separated list of environment variable names to pass through to Claude for untrusted user input (e.g., issue titles/bodies)                     |
+| `assessment_criteria` | `""`                 | Custom criteria for the assessment. Uses default criteria if not provided.                                                                            |
+| `model`               | `claude-opus-4-6`    | Claude model ID                                                                                                                                       |
+| `blocked_paths`       | `.github/workflows/` | Comma-separated path prefixes that cannot be modified. `.github/` is always blocked.                                                                 |
+| `log_level`           | `error`              | Controls Claude output in the step log: `error` (status only), `info` (result summary, permission denial warnings), `debug` (stream everything).     |
+| `working_directory`   | `.`                  | Directory to run in (relative to workspace root)                                                                                                      |
+
+**Outputs:**
+
+| Name         | Description                        |
+| ------------ | ---------------------------------- |
+| `assessment` | `PROCEED` or `SKIP`                |
+| `summary`    | Human-readable assessment reasoning |
+| `result`     | Full Claude result text             |
+
+**Features:**
+
+- Runs Claude in read-only mode (Read, Grep, Glob only) — no file modifications
+- Passes untrusted user input to Claude via environment variables rather than embedding it in the prompt, reducing prompt-injection risk
+- Supports custom assessment criteria or skill files
+- Designed to gate the more expensive `autosolve/implement` step
+
+### autosolve/implement
+
+Runs Claude to implement a solution, validates changes with a security review,
+pushes to a fork, and creates a pull request. Includes retry logic, blocked-path
+enforcement, sensitive file detection, and token usage tracking.
+
+**Usage:**
+
+```yaml
+- uses: cockroachdb/actions/autosolve/implement@v0
+  with:
+    system_prompt: "Fix the issue described in the environment variables."
+    context_vars: "ISSUE_TITLE,ISSUE_BODY"
+    fork_owner: my-bot
+    fork_repo: my-repo-fork
+    fork_push_token: ${{ secrets.FORK_PAT }}
+    pr_create_token: ${{ secrets.PR_PAT }}
+  env:
+    ISSUE_TITLE: ${{ github.event.issue.title }}
+    ISSUE_BODY: ${{ github.event.issue.body }}
+```
+
+**Inputs:**
+
+| Name                 | Default                          | Description                                                                                                     |
+| -------------------- | -------------------------------- | --------------------------------------------------------------------------------------------------------------- |
+| `claude_cli_version` | `2.1.79`                         | Claude CLI version to install (e.g. `2.1.79` or `latest`)                                                      |
+| `system_prompt`      | `""`                             | Trusted instructions for Claude describing the task. Do not embed untrusted user input — use `context_vars`.    |
+| `skill`              | `""`                             | Path to a skill/prompt file relative to the repo root                                                           |
+| `context_vars`       | `""`                             | Comma-separated list of environment variable names to pass through to Claude for untrusted user input           |
+| `allowed_tools`      | `Read,Write,Edit,Grep,Glob,...`  | Claude `--allowedTools` string (defaults include git, go build/test/vet, and make)                              |
+| `model`              | `claude-opus-4-6`                | Claude model ID                                                                                                  |
+| `max_retries`        | `3`                              | Maximum implementation attempts                                                                                  |
+| `create_pr`          | `true`                           | Whether to create a PR from the changes                                                                          |
+| `pr_target_repo`     | `${{ github.repository }}`       | Repository where the PR is created (`owner/repo`). Set this when the PR should target a different repo than the one running the workflow. |
+| `pr_base_branch`     | `main`                           | Base branch for the PR                                                                                           |
+| `pr_labels`          | `autosolve`                      | Comma-separated labels to apply to the PR                                                                        |
+| `pr_draft`           | `true`                           | Whether to create the PR as a draft                                                                              |
+| `pr_title`           | `""`                             | PR title. If empty, derived from the first commit subject line.                                                  |
+| `pr_body_template`   | `""`                             | Template for the PR body. Supports `{{SUMMARY}}` and `{{BRANCH}}` placeholders.                                 |
+| `fork_owner`         | `""`                             | GitHub username or org that owns the fork                                                                        |
+| `fork_repo`          | `""`                             | Repository name of the fork                                                                                      |
+| `fork_push_token`    | `""`                             | PAT with push access to the fork                                                                                 |
+| `pr_create_token`    | `""`                             | PAT with permission to create PRs on the upstream repo                                                           |
+| `blocked_paths`      | `.github/workflows/`             | Comma-separated path prefixes that cannot be modified. `.github/` is always blocked.                             |
+| `git_user_name`      | `autosolve[bot]`                 | Git author/committer name                                                                                        |
+| `git_user_email`     | `autosolve[bot]@users.noreply.github.com` | Git author/committer email                                                                            |
+| `branch_prefix`      | `autosolve/`                     | Prefix for the branch name                                                                                       |
+| `branch_suffix`      | `""`                             | Suffix for branch name. Defaults to timestamp.                                                                   |
+| `commit_signature`   | `Co-Authored-By: Claude <noreply@anthropic.com>` | Signature line appended to commit messages                                                        |
+| `pr_footer`          | *(auto-generated attribution)*   | Footer appended to the PR body                                                                                   |
+| `log_level`          | `error`                          | Controls Claude output in the step log: `error` (status only), `info` (result summary, permission denial warnings), `debug` (stream everything). |
+| `working_directory`  | `.`                              | Directory to run in (relative to workspace root)                                                                 |
+
+**Outputs:**
+
+| Name          | Description                                  |
+| ------------- | -------------------------------------------- |
+| `status`      | `SUCCESS` or `FAILED`                        |
+| `pr_url`      | URL of the created PR                        |
+| `summary`     | Human-readable summary                       |
+| `result`      | Full Claude result text                      |
+| `branch_name` | Name of the branch pushed to the fork        |
+
+**Features:**
+
+- Retries implementation up to `max_retries` times on failure
+- Enforces blocked-path restrictions (`.github/` is always blocked)
+- Detects and rejects sensitive files (credentials, keys, `.env`)
+- Runs an AI-powered security review on all changes before committing
+- Pushes changes to a fork and creates a PR on the upstream repository
+- Tracks Claude token usage
+
 ### get-workflow-ref
 
 Resolves the git ref that a caller used to invoke a reusable workflow by parsing

diff --git a/autosolve/Makefile b/autosolve/Makefile
@@ -0,0 +1,11 @@
+.PHONY: build test clean
+
+# Local dev binary
+build:
+	go build -o autosolve ./cmd/autosolve
+
+test:
+	go test ./... -count=1
+
+clean:
+	rm -f autosolve
diff --git a/autosolve/assess/action.yml b/autosolve/assess/action.yml
@@ -0,0 +1,127 @@
+name: Autosolve Assess
+description: Run Claude in read-only mode to assess whether a task is suitable for automated resolution.
+
+inputs:
+  claude_cli_version:
+    description: "Claude CLI version to install (e.g. '2.1.79' or 'latest')."
+    required: false
+    default: "2.1.79"
+  system_prompt:
+    description: >
+      Trusted instructions for Claude describing the task to assess.
+      Do not embed untrusted user input (e.g., issue titles/bodies) here.
+      Pass user-supplied data via environment variables and list them in context_vars.
+    required: false
+    default: ""
+  skill:
+    description: Path to a skill/prompt file relative to the repo root.
+    required: false
+    default: ""
+  context_vars:
+    description: >
+      Comma-separated list of environment variable names to pass through to Claude.
+      Use this to provide untrusted user input (e.g., issue titles/bodies) safely.
+      Claude is automatically told which variables are available and instructed to
+      read them — you do not need to reference them in system_prompt.
+      Claude will only have access to these variables plus a baseline set of
+      system and authentication variables (PATH, HOME, etc.).
+    required: false
+    default: ""
+  assessment_criteria:
+    description: Custom criteria for the assessment. If not provided, uses default criteria.
+    required: false
+    default: ""
+  model:
+    description: Claude model ID.
+    required: false
+    default: "claude-opus-4-6"
+  blocked_paths:
+    description: >
+      Comma-separated path prefixes that cannot be modified.
+      .github/ is always blocked and cannot be removed.
+    required: false
+    default: ".github/workflows/"
+  log_level:
+    description: >
+      Controls how much Claude output streams to the step log.
+      "error" (default) logs only errors and final status (token counts, result).
+      "info" adds the result summary (turns, duration, cost) and warns on
+      permission denials.
+      "debug" streams everything including all tool calls, assistant text,
+      and tool I/O.
+      Info and debug may contain source code snippets or environment
+      variable values. Security review output is never logged regardless
+      of this setting.
+    required: false
+    default: "error"
+  working_directory:
+    description: Directory to run in (relative to workspace root). Defaults to workspace root.
+    required: false
+    default: "."
+
+outputs:
+  assessment:
+    description: PROCEED or SKIP
+    value: ${{ steps.assess.outputs.assessment }}
+  summary:
+    description: Human-readable assessment reasoning.
+    value: ${{ steps.assess.outputs.summary }}
+  result:
+    description: Full Claude result text.
+    value: ${{ steps.assess.outputs.result }}
+
+runs:
+  using: "composite"
+  steps:
+    - name: Set up Claude CLI
+      shell: bash
+      run: |
+        if command -v roachdev >/dev/null; then
+          printf '#!/bin/sh\nexec roachdev claude -- "$@"\n' > /usr/local/bin/claude
+          chmod +x /usr/local/bin/claude
+          echo "Claude CLI: using roachdev wrapper"
+        else
+          curl --fail --silent --show-error --location https://claude.ai/install.sh | bash -s -- "$CLAUDE_CLI_VERSION"
+          echo "Claude CLI installed: $(claude --version)"
+        fi
+      env:
+        CLAUDE_CLI_VERSION: ${{ inputs.claude_cli_version }}
+
+    - name: Check for existing build
+      id: check-build
+      shell: bash
+      run: |
+        if [ -x "$RUNNER_TEMP/autosolve" ]; then
+          echo "skip_build=true" >> "$GITHUB_OUTPUT"
+          echo "autosolve binary already available, skipping Go setup and build"
+        elif command -v go >/dev/null; then
+          echo "skip_go=true" >> "$GITHUB_OUTPUT"
+          echo "Go already available ($(go version)), skipping setup-go"
+        fi
+
+    - name: Set up Go
+      if: steps.check-build.outputs.skip_build != 'true' && steps.check-build.outputs.skip_go != 'true'
+      uses: actions/setup-go@v6
+      with:
+        go-version-file: ${{ github.action_path }}/../go.mod
+        cache: false
+
+    - name: Build autosolve
+      if: steps.check-build.outputs.skip_build != 'true'
+      shell: bash
+      run: go build -trimpath -o "$RUNNER_TEMP/autosolve" ./cmd/autosolve
+      working-directory: ${{ github.action_path }}/..
+
+    - name: Run assessment
+      id: assess
+      shell: bash
+      working-directory: ${{ inputs.working_directory }}
+      # Quote the binary path so a RUNNER_TEMP containing whitespace is not
+      # word-split by bash.
+      run: |
+        "$RUNNER_TEMP/autosolve" assess
+      env:
+        INPUT_SYSTEM_PROMPT: ${{ inputs.system_prompt }}
+        INPUT_SKILL: ${{ inputs.skill }}
+        INPUT_CONTEXT_VARS: ${{ inputs.context_vars }}
+        INPUT_ASSESSMENT_CRITERIA: ${{ inputs.assessment_criteria }}
+        INPUT_MODEL: ${{ inputs.model }}
+        INPUT_BLOCKED_PATHS: ${{ inputs.blocked_paths }}
+        INPUT_LOG_LEVEL: ${{ inputs.log_level }}
diff --git a/autosolve/cmd/autosolve/main.go b/autosolve/cmd/autosolve/main.go
@@ -0,0 +1,98 @@
+package main
+
+import (
+	"context"
+	"fmt"
+	"os"
+	"os/signal"
+
+	"github.com/cockroachdb/actions/autosolve/internal/action"
+	"github.com/cockroachdb/actions/autosolve/internal/assess"
+	"github.com/cockroachdb/actions/autosolve/internal/claude"
+	"github.com/cockroachdb/actions/autosolve/internal/config"
+	"github.com/cockroachdb/actions/autosolve/internal/git"
+	"github.com/cockroachdb/actions/autosolve/internal/github"
+	"github.com/cockroachdb/actions/autosolve/internal/implement"
+)
+
+const usage = `Usage: autosolve <command>
+
+Commands:
+  assess      Run assessment phase
+  implement   Run implementation phase
+`
+
+// main selects the sub-command named by the first CLI argument, runs it under
+// a context that is cancelled on os.Interrupt, and exits with status 1 when
+// the command returns an error.
+func main() {
+	// Validate arguments before installing the signal handler: fatalf exits
+	// the process immediately, which would skip any pending defers.
+	if len(os.Args) < 2 {
+		fatalf(usage)
+	}
+
+	var run func(context.Context) error
+	switch os.Args[1] {
+	case "assess":
+		run = runAssess
+	case "implement":
+		run = runImplement
+	default:
+		fatalf("unknown command: %s\n\n%s", os.Args[1], usage)
+	}
+
+	ctx, cancel := signal.NotifyContext(context.Background(), os.Interrupt)
+	err := run(ctx)
+	// Release the signal registration explicitly; a deferred cancel would be
+	// bypassed by the os.Exit call below.
+	cancel()
+
+	if err != nil {
+		action.LogError(err.Error())
+		os.Exit(1)
+	}
+}
+
+func fatalf(format string, args ...any) {
+	fmt.Fprintf(os.Stderr, format+"\n", args...)
+	os.Exit(1)
+}
+
+// runAssess drives the "assess" sub-command. It loads the assess
+// configuration, calls config.ValidateAuth, resolves the temp directory, and
+// then delegates to assess.Run with a claude.CLIRunner. The first error
+// encountered is returned unchanged.
+func runAssess(ctx context.Context) error {
+	assessCfg, loadErr := config.LoadAssessConfig()
+	if loadErr != nil {
+		return loadErr
+	}
+
+	authErr := config.ValidateAuth()
+	if authErr != nil {
+		return authErr
+	}
+
+	scratch, tmpErr := ensureTmpDir()
+	if tmpErr != nil {
+		return tmpErr
+	}
+
+	runner := &claude.CLIRunner{}
+	return assess.Run(ctx, assessCfg, runner, scratch)
+}
+
+func runImplement(ctx context.Context) error {
+	cfg, err := config.LoadImplementConfig()
+	if err != nil {
+		return err
+	}
+	if err := config.ValidateAuth(); err != nil {
+		return err
+	}
+	tmpDir, err := ensureTmpDir()
+	if err != nil {
+		return err
+	}
+
+	gitClient := &git.CLIClient{}
+	ghClient := &github.GithubClient{Token: cfg.PRCreateToken}
+	return implement.Run(ctx, cfg, &claude.CLIRunner{}, ghClient, gitClient, tmpDir)
+}
+
+// ensureTmpDir returns the scratch directory for this run, creating it when
+// necessary.
+//
+// If AUTOSOLVE_TMPDIR is set to a non-empty value, that path is used and
+// created (including parents) if it does not exist yet. Otherwise a fresh
+// directory is created under the default temp location and its path is stored
+// in AUTOSOLVE_TMPDIR so later lookups in this process resolve to the same
+// directory.
+//
+// On failure it returns an empty path and a wrapped error.
+func ensureTmpDir() (string, error) {
+	if dir := os.Getenv("AUTOSOLVE_TMPDIR"); dir != "" {
+		// MkdirAll is a no-op for an existing directory, so a pre-created
+		// path is returned exactly as before.
+		if err := os.MkdirAll(dir, 0o700); err != nil {
+			return "", fmt.Errorf("creating temp dir %q: %w", dir, err)
+		}
+		return dir, nil
+	}
+
+	dir, err := os.MkdirTemp("", "autosolve_*")
+	if err != nil {
+		return "", fmt.Errorf("creating temp dir: %w", err)
+	}
+	if err := os.Setenv("AUTOSOLVE_TMPDIR", dir); err != nil {
+		return "", fmt.Errorf("exporting AUTOSOLVE_TMPDIR: %w", err)
+	}
+	return dir, nil
+}
diff --git a/autosolve/go.mod b/autosolve/go.mod
@@ -0,0 +1,3 @@
+module github.com/cockroachdb/actions/autosolve
+
+go 1.23.8
diff --git a/autosolve/go.sum b/autosolve/go.sum
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1,3 @@
		module github.com/cockroachdb/actions/autosolve

		go 1.23.8
Comment thread fantapop marked this conversation as resolved. Outdated