From 684bb958633713db39306236c264ec5659ce4056 Mon Sep 17 00:00:00 2001 From: ebembi-crdb Date: Wed, 8 Apr 2026 17:34:03 +0530 Subject: [PATCH 1/4] Add AI-powered release notes reviewer GitHub Action Introduces a GitHub Action that automatically reviews PRs touching release notes files using schema validation, technical accuracy checks, and OpenAI GPT-4o content quality analysis. - Copies core reviewer logic into .github/scripts/release_review/ - Adds standalone runner script (.github/scripts/run_release_review.py) - Adds workflow that triggers on PRs to main touching releases.yml or _includes/releases/** - Posts results as a neutral (advisory) check run and updatable PR comment - Never blocks merge; requires OPENAI_API_KEY repository secret --- .github/scripts/release_review/__init__.py | 5 + .github/scripts/release_review/config.py | 190 ++++++ .../scripts/release_review/github_client.py | 306 ++++++++++ .github/scripts/release_review/reporter.py | 252 ++++++++ .github/scripts/release_review/reviewer.py | 571 ++++++++++++++++++ .github/scripts/release_review/schemas.py | 127 ++++ .github/scripts/run_release_review.py | 47 ++ .github/workflows/release-notes-review.yml | 38 ++ 8 files changed, 1536 insertions(+) create mode 100644 .github/scripts/release_review/__init__.py create mode 100644 .github/scripts/release_review/config.py create mode 100644 .github/scripts/release_review/github_client.py create mode 100644 .github/scripts/release_review/reporter.py create mode 100644 .github/scripts/release_review/reviewer.py create mode 100644 .github/scripts/release_review/schemas.py create mode 100644 .github/scripts/run_release_review.py create mode 100644 .github/workflows/release-notes-review.yml diff --git a/.github/scripts/release_review/__init__.py b/.github/scripts/release_review/__init__.py new file mode 100644 index 00000000000..2efdb19df2d --- /dev/null +++ b/.github/scripts/release_review/__init__.py @@ -0,0 +1,5 @@ +from .schemas import Issue, Links, ReviewPayload +from .config import ReleaseReviewConfig, get_config +from .reviewer import ReleaseNotesReviewer, ReviewResult, get_reviewer +from .github_client import GitHubClient, get_github_client +from .reporter import Reporter, get_reporter diff --git a/.github/scripts/release_review/config.py b/.github/scripts/release_review/config.py new file mode 100644 index 00000000000..df063a67d1d --- /dev/null +++ b/.github/scripts/release_review/config.py @@ -0,0 +1,190 @@ +""" +Configuration settings for the release review service. + +Loads settings from environment variables and optional YAML config file. 
+""" +import os +from pathlib import Path +from typing import Dict, Optional +from dataclasses import dataclass, field + +import yaml + + +@dataclass +class GitHubConfig: + """GitHub-related configuration.""" + token: str = "" + check_run_name: str = "Release Notes Review (AI)" + check_run_title: str = "Advisory Release Notes Review (AI)" + max_annotations: int = 50 + max_annotation_message_length: int = 640 + bot_comment_marker: str = "" + + +@dataclass +class SecurityConfig: + """Security-related configuration.""" + docs_agent_secret: str = "" + signature_header: str = "X-Docs-Agent-Signature" + idempotency_header: str = "X-Idempotency-Key" + + +@dataclass +class StoreConfig: + """Storage configuration.""" + db_path: str = "docs_agent.db" + stale_job_ttl_seconds: int = 3600 # 1 hour + + +@dataclass +class FeatureFlags: + """Feature flags for the service.""" + post_comments: bool = True + post_check_runs: bool = True + + +@dataclass +class SeverityMapping: + """Mapping of severity levels to GitHub annotation levels.""" + high: str = "failure" + medium: str = "warning" + low: str = "notice" + + +@dataclass +class ReleaseReviewConfig: + """Main configuration for the release review service.""" + github: GitHubConfig = field(default_factory=GitHubConfig) + security: SecurityConfig = field(default_factory=SecurityConfig) + store: StoreConfig = field(default_factory=StoreConfig) + features: FeatureFlags = field(default_factory=FeatureFlags) + severity_mapping: SeverityMapping = field(default_factory=SeverityMapping) + + @classmethod + def from_env(cls, config_path: Optional[str] = None) -> "ReleaseReviewConfig": + """ + Load configuration from environment variables and optional YAML file. + + Environment variables take precedence over YAML config. + """ + config = cls() + + # Load from YAML if path provided or default exists + yaml_path = config_path or os.getenv("RELEASE_REVIEWER_CONFIG") + if yaml_path and Path(yaml_path).exists(): + config = cls._load_yaml(yaml_path, config) + else: + # Check default locations + default_paths = [ + Path("config/release-reviewer.yml"), + Path("config/release-reviewer.yaml"), + ] + for path in default_paths: + if path.exists(): + config = cls._load_yaml(str(path), config) + break + + # Override with environment variables + config.github.token = os.getenv("GITHUB_TOKEN", config.github.token) + config.security.docs_agent_secret = os.getenv( + "DOCS_AGENT_SECRET", config.security.docs_agent_secret + ) + config.store.db_path = os.getenv("DOCS_AGENT_DB", config.store.db_path) + + # Feature flags from env + post_comments = os.getenv("POST_COMMENTS") + if post_comments is not None: + config.features.post_comments = post_comments.lower() in ("true", "1", "yes") + + post_check_runs = os.getenv("POST_CHECK_RUNS") + if post_check_runs is not None: + config.features.post_check_runs = post_check_runs.lower() in ("true", "1", "yes") + + return config + + @classmethod + def _load_yaml(cls, path: str, config: "ReleaseReviewConfig") -> "ReleaseReviewConfig": + """Load configuration from YAML file.""" + try: + with open(path, "r") as f: + data = yaml.safe_load(f) or {} + + # GitHub settings + if "github" in data: + gh = data["github"] + config.github.check_run_name = gh.get( + "check_run_name", config.github.check_run_name + ) + config.github.check_run_title = gh.get( + "check_run_title", config.github.check_run_title + ) + config.github.max_annotations = gh.get( + "max_annotations", config.github.max_annotations + ) + config.github.max_annotation_message_length = gh.get( + 
"max_annotation_message_length", + config.github.max_annotation_message_length + ) + + # Store settings + if "store" in data: + store = data["store"] + config.store.stale_job_ttl_seconds = store.get( + "stale_job_ttl_seconds", config.store.stale_job_ttl_seconds + ) + + # Severity mapping + if "severity_mapping" in data: + sm = data["severity_mapping"] + config.severity_mapping.high = sm.get("high", config.severity_mapping.high) + config.severity_mapping.medium = sm.get("medium", config.severity_mapping.medium) + config.severity_mapping.low = sm.get("low", config.severity_mapping.low) + + # Feature flags + if "features" in data: + features = data["features"] + config.features.post_comments = features.get( + "post_comments", config.features.post_comments + ) + config.features.post_check_runs = features.get( + "post_check_runs", config.features.post_check_runs + ) + + except Exception as e: + # Log warning but don't fail - use defaults + import logging + logging.warning(f"Failed to load config from {path}: {e}") + + return config + + def validate(self) -> None: + """Validate that required configuration is present.""" + errors = [] + + if not self.github.token: + errors.append("GITHUB_TOKEN is required") + + if not self.security.docs_agent_secret: + errors.append("DOCS_AGENT_SECRET is required") + + if errors: + raise ValueError(f"Configuration errors: {', '.join(errors)}") + + +# Global config instance (lazy loaded) +_config: Optional[ReleaseReviewConfig] = None + + +def get_config() -> ReleaseReviewConfig: + """Get the global configuration instance.""" + global _config + if _config is None: + _config = ReleaseReviewConfig.from_env() + return _config + + +def reset_config() -> None: + """Reset the global configuration (useful for testing).""" + global _config + _config = None diff --git a/.github/scripts/release_review/github_client.py b/.github/scripts/release_review/github_client.py new file mode 100644 index 00000000000..413d27deab3 --- /dev/null +++ b/.github/scripts/release_review/github_client.py @@ -0,0 +1,306 @@ +""" +GitHub API client for the release review service. + +Handles creating check runs, posting/updating PR comments, and fetching PR info. 
+""" +import logging +import time +from typing import Optional, Dict, Any, List, Tuple +from dataclasses import dataclass + +import requests +from requests.adapters import HTTPAdapter +from urllib3.util.retry import Retry + +from .config import get_config +from .schemas import ReviewPayload, Annotation + +logger = logging.getLogger(__name__) + + +class GitHubAPIError(Exception): + """Exception for GitHub API errors.""" + + def __init__(self, message: str, status_code: int = 0, retry_after: Optional[int] = None): + super().__init__(message) + self.status_code = status_code + self.retry_after = retry_after + + +class GitHubRateLimitError(GitHubAPIError): + """Exception for GitHub rate limit errors.""" + pass + + +@dataclass +class CheckRunOutput: + """Output data for a GitHub check run.""" + title: str + summary: str + annotations: List[Dict[str, Any]] + + +class GitHubClient: + """Client for interacting with GitHub API.""" + + BASE_URL = "https://api.github.com" + + def __init__(self, token: Optional[str] = None): + """Initialize the GitHub client.""" + config = get_config() + self.token = token or config.github.token + self.config = config.github + + # Set up session with retry logic + self.session = requests.Session() + retry_strategy = Retry( + total=2, + backoff_factor=0.5, + status_forcelist=[500, 502, 503, 504], + ) + adapter = HTTPAdapter(max_retries=retry_strategy) + self.session.mount("https://", adapter) + self.session.mount("http://", adapter) + + def _headers(self) -> Dict[str, str]: + """Get the headers for GitHub API requests.""" + return { + "Authorization": f"Bearer {self.token}", + "Accept": "application/vnd.github+json", + "X-GitHub-Api-Version": "2022-11-28", + } + + def _handle_response(self, response: requests.Response) -> Dict[str, Any]: + """Handle GitHub API response and raise appropriate errors.""" + # Check for rate limiting + if response.status_code in (403, 429): + remaining = response.headers.get("X-RateLimit-Remaining", "unknown") + retry_after = response.headers.get("Retry-After") + + if remaining == "0" or response.status_code == 429: + retry_seconds = int(retry_after) if retry_after else 60 + raise GitHubRateLimitError( + f"GitHub rate limit exceeded. Retry after {retry_seconds}s", + status_code=response.status_code, + retry_after=retry_seconds + ) + + # Check for other errors + if not response.ok: + try: + error_data = response.json() + message = error_data.get("message", response.text) + except Exception: + message = response.text + + raise GitHubAPIError( + f"GitHub API error: {message}", + status_code=response.status_code + ) + + return response.json() + + def get_pr_head_sha(self, repo: str, pr_number: int) -> str: + """ + Get the head SHA of a pull request. + + Args: + repo: Repository in 'owner/repo' format + pr_number: Pull request number + + Returns: + The head commit SHA + """ + url = f"{self.BASE_URL}/repos/{repo}/pulls/{pr_number}" + logger.debug(f"Fetching PR info: {url}") + + response = self.session.get(url, headers=self._headers()) + data = self._handle_response(response) + + return data["head"]["sha"] + + def create_check_run( + self, + repo: str, + head_sha: str, + output: CheckRunOutput + ) -> int: + """ + Create a GitHub check run with neutral conclusion. 
+ + Args: + repo: Repository in 'owner/repo' format + head_sha: The commit SHA to attach the check run to + output: Check run output data + + Returns: + The check run ID + """ + url = f"{self.BASE_URL}/repos/{repo}/check-runs" + + # Truncate annotations to max allowed + annotations = output.annotations[:self.config.max_annotations] + if len(output.annotations) > self.config.max_annotations: + logger.warning( + f"Truncating annotations from {len(output.annotations)} " + f"to {self.config.max_annotations}" + ) + + # Truncate annotation messages + for ann in annotations: + if len(ann.get("message", "")) > self.config.max_annotation_message_length: + ann["message"] = ( + ann["message"][:self.config.max_annotation_message_length - 3] + "..." + ) + + payload = { + "name": self.config.check_run_name, + "head_sha": head_sha, + "status": "completed", + "conclusion": "neutral", + "output": { + "title": output.title, + "summary": output.summary, + "annotations": annotations + } + } + + logger.debug(f"Creating check run: {url}") + response = self.session.post(url, headers=self._headers(), json=payload) + data = self._handle_response(response) + + check_run_id = data["id"] + logger.info(f"Created check run {check_run_id} for {repo}") + return check_run_id + + def find_bot_comment(self, repo: str, pr_number: int) -> Optional[int]: + """ + Find an existing bot comment on the PR. + + Looks for a comment containing the bot marker. + + Args: + repo: Repository in 'owner/repo' format + pr_number: Pull request number + + Returns: + The comment ID if found, None otherwise + """ + url = f"{self.BASE_URL}/repos/{repo}/issues/{pr_number}/comments" + logger.debug(f"Searching for bot comment: {url}") + + # Paginate through comments + page = 1 + per_page = 100 + + while True: + response = self.session.get( + url, + headers=self._headers(), + params={"page": page, "per_page": per_page} + ) + comments = self._handle_response(response) + + if not comments: + break + + for comment in comments: + body = comment.get("body", "") + if self.config.bot_comment_marker in body: + logger.debug(f"Found existing bot comment: {comment['id']}") + return comment["id"] + + if len(comments) < per_page: + break + + page += 1 + + return None + + def create_comment(self, repo: str, pr_number: int, body: str) -> int: + """ + Create a new PR comment. + + Args: + repo: Repository in 'owner/repo' format + pr_number: Pull request number + body: Comment body (markdown) + + Returns: + The comment ID + """ + url = f"{self.BASE_URL}/repos/{repo}/issues/{pr_number}/comments" + logger.debug(f"Creating comment: {url}") + + response = self.session.post( + url, + headers=self._headers(), + json={"body": body} + ) + data = self._handle_response(response) + + comment_id = data["id"] + logger.info(f"Created comment {comment_id} on PR {pr_number}") + return comment_id + + def update_comment(self, repo: str, comment_id: int, body: str) -> int: + """ + Update an existing PR comment. 
+ + Args: + repo: Repository in 'owner/repo' format + comment_id: The comment ID to update + body: New comment body (markdown) + + Returns: + The comment ID + """ + url = f"{self.BASE_URL}/repos/{repo}/issues/comments/{comment_id}" + logger.debug(f"Updating comment: {url}") + + response = self.session.patch( + url, + headers=self._headers(), + json={"body": body} + ) + self._handle_response(response) + + logger.info(f"Updated comment {comment_id}") + return comment_id + + def create_or_update_comment(self, repo: str, pr_number: int, body: str) -> int: + """ + Create a new comment or update existing bot comment. + + Args: + repo: Repository in 'owner/repo' format + pr_number: Pull request number + body: Comment body (markdown) + + Returns: + The comment ID (new or existing) + """ + existing_comment_id = self.find_bot_comment(repo, pr_number) + + if existing_comment_id: + return self.update_comment(repo, existing_comment_id, body) + else: + return self.create_comment(repo, pr_number, body) + + +# Global client instance (lazy loaded) +_client: Optional[GitHubClient] = None + + +def get_github_client() -> GitHubClient: + """Get the global GitHub client instance.""" + global _client + if _client is None: + _client = GitHubClient() + return _client + + +def reset_github_client() -> None: + """Reset the global GitHub client (useful for testing).""" + global _client + _client = None diff --git a/.github/scripts/release_review/reporter.py b/.github/scripts/release_review/reporter.py new file mode 100644 index 00000000000..e2b8464f9d2 --- /dev/null +++ b/.github/scripts/release_review/reporter.py @@ -0,0 +1,252 @@ +""" +Reporter for formatting check run outputs and PR comments. + +Builds the check run summary/annotations and the PR comment body +from the review payload. +""" +import logging +from typing import Dict, List, Any, Tuple +from dataclasses import dataclass + +from .schemas import ReviewPayload, Issue, Annotation +from .config import get_config +from .github_client import CheckRunOutput + +logger = logging.getLogger(__name__) + + +@dataclass +class SeverityCounts: + """Counts of issues by severity.""" + high: int = 0 + medium: int = 0 + low: int = 0 + + def total(self) -> int: + return self.high + self.medium + self.low + + def summary_line(self) -> str: + return f"High: {self.high} · Medium: {self.medium} · Low: {self.low}" + + +class Reporter: + """Formats review results for GitHub.""" + + def __init__(self): + self.config = get_config() + + def count_severities(self, issues: List[Issue]) -> SeverityCounts: + """Count issues by severity level.""" + counts = SeverityCounts() + for issue in issues: + if issue.severity == "HIGH": + counts.high += 1 + elif issue.severity == "MEDIUM": + counts.medium += 1 + elif issue.severity == "LOW": + counts.low += 1 + return counts + + def build_check_output(self, payload: ReviewPayload) -> CheckRunOutput: + """ + Build the check run output from the review payload. 
+ + Args: + payload: The review payload + + Returns: + CheckRunOutput with title, summary, and annotations + """ + counts = self.count_severities(payload.issues) + + # Build title + title = self.config.github.check_run_title + + # Build summary + summary_parts = [counts.summary_line()] + + if payload.summary: + summary_parts.append("") + summary_parts.append(payload.summary) + + # Add up to 10 example issues in summary + if payload.issues: + summary_parts.append("") + summary_parts.append("**Sample issues:**") + for issue in payload.issues[:10]: + location = "" + if issue.file: + location = f" (`{issue.file}" + if issue.line: + location += f":{issue.line}" + location += "`)" + summary_parts.append(f"- [{issue.severity}] {issue.title}{location}") + + if len(payload.issues) > 10: + summary_parts.append(f"- ... and {len(payload.issues) - 10} more") + + summary = "\n".join(summary_parts) + + # Build annotations + annotations = self._build_annotations(payload) + + return CheckRunOutput( + title=title, + summary=summary, + annotations=annotations + ) + + def _build_annotations(self, payload: ReviewPayload) -> List[Dict[str, Any]]: + """Build GitHub annotations from issues and explicit annotations.""" + annotations = [] + + # First, add explicit annotations from the payload + if payload.annotations: + for ann in payload.annotations: + annotations.append({ + "path": ann.path, + "start_line": ann.start_line, + "end_line": ann.end_line or ann.start_line, + "annotation_level": ann.annotation_level, + "message": ann.message + }) + + # Then, create annotations from issues that have file/line info + for issue in payload.issues: + if issue.file and issue.line: + # Map severity to annotation level + level = self._severity_to_annotation_level(issue.severity) + + message = f"{issue.title}: {issue.message}" + if issue.suggestion: + message += f"\n\nSuggestion: {issue.suggestion}" + + annotations.append({ + "path": issue.file, + "start_line": issue.line, + "end_line": issue.line, + "annotation_level": level, + "message": message + }) + + # Warn if we have too many annotations + max_annotations = self.config.github.max_annotations + if len(annotations) > max_annotations: + logger.warning( + f"Payload has {len(annotations)} annotations, " + f"but GitHub only accepts {max_annotations}. Truncating." + ) + + return annotations + + def _severity_to_annotation_level(self, severity: str) -> str: + """Map issue severity to GitHub annotation level.""" + mapping = { + "HIGH": self.config.severity_mapping.high, + "MEDIUM": self.config.severity_mapping.medium, + "LOW": self.config.severity_mapping.low, + } + return mapping.get(severity, "notice") + + def build_comment_body(self, payload: ReviewPayload) -> str: + """ + Build the PR comment body from the review payload. 
+ + Args: + payload: The review payload + + Returns: + Markdown-formatted comment body + """ + counts = self.count_severities(payload.issues) + marker = self.config.github.bot_comment_marker + + # Start with marker and header + lines = [ + marker, + f"**Release Notes Advisory (AI)** — {payload.summary or 'Review complete'}", + "", + counts.summary_line(), + "", + ] + + # Group issues by severity + high_issues = [i for i in payload.issues if i.severity == "HIGH"] + medium_issues = [i for i in payload.issues if i.severity == "MEDIUM"] + low_issues = [i for i in payload.issues if i.severity == "LOW"] + + # Add each severity section + if high_issues: + lines.extend(self._format_issue_section("HIGH", high_issues)) + + if medium_issues: + lines.extend(self._format_issue_section("MEDIUM", medium_issues)) + + if low_issues: + lines.extend(self._format_issue_section("LOW", low_issues)) + + # Add links section + if payload.links: + lines.append("---") + lines.append("") + lines.append("**Links**") + if payload.links.deploy_preview: + lines.append(f"- [Deploy Preview]({payload.links.deploy_preview})") + if payload.links.full_report: + lines.append(f"- [Full JSON Report]({payload.links.full_report})") + lines.append("") + + # Footer + lines.append("---") + lines.append("_Posted by docs-fast-agent — advisory only, does not block merge._") + + return "\n".join(lines) + + def _format_issue_section( + self, + severity: str, + issues: List[Issue] + ) -> List[str]: + """Format a section of issues for the PR comment.""" + lines = [ + f"### {severity} ({len(issues)})", + "", + ] + + for issue in issues: + # Title and message + lines.append(f"- **{issue.title}:** {issue.message}") + + # File and line + if issue.file: + location = f"`{issue.file}" + if issue.line: + location += f":{issue.line}" + location += "`" + lines.append(f" {location}") + + # Suggestion + if issue.suggestion: + lines.append(f" **Suggestion:** {issue.suggestion}") + + lines.append("") + + return lines + + +# Global reporter instance +_reporter: Reporter = None + + +def get_reporter() -> Reporter: + """Get the global reporter instance.""" + global _reporter + if _reporter is None: + _reporter = Reporter() + return _reporter + + +def reset_reporter() -> None: + """Reset the global reporter (useful for testing).""" + global _reporter + _reporter = None diff --git a/.github/scripts/release_review/reviewer.py b/.github/scripts/release_review/reviewer.py new file mode 100644 index 00000000000..5dea2ebf750 --- /dev/null +++ b/.github/scripts/release_review/reviewer.py @@ -0,0 +1,571 @@ +""" +AI-powered Release Notes Reviewer. + +This module analyzes release notes PRs and generates review issues +using AI (OpenAI) based on the CockroachDB style guide. 
+""" +import os +import re +import logging +import json +from typing import List, Dict, Any, Optional, Tuple +from dataclasses import dataclass, field +from datetime import datetime + +import requests +import yaml +from openai import OpenAI + +from .schemas import Issue, ReviewPayload, Links + +logger = logging.getLogger(__name__) + +# Style guide content (loaded from file or embedded) +STYLE_GUIDE = """ +# Release Note Writing Guide for CockroachDB + +## Style and Tone +- Use clear, concise, and correct language +- Use the second-person imperative present tense for instructions +- Use active voice instead of passive for clarity +- Avoid using "please" when giving instructions +- Avoid hyperbolic language like "simple," "just," "easily," or "actually" +- Use contractions to simplify language, except for clear directives (use "cannot" instead of "can't") +- Avoid forward-looking language about future features + +## Format and Structure +- Use title case for page titles +- Use sentence case for all headings +- Use the Oxford (serial) comma +- When listing a range of versions, use "to" not a dash (e.g., v22.1.0 to v22.1.4) + +## Technical Content +- Link to relevant documentation when referencing CockroachDB features +- Use inline code format (backticks) for code, commands, or technical syntax +- Include GitHub issue or PR numbers for reference + +## Version References +- Format as vXX.X.X (e.g., v21.1.8) with lowercase 'v' + +## Technical Terminology +- Use "CockroachDB" (proper capitalization) +- Use "PostgreSQL" (not "Postgres") +- Use inclusive terminology (allowlist/denylist, main/primary) + +## Release Note Requirements +- Clearly describe what changed or was added +- Mention any impact on users, including breaking changes +- Be factual and technical without unnecessary jargon +- Include GitHub issue or PR numbers for reference +""" + + +@dataclass +class ParsedYAMLRelease: + """Parsed release entry from releases.yml.""" + release_name: str + major_version: str + release_date: str + release_type: str + go_version: Optional[str] = None + sha: Optional[str] = None + previous_release: Optional[str] = None + raw: Dict[str, Any] = field(default_factory=dict) + + +@dataclass +class ParsedMarkdownRelease: + """Parsed release notes from markdown file.""" + version: str + release_date: str + sections: Dict[str, List[str]] # section_name -> list of notes + pr_references: List[str] # List of PR numbers referenced + link_definitions: Dict[str, str] # PR number -> URL + raw_content: str = "" + + +@dataclass +class ReviewResult: + """Result of the review process.""" + issues: List[Issue] + summary: str + yaml_data: Optional[ParsedYAMLRelease] = None + markdown_data: Optional[ParsedMarkdownRelease] = None + + +class ReleaseNotesReviewer: + """AI-powered reviewer for CockroachDB release notes PRs.""" + + # Required YAML fields + REQUIRED_YAML_FIELDS = [ + "release_name", "major_version", "release_date", "release_type" + ] + + # Valid release types + VALID_RELEASE_TYPES = [ + "Production", "Testing", "Preview", "Beta", "Alpha", "Withdrawn" + ] + + # Valid section headers in markdown + VALID_SECTIONS = [ + "backward-incompatible-changes", "security-updates", "sql-language-changes", + "operational-changes", "command-line-changes", "db-console-changes", + "bug-fixes", "performance-improvements", "contributors", "doc-updates", + "enterprise-edition-changes", "general-changes" + ] + + def __init__(self, github_token: Optional[str] = None, openai_api_key: Optional[str] = None): + """Initialize the 
reviewer.""" + self.github_token = github_token or os.getenv("GITHUB_TOKEN") + self.openai_api_key = openai_api_key or os.getenv("OPENAI_API_KEY") + self.openai_client = None + if self.openai_api_key: + self.openai_client = OpenAI(api_key=self.openai_api_key) + + def _github_headers(self) -> Dict[str, str]: + """Get GitHub API headers.""" + return { + "Authorization": f"Bearer {self.github_token}", + "Accept": "application/vnd.github+json", + "X-GitHub-Api-Version": "2022-11-28", + } + + def fetch_pr_files(self, repo: str, pr_number: int) -> List[Dict[str, Any]]: + """Fetch the files changed in a PR.""" + url = f"https://api.github.com/repos/{repo}/pulls/{pr_number}/files" + response = requests.get(url, headers=self._github_headers()) + response.raise_for_status() + return response.json() + + def fetch_file_content(self, repo: str, path: str, ref: str) -> str: + """Fetch file content from GitHub.""" + url = f"https://api.github.com/repos/{repo}/contents/{path}?ref={ref}" + response = requests.get(url, headers=self._github_headers()) + response.raise_for_status() + data = response.json() + + if data.get("encoding") == "base64": + import base64 + return base64.b64decode(data["content"]).decode("utf-8") + return data.get("content", "") + + def fetch_pr_diff(self, repo: str, pr_number: int) -> str: + """Fetch the diff of a PR.""" + url = f"https://api.github.com/repos/{repo}/pulls/{pr_number}" + headers = self._github_headers() + headers["Accept"] = "application/vnd.github.diff" + response = requests.get(url, headers=headers) + response.raise_for_status() + return response.text + + def check_pr_exists(self, pr_number: str) -> bool: + """Check if a PR exists in cockroachdb/cockroach.""" + url = f"https://api.github.com/repos/cockroachdb/cockroach/pulls/{pr_number}" + response = requests.get(url, headers=self._github_headers()) + return response.status_code == 200 + + def parse_yaml_diff(self, diff: str) -> Optional[ParsedYAMLRelease]: + """Parse the YAML additions from a diff.""" + # Extract added lines from releases.yml + yaml_section = False + yaml_lines = [] + + for line in diff.split("\n"): + if "releases.yml" in line: + yaml_section = True + continue + if yaml_section: + if line.startswith("diff --git"): + break + if line.startswith("+") and not line.startswith("+++"): + yaml_lines.append(line[1:]) # Remove the '+' prefix + + if not yaml_lines: + return None + + # Parse the YAML + yaml_content = "\n".join(yaml_lines) + try: + # Handle the case where we're adding to a list + if yaml_content.strip().startswith("-"): + data = yaml.safe_load(yaml_content) + if isinstance(data, list) and len(data) > 0: + release = data[0] + else: + release = data + else: + release = yaml.safe_load(yaml_content) + + if not release: + return None + + return ParsedYAMLRelease( + release_name=release.get("release_name", ""), + major_version=release.get("major_version", ""), + release_date=release.get("release_date", ""), + release_type=release.get("release_type", ""), + go_version=release.get("go_version"), + sha=release.get("sha"), + previous_release=release.get("previous_release"), + raw=release + ) + except yaml.YAMLError as e: + logger.warning(f"Failed to parse YAML: {e}") + return None + + def parse_markdown_diff(self, diff: str) -> Optional[ParsedMarkdownRelease]: + """Parse the markdown additions from a diff.""" + # Extract added lines from .md file + md_section = False + md_lines = [] + md_filename = "" + + for line in diff.split("\n"): + if ".md" in line and "diff --git" in line: + md_section = True + 
md_filename = line + continue + if md_section: + if line.startswith("diff --git"): + break + if line.startswith("+") and not line.startswith("+++"): + md_lines.append(line[1:]) + + if not md_lines: + return None + + content = "\n".join(md_lines) + + # Extract version from header + version_match = re.search(r"## (v[\d.]+(?:-[\w.]+)?)", content) + version = version_match.group(1) if version_match else "" + + # Extract release date + date_match = re.search(r"Release Date:\s*(.+)", content) + release_date = date_match.group(1).strip() if date_match else "" + + # Extract sections + sections: Dict[str, List[str]] = {} + current_section = None + current_notes = [] + + for line in md_lines: + # Check for section header + section_match = re.search(r'
<h3[^>]*>(.*?)</h3>
', line) + if section_match: + if current_section and current_notes: + sections[current_section] = current_notes + current_section = section_match.group(1) + current_notes = [] + elif current_section and line.strip().startswith("-"): + current_notes.append(line.strip()) + elif current_section and line.strip() and not line.startswith("[#"): + current_notes.append(line.strip()) + + if current_section and current_notes: + sections[current_section] = current_notes + + # Extract PR references + pr_refs = re.findall(r"\[#(\d+)\]", content) + + # Extract link definitions + link_defs = {} + for match in re.finditer(r"\[#(\d+)\]:\s*(https://[^\s]+)", content): + link_defs[match.group(1)] = match.group(2) + + return ParsedMarkdownRelease( + version=version, + release_date=release_date, + sections=sections, + pr_references=pr_refs, + link_definitions=link_defs, + raw_content=content + ) + + def check_schema_format(self, yaml_data: Optional[ParsedYAMLRelease], + md_data: Optional[ParsedMarkdownRelease]) -> List[Issue]: + """ + HIGH severity: Schema/Format Checks + - Required YAML fields present + - Valid date formats + - Correct version patterns + - Valid category tags + """ + issues = [] + + if yaml_data: + # Check required fields + for field in self.REQUIRED_YAML_FIELDS: + value = getattr(yaml_data, field, None) or yaml_data.raw.get(field) + if not value: + issues.append(Issue( + severity="HIGH", + title=f"Missing required YAML field: {field}", + message=f"The '{field}' field is required in releases.yml but is missing or empty.", + file="src/current/_data/releases.yml", + suggestion=f"Add the '{field}' field with an appropriate value." + )) + + # Check release_type validity + if yaml_data.release_type and yaml_data.release_type not in self.VALID_RELEASE_TYPES: + issues.append(Issue( + severity="HIGH", + title="Invalid release_type", + message=f"release_type '{yaml_data.release_type}' is not a valid type. " + f"Valid types are: {', '.join(self.VALID_RELEASE_TYPES)}", + file="src/current/_data/releases.yml", + suggestion=f"Use one of: {', '.join(self.VALID_RELEASE_TYPES)}" + )) + + # Check date format (YYYY-MM-DD) + if yaml_data.release_date: + try: + datetime.strptime(str(yaml_data.release_date), "%Y-%m-%d") + except ValueError: + issues.append(Issue( + severity="HIGH", + title="Invalid date format", + message=f"release_date '{yaml_data.release_date}' is not in YYYY-MM-DD format.", + file="src/current/_data/releases.yml", + suggestion="Use format: YYYY-MM-DD (e.g., 2026-03-25)" + )) + + # Check version pattern (vXX.X.X or vXX.X.X-suffix) + if yaml_data.release_name: + if not re.match(r"^v\d+\.\d+\.\d+(-[\w.]+)?$", yaml_data.release_name): + issues.append(Issue( + severity="HIGH", + title="Invalid version format", + message=f"release_name '{yaml_data.release_name}' doesn't match expected pattern vXX.X.X[-suffix].", + file="src/current/_data/releases.yml", + suggestion="Use format: vXX.X.X or vXX.X.X-beta.1 (lowercase 'v')" + )) + + if md_data: + # Check version matches in markdown + if yaml_data and md_data.version and yaml_data.release_name: + if md_data.version != yaml_data.release_name: + issues.append(Issue( + severity="HIGH", + title="Version mismatch between YAML and Markdown", + message=f"YAML has '{yaml_data.release_name}' but Markdown has '{md_data.version}'.", + file="src/current/_includes/releases/", + suggestion="Ensure version numbers match in both files." 
+ )) + + return issues + + def check_technical_accuracy(self, yaml_data: Optional[ParsedYAMLRelease], + md_data: Optional[ParsedMarkdownRelease]) -> List[Issue]: + """ + HIGH severity: Technical Accuracy Checks + - Version numbers match + - Referenced PRs exist + - Backport references valid + - No broken internal links + """ + issues = [] + + if md_data: + # Check that all PR references have link definitions + referenced_prs = set(md_data.pr_references) + defined_prs = set(md_data.link_definitions.keys()) + + missing_links = referenced_prs - defined_prs + for pr in missing_links: + issues.append(Issue( + severity="HIGH", + title=f"Missing link definition for PR #{pr}", + message=f"PR #{pr} is referenced in the text but has no link definition at the bottom.", + file="src/current/_includes/releases/", + suggestion=f"Add: [#{pr}]: https://github.com/cockroachdb/cockroach/pull/{pr}" + )) + + # Check a sample of PRs to see if they exist (limit to avoid rate limits) + prs_to_check = list(referenced_prs)[:5] + for pr in prs_to_check: + if not self.check_pr_exists(pr): + issues.append(Issue( + severity="HIGH", + title=f"Referenced PR #{pr} does not exist", + message=f"PR #{pr} was referenced but could not be found in cockroachdb/cockroach.", + file="src/current/_includes/releases/", + suggestion="Verify the PR number is correct." + )) + + # Check for orphaned link definitions (defined but not referenced) + orphaned_links = defined_prs - referenced_prs + for pr in orphaned_links: + issues.append(Issue( + severity="MEDIUM", + title=f"Orphaned link definition for PR #{pr}", + message=f"Link definition for #{pr} exists but is not referenced in the text.", + file="src/current/_includes/releases/", + suggestion="Remove unused link definition or add reference in text." + )) + + return issues + + def check_content_quality_with_ai(self, md_data: Optional[ParsedMarkdownRelease]) -> List[Issue]: + """ + MEDIUM severity: Content Quality Checks using AI + - Release note text not empty + - No placeholder text + - Action-oriented (starts with verb) + - Appropriate length + - No duplicates + - Style guide compliance + """ + issues = [] + + if not md_data or not self.openai_client: + return issues + + # Prepare the content for AI review + content_to_review = md_data.raw_content + + prompt = f"""You are a technical writing reviewer for CockroachDB release notes. + +Review the following release notes content and identify issues based on these criteria: + +MEDIUM SEVERITY ISSUES: +1. Empty or placeholder text (TODO, TBD, FIXME, placeholder, lorem ipsum) +2. Notes that don't start with an action verb (should describe what changed) +3. Notes that are too short (<10 words) or too long (>100 words for a single point) +4. Duplicate or very similar release notes +5. Style guide violations: + - Using passive voice instead of active voice + - Using "please" in instructions + - Using hyperbolic words like "simple", "just", "easily", "actually" + - Incorrect capitalization (should be "CockroachDB" not "cockroachdb") + - Using "Postgres" instead of "PostgreSQL" + - Not using inclusive language (should use allowlist/denylist, main/primary) + +STYLE GUIDE: +{STYLE_GUIDE} + +RELEASE NOTES CONTENT: +{content_to_review} + +Respond with a JSON array of issues found. 
Each issue should have: +- "title": Short title of the issue +- "message": Detailed explanation +- "suggestion": How to fix it +- "line_hint": A snippet of the problematic text (for locating it) + +If no issues found, return an empty array: [] + +Return ONLY valid JSON, no other text.""" + + try: + response = self.openai_client.chat.completions.create( + model="gpt-4o", + messages=[ + {"role": "system", "content": "You are a technical writing reviewer. Respond only with valid JSON."}, + {"role": "user", "content": prompt} + ], + temperature=0.3, + max_tokens=2000 + ) + + result_text = response.choices[0].message.content.strip() + + # Clean up the response (remove markdown code blocks if present) + if result_text.startswith("```"): + result_text = re.sub(r"^```(?:json)?\n?", "", result_text) + result_text = re.sub(r"\n?```$", "", result_text) + + ai_issues = json.loads(result_text) + + for ai_issue in ai_issues: + issues.append(Issue( + severity="MEDIUM", + title=ai_issue.get("title", "Content quality issue"), + message=ai_issue.get("message", ""), + suggestion=ai_issue.get("suggestion", ""), + metadata={"line_hint": ai_issue.get("line_hint", "")} + )) + + except Exception as e: + logger.error(f"AI content review failed: {e}") + # Don't fail the whole review if AI fails + + return issues + + def review_pr(self, repo: str, pr_number: int, commit_sha: Optional[str] = None) -> ReviewResult: + """ + Review a release notes PR and generate issues. + + Args: + repo: Repository in 'owner/repo' format + pr_number: Pull request number + commit_sha: Optional commit SHA (fetched if not provided) + + Returns: + ReviewResult with issues and parsed data + """ + issues = [] + + # Fetch PR diff + logger.info(f"Fetching PR diff for {repo}#{pr_number}") + diff = self.fetch_pr_diff(repo, pr_number) + + # Parse YAML and Markdown from diff + yaml_data = self.parse_yaml_diff(diff) + md_data = self.parse_markdown_diff(diff) + + logger.info(f"Parsed YAML: {yaml_data is not None}, Markdown: {md_data is not None}") + + # Run schema/format checks (HIGH) + schema_issues = self.check_schema_format(yaml_data, md_data) + issues.extend(schema_issues) + logger.info(f"Schema/format issues: {len(schema_issues)}") + + # Run technical accuracy checks (HIGH) + accuracy_issues = self.check_technical_accuracy(yaml_data, md_data) + issues.extend(accuracy_issues) + logger.info(f"Technical accuracy issues: {len(accuracy_issues)}") + + # Run content quality checks with AI (MEDIUM) + if md_data: + quality_issues = self.check_content_quality_with_ai(md_data) + issues.extend(quality_issues) + logger.info(f"Content quality issues: {len(quality_issues)}") + + # Generate summary + high_count = sum(1 for i in issues if i.severity == "HIGH") + medium_count = sum(1 for i in issues if i.severity == "MEDIUM") + low_count = sum(1 for i in issues if i.severity == "LOW") + + if not issues: + summary = "No issues found. Release notes look good!" 
+ else: + summary = f"Found {len(issues)} issue(s): {high_count} HIGH, {medium_count} MEDIUM, {low_count} LOW" + + return ReviewResult( + issues=issues, + summary=summary, + yaml_data=yaml_data, + markdown_data=md_data + ) + + def create_review_payload(self, repo: str, pr_number: int, + commit_sha: str, result: ReviewResult) -> ReviewPayload: + """Create a ReviewPayload from the review result.""" + return ReviewPayload( + source="ai-release-notes-reviewer", + repo=repo, + pr_number=pr_number, + commit_sha=commit_sha, + generated_at=datetime.utcnow().isoformat() + "Z", + summary=result.summary, + issues=result.issues, + links=Links( + full_report=None # Could add link to stored full report + ) + ) + + +def get_reviewer() -> ReleaseNotesReviewer: + """Get a configured reviewer instance.""" + return ReleaseNotesReviewer() diff --git a/.github/scripts/release_review/schemas.py b/.github/scripts/release_review/schemas.py new file mode 100644 index 00000000000..b4958c7d656 --- /dev/null +++ b/.github/scripts/release_review/schemas.py @@ -0,0 +1,127 @@ +""" +Pydantic models for the /api/v1/release-review-results endpoint. + +This module defines the request and response schemas for receiving +AI-generated release-notes reviews and posting them to GitHub. +""" +import re +from typing import List, Optional, Dict, Any, Literal +from pydantic import BaseModel, Field, HttpUrl, field_validator +from datetime import datetime + + +# ----------------------------------------------------------------------------- +# Request Models +# ----------------------------------------------------------------------------- + +class Issue(BaseModel): + """A single review issue found by the AI.""" + severity: Literal["HIGH", "MEDIUM", "LOW"] = Field( + ..., description="Severity level of the issue" + ) + title: str = Field(..., description="Short title of the issue") + message: str = Field(..., description="Detailed message describing the issue") + file: Optional[str] = Field(None, description="File path where the issue was found") + line: Optional[int] = Field(None, description="Line number in the file") + suggestion: Optional[str] = Field(None, description="Suggested fix for the issue") + metadata: Optional[Dict[str, Any]] = Field(None, description="Additional metadata") + + +class Annotation(BaseModel): + """A GitHub check run annotation.""" + path: str = Field(..., description="File path for the annotation") + start_line: int = Field(..., ge=1, description="Starting line number") + end_line: Optional[int] = Field(None, description="Ending line number") + annotation_level: Literal["notice", "warning", "failure"] = Field( + ..., description="Annotation severity level" + ) + message: str = Field(..., description="Annotation message") + + +class Links(BaseModel): + """Optional links to related resources.""" + deploy_preview: Optional[HttpUrl] = Field(None, description="Deploy preview URL") + full_report: Optional[HttpUrl] = Field(None, description="Full JSON report URL") + + +class ReviewPayload(BaseModel): + """Request payload for the release review results endpoint.""" + source: str = Field(..., description="Source identifier of the review system") + repo: str = Field(..., description="Repository in 'owner/repo' format") + pr_number: int = Field(..., ge=1, description="Pull request number") + commit_sha: Optional[str] = Field(None, description="Git commit SHA (40 hex chars)") + generated_at: str = Field(..., description="ISO8601 datetime when review was generated") + summary: Optional[str] = Field(None, description="Short 
summary of the review") + issues: List[Issue] = Field(default_factory=list, description="List of issues found") + annotations: Optional[List[Annotation]] = Field( + default_factory=list, description="GitHub annotations for the check run" + ) + links: Optional[Links] = Field(None, description="Related links") + idempotency_key: Optional[str] = Field( + None, description="Idempotency key from payload (fallback)" + ) + + @field_validator("repo") + @classmethod + def validate_repo_format(cls, v: str) -> str: + """Validate repo is in 'owner/repo' format.""" + if not re.match(r"^[^/]+/[^/]+$", v): + raise ValueError("repo must be in 'owner/repo' format") + return v + + @field_validator("commit_sha") + @classmethod + def validate_commit_sha(cls, v: Optional[str]) -> Optional[str]: + """Validate commit SHA is 40 hex characters if provided.""" + if v is not None and not re.match(r"^[0-9a-f]{40}$", v.lower()): + raise ValueError("commit_sha must be 40 hexadecimal characters") + return v.lower() if v else None + + @field_validator("generated_at") + @classmethod + def validate_generated_at(cls, v: str) -> str: + """Validate generated_at is a valid ISO8601 datetime.""" + try: + # Try parsing the datetime + datetime.fromisoformat(v.replace("Z", "+00:00")) + except ValueError: + raise ValueError("generated_at must be a valid ISO8601 datetime") + return v + + +# ----------------------------------------------------------------------------- +# Response Models +# ----------------------------------------------------------------------------- + +class ReviewResponse(BaseModel): + """Response from the release review results endpoint.""" + status: Literal["ok", "error"] = Field(..., description="Response status") + check_run_id: Optional[int] = Field(None, description="GitHub check run ID") + comment_id: Optional[int] = Field(None, description="GitHub PR comment ID") + message: str = Field(..., description="Human-readable message") + + +class ErrorResponse(BaseModel): + """Error response model.""" + status: Literal["error"] = "error" + message: str = Field(..., description="Error message") + detail: Optional[str] = Field(None, description="Detailed error information") + + +# ----------------------------------------------------------------------------- +# Internal Models +# ----------------------------------------------------------------------------- + +class Job(BaseModel): + """Internal job representation for the store.""" + id: Optional[int] = None + idempotency_key: str + repo: str + pr_number: int + created_at: datetime + processed_at: Optional[datetime] = None + check_run_id: Optional[int] = None + comment_id: Optional[int] = None + payload_json: str + status: Literal["pending", "processing", "completed", "failed"] = "pending" + error_message: Optional[str] = None diff --git a/.github/scripts/run_release_review.py b/.github/scripts/run_release_review.py new file mode 100644 index 00000000000..621e12e73e0 --- /dev/null +++ b/.github/scripts/run_release_review.py @@ -0,0 +1,47 @@ +#!/usr/bin/env python3 +""" +Standalone runner for the release notes AI reviewer. +Invoked from the GitHub Action workflow. 
+""" +import os +import sys +import logging + +sys.path.insert(0, os.path.dirname(__file__)) + +from release_review.reviewer import ReleaseNotesReviewer +from release_review.reporter import Reporter +from release_review.github_client import GitHubClient +from release_review.config import ReleaseReviewConfig + +def main(): + logging.basicConfig(level=logging.INFO, format="%(levelname)s %(message)s") + logger = logging.getLogger(__name__) + + repo = os.environ["GITHUB_REPOSITORY"] + pr_number = int(os.environ["PR_NUMBER"]) + commit_sha = os.environ["COMMIT_SHA"] + + config = ReleaseReviewConfig.from_env() + reviewer = ReleaseNotesReviewer() + reporter = Reporter() + github = GitHubClient() + + logger.info(f"Reviewing PR #{pr_number} in {repo}") + result = reviewer.review_pr(repo, pr_number, commit_sha) + payload = reviewer.create_review_payload(repo, pr_number, commit_sha, result) + + if config.features.post_check_runs: + check_output = reporter.build_check_output(payload) + check_run_id = github.create_check_run(repo, commit_sha, check_output) + logger.info(f"Created check run: {check_run_id}") + + if config.features.post_comments: + comment_body = reporter.build_comment_body(payload) + comment_id = github.create_or_update_comment(repo, pr_number, comment_body) + logger.info(f"Posted comment: {comment_id}") + + logger.info(f"Review complete: {result.summary}") + +if __name__ == "__main__": + main() diff --git a/.github/workflows/release-notes-review.yml b/.github/workflows/release-notes-review.yml new file mode 100644 index 00000000000..aadc0d0f70a --- /dev/null +++ b/.github/workflows/release-notes-review.yml @@ -0,0 +1,38 @@ +name: Release Notes AI Review + +on: + pull_request: + types: [opened, synchronize, reopened, ready_for_review] + branches: [main] + paths: + - 'src/current/_data/releases.yml' + - 'src/current/_includes/releases/**' + +permissions: + pull-requests: write + checks: write + contents: read + +jobs: + ai-review: + name: AI Release Notes Review + runs-on: ubuntu-latest + steps: + - name: Checkout + uses: actions/checkout@v4 + + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: '3.11' + + - name: Install dependencies + run: pip install requests pydantic PyYAML openai + + - name: Run AI release notes review + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} + PR_NUMBER: ${{ github.event.pull_request.number }} + COMMIT_SHA: ${{ github.event.pull_request.head.sha }} + run: python .github/scripts/run_release_review.py From 3c543935ca4e1de797fbe8e62ee4293963d34841 Mon Sep 17 00:00:00 2001 From: Ryan Kuo Date: Tue, 10 Mar 2026 17:05:47 -0400 Subject: [PATCH 2/4] add v26.2.0-alpha.1 release notes --- src/current/_data/releases.yml | 27 ++++ .../releases/v26.2/v26.2.0-alpha.1.md | 120 ++++++++++-------- 2 files changed, 94 insertions(+), 53 deletions(-) diff --git a/src/current/_data/releases.yml b/src/current/_data/releases.yml index f116479fa35..76845db639f 100644 --- a/src/current/_data/releases.yml +++ b/src/current/_data/releases.yml @@ -10923,6 +10923,33 @@ previous_release: v26.2.0-alpha.1 +- release_name: v26.2.0-alpha.1 + major_version: v26.2 + release_date: '2026-03-11' + release_type: Testing + go_version: go1.25.5 + sha: 0a2d99d3fc6dced4f627d643ddb65e790a7f9b7b + has_sql_only: true + has_sha256sum: true + mac: + mac_arm: true + mac_arm_experimental: true + mac_arm_limited_access: false + windows: true + linux: + linux_arm: true + linux_arm_experimental: false + linux_arm_limited_access: 
false + linux_intel_fips: true + linux_arm_fips: false + docker: + docker_image: cockroachdb/cockroach-unstable + docker_arm: true + docker_arm_experimental: false + docker_arm_limited_access: false + source: true + + - release_name: v26.2.0-beta.1 major_version: v26.2 release_date: '2026-03-25' diff --git a/src/current/_includes/releases/v26.2/v26.2.0-alpha.1.md b/src/current/_includes/releases/v26.2/v26.2.0-alpha.1.md index a17c000a6dc..a111c0acbf0 100644 --- a/src/current/_includes/releases/v26.2/v26.2.0-alpha.1.md +++ b/src/current/_includes/releases/v26.2/v26.2.0-alpha.1.md @@ -6,21 +6,12 @@ Release Date: March 11, 2026

<h3 id="v26-2-0-alpha-1-backward-incompatible-changes">Backward-incompatible changes</h3>

+- Removed the `incremental_location` option from `BACKUP` and `CREATE SCHEDULE FOR BACKUP`. [#159189][#159189] +- Removed the `incremental_location` option from `SHOW BACKUP` and `RESTORE`. [#160416][#160416] - Increased the default value of `sql.stats.automatic_full_concurrency_limit` (which controls the maximum number of concurrent full statistics collections) from `1` to number of vCPUs divided by 2 (e.g., 4 vCPU nodes will have the value of `2`). [#161806][#161806] - The `TG_ARGV` trigger function parameter now uses 0-based indexing to match PostgreSQL behavior. Previously, `TG_ARGV[1]` returned the first argument; now `TG_ARGV[0]` returns the first argument and `TG_ARGV[1]` returns the second argument. Additionally, usage of `TG_ARGV` no longer requires setting the `allow_create_trigger_function_with_argv_references` session variable. [#161925][#161925] -- Lowered the default value of the `sql.guardrails.max_row_size_log` cluster setting from `64 MiB` to `16 MiB`, and the default value of `sql.guardrails.max_row_size_err` from `512 MiB` to `80 MiB`. These settings control the maximum size of a row (or column family) that SQL can write before logging a warning or returning an error, respectively. The previous defaults were high enough that large rows would hit other limits first (such as the Raft command size limit or the backup SST size limit), producing confusing errors. The new defaults align with existing system limits to provide clearer diagnostics. If your workload legitimately writes rows larger than these new defaults, you can restore the previous behavior by increasing these settings. [#164468][#164468] -- Changed the default value of the `sql.catalog.allow_leased_descriptors.enabled` cluster setting to `true`. This setting allows introspection tables like `information_schema` and `pg_catalog` to use cached descriptors when building the table results, which improves the performance of introspection queries when there are many tables in the cluster. [#159162][#159162] -- The `bulkio.import.elastic_control.enabled` cluster setting is now enabled by default, allowing import operations to integrate with elastic CPU control and automatically throttle based on available resources. [#163867][#163867] -- The `bulkio.ingest.sst_batcher_elastic_control.enabled` cluster setting is now enabled by default, allowing SST batcher operations to integrate with elastic CPU control and automatically throttle based on available resources. [#163868][#163868] -- The session variable `distsql_prevent_partitioning_soft_limited_scans` is now enabled by default. This prevents scans with soft limits from being planned as multiple TableReaders, which decreases the initial setup costs of some fully-distributed query plans. [#160051][#160051] - Creating or altering a changefeed or Kafka/Pub/Sub external connection now returns an error when the `topic_name` query parameter is explicitly set to an empty string in the sink URI, rather than silently falling back to using the table name as the topic name. Existing changefeeds with an empty `topic_name` are not affected. [#164225][#164225] -- TTL jobs are now owned by the schedule owner instead of the `node` user. This allows users with `CONTROLJOB` privilege to cancel TTL jobs, provided the schedule owner is not an admin (`CONTROLJOB` does not grant control over admin-owned jobs). [#161226][#161226] -- Calling `information_schema.crdb_rewrite_inline_hints` now requires the `REPAIRCLUSTER` privilege. 
[#160716][#160716] -- The **Statement Details** page URL format has changed from `/statement/{implicitTxn}/{statementId}` to `/statement/{statementId}`. As a result, bookmarks using the old URL structure will no longer work. [#159558][#159558] -- Changed the unit of measurement for admission control duration metrics from microseconds to nanoseconds. The following metrics are affected: `admission.granter.slots_exhausted_duration.kv`, `admission.granter.cpu_load_short_period_duration.kv`, `admission.granter.cpu_load_long_period_duration.kv`, `admission.granter.io_tokens_exhausted_duration.kv`, `admission.granter.elastic_io_tokens_exhausted_duration.kv`, and `admission.elastic_cpu.nanos_exhausted_duration`. Note that dashboards displaying these metrics will show a discontinuity at upgrade time, with pre-upgrade values appearing much lower due to the unit change. [#160956][#160956] -- Renamed the builtin function `crdb_internal.inject_hint` (introduced in v26.1.0-alpha.2) to `information_schema.crdb_rewrite_inline_hints`. [#160716][#160716] -- Removed the `incremental_location` option from `BACKUP` and `CREATE SCHEDULE FOR BACKUP`. [#159189][#159189] -- Removed the `incremental_location` option from `SHOW BACKUP` and `RESTORE`. [#160416][#160416] +- Lowered the default value of the `sql.guardrails.max_row_size_log` cluster setting from `64 MiB` to `16 MiB`, and the default value of `sql.guardrails.max_row_size_err` from `512 MiB` to `80 MiB`. These settings control the maximum size of a row (or column family) that SQL can write before logging a warning or returning an error, respectively. The previous defaults were high enough that large rows would hit other limits first (such as the Raft command size limit or the backup SST size limit), producing confusing errors. The new defaults align with existing system limits to provide clearer diagnostics. If your workload legitimately writes rows larger than these new defaults, you can restore the previous behavior by increasing these settings. [#164468][#164468]

Security updates

@@ -28,7 +19,7 @@ Release Date: March 11, 2026

General changes

-- Changefeeds now support the `partition_alg` option for specifying a Kafka partitioning algorithm. Currently `fnv-1a` (default) and `murmur2` are supported. The option is only valid on Kafka v2 sinks. This is protected by the cluster setting `changefeed.partition_alg.enabled`. An example usage: `SET CLUSTER SETTING changefeed.partition_alg.enabled=true; CREATE CHANGEFEED ... INTO 'kafka://...' WITH partition_alg='murmur2';`. Note that if a changefeed is created using the `murmur2` algorithm, and then the cluster setting is disabled, the changefeed will continue using the `murmur2` algorithm unless the changefeed is altered to use a different `partition_alg`. [#161265][#161265] +- Changefeeds now support the `partition_alg` option for specifying a Kafka partitioning algorithm. Currently `fnv-1a` (default) and `murmur2` are supported. The option is only valid on Kafka v2 sinks. This is protected by the cluster setting `changefeed.partition_alg.enabled`. An example usage: `SET CLUSTER SETTING changefeed.partition_alg.enabled=true; CREATE CHANGEFEED ... INTO 'kafka://...' WITH partition_alg='murmur2';`. Note that if a changefeed is created using the murmur2 algorithm, and then the cluster setting is disabled, the changefeed will continue using the murmur2 algorithm unless the changefeed is altered to use a different `partition_alg`. [#161265][#161265]
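As a quick illustration of the `partition_alg` workflow described in the bullet above, here is a minimal sketch; the table name and Kafka broker address are placeholders, and the commands simply restate the inline example:

```sql
-- Allow the partition_alg changefeed option cluster-wide (off by default).
SET CLUSTER SETTING changefeed.partition_alg.enabled = true;

-- Create a changefeed on a hypothetical table that partitions messages
-- with the murmur2 algorithm; the option is only valid on Kafka v2 sinks.
CREATE CHANGEFEED FOR TABLE orders
  INTO 'kafka://broker.example.com:9092'
  WITH partition_alg = 'murmur2';
```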

{{ site.data.products.enterprise }} edition changes

@@ -37,58 +28,65 @@ Release Date: March 11, 2026

SQL language changes

-- Added the `MAINTAIN` privilege, which can be granted on tables and materialized views. Users with the `MAINTAIN` privilege on a materialized view can execute `REFRESH MATERIALIZED VIEW` without being the owner. Users with the `MAINTAIN` privilege on a table can execute `ANALYZE` without needing `SELECT`. This aligns with PostgreSQL 17 behavior. [#164236][#164236] +- Fixed a bug that caused a routine with an `INSERT` statement to unnecessarily block dropping a hash-sharded index or computed column on the target table. This fix applies only to newly created routines. In releases prior to v25.3, the fix must be enabled by setting the session variable `use_improved_routine_dependency_tracking` to `on`. [#146250][#146250] +- The output of `EXPLAIN [ANALYZE]` in non-`VERBOSE` mode is now more succinct. [#153361][#153361] +- A database-level changefeed with no tables will periodically poll to check for tables added to the database. The new option `hibernation_polling_frequency` sets the frequency at which the polling occurs, until a table is found, at which point polling ceases. [#156771][#156771] +- `crdb_internal.datums_to_bytes` is now available in the `information_schema` system catalog as `information_schema.crdb_datums_to_bytes`. [#156963][#156963] +- Queries executed via the vectorized engine now display their progress in the `phase` column of `SHOW QUERIES`. Previously, this feature was only available in the row-by-row engine. [#158029][#158029] - Added cluster settings to control the number of concurrent automatic statistics collection jobs: - `sql.stats.automatic_full_concurrency_limit` controls the maximum number of concurrent full statistics collections. The default is 1. - `sql.stats.automatic_extremes_concurrency_limit` controls the maximum number of concurrent partial statistics collections using extremes. The default is 128. Note that at most one statistics collection job can run on a single table at a time. [#158835][#158835] +- Added the `STRICT` option for locality-aware backups. When enabled, backups fail if data from a KV node with one locality tag would be backed up to a bucket with a different locality tag, ensuring data domiciling compliance. [#158999][#158999] +- Fixed a bug where creating a routine could create unnecessary column dependencies when the routine references columns through CHECK constraints (including those for RLS policies and hash-sharded indexes) or partial index predicates. These unnecessary dependencies prevented dropping the column without first dropping the routine. The fix is gated behind the session setting `use_improved_routine_deps_triggers_and_computed_cols`, which is off by default prior to v26.1. [#159126][#159126] +- Changed the default value of the `sql.catalog.allow_leased_descriptors.enabled` cluster setting to `true`. This setting allows introspection tables like `information_schema` and `pg_catalog` to use cached descriptors when building the table results, which improves the performance of introspection queries when there are many tables in the cluster. [#159162][#159162] +- Added support for `SHOW STATEMENT HINTS`, which displays information about the statement hints (if any) associated with the given statement fingerprint string. The fingerprint is normalized in the same way as `EXPLAIN (FINGERPRINT)` before hints are matched. Example usage: `SHOW STATEMENT HINTS FOR ' SELECT * FROM xy WHERE x = 10 '` or `SHOW STATEMENT HINTS FOR $$ SELECT * FROM xy WHERE x = 10 $$ WITH DETAILS`. 
[#159231][#159231] - Added a new cluster setting `bulkio.import.distributed_merge.mode` to enable distributed merge support for `IMPORT` operations. When enabled (default: false), `IMPORT` jobs will use a two-phase approach where import processors first write SST files to local storage, then a coordinator merges and ingests them. This can improve performance for large imports by reducing L0 file counts and enabling merge-time optimizations. This feature requires all nodes to be running v26.1 or later. [#159330][#159330] -- CockroachDB now supports the PostgreSQL session variables `tcp_keepalives_idle`, `tcp_keepalives_interval`, `tcp_keepalives_count`, and `tcp_user_timeout`. These allow per-session control over TCP keepalive behavior on each connection. A value of 0 (the default) uses the corresponding cluster setting. Non-zero values override the cluster setting for that session only. Units match PostgreSQL: seconds for keepalive settings, milliseconds for `tcp_user_timeout`. [#164369][#164369] -- Added the `optimizer_inline_any_unnest_subquery` session setting to enable/disable the optimizer rule `InlineAnyProjectSet`. The setting is on by default in v26.2 and later. [#161880][#161880] +- `INSPECT` is now a generally available (GA) feature. The `enable_inspect_command` session variable has been deprecated, and is now effectively always set to `true`. [#159659][#159659] - Users can now set the `use_backups_with_ids` session setting to enable a new `SHOW BACKUPS IN` experience. When enabled, `SHOW BACKUPS IN {collection}` displays all backups in the collection. Results can be filtered by backup end time using `OLDER THAN {timestamp}` or `NEWER THAN {timestamp}` clauses. Example usage: `SET use_backups_with_ids = true; SHOW BACKUPS IN '{collection}' OLDER THAN '2026-01-09 12:13:14' NEWER THAN '2026-01-04 15:16:17';` [#160137][#160137] -- If the new `SHOW BACKUP` experience is enabled by setting the `use_backups_with_ids` session variable to true, `SHOW BACKUP` will parse the IDs provided by `SHOW BACKUPS` and display contents for single backups. [#160812][#160812] -- If the new `RESTORE` experience is enabled by setting the `use_backups_with_ids` session variable to true, `RESTORE` will parse the IDs provided by `SHOW BACKUPS` and will restore the specified backup without the use of `AS OF SYSTEM TIME`. [#161294][#161294] -- `SHOW BACKUP` and `RESTORE` now allow backup IDs even if the `use_backups_with_ids` session variable is not set. Setting the variable only configures whether `LATEST` is resolved using the new or legacy path. [#162329][#162329] -- Added the `REVISION START TIME` option to the new `SHOW BACKUPS` experience enabled via the `use_backups_with_ids` session variable. Use the `REVISION START TIME` option to view the revision start times of revision history backups. [#161328][#161328] -- Added support for `SHOW STATEMENT HINTS`, which displays information about the statement hints (if any) associated with the given statement fingerprint string. The fingerprint is normalized in the same way as `EXPLAIN (FINGERPRINT)` before hints are matched. Example usage: `SHOW STATEMENT HINTS FOR ' SELECT * FROM xy WHERE x = 10 '` or `SHOW STATEMENT HINTS FOR $$ SELECT * FROM xy WHERE x = 10 $$ WITH DETAILS`. [#159231][#159231] -- `CREATE OR REPLACE TRIGGER` is now supported. If a trigger with the same name already exists on the same table, it is replaced with the new definition. If no trigger with that name exists, a new trigger is created. 
[#162633][#162633] -- Added support for `ALTER TABLE ENABLE TRIGGER` and `ALTER TABLE DISABLE TRIGGER` syntax. This allows users to temporarily disable triggers without dropping them, and later re-enable them. The syntax supports disabling/enabling individual triggers by name, or all triggers on a table using the `ALL` or `USER` keywords. [#161924][#161924] +- The `information_schema.crdb_datums_to_bytes` built-in function is now documented. [#160486][#160486] +- Calling `information_schema.crdb_rewrite_inline_hints` now requires the REPAIRCLUSTER privilege. [#160716][#160716] +- Renamed the builtin function `crdb_internal.inject_hint` (introduced in v26.1.0-alpha.2) to `information_schema.crdb_rewrite_inline_hints`. [#160716][#160716] +- If the `use_backups_with_ids` session variable is set to true, the new `SHOW BACKUP` experience will be enabled. `SHOW BACKUP` will parse the IDs provided by `SHOW BACKUPS` and display contents for single backups. [#160812][#160812] +- None [#161211][#161211] +- If the `use_backups_with_ids` session variable is set, the new `RESTORE` experience will be enabled. `RESTORE` will be able to parse the IDs provided by `SHOW BACKUPS` and will restore the specified backup without the use of `AS OF SYSTEM TIME`. [#161294][#161294] +- Updated CockroachDB to allow a prefix of index key columns to be used for the shard column in a hash-sharded index. The `shard_columns` storage parameter may be used to override the default, which uses all index key columns in the shard column. [#161422][#161422] +- CockroachDB now shows execution statistics (like `execution time`) on `EXPLAIN ANALYZE` output for `render` nodes, which often handle built-in functions. [#161509][#161509] +- Added the `optimizer_inline_any_unnest_subquery` session setting to enable/disable the optimizer rule `InlineAnyProjectSet`. The setting is on by default in v26.2 and later. [#161880][#161880] - Updated `DROP TRIGGER` to accept the `CASCADE` option for PostgreSQL compatibility. Since triggers in CockroachDB cannot have dependents, `CASCADE` behaves the same as `RESTRICT` or omitting the option entirely. [#161915][#161915] +- Added support for `ALTER TABLE ENABLE TRIGGER` and `ALTER TABLE DISABLE TRIGGER` syntax. This allows users to temporarily disable triggers without dropping them, and later re-enable them. The syntax supports disabling/enabling individual triggers by name, or all triggers on a table using the `ALL` or `USER` keywords. [#161924][#161924] +- Added support for the `pg_trigger_depth()` builtin function, which returns the current nesting level of PostgreSQL triggers (0 if not called from inside a trigger). [#162286][#162286] - `ALTER TABLE ... DROP CONSTRAINT` can now be used to drop `UNIQUE` constraints. The backing `UNIQUE` index will also be dropped, as CockroachDB treats the constraint and index as the same thing. [#162345][#162345] +- `CREATE OR REPLACE TRIGGER` is now supported. If a trigger with the same name already exists on the same table, it is replaced with the new definition. If no trigger with that name exists, a new trigger is created. [#162633][#162633] - `DROP COLUMN` and `DROP INDEX` with `CASCADE` now properly drop dependent triggers. Previously, these operations would fail with an unimplemented error when a trigger depended on the column or index being dropped. [#163296][#163296] - `CREATE OR REPLACE FUNCTION` now works on trigger functions that have active triggers. 
Previously, this was blocked with an unimplemented error, requiring users to drop and recreate triggers. The replacement now atomically updates all dependent triggers to execute the new function body. [#163348][#163348] -- Updated CockroachDB to allow a prefix of index key columns to be used for the shard column in a hash-sharded index. The `shard_columns` storage parameter may be used to override the default, which uses all index key columns in the shard column. [#161422][#161422] -- Added support for the `pg_trigger_depth()` builtin function, which returns the current nesting level of PostgreSQL triggers (0 if not called from inside a trigger). [#162286][#162286] -- A database-level changefeed with no tables will periodically poll to check for tables added to the database. The new option `hibernation_polling_frequency` sets the frequency at which the polling occurs, until a table is found, at which point polling ceases. [#156771][#156771] -- `INSPECT` is now a generally available (GA) feature. The `enable_inspect_command` session variable has been deprecated, and is now effectively always set to `true`. [#159659][#159659] -- Added the `STRICT` option for locality-aware backups. When enabled, backups fail if data from a KV node with one locality tag would be backed up to a bucket with a different locality tag, ensuring data domiciling compliance. [#158999][#158999] - Added support for the `dmetaphone()`, `dmetaphone_alt()`, and `daitch_mokotoff()` built-in functions, completing CockroachDB's implementation of the PostgreSQL `fuzzystrmatch` extension. `dmetaphone` and `dmetaphone_alt` return Double Metaphone phonetic codes for a string, and `daitch_mokotoff` returns an array of Daitch-Mokotoff soundex codes. These functions are useful for fuzzy string matching based on phonetic similarity. [#163430][#163430] -- `crdb_internal.datums_to_bytes` is now available in the `information_schema` system catalog as `information_schema.crdb_datums_to_bytes`. [#156963][#156963] -- The `information_schema.crdb_datums_to_bytes` built-in function is now documented. [#160486][#160486] - Row count validation after `IMPORT` is now enabled by default in async mode. After an `IMPORT` completes, a background `INSPECT` job validates that the imported row count matches expectations. The `IMPORT` result now includes an `inspect_job_id` column so the `INSPECT` job can be viewed separately. The `bulkio.import.row_count_validation.mode` cluster setting controls this behavior, with valid values of `off`, `async` (default), and `sync`. [#163543][#163543] -- Queries executed via the vectorized engine now display their progress in the `phase` column of `SHOW QUERIES`. Previously, this feature was only available in the row-by-row engine. [#158029][#158029] -- CockroachDB now shows execution statistics (like `execution time`) on `EXPLAIN ANALYZE` output for `render` nodes, which often handle built-in functions. [#161509][#161509] -- The output of `EXPLAIN [ANALYZE]` in non-`VERBOSE` mode is now more succinct. [#153361][#153361] +- Added the `MAINTAIN` privilege, which can be granted on tables and materialized views. Users with the `MAINTAIN` privilege on a materialized view can execute `REFRESH MATERIALIZED VIEW` without being the owner. Users with the `MAINTAIN` privilege on a table can execute `ANALYZE` without needing `SELECT`. This aligns with PostgreSQL 17 behavior. 
[#164236][#164236] +- CockroachDB now supports the PostgreSQL session variables `tcp_keepalives_idle`, `tcp_keepalives_interval`, `tcp_keepalives_count`, and `tcp_user_timeout`. These allow per-session control over TCP keepalive behavior on each connection. A value of 0 (the default) uses the corresponding cluster setting. Non-zero values override the cluster setting for that session only. Units match PostgreSQL: seconds for keepalive settings, milliseconds for `tcp_user_timeout`. [#164369][#164369]
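To make the TCP keepalive session variables in the last bullet above concrete, here is a minimal sketch; the values are illustrative only, and the comments describe conventional PostgreSQL keepalive semantics:

```sql
-- Per-session overrides; 0 (the default) falls back to the corresponding cluster setting.
SET tcp_keepalives_idle = 60;      -- seconds of idle time before the first keepalive probe
SET tcp_keepalives_interval = 10;  -- seconds between keepalive probes
SET tcp_keepalives_count = 5;      -- unacknowledged probes before the connection is considered dead
SET tcp_user_timeout = 30000;      -- milliseconds transmitted data may remain unacknowledged
```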

Operational changes

-- The new `cockroach gen dashboard` command generates standardized monitoring dashboards from an embedded configuration file. It outputs a dashboard JSON file for either Datadog (`--tool=datadog`) or Grafana (`--tool=grafana`), with Grafana dashboards using Prometheus queries. The generated dashboards include metrics across Overview, Hardware, Runtime, Networking, SQL, and Storage categories. Use `--output` to set the output file path and `--rollup-interval` to control metric aggregation. [#161050][#161050] -- Added the `server.sql_tcp_user.timeout` cluster setting, which specifies the maximum amount of time transmitted data can remain unacknowledged before the underlying TCP connection is forcefully closed. This setting is enabled by default with a value of 30 seconds and is supported on Linux and macOS (Darwin). [#164037][#164037] -- Introduced a new cluster setting `kvadmission.store.snapshot_ingest_bandwidth_control.min_rate.enabled`. When this setting is enabled and disk bandwidth-based admission control is active, snapshot ingestion will be admitted at a minimum rate. This prevents snapshot ingestion from being starved by other elastic work. [#159436][#159436] +- A new cluster setting `kvadmission.store.snapshot_ingest_bandwidth_control.min_rate.enabled` is introduced. When this setting is enabled and disk bandwidth-based admission control is active, snapshot ingestion will be admitted at a minimum rate. This prevents snapshot ingestion from being starved by other elastic work. [#159436][#159436] - The `kv.range_split.load_sample_reset_duration` cluster setting now defaults to `30m`. This should improve load-based splitting in rare edge cases. [#159499][#159499] - Added the `kv.protectedts.protect`, `kv.protectedts.release`, `kv.protectedts.update_timestamp`, `kv.protectedts.get_record`, and `kv.protectedts.mark_verified` metrics to track protected timestamp storage operations. These metrics help diagnose issues with excessive protected timestamp churn and operational errors. Each operation tracks both successful completions (`.success`) and failures (`.failed`, such as `ErrExists` or `ErrNotExists`). Operators can monitor these metrics to understand PTS system behavior and identify performance issues related to backups, changefeeds, and other features that use protected timestamps. [#160129][#160129] -- Added a new metric `sql.rls.policies_applied.count` that tracks the number of SQL statements where row-level security (RLS) policies were applied during query planning. [#164405][#164405] -- External connections can now be used with online restore. [#159090][#159090] - Changed goroutine profile dumps from human-readable `.txt.gz` files to binary proto `.pb.gz` files. This improves the performance of the goroutine dumper by eliminating brief in-process pauses that occurred when collecting goroutine stacks. [#160798][#160798] -- Added a new structured event of type `rewrite_inline_hints` that is emitted when an inline-hints rewrite rule is added using `information_schema.crdb_rewrite_inline_hints`. This event is written to both the event log and the `OPS` channel. [#160901][#160901] +- Added a new structured event of type `rewrite_inline_hints` that is emitted when an inline-hints rewrite rule is added using `information_schema.crdb_rewrite_inline_hints`. This event is written to both the event log and the OPS channel. 
[#160901][#160901] - Added a new metric `sql.query.with_statement_hints.count` that is incremented whenever a statement is executed with one or more external statement hints applied. An example of an external statement hint is an inline-hints rewrite rule added by calling `information_schema.crdb_rewrite_inline_hints`. [#161043][#161043] -- Logical Data Replication (LDR) now supports hash-sharded indexes and secondary indexes with virtual computed columns. Previously, tables with these index types could not be replicated using LDR. [#161062][#161062] +- The new `cockroach gen dashboard` command generates standardized monitoring dashboards from an embedded configuration file. It outputs a dashboard JSON file for either Datadog (`--tool=datadog`) or Grafana (`--tool=grafana`), with Grafana dashboards using Prometheus queries. The generated dashboards include metrics across Overview, Hardware, Runtime, Networking, SQL, and Storage categories. Use `--output` to set the output file path and `--rollup-interval` to control metric aggregation. [#161050][#161050] +- Logical Data Replication (LDR) now supports hash-sharded indexes and + secondary indexes with virtual computed columns. [#161062][#161062] +- TTL jobs are now owned by the schedule owner instead of the `node` user. This allows users with `CONTROLJOB` privilege to cancel TTL jobs, provided the schedule owner is not an admin (`CONTROLJOB` does not grant control over admin-owned jobs). [#161226][#161226] - Backup schedules that utilize the `revision_history` option now apply that option only to incremental backups triggered by that schedule, rather than duplicating the revision history in the full backups as well. [#162105][#162105] -- The `build.timestamp` Prometheus metric now carries `major` and `minor` labels identifying the release series of the running CockroachDB binary (e.g., `major="26", minor="1"` for any v26.1.x build). [#163834][#163834] -- Jobs now clear their running status messages upon successful completion. [#163765][#163765] -- Changefeed ranges are now more accurately reported as lagging. [#163427][#163427] +- Changefeed ranges are now more accurately + and more reliably reported as lagging. [#163427][#163427] +- The `build.timestamp` Prometheus metric now carries `major` and `minor` labels identifying the release series of the running CockroachDB binary (e.g., `major="26", minor="1"` for any v26.1.x build). [#163834][#163834] +- The `bulkio.import.elastic_control.enabled` cluster setting is now enabled by default, allowing import operations to integrate with elastic CPU control and automatically throttle based on available resources. [#163867][#163867] +- The `bulkio.ingest.sst_batcher_elastic_control.enabled` cluster setting is now enabled by default, allowing SST batcher operations to integrate with elastic CPU control and automatically throttle based on available resources. [#163868][#163868] +- Added the `server.sql_tcp_user.timeout` cluster setting, which specifies the maximum amount of time transmitted data can remain unacknowledged before the underlying TCP connection is forcefully closed. This setting is enabled by default with a value of 30 seconds and is supported on Linux and macOS (Darwin). [#164037][#164037] +- Added a new metric `sql.rls.policies_applied.count` that tracks the number of SQL statements where row-level security (RLS) policies were applied during query planning. [#164405][#164405]
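A minimal sketch of adjusting the `server.sql_tcp_user.timeout` cluster setting mentioned in the list above; the interval-literal syntax and the chosen value are assumptions, not part of the release note:

```sql
-- Raise the TCP user timeout for SQL connections from the 30-second default to 1 minute.
-- The '1m' duration literal is assumed syntax for this setting.
SET CLUSTER SETTING server.sql_tcp_user.timeout = '1m';
```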

Command-line changes

@@ -99,19 +97,18 @@ Release Date: March 11, 2026

DB Console changes

-- Added a new time-series bar graph called **Plan Distribution Over Time** to the **Statement Fingerprint** page, on the **Explain Plans** tab. It shows which execution plans were used in each time interval, helping detect shifts in query plan distributions. [#161011][#161011] +- The statement details page URL format has changed from `/statement/{implicitTxn}/{statementId}` to `/statement/{statementId}`. As a result, bookmarks using the old URL structure will no longer work. [#159558][#159558] - The **SQL Activity** > **Sessions** page now defaults the **Session Status** filter to **Active, Idle** to exclude closed sessions. [#160576][#160576] +- Changed the unit of measurement for admission control duration metrics from microseconds to nanoseconds. The following metrics are affected: `admission.granter.slots_exhausted_duration.kv`, `admission.granter.cpu_load_short_period_duration.kv`, `admission.granter.cpu_load_long_period_duration.kv`, `admission.granter.io_tokens_exhausted_duration.kv`, `admission.granter.elastic_io_tokens_exhausted_duration.kv`, and `admission.elastic_cpu.nanos_exhausted_duration`. Note that dashboards displaying these metrics will show a discontinuity at upgrade time, with pre-upgrade values appearing much lower due to the unit change. [#160956][#160956] +- A new time-series bar graph called **Plan Distribution Over Time** has been added to the **Statement Fingerprint** page, on the **Explain Plans** tab. It shows which execution plans were used in each time interval, helping detect shifts in query plan distributions. [#161011][#161011]

Bug fixes

-- The fix for `node descriptor not found` errors for changefeeds with `execution_locality` filters in CockroachDB Basic and Standard clusters is now controlled by cluster setting `sql.instance_info.use_instance_resolver.enabled` (default: `true`). [#163947][#163947] -- Fixed a bug that caused a routine with an `INSERT` statement to unnecessarily block dropping a hash-sharded index or computed column on the target table. This fix applies only to newly created routines. In releases prior to v25.3, the fix must be enabled by setting the session variable `use_improved_routine_dependency_tracking` to `on`. [#146250][#146250] -- Fixed a bug where creating a routine could create unnecessary column dependencies when the routine references columns through CHECK constraints (including those for RLS policies and hash-sharded indexes) or partial index predicates. These unnecessary dependencies prevented dropping the column without first dropping the routine. The fix is gated behind the session setting `use_improved_routine_deps_triggers_and_computed_cols`, which is off by default prior to v26.1. [#159126][#159126] - Fixed a bug that allowed a column to be dropped from a table even if it was referenced in the `RETURNING` clause of an `UPDATE` or `DELETE` statement in a routine. In releases prior to v25.3, the fix must be enabled by setting the session variable `use_improved_routine_dependency_tracking` to `on`. [#146250][#146250] - CockroachDB could previously encounter internal errors like `column statistics cannot be determined for empty column set` and `invalid union` in some edge cases with `UNION`, `EXCEPT`, and `INTERCEPT`. This has now been fixed. [#150706][#150706] - Fixed a bug that could cause a scan over a secondary index to read significantly more KVs than necessary in order to satisfy a limit when the scanned index had more than one column family. [#156672][#156672] - Fixed an issue where long-running transactions with many statements could cause unbounded memory growth in the SQL statistics subsystem. When a transaction includes a large number of statements, the SQL statistics ingester now automatically flushes buffered statistics before the transaction commits. As a side effect, the flushed statement statistics might not have an associated transaction fingerprint ID because the transaction has not yet completed. In such cases, the transaction fingerprint ID cannot be backfilled after the fact. [#158527][#158527] -- Fixed a bug that allowed columns to be dropped despite being referenced by a routine. This could occur when a column was only referenced as a target column in the `SET` clause of an `UPDATE` statement within the routine. This fix only applies to newly-created routines. In versions prior to v26.1, the fix must be enabled by setting the session variable `prevent_update_set_column_drop`. [#158935][#158935] +- Fixed a bug that allowed columns to be dropped despite being referenced by a routine. This could occur when a column was only referenced as a target column in the SET clause of an UPDATE statement within the routine. This fix only applies to newly-created routines. In versions prior to v26.1, the fix must be enabled by setting the session variable `prevent_update_set_column_drop`. [#158935][#158935] - Fixed a bug that caused newly-created routines to incorrectly prevent dropping columns that were not directly referenced, most notably columns referenced by computed column expressions. 
The fix is gated behind the session setting `use_improved_routine_deps_triggers_and_computed_cols`, which is off by default prior to v26.1. [#158935][#158935] - Fixed a bug where schema changes could fail after a `RESTORE` due to missing session data. [#159176][#159176] - The `ascii` built-in function now returns `0` when the input is the empty string instead of an error. [#159178][#159178] @@ -147,7 +144,8 @@ Release Date: March 11, 2026 - Fixed an error that occurred when using generic query plans that generates a lookup join on indexes containing identity computed columns. [#162036][#162036] - Fixed a bug that could cause changefeeds using Kafka v1 sinks to hang when the changefeed was cancelled. [#162058][#162058] - Fixed an internal error `could not find format code for column N` that occurred when executing `EXPLAIN ANALYZE EXECUTE` statements via JDBC or other clients using the PostgreSQL binary protocol. [#162115][#162115] -- Fixed a bug where statement bundles were missing `CREATE TYPE` statements for user-defined types used as array column types. [#162357][#162357] +- Statement bundles now contain the `CREATE TYPE` statements + for user-defined types (UDTs) when a column's type is an array of a UDT. [#162357][#162357] - Fixed a bug in which PL/pgSQL UDFs with many `IF` statements would cause a timeout and/or OOM when executed from a prepared statement. This bug was introduced in v23.2.22, v24.1.15, v24.3.9, v25.1.2, and v25.2.0. [#162512][#162512] - Fixed a bug where an error would occur when defining a foreign key on a hash-sharded primary key without explicitly providing the primary key columns. [#162608][#162608] - Fixed a bug where generating a debug zip could trigger an out-of-memory (OOM) condition on a node if malformed log entries were present in logs using `json` or `json-compact` formatting. This bug was introduced in v24.1. [#163224][#163224] @@ -163,20 +161,22 @@ Release Date: March 11, 2026 - Fixed a bug where `EXPLAIN ANALYZE (DEBUG)` statement bundles did not include triggers, their functions, or tables modified by those triggers. The bundle's `schema.sql` file now contains the `CREATE TRIGGER`, `CREATE FUNCTION`, and `CREATE TABLE` statements needed to fully reproduce the query environment when triggers are involved. [#163584][#163584] - Fixed a rare data race during parallel constraint checks where a fresh descriptor collection could resolve a stale enum type version. This bug was introduced in v26.1.0. [#163883][#163883] - Fixed a bug where running **changefeeds** with `envelope=enriched` and `enriched_properties` containing `source` would cause failures during a **cluster upgrade**. [#163885][#163885] +- The fix for `node descriptor not found` errors for changefeeds with `execution_locality` filters in CockroachDB Basic and Standard clusters is now controlled by cluster setting `sql.instance_info.use_instance_resolver.enabled` (default: `true`). [#163947][#163947] - Fixed a bug where dropped columns appeared in `pg_catalog.pg_attribute` with the `atttypid` column equal to 2283 (`anyelement`). Now this column will be 0 for dropped columns. This matches PostgreSQL behavior, where `atttypid=0` is used for dropped columns. [#163950][#163950] - Fixed a race condition/conflict between concurrent `ALTER FUNCTION ... SET SCHEMA` and `DROP SCHEMA` operations. [#164043][#164043] - Fixed a bug where super region zone configurations did not constrain all replicas to regions within the super region. 
[#164285][#164285] - Fixed a bug where CockroachDB returned "cached plan must not change result type" errors during the `Execute` phase instead of the `Bind` phase of the extended pgwire protocol. This caused compatibility issues with drivers like pgx that expect the error before `BindComplete` is sent, particularly when using batch operations with prepared statements after schema changes. [#164406][#164406] -- Statistics histogram collection is now skipped for JSON columns referenced in partial index predicates, except when `sql.stats.non_indexed_json_histograms.enabled` is true (default: false). [#164477][#164477] -- Fixed a bug where import rollback could incorrectly revert data in a table that was already online. This could only occur if an import job was cancelled or failed after the import had already succeeded and the table was made available for use. [#159627][#159627] -- Invalid `avro_schema_prefix` is now caught during statement time. The prefix must start with `[A-Za-z_]` and subsequently contain only `[A-Za-z0-9_]`, as specified in the [Avro specification](https://avro.apache.org/docs/1.8.1/spec.html). [#159869][#159869] +- Histogram generation for JSON columns referenced + in partial index predicates now respects the + `sql.stats.non_indexed_json_histograms.enabled` cluster setting (default `false`). [#164477][#164477]

Performance improvements

-- Added a new session variable, `distsql_prevent_partitioning_soft_limited_scans`, which, when true, prevents scans with soft limits from being planned as multiple TableReaders by the physical planner. This should decrease the initial setup costs of some fully-distributed query plans. [#160051][#160051] - Database- and table-level backups no longer fetch all object descriptors from disk in order to resolve the backup targets. Now only the objects that are referenced by the targeted objects will be fetched. This improves performance when there are many tables in the cluster. [#157790][#157790] - Various background tasks and jobs now more actively yield to foreground work when that work is waiting to run. [#159205][#159205] - Improved changefeed performance when filtering unwatched column families and offline tables by replacing expensive error chain traversal with direct status enum comparisons. [#159745][#159745] +- The session variable `distsql_prevent_partitioning_soft_limited_scans` is now enabled by default. This prevents scans with soft limits from being planned as multiple TableReaders, which decreases the initial setup costs of some fully-distributed query plans. [#160051][#160051] +- Added a new session variable, `distsql_prevent_partitioning_soft_limited_scans`, which, when true, prevents scans with soft limits from being planned as multiple TableReaders by the physical planner. This should decrease the initial setup costs of some fully-distributed query plans. [#160051][#160051] - Fixed a performance regression in `pg_catalog.pg_roles` and `pg_catalog.pg_authid` by avoiding privilege lookups for each row in the table. [#160121][#160121] - Queries that have comparison expressions with the `levenshtein` built-in are now up to 30% faster. [#160394][#160394] - The optimizer now better optimizes query plans of statements within UDFs and stored procedures that have `IN` subqueries. [#160503][#160503] @@ -186,6 +186,20 @@ Release Date: March 11, 2026 - Improved changefeed checkpointing performance when changefeeds are lagging. Previously, checkpoint updates could be redundantly applied multiple times per checkpoint operation. [#162546][#162546] - The query optimizer now eliminates redundant filter and projection operators over inputs with zero cardinality, even when the filter or projection expressions are not leakproof. This produces simpler, more efficient query plans in cases where joins or other operations fold to zero rows. [#164212][#164212] +

Build changes

+ +- Replaced bors with the Trunk merge queue for better performance and reliability. This is a configuration-only change with no runtime impact; it maintains the same safety checks while improving the CI workflow. [#161230][#161230] + +

Miscellaneous

+ +- External connections can now be used with online restore. [#159090][#159090] +- Fixed a bug where import rollback could incorrectly revert data in a table that was already online. This could only occur if an import job was cancelled or failed after the import had already succeeded and the table was made available for use. [#159627][#159627] +- Invalid `avro_schema_prefix` is now caught during statement time. The prefix must start with `[A-Za-z_]` and subsequently contain only `[A-Za-z0-9_]`, as specified in the [Avro specification](https://avro.apache.org/docs/1.8.1/spec.html). [#159869][#159869] +- Added the `REVISION START TIME` option to the new `SHOW BACKUPS` experience enabled via the `use_backups_with_ids` session variable. Use the `REVISION START TIME` option to view the revision start times of revision history backups. [#161328][#161328] +- `SHOW BACKUP` and `RESTORE` now allow backup IDs even if the `use_backups_with_ids` session variable is not set. Setting the variable only configures whether `LATEST` is resolved using the new or legacy path. [#162329][#162329] +- Job running statuses are now cleared after completing successfully. [#163765][#163765] + + [#161806]: https://github.com/cockroachdb/cockroach/pull/161806 [#164236]: https://github.com/cockroachdb/cockroach/pull/164236 [#158527]: https://github.com/cockroachdb/cockroach/pull/158527 From 47353495d85488f24c7727a3e44d3940be04f7de Mon Sep 17 00:00:00 2001 From: Ryan Kuo Date: Wed, 11 Mar 2026 13:09:35 -0400 Subject: [PATCH 3/4] re-sequence and manually edit release notes --- .../releases/v26.2/v26.2.0-alpha.1.md | 120 ++++++++---------- 1 file changed, 53 insertions(+), 67 deletions(-) diff --git a/src/current/_includes/releases/v26.2/v26.2.0-alpha.1.md b/src/current/_includes/releases/v26.2/v26.2.0-alpha.1.md index a111c0acbf0..e545cdfb192 100644 --- a/src/current/_includes/releases/v26.2/v26.2.0-alpha.1.md +++ b/src/current/_includes/releases/v26.2/v26.2.0-alpha.1.md @@ -6,12 +6,22 @@ Release Date: March 11, 2026

Backward-incompatible changes

-- Removed the `incremental_location` option from `BACKUP` and `CREATE SCHEDULE FOR BACKUP`. [#159189][#159189] -- Removed the `incremental_location` option from `SHOW BACKUP` and `RESTORE`. [#160416][#160416] - Increased the default value of `sql.stats.automatic_full_concurrency_limit` (which controls the maximum number of concurrent full statistics collections) from `1` to number of vCPUs divided by 2 (e.g., 4 vCPU nodes will have the value of `2`). [#161806][#161806] - The `TG_ARGV` trigger function parameter now uses 0-based indexing to match PostgreSQL behavior. Previously, `TG_ARGV[1]` returned the first argument; now `TG_ARGV[0]` returns the first argument and `TG_ARGV[1]` returns the second argument. Additionally, usage of `TG_ARGV` no longer requires setting the `allow_create_trigger_function_with_argv_references` session variable. [#161925][#161925] -- Creating or altering a changefeed or Kafka/Pub/Sub external connection now returns an error when the `topic_name` query parameter is explicitly set to an empty string in the sink URI, rather than silently falling back to using the table name as the topic name. Existing changefeeds with an empty `topic_name` are not affected. [#164225][#164225] - Lowered the default value of the `sql.guardrails.max_row_size_log` cluster setting from `64 MiB` to `16 MiB`, and the default value of `sql.guardrails.max_row_size_err` from `512 MiB` to `80 MiB`. These settings control the maximum size of a row (or column family) that SQL can write before logging a warning or returning an error, respectively. The previous defaults were high enough that large rows would hit other limits first (such as the Raft command size limit or the backup SST size limit), producing confusing errors. The new defaults align with existing system limits to provide clearer diagnostics. If your workload legitimately writes rows larger than these new defaults, you can restore the previous behavior by increasing these settings. [#164468][#164468] +- Changed the default value of the `sql.catalog.allow_leased_descriptors.enabled` cluster setting to `true`. This setting allows introspection tables like `information_schema` and `pg_catalog` to use cached descriptors when building the table results, which improves the performance of introspection queries when there are many tables in the cluster. [#159162][#159162] +- The `bulkio.import.elastic_control.enabled` cluster setting is now enabled by default, allowing import operations to integrate with elastic CPU control and automatically throttle based on available resources. [#163867][#163867] +- The `bulkio.ingest.sst_batcher_elastic_control.enabled` cluster setting is now enabled by default, allowing SST batcher operations to integrate with elastic CPU control and automatically throttle based on available resources. [#163868][#163868] +- The session variable `distsql_prevent_partitioning_soft_limited_scans` is now enabled by default. This prevents scans with soft limits from being planned as multiple TableReaders, which decreases the initial setup costs of some fully-distributed query plans. [#160051][#160051] +- Creating or altering a changefeed or Kafka/Pub/Sub external connection now returns an error when the `topic_name` query parameter is explicitly set to an empty string in the sink URI, rather than silently falling back to using the table name as the topic name. Existing changefeeds with an empty `topic_name` are not affected. [#164225][#164225] +- Row count validation after `IMPORT` is now enabled by default in async mode. 
After an `IMPORT` completes, a background `INSPECT` job validates that the imported row count matches expectations. The `IMPORT` result now includes an `inspect_job_id` column so the `INSPECT` job can be viewed separately. The `bulkio.import.row_count_validation.mode` cluster setting controls this behavior, with valid values of `off`, `async` (default), and `sync`. [#163543][#163543] +- TTL jobs are now owned by the schedule owner instead of the `node` user. This allows users with `CONTROLJOB` privilege to cancel TTL jobs, provided the schedule owner is not an admin (`CONTROLJOB` does not grant control over admin-owned jobs). [#161226][#161226] +- Calling `information_schema.crdb_rewrite_inline_hints` now requires the `REPAIRCLUSTER` privilege. [#160716][#160716] +- The **Statement Details** page URL format has changed from `/statement/{implicitTxn}/{statementId}` to `/statement/{statementId}`. As a result, bookmarks using the old URL structure will no longer work. [#159558][#159558] +- Changed the unit of measurement for admission control duration metrics from microseconds to nanoseconds. The following metrics are affected: `admission.granter.slots_exhausted_duration.kv`, `admission.granter.cpu_load_short_period_duration.kv`, `admission.granter.cpu_load_long_period_duration.kv`, `admission.granter.io_tokens_exhausted_duration.kv`, `admission.granter.elastic_io_tokens_exhausted_duration.kv`, and `admission.elastic_cpu.nanos_exhausted_duration`. Note that dashboards displaying these metrics will show a discontinuity at upgrade time, with pre-upgrade values appearing much lower due to the unit change. [#160956][#160956] +- Renamed the builtin function `crdb_internal.inject_hint` (introduced in v26.1.0-alpha.2) to `information_schema.crdb_rewrite_inline_hints`. [#160716][#160716] +- Removed the `incremental_location` option from `BACKUP` and `CREATE SCHEDULE FOR BACKUP`. [#159189][#159189] +- Removed the `incremental_location` option from `SHOW BACKUP` and `RESTORE`. [#160416][#160416]
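For workloads that legitimately write large rows, the row-size guardrail bullet above notes that the previous behavior can be restored by raising the two settings. A minimal sketch, assuming the usual byte-size literal syntax and using the pre-upgrade defaults as the target values:

```sql
-- Restore the pre-upgrade defaults for the row-size guardrails.
SET CLUSTER SETTING sql.guardrails.max_row_size_log = '64MiB';
SET CLUSTER SETTING sql.guardrails.max_row_size_err = '512MiB';
```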

Security updates

@@ -19,7 +29,7 @@ Release Date: March 11, 2026

General changes

-- Changefeeds now support the `partition_alg` option for specifying a Kafka partitioning algorithm. Currently `fnv-1a` (default) and `murmur2` are supported. The option is only valid on Kafka v2 sinks. This is protected by the cluster setting `changefeed.partition_alg.enabled`. An example usage: `SET CLUSTER SETTING changefeed.partition_alg.enabled=true; CREATE CHANGEFEED ... INTO 'kafka://...' WITH partition_alg='murmur2';`. Note that if a changefeed is created using the murmur2 algorithm, and then the cluster setting is disabled, the changefeed will continue using the murmur2 algorithm unless the changefeed is altered to use a different `partition_alg`. [#161265][#161265] +- Changefeeds now support the `partition_alg` option for specifying a Kafka partitioning algorithm. Currently `fnv-1a` (default) and `murmur2` are supported. The option is only valid on Kafka v2 sinks. This is protected by the cluster setting `changefeed.partition_alg.enabled`. An example usage: `SET CLUSTER SETTING changefeed.partition_alg.enabled=true; CREATE CHANGEFEED ... INTO 'kafka://...' WITH partition_alg='murmur2';`. Note that if a changefeed is created using the `murmur2` algorithm, and then the cluster setting is disabled, the changefeed will continue using the `murmur2` algorithm unless the changefeed is altered to use a different `partition_alg`. [#161265][#161265]

{{ site.data.products.enterprise }} edition changes

@@ -28,65 +38,57 @@ Release Date: March 11, 2026

SQL language changes

-- Fixed a bug that caused a routine with an `INSERT` statement to unnecessarily block dropping a hash-sharded index or computed column on the target table. This fix applies only to newly created routines. In releases prior to v25.3, the fix must be enabled by setting the session variable `use_improved_routine_dependency_tracking` to `on`. [#146250][#146250] -- The output of `EXPLAIN [ANALYZE]` in non-`VERBOSE` mode is now more succinct. [#153361][#153361] -- A database-level changefeed with no tables will periodically poll to check for tables added to the database. The new option `hibernation_polling_frequency` sets the frequency at which the polling occurs, until a table is found, at which point polling ceases. [#156771][#156771] -- `crdb_internal.datums_to_bytes` is now available in the `information_schema` system catalog as `information_schema.crdb_datums_to_bytes`. [#156963][#156963] -- Queries executed via the vectorized engine now display their progress in the `phase` column of `SHOW QUERIES`. Previously, this feature was only available in the row-by-row engine. [#158029][#158029] +- Added the `MAINTAIN` privilege, which can be granted on tables and materialized views. Users with the `MAINTAIN` privilege on a materialized view can execute `REFRESH MATERIALIZED VIEW` without being the owner. Users with the `MAINTAIN` privilege on a table can execute `ANALYZE` without needing `SELECT`. This aligns with PostgreSQL 17 behavior. [#164236][#164236] - Added cluster settings to control the number of concurrent automatic statistics collection jobs: - `sql.stats.automatic_full_concurrency_limit` controls the maximum number of concurrent full statistics collections. The default is 1. - `sql.stats.automatic_extremes_concurrency_limit` controls the maximum number of concurrent partial statistics collections using extremes. The default is 128. Note that at most one statistics collection job can run on a single table at a time. [#158835][#158835] -- Added the `STRICT` option for locality-aware backups. When enabled, backups fail if data from a KV node with one locality tag would be backed up to a bucket with a different locality tag, ensuring data domiciling compliance. [#158999][#158999] -- Fixed a bug where creating a routine could create unnecessary column dependencies when the routine references columns through CHECK constraints (including those for RLS policies and hash-sharded indexes) or partial index predicates. These unnecessary dependencies prevented dropping the column without first dropping the routine. The fix is gated behind the session setting `use_improved_routine_deps_triggers_and_computed_cols`, which is off by default prior to v26.1. [#159126][#159126] -- Changed the default value of the `sql.catalog.allow_leased_descriptors.enabled` cluster setting to `true`. This setting allows introspection tables like `information_schema` and `pg_catalog` to use cached descriptors when building the table results, which improves the performance of introspection queries when there are many tables in the cluster. [#159162][#159162] -- Added support for `SHOW STATEMENT HINTS`, which displays information about the statement hints (if any) associated with the given statement fingerprint string. The fingerprint is normalized in the same way as `EXPLAIN (FINGERPRINT)` before hints are matched. Example usage: `SHOW STATEMENT HINTS FOR ' SELECT * FROM xy WHERE x = 10 '` or `SHOW STATEMENT HINTS FOR $$ SELECT * FROM xy WHERE x = 10 $$ WITH DETAILS`. 
[#159231][#159231] - Added a new cluster setting `bulkio.import.distributed_merge.mode` to enable distributed merge support for `IMPORT` operations. When enabled (default: false), `IMPORT` jobs will use a two-phase approach where import processors first write SST files to local storage, then a coordinator merges and ingests them. This can improve performance for large imports by reducing L0 file counts and enabling merge-time optimizations. This feature requires all nodes to be running v26.1 or later. [#159330][#159330] -- `INSPECT` is now a generally available (GA) feature. The `enable_inspect_command` session variable has been deprecated, and is now effectively always set to `true`. [#159659][#159659] -- Users can now set the `use_backups_with_ids` session setting to enable a new `SHOW BACKUPS IN` experience. When enabled, `SHOW BACKUPS IN {collection}` displays all backups in the collection. Results can be filtered by backup end time using `OLDER THAN {timestamp}` or `NEWER THAN {timestamp}` clauses. Example usage: `SET use_backups_with_ids = true; SHOW BACKUPS IN '{collection}' OLDER THAN '2026-01-09 12:13:14' NEWER THAN '2026-01-04 15:16:17';` [#160137][#160137] -- The `information_schema.crdb_datums_to_bytes` built-in function is now documented. [#160486][#160486] -- Calling `information_schema.crdb_rewrite_inline_hints` now requires the REPAIRCLUSTER privilege. [#160716][#160716] -- Renamed the builtin function `crdb_internal.inject_hint` (introduced in v26.1.0-alpha.2) to `information_schema.crdb_rewrite_inline_hints`. [#160716][#160716] -- If the `use_backups_with_ids` session variable is set to true, the new `SHOW BACKUP` experience will be enabled. `SHOW BACKUP` will parse the IDs provided by `SHOW BACKUPS` and display contents for single backups. [#160812][#160812] -- None [#161211][#161211] -- If the `use_backups_with_ids` session variable is set, the new `RESTORE` experience will be enabled. `RESTORE` will be able to parse the IDs provided by `SHOW BACKUPS` and will restore the specified backup without the use of `AS OF SYSTEM TIME`. [#161294][#161294] -- Updated CockroachDB to allow a prefix of index key columns to be used for the shard column in a hash-sharded index. The `shard_columns` storage parameter may be used to override the default, which uses all index key columns in the shard column. [#161422][#161422] -- CockroachDB now shows execution statistics (like `execution time`) on `EXPLAIN ANALYZE` output for `render` nodes, which often handle built-in functions. [#161509][#161509] +- CockroachDB now supports the PostgreSQL session variables `tcp_keepalives_idle`, `tcp_keepalives_interval`, `tcp_keepalives_count`, and `tcp_user_timeout`. These allow per-session control over TCP keepalive behavior on each connection. A value of 0 (the default) uses the corresponding cluster setting. Non-zero values override the cluster setting for that session only. Units match PostgreSQL: seconds for keepalive settings, milliseconds for `tcp_user_timeout`. [#164369][#164369] - Added the `optimizer_inline_any_unnest_subquery` session setting to enable/disable the optimizer rule `InlineAnyProjectSet`. The setting is on by default in v26.2 and later. [#161880][#161880] -- Updated `DROP TRIGGER` to accept the `CASCADE` option for PostgreSQL compatibility. Since triggers in CockroachDB cannot have dependents, `CASCADE` behaves the same as `RESTRICT` or omitting the option entirely. 
[#161915][#161915]
+- Users can now set the `use_backups_with_ids` session setting to enable a new `SHOW BACKUPS IN` experience. When enabled, `SHOW BACKUPS IN {collection}` displays all backups in the collection. Results can be filtered by backup end time using `OLDER THAN {timestamp}` or `NEWER THAN {timestamp}` clauses. Example usage: `SET use_backups_with_ids = true; SHOW BACKUPS IN '{collection}' OLDER THAN '2026-01-09 12:13:14' NEWER THAN '2026-01-04 15:16:17';` [#160137][#160137]
+- If the new `SHOW BACKUP` experience is enabled by setting the `use_backups_with_ids` session variable to true, `SHOW BACKUP` will parse the IDs provided by `SHOW BACKUPS` and display contents for single backups. [#160812][#160812]
+- If the new `RESTORE` experience is enabled by setting the `use_backups_with_ids` session variable to true, `RESTORE` will parse the IDs provided by `SHOW BACKUPS` and will restore the specified backup without the use of `AS OF SYSTEM TIME`. [#161294][#161294]
+- `SHOW BACKUP` and `RESTORE` now allow backup IDs even if the `use_backups_with_ids` session variable is not set. Setting the variable only configures whether `LATEST` is resolved using the new or legacy path. [#162329][#162329]
+- Added the `REVISION START TIME` option to the new `SHOW BACKUPS` experience enabled via the `use_backups_with_ids` session variable. Use the `REVISION START TIME` option to view the revision start times of revision history backups. [#161328][#161328]
+- Added support for `SHOW STATEMENT HINTS`, which displays information about the statement hints (if any) associated with the given statement fingerprint string. The fingerprint is normalized in the same way as `EXPLAIN (FINGERPRINT)` before hints are matched. Example usage: `SHOW STATEMENT HINTS FOR ' SELECT * FROM xy WHERE x = 10 '` or `SHOW STATEMENT HINTS FOR $$ SELECT * FROM xy WHERE x = 10 $$ WITH DETAILS`. [#159231][#159231]
+- `CREATE OR REPLACE TRIGGER` is now supported. If a trigger with the same name already exists on the same table, it is replaced with the new definition. If no trigger with that name exists, a new trigger is created. [#162633][#162633]
- Added support for `ALTER TABLE ENABLE TRIGGER` and `ALTER TABLE DISABLE TRIGGER` syntax. This allows users to temporarily disable triggers without dropping them, and later re-enable them. The syntax supports disabling/enabling individual triggers by name, or all triggers on a table using the `ALL` or `USER` keywords. [#161924][#161924]
-- Added support for the `pg_trigger_depth()` builtin function, which returns the current nesting level of PostgreSQL triggers (0 if not called from inside a trigger). [#162286][#162286]
+- Updated `DROP TRIGGER` to accept the `CASCADE` option for PostgreSQL compatibility. Since triggers in CockroachDB cannot have dependents, `CASCADE` behaves the same as `RESTRICT` or omitting the option entirely. [#161915][#161915]
- `ALTER TABLE ... DROP CONSTRAINT` can now be used to drop `UNIQUE` constraints. The backing `UNIQUE` index will also be dropped, as CockroachDB treats the constraint and index as the same thing. [#162345][#162345]
-- `CREATE OR REPLACE TRIGGER` is now supported. If a trigger with the same name already exists on the same table, it is replaced with the new definition. If no trigger with that name exists, a new trigger is created. [#162633][#162633]
- `DROP COLUMN` and `DROP INDEX` with `CASCADE` now properly drop dependent triggers. Previously, these operations would fail with an unimplemented error when a trigger depended on the column or index being dropped. [#163296][#163296]
- `CREATE OR REPLACE FUNCTION` now works on trigger functions that have active triggers. Previously, this was blocked with an unimplemented error, requiring users to drop and recreate triggers. The replacement now atomically updates all dependent triggers to execute the new function body. [#163348][#163348]
+- Updated CockroachDB to allow a prefix of index key columns to be used for the shard column in a hash-sharded index. The `shard_columns` storage parameter may be used to override the default, which uses all index key columns in the shard column. [#161422][#161422]
+- Added support for the `pg_trigger_depth()` builtin function, which returns the current nesting level of PostgreSQL triggers (0 if not called from inside a trigger). [#162286][#162286]
+- A database-level changefeed with no tables will periodically poll to check for tables added to the database. The new option `hibernation_polling_frequency` sets the frequency at which the polling occurs, until a table is found, at which point polling ceases. [#156771][#156771]
+- `INSPECT` is now a generally available (GA) feature. The `enable_inspect_command` session variable has been deprecated, and is now effectively always set to `true`. [#159659][#159659]
+- Added the `STRICT` option for locality-aware backups. When enabled, backups fail if data from a KV node with one locality tag would be backed up to a bucket with a different locality tag, ensuring data domiciling compliance. [#158999][#158999]
- Added support for the `dmetaphone()`, `dmetaphone_alt()`, and `daitch_mokotoff()` built-in functions, completing CockroachDB's implementation of the PostgreSQL `fuzzystrmatch` extension. `dmetaphone` and `dmetaphone_alt` return Double Metaphone phonetic codes for a string, and `daitch_mokotoff` returns an array of Daitch-Mokotoff soundex codes. These functions are useful for fuzzy string matching based on phonetic similarity. [#163430][#163430]
-- Row count validation after `IMPORT` is now enabled by default in async mode. After an `IMPORT` completes, a background `INSPECT` job validates that the imported row count matches expectations. The `IMPORT` result now includes an `inspect_job_id` column so the `INSPECT` job can be viewed separately. The `bulkio.import.row_count_validation.mode` cluster setting controls this behavior, with valid values of `off`, `async` (default), and `sync`. [#163543][#163543]
-- Added the `MAINTAIN` privilege, which can be granted on tables and materialized views. Users with the `MAINTAIN` privilege on a materialized view can execute `REFRESH MATERIALIZED VIEW` without being the owner. Users with the `MAINTAIN` privilege on a table can execute `ANALYZE` without needing `SELECT`. This aligns with PostgreSQL 17 behavior. [#164236][#164236]
-- CockroachDB now supports the PostgreSQL session variables `tcp_keepalives_idle`, `tcp_keepalives_interval`, `tcp_keepalives_count`, and `tcp_user_timeout`. These allow per-session control over TCP keepalive behavior on each connection. A value of 0 (the default) uses the corresponding cluster setting. Non-zero values override the cluster setting for that session only. Units match PostgreSQL: seconds for keepalive settings, milliseconds for `tcp_user_timeout`. [#164369][#164369]
+- `crdb_internal.datums_to_bytes` is now available in the `information_schema` system catalog as `information_schema.crdb_datums_to_bytes`. [#156963][#156963]
+- The `information_schema.crdb_datums_to_bytes` built-in function is now documented. [#160486][#160486]
+- Queries executed via the vectorized engine now display their progress in the `phase` column of `SHOW QUERIES`. Previously, this feature was only available in the row-by-row engine. [#158029][#158029]
+- CockroachDB now shows execution statistics (like `execution time`) on `EXPLAIN ANALYZE` output for `render` nodes, which often handle built-in functions. [#161509][#161509]
+- The output of `EXPLAIN [ANALYZE]` in non-`VERBOSE` mode is now more succinct. [#153361][#153361]
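
As a quick illustration of the new backup-listing and statement-hints syntax quoted in the bullets above, a minimal sketch follows (not part of the patch): `'{collection}'` and the timestamps are placeholders, and the ID-based `SHOW BACKUP`/`RESTORE` forms are omitted because their exact syntax is not quoted in these notes.

```sql
-- Sketch only: '{collection}' and the timestamps are placeholders.
SET use_backups_with_ids = true;

-- List backups in a collection, filtered by backup end time.
SHOW BACKUPS IN '{collection}'
    OLDER THAN '2026-01-09 12:13:14'
    NEWER THAN '2026-01-04 15:16:17';

-- Show any hints attached to a statement fingerprint; the fingerprint is
-- normalized the same way as EXPLAIN (FINGERPRINT) before matching.
SHOW STATEMENT HINTS FOR $$ SELECT * FROM xy WHERE x = 10 $$ WITH DETAILS;
```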

Operational changes

-- A new cluster setting `kvadmission.store.snapshot_ingest_bandwidth_control.min_rate.enabled` is introduced. When this setting is enabled and disk bandwidth-based admission control is active, snapshot ingestion will be admitted at a minimum rate. This prevents snapshot ingestion from being starved by other elastic work. [#159436][#159436]
+- The new `cockroach gen dashboard` command generates standardized monitoring dashboards from an embedded configuration file. It outputs a dashboard JSON file for either Datadog (`--tool=datadog`) or Grafana (`--tool=grafana`), with Grafana dashboards using Prometheus queries. The generated dashboards include metrics across Overview, Hardware, Runtime, Networking, SQL, and Storage categories. Use `--output` to set the output file path and `--rollup-interval` to control metric aggregation. [#161050][#161050]
+- Added the `server.sql_tcp_user.timeout` cluster setting, which specifies the maximum amount of time transmitted data can remain unacknowledged before the underlying TCP connection is forcefully closed. This setting is enabled by default with a value of 30 seconds and is supported on Linux and macOS (Darwin). [#164037][#164037]
+- Introduced a new cluster setting `kvadmission.store.snapshot_ingest_bandwidth_control.min_rate.enabled`. When this setting is enabled and disk bandwidth-based admission control is active, snapshot ingestion will be admitted at a minimum rate. This prevents snapshot ingestion from being starved by other elastic work. [#159436][#159436]
- The `kv.range_split.load_sample_reset_duration` cluster setting now defaults to `30m`. This should improve load-based splitting in rare edge cases. [#159499][#159499]
- Added the `kv.protectedts.protect`, `kv.protectedts.release`, `kv.protectedts.update_timestamp`, `kv.protectedts.get_record`, and `kv.protectedts.mark_verified` metrics to track protected timestamp storage operations. These metrics help diagnose issues with excessive protected timestamp churn and operational errors. Each operation tracks both successful completions (`.success`) and failures (`.failed`, such as `ErrExists` or `ErrNotExists`). Operators can monitor these metrics to understand PTS system behavior and identify performance issues related to backups, changefeeds, and other features that use protected timestamps. [#160129][#160129]
+- Added a new metric `sql.rls.policies_applied.count` that tracks the number of SQL statements where row-level security (RLS) policies were applied during query planning. [#164405][#164405]
+- External connections can now be used with online restore. [#159090][#159090]
- Changed goroutine profile dumps from human-readable `.txt.gz` files to binary proto `.pb.gz` files. This improves the performance of the goroutine dumper by eliminating brief in-process pauses that occurred when collecting goroutine stacks. [#160798][#160798]
-- Added a new structured event of type `rewrite_inline_hints` that is emitted when an inline-hints rewrite rule is added using `information_schema.crdb_rewrite_inline_hints`. This event is written to both the event log and the OPS channel. [#160901][#160901]
+- Added a new structured event of type `rewrite_inline_hints` that is emitted when an inline-hints rewrite rule is added using `information_schema.crdb_rewrite_inline_hints`. This event is written to both the event log and the `OPS` channel. [#160901][#160901]
- Added a new metric `sql.query.with_statement_hints.count` that is incremented whenever a statement is executed with one or more external statement hints applied. An example of an external statement hint is an inline-hints rewrite rule added by calling `information_schema.crdb_rewrite_inline_hints`. [#161043][#161043]
-- The new `cockroach gen dashboard` command generates standardized monitoring dashboards from an embedded configuration file. It outputs a dashboard JSON file for either Datadog (`--tool=datadog`) or Grafana (`--tool=grafana`), with Grafana dashboards using Prometheus queries. The generated dashboards include metrics across Overview, Hardware, Runtime, Networking, SQL, and Storage categories. Use `--output` to set the output file path and `--rollup-interval` to control metric aggregation. [#161050][#161050]
-- this patch enables hash sharded indexes and
- secondary indexes with virtually computed columns in LDR. [#161062][#161062]
-- TTL jobs are now owned by the schedule owner instead of the `node` user. This allows users with `CONTROLJOB` privilege to cancel TTL jobs, provided the schedule owner is not an admin (`CONTROLJOB` does not grant control over admin-owned jobs). [#161226][#161226]
+- Logical Data Replication (LDR) now supports hash-sharded indexes and secondary indexes with virtual computed columns. Previously, tables with these index types could not be replicated using LDR. [#161062][#161062]
- Backup schedules that utilize the `revision_history` option now apply that option only to incremental backups triggered by that schedule, rather than duplicating the revision history in the full backups as well. [#162105][#162105]
-- ranges are now more accurately
- and likely to be reported as lagging. [#163427][#163427]
-- The `build.timestamp` Prometheus metric now carries `major` and `minor` labels identifying the release series of the running CockroachDB binary (e.g. major=26, minor=1 for any v26.1.x build). [#163834][#163834]
-- The `bulkio.import.elastic_control.enabled` cluster setting is now enabled by default, allowing import operations to integrate with elastic CPU control and automatically throttle based on available resources. [#163867][#163867]
-- The `bulkio.ingest.sst_batcher_elastic_control.enabled` cluster setting is now enabled by default, allowing SST batcher operations to integrate with elastic CPU control and automatically throttle based on available resources. [#163868][#163868]
-- Added the `server.sql_tcp_user.timeout` cluster setting, which specifies the maximum amount of time transmitted data can remain unacknowledged before the underlying TCP connection is forcefully closed. This setting is enabled by default with a value of 30 seconds and is supported on Linux and macOS (Darwin). [#164037][#164037]
-- Added a new metric `sql.rls.policies_applied.count` that tracks the number of SQL statements where row-level security (RLS) policies were applied during query planning. [#164405][#164405]
+- The `build.timestamp` Prometheus metric now carries `major` and `minor` labels identifying the release series of the running CockroachDB binary (e.g., `major="26", minor="1"` for any v26.1.x build). [#163834][#163834]
+- Jobs now clear their running status messages upon successful completion. [#163765][#163765]
+- Changefeed ranges are now more accurately reported as lagging. [#163427][#163427]
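
To illustrate the new operational knobs named above, a minimal sketch (not part of the patch) of setting them via SQL; the `'30s'` duration form is an assumption, since the note only states that the setting defaults to 30 seconds.

```sql
-- Sketch only: values shown are illustrative, not recommendations.
SET CLUSTER SETTING kvadmission.store.snapshot_ingest_bandwidth_control.min_rate.enabled = true;

-- Assumed duration syntax; the setting defaults to 30 seconds per the note above.
SET CLUSTER SETTING server.sql_tcp_user.timeout = '30s';
```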

Command-line changes

@@ -97,18 +99,19 @@ Release Date: March 11, 2026

DB Console changes

-- The statement details page URL format has changed from `/statement/{implicitTxn}/{statementId}` to `/statement/{statementId}`. As a result, bookmarks using the old URL structure will no longer work. [#159558][#159558]
+- Added a new time-series bar graph called **Plan Distribution Over Time** to the **Statement Fingerprint** page, on the **Explain Plans** tab. It shows which execution plans were used in each time interval, helping detect shifts in query plan distributions. [#161011][#161011]
- The **SQL Activity** > **Sessions** page now defaults the **Session Status** filter to **Active, Idle** to exclude closed sessions. [#160576][#160576]
-- Changed the unit of measurement for admission control duration metrics from microseconds to nanoseconds. The following metrics are affected: `admission.granter.slots_exhausted_duration.kv`, `admission.granter.cpu_load_short_period_duration.kv`, `admission.granter.cpu_load_long_period_duration.kv`, `admission.granter.io_tokens_exhausted_duration.kv`, `admission.granter.elastic_io_tokens_exhausted_duration.kv`, and `admission.elastic_cpu.nanos_exhausted_duration`. Note that dashboards displaying these metrics will show a discontinuity at upgrade time, with pre-upgrade values appearing much lower due to the unit change. [#160956][#160956]
-- A new time-series bar graph called **Plan Distribution Over Time** has been added to the **Statement Fingerprint** page, on the **Explain Plans** tab. It shows which execution plans were used in each time interval, helping detect shifts in query plan distributions. [#161011][#161011]

Bug fixes

+- The fix for `node descriptor not found` errors for changefeeds with `execution_locality` filters in CockroachDB Basic and Standard clusters is now controlled by cluster setting `sql.instance_info.use_instance_resolver.enabled` (default: `true`). [#163947][#163947]
+- Fixed a bug that caused a routine with an `INSERT` statement to unnecessarily block dropping a hash-sharded index or computed column on the target table. This fix applies only to newly created routines. In releases prior to v25.3, the fix must be enabled by setting the session variable `use_improved_routine_dependency_tracking` to `on`. [#146250][#146250]
+- Fixed a bug where creating a routine could create unnecessary column dependencies when the routine references columns through CHECK constraints (including those for RLS policies and hash-sharded indexes) or partial index predicates. These unnecessary dependencies prevented dropping the column without first dropping the routine. The fix is gated behind the session setting `use_improved_routine_deps_triggers_and_computed_cols`, which is off by default prior to v26.1. [#159126][#159126]
- Fixed a bug that allowed a column to be dropped from a table even if it was referenced in the `RETURNING` clause of an `UPDATE` or `DELETE` statement in a routine. In releases prior to v25.3, the fix must be enabled by setting the session variable `use_improved_routine_dependency_tracking` to `on`. [#146250][#146250]
- CockroachDB could previously encounter internal errors like `column statistics cannot be determined for empty column set` and `invalid union` in some edge cases with `UNION`, `EXCEPT`, and `INTERCEPT`. This has now been fixed. [#150706][#150706]
- Fixed a bug that could cause a scan over a secondary index to read significantly more KVs than necessary in order to satisfy a limit when the scanned index had more than one column family. [#156672][#156672]
- Fixed an issue where long-running transactions with many statements could cause unbounded memory growth in the SQL statistics subsystem. When a transaction includes a large number of statements, the SQL statistics ingester now automatically flushes buffered statistics before the transaction commits. As a side effect, the flushed statement statistics might not have an associated transaction fingerprint ID because the transaction has not yet completed. In such cases, the transaction fingerprint ID cannot be backfilled after the fact. [#158527][#158527]
-- Fixed a bug that allowed columns to be dropped despite being referenced by a routine. This could occur when a column was only referenced as a target column in the SET clause of an UPDATE statement within the routine. This fix only applies to newly-created routines. In versions prior to v26.1, the fix must be enabled by setting the session variable `prevent_update_set_column_drop`. [#158935][#158935]
+- Fixed a bug that allowed columns to be dropped despite being referenced by a routine. This could occur when a column was only referenced as a target column in the `SET` clause of an `UPDATE` statement within the routine. This fix only applies to newly-created routines. In versions prior to v26.1, the fix must be enabled by setting the session variable `prevent_update_set_column_drop`. [#158935][#158935]
- Fixed a bug that caused newly-created routines to incorrectly prevent dropping columns that were not directly referenced, most notably columns referenced by computed column expressions. The fix is gated behind the session setting `use_improved_routine_deps_triggers_and_computed_cols`, which is off by default prior to v26.1. [#158935][#158935]
- Fixed a bug where schema changes could fail after a `RESTORE` due to missing session data. [#159176][#159176]
- The `ascii` built-in function now returns `0` when the input is the empty string instead of an error. [#159178][#159178]
@@ -144,8 +147,7 @@ Release Date: March 11, 2026
- Fixed an error that occurred when using generic query plans that generates a lookup join on indexes containing identity computed columns. [#162036][#162036]
- Fixed a bug that could cause changefeeds using Kafka v1 sinks to hang when the changefeed was cancelled. [#162058][#162058]
- Fixed an internal error `could not find format code for column N` that occurred when executing `EXPLAIN ANALYZE EXECUTE` statements via JDBC or other clients using the PostgreSQL binary protocol. [#162115][#162115]
-- have statement bundles contains the CREATE TYPE
- for User Defined Types (udt) where columns are of udt array type. [#162357][#162357]
+- Fixed a bug where statement bundles were missing `CREATE TYPE` statements for user-defined types used as array column types. [#162357][#162357]
- Fixed a bug in which PL/pgSQL UDFs with many `IF` statements would cause a timeout and/or OOM when executed from a prepared statement. This bug was introduced in v23.2.22, v24.1.15, v24.3.9, v25.1.2, and v25.2.0. [#162512][#162512]
- Fixed a bug where an error would occur when defining a foreign key on a hash-sharded primary key without explicitly providing the primary key columns. [#162608][#162608]
- Fixed a bug where generating a debug zip could trigger an out-of-memory (OOM) condition on a node if malformed log entries were present in logs using `json` or `json-compact` formatting. This bug was introduced in v24.1. [#163224][#163224]
@@ -161,21 +163,19 @@ Release Date: March 11, 2026
- Fixed a bug where `EXPLAIN ANALYZE (DEBUG)` statement bundles did not include triggers, their functions, or tables modified by those triggers. The bundle's `schema.sql` file now contains the `CREATE TRIGGER`, `CREATE FUNCTION`, and `CREATE TABLE` statements needed to fully reproduce the query environment when triggers are involved. [#163584][#163584]
- Fixed a rare data race during parallel constraint checks where a fresh descriptor collection could resolve a stale enum type version. This bug was introduced in v26.1.0. [#163883][#163883]
- Fixed a bug where running **changefeeds** with `envelope=enriched` and `enriched_properties` containing `source` would cause failures during a **cluster upgrade**. [#163885][#163885]
-- The fix for `node descriptor not found` errors for changefeeds with `execution_locality` filters in CockroachDB Basic and Standard clusters is now controlled by cluster setting `sql.instance_info.use_instance_resolver.enabled` (default: `true`). [#163947][#163947]
- Fixed a bug where dropped columns appeared in `pg_catalog.pg_attribute` with the `atttypid` column equal to 2283 (`anyelement`). Now this column will be 0 for dropped columns. This matches PostgreSQL behavior, where `atttypid=0` is used for dropped columns. [#163950][#163950]
- Fixed a race condition/conflict between concurrent `ALTER FUNCTION ... SET SCHEMA` and `DROP SCHEMA` operations. [#164043][#164043]
- Fixed a bug where super region zone configurations did not constrain all replicas to regions within the super region. [#164285][#164285]
- Fixed a bug where CockroachDB returned "cached plan must not change result type" errors during the `Execute` phase instead of the `Bind` phase of the extended pgwire protocol. This caused compatibility issues with drivers like pgx that expect the error before `BindComplete` is sent, particularly when using batch operations with prepared statements after schema changes. [#164406][#164406]
-- ensure the generation of JSON column referenced
- in partial idnex predicates respects the
- sql.stats.non_indexed_json_histograms.enabled cluster setting (default false). [#164477][#164477]
+- Statistics histogram collection is now skipped for JSON columns referenced in partial index predicates, except when `sql.stats.non_indexed_json_histograms.enabled` is true (default: false). [#164477][#164477]
+- Fixed a bug where import rollback could incorrectly revert data in a table that was already online. This could only occur if an import job was cancelled or failed after the import had already succeeded and the table was made available for use. [#159627][#159627]
+- Invalid `avro_schema_prefix` is now caught during statement time. The prefix must start with `[A-Za-z_]` and subsequently contain only `[A-Za-z0-9_]`, as specified in the [Avro specification](https://avro.apache.org/docs/1.8.1/spec.html). [#159869][#159869]
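
Several of the fixes above are opt-in on older releases via the session variables they name. A minimal sketch (not part of the patch) of enabling them for a session:

```sql
-- Sketch only: enables the opt-in fixes described in the bullets above.
SET use_improved_routine_dependency_tracking = on;
SET use_improved_routine_deps_triggers_and_computed_cols = on;
-- Assumed boolean form; the note names the variable but not the value syntax.
SET prevent_update_set_column_drop = on;
```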

Performance improvements

- Database- and table-level backups no longer fetch all object descriptors from disk in order to resolve the backup targets. Now only the objects that are referenced by the targeted objects will be fetched. This improves performance when there are many tables in the cluster. [#157790][#157790]
- Various background tasks and jobs now more actively yield to foreground work when that work is waiting to run. [#159205][#159205]
- Improved changefeed performance when filtering unwatched column families and offline tables by replacing expensive error chain traversal with direct status enum comparisons. [#159745][#159745]
-- The session variable `distsql_prevent_partitioning_soft_limited_scans` is now enabled by default. This prevents scans with soft limits from being planned as multiple TableReaders, which decreases the initial setup costs of some fully-distributed query plans. [#160051][#160051]
- Added a new session variable, `distsql_prevent_partitioning_soft_limited_scans`, which, when true, prevents scans with soft limits from being planned as multiple TableReaders by the physical planner. This should decrease the initial setup costs of some fully-distributed query plans. [#160051][#160051]
- Fixed a performance regression in `pg_catalog.pg_roles` and `pg_catalog.pg_authid` by avoiding privilege lookups for each row in the table. [#160121][#160121]
- Queries that have comparison expressions with the `levenshtein` built-in are now up to 30% faster. [#160394][#160394]
@@ -186,20 +186,6 @@ Release Date: March 11, 2026
- Improved changefeed checkpointing performance when changefeeds are lagging. Previously, checkpoint updates could be redundantly applied multiple times per checkpoint operation. [#162546][#162546]
- The query optimizer now eliminates redundant filter and projection operators over inputs with zero cardinality, even when the filter or projection expressions are not leakproof. This produces simpler, more efficient query plans in cases where joins or other operations fold to zero rows. [#164212][#164212]
-

Build changes

-
-- Replaces bors with Trunk merge queue for better performance and reliability. Configuration-only change with no runtime impact - maintains same safety checks while improving CI workflow. [#161230][#161230]
-
-

Miscellaneous

-
-- External connections can now be used with online restore. [#159090][#159090]
-- Fixed a bug where import rollback could incorrectly revert data in a table that was already online. This could only occur if an import job was cancelled or failed after the import had already succeeded and the table was made available for use. [#159627][#159627]
-- Invalid `avro_schema_prefix` is now caught during statement time. The prefix must start with `[A-Za-z_]` and subsequently contain only `[A-Za-z0-9_]`, as specified in the [Avro specification](https://avro.apache.org/docs/1.8.1/spec.html). [#159869][#159869]
-- Added the `REVISION START TIME` option to the new `SHOW BACKUPS` experience enabled via the `use_backups_with_ids` session variable. Use the `REVISION START TIME` option to view the revision start times of revision history backups. [#161328][#161328]
-- `SHOW BACKUP` and `RESTORE` now allow backup IDs even if the `use_backups_with_ids` session variable is not set. Setting the variable only configures whether `LATEST` is resolved using the new or legacy path. [#162329][#162329]
-- Job running statuses are now cleared after completing successfully. [#163765][#163765]
-
-
[#161806]: https://github.com/cockroachdb/cockroach/pull/161806
[#164236]: https://github.com/cockroachdb/cockroach/pull/164236
[#158527]: https://github.com/cockroachdb/cockroach/pull/158527

From 5299bfb2dd3bd11cc1736a353068101a369b3696 Mon Sep 17 00:00:00 2001
From: Ryan Kuo
Date: Wed, 11 Mar 2026 13:21:30 -0400
Subject: [PATCH 4/4] sequencing tweaks

---
 src/current/_includes/releases/v26.2/v26.2.0-alpha.1.md | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/current/_includes/releases/v26.2/v26.2.0-alpha.1.md b/src/current/_includes/releases/v26.2/v26.2.0-alpha.1.md
index e545cdfb192..a17c000a6dc 100644
--- a/src/current/_includes/releases/v26.2/v26.2.0-alpha.1.md
+++ b/src/current/_includes/releases/v26.2/v26.2.0-alpha.1.md
@@ -14,7 +14,6 @@ Release Date: March 11, 2026
- The `bulkio.ingest.sst_batcher_elastic_control.enabled` cluster setting is now enabled by default, allowing SST batcher operations to integrate with elastic CPU control and automatically throttle based on available resources. [#163868][#163868]
- The session variable `distsql_prevent_partitioning_soft_limited_scans` is now enabled by default. This prevents scans with soft limits from being planned as multiple TableReaders, which decreases the initial setup costs of some fully-distributed query plans. [#160051][#160051]
- Creating or altering a changefeed or Kafka/Pub/Sub external connection now returns an error when the `topic_name` query parameter is explicitly set to an empty string in the sink URI, rather than silently falling back to using the table name as the topic name. Existing changefeeds with an empty `topic_name` are not affected. [#164225][#164225]
-- Row count validation after `IMPORT` is now enabled by default in async mode. After an `IMPORT` completes, a background `INSPECT` job validates that the imported row count matches expectations. The `IMPORT` result now includes an `inspect_job_id` column so the `INSPECT` job can be viewed separately. The `bulkio.import.row_count_validation.mode` cluster setting controls this behavior, with valid values of `off`, `async` (default), and `sync`. [#163543][#163543]
- TTL jobs are now owned by the schedule owner instead of the `node` user. This allows users with `CONTROLJOB` privilege to cancel TTL jobs, provided the schedule owner is not an admin (`CONTROLJOB` does not grant control over admin-owned jobs). [#161226][#161226]
- Calling `information_schema.crdb_rewrite_inline_hints` now requires the `REPAIRCLUSTER` privilege. [#160716][#160716]
- The **Statement Details** page URL format has changed from `/statement/{implicitTxn}/{statementId}` to `/statement/{statementId}`. As a result, bookmarks using the old URL structure will no longer work. [#159558][#159558]
@@ -68,6 +67,7 @@ Release Date: March 11, 2026
- Added support for the `dmetaphone()`, `dmetaphone_alt()`, and `daitch_mokotoff()` built-in functions, completing CockroachDB's implementation of the PostgreSQL `fuzzystrmatch` extension. `dmetaphone` and `dmetaphone_alt` return Double Metaphone phonetic codes for a string, and `daitch_mokotoff` returns an array of Daitch-Mokotoff soundex codes. These functions are useful for fuzzy string matching based on phonetic similarity. [#163430][#163430]
- `crdb_internal.datums_to_bytes` is now available in the `information_schema` system catalog as `information_schema.crdb_datums_to_bytes`. [#156963][#156963]
- The `information_schema.crdb_datums_to_bytes` built-in function is now documented. [#160486][#160486]
+- Row count validation after `IMPORT` is now enabled by default in async mode. After an `IMPORT` completes, a background `INSPECT` job validates that the imported row count matches expectations. The `IMPORT` result now includes an `inspect_job_id` column so the `INSPECT` job can be viewed separately. The `bulkio.import.row_count_validation.mode` cluster setting controls this behavior, with valid values of `off`, `async` (default), and `sync`. [#163543][#163543]
- Queries executed via the vectorized engine now display their progress in the `phase` column of `SHOW QUERIES`. Previously, this feature was only available in the row-by-row engine. [#158029][#158029]
- CockroachDB now shows execution statistics (like `execution time`) on `EXPLAIN ANALYZE` output for `render` nodes, which often handle built-in functions. [#161509][#161509]
- The output of `EXPLAIN [ANALYZE]` in non-`VERBOSE` mode is now more succinct. [#153361][#153361]
@@ -173,10 +173,10 @@ Release Date: March 11, 2026

Performance improvements

+- Added a new session variable, `distsql_prevent_partitioning_soft_limited_scans`, which, when true, prevents scans with soft limits from being planned as multiple TableReaders by the physical planner. This should decrease the initial setup costs of some fully-distributed query plans. [#160051][#160051]
- Database- and table-level backups no longer fetch all object descriptors from disk in order to resolve the backup targets. Now only the objects that are referenced by the targeted objects will be fetched. This improves performance when there are many tables in the cluster. [#157790][#157790]
- Various background tasks and jobs now more actively yield to foreground work when that work is waiting to run. [#159205][#159205]
- Improved changefeed performance when filtering unwatched column families and offline tables by replacing expensive error chain traversal with direct status enum comparisons. [#159745][#159745]
-- Added a new session variable, `distsql_prevent_partitioning_soft_limited_scans`, which, when true, prevents scans with soft limits from being planned as multiple TableReaders by the physical planner. This should decrease the initial setup costs of some fully-distributed query plans. [#160051][#160051]
- Fixed a performance regression in `pg_catalog.pg_roles` and `pg_catalog.pg_authid` by avoiding privilege lookups for each row in the table. [#160121][#160121]
- Queries that have comparison expressions with the `levenshtein` built-in are now up to 30% faster. [#160394][#160394]
- The optimizer now better optimizes query plans of statements within UDFs and stored procedures that have `IN` subqueries. [#160503][#160503]
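
For completeness, a minimal sketch (not part of the patch) of toggling the session variable this hunk reorders; per the notes it is enabled by default, so explicit assignment is only needed to opt a session out or back in (boolean literal form assumed):

```sql
-- Sketch only: the variable defaults to enabled per the notes above.
SET distsql_prevent_partitioning_soft_limited_scans = false; -- opt the session out
SET distsql_prevent_partitioning_soft_limited_scans = true;  -- restore the default behavior
```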