diff --git a/scripts/aidlc-codereview/.gitignore b/scripts/aidlc-codereview/.gitignore new file mode 100644 index 00000000..e690b1a7 --- /dev/null +++ b/scripts/aidlc-codereview/.gitignore @@ -0,0 +1,8 @@ +__pycache__/ +*.pyc +*.pyo +*.egg-info/ +.venv/ +reports/ +dist/ +build/ diff --git a/scripts/aidlc-codereview/CHANGELOG.md b/scripts/aidlc-codereview/CHANGELOG.md new file mode 100644 index 00000000..31fd48e9 --- /dev/null +++ b/scripts/aidlc-codereview/CHANGELOG.md @@ -0,0 +1,19 @@ +# Changelog + +All notable changes to AIDLC Code Reviewer are documented in this file. + +## [0.2.0] - 2026-05-14 + +### Added + +- Initial release as a pip-installable package under `scripts/aidlc-codereview/` +- Static analysis pipeline: runs configured CLI tools in parallel (bandit, flake8, pyflakes, and more via auto-generation) +- AI-powered critical findings agent (Amazon Bedrock / Claude): identifies critical code sections requiring human review +- AI-powered code structure critique agent: evaluates logging, measurability, scalability, efficiency, complexity, and structure +- AI-powered business logic review agent: surfaces domain logic and business rules for human inspection +- Auto-generation of tool wrappers for unknown tools via LLM with Level 1 (static) and Level 2 (live) verification +- HTML and Markdown report generation (technical report, business logic report, and summary entry page) +- Pre-flight check command (`aidlc-code-reviewer --preflight`) to validate AWS credentials and Bedrock access +- Language detection to skip irrelevant tools automatically +- `code_reviewer.common.cli:main` entry point installable as `aidlc-code-reviewer` CLI +- Configuration via `review-config.yaml` (tool list) and `agent-config.yaml` (Bedrock model and AWS settings) diff --git a/scripts/aidlc-codereview/LICENSE b/scripts/aidlc-codereview/LICENSE new file mode 100644 index 00000000..76b355f5 --- /dev/null +++ b/scripts/aidlc-codereview/LICENSE @@ -0,0 +1,16 @@ +MIT No Attribution + 
+Copyright 2026 Amazon.com, Inc. or its affiliates. + +Permission is hereby granted, free of charge, to any person obtaining a copy of this +software and associated documentation files (the "Software"), to deal in the Software +without restriction, including without limitation the rights to use, copy, modify, +merge, publish, distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, +INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A +PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE +SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. diff --git a/scripts/aidlc-codereview/README.md b/scripts/aidlc-codereview/README.md new file mode 100644 index 00000000..c4376551 --- /dev/null +++ b/scripts/aidlc-codereview/README.md @@ -0,0 +1,53 @@ +# AIDLC Code Reviewer + +Automated, language-agnostic code quality analysis with AI-powered review via Amazon Bedrock. + +## Overview + +AIDLC Code Reviewer combines static analysis tools with AI agents to produce actionable +code review reports — covering security findings, structural critique, and business logic +review — all from a single CLI command. + +## Quick Start + +```bash +# Install +cd scripts/aidlc-codereview +pip install -e . 
+ +# Run pre-flight checks (validates AWS credentials and Bedrock access) +aidlc-code-reviewer --preflight + +# Review a directory +aidlc-code-reviewer path/to/your/code + +# Technical report only (no business logic review) +aidlc-code-reviewer path/to/your/code --technical-report + +# Business logic report only +aidlc-code-reviewer path/to/your/code --business-report + +# Verbose output +aidlc-code-reviewer path/to/your/code --verbose +``` + +Reports are written to `./reports/` by default (override with `--output-dir`). + +## Configuration + +- `src/code_reviewer/review-config.yaml` — list of static analysis tools to run +- `src/code_reviewer/agent-config.yaml` — Bedrock model ID, AWS region, and profile + +See [`docs/SETUP.md`](docs/SETUP.md) for detailed setup instructions. + +## Reports Generated + +| Report | Description | +|---|---| +| `code_review_summary_*.html` | Entry page linking all reports | +| `code_review_technical_*.{html,md}` | Static tool findings + critical sections + structure critique | +| `code_review_business_*.{html,md}` | Business logic and domain rule analysis | + +## License + +MIT-0 — see [LICENSE](LICENSE). diff --git a/scripts/aidlc-codereview/docs/SETUP.md b/scripts/aidlc-codereview/docs/SETUP.md new file mode 100644 index 00000000..c3d0af20 --- /dev/null +++ b/scripts/aidlc-codereview/docs/SETUP.md @@ -0,0 +1,281 @@ +# AIDLC Code Reviewer - Setup Guide + +## Quick Start + +```bash +# From the repository root, enter the package directory +cd scripts/aidlc-codereview + +# Create and activate a virtual environment +python3 -m venv .venv +source .venv/bin/activate # macOS / Linux +# .venv\Scripts\activate # Windows + +# Install the package +pip install -e . + +# Run a review +aidlc-code-reviewer ./src +``` + +This installs the CLI with all dependencies declared in `pyproject.toml`: Python static analysis tools (bandit, ruff, mypy, radon, vulture) and the AI agent (strands-agents, boto3, pydantic, etc.) for auto-generating wrappers and producing AI-powered report sections. Semgrep is not declared as a dependency; install it separately with `pip install semgrep` if you list it in `review-config.yaml`. 
+ +--- + +## Prerequisites + +### Python + +- **Python 3.11 or higher** is required (`pyproject.toml` specifies `>=3.11`) +- Verify with: `python3 --version` + +### AWS Credentials (required for agent features) + +The agent needs AWS credentials with Amazon Bedrock access. Choose the highest-priority option available to you. + +> **Note:** Under the AWS Shared Responsibility Model, you are responsible for securing your AWS credentials, configuring least-privilege IAM policies, and rotating credentials regularly. + +#### Priority order + +| Priority | Option | When to use | +|----------|--------|-------------| +| 1 | IAM Role (EC2 / ECS / Lambda) | Running on AWS compute | +| 2 | AWS IAM Identity Center (SSO) | Running from a workstation with org access | +| 3 | Named AWS profile with SSO | Local development against an SSO-enabled account | +| 4 | Named AWS profile with static keys | Local development without SSO (least preferred) | + +#### Priority 1: IAM Role (recommended for AWS compute) + +**Security control:** AWS STS issues temporary credentials to the compute resource via the instance metadata service. Credentials are automatically rotated approximately every 6 hours and are scoped to the role's trust and permission policies. No secrets are stored on disk. + +**Security improvement:** Eliminates long-term credential exposure entirely. Credential rotation is automatic with no developer action required. + +If running on Amazon EC2, Amazon ECS, or AWS Lambda, boto3 picks up the role credentials automatically — no configuration needed. Attach a role with `bedrock:InvokeModel` permission to your compute resource. + +#### Priority 2: AWS IAM Identity Center / SSO (recommended for workstations) + +**Security control:** AWS IAM Identity Center federates authentication through your identity provider (e.g., Okta, Azure AD) and issues short-lived credentials via AWS STS. Default session duration is 1–12 hours (configurable by your admin). 
Credentials are never written as long-term keys. + +**Security improvement:** Provides temporary credentials that expire automatically, enforces MFA through the identity provider, and centralizes access auditing in CloudTrail. + +```bash +aws configure sso +# Follow the prompts to authenticate via your identity provider. +# Then set the resulting profile: +export AWS_PROFILE=my-sso-profile +``` + +Or in `agent-config.yaml`: +```yaml +aws: + profile_name: "my-sso-profile" +``` + +#### Priority 3–4: Named AWS profile (local development fallback) + +**Security control:** A named profile in `~/.aws/credentials` or `~/.aws/config`. When backed by SSO (Priority 3), it inherits the same temporary-credential benefits above. When backed by static access keys (Priority 4), credentials do not expire unless manually rotated. + +**Security improvement (SSO-backed):** Same as Priority 2 — temporary, auto-expiring credentials. + +**Security risk (static keys):** Credentials are long-lived and stored in plaintext on disk. If compromised, they remain valid until manually revoked. Use this only when SSO is unavailable, and rotate keys at least every 90 days. + +```bash +aws configure --profile my-profile +# Then set in agent-config.yaml: +# aws: +# profile_name: "my-profile" +# Or via environment variable: +export AWS_PROFILE=my-profile +``` + +> **Important:** Avoid static access keys whenever possible. Prefer IAM roles (Priority 1) or IAM Identity Center (Priority 2) for temporary, automatically rotated credentials. + +### Enable Amazon Bedrock Model Access + +In the AWS Console: +1. Go to **Amazon Bedrock** > **Model access** +2. Request access to **Anthropic Claude Sonnet 4.6** (or your preferred model) +3. Wait for access to be granted (usually immediate for on-demand) + +The default model is `us.anthropic.claude-sonnet-4-6`. 
To change it: + +```yaml +# agent-config.yaml +agent: + model_id: "us.anthropic.claude-sonnet-4-6" +``` + +Or via environment variable: +```bash +export BEDROCK_MODEL_ID=us.anthropic.claude-sonnet-4-6 +``` + +### Java Tools (optional) + +For Java analysis (PMD, PMD-CPD, Checkstyle, javac): + +- **Java JDK 17+**: `java --version` +- **PMD**: Download from [pmd.github.io](https://pmd.github.io/) or `brew install pmd` +- **Checkstyle**: `brew install checkstyle` or download the JAR + +These are only needed if you have Java tools in your `review-config.yaml` and Java files in your target. + +### Third-Party CLI Tools + +Each tool listed in `review-config.yaml` needs its CLI installed and on PATH for the wrapper to run it. Built-in Python tools (bandit, ruff, etc.) are installed as pip dependencies. External tools must be installed separately: + +| Tool | Install | Purpose | +|------|---------|---------| +| bandit | `pip install bandit` (auto) | Python security | +| ruff | `pip install ruff` (auto) | Python linting | +| mypy | `pip install mypy` (auto) | Python type checking | +| radon | `pip install radon` (auto) | Python complexity | +| vulture | `pip install vulture` (auto) | Python dead code | +| semgrep | `pip install semgrep` | Multi-language security | +| gitleaks | `brew install gitleaks` | Secret detection | +| pmd | `brew install pmd` | Java linting/complexity | +| checkstyle | `brew install checkstyle` | Java style | +| javac | Comes with JDK | Java compilation | +| pylint | `pip install pylint` | Python linting (no built-in wrapper) | + +Tools marked **(auto)** are installed automatically with `pip install -e .`. Others need manual installation. If a tool isn't installed, its wrapper returns an error and the tool is skipped. + +--- + +## Verify Setup + +```bash +aidlc-code-reviewer ./src --preflight +``` + +If you encounter `LLM invocation failed`, check your AWS credentials and Amazon Bedrock model access. 
+ +--- + +## Configuration + +### review-config.yaml + +Defines which tools to run: + +```yaml +tools: + - pylint + - flake8 + - bandit +``` + +Just add a tool name to the list. If a built-in wrapper exists in `tools/`, it's used. Otherwise the agent generates one automatically. + +### agent-config.yaml + +Agent and Amazon Bedrock settings: + +```yaml +agent: + model_id: "us.anthropic.claude-sonnet-4-6" # Amazon Bedrock inference profile ID + max_tokens: 16384 # Max response tokens + max_retries: 2 # Retries on verification failure + +aws: + region: "us-east-1" # AWS region + profile_name: null # Named AWS profile (or null) +``` + +**Environment variable overrides** (take precedence over the YAML file): + +| Variable | Overrides | +|----------|-----------| +| `AWS_REGION` | `aws.region` | +| `AWS_PROFILE` | `aws.profile_name` | +| `BEDROCK_MODEL_ID` | `agent.model_id` | + +--- + +## CLI Usage + +```bash +# Default: generates both technical and business logic reports +aidlc-code-reviewer <target> + +# Technical report only (static tools + critical findings + structure critique) +aidlc-code-reviewer <target> --technical-report + +# Business logic report only (AI-driven, skips static tools — faster) +aidlc-code-reviewer <target> --business-report + +# Both flags = same as default +aidlc-code-reviewer <target> --technical-report --business-report + +# With custom config +aidlc-code-reviewer <target> --config my-config.yaml + +# Custom output directory (default: ./reports/) +aidlc-code-reviewer <target> --output-dir ./my-reports + +# Disable auto-generation (skip tools without built-in wrappers) +aidlc-code-reviewer <target> --no-generate +``` + +### Flags + +| Flag | Description | +|------|-------------| +| `<target>` | Path to file or directory to analyze (required) | +| `-c`, `--config` | Path to review-config.yaml (default: built-in) | +| `-o`, `--output-dir` | Output directory for reports (default: `./reports/`) | +| `--technical-report` | Generate only the technical report (tools + AI critique) | +| `--business-report` | Generate only 
the business logic review report | +| `--no-generate` | Skip AI wrapper generation for unknown tools | +| `--preflight` | Run pre-flight checks for agent setup, then exit | +| `-v`, `--verbose` | Show detailed progress output | + +### Example Output + +``` +Activating AIDLC Code Reviewer... + Tools run: 5, Skipped: 2, Findings: 47 + Critical sections: 3 + Business logic findings: 12, Consistency issues: 2 + + Reports: + → Start here: reports/code_review_summary_20260421_143000.html + Technical (Markdown): reports/code_review_technical_20260421_143000.md + Technical (HTML): reports/code_review_technical_20260421_143000.html + Business Logic (Markdown): reports/code_review_business_20260421_143000.md + Business Logic (HTML): reports/code_review_business_20260421_143000.html +``` + +Open the summary HTML first — it links to the detailed reports. + +--- + +## How Auto-Generation Works + +When a tool in `review-config.yaml` has no built-in wrapper: + +1. **Doc Fetch** - Attempts to fetch the tool's documentation from known URLs or PyPI (non-blocking) +2. **Prompt Assembly** - Builds a prompt containing the project's data models, utility functions, severity policy, three example wrappers, and the tool's config +3. **LLM Call** - Sends the prompt to Amazon Bedrock (Claude Sonnet) via Strands SDK +4. **Code Extraction** - Parses the Python code from the LLM response +5. **Level 1 Verification (static)** - Checks syntax, imports, `run()` signature, required constants (`CATEGORY`, `TOOL`, `SUPPORTED_LANGUAGES`), and return type via dry run +6. **Retry** - If Level 1 fails, feeds errors back to the LLM and retries (up to 2 times) +7. **Level 2 Verification (live)** - If the tool CLI is on PATH, runs the wrapper against the actual target and validates the output structure +8. **Write & Register** - Saves the wrapper to `tools/<tool_name>.py` and registers it in memory +9. 
**Run** - The wrapper is immediately used for the current review + +Generated wrappers persist in the `tools/` directory and are reused on subsequent runs (no regeneration needed). + +--- + +## Troubleshooting + +| Symptom | Cause | Fix | +|---------|-------|-----| +| `Agent dependencies not installed` | Packages missing or corrupt install | Reinstall with `pip install -e .` | +| `LLM invocation failed: ValidationException` | Wrong model ID or no model access | Check `agent-config.yaml` model_id, enable access in Amazon Bedrock console | +| `LLM invocation failed: AccessDeniedException` | AWS credentials lack Amazon Bedrock permissions | Add `bedrock:InvokeModel` permission to your IAM policy | +| `Level 2 verification failed: Tool not installed` | Tool CLI not on PATH | Install the tool or add `.venv/bin` to PATH. Wrapper is still accepted (Level 1 passed). | +| `Generation failed: Could not extract valid Python code` | LLM response didn't contain parseable code | Retry, or try a different model via `BEDROCK_MODEL_ID` | +| Tool skipped with no message | `--no-generate` flag or no wrapper exists | Remove `--no-generate` flag | + diff --git a/scripts/aidlc-codereview/pyproject.toml b/scripts/aidlc-codereview/pyproject.toml new file mode 100644 index 00000000..e17349a6 --- /dev/null +++ b/scripts/aidlc-codereview/pyproject.toml @@ -0,0 +1,36 @@ +[build-system] +requires = ["setuptools>=68.0"] +build-backend = "setuptools.build_meta" + +[project] +name = "aidlc-code-reviewer" +version = "0.2.0" +description = "AIDLC Code Reviewer — automated, language-agnostic code quality analysis" +requires-python = ">=3.11" +license = "MIT-0" +dependencies = [ + "pyyaml", + "bandit", + "ruff", + "mypy", + "radon", + "vulture", + "strands-agents", + "strands-agents-tools", + "boto3", + "backoff", + "pydantic>=2.0", + "beautifulsoup4", +] + +[project.scripts] +aidlc-code-reviewer = "code_reviewer.common.cli:main" + +[tool.setuptools.packages.find] +where = ["src"] + 
+[tool.pytest.ini_options] +testpaths = ["tests"] + +[tool.ruff] +line-length = 120 diff --git a/scripts/aidlc-codereview/src/code_reviewer/__init__.py b/scripts/aidlc-codereview/src/code_reviewer/__init__.py new file mode 100644 index 00000000..e7f5ed1f --- /dev/null +++ b/scripts/aidlc-codereview/src/code_reviewer/__init__.py @@ -0,0 +1,6 @@ +"""AIDLC Code Reviewer — automated, language-agnostic code quality analysis.""" + +# Copyright 2026 Amazon.com, Inc. or its affiliates. All Rights Reserved. +# SPDX-License-Identifier: MIT-0 + +__version__ = "0.2.0" diff --git a/scripts/aidlc-codereview/src/code_reviewer/agent-config.yaml b/scripts/aidlc-codereview/src/code_reviewer/agent-config.yaml new file mode 100644 index 00000000..b1013323 --- /dev/null +++ b/scripts/aidlc-codereview/src/code_reviewer/agent-config.yaml @@ -0,0 +1,52 @@ +# AIDLC Code Reviewer — Agent Configuration +# +# Controls the AI-powered features: critical findings analysis, +# code structure critique, and auto-generation of tool wrappers. +# +# Requires: pip install -e . +# +# All settings here can be overridden by environment variables +# (see "Environment overrides" comments below). + +# Security Considerations: Select an AWS region that meets your +# data residency and compliance requirements. Code analyzed by +# this tool is transmitted to Amazon Bedrock endpoints in the +# configured region. Customers are responsible for ensuring this +# aligns with their data governance policies. AWS secures data in +# transit and at rest within Bedrock; customers must classify data +# appropriately before analysis. + +agent: + # Amazon Bedrock model ID or inference profile ID. + # Must have model access enabled in the Amazon Bedrock console. + # Override with: export BEDROCK_MODEL_ID= + model_id: "us.anthropic.claude-sonnet-4-6" + + # Maximum response tokens for LLM calls. + max_tokens: 16384 + + # How many times to retry wrapper generation if verification fails. 
+ # Each retry feeds the errors back to the LLM for self-correction. + max_retries: 2 + +aws: + # AWS region where Amazon Bedrock is available. + # Override with: export AWS_REGION= + region: "us-east-1" + + # Named AWS CLI profile (from ~/.aws/credentials or ~/.aws/config). + # Set to null to use the default credential chain (env vars, IAM role, SSO). + # Override with: export AWS_PROFILE= + profile_name: null + +doc_fetch: + # When generating a wrapper for an unknown tool, the agent can fetch + # the tool's documentation to improve the quality of the generated code. + enabled: true + + # HTTP timeout (seconds) for fetching tool documentation pages. + timeout_seconds: 10 + + # Max characters of documentation text to include in the prompt. + # Larger values give the LLM more context but increase token usage. + max_doc_length: 8000 diff --git a/scripts/aidlc-codereview/src/code_reviewer/agent/__init__.py b/scripts/aidlc-codereview/src/code_reviewer/agent/__init__.py new file mode 100644 index 00000000..333cd3ad --- /dev/null +++ b/scripts/aidlc-codereview/src/code_reviewer/agent/__init__.py @@ -0,0 +1 @@ +"""AI agent subsystem for auto-generating tool wrappers.""" diff --git a/scripts/aidlc-codereview/src/code_reviewer/agent/base_agent.py b/scripts/aidlc-codereview/src/code_reviewer/agent/base_agent.py new file mode 100644 index 00000000..a1633ff9 --- /dev/null +++ b/scripts/aidlc-codereview/src/code_reviewer/agent/base_agent.py @@ -0,0 +1,121 @@ +"""Base agent ABC wrapping Strands SDK with BedrockModel. + +Transport security: All Amazon Bedrock API calls are made via the boto3 SDK, +which enforces HTTPS with TLS 1.2+ on every request. The Bedrock service +endpoints do not accept plaintext HTTP connections. No application-level TLS +configuration is required or possible — encryption in transit is handled by +the SDK and the service. See: +https://docs.aws.amazon.com/bedrock/latest/userguide/data-protection.html +""" + +# Copyright 2026 Amazon.com, Inc. 
logger = logging.getLogger("aidlc_code_reviewer.agent")


class BaseAgent(ABC):
    """Abstract base for agents that call Amazon Bedrock through the Strands SDK.

    The constructor builds a ``BedrockModel`` from the agent configuration and
    wraps it in a Strands ``Agent``. Subclasses implement :meth:`execute` for
    their specific task and use :meth:`_invoke_model` to talk to the model.
    """

    def __init__(self, config: AgentConfig | None = None) -> None:
        """Create the underlying Bedrock model and Strands agent.

        Args:
            config: Agent settings. When omitted (or falsy), the result of
                ``load_agent_config()`` is used instead.
        """
        self.config = config or load_agent_config()

        bedrock_kwargs: dict[str, Any] = {
            "model_id": self.config.model_id,
            "max_tokens": self.config.max_tokens,
        }
        if self.config.profile_name:
            # A named profile needs an explicit session; the region travels with it.
            bedrock_kwargs["boto_session"] = boto3.Session(
                profile_name=self.config.profile_name,
                region_name=self.config.region,
            )
        else:
            # No profile configured: only the region is passed explicitly.
            bedrock_kwargs["region_name"] = self.config.region

        bedrock_model = BedrockModel(**bedrock_kwargs)

        # Suppress Strands streaming output unless verbose mode is active.
        agent_kwargs: dict[str, Any] = {"model": bedrock_model}
        if not is_verbose():
            agent_kwargs["callback_handler"] = None
        self._strands_agent = StrandsAgent(**agent_kwargs)

    @abstractmethod
    def execute(self, **kwargs: Any) -> Any:
        """Execute the agent's task (implemented by each concrete agent)."""
        ...

    @backoff.on_exception(
        backoff.expo,
        Exception,
        max_tries=4,
        base=2,
        # Only errors classified as retryable by is_retryable() are retried.
        giveup=lambda e: not is_retryable(e),
        on_backoff=lambda details: logger.warning(
            "Retry %d: %s", details["tries"], details["exception"],
        ),
    )
    def _invoke_model(self, prompt: str) -> Tuple[str, dict]:
        """Invoke the model via the Strands agent, retrying with exponential backoff.

        Args:
            prompt: The full prompt text sent to the agent.

        Returns:
            ``(response_text, usage_metadata)`` where ``usage_metadata`` has the
            keys ``input_tokens`` and ``output_tokens``.

        Raises:
            Exception: Whatever the Strands agent raised, re-raised after logging
                (and after retries, when the error is retryable).
        """
        start = time.perf_counter()
        try:
            response = self._strands_agent(prompt)
        except Exception as e:
            # Log here, then let the backoff decorator decide whether to retry.
            logger.warning("Model invocation failed: %s", e)
            raise

        elapsed = time.perf_counter() - start
        text = str(response)
        usage = self._extract_token_usage(response)
        logger.info(
            "Agent completed in %.2fs (input: %s, output: %s tokens)",
            elapsed,
            usage.get("input_tokens", "?"),
            usage.get("output_tokens", "?"),
        )
        return text, usage

    def _extract_token_usage(self, response: Any) -> dict:
        """Extract token counts from an agent result.

        Looks first at ``response.metrics.accumulated_usage`` (dict or object),
        then at a dict-valued ``response.usage``. Missing or ``None`` counts are
        reported as 0 in every branch, so callers always get integers.

        Args:
            response: The object returned by the Strands agent call.

        Returns:
            A dict with ``input_tokens`` and ``output_tokens``.
        """
        if hasattr(response, "metrics") and hasattr(response.metrics, "accumulated_usage"):
            usage = response.metrics.accumulated_usage
            if isinstance(usage, dict):
                return {
                    "input_tokens": usage.get("inputTokens", 0) or 0,
                    "output_tokens": usage.get("outputTokens", 0) or 0,
                }
            return {
                "input_tokens": getattr(usage, "inputTokens", 0) or 0,
                "output_tokens": getattr(usage, "outputTokens", 0) or 0,
            }

        if hasattr(response, "usage") and isinstance(response.usage, dict):
            # Normalise None to 0 here as well, matching the branches above.
            return {
                "input_tokens": response.usage.get("inputTokens", 0) or 0,
                "output_tokens": response.usage.get("outputTokens", 0) or 0,
            }

        return {"input_tokens": 0, "output_tokens": 0}
logger = logging.getLogger("aidlc_code_reviewer.business_logic")

_PROJECT_ROOT = Path(__file__).resolve().parent.parent
_TEMPLATE_PATH = _PROJECT_ROOT / "prompts" / "business-logic-review.md"

# Valid enum values for safe parsing
_VALID_CATEGORIES = {c.value for c in BusinessLogicCategory}
_VALID_ISSUE_TYPES = {t.value for t in ConsistencyIssueType}


def _build_prompt(sources: dict[str, str]) -> str:
    """Assemble the business logic review prompt from the template.

    Args:
        sources: Collected source files, passed through to ``_format_source_block``.

    Returns:
        The template text with ``INSERT_SOURCE_CODE`` replaced by the formatted sources.

    Raises:
        OSError: If the prompt template cannot be read (logged, then re-raised).
    """
    try:
        # Explicit UTF-8 so the prompt does not depend on the platform's locale encoding.
        template = _TEMPLATE_PATH.read_text(encoding="utf-8")
    except OSError:
        logger.warning("Business logic review prompt template not found at %s", _TEMPLATE_PATH)
        raise

    return template.replace("INSERT_SOURCE_CODE", _format_source_block(sources))


def _strip_code_fence(text: str) -> str:
    """Return *text* without a surrounding Markdown code fence, if one is present."""
    text = text.strip()
    if not text.startswith("```"):
        return text
    if "\n" in text:
        # Drop the whole opening fence line; it may carry a language tag (```json).
        text = text[text.index("\n") + 1:]
    else:
        # Single-line reply: remove only the three backticks, not the payload's first char.
        text = text[3:]
    if text.endswith("```"):
        text = text[:-3]
    return text.strip()


def _to_line_number(value: Any) -> int:
    """Coerce an LLM-supplied line number to ``int``; unusable values become 0."""
    try:
        return int(value)
    except (TypeError, ValueError, OverflowError):
        logger.warning("Non-numeric line value %r in business logic review, using 0", value)
        return 0


def _parse_response(response_text: str) -> BusinessLogicReview | None:
    """Parse the LLM JSON response into a BusinessLogicReview.

    Malformed parts of the response are skipped (with a warning) rather than
    raising: entries that are not JSON objects, findings with an unknown
    category, and consistency issues with an unknown type. Line numbers that
    are null or non-numeric are treated like missing ones (0).

    Args:
        response_text: Raw model output, optionally wrapped in a Markdown code fence.

    Returns:
        The parsed review, or ``None`` when the text is not a JSON object.
    """
    text = _strip_code_fence(response_text)

    try:
        data = json.loads(text)
    except json.JSONDecodeError as exc:
        logger.error("Failed to parse business logic review JSON: %s", exc)
        return None

    if not isinstance(data, dict):
        logger.error("Expected JSON object, got %s", type(data).__name__)
        return None

    # Parse findings ("or []" also covers an explicit JSON null).
    findings: list[BusinessLogicFinding] = []
    for item in data.get("findings") or []:
        if not isinstance(item, dict):
            logger.warning("Skipping malformed finding entry: %r", item)
            continue

        raw_cat = item.get("category", "")
        if raw_cat not in _VALID_CATEGORIES:
            logger.warning("Skipping finding with unknown category: %s", raw_cat)
            continue

        findings.append(BusinessLogicFinding(
            category=BusinessLogicCategory(raw_cat),
            title=item.get("title", ""),
            file=item.get("file", ""),
            start_line=_to_line_number(item.get("start_line", 0)),
            end_line=_to_line_number(item.get("end_line", 0)),
            what_it_does=item.get("what_it_does", ""),
            review_guidance=item.get("review_guidance", ""),
            code_block=item.get("code_block", ""),
            risk_if_wrong=item.get("risk_if_wrong", ""),
        ))

    # Parse consistency issues
    consistency_issues: list[ConsistencyIssue] = []
    for item in data.get("consistency_issues") or []:
        if not isinstance(item, dict):
            logger.warning("Skipping malformed consistency issue entry: %r", item)
            continue

        raw_type = item.get("issue_type", "")
        if raw_type not in _VALID_ISSUE_TYPES:
            logger.warning("Skipping consistency issue with unknown type: %s", raw_type)
            continue

        locations = [
            ConsistencyIssueLocation(
                file=loc.get("file", ""),
                start_line=_to_line_number(loc.get("start_line", 0)),
                end_line=_to_line_number(loc.get("end_line", 0)),
            )
            for loc in item.get("locations") or []
            if isinstance(loc, dict)
        ]

        consistency_issues.append(ConsistencyIssue(
            issue_type=ConsistencyIssueType(raw_type),
            description=item.get("description", ""),
            locations=locations,
            code_blocks=list(item.get("code_blocks") or []),
            recommended_action=item.get("recommended_action", ""),
        ))

    # Enforce sort order: category (taxonomy order), then file path, then start_line.
    # The LLM doesn't always respect the requested sort, so we do it here.
    category_rank = {cat: i for i, cat in enumerate(BusinessLogicCategory)}
    findings.sort(key=lambda f: (category_rank.get(f.category, 99), f.file, f.start_line))

    return BusinessLogicReview(
        executive_summary=data.get("executive_summary", ""),
        findings=findings,
        consistency_issues=consistency_issues,
    )


class BusinessLogicAgent(BaseAgent):
    """Agent that identifies core business logic for human review."""

    def __init__(self, config: AgentConfig | None = None) -> None:
        """Initialise the agent; *config* is forwarded unchanged to ``BaseAgent``."""
        super().__init__(config)

    def execute(
        self,
        target: Path | None = None,
        **kwargs: Any,
    ) -> BusinessLogicReview | None:
        """Analyze codebase for business logic requiring human review.

        Args:
            target: Path handed to ``_collect_source_files``; required.
            **kwargs: Accepted for interface compatibility; not used here.

        Returns:
            BusinessLogicReview, or None on failure (non-blocking): missing
            target, no source files, unreadable prompt template, model
            invocation error, or an unparseable response.
        """
        if target is None:
            logger.error("BusinessLogicAgent requires target")
            return None

        vprint("  Collecting source files for business logic review...", flush=True)
        sources = _collect_source_files(target)
        if not sources:
            logger.warning("No source files found in %s", target)
            return None
        vprint(f"  Collected {len(sources)} source files", flush=True)

        vprint("  Building business logic review prompt...", flush=True)
        try:
            prompt = _build_prompt(sources)
        except OSError as e:
            # Keep the documented non-blocking contract when the template is unreadable.
            logger.error("Business logic prompt could not be built: %s", e)
            print(f"  Business logic review failed: {e}", flush=True)
            return None

        vprint("  Invoking agent for business logic analysis...", flush=True)
        try:
            response_text, usage = self._invoke_model(prompt)
        except Exception as e:
            logger.error("Business logic agent invocation failed: %s", e)
            print(f"  Business logic review failed: {e}", flush=True)
            return None
        logger.info(
            "Business logic agent: input=%s, output=%s tokens",
            usage.get("input_tokens", "?"),
            usage.get("output_tokens", "?"),
        )

        vprint("  Parsing business logic review...", flush=True)
        review = _parse_response(response_text)
        if review:
            vprint(
                f"  Business logic review complete: {len(review.findings)} findings, "
                f"{len(review.consistency_issues)} consistency issues",
                flush=True,
            )
        return review
a/scripts/aidlc-codereview/src/code_reviewer/agent/code_structure_agent.py b/scripts/aidlc-codereview/src/code_reviewer/agent/code_structure_agent.py new file mode 100644 index 00000000..46e3edbe --- /dev/null +++ b/scripts/aidlc-codereview/src/code_reviewer/agent/code_structure_agent.py @@ -0,0 +1,220 @@ +"""Agent that performs AI-powered code structure critique. + +Evaluates the codebase across six dimensions: logging, measurability, +scalability, efficiency, complexity, and structure. Feeds in critical +findings from the prior agent pass plus all tool results. +""" + +# Copyright 2026 Amazon.com, Inc. or its affiliates. All Rights Reserved. +# SPDX-License-Identifier: MIT-0 + +import json +import logging +from pathlib import Path +from typing import Any + +from code_reviewer.agent.base_agent import BaseAgent +from code_reviewer.agent.config import AgentConfig +from code_reviewer.agent.critical_findings_agent import _collect_source_files, _format_source_block, _format_tool_findings +from code_reviewer.common.output import vprint +from code_reviewer.common.models import ( + CodeStructureCritique, + CriticalFinding, + Finding, + StructureDimension, + StructureIssue, + StructureRating, + ToolResult, +) + +logger = logging.getLogger("aidlc_code_reviewer.code_structure") + +_PROJECT_ROOT = Path(__file__).resolve().parent.parent +_TEMPLATE_PATH = _PROJECT_ROOT / "prompts" / "structure-critique-v1.md" + + +def _format_critical_findings(critical_findings: list[CriticalFinding]) -> str: + """Format critical findings as context for the structure agent.""" + if not critical_findings: + return "(no critical findings from prior analysis)" + parts: list[str] = [] + for i, cf in enumerate(critical_findings, 1): + parts.append( + f"{i}. 
[{cf.category.value}] {cf.file}:{cf.start_line}-{cf.end_line} — {cf.verdict}" + ) + return "\n".join(parts) + + +def _build_prompt( + sources: dict[str, str], + results: list[ToolResult], + critical_findings: list[CriticalFinding], +) -> str: + """Assemble the structure critique prompt from the template.""" + try: + template = _TEMPLATE_PATH.read_text(encoding="utf-8") + except OSError: + logger.warning("Structure critique prompt template not found at %s", _TEMPLATE_PATH) + raise + if len(template) > 1_000_000: + raise ValueError(f"Template file unexpectedly large: {len(template)} bytes") + + source_block = _format_source_block(sources) + findings_block = _format_tool_findings(results) + critical_block = _format_critical_findings(critical_findings) + + prompt = template.replace("INSERT_SOURCE_CODE", source_block) + prompt = prompt.replace("INSERT_TOOL_FINDINGS", findings_block) + prompt = prompt.replace("INSERT_CRITICAL_FINDINGS", critical_block) + return prompt + + +def _find_tool_findings_for_range( + file_path: str, + start: int, + end: int, + findings_by_file: dict[str, list[Finding]], +) -> list[Finding]: + """Match tool findings by file path (handles absolute vs relative mismatch).""" + matched: list[Finding] = [] + for tool_path, tool_findings in findings_by_file.items(): + if (tool_path == file_path + or tool_path.endswith("/" + file_path) + or file_path.endswith("/" + tool_path) + or Path(tool_path).name == Path(file_path).name): + for f in tool_findings: + if start <= f.line <= end: + matched.append(f) + return matched + + +def _parse_response( + response_text: str, + results: list[ToolResult], +) -> CodeStructureCritique | None: + """Parse the LLM JSON response into a CodeStructureCritique.""" + text = response_text.strip() + if text.startswith("```"): + first_newline = text.index("\n") if "\n" in text else 3 + text = text[first_newline + 1:] + if text.endswith("```"): + text = text[:-3] + text = text.strip() + + try: + data = json.loads(text) + except 
class CodeStructureAgent(BaseAgent):
    """Agent that performs holistic code structure critique."""

    def __init__(self, config: AgentConfig | None = None) -> None:
        super().__init__(config)

    def execute(
        self,
        target: Path | None = None,
        results: list[ToolResult] | None = None,
        critical_findings: list[CriticalFinding] | None = None,
        **kwargs: Any,
    ) -> CodeStructureCritique | None:
        """Analyze codebase structure across six quality dimensions.

        Every stage is non-blocking: a failure is logged and reported, and
        None is returned instead of raising.

        Args:
            target: File or directory to analyze. Required despite the default.
            results: Tool results fed into the prompt and used to link issues
                to tool findings. Required despite the default.
            critical_findings: Findings from the prior agent pass; None is
                treated as an empty list.
            **kwargs: Accepted for interface compatibility; unused here.

        Returns:
            The parsed CodeStructureCritique, or None on any failure.
        """
        if target is None or results is None:
            logger.error("CodeStructureAgent requires target and results")
            return None

        vprint(" Collecting source files for structure critique...", flush=True)
        sources = _collect_source_files(target)
        if not sources:
            logger.warning("No source files found in %s", target)
            return None
        vprint(f" Collected {len(sources)} source files", flush=True)

        vprint(" Building structure critique prompt...", flush=True)
        try:
            prompt = _build_prompt(sources, results, critical_findings or [])
        except (OSError, ValueError) as e:
            # _build_prompt re-raises OSError for an unreadable template and
            # raises ValueError for an oversized one.
            logger.error("Structure critique prompt could not be built: %s", e)
            print(f" Structure critique analysis failed: {e}", flush=True)
            return None

        vprint(" Invoking agent for code structure analysis...", flush=True)
        try:
            response_text, usage = self._invoke_model(prompt)
            logger.info(
                "Structure critique agent: input=%s, output=%s tokens",
                usage.get("input_tokens", "?"),
                usage.get("output_tokens", "?"),
            )
        except Exception as e:
            logger.error("Structure critique agent invocation failed: %s", e)
            print(f" Structure critique analysis failed: {e}", flush=True)
            return None

        vprint(" Parsing structure critique...", flush=True)
        try:
            critique = _parse_response(response_text, results)
        except (AttributeError, TypeError, ValueError) as e:
            # The model controls the JSON shape; e.g. a non-object entry in
            # "dimensions" makes the parser's .get() calls fail.
            logger.error("Structure critique response could not be parsed: %s", e)
            print(f" Structure critique analysis failed: {e}", flush=True)
            return None
        if critique:
            total_issues = sum(len(d.findings) for d in critique.dimensions)
            vprint(
                f" Structure critique complete: {len(critique.dimensions)} dimensions, "
                f"{total_issues} issues",
                flush=True,
            )
        return critique
b/scripts/aidlc-codereview/src/code_reviewer/agent/config.py @@ -0,0 +1,51 @@ +"""Configuration for the agent subsystem. + +Reads from agent-config.yaml or environment variables. +""" + +# Copyright 2026 Amazon.com, Inc. or its affiliates. All Rights Reserved. +# SPDX-License-Identifier: MIT-0 + +import os +from dataclasses import dataclass +from pathlib import Path + +import yaml + + +@dataclass +class AgentConfig: + model_id: str = "us.anthropic.claude-sonnet-4-6" + max_tokens: int = 8192 + max_retries: int = 2 + region: str = "us-east-1" + profile_name: str | None = None + + +_CONFIG_FILE = Path(__file__).resolve().parent.parent / "agent-config.yaml" + + +def load_agent_config() -> AgentConfig: + """Load agent config from YAML file, with env var overrides.""" + config = AgentConfig() + + if _CONFIG_FILE.exists(): + with open(_CONFIG_FILE, encoding="utf-8") as f: + raw = yaml.safe_load(f) or {} + + agent = raw.get("agent", {}) + config.model_id = agent.get("model_id", config.model_id) + config.max_tokens = agent.get("max_tokens", config.max_tokens) + config.max_retries = agent.get("max_retries", config.max_retries) + + aws = raw.get("aws", {}) + config.region = aws.get("region", config.region) + config.profile_name = aws.get("profile_name", config.profile_name) + + # Environment variable overrides + config.region = os.environ.get("AWS_REGION", config.region) + config.profile_name = os.environ.get("AWS_PROFILE", config.profile_name) + if model_env := os.environ.get("BEDROCK_MODEL_ID"): + config.model_id = model_env + + return config diff --git a/scripts/aidlc-codereview/src/code_reviewer/agent/critical_findings_agent.py b/scripts/aidlc-codereview/src/code_reviewer/agent/critical_findings_agent.py new file mode 100644 index 00000000..a288ef97 --- /dev/null +++ b/scripts/aidlc-codereview/src/code_reviewer/agent/critical_findings_agent.py @@ -0,0 +1,319 @@ +"""Agent that analyzes tool results and source code to identify critical +sections requiring human review. 
def _collect_source_files(target: Path) -> dict[str, str]:
    """Read all source files under target into a {relative_path: content} dict.

    A file target is read directly and keyed by its file name. A directory
    target is walked recursively; files are kept when their suffix is a key
    of EXTENSION_MAP, they are at most _MAX_FILE_SIZE bytes, and no path
    component *below target* is in _SKIP_DIRS. Only components below the
    target are checked, so a project that itself lives under a directory
    named e.g. ``venv`` is still scanned.

    Files are decoded as UTF-8 with undecodable bytes replaced. Files that
    cannot be stat'ed or read are skipped.

    Args:
        target: File or directory to collect from.

    Returns:
        Mapping of path (relative to ``target``) to file content, in sorted
        path order. Empty when nothing qualifies.
    """
    sources: dict[str, str] = {}
    code_extensions = set(EXTENSION_MAP.keys())

    if target.is_file():
        if target.suffix.lower() in code_extensions:
            try:
                sources[target.name] = target.read_text(encoding="utf-8", errors="replace")
            except OSError as exc:
                logger.debug("Skipping unreadable file %s: %s", target, exc)
        return sources

    for file_path in sorted(target.rglob("*")):
        rel = file_path.relative_to(target)
        if not _SKIP_DIRS.isdisjoint(rel.parts):
            continue
        if file_path.suffix.lower() not in code_extensions:
            continue
        try:
            # stat() and read_text() can both fail (permissions, file removed
            # mid-scan); either way the file is skipped, not fatal.
            if not file_path.is_file() or file_path.stat().st_size > _MAX_FILE_SIZE:
                continue
            sources[str(rel)] = file_path.read_text(encoding="utf-8", errors="replace")
        except OSError as exc:
            logger.debug("Skipping unreadable file %s: %s", file_path, exc)

    return sources
_format_source_block(sources: dict[str, str]) -> str: + """Format collected sources for the prompt.""" + parts: list[str] = [] + for path, content in sources.items(): + parts.append(f"### {path}\n```\n{content}\n```") + return "\n\n".join(parts) if parts else "(no source files found)" + + +def _format_tool_findings(results: list[ToolResult]) -> str: + """Format tool findings as a compact summary for the prompt.""" + lines: list[str] = [] + for r in results: + if not r.findings: + continue + lines.append(f"### {r.tool} ({r.category})") + for f in r.findings: + lines.append( + f"- [{f.severity.value}] {f.rule_id} in {f.file}:{f.line} — {f.message}" + ) + return "\n".join(lines) if lines else "(no tool findings)" + + +def _format_flagged_files(results: list[ToolResult], sources: dict[str, str]) -> str: + """For files that tools flagged, include the relevant code context.""" + flagged: dict[str, set[int]] = {} + for r in results: + for f in r.findings: + flagged.setdefault(f.file, set()).add(f.line) + + if not flagged: + return "(no flagged files)" + + parts: list[str] = [] + for file_path, line_nums in sorted(flagged.items()): + content = sources.get(file_path) + if not content: + parts.append(f"### {file_path}\n(source not available)") + continue + + src_lines = content.splitlines() + # Show context around each flagged line (5 lines before/after) + regions: list[tuple[int, int]] = [] + for ln in sorted(line_nums): + start = max(0, ln - 6) + end = min(len(src_lines), ln + 5) + regions.append((start, end)) + + # Merge overlapping regions + merged: list[tuple[int, int]] = [] + for start, end in regions: + if merged and start <= merged[-1][1]: + merged[-1] = (merged[-1][0], max(merged[-1][1], end)) + else: + merged.append((start, end)) + + snippet_parts: list[str] = [] + for start, end in merged: + numbered = [ + f"{i + 1:>4} | {src_lines[i]}" for i in range(start, end) + ] + snippet_parts.append("\n".join(numbered)) + + parts.append(f"### {file_path}\n```\n" + 
def _parse_response(response_text: str, results: list[ToolResult]) -> list[CriticalFinding]:
    """Parse the LLM JSON response into CriticalFinding objects.

    The response is expected to be a JSON array of objects. Markdown fences
    around the payload are removed, stray control characters are blanked, and
    raw tabs/newlines inside JSON strings are tolerated. Entries that are not
    objects, have an unknown category, or carry non-numeric line values are
    skipped with a warning.

    Each finding is linked to tool findings (severity above LOW/INFO) in the
    same file and line range, and marked "tool_assisted" when any exist,
    otherwise "agent_only".

    Args:
        response_text: Raw text returned by the model.
        results: Tool results used to find related tool findings.

    Returns:
        Parsed findings in response order; empty when the payload is not
        valid JSON or not a JSON array.
    """
    import math
    import re

    # Strip markdown fences if the model wraps them anyway
    text = response_text.strip()
    if text.startswith("```"):
        newline_at = text.find("\n")
        if newline_at != -1:
            # Drop the whole opening fence line (``` or ```json).
            text = text[newline_at + 1:]
        else:
            # Single-line payload: remove only the backticks and an optional
            # "json" tag, never a character of the payload itself.
            text = text[3:]
            if text[:4].lower() == "json":
                text = text[4:]
    if text.endswith("```"):
        text = text[:-3]
    text = text.strip()

    # Blank out control characters the LLM may embed in JSON string values.
    # \t \n \r are left alone here and accepted by strict=False below.
    text = re.sub(r'[\x00-\x08\x0b\x0c\x0e-\x1f]', ' ', text)

    try:
        items = json.loads(text, strict=False)
    except json.JSONDecodeError as exc:
        logger.error("Failed to parse critical findings JSON: %s", exc)
        return []

    if not isinstance(items, list):
        logger.error("Expected JSON array, got %s", type(items).__name__)
        return []

    # Tool findings grouped by the path each tool reported. Those paths may be
    # absolute while the agent returns relative ones; see _find_tool_findings.
    findings_by_file: dict[str, list[Finding]] = {}
    for r in results:
        for f in r.findings:
            findings_by_file.setdefault(f.file, []).append(f)

    category_map = {
        "COMPUTATION": CriticalCategory.COMPUTATION,
        "CONTROL_FLOW": CriticalCategory.CONTROL_FLOW,
        "DATA_TRANSFORM": CriticalCategory.DATA_TRANSFORM,
    }

    _LOW_INFO_SEV = {Severity.LOW, Severity.INFO}

    def _find_tool_findings(file_path: str, start: int, end: int) -> list[Finding]:
        """Match MEDIUM+ tool findings in [start, end] for file_path.

        Exact or suffix path matches win; basename-only matching is used only
        when no tool path matched precisely, so same-named files in different
        directories do not share findings.
        """
        groups = [
            found
            for tool_path, found in findings_by_file.items()
            if tool_path == file_path
            or tool_path.endswith("/" + file_path)
            or file_path.endswith("/" + tool_path)
        ]
        if not groups:
            wanted_name = Path(file_path).name
            groups = [
                found
                for tool_path, found in findings_by_file.items()
                if Path(tool_path).name == wanted_name
            ]
        return [
            f
            for found in groups
            for f in found
            if start <= f.line <= end and f.severity not in _LOW_INFO_SEV
        ]

    def _line_numbers(raw: object) -> list[int]:
        """Keep only finite numeric entries of a highlight_lines value."""
        if not isinstance(raw, list):
            return []
        return [
            int(value)
            for value in raw
            # json.loads accepts NaN/Infinity, which int() cannot convert.
            if isinstance(value, int)
            or (isinstance(value, float) and math.isfinite(value))
        ]

    parsed: list[CriticalFinding] = []
    for item in items:
        if not isinstance(item, dict):
            logger.warning("Critical finding entry is not a JSON object, skipping")
            continue

        cat_str = item.get("category", "")
        cat = category_map.get(cat_str) if isinstance(cat_str, str) else None
        if cat is None:
            logger.warning("Unknown critical category: %s, skipping", cat_str)
            continue

        file_path = str(item.get("file") or "")
        try:
            start_line = max(0, int(item.get("start_line", 0)))
            end_line = max(0, int(item.get("end_line", 0)))
        except (ValueError, TypeError, OverflowError):
            logger.warning("Non-numeric line values in critical finding, skipping entry")
            continue

        # Match related tool findings that overlap with this code region
        related = _find_tool_findings(file_path, start_line, end_line)

        source = "tool_assisted" if related else "agent_only"

        parsed.append(CriticalFinding(
            category=cat,
            file=file_path,
            start_line=start_line,
            end_line=end_line,
            verdict=item.get("verdict", ""),
            code_block=item.get("code_block", ""),
            why_critical=item.get("why_critical", ""),
            recommended_action=item.get("recommended_action", ""),
            source=source,
            related_tool_findings=related,
            highlight_lines=_line_numbers(item.get("highlight_lines", [])),
        ))

    return parsed
+ """ + if target is None or results is None: + logger.error("CriticalFindingsAgent requires target and results") + return [] + + vprint(" Collecting source files...", flush=True) + sources = _collect_source_files(target) + if not sources: + logger.warning("No source files found in %s", target) + return [] + vprint(f" Collected {len(sources)} source files", flush=True) + + vprint(" Building critical findings prompt...", flush=True) + prompt = _build_prompt(sources, results) + + vprint(" Invoking agent for critical code analysis...", flush=True) + try: + response_text, usage = self._invoke_model(prompt) + logger.info( + "Critical findings agent: input=%s, output=%s tokens", + usage.get("input_tokens", "?"), + usage.get("output_tokens", "?"), + ) + except Exception as e: + logger.error("Critical findings agent invocation failed: %s", e) + print(f" Critical findings analysis failed: {e}", flush=True) + return [] + + vprint(" Parsing critical findings...", flush=True) + findings = _parse_response(response_text, results) + + # Post-parse safety net: drop findings whose only related tool results + # are LOW or INFO. These are nonessential and should never appear as + # standalone critical findings. A LOW/INFO finding should typically + # accompany a MEDIUM-or-higher finding in the same code region. 
@dataclass
class GenerationResult:
    """Outcome of one wrapper-generation attempt for a single tool."""

    # Required: overall outcome and the tool the wrapper was generated for.
    status: GenerationStatus
    tool_name: str

    # Optional details; each stays None unless the caller supplies it.
    wrapper_path: Path | None = None
    verification: VerificationResult | None = None
    error: str | None = None
    token_usage: dict | None = None
    tool_result: ToolResult | None = None  # reusable result from Level 2
a/scripts/aidlc-codereview/src/code_reviewer/agent/preflight.py b/scripts/aidlc-codereview/src/code_reviewer/agent/preflight.py new file mode 100644 index 00000000..ca490b4b --- /dev/null +++ b/scripts/aidlc-codereview/src/code_reviewer/agent/preflight.py @@ -0,0 +1,210 @@ +"""Pre-flight verification for the agent subsystem. + +Checks that all dependencies, credentials, and model access are in place +before the CLI runs. Designed to be called early so users get clear, +actionable errors instead of cryptic failures mid-run. +""" + +import logging +import shutil +from pathlib import Path + +from code_reviewer.common.config import load_config + +logger = logging.getLogger("aidlc_code_reviewer.preflight") + + +def _check_python_packages() -> list[str]: + """Check that all agent Python dependencies are importable.""" + errors = [] + required = { + "strands": "strands-agents", + "boto3": "boto3", + "backoff": "backoff", + "pydantic": "pydantic>=2.0", + "bs4": "beautifulsoup4", + } + for module_name, pip_name in required.items(): + try: + __import__(module_name) + except ImportError: + errors.append(f"Missing package '{pip_name}' (import {module_name} failed)") + return errors + + +def _check_aws_credentials(region: str, profile_name: str | None) -> list[str]: + """Check that AWS credentials are configured and can call STS.""" + errors = [] + logger.info( # nosemgrep: python-logger-credential-disclosure + "Loading AWS credentials: profile=%s, region=%s", + profile_name or "(default)", + region, + ) + try: + import boto3 + + kwargs = {"region_name": region} + if profile_name: + kwargs["profile_name"] = profile_name + + session = boto3.Session(**kwargs) + sts = session.client("sts") + identity = sts.get_caller_identity() + account = identity.get("Account", "unknown") + arn = identity.get("Arn", "unknown") + logger.info( # nosemgrep: python-logger-credential-disclosure + "AWS credentials loaded: profile=%s, region=%s, account=%s, arn=%s", + profile_name or "(default)", + 
region, + account, + arn, + ) + print(f" AWS identity: {arn} (account {account})") + except Exception as e: + logger.warning( # nosemgrep: python-logger-credential-disclosure + "AWS credentials check failed: profile=%s, region=%s, error=%s", + profile_name or "(default)", + region, + e, + ) + errors.append(f"AWS credentials check failed: {e}") + return errors + + +def _check_bedrock_access(model_id: str, region: str, profile_name: str | None) -> list[str]: + """Check that the Amazon Bedrock model is accessible with a minimal invoke.""" + errors = [] + try: + import boto3 + + kwargs = {"region_name": region} + if profile_name: + kwargs["profile_name"] = profile_name + + session = boto3.Session(**kwargs) + client = session.client("bedrock-runtime", region_name=region) + + # Minimal converse call to verify model access + response = client.converse( + modelId=model_id, + messages=[{ + "role": "user", + "content": [{"text": "Say OK"}], + }], + inferenceConfig={"maxTokens": 10}, + ) + stop = response.get("stopReason", "unknown") + print(f" Amazon Bedrock model '{model_id}' is accessible (stop: {stop})") + except Exception as e: + error_name = type(e).__name__ + errors.append(f"Amazon Bedrock model access failed ({error_name}): {e}") + return errors + + +def _check_configured_tools(config_path: Path | None) -> list[str]: + """Check which configured CLI tools are installed on PATH.""" + warnings = [] + try: + config = load_config(config_path) + except Exception as e: + return [f"Could not load config: {e}"] + + from code_reviewer.tools.registry import get_wrapper + + for tool_cfg in config.tools: + wrapper = get_wrapper(tool_cfg.name) + command = tool_cfg.command or tool_cfg.name + installed = shutil.which(command) is not None + + if wrapper and installed: + status = "wrapper + CLI" + elif wrapper and not installed: + status = "wrapper only (CLI not on PATH — will fail at runtime)" + warnings.append(f"'{tool_cfg.name}': has wrapper but '{command}' not on PATH") + elif not 
def run_preflight(config_path: Path | None = None) -> bool:
    """Run the four pre-flight checks in order and print a summary.

    Checks: (1) agent Python packages, (2) agent configuration, (3) AWS
    credentials followed by Amazon Bedrock model access, (4) configured CLI
    tools. Step 3 is skipped when packages are missing or the config could
    not be loaded. Tool problems are recorded as warnings, not errors.

    Args:
        config_path: Passed through to the configured-tools check.

    Returns:
        True when no critical error was recorded (warnings do not count).
    """
    errors: list[str] = []
    warning_msgs: list[str] = []

    print("\n=== Pre-flight Check: Agent Setup ===\n")

    # Step 1: importable Python dependencies
    print("[1/4] Checking agent Python packages...")
    missing_packages = _check_python_packages()
    if not missing_packages:
        print(" OK: All agent packages installed")
    else:
        errors.extend(missing_packages)
        print(f" FAIL: {len(missing_packages)} missing package(s)")
        for problem in missing_packages:
            print(f" - {problem}")
        print(" Fix: pip install -e .")

    # Step 2: agent configuration
    print("\n[2/4] Checking agent configuration...")
    try:
        from code_reviewer.agent.config import load_agent_config
        agent_cfg = load_agent_config()
        print(f" Model: {agent_cfg.model_id}")
        print(f" Region: {agent_cfg.region}")
        print(f" Profile: {agent_cfg.profile_name or '(default)'}")
        print(f" Retries: {agent_cfg.max_retries}")
    except Exception as exc:
        errors.append(f"Agent config failed: {exc}")
        print(f" FAIL: {exc}")
        agent_cfg = None

    # Step 3: credentials first; the model probe runs only if they work
    print("\n[3/4] Checking AWS credentials...")
    if not agent_cfg or missing_packages:
        print(" SKIP: Cannot check without agent packages/config")
    else:
        failures = _check_aws_credentials(agent_cfg.region, agent_cfg.profile_name)
        if not failures:
            print("\n Checking Amazon Bedrock model access...")
            failures = _check_bedrock_access(
                agent_cfg.model_id, agent_cfg.region, agent_cfg.profile_name
            )
        errors.extend(failures)
        for problem in failures:
            print(f" FAIL: {problem}")

    # Step 4: configured tools (warnings only)
    print("\n[4/4] Checking configured tools...")
    warning_msgs.extend(_check_configured_tools(config_path))

    # Summary
    print("\n=== Pre-flight Summary ===\n")
    if errors:
        print(f"ERRORS ({len(errors)}):")
        for problem in errors:
            print(f" ✗ {problem}")
    if warning_msgs:
        print(f"WARNINGS ({len(warning_msgs)}):")
        for note in warning_msgs:
            print(f" ! {note}")

    if errors:
        print(f"\n{len(errors)} critical error(s). Fix these before using the agent.")
    elif warning_msgs:
        print("\nNo critical errors. Agent can run (warnings above may cause skips).")
    else:
        print("All checks passed. Agent is ready.")

    return not errors
+# SPDX-License-Identifier: MIT-0 + +from pathlib import Path + +from code_reviewer.common.config import ToolConfig + +_PROJECT_ROOT = Path(__file__).resolve().parent.parent +_TEMPLATE_PATH = _PROJECT_ROOT / "prompts" / "wrapper-generator-v1.md" + +# Few-shot example wrappers (JSON, XML, text output formats) +_EXAMPLE_WRAPPERS = { + "bandit (JSON)": _PROJECT_ROOT / "tools" / "bandit.py", + "checkstyle (XML)": _PROJECT_ROOT / "tools" / "checkstyle.py", + "vulture (text)": _PROJECT_ROOT / "tools" / "vulture.py", +} + + +def _read_file(path: Path) -> str: + """Read file contents, return empty string if missing.""" + try: + return path.read_text() + except Exception: + return "" + + +def _load_template() -> str: + return _read_file(_TEMPLATE_PATH) + + +def _build_examples_section() -> str: + """Load few-shot wrapper examples.""" + parts = [] + for label, path in _EXAMPLE_WRAPPERS.items(): + source = _read_file(path) + if source: + parts.append(f"### Example: {label}\n```python\n{source}\n```") + return "\n\n".join(parts) + + +def build_prompt(tool_config: ToolConfig, doc_text: str = "") -> str: + """Build the full generation prompt for the LLM. + + Uses the markdown template with INSERT markers, falling back to + inline prompt construction if the template is missing. 
+ """ + models_source = _read_file(_PROJECT_ROOT / "common" / "models.py") + utils_source = _read_file(_PROJECT_ROOT / "common" / "utils.py") + severity_mapping = _read_file(_PROJECT_ROOT / "common" / "SEVERITY_MAPPING.md") + examples = _build_examples_section() + + tool_info = f"- **Name**: {tool_config.name}\n" + if tool_config.command and tool_config.command != tool_config.name: + tool_info += f"- **Command**: {tool_config.command}\n" + if tool_config.category: + tool_info += f"- **Category**: {tool_config.category}\n" + else: + tool_info += ( + "- **Category**: Determine the correct category from: " + "security, linting, type_safety, complexity, duplication, dead_code\n" + ) + if tool_config.output_format: + tool_info += f"- **Output format**: {tool_config.output_format}\n" + if tool_config.args: + tool_info += f"- **Args template**: {' '.join(tool_config.args)}\n" + + template = _load_template() + + if template and "": f"```python\n{models_source}\n```", + "": f"```python\n{utils_source}\n```", + "": severity_mapping, + "": examples, + "": tool_info, + "": doc_text if doc_text else "_No documentation available._", + } + result = template + for marker, content in replacements.items(): + result = result.replace(marker, content) + return result + + # Fallback: build prompt inline + return f"""You are an expert Python developer generating a tool wrapper module for the AIDLC Code Reviewer. + +## Your Task + +Generate a Python module that wraps the CLI tool described below. The module must follow the exact patterns shown in the examples. + +## Data Models (common/models.py) + +```python +{models_source} +``` + +## Utility Functions (common/utils.py) + +```python +{utils_source} +``` + +## Severity Classification Policy + +{severity_mapping} + +## Example Wrappers + +{examples} + +## Tool to Wrap + +{tool_info} + +## Tool Documentation + +{doc_text if doc_text else "_No documentation available._"} + +## Output Requirements + +1. 
def extract_code(response: str) -> tuple[Optional[str], Optional[str]]:
    """Extract Python wrapper code from an LLM response.

    Returns (code, error); exactly one of them is None. Every accepted
    candidate must contain ``def run(``.

    Stages, tried in order:
    1. Longest ```python ... ``` fenced block.
    2. Heuristic: everything from the first line that looks like module
       code (import/from/CATEGORY/TOOL/docstring) to the end, if it compiles.
    3. The entire response, if it compiles.
    4. Any other fenced block (no language tag, or a different tag such as
       ``py``), longest first, if it compiles. Reached only when the earlier
       stages fail, so their results are unaffected.
    """
    # Stage 1: code block extraction
    tagged_blocks = re.findall(r"```python\s*\n(.*?)```", response, re.DOTALL)
    if tagged_blocks:
        # Pick the longest match (most likely the full wrapper)
        code = max(tagged_blocks, key=len).strip()
        if "def run(" in code:
            return code, None

    # Stage 2: heuristic — collect from the first code-looking line onwards
    code_starts = ("import ", "from ", "CATEGORY", "TOOL", '"""')
    code_lines: list[str] = []
    in_code = False
    for line in response.split("\n"):
        if not in_code and line.rstrip().startswith(code_starts):
            in_code = True
        if in_code:
            code_lines.append(line)

    if code_lines:
        candidate = "\n".join(code_lines).strip()
        if "def run(" in candidate:
            try:
                compile(candidate, "", "exec")
                return candidate, None
            except (SyntaxError, ValueError):
                # ValueError: source containing NUL bytes on some versions.
                pass

    # Stage 3: try entire response as Python
    try:
        compile(response.strip(), "", "exec")
        if "def run(" in response:
            return response.strip(), None
    except (SyntaxError, ValueError):
        pass

    # Stage 4: fenced blocks without the "python" tag
    other_blocks = re.findall(r"```[^\n`]*\n(.*?)```", response, re.DOTALL)
    for block in sorted(other_blocks, key=len, reverse=True):
        candidate = block.strip()
        if "def run(" not in candidate:
            continue
        try:
            compile(candidate, "", "exec")
        except (SyntaxError, ValueError):
            continue
        return candidate, None

    return None, "Could not extract valid Python code from LLM response"
# Error codes that are known to be transient.  Kept for reference/importers;
# any ClientError code that is not explicitly non-retryable is retried too.
RETRYABLE_ERROR_CODES = {
    "ThrottlingException",
    "ServiceUnavailableException",
    "InternalServerError",
    "InternalServerException",
    "ModelTimeoutException",
    "TooManyRequestsException",
}

# Error codes for which a retry cannot succeed.
NON_RETRYABLE_ERROR_CODES = {
    "ValidationException",
    "AccessDeniedException",
    "ResourceNotFoundException",
    "ModelNotReadyException",
}


def _client_error_code(exc: BaseException) -> "str | None":
    """Return the service error code if *exc* is a botocore-style ClientError.

    The check is import-free: an exception qualifies when a class named
    ``ClientError`` appears anywhere in its MRO (so dynamically generated
    subclasses such as ``client.exceptions.ThrottlingException`` are
    recognised) and it carries a mapping-like ``response`` attribute.
    Returns ``None`` for every other exception.
    """
    if not any(cls.__name__ == "ClientError" for cls in type(exc).__mro__):
        return None
    response = getattr(exc, "response", None)
    if not hasattr(response, "get"):
        return None
    error = response.get("Error") or {}
    return error.get("Code", "") if hasattr(error, "get") else ""


def is_retryable(exc: Exception) -> bool:
    """Determine whether an exception should be retried.

    Returns True if the error should be retried, False to give up.

    Rules, applied to *exc* and then to each exception along its
    ``__cause__`` chain:
        * ClientError (or subclass): retry unless the error code is listed
          in ``NON_RETRYABLE_ERROR_CODES``; unknown codes are retried.
        * ``ConnectionError`` / ``TimeoutError``: retry.
        * Anything else: inspect ``__cause__``; give up when the chain ends.
    """
    seen: set[int] = set()
    current: "BaseException | None" = exc
    # Iterative walk with a visited set so a cyclic cause chain cannot recurse forever.
    while current is not None and id(current) not in seen:
        seen.add(id(current))

        error_code = _client_error_code(current)
        if error_code is not None:
            return error_code not in NON_RETRYABLE_ERROR_CODES

        if isinstance(current, (ConnectionError, TimeoutError)):
            return True

        current = current.__cause__

    return False
def _remove_quietly(path: Path) -> None:
    """Best-effort file removal; a cleanup failure must not mask the real result."""
    try:
        path.unlink(missing_ok=True)
    except OSError:
        pass


def _load_module_from_source(
    source: str, module_name: str = "_generated_wrapper"
) -> tuple[Optional[ModuleType], Optional[str]]:
    """Write *source* to a temp file, import it, and return ``(module, error)``.

    Exactly one element of the returned pair is ``None``. The temporary
    ``.py`` file (and the bytecode cache the import may create for it) is
    removed before returning, so ``module.__file__`` points at a path that no
    longer exists.

    NOTE: importing executes the module's top-level code. The source is
    LLM-generated, so this runs untrusted code in the current process.
    """
    tmp_path: Optional[Path] = None
    try:
        # UTF-8 explicitly: Python source files are decoded as UTF-8 by the
        # import system regardless of the platform's default encoding.
        with tempfile.NamedTemporaryFile(
            mode="w",
            suffix=".py",
            prefix=f"{module_name}_",
            delete=False,
            encoding="utf-8",
        ) as f:
            tmp_path = Path(f.name)
            f.write(source)

        spec = importlib.util.spec_from_file_location(module_name, str(tmp_path))
        if spec is None or spec.loader is None:
            return None, "Could not create module spec from source"
        module = importlib.util.module_from_spec(spec)
        # Add project root to sys.path so common imports work
        project_root = str(Path(__file__).resolve().parent.parent)
        if project_root not in sys.path:
            sys.path.insert(0, project_root)
        spec.loader.exec_module(module)
        return module, None
    except SyntaxError as e:
        return None, f"Syntax error: {e}"
    except Exception as e:
        return None, f"Import error: {e}"
    finally:
        # delete=False above means nothing else will clean these up.
        if tmp_path is not None:
            _remove_quietly(tmp_path)
            try:
                _remove_quietly(Path(importlib.util.cache_from_source(str(tmp_path))))
            except (NotImplementedError, ValueError):
                pass  # interpreter has no bytecode cache location


_VALID_CATEGORIES = {
    "security", "linting", "type_safety", "complexity", "duplication", "dead_code",
}


def verify_level1(source: str, expected_category: str = "") -> VerificationResult:
    """Static verification — no tool CLI required.

    Checks, in order: syntax, importability, ``run()`` signature,
    ``SUPPORTED_LANGUAGES``, ``CATEGORY`` validity, ``TOOL``/``TOOL_NAME``
    constant, and the return type of a dry run against a nonexistent path.

    Args:
        source: Generated wrapper module source.
        expected_category: Category hint from the tool config. A mismatch
            with the module's ``CATEGORY`` yields a warning, not an error.

    Returns:
        A level-1 ``VerificationResult``; ``passed`` is True only when no
        errors were recorded.
    """
    errors: list[str] = []
    warnings: list[str] = []

    # 1. Syntax check
    try:
        compile(source, "<generated-wrapper>", "exec")
    except SyntaxError as e:
        return VerificationResult(passed=False, level=1, errors=[f"Syntax error: {e}"])

    # 2. Import check
    module, import_err = _load_module_from_source(source)
    if module is None:
        return VerificationResult(passed=False, level=1, errors=[import_err or "Unknown import error"])

    # 3. Verify run() callable with target: Path parameter
    if not hasattr(module, "run") or not callable(module.run):
        errors.append("Module must define a callable 'run' function")
    else:
        sig = inspect.signature(module.run)
        params = list(sig.parameters.keys())
        if not params or params[0] != "target":
            errors.append("run() must accept 'target' as its first parameter")

    # 4. SUPPORTED_LANGUAGES
    if not hasattr(module, "SUPPORTED_LANGUAGES"):
        errors.append("Module must define SUPPORTED_LANGUAGES list")
    elif not isinstance(module.SUPPORTED_LANGUAGES, list):
        errors.append("SUPPORTED_LANGUAGES must be a list")
    elif not all(isinstance(s, str) for s in module.SUPPORTED_LANGUAGES):
        errors.append("SUPPORTED_LANGUAGES must contain only strings")

    # 5. CATEGORY — must exist and be a valid category
    if not hasattr(module, "CATEGORY"):
        errors.append("Module must define a CATEGORY string constant")
    elif not isinstance(module.CATEGORY, str) or module.CATEGORY not in _VALID_CATEGORIES:
        # The isinstance guard keeps an unhashable CATEGORY (e.g. a list)
        # from raising TypeError in the set-membership test.
        errors.append(
            f"CATEGORY '{module.CATEGORY}' is not valid. "
            f"Must be one of: {', '.join(sorted(_VALID_CATEGORIES))}"
        )
    elif expected_category and module.CATEGORY != expected_category:
        # If the user explicitly set a category in config, warn on mismatch
        warnings.append(
            f"CATEGORY '{module.CATEGORY}' differs from config hint '{expected_category}'"
        )

    # 6. TOOL or TOOL_NAME constant
    if not (hasattr(module, "TOOL") or hasattr(module, "TOOL_NAME")):
        errors.append("Module must define a TOOL or TOOL_NAME string constant")

    if errors:
        return VerificationResult(passed=False, level=1, errors=errors, warnings=warnings)

    # 7. Dry run with nonexistent path — must return ToolResult.
    # An exception here is tolerated (recorded as a warning); only a
    # wrong return type is an error.
    try:
        result = module.run(Path(tempfile.gettempdir()) / "_nonexistent_verification_target")
        if not isinstance(result, ToolResult):
            errors.append(
                f"run() must return a ToolResult, got {type(result).__name__}"
            )
    except Exception as e:
        warnings.append(f"Dry run raised exception (acceptable): {e}")

    passed = len(errors) == 0
    return VerificationResult(passed=passed, level=1, errors=errors, warnings=warnings)


def verify_level2(
    source: str,
    tool_command: str,
    target: Path,
) -> tuple[VerificationResult, Optional[ToolResult]]:
    """Live verification — requires the tool CLI to be installed.

    Runs the wrapper against a real target and validates output.

    Args:
        source: Generated wrapper module source.
        tool_command: Executable name checked with ``check_tool_installed``.
        target: Path handed to the wrapper's ``run()``.

    Returns:
        ``(VerificationResult, ToolResult or None)``. The ToolResult is
        returned so the caller can reuse it instead of running the tool
        twice; it is ``None`` whenever ``run()`` could not be executed or
        did not return a ``ToolResult``.
    """
    errors: list[str] = []
    warnings: list[str] = []

    # Prerequisite: tool must be installed.
    # NOTE(review): this is reported as a *failed* verification even though
    # the message says "skipping" — callers that retry on failure will retry
    # for a condition regeneration cannot fix; confirm this is intended.
    if not check_tool_installed(tool_command):
        return VerificationResult(
            passed=False, level=2,
            errors=[f"Tool '{tool_command}' not installed, skipping Level 2"],
        ), None

    module, import_err = _load_module_from_source(source)
    if module is None:
        return VerificationResult(passed=False, level=2, errors=[import_err or "Import failed"]), None

    try:
        result = module.run(target)
    except Exception as e:
        return VerificationResult(
            passed=False, level=2,
            errors=[f"run() raised exception: {e}"],
        ), None

    if not isinstance(result, ToolResult):
        return VerificationResult(
            passed=False, level=2,
            errors=[f"run() returned {type(result).__name__}, expected ToolResult"],
        ), None

    if not result.success:
        errors.append(f"Tool returned success=False: {result.error}")

    # Every finding must be a Finding carrying a Severity enum member.
    for i, finding in enumerate(result.findings):
        if not isinstance(finding, Finding):
            errors.append(f"Finding {i} is {type(finding).__name__}, expected Finding")
            continue
        if not isinstance(finding.severity, Severity):
            errors.append(f"Finding {i} severity is not a valid Severity enum")

    passed = len(errors) == 0
    return VerificationResult(passed=passed, level=2, errors=errors, warnings=warnings), result
logger = logging.getLogger("aidlc_code_reviewer.agent")

_TOOLS_DIR = Path(__file__).resolve().parent.parent / "tools"


def _write_and_register(tool_name: str, source: str) -> Path:
    """Write wrapper source to ``tools/<safe_name>.py`` and register it.

    ``safe_name`` is *tool_name* with every character other than letters,
    digits and ``_`` replaced by ``_``, so the file always lands directly
    inside the tools directory (no path separators or ``..`` segments).

    Args:
        tool_name: Name the wrapper is registered under.
        source: Verified wrapper module source.

    Returns:
        Path of the written wrapper file.

    Raises:
        ValueError: If no usable filename can be derived from *tool_name*.
        ImportError: If an import spec cannot be created for the written file.
        Exception: Anything raised while executing the wrapper module.
    """
    # Sanitize name for filename
    safe_name = "".join(ch if (ch.isalnum() or ch == "_") else "_" for ch in tool_name)
    if not safe_name.strip("_"):
        raise ValueError(f"Cannot derive a wrapper filename from tool name {tool_name!r}")

    wrapper_path = _TOOLS_DIR / f"{safe_name}.py"
    # UTF-8 explicitly: generated source may contain non-ASCII text and the
    # import system decodes source files as UTF-8.
    wrapper_path.write_text(source, encoding="utf-8")

    # Import and register
    spec = importlib.util.spec_from_file_location(f"tools.{safe_name}", str(wrapper_path))
    if spec is None or spec.loader is None:
        # Previously this case returned normally, reporting success for a
        # wrapper that was never registered.
        raise ImportError(f"Could not create module spec for {wrapper_path}")

    project_root = str(Path(__file__).resolve().parent.parent)
    if project_root not in sys.path:
        sys.path.insert(0, project_root)
    module = importlib.util.module_from_spec(spec)
    spec.loader.exec_module(module)
    register_wrapper(tool_name, module)

    return wrapper_path


class WrapperGeneratorAgent(BaseAgent):
    """Agent that auto-generates tool wrapper modules."""

    def __init__(self, config: AgentConfig | None = None) -> None:
        super().__init__(config)

    def execute(self, tool_config: ToolConfig | None = None, target: Path | None = None, **kwargs: Any) -> GenerationResult:
        """Generate a wrapper for the given tool configuration.

        Steps:
            1. Build prompt
            2. Invoke LLM, extract code
            3. Verify Level 1 (static) — retry with error feedback on failure
            4. Verify Level 2 (live, only when *target* is given) — retry with
               error feedback on failure
            5. Write and register wrapper

        Steps 2-4 run up to ``self.config.max_retries + 1`` times; each retry
        appends the previous attempt's errors to the original prompt.

        Args:
            tool_config: Tool to generate a wrapper for (required).
            target: Path used for Level 2 verification; when ``None`` the
                wrapper is accepted after Level 1 alone.
            **kwargs: Accepted for interface compatibility; unused here.

        Returns:
            A ``GenerationResult``. Failures are reported through its
            ``status``/``error`` fields rather than raised.
        """
        if tool_config is None:
            return GenerationResult(
                status=GenerationStatus.FAILED,
                tool_name="unknown",
                error="tool_config is required",
            )

        tool_name = tool_config.name
        max_retries = self.config.max_retries

        # 1. Build prompt (no tool documentation is supplied at present)
        doc_text = ""
        prompt = build_prompt(tool_config, doc_text)

        # 2-4. Generate, verify (Level 1 + Level 2), retry on failure
        last_errors: list[str] = []
        last_error_level: int = 0
        source: str | None = None
        # NOTE(review): holds the usage of the most recent LLM call only;
        # earlier attempts are not accumulated — confirm that is intended.
        token_usage: dict = {}
        v1: VerificationResult | None = None
        v2: VerificationResult | None = None
        live_tool_result = None

        for attempt in range(max_retries + 1):
            # Invoke LLM
            if attempt == 0:
                current_prompt = prompt
            else:
                error_feedback = "\n".join(f"- {e}" for e in last_errors)
                level_label = f"Level {last_error_level}" if last_error_level else "verification"
                current_prompt = (
                    f"{prompt}\n\n"
                    f"## IMPORTANT: Fix These Issues\n\n"
                    f"Your previous attempt failed {level_label} with these errors:\n{error_feedback}\n\n"
                    f"Please fix ALL of these issues in your new response."
                )

            try:
                response_text, usage = self._invoke_model(current_prompt)
                token_usage = usage
            except Exception as e:
                # An LLM failure is not retried here; the whole generation fails.
                return GenerationResult(
                    status=GenerationStatus.FAILED,
                    tool_name=tool_name,
                    error=f"LLM invocation failed: {e}",
                    token_usage=token_usage,
                )

            # Extract code
            code, extract_err = extract_code(response_text)
            if code is None:
                last_errors = [extract_err or "Code extraction failed"]
                last_error_level = 0
                if attempt < max_retries:
                    logger.warning(
                        "Attempt %d: code extraction failed, retrying", attempt + 1
                    )
                    continue
                return GenerationResult(
                    status=GenerationStatus.FAILED,
                    tool_name=tool_name,
                    error=extract_err,
                    token_usage=token_usage,
                )

            source = code

            # Level 1 verification
            v1 = verify_level1(source, tool_config.category)
            if not v1.passed:
                last_errors = v1.errors
                last_error_level = 1
                if attempt < max_retries:
                    logger.warning(
                        "Attempt %d: Level 1 verification failed (%s), retrying",
                        attempt + 1,
                        "; ".join(v1.errors),
                    )
                    continue
                return GenerationResult(
                    status=GenerationStatus.VERIFICATION_FAILED,
                    tool_name=tool_name,
                    verification=v1,
                    error=f"Level 1 verification failed: {'; '.join(v1.errors)}",
                    token_usage=token_usage,
                )

            # Level 1 passed
            vprint(f" Level 1 (static) verification passed for '{tool_name}'", flush=True)

            # Level 2 verification (only when a target is provided)
            if target is not None:
                v2, live_tool_result = verify_level2(source, tool_config.command, target)
                if v2.passed:
                    logger.info("Level 2 (live) verification passed for %s", tool_name)
                    vprint(f" Level 2 (live) verification passed for '{tool_name}'", flush=True)
                    break  # Both levels passed
                else:
                    last_errors = v2.errors
                    last_error_level = 2
                    if attempt < max_retries:
                        logger.warning(
                            "Attempt %d: Level 2 verification failed (%s), retrying",
                            attempt + 1,
                            "; ".join(v2.errors),
                        )
                        continue
                    # Final attempt failed Level 2 — report failure
                    print(
                        f" Level 2 verification failed for '{tool_name}' "
                        f"after {max_retries + 1} attempts: {'; '.join(v2.errors)}",
                        flush=True,
                    )
                    return GenerationResult(
                        status=GenerationStatus.VERIFICATION_FAILED,
                        tool_name=tool_name,
                        verification=v2,
                        error=f"Level 2 verification failed: {'; '.join(v2.errors)}",
                        token_usage=token_usage,
                    )
            else:
                break  # No target for Level 2, accept with Level 1 only

        # Reached only if the loop body never ran (e.g. negative max_retries).
        if source is None:
            return GenerationResult(
                status=GenerationStatus.FAILED,
                tool_name=tool_name,
                error="No source code generated",
                token_usage=token_usage,
            )

        # 5. Write and register
        try:
            wrapper_path = _write_and_register(tool_name, source)
        except Exception as e:
            return GenerationResult(
                status=GenerationStatus.FAILED,
                tool_name=tool_name,
                error=f"Failed to write/register wrapper: {e}",
                token_usage=token_usage,
            )

        return GenerationResult(
            status=GenerationStatus.SUCCESS,
            tool_name=tool_name,
            wrapper_path=wrapper_path,
            verification=v2 or v1,
            token_usage=token_usage,
            tool_result=live_tool_result,
        )
Severity Levels + +| Level | When to Use | Examples | +|-------|------------|---------| +| **CRITICAL** | Exploitable security vulnerabilities with direct impact | RCE, SQL injection (high confidence), command injection | +| **HIGH** | Security issues requiring immediate attention | Hardcoded secrets, insecure crypto, SSRF, XSS, deserialization flaws | +| **MEDIUM** | Significant non-security issues, or lower-confidence security findings | Type errors, high complexity (D-F rank), style errors, large duplication, security warnings | +| **LOW** | Minor issues that should be addressed but aren't urgent | Style warnings, small duplication, low-confidence dead code, minor linting | +| **INFO** | Informational, no action required | Low complexity grades (A), notes, ignored findings | + +--- + +## 2. Tool Mappings + +### 2.1 Security Tools (HIGH/CRITICAL allowed) + +#### Semgrep (Java + Python) + +| Tool-Native Severity | Mapped Severity | +|----------------------|-----------------| +| `ERROR` | HIGH | +| `WARNING` | MEDIUM | +| `INFO` | LOW | +| Unknown / default | LOW | + +Consistent across both languages. + +#### Gitleaks (Java + Python) + +| Tool-Native Severity | Mapped Severity | +|----------------------|-----------------| +| All detected secrets | HIGH | + +Consistent across both languages. 
+ +#### Bandit (Python only) + +| Tool-Native Severity | Mapped Severity | +|----------------------|-----------------| +| `HIGH` | HIGH | +| `MEDIUM` | MEDIUM | +| `LOW` | LOW | +| Unknown / default | INFO | + +### 2.2 Linting / Style Tools (capped at MEDIUM) + +#### Checkstyle (Java) + +| Tool-Native Severity | Mapped Severity | +|----------------------|-----------------| +| `error` | MEDIUM | +| `warning` | LOW | +| `info` | INFO | +| `ignore` | INFO | + +#### Ruff (Python) + +| Rule Code Prefix | Mapped Severity | +|-----------------|-----------------| +| `E` (pycodestyle errors) | MEDIUM | +| `F` (pyflakes errors) | MEDIUM | +| `W` (warnings) | LOW | +| `I` (import sorting) | LOW | +| `N` (naming conventions) | LOW | +| All other prefixes | LOW | + +#### PMD — Linting rules (Java) + +| PMD Priority | Mapped Severity | +|-------------|-----------------| +| 1 | MEDIUM | +| 2 | MEDIUM | +| 3 | MEDIUM | +| 4 | LOW | +| 5 | INFO | + +PMD covers linting, complexity, and dead code — all non-security categories. The PMD priority 1-5 scale is compressed to cap at MEDIUM. + +### 2.3 Type Safety Tools (capped at MEDIUM) + +#### javac (Java) + +| Tool-Native Severity | Mapped Severity | +|----------------------|-----------------| +| `error` | MEDIUM | +| `warning` | LOW | + +#### mypy (Python) + +| Tool-Native Severity | Mapped Severity | +|----------------------|-----------------| +| `error` | MEDIUM | +| `warning` | LOW | +| `note` | INFO | + +Consistent across both languages for the type safety category. + +### 2.4 Complexity Tools (capped at MEDIUM) + +#### Radon (Python) + +| Complexity Rank | Mapped Severity | +|----------------|-----------------| +| A (1-5) | INFO | +| B (6-10) | LOW | +| C (11-15) | MEDIUM | +| D (16-20) | MEDIUM | +| E (21-25) | MEDIUM | +| F (26+) | MEDIUM | + +#### PMD Complexity Rules (Java) + +Uses the standard PMD priority mapping (capped at MEDIUM, see 2.2). 
+ +### 2.5 Code Duplication Tools (capped at MEDIUM) + +#### PMD-CPD (Java) + +| Condition | Mapped Severity | +|-----------|-----------------| +| Duplicated block < 30 lines | LOW | +| Duplicated block >= 30 lines | MEDIUM | + +Python duplication tool (jscpd) is not yet implemented. + +### 2.6 Dead Code Tools (capped at MEDIUM) + +#### Vulture (Python) + +| Condition | Mapped Severity | +|-----------|-----------------| +| Confidence >= 80% | MEDIUM | +| Confidence < 80% | LOW | + +#### PMD Dead Code Rules (Java) + +Uses the standard PMD priority mapping (capped at MEDIUM, see 2.2). + +--- + +## 3. Verdict Logic + +Source: `common/report.py` — `_overall_verdict()` + +The overall verdict is determined by counting findings across ALL tools: + +``` +if CRITICAL > 0 OR HIGH >= 5 --> "Critical" +if HIGH > 0 OR MEDIUM >= 10 --> "Needs Attention" +otherwise --> "Good" +``` + +Because HIGH/CRITICAL are reserved for security, this means: +- **"Critical"** can only be triggered by security findings (CRITICAL vulnerabilities or 5+ HIGH security issues) +- **"Needs Attention"** is triggered by any security finding (1+ HIGH) or by 10+ non-security issues at MEDIUM +- **"Good"** means no security findings and fewer than 10 medium-severity non-security issues + +This is intentional — security issues are non-negotiable and immediately escalate the verdict. + +--- + +## 4. Top Findings Selection + +Source: `common/report.py` — `_top_findings()` + +The executive summary "Top Findings" section uses this algorithm: + +1. **Include ALL CRITICAL and HIGH findings** — these are security issues and are not capped or hidden +2. Deduplicate by `rule_id:file` so repeated hits don't dominate +3. Fill remaining slots (up to 5 by default) with MEDIUM findings, round-robin across categories for diversity +4. LOW and INFO findings are not shown in the top findings + +This means: if there are 12 HIGH security findings, all 12 appear in the top findings. 
Security is non-negotiable and fully visible in the executive summary. + +All findings (including LOW and INFO) are still reported in full in the per-tool sections (Section 3.x of the report). + +--- + +## 5. Quick Reference Table + +| Tool | Language | Category | Native Value | Mapped Severity | +|------|----------|----------|-------------|-----------------| +| semgrep | Both | security | ERROR | HIGH | +| semgrep | Both | security | WARNING | MEDIUM | +| semgrep | Both | security | INFO | LOW | +| gitleaks | Both | security | (all secrets) | HIGH | +| bandit | Python | security | HIGH | HIGH | +| bandit | Python | security | MEDIUM | MEDIUM | +| bandit | Python | security | LOW | LOW | +| checkstyle | Java | linting | error | MEDIUM | +| checkstyle | Java | linting | warning | LOW | +| checkstyle | Java | linting | info | INFO | +| ruff | Python | linting | E/F prefix | MEDIUM | +| ruff | Python | linting | W/I/N prefix | LOW | +| pmd | Java | linting/complexity/dead_code | priority 1-3 | MEDIUM | +| pmd | Java | linting/complexity/dead_code | priority 4 | LOW | +| pmd | Java | linting/complexity/dead_code | priority 5 | INFO | +| javac | Java | type_safety | error | MEDIUM | +| javac | Java | type_safety | warning | LOW | +| mypy | Python | type_safety | error | MEDIUM | +| mypy | Python | type_safety | warning | LOW | +| mypy | Python | type_safety | note | INFO | +| radon | Python | complexity | A | INFO | +| radon | Python | complexity | B | LOW | +| radon | Python | complexity | C-F | MEDIUM | +| pmd-cpd | Java | duplication | < 30 lines | LOW | +| pmd-cpd | Java | duplication | >= 30 lines | MEDIUM | +| vulture | Python | dead_code | >= 80% confidence | MEDIUM | +| vulture | Python | dead_code | < 80% confidence | LOW | diff --git a/scripts/aidlc-codereview/src/code_reviewer/common/__init__.py b/scripts/aidlc-codereview/src/code_reviewer/common/__init__.py new file mode 100644 index 00000000..43f9d656 --- /dev/null +++ 
b/scripts/aidlc-codereview/src/code_reviewer/common/__init__.py @@ -0,0 +1 @@ +"""Common shared modules for AIDLC Code Reviewer.""" diff --git a/scripts/aidlc-codereview/src/code_reviewer/common/cli.py b/scripts/aidlc-codereview/src/code_reviewer/common/cli.py new file mode 100644 index 00000000..ca41f0e9 --- /dev/null +++ b/scripts/aidlc-codereview/src/code_reviewer/common/cli.py @@ -0,0 +1,10 @@ +"""CLI entry point for aidlc-code-reviewer.""" + +# Copyright 2026 Amazon.com, Inc. or its affiliates. All Rights Reserved. +# SPDX-License-Identifier: MIT-0 + +from code_reviewer.runner import main as _run + + +def main() -> None: + _run() diff --git a/scripts/aidlc-codereview/src/code_reviewer/common/config.py b/scripts/aidlc-codereview/src/code_reviewer/common/config.py new file mode 100644 index 00000000..9a6a4c43 --- /dev/null +++ b/scripts/aidlc-codereview/src/code_reviewer/common/config.py @@ -0,0 +1,112 @@ +"""YAML config loader and validation for AIDLC Code Reviewer.""" + +# Copyright 2026 Amazon.com, Inc. or its affiliates. All Rights Reserved. +# SPDX-License-Identifier: MIT-0 + +from __future__ import annotations + +from dataclasses import dataclass, field +from pathlib import Path + +import yaml + + +@dataclass +class ToolConfig: + """Configuration for a single review tool. + + Only 'name' is required. The rest are optional — built-in wrappers + already define their own CATEGORY/TOOL/SUPPORTED_LANGUAGES, and the + agent infers them for generated wrappers. 
+ """ + + name: str + command: str = "" # defaults to name + category: str = "" # read from wrapper module at runtime + output_format: str = "" # hint for agent, optional + args: list[str] = field(default_factory=list) + + def __post_init__(self) -> None: + if not self.command: + self.command = self.name + + +@dataclass +class ReviewConfig: + """Top-level review configuration.""" + + tools: list[ToolConfig] + + +# Path to the default config that ships with the package +_DEFAULT_CONFIG_PATH = Path(__file__).resolve().parent.parent / "review-config.yaml" + +VALID_CATEGORIES = { + "security", "linting", "type_safety", "complexity", "duplication", "dead_code", +} + + +def load_config(config_path: Path | None = None) -> ReviewConfig: + """Load and validate a review config from YAML. + + Config entries can be: + - A string: just the tool name (e.g. "pylint") + - A dict with 'name' required, everything else optional + + Args: + config_path: Path to YAML config file. If None, uses the default + config that ships with the package. + + Returns: + Validated ReviewConfig instance. + + Raises: + FileNotFoundError: If the config file does not exist. + ValueError: If the config is invalid. 
+ """ + path = config_path or _DEFAULT_CONFIG_PATH + if not path.exists(): + raise FileNotFoundError(f"Config file not found: {path}") + + with open(path, encoding="utf-8") as f: + raw = yaml.safe_load(f) + + if not isinstance(raw, dict) or "tools" not in raw: + raise ValueError("Config must contain a 'tools' key with a list of tool definitions.") + + tools: list[ToolConfig] = [] + for i, entry in enumerate(raw["tools"]): + # Simple string entry: just the tool name + if isinstance(entry, str): + tools.append(ToolConfig(name=entry)) + continue + + if not isinstance(entry, dict): + raise ValueError(f"Tool entry {i} must be a string or a mapping.") + + if "name" not in entry: + raise ValueError(f"Tool entry {i} missing required field: 'name'") + + name = str(entry["name"]) + command = str(entry.get("command", "")) + category = str(entry.get("category", "")) + output_format = str(entry.get("output_format", "")) + + if category and category not in VALID_CATEGORIES: + raise ValueError( + f"Tool '{name}': invalid category '{category}'. " + f"Must be one of: {', '.join(sorted(VALID_CATEGORIES))}" + ) + + tools.append(ToolConfig( + name=name, + command=command or name, + category=category, + output_format=output_format, + args=[str(a) for a in entry.get("args", [])], + )) + + if not tools: + raise ValueError("Config must define at least one tool.") + + return ReviewConfig(tools=tools) diff --git a/scripts/aidlc-codereview/src/code_reviewer/common/language_detector.py b/scripts/aidlc-codereview/src/code_reviewer/common/language_detector.py new file mode 100644 index 00000000..9247f222 --- /dev/null +++ b/scripts/aidlc-codereview/src/code_reviewer/common/language_detector.py @@ -0,0 +1,59 @@ +"""Language detection by scanning file extensions in a target directory.""" + +# Copyright 2026 Amazon.com, Inc. or its affiliates. All Rights Reserved. 
EXTENSION_MAP: dict[str, str] = {
    ".py": "python",
    ".java": "java",
    ".js": "javascript",
    ".jsx": "javascript",
    ".ts": "typescript",
    ".tsx": "typescript",
    ".go": "go",
    ".rb": "ruby",
    ".rs": "rust",
    ".cpp": "cpp",
    ".cc": "cpp",
    ".cxx": "cpp",
    ".c": "c",
    ".h": "c",
    ".hpp": "cpp",
    ".cs": "csharp",
    ".kt": "kotlin",
    ".kts": "kotlin",
    ".swift": "swift",
    ".scala": "scala",
    ".php": "php",
    ".r": "r",
    ".R": "r",
    ".sh": "shell",
    ".bash": "shell",
}


def detect_languages(target: Path) -> set[str]:
    """Return the set of programming languages found at *target*.

    A file target contributes only its own extension; any other target is
    walked recursively and every regular file beneath it is considered.
    Extensions are lower-cased before the ``EXTENSION_MAP`` lookup, and
    files with unmapped extensions are ignored.
    """
    if target.is_file():
        candidates = [target]
    else:
        candidates = (entry for entry in target.rglob("*") if entry.is_file())

    return {
        language
        for path in candidates
        if (language := EXTENSION_MAP.get(path.suffix.lower()))
    }
+# SPDX-License-Identifier: MIT-0 + +from dataclasses import dataclass, field +from enum import Enum +from typing import Optional + + +class Severity(str, Enum): + LOW = "LOW" + MEDIUM = "MEDIUM" + HIGH = "HIGH" + CRITICAL = "CRITICAL" + INFO = "INFO" + + +@dataclass +class Finding: + """A single issue found by a tool.""" + + file: str + line: int + rule_id: str + message: str + severity: Severity + tool: str + category: str # maps to rubric category + column: Optional[int] = None + end_line: Optional[int] = None + end_column: Optional[int] = None + + +@dataclass +class DuplicationBlock: + """A pair of duplicated code blocks.""" + + source_file: str + source_start_line: int + source_end_line: int + target_file: str + target_start_line: int + target_end_line: int + lines: int + tokens: int + + +@dataclass +class ToolResult: + """Standardized output from any tool wrapper.""" + + tool: str + category: str + success: bool + findings: list[Finding] = field(default_factory=list) + duplications: list[DuplicationBlock] = field(default_factory=list) + error: Optional[str] = None + raw_output: Optional[str] = None + + +@dataclass +class SkipRecord: + """Record of a tool that was skipped (no wrapper, not installed, or errored).""" + + tool: str + category: str + reason: str + + +class CriticalCategory(str, Enum): + """Categories of critical code that require human review (technical focus).""" + + COMPUTATION = "COMPUTATION" + CONTROL_FLOW = "CONTROL_FLOW" + DATA_TRANSFORM = "DATA_TRANSFORM" + + +@dataclass +class CriticalFinding: + """A critical code section flagged for human review. + + Designed for scanability: category tag, location, one-line verdict, + the actual code block, and any related tool errors. 
+ """ + + category: CriticalCategory + file: str + start_line: int + end_line: int + verdict: str # one-line human-readable summary + code_block: str # the actual source code + why_critical: str # brief reason this needs human eyes + recommended_action: str = "" # one-line actionable fix suggestion + source: str = "agent_only" # "agent_only" or "tool_assisted" + related_tool_findings: list[Finding] = field(default_factory=list) + highlight_lines: list[int] = field(default_factory=list) # specific problematic lines (absolute) + + +class StructureRating(str, Enum): + """Rating for a code structure dimension.""" + + GOOD = "GOOD" + NEEDS_IMPROVEMENT = "NEEDS_IMPROVEMENT" + POOR = "POOR" + + +@dataclass +class StructureDimension: + """Assessment of one code quality dimension (e.g. logging, scalability).""" + + dimension: str # e.g. "logging", "scalability", "efficiency" + rating: StructureRating + summary: str # one-line assessment + findings: list["StructureIssue"] = field(default_factory=list) + + +@dataclass +class StructureIssue: + """A specific actionable issue within a structure dimension.""" + + file: str + start_line: int + end_line: int + issue: str # one-line description of the problem + recommendation: str # one-line actionable fix + code_block: str = "" # the relevant source code + source: str = "agent_only" # "agent_only" or "tool_assisted" + related_tool_findings: list[Finding] = field(default_factory=list) + highlight_lines: list[int] = field(default_factory=list) # specific problematic lines (absolute) + + +@dataclass +class CodeStructureCritique: + """Full AI-powered code structure critique (Section 2 of the report).""" + + overall_summary: str # 2-3 sentence high-level assessment + dimensions: list[StructureDimension] = field(default_factory=list) + + +# --------------------------------------------------------------------------- +# Business Logic Review models +# --------------------------------------------------------------------------- + + +class 
class ConsistencyIssueType(str, Enum):
    """Types of self-consistency issues across business logic."""

    # ``str`` mixin: members compare equal to their string values.
    CONSTANT_DRIFT = "CONSTANT_DRIFT"
    LOGIC_DIVERGENCE = "LOGIC_DIVERGENCE"
    NAMING_MISMATCH = "NAMING_MISMATCH"
    REDUNDANT_IMPLEMENTATION = "REDUNDANT_IMPLEMENTATION"


@dataclass
class BusinessLogicFinding:
    """A business logic section flagged for human review."""

    # Every field is required; there are no defaults.
    category: BusinessLogicCategory
    title: str  # short, meaningful name (e.g. "Tax Rate Calculation")
    file: str
    start_line: int
    end_line: int
    what_it_does: str  # plain-English description a PM can understand
    review_guidance: str  # what specifically the human should verify
    code_block: str  # exact source lines
    risk_if_wrong: str  # business impact if this code has a bug


@dataclass
class ConsistencyIssueLocation:
    """A location involved in a consistency issue."""

    file: str
    start_line: int
    end_line: int


@dataclass
class ConsistencyIssue:
    """A self-consistency issue between business logic sections."""

    issue_type: ConsistencyIssueType
    description: str
    # default_factory gives every instance its own empty list.
    locations: list[ConsistencyIssueLocation] = field(default_factory=list)
    # presumably one snippet per entry in ``locations`` — TODO confirm with the producer
    code_blocks: list[str] = field(default_factory=list)
    recommended_action: str = ""
_verbose = False


def set_verbose(enabled: bool) -> None:
    """Turn verbose terminal output on or off.

    The flag is stored in a module-level variable, so it applies to every
    later ``vprint`` call made through this module.
    """
    global _verbose
    _verbose = enabled


def is_verbose() -> bool:
    """Report the value last stored by ``set_verbose`` (initially False)."""
    return _verbose


def vprint(*args, **kwargs) -> None:
    """Forward to built-in ``print`` when verbose mode is on; otherwise do nothing.

    Takes exactly the positional and keyword arguments ``print`` takes.
    """
    if not _verbose:
        return
    print(*args, **kwargs)
+# SPDX-License-Identifier: MIT-0 + +from __future__ import annotations + +import html as html_lib +from pathlib import Path + +from code_reviewer.common.models import ( + BusinessLogicCategory, + BusinessLogicReview, + CodeStructureCritique, + ConsistencyIssueType, + CriticalCategory, + CriticalFinding, + Finding, + Severity, + SkipRecord, + StructureRating, + ToolResult, +) + +SEV_ORDER = [Severity.CRITICAL, Severity.HIGH, Severity.MEDIUM, Severity.LOW, Severity.INFO] + +CRITICAL_CATEGORY_LABELS = { + CriticalCategory.COMPUTATION: "COMPUTATION", + CriticalCategory.CONTROL_FLOW: "CONTROL_FLOW", + CriticalCategory.DATA_TRANSFORM: "DATA_TRANSFORM", +} + +CRITICAL_CATEGORY_ICONS = { + CriticalCategory.COMPUTATION: "🔢", + CriticalCategory.CONTROL_FLOW: "🔀", + CriticalCategory.DATA_TRANSFORM: "🔄", +} + +DIMENSION_ICONS = { + "LOGGING": "📋", + "MEASURABILITY": "📊", + "SCALABILITY": "📈", + "EFFICIENCY": "⚡", + "COMPLEXITY": "🧩", + "STRUCTURE": "🏗️", +} + +RATING_LABELS = { + StructureRating.GOOD: ("Good", "good"), + StructureRating.NEEDS_IMPROVEMENT: ("Needs Improvement", "attention"), + StructureRating.POOR: ("Poor", "critical"), +} + +CATEGORY_LABELS = { + "security": "Security", + "linting": "Linting / Style Conformance", + "type_safety": "Type Safety", + "complexity": "Complexity", + "duplication": "Code Duplication", + "dead_code": "Dead Code", +} + +# Which rubric section number each category maps to +CATEGORY_NUMBERS = { + "security": "3.1", + "linting": "3.3", + "type_safety": "3.4", + "complexity": "3.5", + "duplication": "3.6", + "dead_code": "3.7", +} + +BUSINESS_LOGIC_CATEGORY_LABELS = { + BusinessLogicCategory.FINANCIAL_FORMULA: "Financial Formula", + BusinessLogicCategory.SCORING_AND_RANKING: "Scoring & Ranking", + BusinessLogicCategory.PRICING_AND_DISCOUNT: "Pricing & Discount", + BusinessLogicCategory.BUSINESS_RULE: "Business Rule", + BusinessLogicCategory.STATE_MACHINE: "State Machine", + BusinessLogicCategory.ROUNDING_AND_PRECISION: "Rounding & 
Precision", + BusinessLogicCategory.BOUNDARY_CONDITION: "Boundary Condition", + BusinessLogicCategory.DATA_MAPPING: "Data Mapping", + BusinessLogicCategory.TEMPORAL_LOGIC: "Temporal Logic", + BusinessLogicCategory.RECONCILIATION: "Reconciliation", +} + +BUSINESS_LOGIC_CATEGORY_ICONS = { + BusinessLogicCategory.FINANCIAL_FORMULA: "💰", + BusinessLogicCategory.SCORING_AND_RANKING: "📊", + BusinessLogicCategory.PRICING_AND_DISCOUNT: "🏷️", + BusinessLogicCategory.BUSINESS_RULE: "📋", + BusinessLogicCategory.STATE_MACHINE: "🔄", + BusinessLogicCategory.ROUNDING_AND_PRECISION: "🔢", + BusinessLogicCategory.BOUNDARY_CONDITION: "🚧", + BusinessLogicCategory.DATA_MAPPING: "🗺️", + BusinessLogicCategory.TEMPORAL_LOGIC: "⏰", + BusinessLogicCategory.RECONCILIATION: "⚖️", +} + +CONSISTENCY_ISSUE_LABELS = { + ConsistencyIssueType.CONSTANT_DRIFT: "Constant Drift", + ConsistencyIssueType.LOGIC_DIVERGENCE: "Logic Divergence", + ConsistencyIssueType.NAMING_MISMATCH: "Naming Mismatch", + ConsistencyIssueType.REDUNDANT_IMPLEMENTATION: "Redundant Implementation", +} + +CONSISTENCY_ISSUE_ICONS = { + ConsistencyIssueType.CONSTANT_DRIFT: "📌", + ConsistencyIssueType.LOGIC_DIVERGENCE: "🔀", + ConsistencyIssueType.NAMING_MISMATCH: "🏷️", + ConsistencyIssueType.REDUNDANT_IMPLEMENTATION: "♻️", +} + +# Tools that map to the secrets sub-section (3.2) +SECRETS_TOOLS = {"gitleaks"} + + +def _render_code_block_md(code_block: str, start_line: int, highlight_lines: list[int]) -> list[str]: + """Render a code block in markdown with highlighted lines marked with '>>>'.""" + lines: list[str] = [] + highlight_set = set(highlight_lines) + code_lines = code_block.split("\n") + lines.append("```") + for i, code_line in enumerate(code_lines): + line_num = start_line + i + if highlight_set and line_num in highlight_set: + lines.append(f">>> {code_line}") + else: + lines.append(f" {code_line}") + lines.append("```") + return lines + + +def _count_by_severity(findings: list[Finding]) -> dict[str, int]: + counts: 
dict[str, int] = {} + for f in findings: + counts[f.severity.value] = counts.get(f.severity.value, 0) + 1 + return counts + + +def _severity_sort_key(f: Finding) -> int: + order = {Severity.CRITICAL: 0, Severity.HIGH: 1, Severity.MEDIUM: 2, Severity.LOW: 3, Severity.INFO: 4} + return order.get(f.severity, 5) + + +def _overall_verdict(findings: list[Finding]) -> str: + by_sev = _count_by_severity(findings) + crit = by_sev.get("CRITICAL", 0) + high = by_sev.get("HIGH", 0) + med = by_sev.get("MEDIUM", 0) + if crit > 0 or high >= 5: + return "Critical" + if high > 0 or med >= 10: + return "Needs Attention" + return "Good" + + +def _action_summary( + by_sev: dict[str, int], + verdict: str, + critical_findings: list[CriticalFinding] | None, + all_findings: list[Finding], +) -> str: + """Build a short, action-oriented summary telling the reader what to do.""" + parts: list[str] = [] + crit = by_sev.get("CRITICAL", 0) + high = by_sev.get("HIGH", 0) + med = by_sev.get("MEDIUM", 0) + _low = by_sev.get("LOW", 0) + by_sev.get("INFO", 0) + n_critical_sections = len(critical_findings) if critical_findings else 0 + + if verdict == "Good": + parts.append("No urgent issues found.") + if med: + parts.append(f"Review {med} medium-severity findings when convenient.") + return " ".join(parts) + + # Urgent items + urgent: list[str] = [] + if crit: + urgent.append(f"{crit} CRITICAL") + if high: + urgent.append(f"{high} HIGH") + if urgent: + parts.append(f"{' and '.join(urgent)}-severity findings require immediate action.") + + if n_critical_sections: + parts.append( + f"{n_critical_sections} code section{'s' if n_critical_sections != 1 else ''}" + f" flagged for human review — see Critical Code Findings below." 
+ ) + + if med: + parts.append(f"Address {med} MEDIUM findings during regular development.") + + return " ".join(parts) + + +def _short_path(filepath: str) -> str: + """Return just the filename from a potentially absolute path.""" + return Path(filepath).name + + +def _short_rule(rule_id: str) -> str: + """Shorten dotted rule IDs like 'java.lang.security.audit.foo.bar' → 'bar'.""" + return rule_id.rsplit(".", 1)[-1] if "." in rule_id else rule_id + + + +def _top_findings(findings: list[Finding]) -> list[Finding]: + """Return CRITICAL and HIGH findings for the executive summary. + + Deduplicated by ``rule_id`` so the same rule across multiple files + appears only once (the first occurrence by severity order). + """ + sorted_f = sorted(findings, key=_severity_sort_key) + seen_rules: set[str] = set() + top: list[Finding] = [] + for f in sorted_f: + if f.severity not in (Severity.CRITICAL, Severity.HIGH): + break + if f.rule_id in seen_rules: + continue + seen_rules.add(f.rule_id) + top.append(f) + return top + + +# --------------------------------------------------------------------------- +# Markdown report +# --------------------------------------------------------------------------- + +def generate_markdown( + target: Path, + results: list[ToolResult], + skip_records: list[SkipRecord], + timestamp: str, + detected_languages: set[str], + critical_findings: list[CriticalFinding] | None = None, + code_structure_critique: CodeStructureCritique | None = None, +) -> str: + all_findings = [f for r in results for f in r.findings] + by_sev = _count_by_severity(all_findings) + verdict = _overall_verdict(all_findings) + top = _top_findings(all_findings) + lang_display = ", ".join(sorted(lang.title() for lang in detected_languages)) or "Unknown" + + lines: list[str] = [] + + # Header + lines.append("# AIDLC Code Reviewer — Analysis Report") + lines.append("") + lines.append(f"**Generated**: {timestamp} ") + lines.append(f"**Target**: `{target.resolve()}` ") + 
lines.append(f"**Detected languages**: {lang_display} ") + lines.append(f"**Total findings**: {len(all_findings)} ") + lines.append(f"**Overall verdict**: **{verdict}**") + lines.append("") + + # --- 1. Executive Summary --- + lines.append("## 1. Executive Summary") + lines.append("") + sev_parts = ", ".join(f"{by_sev[s]} {s}" for s in ["CRITICAL", "HIGH", "MEDIUM", "LOW", "INFO"] if by_sev.get(s)) + lines.append(f"**{verdict}** — {len(all_findings)} findings ({sev_parts})") + lines.append("") + action = _action_summary(by_sev, verdict, critical_findings, all_findings) + lines.append(f"> {action}") + lines.append("") + if top: + for i, f in enumerate(top, 1): + lines.append( + f"{i}. **[{f.severity.value}]** `{_short_rule(f.rule_id)}` in `{_short_path(f.file)}`" + f" — {f.message}" + ) + lines.append("") + + # --- Tool Summary --- + lines.append("### Tool Summary") + lines.append("") + lines.append("| Category | Tool | Status | Findings |") + lines.append("|----------|------|--------|----------|") + for r in results: + cat = CATEGORY_LABELS.get(r.category, r.category) + status = "✓ Ran" if r.success else "✗ Error" + lines.append(f"| {cat} | {r.tool} | {status} | {len(r.findings)} |") + for sk in skip_records: + cat = CATEGORY_LABELS.get(sk.category, sk.category) + lines.append(f"| {cat} | {sk.tool} | skipped | — |") + lines.append("") + + # --- Critical Code Findings --- + lines.append("## Critical Code Findings — Review Required") + lines.append("") + if critical_findings: + lines.append(f"**{len(critical_findings)}** critical code sections identified for human review.") + lines.append("") + for i, cf in enumerate(critical_findings, 1): + icon = CRITICAL_CATEGORY_ICONS.get(cf.category, "⚠️") + label = CRITICAL_CATEGORY_LABELS.get(cf.category, cf.category.value) + source_badge = "\U0001f916 Agent-identified" if cf.source == "agent_only" else "\U0001f527 Tool-assisted" + lines.append(f"### {i}. 
{icon} [{label}] `{cf.file}`:{cf.start_line}-{cf.end_line} {source_badge}") + lines.append("") + lines.append(f"**Finding**: {cf.verdict} ") + if cf.recommended_action: + lines.append(f"**Action**: {cf.recommended_action} ") + lines.append("") + if cf.why_critical: + lines.append("**Why it matters**:") + lines.append("") + lines.append(cf.why_critical) + lines.append("") + if cf.related_tool_findings: + lines.append(f"**Related Tool Findings ({len(cf.related_tool_findings)})**:") + lines.append("") + for tf in cf.related_tool_findings: + lines.append(f"- **{tf.tool}** `{tf.rule_id}` [{tf.severity.value}] — {tf.message}") + lines.append("") + lines.append("**Code**:") + lines.append("") + lines.extend(_render_code_block_md(cf.code_block, cf.start_line, cf.highlight_lines)) + lines.append("") + else: + lines.append("No critical code sections identified.") + lines.append("") + + # --- 2. Code Structure Critique --- + lines.append("## 2. Code Structure Critique") + lines.append("") + if code_structure_critique: + lines.append(code_structure_critique.overall_summary) + lines.append("") + # Dimension summary table + lines.append("| Dimension | Rating | Summary |") + lines.append("|-----------|--------|---------|") + for dim in code_structure_critique.dimensions: + icon = DIMENSION_ICONS.get(dim.dimension, "📌") + rating_label, _ = RATING_LABELS.get(dim.rating, (dim.rating.value, "")) + lines.append(f"| {icon} {dim.dimension} | {rating_label} | {dim.summary} |") + lines.append("") + # Detailed findings per dimension + for dim in code_structure_critique.dimensions: + if not dim.findings: + continue + icon = DIMENSION_ICONS.get(dim.dimension, "📌") + rating_label, _ = RATING_LABELS.get(dim.rating, (dim.rating.value, "")) + lines.append(f"### 2.x {icon} {dim.dimension} — {rating_label}") + lines.append("") + lines.append(dim.summary) + lines.append("") + for j, issue in enumerate(dim.findings, 1): + source_badge = "\U0001f916 Agent" if issue.source == "agent_only" else 
"\U0001f527 Tool-assisted" + lines.append(f"**{j}.** `{issue.file}`:{issue.start_line}-{issue.end_line} {source_badge}") + lines.append(f" - **Issue**: {issue.issue}") + lines.append(f" - **Fix**: {issue.recommendation}") + if issue.related_tool_findings: + lines.append(f" **Related Tool Findings ({len(issue.related_tool_findings)})**:") + lines.append("") + for tf in issue.related_tool_findings: + lines.append(f" - **{tf.tool}** `{tf.rule_id}` [{tf.severity.value}] — {tf.message}") + lines.append("") + if issue.code_block: + lines.append(" **Code**:") + lines.append("") + for cl in _render_code_block_md(issue.code_block, issue.start_line, issue.highlight_lines): + lines.append(f" {cl}") + lines.append("") + lines.append("") + else: + lines.append("*Code structure critique not available.*") + lines.append("") + + # --- 3. Code Quality Analysis --- + lines.append("## 3. Code Quality Analysis") + lines.append("") + + # Only iterate over results (tools that actually ran) + for r in results: + if r.tool in SECRETS_TOOLS: + section = "3.2" + label = "Secrets and Credentials" + else: + section = CATEGORY_NUMBERS.get(r.category, "3.x") + label = CATEGORY_LABELS.get(r.category, r.category) + + lines.append(f"### {section} {label} ({r.tool})") + lines.append("") + + if not r.success: + lines.append(f"> **Error**: {r.error}") + lines.append("") + continue + + if not r.findings: + lines.append("No findings.") + lines.append("") + continue + + r_sev = _count_by_severity(r.findings) + lines.append(f"**Findings**: {len(r.findings)}") + sev_parts = [f"{s}: {r_sev[s]}" for s in ["CRITICAL", "HIGH", "MEDIUM", "LOW", "INFO"] if r_sev.get(s)] + if sev_parts: + lines.append(f" ({', '.join(sev_parts)})") + lines.append("") + + lines.append("| # | Severity | Rule | File | Line | Message |") + lines.append("|---|----------|------|------|------|---------|") + sorted_findings = sorted(r.findings, key=_severity_sort_key) + for i, f in enumerate(sorted_findings, 1): + loc = str(f.line) + if 
f.column is not None: + loc += f":{f.column}" + msg = f.message.replace("|", "\\|") + lines.append(f"| {i} | {f.severity.value} | `{f.rule_id}` | `{f.file}` | {loc} | {msg} |") + lines.append("") + + # --- 5. Appendix --- + lines.append("## 5. Appendix") + lines.append("") + lines.append(f"**Timestamp**: {timestamp} ") + lines.append(f"**Target path**: `{target.resolve()}` ") + lines.append("") + + lines.append("### Files Analyzed") + lines.append("") + all_files = sorted({f.file for r in results for f in r.findings}) + if all_files: + for fp in all_files: + lines.append(f"- `{fp}`") + else: + lines.append("No files with findings.") + lines.append("") + + lines.append("### Tool Versions") + lines.append("") + lines.append("| Tool | Category |") + lines.append("|------|----------|") + for r in results: + cat = CATEGORY_LABELS.get(r.category, r.category) + lines.append(f"| {r.tool} | {cat} |") + lines.append("") + + return "\n".join(lines) + + +# --------------------------------------------------------------------------- +# HTML report +# --------------------------------------------------------------------------- + +_THIRD_PARTY_MARKERS = ("node_modules/", "vendor/", "third_party/", ".venv/", "site-packages/") + + +def _is_third_party(filepath: str) -> bool: + """Return True if the file path looks like a third-party dependency.""" + return any(marker in filepath for marker in _THIRD_PARTY_MARKERS) + + +def _format_timestamp_human(iso_ts: str) -> str: + """Convert '2026-03-20T16:27:54Z' → '03/20/2026, 4:27 PM UTC'.""" + from datetime import datetime + try: + dt = datetime.fromisoformat(iso_ts.replace("Z", "+00:00")) + return dt.strftime("%m/%d/%Y, %-I:%M %p UTC") + except (ValueError, AttributeError): + return iso_ts + + +_CSS = """ +:root { + --bg: #1a1b26; --bg-surface: #24283b; --bg-card: #1f2335; + --text: #c0caf5; --text-dim: #6b7394; --text-bright: #e0e6ff; + --border: #3b4261; --accent: #7aa2f7; + --sev-critical: #f7768e; --sev-high: #ff9e64; --sev-medium: 
#e0af68; + --sev-low: #9ece6a; --sev-info: #7dcfff; + --good: #9ece6a; --attention: #e0af68; --critical: #f7768e; +} +* { box-sizing: border-box; margin: 0; padding: 0; } +body { + font-family: 'SF Mono', 'Fira Code', 'JetBrains Mono', monospace; + background: var(--bg); color: var(--text); + line-height: 1.6; padding: 2rem; max-width: 1200px; margin: 0 auto; +} +h1 { color: var(--accent); font-size: 1.5rem; margin-bottom: 0.25rem; } +h2 { color: var(--text-bright); font-size: 1.2rem; margin: 2rem 0 0.75rem; border-bottom: 1px solid var(--border); padding-bottom: 0.4rem; } +h3 { color: var(--accent); font-size: 1rem; margin: 1.5rem 0 0.5rem; } +.meta { color: var(--text-dim); font-size: 0.85rem; margin-bottom: 1.5rem; } +.meta span { margin-right: 2rem; } +.verdict { display: inline-block; padding: 0.2rem 0.8rem; border-radius: 4px; font-weight: bold; font-size: 0.9rem; } +.verdict-good { background: var(--good); color: #1a1b26; } +.verdict-attention { background: var(--attention); color: #1a1b26; } +.verdict-critical { background: var(--critical); color: #1a1b26; } +.sev-badge { display: inline-block; padding: 0.1rem 0.5rem; border-radius: 3px; font-size: 0.75rem; font-weight: bold; } +.sev-CRITICAL { background: var(--sev-critical); color: #1a1b26; } +.sev-HIGH { background: var(--sev-high); color: #1a1b26; } +.sev-MEDIUM { background: var(--sev-medium); color: #1a1b26; } +.sev-LOW { background: var(--sev-low); color: #1a1b26; } +.sev-INFO { background: var(--sev-info); color: #1a1b26; } +.status-pass { background: #9ece6a; color: #1a1b26; padding: 0.1rem 0.5rem; border-radius: 3px; font-size: 0.75rem; } +.status-fail { background: #f7768e; color: #1a1b26; padding: 0.1rem 0.5rem; border-radius: 3px; font-size: 0.75rem; } +.status-skipped { background: #565f89; color: #c0caf5; padding: 0.1rem 0.5rem; border-radius: 3px; font-size: 0.75rem; } +table { width: 100%; border-collapse: collapse; margin: 0.75rem 0; font-size: 0.85rem; } +th { background: 
var(--bg-surface); color: var(--text-bright); text-align: left; padding: 0.5rem 0.75rem; border: 1px solid var(--border); } +td { padding: 0.4rem 0.75rem; border: 1px solid var(--border); vertical-align: top; word-break: break-word; } +td:last-child { max-width: 400px; } +tr:nth-child(even) td { background: var(--bg-card); } +tr:hover td { background: var(--bg-surface); } +.card { background: var(--bg-card); border: 1px solid var(--border); border-radius: 6px; padding: 1rem 1.25rem; margin: 0.75rem 0; } +.stats { display: flex; gap: 1.5rem; flex-wrap: wrap; margin: 0.75rem 0; } +.stat { text-align: center; } +.stat a { text-decoration: none; color: inherit; } +.stat a:hover .stat-value { text-decoration: underline; } +.stat-value { font-size: 1.5rem; font-weight: bold; color: var(--text-bright); } +.stat-label { font-size: 0.75rem; color: var(--text-dim); } +.placeholder { color: var(--text-dim); font-style: italic; padding: 1rem; border: 1px dashed var(--border); border-radius: 4px; margin: 0.5rem 0; } +code { background: var(--bg-surface); padding: 0.15rem 0.4rem; border-radius: 3px; font-size: 0.85rem; } +td code.file-ref { background: var(--bg-surface); border: 1px solid var(--accent); border-radius: 4px; padding: 0.15rem 0.5rem; color: var(--accent); font-weight: bold; } +td .line-ref { background: var(--bg-surface); border: 1px solid var(--sev-medium); border-radius: 4px; padding: 0.15rem 0.5rem; color: var(--sev-medium); font-weight: bold; font-size: 0.8rem; display: inline-block; } +.top-finding { margin: 0.4rem 0; } +.file-list { columns: 2; column-gap: 2rem; } +.file-list li { font-size: 0.8rem; color: var(--text-dim); margin: 0.15rem 0; list-style: none; } +.critical-section { background: rgba(247, 118, 142, 0.07); border-left: 4px solid var(--sev-critical); border-radius: 0 6px 6px 0; padding: 1rem 1.25rem; margin: 0.75rem 0; } +.critical-section h4 { color: var(--sev-critical); margin-bottom: 0.25rem; font-size: 0.95rem; } +.critical-file { font-size: 
0.8rem; margin-bottom: 0.5rem; background: var(--bg-surface); border: 1px solid var(--accent); border-radius: 4px; padding: 0.25rem 0.6rem; display: inline-block; color: var(--accent); font-weight: bold; } +.critical-file code { background: none; padding: 0; color: var(--accent); } +.critical-meta { font-size: 0.85rem; margin: 0.25rem 0; } +.critical-meta strong { color: var(--text-bright); } +.critical-tool { font-size: 0.8rem; color: var(--text-dim); margin: 0.2rem 0 0.2rem 1rem; } +.critical-code { margin-top: 0.5rem; } +.critical-code summary { cursor: pointer; color: var(--accent); font-size: 0.85rem; } +.critical-code pre { background: var(--bg); border: 1px solid var(--border); border-radius: 4px; padding: 0.75rem; margin-top: 0.5rem; font-size: 0.8rem; overflow-x: auto; white-space: pre-wrap; } +.highlight-line { background: rgba(255, 50, 50, 0.25); display: block; margin: 0 -0.75rem; padding: 0 0.75rem; border-left: 3px solid #ff5050; } +.cat-badge { display: inline-block; padding: 0.1rem 0.5rem; border-radius: 3px; font-size: 0.7rem; font-weight: bold; margin-right: 0.5rem; } +.cat-COMPUTATION { background: #bb9af7; color: #1a1b26; } +.cat-CONTROL_FLOW { background: #7aa2f7; color: #1a1b26; } +.cat-DATA_TRANSFORM { background: #2ac3de; color: #1a1b26; } +.source-badge { display: inline-block; padding: 0.1rem 0.5rem; border-radius: 3px; font-size: 0.7rem; font-weight: bold; margin-left: 0.5rem; vertical-align: middle; } +.source-agent { background: #bb9af7; color: #1a1b26; } +.source-tool { background: #ff9e64; color: #1a1b26; } +.critical-tools { margin: 0.5rem 0; } +.critical-tools summary { cursor: pointer; color: var(--accent); font-size: 0.85rem; } +.dim-section { background: var(--bg-card); border: 1px solid var(--border); border-radius: 6px; padding: 1rem 1.25rem; margin: 0.75rem 0; } +.dim-section > details > summary { cursor: pointer; list-style: none; } +.dim-section > details > summary::-webkit-details-marker { display: none; } +.dim-header { 
display: flex; align-items: center; gap: 0.75rem; margin-bottom: 0; } +.dim-header h4 { color: var(--accent); font-size: 0.95rem; margin: 0; } +.dim-summary-text { font-size: 0.85rem; margin: 0.25rem 0 0; color: var(--text-dim); } +.dim-issues { margin-top: 0.75rem; } +.rating-badge { display: inline-block; padding: 0.15rem 0.6rem; border-radius: 3px; font-size: 0.7rem; font-weight: bold; } +.rating-good { background: var(--good); color: #1a1b26; } +.rating-attention { background: var(--attention); color: #1a1b26; } +.rating-critical { background: var(--critical); color: #1a1b26; } +.dim-issue { border-left: 3px solid var(--border); padding: 0.5rem 0.75rem; margin: 0.5rem 0; font-size: 0.85rem; } +.dim-issue-meta { font-size: 0.8rem; } +.dim-issue-meta code { background: var(--bg-surface); border: 1px solid var(--accent); border-radius: 4px; padding: 0.15rem 0.5rem; color: var(--accent); font-weight: bold; } +.dim-issue-fix { color: var(--good); font-size: 0.8rem; } +.back-to-top { text-align: right; margin: 0.5rem 0; } +.back-to-top a { color: var(--text-dim); font-size: 0.75rem; text-decoration: none; } +.back-to-top a:hover { color: var(--accent); } +.toc { background: var(--bg-card); border: 1px solid var(--border); border-radius: 6px; padding: 1rem 1.25rem; margin: 1rem 0; } +.toc summary { cursor: pointer; color: var(--accent); font-size: 0.95rem; font-weight: bold; } +.toc ul { list-style: none; margin: 0.5rem 0 0 0; padding: 0; } +.toc li { margin: 0.3rem 0; } +.toc a { color: var(--text); text-decoration: none; font-size: 0.85rem; } +.toc a:hover { color: var(--accent); } +.tool-findings-section summary { cursor: pointer; color: var(--accent); font-size: 0.95rem; } +.tool-findings-section { margin: 0.75rem 0; } +.third-party-badge { display: inline-block; padding: 0.1rem 0.5rem; border-radius: 3px; font-size: 0.65rem; font-weight: bold; background: #565f89; color: #c0caf5; margin-left: 0.5rem; vertical-align: middle; } +.skipped-section summary { cursor: 
pointer; color: var(--text-dim); font-size: 0.85rem; } +.skipped-section { margin: 0.75rem 0; } +.legend { background: var(--bg-card); border: 1px solid var(--border); border-radius: 6px; padding: 1rem 1.25rem; margin: 0.75rem 0; } +.legend summary { cursor: pointer; color: var(--accent); font-size: 0.9rem; font-weight: bold; } +.legend-grid { display: grid; grid-template-columns: repeat(auto-fill, minmax(280px, 1fr)); gap: 0.75rem; margin-top: 0.75rem; } +.legend-group h4 { color: var(--text-bright); font-size: 0.8rem; margin-bottom: 0.4rem; text-transform: uppercase; letter-spacing: 0.05em; } +.legend-item { display: flex; align-items: center; gap: 0.5rem; font-size: 0.8rem; color: var(--text-dim); margin: 0.3rem 0; } +.legend-item .sev-badge, .legend-item .cat-badge, .legend-item .source-badge, +.legend-item .rating-badge, .legend-item .third-party-badge, .legend-item .verdict, +.legend-item .status-pass, .legend-item .status-fail, .legend-item .status-skipped { margin: 0; } +[title] { cursor: help; } +""" + + +def _esc(text: str) -> str: + return html_lib.escape(str(text)) + + +# Tooltip descriptions for badges +_BADGE_TIPS = { + "source-agent": "Found by AI analysis without tool confirmation", + "source-tool": "Confirmed or assisted by a static analysis tool", + "cat-COMPUTATION": "Cryptographic, precision, or concurrency-sensitive computation", + "cat-CONTROL_FLOW": "Auth gates, error handling, or security-sensitive control flow", + "cat-DATA_TRANSFORM": "Data conversion or mapping requiring human verification", + "rating-good": "Meets quality standards", + "rating-attention": "Has addressable issues", + "rating-critical": "Requires significant rework", + "third-party": "File from an external dependency (node_modules, vendor, etc.)", +} + + +def _verdict_class(verdict: str) -> str: + return {"Good": "verdict-good", "Needs Attention": "verdict-attention", "Critical": "verdict-critical"}.get(verdict, "") + + +def _render_code_block_html(code_block: str, 
start_line: int, highlight_lines: list[int]) -> str: + """Render a code block as HTML
 with highlighted lines in red."""
+    highlight_set = set(highlight_lines)
+    if not highlight_set:
+        return f"
{_esc(code_block)}
" + code_lines = code_block.split("\n") + parts: list[str] = [] + parts.append("
")
+    for i, code_line in enumerate(code_lines):
+        line_num = start_line + i
+        escaped = _esc(code_line)
+        if line_num in highlight_set:
+            parts.append(f'{escaped}')
+        else:
+            parts.append(escaped)
+    parts.append("
") + return "\n".join(parts) + + + +def generate_html( + target: Path, + results: list[ToolResult], + skip_records: list[SkipRecord], + timestamp: str, + detected_languages: set[str], + critical_findings: list[CriticalFinding] | None = None, + code_structure_critique: CodeStructureCritique | None = None, + summary_filename: str | None = None, + sibling_report: tuple[str, str] | None = None, +) -> str: + all_findings = [f for r in results for f in r.findings] + by_sev = _count_by_severity(all_findings) + verdict = _overall_verdict(all_findings) + top = _top_findings(all_findings) + lang_display = ", ".join(sorted(lang.title() for lang in detected_languages)) or "Unknown" + human_ts = _format_timestamp_human(timestamp) + + h: list[str] = [] + h.append("") + h.append('') + h.append('') + h.append(f"AIDLC Code Review \u2014 {_esc(human_ts)}") + h.append(f"") + h.append("") + + # Header + h.append('
') + h.append("

AIDLC Code Reviewer \u2014 Analysis Report

") + h.append('
') + h.append(f"Generated: {_esc(human_ts)}") + h.append(f"Target: {_esc(str(target.resolve()))}") + h.append(f"Detected languages: {_esc(lang_display)}") + h.append("
") + + if summary_filename: + h.append(f'') + + # --- Table of Contents --- + h.append('
') + h.append("Table of Contents") + h.append("") + h.append("
") + + # --- Badge Legend --- + h.append('
') + h.append("\U0001f3f7\ufe0f Badge Legend") + h.append('
') + # Severity + h.append('

Severity

') + h.append('
CRITICAL Immediate fix required
') + h.append('
HIGH Significant issue, fix soon
') + h.append('
MEDIUM Moderate concern
') + h.append('
LOW Minor improvement
') + h.append('
INFO Informational note
') + h.append("
") + # Critical categories + h.append('

Critical Categories

') + h.append('
\U0001f522 COMPUTATION Crypto / precision / concurrency
') + h.append('
\U0001f500 CONTROL_FLOW Auth / error handling / security flow
') + h.append('
\U0001f504 DATA_TRANSFORM Data conversion / mapping
') + h.append("
") + # Source + h.append('

Finding Source

') + h.append('
\U0001f916 Agent-identified Found by AI analysis
') + h.append('
\U0001f527 Tool-assisted Confirmed by static analysis tool
') + h.append("
") + # Ratings + h.append('

Structure Ratings

') + h.append('
Good Meets quality standards
') + h.append('
Needs Improvement Has addressable issues
') + h.append('
Poor Requires significant rework
') + h.append("
") + # Status & other + h.append('

Tool Status

') + h.append('
\u2713 Ran Tool executed successfully
') + h.append('
\u2717 Error Tool encountered an error
') + h.append('
3rd party File from external dependency
') + h.append("
") + h.append("
") # legend-grid + h.append("
") + + # --- 1. Executive Summary --- + h.append('

1. Executive Summary

') + h.append(f'

{_esc(verdict)}

') + action = _action_summary(by_sev, verdict, critical_findings, all_findings) + h.append(f'

{_esc(action)}

') + + # Stats row - linked to sections + h.append('
') + h.append( + f'' + ) + for sev in SEV_ORDER: + count = by_sev.get(sev.value, 0) + if count: + h.append( + f'' + ) + h.append("
") + + # Top findings (CRITICAL + HIGH only) + if top: + for i, f in enumerate(top, 1): + third_party = ' 3rd party' if _is_third_party(f.file) else "" + h.append( + f'
{i}. {f.severity.value} ' + f"{_esc(_short_rule(f.rule_id))} in {_esc(_short_path(f.file))}" + f" \u2014 {_esc(f.message)}{third_party}
" + ) + + # Tool summary table - active tools only + h.append("

Tool Summary

") + h.append("") + for r in results: + cat = _esc(CATEGORY_LABELS.get(r.category, r.category)) + status_text = "\u2713 Ran" if r.success else "\u2717 Error" + status_cls = "status-pass" if r.success else "status-fail" + status_tip = "Tool executed successfully" if r.success else "Tool encountered an error" + h.append( + f'' + f"" + ) + h.append("
CategoryToolStatusFindings
{cat}{_esc(r.tool)}{status_text}{len(r.findings)}
") + + # Skipped tools - collapsible + if skip_records: + h.append('
') + h.append(f"Skipped tools ({len(skip_records)})") + h.append("") + for sk in skip_records: + cat = _esc(CATEGORY_LABELS.get(sk.category, sk.category)) + h.append(f"") + h.append("
CategoryToolReason
{cat}{_esc(sk.tool)}{_esc(sk.reason)}
") + h.append("
") + + h.append('') + + # --- Critical Code Findings --- + h.append('

\u26a0 Critical Code Findings \u2014 Review Required

') + if critical_findings: + h.append(f'

{len(critical_findings)} critical code sections identified for human review.

') + for i, cf in enumerate(critical_findings, 1): + label = CRITICAL_CATEGORY_LABELS.get(cf.category, cf.category.value) + source_icon = "\U0001f916" if cf.source == "agent_only" else "\U0001f527" + source_label = "Agent-identified" if cf.source == "agent_only" else "Tool-assisted" + source_cls = "source-agent" if cf.source == "agent_only" else "source-tool" + cat_tip = _esc(_BADGE_TIPS.get(f"cat-{label}", label)) + src_tip = _esc(_BADGE_TIPS.get(source_cls, source_label)) + h.append('
') + h.append( + f'

{i}. {label} ' + f'{source_icon} {source_label}

' + ) + h.append(f'
{_esc(cf.file)}:{cf.start_line}-{cf.end_line}
') + h.append(f'
Finding: {_esc(cf.verdict)}
') + if cf.recommended_action: + h.append(f'
Action: {_esc(cf.recommended_action)}
') + h.append('
Why it matters') + h.append(f'
{_esc(cf.why_critical)}
') + h.append("
") + if cf.related_tool_findings: + h.append(f'
Related Tool Findings ({len(cf.related_tool_findings)})') + for tf in cf.related_tool_findings: + h.append( + f'
\U0001f527 {_esc(tf.tool)} {_esc(tf.rule_id)} ' + f'{tf.severity.value} \u2014 {_esc(tf.message)}
' + ) + h.append("
") + h.append('
View Code') + h.append(_render_code_block_html(cf.code_block, cf.start_line, cf.highlight_lines)) + h.append("
") + h.append("
") + else: + h.append('
No critical code sections identified.
') + h.append('') + + # --- 2. Code Structure Critique --- + h.append('

2. Code Structure Critique

') + if code_structure_critique: + h.append(f"

{_esc(code_structure_critique.overall_summary)}

") + # Dimension summary table + h.append("") + for dim in code_structure_critique.dimensions: + icon = DIMENSION_ICONS.get(dim.dimension, "\U0001f4cc") + rating_label, rating_cls = RATING_LABELS.get(dim.rating, (dim.rating.value, "")) + r_tip = _esc(_BADGE_TIPS.get(f"rating-{rating_cls}", rating_label)) + h.append( + f"" + f'' + f"" + ) + h.append("
DimensionRatingSummary
{icon} {_esc(dim.dimension)}{_esc(rating_label)}{_esc(dim.summary)}
") + # Detailed findings per dimension - collapsible + for dim in code_structure_critique.dimensions: + if not dim.findings: + continue + icon = DIMENSION_ICONS.get(dim.dimension, "\U0001f4cc") + rating_label, rating_cls = RATING_LABELS.get(dim.rating, (dim.rating.value, "")) + h.append('
') + h.append("
") + h.append("") + r_tip = _esc(_BADGE_TIPS.get(f"rating-{rating_cls}", rating_label)) + h.append( + f'

{icon} {_esc(dim.dimension)}

' + f'{_esc(rating_label)}' + f'' + f"{len(dim.findings)} issues
" + ) + h.append(f'

{_esc(dim.summary)}

') + h.append("
") + h.append('
') + for j, issue in enumerate(dim.findings, 1): + h.append('
') + source_icon = "\U0001f916" if issue.source == "agent_only" else "\U0001f527" + source_label = "Agent" if issue.source == "agent_only" else "Tool-assisted" + source_cls = "source-agent" if issue.source == "agent_only" else "source-tool" + src_tip = _esc(_BADGE_TIPS.get(source_cls, source_label)) + h.append( + f'
{j}. ' + f"{_esc(issue.file)}:{issue.start_line}-{issue.end_line} " + f'{source_icon} {source_label}
' + ) + h.append(f"
Issue: {_esc(issue.issue)}
") + h.append(f'
Fix: {_esc(issue.recommendation)}
') + if issue.related_tool_findings: + h.append(f'
Related Tool Findings ({len(issue.related_tool_findings)})') + for tf in issue.related_tool_findings: + h.append( + f'
\U0001f527 {_esc(tf.tool)} {_esc(tf.rule_id)} ' + f'{tf.severity.value} \u2014 {_esc(tf.message)}
' + ) + h.append("
") + if issue.code_block: + h.append('
View Code') + h.append(_render_code_block_html(issue.code_block, issue.start_line, issue.highlight_lines)) + h.append("
") + h.append("
") + h.append("
") + h.append("
") + h.append("
") + else: + h.append('
Code structure critique not available.
') + h.append('') + + # --- 3. Code Quality Analysis (only tools that ran) --- + h.append('

3. Code Quality Analysis

') + + for r in results: + if r.tool in SECRETS_TOOLS: + section = "3.2" + label = "Secrets and Credentials" + else: + section = CATEGORY_NUMBERS.get(r.category, "3.x") + label = CATEGORY_LABELS.get(r.category, r.category) + + if not r.success: + h.append(f"

{section} {_esc(label)} ({_esc(r.tool)})

") + h.append(f'
Error: {_esc(r.error or "unknown")}
') + continue + + if not r.findings: + h.append(f"

{section} {_esc(label)} ({_esc(r.tool)})

") + h.append('
No findings.
') + continue + + # Determine if this should be expanded by default + r_sev = _count_by_severity(r.findings) + has_important = any(r_sev.get(s, 0) > 0 for s in ["CRITICAL", "HIGH", "MEDIUM"]) + sev_parts = " ".join( + f'{s}: {r_sev[s]}' + for s in ["CRITICAL", "HIGH", "MEDIUM", "LOW", "INFO"] + if r_sev.get(s) + ) + open_attr = " open" if has_important else "" + + # Check for third-party findings + third_party_count = sum(1 for f in r.findings if _is_third_party(f.file)) + tp_note = "" + if third_party_count: + tp_note = f' {third_party_count} from 3rd party' + + h.append(f'
') + h.append( + f"{section} {_esc(label)} ({_esc(r.tool)}) \u2014 " + f"{len(r.findings)} findings {sev_parts}{tp_note}" + ) + + h.append( + "" + "" + ) + sorted_findings = sorted(r.findings, key=_severity_sort_key) + for i, f in enumerate(sorted_findings, 1): + loc = str(f.line) + if f.column is not None: + loc += f":{f.column}" + msg = _esc(f.message) + tp_badge = "" + if _is_third_party(f.file): + tp_badge = ' 3rd party' + h.append( + f"" + f'' + f"" + f'' + f'' + ) + h.append("
#SeverityRuleFileLineMessage
{i}{f.severity.value}{_esc(f.rule_id)}{_esc(f.file)}{tp_badge}{loc}{msg}
") + h.append("
") + + h.append('') + + # --- 5. Appendix --- + h.append('

5. Appendix

') + h.append(f"

Timestamp: {_esc(human_ts)}
Target: {_esc(str(target.resolve()))}

") + + h.append("

Files Analyzed

") + all_files = sorted({f.file for r in results for f in r.findings}) + if all_files: + h.append('
    ') + for fp in all_files: + tp_badge = "" + if _is_third_party(fp): + tp_badge = ' 3rd party' + h.append(f"
  • {_esc(fp)}{tp_badge}
  • ") + h.append("
") + else: + h.append("

No files with findings.

") + + h.append("

Tool Configuration

") + h.append("") + for r in results: + cat = _esc(CATEGORY_LABELS.get(r.category, r.category)) + h.append(f"") + h.append("
ToolCategory
{_esc(r.tool)}{cat}
") + + # Bottom navigation + nav_parts: list[str] = [] + if summary_filename: + nav_parts.append(f'\u2190 Back to Summary') + if sibling_report: + sib_file, sib_label = sibling_report + nav_parts.append(f'{_esc(sib_label)} \u2192') + if nav_parts: + h.append(f'
{"".join(nav_parts)}
') + + h.append("") + return "\n".join(h) + + +# --------------------------------------------------------------------------- +# Business Logic Review — Separate Report (Markdown) +# --------------------------------------------------------------------------- + + +def generate_business_logic_markdown( + target: Path, + timestamp: str, + detected_languages: set[str], + review: BusinessLogicReview, +) -> str: + """Generate a standalone markdown report for business logic review.""" + lang_display = ", ".join(sorted(lang.title() for lang in detected_languages)) or "Unknown" + + lines: list[str] = [] + + # Header + lines.append("# Business Logic Review — Human Review Checkpoint") + lines.append("") + lines.append(f"**Generated**: {timestamp} ") + lines.append(f"**Target**: `{target.resolve()}` ") + lines.append(f"**Detected languages**: {lang_display} ") + lines.append(f"**Business logic findings**: {len(review.findings)} ") + lines.append(f"**Consistency issues**: {len(review.consistency_issues)}") + lines.append("") + + # --- Executive Summary --- + lines.append("## Executive Summary") + lines.append("") + if review.executive_summary: + lines.append(review.executive_summary) + else: + lines.append("This report identifies code sections that encode core business rules,") + lines.append("formulas, and domain logic. 
Every finding is flagged for human review") + lines.append("regardless of whether static analysis tools reported issues.") + lines.append("") + + # --- Summary by category --- + lines.append("## Summary by Category") + lines.append("") + cat_counts: dict[BusinessLogicCategory, int] = {} + for f in review.findings: + cat_counts[f.category] = cat_counts.get(f.category, 0) + 1 + if cat_counts: + lines.append("| Category | Findings |") + lines.append("|----------|----------|") + for cat in BusinessLogicCategory: + count = cat_counts.get(cat, 0) + if count: + icon = BUSINESS_LOGIC_CATEGORY_ICONS.get(cat, "📌") + label = BUSINESS_LOGIC_CATEGORY_LABELS.get(cat, cat.value) + lines.append(f"| {icon} {label} | {count} |") + lines.append("") + else: + lines.append("No business logic findings identified.") + lines.append("") + + # --- Findings --- + lines.append("## Business Logic Findings") + lines.append("") + if review.findings: + # Group by category + current_cat: str | None = None + finding_num = 0 + for f in review.findings: + cat_label = BUSINESS_LOGIC_CATEGORY_LABELS.get(f.category, f.category.value) + cat_icon = BUSINESS_LOGIC_CATEGORY_ICONS.get(f.category, "📌") + if f.category.value != current_cat: + current_cat = f.category.value + lines.append(f"### {cat_icon} {cat_label}") + lines.append("") + + finding_num += 1 + lines.append(f"#### {finding_num}. {f.title}") + lines.append("") + lines.append(f"`{f.file}`:{f.start_line}-{f.end_line}") + lines.append("") + lines.append(f"**What it does**: {f.what_it_does} ") + lines.append(f"**Review guidance**: {f.review_guidance} ") + lines.append(f"**Risk if wrong**: {f.risk_if_wrong}") + lines.append("") + lines.append("**Code**:") + lines.append("") + lines.append("```") + lines.append(f"{f.code_block}") + lines.append("```") + lines.append("") + else: + lines.append("No business logic findings identified. 
However, a human reviewer should") + lines.append("still verify the core functional areas of this codebase.") + lines.append("") + + # --- Consistency Issues --- + lines.append("## Self-Consistency Issues") + lines.append("") + if review.consistency_issues: + lines.append(f"**{len(review.consistency_issues)}** consistency issues detected across business logic sections.") + lines.append("") + for i, ci in enumerate(review.consistency_issues, 1): + icon = CONSISTENCY_ISSUE_ICONS.get(ci.issue_type, "⚠️") + label = CONSISTENCY_ISSUE_LABELS.get(ci.issue_type, ci.issue_type.value) + lines.append(f"### {i}. {icon} {label}") + lines.append("") + lines.append(f"**Issue**: {ci.description} ") + if ci.recommended_action: + lines.append(f"**Recommended action**: {ci.recommended_action}") + lines.append("") + lines.append("**Locations**:") + lines.append("") + for loc in ci.locations: + lines.append(f"- `{loc.file}`:{loc.start_line}-{loc.end_line}") + lines.append("") + if ci.code_blocks: + for j, cb in enumerate(ci.code_blocks): + loc_label = f"`{ci.locations[j].file}`" if j < len(ci.locations) else f"Location {j + 1}" + lines.append(f"**{loc_label}**:") + lines.append("") + lines.append("```") + lines.append(cb) + lines.append("```") + lines.append("") + else: + lines.append("No self-consistency issues detected.") + lines.append("") + + return "\n".join(lines) + + +# --------------------------------------------------------------------------- +# Business Logic Review — Separate Report (HTML) +# --------------------------------------------------------------------------- + +_BUSINESS_CSS = """ +:root { + --bg: #1a1b26; --bg-surface: #24283b; --bg-card: #1f2335; + --text: #c0caf5; --text-dim: #6b7394; --text-bright: #e0e6ff; + --border: #3b4261; --accent: #7aa2f7; + --good: #9ece6a; --attention: #e0af68; --critical: #f7768e; +} +* { box-sizing: border-box; margin: 0; padding: 0; } +body { + font-family: 'SF Mono', 'Fira Code', 'JetBrains Mono', monospace; + background: var(--bg); 
color: var(--text); + line-height: 1.6; padding: 2rem; max-width: 1200px; margin: 0 auto; +} +h1 { color: var(--accent); font-size: 1.5rem; margin-bottom: 0.25rem; } +h2 { color: var(--text-bright); font-size: 1.2rem; margin: 2rem 0 0.75rem; border-bottom: 1px solid var(--border); padding-bottom: 0.4rem; } +h3 { color: var(--accent); font-size: 1rem; margin: 1.5rem 0 0.5rem; } +.meta { color: var(--text-dim); font-size: 0.85rem; margin-bottom: 1.5rem; } +.meta span { margin-right: 2rem; } +.intro { color: var(--text-dim); font-size: 0.85rem; font-style: italic; margin-bottom: 1.5rem; border-left: 3px solid var(--accent); padding-left: 0.75rem; } +table { width: 100%; border-collapse: collapse; margin: 0.75rem 0; font-size: 0.85rem; } +th { background: var(--bg-surface); color: var(--text-bright); text-align: left; padding: 0.5rem 0.75rem; border: 1px solid var(--border); } +td { padding: 0.4rem 0.75rem; border: 1px solid var(--border); vertical-align: top; } +tr:nth-child(even) td { background: var(--bg-card); } +tr:hover td { background: var(--bg-surface); } +code { background: var(--bg-surface); padding: 0.15rem 0.4rem; border-radius: 3px; font-size: 0.85rem; } +pre { background: var(--bg); border: 1px solid var(--border); border-radius: 4px; padding: 0.75rem; font-size: 0.8rem; overflow-x: auto; white-space: pre-wrap; margin: 0.5rem 0; } +.card { background: var(--bg-card); border: 1px solid var(--border); border-radius: 6px; padding: 1rem 1.25rem; margin: 0.75rem 0; } +.stats { display: flex; gap: 1.5rem; flex-wrap: wrap; margin: 0.75rem 0; } +.stat { text-align: center; } +.stat-value { font-size: 1.5rem; font-weight: bold; color: var(--text-bright); } +.stat-label { font-size: 0.75rem; color: var(--text-dim); } +.cat-badge { display: inline-block; padding: 0.1rem 0.5rem; border-radius: 3px; font-size: 0.7rem; font-weight: bold; margin-right: 0.5rem; } +.cat-FINANCIAL_FORMULA { background: #bb9af7; color: #1a1b26; } +.cat-SCORING_AND_RANKING { background: 
#7aa2f7; color: #1a1b26; } +.cat-PRICING_AND_DISCOUNT { background: #e0af68; color: #1a1b26; } +.cat-BUSINESS_RULE { background: #2ac3de; color: #1a1b26; } +.cat-STATE_MACHINE { background: #ff9e64; color: #1a1b26; } +.cat-ROUNDING_AND_PRECISION { background: #9ece6a; color: #1a1b26; } +.cat-BOUNDARY_CONDITION { background: #f7768e; color: #1a1b26; } +.cat-DATA_MAPPING { background: #73daca; color: #1a1b26; } +.cat-TEMPORAL_LOGIC { background: #b4f9f8; color: #1a1b26; } +.cat-RECONCILIATION { background: #c0caf5; color: #1a1b26; } +.issue-badge { display: inline-block; padding: 0.1rem 0.5rem; border-radius: 3px; font-size: 0.7rem; font-weight: bold; background: var(--attention); color: #1a1b26; } +.finding-section { background: var(--bg-card); border-left: 4px solid var(--accent); border-radius: 0 6px 6px 0; padding: 1rem 1.25rem; margin: 0.75rem 0; } +.finding-section h4 { color: var(--accent); margin-bottom: 0.25rem; font-size: 0.95rem; } +.finding-file { font-size: 0.8rem; margin-bottom: 0.5rem; background: var(--bg-surface); border: 1px solid var(--accent); border-radius: 4px; padding: 0.25rem 0.6rem; display: inline-block; color: var(--accent); font-weight: bold; } +.finding-meta { font-size: 0.85rem; margin: 0.25rem 0; } +.finding-meta strong { color: var(--text-bright); } +.finding-risk { color: var(--critical); font-size: 0.85rem; margin: 0.25rem 0; } +.finding-code { margin-top: 0.5rem; } +.finding-code summary { cursor: pointer; color: var(--accent); font-size: 0.85rem; } +.consistency-section { background: var(--bg-card); border-left: 4px solid var(--attention); border-radius: 0 6px 6px 0; padding: 1rem 1.25rem; margin: 0.75rem 0; } +.consistency-section h4 { color: var(--attention); margin-bottom: 0.25rem; font-size: 0.95rem; } +.back-to-top { text-align: right; margin: 0.5rem 0; } +.back-to-top a { color: var(--text-dim); font-size: 0.75rem; text-decoration: none; } +.back-to-top a:hover { color: var(--accent); } +.toc { background: var(--bg-card); 
border: 1px solid var(--border); border-radius: 6px; padding: 1rem 1.25rem; margin: 1rem 0; } +.toc summary { cursor: pointer; color: var(--accent); font-size: 0.95rem; font-weight: bold; } +.toc ul { list-style: none; margin: 0.5rem 0 0 0; padding: 0; } +.toc li { margin: 0.3rem 0; } +.toc a { color: var(--text); text-decoration: none; font-size: 0.85rem; } +.toc a:hover { color: var(--accent); } +.cat-group { margin: 1rem 0; } +.cat-group > details > summary { cursor: pointer; list-style: none; } +.cat-group > details > summary::-webkit-details-marker { display: none; } +.cat-header { display: flex; align-items: center; gap: 0.75rem; } +.cat-header h4 { color: var(--accent); font-size: 0.95rem; margin: 0; } +.legend { background: var(--bg-card); border: 1px solid var(--border); border-radius: 6px; padding: 1rem 1.25rem; margin: 1rem 0; } +.legend summary { cursor: pointer; color: var(--accent); font-size: 0.95rem; font-weight: bold; } +.legend-grid { display: grid; grid-template-columns: repeat(auto-fit, minmax(260px, 1fr)); gap: 1rem; margin-top: 0.75rem; } +.legend-group h4 { color: var(--text-bright); font-size: 0.85rem; margin-bottom: 0.4rem; border-bottom: 1px solid var(--border); padding-bottom: 0.25rem; } +.legend-item { font-size: 0.8rem; margin: 0.3rem 0; display: flex; align-items: center; gap: 0.5rem; } +""" + + +def generate_business_logic_html( + target: Path, + timestamp: str, + detected_languages: set[str], + review: BusinessLogicReview, + summary_filename: str | None = None, + sibling_report: tuple[str, str] | None = None, +) -> str: + """Generate a standalone HTML report for business logic review.""" + lang_display = ", ".join(sorted(lang.title() for lang in detected_languages)) or "Unknown" + human_ts = _format_timestamp_human(timestamp) + + h: list[str] = [] + h.append("") + h.append('') + h.append('') + h.append(f"Business Logic Review \u2014 {_esc(human_ts)}") + h.append(f"") + h.append("") + + # Header + h.append('
') + h.append("

Business Logic Review \u2014 Human Review Checkpoint

") + h.append('
') + h.append(f"Generated: {_esc(human_ts)}") + h.append(f"Target: {_esc(str(target.resolve()))}") + h.append(f"Languages: {_esc(lang_display)}") + h.append("
") + + if summary_filename: + h.append(f'') + + # Badge Legend + h.append('
') + h.append("\U0001f3f7\ufe0f Badge Legend") + h.append('
') + h.append('

Business Logic Categories

') + _BIZ_CAT_DESCRIPTIONS = { + BusinessLogicCategory.FINANCIAL_FORMULA: "Monetary calculations, interest rates, tax logic", + BusinessLogicCategory.SCORING_AND_RANKING: "Score computations, ranking algorithms, weighted evaluations", + BusinessLogicCategory.PRICING_AND_DISCOUNT: "Price derivation, discount tiers, promotional rules", + BusinessLogicCategory.BUSINESS_RULE: "Domain-specific if/then rules, eligibility checks", + BusinessLogicCategory.STATE_MACHINE: "Workflow transitions, status progressions, approval gates", + BusinessLogicCategory.ROUNDING_AND_PRECISION: "Decimal handling, truncation, precision-sensitive math", + BusinessLogicCategory.BOUNDARY_CONDITION: "Edge cases, off-by-one, min/max threshold logic", + BusinessLogicCategory.DATA_MAPPING: "Field mapping, schema translation, key transformations", + BusinessLogicCategory.TEMPORAL_LOGIC: "Date/time calculations, scheduling, expiration rules", + BusinessLogicCategory.RECONCILIATION: "Cross-system consistency checks, balance verification", + } + for cat in BusinessLogicCategory: + icon = BUSINESS_LOGIC_CATEGORY_ICONS.get(cat, "\U0001f4cc") + label = BUSINESS_LOGIC_CATEGORY_LABELS.get(cat, cat.value) + desc = _BIZ_CAT_DESCRIPTIONS.get(cat, "") + h.append(f'
{icon} {_esc(label)} {_esc(desc)}
') + h.append("
") + h.append('

Consistency Issue Types

') + _CONSISTENCY_DESCRIPTIONS = { + ConsistencyIssueType.CONSTANT_DRIFT: "Same constant defined with different values across files", + ConsistencyIssueType.LOGIC_DIVERGENCE: "Similar logic implemented differently in separate locations", + ConsistencyIssueType.NAMING_MISMATCH: "Inconsistent naming for the same concept across the codebase", + ConsistencyIssueType.REDUNDANT_IMPLEMENTATION: "Duplicate functionality that should be consolidated", + } + for issue_type in ConsistencyIssueType: + icon = CONSISTENCY_ISSUE_ICONS.get(issue_type, "\u26a0\ufe0f") + label = CONSISTENCY_ISSUE_LABELS.get(issue_type, issue_type.value) + desc = _CONSISTENCY_DESCRIPTIONS.get(issue_type, "") + h.append(f'
{icon} {_esc(label)} {_esc(desc)}
') + h.append("
") + h.append("
") + h.append("
") + # Table of Contents + h.append('
') + h.append("Table of Contents") + h.append("") + h.append("
") + + # --- Executive Summary --- + h.append('

Executive Summary

') + h.append('
') + if review.executive_summary: + h.append(f"

{_esc(review.executive_summary)}

") + else: + h.append("

This report identifies code sections that encode core business rules, ") + h.append("formulas, and domain logic. Every finding is flagged for human review regardless of ") + h.append("whether static analysis tools reported issues.

") + h.append("
") + + # Stats + h.append('
') + h.append(f'') + h.append(f'') + # Count by category for stats + cat_counts: dict[BusinessLogicCategory, int] = {} + for f in review.findings: + cat_counts[f.category] = cat_counts.get(f.category, 0) + 1 + h.append(f'
{len(cat_counts)}
Categories
') + h.append("
") + + h.append('') + + # --- Summary by category --- + h.append('

Summary by Category

') + if cat_counts: + h.append("") + for cat in BusinessLogicCategory: + count = cat_counts.get(cat, 0) + if count: + icon = BUSINESS_LOGIC_CATEGORY_ICONS.get(cat, "\U0001f4cc") + label = BUSINESS_LOGIC_CATEGORY_LABELS.get(cat, cat.value) + h.append(f'') + h.append("
CategoryFindings
{icon} {_esc(label)}{count}
") + else: + h.append('
No business logic findings identified.
') + + h.append('') + + # --- Findings --- + h.append('

Business Logic Findings

') + if review.findings: + h.append(f"

{len(review.findings)} business logic sections identified for human review.

") + current_cat: str | None = None + finding_num = 0 + for f in review.findings: + cat_label = BUSINESS_LOGIC_CATEGORY_LABELS.get(f.category, f.category.value) + cat_icon = BUSINESS_LOGIC_CATEGORY_ICONS.get(f.category, "\U0001f4cc") + + if f.category.value != current_cat: + # Close previous group + if current_cat is not None: + h.append("") + current_cat = f.category.value + cat_count = cat_counts.get(f.category, 0) + h.append('
') + h.append("") + h.append(f'

{cat_icon} {_esc(cat_label)}

') + h.append(f'{cat_count} findings
') + h.append("
") + h.append("
") + + finding_num += 1 + h.append('
') + h.append(f'

{finding_num}. {_esc(f.title)}

') + h.append(f'
{_esc(f.file)}:{f.start_line}-{f.end_line}
') + h.append(f'
What it does: {_esc(f.what_it_does)}
') + h.append(f'
Review guidance: {_esc(f.review_guidance)}
') + h.append(f'
Risk if wrong: {_esc(f.risk_if_wrong)}
') + h.append('
View Code') + h.append(f"
{_esc(f.code_block)}
") + h.append("
") + h.append("
") + + # Close last group + if current_cat is not None: + h.append("
") + else: + h.append('
No business logic findings identified. However, a human reviewer ') + h.append("should still verify the core functional areas of this codebase.
") + + h.append('') + + # --- Consistency Issues --- + h.append('

Self-Consistency Issues

') + if review.consistency_issues: + h.append(f"

{len(review.consistency_issues)} consistency issues detected across business logic sections.

") + for i, ci in enumerate(review.consistency_issues, 1): + icon = CONSISTENCY_ISSUE_ICONS.get(ci.issue_type, "\u26a0\ufe0f") + label = CONSISTENCY_ISSUE_LABELS.get(ci.issue_type, ci.issue_type.value) + h.append('
') + h.append(f'

{i}. {icon} {_esc(label)}

') + h.append(f'
Issue: {_esc(ci.description)}
') + if ci.recommended_action: + h.append(f'
Recommended action: {_esc(ci.recommended_action)}
') + # Locations + h.append('
Locations:
') + for loc in ci.locations: + h.append(f'
{_esc(loc.file)}:{loc.start_line}-{loc.end_line}
') + # Code blocks — collapsible + if ci.code_blocks: + h.append('
View Code') + for j, cb in enumerate(ci.code_blocks): + loc_label = f"{_esc(ci.locations[j].file)}" if j < len(ci.locations) else f"Location {j + 1}" + h.append(f'
{loc_label}:
') + h.append(f"
{_esc(cb)}
") + h.append("
") + h.append("
") + else: + h.append('
No self-consistency issues detected.
') + + h.append('') + + # Bottom navigation + nav_parts: list[str] = [] + if summary_filename: + nav_parts.append(f'\u2190 Back to Summary') + if sibling_report: + sib_file, sib_label = sibling_report + nav_parts.append(f'{_esc(sib_label)} \u2192') + if nav_parts: + h.append(f'
{"".join(nav_parts)}
') + + h.append("") + return "\n".join(h) + + + +# --------------------------------------------------------------------------- +# Summary / Entry Page (HTML only) +# --------------------------------------------------------------------------- + +_SUMMARY_CSS = """ +:root { + --bg: #1a1b26; --bg-surface: #24283b; --bg-card: #1f2335; + --text: #c0caf5; --text-dim: #565f89; --text-bright: #e0e6ff; + --border: #3b4261; --accent: #7aa2f7; + --sev-critical: #f7768e; --sev-high: #ff9e64; --sev-medium: #e0af68; + --sev-low: #9ece6a; --sev-info: #7dcfff; + --good: #9ece6a; --attention: #e0af68; --critical: #f7768e; +} +* { box-sizing: border-box; margin: 0; padding: 0; } +body { + font-family: 'SF Mono', 'Fira Code', 'JetBrains Mono', monospace; + background: var(--bg); color: var(--text); + line-height: 1.6; padding: 2rem; max-width: 1200px; margin: 0 auto; +} +h1 { color: var(--accent); font-size: 1.5rem; margin-bottom: 0.25rem; } +h2 { color: var(--text-bright); font-size: 1.2rem; margin: 2rem 0 0.75rem; border-bottom: 1px solid var(--border); padding-bottom: 0.4rem; } +.meta { color: var(--text-dim); font-size: 0.85rem; margin-bottom: 1.5rem; } +.meta span { margin-right: 2rem; } +.report-card { + background: var(--bg-card); border: 1px solid var(--border); border-radius: 8px; + padding: 1.5rem; margin: 1rem 0; transition: border-color 0.2s; +} +.report-card:hover { border-color: var(--accent); } +.report-card h3 { color: var(--accent); font-size: 1.1rem; margin-bottom: 0.5rem; } +.report-card .description { color: var(--text-dim); font-size: 0.85rem; margin-bottom: 1rem; } +.report-card .stats { display: flex; gap: 1.5rem; flex-wrap: wrap; margin: 0.75rem 0; } +.report-card .stat { text-align: center; } +.report-card .stat-value { font-size: 1.3rem; font-weight: bold; color: var(--text-bright); } +.report-card .stat-label { font-size: 0.7rem; color: var(--text-dim); } +.report-link { + display: inline-block; margin-top: 0.75rem; padding: 0.5rem 1.25rem; + background: 
var(--accent); color: #1a1b26; border-radius: 4px; + text-decoration: none; font-weight: bold; font-size: 0.85rem; + transition: opacity 0.2s; +} +.report-link:hover { opacity: 0.85; } +.not-generated { + background: var(--bg-card); border: 1px dashed var(--border); border-radius: 8px; + padding: 1.25rem; margin: 1rem 0; color: var(--text-dim); font-size: 0.85rem; +} +.not-generated h3 { color: var(--text-dim); font-size: 1rem; margin-bottom: 0.4rem; } +.verdict { display: inline-block; padding: 0.2rem 0.8rem; border-radius: 4px; font-weight: bold; font-size: 0.85rem; } +.verdict-good { background: var(--good); color: #1a1b26; } +.verdict-attention { background: var(--attention); color: #1a1b26; } +.verdict-critical { background: var(--critical); color: #1a1b26; } +.sev-badge { display: inline-block; padding: 0.1rem 0.5rem; border-radius: 3px; font-size: 0.7rem; font-weight: bold; } +.sev-CRITICAL { background: var(--sev-critical); color: #1a1b26; } +.sev-HIGH { background: var(--sev-high); color: #1a1b26; } +.sev-MEDIUM { background: var(--sev-medium); color: #1a1b26; } +.sev-LOW { background: var(--sev-low); color: #1a1b26; } +.sev-INFO { background: var(--sev-info); color: #1a1b26; } +.footer { margin-top: 2rem; padding-top: 1rem; border-top: 1px solid var(--border); color: var(--text-dim); font-size: 0.75rem; } +""" + + +def generate_summary_html( + target: Path, + timestamp: str, + detected_languages: set[str], + *, + technical_filename: str | None = None, + business_filename: str | None = None, + results: list[ToolResult] | None = None, + critical_findings: list[CriticalFinding] | None = None, + code_structure_critique: CodeStructureCritique | None = None, + business_logic_review: BusinessLogicReview | None = None, +) -> str: + """Generate a lightweight HTML entry page linking to the individual reports.""" + lang_display = ", ".join(sorted(lang.title() for lang in detected_languages)) or "Unknown" + human_ts = _format_timestamp_human(timestamp) + + h: 
list[str] = [] + h.append("") + h.append('') + h.append('') + h.append(f"Code Review Summary \u2014 {_esc(human_ts)}") + h.append(f"") + h.append("") + + # Header + h.append("

AIDLC Code Review

") + h.append('
') + h.append(f"Generated: {_esc(human_ts)}") + h.append(f"Target: {_esc(str(target.resolve()))}") + h.append(f"Languages: {_esc(lang_display)}") + h.append("
") + + h.append("

Reports

") + + # --- Technical Report Card --- + if technical_filename and results is not None: + all_findings = [f for r in results for f in r.findings] + by_sev = _count_by_severity(all_findings) + verdict = _overall_verdict(all_findings) + n_critical = len(critical_findings) if critical_findings else 0 + n_dimensions = len(code_structure_critique.dimensions) if code_structure_critique else 0 + + h.append('
') + h.append("

\U0001f527 Technical Report

") + h.append('
Static analysis findings, critical code sections flagged for human review, ' + "and AI-powered code structure critique.
") + + # Verdict + h.append(f'

{_esc(verdict)}

') + + # Stats + h.append('
') + h.append(f'
{len(all_findings)}
' + '
Findings
') + for sev in SEV_ORDER: + count = by_sev.get(sev.value, 0) + if count: + h.append(f'
{count}
' + f'
{sev.value}
') + if n_critical: + h.append(f'
{n_critical}
' + '
Critical Sections
') + if n_dimensions: + h.append(f'
{n_dimensions}
' + '
Structure Dimensions
') + h.append("
") + + h.append(f'Open Technical Report \u2192') + h.append("
") + else: + h.append('
') + h.append("

\U0001f527 Technical Report

") + h.append("

Not generated. Run with default mode or --technical-report to include it.

") + h.append("
") + + # --- Business Logic Report Card --- + if business_filename and business_logic_review is not None: + n_findings = len(business_logic_review.findings) + n_consistency = len(business_logic_review.consistency_issues) + cat_counts: dict[BusinessLogicCategory, int] = {} + for f in business_logic_review.findings: + cat_counts[f.category] = cat_counts.get(f.category, 0) + 1 + + h.append('
') + h.append("

\U0001f4cb Business Logic Report

") + h.append('
AI-driven analysis of business rules, formulas, and domain logic. ' + "Every finding is flagged for human review regardless of static tool results.
") + + # Stats + h.append('
') + h.append(f'
{n_findings}
' + '
Findings
') + h.append(f'
{n_consistency}
' + '
Consistency Issues
') + h.append(f'
{len(cat_counts)}
' + '
Categories
') + h.append("
") + + h.append(f'Open Business Logic Report \u2192') + h.append("
") + else: + h.append('
') + h.append("

\U0001f4cb Business Logic Report

") + h.append("

Not generated. Run with default mode or --business-report to include it.

") + h.append("
") + + # Footer + h.append('") + + h.append("") + return "\n".join(h) diff --git a/scripts/aidlc-codereview/src/code_reviewer/common/spinner.py b/scripts/aidlc-codereview/src/code_reviewer/common/spinner.py new file mode 100644 index 00000000..2087d1bd --- /dev/null +++ b/scripts/aidlc-codereview/src/code_reviewer/common/spinner.py @@ -0,0 +1,53 @@ +"""Terminal spinner for long-running operations.""" + +# Copyright 2026 Amazon.com, Inc. or its affiliates. All Rights Reserved. +# SPDX-License-Identifier: MIT-0 + +import itertools +import sys +import threading +import time + + +class Spinner: + """A simple terminal spinner that runs in a background thread. + + Usage: + with Spinner("Analyzing code"): + do_long_running_work() + """ + + _FRAMES = ["⠋", "⠙", "⠹", "⠸", "⠼", "⠴", "⠦", "⠧", "⠇", "⠏"] + + def __init__(self, message: str = "Working", interval: float = 0.08): + self.message = message + self.interval = interval + self._stop_event = threading.Event() + self._thread: threading.Thread | None = None + + def _spin(self) -> None: + frames = itertools.cycle(self._FRAMES) + while not self._stop_event.is_set(): + frame = next(frames) + sys.stdout.write(f"\r {frame} {self.message}...") + sys.stdout.flush() + time.sleep(self.interval) + # Clear the spinner line when done + sys.stdout.write(f"\r ✔ {self.message}... 
done\n") + sys.stdout.flush() + + def start(self) -> "Spinner": + self._thread = threading.Thread(target=self._spin, daemon=True) + self._thread.start() + return self + + def stop(self) -> None: + self._stop_event.set() + if self._thread: + self._thread.join() + + def __enter__(self) -> "Spinner": + return self.start() + + def __exit__(self, *_) -> None: + self.stop() diff --git a/scripts/aidlc-codereview/src/code_reviewer/common/utils.py b/scripts/aidlc-codereview/src/code_reviewer/common/utils.py new file mode 100644 index 00000000..72562083 --- /dev/null +++ b/scripts/aidlc-codereview/src/code_reviewer/common/utils.py @@ -0,0 +1,42 @@ +"""Shared subprocess utilities for tool wrappers. + +Subprocess execution is core to running static-analysis CLI tools. +Command arguments are controlled by ReviewConfig / tool wrappers, not user input. +""" + +# Copyright 2026 Amazon.com, Inc. or its affiliates. All Rights Reserved. +# SPDX-License-Identifier: MIT-0 + +import subprocess # nosec B404 +import shutil + + +def check_tool_installed(command: str) -> bool: + """Check if a CLI tool is available on PATH.""" + return shutil.which(command) is not None + + +def run_command( + args: list[str], + timeout: int = 300, + cwd: str | None = None, +) -> tuple[int, str, str]: + """Run a subprocess command and return (returncode, stdout, stderr). + + Many static analysis tools use non-zero exit codes to indicate findings + (not errors), so callers should interpret return codes per tool. 
+ """ + try: + result = subprocess.run( # nosec B603 # nosemgrep: dangerous-subprocess-use-audit + args, + capture_output=True, + text=True, + timeout=timeout, + cwd=cwd, + shell=False, + ) + return result.returncode, result.stdout, result.stderr + except subprocess.TimeoutExpired: + return -1, "", f"Command timed out after {timeout}s: {' '.join(args)}" + except FileNotFoundError: + return -1, "", f"Command not found: {args[0]}" diff --git a/scripts/aidlc-codereview/src/code_reviewer/prompts/business-logic-review.md b/scripts/aidlc-codereview/src/code_reviewer/prompts/business-logic-review.md new file mode 100644 index 00000000..4fbe5b1d --- /dev/null +++ b/scripts/aidlc-codereview/src/code_reviewer/prompts/business-logic-review.md @@ -0,0 +1,212 @@ +# Business Logic Review — Human Review Checkpoint Agent + +You are a **principal engineer** performing a business logic review. +Your job: identify code sections that encode **core business rules, formulas, +and domain logic** so a human reviewer knows exactly what to inspect — even when +every static analysis tool reports zero findings. + +> "What should a human review to gain confidence this software does what the +> business intends it to do?" + +--- + +## Two Levels of Analysis + +### Level 1 — Identification + +Locate every section of code that implements a business rule, formula, or +domain-specific decision. Flag it for human review with a clear description +of **what business behavior it controls**. + +### Level 2 — Self-Consistency + +Compare the flagged sections **against each other** within the codebase. +Report any inconsistencies: different constants for the same concept, conflicting +branching logic, duplicate implementations that disagree, or naming that implies +one behavior while the code does another. + +--- + +## Detection Taxonomy + +Flag code that falls into any of these categories. Each category includes +detection signals — patterns to look for in the source code. + +### 1. 
FINANCIAL_FORMULA — Coded Math & Financial Calculations +- Interest rates, APR/APY conversions, amortization schedules +- Tax computations (rate application, bracket logic, inclusive vs exclusive) +- Pricing formulas, margin calculations, fee structures +- Currency conversions, FX rate application +- **Detection signals**: arithmetic operators on money values, hardcoded rates/constants, + `Decimal`/`BigNumber` arithmetic, functions named `calc_*`/`compute_*`/`calculate_*` + +### 2. SCORING_AND_RANKING — Algorithms That Produce Scores or Rankings +- Credit scoring, risk scoring, eligibility scoring +- Grade calculations, weighted averages, GPA computations +- Search ranking, recommendation weights, priority scores +- **Detection signals**: weighted sums, score normalization, threshold comparisons, + `weight`, `score`, `rank`, `grade` in variable/function names + +### 3. PRICING_AND_DISCOUNT — Price Determination & Promotional Logic +- Discount stacking order (percentage before flat? which applies first?) +- Coupon/promotion eligibility and mutual exclusivity rules +- Volume/tier pricing breakpoints +- "Best price" or "best discount" selection logic +- **Detection signals**: discount application sequences, `min()`/`max()` on prices, + promotion rule iteration, coupon validation chains + +### 4. BUSINESS_RULE — Decision Trees & Eligibility Logic +- If/then/else chains that determine accept/reject outcomes +- Enrollment eligibility, qualification checks, approval gates +- Age verification, geographic restrictions, waiting periods +- Policy rules encoded as conditionals (insurance, lending, compliance) +- **Detection signals**: multi-branch conditionals on domain fields, threshold checks + with business-meaningful values, functions named `is_eligible`/`check_*`/`validate_*` + +### 5. 
STATE_MACHINE — Lifecycle & Workflow Transitions +- Order status transitions (pending -> paid -> shipped -> delivered) +- Account lifecycle (active, suspended, closed) +- Claim/ticket state management and valid transition rules +- **Detection signals**: status/state enum comparisons, transition validation, + `status` field updates, state-dependent behavior branching + +### 6. ROUNDING_AND_PRECISION — Numeric Precision in Business Context +- Rounding mode selection (half-up, half-even/banker's, truncation) +- Order of operations — rounding before vs after aggregation +- Precision loss in multi-step financial calculations +- **Detection signals**: `round()`, `toFixed()`, `ROUND_*` constants, + `float` used for currency, `decimalPlaces`, precision parameters + +### 7. BOUNDARY_CONDITION — Domain-Significant Thresholds +- Tax bracket boundaries (> vs >=) +- Regulatory reporting thresholds (e.g., $10,000 BSA/AML) +- Rate tier cutoffs, volume break-points +- Date-based cutoffs (fiscal year, enrollment windows) +- **Detection signals**: comparison operators at hardcoded thresholds, + boundary values in constants, tier/bracket lookup logic + +### 8. DATA_MAPPING — Business-Meaningful Data Transformations +- Field mapping between systems (different names for same concept) +- Unit conversions with business impact (cents vs dollars, kg vs lbs) +- External ID translation (routing numbers, account codes, SKUs) +- Schema migrations that change business semantics +- **Detection signals**: mapping dicts/objects, unit conversion functions, + cross-service data translation, field rename operations + +### 9. 
TEMPORAL_LOGIC — Time-Dependent Business Rules +- Proration calculations (partial-period billing, partial-year tax) +- Business day calculations (excluding weekends/holidays) +- Effective date / expiration date logic +- Timezone-sensitive cutoffs (end of business day, market close) +- **Detection signals**: date arithmetic, calendar/business-day functions, + timezone handling, fiscal period calculations + +### 10. RECONCILIATION — Multi-Party Balance & Consistency +- Double-entry bookkeeping logic +- Payment waterfall application order +- Refund calculations (must mirror original charge structure) +- Marketplace commission/payout splits +- **Detection signals**: debit/credit pairs, balance assertions, + fee split calculations, refund-mirrors-charge patterns + +--- + +## Self-Consistency Checks (Level 2) + +After identifying all business logic sections, cross-reference them and report: + +1. **Constant Drift** — Same business value defined in multiple places with different + values (e.g., tax rate 0.0825 in one file, 0.085 in another) +2. **Logic Divergence** — Same business rule implemented differently in two code paths + (e.g., discount applied before tax in checkout but after tax in refund) +3. **Naming Mismatch** — Variable/function name implies one behavior, code does another + (e.g., `calculate_net_price` actually returns gross price) +4. **Redundant Implementation** — Same calculation exists in multiple places and could + diverge over time + +--- + +## Input + +You will receive: + +1. **SOURCE CODE** — the full codebase being reviewed + +--- + +## Output Format + +Return **ONLY** a JSON object with three keys. No markdown fences, no explanation. 
+ +```json +{ + "executive_summary": "2-3 sentence high-level assessment: what kinds of business logic were found, how many areas need human review, and the most important thing the reviewer should focus on first.", + "findings": [ + { + "category": "FINANCIAL_FORMULA | SCORING_AND_RANKING | PRICING_AND_DISCOUNT | BUSINESS_RULE | STATE_MACHINE | ROUNDING_AND_PRECISION | BOUNDARY_CONDITION | DATA_MAPPING | TEMPORAL_LOGIC | RECONCILIATION", + "title": "Short, meaningful title for this finding (e.g. 'Tax Rate Calculation', 'Order Status Transitions', 'ACH Routing Validation')", + "file": "relative/path/to/file.py", + "start_line": 42, + "end_line": 58, + "what_it_does": "One sentence: what business behavior this code controls", + "review_guidance": "One sentence: what specifically the human reviewer should verify", + "code_block": "the exact source code lines", + "risk_if_wrong": "One sentence: business impact if this code has a bug" + } + ], + "consistency_issues": [ + { + "issue_type": "CONSTANT_DRIFT | LOGIC_DIVERGENCE | NAMING_MISMATCH | REDUNDANT_IMPLEMENTATION", + "description": "One sentence describing the inconsistency", + "locations": [ + {"file": "path/a.py", "start_line": 10, "end_line": 15}, + {"file": "path/b.py", "start_line": 30, "end_line": 40} + ], + "code_blocks": ["exact code from location 1", "exact code from location 2"], + "recommended_action": "One sentence: what the developer should do" + } + ] +} +``` + +## Field Definitions + +### findings[] + +- `category` — Which taxonomy category this falls into +- `title` — Short, meaningful name for this finding. Should read like a section heading + (e.g. "Tax Rate Calculation", "Discount Stacking Order", "ACH Routing Validation"). + NOT a generic label like "Business Rule #1". +- `file`, `start_line`, `end_line` — Exact location +- `what_it_does` — Plain-English description of the business behavior. A PM should understand this. +- `review_guidance` — Tell the human reviewer **what to check**. 
Not "review this code" but + "verify the tax rate constant matches the current rate for the jurisdiction" or + "confirm the discount stacking order matches the business requirements document" +- `code_block` — The exact source lines, not paraphrased +- `risk_if_wrong` — Business impact in concrete terms (money, data, compliance, user experience) + +### consistency_issues[] + +- `issue_type` — Which self-consistency check failed +- `locations` — The two (or more) code locations that are inconsistent +- `code_blocks` — The exact code from each location for side-by-side comparison +- `recommended_action` — Concrete fix suggestion + +## Rules + +- Return `{"executive_summary": "No business logic requiring human review was found.", "findings": [], "consistency_issues": []}` if nothing is found +- Keep all text fields to ONE sentence (except `executive_summary`, which may be 2-3 sentences) — the reviewer is scanning, not reading essays +- `code_block` must be the **exact** source lines, not paraphrased +- Sort findings by category, then file path, then start_line +- Do NOT flag trivial code (config loading, import statements, logging setup, test assertions) +- Do NOT flag general code quality issues — that is a different agent's job +- Focus on code where **a human needs domain knowledge to verify correctness** +- Every finding MUST have actionable `review_guidance` — generic "review this" is useless +- For `consistency_issues`, only report genuine inconsistencies, not intentional variations + (e.g., a checkout tax calculation and a refund tax calculation may legitimately differ) +- Prefer fewer, high-quality findings over many low-value ones. Aim for the 5-20 findings + that matter most, not an exhaustive list of every conditional. 
+ +## SOURCE CODE + +INSERT_SOURCE_CODE diff --git a/scripts/aidlc-codereview/src/code_reviewer/prompts/critical-findings-v1.md b/scripts/aidlc-codereview/src/code_reviewer/prompts/critical-findings-v1.md new file mode 100644 index 00000000..a7dfc984 --- /dev/null +++ b/scripts/aidlc-codereview/src/code_reviewer/prompts/critical-findings-v1.md @@ -0,0 +1,151 @@ +# Critical Code Findings — Senior Review Agent + +You are a **senior software engineer** performing a critical code review. +Your job: identify code sections that **require mandatory human review** because +they carry high technical risk if implemented incorrectly. + +## Your Lens + +> "As a senior developer, I want to highlight critical **technical** code that +> needs immediate attention — things a static analysis tool might miss." + +## Three Categories to Flag + +### 1. COMPUTATION — Dangerous Technical Computations +- Cryptographic calculations, hash comparisons, signature verification +- Numeric precision bugs: float arithmetic where exactness matters, integer overflow, off-by-one in algorithms +- Concurrency-sensitive calculations (race conditions in counters, balances, sequences) +- **Why**: A subtle technical error can silently produce wrong results + +### 2. CONTROL_FLOW — Dangerous Control Flow +- Authentication / authorization gates (who can access what) +- Security-sensitive validation (input sanitization, injection prevention, privilege escalation checks) +- Error handling paths that silently swallow failures or skip critical steps +- Retry / idempotency logic where failure means duplicate side effects +- **Why**: A missed branch or wrong condition creates a security or reliability gap + +### 3. 
DATA_TRANSFORM — Data Inversions & Format Conversions +- JSON ↔ YAML, XML ↔ dict, CSV parsing/generation +- Database → in-memory structure hydration (especially hierarchical data) +- Schema migrations, ETL transforms, serialization/deserialization +- **Why**: Silent data loss or corruption during conversion + +## Input + +You will receive: + +1. **SOURCE CODE** — the full codebase being reviewed +2. **TOOL FINDINGS** — structured findings from static analysis tools (bandit, ruff, mypy, etc.) +3. **FLAGGED FILES** — files that tools already flagged with errors + +## Instructions + +1. **Read the entire codebase** to understand the domain and architecture +2. **Cross-reference tool findings** with the source code +3. **Identify critical sections** that fall into the three categories above +4. For each critical section, extract the **exact code block** and note any **related tool errors** + +## Output Format + +Return **ONLY** a JSON array. No markdown fences, no explanation, no preamble. + +Each element must have exactly these fields: + +```json +{ + "category": "COMPUTATION | CONTROL_FLOW | DATA_TRANSFORM", + "file": "relative/path/to/file.py", + "start_line": 42, + "end_line": 58, + "highlight_lines": [45, 46, 51], + "verdict": "One-line summary of what this code does and why it needs review", + "code_block": "the exact source code lines", + "why_critical": "Brief reason a human must verify this", + "recommended_action": "One concrete action the developer should take to fix or verify this", + "related_tool_findings": [ + { + "tool": "bandit", + "rule_id": "B105", + "severity": "HIGH", + "message": "the tool's error message" + } + ] +} +``` + +### `highlight_lines` field: +- An array of **absolute line numbers** (matching the file) that are the specific problematic lines within the code block +- These are the lines that are the root cause or most critical part of the finding +- Must be a subset of the range `[start_line, end_line]` +- If the entire block is equally 
problematic, include all line numbers in the range + +## Severity Escalation + +Tool findings that are individually classified as MEDIUM by their respective +tools may, when **combined across multiple tools**, reveal a more severe issue. + +- When two or more tools flag the **same code region** (overlapping file + line range), + evaluate the **combined risk**. The intersection may warrant a higher effective + severity than any single tool assigned. +- Example: a linter flags a complex conditional (MEDIUM) and a security scanner flags + an input used in that same branch (MEDIUM) — together they may indicate a critical + vulnerability. +- When you escalate, include **all** contributing tool findings in `related_tool_findings`. +- Only include MEDIUM or higher tool findings. Do not include LOW or INFO. + +## Severity Escalation + +Tool findings that are individually classified as LOW or MEDIUM by their respective +tools may, when **combined across multiple tools**, reveal a more severe issue. + +- When two or more tools flag the **same code region** (overlapping file + line range), + evaluate the **combined risk**. The intersection may warrant a higher effective + severity than any single tool assigned. +- Example: a linter flags a complex conditional (MEDIUM) and a security scanner flags + an input used in that same branch (MEDIUM) — together they may indicate a critical + business-logic vulnerability. +- When you escalate, set `source` to `"tool_assisted"` and include **all** contributing + tool findings in `related_tool_findings`. + +## Filtering — What Does NOT Belong Here + +This section is for findings that carry **real business risk**. The following should +**not** appear as standalone critical findings: + +- Findings whose **only** related tool results are classified LOW or INFO by the + deterministic tools (e.g., missing comments, import ordering, naming conventions, + minor style warnings, low-confidence dead code). 
+- Pure style or cosmetic issues (formatting, whitespace, docstring presence). +- Informational notes with no actionable business impact. + +A LOW/INFO tool finding **may** appear in `related_tool_findings` if it is part of a +**combined escalation** with MEDIUM or higher findings in the same code region. It should +not be the sole reason a section is flagged as critical. + +## Rules + +- Return `[]` if no critical sections are found +- Keep `verdict` to ONE sentence — the reviewer is scanning, not reading essays +- Keep `why_critical` to ONE sentence +- Keep `recommended_action` to ONE sentence — a specific, concrete action (e.g. "Replace MD5 with bcrypt for password hashing") +- `code_block` must be the **exact** source lines, not paraphrased +- `related_tool_findings` can be empty `[]` if no tools flagged that area +- Only include MEDIUM or higher severity tool findings in `related_tool_findings` +- Sort results: COMPUTATION first, then CONTROL_FLOW, then DATA_TRANSFORM +- Within each category, sort by file path then start_line +- Do NOT flag trivial code (simple getters, config constants, imports) +- Do NOT surface findings driven solely by LOW or INFO tool results +- Focus on code where **a technical bug would cause security issues, data loss, or silent corruption** + +## SOURCE CODE + +INSERT_SOURCE_CODE + +## TOOL FINDINGS + +INSERT_TOOL_FINDINGS + +## FLAGGED FILES DETAIL + +INSERT_FLAGGED_FILES diff --git a/scripts/aidlc-codereview/src/code_reviewer/prompts/structure-critique-v1.md b/scripts/aidlc-codereview/src/code_reviewer/prompts/structure-critique-v1.md new file mode 100644 index 00000000..4401b584 --- /dev/null +++ b/scripts/aidlc-codereview/src/code_reviewer/prompts/structure-critique-v1.md @@ -0,0 +1,129 @@ +# Code Structure Critique — AI Code Reviewer + +You are a **principal software engineer** performing a holistic code structure review. 
+Your job: evaluate the codebase across key quality dimensions and provide +**actionable, specific feedback** that a developer can act on immediately. + +## Your Lens + +> "As a principal engineer reviewing this codebase for production readiness, +> I need to assess whether this code is observable, scalable, efficient, +> maintainable, and well-structured." + +## Evaluation Dimensions + +Evaluate the codebase across ALL of the following dimensions: + +### 1. LOGGING — Observability & Logging +- Are logs placed at the right points? (entry/exit of critical functions, error paths, state transitions) +- Are log levels used correctly? (DEBUG vs INFO vs WARNING vs ERROR) +- Is there enough context in log messages? (user IDs, request IDs, relevant state) +- Are sensitive values excluded from logs? (passwords, tokens, PII) +- Are there silent failures? (bare except, swallowed errors with no logging) + +### 2. MEASURABILITY — Metrics & Monitoring Readiness +- Can you measure request latency, throughput, error rates from the code? +- Are there health check endpoints or readiness probes? +- Are business-critical operations instrumented? +- Can you tell when something goes wrong from the outside? + +### 3. SCALABILITY — Scale Readiness +- Are there N+1 query patterns or unbounded loops? +- Is there hardcoded state that prevents horizontal scaling? +- Are database queries efficient? (missing indexes, full table scans) +- Are there connection pool or resource management issues? +- Is there proper pagination for list endpoints? + +### 4. EFFICIENCY — Performance & Resource Usage +- Are there unnecessary computations or redundant operations? +- Is memory usage reasonable? (loading entire files/datasets into memory) +- Are there blocking I/O calls that should be async? +- Are expensive operations cached where appropriate? + +### 5. COMPLEXITY — Code Simplicity & Maintainability +- Are there overly complex functions? (too many branches, deep nesting) +- Is the code DRY? 
(duplicated logic across files) +- Are responsibilities well-separated? (single responsibility principle) +- Are there magic numbers or hardcoded values that should be constants? +- Is error handling consistent and predictable? + +### 6. STRUCTURE — Architecture & Organization +- Is the project structure logical and navigable? +- Are dependencies well-managed? (circular imports, tight coupling) +- Is there a clear separation of concerns? (routes vs business logic vs data access) +- Are interfaces/contracts well-defined? + +## Input + +You will receive: + +1. **SOURCE CODE** — the full codebase being reviewed +2. **TOOL FINDINGS** — structured findings from static analysis tools +3. **CRITICAL FINDINGS** — high-priority critical code sections already identified + +## Instructions + +1. Read the entire codebase to understand architecture and patterns +2. Evaluate each dimension above +3. Cross-reference with tool findings and critical findings for supporting evidence +4. For each issue, cite the EXACT file and line range with the relevant code +5. Provide ONE specific, actionable recommendation per issue + +## Output Format + +Return **ONLY** a JSON object. No markdown fences, no explanation, no preamble. 
+ +```json +{ + "overall_summary": "2-3 sentence high-level assessment of the codebase structure", + "dimensions": [ + { + "dimension": "LOGGING | MEASURABILITY | SCALABILITY | EFFICIENCY | COMPLEXITY | STRUCTURE", + "rating": "GOOD | NEEDS_IMPROVEMENT | POOR", + "summary": "One-line assessment of this dimension", + "findings": [ + { + "file": "relative/path/to/file.py", + "start_line": 10, + "end_line": 25, + "highlight_lines": [14, 15, 22], + "issue": "One-line description of the specific problem", + "recommendation": "One-line actionable fix", + "code_block": "the exact source code lines" + } + ] + } + ] +} +``` + +### `highlight_lines` field: +- An array of **absolute line numbers** (matching the file) that are the specific problematic lines within the code block +- These are the lines that are the root cause or most critical part of the issue +- Must be a subset of the range `[start_line, end_line]` +- If the entire block is equally problematic, include all line numbers in the range + +## Rules + +- Return ALL 6 dimensions, even if rating is GOOD (with empty findings array) +- Keep `summary` to ONE sentence per dimension +- Keep `issue` and `recommendation` to ONE sentence each +- `code_block` must be the **exact** source lines, not paraphrased +- `findings` array can be empty `[]` for dimensions rated GOOD +- Sort findings within each dimension by severity (worst first) +- Do NOT flag trivial style issues (those belong in linting, not structure critique) +- Focus on issues that affect **production readiness, reliability, and maintainability** +- Be specific: "Add request_id to the log in auth_handler line 45" not "improve logging" +- Limit to the **top 5 most impactful findings per dimension** to keep the report scannable + +## SOURCE CODE + +INSERT_SOURCE_CODE + +## TOOL FINDINGS + +INSERT_TOOL_FINDINGS + +## CRITICAL FINDINGS + +INSERT_CRITICAL_FINDINGS diff --git a/scripts/aidlc-codereview/src/code_reviewer/prompts/wrapper-generator-v1.md 
b/scripts/aidlc-codereview/src/code_reviewer/prompts/wrapper-generator-v1.md new file mode 100644 index 00000000..a8bc8812 --- /dev/null +++ b/scripts/aidlc-codereview/src/code_reviewer/prompts/wrapper-generator-v1.md @@ -0,0 +1,78 @@ +--- +template: wrapper-generator +version: 1 +--- + +# Tool Wrapper Generator + +You are an expert Python developer generating a tool wrapper module for the AIDLC Code Reviewer static analysis framework. + +## Your Task + +Generate a **complete** Python module that wraps the CLI tool described below. The module must follow the exact patterns shown in the examples. Output ONLY the Python code wrapped in a ```python code block. + +--- + +## Data Models (common/models.py) + +These are the data classes your wrapper must use: + + + +--- + +## Utility Functions (common/utils.py) + +Use these helpers for running commands and checking tool availability: + + + +--- + +## Severity Classification Policy + +Follow this policy strictly when mapping tool-native severities: + + + +--- + +## Example Wrappers + +Study these examples carefully. Your generated wrapper must follow the same structure and conventions: + + + +--- + +## Tool to Wrap + +Generate a wrapper for this tool: + + + +--- + +## Tool Documentation + + + +--- + +## Output Requirements + +1. Generate a **COMPLETE** Python module (not a snippet or partial code) +2. Must define module-level constants: `CATEGORY`, `TOOL` or `TOOL_NAME`, `SUPPORTED_LANGUAGES` +3. Must define: `def run(target: Path) -> ToolResult` +4. Import from `common.models` (Finding, Severity, ToolResult) and `common.utils` (run_command, check_tool_installed) +5. Follow the severity mapping policy strictly -- non-security categories cap at MEDIUM +6. Handle gracefully: tool not installed, parse errors, empty output, command timeouts +7. Return `ToolResult(success=False, error=...)` on any failure +8. Parse the tool's actual CLI output format to extract findings +9. Wrap your entire response in a single ```python ... 
``` code block + +## CRITICAL Rules + +- **Never import the tool as a Python module.** Always invoke it as a subprocess via `run_command()`. Do NOT use `import pylint`, `import flake8`, etc. +- **Only use CLI flags you are 100% certain exist.** Use the bare minimum flags needed: the output format flag and the target path. Do NOT guess or invent flags. If you are unsure whether a flag exists, leave it out. +- **Keep the CLI invocation simple.** For example, for pylint: `["pylint", "--output-format=json", str(target)]` — nothing more. diff --git a/scripts/aidlc-codereview/src/code_reviewer/review-config.yaml b/scripts/aidlc-codereview/src/code_reviewer/review-config.yaml new file mode 100644 index 00000000..d59b5b3c --- /dev/null +++ b/scripts/aidlc-codereview/src/code_reviewer/review-config.yaml @@ -0,0 +1,21 @@ +# AIDLC Code Reviewer — Tool Configuration +# Add tool names to enable them. The agent auto-generates a wrapper +# for each tool on first run (requires Amazon Bedrock access). + +tools: + # Python + - bandit + # - ruff + # - mypy + # - radon + # - vulture + # - semgrep + # - pylint + # - gitleaks + - flake8 + - pyflakes + + # Java + # - checkstyle + # - javac + # - pmd \ No newline at end of file diff --git a/scripts/aidlc-codereview/src/code_reviewer/runner.py b/scripts/aidlc-codereview/src/code_reviewer/runner.py new file mode 100644 index 00000000..5e795895 --- /dev/null +++ b/scripts/aidlc-codereview/src/code_reviewer/runner.py @@ -0,0 +1,388 @@ +"""Unified CLI entry point for AIDLC Code Reviewer. + +Usage: + aidlc-code-reviewer [--config path] [--output-dir path] [--verbose] + aidlc-code-reviewer --technical-report # technical report only + aidlc-code-reviewer --business-report # business logic report only +""" + +# Copyright 2026 Amazon.com, Inc. or its affiliates. All Rights Reserved. 
def _run_critical_findings(target: Path, results: list[ToolResult]) -> list[CriticalFinding]:
    """Invoke the critical-findings agent on the collected tool results.

    Best-effort step: any failure is reported on stderr and an empty list is
    returned, so the remainder of the review is never blocked.
    """
    try:
        # Imported lazily so the static pipeline works without agent extras.
        from code_reviewer.agent.critical_findings_agent import CriticalFindingsAgent

        vprint("\n--- Critical Code Findings Analysis ---", flush=True)
        return CriticalFindingsAgent().execute(target=target, results=results)
    except Exception as err:
        if isinstance(err, ImportError):
            note = " Agent dependencies not installed, skipping critical findings."
        else:
            note = f" Critical findings analysis error: {err}"
        print(note, file=sys.stderr)
        return []


def _run_code_structure_critique(
    target: Path,
    results: list[ToolResult],
    critical_findings: list[CriticalFinding],
) -> CodeStructureCritique | None:
    """Invoke the code-structure critique agent.

    Best-effort step: any failure is reported on stderr and ``None`` is
    returned, so the remainder of the review is never blocked.
    """
    try:
        # Imported lazily so the static pipeline works without agent extras.
        from code_reviewer.agent.code_structure_agent import CodeStructureAgent

        vprint("\n--- Code Structure Critique ---", flush=True)
        return CodeStructureAgent().execute(
            target=target,
            results=results,
            critical_findings=critical_findings,
        )
    except Exception as err:
        if isinstance(err, ImportError):
            note = " Agent dependencies not installed, skipping structure critique."
        else:
            note = f" Structure critique error: {err}"
        print(note, file=sys.stderr)
        return None


def _run_business_logic_review(target: Path) -> BusinessLogicReview | None:
    """Invoke the business-logic review agent.

    Best-effort step: any failure is reported on stderr and ``None`` is
    returned, so the remainder of the review is never blocked.
    """
    try:
        # Imported lazily so the static pipeline works without agent extras.
        from code_reviewer.agent.business_logic_agent import BusinessLogicAgent

        vprint("\n--- Business Logic Review ---", flush=True)
        return BusinessLogicAgent().execute(target=target)
    except Exception as err:
        if isinstance(err, ImportError):
            note = " Agent dependencies not installed, skipping business logic review."
        else:
            note = f" Business logic review error: {err}"
        print(note, file=sys.stderr)
        return None
+ """ + wrapper = get_wrapper(tool_cfg.name) + cached_result: ToolResult | None = None + + if wrapper is None and not no_generate: + vprint(f" Generating wrapper for '{tool_cfg.name}'...", flush=True) + try: + from code_reviewer.agent.wrapper_generator import WrapperGeneratorAgent + from code_reviewer.agent.models import GenerationStatus + + gen_agent = WrapperGeneratorAgent() + gen_result = gen_agent.execute(tool_config=tool_cfg, target=target) + if gen_result.status == GenerationStatus.SUCCESS: + wrapper = get_wrapper(tool_cfg.name) + cached_result = gen_result.tool_result + vprint(f" Generated wrapper for '{tool_cfg.name}'", flush=True) + else: + print(f" Generation failed for '{tool_cfg.name}': {gen_result.error}", file=sys.stderr) + except ImportError: + print(" Agent dependencies not installed, skipping generation.", file=sys.stderr) + except Exception as exc: + print(f" Generation error for '{tool_cfg.name}': {exc}", file=sys.stderr) + + category = getattr(wrapper, "CATEGORY", None) or tool_cfg.category or "unknown" + + if wrapper is None: + return SkipRecord( + tool=tool_cfg.name, + category=category, + reason=f"No wrapper for '{tool_cfg.name}'", + ) + + supported = get_supported_languages(tool_cfg.name) + if "*" not in supported and not (set(supported) & detected): + reason = f"No {', '.join(supported)} files detected" + vprint(f" Skipping {tool_cfg.name} — {reason}", flush=True) + return SkipRecord(tool=tool_cfg.name, category=category, reason=reason) + + if cached_result is not None: + vprint(f" Running {tool_cfg.name}...", flush=True) + result = cached_result + else: + vprint(f" Running {tool_cfg.name}...", flush=True) + try: + result = wrapper.run(target) + except Exception as exc: + result = ToolResult( + tool=tool_cfg.name, + category=category, + success=False, + error=str(exc), + ) + + if not result.success: + reason = result.error or "Tool returned an error" + vprint(f" Skipping {tool_cfg.name} — {reason}", flush=True) + return 
def run_review(
    target: Path,
    config_path: Path | None = None,
    no_generate: bool = False,
) -> tuple[list[ToolResult], list[SkipRecord], set[str]]:
    """Run every configured tool against *target* in parallel.

    Args:
        target: File or directory to analyze.
        config_path: Optional path passed to ``load_config``; ``None`` lets
            ``load_config`` pick its default.
        no_generate: When true, missing wrappers are not auto-generated.

    Returns:
        ``(results, skipped, detected)`` — tool results, skip records, and the
        set of detected languages. Results and skip records are listed in the
        order the tools appear in the configuration, so reports are
        reproducible between runs.
    """
    config = load_config(config_path)
    detected = detect_languages(target)

    if not detected:
        print("Warning: No recognized programming languages detected in target.", file=sys.stderr)

    results: list[ToolResult] = []
    skipped: list[SkipRecord] = []

    with ThreadPoolExecutor() as executor:
        # Submit everything first so the tools still run concurrently, then
        # collect in submission (= config) order rather than completion order.
        submitted = [
            (tool_cfg, executor.submit(_run_single_tool, tool_cfg, target, detected, no_generate))
            for tool_cfg in config.tools
        ]
        for tool_cfg, future in submitted:
            try:
                outcome = future.result()
            except Exception as exc:
                # future.result() re-raises whatever the worker raised; one
                # misbehaving tool must not abort the whole review.
                print(f" Unexpected error running '{tool_cfg.name}': {exc}", file=sys.stderr)
                outcome = SkipRecord(
                    tool=tool_cfg.name,
                    category=tool_cfg.category or "unknown",
                    reason=f"Unexpected error: {exc}",
                )
            if isinstance(outcome, SkipRecord):
                skipped.append(outcome)
            else:
                results.append(outcome)

    return results, skipped, detected


def _select_reports(technical_flag: bool, business_flag: bool) -> tuple[bool, bool]:
    """Map the two report flags to ``(run_technical, run_business)``.

    Neither flag or both flags select both reports; a single flag selects
    only that report.
    """
    if technical_flag == business_flag:
        return True, True
    return technical_flag, business_flag


def main() -> None:
    """Parse CLI arguments, run the requested review pipelines, write reports.

    Flow:
      1. ``--preflight`` runs the agent pre-flight checks and exits (0 when
         they pass, 1 when they fail or the agent packages are missing).
      2. Otherwise ``target`` is required and must exist (exit code 1 if not).
      3. The technical pipeline (static tools, critical findings, structure
         critique) and/or the business logic pipeline run, as selected by
         ``--technical-report`` / ``--business-report``.
      4. Markdown/HTML reports and a summary page are written to the output
         directory and their paths are printed.
    """
    parser = argparse.ArgumentParser(
        prog="aidlc-code-reviewer",
        description="AIDLC Code Reviewer — automated code quality analysis.",
    )
    parser.add_argument("target", type=Path, nargs="?", default=None, help="Path to directory or file to analyze")
    parser.add_argument(
        "-c", "--config", type=Path, default=None,
        help="Path to review-config.yaml (default: built-in config)",
    )
    parser.add_argument(
        "-o", "--output-dir", type=Path, default=None,
        help="Output directory for reports (default: ./reports/)",
    )
    parser.add_argument(
        "--no-generate", action="store_true", default=False,
        help="Skip auto-generation of missing tool wrappers",
    )
    parser.add_argument(
        "--preflight", action="store_true", default=False,
        help="Run pre-flight checks for agent setup, then exit",
    )
    parser.add_argument(
        "-v", "--verbose", action="store_true", default=False,
        help="Show detailed progress output for each tool and agent step",
    )
    parser.add_argument(
        "--technical-report", action="store_true", default=False,
        help="Generate only the technical report (static tools + critical findings + structure critique)",
    )
    parser.add_argument(
        "--business-report", action="store_true", default=False,
        help="Generate only the business logic review report (AI-driven, no static tools)",
    )
    args = parser.parse_args()

    # Configure verbose output and logging
    set_verbose(args.verbose)
    logging.basicConfig(
        level=logging.INFO if args.verbose else logging.ERROR,
        format="%(message)s",
    )

    # Pre-flight check mode
    if args.preflight:
        try:
            from code_reviewer.agent.preflight import run_preflight
            ok = run_preflight(config_path=args.config)
            sys.exit(0 if ok else 1)
        except ImportError:
            print("Error: Agent packages not installed. Run: pip install -e .", file=sys.stderr)
            sys.exit(1)

    # target is declared optional only so that --preflight can run without it.
    if args.target is None:
        parser.error("the following arguments are required: target")

    target = args.target.resolve()
    if not target.exists():
        print(f"Error: target not found: {target}", file=sys.stderr)
        sys.exit(1)

    # Default (no flags) or both flags: both reports. One flag: only that report.
    run_technical, run_business = _select_reports(args.technical_report, args.business_report)

    # Derive both stamps from one instant so the timestamp shown in a report
    # always agrees with the one embedded in its filename.
    now = datetime.now(timezone.utc)
    timestamp = now.strftime("%Y-%m-%dT%H:%M:%SZ")
    ts_file = now.strftime("%Y%m%d_%H%M%S")

    print("Activating AIDLC Code Reviewer...")
    vprint(f" Target: {target}")

    results: list[ToolResult] = []
    skipped: list[SkipRecord] = []
    detected: set[str] = set()
    critical_findings: list[CriticalFinding] = []
    structure_critique: CodeStructureCritique | None = None
    business_logic_review: BusinessLogicReview | None = None

    # --- Technical report pipeline ---
    if run_technical:
        with Spinner("Running code review"):
            results, skipped, detected = run_review(target, args.config, no_generate=args.no_generate)

        total_findings = sum(len(r.findings) for r in results)
        print(f" Tools run: {len(results)}, Skipped: {len(skipped)}, "
              f"Findings: {total_findings}")

        with Spinner("Analyzing critical findings"):
            critical_findings = _run_critical_findings(target, results)

        with Spinner("Generating structure critique"):
            structure_critique = _run_code_structure_critique(target, results, critical_findings)
    else:
        # Still need detected languages for the business report header
        detected = detect_languages(target)

    # --- Business logic report pipeline ---
    if run_business:
        with Spinner("Analyzing business logic"):
            business_logic_review = _run_business_logic_review(target)

    # --- Write reports ---
    output_dir = args.output_dir or Path("reports")
    output_dir.mkdir(parents=True, exist_ok=True)

    # Build filenames
    summary_html_name = f"code_review_summary_{ts_file}.html"
    tech_html_name = f"code_review_technical_{ts_file}.html"
    tech_md_name = f"code_review_technical_{ts_file}.md"
    biz_html_name = f"code_review_business_{ts_file}.html"
    biz_md_name = f"code_review_business_{ts_file}.md"

    # When both reports are generated, remove critical findings already covered
    # by the business logic report to avoid duplicate reporting. Findings are
    # matched on (file basename, start line).
    tech_critical_findings = critical_findings
    if business_logic_review and critical_findings:
        biz_files = {
            (Path(blf.file).name, blf.start_line)
            for blf in business_logic_review.findings
        }
        tech_critical_findings = [
            cf for cf in critical_findings
            if (Path(cf.file).name, cf.start_line) not in biz_files
        ]

    # Reports are always written as UTF-8: the platform default encoding may
    # be unable to represent non-ASCII characters in the generated content.
    with Spinner("Writing reports"):
        if run_technical:
            md_path = output_dir / tech_md_name
            html_path = output_dir / tech_html_name

            md_content = generate_markdown(target, results, skipped, timestamp, detected, tech_critical_findings, structure_critique)
            tech_sibling = (biz_html_name, "Business Logic Report") if run_business else None
            html_content = generate_html(
                target, results, skipped, timestamp, detected,
                tech_critical_findings, structure_critique,
                summary_filename=summary_html_name,
                sibling_report=tech_sibling,
            )

            md_path.write_text(md_content, encoding="utf-8")
            html_path.write_text(html_content, encoding="utf-8")

        if run_business and business_logic_review:
            biz_md_path = output_dir / biz_md_name
            biz_html_path = output_dir / biz_html_name

            biz_md_content = generate_business_logic_markdown(target, timestamp, detected, business_logic_review)
            biz_sibling = (tech_html_name, "Technical Report") if run_technical else None
            biz_html_content = generate_business_logic_html(
                target, timestamp, detected, business_logic_review,
                summary_filename=summary_html_name,
                sibling_report=biz_sibling,
            )

            biz_md_path.write_text(biz_md_content, encoding="utf-8")
            biz_html_path.write_text(biz_html_content, encoding="utf-8")

        # Summary entry page (always generated)
        summary_path = output_dir / summary_html_name
        summary_content = generate_summary_html(
            target, timestamp, detected,
            technical_filename=tech_html_name if run_technical else None,
            business_filename=biz_html_name if (run_business and business_logic_review) else None,
            results=results if run_technical else None,
            critical_findings=critical_findings if run_technical else None,
            code_structure_critique=structure_critique if run_technical else None,
            business_logic_review=business_logic_review if run_business else None,
        )
        summary_path.write_text(summary_content, encoding="utf-8")

    # --- Print summary ---
    if run_technical:
        print(f" Critical sections: {len(critical_findings)}")
    if run_business and business_logic_review:
        print(f" Business logic findings: {len(business_logic_review.findings)}, "
              f"Consistency issues: {len(business_logic_review.consistency_issues)}")
    print()
    print(" Reports:")
    print(f" \u2192 Start here: {summary_path}")
    if run_technical:
        print(f" Technical (Markdown): {md_path}")
        print(f" Technical (HTML): {html_path}")
    if run_business and business_logic_review:
        print(f" Business Logic (Markdown): {biz_md_path}")
        print(f" Business Logic (HTML): {biz_html_path}")


if __name__ == "__main__":
    main()
# In-memory cache of loaded wrapper modules, keyed by the tool name as it
# appears in the configuration. Guarded by _registry_lock.
TOOL_REGISTRY: dict[str, ModuleType] = {}

_TOOLS_DIR = Path(__file__).resolve().parent
_registry_lock = threading.Lock()


def _try_load_from_disk(tool_name: str) -> ModuleType | None:
    """Load ``tools/<safe_name>.py`` for *tool_name* and register it.

    ``safe_name`` is *tool_name* with ``-`` and spaces replaced by ``_``.

    Returns:
        The loaded module, or ``None`` when no wrapper file exists, the name
        would resolve outside the tools directory, or the file fails to load
        (the failure is reported on stderr).
    """
    safe_name = tool_name.replace("-", "_").replace(" ", "_")
    filename = f"{safe_name}.py"
    # Tool names come from an editable config file; refuse anything carrying
    # directory components (e.g. "../x") instead of executing a file that
    # lives outside the tools directory.
    if Path(filename).name != filename:
        return None
    wrapper_path = _TOOLS_DIR / filename
    if not wrapper_path.is_file():
        return None
    try:
        spec = importlib.util.spec_from_file_location(f"tools.{safe_name}", str(wrapper_path))
        if spec is None or spec.loader is None:
            return None
        module = importlib.util.module_from_spec(spec)
        # Make the parent of the tools directory importable before the
        # wrapper's own top-level imports run.
        project_root = str(_TOOLS_DIR.parent)
        if project_root not in sys.path:
            sys.path.insert(0, project_root)
        spec.loader.exec_module(module)
    except Exception as exc:
        # A broken wrapper must not crash the review, but it should be
        # distinguishable from a wrapper that simply does not exist.
        print(f" Failed to load wrapper for '{tool_name}': {exc}", file=sys.stderr)
        return None
    # Every other registry access holds the lock; the write must as well.
    with _registry_lock:
        TOOL_REGISTRY[tool_name] = module
    return module


def get_wrapper(tool_name: str) -> ModuleType | None:
    """Look up a tool wrapper module by name.

    Checks the in-memory registry first, then looks for a wrapper file on
    disk (``tools/<safe_name>.py``). Returns ``None`` when neither yields a
    module.
    """
    with _registry_lock:
        wrapper = TOOL_REGISTRY.get(tool_name)
    if wrapper is not None:
        return wrapper
    # Loaded outside the lock so slow or misbehaving wrapper code cannot
    # block other threads' registry lookups.
    return _try_load_from_disk(tool_name)


def get_supported_languages(tool_name: str) -> list[str]:
    """Return the languages declared by a registered wrapper.

    Only the in-memory registry is consulted. Falls back to ``["*"]`` when the
    tool is not registered or its wrapper does not declare
    ``SUPPORTED_LANGUAGES`` (or declares it as ``None``). A bare string is
    treated as a single language, and any other iterable is copied into a new
    list so callers cannot mutate the wrapper's own value.
    """
    with _registry_lock:
        wrapper = TOOL_REGISTRY.get(tool_name)
    declared = getattr(wrapper, "SUPPORTED_LANGUAGES", None)
    if declared is None:
        return ["*"]
    if isinstance(declared, str):
        # Guard against `SUPPORTED_LANGUAGES = "python"`, which would otherwise
        # be matched character by character by callers.
        return [declared]
    return list(declared)


def register_wrapper(tool_name: str, module: ModuleType) -> None:
    """Register (or replace) the wrapper module for *tool_name*."""
    with _registry_lock:
        TOOL_REGISTRY[tool_name] = module


def is_registered(tool_name: str) -> bool:
    """Return whether *tool_name* has a wrapper in the in-memory registry."""
    with _registry_lock:
        return tool_name in TOOL_REGISTRY