diff --git a/hodor/agent.py b/hodor/agent.py index 6fce9fc..fd1caa3 100644 --- a/hodor/agent.py +++ b/hodor/agent.py @@ -15,8 +15,21 @@ from openhands.sdk.event import Event from openhands.sdk.workspace import LocalWorkspace -from .github import GitHubAPIError, fetch_github_pr_info, normalize_github_metadata -from .gitlab import GitLabAPIError, fetch_gitlab_mr_info, post_gitlab_mr_comment +from .github import ( + GitHubAPIError, + create_github_pr_comment, + fetch_github_pr_info, + find_hodor_comment_github, + normalize_github_metadata, + update_github_pr_comment, +) +from .gitlab import ( + GitLabAPIError, + fetch_gitlab_mr_info, + find_hodor_comment, + post_gitlab_mr_comment, + update_gitlab_mr_comment, +) from .llm import create_hodor_agent, get_api_key from .prompts.pr_review_prompt import build_pr_review_prompt from .skills import discover_skills @@ -88,18 +101,62 @@ def parse_pr_url(pr_url: str) -> tuple[str, str, int, str]: ) +def _compute_diff_hash(workspace: Path, diff_base_sha: str | None, target_branch: str) -> str: + """Compute SHA-256 hash of the diff for deduplication.""" + base_ref = diff_base_sha or f"origin/{target_branch}" + result = subprocess.run( + ["git", "diff", base_ref, "HEAD"], + capture_output=True, + text=True, + cwd=str(workspace), + ) + if result.returncode != 0: + return "" # Can't compute hash, don't skip + import hashlib + + return hashlib.sha256(result.stdout.encode()).hexdigest() + + +def _extract_diff_hash(body: str) -> str | None: + """Extract diff hash from a Hodor comment footer.""" + import re + + match = re.search(r"", body) + return match.group(1) if match else None + + +def _find_existing_hodor_comment( + platform: Platform, + owner: str, + repo: str, + pr_number: int, + host: str | None = None, +) -> dict[str, Any] | None: + """Find the existing Hodor comment on a PR/MR, regardless of platform.""" + if platform == "gitlab": + return find_hodor_comment(owner, repo, pr_number, host=host) + elif platform == "github": + return find_hodor_comment_github(owner, repo, pr_number) + return None + + def post_review_comment( pr_url: str, review_text: str, model: str | None = None, + diff_hash: str | None = None, ) -> dict[str, Any]: """ Post a review comment on a GitHub PR or GitLab MR using CLI tools. + Uses edit-in-place: if a previous Hodor comment exists, updates it + instead of creating a new one. + Args: pr_url: URL of the pull request or merge request review_text: The review text to post as a comment model: LLM model used for the review (optional, for transparency) + diff_hash: SHA-256 hash of the diff (optional, embedded in footer for skip logic) Returns: Dictionary with comment posting result @@ -112,45 +169,48 @@ def post_review_comment( except ValueError as e: return {"success": False, "error": str(e)} - # Append model information to review text for transparency + # Build footer with model info and optional diff hash + footer_parts = [] if model: - review_text_with_footer = f"{review_text}\n\n---\n\n*Review generated by Hodor using `{model}`*" + footer_parts.append(f"*Review generated by Hodor using `{model}`*") else: - review_text_with_footer = review_text + footer_parts.append("*Review generated by Hodor*") + if diff_hash: + footer_parts.append(f"") + + review_text_with_footer = f"{review_text}\n\n---\n\n" + "\n".join(footer_parts) try: + # Try to find an existing Hodor comment for edit-in-place + existing = _find_existing_hodor_comment(platform, owner, repo, pr_number, host=host) + if platform == "github": - # Use gh CLI to post comment - subprocess.run( - [ - "gh", - "pr", - "review", - str(pr_number), - "--repo", - f"{owner}/{repo}", - "--comment", - "--body", - review_text_with_footer, - ], - check=True, - capture_output=True, - text=True, - ) - logger.info(f"Successfully posted review to GitHub PR #{pr_number}") - return {"success": True, "platform": "github", "pr_number": pr_number} + if existing: + # Update existing comment + update_github_pr_comment(owner, repo, existing["id"], review_text_with_footer) + logger.info(f"Successfully updated existing review on GitHub PR #{pr_number}") + return {"success": True, "platform": "github", "pr_number": pr_number, "updated": True} + else: + # Create new issue comment (not review comment, so we can edit later) + create_github_pr_comment(owner, repo, pr_number, review_text_with_footer) + logger.info(f"Successfully posted review to GitHub PR #{pr_number}") + return {"success": True, "platform": "github", "pr_number": pr_number} elif platform == "gitlab": - # Use glab CLI to post comment - post_gitlab_mr_comment( - owner, - repo, - pr_number, - review_text_with_footer, - host=host, - ) - logger.info(f"Successfully posted review to GitLab MR !{pr_number} on {owner}/{repo}") - return {"success": True, "platform": "gitlab", "mr_number": pr_number} + if existing: + # Update existing comment + update_gitlab_mr_comment( + owner, repo, pr_number, existing["id"], review_text_with_footer, host=host, + ) + logger.info(f"Successfully updated existing review on GitLab MR !{pr_number}") + return {"success": True, "platform": "gitlab", "mr_number": pr_number, "updated": True} + else: + # Create new comment + post_gitlab_mr_comment( + owner, repo, pr_number, review_text_with_footer, host=host, + ) + logger.info(f"Successfully posted review to GitLab MR !{pr_number} on {owner}/{repo}") + return {"success": True, "platform": "gitlab", "mr_number": pr_number} else: return {"success": False, "error": f"Unsupported platform: {platform}"} @@ -181,7 +241,9 @@ def review_pr( output_format: str = "markdown", max_iterations: int = 500, model_canonical_name: str | None = None, -) -> str: + post: bool = False, + skip_if_unchanged: bool = True, +) -> str | tuple[str, str] | None: """ Review a pull request using OpenHands agent with bash tools. @@ -198,9 +260,13 @@ def review_pr( workspace_dir: Directory to use for workspace (if None, creates temp dir). Reuses if same repo. output_format: Output format - "markdown" or "json" (default: "markdown") max_iterations: Maximum number of agent iterations (default: 500, use -1 for unlimited) + post: Whether the review will be posted (enables diff-hash skip logic) + skip_if_unchanged: Skip review if diff hash matches existing comment (default: True) Returns: - Review text as string (format depends on output_format) + - None if review was skipped (diff unchanged) + - (review_text, diff_hash) tuple when post=True + - review_text string when post=False Raises: ValueError: If URL is invalid @@ -240,6 +306,21 @@ def review_pr( logger.error(f"Failed to setup workspace: {e}") raise RuntimeError(f"Failed to setup workspace: {e}") from e + # Compute diff hash for deduplication (before expensive agent creation) + diff_hash = _compute_diff_hash(workspace, diff_base_sha, target_branch) + + # If posting and skip_if_unchanged, check if diff matches existing comment + if post and skip_if_unchanged and diff_hash: + try: + existing = _find_existing_hodor_comment(platform, owner, repo, pr_number, host=host) + if existing and f"hodor:diff-hash:sha256:{diff_hash}" in existing.get("body", ""): + logger.info("Diff unchanged since last review — skipping") + if workspace and cleanup: + cleanup_workspace(workspace) + return None + except Exception as e: + logger.warning(f"Failed to check existing comment for skip logic: {e}") + # Discover repository skills (from .cursorrules, agents.md, .hodor/skills/) skills = [] try: @@ -449,6 +530,8 @@ def on_event(event: Any) -> None: except Exception as e: logger.warning(f"Failed to get metrics: {e}") + if post: + return (review_content, diff_hash) return review_content except Exception as e: diff --git a/hodor/cli.py b/hodor/cli.py index fdb228e..7e82754 100644 --- a/hodor/cli.py +++ b/hodor/cli.py @@ -123,6 +123,12 @@ def parse_llm_args(ctx, param, value): help="Canonical model name for feature detection (e.g. 'claude-opus-4-6'). " "Required for opaque ARN-based models to enable prompt caching.", ) +@click.option( + "--skip-if-unchanged/--no-skip-if-unchanged", + default=True, + help="Skip review if the diff hasn't changed since last posted review (default: enabled). " + "Only applies when --post is used.", +) def main( pr_url: str, model: str, @@ -138,6 +144,7 @@ def main( max_iterations: int, ultrathink: bool, model_canonical_name: str | None, + skip_if_unchanged: bool, ): """ Review a GitHub pull request or GitLab merge request using AI. @@ -252,11 +259,23 @@ def main( output_format="json" if output_json else "markdown", max_iterations=max_iterations, model_canonical_name=model_canonical_name, + post=post, + skip_if_unchanged=skip_if_unchanged, ) progress.update(task, description="Review complete!") progress.stop() + # Handle skip (diff unchanged) + if review_output is None: + console.print("\n[bold green]Review skipped — diff unchanged since last review[/bold green]") + return + + # Unpack diff_hash when in post mode + diff_hash = None + if isinstance(review_output, tuple): + review_output, diff_hash = review_output + # Display result if post: # Post to PR/MR (always as markdown, never raw JSON) @@ -275,6 +294,7 @@ def main( pr_url=pr_url, review_text=review_text, model=model, + diff_hash=diff_hash, ) if result.get("success"): diff --git a/hodor/github.py b/hodor/github.py index 76a56f1..e06d7d8 100644 --- a/hodor/github.py +++ b/hodor/github.py @@ -91,6 +91,83 @@ def normalize_github_metadata(raw: dict[str, Any]) -> dict[str, Any]: return metadata +def find_hodor_comment_github( + owner: str, + repo: str, + pr_number: str | int, +) -> dict[str, Any] | None: + """Find the existing Hodor review comment on a GitHub PR.""" + + try: + result = subprocess.run( + [ + "gh", "api", + f"repos/{owner}/{repo}/issues/{pr_number}/comments", + "--paginate", + "--jq", + '.[] | select(.body | contains("Review generated by Hodor")) | {id, body}', + ], + capture_output=True, + text=True, + ) + except Exception as exc: + logger.warning(f"Failed to search for existing Hodor comment: {exc}") + return None + + if result.returncode != 0 or not result.stdout.strip(): + return None + + lines = [line for line in result.stdout.strip().split("\n") if line.strip()] + if not lines: + return None + + try: + return json.loads(lines[-1]) + except json.JSONDecodeError: + logger.warning("Failed to parse Hodor comment JSON from gh api output") + return None + + +def update_github_pr_comment( + owner: str, + repo: str, + comment_id: int, + body: str, +) -> None: + """Update an existing comment on a GitHub PR.""" + + subprocess.run( + [ + "gh", "api", "--method", "PATCH", + f"repos/{owner}/{repo}/issues/comments/{comment_id}", + "-f", f"body={body}", + ], + check=True, + capture_output=True, + text=True, + ) + + +def create_github_pr_comment( + owner: str, + repo: str, + pr_number: str | int, + body: str, +) -> None: + """Create a new issue comment on a GitHub PR.""" + + subprocess.run( + [ + "gh", "api", "--method", "POST", + f"repos/{owner}/{repo}/issues/{pr_number}/comments", + "-f", f"body={body}", + ], + check=True, + capture_output=True, + text=True, + ) + + def _github_comments_to_notes( comments: dict[str, Any] | list[dict[str, Any]] | None, ) -> list[dict[str, Any]]: diff --git a/hodor/gitlab.py b/hodor/gitlab.py index 32156f6..b18c8b7 100644 --- a/hodor/gitlab.py +++ b/hodor/gitlab.py @@ -165,6 +165,55 @@ def post_gitlab_mr_comment( return note.attributes +def find_hodor_comment( + owner: str, + repo: str, + mr_number: str | int, + host: str | None = None, +) -> dict[str, Any] | None: + """Find the existing Hodor review comment on a GitLab MR, if any.""" + + client = _create_gitlab_client(host) + project = _get_project(client, owner, repo) + mr = _get_merge_request(project, mr_number) + + try: + notes = mr.notes.list(all=True, sort="desc", per_page=100) + except gitlab_exceptions.GitlabError as exc: + logger.warning(f"Failed to list notes for MR !{mr_number}: {exc}") + return None + + for note in notes: + if "Review generated by Hodor" in (note.body or ""): + return {"id": note.id, "body": note.body} + return None + + +def update_gitlab_mr_comment( + owner: str, + repo: str, + mr_number: str | int, + note_id: int, + body: str, + *, + host: str | None = None, +) -> dict[str, Any]: + """Update an existing note on a GitLab merge request.""" + + client = _create_gitlab_client(host) + project = _get_project(client, owner, repo) + mr = _get_merge_request(project, mr_number) + + try: + note = mr.notes.get(note_id) + note.body = body + note.save() + except gitlab_exceptions.GitlabError as exc: + raise GitLabAPIError(f"Failed to update note {note_id} on MR !{mr_number}: {exc}") from exc + + return note.attributes + + def summarize_gitlab_notes( notes: list[dict[str, Any]] | None, *, diff --git a/hodor/prompts/templates/default_review.md b/hodor/prompts/templates/default_review.md index be3f2e4..3b9188b 100644 --- a/hodor/prompts/templates/default_review.md +++ b/hodor/prompts/templates/default_review.md @@ -158,6 +158,10 @@ Total issues: X critical, Y important, Z minor. *Correct = existing code won't break, no bugs, free of blocking issues.* ``` +### Clean Review (No Issues Found) + +If you find zero qualifying issues, omit the "Issues Found" section entirely (do NOT list empty categories with "None"). Jump straight to the Summary and Overall Verdict. + ### Formatting Rules - Brief: 1 paragraph max per finding, no unnecessary line breaks diff --git a/tests/test_agent_utils.py b/tests/test_agent_utils.py index 930c42e..cb9df51 100644 --- a/tests/test_agent_utils.py +++ b/tests/test_agent_utils.py @@ -1,6 +1,9 @@ +import hashlib +from unittest.mock import patch, MagicMock + import pytest -from hodor.agent import detect_platform, parse_pr_url +from hodor.agent import detect_platform, parse_pr_url, _compute_diff_hash, _extract_diff_hash @pytest.mark.parametrize( @@ -35,3 +38,80 @@ def test_parse_pr_url_gitlab_subgroups(): def test_parse_pr_url_invalid(): with pytest.raises(ValueError): parse_pr_url("https://gitlab.com/foo/bar/issues/1") + + +# --- _extract_diff_hash tests --- + + +def test_extract_diff_hash_present(): + body = ( + "Some review text\n\n---\n\n" + "*Review generated by Hodor using `some-model`*\n" + "" + ) + assert _extract_diff_hash(body) == "abcdef1234567890" + + +def test_extract_diff_hash_missing(): + body = "Some review text\n\n---\n\n*Review generated by Hodor using `some-model`*" + assert _extract_diff_hash(body) is None + + +def test_extract_diff_hash_empty(): + assert _extract_diff_hash("") is None + + +# --- _compute_diff_hash tests --- + + +@patch("hodor.agent.subprocess.run") +def test_compute_diff_hash_with_diff_base_sha(mock_run): + diff_content = "diff --git a/foo.py b/foo.py\n+hello\n" + mock_run.return_value = MagicMock(returncode=0, stdout=diff_content) + + from pathlib import Path + result = _compute_diff_hash(Path("/tmp/workspace"), "abc123", "main") + + assert result == hashlib.sha256(diff_content.encode()).hexdigest() + mock_run.assert_called_once_with( + ["git", "diff", "abc123", "HEAD"], + capture_output=True, text=True, cwd="/tmp/workspace", + ) + + +@patch("hodor.agent.subprocess.run") +def test_compute_diff_hash_falls_back_to_target_branch(mock_run): + diff_content = "diff --git a/bar.py b/bar.py\n+world\n" + mock_run.return_value = MagicMock(returncode=0, stdout=diff_content) + + from pathlib import Path + result = _compute_diff_hash(Path("/tmp/workspace"), None, "develop") + + assert result == hashlib.sha256(diff_content.encode()).hexdigest() + mock_run.assert_called_once_with( + ["git", "diff", "origin/develop", "HEAD"], + capture_output=True, text=True, cwd="/tmp/workspace", + ) + + +@patch("hodor.agent.subprocess.run") +def test_compute_diff_hash_returns_empty_on_failure(mock_run): + mock_run.return_value = MagicMock(returncode=128, stdout="") + + from pathlib import Path + result = _compute_diff_hash(Path("/tmp/workspace"), None, "main") + + assert result == "" + + +@patch("hodor.agent.subprocess.run") +def test_compute_diff_hash_consistent(mock_run): + """Same diff content should always produce the same hash.""" + diff_content = "diff --git a/x.py b/x.py\n+consistent\n" + mock_run.return_value = MagicMock(returncode=0, stdout=diff_content) + + from pathlib import Path + hash1 = _compute_diff_hash(Path("/tmp/w1"), "sha1", "main") + hash2 = _compute_diff_hash(Path("/tmp/w2"), "sha2", "main") + + assert hash1 == hash2