From 4aab2d6e3b53c2685698da97a839b16072a4ddce Mon Sep 17 00:00:00 2001 From: Karan Sharma Date: Wed, 18 Feb 2026 16:36:04 +0530 Subject: [PATCH] Add edit-in-place comments and diff-hash skip for CI pipelines When running in CI (GitLab CI / GitHub Actions), hodor now: - Updates the existing review comment instead of creating a new one - Computes a SHA-256 hash of the diff and embeds it in the comment footer - Skips the review entirely if the diff hasn't changed since the last review This eliminates stale comment trails after force pushes and avoids wasted LLM costs on unchanged diffs. GitHub comments switch from review comments (gh pr review) to issue comments (gh api) to enable editing. Adds --skip-if-unchanged/--no-skip-if-unchanged CLI flag (default: on). Also instructs the review template to omit empty issue categories when no bugs are found. --- hodor/agent.py | 155 +++++++++++++++++----- hodor/cli.py | 20 +++ hodor/github.py | 77 +++++++++++ hodor/gitlab.py | 49 +++++++ hodor/prompts/templates/default_review.md | 4 + tests/test_agent_utils.py | 82 +++++++++++- 6 files changed, 350 insertions(+), 37 deletions(-) diff --git a/hodor/agent.py b/hodor/agent.py index 6fce9fc..fd1caa3 100644 --- a/hodor/agent.py +++ b/hodor/agent.py @@ -15,8 +15,21 @@ from openhands.sdk.event import Event from openhands.sdk.workspace import LocalWorkspace -from .github import GitHubAPIError, fetch_github_pr_info, normalize_github_metadata -from .gitlab import GitLabAPIError, fetch_gitlab_mr_info, post_gitlab_mr_comment +from .github import ( + GitHubAPIError, + create_github_pr_comment, + fetch_github_pr_info, + find_hodor_comment_github, + normalize_github_metadata, + update_github_pr_comment, +) +from .gitlab import ( + GitLabAPIError, + fetch_gitlab_mr_info, + find_hodor_comment, + post_gitlab_mr_comment, + update_gitlab_mr_comment, +) from .llm import create_hodor_agent, get_api_key from .prompts.pr_review_prompt import build_pr_review_prompt from .skills import discover_skills @@ -88,18 +101,62 @@ def parse_pr_url(pr_url: str) -> tuple[str, str, int, str]: ) +def _compute_diff_hash(workspace: Path, diff_base_sha: str | None, target_branch: str) -> str: + """Compute SHA-256 hash of the diff for deduplication.""" + base_ref = diff_base_sha or f"origin/{target_branch}" + result = subprocess.run( + ["git", "diff", base_ref, "HEAD"], + capture_output=True, + text=True, + cwd=str(workspace), + ) + if result.returncode != 0: + return "" # Can't compute hash, don't skip + import hashlib + + return hashlib.sha256(result.stdout.encode()).hexdigest() + + +def _extract_diff_hash(body: str) -> str | None: + """Extract diff hash from a Hodor comment footer.""" + import re + + match = re.search(r"", body) + return match.group(1) if match else None + + +def _find_existing_hodor_comment( + platform: Platform, + owner: str, + repo: str, + pr_number: int, + host: str | None = None, +) -> dict[str, Any] | None: + """Find the existing Hodor comment on a PR/MR, regardless of platform.""" + if platform == "gitlab": + return find_hodor_comment(owner, repo, pr_number, host=host) + elif platform == "github": + return find_hodor_comment_github(owner, repo, pr_number) + return None + + def post_review_comment( pr_url: str, review_text: str, model: str | None = None, + diff_hash: str | None = None, ) -> dict[str, Any]: """ Post a review comment on a GitHub PR or GitLab MR using CLI tools. + Uses edit-in-place: if a previous Hodor comment exists, updates it + instead of creating a new one. + Args: pr_url: URL of the pull request or merge request review_text: The review text to post as a comment model: LLM model used for the review (optional, for transparency) + diff_hash: SHA-256 hash of the diff (optional, embedded in footer for skip logic) Returns: Dictionary with comment posting result @@ -112,45 +169,48 @@ def post_review_comment( except ValueError as e: return {"success": False, "error": str(e)} - # Append model information to review text for transparency + # Build footer with model info and optional diff hash + footer_parts = [] if model: - review_text_with_footer = f"{review_text}\n\n---\n\n*Review generated by Hodor using `{model}`*" + footer_parts.append(f"*Review generated by Hodor using `{model}`*") else: - review_text_with_footer = review_text + footer_parts.append("*Review generated by Hodor*") + if diff_hash: + footer_parts.append(f"") + + review_text_with_footer = f"{review_text}\n\n---\n\n" + "\n".join(footer_parts) try: + # Try to find an existing Hodor comment for edit-in-place + existing = _find_existing_hodor_comment(platform, owner, repo, pr_number, host=host) + if platform == "github": - # Use gh CLI to post comment - subprocess.run( - [ - "gh", - "pr", - "review", - str(pr_number), - "--repo", - f"{owner}/{repo}", - "--comment", - "--body", - review_text_with_footer, - ], - check=True, - capture_output=True, - text=True, - ) - logger.info(f"Successfully posted review to GitHub PR #{pr_number}") - return {"success": True, "platform": "github", "pr_number": pr_number} + if existing: + # Update existing comment + update_github_pr_comment(owner, repo, existing["id"], review_text_with_footer) + logger.info(f"Successfully updated existing review on GitHub PR #{pr_number}") + return {"success": True, "platform": "github", "pr_number": pr_number, "updated": True} + else: + # Create new issue comment (not review comment, so we can edit later) + create_github_pr_comment(owner, repo, pr_number, review_text_with_footer) + logger.info(f"Successfully posted review to GitHub PR #{pr_number}") + return {"success": True, "platform": "github", "pr_number": pr_number} elif platform == "gitlab": - # Use glab CLI to post comment - post_gitlab_mr_comment( - owner, - repo, - pr_number, - review_text_with_footer, - host=host, - ) - logger.info(f"Successfully posted review to GitLab MR !{pr_number} on {owner}/{repo}") - return {"success": True, "platform": "gitlab", "mr_number": pr_number} + if existing: + # Update existing comment + update_gitlab_mr_comment( + owner, repo, pr_number, existing["id"], review_text_with_footer, host=host, + ) + logger.info(f"Successfully updated existing review on GitLab MR !{pr_number}") + return {"success": True, "platform": "gitlab", "mr_number": pr_number, "updated": True} + else: + # Create new comment + post_gitlab_mr_comment( + owner, repo, pr_number, review_text_with_footer, host=host, + ) + logger.info(f"Successfully posted review to GitLab MR !{pr_number} on {owner}/{repo}") + return {"success": True, "platform": "gitlab", "mr_number": pr_number} else: return {"success": False, "error": f"Unsupported platform: {platform}"} @@ -181,7 +241,9 @@ def review_pr( output_format: str = "markdown", max_iterations: int = 500, model_canonical_name: str | None = None, -) -> str: + post: bool = False, + skip_if_unchanged: bool = True, +) -> str | tuple[str, str] | None: """ Review a pull request using OpenHands agent with bash tools. @@ -198,9 +260,13 @@ def review_pr( workspace_dir: Directory to use for workspace (if None, creates temp dir). Reuses if same repo. output_format: Output format - "markdown" or "json" (default: "markdown") max_iterations: Maximum number of agent iterations (default: 500, use -1 for unlimited) + post: Whether the review will be posted (enables diff-hash skip logic) + skip_if_unchanged: Skip review if diff hash matches existing comment (default: True) Returns: - Review text as string (format depends on output_format) + - None if review was skipped (diff unchanged) + - (review_text, diff_hash) tuple when post=True + - review_text string when post=False Raises: ValueError: If URL is invalid @@ -240,6 +306,21 @@ def review_pr( logger.error(f"Failed to setup workspace: {e}") raise RuntimeError(f"Failed to setup workspace: {e}") from e + # Compute diff hash for deduplication (before expensive agent creation) + diff_hash = _compute_diff_hash(workspace, diff_base_sha, target_branch) + + # If posting and skip_if_unchanged, check if diff matches existing comment + if post and skip_if_unchanged and diff_hash: + try: + existing = _find_existing_hodor_comment(platform, owner, repo, pr_number, host=host) + if existing and f"hodor:diff-hash:sha256:{diff_hash}" in existing.get("body", ""): + logger.info("Diff unchanged since last review — skipping") + if workspace and cleanup: + cleanup_workspace(workspace) + return None + except Exception as e: + logger.warning(f"Failed to check existing comment for skip logic: {e}") + # Discover repository skills (from .cursorrules, agents.md, .hodor/skills/) skills = [] try: @@ -449,6 +530,8 @@ def on_event(event: Any) -> None: except Exception as e: logger.warning(f"Failed to get metrics: {e}") + if post: + return (review_content, diff_hash) return review_content except Exception as e: diff --git a/hodor/cli.py b/hodor/cli.py index fdb228e..7e82754 100644 --- a/hodor/cli.py +++ b/hodor/cli.py @@ -123,6 +123,12 @@ def parse_llm_args(ctx, param, value): help="Canonical model name for feature detection (e.g. 'claude-opus-4-6'). " "Required for opaque ARN-based models to enable prompt caching.", ) +@click.option( + "--skip-if-unchanged/--no-skip-if-unchanged", + default=True, + help="Skip review if the diff hasn't changed since last posted review (default: enabled). " + "Only applies when --post is used.", +) def main( pr_url: str, model: str, @@ -138,6 +144,7 @@ def main( max_iterations: int, ultrathink: bool, model_canonical_name: str | None, + skip_if_unchanged: bool, ): """ Review a GitHub pull request or GitLab merge request using AI. @@ -252,11 +259,23 @@ def main( output_format="json" if output_json else "markdown", max_iterations=max_iterations, model_canonical_name=model_canonical_name, + post=post, + skip_if_unchanged=skip_if_unchanged, ) progress.update(task, description="Review complete!") progress.stop() + # Handle skip (diff unchanged) + if review_output is None: + console.print("\n[bold green]Review skipped — diff unchanged since last review[/bold green]") + return + + # Unpack diff_hash when in post mode + diff_hash = None + if isinstance(review_output, tuple): + review_output, diff_hash = review_output + # Display result if post: # Post to PR/MR (always as markdown, never raw JSON) @@ -275,6 +294,7 @@ def main( pr_url=pr_url, review_text=review_text, model=model, + diff_hash=diff_hash, ) if result.get("success"): diff --git a/hodor/github.py b/hodor/github.py index 76a56f1..e06d7d8 100644 --- a/hodor/github.py +++ b/hodor/github.py @@ -91,6 +91,83 @@ def normalize_github_metadata(raw: dict[str, Any]) -> dict[str, Any]: return metadata +def find_hodor_comment_github( + owner: str, + repo: str, + pr_number: str | int, +) -> dict[str, Any] | None: + """Find the existing Hodor review comment on a GitHub PR.""" + + try: + result = subprocess.run( + [ + "gh", "api", + f"repos/{owner}/{repo}/issues/{pr_number}/comments", + "--paginate", + "--jq", + '.[] | select(.body | contains("Review generated by Hodor")) | {id, body}', + ], + capture_output=True, + text=True, + ) + except Exception as exc: + logger.warning(f"Failed to search for existing Hodor comment: {exc}") + return None + + if result.returncode != 0 or not result.stdout.strip(): + return None + + lines = [line for line in result.stdout.strip().split("\n") if line.strip()] + if not lines: + return None + + try: + return json.loads(lines[-1]) + except json.JSONDecodeError: + logger.warning("Failed to parse Hodor comment JSON from gh api output") + return None + + +def update_github_pr_comment( + owner: str, + repo: str, + comment_id: int, + body: str, +) -> None: + """Update an existing comment on a GitHub PR.""" + + subprocess.run( + [ + "gh", "api", "--method", "PATCH", + f"repos/{owner}/{repo}/issues/comments/{comment_id}", + "-f", f"body={body}", + ], + check=True, + capture_output=True, + text=True, + ) + + +def create_github_pr_comment( + owner: str, + repo: str, + pr_number: str | int, + body: str, +) -> None: + """Create a new issue comment on a GitHub PR.""" + + subprocess.run( + [ + "gh", "api", "--method", "POST", + f"repos/{owner}/{repo}/issues/{pr_number}/comments", + "-f", f"body={body}", + ], + check=True, + capture_output=True, + text=True, + ) + + def _github_comments_to_notes( comments: dict[str, Any] | list[dict[str, Any]] | None, ) -> list[dict[str, Any]]: diff --git a/hodor/gitlab.py b/hodor/gitlab.py index 32156f6..b18c8b7 100644 --- a/hodor/gitlab.py +++ b/hodor/gitlab.py @@ -165,6 +165,55 @@ def post_gitlab_mr_comment( return note.attributes +def find_hodor_comment( + owner: str, + repo: str, + mr_number: str | int, + host: str | None = None, +) -> dict[str, Any] | None: + """Find the existing Hodor review comment on a GitLab MR, if any.""" + + client = _create_gitlab_client(host) + project = _get_project(client, owner, repo) + mr = _get_merge_request(project, mr_number) + + try: + notes = mr.notes.list(all=True, sort="desc", per_page=100) + except gitlab_exceptions.GitlabError as exc: + logger.warning(f"Failed to list notes for MR !{mr_number}: {exc}") + return None + + for note in notes: + if "Review generated by Hodor" in (note.body or ""): + return {"id": note.id, "body": note.body} + return None + + +def update_gitlab_mr_comment( + owner: str, + repo: str, + mr_number: str | int, + note_id: int, + body: str, + *, + host: str | None = None, +) -> dict[str, Any]: + """Update an existing note on a GitLab merge request.""" + + client = _create_gitlab_client(host) + project = _get_project(client, owner, repo) + mr = _get_merge_request(project, mr_number) + + try: + note = mr.notes.get(note_id) + note.body = body + note.save() + except gitlab_exceptions.GitlabError as exc: + raise GitLabAPIError(f"Failed to update note {note_id} on MR !{mr_number}: {exc}") from exc + + return note.attributes + + def summarize_gitlab_notes( notes: list[dict[str, Any]] | None, *, diff --git a/hodor/prompts/templates/default_review.md b/hodor/prompts/templates/default_review.md index be3f2e4..3b9188b 100644 --- a/hodor/prompts/templates/default_review.md +++ b/hodor/prompts/templates/default_review.md @@ -158,6 +158,10 @@ Total issues: X critical, Y important, Z minor. *Correct = existing code won't break, no bugs, free of blocking issues.* ``` +### Clean Review (No Issues Found) + +If you find zero qualifying issues, omit the "Issues Found" section entirely (do NOT list empty categories with "None"). Jump straight to the Summary and Overall Verdict. + ### Formatting Rules - Brief: 1 paragraph max per finding, no unnecessary line breaks diff --git a/tests/test_agent_utils.py b/tests/test_agent_utils.py index 930c42e..cb9df51 100644 --- a/tests/test_agent_utils.py +++ b/tests/test_agent_utils.py @@ -1,6 +1,9 @@ +import hashlib +from unittest.mock import patch, MagicMock + import pytest -from hodor.agent import detect_platform, parse_pr_url +from hodor.agent import detect_platform, parse_pr_url, _compute_diff_hash, _extract_diff_hash @pytest.mark.parametrize( @@ -35,3 +38,80 @@ def test_parse_pr_url_gitlab_subgroups(): def test_parse_pr_url_invalid(): with pytest.raises(ValueError): parse_pr_url("https://gitlab.com/foo/bar/issues/1") + + +# --- _extract_diff_hash tests --- + + +def test_extract_diff_hash_present(): + body = ( + "Some review text\n\n---\n\n" + "*Review generated by Hodor using `some-model`*\n" + "" + ) + assert _extract_diff_hash(body) == "abcdef1234567890" + + +def test_extract_diff_hash_missing(): + body = "Some review text\n\n---\n\n*Review generated by Hodor using `some-model`*" + assert _extract_diff_hash(body) is None + + +def test_extract_diff_hash_empty(): + assert _extract_diff_hash("") is None + + +# --- _compute_diff_hash tests --- + + +@patch("hodor.agent.subprocess.run") +def test_compute_diff_hash_with_diff_base_sha(mock_run): + diff_content = "diff --git a/foo.py b/foo.py\n+hello\n" + mock_run.return_value = MagicMock(returncode=0, stdout=diff_content) + + from pathlib import Path + result = _compute_diff_hash(Path("/tmp/workspace"), "abc123", "main") + + assert result == hashlib.sha256(diff_content.encode()).hexdigest() + mock_run.assert_called_once_with( + ["git", "diff", "abc123", "HEAD"], + capture_output=True, text=True, cwd="/tmp/workspace", + ) + + +@patch("hodor.agent.subprocess.run") +def test_compute_diff_hash_falls_back_to_target_branch(mock_run): + diff_content = "diff --git a/bar.py b/bar.py\n+world\n" + mock_run.return_value = MagicMock(returncode=0, stdout=diff_content) + + from pathlib import Path + result = _compute_diff_hash(Path("/tmp/workspace"), None, "develop") + + assert result == hashlib.sha256(diff_content.encode()).hexdigest() + mock_run.assert_called_once_with( + ["git", "diff", "origin/develop", "HEAD"], + capture_output=True, text=True, cwd="/tmp/workspace", + ) + + +@patch("hodor.agent.subprocess.run") +def test_compute_diff_hash_returns_empty_on_failure(mock_run): + mock_run.return_value = MagicMock(returncode=128, stdout="") + + from pathlib import Path + result = _compute_diff_hash(Path("/tmp/workspace"), None, "main") + + assert result == "" + + +@patch("hodor.agent.subprocess.run") +def test_compute_diff_hash_consistent(mock_run): + """Same diff content should always produce the same hash.""" + diff_content = "diff --git a/x.py b/x.py\n+consistent\n" + mock_run.return_value = MagicMock(returncode=0, stdout=diff_content) + + from pathlib import Path + hash1 = _compute_diff_hash(Path("/tmp/w1"), "sha1", "main") + hash2 = _compute_diff_hash(Path("/tmp/w2"), "sha2", "main") + + assert hash1 == hash2