diff --git a/openhands_resolver/issue_definitions.py b/openhands_resolver/issue_definitions.py index e8d2e2c..91453e3 100644 --- a/openhands_resolver/issue_definitions.py +++ b/openhands_resolver/issue_definitions.py @@ -76,6 +76,11 @@ def _extract_image_urls(self, issue_body: str) -> list[str]: image_pattern = r'!\[.*?\]\((https?://[^\s)]+)\)' return re.findall(image_pattern, issue_body) + def _extract_issue_references(self, body: str) -> list[int]: + pattern = r"#(\d+)" + return [int(match) for match in re.findall(pattern, body)] + + def _get_issue_comments(self, issue_number: int, comment_id: int | None = None) -> list[str] | None: """Download comments for a specific issue from Github.""" url = f"https://api.github.com/repos/{self.owner}/{self.repo}/issues/{issue_number}/comments" @@ -197,7 +202,7 @@ def __init__(self, owner: str, repo: str, token: str): - def __download_pr_metadata(self, pull_number: int, comment_id: int | None = None) -> tuple[list[str], list[str], list[ReviewThread], list[str]]: + def __download_pr_metadata(self, pull_number: int, comment_id: int | None = None) -> tuple[list[str], list[int], list[str], list[ReviewThread], list[str]]: """ Run a GraphQL query against the GitHub API for information on @@ -222,6 +227,7 @@ def __download_pr_metadata(self, pull_number: int, comment_id: int | None = None edges { node { body + number } } } @@ -278,6 +284,7 @@ def __download_pr_metadata(self, pull_number: int, comment_id: int | None = None # Get closing issues closing_issues = pr_data.get("closingIssuesReferences", {}).get("edges", []) closing_issues_bodies = [issue["node"]["body"] for issue in closing_issues] + closing_issue_numbers = [issue["node"]["number"] for issue in closing_issues] # Extract issue numbers # Get review comments reviews = pr_data.get("reviews", {}).get("nodes", []) @@ -320,7 +327,7 @@ def __download_pr_metadata(self, pull_number: int, comment_id: int | None = None review_threads.append(unresolved_thread) thread_ids.append(id) - 
return closing_issues_bodies, review_bodies, review_threads, thread_ids + return closing_issues_bodies, closing_issue_numbers, review_bodies, review_threads, thread_ids # Override processing of downloaded issues @@ -353,6 +360,51 @@ def _get_pr_comments(self, pr_number: int, comment_id: int | None = None) -> lis return all_comments if all_comments else None + def __get_context_from_external_issues_references( + self, + closing_issues: list[str], + closing_issue_numbers: list[int], + issue_body: str, + review_comments: list[str], + review_threads: list[ReviewThread], + thread_comments: list[str] | None + ): + new_issue_references = [] + + if issue_body: + new_issue_references.extend(self._extract_issue_references(issue_body)) + + if review_comments: + for comment in review_comments: + new_issue_references.extend(self._extract_issue_references(comment)) + + if review_threads: + for review_thread in review_threads: + new_issue_references.extend(self._extract_issue_references(review_thread.comment)) + + if thread_comments: + for thread_comment in thread_comments: + new_issue_references.extend(self._extract_issue_references(thread_comment)) + + non_duplicate_references = set(new_issue_references) + unique_issue_references = non_duplicate_references.difference(closing_issue_numbers) + + for issue_number in unique_issue_references: + url = f"https://api.github.com/repos/{self.owner}/{self.repo}/issues/{issue_number}" + headers = { + "Authorization": f"Bearer {self.token}", + "Accept": "application/vnd.github.v3+json", + } + response = requests.get(url, headers=headers) + response.raise_for_status() + issue_data = response.json() + issue_body = issue_data.get("body", "") + if issue_body: + closing_issues.append(issue_body) + + + return closing_issues + def get_converted_issues(self, comment_id: int | None = None) -> list[GithubIssue]: all_issues = self._download_issues_from_github() converted_issues = [] @@ -366,11 +418,20 @@ def get_converted_issues(self, comment_id: int 
| None = None) -> list[GithubIssu # Handle None body for PRs body = issue.get("body") if issue.get("body") is not None else "" - closing_issues, review_comments, review_threads, thread_ids = self.__download_pr_metadata(issue["number"], comment_id=comment_id) + closing_issues, closing_issues_numbers, review_comments, review_threads, thread_ids = self.__download_pr_metadata(issue["number"], comment_id=comment_id) head_branch = issue["head"]["ref"] - + # Get PR thread comments thread_comments = self._get_pr_comments(issue["number"], comment_id=comment_id) + + + closing_issues = self.__get_context_from_external_issues_references(closing_issues, + closing_issues_numbers, + body, + review_comments, + review_threads, + thread_comments) + issue_details = GithubIssue( owner=self.owner, repo=self.repo, diff --git a/tests/test_issue_handler.py b/tests/test_issue_handler.py index ead983e..f0b226a 100644 --- a/tests/test_issue_handler.py +++ b/tests/test_issue_handler.py @@ -96,7 +96,7 @@ def test_pr_handler_get_converted_issues_with_comments(): mock_prs_response.json.return_value = [{ 'number': 1, 'title': 'Test PR', - 'body': 'Test Body', + 'body': 'Test Body fixes #1', 'head': {'ref': 'test-branch'} }] @@ -125,12 +125,19 @@ def test_pr_handler_get_converted_issues_with_comments(): # We need to return empty responses for subsequent pages mock_empty_response = MagicMock() mock_empty_response.json.return_value = [] + + # Mock the response for fetching the external issue referenced in PR body + mock_external_issue_response = MagicMock() + mock_external_issue_response.json.return_value = { + "body": "This is additional context from an externally referenced issue." 
def test_pr_handler_get_converted_issues_with_specific_comment_and_issue_refs():
    """Issues referenced in a selected review thread are pulled in as context.

    The review thread that contains ``specific_comment_id`` mentions #6 and
    #7, so both issues must be fetched. The PR title mentions #3, but the
    title is never passed to the reference scan, so #3 must not be fetched.
    """
    # Define the specific comment_id to filter
    specific_comment_id = 123

    # Patch the REST calls (PR list, PR comments, referenced issues)
    with patch('requests.get') as mock_get:
        # Mock the response for PRs
        mock_prs_response = MagicMock()
        mock_prs_response.json.return_value = [{
            'number': 1,
            'title': 'Test PR fixes #3',
            'body': 'Test Body',
            'head': {'ref': 'test-branch'}
        }]

        # Mock the response for PR comments; neither id matches
        # specific_comment_id, so thread_comments is asserted to be None below
        mock_comments_response = MagicMock()
        mock_comments_response.json.return_value = [
            {'body': 'First comment', 'id': 120},
            {'body': 'Second comment', 'id': 124}
        ]

        # Mock the response for PR metadata (GraphQL)
        mock_graphql_response = MagicMock()
        mock_graphql_response.json.return_value = {
            'data': {
                'repository': {
                    'pullRequest': {
                        'closingIssuesReferences': {'edges': []},
                        'reviews': {'nodes': []},
                        'reviewThreads': {
                            'edges': [
                                {
                                    'node': {
                                        'id': 'review-thread-1',
                                        'isResolved': False,
                                        'comments': {
                                            'nodes': [
                                                {'fullDatabaseId': specific_comment_id, 'body': 'Specific review comment that references #6', 'path': 'file1.txt'},
                                                {'fullDatabaseId': 456, 'body': 'Another review comment referencing #7', 'path': 'file2.txt'}
                                            ]
                                        }
                                    }
                                }
                            ]
                        }
                    }
                }
            }
        }

        # Set up the mock to return different responses
        # We need to return empty responses for subsequent pages
        mock_empty_response = MagicMock()
        mock_empty_response.json.return_value = []

        # Mock the responses for the two issues referenced in the review thread
        mock_first_referenced_issue_response = MagicMock()
        mock_first_referenced_issue_response.json.return_value = {
            "body": "External context #1."
        }

        mock_second_referenced_issue_response = MagicMock()
        mock_second_referenced_issue_response.json.return_value = {
            "body": "External context #2."
        }

        mock_get.side_effect = [
            mock_prs_response,  # First call for PRs
            mock_empty_response,  # Second call for PRs (empty page)
            mock_comments_response,  # Third call for PR comments
            mock_empty_response,  # Fourth call for PR comments (empty page)
            mock_first_referenced_issue_response,  # First referenced issue
            mock_second_referenced_issue_response,  # Second referenced issue
        ]

        # Mock the post request for GraphQL
        with patch('requests.post') as mock_post:
            mock_post.return_value = mock_graphql_response

            # Create an instance of PRHandler
            handler = PRHandler('test-owner', 'test-repo', 'test-token')

            # Get converted issues
            prs = handler.get_converted_issues(comment_id=specific_comment_id)

            # Verify that we got exactly one PR
            assert len(prs) == 1

            # Verify that thread_comments are set correctly
            assert prs[0].thread_comments is None
            assert prs[0].review_comments == []
            assert len(prs[0].review_threads) == 1
            assert isinstance(prs[0].review_threads[0], ReviewThread)
            assert prs[0].review_threads[0].comment == "Specific review comment that references #6\n---\nlatest feedback:\nAnother review comment referencing #7\n"
            assert prs[0].closing_issues == ['External context #1.', 'External context #2.']

            # Everything after the four listing calls must be exactly the two
            # issues referenced in the review thread (and not #3 from the title)
            fetched_urls = [get_call.args[0] for get_call in mock_get.call_args_list[4:]]
            assert sorted(fetched_urls) == [
                'https://api.github.com/repos/test-owner/test-repo/issues/6',
                'https://api.github.com/repos/test-owner/test-repo/issues/7',
            ]

            # Verify other fields are set correctly
            assert prs[0].number == 1
            assert prs[0].title == 'Test PR fixes #3'
            assert prs[0].body == 'Test Body'
            assert prs[0].owner == 'test-owner'
            assert prs[0].repo == 'test-repo'
            assert prs[0].head_branch == 'test-branch'


def test_pr_handler_get_converted_issues_with_duplicate_issue_refs():
    """An issue referenced several times is fetched only once.

    #1 appears in both the PR body and a PR comment and #2 appears in another
    PR comment, so exactly two issue fetches are expected.
    """
    # Patch the REST calls (PR list, PR comments, referenced issues)
    with patch('requests.get') as mock_get:
        # Mock the response for PRs
        mock_prs_response = MagicMock()
        mock_prs_response.json.return_value = [{
            'number': 1,
            'title': 'Test PR',
            'body': 'Test Body fixes #1',
            'head': {'ref': 'test-branch'}
        }]

        # Mock the response for PR comments
        mock_comments_response = MagicMock()
        mock_comments_response.json.return_value = [
            {'body': 'First comment addressing #1'},
            {'body': 'Second comment addressing #2'}
        ]

        # Mock the response for PR metadata (GraphQL)
        mock_graphql_response = MagicMock()
        mock_graphql_response.json.return_value = {
            'data': {
                'repository': {
                    'pullRequest': {
                        'closingIssuesReferences': {'edges': []},
                        'reviews': {'nodes': []},
                        'reviewThreads': {'edges': []}
                    }
                }
            }
        }

        # Set up the mock to return different responses
        # We need to return empty responses for subsequent pages
        mock_empty_response = MagicMock()
        mock_empty_response.json.return_value = []

        # Mock the response for the issue referenced in the PR body (#1)
        mock_external_issue_response_in_body = MagicMock()
        mock_external_issue_response_in_body.json.return_value = {
            "body": "External context #1."
        }

        # Mock the response for the issue referenced only in a PR comment (#2)
        mock_external_issue_response_in_comment = MagicMock()
        mock_external_issue_response_in_comment.json.return_value = {
            "body": "External context #2."
        }

        mock_get.side_effect = [
            mock_prs_response,  # First call for PRs
            mock_empty_response,  # Second call for PRs (empty page)
            mock_comments_response,  # Third call for PR comments
            mock_empty_response,  # Fourth call for PR comments (empty page)
            mock_external_issue_response_in_body,  # Issue #1
            mock_external_issue_response_in_comment,  # Issue #2
        ]

        # Mock the post request for GraphQL
        with patch('requests.post') as mock_post:
            mock_post.return_value = mock_graphql_response

            # Create an instance of PRHandler
            handler = PRHandler('test-owner', 'test-repo', 'test-token')

            # Get converted issues
            prs = handler.get_converted_issues()

            # Verify that we got exactly one PR
            assert len(prs) == 1

            # Verify that thread_comments are set correctly
            assert prs[0].thread_comments == ['First comment addressing #1', 'Second comment addressing #2']

            # #1 is mentioned twice but must be requested a single time
            fetched_urls = [get_call.args[0] for get_call in mock_get.call_args_list[4:]]
            assert sorted(fetched_urls) == [
                'https://api.github.com/repos/test-owner/test-repo/issues/1',
                'https://api.github.com/repos/test-owner/test-repo/issues/2',
            ]

            # Verify other fields are set correctly
            assert prs[0].number == 1
            assert prs[0].title == 'Test PR'
            assert prs[0].body == 'Test Body fixes #1'
            assert prs[0].owner == 'test-owner'
            assert prs[0].repo == 'test-repo'
            assert prs[0].head_branch == 'test-branch'
            assert prs[0].closing_issues == ['External context #1.', 'External context #2.']