Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
95 changes: 60 additions & 35 deletions src/sentry/issues/auto_source_code_config/code_mapping.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
)
from .frame_info import FrameInfo, create_frame_info
from .integration_utils import InstallationNotFoundError, get_installation
from .utils.java import find_java_source_roots
from .utils.misc import get_straight_path_prefix_end_index

logger = logging.getLogger(__name__)
Expand All @@ -41,6 +42,8 @@ class CodeMapping(NamedTuple):
SLASH = "/"
BACKSLASH = "\\"  # This is the Python representation of a single backslash

# Identifies a code mapping by its (stacktrace_root, source_path) pair.
CodeMappingKey = tuple[str, str]


def derive_code_mappings(
organization: Organization,
Expand All @@ -67,7 +70,8 @@ class CodeMappingTreesHelper:

def __init__(self, trees: Mapping[str, RepoTree]):
    """Store the repository trees and start with no derived code mappings.

    `trees` is kept as-is on `self.trees`; `self.code_mappings` is filled in
    later as stack frames are processed.
    """
    self.trees = trees
    # Multiple source roots may legitimately share the same stack root in one monorepo,
    # so mappings are keyed by (stacktrace_root, source_path) instead of the stack root alone.
    self.code_mappings: dict[CodeMappingKey, CodeMapping] = {}

def generate_code_mappings(
self, frames: Sequence[Mapping[str, Any]], platform: str | None = None
Expand Down Expand Up @@ -111,7 +115,9 @@ def get_file_and_repo_matches(self, frame_filename: FrameInfo) -> list[dict[str,
extra = {"stack_path": stack_path, "source_path": source_path}

try:
stack_root, source_root = find_roots(frame_filename, source_path)
stack_root, source_root = find_roots(
frame_filename, source_path, repo_tree.files
)
except UnexpectedPathException:
logger.warning("Unexpected format for stack_path or source_path", extra=extra)
continue
Expand Down Expand Up @@ -160,19 +166,19 @@ def _stacktrace_buckets(
def _process_stackframes(self, buckets: Mapping[str, Sequence[FrameInfo]]) -> bool:
    """Process every bucketed stack frame and report whether a new code mapping was stored.

    Each frame is handed to `_find_code_mappings`; every mapping it returns is
    recorded in `self.code_mappings` under its `(stacktrace_root, source_path)`
    key, unless that key is already present.

    Returns:
        True when at least one new mapping was added during this pass.
    """
    reprocess = False
    for stackframes in buckets.values():
        for frame_filename in stackframes:
            for code_mapping in self._find_code_mappings(frame_filename):
                # Key on both roots so that several source roots sharing one
                # stack root do not overwrite each other.
                mapping_key = (code_mapping.stacktrace_root, code_mapping.source_path)
                if mapping_key not in self.code_mappings:
                    # This allows processing some stack frames that
                    # were matching more than one file
                    reprocess = True
                    self.code_mappings[mapping_key] = code_mapping
    return reprocess

def _find_code_mapping(self, frame_filename: FrameInfo) -> CodeMapping | None:
"""Look for the file path through all the trees and a generate code mapping for it if a match is found"""
def _find_code_mappings(self, frame_filename: FrameInfo) -> list[CodeMapping]:
"""Look for the file path through all the trees and generate code mappings for it."""
code_mappings: list[CodeMapping] = []
# XXX: This will need optimization by changing the data structure of the trees
for repo_full_name in self.trees.keys():
Expand All @@ -191,13 +197,17 @@ def _find_code_mapping(self, frame_filename: FrameInfo) -> CodeMapping | None:

if len(code_mappings) == 0:
logger.warning("No files matched for %s", frame_filename.raw_path)
return None
# This means that the file has been found in more than one repo
elif len(code_mappings) > 1:
return []

unique_code_mappings = {
(code_mapping.stacktrace_root, code_mapping.source_path): code_mapping
for code_mapping in code_mappings
}
if len({code_mapping.repo.name for code_mapping in unique_code_mappings.values()}) > 1:
logger.warning("More than one repo matched %s", frame_filename.raw_path)
return None
return []

return code_mappings[0]
return list(unique_code_mappings.values())

def _generate_code_mapping_from_tree(
self,
Expand All @@ -214,34 +224,43 @@ def _generate_code_mapping_from_tree(
if self._is_potential_match(src_path, frame_filename)
]

if len(matched_files) != 1:
if len(matched_files) == 0:
return []

stack_path = frame_filename.raw_path
source_path = matched_files[0]

extra = {"stack_path": stack_path, "source_path": source_path}
try:
stack_root, source_root = find_roots(frame_filename, source_path)
except UnexpectedPathException:
logger.warning("Unexpected format for stack_path or source_path", extra=extra)
if len(matched_files) > 1 and not all(
find_java_source_roots(source_path, repo_tree.files) for source_path in matched_files
):
return []

extra.update({"stack_root": stack_root, "source_root": source_root})
if stack_path.replace(stack_root, source_root, 1).replace("\\", "/") != source_path:
logger.warning(
"Unexpected stack_path/source_path found. A code mapping was not generated.",
extra=extra,
)
return []
code_mappings: dict[tuple[str, str], CodeMapping] = {}
for source_path in matched_files:
stack_path = frame_filename.raw_path
extra = {"stack_path": stack_path, "source_path": source_path}
try:
stack_root, source_root = find_roots(frame_filename, source_path, repo_tree.files)
except UnexpectedPathException:
logger.warning("Unexpected format for stack_path or source_path", extra=extra)
continue

extra.update({"stack_root": stack_root, "source_root": source_root})
if stack_path.replace(stack_root, source_root, 1).replace("\\", "/") != source_path:
logger.warning(
"Unexpected stack_path/source_path found. A code mapping was not generated.",
extra=extra,
)
continue

return [
CodeMapping(
code_mapping = CodeMapping(
repo=repo_tree.repo,
stacktrace_root=stack_root,
source_path=source_root,
)
]
code_mappings[(code_mapping.stacktrace_root, code_mapping.source_path)] = code_mapping

if len(matched_files) > 1 and len(code_mappings) != len(matched_files):
return []

return list(code_mappings.values())

def _is_potential_match(self, src_file: str, frame_filename: FrameInfo) -> bool:
"""
Expand Down Expand Up @@ -419,7 +438,9 @@ def get_sorted_code_mapping_configs(project: Project) -> list[RepositoryProjectP
return sorted_configs


def find_roots(frame_filename: FrameInfo, source_path: str) -> tuple[str, str]:
def find_roots(
frame_filename: FrameInfo, source_path: str, repo_files: Sequence[str] | None = None
) -> tuple[str, str]:
"""
Returns a tuple containing the stack_root, and the source_root.
If there is no overlap, raise an exception since this should not happen
Expand All @@ -444,6 +465,10 @@ def find_roots(frame_filename: FrameInfo, source_path: str) -> tuple[str, str]:
# "Packaged" logic
# e.g. stack_path: some_package/src/foo.py -> source_path: src/foo.py
source_prefix = source_path.rpartition(stack_path)[0]

if java_source_roots := find_java_source_roots(source_path, repo_files):
return java_source_roots
Comment thread
cursor[bot] marked this conversation as resolved.
Outdated

return (
f"{stack_root}{frame_filename.stack_root}/".replace("//", "/"),
f"{source_prefix}{frame_filename.stack_root}/".replace("//", "/"),
Expand Down
89 changes: 89 additions & 0 deletions src/sentry/issues/auto_source_code_config/utils/java.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
from collections.abc import Sequence

# Separator used when splitting repository file paths into directories.
SLASH = "/"
# Path fragments that mark the end of a Java/Kotlin source-set root inside a repo path.
# Only the main Java and Kotlin source sets are listed; other layouts are not recognized.
JAVA_SOURCE_ROOT_MARKERS = ("src/main/java/", "src/main/kotlin/")

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

As per the conversation with @romtsn, these markers are meant to cover the majority of cases.



def get_java_source_set_root(source_path: str) -> str | None:
    """Return the part of `source_path` up to and including the source-set marker.

    The markers in `JAVA_SOURCE_ROOT_MARKERS` are tried in order, and the first
    one that occurs anywhere in the path decides the result. `None` is returned
    when no marker occurs.

    Example:
        `module/src/main/java/io/sentry/Foo.java` -> `module/src/main/java/`
    """
    for marker in JAVA_SOURCE_ROOT_MARKERS:
        marker_start = source_path.find(marker)
        if marker_start != -1:
            # Keep everything through the end of the first occurrence of the marker.
            return source_path[: marker_start + len(marker)]

    return None


def find_package_root_relative_to_source_set(
    source_root: str, repo_files: Sequence[str]
) -> str | None:
    """Follow the single-child directory chain below a source set and return it.

    Starting at `source_root`, descend while the current directory holds no
    file of its own and exactly one sub-directory. The returned path is
    relative to `source_root` and is either empty or ends with a slash.
    `None` is returned when no entry of `repo_files` lies under `source_root`.

    Examples:
        `["module/src/main/java/io/sentry/graphql/Foo.java"]` with
        `source_root="module/src/main/java/"` returns `io/sentry/graphql/`.

        `["module/src/main/java/io/sentry/asyncprofiler/jfr/JfrParser.java",
        "module/src/main/java/io/sentry/asyncprofiler/metrics/ProfileMetric.java"]`
        with `source_root="module/src/main/java/"` returns `io/sentry/asyncprofiler/`.
    """
    # Path tails still to be examined, always relative to the current package root.
    pending = [
        path.removeprefix(source_root) for path in repo_files if path.startswith(source_root)
    ]
    if not pending:
        return None

    package_root = ""
    while True:
        children: set[str] = set()
        for tail in pending:
            if not tail:
                # Entry equal to the current directory itself: nothing to inspect.
                continue

            child, slash, _ = tail.partition(SLASH)
            if not slash:
                # A file sits directly in this directory, so the chain ends here.
                return package_root

            children.add(child)
            if len(children) > 1:
                # The tree branches here, so the chain ends here as well.
                return package_root

        if not children:
            return package_root

        # Exactly one sub-directory and no file: step into it. Every non-empty
        # tail starts with that directory, so its prefix can simply be cut off.
        step = f"{children.pop()}{SLASH}"
        package_root = f"{package_root}{step}"
        pending = [tail[len(step) :] for tail in pending if tail]


def find_java_source_roots(
    source_path: str, repo_files: Sequence[str] | None
) -> tuple[str, str] | None:
    """Derive `(stack_root, source_root)` for a Java/Kotlin repo path.

    The stack root is the package root found by walking the source set that
    contains `source_path`; the source root is that same package root prefixed
    with the source-set directory. `None` is returned when `repo_files` is
    empty or missing, when `source_path` contains no source-set marker, or when
    no repo file lies under the source set.

    Example (for a source set whose files all live under `io/sentry/graphql/`):
        `sentry-graphql-core/src/main/java/io/sentry/graphql/GraphQLFetcher.java`
        becomes
        `("io/sentry/graphql/", "sentry-graphql-core/src/main/java/io/sentry/graphql/")`.
    """
    if not repo_files:
        return None

    source_set_root = get_java_source_set_root(source_path)
    if not source_set_root:
        return None

    package_root = find_package_root_relative_to_source_set(source_set_root, repo_files)
    # An empty package root is a valid result; only a missing one aborts.
    if package_root is None:
        return None

    return package_root, f"{source_set_root}{package_root}"
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,49 @@ def test_get_frame_with_module(self, mock_get_trees_for_org: Any) -> None:
assert response.status_code == 200, response.content
assert response.data == expected_matches

@patch("sentry.integrations.github.integration.GitHubIntegration.get_trees_for_org")
def test_get_frame_with_module_multiple_same_repo_matches(
    self, mock_get_trees_for_org: Any
) -> None:
    """A Java frame whose file exists in two modules of one repo yields one match per module."""
    graphql_file = "sentry-graphql/src/main/java/io/sentry/graphql/GraphQLFetcher.java"
    graphql_core_file = (
        "sentry-graphql-core/src/main/java/io/sentry/graphql/GraphQLFetcher.java"
    )

    # The mocked repository tree contains the same class in both modules.
    repo = RepoAndBranch(name="getsentry/codemap", branch="master")
    mock_get_trees_for_org.return_value = {
        "getsentry/codemap": RepoTree(repo, files=[graphql_file, graphql_core_file])
    }

    config_data = {
        "absPath": "GraphQLFetcher.java",
        "module": "io.sentry.graphql.GraphQLFetcher",
        "platform": "java",
        "stacktraceFilename": "GraphQLFetcher.java",
    }
    response = self.client.get(self.url, data=config_data, format="json")

    # Both matches share the stack root but point at different source roots.
    first_match = {
        "filename": graphql_file,
        "repo_name": "getsentry/codemap",
        "repo_branch": "master",
        "stacktrace_root": "io/sentry/graphql/",
        "source_path": "sentry-graphql/src/main/java/io/sentry/graphql/",
    }
    second_match = {
        "filename": graphql_core_file,
        "repo_name": "getsentry/codemap",
        "repo_branch": "master",
        "stacktrace_root": "io/sentry/graphql/",
        "source_path": "sentry-graphql-core/src/main/java/io/sentry/graphql/",
    }
    expected_matches = [first_match, second_match]

    assert response.status_code == 200, response.content
    assert response.data == expected_matches

@patch("sentry.integrations.github.integration.GitHubIntegration.get_trees_for_org")
def test_get_start_with_backslash(self, mock_get_trees_for_org: Any) -> None:
file = "stack/root/file.py"
Expand Down
Loading
Loading