Compute line_by_pos for resources with matches

JonoYang · JonoYang · commit 506cf1dd2274 · 2025-01-28T16:04:05.000-08:00
Signed-off-by: Jono Yang <jyang@nexb.com>
diff --git a/scanpipe/pipes/matchcode.py b/scanpipe/pipes/matchcode.py
@@ -28,6 +28,7 @@
 import requests
 from matchcode_toolkit.fingerprinting import compute_codebase_directory_fingerprints
 from matchcode_toolkit.fingerprinting import get_file_fingerprint_hashes
+from matchcode_toolkit.fingerprinting import get_line_by_pos
 from scancode import Scanner
 
 from scanpipe.pipes import codebase
@@ -367,4 +368,9 @@ def create_packages_from_match_results(project, match_results):
         match_resource_extra_data = match_resource["extra_data"]
         if match_resource_extra_data:
             resource = project.codebaseresources.get(path=match_resource["path"])
+            # compute line_by_pos for displaying matches in CodebaseResource detail view
+            with open(resource.location) as f:
+                content = f.read()
+                line_by_pos = get_line_by_pos(content)
+            match_resource_extra_data["line_by_pos"] = line_by_pos
             resource.update_extra_data(match_resource_extra_data)
diff --git a/scanpipe/views.py b/scanpipe/views.py
@@ -61,9 +61,8 @@
 import saneyaml
 import xlsxwriter
 from django_filters.views import FilterView
-from packageurl.contrib.django.models import PACKAGE_URL_FIELDS
 from licensedcode.spans import Span
-from matchcode_toolkit.fingerprinting import get_line_by_pos
+from packageurl.contrib.django.models import PACKAGE_URL_FIELDS
 
 from scancodeio.auth import ConditionalLoginRequired
 from scancodeio.auth import conditional_login_required
@@ -1937,24 +1936,21 @@ def get_context_data(self, **kwargs):
         matched_snippet_annotations = []
         matched_snippets = resource.extra_data.get("matched_snippets")
         if matched_snippets:
-            # tokenize file content and map tokens to line numbers
-            line_by_pos = get_line_by_pos(resource.file_content)
-            last_pos = max(line_by_pos, key=line_by_pos.get)
+            line_by_pos = resource.extra_data.get("line_by_pos")
             for matched_snippet in matched_snippets:
                 qspan = Span(matched_snippet["qspan"])
-                matched_snippet_annotations.append
                 matched_snippet["qspan"] = qspan
                 for span in qspan.subspans():
-                # Convert qstart and qends to start_line and end_lines
-                    end = min(span.end, last_pos)
+                    # line_by_pos is stored as JSON and keys in JSON are always
+                    # strings
                     matched_snippet_annotations.append(
                         {
-                            "start_line": line_by_pos[span.start],
-                            "end_line": line_by_pos[end],
+                            "start_line": line_by_pos[str(span.start)],
+                            "end_line": line_by_pos[str(span.end)],
                         }
                     )
-
         context["detected_values"]["matched_snippets"] = matched_snippet_annotations
+
         return context