freelawproject · flooie · Mar 6, 2025 · Mar 5, 2025
diff --git a/CHANGES.md b/CHANGES.md
@@ -11,7 +11,8 @@ Changes:
 - None
 
 Fixes:
-- None
+- Prefer the other (non-nominative) full citation when it overlaps a
+  nominative reporter citation #237
 
 
 ## Current

diff --git a/eyecite/tokenizers.py b/eyecite/tokenizers.py
@@ -55,6 +55,46 @@
 EXTRACTORS = []
 EDITIONS_LOOKUP = defaultdict(list)
 
+NOMINATIVE_REPORTER_NAMES = {  # short_names of the most problematic reporters
+    "Thompson",
+    "Cooke",
+    "Holmes",
+    "Olcott",
+    "Chase",
+    "Gilmer",
+    "Bee",
+    "Deady",
+    "Taney",
+}
+
+
+def token_is_from_nominative_reporter(token: Token) -> bool:
+    """Return True if the token is a citation from a nominative reporter.
+
+    A cleaner way to do this would be via an attribute or named group from
+    reporters-db (sketched below). However, that tagging is currently not
+    complete, so we check the short name against NOMINATIVE_REPORTER_NAMES.
+
+    ```
+    volume_nominative = token.groups.get("volume_nominative", False)
+    reporter_nominative = token.groups.get("reporter_nominative", False)
+    token.exact_editions[0].reporter.name
+    return volume_nominative is None or volume_nominative
+        or reporter_nominative is None or reporter_nominative
+    ```
+
+    :param token: the token
+    :return: True if the reporter short name of the first exact edition
+        (or first variation edition) is in NOMINATIVE_REPORTER_NAMES
+    """
+    if not isinstance(token, CitationToken):
+        return False
+    # Exact matches win; a token with no editions cannot be nominative.
+    editions = token.exact_editions or token.variation_editions
+    if not editions:
+        return False
+    return editions[0].reporter.short_name in NOMINATIVE_REPORTER_NAMES
+
 
 def _populate_reporter_extractors():
     """Populate EXTRACTORS and EDITIONS_LOOKUP."""
@@ -313,8 +353,19 @@ def tokenize(self, text: str) -> Tuple[Tokens, List[Tuple[int, Token]]]:
                 if merged:
                     continue
             if offset > token.start:
-                # skip overlaps
-                continue
+                if (
+                    last_token
+                    and isinstance(token, CitationToken)
+                    and token_is_from_nominative_reporter(last_token)
+                ):
+                    # if a token has overlapping matches between a nominative
+                    # reporter and another type of case citation, prefer the
+                    # other case citation. See #221 and #174
+                    citation_tokens.pop(-1)
+                    all_tokens.pop(-1)
+                else:
+                    # skip overlaps
+                    continue
             if offset < token.start:
                 # capture plain text before each match
                 self.append_text(all_tokens, text[offset : token.start])
@@ -326,6 +377,7 @@ def tokenize(self, text: str) -> Tuple[Tokens, List[Tuple[int, Token]]]:
         # capture plain text after final match
         if offset < len(text):
             self.append_text(all_tokens, text[offset:])
+
         return all_tokens, citation_tokens
 
     def get_extractors(self, text: str):

diff --git a/tests/test_FindTest.py b/tests/test_FindTest.py
@@ -878,6 +878,55 @@ def test_disambiguate_citations(self):
         ]
         self.run_test_pairs(test_pairs, "Disambiguation")
 
+    def test_nominative_reporter_overlaps(self):
+        """Can we parse a full citation when a party name looks like a
+        nominative reporter?"""
+        pairs = [
+            (
+                "In re Cooke, 93 Wn. App. 526, 529",
+                case_citation(volume="93", reporter="Wn. App.", page="526"),
+            ),
+            (
+                "Shapiro v. Thompson, 394 U. S. 618",
+                case_citation(volume="394", reporter="U. S.", page="618"),
+            ),
+            (
+                "MacArdell v. Olcott, 82 N.E. 161",
+                case_citation(volume="82", reporter="N.E.", page="161"),
+            ),
+            (
+                "Connecticut v. Holmes, 221 A.3d 407",
+                case_citation(volume="221", reporter="A.3d", page="407"),
+            ),
+            (
+                "Kern v Taney, 11 Pa. D. & C.5th 558 [2010])",
+                case_citation(
+                    volume="11", reporter="Pa. D. & C.5th", page="558"
+                ),
+            ),
+            (
+                "Ellenburg v. Chase, 2004 MT 66",
+                case_citation(volume="2004", reporter="MT", page="66"),
+            ),
+            (
+                "Gilmer, 500 U.S. at 25;",
+                case_citation(
+                    volume="500", reporter="U. S.", page="25", short=True
+                ),
+            ),
+            (
+                "Bison Bee, 778 F. 13 App’x at 73.",
+                case_citation(volume="778", reporter="F.", page="13"),
+            ),
+        ]
+        for cite_string, cite_object in pairs:
+            with self.subTest(cite_string=cite_string):
+                parsed_cites = get_citations(cite_string)
+                self.assertTrue(parsed_cites, "No citation found")
+                parsed_cite = parsed_cites[0]
+                msg = f"Nominative reporters getting in the way of parsing: {parsed_cite}"
+                self.assertEqual(parsed_cite, cite_object, msg)
+
     def test_custom_tokenizer(self):
         extractors = []
         for e in EXTRACTORS:
-Original file line number
+Diff line change
@@ Expand Up / @@ -11,7 +11,8 @@ Changes: @@
     - None
     Fixes:
-    - None
+    - Prefer the other full citation on overlap with nominative reporter
+      citations #237
     ## Current
@@ Expand Down @@