aqlaboratory · jandom · Feb 7, 2026 · Feb 7, 2026 · Feb 10, 2026 · Feb 10, 2026
diff --git a/openfold3/core/data/primitives/caches/filtering.py b/openfold3/core/data/primitives/caches/filtering.py
@@ -27,7 +27,6 @@
 from pathlib import Path
 from typing import NamedTuple
 
-import requests
 from tqdm import tqdm
 
 from openfold3.core.data.io.dataset_cache import read_datacache
@@ -60,6 +59,7 @@
     LIGAND_EXCLUSION_LIST,
 )
 from openfold3.core.data.resources.residues import MoleculeType
+from openfold3.core.data.tools.rscb import get_model_ranking_fit
 
 logger = logging.getLogger(__name__)
 
@@ -855,83 +855,6 @@ def set_nan_fallback_conformer_flag(
     return None
 
 
-# TODO: Do this in preprocessing instead to avoid it going out-of-sync with the data?
-def get_model_ranking_fit(pdb_id):
-    """Fetches the model ranking fit entries for all ligands of a single PDB-ID.
-
-    Uses the PDB GraphQL API to fetch the model ranking fit values for all ligands in a
-    single PDB entry. Note that this function will always fetch from the newest version
-    of the PDB and can therefore occasionally give incorrect results for old datasets
-    whose structures have been updated since.
-    """
-    url = "https://data.rcsb.org/graphql"  # RCSB PDB's GraphQL API endpoint
-
-    query = """
-    query GetRankingFit($pdb_id: String!) {
-        entry(entry_id: $pdb_id) {
-            nonpolymer_entities {
-                nonpolymer_entity_instances {
-                    rcsb_id
-                    rcsb_nonpolymer_instance_validation_score {
-                        ranking_model_fit
-                    }
-                }
-            }
-        }
-    }
-    """
-
-    # Prepare the request with the pdb_id as a variable
-    variables = {"pdb_id": pdb_id}
-
-    # Make the request to the GraphQL endpoint using the variables
-    response = requests.post(url, json={"query": query, "variables": variables})
-
-    # Check if the request was successful
-    if response.status_code == 200:
-        try:
-            # Parse the JSON response
-            data = response.json()
-
-            # Safely navigate through data
-            entry_data = data.get("data", {}).get("entry", {})
-            if not entry_data:
-                return {}
-
-            extracted_data = {}
-
-            # Check for nonpolymer_entities
-            nonpolymer_entities = entry_data.get("nonpolymer_entities", [])
-
-            if nonpolymer_entities:
-                for entity in nonpolymer_entities:
-                    for instance in entity.get("nonpolymer_entity_instances", []):
-                        rcsb_id = instance.get("rcsb_id")
-                        validation_score = instance.get(
-                            "rcsb_nonpolymer_instance_validation_score"
-                        )
-
-                        if (
-                            validation_score
-                            and isinstance(validation_score, list)
-                            and validation_score[0]
-                        ):
-                            ranking_model_fit = validation_score[0].get(
-                                "ranking_model_fit"
-                            )
-                            if ranking_model_fit is not None:
-                                extracted_data[rcsb_id] = ranking_model_fit
-
-            return extracted_data
-
-        except (KeyError, TypeError, ValueError) as e:
-            print(f"Error processing response for {pdb_id}: {e}")
-            return {}
-    else:
-        print(f"Request failed with status code {response.status_code}")
-        return {}
-
-
 def assign_ligand_model_fits(
     structure_cache: ValidationDatasetCache, num_threads: int = 3
 ) -> None:

diff --git a/openfold3/core/data/primitives/structure/metadata.py b/openfold3/core/data/primitives/structure/metadata.py
@@ -323,6 +323,29 @@ def get_asym_id_to_canonical_seq_dict(
     }
 
 
+def get_author_to_label_chain_ids(
+    label_to_author: dict[str, str],
+) -> dict[str, list[str]]:
+    """Invert a label-to-author chain ID mapping.
+
+    Several label asym_ids may share one author (pdb_strand_id) chain ID, e.g.
+    for homomeric chains, so every author chain ID maps to a list of labels.
+
+    Args:
+        label_to_author:
+            Dictionary mapping label asym IDs to author chain IDs.
+
+    Returns:
+        A dictionary mapping each author chain ID to its label asym IDs,
+        sorted in ascending order so the result is deterministic.
+    """
+    grouped: dict[str, list[str]] = {}
+    for label_id, author_id in label_to_author.items():
+        # Authors keep the order in which they are first encountered.
+        grouped.setdefault(author_id, []).append(label_id)
+    return {author: sorted(labels) for author, labels in grouped.items()}
+
+
 def get_entity_to_three_letter_codes_dict(cif_data: CIFBlock) -> dict[int, list[str]]:
     """Get a dictionary mapping entity IDs to their three-letter-code sequences.