-
Notifications
You must be signed in to change notification settings - Fork 92
fix: chain template alignments auth labelling (inference) #117
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 2 commits
561018c
0787889
c3c14a5
080d0d0
e57d303
865ec86
b29f1f6
a78196f
be1fc4e
6f7d487
126bcca
11420df
65c292f
fbc389a
8450fa8
4a26bc6
78570ff
f16b160
bf286be
f5cb6b9
1364f45
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,62 @@ | ||
| 101 1b27_B 1.0 110 0 0 1 110 1 110 1.022e-45 160 110M | ||
| 101 6pqk_A 0.981 108 2 0 3 110 1 108 2.6399999999999998e-45 159 108M | ||
| 101 1bse_C 0.99 108 1 0 3 110 1 108 4.9679999999999996e-45 158 108M | ||
| 101 1b2s_C 0.99 110 1 0 1 110 1 110 6.815e-45 158 110M | ||
| 101 1bsb_C 0.99 108 1 0 3 110 1 108 9.349999999999999e-45 158 108M | ||
| 101 1x1y_B 0.99 110 1 0 1 110 1 110 9.349999999999999e-45 158 110M | ||
| 101 2za4_A 0.99 108 1 0 3 110 1 108 1.76e-44 157 108M | ||
| 101 1ban_B 0.99 108 1 0 3 110 1 108 3.3119999999999996e-44 156 108M | ||
| 101 1b20_B 0.99 108 1 0 3 110 1 108 3.3119999999999996e-44 156 108M | ||
| 101 1buj_A 0.842 108 17 0 3 110 2 109 3.3119999999999996e-44 156 108M | ||
| 101 1bsd_C 0.99 107 1 0 4 110 1 107 4.544e-44 156 107M | ||
| 101 1brg_C 0.99 108 1 0 3 110 1 108 4.544e-44 156 108M | ||
| 101 1brk_C 0.99 107 1 0 4 110 1 107 6.233e-44 155 107M | ||
| 101 1bsa_C 0.99 107 1 0 4 110 1 107 6.233e-44 155 107M | ||
| 101 1b2z_C 0.99 107 1 0 4 110 1 107 8.552e-44 155 107M | ||
| 101 1b21_B 0.981 108 2 0 3 110 1 108 8.552e-44 155 108M | ||
| 101 1rnb_A 0.981 109 2 0 2 110 1 109 8.552e-44 155 109M | ||
| 101 1bsc_B 0.99 107 1 0 4 110 1 107 1.173e-43 154 107M | ||
| 101 1bao_C 0.99 107 1 0 4 110 1 107 1.173e-43 154 107M | ||
| 101 1brj_C 0.99 108 1 0 3 110 1 108 1.173e-43 154 108M | ||
| 101 1brh_C 0.99 108 1 0 3 110 1 108 1.173e-43 154 108M | ||
| 101 4haa_D 0.833 108 18 0 3 110 2 109 1.173e-43 154 108M | ||
| 101 1bri_C 0.99 107 1 0 4 110 1 107 1.61e-43 154 107M | ||
| 101 1bns_C 0.99 107 1 0 4 110 1 107 2.208e-43 154 107M | ||
| 101 1bnf_A 0.981 108 2 0 3 110 1 108 4.156e-43 153 108M | ||
| 101 2rbi_B 0.833 108 18 0 3 110 1 108 5.701e-43 153 108M | ||
| 101 2kf3_A 0.99 108 1 0 3 110 1 108 5.701e-43 153 108M | ||
| 101 2c4b_B 0.99 108 1 0 3 110 1 108 5.701e-43 153 108M | ||
| 101 1bng_C 0.981 107 2 0 4 110 1 107 5.2150000000000006e-42 150 107M | ||
| 101 3q3f_A 0.936 110 7 0 1 110 1 110 5.2150000000000006e-42 150 110M | ||
| 101 1goy_B 0.796 108 22 0 3 110 1 108 7.155e-42 149 108M | ||
| 101 3da7_A 1.0 66 0 0 1 66 44 109 4.0910000000000004e-25 101 66M | ||
| 101 3da7_E 1.0 60 0 0 7 66 44 103 2.286e-22 93 60M | ||
| 101 3da7_B 1.0 57 0 0 8 64 44 100 1.525e-21 91 57M | ||
| 101 3da7_G 0.892 65 7 0 2 66 38 102 1.525e-21 91 65M | ||
| 101 3d5g_C 0.31 58 39 1 53 109 35 92 5.437e-14 69 30M1D27M | ||
| 101 1mgr_A 0.322 59 38 2 53 109 35 93 7.458e-14 69 30M1D11M1D16M | ||
| 101 3dgy_C 0.35 57 36 1 53 109 32 87 1.023e-13 68 24M1I32M | ||
| 101 3dgy_A 0.35 57 36 1 53 109 33 88 1.023e-13 68 24M1I32M | ||
| 101 3d5i_C 0.315 57 37 1 53 109 32 86 4.964e-13 66 27M2I28M | ||
| 101 3d4a_C 0.315 57 36 1 53 109 32 85 6.808e-13 66 27M3I27M | ||
| 101 4j5g_B 0.338 59 37 2 53 109 34 92 9.337e-13 66 30M1D9M1D18M | ||
| 101 1ay7_A 0.338 59 37 2 53 109 34 92 9.337e-13 66 30M1D9M1D18M | ||
| 101 1ynv_X 0.338 59 37 2 53 109 34 92 9.337e-13 66 30M1D9M1D18M | ||
| 101 1c54_A 0.338 59 37 2 53 109 34 92 9.337e-13 66 30M1D9M1D18M | ||
| 101 1uci_B 0.338 59 37 2 53 109 34 92 9.337e-13 66 30M1D9M1D18M | ||
| 101 4gho_B 0.338 59 37 2 53 109 34 92 9.337e-13 66 30M1D9M1D18M | ||
| 101 4j5g_A 0.338 59 37 2 53 109 34 92 9.337e-13 66 30M1D9M1D18M | ||
| 101 1rsn_A 0.338 59 37 2 53 109 34 92 1.281e-12 65 30M1D9M1D18M | ||
| 101 1ucj_B 0.338 59 37 2 53 109 34 92 1.281e-12 65 30M1D9M1D18M | ||
| 101 1t2i_A 0.338 59 37 2 53 109 34 92 1.756e-12 65 30M1D11M1D16M | ||
| 101 1uck_B 0.338 59 37 2 53 109 34 92 1.756e-12 65 30M1D9M1D18M | ||
| 101 4j5k_A 0.338 59 37 2 53 109 34 92 3.303e-12 64 30M1D9M1D18M | ||
| 101 4j5k_B 0.338 59 37 2 53 109 34 92 3.303e-12 64 30M1D9M1D18M | ||
| 101 1i8v_B 0.322 59 38 2 53 109 34 92 4.529e-12 64 30M1D9M1D18M | ||
| 101 3a5e_A 0.322 59 38 2 53 109 34 92 4.529e-12 64 30M1D9M1D18M | ||
| 101 1t2h_B 0.338 59 37 2 53 109 34 92 6.211e-12 63 30M1D9M1D18M | ||
| 101 1box_A 0.322 59 38 2 53 109 33 91 8.517e-12 63 30M1D9M1D18M | ||
| 101 1i70_B 0.322 59 38 2 53 109 34 92 8.517e-12 63 30M1D9M1D18M | ||
| 101 1ucl_B 0.355 59 36 2 53 109 34 92 8.517e-12 63 22M1D17M1D18M | ||
| 101 1zgx_A 0.321 28 19 0 53 80 34 61 0.002519 38 28M | ||
| 101 1zgx_B 0.392 28 16 1 83 109 2 29 0.01637 36 9M1D18M |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,83 @@ | ||
| from openfold3.core.data.io.sequence.template import ( | ||
| A3mParser, | ||
| parse_template_alignment, | ||
| ) | ||
| import pytest | ||
|
Check failure on line 5 in openfold3/tests/core/data/pipelines/preprocessing/test_template.py
|
||
| from openfold3.core.data.io.structure.cif import _load_ciffile | ||
|
|
||
| from biotite.database.rcsb import fetch | ||
|
|
||
| from openfold3.core.data.primitives.structure.metadata import ( | ||
| get_chain_to_canonical_seq_dict, | ||
|
Check failure on line 11 in openfold3/tests/core/data/pipelines/preprocessing/test_template.py
|
||
| get_cif_block, | ||
|
Check failure on line 12 in openfold3/tests/core/data/pipelines/preprocessing/test_template.py
|
||
|
|
||
| ) | ||
|
|
||
| from openfold3.core.data.io.sequence.template import ( | ||
| A3mParser | ||
|
Check failure on line 17 in openfold3/tests/core/data/pipelines/preprocessing/test_template.py
|
||
| ) | ||
| from openfold3.core.data.primitives.structure.metadata import ( | ||
| get_asym_id_to_canonical_seq_dict, | ||
| get_label_to_author_chain_id_dict, | ||
| ) | ||
|
|
||
|
|
||
| from pathlib import Path | ||
|
Check failure on line 25 in openfold3/tests/core/data/pipelines/preprocessing/test_template.py
|
||
|
|
||
| class TestTemplatePreprocessor(): | ||
|
Check failure on line 27 in openfold3/tests/core/data/pipelines/preprocessing/test_template.py
|
||
|
|
||
| def test_template_has_author_chain_id(self, tmp_path): | ||
| """ | ||
| https://github.com/aqlaboratory/openfold-3/issues/101 | ||
|
|
||
| """ | ||
|
|
||
| alignment_file = Path(__file__).parent / "colabfold_template.m8" | ||
| query_seq_str = "AQVINTFDGVADYLQTYHKLPDNYITKSEAQALGWVASKGNLADVAPGKSIGGDIFSNREGKLPGKSGRTWREADINYTSGFRNSDRILYSSDWLIYKTTDHYQTFTKIR" | ||
| templates = parse_template_alignment( | ||
| aln_path=Path(alignment_file), | ||
| query_seq_str=query_seq_str, | ||
| max_sequences=200 | ||
|
|
||
| ) | ||
|
|
||
| # find the offending "1rnb_A" | ||
| template = templates[16] | ||
| assert template.chain_id == "A" and template.entry_id == "1rnb" | ||
|
|
||
| fetch( | ||
| pdb_ids=template.entry_id, | ||
| format="cif", | ||
| target_path=tmp_path, | ||
| ) | ||
|
|
||
|
|
||
| template_structure_file = tmp_path / f"{template.entry_id}.cif" | ||
|
|
||
| cif_file = _load_ciffile(template_structure_file) | ||
|
|
||
| chain_id_seq_map = get_asym_id_to_canonical_seq_dict(cif_file) | ||
|
|
||
| # template.chain_id is an author chain ID; map it to label asym_id | ||
| label_to_author = get_label_to_author_chain_id_dict(cif_file) | ||
| author_to_label = {v: k for k, v in label_to_author.items()} | ||
| label_chain_id = author_to_label[template.chain_id] | ||
|
||
|
|
||
| template_sequence = chain_id_seq_map.get(label_chain_id) | ||
|
|
||
| parser = A3mParser(max_sequences=None) | ||
| parsed = parser( | ||
| ( | ||
| f">query_X/1-{len(query_seq_str)}\n" | ||
| f"{query_seq_str}\n" | ||
| f">{template.entry_id}_{label_chain_id}/{1}-{len(template_sequence)}\n" | ||
| f"{template_sequence}\n" | ||
| ), | ||
| query_seq_str, | ||
| realign=True, | ||
| ) | ||
|
|
||
| assert len(parsed) == 2 | ||
| assert parsed[0].seq_id == 1 | ||
| assert parsed[1].seq_id < 1 | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Can we mock this call instead of explicitly calling the RCSB database using
`fetch`? As this is a unit test, it would be good to remove dependencies on web servers so that we don't have latency issues / failures due to the availability of the service.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Switched to just using a CIF file as a fixture.