forked from prihoda/AbNumber
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathposition.py
158 lines (127 loc) · 6.54 KB
/
position.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
import copy
from typing import List, Union
from abnumber.common import _validate_chain_type, SCHEME_POSITION_TO_REGION, SCHEME_VERNIER, POS_REGEX
class Position:
    """Numbered position using a given numbering scheme

    Used as a key to store Position -> Amino acid information.

    Position objects are sortable according to the schema simply using ``sorted()``.
    Equality and hashing use the Heavy/Light flag rather than the exact chain type,
    so the same position on a Kappa and a Lambda chain compares equal.
    """

    def __init__(self, chain_type: str, number: int, letter: str, scheme: str):
        """Create a position

        :param chain_type: chain type, checked using ``_validate_chain_type``
        :param number: position number (converted using ``int()``)
        :param letter: letter suffix of the position, surrounding whitespace is stripped (``' '`` means no letter)
        :param scheme: numbering scheme name, also used as the initial CDR definition
        """
        _validate_chain_type(chain_type)
        self.chain_type: str = chain_type
        self.number: int = int(number)
        self.letter: str = letter.strip()
        self.scheme: str = scheme
        # Until set_cdr_definition() is called, regions are looked up using the numbering scheme itself
        self.cdr_definition: str = self.scheme
        self.cdr_definition_position: int = self.number

    def copy(self):
        """Return a shallow copy of this position"""
        return copy.copy(self)

    def _key(self):
        """Return the tuple used for equality and hashing"""
        # Note: We are not including chain_type, but just Heavy/Light flag, to keep Kappa and Lambda chain positions equal
        return self.chain_type_prefix(), self.number, self.letter, self.scheme

    def __repr__(self):
        return f'{self.chain_type_prefix()}{self.number}{self.letter} ({self.scheme})'

    def __str__(self):
        return self.format()

    def set_cdr_definition(self, cdr_definition: str, cdr_definition_position: int):
        """Set the CDR definition and the position number in that definition, both used for region lookups

        :param cdr_definition: CDR definition name, must not be None
        :param cdr_definition_position: number of this position in the CDR definition, must not be None
        """
        assert cdr_definition is not None, 'cdr_definition is required'
        assert cdr_definition_position is not None, 'cdr_definition_position is required'
        self.cdr_definition = cdr_definition
        self.cdr_definition_position = cdr_definition_position

    def format(self, chain_type=True, region=False, rjust=False, ljust=False, fillchar=' '):
        """Format Position to string

        :param chain_type: Add chain type prefix (H/L)
        :param region: Add region prefix (FR1, CDR1, ...)
        :param rjust: Align text to the right
        :param ljust: Align text to the left
        :param fillchar: Character to use for alignment padding
        :return: formatted string
        """
        formatted = f'{self.number}{self.letter}'
        if chain_type:
            formatted = f'{self.chain_type_prefix()}{formatted}'
        if region:
            formatted = f'{self.get_region()} {formatted}'
        # Padding width: 4 characters for number and letter, 1 more for the chain prefix, 5 more for the region prefix
        just = 4 + 1 * int(chain_type) + 5 * int(region)
        if rjust:
            formatted = formatted.rjust(just, fillchar)
        if ljust:
            formatted = formatted.ljust(just, fillchar)
        return formatted

    def __hash__(self):
        return self._key().__hash__()

    def __eq__(self, other):
        return isinstance(other, Position) and self._key() == other._key()

    def __ge__(self, other):
        # No __gt__ is defined, "self > other" is resolved by Python through the reflected other.__lt__(self)
        return self == other or self > other

    def __le__(self, other):
        return self == other or self < other

    def __lt__(self, other):
        """Compare two positions of the same chain (heavy/light) and the same scheme

        :raises TypeError: when ``other`` is not a Position
        :raises AssertionError: when positions come from different chains or different schemes
        """
        if not isinstance(other, Position):
            raise TypeError(f'Cannot compare Position object with {type(other)}: {other}')
        assert self.is_heavy_chain() == other.is_heavy_chain(), f'Positions do not come from the same chain: {self}, {other}'
        assert self.scheme == other.scheme, 'Comparing positions in different schemes is not implemented'
        return self._sort_key() < other._sort_key()

    def chain_type_prefix(self):
        """Get chain prefix: ``"H"`` for chain type H, ``"L"`` for chain types K and L

        :raises NotImplementedError: for any other chain type
        """
        if self.chain_type == 'H':
            return 'H'
        if self.chain_type in ['K', 'L']:
            return 'L'
        raise NotImplementedError(f'Unknown chain type "{self.chain_type}"')

    def _sort_key(self):
        """Return the tuple used for ordering positions within one scheme

        :raises NotImplementedError: when the scheme is not one of imgt, chothia, kabat, aho
        """
        # Position without a letter gets 0, which sorts before any letter
        letter_ord = ord(self.letter) if self.letter else 0
        if self.scheme == 'imgt':
            if self.number in [33, 61, 112]:
                # letters at positions 33, 61 and 112 are sorted in reverse (e.g. 112B, 112A, 112)
                letter_ord = -letter_ord
        elif self.scheme in ['chothia', 'kabat', 'aho']:
            # all letters are sorted alphabetically for these schemes
            pass
        else:
            raise NotImplementedError(f'Cannot compare positions of scheme: {self.scheme}')
        return self.is_heavy_chain(), self.number, letter_ord

    def get_region(self):
        """Get string name of this position's region

        The region table is looked up in ``SCHEME_POSITION_TO_REGION`` using the CDR definition name,
        falling back to the ``{cdr_definition}_{chain_type}`` key.

        :return: uppercase string, one of: ``"FR1", "CDR1", "FR2", "CDR2", "FR3", "CDR3", "FR4"``
        """
        if self.cdr_definition in SCHEME_POSITION_TO_REGION:
            regions = SCHEME_POSITION_TO_REGION[self.cdr_definition]
        else:
            regions = SCHEME_POSITION_TO_REGION[f'{self.cdr_definition}_{self.chain_type}']
        return regions[self.cdr_definition_position]

    def is_in_cdr(self):
        """Check if given position is found in the CDR regions"""
        return self.get_region().lower().startswith('cdr')

    def is_in_vernier(self):
        """Check if given position is listed in ``SCHEME_VERNIER`` for its CDR definition and chain type

        :raises NotImplementedError: when the CDR definition is not ``"kabat"``
        """
        if self.cdr_definition != 'kabat':
            raise NotImplementedError('Vernier zone identification is currently supported '
                                      f'only with Kabat CDR definitions, got: {self.cdr_definition}')
        return self.cdr_definition_position in SCHEME_VERNIER.get(f'{self.cdr_definition}_{self.chain_type}', [])

    @classmethod
    def from_string(cls, position, chain_type, scheme):
        """Create Position object from string, e.g. "H5"

        Note that Positions parsed from string do not support separate CDR definitions.

        :param position: position string, matched case-insensitively (it is uppercased before parsing)
        :param chain_type: chain type, checked using ``_validate_chain_type``
        :param scheme: numbering scheme name
        :raises IndexError: when the string has unexpected format or its prefix does not agree with ``chain_type``
        """
        match = POS_REGEX.match(position.upper())
        _validate_chain_type(chain_type)
        expected_chain_prefix = 'H' if chain_type == 'H' else 'L'
        if match is None:
            raise IndexError(f'Expected position format chainNumberLetter '
                             f'(e.g. "{expected_chain_prefix}112A" or "112A"), got: "{position}"')
        chain_prefix, number, letter = match.groups()
        number = int(number)
        # The prefix is optional, but when present it has to agree with the requested chain
        if chain_prefix and expected_chain_prefix != chain_prefix:
            raise IndexError(f'Use no prefix or "{expected_chain_prefix}" prefix for "{chain_type}" chain. '
                             f'Got: "{chain_prefix}".')
        return cls(chain_type=chain_type, number=number, letter=letter, scheme=scheme)

    def is_heavy_chain(self):
        """Check if chain type is H"""
        return self.chain_type == 'H'

    def is_light_chain(self):
        """Check if chain type is K or L"""
        # Tuple membership instead of substring test, so that '' or 'KL' are not reported as light chains
        return self.chain_type in ('K', 'L')
def sort_positions(positions: List[str], chain_type: str, scheme: str) -> List[str]:
    """Sort position strings to correct order based on given scheme

    :param positions: position strings, either all with a chain prefix (H/L) or all without it
    :param chain_type: chain type passed to ``Position.from_string`` for each string
    :param scheme: numbering scheme passed to ``Position.from_string`` for each string
    :return: sorted position strings, with chain prefix only if the input strings had it
    :raises AssertionError: when only some of the positions have a chain prefix
    """
    # Prefix is detected case-insensitively, because Position.from_string uppercases the string before parsing;
    # a case-sensitive check would silently drop the prefix of inputs like "h5"
    has_prefix = [p.upper().startswith(('H', 'L')) for p in positions]
    assert all(has_prefix) or not any(has_prefix), 'Inconsistent position prefix'
    has_prefix = all(has_prefix)
    position_objects = [Position.from_string(p, chain_type=chain_type, scheme=scheme) for p in positions]
    return [p.format(chain_type=has_prefix) for p in sorted(position_objects)]