forked from prihoda/AbNumber
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Refactor modules and improve FASTA parsing
- Break into multiple modules - Support passing Chain args in FASTA parsing - Support getting unique germlines - Raise error on ANARCI IMGT position 61A bug
- Loading branch information
Showing
7 changed files
with
519 additions
and
455 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,3 +1,6 @@ | ||
from abnumber.__version__ import __version__ | ||
from abnumber.chain import Chain, Position, Alignment, SUPPORTED_SCHEMES, SUPPORTED_CDR_DEFINITIONS | ||
from abnumber.chain import Chain | ||
from abnumber.position import Position, sort_positions | ||
from abnumber.alignment import Alignment | ||
from abnumber.common import SUPPORTED_SCHEMES, SUPPORTED_CDR_DEFINITIONS | ||
from abnumber.exceptions import ChainParseError |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,195 @@ | ||
from typing import Union | ||
|
||
from abnumber.common import is_similar_residue, is_integer | ||
from abnumber.position import Position | ||
|
||
|
||
class Alignment:
    """Antibody chain alignment of two or more chains

    >>> from abnumber import Chain
    >>>
    >>> seq1 = 'QVQLQQSGAELARPGASVKMSCKASGYTFTRYTMHWVKQRPGQGLEWIGYINPSRGYTNYNQKFKDKATLTTDKSSSTAYMQLSSLTSEDSAVYYCARYYDDHYCLDYWGQGTTLTVSSAKTTAP'
    >>> chain1 = Chain(seq1, scheme='imgt')
    >>>
    >>> seq2 = 'QVQLVQSGAELDRPGATVKMSCKASGYTTTRYTMHWVKQRPGQGLDWIGYINPSDRSYTNYNQKFKDKATLTTDKSSSTAYMQKTSLTSEDSAVYYCARYYDDYLDRWGQGTTLTVSSAKTTAP'
    >>> chain2 = Chain(seq2, scheme='imgt')
    >>> alignment = chain1.align(chain2)

    Alignment can be sliced and iterated:

    >>> for pos, (aa, bb) in alignment[:'5']:
    ...     print(pos, aa, bb)
    H1 Q Q
    H2 V V
    H3 Q Q
    H4 L L
    H5 Q V
    """

    def __init__(self, positions, residues, scheme, chain_type):
        """Store aligned positions together with the residues found at each of them.

        :param positions: list of position objects, one per alignment column
        :param residues: per-column residues, same length as ``positions``;
                         each item is indexable by sequence number
        :param scheme: numbering scheme, forwarded to ``Position.from_string`` when parsing string keys
        :param chain_type: chain type, forwarded to ``Position.from_string`` when parsing string keys
        """
        assert isinstance(positions, list), 'Expected list of positions and residues. ' \
                                            'Use chain.align(other) to create an alignment.'
        assert len(positions) == len(residues)
        unique_cdr_definitions = set(pos.cdr_definition for pos in positions)
        assert len(unique_cdr_definitions) <= 1, f'Aligned chains should use the same CDR definitions, got: {unique_cdr_definitions}'
        self.positions = positions
        self.residues = residues
        self.scheme = scheme
        self.chain_type = chain_type
        # Pre-computed (position, residues) pairs served by __iter__
        self._zipped = list(zip(self.positions, self.residues))

    def __repr__(self):
        return self.format()

    def __iter__(self):
        """Iterate over ``(position, residues)`` pairs in alignment order"""
        yield from self._zipped

    def __len__(self):
        """Number of aligned positions (columns)"""
        return len(self.positions)

    def __getitem__(self, item):
        """Get residues at a position, or a sub-alignment when given a slice.

        :param item: :class:`Position`, position string (e.g. ``'111A'``) or a slice of those
        :return: residues stored at the position, or a new :class:`Alignment` for a slice
        :raises IndexError: for a slice step other than 1 or for plain integer keys (use ``alignment.raw``)
        :raises ValueError: when the position is not present in this alignment
        """
        if isinstance(item, slice):
            if item.step is not None and item.step != 1:
                raise IndexError(f'Slicing with step != 1 is not implemented, got: {item}')
            return self.slice(start=item.start, stop=item.stop)
        pos = self._parse_position(item)
        raw_pos = self.positions.index(pos)
        return self.residues[raw_pos]

    def slice(self, start: Union[str, int, 'Position'] = None, stop: Union[str, int, 'Position'] = None,
              stop_inclusive: bool = True, allow_raw: bool = False):
        """Create a slice of this alignment

        You can also slice directly using ``alignment['111':'112A']`` or ``alignment.raw[10:20]``.

        :param start: Slice start position (inclusive), :class:`Position` or string (e.g. '111A')
        :param stop: Slice stop position (inclusive unless ``stop_inclusive=False``),
                     :class:`Position` or string (e.g. '112A')
        :param stop_inclusive: Include stop position in slice
        :param allow_raw: Allow unaligned numeric indexing from 0 to length of sequence - 1
        :return: new sliced Alignment object
        """
        lower = None
        upper = None
        if start is not None:
            lower = self._parse_position(start, allow_raw=allow_raw)
            if lower is None:
                # _parse_position returns None only for a raw index past the last position.
                # A start beyond the end selects nothing; treating it as "no lower bound"
                # would wrongly return the whole alignment.
                return Alignment(positions=[], residues=[], scheme=self.scheme, chain_type=self.chain_type)
        if stop is not None:
            # A raw stop index past the end parses to None, which means "up to the end"
            upper = self._parse_position(stop, allow_raw=allow_raw)

        new_positions = []
        new_residues = []
        for pos, residues in zip(self.positions, self.residues):
            if lower is not None and pos < lower:
                continue
            if upper is not None and (pos > upper or (not stop_inclusive and pos >= upper)):
                break
            new_positions.append(pos)
            new_residues.append(residues)

        return Alignment(positions=new_positions, residues=new_residues, scheme=self.scheme, chain_type=self.chain_type)

    def _parse_position(self, position: Union[int, str, 'Position'], allow_raw=False):
        """Create :class:`Position` key object from string or int.

        Note: The position should only be used for indexing, CDR definition is not preserved!

        :param position: Numeric or string position representation
        :param allow_raw: Also allow unaligned numeric (int) indexing from 0 to length of sequence - 1
        :return: Position object that should only be used for indexing (CDR definition is not preserved!),
                 or None when a raw index is greater than or equal to the number of positions
        :raises IndexError: when the key cannot be interpreted, or an integer is given without ``allow_raw``
        """
        if isinstance(position, str):
            return Position.from_string(position, chain_type=self.chain_type, scheme=self.scheme)
        if not isinstance(position, int):
            if isinstance(position, Position):
                return position
            try:
                position = int(position)
            except (TypeError, ValueError) as err:
                raise IndexError(f'Invalid position key, expected Position, string or integer, got {type(position)}: "{position}"') from err
        if not allow_raw:
            raise IndexError("Use chain.raw[i] for raw numeric indexing or pass allow_raw=True. "
                             "For named position indexing, use string (e.g. chain['111A'] or chain['H111A'])")
        if position >= len(self.positions):
            return None
        return self.positions[position]

    def format(self, mark_identity=True, mark_cdrs=True):
        """Format alignment to string

        :param mark_identity: Add BLAST style middle line showing identity (``|``), similar residue (``+``) or different residue (``.``)
        :param mark_cdrs: Add line highlighting CDR regions using ``^``
        :return: formatted string, empty string for an alignment with no positions
        """
        if not self.positions:
            # Nothing to render; also keeps repr() of an empty slice from raising IndexError
            return ''

        def _identity_symbol(a, b):
            return '|' if a == b else ('+' if is_similar_residue(a, b) else '.')

        lines = []
        num_sequences = len(self.residues[0])
        for i in range(num_sequences):
            if mark_identity and i != 0:
                # Identity line compares each sequence with the one printed just above it
                lines.append(''.join(_identity_symbol(aas[i], aas[i - 1]) for aas in self.residues))
            lines.append(''.join(aas[i] for aas in self.residues))
        if mark_cdrs:
            # Vernier positions are only marked when the kabat CDR definition is used
            mark_vernier = self.positions[0].cdr_definition == 'kabat'
            lines.append(''.join(
                '^' if pos.is_in_cdr() else ("°" if mark_vernier and pos.is_in_vernier() else ' ')
                for pos in self.positions
            ))
        return '\n'.join(lines)

    def print(self, mark_identity=True, mark_cdrs=True):
        """Print string representation of alignment created using :meth:`Alignment.format`

        >>> alignment.print(mark_cdrs=False)
        QVQLQQSGAELARPGASVKMSCKASGYTFTRYTMHWVKQRPGQGLEWIGYINPS-RGYTNYNQKFKDKATLTTDKSSSTAYMQLSSLTSEDSAVYYCARYYDDHYCLDYWGQGTTLTVSS
        ||||.||||||.||||+|||||||||||.||||||||||||||||+||||||||.|.||||||||||||||||||||||||||.+|||||||||||||||||....||.|||||||||||
        QVQLVQSGAELDRPGATVKMSCKASGYTTTRYTMHWVKQRPGQGLDWIGYINPSDRSYTNYNQKFKDKATLTTDKSSSTAYMQKTSLTSEDSAVYYCARYYD--DYLDRWGQGTTLTVSS
        >>> alignment.print(mark_identity=False, mark_cdrs=False)
        QVQLQQSGAELARPGASVKMSCKASGYTFTRYTMHWVKQRPGQGLEWIGYINPS-RGYTNYNQKFKDKATLTTDKSSSTAYMQLSSLTSEDSAVYYCARYYDDHYCLDYWGQGTTLTVSS
        QVQLVQSGAELDRPGATVKMSCKASGYTTTRYTMHWVKQRPGQGLDWIGYINPSDRSYTNYNQKFKDKATLTTDKSSSTAYMQKTSLTSEDSAVYYCARYYD--DYLDRWGQGTTLTVSS

        With ``mark_cdrs=True`` a final line is added that marks CDR positions using ``^``.

        :param mark_identity: Add BLAST style middle line showing identity (``|``), similar residue (``+``) or different residue (``.``)
        :param mark_cdrs: Add line highlighting CDR regions using ``^``
        """
        print(self.format(mark_identity=mark_identity, mark_cdrs=mark_cdrs))

    def has_mutation(self):
        """Check if there is a mutation in the alignment or not"""
        return any(len(set(aas)) != 1 for aas in self.residues)

    def num_mutations(self):
        """Get number of mutations (positions with more than one type of residue)"""
        return sum(len(set(aas)) != 1 for aas in self.residues)

    @property
    def raw(self):
        """Access raw representation of this alignment to allow unaligned numeric indexing and slicing

        >>> # Numbering of ``alignment.raw`` starts at 0; an int index returns the residues at that position
        >>> first_residues = alignment.raw[0]
        >>> # Slicing with string is based on scheme numbering, the end is inclusive
        >>> first_ten = alignment['1':'10']
        >>> # Slicing with ``alignment.raw`` starts at 0, the end is exclusive (Python style)
        >>> first_ten = alignment.raw[0:10]

        :return: Raw alignment accessor that can be sliced to produce a new :class:`Alignment` object
                 or indexed with an int to get the residues at that position
        """
        return RawAlignmentAccessor(self)
|
||
|
||
class RawAlignmentAccessor:
    """Accessor that indexes or slices an alignment by 0-based integer offsets.

    Integer indexing returns whatever the wrapped alignment returns for the position
    found at that offset; slicing returns the result of ``alignment.slice`` with an
    exclusive stop and raw indexing enabled.
    """

    def __init__(self, alignment: Alignment):
        self.alignment = alignment

    def __getitem__(self, item):
        """Resolve an integer offset or an integer slice against the wrapped alignment.

        :param item: integer offset or slice with integer (or omitted) bounds and step 1
        :return: result of ``alignment[position]`` for an integer, or of ``alignment.slice(...)`` for a slice
        :raises IndexError: for a step other than 1 or for non-integer offsets/bounds
        """
        if not isinstance(item, slice):
            # Single offset: look up the position object, then delegate to the alignment
            if not is_integer(item):
                raise IndexError(f'Expected int indexing for alignment.raw, got {type(item)}: {item}')
            return self.alignment[self.alignment.positions[item]]

        if not (item.step is None or item.step == 1):
            raise IndexError(f'Slicing with step != 1 is not implemented, got: {item}')
        if not (item.start is None or is_integer(item.start)):
            raise IndexError(f'Expected int start index for alignment.raw, got {type(item.start)}: {item.start}')
        if not (item.stop is None or is_integer(item.stop)):
            raise IndexError(f'Expected int end index for alignment.raw, got {type(item.stop)}: {item.stop}')
        # Python-style slice: the stop offset is exclusive
        return self.alignment.slice(
            start=item.start,
            stop=item.stop,
            stop_inclusive=False,
            allow_raw=True,
        )
Oops, something went wrong.