Skip to content

Commit

Permalink
Improve aligned input error message, add docstrings
Browse files Browse the repository at this point in the history
  • Loading branch information
prihoda committed Oct 8, 2021
1 parent a2ec3a2 commit 3573bf4
Showing 1 changed file with 11 additions and 0 deletions.
11 changes: 11 additions & 0 deletions abnumber/chain.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,8 @@ def __init__(self, sequence, scheme, cdr_definition=None, name=None, assign_germ
raise ChainParseError('Expected sequence, got None')
if not isinstance(sequence, str) and not isinstance(sequence, Seq):
raise ChainParseError(f'Expected string or Seq, got {type(sequence)}: {sequence}')
if '-' in sequence:
raise ChainParseError(f'Please provide an unaligned sequence, got: {sequence}')
if chain_type is not None:
raise ChainParseError('Do not use chain_type= when providing sequence=, it will be inferred automatically')
if tail is not None:
Expand Down Expand Up @@ -206,6 +208,7 @@ def __eq__(self, other):

@classmethod
def to_fasta(cls, chains, path_or_fd, keep_tail=False, description=''):
"""Save multiple chains to FASTA"""
if isinstance(chains, Chain):
records = chains.to_seq_record(keep_tail=keep_tail, description=description)
else:
Expand All @@ -214,6 +217,7 @@ def to_fasta(cls, chains, path_or_fd, keep_tail=False, description=''):

@classmethod
def from_fasta(cls, path_or_handle, scheme, cdr_definition=None, as_series=False, as_generator=False, **kwargs) -> Union[List['Chain'], pd.Series, Generator['Chain', None, None]]:
"""Read multiple chains from FASTA"""
generator = (cls(record.seq, name=record.name, scheme=scheme, cdr_definition=cdr_definition, **kwargs)
for record in SeqIO.parse(path_or_handle, 'fasta'))
if as_generator:
Expand All @@ -224,18 +228,25 @@ def from_fasta(cls, path_or_handle, scheme, cdr_definition=None, as_series=False
return chains

def to_seq_record(self, keep_tail=False, description=''):
    """Create BioPython SeqRecord object from this Chain

    :param keep_tail: if true, ``self.tail`` is appended to ``self.seq`` in the record sequence
    :param description: value passed through as the SeqRecord description
    :return: SeqRecord whose id is ``self.name``
    :raises ValueError: if ``self.name`` is empty or None
    """
    if self.name:
        # The record id is taken from the chain name, so a name is mandatory
        if keep_tail:
            residues = self.seq + self.tail
        else:
            residues = self.seq
        return SeqRecord(Seq(residues), id=self.name, description=description)
    raise ValueError('Name needs to be present to convert to a SeqRecord')

@classmethod
def to_anarci_csv(cls, chains: List['Chain'], path):
    """Save multiple chains to ANARCI-like CSV

    Builds a dataframe from the chains with ``cls.to_dataframe`` and writes it to ``path``
    using the dataframe's ``to_csv``.

    :param chains: chains to export
    :param path: destination passed to ``to_csv``
    """
    cls.to_dataframe(chains).to_csv(path)

@classmethod
def to_dataframe(cls, chains: List['Chain']):
"""Produce a Pandas dataframe with aligned chain sequences in the columns
Note: Contains only positions (columns) that are present in the provided chains,
so number of columns can differ based on the input.
"""
series_list = [chain.to_series() for chain in chains]

# Each chain can have a different set of positions
Expand Down

0 comments on commit 3573bf4

Please sign in to comment.