Skip to content

Commit 2347825

Browse files
committed
MERGEFIX
2 parents fb97558 + 67c76e2 commit 2347825

5 files changed

Lines changed: 21 additions & 126 deletions

File tree

epytope/Core/Result.py

Lines changed: 0 additions & 81 deletions
Original file line numberDiff line numberDiff line change
@@ -18,10 +18,6 @@
1818
from copy import deepcopy
1919
from sys import exit
2020
import logging
21-
<<<<<<< HEAD
22-
import math
23-
=======
24-
>>>>>>> repo-b2/main
2521

2622

2723
class AResult(pandas.DataFrame, metaclass=abc.ABCMeta):
@@ -95,42 +91,25 @@ def filter_result(self, expressions, scoretype='Score'):
9591
"""
9692
if isinstance(expressions, tuple):
9793
expressions = [expressions]
98-
<<<<<<< HEAD
99-
100-
=======
10194

102-
>>>>>>> repo-b2/main
10395
df = deepcopy(self)
10496
methods = list(set(df.columns.get_level_values(1)))
10597
scoretypes = list(set(df.columns.get_level_values(2)))
10698
if scoretype not in scoretypes:
107-
<<<<<<< HEAD
108-
raise ValueError(
109-
"Specified ScoreType {} does not match ScoreTypes of data frame {}.".format(scoretype, scoretypes))
110-
111-
=======
11299
raise ValueError("Specified ScoreType {} does not match ScoreTypes of data frame {}.".format(scoretype, scoretypes))
113100

114-
>>>>>>> repo-b2/main
115101
for expr in expressions:
116102
method, comp, thr = expr
117103
if method not in methods:
118104
raise ValueError("Specified method {} does not match methods of data frame {}.".format(method, methods))
119105
else:
120-
<<<<<<< HEAD
121-
filt = comp(df.xs(method, axis=1, level=1).xs(scoretype, axis=1, level=1), thr).values
122-
=======
123106
filt = comp(df.xs(method, axis = 1, level = 1).xs(scoretype, axis = 1, level = 1), thr).values
124-
>>>>>>> repo-b2/main
125107
# Only keep rows that contain values fulfilling the comparator's logic for the specified method
126108
keep_row = [bool.any() for bool in filt]
127109
df = df.loc[keep_row]
128110

129111
return EpitopePredictionResult(df)
130-
<<<<<<< HEAD
131-
=======
132112

133-
>>>>>>> repo-b2/main
134113

135114
def merge_results(self, others):
136115
"""
@@ -145,23 +124,14 @@ def merge_results(self, others):
145124

146125
if type(others) == type(self):
147126
others = [others]
148-
<<<<<<< HEAD
149-
150-
=======
151127

152-
>>>>>>> repo-b2/main
153128
# Concatenates self and to be merged dataframe(s)
154129
for other in others:
155130
df = pandas.concat([df, other], axis=1)
156131

157132
# Merge result of multiple predictors in others per allele
158-
<<<<<<< HEAD
159-
df_merged = pandas.concat([group[1] for group in df.groupby(level=[0, 1], axis=1)], axis=1)
160-
161-
=======
162133
df_merged = pandas.concat([group[1] for group in df.groupby(level=[0,1], axis=1)], axis=1)
163134

164-
>>>>>>> repo-b2/main
165135
return EpitopePredictionResult(df_merged)
166136

167137
def from_dict(d, peps, method):
@@ -176,28 +146,17 @@ def from_dict(d, peps, method):
176146
"""
177147
scoreType = numpy.asarray([list(m.keys()) for m in [metrics for a, metrics in d.items()]]).flatten()
178148
alleles = numpy.asarray([numpy.repeat(a, len(set(scoreType))) for a in d]).flatten()
179-
<<<<<<< HEAD
180-
181-
meth = numpy.repeat(method, len(scoreType))
182-
multi_cols = pandas.MultiIndex.from_arrays([alleles, meth, scoreType], names=["Allele", "Method", "ScoreType"])
183-
df = pandas.DataFrame(float(0), index=pandas.Index(peps), columns=multi_cols)
184-
=======
185149

186150
meth = numpy.repeat(method, len(scoreType))
187151
multi_cols = pandas.MultiIndex.from_arrays([alleles, meth, scoreType], names=["Allele", "Method", "ScoreType"])
188152
df = pandas.DataFrame(float(0),index=pandas.Index(peps), columns=multi_cols)
189-
>>>>>>> repo-b2/main
190153
df.index.name = 'Peptides'
191154
# Fill DataFrame
192155
for allele, metrics in d.items():
193156
for metric, pep_scores in metrics.items():
194157
for pep, score in pep_scores.items():
195158
df[allele][method][metric][pep] = score
196-
<<<<<<< HEAD
197-
198-
=======
199159

200-
>>>>>>> repo-b2/main
201160
return EpitopePredictionResult(df)
202161

203162

@@ -207,19 +166,11 @@ class Distance2SelfResult(AResult):
207166
"""
208167

209168
def filter_result(self, expressions):
210-
<<<<<<< HEAD
211-
# TODO: has to be implemented
212-
pass
213-
214-
def merge_results(self, others):
215-
# TODO: has to be implemented
216-
=======
217169
#TODO: has to be implemented
218170
pass
219171

220172
def merge_results(self, others):
221173
#TODO: has to be implemented
222-
>>>>>>> repo-b2/main
223174
pass
224175

225176

@@ -261,22 +212,14 @@ def filter_result(self, expressions):
261212
if isinstance(expressions, tuple):
262213
expressions = [expressions]
263214

264-
<<<<<<< HEAD
265-
# builde logical expression
266-
=======
267215
# build logical expression
268-
>>>>>>> repo-b2/main
269216
masks = [list(comp(self.loc[:, method], thr)) for method, comp, thr in expressions]
270217

271218
if len(masks) > 1:
272219
masks = numpy.logical_and(*masks)
273220
else:
274221
masks = masks[0]
275-
<<<<<<< HEAD
276-
# apply to all rows
277-
=======
278222
#apply to all rows
279-
>>>>>>> repo-b2/main
280223

281224
return CleavageSitePredictionResult(self.loc[masks, :])
282225

@@ -296,11 +239,7 @@ def merge_results(self, others):
296239

297240
for i in range(len(others)):
298241
o = others[i]
299-
<<<<<<< HEAD
300-
df1a, df2a = df.align(o, )
301-
=======
302242
df1a, df2a = df.align(o,)
303-
>>>>>>> repo-b2/main
304243

305244
o_diff = o.index.difference(df.index)
306245
d_diff = df.index.difference(o.index)
@@ -324,11 +263,7 @@ def merge_results(self, others):
324263
df1 = df1a.fillna(0)
325264
df2 = df2a.fillna(0)
326265

327-
<<<<<<< HEAD
328-
df_merged = df1 + df2
329-
=======
330266
df_merged = df1+df2
331-
>>>>>>> repo-b2/main
332267
false_zero = df_merged == 0
333268
zero = true_zero & false_zero
334269

@@ -377,11 +312,7 @@ def filter_result(self, expressions):
377312
masks = numpy.logical_and(*masks)
378313
else:
379314
masks = masks[0]
380-
<<<<<<< HEAD
381-
# apply to all rows
382-
=======
383315
#apply to all rows
384-
>>>>>>> repo-b2/main
385316
return CleavageFragmentPredictionResult(self.loc[masks, :])
386317

387318
def merge_results(self, others):
@@ -398,11 +329,7 @@ def merge_results(self, others):
398329
if type(others) == type(self):
399330
others = [others]
400331

401-
<<<<<<< HEAD
402-
return CleavageFragmentPredictionResult(pandas.concat([self] + others, axis=1))
403-
=======
404332
return CleavageFragmentPredictionResult(pandas.concat([self]+others, axis=1))
405-
>>>>>>> repo-b2/main
406333

407334

408335
class TAPPredictionResult(AResult):
@@ -442,11 +369,7 @@ def filter_result(self, expressions):
442369
masks = numpy.logical_and(*masks)
443370
else:
444371
masks = masks[0]
445-
<<<<<<< HEAD
446-
# apply to all rows
447-
=======
448372
#apply to all rows
449-
>>>>>>> repo-b2/main
450373

451374
return TAPPredictionResult(self.loc[masks, :])
452375

@@ -463,7 +386,6 @@ def merge_results(self, others):
463386
if type(others) == type(self):
464387
others = [others]
465388

466-
<<<<<<< HEAD
467389
return TAPPredictionResult(pandas.concat([self] + others, axis=1))
468390

469391

@@ -551,6 +473,3 @@ def merge_results(self, others):
551473
tcr.columns = pandas.MultiIndex.from_tuples(tuples)
552474
result = pandas.concat([tcr, result], axis=1)
553475
return result
554-
=======
555-
return TAPPredictionResult(pandas.concat([self]+others, axis=1))
556-
>>>>>>> repo-b2/main

epytope/IO/UniProtAdapter.py

Lines changed: 1 addition & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -12,16 +12,9 @@
1212
import bisect
1313

1414
from Bio import SeqIO
15-
<<<<<<< HEAD
16-
from epytope.Core.Base import deprecated
1715

1816

1917
class UniProtDB:
20-
@deprecated # TODO: refactor ... function based on old code
21-
=======
22-
23-
class UniProtDB:
24-
>>>>>>> repo-b2/main
2518
def __init__(self, name='fdb'):
2619
"""
2720
UniProtDB class to give quick access to entries (fast exact match searches) and convenient ways to produce
@@ -42,11 +35,7 @@ def __init__(self, name='fdb'):
4235
"""
4336
self.name = name
4437
self.collection = {} # all the biopython seq records in a dict keyed by the id of the record
45-
<<<<<<< HEAD
46-
self.searchstring = '' # all sequences concatenated with a '#'
47-
=======
4838
self.search_string = '' # all sequences concatenated with a '#'
49-
>>>>>>> repo-b2/main
5039
self.accs = list() # all accessions in respective order to searchstring
5140
self.idx = list() # all indices of starting strings in the searchstring in respective order
5241

@@ -61,11 +50,7 @@ def read_seqs(self, sequence_file):
6150
recs = sequence_file
6251
if not isinstance(sequence_file, dict) and not isinstance(sequence_file, list):
6352
try:
64-
<<<<<<< HEAD
65-
with open(sequence_file, 'rb') as f:
66-
=======
6753
with open(sequence_file, 'r') as f:
68-
>>>>>>> repo-b2/main
6954
if sequence_file.endswith('.fa') or sequence_file.endswith('.fasta'):
7055
recs = SeqIO.to_dict(SeqIO.parse(f, "fasta"))
7156
else: # assume it is a dat file
@@ -77,11 +62,7 @@ def read_seqs(self, sequence_file):
7762
recs = SeqIO.to_dict(sequence_file)
7863
if recs:
7964
self.collection.update(recs)
80-
<<<<<<< HEAD
81-
self.searchstring = '#'.join([str(x.seq) for x in self.collection.values()]).decode('ascii')
82-
=======
8365
self.search_string = '#'.join([str(x.seq) for x in self.collection.values()])#.decode('ascii')
84-
>>>>>>> repo-b2/main
8566
self.accs = list(self.collection.keys())
8667
self.idx = list()
8768
self.idx.append(0)
@@ -106,11 +87,7 @@ def exists(self, seq):
10687
:return: True, if it is found somewhere, False otherwise
10788
"""
10889
if isinstance(seq, str):
109-
<<<<<<< HEAD
110-
index = self.searchstring.find(seq)
111-
=======
11290
index = self.search_string.find(seq)
113-
>>>>>>> repo-b2/main
11491
if index >= 0:
11592
return True
11693
else:
@@ -127,11 +104,7 @@ def search(self, seq):
127104
"""
128105
if isinstance(seq, str):
129106
ids = 'null'
130-
<<<<<<< HEAD
131-
index = self.searchstring.find(seq)
132-
=======
133107
index = self.search_string.find(seq)
134-
>>>>>>> repo-b2/main
135108
if index >= 0:
136109
j = bisect.bisect(self.idx, index) - 1
137110
ids = self.accs[j]
@@ -141,11 +114,7 @@ def search(self, seq):
141114
for i in seq:
142115
ids.append('null')
143116
for i, v in enumerate(seq):
144-
<<<<<<< HEAD
145-
index = self.searchstring.find(v)
146-
=======
147117
index = self.search_string.find(v)
148-
>>>>>>> repo-b2/main
149118
if index >= 0:
150119
j = bisect.bisect(self.idx, index) - 1
151120
ids[i] = self.accs[j]
@@ -164,13 +133,8 @@ def search_all(self, seq):
164133
ids = 'null'
165134
index = 0
166135
searchstring_length = len(seq)
167-
<<<<<<< HEAD
168-
while index < len(self.searchstring):
169-
index = self.searchstring.find(seq, index)
170-
=======
171136
while index < len(self.search_string):
172137
index = self.search_string.find(seq, index)
173-
>>>>>>> repo-b2/main
174138
if index == -1:
175139
break
176140
j = bisect.bisect(self.idx, index) - 1
@@ -187,13 +151,8 @@ def search_all(self, seq):
187151
for i, v in enumerate(seq):
188152
index = 0
189153
searchstring_length = len(v)
190-
<<<<<<< HEAD
191-
while index < len(self.searchstring):
192-
index = self.searchstring.find(v, index)
193-
=======
194154
while index < len(self.search_string):
195155
index = self.search_string.find(v, index)
196-
>>>>>>> repo-b2/main
197156
if index == -1:
198157
break
199158
j = bisect.bisect(self.idx, index) - 1
@@ -203,4 +162,4 @@ def search_all(self, seq):
203162
ids[i] = ids[i] + ',' + self.accs[j]
204163
index += searchstring_length
205164
return dict(zip(seq, ids))
206-
return None
165+
return None

epytope/TCRSpecificityPrediction/External.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -790,6 +790,11 @@ def format_tcr_data(self, tcrs, epitopes, pairwise, **kwargs):
790790
df_tcrs = self.filter_by_length(df_tcrs, None, "CDR3b", "epitope")
791791
df_tcrs = df_tcrs.drop_duplicates()
792792
df_tcrs = df_tcrs[(~df_tcrs["CDR3b"].isna()) & (df_tcrs["CDR3b"] != "")]
793+
794+
if len(df_tcrs) % 50 == 0:
795+
# If the number of samples is divisible by the batch size (50), something weird happens
796+
df_tcrs.loc[df_tcrs.index.max()+1] = df_tcrs.iloc[-1]
797+
793798
return df_tcrs
794799

795800
def save_tmp_files(self, data, **kwargs):
@@ -811,6 +816,7 @@ def get_base_cmd(self, filenames, tmp_folder, interpreter=None, conda=None, cmd_
811816

812817
def format_results(self, filenames, tmp_folder, tcrs, epitopes, pairwise, **kwargs):
813818
results_predictor = pd.read_csv(filenames[1], sep='\t', names=["VDJ_cdr3", "Epitope", "Score"], header=None)
819+
results_predictor = results_predictor.drop_duplicates()
814820
joining_list = ["Epitope", "VDJ_cdr3"]
815821
results_predictor = results_predictor[joining_list + ["Score"]]
816822
df_out = self.transform_output(results_predictor, tcrs, epitopes, pairwise, joining_list)

epytope/TCRSpecificityPrediction/ML.py

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -341,9 +341,15 @@ def organism(self):
341341

342342
def format_tcr_data(self, tcrs, epitopes, pairwise, **kwargs):
343343
df_tcrs = tcrs.to_pandas()
344+
if df_tcrs["organism"].unique()[0] == "MusMusculus" and df_tcrs["organism"].nunique() == 1:
345+
self.organism_in = "mouse"
346+
else:
347+
self.organism_in = "human"
348+
344349
df_tcrs["VDJ_v_gene"] = df_tcrs["VDJ_v_gene"].apply(lambda x: x if re.search(r"\*\d+$", x) else x + "*01")
345350
df_tcrs["VDJ_j_gene"] = df_tcrs["VDJ_j_gene"].apply(lambda x: x if re.search(r"\*\d+$", x) else x + "*01")
346-
df_tcrs = df_tcrs[df_tcrs["VDJ_v_gene"].isin(self._v_regions) & df_tcrs["VDJ_j_gene"].isin(self._j_regions)]
351+
if self.organism_in == "human":
352+
df_tcrs = df_tcrs[df_tcrs["VDJ_v_gene"].isin(self._v_regions) & df_tcrs["VDJ_j_gene"].isin(self._j_regions)]
347353
df_tcrs = df_tcrs[(~df_tcrs["VDJ_cdr3"].isna()) & (df_tcrs["VDJ_cdr3"] != "")]
348354

349355
df_tcrs_unique = df_tcrs.drop_duplicates().copy()
@@ -398,7 +404,11 @@ def save_tmp_files(self, data, **kwargs):
398404
def get_base_cmd(self, filenames, tmp_folder, interpreter=None, conda=None, cmd_prefix=None, **kwargs):
399405
path_module = self.get_package_dir("paccmann_tcr", interpreter, conda, cmd_prefix).split(os.sep)[:-1] + [".."]
400406

401-
path_imgt = os.sep.join(path_module + ["datasets", "imgt"])
407+
if self.organism_in == "mouse":
408+
path_imgt = os.sep.join(path_module + ["datasets", "imgt_mouse"])
409+
else:
410+
path_imgt = os.sep.join(path_module + ["datasets", "imgt"])
411+
402412
if not os.path.exists(os.sep.join([path_imgt, "V_segment_sequences.fasta"])) or \
403413
not os.path.exists(os.sep.join([path_imgt, "J_segment_sequences.fasta"])):
404414
raise NotADirectoryError(f"Please download the V and J segment files from "

0 commit comments

Comments
 (0)