Skip to content

Commit 1a5f2b3

Browse files
author
Ralf
committed
cleanup
1 parent c5b5be5 commit 1a5f2b3

File tree

3 files changed

+1
-42
lines changed

3 files changed

+1
-42
lines changed

dataContainer.py

-2
Original file line number | Diff line number | Diff line change
@@ -2,11 +2,9 @@
22
import records
33
import os
44
import numpy as np
5-
from Bio import SeqIO
65
from tensorflow import dtypes as tfdtypes
76
from scipy.sparse import save_npz, csr_matrix
87
from tqdm import tqdm
9-
import pandas as pd
108

119
class DataContainer():
1210
def __init__(self, chromosome, matrixfilepath, chromatinFolder, binsize=None):

training.py

+1-1
Original file line number | Diff line number | Diff line change
@@ -50,7 +50,7 @@
5050
help="loss weight for the discriminator error")
5151
@click.option("--lossTypePixel", "-ltp", required=False,
5252
type=click.Choice(["L1", "L2"]),
53-
default="L2", show_default=True,
53+
default="L1", show_default=True,
5454
help="Type of per-pixel loss to use for the generator; choose from L1 (mean abs. error) or L2 (mean squared error)")
5555
@click.option("--lossWeightTv", "-lvt", required=False,
5656
type=click.FloatRange(min=0.0),

utils.py

-39
Original file line number | Diff line number | Diff line change
@@ -9,10 +9,7 @@
99
from matplotlib.ticker import MultipleLocator
1010
from tqdm import tqdm
1111
from scipy import sparse
12-
from Bio import SeqIO
13-
from sklearn.preprocessing import MultiLabelBinarizer
1412
from sklearn import metrics as metrics
15-
import pandas as pd
1613

1714
def getBigwigFileList(pDirectory):
1815
#returns a list of bigwig files in pDirectory
@@ -405,42 +402,6 @@ def clampArray(pArray):
405402
clampedArray[clampedArray > upperClampingBound] = upperClampingBound
406403
return clampedArray
407404

408-
def encodeSequence(pSequenceStr, pClasses=None):
409-
#one-hot encoding for DNA sequences
410-
if pSequenceStr is None or pSequenceStr == "":
411-
msg = "Aborting. DNA sequence is empty"
412-
raise SystemExit(msg)
413-
mlb = MultiLabelBinarizer(classes=pClasses)
414-
encodedSequenceArray = mlb.fit_transform(pSequenceStr).astype("uint8")
415-
if encodedSequenceArray.shape[1] != 4:
416-
msg = "Warning: DNA sequence contains more than the 4 nucleotide symbols A,C,G,T\n"
417-
msg += "Check your input sequence, if this is not intended."
418-
print(msg)
419-
print("Contained symbols:", ", ".join(mlb.classes_))
420-
return encodedSequenceArray
421-
422-
def fillEncodedSequence(pEncodedSequenceArray, pBinSizeInt):
423-
#fill one-hot encoded sequence array with zero vectors such that
424-
#the length matches the number of bins
425-
if pBinSizeInt is None or not isinstance(pBinSizeInt, int):
426-
return
427-
actualLengthInt = pEncodedSequenceArray.shape[0] #here, length in basepairs
428-
targetLengthInt = int(np.ceil(actualLengthInt/pBinSizeInt))*pBinSizeInt #in basepairs
429-
returnArray = None
430-
if targetLengthInt > actualLengthInt:
431-
#append zero vectors to the array to fill the last bin
432-
#in case the chromosome length is not divisible by bin size (as is normal)
433-
toAppendArray = np.zeros((targetLengthInt-actualLengthInt,pEncodedSequenceArray.shape[1]),dtype="uint8")
434-
returnArray = np.append(pEncodedSequenceArray,toAppendArray,axis=0)
435-
else:
436-
msg = "Warning: could not append zeros to end of array.\n"
437-
msg += "Target length {:d}, actual length {:d}\n"
438-
msg += "Array left unchanged."
439-
msg = msg.format(targetLengthInt, actualLengthInt)
440-
print(msg)
441-
returnArray = pEncodedSequenceArray
442-
return returnArray
443-
444405
def computePearsonCorrelation(pCoolerFile1, pCoolerFile2,
445406
pWindowsize_bp,
446407
pModelChromList, pTargetChromStr,

0 commit comments

Comments
 (0)