def continuousGenerator(self, request):
    """
    Builds the paging iterator that yields (continuous, nextPageToken)
    pairs for the continuous set named in the request.

    Raises ContinuousSetNotSpecifiedException when the request carries
    an empty continuous_set_id (or the id does not parse to anything).
    """
    setId = request.continuous_set_id
    parsedId = None
    if setId != "":
        parsedId = datamodel.ContinuousSetCompoundId.parse(setId)
    if parsedId is None:
        raise exceptions.ContinuousSetNotSpecifiedException()

    # The dataset id embedded in the compound id locates the owning
    # dataset; the set itself is looked up by its full id.
    owningDataset = self.getDataRepository().getDataset(parsedId.dataset_id)
    return paging.ContinuousIterator(
        request, owningDataset.getContinuousSet(setId))
+ """ + compoundId = datamodel.ContinuousSetCompoundId.parse(id_) + dataset = self.getDataRepository().getDataset(compoundId.dataset_id) + continuousSet = dataset.getContinuousSet(id_) + return self.runGetRequest(continuousSet) + def runGetDataset(self, id_): """ Runs a getDataset request for the specified ID. @@ -929,6 +966,29 @@ def runSearchFeatures(self, request): protocol.SearchFeaturesResponse, self.featuresGenerator) + def runSearchContinuousSets(self, request): + """ + Returns a SearchContinuousSetsResponse for the specified + SearchContinuousSetsRequest object. + """ + return self.runSearchRequest( + request, protocol.SearchContinuousSetsRequest, + protocol.SearchContinuousSetsResponse, + self.continuousSetsGenerator) + + def runSearchContinuous(self, request): + """ + Returns a SearchContinuousResponse for the specified + SearchContinuousRequest object. + + :param request: JSON string representing searchContinuousRequest + :return: JSON string representing searchContinuousResponse + """ + return self.runSearchRequest( + request, protocol.SearchContinuousRequest, + protocol.SearchContinuousResponse, + self.continuousGenerator) + def runSearchGenotypePhenotypes(self, request): return self.runSearchRequest( request, protocol.SearchGenotypePhenotypeRequest, diff --git a/ga4gh/server/cli/repomanager.py b/ga4gh/server/cli/repomanager.py index 9a21e12e6..99eb8a009 100644 --- a/ga4gh/server/cli/repomanager.py +++ b/ga4gh/server/cli/repomanager.py @@ -22,6 +22,7 @@ import ga4gh.server.datamodel.references as references import ga4gh.server.datamodel.rna_quantification as rna_quantification import ga4gh.server.datamodel.sequence_annotations as sequence_annotations +import ga4gh.server.datamodel.continuous as continuous import ga4gh.server.datamodel.variants as variants import ga4gh.server.datarepo as datarepo import ga4gh.server.exceptions as exceptions @@ -429,6 +430,38 @@ def func(): self._updateRepo(self._repo.removeFeatureSet, featureSet) 
self._confirmDelete("FeatureSet", featureSet.getLocalId(), func) + def addContinuousSet(self): + """ + Adds a new continuous set into this repo + """ + self._openRepo() + dataset = self._repo.getDatasetByName(self._args.datasetName) + filePath = self._getFilePath(self._args.filePath, + self._args.relativePath) + name = getNameFromPath(self._args.filePath) + continuousSet = continuous.FileContinuousSet(dataset, name) + referenceSetName = self._args.referenceSetName + if referenceSetName is None: + raise exceptions.RepoManagerException( + "A reference set name must be provided") + referenceSet = self._repo.getReferenceSetByName(referenceSetName) + continuousSet.setReferenceSet(referenceSet) + continuousSet.populateFromFile(filePath) + self._updateRepo(self._repo.insertContinuousSet, continuousSet) + + def removeContinuousSet(self): + """ + Removes a continuous set from this repo + """ + self._openRepo() + dataset = self._repo.getDatasetByName(self._args.datasetName) + continuousSet = dataset.getContinuousSetByName( + self._args.continuousSetName) + + def func(): + self._updateRepo(self._repo.removeContinuousSet, continuousSet) + self._confirmDelete("ContinuousSet", continuousSet.getLocalId(), func) + def addBiosample(self): """ Adds a new biosample into this repo @@ -639,6 +672,12 @@ def addFeatureSetNameArgument(cls, subparser): "featureSetName", help="the name of the feature set") + @classmethod + def addContinuousSetNameArgument(cls, subparser): + subparser.add_argument( + "continuousSetName", + help="the name of the continuous set") + @classmethod def addIndividualNameArgument(cls, subparser): subparser.add_argument( @@ -924,6 +963,28 @@ def getParser(cls): cls.addFeatureSetNameArgument(removeFeatureSetParser) cls.addForceOption(removeFeatureSetParser) + addContinuousSetParser = common_cli.addSubparser( + subparsers, "add-continuousset", + "Add a continuous set to the data repo") + addContinuousSetParser.set_defaults(runner="addContinuousSet") + 
cls.addRepoArgument(addContinuousSetParser) + cls.addDatasetNameArgument(addContinuousSetParser) + cls.addRelativePathOption(addContinuousSetParser) + cls.addFilePathArgument( + addContinuousSetParser, + "The path to the file contianing the continuous data ") + cls.addReferenceSetNameOption(addContinuousSetParser, "continuous set") + cls.addClassNameOption(addContinuousSetParser, "continuous set") + + removeContinuousSetParser = common_cli.addSubparser( + subparsers, "remove-continuousset", + "Remove a continuous set from the repo") + removeContinuousSetParser.set_defaults(runner="removeContinuousSet") + cls.addRepoArgument(removeContinuousSetParser) + cls.addDatasetNameArgument(removeContinuousSetParser) + cls.addContinuousSetNameArgument(removeContinuousSetParser) + cls.addForceOption(removeContinuousSetParser) + addBiosampleParser = common_cli.addSubparser( subparsers, "add-biosample", "Add a Biosample to the dataset") addBiosampleParser.set_defaults(runner="addBiosample") diff --git a/ga4gh/server/datamodel/__init__.py b/ga4gh/server/datamodel/__init__.py index 532795eb1..51150298a 100644 --- a/ga4gh/server/datamodel/__init__.py +++ b/ga4gh/server/datamodel/__init__.py @@ -418,6 +418,14 @@ class FeatureCompoundId(FeatureSetCompoundId): fields = FeatureSetCompoundId.fields + ['featureId'] +class ContinuousSetCompoundId(DatasetCompoundId): + """ + The compound id for a continuous set + """ + fields = DatasetCompoundId.fields + ['continuous_set'] + containerIds = DatasetCompoundId.containerIds + [('continuous_set_id', 1)] + + class ReadGroupSetCompoundId(DatasetCompoundId): """ The compound id for a read group set diff --git a/ga4gh/server/datamodel/continuous.py b/ga4gh/server/datamodel/continuous.py new file mode 100644 index 000000000..8bfd7d61c --- /dev/null +++ b/ga4gh/server/datamodel/continuous.py @@ -0,0 +1,430 @@ +""" +Module responsible for translating continuous sequence annotation data +into GA4GH native objects. 
class WiggleReader(object):
    """
    Class for reading Wiggle data (from a file or a pipe) and returning
    protocol objects within the defined query region.

    Currently, only a single protocol object is returned, using getData().
    Unsampled positions between two sampled values are assigned NaN in
    the object's values array. The object start is the position of the
    first sampled value inside the query region, and the values array
    extends to the last sampled value. So, rather than covering the whole
    query region, the values array drops any initial or final NaN. This
    was done to shorten the message and to avoid sending values
    containing only NaN.

    All positions handled here are 0-based; wiggle input is 1-based and
    is converted while parsing. The query region is half-open:
    queryStart <= position < queryEnd.
    """

    # Wiggle has two modes:
    # 1. variable step, where each data line has a position and value.
    # 2. fixed step, where each data line has only a value. Positions
    #    are calculated as a fixed length from a start position, which
    #    are given in the header line (i.e. 'fixedStep ...').
    _VARIABLE_STEP = 1
    _FIXED_STEP = 2

    def __init__(self, reference, start, end):
        """
        :param reference: name of the reference being queried
        :param start: 0-based inclusive start of the query region
        :param end: 0-based exclusive end of the query region
        """
        self._queryReference = reference
        self._queryStart = start
        self._queryEnd = end
        self._mode = None
        self._span = 1
        self._step = 1
        # Set by parseStep() once a declaration line has been seen.
        self._reference = None
        self._start = None
        self._data = protocol.Continuous()
        # Position following the last value written to self._data.
        self._position = None

    def getData(self):
        """
        Returns the protocol object accumulated so far.
        """
        return self._data

    def parseStep(self, line):
        """
        Parse the line describing the mode.

        One of:
            variableStep chrom=<reference> [span=<window_size>]
            fixedStep chrom=<reference> start=<position> step=<step>
                [span=<window_size>]

        Span is optional, defaulting to 1. It indicates that each value
        applies to a region, starting at the given position and extending
        <window_size> positions.

        Raises ValueError if a field is malformed or a required field is
        missing.
        """
        fields = {}
        for token in line.split()[1:]:
            key, separator, value = token.partition('=')
            if not separator:
                raise ValueError(
                    "Malformed field '%s' in %s" % (token, line.strip()))
            fields[key] = value

        if 'chrom' not in fields:
            raise ValueError("Missing chrom field in %s" % line.strip())
        self._reference = fields['chrom']

        if line.lstrip().startswith("fixedStep"):
            if 'start' not in fields:
                raise ValueError("Missing start field in %s" % line.strip())
            self._start = int(fields['start']) - 1  # to 0-based

        # Every declaration line starts from the defaults again, so a
        # span/step given for one section must not leak into the next.
        self._span = int(fields.get('span', 1))
        self._step = int(fields.get('step', 1))

    def readWiggleLine(self, line):
        """
        Read a wiggle line. If it is a data line, add values to the
        protocol object.

        Blank lines, comments and browser/track lines are ignored.
        Raises ValueError if a data line arrives before any
        variableStep/fixedStep declaration.
        """
        stripped = line.strip()
        if not stripped or stripped.startswith(("#", "browser", "track")):
            return
        if stripped.startswith("variableStep"):
            self._mode = self._VARIABLE_STEP
            self.parseStep(stripped)
            return
        if stripped.startswith("fixedStep"):
            self._mode = self._FIXED_STEP
            self.parseStep(stripped)
            return
        if self._mode is None:
            raise ValueError("Unexpected input line: %s" % stripped)

        if self._queryReference != self._reference:
            return

        # read data lines
        fields = stripped.split()
        if self._mode == self._VARIABLE_STEP:
            start = int(fields[0]) - 1  # to 0-based
            val = float(fields[1])
        else:
            start = self._start
            self._start += self._step
            val = float(fields[0])

        # Half-open query region: the value sitting exactly on the query
        # start belongs to the result.
        if self._queryStart <= start < self._queryEnd:
            if self._position is None:
                self._position = start
                self._data.start = start

            # fill gap between the previous value and this one
            gap = start - self._position
            if gap > 0:
                self._data.values.extend([float('NaN')] * gap)
                self._position = start
            self._data.values.extend([val] * self._span)
            self._position += self._span

    def fillEnd(self):
        """
        Pad end values with NaN to fill a query range.

        Does nothing when no value has been read yet, since there is
        then no start position to pad from.
        """
        if self._position is None:
            return
        missing = self._queryEnd - self._position
        if missing > 0:
            self._data.values.extend([float('NaN')] * missing)
            self._position = self._queryEnd

    def wiggleFileHandleToProtocol(self, fileHandle):
        """
        Return a continuous protocol object satisfying the given query
        parameters from the given wiggle file handle.
        """
        for line in fileHandle:
            self.readWiggleLine(line)
        # Trailing NaN padding (fillEnd) is intentionally not applied;
        # see the class docstring.
        return self._data

    def wiggleFileToProtocol(self, fileName):
        """
        Return a continuous protocol object satisfying the given query
        parameters from the given wiggle file.
        """
        with open(fileName, 'r') as f:
            return self.wiggleFileHandleToProtocol(f)


class BigWigDataSource(object):
    """
    Class for reading from bigwig files.

    Two different readers are implemented:
    1. pyBigWig: a python package that wraps custom C code. It
       doesn't seem to return span and step values, limiting its use.
    2. bigWigToWig: This is a command line tool from the Kent library.
       It must be installed separately.
    """

    # Characters rejected in reference names by checkReference().
    _UNSAFE_REFERENCE_CHARS = re.compile(r'[\s,;"\'&\\]')

    def __init__(self, sourceFile):
        """
        :param sourceFile: path of the bigwig file to read
        """
        self._sourceFile = sourceFile
        self._INCREMENT = 10000  # max results per bw query
        self._MAX_VALUES = 1000  # max values length

    def checkReference(self, reference):
        """
        Check the reference for security. Tries to avoid any characters
        necessary for doing a script injection.

        Returns True when the reference (ignoring surrounding
        whitespace) contains none of the rejected characters.
        """
        stripped = reference.strip()
        return self._UNSAFE_REFERENCE_CHARS.search(stripped) is None

    def readValuesPyBigWig(self, reference, start, end):
        """
        Use pyBigWig package to read a BigWig file for the
        given range and yield protocol objects.

        pyBigWig returns an array of values that fill the query range.
        Not sure if it is possible to get the step and span.

        Each yielded object holds one run of consecutive non-NaN values,
        capped at self._MAX_VALUES values; NaN values are never emitted.

        pyBigWig throws an exception if end is outside of the
        reference range. This function checks the query range
        and throws its own exceptions to avoid the ones thrown
        by pyBigWig. A negative start is clamped to 0 and an end past
        the reference length is clamped to that length.

        This is a generator, so the checks run (and the exceptions are
        raised) when iteration starts, not when the method is called.
        """
        if not self.checkReference(reference):
            raise exceptions.ReferenceNameNotFoundException(reference)
        if start < 0:
            start = 0
        bw = pyBigWig.open(self._sourceFile)
        # try/finally so the file is closed on validation errors and
        # when the consumer stops iterating early.
        try:
            referenceLen = bw.chroms(reference)
            if referenceLen is None:
                raise exceptions.ReferenceNameNotFoundException(reference)
            if end > referenceLen:
                end = referenceLen
            if start >= end:
                raise exceptions.ReferenceRangeErrorException(
                    reference, start, end)

            data = protocol.Continuous()
            curStart = start
            while curStart < end:
                # Query the file in windows of at most _INCREMENT bases.
                curEnd = min(curStart + self._INCREMENT, end)
                window = bw.values(reference, curStart, curEnd)
                for i, val in enumerate(window):
                    if not math.isnan(val):
                        if len(data.values) == 0:
                            data.start = curStart + i
                        data.values.append(val)
                        if len(data.values) == self._MAX_VALUES:
                            yield data
                            data = protocol.Continuous()
                    elif len(data.values) > 0:
                        # A NaN ends the current run of values.
                        yield data
                        data = protocol.Continuous()
                curStart = curEnd

            if len(data.values) > 0:
                yield data
        finally:
            bw.close()

    def readValuesBigWigToWig(self, reference, start, end):
        """
        Read a bigwig file and return a protocol object with values
        within the query range.

        This method uses the bigWigToWig command line tool from UCSC
        GoldenPath. The tool is used to return values within a query region.
        The output is in wiggle format, which is processed by the WiggleReader
        class.

        There could be memory issues if the returned results are large.

        The input reference can be a security problem (script injection).
        Ideally, it should be checked against a list of known chromosomes.
        Start and end should not be problems since they are integers.

        Raises ReferenceNameNotFoundException for a rejected reference,
        ReferenceRangeErrorException for a negative start, ValueError
        for unparsable wiggle output, and a generic Exception when the
        tool cannot be run.
        """
        if not self.checkReference(reference):
            raise exceptions.ReferenceNameNotFoundException(reference)
        if start < 0:
            raise exceptions.ReferenceRangeErrorException(
                reference, start, end)
        # TODO: CHECK IF QUERY IS BEYOND END

        # Argument list, no shell: the reference is never interpreted by
        # a command interpreter.
        cmd = ["bigWigToWig", self._sourceFile, "stdout", "-chrom="+reference,
               "-start="+str(start), "-end="+str(end)]
        wiggleReader = WiggleReader(reference, start, end)
        process = None
        try:
            # run command and grab output simultaneously; text mode so
            # lines are str on every Python version
            process = subprocess.Popen(
                cmd, stdout=subprocess.PIPE, universal_newlines=True)
            for line in iter(process.stdout.readline, ''):
                wiggleReader.readWiggleLine(line)
        except ValueError:
            raise
        except Exception:
            # Not a bare except: KeyboardInterrupt/SystemExit propagate.
            raise Exception("bigWigToWig failed to run")
        finally:
            if process is not None:
                process.stdout.close()
                process.wait()

        # Trailing NaN padding (wiggleReader.fillEnd) is intentionally
        # not applied.
        return wiggleReader.getData()

    def bigWigToProtocol(self, reference, start, end):
        """
        Yield the protocol objects for the query range, using the
        pyBigWig reader.
        """
        for continuousObj in self.readValuesPyBigWig(reference, start, end):
            yield continuousObj
+ """ + gaContinuousSet = protocol.ContinuousSet() + gaContinuousSet.id = self.getId() + gaContinuousSet.dataset_id = self.getParentContainer().getId() + gaContinuousSet.reference_set_id = pb.string( + self._referenceSet.getId()) + gaContinuousSet.name = self._name + gaContinuousSet.source_uri = self._sourceUri + attributes = self.getAttributes() + for key in attributes: + gaContinuousSet.attributes.attr[key] \ + .values.extend(protocol.encodeValue(attributes[key])) + return gaContinuousSet + + +class FileContinuousSet(AbstractContinuousSet): + """ + Data associated with a file containing continuous data. + """ + def __init__(self, parentContainer, localId): + super(FileContinuousSet, self).__init__(parentContainer, localId) + self._filePath = None + + def populateFromFile(self, dataUrl): + """ + Populates the instance variables of this ContinuousSet from the + specified data URL. + """ + self._filePath = dataUrl + + def populateFromRow(self, continuousSetRecord): + """ + Populates the instance variables of this ContinuousSet from the + specified DB row. + """ + self._filePath = continuousSetRecord.dataurl + self.setAttributesJson(continuousSetRecord.attributes) + + def getDataUrl(self): + """ + Returns the URL providing the data source for this ContinuousSet. + """ + return self._filePath + + def getContinuous(self, referenceName=None, start=None, end=None): + """ + Method passed to runSearchRequest to fulfill the request to + yield continuous protocol objects that satisfy the given query. 
class SimulatedContinuousSet(AbstractContinuousSet):
    """
    Simulated data backend for ContinuousSet, used for internal testing.
    """
    def __init__(self, parentContainer, localId, randomSeed=1):
        # The seed is stored before the base initialiser runs, as in the
        # original ordering.
        self._randomSeed = randomSeed
        super(SimulatedContinuousSet, self).__init__(parentContainer, localId)

    def _generateSimulatedContinuous(self, randomNumberGenerator):
        """
        Builds one simulated continuous object: a random start in
        [1000, 2000] followed by three fixed values.
        """
        continuous = protocol.Continuous()
        continuous.start = randomNumberGenerator.randint(1000, 2000)
        # Extend rather than assign: the rest of this module only ever
        # appends to the values field.
        continuous.values.extend([100, 200.3, 400])
        return continuous

    def getContinuousData(self, referenceName=None, start=None, end=None):
        """
        Yields the simulated continuous objects overlapping the query.

        One hundred objects are generated from this set's random seed;
        those whose [start, start + len(values)) interval overlaps
        [start, end) are yielded. A bound given as None is unbounded.

        :param referenceName: name of reference to "search" on
        :param start: start coordinate of query
        :param end: end coordinate of query
        :return: Yields continuous list
        """
        # NOTE(review): the original also filtered on
        # gaContinuous.reference_name and gaContinuous.end, but nothing
        # in this module assigns either, so the end is derived from the
        # values and referenceName is not used as a filter - confirm
        # against the Continuous message definition before reinstating.
        randomNumberGenerator = random.Random()
        randomNumberGenerator.seed(self._randomSeed)
        for _ in range(100):
            gaContinuous = self._generateSimulatedContinuous(
                randomNumberGenerator)
            objectEnd = gaContinuous.start + len(gaContinuous.values)
            if end is not None and gaContinuous.start >= end:
                continue
            if start is not None and objectEnd <= start:
                continue
            yield gaContinuous

    def getContinuous(self, referenceName=None, start=None, end=None):
        """
        Same query as getContinuousData, under the method name that
        FileContinuousSet exposes, so both kinds of set can be queried
        the same way.
        """
        return self.getContinuousData(referenceName, start, end)
+ """ + id_ = continuousSet.getId() + self._continuousSetIdMap[id_] = continuousSet + self._continuousSetIds.append(id_) + name = continuousSet.getLocalId() + self._continuousSetNameMap[name] = continuousSet + def addReadGroupSet(self, readGroupSet): """ Adds the specified readGroupSet to this dataset. @@ -231,6 +245,43 @@ def getFeatureSetByIndex(self, index): """ return self._featureSetIdMap[self._featureSetIds[index]] + def getContinuousSets(self): + """ + Returns the list of ContinuousSets in this dataset + """ + return [self._continuousSetIdMap[id_] + for id_ in self._continuousSetIds] + + def getNumContinuousSets(self): + """ + Returns the number of continuous sets in this dataset. + """ + return len(self._continuousSetIds) + + def getContinuousSet(self, id_): + """ + Returns the ContinuousSet with the specified id, or raises a + ContinuousSetNotFoundException otherwise. + """ + if id_ not in self._continuousSetIdMap: + raise exceptions.ContinuousSetNotFoundException(id_) + return self._continuousSetIdMap[id_] + + def getContinuousSetByName(self, name): + """ + Returns the ContinuousSet with the specified name, or raises + an exception otherwise. + """ + if name not in self._continuousSetNameMap: + raise exceptions.ContinuousSetNameNotFoundException(name) + return self._continuousSetNameMap[name] + + def getContinuousSetByIndex(self, index): + """ + Returns the continuous set at the specified index in this dataset. + """ + return self._continuousSetIdMap[self._continuousSetIds[index]] + def getNumBiosamples(self): """ Returns the number of biosamples sets in this dataset. 
@@ -399,7 +450,8 @@ def __init__( self, localId, referenceSet, randomSeed=0, numVariantSets=1, numCalls=1, variantDensity=0.5, numReadGroupSets=1, numReadGroupsPerReadGroupSet=1, - numAlignments=1, numFeatureSets=1, numPhenotypeAssociationSets=1, + numAlignments=1, numFeatureSets=1, numContinuousSets=1, + numPhenotypeAssociationSets=1, numPhenotypeAssociations=2, numRnaQuantSets=2, numExpressionLevels=2): super(SimulatedDataset, self).__init__(localId) @@ -462,6 +514,14 @@ def __init__( self, localId, seed) featureSet.setReferenceSet(referenceSet) self.addFeatureSet(featureSet) + # Continuous + for i in range(numContinuousSets): + localId = "simConts{}".format(i) + seed = randomSeed + i + continuousSet = continuous.SimulatedContinuousSet( + self, localId, seed) + continuousSet.setReferenceSet(referenceSet) + self.addContinuousSet(continuousSet) # RnaQuantificationSets for i in range(numRnaQuantSets): localId = 'simRqs{}'.format(i) diff --git a/ga4gh/server/datamodel/sequence_annotations.py b/ga4gh/server/datamodel/sequence_annotations.py index 4afc4fe32..665e66a80 100644 --- a/ga4gh/server/datamodel/sequence_annotations.py +++ b/ga4gh/server/datamodel/sequence_annotations.py @@ -168,7 +168,6 @@ def __init__(self, parentContainer, localId): self._name = localId self._sourceUri = "" self._referenceSet = None - self._info = {} def getReferenceSet(self): """ diff --git a/ga4gh/server/datarepo.py b/ga4gh/server/datarepo.py index e300eabb3..47415f116 100644 --- a/ga4gh/server/datarepo.py +++ b/ga4gh/server/datarepo.py @@ -17,6 +17,7 @@ import ga4gh.server.datamodel.references as references import ga4gh.server.datamodel.variants as variants import ga4gh.server.datamodel.sequence_annotations as sequence_annotations +import ga4gh.server.datamodel.continuous as continuous import ga4gh.server.datamodel.bio_metadata as biodata import ga4gh.server.datamodel.genotype_phenotype as genotype_phenotype import ga4gh.server.datamodel.genotype_phenotype_featureset as g2pFeatureset @@ 
def insertContinuousSet(self, continuousSet):
    """
    Stores the specified continuousSet as a new row of the
    ContinuousSet table.

    Any failure while building or writing the row is re-raised as a
    RepoManagerException wrapping the original error.
    """
    # TODO add support for info and sourceUri fields.
    try:
        row = dict(
            id=continuousSet.getId(),
            datasetid=continuousSet.getParentContainer().getId(),
            referencesetid=continuousSet.getReferenceSet().getId(),
            name=continuousSet.getLocalId(),
            dataurl=continuousSet.getDataUrl(),
            attributes=json.dumps(continuousSet.getAttributes()))
        m.ContinuousSet.create(**row)
    except Exception as error:
        raise exceptions.RepoManagerException(error)
def __init__(self, continuousSetId): + self.message = ( + "ContinuousSet with name '{0}' not found".format(continuousSetId)) + + class SequenceAnnotationNotFoundException(NotFoundException): def __init__(self, name): self.message = ( diff --git a/ga4gh/server/frontend.py b/ga4gh/server/frontend.py index 3e493f7a7..37de7361c 100644 --- a/ga4gh/server/frontend.py +++ b/ga4gh/server/frontend.py @@ -156,6 +156,13 @@ def getFeatureSets(self, datasetId): return app.backend.getDataRepository().getDataset( datasetId).getFeatureSets() + def getContinuousSets(self, datasetId): + """ + Returns the list of continuous sets for the dataset + """ + return app.backend.getDataRepository().getDataset( + datasetId).getContinuousSets() + def getReadGroupSets(self, datasetId): """ Returns the list of ReadGroupSets for the dataset @@ -695,6 +702,20 @@ def searchFeatures(): flask.request, app.backend.runSearchFeatures) +@DisplayedRoute('/continuoussets/search', postMethod=True) +@requires_auth +def searchContinuousSets(): + return handleFlaskPostRequest( + flask.request, app.backend.runSearchContinuousSets) + + +@DisplayedRoute('/continuous/search', postMethod=True) +@requires_auth +def searchContinuous(): + return handleFlaskPostRequest( + flask.request, app.backend.runSearchContinuous) + + @DisplayedRoute('/biosamples/search', postMethod=True) @requires_auth def searchBiosamples(): @@ -809,6 +830,15 @@ def getFeature(id): id, flask.request, app.backend.runGetFeature) +@DisplayedRoute( + '/continuoussets/', + pathDisplay='/continuoussets/') +@requires_auth +def getcontinuousSet(id): + return handleFlaskGetRequest( + id, flask.request, app.backend.runGetContinuousSet) + + @DisplayedRoute( '/rnaquantificationsets/', pathDisplay='/rnaquantificationsets/') diff --git a/ga4gh/server/paging.py b/ga4gh/server/paging.py index f467571a1..a2d646d87 100644 --- a/ga4gh/server/paging.py +++ b/ga4gh/server/paging.py @@ -420,3 +420,31 @@ def _search(self): def _prepare(self, obj): return obj + + 
+class ContinuousIterator(SequenceIterator): + """ + Iterates through continuous data + """ + def __init__(self, request, continuousSet): + self._continuousSet = continuousSet + super(ContinuousIterator, self).__init__(request) + + def _initialize(self): + if self._request.start == self._request.end == 0: + self._start = self._end = None + else: + self._start = self._request.start + self._end = self._request.end + self._startIndex = self._request.page_token + self._maxResults = self._request.page_size + + def _search(self): + iterator = list(self._continuousSet.getContinuous( + self._request.reference_name, + self._start, + self._end)) + return iterator + + def _prepare(self, obj): + return obj diff --git a/ga4gh/server/repo/models.py b/ga4gh/server/repo/models.py index 4ec727dd4..fa7fb9dfe 100644 --- a/ga4gh/server/repo/models.py +++ b/ga4gh/server/repo/models.py @@ -149,6 +149,25 @@ class Meta: ) +class ContinuousSet(BaseModel): + dataurl = pw.TextField(db_column='dataUrl') + datasetid = pw.ForeignKeyField( + db_column='datasetId', rel_model=Dataset, to_field='id') + id = pw.TextField(primary_key=True) + info = pw.TextField(null=True) + name = pw.TextField() + referencesetid = pw.ForeignKeyField( + db_column='referenceSetId', rel_model=Referenceset, to_field='id') + sourceuri = pw.TextField( + db_column='sourceUri', null=True) + + class Meta: + db_table = 'ContinuousSet' + indexes = ( + (('datasetid', 'name'), True), + ) + + class Individual(BaseModel): created = pw.TextField() datasetid = pw.ForeignKeyField( diff --git a/ga4gh/server/templates/index.html b/ga4gh/server/templates/index.html index 03835eb67..11d67e69c 100644 --- a/ga4gh/server/templates/index.html +++ b/ga4gh/server/templates/index.html @@ -123,6 +123,19 @@
FeatureSets
{% endfor %} +
ContinuousSets
+ + + + + + {% for continuousSet in info.getContinuousSets(dataset.getId()) %} + + + + + {% endfor %} +
NameId
{{ continuousSet.getLocalId() }}{{ continuousSet.getId() }}
ReadGroupSets
diff --git a/requirements.txt b/requirements.txt index a419cebfe..ec81c81d3 100644 --- a/requirements.txt +++ b/requirements.txt @@ -53,6 +53,7 @@ requests==2.7.0 oic==0.7.6 pyOpenSSL==0.15.1 lxml==3.4.4 +pyBigWig==0.3.2 # We need sphinx-argparse to build on readthedocs. sphinx-argparse==0.1.15 diff --git a/scripts/build_test_data.py b/scripts/build_test_data.py index 06c1aec5b..9498aa4c5 100644 --- a/scripts/build_test_data.py +++ b/scripts/build_test_data.py @@ -67,6 +67,13 @@ def buildTestData( dataFile, "-R NCBI37", "-O", sequenceOntologyName, "-C ga4gh.datamodel.sequence_annotations.Gff3DbFeatureSet") + pattern = os.path.join( + prefix, "datasets/dataset1/continuous", "*.bw") + for dataFile in glob.glob(pattern): + run("add-continuousset", repoFile, datasetName, useRelativePath, + dataFile, "-R NCBI37", + "-C ga4gh.datamodel.continuous.FileContinuousSet") + pattern = os.path.join(prefix, "datasets/dataset1/phenotypes", "*") for dataFile in glob.glob(pattern): # coordinate featureset name and g2p name diff --git a/scripts/prepare_compliance_data.py b/scripts/prepare_compliance_data.py index 2f4025e2e..4cf2f6415 100644 --- a/scripts/prepare_compliance_data.py +++ b/scripts/prepare_compliance_data.py @@ -34,6 +34,7 @@ import ga4gh.server.datamodel.reads as reads # NOQA import ga4gh.server.datamodel.ontologies as ontologies # NOQA import ga4gh.server.datamodel.sequence_annotations as sequence_annotations # NOQA +import ga4gh.server.datamodel.continuous as continuous # NOQA import ga4gh.server.datamodel.bio_metadata as biodata # NOQA import ga4gh.server.datamodel.genotype_phenotype_featureset as g2p_featureset # NOQA import ga4gh.server.datamodel.genotype_phenotype as g2p_associationset # NOQA @@ -259,6 +260,19 @@ def run(self): self.repo.insertFeatureSet(gencode) + # Continuous data + continuousFile = ("wgEncodeCaltechRnaSeqNhekR1x75dTh1014Ilna" + "MinusSignalRep1.bigWig") + continuousFileSrc = os.path.join( + self.inputDirectory, continuousFile) + 
continuousFileDest = os.path.join( + self.outputDirectory, continuousFile) + shutil.copy(continuousFileSrc, continuousFileDest) + signalData = continuous.FileContinuousSet(dataset, "signalData") + signalData.populateFromFile(os.path.abspath(continuousFileDest)) + signalData.setReferenceSet(referenceSet) + self.repo.insertContinuousSet(signalData) + # add g2p featureSet g2pPath = os.path.join(self.inputDirectory, "cgd") # copy all files input directory to output path diff --git a/tests/data/datasets/dataset1/continuous/bigwig_1.bw b/tests/data/datasets/dataset1/continuous/bigwig_1.bw new file mode 100644 index 000000000..119e99610 Binary files /dev/null and b/tests/data/datasets/dataset1/continuous/bigwig_1.bw differ diff --git a/tests/data/datasets/dataset1/continuous/bigwig_2.bw b/tests/data/datasets/dataset1/continuous/bigwig_2.bw new file mode 100644 index 000000000..7ddc4ee1e Binary files /dev/null and b/tests/data/datasets/dataset1/continuous/bigwig_2.bw differ diff --git a/tests/data/datasets/dataset1/continuous/chromSizes.txt b/tests/data/datasets/dataset1/continuous/chromSizes.txt new file mode 100644 index 000000000..a7ee98fa9 --- /dev/null +++ b/tests/data/datasets/dataset1/continuous/chromSizes.txt @@ -0,0 +1 @@ +chr19 50000000 diff --git a/tests/data/datasets/dataset1/continuous/wiggle.txt b/tests/data/datasets/dataset1/continuous/wiggle.txt new file mode 100644 index 000000000..475f9cd55 --- /dev/null +++ b/tests/data/datasets/dataset1/continuous/wiggle.txt @@ -0,0 +1,11 @@ +track type=wiggle_0 name="variableStep" description="variableStep format" visibility=full autoScale=off viewLimits=0.0:25.0 color=50,150,255 yLineMark=11.76 yLineOnOff=on priority=10 +variableStep chrom=chr19 span=5 +49304701 10.0 +49304901 12.5 +49305401 15.0 +49305601 17.5 +49305901 20.0 +49306081 17.5 +49306301 15.0 +49306691 12.5 +49307871 10.0 diff --git a/tests/data/datasets/dataset1/continuous/wiggle_2.txt b/tests/data/datasets/dataset1/continuous/wiggle_2.txt new file mode 
100644 index 000000000..6f1b2edbc --- /dev/null +++ b/tests/data/datasets/dataset1/continuous/wiggle_2.txt @@ -0,0 +1,16 @@ +# 200 base wide points graph at every 300 bases, 50 pixel high graph +# autoScale off and viewing range set to [0:1000] +# priority = 20 positions this as the second graph +# Note, one-relative coordinate system in use for this format +track type=wiggle_0 name="fixedStep" description="fixedStep format" +fixedStep chrom=chr19 start=49307401 step=300 span=2 +1000 + 900 + 800 + 700 + 600 + 500 + 400 + 300 + 200 + 100 diff --git a/tests/datadriven/test_continuous.py b/tests/datadriven/test_continuous.py new file mode 100644 index 000000000..847235f8d --- /dev/null +++ b/tests/datadriven/test_continuous.py @@ -0,0 +1,108 @@ +""" +Unit tests for continuous objects. This is used for all tests +that can be performed in isolation from input data. +""" +from __future__ import division +from __future__ import print_function +from __future__ import unicode_literals + +import unittest +import math + +from nose.tools import raises + +import ga4gh.server.datarepo as datarepo +import ga4gh.server.datamodel.continuous as continuous +import ga4gh.server.datamodel.datasets as datasets +import ga4gh.server.exceptions as exceptions + +import tests.paths as paths + + +class TestContinuous(unittest.TestCase): + """ + Unit tests for continuous data + """ + def _createContinuousSet(self): + """ + Creates a ContinuousSet from the specified directory. 
+ """ + self._continuousSetName = "testContinuous" + self._repo = datarepo.SqlDataRepository(paths.testDataRepo) + self._repo.open(datarepo.MODE_READ) + self._dataset = datasets.Dataset("testDs") + self._continuousSet = continuous.readSet( + self._dataset, self._continuousSetName) + + def setUp(self): + dataDir = "tests/data/datasets/dataset1/continuous" + self._wiggleFile = dataDir + "/wiggle_2.txt" + self._bigWigFile = dataDir + "/bigwig_1.bw" + + def testReadWiggle(self): + continuousObj = continuous.WiggleReader( + 'chr19', 49307698, 49308020) + obj = continuousObj.wiggleFileToProtocol(self._wiggleFile) + self.assertEqual(obj.start, 49307700) + self.assertEqual(obj.values[0], 900) + self.assertEqual(obj.values[300], 800) + self.assertEqual(len(obj.values), 302) + + def getTuples(self, generator): + """ + Convert a generator of continuous objects into tuples of + (position,value). + """ + tuples = [] + for obj in generator: + for i, value in enumerate(obj.values): + if not math.isnan(value): + tuples.append((obj.start+i, value)) + return tuples + + def testReadBigWig(self): + continuousObj = continuous.BigWigDataSource(self._bigWigFile) + generator = continuousObj.bigWigToProtocol("chr19", 49305897, 49306090) + tuples = self.getTuples(generator) + self.assertEqual(tuples[0], (49305900, 20.0)) + self.assertEqual(tuples[4], (49305904, 20.0)) + self.assertEqual(tuples[5], (49306080, 17.5)) + self.assertEqual(tuples[9], (49306084, 17.5)) + self.assertEqual(len(tuples), 10) + + def testReadBigWigAllNan(self): + continuousObj = continuous.BigWigDataSource(self._bigWigFile) + generator = continuousObj.bigWigToProtocol( + "chr19", 49305927, 49305997) + tuples = self.getTuples(generator) + self.assertEqual(len(tuples), 0) + + @raises(exceptions.ReferenceRangeErrorException) + def testReadBigWigInvalidRange(self): + continuousObj = continuous.BigWigDataSource(self._bigWigFile) + generator = continuousObj.bigWigToProtocol( + "chr19", 493059030, 49305934) + next(generator) 
+ + def testReadBigWigOutsideReferenceRange(self): + continuousObj = continuous.BigWigDataSource(self._bigWigFile) + generator = continuousObj.bigWigToProtocol( + "chr19", 49306897, 493059304) + tuples = self.getTuples(generator) + self.assertEqual(len(tuples), 5) + + def testReadBigWigNegativeReferenceRange(self): + continuousObj = continuous.BigWigDataSource(self._bigWigFile) + generator = continuousObj.bigWigToProtocol("chr19", -1, 5) + tuples = self.getTuples(generator) + self.assertEqual(len(tuples), 0) + + @raises(exceptions.ReferenceNameNotFoundException) + def testReadBigWigChromsomeException(self): + """ + Test for catching bad chromosome names. + """ + continuousObj = continuous.BigWigDataSource(self._bigWigFile) + generator = continuousObj.bigWigToProtocol( + "chr&19", 49305602, 49308000) + next(generator) diff --git a/tests/end_to_end/test_client_json.py b/tests/end_to_end/test_client_json.py index fe971ad37..837588888 100644 --- a/tests/end_to_end/test_client_json.py +++ b/tests/end_to_end/test_client_json.py @@ -423,6 +423,34 @@ def testSearchFeatureSets(self): iterator, "featuresets-search", "--datasetId {}".format(dataset.id)) + def testSearchContinuous(self): + for dataset in self._client.search_datasets(): + datasetId = dataset.id + for continuousSet in self._client.search_continuous_sets( + datasetId): + iterator = self._client.search_continuous( + continuousSet.id, 'chr19', 49305897, 49306090) + self.verifyParsedOutputsEqual( + iterator, "continuous-search", + "--continuousSetId {} --referenceName {}" + " --start {} --end {}".format( + continuousSet.id, 'chr19', 49305897, 49306090)) + + def testGetContinuousSets(self): + for dataset in self._client.search_datasets(): + datasetId = dataset.id + for continuousSet in self._client.search_continuous_sets( + datasetId): + self.verifyParsedOutputsEqual( + [continuousSet], "continuoussets-get", continuousSet.id) + + def testSearchContinuousSets(self): + for dataset in self._client.search_datasets(): + 
iterator = self._client.search_continuous_sets(dataset.id) + self.verifyParsedOutputsEqual( + iterator, "continuoussets-search", + "--datasetId {}".format(dataset.id)) + def testSearchGenotypePhenotype(self): phenotype_id = "http://ohsu.edu/cgd/87795e43" test_executed = 0 diff --git a/tests/end_to_end/test_sequence_annotations.py b/tests/end_to_end/test_sequence_annotations.py index c802cc265..ac77712cb 100644 --- a/tests/end_to_end/test_sequence_annotations.py +++ b/tests/end_to_end/test_sequence_annotations.py @@ -151,3 +151,26 @@ def sendJsonPostRequest(self, path, data): return self.app.post( path, headers={'Content-type': 'application/json'}, data=data) + + def getAllContinuousSets(self): + datasetId = self.getAllDatasets()[0].id + path = 'continuoussets/search' + request = protocol.SearchContinuousSetsRequest() + request.dataset_id = datasetId + responseData = self.sendSearchRequest( + path, request, protocol.SearchContinuousSetsResponse) + return responseData.continuous_sets + + def testSearchContinuous(self): + continuousSets = self.getAllContinuousSets() + for continuousSet in continuousSets: + path = "continuous/search" + request = protocol.SearchContinuousRequest() + request.continuous_set_id = continuousSet.id + request.start = 49200000 + request.end = 49308000 + request.reference_name = "chr19" + responseData = self.sendSearchRequest( + path, request, protocol.SearchContinuousResponse) + for continuous in responseData.continuous: + self.assertGreater(len(continuous.values), 0) diff --git a/tests/paths.py b/tests/paths.py index efc26a401..742d30892 100644 --- a/tests/paths.py +++ b/tests/paths.py @@ -69,6 +69,11 @@ def getGa4ghFilePath(): featuresPath = os.path.join(featuresDir, 'gencodeV21Set1.db') featuresPath2 = os.path.join(featuresDir, 'specialCasesTest.db') +# continuous +continuousSetName = 'bigwig_1' +continuousDir = os.path.join(datasetDir, 'continuous') +continuousPath = os.path.join(continuousDir, 'bigwig_1.bw') + # g2p phenotypesDir = 
os.path.join(datasetDir, 'phenotypes') phenotypeAssociationSetPath = os.path.join(phenotypesDir, 'cgd') diff --git a/tests/unit/test_compound_ids.py b/tests/unit/test_compound_ids.py index 6cdddad54..75d908ec6 100644 --- a/tests/unit/test_compound_ids.py +++ b/tests/unit/test_compound_ids.py @@ -17,6 +17,7 @@ import ga4gh.server.datamodel.reads as reads import ga4gh.server.datamodel.rna_quantification as rna_quantification import ga4gh.server.datamodel.sequence_annotations as sequence_annotations +import ga4gh.server.datamodel.continuous as continuous class ExampleCompoundId(datamodel.CompoundId): @@ -190,6 +191,10 @@ def getFeatureSet(self): return sequence_annotations.AbstractFeatureSet( self.getDataset(), "featureSet") + def getContinuousSet(self): + return continuous.AbstractContinuousSet( + self.getDataset(), "continuousSet") + def getRnaQuantificationSet(self): return rna_quantification.AbstractRnaQuantificationSet( self.getDataset(), "rnaQuantificationSet") @@ -461,6 +466,28 @@ def testFeatureSetParse(self): self.assertEqual(cid.feature_set, "b") self.verifyParseFailure(idStr, datamodel.FeatureSetCompoundId) + def testContinuousSet(self): + continuousSet = self.getContinuousSet() + dataset = continuousSet.getParentContainer() + localId = "continuousSet" + cid = datamodel.ContinuousSetCompoundId( + dataset.getCompoundId(), localId) + self.assertRaises( + ValueError, datamodel.ContinuousSetCompoundId, + dataset.getCompoundId()) + self.assertEqual(cid.dataset, dataset.getLocalId()) + self.assertEqual(cid.continuous_set, continuousSet.getLocalId()) + self.assertEqual(cid.dataset_id, dataset.getId()) + self.assertEqual(cid.continuous_set_id, continuousSet.getId()) + + def testContinuous(self): + idStr = '["a","b"]' + obfuscated = datamodel.CompoundId.obfuscate(idStr) + cid = datamodel.ContinuousSetCompoundId.parse(obfuscated) + self.assertEqual(cid.dataset, "a") + self.assertEqual(cid.continuous_set, "b") + self.verifyParseFailure(idStr, 
datamodel.ContinuousSetCompoundId) + def testRnaQuantification(self): rnaQuantification = self.getRnaQuantification() rnaQuantificationSet = rnaQuantification.getParentContainer() diff --git a/tests/unit/test_data_interface.py b/tests/unit/test_data_interface.py index 95f1bd660..e8c65e845 100644 --- a/tests/unit/test_data_interface.py +++ b/tests/unit/test_data_interface.py @@ -123,6 +123,10 @@ def testGetFeature(self): feature = self._client.get_feature(featureId) self.assertEqual(repoFeature, feature) + def testGetContinuousSet(self): + self._testGetMethod( + self._repo.allContinuousSets, self._client.get_continuous_set) + def testGetReferenceSet(self): self._testGetMethod( self._repo.getReferenceSets, self._client.get_reference_set) @@ -216,6 +220,12 @@ def testSearchFeatureSets(self): self._client.search_feature_sets, self._repo.getDatasets()) + def testSearchContinuousSets(self): + self._testSearchMethodInContainer( + 'getContinuousSets', + self._client.search_continuous_sets, + self._repo.getDatasets()) + def testSearchCallSets(self): self._testSearchMethodInContainer( 'getCallSets', diff --git a/tests/unit/test_imports.py b/tests/unit/test_imports.py index a793b0d38..faf7ade0c 100644 --- a/tests/unit/test_imports.py +++ b/tests/unit/test_imports.py @@ -186,6 +186,7 @@ class ImportGraphLayerChecker(object): 'ga4gh/server/datamodel/ontologies.py', 'ga4gh/server/datamodel/obo_parser.py', 'ga4gh/server/datamodel/sequence_annotations.py', + 'ga4gh/server/datamodel/continuous.py', 'ga4gh/server/datamodel/genotype_phenotype.py', 'ga4gh/server/datamodel/genotype_phenotype_featureset.py', 'ga4gh/server/gff3.py', diff --git a/tests/unit/test_repo_manager.py b/tests/unit/test_repo_manager.py index 7194da877..84ceb4757 100644 --- a/tests/unit/test_repo_manager.py +++ b/tests/unit/test_repo_manager.py @@ -127,6 +127,15 @@ def addFeatureSet(self): self._referenceSetName, self._ontologyName) self.runCommand(cmd) + def addContinuousSet(self): + continuousPath = 
paths.continuousPath + self._continuousSetName = paths.continuousSetName + cmd = ( + "add-continuousset {} {} {} --referenceSetName={} ").format( + self._repoPath, self._datasetName, continuousPath, + self._referenceSetName) + self.runCommand(cmd) + def addPhenotypeAssociationSet(self): phenotypeAssociationSetPath = paths.phenotypeAssociationSetPath self._phenotypeAssociationSetName = "test_phenotypeAssociationSet" @@ -144,6 +153,12 @@ def getFeatureSet(self): featureSet = dataset.getFeatureSetByName(self._featureSetName) return featureSet + def getContinuousSet(self): + repo = self.readRepo() + dataset = repo.getDatasetByName(self._datasetName) + continuousSet = dataset.getContinuousSetByName(self._continuousSetName) + return continuousSet + class TestAddFeatureSet(AbstractRepoManagerTest): @@ -222,6 +237,61 @@ def testRemoveFeatureSet(self): self.getFeatureSet() +class TestAddContinuousSet(AbstractRepoManagerTest): + + def setUp(self): + super(TestAddContinuousSet, self).setUp() + self.init() + self.addDataset() + self.addReferenceSet() + + def testAddContinuousSet(self): + self.addContinuousSet() + continuousSet = self.getContinuousSet() + self.assertEqual(continuousSet.getLocalId(), self._continuousSetName) + self.assertEqual( + continuousSet._parentContainer.getLocalId(), self._datasetName) + self.assertEqual( + continuousSet.getReferenceSet().getLocalId(), + self._referenceSetName) + # self.assertEqual( + # continuousSet.getSourceUri(), self._sourceUri) + + def testAddContinuousSetNoReferenceSet(self): + continuousPath = paths.continuousPath + cmd = "add-continuousset {} {} {}".format( + self._repoPath, self._datasetName, continuousPath) + self.assertRaises( + exceptions.RepoManagerException, self.runCommand, cmd) + + def testAddContinuousSetBadReferenceSet(self): + continuousPath = paths.continuousPath + cmd = ( + "add-continuousset {} {} {} --referenceSetName=notafefset" + ).format(self._repoPath, self._datasetName, continuousPath) + self.assertRaises( + 
exceptions.ReferenceSetNameNotFoundException, + self.runCommand, cmd) + + +class TestRemoveContinuousSet(AbstractRepoManagerTest): + + def setUp(self): + super(TestRemoveContinuousSet, self).setUp() + self.init() + self.addDataset() + self.addReferenceSet() + self.addContinuousSet() + + def testRemoveContinuousSet(self): + continuousSet = self.getContinuousSet() + cmd = "remove-continuousset {} {} {} -f".format( + self._repoPath, self._datasetName, continuousSet.getLocalId()) + self.runCommand(cmd) + with self.assertRaises(exceptions.ContinuousSetNameNotFoundException): + self.getContinuousSet() + + class TestAddDataset(AbstractRepoManagerTest): def setUp(self): @@ -500,6 +570,7 @@ def testVerify(self): self.addReferenceSet() self.addReadGroupSet() self.addFeatureSet() + self.addContinuousSet() self.addVariantSet() cmd = "verify {}".format(self._repoPath) self.runCommand(cmd) @@ -858,6 +929,22 @@ def testFeatureSetDelete(self): self.assertEqual(len(self.dataset1.getFeatureSets()), 0) self.assertEqual(len(self.dataset2.getFeatureSets()), 1) + def testContinuousSetDelete(self): + cmdString = "add-continuousset {} {} {} -R {}" + addContinuousSetCmd1 = cmdString.format( + self._repoPath, self.dataset1Name, paths.continuousPath, + self._referenceSetName) + self.runCommand(addContinuousSetCmd1) + addContinuousSetCmd2 = cmdString.format( + self._repoPath, self.dataset2Name, paths.continuousPath, + self._referenceSetName) + self.runCommand(addContinuousSetCmd2) + removeCmd = "remove-continuousset {} {} {} -f".format( + self._repoPath, self.dataset1Name, paths.continuousSetName) + self.runCommand(removeCmd) + self.readDatasets() + self.assertEqual(len(self.dataset1.getContinuousSets()), 0) + class TestInvalidVariantIndexFile(AbstractRepoManagerTest): """ diff --git a/tests/unit/test_simulated_stack.py b/tests/unit/test_simulated_stack.py index a9954976e..03e604398 100644 --- a/tests/unit/test_simulated_stack.py +++ b/tests/unit/test_simulated_stack.py @@ -15,6 +15,7 @@ 
import ga4gh.server.datamodel.references as references import ga4gh.server.datamodel.variants as variants import ga4gh.server.datamodel.sequence_annotations as sequence_annotations +import ga4gh.server.datamodel.continuous as continuous import ga4gh.server.frontend as frontend import ga4gh.schemas.protocol as protocol @@ -196,6 +197,12 @@ def verifyFeaturesEquivalent(self, f1, f2): self.assertEqual(f1.parent_id, f2.parent_id) self.assertEqual(f1.feature_set_id, f2.feature_set_id) + def verifyContinuousSetsEqual(self, gaContinuousSet, continuousSet): + dataset = continuousSet.getParentContainer() + self.assertEqual(gaContinuousSet.id, continuousSet.getId()) + self.assertEqual(gaContinuousSet.dataset_id, dataset.getId()) + self.assertEqual(gaContinuousSet.name, continuousSet.getLocalId()) + def verifyReferencesEqual(self, gaReference, reference): self.assertEqual(gaReference.id, reference.getId()) self.assertEqual(gaReference.name, reference.getName()) @@ -783,6 +790,34 @@ def testFeatureSetsSearch(self): request.dataset_id = badId self.verifySearchMethodFails(request, path) + def testGetContinuousSet(self): + path = "/continuoussets" + for dataset in self.dataRepo.getDatasets(): + for continuousSet in dataset.getContinuousSets(): + responseObject = self.sendGetObject( + path, continuousSet.getId(), protocol.ContinuousSet) + self.verifyContinuousSetsEqual(responseObject, continuousSet) + for badId in self.getBadIds(): + continuousSet = continuous.AbstractContinuousSet( + dataset, badId) + self.verifyGetMethodFails(path, continuousSet.getId()) + for badId in self.getBadIds(): + self.verifyGetMethodFails(path, badId) + + def testContinuousSetsSearch(self): + path = '/continuoussets/search' + for dataset in self.dataRepo.getDatasets(): + continuousSets = dataset.getContinuousSets() + request = protocol.SearchContinuousSetsRequest() + request.dataset_id = dataset.getId() + self.verifySearchMethod( + request, path, protocol.SearchContinuousSetsResponse, + continuousSets, 
self.verifyContinuousSetsEqual) + for badId in self.getBadIds(): + request = protocol.SearchContinuousSetsRequest() + request.dataset_id = badId + self.verifySearchMethodFails(request, path) + def testGetFeature(self): dataset = self.dataRepo.getDatasets()[0] featureSet = dataset.getFeatureSets()[0]