From 2afdfdb18354be5e89a50b614a3224d24952b989 Mon Sep 17 00:00:00 2001 From: David Steinberg Date: Wed, 1 Feb 2017 22:35:18 -0800 Subject: [PATCH] Use ORM for datarepo (#1485) * Use peewee models for datarepo * Adding other data types * Fix exception name for ontology * Move models to their own file * Add models to test imports * Flake fixes * Remove commits from prepare compliance data * Adjust visual indent * Add future imports Write out short variable names, thanks @dcolligan * Biosample refactor Better exception reporting * Remove short names * Change which exception is expected Flake fixes. Fix requirements * Lengthen variable names * Add URL to peewee API * Use updated client * Set constraints back * New species field replaces ncbitaxonid Missed some biosample refactor --- ga4gh/server/datamodel/bio_metadata.py | 30 +- ga4gh/server/datamodel/datasets.py | 6 +- .../genotype_phenotype_featureset.py | 4 +- ga4gh/server/datamodel/ontologies.py | 6 +- ga4gh/server/datamodel/reads.py | 25 +- ga4gh/server/datamodel/references.py | 37 +- ga4gh/server/datamodel/rna_quantification.py | 4 +- .../server/datamodel/sequence_annotations.py | 4 +- ga4gh/server/datamodel/variants.py | 26 +- ga4gh/server/datarepo.py | 1052 ++++++----------- ga4gh/server/repo/models.py | 285 +++++ requirements.txt | 1 + scripts/prepare_compliance_data.py | 5 +- tests/unit/test_imports.py | 1 + tests/unit/test_repo_manager.py | 16 +- 15 files changed, 756 insertions(+), 746 deletions(-) create mode 100644 ga4gh/server/repo/models.py diff --git a/ga4gh/server/datamodel/bio_metadata.py b/ga4gh/server/datamodel/bio_metadata.py index e2d05876d..ecdcdb6e9 100644 --- a/ga4gh/server/datamodel/bio_metadata.py +++ b/ga4gh/server/datamodel/bio_metadata.py @@ -66,14 +66,14 @@ def populateFromJson(self, jsonString): self._info[key] = {"values": protocol.toJsonDict(parsed.info[key])} return self - def populateFromRow(self, row): + def populateFromRow(self, biosampleRecord): # TODO coerce to types - 
self._created = row[b'created'] - self._updated = row[b'updated'] - self._description = row[b'description'] - self._disease = json.loads(row[b'disease']) - self._individualId = row[b'individualId'] - self._info = json.loads(row[b'info']) + self._created = biosampleRecord.created + self._updated = biosampleRecord.updated + self._description = biosampleRecord.description + self._disease = json.loads(biosampleRecord.disease) + self._individualId = biosampleRecord.individualid + self._info = json.loads(biosampleRecord.info) return self def setIndividualId(self, individualId): @@ -146,15 +146,15 @@ def toProtocolElement(self): gaIndividual.info[key].values.add().string_value = value return gaIndividual - def populateFromRow(self, row): + def populateFromRow(self, individualRecord): # TODO coerce to types - self._name = row[b'name'] - self._created = row[b'created'] - self._updated = row[b'updated'] - self._description = row[b'description'] - self._species = json.loads(row[b'species']) - self._sex = json.loads(row[b'sex']) - self._info = json.loads(row[b'info']) + self._name = individualRecord.name + self._created = individualRecord.created + self._updated = individualRecord.updated + self._description = individualRecord.description + self._species = json.loads(individualRecord.species) + self._sex = json.loads(individualRecord.sex) + self._info = json.loads(individualRecord.info) return self def populateFromJson(self, jsonString): diff --git a/ga4gh/server/datamodel/datasets.py b/ga4gh/server/datamodel/datasets.py index 328469a81..eb8779926 100644 --- a/ga4gh/server/datamodel/datasets.py +++ b/ga4gh/server/datamodel/datasets.py @@ -60,13 +60,13 @@ def __init__(self, localId): self._rnaQuantificationSetNameMap = {} self._info = {} - def populateFromRow(self, row): + def populateFromRow(self, dataset): """ Populates the instance variables of this Dataset from the specified database row. 
""" - self._description = row[b'description'] - self._info = json.loads(row[b'info']) + self._description = dataset.description + self._info = json.loads(dataset.info) def setDescription(self, description): """ diff --git a/ga4gh/server/datamodel/genotype_phenotype_featureset.py b/ga4gh/server/datamodel/genotype_phenotype_featureset.py index 77b000380..0db7c137d 100644 --- a/ga4gh/server/datamodel/genotype_phenotype_featureset.py +++ b/ga4gh/server/datamodel/genotype_phenotype_featureset.py @@ -44,12 +44,12 @@ def __init__(self, parentContainer, localId): parentContainer, localId) # mimic featureset - def populateFromRow(self, row): + def populateFromRow(self, featureSetRecord): """ Populates the instance variables of this FeatureSet from the specified DB row. """ - self._dbFilePath = row[b'dataUrl'] + self._dbFilePath = featureSetRecord.dataurl self.populateFromFile(self._dbFilePath) def populateFromFile(self, dataUrl): diff --git a/ga4gh/server/datamodel/ontologies.py b/ga4gh/server/datamodel/ontologies.py index 170fec485..4b4946eee 100644 --- a/ga4gh/server/datamodel/ontologies.py +++ b/ga4gh/server/datamodel/ontologies.py @@ -74,12 +74,12 @@ def populateFromFile(self, dataUrl): self._dataUrl = dataUrl self._readFile() - def populateFromRow(self, row): + def populateFromRow(self, ontologyRecord): """ Populates this Ontology using values in the specified DB row. """ - self._id = row[b'id'] - self._dataUrl = row[b'dataUrl'] + self._id = ontologyRecord.id + self._dataUrl = ontologyRecord.dataurl self._readFile() # TODO sanity check the stored values against what we have just read. 
diff --git a/ga4gh/server/datamodel/reads.py b/ga4gh/server/datamodel/reads.py index 826fcd9d5..4820ffcf8 100644 --- a/ga4gh/server/datamodel/reads.py +++ b/ga4gh/server/datamodel/reads.py @@ -382,19 +382,19 @@ def getBamHeaderReferenceSetName(self): """ return self._bamHeaderReferenceSetName - def populateFromRow(self, row): + def populateFromRow(self, readGroupSetRecord): """ Populates the instance variables of this ReadGroupSet from the specified database row. """ - self._dataUrl = row[b'dataUrl'] - self._indexFile = row[b'indexFile'] + self._dataUrl = readGroupSetRecord.dataurl + self._indexFile = readGroupSetRecord.indexfile self._programs = [] - for jsonDict in json.loads(row[b'programs']): + for jsonDict in json.loads(readGroupSetRecord.programs): program = protocol.fromJson(json.dumps(jsonDict), protocol.Program) self._programs.append(program) - stats = protocol.fromJson(row[b'stats'], protocol.ReadStats) + stats = protocol.fromJson(readGroupSetRecord.stats, protocol.ReadStats) self._numAlignedReads = stats.aligned_read_count self._numUnalignedReads = stats.unaligned_read_count @@ -747,18 +747,19 @@ def populateFromHeader(self, readGroupHeader): self._platformUnit = readGroupHeader.get('PU', None) self._runTime = readGroupHeader.get('DT', None) - def populateFromRow(self, row): + def populateFromRow(self, readGroupRecord): """ Populate the instance variables using the specified DB row. 
""" - self._sampleName = row[b'sampleName'] - self._biosampleId = row[b'biosampleId'] - self._description = row[b'description'] - self._predictedInsertSize = row[b'predictedInsertSize'] - stats = protocol.fromJson(row[b'stats'], protocol.ReadStats) + self._sampleName = readGroupRecord.samplename + self._biosampleId = readGroupRecord.biosampleid + self._description = readGroupRecord.description + self._predictedInsertSize = readGroupRecord.predictedinsertsize + stats = protocol.fromJson(readGroupRecord.stats, protocol.ReadStats) self._numAlignedReads = stats.aligned_read_count self._numUnalignedReads = stats.unaligned_read_count - experiment = protocol.fromJson(row[b'experiment'], protocol.Experiment) + experiment = protocol.fromJson( + readGroupRecord.experiment, protocol.Experiment) self._instrumentModel = experiment.instrument_model self._sequencingCenter = experiment.sequencing_center self._experimentDescription = experiment.description diff --git a/ga4gh/server/datamodel/references.py b/ga4gh/server/datamodel/references.py index aa28a339d..a6ceef41e 100644 --- a/ga4gh/server/datamodel/references.py +++ b/ga4gh/server/datamodel/references.py @@ -481,19 +481,21 @@ def populateFromFile(self, dataUrl): reference.setLength(len(bases)) self.addReference(reference) - def populateFromRow(self, row): + def populateFromRow(self, referenceSetRecord): """ Populates this reference set from the values in the specified DB row. 
""" - self._dataUrl = row[b'dataUrl'] - self._description = row[b'description'] - self._assemblyId = row[b'assemblyId'] - self._isDerived = bool(row[b'isDerived']) - self._md5checksum = row[b'md5checksum'] - self._species = json.loads(row[b'species']) - self._sourceAccessions = json.loads(row[b'sourceAccessions']) - self._sourceUri = row[b'sourceUri'] + self._dataUrl = referenceSetRecord.dataurl + self._description = referenceSetRecord.description + self._assemblyId = referenceSetRecord.assemblyid + self._isDerived = bool(referenceSetRecord.isderived) + self._md5checksum = referenceSetRecord.md5checksum + if referenceSetRecord.species is not None: + self.setSpeciesFromJson(referenceSetRecord.species) + self._sourceAccessions = json.loads( + referenceSetRecord.sourceaccessions) + self._sourceUri = referenceSetRecord.sourceuri def getDataUrl(self): """ @@ -519,17 +521,18 @@ class HtslibReference(datamodel.PysamDatamodelMixin, AbstractReference): def __init__(self, parentContainer, localId): super(HtslibReference, self).__init__(parentContainer, localId) - def populateFromRow(self, row): + def populateFromRow(self, referenceRecord): """ Populates this reference from the values in the specified DB row. 
""" - self._length = row[b'length'] - self._isDerived = bool(row[b'isDerived']) - self._md5checksum = row[b'md5checksum'] - self._species = json.loads(row[b'species']) - self._sourceAccessions = json.loads(row[b'sourceAccessions']) - self._sourceDivergence = row[b'sourceDivergence'] - self._sourceUri = row[b'sourceUri'] + self._length = referenceRecord.length + self._isDerived = bool(referenceRecord.isderived) + self._md5checksum = referenceRecord.md5checksum + if referenceRecord.species is not None: + self.setSpeciesFromJson(referenceRecord.species) + self._sourceAccessions = json.loads(referenceRecord.sourceaccessions) + self._sourceDivergence = referenceRecord.sourcedivergence + self._sourceUri = referenceRecord.sourceuri def getBases(self, start, end): self.checkQueryRange(start, end) diff --git a/ga4gh/server/datamodel/rna_quantification.py b/ga4gh/server/datamodel/rna_quantification.py index 75b17123c..545615a2c 100644 --- a/ga4gh/server/datamodel/rna_quantification.py +++ b/ga4gh/server/datamodel/rna_quantification.py @@ -179,12 +179,12 @@ def populateFromFile(self, dataUrl): self._db = SqliteRnaBackend(self._dbFilePath) self.addRnaQuants() - def populateFromRow(self, row): + def populateFromRow(self, quantificationSetRecord): """ Populates the instance variables of this RnaQuantificationSet from the specified DB row. 
""" - self._dbFilePath = row[b'dataUrl'] + self._dbFilePath = quantificationSetRecord.dataurl self._db = SqliteRnaBackend(self._dbFilePath) self.addRnaQuants() diff --git a/ga4gh/server/datamodel/sequence_annotations.py b/ga4gh/server/datamodel/sequence_annotations.py index 1c6255b51..b3e1ab272 100644 --- a/ga4gh/server/datamodel/sequence_annotations.py +++ b/ga4gh/server/datamodel/sequence_annotations.py @@ -340,12 +340,12 @@ def populateFromFile(self, dataUrl): self._dbFilePath = dataUrl self._db = Gff3DbBackend(self._dbFilePath) - def populateFromRow(self, row): + def populateFromRow(self, featureSetRecord): """ Populates the instance variables of this FeatureSet from the specified DB row. """ - self._dbFilePath = row[b'dataUrl'] + self._dbFilePath = featureSetRecord.dataurl self._db = Gff3DbBackend(self._dbFilePath) def getDataUrl(self): diff --git a/ga4gh/server/datamodel/variants.py b/ga4gh/server/datamodel/variants.py index 41ec2521b..71cb7be84 100644 --- a/ga4gh/server/datamodel/variants.py +++ b/ga4gh/server/datamodel/variants.py @@ -49,12 +49,11 @@ def __init__(self, parentContainer, localId): self._info = {} self._biosampleId = None - def populateFromRow(self, row): + def populateFromRow(self, callSetRecord): """ Populates this CallSet from the specified DB row. """ - # currently a noop - self._biosampleId = row[b'biosampleId'] + self._biosampleId = callSetRecord.biosampleid def toProtocolElement(self): """ @@ -459,19 +458,19 @@ def getDataUrlIndexPairs(self): """ return set(self._chromFileMap.values()) - def populateFromRow(self, row): + def populateFromRow(self, variantSetRecord): """ Populates this VariantSet from the specified DB row. """ - self._created = row[b'created'] - self._updated = row[b'updated'] + self._created = variantSetRecord.created + self._updated = variantSetRecord.updated self._chromFileMap = {} # We can't load directly as we want tuples to be stored # rather than lists. 
- for key, value in json.loads(row[b'dataUrlIndexMap']).items(): + for key, value in json.loads(variantSetRecord.dataurlindexmap).items(): self._chromFileMap[key] = tuple(value) self._metadata = [] - for jsonDict in json.loads(row[b'metadata']): + for jsonDict in json.loads(variantSetRecord.metadata): metadata = protocol.fromJson(json.dumps(jsonDict), protocol.VariantSetMetadata) self._metadata.append(metadata) @@ -1048,14 +1047,15 @@ def populateFromFile(self, varFile, annotationType): self._creationTime = self._analysis.created self._updatedTime = datetime.datetime.now().isoformat() + "Z" - def populateFromRow(self, row): + def populateFromRow(self, annotationSetRecord): """ Populates this VariantAnnotationSet from the specified DB row. """ - self._annotationType = row[b'annotationType'] - self._analysis = protocol.fromJson(row[b'analysis'], protocol.Analysis) - self._creationTime = row[b'created'] - self._updatedTime = row[b'updated'] + self._annotationType = annotationSetRecord.annotationtype + self._analysis = protocol.fromJson( + annotationSetRecord.analysis, protocol.Analysis) + self._creationTime = annotationSetRecord.created + self._updatedTime = annotationSetRecord.updated def getAnnotationType(self): """ diff --git a/ga4gh/server/datarepo.py b/ga4gh/server/datarepo.py index 16194fdc1..ee9ac7dd8 100644 --- a/ga4gh/server/datarepo.py +++ b/ga4gh/server/datarepo.py @@ -8,6 +8,7 @@ import json import os import sqlite3 +import datetime import ga4gh.server.datamodel as datamodel import ga4gh.server.datamodel.datasets as datasets @@ -21,8 +22,9 @@ import ga4gh.server.datamodel.genotype_phenotype_featureset as g2pFeatureset import ga4gh.server.datamodel.rna_quantification as rna_quantification import ga4gh.server.exceptions as exceptions +import ga4gh.server.protocol as protocol -from ga4gh.server import protocol +import repo.models as m MODE_READ = 'r' MODE_WRITE = 'w' @@ -465,6 +467,8 @@ def __init__(self, fileName): self._creationTimeStamp = None # Connection 
to the DB. self._dbConnection = None + self.database = m.SqliteDatabase(self._dbFilename, **{}) + m.databaseProxy.initialize(self.database) def _checkWriteMode(self): if self._openMode != MODE_WRITE: @@ -489,11 +493,6 @@ def open(self, mode=MODE_READ): self._openMode = mode if mode == MODE_READ: self.assertExists() - self._safeConnect() - # Turn on foreign key constraints. - cursor = self._dbConnection.cursor() - cursor.execute("PRAGMA foreign_keys = ON;") - self._dbConnection.commit() if mode == MODE_READ: # This is part of the transitional behaviour where # we load the whole DB into memory to get access to @@ -506,7 +505,6 @@ def commit(self): this function if the repo is not opened in write-mode. """ self._checkWriteMode() - self._dbConnection.commit() def close(self): """ @@ -515,8 +513,6 @@ def close(self): if self._openMode is None: raise ValueError("Repo already closed") self._openMode = None - self._dbConnection.close() - self._dbConnection = None def verify(self): """ @@ -618,390 +614,268 @@ def _safeConnect(self): # raised e.g. 
when directory passed as dbFilename raise exceptions.RepoInvalidDatabaseException(self._dbFilename) - def _createSystemTable(self, cursor): - sql = """ - CREATE TABLE System ( - key TEXT NOT NULL PRIMARY KEY, - value TEXT NOT NULL - ); - """ - cursor.execute(sql) - cursor.execute( - "INSERT INTO System VALUES " - "('{}', '{}')".format( - self.systemKeySchemaVersion, self.version)) - cursor.execute( - "INSERT INTO System VALUES ('{}', datetime('now'))".format( - self.systemKeyCreationTimeStamp)) - - def _readSystemTable(self, cursor): - sql = "SELECT key, value FROM System;" - cursor.execute(sql) - config = {} - for row in cursor: - config[row[0]] = row[1] - row = cursor.fetchone() - self._schemaVersion = config[self.systemKeySchemaVersion] - self._creationTimeStamp = config[self.systemKeyCreationTimeStamp] + def _createSystemTable(self): + self.database.create_table(m.System) + m.System.create( + key=self.systemKeySchemaVersion, value=self.version) + m.System.create( + key=self.systemKeyCreationTimeStamp, value=datetime.datetime.now()) + + def _readSystemTable(self): + if not self.exists(): + raise exceptions.RepoNotFoundException( + self._dbFilename) + try: + self._schemaVersion = m.System.get( + m.System.key == self.systemKeySchemaVersion).value + self._creationTimeStamp = m.System.get( + m.System.key == self.systemKeyCreationTimeStamp).value + except Exception: + raise exceptions.RepoInvalidDatabaseException(self._dbFilename) schemaVersion = self.SchemaVersion(self._schemaVersion) if schemaVersion.major != self.version.major: raise exceptions.RepoSchemaVersionMismatchException( schemaVersion, self.version) - def _createOntologyTable(self, cursor): - sql = """ - CREATE TABLE Ontology( - id TEXT NOT NULL PRIMARY KEY, - name TEXT NOT NULL, - dataUrl TEXT NOT NULL, - ontologyPrefix TEXT NOT NULL, - UNIQUE (name) - ); - """ - cursor.execute(sql) + def _createOntologyTable(self): + self.database.create_table(m.Ontology) def insertOntology(self, ontology): """ Inserts 
the specified ontology into this repository. """ - sql = """ - INSERT INTO Ontology(id, name, dataUrl, ontologyPrefix) - VALUES (?, ?, ?, ?); - """ - cursor = self._dbConnection.cursor() + try: + m.Ontology.create( + id=ontology.getName(), + name=ontology.getName(), + dataurl=ontology.getDataUrl(), + ontologyprefix=ontology.getOntologyPrefix()) + except Exception: + raise exceptions.DuplicateNameException( + ontology.getName()) # TODO we need to create a proper ID when we're doing ID generation # for the rest of the container objects. - try: - cursor.execute(sql, ( - ontology.getName(), - ontology.getName(), - ontology.getDataUrl(), - ontology.getOntologyPrefix())) - except sqlite3.IntegrityError: - raise exceptions.DuplicateNameException(ontology.getName()) - - def _readOntologyTable(self, cursor): - cursor.row_factory = sqlite3.Row - cursor.execute("SELECT * FROM Ontology;") - for row in cursor: - ontology = ontologies.Ontology(row[b'name']) - ontology.populateFromRow(row) + + def _readOntologyTable(self): + for ont in m.Ontology.select(): + ontology = ontologies.Ontology(ont.name) + ontology.populateFromRow(ont) self.addOntology(ontology) def removeOntology(self, ontology): """ Removes the specified ontology term map from this repository. """ - sql = "DELETE FROM Ontology WHERE name=?" 
- cursor = self._dbConnection.cursor() - cursor.execute(sql, (ontology.getName(),)) - - def _createReferenceTable(self, cursor): - sql = """ - CREATE TABLE Reference ( - id TEXT PRIMARY KEY, - name TEXT NOT NULL, - referenceSetId TEXT NOT NULL, - length INTEGER, - isDerived INTEGER, - md5checksum TEXT, - species TEXT, - sourceAccessions TEXT, - sourceDivergence REAL, - sourceUri TEXT, - UNIQUE (referenceSetId, name), - FOREIGN KEY(referenceSetId) REFERENCES ReferenceSet(id) - ON DELETE CASCADE - ); - """ - cursor.execute(sql) + q = m.Ontology.delete().where(id == ontology.getId()) + q.execute() + + def _createReferenceTable(self): + self.database.create_table(m.Reference) def insertReference(self, reference): """ Inserts the specified reference into this repository. """ - sql = """ - INSERT INTO Reference ( - id, referenceSetId, name, length, isDerived, md5checksum, - species, sourceAccessions, sourceUri) - VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?); - """ - cursor = self._dbConnection.cursor() - cursor.execute(sql, ( - reference.getId(), reference.getParentContainer().getId(), - reference.getLocalId(), reference.getLength(), - reference.getIsDerived(), reference.getMd5Checksum(), - json.dumps(reference.getSpecies()), - # We store the list of sourceAccessions as a JSON string. Perhaps - # this should be another table? 
- json.dumps(reference.getSourceAccessions()), - reference.getSourceUri())) - - def _readReferenceTable(self, cursor): - cursor.row_factory = sqlite3.Row - cursor.execute("SELECT * FROM Reference;") - for row in cursor: - referenceSet = self.getReferenceSet(row[b'referenceSetId']) - reference = references.HtslibReference(referenceSet, row[b'name']) - reference.populateFromRow(row) - assert reference.getId() == row[b"id"] + m.Reference.create( + id=reference.getId(), + referencesetid=reference.getParentContainer().getId(), + name=reference.getLocalId(), + length=reference.getLength(), + isderived=reference.getIsDerived(), + species=reference.getSpecies(), + md5checksum=reference.getMd5Checksum(), + sourceaccessions=json.dumps(reference.getSourceAccessions()), + sourceuri=reference.getSourceUri()) + + def _readReferenceTable(self): + for referenceRecord in m.Reference.select(): + referenceSet = self.getReferenceSet( + referenceRecord.referencesetid.id) + reference = references.HtslibReference( + referenceSet, referenceRecord.name) + reference.populateFromRow(referenceRecord) + assert reference.getId() == referenceRecord.id referenceSet.addReference(reference) - def _createReferenceSetTable(self, cursor): - sql = """ - CREATE TABLE ReferenceSet ( - id TEXT NOT NULL PRIMARY KEY, - name TEXT NOT NULL, - description TEXT, - assemblyId TEXT, - isDerived INTEGER, - md5checksum TEXT, - species TEXT, - sourceAccessions TEXT, - sourceUri TEXT, - dataUrl TEXT NOT NULL, - UNIQUE (name) - ); - """ - cursor.execute(sql) + def _createReferenceSetTable(self): + self.database.create_table(m.Referenceset) def insertReferenceSet(self, referenceSet): """ Inserts the specified referenceSet into this repository. 
""" - sql = """ - INSERT INTO ReferenceSet ( - id, name, description, assemblyId, isDerived, md5checksum, - species, sourceAccessions, sourceUri, dataUrl) - VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?); - """ - cursor = self._dbConnection.cursor() try: - cursor.execute(sql, ( - referenceSet.getId(), referenceSet.getLocalId(), - referenceSet.getDescription(), referenceSet.getAssemblyId(), - referenceSet.getIsDerived(), referenceSet.getMd5Checksum(), - json.dumps(referenceSet.getSpecies(), protocol.OntologyTerm), - # We store the list of sourceAccessions as a JSON string. - # Perhaps this should be another table? - json.dumps(referenceSet.getSourceAccessions()), - referenceSet.getSourceUri(), referenceSet.getDataUrl())) - except sqlite3.IntegrityError: - raise exceptions.DuplicateNameException(referenceSet.getLocalId()) - for reference in referenceSet.getReferences(): - self.insertReference(reference) - - def _readReferenceSetTable(self, cursor): - cursor.row_factory = sqlite3.Row - cursor.execute("SELECT * FROM ReferenceSet;") - for row in cursor: - referenceSet = references.HtslibReferenceSet(row[b'name']) - referenceSet.populateFromRow(row) - assert referenceSet.getId() == row[b"id"] + m.Referenceset.create( + id=referenceSet.getId(), + name=referenceSet.getLocalId(), + description=referenceSet.getDescription(), + assemblyid=referenceSet.getAssemblyId(), + isderived=referenceSet.getIsDerived(), + species=referenceSet.getSpecies(), + md5checksum=referenceSet.getMd5Checksum(), + sourceaccessions=json.dumps( + referenceSet.getSourceAccessions()), + sourceuri=referenceSet.getSourceUri(), + dataurl=referenceSet.getDataUrl()) + for reference in referenceSet.getReferences(): + self.insertReference(reference) + except Exception: + raise exceptions.DuplicateNameException( + referenceSet.getLocalId()) + + def _readReferenceSetTable(self): + for referenceSetRecord in m.Referenceset.select(): + referenceSet = references.HtslibReferenceSet( + referenceSetRecord.name) + 
referenceSet.populateFromRow(referenceSetRecord) + assert referenceSet.getId() == referenceSetRecord.id # Insert the referenceSet into the memory-based object model. self.addReferenceSet(referenceSet) - def _createDatasetTable(self, cursor): - sql = """ - CREATE TABLE Dataset ( - id TEXT NOT NULL PRIMARY KEY, - name TEXT NOT NULL, - description TEXT, - info TEXT, - UNIQUE (name) - ); - """ - cursor.execute(sql) + def _createDatasetTable(self): + self.database.create_table(m.Dataset) def insertDataset(self, dataset): """ Inserts the specified dataset into this repository. """ - sql = """ - INSERT INTO Dataset (id, name, description, info) - VALUES (?, ?, ?, ?); - """ - cursor = self._dbConnection.cursor() try: - cursor.execute(sql, ( - dataset.getId(), dataset.getLocalId(), - dataset.getDescription(), - json.dumps(dataset.getInfo()))) - except sqlite3.IntegrityError: - raise exceptions.DuplicateNameException(dataset.getLocalId()) + m.Dataset.create( + id=dataset.getId(), + name=dataset.getLocalId(), + description=dataset.getDescription(), + info=json.dumps(dataset.getInfo())) + except Exception: + raise exceptions.DuplicateNameException( + dataset.getLocalId()) def removeDataset(self, dataset): """ Removes the specified dataset from this repository. This performs a cascading removal of all items within this dataset. """ - sql = "DELETE FROM Dataset WHERE id=?" - cursor = self._dbConnection.cursor() - cursor.execute(sql, (dataset.getId(),)) + for datasetRecord in m.Dataset.select().where( + m.Dataset.id == dataset.getId()): + datasetRecord.delete_instance(recursive=True) def removePhenotypeAssociationSet(self, phenotypeAssociationSet): """ Remove a phenotype association set from the repo """ - sql = "DELETE FROM PhenotypeAssociationSet WHERE id=? 
" - cursor = self._dbConnection.cursor() - cursor.execute(sql, (phenotypeAssociationSet.getId(),)) + q = m.Phenotypeassociationset.delete().where( + m.Phenotypeassociationset.id == phenotypeAssociationSet.getId()) + q.execute() def removeFeatureSet(self, featureSet): """ Removes the specified featureSet from this repository. """ - sql = "DELETE FROM FeatureSet WHERE id=?" - cursor = self._dbConnection.cursor() - cursor.execute(sql, (featureSet.getId(),)) - - def _readDatasetTable(self, cursor): - cursor.row_factory = sqlite3.Row - cursor.execute("SELECT * FROM Dataset;") - for row in cursor: - dataset = datasets.Dataset(row[b'name']) - dataset.populateFromRow(row) - assert dataset.getId() == row[b"id"] + q = m.Featureset.delete().where( + m.Featureset.id == featureSet.getId()) + q.execute() + + def _readDatasetTable(self): + for datasetRecord in m.Dataset.select(): + dataset = datasets.Dataset(datasetRecord.name) + dataset.populateFromRow(datasetRecord) + assert dataset.getId() == datasetRecord.id # Insert the dataset into the memory-based object model. self.addDataset(dataset) - def _createReadGroupTable(self, cursor): - sql = """ - CREATE TABLE ReadGroup ( - id TEXT NOT NULL PRIMARY KEY, - readGroupSetId TEXT NOT NULL, - name TEXT NOT NULL, - predictedInsertSize INTEGER, - sampleName TEXT, - description TEXT, - stats TEXT NOT NULL, - experiment TEXT NOT NULL, - biosampleId TEXT, - created TEXT, - updated TEXT, - UNIQUE (readGroupSetId, name), - FOREIGN KEY(readGroupSetId) REFERENCES ReadGroupSet(id) - ON DELETE CASCADE - ); - """ - cursor.execute(sql) + def _createReadGroupTable(self): + self.database.create_table(m.Readgroup) def insertReadGroup(self, readGroup): """ Inserts the specified readGroup into the DB. 
""" - sql = """ - INSERT INTO ReadGroup ( - id, readGroupSetId, name, predictedInsertSize, - sampleName, description, stats, experiment, - biosampleId, created, updated) - VALUES - (?, ?, ?, ?, ?, ?, ?, ?, ?, datetime('now'), datetime('now')); - """ - cursor = self._dbConnection.cursor() statsJson = json.dumps(protocol.toJsonDict(readGroup.getStats())) experimentJson = json.dumps( protocol.toJsonDict(readGroup.getExperiment())) - cursor.execute(sql, ( - readGroup.getId(), readGroup.getParentContainer().getId(), - readGroup.getLocalId(), readGroup.getPredictedInsertSize(), - readGroup.getSampleName(), readGroup.getDescription(), - statsJson, experimentJson, readGroup.getBiosampleId())) + try: + m.Readgroup.create( + id=readGroup.getId(), + readgroupsetid=readGroup.getParentContainer().getId(), + name=readGroup.getLocalId(), + predictedinsertedsize=readGroup.getPredictedInsertSize(), + samplename=readGroup.getSampleName(), + description=readGroup.getDescription(), + stats=statsJson, + experiment=experimentJson, + biosampleid=readGroup.getBiosampleId()) + except Exception as e: + raise exceptions.RepoManagerException( + e) def removeReadGroupSet(self, readGroupSet): """ Removes the specified readGroupSet from this repository. This performs a cascading removal of all items within this readGroupSet. """ - sql = "DELETE FROM ReadGroupSet WHERE id=?" - cursor = self._dbConnection.cursor() - cursor.execute(sql, (readGroupSet.getId(),)) + for readGroupSetRecord in m.Readgroupset.select().where( + m.Readgroupset.id == readGroupSet.getId()): + readGroupSetRecord.delete_instance(recursive=True) def removeVariantSet(self, variantSet): """ Removes the specified variantSet from this repository. This performs a cascading removal of all items within this variantSet. """ - sql = "DELETE FROM VariantSet WHERE id=?" 
- cursor = self._dbConnection.cursor() - cursor.execute(sql, (variantSet.getId(),)) + for variantSetRecord in m.Variantset.select().where( + m.Variantset.id == variantSet.getId()): + variantSetRecord.delete_instance(recursive=True) def removeBiosample(self, biosample): """ Removes the specified biosample from this repository. """ - sql = "DELETE FROM Biosample WHERE id=?" - cursor = self._dbConnection.cursor() - cursor.execute(sql, (biosample.getId(),)) + q = m.Biosample.delete().where(m.Biosample.id == biosample.getId()) + q.execute() def removeIndividual(self, individual): """ Removes the specified individual from this repository. """ - sql = "DELETE FROM Individual WHERE id=?" - cursor = self._dbConnection.cursor() - cursor.execute(sql, (individual.getId(),)) + q = m.Individual.delete().where(m.Individual.id == individual.getId()) + q.execute() - def _readReadGroupTable(self, cursor): - cursor.row_factory = sqlite3.Row - cursor.execute("SELECT * FROM ReadGroup;") - for row in cursor: - readGroupSet = self.getReadGroupSet(row[b'readGroupSetId']) - readGroup = reads.HtslibReadGroup(readGroupSet, row[b'name']) + def _readReadGroupTable(self): + for readGroupRecord in m.Readgroup.select(): + readGroupSet = self.getReadGroupSet( + readGroupRecord.readgroupsetid.id) + readGroup = reads.HtslibReadGroup( + readGroupSet, readGroupRecord.name) # TODO set the reference set. - readGroup.populateFromRow(row) - assert readGroup.getId() == row[b'id'] + readGroup.populateFromRow(readGroupRecord) + assert readGroup.getId() == readGroupRecord.id # Insert the readGroupSet into the memory-based object model. 
readGroupSet.addReadGroup(readGroup) - def _createReadGroupSetTable(self, cursor): - sql = """ - CREATE TABLE ReadGroupSet ( - id TEXT NOT NULL PRIMARY KEY, - name TEXT NOT NULL, - datasetId TEXT NOT NULL, - referenceSetId TEXT NOT NULL, - programs TEXT, - stats TEXT NOT NULL, - dataUrl TEXT NOT NULL, - indexFile TEXT NOT NULL, - UNIQUE (datasetId, name), - FOREIGN KEY(datasetId) REFERENCES Dataset(id) - ON DELETE CASCADE, - FOREIGN KEY(referenceSetId) REFERENCES ReferenceSet(id) - ); - """ - cursor.execute(sql) + def _createReadGroupSetTable(self): + self.database.create_table(m.Readgroupset) def insertReadGroupSet(self, readGroupSet): """ Inserts a the specified readGroupSet into this repository. """ - sql = """ - INSERT INTO ReadGroupSet ( - id, datasetId, referenceSetId, name, programs, stats, - dataUrl, indexFile) - VALUES (?, ?, ?, ?, ?, ?, ?, ?); - """ programsJson = json.dumps( [protocol.toJsonDict(program) for program in readGroupSet.getPrograms()]) statsJson = json.dumps(protocol.toJsonDict(readGroupSet.getStats())) - cursor = self._dbConnection.cursor() try: - cursor.execute(sql, ( - readGroupSet.getId(), - readGroupSet.getParentContainer().getId(), - readGroupSet.getReferenceSet().getId(), - readGroupSet.getLocalId(), - programsJson, statsJson, readGroupSet.getDataUrl(), - readGroupSet.getIndexFile())) - except sqlite3.IntegrityError: - raise exceptions.DuplicateNameException( - readGroupSet.getLocalId(), - readGroupSet.getParentContainer().getLocalId()) - for readGroup in readGroupSet.getReadGroups(): - self.insertReadGroup(readGroup) + m.Readgroupset.create( + id=readGroupSet.getId(), + datasetid=readGroupSet.getParentContainer().getId(), + referencesetid=readGroupSet.getReferenceSet().getId(), + name=readGroupSet.getLocalId(), + programs=programsJson, + stats=statsJson, + dataurl=readGroupSet.getDataUrl(), + indexfile=readGroupSet.getIndexFile()) + for readGroup in readGroupSet.getReadGroups(): + self.insertReadGroup(readGroup) + except Exception as 
e: + raise exceptions.RepoManagerException(e) def removeReferenceSet(self, referenceSet): """ @@ -1011,157 +885,100 @@ def removeReferenceSet(self, referenceSet): refer to this ReferenceSet. These must be deleted before the referenceSet can be removed. """ - sql = "DELETE FROM ReferenceSet WHERE id=?" - cursor = self._dbConnection.cursor() try: - cursor.execute(sql, (referenceSet.getId(),)) - except sqlite3.IntegrityError: + q = m.Reference.delete().where( + m.Reference.referencesetid == referenceSet.getId()) + q.execute() + q = m.Referenceset.delete().where( + m.Referenceset.id == referenceSet.getId()) + q.execute() + except Exception: msg = ("Unable to delete reference set. " "There are objects currently in the registry which are " "aligned against it. Remove these objects before removing " "the reference set.") raise exceptions.RepoManagerException(msg) - def _readReadGroupSetTable(self, cursor): - cursor.row_factory = sqlite3.Row - cursor.execute("SELECT * FROM ReadGroupSet;") - for row in cursor: - dataset = self.getDataset(row[b'datasetId']) - readGroupSet = reads.HtslibReadGroupSet(dataset, row[b'name']) - referenceSet = self.getReferenceSet(row[b'referenceSetId']) + def _readReadGroupSetTable(self): + for readGroupSetRecord in m.Readgroupset.select(): + dataset = self.getDataset(readGroupSetRecord.datasetid.id) + readGroupSet = reads.HtslibReadGroupSet( + dataset, readGroupSetRecord.name) + referenceSet = self.getReferenceSet( + readGroupSetRecord.referencesetid.id) readGroupSet.setReferenceSet(referenceSet) - readGroupSet.populateFromRow(row) - assert readGroupSet.getId() == row[b'id'] + readGroupSet.populateFromRow(readGroupSetRecord) + assert readGroupSet.getId() == readGroupSetRecord.id # Insert the readGroupSet into the memory-based object model. 
dataset.addReadGroupSet(readGroupSet) - def _createVariantAnnotationSetTable(self, cursor): - sql = """ - CREATE TABLE VariantAnnotationSet ( - id TEXT NOT NULL PRIMARY KEY, - name TEXT NOT NULL, - variantSetId TEXT NOT NULL, - ontologyId TEXT NOT NULL, - analysis TEXT, - annotationType TEXT, - created TEXT, - updated TEXT, - UNIQUE (variantSetId, name), - FOREIGN KEY(variantSetId) REFERENCES VariantSet(id) - ON DELETE CASCADE, - FOREIGN KEY(ontologyId) REFERENCES Ontology(id) - ); - """ - cursor.execute(sql) + def _createVariantAnnotationSetTable(self): + self.database.create_table(m.Variantannotationset) def insertVariantAnnotationSet(self, variantAnnotationSet): """ Inserts a the specified variantAnnotationSet into this repository. """ - sql = """ - INSERT INTO VariantAnnotationSet ( - id, variantSetId, ontologyId, name, analysis, annotationType, - created, updated) - VALUES (?, ?, ?, ?, ?, ?, ?, ?); - """ analysisJson = json.dumps( protocol.toJsonDict(variantAnnotationSet.getAnalysis())) - cursor = self._dbConnection.cursor() - cursor.execute(sql, ( - variantAnnotationSet.getId(), - variantAnnotationSet.getParentContainer().getId(), - variantAnnotationSet.getOntology().getId(), - variantAnnotationSet.getLocalId(), - analysisJson, - variantAnnotationSet.getAnnotationType(), - variantAnnotationSet.getCreationTime(), - variantAnnotationSet.getUpdatedTime())) - - def _readVariantAnnotationSetTable(self, cursor): - cursor.row_factory = sqlite3.Row - cursor.execute("SELECT * FROM VariantAnnotationSet;") - for row in cursor: - variantSet = self.getVariantSet(row[b'variantSetId']) - ontology = self.getOntology(row[b'ontologyId']) + try: + m.Variantannotationset.create( + id=variantAnnotationSet.getId(), + variantsetid=variantAnnotationSet.getParentContainer().getId(), + ontologyid=variantAnnotationSet.getOntology().getId(), + name=variantAnnotationSet.getLocalId(), + analysis=analysisJson, + annotationtype=variantAnnotationSet.getAnnotationType(), + 
created=variantAnnotationSet.getCreationTime(), + updated=variantAnnotationSet.getUpdatedTime()) + except Exception as e: + raise exceptions.RepoManagerException(e) + + def _readVariantAnnotationSetTable(self): + for annotationSetRecord in m.Variantannotationset.select(): + variantSet = self.getVariantSet( + annotationSetRecord.variantsetid.id) + ontology = self.getOntology(annotationSetRecord.ontologyid.id) variantAnnotationSet = variants.HtslibVariantAnnotationSet( - variantSet, row[b'name']) + variantSet, annotationSetRecord.name) variantAnnotationSet.setOntology(ontology) - variantAnnotationSet.populateFromRow(row) - assert variantAnnotationSet.getId() == row[b'id'] + variantAnnotationSet.populateFromRow(annotationSetRecord) + assert variantAnnotationSet.getId() == annotationSetRecord.id # Insert the variantAnnotationSet into the memory-based model. variantSet.addVariantAnnotationSet(variantAnnotationSet) - def _createCallSetTable(self, cursor): - sql = """ - CREATE TABLE CallSet ( - id TEXT NOT NULL PRIMARY KEY, - name TEXT NOT NULL, - variantSetId TEXT NOT NULL, - biosampleId TEXT, - UNIQUE (variantSetId, name), - FOREIGN KEY(variantSetId) REFERENCES VariantSet(id) - ON DELETE CASCADE - ); - """ - cursor.execute(sql) + def _createCallSetTable(self): + self.database.create_table(m.Callset) def insertCallSet(self, callSet): """ Inserts a the specified callSet into this repository. 
""" - sql = """ - INSERT INTO CallSet ( - id, name, variantSetId, biosampleId) - VALUES (?, ?, ?, ?); - """ - cursor = self._dbConnection.cursor() - cursor.execute(sql, ( - callSet.getId(), - callSet.getLocalId(), - callSet.getParentContainer().getId(), - callSet.getBiosampleId())) - - def _readCallSetTable(self, cursor): - cursor.row_factory = sqlite3.Row - cursor.execute("SELECT * FROM CallSet;") - for row in cursor: - variantSet = self.getVariantSet(row[b'variantSetId']) - callSet = variants.CallSet(variantSet, row[b'name']) - callSet.populateFromRow(row) - assert callSet.getId() == row[b'id'] + try: + m.Callset.create( + id=callSet.getId(), + name=callSet.getLocalId(), + variantsetid=callSet.getParentContainer().getId(), + biosampleid=callSet.getBiosampleId()) + except Exception as e: + raise exceptions.RepoManagerException(e) + + def _readCallSetTable(self): + for callSetRecord in m.Callset.select(): + variantSet = self.getVariantSet(callSetRecord.variantsetid.id) + callSet = variants.CallSet(variantSet, callSetRecord.name) + callSet.populateFromRow(callSetRecord) + assert callSet.getId() == callSetRecord.id # Insert the callSet into the memory-based object model. variantSet.addCallSet(callSet) - def _createVariantSetTable(self, cursor): - sql = """ - CREATE TABLE VariantSet ( - id TEXT NOT NULL PRIMARY KEY, - name TEXT NOT NULL, - datasetId TEXT NOT NULL, - referenceSetId TEXT NOT NULL, - created TEXT, - updated TEXT, - metadata TEXT, - dataUrlIndexMap TEXT NOT NULL, - UNIQUE (datasetID, name), - FOREIGN KEY(datasetId) REFERENCES Dataset(id) - ON DELETE CASCADE, - FOREIGN KEY(referenceSetId) REFERENCES ReferenceSet(id) - ); - """ - cursor.execute(sql) + def _createVariantSetTable(self): + self.database.create_table(m.Variantset) def insertVariantSet(self, variantSet): """ Inserts a the specified variantSet into this repository. 
""" - sql = """ - INSERT INTO VariantSet ( - id, datasetId, referenceSetId, name, created, updated, - metadata, dataUrlIndexMap) - VALUES (?, ?, ?, ?, datetime('now'), datetime('now'), ?, ?); - """ - cursor = self._dbConnection.cursor() # We cheat a little here with the VariantSetMetadata, and encode these # within the table as a JSON dump. These should really be stored in # their own table @@ -1170,284 +987,196 @@ def insertVariantSet(self, variantSet): variantSet.getMetadata()]) urlMapJson = json.dumps(variantSet.getReferenceToDataUrlIndexMap()) try: - cursor.execute(sql, ( - variantSet.getId(), variantSet.getParentContainer().getId(), - variantSet.getReferenceSet().getId(), variantSet.getLocalId(), - metadataJson, urlMapJson)) - except sqlite3.IntegrityError: - raise exceptions.DuplicateNameException( - variantSet.getLocalId(), - variantSet.getParentContainer().getLocalId()) + m.Variantset.create( + id=variantSet.getId(), + datasetid=variantSet.getParentContainer().getId(), + referencesetid=variantSet.getReferenceSet().getId(), + name=variantSet.getLocalId(), + created=datetime.datetime.now(), + updated=datetime.datetime.now(), + metadata=metadataJson, + dataurlindexmap=urlMapJson) + except Exception as e: + raise exceptions.RepoManagerException(e) for callSet in variantSet.getCallSets(): self.insertCallSet(callSet) - def _readVariantSetTable(self, cursor): - cursor.row_factory = sqlite3.Row - cursor.execute("SELECT * FROM VariantSet;") - for row in cursor: - dataset = self.getDataset(row[b'datasetId']) - referenceSet = self.getReferenceSet(row[b'referenceSetId']) - variantSet = variants.HtslibVariantSet(dataset, row[b'name']) + def _readVariantSetTable(self): + for variantSetRecord in m.Variantset.select(): + dataset = self.getDataset(variantSetRecord.datasetid.id) + referenceSet = self.getReferenceSet( + variantSetRecord.referencesetid.id) + variantSet = variants.HtslibVariantSet( + dataset, variantSetRecord.name) variantSet.setReferenceSet(referenceSet) - 
variantSet.populateFromRow(row) - assert variantSet.getId() == row[b'id'] + variantSet.populateFromRow(variantSetRecord) + assert variantSet.getId() == variantSetRecord.id # Insert the variantSet into the memory-based object model. dataset.addVariantSet(variantSet) - def _createFeatureSetTable(self, cursor): - sql = """ - CREATE TABLE FeatureSet ( - id TEXT NOT NULL PRIMARY KEY, - name TEXT NOT NULL, - datasetId TEXT NOT NULL, - referenceSetId TEXT NOT NULL, - ontologyId TEXT NOT NULL, - info TEXT, - sourceUri TEXT, - dataUrl TEXT NOT NULL, - UNIQUE (datasetId, name), - FOREIGN KEY(datasetId) REFERENCES Dataset(id) - ON DELETE CASCADE, - FOREIGN KEY(referenceSetId) REFERENCES ReferenceSet(id) - FOREIGN KEY(ontologyId) REFERENCES Ontology(id) - ); - """ - cursor.execute(sql) + def _createFeatureSetTable(self): + self.database.create_table(m.Featureset) def insertFeatureSet(self, featureSet): """ Inserts a the specified featureSet into this repository. """ # TODO add support for info and sourceUri fields. - sql = """ - INSERT INTO FeatureSet ( - id, datasetId, referenceSetId, ontologyId, name, dataUrl) - VALUES (?, ?, ?, ?, ?, ?) 
- """ - cursor = self._dbConnection.cursor() - cursor.execute(sql, ( - featureSet.getId(), - featureSet.getParentContainer().getId(), - featureSet.getReferenceSet().getId(), - featureSet.getOntology().getId(), - featureSet.getLocalId(), - featureSet.getDataUrl())) - - def _readFeatureSetTable(self, cursor): - cursor.row_factory = sqlite3.Row - cursor.execute("SELECT * FROM FeatureSet;") - for row in cursor: - dataset = self.getDataset(row[b'datasetId']) - if 'cgd' in row[b'name']: + try: + m.Featureset.create( + id=featureSet.getId(), + datasetid=featureSet.getParentContainer().getId(), + referencesetid=featureSet.getReferenceSet().getId(), + ontologyid=featureSet.getOntology().getId(), + name=featureSet.getLocalId(), + dataurl=featureSet.getDataUrl()) + except Exception as e: + raise exceptions.RepoManagerException(e) + + def _readFeatureSetTable(self): + for featureSetRecord in m.Featureset.select(): + dataset = self.getDataset(featureSetRecord.datasetid.id) + # FIXME this should be handled elsewhere + if 'cgd' in featureSetRecord.name: featureSet = \ g2pFeatureset \ - .PhenotypeAssociationFeatureSet(dataset, row[b'name']) + .PhenotypeAssociationFeatureSet( + dataset, featureSetRecord.name) else: featureSet = sequence_annotations.Gff3DbFeatureSet( - dataset, row[b'name']) + dataset, featureSetRecord.name) featureSet.setReferenceSet( - self.getReferenceSet(row[b'referenceSetId'])) - featureSet.setOntology(self.getOntology(row[b'ontologyId'])) - featureSet.populateFromRow(row) - assert featureSet.getId() == row[b'id'] + self.getReferenceSet( + featureSetRecord.referencesetid.id)) + featureSet.setOntology( + self.getOntology(featureSetRecord.ontologyid.id)) + featureSet.populateFromRow(featureSetRecord) + assert featureSet.getId() == featureSetRecord.id dataset.addFeatureSet(featureSet) - def _createBiosampleTable(self, cursor): - sql = """ - CREATE TABLE Biosample ( - id TEXT NOT NULL PRIMARY KEY, - datasetId TEXT NOT NULL, - name TEXT NOT NULL, - description TEXT, 
- disease TEXT, - created TEXT, - updated TEXT, - individualId TEXT, - info TEXT, - UNIQUE (datasetId, name), - FOREIGN KEY(datasetId) REFERENCES Dataset(id) - ON DELETE CASCADE - ); - """ - cursor.execute(sql) + def _createBiosampleTable(self): + self.database.create_table(m.Biosample) def insertBiosample(self, biosample): """ Inserts the specified Biosample into this repository. """ - sql = """ - INSERT INTO Biosample ( - id, datasetId, name, description, disease, - created, updated, individualId, info) - VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?) - """ - cursor = self._dbConnection.cursor() - cursor.execute(sql, ( - biosample.getId(), - biosample.getParentContainer().getId(), - biosample.getLocalId(), - biosample.getDescription(), - json.dumps(biosample.getDisease()), - biosample.getCreated(), - biosample.getUpdated(), - biosample.getIndividualId(), - json.dumps(biosample.getInfo()))) - - def _readBiosampleTable(self, cursor): - cursor.row_factory = sqlite3.Row - cursor.execute("SELECT * FROM Biosample;") - for row in cursor: - dataset = self.getDataset(row[b'datasetId']) + try: + m.Biosample.create( + id=biosample.getId(), + datasetid=biosample.getParentContainer().getId(), + name=biosample.getLocalId(), + description=biosample.getDescription(), + disease=json.dumps(biosample.getDisease()), + created=biosample.getCreated(), + updated=biosample.getUpdated(), + individualid=biosample.getIndividualId(), + info=json.dumps(biosample.getInfo())) + except Exception: + raise exceptions.DuplicateNameException( + biosample.getLocalId(), + biosample.getParentContainer().getLocalId()) + + def _readBiosampleTable(self): + for biosampleRecord in m.Biosample.select(): + dataset = self.getDataset(biosampleRecord.datasetid.id) biosample = biodata.Biosample( - dataset, row[b'name']) - biosample.populateFromRow(row) - assert biosample.getId() == row[b'id'] + dataset, biosampleRecord.name) + biosample.populateFromRow(biosampleRecord) + assert biosample.getId() == biosampleRecord.id 
dataset.addBiosample(biosample) - def _createIndividualTable(self, cursor): - sql = """ - CREATE TABLE Individual ( - id TEXT NOT NULL PRIMARY KEY, - datasetId TEXT NOT NULL, - name TEXT, - description TEXT, - created TEXT NOT NULL, - updated TEXT, - species TEXT, - sex TEXT, - info TEXT, - UNIQUE (datasetId, name), - FOREIGN KEY(datasetId) REFERENCES Dataset(id) - ON DELETE CASCADE - ); - """ - cursor.execute(sql) + def _createIndividualTable(self): + self.database.create_table(m.Individual) def insertIndividual(self, individual): """ Inserts the specified individual into this repository. """ # TODO add support for info and sourceUri fields. - sql = """ - INSERT INTO Individual ( - id, datasetId, name, description, created, - updated, species, sex, info) - VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?) - """ - cursor = self._dbConnection.cursor() - cursor.execute(sql, ( - individual.getId(), - individual.getParentContainer().getId(), - individual.getLocalId(), - individual.getDescription(), - individual.getCreated(), - individual.getUpdated(), - json.dumps(individual.getSpecies()), - json.dumps(individual.getSex()), - json.dumps(individual.getInfo()))) - - def _readIndividualTable(self, cursor): - cursor.row_factory = sqlite3.Row - cursor.execute("SELECT * FROM Individual;") - for row in cursor: - dataset = self.getDataset(row[b'datasetId']) + try: + m.Individual.create( + id=individual.getId(), + datasetid=individual.getParentContainer().getId(), + name=individual.getLocalId(), + description=individual.getDescription(), + created=individual.getCreated(), + updated=individual.getUpdated(), + species=json.dumps(individual.getSpecies()), + sex=json.dumps(individual.getSex()), + info=json.dumps(individual.getInfo())) + except Exception: + raise exceptions.DuplicateNameException( + individual.getLocalId(), + individual.getParentContainer().getLocalId()) + + def _readIndividualTable(self): + for individualRecord in m.Individual.select(): + dataset = 
self.getDataset(individualRecord.datasetid.id) individual = biodata.Individual( - dataset, row[b'name']) - individual.populateFromRow(row) - assert individual.getId() == row[b'id'] + dataset, individualRecord.name) + individual.populateFromRow(individualRecord) + assert individual.getId() == individualRecord.id dataset.addIndividual(individual) - def _createPhenotypeAssociationSetTable(self, cursor): - sql = """ - CREATE TABLE PhenotypeAssociationSet ( - id TEXT NOT NULL PRIMARY KEY, - name TEXT, - datasetId TEXT NOT NULL, - dataUrl TEXT NOT NULL, - UNIQUE (datasetId, name), - FOREIGN KEY(datasetId) REFERENCES Dataset(id) - ON DELETE CASCADE - ); - """ - cursor.execute(sql) - - def _createRnaQuantificationSetTable(self, cursor): - sql = """ - CREATE TABLE RnaQuantificationSet ( - id TEXT NOT NULL PRIMARY KEY, - name TEXT NOT NULL, - datasetId TEXT NOT NULL, - referenceSetId TEXT NOT NULL, - info TEXT, - dataUrl TEXT NOT NULL, - UNIQUE (datasetId, name), - FOREIGN KEY(datasetId) REFERENCES Dataset(id) - ON DELETE CASCADE, - FOREIGN KEY(referenceSetId) REFERENCES ReferenceSet(id) - ); - """ - cursor.execute(sql) + def _createPhenotypeAssociationSetTable(self): + self.database.create_table(m.Phenotypeassociationset) + + def _createRnaQuantificationSetTable(self): + self.database.create_table(m.Rnaquantificationset) def insertPhenotypeAssociationSet(self, phenotypeAssociationSet): """ - Inserts the specified individual into this repository. + Inserts the specified phenotype annotation set into this repository. """ # TODO add support for info and sourceUri fields. - sql = """ - INSERT INTO PhenotypeAssociationSet ( - id, name, datasetId, dataUrl ) - VALUES (?, ?, ?, ?) 
- """ - cursor = self._dbConnection.cursor() try: - cursor.execute(sql, ( - phenotypeAssociationSet.getId(), - phenotypeAssociationSet.getLocalId(), - phenotypeAssociationSet.getParentContainer().getId(), - phenotypeAssociationSet._dataUrl)) - except sqlite3.IntegrityError: + m.Phenotypeassociationset.create( + id=phenotypeAssociationSet.getId(), + name=phenotypeAssociationSet.getLocalId(), + datasetid=phenotypeAssociationSet.getParentContainer().getId(), + dataurl=phenotypeAssociationSet._dataUrl) + except Exception: raise exceptions.DuplicateNameException( phenotypeAssociationSet.getParentContainer().getId()) - def _readPhenotypeAssociationSetTable(self, cursor): - cursor.row_factory = sqlite3.Row - cursor.execute("SELECT * FROM PhenotypeAssociationSet;") - for row in cursor: - dataset = self.getDataset(row[b'datasetId']) + def _readPhenotypeAssociationSetTable(self): + for associationSetRecord in m.Phenotypeassociationset.select(): + dataset = self.getDataset(associationSetRecord.datasetid.id) phenotypeAssociationSet = \ genotype_phenotype.RdfPhenotypeAssociationSet( - dataset, row[b'name'], row[b'dataUrl']) + dataset, + associationSetRecord.name, + associationSetRecord.dataurl) dataset.addPhenotypeAssociationSet(phenotypeAssociationSet) def insertRnaQuantificationSet(self, rnaQuantificationSet): """ Inserts a the specified rnaQuantificationSet into this repository. """ - sql = """ - INSERT INTO RnaQuantificationSet ( - id, datasetId, referenceSetId, name, dataUrl) - VALUES (?, ?, ?, ?, ?) 
- """ - cursor = self._dbConnection.cursor() - cursor.execute(sql, ( - rnaQuantificationSet.getId(), - rnaQuantificationSet.getParentContainer().getId(), - rnaQuantificationSet.getReferenceSet().getId(), - rnaQuantificationSet.getLocalId(), - rnaQuantificationSet.getDataUrl())) - - def _readRnaQuantificationSetTable(self, cursor): - cursor.row_factory = sqlite3.Row - cursor.execute("SELECT * FROM RnaQuantificationSet;") - for row in cursor: - dataset = self.getDataset(row[b'datasetId']) - referenceSet = self.getReferenceSet(row[b'referenceSetId']) + try: + m.Rnaquantificationset.create( + id=rnaQuantificationSet.getId(), + datasetid=rnaQuantificationSet.getParentContainer().getId(), + referencesetid=rnaQuantificationSet.getReferenceSet().getId(), + name=rnaQuantificationSet.getLocalId(), + dataurl=rnaQuantificationSet.getDataUrl()) + except Exception: + raise exceptions.DuplicateNameException( + rnaQuantificationSet.getLocalId(), + rnaQuantificationSet.getParentContainer().getLocalId()) + + def _readRnaQuantificationSetTable(self): + for quantificationSetRecord in m.Rnaquantificationset.select(): + dataset = self.getDataset(quantificationSetRecord.datasetid.id) + referenceSet = self.getReferenceSet( + quantificationSetRecord.referencesetid.id) rnaQuantificationSet = \ rna_quantification.SqliteRnaQuantificationSet( - dataset, row[b'name']) + dataset, quantificationSetRecord.name) rnaQuantificationSet.setReferenceSet(referenceSet) - rnaQuantificationSet.populateFromRow(row) - assert rnaQuantificationSet.getId() == row[b'id'] + rnaQuantificationSet.populateFromRow(quantificationSetRecord) + assert rnaQuantificationSet.getId() == quantificationSetRecord.id dataset.addRnaQuantificationSet(rnaQuantificationSet) def removeRnaQuantificationSet(self, rnaQuantificationSet): @@ -1456,9 +1185,9 @@ def removeRnaQuantificationSet(self, rnaQuantificationSet): performs a cascading removal of all items within this rnaQuantificationSet. 
""" - sql = "DELETE FROM RnaQuantificationSet WHERE id=?" - cursor = self._dbConnection.cursor() - cursor.execute(sql, (rnaQuantificationSet.getId(),)) + q = m.Rnaquantificationset.delete.where( + m.Rnaquantificationset.id == rnaQuantificationSet.getId()) + q.execute() def initialise(self): """ @@ -1466,22 +1195,21 @@ def initialise(self): and file paths. """ self._checkWriteMode() - cursor = self._dbConnection - self._createSystemTable(cursor) - self._createOntologyTable(cursor) - self._createReferenceSetTable(cursor) - self._createReferenceTable(cursor) - self._createDatasetTable(cursor) - self._createReadGroupSetTable(cursor) - self._createReadGroupTable(cursor) - self._createCallSetTable(cursor) - self._createVariantSetTable(cursor) - self._createVariantAnnotationSetTable(cursor) - self._createFeatureSetTable(cursor) - self._createBiosampleTable(cursor) - self._createIndividualTable(cursor) - self._createPhenotypeAssociationSetTable(cursor) - self._createRnaQuantificationSetTable(cursor) + self._createSystemTable() + self._createOntologyTable() + self._createReferenceSetTable() + self._createReferenceTable() + self._createDatasetTable() + self._createReadGroupSetTable() + self._createReadGroupTable() + self._createCallSetTable() + self._createVariantSetTable() + self._createVariantAnnotationSetTable() + self._createFeatureSetTable() + self._createBiosampleTable() + self._createIndividualTable() + self._createPhenotypeAssociationSetTable() + self._createRnaQuantificationSetTable() def exists(self): """ @@ -1507,24 +1235,18 @@ def load(self): """ Loads this data repository into memory. 
""" - with sqlite3.connect(self._dbFilename) as db: - cursor = db.cursor() - try: - self._readSystemTable(cursor) - except (sqlite3.OperationalError, sqlite3.DatabaseError): - raise exceptions.RepoInvalidDatabaseException( - self._dbFilename) - self._readOntologyTable(cursor) - self._readReferenceSetTable(cursor) - self._readReferenceTable(cursor) - self._readDatasetTable(cursor) - self._readReadGroupSetTable(cursor) - self._readReadGroupTable(cursor) - self._readVariantSetTable(cursor) - self._readCallSetTable(cursor) - self._readVariantAnnotationSetTable(cursor) - self._readFeatureSetTable(cursor) - self._readBiosampleTable(cursor) - self._readIndividualTable(cursor) - self._readPhenotypeAssociationSetTable(cursor) - self._readRnaQuantificationSetTable(cursor) + self._readSystemTable() + self._readOntologyTable() + self._readReferenceSetTable() + self._readReferenceTable() + self._readDatasetTable() + self._readReadGroupSetTable() + self._readReadGroupTable() + self._readVariantSetTable() + self._readCallSetTable() + self._readVariantAnnotationSetTable() + self._readFeatureSetTable() + self._readBiosampleTable() + self._readIndividualTable() + self._readPhenotypeAssociationSetTable() + self._readRnaQuantificationSetTable() diff --git a/ga4gh/server/repo/models.py b/ga4gh/server/repo/models.py new file mode 100644 index 000000000..a18283611 --- /dev/null +++ b/ga4gh/server/repo/models.py @@ -0,0 +1,285 @@ +""" +peewee is a lightweight ORM with SQLite, postgresql, +and MySQL support. This file presents models for the +registry database. + +Partially auto-generated using pwiz. 
+ + python -m pwiz -e sqlite ga4gh-example-data/registry.db > models.py + +For more on the peewee model API see: + +https://peewee.readthedocs.io/en/latest/peewee/models.html + +""" +from __future__ import division +from __future__ import print_function +from __future__ import unicode_literals + +import peewee as pw + +# The databaseProxy is used to dynamically changed the +# backing database and needs to be set to an actual +# database instance to use these models. +databaseProxy = pw.Proxy() + + +class SqliteDatabase(pw.SqliteDatabase): + def __init__(self, *_, **__): + super(SqliteDatabase, self).__init__(*_, **__) + + +class UnknownField(object): + def __init__(self, *_, **__): pass + + +class BaseModel(pw.Model): + class Meta: + database = databaseProxy + + +class Dataset(BaseModel): + description = pw.TextField(null=True) + id = pw.TextField(primary_key=True) + info = pw.TextField(null=True) + name = pw.TextField(unique=True) + + class Meta: + db_table = 'Dataset' + + +class Biosample(BaseModel): + created = pw.TextField(null=True) + datasetid = pw.ForeignKeyField( + db_column='datasetId', rel_model=Dataset, to_field='id') + description = pw.TextField(null=True) + disease = pw.TextField(null=True) + id = pw.TextField(primary_key=True) + individualid = pw.TextField(db_column='individualId', null=True) + info = pw.TextField(null=True) + name = pw.TextField() + updated = pw.TextField(null=True) + + class Meta: + db_table = 'Biosample' + indexes = ( + (('datasetid', 'name'), True), + ) + + +class Referenceset(BaseModel): + assemblyid = pw.TextField(db_column='assemblyId', null=True) + dataurl = pw.TextField(db_column='dataUrl') + description = pw.TextField(null=True) + id = pw.TextField(primary_key=True) + isderived = pw.IntegerField(db_column='isDerived', null=True) + md5checksum = pw.TextField(null=True) + name = pw.TextField(unique=True) + species = pw.TextField(db_column='species', null=True) + sourceaccessions = pw.TextField(db_column='sourceAccessions', 
null=True) + sourceuri = pw.TextField(db_column='sourceUri', null=True) + + class Meta: + db_table = 'ReferenceSet' + + +class Variantset(BaseModel): + created = pw.TextField(null=True) + dataurlindexmap = pw.TextField(db_column='dataUrlIndexMap') + datasetid = pw.ForeignKeyField( + db_column='datasetId', rel_model=Dataset, to_field='id') + id = pw.TextField(primary_key=True) + metadata = pw.TextField(null=True) + name = pw.TextField() + referencesetid = pw.ForeignKeyField( + db_column='referenceSetId', rel_model=Referenceset, to_field='id') + updated = pw.TextField(null=True) + + class Meta: + db_table = 'VariantSet' + indexes = ( + (('datasetid', 'name'), True), + ) + + +class Callset(BaseModel): + biosampleid = pw.TextField(db_column='biosampleId', null=True) + id = pw.TextField(primary_key=True) + name = pw.TextField() + variantsetid = pw.ForeignKeyField( + db_column='variantSetId', rel_model=Variantset, to_field='id') + + class Meta: + db_table = 'CallSet' + indexes = ( + (('variantsetid', 'name'), True), + ) + + +class Ontology(BaseModel): + dataurl = pw.TextField(db_column='dataUrl') + id = pw.TextField(primary_key=True) + name = pw.TextField(unique=True) + ontologyprefix = pw.TextField(db_column='ontologyPrefix') + + class Meta: + db_table = 'Ontology' + + +class Featureset(BaseModel): + dataurl = pw.TextField(db_column='dataUrl') + datasetid = pw.ForeignKeyField( + db_column='datasetId', rel_model=Dataset, to_field='id') + id = pw.TextField(primary_key=True) + info = pw.TextField(null=True) + name = pw.TextField() + ontologyid = pw.ForeignKeyField( + db_column='ontologyId', rel_model=Ontology, to_field='id') + referencesetid = pw.ForeignKeyField( + db_column='referenceSetId', rel_model=Referenceset, to_field='id') + sourceuri = pw.TextField( + db_column='sourceUri', null=True) + + class Meta: + db_table = 'FeatureSet' + indexes = ( + (('datasetid', 'name'), True), + ) + + +class Individual(BaseModel): + created = pw.TextField() + datasetid = 
pw.ForeignKeyField( + db_column='datasetId', rel_model=Dataset, to_field='id') + description = pw.TextField(null=True) + id = pw.TextField(primary_key=True) + info = pw.TextField(null=True) + name = pw.TextField(null=True) + sex = pw.TextField(null=True) + species = pw.TextField(null=True) + updated = pw.TextField(null=True) + + class Meta: + db_table = 'Individual' + indexes = ( + (('datasetid', 'name'), True), + ) + + +class Phenotypeassociationset(BaseModel): + dataurl = pw.TextField(db_column='dataUrl') + datasetid = pw.ForeignKeyField( + db_column='datasetId', rel_model=Dataset, to_field='id') + id = pw.TextField(primary_key=True) + name = pw.TextField(null=True) + + class Meta: + db_table = 'PhenotypeAssociationSet' + indexes = ( + (('datasetid', 'name'), True), + ) + + +class Readgroupset(BaseModel): + dataurl = pw.TextField(db_column='dataUrl') + datasetid = pw.ForeignKeyField( + db_column='datasetId', rel_model=Dataset, to_field='id') + id = pw.TextField(primary_key=True) + indexfile = pw.TextField(db_column='indexFile') + name = pw.TextField() + programs = pw.TextField(null=True) + referencesetid = pw.ForeignKeyField( + db_column='referenceSetId', rel_model=Referenceset, to_field='id') + stats = pw.TextField() + + class Meta: + db_table = 'ReadGroupSet' + indexes = ( + (('datasetid', 'name'), True), + ) + + +class Readgroup(BaseModel): + biosampleid = pw.TextField(db_column='biosampleId', null=True) + created = pw.TextField(null=True) + description = pw.TextField(null=True) + experiment = pw.TextField() + id = pw.TextField(primary_key=True) + name = pw.TextField() + predictedinsertsize = pw.IntegerField( + db_column='predictedInsertSize', null=True) + readgroupsetid = pw.ForeignKeyField( + db_column='readGroupSetId', rel_model=Readgroupset, to_field='id') + samplename = pw.TextField(db_column='sampleName', null=True) + stats = pw.TextField() + updated = pw.TextField(null=True) + + class Meta: + db_table = 'ReadGroup' + indexes = ( + (('readgroupsetid', 
'name'), True), + ) + + +class Reference(BaseModel): + id = pw.TextField(null=True, primary_key=True) + isderived = pw.IntegerField(db_column='isDerived', null=True) + length = pw.IntegerField(null=True) + md5checksum = pw.TextField(null=True) + name = pw.TextField() + species = pw.TextField(db_column='species', null=True) + referencesetid = pw.ForeignKeyField( + db_column='referenceSetId', rel_model=Referenceset, to_field='id') + sourceaccessions = pw.TextField(db_column='sourceAccessions', null=True) + sourcedivergence = pw.FloatField(db_column='sourceDivergence', null=True) + sourceuri = pw.TextField(db_column='sourceUri', null=True) + + class Meta: + db_table = 'Reference' + indexes = ( + (('referencesetid', 'name'), True), + ) + + +class Rnaquantificationset(BaseModel): + dataurl = pw.TextField(db_column='dataUrl') + datasetid = pw.ForeignKeyField( + db_column='datasetId', rel_model=Dataset, to_field='id') + id = pw.TextField(primary_key=True) + info = pw.TextField(null=True) + name = pw.TextField() + referencesetid = pw.ForeignKeyField( + db_column='referenceSetId', rel_model=Referenceset, to_field='id') + + class Meta: + db_table = 'RnaQuantificationSet' + indexes = ( + (('datasetid', 'name'), True), + ) + + +class System(BaseModel): + key = pw.TextField(primary_key=True) + value = pw.TextField() + + class Meta: + db_table = 'System' + + +class Variantannotationset(BaseModel): + analysis = pw.TextField(null=True) + annotationtype = pw.TextField(db_column='annotationType', null=True) + created = pw.TextField(null=True) + id = pw.TextField(primary_key=True) + name = pw.TextField() + ontologyid = pw.ForeignKeyField( + db_column='ontologyId', rel_model=Ontology, to_field='id') + updated = pw.TextField(null=True) + variantsetid = pw.ForeignKeyField( + db_column='variantSetId', rel_model=Variantset, to_field='id') + + class Meta: + db_table = 'VariantAnnotationSet' + indexes = ( + (('variantsetid', 'name'), True), + ) diff --git a/requirements.txt 
b/requirements.txt index 2660fc4a1..a419cebfe 100644 --- a/requirements.txt +++ b/requirements.txt @@ -36,6 +36,7 @@ Jinja2==2.7.3 future==0.15.2 pyjwkest==1.0.1 PyJWT==1.4.2 +peewee==2.8.5 ### This section is for the actual libraries ### # these libraries are imported in code that can be reached via diff --git a/scripts/prepare_compliance_data.py b/scripts/prepare_compliance_data.py index aa8d380d2..d9b87e49f 100644 --- a/scripts/prepare_compliance_data.py +++ b/scripts/prepare_compliance_data.py @@ -189,7 +189,6 @@ def run(self): hg00101Biosample.populateFromJson(jsonString.read()) hg00101Biosample.setIndividualId(hg00101Individual.getId()) self.repo.insertBiosample(hg00101Biosample) - readFiles = [ "brca1_HG00096.sam", "brca1_HG00099.sam", @@ -282,7 +281,6 @@ def run(self): dataset, "cgd", os.path.abspath(outputG2PPath)) self.repo.insertPhenotypeAssociationSet(phenotypeAssociationSet) - self.repo.commit() dataset.addFeatureSet(gencode) # RNA Quantification @@ -303,8 +301,6 @@ def run(self): rnaQuantificationSet.populateFromFile(os.path.abspath(rnaDbName)) self.repo.insertRnaQuantificationSet(rnaQuantificationSet) - self.repo.commit() - def addVariantSet( self, variantFileName, dataset, referenceSet, ontology, biosamples): @@ -326,6 +322,7 @@ def addVariantSet( if biosample.getLocalId() == callSet.getLocalId(): callSet.setBiosampleId(biosample.getId()) self.repo.insertVariantSet(variantSet) + for annotationSet in variantSet.getVariantAnnotationSets(): annotationSet.setOntology(ontology) self.repo.insertVariantAnnotationSet(annotationSet) diff --git a/tests/unit/test_imports.py b/tests/unit/test_imports.py index 8c72a5f07..6949f5220 100644 --- a/tests/unit/test_imports.py +++ b/tests/unit/test_imports.py @@ -202,6 +202,7 @@ class ImportGraphLayerChecker(object): ], 'repo': [ 'ga4gh/server/repo/rnaseq2ga.py', + 'ga4gh/server/repo/models.py', ], } diff --git a/tests/unit/test_repo_manager.py b/tests/unit/test_repo_manager.py index 78924775b..7194da877 100644 --- 
a/tests/unit/test_repo_manager.py +++ b/tests/unit/test_repo_manager.py @@ -240,7 +240,7 @@ def testSameName(self): cmd = "add-dataset {} {}".format(self._repoPath, name) self.runCommand(cmd) self.assertRaises( - exceptions.DuplicateNameException, self.runCommand, cmd) + exceptions.RepoManagerException, self.runCommand, cmd) class TestAddPhenotypeAssociationSet(AbstractRepoManagerTest): @@ -256,7 +256,7 @@ def testDefaults(self): def testSameName(self): self.addDataset() self.addPhenotypeAssociationSet() - with self.assertRaises(exceptions.DuplicateNameException): + with self.assertRaises(exceptions.RepoManagerException): self.addPhenotypeAssociationSet() @@ -336,7 +336,7 @@ def testWithSameName(self): self._repoPath, fastaFile) self.runCommand(cmd) self.assertRaises( - exceptions.DuplicateNameException, self.runCommand, cmd) + exceptions.RepoManagerException, self.runCommand, cmd) class TestAddOntology(AbstractRepoManagerTest): @@ -377,7 +377,7 @@ def testWithSameName(self): self._repoPath, ontologyFile) self.runCommand(cmd) self.assertRaises( - exceptions.DuplicateNameException, self.runCommand, cmd) + exceptions.RepoManagerException, self.runCommand, cmd) def testMissingFile(self): cmd = "add-ontology {} {}".format(self._repoPath, "/no/such/file") @@ -586,7 +586,7 @@ def testAddReadGroupSetWithSameName(self): self._referenceSetName) self.runCommand(cmd) self.assertRaises( - exceptions.DuplicateNameException, self.runCommand, cmd) + exceptions.RepoManagerException, self.runCommand, cmd) # Specified name name = "test_rgs" cmd = ( @@ -596,7 +596,7 @@ def testAddReadGroupSetWithSameName(self): self._referenceSetName, name) self.runCommand(cmd) self.assertRaises( - exceptions.DuplicateNameException, self.runCommand, cmd) + exceptions.RepoManagerException, self.runCommand, cmd) def testUrlWithMissingIndex(self): bamFile = "http://example.com/example.bam" @@ -694,7 +694,7 @@ def testAddVariantSetWithSameName(self): self._referenceSetName) self.runCommand(cmd) 
self.assertRaises( - exceptions.DuplicateNameException, self.runCommand, cmd) + exceptions.RepoManagerException, self.runCommand, cmd) # Specified name name = "test_vs" cmd = ( @@ -704,7 +704,7 @@ def testAddVariantSetWithSameName(self): self._referenceSetName, name) self.runCommand(cmd) self.assertRaises( - exceptions.DuplicateNameException, self.runCommand, cmd) + exceptions.RepoManagerException, self.runCommand, cmd) def testUrlWithMissingIndex(self): dataFile = "http://example.com/example.vcf.gz"