Skip to content

Commit

Permalink
Use ORM for datarepo (ga4gh#1485)
Browse files: browse the repository at this point in the history
* Use peewee models for datarepo

* Adding other data types

* Fix exception name for ontology

* Move models to their own file

* Add models to test imports

* Flake fixes

* Remove commits from prepare compliance data

* Adjust visual indent

* Add future imports
Write out short variable names, thanks @dcolligan

* Biosample refactor
Better exception reporting

* Remove short names

* Change which exception is expected
Flake fixes.
Fix requirements

* Lengthen variable names

* Add URL to peewee API

* Use updated client

* Set constraints back

* New species field replaces ncbitaxonid
Apply biosample refactor changes that were missed earlier
  • Loading branch information
david4096 authored Feb 2, 2017
1 parent f7a9990 commit 2afdfdb
Show file tree
Hide file tree
Showing 15 changed files with 756 additions and 746 deletions.
30 changes: 15 additions & 15 deletions ga4gh/server/datamodel/bio_metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,14 +66,14 @@ def populateFromJson(self, jsonString):
self._info[key] = {"values": protocol.toJsonDict(parsed.info[key])}
return self

def populateFromRow(self, row):
def populateFromRow(self, biosampleRecord):
# TODO coerce to types
self._created = row[b'created']
self._updated = row[b'updated']
self._description = row[b'description']
self._disease = json.loads(row[b'disease'])
self._individualId = row[b'individualId']
self._info = json.loads(row[b'info'])
self._created = biosampleRecord.created
self._updated = biosampleRecord.updated
self._description = biosampleRecord.description
self._disease = json.loads(biosampleRecord.disease)
self._individualId = biosampleRecord.individualid
self._info = json.loads(biosampleRecord.info)
return self

def setIndividualId(self, individualId):
Expand Down Expand Up @@ -146,15 +146,15 @@ def toProtocolElement(self):
gaIndividual.info[key].values.add().string_value = value
return gaIndividual

def populateFromRow(self, row):
def populateFromRow(self, individualRecord):
# TODO coerce to types
self._name = row[b'name']
self._created = row[b'created']
self._updated = row[b'updated']
self._description = row[b'description']
self._species = json.loads(row[b'species'])
self._sex = json.loads(row[b'sex'])
self._info = json.loads(row[b'info'])
self._name = individualRecord.name
self._created = individualRecord.created
self._updated = individualRecord.updated
self._description = individualRecord.description
self._species = json.loads(individualRecord.species)
self._sex = json.loads(individualRecord.sex)
self._info = json.loads(individualRecord.info)
return self

def populateFromJson(self, jsonString):
Expand Down
6 changes: 3 additions & 3 deletions ga4gh/server/datamodel/datasets.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,13 +60,13 @@ def __init__(self, localId):
self._rnaQuantificationSetNameMap = {}
self._info = {}

def populateFromRow(self, row):
def populateFromRow(self, dataset):
"""
Populates the instance variables of this Dataset from the
specified database row.
"""
self._description = row[b'description']
self._info = json.loads(row[b'info'])
self._description = dataset.description
self._info = json.loads(dataset.info)

def setDescription(self, description):
"""
Expand Down
4 changes: 2 additions & 2 deletions ga4gh/server/datamodel/genotype_phenotype_featureset.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,12 +44,12 @@ def __init__(self, parentContainer, localId):
parentContainer, localId)

# mimic featureset
def populateFromRow(self, row):
def populateFromRow(self, featureSetRecord):
"""
Populates the instance variables of this FeatureSet from the specified
DB row.
"""
self._dbFilePath = row[b'dataUrl']
self._dbFilePath = featureSetRecord.dataurl
self.populateFromFile(self._dbFilePath)

def populateFromFile(self, dataUrl):
Expand Down
6 changes: 3 additions & 3 deletions ga4gh/server/datamodel/ontologies.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,12 +74,12 @@ def populateFromFile(self, dataUrl):
self._dataUrl = dataUrl
self._readFile()

def populateFromRow(self, row):
def populateFromRow(self, ontologyRecord):
"""
Populates this Ontology using values in the specified DB row.
"""
self._id = row[b'id']
self._dataUrl = row[b'dataUrl']
self._id = ontologyRecord.id
self._dataUrl = ontologyRecord.dataurl
self._readFile()
# TODO sanity check the stored values against what we have just read.

Expand Down
25 changes: 13 additions & 12 deletions ga4gh/server/datamodel/reads.py
Original file line number Diff line number Diff line change
Expand Up @@ -382,19 +382,19 @@ def getBamHeaderReferenceSetName(self):
"""
return self._bamHeaderReferenceSetName

def populateFromRow(self, row):
def populateFromRow(self, readGroupSetRecord):
"""
Populates the instance variables of this ReadGroupSet from the
specified database row.
"""
self._dataUrl = row[b'dataUrl']
self._indexFile = row[b'indexFile']
self._dataUrl = readGroupSetRecord.dataurl
self._indexFile = readGroupSetRecord.indexfile
self._programs = []
for jsonDict in json.loads(row[b'programs']):
for jsonDict in json.loads(readGroupSetRecord.programs):
program = protocol.fromJson(json.dumps(jsonDict),
protocol.Program)
self._programs.append(program)
stats = protocol.fromJson(row[b'stats'], protocol.ReadStats)
stats = protocol.fromJson(readGroupSetRecord.stats, protocol.ReadStats)
self._numAlignedReads = stats.aligned_read_count
self._numUnalignedReads = stats.unaligned_read_count

Expand Down Expand Up @@ -747,18 +747,19 @@ def populateFromHeader(self, readGroupHeader):
self._platformUnit = readGroupHeader.get('PU', None)
self._runTime = readGroupHeader.get('DT', None)

def populateFromRow(self, row):
def populateFromRow(self, readGroupRecord):
"""
Populate the instance variables using the specified DB row.
"""
self._sampleName = row[b'sampleName']
self._biosampleId = row[b'biosampleId']
self._description = row[b'description']
self._predictedInsertSize = row[b'predictedInsertSize']
stats = protocol.fromJson(row[b'stats'], protocol.ReadStats)
self._sampleName = readGroupRecord.samplename
self._biosampleId = readGroupRecord.biosampleid
self._description = readGroupRecord.description
self._predictedInsertSize = readGroupRecord.predictedinsertsize
stats = protocol.fromJson(readGroupRecord.stats, protocol.ReadStats)
self._numAlignedReads = stats.aligned_read_count
self._numUnalignedReads = stats.unaligned_read_count
experiment = protocol.fromJson(row[b'experiment'], protocol.Experiment)
experiment = protocol.fromJson(
readGroupRecord.experiment, protocol.Experiment)
self._instrumentModel = experiment.instrument_model
self._sequencingCenter = experiment.sequencing_center
self._experimentDescription = experiment.description
Expand Down
37 changes: 20 additions & 17 deletions ga4gh/server/datamodel/references.py
Original file line number Diff line number Diff line change
Expand Up @@ -481,19 +481,21 @@ def populateFromFile(self, dataUrl):
reference.setLength(len(bases))
self.addReference(reference)

def populateFromRow(self, row):
def populateFromRow(self, referenceSetRecord):
"""
Populates this reference set from the values in the specified DB
row.
"""
self._dataUrl = row[b'dataUrl']
self._description = row[b'description']
self._assemblyId = row[b'assemblyId']
self._isDerived = bool(row[b'isDerived'])
self._md5checksum = row[b'md5checksum']
self._species = json.loads(row[b'species'])
self._sourceAccessions = json.loads(row[b'sourceAccessions'])
self._sourceUri = row[b'sourceUri']
self._dataUrl = referenceSetRecord.dataurl
self._description = referenceSetRecord.description
self._assemblyId = referenceSetRecord.assemblyid
self._isDerived = bool(referenceSetRecord.isderived)
self._md5checksum = referenceSetRecord.md5checksum
if referenceSetRecord.species is not None:
self.setSpeciesFromJson(referenceSetRecord.species)
self._sourceAccessions = json.loads(
referenceSetRecord.sourceaccessions)
self._sourceUri = referenceSetRecord.sourceuri

def getDataUrl(self):
"""
Expand All @@ -519,17 +521,18 @@ class HtslibReference(datamodel.PysamDatamodelMixin, AbstractReference):
def __init__(self, parentContainer, localId):
super(HtslibReference, self).__init__(parentContainer, localId)

def populateFromRow(self, row):
def populateFromRow(self, referenceRecord):
"""
Populates this reference from the values in the specified DB row.
"""
self._length = row[b'length']
self._isDerived = bool(row[b'isDerived'])
self._md5checksum = row[b'md5checksum']
self._species = json.loads(row[b'species'])
self._sourceAccessions = json.loads(row[b'sourceAccessions'])
self._sourceDivergence = row[b'sourceDivergence']
self._sourceUri = row[b'sourceUri']
self._length = referenceRecord.length
self._isDerived = bool(referenceRecord.isderived)
self._md5checksum = referenceRecord.md5checksum
if referenceRecord.species is not None:
self.setSpeciesFromJson(referenceRecord.species)
self._sourceAccessions = json.loads(referenceRecord.sourceaccessions)
self._sourceDivergence = referenceRecord.sourcedivergence
self._sourceUri = referenceRecord.sourceuri

def getBases(self, start, end):
self.checkQueryRange(start, end)
Expand Down
4 changes: 2 additions & 2 deletions ga4gh/server/datamodel/rna_quantification.py
Original file line number Diff line number Diff line change
Expand Up @@ -179,12 +179,12 @@ def populateFromFile(self, dataUrl):
self._db = SqliteRnaBackend(self._dbFilePath)
self.addRnaQuants()

def populateFromRow(self, row):
def populateFromRow(self, quantificationSetRecord):
"""
Populates the instance variables of this RnaQuantificationSet from the
specified DB row.
"""
self._dbFilePath = row[b'dataUrl']
self._dbFilePath = quantificationSetRecord.dataurl
self._db = SqliteRnaBackend(self._dbFilePath)
self.addRnaQuants()

Expand Down
4 changes: 2 additions & 2 deletions ga4gh/server/datamodel/sequence_annotations.py
Original file line number Diff line number Diff line change
Expand Up @@ -340,12 +340,12 @@ def populateFromFile(self, dataUrl):
self._dbFilePath = dataUrl
self._db = Gff3DbBackend(self._dbFilePath)

def populateFromRow(self, row):
def populateFromRow(self, featureSetRecord):
"""
Populates the instance variables of this FeatureSet from the specified
DB row.
"""
self._dbFilePath = row[b'dataUrl']
self._dbFilePath = featureSetRecord.dataurl
self._db = Gff3DbBackend(self._dbFilePath)

def getDataUrl(self):
Expand Down
26 changes: 13 additions & 13 deletions ga4gh/server/datamodel/variants.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,12 +49,11 @@ def __init__(self, parentContainer, localId):
self._info = {}
self._biosampleId = None

def populateFromRow(self, row):
def populateFromRow(self, callSetRecord):
"""
Populates this CallSet from the specified DB row.
"""
# currently a noop
self._biosampleId = row[b'biosampleId']
self._biosampleId = callSetRecord.biosampleid

def toProtocolElement(self):
"""
Expand Down Expand Up @@ -459,19 +458,19 @@ def getDataUrlIndexPairs(self):
"""
return set(self._chromFileMap.values())

def populateFromRow(self, row):
def populateFromRow(self, variantSetRecord):
"""
Populates this VariantSet from the specified DB row.
"""
self._created = row[b'created']
self._updated = row[b'updated']
self._created = variantSetRecord.created
self._updated = variantSetRecord.updated
self._chromFileMap = {}
# We can't load directly as we want tuples to be stored
# rather than lists.
for key, value in json.loads(row[b'dataUrlIndexMap']).items():
for key, value in json.loads(variantSetRecord.dataurlindexmap).items():
self._chromFileMap[key] = tuple(value)
self._metadata = []
for jsonDict in json.loads(row[b'metadata']):
for jsonDict in json.loads(variantSetRecord.metadata):
metadata = protocol.fromJson(json.dumps(jsonDict),
protocol.VariantSetMetadata)
self._metadata.append(metadata)
Expand Down Expand Up @@ -1048,14 +1047,15 @@ def populateFromFile(self, varFile, annotationType):
self._creationTime = self._analysis.created
self._updatedTime = datetime.datetime.now().isoformat() + "Z"

def populateFromRow(self, row):
def populateFromRow(self, annotationSetRecord):
"""
Populates this VariantAnnotationSet from the specified DB row.
"""
self._annotationType = row[b'annotationType']
self._analysis = protocol.fromJson(row[b'analysis'], protocol.Analysis)
self._creationTime = row[b'created']
self._updatedTime = row[b'updated']
self._annotationType = annotationSetRecord.annotationtype
self._analysis = protocol.fromJson(
annotationSetRecord.analysis, protocol.Analysis)
self._creationTime = annotationSetRecord.created
self._updatedTime = annotationSetRecord.updated

def getAnnotationType(self):
"""
Expand Down
Loading

0 comments on commit 2afdfdb

Please sign in to comment.