From 2afdfdb18354be5e89a50b614a3224d24952b989 Mon Sep 17 00:00:00 2001 From: David Steinberg Date: Wed, 1 Feb 2017 22:35:18 -0800 Subject: [PATCH] Use ORM for datarepo (#1485) * Use peewee models for datarepo * Adding other data types * Fix exception name for ontology * Move models to their own file * Add models to test imports * Flake fixes * Remove commits from prepare compliance data * Adjust visual indent * Add future imports Write out short variable names, thanks @dcolligan * Biosample refactor Better exception reporting * Remove short names * Change which exception is expected Flake fixes. Fix requirements * Lengthen variable names * Add URL to peewee API * Use updated client * Set constraints back * New species field replaces ncbitaxonid Missed some biosample refactor --- ga4gh/server/datamodel/bio_metadata.py | 30 +- ga4gh/server/datamodel/datasets.py | 6 +- .../genotype_phenotype_featureset.py | 4 +- ga4gh/server/datamodel/ontologies.py | 6 +- ga4gh/server/datamodel/reads.py | 25 +- ga4gh/server/datamodel/references.py | 37 +- ga4gh/server/datamodel/rna_quantification.py | 4 +- .../server/datamodel/sequence_annotations.py | 4 +- ga4gh/server/datamodel/variants.py | 26 +- ga4gh/server/datarepo.py | 1052 ++++++----------- ga4gh/server/repo/models.py | 285 +++++ requirements.txt | 1 + scripts/prepare_compliance_data.py | 5 +- tests/unit/test_imports.py | 1 + tests/unit/test_repo_manager.py | 16 +- 15 files changed, 756 insertions(+), 746 deletions(-) create mode 100644 ga4gh/server/repo/models.py diff --git a/ga4gh/server/datamodel/bio_metadata.py b/ga4gh/server/datamodel/bio_metadata.py index e2d05876d..ecdcdb6e9 100644 --- a/ga4gh/server/datamodel/bio_metadata.py +++ b/ga4gh/server/datamodel/bio_metadata.py @@ -66,14 +66,14 @@ def populateFromJson(self, jsonString): self._info[key] = {"values": protocol.toJsonDict(parsed.info[key])} return self - def populateFromRow(self, row): + def populateFromRow(self, biosampleRecord): # TODO coerce to types - 
self._created = row[b'created'] - self._updated = row[b'updated'] - self._description = row[b'description'] - self._disease = json.loads(row[b'disease']) - self._individualId = row[b'individualId'] - self._info = json.loads(row[b'info']) + self._created = biosampleRecord.created + self._updated = biosampleRecord.updated + self._description = biosampleRecord.description + self._disease = json.loads(biosampleRecord.disease) + self._individualId = biosampleRecord.individualid + self._info = json.loads(biosampleRecord.info) return self def setIndividualId(self, individualId): @@ -146,15 +146,15 @@ def toProtocolElement(self): gaIndividual.info[key].values.add().string_value = value return gaIndividual - def populateFromRow(self, row): + def populateFromRow(self, individualRecord): # TODO coerce to types - self._name = row[b'name'] - self._created = row[b'created'] - self._updated = row[b'updated'] - self._description = row[b'description'] - self._species = json.loads(row[b'species']) - self._sex = json.loads(row[b'sex']) - self._info = json.loads(row[b'info']) + self._name = individualRecord.name + self._created = individualRecord.created + self._updated = individualRecord.updated + self._description = individualRecord.description + self._species = json.loads(individualRecord.species) + self._sex = json.loads(individualRecord.sex) + self._info = json.loads(individualRecord.info) return self def populateFromJson(self, jsonString): diff --git a/ga4gh/server/datamodel/datasets.py b/ga4gh/server/datamodel/datasets.py index 328469a81..eb8779926 100644 --- a/ga4gh/server/datamodel/datasets.py +++ b/ga4gh/server/datamodel/datasets.py @@ -60,13 +60,13 @@ def __init__(self, localId): self._rnaQuantificationSetNameMap = {} self._info = {} - def populateFromRow(self, row): + def populateFromRow(self, dataset): """ Populates the instance variables of this Dataset from the specified database row. 
""" - self._description = row[b'description'] - self._info = json.loads(row[b'info']) + self._description = dataset.description + self._info = json.loads(dataset.info) def setDescription(self, description): """ diff --git a/ga4gh/server/datamodel/genotype_phenotype_featureset.py b/ga4gh/server/datamodel/genotype_phenotype_featureset.py index 77b000380..0db7c137d 100644 --- a/ga4gh/server/datamodel/genotype_phenotype_featureset.py +++ b/ga4gh/server/datamodel/genotype_phenotype_featureset.py @@ -44,12 +44,12 @@ def __init__(self, parentContainer, localId): parentContainer, localId) # mimic featureset - def populateFromRow(self, row): + def populateFromRow(self, featureSetRecord): """ Populates the instance variables of this FeatureSet from the specified DB row. """ - self._dbFilePath = row[b'dataUrl'] + self._dbFilePath = featureSetRecord.dataurl self.populateFromFile(self._dbFilePath) def populateFromFile(self, dataUrl): diff --git a/ga4gh/server/datamodel/ontologies.py b/ga4gh/server/datamodel/ontologies.py index 170fec485..4b4946eee 100644 --- a/ga4gh/server/datamodel/ontologies.py +++ b/ga4gh/server/datamodel/ontologies.py @@ -74,12 +74,12 @@ def populateFromFile(self, dataUrl): self._dataUrl = dataUrl self._readFile() - def populateFromRow(self, row): + def populateFromRow(self, ontologyRecord): """ Populates this Ontology using values in the specified DB row. """ - self._id = row[b'id'] - self._dataUrl = row[b'dataUrl'] + self._id = ontologyRecord.id + self._dataUrl = ontologyRecord.dataurl self._readFile() # TODO sanity check the stored values against what we have just read. 
diff --git a/ga4gh/server/datamodel/reads.py b/ga4gh/server/datamodel/reads.py index 826fcd9d5..4820ffcf8 100644 --- a/ga4gh/server/datamodel/reads.py +++ b/ga4gh/server/datamodel/reads.py @@ -382,19 +382,19 @@ def getBamHeaderReferenceSetName(self): """ return self._bamHeaderReferenceSetName - def populateFromRow(self, row): + def populateFromRow(self, readGroupSetRecord): """ Populates the instance variables of this ReadGroupSet from the specified database row. """ - self._dataUrl = row[b'dataUrl'] - self._indexFile = row[b'indexFile'] + self._dataUrl = readGroupSetRecord.dataurl + self._indexFile = readGroupSetRecord.indexfile self._programs = [] - for jsonDict in json.loads(row[b'programs']): + for jsonDict in json.loads(readGroupSetRecord.programs): program = protocol.fromJson(json.dumps(jsonDict), protocol.Program) self._programs.append(program) - stats = protocol.fromJson(row[b'stats'], protocol.ReadStats) + stats = protocol.fromJson(readGroupSetRecord.stats, protocol.ReadStats) self._numAlignedReads = stats.aligned_read_count self._numUnalignedReads = stats.unaligned_read_count @@ -747,18 +747,19 @@ def populateFromHeader(self, readGroupHeader): self._platformUnit = readGroupHeader.get('PU', None) self._runTime = readGroupHeader.get('DT', None) - def populateFromRow(self, row): + def populateFromRow(self, readGroupRecord): """ Populate the instance variables using the specified DB row. 
""" - self._sampleName = row[b'sampleName'] - self._biosampleId = row[b'biosampleId'] - self._description = row[b'description'] - self._predictedInsertSize = row[b'predictedInsertSize'] - stats = protocol.fromJson(row[b'stats'], protocol.ReadStats) + self._sampleName = readGroupRecord.samplename + self._biosampleId = readGroupRecord.biosampleid + self._description = readGroupRecord.description + self._predictedInsertSize = readGroupRecord.predictedinsertsize + stats = protocol.fromJson(readGroupRecord.stats, protocol.ReadStats) self._numAlignedReads = stats.aligned_read_count self._numUnalignedReads = stats.unaligned_read_count - experiment = protocol.fromJson(row[b'experiment'], protocol.Experiment) + experiment = protocol.fromJson( + readGroupRecord.experiment, protocol.Experiment) self._instrumentModel = experiment.instrument_model self._sequencingCenter = experiment.sequencing_center self._experimentDescription = experiment.description diff --git a/ga4gh/server/datamodel/references.py b/ga4gh/server/datamodel/references.py index aa28a339d..a6ceef41e 100644 --- a/ga4gh/server/datamodel/references.py +++ b/ga4gh/server/datamodel/references.py @@ -481,19 +481,21 @@ def populateFromFile(self, dataUrl): reference.setLength(len(bases)) self.addReference(reference) - def populateFromRow(self, row): + def populateFromRow(self, referenceSetRecord): """ Populates this reference set from the values in the specified DB row. 
""" - self._dataUrl = row[b'dataUrl'] - self._description = row[b'description'] - self._assemblyId = row[b'assemblyId'] - self._isDerived = bool(row[b'isDerived']) - self._md5checksum = row[b'md5checksum'] - self._species = json.loads(row[b'species']) - self._sourceAccessions = json.loads(row[b'sourceAccessions']) - self._sourceUri = row[b'sourceUri'] + self._dataUrl = referenceSetRecord.dataurl + self._description = referenceSetRecord.description + self._assemblyId = referenceSetRecord.assemblyid + self._isDerived = bool(referenceSetRecord.isderived) + self._md5checksum = referenceSetRecord.md5checksum + if referenceSetRecord.species is not None: + self.setSpeciesFromJson(referenceSetRecord.species) + self._sourceAccessions = json.loads( + referenceSetRecord.sourceaccessions) + self._sourceUri = referenceSetRecord.sourceuri def getDataUrl(self): """ @@ -519,17 +521,18 @@ class HtslibReference(datamodel.PysamDatamodelMixin, AbstractReference): def __init__(self, parentContainer, localId): super(HtslibReference, self).__init__(parentContainer, localId) - def populateFromRow(self, row): + def populateFromRow(self, referenceRecord): """ Populates this reference from the values in the specified DB row. 
""" - self._length = row[b'length'] - self._isDerived = bool(row[b'isDerived']) - self._md5checksum = row[b'md5checksum'] - self._species = json.loads(row[b'species']) - self._sourceAccessions = json.loads(row[b'sourceAccessions']) - self._sourceDivergence = row[b'sourceDivergence'] - self._sourceUri = row[b'sourceUri'] + self._length = referenceRecord.length + self._isDerived = bool(referenceRecord.isderived) + self._md5checksum = referenceRecord.md5checksum + if referenceRecord.species is not None: + self.setSpeciesFromJson(referenceRecord.species) + self._sourceAccessions = json.loads(referenceRecord.sourceaccessions) + self._sourceDivergence = referenceRecord.sourcedivergence + self._sourceUri = referenceRecord.sourceuri def getBases(self, start, end): self.checkQueryRange(start, end) diff --git a/ga4gh/server/datamodel/rna_quantification.py b/ga4gh/server/datamodel/rna_quantification.py index 75b17123c..545615a2c 100644 --- a/ga4gh/server/datamodel/rna_quantification.py +++ b/ga4gh/server/datamodel/rna_quantification.py @@ -179,12 +179,12 @@ def populateFromFile(self, dataUrl): self._db = SqliteRnaBackend(self._dbFilePath) self.addRnaQuants() - def populateFromRow(self, row): + def populateFromRow(self, quantificationSetRecord): """ Populates the instance variables of this RnaQuantificationSet from the specified DB row. 
""" - self._dbFilePath = row[b'dataUrl'] + self._dbFilePath = quantificationSetRecord.dataurl self._db = SqliteRnaBackend(self._dbFilePath) self.addRnaQuants() diff --git a/ga4gh/server/datamodel/sequence_annotations.py b/ga4gh/server/datamodel/sequence_annotations.py index 1c6255b51..b3e1ab272 100644 --- a/ga4gh/server/datamodel/sequence_annotations.py +++ b/ga4gh/server/datamodel/sequence_annotations.py @@ -340,12 +340,12 @@ def populateFromFile(self, dataUrl): self._dbFilePath = dataUrl self._db = Gff3DbBackend(self._dbFilePath) - def populateFromRow(self, row): + def populateFromRow(self, featureSetRecord): """ Populates the instance variables of this FeatureSet from the specified DB row. """ - self._dbFilePath = row[b'dataUrl'] + self._dbFilePath = featureSetRecord.dataurl self._db = Gff3DbBackend(self._dbFilePath) def getDataUrl(self): diff --git a/ga4gh/server/datamodel/variants.py b/ga4gh/server/datamodel/variants.py index 41ec2521b..71cb7be84 100644 --- a/ga4gh/server/datamodel/variants.py +++ b/ga4gh/server/datamodel/variants.py @@ -49,12 +49,11 @@ def __init__(self, parentContainer, localId): self._info = {} self._biosampleId = None - def populateFromRow(self, row): + def populateFromRow(self, callSetRecord): """ Populates this CallSet from the specified DB row. """ - # currently a noop - self._biosampleId = row[b'biosampleId'] + self._biosampleId = callSetRecord.biosampleid def toProtocolElement(self): """ @@ -459,19 +458,19 @@ def getDataUrlIndexPairs(self): """ return set(self._chromFileMap.values()) - def populateFromRow(self, row): + def populateFromRow(self, variantSetRecord): """ Populates this VariantSet from the specified DB row. """ - self._created = row[b'created'] - self._updated = row[b'updated'] + self._created = variantSetRecord.created + self._updated = variantSetRecord.updated self._chromFileMap = {} # We can't load directly as we want tuples to be stored # rather than lists. 
- for key, value in json.loads(row[b'dataUrlIndexMap']).items(): + for key, value in json.loads(variantSetRecord.dataurlindexmap).items(): self._chromFileMap[key] = tuple(value) self._metadata = [] - for jsonDict in json.loads(row[b'metadata']): + for jsonDict in json.loads(variantSetRecord.metadata): metadata = protocol.fromJson(json.dumps(jsonDict), protocol.VariantSetMetadata) self._metadata.append(metadata) @@ -1048,14 +1047,15 @@ def populateFromFile(self, varFile, annotationType): self._creationTime = self._analysis.created self._updatedTime = datetime.datetime.now().isoformat() + "Z" - def populateFromRow(self, row): + def populateFromRow(self, annotationSetRecord): """ Populates this VariantAnnotationSet from the specified DB row. """ - self._annotationType = row[b'annotationType'] - self._analysis = protocol.fromJson(row[b'analysis'], protocol.Analysis) - self._creationTime = row[b'created'] - self._updatedTime = row[b'updated'] + self._annotationType = annotationSetRecord.annotationtype + self._analysis = protocol.fromJson( + annotationSetRecord.analysis, protocol.Analysis) + self._creationTime = annotationSetRecord.created + self._updatedTime = annotationSetRecord.updated def getAnnotationType(self): """ diff --git a/ga4gh/server/datarepo.py b/ga4gh/server/datarepo.py index 16194fdc1..ee9ac7dd8 100644 --- a/ga4gh/server/datarepo.py +++ b/ga4gh/server/datarepo.py @@ -8,6 +8,7 @@ import json import os import sqlite3 +import datetime import ga4gh.server.datamodel as datamodel import ga4gh.server.datamodel.datasets as datasets @@ -21,8 +22,9 @@ import ga4gh.server.datamodel.genotype_phenotype_featureset as g2pFeatureset import ga4gh.server.datamodel.rna_quantification as rna_quantification import ga4gh.server.exceptions as exceptions +import ga4gh.server.protocol as protocol -from ga4gh.server import protocol +import repo.models as m MODE_READ = 'r' MODE_WRITE = 'w' @@ -465,6 +467,8 @@ def __init__(self, fileName): self._creationTimeStamp = None # Connection 
to the DB. self._dbConnection = None + self.database = m.SqliteDatabase(self._dbFilename, **{}) + m.databaseProxy.initialize(self.database) def _checkWriteMode(self): if self._openMode != MODE_WRITE: @@ -489,11 +493,6 @@ def open(self, mode=MODE_READ): self._openMode = mode if mode == MODE_READ: self.assertExists() - self._safeConnect() - # Turn on foreign key constraints. - cursor = self._dbConnection.cursor() - cursor.execute("PRAGMA foreign_keys = ON;") - self._dbConnection.commit() if mode == MODE_READ: # This is part of the transitional behaviour where # we load the whole DB into memory to get access to @@ -506,7 +505,6 @@ def commit(self): this function if the repo is not opened in write-mode. """ self._checkWriteMode() - self._dbConnection.commit() def close(self): """ @@ -515,8 +513,6 @@ def close(self): if self._openMode is None: raise ValueError("Repo already closed") self._openMode = None - self._dbConnection.close() - self._dbConnection = None def verify(self): """ @@ -618,390 +614,268 @@ def _safeConnect(self): # raised e.g. 
when directory passed as dbFilename raise exceptions.RepoInvalidDatabaseException(self._dbFilename) - def _createSystemTable(self, cursor): - sql = """ - CREATE TABLE System ( - key TEXT NOT NULL PRIMARY KEY, - value TEXT NOT NULL - ); - """ - cursor.execute(sql) - cursor.execute( - "INSERT INTO System VALUES " - "('{}', '{}')".format( - self.systemKeySchemaVersion, self.version)) - cursor.execute( - "INSERT INTO System VALUES ('{}', datetime('now'))".format( - self.systemKeyCreationTimeStamp)) - - def _readSystemTable(self, cursor): - sql = "SELECT key, value FROM System;" - cursor.execute(sql) - config = {} - for row in cursor: - config[row[0]] = row[1] - row = cursor.fetchone() - self._schemaVersion = config[self.systemKeySchemaVersion] - self._creationTimeStamp = config[self.systemKeyCreationTimeStamp] + def _createSystemTable(self): + self.database.create_table(m.System) + m.System.create( + key=self.systemKeySchemaVersion, value=self.version) + m.System.create( + key=self.systemKeyCreationTimeStamp, value=datetime.datetime.now()) + + def _readSystemTable(self): + if not self.exists(): + raise exceptions.RepoNotFoundException( + self._dbFilename) + try: + self._schemaVersion = m.System.get( + m.System.key == self.systemKeySchemaVersion).value + self._creationTimeStamp = m.System.get( + m.System.key == self.systemKeyCreationTimeStamp).value + except Exception: + raise exceptions.RepoInvalidDatabaseException(self._dbFilename) schemaVersion = self.SchemaVersion(self._schemaVersion) if schemaVersion.major != self.version.major: raise exceptions.RepoSchemaVersionMismatchException( schemaVersion, self.version) - def _createOntologyTable(self, cursor): - sql = """ - CREATE TABLE Ontology( - id TEXT NOT NULL PRIMARY KEY, - name TEXT NOT NULL, - dataUrl TEXT NOT NULL, - ontologyPrefix TEXT NOT NULL, - UNIQUE (name) - ); - """ - cursor.execute(sql) + def _createOntologyTable(self): + self.database.create_table(m.Ontology) def insertOntology(self, ontology): """ Inserts 
the specified ontology into this repository. """ - sql = """ - INSERT INTO Ontology(id, name, dataUrl, ontologyPrefix) - VALUES (?, ?, ?, ?); - """ - cursor = self._dbConnection.cursor() + try: + m.Ontology.create( + id=ontology.getName(), + name=ontology.getName(), + dataurl=ontology.getDataUrl(), + ontologyprefix=ontology.getOntologyPrefix()) + except Exception: + raise exceptions.DuplicateNameException( + ontology.getName()) # TODO we need to create a proper ID when we're doing ID generation # for the rest of the container objects. - try: - cursor.execute(sql, ( - ontology.getName(), - ontology.getName(), - ontology.getDataUrl(), - ontology.getOntologyPrefix())) - except sqlite3.IntegrityError: - raise exceptions.DuplicateNameException(ontology.getName()) - - def _readOntologyTable(self, cursor): - cursor.row_factory = sqlite3.Row - cursor.execute("SELECT * FROM Ontology;") - for row in cursor: - ontology = ontologies.Ontology(row[b'name']) - ontology.populateFromRow(row) + + def _readOntologyTable(self): + for ont in m.Ontology.select(): + ontology = ontologies.Ontology(ont.name) + ontology.populateFromRow(ont) self.addOntology(ontology) def removeOntology(self, ontology): """ Removes the specified ontology term map from this repository. """ - sql = "DELETE FROM Ontology WHERE name=?" 
- cursor = self._dbConnection.cursor() - cursor.execute(sql, (ontology.getName(),)) - - def _createReferenceTable(self, cursor): - sql = """ - CREATE TABLE Reference ( - id TEXT PRIMARY KEY, - name TEXT NOT NULL, - referenceSetId TEXT NOT NULL, - length INTEGER, - isDerived INTEGER, - md5checksum TEXT, - species TEXT, - sourceAccessions TEXT, - sourceDivergence REAL, - sourceUri TEXT, - UNIQUE (referenceSetId, name), - FOREIGN KEY(referenceSetId) REFERENCES ReferenceSet(id) - ON DELETE CASCADE - ); - """ - cursor.execute(sql) + q = m.Ontology.delete().where(id == ontology.getId()) + q.execute() + + def _createReferenceTable(self): + self.database.create_table(m.Reference) def insertReference(self, reference): """ Inserts the specified reference into this repository. """ - sql = """ - INSERT INTO Reference ( - id, referenceSetId, name, length, isDerived, md5checksum, - species, sourceAccessions, sourceUri) - VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?); - """ - cursor = self._dbConnection.cursor() - cursor.execute(sql, ( - reference.getId(), reference.getParentContainer().getId(), - reference.getLocalId(), reference.getLength(), - reference.getIsDerived(), reference.getMd5Checksum(), - json.dumps(reference.getSpecies()), - # We store the list of sourceAccessions as a JSON string. Perhaps - # this should be another table? 
- json.dumps(reference.getSourceAccessions()), - reference.getSourceUri())) - - def _readReferenceTable(self, cursor): - cursor.row_factory = sqlite3.Row - cursor.execute("SELECT * FROM Reference;") - for row in cursor: - referenceSet = self.getReferenceSet(row[b'referenceSetId']) - reference = references.HtslibReference(referenceSet, row[b'name']) - reference.populateFromRow(row) - assert reference.getId() == row[b"id"] + m.Reference.create( + id=reference.getId(), + referencesetid=reference.getParentContainer().getId(), + name=reference.getLocalId(), + length=reference.getLength(), + isderived=reference.getIsDerived(), + species=reference.getSpecies(), + md5checksum=reference.getMd5Checksum(), + sourceaccessions=json.dumps(reference.getSourceAccessions()), + sourceuri=reference.getSourceUri()) + + def _readReferenceTable(self): + for referenceRecord in m.Reference.select(): + referenceSet = self.getReferenceSet( + referenceRecord.referencesetid.id) + reference = references.HtslibReference( + referenceSet, referenceRecord.name) + reference.populateFromRow(referenceRecord) + assert reference.getId() == referenceRecord.id referenceSet.addReference(reference) - def _createReferenceSetTable(self, cursor): - sql = """ - CREATE TABLE ReferenceSet ( - id TEXT NOT NULL PRIMARY KEY, - name TEXT NOT NULL, - description TEXT, - assemblyId TEXT, - isDerived INTEGER, - md5checksum TEXT, - species TEXT, - sourceAccessions TEXT, - sourceUri TEXT, - dataUrl TEXT NOT NULL, - UNIQUE (name) - ); - """ - cursor.execute(sql) + def _createReferenceSetTable(self): + self.database.create_table(m.Referenceset) def insertReferenceSet(self, referenceSet): """ Inserts the specified referenceSet into this repository. 
""" - sql = """ - INSERT INTO ReferenceSet ( - id, name, description, assemblyId, isDerived, md5checksum, - species, sourceAccessions, sourceUri, dataUrl) - VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?); - """ - cursor = self._dbConnection.cursor() try: - cursor.execute(sql, ( - referenceSet.getId(), referenceSet.getLocalId(), - referenceSet.getDescription(), referenceSet.getAssemblyId(), - referenceSet.getIsDerived(), referenceSet.getMd5Checksum(), - json.dumps(referenceSet.getSpecies(), protocol.OntologyTerm), - # We store the list of sourceAccessions as a JSON string. - # Perhaps this should be another table? - json.dumps(referenceSet.getSourceAccessions()), - referenceSet.getSourceUri(), referenceSet.getDataUrl())) - except sqlite3.IntegrityError: - raise exceptions.DuplicateNameException(referenceSet.getLocalId()) - for reference in referenceSet.getReferences(): - self.insertReference(reference) - - def _readReferenceSetTable(self, cursor): - cursor.row_factory = sqlite3.Row - cursor.execute("SELECT * FROM ReferenceSet;") - for row in cursor: - referenceSet = references.HtslibReferenceSet(row[b'name']) - referenceSet.populateFromRow(row) - assert referenceSet.getId() == row[b"id"] + m.Referenceset.create( + id=referenceSet.getId(), + name=referenceSet.getLocalId(), + description=referenceSet.getDescription(), + assemblyid=referenceSet.getAssemblyId(), + isderived=referenceSet.getIsDerived(), + species=referenceSet.getSpecies(), + md5checksum=referenceSet.getMd5Checksum(), + sourceaccessions=json.dumps( + referenceSet.getSourceAccessions()), + sourceuri=referenceSet.getSourceUri(), + dataurl=referenceSet.getDataUrl()) + for reference in referenceSet.getReferences(): + self.insertReference(reference) + except Exception: + raise exceptions.DuplicateNameException( + referenceSet.getLocalId()) + + def _readReferenceSetTable(self): + for referenceSetRecord in m.Referenceset.select(): + referenceSet = references.HtslibReferenceSet( + referenceSetRecord.name) + 
referenceSet.populateFromRow(referenceSetRecord) + assert referenceSet.getId() == referenceSetRecord.id # Insert the referenceSet into the memory-based object model. self.addReferenceSet(referenceSet) - def _createDatasetTable(self, cursor): - sql = """ - CREATE TABLE Dataset ( - id TEXT NOT NULL PRIMARY KEY, - name TEXT NOT NULL, - description TEXT, - info TEXT, - UNIQUE (name) - ); - """ - cursor.execute(sql) + def _createDatasetTable(self): + self.database.create_table(m.Dataset) def insertDataset(self, dataset): """ Inserts the specified dataset into this repository. """ - sql = """ - INSERT INTO Dataset (id, name, description, info) - VALUES (?, ?, ?, ?); - """ - cursor = self._dbConnection.cursor() try: - cursor.execute(sql, ( - dataset.getId(), dataset.getLocalId(), - dataset.getDescription(), - json.dumps(dataset.getInfo()))) - except sqlite3.IntegrityError: - raise exceptions.DuplicateNameException(dataset.getLocalId()) + m.Dataset.create( + id=dataset.getId(), + name=dataset.getLocalId(), + description=dataset.getDescription(), + info=json.dumps(dataset.getInfo())) + except Exception: + raise exceptions.DuplicateNameException( + dataset.getLocalId()) def removeDataset(self, dataset): """ Removes the specified dataset from this repository. This performs a cascading removal of all items within this dataset. """ - sql = "DELETE FROM Dataset WHERE id=?" - cursor = self._dbConnection.cursor() - cursor.execute(sql, (dataset.getId(),)) + for datasetRecord in m.Dataset.select().where( + m.Dataset.id == dataset.getId()): + datasetRecord.delete_instance(recursive=True) def removePhenotypeAssociationSet(self, phenotypeAssociationSet): """ Remove a phenotype association set from the repo """ - sql = "DELETE FROM PhenotypeAssociationSet WHERE id=? 
" - cursor = self._dbConnection.cursor() - cursor.execute(sql, (phenotypeAssociationSet.getId(),)) + q = m.Phenotypeassociationset.delete().where( + m.Phenotypeassociationset.id == phenotypeAssociationSet.getId()) + q.execute() def removeFeatureSet(self, featureSet): """ Removes the specified featureSet from this repository. """ - sql = "DELETE FROM FeatureSet WHERE id=?" - cursor = self._dbConnection.cursor() - cursor.execute(sql, (featureSet.getId(),)) - - def _readDatasetTable(self, cursor): - cursor.row_factory = sqlite3.Row - cursor.execute("SELECT * FROM Dataset;") - for row in cursor: - dataset = datasets.Dataset(row[b'name']) - dataset.populateFromRow(row) - assert dataset.getId() == row[b"id"] + q = m.Featureset.delete().where( + m.Featureset.id == featureSet.getId()) + q.execute() + + def _readDatasetTable(self): + for datasetRecord in m.Dataset.select(): + dataset = datasets.Dataset(datasetRecord.name) + dataset.populateFromRow(datasetRecord) + assert dataset.getId() == datasetRecord.id # Insert the dataset into the memory-based object model. self.addDataset(dataset) - def _createReadGroupTable(self, cursor): - sql = """ - CREATE TABLE ReadGroup ( - id TEXT NOT NULL PRIMARY KEY, - readGroupSetId TEXT NOT NULL, - name TEXT NOT NULL, - predictedInsertSize INTEGER, - sampleName TEXT, - description TEXT, - stats TEXT NOT NULL, - experiment TEXT NOT NULL, - biosampleId TEXT, - created TEXT, - updated TEXT, - UNIQUE (readGroupSetId, name), - FOREIGN KEY(readGroupSetId) REFERENCES ReadGroupSet(id) - ON DELETE CASCADE - ); - """ - cursor.execute(sql) + def _createReadGroupTable(self): + self.database.create_table(m.Readgroup) def insertReadGroup(self, readGroup): """ Inserts the specified readGroup into the DB. 
""" - sql = """ - INSERT INTO ReadGroup ( - id, readGroupSetId, name, predictedInsertSize, - sampleName, description, stats, experiment, - biosampleId, created, updated) - VALUES - (?, ?, ?, ?, ?, ?, ?, ?, ?, datetime('now'), datetime('now')); - """ - cursor = self._dbConnection.cursor() statsJson = json.dumps(protocol.toJsonDict(readGroup.getStats())) experimentJson = json.dumps( protocol.toJsonDict(readGroup.getExperiment())) - cursor.execute(sql, ( - readGroup.getId(), readGroup.getParentContainer().getId(), - readGroup.getLocalId(), readGroup.getPredictedInsertSize(), - readGroup.getSampleName(), readGroup.getDescription(), - statsJson, experimentJson, readGroup.getBiosampleId())) + try: + m.Readgroup.create( + id=readGroup.getId(), + readgroupsetid=readGroup.getParentContainer().getId(), + name=readGroup.getLocalId(), + predictedinsertedsize=readGroup.getPredictedInsertSize(), + samplename=readGroup.getSampleName(), + description=readGroup.getDescription(), + stats=statsJson, + experiment=experimentJson, + biosampleid=readGroup.getBiosampleId()) + except Exception as e: + raise exceptions.RepoManagerException( + e) def removeReadGroupSet(self, readGroupSet): """ Removes the specified readGroupSet from this repository. This performs a cascading removal of all items within this readGroupSet. """ - sql = "DELETE FROM ReadGroupSet WHERE id=?" - cursor = self._dbConnection.cursor() - cursor.execute(sql, (readGroupSet.getId(),)) + for readGroupSetRecord in m.Readgroupset.select().where( + m.Readgroupset.id == readGroupSet.getId()): + readGroupSetRecord.delete_instance(recursive=True) def removeVariantSet(self, variantSet): """ Removes the specified variantSet from this repository. This performs a cascading removal of all items within this variantSet. """ - sql = "DELETE FROM VariantSet WHERE id=?" 
- cursor = self._dbConnection.cursor() - cursor.execute(sql, (variantSet.getId(),)) + for variantSetRecord in m.Variantset.select().where( + m.Variantset.id == variantSet.getId()): + variantSetRecord.delete_instance(recursive=True) def removeBiosample(self, biosample): """ Removes the specified biosample from this repository. """ - sql = "DELETE FROM Biosample WHERE id=?" - cursor = self._dbConnection.cursor() - cursor.execute(sql, (biosample.getId(),)) + q = m.Biosample.delete().where(m.Biosample.id == biosample.getId()) + q.execute() def removeIndividual(self, individual): """ Removes the specified individual from this repository. """ - sql = "DELETE FROM Individual WHERE id=?" - cursor = self._dbConnection.cursor() - cursor.execute(sql, (individual.getId(),)) + q = m.Individual.delete().where(m.Individual.id == individual.getId()) + q.execute() - def _readReadGroupTable(self, cursor): - cursor.row_factory = sqlite3.Row - cursor.execute("SELECT * FROM ReadGroup;") - for row in cursor: - readGroupSet = self.getReadGroupSet(row[b'readGroupSetId']) - readGroup = reads.HtslibReadGroup(readGroupSet, row[b'name']) + def _readReadGroupTable(self): + for readGroupRecord in m.Readgroup.select(): + readGroupSet = self.getReadGroupSet( + readGroupRecord.readgroupsetid.id) + readGroup = reads.HtslibReadGroup( + readGroupSet, readGroupRecord.name) # TODO set the reference set. - readGroup.populateFromRow(row) - assert readGroup.getId() == row[b'id'] + readGroup.populateFromRow(readGroupRecord) + assert readGroup.getId() == readGroupRecord.id # Insert the readGroupSet into the memory-based object model. 
readGroupSet.addReadGroup(readGroup) - def _createReadGroupSetTable(self, cursor): - sql = """ - CREATE TABLE ReadGroupSet ( - id TEXT NOT NULL PRIMARY KEY, - name TEXT NOT NULL, - datasetId TEXT NOT NULL, - referenceSetId TEXT NOT NULL, - programs TEXT, - stats TEXT NOT NULL, - dataUrl TEXT NOT NULL, - indexFile TEXT NOT NULL, - UNIQUE (datasetId, name), - FOREIGN KEY(datasetId) REFERENCES Dataset(id) - ON DELETE CASCADE, - FOREIGN KEY(referenceSetId) REFERENCES ReferenceSet(id) - ); - """ - cursor.execute(sql) + def _createReadGroupSetTable(self): + self.database.create_table(m.Readgroupset) def insertReadGroupSet(self, readGroupSet): """ Inserts a the specified readGroupSet into this repository. """ - sql = """ - INSERT INTO ReadGroupSet ( - id, datasetId, referenceSetId, name, programs, stats, - dataUrl, indexFile) - VALUES (?, ?, ?, ?, ?, ?, ?, ?); - """ programsJson = json.dumps( [protocol.toJsonDict(program) for program in readGroupSet.getPrograms()]) statsJson = json.dumps(protocol.toJsonDict(readGroupSet.getStats())) - cursor = self._dbConnection.cursor() try: - cursor.execute(sql, ( - readGroupSet.getId(), - readGroupSet.getParentContainer().getId(), - readGroupSet.getReferenceSet().getId(), - readGroupSet.getLocalId(), - programsJson, statsJson, readGroupSet.getDataUrl(), - readGroupSet.getIndexFile())) - except sqlite3.IntegrityError: - raise exceptions.DuplicateNameException( - readGroupSet.getLocalId(), - readGroupSet.getParentContainer().getLocalId()) - for readGroup in readGroupSet.getReadGroups(): - self.insertReadGroup(readGroup) + m.Readgroupset.create( + id=readGroupSet.getId(), + datasetid=readGroupSet.getParentContainer().getId(), + referencesetid=readGroupSet.getReferenceSet().getId(), + name=readGroupSet.getLocalId(), + programs=programsJson, + stats=statsJson, + dataurl=readGroupSet.getDataUrl(), + indexfile=readGroupSet.getIndexFile()) + for readGroup in readGroupSet.getReadGroups(): + self.insertReadGroup(readGroup) + except Exception as 
e: + raise exceptions.RepoManagerException(e) def removeReferenceSet(self, referenceSet): """ @@ -1011,157 +885,100 @@ def removeReferenceSet(self, referenceSet): refer to this ReferenceSet. These must be deleted before the referenceSet can be removed. """ - sql = "DELETE FROM ReferenceSet WHERE id=?" - cursor = self._dbConnection.cursor() try: - cursor.execute(sql, (referenceSet.getId(),)) - except sqlite3.IntegrityError: + q = m.Reference.delete().where( + m.Reference.referencesetid == referenceSet.getId()) + q.execute() + q = m.Referenceset.delete().where( + m.Referenceset.id == referenceSet.getId()) + q.execute() + except Exception: msg = ("Unable to delete reference set. " "There are objects currently in the registry which are " "aligned against it. Remove these objects before removing " "the reference set.") raise exceptions.RepoManagerException(msg) - def _readReadGroupSetTable(self, cursor): - cursor.row_factory = sqlite3.Row - cursor.execute("SELECT * FROM ReadGroupSet;") - for row in cursor: - dataset = self.getDataset(row[b'datasetId']) - readGroupSet = reads.HtslibReadGroupSet(dataset, row[b'name']) - referenceSet = self.getReferenceSet(row[b'referenceSetId']) + def _readReadGroupSetTable(self): + for readGroupSetRecord in m.Readgroupset.select(): + dataset = self.getDataset(readGroupSetRecord.datasetid.id) + readGroupSet = reads.HtslibReadGroupSet( + dataset, readGroupSetRecord.name) + referenceSet = self.getReferenceSet( + readGroupSetRecord.referencesetid.id) readGroupSet.setReferenceSet(referenceSet) - readGroupSet.populateFromRow(row) - assert readGroupSet.getId() == row[b'id'] + readGroupSet.populateFromRow(readGroupSetRecord) + assert readGroupSet.getId() == readGroupSetRecord.id # Insert the readGroupSet into the memory-based object model. 
dataset.addReadGroupSet(readGroupSet) - def _createVariantAnnotationSetTable(self, cursor): - sql = """ - CREATE TABLE VariantAnnotationSet ( - id TEXT NOT NULL PRIMARY KEY, - name TEXT NOT NULL, - variantSetId TEXT NOT NULL, - ontologyId TEXT NOT NULL, - analysis TEXT, - annotationType TEXT, - created TEXT, - updated TEXT, - UNIQUE (variantSetId, name), - FOREIGN KEY(variantSetId) REFERENCES VariantSet(id) - ON DELETE CASCADE, - FOREIGN KEY(ontologyId) REFERENCES Ontology(id) - ); - """ - cursor.execute(sql) + def _createVariantAnnotationSetTable(self): + self.database.create_table(m.Variantannotationset) def insertVariantAnnotationSet(self, variantAnnotationSet): """ Inserts a the specified variantAnnotationSet into this repository. """ - sql = """ - INSERT INTO VariantAnnotationSet ( - id, variantSetId, ontologyId, name, analysis, annotationType, - created, updated) - VALUES (?, ?, ?, ?, ?, ?, ?, ?); - """ analysisJson = json.dumps( protocol.toJsonDict(variantAnnotationSet.getAnalysis())) - cursor = self._dbConnection.cursor() - cursor.execute(sql, ( - variantAnnotationSet.getId(), - variantAnnotationSet.getParentContainer().getId(), - variantAnnotationSet.getOntology().getId(), - variantAnnotationSet.getLocalId(), - analysisJson, - variantAnnotationSet.getAnnotationType(), - variantAnnotationSet.getCreationTime(), - variantAnnotationSet.getUpdatedTime())) - - def _readVariantAnnotationSetTable(self, cursor): - cursor.row_factory = sqlite3.Row - cursor.execute("SELECT * FROM VariantAnnotationSet;") - for row in cursor: - variantSet = self.getVariantSet(row[b'variantSetId']) - ontology = self.getOntology(row[b'ontologyId']) + try: + m.Variantannotationset.create( + id=variantAnnotationSet.getId(), + variantsetid=variantAnnotationSet.getParentContainer().getId(), + ontologyid=variantAnnotationSet.getOntology().getId(), + name=variantAnnotationSet.getLocalId(), + analysis=analysisJson, + annotationtype=variantAnnotationSet.getAnnotationType(), + 
created=variantAnnotationSet.getCreationTime(), + updated=variantAnnotationSet.getUpdatedTime()) + except Exception as e: + raise exceptions.RepoManagerException(e) + + def _readVariantAnnotationSetTable(self): + for annotationSetRecord in m.Variantannotationset.select(): + variantSet = self.getVariantSet( + annotationSetRecord.variantsetid.id) + ontology = self.getOntology(annotationSetRecord.ontologyid.id) variantAnnotationSet = variants.HtslibVariantAnnotationSet( - variantSet, row[b'name']) + variantSet, annotationSetRecord.name) variantAnnotationSet.setOntology(ontology) - variantAnnotationSet.populateFromRow(row) - assert variantAnnotationSet.getId() == row[b'id'] + variantAnnotationSet.populateFromRow(annotationSetRecord) + assert variantAnnotationSet.getId() == annotationSetRecord.id # Insert the variantAnnotationSet into the memory-based model. variantSet.addVariantAnnotationSet(variantAnnotationSet) - def _createCallSetTable(self, cursor): - sql = """ - CREATE TABLE CallSet ( - id TEXT NOT NULL PRIMARY KEY, - name TEXT NOT NULL, - variantSetId TEXT NOT NULL, - biosampleId TEXT, - UNIQUE (variantSetId, name), - FOREIGN KEY(variantSetId) REFERENCES VariantSet(id) - ON DELETE CASCADE - ); - """ - cursor.execute(sql) + def _createCallSetTable(self): + self.database.create_table(m.Callset) def insertCallSet(self, callSet): """ Inserts a the specified callSet into this repository. 
""" - sql = """ - INSERT INTO CallSet ( - id, name, variantSetId, biosampleId) - VALUES (?, ?, ?, ?); - """ - cursor = self._dbConnection.cursor() - cursor.execute(sql, ( - callSet.getId(), - callSet.getLocalId(), - callSet.getParentContainer().getId(), - callSet.getBiosampleId())) - - def _readCallSetTable(self, cursor): - cursor.row_factory = sqlite3.Row - cursor.execute("SELECT * FROM CallSet;") - for row in cursor: - variantSet = self.getVariantSet(row[b'variantSetId']) - callSet = variants.CallSet(variantSet, row[b'name']) - callSet.populateFromRow(row) - assert callSet.getId() == row[b'id'] + try: + m.Callset.create( + id=callSet.getId(), + name=callSet.getLocalId(), + variantsetid=callSet.getParentContainer().getId(), + biosampleid=callSet.getBiosampleId()) + except Exception as e: + raise exceptions.RepoManagerException(e) + + def _readCallSetTable(self): + for callSetRecord in m.Callset.select(): + variantSet = self.getVariantSet(callSetRecord.variantsetid.id) + callSet = variants.CallSet(variantSet, callSetRecord.name) + callSet.populateFromRow(callSetRecord) + assert callSet.getId() == callSetRecord.id # Insert the callSet into the memory-based object model. variantSet.addCallSet(callSet) - def _createVariantSetTable(self, cursor): - sql = """ - CREATE TABLE VariantSet ( - id TEXT NOT NULL PRIMARY KEY, - name TEXT NOT NULL, - datasetId TEXT NOT NULL, - referenceSetId TEXT NOT NULL, - created TEXT, - updated TEXT, - metadata TEXT, - dataUrlIndexMap TEXT NOT NULL, - UNIQUE (datasetID, name), - FOREIGN KEY(datasetId) REFERENCES Dataset(id) - ON DELETE CASCADE, - FOREIGN KEY(referenceSetId) REFERENCES ReferenceSet(id) - ); - """ - cursor.execute(sql) + def _createVariantSetTable(self): + self.database.create_table(m.Variantset) def insertVariantSet(self, variantSet): """ Inserts a the specified variantSet into this repository. 
""" - sql = """ - INSERT INTO VariantSet ( - id, datasetId, referenceSetId, name, created, updated, - metadata, dataUrlIndexMap) - VALUES (?, ?, ?, ?, datetime('now'), datetime('now'), ?, ?); - """ - cursor = self._dbConnection.cursor() # We cheat a little here with the VariantSetMetadata, and encode these # within the table as a JSON dump. These should really be stored in # their own table @@ -1170,284 +987,196 @@ def insertVariantSet(self, variantSet): variantSet.getMetadata()]) urlMapJson = json.dumps(variantSet.getReferenceToDataUrlIndexMap()) try: - cursor.execute(sql, ( - variantSet.getId(), variantSet.getParentContainer().getId(), - variantSet.getReferenceSet().getId(), variantSet.getLocalId(), - metadataJson, urlMapJson)) - except sqlite3.IntegrityError: - raise exceptions.DuplicateNameException( - variantSet.getLocalId(), - variantSet.getParentContainer().getLocalId()) + m.Variantset.create( + id=variantSet.getId(), + datasetid=variantSet.getParentContainer().getId(), + referencesetid=variantSet.getReferenceSet().getId(), + name=variantSet.getLocalId(), + created=datetime.datetime.now(), + updated=datetime.datetime.now(), + metadata=metadataJson, + dataurlindexmap=urlMapJson) + except Exception as e: + raise exceptions.RepoManagerException(e) for callSet in variantSet.getCallSets(): self.insertCallSet(callSet) - def _readVariantSetTable(self, cursor): - cursor.row_factory = sqlite3.Row - cursor.execute("SELECT * FROM VariantSet;") - for row in cursor: - dataset = self.getDataset(row[b'datasetId']) - referenceSet = self.getReferenceSet(row[b'referenceSetId']) - variantSet = variants.HtslibVariantSet(dataset, row[b'name']) + def _readVariantSetTable(self): + for variantSetRecord in m.Variantset.select(): + dataset = self.getDataset(variantSetRecord.datasetid.id) + referenceSet = self.getReferenceSet( + variantSetRecord.referencesetid.id) + variantSet = variants.HtslibVariantSet( + dataset, variantSetRecord.name) variantSet.setReferenceSet(referenceSet) - 
variantSet.populateFromRow(row) - assert variantSet.getId() == row[b'id'] + variantSet.populateFromRow(variantSetRecord) + assert variantSet.getId() == variantSetRecord.id # Insert the variantSet into the memory-based object model. dataset.addVariantSet(variantSet) - def _createFeatureSetTable(self, cursor): - sql = """ - CREATE TABLE FeatureSet ( - id TEXT NOT NULL PRIMARY KEY, - name TEXT NOT NULL, - datasetId TEXT NOT NULL, - referenceSetId TEXT NOT NULL, - ontologyId TEXT NOT NULL, - info TEXT, - sourceUri TEXT, - dataUrl TEXT NOT NULL, - UNIQUE (datasetId, name), - FOREIGN KEY(datasetId) REFERENCES Dataset(id) - ON DELETE CASCADE, - FOREIGN KEY(referenceSetId) REFERENCES ReferenceSet(id) - FOREIGN KEY(ontologyId) REFERENCES Ontology(id) - ); - """ - cursor.execute(sql) + def _createFeatureSetTable(self): + self.database.create_table(m.Featureset) def insertFeatureSet(self, featureSet): """ Inserts a the specified featureSet into this repository. """ # TODO add support for info and sourceUri fields. - sql = """ - INSERT INTO FeatureSet ( - id, datasetId, referenceSetId, ontologyId, name, dataUrl) - VALUES (?, ?, ?, ?, ?, ?) 
- """ - cursor = self._dbConnection.cursor() - cursor.execute(sql, ( - featureSet.getId(), - featureSet.getParentContainer().getId(), - featureSet.getReferenceSet().getId(), - featureSet.getOntology().getId(), - featureSet.getLocalId(), - featureSet.getDataUrl())) - - def _readFeatureSetTable(self, cursor): - cursor.row_factory = sqlite3.Row - cursor.execute("SELECT * FROM FeatureSet;") - for row in cursor: - dataset = self.getDataset(row[b'datasetId']) - if 'cgd' in row[b'name']: + try: + m.Featureset.create( + id=featureSet.getId(), + datasetid=featureSet.getParentContainer().getId(), + referencesetid=featureSet.getReferenceSet().getId(), + ontologyid=featureSet.getOntology().getId(), + name=featureSet.getLocalId(), + dataurl=featureSet.getDataUrl()) + except Exception as e: + raise exceptions.RepoManagerException(e) + + def _readFeatureSetTable(self): + for featureSetRecord in m.Featureset.select(): + dataset = self.getDataset(featureSetRecord.datasetid.id) + # FIXME this should be handled elsewhere + if 'cgd' in featureSetRecord.name: featureSet = \ g2pFeatureset \ - .PhenotypeAssociationFeatureSet(dataset, row[b'name']) + .PhenotypeAssociationFeatureSet( + dataset, featureSetRecord.name) else: featureSet = sequence_annotations.Gff3DbFeatureSet( - dataset, row[b'name']) + dataset, featureSetRecord.name) featureSet.setReferenceSet( - self.getReferenceSet(row[b'referenceSetId'])) - featureSet.setOntology(self.getOntology(row[b'ontologyId'])) - featureSet.populateFromRow(row) - assert featureSet.getId() == row[b'id'] + self.getReferenceSet( + featureSetRecord.referencesetid.id)) + featureSet.setOntology( + self.getOntology(featureSetRecord.ontologyid.id)) + featureSet.populateFromRow(featureSetRecord) + assert featureSet.getId() == featureSetRecord.id dataset.addFeatureSet(featureSet) - def _createBiosampleTable(self, cursor): - sql = """ - CREATE TABLE Biosample ( - id TEXT NOT NULL PRIMARY KEY, - datasetId TEXT NOT NULL, - name TEXT NOT NULL, - description TEXT, 
- disease TEXT, - created TEXT, - updated TEXT, - individualId TEXT, - info TEXT, - UNIQUE (datasetId, name), - FOREIGN KEY(datasetId) REFERENCES Dataset(id) - ON DELETE CASCADE - ); - """ - cursor.execute(sql) + def _createBiosampleTable(self): + self.database.create_table(m.Biosample) def insertBiosample(self, biosample): """ Inserts the specified Biosample into this repository. """ - sql = """ - INSERT INTO Biosample ( - id, datasetId, name, description, disease, - created, updated, individualId, info) - VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?) - """ - cursor = self._dbConnection.cursor() - cursor.execute(sql, ( - biosample.getId(), - biosample.getParentContainer().getId(), - biosample.getLocalId(), - biosample.getDescription(), - json.dumps(biosample.getDisease()), - biosample.getCreated(), - biosample.getUpdated(), - biosample.getIndividualId(), - json.dumps(biosample.getInfo()))) - - def _readBiosampleTable(self, cursor): - cursor.row_factory = sqlite3.Row - cursor.execute("SELECT * FROM Biosample;") - for row in cursor: - dataset = self.getDataset(row[b'datasetId']) + try: + m.Biosample.create( + id=biosample.getId(), + datasetid=biosample.getParentContainer().getId(), + name=biosample.getLocalId(), + description=biosample.getDescription(), + disease=json.dumps(biosample.getDisease()), + created=biosample.getCreated(), + updated=biosample.getUpdated(), + individualid=biosample.getIndividualId(), + info=json.dumps(biosample.getInfo())) + except Exception: + raise exceptions.DuplicateNameException( + biosample.getLocalId(), + biosample.getParentContainer().getLocalId()) + + def _readBiosampleTable(self): + for biosampleRecord in m.Biosample.select(): + dataset = self.getDataset(biosampleRecord.datasetid.id) biosample = biodata.Biosample( - dataset, row[b'name']) - biosample.populateFromRow(row) - assert biosample.getId() == row[b'id'] + dataset, biosampleRecord.name) + biosample.populateFromRow(biosampleRecord) + assert biosample.getId() == biosampleRecord.id 
dataset.addBiosample(biosample) - def _createIndividualTable(self, cursor): - sql = """ - CREATE TABLE Individual ( - id TEXT NOT NULL PRIMARY KEY, - datasetId TEXT NOT NULL, - name TEXT, - description TEXT, - created TEXT NOT NULL, - updated TEXT, - species TEXT, - sex TEXT, - info TEXT, - UNIQUE (datasetId, name), - FOREIGN KEY(datasetId) REFERENCES Dataset(id) - ON DELETE CASCADE - ); - """ - cursor.execute(sql) + def _createIndividualTable(self): + self.database.create_table(m.Individual) def insertIndividual(self, individual): """ Inserts the specified individual into this repository. """ # TODO add support for info and sourceUri fields. - sql = """ - INSERT INTO Individual ( - id, datasetId, name, description, created, - updated, species, sex, info) - VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?) - """ - cursor = self._dbConnection.cursor() - cursor.execute(sql, ( - individual.getId(), - individual.getParentContainer().getId(), - individual.getLocalId(), - individual.getDescription(), - individual.getCreated(), - individual.getUpdated(), - json.dumps(individual.getSpecies()), - json.dumps(individual.getSex()), - json.dumps(individual.getInfo()))) - - def _readIndividualTable(self, cursor): - cursor.row_factory = sqlite3.Row - cursor.execute("SELECT * FROM Individual;") - for row in cursor: - dataset = self.getDataset(row[b'datasetId']) + try: + m.Individual.create( + id=individual.getId(), + datasetid=individual.getParentContainer().getId(), + name=individual.getLocalId(), + description=individual.getDescription(), + created=individual.getCreated(), + updated=individual.getUpdated(), + species=json.dumps(individual.getSpecies()), + sex=json.dumps(individual.getSex()), + info=json.dumps(individual.getInfo())) + except Exception: + raise exceptions.DuplicateNameException( + individual.getLocalId(), + individual.getParentContainer().getLocalId()) + + def _readIndividualTable(self): + for individualRecord in m.Individual.select(): + dataset = 
self.getDataset(individualRecord.datasetid.id) individual = biodata.Individual( - dataset, row[b'name']) - individual.populateFromRow(row) - assert individual.getId() == row[b'id'] + dataset, individualRecord.name) + individual.populateFromRow(individualRecord) + assert individual.getId() == individualRecord.id dataset.addIndividual(individual) - def _createPhenotypeAssociationSetTable(self, cursor): - sql = """ - CREATE TABLE PhenotypeAssociationSet ( - id TEXT NOT NULL PRIMARY KEY, - name TEXT, - datasetId TEXT NOT NULL, - dataUrl TEXT NOT NULL, - UNIQUE (datasetId, name), - FOREIGN KEY(datasetId) REFERENCES Dataset(id) - ON DELETE CASCADE - ); - """ - cursor.execute(sql) - - def _createRnaQuantificationSetTable(self, cursor): - sql = """ - CREATE TABLE RnaQuantificationSet ( - id TEXT NOT NULL PRIMARY KEY, - name TEXT NOT NULL, - datasetId TEXT NOT NULL, - referenceSetId TEXT NOT NULL, - info TEXT, - dataUrl TEXT NOT NULL, - UNIQUE (datasetId, name), - FOREIGN KEY(datasetId) REFERENCES Dataset(id) - ON DELETE CASCADE, - FOREIGN KEY(referenceSetId) REFERENCES ReferenceSet(id) - ); - """ - cursor.execute(sql) + def _createPhenotypeAssociationSetTable(self): + self.database.create_table(m.Phenotypeassociationset) + + def _createRnaQuantificationSetTable(self): + self.database.create_table(m.Rnaquantificationset) def insertPhenotypeAssociationSet(self, phenotypeAssociationSet): """ - Inserts the specified individual into this repository. + Inserts the specified phenotype annotation set into this repository. """ # TODO add support for info and sourceUri fields. - sql = """ - INSERT INTO PhenotypeAssociationSet ( - id, name, datasetId, dataUrl ) - VALUES (?, ?, ?, ?) 
- """ - cursor = self._dbConnection.cursor() try: - cursor.execute(sql, ( - phenotypeAssociationSet.getId(), - phenotypeAssociationSet.getLocalId(), - phenotypeAssociationSet.getParentContainer().getId(), - phenotypeAssociationSet._dataUrl)) - except sqlite3.IntegrityError: + m.Phenotypeassociationset.create( + id=phenotypeAssociationSet.getId(), + name=phenotypeAssociationSet.getLocalId(), + datasetid=phenotypeAssociationSet.getParentContainer().getId(), + dataurl=phenotypeAssociationSet._dataUrl) + except Exception: raise exceptions.DuplicateNameException( phenotypeAssociationSet.getParentContainer().getId()) - def _readPhenotypeAssociationSetTable(self, cursor): - cursor.row_factory = sqlite3.Row - cursor.execute("SELECT * FROM PhenotypeAssociationSet;") - for row in cursor: - dataset = self.getDataset(row[b'datasetId']) + def _readPhenotypeAssociationSetTable(self): + for associationSetRecord in m.Phenotypeassociationset.select(): + dataset = self.getDataset(associationSetRecord.datasetid.id) phenotypeAssociationSet = \ genotype_phenotype.RdfPhenotypeAssociationSet( - dataset, row[b'name'], row[b'dataUrl']) + dataset, + associationSetRecord.name, + associationSetRecord.dataurl) dataset.addPhenotypeAssociationSet(phenotypeAssociationSet) def insertRnaQuantificationSet(self, rnaQuantificationSet): """ Inserts a the specified rnaQuantificationSet into this repository. """ - sql = """ - INSERT INTO RnaQuantificationSet ( - id, datasetId, referenceSetId, name, dataUrl) - VALUES (?, ?, ?, ?, ?) 
- """ - cursor = self._dbConnection.cursor() - cursor.execute(sql, ( - rnaQuantificationSet.getId(), - rnaQuantificationSet.getParentContainer().getId(), - rnaQuantificationSet.getReferenceSet().getId(), - rnaQuantificationSet.getLocalId(), - rnaQuantificationSet.getDataUrl())) - - def _readRnaQuantificationSetTable(self, cursor): - cursor.row_factory = sqlite3.Row - cursor.execute("SELECT * FROM RnaQuantificationSet;") - for row in cursor: - dataset = self.getDataset(row[b'datasetId']) - referenceSet = self.getReferenceSet(row[b'referenceSetId']) + try: + m.Rnaquantificationset.create( + id=rnaQuantificationSet.getId(), + datasetid=rnaQuantificationSet.getParentContainer().getId(), + referencesetid=rnaQuantificationSet.getReferenceSet().getId(), + name=rnaQuantificationSet.getLocalId(), + dataurl=rnaQuantificationSet.getDataUrl()) + except Exception: + raise exceptions.DuplicateNameException( + rnaQuantificationSet.getLocalId(), + rnaQuantificationSet.getParentContainer().getLocalId()) + + def _readRnaQuantificationSetTable(self): + for quantificationSetRecord in m.Rnaquantificationset.select(): + dataset = self.getDataset(quantificationSetRecord.datasetid.id) + referenceSet = self.getReferenceSet( + quantificationSetRecord.referencesetid.id) rnaQuantificationSet = \ rna_quantification.SqliteRnaQuantificationSet( - dataset, row[b'name']) + dataset, quantificationSetRecord.name) rnaQuantificationSet.setReferenceSet(referenceSet) - rnaQuantificationSet.populateFromRow(row) - assert rnaQuantificationSet.getId() == row[b'id'] + rnaQuantificationSet.populateFromRow(quantificationSetRecord) + assert rnaQuantificationSet.getId() == quantificationSetRecord.id dataset.addRnaQuantificationSet(rnaQuantificationSet) def removeRnaQuantificationSet(self, rnaQuantificationSet): @@ -1456,9 +1185,9 @@ def removeRnaQuantificationSet(self, rnaQuantificationSet): performs a cascading removal of all items within this rnaQuantificationSet. 
""" - sql = "DELETE FROM RnaQuantificationSet WHERE id=?" - cursor = self._dbConnection.cursor() - cursor.execute(sql, (rnaQuantificationSet.getId(),)) + q = m.Rnaquantificationset.delete.where( + m.Rnaquantificationset.id == rnaQuantificationSet.getId()) + q.execute() def initialise(self): """ @@ -1466,22 +1195,21 @@ def initialise(self): and file paths. """ self._checkWriteMode() - cursor = self._dbConnection - self._createSystemTable(cursor) - self._createOntologyTable(cursor) - self._createReferenceSetTable(cursor) - self._createReferenceTable(cursor) - self._createDatasetTable(cursor) - self._createReadGroupSetTable(cursor) - self._createReadGroupTable(cursor) - self._createCallSetTable(cursor) - self._createVariantSetTable(cursor) - self._createVariantAnnotationSetTable(cursor) - self._createFeatureSetTable(cursor) - self._createBiosampleTable(cursor) - self._createIndividualTable(cursor) - self._createPhenotypeAssociationSetTable(cursor) - self._createRnaQuantificationSetTable(cursor) + self._createSystemTable() + self._createOntologyTable() + self._createReferenceSetTable() + self._createReferenceTable() + self._createDatasetTable() + self._createReadGroupSetTable() + self._createReadGroupTable() + self._createCallSetTable() + self._createVariantSetTable() + self._createVariantAnnotationSetTable() + self._createFeatureSetTable() + self._createBiosampleTable() + self._createIndividualTable() + self._createPhenotypeAssociationSetTable() + self._createRnaQuantificationSetTable() def exists(self): """ @@ -1507,24 +1235,18 @@ def load(self): """ Loads this data repository into memory. 
""" - with sqlite3.connect(self._dbFilename) as db: - cursor = db.cursor() - try: - self._readSystemTable(cursor) - except (sqlite3.OperationalError, sqlite3.DatabaseError): - raise exceptions.RepoInvalidDatabaseException( - self._dbFilename) - self._readOntologyTable(cursor) - self._readReferenceSetTable(cursor) - self._readReferenceTable(cursor) - self._readDatasetTable(cursor) - self._readReadGroupSetTable(cursor) - self._readReadGroupTable(cursor) - self._readVariantSetTable(cursor) - self._readCallSetTable(cursor) - self._readVariantAnnotationSetTable(cursor) - self._readFeatureSetTable(cursor) - self._readBiosampleTable(cursor) - self._readIndividualTable(cursor) - self._readPhenotypeAssociationSetTable(cursor) - self._readRnaQuantificationSetTable(cursor) + self._readSystemTable() + self._readOntologyTable() + self._readReferenceSetTable() + self._readReferenceTable() + self._readDatasetTable() + self._readReadGroupSetTable() + self._readReadGroupTable() + self._readVariantSetTable() + self._readCallSetTable() + self._readVariantAnnotationSetTable() + self._readFeatureSetTable() + self._readBiosampleTable() + self._readIndividualTable() + self._readPhenotypeAssociationSetTable() + self._readRnaQuantificationSetTable() diff --git a/ga4gh/server/repo/models.py b/ga4gh/server/repo/models.py new file mode 100644 index 000000000..a18283611 --- /dev/null +++ b/ga4gh/server/repo/models.py @@ -0,0 +1,285 @@ +""" +peewee is a lightweight ORM with SQLite, postgresql, +and MySQL support. This file presents models for the +registry database. + +Partially auto-generated using pwiz. 
+ + python -m pwiz -e sqlite ga4gh-example-data/registry.db > models.py + +For more on the peewee model API see: + +https://peewee.readthedocs.io/en/latest/peewee/models.html + +""" +from __future__ import division +from __future__ import print_function +from __future__ import unicode_literals + +import peewee as pw + +# The databaseProxy is used to dynamically changed the +# backing database and needs to be set to an actual +# database instance to use these models. +databaseProxy = pw.Proxy() + + +class SqliteDatabase(pw.SqliteDatabase): + def __init__(self, *_, **__): + super(SqliteDatabase, self).__init__(*_, **__) + + +class UnknownField(object): + def __init__(self, *_, **__): pass + + +class BaseModel(pw.Model): + class Meta: + database = databaseProxy + + +class Dataset(BaseModel): + description = pw.TextField(null=True) + id = pw.TextField(primary_key=True) + info = pw.TextField(null=True) + name = pw.TextField(unique=True) + + class Meta: + db_table = 'Dataset' + + +class Biosample(BaseModel): + created = pw.TextField(null=True) + datasetid = pw.ForeignKeyField( + db_column='datasetId', rel_model=Dataset, to_field='id') + description = pw.TextField(null=True) + disease = pw.TextField(null=True) + id = pw.TextField(primary_key=True) + individualid = pw.TextField(db_column='individualId', null=True) + info = pw.TextField(null=True) + name = pw.TextField() + updated = pw.TextField(null=True) + + class Meta: + db_table = 'Biosample' + indexes = ( + (('datasetid', 'name'), True), + ) + + +class Referenceset(BaseModel): + assemblyid = pw.TextField(db_column='assemblyId', null=True) + dataurl = pw.TextField(db_column='dataUrl') + description = pw.TextField(null=True) + id = pw.TextField(primary_key=True) + isderived = pw.IntegerField(db_column='isDerived', null=True) + md5checksum = pw.TextField(null=True) + name = pw.TextField(unique=True) + species = pw.TextField(db_column='species', null=True) + sourceaccessions = pw.TextField(db_column='sourceAccessions', 
null=True) + sourceuri = pw.TextField(db_column='sourceUri', null=True) + + class Meta: + db_table = 'ReferenceSet' + + +class Variantset(BaseModel): + created = pw.TextField(null=True) + dataurlindexmap = pw.TextField(db_column='dataUrlIndexMap') + datasetid = pw.ForeignKeyField( + db_column='datasetId', rel_model=Dataset, to_field='id') + id = pw.TextField(primary_key=True) + metadata = pw.TextField(null=True) + name = pw.TextField() + referencesetid = pw.ForeignKeyField( + db_column='referenceSetId', rel_model=Referenceset, to_field='id') + updated = pw.TextField(null=True) + + class Meta: + db_table = 'VariantSet' + indexes = ( + (('datasetid', 'name'), True), + ) + + +class Callset(BaseModel): + biosampleid = pw.TextField(db_column='biosampleId', null=True) + id = pw.TextField(primary_key=True) + name = pw.TextField() + variantsetid = pw.ForeignKeyField( + db_column='variantSetId', rel_model=Variantset, to_field='id') + + class Meta: + db_table = 'CallSet' + indexes = ( + (('variantsetid', 'name'), True), + ) + + +class Ontology(BaseModel): + dataurl = pw.TextField(db_column='dataUrl') + id = pw.TextField(primary_key=True) + name = pw.TextField(unique=True) + ontologyprefix = pw.TextField(db_column='ontologyPrefix') + + class Meta: + db_table = 'Ontology' + + +class Featureset(BaseModel): + dataurl = pw.TextField(db_column='dataUrl') + datasetid = pw.ForeignKeyField( + db_column='datasetId', rel_model=Dataset, to_field='id') + id = pw.TextField(primary_key=True) + info = pw.TextField(null=True) + name = pw.TextField() + ontologyid = pw.ForeignKeyField( + db_column='ontologyId', rel_model=Ontology, to_field='id') + referencesetid = pw.ForeignKeyField( + db_column='referenceSetId', rel_model=Referenceset, to_field='id') + sourceuri = pw.TextField( + db_column='sourceUri', null=True) + + class Meta: + db_table = 'FeatureSet' + indexes = ( + (('datasetid', 'name'), True), + ) + + +class Individual(BaseModel): + created = pw.TextField() + datasetid = 
pw.ForeignKeyField( + db_column='datasetId', rel_model=Dataset, to_field='id') + description = pw.TextField(null=True) + id = pw.TextField(primary_key=True) + info = pw.TextField(null=True) + name = pw.TextField(null=True) + sex = pw.TextField(null=True) + species = pw.TextField(null=True) + updated = pw.TextField(null=True) + + class Meta: + db_table = 'Individual' + indexes = ( + (('datasetid', 'name'), True), + ) + + +class Phenotypeassociationset(BaseModel): + dataurl = pw.TextField(db_column='dataUrl') + datasetid = pw.ForeignKeyField( + db_column='datasetId', rel_model=Dataset, to_field='id') + id = pw.TextField(primary_key=True) + name = pw.TextField(null=True) + + class Meta: + db_table = 'PhenotypeAssociationSet' + indexes = ( + (('datasetid', 'name'), True), + ) + + +class Readgroupset(BaseModel): + dataurl = pw.TextField(db_column='dataUrl') + datasetid = pw.ForeignKeyField( + db_column='datasetId', rel_model=Dataset, to_field='id') + id = pw.TextField(primary_key=True) + indexfile = pw.TextField(db_column='indexFile') + name = pw.TextField() + programs = pw.TextField(null=True) + referencesetid = pw.ForeignKeyField( + db_column='referenceSetId', rel_model=Referenceset, to_field='id') + stats = pw.TextField() + + class Meta: + db_table = 'ReadGroupSet' + indexes = ( + (('datasetid', 'name'), True), + ) + + +class Readgroup(BaseModel): + biosampleid = pw.TextField(db_column='biosampleId', null=True) + created = pw.TextField(null=True) + description = pw.TextField(null=True) + experiment = pw.TextField() + id = pw.TextField(primary_key=True) + name = pw.TextField() + predictedinsertsize = pw.IntegerField( + db_column='predictedInsertSize', null=True) + readgroupsetid = pw.ForeignKeyField( + db_column='readGroupSetId', rel_model=Readgroupset, to_field='id') + samplename = pw.TextField(db_column='sampleName', null=True) + stats = pw.TextField() + updated = pw.TextField(null=True) + + class Meta: + db_table = 'ReadGroup' + indexes = ( + (('readgroupsetid', 
'name'), True), + ) + + +class Reference(BaseModel): + id = pw.TextField(null=True, primary_key=True) + isderived = pw.IntegerField(db_column='isDerived', null=True) + length = pw.IntegerField(null=True) + md5checksum = pw.TextField(null=True) + name = pw.TextField() + species = pw.TextField(db_column='species', null=True) + referencesetid = pw.ForeignKeyField( + db_column='referenceSetId', rel_model=Referenceset, to_field='id') + sourceaccessions = pw.TextField(db_column='sourceAccessions', null=True) + sourcedivergence = pw.FloatField(db_column='sourceDivergence', null=True) + sourceuri = pw.TextField(db_column='sourceUri', null=True) + + class Meta: + db_table = 'Reference' + indexes = ( + (('referencesetid', 'name'), True), + ) + + +class Rnaquantificationset(BaseModel): + dataurl = pw.TextField(db_column='dataUrl') + datasetid = pw.ForeignKeyField( + db_column='datasetId', rel_model=Dataset, to_field='id') + id = pw.TextField(primary_key=True) + info = pw.TextField(null=True) + name = pw.TextField() + referencesetid = pw.ForeignKeyField( + db_column='referenceSetId', rel_model=Referenceset, to_field='id') + + class Meta: + db_table = 'RnaQuantificationSet' + indexes = ( + (('datasetid', 'name'), True), + ) + + +class System(BaseModel): + key = pw.TextField(primary_key=True) + value = pw.TextField() + + class Meta: + db_table = 'System' + + +class Variantannotationset(BaseModel): + analysis = pw.TextField(null=True) + annotationtype = pw.TextField(db_column='annotationType', null=True) + created = pw.TextField(null=True) + id = pw.TextField(primary_key=True) + name = pw.TextField() + ontologyid = pw.ForeignKeyField( + db_column='ontologyId', rel_model=Ontology, to_field='id') + updated = pw.TextField(null=True) + variantsetid = pw.ForeignKeyField( + db_column='variantSetId', rel_model=Variantset, to_field='id') + + class Meta: + db_table = 'VariantAnnotationSet' + indexes = ( + (('variantsetid', 'name'), True), + ) diff --git a/requirements.txt 
b/requirements.txt index 2660fc4a1..a419cebfe 100644 --- a/requirements.txt +++ b/requirements.txt @@ -36,6 +36,7 @@ Jinja2==2.7.3 future==0.15.2 pyjwkest==1.0.1 PyJWT==1.4.2 +peewee==2.8.5 ### This section is for the actual libraries ### # these libraries are imported in code that can be reached via diff --git a/scripts/prepare_compliance_data.py b/scripts/prepare_compliance_data.py index aa8d380d2..d9b87e49f 100644 --- a/scripts/prepare_compliance_data.py +++ b/scripts/prepare_compliance_data.py @@ -189,7 +189,6 @@ def run(self): hg00101Biosample.populateFromJson(jsonString.read()) hg00101Biosample.setIndividualId(hg00101Individual.getId()) self.repo.insertBiosample(hg00101Biosample) - readFiles = [ "brca1_HG00096.sam", "brca1_HG00099.sam", @@ -282,7 +281,6 @@ def run(self): dataset, "cgd", os.path.abspath(outputG2PPath)) self.repo.insertPhenotypeAssociationSet(phenotypeAssociationSet) - self.repo.commit() dataset.addFeatureSet(gencode) # RNA Quantification @@ -303,8 +301,6 @@ def run(self): rnaQuantificationSet.populateFromFile(os.path.abspath(rnaDbName)) self.repo.insertRnaQuantificationSet(rnaQuantificationSet) - self.repo.commit() - def addVariantSet( self, variantFileName, dataset, referenceSet, ontology, biosamples): @@ -326,6 +322,7 @@ def addVariantSet( if biosample.getLocalId() == callSet.getLocalId(): callSet.setBiosampleId(biosample.getId()) self.repo.insertVariantSet(variantSet) + for annotationSet in variantSet.getVariantAnnotationSets(): annotationSet.setOntology(ontology) self.repo.insertVariantAnnotationSet(annotationSet) diff --git a/tests/unit/test_imports.py b/tests/unit/test_imports.py index 8c72a5f07..6949f5220 100644 --- a/tests/unit/test_imports.py +++ b/tests/unit/test_imports.py @@ -202,6 +202,7 @@ class ImportGraphLayerChecker(object): ], 'repo': [ 'ga4gh/server/repo/rnaseq2ga.py', + 'ga4gh/server/repo/models.py', ], } diff --git a/tests/unit/test_repo_manager.py b/tests/unit/test_repo_manager.py index 78924775b..7194da877 100644 --- 
a/tests/unit/test_repo_manager.py +++ b/tests/unit/test_repo_manager.py @@ -240,7 +240,7 @@ def testSameName(self): cmd = "add-dataset {} {}".format(self._repoPath, name) self.runCommand(cmd) self.assertRaises( - exceptions.DuplicateNameException, self.runCommand, cmd) + exceptions.RepoManagerException, self.runCommand, cmd) class TestAddPhenotypeAssociationSet(AbstractRepoManagerTest): @@ -256,7 +256,7 @@ def testDefaults(self): def testSameName(self): self.addDataset() self.addPhenotypeAssociationSet() - with self.assertRaises(exceptions.DuplicateNameException): + with self.assertRaises(exceptions.RepoManagerException): self.addPhenotypeAssociationSet() @@ -336,7 +336,7 @@ def testWithSameName(self): self._repoPath, fastaFile) self.runCommand(cmd) self.assertRaises( - exceptions.DuplicateNameException, self.runCommand, cmd) + exceptions.RepoManagerException, self.runCommand, cmd) class TestAddOntology(AbstractRepoManagerTest): @@ -377,7 +377,7 @@ def testWithSameName(self): self._repoPath, ontologyFile) self.runCommand(cmd) self.assertRaises( - exceptions.DuplicateNameException, self.runCommand, cmd) + exceptions.RepoManagerException, self.runCommand, cmd) def testMissingFile(self): cmd = "add-ontology {} {}".format(self._repoPath, "/no/such/file") @@ -586,7 +586,7 @@ def testAddReadGroupSetWithSameName(self): self._referenceSetName) self.runCommand(cmd) self.assertRaises( - exceptions.DuplicateNameException, self.runCommand, cmd) + exceptions.RepoManagerException, self.runCommand, cmd) # Specified name name = "test_rgs" cmd = ( @@ -596,7 +596,7 @@ def testAddReadGroupSetWithSameName(self): self._referenceSetName, name) self.runCommand(cmd) self.assertRaises( - exceptions.DuplicateNameException, self.runCommand, cmd) + exceptions.RepoManagerException, self.runCommand, cmd) def testUrlWithMissingIndex(self): bamFile = "http://example.com/example.bam" @@ -694,7 +694,7 @@ def testAddVariantSetWithSameName(self): self._referenceSetName) self.runCommand(cmd) 
self.assertRaises( - exceptions.DuplicateNameException, self.runCommand, cmd) + exceptions.RepoManagerException, self.runCommand, cmd) # Specified name name = "test_vs" cmd = ( @@ -704,7 +704,7 @@ def testAddVariantSetWithSameName(self): self._referenceSetName, name) self.runCommand(cmd) self.assertRaises( - exceptions.DuplicateNameException, self.runCommand, cmd) + exceptions.RepoManagerException, self.runCommand, cmd) def testUrlWithMissingIndex(self): dataFile = "http://example.com/example.vcf.gz"