Skip to content

Commit

Permalink
Update ontology term (ga4gh#1523)
Browse files (browse the repository at this point in the history)
* Add new simplified ontology term

* Refactor usage of ontology term

* Fix g2p errors

* Use updated client

* Update build test data

* Remove species fields

* Remove remaining sourcename from tests
  • Loading branch information
david4096 authored Feb 2, 2017
1 parent b2549ef commit 8ff764f
Show file tree
Hide file tree
Showing 13 changed files with 45 additions and 86 deletions.
2 changes: 1 addition & 1 deletion docs/datarepo.rst
Original file line number Diff line number Diff line change
Expand Up @@ -152,7 +152,7 @@ using command line options.
$ ga4gh_repo add-referenceset registry.db hs37d5.fa.gz \
--description "NCBI37 assembly of the human genome" \
--species '{"id": "9606", "term": "Homo sapiens", "sourceName": "NCBI", "sourceVersion: "1.0"}' \
--species '{"termId": "NCBI:9606", "term": "Homo sapiens"}' \
--name NCBI37 \
--sourceUri ftp://ftp.1000genomes.ebi.ac.uk/vol1/ftp/technical/reference/phase2_reference_assembly_sequence/hs37d5.fa.gz
Expand Down
23 changes: 6 additions & 17 deletions ga4gh/server/datamodel/genotype_phenotype.py
Original file line number Diff line number Diff line change
Expand Up @@ -212,14 +212,12 @@ def _formatOntologyTermObject(self, terms, element_type):
terms = [terms]
elements = []
for term in terms:
if not issubclass(term.__class__, dict):
term = protocol.toJsonDict(term)
if term['id']:
if term.term_id:
elements.append('?{} = <{}> '.format(
element_type, term['id']))
element_type, term.term_id))
else:
elements.append('?{} = <{}> '.format(
element_type, self._toNamespaceURL(term['term'])))
element_type, self._toNamespaceURL(term.term)))
elementClause = "({})".format(" || ".join(elements))
return elementClause

Expand Down Expand Up @@ -403,10 +401,7 @@ def _toGA4GH(self, association, featureSets=[]):

term = protocol.OntologyTerm()
term.term = association['evidence_type']
term.id = phenotype['id']
term.source_version = self._version
term.source_name = self._getPrefix(
self._getPrefixURL(association['id']))
term.term_id = phenotype['id']
evidence.evidence_type.MergeFrom(term)

evidence.description = self._getIdentifier(association['evidence'])
Expand All @@ -424,10 +419,7 @@ def _toGA4GH(self, association, featureSets=[]):

term = protocol.OntologyTerm()
term.term = environment['id']
term.id = 'http://purl.obolibrary.org/obo/RO_0002606'
term.source_version = self._version
term.source_name = self._getPrefix(
self._getPrefixURL(association['id']))
term.term_id = 'http://purl.obolibrary.org/obo/RO_0002606'
environmentalContext.environment_type.MergeFrom(term)

fpa.environmental_contexts.extend([environmentalContext])
Expand All @@ -436,10 +428,7 @@ def _toGA4GH(self, association, featureSets=[]):
phenotypeInstance = protocol.PhenotypeInstance()
term = protocol.OntologyTerm()
term.term = phenotype[TYPE]
term.id = phenotype['id']
term.source_version = self._version
term.source_name = self._getPrefix(
self._getPrefixURL(association['id']))
term.term_id = phenotype['id']
phenotypeInstance.type.MergeFrom(term)

phenotypeInstance.description = phenotype[LABEL]
Expand Down
2 changes: 1 addition & 1 deletion ga4gh/server/datamodel/genotype_phenotype_featureset.py
Original file line number Diff line number Diff line change
Expand Up @@ -110,7 +110,7 @@ def _getFeatureById(self, featureId):
for featureType in sorted(feature[TYPE]):
if "obolibrary" in featureType:
term.term = self._featureTypeLabel(featureType)
term.id = featureType
term.term_id = featureType
pbFeature.feature_type.MergeFrom(term)
break

Expand Down
6 changes: 1 addition & 5 deletions ga4gh/server/datamodel/ontologies.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,6 @@
import ga4gh.server.exceptions as exceptions
import ga4gh.server.datamodel.obo_parser as obo_parser

import ga4gh.schemas.pb as pb


SEQUENCE_ONTOLOGY_PREFIX = "SO"

Expand Down Expand Up @@ -137,7 +135,5 @@ def getGaTermByName(self, name):
termId = termIds[0]
term = protocol.OntologyTerm()
term.term = name
term.id = termId
term.source_name = self._sourceName
term.source_version = pb.string(self._sourceVersion)
term.term_id = termId
return term
14 changes: 4 additions & 10 deletions ga4gh/server/datamodel/references.py
Original file line number Diff line number Diff line change
Expand Up @@ -211,10 +211,8 @@ def toProtocolElement(self):
if self.getSpecies():
term = protocol.fromJson(
json.dumps(self.getSpecies()), protocol.OntologyTerm)
ret.species.id = term.id
ret.species.term_id = term.term_id
ret.species.term = term.term
ret.species.source_name = term.source_name
ret.species.source_version = term.source_version
ret.source_accessions.extend(self.getSourceAccessions())
ret.source_uri = pb.string(self.getSourceUri())
ret.name = self.getLocalId()
Expand Down Expand Up @@ -355,10 +353,8 @@ def toProtocolElement(self):
if self.getSpecies():
term = protocol.fromJson(
json.dumps(self.getSpecies()), protocol.OntologyTerm)
reference.species.id = term.id
reference.species.term_id = term.term_id
reference.species.term = term.term
reference.species.source_name = term.source_name
reference.species.source_version = term.source_version
reference.source_accessions.extend(self.getSourceAccessions())
reference.source_divergence = pb.int(self.getSourceDivergence())
reference.source_uri = self.getSourceUri()
Expand Down Expand Up @@ -403,8 +399,7 @@ def __init__(self, localId, randomSeed=0, numReferences=1):
self._assemblyId = str(random.randint(0, 2**32))
self._isDerived = bool(random.randint(0, 1))
self._species = json.loads(
'{"sourceName": "NCBI", "sourceVersion": "",'
+ '"term": "Homo sapiens", "id": "9606"}')
'{"term": "Homo sapiens", "termId": "9606"}')
self._sourceAccessions = []
for i in range(random.randint(1, 3)):
self._sourceAccessions.append("sim_accession_{}".format(
Expand Down Expand Up @@ -437,8 +432,7 @@ def __init__(self, parentContainer, localId, randomSeed=0, length=200):
if self._isDerived:
self._sourceDivergence = rng.uniform(0, 0.1)
self._species = json.loads(
'{"sourceName": "NCBI", "sourceVersion": "",'
+ '"term": "Homo sapiens", "id": "9606"}')
'{"term": "Homo sapiens", "termId": "9606"}')
self._sourceAccessions = []
for i in range(random.randint(1, 3)):
self._sourceAccessions.append("sim_accession_{}".format(
Expand Down
4 changes: 1 addition & 3 deletions ga4gh/server/datamodel/sequence_annotations.py
Original file line number Diff line number Diff line change
Expand Up @@ -228,9 +228,7 @@ def _getRandomfeatureType(self, randomNumberGenerator):
("exon", "SO:0000147")]
term = protocol.OntologyTerm()
ontologyTuple = randomNumberGenerator.choice(ontologyTuples)
term.term, term.id = ontologyTuple[0], ontologyTuple[1]
term.source_name = "sequenceOntology"
term.source_version = "0"
term.term, term.term_id = ontologyTuple[0], ontologyTuple[1]
return term

def _generateSimulatedFeature(self, randomNumberGenerator):
Expand Down
4 changes: 1 addition & 3 deletions ga4gh/server/datamodel/variants.py
Original file line number Diff line number Diff line change
Expand Up @@ -993,9 +993,7 @@ def _getRandomOntologyTerm(self, randomNumberGenerator):
("exon_variant", "SO:0001791")]
term = protocol.OntologyTerm()
ontologyTuple = randomNumberGenerator.choice(ontologyTuples)
term.term, term.id = ontologyTuple[0], ontologyTuple[1]
term.source_name = "ontology"
term.source_version = "0"
term.term, term.term_id = ontologyTuple[0], ontologyTuple[1]
return term

def _addTranscriptEffectOntologyTerm(self, effect, randomNumberGenerator):
Expand Down
4 changes: 2 additions & 2 deletions ga4gh/server/paging.py
Original file line number Diff line number Diff line change
Expand Up @@ -274,10 +274,10 @@ def _checkIdEquality(self, requestedEffect, effect):
present in an annotation are equal.
"""
return self._idPresent(requestedEffect) and (
effect.id == requestedEffect.id)
effect.term_id == requestedEffect.term_id)

def _idPresent(self, requestedEffect):
return requestedEffect.id != ""
return requestedEffect.term_id != ""

def _matchAnyEffects(self, effect):
ret = False
Expand Down
4 changes: 2 additions & 2 deletions scripts/build_test_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,8 +37,8 @@ def buildTestData(
pattern = os.path.join(prefix, "referenceSets", "*.fa.gz")
for dataFile in glob.glob(pattern):
run("add-referenceset", repoFile, useRelativePath, dataFile,
"--species ", '\'{"id": "9606", "term": "Homo sapiens", '
'"sourceName": "NCBI", "sourceVersion": "1.0"}\'')
"--species ", '\'{"termId": "NCBI:9606", '
'"term": "Homo sapiens"}\'')

pattern = os.path.join(prefix, "ontologies", "*.obo")
for dataFile in glob.glob(pattern):
Expand Down
5 changes: 1 addition & 4 deletions tests/datadriven/test_ontologies.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,10 +53,7 @@ def testGoodMappings(self):
self.assertTrue(protocol.validate(protocol.toJson(gaTerm),
OntologyTerm))
self.assertEqual(gaTerm.term, term.name)
self.assertIn(gaTerm.id, ontology.getTermIds(term.name))
self.assertEqual(
gaTerm.source_version, ontology.getSourceVersion())
self.assertEqual(gaTerm.source_name, ontology.getName())
self.assertIn(gaTerm.term_id, ontology.getTermIds(term.name))

def testBadMappings(self):
for badName in ["Not a term", None, 1234]:
Expand Down
15 changes: 8 additions & 7 deletions tests/end_to_end/test_g2p.py
Original file line number Diff line number Diff line change
Expand Up @@ -199,7 +199,7 @@ def testFeaturesSearchById(self):
self.assertIsNotNone(feature)
self.assertEqual(request.feature_id, feature.id)
self.assertIsNotNone(feature.feature_type)
self.assertIsNotNone(feature.feature_type.id)
self.assertIsNotNone(feature.feature_type.term_id)
self.assertEqual(feature.reference_name, "chr10")
self.assertEqual(feature.start, 43617416)
self.assertEqual(feature.end, 43617416)
Expand Down Expand Up @@ -258,7 +258,7 @@ def testPhenotypesSearchOntologyTerm(self):
request = protocol.SearchPhenotypesRequest()
request.phenotype_association_set_id = \
self.getPhenotypeAssociationSetId()
request.type.id = "http://ohsu.edu/cgd/5c895709"
request.type.term_id = "http://ohsu.edu/cgd/5c895709"
postUrl = '/phenotypes/search'
response = self.sendSearchRequest(
postUrl,
Expand All @@ -271,7 +271,7 @@ def testPhenotypeSearchQualifiersSensitivity(self):
request.phenotype_association_set_id = \
self.getPhenotypeAssociationSetId()
ontologyterm = protocol.OntologyTerm()
ontologyterm.id = "http://ohsu.edu/cgd/sensitivity"
ontologyterm.term_id = "http://ohsu.edu/cgd/sensitivity"
request.qualifiers.extend([ontologyterm])
postUrl = '/phenotypes/search'
response = self.sendSearchRequest(
Expand All @@ -285,7 +285,7 @@ def testPhenotypeSearchQualifiersSensitivityPATO_0000396(self):
request.phenotype_association_set_id = \
self.getPhenotypeAssociationSetId()
ontologyterm = protocol.OntologyTerm()
ontologyterm.id = "http://purl.obolibrary.org/obo/PATO_0000396"
ontologyterm.term_id = "http://purl.obolibrary.org/obo/PATO_0000396"
request.qualifiers.extend([ontologyterm])
postUrl = '/phenotypes/search'
response = self.sendSearchRequest(
Expand All @@ -299,9 +299,9 @@ def testPhenotypeSearchMultipleQualifiers(self):
request.phenotype_association_set_id = \
self.getPhenotypeAssociationSetId()
ontologyterm = protocol.OntologyTerm()
ontologyterm.id = "http://purl.obolibrary.org/obo/PATO_0000396"
ontologyterm.term_id = "http://purl.obolibrary.org/obo/PATO_0000396"
ontologyterm2 = protocol.OntologyTerm()
ontologyterm2.id = "http://purl.obolibrary.org/obo/PATO_0000460"
ontologyterm2.term_id = "http://purl.obolibrary.org/obo/PATO_0000460"
request.qualifiers.extend([ontologyterm, ontologyterm2])
postUrl = '/phenotypes/search'
response = self.sendSearchRequest(
Expand Down Expand Up @@ -341,7 +341,8 @@ def testPhenotypesSearchMultipleTerms(self):
request.phenotype_association_set_id = \
self.getPhenotypeAssociationSetId()
request.description = "Melanoma, NOS with response to therapy"
request.age_of_onset.id = "http://purl.obolibrary.org/obo/HP_0003581"
request.age_of_onset.term_id = \
"http://purl.obolibrary.org/obo/HP_0003581"
postUrl = '/phenotypes/search'
response = self.sendSearchRequest(
postUrl,
Expand Down
6 changes: 1 addition & 5 deletions tests/unit/test_bio_metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,6 @@
import ga4gh.server.datamodel.bio_metadata as bioMetadata
import ga4gh.server.protocol as protocol

import ga4gh.schemas.pb as pb


class TestIndividuals(unittest.TestCase):
"""
Expand All @@ -23,9 +21,7 @@ def testToProtocolElement(self):
dataset = datasets.Dataset('dataset1')
term = protocol.OntologyTerm()
term.term = "male genotypic sex"
term.id = "PATO:0020001"
term.source_name = "PATO"
term.source_version = pb.string("2015-11-18")
term.term_id = "PATO:0020001"
# Write out a valid input
print(protocol.toJsonDict(term))
validIndividual = protocol.Individual(
Expand Down
42 changes: 16 additions & 26 deletions tests/unit/test_simulated_stack.py
Original file line number Diff line number Diff line change
Expand Up @@ -165,13 +165,9 @@ def verifyReferenceSetsEqual(self, gaReferenceSet, referenceSet):
sp = protocol.fromJson(
json.dumps(referenceSet.getSpecies()), protocol.OntologyTerm)
self.assertEqual(
gaReferenceSet.species.id, sp.id)
gaReferenceSet.species.term_id, sp.term_id)
self.assertEqual(
gaReferenceSet.species.term, sp.term)
self.assertEqual(
gaReferenceSet.species.source_name, sp.source_name)
self.assertEqual(
gaReferenceSet.species.source_version, sp.source_version)
self.assertEqual(
gaReferenceSet.assembly_id, referenceSet.getAssemblyId())
self.assertEqual(
Expand Down Expand Up @@ -204,14 +200,8 @@ def verifyReferencesEqual(self, gaReference, reference):
self.assertEqual(gaReference.md5checksum, reference.getMd5Checksum())
sp = protocol.fromJson(
json.dumps(reference.getSpecies()), protocol.OntologyTerm)
self.assertEqual(gaReference.species.id, sp.id)
self.assertEqual(gaReference.species.term_id, sp.term_id)
self.assertEqual(gaReference.species.term, sp.term)
self.assertEqual(
gaReference.species.source_name,
sp.source_name)
self.assertEqual(
gaReference.species.source_version,
sp.source_version)
self.assertEqual(gaReference.source_uri, reference.getSourceUri())
self.assertEqual(
gaReference.source_accessions, reference.getSourceAccessions())
Expand Down Expand Up @@ -651,7 +641,7 @@ def testVariantAnnotationsSearch(self):
request.end = 10
request.reference_name = "1"

request.effects.add().id = "ThisIsNotAnEffect"
request.effects.add().term_id = "ThisIsNotAnEffect"

response = self.sendJsonPostRequest(path, protocol.toJson(request))
responseData = protocol.fromJson(response.data, protocol.
Expand Down Expand Up @@ -680,8 +670,8 @@ def testVariantAnnotationsSearch(self):
request.start = 0
request.end = 5
request.reference_name = "1"
request.effects.add().id = "SO:0001627"
request.effects.add().id = "B4DID"
request.effects.add().term_id = "SO:0001627"
request.effects.add().term_id = "B4DID"
response = self.sendJsonPostRequest(path, protocol.toJson(request))
responseData = protocol.fromJson(response.data, protocol.
SearchVariantAnnotationsResponse)
Expand All @@ -693,8 +683,8 @@ def testVariantAnnotationsSearch(self):
effectPresent = False
for effect in ann.transcript_effects:
for featureType in effect.effects:
if featureType.id in map(
lambda e: e.id, request.effects):
if featureType.term_id in map(
lambda e: e.term_id, request.effects):
effectPresent = True
self.assertEquals(
True, effectPresent,
Expand All @@ -705,8 +695,8 @@ def testVariantAnnotationsSearch(self):
request.start = 0
request.end = 5
request.reference_name = "1"
request.effects.add().id = "B4DID"
request.effects.add().id = "SO:0001627"
request.effects.add().term_id = "B4DID"
request.effects.add().term_id = "SO:0001627"
response = self.sendJsonPostRequest(path, protocol.toJson(request))
responseData = protocol.fromJson(response.data, protocol.
SearchVariantAnnotationsResponse)
Expand All @@ -718,8 +708,8 @@ def testVariantAnnotationsSearch(self):
effectPresent = False
for effect in ann.transcript_effects:
for featureType in effect.effects:
if featureType.id in map(
lambda e: e.id, request.effects):
if featureType.term_id in map(
lambda e: e.term_id, request.effects):
effectPresent = True
self.assertEquals(
True,
Expand All @@ -731,7 +721,7 @@ def testVariantAnnotationsSearch(self):
request.start = 0
request.end = 5
request.reference_name = "1"
request.effects.add().id = "SO:0001627"
request.effects.add().term_id = "SO:0001627"
response = self.sendJsonPostRequest(path, protocol.toJson(request))
responseData = protocol.fromJson(response.data, protocol.
SearchVariantAnnotationsResponse)
Expand All @@ -744,8 +734,8 @@ def testVariantAnnotationsSearch(self):
"Transcript effects should be unique")
for effect in ann.transcript_effects:
for featureType in effect.effects:
if featureType.id in map(
lambda e: e.id, request.effects):
if featureType.term_id in map(
lambda e: e.term_id, request.effects):
effectPresent = True
self.assertEquals(True, effectPresent,
"The ontology term should appear at least once")
Expand All @@ -755,8 +745,8 @@ def testVariantAnnotationsSearch(self):
request.start = 0
request.end = 10
request.reference_name = "1"
request.effects.add().id = "SO:0001627"
request.effects.add().id = "SO:0001791"
request.effects.add().term_id = "SO:0001627"
request.effects.add().term_id = "SO:0001791"
response = self.sendJsonPostRequest(path, protocol.toJson(request))
responseData = protocol.fromJson(response.data, protocol.
SearchVariantAnnotationsResponse)
Expand Down

0 comments on commit 8ff764f

Please sign in to comment.