Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
17 commits
Select commit Hold shift + click to select a range
76ce434
started work on providing better tracing of 'bad' ontology terms. Cur…
bobular Sep 25, 2019
f99bf8f
renamed to getDebugContext, improved usage and stopped null terms bei…
bobular Sep 26, 2019
a857f06
proper checking of empty ontology terms
bobular Sep 26, 2019
904f4df
skipping empty lines to avoid '_'=>undef in data structures
bobular Sep 26, 2019
e9f6081
added new DataEntity class so that Assays can have characteristics an…
bobular Oct 1, 2019
a5a0ac3
groundwork laid for loading p_phenotype and g_genotype files
bobular Oct 1, 2019
2cdf1bd
now reads in ISA-like popbio Phenotype data from p_*.txt data files
bobular Oct 1, 2019
00e0367
added new classes to handle VB popbio genotype sheets
bobular Oct 2, 2019
5e52b3e
added debugContext for ontology terms created from study-assay files
bobular Oct 2, 2019
3f1b3e1
Implemented the parsing of ontology source IDs from Characteristics h…
bobular Oct 10, 2019
9b4edb1
added handling for custom STUDY TAGS section
bobular Oct 15, 2019
5962b04
characteristics qualifier syntax to allow OBI:OBI_0012345 or SO:SO:01…
bobular Oct 22, 2019
c0db4d6
renamed Value attribute to PhenotypeValue to avoid method name clash …
bobular Nov 7, 2019
c50d312
Comment headings can now have optional white space between 'comment' …
bobular Nov 11, 2019
2fc8659
disabled the debug-context code
bobular Dec 5, 2019
d794c2d
fixed #38395 to allow zero values to be loaded from ISA-Tab
bobular Dec 5, 2019
1d87295
was missing parent child relationship between Phenotype, Genotype and…
bobular Dec 17, 2019
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion ISA/lib/perl/Commentable.pm
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ sub new {
my $commentValues = $args->{$ck};
next unless $commentValues;

$ck =~ m/$commentString\[(.+)\]/;
$ck =~ m/${commentString}_?\[(.+)\]/;
my $comment = CBIL::ISA::StudyAssayEntity::Comment->new({'_qualifier' => $1, '_value' => $commentValues});
push @{$obj->{_comments}}, $comment;
}
Expand Down
24 changes: 22 additions & 2 deletions ISA/lib/perl/Investigation.pm
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ my $STUDY_PROTOCOLS = "STUDY PROTOCOLS";
# Investigation-file section names held as file-scoped lexicals.
# $STUDY_TAGS is used in makeStudy as the key into both the parsed section
# hash and the per-section column counts.
# NOTE(review): the other names are presumably used the same way by code
# outside this view — confirm.
my $STUDY_FACTORS = "STUDY FACTORS";
my $STUDY_PUBLICATIONS = "STUDY PUBLICATIONS";
my $STUDY_ASSAYS = "STUDY ASSAYS";
my $STUDY_TAGS = "STUDY TAGS"; # extension introduced by VectorBase


# Stores the debug flag/value on the object and returns the stored value.
sub setDebug {
  my ($self, $debug) = @_;
  return $self->{_debug} = $debug;
}
Expand Down Expand Up @@ -110,6 +111,9 @@ sub makeStudy {
$study->setProtocols($hash->{$STUDY_PROTOCOLS},
$columnCounts->{$STUDY_PROTOCOLS});

$study->setTags($hash->{$STUDY_TAGS},
$columnCounts->{$STUDY_TAGS});

return $study;
}

Expand Down Expand Up @@ -186,6 +190,22 @@ sub parseStudy {
$assayFileReader->closeFh();
}

foreach my $file (@{$study->getAssayDataFiles()}) {
# only process phenotype (p_*) and genotype (g_*) ISA-like files
if ($file->getValue() =~ /^(?:p_|g_)/) {
my $assayDataFileName = $investigationDirectory . "/" . $file->getValue();
warn "About to load $assayDataFileName...\n";

my $assayDataFileReader = CBIL::ISA::StudyAssayFileReader->new($assayDataFileName, $delimiter);

while($assayDataFileReader->hasNextLine()) {
my $dataObjects = $assayDataFileReader->readLineToObjects();
$study->addNodesAndEdges($dataObjects, $assayDataFileName);
}
$assayDataFileReader->closeFh();
}
}

$study->setHasMoreData(0);
}

Expand Down Expand Up @@ -251,8 +271,8 @@ sub dealWithAllOntologies {
my $term = $ontologyTerm->getTerm();


unless(($accession && $source) || blessed($ontologyTerm) eq 'CBIL::ISA::StudyAssayEntity::Characteristic' || blessed($ontologyTerm) eq 'CBIL::ISA::StudyAssayEntity::ParameterValue') {
$self->handleError("OntologyTerm $term is required to have accession and source.");
unless(($accession && $source) or not $ontologyTerm->requiresAccessionedTerm()) {
$self->handleError("OntologyTerm '$term' (context: ".$ontologyTerm->getDebugContext().") is required to have accession and source.");
}
}
}
Expand Down
4 changes: 4 additions & 0 deletions ISA/lib/perl/InvestigationFileReader.pm
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ my @CONTEXTS = ("ONTOLOGY SOURCE REFERENCE",
"STUDY ASSAYS",
"STUDY PROTOCOLS",
"STUDY CONTACTS",
"STUDY TAGS", # extension by VectorBase
);

sub new {
Expand Down Expand Up @@ -63,6 +64,9 @@ sub read {
# split and remove leading and trailing quotes
my @a = map { s/^"(.*)"$/$1/; $_; } split($delimiter, $_);

# skip completely empty lines
next unless (length(join '', @a)>0);

$studyIdentifier = $a[1] if(uc $a[0] eq 'STUDY IDENTIFIER');
if(&isContextSwitch($a[0])) {
$lineContext = uc $a[0];
Expand Down
23 changes: 23 additions & 0 deletions ISA/lib/perl/OntologyTerm.pm
Original file line number Diff line number Diff line change
Expand Up @@ -18,13 +18,25 @@ sub getTermAccessionNumber { $_[0]->{_term_accession_number} }
sub setTermSourceRef { $_[0]->{_term_source_ref} = $_[1] }
sub getTermSourceRef { $_[0]->{_term_source_ref} }

=head2 getDebugContext setDebugContext

Getter/setter for a string that gives an idea where the ontology term was attempted to be loaded from.
This can help debugging poorly constructed ISA-Tab.

=cut

# Stores the debug-context string on the term and returns the stored value.
sub setDebugContext {
  my ($self, $context) = @_;
  return $self->{_debug_context} = $context;
}

# Returns the stored debug-context string (undef when none has been set).
sub getDebugContext {
  my ($self) = @_;
  return $self->{_debug_context};
}

sub new {
my ($class, $args) = @_;

my $seenTerm;

my $self = bless {}, $class;

$self->setDebugContext(delete $args->{_debug_context}) if ($args->{_debug_context});

foreach my $key (keys %$args) {
if($key =~ /term_source_ref/) {
$self->setTermSourceRef($args->{$key});
Expand Down Expand Up @@ -58,4 +70,15 @@ sub getValue { $_[0]->getTerm() }
# Alias for setTerm: forwards the value to it and returns whatever it returns.
sub setValue {
  my ($self, $value) = @_;
  return $self->setTerm($value);
}


#
# Tells callers whether this column must hold a fully specified ontology
# term. The base class answers true (1).
#
# Subclasses for columns that may hold free text and/or numbers+units
# (e.g. Characteristics and Parameter Value) override this to return false.
#
sub requiresAccessionedTerm { 1 }

1;
68 changes: 59 additions & 9 deletions ISA/lib/perl/Study.pm
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ use CBIL::ISA::Publication;
use CBIL::ISA::Contact;
use CBIL::ISA::OntologyTerm;
use CBIL::ISA::StudyDesign;
use CBIL::ISA::Tag;
use CBIL::ISA::StudyFactor;
use CBIL::ISA::StudyAssay;
use CBIL::ISA::Protocol;
Expand Down Expand Up @@ -91,7 +92,21 @@ sub setStudyDesigns {
return $self->getStudyDesigns();
}
# Returns the arrayref of study designs stored on this study, or a fresh
# empty arrayref when none have been stored yet.
sub getStudyDesigns { $_[0]->{_study_designs} || [] }

# Appends one study design to the study and returns the new element count.
# It must push onto '_study_designs', the slot getStudyDesigns reads. The
# earlier duplicate definition pushed onto a misspelt '_study_design' slot,
# so added designs were never visible through the getter; it is removed here.
sub addStudyDesign { push @{$_[0]->{_study_designs}}, $_[1] }


# Builds the study's tag objects from the given section hash and stores them.
#
# @param $hash         section hash handed straight to makeStudyObjectsFromHash
# @param $columnCount  column count handed straight to makeStudyObjectsFromHash
# @return whatever getTags returns after the assignment
#
# "_tag" is passed as the only ontology-term key pattern and is flagged as
# not being a list (0).
sub setTags {
  my ($self, $hash, $columnCount) = @_;

  my @termRegexes = ("_tag");
  my @termIsList  = (0);

  $self->{_tags} = $self->makeStudyObjectsFromHash(
    $hash, $columnCount, "CBIL::ISA::Tag", \@termRegexes, \@termIsList
  );

  return $self->getTags();
}

# Returns the stored tags arrayref, or a fresh empty arrayref when unset.
sub getTags {
  my ($self) = @_;
  return $self->{_tags} || [];
}

# Appends one tag to the stored list and returns the new element count.
sub addTag {
  my ($self, $tag) = @_;
  return push @{ $self->{_tags} }, $tag;
}


sub setStudyFactors {
my ($self, $hash, $columnCount) = @_;
Expand Down Expand Up @@ -169,6 +184,26 @@ sub addEdge {
}
sub getEdges { $_[0]->{_edges} or [] }

#
# Registry of the File entities (such as "Raw Data File") that have been
# encountered while parsing assay files.
#

# Returns the arrayref of remembered file objects, or a fresh empty arrayref
# when nothing has been remembered yet.
sub getAssayDataFiles {
  my ($self) = @_;
  return $self->{_assay_data_files} || [];
}

# Remembers a file object unless one with the same value (the filename) is
# already registered.
#
# @param $fileObj  object offering getValue()
# @return the already-registered object with the same value, else $fileObj
#
# "$obj->equals($other)" cannot be used for the duplicate check because
# attributes are not nodes and are "never equal", so the comparison is done
# on the value alone.
sub addAssayDataFile {
  my ($self, $fileObj) = @_;

  for my $known (@{ $self->getAssayDataFiles() }) {
    if ($fileObj->getValue() eq $known->getValue()) {
      return $known;
    }
  }

  push @{ $self->{_assay_data_files} }, $fileObj;
  return $fileObj;
}



# Handle a chunk of the Investigation File
# Each section is made into an object
Expand Down Expand Up @@ -210,7 +245,6 @@ sub makeStudyObjectsFromHash {
my $setterName = $setOrAdd . join("", map { ucfirst } split("_", $otRegex));

my %initOtHash = map { $_ => $hash->{$_}->[$i] } @{$otKeys{$otRegex}};

my %otHash;

if($otIsList) {
Expand All @@ -226,13 +260,17 @@ sub makeStudyObjectsFromHash {
}

foreach my $n (keys %otHash) {
my $ontologyTerm = CBIL::ISA::OntologyTerm->new($otHash{$n});
eval {
$obj->$setterName($ontologyTerm);

};
if ($@) {
die "Unable to $setterName for class $class: $@";
my $otHash = $otHash{$n};
# don't create and attach a term if all values (name, accession, source) are empty.
if (length(join '', values %$otHash)>0) {
my $ontologyTerm = CBIL::ISA::OntologyTerm->new($otHash);
# $ontologyTerm->setDebugContext(join ', ', map { "$_=$hash{$_}" } keys %hash);
eval {
$obj->$setterName($ontologyTerm);
};
if ($@) {
die "Unable to $setterName for class $class: $@";
}
}
}
}
Expand Down Expand Up @@ -279,6 +317,10 @@ sub addNodesAndEdges {

$start = 1;
$wasNodeContext = $entity->isNode();

# now keep a track of all File entities
# for later processing by PopBio Genotype/Phenotype handler
map { $self->addAssayDataFile($_) } @{$entity->getFiles()} if ($entity->can('getFiles'));
}
}

Expand Down Expand Up @@ -374,6 +416,14 @@ inherited from L<CBIL::ISA::Commentable>

@return array of L<CBIL::ISA::StudyDesign>

=item C<addTag>

@param L<CBIL::ISA::Tag>

=item C<getTags>

@return array of L<CBIL::ISA::Tag>

=item C<addStudyFactor>

@param L<CBIL::ISA::StudyFactors>
Expand Down
18 changes: 17 additions & 1 deletion ISA/lib/perl/StudyAssayEntity/Assay.pm
Original file line number Diff line number Diff line change
@@ -1,6 +1,22 @@
package CBIL::ISA::StudyAssayEntity::Assay;
# Only one 'use base' line: DataEntity first, then Fileable. A leftover
# 'use base qw(...Fileable)' ahead of this one would put Fileable in front
# of DataEntity in @ISA, because 'use base' appends and skips classes that
# are already inherited.
use base qw(CBIL::ISA::StudyAssayEntity::DataEntity CBIL::ISA::StudyAssayEntity::Fileable);

use strict;


# Returns an arrayref with the attribute names of both parent classes:
# first those reported by DataEntity, then those reported by Fileable.
#
# The result is a new array, so the parents' lists are not modified here.
# DataEntity is queried before Fileable, as in the original code.
#
# NOTE(review): DataEntity::getAttributeNames starts from its own SUPER
# (Fileable) list, so Fileable's names probably appear twice in the result —
# confirm that callers tolerate the duplicates before de-duplicating.
sub getAttributeNames {
  my ($self) = @_;

  my $dataEntityNames = $self->CBIL::ISA::StudyAssayEntity::DataEntity::getAttributeNames();
  my $fileableNames   = $self->CBIL::ISA::StudyAssayEntity::Fileable::getAttributeNames();

  return [ @$dataEntityNames, @$fileableNames ];
}


1;
10 changes: 10 additions & 0 deletions ISA/lib/perl/StudyAssayEntity/Attribute.pm
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
package CBIL::ISA::StudyAssayEntity::Attribute;
use base qw(CBIL::ISA::OntologyTerm);

use strict;

# Returns an arrayref holding the single parent entity name "Phenotype".
# NOTE(review): presumably the entity types this term may be attached to —
# confirm against the code that consumes getParents.
sub getParents {
  my @parents = ("Phenotype");
  return \@parents;
}

1;
6 changes: 5 additions & 1 deletion ISA/lib/perl/StudyAssayEntity/Characteristic.pm
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ use base qw(CBIL::ISA::OntologyTermWithQualifier);
use strict;

# Returns an arrayref of the parent entity names for a Characteristic.
#
# The list includes "Assay", "Phenotype" and "Genotype" in addition to the
# material entities. A leftover 'return' of the shorter list used to sit in
# front of this one, which made the extended list unreachable.
sub getParents {
  return ["Source", "Sample", "Extract", "LabeledExtract", "Assay", "Phenotype", "Genotype" ];
}

# @override
Expand Down Expand Up @@ -52,4 +52,8 @@ sub qualifierContextMethod {
return "addCharacteristic";
}

# Overrides the inherited default: a Characteristic value does not have to
# be an ontology term with accession and source, so this answers false (0).
sub requiresAccessionedTerm { 0 }

1;
32 changes: 32 additions & 0 deletions ISA/lib/perl/StudyAssayEntity/DataEntity.pm
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
package CBIL::ISA::StudyAssayEntity::DataEntity;
use base qw(CBIL::ISA::StudyAssayEntity::Fileable);

=head1 DESCRIPTION

Base class that lets Description and Characteristics attributes (as used
in VectorBase PopBio) be attached to "Assay", "Phenotype" and "Genotype"
entities.

=cut

use strict;

# Stores the description and returns the stored value.
sub setDescription {
  my ($self, $description) = @_;
  return $self->{_description} = $description;
}

# Returns the stored description (undef when none has been set).
sub getDescription {
  my ($self) = @_;
  return $self->{_description};
}

# Appends one characteristic and returns the new element count.
sub addCharacteristic {
  my ($self, $characteristic) = @_;
  return push @{ $self->{_characteristics} }, $characteristic;
}

# Returns the characteristics arrayref, or a fresh empty arrayref when unset.
sub getCharacteristics {
  my ($self) = @_;
  return $self->{_characteristics} || [];
}

# Returns the parent class's attribute-name arrayref with "Characteristic"
# and "Description" appended to it. The names are pushed onto the very
# arrayref the parent returned, not onto a copy.
sub getAttributeNames {
  my ($self) = @_;

  my $names = $self->SUPER::getAttributeNames();
  push @$names, "Characteristic", "Description";

  return $names;
}

1;
2 changes: 1 addition & 1 deletion ISA/lib/perl/StudyAssayEntity/Description.pm
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ sub getAttributeNames {
}

# Returns an arrayref of the parent entity names for a Description.
#
# The list includes "Assay" in addition to the material entities. A leftover
# 'return' of the shorter list used to sit in front of this one, which made
# the "Assay" entry unreachable.
#
# NOTE(review): DataEntity also offers "Description" to its Genotype
# subclass (and, per its POD, to Phenotype), yet neither is listed here —
# confirm whether they should be added.
sub getParents {
  return ["Source", "Sample", "Extract", "LabeledExtract", "Assay"];
}


Expand Down
22 changes: 22 additions & 0 deletions ISA/lib/perl/StudyAssayEntity/Genotype.pm
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
package CBIL::ISA::StudyAssayEntity::Genotype;
use base qw(CBIL::ISA::StudyAssayEntity::DataEntity);

use strict;

# Stores the genotype type and returns the stored value.
sub setType {
  my ($self, $type) = @_;
  return $self->{_genotype_type} = $type;
}

# Returns the stored genotype type (undef when none has been set).
sub getType {
  my ($self) = @_;
  return $self->{_genotype_type};
}

# Returns the parent class's attribute-name arrayref with "Type" appended to
# it. The name is pushed onto the very arrayref the parent returned, not
# onto a copy.
sub getAttributeNames {
  my ($self) = @_;

  my $names = $self->SUPER::getAttributeNames();
  push @$names, "Type";

  return $names;
}


1;
10 changes: 10 additions & 0 deletions ISA/lib/perl/StudyAssayEntity/Observable.pm
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
package CBIL::ISA::StudyAssayEntity::Observable;
use base qw(CBIL::ISA::OntologyTerm);

use strict;

# Returns an arrayref holding the single parent entity name "Phenotype".
# NOTE(review): presumably the entity types this term may be attached to —
# confirm against the code that consumes getParents.
sub getParents {
  my @parents = ("Phenotype");
  return \@parents;
}

1;
4 changes: 4 additions & 0 deletions ISA/lib/perl/StudyAssayEntity/ParameterValue.pm
Original file line number Diff line number Diff line change
Expand Up @@ -31,4 +31,8 @@ sub qualifierContextMethod {
return "addParameterValue";
}

# Overrides the inherited default: a Parameter Value does not have to be an
# ontology term with accession and source, so this answers false (0).
sub requiresAccessionedTerm { 0 }

1;
Loading