From eeb71efc0b06132bb49deec076718fb354ede2f3 Mon Sep 17 00:00:00 2001 From: George Powley Date: Thu, 21 Dec 2023 10:46:57 -0500 Subject: [PATCH] Handle resume ingestion for annotation VCFs (#643) --- libtiledbvcf/src/vcf/vcf_utils.cc | 8 +++++++- libtiledbvcf/src/write/writer.cc | 8 ++++---- 2 files changed, 11 insertions(+), 5 deletions(-) diff --git a/libtiledbvcf/src/vcf/vcf_utils.cc b/libtiledbvcf/src/vcf/vcf_utils.cc index 86fb727a0..b4a2e8ccb 100644 --- a/libtiledbvcf/src/vcf/vcf_utils.cc +++ b/libtiledbvcf/src/vcf/vcf_utils.cc @@ -59,7 +59,13 @@ bcf_hdr_t* VCFUtils::hdr_read_header(const std::string& path) { std::vector VCFUtils::get_sample_name_from_vcf( const std::string& path) { SafeBCFHdr hdr(hdr_read_header(path), bcf_hdr_destroy); - return hdr_get_samples(hdr.get()); + auto samples = hdr_get_samples(hdr.get()); + // If there are no samples, add an empty string to the list + // to indicate this is a sampleless VCF. + if (samples.empty()) { + samples.push_back(""); + } + return samples; } std::vector VCFUtils::hdr_get_samples(bcf_hdr_t* hdr) { diff --git a/libtiledbvcf/src/write/writer.cc b/libtiledbvcf/src/write/writer.cc index 5d9f7235e..dd37c803d 100644 --- a/libtiledbvcf/src/write/writer.cc +++ b/libtiledbvcf/src/write/writer.cc @@ -774,9 +774,9 @@ std::pair Writer::ingest_samples_v4( if (params.resume_sample_partial_ingestion && !existing_sample_contig_fragments.empty()) { const std::string first_sample_name = - VCFUtils::get_sample_name_from_vcf(samples.front().sample_uri)[0]; + VCFUtils::get_sample_name_from_vcf(samples.front().sample_uri).at(0); const std::string last_sample_name = - VCFUtils::get_sample_name_from_vcf(samples.back().sample_uri)[0]; + VCFUtils::get_sample_name_from_vcf(samples.back().sample_uri).at(0); LOG_INFO("Resume: checking for regions to skip"); LOG_DEBUG("Resume: regions before resume check = {}", regions_v4.size()); @@ -787,7 +787,7 @@ std::pair Writer::ingest_samples_v4( bool skip = false; LOG_DEBUG( - "Resume: Checking sample_range=({}, {}) contig={}", + "Resume: Checking sample_range=('{}', '{}') contig={}", first_sample_name, last_sample_name, contig); @@ -803,7 +803,7 @@ std::pair Writer::ingest_samples_v4( {first_sample_name, last_sample_name}); LOG_DEBUG( - "Resume: found fragments with sample_range=({}, {})", + "Resume: found fragments with sample_range=('{}', '{}')", first_sample_name, last_sample_name); // Loop over contigs for the sample range