Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

reduce latency creating datasets #757

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 11 additions & 10 deletions libtiledbvcf/src/dataset/tiledbvcfdataset.cc
Original file line number Diff line number Diff line change
Expand Up @@ -218,7 +218,6 @@ void TileDBVCFDataset::create(const CreationParams& params) {
TILEDB_STRING_ASCII,
dataset_type.size(),
dataset_type.c_str());
group.close();

Metadata metadata;
metadata.tile_capacity = params.tile_capacity;
Expand All @@ -245,30 +244,32 @@ void TileDBVCFDataset::create(const CreationParams& params) {
}

// Create arrays and subgroups and add them to the root group
create_empty_metadata(ctx, params.uri, metadata, params.checksum);
create_empty_metadata(ctx, params.uri, metadata, params.checksum, group);
create_empty_data_array(
ctx,
params.uri,
metadata,
params.checksum,
params.allow_duplicates,
params.compress_sample_dim,
params.compression_level);
params.compression_level,
group);

if (params.enable_allele_count) {
AlleleCount::create(ctx, params.uri, params.checksum);
AlleleCount::create(ctx, params.uri, params.checksum, group);
}
if (params.enable_variant_stats) {
VariantStats::set_array_version(params.variant_stats_array_version);
VariantStats::create(ctx, params.uri, params.checksum);
VariantStats::create(ctx, params.uri, params.checksum, group);
}
if (params.enable_sample_stats) {
SampleStats::create(ctx, params.uri, params.checksum);
SampleStats::create(ctx, params.uri, group, params.checksum);
}

write_metadata_v4(ctx, params.uri, metadata);

// Log the group structure
group.close();
group.open(TILEDB_READ);
LOG_DEBUG("TileDB Groups: \n{}", group.dump(true));

Expand Down Expand Up @@ -310,7 +311,8 @@ void TileDBVCFDataset::create_empty_metadata(
const Context& ctx,
const std::string& root_uri,
const Metadata& metadata,
const tiledb_filter_type_t& checksum) {
const tiledb_filter_type_t& checksum,
Group& root_group) {
create_group(ctx, metadata_group_uri(root_uri));
create_sample_header_array(ctx, root_uri, checksum);

Expand All @@ -319,7 +321,6 @@ void TileDBVCFDataset::create_empty_metadata(

// Add arrays to the root group
// We add the vcf_header array to the root group to simplify array opening.
Group root_group(ctx, root_uri, TILEDB_WRITE);
auto array_uri = vcf_headers_uri(root_uri, relative);
LOG_DEBUG(
"Adding array name='{}' uri='{}' to group uri='{}'",
Expand All @@ -346,7 +347,8 @@ void TileDBVCFDataset::create_empty_data_array(
const tiledb_filter_type_t& checksum,
const bool allow_duplicates,
const bool compress_sample_dim,
const int compression_level) {
const int compression_level,
Group& root_group) {
ArraySchema schema(ctx, TILEDB_SPARSE);
schema.set_capacity(metadata.tile_capacity);
schema.set_order({{TILEDB_ROW_MAJOR, TILEDB_ROW_MAJOR}});
Expand Down Expand Up @@ -464,7 +466,6 @@ void TileDBVCFDataset::create_empty_data_array(
DATA_ARRAY,
array_uri,
root_uri);
Group root_group(ctx, root_uri, TILEDB_WRITE);
root_group.add_member(array_uri, relative, DATA_ARRAY);
}

Expand Down
8 changes: 6 additions & 2 deletions libtiledbvcf/src/dataset/tiledbvcfdataset.h
Original file line number Diff line number Diff line change
Expand Up @@ -854,12 +854,14 @@ class TileDBVCFDataset {
* @param root_uri Root URI of the dataset
* @param metadata General dataset metadata to write
* @param checksum optional checksum filter
* @param group TileDB-VCF group containing the array
*/
static void create_empty_metadata(
const Context& ctx,
const std::string& root_uri,
const Metadata& metadata,
const tiledb_filter_type_t& checksum);
const tiledb_filter_type_t& checksum,
Group& group);

/**
* Creates the empty sample data array for a new dataset.
Expand All @@ -868,6 +870,7 @@ class TileDBVCFDataset {
* @param root_uri Root URI of the dataset
* @param metadata Dataset metadata containing tile capacity etc. to use
* @param checksum optional checksum filter
* @param group TileDB-VCF group containing the array
*/
static void create_empty_data_array(
const Context& ctx,
Expand All @@ -876,7 +879,8 @@ class TileDBVCFDataset {
const tiledb_filter_type_t& checksum,
const bool allow_duplicates,
const bool compress_sample_dim,
const int compression_level);
const int compression_level,
Group& group);

/**
* Creates the empty sample header array for a new dataset.
Expand Down
6 changes: 4 additions & 2 deletions libtiledbvcf/src/stats/allele_count.cc
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,10 @@ std::string AlleleCount::get_uri(const Group& group) {
}

void AlleleCount::create(
Context& ctx, const std::string& root_uri, tiledb_filter_type_t checksum) {
Context& ctx,
const std::string& root_uri,
tiledb_filter_type_t checksum,
Group& root_group) {
LOG_DEBUG("[AlleleCount] Create array");

// Create filter lists
Expand Down Expand Up @@ -120,7 +123,6 @@ void AlleleCount::create(
auto relative = !utils::starts_with(root_uri, "tiledb://");
auto array_uri = get_uri(root_uri, relative);
LOG_DEBUG("Adding array '{}' to group '{}'", array_uri, root_uri);
Group root_group(ctx, root_uri, TILEDB_WRITE);
root_group.add_member(array_uri, relative, ALLELE_COUNT_ARRAY);
}

Expand Down
6 changes: 5 additions & 1 deletion libtiledbvcf/src/stats/allele_count.h
Original file line number Diff line number Diff line change
Expand Up @@ -84,9 +84,13 @@ class AlleleCount {
* @param ctx TileDB context
* @param root_uri TileDB-VCF dataset uri
* @param checksum TileDB checksum filter
* @param group TileDB-VCF group containing the array
*/
static void create(
Context& ctx, const std::string& root_uri, tiledb_filter_type_t checksum);
Context& ctx,
const std::string& root_uri,
tiledb_filter_type_t checksum,
Group& group);

/**
* @brief Check if the array exists.
Expand Down
6 changes: 4 additions & 2 deletions libtiledbvcf/src/stats/sample_stats.cc
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,10 @@ std::string SampleStats::get_uri_(const std::string& root_uri, bool relative) {
}

void SampleStats::create(
Context& ctx, const std::string& root_uri, int compression_level) {
Context& ctx,
const std::string& root_uri,
Group& root_group,
int compression_level) {
// Create filter lists
FilterList int_fl(ctx);
FilterList float_fl(ctx);
Expand Down Expand Up @@ -155,7 +158,6 @@ void SampleStats::create(
auto array_uri = get_uri_(root_uri, relative);
LOG_DEBUG(
"[SampleStats] Adding array '{}' to group '{}'", array_uri, root_uri);
Group root_group(ctx, root_uri, TILEDB_WRITE);
root_group.add_member(array_uri, relative, SAMPLE_STATS_ARRAY);
}

Expand Down
6 changes: 5 additions & 1 deletion libtiledbvcf/src/stats/sample_stats.h
Original file line number Diff line number Diff line change
Expand Up @@ -62,10 +62,14 @@ class SampleStats {
*
* @param ctx TileDB context
* @param root_uri URI of TileDB-VCF dataset
* @param group TileDB-VCF group containing the array
* @param compression_level zstd compression level
*/
static void create(
Context& ctx, const std::string& root_uri, int compression_level = 9);
Context& ctx,
const std::string& root_uri,
Group& group,
int compression_level = 9);

/**
* @brief Check if the array exists.
Expand Down
6 changes: 4 additions & 2 deletions libtiledbvcf/src/stats/variant_stats.cc
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,10 @@ std::string VariantStats::get_uri(const Group& group) {
}

void VariantStats::create(
Context& ctx, const std::string& root_uri, tiledb_filter_type_t checksum) {
Context& ctx,
const std::string& root_uri,
tiledb_filter_type_t checksum,
Group& root_group) {
LOG_DEBUG("[VariantStats] Create array");

// Create filter lists
Expand Down Expand Up @@ -167,7 +170,6 @@ void VariantStats::create(
auto relative = !utils::starts_with(root_uri, "tiledb://");
auto array_uri = get_uri(root_uri, relative);
LOG_DEBUG("Adding array '{}' to group '{}'", array_uri, root_uri);
Group root_group(ctx, root_uri, TILEDB_WRITE);
root_group.add_member(array_uri, relative, VARIANT_STATS_ARRAY);
}

Expand Down
6 changes: 5 additions & 1 deletion libtiledbvcf/src/stats/variant_stats.h
Original file line number Diff line number Diff line change
Expand Up @@ -87,9 +87,13 @@ class VariantStats {
*
* @param root_uri TileDB-VCF dataset uri
* @param checksum TileDB checksum filter
* @param group TileDB-VCF group containing the array
*/
static void create(
Context& ctx, const std::string& root_uri, tiledb_filter_type_t checksum);
Context& ctx,
const std::string& root_uri,
tiledb_filter_type_t checksum,
Group& group);

/**
* @brief Check if the array exists.
Expand Down
Loading