diff --git a/apis/spark/build.gradle b/apis/spark/build.gradle index a30d198a0..f8d755c60 100644 --- a/apis/spark/build.gradle +++ b/apis/spark/build.gradle @@ -53,6 +53,7 @@ repositories { dependencies { compileOnly 'org.apache.spark:spark-sql_2.12:2.4.3' compileOnly 'org.apache.spark:spark-core_2.12:2.4.3' + implementation 'io.tiledb:tiledb-java:0.24.0' compile group: 'io.tiledb', name: 'tiledb-vcf-java', version: version compile 'com.amazonaws:aws-java-sdk:1.11.650' diff --git a/apis/spark/src/main/java/io/tiledb/vcf/VCFDataSourceOptions.java b/apis/spark/src/main/java/io/tiledb/vcf/VCFDataSourceOptions.java index afbefcae5..7624617bb 100644 --- a/apis/spark/src/main/java/io/tiledb/vcf/VCFDataSourceOptions.java +++ b/apis/spark/src/main/java/io/tiledb/vcf/VCFDataSourceOptions.java @@ -76,6 +76,14 @@ public Optional getBedURI() { return Optional.empty(); } + /** @return Optional uri of BED array */ + public Optional getBedArrayURI() { + if (options.containsKey("bed_array")) { + return Optional.of(URI.create(options.get("bed_array"))); + } + return Optional.empty(); + } + /** @return Optional uri of SampleFile file */ public Optional getSampleURI() { if (options.containsKey("samplefile")) { diff --git a/apis/spark/src/main/java/io/tiledb/vcf/VCFDataSourceReader.java b/apis/spark/src/main/java/io/tiledb/vcf/VCFDataSourceReader.java index 1580f5b44..322035236 100644 --- a/apis/spark/src/main/java/io/tiledb/vcf/VCFDataSourceReader.java +++ b/apis/spark/src/main/java/io/tiledb/vcf/VCFDataSourceReader.java @@ -1,9 +1,11 @@ package io.tiledb.vcf; +import io.tiledb.java.api.*; import io.tiledb.libvcfnative.VCFBedFile; import io.tiledb.libvcfnative.VCFReader; import io.tiledb.util.CredentialProviderUtils; import java.net.URI; +import java.util.*; import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; @@ -215,7 +217,19 @@ public List> planBatchInputPartitions() { // Create Spark input partitions List> regions = null; if (options.getNewPartitionMethod().orElse(false)) { - regions = computeRegionPartitionsFromBedFile(numRangePartitions); + + // Compute regions from bed array or bed file + Optional bedArrayURI = options.getBedArrayURI(); + Optional bedURI = options.getBedURI(); + if (bedArrayURI.isPresent()) { + regions = computeRegionPartitionsFromBedArray(numRangePartitions, bedArrayURI.get()); + } else if (bedURI.isPresent()) { + regions = computeRegionPartitionsFromBedFile(numRangePartitions); + } else { + throw new RuntimeException( + "Can't use new_partition_method without setting bed_file or bed_array"); + } + numRangePartitions = regions.size(); ranges_end = regions.size(); log.info("New partition method has yielded " + numRangePartitions + " range partitions"); @@ -247,10 +261,99 @@ public List> planBatchInputPartitions() { return inputPartitions; } + List> computeRegionPartitionsFromBedArray( + int desiredNumRangePartitions, URI arrayURI) { + + // Read bed array + + try { + Map> mapOfRegions = new HashMap<>(); + int counter = 0; + + Context ctx = new Context(); + Array bedArray = new Array(ctx, arrayURI.toString(), QueryType.TILEDB_READ); + + String CONTIG = "alias contig"; + String START = "alias start"; + String END = "alias end"; + + String[] keys = new String[] {CONTIG, START, END}; + + NativeArray contigAliasNA = bedArray.getMetadata(CONTIG, Datatype.TILEDB_STRING_ASCII); + NativeArray startAliasNA = bedArray.getMetadata(START, Datatype.TILEDB_STRING_ASCII); + NativeArray endAliasNA = bedArray.getMetadata(END, Datatype.TILEDB_STRING_ASCII); + + String contigAlias = new String((byte[]) contigAliasNA.toJavaArray()); + String startAlias = new String((byte[]) startAliasNA.toJavaArray()); + String endAlias = new String((byte[]) endAliasNA.toJavaArray()); + + Query query = new Query(bedArray); + query.setLayout(Layout.TILEDB_UNORDERED); + + Pair estSize = query.getEstResultSizeVar(ctx, contigAlias); + + // todo unsafe casting needs to be addressed in the java api + // Prepare buffers + query.setDataBuffer( + contigAlias, + new NativeArray(ctx, estSize.getSecond().intValue(), Datatype.TILEDB_STRING_ASCII)); + query.setOffsetsBuffer( + contigAlias, new NativeArray(ctx, estSize.getFirst().intValue(), Datatype.TILEDB_UINT64)); + query.setDataBuffer( + startAlias, new NativeArray(ctx, estSize.getFirst().intValue(), Datatype.TILEDB_UINT64)); + query.setDataBuffer( + endAlias, new NativeArray(ctx, estSize.getFirst().intValue(), Datatype.TILEDB_UINT64)); + + do { + query.submit(); + // get buffers + long[] contigOffsets = (long[]) query.getVarBuffer(contigAlias); + byte[] contigData = (byte[]) query.getBuffer(contigAlias); + + String[] contigs = io.tiledb.java.api.Util.bytesToStrings(contigOffsets, contigData); + long[] start = (long[]) query.getBuffer(startAlias); + long[] end = (long[]) query.getBuffer(endAlias); + + if (!(contigs.length == start.length && start.length == end.length)) { + throw new RuntimeException("There was an error reading the bed array"); + } + + // Put regions in map + for (int i = 0; i < contigs.length; i++) { + String shortContig = contigs[i].replace("chr", ""); + String region = shortContig + ":" + start[i] + "-" + end[i] + ":" + counter; + counter++; + + // Check if the key exists in the map + if (mapOfRegions.containsKey(shortContig)) { + // If the key exists, append the region string to the existing list + mapOfRegions.get(shortContig).add(region); + } else { + // If the key doesn't exist, create a new list with the region string + List newList = new ArrayList<>(); + newList.add(region); + mapOfRegions.put(shortContig, newList); + } + } + } while (query.getQueryStatus() == QueryStatus.TILEDB_INCOMPLETE); + + List> res = new LinkedList<>(mapOfRegions.values()); + + sortRegions(res, desiredNumRangePartitions); + + return res; + + } catch (TileDBError err) { + throw new RuntimeException(err); + } + } + List> computeRegionPartitionsFromBedFile(int desiredNumRangePartitions) { Optional bedURI = options.getBedURI(); - if (!bedURI.isPresent()) { - throw new RuntimeException("Can't use new_partition_method without setting bed_file"); + Optional bedArrayURI = options.getBedArrayURI(); + if (!bedURI.isPresent() && !bedArrayURI.isPresent()) { + throw new RuntimeException( + "Can't use new_partition_method without setting bed_file or bed_array"); } log.info("Init VCFReader for partition calculation"); @@ -273,15 +376,22 @@ List> computeRegionPartitionsFromBedFile(int desiredNumRangePartiti Map> mapOfRegions = bedFile.getContigRegionStrings(); List> res = new LinkedList<>(mapOfRegions.values()); + sortRegions(res, desiredNumRangePartitions); + + bedFile.close(); + vcfReader.close(); + + return res; + } + + private void sortRegions(List> res, int desiredNumRangePartitions) { // Sort the region list by size of regions in contig, largest first res.sort(Comparator.comparingInt(List::size).reversed()); - // Keep splitting the larges region lists until we have the desired minimum number of range + // Keep splitting the largest region lists until we have the desired minimum number of range // Partitions, we stop if the large region has a size of 10 or less while (res.size() < desiredNumRangePartitions && res.get(0).size() >= 10) { - List top = res.remove(0); - List first = new LinkedList<>(top.subList(0, top.size() / 2)); List second = new LinkedList<>(top.subList(top.size() / 2, top.size())); res.add(first); @@ -291,10 +401,5 @@ List> computeRegionPartitionsFromBedFile(int desiredNumRangePartiti res.sort(Comparator.comparingInt(List::size)); Collections.reverse(res); } - - bedFile.close(); - vcfReader.close(); - - return res; } } diff --git a/apis/spark/src/main/java/io/tiledb/vcf/VCFInputPartitionReader.java b/apis/spark/src/main/java/io/tiledb/vcf/VCFInputPartitionReader.java index a423e02e4..2a120098e 100644 --- a/apis/spark/src/main/java/io/tiledb/vcf/VCFInputPartitionReader.java +++ b/apis/spark/src/main/java/io/tiledb/vcf/VCFInputPartitionReader.java @@ -304,6 +304,12 @@ private void initVCFReader() { if (bedURI.isPresent()) { vcfReader.setBedFile(bedURI.get().toString()); } + + // Set BED array + Optional bedArrayURI = options.getBedArrayURI(); + if (bedArrayURI.isPresent()) { + vcfReader.setBedFile(bedArrayURI.get().toString()); + } } else { if (rangePartitionInfo.getRegions().isEmpty()) { throw new RuntimeException( diff --git a/apis/spark/src/test/java/io/tiledb/vcf/VCFDataSourceOptionsTest.java b/apis/spark/src/test/java/io/tiledb/vcf/VCFDataSourceOptionsTest.java index 37a6d43f9..da41467ad 100644 --- a/apis/spark/src/test/java/io/tiledb/vcf/VCFDataSourceOptionsTest.java +++ b/apis/spark/src/test/java/io/tiledb/vcf/VCFDataSourceOptionsTest.java @@ -90,6 +90,12 @@ public void testBedURIOptionMissing() { Assert.assertFalse(options.getBedURI().isPresent()); } + @Test + public void testBedArrayURIOptionMissing() { + VCFDataSourceOptions options = new VCFDataSourceOptions(new DataSourceOptions(new HashMap<>())); + Assert.assertFalse(options.getBedArrayURI().isPresent()); + } + @Test public void testBedURIOption() { URI expectedURI = URI.create("s3://foo/bar"); @@ -100,12 +106,28 @@ public void testBedURIOption() { Assert.assertEquals(expectedURI, options.getBedURI().get()); } + @Test + public void testBedArrayURIOption() { + URI expectedURI = URI.create("s3://foo/bar"); + HashMap optionMap = new HashMap<>(); + optionMap.put("bed_array", expectedURI.toString()); + VCFDataSourceOptions options = new VCFDataSourceOptions(new DataSourceOptions(optionMap)); + Assert.assertTrue(options.getBedArrayURI().isPresent()); + Assert.assertEquals(expectedURI, options.getBedArrayURI().get()); + } + @Test public void testSampleURIOptionMissing() { VCFDataSourceOptions options = new VCFDataSourceOptions(new DataSourceOptions(new HashMap<>())); Assert.assertFalse(options.getBedURI().isPresent()); } + @Test + public void testArraySampleURIOptionMissing() { + VCFDataSourceOptions options = new VCFDataSourceOptions(new DataSourceOptions(new HashMap<>())); + Assert.assertFalse(options.getBedArrayURI().isPresent()); + } + @Test public void testSampleURIOption() { URI expectedURI = URI.create("s3://foo/bar"); diff --git a/apis/spark/src/test/java/io/tiledb/vcf/VCFDatasourceTest.java b/apis/spark/src/test/java/io/tiledb/vcf/VCFDatasourceTest.java index 406c28865..d348bbdb2 100644 --- a/apis/spark/src/test/java/io/tiledb/vcf/VCFDatasourceTest.java +++ b/apis/spark/src/test/java/io/tiledb/vcf/VCFDatasourceTest.java @@ -25,7 +25,7 @@ public class VCFDatasourceTest extends SharedJavaSparkSession { private String testSampleGroupURI(String sampleGroupName) { - Path arraysPath = Paths.get("src", "test", "resources", "arrays", "v3", sampleGroupName); + Path arraysPath = Paths.get("src", "test", "resources", "arrays", "v4", sampleGroupName); return "file://".concat(arraysPath.toAbsolutePath().toString()); } @@ -39,6 +39,16 @@ private String testSimpleBEDFile() { return "file://".concat(path.toAbsolutePath().toString()); } + private String testSimpleBEDArray() { + Path arrayPath = Paths.get("src", "test", "resources", "arrays", "bed_array"); + return "file://".concat(arrayPath.toAbsolutePath().toString()); + } + + private String testLargeBEDArray() { + Path arrayPath = Paths.get("src", "test", "resources", "arrays", "largebedarray"); + return "file://".concat(arrayPath.toAbsolutePath().toString()); + } + private String testLargeBEDFile() { Path path = Paths.get("src", "test", "resources", "E001_15_coreMarks_dense.bed.gz"); return "file://".concat(path.toAbsolutePath().toString()); @@ -450,6 +460,93 @@ public void testSamplePartition() { } } + @Test + public void testNewPartitionWithBedArray() { + int rangePartitions = 32; + int samplePartitions = 2; + Dataset dfRead = + session() + .read() + .format("io.tiledb.vcf") + .option("uri", testSampleGroupURI("ingested_2samples", "v4")) + .option("bed_array", testLargeBEDArray()) + .option("new_partition_method", true) + .option("range_partitions", rangePartitions) + .option("sample_partitions", samplePartitions) + .option("tiledb.vcf.log_level", "TRACE") + .load(); + + List rows = + dfRead + .select("sampleName", "contig", "posStart", "posEnd", "queryBedStart", "queryBedEnd") + .collectAsList(); + + // query from bed file line 184134 (0-indexed line numbers) + // 1 10600 540400 15_Quies 0 . 10600 540400 255,255,255 + + // NOTE: queryBedEnd returns the half-open value from the bed file, + // not the inclusive value used by tiledb-vcf + int expectedBedStart = 10600; // 0-indexed + int expectedBedEnd = 540400; // half-open + + for (int i = 0; i < rows.size(); i++) { + System.out.println( + String.format( + "*** %s, %s, pos=%d-%d, query=%d-%d", + rows.get(i).getString(0), + rows.get(i).getString(1), + rows.get(i).getInt(2), + rows.get(i).getInt(3), + rows.get(i).getInt(4), + rows.get(i).getInt(5))); + Assert.assertEquals(expectedBedStart, rows.get(i).getInt(4)); + Assert.assertEquals(expectedBedEnd, rows.get(i).getInt(5)); + } + } + + @Test + public void testNewPartitionWithBedArrayVsBedFile() { + int rangePartitions = 32; + int samplePartitions = 2; + Dataset dfRead = + session() + .read() + .format("io.tiledb.vcf") + .option("uri", testSampleGroupURI("ingested_2samples", "v4")) + .option("bedfile", testLargeBEDFile()) + .option("new_partition_method", true) + .option("range_partitions", rangePartitions) + .option("sample_partitions", samplePartitions) + .option("tiledb.vcf.log_level", "TRACE") + .load(); + + List rows = + dfRead + .select("sampleName", "contig", "posStart", "posEnd", "queryBedStart", "queryBedEnd") + .orderBy("contig") + .collectAsList(); + + Dataset dfRead2 = + session() + .read() + .format("io.tiledb.vcf") + .option("uri", testSampleGroupURI("ingested_2samples", "v4")) + .option("bed_array", testLargeBEDArray()) + .option("new_partition_method", true) + .option("range_partitions", rangePartitions) + .option("sample_partitions", samplePartitions) + .option("tiledb.vcf.log_level", "TRACE") + .load(); + + List rows2 = + dfRead2 + .select("sampleName", "contig", "posStart", "posEnd", "queryBedStart", "queryBedEnd") + .orderBy("contig") + .collectAsList(); + + Assert.assertEquals(rows, rows2); + } + @Test public void testNewPartition() { int rangePartitions = 32; @@ -521,6 +618,20 @@ public void testBedFile() { Assert.assertEquals(10, rows.size()); } + @Test + public void testBedArray() { + Dataset dfRead = + session() + .read() + .format("io.tiledb.vcf") + .option("uri", testSampleGroupURI("ingested_2samples")) + .option("samplefile", testSampleFile()) + .option("bed_array", testSimpleBEDArray()) + .load(); + List rows = dfRead.select("sampleName").collectAsList(); + Assert.assertEquals(10, rows.size()); + } + @Test public void testSchemaShowTopN() { Dataset dfRead = testSampleDataset(); diff --git a/apis/spark3/build.gradle b/apis/spark3/build.gradle index 2a535d2a7..dc5800dc4 100644 --- a/apis/spark3/build.gradle +++ b/apis/spark3/build.gradle @@ -53,6 +53,7 @@ repositories { dependencies { compileOnly 'org.apache.spark:spark-sql_2.12:3.2.0' compileOnly 'org.apache.spark:spark-core_2.12:3.2.0' + implementation 'io.tiledb:tiledb-java:0.24.0' compile group: 'io.tiledb', name: 'tiledb-vcf-java', version: version compile 'com.amazonaws:aws-java-sdk:1.11.650' diff --git a/apis/spark3/src/main/java/io/tiledb/vcf/VCFBatch.java b/apis/spark3/src/main/java/io/tiledb/vcf/VCFBatch.java index 94a3aa7fa..136c2f307 100644 --- a/apis/spark3/src/main/java/io/tiledb/vcf/VCFBatch.java +++ b/apis/spark3/src/main/java/io/tiledb/vcf/VCFBatch.java @@ -2,10 +2,12 @@ import static io.tiledb.vcf.VCFScanBuilder.pushedSampleNames; +import io.tiledb.java.api.*; import io.tiledb.libvcfnative.VCFBedFile; import io.tiledb.libvcfnative.VCFReader; import io.tiledb.util.CredentialProviderUtils; import java.net.URI; +import java.util.*; import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; @@ -158,7 +160,19 @@ public InputPartition[] planInputPartitions() { // Create Spark input partitions List> regions = null; if (options.getNewPartitionMethod().orElse(false)) { - regions = computeRegionPartitionsFromBedFile(numRangePartitions); + + // Compute regions from bed array or bed file + Optional bedArrayURI = options.getBedArrayURI(); + Optional bedURI = options.getBedURI(); + if (bedArrayURI.isPresent()) { + regions = computeRegionPartitionsFromBedArray(numRangePartitions, bedArrayURI.get()); + } else if (bedURI.isPresent()) { + regions = computeRegionPartitionsFromBedFile(numRangePartitions); + } else { + throw new RuntimeException( + "Can't use new_partition_method without setting bed_file or bed_array"); + } + numRangePartitions = regions.size(); ranges_end = regions.size(); log.info("New partition method has yielded " + numRangePartitions + " range partitions"); @@ -192,11 +206,95 @@ public InputPartition[] planInputPartitions() { return partitionsArray; } + List> computeRegionPartitionsFromBedArray( + int desiredNumRangePartitions, URI arrayURI) { + + // Read bed array + + try { + Map> mapOfRegions = new HashMap<>(); + int counter = 0; + + Context ctx = new Context(); + Array bedArray = new Array(ctx, arrayURI.toString(), QueryType.TILEDB_READ); + + String CONTIG = "alias contig"; + String START = "alias start"; + String END = "alias end"; + + String[] keys = new String[] {CONTIG, START, END}; + + NativeArray contigAliasNA = bedArray.getMetadata(CONTIG, Datatype.TILEDB_STRING_ASCII); + NativeArray startAliasNA = bedArray.getMetadata(START, Datatype.TILEDB_STRING_ASCII); + NativeArray endAliasNA = bedArray.getMetadata(END, Datatype.TILEDB_STRING_ASCII); + + String contigAlias = new String((byte[]) contigAliasNA.toJavaArray()); + String startAlias = new String((byte[]) startAliasNA.toJavaArray()); + String endAlias = new String((byte[]) endAliasNA.toJavaArray()); + + Query query = new Query(bedArray); + query.setLayout(Layout.TILEDB_UNORDERED); + + Pair estSize = query.getEstResultSizeVar(ctx, contigAlias); + + // todo unsafe casting needs to be addressed in the java api + // Prepare buffers + query.setDataBuffer( + contigAlias, + new NativeArray(ctx, estSize.getSecond().intValue(), Datatype.TILEDB_STRING_ASCII)); + query.setOffsetsBuffer( + contigAlias, new NativeArray(ctx, estSize.getFirst().intValue(), Datatype.TILEDB_UINT64)); + query.setDataBuffer( + startAlias, new NativeArray(ctx, estSize.getFirst().intValue(), Datatype.TILEDB_UINT64)); + query.setDataBuffer( + endAlias, new NativeArray(ctx, estSize.getFirst().intValue(), Datatype.TILEDB_UINT64)); + + do { + query.submit(); + // get buffers + long[] contigOffsets = (long[]) query.getVarBuffer(contigAlias); + byte[] contigData = (byte[]) query.getBuffer(contigAlias); + + String[] contigs = io.tiledb.java.api.Util.bytesToStrings(contigOffsets, contigData); + long[] start = (long[]) query.getBuffer(startAlias); + long[] end = (long[]) query.getBuffer(endAlias); + + if (!(contigs.length == start.length && start.length == end.length)) { + throw new RuntimeException("There was an error reading the bed array"); + } + + // Put regions in map + for (int i = 0; i < contigs.length; i++) { + String shortContig = contigs[i].replace("chr", ""); + String region = shortContig + ":" + start[i] + "-" + end[i] + ":" + counter; + counter++; + + // Check if the key exists in the map + if (mapOfRegions.containsKey(shortContig)) { + // If the key exists, append the region string to the existing list + mapOfRegions.get(shortContig).add(region); + } else { + // If the key doesn't exist, create a new list with the region string + List newList = new ArrayList<>(); + newList.add(region); + mapOfRegions.put(shortContig, newList); + } + } + } while (query.getQueryStatus() == QueryStatus.TILEDB_INCOMPLETE); + + List> res = new LinkedList<>(mapOfRegions.values()); + + sortRegions(res, desiredNumRangePartitions); + + return res; + + } catch (TileDBError err) { + throw new RuntimeException(err); + } + } + List> computeRegionPartitionsFromBedFile(int desiredNumRangePartitions) { Optional bedURI = options.getBedURI(); - if (!bedURI.isPresent()) { - throw new RuntimeException("Can't use new_partition_method without setting bed_file"); - } log.info("Init VCFReader for partition calculation"); String uriString = options.getDatasetURI().get().toString(); @@ -217,16 +315,22 @@ List> computeRegionPartitionsFromBedFile(int desiredNumRangePartiti Map> mapOfRegions = bedFile.getContigRegionStrings(); List> res = new LinkedList<>(mapOfRegions.values()); + sortRegions(res, desiredNumRangePartitions); + + bedFile.close(); + vcfReader.close(); + return res; + } + + private void sortRegions(List> res, int desiredNumRangePartitions) { // Sort the region list by size of regions in contig, largest first res.sort(Comparator.comparingInt(List::size).reversed()); - // Keep splitting the larges region lists until we have the desired minimum number of range + // Keep splitting the largest region lists until we have the desired minimum number of range // Partitions, we stop if the large region has a size of 10 or less while (res.size() < desiredNumRangePartitions && res.get(0).size() >= 10) { - List top = res.remove(0); - List first = new LinkedList<>(top.subList(0, top.size() / 2)); List second = new LinkedList<>(top.subList(top.size() / 2, top.size())); res.add(first); @@ -236,11 +340,6 @@ List> computeRegionPartitionsFromBedFile(int desiredNumRangePartiti res.sort(Comparator.comparingInt(List::size)); Collections.reverse(res); } - - bedFile.close(); - vcfReader.close(); - - return res; } @Override diff --git a/apis/spark3/src/main/java/io/tiledb/vcf/VCFDataSourceOptions.java b/apis/spark3/src/main/java/io/tiledb/vcf/VCFDataSourceOptions.java index 55774f5bb..5e4d77456 100644 --- a/apis/spark3/src/main/java/io/tiledb/vcf/VCFDataSourceOptions.java +++ b/apis/spark3/src/main/java/io/tiledb/vcf/VCFDataSourceOptions.java @@ -75,6 +75,14 @@ public Optional getBedURI() { return Optional.empty(); } + /** @return Optional uri of BED array */ + public Optional getBedArrayURI() { + if (options.containsKey("bed_array")) { + return Optional.of(URI.create(options.get("bed_array"))); + } + return Optional.empty(); + } + /** @return Optional uri of SampleFile file */ public Optional getSampleURI() { if (options.containsKey("samplefile")) { diff --git a/apis/spark3/src/main/java/io/tiledb/vcf/VCFPartitionReader.java b/apis/spark3/src/main/java/io/tiledb/vcf/VCFPartitionReader.java index b63b5a66e..a465c3f69 100644 --- a/apis/spark3/src/main/java/io/tiledb/vcf/VCFPartitionReader.java +++ b/apis/spark3/src/main/java/io/tiledb/vcf/VCFPartitionReader.java @@ -305,6 +305,12 @@ private void initVCFReader() { if (bedURI.isPresent()) { vcfReader.setBedFile(bedURI.get().toString()); } + + // Set BED array + Optional bedArrayURI = options.getBedArrayURI(); + if (bedArrayURI.isPresent()) { + vcfReader.setBedFile(bedArrayURI.get().toString()); + } } else { if (rangePartitionInfo.getRegions().isEmpty()) { throw new RuntimeException( diff --git a/apis/spark3/src/test/java/io/tiledb/vcf/VCFDataSourceOptionsTest.java b/apis/spark3/src/test/java/io/tiledb/vcf/VCFDataSourceOptionsTest.java index 474bc71df..7e4878fd0 100644 --- a/apis/spark3/src/test/java/io/tiledb/vcf/VCFDataSourceOptionsTest.java +++ b/apis/spark3/src/test/java/io/tiledb/vcf/VCFDataSourceOptionsTest.java @@ -99,12 +99,28 @@ public void testBedURIOption() { Assert.assertEquals(expectedURI, options.getBedURI().get()); } + @Test + public void testBedArrayURIOption() { + URI expectedURI = URI.create("s3://foo/bar"); + HashMap optionMap = new HashMap<>(); + optionMap.put("bed_array", expectedURI.toString()); + VCFDataSourceOptions options = new VCFDataSourceOptions(new DataSourceOptions(optionMap)); + Assert.assertTrue(options.getBedArrayURI().isPresent()); + Assert.assertEquals(expectedURI, options.getBedArrayURI().get()); + } + @Test public void testSampleURIOptionMissing() { VCFDataSourceOptions options = new VCFDataSourceOptions(new DataSourceOptions(new HashMap<>())); Assert.assertFalse(options.getBedURI().isPresent()); } + @Test + public void testBedArrayURIOptionMissing() { + VCFDataSourceOptions options = new VCFDataSourceOptions(new DataSourceOptions(new HashMap<>())); + Assert.assertFalse(options.getBedArrayURI().isPresent()); + } + @Test public void testSampleURIOption() { URI expectedURI = URI.create("s3://foo/bar"); diff --git a/apis/spark3/src/test/java/io/tiledb/vcf/VCFDatasourceTest.java b/apis/spark3/src/test/java/io/tiledb/vcf/VCFDatasourceTest.java index 406c28865..542909a1a 100644 --- a/apis/spark3/src/test/java/io/tiledb/vcf/VCFDatasourceTest.java +++ b/apis/spark3/src/test/java/io/tiledb/vcf/VCFDatasourceTest.java @@ -25,7 +25,7 @@ public class VCFDatasourceTest extends SharedJavaSparkSession { private String testSampleGroupURI(String sampleGroupName) { - Path arraysPath = Paths.get("src", "test", "resources", "arrays", "v3", sampleGroupName); + Path arraysPath = Paths.get("src", "test", "resources", "arrays", "v4", sampleGroupName); return "file://".concat(arraysPath.toAbsolutePath().toString()); } @@ -34,6 +34,16 @@ private String testSampleGroupURI(String sampleGroupName, String version) { return "file://".concat(arraysPath.toAbsolutePath().toString()); } + private String testSimpleBEDArray() { + Path arrayPath = Paths.get("src", "test", "resources", "arrays", "bed_array"); + return "file://".concat(arrayPath.toAbsolutePath().toString()); + } + + private String testLargeBEDArray() { + Path arrayPath = Paths.get("src", "test", "resources", "arrays", "largebedarray"); + return "file://".concat(arrayPath.toAbsolutePath().toString()); + } + private String testSimpleBEDFile() { Path path = Paths.get("src", "test", "resources", "simple.bed"); return "file://".concat(path.toAbsolutePath().toString()); @@ -507,6 +517,93 @@ public void testNewPartition() { } } + @Test + public void testNewPartitionWithBedArray() { + int rangePartitions = 32; + int samplePartitions = 2; + Dataset dfRead = + session() + .read() + .format("io.tiledb.vcf") + .option("uri", testSampleGroupURI("ingested_2samples", "v4")) + .option("bed_array", testLargeBEDArray()) + .option("new_partition_method", true) + .option("range_partitions", rangePartitions) + .option("sample_partitions", samplePartitions) + .option("tiledb.vcf.log_level", "TRACE") + .load(); + + List rows = + dfRead + .select("sampleName", "contig", "posStart", "posEnd", "queryBedStart", "queryBedEnd") + .collectAsList(); + + // query from bed file line 184134 (0-indexed line numbers) + // 1 10600 540400 15_Quies 0 . 10600 540400 255,255,255 + + // NOTE: queryBedEnd returns the half-open value from the bed file, + // not the inclusive value used by tiledb-vcf + int expectedBedStart = 10600; // 0-indexed + int expectedBedEnd = 540400; // half-open + + for (int i = 0; i < rows.size(); i++) { + System.out.println( + String.format( + "*** %s, %s, pos=%d-%d, query=%d-%d", + rows.get(i).getString(0), + rows.get(i).getString(1), + rows.get(i).getInt(2), + rows.get(i).getInt(3), + rows.get(i).getInt(4), + rows.get(i).getInt(5))); + Assert.assertEquals(expectedBedStart, rows.get(i).getInt(4)); + Assert.assertEquals(expectedBedEnd, rows.get(i).getInt(5)); + } + } + + @Test + public void testNewPartitionWithBedArrayVsBedFile() { + int rangePartitions = 32; + int samplePartitions = 2; + Dataset dfRead = + session() + .read() + .format("io.tiledb.vcf") + .option("uri", testSampleGroupURI("ingested_2samples", "v4")) + .option("bedfile", testLargeBEDFile()) + .option("new_partition_method", true) + .option("range_partitions", rangePartitions) + .option("sample_partitions", samplePartitions) + .option("tiledb.vcf.log_level", "TRACE") + .load(); + + List rows = + dfRead + .select("sampleName", "contig", "posStart", "posEnd", "queryBedStart", "queryBedEnd") + .orderBy("contig") + .collectAsList(); + + Dataset dfRead2 = + session() + .read() + .format("io.tiledb.vcf") + .option("uri", testSampleGroupURI("ingested_2samples", "v4")) + .option("bed_array", testLargeBEDArray()) + .option("new_partition_method", true) + .option("range_partitions", rangePartitions) + .option("sample_partitions", samplePartitions) + .option("tiledb.vcf.log_level", "TRACE") + .load(); + + List rows2 = + dfRead2 + .select("sampleName", "contig", "posStart", "posEnd", "queryBedStart", "queryBedEnd") + .orderBy("contig") + .collectAsList(); + + Assert.assertEquals(rows, rows2); + } + @Test public void testBedFile() { Dataset dfRead = @@ -521,6 +618,20 @@ public void testBedFile() { Assert.assertEquals(10, rows.size()); } + @Test + public void testBedArray() { + Dataset dfRead = + session() + .read() + .format("io.tiledb.vcf") + .option("uri", testSampleGroupURI("ingested_2samples")) + .option("samplefile", testSampleFile()) + .option("bed_array", testSimpleBEDArray()) + .load(); + List rows = dfRead.select("sampleName").collectAsList(); + Assert.assertEquals(10, rows.size()); + } + @Test public void testSchemaShowTopN() { Dataset dfRead = testSampleDataset(); diff --git a/libtiledbvcf/test/inputs/arrays/bed_array/__commits/__1704394421914_1704394421914_0c4b280ae02a4fcb84d4eaca629cba3e_20.wrt b/libtiledbvcf/test/inputs/arrays/bed_array/__commits/__1704394421914_1704394421914_0c4b280ae02a4fcb84d4eaca629cba3e_20.wrt new file mode 100644 index 000000000..e69de29bb diff --git a/libtiledbvcf/test/inputs/arrays/bed_array/__fragments/__1704394421914_1704394421914_0c4b280ae02a4fcb84d4eaca629cba3e_20/__fragment_metadata.tdb b/libtiledbvcf/test/inputs/arrays/bed_array/__fragments/__1704394421914_1704394421914_0c4b280ae02a4fcb84d4eaca629cba3e_20/__fragment_metadata.tdb new file mode 100644 index 000000000..0dd0bff86 Binary files /dev/null and b/libtiledbvcf/test/inputs/arrays/bed_array/__fragments/__1704394421914_1704394421914_0c4b280ae02a4fcb84d4eaca629cba3e_20/__fragment_metadata.tdb differ diff --git a/libtiledbvcf/test/inputs/arrays/bed_array/__fragments/__1704394421914_1704394421914_0c4b280ae02a4fcb84d4eaca629cba3e_20/a0.tdb b/libtiledbvcf/test/inputs/arrays/bed_array/__fragments/__1704394421914_1704394421914_0c4b280ae02a4fcb84d4eaca629cba3e_20/a0.tdb new file mode 100644 index 000000000..37289c23f Binary files /dev/null and b/libtiledbvcf/test/inputs/arrays/bed_array/__fragments/__1704394421914_1704394421914_0c4b280ae02a4fcb84d4eaca629cba3e_20/a0.tdb differ diff --git a/libtiledbvcf/test/inputs/arrays/bed_array/__fragments/__1704394421914_1704394421914_0c4b280ae02a4fcb84d4eaca629cba3e_20/a0_var.tdb b/libtiledbvcf/test/inputs/arrays/bed_array/__fragments/__1704394421914_1704394421914_0c4b280ae02a4fcb84d4eaca629cba3e_20/a0_var.tdb new file mode 100644 index 000000000..1961f325c Binary files /dev/null and b/libtiledbvcf/test/inputs/arrays/bed_array/__fragments/__1704394421914_1704394421914_0c4b280ae02a4fcb84d4eaca629cba3e_20/a0_var.tdb differ diff --git a/libtiledbvcf/test/inputs/arrays/bed_array/__fragments/__1704394421914_1704394421914_0c4b280ae02a4fcb84d4eaca629cba3e_20/a1.tdb b/libtiledbvcf/test/inputs/arrays/bed_array/__fragments/__1704394421914_1704394421914_0c4b280ae02a4fcb84d4eaca629cba3e_20/a1.tdb new file mode 100644 index 000000000..cde4803ec Binary files /dev/null and b/libtiledbvcf/test/inputs/arrays/bed_array/__fragments/__1704394421914_1704394421914_0c4b280ae02a4fcb84d4eaca629cba3e_20/a1.tdb differ diff --git a/libtiledbvcf/test/inputs/arrays/bed_array/__fragments/__1704394421914_1704394421914_0c4b280ae02a4fcb84d4eaca629cba3e_20/a2.tdb b/libtiledbvcf/test/inputs/arrays/bed_array/__fragments/__1704394421914_1704394421914_0c4b280ae02a4fcb84d4eaca629cba3e_20/a2.tdb new file mode 100644 index 000000000..7ecbf615d Binary files /dev/null and b/libtiledbvcf/test/inputs/arrays/bed_array/__fragments/__1704394421914_1704394421914_0c4b280ae02a4fcb84d4eaca629cba3e_20/a2.tdb differ diff --git a/libtiledbvcf/test/inputs/arrays/bed_array/__fragments/__1704394421914_1704394421914_0c4b280ae02a4fcb84d4eaca629cba3e_20/d0.tdb b/libtiledbvcf/test/inputs/arrays/bed_array/__fragments/__1704394421914_1704394421914_0c4b280ae02a4fcb84d4eaca629cba3e_20/d0.tdb new file mode 100644 index 000000000..37289c23f Binary files /dev/null and b/libtiledbvcf/test/inputs/arrays/bed_array/__fragments/__1704394421914_1704394421914_0c4b280ae02a4fcb84d4eaca629cba3e_20/d0.tdb differ diff --git a/libtiledbvcf/test/inputs/arrays/bed_array/__meta/__1704394421898_1704394421898_3232bb05077445b097ec812ee8dcb3c4 b/libtiledbvcf/test/inputs/arrays/bed_array/__meta/__1704394421898_1704394421898_3232bb05077445b097ec812ee8dcb3c4 new file mode 100644 index 000000000..15562847a Binary files /dev/null and b/libtiledbvcf/test/inputs/arrays/bed_array/__meta/__1704394421898_1704394421898_3232bb05077445b097ec812ee8dcb3c4 differ diff --git a/libtiledbvcf/test/inputs/arrays/bed_array/__meta/__1704394441555_1704394441555_14afe6300cc14af0ab7e728ccc7b1353 b/libtiledbvcf/test/inputs/arrays/bed_array/__meta/__1704394441555_1704394441555_14afe6300cc14af0ab7e728ccc7b1353 new file mode 100644 index 000000000..76f3345be Binary files /dev/null and b/libtiledbvcf/test/inputs/arrays/bed_array/__meta/__1704394441555_1704394441555_14afe6300cc14af0ab7e728ccc7b1353 differ diff --git a/libtiledbvcf/test/inputs/arrays/bed_array/__schema/__1704394421897_1704394421897_52be1c228f394206a626570e261005e8 b/libtiledbvcf/test/inputs/arrays/bed_array/__schema/__1704394421897_1704394421897_52be1c228f394206a626570e261005e8 new file mode 100644 index 000000000..454e16663 Binary files /dev/null and b/libtiledbvcf/test/inputs/arrays/bed_array/__schema/__1704394421897_1704394421897_52be1c228f394206a626570e261005e8 differ diff --git a/libtiledbvcf/test/inputs/arrays/largebedarray/__commits/__1706813523990_1706813523990_40565d50e72b402105d07b7d1e8de386_21.wrt b/libtiledbvcf/test/inputs/arrays/largebedarray/__commits/__1706813523990_1706813523990_40565d50e72b402105d07b7d1e8de386_21.wrt new file mode 100644 index 000000000..e69de29bb diff --git a/libtiledbvcf/test/inputs/arrays/largebedarray/__commits/__1706813524799_1706813524799_4631aede2e67ffd18f9fccba4388a61f_21.wrt b/libtiledbvcf/test/inputs/arrays/largebedarray/__commits/__1706813524799_1706813524799_4631aede2e67ffd18f9fccba4388a61f_21.wrt new file mode 100644 index 000000000..e69de29bb diff --git a/libtiledbvcf/test/inputs/arrays/largebedarray/__fragments/__1706813523990_1706813523990_40565d50e72b402105d07b7d1e8de386_21/__fragment_metadata.tdb b/libtiledbvcf/test/inputs/arrays/largebedarray/__fragments/__1706813523990_1706813523990_40565d50e72b402105d07b7d1e8de386_21/__fragment_metadata.tdb new file mode 100644 index 000000000..f6a257510 Binary files /dev/null and b/libtiledbvcf/test/inputs/arrays/largebedarray/__fragments/__1706813523990_1706813523990_40565d50e72b402105d07b7d1e8de386_21/__fragment_metadata.tdb differ diff --git a/libtiledbvcf/test/inputs/arrays/largebedarray/__fragments/__1706813523990_1706813523990_40565d50e72b402105d07b7d1e8de386_21/a0.tdb b/libtiledbvcf/test/inputs/arrays/largebedarray/__fragments/__1706813523990_1706813523990_40565d50e72b402105d07b7d1e8de386_21/a0.tdb new file mode 100644 index 000000000..22870405f Binary files /dev/null and b/libtiledbvcf/test/inputs/arrays/largebedarray/__fragments/__1706813523990_1706813523990_40565d50e72b402105d07b7d1e8de386_21/a0.tdb differ diff --git a/libtiledbvcf/test/inputs/arrays/largebedarray/__fragments/__1706813523990_1706813523990_40565d50e72b402105d07b7d1e8de386_21/a1.tdb b/libtiledbvcf/test/inputs/arrays/largebedarray/__fragments/__1706813523990_1706813523990_40565d50e72b402105d07b7d1e8de386_21/a1.tdb new file mode 100644 index 000000000..6933981a4 Binary files /dev/null and b/libtiledbvcf/test/inputs/arrays/largebedarray/__fragments/__1706813523990_1706813523990_40565d50e72b402105d07b7d1e8de386_21/a1.tdb differ diff --git a/libtiledbvcf/test/inputs/arrays/largebedarray/__fragments/__1706813523990_1706813523990_40565d50e72b402105d07b7d1e8de386_21/a1_var.tdb b/libtiledbvcf/test/inputs/arrays/largebedarray/__fragments/__1706813523990_1706813523990_40565d50e72b402105d07b7d1e8de386_21/a1_var.tdb new file mode 100644 index 000000000..a26558a7e Binary files /dev/null and b/libtiledbvcf/test/inputs/arrays/largebedarray/__fragments/__1706813523990_1706813523990_40565d50e72b402105d07b7d1e8de386_21/a1_var.tdb differ diff --git a/libtiledbvcf/test/inputs/arrays/largebedarray/__fragments/__1706813523990_1706813523990_40565d50e72b402105d07b7d1e8de386_21/a2.tdb b/libtiledbvcf/test/inputs/arrays/largebedarray/__fragments/__1706813523990_1706813523990_40565d50e72b402105d07b7d1e8de386_21/a2.tdb new file mode 100644 index 000000000..72cda8c4a Binary files /dev/null and b/libtiledbvcf/test/inputs/arrays/largebedarray/__fragments/__1706813523990_1706813523990_40565d50e72b402105d07b7d1e8de386_21/a2.tdb differ diff --git a/libtiledbvcf/test/inputs/arrays/largebedarray/__fragments/__1706813523990_1706813523990_40565d50e72b402105d07b7d1e8de386_21/a3.tdb b/libtiledbvcf/test/inputs/arrays/largebedarray/__fragments/__1706813523990_1706813523990_40565d50e72b402105d07b7d1e8de386_21/a3.tdb new file mode 100644 index 000000000..e028bd403 Binary files /dev/null and b/libtiledbvcf/test/inputs/arrays/largebedarray/__fragments/__1706813523990_1706813523990_40565d50e72b402105d07b7d1e8de386_21/a3.tdb differ diff --git a/libtiledbvcf/test/inputs/arrays/largebedarray/__fragments/__1706813523990_1706813523990_40565d50e72b402105d07b7d1e8de386_21/a3_var.tdb b/libtiledbvcf/test/inputs/arrays/largebedarray/__fragments/__1706813523990_1706813523990_40565d50e72b402105d07b7d1e8de386_21/a3_var.tdb new file mode 100644 index 000000000..3d90219ed Binary files /dev/null and b/libtiledbvcf/test/inputs/arrays/largebedarray/__fragments/__1706813523990_1706813523990_40565d50e72b402105d07b7d1e8de386_21/a3_var.tdb differ diff --git a/libtiledbvcf/test/inputs/arrays/largebedarray/__fragments/__1706813523990_1706813523990_40565d50e72b402105d07b7d1e8de386_21/a4.tdb b/libtiledbvcf/test/inputs/arrays/largebedarray/__fragments/__1706813523990_1706813523990_40565d50e72b402105d07b7d1e8de386_21/a4.tdb new file mode 100644 index 000000000..818070921 Binary files /dev/null and b/libtiledbvcf/test/inputs/arrays/largebedarray/__fragments/__1706813523990_1706813523990_40565d50e72b402105d07b7d1e8de386_21/a4.tdb differ diff --git a/libtiledbvcf/test/inputs/arrays/largebedarray/__fragments/__1706813523990_1706813523990_40565d50e72b402105d07b7d1e8de386_21/a5.tdb b/libtiledbvcf/test/inputs/arrays/largebedarray/__fragments/__1706813523990_1706813523990_40565d50e72b402105d07b7d1e8de386_21/a5.tdb new file mode 100644 index 000000000..4e1f1c1e7 Binary files /dev/null and b/libtiledbvcf/test/inputs/arrays/largebedarray/__fragments/__1706813523990_1706813523990_40565d50e72b402105d07b7d1e8de386_21/a5.tdb differ diff --git a/libtiledbvcf/test/inputs/arrays/largebedarray/__fragments/__1706813523990_1706813523990_40565d50e72b402105d07b7d1e8de386_21/a6.tdb b/libtiledbvcf/test/inputs/arrays/largebedarray/__fragments/__1706813523990_1706813523990_40565d50e72b402105d07b7d1e8de386_21/a6.tdb new file mode 100644 index 000000000..c3fc7cfb4 Binary files /dev/null and b/libtiledbvcf/test/inputs/arrays/largebedarray/__fragments/__1706813523990_1706813523990_40565d50e72b402105d07b7d1e8de386_21/a6.tdb differ diff --git a/libtiledbvcf/test/inputs/arrays/largebedarray/__fragments/__1706813523990_1706813523990_40565d50e72b402105d07b7d1e8de386_21/a6_var.tdb b/libtiledbvcf/test/inputs/arrays/largebedarray/__fragments/__1706813523990_1706813523990_40565d50e72b402105d07b7d1e8de386_21/a6_var.tdb new file mode 100644 index 000000000..d0d5cc7ab Binary files /dev/null and b/libtiledbvcf/test/inputs/arrays/largebedarray/__fragments/__1706813523990_1706813523990_40565d50e72b402105d07b7d1e8de386_21/a6_var.tdb differ diff --git a/libtiledbvcf/test/inputs/arrays/largebedarray/__fragments/__1706813523990_1706813523990_40565d50e72b402105d07b7d1e8de386_21/d0.tdb b/libtiledbvcf/test/inputs/arrays/largebedarray/__fragments/__1706813523990_1706813523990_40565d50e72b402105d07b7d1e8de386_21/d0.tdb new file mode 100644 index 000000000..33082fa5b Binary files /dev/null and b/libtiledbvcf/test/inputs/arrays/largebedarray/__fragments/__1706813523990_1706813523990_40565d50e72b402105d07b7d1e8de386_21/d0.tdb differ diff --git a/libtiledbvcf/test/inputs/arrays/largebedarray/__fragments/__1706813523990_1706813523990_40565d50e72b402105d07b7d1e8de386_21/d0_var.tdb b/libtiledbvcf/test/inputs/arrays/largebedarray/__fragments/__1706813523990_1706813523990_40565d50e72b402105d07b7d1e8de386_21/d0_var.tdb new file mode 100644 index 000000000..edb788598 Binary files /dev/null and b/libtiledbvcf/test/inputs/arrays/largebedarray/__fragments/__1706813523990_1706813523990_40565d50e72b402105d07b7d1e8de386_21/d0_var.tdb differ diff --git a/libtiledbvcf/test/inputs/arrays/largebedarray/__fragments/__1706813523990_1706813523990_40565d50e72b402105d07b7d1e8de386_21/d1.tdb b/libtiledbvcf/test/inputs/arrays/largebedarray/__fragments/__1706813523990_1706813523990_40565d50e72b402105d07b7d1e8de386_21/d1.tdb new file mode 100644 index 000000000..7dac6000b Binary files /dev/null and b/libtiledbvcf/test/inputs/arrays/largebedarray/__fragments/__1706813523990_1706813523990_40565d50e72b402105d07b7d1e8de386_21/d1.tdb differ diff --git a/libtiledbvcf/test/inputs/arrays/largebedarray/__fragments/__1706813523990_1706813523990_40565d50e72b402105d07b7d1e8de386_21/d1_var.tdb b/libtiledbvcf/test/inputs/arrays/largebedarray/__fragments/__1706813523990_1706813523990_40565d50e72b402105d07b7d1e8de386_21/d1_var.tdb new file mode 100644 index 000000000..fac8b22e9 Binary files /dev/null and b/libtiledbvcf/test/inputs/arrays/largebedarray/__fragments/__1706813523990_1706813523990_40565d50e72b402105d07b7d1e8de386_21/d1_var.tdb differ diff --git a/libtiledbvcf/test/inputs/arrays/largebedarray/__fragments/__1706813523990_1706813523990_40565d50e72b402105d07b7d1e8de386_21/d2.tdb b/libtiledbvcf/test/inputs/arrays/largebedarray/__fragments/__1706813523990_1706813523990_40565d50e72b402105d07b7d1e8de386_21/d2.tdb new file mode 100644 index 000000000..818070921 Binary files /dev/null and b/libtiledbvcf/test/inputs/arrays/largebedarray/__fragments/__1706813523990_1706813523990_40565d50e72b402105d07b7d1e8de386_21/d2.tdb differ diff --git a/libtiledbvcf/test/inputs/arrays/largebedarray/__fragments/__1706813524799_1706813524799_4631aede2e67ffd18f9fccba4388a61f_21/__fragment_metadata.tdb b/libtiledbvcf/test/inputs/arrays/largebedarray/__fragments/__1706813524799_1706813524799_4631aede2e67ffd18f9fccba4388a61f_21/__fragment_metadata.tdb new file mode 100644 index 000000000..c4225e438 Binary files /dev/null and b/libtiledbvcf/test/inputs/arrays/largebedarray/__fragments/__1706813524799_1706813524799_4631aede2e67ffd18f9fccba4388a61f_21/__fragment_metadata.tdb differ diff --git a/libtiledbvcf/test/inputs/arrays/largebedarray/__fragments/__1706813524799_1706813524799_4631aede2e67ffd18f9fccba4388a61f_21/a0.tdb b/libtiledbvcf/test/inputs/arrays/largebedarray/__fragments/__1706813524799_1706813524799_4631aede2e67ffd18f9fccba4388a61f_21/a0.tdb new file mode 100644 index 000000000..577fde5ca Binary files /dev/null and b/libtiledbvcf/test/inputs/arrays/largebedarray/__fragments/__1706813524799_1706813524799_4631aede2e67ffd18f9fccba4388a61f_21/a0.tdb differ diff --git a/libtiledbvcf/test/inputs/arrays/largebedarray/__fragments/__1706813524799_1706813524799_4631aede2e67ffd18f9fccba4388a61f_21/a1.tdb b/libtiledbvcf/test/inputs/arrays/largebedarray/__fragments/__1706813524799_1706813524799_4631aede2e67ffd18f9fccba4388a61f_21/a1.tdb new file mode 100644 index 000000000..4d42bb586 Binary files /dev/null and b/libtiledbvcf/test/inputs/arrays/largebedarray/__fragments/__1706813524799_1706813524799_4631aede2e67ffd18f9fccba4388a61f_21/a1.tdb differ diff --git a/libtiledbvcf/test/inputs/arrays/largebedarray/__fragments/__1706813524799_1706813524799_4631aede2e67ffd18f9fccba4388a61f_21/a1_var.tdb b/libtiledbvcf/test/inputs/arrays/largebedarray/__fragments/__1706813524799_1706813524799_4631aede2e67ffd18f9fccba4388a61f_21/a1_var.tdb new file mode 100644 index 000000000..9e1b31908 Binary files /dev/null and b/libtiledbvcf/test/inputs/arrays/largebedarray/__fragments/__1706813524799_1706813524799_4631aede2e67ffd18f9fccba4388a61f_21/a1_var.tdb differ diff --git a/libtiledbvcf/test/inputs/arrays/largebedarray/__fragments/__1706813524799_1706813524799_4631aede2e67ffd18f9fccba4388a61f_21/a2.tdb b/libtiledbvcf/test/inputs/arrays/largebedarray/__fragments/__1706813524799_1706813524799_4631aede2e67ffd18f9fccba4388a61f_21/a2.tdb new file mode 100644 index 000000000..41ef1ca09 Binary files /dev/null and b/libtiledbvcf/test/inputs/arrays/largebedarray/__fragments/__1706813524799_1706813524799_4631aede2e67ffd18f9fccba4388a61f_21/a2.tdb differ diff --git a/libtiledbvcf/test/inputs/arrays/largebedarray/__fragments/__1706813524799_1706813524799_4631aede2e67ffd18f9fccba4388a61f_21/a3.tdb b/libtiledbvcf/test/inputs/arrays/largebedarray/__fragments/__1706813524799_1706813524799_4631aede2e67ffd18f9fccba4388a61f_21/a3.tdb new file mode 100644 index 000000000..bcc4faa56 Binary files /dev/null and b/libtiledbvcf/test/inputs/arrays/largebedarray/__fragments/__1706813524799_1706813524799_4631aede2e67ffd18f9fccba4388a61f_21/a3.tdb differ diff --git a/libtiledbvcf/test/inputs/arrays/largebedarray/__fragments/__1706813524799_1706813524799_4631aede2e67ffd18f9fccba4388a61f_21/a3_var.tdb b/libtiledbvcf/test/inputs/arrays/largebedarray/__fragments/__1706813524799_1706813524799_4631aede2e67ffd18f9fccba4388a61f_21/a3_var.tdb new file mode 100644 index 000000000..2b36c5944 Binary files /dev/null and b/libtiledbvcf/test/inputs/arrays/largebedarray/__fragments/__1706813524799_1706813524799_4631aede2e67ffd18f9fccba4388a61f_21/a3_var.tdb differ diff --git a/libtiledbvcf/test/inputs/arrays/largebedarray/__fragments/__1706813524799_1706813524799_4631aede2e67ffd18f9fccba4388a61f_21/a4.tdb b/libtiledbvcf/test/inputs/arrays/largebedarray/__fragments/__1706813524799_1706813524799_4631aede2e67ffd18f9fccba4388a61f_21/a4.tdb new file mode 100644 index 000000000..73ba5a1c4 Binary files /dev/null and b/libtiledbvcf/test/inputs/arrays/largebedarray/__fragments/__1706813524799_1706813524799_4631aede2e67ffd18f9fccba4388a61f_21/a4.tdb differ diff --git a/libtiledbvcf/test/inputs/arrays/largebedarray/__fragments/__1706813524799_1706813524799_4631aede2e67ffd18f9fccba4388a61f_21/a5.tdb b/libtiledbvcf/test/inputs/arrays/largebedarray/__fragments/__1706813524799_1706813524799_4631aede2e67ffd18f9fccba4388a61f_21/a5.tdb new file mode 100644 index 000000000..b36d90faa Binary files /dev/null and b/libtiledbvcf/test/inputs/arrays/largebedarray/__fragments/__1706813524799_1706813524799_4631aede2e67ffd18f9fccba4388a61f_21/a5.tdb differ diff --git a/libtiledbvcf/test/inputs/arrays/largebedarray/__fragments/__1706813524799_1706813524799_4631aede2e67ffd18f9fccba4388a61f_21/a6.tdb b/libtiledbvcf/test/inputs/arrays/largebedarray/__fragments/__1706813524799_1706813524799_4631aede2e67ffd18f9fccba4388a61f_21/a6.tdb new file mode 100644 index 000000000..95cd0f28e Binary files /dev/null and b/libtiledbvcf/test/inputs/arrays/largebedarray/__fragments/__1706813524799_1706813524799_4631aede2e67ffd18f9fccba4388a61f_21/a6.tdb differ diff --git a/libtiledbvcf/test/inputs/arrays/largebedarray/__fragments/__1706813524799_1706813524799_4631aede2e67ffd18f9fccba4388a61f_21/a6_var.tdb b/libtiledbvcf/test/inputs/arrays/largebedarray/__fragments/__1706813524799_1706813524799_4631aede2e67ffd18f9fccba4388a61f_21/a6_var.tdb new file mode 100644 index 000000000..8cafb08b0 Binary files /dev/null and b/libtiledbvcf/test/inputs/arrays/largebedarray/__fragments/__1706813524799_1706813524799_4631aede2e67ffd18f9fccba4388a61f_21/a6_var.tdb differ diff --git a/libtiledbvcf/test/inputs/arrays/largebedarray/__fragments/__1706813524799_1706813524799_4631aede2e67ffd18f9fccba4388a61f_21/d0.tdb b/libtiledbvcf/test/inputs/arrays/largebedarray/__fragments/__1706813524799_1706813524799_4631aede2e67ffd18f9fccba4388a61f_21/d0.tdb new file mode 100644 index 000000000..781db0655 Binary files /dev/null and b/libtiledbvcf/test/inputs/arrays/largebedarray/__fragments/__1706813524799_1706813524799_4631aede2e67ffd18f9fccba4388a61f_21/d0.tdb differ diff --git a/libtiledbvcf/test/inputs/arrays/largebedarray/__fragments/__1706813524799_1706813524799_4631aede2e67ffd18f9fccba4388a61f_21/d0_var.tdb b/libtiledbvcf/test/inputs/arrays/largebedarray/__fragments/__1706813524799_1706813524799_4631aede2e67ffd18f9fccba4388a61f_21/d0_var.tdb new file mode 100644 index 000000000..fa86c6564 Binary files /dev/null and b/libtiledbvcf/test/inputs/arrays/largebedarray/__fragments/__1706813524799_1706813524799_4631aede2e67ffd18f9fccba4388a61f_21/d0_var.tdb differ diff --git a/libtiledbvcf/test/inputs/arrays/largebedarray/__fragments/__1706813524799_1706813524799_4631aede2e67ffd18f9fccba4388a61f_21/d1.tdb b/libtiledbvcf/test/inputs/arrays/largebedarray/__fragments/__1706813524799_1706813524799_4631aede2e67ffd18f9fccba4388a61f_21/d1.tdb new file mode 100644 index 000000000..ea3834d4b Binary files /dev/null and b/libtiledbvcf/test/inputs/arrays/largebedarray/__fragments/__1706813524799_1706813524799_4631aede2e67ffd18f9fccba4388a61f_21/d1.tdb differ diff --git a/libtiledbvcf/test/inputs/arrays/largebedarray/__fragments/__1706813524799_1706813524799_4631aede2e67ffd18f9fccba4388a61f_21/d1_var.tdb b/libtiledbvcf/test/inputs/arrays/largebedarray/__fragments/__1706813524799_1706813524799_4631aede2e67ffd18f9fccba4388a61f_21/d1_var.tdb new file mode 100644 index 000000000..82b9265cc Binary files /dev/null and b/libtiledbvcf/test/inputs/arrays/largebedarray/__fragments/__1706813524799_1706813524799_4631aede2e67ffd18f9fccba4388a61f_21/d1_var.tdb differ diff --git a/libtiledbvcf/test/inputs/arrays/largebedarray/__fragments/__1706813524799_1706813524799_4631aede2e67ffd18f9fccba4388a61f_21/d2.tdb b/libtiledbvcf/test/inputs/arrays/largebedarray/__fragments/__1706813524799_1706813524799_4631aede2e67ffd18f9fccba4388a61f_21/d2.tdb new file mode 100644 index 000000000..73ba5a1c4 Binary files /dev/null and b/libtiledbvcf/test/inputs/arrays/largebedarray/__fragments/__1706813524799_1706813524799_4631aede2e67ffd18f9fccba4388a61f_21/d2.tdb differ diff --git a/libtiledbvcf/test/inputs/arrays/largebedarray/__meta/__1706813523981_1706813523981_04eef2b4b5d860cc67a7aabe10d172d6 b/libtiledbvcf/test/inputs/arrays/largebedarray/__meta/__1706813523981_1706813523981_04eef2b4b5d860cc67a7aabe10d172d6 new file mode 100644 index 000000000..6c6d443db Binary files /dev/null and b/libtiledbvcf/test/inputs/arrays/largebedarray/__meta/__1706813523981_1706813523981_04eef2b4b5d860cc67a7aabe10d172d6 differ diff --git a/libtiledbvcf/test/inputs/arrays/largebedarray/__meta/__1706813524020_1706813524020_8548dea130821acc583c502c832e0a3a b/libtiledbvcf/test/inputs/arrays/largebedarray/__meta/__1706813524020_1706813524020_8548dea130821acc583c502c832e0a3a new file mode 100644 index 000000000..6685dad15 Binary files /dev/null and b/libtiledbvcf/test/inputs/arrays/largebedarray/__meta/__1706813524020_1706813524020_8548dea130821acc583c502c832e0a3a differ diff --git a/libtiledbvcf/test/inputs/arrays/largebedarray/__meta/__1706813525041_1706813525041_23d9a035f5e095708b3a26b7aa4bcecb b/libtiledbvcf/test/inputs/arrays/largebedarray/__meta/__1706813525041_1706813525041_23d9a035f5e095708b3a26b7aa4bcecb new file mode 100644 index 000000000..6685dad15 Binary files /dev/null and b/libtiledbvcf/test/inputs/arrays/largebedarray/__meta/__1706813525041_1706813525041_23d9a035f5e095708b3a26b7aa4bcecb differ diff --git a/libtiledbvcf/test/inputs/arrays/largebedarray/__schema/__1706813523980_1706813523980_b5ee8cb6abe457f8f258d22d4db91392 b/libtiledbvcf/test/inputs/arrays/largebedarray/__schema/__1706813523980_1706813523980_b5ee8cb6abe457f8f258d22d4db91392 new file mode 100644 index 000000000..d9ede16a1 Binary files /dev/null and b/libtiledbvcf/test/inputs/arrays/largebedarray/__schema/__1706813523980_1706813523980_b5ee8cb6abe457f8f258d22d4db91392 differ