Skip to content

Commit 57e7587

Browse files
authored
Merge pull request #91 from TileDB-Inc/sethshelnutt/ch2178/expose-driver-option-to-enable-stats-printing
Add support for enabling and dumping TileDB Stats
2 parents f7ba771 + 2d36830 commit 57e7587

File tree

19 files changed

+304
-3
lines changed

19 files changed

+304
-3
lines changed

apis/java/src/main/java/io/tiledb/libvcfnative/LibVCFNative.c

+48
Original file line numberDiff line numberDiff line change
@@ -559,4 +559,52 @@ Java_io_tiledb_libvcfnative_LibVCFNative_tiledb_1vcf_1reader_1get_1last_1error_1
559559
jstring result = (*env)->NewStringUTF(env, msg);
560560
tiledb_vcf_error_free(&error);
561561
return result;
562+
}
563+
564+
JNIEXPORT jint JNICALL
565+
Java_io_tiledb_libvcfnative_LibVCFNative_tiledb_1vcf_1reader_1set_1tiledb_1stats_1enabled(
566+
JNIEnv* env, jclass self, jlong readerPtr, jboolean statsEnabled) {
567+
(void)self;
568+
tiledb_vcf_reader_t* reader = (tiledb_vcf_reader_t*)readerPtr;
569+
if (reader == 0) {
570+
return TILEDB_VCF_ERR;
571+
}
572+
573+
const bool stats_enabled = statsEnabled ? true : false;
574+
575+
int32_t rc = tiledb_vcf_reader_set_tiledb_stats_enabled(reader, stats_enabled);
576+
577+
return rc;
578+
}
579+
580+
JNIEXPORT jboolean JNICALL
581+
Java_io_tiledb_libvcfnative_LibVCFNative_tiledb_1vcf_1reader_1tiledb_1stats_1enabled(
582+
JNIEnv* env, jclass self, jlong readerPtr) {
583+
(void)self;
584+
tiledb_vcf_reader_t* reader = (tiledb_vcf_reader_t*)readerPtr;
585+
if (reader == 0) {
586+
return TILEDB_VCF_ERR;
587+
}
588+
589+
bool stats_enabled;
590+
591+
int rc = tiledb_vcf_reader_get_tiledb_stats_enabled(reader, &stats_enabled);
592+
593+
return stats_enabled;
594+
}
595+
596+
JNIEXPORT jstring JNICALL
597+
Java_io_tiledb_libvcfnative_LibVCFNative_tiledb_1vcf_1reader_1tiledb_1stats(
598+
JNIEnv* env, jclass self, jlong readerPtr) {
599+
(void)self;
600+
tiledb_vcf_reader_t* reader = (tiledb_vcf_reader_t*)readerPtr;
601+
if (reader == 0) {
602+
return NULL;
603+
}
604+
605+
char *stats;
606+
tiledb_vcf_reader_get_tiledb_stats(reader, &stats);
607+
608+
jstring result = (*env)->NewStringUTF(env, stats);
609+
return result;
562610
}

apis/java/src/main/java/io/tiledb/libvcfnative/LibVCFNative.h

+24
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

apis/java/src/main/java/io/tiledb/libvcfnative/LibVCFNative.java

+7
Original file line numberDiff line numberDiff line change
@@ -87,4 +87,11 @@ public static final native int tiledb_vcf_reader_get_dataset_version(
8787
long readerPtr, int[] version);
8888

8989
public static final native String tiledb_vcf_reader_get_last_error_message(long readerPtr);
90+
91+
public static final native int tiledb_vcf_reader_set_tiledb_stats_enabled(
92+
long readerPtr, boolean statsEnabled);
93+
94+
public static final native boolean tiledb_vcf_reader_tiledb_stats_enabled(long readerPtr);
95+
96+
public static final native String tiledb_vcf_reader_tiledb_stats(long readerPtr);
9097
}

apis/java/src/main/java/io/tiledb/libvcfnative/VCFReader.java

+21
Original file line numberDiff line numberDiff line change
@@ -355,6 +355,27 @@ public AttributeTypeInfo getAttributeDatatype(String attribute) {
355355
}
356356
}
357357

358+
public VCFReader setStatsEnabled(boolean statsEnabled) {
359+
int rc = LibVCFNative.tiledb_vcf_reader_set_tiledb_stats_enabled(this.readerPtr, statsEnabled);
360+
if (rc != 0) {
361+
String msg = getLastErrorMessage();
362+
throw new RuntimeException("Error setting stats enabled: " + msg);
363+
}
364+
return this;
365+
}
366+
367+
public boolean getStatsEnabled() {
368+
boolean statsEnabled = LibVCFNative.tiledb_vcf_reader_tiledb_stats_enabled(this.readerPtr);
369+
370+
return statsEnabled;
371+
}
372+
373+
public String stats() {
374+
String stats = LibVCFNative.tiledb_vcf_reader_tiledb_stats(this.readerPtr);
375+
376+
return stats;
377+
}
378+
358379
public VCFReader resetBuffers() {
359380
Iterator it = buffers.entrySet().iterator();
360381
while (it.hasNext()) {

apis/java/src/test/java/io/tiledb/libvcfnative/VCFReaderTest.java

+25
Original file line numberDiff line numberDiff line change
@@ -181,4 +181,29 @@ public void testSetSingleBuffer() throws IOException {
181181

182182
Assert.assertEquals(results, BED_FILE_EXPECTED_RECORDS);
183183
}
184+
185+
@Test
186+
public void testSetStatsEnabled() throws IOException {
187+
VCFReader reader = getVFCReader(Optional.empty(), Optional.of(constructBEDURI()));
188+
189+
reader.setStatsEnabled(true);
190+
}
191+
192+
@Test
193+
public void testGetStatsEnabled() throws IOException {
194+
VCFReader reader = getVFCReader(Optional.empty(), Optional.of(constructBEDURI()));
195+
196+
Assert.assertFalse(reader.getStatsEnabled());
197+
reader.setStatsEnabled(true);
198+
Assert.assertTrue(reader.getStatsEnabled());
199+
reader.setStatsEnabled(false);
200+
Assert.assertFalse(reader.getStatsEnabled());
201+
}
202+
203+
@Test
204+
public void testStats() throws IOException {
205+
VCFReader reader = getVFCReader(Optional.empty(), Optional.of(constructBEDURI()));
206+
reader.setStatsEnabled(true);
207+
Assert.assertNotNull(reader.stats());
208+
}
184209
}

apis/python/.gitignore

+1
Original file line numberDiff line numberDiff line change
@@ -1 +1,2 @@
11
src/tiledbvcf/version.py
2+
src/tiledbvcf/libhts.so.1.8

apis/python/src/tiledbvcf/binding/libtiledbvcf.cc

+4-1
Original file line numberDiff line numberDiff line change
@@ -29,11 +29,14 @@ PYBIND11_MODULE(libtiledbvcf, m) {
2929
.def("set_max_num_records", &Reader::set_max_num_records)
3030
.def("set_tiledb_config", &Reader::set_tiledb_config)
3131
.def("set_attributes", &Reader::set_attributes)
32+
.def("set_tiledb_stats_enabled", &Reader::set_tiledb_stats_enabled)
3233
.def("read", &Reader::read)
3334
.def("get_buffers", &Reader::get_buffers)
3435
.def("get_results_arrow", &Reader::get_results_arrow)
3536
.def("completed", &Reader::completed)
36-
.def("result_num_records", &Reader::result_num_records);
37+
.def("result_num_records", &Reader::result_num_records)
38+
.def("get_tiledb_stats_enabled", &Reader::get_tiledb_stats_enabled)
39+
.def("get_tiledb_stats", &Reader::get_tiledb_stats);
3740

3841
py::class_<Writer>(m, "Writer")
3942
.def(py::init())

apis/python/src/tiledbvcf/binding/reader.cc

+19
Original file line numberDiff line numberDiff line change
@@ -76,6 +76,11 @@ void Reader::set_attributes(const std::vector<std::string>& attributes) {
7676
attributes_ = attributes;
7777
}
7878

79+
void Reader::set_tiledb_stats_enabled(const bool stats_enabled) {
80+
auto reader = ptr.get();
81+
check_error(reader, tiledb_vcf_reader_set_tiledb_stats_enabled(reader, stats_enabled));
82+
}
83+
7984
void Reader::set_samples(const std::string& samples) {
8085
auto reader = ptr.get();
8186
check_error(reader, tiledb_vcf_reader_set_samples(reader, samples.c_str()));
@@ -325,6 +330,20 @@ bool Reader::completed() {
325330
return status == TILEDB_VCF_COMPLETED;
326331
}
327332

333+
bool Reader::get_tiledb_stats_enabled() {
334+
auto reader = ptr.get();
335+
bool stats_enabled;
336+
check_error(reader, tiledb_vcf_reader_get_tiledb_stats_enabled(reader, &stats_enabled));
337+
return stats_enabled;
338+
}
339+
340+
std::string Reader::get_tiledb_stats() {
341+
auto reader = ptr.get();
342+
char* stats;
343+
check_error(reader, tiledb_vcf_reader_get_tiledb_stats(reader, &stats));
344+
return std::string(stats);
345+
}
346+
328347
py::dtype Reader::to_numpy_dtype(tiledb_vcf_attr_datatype_t datatype) {
329348
switch (datatype) {
330349
case TILEDB_VCF_CHAR:

apis/python/src/tiledbvcf/binding/reader.h

+9
Original file line numberDiff line numberDiff line change
@@ -85,6 +85,9 @@ class Reader {
8585
/** Sets CSV TileDB config parameters. */
8686
void set_tiledb_config(const std::string& config_str);
8787

88+
/** Sets whether internal TileDB statistics are enabled or disabled. */
89+
void set_tiledb_stats_enabled(const bool stats_enabled);
90+
8891
/** Performs a blocking read operation. */
8992
void read();
9093

@@ -106,6 +109,12 @@ class Reader {
106109
/** Returns true if the last read operation was complete. */
107110
bool completed();
108111

112+
/** Returns whether internal TileDB statistics are enabled or disabled. */
113+
bool get_tiledb_stats_enabled();
114+
115+
/** Fetches TileDB statistics */
116+
std::string get_tiledb_stats();
117+
109118
private:
110119
/** Buffer struct to hold attribute data read from the dataset. */
111120
struct BufferInfo {

apis/python/src/tiledbvcf/dataset.py

+11-1
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@
2424
class TileDBVCFDataset(object):
2525
"""A handle on a TileDB-VCF dataset."""
2626

27-
def __init__(self, uri, mode='r', cfg=None):
27+
def __init__(self, uri, mode='r', cfg=None, stats=False):
2828
""" Initializes a TileDB-VCF dataset for interaction.
2929
3030
:param uri: URI of TileDB-VCF dataset
@@ -38,6 +38,7 @@ def __init__(self, uri, mode='r', cfg=None):
3838
self.reader = libtiledbvcf.Reader()
3939
self._set_read_cfg(cfg)
4040
self.reader.init(uri)
41+
self.reader.set_tiledb_stats_enabled(stats)
4142
elif self.mode == 'w':
4243
self.writer = libtiledbvcf.Writer()
4344
self.writer.init(uri)
@@ -199,3 +200,12 @@ def ingest_samples(self, sample_uris=None, extra_attrs=None, checksum_type=None,
199200
self.writer.create_dataset()
200201
self.writer.register_samples()
201202
self.writer.ingest_samples()
203+
204+
def tiledb_stats(self):
    """Returns the TileDB statistics reported by the underlying reader.

    :return: The value returned by the reader's ``get_tiledb_stats()``
    :raises Exception: if the dataset is not open in read mode (``'r'``),
        or if statistics are not enabled on the reader
    """
    if self.mode != 'r':
        raise Exception('Stats can only be called for reader')

    # get_tiledb_stats_enabled is a method and must be called: the bound
    # method object itself is always truthy, so testing it without the
    # parentheses meant this guard could never fire.
    if not self.reader.get_tiledb_stats_enabled():
        raise Exception('Stats not enabled')

    return self.reader.get_tiledb_stats()
-3.85 MB
Binary file not shown.

apis/spark/src/main/java/io/tiledb/vcf/VCFDataSourceOptions.java

+9
Original file line numberDiff line numberDiff line change
@@ -115,6 +115,15 @@ public Optional<AWSSessionCredentialsProvider> getCredentialsProvider() {
115115
return Optional.empty();
116116
}
117117

118+
/** @return The log level for the VCFReader stats reporting */
119+
public Optional<String> getTileDBStatsLogLevel() {
120+
if (options.containsKey("tiledb_stats_log_level")) {
121+
String statsLogLevel = options.get("tiledb_stats_log_level");
122+
return Optional.of(statsLogLevel);
123+
}
124+
return Optional.empty();
125+
}
126+
118127
/** @return Optional CSV String of config parameters */
119128
public Optional<String> getConfigCSV() {
120129
return getConfigCSV(options);

apis/spark/src/main/java/io/tiledb/vcf/VCFInputPartitionReader.java

+16
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
import org.apache.arrow.vector.types.FloatingPointPrecision;
1919
import org.apache.arrow.vector.types.pojo.ArrowType;
2020
import org.apache.arrow.vector.types.pojo.FieldType;
21+
import org.apache.log4j.Level;
2122
import org.apache.log4j.Logger;
2223
import org.apache.spark.sql.execution.arrow.ArrowUtils;
2324
import org.apache.spark.sql.sources.v2.reader.InputPartitionReader;
@@ -78,6 +79,8 @@ public class VCFInputPartitionReader implements InputPartitionReader<ColumnarBat
7879
/** Stats counter: number of bytes in allocated buffers. */
7980
private long statsTotalBufferBytes;
8081

82+
private Level enableStatsLogLevel;
83+
8184
/**
8285
* Creates a TileDB-VCF reader.
8386
*
@@ -115,6 +118,12 @@ public VCFInputPartitionReader(
115118
} else {
116119
this.samples = new String[] {};
117120
}
121+
122+
this.enableStatsLogLevel = Level.OFF;
123+
if (this.options.getTileDBStatsLogLevel().isPresent()) {
124+
// If an invalid log level is set, the default is DEBUG
125+
this.enableStatsLogLevel = Level.toLevel(this.options.getTileDBStatsLogLevel().get());
126+
}
118127
}
119128

120129
@Override
@@ -183,6 +192,10 @@ public ColumnarBatch get() {
183192
public void close() {
184193
log.info("Closing VCFReader for partition " + (this.partitionId));
185194

195+
if (!this.enableStatsLogLevel.equals(Level.OFF)) {
196+
log.log(this.enableStatsLogLevel, this.vcfReader.stats());
197+
}
198+
186199
if (vcfReader != null) {
187200
vcfReader.close();
188201
vcfReader = null;
@@ -251,6 +264,9 @@ private void initVCFReader() {
251264
vcfReader.setSortRegions(sortRegions.get().booleanValue());
252265
}
253266

267+
// Enable VCFReader stats
268+
if (!this.enableStatsLogLevel.equals(Level.OFF)) this.vcfReader.setStatsEnabled(true);
269+
254270
// Set logical partition in array
255271
vcfReader.setRangePartition(
256272
rangePartitionInfo.getNumPartitions(), rangePartitionInfo.getIndex());

apis/spark/src/test/java/io/tiledb/vcf/VCFDatasourceTest.java

+2
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
import java.util.List;
1111
import java.util.Map;
1212
import java.util.stream.Collectors;
13+
import org.apache.log4j.Level;
1314
import org.apache.spark.sql.Dataset;
1415
import org.apache.spark.sql.Row;
1516
import org.apache.spark.sql.SparkSession;
@@ -43,6 +44,7 @@ private Dataset<Row> testSampleDataset() {
4344
.option("samples", "HG01762,HG00280")
4445
.option("ranges", "1:12100-13360,1:13500-17350")
4546
.option("tiledb.vfs.num_threads", 1)
47+
.option("tiledb_stats_log_level", Level.INFO.toString())
4648
.load();
4749
return dfRead;
4850
}

libtiledbvcf/src/c_api/tiledbvcf.cc

+34
Original file line numberDiff line numberDiff line change
@@ -354,6 +354,40 @@ int32_t tiledb_vcf_reader_set_tiledb_config(
354354
return TILEDB_VCF_OK;
355355
}
356356

357+
/**
 * C API: enables or disables TileDB statistics on the given reader.
 *
 * @param reader        Reader handle; validated with sanity_check.
 * @param stats_enabled Flag forwarded to the underlying reader object.
 * @return TILEDB_VCF_ERR if the handle fails sanity_check or if
 *         SAVE_ERROR_CATCH reports a failure, TILEDB_VCF_OK otherwise.
 */
int32_t tiledb_vcf_reader_set_tiledb_stats_enabled(
    tiledb_vcf_reader_t* reader, const bool stats_enabled) {
  if (sanity_check(reader) == TILEDB_VCF_ERR)
    return TILEDB_VCF_ERR;

  return SAVE_ERROR_CATCH(
             reader, reader->reader_->set_tiledb_stats_enabled(stats_enabled)) ?
             TILEDB_VCF_ERR :
             TILEDB_VCF_OK;
}
368+
369+
/**
 * C API: reports whether TileDB statistics are enabled on the given reader.
 *
 * @param reader  Reader handle; validated with sanity_check.
 * @param enabled Output pointer forwarded to the underlying reader object.
 * @return TILEDB_VCF_ERR if the handle fails sanity_check or if
 *         SAVE_ERROR_CATCH reports a failure, TILEDB_VCF_OK otherwise.
 */
int32_t tiledb_vcf_reader_get_tiledb_stats_enabled(
    tiledb_vcf_reader_t* reader, bool* enabled) {
  if (sanity_check(reader) == TILEDB_VCF_ERR)
    return TILEDB_VCF_ERR;

  return SAVE_ERROR_CATCH(
             reader, reader->reader_->tiledb_stats_enabled(enabled)) ?
             TILEDB_VCF_ERR :
             TILEDB_VCF_OK;
}
379+
380+
/**
 * C API: fetches the TileDB statistics text for the given reader.
 *
 * @param reader Reader handle; validated with sanity_check.
 * @param stats  Output pointer forwarded to the underlying reader object.
 * @return TILEDB_VCF_ERR if the handle fails sanity_check or if
 *         SAVE_ERROR_CATCH reports a failure, TILEDB_VCF_OK otherwise.
 */
int32_t tiledb_vcf_reader_get_tiledb_stats(
    tiledb_vcf_reader_t* reader, char** stats) {
  if (sanity_check(reader) == TILEDB_VCF_ERR)
    return TILEDB_VCF_ERR;

  return SAVE_ERROR_CATCH(reader, reader->reader_->tiledb_stats(stats)) ?
             TILEDB_VCF_ERR :
             TILEDB_VCF_OK;
}
390+
357391
int32_t tiledb_vcf_reader_read(tiledb_vcf_reader_t* reader) {
358392
if (sanity_check(reader) == TILEDB_VCF_ERR)
359393
return TILEDB_VCF_ERR;

0 commit comments

Comments
 (0)