Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[perf] Added tests for data types (DateTime) #2243

Merged
merged 5 commits into from
Mar 25, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
package com.clickhouse.benchmark;

import com.clickhouse.benchmark.clients.Compression;
import com.clickhouse.benchmark.clients.DataTypes;
import com.clickhouse.benchmark.clients.ConcurrentInsertClient;
import com.clickhouse.benchmark.clients.ConcurrentQueryClient;
import com.clickhouse.benchmark.clients.Deserializers;
Expand Down Expand Up @@ -46,6 +47,7 @@ public static void main(String[] args) throws Exception {
.include(Serializers.class.getName())
.include(Deserializers.class.getName())
.include(MixedWorkload.class.getName())
.include(DataTypes.class.getName())
.include(JDBCQuery.class.getName())
.include(JDBCInsert.class.getName())
.forks(1) // must be a fork. No fork only for debugging
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import com.clickhouse.benchmark.data.DataSet;
import com.clickhouse.benchmark.data.FileDataSet;
import com.clickhouse.benchmark.data.SimpleDataSet;
import com.clickhouse.benchmark.data.SyntheticDataSet;
import com.clickhouse.client.ClickHouseClient;
import com.clickhouse.client.ClickHouseClientBuilder;
import com.clickhouse.client.ClickHouseCredentials;
Expand Down Expand Up @@ -117,6 +118,8 @@ public static class DataState {

ByteBuffer datasetAsRowBinaryWithNamesAndTypes;

SyntheticDataSet syntheticDataSet;

public void setDataSet(DataSet dataSet) {
this.dataSet = dataSet;
}
Expand Down
171 changes: 171 additions & 0 deletions performance/src/test/com/clickhouse/benchmark/clients/DataTypes.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,171 @@
package com.clickhouse.benchmark.clients;

import com.clickhouse.benchmark.data.SyntheticDataSet;
import com.clickhouse.client.api.Client;
import com.clickhouse.client.api.data_formats.internal.BinaryStreamReader;
import com.clickhouse.client.api.data_formats.internal.SerializerUtils;
import com.clickhouse.client.api.query.QueryResponse;
import com.clickhouse.client.api.query.QuerySettings;
import com.clickhouse.data.ClickHouseColumn;
import com.clickhouse.data.ClickHouseFormat;
import com.clickhouse.data.ClickHouseInputStream;
import com.clickhouse.data.ClickHouseOutputStream;
import com.clickhouse.data.format.BinaryDataProcessor;
import com.clickhouse.data.format.BinaryStreamUtils;
import com.clickhouse.data.value.ClickHouseDateTimeValue;
import org.openjdk.jmh.annotations.Benchmark;
import org.openjdk.jmh.annotations.Level;
import org.openjdk.jmh.annotations.Setup;
import org.openjdk.jmh.infra.Blackhole;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.EOFException;
import java.io.IOException;
import java.io.OutputStream;
import java.nio.ByteBuffer;
import java.time.LocalDateTime;
import java.util.TimeZone;

/**
 * JMH benchmarks comparing client V1 vs V2 serialization/deserialization of
 * DateTime64 values, using a synthetic in-memory data set so that only the
 * (de)serialization code is measured.
 */
public class DataTypes extends BenchmarkBase {

    private static final Logger LOGGER = LoggerFactory.getLogger(DataTypes.class);

    /** Single time zone used by every benchmark in this class. */
    private static final TimeZone UTC = TimeZone.getTimeZone("UTC");

    /**
     * Per-iteration setup: snapshots the filled table as RowBinaryWithNamesAndTypes
     * and lazily creates the synthetic DateTime data set sized to {@code dataState.limit}.
     */
    @Setup(Level.Iteration)
    public void setUpIteration(DataState dataState) {
        super.setUpIteration();

        try (Client c = getClientV2(); QueryResponse r = c.query("SELECT * FROM " + dataState.tableNameFilled, new QuerySettings()
                .setFormat(ClickHouseFormat.RowBinaryWithNamesAndTypes)).get()) {
            dataState.datasetAsRowBinaryWithNamesAndTypes = ByteBuffer.wrap(r.getInputStream().readAllBytes());
            LOGGER.info("Loaded {} from dataset", dataState.datasetAsRowBinaryWithNamesAndTypes.capacity());
        } catch (Exception e) {
            LOGGER.error("Failed to init data for components benchmark", e);
        }

        // BUG FIX: the original tested `!= null`, so the synthetic data set (which
        // starts out null) was never created and every benchmark below would NPE.
        if (dataState.syntheticDataSet == null) {
            dataState.syntheticDataSet = new SyntheticDataSet(dataState.limit);
        }
    }

    /**
     * Reads {@code limit} DateTime64(3) values with the V1 data-processor SerDe.
     */
    @Benchmark
    public void readDateTimeV1(DataState dataState, Blackhole blackhole) {
        ClickHouseInputStream input = ClickHouseInputStream.of(dataState.syntheticDataSet.getDateTimeValuesRowBinaryStream());
        BinaryDataProcessor.DateTime64SerDe serDe = new BinaryDataProcessor.DateTime64SerDe(3, UTC);
        // V1 requires a reusable value holder; the blackhole keeps it from being DCE'd.
        ClickHouseDateTimeValue valueHolder = ClickHouseDateTimeValue.ofNull(3, UTC);

        // FIX: the original looped `<=`, attempting limit + 1 reads and relying on an
        // EOF exception to terminate. The stream holds exactly `limit` values.
        int valueCount = 0;
        while (valueCount < dataState.limit) {
            try {
                serDe.deserialize(valueHolder, input);
                blackhole.consume(valueHolder);
                valueCount++;
            } catch (IOException ex) {
                throw new RuntimeException("Failed to read all values (read " + valueCount
                        + " of " + dataState.limit + ")", ex);
            }
        }
    }

    /**
     * Reads {@code limit} DateTime64(3) values with the V2 {@link BinaryStreamReader}.
     */
    @Benchmark
    public void readDateTimeV2(DataState dataState, Blackhole blackhole) {
        ClickHouseInputStream input = ClickHouseInputStream.of(dataState.syntheticDataSet.getDateTimeValuesRowBinaryStream());

        // Scratch buffer reused across reads; 8 bytes covers a DateTime64 value.
        byte[] buffer = new byte[8];

        // FIX: same off-by-one as readDateTimeV1 — read exactly `limit` values
        // instead of running into EOF on the (limit + 1)-th read.
        int valueCount = 0;
        while (valueCount < dataState.limit) {
            try {
                blackhole.consume(BinaryStreamReader.readDateTime64(input, buffer, 3, UTC));
                valueCount++;
            } catch (IOException ex) {
                throw new RuntimeException("Failed to read all values (read " + valueCount
                        + " of " + dataState.limit + ")", ex);
            }
        }
    }

    /**
     * Writes all synthetic DateTime values with the V1 {@link BinaryStreamUtils} writer.
     * Output goes to a blackhole-backed stream so only serialization cost is measured.
     */
    @Benchmark
    public void DateTimeSerializerV1(DataState dataState, Blackhole blackhole) {
        OutputStream empty = new BlackholeOutputStream(blackhole);
        ClickHouseOutputStream chos = ClickHouseOutputStream.of(empty);

        // FIX: removed an unused DateTime64SerDe allocation that was never invoked
        // and only added noise to the measurement. An app wanting the data-processor
        // path would wrap each value: serDe.serialize(ClickHouseDateTimeValue.of(...), chos)
        for (LocalDateTime dateTime : dataState.syntheticDataSet.getDateTimeValues()) {
            try {
                BinaryStreamUtils.writeDateTime64(chos, dateTime, 3, UTC);
            } catch (Exception e) {
                LOGGER.error("Error: ", e);
            }
        }
        try {
            chos.flush();
        } catch (Exception e) {
            LOGGER.error("Error: ", e);
        }
    }

    /**
     * Writes all synthetic DateTime values with the V2 {@link SerializerUtils} writer.
     */
    @Benchmark
    public void DateTimeSerializerV2(DataState dataState, Blackhole blackhole) {
        OutputStream empty = new BlackholeOutputStream(blackhole);
        ClickHouseColumn column = ClickHouseColumn.of("a", "DateTime64(3, 'UTC')");

        for (LocalDateTime dateTime : dataState.syntheticDataSet.getDateTimeValues()) {
            try {
                SerializerUtils.serializeData(empty, dateTime, column);
            } catch (Exception e) {
                LOGGER.error("Error: ", e);
            }
        }
    }

    /**
     * OutputStream that forwards every write into a JMH {@link Blackhole} so the
     * JIT cannot eliminate the serialization work, while tracking bytes written.
     */
    private static class BlackholeOutputStream extends OutputStream {

        private final Blackhole blackhole;
        // Total bytes written; useful for sanity-checking a benchmark run.
        public long count = 0;

        public BlackholeOutputStream(Blackhole blackhole) {
            this.blackhole = blackhole;
        }

        @Override
        public void write(int b) {
            blackhole.consume(b);
            count++;
        }

        @Override
        public void write(byte[] b) {
            write(b, 0, b.length);
        }

        @Override
        public void write(byte[] b, int off, int len) {
            blackhole.consume(b);
            count += len;
        }

        @Override
        public void flush() {
            // nothing buffered
        }

        @Override
        public void close() {
            // nothing to release
        }
    }
}
Original file line number Diff line number Diff line change
@@ -1,77 +1,25 @@
package com.clickhouse.benchmark.clients;

import com.clickhouse.benchmark.data.DataSet;
import com.clickhouse.client.ClickHouseConfig;
import com.clickhouse.client.api.Client;
import com.clickhouse.client.api.data_formats.RowBinaryFormatWriter;
import com.clickhouse.client.api.data_formats.RowBinaryWithNamesAndTypesFormatReader;
import com.clickhouse.client.api.data_formats.internal.BinaryStreamReader;
import com.clickhouse.client.api.internal.ClickHouseLZ4OutputStream;
import com.clickhouse.client.api.query.QueryResponse;
import com.clickhouse.client.api.query.QuerySettings;
import com.clickhouse.client.config.ClickHouseClientOption;
import com.clickhouse.data.ClickHouseColumn;
import com.clickhouse.data.ClickHouseDataProcessor;
import com.clickhouse.data.ClickHouseFormat;
import com.clickhouse.data.ClickHouseInputStream;
import com.clickhouse.data.ClickHouseOutputStream;
import com.clickhouse.data.ClickHouseRecord;
import com.clickhouse.data.ClickHouseSerializer;
import com.clickhouse.data.format.ClickHouseRowBinaryProcessor;
import com.clickhouse.data.stream.Lz4OutputStream;
import net.jpountz.lz4.LZ4Factory;
import org.openjdk.jmh.annotations.Benchmark;
import org.openjdk.jmh.annotations.Level;
import org.openjdk.jmh.annotations.Setup;
import org.openjdk.jmh.infra.Blackhole;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.InputStream;
import java.io.OutputStream;
import java.nio.ByteBuffer;
import java.util.Collections;
import java.util.List;
import java.util.Map;

public class Serializers extends BenchmarkBase {
private static final Logger LOGGER = LoggerFactory.getLogger(Serializers.class);

private OutputStream createEmptyOutputStream() {
return new OutputStream() {
private long count = 0;

@Override
public void write(int b) {
count++;
}

@Override
public void write(byte[] b) {
count += b.length;
}

@Override
public void write(byte[] b, int off, int len) {
count += len;
}

@Override
public void flush() {

}

@Override
public void close() {
}
};
}

@Benchmark
public void SerializerOutputStreamV1(DataState dataState) {
OutputStream empty = createEmptyOutputStream();
public void SerializerOutputStreamV1(DataState dataState, Blackhole blackhole) {
OutputStream empty = new BlackholeOutputStream(blackhole);
try {
ClickHouseOutputStream chos = ClickHouseOutputStream.of(empty);
ClickHouseDataProcessor p = dataState.dataSet.getClickHouseDataProcessor();
Expand All @@ -88,8 +36,8 @@ public void SerializerOutputStreamV1(DataState dataState) {
}

@Benchmark
public void SerializerOutputStreamV2(DataState dataState) {
OutputStream empty = createEmptyOutputStream();
public void SerializerOutputStreamV2(DataState dataState, Blackhole blackhole) {
OutputStream empty = new BlackholeOutputStream(blackhole);
try {
RowBinaryFormatWriter w = new RowBinaryFormatWriter(empty, dataState.dataSet.getSchema(), ClickHouseFormat.RowBinary);
for (List<Object> row : dataState.dataSet.getRowsOrdered()) {
Expand All @@ -105,4 +53,40 @@ public void SerializerOutputStreamV2(DataState dataState) {
LOGGER.error("Error: ", e);
}
}

/**
 * Discarding output stream: every byte written is fed to a JMH {@link Blackhole}
 * so the serialization work cannot be optimized away. Also records how many
 * bytes passed through in {@link #count}.
 */
private static class BlackholeOutputStream extends OutputStream {

    private final Blackhole blackhole;
    // Running total of bytes written through this stream.
    public long count = 0;

    public BlackholeOutputStream(Blackhole sink) {
        this.blackhole = sink;
    }

    @Override
    public void write(int b) {
        count++;
        blackhole.consume(b);
    }

    @Override
    public void write(byte[] b) {
        // Delegate so the array overload is counted exactly once.
        write(b, 0, b.length);
    }

    @Override
    public void write(byte[] b, int off, int len) {
        count += len;
        blackhole.consume(b);
    }

    @Override
    public void flush() {
        // no buffering — nothing to flush
    }

    @Override
    public void close() {
        // no underlying resource to release
    }
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
package com.clickhouse.benchmark.data;

import com.clickhouse.data.format.BinaryStreamUtils;

import java.io.ByteArrayOutputStream;
import java.time.LocalDateTime;
import java.util.TimeZone;

/**
 * Generates in-memory synthetic data for benchmarks: a fixed number of
 * DateTime values, both as {@link LocalDateTime} objects and pre-encoded as
 * RowBinary DateTime64(3, 'UTC') bytes.
 */
public class SyntheticDataSet {

    /** Number of values generated per data type. */
    private final int capacity;

    /** Generated DateTime values, in generation order. */
    private LocalDateTime[] dateTimeValues;

    /** The same values encoded as RowBinary DateTime64(3, 'UTC'). */
    private byte[] dateTimeValuesRowBinary;

    public SyntheticDataSet(int capacity) {
        this.capacity = capacity;
        generateData();
    }

    private void generateData() {
        generateDateTimeValues();
    }

    private void generateDateTimeValues() {
        ByteArrayOutputStream out = new ByteArrayOutputStream();
        dateTimeValues = new LocalDateTime[capacity];
        TimeZone tz = TimeZone.getTimeZone("UTC");
        // FIX: hoisted out of the loop — the original called LocalDateTime.now()
        // on every iteration, paying a clock read per value and producing
        // irregularly spaced timestamps. One base gives values exactly 1s apart.
        LocalDateTime base = LocalDateTime.now();

        try {
            for (int i = 0; i < capacity; i++) {
                dateTimeValues[i] = base.plusSeconds(i);
                BinaryStreamUtils.writeDateTime64(out, dateTimeValues[i], 3, tz);
            }
        } catch (Exception e) {
            throw new RuntimeException("Failed to generate date time values", e);
        }
        dateTimeValuesRowBinary = out.toByteArray();
    }

    public LocalDateTime[] getDateTimeValues() {
        return dateTimeValues;
    }

    /** Returns the RowBinary-encoded bytes backing {@link #getDateTimeValues()}. */
    public byte[] getDateTimeValuesRowBinaryStream() {
        return dateTimeValuesRowBinary;
    }
}
Loading