
Commit 39dfa80

feat: bigtable sink using depot (#198)
* feat: bigtable sink using depot
* docs: bigtable sink docs
* chore: update depot version and bump firehose version to 0.6.0

Co-authored-by: MayurGubrele <[email protected]>
1 parent d9f014f commit 39dfa80

File tree

8 files changed: +38 -19 lines changed

README.md (+1)

@@ -25,6 +25,7 @@ Discover why users choose Firehose as their main Kafka Consumer
 - Elasticsearch
 - Redis
 - Bigquery
+- BigTable
 - Blob Storage/Object Storage :
   - Google Cloud Storage

build.gradle (+2 -2)

@@ -33,7 +33,7 @@ lombok {
 }
 
 group 'io.odpf'
-version '0.5.0'
+version '0.6.0'
 
 def projName = "firehose"
 
@@ -101,7 +101,7 @@ dependencies {
     implementation 'com.google.cloud:google-cloud-storage:1.114.0'
     implementation 'com.google.cloud:google-cloud-bigquery:1.115.0'
     implementation 'org.apache.logging.log4j:log4j-core:2.17.1'
-    implementation group: 'io.odpf', name: 'depot', version: '0.2.1'
+    implementation group: 'io.odpf', name: 'depot', version: '0.3.4'
     implementation group: 'com.networknt', name: 'json-schema-validator', version: '1.0.59' exclude group: 'org.slf4j'
 
     testImplementation group: 'junit', name: 'junit', version: '4.11'

docs/docs/guides/create_firehose.md (+4)

@@ -136,4 +136,8 @@ _**Note:**_ [_**DATABASE**_](../sinks/influxdb-sink.md#sink_influx_db_name) _**a
 - The timestamp column is needed incase of partition table. It can be generated at the time of ingestion by setting the config. Please refer to config `SINK_BIGQUERY_ADD_EVENT_TIMESTAMP_ENABLE` in [depot bigquery sink config section](https://github.com/odpf/depot/blob/main/docs/reference/configuration/bigquery-sink.md#sink_bigquery_add_event_timestamp_enable)
 - Google cloud credential with some bigquery permission is required to run this sink.
 
+## Create a Bigtable sink
+
+- It requires the environment [variables](https://github.com/odpf/depot/blob/main/docs/reference/configuration/bigtable.md) needed by the ODPF Depot library to be set, along with the generic Firehose variables.
+
 If you'd like to connect to a sink which is not yet supported, you can create a new sink by following the [contribution guidelines](../contribute/contribution.md)
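To make the new guide section above concrete, here is a minimal environment sketch for a Bigtable sink. The `SINK_BIGTABLE_*` names below are illustrative assumptions drawn from the linked Depot reference; confirm the exact variable names and the full required set there.

```properties
# Hypothetical environment sketch for a Bigtable sink (variable names are
# assumptions; verify against depot's bigtable configuration reference).
SINK_TYPE=bigtable
SINK_BIGTABLE_GOOGLE_CLOUD_PROJECT_ID=my-gcp-project
SINK_BIGTABLE_INSTANCE_ID=my-bigtable-instance
SINK_BIGTABLE_TABLE_ID=my-table
SINK_BIGTABLE_CREDENTIAL_PATH=/path/to/service-account-key.json
```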

docs/docs/introduction.md (+1)

@@ -38,6 +38,7 @@ Following sinks are supported in the Firehose
 - [Prometheus](https://en.wikipedia.org/wiki/Prometheus_%28software%29) - A time-series database
 - [MongoDB](https://en.wikipedia.org/wiki/MongoDB) - A NoSQL database
 - [Bigquery](https://cloud.google.com/bigquery) - A data warehouse provided by Google Cloud
+- [Bigtable](https://cloud.google.com/bigtable) - A fully managed, scalable NoSQL database service for large analytical and operational workloads.
 - [Blob Storage](https://gocloud.dev/howto/blob/) - A data storage architecture for large stores of unstructured data like google cloud storage, amazon s3, apache hadoop distributed filesystem
 
 ## How can I get started?

docs/docs/sinks/bigtable-sink.md (+7)

@@ -0,0 +1,7 @@
+# Bigtable Sink
+
+Bigtable Sink is implemented in Firehose using the Bigtable sink connector implementation in ODPF Depot. You can check out the ODPF Depot GitHub repository [here](https://github.com/odpf/depot).
+
+### Configuration
+
+For the Bigtable sink in Firehose, first set `SINK_TYPE=bigtable`. Some generic configs, common across different sink types, also need to be set; these are covered in [generic.md](../advance/generic.md). Bigtable-specific configs are documented in the ODPF Depot repository; you can check out the Bigtable Sink configs [here](https://github.com/odpf/depot/blob/main/docs/reference/configuration/bigtable.md)
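A rough sketch of how the split described in the new doc plays out in practice: `SINK_TYPE` selects the sink, while the other names below are generic Firehose consumer settings assumed from generic.md; verify them there before use.

```properties
# Illustrative only: generic Firehose configs plus the sink selector.
SOURCE_KAFKA_BROKERS=localhost:9092
SOURCE_KAFKA_TOPIC=test-topic
SOURCE_KAFKA_CONSUMER_GROUP_ID=sample-group-id
INPUT_SCHEMA_PROTO_CLASS=com.tests.TestMessage
SINK_TYPE=bigtable
# Bigtable-specific SINK_BIGTABLE_* variables come from the depot docs linked above.
```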

src/main/java/io/odpf/firehose/config/enums/SinkType.java (+1)

@@ -12,5 +12,6 @@ public enum SinkType {
     PROMETHEUS,
     BLOB,
     BIGQUERY,
+    BIGTABLE,
     MONGODB
 }

src/main/java/io/odpf/firehose/sink/SinkFactory.java (+12)

@@ -3,6 +3,9 @@
 import io.odpf.depot.bigquery.BigQuerySink;
 import io.odpf.depot.bigquery.BigQuerySinkFactory;
 import io.odpf.depot.config.BigQuerySinkConfig;
+import io.odpf.depot.bigtable.BigTableSinkFactory;
+import io.odpf.depot.bigtable.BigTableSink;
+import io.odpf.depot.config.BigTableSinkConfig;
 import io.odpf.depot.log.LogSink;
 import io.odpf.depot.log.LogSinkFactory;
 import io.odpf.depot.metrics.StatsDReporter;

@@ -33,6 +36,7 @@ public class SinkFactory {
     private final StencilClient stencilClient;
     private final OffsetManager offsetManager;
     private BigQuerySinkFactory bigQuerySinkFactory;
+    private BigTableSinkFactory bigTableSinkFactory;
     private LogSinkFactory logSinkFactory;
     private final Map<String, String> config;
 
@@ -75,6 +79,12 @@ public void init() {
                         BigquerySinkUtils.getRowIDCreator());
                 bigQuerySinkFactory.init();
                 return;
+            case BIGTABLE:
+                bigTableSinkFactory = new BigTableSinkFactory(
+                        ConfigFactory.create(BigTableSinkConfig.class, config),
+                        statsDReporter);
+                bigTableSinkFactory.init();
+                return;
             default:
                 throw new ConfigurationException("Invalid Firehose SINK_TYPE");
         }

@@ -104,6 +114,8 @@ public Sink getSink() {
                 return BlobSinkFactory.create(config, offsetManager, statsDReporter, stencilClient);
             case BIGQUERY:
                 return new GenericOdpfSink(new FirehoseInstrumentation(statsDReporter, BigQuerySink.class), sinkType.name(), bigQuerySinkFactory.create());
+            case BIGTABLE:
+                return new GenericOdpfSink(new FirehoseInstrumentation(statsDReporter, BigTableSink.class), sinkType.name(), bigTableSinkFactory.create());
             case MONGODB:
                 return MongoSinkFactory.create(config, statsDReporter, stencilClient);
             default:
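For orientation, the BIGTABLE wiring above follows the same two-phase pattern as BIGQUERY: `init()` builds and initializes the depot factory once, and `getSink()` later wraps `factory.create()` in a `GenericOdpfSink`. A standalone sketch of that flow follows; the depot factory types and calls are the ones used in the diff, while the helper class, its parameters, the `OdpfSink` import path, and the assumption that `ConfigFactory` here is the owner library's are mine.

```java
import io.odpf.depot.OdpfSink;                     // assumed FQN of depot's sink interface
import io.odpf.depot.bigtable.BigTableSinkFactory; // from the diff
import io.odpf.depot.config.BigTableSinkConfig;    // from the diff
import io.odpf.depot.metrics.StatsDReporter;       // from the diff
import org.aeonbits.owner.ConfigFactory;           // assumed: the ConfigFactory used above

import java.util.Map;

// Hypothetical helper, not part of the commit: shows the two-phase
// lifecycle the new BIGTABLE case follows in SinkFactory.
class BigtableWiringSketch {
    static OdpfSink buildBigtableSink(Map<String, String> config, StatsDReporter statsDReporter) {
        BigTableSinkFactory factory = new BigTableSinkFactory(
                ConfigFactory.create(BigTableSinkConfig.class, config), // bind the config map to the config interface
                statsDReporter);
        factory.init();          // one-time setup, done once in SinkFactory.init()
        return factory.create(); // per-sink handle, wrapped in GenericOdpfSink by getSink()
    }
}
```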

src/test/java/io/odpf/firehose/sink/dlq/blobstorage/BlobStorageDlqWriterTest.java (+10 -17)

@@ -2,10 +2,10 @@
 
 import io.odpf.depot.error.ErrorInfo;
 import io.odpf.depot.error.ErrorType;
-import io.odpf.firehose.message.Message;
 import io.odpf.firehose.exception.DeserializerException;
-import io.odpf.firehose.sink.common.blobstorage.BlobStorageException;
+import io.odpf.firehose.message.Message;
 import io.odpf.firehose.sink.common.blobstorage.BlobStorage;
+import io.odpf.firehose.sink.common.blobstorage.BlobStorageException;
 import org.junit.Assert;
 import org.junit.Before;
 import org.junit.Test;

@@ -16,13 +16,10 @@
 import java.io.IOException;
 import java.time.Instant;
 import java.util.Arrays;
-import java.util.Base64;
 import java.util.Comparator;
 import java.util.List;
 
 import static org.mockito.Mockito.*;
-import static org.mockito.Mockito.doThrow;
-import static org.mockito.Mockito.verify;
 
 @RunWith(MockitoJUnitRunner.class)
 public class BlobStorageDlqWriterTest {

@@ -50,14 +47,12 @@ public void shouldWriteMessagesWithoutErrorInfoToObjectStorage() throws IOExcept
         List<Message> messages = Arrays.asList(message1, message2, message3, message4);
         Assert.assertEquals(0, blobStorageDLQWriter.write(messages).size());
 
-        String key = Base64.getEncoder().encodeToString("123".getBytes());
-        String message = Base64.getEncoder().encodeToString("abc".getBytes());
         verify(blobStorage).store(contains("booking/2020-01-02"),
-                eq(("{\"key\":\"" + key + "\",\"value\":\"" + message + "\",\"topic\":\"booking\",\"partition\":1,\"offset\":3,\"timestamp\":1577923200000,\"error\":\"DESERIALIZATION_ERROR\"}\n"
-                        + "{\"key\":\"" + key + "\",\"value\":\"" + message + "\",\"topic\":\"booking\",\"partition\":1,\"offset\":4,\"timestamp\":1577923200000,\"error\":\"DESERIALIZATION_ERROR\"}").getBytes()));
+                eq(("{\"key\":\"MTIz\",\"value\":\"YWJj\",\"topic\":\"booking\",\"partition\":1,\"offset\":3,\"timestamp\":1577923200000,\"error\":\"Exception test, ErrorType: DESERIALIZATION_ERROR\"}\n"
+                        + "{\"key\":\"MTIz\",\"value\":\"YWJj\",\"topic\":\"booking\",\"partition\":1,\"offset\":4,\"timestamp\":1577923200000,\"error\":\"Exception test, ErrorType: DESERIALIZATION_ERROR\"}").getBytes()));
         verify(blobStorage).store(contains("booking/2020-01-01"),
-                eq(("{\"key\":\"" + key + "\",\"value\":\"" + message + "\",\"topic\":\"booking\",\"partition\":1,\"offset\":1,\"timestamp\":1577836800000,\"error\":\"DESERIALIZATION_ERROR\"}\n"
-                        + "{\"key\":\"" + key + "\",\"value\":\"" + message + "\",\"topic\":\"booking\",\"partition\":1,\"offset\":2,\"timestamp\":1577836800000,\"error\":\"DESERIALIZATION_ERROR\"}").getBytes()));
+                eq(("{\"key\":\"MTIz\",\"value\":\"YWJj\",\"topic\":\"booking\",\"partition\":1,\"offset\":1,\"timestamp\":1577836800000,\"error\":\"Exception test, ErrorType: DESERIALIZATION_ERROR\"}\n"
+                        + "{\"key\":\"MTIz\",\"value\":\"YWJj\",\"topic\":\"booking\",\"partition\":1,\"offset\":2,\"timestamp\":1577836800000,\"error\":\"Exception test, ErrorType: DESERIALIZATION_ERROR\"}").getBytes()));
     }
 
     @Test

@@ -73,14 +68,12 @@ public void shouldWriteMessageErrorTypesToObjectStorage() throws IOException, Bl
         List<Message> messages = Arrays.asList(message1, message2, message3, message4);
         Assert.assertEquals(0, blobStorageDLQWriter.write(messages).size());
 
-        String key = Base64.getEncoder().encodeToString("123".getBytes());
-        String message = Base64.getEncoder().encodeToString("abc".getBytes());
         verify(blobStorage).store(contains("booking/2020-01-02"),
-                eq(("{\"key\":\"" + key + "\",\"value\":\"" + message + "\",\"topic\":\"booking\",\"partition\":1,\"offset\":3,\"timestamp\":1577923200000,\"error\":\"DESERIALIZATION_ERROR\"}\n"
-                        + "{\"key\":\"" + key + "\",\"value\":\"" + message + "\",\"topic\":\"booking\",\"partition\":1,\"offset\":4,\"timestamp\":1577923200000,\"error\":\"SINK_UNKNOWN_ERROR\"}").getBytes()));
+                eq(("{\"key\":\"MTIz\",\"value\":\"YWJj\",\"topic\":\"booking\",\"partition\":1,\"offset\":3,\"timestamp\":1577923200000,\"error\":\"Exception , ErrorType: DESERIALIZATION_ERROR\"}\n"
+                        + "{\"key\":\"MTIz\",\"value\":\"YWJj\",\"topic\":\"booking\",\"partition\":1,\"offset\":4,\"timestamp\":1577923200000,\"error\":\"Exception , ErrorType: SINK_UNKNOWN_ERROR\"}").getBytes()));
         verify(blobStorage).store(contains("booking/2020-01-01"),
-                eq(("{\"key\":\"" + key + "\",\"value\":\"" + message + "\",\"topic\":\"booking\",\"partition\":1,\"offset\":1,\"timestamp\":1577836800000,\"error\":\"DESERIALIZATION_ERROR\"}\n"
-                        + "{\"key\":\"" + key + "\",\"value\":\"" + message + "\",\"topic\":\"booking\",\"partition\":1,\"offset\":2,\"timestamp\":1577836800000,\"error\":\"SINK_UNKNOWN_ERROR\"}").getBytes()));
+                eq(("{\"key\":\"MTIz\",\"value\":\"YWJj\",\"topic\":\"booking\",\"partition\":1,\"offset\":1,\"timestamp\":1577836800000,\"error\":\"Exception , ErrorType: DESERIALIZATION_ERROR\"}\n"
+                        + "{\"key\":\"MTIz\",\"value\":\"YWJj\",\"topic\":\"booking\",\"partition\":1,\"offset\":2,\"timestamp\":1577836800000,\"error\":\"Exception null, ErrorType: SINK_UNKNOWN_ERROR\"}").getBytes()));
     }
 
     @Test
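One clarifying note on the literals hardcoded above: "MTIz" and "YWJj" are simply the Base64 encodings of "123" and "abc" that the deleted helper variables used to compute at runtime. A quick check:

```java
import java.util.Base64;

public class Base64Literals {
    public static void main(String[] args) {
        // Same values the removed test helpers produced at runtime:
        System.out.println(Base64.getEncoder().encodeToString("123".getBytes())); // MTIz
        System.out.println(Base64.getEncoder().encodeToString("abc".getBytes())); // YWJj
    }
}
```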
