Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[bitsail] support doris source connector #473

Draft
wants to merge 3 commits into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -112,7 +112,7 @@ In the Runtime layer, it supports multiple execution modes, such as yarn, local,
<tr>
<td>Doris</td>
<td>-</td>
<td> </td>
<td></td>
<td>✅</td>
</tr>
<tr>
Expand Down
2 changes: 1 addition & 1 deletion README_zh.md
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,7 @@ BitSail目前已被广泛使用,并支持数百万亿的大流量场景。同时
<tr>
<td>Doris</td>
<td>-</td>
<td> </td>
<td></td>
<td>✅</td>
</tr>
<tr>
Expand Down
82 changes: 82 additions & 0 deletions bitsail-connectors/connector-doris/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -31,15 +31,97 @@
<maven.compiler.source>8</maven.compiler.source>
<maven.compiler.target>8</maven.compiler.target>
<mysql.version>5.1.49</mysql.version>
<arrow.version>5.0.0</arrow.version>
<libthrift.version>0.16.0</libthrift.version>
<thrift-service.version>1.0.0</thrift-service.version>
</properties>

<dependencies>
<dependency>
<groupId>org.apache.doris</groupId>
<artifactId>thrift-service</artifactId>
<version>${thrift-service.version}</version>
</dependency>
<dependency>
<groupId>mysql</groupId>
<artifactId>mysql-connector-java</artifactId>
<!-- Doris requires a MySQL client version greater than 5.1 -->
<version>${mysql.version}</version>
<scope>compile</scope>
</dependency>
<dependency>
<groupId>org.apache.thrift</groupId>
<artifactId>libthrift</artifactId>
<version>${libthrift.version}</version>
<exclusions>
<exclusion>
<artifactId>httpclient</artifactId>
<groupId>org.apache.httpcomponents</groupId>
</exclusion>
<exclusion>
<artifactId>httpcore</artifactId>
<groupId>org.apache.httpcomponents</groupId>
</exclusion>
</exclusions>
</dependency>
<dependency>
<groupId>org.apache.arrow</groupId>
<artifactId>arrow-vector</artifactId>
<version>${arrow.version}</version>
<exclusions>
<exclusion>
<groupId>io.netty</groupId>
<artifactId>netty-common</artifactId>
</exclusion>
</exclusions>
</dependency>
<dependency>
<groupId>org.apache.arrow</groupId>
<artifactId>arrow-memory-netty</artifactId>
<version>${arrow.version}</version>
<scope>runtime</scope>
<exclusions>
<exclusion>
<groupId>com.fasterxml.jackson.core</groupId>
<artifactId>jackson-annotations</artifactId>
</exclusion>
<exclusion>
<groupId>com.fasterxml.jackson.core</groupId>
<artifactId>jackson-core</artifactId>
</exclusion>
<exclusion>
<groupId>com.fasterxml.jackson.core</groupId>
<artifactId>jackson-databind</artifactId>
</exclusion>
<exclusion>
<groupId>io.netty</groupId>
<artifactId>netty-common</artifactId>
</exclusion>
<exclusion>
<groupId>io.netty</groupId>
<artifactId>netty-buffer</artifactId>
</exclusion>
</exclusions>
</dependency>

<!-- test -->
<dependency>
<groupId>com.bytedance.bitsail</groupId>
<artifactId>bitsail-connector-test</artifactId>
<version>${revision}</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>com.bytedance.bitsail</groupId>
<artifactId>bitsail-connector-fake</artifactId>
<version>${revision}</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>com.bytedance.bitsail</groupId>
<artifactId>bitsail-connector-print</artifactId>
<version>${revision}</version>
<scope>test</scope>
</dependency>
</dependencies>
</project>
Original file line number Diff line number Diff line change
@@ -0,0 +1,215 @@
/*
* Copyright 2022 Bytedance Ltd. and/or its affiliates.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package com.bytedance.bitsail.connector.doris.backend;

import com.bytedance.bitsail.common.BitSailException;
import com.bytedance.bitsail.connector.doris.backend.model.Routing;
import com.bytedance.bitsail.connector.doris.config.DorisExecutionOptions;
import com.bytedance.bitsail.connector.doris.error.DorisErrorCode;

import org.apache.doris.sdk.thrift.TDorisExternalService;
import org.apache.doris.sdk.thrift.TScanBatchResult;
import org.apache.doris.sdk.thrift.TScanCloseParams;
import org.apache.doris.sdk.thrift.TScanCloseResult;
import org.apache.doris.sdk.thrift.TScanNextBatchParams;
import org.apache.doris.sdk.thrift.TScanOpenParams;
import org.apache.doris.sdk.thrift.TScanOpenResult;
import org.apache.doris.sdk.thrift.TStatusCode;
import org.apache.thrift.TConfiguration;
import org.apache.thrift.TException;
import org.apache.thrift.protocol.TBinaryProtocol;
import org.apache.thrift.protocol.TProtocol;
import org.apache.thrift.transport.TSocket;
import org.apache.thrift.transport.TTransport;
import org.apache.thrift.transport.TTransportException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Thrift client used to send scan requests to a single Doris BE (backend) node.
 *
 * <p>The connection is opened eagerly in the constructor and lazily re-opened by the scan
 * methods if it has been closed. Every remote call is attempted up to {@code retries} times.
 *
 * <p>NOTE(review): no synchronization is visible in this class; presumably each instance is
 * used from a single thread - confirm against callers.
 */
public class BackendClient {
  private static final Logger LOGGER = LoggerFactory.getLogger(BackendClient.class);
  private final Routing routing;
  private TDorisExternalService.Client client;
  private TTransport transport;
  private boolean isConnected = false;
  private final int retries;
  private final int socketTimeout;
  private final int connectTimeout;

  /**
   * Creates the client and immediately connects to the given Doris BE.
   *
   * @param routing          address of the Doris BE to connect to
   * @param executionOptions source of the connect timeout, read timeout and retry count
   * @throws BitSailException with {@code CONNECT_FAILED_MESSAGE} if no connection attempt succeeds
   */
  public BackendClient(Routing routing, DorisExecutionOptions executionOptions) {
    this.routing = routing;
    this.connectTimeout = executionOptions.getRequestConnectTimeoutMs();
    this.socketTimeout = executionOptions.getRequestReadTimeoutMs();
    this.retries = executionOptions.getRequestRetries();
    LOGGER.trace("connect timeout set to '{}'. socket timeout set to '{}'. retries set to '{}'.",
        this.connectTimeout, this.socketTimeout, this.retries);
    open();
  }

  /**
   * Opens the thrift transport to the BE, trying up to {@code retries} times.
   * Does nothing if the client is already connected.
   *
   * @throws BitSailException with {@code CONNECT_FAILED_MESSAGE} if every attempt fails
   */
  private void open() {
    LOGGER.debug("Open client to Doris BE '{}'.", routing);
    TException ex = null;
    for (int attempt = 0; !isConnected && attempt < retries; ++attempt) {
      LOGGER.debug("Attempt {} to connect {}.", attempt, routing);
      try {
        TBinaryProtocol.Factory factory = new TBinaryProtocol.Factory();
        transport = new TSocket(new TConfiguration(), routing.getHost(), routing.getPort(), socketTimeout, connectTimeout);
        TProtocol protocol = factory.getProtocol(transport);
        client = new TDorisExternalService.Client(protocol);
        LOGGER.trace("Connect status before open transport to {} is '{}'.", routing, isConnected);
        if (!transport.isOpen()) {
          transport.open();
        }
        // Reaching this point means the transport is open; record it and stop retrying.
        isConnected = true;
        LOGGER.info("Success connect to {}.", routing);
        return;
      } catch (TTransportException e) {
        LOGGER.warn("Failed to connect message, routing={}", routing, e);
        ex = e;
      }
    }
    if (!isConnected) {
      String errMsg = String.format("Failed to connect message, routing=%s", routing);
      LOGGER.error(errMsg, ex);
      throw BitSailException.asBitSailException(DorisErrorCode.CONNECT_FAILED_MESSAGE, errMsg, ex);
    }
  }

  /**
   * Closes the transport (if open) and drops the thrift client so that the next
   * scan call re-opens the connection.
   */
  private void close() {
    LOGGER.trace("Connect status before close with '{}' is '{}'.", routing, isConnected);
    isConnected = false;
    if ((transport != null) && transport.isOpen()) {
      transport.close();
      LOGGER.info("Closed a connection to {}.", routing);
    }
    client = null;
  }

  /**
   * Open a scanner for reading Doris data.
   *
   * @param openParams thrift struct required by the request
   * @return scan open result whose status is {@code OK}
   * @throws BitSailException with {@code CONNECT_FAILED_MESSAGE} if no attempt returns an OK result
   */
  public TScanOpenResult openScanner(TScanOpenParams openParams) {
    LOGGER.debug("OpenScanner to '{}', parameter is '{}'.", routing, openParams);
    if (!isConnected) {
      open();
    }
    TException ex = null;
    // Remembers the last non-null response so its status can be reported if all attempts fail.
    TScanOpenResult result = null;
    for (int attempt = 0; attempt < retries; ++attempt) {
      LOGGER.debug("Attempt {} to openScanner {}.", attempt, routing);
      try {
        TScanOpenResult attemptResult = client.openScanner(openParams);
        if (attemptResult == null) {
          LOGGER.warn("Open scanner result from {} is null.", routing);
          continue;
        }
        result = attemptResult;
        if (!TStatusCode.OK.equals(result.getStatus().getStatusCode())) {
          LOGGER.warn("The status of open scanner result from {} is '{}', error message is: {}.",
              routing, result.getStatus().getStatusCode(), result.getStatus().getErrorMsgs());
          continue;
        }
        return result;
      } catch (TException e) {
        LOGGER.warn("Open scanner from {} failed.", routing, e);
        ex = e;
      }
    }
    String errMsg = String.format("Failed to connect message, routing=%s", routing);
    if (result != null) {
      // Surface the last status reported by the BE instead of discarding it.
      errMsg = String.format("%s, status=%s, errorMsgs=%s",
          errMsg, result.getStatus().getStatusCode(), result.getStatus().getErrorMsgs());
    }
    LOGGER.error(errMsg, ex);
    throw BitSailException.asBitSailException(DorisErrorCode.CONNECT_FAILED_MESSAGE, errMsg, ex);
  }

  /**
   * Get the next row batch from Doris BE.
   *
   * @param nextBatchParams thrift struct required by the request
   * @return scan batch result whose status is {@code OK}
   * @throws BitSailException with {@code INTERNAL_FAIL_MESSAGE} if the last response received
   *                          carried a non-OK status, or with {@code CONNECT_FAILED_MESSAGE}
   *                          if no usable response was received
   */
  public TScanBatchResult getNext(TScanNextBatchParams nextBatchParams) {
    LOGGER.debug("GetNext to '{}', parameter is '{}'.", routing, nextBatchParams);
    if (!isConnected) {
      open();
    }
    TException ex = null;
    TScanBatchResult result = null;
    for (int attempt = 0; attempt < retries; ++attempt) {
      LOGGER.debug("Attempt {} to getNext {}.", attempt, routing);
      try {
        result = client.getNext(nextBatchParams);
        if (result == null) {
          LOGGER.warn("GetNext result from {} is null.", routing);
          continue;
        }
        if (!TStatusCode.OK.equals(result.getStatus().getStatusCode())) {
          LOGGER.warn("The status of get next result from {} is '{}', error message is: {}.",
              routing, result.getStatus().getStatusCode(), result.getStatus().getErrorMsgs());
          continue;
        }
        return result;
      } catch (TException e) {
        LOGGER.warn("Get next from {} failed.", routing, e);
        ex = e;
      }
    }
    if (result != null && (TStatusCode.OK != (result.getStatus().getStatusCode()))) {
      String errMsg = String.format("Doris Internal error, routing=%s, status=%s, errorMsgs=%s",
          routing, result.getStatus().getStatusCode(), result.getStatus().getErrorMsgs());
      LOGGER.error(errMsg);
      throw BitSailException.asBitSailException(DorisErrorCode.INTERNAL_FAIL_MESSAGE, errMsg);
    }
    String errMsg = String.format("Failed to connect message, routing=%s", routing);
    // Log the cause as well, matching openScanner.
    LOGGER.error(errMsg, ex);
    throw BitSailException.asBitSailException(DorisErrorCode.CONNECT_FAILED_MESSAGE, errMsg, ex);
  }

  /**
   * Close a scanner and then the underlying connection.
   *
   * <p>This is best-effort: failures are logged and never propagated, and the
   * connection is closed regardless of whether the scanner could be closed.
   *
   * @param closeParams thrift struct required by the request
   */
  public void closeScanner(TScanCloseParams closeParams) {
    LOGGER.debug("CloseScanner to '{}', parameter is '{}'.", routing, closeParams);
    try {
      if (!isConnected) {
        // close() nulls the client, so re-open before issuing the request.
        try {
          open();
        } catch (BitSailException e) {
          LOGGER.warn("Cannot connect to Doris BE {} when close scanner.", routing, e);
          return;
        }
      }
      boolean scannerClosed = false;
      for (int attempt = 0; attempt < retries; ++attempt) {
        LOGGER.debug("Attempt {} to closeScanner {}.", attempt, routing);
        try {
          TScanCloseResult result = client.closeScanner(closeParams);
          if (result == null) {
            LOGGER.warn("CloseScanner result from {} is null.", routing);
            continue;
          }
          if (!TStatusCode.OK.equals(result.getStatus().getStatusCode())) {
            LOGGER.warn("The status of close scanner result from {} is '{}', error message is: {}.",
                routing, result.getStatus().getStatusCode(), result.getStatus().getErrorMsgs());
            continue;
          }
          scannerClosed = true;
          break;
        } catch (TException e) {
          LOGGER.warn("Close scanner from {} failed.", routing, e);
        }
      }
      if (scannerClosed) {
        LOGGER.info("CloseScanner to Doris BE '{}' success.", routing);
      } else {
        LOGGER.warn("CloseScanner to Doris BE '{}' did not succeed after {} attempts.", routing, retries);
      }
    } finally {
      // Always release the transport, even if closing the scanner failed unexpectedly.
      close();
    }
  }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
/*
* Copyright 2022 Bytedance Ltd. and/or its affiliates.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package com.bytedance.bitsail.connector.doris.backend.model;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Represents the address of a Doris BE node, parsed from a {@code host:port} string.
 */
public class Routing {
  private static final Logger LOGGER = LoggerFactory.getLogger(Routing.class);
  /** Largest port number accepted; matches the range allowed by java.net.InetSocketAddress. */
  private static final int MAX_PORT = 65535;
  private final String host;
  private final int port;

  /**
   * Parses a Doris BE address.
   *
   * @param routing address in {@code host:port} form; surrounding whitespace around the host
   *                and the port is ignored
   * @throws IllegalArgumentException if {@code routing} is null, is not exactly one host and one
   *                                  port separated by a single colon, has an empty host, or has
   *                                  a port that is not an integer in the range 0-65535
   */
  public Routing(String routing) throws IllegalArgumentException {
    LOGGER.debug("Parse Doris BE address: '{}'.", routing);
    if (routing == null) {
      throw illegalFormat(null);
    }
    String[] hostPort = routing.split(":");
    if (hostPort.length != 2) {
      throw illegalFormat(routing);
    }
    String parsedHost = hostPort[0].trim();
    if (parsedHost.isEmpty()) {
      // ":9060" splits into two parts but carries no usable host.
      throw illegalFormat(routing);
    }
    this.host = parsedHost;
    this.port = parsePort(parsedHost, hostPort[1].trim());
  }

  /** Logs and builds the exception used for every malformed-address case. */
  private static IllegalArgumentException illegalFormat(String routing) {
    String errMsg = String.format("Format of Doris BE address is illegal, routing=%s", routing);
    LOGGER.error(errMsg);
    return new IllegalArgumentException(errMsg);
  }

  /** Converts the port text to an int, rejecting non-numeric and out-of-range values. */
  private static int parsePort(String host, String portText) {
    String errMsg = String.format("Failed to parse Doris BE's hostPort, host=%s, Port=%s", host, portText);
    int parsedPort;
    try {
      parsedPort = Integer.parseInt(portText);
    } catch (NumberFormatException e) {
      LOGGER.error(errMsg);
      throw new IllegalArgumentException(errMsg, e);
    }
    if (parsedPort < 0 || parsedPort > MAX_PORT) {
      LOGGER.error(errMsg);
      throw new IllegalArgumentException(errMsg);
    }
    return parsedPort;
  }

  /**
   * @return the host part of the address
   */
  public String getHost() {
    return host;
  }

  /**
   * @return the port part of the address
   */
  public int getPort() {
    return port;
  }

  @Override
  public String toString() {
    return "Doris BE{host='" + host + '\'' + ", port=" + port + '}';
  }
}
Loading