-
Notifications
You must be signed in to change notification settings - Fork 336
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[bitsail][connector]Doris batch replace model use recordStream buffer #305
base: master
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -20,34 +20,68 @@ | |
import com.bytedance.bitsail.connector.doris.committer.DorisCommittable; | ||
import com.bytedance.bitsail.connector.doris.config.DorisExecutionOptions; | ||
import com.bytedance.bitsail.connector.doris.config.DorisOptions; | ||
import com.bytedance.bitsail.connector.doris.error.DorisErrorCode; | ||
import com.bytedance.bitsail.connector.doris.http.model.RespContent; | ||
import com.bytedance.bitsail.connector.doris.sink.DorisWriterState; | ||
import com.bytedance.bitsail.connector.doris.sink.label.LabelGenerator; | ||
import com.bytedance.bitsail.connector.doris.sink.record.RecordStream; | ||
import com.bytedance.bitsail.connector.doris.sink.streamload.DorisStreamLoad; | ||
|
||
import com.google.common.annotations.VisibleForTesting; | ||
import com.google.common.collect.ImmutableList; | ||
import org.apache.commons.lang3.concurrent.BasicThreadFactory; | ||
import org.slf4j.Logger; | ||
import org.slf4j.LoggerFactory; | ||
|
||
import java.io.IOException; | ||
import java.nio.ByteBuffer; | ||
import java.nio.charset.StandardCharsets; | ||
import java.util.ArrayList; | ||
import java.util.Arrays; | ||
import java.util.Collections; | ||
import java.util.List; | ||
import java.util.Objects; | ||
import java.util.concurrent.BlockingQueue; | ||
import java.util.concurrent.Executors; | ||
import java.util.concurrent.LinkedBlockingQueue; | ||
import java.util.concurrent.ScheduledExecutorService; | ||
import java.util.concurrent.TimeUnit; | ||
import java.util.concurrent.atomic.AtomicInteger; | ||
|
||
import static com.bytedance.bitsail.connector.doris.sink.streamload.LoadStatus.PUBLISH_TIMEOUT; | ||
import static com.bytedance.bitsail.connector.doris.sink.streamload.LoadStatus.SUCCESS; | ||
|
||
public class DorisReplaceProxy extends AbstractDorisWriteModeProxy { | ||
private static final Logger LOG = LoggerFactory.getLogger(DorisReplaceProxy.class); | ||
protected List dorisBatchBuffers; | ||
protected long dorisBatchBuffersSize; | ||
private RecordStream recordStream; | ||
private static final List<String> DORIS_SUCCESS_STATUS = new ArrayList<>(Arrays.asList(SUCCESS, PUBLISH_TIMEOUT)); | ||
private LabelGenerator labelGenerator; | ||
private DorisWriterState dorisWriterState; | ||
private AtomicInteger cacheRecordSize; | ||
private AtomicInteger cacheRecordCount; | ||
private volatile boolean loading = false; | ||
private final BlockingQueue<byte[]> cache = new LinkedBlockingQueue<>(); | ||
private volatile Exception loadException = null; | ||
private int flushRecordCacheSize; | ||
private int flushRecordCacheCount; | ||
private byte[] lineDelimiter; | ||
private int intervalTime; | ||
private ScheduledExecutorService scheduler; | ||
private final int initialDelay = 1000; | ||
|
||
public DorisReplaceProxy(DorisExecutionOptions dorisExecutionOptions, DorisOptions dorisOptions) { | ||
this.dorisExecutionOptions = dorisExecutionOptions; | ||
this.dorisBatchBuffers = new ArrayList(dorisExecutionOptions.getBufferCount()); | ||
this.dorisOptions = dorisOptions; | ||
this.recordStream = new RecordStream(dorisExecutionOptions.getBufferSize(), dorisExecutionOptions.getBufferCount()); | ||
this.dorisStreamLoad = new DorisStreamLoad(dorisExecutionOptions, dorisOptions, | ||
new LabelGenerator(dorisExecutionOptions.getLabelPrefix(), dorisExecutionOptions.isEnable2PC()), recordStream); | ||
this.dorisBatchBuffersSize = 0; | ||
this.labelGenerator = new LabelGenerator(dorisExecutionOptions.getLabelPrefix(), dorisExecutionOptions.isEnable2PC()); | ||
this.dorisStreamLoad = new DorisStreamLoad(dorisExecutionOptions, dorisOptions, labelGenerator, | ||
new RecordStream(dorisExecutionOptions.getBufferSize(), dorisExecutionOptions.getBufferCount())); | ||
this.dorisWriterState = new DorisWriterState(dorisExecutionOptions.getLabelPrefix()); | ||
this.lineDelimiter = dorisOptions.getLineDelimiter().getBytes(); | ||
this.intervalTime = dorisExecutionOptions.getCheckInterval(); | ||
this.cacheRecordSize = new AtomicInteger(); | ||
this.cacheRecordCount = new AtomicInteger(); | ||
this.scheduler = Executors.newScheduledThreadPool(1, | ||
new BasicThreadFactory.Builder().namingPattern("Doris-replace-writer").daemon(true).build()); | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Why is this thread a daemon? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I think that when the user thread exits, it is not necessary for the JVM to manage the |
||
scheduler.scheduleWithFixedDelay(this::checkDone, initialDelay, intervalTime, TimeUnit.MILLISECONDS); | ||
} | ||
|
||
@VisibleForTesting | ||
|
@@ -56,59 +90,106 @@ public DorisReplaceProxy() { | |
|
||
@Override | ||
public void write(String record) throws IOException { | ||
addBatchBuffers(record); | ||
checkLoadException(); | ||
byte[] bytes = record.getBytes(StandardCharsets.UTF_8); | ||
ArrayList<byte[]> tmpCache = null; | ||
if (cacheRecordCount.get() >= dorisExecutionOptions.getRecordCount() || cacheRecordSize.get() >= dorisExecutionOptions.getRecordSize()) { | ||
tmpCache = new ArrayList<>(cache); | ||
flushRecordCacheSize = cacheRecordSize.get(); | ||
flushRecordCacheCount = cacheRecordCount.get(); | ||
cache.clear(); | ||
cacheRecordCount.set(0); | ||
cacheRecordSize.set(0); | ||
} | ||
cacheRecordSize.getAndAdd(bytes.length); | ||
cacheRecordCount.getAndIncrement(); | ||
cache.add(bytes); | ||
|
||
if (Objects.nonNull(tmpCache)) { | ||
flush(tmpCache); | ||
} | ||
} | ||
|
||
private void addBatchBuffers(String record) throws IOException { | ||
this.dorisBatchBuffers.add(record); | ||
this.dorisBatchBuffersSize += record.getBytes().length; | ||
if (dorisBatchBuffers.size() >= dorisExecutionOptions.getRecordSize() | ||
|| this.dorisBatchBuffersSize >= dorisExecutionOptions.getRecordCount()) { | ||
flush(false); | ||
private void flush(ArrayList<byte[]> flushCache) { | ||
if (!loading) { | ||
LOG.info("start load by cache full, recordCount {}, recordSize {}", flushRecordCacheCount, flushRecordCacheSize); | ||
try { | ||
startLoad(flushCache); | ||
} catch (Exception e) { | ||
LOG.error("start stream load failed.", e); | ||
loadException = e; | ||
} | ||
} | ||
} | ||
|
||
@SuppressWarnings("checkstyle:MagicNumber") | ||
@Override | ||
public void flush(boolean endOfInput) throws IOException { | ||
if (dorisBatchBuffers.isEmpty()) { | ||
return; | ||
} | ||
String result; | ||
if (DorisOptions.LOAD_CONTENT_TYPE.JSON.equals(dorisOptions.getLoadDataFormat())) { | ||
result = dorisBatchBuffers.toString(); | ||
} else { | ||
result = String.join(dorisOptions.getLineDelimiter(), dorisBatchBuffers); | ||
} | ||
for (int i = 0; i <= dorisExecutionOptions.getMaxRetries(); i++) { | ||
try { | ||
dorisStreamLoad.load(result, dorisOptions, true); | ||
dorisBatchBuffers.clear(); | ||
this.dorisBatchBuffersSize = 0; | ||
break; | ||
} catch (BitSailException e) { | ||
LOG.error("doris sink error, retry times = {}", i, e); | ||
if (i >= dorisExecutionOptions.getMaxRetries()) { | ||
throw new IOException(e.getMessage()); | ||
} | ||
try { | ||
LOG.warn("StreamLoad error", e); | ||
Thread.sleep(1000L * i); | ||
} catch (InterruptedException ex) { | ||
Thread.currentThread().interrupt(); | ||
throw new IOException("unable to flush; interrupted while doing another attempt", e); | ||
} | ||
|
||
private synchronized void startLoad(List<byte[]> flushCache) throws IOException { | ||
this.dorisStreamLoad.startLoad(labelGenerator.generateLabel(), true); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The |
||
if (!flushCache.isEmpty()) { | ||
// add line delimiter | ||
ByteBuffer buf = ByteBuffer.allocate(flushRecordCacheSize + (flushCache.size() - 1) * lineDelimiter.length); | ||
for (int i = 0; i < flushCache.size(); i++) { | ||
if (i > 0) { | ||
buf.put(lineDelimiter); | ||
} | ||
buf.put(flushCache.get(i)); | ||
} | ||
dorisStreamLoad.writeRecord(buf.array()); | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Why does this need to be invoked twice? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This is something I forgot to delete when I removed the redundant code at the end; I will fix it here. |
||
} | ||
this.loading = true; | ||
} | ||
|
||
/**
 * Finishes the in-flight stream load, if there is one, ahead of a commit.
 *
 * When the {@code loading} flag is set, the load is stopped via {@code stopLoad()} and a
 * single committable is returned; when it is not set, nothing is stopped and nothing is
 * returned to commit.
 *
 * @return a one-element immutable list holding a {@code DorisCommittable} built from the
 *         stream loader's host/port, the configured database name and {@code 0} if a load
 *         was in progress; an empty list otherwise
 * @throws IOException declared by {@code stopLoad()}
 */
@Override | ||
public List<DorisCommittable> prepareCommit() throws IOException { | ||
if (loading) { | ||
LOG.info("stop load by prepareCommit."); | ||
stopLoad(); | ||
// NOTE(review): the literal 0 is presumably a placeholder transaction id -- confirm against DorisCommittable.
return ImmutableList.of(new DorisCommittable(dorisStreamLoad.getHostPort(), dorisOptions.getDatabaseName(), 0)); | ||
} | ||
return Collections.emptyList(); | ||
} | ||
|
||
/**
 * Ends the current stream load and validates the response returned by the stream loader.
 *
 * The {@code loading} flag is cleared and {@code flushRecordCacheSize} is reset to zero
 * before {@code dorisStreamLoad.stopLoad()} is called, so both are already reset if that
 * call throws. The method is {@code synchronized} on this instance.
 *
 * @throws IOException declared by the signature
 * @throws BitSailException with {@code DorisErrorCode.LOAD_FAILED} when the response status
 *         is not contained in {@code DORIS_SUCCESS_STATUS}
 */
private synchronized void stopLoad() throws IOException { | ||
this.loading = false; | ||
this.flushRecordCacheSize = 0; | ||
RespContent respContent = dorisStreamLoad.stopLoad(); | ||
// Any status outside the accepted list is reported as a failed load.
if (!DORIS_SUCCESS_STATUS.contains(respContent.getStatus())) { | ||
String errMsg = String.format("stream load error: %s, see more in %s", respContent.getMessage(), respContent.getErrorURL()); | ||
LOG.warn(errMsg); | ||
throw new BitSailException(DorisErrorCode.LOAD_FAILED, errMsg); | ||
} | ||
} | ||
|
||
@Override | ||
public List<DorisWriterState> snapshotState(long checkpointId) { | ||
return null; | ||
return Collections.singletonList(dorisWriterState); | ||
} | ||
|
||
private synchronized void checkDone() { | ||
LOG.info("start timer checker, interval {} ms", intervalTime); | ||
try { | ||
if (!loading) { | ||
LOG.info("not loading, skip timer checker"); | ||
return; | ||
} | ||
if (dorisStreamLoad.getPendingLoadFuture() != null | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I have a question about this line: does the pending load future change between intervals? I think the right pipeline should be
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I don't think this is a problem. As long as |
||
&& !dorisStreamLoad.getPendingLoadFuture().isDone()) { | ||
LOG.info("stop load by timer checker"); | ||
stopLoad(); | ||
} | ||
} catch (Exception e) { | ||
LOG.error("stream load failed, thread exited:", e); | ||
loadException = e; | ||
} | ||
} | ||
|
||
/**
 * Rethrows a previously recorded load failure.
 *
 * If the {@code loadException} field is non-null, the exception is logged and wrapped in a
 * {@code RuntimeException} (with the recorded exception as its cause); otherwise the method
 * returns without doing anything. The field itself is not cleared here.
 *
 * @throws RuntimeException when {@code loadException} is set
 */
private void checkLoadException() { | ||
if (loadException != null) { | ||
LOG.error("loading error.", loadException); | ||
throw new RuntimeException("error while loading data.", loadException); | ||
} | ||
} | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Should this be of type long?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
SINK_CHECK_INTERVAL
represents a time interval, and its value will not be too large