11package io .delta .flink .sink ;
22
3- import io .delta .kernel .* ;
3+ import io .delta .flink . DeltaTable ;
44import io .delta .kernel .data .Row ;
5- import io .delta .kernel .defaults .internal .json .JsonUtils ;
65import io .delta .kernel .engine .Engine ;
7- import io .delta .kernel .exceptions .TableNotFoundException ;
8- import io .delta .kernel .internal .data .TransactionStateRow ;
9- import io .delta .kernel .internal .util .Preconditions ;
106import io .delta .kernel .internal .util .Utils ;
11- import io .delta .kernel .types .StructType ;
127import io .delta .kernel .utils .CloseableIterable ;
138import io .delta .kernel .utils .CloseableIterator ;
14- import java .io .IOException ;
15- import java .util .*;
16- import java .util .stream .Collectors ;
179import org .apache .flink .api .connector .sink2 .Committer ;
1810import org .apache .flink .metrics .groups .SinkCommitterMetricGroup ;
1911import org .slf4j .Logger ;
2012import org .slf4j .LoggerFactory ;
2113
14+ import java .io .IOException ;
15+ import java .util .Collection ;
16+ import java .util .List ;
17+ import java .util .Objects ;
18+ import java .util .TreeMap ;
19+ import java .util .stream .Collectors ;
20+
2221/**
2322 * The Committer is responsible for committing the data staged by the CommittingSinkWriter in the
2423 * second step of a two-phase commit protocol.
@@ -51,38 +50,23 @@ public class DeltaCommitter implements Committer<DeltaCommittable> {
5150 // All committables should have the same job id as the committer.
5251 // For simplicity, we get the job id from constructor.
5352 private String jobId ;
54- private Engine engine ;
55- private Table table ;
56- private final Row committerContext ;
53+ private DeltaTable deltaTable ;
5754
5855 private SinkCommitterMetricGroup metricGroup ;
5956
60- private boolean creatingNewTable ;
61-
/**
 * Creates a committer bound to one job id and one Delta table handle.
 *
 * <p>Private: instances are created through {@code Builder#build()}.
 *
 * @param jobId id of the job this committer belongs to
 * @param deltaTable table handle that commits are delegated to
 * @param metricGroup committer metric group kept on this instance
 */
private DeltaCommitter(
    String jobId, DeltaTable deltaTable, SinkCommitterMetricGroup metricGroup) {
  this.metricGroup = metricGroup;
  this.deltaTable = deltaTable;
  this.jobId = jobId;
}
7565
7666 @ Override
7767 public void commit (Collection <CommitRequest <DeltaCommittable >> committables )
7868 throws IOException , InterruptedException {
7969 LOG .debug ("Starting commit" );
80- try {
81- table .getLatestSnapshot (engine );
82- creatingNewTable = false ;
83- } catch (TableNotFoundException e ) {
84- creatingNewTable = true ;
85- }
8670 sortCommittablesByCheckpointId (committables ).forEach (this ::commitForSingleCheckpointId );
8771 }
8872
@@ -93,44 +77,6 @@ private void commitForSingleCheckpointId(
9377 long checkpointId , List <CommitRequest <DeltaCommittable >> committables ) {
9478 LOG .debug ("Committing {} committables on checkpoint {}" , committables .size (), checkpointId );
9579
96- TransactionBuilder txnBuilder =
97- table
98- .createTransactionBuilder (
99- engine ,
100- "DeltaSink/Kernel" ,
101- creatingNewTable ? Operation .CREATE_TABLE : Operation .WRITE )
102- .withTransactionId (engine , jobId , checkpointId );
103-
104- if (creatingNewTable ) {
105- // For a new table set the table schema in the transaction builder
106- txnBuilder =
107- txnBuilder
108- .withSchema (engine , TransactionStateRow .getLogicalSchema (committerContext ))
109- .withPartitionColumns (
110- engine , TransactionStateRow .getPartitionColumnsList (committerContext ));
111- }
112- final Transaction txn = txnBuilder .build (engine );
113-
114- // We check the table's latest schema is still the same as committer schema.
115- // The check is delayed here to detect external modification to the table schema.
116- if (!creatingNewTable ) {
117- final Snapshot readSnapshot = table .getSnapshotAsOfVersion (engine , txn .getReadTableVersion ());
118- final StructType tableSchema = txn .getSchema (engine );
119- final StructType committerSchema = TransactionStateRow .getLogicalSchema (committerContext );
120- Preconditions .checkArgument (
121- readSnapshot .getPath ().equals (TransactionStateRow .getTablePath (this .committerContext )),
122- String .format (
123- "Committer path does not match the latest table path."
124- + "Table path: %s, Committer path: %s" ,
125- readSnapshot .getPath (), TransactionStateRow .getTablePath (this .committerContext )));
126- Preconditions .checkArgument (
127- committerSchema .equivalent (tableSchema ),
128- String .format (
129- "DeltaSink does not support schema evolution. "
130- + "Table schema: %s, Committer schema: %s" ,
131- tableSchema , committerSchema ));
132- }
133-
13480 final CloseableIterable <Row > dataActions =
13581 new CloseableIterable <Row >() {
13682 @ Override
@@ -147,8 +93,7 @@ public void close() throws IOException {
14793 }
14894 };
14995
150- txn .commit (engine , dataActions );
151- creatingNewTable = false ;
96+ deltaTable .commit (dataActions );
15297 }
15398
15499 private TreeMap <Long , List <CommitRequest <DeltaCommittable >>> sortCommittablesByCheckpointId (
@@ -163,9 +108,7 @@ private TreeMap<Long, List<CommitRequest<DeltaCommittable>>> sortCommittablesByC
163108
164109 public static final class Builder {
165110 private String jobId ;
166- private Engine engine ;
167- private Table table ;
168- private Row committerContext ;
111+ private DeltaTable deltaTable ;
169112 private SinkCommitterMetricGroup metricGroup ;
170113
171114 public Builder () {}
@@ -175,24 +118,8 @@ public Builder withJobId(String jobId) {
175118 return this ;
176119 }
177120
178- public Builder withEngine (Engine engine ) {
179- this .engine = engine ;
180- return this ;
181- }
182-
183- public Builder withTable (Table table ) {
184- this .table = table ;
185- return this ;
186- }
187-
188- public Builder withCommitterContext (Row committerContext ) {
189- this .committerContext = committerContext ;
190- return this ;
191- }
192-
193- public Builder withCommitterContext (String committerContextJson ) {
194- this .committerContext =
195- JsonUtils .rowFromJson (committerContextJson , TransactionStateRow .SCHEMA );
121+ public Builder withDeltaTable (DeltaTable deltaTable ) {
122+ this .deltaTable = deltaTable ;
196123 return this ;
197124 }
198125
@@ -203,12 +130,10 @@ public Builder withMetricGroup(SinkCommitterMetricGroup metricGroup) {
203130
204131 public DeltaCommitter build () {
205132 Objects .requireNonNull (jobId , "jobId must not be null" );
206- Objects .requireNonNull (engine , "engine must not be null" );
207- Objects .requireNonNull (table , "table must not be null" );
208- Objects .requireNonNull (committerContext , "committerContext must not be null" );
133+ Objects .requireNonNull (deltaTable , "tableLoader must not be null" );
209134 Objects .requireNonNull (metricGroup , "metricGroup must not be null" );
210135
211- return new DeltaCommitter (jobId , engine , table , committerContext , metricGroup );
136+ return new DeltaCommitter (jobId , deltaTable , metricGroup );
212137 }
213138 }
214139}
0 commit comments