diff --git a/build.sbt b/build.sbt index 50567bd7250..853b679f66c 100644 --- a/build.sbt +++ b/build.sbt @@ -72,6 +72,7 @@ val scalaTestVersionForConnectors = "3.0.8" val parquet4sVersion = "1.9.4" val protoVersion = "3.25.1" val grpcVersion = "1.62.2" +val flink120Version = "1.20.3" // For Java 11 use the following on command line // sbt 'set targetJvm := "11"' [commands] @@ -1278,6 +1279,51 @@ lazy val hudi = (project in file("hudi")) TestParallelization.settings ) +lazy val flink = (project in file("connectors/flink/v1.20")) +// .dependsOn(kernelApi) + .dependsOn(kernelDefaults) + .dependsOn(kernelUnityCatalog) + .settings( + name := "delta-flink-v1.20", + commonSettings, + releaseSettings, + javafmtCheckSettings, + scalafmtCheckSettings, + publishArtifact := scalaBinaryVersion.value == "2.12", // only publish once + autoScalaLibrary := false, // exclude scala-library from dependencies + Compile / unmanagedJars += (kernelApi / Compile / packageBin).value, + Test / unmanagedJars += (kernelApi / Compile / packageBin).value, + + // Make sure the shaded JAR is produced before we compile/run tests + Compile / compile := (Compile / compile).dependsOn(kernelApi / Compile / packageBin).value, + Test / test := (Test / test).dependsOn(kernelApi / Compile / packageBin).value, + Test / unmanagedJars += (kernelApi / Test / packageBin).value, + + Test / publishArtifact := false, + Test / javaOptions ++= Seq( + "--add-opens=java.base/java.util=ALL-UNNAMED" // for Flink with Java 17. + ), + crossPaths := false, + libraryDependencies ++= Seq( + "org.apache.flink" % "flink-core" % flink120Version % "provided", + "org.apache.flink" % "flink-table-common" % flink120Version % "provided", + "org.apache.flink" % "flink-streaming-java" % flink120Version % "provided", + "io.unitycatalog" % "unitycatalog-client" % "0.3.0", + + "org.apache.flink" % "flink-test-utils" % flink120Version % "test", + "org.scalatest" %% "scalatest" % "3.2.19" % "test", + "org.apache.flink" % "flink-clients" % flink120Version % "test", + "org.apache.flink" % "flink-table-api-java-bridge" % flink120Version % Test, + "org.apache.flink" % "flink-table-planner-loader" % flink120Version % Test, + "org.apache.flink" % "flink-table-runtime" % flink120Version % Test, + "org.apache.flink" % "flink-test-utils-junit" % flink120Version % Test, + "org.slf4j" % "slf4j-log4j12" % "2.0.17" % "test", + // The below test dependencies are only needed for real E2E integration tests against a real + // UC endpoint. 
+ "org.apache.hadoop" % "hadoop-aws" % hadoopVersion % "test", + ) + ) + lazy val goldenTables = (project in file("connectors/golden-tables")) .disablePlugins(JavaFormatterPlugin, ScalafmtPlugin) .settings( diff --git a/connectors/flink/v1.20/src/main/java/io/delta/flink/sink/Conversions.java b/connectors/flink/v1.20/src/main/java/io/delta/flink/sink/Conversions.java new file mode 100644 index 00000000000..d491dea5db5 --- /dev/null +++ b/connectors/flink/v1.20/src/main/java/io/delta/flink/sink/Conversions.java @@ -0,0 +1,137 @@ +package io.delta.flink.sink; + +import io.delta.kernel.expressions.Literal; +import io.delta.kernel.types.DataType; +import io.delta.kernel.types.StructField; +import io.delta.kernel.types.StructType; +import java.util.Collection; +import java.util.List; +import java.util.Map; +import java.util.stream.Collectors; +import org.apache.flink.table.data.RowData; +import org.apache.flink.table.types.logical.*; +import org.apache.flink.table.types.logical.LogicalType; +import org.apache.flink.table.types.logical.LogicalTypeRoot; +import org.apache.flink.table.types.logical.RowType; + +/** + * Provide conversions between Flink and Delta data structures. This now includes: + * + * + */ +public class Conversions { + public static class FlinkToDelta { + public static StructType schema(RowType rowType) { + List fields = + rowType.getFields().stream() + .map( + rowField -> { + DataType rowFieldType = dataType(rowField.getType()); + return new StructField( + rowField.getName(), rowFieldType, rowField.getType().isNullable()); + }) + .collect(Collectors.toList()); + + return new StructType(fields); + } + + public static DataType dataType(LogicalType flinkType) { + final LogicalTypeRoot typeRoot = flinkType.getTypeRoot(); + switch (typeRoot) { + case INTEGER: + return io.delta.kernel.types.IntegerType.INTEGER; + case VARCHAR: + case CHAR: + return io.delta.kernel.types.StringType.STRING; + case BIGINT: + return io.delta.kernel.types.LongType.LONG; + case DOUBLE: + return io.delta.kernel.types.DoubleType.DOUBLE; + case FLOAT: + return io.delta.kernel.types.FloatType.FLOAT; + case DECIMAL: + DecimalType decimalType = (DecimalType) flinkType; + int precision = decimalType.getPrecision(); + int scale = decimalType.getScale(); + return new io.delta.kernel.types.DecimalType(precision, scale); + case DATE: + return io.delta.kernel.types.DateType.DATE; + case TIMESTAMP_WITH_TIME_ZONE: + return io.delta.kernel.types.TimestampType.TIMESTAMP; + case TIMESTAMP_WITHOUT_TIME_ZONE: + return io.delta.kernel.types.TimestampNTZType.TIMESTAMP_NTZ; + case ROW: + RowType rowType = (RowType) flinkType; + return new StructType( + rowType.getFields().stream() + .map( + field -> + new StructField( + field.getName(), + dataType(field.getType()), + field.getType().isNullable())) + .collect(Collectors.toList())); + case ARRAY: + ArrayType arrayType = (ArrayType) flinkType; + return new io.delta.kernel.types.ArrayType( + dataType(arrayType.getElementType()), arrayType.getElementType().isNullable()); + case MAP: + MapType mapType = (MapType) flinkType; + return new io.delta.kernel.types.MapType( + dataType(mapType.getKeyType()), + dataType(mapType.getValueType()), + mapType.getValueType().isNullable()); + default: + throw new UnsupportedOperationException( + String.format("Type not supported: %s", flinkType)); + } + } + + public static Map partitionValues( + StructType rowType, Collection partitionColNames, RowData rowData) { + return partitionColNames.stream() + .map( + name -> { + final int 
partitionValueColIdx = rowType.indexOf(name); + return new Object[] { + name, Conversions.FlinkToDelta.data(rowType, rowData, partitionValueColIdx) + }; + }) + .collect(Collectors.toMap(o -> (String) o[0], o -> (Literal) o[1])); + } + + public static Literal data(StructType rowType, RowData rowData, int colIdx) { + final StructField field = rowType.at(colIdx); + final DataType dataType = field.getDataType(); + if (dataType.equivalent(io.delta.kernel.types.IntegerType.INTEGER)) { + return Literal.ofInt(rowData.getInt(colIdx)); + } else if (dataType.equivalent(io.delta.kernel.types.LongType.LONG)) { + return Literal.ofLong(rowData.getLong(colIdx)); + } else if (dataType.equivalent(io.delta.kernel.types.StringType.STRING)) { + return Literal.ofString(rowData.getString(colIdx).toString()); + } else if (dataType.equivalent(io.delta.kernel.types.DoubleType.DOUBLE)) { + return Literal.ofDouble(rowData.getDouble(colIdx)); + } else if (dataType.equivalent(io.delta.kernel.types.FloatType.FLOAT)) { + return Literal.ofFloat(rowData.getFloat(colIdx)); + } else if (dataType instanceof io.delta.kernel.types.DecimalType) { + io.delta.kernel.types.DecimalType decimalType = + (io.delta.kernel.types.DecimalType) dataType; + int precision = decimalType.getPrecision(); + int scale = decimalType.getScale(); + return Literal.ofDecimal( + rowData.getDecimal(colIdx, precision, scale).toBigDecimal(), precision, scale); + } else if (dataType.equivalent(io.delta.kernel.types.DateType.DATE)) { + return Literal.ofDate(rowData.getInt(colIdx)); + } else if (dataType.equivalent(io.delta.kernel.types.TimestampType.TIMESTAMP)) { + return Literal.ofTimestamp(rowData.getLong(colIdx)); + } else if (dataType.equivalent(io.delta.kernel.types.TimestampNTZType.TIMESTAMP_NTZ)) { + return Literal.ofTimestampNtz(rowData.getLong(colIdx)); + } else { + throw new UnsupportedOperationException("Unsupported data type: " + dataType); + } + } + } +} diff --git a/connectors/flink/v1.20/src/main/java/io/delta/flink/sink/DeltaCommittable.java b/connectors/flink/v1.20/src/main/java/io/delta/flink/sink/DeltaCommittable.java new file mode 100644 index 00000000000..d617c0ccf12 --- /dev/null +++ b/connectors/flink/v1.20/src/main/java/io/delta/flink/sink/DeltaCommittable.java @@ -0,0 +1,142 @@ +package io.delta.flink.sink; + +import io.delta.kernel.data.Row; +import io.delta.kernel.defaults.internal.json.JsonUtils; +import io.delta.kernel.internal.actions.SingleAction; +import io.delta.kernel.internal.util.Preconditions; +import java.io.*; +import java.util.ArrayList; +import java.util.List; +import java.util.stream.Collectors; +import org.apache.flink.core.io.SimpleVersionedSerializer; + +/** + * A {@code DeltaCommittable} represents a unit of pending work produced by a Delta sink writer that + * is ready to be committed to a Delta table. + * + *

{@code DeltaCommittable} instances are emitted by {@link + * org.apache.flink.api.connector.sink2.SinkWriter} implementations during checkpointing and are + * later consumed by a {@link DeltaCommitter} or global committer to finalize changes in the Delta + * transaction log. + *

Each committable encapsulates: + * + *

    + *
  • one or more Delta actions (e.g., {@code AddFile} actions) produced by a writer, + *
  • checkpoint-scoped context that allows the commit process to be retried safely. + *
+ * + *
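+ * For reference, a committable carrying these pieces is constructed as follows (illustrative
+ * only; the values are placeholders):
+ *
+ * {@code
+ *   DeltaCommittable committable =
+ *       new DeltaCommittable(
+ *           jobId,          // Flink job that produced the actions
+ *           operatorId,     // operator that emitted them
+ *           checkpointId,   // checkpoint the actions belong to
+ *           schemaDigest,   // digest of the write schema
+ *           deltaActions);  // Delta actions (e.g. AddFile rows)
+ * }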

During recovery or retries, the same {@code DeltaCommittable} may be delivered multiple times + * to the committer. Implementations must therefore ensure that committing a committable is either + * idempotent or protected by higher-level deduplication mechanisms (for example, checkpoint + * tracking stored in the Delta table metadata). + * + *

{@code DeltaCommittable} is a transport object only; it does not perform I/O or commit + * operations itself. All side effects are applied by the corresponding committer. + * + *

This class is typically serialized and checkpointed by Flink and must therefore remain stable + * and backward-compatible across versions of the connector. + */ +public class DeltaCommittable { + private final String jobId; + private final String operatorId; + private final long checkpointId; + private final String schemaDigest; + private final List deltaActions; + + public DeltaCommittable( + String jobId, + String operatorId, + long checkpointId, + String schemaDigest, + List deltaActions) { + this.jobId = jobId; + this.operatorId = operatorId; + this.checkpointId = checkpointId; + this.schemaDigest = schemaDigest; + this.deltaActions = deltaActions; + } + + public String getJobId() { + return jobId; + } + + public String getOperatorId() { + return operatorId; + } + + public long getCheckpointId() { + return checkpointId; + } + + public String getSchemaDigest() { + return schemaDigest; + } + + public List getDeltaActions() { + return deltaActions; + } + + @Override + public String toString() { + return "DeltaCommittable{" + + "jobId='" + + jobId + + '\'' + + ", operatorId='" + + operatorId + + '\'' + + ", checkpointId=" + + checkpointId + + ", schemaDigest=" + + schemaDigest + + ", deltaActions=" + + deltaActions.stream().map(JsonUtils::rowToJson).collect(Collectors.joining(",")) + + '}'; + } + + static class Serializer implements SimpleVersionedSerializer { + @Override + public int getVersion() { + return 1; + } + + @Override + public byte[] serialize(DeltaCommittable obj) throws IOException { + try (ByteArrayOutputStream bos = new ByteArrayOutputStream(); + ObjectOutputStream out = new ObjectOutputStream(bos)) { + out.writeUTF(obj.getJobId()); + out.writeUTF(obj.getOperatorId()); + out.writeLong(obj.getCheckpointId()); + out.writeUTF(obj.getSchemaDigest()); + out.writeInt(obj.getDeltaActions().size()); + for (Row row : obj.getDeltaActions()) { + Preconditions.checkArgument( + row.getSchema().equivalent(SingleAction.FULL_SCHEMA), "Need to be an action"); + out.writeUTF(JsonUtils.rowToJson(row)); + } + out.flush(); + out.close(); + return bos.toByteArray(); + } + } + + @Override + public DeltaCommittable deserialize(int version, byte[] serialized) throws IOException { + try (ByteArrayInputStream bis = new ByteArrayInputStream(serialized); + ObjectInputStream in = new ObjectInputStream(bis)) { + final String jobId = in.readUTF(); + final String operatorId = in.readUTF(); + final long checkpointId = in.readLong(); + final String schemaDigest = in.readUTF(); + final int numActions = in.readInt(); + List actions = new ArrayList<>(numActions); + for (int i = 0; i < numActions; i++) { + final String actionJson = in.readUTF(); + actions.add(JsonUtils.rowFromJson(actionJson, SingleAction.FULL_SCHEMA)); + } + return new DeltaCommittable(jobId, operatorId, checkpointId, schemaDigest, actions); + } + } + } +} diff --git a/connectors/flink/v1.20/src/main/java/io/delta/flink/sink/DeltaCommitter.java b/connectors/flink/v1.20/src/main/java/io/delta/flink/sink/DeltaCommitter.java new file mode 100644 index 00000000000..22b0da6eb33 --- /dev/null +++ b/connectors/flink/v1.20/src/main/java/io/delta/flink/sink/DeltaCommitter.java @@ -0,0 +1,144 @@ +package io.delta.flink.sink; + +import io.delta.flink.table.DeltaTable; +import io.delta.kernel.data.Row; +import io.delta.kernel.internal.util.Preconditions; +import io.delta.kernel.internal.util.Utils; +import io.delta.kernel.utils.CloseableIterable; +import io.delta.kernel.utils.CloseableIterator; +import java.io.IOException; +import java.util.*; +import 
java.util.stream.Collectors; +import org.apache.flink.api.connector.sink2.Committer; +import org.apache.flink.metrics.groups.SinkCommitterMetricGroup; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * The Committer is responsible for committing the data staged by the CommittingSinkWriter in the + * second step of a two-phase commit protocol. + * + *

A commit must be idempotent: if a failure occurs in Flink during the commit phase, Flink will + * restart from the last successful checkpoint and re-attempt to commit all committables. There are + * two failure cases: + *

    + *
  1. Flink fails before completing checkpoint N. In this case, Flink discards all committables + * related to checkpoint N and restarts reading the rows after checkpoint N-1. Flink + * calls the writer and committer to re-create the committables. Old committables are simply + * discarded. Because the changes from checkpoint N were never written to the Delta table, no special + * handling is needed in DeltaSink/DeltaCommitter. + *
  2. Flink fails after completing checkpoint N. In this case, the changes from checkpoint N have + * already been written to the Delta table. Flink will load the committables from the persisted checkpoint + * N and replay them, which would insert the changes from checkpoint N into the + * Delta table a second time as duplicate add files. We rely on Delta to deduplicate these + * files automatically; see {@link io.delta.kernel.TransactionBuilder#withTransactionId} and the sketch below. + *
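+ * The Kernel mechanism referenced above is sketched below (illustrative only; the exact
+ * builder wiring used by this connector may differ):
+ *
+ * {@code
+ *   // Tagging a transaction with an application id and a monotonically increasing version
+ *   // (e.g. the checkpoint id) lets Kernel skip a commit whose version was already applied.
+ *   txnBuilder.withTransactionId(engine, appId, checkpointId);
+ * }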
+ * + * NOTE: Unlike IcebergCommitter, which writes the checkpoint ID into snapshot to prevent a data + * file from being added twice to the table, DeltaCommitter relies on Delta protocol to handle + * duplicated files. Thus we don't explicitly write jobId/checkpointId into DeltaLog. + */ +public class DeltaCommitter implements Committer { + + private static final Logger LOG = LoggerFactory.getLogger(DeltaCommitter.class); + + // All committables should have the same job id as the committer. + // For simplicity, we get the job id from constructor. + private String jobId; + private DeltaTable deltaTable; + + private SinkCommitterMetricGroup metricGroup; + + private DeltaCommitter( + String jobId, DeltaTable deltaTable, SinkCommitterMetricGroup metricGroup) { + this.jobId = jobId; + this.deltaTable = deltaTable; + this.metricGroup = metricGroup; + } + + @Override + public void commit(Collection> committables) + throws IOException, InterruptedException { + LOG.debug("Starting commit"); + sortCommittablesByCheckpointId(committables).forEach(this::commitForSingleCheckpointId); + } + + @Override + public void close() throws Exception {} + + private void commitForSingleCheckpointId( + long checkpointId, List> committables) { + LOG.debug("Committing {} committables on checkpoint {}", committables.size(), checkpointId); + + deltaTable.refresh(); + String latestSchemaDigest = new DeltaSchemaDigest(deltaTable.getSchema()).sha256(); + Set committingSchemaDigest = + committables.stream() + .map(CommitRequest::getCommittable) + .map(DeltaCommittable::getSchemaDigest) + .collect(Collectors.toSet()); + Preconditions.checkArgument( + committingSchemaDigest.size() == 1 && committingSchemaDigest.contains(latestSchemaDigest), + "Committing Schema is different from latest table Schema"); + + final CloseableIterable dataActions = + new CloseableIterable() { + @Override + public CloseableIterator iterator() { + return Utils.toCloseableIterator( + committables.stream() + .flatMap(req -> req.getCommittable().getDeltaActions().stream()) + .iterator()); + } + + @Override + public void close() throws IOException { + // Nothing to close + } + }; + + deltaTable.commit(dataActions); + } + + private TreeMap>> sortCommittablesByCheckpointId( + Collection> committables) { + return committables.stream() + .collect( + Collectors.groupingBy( + commitRequest -> commitRequest.getCommittable().getCheckpointId(), + TreeMap::new, + Collectors.toList())); + } + + public static final class Builder { + private String jobId; + private DeltaTable deltaTable; + private SinkCommitterMetricGroup metricGroup; + + public Builder() {} + + public Builder withJobId(String jobId) { + this.jobId = jobId; + return this; + } + + public Builder withDeltaTable(DeltaTable deltaTable) { + this.deltaTable = deltaTable; + return this; + } + + public Builder withMetricGroup(SinkCommitterMetricGroup metricGroup) { + this.metricGroup = metricGroup; + return this; + } + + public DeltaCommitter build() { + Objects.requireNonNull(jobId, "jobId must not be null"); + Objects.requireNonNull(deltaTable, "tableLoader must not be null"); + Objects.requireNonNull(metricGroup, "metricGroup must not be null"); + + return new DeltaCommitter(jobId, deltaTable, metricGroup); + } + } +} diff --git a/connectors/flink/v1.20/src/main/java/io/delta/flink/sink/DeltaSchemaDigest.java b/connectors/flink/v1.20/src/main/java/io/delta/flink/sink/DeltaSchemaDigest.java new file mode 100644 index 00000000000..fcedb297b37 --- /dev/null +++ 
b/connectors/flink/v1.20/src/main/java/io/delta/flink/sink/DeltaSchemaDigest.java @@ -0,0 +1,52 @@ +package io.delta.flink.sink; + +import io.delta.kernel.types.StructType; +import java.nio.charset.StandardCharsets; +import java.security.MessageDigest; +import java.security.NoSuchAlgorithmException; + +/** + * {@code DeltaSchemaDigest} computes a deterministic, compact digest representation of a Delta + * table schema. + * + *

The digest captures the essential structural aspects of a schema—such as column names, data + * types, nullability, and nested field structure—so that two schemas with the same logical + * definition produce the same digest value. + * + *

This abstraction is primarily used to enable efficient schema evolution detection without + * performing expensive deep schema comparisons. By comparing digests, callers can quickly determine + * whether a schema has changed in a way that may require action (for example, triggering a schema + * update, compatibility check, or commit-time validation). + * + *
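+ * A typical comparison looks like this (illustrative sketch; {@code writeSchema} and
+ * {@code deltaTable} are placeholders for the writer-side schema and the
+ * {@link io.delta.flink.table.DeltaTable} in use):
+ *
+ * {@code
+ *   String writerDigest = new DeltaSchemaDigest(writeSchema).sha256();
+ *   String tableDigest = new DeltaSchemaDigest(deltaTable.getSchema()).sha256();
+ *   boolean schemaChanged = !writerDigest.equals(tableDigest);
+ * }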

{@code DeltaSchemaDigest} is intended for internal use by components involved in schema + * tracking, commit coordination, or metadata validation, and does not replace full schema + * inspection when detailed differences are required. + */ +public class DeltaSchemaDigest { + private final StructType schema; + + public DeltaSchemaDigest(StructType schema) { + this.schema = schema; + } + + /** + * Generate a SHA-256 digest + * + * @return schema digest in SHA256 + */ + public String sha256() { + byte[] schemaBytes = schema.toJson().getBytes(StandardCharsets.UTF_8); + try { + MessageDigest digest = MessageDigest.getInstance("SHA-256"); + byte[] digestBytes = digest.digest(schemaBytes); + StringBuilder sb = new StringBuilder(digestBytes.length * 2); + for (byte b : digestBytes) { + sb.append(String.format("%02x", b)); + } + return sb.toString(); + } catch (NoSuchAlgorithmException e) { + // SHA-256 is guaranteed to exist in Java + throw new RuntimeException(e); + } + } +} diff --git a/connectors/flink/v1.20/src/main/java/io/delta/flink/sink/DeltaSink.java b/connectors/flink/v1.20/src/main/java/io/delta/flink/sink/DeltaSink.java new file mode 100644 index 00000000000..c5ac9f2b42f --- /dev/null +++ b/connectors/flink/v1.20/src/main/java/io/delta/flink/sink/DeltaSink.java @@ -0,0 +1,253 @@ +package io.delta.flink.sink; + +import io.delta.flink.table.*; +import io.delta.kernel.internal.util.Preconditions; +import io.delta.kernel.types.StructType; +import java.io.IOException; +import java.net.URI; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Objects; +import java.util.stream.Collectors; +import jdk.jfr.Experimental; +import org.apache.flink.api.common.typeinfo.TypeInformation; +import org.apache.flink.api.connector.sink2.*; +import org.apache.flink.api.java.functions.KeySelector; +import org.apache.flink.core.io.SimpleVersionedSerializer; +import org.apache.flink.streaming.api.connector.sink2.*; +import org.apache.flink.streaming.api.datastream.DataStream; +import org.apache.flink.table.data.RowData; +import org.apache.flink.table.types.logical.RowType; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * Flink v2 sink offer different hooks to insert custom topologies into the sink. We will use the + * following: + * + *

    + *
  • {@link SupportsPreWriteTopology} which redistributes the data to the writers + *
  • {@link org.apache.flink.api.connector.sink2.SinkWriter} which writes data files and + * generates the {@link DeltaWriterResult} objects for those files + *
  • {@link SupportsPreCommitTopology} which we use to place the {@link + * DeltaWriterResultAggregator} that merges the individual {@link + * org.apache.flink.api.connector.sink2.SinkWriter}'s {@link DeltaWriterResult}s into a single + * {@link DeltaCommittable} + *
  • {@link DeltaCommitter} which commits the incoming {@link DeltaCommittable}s to the Delta + * table + *
  • {@link SupportsPostCommitTopology}, which we could use for incremental compaction later. This is + * not implemented yet. + *
+ * + *
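+ * A minimal usage sketch for a path-based table (illustrative only; the path, schema and
+ * partition column below are placeholders):
+ *
+ * {@code
+ *   DeltaSink sink =
+ *       new DeltaSink.Builder()
+ *           .withTablePath("/tmp/delta/events")      // hypothetical table location
+ *           .withFlinkSchema(rowType)                // Flink RowType of the incoming stream
+ *           .withPartitionColNames(List.of("date"))  // optional partition columns
+ *           .build();
+ *   rowDataStream.sinkTo(sink);                      // rowDataStream is a DataStream<RowData>
+ * }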

The job graph is shown below: + * + *

{@code
+ *                            Flink sink
+ *               +-----------------------------------------------------------------------------------+
+ *               |                                                                                   |
+ * +-------+     | +----------+                               +-------------+      +---------------+ |
+ * | Map 1 | ==> | | writer 1 |                               | committer 1 | ---> | post commit 1 | |
+ * +-------+     | +----------+                               +-------------+      +---------------+ |
+ *               |             \                             /                \                      |
+ *               |        DeltaWriterResults        DeltaCommittables          \                     |
+ *               |               \                         /                    \                    |
+ * +-------+     | +----------+   \ +-------------------+ /                      \ +---------------+ |
+ * | Map 2 | ==> | | writer 2 | --->| commit aggregator |                          | post commit 2 | |
+ * +-------+     | +----------+     +-------------------+                          +---------------+ |
+ *               |                                             Commit only on                        |
+ *               |                                             a single committer                    |
+ *               +-----------------------------------------------------------------------------------+
+ * }
+ */ +@Experimental +public class DeltaSink + implements Sink, + SupportsCommitter, + SupportsPreCommitTopology, + SupportsPreWriteTopology, + SupportsPostCommitTopology { + + private static final Logger LOG = LoggerFactory.getLogger(DeltaSink.class); + + private final DeltaTable deltaTable; + + public DeltaSink(DeltaTable deltaTable) { + this.deltaTable = deltaTable; + } + + @Override + public SinkWriter createWriter(InitContext context) throws IOException { + return new DeltaSinkWriter.Builder() + .withJobId(context.getJobInfo().getJobId().toString()) + .withSubtaskId(context.getTaskInfo().getIndexOfThisSubtask()) + .withAttemptNumber(context.getTaskInfo().getAttemptNumber()) + .withDeltaTable(deltaTable) + .withMetricGroup(context.metricGroup()) + .build(); + } + + @Override + public SinkWriter createWriter(WriterInitContext context) throws IOException { + return new DeltaSinkWriter.Builder() + .withJobId(context.getJobInfo().getJobId().toString()) + .withSubtaskId(context.getTaskInfo().getIndexOfThisSubtask()) + .withAttemptNumber(context.getTaskInfo().getAttemptNumber()) + .withDeltaTable(deltaTable) + .withMetricGroup(context.metricGroup()) + .build(); + } + + @Override + public Committer createCommitter(CommitterInitContext context) + throws IOException { + return new DeltaCommitter.Builder() + .withJobId(context.getJobInfo().getJobId().toString()) + .withDeltaTable(deltaTable) + .withMetricGroup(context.metricGroup()) + .build(); + } + + /** + * This method ensures that all rows with the same partitionHash will be sent to the same {@link + * DeltaSinkWriter}. It makes no promises about how many unique partitionHash's that a {@link + * DeltaSinkWriter} will handle (it may even be 0). + * + *

TODO This design may cause imbalanced workload if the data distribution is skewed. + */ + @Override + public DataStream addPreWriteTopology(DataStream inputDataStream) { + return inputDataStream.keyBy( + (KeySelector) + value -> + Conversions.FlinkToDelta.partitionValues( + deltaTable.getSchema(), deltaTable.getPartitionColumns(), value) + .entrySet().stream() + .collect( + Collectors.toMap(Map.Entry::getKey, entry -> entry.getValue().toString())) + .hashCode()); + } + + @Override + public DataStream> addPreCommitTopology( + DataStream> writerResults) { + TypeInformation> typeInformation = + CommittableMessageTypeInfo.of(this::getCommittableSerializer); + String uid = String.format("DeltaSink preCommit aggregator: %s", deltaTable.getId()); + // global forces all output records send to subtask 0 of the downstream committer operator. + // This is to ensure commit only happen in one committer subtask. + return writerResults + .global() + .transform(uid, typeInformation, new DeltaWriterResultAggregator()) + .uid(uid) + .setParallelism(1) + .setMaxParallelism(1) + // global forces all output records send to subtask 0 of the downstream committer operator. + // This is to ensure commit only happen in one committer subtask. + // Once upstream Flink provides the capability of setting committer operator + // parallelism to 1, this can be removed. + .global(); + } + + @Override + public SimpleVersionedSerializer getWriteResultSerializer() { + return new DeltaWriterResult.Serializer(); + } + + @Override + public SimpleVersionedSerializer getCommittableSerializer() { + return new DeltaCommittable.Serializer(); + } + + @Override + public void addPostCommitTopology(DataStream> committables) { + committables.global().process(new PostCommitOperator()).uid("DeltaSink postCommit processor"); + } + + public static class Builder { + private DeltaTable deltaTable; + // For file-based tables + private String tablePath; + private RowType flinkSchema; + private List partitionColNames; + // For catalog-based tables + private String tableId; + private String catalogEndpoint; + private String catalogToken; + private Map configurations; + + public Builder withDeltaTable(DeltaTable deltaTable) { + this.deltaTable = deltaTable; + return this; + } + + // For file-based tables + public Builder withTablePath(String tablePath) { + this.tablePath = tablePath; + return this; + } + + public Builder withFlinkSchema(RowType flinkSchema) { + this.flinkSchema = flinkSchema; + return this; + } + + public Builder withPartitionColNames(List partitionColNames) { + this.partitionColNames = partitionColNames; + return this; + } + + // For catalog-based tables + public Builder withTableId(String tableId) { + this.tableId = tableId; + return this; + } + + public Builder withCatalogEndpoint(String catalogEndpoint) { + this.catalogEndpoint = catalogEndpoint; + return this; + } + + public Builder withCatalogToken(String catalogToken) { + this.catalogToken = catalogToken; + return this; + } + + public Builder withConfigurations(Map configurations) { + this.configurations = configurations; + return this; + } + + public DeltaSink build() { + if (configurations == null) { + configurations = Map.of(); + } + if (deltaTable == null) { + // Can use only one from tablePath or tableId + Preconditions.checkArgument( + (tablePath != null) ^ (tableId != null), "Use either tablePath or tableId"); + if (tablePath != null) { + // File-based table + StructType tableSchema = null; + if (flinkSchema != null) { + tableSchema = 
Conversions.FlinkToDelta.schema(flinkSchema); + } + deltaTable = + new HadoopTable( + URI.create(tablePath), configurations, tableSchema, partitionColNames); + } else { + // Catalog-based table + Objects.requireNonNull(catalogEndpoint); + Objects.requireNonNull(catalogToken); + Map finalConf = new HashMap<>(configurations); + finalConf.put(CCv2Table.CATALOG_ENDPOINT, catalogEndpoint); + finalConf.put(CCv2Table.CATALOG_TOKEN, catalogToken); + // TODO Support separated endpoints for catalog and table + Catalog restCatalog = new RESTCatalog(catalogEndpoint, catalogToken); + deltaTable = new CCv2Table(restCatalog, tableId, finalConf); + } + } + return new DeltaSink(deltaTable); + } + } +} diff --git a/connectors/flink/v1.20/src/main/java/io/delta/flink/sink/DeltaSinkWriter.java b/connectors/flink/v1.20/src/main/java/io/delta/flink/sink/DeltaSinkWriter.java new file mode 100644 index 00000000000..e4321beb7c6 --- /dev/null +++ b/connectors/flink/v1.20/src/main/java/io/delta/flink/sink/DeltaSinkWriter.java @@ -0,0 +1,164 @@ +package io.delta.flink.sink; + +import io.delta.flink.table.DeltaTable; +import io.delta.kernel.expressions.Literal; +import java.io.IOException; +import java.util.*; +import java.util.stream.Collectors; +import org.apache.flink.api.connector.sink2.CommittingSinkWriter; +import org.apache.flink.metrics.Counter; +import org.apache.flink.metrics.groups.SinkWriterMetricGroup; +import org.apache.flink.streaming.api.connector.sink2.SupportsPreWriteTopology; +import org.apache.flink.table.data.RowData; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * A Delta writer implementation based on Flink’s Sink V2 Connector API. + * + *

This writer is responsible for writing incoming records to the target Delta table storage and + * producing {@link DeltaWriterResult} objects that describe the data written by this writer since + * the last successful checkpoint. + * + *

At each checkpoint, the writer emits a {@code DeltaWriterResult} containing the Delta {@code + * AddFile} actions (and any other relevant actions) generated during that checkpoint interval. + * These results are subsequently aggregated and committed by the downstream committer components to + * create a new Delta table version. + * + *

This implementation follows Flink’s checkpointing and fault-tolerance model: + * + *

    + *
  • Writes are buffered and tracked per checkpoint, + *
  • {@code DeltaWriterResult}s are emitted during checkpoint preparation, and + *
  • Commit responsibility is delegated to the committer to ensure correctness and exactly-once + * or at-least-once semantics. + *
+ * + *
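+ * In terms of the methods below, one checkpoint cycle reduces to the following (illustrative
+ * sketch):
+ *
+ * {@code
+ *   writer.write(rowData, context);     // rows are buffered per partition
+ *   Collection<DeltaWriterResult> results = writer.prepareCommit();  // emitted at the barrier
+ * }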

The writer does not perform table commits directly. Instead, it focuses solely on producing + * durable data files and describing their effects via {@code DeltaWriterResult}, allowing commit + * coordination and deduplication to be handled centrally. + */ +public class DeltaSinkWriter implements CommittingSinkWriter { + private static final Logger LOG = LoggerFactory.getLogger(DeltaSinkWriter.class); + + private final String jobId; + private final int subtaskId; + private final int attemptNumber; + + private final DeltaTable deltaTable; + + private final Map, DeltaWriterTask> writerTasksByPartition; + + private final SinkWriterMetricGroup metricGroup; + private final Counter elementCounter; + + private DeltaSinkWriter( + String jobId, + int subtaskId, + int attemptNumber, + DeltaTable deltaTable, + SinkWriterMetricGroup metricGroup) { + this.jobId = jobId; + this.subtaskId = subtaskId; + this.attemptNumber = attemptNumber; + + this.deltaTable = deltaTable; + this.writerTasksByPartition = new HashMap<>(); + + this.metricGroup = metricGroup; + this.elementCounter = metricGroup.counter("elementCounter"); + } + + /** + * {@link DeltaSink} implements {@link SupportsPreWriteTopology} and its {@link + * DeltaSink#addPreCommitTopology} method ensures that all rows with the same partition hash will + * be sent to the same {@link DeltaSinkWriter} instance. + * + *

However, a single {@link DeltaSinkWriter} instance may receive rows for more than one + * partition hash. It may also receive no rows at all. + */ + @Override + public void write(RowData element, Context context) throws IOException, InterruptedException { + final Map partitionValues = + Conversions.FlinkToDelta.partitionValues( + deltaTable.getSchema(), deltaTable.getPartitionColumns(), element); + + Map writerKey = + partitionValues.entrySet().stream() + .collect( + Collectors.toMap(entry -> entry.getKey(), entry -> entry.getValue().toString())); + + if (!writerTasksByPartition.containsKey(writerKey)) { + writerTasksByPartition.put( + writerKey, + new DeltaWriterTask(jobId, subtaskId, attemptNumber, deltaTable, partitionValues)); + } + writerTasksByPartition.get(writerKey).write(element, context); + elementCounter.inc(); + } + + @Override + public Collection prepareCommit() throws IOException, InterruptedException { + LOG.debug("Preparing commits"); + final Collection output = new ArrayList<>(); + + for (DeltaWriterTask writerTask : writerTasksByPartition.values()) { + output.addAll(writerTask.complete()); + } + writerTasksByPartition.clear(); + return output; + } + + @Override + public void flush(boolean endOfInput) throws IOException, InterruptedException {} + + @Override + public void close() throws Exception {} + + public static class Builder { + + private String jobId; + private int subtaskId; + private int attemptNumber; + + private DeltaTable deltaTable; + + private SinkWriterMetricGroup metricGroup; + + public Builder() {} + + public Builder withJobId(String jobId) { + this.jobId = jobId; + return this; + } + + public Builder withSubtaskId(int subtaskId) { + this.subtaskId = subtaskId; + return this; + } + + public Builder withAttemptNumber(int attemptNumber) { + this.attemptNumber = attemptNumber; + return this; + } + + public Builder withDeltaTable(DeltaTable deltaTable) { + this.deltaTable = deltaTable; + return this; + } + + public Builder withMetricGroup(SinkWriterMetricGroup metricGroup) { + this.metricGroup = metricGroup; + return this; + } + + public DeltaSinkWriter build() { + // Optional safety checks + Objects.requireNonNull(jobId, "jobId must not be null"); + Objects.requireNonNull(deltaTable, "deltaTable must not be null"); + Objects.requireNonNull(metricGroup, "metricGroup must not be null"); + + return new DeltaSinkWriter(jobId, subtaskId, attemptNumber, deltaTable, metricGroup); + } + } +} diff --git a/connectors/flink/v1.20/src/main/java/io/delta/flink/sink/DeltaWriterResult.java b/connectors/flink/v1.20/src/main/java/io/delta/flink/sink/DeltaWriterResult.java new file mode 100644 index 00000000000..6e98df73038 --- /dev/null +++ b/connectors/flink/v1.20/src/main/java/io/delta/flink/sink/DeltaWriterResult.java @@ -0,0 +1,114 @@ +package io.delta.flink.sink; + +import io.delta.kernel.data.Row; +import io.delta.kernel.defaults.internal.json.JsonUtils; +import io.delta.kernel.internal.actions.SingleAction; +import io.delta.kernel.internal.util.Preconditions; +import java.io.*; +import java.util.ArrayList; +import java.util.List; +import java.util.stream.Collectors; +import org.apache.flink.core.io.SimpleVersionedSerializer; + +/** + * {@code DeltaWriterResult} represents the output produced by a {@code DeltaSinkWriter} for a + * single checkpoint. + * + *

A {@code DeltaWriterResult} contains the Delta actions generated by one writer subtask while + * processing records up to a checkpoint barrier. These actions typically correspond to data written + * to storage (for example, {@code AddFile} actions) and are grouped according to the table’s + * partitioning scheme. + * + *

This object is emitted by the writer and forwarded to downstream aggregation and commit + * components (such as an aggregator or committer) as part of Flink’s exactly-once checkpointing + * protocol. + * + *

{@code DeltaWriterResult} itself does not perform any commit operations. Instead, it serves as + * an immutable, checkpoint-scoped container that: + * + *

    + *
  • captures all actions generated by a single writer subtask, + *
  • represents data written since the previous successful checkpoint, and + *
  • can be safely replayed or discarded during failure recovery. + *
+ * + *

Instances of this class are expected to be serializable and deterministic to ensure correct + * recovery behavior when checkpoints are retried. + * + *

Typical lifecycle: + * + *

    + *
  1. {@code DeltaSinkWriter} writes data files and constructs Delta actions. + *
  2. A {@code DeltaWriterResult} is emitted at checkpoint time. + *
  3. Multiple writer results are aggregated into committables. + *
  4. The committer applies the aggregated actions to the Delta table. + *
+ */ +public class DeltaWriterResult implements Serializable { + /** A string representing the digest of the Delta schema used by this write result. */ + private final String schemaDigest; + + private final List deltaActions; + + public DeltaWriterResult(String schemaDigest, List deltaActions) { + this.schemaDigest = schemaDigest; + this.deltaActions = deltaActions; + } + + public String getSchemaDigest() { + return schemaDigest; + } + + public List getDeltaActions() { + return deltaActions; + } + + @Override + public String toString() { + return "DeltaWriterResult{" + + "schemaDigest='" + + schemaDigest + + '\'' + + ", deltaActions=" + + deltaActions.stream().map(JsonUtils::rowToJson).collect(Collectors.joining(",")) + + '}'; + } + + static class Serializer implements SimpleVersionedSerializer { + @Override + public int getVersion() { + return 1; + } + + @Override + public byte[] serialize(DeltaWriterResult obj) throws IOException { + try (ByteArrayOutputStream bos = new ByteArrayOutputStream(); + ObjectOutputStream out = new ObjectOutputStream(bos)) { + out.writeUTF(obj.getSchemaDigest()); + out.writeInt(obj.getDeltaActions().size()); + for (Row row : obj.getDeltaActions()) { + Preconditions.checkArgument( + row.getSchema().equivalent(SingleAction.FULL_SCHEMA), "Need to be an action"); + out.writeUTF(JsonUtils.rowToJson(row)); + } + out.flush(); + return bos.toByteArray(); + } + } + + @Override + public DeltaWriterResult deserialize(int version, byte[] serialized) throws IOException { + try (ByteArrayInputStream bis = new ByteArrayInputStream(serialized); + ObjectInputStream in = new ObjectInputStream(bis)) { + String schemaDigest = in.readUTF(); + final int numActions = in.readInt(); + List actions = new ArrayList<>(numActions); + for (int i = 0; i < numActions; i++) { + final String actionJson = in.readUTF(); + actions.add(JsonUtils.rowFromJson(actionJson, SingleAction.FULL_SCHEMA)); + } + return new DeltaWriterResult(schemaDigest, actions); + } + } + } +} diff --git a/connectors/flink/v1.20/src/main/java/io/delta/flink/sink/DeltaWriterResultAggregator.java b/connectors/flink/v1.20/src/main/java/io/delta/flink/sink/DeltaWriterResultAggregator.java new file mode 100644 index 00000000000..193247d802f --- /dev/null +++ b/connectors/flink/v1.20/src/main/java/io/delta/flink/sink/DeltaWriterResultAggregator.java @@ -0,0 +1,102 @@ +package io.delta.flink.sink; + +import io.delta.kernel.internal.util.Preconditions; +import java.io.IOException; +import java.util.ArrayList; +import java.util.Collection; +import java.util.Set; +import java.util.stream.Collectors; +import org.apache.flink.streaming.api.connector.sink2.CommittableMessage; +import org.apache.flink.streaming.api.connector.sink2.CommittableSummary; +import org.apache.flink.streaming.api.connector.sink2.CommittableWithLineage; +import org.apache.flink.streaming.api.operators.AbstractStreamOperator; +import org.apache.flink.streaming.api.operators.OneInputStreamOperator; +import org.apache.flink.streaming.runtime.streamrecord.StreamRecord; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * {@code DeltaWriterResultAggregator} is a pre-commit operator in the Flink Sink V2 topology that + * aggregates individual {@link DeltaWriterResult} instances into a single {@link DeltaCommittable} + * per checkpoint. + * + *

Each {@link DeltaSinkWriter} emits {@code DeltaWriterResult}s independently from parallel + * subtasks as data is written. This operator collects and merges those per-writer results, grouping + * all actions belonging to the same checkpoint into a unified committable unit. + * + *

The aggregated {@link DeltaCommittable} represents the complete set of table changes produced + * by the sink for a given checkpoint and is passed downstream to the commit phase, where it is + * atomically applied to the Delta table. + * + *

This operator does not perform any I/O or table mutations itself; it is responsible only for + * logical aggregation and checkpoint scoping of writer results. Exactly-once guarantees are + * achieved by ensuring that at most one {@code DeltaCommittable} is emitted per checkpoint. + */ +class DeltaWriterResultAggregator + extends AbstractStreamOperator> + implements OneInputStreamOperator< + CommittableMessage, CommittableMessage> { + private static final Logger LOG = LoggerFactory.getLogger(DeltaWriterResultAggregator.class); + private final Collection results; + + DeltaWriterResultAggregator() { + results = new ArrayList<>(); + } + + @Override + public void open() throws Exception { + String flinkJobId = getContainingTask().getEnvironment().getJobID().toString(); + String operatorId = getOperatorID().toString(); + int subTaskId = getRuntimeContext().getTaskInfo().getIndexOfThisSubtask(); + Preconditions.checkArgument( + subTaskId == 0, "The subTaskId must be zero in the IcebergWriteAggregator"); + int attemptId = getRuntimeContext().getTaskInfo().getAttemptNumber(); + } + + @Override + public void finish() throws IOException { + LOG.debug("Finishing"); + prepareSnapshotPreBarrier(Long.MAX_VALUE); + } + + @Override + public void prepareSnapshotPreBarrier(long checkpointId) throws IOException { + LOG.debug("Aggregating on checkpoint {}", checkpointId); + // All WriterResults should have a single schema + Set schemaDigests = + results.stream().map(DeltaWriterResult::getSchemaDigest).collect(Collectors.toSet()); + Preconditions.checkArgument(schemaDigests.size() == 1); + String schemaDigest = schemaDigests.iterator().next(); + DeltaCommittable committable = + new DeltaCommittable( + getContainingTask().getEnvironment().getJobID().toString(), + getRuntimeContext().getOperatorUniqueID(), + checkpointId, + schemaDigest, + results.stream() + .flatMap(writerResult -> writerResult.getDeltaActions().stream()) + .collect(Collectors.toList())); + CommittableMessage summary = + new CommittableSummary<>(0, 1, checkpointId, 1, 1, 0); + output.collect(new StreamRecord<>(summary)); + CommittableMessage message = + new CommittableWithLineage<>(committable, checkpointId, 0); + output.collect(new StreamRecord<>(message)); + + LOG.debug( + "Emitted commit message to downstream committer operator on checkpoint {} containing {} rows", + checkpointId, + committable.getDeltaActions().size()); + results.clear(); + } + + @Override + public void processElement(StreamRecord> element) + throws Exception { + if (element.isRecord() && element.getValue() instanceof CommittableWithLineage) { + LOG.debug("Received writerResult: {}", element.getValue()); + results.add( + ((CommittableWithLineage) element.getValue()).getCommittable()); + } + } +} diff --git a/connectors/flink/v1.20/src/main/java/io/delta/flink/sink/DeltaWriterTask.java b/connectors/flink/v1.20/src/main/java/io/delta/flink/sink/DeltaWriterTask.java new file mode 100644 index 00000000000..3d1197ed8e6 --- /dev/null +++ b/connectors/flink/v1.20/src/main/java/io/delta/flink/sink/DeltaWriterTask.java @@ -0,0 +1,197 @@ +package io.delta.flink.sink; + +import io.delta.flink.table.DeltaTable; +import io.delta.kernel.data.ColumnVector; +import io.delta.kernel.data.FilteredColumnarBatch; +import io.delta.kernel.data.Row; +import io.delta.kernel.defaults.internal.data.DefaultColumnarBatch; +import io.delta.kernel.expressions.Literal; +import io.delta.kernel.internal.util.Utils; +import io.delta.kernel.types.*; +import io.delta.kernel.utils.CloseableIterator; +import 
java.io.IOException; +import java.math.BigDecimal; +import java.util.*; +import org.apache.flink.api.connector.sink2.SinkWriter; +import org.apache.flink.table.data.RowData; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * {@code DeltaWriterTask} represents a runnable unit of work responsible for writing row data to a + * single Parquet file as part of a Delta write operation. + * + *

Each writer task processes a subset of input data, writes it to exactly one Parquet file, and + * produces the corresponding Delta {@code AddFile} action describing the written file. + * + *

{@code DeltaWriterTask} instances are designed to be executed concurrently as part of a + * multi-threaded write pipeline, allowing a single sink writer to parallelize file generation while + * maintaining isolation between output files. + * + *
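+ * Per-partition usage by the sink writer has the following shape (illustrative sketch):
+ *
+ * {@code
+ *   task.write(rowData, context);                              // buffer rows for one partition
+ *   Collection<DeltaWriterResult> results = task.complete();   // write a Parquet file, wrap actions
+ * }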

The result of executing a {@code DeltaWriterTask} is a wrapped Delta action that is later + * collected into a {@link DeltaWriterResult} and emitted at checkpoint boundaries. + */ +public class DeltaWriterTask { + + private static final Logger LOG = LoggerFactory.getLogger(DeltaWriterTask.class); + + private final String jobId; + private final int subtaskId; + private final int attemptNumber; + private final Map partitionValues; + private final DeltaTable deltaTable; + + private final StructType writeSchema; + private final List buffer; + + public DeltaWriterTask( + String jobId, + int subtaskId, + int attemptNumber, + DeltaTable deltaTable, + Map partitionValues) { + this.jobId = jobId; + this.subtaskId = subtaskId; + this.attemptNumber = attemptNumber; + + this.buffer = new ArrayList<>(); + this.partitionValues = partitionValues; + this.deltaTable = deltaTable; + this.writeSchema = deltaTable.getSchema(); + } + + public void write(RowData element, SinkWriter.Context context) + throws IOException, InterruptedException { + buffer.add(element); + } + + public Collection complete() throws IOException { + LOG.debug( + "Writing {} elements to Parquet, partition: {}, jobId: {}, subtaskId: {}, attemptNumber: {}", + buffer.size(), + partitionValues, + jobId, + subtaskId, + attemptNumber); + if (buffer.isEmpty()) { + return Collections.emptyList(); + } + final CloseableIterator logicalData = flinkRowDataAsKernelData(); + // Append job and task information to target directory + final String pathSuffix = String.format("%s-%d-%d", jobId, subtaskId, attemptNumber); + final CloseableIterator actions = + deltaTable.writeParquet(pathSuffix, logicalData, partitionValues); + final String schemaDigest = new DeltaSchemaDigest(writeSchema).sha256(); + final Collection output = + actions.map(row -> new DeltaWriterResult(schemaDigest, List.of(row))).toInMemoryList(); + buffer.clear(); + return output; + } + + private CloseableIterator flinkRowDataAsKernelData() { + final int numColumns = writeSchema.length(); + final ColumnVector[] columnVectors = new ColumnVector[numColumns]; + + for (int colIdx = 0; colIdx < numColumns; colIdx++) { + final DataType colDataType = writeSchema.at(colIdx).getDataType(); + columnVectors[colIdx] = new RowDataColumnVectorView(buffer, colIdx, colDataType); + } + + return Utils.singletonCloseableIterator( + new FilteredColumnarBatch( + new DefaultColumnarBatch(buffer.size(), writeSchema, columnVectors), + Optional.empty() /* selectionVector */)); + } + + static class RowDataColumnVectorView implements ColumnVector { + + private final List rows; + private final int colIdx; + private final DataType dataType; + + public RowDataColumnVectorView(List rows, int colIdx, DataType dataType) { + this.rows = rows; + this.colIdx = colIdx; + this.dataType = dataType; + } + + @Override + public DataType getDataType() { + return this.dataType; + } + + @Override + public int getSize() { + return this.rows.size(); + } + + @Override + public void close() {} + + @Override + public boolean isNullAt(int rowId) { + checkValidRowId(rowId); + return rows.get(rowId).isNullAt(colIdx); + } + + protected void checkValidRowId(int rowId) { + if (rowId < 0 || rowId >= getSize()) { + throw new IllegalArgumentException("RowId out of range: " + rowId + " <-> " + getSize()); + } + } + + protected void checkValidDataType(DataType dataType) { + if (!this.getDataType().equivalent(dataType)) { + throw new UnsupportedOperationException("Invalid value request for data type"); + } + } + + @Override + public int getInt(int 
rowId) { + checkValidRowId(rowId); + checkValidDataType(IntegerType.INTEGER); + return rows.get(rowId).getInt(colIdx); + } + + @Override + public long getLong(int rowId) { + checkValidRowId(rowId); + checkValidDataType(LongType.LONG); + return rows.get(rowId).getLong(colIdx); + } + + @Override + public String getString(int rowId) { + checkValidRowId(rowId); + checkValidDataType(StringType.STRING); + return rows.get(rowId).getString(colIdx).toString(); + } + + @Override + public float getFloat(int rowId) { + checkValidRowId(rowId); + checkValidDataType(FloatType.FLOAT); + return rows.get(rowId).getFloat(colIdx); + } + + @Override + public double getDouble(int rowId) { + checkValidRowId(rowId); + checkValidDataType(DoubleType.DOUBLE); + return rows.get(rowId).getDouble(colIdx); + } + + @Override + public BigDecimal getDecimal(int rowId) { + checkValidRowId(rowId); + // Do not check precision and scale here because RowData support conversion + if (!(this.getDataType() instanceof DecimalType)) { + throw new UnsupportedOperationException("Invalid value request for data type"); + } + DecimalType actualType = (DecimalType) dataType; + return rows.get(rowId) + .getDecimal(colIdx, actualType.getPrecision(), actualType.getScale()) + .toBigDecimal(); + } + } +} diff --git a/connectors/flink/v1.20/src/main/java/io/delta/flink/sink/PostCommitOperator.java b/connectors/flink/v1.20/src/main/java/io/delta/flink/sink/PostCommitOperator.java new file mode 100644 index 00000000000..27a75532463 --- /dev/null +++ b/connectors/flink/v1.20/src/main/java/io/delta/flink/sink/PostCommitOperator.java @@ -0,0 +1,39 @@ +package io.delta.flink.sink; + +import org.apache.flink.streaming.api.connector.sink2.CommittableMessage; +import org.apache.flink.streaming.api.connector.sink2.CommittableSummary; +import org.apache.flink.streaming.api.connector.sink2.CommittableWithLineage; +import org.apache.flink.streaming.api.functions.ProcessFunction; +import org.apache.flink.util.Collector; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +public class PostCommitOperator + extends ProcessFunction, Void> { + + private static final Logger LOG = LoggerFactory.getLogger(PostCommitOperator.class); + + public PostCommitOperator() {} + + @Override + public void processElement( + CommittableMessage value, + ProcessFunction, Void>.Context ctx, + Collector out) + throws Exception { + if (value instanceof CommittableWithLineage) { + CommittableWithLineage committableWithLineage = + (CommittableWithLineage) value; + LOG.debug("Received deltaCommittable {}", committableWithLineage.getCommittable()); + } + if (value instanceof CommittableSummary) { + CommittableSummary committableSummary = + (CommittableSummary) value; + LOG.debug( + "Received committableSummary: {}, # committables: {}, checkpointId: {}", + committableSummary, + committableSummary.getNumberOfCommittables(), + committableSummary.getCheckpointId()); + } + } +} diff --git a/connectors/flink/v1.20/src/main/java/io/delta/flink/sink/README.md b/connectors/flink/v1.20/src/main/java/io/delta/flink/sink/README.md new file mode 100644 index 00000000000..e69de29bb2d diff --git a/connectors/flink/v1.20/src/main/java/io/delta/flink/sink/dynamic/DeltaDynamicTableSink.java b/connectors/flink/v1.20/src/main/java/io/delta/flink/sink/dynamic/DeltaDynamicTableSink.java new file mode 100644 index 00000000000..74c32b59b06 --- /dev/null +++ b/connectors/flink/v1.20/src/main/java/io/delta/flink/sink/dynamic/DeltaDynamicTableSink.java @@ -0,0 +1,72 @@ +package 
io.delta.flink.sink.dynamic; + +import io.delta.flink.sink.DeltaSink; +import java.util.Arrays; +import java.util.Map; +import org.apache.flink.table.api.ValidationException; +import org.apache.flink.table.connector.ChangelogMode; +import org.apache.flink.table.connector.sink.DynamicTableSink; +import org.apache.flink.table.connector.sink.SinkV2Provider; +import org.apache.flink.table.connector.sink.abilities.SupportsPartitioning; +import org.apache.flink.table.types.DataType; +import org.apache.flink.table.types.logical.RowType; + +public class DeltaDynamicTableSink implements DynamicTableSink, SupportsPartitioning { + + private final DataType consumedDataType; + private final Integer configuredParallelism; + private final Map options; // whatever config you need + + public DeltaDynamicTableSink( + DataType consumedDataType, Integer configuredParallelism, Map options) { + this.consumedDataType = consumedDataType; + this.configuredParallelism = configuredParallelism; + this.options = options; + } + + @Override + public ChangelogMode getChangelogMode(ChangelogMode requestedMode) { + return ChangelogMode.insertOnly(); + } + + @Override + public SinkRuntimeProvider getSinkRuntimeProvider(Context context) { + RowType rowType = (RowType) consumedDataType.getLogicalType(); + + DeltaSink deltaSink = + new DeltaSink.Builder() + .withFlinkSchema(rowType) + .withTablePath(options.get("table_path")) + .withPartitionColNames(Arrays.asList(options.getOrDefault("partitions", "").split(","))) + .build(); + + if (configuredParallelism != null) { + return SinkV2Provider.of(deltaSink, configuredParallelism); + } else { + return SinkV2Provider.of(deltaSink); + } + } + + @Override + public DynamicTableSink copy() { + return new DeltaDynamicTableSink( + this.consumedDataType, this.configuredParallelism, this.options); + } + + @Override + public String asSummaryString() { + return "DeltaDynamicTableSink"; + } + + @Override + public void applyStaticPartition(Map partition) { + if (!partition.isEmpty()) { + throw new ValidationException("DeltaSink does not support static partitioning"); + } + } + + @Override + public boolean requiresPartitionGrouping(boolean supportsGrouping) { + return SupportsPartitioning.super.requiresPartitionGrouping(supportsGrouping); + } +} diff --git a/connectors/flink/v1.20/src/main/java/io/delta/flink/sink/dynamic/DeltaDynamicTableSinkFactory.java b/connectors/flink/v1.20/src/main/java/io/delta/flink/sink/dynamic/DeltaDynamicTableSinkFactory.java new file mode 100644 index 00000000000..db7a9301492 --- /dev/null +++ b/connectors/flink/v1.20/src/main/java/io/delta/flink/sink/dynamic/DeltaDynamicTableSinkFactory.java @@ -0,0 +1,64 @@ +package io.delta.flink.sink.dynamic; + +import java.util.HashMap; +import java.util.Map; +import java.util.Set; +import org.apache.flink.configuration.ConfigOption; +import org.apache.flink.configuration.ConfigOptions; +import org.apache.flink.configuration.ReadableConfig; +import org.apache.flink.table.catalog.ResolvedSchema; +import org.apache.flink.table.connector.sink.DynamicTableSink; +import org.apache.flink.table.factories.DynamicTableSinkFactory; +import org.apache.flink.table.factories.FactoryUtil; +import org.apache.flink.table.types.DataType; + +public class DeltaDynamicTableSinkFactory implements DynamicTableSinkFactory { + + ConfigOption TABLE_PATH = + ConfigOptions.key("table_path") + .stringType() + .noDefaultValue() + .withDescription("Delta table path for the sink"); + + ConfigOption PARTITIONS = + ConfigOptions.key("partitions") + 
.stringType() + .noDefaultValue() + .withDescription("Partition column names separated by comma"); + + @Override + public DynamicTableSink createDynamicTableSink(Context context) { + final FactoryUtil.TableFactoryHelper helper = + FactoryUtil.createTableFactoryHelper(this, context); + + helper.validate(); + + ReadableConfig options = helper.getOptions(); + ResolvedSchema schema = context.getCatalogTable().getResolvedSchema(); + DataType consumedDataType = schema.toPhysicalRowDataType(); + + Integer sinkParallelism = options.getOptional(FactoryUtil.SINK_PARALLELISM).orElse(null); + + // TODO Support other Delta properties + Map deltaOptions = new HashMap<>(); + deltaOptions.put(TABLE_PATH.key(), options.get(TABLE_PATH)); + deltaOptions.put(PARTITIONS.key(), options.getOptional(PARTITIONS).orElse("")); + + return new DeltaDynamicTableSink(consumedDataType, sinkParallelism, deltaOptions); + } + + @Override + public String factoryIdentifier() { + return "delta-connector"; + } + + @Override + public Set> requiredOptions() { + return Set.of(TABLE_PATH); + } + + @Override + public Set> optionalOptions() { + return Set.of(PARTITIONS); + } +} diff --git a/connectors/flink/v1.20/src/main/java/io/delta/flink/table/AbstractKernelTable.java b/connectors/flink/v1.20/src/main/java/io/delta/flink/table/AbstractKernelTable.java new file mode 100644 index 00000000000..1dd88f9910a --- /dev/null +++ b/connectors/flink/v1.20/src/main/java/io/delta/flink/table/AbstractKernelTable.java @@ -0,0 +1,307 @@ +package io.delta.flink.table; + +import io.delta.kernel.*; +import io.delta.kernel.data.FilteredColumnarBatch; +import io.delta.kernel.data.Row; +import io.delta.kernel.defaults.engine.DefaultEngine; +import io.delta.kernel.defaults.internal.json.JsonUtils; +import io.delta.kernel.engine.Engine; +import io.delta.kernel.exceptions.TableNotFoundException; +import io.delta.kernel.expressions.Column; +import io.delta.kernel.expressions.Literal; +import io.delta.kernel.internal.data.TransactionStateRow; +import io.delta.kernel.internal.util.Preconditions; +import io.delta.kernel.transaction.CreateTableTransactionBuilder; +import io.delta.kernel.transaction.DataLayoutSpec; +import io.delta.kernel.types.StructType; +import io.delta.kernel.utils.CloseableIterable; +import io.delta.kernel.utils.CloseableIterator; +import io.delta.kernel.utils.DataFileStatus; +import java.io.File; +import java.io.IOException; +import java.net.URI; +import java.net.URISyntaxException; +import java.util.*; +import java.util.stream.Collectors; +import org.apache.hadoop.conf.Configuration; + +/** + * An abstract base class for {@link DeltaTable} implementations backed by the Delta Kernel. + * + *

{@code AbstractKernelTable} provides common functionality for interacting with Delta tables, + * including access to table metadata, schema, partitioning information, and commit operations. + * Concrete subclasses are responsible for supplying catalog-specific or filesystem-specific logic + * such as table discovery, path resolution, and storage I/O. + * + *

This class centralizes shared behavior so that different table backends (e.g., Hadoop-based + * tables, CCv2 catalog tables, custom catalogs) can implement only the backend-specific portions + * while inheriting consistent Delta table semantics. + * + *
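A minimal usage sketch (path and options below are illustrative, not part of this patch): a + * filesystem-backed table can be created through the {@code HadoopTable} subclass in this package, + * e.g. {@code new HadoopTable(URI.create("file:///tmp/delta/events/"), Map.of())}, while + * catalog-managed tables use {@code CCv2Table}; both reuse the shared logic implemented here. + * + *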

Subclasses must provide their own mechanisms for interpreting table identifiers and resolving + * them into physical locations or catalog entries. See + * also @link{io.delta.flink.table.DeltaCatalog} + */ +public abstract class AbstractKernelTable implements DeltaTable { + protected static String ENGINE_INFO = "DeltaSink/Kernel"; + + protected final Catalog catalog; + protected String tableId; + protected String uuid; + protected URI tablePath; + protected final Map configuration; + protected String serializableTableState; + protected List partitionColumns; + + // These fields are not serializable. They need to be recreated + // after the table are serialized. Always access them using getters. + protected transient StructType schema; + protected transient Row tableState; + protected transient Engine engine; + + public AbstractKernelTable( + Catalog catalog, + String tableId, + Map conf, + StructType schema, + List partitionColumns) { + this.catalog = catalog; + this.tableId = tableId; + this.configuration = conf; + this.schema = schema; + this.partitionColumns = partitionColumns; + initialize(); + } + + public AbstractKernelTable(Catalog catalog, String tableId, Map conf) { + this(catalog, tableId, conf, null, null); + } + + protected void initialize() { + try { + Catalog.TableBrief info = catalog.getTable(tableId); + uuid = info.uuid; + tablePath = info.tablePath; + loadExistingTable(); + } catch (TableNotFoundException e) { + Preconditions.checkArgument(schema != null); + initNewTable(schema, partitionColumns); + } + } + + protected void loadExistingTable() { + // With an existing table, partitions loaded from the table take precedence + final Snapshot latestSnapshot = loadLatestSnapshot(); + // We use a temporary transaction to generate a TransactionStateRow. + // It serves as a holder for schema and partition columns. + // The transaction will not be committed, and is discarded afterward. + Row existingTableState = + latestSnapshot + .buildUpdateTableTransaction(ENGINE_INFO, Operation.WRITE) + .build(getEngine()) + .getTransactionState(getEngine()); + this.serializableTableState = JsonUtils.rowToJson(existingTableState); + this.schema = TransactionStateRow.getLogicalSchema(existingTableState); + this.partitionColumns = TransactionStateRow.getPartitionColumnsList(existingTableState); + } + + protected void initNewTable(StructType schema, List partitionColumns) { + Row newTableState = + TableManager.buildCreateTableTransaction(getTablePath().toString(), schema, ENGINE_INFO) + .withDataLayoutSpec( + DataLayoutSpec.partitioned( + Optional.of(partitionColumns) + .map( + nonEmpty -> + nonEmpty.stream().map(Column::new).collect(Collectors.toList())) + .orElseGet(Collections::emptyList))) + .build(getEngine()) + .getTransactionState(getEngine()); + this.serializableTableState = JsonUtils.rowToJson(newTableState); + } + + protected Engine getEngine() { + if (engine == null) { + engine = createEngine(); + } + return engine; + } + + /** + * Subclass may implement this method to generate an engine. 
+ * + * @return engine to access the tables + */ + protected Engine createEngine() { + Configuration conf = new Configuration(); + + // Built-in configurations for common file system access + conf.set("fs.s3.impl", "org.apache.hadoop.fs.s3a.S3AFileSystem"); + conf.set("fs.s3a.path.style.access", "true"); + conf.set("fs.s3.impl.disable.cache", "true"); + conf.set("fs.s3a.impl.disable.cache", "true"); + + conf.set("fs.abfs.impl", "org.apache.hadoop.fs.azurebfs.AzureBlobFileSystem"); + conf.set("fs.abfss.impl", "org.apache.hadoop.fs.azurebfs.SecureAzureBlobFileSystem"); + conf.set("fs.AbstractFileSystem.abfs.impl", "org.apache.hadoop.fs.azurebfs.Abfs"); + conf.set("fs.AbstractFileSystem.abfss.impl", "org.apache.hadoop.fs.azurebfs.Abfss"); + + conf.set("fs.gs.impl", "com.google.cloud.hadoop.fs.gcs.GoogleHadoopFileSystem"); + conf.set("fs.AbstractFileSystem.gs.impl", "com.google.cloud.hadoop.fs.gcs.GoogleHadoopFS"); + + this.configuration.forEach(conf::set); + this.catalog.getCredentials(uuid).forEach(conf::set); + + return DefaultEngine.create(conf); + } + + public Catalog getCatalog() { + return catalog; + } + + @Override + public String getId() { + return tableId; + } + + public String getUuid() { + return uuid; + } + + public Map getConfiguration() { + return configuration; + } + + protected Row getWriteState() { + if (tableState == null) { + tableState = JsonUtils.rowFromJson(serializableTableState, TransactionStateRow.SCHEMA); + } + return tableState; + } + + /** + * Subclass must implement this method to fetch a Kernel snapshot + * + * @return latest snapshot of the table + */ + protected abstract Snapshot loadLatestSnapshot(); + + /** The table storage location where all data and metadata files should be stored. */ + public URI getTablePath() { + return tablePath; + } + + @Override + public StructType getSchema() { + if (schema == null) { + schema = TransactionStateRow.getLogicalSchema(getWriteState()); + } + return schema; + } + + @Override + public List getPartitionColumns() { + return partitionColumns; + } + + @Override + public void refresh() { + Snapshot latestSnapshot = loadLatestSnapshot(); + this.schema = latestSnapshot.getSchema(); + this.partitionColumns = latestSnapshot.getPartitionColumnNames(); + // TODO Refresh write state + } + + @Override + public void commit(CloseableIterable actions) { + Engine localEngine = getEngine(); + Transaction txn; + try { + Snapshot snapshot = loadLatestSnapshot(); + txn = snapshot.buildUpdateTableTransaction(ENGINE_INFO, Operation.WRITE).build(engine); + // We check the table's latest schema is still the same as committer schema. + // The check is delayed here to detect external modification to the table schema. + // TODO remove this after kernel support Column Mapping + final StructType tableSchema = txn.getSchema(engine); + final StructType committingSchema = getSchema(); + Preconditions.checkArgument( + committingSchema.equivalent(tableSchema), + String.format( + "DeltaSink does not support schema evolution. 
" + + "Table schema: %s, Committer schema: %s", + tableSchema, committingSchema)); + } catch (TableNotFoundException e) { + CreateTableTransactionBuilder txnBuilder = + TableManager.buildCreateTableTransaction( + getTablePath().toString(), getSchema(), ENGINE_INFO); + if (!getPartitionColumns().isEmpty()) { + txnBuilder.withDataLayoutSpec( + DataLayoutSpec.partitioned( + getPartitionColumns().stream().map(Column::new).collect(Collectors.toList()))); + } + txn = txnBuilder.build(localEngine); + } + txn.commit(localEngine, actions); + } + + @Override + public CloseableIterator writeParquet( + String pathSuffix, + CloseableIterator data, + Map partitionValues) + throws IOException { + Engine localEngine = getEngine(); + Row writeState = getWriteState(); + + final CloseableIterator physicalData = + Transaction.transformLogicalData(localEngine, writeState, data, partitionValues); + + final DataWriteContext writeContext = + Transaction.getWriteContext(localEngine, writeState, partitionValues); + final CloseableIterator dataFiles = + localEngine + .getParquetHandler() + .writeParquetFiles( + getTablePath().resolve(pathSuffix).toString(), + physicalData, + writeContext.getStatisticsColumns()); + return Transaction.generateAppendActions(localEngine, writeState, dataFiles, writeContext); + } + + /** + * Normalizes the given URI string to a canonical form. The normalization includes: + * + *

    + *
  • Ensuring file URIs use the standard triple-slash form (e.g., {@code file:/abc/def} → + * {@code file:///abc/def}). + *
  • Appending a trailing slash to paths that do not already end with {@code /}. + *
+ * + *
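For example (inputs are illustrative): {@code file:/tmp/delta} is rewritten to the + * {@code file:///tmp/delta} form, and {@code s3a://bucket/table} becomes {@code s3a://bucket/table/}. + * + *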

This method is useful for making URI comparisons consistent and avoiding issues caused by + * variations in file URI formatting or missing trailing path delimiters. + * + * @param input the URI to normalize; + * @return the normalized URI + */ + public static URI normalize(URI input) { + if (input == null) { + return null; + } + if (input.getScheme() == null) { + return new File(input.toString()).toPath().toUri(); + } + // Normalize "file:/xxx/" to "file:///xxx/" + if (input.getScheme().equals("file")) { + return new File(input).toPath().toUri(); + } + try { + // Normalize "abc://def/xxx" to "abc://def/xxx/" + if (!input.getPath().endsWith("/")) { + return new URI( + input.getScheme(), input.getHost(), input.getPath() + "/", input.getFragment()); + } + } catch (URISyntaxException e) { + throw new RuntimeException(e); + } + return input; + } +} diff --git a/connectors/flink/v1.20/src/main/java/io/delta/flink/table/CCv2Table.java b/connectors/flink/v1.20/src/main/java/io/delta/flink/table/CCv2Table.java new file mode 100644 index 00000000000..adf99510601 --- /dev/null +++ b/connectors/flink/v1.20/src/main/java/io/delta/flink/table/CCv2Table.java @@ -0,0 +1,54 @@ +package io.delta.flink.table; + +import io.delta.kernel.Snapshot; +import io.delta.kernel.unitycatalog.UCCatalogManagedClient; +import io.delta.storage.commit.uccommitcoordinator.UCClient; +import io.delta.storage.commit.uccommitcoordinator.UCTokenBasedRestClient; +import java.util.Map; +import java.util.Optional; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * A {@code CCv2Table} provides functionality for interacting with tables managed by a CCv2 catalog. + * It supports: + * + *

    + *
  • loading existing tables from a catalog via the UC Open API, and + *
  • committing table changes back to the CCv2 catalog. + *
+ * + *

Note: {@code CCv2Table} does not support creating new tables. Instances must + * reference an existing catalog table; attempts to create or initialize new tables through this + * interface are not supported. + */ +public class CCv2Table extends AbstractKernelTable { + + private static Logger LOG = LoggerFactory.getLogger(CCv2Table.class); + + public static final String CATALOG_ENDPOINT = "catalog.endpoint"; + public static final String CATALOG_TOKEN = "catalog.token"; + + public CCv2Table(Catalog catalog, String tableId, Map conf) { + super(catalog, tableId, conf); + } + + protected transient UCCatalogManagedClient ccv2Client; + + protected UCCatalogManagedClient getCcv2Client() { + if (ccv2Client == null) { + String endpointUri = configuration.get(CATALOG_ENDPOINT); + String token = configuration.get(CATALOG_TOKEN); + UCClient storageClient = new UCTokenBasedRestClient(endpointUri, token); + ccv2Client = new UCCatalogManagedClient(storageClient); + } + return ccv2Client; + } + + @Override + protected Snapshot loadLatestSnapshot() { + // TODO refresh credentials on exception + return getCcv2Client() + .loadSnapshot(getEngine(), uuid, tablePath.toString(), Optional.empty(), Optional.empty()); + } +} diff --git a/connectors/flink/v1.20/src/main/java/io/delta/flink/table/Catalog.java b/connectors/flink/v1.20/src/main/java/io/delta/flink/table/Catalog.java new file mode 100644 index 00000000000..074693efaef --- /dev/null +++ b/connectors/flink/v1.20/src/main/java/io/delta/flink/table/Catalog.java @@ -0,0 +1,69 @@ +package io.delta.flink.table; + +import java.net.URI; +import java.util.Map; + +/** + * A {@code Catalog} abstracts interaction with an external table catalog or metadata service. + * + *

The catalog is responsible for resolving logical table identifiers into concrete table + * metadata and for providing the credentials required to access the underlying storage system. This + * abstraction allows different catalog implementations (e.g., filesystem-based catalogs, + * metastore-backed catalogs, or REST-based catalogs) to be used interchangeably by higher-level + * components. + * + *

Typical responsibilities of a {@code Catalog} include: + * + *

    + *
  • mapping table identifiers to physical table locations, + *
  • providing stable table UUIDs for identification and caching, and + *
  • supplying credential or configuration information required for table access. + *
+ */ +public interface Catalog { + + /** + * Loads metadata for a table identified by the given table identifier. + * + *
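A typical call looks like {@code TableBrief brief = catalog.getTable("main.analytics.events")}; the + * identifier shown here is purely illustrative. + * + *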

The identifier format and naming conventions are defined by the specific catalog + * implementation. Implementations may interpret the identifier as a logical name, a + * fully-qualified path, or another catalog-specific reference. + * + * @param tableId the logical identifier of the table to load; must not be {@code null} + * @return a {@link TableBrief} object describing the resolved table + * @throws IllegalArgumentException if the identifier is invalid + * @throws RuntimeException if the table cannot be resolved or loaded + */ + TableBrief getTable(String tableId); + + /** + * Returns the credentials or configuration properties required to access the table identified by + * the given UUID. + * + *

The returned map may contain authentication information, endpoint configuration, or other + * filesystem- or catalog-specific properties. The exact contents and semantics are defined by the + * catalog implementation. + * + * @param uuid the unique identifier of the table + * @return a map of credential or configuration properties; may be empty but never {@code null} + */ + Map getCredentials(String uuid); + + /** + * A container for table metadata resolved by a {@link Catalog}. + * + *

{@code TableBrief} describes the essential properties needed to locate and access a table, + * independent of the underlying catalog implementation. + */ + class TableBrief { + + /** The logical identifier used to resolve the table. */ + String tableId; + + /** A stable UUID that uniquely identifies the table. */ + String uuid; + + /** The normalized physical location of the table. */ + URI tablePath; + } +} diff --git a/connectors/flink/v1.20/src/main/java/io/delta/flink/table/DeltaTable.java b/connectors/flink/v1.20/src/main/java/io/delta/flink/table/DeltaTable.java new file mode 100644 index 00000000000..483c45b94d6 --- /dev/null +++ b/connectors/flink/v1.20/src/main/java/io/delta/flink/table/DeltaTable.java @@ -0,0 +1,140 @@ +package io.delta.flink.table; + +import io.delta.kernel.data.FilteredColumnarBatch; +import io.delta.kernel.data.Row; +import io.delta.kernel.expressions.Literal; +import io.delta.kernel.types.StructType; +import io.delta.kernel.utils.CloseableIterable; +import io.delta.kernel.utils.CloseableIterator; +import java.io.IOException; +import java.io.Serializable; +import java.util.List; +import java.util.Map; + +/** + * A {@code DeltaTable} represents a logical view of a Delta table and provides access to both + * static table metadata (such as schema and partitioning) and operations for reading and writing + * table data. + * + *

A {@code DeltaTable} instance abstracts the underlying Delta transaction log and storage + * layout. Implementations are responsible for: + * + *

    + *
  • exposing immutable table metadata (schema, partition information) + *
  • managing transaction boundaries and versioned commits + *
  • coordinating reads and writes against the physical table storage + *
  • serializing table changes into Delta {@code actions} and committing them atomically + *
+ * + *

All implementations must be {@link Serializable} to allow use in distributed execution + * environments. + */ +public interface DeltaTable extends Serializable { + + /** + * Returns a stable identifier that uniquely represents this table within its catalog or storage + * system. + * + *

The identifier may be: + * + *

    + *
  • a logical table name (e.g., {@code "database.table"}) + *
  • a fully-qualified catalog path + *
  • a filesystem or object-store URI + *
+ * + * @return a unique logical identifier for the table + */ + String getId(); + + /** + * Returns the table schema as a {@link StructType}. + * + *

The schema defines the logical column structure of the table. Implementations should + * guarantee that the schema corresponds to the latest committed version unless otherwise + * documented. + * + * @return the table schema + */ + StructType getSchema(); + + /** + * Returns the list of partition columns for this table. + * + *

The returned list defines the physical partitioning strategy used by the table. The ordering + * of columns follows the table’s partition specification and should be stable across versions. + * + * @return an ordered list of partition column names + */ + List getPartitionColumns(); + + /** + * Commits a new version to the table by applying the provided Delta actions. + * + *
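A minimal sketch, assuming {@code addFileRows} is a hypothetical iterator of serialized AddFile + * action rows: {@code table.commit(CloseableIterable.inMemoryIterable(addFileRows))}. + * + *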

Actions may include (but are not limited to): + * + *

    + *
  • {@code AddFile} records representing new data files + *
  • {@code RemoveFile} records removing obsolete files + *
  • metadata updates or protocol changes + *
+ * + *

Implementations must ensure atomicity: either all provided actions are committed as part of + * a new table version, or none are. Commit conflicts should be detected and surfaced as + * exceptions. + * + * @param actions an iterable collection of Delta actions to commit; the caller is responsible for + * closing the iterable + */ + void commit(CloseableIterable actions); + + /** + * Refreshes the table state by reloading the latest snapshot metadata. + * + *

This method updates the in-memory view of the table to reflect the most recently committed + * version, including: + * + *

    + *
  • the latest table schema, + *
  • partition column definitions, and + *
  • any other metadata derived from the current Delta log snapshot. + *
+ * + *

{@code refresh()} should be invoked when external changes to the table may have occurred + * (for example, commits from other writers) and the caller requires an up-to-date view before + * performing read or write operations. + * + *

Implementations may perform I/O and metadata parsing as part of this operation. + */ + void refresh(); + + /** + * Writes one or more Parquet files as part of the table and emits the corresponding {@code + * AddFile} action describing the newly written data. + * + *
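A hedged usage sketch (names and values are illustrative): {@code table.writeParquet("part-0", + * batches, Map.of("part", Literal.ofString("p0")))} writes the given batches under the table root + * and returns the corresponding action rows. + * + *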

This operation is responsible for: + * + *

    + *
  • writing to the underlying storage layer using the specified {@code data} + *
  • constructing physical file paths by appending {@code pathSuffix} to the table root + *
  • materializing partition values into the file metadata + *
  • returning a Row describing the resulting {@code AddFile} action + *
+ * + *

The returned iterator typically contains exactly one row (the AddFile action), but + * implementations may return multiple actions depending on file-splitting behavior. + * + * @param pathSuffix a suffix appended to the table path when generating file locations + * @param data an iterator over row batches to be written as Parquet files; the caller must close + * it + * @param partitionValues a mapping of partition column names to their literal values + * @return an iterator over {@code Row} objects representing the AddFile actions generated during + * the write + * @throws IOException if data writing or file creation fails + */ + CloseableIterator writeParquet( + String pathSuffix, + CloseableIterator data, + Map partitionValues) + throws IOException; +} diff --git a/connectors/flink/v1.20/src/main/java/io/delta/flink/table/HadoopCatalog.java b/connectors/flink/v1.20/src/main/java/io/delta/flink/table/HadoopCatalog.java new file mode 100644 index 00000000000..47bb1a7d86b --- /dev/null +++ b/connectors/flink/v1.20/src/main/java/io/delta/flink/table/HadoopCatalog.java @@ -0,0 +1,79 @@ +package io.delta.flink.table; + +import java.net.URI; +import java.util.Map; + +/** + * A {@code HadoopCatalog} is a file-system–backed catalog implementation that resolves tables using + * Hadoop-compatible file system paths. + * + *
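For illustration (the S3A keys are standard Hadoop options; the values are placeholders): + * {@code new HadoopCatalog(Map.of("fs.s3a.access.key", accessKey, "fs.s3a.secret.key", secretKey))}. + * + *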

This catalog treats the table path itself as the table identifier. As a result, table + * discovery does not rely on an external metastore or catalog service. Instead, tables are located + * directly by resolving the provided identifier as a filesystem URI. + * + *

{@code HadoopCatalog} supports static credential configuration supplied at construction time. + * These credentials are returned verbatim and are intended for use by downstream components (e.g., + * table loaders or writers) when accessing the underlying storage system. + * + *

This catalog is suitable for environments where: + * + *

    + *
  • tables are stored in Hadoop-compatible file systems (e.g., HDFS, S3A, ABFS), + *
  • table identity is defined by filesystem location, and + *
  • credentials are configured statically rather than dynamically resolved. + *
+ * + *

The catalog does not perform validation of table existence or schema during lookup; it simply + * resolves the table path and returns the corresponding {@link TableBrief}. + */ +public class HadoopCatalog implements Catalog { + + private final Map configurations; + + /** + * Creates a {@code HadoopCatalog} with the given static credential configuration. + * + *

The provided configuration map is expected to contain all necessary key-value pairs required + * to access the underlying Hadoop-compatible file system (for example, access keys, secrets, or + * endpoint configurations). + * + * @param conf a map of static configuration and credential properties + */ + public HadoopCatalog(Map conf) { + this.configurations = conf; + } + + /** + * Loads a table using the given table identifier, which is interpreted as a filesystem path. + * + *

The {@code tableId} is normalized and resolved into a {@link URI} representing the table + * location. The same identifier is also used as the table UUID, as this catalog does not maintain + * a separate identifier namespace. + * + * @param tableId the table identifier, interpreted as a filesystem path or URI + * @return a {@link TableBrief} describing the resolved table + */ + @Override + public TableBrief getTable(String tableId) { + URI tablePath = AbstractKernelTable.normalize(URI.create(tableId)); + TableBrief info = new TableBrief(); + info.tableId = tableId; + info.tablePath = tablePath; + info.uuid = tableId; + return info; + } + + /** + * Returns the static credential configuration associated with this catalog. + * + *

Because this catalog only supports static credentials, the returned configuration is + * independent of the provided UUID and is shared across all tables. + * + * @param uuid the table UUID (ignored by this implementation) + * @return a map of static credential and configuration properties + */ + @Override + public Map getCredentials(String uuid) { + return configurations; + } +} diff --git a/connectors/flink/v1.20/src/main/java/io/delta/flink/table/HadoopTable.java b/connectors/flink/v1.20/src/main/java/io/delta/flink/table/HadoopTable.java new file mode 100644 index 00000000000..9789f662f9a --- /dev/null +++ b/connectors/flink/v1.20/src/main/java/io/delta/flink/table/HadoopTable.java @@ -0,0 +1,42 @@ +package io.delta.flink.table; + +import io.delta.kernel.Snapshot; +import io.delta.kernel.TableManager; +import io.delta.kernel.types.StructType; +import java.net.URI; +import java.util.List; +import java.util.Map; + +/** + * A {@code HadoopTable} is a {@link DeltaTable} implementation backed by a Hadoop-compatible file + * system. This implementation loads and manages Delta table metadata directly from the underlying + * storage layer rather than from an external catalog service. + * + *
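A minimal construction sketch (schema and partition column are illustrative): + * {@code new HadoopTable(URI.create("file:///tmp/delta/events/"), Map.of(), schema, List.of("part"))}. + * + *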

{@code HadoopTable} is typically used in environments where: + * + *

    + *
  • tables are stored in distributed file systems such as HDFS, ABFS, S3A (via Hadoop FS), or + * other file-system–compatible backends, + *
  • no metastore or catalog service is required for table discovery, and + *
  • file-system paths are the primary means of identifying and accessing tables. + *
+ * + *

This implementation is suitable for standalone deployments, filesystem-based analytics + * pipelines, and connector implementations where the Hadoop FileSystem abstraction is available. + */ +public class HadoopTable extends AbstractKernelTable { + + public HadoopTable(URI tablePath, Map conf) { + this(tablePath, conf, null, null); + } + + public HadoopTable( + URI tablePath, Map conf, StructType schema, List partitionColumns) { + super(new HadoopCatalog(conf), tablePath.toString(), conf, schema, partitionColumns); + } + + @Override + protected Snapshot loadLatestSnapshot() { + return TableManager.loadSnapshot(getTablePath().toString()).build(getEngine()); + } +} diff --git a/connectors/flink/v1.20/src/main/java/io/delta/flink/table/RESTCatalog.java b/connectors/flink/v1.20/src/main/java/io/delta/flink/table/RESTCatalog.java new file mode 100644 index 00000000000..e85a907497b --- /dev/null +++ b/connectors/flink/v1.20/src/main/java/io/delta/flink/table/RESTCatalog.java @@ -0,0 +1,165 @@ +package io.delta.flink.table; + +import io.unitycatalog.client.ApiClient; +import io.unitycatalog.client.ApiException; +import io.unitycatalog.client.api.TablesApi; +import io.unitycatalog.client.api.TemporaryCredentialsApi; +import io.unitycatalog.client.model.GenerateTemporaryTableCredential; +import io.unitycatalog.client.model.TableInfo; +import io.unitycatalog.client.model.TableOperation; +import io.unitycatalog.client.model.TemporaryCredentials; +import java.net.URI; +import java.util.HashMap; +import java.util.Map; +import java.util.Objects; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * A {@code RESTCatalog} is a {@link Catalog} implementation that interacts with an external catalog + * service via REST APIs. + * + *
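For illustration (endpoint and token are placeholders): + * {@code new RESTCatalog("https://uc.example.com", token)} resolves table metadata and temporary + * storage credentials through the remote service. + * + *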

This catalog resolves table metadata and access credentials by communicating with a remote + * catalog endpoint. It is typically used in environments where table discovery, authorization, and + * credential issuance are managed centrally by a service rather than through filesystem-based + * catalogs. + * + *

The catalog uses: + * + *

    + *
  • a REST endpoint to load table metadata (e.g., storage location, table identifiers), and + *
  • temporary credentials generated by the catalog service to access underlying storage. + *
+ * + *

Authentication to the catalog service is performed using a bearer token supplied at + * construction time. The token is attached to all outgoing REST requests. + */ +public class RESTCatalog implements Catalog { + + private static Logger LOG = LoggerFactory.getLogger(RESTCatalog.class); + + public static String CATALOG_ENDPOINT = "catalog.endpoint"; + public static String CATALOG_TOKEN = "catalog.token"; + + private final URI catalogEndpoint; + private final String catalogToken; + + /** + * Lazily initialized API client used to communicate with the catalog service. This field is + * marked transient to avoid serialization issues in distributed environments. + */ + protected transient ApiClient apiClient; + + /** + * Creates a {@code RESTCatalog} with the given endpoint and authentication token. + * + * @param endpoint the catalog REST endpoint URI as a string + * @param token a bearer token used to authenticate REST requests + */ + public RESTCatalog(String endpoint, String token) { + this.catalogEndpoint = URI.create(endpoint); + this.catalogToken = token; + } + + /** + * Returns the catalog REST endpoint. + * + * @return the catalog endpoint URI + */ + public URI getCatalogEndpoint() { + return catalogEndpoint; + } + + /** + * Returns the authentication token used for catalog access. + * + * @return the catalog bearer token + */ + public String getCatalogToken() { + return catalogToken; + } + + /** + * Lazily initialize an {@link ApiClient} for communicating with the catalog service. + * + * @return an initialized {@link ApiClient} + */ + protected ApiClient getApiClient() { + if (apiClient == null) { + apiClient = + new ApiClient() + .setScheme(catalogEndpoint.getScheme()) + .setHost(catalogEndpoint.getHost()) + .setPort(catalogEndpoint.getPort()) + .setRequestInterceptor( + request -> request.header("Authorization", "Bearer " + catalogToken)); + } + return apiClient; + } + + /** + * Loads table metadata from the remote catalog. + * + *

This method retrieves table information via the catalog REST API and resolves the table + * storage location into a normalized {@link URI}. The returned {@link TableBrief} contains the + * table UUID and resolved storage path. + * + * @param tableId the logical identifier of the table + * @return a {@link TableBrief} describing the resolved table + * @throws RuntimeException if the table cannot be loaded due to API or network errors + */ + @Override + public TableBrief getTable(String tableId) { + TablesApi tablesApi = new TablesApi(getApiClient()); + try { + TableInfo tableInfo = tablesApi.getTable(tableId); + TableBrief brief = new TableBrief(); + brief.tablePath = + AbstractKernelTable.normalize( + URI.create(Objects.requireNonNull(tableInfo.getStorageLocation()))); + brief.uuid = tableInfo.getTableId(); + LOG.debug("Loaded table with UUID {} at {}", brief.uuid, brief.tablePath); + return brief; + } catch (ApiException e) { + throw new RuntimeException(e); + } + } + + /** + * Retrieves temporary credentials required to access the underlying storage for the given table. + * + *

This implementation requests short-lived credentials from the catalog service that are + * scoped to the specified table and operation type (read/write). + * + *

TODO: Currently, only AWS temporary credentials are supported. The returned configuration + * map contains Hadoop-compatible S3A credential keys. + * + * @param tableId the table identifier for which credentials are requested + * @return a map of filesystem configuration properties containing temporary credentials + * @throws RuntimeException if credential generation fails due to API or network errors + */ + @Override + public Map getCredentials(String tableId) { + try { + TemporaryCredentialsApi credentialsApi = new TemporaryCredentialsApi(getApiClient()); + TemporaryCredentials credentials = + credentialsApi.generateTemporaryTableCredentials( + new GenerateTemporaryTableCredential() + .tableId(tableId) + .operation(TableOperation.READ_WRITE)); + + Map result = new HashMap<>(); + + // AWS credentials + // TODO: Add Azure and GCP credential support + if (credentials.getAwsTempCredentials() != null) { + result.put("fs.s3a.access.key", credentials.getAwsTempCredentials().getAccessKeyId()); + result.put("fs.s3a.secret.key", credentials.getAwsTempCredentials().getSecretAccessKey()); + result.put("fs.s3a.session.token", credentials.getAwsTempCredentials().getSessionToken()); + } + return result; + } catch (ApiException e) { + throw new RuntimeException(e); + } + } +} diff --git a/connectors/flink/v1.20/src/main/resources/META-INF/services/org.apache.flink.table.factories.Factory b/connectors/flink/v1.20/src/main/resources/META-INF/services/org.apache.flink.table.factories.Factory new file mode 100644 index 00000000000..ad713b41158 --- /dev/null +++ b/connectors/flink/v1.20/src/main/resources/META-INF/services/org.apache.flink.table.factories.Factory @@ -0,0 +1 @@ +io.delta.flink.sink.dynamic.DeltaDynamicTableSinkFactory \ No newline at end of file diff --git a/connectors/flink/v1.20/src/test/java/io/delta/flink/sink/DelayFinishTestSource.java b/connectors/flink/v1.20/src/test/java/io/delta/flink/sink/DelayFinishTestSource.java new file mode 100644 index 00000000000..18a559763e5 --- /dev/null +++ b/connectors/flink/v1.20/src/test/java/io/delta/flink/sink/DelayFinishTestSource.java @@ -0,0 +1,58 @@ +package io.delta.flink.sink; + +import java.util.ArrayList; +import java.util.List; +import java.util.concurrent.atomic.AtomicInteger; +import org.apache.flink.api.common.state.CheckpointListener; +import org.apache.flink.streaming.api.functions.source.SourceFunction; + +/** + * This DataStream Source waits for all the following conditions to send the finish signal + * + *

    + *
  • The records are exhausted + *
  • At least one checkpoint is finished after the records are exhausted + *
+ * + * This gives the test environment enough time to finish the last checkpoint with data before the + * environment shuts down. + */ +public class DelayFinishTestSource implements SourceFunction, CheckpointListener { + + private ArrayList data = new ArrayList<>(); + private volatile boolean exhausted = false; + + private AtomicInteger counter = new AtomicInteger(0); + + public DelayFinishTestSource(List data, int delayCount) { + this.data.addAll(data); + this.counter.set(delayCount); + } + + @Override + public void notifyCheckpointComplete(long checkpointId) throws Exception { + if (exhausted) { + counter.decrementAndGet(); + } + } + + @Override + public void run(SourceContext ctx) throws Exception { + data.forEach( + item -> { + synchronized (ctx.getCheckpointLock()) { + ctx.collect(item); + } + }); + exhausted = true; + // When the data exhausts, wait for checkpoints + while (counter.get() > 0) { + Thread.sleep(100L); + } + } + + @Override + public void cancel() { + // TODO not implemented + } +} diff --git a/connectors/flink/v1.20/src/test/java/io/delta/flink/sink/DelayFinishTestSourceV2.java b/connectors/flink/v1.20/src/test/java/io/delta/flink/sink/DelayFinishTestSourceV2.java new file mode 100644 index 00000000000..6bdbd87d858 --- /dev/null +++ b/connectors/flink/v1.20/src/test/java/io/delta/flink/sink/DelayFinishTestSourceV2.java @@ -0,0 +1,165 @@ +package io.delta.flink.sink; + +import java.io.IOException; +import java.util.HashSet; +import java.util.Iterator; +import java.util.List; +import java.util.Set; +import java.util.concurrent.CompletableFuture; +import java.util.concurrent.atomic.AtomicInteger; +import org.apache.flink.api.connector.source.*; +import org.apache.flink.core.io.InputStatus; +import org.apache.flink.core.io.SimpleVersionedSerializer; + +public class DelayFinishTestSourceV2 + implements Source { + + @Override + public Boundedness getBoundedness() { + return null; + } + + @Override + public SplitEnumerator createEnumerator( + SplitEnumeratorContext enumContext) throws Exception { + return null; + } + + @Override + public SplitEnumerator restoreEnumerator( + SplitEnumeratorContext enumContext, Void checkpoint) throws Exception { + return null; + } + + @Override + public SimpleVersionedSerializer getSplitSerializer() { + return null; + } + + @Override + public SimpleVersionedSerializer getEnumeratorCheckpointSerializer() { + return null; + } + + @Override + public SourceReader createReader(SourceReaderContext readerContext) + throws Exception { + return null; + } + + public static class DFTSplit implements SourceSplit { + private final String splitId; + + public DFTSplit(String id) { + this.splitId = id; + } + + @Override + public String splitId() { + return splitId; + } + } + + public static class DFTSplitEnumerator implements SplitEnumerator { + private final SplitEnumeratorContext context; + private final Set assigned = new HashSet<>(); + + public DFTSplitEnumerator(SplitEnumeratorContext context) { + this.context = context; + } + + @Override + public void start() {} + + @Override + public void handleSplitRequest(int subtaskId, String hostname) { + if (!assigned.contains(subtaskId)) { + assigned.add(subtaskId); + context.assignSplit(new DFTSplit("split-" + subtaskId), subtaskId); + context.signalNoMoreSplits(subtaskId); + } + } + + @Override + public void addSplitsBack(List splits, int subtaskId) { + assigned.remove(subtaskId); + } + + @Override + public void addReader(int subtaskId) {} + + @Override + public Void snapshotState(long checkpointId) throws 
Exception { + return null; // no state + } + + @Override + public void close() throws IOException {} + } + + public static class DFTReader implements SourceReader { + + private final SourceReaderContext context; + private boolean exhausted = false; + private List data; + private Iterator iterator = null; + + private AtomicInteger counter = new AtomicInteger(2); + + public DFTReader(List data, SourceReaderContext context) { + this.data = data; + this.context = context; + this.iterator = data.iterator(); + } + + @Override + public void start() {} + + @Override + public InputStatus pollNext(ReaderOutput out) throws Exception { + if (exhausted) { + // When the data exhausts, wait for checkpoints + while (counter.get() > 0) { + Thread.sleep(100L); + return InputStatus.MORE_AVAILABLE; + } + return InputStatus.END_OF_INPUT; + } + + if (iterator.hasNext()) { + T value = iterator.next(); + out.collect(value); + return InputStatus.MORE_AVAILABLE; + } else { + exhausted = true; + return InputStatus.MORE_AVAILABLE; + } + } + + @Override + public CompletableFuture isAvailable() { + return null; + } + + @Override + public List snapshotState(long checkpointId) { + return List.of(); + } + + @Override + public void addSplits(List splits) { + // You can ignore split contents if simple, or store them for use + } + + @Override + public void notifyNoMoreSplits() {} + + @Override + public void notifyCheckpointComplete(long checkpointId) throws Exception { + counter.decrementAndGet(); + } + + @Override + public void close() throws IOException {} + } +} diff --git a/connectors/flink/v1.20/src/test/java/io/delta/flink/sink/TestCommitterInitContext.java b/connectors/flink/v1.20/src/test/java/io/delta/flink/sink/TestCommitterInitContext.java new file mode 100644 index 00000000000..85bdc9cae95 --- /dev/null +++ b/connectors/flink/v1.20/src/test/java/io/delta/flink/sink/TestCommitterInitContext.java @@ -0,0 +1,98 @@ +package io.delta.flink.sink; + +import java.util.OptionalLong; +import org.apache.flink.api.common.JobID; +import org.apache.flink.api.common.JobInfo; +import org.apache.flink.api.common.TaskInfo; +import org.apache.flink.api.connector.sink2.CommitterInitContext; +import org.apache.flink.metrics.groups.SinkCommitterMetricGroup; +import org.apache.flink.runtime.metrics.groups.InternalSinkCommitterMetricGroup; +import org.apache.flink.runtime.metrics.groups.UnregisteredMetricGroups; + +public class TestCommitterInitContext implements CommitterInitContext { + private final int subtaskId; + private final int parallelism; + private final int attempt; + private final JobInfo jobInfo; + private final TaskInfo taskInfo; + private final SinkCommitterMetricGroup metricGroup = + InternalSinkCommitterMetricGroup.wrap( + UnregisteredMetricGroups.createUnregisteredOperatorMetricGroup()); + + public TestCommitterInitContext(int subtaskId, int parallelism, int attempt) { + this.subtaskId = subtaskId; + this.parallelism = parallelism; + this.attempt = attempt; + + this.jobInfo = + new JobInfo() { + @Override + public JobID getJobId() { + return JobID.generate(); + } + + @Override + public String getJobName() { + return "test-job"; + } + }; + + this.taskInfo = + new TaskInfo() { + @Override + public String getTaskName() { + return "test-task"; + } + + @Override + public int getMaxNumberOfParallelSubtasks() { + return TestCommitterInitContext.this.parallelism; + } + + @Override + public int getNumberOfParallelSubtasks() { + return TestCommitterInitContext.this.parallelism; + } + + @Override + public int 
getIndexOfThisSubtask() { + return TestCommitterInitContext.this.subtaskId; + } + + @Override + public int getAttemptNumber() { + return TestCommitterInitContext.this.attempt; + } + + @Override + public String getTaskNameWithSubtasks() { + return "aaa"; + } + + @Override + public String getAllocationIDAsString() { + return "bbb"; + } + }; + } + + @Override + public SinkCommitterMetricGroup metricGroup() { + return metricGroup; + } + + @Override + public OptionalLong getRestoredCheckpointId() { + return OptionalLong.empty(); + } + + @Override + public JobInfo getJobInfo() { + return jobInfo; + } + + @Override + public TaskInfo getTaskInfo() { + return taskInfo; + } +} diff --git a/connectors/flink/v1.20/src/test/java/io/delta/flink/sink/TestWriterInitContext.java b/connectors/flink/v1.20/src/test/java/io/delta/flink/sink/TestWriterInitContext.java new file mode 100644 index 00000000000..1aaf950228a --- /dev/null +++ b/connectors/flink/v1.20/src/test/java/io/delta/flink/sink/TestWriterInitContext.java @@ -0,0 +1,182 @@ +package io.delta.flink.sink; + +import java.util.Optional; +import java.util.OptionalLong; +import java.util.concurrent.ScheduledFuture; +import java.util.function.Consumer; +import org.apache.flink.api.common.JobID; +import org.apache.flink.api.common.JobInfo; +import org.apache.flink.api.common.TaskInfo; +import org.apache.flink.api.common.operators.MailboxExecutor; +import org.apache.flink.api.common.operators.ProcessingTimeService; +import org.apache.flink.api.common.serialization.SerializationSchema; +import org.apache.flink.api.common.typeutils.TypeSerializer; +import org.apache.flink.api.connector.sink2.WriterInitContext; +import org.apache.flink.metrics.groups.SinkWriterMetricGroup; +import org.apache.flink.runtime.metrics.groups.InternalSinkWriterMetricGroup; +import org.apache.flink.runtime.metrics.groups.UnregisteredMetricGroups; +import org.apache.flink.streaming.runtime.tasks.StreamTaskActionExecutor; +import org.apache.flink.streaming.runtime.tasks.TestProcessingTimeService; +import org.apache.flink.streaming.runtime.tasks.mailbox.MailboxExecutorImpl; +import org.apache.flink.streaming.runtime.tasks.mailbox.TaskMailboxImpl; +import org.apache.flink.util.SimpleUserCodeClassLoader; +import org.apache.flink.util.UserCodeClassLoader; + +public class TestWriterInitContext implements WriterInitContext { + + private final int subtaskId; + private final int parallelism; + private final int attempt; + private final TestProcessingTimeService processingTimeService = new TestProcessingTimeService(); + private final MailboxExecutor mailboxExecutor; + private final SinkWriterMetricGroup metricGroup; + private final JobInfo jobInfo; + private final TaskInfo taskInfo; + + public TestWriterInitContext(int subtaskId, int parallelism, int attempt) { + this.subtaskId = subtaskId; + this.parallelism = parallelism; + this.attempt = attempt; + + this.mailboxExecutor = + new MailboxExecutorImpl( + new TaskMailboxImpl(Thread.currentThread()), + Integer.MAX_VALUE, + StreamTaskActionExecutor.IMMEDIATE); + + this.metricGroup = + InternalSinkWriterMetricGroup.wrap( + UnregisteredMetricGroups.createUnregisteredOperatorMetricGroup()); + + this.jobInfo = + new JobInfo() { + @Override + public JobID getJobId() { + return JobID.generate(); + } + + @Override + public String getJobName() { + return "test-job"; + } + }; + + this.taskInfo = + new TaskInfo() { + @Override + public String getTaskName() { + return "test-task"; + } + + @Override + public int getMaxNumberOfParallelSubtasks() { + return 
TestWriterInitContext.this.parallelism; + } + + @Override + public int getNumberOfParallelSubtasks() { + return TestWriterInitContext.this.parallelism; + } + + @Override + public int getIndexOfThisSubtask() { + return TestWriterInitContext.this.subtaskId; + } + + @Override + public int getAttemptNumber() { + return TestWriterInitContext.this.attempt; + } + + @Override + public String getTaskNameWithSubtasks() { + return "aaa"; + } + + @Override + public String getAllocationIDAsString() { + return "bbb"; + } + }; + } + + @Override + public int getSubtaskId() { + return subtaskId; + } + + @Override + public int getNumberOfParallelSubtasks() { + return parallelism; + } + + @Override + public int getAttemptNumber() { + return attempt; + } + + @Override + public SinkWriterMetricGroup metricGroup() { + return metricGroup; + } + + @Override + public SerializationSchema.InitializationContext asSerializationSchemaInitializationContext() { + return null; + } + + @Override + public MailboxExecutor getMailboxExecutor() { + return mailboxExecutor; + } + + @Override + public ProcessingTimeService getProcessingTimeService() { + return new ProcessingTimeService() { + @Override + public long getCurrentProcessingTime() { + return processingTimeService.getCurrentProcessingTime(); + } + + @Override + public ScheduledFuture registerTimer(long time, ProcessingTimeCallback callback) { + return processingTimeService.registerTimer(time, callback::onProcessingTime); + } + }; + } + + @Override + public UserCodeClassLoader getUserCodeClassLoader() { + return SimpleUserCodeClassLoader.create(Thread.currentThread().getContextClassLoader()); + } + + @Override + public OptionalLong getRestoredCheckpointId() { + return OptionalLong.empty(); + } + + @Override + public boolean isObjectReuseEnabled() { + return false; + } + + @Override + public TypeSerializer createInputSerializer() { + return null; + } + + @Override + public JobInfo getJobInfo() { + return jobInfo; + } + + @Override + public TaskInfo getTaskInfo() { + return taskInfo; + } + + @Override + public Optional> metadataConsumer() { + return Optional.empty(); + } +} diff --git a/connectors/flink/v1.20/src/test/java/io/delta/flink/sink/dynamic/TestDynamicTableSinkContext.java b/connectors/flink/v1.20/src/test/java/io/delta/flink/sink/dynamic/TestDynamicTableSinkContext.java new file mode 100644 index 00000000000..b0383cd595c --- /dev/null +++ b/connectors/flink/v1.20/src/test/java/io/delta/flink/sink/dynamic/TestDynamicTableSinkContext.java @@ -0,0 +1,46 @@ +package io.delta.flink.sink.dynamic; + +import java.util.Map; +import org.apache.flink.configuration.Configuration; +import org.apache.flink.table.catalog.ObjectIdentifier; +import org.apache.flink.table.catalog.ResolvedCatalogTable; +import org.apache.flink.table.factories.DynamicTableFactory; + +public class TestDynamicTableSinkContext implements DynamicTableFactory.Context { + + private final ResolvedCatalogTable table; + + public TestDynamicTableSinkContext(ResolvedCatalogTable table) { + this.table = table; + } + + @Override + public ObjectIdentifier getObjectIdentifier() { + return ObjectIdentifier.of("default", "default", "t"); + } + + @Override + public ResolvedCatalogTable getCatalogTable() { + return table; + } + + @Override + public Map getEnrichmentOptions() { + return Map.of(); + } + + @Override + public Configuration getConfiguration() { + return new Configuration(); + } + + @Override + public ClassLoader getClassLoader() { + return Thread.currentThread().getContextClassLoader(); + } + + @Override 
+ public boolean isTemporary() { + return true; + } +} diff --git a/connectors/flink/v1.20/src/test/resources/log4j.properties b/connectors/flink/v1.20/src/test/resources/log4j.properties new file mode 100644 index 00000000000..1df5a0237a4 --- /dev/null +++ b/connectors/flink/v1.20/src/test/resources/log4j.properties @@ -0,0 +1,20 @@ +# =========================== +# Root logger +# =========================== +# Root at INFO, logs to console appender "console" +log4j.rootLogger=WARN, console + +# Make your package DEBUG (adjust the package name!) +log4j.logger.io.delta.flink.table=DEBUG + +log4j.logger.org.apache.flink=WARN +log4j.logger.org.apache.kafka=WARN +log4j.logger.org.apache.hadoop=WARN + +# =========================== +# Console appender +# =========================== +log4j.appender.console=org.apache.log4j.ConsoleAppender +log4j.appender.console.Target=System.out +log4j.appender.console.layout=org.apache.log4j.PatternLayout +log4j.appender.console.layout.ConversionPattern=%d{HH:mm:ss} %-5p %c{1} - %m%n diff --git a/connectors/flink/v1.20/src/test/scala/io/delta/flink/TestHelper.scala b/connectors/flink/v1.20/src/test/scala/io/delta/flink/TestHelper.scala new file mode 100644 index 00000000000..2c423556289 --- /dev/null +++ b/connectors/flink/v1.20/src/test/scala/io/delta/flink/TestHelper.scala @@ -0,0 +1,167 @@ +package io.delta.flink + +import java.io.{ByteArrayInputStream, ByteArrayOutputStream, File, ObjectInputStream, ObjectOutputStream} +import java.net.URI +import java.nio.file.{Files, Path} +import java.util.{Collections, Optional, UUID} + +import scala.jdk.CollectionConverters.{CollectionHasAsScala, MapHasAsJava, SeqHasAsJava} +import scala.util.Random + +import io.delta.kernel.{Operation, Table} +import io.delta.kernel.data.Row +import io.delta.kernel.defaults.engine.DefaultEngine +import io.delta.kernel.engine.Engine +import io.delta.kernel.expressions.{Column, Literal} +import io.delta.kernel.internal.actions.{AddFile, SingleAction} +import io.delta.kernel.internal.data.GenericRow +import io.delta.kernel.internal.util.Utils +import io.delta.kernel.statistics.DataFileStatistics +import io.delta.kernel.types._ +import io.delta.kernel.utils.{CloseableIterable, DataFileStatus, FileStatus} + +import org.apache.hadoop.conf.Configuration +import org.apache.hadoop.shaded.org.apache.commons.io.FileUtils + +trait TestHelper { + + protected def withTempDir(f: File => Unit): Unit = { + val tempDir = Files.createTempDirectory(UUID.randomUUID().toString).toFile + try f(tempDir) + finally { + FileUtils.deleteDirectory(tempDir) + } + } + + protected def dummyRow(): Row = { + val id = Random.nextInt(1048576) + val map: Map[Integer, Object] = Map(Integer.valueOf(0) -> Integer.valueOf(id)) + new GenericRow(new StructType().add("id", IntegerType.INTEGER), map.asJava) + } + + def dummyStatistics(numRecords: Long): DataFileStatistics = + new DataFileStatistics( + numRecords, + Map.empty[Column, Literal].asJava, + Map.empty[Column, Literal].asJava, + Map.empty[Column, java.lang.Long].asJava, + Optional.empty) + + def dummyAddFileRow( + schema: StructType, + numRows: Long, + partitionValues: Map[String, Literal]): Row = { + val addFileRow = AddFile.convertDataFileStatus( + schema, + URI.create("s3://abc/def"), + new DataFileStatus( + "s3://abc/def/" + UUID.randomUUID().toString, + 1000L, + 2000L, + Optional.of(dummyStatistics(numRows))), + partitionValues.asJava, + /* dataChange= */ true, + /* tags= */ Collections.emptyMap, + /* baseRowId= */ Optional.empty, + /* defaultRowCommitVersion= */ 
Optional.empty, + /* deletionVectorDescriptor= */ Optional.empty) + SingleAction.createAddFileSingleAction(addFileRow.toRow) + } + + protected def dummyWriterContext( + engine: Engine, + tablePath: String, + schema: StructType, + partitionCols: Seq[String] = Seq.empty): Row = { + val table = Table.forPath(engine, tablePath) +// try { +// table.getLatestSnapshot(engine); +// val txn = table.createTransactionBuilder(engine, "dummyEngine", Operation.MANUAL_UPDATE) +// .build(engine) +// txn.getTransactionState(engine) +// } catch { +// case e: TableNotFoundException => + val txn = table.createTransactionBuilder(engine, "dummyEngine", Operation.CREATE_TABLE) + .withSchema(engine, schema) + .withPartitionColumns(engine, partitionCols.toList.asJava) + .build(engine) + txn.getTransactionState(engine) + + } + + protected def createNonEmptyTable( + engine: Engine, + tablePath: String, + schema: StructType, + partitionCols: Seq[String] = Seq.empty): Table = { + val table = Table.forPath(engine, tablePath) + val txn = table.createTransactionBuilder(engine, "dummyEngine", Operation.CREATE_TABLE) + .withSchema(engine, schema) + .withPartitionColumns(engine, partitionCols.toList.asJava) + .build(engine) + + val partitionMap = partitionCols.map { colName => + (colName, dummyRandomLiteral(schema.get(colName).getDataType)) + }.toMap.asJava + + // Prepare some dummy AddFile + val dummyAddFile = AddFile.convertDataFileStatus( + schema, + URI.create(table.getPath(engine)), + new DataFileStatus("abcdef", 1000L, 2000L, Optional.empty), + partitionMap, + true, + Map.empty[String, String].asJava, + Optional.empty(), + Optional.empty(), + Optional.empty()) + txn.commit( + engine, + CloseableIterable + .inMemoryIterable(Utils.singletonCloseableIterator( + SingleAction.createAddFileSingleAction(dummyAddFile.toRow)))) + table + } + + protected def readParquet(filePath: Path, schema: StructType): Seq[Row] = { + val fileStatus = FileStatus.of( + filePath.toString, + Files.size(filePath), + Files.getLastModifiedTime(filePath).toMillis) + + val results = DefaultEngine.create(new Configuration()) + .getParquetHandler.readParquetFiles( + Utils.singletonCloseableIterator(fileStatus), + schema, + Optional.empty()) + assert(results.hasNext) + val result = results.next() + result.getData.getRows.toInMemoryList.asScala.toSeq + } + + val random = new Random(System.currentTimeMillis()) + protected def dummyRandomLiteral(dataType: DataType): Literal = { + dataType match { + case IntegerType.INTEGER => + Literal.ofInt(random.nextInt()) + case StringType.STRING => + Literal.ofString("p" + random.nextInt()) + case LongType.LONG => + Literal.ofLong(random.nextLong()) + case _ => throw new UnsupportedOperationException + } + } + + protected def checkSerializability(input: Object): Unit = { + val baos = new ByteArrayOutputStream() + val oos = new ObjectOutputStream(baos) + oos.writeObject(input) + oos.close() + val bytes = baos.toByteArray + val ois = new ObjectInputStream(new ByteArrayInputStream(bytes)) + val restored = ois.readObject() + ois.close() + + assert(restored.getClass == input.getClass) + } +} diff --git a/connectors/flink/v1.20/src/test/scala/io/delta/flink/sink/ConversionsSuite.scala b/connectors/flink/v1.20/src/test/scala/io/delta/flink/sink/ConversionsSuite.scala new file mode 100644 index 00000000000..c5fa92ab170 --- /dev/null +++ b/connectors/flink/v1.20/src/test/scala/io/delta/flink/sink/ConversionsSuite.scala @@ -0,0 +1,26 @@ +package io.delta.flink.sink + +import io.delta.kernel.types.{IntegerType, StringType, 
StructType} + +import org.apache.flink.table.types.logical.{IntType, RowType, VarCharType} +import org.scalatest.funsuite.AnyFunSuite + +class ConversionsSuite extends AnyFunSuite { + + test("convert simple schema") { + val flinkSchema = RowType.of( + Array(new IntType(), new VarCharType(VarCharType.MAX_LENGTH)), + Array[String]("id", "part")) + + val deltaSchema = new StructType() + .add("id", IntegerType.INTEGER) + .add("part", StringType.STRING) + assert(Conversions.FlinkToDelta.schema(flinkSchema).equivalent(deltaSchema)) + } + + test("convert struct") {} + + test("convert list") {} + + test("convert map") {} +} diff --git a/connectors/flink/v1.20/src/test/scala/io/delta/flink/sink/DeltaCommittableSuite.scala b/connectors/flink/v1.20/src/test/scala/io/delta/flink/sink/DeltaCommittableSuite.scala new file mode 100644 index 00000000000..2662bee4cbf --- /dev/null +++ b/connectors/flink/v1.20/src/test/scala/io/delta/flink/sink/DeltaCommittableSuite.scala @@ -0,0 +1,31 @@ +package io.delta.flink.sink + +import scala.jdk.CollectionConverters.{CollectionHasAsScala, SeqHasAsJava} + +import io.delta.flink.TestHelper +import io.delta.kernel.defaults.internal.json.JsonUtils +import io.delta.kernel.expressions.Literal +import io.delta.kernel.types.{IntegerType, StringType, StructType} + +import org.scalatest.funsuite.AnyFunSuite + +class DeltaCommittableSuite extends AnyFunSuite with TestHelper { + + test("serialize and deserialize") { + val schema = new StructType().add("id", IntegerType.INTEGER) + .add("part", StringType.STRING) + val actions = (1 to 10) + .map(value => + dummyAddFileRow(schema, 10, partitionValues = Map("part" -> Literal.ofInt(value)))) + .toList.asJava + val origin = new DeltaCommittable("jobId", "operatorId", 100, actions) + val serde = new DeltaCommittable.Serializer + val deserialized = serde.deserialize(1, serde.serialize(origin)) + + assert(deserialized.getJobId == origin.getJobId) + assert(deserialized.getOperatorId == origin.getOperatorId) + assert(deserialized.getCheckpointId == origin.getCheckpointId) + assert(deserialized.getDeltaActions.asScala.map(JsonUtils.rowToJson) + == actions.asScala.map(JsonUtils.rowToJson)) + } +} diff --git a/connectors/flink/v1.20/src/test/scala/io/delta/flink/sink/DeltaCommitterSuite.scala b/connectors/flink/v1.20/src/test/scala/io/delta/flink/sink/DeltaCommitterSuite.scala new file mode 100644 index 00000000000..e354c8ad4f1 --- /dev/null +++ b/connectors/flink/v1.20/src/test/scala/io/delta/flink/sink/DeltaCommitterSuite.scala @@ -0,0 +1,168 @@ +package io.delta.flink.sink + +import java.net.URI +import java.nio.file.Paths + +import scala.jdk.CollectionConverters.{CollectionHasAsScala, MapHasAsJava, SeqHasAsJava} + +import io.delta.flink.TestHelper +import io.delta.flink.table.HadoopTable +import io.delta.kernel.{Table, TableManager} +import io.delta.kernel.CommitRangeBuilder.CommitBoundary +import io.delta.kernel.defaults.engine.DefaultEngine +import io.delta.kernel.expressions.Literal +import io.delta.kernel.internal.ScanImpl +import io.delta.kernel.internal.actions.AddFile +import io.delta.kernel.types.{IntegerType, StringType, StructType} + +import org.apache.flink.api.connector.sink2.Committer +import org.apache.flink.api.connector.sink2.mocks.MockCommitRequest +import org.apache.flink.metrics.groups.UnregisteredMetricsGroup +import org.apache.flink.runtime.metrics.groups.{InternalSinkCommitterMetricGroup, UnregisteredMetricGroups} +import org.apache.hadoop.conf.Configuration +import org.scalatest.funsuite.AnyFunSuite + +class 
+class DeltaCommitterSuite extends AnyFunSuite with TestHelper {
+
+  val metricGroup = InternalSinkCommitterMetricGroup.wrap(
+    UnregisteredMetricGroups.createUnregisteredOperatorMetricGroup)
+
+  test("commit with single checkpoint to an empty table") {
+    withTempDir { dir =>
+      val schema = new StructType()
+        .add("id", IntegerType.INTEGER)
+        .add("part", StringType.STRING)
+
+      val table = new HadoopTable(
+        dir.toURI,
+        Map.empty[String, String].asJava,
+        schema,
+        List("part").asJava)
+
+      val committer = new DeltaCommitter.Builder()
+        .withDeltaTable(table)
+        .withJobId("test-job")
+        .withMetricGroup(metricGroup)
+        .build()
+
+      // Because of the way the stream traffic is routed, we should receive only one committable.
+      val commitMessages: List[Committer.CommitRequest[DeltaCommittable]] = Seq({
+        val actions = (0 until 5).map { i =>
+          dummyAddFileRow(schema, 10 + i, Map("part" -> Literal.ofString("p" + i)))
+        }.toList.asJava
+        val committable = new DeltaCommittable("test-job", "test-opr", 1000L, actions)
+        new MockCommitRequest(committable)
+      }).toList
+
+      committer.commit(commitMessages.asJava)
+
+      // The target table should have one version
+      val engine = DefaultEngine.create(new Configuration())
+      val snapshot = TableManager.loadSnapshot(dir.toString).build(engine)
+      assert(0L == snapshot.getVersion)
+      val filesList = snapshot.getScanBuilder.build().asInstanceOf[ScanImpl]
+        .getScanFiles(engine, true).toInMemoryList
+      val actions = filesList.get(0).getRows
+        .toInMemoryList.asScala.map(row => new AddFile(row.getStruct(0)))
+
+      // There should be 5 files to scan
+      assert(5 == actions.size)
+      assert(Set("p0", "p1", "p2", "p3", "p4") ==
+        actions.map(_.getPartitionValues.getValues.getString(0)).toSet)
+      assert(60 == actions.map(_.getNumRecords.get.longValue()).sum)
+    }
+  }
+
+  test("commit with multiple checkpoints") {
+    withTempDir { dir =>
+      val schema = new StructType()
+        .add("id", IntegerType.INTEGER)
+        .add("part", StringType.STRING)
+
+      val table = new HadoopTable(
+        dir.toPath.toUri,
+        Map.empty[String, String].asJava,
+        schema,
+        List("part").asJava)
+
+      val committer = new DeltaCommitter.Builder()
+        .withDeltaTable(table)
+        .withJobId("test-job")
+        .withMetricGroup(metricGroup)
+        .build()
+
+      // Three checkpoints, each containing 5 add files
+      val commitMessages: List[Committer.CommitRequest[DeltaCommittable]] =
+        (0 until 3).map { i =>
+          val actions = (0 until 5).map { j =>
+            dummyAddFileRow(schema, j + 10, Map("part" -> Literal.ofString("p" + j)))
+          }.toList.asJava
+          val committable = new DeltaCommittable("test-job", "test-opr", i, actions)
+          new MockCommitRequest(committable)
+        }.toList
+
+      committer.commit(commitMessages.asJava)
+
+      // The target table should have 3 versions
+      val engine = DefaultEngine.create(new Configuration())
+      val snapshot = TableManager.loadSnapshot(dir.toString).build(engine)
+      assert(2L == snapshot.getVersion)
+
+      for (version <- 0 to 2) {
+        val filesList = TableManager.loadSnapshot(dir.toString).atVersion(version)
+          .build(engine)
+          .getScanBuilder
+          .build()
+          .asInstanceOf[ScanImpl]
+          .getScanFiles(engine, true).toInMemoryList
+        val actions = filesList.get(0).getRows
+          .toInMemoryList.asScala.map(row => new AddFile(row.getStruct(0)))
+
+        // There should be 5 files to scan
+        assert(5 == actions.size)
+        assert(Set("p0", "p1", "p2", "p3", "p4") ==
+          actions.map(_.getPartitionValues.getValues.getString(0)).toSet)
+        assert(60 == actions.map(_.getNumRecords.get.longValue()).sum)
+      }
+    }
+  }
+
+  test("commit to an existing table with different schema will fail") {
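+    // The table is pre-created below with a different schema; the committer should reject the
+    // commit rather than evolve the schema.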
+    withTempDir { dir =>
+      val engine = DefaultEngine.create(new Configuration())
+      val schema = new StructType()
+        .add("id", IntegerType.INTEGER)
+        .add("part", StringType.STRING)
+      val anotherSchema = new StructType()
+        .add("v1", StringType.STRING)
+        .add("v2", StringType.STRING)
+
+      val table = new HadoopTable(
+        dir.toURI,
+        Map.empty[String, String].asJava,
+        schema,
+        List("part").asJava)
+
+      createNonEmptyTable(engine, dir.getAbsolutePath, anotherSchema, Seq("v1"))
+
+      val committer = new DeltaCommitter.Builder()
+        .withDeltaTable(table)
+        .withJobId("test-job")
+        .withMetricGroup(metricGroup)
+        .build()
+
+      val commitMessages: List[Committer.CommitRequest[DeltaCommittable]] = Seq({
+        val actions = (0 until 5).map { i =>
+          dummyAddFileRow(schema, i + 10, Map("part" -> Literal.ofString("p" + i)))
+        }.toList.asJava
+        val committable = new DeltaCommittable("test-job", "test-opr", 1000L, actions)
+        new MockCommitRequest(committable)
+      }).toList
+
+      val e = intercept[IllegalArgumentException] {
+        committer.commit(commitMessages.asJava)
+      }
+      assert(e.getMessage.contains("DeltaSink does not support schema evolution."))
+    }
+  }
+}
diff --git a/connectors/flink/v1.20/src/test/scala/io/delta/flink/sink/DeltaSchemaDigestSuite.scala b/connectors/flink/v1.20/src/test/scala/io/delta/flink/sink/DeltaSchemaDigestSuite.scala
new file mode 100644
index 00000000000..3ba031c731c
--- /dev/null
+++ b/connectors/flink/v1.20/src/test/scala/io/delta/flink/sink/DeltaSchemaDigestSuite.scala
@@ -0,0 +1,28 @@
+package io.delta.flink.sink
+
+import io.delta.kernel.types.{IntegerType, StringType, StructType}
+
+import org.scalatest.funsuite.AnyFunSuite
+
+class DeltaSchemaDigestSuite extends AnyFunSuite {
+
+  test("sha256") {
+    val schema1 = new StructType()
+      .add("id", IntegerType.INTEGER)
+      .add("part", StringType.STRING)
+    val schema2 = new StructType()
+      .add("id", IntegerType.INTEGER)
+      .add("part", StringType.STRING)
+    val schema3 = new StructType()
+      .add("id1", IntegerType.INTEGER)
+      .add("part", StringType.STRING)
+    val schema4 = new StructType()
+      .add("id", IntegerType.INTEGER, false)
+      .add("part", StringType.STRING)
+
+    assert(new DeltaSchemaDigest(schema1).sha256() == new DeltaSchemaDigest(schema2).sha256())
+    assert(new DeltaSchemaDigest(schema1).sha256() != new DeltaSchemaDigest(schema3).sha256())
+    assert(new DeltaSchemaDigest(schema1).sha256() != new DeltaSchemaDigest(schema4).sha256())
+  }
+
+}
diff --git a/connectors/flink/v1.20/src/test/scala/io/delta/flink/sink/DeltaSinkSuite.scala b/connectors/flink/v1.20/src/test/scala/io/delta/flink/sink/DeltaSinkSuite.scala
new file mode 100644
index 00000000000..ab7a5f2cd76
--- /dev/null
+++ b/connectors/flink/v1.20/src/test/scala/io/delta/flink/sink/DeltaSinkSuite.scala
@@ -0,0 +1,123 @@
+package io.delta.flink.sink
+
+import java.util
+
+import scala.jdk.CollectionConverters.{CollectionHasAsScala, IteratorHasAsScala, SeqHasAsJava}
+
+import io.delta.flink.TestHelper
+import io.delta.kernel.Table
+import io.delta.kernel.defaults.engine.DefaultEngine
+import io.delta.kernel.internal.ScanImpl
+import io.delta.kernel.internal.actions.AddFile
+
+import org.apache.flink.api.common.functions.MapFunction
+import org.apache.flink.api.common.typeinfo.{TypeInformation, Types}
+import org.apache.flink.streaming.api.datastream.DataStream
+import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment
+import org.apache.flink.table.api.DataTypes
+import org.apache.flink.table.data.{GenericRowData, RowData, StringData}
+import org.apache.flink.table.types.logical.{IntType, RowType, VarCharType}
+import org.apache.flink.util.InstantiationUtil
+import org.apache.hadoop.conf.Configuration
+import org.scalatest.funsuite.AnyFunSuite
+
+class DeltaSinkSuite extends AnyFunSuite with TestHelper {
+
+  test("mini e2e test to empty table") {
+    withTempDir { dir =>
+      val tablePath = dir.getPath
+      val flinkSchema = RowType.of(
+        Array(new IntType(), new VarCharType(VarCharType.MAX_LENGTH)),
+        Array[String]("id", "part"))
+
+      val deltaSink = new DeltaSink.Builder()
+        .withTablePath(tablePath)
+        .withFlinkSchema(flinkSchema)
+        .withPartitionColNames(Seq("part").asJava)
+        .build()
+
+      val env = StreamExecutionEnvironment.getExecutionEnvironment
+      env.setParallelism(5)
+      env.enableCheckpointing(100)
+
+      // Use String to make the StreamSource serializable
+      val dataList: util.List[String] = (0 until 100000).map[String] { idx =>
+        s"$idx,p${idx % 10}"
+      }.toList.asJava
+
+      val dataType = DataTypes.of(flinkSchema)
+      val typeInfo: TypeInformation[RowData] = TypeInformation.of(dataType.getConversionClass)
+        .asInstanceOf[TypeInformation[RowData]]
+      val input: DataStream[RowData] = env
+        .addSource(
+          // This source keeps the environment running until the checkpoints containing data
+          // have been processed.
+          new DelayFinishTestSource[String](dataList, 2),
+          Types.STRING).map[RowData](
+          new MapFunction[String, RowData]() {
+            override def map(value: String): RowData = {
+              val parts = value.split(",")
+              GenericRowData.of(Integer.valueOf(parts(0)), StringData.fromString(parts(1)))
+            }
+          }).returns(typeInfo)
+
+      input.sinkTo(deltaSink).uid("deltaSink")
+
+      env.execute("DeltaSink integration test")
+
+      // Read the table to make sure the data is correct.
+      val engine = DefaultEngine.create(new Configuration())
+      val table = Table.forPath(engine, tablePath)
+      val scan = table.getLatestSnapshot(engine).getScanBuilder.build()
+      // AddFiles
+      val results = scan.asInstanceOf[ScanImpl]
+        .getScanFiles(engine, true)
+        .asScala.flatMap { file =>
+          file.getData.getRows.toInMemoryList.asScala
+            .filter { _.getStruct(0) != null }
+            .map { row => new AddFile(row.getStruct(0)) }
+        }.toList
+
+      val partitions = results.map(_.getPartitionValues.getValues.getString(0)).toSet
+      assert((0 until 10).map { i => "p" + i }.toSet == partitions)
+      assert(100000 == results.map(_.getNumRecords.get().longValue()).sum)
+    }
+  }
+
+  test("create writer and committer") {
+    withTempDir { dir =>
+      val tablePath = dir.getPath
+      val flinkSchema = RowType.of(
+        Array(new IntType(), new VarCharType(VarCharType.MAX_LENGTH)),
+        Array[String]("id", "part"))
+
+      val deltaSink = new DeltaSink.Builder()
+        .withTablePath(tablePath)
+        .withFlinkSchema(flinkSchema)
+        .withPartitionColNames(Seq("part").asJava)
+        .build()
+
+      val writer = deltaSink.createWriter(new TestWriterInitContext(1, 1, 1))
+      val committer = deltaSink.createCommitter(new TestCommitterInitContext(1, 1, 1))
+      assert(writer != null)
+      assert(committer != null)
+    }
+  }
+
+  test("sink is serializable") {
+    withTempDir { dir =>
+      val tablePath = dir.getPath
+      val flinkSchema = RowType.of(
+        Array(new IntType(), new VarCharType(VarCharType.MAX_LENGTH)),
+        Array[String]("id", "part"))
+
+      val deltaSink = new DeltaSink.Builder()
+        .withTablePath(tablePath)
+        .withFlinkSchema(flinkSchema)
+        .withPartitionColNames(Seq("part").asJava)
+        .build()
+      val serialized: Array[Byte] = InstantiationUtil.serializeObject(deltaSink)
+      val copy = InstantiationUtil.deserializeObject(serialized, getClass.getClassLoader)
+        .asInstanceOf[DeltaSink]
+      assert(copy != null)
+    }
+  }
+}
diff --git a/connectors/flink/v1.20/src/test/scala/io/delta/flink/sink/DeltaSinkWriterSuite.scala b/connectors/flink/v1.20/src/test/scala/io/delta/flink/sink/DeltaSinkWriterSuite.scala
new file mode 100644
index 00000000000..a694b9f17bc
--- /dev/null
+++ b/connectors/flink/v1.20/src/test/scala/io/delta/flink/sink/DeltaSinkWriterSuite.scala
@@ -0,0 +1,89 @@
+package io.delta.flink.sink
+
+import java.net.URI
+
+import scala.jdk.CollectionConverters.{CollectionHasAsScala, MapHasAsJava, SeqHasAsJava}
+
+import io.delta.flink.TestHelper
+import io.delta.flink.table.HadoopTable
+import io.delta.kernel.defaults.engine.DefaultEngine
+import io.delta.kernel.types.{IntegerType, StringType, StructType}
+
+import org.apache.flink.metrics.groups.UnregisteredMetricsGroup
+import org.apache.flink.table.data.{GenericRowData, StringData}
+import org.apache.hadoop.conf.Configuration
+import org.scalatest.funsuite.AnyFunSuite
+
+class DeltaSinkWriterSuite extends AnyFunSuite with TestHelper {
+
+  test("write to empty table using multiple partitions") {
+    withTempDir { dir =>
+      val tablePath = dir.getAbsolutePath
+      val schema = new StructType()
+        .add("id", IntegerType.INTEGER)
+        .add("part", StringType.STRING)
+
+      val table = new HadoopTable(
+        URI.create(tablePath),
+        Map.empty[String, String].asJava,
+        schema,
+        List("part").asJava)
+
+      val sinkWriter = new DeltaSinkWriter.Builder()
+        .withJobId("test-job")
+        .withSubtaskId(0)
+        .withAttemptNumber(1)
+        .withDeltaTable(table)
+        .withMetricGroup(UnregisteredMetricsGroup.createSinkWriterMetricGroup())
+        .build()
+
+      for (i <- 0 until 20) {
+        sinkWriter.write(GenericRowData.of(i, StringData.fromString("p" + (i % 3))), null)
+      }
+      val results = sinkWriter.prepareCommit()
+      // Three partitions
+      assert(3 == results.size())
+      // Each partition has one action
+      results.asScala.foreach { result =>
+        assert(1 == result.getDeltaActions.size)
+      }
+    }
+  }
+
+  test("write to existing table using multiple partitions") {
+    withTempDir { dir =>
+      val tablePath = dir.getAbsolutePath
+      val engine = DefaultEngine.create(new Configuration())
+      val schema = new StructType()
+        .add("id", IntegerType.INTEGER)
+        .add("part", StringType.STRING)
+
+      // Create a non-empty table
+      createNonEmptyTable(engine, tablePath, schema, Seq("part"))
+      val table = new HadoopTable(
+        URI.create(tablePath),
+        Map.empty[String, String].asJava,
+        schema,
+        List("part").asJava)
+
+      val sinkWriter = new DeltaSinkWriter.Builder()
+        .withDeltaTable(table)
+        .withJobId("test-job")
+        .withSubtaskId(0)
+        .withAttemptNumber(1)
+        .withMetricGroup(UnregisteredMetricsGroup.createSinkWriterMetricGroup())
+        .build()
+
+      for (i <- 0 until 20) {
+        sinkWriter.write(GenericRowData.of(i, StringData.fromString("p" + (i % 3))), null)
+      }
+      val results = sinkWriter.prepareCommit()
+      // Three partitions
+      assert(3 == results.size())
+      // Each partition has one action
+      results.asScala.foreach { result =>
+        assert(1 == result.getDeltaActions.size)
+      }
+    }
+  }
+}
diff --git a/connectors/flink/v1.20/src/test/scala/io/delta/flink/sink/DeltaWriterResultSuite.scala b/connectors/flink/v1.20/src/test/scala/io/delta/flink/sink/DeltaWriterResultSuite.scala
new file mode 100644
index 00000000000..6818c37fc50
--- /dev/null
+++ b/connectors/flink/v1.20/src/test/scala/io/delta/flink/sink/DeltaWriterResultSuite.scala
@@ -0,0 +1,28 @@
+package io.delta.flink.sink
+
+import scala.jdk.CollectionConverters.{ListHasAsScala, SeqHasAsJava}
+
+import io.delta.flink.TestHelper
+import io.delta.kernel.defaults.internal.json.JsonUtils
+import io.delta.kernel.expressions.Literal
+import io.delta.kernel.types.{IntegerType, StringType, StructType}
+
+import org.scalatest.funsuite.AnyFunSuite
+
+class DeltaWriterResultSuite extends AnyFunSuite with TestHelper {
+
+  test("serialize and deserialize") {
+    val schema = new StructType().add("id", IntegerType.INTEGER)
+      .add("part", StringType.STRING)
+    val actions = (1 to 10)
+      .map(value =>
+        dummyAddFileRow(schema, 10, partitionValues = Map("part" -> Literal.ofInt(value))))
+      .toList.asJava
+    val origin = new DeltaWriterResult(actions)
+    val serde = new DeltaWriterResult.Serializer
+    val deserialized = serde.deserialize(1, serde.serialize(origin))
+
+    assert(deserialized.getDeltaActions.asScala.map(JsonUtils.rowToJson)
+      == actions.asScala.map(JsonUtils.rowToJson))
+  }
+}
diff --git a/connectors/flink/v1.20/src/test/scala/io/delta/flink/sink/DeltaWriterTaskSuite.scala b/connectors/flink/v1.20/src/test/scala/io/delta/flink/sink/DeltaWriterTaskSuite.scala
new file mode 100644
index 00000000000..503f892e773
--- /dev/null
+++ b/connectors/flink/v1.20/src/test/scala/io/delta/flink/sink/DeltaWriterTaskSuite.scala
@@ -0,0 +1,124 @@
+package io.delta.flink.sink
+
+import java.net.URI
+import java.nio.file.Files
+
+import scala.jdk.CollectionConverters.{MapHasAsJava, SeqHasAsJava}
+
+import io.delta.flink.TestHelper
+import io.delta.flink.table.HadoopTable
+import io.delta.kernel.{Operation, Table}
+import io.delta.kernel.defaults.engine.DefaultEngine
+import io.delta.kernel.expressions.Literal
+import io.delta.kernel.internal.actions.{AddFile, SingleAction}
+import io.delta.kernel.types.{IntegerType, StringType, StructType}
+
+import org.apache.flink.table.data.{GenericRowData, StringData}
+import org.apache.hadoop.conf.Configuration
+import org.scalatest.funsuite.AnyFunSuite
+
+class DeltaWriterTaskSuite extends AnyFunSuite with TestHelper {
+
+  test("write to empty table") {
+    withTempDir { dir =>
+      val tablePath = dir.getAbsolutePath
+      val schema = new StructType()
+        .add("id", IntegerType.INTEGER)
+        .add("part", StringType.STRING)
+
+      val table = new HadoopTable(
+        URI.create(tablePath),
+        Map.empty[String, String].asJava,
+        schema,
+        List("part").asJava)
+      val partitionValues = Map("part" -> Literal.ofString("p0")).asJava
+
+      val writerTask = new DeltaWriterTask(
+        /* jobId= */ "test-job-id",
+        /* subtaskId= */ 2,
+        /* attemptNumber= */ 0,
+        /* table = */ table,
+        /* partitionValues= */ partitionValues)
+
+      for (i <- 0 until 10) {
+        writerTask.write(GenericRowData.of(i, StringData.fromString("p0")), null)
+      }
+      val results = writerTask.complete()
+
+      assert(1 == results.size())
+      results.forEach(result => {
+        assert(1 == result.getDeltaActions.size())
+        val action = result.getDeltaActions.get(0)
+        val addFile = new AddFile(action.getStruct(SingleAction.ADD_FILE_ORDINAL))
+        assert(addFile.getPath.contains("test-job-id-2-0"))
+        // Stats are present
+        assert(10 == addFile.getNumRecords.get())
+        val fullPath = dir.toPath.resolve(addFile.getPath).toAbsolutePath
+        assert(Files.exists(fullPath))
+
+        val partitionMap = addFile.getPartitionValues
+        assert(1 == partitionMap.getSize)
+        assert("part" == partitionMap.getKeys.getString(0))
+        assert("p0" == partitionMap.getValues.getString(0))
+
+        // check the Parquet file content
+        val rows = readParquet(fullPath, schema)
+        assert(10 == rows.size)
+        rows.zipWithIndex.iterator.foreach { case (row, idx) =>
+          assert(idx == row.getInt(0))
+        }
+      })
+    }
+  }
+
+  test("write to existing table") {
+    withTempDir { dir =>
+      val tablePath = dir.getAbsolutePath
+      val schema = new StructType()
+        .add("id", IntegerType.INTEGER)
+        .add("part", StringType.STRING)
+
+      // The test name implies the table already exists, so pre-create a non-empty table here
+      // (assumed intent, mirroring DeltaSinkWriterSuite).
+      val engine = DefaultEngine.create(new Configuration())
+      createNonEmptyTable(engine, tablePath, schema, Seq("part"))
+
+      val table = new HadoopTable(
+        URI.create(tablePath),
+        Map.empty[String, String].asJava,
+        schema,
+        List("part").asJava)
+      val partitionValues = Map("part" -> Literal.ofString("p0")).asJava
+
+      val writerTask = new DeltaWriterTask(
+        /* jobId= */ "test-job-id",
+        /* subtaskId= */ 2,
+        /* attemptNumber= */ 0,
+        /* table = */ table,
+        /* partitionValues= */ partitionValues)
+
+      for (i <- 0 until 10) {
+        writerTask.write(GenericRowData.of(i, StringData.fromString("p0")), null)
+      }
+      val results = writerTask.complete()
+
+      assert(1 == results.size())
+      results.forEach(result => {
+        assert(1 == result.getDeltaActions.size())
+        val action = result.getDeltaActions.get(0)
+        val addFile = new AddFile(action.getStruct(SingleAction.ADD_FILE_ORDINAL))
+        assert(addFile.getPath.contains("test-job-id-2-0"))
+        assert(10 == addFile.getNumRecords.get())
+        val fullPath = dir.toPath.resolve(addFile.getPath).toAbsolutePath
+        assert(Files.exists(fullPath))
+
+        val partitionMap = addFile.getPartitionValues
+        assert(1 == partitionMap.getSize)
+        assert("part" == partitionMap.getKeys.getString(0))
+        assert("p0" == partitionMap.getValues.getString(0))
+
+        // check the Parquet file content
+        val rows = readParquet(fullPath, schema)
+        assert(10 == rows.size)
+        rows.zipWithIndex.iterator.foreach { case (row, idx) =>
+          assert(idx == row.getInt(0))
+        }
+      })
+    }
+  }
+}
diff --git a/connectors/flink/v1.20/src/test/scala/io/delta/flink/sink/dynamic/DeltaDynamicTableSinkSuite.scala b/connectors/flink/v1.20/src/test/scala/io/delta/flink/sink/dynamic/DeltaDynamicTableSinkSuite.scala
new file mode 100644
index 00000000000..611d33e12d4
--- /dev/null
+++ b/connectors/flink/v1.20/src/test/scala/io/delta/flink/sink/dynamic/DeltaDynamicTableSinkSuite.scala
@@ -0,0 +1,73 @@
+package io.delta.flink.sink.dynamic
+
+import scala.jdk.CollectionConverters.MapHasAsJava
+
+import io.delta.flink.TestHelper
+
+import org.apache.flink.table.api.{DataTypes, EnvironmentSettings, Schema, TableEnvironment}
+import org.apache.flink.table.catalog.{CatalogTable, ResolvedCatalogTable, ResolvedSchema}
+import org.scalatest.funsuite.AnyFunSuite
+
+class DeltaDynamicTableSinkSuite extends AnyFunSuite with TestHelper {
+
+  test("load table") {
+    withTempDir { dir =>
+      val options = Map(
+        "connector" -> "delta-connector",
+        "table_path" -> s"${dir.getPath}")
+
+      val table = CatalogTable.of(
+        Schema.newBuilder
+          .column("id", DataTypes.BIGINT)
+          .column("dt", DataTypes.STRING).build,
+        "test table",
+        java.util.List.of,
+        options.asJava)
+
+      val resolvedTable = new ResolvedCatalogTable(
+        table,
+        ResolvedSchema.physical(
+          Array("id", "dt"),
+          Array(DataTypes.BIGINT, DataTypes.STRING)))
+
+      val context = new TestDynamicTableSinkContext(resolvedTable)
+
+      val factory = new DeltaDynamicTableSinkFactory
+      val sink = factory.createDynamicTableSink(context)
+
+      assert(sink.isInstanceOf[DeltaDynamicTableSink])
+    }
+  }
+
+  test("use sql to load table") {
+    withTempDir { dir =>
+      val settings = EnvironmentSettings.newInstance.inStreamingMode.build
+
+      val tEnv = TableEnvironment.create(settings)
+
+      tEnv.executeSql(
+        """
+          CREATE TEMPORARY TABLE src (
+            id BIGINT,
+            dt STRING
+          ) WITH (
+            'connector' = 'values',
+            'bounded' = 'true',
+            'data-id' = 'my_test'
+          )""".stripMargin)
+
+      tEnv.executeSql(
+        s"""
+          CREATE TEMPORARY TABLE sink (
+            id BIGINT,
+            dt STRING
+          ) WITH (
+            'connector' = 'delta-connector',
+            'table_path' = '${dir.getPath}'
+          )
+        """.stripMargin)
+
+      tEnv.executeSql("INSERT INTO sink SELECT id, dt FROM src").await()
+    }
+  }
+}
diff --git a/connectors/flink/v1.20/src/test/scala/io/delta/flink/table/AbstractKernelTableSuite.scala b/connectors/flink/v1.20/src/test/scala/io/delta/flink/table/AbstractKernelTableSuite.scala
new file mode 100644
index 00000000000..9e24a116aae
--- /dev/null
+++ b/connectors/flink/v1.20/src/test/scala/io/delta/flink/table/AbstractKernelTableSuite.scala
@@ -0,0 +1,14 @@
+package io.delta.flink.table
+
+import java.net.URI
+
+import org.scalatest.funsuite.AnyFunSuite
+
+class AbstractKernelTableSuite extends AnyFunSuite {
+
+  test("normalize") {
+    assert(AbstractKernelTable.normalize(URI.create("file:/var")).toString == "file:///var/")
+    assert(AbstractKernelTable.normalize(URI.create("file:///var")).toString == "file:///var/")
+    assert(AbstractKernelTable.normalize(URI.create("s3://host/var")).toString == "s3://host/var/")
+  }
+}
diff --git a/connectors/flink/v1.20/src/test/scala/io/delta/flink/table/CCv2TableSuite.scala b/connectors/flink/v1.20/src/test/scala/io/delta/flink/table/CCv2TableSuite.scala
new file mode 100644
index 00000000000..1929c4ca764
--- /dev/null
+++ b/connectors/flink/v1.20/src/test/scala/io/delta/flink/table/CCv2TableSuite.scala
@@ -0,0 +1,79 @@
+package io.delta.flink.table
+
+import java.net.URI
+import java.util.{Collections, Optional}
+
+import scala.jdk.CollectionConverters.{IterableHasAsJava, MapHasAsJava}
+
+import io.delta.flink.TestHelper
+import io.delta.kernel.Transaction
+import io.delta.kernel.TransactionSuite.longVector
+import io.delta.kernel.data.{ColumnVector, FilteredColumnarBatch}
+import io.delta.kernel.defaults.internal.data.DefaultColumnarBatch
+import io.delta.kernel.expressions.Literal
+import io.delta.kernel.internal.util.Utils
+import io.delta.kernel.internal.util.Utils.toCloseableIterator
+import io.delta.kernel.types.{DataType, IntegerType, StructType}
+import io.delta.kernel.utils.CloseableIterable
+
+import org.scalatest.funsuite.AnyFunSuite
+
+class CCv2TableSuite extends AnyFunSuite with TestHelper {
+
+  val CATALOG_ENDPOINT = "https://e2-dogfood.staging.cloud.databricks.com/"
+  val CATALOG_TOKEN = ""
+
+  test("load table from e2dogfood") {
+    val table = new CCv2Table(
+      new RESTCatalog(CATALOG_ENDPOINT, CATALOG_TOKEN),
+      "main.hao.testccv2",
+      Map(
+        CCv2Table.CATALOG_ENDPOINT -> CATALOG_ENDPOINT,
+        CCv2Table.CATALOG_TOKEN -> CATALOG_TOKEN).asJava)
+
+    assert(table.getId == "main.hao.testccv2")
+    assert(table.getTablePath == URI.create("s3://us-west-2-extstaging-managed-" +
+      "catalog-test-bucket-1/" +
+      "19a85dee-54bc-43a2-87ab-023d0ec16013/tables/b7c3e881-4f7f-40f2-88c1-dff715835a81/"))
+    assert(table.getSchema.equivalent(new StructType().add("id", IntegerType.INTEGER)))
+  }
+
+  test("commit data to e2dogfood") {
+    val table = new CCv2Table(
+      new RESTCatalog(CATALOG_ENDPOINT, CATALOG_TOKEN),
+      "main.hao.testccv2",
+      Map(
+        CCv2Table.CATALOG_ENDPOINT -> CATALOG_ENDPOINT,
+        CCv2Table.CATALOG_TOKEN -> CATALOG_TOKEN).asJava)
+
+    val values = (0 until 100)
+    val colVector = new ColumnVector() {
+      override def getDataType: DataType = IntegerType.INTEGER
+      override def getSize: Int = values.length
+      override def close(): Unit = {}
+      override def isNullAt(rowId: Int): Boolean = false // primitive Int values are never null
+      override def getInt(rowId: Int): Int = values(rowId)
+    }
+
+    val columnarBatchData =
+      new DefaultColumnarBatch(values.size, table.getSchema, Array(colVector))
+    val filteredColumnarBatchData = new FilteredColumnarBatch(columnarBatchData,
+      Optional.empty())
+    val partitionValues = Collections.emptyMap[String, Literal]()
+
+    val data = toCloseableIterator(Seq(filteredColumnarBatchData).asJava.iterator())
+    val rows = table.writeParquet("abc", data, partitionValues)
+
+    table.commit(CloseableIterable.inMemoryIterable(rows))
+  }
+
+  test("serializability") {
+    val table = new CCv2Table(
+      new RESTCatalog(CATALOG_ENDPOINT, CATALOG_TOKEN),
+      "main.hao.testccv2",
+      Map(
+        CCv2Table.CATALOG_ENDPOINT -> CATALOG_ENDPOINT,
+        CCv2Table.CATALOG_TOKEN -> CATALOG_TOKEN).asJava)
+
+    checkSerializability(table)
+  }
+}
diff --git a/kernel/kernel-api/src/main/java/io/delta/kernel/internal/data/TransactionStateRow.java b/kernel/kernel-api/src/main/java/io/delta/kernel/internal/data/TransactionStateRow.java
index 50a02dd17ed..83f4a0e9d6d 100644
--- a/kernel/kernel-api/src/main/java/io/delta/kernel/internal/data/TransactionStateRow.java
+++ b/kernel/kernel-api/src/main/java/io/delta/kernel/internal/data/TransactionStateRow.java
@@ -32,7 +32,7 @@
 import java.util.stream.IntStream;
 
 public class TransactionStateRow extends GenericRow {
-  private static final StructType SCHEMA =
+  public static final StructType SCHEMA =
       new StructType()
           .add("logicalSchemaString", StringType.STRING)
           .add("physicalSchemaString", StringType.STRING)
diff --git a/kernel/kernel-api/src/test/scala/io/delta/kernel/test/VectorTestUtils.scala b/kernel/kernel-api/src/test/scala/io/delta/kernel/test/VectorTestUtils.scala
index 93b574c935a..c98183141c9 100644
--- a/kernel/kernel-api/src/test/scala/io/delta/kernel/test/VectorTestUtils.scala
+++ b/kernel/kernel-api/src/test/scala/io/delta/kernel/test/VectorTestUtils.scala
@@ -110,7 +110,7 @@ trait VectorTestUtils {
     }
   }
 
-  protected def intVector(values: Seq[IntegerJ]): ColumnVector = {
+  def intVector(values: Seq[IntegerJ]): ColumnVector = {
     new ColumnVector {
       override def getDataType: DataType = IntegerType.INTEGER