|
17 | 17 |
|
18 | 18 | import io.delta.kernel.CommitActions; |
19 | 19 | import io.delta.kernel.CommitRange; |
| 20 | +import io.delta.kernel.Scan; |
20 | 21 | import io.delta.kernel.Snapshot; |
21 | 22 | import io.delta.kernel.data.ColumnarBatch; |
| 23 | +import io.delta.kernel.data.FilteredColumnarBatch; |
| 24 | +import io.delta.kernel.data.Row; |
22 | 25 | import io.delta.kernel.defaults.engine.DefaultEngine; |
23 | 26 | import io.delta.kernel.engine.Engine; |
24 | 27 | import io.delta.kernel.exceptions.UnsupportedTableFeatureException; |
|
35 | 38 | import java.io.IOException; |
36 | 39 | import java.time.ZoneId; |
37 | 40 | import java.util.*; |
| 41 | +import java.util.Comparator; |
38 | 42 | import org.apache.hadoop.conf.Configuration; |
39 | 43 | import org.apache.spark.sql.SparkSession; |
40 | 44 | import org.apache.spark.sql.connector.read.InputPartition; |
@@ -154,9 +158,10 @@ public Offset initialOffset() { |
154 | 158 | version = startingVersionOpt.get(); |
155 | 159 | isInitialSnapshot = false; |
156 | 160 | } else { |
157 | | - // TODO(#5318): Support initial snapshot case (isInitialSnapshot == true) |
158 | | - throw new UnsupportedOperationException( |
159 | | - "initialOffset with initial snapshot is not supported yet"); |
| 161 | + // No starting version - create initial snapshot at latest version |
| 162 | + Snapshot latestSnapshot = snapshotManager.loadLatestSnapshot(); |
| 163 | + version = latestSnapshot.getVersion(); |
| 164 | + isInitialSnapshot = true; |
160 | 165 | } |
161 | 166 |
|
162 | 167 | return DeltaSourceOffset.apply( |
@@ -468,8 +473,16 @@ CloseableIterator<IndexedFile> getFileChanges( |
468 | 473 | CloseableIterator<IndexedFile> result; |
469 | 474 |
|
470 | 475 | if (isInitialSnapshot) { |
471 | | - // TODO(#5318): Implement initial snapshot |
472 | | - throw new UnsupportedOperationException("initial snapshot is not supported yet"); |
| 476 | + CloseableIterator<IndexedFile> snapshotFiles = getSnapshotFiles(fromVersion); |
| 477 | + long latestVersion = snapshotAtSourceInit.getVersion(); |
| 478 | + if (latestVersion > fromVersion) { |
| 479 | + // Start reading delta logs from fromVersion + 1 to avoid duplicating snapshot files |
| 480 | + CloseableIterator<IndexedFile> deltaChanges = filterDeltaLogs(fromVersion + 1, endOffset); |
| 481 | + // Lazily combine snapshot files and delta changes |
| 482 | + result = snapshotFiles.combine(deltaChanges); |
| 483 | + } else { |
| 484 | + result = snapshotFiles; |
| 485 | + } |
473 | 486 | } else { |
474 | 487 | result = filterDeltaLogs(fromVersion, endOffset); |
475 | 488 | } |
@@ -651,4 +664,66 @@ private long extractIndexedFilesFromBatch( |
651 | 664 |
|
652 | 665 | return index; |
653 | 666 | } |
| 667 | + |
| 668 | + /** |
| 669 | + * Get all files from a snapshot at the specified version, sorted by modificationTime and path, |
| 670 | + * with indices assigned sequentially, and wrapped with BEGIN/END sentinels. |
| 671 | + * |
| 672 | + * <p>Mimics DeltaSourceSnapshot in DSv1. |
| 673 | + * |
| 674 | + * @param version The snapshot version to read |
| 675 | + * @return An iterator of IndexedFile representing the snapshot files |
| 676 | + */ |
| 677 | + private CloseableIterator<IndexedFile> getSnapshotFiles(long version) { |
| 678 | + // Load snapshot at the specified version |
| 679 | + Snapshot snapshot = snapshotManager.loadSnapshotAt(version); |
| 680 | + |
| 681 | + // Build scan to get all files |
| 682 | + Scan scan = snapshot.getScanBuilder().build(); |
| 683 | + |
| 684 | + // Collect all AddFile actions |
| 685 | + List<AddFile> addFiles = new ArrayList<>(); |
| 686 | + try (CloseableIterator<FilteredColumnarBatch> filesIter = scan.getScanFiles(engine)) { |
| 687 | + while (filesIter.hasNext()) { |
| 688 | + FilteredColumnarBatch batch = filesIter.next(); |
| 689 | + try (CloseableIterator<Row> rowIter = batch.getRows()) { |
| 690 | + while (rowIter.hasNext()) { |
| 691 | + Row fileRow = rowIter.next(); |
| 692 | + // Extract AddFile from the "add" column (index 0) |
| 693 | + if (!fileRow.isNullAt(0)) { |
| 694 | + Row addFileRow = fileRow.getStruct(0); |
| 695 | + AddFile addFile = new AddFile(addFileRow); |
| 696 | + // Only include files with dataChange=true |
| 697 | + if (addFile.getDataChange()) { |
| 698 | + addFiles.add(addFile); |
| 699 | + } |
| 700 | + } |
| 701 | + } |
| 702 | + } |
| 703 | + } |
| 704 | + } catch (IOException e) { |
| 705 | + throw new RuntimeException( |
| 706 | + String.format("Failed to read snapshot files at version %d", version), e); |
| 707 | + } |
| 708 | + |
| 709 | + // CRITICAL: Sort by modificationTime, then path for deterministic ordering |
| 710 | + addFiles.sort( |
| 711 | + Comparator.comparing(AddFile::getModificationTime).thenComparing(AddFile::getPath)); |
| 712 | + |
| 713 | + // Build IndexedFile list with sentinels |
| 714 | + List<IndexedFile> indexedFiles = new ArrayList<>(); |
| 715 | + |
| 716 | + // Add BEGIN sentinel |
| 717 | + indexedFiles.add(new IndexedFile(version, DeltaSourceOffset.BASE_INDEX(), null)); |
| 718 | + |
| 719 | + // Add data files with sequential indices starting from 0 |
| 720 | + for (int i = 0; i < addFiles.size(); i++) { |
| 721 | + indexedFiles.add(new IndexedFile(version, i, addFiles.get(i))); |
| 722 | + } |
| 723 | + |
| 724 | + // Add END sentinel |
| 725 | + indexedFiles.add(new IndexedFile(version, DeltaSourceOffset.END_INDEX(), null)); |
| 726 | + |
| 727 | + return Utils.toCloseableIterator(indexedFiles.iterator()); |
| 728 | + } |
654 | 729 | } |
0 commit comments