Skip to content

Commit b63db28

Browse files
committed
Output metrics about remove_orphan_files execution
alter table lineitem execute remove_orphan_files(retention_threshold => '0d');

 metric_name                | metric_value
----------------------------+--------------
 processed_manifests_count  | 2
 valid_files_count          | 98
 scanned_files_count        | 97
 deleted_files_count        | 0
1 parent 1ee97a9 commit b63db28

File tree

4 files changed

+57
-10
lines changed

4 files changed

+57
-10
lines changed

docs/src/main/sphinx/connector/iceberg.md

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -907,12 +907,39 @@ time is recommended to keep size of a table's data directory under control.
907907
ALTER TABLE test_table EXECUTE remove_orphan_files(retention_threshold => '7d');
908908
```
909909

910+
```text
911+
metric_name | metric_value
912+
----------------------------+--------------
913+
processed_manifests_count | 2
914+
valid_files_count | 98
915+
scanned_files_count | 97
916+
deleted_files_count | 0
917+
```
918+
910919
The value for `retention_threshold` must be higher than or equal to
911920
`iceberg.remove-orphan-files.min-retention` in the catalog, otherwise the
912921
procedure fails with a similar message: `Retention specified (1.00d) is shorter
913922
than the minimum retention configured in the system (7.00d)`. The default value
914923
for this property is `7d`.
915924

925+
The output of the query has the following metrics:
926+
927+
:::{list-table} Output
928+
:widths: 40, 60
929+
:header-rows: 1
930+
931+
* - Metric name
932+
- Description
933+
* - `processed_manifests_count`
934+
- The count of manifest files read by `remove_orphan_files`.
935+
* - `valid_files_count`
936+
- The count of valid files found in the manifest files.
937+
* - `scanned_files_count`
938+
- The count of files scanned from the file system.
939+
* - `deleted_files_count`
940+
- The count of files deleted by `remove_orphan_files`.
941+
:::
942+
916943
(drop-extended-stats)=
917944
##### drop_extended_stats
918945

plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/IcebergMetadata.java

Lines changed: 25 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1989,8 +1989,7 @@ public Map<String, Long> executeTableExecute(ConnectorSession session, Connector
19891989
executeExpireSnapshots(session, executeHandle);
19901990
return ImmutableMap.of();
19911991
case REMOVE_ORPHAN_FILES:
1992-
executeRemoveOrphanFiles(session, executeHandle);
1993-
return ImmutableMap.of();
1992+
return executeRemoveOrphanFiles(session, executeHandle);
19941993
case ADD_FILES:
19951994
executeAddFiles(session, executeHandle);
19961995
return ImmutableMap.of();
@@ -2118,7 +2117,7 @@ private static void validateTableExecuteParameters(
21182117
sessionMinRetentionParameterName);
21192118
}
21202119

2121-
public void executeRemoveOrphanFiles(ConnectorSession session, IcebergTableExecuteHandle executeHandle)
2120+
public Map<String, Long> executeRemoveOrphanFiles(ConnectorSession session, IcebergTableExecuteHandle executeHandle)
21222121
{
21232122
IcebergRemoveOrphanFilesHandle removeOrphanFilesHandle = (IcebergRemoveOrphanFilesHandle) executeHandle.procedureHandle();
21242123

@@ -2135,14 +2134,14 @@ public void executeRemoveOrphanFiles(ConnectorSession session, IcebergTableExecu
21352134

21362135
if (table.currentSnapshot() == null) {
21372136
log.debug("Skipping remove_orphan_files procedure for empty table %s", table);
2138-
return;
2137+
return ImmutableMap.of();
21392138
}
21402139

21412140
Instant expiration = session.getStart().minusMillis(retention.toMillis());
2142-
removeOrphanFiles(table, session, executeHandle.schemaTableName(), expiration, executeHandle.fileIoProperties());
2141+
return removeOrphanFiles(table, session, executeHandle.schemaTableName(), expiration, executeHandle.fileIoProperties());
21432142
}
21442143

2145-
private void removeOrphanFiles(Table table, ConnectorSession session, SchemaTableName schemaTableName, Instant expiration, Map<String, String> fileIoProperties)
2144+
private Map<String, Long> removeOrphanFiles(Table table, ConnectorSession session, SchemaTableName schemaTableName, Instant expiration, Map<String, String> fileIoProperties)
21462145
{
21472146
Set<String> processedManifestFilePaths = new HashSet<>();
21482147
// Similarly to issues like https://github.com/trinodb/trino/issues/13759, equivalent paths may have different String
@@ -2205,7 +2204,18 @@ private void removeOrphanFiles(Table table, ConnectorSession session, SchemaTabl
22052204
// Ensure any futures still running are canceled in case of failure
22062205
manifestScanFutures.forEach(future -> future.cancel(true));
22072206
}
2208-
scanAndDeleteInvalidFiles(table, session, schemaTableName, expiration, validFileNames, fileIoProperties);
2207+
ScanAndDeleteResult result = scanAndDeleteInvalidFiles(table, session, schemaTableName, expiration, validFileNames, fileIoProperties);
2208+
log.info("remove_orphan_files for table %s completed. Processed %d manifest files, found %d valid files, scanned %d files, deleted %d files",
2209+
schemaTableName,
2210+
processedManifestFilePaths.size(),
2211+
validFileNames.size() - 1, // excluding version-hint.text
2212+
result.scannedFilesCount(),
2213+
result.deletedFilesCount());
2214+
return ImmutableMap.of(
2215+
"processed_manifests_count", (long) processedManifestFilePaths.size(),
2216+
"valid_files_count", (long) validFileNames.size() - 1, // excluding version-hint.text
2217+
"scanned_files_count", result.scannedFilesCount(),
2218+
"deleted_files_count", result.deletedFilesCount());
22092219
}
22102220

22112221
public void executeAddFiles(ConnectorSession session, IcebergTableExecuteHandle executeHandle)
@@ -2240,17 +2250,21 @@ public void executeAddFilesFromTable(ConnectorSession session, IcebergTableExecu
22402250
icebergScanExecutor);
22412251
}
22422252

2243-
private void scanAndDeleteInvalidFiles(Table table, ConnectorSession session, SchemaTableName schemaTableName, Instant expiration, Set<String> validFiles, Map<String, String> fileIoProperties)
2253+
private ScanAndDeleteResult scanAndDeleteInvalidFiles(Table table, ConnectorSession session, SchemaTableName schemaTableName, Instant expiration, Set<String> validFiles, Map<String, String> fileIoProperties)
22442254
{
22452255
List<Future<?>> deleteFutures = new ArrayList<>();
2256+
long deletedFilesCount = 0;
2257+
long scannedFilesCount = 0;
22462258
try {
22472259
List<Location> filesToDelete = new ArrayList<>(DELETE_BATCH_SIZE);
22482260
TrinoFileSystem fileSystem = fileSystemFactory.create(session.getIdentity(), fileIoProperties);
22492261
FileIterator allFiles = fileSystem.listFiles(Location.of(table.location()));
22502262
while (allFiles.hasNext()) {
22512263
FileEntry entry = allFiles.next();
2264+
scannedFilesCount++;
22522265
if (entry.lastModified().isBefore(expiration) && !validFiles.contains(entry.location().fileName())) {
22532266
filesToDelete.add(entry.location());
2267+
deletedFilesCount++;
22542268
if (filesToDelete.size() >= DELETE_BATCH_SIZE) {
22552269
List<Location> finalFilesToDelete = filesToDelete;
22562270
deleteFutures.add(icebergFileDeleteExecutor.submit(() -> deleteFiles(finalFilesToDelete, schemaTableName, fileSystem)));
@@ -2277,8 +2291,11 @@ private void scanAndDeleteInvalidFiles(Table table, ConnectorSession session, Sc
22772291
// Ensure any futures still running are canceled in case of failure
22782292
deleteFutures.forEach(future -> future.cancel(true));
22792293
}
2294+
return new ScanAndDeleteResult(deletedFilesCount, scannedFilesCount);
22802295
}
22812296

2297+
// Counters returned by scanAndDeleteInvalidFiles.
// deletedFilesCount: incremented for each file added to a delete batch (counted when queued, not when the delete completes).
// scannedFilesCount: incremented for each entry returned by the file system listing of the table location.
private record ScanAndDeleteResult(long deletedFilesCount, long scannedFilesCount) {}
2298+
22822299
private void deleteFiles(List<Location> files, SchemaTableName schemaTableName, TrinoFileSystem fileSystem)
22832300
{
22842301
log.debug("Deleting files while removing orphan files for table %s [%s]", schemaTableName, files);

plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/BaseIcebergConnectorTest.java

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6645,7 +6645,10 @@ public void testRemoveOrphanFiles()
66456645
List<String> initialDataFiles = getAllDataFilesFromTableDirectory(tableName);
66466646
assertThat(initialDataFiles).contains(orphanFile);
66476647

6648-
assertQuerySucceeds(sessionWithShortRetentionUnlocked, "ALTER TABLE " + tableName + " EXECUTE REMOVE_ORPHAN_FILES (retention_threshold => '0s')");
6648+
assertUpdate(
6649+
sessionWithShortRetentionUnlocked,
6650+
"ALTER TABLE " + tableName + " EXECUTE REMOVE_ORPHAN_FILES (retention_threshold => '0s')",
6651+
"VALUES ('processed_manifests_count', 3), ('valid_files_count', 16), ('scanned_files_count', 17), ('deleted_files_count', 1)");
66496652
assertQuery("SELECT * FROM " + tableName, "VALUES ('one', 1), ('three', 3)");
66506653

66516654
List<String> updatedDataFiles = getAllDataFilesFromTableDirectory(tableName);

testing/trino-testing/src/main/java/io/trino/testing/QueryAssertions.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -336,7 +336,7 @@ private static void assertDistributedQuery(
336336
List<MaterializedRow> actualRows = actualResults.getMaterializedRows();
337337
List<MaterializedRow> expectedRows = expectedResults.getMaterializedRows();
338338

339-
if (compareUpdate) {
339+
if (compareUpdate && !actualResults.getUpdateType().equals(Optional.of("ALTER TABLE EXECUTE"))) {
340340
if (actualResults.getUpdateType().isEmpty()) {
341341
fail("update type not present for query " + queryId + ": \n" + actual);
342342
}

0 commit comments

Comments
 (0)