Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@
import static java.util.Locale.ENGLISH;
import static java.util.concurrent.TimeUnit.DAYS;
import static java.util.concurrent.TimeUnit.SECONDS;
import static org.apache.iceberg.TableProperties.GC_ENABLED_DEFAULT;

@DefunctConfig({
"iceberg.allow-legacy-snapshot-syntax",
Expand Down Expand Up @@ -95,6 +96,7 @@ public class IcebergConfig
private int planningThreads = Math.min(Runtime.getRuntime().availableProcessors(), 16);
private int fileDeleteThreads = Runtime.getRuntime().availableProcessors() * 2;
private List<String> allowedExtraProperties = ImmutableList.of();
private boolean defaultNewTablesGcEnabled = GC_ENABLED_DEFAULT;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The common approach is to refrain from referencing values from dependent libraries in config files, because this may trigger a silent change in the default configuration of the connector.
Please consider using `true` instead.

private boolean incrementalRefreshEnabled = true;
private boolean metadataCacheEnabled = true;
private boolean objectStoreLayoutEnabled;
Expand Down Expand Up @@ -567,6 +569,19 @@ public IcebergConfig setAllowedExtraProperties(List<String> allowedExtraProperti
return this;
}

/**
 * Returns the default value applied to the Iceberg {@code gc.enabled} table property
 * when creating new tables (when the user does not set the property explicitly).
 */
public boolean isDefaultNewTablesGcEnabled()
{
return defaultNewTablesGcEnabled;
}

@Config("iceberg.default-new-tables-gc.enabled")
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

iceberg.default-new-tables.gc.enabled

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could you document this property in iceberg.md?

@ConfigDescription("Default value for Iceberg property gc.enabled when creating new tables")
Copy link
Contributor

@findinpath findinpath Nov 19, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Let's also add corresponding documentation in iceberg.md.
I noticed that the documentation for the gc.enabled table property (introduced by apache/iceberg#1796) is unfortunately missing on apache/iceberg as well - as a reference point you could use apache/iceberg#9231

Property to disable garbage collection operations such as expiring snapshots or removing orphan files

It seems that in the meantime the table property also has repercussions on whether a dropped table's files are deleted.

/**
 * Sets the default value for the Iceberg {@code gc.enabled} table property applied
 * when creating new tables.
 *
 * @param defaultNewTablesGcEnabled default for {@code gc.enabled} on newly created tables
 * @return this config instance, for fluent chaining
 */
public IcebergConfig setDefaultNewTablesGcEnabled(boolean defaultNewTablesGcEnabled)
{
this.defaultNewTablesGcEnabled = defaultNewTablesGcEnabled;
return this;
}

public boolean isIncrementalRefreshEnabled()
{
return incrementalRefreshEnabled;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -473,6 +473,7 @@ public class IcebergMetadata
private final Executor metadataFetchingExecutor;
private final ExecutorService icebergPlanningExecutor;
private final ExecutorService icebergFileDeleteExecutor;
private final boolean defaultNewTablesGcEnabled;
private final Map<IcebergTableHandle, AtomicReference<TableStatistics>> tableStatisticsCache = new ConcurrentHashMap<>();

private Transaction transaction;
Expand All @@ -491,7 +492,8 @@ public IcebergMetadata(
ExecutorService icebergScanExecutor,
Executor metadataFetchingExecutor,
ExecutorService icebergPlanningExecutor,
ExecutorService icebergFileDeleteExecutor)
ExecutorService icebergFileDeleteExecutor,
boolean defaultNewTablesGcEnabled)
{
this.typeManager = requireNonNull(typeManager, "typeManager is null");
this.commitTaskCodec = requireNonNull(commitTaskCodec, "commitTaskCodec is null");
Expand All @@ -506,6 +508,7 @@ public IcebergMetadata(
this.metadataFetchingExecutor = requireNonNull(metadataFetchingExecutor, "metadataFetchingExecutor is null");
this.icebergPlanningExecutor = requireNonNull(icebergPlanningExecutor, "icebergPlanningExecutor is null");
this.icebergFileDeleteExecutor = requireNonNull(icebergFileDeleteExecutor, "icebergFileDeleteExecutor is null");
this.defaultNewTablesGcEnabled = defaultNewTablesGcEnabled;
}

@Override
Expand Down Expand Up @@ -1300,7 +1303,7 @@ public ConnectorOutputTableHandle beginCreateTable(ConnectorSession session, Con
tableLocation = getTableLocation(tableMetadata.getProperties())
.orElseGet(() -> catalog.defaultTableLocation(session, tableMetadata.getTable()));
}
transaction = newCreateTableTransaction(catalog, tableMetadata, session, replace, tableLocation, allowedExtraProperties);
transaction = newCreateTableTransaction(catalog, tableMetadata, session, replace, tableLocation, allowedExtraProperties, defaultNewTablesGcEnabled);
Location location = Location.of(transaction.table().location());
try {
// S3 Tables internally assigns a unique location for each table
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@ public class IcebergMetadataFactory
private final Executor metadataFetchingExecutor;
private final ExecutorService icebergPlanningExecutor;
private final ExecutorService icebergFileDeleteExecutor;
private final boolean defaultNewTablesGcEnabled;

@Inject
public IcebergMetadataFactory(
Expand Down Expand Up @@ -87,6 +88,7 @@ public IcebergMetadataFactory(
}
this.icebergPlanningExecutor = requireNonNull(icebergPlanningExecutor, "icebergPlanningExecutor is null");
this.icebergFileDeleteExecutor = requireNonNull(icebergFileDeleteExecutor, "icebergFileDeleteExecutor is null");
this.defaultNewTablesGcEnabled = config.isDefaultNewTablesGcEnabled();
}

public IcebergMetadata create(ConnectorIdentity identity)
Expand All @@ -104,6 +106,7 @@ public IcebergMetadata create(ConnectorIdentity identity)
icebergScanExecutor,
metadataFetchingExecutor,
icebergPlanningExecutor,
icebergFileDeleteExecutor);
icebergFileDeleteExecutor,
defaultNewTablesGcEnabled);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -188,6 +188,8 @@
import static org.apache.iceberg.TableProperties.DEFAULT_FILE_FORMAT;
import static org.apache.iceberg.TableProperties.DEFAULT_FILE_FORMAT_DEFAULT;
import static org.apache.iceberg.TableProperties.FORMAT_VERSION;
import static org.apache.iceberg.TableProperties.GC_ENABLED;
import static org.apache.iceberg.TableProperties.GC_ENABLED_DEFAULT;
import static org.apache.iceberg.TableProperties.OBJECT_STORE_ENABLED;
import static org.apache.iceberg.TableProperties.OBJECT_STORE_ENABLED_DEFAULT;
import static org.apache.iceberg.TableProperties.ORC_BLOOM_FILTER_COLUMNS;
Expand Down Expand Up @@ -869,7 +871,7 @@ public static List<ViewColumn> viewColumnsFromSchema(TypeManager typeManager, Sc
.toList();
}

public static Transaction newCreateTableTransaction(TrinoCatalog catalog, ConnectorTableMetadata tableMetadata, ConnectorSession session, boolean replace, String tableLocation, Predicate<String> allowedExtraProperties)
public static Transaction newCreateTableTransaction(TrinoCatalog catalog, ConnectorTableMetadata tableMetadata, ConnectorSession session, boolean replace, String tableLocation, Predicate<String> allowedExtraProperties, boolean defaultNewTablesGcEnabled)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: Please separate this long list of parameters on new lines for increased readability.

{
SchemaTableName schemaTableName = tableMetadata.getTable();
Schema schema = schemaFromMetadata(tableMetadata.getColumns());
Expand All @@ -879,10 +881,10 @@ public static Transaction newCreateTableTransaction(TrinoCatalog catalog, Connec
Transaction transaction;

if (replace) {
transaction = catalog.newCreateOrReplaceTableTransaction(session, schemaTableName, schema, partitionSpec, sortOrder, tableLocation, createTableProperties(tableMetadata, allowedExtraProperties));
transaction = catalog.newCreateOrReplaceTableTransaction(session, schemaTableName, schema, partitionSpec, sortOrder, tableLocation, createTableProperties(tableMetadata, allowedExtraProperties, defaultNewTablesGcEnabled));
}
else {
transaction = catalog.newCreateTableTransaction(session, schemaTableName, schema, partitionSpec, sortOrder, Optional.ofNullable(tableLocation), createTableProperties(tableMetadata, allowedExtraProperties));
transaction = catalog.newCreateTableTransaction(session, schemaTableName, schema, partitionSpec, sortOrder, Optional.ofNullable(tableLocation), createTableProperties(tableMetadata, allowedExtraProperties, defaultNewTablesGcEnabled));
}

// If user doesn't set compression-codec for parquet, we need to remove write.parquet.compression-codec property,
Expand All @@ -897,7 +899,7 @@ public static Transaction newCreateTableTransaction(TrinoCatalog catalog, Connec
return transaction;
}

public static Map<String, String> createTableProperties(ConnectorTableMetadata tableMetadata, Predicate<String> allowedExtraProperties)
public static Map<String, String> createTableProperties(ConnectorTableMetadata tableMetadata, Predicate<String> allowedExtraProperties, boolean defaultNewTablesGcEnabled)
{
ImmutableMap.Builder<String, String> propertiesBuilder = ImmutableMap.builder();
IcebergFileFormat fileFormat = IcebergTableProperties.getFileFormat(tableMetadata.getProperties());
Expand Down Expand Up @@ -955,10 +957,15 @@ public static Map<String, String> createTableProperties(ConnectorTableMetadata t
propertiesBuilder.put(TABLE_COMMENT, tableMetadata.getComment().get());
}

Map<String, String> baseProperties = propertiesBuilder.buildOrThrow();
Map<String, String> extraProperties = IcebergTableProperties.getExtraProperties(tableMetadata.getProperties()).orElseGet(ImmutableMap::of);
verifyExtraProperties(propertiesBuilder.buildOrThrow().keySet(), extraProperties, allowedExtraProperties);

// If user doesn't set gc.enabled, we need to set it to defaultNewTablesGcEnabled value
if (!extraProperties.containsKey(GC_ENABLED) && defaultNewTablesGcEnabled != GC_ENABLED_DEFAULT) {
propertiesBuilder.put(GC_ENABLED, Boolean.toString(defaultNewTablesGcEnabled));
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

One thing to note about relying on GC_ENABLED property of iceberg is that it will also disallow expire_snapshots.
Is that something desirable ?
I can imagine that some users just want protection on DROP, but with this they can't run the regular maintenance operation of expiring old snapshots

Copy link
Member Author

@sopel39 sopel39 Nov 18, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I guess they could enable GC for expire_snapshots. Users often have Spark on the side anyway. Unless there is another property that would just limit DROP, I think it's reasonable to expect data admins to know how to handle this situation.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There is a check in iceberg library org.apache.iceberg.RemoveSnapshots#RemoveSnapshots

ValidationException.check(
        PropertyUtil.propertyAsBoolean(base.properties(), GC_ENABLED, GC_ENABLED_DEFAULT),
        "Cannot expire snapshots: GC is disabled (deleting files may corrupt other tables)");

Using Spark or something else won't help with that.
I'm not sure how an admin deals with this; adding and removing the property before/after every expire_snapshots is cumbersome and removes the DROP protection for the duration of the command. Also, most people have automated maintenance services, and we wouldn't want people to have to go and modify those.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It looks like a breaking change to expire_snapshots?

But if another engine disables the gc flag, it's also a problem for Trino: the user would not be able to expire snapshots because of an unsupported or externally-set property.

}

verifyExtraProperties(baseProperties.keySet(), extraProperties, allowedExtraProperties);
Map<String, String> baseProperties = propertiesBuilder.buildOrThrow();

return ImmutableMap.<String, String>builder()
.putAll(baseProperties)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,7 @@
import static org.apache.iceberg.BaseMetastoreTableOperations.METADATA_LOCATION_PROP;
import static org.apache.iceberg.TableMetadata.newTableMetadata;
import static org.apache.iceberg.TableMetadataParser.getFileExtension;
import static org.apache.iceberg.TableProperties.GC_ENABLED_DEFAULT;
import static org.apache.iceberg.TableProperties.METADATA_COMPRESSION_DEFAULT;
import static org.apache.iceberg.Transactions.createOrReplaceTableTransaction;
import static org.apache.iceberg.Transactions.createTableTransaction;
Expand Down Expand Up @@ -312,7 +313,7 @@ protected Location createMaterializedViewStorage(
Schema schema = schemaFromMetadata(columns);
PartitionSpec partitionSpec = parsePartitionFields(schema, getPartitioning(materializedViewProperties));
SortOrder sortOrder = parseSortFields(schema, getSortOrder(materializedViewProperties));
Map<String, String> properties = createTableProperties(new ConnectorTableMetadata(storageTableName, columns, materializedViewProperties, Optional.empty()), _ -> false);
Map<String, String> properties = createTableProperties(new ConnectorTableMetadata(storageTableName, columns, materializedViewProperties, Optional.empty()), _ -> false, GC_ENABLED_DEFAULT);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
Map<String, String> properties = createTableProperties(new ConnectorTableMetadata(storageTableName, columns, materializedViewProperties, Optional.empty()), _ -> false, GC_ENABLED_DEFAULT);
Map<String, String> properties = createTableProperties(new ConnectorTableMetadata(storageTableName, columns, materializedViewProperties, Optional.empty()), _ -> false, true);


TableMetadata metadata = newTableMetadata(schema, partitionSpec, sortOrder, tableLocation, properties);

Expand Down Expand Up @@ -350,7 +351,7 @@ protected SchemaTableName createMaterializedViewStorageTable(
ConnectorTableMetadata tableMetadata = new ConnectorTableMetadata(storageTable, columns, materializedViewProperties, Optional.empty());
String tableLocation = getTableLocation(tableMetadata.getProperties())
.orElseGet(() -> defaultTableLocation(session, tableMetadata.getTable()));
Transaction transaction = IcebergUtil.newCreateTableTransaction(this, tableMetadata, session, false, tableLocation, _ -> false);
Transaction transaction = IcebergUtil.newCreateTableTransaction(this, tableMetadata, session, false, tableLocation, _ -> false, GC_ENABLED_DEFAULT);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
Transaction transaction = IcebergUtil.newCreateTableTransaction(this, tableMetadata, session, false, tableLocation, _ -> false, GC_ENABLED_DEFAULT);
Transaction transaction = IcebergUtil.newCreateTableTransaction(this, tableMetadata, session, false, tableLocation, _ -> false, true);

AppendFiles appendFiles = transaction.newAppend();
commit(appendFiles, session);
transaction.commitTransaction();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -160,6 +160,9 @@
import static java.util.Objects.requireNonNull;
import static org.apache.iceberg.BaseMetastoreTableOperations.METADATA_LOCATION_PROP;
import static org.apache.iceberg.CatalogUtil.dropTableData;
import static org.apache.iceberg.TableProperties.GC_ENABLED;
import static org.apache.iceberg.TableProperties.GC_ENABLED_DEFAULT;
import static org.apache.iceberg.util.PropertyUtil.propertyAsBoolean;

public class TrinoGlueCatalog
extends AbstractTrinoCatalog
Expand Down Expand Up @@ -707,7 +710,9 @@ public void dropTable(ConnectorSession session, SchemaTableName schemaTableName)
// So log the exception and continue with deleting the table location
LOG.warn(e, "Failed to delete table data referenced by metadata");
}
deleteTableDirectory(fileSystemFactory.create(session), schemaTableName, table.location());
if (propertyAsBoolean(table.properties(), GC_ENABLED, GC_ENABLED_DEFAULT)) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Pls replace GC_ENABLED_DEFAULT with true - we don't have control on the value of GC_ENABLED_DEFAULT when the iceberg library gets updated.

deleteTableDirectory(fileSystemFactory.create(session), schemaTableName, table.location());
}
invalidateTableCache(schemaTableName);
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -127,6 +127,9 @@
import static org.apache.iceberg.BaseMetastoreTableOperations.METADATA_LOCATION_PROP;
import static org.apache.iceberg.BaseMetastoreTableOperations.TABLE_TYPE_PROP;
import static org.apache.iceberg.CatalogUtil.dropTableData;
import static org.apache.iceberg.TableProperties.GC_ENABLED;
import static org.apache.iceberg.TableProperties.GC_ENABLED_DEFAULT;
import static org.apache.iceberg.util.PropertyUtil.propertyAsBoolean;

public class TrinoHiveCatalog
extends AbstractTrinoCatalog
Expand Down Expand Up @@ -436,7 +439,9 @@ public void dropTable(ConnectorSession session, SchemaTableName schemaTableName)
// So log the exception and continue with deleting the table location
log.warn(e, "Failed to delete table data referenced by metadata");
}
deleteTableDirectory(fileSystemFactory.create(session), schemaTableName, metastoreTable.getStorage().getLocation());
if (propertyAsBoolean(metadata.properties(), GC_ENABLED, GC_ENABLED_DEFAULT)) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
if (propertyAsBoolean(metadata.properties(), GC_ENABLED, GC_ENABLED_DEFAULT)) {
if (propertyAsBoolean(metadata.properties(), GC_ENABLED, true)) {

deleteTableDirectory(fileSystemFactory.create(session), schemaTableName, metastoreTable.getStorage().getLocation());
}
invalidateTableCache(schemaTableName);
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,9 @@
import static java.util.Locale.ENGLISH;
import static java.util.Objects.requireNonNull;
import static org.apache.iceberg.CatalogUtil.dropTableData;
import static org.apache.iceberg.TableProperties.GC_ENABLED;
import static org.apache.iceberg.TableProperties.GC_ENABLED_DEFAULT;
import static org.apache.iceberg.util.PropertyUtil.propertyAsBoolean;
import static org.apache.iceberg.view.ViewProperties.COMMENT;

public class TrinoJdbcCatalog
Expand Down Expand Up @@ -348,7 +351,9 @@ public void dropTable(ConnectorSession session, SchemaTableName schemaTableName)
// So log the exception and continue with deleting the table location
LOG.warn(e, "Failed to delete table data referenced by metadata");
}
deleteTableDirectory(fileSystemFactory.create(session), schemaTableName, table.location());
if (propertyAsBoolean(table.properties(), GC_ENABLED, GC_ENABLED_DEFAULT)) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
if (propertyAsBoolean(table.properties(), GC_ENABLED, GC_ENABLED_DEFAULT)) {
if (propertyAsBoolean(table.properties(), GC_ENABLED, true)) {

deleteTableDirectory(fileSystemFactory.create(session), schemaTableName, table.location());
}
invalidateTableCache(schemaTableName);
}

Expand Down
Loading