-
Notifications
You must be signed in to change notification settings - Fork 3.4k
Add iceberg.default-new-tables-gc.enabled config property #27359
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -45,6 +45,7 @@ | |
| import static java.util.Locale.ENGLISH; | ||
| import static java.util.concurrent.TimeUnit.DAYS; | ||
| import static java.util.concurrent.TimeUnit.SECONDS; | ||
| import static org.apache.iceberg.TableProperties.GC_ENABLED_DEFAULT; | ||
|
|
||
| @DefunctConfig({ | ||
| "iceberg.allow-legacy-snapshot-syntax", | ||
|
|
@@ -95,6 +96,7 @@ public class IcebergConfig | |
| private int planningThreads = Math.min(Runtime.getRuntime().availableProcessors(), 16); | ||
| private int fileDeleteThreads = Runtime.getRuntime().availableProcessors() * 2; | ||
| private List<String> allowedExtraProperties = ImmutableList.of(); | ||
| private boolean defaultNewTablesGcEnabled = GC_ENABLED_DEFAULT; | ||
| private boolean incrementalRefreshEnabled = true; | ||
| private boolean metadataCacheEnabled = true; | ||
| private boolean objectStoreLayoutEnabled; | ||
|
|
@@ -567,6 +569,19 @@ public IcebergConfig setAllowedExtraProperties(List<String> allowedExtraProperti | |
| return this; | ||
| } | ||
|
|
||
| public boolean isDefaultNewTablesGcEnabled() | ||
| { | ||
| return defaultNewTablesGcEnabled; | ||
| } | ||
|
|
||
| @Config("iceberg.default-new-tables-gc.enabled") | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Could you document this property in |
||
| @ConfigDescription("Default value for Iceberg property gc.enabled when creating new tables") | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Let's add as well corresponding documentation in
It seems that in the meantime the table property has as well repercussions on whether to delete dropped table's files as well. |
||
| public IcebergConfig setDefaultNewTablesGcEnabled(boolean defaultNewTablesGcEnabled) | ||
| { | ||
| this.defaultNewTablesGcEnabled = defaultNewTablesGcEnabled; | ||
| return this; | ||
| } | ||
|
|
||
| public boolean isIncrementalRefreshEnabled() | ||
| { | ||
| return incrementalRefreshEnabled; | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -188,6 +188,8 @@ | |
| import static org.apache.iceberg.TableProperties.DEFAULT_FILE_FORMAT; | ||
| import static org.apache.iceberg.TableProperties.DEFAULT_FILE_FORMAT_DEFAULT; | ||
| import static org.apache.iceberg.TableProperties.FORMAT_VERSION; | ||
| import static org.apache.iceberg.TableProperties.GC_ENABLED; | ||
| import static org.apache.iceberg.TableProperties.GC_ENABLED_DEFAULT; | ||
| import static org.apache.iceberg.TableProperties.OBJECT_STORE_ENABLED; | ||
| import static org.apache.iceberg.TableProperties.OBJECT_STORE_ENABLED_DEFAULT; | ||
| import static org.apache.iceberg.TableProperties.ORC_BLOOM_FILTER_COLUMNS; | ||
|
|
@@ -869,7 +871,7 @@ public static List<ViewColumn> viewColumnsFromSchema(TypeManager typeManager, Sc | |
| .toList(); | ||
| } | ||
|
|
||
| public static Transaction newCreateTableTransaction(TrinoCatalog catalog, ConnectorTableMetadata tableMetadata, ConnectorSession session, boolean replace, String tableLocation, Predicate<String> allowedExtraProperties) | ||
| public static Transaction newCreateTableTransaction(TrinoCatalog catalog, ConnectorTableMetadata tableMetadata, ConnectorSession session, boolean replace, String tableLocation, Predicate<String> allowedExtraProperties, boolean defaultNewTablesGcEnabled) | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. nit: Please separate this long list of parameters on new lines for increased readability. |
||
| { | ||
| SchemaTableName schemaTableName = tableMetadata.getTable(); | ||
| Schema schema = schemaFromMetadata(tableMetadata.getColumns()); | ||
|
|
@@ -879,10 +881,10 @@ public static Transaction newCreateTableTransaction(TrinoCatalog catalog, Connec | |
| Transaction transaction; | ||
|
|
||
| if (replace) { | ||
| transaction = catalog.newCreateOrReplaceTableTransaction(session, schemaTableName, schema, partitionSpec, sortOrder, tableLocation, createTableProperties(tableMetadata, allowedExtraProperties)); | ||
| transaction = catalog.newCreateOrReplaceTableTransaction(session, schemaTableName, schema, partitionSpec, sortOrder, tableLocation, createTableProperties(tableMetadata, allowedExtraProperties, defaultNewTablesGcEnabled)); | ||
| } | ||
| else { | ||
| transaction = catalog.newCreateTableTransaction(session, schemaTableName, schema, partitionSpec, sortOrder, Optional.ofNullable(tableLocation), createTableProperties(tableMetadata, allowedExtraProperties)); | ||
| transaction = catalog.newCreateTableTransaction(session, schemaTableName, schema, partitionSpec, sortOrder, Optional.ofNullable(tableLocation), createTableProperties(tableMetadata, allowedExtraProperties, defaultNewTablesGcEnabled)); | ||
| } | ||
|
|
||
| // If user doesn't set compression-codec for parquet, we need to remove write.parquet.compression-codec property, | ||
|
|
@@ -897,7 +899,7 @@ public static Transaction newCreateTableTransaction(TrinoCatalog catalog, Connec | |
| return transaction; | ||
| } | ||
|
|
||
| public static Map<String, String> createTableProperties(ConnectorTableMetadata tableMetadata, Predicate<String> allowedExtraProperties) | ||
| public static Map<String, String> createTableProperties(ConnectorTableMetadata tableMetadata, Predicate<String> allowedExtraProperties, boolean defaultNewTablesGcEnabled) | ||
| { | ||
| ImmutableMap.Builder<String, String> propertiesBuilder = ImmutableMap.builder(); | ||
| IcebergFileFormat fileFormat = IcebergTableProperties.getFileFormat(tableMetadata.getProperties()); | ||
|
|
@@ -955,10 +957,15 @@ public static Map<String, String> createTableProperties(ConnectorTableMetadata t | |
| propertiesBuilder.put(TABLE_COMMENT, tableMetadata.getComment().get()); | ||
| } | ||
|
|
||
| Map<String, String> baseProperties = propertiesBuilder.buildOrThrow(); | ||
| Map<String, String> extraProperties = IcebergTableProperties.getExtraProperties(tableMetadata.getProperties()).orElseGet(ImmutableMap::of); | ||
| verifyExtraProperties(propertiesBuilder.buildOrThrow().keySet(), extraProperties, allowedExtraProperties); | ||
|
|
||
| // If user doesn't set gc.enabled, we need to set it to defaultNewTablesGcEnabled value | ||
| if (!extraProperties.containsKey(GC_ENABLED) && defaultNewTablesGcEnabled != GC_ENABLED_DEFAULT) { | ||
| propertiesBuilder.put(GC_ENABLED, Boolean.toString(defaultNewTablesGcEnabled)); | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. One thing to note about relying on GC_ENABLED property of iceberg is that it will also disallow expire_snapshots.
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I guess they could enable GC for
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. There is a check in iceberg library Using Spark or something else won't help with that.
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It looks like a breaking change to But if other engine disable the gc flag, it's also a problem to Trino, user not able to expire snapshots due a unsupported or exposed property |
||
| } | ||
|
|
||
| verifyExtraProperties(baseProperties.keySet(), extraProperties, allowedExtraProperties); | ||
| Map<String, String> baseProperties = propertiesBuilder.buildOrThrow(); | ||
|
|
||
| return ImmutableMap.<String, String>builder() | ||
| .putAll(baseProperties) | ||
|
|
||
| Original file line number | Diff line number | Diff line change | ||||
|---|---|---|---|---|---|---|
|
|
@@ -107,6 +107,7 @@ | |||||
| import static org.apache.iceberg.BaseMetastoreTableOperations.METADATA_LOCATION_PROP; | ||||||
| import static org.apache.iceberg.TableMetadata.newTableMetadata; | ||||||
| import static org.apache.iceberg.TableMetadataParser.getFileExtension; | ||||||
| import static org.apache.iceberg.TableProperties.GC_ENABLED_DEFAULT; | ||||||
| import static org.apache.iceberg.TableProperties.METADATA_COMPRESSION_DEFAULT; | ||||||
| import static org.apache.iceberg.Transactions.createOrReplaceTableTransaction; | ||||||
| import static org.apache.iceberg.Transactions.createTableTransaction; | ||||||
|
|
@@ -312,7 +313,7 @@ protected Location createMaterializedViewStorage( | |||||
| Schema schema = schemaFromMetadata(columns); | ||||||
| PartitionSpec partitionSpec = parsePartitionFields(schema, getPartitioning(materializedViewProperties)); | ||||||
| SortOrder sortOrder = parseSortFields(schema, getSortOrder(materializedViewProperties)); | ||||||
| Map<String, String> properties = createTableProperties(new ConnectorTableMetadata(storageTableName, columns, materializedViewProperties, Optional.empty()), _ -> false); | ||||||
| Map<String, String> properties = createTableProperties(new ConnectorTableMetadata(storageTableName, columns, materializedViewProperties, Optional.empty()), _ -> false, GC_ENABLED_DEFAULT); | ||||||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||
|
|
||||||
| TableMetadata metadata = newTableMetadata(schema, partitionSpec, sortOrder, tableLocation, properties); | ||||||
|
|
||||||
|
|
@@ -350,7 +351,7 @@ protected SchemaTableName createMaterializedViewStorageTable( | |||||
| ConnectorTableMetadata tableMetadata = new ConnectorTableMetadata(storageTable, columns, materializedViewProperties, Optional.empty()); | ||||||
| String tableLocation = getTableLocation(tableMetadata.getProperties()) | ||||||
| .orElseGet(() -> defaultTableLocation(session, tableMetadata.getTable())); | ||||||
| Transaction transaction = IcebergUtil.newCreateTableTransaction(this, tableMetadata, session, false, tableLocation, _ -> false); | ||||||
| Transaction transaction = IcebergUtil.newCreateTableTransaction(this, tableMetadata, session, false, tableLocation, _ -> false, GC_ENABLED_DEFAULT); | ||||||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||
| AppendFiles appendFiles = transaction.newAppend(); | ||||||
| commit(appendFiles, session); | ||||||
| transaction.commitTransaction(); | ||||||
|
|
||||||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -160,6 +160,9 @@ | |
| import static java.util.Objects.requireNonNull; | ||
| import static org.apache.iceberg.BaseMetastoreTableOperations.METADATA_LOCATION_PROP; | ||
| import static org.apache.iceberg.CatalogUtil.dropTableData; | ||
| import static org.apache.iceberg.TableProperties.GC_ENABLED; | ||
| import static org.apache.iceberg.TableProperties.GC_ENABLED_DEFAULT; | ||
| import static org.apache.iceberg.util.PropertyUtil.propertyAsBoolean; | ||
|
|
||
| public class TrinoGlueCatalog | ||
| extends AbstractTrinoCatalog | ||
|
|
@@ -707,7 +710,9 @@ public void dropTable(ConnectorSession session, SchemaTableName schemaTableName) | |
| // So log the exception and continue with deleting the table location | ||
| LOG.warn(e, "Failed to delete table data referenced by metadata"); | ||
| } | ||
| deleteTableDirectory(fileSystemFactory.create(session), schemaTableName, table.location()); | ||
| if (propertyAsBoolean(table.properties(), GC_ENABLED, GC_ENABLED_DEFAULT)) { | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Pls replace |
||
| deleteTableDirectory(fileSystemFactory.create(session), schemaTableName, table.location()); | ||
| } | ||
| invalidateTableCache(schemaTableName); | ||
| } | ||
|
|
||
|
|
||
| Original file line number | Diff line number | Diff line change | ||||
|---|---|---|---|---|---|---|
|
|
@@ -127,6 +127,9 @@ | |||||
| import static org.apache.iceberg.BaseMetastoreTableOperations.METADATA_LOCATION_PROP; | ||||||
| import static org.apache.iceberg.BaseMetastoreTableOperations.TABLE_TYPE_PROP; | ||||||
| import static org.apache.iceberg.CatalogUtil.dropTableData; | ||||||
| import static org.apache.iceberg.TableProperties.GC_ENABLED; | ||||||
| import static org.apache.iceberg.TableProperties.GC_ENABLED_DEFAULT; | ||||||
| import static org.apache.iceberg.util.PropertyUtil.propertyAsBoolean; | ||||||
|
|
||||||
| public class TrinoHiveCatalog | ||||||
| extends AbstractTrinoCatalog | ||||||
|
|
@@ -436,7 +439,9 @@ public void dropTable(ConnectorSession session, SchemaTableName schemaTableName) | |||||
| // So log the exception and continue with deleting the table location | ||||||
| log.warn(e, "Failed to delete table data referenced by metadata"); | ||||||
| } | ||||||
| deleteTableDirectory(fileSystemFactory.create(session), schemaTableName, metastoreTable.getStorage().getLocation()); | ||||||
| if (propertyAsBoolean(metadata.properties(), GC_ENABLED, GC_ENABLED_DEFAULT)) { | ||||||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||
| deleteTableDirectory(fileSystemFactory.create(session), schemaTableName, metastoreTable.getStorage().getLocation()); | ||||||
| } | ||||||
| invalidateTableCache(schemaTableName); | ||||||
| } | ||||||
|
|
||||||
|
|
||||||
| Original file line number | Diff line number | Diff line change | ||||
|---|---|---|---|---|---|---|
|
|
@@ -87,6 +87,9 @@ | |||||
| import static java.util.Locale.ENGLISH; | ||||||
| import static java.util.Objects.requireNonNull; | ||||||
| import static org.apache.iceberg.CatalogUtil.dropTableData; | ||||||
| import static org.apache.iceberg.TableProperties.GC_ENABLED; | ||||||
| import static org.apache.iceberg.TableProperties.GC_ENABLED_DEFAULT; | ||||||
| import static org.apache.iceberg.util.PropertyUtil.propertyAsBoolean; | ||||||
| import static org.apache.iceberg.view.ViewProperties.COMMENT; | ||||||
|
|
||||||
| public class TrinoJdbcCatalog | ||||||
|
|
@@ -348,7 +351,9 @@ public void dropTable(ConnectorSession session, SchemaTableName schemaTableName) | |||||
| // So log the exception and continue with deleting the table location | ||||||
| LOG.warn(e, "Failed to delete table data referenced by metadata"); | ||||||
| } | ||||||
| deleteTableDirectory(fileSystemFactory.create(session), schemaTableName, table.location()); | ||||||
| if (propertyAsBoolean(table.properties(), GC_ENABLED, GC_ENABLED_DEFAULT)) { | ||||||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||
| deleteTableDirectory(fileSystemFactory.create(session), schemaTableName, table.location()); | ||||||
| } | ||||||
| invalidateTableCache(schemaTableName); | ||||||
| } | ||||||
|
|
||||||
|
|
||||||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The common approach is to restrain referencing in the config files values from dependent libraries because this may trigger a silent change in the default configuration of the connector.
Please consider using rather
trueinstead