From f3b7376679d0b8258f41e07e43d0e02e30f26b0f Mon Sep 17 00:00:00 2001 From: soyeric128 Date: Mon, 24 Mar 2025 09:20:54 -0400 Subject: [PATCH] added --- .../guides/51-access-data-lake/02-iceberg.md | 7 +++ .../17-table-functions/iceberg-manifest.md | 53 +++++++++++++++++++ .../17-table-functions/iceberg-snapshot.md | 48 +++++++++++++++++ 3 files changed, 108 insertions(+) create mode 100644 docs/en/sql-reference/20-sql-functions/17-table-functions/iceberg-manifest.md create mode 100644 docs/en/sql-reference/20-sql-functions/17-table-functions/iceberg-snapshot.md diff --git a/docs/en/guides/51-access-data-lake/02-iceberg.md b/docs/en/guides/51-access-data-lake/02-iceberg.md index 070c42e1f1..b15b7a2803 100644 --- a/docs/en/guides/51-access-data-lake/02-iceberg.md +++ b/docs/en/guides/51-access-data-lake/02-iceberg.md @@ -124,6 +124,13 @@ Switches the current session to the specified catalog. USE CATALOG <catalog_name> ``` +## Iceberg Table Functions + +Databend provides the following table functions for querying Iceberg metadata, allowing users to inspect snapshots and manifests efficiently: + +- [ICEBERG_MANIFEST](/sql/sql-functions/table-functions/iceberg-manifest) +- [ICEBERG_SNAPSHOT](/sql/sql-functions/table-functions/iceberg-snapshot) + ## Usage Examples This example shows how to create an Iceberg catalog using a REST-based connection, specifying the service address, warehouse location (S3), and optional parameters like AWS region and custom endpoint: diff --git a/docs/en/sql-reference/20-sql-functions/17-table-functions/iceberg-manifest.md b/docs/en/sql-reference/20-sql-functions/17-table-functions/iceberg-manifest.md new file mode 100644 index 0000000000..f9cf1f8685 --- /dev/null +++ b/docs/en/sql-reference/20-sql-functions/17-table-functions/iceberg-manifest.md @@ -0,0 +1,53 @@ +--- +title: ICEBERG_MANIFEST +--- +import FunctionDescription from '@site/src/components/FunctionDescription'; + + + +Returns metadata about manifest files of an Iceberg table, including file 
paths, partitioning details, and snapshot associations. + +## Syntax + +```sql +ICEBERG_MANIFEST('<database_name>', '<table_name>'); +``` + +## Output + +The function returns a table with the following columns: + +- `content` (`INT`): The content type (0 for data files, 1 for delete files). +- `path` (`STRING`): The file path of the data or delete file. +- `length` (`BIGINT`): The file size in bytes. +- `partition_spec_id` (`INT`): The partition specification ID associated with the file. +- `added_snapshot_id` (`BIGINT`): The snapshot ID that added this file. +- `added_data_files_count` (`INT`): The number of new data files added. +- `existing_data_files_count` (`INT`): The number of existing data files referenced. +- `deleted_data_files_count` (`INT`): The number of data files deleted. +- `added_delete_files_count` (`INT`): The number of delete files added. +- `partition_summaries` (`ARRAY`): Summary of partition values related to the file. + +## Examples + +```sql +SELECT * FROM ICEBERG_MANIFEST('tpcds', 'catalog_returns'); + +╭───────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╮ +│ content │ path │ length │ partition_spec │ added_snapshot │ added_data_fil │ existing_data_ │ deleted_data_ │ added_delete_ │ existing_dele │ deleted_delet │ partition_sum │ +│ Int32 │ String │ Int64 │ _id │ _id │ es_count │ files_count │ files_count │ files_count │ te_files_coun │ e_files_count │ maries │ +│ │ │ │ Int32 │ Nullable(Int64 │ Nullable(Int32 │ Nullable(Int32 │ Nullable(Int3 │ Nullable(Int3 │ t │ Nullable(Int3 │ Array(Nullabl │ +│ │ │ │ │ ) │ ) │ ) │ 2) │ 2) │ Nullable(Int3 │ 2) │ e(Tuple(Nulla │ +│ │ │ │ │ │ │ │ │ │ 2) │ │ ble(Boolean), │ +│ │ │ │ │ │ │ │ │ │ │ │ Nullable(Bool │ +│ │ │ │ │ │ │ │ │ │ │ │ ean), String, │ +│ │ │ │ │ │ │ │ │ │ │ │ String))) │ 
+├─────────┼────────────────┼────────┼────────────────┼────────────────┼────────────────┼────────────────┼───────────────┼───────────────┼───────────────┼───────────────┼───────────────┤ +│ 0 │ s3://warehouse │ 9241 │ 0 │ 75657674165904 │ 2 │ 0 │ 0 │ 2 │ 0 │ 0 │ [] │ +│ │ /catalog_retur │ │ │ 11866 │ │ │ │ │ │ │ │ +│ │ ns/metadata/fa │ │ │ │ │ │ │ │ │ │ │ +│ │ 1ea4d5-a382-49 │ │ │ │ │ │ │ │ │ │ │ +│ │ 7a-9f22-1acb9a │ │ │ │ │ │ │ │ │ │ │ +│ │ 74a346-m0.avro │ │ │ │ │ │ │ │ │ │ │ +╰───────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯ +``` \ No newline at end of file diff --git a/docs/en/sql-reference/20-sql-functions/17-table-functions/iceberg-snapshot.md b/docs/en/sql-reference/20-sql-functions/17-table-functions/iceberg-snapshot.md new file mode 100644 index 0000000000..9256a1cba8 --- /dev/null +++ b/docs/en/sql-reference/20-sql-functions/17-table-functions/iceberg-snapshot.md @@ -0,0 +1,48 @@ +--- +title: ICEBERG_SNAPSHOT +--- +import FunctionDescription from '@site/src/components/FunctionDescription'; + + + +Returns metadata about snapshots of an Iceberg table, including information about data changes, operations, and summary statistics. + +## Syntax + +```sql +ICEBERG_SNAPSHOT('<database_name>', '<table_name>'); +``` + +## Output + +The function returns a table with the following columns: + +- `committed_at` (`TIMESTAMP`): The timestamp when the snapshot was committed. +- `snapshot_id` (`BIGINT`): The unique identifier of the snapshot. +- `parent_id` (`BIGINT`): The parent snapshot ID, if applicable. +- `operation` (`STRING`): The type of operation performed (e.g., append, overwrite, delete). +- `manifest_list` (`STRING`): The file path of the manifest list associated with the snapshot. +- `summary` (`MAP`): A JSON-like structure containing additional metadata, such as: + - `added-data-files`: Number of newly added data files. 
+ - `added-records`: Number of new records added. + - `total-records`: Total number of records in the snapshot. + - `total-files-size`: Total size of all data files (in bytes). + - `total-data-files`: Total number of data files in the snapshot. + - `total-delete-files`: Total number of delete files in the snapshot. + +## Examples + +```sql +SELECT * FROM ICEBERG_SNAPSHOT('tpcds', 'catalog_returns'); + +╭───────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╮ +│ committed_at │ snapshot_id │ parent_id │ operation │ manifest_list │ summary │ +├────────────────────────────┼─────────────────────┼───────────┼───────────┼──────────────────────────────────────────────────────┼─────────────────────────────────────────────────────┤ +│ 2025-03-12 23:18:26.626000 │ 7565767416590411866 │ 0 │ append │ s3://warehouse/catalog_returns/metadata/snap-7565767 │ {'spark.app.id':'local-1741821433430','added-data-f │ +│ │ │ │ │ 416590411866-1-fa1ea4d5-a382-497a-9f22-1acb9a74a346. │ iles':'2','added-records':'144067','total-equality- │ +│ │ │ │ │ avro │ deletes':'0','changed-partition-count':'1','total-r │ +│ │ │ │ │ │ ecords':'144067','total-files-size':'7679811','tota │ +│ │ │ │ │ │ l-data-files':'2','added-files-size':'7679811','tot │ +│ │ │ │ │ │ al-delete-files':'0','total-position-deletes':'0'} │ +╰───────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯ +``` \ No newline at end of file