diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index cac8008..be3a0c8 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -24,7 +24,7 @@ jobs: - name: Install minimal nightly with clippy and rustfmt uses: dtolnay/rust-toolchain@master with: - toolchain: nightly + toolchain: nightly-2025-01-23 components: rustfmt, clippy - name: Install tool for formatting Cargo.toml files diff --git a/README.md b/README.md index b6f7308..8d9c1bd 100644 --- a/README.md +++ b/README.md @@ -11,17 +11,30 @@ Load data from Parquet and Postgres to Delta Lake Download the binary from the [Releases page](./releases) -To load data from Postgres to Delta Lake: +Load data from Postgres to Delta Lake: ```bash export PGPASSWORD="my_password" -./lakehouse-loader pg-to-delta postgres://test-user@localhost:5432/test-db -q "SELECT * FROM some_table" s3://my-bucket/path/to/table +./lakehouse-loader pg-to-delta postgres://test-user@localhost:5432/test-db -q "SELECT * FROM some_table" s3://my-bucket/path/to/delta/table ``` -To load data from Parquet to Delta Lake: +Load data from Parquet to Delta Lake: ```bash -./lakehouse-loader parquet-to-delta some_file.parquet s3://my-bucket/path/to/table +./lakehouse-loader parquet-to-delta some_file.parquet s3://my-bucket/path/to/delta/table +``` + +Load data from Postgres to Iceberg File Catalog: + +```bash +export PGPASSWORD="my_password" +./lakehouse-loader pg-to-iceberg postgres://test-user@localhost:5432/test-db -q "SELECT * FROM some_table" s3://my-bucket/path/to/iceberg/table +``` + +Load data from Parquet to Iceberg File Catalog: + +```bash +./lakehouse-loader parquet-to-iceberg some_file.parquet s3://my-bucket/path/to/iceberg/table ``` Supports standard AWS environment variables (e.g. AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY, AWS_PROFILE, AWS_ENDPOINT etc). diff --git a/docker-compose.yml b/docker-compose.yml index 0e270e4..0ec800a 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -1,6 +1,6 @@ services: postgres: - image: postgres:16 + image: postgres:16.9 environment: POSTGRES_USER: test-user POSTGRES_PASSWORD: test-password @@ -12,7 +12,7 @@ services: - ./tests/postgres-init-scripts:/docker-entrypoint-initdb.d minio: - image: minio/minio:latest + image: quay.io/minio/minio:RELEASE.2024-11-07T00-52-20Z ports: - 9000:9000 - 9001:9001 @@ -21,14 +21,20 @@ services: command: minio server /data minio-setup: - image: minio/mc:latest + image: minio/mc:RELEASE.2025-03-12T17-29-24Z depends_on: - minio entrypoint: > - /bin/sh -c " - until (/usr/bin/mc config host add test-minio http://minio:9000 minioadmin minioadmin) do echo '...waiting...' && sleep 1; done; + /bin/sh -c ' + mkdir ~/.mc; + echo ''{\"version\": \"10\", \"aliases\": {\"test-minio\": {\"url\": \"http://minio:9000\", \"accessKey\": \"minioadmin\", \"secretKey\": \"minioadmin\", \"api\": \"S3v4\", \"path\": \"auto\"}}}'' > ~/.mc/config.json; + cat ~/.mc/config.json; /usr/bin/mc rm -r --force test-minio/lhl-test-bucket; /usr/bin/mc mb test-minio/lhl-test-bucket; /usr/bin/mc admin user add test-minio test-user test-pass; /usr/bin/mc admin policy attach test-minio readwrite --user test-user; - exit 0; " + exit 0; ' + healthcheck: + test: ["CMD", "false"] # Stack is not ready while the minio-setup container is up + interval: 10s + timeout: 5s diff --git a/src/delta_destination.rs b/src/delta_destination.rs index 17ba47d..62e7d15 100644 --- a/src/delta_destination.rs +++ b/src/delta_destination.rs @@ -441,7 +441,7 @@ pub async fn record_batches_to_delta( .await?; let delta_schema = deltalake::kernel::Schema::try_from(schema)?; - let table_name = target_url.path_segments().unwrap().last().unwrap(); + let table_name = target_url.path_segments().unwrap().next_back().unwrap(); let table = CreateBuilder::new() .with_log_store(log_store.clone())