master merge for 1.8.1 release #2391

Merged · 9 commits · Mar 11, 2025
1 change: 0 additions & 1 deletion dlt/common/configuration/providers/__init__.py
@@ -8,7 +8,6 @@
CONFIG_TOML,
SECRETS_TOML,
StringTomlProvider,
CustomLoaderDocProvider,
)
from .doc import CustomLoaderDocProvider
from .vault import SECRETS_TOML_KEY
2 changes: 2 additions & 0 deletions dlt/common/configuration/specs/aws_credentials.py
@@ -27,6 +27,8 @@ class AwsCredentialsWithoutDefaults(
profile_name: Optional[str] = None
region_name: Optional[str] = None
endpoint_url: Optional[str] = None
s3_url_style: Optional[str] = None
"""Only needed for duckdb sql_client s3 access, for minio this needs to be set to path for example."""

def to_s3fs_credentials(self) -> Dict[str, Optional[str]]:
"""Dict of keyword arguments that can be passed to s3fs"""
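
The new `s3_url_style` field is only consumed by the duckdb-based `sql_client` when it builds its S3 secret (see the `sql_client.py` change below). A hedged sketch of where it would typically be set for a local MinIO setup, using dlt's environment-variable config provider — bucket, keys, and host below are placeholders, not values from this PR:

```py
import os

# Hypothetical MinIO-backed filesystem destination; all values are illustrative.
os.environ["DESTINATION__FILESYSTEM__BUCKET_URL"] = "s3://my-bucket"
os.environ["DESTINATION__FILESYSTEM__CREDENTIALS__AWS_ACCESS_KEY_ID"] = "minioadmin"
os.environ["DESTINATION__FILESYSTEM__CREDENTIALS__AWS_SECRET_ACCESS_KEY"] = "minioadmin"
os.environ["DESTINATION__FILESYSTEM__CREDENTIALS__ENDPOINT_URL"] = "http://localhost:9000"
# New field from this PR: MinIO serves path-style URLs, so duckdb needs URL_STYLE 'path'.
os.environ["DESTINATION__FILESYSTEM__CREDENTIALS__S3_URL_STYLE"] = "path"
```
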
4 changes: 4 additions & 0 deletions dlt/common/libs/pydantic.py
@@ -164,6 +164,10 @@ def pydantic_to_table_schema_columns(
result[schema_key] = {
**hints,
"name": snake_case_naming_convention.make_path(name, hints["name"]),
# if the outer field containing the nested model is optional,
# then each field in the model itself has to be nullable as well,
# as otherwise we end up with flattened non-nullable optional nested fields
"nullable": hints["nullable"] or nullable,
}
elif data_type == "json" and skip_nested_types:
continue
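
The reasoning in the new comment — an optional nested model makes every flattened child column nullable — is exercised by `test_parent_nullable_means_children_nullable` added later in this PR. A minimal standalone sketch with hypothetical models (the `pydantic_to_table_schema_columns` import path is the file changed above; `DltConfig` is assumed to live in the same module):

```py
from typing import ClassVar, Optional
from pydantic import BaseModel

from dlt.common.libs.pydantic import DltConfig, pydantic_to_table_schema_columns

class Child(BaseModel):
    child_attribute: str  # not optional by itself

class Parent(BaseModel):
    optional_child: Optional[Child]  # outer field is optional ...
    non_optional_child: Child
    dlt_config: ClassVar[DltConfig] = {"skip_nested_types": True}

columns = pydantic_to_table_schema_columns(Parent)
# ... so its flattened child column becomes nullable, while the required one stays non-nullable.
assert columns["optional_child__child_attribute"]["nullable"]
assert columns["non_optional_child__child_attribute"]["nullable"] is False
```
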
10 changes: 3 additions & 7 deletions dlt/destinations/dataset/__init__.py
@@ -1,10 +1,6 @@
from dlt.destinations.dataset.factory import (
dataset,
)
from dlt.destinations.dataset.dataset import (
ReadableDBAPIDataset,
get_destination_clients,
)
from dlt.destinations.dataset.factory import dataset
from dlt.destinations.dataset.dataset import ReadableDBAPIDataset

from dlt.destinations.dataset.utils import (
get_destination_clients,
get_destination_client_initial_config,
1 change: 0 additions & 1 deletion dlt/destinations/impl/filesystem/filesystem.py
@@ -19,7 +19,6 @@
Dict,
)
from fsspec import AbstractFileSystem
from contextlib import contextmanager

import dlt
from dlt.common import logger, time, json, pendulum
20 changes: 14 additions & 6 deletions dlt/destinations/impl/filesystem/sql_client.py
@@ -112,11 +112,17 @@ def create_authentication(self, persistent: bool = False, secret_name: str = Non
session_token = (
"" if aws_creds.aws_session_token is None else aws_creds.aws_session_token
)
endpoint = (
aws_creds.endpoint_url.replace("https://", "")
if aws_creds.endpoint_url
else "s3.amazonaws.com"
)

use_ssl = "true"
endpoint = "s3.amazonaws.com"
if aws_creds.endpoint_url and "http://" in aws_creds.endpoint_url:
use_ssl = "false"
endpoint = aws_creds.endpoint_url.replace("http://", "")
elif aws_creds.endpoint_url and "https://" in aws_creds.endpoint_url:
endpoint = aws_creds.endpoint_url.replace("https://", "")

s3_url_style = aws_creds.s3_url_style or "vhost"

self._conn.sql(f"""
CREATE OR REPLACE {persistent_stmt} SECRET {secret_name} (
TYPE S3,
@@ -125,7 +131,9 @@ def create_authentication(self, persistent: bool = False, secret_name: str = Non
SESSION_TOKEN '{session_token}',
REGION '{aws_creds.region_name}',
ENDPOINT '{endpoint}',
SCOPE '{scope}'
SCOPE '{scope}',
URL_STYLE '{s3_url_style}',
USE_SSL {use_ssl}
);""")

# azure with storage account creds
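
The new endpoint handling replaces the single https-stripping expression: a plain-http endpoint (typical for a local MinIO) now also disables SSL, and the URL style falls back to duckdb's `vhost` default. A small standalone re-statement of that branch logic (not the dlt implementation itself), with made-up endpoint values:

```py
def derive_duckdb_s3_settings(endpoint_url, s3_url_style):
    # Mirrors the branching in create_authentication above.
    use_ssl = "true"
    endpoint = "s3.amazonaws.com"
    if endpoint_url and "http://" in endpoint_url:
        use_ssl = "false"
        endpoint = endpoint_url.replace("http://", "")
    elif endpoint_url and "https://" in endpoint_url:
        endpoint = endpoint_url.replace("https://", "")
    return endpoint, use_ssl, (s3_url_style or "vhost")

# Local MinIO without TLS:
assert derive_duckdb_s3_settings("http://localhost:9000", "path") == ("localhost:9000", "false", "path")
# Plain AWS S3 (no endpoint_url configured):
assert derive_duckdb_s3_settings(None, None) == ("s3.amazonaws.com", "true", "vhost")
```
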
1 change: 0 additions & 1 deletion dlt/destinations/impl/synapse/configuration.py
@@ -8,7 +8,6 @@
MsSqlCredentials,
MsSqlClientConfiguration,
)
from dlt.destinations.impl.mssql.configuration import MsSqlCredentials

from dlt.destinations.impl.synapse.synapse_adapter import TTableIndexType

3 changes: 1 addition & 2 deletions dlt/destinations/utils.py
@@ -1,6 +1,6 @@
import re

from typing import Any, List, Optional, Sequence, Tuple
from typing import Any, List, Dict, Type, Optional, Sequence, Tuple, cast

from dlt.common import logger
from dlt.common.destination.capabilities import DestinationCapabilitiesContext
@@ -15,7 +15,6 @@
is_nested_table,
pipeline_state_table,
)
from typing import Any, cast, Tuple, Dict, Type

from dlt.destinations.exceptions import DatabaseTransientException
from dlt.extract import DltResource, resource as make_resource, DltSource
4 changes: 2 additions & 2 deletions dlt/extract/reference.py
@@ -7,11 +7,11 @@
Any,
Generic,
Tuple,
cast,
overload,
ClassVar,
Type,
)
from typing_extensions import Self, TypeVar
from typing import Dict, Type, ClassVar

from dlt.common import logger
from dlt.common.configuration.specs import BaseConfiguration, known_sections
1 change: 0 additions & 1 deletion dlt/extract/source.py
@@ -12,7 +12,6 @@
Any,
)
from typing_extensions import Self
from typing import Dict

from dlt.common.configuration.resolve import inject_section
from dlt.common.configuration.specs import known_sections
1 change: 0 additions & 1 deletion dlt/extract/utils.py
@@ -39,7 +39,6 @@
)
from dlt.extract.items import (
TTableHintTemplate,
TDataItem,
TFunHintTemplate,
SupportsPipe,
)
1 change: 0 additions & 1 deletion dlt/pipeline/pipeline.py
@@ -73,7 +73,6 @@
WithStateSync,
JobClientBase,
DestinationClientStagingConfiguration,
DestinationClientStagingConfiguration,
)
from dlt.common.destination.dataset import SupportsReadableDataset
from dlt.common.destination.typing import TDatasetType
3 changes: 2 additions & 1 deletion dlt/sources/rest_api/__init__.py
@@ -315,7 +315,8 @@ def paginate_resource(
incremental_object, incremental_cursor_transform
)
)

# TODO: expand json as well. make sure you do not default to {} as this will
# generate empty body
path = expand_placeholders(path, format_kwargs)
params = expand_placeholders(params, format_kwargs)

4 changes: 3 additions & 1 deletion dlt/sources/rest_api/config_setup.py
@@ -733,7 +733,9 @@ def process_parent_data_item(
)
expanded_path = expand_placeholders(path, params_values)
expanded_params = expand_placeholders(params or {}, params_values)
expanded_json = expand_placeholders(request_json or {}, params_values)
expanded_json = (
None if request_json is None else expand_placeholders(request_json, params_values)
)

parent_resource_name = resolved_params[0].resolve_config["resource"]
parent_record = build_parent_record(item, parent_resource_name, include_from_parent)
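
This change keeps `expanded_json` as `None` when no body was configured: the old `request_json or {}` silently turned a missing body into an empty `{}`, which then gets sent as an empty JSON body on every child request. A minimal sketch of the intended behavior — the helper below is illustrative, not dlt's `expand_placeholders`, which also handles nested structures:

```py
from typing import Any, Dict, Optional

def expand_json(request_json: Optional[Dict[str, Any]], values: Dict[str, Any]) -> Optional[Dict[str, Any]]:
    if request_json is None:
        return None  # no body configured -> no body is sent
    # expand "{placeholder}" strings against the resolved parent values
    return {k: (v.format(**values) if isinstance(v, str) else v) for k, v in request_json.items()}

assert expand_json(None, {"id": 12345}) is None                                  # stays body-less
assert expand_json({}, {"id": 12345}) == {}                                      # explicit empty body is kept
assert expand_json({"orig_id": "{id}"}, {"id": 12345}) == {"orig_id": "12345"}   # placeholders expanded
```
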
14 changes: 0 additions & 14 deletions dlt/sources/rest_api/typing.py
@@ -1,5 +1,3 @@
from dataclasses import dataclass, field

from typing import (
Any,
Callable,
@@ -22,17 +20,6 @@

from dataclasses import dataclass, field

from dlt.common import jsonpath
from dlt.common.typing import TSortOrder, TColumnNames
from dlt.common.schema.typing import (
TTableFormat,
TAnySchemaColumns,
TWriteDispositionConfig,
TSchemaContract,
)

from dlt.extract.items import TTableHintTemplate
from dlt.common.incremental.typing import LastValueFunc
from dlt.extract.resource import DltResource

from requests import Session
@@ -47,7 +34,6 @@
PageNumberPaginator,
SinglePagePaginator,
)
from dlt.sources.helpers.rest_client.typing import HTTPMethodBasic


try:
@@ -188,12 +188,11 @@ mode.
```py
@dlt.resource(write_disposition="merge")
def opportunity(
last_timestamp: Incremental[str] = dlt.sources.incremental(
last_timestamp: incremental[str] = dlt.sources.incremental(
"SystemModstamp", initial_value=None
)
) -> Iterator[Dict[str, Any]]:

yield from _get_records(
) -> Iterable[TDataItem]:
yield get_records(
client, "Opportunity", last_timestamp.last_value, "SystemModstamp"
)
```
2 changes: 1 addition & 1 deletion docs/website/docusaurus.config.js
@@ -14,7 +14,7 @@ const versions = {"current": {
noIndex: true
}}

let knownVersions = ["current"];
let knownVersions = [];
if (fs.existsSync("versions.json")) {
knownVersions = JSON.parse(fs.readFileSync("versions.json", 'utf8'));
}
27 changes: 12 additions & 15 deletions docs/website/tools/update_versions.js
@@ -2,9 +2,6 @@ const proc = require('child_process')
const fs = require('fs');
const semver = require('semver')

// disable versions for now
process.exit(0)

// const
const REPO_DIR = ".dlt-repo"
const REPO_DOCS_DIR = REPO_DIR + "/docs/website"
@@ -15,7 +12,7 @@ const VERSIONED_SIDEBARS_FOLDER = "versioned_sidebars"
const ENV_FILE = '.env'

// no doc versions below this version will be deployed
const MINIMUM_SEMVER_VERSION = "0.5.0"
const MINIMUM_SEMVER_VERSION = "1.0.0"

// clear old repo version
fs.rmSync(REPO_DIR, { recursive: true, force: true })
@@ -41,6 +38,7 @@ versions = semver.rsort(versions.filter(v => semver.gt(v, min_version)))
versions.filter(v => semver.prerelease(v) == null)

console.log(`Found ${versions.length} elligible versions`)
console.log(versions)
if (versions.length < 2) {
console.error("Sanity check failed, not enough elligble version tags found")
process.exit(1)
@@ -50,17 +48,16 @@ if (versions.length < 2) {
const envFileContent = `DOCUSAURUS_DLT_VERSION=${versions[0]}`;
fs.writeFileSync(ENV_FILE, envFileContent, 'utf8');

// go through the versions and find all newest versions of any major version
// the newest version is replace by the master branch here so the master head
// always is the "current" doc
// in the future the only other version to build is the minor version below the latest
const selectedVersions = ["master"];
let lastVersion = versions[0];
for (let ver of versions) {
if ( semver.major(ver) != semver.major(lastVersion)) {
selectedVersions.push(ver)
}
lastVersion = ver;
}
// let lastVersion = versions[0];
// for (let ver of versions) {
// console.log(semver.minor(ver))
// console.log(semver.minor(lastVersion))
// if ( semver.minor(ver) == (semver.minor(lastVersion) - 1)) {
// selectedVersions.push(ver)
// }
// }

console.log(`Will create docs versions for ${selectedVersions}`)

@@ -100,7 +97,7 @@ for (const version of selectedVersions) {

// build doc version, we also run preprocessing and markdown gen for each doc version
console.log(`Building docs...`)
proc.execSync(`cd ${REPO_DOCS_DIR} && npm run preprocess-docs && PYTHONPATH=. pydoc-markdown`)
proc.execSync(`cd ${REPO_DOCS_DIR} && npm run preprocess-docs && PYTHONPATH=. pydoc-markdown && python clean_pydoc_sidebar.py`)

console.log(`Snapshotting version...`)
proc.execSync(`cd ${REPO_DOCS_DIR} && npm run docusaurus docs:version ${version}`)
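
For readers of the selection loop above: the script keeps `master` (standing in for the newest major) plus the most recent tag of each older major version; a minor-version variant of the loop appears commented out. A rough Python re-statement of that selection, with an illustrative version list:

```py
# Assumes `sorted_versions` is already sorted newest-first, as in the JS above.
def select_doc_versions(sorted_versions):
    selected = ["master"]  # master replaces the newest major's docs
    last = sorted_versions[0]
    for ver in sorted_versions:
        if ver.split(".")[0] != last.split(".")[0]:  # major version changed
            selected.append(ver)
        last = ver
    return selected

assert select_doc_versions(["1.8.1", "1.8.0", "1.0.0", "0.5.4", "0.5.3"]) == ["master", "0.5.4"]
```
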
8 changes: 4 additions & 4 deletions poetry.lock

Generated file; diff not rendered.

2 changes: 1 addition & 1 deletion pyproject.toml
@@ -1,6 +1,6 @@
[tool.poetry]
name = "dlt"
version = "1.8.0"
version = "1.8.1"
description = "dlt is an open-source python-first scalable data loading library that does not require any backend to run."
authors = ["dltHub Inc. <[email protected]>"]
maintainers = [ "Marcin Rudolf <[email protected]>", "Adrian Brudaru <[email protected]>", "Anton Burnashev <[email protected]>", "David Scharf <[email protected]>" ]
12 changes: 12 additions & 0 deletions tests/libs/test_pydantic.py
@@ -743,3 +743,15 @@ class MyModel(BaseModel):

with pytest.raises(ValidationError):
m = MyModel(column_type={"data_type": "invalid_type"}) # type: ignore[typeddict-item]


def test_parent_nullable_means_children_nullable():
class MyParent(BaseModel):
optional_child: Optional[ChildModel]
non_optional_child: ChildModel
dlt_config: ClassVar[DltConfig] = {"skip_nested_types": True}

schema = pydantic_to_table_schema_columns(MyParent)

assert schema["optional_child__child_attribute"]["nullable"]
assert schema["non_optional_child__child_attribute"]["nullable"] is False
2 changes: 1 addition & 1 deletion tests/load/sources/rest_api/test_rest_api_source.py
@@ -56,7 +56,7 @@ def test_rest_api_source(destination_config: DestinationTestConfiguration, reque

assert table_counts.keys() == {"pokemon_list", "berry", "location"}

assert table_counts["pokemon_list"] == 1304
assert table_counts["pokemon_list"] == 1302
assert table_counts["berry"] == 64
assert table_counts["location"] == 1039

28 changes: 27 additions & 1 deletion tests/sources/rest_api/configurations/test_resolve_config.py
@@ -99,9 +99,33 @@ def test_process_parent_data_item() -> None:
include_from_parent=None,
)
assert bound_path == "dlt-hub/dlt/issues/12345/comments"
assert expanded_params == {}
assert expanded_params == {} # defaults to empty dict
assert request_json is None # defaults to None
assert parent_record == {}

# same but with empty params and json
bound_path, expanded_params, request_json, parent_record = process_parent_data_item(
path="dlt-hub/dlt/issues/{id}/comments",
item={"obj_id": 12345},
params={},
request_json={},
resolved_params=resolved_params,
)
# those got propagated
assert expanded_params == {}
assert request_json == {} # generates empty body!

# also test params and json
bound_path, expanded_params, request_json, parent_record = process_parent_data_item(
path="dlt-hub/dlt/issues/comments",
item={"obj_id": 12345},
params={"orig_id": "{id}"},
request_json={"orig_id": "{id}"},
resolved_params=resolved_params,
)
assert expanded_params == {"orig_id": "12345"}
assert request_json == {"orig_id": "12345"}

bound_path, expanded_params, request_json, parent_record = process_parent_data_item(
path="dlt-hub/dlt/issues/{id}/comments",
item={"obj_id": 12345},
@@ -137,11 +161,13 @@ def test_process_parent_data_item() -> None:
path="dlt-hub/dlt/issues/comments",
item={"obj_id": 12345, "obj_node": "node_1"},
params={"id": "{resources.issues.obj_id}"},
request_json={"id": "{resources.issues.obj_id}"},
resolved_params=resolved_params_reference,
include_from_parent=["obj_id", "obj_node"],
)
assert bound_path == "dlt-hub/dlt/issues/comments"
assert expanded_params == {"id": "12345"}
assert request_json == {"id": "12345"}

# Test nested data
resolved_param_nested = [