6 changes: 6 additions & 0 deletions metadata-ingestion/pyproject.toml
@@ -56,5 +56,11 @@ max-complexity = 20
[tool.ruff.lint.flake8-tidy-imports]
ban-relative-imports = "all"

[tool.ruff.lint.flake8-tidy-imports.banned-api]
# pydantic v2 deprecations
"pydantic.validator" = { msg = "Use pydantic.field_validator instead of deprecated validator" }
"pydantic.root_validator" = { msg = "Use pydantic.model_validator instead of deprecated root_validator" }

[tool.ruff.lint.per-file-ignores]
"__init__.py" = ["F401"]
"src/datahub/configuration/pydantic_migration_helpers.py" = ["TID251"] # Intentional V1 imports for backward compatibility
@@ -104,7 +104,7 @@ def as_pydantic_object(
assert self.schema_ref
assert self.schema_ref == model_type.__name__
object_dict = self.as_raw_json()
return model_type.parse_obj(object_dict)
return model_type.model_validate(object_dict)

@classmethod
def from_resource_value(
@@ -131,7 +131,7 @@ def create(
elif isinstance(object, BaseModel):
return SerializedResourceValue(
content_type=models.SerializedValueContentTypeClass.JSON,
blob=json.dumps(object.dict(), sort_keys=True).encode("utf-8"),
blob=json.dumps(object.model_dump(), sort_keys=True).encode("utf-8"),
schema_type=models.SerializedValueSchemaTypeClass.JSON,
schema_ref=object.__class__.__name__,
)
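
For reference, a minimal sketch of the v1-to-v2 method renames applied in this file (illustrative model, plain pydantic v2 assumed):

from pydantic import BaseModel

class Payload(BaseModel):
    name: str
    count: int = 0

p = Payload(name="example")
p.model_dump()                                # replaces .dict()
p.model_dump_json()                           # replaces .json()
Payload.model_validate({"name": "example"})   # replaces .parse_obj()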
@@ -4,8 +4,7 @@
from dataclasses import dataclass
from typing import Callable, Iterable, List, Optional, Union

import pydantic
from pydantic import BaseModel
from pydantic import BaseModel, field_validator

import datahub.emitter.mce_builder as builder
from datahub.api.entities.corpuser.corpuser import CorpUser, CorpUserGenerationConfig
@@ -70,9 +69,15 @@ class CorpGroup(BaseModel):

_rename_admins_to_owners = pydantic_renamed_field("admins", "owners")

@pydantic.validator("owners", "members", each_item=True)
@field_validator("owners", "members")
@classmethod
def make_urn_if_needed(cls, v):
if isinstance(v, str):
if isinstance(v, list):
return [
builder.make_user_urn(item) if isinstance(item, str) else item
for item in v
]
elif isinstance(v, str):
return builder.make_user_urn(v)
return v

22 changes: 11 additions & 11 deletions metadata-ingestion/src/datahub/api/entities/corpuser/corpuser.py
@@ -3,7 +3,7 @@
from dataclasses import dataclass
from typing import Callable, Iterable, List, Optional

import pydantic
from pydantic import model_validator

import datahub.emitter.mce_builder as builder
from datahub.configuration.common import ConfigModel
@@ -65,16 +65,16 @@ class CorpUser(ConfigModel):
picture_link: Optional[str] = None
phone: Optional[str] = None

@pydantic.validator("full_name", always=True)
def full_name_can_be_built_from_first_name_last_name(v, values):
if not v:
if "first_name" in values or "last_name" in values:
first_name = values.get("first_name") or ""
last_name = values.get("last_name") or ""
full_name = f"{first_name} {last_name}" if last_name else first_name
return full_name
else:
return v
@model_validator(mode="after")
Reviewer comment (Contributor): always=True -> Field(validate_default=True), as per the migration doc: https://docs.pydantic.dev/latest/migration/#changes-to-validators

def full_name_can_be_built_from_first_name_last_name(self):
if not self.full_name:
if self.first_name or self.last_name:
first_name = self.first_name or ""
last_name = self.last_name or ""
self.full_name = (
f"{first_name} {last_name}" if last_name else first_name
)
return self

@property
def urn(self):
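A sketch of the alternative the reviewer points to above: keep a per-field validator and opt the default into validation with Field(validate_default=True), which is the v2 counterpart of always=True. Illustrative model only; the PR itself uses model_validator(mode="after").

from typing import Optional
from pydantic import BaseModel, Field, field_validator

class UserSketch(BaseModel):
    first_name: Optional[str] = None
    last_name: Optional[str] = None
    # validate_default=True makes the validator run even when full_name is omitted
    full_name: Optional[str] = Field(default=None, validate_default=True)

    @field_validator("full_name", mode="before")
    @classmethod
    def build_full_name(cls, v, info):
        if v:
            return v
        first = info.data.get("first_name") or ""
        last = info.data.get("last_name") or ""
        return f"{first} {last}".strip() or None

# UserSketch(first_name="Ada", last_name="Lovelace").full_name == "Ada Lovelace"
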
@@ -4,7 +4,7 @@
from pathlib import Path
from typing import Any, Callable, Dict, Iterable, List, Optional, Tuple, Union

import pydantic
from pydantic import field_validator, model_validator
from ruamel.yaml import YAML
from typing_extensions import assert_never

@@ -71,7 +71,8 @@ class Ownership(ConfigModel):
id: str
type: str

@pydantic.validator("type")
@field_validator("type")
@classmethod
def ownership_type_must_be_mappable_or_custom(cls, v: str) -> str:
_, _ = builder.validate_ownership_type(v)
return v
@@ -116,30 +117,49 @@ class DataProduct(ConfigModel):
output_ports: Optional[List[str]] = None
_original_yaml_dict: Optional[dict] = None

@pydantic.validator("assets", each_item=True)
def assets_must_be_urns(cls, v: str) -> str:
try:
Urn.from_string(v)
except Exception as e:
raise ValueError(f"asset {v} is not an urn: {e}") from e

return v

@pydantic.validator("output_ports", each_item=True)
def output_ports_must_be_urns(cls, v: str) -> str:
try:
Urn.create_from_string(v)
except Exception as e:
raise ValueError(f"Output port {v} is not an urn: {e}") from e
@field_validator("assets")
@classmethod
def assets_must_be_urns(cls, v):
Reviewer comment (Contributor): Missing type hints for params and return types; same applies to output_ports_must_be_urns.

if isinstance(v, list):
for item in v:
try:
Urn.from_string(item)
except Exception as e:
raise ValueError(f"asset {item} is not an urn: {e}") from e
return v
else:
try:
Urn.from_string(v)
except Exception as e:
raise ValueError(f"asset {v} is not an urn: {e}") from e
return v

@field_validator("output_ports")
Reviewer comment (Contributor): each_item is deprecated; the migration doc talks about Annotated metadata for better type reuse. Not sure if we could apply it here: https://docs.pydantic.dev/latest/migration/#changes-to-validators

@classmethod
def output_ports_must_be_urns(cls, v):
if v is not None:
if isinstance(v, list):
for item in v:
try:
Urn.create_from_string(item)
except Exception as e:
raise ValueError(
f"Output port {item} is not an urn: {e}"
) from e
else:
try:
Urn.create_from_string(v)
except Exception as e:
raise ValueError(f"Output port {v} is not an urn: {e}") from e
return v

@pydantic.validator("output_ports", each_item=True)
def output_ports_must_be_from_asset_list(cls, v: str, values: dict) -> str:
assets = values.get("assets", [])
if v not in assets:
raise ValueError(f"Output port {v} is not in asset list")
return v
@model_validator(mode="after")
def output_ports_must_be_from_asset_list(self):
Reviewer comment (Contributor): We should name the validator for better error messaging.

if self.output_ports and self.assets:
for port in self.output_ports:
if port not in self.assets:
raise ValueError(f"Output port {port} is not in asset list")
return self

@property
def urn(self) -> str:
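
A sketch of the Annotated approach the reviewer mentions above as a per-item replacement for each_item=True. The Urn import path is an assumption here; reuse whatever Urn import dataproduct.py already has.

from typing import Annotated, List, Optional
from pydantic import AfterValidator, BaseModel

from datahub.utilities.urns.urn import Urn  # assumed path; reuse the module's existing Urn import

def _must_be_urn(value: str) -> str:
    try:
        Urn.from_string(value)
    except Exception as e:
        raise ValueError(f"{value} is not an urn: {e}") from e
    return value

# Item-level validation lives on the type, so any List[UrnStr] field reuses it.
UrnStr = Annotated[str, AfterValidator(_must_be_urn)]

class DataProductSketch(BaseModel):
    assets: Optional[List[UrnStr]] = None
    output_ports: Optional[List[UrnStr]] = None
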
@@ -454,7 +474,7 @@ def _patch_ownership(
patches_add.append(new_owner)
else:
patches_add.append(
Ownership(id=new_owner, type=new_owner_type).dict()
Ownership(id=new_owner, type=new_owner_type).model_dump()
)

mutation_needed = bool(patches_replace or patches_drop or patches_add)
@@ -485,8 +505,8 @@ def patch_yaml(
raise Exception("Original Data Product was not loaded from yaml")

orig_dictionary = original_dataproduct._original_yaml_dict
original_dataproduct_dict = original_dataproduct.dict()
this_dataproduct_dict = self.dict()
original_dataproduct_dict = original_dataproduct.model_dump()
this_dataproduct_dict = self.model_dump()
for simple_field in ["display_name", "description", "external_url"]:
if original_dataproduct_dict.get(simple_field) != this_dataproduct_dict.get(
simple_field
@@ -566,7 +586,7 @@ def to_yaml(
yaml = YAML(typ="rt") # default, if not specfied, is 'rt' (round-trip)
yaml.indent(mapping=2, sequence=4, offset=2)
yaml.default_flow_style = False
yaml.dump(self.dict(), fp)
yaml.dump(self.model_dump(), fp)

@staticmethod
def get_patch_builder(
41 changes: 25 additions & 16 deletions metadata-ingestion/src/datahub/api/entities/dataset/dataset.py
@@ -19,8 +19,8 @@
BaseModel,
Field,
StrictStr,
root_validator,
validator,
field_validator,
model_validator,
)
from ruamel.yaml import YAML
from typing_extensions import TypeAlias
@@ -213,13 +213,14 @@ def from_schema_field(
),
)

@validator("urn", pre=True, always=True)
def either_id_or_urn_must_be_filled_out(cls, v, values):
if not v and not values.get("id"):
@model_validator(mode="after")
def either_id_or_urn_must_be_filled_out(self) -> "SchemaFieldSpecification":
if not self.urn and not self.id:
raise ValueError("Either id or urn must be present")
return v
return self

@root_validator(pre=True)
@model_validator(mode="before")
@classmethod
def sync_doc_into_description(cls, values: Dict) -> Dict:
"""Synchronize doc into description field if doc is provided."""
description = values.get("description")
@@ -348,7 +349,8 @@ class SchemaSpecification(BaseModel):
fields: Optional[List[SchemaFieldSpecification]] = None
raw_schema: Optional[str] = None

@validator("file")
@field_validator("file")
@classmethod
def file_must_be_avsc(cls, v):
if v and not v.endswith(".avsc"):
raise ValueError("file must be a .avsc file")
@@ -359,7 +361,8 @@ class Ownership(ConfigModel):
id: str
type: str

@validator("type")
@field_validator("type")
@classmethod
def ownership_type_must_be_mappable_or_custom(cls, v: str) -> str:
_, _ = validate_ownership_type(v)
return v
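
The SchemaFieldSpecification changes above use both model_validator modes; a small illustrative sketch of the difference (not the PR's code): mode="before" receives the raw input, mode="after" receives the constructed instance.

from typing import Any, Optional
from pydantic import BaseModel, model_validator

class FieldSpecSketch(BaseModel):
    id: Optional[str] = None
    urn: Optional[str] = None
    description: Optional[str] = None

    @model_validator(mode="before")
    @classmethod
    def copy_doc_into_description(cls, values: Any) -> Any:
        # raw input: may be any mapping, keys may be missing
        if isinstance(values, dict) and values.get("doc") and not values.get("description"):
            values["description"] = values["doc"]
        return values

    @model_validator(mode="after")
    def require_id_or_urn(self) -> "FieldSpecSketch":
        # fully validated instance: attributes are safe to read
        if not self.id and not self.urn:
            raise ValueError("Either id or urn must be present")
        return self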
@@ -397,29 +400,35 @@ def platform_urn(self) -> str:
dataset_urn = DatasetUrn.from_string(self.urn)
return str(dataset_urn.get_data_platform_urn())

@validator("urn", pre=True, always=True)
def urn_must_be_present(cls, v, values):
@field_validator("urn", mode="before")
@classmethod
def urn_must_be_present(cls, v, info):
if not v:
values = info.data
assert "id" in values, "id must be present if urn is not"
assert "platform" in values, "platform must be present if urn is not"
assert "env" in values, "env must be present if urn is not"
return make_dataset_urn(values["platform"], values["id"], values["env"])
return v

@validator("name", pre=True, always=True)
def name_filled_with_id_if_not_present(cls, v, values):
@field_validator("name", mode="before")
@classmethod
def name_filled_with_id_if_not_present(cls, v, info):
if not v:
values = info.data
assert "id" in values, "id must be present if name is not"
return values["id"]
return v

@validator("platform")
@field_validator("platform")
@classmethod
def platform_must_not_be_urn(cls, v):
if v.startswith("urn:li:dataPlatform:"):
return v[len("urn:li:dataPlatform:") :]
return v

@validator("structured_properties")
@field_validator("structured_properties")
@classmethod
def simplify_structured_properties(cls, v):
return StructuredPropertiesHelper.simplify_structured_properties_list(v)
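
The urn_must_be_present and name_filled_with_id_if_not_present changes above rely on the fact that, in v2, info.data only holds fields declared before the one being validated, so field order matters. A minimal illustrative sketch (not the real Dataset model):

from pydantic import BaseModel, ValidationInfo, field_validator

class DatasetSketch(BaseModel):
    id: str
    platform: str
    urn: str = ""  # declared after id and platform on purpose

    @field_validator("urn", mode="before")
    @classmethod
    def default_urn_from_id(cls, v: str, info: ValidationInfo) -> str:
        if not v:
            # info.data holds only the already-validated fields (id and platform here);
            # the real code builds the urn with make_dataset_urn(...)
            return f"{info.data['platform']}:{info.data['id']}"
        return v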

@@ -461,7 +470,7 @@ def from_yaml(cls, file: str) -> Iterable["Dataset"]:
if isinstance(datasets, dict):
datasets = [datasets]
for dataset_raw in datasets:
dataset = Dataset.parse_obj(dataset_raw)
dataset = Dataset.model_validate(dataset_raw)
# dataset = Dataset.model_validate(dataset_raw, strict=True)
yield dataset

@@ -12,7 +12,7 @@
# https://learn.microsoft.com/en-us/azure/databricks/database-objects/tags#constraint
from typing import Any, Dict, Optional

from pydantic import validator
from pydantic import field_validator
from typing_extensions import ClassVar

from datahub.api.entities.external.external_tag import ExternalTag
@@ -50,11 +50,10 @@ class LakeFormationTag(ExternalTag):
value: Optional[LakeFormationTagValueText] = None
catalog: Optional[str] = None

# Pydantic v1 validators
@validator("key", pre=True)
@field_validator("key", mode="before")
@classmethod
def _validate_key(cls, v: Any) -> LakeFormationTagKeyText:
"""Validate and convert key field for Pydantic v1."""
"""Validate and convert key field."""
if isinstance(v, LakeFormationTagKeyText):
return v

@@ -64,10 +63,10 @@ def _validate_key(cls, v: Any) -> LakeFormationTagKeyText:

return LakeFormationTagKeyText(raw_text=v)

@validator("value", pre=True)
@field_validator("value", mode="before")
@classmethod
def _validate_value(cls, v: Any) -> Optional[LakeFormationTagValueText]:
"""Validate and convert value field for Pydantic v1."""
"""Validate and convert value field."""
if v is None:
return None

@@ -12,8 +12,7 @@
# https://learn.microsoft.com/en-us/azure/databricks/database-objects/tags#constraint
from typing import Any, Dict, Optional, Set

# Import validator for Pydantic v1 (always needed since we removed conditional logic)
from pydantic import validator
from pydantic import field_validator
from typing_extensions import ClassVar

from datahub.api.entities.external.external_tag import ExternalTag
@@ -62,11 +61,10 @@ class UnityCatalogTag(ExternalTag):
key: UnityCatalogTagKeyText
value: Optional[UnityCatalogTagValueText] = None

# Pydantic v1 validators
@validator("key", pre=True)
@field_validator("key", mode="before")
@classmethod
def _validate_key(cls, v: Any) -> UnityCatalogTagKeyText:
"""Validate and convert key field for Pydantic v1."""
"""Validate and convert key field."""
if isinstance(v, UnityCatalogTagKeyText):
return v

@@ -76,10 +74,10 @@ def _validate_key(cls, v: Any) -> UnityCatalogTagKeyText:

return UnityCatalogTagKeyText(raw_text=v)

@validator("value", pre=True)
@field_validator("value", mode="before")
@classmethod
def _validate_value(cls, v: Any) -> Optional[UnityCatalogTagValueText]:
"""Validate and convert value field for Pydantic v1."""
"""Validate and convert value field."""
if v is None:
return None

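Both tag classes follow the same pattern: mode="before" validators see the raw input before type coercion, so plain strings can be wrapped into the key/value text types. A stripped-down sketch with a stand-in wrapper class (not the real LakeFormationTagKeyText or UnityCatalogTagKeyText):

from typing import Any
from pydantic import BaseModel, field_validator

class KeyText(BaseModel):
    raw_text: str

class TagSketch(BaseModel):
    key: KeyText

    @field_validator("key", mode="before")
    @classmethod
    def coerce_key(cls, v: Any) -> KeyText:
        # runs before type validation, so raw strings can be wrapped here
        if isinstance(v, KeyText):
            return v
        return KeyText(raw_text=str(v))

# TagSketch(key="pii") and TagSketch(key=KeyText(raw_text="pii")) both validate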