1
1
import re
2
2
from functools import partial
3
- from typing import Any , Dict , List , Optional , Union
3
+ from typing import Any , Dict , List
4
4
5
5
import yaml
6
6
from pydantic import (
@@ -45,12 +45,10 @@ def __eq__(self, other):
45
45
46
46
class Column (BaseModel ):
47
47
name : str = Field (..., description = "Name of the column." )
48
- type : Optional [str ] = Field (None , description = "Data type of the column." )
49
- description : Optional [str ] = Field (None , description = "Description of the column" )
50
- expression : Optional [str ] = Field (
51
- None , description = "Aggregation expression (avg, min, max, sum)"
52
- )
53
- alias : Optional [str ] = Field (None , description = "Alias for the column" )
48
+ type : str | None = Field (None , description = "Data type of the column." )
49
+ description : str | None = Field (None , description = "Description of the column" )
50
+ expression : str | None = Field (None , description = "Aggregation expression (avg, min, max, sum)" )
51
+ alias : str | None = Field (None , description = "Alias for the column" )
54
52
55
53
@field_validator ("type" )
56
54
@classmethod
@@ -63,7 +61,9 @@ def is_column_type_supported(cls, type: str) -> str:
63
61
64
62
@field_validator ("expression" )
65
63
@classmethod
66
- def is_expression_valid (cls , expr : str ) -> str :
64
+ def is_expression_valid (cls , expr : str ) -> str | None :
65
+ if expr is None :
66
+ return expr
67
67
try :
68
68
parse_one (expr )
69
69
return expr
@@ -72,87 +72,83 @@ def is_expression_valid(cls, expr: str) -> str:
72
72
73
73
74
74
class Relation(BaseModel):
    """A directed relationship (join edge) between two dataset columns.

    Loaded from YAML where the source column is given under the key
    ``"from"`` — a Python keyword — hence the ``from_`` attribute with a
    field alias.  ``name`` and ``description`` are optional metadata;
    ``from_`` and ``to`` are required column references.
    """

    # Optional human-readable metadata for the relationship.
    name: str | None = Field(None, description="Name of the relationship.")
    description: str | None = Field(
        None, description="Description of the relationship."
    )
    # Required endpoints: aliased because "from" is a reserved word in Python.
    from_: str = Field(
        ..., alias="from", description="Source column for the relationship."
    )
    to: str = Field(..., description="Target column for the relationship.")
83
79
84
80
85
81
class TransformationParams (BaseModel ):
86
- column : Optional [ str ] = Field (None , description = "Column to transform" )
87
- value : Optional [ Union [ str , int , float , bool ]] = Field (
82
+ column : str | None = Field (None , description = "Column to transform" )
83
+ value : str | int | float | bool | None = Field (
88
84
None , description = "Value for fill_na and other transformations"
89
85
)
90
- mapping : Optional [ Dict [str , str ]] = Field (
86
+ mapping : Dict [str , str ] | None = Field (
91
87
None , description = "Mapping dictionary for map_values transformation"
92
88
)
93
- format : Optional [ str ] = Field (None , description = "Format string for date formatting" )
94
- decimals : Optional [ int ] = Field (
89
+ format : str | None = Field (None , description = "Format string for date formatting" )
90
+ decimals : int | None = Field (
95
91
None , description = "Number of decimal places for rounding"
96
92
)
97
- factor : Optional [ Union [ int , float ]] = Field (None , description = "Scaling factor" )
98
- to_tz : Optional [ str ] = Field (None , description = "Target timezone or format" )
99
- from_tz : Optional [ str ] = Field (None , description = "From timezone or format" )
100
- errors : Optional [ str ] = Field (
93
+ factor : int | float | None = Field (None , description = "Scaling factor" )
94
+ to_tz : str | None = Field (None , description = "Target timezone or format" )
95
+ from_tz : str | None = Field (None , description = "From timezone or format" )
96
+ errors : str | None = Field (
101
97
None , description = "Error handling mode for numeric/datetime conversion"
102
98
)
103
- old_value : Optional [ Any ] = Field (
99
+ old_value : Any | None = Field (
104
100
None , description = "Old value for replace transformation"
105
101
)
106
- new_value : Optional [ Any ] = Field (
102
+ new_value : Any | None = Field (
107
103
None , description = "New value for replace transformation"
108
104
)
109
- new_name : Optional [ str ] = Field (
105
+ new_name : str | None = Field (
110
106
None , description = "New name for column in rename transformation"
111
107
)
112
- pattern : Optional [ str ] = Field (
108
+ pattern : str | None = Field (
113
109
None , description = "Pattern for extract transformation"
114
110
)
115
- length : Optional [ int ] = Field (
111
+ length : int | None = Field (
116
112
None , description = "Length for truncate transformation"
117
113
)
118
- add_ellipsis : Optional [ bool ] = Field (
114
+ add_ellipsis : bool | None = Field (
119
115
True , description = "Whether to add ellipsis in truncate"
120
116
)
121
- width : Optional [ int ] = Field (None , description = "Width for pad transformation" )
122
- side : Optional [ str ] = Field ("left" , description = "Side for pad transformation" )
123
- pad_char : Optional [ str ] = Field (" " , description = "Character for pad transformation" )
124
- lower : Optional [ Union [ int , float ]] = Field (None , description = "Lower bound for clip" )
125
- upper : Optional [ Union [ int , float ]] = Field (None , description = "Upper bound for clip" )
126
- bins : Optional [ Union [ int , List [Union [ int , float ]]]] = Field (
117
+ width : int | None = Field (None , description = "Width for pad transformation" )
118
+ side : str | None = Field ("left" , description = "Side for pad transformation" )
119
+ pad_char : str | None = Field (" " , description = "Character for pad transformation" )
120
+ lower : int | float | None = Field (None , description = "Lower bound for clip" )
121
+ upper : int | float | None = Field (None , description = "Upper bound for clip" )
122
+ bins : int | List [int | float ] | None = Field (
127
123
None , description = "Bins for binning"
128
124
)
129
- labels : Optional [ List [str ]] = Field (None , description = "Labels for bins" )
130
- drop_first : Optional [ bool ] = Field (
125
+ labels : List [str ] | None = Field (None , description = "Labels for bins" )
126
+ drop_first : bool | None = Field (
131
127
True , description = "Whether to drop first category in encoding"
132
128
)
133
- drop_invalid : Optional [ bool ] = Field (
129
+ drop_invalid : bool | None = Field (
134
130
False , description = "Whether to drop invalid values"
135
131
)
136
- start_date : Optional [ str ] = Field (
132
+ start_date : str | None = Field (
137
133
None , description = "Start date for date range validation"
138
134
)
139
- end_date : Optional [ str ] = Field (
135
+ end_date : str | None = Field (
140
136
None , description = "End date for date range validation"
141
137
)
142
- country_code : Optional [ str ] = Field (
138
+ country_code : str | None = Field (
143
139
"+1" , description = "Country code for phone normalization"
144
140
)
145
- columns : Optional [ List [str ]] = Field (
141
+ columns : List [str ] | None = Field (
146
142
None , description = "List of columns for multi-column operations"
147
143
)
148
- keep : Optional [ str ] = Field ("first" , description = "Which duplicates to keep" )
149
- ref_table : Optional [ Any ] = Field (
144
+ keep : str | None = Field ("first" , description = "Which duplicates to keep" )
145
+ ref_table : Any | None = Field (
150
146
None , description = "Reference DataFrame for foreign key validation"
151
147
)
152
- ref_column : Optional [ str ] = Field (
148
+ ref_column : str | None = Field (
153
149
None , description = "Reference column for foreign key validation"
154
150
)
155
- drop_negative : Optional [ bool ] = Field (
151
+ drop_negative : bool | None = Field (
156
152
False , description = "Whether to drop negative values"
157
153
)
158
154
@@ -172,7 +168,7 @@ def validate_required_params(cls, values: dict) -> dict:
172
168
173
169
class Transformation (BaseModel ):
174
170
type : str = Field (..., description = "Type of transformation to be applied." )
175
- params : Optional [ TransformationParams ] = Field (
171
+ params : TransformationParams | None = Field (
176
172
None , description = "Parameters for the transformation."
177
173
)
178
174
@@ -195,11 +191,11 @@ def set_transform_type(cls, values: dict) -> dict:
195
191
196
192
class Source (BaseModel ):
197
193
type : str = Field (..., description = "Type of the data source." )
198
- path : Optional [ str ] = Field (None , description = "Path of the local data source." )
199
- connection : Optional [ SQLConnectionConfig ] = Field (
194
+ path : str | None = Field (None , description = "Path of the local data source." )
195
+ connection : SQLConnectionConfig | None = Field (
200
196
None , description = "Connection object of the data source."
201
197
)
202
- table : Optional [ str ] = Field (None , description = "Table of the data source." )
198
+ table : str | None = Field (None , description = "Table of the data source." )
203
199
204
200
def is_compatible_source (self , source2 : "Source" ):
205
201
"""
@@ -267,33 +263,33 @@ def is_format_supported(cls, format: str) -> str:
267
263
268
264
class SemanticLayerSchema (BaseModel ):
269
265
name : str = Field (..., description = "Dataset name." )
270
- source : Optional [ Source ] = Field (None , description = "Data source for your dataset." )
271
- view : Optional [ bool ] = Field (None , description = "Whether table is a view" )
272
- description : Optional [ str ] = Field (
266
+ source : Source | None = Field (None , description = "Data source for your dataset." )
267
+ view : bool | None = Field (None , description = "Whether table is a view" )
268
+ description : str | None = Field (
273
269
None , description = "Dataset’s contents and purpose description."
274
270
)
275
- columns : Optional [ List [Column ]] = Field (
271
+ columns : List [Column ] | None = Field (
276
272
None , description = "Structure and metadata of your dataset’s columns"
277
273
)
278
- relations : Optional [ List [Relation ]] = Field (
274
+ relations : List [Relation ] | None = Field (
279
275
None , description = "Relationships between columns and tables."
280
276
)
281
- order_by : Optional [ List [str ]] = Field (
277
+ order_by : List [str ] | None = Field (
282
278
None , description = "Ordering criteria for the dataset."
283
279
)
284
- limit : Optional [ int ] = Field (
280
+ limit : int | None = Field (
285
281
None , description = "Maximum number of records to retrieve."
286
282
)
287
- transformations : Optional [ List [Transformation ]] = Field (
283
+ transformations : List [Transformation ] | None = Field (
288
284
None , description = "List of transformations to apply to the data."
289
285
)
290
- destination : Optional [ Destination ] = Field (
286
+ destination : Destination | None = Field (
291
287
None , description = "Destination for saving the dataset."
292
288
)
293
- update_frequency : Optional [ str ] = Field (
289
+ update_frequency : str | None = Field (
294
290
None , description = "Frequency of dataset updates."
295
291
)
296
- group_by : Optional [ List [str ]] = Field (
292
+ group_by : List [str ] | None = Field (
297
293
None ,
298
294
description = "List of columns to group by. Every non-aggregated column must be included in group_by." ,
299
295
)
0 commit comments