This repository was archived by the owner on Nov 10, 2025. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 24
Block design #24
Merged
Merged
Block design #24
Changes from all commits
Commits
Show all changes
46 commits
Select commit
Hold shift + click to select a range
72a2bd9
updating blocks
6d6f4d9
merge with main
7b2d811
update all pub databuilders
4ee38bb
caching llm
aea2444
updating with main
d859057
adding compatibility_tests
b84e7de
merge main
3e1fbc0
template update
aec74ff
template update
8a215de
rm import
52d26f6
remove old return type
94ffa99
add parquet saving / loading
a19116f
adding utility block
e48ec42
adding utility block
0ebe2a0
rm block suffix
1d654dd
remove config argument
6d51079
demonstrate default vals
0d17bb6
demonstrate default vals
fba82b6
remove abstract method to simplify
e7db4dd
remove abstract method to simplify
af01c4e
misc minor changes
a94c7eb
call to generate
d06a26d
call to generate
b7fcb59
non base functions
8b03d98
merge with main
cb29984
registry change
b49ffa1
genai req bug fix
c7a49a6
Update fms_dgt/base/block.py
mvcrouse 80b426f
Update fms_dgt/base/block.py
mvcrouse e2bc1dc
Update fms_dgt/base/block.py
mvcrouse 79278fd
Update fms_dgt/base/block.py
mvcrouse fed9702
throw type error
030caad
Merge branch 'block_design' of github.com:mvcrouse/fms-sdg into block…
ed9e13b
instance methods
f78b81d
dataset type
13701af
dataset type
0953d9b
dataset type
e1a5c23
Update fms_dgt/base/block.py
mvcrouse fb48faf
Update fms_dgt/base/block.py
mvcrouse b4088ce
Update fms_dgt/base/block.py
mvcrouse 64f971a
Update fms_dgt/base/block.py
mvcrouse 41c9144
fixing base block class
6f5631a
consistency
41009e9
removing empty classes
7fb7f67
make blocks a list, easier for duplicate checking
63d4ea6
simpler type check
File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,138 @@ | ||
| # Standard | ||
| from abc import ABC, abstractmethod | ||
| from typing import Any, Dict, Iterable, List, Optional, Union | ||
|
|
||
| # Third Party | ||
| from datasets import Dataset | ||
| import pandas as pd | ||
|
|
||
# A single row of a dataset: either a plain dict keyed by column name or a
# pandas Series.
DATASET_ROW_TYPE = Union[Dict[str, Any], pd.Series]
# A dataset as accepted by blocks: any iterable of rows, a pandas DataFrame,
# or a `datasets.Dataset`.
DATASET_TYPE = Union[Iterable[DATASET_ROW_TYPE], pd.DataFrame, Dataset]
|
|
||
|
|
||
class BaseBlock(ABC):
    """Base class for all Blocks.

    A block extracts named fields from each input row, hands them to its
    underlying implementation as positional / keyword arguments, and can write
    the result back onto the row. The field names may be configured once at
    construction time and overridden on a per-call basis.
    """

    def __init__(
        self,
        name: Optional[str] = None,
        arg_fields: Optional[List[str]] = None,
        kwarg_fields: Optional[List[str]] = None,
        result_field: Optional[str] = None,
    ) -> None:
        """Store the block's name and default field configuration.

        Args:
            name: Optional name of the block.
            arg_fields: Default names of row fields passed as positional args.
            kwarg_fields: Default names of row fields passed as keyword args.
            result_field: Default name of the row field results are written to.

        Raises:
            TypeError: If ``arg_fields`` / ``kwarg_fields`` is neither a list
                nor None, or ``result_field`` is neither a str nor None.
        """
        if not isinstance(arg_fields, (list, type(None))):
            raise TypeError("arg_fields must be of type 'list'")
        if not isinstance(kwarg_fields, (list, type(None))):
            raise TypeError("kwarg_fields must be of type 'list'")
        if not isinstance(result_field, (str, type(None))):
            raise TypeError("result_field must be of type 'str'")

        self._name = name

        self._arg_fields = arg_fields
        self._kwarg_fields = kwarg_fields
        self._result_field = result_field

    @property
    def name(self):
        """Name given to this block at construction (may be None)."""
        return self._name

    @property
    def arg_fields(self):
        """Default positional-argument field names (may be None)."""
        return self._arg_fields

    @property
    def kwarg_fields(self):
        """Default keyword-argument field names (may be None)."""
        return self._kwarg_fields

    @property
    def result_field(self):
        """Default field name results are written to (may be None)."""
        return self._result_field

    def get_args_kwargs(
        self,
        inp: DATASET_ROW_TYPE,
        arg_fields: Optional[List[str]] = None,
        kwarg_fields: Optional[List[str]] = None,
    ):
        """Extract positional and keyword arguments from a single row.

        Args:
            inp: The row to read from.
            arg_fields: Field names to extract as positional args. When None,
                the block's configured ``arg_fields`` are used.
            kwarg_fields: Field names to extract as keyword args. When None,
                the block's configured ``kwarg_fields`` are used.

        Returns:
            A ``(args, kwargs)`` pair: a list of values in ``arg_fields`` order
            and a dict mapping each kwarg field name to its value. Values are
            looked up with ``inp.get``.

        Raises:
            TypeError: If ``inp`` is not one of the supported container types.
        """
        # Compare against None rather than truthiness so that a caller can
        # explicitly pass [] to request "no fields" without silently falling
        # back to the block-level defaults.
        if arg_fields is None:
            arg_fields = self.arg_fields or []
        if kwarg_fields is None:
            kwarg_fields = self.kwarg_fields or []

        # pd.Series is part of DATASET_ROW_TYPE and must be accepted here;
        # pd.DataFrame and Dataset are kept for backward compatibility.
        if isinstance(inp, (dict, pd.Series, pd.DataFrame, Dataset)):
            return (
                [inp.get(arg) for arg in arg_fields],
                {kwarg: inp.get(kwarg) for kwarg in kwarg_fields},
            )
        raise TypeError(f"Unexpected input type: {type(inp)}")

    def write_result(
        self,
        inp: DATASET_ROW_TYPE,
        res: Any,
        result_field: Optional[str] = None,
    ) -> None:
        """Store ``res`` on the row under the result field (in place).

        Args:
            inp: The row to write to; it is mutated.
            res: The value to store.
            result_field: Field name to write to. When falsy, the block's
                configured ``result_field`` is used.

        Raises:
            ValueError: If no result field was given and none is configured.
            TypeError: If ``inp`` is not one of the supported container types.
        """
        result_field = result_field or self.result_field

        # Raise explicitly instead of using `assert`, which is removed when
        # Python runs with -O and would let a None key slip through.
        if result_field is None:
            raise ValueError("Result field cannot be None!")

        # pd.Series is part of DATASET_ROW_TYPE and must be accepted here;
        # pd.DataFrame and Dataset are kept for backward compatibility.
        if isinstance(inp, (dict, pd.Series, pd.DataFrame, Dataset)):
            inp[result_field] = res
            return

        raise TypeError(f"Unexpected input type: {type(inp)}")

    @abstractmethod
    def generate(
        self,
        inputs: DATASET_TYPE,
        *,
        arg_fields: Optional[List[str]] = None,
        kwarg_fields: Optional[List[str]] = None,
        result_field: Optional[str] = None,
        **kwargs,
    ):
        """The generate function is the primary interface to a Block

        args:
            inputs (DATASET_TYPE): A block operates over a logical iterable
                of rows with named columns (see DATASET_TYPE)

        kwargs:
            arg_fields (Optional[List[str]]): Names of fields within the rows of
                the inputs that should be extracted and passed as positional
                args to the underlying implementation methods.
            kwarg_fields (Optional[List[str]]): Names of fields within the rows
                of the inputs that should be extracted and passed as keyword
                args to the underlying implementation methods.
            result_field (Optional[str]): Name of the field within each row
                that the result should be written to.
            **kwargs: Additional keyword args that may be passed to the derived
                block's generate function
        """
|
|
||
|
|
||
class BaseValidatorBlock(BaseBlock):
    """Base class for blocks that check each input row with ``_validate``.

    Derived classes implement ``_validate``; ``generate`` applies it to every
    row and records the outcome on the row.
    """

    def __init__(self, filter: bool = False, **kwargs: Any) -> None:
        """Configure the validator.

        Args:
            filter: When True, rows whose validation result is falsy are
                dropped from the output of ``generate``.
            **kwargs: Forwarded unchanged to ``BaseBlock.__init__``.
        """
        super().__init__(**kwargs)
        self._filter_invalids = filter

    def generate(
        self,
        inputs: DATASET_TYPE,
        *,
        arg_fields: Optional[List[str]] = None,
        kwarg_fields: Optional[List[str]] = None,
        result_field: Optional[str] = None,
    ):
        """Validate every row of ``inputs`` and return the retained rows.

        Args:
            inputs: Iterable of rows to validate.
            arg_fields: Per-call override of the positional-argument fields.
            kwarg_fields: Per-call override of the keyword-argument fields.
            result_field: Per-call override of the field the validation result
                is written to.

        Returns:
            A list of the rows that were kept, each with its validation result
            written to the result field. Rows are mutated in place.
        """
        outputs = []
        for x in inputs:
            inp_args, inp_kwargs = self.get_args_kwargs(x, arg_fields, kwarg_fields)
            res = self._validate(*inp_args, **inp_kwargs)
            # Keep the row when it is valid, or unconditionally when filtering
            # is disabled; dropped rows are left untouched.
            if res or not self._filter_invalids:
                self.write_result(x, res, result_field)
                outputs.append(x)
        return outputs

    @abstractmethod
    def _validate(self, *args: Any, **kwargs: Any) -> bool:
        """Derived validators must implement _validate with their core logic"""
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file was deleted.
Oops, something went wrong.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
Uh oh!
There was an error while loading. Please reload this page.