From 215fee82fa03ee9a221a773c20e6d0a5f8c51dcf Mon Sep 17 00:00:00 2001 From: SubhadityaMukherjee Date: Thu, 6 Feb 2025 19:06:47 +0100 Subject: [PATCH] defaults are set to dataframe as part of first step towards issue #1115 --- openml/datasets/functions.py | 10 +++++----- openml/evaluations/functions.py | 10 +++++----- openml/flows/functions.py | 10 +++++----- openml/runs/functions.py | 10 +++++----- openml/setups/functions.py | 12 ++++++------ openml/tasks/functions.py | 10 +++++----- openml/tasks/task.py | 4 ++-- 7 files changed, 33 insertions(+), 33 deletions(-) diff --git a/openml/datasets/functions.py b/openml/datasets/functions.py index 3f3c709f9..c32373079 100644 --- a/openml/datasets/functions.py +++ b/openml/datasets/functions.py @@ -120,7 +120,7 @@ def list_datasets( size: int | None = None, status: str | None = None, tag: str | None = None, - output_format: Literal["dataframe", "dict"] = "dict", + output_format: Literal["dataframe", "dict"] = "dataframe", **kwargs: Any, ) -> dict | pd.DataFrame: """ @@ -141,7 +141,7 @@ def list_datasets( default active datasets are returned, but also datasets from another status can be requested. tag : str, optional - output_format: str, optional (default='dict') + output_format: str, optional (default='dataframe') The parameter decides the format of the output. - If 'dict' the output is a dict of dict - If 'dataframe' the output is a pandas DataFrame @@ -219,7 +219,7 @@ def _list_datasets( def _list_datasets( data_id: list | None = None, - output_format: Literal["dict", "dataframe"] = "dict", + output_format: Literal["dict", "dataframe"] = "dataframe", **kwargs: Any, ) -> dict | pd.DataFrame: """ @@ -234,7 +234,7 @@ def _list_datasets( data_id : list, optional - output_format: str, optional (default='dict') + output_format: str, optional (default='dataframe') The parameter decides the format of the output. - If 'dict' the output is a dict of dict - If 'dataframe' the output is a pandas DataFrame @@ -267,7 +267,7 @@ def __list_datasets(api_call: str, output_format: Literal["dataframe"]) -> pd.Da def __list_datasets( api_call: str, - output_format: Literal["dict", "dataframe"] = "dict", + output_format: Literal["dict", "dataframe"] = "dataframe", ) -> dict | pd.DataFrame: xml_string = openml._api_calls._perform_api_call(api_call, "get") datasets_dict = xmltodict.parse(xml_string, force_list=("oml:dataset",)) diff --git a/openml/evaluations/functions.py b/openml/evaluations/functions.py index a39096a58..a61a47704 100644 --- a/openml/evaluations/functions.py +++ b/openml/evaluations/functions.py @@ -66,7 +66,7 @@ def list_evaluations( study: int | None = None, per_fold: bool | None = None, sort_order: str | None = None, - output_format: Literal["object", "dict", "dataframe"] = "object", + output_format: Literal["object", "dict", "dataframe"] = "dataframe", ) -> dict | pd.DataFrame: """ List all run-evaluation pairs matching all of the given filters. @@ -102,7 +102,7 @@ def list_evaluations( sort_order : str, optional order of sorting evaluations, ascending ("asc") or descending ("desc") - output_format: str, optional (default='object') + output_format: str, optional (default='dataframe') The parameter decides the format of the output. - If 'object' the output is a dict of OpenMLEvaluation objects - If 'dict' the output is a dict of dict @@ -157,7 +157,7 @@ def _list_evaluations( uploaders: list | None = None, study: int | None = None, sort_order: str | None = None, - output_format: Literal["object", "dict", "dataframe"] = "object", + output_format: Literal["object", "dict", "dataframe"] = "dataframe", **kwargs: Any, ) -> dict | pd.DataFrame: """ @@ -190,7 +190,7 @@ def _list_evaluations( sort_order : str, optional order of sorting evaluations, ascending ("asc") or descending ("desc") - output_format: str, optional (default='dict') + output_format: str, optional (default='dataframe') The parameter decides the format of the output. - If 'dict' the output is a dict of dict The parameter decides the format of the output. @@ -226,7 +226,7 @@ def _list_evaluations( def __list_evaluations( api_call: str, - output_format: Literal["object", "dict", "dataframe"] = "object", + output_format: Literal["object", "dict", "dataframe"] = "dataframe", ) -> dict | pd.DataFrame: """Helper function to parse API calls which are lists of runs""" xml_string = openml._api_calls._perform_api_call(api_call, "get") diff --git a/openml/flows/functions.py b/openml/flows/functions.py index 3d056ac60..257d57c91 100644 --- a/openml/flows/functions.py +++ b/openml/flows/functions.py @@ -168,7 +168,7 @@ def list_flows( offset: int | None = None, size: int | None = None, tag: str | None = None, - output_format: Literal["dict", "dataframe"] = "dict", + output_format: Literal["dict", "dataframe"] = "dataframe", **kwargs: Any, ) -> dict | pd.DataFrame: """ @@ -183,7 +183,7 @@ def list_flows( the maximum number of flows to return tag : str, optional the tag to include - output_format: str, optional (default='dict') + output_format: str, optional (default='dataframe') The parameter decides the format of the output. - If 'dict' the output is a dict of dict - If 'dataframe' the output is a pandas DataFrame @@ -252,14 +252,14 @@ def _list_flows(output_format: Literal["dataframe"], **kwargs: Any) -> pd.DataFr def _list_flows( - output_format: Literal["dict", "dataframe"] = "dict", **kwargs: Any + output_format: Literal["dict", "dataframe"] = "dataframe", **kwargs: Any ) -> dict | pd.DataFrame: """ Perform the api call that return a list of all flows. Parameters ---------- - output_format: str, optional (default='dict') + output_format: str, optional (default='dataframe') The parameter decides the format of the output. - If 'dict' the output is a dict of dict - If 'dataframe' the output is a pandas DataFrame @@ -393,7 +393,7 @@ def __list_flows(api_call: str, output_format: Literal["dataframe"]) -> pd.DataF def __list_flows( - api_call: str, output_format: Literal["dict", "dataframe"] = "dict" + api_call: str, output_format: Literal["dict", "dataframe"] = "dataframe" ) -> dict | pd.DataFrame: """Retrieve information about flows from OpenML API and parse it to a dictionary or a Pandas DataFrame. diff --git a/openml/runs/functions.py b/openml/runs/functions.py index b6f950020..db95b5ab3 100644 --- a/openml/runs/functions.py +++ b/openml/runs/functions.py @@ -1063,7 +1063,7 @@ def list_runs( # noqa: PLR0913 tag: str | None = None, study: int | None = None, display_errors: bool = False, # noqa: FBT001, FBT002 - output_format: Literal["dict", "dataframe"] = "dict", + output_format: Literal["dict", "dataframe"] = "dataframe", **kwargs: Any, ) -> dict | pd.DataFrame: """ @@ -1095,7 +1095,7 @@ def list_runs( # noqa: PLR0913 Whether to list runs which have an error (for example a missing prediction file). - output_format: str, optional (default='dict') + output_format: str, optional (default='dataframe') The parameter decides the format of the output. - If 'dict' the output is a dict of dict - If 'dataframe' the output is a pandas DataFrame @@ -1156,7 +1156,7 @@ def _list_runs( # noqa: PLR0913 uploader: list | None = None, study: int | None = None, display_errors: bool = False, # noqa: FBT002, FBT001 - output_format: Literal["dict", "dataframe"] = "dict", + output_format: Literal["dict", "dataframe"] = "dataframe", **kwargs: Any, ) -> dict | pd.DataFrame: """ @@ -1186,7 +1186,7 @@ def _list_runs( # noqa: PLR0913 Whether to list runs which have an error (for example a missing prediction file). - output_format: str, optional (default='dict') + output_format: str, optional (default='dataframe') The parameter decides the format of the output. - If 'dict' the output is a dict of dict - If 'dataframe' the output is a pandas DataFrame @@ -1221,7 +1221,7 @@ def _list_runs( # noqa: PLR0913 def __list_runs( - api_call: str, output_format: Literal["dict", "dataframe"] = "dict" + api_call: str, output_format: Literal["dict", "dataframe"] = "dataframe" ) -> dict | pd.DataFrame: """Helper function to parse API calls which are lists of runs""" xml_string = openml._api_calls._perform_api_call(api_call, "get") diff --git a/openml/setups/functions.py b/openml/setups/functions.py index 877384636..c37794d6b 100644 --- a/openml/setups/functions.py +++ b/openml/setups/functions.py @@ -134,7 +134,7 @@ def list_setups( # noqa: PLR0913 flow: int | None = None, tag: str | None = None, setup: Iterable[int] | None = None, - output_format: Literal["object", "dict", "dataframe"] = "object", + output_format: Literal["object", "dict", "dataframe"] = "dataframe", ) -> dict | pd.DataFrame: """ List all setups matching all of the given filters. @@ -146,7 +146,7 @@ def list_setups( # noqa: PLR0913 flow : int, optional tag : str, optional setup : Iterable[int], optional - output_format: str, optional (default='object') + output_format: str, optional (default='dataframe') The parameter decides the format of the output. - If 'dict' the output is a dict of dict - If 'dataframe' the output is a pandas DataFrame @@ -184,7 +184,7 @@ def list_setups( # noqa: PLR0913 def _list_setups( setup: Iterable[int] | None = None, - output_format: Literal["dict", "dataframe", "object"] = "object", + output_format: Literal["dict", "dataframe", "object"] = "dataframe", **kwargs: Any, ) -> dict[int, dict] | pd.DataFrame | dict[int, OpenMLSetup]: """ @@ -197,7 +197,7 @@ def _list_setups( setup : list(int), optional - output_format: str, optional (default='dict') + output_format: str, optional (default='dataframe') The parameter decides the format of the output. - If 'dict' the output is a dict of dict - If 'dataframe' the output is a pandas DataFrame @@ -221,7 +221,7 @@ def _list_setups( def __list_setups( - api_call: str, output_format: Literal["dict", "dataframe", "object"] = "object" + api_call: str, output_format: Literal["dict", "dataframe", "object"] = "dataframe" ) -> dict[int, dict] | pd.DataFrame | dict[int, OpenMLSetup]: """Helper function to parse API calls which are lists of setups""" xml_string = openml._api_calls._perform_api_call(api_call, "get") @@ -328,7 +328,7 @@ def _to_dict( def _create_setup_from_xml( - result_dict: dict, output_format: Literal["dict", "dataframe", "object"] = "object" + result_dict: dict, output_format: Literal["dict", "dataframe", "object"] = "dataframe" ) -> OpenMLSetup | dict[str, int | dict[int, Any] | None]: """Turns an API xml result into a OpenMLSetup object (or dict)""" if output_format in ["dataframe", "dict"]: diff --git a/openml/tasks/functions.py b/openml/tasks/functions.py index 54030422d..6db9bfad4 100644 --- a/openml/tasks/functions.py +++ b/openml/tasks/functions.py @@ -131,7 +131,7 @@ def list_tasks( offset: int | None = None, size: int | None = None, tag: str | None = None, - output_format: Literal["dict", "dataframe"] = "dict", + output_format: Literal["dict", "dataframe"] = "dataframe", **kwargs: Any, ) -> dict | pd.DataFrame: """ @@ -150,7 +150,7 @@ def list_tasks( the maximum number of tasks to show tag : str, optional the tag to include - output_format: str, optional (default='dict') + output_format: str, optional (default='dataframe') The parameter decides the format of the output. - If 'dict' the output is a dict of dict - If 'dataframe' the output is a pandas DataFrame @@ -197,7 +197,7 @@ def list_tasks( def _list_tasks( task_type: TaskType | None = None, - output_format: Literal["dict", "dataframe"] = "dict", + output_format: Literal["dict", "dataframe"] = "dataframe", **kwargs: Any, ) -> dict | pd.DataFrame: """ @@ -210,7 +210,7 @@ def _list_tasks( type when used as a filter in list tasks call. task_type : TaskType, optional Refers to the type of task. - output_format: str, optional (default='dict') + output_format: str, optional (default='dataframe') The parameter decides the format of the output. - If 'dict' the output is a dict of dict - If 'dataframe' the output is a pandas DataFrame @@ -238,7 +238,7 @@ def _list_tasks( # TODO(eddiebergman): overload todefine type returned def __list_tasks( # noqa: PLR0912, C901 api_call: str, - output_format: Literal["dict", "dataframe"] = "dict", + output_format: Literal["dict", "dataframe"] = "dataframe", ) -> dict | pd.DataFrame: """Returns a dictionary or a Pandas DataFrame with information about OpenML tasks. diff --git a/openml/tasks/task.py b/openml/tasks/task.py index e7d19bdce..e8bef1ffd 100644 --- a/openml/tasks/task.py +++ b/openml/tasks/task.py @@ -295,7 +295,7 @@ def get_X_and_y( # TODO(eddiebergman): Do all OpenMLSupervisedTask have a `y`? def get_X_and_y( - self, dataset_format: Literal["dataframe", "array"] = "array" + self, dataset_format: Literal["dataframe", "array"] = "dataframe" ) -> tuple[ np.ndarray | pd.DataFrame | scipy.sparse.spmatrix, np.ndarray | pd.Series | pd.DataFrame | None, @@ -547,7 +547,7 @@ def get_X(self, dataset_format: Literal["dataframe"]) -> pd.DataFrame: ... def get_X( self, - dataset_format: Literal["array", "dataframe"] = "array", + dataset_format: Literal["array", "dataframe"] = "dataframe", ) -> np.ndarray | pd.DataFrame | scipy.sparse.spmatrix: """Get data associated with the current task.