Set the default output_format/dataset_format to "dataframe" as a first step towards issue openml#1115

SubhadityaMukherjee · SubhadityaMukherjee · commit 215fee82fa03 · 2025-02-06T19:06:47.000+01:00
diff --git a/openml/datasets/functions.py b/openml/datasets/functions.py
@@ -120,7 +120,7 @@ def list_datasets(
     size: int | None = None,
     status: str | None = None,
     tag: str | None = None,
-    output_format: Literal["dataframe", "dict"] = "dict",
+    output_format: Literal["dataframe", "dict"] = "dataframe",
     **kwargs: Any,
 ) -> dict | pd.DataFrame:
     """
@@ -141,7 +141,7 @@ def list_datasets(
         default active datasets are returned, but also datasets
         from another status can be requested.
     tag : str, optional
-    output_format: str, optional (default='dict')
+    output_format: str, optional (default='dataframe')
         The parameter decides the format of the output.
         - If 'dict' the output is a dict of dict
         - If 'dataframe' the output is a pandas DataFrame
@@ -219,7 +219,7 @@ def _list_datasets(
 
 def _list_datasets(
     data_id: list | None = None,
-    output_format: Literal["dict", "dataframe"] = "dict",
+    output_format: Literal["dict", "dataframe"] = "dataframe",
     **kwargs: Any,
 ) -> dict | pd.DataFrame:
     """
@@ -234,7 +234,7 @@ def _list_datasets(
 
     data_id : list, optional
 
-    output_format: str, optional (default='dict')
+    output_format: str, optional (default='dataframe')
         The parameter decides the format of the output.
         - If 'dict' the output is a dict of dict
         - If 'dataframe' the output is a pandas DataFrame
@@ -267,7 +267,7 @@ def __list_datasets(api_call: str, output_format: Literal["dataframe"]) -> pd.Da
 
 def __list_datasets(
     api_call: str,
-    output_format: Literal["dict", "dataframe"] = "dict",
+    output_format: Literal["dict", "dataframe"] = "dataframe",
 ) -> dict | pd.DataFrame:
     xml_string = openml._api_calls._perform_api_call(api_call, "get")
     datasets_dict = xmltodict.parse(xml_string, force_list=("oml:dataset",))
diff --git a/openml/evaluations/functions.py b/openml/evaluations/functions.py
@@ -66,7 +66,7 @@ def list_evaluations(
     study: int | None = None,
     per_fold: bool | None = None,
     sort_order: str | None = None,
-    output_format: Literal["object", "dict", "dataframe"] = "object",
+    output_format: Literal["object", "dict", "dataframe"] = "dataframe",
 ) -> dict | pd.DataFrame:
     """
     List all run-evaluation pairs matching all of the given filters.
@@ -102,7 +102,7 @@ def list_evaluations(
     sort_order : str, optional
        order of sorting evaluations, ascending ("asc") or descending ("desc")
 
-    output_format: str, optional (default='object')
+    output_format: str, optional (default='dataframe')
         The parameter decides the format of the output.
         - If 'object' the output is a dict of OpenMLEvaluation objects
         - If 'dict' the output is a dict of dict
@@ -157,7 +157,7 @@ def _list_evaluations(
     uploaders: list | None = None,
     study: int | None = None,
     sort_order: str | None = None,
-    output_format: Literal["object", "dict", "dataframe"] = "object",
+    output_format: Literal["object", "dict", "dataframe"] = "dataframe",
     **kwargs: Any,
 ) -> dict | pd.DataFrame:
     """
@@ -190,7 +190,7 @@ def _list_evaluations(
     sort_order : str, optional
         order of sorting evaluations, ascending ("asc") or descending ("desc")
 
-    output_format: str, optional (default='dict')
+    output_format: str, optional (default='dataframe')
         The parameter decides the format of the output.
         - If 'dict' the output is a dict of dict
         The parameter decides the format of the output.
@@ -226,7 +226,7 @@ def _list_evaluations(
 
 def __list_evaluations(
     api_call: str,
-    output_format: Literal["object", "dict", "dataframe"] = "object",
+    output_format: Literal["object", "dict", "dataframe"] = "dataframe",
 ) -> dict | pd.DataFrame:
     """Helper function to parse API calls which are lists of runs"""
     xml_string = openml._api_calls._perform_api_call(api_call, "get")
diff --git a/openml/flows/functions.py b/openml/flows/functions.py
@@ -168,7 +168,7 @@ def list_flows(
     offset: int | None = None,
     size: int | None = None,
     tag: str | None = None,
-    output_format: Literal["dict", "dataframe"] = "dict",
+    output_format: Literal["dict", "dataframe"] = "dataframe",
     **kwargs: Any,
 ) -> dict | pd.DataFrame:
     """
@@ -183,7 +183,7 @@ def list_flows(
         the maximum number of flows to return
     tag : str, optional
         the tag to include
-    output_format: str, optional (default='dict')
+    output_format: str, optional (default='dataframe')
         The parameter decides the format of the output.
         - If 'dict' the output is a dict of dict
         - If 'dataframe' the output is a pandas DataFrame
@@ -252,14 +252,14 @@ def _list_flows(output_format: Literal["dataframe"], **kwargs: Any) -> pd.DataFr
 
 
 def _list_flows(
-    output_format: Literal["dict", "dataframe"] = "dict", **kwargs: Any
+    output_format: Literal["dict", "dataframe"] = "dataframe", **kwargs: Any
 ) -> dict | pd.DataFrame:
     """
     Perform the api call that return a list of all flows.
 
     Parameters
     ----------
-    output_format: str, optional (default='dict')
+    output_format: str, optional (default='dataframe')
         The parameter decides the format of the output.
         - If 'dict' the output is a dict of dict
         - If 'dataframe' the output is a pandas DataFrame
@@ -393,7 +393,7 @@ def __list_flows(api_call: str, output_format: Literal["dataframe"]) -> pd.DataF
 
 
 def __list_flows(
-    api_call: str, output_format: Literal["dict", "dataframe"] = "dict"
+    api_call: str, output_format: Literal["dict", "dataframe"] = "dataframe"
 ) -> dict | pd.DataFrame:
     """Retrieve information about flows from OpenML API
     and parse it to a dictionary or a Pandas DataFrame.
diff --git a/openml/runs/functions.py b/openml/runs/functions.py
@@ -1063,7 +1063,7 @@ def list_runs(  # noqa: PLR0913
     tag: str | None = None,
     study: int | None = None,
     display_errors: bool = False,  # noqa: FBT001, FBT002
-    output_format: Literal["dict", "dataframe"] = "dict",
+    output_format: Literal["dict", "dataframe"] = "dataframe",
     **kwargs: Any,
 ) -> dict | pd.DataFrame:
     """
@@ -1095,7 +1095,7 @@ def list_runs(  # noqa: PLR0913
         Whether to list runs which have an error (for example a missing
         prediction file).
 
-    output_format: str, optional (default='dict')
+    output_format: str, optional (default='dataframe')
         The parameter decides the format of the output.
         - If 'dict' the output is a dict of dict
         - If 'dataframe' the output is a pandas DataFrame
@@ -1156,7 +1156,7 @@ def _list_runs(  # noqa: PLR0913
     uploader: list | None = None,
     study: int | None = None,
     display_errors: bool = False,  # noqa: FBT002, FBT001
-    output_format: Literal["dict", "dataframe"] = "dict",
+    output_format: Literal["dict", "dataframe"] = "dataframe",
     **kwargs: Any,
 ) -> dict | pd.DataFrame:
     """
@@ -1186,7 +1186,7 @@ def _list_runs(  # noqa: PLR0913
         Whether to list runs which have an error (for example a missing
         prediction file).
 
-    output_format: str, optional (default='dict')
+    output_format: str, optional (default='dataframe')
         The parameter decides the format of the output.
         - If 'dict' the output is a dict of dict
         - If 'dataframe' the output is a pandas DataFrame
@@ -1221,7 +1221,7 @@ def _list_runs(  # noqa: PLR0913
 
 
 def __list_runs(
-    api_call: str, output_format: Literal["dict", "dataframe"] = "dict"
+    api_call: str, output_format: Literal["dict", "dataframe"] = "dataframe"
 ) -> dict | pd.DataFrame:
     """Helper function to parse API calls which are lists of runs"""
     xml_string = openml._api_calls._perform_api_call(api_call, "get")
diff --git a/openml/setups/functions.py b/openml/setups/functions.py
@@ -134,7 +134,7 @@ def list_setups(  # noqa: PLR0913
     flow: int | None = None,
     tag: str | None = None,
     setup: Iterable[int] | None = None,
-    output_format: Literal["object", "dict", "dataframe"] = "object",
+    output_format: Literal["object", "dict", "dataframe"] = "dataframe",
 ) -> dict | pd.DataFrame:
     """
     List all setups matching all of the given filters.
@@ -146,7 +146,7 @@ def list_setups(  # noqa: PLR0913
     flow : int, optional
     tag : str, optional
     setup : Iterable[int], optional
-    output_format: str, optional (default='object')
+    output_format: str, optional (default='dataframe')
         The parameter decides the format of the output.
         - If 'dict' the output is a dict of dict
         - If 'dataframe' the output is a pandas DataFrame
@@ -184,7 +184,7 @@ def list_setups(  # noqa: PLR0913
 
 def _list_setups(
     setup: Iterable[int] | None = None,
-    output_format: Literal["dict", "dataframe", "object"] = "object",
+    output_format: Literal["dict", "dataframe", "object"] = "dataframe",
     **kwargs: Any,
 ) -> dict[int, dict] | pd.DataFrame | dict[int, OpenMLSetup]:
     """
@@ -197,7 +197,7 @@ def _list_setups(
 
     setup : list(int), optional
 
-    output_format: str, optional (default='dict')
+    output_format: str, optional (default='dataframe')
         The parameter decides the format of the output.
         - If 'dict' the output is a dict of dict
         - If 'dataframe' the output is a pandas DataFrame
@@ -221,7 +221,7 @@ def _list_setups(
 
 
 def __list_setups(
-    api_call: str, output_format: Literal["dict", "dataframe", "object"] = "object"
+    api_call: str, output_format: Literal["dict", "dataframe", "object"] = "dataframe"
 ) -> dict[int, dict] | pd.DataFrame | dict[int, OpenMLSetup]:
     """Helper function to parse API calls which are lists of setups"""
     xml_string = openml._api_calls._perform_api_call(api_call, "get")
@@ -328,7 +328,7 @@ def _to_dict(
 
 
 def _create_setup_from_xml(
-    result_dict: dict, output_format: Literal["dict", "dataframe", "object"] = "object"
+    result_dict: dict, output_format: Literal["dict", "dataframe", "object"] = "dataframe"
 ) -> OpenMLSetup | dict[str, int | dict[int, Any] | None]:
     """Turns an API xml result into a OpenMLSetup object (or dict)"""
     if output_format in ["dataframe", "dict"]:
diff --git a/openml/tasks/functions.py b/openml/tasks/functions.py
@@ -131,7 +131,7 @@ def list_tasks(
     offset: int | None = None,
     size: int | None = None,
     tag: str | None = None,
-    output_format: Literal["dict", "dataframe"] = "dict",
+    output_format: Literal["dict", "dataframe"] = "dataframe",
     **kwargs: Any,
 ) -> dict | pd.DataFrame:
     """
@@ -150,7 +150,7 @@ def list_tasks(
         the maximum number of tasks to show
     tag : str, optional
         the tag to include
-    output_format: str, optional (default='dict')
+    output_format: str, optional (default='dataframe')
         The parameter decides the format of the output.
         - If 'dict' the output is a dict of dict
         - If 'dataframe' the output is a pandas DataFrame
@@ -197,7 +197,7 @@ def list_tasks(
 
 def _list_tasks(
     task_type: TaskType | None = None,
-    output_format: Literal["dict", "dataframe"] = "dict",
+    output_format: Literal["dict", "dataframe"] = "dataframe",
     **kwargs: Any,
 ) -> dict | pd.DataFrame:
     """
@@ -210,7 +210,7 @@ def _list_tasks(
     type when used as a filter in list tasks call.
     task_type : TaskType, optional
         Refers to the type of task.
-    output_format: str, optional (default='dict')
+    output_format: str, optional (default='dataframe')
         The parameter decides the format of the output.
         - If 'dict' the output is a dict of dict
         - If 'dataframe' the output is a pandas DataFrame
@@ -238,7 +238,7 @@ def _list_tasks(
 # TODO(eddiebergman): overload todefine type returned
 def __list_tasks(  # noqa: PLR0912, C901
     api_call: str,
-    output_format: Literal["dict", "dataframe"] = "dict",
+    output_format: Literal["dict", "dataframe"] = "dataframe",
 ) -> dict | pd.DataFrame:
     """Returns a dictionary or a Pandas DataFrame with information about OpenML tasks.
 
diff --git a/openml/tasks/task.py b/openml/tasks/task.py
@@ -295,7 +295,7 @@ def get_X_and_y(
 
     # TODO(eddiebergman): Do all OpenMLSupervisedTask have a `y`?
     def get_X_and_y(
-        self, dataset_format: Literal["dataframe", "array"] = "array"
+        self, dataset_format: Literal["dataframe", "array"] = "dataframe"
     ) -> tuple[
         np.ndarray | pd.DataFrame | scipy.sparse.spmatrix,
         np.ndarray | pd.Series | pd.DataFrame | None,
@@ -547,7 +547,7 @@ def get_X(self, dataset_format: Literal["dataframe"]) -> pd.DataFrame: ...
 
     def get_X(
         self,
-        dataset_format: Literal["array", "dataframe"] = "array",
+        dataset_format: Literal["array", "dataframe"] = "dataframe",
     ) -> np.ndarray | pd.DataFrame | scipy.sparse.spmatrix:
         """Get data associated with the current task.