Skip to content

Commit b4c868a

Browse files
authored
Fix/1110 (openml#1117)
Update function signatures for create_study|suite and allow for empty studies (i.e. with no runs).
1 parent a6c0576 commit b4c868a

File tree

3 files changed

+53
-42
lines changed

3 files changed

+53
-42
lines changed

doc/progress.rst

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ Changelog
88

99
0.13.0
1010
~~~~~~
11-
11+
* FIX#1110: Make arguments to ``create_study`` and ``create_suite`` that are defined as optional by the OpenML XSD actually optional.
1212
* MAIN#1088: Do CI for Windows on Github Actions instead of Appveyor.
1313

1414

openml/study/functions.py

Lines changed: 25 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,6 @@
33
from typing import cast, Dict, List, Optional, Union
44
import warnings
55

6-
import dateutil.parser
76
import xmltodict
87
import pandas as pd
98

@@ -94,7 +93,6 @@ def _get_study(id_: Union[int, str], entity_type) -> BaseStudy:
9493
description = result_dict["oml:description"]
9594
status = result_dict["oml:status"]
9695
creation_date = result_dict["oml:creation_date"]
97-
creation_date_as_date = dateutil.parser.parse(creation_date)
9896
creator = result_dict["oml:creator"]
9997

10098
# tags is legacy. remove once no longer needed.
@@ -106,35 +104,18 @@ def _get_study(id_: Union[int, str], entity_type) -> BaseStudy:
106104
current_tag["window_start"] = tag["oml:window_start"]
107105
tags.append(current_tag)
108106

109-
if "oml:data" in result_dict:
110-
datasets = [int(x) for x in result_dict["oml:data"]["oml:data_id"]]
111-
else:
112-
raise ValueError("No datasets attached to study {}!".format(id_))
113-
if "oml:tasks" in result_dict:
114-
tasks = [int(x) for x in result_dict["oml:tasks"]["oml:task_id"]]
115-
else:
116-
raise ValueError("No tasks attached to study {}!".format(id_))
107+
def get_nested_ids_from_result_dict(key: str, subkey: str) -> Optional[List]:
108+
if result_dict.get(key) is not None:
109+
return [int(oml_id) for oml_id in result_dict[key][subkey]]
110+
return None
117111

118-
if main_entity_type in ["runs", "run"]:
112+
datasets = get_nested_ids_from_result_dict("oml:data", "oml:data_id")
113+
tasks = get_nested_ids_from_result_dict("oml:tasks", "oml:task_id")
119114

120-
if "oml:flows" in result_dict:
121-
flows = [int(x) for x in result_dict["oml:flows"]["oml:flow_id"]]
122-
else:
123-
raise ValueError("No flows attached to study {}!".format(id_))
124-
if "oml:setups" in result_dict:
125-
setups = [int(x) for x in result_dict["oml:setups"]["oml:setup_id"]]
126-
else:
127-
raise ValueError("No setups attached to study {}!".format(id_))
128-
if "oml:runs" in result_dict:
129-
runs = [
130-
int(x) for x in result_dict["oml:runs"]["oml:run_id"]
131-
] # type: Optional[List[int]]
132-
else:
133-
if creation_date_as_date < dateutil.parser.parse("2019-01-01"):
134-
# Legacy studies did not require runs
135-
runs = None
136-
else:
137-
raise ValueError("No runs attached to study {}!".format(id_))
115+
if main_entity_type in ["runs", "run"]:
116+
flows = get_nested_ids_from_result_dict("oml:flows", "oml:flow_id")
117+
setups = get_nested_ids_from_result_dict("oml:setups", "oml:setup_id")
118+
runs = get_nested_ids_from_result_dict("oml:runs", "oml:run_id")
138119

139120
study = OpenMLStudy(
140121
study_id=study_id,
@@ -177,9 +158,9 @@ def _get_study(id_: Union[int, str], entity_type) -> BaseStudy:
177158
def create_study(
178159
name: str,
179160
description: str,
180-
run_ids: List[int],
181-
alias: Optional[str],
182-
benchmark_suite: Optional[int],
161+
run_ids: Optional[List[int]] = None,
162+
alias: Optional[str] = None,
163+
benchmark_suite: Optional[int] = None,
183164
) -> OpenMLStudy:
184165
"""
185166
Creates an OpenML study (collection of data, tasks, flows, setups and runs),
@@ -188,16 +169,19 @@ def create_study(
188169
189170
Parameters
190171
----------
191-
alias : str (optional)
192-
a string ID, unique on server (url-friendly)
193172
benchmark_suite : int (optional)
194173
the benchmark suite (another study) upon which this study is run.
195174
name : str
196175
the name of the study (meta-info)
197176
description : str
198177
brief description (meta-info)
199-
run_ids : list
200-
a list of run ids associated with this study
178+
run_ids : list, optional
179+
a list of run ids associated with this study,
180+
these can also be added later with ``attach_to_study``.
181+
alias : str (optional)
182+
a string ID, unique on server (url-friendly)
183+
benchmark_suite : int (optional)
184+
the ID of the suite for which this study contains run results
201185
202186
Returns
203187
-------
@@ -217,28 +201,29 @@ def create_study(
217201
data=None,
218202
tasks=None,
219203
flows=None,
220-
runs=run_ids,
204+
runs=run_ids if run_ids != [] else None,
221205
setups=None,
222206
)
223207

224208

225209
def create_benchmark_suite(
226-
name: str, description: str, task_ids: List[int], alias: Optional[str],
210+
name: str, description: str, task_ids: List[int], alias: Optional[str] = None,
227211
) -> OpenMLBenchmarkSuite:
228212
"""
229213
Creates an OpenML benchmark suite (collection of entity types, where
230214
the tasks are the linked entity)
231215
232216
Parameters
233217
----------
234-
alias : str (optional)
235-
a string ID, unique on server (url-friendly)
236218
name : str
237219
the name of the study (meta-info)
238220
description : str
239221
brief description (meta-info)
240222
task_ids : list
241223
a list of task ids associated with this study;
224+
more can be added later with ``attach_to_suite``.
225+
alias : str (optional)
226+
a string ID, unique on server (url-friendly)
242227
243228
Returns
244229
-------

tests/test_study/test_study_functions.py

Lines changed: 27 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
# License: BSD 3-Clause
2+
from typing import Optional, List
23

34
import openml
45
import openml.study
@@ -114,6 +115,31 @@ def test_publish_benchmark_suite(self):
114115
self.assertEqual(study_downloaded.status, "deactivated")
115116
# can't delete study, now it's no longer in preparation
116117

118+
def _test_publish_empty_study_is_allowed(self, explicit: bool):
119+
runs: Optional[List[int]] = [] if explicit else None
120+
kind = "explicit" if explicit else "implicit"
121+
122+
study = openml.study.create_study(
123+
name=f"empty-study-{kind}",
124+
description=f"a study with no runs attached {kind}ly",
125+
run_ids=runs,
126+
)
127+
128+
study.publish()
129+
TestBase._mark_entity_for_removal("study", study.id)
130+
TestBase.logger.info("collected from {}: {}".format(__file__.split("/")[-1], study.id))
131+
132+
self.assertGreater(study.id, 0)
133+
study_downloaded = openml.study.get_study(study.id)
134+
self.assertEqual(study_downloaded.main_entity_type, "run")
135+
self.assertIsNone(study_downloaded.runs)
136+
137+
def test_publish_empty_study_explicit(self):
138+
self._test_publish_empty_study_is_allowed(explicit=True)
139+
140+
def test_publish_empty_study_implicit(self):
141+
self._test_publish_empty_study_is_allowed(explicit=False)
142+
117143
@pytest.mark.flaky()
118144
def test_publish_study(self):
119145
# get some random runs to attach
@@ -214,7 +240,7 @@ def test_study_attach_illegal(self):
214240

215241
def test_study_list(self):
216242
study_list = openml.study.list_studies(status="in_preparation")
217-
# might fail if server is recently resetted
243+
# might fail if server is recently reset
218244
self.assertGreaterEqual(len(study_list), 2)
219245

220246
def test_study_list_output_format(self):

0 commit comments

Comments
 (0)