3
3
from typing import cast , Dict , List , Optional , Union
4
4
import warnings
5
5
6
- import dateutil .parser
7
6
import xmltodict
8
7
import pandas as pd
9
8
@@ -94,7 +93,6 @@ def _get_study(id_: Union[int, str], entity_type) -> BaseStudy:
94
93
description = result_dict ["oml:description" ]
95
94
status = result_dict ["oml:status" ]
96
95
creation_date = result_dict ["oml:creation_date" ]
97
- creation_date_as_date = dateutil .parser .parse (creation_date )
98
96
creator = result_dict ["oml:creator" ]
99
97
100
98
# tags is legacy. remove once no longer needed.
@@ -106,35 +104,18 @@ def _get_study(id_: Union[int, str], entity_type) -> BaseStudy:
106
104
current_tag ["window_start" ] = tag ["oml:window_start" ]
107
105
tags .append (current_tag )
108
106
109
- if "oml:data" in result_dict :
110
- datasets = [int (x ) for x in result_dict ["oml:data" ]["oml:data_id" ]]
111
- else :
112
- raise ValueError ("No datasets attached to study {}!" .format (id_ ))
113
- if "oml:tasks" in result_dict :
114
- tasks = [int (x ) for x in result_dict ["oml:tasks" ]["oml:task_id" ]]
115
- else :
116
- raise ValueError ("No tasks attached to study {}!" .format (id_ ))
107
+ def get_nested_ids_from_result_dict (key : str , subkey : str ) -> Optional [List ]:
108
+ if result_dict .get (key ) is not None :
109
+ return [int (oml_id ) for oml_id in result_dict [key ][subkey ]]
110
+ return None
117
111
118
- if main_entity_type in ["runs" , "run" ]:
112
+ datasets = get_nested_ids_from_result_dict ("oml:data" , "oml:data_id" )
113
+ tasks = get_nested_ids_from_result_dict ("oml:tasks" , "oml:task_id" )
119
114
120
- if "oml:flows" in result_dict :
121
- flows = [int (x ) for x in result_dict ["oml:flows" ]["oml:flow_id" ]]
122
- else :
123
- raise ValueError ("No flows attached to study {}!" .format (id_ ))
124
- if "oml:setups" in result_dict :
125
- setups = [int (x ) for x in result_dict ["oml:setups" ]["oml:setup_id" ]]
126
- else :
127
- raise ValueError ("No setups attached to study {}!" .format (id_ ))
128
- if "oml:runs" in result_dict :
129
- runs = [
130
- int (x ) for x in result_dict ["oml:runs" ]["oml:run_id" ]
131
- ] # type: Optional[List[int]]
132
- else :
133
- if creation_date_as_date < dateutil .parser .parse ("2019-01-01" ):
134
- # Legacy studies did not require runs
135
- runs = None
136
- else :
137
- raise ValueError ("No runs attached to study {}!" .format (id_ ))
115
+ if main_entity_type in ["runs" , "run" ]:
116
+ flows = get_nested_ids_from_result_dict ("oml:flows" , "oml:flow_id" )
117
+ setups = get_nested_ids_from_result_dict ("oml:setups" , "oml:setup_id" )
118
+ runs = get_nested_ids_from_result_dict ("oml:runs" , "oml:run_id" )
138
119
139
120
study = OpenMLStudy (
140
121
study_id = study_id ,
@@ -177,9 +158,9 @@ def _get_study(id_: Union[int, str], entity_type) -> BaseStudy:
177
158
def create_study (
178
159
name : str ,
179
160
description : str ,
180
- run_ids : List [int ],
181
- alias : Optional [str ],
182
- benchmark_suite : Optional [int ],
161
+ run_ids : Optional [ List [int ]] = None ,
162
+ alias : Optional [str ] = None ,
163
+ benchmark_suite : Optional [int ] = None ,
183
164
) -> OpenMLStudy :
184
165
"""
185
166
Creates an OpenML study (collection of data, tasks, flows, setups and runs),
@@ -188,16 +169,19 @@ def create_study(
188
169
189
170
Parameters
190
171
----------
191
- alias : str (optional)
192
- a string ID, unique on server (url-friendly)
193
172
benchmark_suite : int (optional)
194
173
the benchmark suite (another study) upon which this study is run.
195
174
name : str
196
175
the name of the study (meta-info)
197
176
description : str
198
177
brief description (meta-info)
199
- run_ids : list
200
- a list of run ids associated with this study
178
+ run_ids : list, optional
179
+ a list of run ids associated with this study,
180
+ these can also be added later with ``attach_to_study``.
181
+ alias : str (optional)
182
+ a string ID, unique on server (url-friendly)
183
+ benchmark_suite : int (optional)
184
+ the ID of the suite for which this study contains run results
201
185
202
186
Returns
203
187
-------
@@ -217,28 +201,29 @@ def create_study(
217
201
data = None ,
218
202
tasks = None ,
219
203
flows = None ,
220
- runs = run_ids ,
204
+ runs = run_ids if run_ids != [] else None ,
221
205
setups = None ,
222
206
)
223
207
224
208
225
209
def create_benchmark_suite (
226
- name : str , description : str , task_ids : List [int ], alias : Optional [str ],
210
+ name : str , description : str , task_ids : List [int ], alias : Optional [str ] = None ,
227
211
) -> OpenMLBenchmarkSuite :
228
212
"""
229
213
Creates an OpenML benchmark suite (collection of entity types, where
230
214
the tasks are the linked entity)
231
215
232
216
Parameters
233
217
----------
234
- alias : str (optional)
235
- a string ID, unique on server (url-friendly)
236
218
name : str
237
219
the name of the study (meta-info)
238
220
description : str
239
221
brief description (meta-info)
240
222
task_ids : list
241
223
a list of task ids associated with this study
224
+ (more can be added later with ``attach_to_suite``).
225
+ alias : str (optional)
226
+ a string ID, unique on server (url-friendly)
242
227
243
228
Returns
244
229
-------
0 commit comments