|
1 | 1 | from __future__ import annotations |
2 | 2 |
|
| 3 | +import builtins |
| 4 | +import json |
| 5 | +from typing import Any |
| 6 | + |
| 7 | +import xmltodict |
| 8 | + |
| 9 | +from openml.evaluations import OpenMLEvaluation |
| 10 | + |
3 | 11 | from .base import EvaluationAPI, ResourceV1API, ResourceV2API |
4 | 12 |
|
5 | 13 |
|
class EvaluationV1API(ResourceV1API, EvaluationAPI):
    """V1 API implementation for evaluations.

    Fetches evaluations from the v1 XML API endpoint and converts them into
    ``OpenMLEvaluation`` objects.
    """

    def list(  # noqa: PLR0913
        self,
        limit: int,
        offset: int,
        *,
        function: str,
        tasks: builtins.list | None = None,
        setups: builtins.list | None = None,
        flows: builtins.list | None = None,
        runs: builtins.list | None = None,
        uploaders: builtins.list | None = None,
        study: int | None = None,
        sort_order: str | None = None,
        **kwargs: Any,
    ) -> builtins.list[OpenMLEvaluation]:
        """Retrieve evaluations from the OpenML v1 XML API.

        Builds the evaluation query path from the provided filters, requests
        it through ``self._http``, parses the XML response and enriches every
        evaluation with the username of its uploader.

        Parameters
        ----------
        limit : int
            The number of evaluations to return.
        offset : int
            The number of evaluations to skip, starting from the first.
        function : str
            The evaluation function, e.g. ``predictive_accuracy``.
        tasks : list[int, str], optional
            The list of task IDs.
        setups : list[int, str], optional
            The list of setup IDs.
        flows : list[int, str], optional
            The list of flow IDs.
        runs : list[int, str], optional
            The list of run IDs.
        uploaders : list[int, str], optional
            The list of uploader IDs.
        study : int, optional
            Restrict the listing to this study ID.
        sort_order : str, optional
            Order of sorting evaluations, ascending ("asc") or descending
            ("desc").
        **kwargs : Any
            Single-valued filters appended to the path as
            ``/<operator>/<value>``. Legal filter operators: tag, per_fold.

        Returns
        -------
        list of OpenMLEvaluation
            One object per evaluation in the response.

        Notes
        -----
        Up to two API calls are performed:

        1. The evaluation listing itself.
        2. A user listing for all uploaders found in the evaluations, used to
           map uploader IDs to usernames.
        """
        api_call = self._build_url(
            limit,
            offset,
            function=function,
            tasks=tasks,
            setups=setups,
            flows=flows,
            runs=runs,
            uploaders=uploaders,
            study=study,
            sort_order=sort_order,
            **kwargs,
        )

        eval_response = self._http.get(api_call)
        return self._parse_list_xml(eval_response.text)

    def _build_url(  # noqa: PLR0913, C901
        self,
        limit: int,
        offset: int,
        *,
        function: str,
        tasks: builtins.list | None = None,
        setups: builtins.list | None = None,
        flows: builtins.list | None = None,
        runs: builtins.list | None = None,
        uploaders: builtins.list | None = None,
        study: int | None = None,
        sort_order: str | None = None,
        **kwargs: Any,
    ) -> str:
        """Construct the relative evaluation-list API path for the filters.

        Segments are emitted in a fixed order: function, limit, offset, the
        ``kwargs`` filters (in the order given), task, setup, flow, run,
        uploader, study, sort_order. Filters whose value is ``None`` are
        omitted.

        Parameters
        ----------
        limit : int
            The number of evaluations to return.
        offset : int
            The number of evaluations to skip, starting from the first.
        function : str
            The evaluation function, e.g. ``predictive_accuracy``.
        tasks : list[int, str], optional
            The list of task IDs.
        setups : list[int, str], optional
            The list of setup IDs.
        flows : list[int, str], optional
            The list of flow IDs.
        runs : list[int, str], optional
            The list of run IDs.
        uploaders : list[int, str], optional
            The list of uploader IDs.
        study : int, optional
            Restrict the listing to this study ID.
        sort_order : str, optional
            Order of sorting evaluations, ascending ("asc") or descending
            ("desc").
        **kwargs : Any
            Single-valued filters appended as ``/<operator>/<value>``.
            Legal filter operators: tag, per_fold.

        Returns
        -------
        str
            A relative API path suitable for an OpenML HTTP request.

        Raises
        ------
        ValueError, TypeError
            If an entry of one of the ID lists cannot be converted with
            ``int()``.
        """
        segments = [f"evaluation/list/function/{function}"]
        # Callers may pass ``None`` for limit/offset despite the annotation,
        # so keep the guards.
        if limit is not None:
            segments.append(f"limit/{limit}")
        if offset is not None:
            segments.append(f"offset/{offset}")

        # ``kwargs`` is always a dict (possibly empty), never ``None``.
        for operator, value in kwargs.items():
            if value is not None:
                segments.append(f"{operator}/{value}")

        id_filters = (
            ("task", tasks),
            ("setup", setups),
            ("flow", flows),
            ("run", runs),
            ("uploader", uploaders),
        )
        for name, ids in id_filters:
            if ids is not None:
                # ``int()`` normalises "12"/12 alike and rejects non-numeric IDs.
                joined = ",".join(str(int(i)) for i in ids)
                segments.append(f"{name}/{joined}")

        if study is not None:
            segments.append(f"study/{study}")
        if sort_order is not None:
            segments.append(f"sort_order/{sort_order}")

        return "/".join(segments)

    def _parse_list_xml(self, xml_content: str) -> builtins.list[OpenMLEvaluation]:
        """Parse an evaluation-list XML response into evaluation objects.

        Parameters
        ----------
        xml_content : str
            Raw XML body returned by the evaluation list endpoint.

        Returns
        -------
        list of OpenMLEvaluation
            One object per ``oml:evaluation`` element; an empty list when the
            ``oml:evaluations`` container holds no evaluations.

        Raises
        ------
        ValueError
            If the document has no ``oml:evaluations`` root element.
        TypeError
            If ``oml:evaluation`` is present but was not parsed as a list.
        """
        evals_dict: dict[str, Any] = xmltodict.parse(xml_content, force_list=("oml:evaluation",))
        # Minimalistic check if the XML is useful
        if "oml:evaluations" not in evals_dict:
            raise ValueError(
                f'Error in return XML, does not contain "oml:evaluations": {evals_dict!s}',
            )

        container = evals_dict["oml:evaluations"]
        # An empty container parses to ``None`` or to a dict without the
        # "oml:evaluation" key; treat both as "no results".
        raw_evals = container.get("oml:evaluation") if isinstance(container, dict) else None
        if raw_evals is None:
            return []

        # Explicit check instead of ``assert`` so it survives ``python -O``.
        if not isinstance(raw_evals, builtins.list):
            raise TypeError(
                "Expected 'oml:evaluation' to be a list, but got "
                f"{type(raw_evals).__name__}. "
            )

        # De-duplicate and sort so the user-lookup request is deterministic.
        uploader_ids = sorted({eval_["oml:uploader"] for eval_ in raw_evals})
        user_dict = self._get_users(uploader_ids)

        evals = []
        for eval_ in raw_evals:
            # A missing or empty <oml:value> element yields ``None``.
            raw_value = eval_.get("oml:value")
            value = float(raw_value) if raw_value is not None else None
            raw_values = eval_.get("oml:values")
            values = json.loads(raw_values) if raw_values else None

            evals.append(
                OpenMLEvaluation(
                    run_id=int(eval_["oml:run_id"]),
                    task_id=int(eval_["oml:task_id"]),
                    setup_id=int(eval_["oml:setup_id"]),
                    flow_id=int(eval_["oml:flow_id"]),
                    flow_name=eval_["oml:flow_name"],
                    data_id=int(eval_["oml:data_id"]),
                    data_name=eval_["oml:data_name"],
                    function=eval_["oml:function"],
                    upload_time=eval_["oml:upload_time"],
                    uploader=int(eval_["oml:uploader"]),
                    uploader_name=user_dict[eval_["oml:uploader"]],
                    value=value,
                    values=values,
                    array_data=eval_.get("oml:array_data"),
                )
            )

        return evals

    def _get_users(self, uploader_ids: builtins.list[str]) -> dict[str, str]:
        """Retrieve usernames for a list of OpenML user IDs.

        Parameters
        ----------
        uploader_ids : list[str]
            List of OpenML user IDs (as strings, joined verbatim into the
            request path).

        Returns
        -------
        dict[str, str]
            A mapping from user ID to username. Empty when ``uploader_ids``
            is empty, in which case no request is sent.
        """
        if not uploader_ids:
            # Avoid requesting the malformed path "user/list/user_id/".
            return {}

        api_users = "user/list/user_id/" + ",".join(uploader_ids)
        user_response = self._http.get(api_users)

        users = xmltodict.parse(user_response.text, force_list=("oml:user",))
        return {user["oml:id"]: user["oml:username"] for user in users["oml:users"]["oml:user"]}
8 | 250 |
|
9 | 251 |
|
class EvaluationV2API(ResourceV2API, EvaluationAPI):
    """V2 (JSON) API implementation for evaluations.

    Listing evaluations is not implemented for the v2 API yet; ``list``
    only delegates to ``self._not_supported``.
    """

    def list(  # noqa: PLR0913
        self,
        limit: int,  # noqa: ARG002
        offset: int,  # noqa: ARG002
        *,
        function: str,  # noqa: ARG002
        tasks: builtins.list | None = None,  # noqa: ARG002
        setups: builtins.list | None = None,  # noqa: ARG002
        flows: builtins.list | None = None,  # noqa: ARG002
        runs: builtins.list | None = None,  # noqa: ARG002
        uploaders: builtins.list | None = None,  # noqa: ARG002
        study: int | None = None,  # noqa: ARG002
        sort_order: str | None = None,  # noqa: ARG002
        **kwargs: Any,  # noqa: ARG002
    ) -> builtins.list[OpenMLEvaluation]:
        """List evaluations through the OpenML v2 JSON API (not implemented).

        The signature mirrors the v1 implementation so both API versions can
        be used interchangeably; every argument is currently ignored.

        Notes
        -----
        The call is forwarded to ``self._not_supported(method="list")``.
        NOTE(review): that helper presumably raises to signal the missing
        feature -- confirm in the base class.
        """
        self._not_supported(method="list")
0 commit comments