diff --git a/openml/_api/__init__.py b/openml/_api/__init__.py
new file mode 100644
index 000000000..881f40671
--- /dev/null
+++ b/openml/_api/__init__.py
@@ -0,0 +1,15 @@
+"""Entry point of the versioned OpenML API layer."""
+
+from openml._api.runtime.core import APIContext
+
+
+def set_api_version(version: str, *, strict: bool = False) -> None:
+    """Rebuild the shared backend of ``api_context`` for the given API version.
+
+    Both arguments are forwarded unchanged to ``APIContext.set_version``.
+    """
+    api_context.set_version(version=version, strict=strict)
+
+
+# Module-level context shared by every caller; created once at import time.
+api_context = APIContext()
diff --git a/openml/_api/config.py b/openml/_api/config.py
new file mode 100644
index 000000000..bd93c3cad
--- /dev/null
+++ b/openml/_api/config.py
@@ -0,0 +1,7 @@
+from __future__ import annotations
+
+API_V1_SERVER = "https://www.openml.org/api/v1/xml"
+# NOTE(review): points at a local development server -- confirm before release.
+API_V2_SERVER = "http://127.0.0.1:8001"
+# NOTE(review): placeholder value; not read by any code in this patch.
+API_KEY = "..."
diff --git a/openml/_api/http/__init__.py b/openml/_api/http/__init__.py
new file mode 100644
index 000000000..8e6d1e4ce
--- /dev/null
+++ b/openml/_api/http/__init__.py
@@ -0,0 +1,3 @@
+from openml._api.http.client import HTTPClient
+
+__all__ = ["HTTPClient"]
diff --git a/openml/_api/http/client.py b/openml/_api/http/client.py
new file mode 100644
index 000000000..dea5de809
--- /dev/null
+++ b/openml/_api/http/client.py
@@ -0,0 +1,67 @@
+from __future__ import annotations
+
+from typing import Any, Mapping
+
+import requests
+from requests import Response
+
+from openml.__version__ import __version__
+
+
+class HTTPClient:
+    """Thin wrapper around ``requests`` bound to a single base URL.
+
+    Responses are returned as-is: no status check is performed here, so
+    callers are responsible for handling HTTP error responses.
+
+    Parameters
+    ----------
+    base_url : str
+        Server root that every request path is appended to.
+    timeout : float, default=10
+        Timeout in seconds passed to every ``requests`` call.
+    """
+
+    def __init__(self, base_url: str, *, timeout: float = 10) -> None:
+        self.base_url = base_url
+        self.timeout = timeout
+        self.headers: dict[str, str] = {"user-agent": f"openml-python/{__version__}"}
+
+    def _url(self, path: str) -> str:
+        """Join ``path`` onto ``base_url`` with exactly one ``/`` in between."""
+        return f"{self.base_url.rstrip('/')}/{path.lstrip('/')}"
+
+    def get(
+        self,
+        path: str,
+        params: Mapping[str, Any] | None = None,
+    ) -> Response:
+        """Send a GET request for ``path`` and return the raw response."""
+        return requests.get(
+            self._url(path), params=params, headers=self.headers, timeout=self.timeout
+        )
+
+    def post(
+        self,
+        path: str,
+        data: Mapping[str, Any] | None = None,
+        files: Any = None,
+    ) -> Response:
+        """Send a POST request with ``data`` and ``files`` to ``path``."""
+        return requests.post(
+            self._url(path),
+            data=data,
+            files=files,
+            headers=self.headers,
+            timeout=self.timeout,
+        )
+
+    def delete(
+        self,
+        path: str,
+        params: Mapping[str, Any] | None = None,
+    ) -> Response:
+        """Send a DELETE request for ``path`` and return the raw response."""
+        return requests.delete(
+            self._url(path), params=params, headers=self.headers, timeout=self.timeout
+        )
diff --git a/openml/_api/http/utils.py b/openml/_api/http/utils.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/openml/_api/resources/__init__.py b/openml/_api/resources/__init__.py
new file mode 100644
index 000000000..f2113ccae
--- /dev/null
+++ b/openml/_api/resources/__init__.py
@@ -0,0 +1,15 @@
+from openml._api.resources.datasets import DatasetsV1, DatasetsV2
+from openml._api.resources.estimation_procedures import (
+    EstimationProceduresV1,
+    EstimationProceduresV2,
+)
+from openml._api.resources.tasks import TasksV1, TasksV2
+
+__all__ = [
+    "DatasetsV1",
+    "DatasetsV2",
+    "TasksV1",
+    "TasksV2",
+    "EstimationProceduresV1",
+    "EstimationProceduresV2",
+]
diff --git a/openml/_api/resources/base.py b/openml/_api/resources/base.py
new file mode 100644
index 000000000..a82bb28da
--- /dev/null
+++ b/openml/_api/resources/base.py
@@ -0,0 +1,44 @@
+from __future__ import annotations
+
+from abc import ABC, abstractmethod
+from typing import TYPE_CHECKING
+
+if TYPE_CHECKING:
+    from requests import Response
+
+    from openml._api.http import HTTPClient
+    from openml.datasets.dataset import OpenMLDataset
+    from openml.tasks.task import OpenMLTask
+
+
+class ResourceAPI:
+    """Base class of all resource APIs; stores the HTTP client used for requests."""
+
+    def __init__(self, http: HTTPClient):
+        self._http = http
+
+
+class DatasetsAPI(ResourceAPI, ABC):
+    """Abstract interface for dataset endpoints."""
+
+    @abstractmethod
+    def get(self, dataset_id: int) -> OpenMLDataset | tuple[OpenMLDataset, Response]: ...
+
+
+class TasksAPI(ResourceAPI, ABC):
+    """Abstract interface for task endpoints."""
+
+    @abstractmethod
+    def get(
+        self,
+        task_id: int,
+        *,
+        return_response: bool = False,
+    ) -> OpenMLTask | tuple[OpenMLTask, Response]: ...
+
+
+class EstimationProceduresAPI(ResourceAPI, ABC):
+    """Abstract interface for estimation-procedure endpoints."""
+
+    @abstractmethod
+    def list(self) -> list[str]: ...
diff --git a/openml/_api/resources/datasets.py b/openml/_api/resources/datasets.py
new file mode 100644
index 000000000..9ff1ec278
--- /dev/null
+++ b/openml/_api/resources/datasets.py
@@ -0,0 +1,24 @@
+from __future__ import annotations
+
+from typing import TYPE_CHECKING
+
+from openml._api.resources.base import DatasetsAPI
+
+if TYPE_CHECKING:
+    from requests import Response
+
+    from openml.datasets.dataset import OpenMLDataset
+
+
+class DatasetsV1(DatasetsAPI):
+    """Placeholder for the v1 dataset endpoints (not implemented yet)."""
+
+    def get(self, dataset_id: int) -> OpenMLDataset | tuple[OpenMLDataset, Response]:
+        raise NotImplementedError
+
+
+class DatasetsV2(DatasetsAPI):
+    """Placeholder for the v2 dataset endpoints (not implemented yet)."""
+
+    def get(self, dataset_id: int) -> OpenMLDataset | tuple[OpenMLDataset, Response]:
+        raise NotImplementedError
diff --git a/openml/_api/resources/estimation_procedures.py b/openml/_api/resources/estimation_procedures.py
new file mode 100644
index 000000000..87a584473
--- /dev/null
+++ b/openml/_api/resources/estimation_procedures.py
@@ -0,0 +1,65 @@
+from __future__ import annotations
+
+import xmltodict
+
+from openml._api.resources.base import EstimationProceduresAPI
+
+
+class EstimationProceduresV1(EstimationProceduresAPI):
+    """V1 API implementation for estimation procedures.
+
+    Fetches estimation procedures from the v1 XML API endpoint.
+    """
+
+    def list(self) -> list[str]:
+        """List the names of all estimation procedures available on OpenML.
+
+        Returns
+        -------
+        list[str]
+        """
+        path = "estimationprocedure/list"
+        response = self._http.get(path)
+        xml_content = response.text
+
+        api_results = xmltodict.parse(xml_content)
+
+        # Minimalistic check if the XML is useful
+        if "oml:estimationprocedures" not in api_results:
+            raise ValueError('Error in return XML, does not contain "oml:estimationprocedures"')
+
+        if "oml:estimationprocedure" not in api_results["oml:estimationprocedures"]:
+            raise ValueError('Error in return XML, does not contain "oml:estimationprocedure"')
+
+        if not isinstance(api_results["oml:estimationprocedures"]["oml:estimationprocedure"], list):
+            raise TypeError(
+                'Error in return XML, does not contain "oml:estimationprocedure" as a list'
+            )
+
+        return [
+            prod["oml:name"]
+            for prod in api_results["oml:estimationprocedures"]["oml:estimationprocedure"]
+        ]
+
+
+class EstimationProceduresV2(EstimationProceduresAPI):
+    """V2 API implementation for estimation procedures.
+
+    Fetches estimation procedures from the v2 JSON API endpoint.
+    """
+
+    def list(self) -> list[str]:
+        """List the names of all estimation procedures available on OpenML.
+
+        Returns
+        -------
+        list[str]
+        """
+        path = "estimationprocedure/list"
+        response = self._http.get(path)
+        list_of_prod_dicts = response.json()
+
+        if not isinstance(list_of_prod_dicts, list):
+            raise TypeError(f"Expected list response, got {type(list_of_prod_dicts)}")
+
+        return [prod["name"] for prod in list_of_prod_dicts]
diff --git a/openml/_api/resources/tasks.py b/openml/_api/resources/tasks.py
new file mode 100644
index 000000000..f494fb9a3
--- /dev/null
+++ b/openml/_api/resources/tasks.py
@@ -0,0 +1,137 @@
+from __future__ import annotations
+
+from typing import TYPE_CHECKING
+
+import xmltodict
+
+from openml._api.resources.base import TasksAPI
+from openml.tasks.task import (
+    OpenMLClassificationTask,
+    OpenMLClusteringTask,
+    OpenMLLearningCurveTask,
+    OpenMLRegressionTask,
+    OpenMLTask,
+    TaskType,
+)
+
+if TYPE_CHECKING:
+    from requests import Response
+
+
+class TasksV1(TasksAPI):
+    """V1 API implementation for tasks, backed by the v1 XML endpoint."""
+
+    def get(
+        self,
+        task_id: int,
+        *,
+        return_response: bool = False,
+    ) -> OpenMLTask | tuple[OpenMLTask, Response]:
+        """Fetch ``task/{task_id}`` and parse the XML body into a task.
+
+        Returns the task alone, or ``(task, response)`` when
+        ``return_response`` is true.
+        """
+        path = f"task/{task_id}"
+        response = self._http.get(path)
+        xml_content = response.text
+        task = self._create_task_from_xml(xml_content)
+
+        if return_response:
+            return task, response
+
+        return task
+
+    def _create_task_from_xml(self, xml: str) -> OpenMLTask:
+        """Create a task given a xml string.
+
+        Parameters
+        ----------
+        xml : string
+            Task xml representation.
+
+        Returns
+        -------
+        OpenMLTask
+        """
+        dic = xmltodict.parse(xml)["oml:task"]
+        estimation_parameters = {}
+        inputs = {}
+        # Due to the unordered structure we obtain, we first have to extract
+        # the possible keys of oml:input; dic["oml:input"] is a list of
+        # OrderedDicts
+
+        # Check if there is a list of inputs
+        if isinstance(dic["oml:input"], list):
+            for input_ in dic["oml:input"]:
+                name = input_["@name"]
+                inputs[name] = input_
+        # Single input case
+        elif isinstance(dic["oml:input"], dict):
+            name = dic["oml:input"]["@name"]
+            inputs[name] = dic["oml:input"]
+
+        evaluation_measures = None
+        if "evaluation_measures" in inputs:
+            evaluation_measures = inputs["evaluation_measures"]["oml:evaluation_measures"][
+                "oml:evaluation_measure"
+            ]
+
+        task_type = TaskType(int(dic["oml:task_type_id"]))
+        common_kwargs = {
+            "task_id": dic["oml:task_id"],
+            "task_type": dic["oml:task_type"],
+            "task_type_id": task_type,
+            "data_set_id": inputs["source_data"]["oml:data_set"]["oml:data_set_id"],
+            "evaluation_measure": evaluation_measures,
+        }
+        # TODO: add OpenMLClusteringTask?
+        if task_type in (
+            TaskType.SUPERVISED_CLASSIFICATION,
+            TaskType.SUPERVISED_REGRESSION,
+            TaskType.LEARNING_CURVE,
+        ):
+            # Convert some more parameters
+            for parameter in inputs["estimation_procedure"]["oml:estimation_procedure"][
+                "oml:parameter"
+            ]:
+                name = parameter["@name"]
+                text = parameter.get("#text", "")
+                estimation_parameters[name] = text
+
+            common_kwargs["estimation_procedure_type"] = inputs["estimation_procedure"][
+                "oml:estimation_procedure"
+            ]["oml:type"]
+            common_kwargs["estimation_procedure_id"] = int(
+                inputs["estimation_procedure"]["oml:estimation_procedure"]["oml:id"]
+            )
+
+            common_kwargs["estimation_parameters"] = estimation_parameters
+            common_kwargs["target_name"] = inputs["source_data"]["oml:data_set"][
+                "oml:target_feature"
+            ]
+            common_kwargs["data_splits_url"] = inputs["estimation_procedure"][
+                "oml:estimation_procedure"
+            ]["oml:data_splits_url"]
+
+        cls = {
+            TaskType.SUPERVISED_CLASSIFICATION: OpenMLClassificationTask,
+            TaskType.SUPERVISED_REGRESSION: OpenMLRegressionTask,
+            TaskType.CLUSTERING: OpenMLClusteringTask,
+            TaskType.LEARNING_CURVE: OpenMLLearningCurveTask,
+        }.get(task_type)
+        if cls is None:
+            raise NotImplementedError(f"Task type {common_kwargs['task_type']} not supported.")
+        return cls(**common_kwargs)  # type: ignore
+
+
+class TasksV2(TasksAPI):
+    """Placeholder for the v2 task endpoints (not implemented yet)."""
+
+    def get(
+        self,
+        task_id: int,
+        *,
+        return_response: bool = False,
+    ) -> OpenMLTask | tuple[OpenMLTask, Response]:
+        raise NotImplementedError
diff --git a/openml/_api/runtime/__init__.py b/openml/_api/runtime/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/openml/_api/runtime/core.py b/openml/_api/runtime/core.py
new file mode 100644
index 000000000..4714bc80c
--- /dev/null
+++ b/openml/_api/runtime/core.py
@@ -0,0 +1,90 @@
+from __future__ import annotations
+
+from typing import TYPE_CHECKING
+
+from openml._api.config import (
+    API_V1_SERVER,
+    API_V2_SERVER,
+)
+from openml._api.http.client import HTTPClient
+from openml._api.resources import (
+    DatasetsV1,
+    DatasetsV2,
+    EstimationProceduresV1,
+    EstimationProceduresV2,
+    TasksV1,
+    TasksV2,
+)
+
+if TYPE_CHECKING:
+    from openml._api.resources.base import DatasetsAPI, EstimationProceduresAPI, TasksAPI
+
+
+class APIBackend:
+    """Bundle of the resource APIs that together serve one API version."""
+
+    def __init__(
+        self,
+        *,
+        datasets: DatasetsAPI,
+        tasks: TasksAPI,
+        estimation_procedures: EstimationProceduresAPI,
+    ):
+        self.datasets = datasets
+        self.tasks = tasks
+        self.estimation_procedures = estimation_procedures
+
+
+def build_backend(version: str, *, strict: bool) -> APIBackend:
+    """Build the backend that serves ``version``.
+
+    Parameters
+    ----------
+    version : str
+        Either ``"v1"`` or ``"v2"``.
+    strict : bool
+        Only consulted for ``"v2"``: when true the v2 backend is returned,
+        otherwise the v1 backend is returned.
+
+    Raises
+    ------
+    ValueError
+        If ``version`` is neither ``"v1"`` nor ``"v2"``.
+    """
+    if version not in ("v1", "v2"):
+        raise ValueError(f"Unknown API version {version!r}; expected 'v1' or 'v2'.")
+
+    v1_http = HTTPClient(API_V1_SERVER)
+    v1 = APIBackend(
+        datasets=DatasetsV1(v1_http),
+        tasks=TasksV1(v1_http),
+        estimation_procedures=EstimationProceduresV1(v1_http),
+    )
+
+    # NOTE(review): a non-strict "v2" request resolves to the plain v1 backend;
+    # presumably FallbackProxy is meant to be wired in here -- TODO confirm.
+    if version == "v1" or not strict:
+        return v1
+
+    v2_http = HTTPClient(API_V2_SERVER)
+    return APIBackend(
+        datasets=DatasetsV2(v2_http),
+        tasks=TasksV2(v2_http),
+        estimation_procedures=EstimationProceduresV2(v2_http),
+    )
+
+
+class APIContext:
+    """Holds the currently active backend and allows switching versions."""
+
+    def __init__(self) -> None:
+        self._backend = build_backend("v1", strict=False)
+
+    def set_version(self, version: str, *, strict: bool = False) -> None:
+        """Replace the active backend with one built for ``version``."""
+        self._backend = build_backend(version=version, strict=strict)
+
+    @property
+    def backend(self) -> APIBackend:
+        """The currently active backend."""
+        return self._backend
diff --git a/openml/_api/runtime/fallback.py b/openml/_api/runtime/fallback.py
new file mode 100644
index 000000000..1bc99d270
--- /dev/null
+++ b/openml/_api/runtime/fallback.py
@@ -0,0 +1,17 @@
+from __future__ import annotations
+
+from typing import TYPE_CHECKING
+
+if TYPE_CHECKING:
+    from openml._api.resources.base import ResourceAPI
+
+
+class FallbackProxy:
+    """Pairs a primary resource API with a fallback one.
+
+    Only stores both objects for now; no call forwarding is implemented.
+    """
+
+    def __init__(self, primary: ResourceAPI, fallback: ResourceAPI):
+        self._primary = primary
+        self._fallback = fallback
diff --git a/openml/evaluations/functions.py b/openml/evaluations/functions.py
index 7747294d7..6e8e98230 100644
--- a/openml/evaluations/functions.py
+++ b/openml/evaluations/functions.py
@@ -15,6 +15,7 @@
 import openml
 import openml._api_calls
 import openml.utils
+from openml._api import api_context
 from openml.evaluations import OpenMLEvaluation
 
 
@@ -307,24 +308,7 @@ def list_estimation_procedures() -> list[str]:
     -------
     list
     """
-    api_call = "estimationprocedure/list"
-    xml_string = openml._api_calls._perform_api_call(api_call, "get")
-    api_results = xmltodict.parse(xml_string)
-
-    # Minimalistic check if the XML is useful
-    if "oml:estimationprocedures" not in api_results:
-        raise ValueError('Error in return XML, does not contain "oml:estimationprocedures"')
-
-    if "oml:estimationprocedure" not in api_results["oml:estimationprocedures"]:
-        raise ValueError('Error in return XML, does not contain "oml:estimationprocedure"')
-
-    if not isinstance(api_results["oml:estimationprocedures"]["oml:estimationprocedure"], list):
-        raise TypeError('Error in return XML, does not contain "oml:estimationprocedure" as a list')
-
-    return [
-        prod["oml:name"]
-        for prod in api_results["oml:estimationprocedures"]["oml:estimationprocedure"]
-    ]
+    return api_context.backend.estimation_procedures.list()
 
 
 def list_evaluations_setups(
diff --git a/openml/tasks/functions.py b/openml/tasks/functions.py
index e9b879ae4..a794ad56d 100644
--- a/openml/tasks/functions.py
+++ b/openml/tasks/functions.py
@@ -12,6 +12,7 @@
 
 import openml._api_calls
 import openml.utils
+from openml._api import api_context
 from openml.datasets import get_dataset
 from openml.exceptions import OpenMLCacheException
 
@@ -444,11 +445,16 @@ def _get_task_description(task_id: int) -> OpenMLTask:
     except OpenMLCacheException:
         _cache_dir = openml.utils._create_cache_directory_for_id(TASKS_CACHE_DIR_NAME, task_id)
         xml_file = _cache_dir / "task.xml"
-        task_xml = openml._api_calls._perform_api_call("task/%d" % task_id, "get")
+        result = api_context.backend.tasks.get(task_id, return_response=True)
 
-        with xml_file.open("w", encoding="utf8") as fh:
-            fh.write(task_xml)
-        return _create_task_from_xml(task_xml)
+        if isinstance(result, tuple):
+            task, response = result
+            with xml_file.open("w", encoding="utf8") as fh:
+                fh.write(response.text)
+        else:
+            task = result
+
+        return task
 
 
 def _create_task_from_xml(xml: str) -> OpenMLTask: