From bf6e0001e00bff1a0933dd53588bbc45b312cc76 Mon Sep 17 00:00:00 2001 From: Satvik Mishra <112589278+satvshr@users.noreply.github.com> Date: Tue, 30 Dec 2025 02:07:17 +0530 Subject: [PATCH 1/8] changes made --- openml/_api_calls.py | 4 +- openml/config.py | 252 +++++++++++++++++++++---------------------- 2 files changed, 127 insertions(+), 129 deletions(-) diff --git a/openml/_api_calls.py b/openml/_api_calls.py index 81296b3da..12567ac7a 100644 --- a/openml/_api_calls.py +++ b/openml/_api_calls.py @@ -71,7 +71,7 @@ def resolve_env_proxies(url: str) -> str | None: def _create_url_from_endpoint(endpoint: str) -> str: - url = config.server + url = config._config.server if not url.endswith("/"): url += "/" url += endpoint @@ -301,7 +301,7 @@ def _file_id_to_url(file_id: int, filename: str | None = None) -> str: Presents the URL how to download a given file id filename is optional """ - openml_url = config.server.split("/api/") + openml_url = config._config.server.split("/api/") url = openml_url[0] + f"/data/download/{file_id!s}" if filename is not None: url += "/" + filename diff --git a/openml/config.py b/openml/config.py index cf66a6346..98a48a1c6 100644 --- a/openml/config.py +++ b/openml/config.py @@ -1,6 +1,7 @@ """Store module level information like the API key, cache directory and the server""" # License: BSD 3-Clause +# ruff: noqa: PLW0603 from __future__ import annotations import configparser @@ -11,10 +12,11 @@ import shutil import warnings from contextlib import contextmanager +from dataclasses import dataclass, replace from io import StringIO from pathlib import Path -from typing import Any, Iterator, cast -from typing_extensions import Literal, TypedDict +from typing import Any, Iterator +from typing_extensions import Literal from urllib.parse import urlparse logger = logging.getLogger(__name__) @@ -27,19 +29,62 @@ _TEST_SERVER_NORMAL_USER_KEY = "normaluser" -class _Config(TypedDict): - apikey: str - server: str - cachedir: Path - avoid_duplicate_runs: bool - retry_policy: Literal["human", "robot"] - connection_n_retries: int - show_progress: bool +# Default values (see also https://github.com/openml/OpenML/wiki/Client-API-Standards) +_user_path = Path("~").expanduser().absolute() + + +def _resolve_default_cache_dir() -> Path: + user_defined_cache_dir = os.environ.get(OPENML_CACHE_DIR_ENV_VAR) + if user_defined_cache_dir is not None: + return Path(user_defined_cache_dir) + + if platform.system().lower() != "linux": + return _user_path / ".openml" + + xdg_cache_home = os.environ.get("XDG_CACHE_HOME") + if xdg_cache_home is None: + return Path("~", ".cache", "openml") + + # This is the proper XDG_CACHE_HOME directory, but + # we unfortunately had a problem where we used XDG_CACHE_HOME/org, + # we check heuristically if this old directory still exists and issue + # a warning if it does. There's too much data to move to do this for the user. + + # The new cache directory exists + cache_dir = Path(xdg_cache_home) / "openml" + if cache_dir.exists(): + return cache_dir + + # The old cache directory *does not* exist + heuristic_dir_for_backwards_compat = Path(xdg_cache_home) / "org" / "openml" + if not heuristic_dir_for_backwards_compat.exists(): + return cache_dir + + root_dir_to_delete = Path(xdg_cache_home) / "org" + openml_logger.warning( + "An old cache directory was found at '%s'. This directory is no longer used by " + "OpenML-Python. To silence this warning you would need to delete the old cache " + "directory. 
The cached files will then be located in '%s'.", + root_dir_to_delete, + cache_dir, + ) + return Path(xdg_cache_home) + + +@dataclass(frozen=True) +class OpenMLConfig: + apikey: str = "" + server: str = "https://www.openml.org/api/v1/xml" + cachedir: Path = _resolve_default_cache_dir() # noqa: RUF009 + avoid_duplicate_runs: bool = False + retry_policy: Literal["human", "robot"] = "human" + connection_n_retries: int = 5 + show_progress: bool = False def _create_log_handlers(create_file_handler: bool = True) -> None: # noqa: FBT001, FBT002 """Creates but does not attach the log handlers.""" - global console_handler, file_handler # noqa: PLW0603 + global console_handler, file_handler, _root_cache_directory # noqa: PLW0602 if console_handler is not None or file_handler is not None: logger.debug("Requested to create log handlers, but they are already created.") return @@ -105,61 +150,22 @@ def set_file_log_level(file_output_level: int) -> None: _set_level_register_and_store(file_handler, file_output_level) -# Default values (see also https://github.com/openml/OpenML/wiki/Client-API-Standards) -_user_path = Path("~").expanduser().absolute() - - -def _resolve_default_cache_dir() -> Path: - user_defined_cache_dir = os.environ.get(OPENML_CACHE_DIR_ENV_VAR) - if user_defined_cache_dir is not None: - return Path(user_defined_cache_dir) - - if platform.system().lower() != "linux": - return _user_path / ".openml" - - xdg_cache_home = os.environ.get("XDG_CACHE_HOME") - if xdg_cache_home is None: - return Path("~", ".cache", "openml") +_config: OpenMLConfig = OpenMLConfig() +_root_cache_directory: Path = _config.cachedir - # This is the proper XDG_CACHE_HOME directory, but - # we unfortunately had a problem where we used XDG_CACHE_HOME/org, - # we check heuristically if this old directory still exists and issue - # a warning if it does. There's too much data to move to do this for the user. - # The new cache directory exists - cache_dir = Path(xdg_cache_home) / "openml" - if cache_dir.exists(): - return cache_dir +def __getattr__(name: str) -> Any: + if hasattr(_config, name): + return getattr(_config, name) + raise AttributeError(f"module 'openml.config' has no attribute '{name}'") - # The old cache directory *does not* exist - heuristic_dir_for_backwards_compat = Path(xdg_cache_home) / "org" / "openml" - if not heuristic_dir_for_backwards_compat.exists(): - return cache_dir - root_dir_to_delete = Path(xdg_cache_home) / "org" - openml_logger.warning( - "An old cache directory was found at '%s'. This directory is no longer used by " - "OpenML-Python. To silence this warning you would need to delete the old cache " - "directory. 
The cached files will then be located in '%s'.", - root_dir_to_delete, - cache_dir, - ) - return Path(xdg_cache_home) - - -_defaults: _Config = { - "apikey": "", - "server": "https://www.openml.org/api/v1/xml", - "cachedir": _resolve_default_cache_dir(), - "avoid_duplicate_runs": False, - "retry_policy": "human", - "connection_n_retries": 5, - "show_progress": False, -} - -# Default values are actually added here in the _setup() function which is -# called at the end of this module -server = _defaults["server"] +def __setattr__(name: str, value: Any) -> None: # noqa: N807 + global _config + if hasattr(_config, name): + _config = replace(_config, **{name: value}) + else: + raise AttributeError(f"module 'openml.config' has no attribute '{name}'") def get_server_base_url() -> str: @@ -172,23 +178,12 @@ def get_server_base_url() -> str: ------- str """ - domain, path = server.split("/api", maxsplit=1) + domain, _ = _config.server.split("/api", maxsplit=1) return domain.replace("api", "www") -apikey: str = _defaults["apikey"] -show_progress: bool = _defaults["show_progress"] -# The current cache directory (without the server name) -_root_cache_directory: Path = Path(_defaults["cachedir"]) -avoid_duplicate_runs = _defaults["avoid_duplicate_runs"] - -retry_policy: Literal["human", "robot"] = _defaults["retry_policy"] -connection_n_retries: int = _defaults["connection_n_retries"] - - def set_retry_policy(value: Literal["human", "robot"], n_retries: int | None = None) -> None: - global retry_policy # noqa: PLW0603 - global connection_n_retries # noqa: PLW0603 + global _config default_retries_by_policy = {"human": 5, "robot": 50} if value not in default_retries_by_policy: @@ -202,8 +197,11 @@ def set_retry_policy(value: Literal["human", "robot"], n_retries: int | None = N if isinstance(n_retries, int) and n_retries < 1: raise ValueError(f"`n_retries` is '{n_retries}' but must be positive.") - retry_policy = value - connection_n_retries = default_retries_by_policy[value] if n_retries is None else n_retries + _config = replace( + _config, + retry_policy=value, + connection_n_retries=(default_retries_by_policy[value] if n_retries is None else n_retries), + ) class ConfigurationForExamples: @@ -222,24 +220,30 @@ def start_using_configuration_for_example(cls) -> None: To configuration as was before this call is stored, and can be recovered by using the `stop_use_example_configuration` method. """ - global server # noqa: PLW0603 - global apikey # noqa: PLW0603 + global _config - if cls._start_last_called and server == cls._test_server and apikey == cls._test_apikey: + if ( + cls._start_last_called + and _config.server == cls._test_server + and _config.apikey == cls._test_apikey + ): # Method is called more than once in a row without modifying the server or apikey. # We don't want to save the current test configuration as a last used configuration. return - cls._last_used_server = server - cls._last_used_key = apikey + cls._last_used_server = _config.server + cls._last_used_key = _config.apikey cls._start_last_called = True # Test server key for examples - server = cls._test_server - apikey = cls._test_apikey + _config = replace( + _config, + server=cls._test_server, + apikey=cls._test_apikey, + ) warnings.warn( - f"Switching to the test server {server} to not upload results to the live server. " - "Using the test server may result in reduced performance of the API!", + f"Switching to the test server {_config.server} to not upload results to " + "the live server. 
Using the test server may result in reduced performance of the API!", stacklevel=2, ) @@ -254,11 +258,9 @@ def stop_using_configuration_for_example(cls) -> None: "`start_use_example_configuration` must be called first.", ) - global server # noqa: PLW0603 - global apikey # noqa: PLW0603 + global _config + _config = replace(_config, server=cls._test_server, apikey=cls._test_apikey) - server = cast(str, cls._last_used_server) - apikey = cast(str, cls._last_used_key) cls._start_last_called = False @@ -327,7 +329,7 @@ def determine_config_file_path() -> Path: return config_dir / "config" -def _setup(config: _Config | None = None) -> None: +def _setup(config: dict[str, Any] | None = None) -> None: """Setup openml package. Called on first import. Reads the config file and sets up apikey, server, cache appropriately. @@ -336,11 +338,8 @@ def _setup(config: _Config | None = None) -> None: openml.config.server = SOMESERVER We could also make it a property but that's less clear. """ - global apikey # noqa: PLW0603 - global server # noqa: PLW0603 - global _root_cache_directory # noqa: PLW0603 - global avoid_duplicate_runs # noqa: PLW0603 - global show_progress # noqa: PLW0603 + global _config + global _root_cache_directory config_file = determine_config_file_path() config_dir = config_file.parent @@ -358,19 +357,24 @@ def _setup(config: _Config | None = None) -> None: if config is None: config = _parse_config(config_file) - avoid_duplicate_runs = config["avoid_duplicate_runs"] - apikey = config["apikey"] - server = config["server"] - show_progress = config["show_progress"] - n_retries = int(config["connection_n_retries"]) + _config = replace( + _config, + apikey=config["apikey"], + server=config["server"], + show_progress=config["show_progress"], + avoid_duplicate_runs=config["avoid_duplicate_runs"], + retry_policy=config["retry_policy"], + connection_n_retries=int(config["connection_n_retries"]), + ) - set_retry_policy(config["retry_policy"], n_retries) + set_retry_policy(config["retry_policy"], _config.connection_n_retries) user_defined_cache_dir = os.environ.get(OPENML_CACHE_DIR_ENV_VAR) if user_defined_cache_dir is not None: short_cache_dir = Path(user_defined_cache_dir) else: short_cache_dir = Path(config["cachedir"]) + _root_cache_directory = short_cache_dir.expanduser().resolve() try: @@ -389,29 +393,31 @@ def _setup(config: _Config | None = None) -> None: def set_field_in_config_file(field: str, value: Any) -> None: """Overwrites the `field` in the configuration file with the new `value`.""" - if field not in _defaults: - raise ValueError(f"Field '{field}' is not valid and must be one of '{_defaults.keys()}'.") + global _config + if not hasattr(_config, field): + raise ValueError( + f"Field '{field}' is not valid and must be one of '{_config.__dict__.keys()}'." + ) - # TODO(eddiebergman): This use of globals has gone too far - globals()[field] = value + _config = replace(_config, **{field: value}) config_file = determine_config_file_path() - config = _parse_config(config_file) + existing = _parse_config(config_file) with config_file.open("w") as fh: - for f in _defaults: + for f in _config.__dict__: # We can't blindly set all values based on globals() because when the user # sets it through config.FIELD it should not be stored to file. # There doesn't seem to be a way to avoid writing defaults to file with configparser, # because it is impossible to distinguish from an explicitly set value that matches # the default value, to one that was set to its default because it was omitted. 
- value = globals()[f] if f == field else config.get(f) # type: ignore - if value is not None: - fh.write(f"{f} = {value}\n") + v = value if f == field else existing.get(f) + if v is not None: + fh.write(f"{f} = {v}\n") -def _parse_config(config_file: str | Path) -> _Config: +def _parse_config(config_file: str | Path) -> dict[str, Any]: """Parse the config file, set up defaults.""" config_file = Path(config_file) - config = configparser.RawConfigParser(defaults=_defaults) # type: ignore + config = configparser.RawConfigParser(defaults=_config.__dict__) # type: ignore # The ConfigParser requires a [SECTION_HEADER], which we do not expect in our config file. # Cheat the ConfigParser module by adding a fake section header @@ -434,16 +440,8 @@ def _parse_config(config_file: str | Path) -> _Config: return configuration # type: ignore -def get_config_as_dict() -> _Config: - return { - "apikey": apikey, - "server": server, - "cachedir": _root_cache_directory, - "avoid_duplicate_runs": avoid_duplicate_runs, - "connection_n_retries": connection_n_retries, - "retry_policy": retry_policy, - "show_progress": show_progress, - } +def get_config_as_dict() -> dict[str, Any]: + return _config.__dict__.copy() # NOTE: For backwards compatibility, we keep the `str` @@ -467,7 +465,7 @@ def get_cache_directory() -> str: The current cache directory. """ - url_suffix = urlparse(server).netloc + url_suffix = urlparse(_config.server).netloc reversed_url_suffix = os.sep.join(url_suffix.split(".")[::-1]) # noqa: PTH118 return os.path.join(_root_cache_directory, reversed_url_suffix) # noqa: PTH118 @@ -491,7 +489,7 @@ def set_root_cache_directory(root_cache_directory: str | Path) -> None: -------- get_cache_directory """ - global _root_cache_directory # noqa: PLW0603 + global _root_cache_directory _root_cache_directory = Path(root_cache_directory) @@ -502,7 +500,7 @@ def set_root_cache_directory(root_cache_directory: str | Path) -> None: @contextmanager -def overwrite_config_context(config: dict[str, Any]) -> Iterator[_Config]: +def overwrite_config_context(config: dict[str, Any]) -> Iterator[dict[str, Any]]: """A context manager to temporarily override variables in the configuration.""" existing_config = get_config_as_dict() merged_config = {**existing_config, **config} @@ -515,10 +513,10 @@ def overwrite_config_context(config: dict[str, Any]) -> Iterator[_Config]: __all__ = [ "get_cache_directory", + "get_config_as_dict", "set_root_cache_directory", "start_using_configuration_for_example", "stop_using_configuration_for_example", - "get_config_as_dict", ] _setup() From 834782c105b5244095e20f17059c081b88634640 Mon Sep 17 00:00:00 2001 From: Satvik Mishra <112589278+satvshr@users.noreply.github.com> Date: Tue, 30 Dec 2025 12:31:52 +0530 Subject: [PATCH 2/8] bug fixing --- examples/Advanced/datasets_tutorial.py | 2 +- .../benchmark_with_optunahub.py | 4 +-- .../flow_id_tutorial.py | 2 +- openml/_api_calls.py | 12 ++++----- openml/cli.py | 2 +- openml/config.py | 16 +++++++----- openml/runs/functions.py | 2 +- openml/testing.py | 14 +++++----- tests/conftest.py | 16 ++++++------ tests/test_datasets/test_dataset_functions.py | 14 +++++----- tests/test_openml/test_config.py | 26 +++++++++---------- tests/test_utils/test_utils.py | 2 +- 12 files changed, 58 insertions(+), 54 deletions(-) diff --git a/examples/Advanced/datasets_tutorial.py b/examples/Advanced/datasets_tutorial.py index cc57686d0..3a4833206 100644 --- a/examples/Advanced/datasets_tutorial.py +++ b/examples/Advanced/datasets_tutorial.py @@ -139,7 +139,7 @@ # 
only for the dataset owner. Further, critical fields cannot be edited if the dataset has any # tasks associated with it. To edit critical fields of a dataset (without tasks) owned by you, # configure the API key: -# openml.config.apikey = 'FILL_IN_OPENML_API_KEY' +# openml.config._config.apikey = 'FILL_IN_OPENML_API_KEY' # This example here only shows a failure when trying to work on a dataset not owned by you: # %% diff --git a/examples/_external_or_deprecated/benchmark_with_optunahub.py b/examples/_external_or_deprecated/benchmark_with_optunahub.py index ece3e7c40..c8f5f7b0c 100644 --- a/examples/_external_or_deprecated/benchmark_with_optunahub.py +++ b/examples/_external_or_deprecated/benchmark_with_optunahub.py @@ -44,7 +44,7 @@ # account (you don't need one for anything else, just to upload your results), # go to your profile and select the API-KEY. # Or log in, and navigate to https://www.openml.org/auth/api-key -openml.config.apikey = "" +openml.config._config.apikey = "" ############################################################################ # Prepare for preprocessors and an OpenML task # ============================================ @@ -95,7 +95,7 @@ def objective(trial: optuna.Trial) -> Pipeline: run = openml.runs.run_model_on_task(pipe, task=task_id, avoid_duplicate_runs=False) logger.log(1, f"Model has been trained - {run}") - if openml.config.apikey != "": + if openml.config._config.apikey != "": try: run.publish() diff --git a/examples/_external_or_deprecated/flow_id_tutorial.py b/examples/_external_or_deprecated/flow_id_tutorial.py index e813655fc..c533cfd9f 100644 --- a/examples/_external_or_deprecated/flow_id_tutorial.py +++ b/examples/_external_or_deprecated/flow_id_tutorial.py @@ -16,7 +16,7 @@ # %% openml.config.start_using_configuration_for_example() -openml.config.server = "https://api.openml.org/api/v1/xml" +openml.config._configserver = "https://api.openml.org/api/v1/xml" # %% # Defining a classifier diff --git a/openml/_api_calls.py b/openml/_api_calls.py index 12567ac7a..c3f6d285f 100644 --- a/openml/_api_calls.py +++ b/openml/_api_calls.py @@ -172,7 +172,7 @@ def _download_minio_file( bucket_name=bucket, object_name=object_name, file_path=str(destination), - progress=ProgressBar() if config.show_progress else None, + progress=ProgressBar() if config._config.show_progress else None, request_headers=_HEADERS, ) if destination.is_file() and destination.suffix == ".zip": @@ -317,7 +317,7 @@ def _read_url_files( and sending file_elements as files """ data = {} if data is None else data - data["api_key"] = config.apikey + data["api_key"] = config._config.apikey if file_elements is None: file_elements = {} # Using requests.post sets header 'Accept-encoding' automatically to @@ -337,8 +337,8 @@ def __read_url( md5_checksum: str | None = None, ) -> requests.Response: data = {} if data is None else data - if config.apikey: - data["api_key"] = config.apikey + if config._config.apikey: + data["api_key"] = config._config.apikey return _send_request( request_method=request_method, url=url, @@ -363,10 +363,10 @@ def _send_request( # noqa: C901, PLR0912 files: FILE_ELEMENTS_TYPE | None = None, md5_checksum: str | None = None, ) -> requests.Response: - n_retries = max(1, config.connection_n_retries) + n_retries = max(1, config._config.connection_n_retries) response: requests.Response | None = None - delay_method = _human_delay if config.retry_policy == "human" else _robot_delay + delay_method = _human_delay if config._config.retry_policy == "human" else _robot_delay # Error 
to raise in case of retrying too often. Will be set to the last observed exception. retry_raise_e: Exception | None = None diff --git a/openml/cli.py b/openml/cli.py index d0a46e498..fb39afe97 100644 --- a/openml/cli.py +++ b/openml/cli.py @@ -339,7 +339,7 @@ def main() -> None: "'https://openml.github.io/openml-python/main/usage.html#configuration'.", ) - configurable_fields = [f for f in config._defaults if f not in ["max_retries"]] + configurable_fields = [f for f in config.get_config_as_dict() if f not in ["max_retries"]] parser_configure.add_argument( "field", diff --git a/openml/config.py b/openml/config.py index 98a48a1c6..20825463e 100644 --- a/openml/config.py +++ b/openml/config.py @@ -15,7 +15,7 @@ from dataclasses import dataclass, replace from io import StringIO from pathlib import Path -from typing import Any, Iterator +from typing import Any, Iterator, cast from typing_extensions import Literal from urllib.parse import urlparse @@ -71,7 +71,7 @@ def _resolve_default_cache_dir() -> Path: return Path(xdg_cache_home) -@dataclass(frozen=True) +@dataclass class OpenMLConfig: apikey: str = "" server: str = "https://www.openml.org/api/v1/xml" @@ -259,8 +259,11 @@ def stop_using_configuration_for_example(cls) -> None: ) global _config - _config = replace(_config, server=cls._test_server, apikey=cls._test_apikey) - + _config = replace( + _config, + server=cast(str, cls._last_used_server), + apikey=cast(str, cls._last_used_key), + ) cls._start_last_called = False @@ -334,8 +337,8 @@ def _setup(config: dict[str, Any] | None = None) -> None: Reads the config file and sets up apikey, server, cache appropriately. key and server can be set by the user simply using - openml.config.apikey = THEIRKEY - openml.config.server = SOMESERVER + openml.config._config.apikey = THEIRKEY + openml.config._config.server = SOMESERVER We could also make it a property but that's less clear. """ global _config @@ -376,6 +379,7 @@ def _setup(config: dict[str, Any] | None = None) -> None: short_cache_dir = Path(config["cachedir"]) _root_cache_directory = short_cache_dir.expanduser().resolve() + _config = replace(_config, cachedir=_root_cache_directory) try: cache_exists = _root_cache_directory.exists() diff --git a/openml/runs/functions.py b/openml/runs/functions.py index 666b75c37..7fa560833 100644 --- a/openml/runs/functions.py +++ b/openml/runs/functions.py @@ -226,7 +226,7 @@ def run_flow_on_task( # noqa: C901, PLR0912, PLR0915, PLR0913 raise ValueError("flow_tags should be a list") if avoid_duplicate_runs is None: - avoid_duplicate_runs = openml.config.avoid_duplicate_runs + avoid_duplicate_runs = openml.config._config.avoid_duplicate_runs # TODO: At some point in the future do not allow for arguments in old order (changed 6-2018). # Flexibility currently still allowed due to code-snippet in OpenML100 paper (3-2019). 
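The config._config rewrites in this patch complement the module-level __getattr__ introduced in PATCH 1/8: internal call sites now read the dataclass directly, while __getattr__ keeps reads of the old attribute names (openml.config.server, openml.config.apikey, and so on) working for user code. A minimal standalone sketch of that delegation pattern (PEP 562) follows; the module name cfg_demo and its defaults are illustrative stand-ins, not part of openml-python.

# cfg_demo.py -- a sketch of module-level read delegation via PEP 562.
from dataclasses import dataclass
from typing import Any


@dataclass(frozen=True)
class DemoConfig:
    server: str = "https://www.openml.org/api/v1/xml"
    apikey: str = ""


_config = DemoConfig()


def __getattr__(name: str) -> Any:
    # Called only when normal module attribute lookup fails, so a read such as
    # `import cfg_demo; cfg_demo.server` resolves against the dataclass instance.
    if hasattr(_config, name):
        return getattr(_config, name)
    raise AttributeError(f"module 'cfg_demo' has no attribute '{name}'")

PEP 562 only hooks attribute reads (__getattr__ and __dir__); there is no module-level write hook, which is why assignments such as openml.config.apikey = ... still need separate handling later in this series.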
diff --git a/openml/testing.py b/openml/testing.py index d1da16876..fbf7edf44 100644 --- a/openml/testing.py +++ b/openml/testing.py @@ -99,13 +99,13 @@ def setUp(self, n_levels: int = 1, tmpdir_suffix: str = "") -> None: os.chdir(self.workdir) self.cached = True - openml.config.apikey = TestBase.user_key + openml.config._config.apikey = TestBase.user_key self.production_server = "https://www.openml.org/api/v1/xml" openml.config.set_root_cache_directory(str(self.workdir)) # Increase the number of retries to avoid spurious server failures - self.retry_policy = openml.config.retry_policy - self.connection_n_retries = openml.config.connection_n_retries + self.retry_policy = openml.config._config.retry_policy + self.connection_n_retries = openml.config._config.connection_n_retries openml.config.set_retry_policy("robot", n_retries=20) def use_production_server(self) -> None: @@ -114,8 +114,8 @@ def use_production_server(self) -> None: Please use this sparingly - it is better to use the test server. """ - openml.config.server = self.production_server - openml.config.apikey = "" + openml.config._config.server = self.production_server + openml.config._config.apikey = "" def tearDown(self) -> None: """Tear down the test""" @@ -127,8 +127,8 @@ def tearDown(self) -> None: # one of the files may still be used by another process raise e - openml.config.connection_n_retries = self.connection_n_retries - openml.config.retry_policy = self.retry_policy + openml.config._config.connection_n_retries = self.connection_n_retries + openml.config._config.retry_policy = self.retry_policy @classmethod def _mark_entity_for_removal( diff --git a/tests/conftest.py b/tests/conftest.py index bd974f3f3..ba7c65813 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -97,8 +97,8 @@ def delete_remote_files(tracker, flow_names) -> None: :param tracker: Dict :return: None """ - openml.config.server = TestBase.test_server - openml.config.apikey = TestBase.user_key + openml.config._config.server = TestBase.test_server + openml.config._config.apikey = TestBase.user_key # reordering to delete sub flows at the end of flows # sub-flows have shorter names, hence, sorting by descending order of flow name length @@ -263,8 +263,8 @@ def verify_cache_state(test_files_directory) -> Iterator[None]: @pytest.fixture(autouse=True, scope="session") def as_robot() -> Iterator[None]: - policy = openml.config.retry_policy - n_retries = openml.config.connection_n_retries + policy = openml.config._config.retry_policy + n_retries = openml.config._config.connection_n_retries openml.config.set_retry_policy("robot", n_retries=20) yield openml.config.set_retry_policy(policy, n_retries) @@ -273,12 +273,12 @@ def as_robot() -> Iterator[None]: @pytest.fixture(autouse=True) def with_server(request): if "production" in request.keywords: - openml.config.server = "https://www.openml.org/api/v1/xml" - openml.config.apikey = None + openml.config._config.server = "https://www.openml.org/api/v1/xml" + openml.config._config.apikey = None yield return - openml.config.server = "https://test.openml.org/api/v1/xml" - openml.config.apikey = TestBase.user_key + openml.config._config.server = "https://test.openml.org/api/v1/xml" + openml.config._config.apikey = TestBase.user_key yield diff --git a/tests/test_datasets/test_dataset_functions.py b/tests/test_datasets/test_dataset_functions.py index 266a6f6f7..ab5a4d8b8 100644 --- a/tests/test_datasets/test_dataset_functions.py +++ b/tests/test_datasets/test_dataset_functions.py @@ -153,7 +153,7 @@ def 
test_check_datasets_active(self): openml.datasets.check_datasets_active, [79], ) - openml.config.server = self.test_server + openml.config._config.server = self.test_server def test_illegal_character_tag(self): dataset = openml.datasets.get_dataset(1) @@ -179,7 +179,7 @@ def test__name_to_id_with_deactivated(self): self.use_production_server() # /d/1 was deactivated assert openml.datasets.functions._name_to_id("anneal") == 2 - openml.config.server = self.test_server + openml.config._config.server = self.test_server @pytest.mark.production() def test__name_to_id_with_multiple_active(self): @@ -417,8 +417,8 @@ def test__getarff_md5_issue(self): "oml:md5_checksum": "abc", "oml:url": "https://www.openml.org/data/download/61", } - n = openml.config.connection_n_retries - openml.config.connection_n_retries = 1 + n = openml.config._config.connection_n_retries + openml.config._config.connection_n_retries = 1 self.assertRaisesRegex( OpenMLHashException, @@ -428,7 +428,7 @@ def test__getarff_md5_issue(self): description, ) - openml.config.connection_n_retries = n + openml.config._config.connection_n_retries = n def test__get_dataset_features(self): features_file = _get_dataset_features_file(self.workdir, 2) @@ -588,7 +588,7 @@ def test_data_status(self): # admin key for test server (only admins can activate datasets. # all users can deactivate their own datasets) - openml.config.apikey = TestBase.admin_key + openml.config._config.apikey = TestBase.admin_key openml.datasets.status_update(did, "active") self._assert_status_of_dataset(did=did, status="active") @@ -1507,7 +1507,7 @@ def test_list_datasets_with_high_size_parameter(self): datasets_b = openml.datasets.list_datasets(size=np.inf) # Reverting to test server - openml.config.server = self.test_server + openml.config._config.server = self.test_server assert len(datasets_a) == len(datasets_b) diff --git a/tests/test_openml/test_config.py b/tests/test_openml/test_config.py index 7ef223504..3ff4bcb00 100644 --- a/tests/test_openml/test_config.py +++ b/tests/test_openml/test_config.py @@ -46,7 +46,7 @@ class TestConfig(openml.testing.TestBase): def test_non_writable_home(self, log_handler_mock, warnings_mock): with tempfile.TemporaryDirectory(dir=self.workdir) as td: os.chmod(td, 0o444) - _dd = copy(openml.config._defaults) + _dd = copy(openml.config.get_config_as_dict()) _dd["cachedir"] = Path(td) / "something-else" openml.config._setup(_dd) @@ -110,26 +110,26 @@ class TestConfigurationForExamples(openml.testing.TestBase): def test_switch_to_example_configuration(self): """Verifies the test configuration is loaded properly.""" # Below is the default test key which would be used anyway, but just for clarity: - openml.config.apikey = TestBase.admin_key - openml.config.server = self.production_server + openml.config._config.apikey = TestBase.admin_key + openml.config._config.server = self.production_server openml.config.start_using_configuration_for_example() - assert openml.config.apikey == TestBase.user_key - assert openml.config.server == self.test_server + assert openml.config._config.apikey == TestBase.user_key + assert openml.config._config.server == self.test_server @pytest.mark.production() def test_switch_from_example_configuration(self): """Verifies the previous configuration is loaded after stopping.""" # Below is the default test key which would be used anyway, but just for clarity: - openml.config.apikey = TestBase.user_key - openml.config.server = self.production_server + openml.config._config.apikey = TestBase.user_key + 
openml.config._config.server = self.production_server openml.config.start_using_configuration_for_example() openml.config.stop_using_configuration_for_example() - assert openml.config.apikey == TestBase.user_key - assert openml.config.server == self.production_server + assert openml.config._config.apikey == TestBase.user_key + assert openml.config._config.server == self.production_server def test_example_configuration_stop_before_start(self): """Verifies an error is raised if `stop_...` is called before `start_...`.""" @@ -146,15 +146,15 @@ def test_example_configuration_stop_before_start(self): @pytest.mark.production() def test_example_configuration_start_twice(self): """Checks that the original config can be returned to if `start..` is called twice.""" - openml.config.apikey = TestBase.user_key - openml.config.server = self.production_server + openml.config._config.apikey = TestBase.user_key + openml.config._config.server = self.production_server openml.config.start_using_configuration_for_example() openml.config.start_using_configuration_for_example() openml.config.stop_using_configuration_for_example() - assert openml.config.apikey == TestBase.user_key - assert openml.config.server == self.production_server + assert openml.config._config.apikey == TestBase.user_key + assert openml.config._config.server == self.production_server def test_configuration_file_not_overwritten_on_load(): diff --git a/tests/test_utils/test_utils.py b/tests/test_utils/test_utils.py index 35be84903..1c0b50fe5 100644 --- a/tests/test_utils/test_utils.py +++ b/tests/test_utils/test_utils.py @@ -44,7 +44,7 @@ def min_number_evaluations_on_test_server() -> int: def _mocked_perform_api_call(call, request_method): - url = openml.config.server + "/" + call + url = openml.config._config.server + "/" + call return openml._api_calls._download_text_file(url) From 38ae9beb47122c54df2122e113ac8a4727bb2eb7 Mon Sep 17 00:00:00 2001 From: Satvik Mishra <112589278+satvshr@users.noreply.github.com> Date: Wed, 31 Dec 2025 00:07:30 +0530 Subject: [PATCH 3/8] test failures fix --- examples/Basics/introduction_tutorial.py | 2 +- openml/config.py | 9 +++++---- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/examples/Basics/introduction_tutorial.py b/examples/Basics/introduction_tutorial.py index c864772f5..648bc90ed 100644 --- a/examples/Basics/introduction_tutorial.py +++ b/examples/Basics/introduction_tutorial.py @@ -35,7 +35,7 @@ # %% import openml -openml.config.apikey = "YOURKEY" +openml.config._config.apikey = "YOURKEY" # %% [markdown] # ## Caching diff --git a/openml/config.py b/openml/config.py index 20825463e..f2020b8c6 100644 --- a/openml/config.py +++ b/openml/config.py @@ -261,8 +261,8 @@ def stop_using_configuration_for_example(cls) -> None: global _config _config = replace( _config, - server=cast(str, cls._last_used_server), - apikey=cast(str, cls._last_used_key), + server=cast("str", cls._last_used_server), + apikey=cast("str", cls._last_used_key), ) cls._start_last_called = False @@ -421,7 +421,7 @@ def set_field_in_config_file(field: str, value: Any) -> None: def _parse_config(config_file: str | Path) -> dict[str, Any]: """Parse the config file, set up defaults.""" config_file = Path(config_file) - config = configparser.RawConfigParser(defaults=_config.__dict__) # type: ignore + config = configparser.RawConfigParser(defaults=OpenMLConfig().__dict__) # type: ignore # The ConfigParser requires a [SECTION_HEADER], which we do not expect in our config file. 
# Cheat the ConfigParser module by adding a fake section header @@ -493,8 +493,9 @@ def set_root_cache_directory(root_cache_directory: str | Path) -> None: -------- get_cache_directory """ - global _root_cache_directory + global _root_cache_directory, _config _root_cache_directory = Path(root_cache_directory) + _config = replace(_config, cachedir=_root_cache_directory) start_using_configuration_for_example = ( From 93ab9c21ce0dcd307666f98766b924e5bc1c09ba Mon Sep 17 00:00:00 2001 From: Satvik Mishra <112589278+satvshr@users.noreply.github.com> Date: Wed, 31 Dec 2025 00:13:37 +0530 Subject: [PATCH 4/8] Update flow_id_tutorial.py --- examples/_external_or_deprecated/flow_id_tutorial.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/_external_or_deprecated/flow_id_tutorial.py b/examples/_external_or_deprecated/flow_id_tutorial.py index c533cfd9f..496102085 100644 --- a/examples/_external_or_deprecated/flow_id_tutorial.py +++ b/examples/_external_or_deprecated/flow_id_tutorial.py @@ -16,7 +16,7 @@ # %% openml.config.start_using_configuration_for_example() -openml.config._configserver = "https://api.openml.org/api/v1/xml" +openml.config._config.server = "https://api.openml.org/api/v1/xml" # %% # Defining a classifier From aa25dd69aa2a8b08f17a3bd2d411a1829fd6eccf Mon Sep 17 00:00:00 2001 From: Satvik Mishra <112589278+satvshr@users.noreply.github.com> Date: Wed, 31 Dec 2025 00:24:36 +0530 Subject: [PATCH 5/8] _defaults bug fixing --- openml/cli.py | 6 +++++- tests/test_openml/test_config.py | 2 +- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/openml/cli.py b/openml/cli.py index fb39afe97..c1363ea74 100644 --- a/openml/cli.py +++ b/openml/cli.py @@ -9,6 +9,8 @@ from typing import Callable from urllib.parse import urlparse +from attr import fields + from openml import config @@ -339,7 +341,9 @@ def main() -> None: "'https://openml.github.io/openml-python/main/usage.html#configuration'.", ) - configurable_fields = [f for f in config.get_config_as_dict() if f not in ["max_retries"]] + configurable_fields = [ + f.name for f in fields(config.OpenMLConfig) if f.name not in ["max_retries"] + ] parser_configure.add_argument( "field", diff --git a/tests/test_openml/test_config.py b/tests/test_openml/test_config.py index 3ff4bcb00..104639460 100644 --- a/tests/test_openml/test_config.py +++ b/tests/test_openml/test_config.py @@ -46,7 +46,7 @@ class TestConfig(openml.testing.TestBase): def test_non_writable_home(self, log_handler_mock, warnings_mock): with tempfile.TemporaryDirectory(dir=self.workdir) as td: os.chmod(td, 0o444) - _dd = copy(openml.config.get_config_as_dict()) + _dd = copy(openml.config.OpenMLConfig().__dict__) _dd["cachedir"] = Path(td) / "something-else" openml.config._setup(_dd) From a98b6b1c7753dbf02d8d6a2dc552abff8e8c60bb Mon Sep 17 00:00:00 2001 From: Satvik Mishra <112589278+satvshr@users.noreply.github.com> Date: Wed, 31 Dec 2025 19:10:58 +0530 Subject: [PATCH 6/8] removed __setattr__ given it is not supported --- openml/config.py | 8 -------- 1 file changed, 8 deletions(-) diff --git a/openml/config.py b/openml/config.py index f2020b8c6..ad8060e7d 100644 --- a/openml/config.py +++ b/openml/config.py @@ -160,14 +160,6 @@ def __getattr__(name: str) -> Any: raise AttributeError(f"module 'openml.config' has no attribute '{name}'") -def __setattr__(name: str, value: Any) -> None: # noqa: N807 - global _config - if hasattr(_config, name): - _config = replace(_config, **{name: value}) - else: - raise AttributeError(f"module 'openml.config' has no 
attribute '{name}'") - - def get_server_base_url() -> str: """Return the base URL of the currently configured server. From 146dd2160f668149d2bd39ed691f703817df8cc6 Mon Sep 17 00:00:00 2001 From: Satvik Mishra <112589278+satvshr@users.noreply.github.com> Date: Mon, 5 Jan 2026 17:12:29 +0530 Subject: [PATCH 7/8] Update all files --- examples/Advanced/datasets_tutorial.py | 2 +- examples/Basics/introduction_tutorial.py | 4 +- .../benchmark_with_optunahub.py | 4 +- .../flow_id_tutorial.py | 2 +- openml/__init__.py | 9 +- openml/_api_calls.py | 19 +- openml/config.py | 798 ++++++++---------- openml/runs/functions.py | 13 +- openml/setups/functions.py | 5 +- openml/tasks/task.py | 2 +- openml/testing.py | 14 +- openml/utils.py | 6 +- tests/conftest.py | 16 +- tests/test_datasets/test_dataset_functions.py | 14 +- tests/test_openml/test_config.py | 25 +- tests/test_utils/test_utils.py | 2 +- 16 files changed, 443 insertions(+), 492 deletions(-) diff --git a/examples/Advanced/datasets_tutorial.py b/examples/Advanced/datasets_tutorial.py index 3a4833206..cc57686d0 100644 --- a/examples/Advanced/datasets_tutorial.py +++ b/examples/Advanced/datasets_tutorial.py @@ -139,7 +139,7 @@ # only for the dataset owner. Further, critical fields cannot be edited if the dataset has any # tasks associated with it. To edit critical fields of a dataset (without tasks) owned by you, # configure the API key: -# openml.config._config.apikey = 'FILL_IN_OPENML_API_KEY' +# openml.config.apikey = 'FILL_IN_OPENML_API_KEY' # This example here only shows a failure when trying to work on a dataset not owned by you: # %% diff --git a/examples/Basics/introduction_tutorial.py b/examples/Basics/introduction_tutorial.py index 648bc90ed..4b972b95b 100644 --- a/examples/Basics/introduction_tutorial.py +++ b/examples/Basics/introduction_tutorial.py @@ -35,7 +35,7 @@ # %% import openml -openml.config._config.apikey = "YOURKEY" +openml.config.apikey = "YOURKEY" # %% [markdown] # ## Caching @@ -52,4 +52,4 @@ # %% import openml -openml.config.set_root_cache_directory("YOURDIR") \ No newline at end of file +openml.config.set_root_cache_directory("YOURDIR") diff --git a/examples/_external_or_deprecated/benchmark_with_optunahub.py b/examples/_external_or_deprecated/benchmark_with_optunahub.py index c8f5f7b0c..ece3e7c40 100644 --- a/examples/_external_or_deprecated/benchmark_with_optunahub.py +++ b/examples/_external_or_deprecated/benchmark_with_optunahub.py @@ -44,7 +44,7 @@ # account (you don't need one for anything else, just to upload your results), # go to your profile and select the API-KEY. 
# Or log in, and navigate to https://www.openml.org/auth/api-key -openml.config._config.apikey = "" +openml.config.apikey = "" ############################################################################ # Prepare for preprocessors and an OpenML task # ============================================ @@ -95,7 +95,7 @@ def objective(trial: optuna.Trial) -> Pipeline: run = openml.runs.run_model_on_task(pipe, task=task_id, avoid_duplicate_runs=False) logger.log(1, f"Model has been trained - {run}") - if openml.config._config.apikey != "": + if openml.config.apikey != "": try: run.publish() diff --git a/examples/_external_or_deprecated/flow_id_tutorial.py b/examples/_external_or_deprecated/flow_id_tutorial.py index 496102085..e813655fc 100644 --- a/examples/_external_or_deprecated/flow_id_tutorial.py +++ b/examples/_external_or_deprecated/flow_id_tutorial.py @@ -16,7 +16,7 @@ # %% openml.config.start_using_configuration_for_example() -openml.config._config.server = "https://api.openml.org/api/v1/xml" +openml.config.server = "https://api.openml.org/api/v1/xml" # %% # Defining a classifier diff --git a/openml/__init__.py b/openml/__init__.py index c49505eb9..e23316d4d 100644 --- a/openml/__init__.py +++ b/openml/__init__.py @@ -18,9 +18,11 @@ # License: BSD 3-Clause from __future__ import annotations +from typing import TYPE_CHECKING + from . import ( _api_calls, - config, + config as _config_module, datasets, evaluations, exceptions, @@ -49,6 +51,11 @@ OpenMLTask, ) +if TYPE_CHECKING: + from .config import OpenMLConfigManager + +config: OpenMLConfigManager = _config_module._config + def populate_cache( task_ids: list[int] | None = None, diff --git a/openml/_api_calls.py b/openml/_api_calls.py index c3f6d285f..a72da1b8c 100644 --- a/openml/_api_calls.py +++ b/openml/_api_calls.py @@ -12,7 +12,7 @@ import xml import zipfile from pathlib import Path -from typing import Dict, Tuple, Union +from typing import Dict, Tuple, Union, cast import minio import requests @@ -71,7 +71,7 @@ def resolve_env_proxies(url: str) -> str | None: def _create_url_from_endpoint(endpoint: str) -> str: - url = config._config.server + url = cast(str, config.server) if not url.endswith("/"): url += "/" url += endpoint @@ -172,7 +172,7 @@ def _download_minio_file( bucket_name=bucket, object_name=object_name, file_path=str(destination), - progress=ProgressBar() if config._config.show_progress else None, + progress=ProgressBar() if config.show_progress else None, request_headers=_HEADERS, ) if destination.is_file() and destination.suffix == ".zip": @@ -301,7 +301,8 @@ def _file_id_to_url(file_id: int, filename: str | None = None) -> str: Presents the URL how to download a given file id filename is optional """ - openml_url = config._config.server.split("/api/") + openml_server = cast(str, config.server) + openml_url = openml_server.split("/api/") url = openml_url[0] + f"/data/download/{file_id!s}" if filename is not None: url += "/" + filename @@ -317,7 +318,7 @@ def _read_url_files( and sending file_elements as files """ data = {} if data is None else data - data["api_key"] = config._config.apikey + data["api_key"] = config.apikey if file_elements is None: file_elements = {} # Using requests.post sets header 'Accept-encoding' automatically to @@ -337,8 +338,8 @@ def __read_url( md5_checksum: str | None = None, ) -> requests.Response: data = {} if data is None else data - if config._config.apikey: - data["api_key"] = config._config.apikey + if config.apikey: + data["api_key"] = config.apikey return _send_request( 
request_method=request_method, url=url, @@ -363,10 +364,10 @@ def _send_request( # noqa: C901, PLR0912 files: FILE_ELEMENTS_TYPE | None = None, md5_checksum: str | None = None, ) -> requests.Response: - n_retries = max(1, config._config.connection_n_retries) + n_retries = max(1, config.connection_n_retries) response: requests.Response | None = None - delay_method = _human_delay if config._config.retry_policy == "human" else _robot_delay + delay_method = _human_delay if config.retry_policy == "human" else _robot_delay # Error to raise in case of retrying too often. Will be set to the last observed exception. retry_raise_e: Exception | None = None diff --git a/openml/config.py b/openml/config.py index ad8060e7d..2ecb3c64f 100644 --- a/openml/config.py +++ b/openml/config.py @@ -12,7 +12,7 @@ import shutil import warnings from contextlib import contextmanager -from dataclasses import dataclass, replace +from dataclasses import dataclass, field, replace from io import StringIO from pathlib import Path from typing import Any, Iterator, cast @@ -21,41 +21,24 @@ logger = logging.getLogger(__name__) openml_logger = logging.getLogger("openml") -console_handler: logging.StreamHandler | None = None -file_handler: logging.handlers.RotatingFileHandler | None = None - -OPENML_CACHE_DIR_ENV_VAR = "OPENML_CACHE_DIR" -OPENML_SKIP_PARQUET_ENV_VAR = "OPENML_SKIP_PARQUET" -_TEST_SERVER_NORMAL_USER_KEY = "normaluser" - - -# Default values (see also https://github.com/openml/OpenML/wiki/Client-API-Standards) -_user_path = Path("~").expanduser().absolute() def _resolve_default_cache_dir() -> Path: - user_defined_cache_dir = os.environ.get(OPENML_CACHE_DIR_ENV_VAR) + user_defined_cache_dir = os.environ.get("OPENML_CACHE_DIR") if user_defined_cache_dir is not None: return Path(user_defined_cache_dir) if platform.system().lower() != "linux": - return _user_path / ".openml" + return Path("~", ".openml") xdg_cache_home = os.environ.get("XDG_CACHE_HOME") if xdg_cache_home is None: return Path("~", ".cache", "openml") - # This is the proper XDG_CACHE_HOME directory, but - # we unfortunately had a problem where we used XDG_CACHE_HOME/org, - # we check heuristically if this old directory still exists and issue - # a warning if it does. There's too much data to move to do this for the user. 
- - # The new cache directory exists cache_dir = Path(xdg_cache_home) / "openml" if cache_dir.exists(): return cache_dir - # The old cache directory *does not* exist heuristic_dir_for_backwards_compat = Path(xdg_cache_home) / "org" / "openml" if not heuristic_dir_for_backwards_compat.exists(): return cache_dir @@ -73,447 +56,412 @@ def _resolve_default_cache_dir() -> Path: @dataclass class OpenMLConfig: + """Dataclass storing the OpenML configuration.""" + apikey: str = "" server: str = "https://www.openml.org/api/v1/xml" - cachedir: Path = _resolve_default_cache_dir() # noqa: RUF009 + cachedir: Path = field(default_factory=_resolve_default_cache_dir) avoid_duplicate_runs: bool = False retry_policy: Literal["human", "robot"] = "human" connection_n_retries: int = 5 show_progress: bool = False + def __setattr__(self, name: str, value: Any) -> None: + if name == "apikey" and value is not None and not isinstance(value, str): + raise ValueError("apikey must be a string or None") -def _create_log_handlers(create_file_handler: bool = True) -> None: # noqa: FBT001, FBT002 - """Creates but does not attach the log handlers.""" - global console_handler, file_handler, _root_cache_directory # noqa: PLW0602 - if console_handler is not None or file_handler is not None: - logger.debug("Requested to create log handlers, but they are already created.") - return - - message_format = "[%(levelname)s] [%(asctime)s:%(name)s] %(message)s" - output_formatter = logging.Formatter(message_format, datefmt="%H:%M:%S") - - console_handler = logging.StreamHandler() - console_handler.setFormatter(output_formatter) - - if create_file_handler: - one_mb = 2**20 - log_path = _root_cache_directory / "openml_python.log" - file_handler = logging.handlers.RotatingFileHandler( - log_path, - maxBytes=one_mb, - backupCount=1, - delay=True, - ) - file_handler.setFormatter(output_formatter) - - -def _convert_log_levels(log_level: int) -> tuple[int, int]: - """Converts a log level that's either defined by OpenML/Python to both specifications.""" - # OpenML verbosity level don't match Python values directly: - openml_to_python = {0: logging.WARNING, 1: logging.INFO, 2: logging.DEBUG} - python_to_openml = { - logging.DEBUG: 2, - logging.INFO: 1, - logging.WARNING: 0, - logging.CRITICAL: 0, - logging.ERROR: 0, - } - # Because the dictionaries share no keys, we use `get` to convert as necessary: - openml_level = python_to_openml.get(log_level, log_level) - python_level = openml_to_python.get(log_level, log_level) - return openml_level, python_level - - -def _set_level_register_and_store(handler: logging.Handler, log_level: int) -> None: - """Set handler log level, register it if needed, save setting to config file if specified.""" - _oml_level, py_level = _convert_log_levels(log_level) - handler.setLevel(py_level) - - if openml_logger.level > py_level or openml_logger.level == logging.NOTSET: - openml_logger.setLevel(py_level) + super().__setattr__(name, value) - if handler not in openml_logger.handlers: - openml_logger.addHandler(handler) +class OpenMLConfigManager: + """The OpenMLConfigManager manages the configuration of the openml-python package.""" -def set_console_log_level(console_output_level: int) -> None: - """Set console output to the desired level and register it with openml logger if needed.""" - global console_handler # noqa: PLW0602 - assert console_handler is not None - _set_level_register_and_store(console_handler, console_output_level) + def __init__(self) -> None: + self.console_handler: logging.StreamHandler | None = 
None + self.file_handler: logging.handlers.RotatingFileHandler | None = None + self.OPENML_CACHE_DIR_ENV_VAR = "OPENML_CACHE_DIR" + self.OPENML_SKIP_PARQUET_ENV_VAR = "OPENML_SKIP_PARQUET" + self._TEST_SERVER_NORMAL_USER_KEY = "normaluser" -def set_file_log_level(file_output_level: int) -> None: - """Set file output to the desired level and register it with openml logger if needed.""" - global file_handler # noqa: PLW0602 - assert file_handler is not None - _set_level_register_and_store(file_handler, file_output_level) + self._user_path = Path("~").expanduser().absolute() + self._config: OpenMLConfig = OpenMLConfig() + self._root_cache_directory: Path = self._config.cachedir -_config: OpenMLConfig = OpenMLConfig() -_root_cache_directory: Path = _config.cachedir + self.logger = logger + self.openml_logger = openml_logger + self._examples = self.ConfigurationForExamples(self) -def __getattr__(name: str) -> Any: - if hasattr(_config, name): - return getattr(_config, name) - raise AttributeError(f"module 'openml.config' has no attribute '{name}'") - - -def get_server_base_url() -> str: - """Return the base URL of the currently configured server. - - Turns ``"https://api.openml.org/api/v1/xml"`` in ``"https://www.openml.org/"`` - and ``"https://test.openml.org/api/v1/xml"`` in ``"https://test.openml.org/"`` - - Returns - ------- - str - """ - domain, _ = _config.server.split("/api", maxsplit=1) - return domain.replace("api", "www") - - -def set_retry_policy(value: Literal["human", "robot"], n_retries: int | None = None) -> None: - global _config - default_retries_by_policy = {"human": 5, "robot": 50} - - if value not in default_retries_by_policy: - raise ValueError( - f"Detected retry_policy '{value}' but must be one of " - f"{list(default_retries_by_policy.keys())}", - ) - if n_retries is not None and not isinstance(n_retries, int): - raise TypeError(f"`n_retries` must be of type `int` or `None` but is `{type(n_retries)}`.") - - if isinstance(n_retries, int) and n_retries < 1: - raise ValueError(f"`n_retries` is '{n_retries}' but must be positive.") - - _config = replace( - _config, - retry_policy=value, - connection_n_retries=(default_retries_by_policy[value] if n_retries is None else n_retries), - ) + self._setup() + def __getattr__(self, name: str) -> Any: + if hasattr(self._config, name): + return getattr(self._config, name) + raise AttributeError(f"{type(self).__name__!r} object has no attribute {name!r}") -class ConfigurationForExamples: - """Allows easy switching to and from a test configuration, used for examples.""" + _FIELDS = { # noqa: RUF012 + "apikey", + "server", + "cachedir", + "avoid_duplicate_runs", + "retry_policy", + "connection_n_retries", + "show_progress", + } - _last_used_server = None - _last_used_key = None - _start_last_called = False - _test_server = "https://test.openml.org/api/v1/xml" - _test_apikey = _TEST_SERVER_NORMAL_USER_KEY + def __setattr__(self, name: str, value: Any) -> None: + # during __init__ before _config exists + if name in { + "_config", + "_root_cache_directory", + "console_handler", + "file_handler", + "logger", + "openml_logger", + "_examples", + "OPENML_CACHE_DIR_ENV_VAR", + "OPENML_SKIP_PARQUET_ENV_VAR", + "_TEST_SERVER_NORMAL_USER_KEY", + "_user_path", + }: + return object.__setattr__(self, name, value) + + if name in self._FIELDS: + # write into dataclass, not manager (prevents shadowing) + if name == "cachedir": + object.__setattr__(self, "_root_cache_directory", Path(value)) + object.__setattr__(self, "_config", replace(self._config, 
**{name: value})) + return None + + object.__setattr__(self, name, value) + return None + + def _create_log_handlers(self, create_file_handler: bool = True) -> None: # noqa: FBT001, FBT002 + if self.console_handler is not None or self.file_handler is not None: + self.logger.debug("Requested to create log handlers, but they are already created.") + return - @classmethod - def start_using_configuration_for_example(cls) -> None: - """Sets the configuration to connect to the test server with valid apikey. + message_format = "[%(levelname)s] [%(asctime)s:%(name)s] %(message)s" + output_formatter = logging.Formatter(message_format, datefmt="%H:%M:%S") - To configuration as was before this call is stored, and can be recovered - by using the `stop_use_example_configuration` method. - """ - global _config + self.console_handler = logging.StreamHandler() + self.console_handler.setFormatter(output_formatter) - if ( - cls._start_last_called - and _config.server == cls._test_server - and _config.apikey == cls._test_apikey - ): - # Method is called more than once in a row without modifying the server or apikey. - # We don't want to save the current test configuration as a last used configuration. - return + if create_file_handler: + one_mb = 2**20 + log_path = self._root_cache_directory / "openml_python.log" + self.file_handler = logging.handlers.RotatingFileHandler( + log_path, + maxBytes=one_mb, + backupCount=1, + delay=True, + ) + self.file_handler.setFormatter(output_formatter) + + def _convert_log_levels(self, log_level: int) -> tuple[int, int]: + openml_to_python = {0: logging.WARNING, 1: logging.INFO, 2: logging.DEBUG} + python_to_openml = { + logging.DEBUG: 2, + logging.INFO: 1, + logging.WARNING: 0, + logging.CRITICAL: 0, + logging.ERROR: 0, + } + openml_level = python_to_openml.get(log_level, log_level) + python_level = openml_to_python.get(log_level, log_level) + return openml_level, python_level + + def _set_level_register_and_store(self, handler: logging.Handler, log_level: int) -> None: + _oml_level, py_level = self._convert_log_levels(log_level) + handler.setLevel(py_level) + + if self.openml_logger.level > py_level or self.openml_logger.level == logging.NOTSET: + self.openml_logger.setLevel(py_level) + + if handler not in self.openml_logger.handlers: + self.openml_logger.addHandler(handler) + + def set_console_log_level(self, console_output_level: int) -> None: + """Set the log level for console output.""" + assert self.console_handler is not None + self._set_level_register_and_store(self.console_handler, console_output_level) + + def set_file_log_level(self, file_output_level: int) -> None: + """Set the log level for file output.""" + assert self.file_handler is not None + self._set_level_register_and_store(self.file_handler, file_output_level) + + def get_server_base_url(self) -> str: + """Get the base URL of the OpenML server (i.e., without /api).""" + domain, _ = self._config.server.split("/api", maxsplit=1) + return domain.replace("api", "www") + + def set_retry_policy( + self, value: Literal["human", "robot"], n_retries: int | None = None + ) -> None: + """Set the retry policy for server connections.""" + default_retries_by_policy = {"human": 5, "robot": 50} + + if value not in default_retries_by_policy: + raise ValueError( + f"Detected retry_policy '{value}' but must be one of " + f"{list(default_retries_by_policy.keys())}", + ) + if n_retries is not None and not isinstance(n_retries, int): + raise TypeError( + f"`n_retries` must be of type `int` or `None` but is 
`{type(n_retries)}`." + ) - cls._last_used_server = _config.server - cls._last_used_key = _config.apikey - cls._start_last_called = True + if isinstance(n_retries, int) and n_retries < 1: + raise ValueError(f"`n_retries` is '{n_retries}' but must be positive.") - # Test server key for examples - _config = replace( - _config, - server=cls._test_server, - apikey=cls._test_apikey, - ) - warnings.warn( - f"Switching to the test server {_config.server} to not upload results to " - "the live server. Using the test server may result in reduced performance of the API!", - stacklevel=2, + self._config = replace( + self._config, + retry_policy=value, + connection_n_retries=( + default_retries_by_policy[value] if n_retries is None else n_retries + ), ) - @classmethod - def stop_using_configuration_for_example(cls) -> None: - """Return to configuration as it was before `start_use_example_configuration`.""" - if not cls._start_last_called: - # We don't want to allow this because it will (likely) result in the `server` and - # `apikey` variables being set to None. - raise RuntimeError( - "`stop_use_example_configuration` called without a saved config." - "`start_use_example_configuration` must be called first.", + def _handle_xdg_config_home_backwards_compatibility(self, xdg_home: str) -> Path: + config_dir = Path(xdg_home) / "openml" + + backwards_compat_config_file = Path(xdg_home) / "config" + if not backwards_compat_config_file.exists(): + return config_dir + + try: + self._parse_config(backwards_compat_config_file) + except Exception: # noqa: BLE001 + return config_dir + + correct_config_location = config_dir / "config" + try: + shutil.copy(backwards_compat_config_file, correct_config_location) + self.openml_logger.warning( + "An openml configuration file was found at the old location " + f"at {backwards_compat_config_file}. We have copied it to the new " + f"location at {correct_config_location}. " + "\nTo silence this warning please verify that the configuration file " + f"at {correct_config_location} is correct and delete the file at " + f"{backwards_compat_config_file}." 
+ ) + return config_dir + except Exception as e: # noqa: BLE001 + self.openml_logger.warning( + "While attempting to perform a backwards compatible fix, we " + f"failed to copy the openml config file at " + f"{backwards_compat_config_file}' to {correct_config_location}" + f"\n{type(e)}: {e}", + "\n\nTo silence this warning, please copy the file " + "to the new location and delete the old file at " + f"{backwards_compat_config_file}.", + ) + return backwards_compat_config_file + + def determine_config_file_path(self) -> Path: + """Determine the path to the openml configuration file.""" + if platform.system().lower() == "linux": + xdg_home = os.environ.get("XDG_CONFIG_HOME") + if xdg_home is not None: + config_dir = self._handle_xdg_config_home_backwards_compatibility(xdg_home) + else: + config_dir = Path("~", ".config", "openml") + else: + config_dir = Path("~") / ".openml" + + config_dir = Path(config_dir).expanduser().resolve() + return config_dir / "config" + + def _parse_config(self, config_file: str | Path) -> dict[str, Any]: + config_file = Path(config_file) + config = configparser.RawConfigParser(defaults=OpenMLConfig().__dict__) # type: ignore + + config_file_ = StringIO() + config_file_.write("[FAKE_SECTION]\n") + try: + with config_file.open("r") as fh: + for line in fh: + config_file_.write(line) + except FileNotFoundError: + self.logger.info( + "No config file found at %s, using default configuration.", config_file + ) + except OSError as e: + self.logger.info("Error opening file %s: %s", config_file, e.args[0]) + config_file_.seek(0) + config.read_file(config_file_) + configuration = dict(config.items("FAKE_SECTION")) + for boolean_field in ["avoid_duplicate_runs", "show_progress"]: + if isinstance(config["FAKE_SECTION"][boolean_field], str): + configuration[boolean_field] = config["FAKE_SECTION"].getboolean(boolean_field) # type: ignore + return configuration # type: ignore + + def start_using_configuration_for_example(self) -> None: + """Sets the configuration to connect to the test server with valid apikey.""" + return self._examples.start_using_configuration_for_example() + + def stop_using_configuration_for_example(self) -> None: + """Store the configuration as it was before `start_use_example_configuration`.""" + return self._examples.stop_using_configuration_for_example() + + def _setup(self, config: dict[str, Any] | None = None) -> None: + config_file = self.determine_config_file_path() + config_dir = config_file.parent + + try: + if not config_dir.exists(): + config_dir.mkdir(exist_ok=True, parents=True) + except PermissionError: + self.openml_logger.warning( + f"No permission to create OpenML directory at {config_dir}!" + " This can result in OpenML-Python not working properly." ) - global _config - _config = replace( - _config, - server=cast("str", cls._last_used_server), - apikey=cast("str", cls._last_used_key), - ) - cls._start_last_called = False - - -def _handle_xdg_config_home_backwards_compatibility( - xdg_home: str, -) -> Path: - # NOTE(eddiebergman): A previous bug results in the config - # file being located at `${XDG_CONFIG_HOME}/config` instead - # of `${XDG_CONFIG_HOME}/openml/config`. As to maintain backwards - # compatibility, where users may already may have had a configuration, - # we copy it over an issue a warning until it's deleted. - # As a heurisitic to ensure that it's "our" config file, we try parse it first. 
- config_dir = Path(xdg_home) / "openml" - - backwards_compat_config_file = Path(xdg_home) / "config" - if not backwards_compat_config_file.exists(): - return config_dir - - # If it errors, that's a good sign it's not ours and we can - # safely ignore it, jumping out of this block. This is a heurisitc - try: - _parse_config(backwards_compat_config_file) - except Exception: # noqa: BLE001 - return config_dir - - # Looks like it's ours, lets try copy it to the correct place - correct_config_location = config_dir / "config" - try: - # We copy and return the new copied location - shutil.copy(backwards_compat_config_file, correct_config_location) - openml_logger.warning( - "An openml configuration file was found at the old location " - f"at {backwards_compat_config_file}. We have copied it to the new " - f"location at {correct_config_location}. " - "\nTo silence this warning please verify that the configuration file " - f"at {correct_config_location} is correct and delete the file at " - f"{backwards_compat_config_file}." + if config is None: + config = self._parse_config(config_file) + + self._config = replace( + self._config, + apikey=config["apikey"], + server=config["server"], + show_progress=config["show_progress"], + avoid_duplicate_runs=config["avoid_duplicate_runs"], + retry_policy=config["retry_policy"], + connection_n_retries=int(config["connection_n_retries"]), ) - return config_dir - except Exception as e: # noqa: BLE001 - # We failed to copy and its ours, return the old one. - openml_logger.warning( - "While attempting to perform a backwards compatible fix, we " - f"failed to copy the openml config file at " - f"{backwards_compat_config_file}' to {correct_config_location}" - f"\n{type(e)}: {e}", - "\n\nTo silence this warning, please copy the file " - "to the new location and delete the old file at " - f"{backwards_compat_config_file}.", - ) - return backwards_compat_config_file + self.set_retry_policy(config["retry_policy"], self._config.connection_n_retries) -def determine_config_file_path() -> Path: - if platform.system().lower() == "linux": - xdg_home = os.environ.get("XDG_CONFIG_HOME") - if xdg_home is not None: - config_dir = _handle_xdg_config_home_backwards_compatibility(xdg_home) + user_defined_cache_dir = os.environ.get(self.OPENML_CACHE_DIR_ENV_VAR) + if user_defined_cache_dir is not None: + short_cache_dir = Path(user_defined_cache_dir) else: - config_dir = Path("~", ".config", "openml") - else: - config_dir = Path("~") / ".openml" - - # Still use os.path.expanduser to trigger the mock in the unit test - config_dir = Path(config_dir).expanduser().resolve() - return config_dir / "config" - - -def _setup(config: dict[str, Any] | None = None) -> None: - """Setup openml package. Called on first import. - - Reads the config file and sets up apikey, server, cache appropriately. - key and server can be set by the user simply using - openml.config._config.apikey = THEIRKEY - openml.config._config.server = SOMESERVER - We could also make it a property but that's less clear. - """ - global _config - global _root_cache_directory - - config_file = determine_config_file_path() - config_dir = config_file.parent - - # read config file, create directory for config file - try: - if not config_dir.exists(): - config_dir.mkdir(exist_ok=True, parents=True) - except PermissionError: - openml_logger.warning( - f"No permission to create OpenML directory at {config_dir}!" - " This can result in OpenML-Python not working properly." 
- ) + short_cache_dir = Path(config["cachedir"]) + + self._root_cache_directory = short_cache_dir.expanduser().resolve() + self._config = replace(self._config, cachedir=self._root_cache_directory) + + try: + cache_exists = self._root_cache_directory.exists() + if not cache_exists: + self._root_cache_directory.mkdir(exist_ok=True, parents=True) + self._create_log_handlers() + except PermissionError: + self.openml_logger.warning( + f"No permission to create OpenML directory at {self._root_cache_directory}!" + " This can result in OpenML-Python not working properly." + ) + self._create_log_handlers(create_file_handler=False) + + def set_field_in_config_file(self, field: str, value: Any) -> None: + """Set a field in the configuration file.""" + if not hasattr(OpenMLConfig(), field): + raise ValueError( + f"Field '{field}' is not valid and must be one of " + f"'{OpenMLConfig().__dict__.keys()}'." + ) - if config is None: - config = _parse_config(config_file) - - _config = replace( - _config, - apikey=config["apikey"], - server=config["server"], - show_progress=config["show_progress"], - avoid_duplicate_runs=config["avoid_duplicate_runs"], - retry_policy=config["retry_policy"], - connection_n_retries=int(config["connection_n_retries"]), - ) + self._config = replace(self._config, **{field: value}) + config_file = self.determine_config_file_path() + existing = self._parse_config(config_file) + with config_file.open("w") as fh: + for f in OpenMLConfig().__dict__: + v = value if f == field else existing.get(f) + if v is not None: + fh.write(f"{f} = {v}\n") + + def get_config_as_dict(self) -> dict[str, Any]: + """Get the current configuration as a dictionary.""" + return self._config.__dict__.copy() + + def get_cache_directory(self) -> str: + """Get the cache directory for the current server.""" + url_suffix = urlparse(self._config.server).netloc + reversed_url_suffix = os.sep.join(url_suffix.split(".")[::-1]) # noqa: PTH118 + return os.path.join(self._root_cache_directory, reversed_url_suffix) # noqa: PTH118 + + def set_root_cache_directory(self, root_cache_directory: str | Path) -> None: + """Set the root cache directory.""" + self._root_cache_directory = Path(root_cache_directory) + self._config = replace(self._config, cachedir=self._root_cache_directory) + + @contextmanager + def overwrite_config_context(self, config: dict[str, Any]) -> Iterator[dict[str, Any]]: + """Overwrite the current configuration within a context manager.""" + existing_config = self.get_config_as_dict() + merged_config = {**existing_config, **config} + + self._setup(merged_config) + yield merged_config + self._setup(existing_config) + + class ConfigurationForExamples: + """Allows easy switching to and from a test configuration, used for examples.""" + + _last_used_server = None + _last_used_key = None + _start_last_called = False + + def __init__(self, manager: OpenMLConfigManager): + self._manager = manager + self._test_apikey = manager._TEST_SERVER_NORMAL_USER_KEY + self._test_server = "https://test.openml.org/api/v1/xml" + + def start_using_configuration_for_example(self) -> None: + """Sets the configuration to connect to the test server with valid apikey. + + To configuration as was before this call is stored, and can be recovered + by using the `stop_use_example_configuration` method. 
+ """ + if ( + self._start_last_called + and self._manager._config.server == self._test_server + and self._manager._config.apikey == self._test_apikey + ): + # Method is called more than once in a row without modifying the server or apikey. + # We don't want to save the current test configuration as a last used configuration. + return + + self._last_used_server = self._manager._config.server + self._last_used_key = self._manager._config.apikey + self._start_last_called = True + + # Test server key for examples + self._manager._config = replace( + self._manager._config, + server=self._test_server, + apikey=self._test_apikey, + ) + warnings.warn( + f"Switching to the test server {self._test_server} to not upload results to " + "the live server. Using the test server may result in reduced performance of the " + "API!", + stacklevel=2, + ) - set_retry_policy(config["retry_policy"], _config.connection_n_retries) + def stop_using_configuration_for_example(self) -> None: + """Return to configuration as it was before `start_use_example_configuration`.""" + if not self._start_last_called: + # We don't want to allow this because it will (likely) result in the `server` and + # `apikey` variables being set to None. + raise RuntimeError( + "`stop_use_example_configuration` called without a saved config." + "`start_use_example_configuration` must be called first.", + ) + + self._manager._config = replace( + self._manager._config, + server=cast("str", self._last_used_server), + apikey=cast("str", self._last_used_key), + ) + self._start_last_called = False - user_defined_cache_dir = os.environ.get(OPENML_CACHE_DIR_ENV_VAR) - if user_defined_cache_dir is not None: - short_cache_dir = Path(user_defined_cache_dir) - else: - short_cache_dir = Path(config["cachedir"]) - - _root_cache_directory = short_cache_dir.expanduser().resolve() - _config = replace(_config, cachedir=_root_cache_directory) - - try: - cache_exists = _root_cache_directory.exists() - # create the cache subdirectory - if not cache_exists: - _root_cache_directory.mkdir(exist_ok=True, parents=True) - _create_log_handlers() - except PermissionError: - openml_logger.warning( - f"No permission to create OpenML directory at {_root_cache_directory}!" - " This can result in OpenML-Python not working properly." - ) - _create_log_handlers(create_file_handler=False) +_config = OpenMLConfigManager() -def set_field_in_config_file(field: str, value: Any) -> None: - """Overwrites the `field` in the configuration file with the new `value`.""" - global _config - if not hasattr(_config, field): - raise ValueError( - f"Field '{field}' is not valid and must be one of '{_config.__dict__.keys()}'." - ) - _config = replace(_config, **{field: value}) - config_file = determine_config_file_path() - existing = _parse_config(config_file) - with config_file.open("w") as fh: - for f in _config.__dict__: - # We can't blindly set all values based on globals() because when the user - # sets it through config.FIELD it should not be stored to file. - # There doesn't seem to be a way to avoid writing defaults to file with configparser, - # because it is impossible to distinguish from an explicitly set value that matches - # the default value, to one that was set to its default because it was omitted. 
- v = value if f == field else existing.get(f) - if v is not None: - fh.write(f"{f} = {v}\n") - - -def _parse_config(config_file: str | Path) -> dict[str, Any]: - """Parse the config file, set up defaults.""" - config_file = Path(config_file) - config = configparser.RawConfigParser(defaults=OpenMLConfig().__dict__) # type: ignore - - # The ConfigParser requires a [SECTION_HEADER], which we do not expect in our config file. - # Cheat the ConfigParser module by adding a fake section header - config_file_ = StringIO() - config_file_.write("[FAKE_SECTION]\n") - try: - with config_file.open("r") as fh: - for line in fh: - config_file_.write(line) - except FileNotFoundError: - logger.info("No config file found at %s, using default configuration.", config_file) - except OSError as e: - logger.info("Error opening file %s: %s", config_file, e.args[0]) - config_file_.seek(0) - config.read_file(config_file_) - configuration = dict(config.items("FAKE_SECTION")) - for boolean_field in ["avoid_duplicate_runs", "show_progress"]: - if isinstance(config["FAKE_SECTION"][boolean_field], str): - configuration[boolean_field] = config["FAKE_SECTION"].getboolean(boolean_field) # type: ignore - return configuration # type: ignore - - -def get_config_as_dict() -> dict[str, Any]: - return _config.__dict__.copy() - - -# NOTE: For backwards compatibility, we keep the `str` -def get_cache_directory() -> str: - """Get the current cache directory. - - This gets the cache directory for the current server relative - to the root cache directory that can be set via - ``set_root_cache_directory()``. The cache directory is the - ``root_cache_directory`` with additional information on which - subdirectory to use based on the server name. By default it is - ``root_cache_directory / org / openml / www`` for the standard - OpenML.org server and is defined as - ``root_cache_directory / top-level domain / second-level domain / - hostname`` - ``` - - Returns - ------- - cachedir : string - The current cache directory. - - """ - url_suffix = urlparse(_config.server).netloc - reversed_url_suffix = os.sep.join(url_suffix.split(".")[::-1]) # noqa: PTH118 - return os.path.join(_root_cache_directory, reversed_url_suffix) # noqa: PTH118 - - -def set_root_cache_directory(root_cache_directory: str | Path) -> None: - """Set module-wide base cache directory. - - Sets the root cache directory, wherin the cache directories are - created to store content from different OpenML servers. For example, - by default, cached data for the standard OpenML.org server is stored - at ``root_cache_directory / org / openml / www``, and the general - pattern is ``root_cache_directory / top-level domain / second-level - domain / hostname``. - - Parameters - ---------- - root_cache_directory : string - Path to use as cache directory. 
- - See Also - -------- - get_cache_directory - """ - global _root_cache_directory, _config - _root_cache_directory = Path(root_cache_directory) - _config = replace(_config, cachedir=_root_cache_directory) - - -start_using_configuration_for_example = ( - ConfigurationForExamples.start_using_configuration_for_example -) -stop_using_configuration_for_example = ConfigurationForExamples.stop_using_configuration_for_example - - -@contextmanager -def overwrite_config_context(config: dict[str, Any]) -> Iterator[dict[str, Any]]: - """A context manager to temporarily override variables in the configuration.""" - existing_config = get_config_as_dict() - merged_config = {**existing_config, **config} - - _setup(merged_config) # type: ignore - yield merged_config # type: ignore - - _setup(existing_config) - - -__all__ = [ - "get_cache_directory", - "get_config_as_dict", - "set_root_cache_directory", - "start_using_configuration_for_example", - "stop_using_configuration_for_example", -] - -_setup() +def __getattr__(name: str) -> Any: + return getattr(_config, name) diff --git a/openml/runs/functions.py b/openml/runs/functions.py index 7fa560833..573d91576 100644 --- a/openml/runs/functions.py +++ b/openml/runs/functions.py @@ -18,7 +18,6 @@ import openml import openml._api_calls import openml.utils -from openml import config from openml.exceptions import ( OpenMLCacheException, OpenMLRunsExistError, @@ -107,7 +106,7 @@ def run_model_on_task( # noqa: PLR0913 """ if avoid_duplicate_runs is None: avoid_duplicate_runs = openml.config.avoid_duplicate_runs - if avoid_duplicate_runs and not config.apikey: + if avoid_duplicate_runs and not openml.config.apikey: warnings.warn( "avoid_duplicate_runs is set to True, but no API key is set. " "Please set your API key in the OpenML configuration file, see" @@ -226,7 +225,7 @@ def run_flow_on_task( # noqa: C901, PLR0912, PLR0915, PLR0913 raise ValueError("flow_tags should be a list") if avoid_duplicate_runs is None: - avoid_duplicate_runs = openml.config._config.avoid_duplicate_runs + avoid_duplicate_runs = openml.config.avoid_duplicate_runs # TODO: At some point in the future do not allow for arguments in old order (changed 6-2018). # Flexibility currently still allowed due to code-snippet in OpenML100 paper (3-2019). @@ -336,7 +335,7 @@ def run_flow_on_task( # noqa: C901, PLR0912, PLR0915, PLR0913 message = f"Executed Task {task.task_id} with Flow id:{run.flow_id}" else: message = f"Executed Task {task.task_id} on local Flow with name {flow.name}." - config.logger.info(message) + openml.config.logger.info(message) return run @@ -528,7 +527,7 @@ def _run_task_get_arffcontent( # noqa: PLR0915, PLR0912, C901 # The forked child process may not copy the configuration state of OpenML from the parent. # Current configuration setup needs to be copied and passed to the child processes. 
- _config = config.get_config_as_dict() + _config = openml.config.get_config_as_dict() # Execute runs in parallel # assuming the same number of tasks as workers (n_jobs), the total compute time for this # statement will be similar to the slowest run @@ -733,7 +732,7 @@ def _run_task_get_arffcontent_parallel_helper( # noqa: PLR0913 """ # Sets up the OpenML instantiated in the child process to match that of the parent's # if configuration=None, loads the default - config._setup(configuration) + openml.config._setup(configuration) train_indices, test_indices = task.get_train_test_split_indices( repeat=rep_no, @@ -757,7 +756,7 @@ def _run_task_get_arffcontent_parallel_helper( # noqa: PLR0913 else: raise NotImplementedError(task.task_type) - config.logger.info( + openml.config.logger.info( f"Going to run model {model!s} on " f"dataset {openml.datasets.get_dataset(task.dataset_id).name} " f"for repeat {rep_no} fold {fold_no} sample {sample_no}" diff --git a/openml/setups/functions.py b/openml/setups/functions.py index 374911901..90dd73c06 100644 --- a/openml/setups/functions.py +++ b/openml/setups/functions.py @@ -14,7 +14,6 @@ import openml import openml.exceptions import openml.utils -from openml import config from openml.flows import OpenMLFlow, flow_exists from .setup import OpenMLParameter, OpenMLSetup @@ -84,7 +83,7 @@ def _get_cached_setup(setup_id: int) -> OpenMLSetup: OpenMLCacheException If the setup file for the given setup ID is not cached. """ - cache_dir = Path(config.get_cache_directory()) + cache_dir = Path(openml.config.get_cache_directory()) setup_cache_dir = cache_dir / "setups" / str(setup_id) try: setup_file = setup_cache_dir / "description.xml" @@ -112,7 +111,7 @@ def get_setup(setup_id: int) -> OpenMLSetup: ------- OpenMLSetup (an initialized openml setup object) """ - setup_dir = Path(config.get_cache_directory()) / "setups" / str(setup_id) + setup_dir = Path(openml.config.get_cache_directory()) / "setups" / str(setup_id) setup_dir.mkdir(exist_ok=True, parents=True) setup_file = setup_dir / "description.xml" diff --git a/openml/tasks/task.py b/openml/tasks/task.py index 395b52482..304bab544 100644 --- a/openml/tasks/task.py +++ b/openml/tasks/task.py @@ -10,8 +10,8 @@ from typing import TYPE_CHECKING, Any, Sequence from typing_extensions import TypedDict +import openml import openml._api_calls -import openml.config from openml import datasets from openml.base import OpenMLBase from openml.utils import _create_cache_directory_for_id diff --git a/openml/testing.py b/openml/testing.py index fbf7edf44..d1da16876 100644 --- a/openml/testing.py +++ b/openml/testing.py @@ -99,13 +99,13 @@ def setUp(self, n_levels: int = 1, tmpdir_suffix: str = "") -> None: os.chdir(self.workdir) self.cached = True - openml.config._config.apikey = TestBase.user_key + openml.config.apikey = TestBase.user_key self.production_server = "https://www.openml.org/api/v1/xml" openml.config.set_root_cache_directory(str(self.workdir)) # Increase the number of retries to avoid spurious server failures - self.retry_policy = openml.config._config.retry_policy - self.connection_n_retries = openml.config._config.connection_n_retries + self.retry_policy = openml.config.retry_policy + self.connection_n_retries = openml.config.connection_n_retries openml.config.set_retry_policy("robot", n_retries=20) def use_production_server(self) -> None: @@ -114,8 +114,8 @@ def use_production_server(self) -> None: Please use this sparingly - it is better to use the test server. 
""" - openml.config._config.server = self.production_server - openml.config._config.apikey = "" + openml.config.server = self.production_server + openml.config.apikey = "" def tearDown(self) -> None: """Tear down the test""" @@ -127,8 +127,8 @@ def tearDown(self) -> None: # one of the files may still be used by another process raise e - openml.config._config.connection_n_retries = self.connection_n_retries - openml.config._config.retry_policy = self.retry_policy + openml.config.connection_n_retries = self.connection_n_retries + openml.config.retry_policy = self.retry_policy @classmethod def _mark_entity_for_removal( diff --git a/openml/utils.py b/openml/utils.py index 7e72e7aee..f4a78fa44 100644 --- a/openml/utils.py +++ b/openml/utils.py @@ -18,8 +18,6 @@ import openml._api_calls import openml.exceptions -from . import config - # Avoid import cycles: https://mypy.readthedocs.io/en/latest/common_issues.html#import-cycles if TYPE_CHECKING: from openml.base import OpenMLBase @@ -328,7 +326,7 @@ def _list_all( # noqa: C901 def _get_cache_dir_for_key(key: str) -> Path: - return Path(config.get_cache_directory()) / key + return Path(openml.config.get_cache_directory()) / key def _create_cache_directory(key: str) -> Path: @@ -428,7 +426,7 @@ def safe_func(*args: P.args, **kwargs: P.kwargs) -> R: def _create_lockfiles_dir() -> Path: - path = Path(config.get_cache_directory()) / "locks" + path = Path(openml.config.get_cache_directory()) / "locks" # TODO(eddiebergman): Not sure why this is allowed to error and ignore??? with contextlib.suppress(OSError): path.mkdir(exist_ok=True, parents=True) diff --git a/tests/conftest.py b/tests/conftest.py index ba7c65813..bd974f3f3 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -97,8 +97,8 @@ def delete_remote_files(tracker, flow_names) -> None: :param tracker: Dict :return: None """ - openml.config._config.server = TestBase.test_server - openml.config._config.apikey = TestBase.user_key + openml.config.server = TestBase.test_server + openml.config.apikey = TestBase.user_key # reordering to delete sub flows at the end of flows # sub-flows have shorter names, hence, sorting by descending order of flow name length @@ -263,8 +263,8 @@ def verify_cache_state(test_files_directory) -> Iterator[None]: @pytest.fixture(autouse=True, scope="session") def as_robot() -> Iterator[None]: - policy = openml.config._config.retry_policy - n_retries = openml.config._config.connection_n_retries + policy = openml.config.retry_policy + n_retries = openml.config.connection_n_retries openml.config.set_retry_policy("robot", n_retries=20) yield openml.config.set_retry_policy(policy, n_retries) @@ -273,12 +273,12 @@ def as_robot() -> Iterator[None]: @pytest.fixture(autouse=True) def with_server(request): if "production" in request.keywords: - openml.config._config.server = "https://www.openml.org/api/v1/xml" - openml.config._config.apikey = None + openml.config.server = "https://www.openml.org/api/v1/xml" + openml.config.apikey = None yield return - openml.config._config.server = "https://test.openml.org/api/v1/xml" - openml.config._config.apikey = TestBase.user_key + openml.config.server = "https://test.openml.org/api/v1/xml" + openml.config.apikey = TestBase.user_key yield diff --git a/tests/test_datasets/test_dataset_functions.py b/tests/test_datasets/test_dataset_functions.py index f1f9e6346..f8cb1943c 100644 --- a/tests/test_datasets/test_dataset_functions.py +++ b/tests/test_datasets/test_dataset_functions.py @@ -153,7 +153,7 @@ def test_check_datasets_active(self): 
openml.datasets.check_datasets_active, [79], ) - openml.config._config.server = self.test_server + openml.config.server = self.test_server def test_illegal_character_tag(self): dataset = openml.datasets.get_dataset(1) @@ -179,7 +179,7 @@ def test__name_to_id_with_deactivated(self): self.use_production_server() # /d/1 was deactivated assert openml.datasets.functions._name_to_id("anneal") == 2 - openml.config._config.server = self.test_server + openml.config.server = self.test_server @pytest.mark.production() def test__name_to_id_with_multiple_active(self): @@ -418,8 +418,8 @@ def test__getarff_md5_issue(self): "oml:md5_checksum": "abc", "oml:url": "https://www.openml.org/data/download/61", } - n = openml.config._config.connection_n_retries - openml.config._config.connection_n_retries = 1 + n = openml.config.connection_n_retries + openml.config.connection_n_retries = 1 self.assertRaisesRegex( OpenMLHashException, @@ -429,7 +429,7 @@ def test__getarff_md5_issue(self): description, ) - openml.config._config.connection_n_retries = n + openml.config.connection_n_retries = n def test__get_dataset_features(self): features_file = _get_dataset_features_file(self.workdir, 2) @@ -589,7 +589,7 @@ def test_data_status(self): # admin key for test server (only admins can activate datasets. # all users can deactivate their own datasets) - openml.config._config.apikey = TestBase.admin_key + openml.config.apikey = TestBase.admin_key openml.datasets.status_update(did, "active") self._assert_status_of_dataset(did=did, status="active") @@ -1516,7 +1516,7 @@ def test_list_datasets_with_high_size_parameter(self): datasets_b = openml.datasets.list_datasets(size=np.inf) # Reverting to test server - openml.config._config.server = self.test_server + openml.config.server = self.test_server assert len(datasets_a) == len(datasets_b) diff --git a/tests/test_openml/test_config.py b/tests/test_openml/test_config.py index 104639460..282838414 100644 --- a/tests/test_openml/test_config.py +++ b/tests/test_openml/test_config.py @@ -110,26 +110,25 @@ class TestConfigurationForExamples(openml.testing.TestBase): def test_switch_to_example_configuration(self): """Verifies the test configuration is loaded properly.""" # Below is the default test key which would be used anyway, but just for clarity: - openml.config._config.apikey = TestBase.admin_key - openml.config._config.server = self.production_server + openml.config.apikey = TestBase.admin_key + openml.config.server = self.production_server openml.config.start_using_configuration_for_example() - assert openml.config._config.apikey == TestBase.user_key - assert openml.config._config.server == self.test_server + assert openml.config.apikey == TestBase.user_key + assert openml.config.server == self.test_server @pytest.mark.production() def test_switch_from_example_configuration(self): """Verifies the previous configuration is loaded after stopping.""" # Below is the default test key which would be used anyway, but just for clarity: - openml.config._config.apikey = TestBase.user_key - openml.config._config.server = self.production_server + openml.config.apikey = TestBase.user_key + openml.config.server = self.production_server openml.config.start_using_configuration_for_example() openml.config.stop_using_configuration_for_example() - - assert openml.config._config.apikey == TestBase.user_key - assert openml.config._config.server == self.production_server + assert openml.config.apikey == TestBase.user_key + assert openml.config.server == self.production_server def 
test_example_configuration_stop_before_start(self): """Verifies an error is raised if `stop_...` is called before `start_...`.""" @@ -146,15 +145,15 @@ def test_example_configuration_stop_before_start(self): @pytest.mark.production() def test_example_configuration_start_twice(self): """Checks that the original config can be returned to if `start..` is called twice.""" - openml.config._config.apikey = TestBase.user_key - openml.config._config.server = self.production_server + openml.config.apikey = TestBase.user_key + openml.config.server = self.production_server openml.config.start_using_configuration_for_example() openml.config.start_using_configuration_for_example() openml.config.stop_using_configuration_for_example() - assert openml.config._config.apikey == TestBase.user_key - assert openml.config._config.server == self.production_server + assert openml.config.apikey == TestBase.user_key + assert openml.config.server == self.production_server def test_configuration_file_not_overwritten_on_load(): diff --git a/tests/test_utils/test_utils.py b/tests/test_utils/test_utils.py index 1c0b50fe5..35be84903 100644 --- a/tests/test_utils/test_utils.py +++ b/tests/test_utils/test_utils.py @@ -44,7 +44,7 @@ def min_number_evaluations_on_test_server() -> int: def _mocked_perform_api_call(call, request_method): - url = openml.config._config.server + "/" + call + url = openml.config.server + "/" + call return openml._api_calls._download_text_file(url) From 7a67bf01834ef0d5ba4075c612de6a3554d2d82b Mon Sep 17 00:00:00 2001 From: Satvik Mishra <112589278+satvshr@users.noreply.github.com> Date: Mon, 5 Jan 2026 17:30:53 +0530 Subject: [PATCH 8/8] Update introduction_tutorial.py --- examples/Basics/introduction_tutorial.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/Basics/introduction_tutorial.py b/examples/Basics/introduction_tutorial.py index 4b972b95b..c864772f5 100644 --- a/examples/Basics/introduction_tutorial.py +++ b/examples/Basics/introduction_tutorial.py @@ -52,4 +52,4 @@ # %% import openml -openml.config.set_root_cache_directory("YOURDIR") +openml.config.set_root_cache_directory("YOURDIR") \ No newline at end of file
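
The hunks above replace openml/config.py's module-level globals with a single OpenMLConfigManager instance (bound to `_config`) plus a module-level `__getattr__` that forwards attribute access to it, which is why the updated tests read `openml.config.retry_policy` instead of `openml.config._config.retry_policy`. The snippet below is an illustrative usage sketch only and is not part of the patch; it assumes the patched `openml.config` module is importable and sticks to calls that already appear in the diff (attribute reads, `set_retry_policy`, `get_cache_directory`, `overwrite_config_context`).

    # Usage sketch -- assumes openml is installed with this patch applied.
    import openml

    # Attribute reads are forwarded through the module __getattr__ to the manager,
    # which resolves them against its frozen OpenMLConfig dataclass.
    print(openml.config.server)
    print(openml.config.retry_policy, openml.config.connection_n_retries)

    # Helper methods on the manager are reached the same way.
    openml.config.set_retry_policy("robot", n_retries=20)
    print(openml.config.get_cache_directory())

    # Temporarily override settings; the context manager re-runs _setup() with the
    # previous values on exit, restoring the prior configuration.
    test_server = "https://test.openml.org/api/v1/xml"
    with openml.config.overwrite_config_context({"server": test_server}):
        print(openml.config.get_config_as_dict()["server"])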