From 9a3dadd89da13adc4d71e87bed3a5a77b7680234 Mon Sep 17 00:00:00 2001
From: PGijsbers
Date: Tue, 9 Dec 2025 11:22:11 +0100
Subject: [PATCH 1/4] Exclude DS_Store files
---
.gitignore | 1 +
1 file changed, 1 insertion(+)
diff --git a/.gitignore b/.gitignore
index 75dd10d..5a1ad1b 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,4 +1,5 @@
docker/mysql/data
+.DS_Store
# Byte-compiled / optimized / DLL files
__pycache__/
From 4f323eff90e2d2fe2d6f5dc94f899e815f55f553 Mon Sep 17 00:00:00 2001
From: PGijsbers
Date: Wed, 10 Dec 2025 10:18:11 +0100
Subject: [PATCH 2/4] Ignore return values explicitly
---
src/main.py | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/src/main.py b/src/main.py
index 85e2ed2..d8e61b3 100644
--- a/src/main.py
+++ b/src/main.py
@@ -21,18 +21,18 @@ def _parse_args() -> argparse.Namespace:
"uvicorn",
"arguments forwarded to uvicorn",
)
- uvicorn_options.add_argument(
+ _ = uvicorn_options.add_argument(
"--reload",
action="store_true",
help="Enable auto-reload",
)
- uvicorn_options.add_argument(
+ _ = uvicorn_options.add_argument(
"--host",
default="127.0.0.1",
type=str,
help="Bind socket to this host.",
)
- uvicorn_options.add_argument(
+ _ = uvicorn_options.add_argument(
"--port",
default=8000,
type=int,
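
The `_ = ...` assignments above satisfy linters that flag discarded return values (for example pyright's `reportUnusedCallResult`); `add_argument` returns the `Action` it registers, which this code never needs. A minimal standalone sketch of the pattern (not taken from this repository):

```python
import argparse

parser = argparse.ArgumentParser()
# add_argument returns the Action it creates; binding it to `_` marks the
# result as intentionally ignored, so strict linters stay quiet.
_ = parser.add_argument("--port", default=8000, type=int, help="Bind socket to this port.")

args = parser.parse_args(["--port", "8080"])
print(args.port)  # 8080
```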
From 7c0d5c3b642efd8b8acfdaa1ab830db77a993fe9 Mon Sep 17 00:00:00 2001
From: PGijsbers
Date: Wed, 10 Dec 2025 10:42:51 +0100
Subject: [PATCH 3/4] Remove old PHP docker
---
docker/mysql/README.md | 2 +-
docker/readme.md | 27 +--------------------------
2 files changed, 2 insertions(+), 27 deletions(-)
diff --git a/docker/mysql/README.md b/docker/mysql/README.md
index edb3b48..95ead55 100644
--- a/docker/mysql/README.md
+++ b/docker/mysql/README.md
@@ -16,7 +16,7 @@ which sets:
You should be able to connect to it using `mysql`:
```bash
-
+mysql --host 127.0.0.1 --port 3306 -uroot -pok
```
If you do not have `mysql` installed, you may refer to the MySQL image documentation on
how to use the image instead to connect over a docker network if you want to connect
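
For readers without a local `mysql` client, a hedged sketch of the docker-network route mentioned above (the network name is an assumption; check `docker network ls` for the actual name):

```bash
# Run a throwaway mysql client container on the compose network and connect
# to the database service by its container name.
docker run -it --rm --network server-api_default mysql \
    mysql --host openml-test-database --port 3306 -uroot -pok
```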
diff --git a/docker/readme.md b/docker/readme.md
index 8fc041f..f31c281 100644
--- a/docker/readme.md
+++ b/docker/readme.md
@@ -7,39 +7,14 @@ This directory contains the files and information to build the following 5 image
- docs: the official [mkdocs-material](https://hub.docker.com/r/squidfunk/mkdocs-material)
image, but with additional plugins installed that are required for building the documentation
in this project's `/doc` directory.
- - [openml/php-rest-api](https://hub.docker.com/r/openml/php-rest-api): image with the
- php back-end code, but ran on [feature/elasticsearch8](https://github.com/openml/openml/tree/feature/elasticsearch8)
- branch.
- python-api: an image of this project, to facilitate development on any platform.
- - [openml/elasticsearch8-prebuilt](https://hub.docker.com/r/openml/elasticsearch8-prebuilt):
- the default elasticsearch image, but with indices already built on the test database
- through invocation of the old php code.
Between the prebuilt indices and the baked-in database, when all images have already been
pulled, a `docker compose up` step should only take seconds. 🚀
-## Building `openml/elasticsearch8-prebuilt`
-The `openml/elasticsearch8-prebuilt` is not made with a Dockerfile, because it requires
-steps of running containers, which to the best of my knowledge is not facilitated by
-docker (not even through [multi-stage builds](https://docs.docker.com/build/building/multi-stage/)).
-So, instead we build the container state locally and then use [`docker commit`](https://docs.docker.com/engine/reference/commandline/commit/).
-
-1. run `docker compose up`, but with the `elasticsearch` service pointing to
- `docker.elastic.co/elasticsearch/elasticsearch:8.10.4` instead of `openml/elasticsearch8-prebuilt`.
-2. build the indices from the `php-api` container:
-
- 1. Connect to the container: `docker exec -it server-api-php-api-1 /bin/bash`
- 2. (optional) Edit `/var/www/openml/index.php` and set L56 to `development` instead of `production`,
- this will show progress of building the indices, or print out any error that may occur.
- 3. Build the indices: `php /var/www/openml/index.php cron build_es_indices`
- 4. Exit the container with `exit`.
-
-3. Make a commit of the elastic search container with prebuilt indices: `docker commit elasticsearch openml/elasticsearch8-prebuilt`
-4. Push the image created by the commit: `docker push openml/elasticsearch8-prebuilt`
-
## Building for multiple platforms
-Following Docker's "[multi-platform images](https://www.google.com/url?sa=t&rct=j&q=&esrc=s&source=web&cd=&ved=2ahUKEwiTutyczsOCAxUUhv0HHe_VA6QQFnoECBAQAQ&url=https%3A%2F%2Fdocs.docker.com%2Fbuild%2Fbuilding%2Fmulti-platform%2F&usg=AOvVaw0YP_mkj5WTYD-0weEfrfDv&opi=89978449)"
+Following Docker's "[multi-platform images](https://docs.docker.com/build/building/multi-platform/)"
documentation, we can build multi-platform images in a few simple steps:
1. Only the first time, create a docker-container driver: `docker buildx create --name container --driver=docker-container`
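
Once the `container` builder exists, the remaining multi-platform steps boil down to a single `buildx` invocation. A sketch with an illustrative tag and platform list (not a command from this README):

```bash
# Build the python-api image for two platforms with the docker-container
# builder created in step 1, and push the multi-platform manifest.
docker buildx build --builder container \
    --platform linux/amd64,linux/arm64 \
    --file docker/python/Dockerfile \
    --tag openml/python-api:latest \
    --push .
```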
From 813927afb347e06756189adf4786dbdc5bd73d6e Mon Sep 17 00:00:00 2001
From: PGijsbers
Date: Fri, 19 Dec 2025 09:33:20 +0100
Subject: [PATCH 4/4] Make tests pass with openml-services (#217)
For now, we still maintain the relevant definition files in this repository so that they can change independently for a little while, while the server is under the most active development. Once things settle, we can consider which changes should be merged back into openml-services to reduce duplication again.
---
.github/workflows/tests.yml | 30 +-
docker-compose.yaml | 71 +++--
docker/database/update.sh | 31 ++
docker/elasticsearch/.env | 3 +
docker/php/.env | 14 +
src/config.py | 14 +-
src/config.toml | 4 +
src/core/formatting.py | 13 +-
src/routers/openml/flows.py | 10 +-
src/routers/openml/tasks.py | 4 +-
src/schemas/datasets/mldcat_ap.py | 2 +-
src/schemas/flows.py | 9 +-
tests/conftest.py | 6 +-
tests/constants.py | 4 +-
tests/routers/openml/datasets_test.py | 13 +-
tests/routers/openml/flows_test.py | 277 +++++++++---------
.../migration/datasets_migration_test.py | 7 +-
.../openml/migration/flows_migration_test.py | 6 +-
tests/routers/openml/task_test.py | 2 +-
19 files changed, 298 insertions(+), 222 deletions(-)
create mode 100755 docker/database/update.sh
create mode 100644 docker/elasticsearch/.env
create mode 100644 docker/php/.env
diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml
index 6b5e31b..cff3449 100644
--- a/.github/workflows/tests.yml
+++ b/.github/workflows/tests.yml
@@ -15,21 +15,15 @@ jobs:
compare-php:
runs-on: ubuntu-latest
steps:
- - uses: actions/checkout@v4
- - uses: actions/setup-python@v4
+ - uses: actions/checkout@v6
+ - uses: actions/setup-python@v6
with:
python-version: 3.x
- # A naive `docker compose up` would first build the `python-api` container and then
- # start all services, which kickstarts Elastic Search and building indices.
- # But since those two steps are independent, we can parallelize them to save time.
- - run: |
- docker compose build python-api
- docker compose up -d --wait python-api php-api
- - run: docker container ls && docker image ls
- - run: docker exec python-api python -m pip freeze
- - run: docker exec python-api coverage run -m pytest -xv -m "php_api"
- - run: docker exec python-api coverage xml
+ # https://github.com/docker/compose/issues/10596
+ - run: docker compose --profile "python" --profile "php" up --detach --wait --remove-orphans || exit $(docker compose ps -q | xargs docker inspect -f '{{.State.ExitCode}}' | grep -v '^0' | wc -l)
+ - run: docker exec openml-python-rest-api coverage run -m pytest -v -m "php_api"
+ - run: docker exec openml-python-rest-api coverage xml
- name: Upload results to Codecov
uses: codecov/codecov-action@v4
with:
@@ -37,15 +31,13 @@ jobs:
python:
runs-on: ubuntu-latest
steps:
- - uses: actions/checkout@v4
- - uses: actions/setup-python@v4
+ - uses: actions/checkout@v6
+ - uses: actions/setup-python@v6
with:
python-version: 3.x
- - run: docker compose up -d --wait database python-api
- - run: docker container ls && docker image ls
- - run: docker exec python-api python -m pip freeze
- - run: docker exec python-api coverage run -m pytest -xv -m "not php_api"
- - run: docker exec python-api coverage xml
+ - run: docker compose --profile "python" up --detach --wait --remove-orphans || exit $(docker compose ps -q | xargs docker inspect -f '{{.State.ExitCode}}' | grep -v '^0' | wc -l)
+ - run: docker exec openml-python-rest-api coverage run -m pytest -v -m "not php_api"
+ - run: docker exec openml-python-rest-api coverage xml
- name: Upload results to Codecov
uses: codecov/codecov-action@v4
with:
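
The `|| exit $(...)` fallback in both jobs works around docker/compose#10596: `up --wait` returns a non-zero code when a one-shot service (here `database-setup`) exits after finishing its work. Expanded for readability, the same logic reads roughly as:

```bash
# Fail only when some container in the project actually exited with a
# non-zero code; a cleanly finished one-shot service keeps the count at 0.
docker compose --profile "python" up --detach --wait --remove-orphans || {
    failed=$(docker compose ps -q | xargs docker inspect -f '{{.State.ExitCode}}' | grep -v '^0' | wc -l)
    exit "$failed"
}
```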
diff --git a/docker-compose.yaml b/docker-compose.yaml
index 8b0ef56..324350f 100644
--- a/docker-compose.yaml
+++ b/docker-compose.yaml
@@ -1,13 +1,33 @@
services:
database:
- image: "openml/test-database"
+ profiles: ["python", "php", "all"]
+ image: "openml/test-database:20240105"
container_name: "openml-test-database"
environment:
MYSQL_ROOT_PASSWORD: ok
ports:
- "3306:3306"
+ healthcheck:
+ test: ["CMD", "mysqladmin" ,"ping", "-h", "localhost"]
+ start_period: 30s
+ start_interval: 1s
+ timeout: 3s
+ interval: 5s
+ retries: 10
+
+ database-setup:
+ profiles: ["python", "php", "all"]
+ image: mysql
+ container_name: "openml-test-database-setup"
+ volumes:
+ - ./docker/database/update.sh:/database-update.sh
+ command: /bin/sh -c "/database-update.sh"
+ depends_on:
+ database:
+ condition: service_healthy
docs:
+ profiles: ["all"]
build:
context: .
dockerfile: docker/docs/Dockerfile
@@ -16,8 +36,35 @@ services:
volumes:
- .:/docs
+ elasticsearch:
+ profiles: ["php", "all"]
+ image: docker.elastic.co/elasticsearch/elasticsearch:6.8.23
+ container_name: "openml-elasticsearch"
+ platform: "linux/amd64"
+ ports:
+ - "9200:9200" # also known as /es (nginx)
+ - "9300:9300"
+ env_file: docker/elasticsearch/.env
+ healthcheck:
+ test: curl 127.0.0.1:9200/_cluster/health | grep -e "green"
+ start_period: 30s
+ start_interval: 5s
+ timeout: 3s
+ interval: 10s
+ deploy:
+ resources:
+ limits:
+ cpus: '1'
+ memory: 1G
+ reservations:
+ cpus: '0.2'
+ memory: 250M
+
php-api:
- image: "openml/php-rest-api"
+ profiles: ["php", "all"]
+ image: "openml/php-rest-api:v1.2.2"
+ container_name: "openml-php-rest-api"
+ env_file: docker/php/.env
ports:
- "8002:80"
depends_on:
@@ -33,7 +80,8 @@ services:
interval: 1m
python-api:
- container_name: "python-api"
+ profiles: ["python", "all"]
+ container_name: "openml-python-rest-api"
build:
context: .
dockerfile: docker/python/Dockerfile
@@ -43,20 +91,3 @@ services:
- .:/python-api
depends_on:
- database
-
- elasticsearch:
- image: docker.elastic.co/elasticsearch/elasticsearch:6.8.23
- container_name: "elasticsearch"
- ports:
- - "9200:9200"
- - "9300:9300"
- environment:
- - ELASTIC_PASSWORD=default
- - discovery.type=single-node
- - xpack.security.enabled=false
- healthcheck:
- test: curl 127.0.0.1:9200/_cluster/health | grep -e "green"
- start_period: 30s
- start_interval: 5s
- timeout: 3s
- interval: 1m
diff --git a/docker/database/update.sh b/docker/database/update.sh
new file mode 100755
index 0000000..7c87ca8
--- /dev/null
+++ b/docker/database/update.sh
@@ -0,0 +1,31 @@
+#!/bin/bash
+# Change the filepath of openml.file
+# from "https://www.openml.org/data/download/1666876/phpFsFYVN"
+# to "http://minio:9000/datasets/0000/0001/phpFsFYVN"
+mysql -hdatabase -uroot -pok -e 'UPDATE openml.file SET filepath = CONCAT("http://minio:9000/datasets/0000/", LPAD(id, 4, "0"), "/", SUBSTRING_INDEX(filepath, "/", -1)) WHERE extension="arff";'
+
+# Update openml_expdb.dataset with the same url
+mysql -hdatabase -uroot -pok -e 'UPDATE openml_expdb.dataset DS, openml.file FL SET DS.url = FL.filepath WHERE DS.did = FL.id;'
+
+
+
+
+
+# Create the data_feature_description TABLE. TODO: can we make sure this table exists already?
+mysql -hdatabase -uroot -pok -Dopenml_expdb -e 'CREATE TABLE IF NOT EXISTS `data_feature_description` (
+ `did` int unsigned NOT NULL,
+ `index` int unsigned NOT NULL,
+ `uploader` mediumint unsigned NOT NULL,
+ `date` timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP,
+ `description_type` enum("plain", "ontology") NOT NULL,
+ `value` varchar(256) NOT NULL,
+ KEY `did` (`did`,`index`),
+ CONSTRAINT `data_feature_description_ibfk_1` FOREIGN KEY (`did`, `index`) REFERENCES `data_feature` (`did`, `index`) ON DELETE CASCADE ON UPDATE CASCADE
+)'
+
+# Set dataset 1 to active (used in the Java unit tests)
+mysql -hdatabase -uroot -pok -Dopenml_expdb -e 'INSERT IGNORE INTO dataset_status VALUES (1, "active", "2024-01-01 00:00:00", 1)'
+mysql -hdatabase -uroot -pok -Dopenml_expdb -e 'DELETE FROM dataset_status WHERE did = 2 AND status = "deactivated";'
+
+# Temporary fix in case the database is missing the kaggle table. The PHP REST API expects the table to exist while indexing.
+mysql -hdatabase -uroot -pok -Dopenml_expdb -e 'CREATE TABLE IF NOT EXISTS `kaggle` (`dataset_id` int(11) DEFAULT NULL, `kaggle_link` varchar(500) DEFAULT NULL)'
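
Compose runs this script once through the `database-setup` service defined above; to re-apply it by hand against an already-running database, something like the following should work (explicitly naming a service on the command line also activates its profile):

```bash
# One-shot re-run of the setup container; it waits for the database
# healthcheck via depends_on before executing update.sh.
docker compose run --rm database-setup
```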
diff --git a/docker/elasticsearch/.env b/docker/elasticsearch/.env
new file mode 100644
index 0000000..6e19eaa
--- /dev/null
+++ b/docker/elasticsearch/.env
@@ -0,0 +1,3 @@
+ELASTIC_PASSWORD=default
+discovery.type=single-node
+xpack.security.enabled=false
diff --git a/docker/php/.env b/docker/php/.env
new file mode 100644
index 0000000..ad0bb55
--- /dev/null
+++ b/docker/php/.env
@@ -0,0 +1,14 @@
+API_KEY=AD000000000000000000000000000000
+BASE_URL=http://php-api:80/
+MINIO_URL=http://minio:9000/
+DB_HOST_OPENML=database:3306
+DB_HOST_EXPDB=database:3306
+DB_USER_OPENML=root
+DB_PASS_OPENML=ok
+DB_USER_EXPDB_WRITE=root
+DB_PASS_EXPDB_WRITE=ok
+DB_USER_EXPDB_READ=root
+DB_PASS_EXPDB_READ=ok
+ES_URL=elasticsearch:9200
+ES_PASSWORD=default
+INDEX_ES_DURING_STARTUP=false
diff --git a/src/config.py b/src/config.py
index 4c97e4f..8a19f04 100644
--- a/src/config.py
+++ b/src/config.py
@@ -8,6 +8,8 @@
TomlTable = dict[str, typing.Any]
+CONFIG_PATH = Path(__file__).parent / "config.toml"
+
def _apply_defaults_to_siblings(configuration: TomlTable) -> TomlTable:
defaults = configuration["defaults"]
@@ -19,9 +21,17 @@ def _apply_defaults_to_siblings(configuration: TomlTable) -> TomlTable:
@functools.cache
-def load_database_configuration(file: Path = Path(__file__).parent / "config.toml") -> TomlTable:
- configuration = tomllib.loads(file.read_text())
+def _load_configuration(file: Path) -> TomlTable:
+ return typing.cast(TomlTable, tomllib.loads(file.read_text()))
+
+
+def load_routing_configuration(file: Path = CONFIG_PATH) -> TomlTable:
+ return typing.cast(TomlTable, _load_configuration(file)["routing"])
+
+@functools.cache
+def load_database_configuration(file: Path = CONFIG_PATH) -> TomlTable:
+ configuration = _load_configuration(file)
database_configuration = _apply_defaults_to_siblings(
configuration["databases"],
)
diff --git a/src/config.toml b/src/config.toml
index 732d7ad..0812add 100644
--- a/src/config.toml
+++ b/src/config.toml
@@ -15,3 +15,7 @@ database="openml_expdb"
[databases.openml]
database="openml"
+
+[routing]
+minio_url="http://minio:9000/"
+server_url="http://php-api:80/"
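
As a quick illustration of how the new `[routing]` table is consumed (values mirror the `config.toml` above; this snippet is not part of the patch):

```python
from config import load_routing_configuration

routing = load_routing_configuration()  # reads the [routing] table from src/config.toml
print(routing["minio_url"])   # http://minio:9000/
print(routing["server_url"])  # http://php-api:80/
```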
diff --git a/src/core/formatting.py b/src/core/formatting.py
index 83e3f89..174261f 100644
--- a/src/core/formatting.py
+++ b/src/core/formatting.py
@@ -2,7 +2,7 @@
from sqlalchemy.engine import Row
-from config import load_configuration
+from config import load_routing_configuration
from core.errors import DatasetError
from schemas.datasets.openml import DatasetFileFormat
@@ -25,15 +25,16 @@ def _format_parquet_url(dataset: Row) -> str | None:
if dataset.format.lower() != DatasetFileFormat.ARFF:
return None
- minio_base_url = load_configuration()["minio_base_url"]
- prefix = dataset.did // 10_000
- return f"{minio_base_url}/datasets/{prefix:04d}/{dataset.did:04d}/dataset_{dataset.did}.pq"
+ minio_base_url = load_routing_configuration()["minio_url"]
+ ten_thousands_prefix = f"{dataset.did // 10_000:04d}"
+ padded_id = f"{dataset.did:04d}"
+ return f"{minio_base_url}datasets/{ten_thousands_prefix}/{padded_id}/dataset_{dataset.did}.pq"
def _format_dataset_url(dataset: Row) -> str:
- base_url = load_configuration()["arff_base_url"]
+ base_url = load_routing_configuration()["server_url"]
filename = f"{html.escape(dataset.name)}.{dataset.format.lower()}"
- return f"{base_url}/data/v1/download/{dataset.file_id}/{filename}"
+ return f"{base_url}data/v1/download/{dataset.file_id}/{filename}"
def _safe_unquote(text: str | None) -> str | None:
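
For reference, the new URL layout worked through for dataset id 1, using the `minio_url` from `config.toml` (matching the expectation later in `datasets_test.py`):

```python
# Illustrative recomputation of the parquet URL produced by _format_parquet_url.
did = 1
minio_base_url = "http://minio:9000/"
ten_thousands_prefix = f"{did // 10_000:04d}"  # "0000": one folder per 10,000 datasets
padded_id = f"{did:04d}"                       # "0001"
print(f"{minio_base_url}datasets/{ten_thousands_prefix}/{padded_id}/dataset_{did}.pq")
# http://minio:9000/datasets/0000/0001/dataset_1.pq
```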
diff --git a/src/routers/openml/flows.py b/src/routers/openml/flows.py
index 4eae983..083916b 100644
--- a/src/routers/openml/flows.py
+++ b/src/routers/openml/flows.py
@@ -49,14 +49,8 @@ def get_flow(flow_id: int, expdb: Annotated[Connection, Depends(expdb_connection
]
tags = database.flows.get_tags(flow_id, expdb)
- flow_rows = database.flows.get_subflows(for_flow=flow_id, expdb=expdb)
- subflows = [
- {
- "identifier": flow.identifier,
- "flow": get_flow(flow_id=flow.child_id, expdb=expdb),
- }
- for flow in flow_rows
- ]
+ flow_rows = database.flows.get_subflows(flow_id, expdb)
+ subflows = [get_flow(flow_id=flow.child_id, expdb=expdb) for flow in flow_rows]
return Flow(
id_=flow.id,
diff --git a/src/routers/openml/tasks.py b/src/routers/openml/tasks.py
index 4fcb362..96d0198 100644
--- a/src/routers/openml/tasks.py
+++ b/src/routers/openml/tasks.py
@@ -7,6 +7,7 @@
from fastapi import APIRouter, Depends, HTTPException
from sqlalchemy import Connection, RowMapping, text
+import config
import database.datasets
import database.tasks
from routers.dependencies import expdb_connection
@@ -139,7 +140,8 @@ def _fill_json_template(
# I believe that the operations below are always part of string output, so
# we don't need to be careful to avoid losing typedness
template = template.replace("[TASK:id]", str(task.task_id))
- return template.replace("[CONSTANT:base_url]", "https://test.openml.org/")
+ server_url = config.load_routing_configuration()["server_url"]
+ return template.replace("[CONSTANT:base_url]", server_url)
@router.get("/{task_id}")
diff --git a/src/schemas/datasets/mldcat_ap.py b/src/schemas/datasets/mldcat_ap.py
index 9525431..d7e277f 100644
--- a/src/schemas/datasets/mldcat_ap.py
+++ b/src/schemas/datasets/mldcat_ap.py
@@ -275,7 +275,7 @@ class DataService(JsonLDObject):
class JsonLDGraph(BaseModel):
- context: str | dict[str, HttpUrl] = Field(default_factory=dict, serialization_alias="@context")
+ context: str | dict[str, HttpUrl] = Field(default_factory=dict, serialization_alias="@context") # type: ignore[arg-type]
graph: list[Distribution | DataService | Dataset | Quality | Feature | Agent | MD5Checksum] = (
Field(default_factory=list, serialization_alias="@graph")
)
diff --git a/src/schemas/flows.py b/src/schemas/flows.py
index a6cd479..33dc081 100644
--- a/src/schemas/flows.py
+++ b/src/schemas/flows.py
@@ -1,7 +1,7 @@
from __future__ import annotations
from datetime import datetime
-from typing import Any, TypedDict
+from typing import Any, Self
from pydantic import BaseModel, ConfigDict, Field
@@ -25,12 +25,7 @@ class Flow(BaseModel):
language: str | None = Field(max_length=128)
dependencies: str | None
parameter: list[Parameter]
- subflows: list[Subflow]
+ subflows: list[Self]
tag: list[str]
model_config = ConfigDict(arbitrary_types_allowed=True)
-
-
-class Subflow(TypedDict):
- identifier: str | None
- flow: Flow
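
Dropping the `Subflow` wrapper makes `Flow` a self-referencing model. A minimal standalone sketch of a recursive Pydantic model (using a plain forward reference rather than the `typing.Self` annotation the patch uses):

```python
from __future__ import annotations

from pydantic import BaseModel


class Node(BaseModel):
    name: str
    children: list[Node] = []


tree = Node(name="weka.AdaBoostM1_J48", children=[Node(name="weka.J48")])
print(tree.model_dump())
# {'name': 'weka.AdaBoostM1_J48', 'children': [{'name': 'weka.J48', 'children': []}]}
```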
diff --git a/tests/conftest.py b/tests/conftest.py
index 4d2c2c9..eecc128 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -16,6 +16,8 @@
from main import create_api
from routers.dependencies import expdb_connection, userdb_connection
+PHP_API_URL = "http://openml-php-rest-api:80/api/v1/json"
+
@contextlib.contextmanager
def automatic_rollback(engine: Engine) -> Iterator[Connection]:
@@ -39,8 +41,8 @@ def user_test() -> Connection:
@pytest.fixture
-def php_api() -> Iterator[httpx.Client]:
- with httpx.Client(base_url="http://server-api-php-api-1:80/api/v1/json") as client:
+def php_api() -> httpx.Client:
+ with httpx.Client(base_url=PHP_API_URL) as client:
yield client
diff --git a/tests/constants.py b/tests/constants.py
index e471fd5..6881f88 100644
--- a/tests/constants.py
+++ b/tests/constants.py
@@ -1,6 +1,6 @@
PRIVATE_DATASET_ID = {130}
-IN_PREPARATION_ID = {1, 33}
-DEACTIVATED_DATASETS = {2, 131}
+IN_PREPARATION_ID = {33}
+DEACTIVATED_DATASETS = {131}
DATASETS = set(range(1, 132))
NUMBER_OF_DATASETS = len(DATASETS)
diff --git a/tests/routers/openml/datasets_test.py b/tests/routers/openml/datasets_test.py
index 7c1457f..b463d3d 100644
--- a/tests/routers/openml/datasets_test.py
+++ b/tests/routers/openml/datasets_test.py
@@ -8,6 +8,7 @@
from database.users import User
from routers.openml.datasets import get_dataset
from schemas.datasets.openml import DatasetMetadata, DatasetStatus
+from tests import constants
from tests.users import ADMIN_USER, NO_USER, OWNER_USER, SOME_USER, ApiKey
@@ -44,14 +45,14 @@ def test_get_dataset(py_api: TestClient) -> None:
"description_version": 1,
"upload_date": "2014-04-06T23:19:24",
"licence": "Public",
- "url": "https://test.openml.org/data/v1/download/1/anneal.arff",
- "parquet_url": "https://openml1.win.tue.nl/datasets/0000/0001/dataset_1.pq",
+ "url": "http://php-api/data/v1/download/1/anneal.arff",
+ "parquet_url": "http://minio:9000/datasets/0000/0001/dataset_1.pq",
"file_id": 1,
"default_target_attribute": ["class"],
"version_label": "1",
"tag": ["study_14"],
"visibility": "public",
- "status": "in_preparation",
+ "status": "active",
"processing_date": "2024-01-04T10:13:59",
"md5_checksum": "4eaed8b6ec9d8211024b6c089b064761",
"row_id_attribute": [],
@@ -222,7 +223,7 @@ def test_dataset_status_update_active_to_deactivated(dataset_id: int, py_api: Te
def test_dataset_status_update_in_preparation_to_active(py_api: TestClient) -> None:
_assert_status_update_is_successful(
apikey=ApiKey.ADMIN,
- dataset_id=1,
+ dataset_id=next(iter(constants.IN_PREPARATION_ID)),
status=DatasetStatus.ACTIVE,
py_api=py_api,
)
@@ -232,7 +233,7 @@ def test_dataset_status_update_in_preparation_to_active(py_api: TestClient) -> N
def test_dataset_status_update_in_preparation_to_deactivated(py_api: TestClient) -> None:
_assert_status_update_is_successful(
apikey=ApiKey.ADMIN,
- dataset_id=1,
+ dataset_id=next(iter(constants.IN_PREPARATION_ID)),
status=DatasetStatus.DEACTIVATED,
py_api=py_api,
)
@@ -242,7 +243,7 @@ def test_dataset_status_update_in_preparation_to_deactivated(py_api: TestClient)
def test_dataset_status_update_deactivated_to_active(py_api: TestClient) -> None:
_assert_status_update_is_successful(
apikey=ApiKey.ADMIN,
- dataset_id=131,
+ dataset_id=next(iter(constants.DEACTIVATED_DATASETS)),
status=DatasetStatus.ACTIVE,
py_api=py_api,
)
diff --git a/tests/routers/openml/flows_test.py b/tests/routers/openml/flows_test.py
index 2bf9fc3..611e478 100644
--- a/tests/routers/openml/flows_test.py
+++ b/tests/routers/openml/flows_test.py
@@ -221,148 +221,141 @@ def test_get_flow_with_subflow(py_api: TestClient) -> None:
],
"subflows": [
{
- "identifier": None,
- "flow": {
- "id": 4,
- "uploader": 16,
- "name": "weka.J48",
- "class_name": "weka.classifiers.trees.J48",
- "version": 1,
- "external_version": "Weka_3.9.0_11194",
- "description": (
- "Ross Quinlan (1993). C4.5: Programs for Machine Learning. "
- "Morgan Kaufmann Publishers, San Mateo, CA."
- ),
- "upload_date": "2017-03-24T14:26:40",
- "language": "English",
- "dependencies": "Weka_3.9.0",
- "parameter": [
- {
- "name": "-do-not-check-capabilities",
- "data_type": "flag",
- "default_value": None,
- "description": (
- "If set, classifier capabilities are not checked"
- " before classifier is built\n\t(use with caution)."
- ),
- },
- {
- "name": "-doNotMakeSplitPointActualValue",
- "data_type": "flag",
- "default_value": None,
- "description": "Do not make split point actual value.",
- },
- {
- "name": "A",
- "data_type": "flag",
- "default_value": None,
- "description": "Laplace smoothing for predicted probabilities.",
- },
- {
- "name": "B",
- "data_type": "flag",
- "default_value": None,
- "description": "Use binary splits only.",
- },
- {
- "name": "C",
- "data_type": "option",
- "default_value": 0.25,
- "description": (
- "Set confidence threshold for pruning.\n\t(default 0.25)"
- ),
- },
- {
- "name": "J",
- "data_type": "flag",
- "default_value": None,
- "description": (
- "Do not use MDL correction for info gain on numeric attributes."
- ),
- },
- {
- "name": "L",
- "data_type": "flag",
- "default_value": None,
- "description": "Do not clean up after the tree has been built.",
- },
- {
- "name": "M",
- "data_type": "option",
- "default_value": 2,
- "description": (
- "Set minimum number of instances per leaf.\n\t(default 2)"
- ),
- },
- {
- "name": "N",
- "data_type": "option",
- "default_value": None,
- "description": (
- "Set number of folds for reduced error\n\t"
- "pruning. One fold is used as pruning set.\n\t(default 3)"
- ),
- },
- {
- "name": "O",
- "data_type": "flag",
- "default_value": None,
- "description": "Do not collapse tree.",
- },
- {
- "name": "Q",
- "data_type": "option",
- "default_value": None,
- "description": "Seed for random data shuffling (default 1).",
- },
- {
- "name": "R",
- "data_type": "flag",
- "default_value": None,
- "description": "Use reduced error pruning.",
- },
- {
- "name": "S",
- "data_type": "flag",
- "default_value": None,
- "description": "Do not perform subtree raising.",
- },
- {
- "name": "U",
- "data_type": "flag",
- "default_value": None,
- "description": "Use unpruned tree.",
- },
- {
- "name": "batch-size",
- "data_type": "option",
- "default_value": None,
- "description": (
- "The desired batch size for batch prediction (default 100)."
- ),
- },
- {
- "name": "num-decimal-places",
- "data_type": "option",
- "default_value": None,
- "description": (
- "The number of decimal places for the output of numbers"
- " in the model (default 2)."
- ),
- },
- {
- "name": "output-debug-info",
- "data_type": "flag",
- "default_value": None,
- "description": (
- "If set, classifier is run in debug mode and\n\t"
- "may output additional info to the console"
- ),
- },
- ],
- "tag": ["OpenmlWeka", "weka"],
- "subflows": [],
- },
+ "id": 4,
+ "uploader": 16,
+ "name": "weka.J48",
+ "class_name": "weka.classifiers.trees.J48",
+ "version": 1,
+ "external_version": "Weka_3.9.0_11194",
+ "description": (
+ "Ross Quinlan (1993). C4.5: Programs for Machine Learning. "
+ "Morgan Kaufmann Publishers, San Mateo, CA."
+ ),
+ "upload_date": "2017-03-24T14:26:40",
+ "language": "English",
+ "dependencies": "Weka_3.9.0",
+ "parameter": [
+ {
+ "name": "-do-not-check-capabilities",
+ "data_type": "flag",
+ "default_value": None,
+ "description": (
+ "If set, classifier capabilities are not checked"
+ " before classifier is built\n\t(use with caution)."
+ ),
+ },
+ {
+ "name": "-doNotMakeSplitPointActualValue",
+ "data_type": "flag",
+ "default_value": None,
+ "description": "Do not make split point actual value.",
+ },
+ {
+ "name": "A",
+ "data_type": "flag",
+ "default_value": None,
+ "description": "Laplace smoothing for predicted probabilities.",
+ },
+ {
+ "name": "B",
+ "data_type": "flag",
+ "default_value": None,
+ "description": "Use binary splits only.",
+ },
+ {
+ "name": "C",
+ "data_type": "option",
+ "default_value": 0.25,
+ "description": ("Set confidence threshold for pruning.\n\t(default 0.25)"),
+ },
+ {
+ "name": "J",
+ "data_type": "flag",
+ "default_value": None,
+ "description": (
+ "Do not use MDL correction for info gain on numeric attributes."
+ ),
+ },
+ {
+ "name": "L",
+ "data_type": "flag",
+ "default_value": None,
+ "description": "Do not clean up after the tree has been built.",
+ },
+ {
+ "name": "M",
+ "data_type": "option",
+ "default_value": 2,
+ "description": ("Set minimum number of instances per leaf.\n\t(default 2)"),
+ },
+ {
+ "name": "N",
+ "data_type": "option",
+ "default_value": None,
+ "description": (
+ "Set number of folds for reduced error\n\t"
+ "pruning. One fold is used as pruning set.\n\t(default 3)"
+ ),
+ },
+ {
+ "name": "O",
+ "data_type": "flag",
+ "default_value": None,
+ "description": "Do not collapse tree.",
+ },
+ {
+ "name": "Q",
+ "data_type": "option",
+ "default_value": None,
+ "description": "Seed for random data shuffling (default 1).",
+ },
+ {
+ "name": "R",
+ "data_type": "flag",
+ "default_value": None,
+ "description": "Use reduced error pruning.",
+ },
+ {
+ "name": "S",
+ "data_type": "flag",
+ "default_value": None,
+ "description": "Do not perform subtree raising.",
+ },
+ {
+ "name": "U",
+ "data_type": "flag",
+ "default_value": None,
+ "description": "Use unpruned tree.",
+ },
+ {
+ "name": "batch-size",
+ "data_type": "option",
+ "default_value": None,
+ "description": (
+ "The desired batch size for batch prediction (default 100)."
+ ),
+ },
+ {
+ "name": "num-decimal-places",
+ "data_type": "option",
+ "default_value": None,
+ "description": (
+ "The number of decimal places for the output of numbers"
+ " in the model (default 2)."
+ ),
+ },
+ {
+ "name": "output-debug-info",
+ "data_type": "flag",
+ "default_value": None,
+ "description": (
+ "If set, classifier is run in debug mode and\n\t"
+ "may output additional info to the console"
+ ),
+ },
+ ],
+ "tag": ["OpenmlWeka", "weka"],
+ "subflows": [],
},
],
"tag": ["OpenmlWeka", "weka"],
diff --git a/tests/routers/openml/migration/datasets_migration_test.py b/tests/routers/openml/migration/datasets_migration_test.py
index 3faca11..812bde7 100644
--- a/tests/routers/openml/migration/datasets_migration_test.py
+++ b/tests/routers/openml/migration/datasets_migration_test.py
@@ -1,11 +1,11 @@
import json
from http import HTTPStatus
-import constants
import httpx
import pytest
from starlette.testclient import TestClient
+import tests.constants
from core.conversions import nested_remove_single_element_list
from tests.users import ApiKey
@@ -43,6 +43,9 @@ def test_dataset_response_is_identical( # noqa: C901, PLR0912
# The new API has normalized `format` field:
original_json["format"] = original_json["format"].lower()
+ # Pydantic HttpUrl serialization omits port 80 for HTTP URLs.
+ original_json["url"] = original_json["url"].replace(":80", "")
+
# There is odd behavior in the live server that I don't want to recreate:
# when the creator is a list of csv names, it can either be a str or a list
# depending on whether the names are quoted. E.g.:
@@ -127,7 +130,7 @@ def test_private_dataset_owner_access(
php_api: TestClient,
api_key: str,
) -> None:
- [private_dataset] = constants.PRIVATE_DATASET_ID
+ [private_dataset] = tests.constants.PRIVATE_DATASET_ID
new_response = py_api.get(f"/datasets/{private_dataset}?api_key={api_key}")
old_response = php_api.get(f"/data/{private_dataset}?api_key={api_key}")
assert old_response.status_code == HTTPStatus.OK
diff --git a/tests/routers/openml/migration/flows_migration_test.py b/tests/routers/openml/migration/flows_migration_test.py
index 674bc43..14d8088 100644
--- a/tests/routers/openml/migration/flows_migration_test.py
+++ b/tests/routers/openml/migration/flows_migration_test.py
@@ -65,9 +65,7 @@ def convert_flow_naming_and_defaults(flow: dict[str, Any]) -> dict[str, Any]:
if parameter["default_value"] is None:
parameter["default_value"] = []
for subflow in flow["subflows"]:
- subflow["flow"] = convert_flow_naming_and_defaults(subflow["flow"])
- if subflow["identifier"] is None:
- subflow["identifier"] = []
+ convert_flow_naming_and_defaults(subflow)
flow["component"] = flow.pop("subflows")
if flow["component"] == []:
flow.pop("component")
@@ -77,6 +75,8 @@ def convert_flow_naming_and_defaults(flow: dict[str, Any]) -> dict[str, Any]:
new = nested_remove_single_element_list(new)
expected = php_api.get(f"/flow/{flow_id}").json()["flow"]
+ if subflow := expected.get("component"):
+ expected["component"] = subflow["flow"]
# The reason we don't transform "new" to str is that it becomes harder to ignore numeric type
# differences (e.g., '1.0' vs '1')
expected = nested_str_to_num(expected)
diff --git a/tests/routers/openml/task_test.py b/tests/routers/openml/task_test.py
index 89fc316..d635fdf 100644
--- a/tests/routers/openml/task_test.py
+++ b/tests/routers/openml/task_test.py
@@ -19,7 +19,7 @@ def test_get_task(py_api: TestClient) -> None:
"estimation_procedure": {
"id": 5,
"type": "holdout",
- "data_splits_url": "https://test.openml.org/api_splits/get/59/Task_59_splits.arff",
+ "data_splits_url": "http://php-api:80/api_splits/get/59/Task_59_splits.arff",
"parameter": [
{"name": "number_repeats", "value": 1},
{"name": "number_folds", "value": None},