From b1dd57d38c7d199db974d9c76d07301e556e6e50 Mon Sep 17 00:00:00 2001 From: "bram.pramono" Date: Thu, 27 Nov 2025 11:41:31 +0100 Subject: [PATCH 1/2] fix(tasks): print also tasks not prefixed by the suite name --- src/lighteval/tasks/registry.py | 40 ++++++++++++++++++++++----------- 1 file changed, 27 insertions(+), 13 deletions(-) diff --git a/src/lighteval/tasks/registry.py b/src/lighteval/tasks/registry.py index e7c4e9eb6..9ab63b309 100644 --- a/src/lighteval/tasks/registry.py +++ b/src/lighteval/tasks/registry.py @@ -410,35 +410,49 @@ def print_all_tasks(self, suites: str | None = None): # Get all tasks and filter by requested suites all_tasks = list(self._task_registry.keys()) - tasks_names = [task for task in all_tasks if task.split("|")[0] in requested_suites] - - # Ensure all requested suites are present (even if empty) - suites_in_registry = {name.split("|")[0] for name in tasks_names} - for suite in requested_suites: - if suite not in suites_in_registry: - # We add a dummy task to make sure the suite is printed - tasks_names.append(f"{suite}|") - - tasks_names.sort() + non_suite_tasks, tasks_on_suite = self._group_tasks(all_tasks, requested_suites) print(f"Displaying tasks for suites: {', '.join(requested_suites)}") print("=" * 60) - for suite, g in groupby(tasks_names, lambda x: x.split("|")[0]): + for suite, g in groupby(tasks_on_suite, lambda x: x.split("|")[0]): tasks_in_suite = [name for name in g if name.split("|")[1]] # Filter out dummy tasks tasks_in_suite.sort() - print(f"\n- {suite}:") + print(f"\n* {suite}:") if not tasks_in_suite: print(" (no tasks in this suite)") else: for task_name in tasks_in_suite: print(f" - {task_name}") + print("\n* Non suite tasks:") + for task_name in non_suite_tasks: + print(f" - {task_name}") # Print summary - total_tasks = len([t for t in tasks_names if t.split("|")[1]]) + total_tasks = len(tasks_on_suite) + len(non_suite_tasks) print(f"\nTotal tasks displayed: {total_tasks}") + def _group_tasks(self, all_tasks: list[str], requested_suites: list[str]) -> tuple[list[str], list[str]]: + non_suite_tasks = [] + tasks_on_suite = [] + for task in all_tasks: + if task.split("|")[0] in requested_suites: + tasks_on_suite.append(task) + else: + non_suite_tasks.append(task) + + # Ensure all requested suites are present (even if empty) + suites_in_registry = {name.split("|")[0] for name in tasks_on_suite} + for suite in requested_suites: + if suite not in suites_in_registry: + # We add a dummy task to make sure the suite is printed + tasks_on_suite.append(f"{suite}|") + + tasks_on_suite.sort() + + return non_suite_tasks, tasks_on_suite + def get_tasks_dump(self) -> list[dict]: # noqa: C901 """Get all task names, metadata, and docstrings as a Python object. From 89a2007ebf95a7110cd7f8430bdded752ad506e5 Mon Sep 17 00:00:00 2001 From: "bram.pramono" Date: Tue, 16 Dec 2025 16:15:25 +0100 Subject: [PATCH 2/2] fix(tasks): remove suites in listing tasks --- src/lighteval/tasks/registry.py | 111 +++++--------------------------- 1 file changed, 17 insertions(+), 94 deletions(-) diff --git a/src/lighteval/tasks/registry.py b/src/lighteval/tasks/registry.py index 9ab63b309..832b0c019 100644 --- a/src/lighteval/tasks/registry.py +++ b/src/lighteval/tasks/registry.py @@ -80,34 +80,12 @@ def load_community_tasks(): logger = logging.getLogger(__name__) -# Helm, Bigbench, Harness are implementations following an evaluation suite setup # Original follows the original implementation as closely as possible # Leaderboard are the evaluations we fixed on the open llm leaderboard - you should get similar results # Community are for community added evaluations # Extended are for evaluations with custom logic # Custom is for all the experiments you might want to do! -# Core suites - always available without extra dependencies -CORE_SUITES = [ - "helm", - "bigbench", - "harness", - "leaderboard", - "lighteval", - "original", - "extended", - "custom", - "test", -] - -# Optional suites - may require extra dependencies -OPTIONAL_SUITES = [ - "community", - "multilingual", -] - -DEFAULT_SUITES = CORE_SUITES + OPTIONAL_SUITES - class Registry: """The Registry class is used to manage the task registry and get task classes.""" @@ -138,7 +116,6 @@ def __init__( TASKS_TABLE = [ LightevalTaskConfig( name="custom_task", - suite="custom", ... ) ] @@ -187,7 +164,7 @@ def _update_task_configs(self) -> dict[str, LightevalTaskConfig]: # noqa: C901 Now expects task specs in the form: - task|few_shot - task (defaults to few_shot=0) - Backwards-compat for suite|task|few_shot is preserved but the suite is ignored. + Backwards-compat for task|few_shot is preserved. """ task_to_configs = collections.defaultdict(list) @@ -259,7 +236,7 @@ def load_tasks(self) -> dict[str, LightevalTask]: @lru_cache def _task_superset_dict(self): """Returns: - dict[str, list[str]]: A dictionary where keys are task super set names (suite|task) and values are lists of task subset names (suite|task). + dict[str, list[str]]: A dictionary where keys are task super set names (task) and values are lists of task subset names (task). Example: { @@ -276,11 +253,11 @@ def _expand_task_definition(self, task_definition: str): """ Args: task_definition (str): Task definition to expand. In format: - - suite|task - - suite|task_superset (e.g lighteval|mmlu, which runs all the mmlu subtasks) + - task + - task_superset (e.g mmlu, which runs all the mmlu subtasks) Returns: - list[str]: List of task names (suite|task) + list[str]: List of task names (task) """ # Try if it's a task superset tasks = self._task_superset_dict.get(task_definition, None) @@ -379,80 +356,26 @@ def load_all_task_configs( logger.info(f"Loaded {len(loaded_configs)} task configs in {time_end - time_start:.1f} seconds") return loaded_configs - def print_all_tasks(self, suites: str | None = None): - """Print all the tasks in the task registry. - - Args: - suites: Comma-separated list of suites to display. If None, shows core suites only. - Use 'all' to show all available suites (core + optional). - Special handling for 'multilingual' suite with dependency checking. - """ - # Parse requested suites - if suites is None: - requested_suites = CORE_SUITES.copy() - else: - requested_suites = [s.strip() for s in suites.split(",")] - - # Handle 'all' special case - if "all" in requested_suites: - requested_suites = DEFAULT_SUITES.copy() + def print_all_tasks(self): + """Print all the tasks in the task registry.""" - # Check for multilingual dependencies if requested - if "multilingual" in requested_suites: - import importlib.util + # Get all tasks + all_tasks = sorted(list(self._task_registry.keys())) - if importlib.util.find_spec("langcodes") is None: - logger.warning( - "Multilingual tasks require additional dependencies (langcodes). " - "Install them with: pip install langcodes" - ) - requested_suites.remove("multilingual") - - # Get all tasks and filter by requested suites - all_tasks = list(self._task_registry.keys()) - non_suite_tasks, tasks_on_suite = self._group_tasks(all_tasks, requested_suites) - - print(f"Displaying tasks for suites: {', '.join(requested_suites)}") + print(f"Displaying tasks:") print("=" * 60) - for suite, g in groupby(tasks_on_suite, lambda x: x.split("|")[0]): - tasks_in_suite = [name for name in g if name.split("|")[1]] # Filter out dummy tasks - tasks_in_suite.sort() - - print(f"\n* {suite}:") - if not tasks_in_suite: - print(" (no tasks in this suite)") - else: - for task_name in tasks_in_suite: - print(f" - {task_name}") - - print("\n* Non suite tasks:") - for task_name in non_suite_tasks: + last_task = None + for task_name in all_tasks: + task_parts = task_name.split(":") + if last_task != task_parts[0]: + print("") + last_task = task_parts[0] print(f" - {task_name}") # Print summary - total_tasks = len(tasks_on_suite) + len(non_suite_tasks) + total_tasks = len(all_tasks) print(f"\nTotal tasks displayed: {total_tasks}") - def _group_tasks(self, all_tasks: list[str], requested_suites: list[str]) -> tuple[list[str], list[str]]: - non_suite_tasks = [] - tasks_on_suite = [] - for task in all_tasks: - if task.split("|")[0] in requested_suites: - tasks_on_suite.append(task) - else: - non_suite_tasks.append(task) - - # Ensure all requested suites are present (even if empty) - suites_in_registry = {name.split("|")[0] for name in tasks_on_suite} - for suite in requested_suites: - if suite not in suites_in_registry: - # We add a dummy task to make sure the suite is printed - tasks_on_suite.append(f"{suite}|") - - tasks_on_suite.sort() - - return non_suite_tasks, tasks_on_suite - def get_tasks_dump(self) -> list[dict]: # noqa: C901 """Get all task names, metadata, and docstrings as a Python object.