diff --git a/src/lighteval/tasks/registry.py b/src/lighteval/tasks/registry.py index e7c4e9eb6..832b0c019 100644 --- a/src/lighteval/tasks/registry.py +++ b/src/lighteval/tasks/registry.py @@ -80,34 +80,12 @@ def load_community_tasks(): logger = logging.getLogger(__name__) -# Helm, Bigbench, Harness are implementations following an evaluation suite setup # Original follows the original implementation as closely as possible # Leaderboard are the evaluations we fixed on the open llm leaderboard - you should get similar results # Community are for community added evaluations # Extended are for evaluations with custom logic # Custom is for all the experiments you might want to do! -# Core suites - always available without extra dependencies -CORE_SUITES = [ - "helm", - "bigbench", - "harness", - "leaderboard", - "lighteval", - "original", - "extended", - "custom", - "test", -] - -# Optional suites - may require extra dependencies -OPTIONAL_SUITES = [ - "community", - "multilingual", -] - -DEFAULT_SUITES = CORE_SUITES + OPTIONAL_SUITES - class Registry: """The Registry class is used to manage the task registry and get task classes.""" @@ -138,7 +116,6 @@ def __init__( TASKS_TABLE = [ LightevalTaskConfig( name="custom_task", - suite="custom", ... ) ] @@ -187,7 +164,7 @@ def _update_task_configs(self) -> dict[str, LightevalTaskConfig]: # noqa: C901 Now expects task specs in the form: - task|few_shot - task (defaults to few_shot=0) - Backwards-compat for suite|task|few_shot is preserved but the suite is ignored. + Backwards-compat for task|few_shot is preserved. """ task_to_configs = collections.defaultdict(list) @@ -259,7 +236,7 @@ def load_tasks(self) -> dict[str, LightevalTask]: @lru_cache def _task_superset_dict(self): """Returns: - dict[str, list[str]]: A dictionary where keys are task super set names (suite|task) and values are lists of task subset names (suite|task). + dict[str, list[str]]: A dictionary where keys are task super set names (task) and values are lists of task subset names (task). Example: { @@ -276,11 +253,11 @@ def _expand_task_definition(self, task_definition: str): """ Args: task_definition (str): Task definition to expand. In format: - - suite|task - - suite|task_superset (e.g lighteval|mmlu, which runs all the mmlu subtasks) + - task + - task_superset (e.g mmlu, which runs all the mmlu subtasks) Returns: - list[str]: List of task names (suite|task) + list[str]: List of task names (task) """ # Try if it's a task superset tasks = self._task_superset_dict.get(task_definition, None) @@ -379,64 +356,24 @@ def load_all_task_configs( logger.info(f"Loaded {len(loaded_configs)} task configs in {time_end - time_start:.1f} seconds") return loaded_configs - def print_all_tasks(self, suites: str | None = None): - """Print all the tasks in the task registry. - - Args: - suites: Comma-separated list of suites to display. If None, shows core suites only. - Use 'all' to show all available suites (core + optional). - Special handling for 'multilingual' suite with dependency checking. - """ - # Parse requested suites - if suites is None: - requested_suites = CORE_SUITES.copy() - else: - requested_suites = [s.strip() for s in suites.split(",")] - - # Handle 'all' special case - if "all" in requested_suites: - requested_suites = DEFAULT_SUITES.copy() + def print_all_tasks(self): + """Print all the tasks in the task registry.""" - # Check for multilingual dependencies if requested - if "multilingual" in requested_suites: - import importlib.util + # Get all tasks + all_tasks = sorted(list(self._task_registry.keys())) - if importlib.util.find_spec("langcodes") is None: - logger.warning( - "Multilingual tasks require additional dependencies (langcodes). " - "Install them with: pip install langcodes" - ) - requested_suites.remove("multilingual") - - # Get all tasks and filter by requested suites - all_tasks = list(self._task_registry.keys()) - tasks_names = [task for task in all_tasks if task.split("|")[0] in requested_suites] - - # Ensure all requested suites are present (even if empty) - suites_in_registry = {name.split("|")[0] for name in tasks_names} - for suite in requested_suites: - if suite not in suites_in_registry: - # We add a dummy task to make sure the suite is printed - tasks_names.append(f"{suite}|") - - tasks_names.sort() - - print(f"Displaying tasks for suites: {', '.join(requested_suites)}") + print(f"Displaying tasks:") print("=" * 60) - for suite, g in groupby(tasks_names, lambda x: x.split("|")[0]): - tasks_in_suite = [name for name in g if name.split("|")[1]] # Filter out dummy tasks - tasks_in_suite.sort() - - print(f"\n- {suite}:") - if not tasks_in_suite: - print(" (no tasks in this suite)") - else: - for task_name in tasks_in_suite: - print(f" - {task_name}") - + last_task = None + for task_name in all_tasks: + task_parts = task_name.split(":") + if last_task != task_parts[0]: + print("") + last_task = task_parts[0] + print(f" - {task_name}") # Print summary - total_tasks = len([t for t in tasks_names if t.split("|")[1]]) + total_tasks = len(all_tasks) print(f"\nTotal tasks displayed: {total_tasks}") def get_tasks_dump(self) -> list[dict]: # noqa: C901