From 9b46f4ad97eeec5f440b9b772486985b490c86a9 Mon Sep 17 00:00:00 2001
From: juanmichelini
Date: Fri, 16 Jan 2026 16:28:05 -0300
Subject: [PATCH 1/2] fix(swebench-multimodal): create output.report.json for consistency with other benchmarks

The swebench-multimodal evaluation was only creating report.json, while
other benchmarks (swebench, commit0, etc.) create output.report.json.
This caused the push-to-index workflow to fail when trying to find
output.report.json.

This change copies report.json to output.report.json after evaluation
completes, matching the behavior of other benchmarks.

Fixes the issue where GitHub Actions run 21077636459 failed to create a
PR in openhands-index-results because the script couldn't find
output.report.json.
---
 benchmarks/swebenchmultimodal/eval_infer.py | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/benchmarks/swebenchmultimodal/eval_infer.py b/benchmarks/swebenchmultimodal/eval_infer.py
index f8404f64..c79ef871 100644
--- a/benchmarks/swebenchmultimodal/eval_infer.py
+++ b/benchmarks/swebenchmultimodal/eval_infer.py
@@ -279,6 +279,17 @@ def main() -> None:
             str(output_file), args.dataset, args.split, args.workers, args.run_id
         )
 
+        # Copy report.json to output.report.json for consistency with other benchmarks
+        # SWE-Bench Multimodal creates report.json in the same directory as the predictions file
+        report_path = output_file.parent / "report.json"
+        dest_report_path = input_file.with_suffix(".report.json")
+        
+        if report_path.exists():
+            shutil.copy(str(report_path), str(dest_report_path))
+            logger.info(f"Copied report file to: {dest_report_path}")
+        else:
+            logger.warning(f"Report file not found at expected location: {report_path}")
+
         # Generate cost report as final step
         generate_cost_report(str(input_file))
 

From d39ed21830cac49e90107615aae472697d0a040e Mon Sep 17 00:00:00 2001
From: openhands
Date: Fri, 16 Jan 2026 20:47:46 +0000
Subject: [PATCH 2/2] fix: add missing shutil import for report file copy

The shutil module was used but not imported, causing pre-commit checks
to fail.

Co-authored-by: openhands
---
 benchmarks/swebenchmultimodal/eval_infer.py | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/benchmarks/swebenchmultimodal/eval_infer.py b/benchmarks/swebenchmultimodal/eval_infer.py
index c79ef871..165ef85c 100644
--- a/benchmarks/swebenchmultimodal/eval_infer.py
+++ b/benchmarks/swebenchmultimodal/eval_infer.py
@@ -11,6 +11,7 @@
 
 import argparse
 import json
+import shutil
 import subprocess
 import sys
 from pathlib import Path
@@ -283,12 +284,14 @@ def main() -> None:
         # SWE-Bench Multimodal creates report.json in the same directory as the predictions file
         report_path = output_file.parent / "report.json"
         dest_report_path = input_file.with_suffix(".report.json")
-        
+
         if report_path.exists():
             shutil.copy(str(report_path), str(dest_report_path))
             logger.info(f"Copied report file to: {dest_report_path}")
         else:
-            logger.warning(f"Report file not found at expected location: {report_path}")
+            logger.warning(
+                f"Report file not found at expected location: {report_path}"
+            )
 
         # Generate cost report as final step
         generate_cost_report(str(input_file))