From 5e0ee6989b360bb99be648cee41be15421338f97 Mon Sep 17 00:00:00 2001 From: JewelRoam <2752594773@qq.com> Date: Thu, 15 Jan 2026 22:09:33 +0800 Subject: [PATCH 01/17] add eval_backend_perf --- graph_net_bench/torch/eval_backend_diff.py | 419 +++++---------------- graph_net_bench/torch/eval_backend_perf.py | 337 +++++++++++++++++ graph_net_bench/torch/utils.py | 11 + test/eval_backend_diff_test.sh | 13 +- 4 files changed, 458 insertions(+), 322 deletions(-) create mode 100644 graph_net_bench/torch/eval_backend_perf.py diff --git a/graph_net_bench/torch/eval_backend_diff.py b/graph_net_bench/torch/eval_backend_diff.py index 8488b71b7..07a19ff88 100755 --- a/graph_net_bench/torch/eval_backend_diff.py +++ b/graph_net_bench/torch/eval_backend_diff.py @@ -1,103 +1,18 @@ from . import utils import subprocess import argparse -import importlib.util import torch -from pathlib import Path -from typing import Type import sys import os import os.path import traceback import json -import random -import numpy as np -import platform import base64 -from graph_net_bench.torch.backend.graph_compiler_backend import GraphCompilerBackend -from graph_net_bench.torch.backend.tvm_backend import TvmBackend -from graph_net_bench.torch.backend.xla_backend import XlaBackend -from graph_net_bench.torch.backend.inductor_backend import InductorBackend -from graph_net_bench.torch.backend.tensorrt_backend import TensorRTBackend -from graph_net_bench.torch.backend.blade_disc_backend import BladeDISCBackend -from graph_net_bench.torch.backend.nope_backend import NopeBackend -from graph_net_bench.torch.backend.pass_mgr_backend import PassMgrBackend -from graph_net_bench.torch.backend.unstable_to_stable_backend import ( - UnstableToStableBackend, -) -from graph_net_bench.torch.backend.range_decomposer_validator_backend import ( - RangeDecomposerValidatorBackend, -) -from graph_net_bench.torch.backend.graph_variable_renamer_validator_backend import ( - GraphVariableRenamerValidatorBackend, -) +import types from graph_net_bench import test_compiler_util from graph_net_bench import path_utils -compiler_backend_name2class = { - "tvm": TvmBackend, - "xla": XlaBackend, - "inductor": InductorBackend, - "tensorrt": TensorRTBackend, - "bladedisc": BladeDISCBackend, - "nope": NopeBackend, - "pass_mgr": PassMgrBackend, - "unstable_to_stable": UnstableToStableBackend, - "range_decomposer_validator": RangeDecomposerValidatorBackend, - "graph_variable_renamer_validator": GraphVariableRenamerValidatorBackend, -} - - -def set_seed(random_seed): - random.seed(random_seed) - np.random.seed(random_seed) - torch.manual_seed(random_seed) - if torch.cuda.is_available(): - torch.cuda.manual_seed(random_seed) - torch.cuda.manual_seed_all(random_seed) - - -def get_hardward_name(args): - hardware_name = "unknown" - if "cuda" in args.device: - hardware_name = torch.cuda.get_device_name(args.device) - elif args.device == "cpu": - hardware_name = platform.processor() - return hardware_name - - -def get_compile_framework_version(args): - if args.compiler in ["inductor", "nope", "unstable_to_stable"]: - return torch.__version__ - elif args.compiler in ["tvm", "xla", "tensorrt", "bladedisc"]: - # Assuming compiler object has a version attribute - return f"{args.compiler.capitalize()} {args.compiler.version}" - return "unknown" - - -def load_class_from_file( - args: argparse.Namespace, class_name: str, device: str -) -> Type[torch.nn.Module]: - file_path = f"{args.model_path}/model.py" - file = Path(file_path).resolve() - module_name = file.stem - - with 
open(file_path, "r", encoding="utf-8") as f: - model_code = f.read() - model_code = utils.modify_code_by_device(model_code, device) - spec = importlib.util.spec_from_loader(module_name, loader=None) - module = importlib.util.module_from_spec(spec) - sys.modules[module_name] = module - compiled_code = compile(model_code, filename=file, mode="exec") - exec(compiled_code, module.__dict__) - - model_class = getattr(module, class_name, None) - setattr(model_class, "__graph_net_file_path__", file_path) - setattr(model_class, "__graph_net_device__", device) - return model_class - - def convert_to_dict(config_str): if config_str in {None, "", "null", "None"}: return {} @@ -107,203 +22,6 @@ def convert_to_dict(config_str): return config -def get_compiler_backend(args) -> GraphCompilerBackend: - assert ( - args.compiler in compiler_backend_name2class - ), f"Unknown compiler: {args.compiler}" - backend_class = compiler_backend_name2class[args.compiler] - return backend_class(args.backend_config) - - -def get_model(args): - device = "xla" if args.compiler == "xla" else args.device - - # device: Torch device object specifying the target device for model loading (e.g., 'cuda', 'cpu', 'xla') - model_class = load_class_from_file(args, class_name="GraphModule", device=device) - model = model_class().to(torch.device(args.device)) - return model - - -def get_input_dict(args): - inputs_params = utils.load_converted_from_text(f"{args.model_path}") - params = inputs_params["weight_info"] - for tensor_meta in params.values(): - if "device" in tensor_meta["info"]: - tensor_meta["info"]["device"] = args.device - return { - k: utils.replay_tensor(v).to(torch.device(args.device)) - for k, v in params.items() - } - - -def measure_performance(model_call, args, compiler): - stats = {} - outs = model_call() - - # Warmup runs - for _ in range(args.warmup): - model_call() - compiler.synchronize() - - hardware_name = get_hardward_name(args) - print( - f"[Profiling] Using device: {args.device} {hardware_name}, warm up {args.warmup}, trials {args.trials}", - file=sys.stderr, - flush=True, - ) - - if "cuda" in args.device: - """ - Acknowledgement: We evaluate the performance on both end-to-end and GPU-only timings, - With reference to methods only based on CUDA events from KernelBench in https://github.com/ScalingIntelligence/KernelBench - """ - - e2e_times = [] - gpu_times = [] - - for i in range(args.trials): - # End-to-end timing (naive_timer) - duration_box = test_compiler_util.DurationBox(-1) - with test_compiler_util.naive_timer(duration_box, compiler.synchronize): - # GPU-only timing (CUDA Events) - start_event = torch.cuda.Event(enable_timing=True) - end_event = torch.cuda.Event(enable_timing=True) - start_event.record() - - model_call() - - end_event.record() - compiler.synchronize() - - gpu_time_ms = start_event.elapsed_time(end_event) - e2e_times.append(duration_box.value) - gpu_times.append(gpu_time_ms) - print( - f"Trial {i + 1}: e2e={duration_box.value:.5f} ms, gpu={gpu_time_ms:.5f} ms", - file=sys.stderr, - flush=True, - ) - - stats["e2e"] = test_compiler_util.get_timing_stats(e2e_times) - stats["gpu"] = test_compiler_util.get_timing_stats(gpu_times) - - else: # CPU or other devices - e2e_times = [] - for i in range(args.trials): - duration_box = test_compiler_util.DurationBox(-1) - with test_compiler_util.naive_timer(duration_box, compiler.synchronize): - model_call() - print( - f"Trial {i + 1}: e2e={duration_box.value:.5f} ms", - file=sys.stderr, - flush=True, - ) - e2e_times.append(duration_box.value) - 
stats["e2e"] = test_compiler_util.get_timing_stats(e2e_times) - - return outs, stats - - -def test_single_model(args): - compiler = get_compiler_backend(args) - input_dict = get_input_dict(args) - model = get_model(args) - model_path = os.path.normpath(args.model_path) - test_compiler_util.print_with_log_prompt( - "[Processing]", model_path, args.log_prompt - ) - test_compiler_util.print_basic_config( - args, get_hardward_name(args), get_compile_framework_version(args) - ) - - runtime_seed = 1024 - eager_failure = False - expected_out = None - eager_time_stats = {} - - try: - - def eager_model_call(): - return model(**input_dict) - - expected_out, eager_time_stats = measure_performance( - eager_model_call, args, compiler - ) - - torch.manual_seed(runtime_seed) - if not isinstance(expected_out, tuple): - expected_out = (expected_out,) - except (TypeError, RuntimeError) as e: - print(f"Eager model execution failed: {str(e)}", file=sys.stderr) - eager_failure = True - - compiled_failure = False - compiled_model = None - compiled_time_stats = {} - - try: - compiled_model = compiler(model) - torch.manual_seed(runtime_seed) - - def compiled_model_call(): - return compiled_model(**input_dict) - - compiled_out, compiled_time_stats = measure_performance( - compiled_model_call, args, compiler - ) - - if not isinstance(compiled_out, tuple): - compiled_out = (compiled_out,) - if args.compiler == "xla": - compiled_out = tuple(item.to("cpu").to("cuda") for item in compiled_out) - except (TypeError, RuntimeError) as e: - print(f"Compiled model execution failed: {str(e)}", file=sys.stderr) - compiled_failure = True - print("\n--- Full Traceback ---") - traceback.print_exc() - print(f"debug-model-execution {type(e).__name__} {args.model_path}", flush=True) - except Exception as e: - compiled_failure = True - print("\n--- Full Traceback ---") - traceback.print_exc() - print(f"debug-model-execution {type(e).__name__} {args.model_path}", flush=True) - - if eager_failure: - print(f"{args.log_prompt} [Result] status: failed", file=sys.stderr, flush=True) - print( - f"{args.log_prompt} [Fail due to eager model execution error.]", - file=sys.stderr, - flush=True, - ) - elif compiled_failure: - print(f"{args.log_prompt} [Result] status: failed", file=sys.stderr, flush=True) - print( - f"{args.log_prompt} [Fail due to compiled model execution error.]", - file=sys.stderr, - flush=True, - ) - else: - compare_correctness(expected_out, compiled_out, args) - - print( - f"{args.log_prompt} [Result] status: success", file=sys.stderr, flush=True - ) - - test_compiler_util.print_times_and_speedup( - args, eager_time_stats, compiled_time_stats - ) - - -def print_and_store_cmp(key, cmp_func, args, expected_out, compiled_out, **kwargs): - cmp_ret = cmp_func(expected_out, compiled_out, **kwargs) - print( - f"{args.log_prompt} [Correctness]{key}: {cmp_ret}", - file=sys.stderr, - flush=True, - ) - return cmp_ret - - def compare_correctness(expected_out, compiled_out, args): eager_dtypes = [ ( @@ -386,13 +104,24 @@ def get_cmp_diff_count(expected_out, compiled_out, atol, rtol): return " ".join(results) -def get_sample_root(args): - return args.model_path_prefix +def parse_time_stats_from_reference_log(log_path): + assert os.path.isfile( + log_path + ), f"{log_path} does not exist or is not a regular file." 
+ + with open(log_path, "r", encoding="utf-8") as f: + lines = f.readlines() + for line in reversed(lines): + if "[Performance][eager]" in line: + start = line.find("{") + end = line.rfind("}") + time_stats = json.loads(line[start : end + 1]) + return time_stats -def test_multi_models(args): +def eval_multi_models(args, model_path_prefix): test_samples = test_compiler_util.get_allow_samples( - args.model_path_list, get_sample_root(args) + args.model_path_list, model_path_prefix ) sample_idx = 0 @@ -435,15 +164,15 @@ def test_multi_models(args): print(f"- {model_path}", file=sys.stderr, flush=True) -def test_multi_models_with_prefix(args): - assert os.path.isdir(args.model_path_prefix) +def eval_multi_models_with_prefix(args, model_path_prefix): + assert os.path.isdir(model_path_prefix) assert os.path.isfile(args.model_path_list) test_samples = test_compiler_util.get_allow_samples( - args.model_path_list, get_sample_root(args) + args.model_path_list, model_path_prefix ) py_module_name = os.path.splitext(os.path.basename(__file__))[0] for rel_model_path in test_samples: - model_path = os.path.join(args.model_path_prefix, rel_model_path) + model_path = os.path.join(model_path_prefix, rel_model_path) if not os.path.exists(model_path): continue if not os.path.exists(os.path.join(model_path, "model.py")): @@ -467,39 +196,92 @@ def test_multi_models_with_prefix(args): traceback.print_exc() +def compare_perf_diff(args, model_path, ref_dir, target_dir): + # A + ref_dump_path = utils.get_output_path(ref_dir, model_path) + ref_out = torch.load(str(ref_dump_path)) + + ref_log_path = utils.get_log_path(ref_dir, model_path) + ref_time_stats = parse_time_stats_from_reference_log(ref_log_path) + + # B + target_dump_path = utils.get_output_path(target_dir, model_path) + target_out = torch.load(str(target_dump_path)) + + target_log_path = utils.get_log_path(target_dir, model_path) + target_time_stats = parse_time_stats_from_reference_log(target_log_path) + + compare_correctness(ref_out, target_out, args) + + test_compiler_util.print_times_and_speedup(args, ref_time_stats, target_time_stats) + + +def eval_single_model(args): + ref_dir = "/tmp/eval_perf_diff/A" + target_dir = "/tmp/eval_perf_diff/B" + + EvalCfg = types.SimpleNamespace( + ref_env=types.SimpleNamespace(**convert_to_dict(args.config)["ref_env"]), + target_env=types.SimpleNamespace(**convert_to_dict(args.config)["target_env"]), + ) + + ref_args = build_sub_args(EvalCfg.ref_env) + target_args = build_sub_args(EvalCfg.target_env) + + run_sub_process(ref_args, args.model_path, ref_dir) + run_sub_process(target_args, args.model_path, target_dir) + compare_perf_diff(ref_args, args.model_path, ref_dir, target_dir) + + +def run_sub_process(env_args, model_path, output_path): + cmd = [sys.executable, "-m", "graph_net_bench.torch.eval_backend_perf"] + args_pairs = [ + ("--model-path", model_path), + ("--output-path", output_path), + ("--seed", str(env_args.seed)), + ("--compiler", env_args.compiler), + ("--device", env_args.device), + ("--op-lib", env_args.op_lib), + ("--warmup", str(env_args.warmup)), + ("--trials", str(env_args.trials)), + ("--log-prompt", env_args.log_prompt), + ("--model-path-prefix", env_args.model_path_prefix), + ("--config", env_args.backend_config), + ] + + for arg_name, arg_value in args_pairs: + if arg_value is not None: + cmd.extend([arg_name, arg_value]) + + subprocess.run(cmd, check=True) + + +def build_sub_args(env_ns: types.SimpleNamespace) -> argparse.Namespace: + sub = argparse.Namespace() + sub.seed = getattr(env_ns, 
"seed", 123) + sub.compiler = getattr(env_ns, "compiler", None) + sub.device = getattr(env_ns, "device", None) + sub.op_lib = getattr(env_ns, "op_lib", None) + sub.warmup = getattr(env_ns, "warmup", 3) + sub.trials = getattr(env_ns, "trials", 5) + sub.log_prompt = getattr(env_ns, "log_prompt", None) + sub.model_path_prefix = getattr(env_ns, "model_path_prefix", None) + sub.backend_config = getattr(env_ns, "backend_config", None) + return sub + + def main(args): - if args.model_path_list is not None and args.model_path_prefix is not None: - test_multi_models_with_prefix(args) + config_dict = convert_to_dict(args.config) + model_path_prefix = config_dict["ref_env"]["model_path_prefix"] + if args.model_path_list is not None and model_path_prefix is not None: + eval_multi_models_with_prefix(args, model_path_prefix) return assert os.path.isdir(args.model_path) - initalize_seed = 123 - set_seed(random_seed=initalize_seed) - if path_utils.is_single_model_dir(args.model_path): - test_single_model(args) + eval_single_model(args) else: - test_multi_models(args) - - -def complete_default_args( - mut_args, - compiler: str = "inductor", # Compiler name - device: str = "cuda", # Device for testing the compiler (e.g., 'cpu' or 'cuda') - warmup: int = 3, # Number of warmup steps - trials: int = 5, # Number of timing trials - log_prompt: str = "graph-net-test-compiler-log", # Log prompt for performance log filtering - model_path_prefix: str = None, # Prefix path to model path in --model-path-list - backend_config: dict = None, # backend configuration json -): - backend_config = backend_config if backend_config is not None else {} - mut_args.compiler = compiler - mut_args.device = device - mut_args.warmup = warmup - mut_args.trials = trials - mut_args.log_prompt = log_prompt - mut_args.model_path_prefix = model_path_prefix - mut_args.backend_config = backend_config + eval_multi_models(args, model_path_prefix) if __name__ == "__main__": @@ -526,5 +308,4 @@ def complete_default_args( help="base64 encode configuration json.", ) args = parser.parse_args() - complete_default_args(args, **convert_to_dict(args.config)) main(args=args) diff --git a/graph_net_bench/torch/eval_backend_perf.py b/graph_net_bench/torch/eval_backend_perf.py new file mode 100644 index 000000000..7e12f6ebf --- /dev/null +++ b/graph_net_bench/torch/eval_backend_perf.py @@ -0,0 +1,337 @@ +from . 
import utils +import argparse +import importlib.util +import torch +from pathlib import Path +from typing import Type +import sys +import os +import traceback +import json +import random +import numpy as np +import platform +import base64 +from contextlib import redirect_stdout, redirect_stderr + +from graph_net_bench.torch.backend.graph_compiler_backend import GraphCompilerBackend +from graph_net_bench.torch.backend.tvm_backend import TvmBackend +from graph_net_bench.torch.backend.xla_backend import XlaBackend +from graph_net_bench.torch.backend.inductor_backend import InductorBackend +from graph_net_bench.torch.backend.tensorrt_backend import TensorRTBackend +from graph_net_bench.torch.backend.blade_disc_backend import BladeDISCBackend +from graph_net_bench.torch.backend.nope_backend import NopeBackend +from graph_net_bench.torch.backend.pass_mgr_backend import PassMgrBackend +from graph_net_bench.torch.backend.unstable_to_stable_backend import ( + UnstableToStableBackend, +) +from graph_net_bench.torch.backend.range_decomposer_validator_backend import ( + RangeDecomposerValidatorBackend, +) +from graph_net_bench.torch.backend.graph_variable_renamer_validator_backend import ( + GraphVariableRenamerValidatorBackend, +) +from graph_net_bench import test_compiler_util + + +compiler_backend_name2class = { + "tvm": TvmBackend, + "xla": XlaBackend, + "inductor": InductorBackend, + "tensorrt": TensorRTBackend, + "bladedisc": BladeDISCBackend, + "nope": NopeBackend, + "pass_mgr": PassMgrBackend, + "unstable_to_stable": UnstableToStableBackend, + "range_decomposer_validator": RangeDecomposerValidatorBackend, + "graph_variable_renamer_validator": GraphVariableRenamerValidatorBackend, +} + + +def register_op_lib(op_lib): + if op_lib == "flaggems": + import flag_gems + + flag_gems.enable() + else: + pass + + +def set_seed(random_seed): + random.seed(random_seed) + np.random.seed(random_seed) + torch.manual_seed(random_seed) + if torch.cuda.is_available(): + torch.cuda.manual_seed(random_seed) + torch.cuda.manual_seed_all(random_seed) + + +def get_hardward_name(args): + hardware_name = "unknown" + if "cuda" in args.device: + hardware_name = torch.cuda.get_device_name(args.device) + elif args.device == "cpu": + hardware_name = platform.processor() + return hardware_name + + +def get_compile_framework_version(args): + if args.compiler in ["inductor", "nope", "unstable_to_stable"]: + return torch.__version__ + elif args.compiler in ["tvm", "xla", "tensorrt", "bladedisc"]: + # Assuming compiler object has a version attribute + return f"{args.compiler.capitalize()} {args.compiler.version}" + return "unknown" + + +def load_class_from_file( + args: argparse.Namespace, class_name: str, device: str +) -> Type[torch.nn.Module]: + file_path = f"{args.model_path}/model.py" + file = Path(file_path).resolve() + module_name = file.stem + + with open(file_path, "r", encoding="utf-8") as f: + model_code = f.read() + model_code = utils.modify_code_by_device(model_code, device) + spec = importlib.util.spec_from_loader(module_name, loader=None) + module = importlib.util.module_from_spec(spec) + sys.modules[module_name] = module + compiled_code = compile(model_code, filename=file, mode="exec") + exec(compiled_code, module.__dict__) + + model_class = getattr(module, class_name, None) + setattr(model_class, "__graph_net_file_path__", file_path) + setattr(model_class, "__graph_net_device__", device) + return model_class + + +def convert_to_dict(config_str): + if config_str is None or config_str == "None": + return {} + 
config_str = base64.b64decode(config_str).decode("utf-8") + config = json.loads(config_str) + assert isinstance(config, dict), f"config should be a dict. {config_str=}" + return config + + +def get_compiler_backend(args) -> GraphCompilerBackend: + assert ( + args.compiler in compiler_backend_name2class + ), f"Unknown compiler: {args.compiler}" + backend_class = compiler_backend_name2class[args.compiler] + config = convert_to_dict(args.config) if args.config is not None else {} + return backend_class(config) + + +def get_model(args): + device = "xla" if args.compiler == "xla" else args.device + + # device: Torch device object specifying the target device for model loading (e.g., 'cuda', 'cpu', 'xla') + model_class = load_class_from_file(args, class_name="GraphModule", device=device) + model = model_class().to(torch.device(args.device)) + return model + + +def get_input_dict(args): + inputs_params = utils.load_converted_from_text(f"{args.model_path}") + params = inputs_params["weight_info"] + for tensor_meta in params.values(): + if "device" in tensor_meta["info"]: + tensor_meta["info"]["device"] = args.device + return { + k: utils.replay_tensor(v).to(torch.device(args.device)) + for k, v in params.items() + } + + +def measure_performance(model_call, args, compiler): + stats = {} + outs = model_call() + + # Warmup runs + for _ in range(args.warmup): + model_call() + compiler.synchronize() + + hardware_name = get_hardward_name(args) + print( + f"[Profiling] Using device: {args.device} {hardware_name}, warm up {args.warmup}, trials {args.trials}", + file=sys.stderr, + flush=True, + ) + + if "cuda" in args.device: + torch.cuda.empty_cache() + e2e_times = [] + gpu_times = [] + + for i in range(args.trials): + # End-to-end timing (naive_timer) + duration_box = test_compiler_util.DurationBox(-1) + with test_compiler_util.naive_timer(duration_box, compiler.synchronize): + # GPU-only timing (CUDA Events) + start_event = torch.cuda.Event(enable_timing=True) + end_event = torch.cuda.Event(enable_timing=True) + start_event.record() + + model_call() + + end_event.record() + compiler.synchronize() + + gpu_time_ms = start_event.elapsed_time(end_event) + e2e_times.append(duration_box.value) + gpu_times.append(gpu_time_ms) + print( + f"Trial {i + 1}: e2e={duration_box.value:.5f} ms, gpu={gpu_time_ms:.5f} ms", + file=sys.stderr, + flush=True, + ) + + stats["e2e"] = test_compiler_util.get_timing_stats(e2e_times) + stats["gpu"] = test_compiler_util.get_timing_stats(gpu_times) + + else: # CPU or other devices + e2e_times = [] + for i in range(args.trials): + duration_box = test_compiler_util.DurationBox(-1) + with test_compiler_util.naive_timer(duration_box, compiler.synchronize): + model_call() + print( + f"Trial {i + 1}: e2e={duration_box.value:.5f} ms", + file=sys.stderr, + flush=True, + ) + e2e_times.append(duration_box.value) + stats["e2e"] = test_compiler_util.get_timing_stats(e2e_times) + + return outs, stats + + +def eval_single_model(args): + log_path = utils.get_log_path(args.output_path, args.model_path) + output_dump_path = utils.get_output_path(args.output_path, args.model_path) + print(f"Log path: {log_path}", file=sys.stderr, flush=True) + print(f"Outputs path: {output_dump_path}", file=sys.stderr, flush=True) + + with open(log_path, "w", encoding="utf-8") as log_f: + with redirect_stdout(log_f), redirect_stderr(log_f): + compiler = get_compiler_backend(args) + + input_dict = get_input_dict(args) + model = get_model(args) + model.eval() + + test_compiler_util.print_with_log_prompt( + "[Config] 
seed:", args.seed, args.log_prompt + ) + + test_compiler_util.print_basic_config( + args, + get_hardward_name(args), + get_compile_framework_version(args), + ) + + test_compiler_util.print_with_log_prompt( + "[Config] op_lib:", args.op_lib, args.log_prompt + ) + + success = False + time_stats = {} + try: + compiled_model = compiler(model) + + def model_call(): + return compiled_model(**input_dict) + + outputs, time_stats = measure_performance(model_call, args, compiler) + success = True + except Exception as e: + print( + f"Run model failed: {str(e)}\n{traceback.format_exc()}", + file=sys.stderr, + flush=True, + ) + + test_compiler_util.print_running_status(args, success) + if success: + torch.save(outputs, str(output_dump_path)) + test_compiler_util.print_with_log_prompt( + "[Performance][eager]:", json.dumps(time_stats), args.log_prompt + ) + + with open(log_path, "r", encoding="utf-8") as f: + content = f.read() + print(content, file=sys.stderr, flush=True) + + +def main(args): + set_seed(args.seed) + os.makedirs(args.output_path, exist_ok=True) + eval_single_model(args) + + +if __name__ == "__main__": + parser = argparse.ArgumentParser( + description="GraphNet Backend Performance Evaluation" + ) + parser.add_argument( + "--model-path", + type=str, + required=False, + default=None, + help="Path to model file(s), each subdirectory containing graph_net.json will be regarded as a model", + ) + parser.add_argument( + "--output-path", + type=str, + required=False, + default="/tmp/test_save", + help="Path to save outputs", + ) + parser.add_argument("--seed", type=int, required=False, default=123) + parser.add_argument( + "--compiler", + type=str, + required=False, + default="inductor", + help="Path to customized compiler python file", + ) + parser.add_argument( + "--device", + type=str, + required=False, + default="cuda", + help="Device for testing the compiler (e.g., 'cpu' or 'cuda')", + ) + parser.add_argument("--op-lib", type=str, required=False, default=None) + parser.add_argument( + "--warmup", type=int, required=False, default=3, help="Number of warmup steps" + ) + parser.add_argument( + "--trials", type=int, required=False, default=5, help="Number of timing trials" + ) + parser.add_argument( + "--log-prompt", + type=str, + required=False, + default="graph-net-test-compiler-log", + help="Log prompt for performance log filtering.", + ) + parser.add_argument( + "--model-path-prefix", + type=str, + required=False, + default=None, + help="Prefix path to model path list", + ) + parser.add_argument( + "--config", + type=str, + required=False, + default=None, + help="base64 encode configuration json.", + ) + args = parser.parse_args() + main(args=args) diff --git a/graph_net_bench/torch/utils.py b/graph_net_bench/torch/utils.py index c937ff4de..700a59972 100755 --- a/graph_net_bench/torch/utils.py +++ b/graph_net_bench/torch/utils.py @@ -1,4 +1,5 @@ import torch +import os import ast import math import inspect @@ -7,6 +8,16 @@ kLiteralTensorSize = 64 +def get_log_path(log_dir, model_path): + model_name = model_path.split("torch_samples/")[-1].replace(os.sep, "_") + return os.path.join(log_dir, f"{model_name}.log") + + +def get_output_path(output_dir, model_path): + model_name = model_path.split("torch_samples/")[-1].replace(os.sep, "_") + return os.path.join(output_dir, f"{model_name}.pth") + + def get_limited_precision_float_str(value): if not isinstance(value, float): return value diff --git a/test/eval_backend_diff_test.sh b/test/eval_backend_diff_test.sh index e3fa79602..16da81903 100755 
--- a/test/eval_backend_diff_test.sh +++ b/test/eval_backend_diff_test.sh @@ -10,9 +10,16 @@ python3 -m graph_net_bench.torch.eval_backend_diff \ --model-path-list $model_list \ --config $(base64 -w 0 <&1 | tee "$OUTPUT_PATH/validation.log" From f83ab0cfd1bfdb325c356a0604e9cb717e45abde Mon Sep 17 00:00:00 2001 From: JewelRoam <2752594773@qq.com> Date: Fri, 16 Jan 2026 08:56:00 +0800 Subject: [PATCH 02/17] Simplify eval_multi_models --- graph_net_bench/torch/eval_backend_diff.py | 123 ++++++++++----------- 1 file changed, 57 insertions(+), 66 deletions(-) diff --git a/graph_net_bench/torch/eval_backend_diff.py b/graph_net_bench/torch/eval_backend_diff.py index 07a19ff88..50d17cb62 100755 --- a/graph_net_bench/torch/eval_backend_diff.py +++ b/graph_net_bench/torch/eval_backend_diff.py @@ -119,81 +119,69 @@ def parse_time_stats_from_reference_log(log_path): return time_stats -def eval_multi_models(args, model_path_prefix): - test_samples = test_compiler_util.get_allow_samples( - args.model_path_list, model_path_prefix - ) - +def eval_multi_models(args, model_path_prefix=None, use_model_list=False): sample_idx = 0 failed_samples = [] module_name = os.path.splitext(os.path.basename(__file__))[0] - for model_path in path_utils.get_recursively_model_path(args.model_path): - if test_samples is None or os.path.abspath(model_path) in test_samples: - print( - f"[{sample_idx}] {module_name}, model_path: {model_path}", - file=sys.stderr, - flush=True, - ) - cmd = " ".join( - [ - sys.executable, - f"-m graph_net_bench.torch.{module_name}", - f"--model-path {model_path}", - f"--config {args.config}", - ] - ) - try: - process = subprocess.Popen(cmd, shell=True) - cmd_ret = process.wait() - except KeyboardInterrupt: - print("KeyboardInterrupt") - sys.exit(1) - except Exception: - print("\n--- Full Traceback ---") - traceback.print_exc() - if cmd_ret != 0: - failed_samples.append(model_path) - sample_idx += 1 - - print( - f"Totally {sample_idx} verified samples, failed {len(failed_samples)} samples.", - file=sys.stderr, - flush=True, - ) - for model_path in failed_samples: - print(f"- {model_path}", file=sys.stderr, flush=True) - -def eval_multi_models_with_prefix(args, model_path_prefix): - assert os.path.isdir(model_path_prefix) - assert os.path.isfile(args.model_path_list) - test_samples = test_compiler_util.get_allow_samples( - args.model_path_list, model_path_prefix - ) - py_module_name = os.path.splitext(os.path.basename(__file__))[0] - for rel_model_path in test_samples: - model_path = os.path.join(model_path_prefix, rel_model_path) - if not os.path.exists(model_path): - continue - if not os.path.exists(os.path.join(model_path, "model.py")): - continue + if use_model_list: + assert os.path.isdir(model_path_prefix) + assert os.path.isfile(args.model_path_list) + test_samples = test_compiler_util.get_allow_samples( + args.model_path_list, model_path_prefix + ) + model_paths = [] + for rel_model_path in test_samples: + model_path = os.path.join(model_path_prefix, rel_model_path) + if os.path.exists(model_path) and os.path.exists( + os.path.join(model_path, "model.py") + ): + model_paths.append(model_path) + else: + assert os.path.isdir(args.model_path) + test_samples = test_compiler_util.get_allow_samples( + args.model_path_list, model_path_prefix + ) + model_paths = [] + for model_path in path_utils.get_recursively_model_path(args.model_path): + if test_samples is None or os.path.abspath(model_path) in test_samples: + model_paths.append(model_path) + + for model_path in model_paths: + print( + 
f"[{sample_idx}] {module_name}, model_path: {model_path}", + file=sys.stderr, + flush=True, + ) cmd = " ".join( [ sys.executable, - f"-m graph_net_bench.torch.{py_module_name}", + f"-m graph_net_bench.torch.{module_name}", f"--model-path {model_path}", f"--config {args.config}", ] ) try: process = subprocess.Popen(cmd, shell=True) - process.wait() + cmd_ret = process.wait() except KeyboardInterrupt: print("KeyboardInterrupt") sys.exit(1) except Exception: print("\n--- Full Traceback ---") traceback.print_exc() + if cmd_ret != 0: + failed_samples.append(model_path) + sample_idx += 1 + + print( + f"Totally {sample_idx} verified samples, failed {len(failed_samples)} samples.", + file=sys.stderr, + flush=True, + ) + if failed_samples: + for model_path in failed_samples: + print(f"- {model_path}", file=sys.stderr, flush=True) def compare_perf_diff(args, model_path, ref_dir, target_dir): @@ -272,20 +260,23 @@ def build_sub_args(env_ns: types.SimpleNamespace) -> argparse.Namespace: def main(args): config_dict = convert_to_dict(args.config) - model_path_prefix = config_dict["ref_env"]["model_path_prefix"] - if args.model_path_list is not None and model_path_prefix is not None: - eval_multi_models_with_prefix(args, model_path_prefix) - return - assert os.path.isdir(args.model_path) - - if path_utils.is_single_model_dir(args.model_path): - eval_single_model(args) + model_path_prefix = config_dict.get("ref_env", {}).get("model_path_prefix") + + if args.model_path_list and model_path_prefix: + eval_multi_models(args, model_path_prefix, use_model_list=True) + elif os.path.isdir(args.model_path): + if path_utils.is_single_model_dir(args.model_path): + eval_single_model(args) + else: + eval_multi_models(args, model_path_prefix, use_model_list=False) else: - eval_multi_models(args, model_path_prefix) + raise ValueError(f"Invalid model path: {args.model_path}") if __name__ == "__main__": - parser = argparse.ArgumentParser(description="Test compiler performance.") + parser = argparse.ArgumentParser( + description="Evaluate backend performance difference." + ) parser.add_argument( "--model-path", type=str, From 9670c7a6787b7d8b03cee5e5232383587ed839ea Mon Sep 17 00:00:00 2001 From: JewelRoam <2752594773@qq.com> Date: Fri, 16 Jan 2026 08:59:30 +0800 Subject: [PATCH 03/17] minor change --- graph_net_bench/torch/eval_backend_diff.py | 4 ++-- graph_net_bench/torch/eval_backend_perf.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/graph_net_bench/torch/eval_backend_diff.py b/graph_net_bench/torch/eval_backend_diff.py index 50d17cb62..a5c02ec7b 100755 --- a/graph_net_bench/torch/eval_backend_diff.py +++ b/graph_net_bench/torch/eval_backend_diff.py @@ -252,7 +252,7 @@ def build_sub_args(env_ns: types.SimpleNamespace) -> argparse.Namespace: sub.op_lib = getattr(env_ns, "op_lib", None) sub.warmup = getattr(env_ns, "warmup", 3) sub.trials = getattr(env_ns, "trials", 5) - sub.log_prompt = getattr(env_ns, "log_prompt", None) + sub.log_prompt = getattr(env_ns, "log_prompt", "graph-net-bench-log") sub.model_path_prefix = getattr(env_ns, "model_path_prefix", None) sub.backend_config = getattr(env_ns, "backend_config", None) return sub @@ -275,7 +275,7 @@ def main(args): if __name__ == "__main__": parser = argparse.ArgumentParser( - description="Evaluate backend performance difference." + description="Evaluate Backend Performance Difference." 
) parser.add_argument( "--model-path", diff --git a/graph_net_bench/torch/eval_backend_perf.py b/graph_net_bench/torch/eval_backend_perf.py index 7e12f6ebf..60194ae88 100644 --- a/graph_net_bench/torch/eval_backend_perf.py +++ b/graph_net_bench/torch/eval_backend_perf.py @@ -274,7 +274,7 @@ def main(args): if __name__ == "__main__": parser = argparse.ArgumentParser( - description="GraphNet Backend Performance Evaluation" + description="Single Backend Performance Evaluation" ) parser.add_argument( "--model-path", @@ -316,7 +316,7 @@ def main(args): "--log-prompt", type=str, required=False, - default="graph-net-test-compiler-log", + default="graph-net-bench-log", help="Log prompt for performance log filtering.", ) parser.add_argument( From cb9a4f1e3cb7aff5a68b9f34083ba913faeb276f Mon Sep 17 00:00:00 2001 From: JewelRoam <2752594773@qq.com> Date: Fri, 16 Jan 2026 09:06:35 +0800 Subject: [PATCH 04/17] Minor change on names --- graph_net_bench/torch/eval_backend_perf.py | 28 ++++++++++++---------- 1 file changed, 15 insertions(+), 13 deletions(-) diff --git a/graph_net_bench/torch/eval_backend_perf.py b/graph_net_bench/torch/eval_backend_perf.py index 60194ae88..c550767f0 100644 --- a/graph_net_bench/torch/eval_backend_perf.py +++ b/graph_net_bench/torch/eval_backend_perf.py @@ -66,28 +66,28 @@ def set_seed(random_seed): torch.cuda.manual_seed_all(random_seed) -def get_hardward_name(args): +def get_hardward_name(device): hardware_name = "unknown" - if "cuda" in args.device: - hardware_name = torch.cuda.get_device_name(args.device) + if "cuda" in device: + hardware_name = torch.cuda.get_device_name(device) elif args.device == "cpu": hardware_name = platform.processor() return hardware_name -def get_compile_framework_version(args): - if args.compiler in ["inductor", "nope", "unstable_to_stable"]: +def get_compiler_version(compiler): + if compiler in ["inductor", "nope", "unstable_to_stable"]: return torch.__version__ - elif args.compiler in ["tvm", "xla", "tensorrt", "bladedisc"]: + elif compiler in ["tvm", "xla", "tensorrt", "bladedisc"]: # Assuming compiler object has a version attribute - return f"{args.compiler.capitalize()} {args.compiler.version}" + return f"{compiler.capitalize()} {compiler.version}" return "unknown" def load_class_from_file( - args: argparse.Namespace, class_name: str, device: str + model_path: str, class_name: str, device: str ) -> Type[torch.nn.Module]: - file_path = f"{args.model_path}/model.py" + file_path = f"{model_path}/model.py" file = Path(file_path).resolve() module_name = file.stem @@ -128,7 +128,9 @@ def get_model(args): device = "xla" if args.compiler == "xla" else args.device # device: Torch device object specifying the target device for model loading (e.g., 'cuda', 'cpu', 'xla') - model_class = load_class_from_file(args, class_name="GraphModule", device=device) + model_class = load_class_from_file( + args.model_path, class_name="GraphModule", device=device + ) model = model_class().to(torch.device(args.device)) return model @@ -154,7 +156,7 @@ def measure_performance(model_call, args, compiler): model_call() compiler.synchronize() - hardware_name = get_hardward_name(args) + hardware_name = get_hardward_name(args.device) print( f"[Profiling] Using device: {args.device} {hardware_name}, warm up {args.warmup}, trials {args.trials}", file=sys.stderr, @@ -229,8 +231,8 @@ def eval_single_model(args): test_compiler_util.print_basic_config( args, - get_hardward_name(args), - get_compile_framework_version(args), + get_hardward_name(args.device), + 
get_compiler_version(args.compiler), ) test_compiler_util.print_with_log_prompt( From 6b0975da11997a52ee4f8daf11215f6dd3b71564 Mon Sep 17 00:00:00 2001 From: JewelRoam <2752594773@qq.com> Date: Fri, 16 Jan 2026 14:41:35 +0800 Subject: [PATCH 05/17] use call method instead of bash --- graph_net_bench/torch/eval_backend_diff.py | 75 ++++++++++------------ graph_net_bench/torch/eval_backend_perf.py | 4 +- 2 files changed, 35 insertions(+), 44 deletions(-) diff --git a/graph_net_bench/torch/eval_backend_diff.py b/graph_net_bench/torch/eval_backend_diff.py index a5c02ec7b..bb7811689 100755 --- a/graph_net_bench/torch/eval_backend_diff.py +++ b/graph_net_bench/torch/eval_backend_diff.py @@ -1,5 +1,4 @@ from . import utils -import subprocess import argparse import torch import sys @@ -11,6 +10,7 @@ import types from graph_net_bench import test_compiler_util from graph_net_bench import path_utils +from .eval_backend_perf import eval_single_model_with_single_backend def convert_to_dict(config_str): @@ -153,23 +153,33 @@ def eval_multi_models(args, model_path_prefix=None, use_model_list=False): file=sys.stderr, flush=True, ) - cmd = " ".join( - [ - sys.executable, - f"-m graph_net_bench.torch.{module_name}", - f"--model-path {model_path}", - f"--config {args.config}", - ] - ) + try: - process = subprocess.Popen(cmd, shell=True) - cmd_ret = process.wait() + single_model_args = argparse.Namespace() + + single_model_args.model_path = model_path + single_model_args.config = args.config + single_model_args.model_path_list = None + + if path_utils.is_single_model_dir(model_path): + eval_single_model(single_model_args) + else: + submodel_paths = path_utils.get_recursively_model_path(model_path) + for submodel_path in submodel_paths: + sub_args = argparse.Namespace() + sub_args.model_path = submodel_path + sub_args.config = args.config + sub_args.model_path_list = None + eval_single_model(sub_args) + cmd_ret = 0 except KeyboardInterrupt: print("KeyboardInterrupt") sys.exit(1) except Exception: print("\n--- Full Traceback ---") traceback.print_exc() + cmd_ret = 1 + if cmd_ret != 0: failed_samples.append(model_path) sample_idx += 1 @@ -213,48 +223,29 @@ def eval_single_model(args): target_env=types.SimpleNamespace(**convert_to_dict(args.config)["target_env"]), ) - ref_args = build_sub_args(EvalCfg.ref_env) - target_args = build_sub_args(EvalCfg.target_env) + ref_args = build_sub_args(EvalCfg.ref_env, args.model_path, ref_dir) + target_args = build_sub_args(EvalCfg.target_env, args.model_path, target_dir) - run_sub_process(ref_args, args.model_path, ref_dir) - run_sub_process(target_args, args.model_path, target_dir) + eval_single_model_with_single_backend(ref_args) + eval_single_model_with_single_backend(target_args) compare_perf_diff(ref_args, args.model_path, ref_dir, target_dir) -def run_sub_process(env_args, model_path, output_path): - cmd = [sys.executable, "-m", "graph_net_bench.torch.eval_backend_perf"] - args_pairs = [ - ("--model-path", model_path), - ("--output-path", output_path), - ("--seed", str(env_args.seed)), - ("--compiler", env_args.compiler), - ("--device", env_args.device), - ("--op-lib", env_args.op_lib), - ("--warmup", str(env_args.warmup)), - ("--trials", str(env_args.trials)), - ("--log-prompt", env_args.log_prompt), - ("--model-path-prefix", env_args.model_path_prefix), - ("--config", env_args.backend_config), - ] - - for arg_name, arg_value in args_pairs: - if arg_value is not None: - cmd.extend([arg_name, arg_value]) - - subprocess.run(cmd, check=True) - - -def 
build_sub_args(env_ns: types.SimpleNamespace) -> argparse.Namespace: +def build_sub_args( + env_ns: types.SimpleNamespace, model_path: str, output_path: str +) -> argparse.Namespace: sub = argparse.Namespace() + sub.model_path = model_path + sub.output_path = output_path sub.seed = getattr(env_ns, "seed", 123) - sub.compiler = getattr(env_ns, "compiler", None) - sub.device = getattr(env_ns, "device", None) + sub.compiler = getattr(env_ns, "compiler", "inductor") + sub.device = getattr(env_ns, "device", "cuda") sub.op_lib = getattr(env_ns, "op_lib", None) sub.warmup = getattr(env_ns, "warmup", 3) sub.trials = getattr(env_ns, "trials", 5) sub.log_prompt = getattr(env_ns, "log_prompt", "graph-net-bench-log") sub.model_path_prefix = getattr(env_ns, "model_path_prefix", None) - sub.backend_config = getattr(env_ns, "backend_config", None) + sub.config = getattr(env_ns, "backend_config", None) return sub diff --git a/graph_net_bench/torch/eval_backend_perf.py b/graph_net_bench/torch/eval_backend_perf.py index c550767f0..d099ac7d9 100644 --- a/graph_net_bench/torch/eval_backend_perf.py +++ b/graph_net_bench/torch/eval_backend_perf.py @@ -211,7 +211,7 @@ def measure_performance(model_call, args, compiler): return outs, stats -def eval_single_model(args): +def eval_single_model_with_single_backend(args): log_path = utils.get_log_path(args.output_path, args.model_path) output_dump_path = utils.get_output_path(args.output_path, args.model_path) print(f"Log path: {log_path}", file=sys.stderr, flush=True) @@ -271,7 +271,7 @@ def model_call(): def main(args): set_seed(args.seed) os.makedirs(args.output_path, exist_ok=True) - eval_single_model(args) + eval_single_model_with_single_backend(args) if __name__ == "__main__": From 980f7377973080b6bfd89250e7129cf3647a638a Mon Sep 17 00:00:00 2001 From: JewelRoam <2752594773@qq.com> Date: Fri, 16 Jan 2026 14:46:21 +0800 Subject: [PATCH 06/17] minor change --- graph_net_bench/torch/eval_backend_perf.py | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/graph_net_bench/torch/eval_backend_perf.py b/graph_net_bench/torch/eval_backend_perf.py index d099ac7d9..fcf313cef 100644 --- a/graph_net_bench/torch/eval_backend_perf.py +++ b/graph_net_bench/torch/eval_backend_perf.py @@ -212,6 +212,8 @@ def measure_performance(model_call, args, compiler): def eval_single_model_with_single_backend(args): + set_seed(args.seed) + os.makedirs(args.output_path, exist_ok=True) log_path = utils.get_log_path(args.output_path, args.model_path) output_dump_path = utils.get_output_path(args.output_path, args.model_path) print(f"Log path: {log_path}", file=sys.stderr, flush=True) @@ -268,12 +270,6 @@ def model_call(): print(content, file=sys.stderr, flush=True) -def main(args): - set_seed(args.seed) - os.makedirs(args.output_path, exist_ok=True) - eval_single_model_with_single_backend(args) - - if __name__ == "__main__": parser = argparse.ArgumentParser( description="Single Backend Performance Evaluation" @@ -336,4 +332,4 @@ def main(args): help="base64 encode configuration json.", ) args = parser.parse_args() - main(args=args) + eval_single_model_with_single_backend(args=args) From 5c49521d22fb87d4cf3f25549dd5ee2b76652262 Mon Sep 17 00:00:00 2001 From: JewelRoam <2752594773@qq.com> Date: Fri, 16 Jan 2026 14:55:08 +0800 Subject: [PATCH 07/17] change some names --- graph_net_bench/torch/eval_backend_diff.py | 16 ++++++++++------ graph_net_bench/torch/eval_backend_perf.py | 8 +++++--- test/eval_backend_diff_test.sh | 8 ++++++-- 3 files changed, 21 insertions(+), 11 
deletions(-) diff --git a/graph_net_bench/torch/eval_backend_diff.py b/graph_net_bench/torch/eval_backend_diff.py index bb7811689..0e7229086 100755 --- a/graph_net_bench/torch/eval_backend_diff.py +++ b/graph_net_bench/torch/eval_backend_diff.py @@ -219,12 +219,16 @@ def eval_single_model(args): target_dir = "/tmp/eval_perf_diff/B" EvalCfg = types.SimpleNamespace( - ref_env=types.SimpleNamespace(**convert_to_dict(args.config)["ref_env"]), - target_env=types.SimpleNamespace(**convert_to_dict(args.config)["target_env"]), + reference_config=types.SimpleNamespace( + **convert_to_dict(args.config)["reference_config"] + ), + target_config=types.SimpleNamespace( + **convert_to_dict(args.config)["target_config"] + ), ) - ref_args = build_sub_args(EvalCfg.ref_env, args.model_path, ref_dir) - target_args = build_sub_args(EvalCfg.target_env, args.model_path, target_dir) + ref_args = build_sub_args(EvalCfg.reference_config, args.model_path, ref_dir) + target_args = build_sub_args(EvalCfg.target_config, args.model_path, target_dir) eval_single_model_with_single_backend(ref_args) eval_single_model_with_single_backend(target_args) @@ -245,13 +249,13 @@ def build_sub_args( sub.trials = getattr(env_ns, "trials", 5) sub.log_prompt = getattr(env_ns, "log_prompt", "graph-net-bench-log") sub.model_path_prefix = getattr(env_ns, "model_path_prefix", None) - sub.config = getattr(env_ns, "backend_config", None) + sub.backend_config = getattr(env_ns, "backend_config", None) return sub def main(args): config_dict = convert_to_dict(args.config) - model_path_prefix = config_dict.get("ref_env", {}).get("model_path_prefix") + model_path_prefix = config_dict.get("reference_config", {}).get("model_path_prefix") if args.model_path_list and model_path_prefix: eval_multi_models(args, model_path_prefix, use_model_list=True) diff --git a/graph_net_bench/torch/eval_backend_perf.py b/graph_net_bench/torch/eval_backend_perf.py index fcf313cef..29c40d3fb 100644 --- a/graph_net_bench/torch/eval_backend_perf.py +++ b/graph_net_bench/torch/eval_backend_perf.py @@ -120,8 +120,10 @@ def get_compiler_backend(args) -> GraphCompilerBackend: args.compiler in compiler_backend_name2class ), f"Unknown compiler: {args.compiler}" backend_class = compiler_backend_name2class[args.compiler] - config = convert_to_dict(args.config) if args.config is not None else {} - return backend_class(config) + backend_config = ( + convert_to_dict(args.backend_config) if args.backend_config is not None else {} + ) + return backend_class(backend_config) def get_model(args): @@ -325,7 +327,7 @@ def model_call(): help="Prefix path to model path list", ) parser.add_argument( - "--config", + "--backend-config", type=str, required=False, default=None, diff --git a/test/eval_backend_diff_test.sh b/test/eval_backend_diff_test.sh index 16da81903..17bba712e 100755 --- a/test/eval_backend_diff_test.sh +++ b/test/eval_backend_diff_test.sh @@ -10,14 +10,18 @@ python3 -m graph_net_bench.torch.eval_backend_diff \ --model-path-list $model_list \ --config $(base64 -w 0 < Date: Fri, 16 Jan 2026 15:51:53 +0800 Subject: [PATCH 08/17] Dynamically load backend class based on args.compiler --- graph_net_bench/torch/eval_backend_perf.py | 61 +++++++++------------- 1 file changed, 24 insertions(+), 37 deletions(-) diff --git a/graph_net_bench/torch/eval_backend_perf.py b/graph_net_bench/torch/eval_backend_perf.py index 29c40d3fb..4d5ea94a5 100644 --- a/graph_net_bench/torch/eval_backend_perf.py +++ b/graph_net_bench/torch/eval_backend_perf.py @@ -13,41 +13,10 @@ import platform import 
base64 from contextlib import redirect_stdout, redirect_stderr - from graph_net_bench.torch.backend.graph_compiler_backend import GraphCompilerBackend -from graph_net_bench.torch.backend.tvm_backend import TvmBackend -from graph_net_bench.torch.backend.xla_backend import XlaBackend -from graph_net_bench.torch.backend.inductor_backend import InductorBackend -from graph_net_bench.torch.backend.tensorrt_backend import TensorRTBackend -from graph_net_bench.torch.backend.blade_disc_backend import BladeDISCBackend -from graph_net_bench.torch.backend.nope_backend import NopeBackend -from graph_net_bench.torch.backend.pass_mgr_backend import PassMgrBackend -from graph_net_bench.torch.backend.unstable_to_stable_backend import ( - UnstableToStableBackend, -) -from graph_net_bench.torch.backend.range_decomposer_validator_backend import ( - RangeDecomposerValidatorBackend, -) -from graph_net_bench.torch.backend.graph_variable_renamer_validator_backend import ( - GraphVariableRenamerValidatorBackend, -) from graph_net_bench import test_compiler_util -compiler_backend_name2class = { - "tvm": TvmBackend, - "xla": XlaBackend, - "inductor": InductorBackend, - "tensorrt": TensorRTBackend, - "bladedisc": BladeDISCBackend, - "nope": NopeBackend, - "pass_mgr": PassMgrBackend, - "unstable_to_stable": UnstableToStableBackend, - "range_decomposer_validator": RangeDecomposerValidatorBackend, - "graph_variable_renamer_validator": GraphVariableRenamerValidatorBackend, -} - - def register_op_lib(op_lib): if op_lib == "flaggems": import flag_gems @@ -70,7 +39,7 @@ def get_hardward_name(device): hardware_name = "unknown" if "cuda" in device: hardware_name = torch.cuda.get_device_name(device) - elif args.device == "cpu": + elif device == "cpu": hardware_name = platform.processor() return hardware_name @@ -116,10 +85,28 @@ def convert_to_dict(config_str): def get_compiler_backend(args) -> GraphCompilerBackend: - assert ( - args.compiler in compiler_backend_name2class - ), f"Unknown compiler: {args.compiler}" - backend_class = compiler_backend_name2class[args.compiler] + """ + Dynamically load backend class based on args.compiler + """ + compiler_name = args.compiler.lower() + module_name = f"graph_net_bench.torch.backend.{compiler_name}_backend" + + try: + module = __import__(module_name, fromlist=[f"{compiler_name.title()}Backend"]) + + class_name = ( + f"{''.join(part.title() for part in compiler_name.split('_'))}Backend" + ) + + backend_class = None + if hasattr(module, class_name): + backend_class = getattr(module, class_name) + else: + raise ImportError(f"No valid backend class found in {module_name}") + + except ImportError as e: + raise ImportError(f"Failed to import backend module for '{compiler_name}': {e}") + backend_config = ( convert_to_dict(args.backend_config) if args.backend_config is not None else {} ) @@ -327,7 +314,7 @@ def model_call(): help="Prefix path to model path list", ) parser.add_argument( - "--backend-config", + "--config", type=str, required=False, default=None, From 8c2b1c3799020940836c34ace41350557b1e7ac8 Mon Sep 17 00:00:00 2001 From: JewelRoam <2752594773@qq.com> Date: Fri, 16 Jan 2026 16:28:01 +0800 Subject: [PATCH 09/17] Change argument passing to json config --- graph_net_bench/test_compiler_util.py | 28 +++++ graph_net_bench/torch/eval_backend_diff.py | 50 +++----- graph_net_bench/torch/eval_backend_perf.py | 131 +++++++-------------- 3 files changed, 88 insertions(+), 121 deletions(-) diff --git a/graph_net_bench/test_compiler_util.py b/graph_net_bench/test_compiler_util.py index 
f587da2ff..de38a29fa 100644 --- a/graph_net_bench/test_compiler_util.py +++ b/graph_net_bench/test_compiler_util.py @@ -5,6 +5,7 @@ import time import subprocess import shutil +import base64 import numpy as np from dataclasses import dataclass from contextlib import contextmanager @@ -156,6 +157,24 @@ def print_basic_config(args, hardware_name, compile_framework_version): ) +def print_config(model_path, config, hardware_name, compiler_version): + model_path = os.path.normpath(model_path) + model_name = get_model_name(model_path) + print_with_log_prompt("[Config] model:", model_name, config.log_prompt) + print_with_log_prompt("[Config] seed:", config.seed, config.log_prompt) + print_with_log_prompt("[Config] device:", config.device, config.log_prompt) + print_with_log_prompt("[Config] hardware:", hardware_name, config.log_prompt) + print_with_log_prompt("[Config] op_lib:", config.op_lib, config.log_prompt) + print_with_log_prompt("[Config] compiler:", config.compiler, config.log_prompt) + print_with_log_prompt("[Config] warmup:", config.warmup, config.log_prompt) + print_with_log_prompt("[Config] trials:", config.trials, config.log_prompt) + print_with_log_prompt( + "[Config] compile_framework_version:", + compiler_version, + config.log_prompt, + ) + + def print_running_status(args, eager_success, compiled_success=None): def convert_to_str(b): return "success" if b else "failed" @@ -353,3 +372,12 @@ def get_allow_samples(allow_list, model_path_prefix): test_samples.append(os.path.join(model_path_prefix, line.strip())) return test_samples + + +def convert_to_dict(config_str): + if config_str in {None, "", "null", "None"}: + return {} + config_str = base64.b64decode(config_str).decode("utf-8") + config = json.loads(config_str) + assert isinstance(config, dict), f"config should be a dict. {config_str=}" + return config diff --git a/graph_net_bench/torch/eval_backend_diff.py b/graph_net_bench/torch/eval_backend_diff.py index 0e7229086..6f8dc550b 100755 --- a/graph_net_bench/torch/eval_backend_diff.py +++ b/graph_net_bench/torch/eval_backend_diff.py @@ -6,22 +6,12 @@ import os.path import traceback import json -import base64 import types from graph_net_bench import test_compiler_util from graph_net_bench import path_utils from .eval_backend_perf import eval_single_model_with_single_backend -def convert_to_dict(config_str): - if config_str in {None, "", "null", "None"}: - return {} - config_str = base64.b64decode(config_str).decode("utf-8") - config = json.loads(config_str) - assert isinstance(config, dict), f"config should be a dict. 
{config_str=}" - return config - - def compare_correctness(expected_out, compiled_out, args): eager_dtypes = [ ( @@ -220,41 +210,37 @@ def eval_single_model(args): EvalCfg = types.SimpleNamespace( reference_config=types.SimpleNamespace( - **convert_to_dict(args.config)["reference_config"] + **test_compiler_util.convert_to_dict(args.config)["reference_config"] ), target_config=types.SimpleNamespace( - **convert_to_dict(args.config)["target_config"] + **test_compiler_util.convert_to_dict(args.config)["target_config"] ), ) - ref_args = build_sub_args(EvalCfg.reference_config, args.model_path, ref_dir) - target_args = build_sub_args(EvalCfg.target_config, args.model_path, target_dir) + reference_config = build_sub_config(EvalCfg.reference_config) + target_config = build_sub_config(EvalCfg.target_config) - eval_single_model_with_single_backend(ref_args) - eval_single_model_with_single_backend(target_args) - compare_perf_diff(ref_args, args.model_path, ref_dir, target_dir) + eval_single_model_with_single_backend(args.model_path, ref_dir, reference_config) + eval_single_model_with_single_backend(args.model_path, target_dir, target_config) + compare_perf_diff(reference_config, args.model_path, ref_dir, target_dir) -def build_sub_args( - env_ns: types.SimpleNamespace, model_path: str, output_path: str -) -> argparse.Namespace: +def build_sub_config(config): sub = argparse.Namespace() - sub.model_path = model_path - sub.output_path = output_path - sub.seed = getattr(env_ns, "seed", 123) - sub.compiler = getattr(env_ns, "compiler", "inductor") - sub.device = getattr(env_ns, "device", "cuda") - sub.op_lib = getattr(env_ns, "op_lib", None) - sub.warmup = getattr(env_ns, "warmup", 3) - sub.trials = getattr(env_ns, "trials", 5) - sub.log_prompt = getattr(env_ns, "log_prompt", "graph-net-bench-log") - sub.model_path_prefix = getattr(env_ns, "model_path_prefix", None) - sub.backend_config = getattr(env_ns, "backend_config", None) + sub.seed = getattr(config, "seed", 123) + sub.compiler = getattr(config, "compiler", "inductor") + sub.device = getattr(config, "device", "cuda") + sub.op_lib = getattr(config, "op_lib", None) + sub.warmup = getattr(config, "warmup", 3) + sub.trials = getattr(config, "trials", 5) + sub.log_prompt = getattr(config, "log_prompt", "graph-net-bench-log") + sub.model_path_prefix = getattr(config, "model_path_prefix", None) + sub.backend_config = getattr(config, "backend_config", None) return sub def main(args): - config_dict = convert_to_dict(args.config) + config_dict = test_compiler_util.convert_to_dict(args.config) model_path_prefix = config_dict.get("reference_config", {}).get("model_path_prefix") if args.model_path_list and model_path_prefix: diff --git a/graph_net_bench/torch/eval_backend_perf.py b/graph_net_bench/torch/eval_backend_perf.py index 4d5ea94a5..3774d4176 100644 --- a/graph_net_bench/torch/eval_backend_perf.py +++ b/graph_net_bench/torch/eval_backend_perf.py @@ -11,7 +11,6 @@ import random import numpy as np import platform -import base64 from contextlib import redirect_stdout, redirect_stderr from graph_net_bench.torch.backend.graph_compiler_backend import GraphCompilerBackend from graph_net_bench import test_compiler_util @@ -75,20 +74,11 @@ def load_class_from_file( return model_class -def convert_to_dict(config_str): - if config_str is None or config_str == "None": - return {} - config_str = base64.b64decode(config_str).decode("utf-8") - config = json.loads(config_str) - assert isinstance(config, dict), f"config should be a dict. 
{config_str=}" - return config - - -def get_compiler_backend(args) -> GraphCompilerBackend: +def get_compiler_backend(config) -> GraphCompilerBackend: """ - Dynamically load backend class based on args.compiler + Dynamically load backend class based on config.compiler """ - compiler_name = args.compiler.lower() + compiler_name = config.compiler.lower() module_name = f"graph_net_bench.torch.backend.{compiler_name}_backend" try: @@ -108,56 +98,58 @@ def get_compiler_backend(args) -> GraphCompilerBackend: raise ImportError(f"Failed to import backend module for '{compiler_name}': {e}") backend_config = ( - convert_to_dict(args.backend_config) if args.backend_config is not None else {} + test_compiler_util.convert_to_dict(config.backend_config) + if config.backend_config is not None + else {} ) return backend_class(backend_config) -def get_model(args): - device = "xla" if args.compiler == "xla" else args.device +def get_model(model_path, config): + device = "xla" if config.compiler == "xla" else config.device # device: Torch device object specifying the target device for model loading (e.g., 'cuda', 'cpu', 'xla') model_class = load_class_from_file( - args.model_path, class_name="GraphModule", device=device + model_path, class_name="GraphModule", device=device ) - model = model_class().to(torch.device(args.device)) + model = model_class().to(torch.device(config.device)) return model -def get_input_dict(args): - inputs_params = utils.load_converted_from_text(f"{args.model_path}") +def get_input_dict(model_path, config): + inputs_params = utils.load_converted_from_text(f"{model_path}") params = inputs_params["weight_info"] for tensor_meta in params.values(): if "device" in tensor_meta["info"]: - tensor_meta["info"]["device"] = args.device + tensor_meta["info"]["device"] = config.device return { - k: utils.replay_tensor(v).to(torch.device(args.device)) + k: utils.replay_tensor(v).to(torch.device(config.device)) for k, v in params.items() } -def measure_performance(model_call, args, compiler): +def measure_performance(model_call, config, compiler): stats = {} outs = model_call() # Warmup runs - for _ in range(args.warmup): + for _ in range(config.warmup): model_call() compiler.synchronize() - hardware_name = get_hardward_name(args.device) + hardware_name = get_hardward_name(config.device) print( - f"[Profiling] Using device: {args.device} {hardware_name}, warm up {args.warmup}, trials {args.trials}", + f"[Profiling] Using device: {config.device} {hardware_name}, warm up {config.warmup}, trials {config.trials}", file=sys.stderr, flush=True, ) - if "cuda" in args.device: + if "cuda" in config.device: torch.cuda.empty_cache() e2e_times = [] gpu_times = [] - for i in range(args.trials): + for i in range(config.trials): # End-to-end timing (naive_timer) duration_box = test_compiler_util.DurationBox(-1) with test_compiler_util.naive_timer(duration_box, compiler.synchronize): @@ -185,7 +177,7 @@ def measure_performance(model_call, args, compiler): else: # CPU or other devices e2e_times = [] - for i in range(args.trials): + for i in range(config.trials): duration_box = test_compiler_util.DurationBox(-1) with test_compiler_util.naive_timer(duration_box, compiler.synchronize): model_call() @@ -200,34 +192,27 @@ def measure_performance(model_call, args, compiler): return outs, stats -def eval_single_model_with_single_backend(args): - set_seed(args.seed) - os.makedirs(args.output_path, exist_ok=True) - log_path = utils.get_log_path(args.output_path, args.model_path) - output_dump_path = 
utils.get_output_path(args.output_path, args.model_path) +def eval_single_model_with_single_backend(model_path, output_path, config): + set_seed(config.seed) + os.makedirs(output_path, exist_ok=True) + log_path = utils.get_log_path(output_path, model_path) + output_dump_path = utils.get_output_path(output_path, model_path) print(f"Log path: {log_path}", file=sys.stderr, flush=True) print(f"Outputs path: {output_dump_path}", file=sys.stderr, flush=True) with open(log_path, "w", encoding="utf-8") as log_f: with redirect_stdout(log_f), redirect_stderr(log_f): - compiler = get_compiler_backend(args) + compiler = get_compiler_backend(config) - input_dict = get_input_dict(args) - model = get_model(args) + input_dict = get_input_dict(model_path, config) + model = get_model(model_path, config) model.eval() - test_compiler_util.print_with_log_prompt( - "[Config] seed:", args.seed, args.log_prompt - ) - - test_compiler_util.print_basic_config( - args, - get_hardward_name(args.device), - get_compiler_version(args.compiler), - ) - - test_compiler_util.print_with_log_prompt( - "[Config] op_lib:", args.op_lib, args.log_prompt + test_compiler_util.print_config( + model_path, + config, + get_hardward_name(config.device), + get_compiler_version(config.compiler), ) success = False @@ -238,7 +223,7 @@ def eval_single_model_with_single_backend(args): def model_call(): return compiled_model(**input_dict) - outputs, time_stats = measure_performance(model_call, args, compiler) + outputs, time_stats = measure_performance(model_call, config, compiler) success = True except Exception as e: print( @@ -247,11 +232,11 @@ def model_call(): flush=True, ) - test_compiler_util.print_running_status(args, success) + test_compiler_util.print_running_status(config, success) if success: torch.save(outputs, str(output_dump_path)) test_compiler_util.print_with_log_prompt( - "[Performance][eager]:", json.dumps(time_stats), args.log_prompt + "[Performance][eager]:", json.dumps(time_stats), config.log_prompt ) with open(log_path, "r", encoding="utf-8") as f: @@ -277,42 +262,6 @@ def model_call(): default="/tmp/test_save", help="Path to save outputs", ) - parser.add_argument("--seed", type=int, required=False, default=123) - parser.add_argument( - "--compiler", - type=str, - required=False, - default="inductor", - help="Path to customized compiler python file", - ) - parser.add_argument( - "--device", - type=str, - required=False, - default="cuda", - help="Device for testing the compiler (e.g., 'cpu' or 'cuda')", - ) - parser.add_argument("--op-lib", type=str, required=False, default=None) - parser.add_argument( - "--warmup", type=int, required=False, default=3, help="Number of warmup steps" - ) - parser.add_argument( - "--trials", type=int, required=False, default=5, help="Number of timing trials" - ) - parser.add_argument( - "--log-prompt", - type=str, - required=False, - default="graph-net-bench-log", - help="Log prompt for performance log filtering.", - ) - parser.add_argument( - "--model-path-prefix", - type=str, - required=False, - default=None, - help="Prefix path to model path list", - ) parser.add_argument( "--config", type=str, @@ -321,4 +270,8 @@ def model_call(): help="base64 encode configuration json.", ) args = parser.parse_args() - eval_single_model_with_single_backend(args=args) + eval_single_model_with_single_backend( + args.model_path, + args.output_path, + **test_compiler_util.convert_to_dict(args.config), + ) From db877bdb37c4cf20ca29414340d6a5707f072be9 Mon Sep 17 00:00:00 2001 From: JewelRoam 
<2752594773@qq.com> Date: Fri, 16 Jan 2026 18:11:20 +0800 Subject: [PATCH 10/17] Add check_and_complete_args --- graph_net_bench/test_compiler_util.py | 22 ++--- graph_net_bench/torch/eval_backend_diff.py | 73 ++++++--------- graph_net_bench/torch/eval_backend_perf.py | 100 +++++++++++++-------- 3 files changed, 101 insertions(+), 94 deletions(-) diff --git a/graph_net_bench/test_compiler_util.py b/graph_net_bench/test_compiler_util.py index de38a29fa..44ccc703e 100644 --- a/graph_net_bench/test_compiler_util.py +++ b/graph_net_bench/test_compiler_util.py @@ -157,21 +157,21 @@ def print_basic_config(args, hardware_name, compile_framework_version): ) -def print_config(model_path, config, hardware_name, compiler_version): - model_path = os.path.normpath(model_path) +def print_config(args, hardware_name, compiler_version): + model_path = os.path.normpath(args.model_path) model_name = get_model_name(model_path) - print_with_log_prompt("[Config] model:", model_name, config.log_prompt) - print_with_log_prompt("[Config] seed:", config.seed, config.log_prompt) - print_with_log_prompt("[Config] device:", config.device, config.log_prompt) - print_with_log_prompt("[Config] hardware:", hardware_name, config.log_prompt) - print_with_log_prompt("[Config] op_lib:", config.op_lib, config.log_prompt) - print_with_log_prompt("[Config] compiler:", config.compiler, config.log_prompt) - print_with_log_prompt("[Config] warmup:", config.warmup, config.log_prompt) - print_with_log_prompt("[Config] trials:", config.trials, config.log_prompt) + print_with_log_prompt("[Config] model:", model_name, args.log_prompt) + print_with_log_prompt("[Config] seed:", args.seed, args.log_prompt) + print_with_log_prompt("[Config] device:", args.device, args.log_prompt) + print_with_log_prompt("[Config] hardware:", hardware_name, args.log_prompt) + print_with_log_prompt("[Config] op_lib:", args.op_lib, args.log_prompt) + print_with_log_prompt("[Config] compiler:", args.compiler, args.log_prompt) + print_with_log_prompt("[Config] warmup:", args.warmup, args.log_prompt) + print_with_log_prompt("[Config] trials:", args.trials, args.log_prompt) print_with_log_prompt( "[Config] compile_framework_version:", compiler_version, - config.log_prompt, + args.log_prompt, ) diff --git a/graph_net_bench/torch/eval_backend_diff.py b/graph_net_bench/torch/eval_backend_diff.py index 6f8dc550b..c230f6bd8 100755 --- a/graph_net_bench/torch/eval_backend_diff.py +++ b/graph_net_bench/torch/eval_backend_diff.py @@ -146,10 +146,9 @@ def eval_multi_models(args, model_path_prefix=None, use_model_list=False): try: single_model_args = argparse.Namespace() - single_model_args.model_path = model_path - single_model_args.config = args.config single_model_args.model_path_list = None + single_model_args.config = args.config if path_utils.is_single_model_dir(model_path): eval_single_model(single_model_args) @@ -158,8 +157,8 @@ def eval_multi_models(args, model_path_prefix=None, use_model_list=False): for submodel_path in submodel_paths: sub_args = argparse.Namespace() sub_args.model_path = submodel_path - sub_args.config = args.config sub_args.model_path_list = None + sub_args.config = args.config eval_single_model(sub_args) cmd_ret = 0 except KeyboardInterrupt: @@ -184,60 +183,44 @@ def eval_multi_models(args, model_path_prefix=None, use_model_list=False): print(f"- {model_path}", file=sys.stderr, flush=True) -def compare_perf_diff(args, model_path, ref_dir, target_dir): +def eval_single_model(args): + ref_dir = "/tmp/eval_perf_diff/A" + target_dir = 
"/tmp/eval_perf_diff/B" + + ref_args = types.SimpleNamespace( + model_path=args.model_path, + output_path=ref_dir, + **test_compiler_util.convert_to_dict(args.config)["reference_config"], + ) + target_args = types.SimpleNamespace( + model_path=args.model_path, + output_path=target_dir, + **test_compiler_util.convert_to_dict(args.config)["target_config"], + ) + + eval_single_model_with_single_backend(ref_args) + eval_single_model_with_single_backend(target_args) + + # compare_perf_diff # A - ref_dump_path = utils.get_output_path(ref_dir, model_path) + ref_dump_path = utils.get_output_path(ref_dir, args.model_path) ref_out = torch.load(str(ref_dump_path)) - ref_log_path = utils.get_log_path(ref_dir, model_path) + ref_log_path = utils.get_log_path(ref_dir, args.model_path) ref_time_stats = parse_time_stats_from_reference_log(ref_log_path) # B - target_dump_path = utils.get_output_path(target_dir, model_path) + target_dump_path = utils.get_output_path(target_dir, args.model_path) target_out = torch.load(str(target_dump_path)) - target_log_path = utils.get_log_path(target_dir, model_path) + target_log_path = utils.get_log_path(target_dir, args.model_path) target_time_stats = parse_time_stats_from_reference_log(target_log_path) - compare_correctness(ref_out, target_out, args) - - test_compiler_util.print_times_and_speedup(args, ref_time_stats, target_time_stats) - - -def eval_single_model(args): - ref_dir = "/tmp/eval_perf_diff/A" - target_dir = "/tmp/eval_perf_diff/B" - - EvalCfg = types.SimpleNamespace( - reference_config=types.SimpleNamespace( - **test_compiler_util.convert_to_dict(args.config)["reference_config"] - ), - target_config=types.SimpleNamespace( - **test_compiler_util.convert_to_dict(args.config)["target_config"] - ), + compare_correctness(ref_out, target_out, ref_args) + test_compiler_util.print_times_and_speedup( + ref_args, ref_time_stats, target_time_stats ) - reference_config = build_sub_config(EvalCfg.reference_config) - target_config = build_sub_config(EvalCfg.target_config) - - eval_single_model_with_single_backend(args.model_path, ref_dir, reference_config) - eval_single_model_with_single_backend(args.model_path, target_dir, target_config) - compare_perf_diff(reference_config, args.model_path, ref_dir, target_dir) - - -def build_sub_config(config): - sub = argparse.Namespace() - sub.seed = getattr(config, "seed", 123) - sub.compiler = getattr(config, "compiler", "inductor") - sub.device = getattr(config, "device", "cuda") - sub.op_lib = getattr(config, "op_lib", None) - sub.warmup = getattr(config, "warmup", 3) - sub.trials = getattr(config, "trials", 5) - sub.log_prompt = getattr(config, "log_prompt", "graph-net-bench-log") - sub.model_path_prefix = getattr(config, "model_path_prefix", None) - sub.backend_config = getattr(config, "backend_config", None) - return sub - def main(args): config_dict = test_compiler_util.convert_to_dict(args.config) diff --git a/graph_net_bench/torch/eval_backend_perf.py b/graph_net_bench/torch/eval_backend_perf.py index 3774d4176..5c8586f30 100644 --- a/graph_net_bench/torch/eval_backend_perf.py +++ b/graph_net_bench/torch/eval_backend_perf.py @@ -11,6 +11,7 @@ import random import numpy as np import platform +import types from contextlib import redirect_stdout, redirect_stderr from graph_net_bench.torch.backend.graph_compiler_backend import GraphCompilerBackend from graph_net_bench import test_compiler_util @@ -74,11 +75,11 @@ def load_class_from_file( return model_class -def get_compiler_backend(config) -> GraphCompilerBackend: +def 
get_compiler_backend(args) -> GraphCompilerBackend: """ - Dynamically load backend class based on config.compiler + Dynamically load backend class based on args.compiler """ - compiler_name = config.compiler.lower() + compiler_name = args.compiler.lower() module_name = f"graph_net_bench.torch.backend.{compiler_name}_backend" try: @@ -98,58 +99,57 @@ def get_compiler_backend(config) -> GraphCompilerBackend: raise ImportError(f"Failed to import backend module for '{compiler_name}': {e}") backend_config = ( - test_compiler_util.convert_to_dict(config.backend_config) - if config.backend_config is not None + test_compiler_util.convert_to_dict(args.backend_config) + if args.backend_config is not None else {} ) return backend_class(backend_config) -def get_model(model_path, config): - device = "xla" if config.compiler == "xla" else config.device +def get_model(args): + device = "xla" if args.compiler == "xla" else args.device # device: Torch device object specifying the target device for model loading (e.g., 'cuda', 'cpu', 'xla') model_class = load_class_from_file( - model_path, class_name="GraphModule", device=device + args.model_path, class_name="GraphModule", device=device ) - model = model_class().to(torch.device(config.device)) + model = model_class().to(torch.device(args.device)) return model -def get_input_dict(model_path, config): - inputs_params = utils.load_converted_from_text(f"{model_path}") +def get_input_dict(args): + inputs_params = utils.load_converted_from_text(f"{args.model_path}") params = inputs_params["weight_info"] for tensor_meta in params.values(): if "device" in tensor_meta["info"]: - tensor_meta["info"]["device"] = config.device + tensor_meta["info"]["device"] = args.device return { - k: utils.replay_tensor(v).to(torch.device(config.device)) + k: utils.replay_tensor(v).to(torch.device(args.device)) for k, v in params.items() } -def measure_performance(model_call, config, compiler): +def measure_performance(model_call, args, compiler): stats = {} outs = model_call() # Warmup runs - for _ in range(config.warmup): + for _ in range(args.warmup): model_call() compiler.synchronize() - hardware_name = get_hardward_name(config.device) print( - f"[Profiling] Using device: {config.device} {hardware_name}, warm up {config.warmup}, trials {config.trials}", + f"[Profiling] Warm up {args.warmup}, Trials {args.trials}", file=sys.stderr, flush=True, ) - if "cuda" in config.device: + if "cuda" in args.device: torch.cuda.empty_cache() e2e_times = [] gpu_times = [] - for i in range(config.trials): + for i in range(args.trials): # End-to-end timing (naive_timer) duration_box = test_compiler_util.DurationBox(-1) with test_compiler_util.naive_timer(duration_box, compiler.synchronize): @@ -177,7 +177,7 @@ def measure_performance(model_call, config, compiler): else: # CPU or other devices e2e_times = [] - for i in range(config.trials): + for i in range(args.trials): duration_box = test_compiler_util.DurationBox(-1) with test_compiler_util.naive_timer(duration_box, compiler.synchronize): model_call() @@ -192,27 +192,27 @@ def measure_performance(model_call, config, compiler): return outs, stats -def eval_single_model_with_single_backend(model_path, output_path, config): - set_seed(config.seed) - os.makedirs(output_path, exist_ok=True) - log_path = utils.get_log_path(output_path, model_path) - output_dump_path = utils.get_output_path(output_path, model_path) +def eval_single_model_with_single_backend(args): + check_and_complete_args(args) + set_seed(args.seed) + os.makedirs(args.output_path, 
exist_ok=True) + log_path = utils.get_log_path(args.output_path, args.model_path) + output_dump_path = utils.get_output_path(args.output_path, args.model_path) print(f"Log path: {log_path}", file=sys.stderr, flush=True) print(f"Outputs path: {output_dump_path}", file=sys.stderr, flush=True) with open(log_path, "w", encoding="utf-8") as log_f: with redirect_stdout(log_f), redirect_stderr(log_f): - compiler = get_compiler_backend(config) + compiler = get_compiler_backend(args) - input_dict = get_input_dict(model_path, config) - model = get_model(model_path, config) + input_dict = get_input_dict(args) + model = get_model(args) model.eval() test_compiler_util.print_config( - model_path, - config, - get_hardward_name(config.device), - get_compiler_version(config.compiler), + args, + get_hardward_name(args.device), + get_compiler_version(args.compiler), ) success = False @@ -223,7 +223,7 @@ def eval_single_model_with_single_backend(model_path, output_path, config): def model_call(): return compiled_model(**input_dict) - outputs, time_stats = measure_performance(model_call, config, compiler) + outputs, time_stats = measure_performance(model_call, args, compiler) success = True except Exception as e: print( @@ -232,11 +232,11 @@ def model_call(): flush=True, ) - test_compiler_util.print_running_status(config, success) + test_compiler_util.print_running_status(args, success) if success: torch.save(outputs, str(output_dump_path)) test_compiler_util.print_with_log_prompt( - "[Performance][eager]:", json.dumps(time_stats), config.log_prompt + "[Performance][eager]:", json.dumps(time_stats), args.log_prompt ) with open(log_path, "r", encoding="utf-8") as f: @@ -244,6 +244,29 @@ def model_call(): print(content, file=sys.stderr, flush=True) +def check_and_complete_args(args): + """ + Ensure all required arguments are present with default values if missing + """ + defaults = { + "model_path": None, # Model path + "output_path": None, # Log and output directory + "seed": 123, # Random seed + "compiler": "inductor", # Compiler name + "device": "cuda", # Device for testing the compiler (e.g., 'cpu' or 'cuda') + "op_lib": None, # Operator library + "warmup": 3, # Number of warmup steps + "trials": 5, # Number of timing trials + "log_prompt": "graph-net-bench-log", # Log prompt for performance log filtering + "model_path_prefix": None, # Prefix path to model path in args.model-path + "backend_config": None, # backend configuration json + } + + for key, default in defaults.items(): + if not hasattr(args, key): + setattr(args, key, default) + + if __name__ == "__main__": parser = argparse.ArgumentParser( description="Single Backend Performance Evaluation" @@ -270,8 +293,9 @@ def model_call(): help="base64 encode configuration json.", ) args = parser.parse_args() - eval_single_model_with_single_backend( - args.model_path, - args.output_path, + mut_args = types.SimpleNamespace( + model_path=args.model_path, + output_path=args.output_path, **test_compiler_util.convert_to_dict(args.config), ) + eval_single_model_with_single_backend(mut_args) From 0e6ec45faf2fe026640c7535e0ed4d2e567dfe02 Mon Sep 17 00:00:00 2001 From: JewelRoam <2752594773@qq.com> Date: Fri, 16 Jan 2026 18:25:24 +0800 Subject: [PATCH 11/17] Simplify --- graph_net_bench/torch/eval_backend_diff.py | 78 +++++++++++----------- 1 file changed, 40 insertions(+), 38 deletions(-) diff --git a/graph_net_bench/torch/eval_backend_diff.py b/graph_net_bench/torch/eval_backend_diff.py index c230f6bd8..ecafb71ae 100755 --- 
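The check_and_complete_args() helper introduced above lets callers pass a sparse namespace and fall back to the listed defaults. A minimal sketch, assuming the helper is in scope (the paths and compiler below are illustrative):

import types

sparse = types.SimpleNamespace(
    model_path="samples/demo_model",  # hypothetical
    output_path="/tmp/perf_out",      # hypothetical
    compiler="tvm",
)
check_and_complete_args(sparse)
assert sparse.warmup == 3 and sparse.trials == 5 and sparse.device == "cuda"
assert sparse.compiler == "tvm"  # explicitly-set fields are left untouched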
a/graph_net_bench/torch/eval_backend_diff.py +++ b/graph_net_bench/torch/eval_backend_diff.py @@ -109,72 +109,74 @@ def parse_time_stats_from_reference_log(log_path): return time_stats -def eval_multi_models(args, model_path_prefix=None, use_model_list=False): - sample_idx = 0 - failed_samples = [] - module_name = os.path.splitext(os.path.basename(__file__))[0] - +def _get_model_paths(args, model_path_prefix, use_model_list): if use_model_list: - assert os.path.isdir(model_path_prefix) - assert os.path.isfile(args.model_path_list) + assert os.path.isdir(model_path_prefix) and os.path.isfile(args.model_path_list) + test_samples = test_compiler_util.get_allow_samples( args.model_path_list, model_path_prefix ) - model_paths = [] - for rel_model_path in test_samples: - model_path = os.path.join(model_path_prefix, rel_model_path) - if os.path.exists(model_path) and os.path.exists( - os.path.join(model_path, "model.py") - ): - model_paths.append(model_path) + model_paths = [ + os.path.join(model_path_prefix, rel_model_path) + for rel_model_path in test_samples + if os.path.exists( + os.path.join(model_path_prefix, rel_model_path, "model.py") + ) + ] else: assert os.path.isdir(args.model_path) + test_samples = test_compiler_util.get_allow_samples( args.model_path_list, model_path_prefix ) - model_paths = [] - for model_path in path_utils.get_recursively_model_path(args.model_path): - if test_samples is None or os.path.abspath(model_path) in test_samples: - model_paths.append(model_path) + model_paths = [ + model_path + for model_path in path_utils.get_recursively_model_path(args.model_path) + if test_samples is None or os.path.abspath(model_path) in test_samples + ] + + return model_paths + - for model_path in model_paths: +def _create_model_args(model_path, config): + args = argparse.Namespace() + args.model_path = model_path + args.model_path_list = None + args.config = config + return args + + +def eval_multi_models(args, model_path_prefix=None, use_model_list=False): + module_name = os.path.splitext(os.path.basename(__file__))[0] + + model_paths = _get_model_paths(args, model_path_prefix, use_model_list) + failed_samples = [] + for sample_idx, model_path in enumerate(model_paths): print( f"[{sample_idx}] {module_name}, model_path: {model_path}", file=sys.stderr, flush=True, ) - try: - single_model_args = argparse.Namespace() - single_model_args.model_path = model_path - single_model_args.model_path_list = None - single_model_args.config = args.config - if path_utils.is_single_model_dir(model_path): - eval_single_model(single_model_args) + eval_single_model(_create_model_args(model_path, args.config)) else: - submodel_paths = path_utils.get_recursively_model_path(model_path) - for submodel_path in submodel_paths: - sub_args = argparse.Namespace() - sub_args.model_path = submodel_path - sub_args.model_path_list = None - sub_args.config = args.config - eval_single_model(sub_args) - cmd_ret = 0 + for submodel_path in path_utils.get_recursively_model_path(model_path): + eval_single_model(_create_model_args(submodel_path, args.config)) + success = True except KeyboardInterrupt: print("KeyboardInterrupt") sys.exit(1) except Exception: print("\n--- Full Traceback ---") traceback.print_exc() - cmd_ret = 1 + success = False - if cmd_ret != 0: + if not success: failed_samples.append(model_path) - sample_idx += 1 print( - f"Totally {sample_idx} verified samples, failed {len(failed_samples)} samples.", + f"Totally {len(model_paths)} verified samples, failed {len(failed_samples)} samples.", 
file=sys.stderr, flush=True, ) From a5fa17369258592c16abdd0ef69a47a92c5f677c Mon Sep 17 00:00:00 2001 From: JewelRoam <2752594773@qq.com> Date: Tue, 20 Jan 2026 13:57:16 +0800 Subject: [PATCH 12/17] modify args.config to separate args.reference_config and args.target_config --- graph_net_bench/torch/eval_backend_diff.py | 39 +++++++++++++--------- test/eval_backend_diff_test.sh | 31 ++++++++--------- 2 files changed, 40 insertions(+), 30 deletions(-) diff --git a/graph_net_bench/torch/eval_backend_diff.py b/graph_net_bench/torch/eval_backend_diff.py index ecafb71ae..c254eafaf 100755 --- a/graph_net_bench/torch/eval_backend_diff.py +++ b/graph_net_bench/torch/eval_backend_diff.py @@ -138,11 +138,12 @@ def _get_model_paths(args, model_path_prefix, use_model_list): return model_paths -def _create_model_args(model_path, config): +def _create_model_args(model_path, reference_config, target_config): args = argparse.Namespace() args.model_path = model_path args.model_path_list = None - args.config = config + args.reference_config = reference_config + args.target_config = target_config return args @@ -157,12 +158,15 @@ def eval_multi_models(args, model_path_prefix=None, use_model_list=False): file=sys.stderr, flush=True, ) + + model_args = argparse.Namespace() + model_args.model_path = model_path + model_args.model_path_list = None + model_args.reference_config = args.reference_config + model_args.target_config = args.target_config + try: - if path_utils.is_single_model_dir(model_path): - eval_single_model(_create_model_args(model_path, args.config)) - else: - for submodel_path in path_utils.get_recursively_model_path(model_path): - eval_single_model(_create_model_args(submodel_path, args.config)) + eval_single_model(model_args) success = True except KeyboardInterrupt: print("KeyboardInterrupt") @@ -192,12 +196,12 @@ def eval_single_model(args): ref_args = types.SimpleNamespace( model_path=args.model_path, output_path=ref_dir, - **test_compiler_util.convert_to_dict(args.config)["reference_config"], + **test_compiler_util.convert_to_dict(args.reference_config), ) target_args = types.SimpleNamespace( model_path=args.model_path, output_path=target_dir, - **test_compiler_util.convert_to_dict(args.config)["target_config"], + **test_compiler_util.convert_to_dict(args.target_config), ) eval_single_model_with_single_backend(ref_args) @@ -225,8 +229,8 @@ def eval_single_model(args): def main(args): - config_dict = test_compiler_util.convert_to_dict(args.config) - model_path_prefix = config_dict.get("reference_config", {}).get("model_path_prefix") + ref_config = test_compiler_util.convert_to_dict(args.reference_config) + model_path_prefix = ref_config.get("model_path_prefix") if args.model_path_list and model_path_prefix: eval_multi_models(args, model_path_prefix, use_model_list=True) @@ -258,11 +262,16 @@ def main(args): help="Path to samples list, each line contains a sample path", ) parser.add_argument( - "--config", + "--reference-config", type=str, - required=False, - default=None, - help="base64 encode configuration json.", + required=True, + help="base64 encode reference config json.", + ) + parser.add_argument( + "--target-config", + type=str, + required=True, + help="base64 encode target config json.", ) args = parser.parse_args() main(args=args) diff --git a/test/eval_backend_diff_test.sh b/test/eval_backend_diff_test.sh index 17bba712e..1eaca5ecd 100755 --- a/test/eval_backend_diff_test.sh +++ b/test/eval_backend_diff_test.sh @@ -8,22 +8,23 @@ 
model_list="$AI4C_ROOT/test/workspace_eval_backend_diff/sample_list.txt" python3 -m graph_net_bench.torch.eval_backend_diff \ --model-path-list $model_list \ - --config $(base64 -w 0 <&1 | tee "$OUTPUT_PATH/validation.log" From 0c9e07b8d93e9f1aba28569a86995edab583b383 Mon Sep 17 00:00:00 2001 From: JewelRoam <2752594773@qq.com> Date: Tue, 20 Jan 2026 14:24:51 +0800 Subject: [PATCH 13/17] reuse some code --- graph_net/torch/test_reference_device.py | 26 ++++-------------------- graph_net/torch/test_target_device.py | 22 +++++++------------- 2 files changed, 11 insertions(+), 37 deletions(-) diff --git a/graph_net/torch/test_reference_device.py b/graph_net/torch/test_reference_device.py index f022d2ba5..33d0ec8e4 100644 --- a/graph_net/torch/test_reference_device.py +++ b/graph_net/torch/test_reference_device.py @@ -11,30 +11,12 @@ from graph_net_bench import test_compiler_util from graph_net import model_path_util from graph_net_bench.torch import test_compiler - - -def get_reference_log_path(reference_dir, model_path): - model_name = model_path.split("torch_samples/")[-1].replace(os.sep, "_") - return os.path.join(reference_dir, f"{model_name}.log") - - -def get_reference_output_path(reference_dir, model_path): - model_name = model_path.split("torch_samples/")[-1].replace(os.sep, "_") - return os.path.join(reference_dir, f"{model_name}.pth") - - -def register_op_lib(op_lib): - if op_lib == "flaggems": - import flag_gems - - flag_gems.enable() - else: - pass +from graph_net_bench.torch import utils, eval_backend_perf def test_single_model(args): - ref_log = get_reference_log_path(args.reference_dir, args.model_path) - ref_dump = get_reference_output_path(args.reference_dir, args.model_path) + ref_log = utils.get_log_path(args.reference_dir, args.model_path) + ref_dump = utils.get_output_path(args.reference_dir, args.model_path) print(f"Reference log path: {ref_log}", file=sys.stderr, flush=True) print(f"Reference outputs path: {ref_dump}", file=sys.stderr, flush=True) @@ -149,7 +131,7 @@ def main(args): ref_dump_dir.mkdir(parents=True, exist_ok=True) if path_utils.is_single_model_dir(args.model_path): - register_op_lib(args.op_lib) + eval_backend_perf.register_op_lib(args.op_lib) test_single_model(args) else: test_multi_models(args) diff --git a/graph_net/torch/test_target_device.py b/graph_net/torch/test_target_device.py index ec2085a32..cf56dee69 100644 --- a/graph_net/torch/test_target_device.py +++ b/graph_net/torch/test_target_device.py @@ -8,7 +8,7 @@ from graph_net_bench import path_utils from graph_net_bench import test_compiler_util from graph_net import model_path_util -from graph_net_bench.torch import test_compiler, test_reference_device +from graph_net_bench.torch import test_compiler, utils, eval_backend_perf def parse_config_from_reference_log(log_path): @@ -46,9 +46,7 @@ def parse_time_stats_from_reference_log(log_path): def update_args_and_set_seed(args, model_path): - ref_log = test_reference_device.get_reference_log_path( - args.reference_dir, model_path - ) + ref_log = utils.get_log_path(args.reference_dir, model_path) config = parse_config_from_reference_log(ref_log) vars(args)["model_path"] = model_path vars(args)["compiler"] = config.get("compiler") @@ -100,14 +98,10 @@ def model_call(): if test_compiler_util.get_subgraph_tag(args.model_path): model_name += "_" + test_compiler_util.get_subgraph_tag(args.model_path) - ref_dump = test_reference_device.get_reference_output_path( - args.reference_dir, args.model_path - ) + ref_dump = 
utils.get_output_path(args.reference_dir, args.model_path) ref_out = torch.load(str(ref_dump)) - ref_log = test_reference_device.get_reference_log_path( - args.reference_dir, args.model_path - ) + ref_log = utils.get_log_path(args.reference_dir, args.model_path) ref_time_stats = parse_time_stats_from_reference_log(ref_log) if success: @@ -117,7 +111,7 @@ def model_call(): def is_reference_log_exist(reference_dir, model_path): - log_path = test_reference_device.get_reference_log_path(reference_dir, model_path) + log_path = utils.get_log_path(reference_dir, model_path) return os.path.isfile(log_path) @@ -171,16 +165,14 @@ def main(args): if path_utils.is_single_model_dir(args.model_path): if args.op_lib == "origin": - ref_log = test_reference_device.get_reference_log_path( - args.reference_dir, args.model_path - ) + ref_log = utils.get_log_path(args.reference_dir, args.model_path) config = parse_config_from_reference_log(ref_log) vars(args)["op_lib"] = config.get("op_lib") test_compiler_util.print_with_log_prompt( "[Config] op_lib:", args.op_lib, args.log_prompt ) else: - test_reference_device.register_op_lib(args.op_lib) + eval_backend_perf.register_op_lib(args.op_lib) args = update_args_and_set_seed(args, args.model_path) test_single_model(args) From ebd46af74be6fb0ee7828cb4eca27754afcf1a11 Mon Sep 17 00:00:00 2001 From: JewelRoam <2752594773@qq.com> Date: Tue, 20 Jan 2026 14:37:02 +0800 Subject: [PATCH 14/17] Add unittest on test device; minor fix --- graph_net/torch/test_reference_device.py | 2 +- test/eval_device_diff_test.sh | 37 ++++++++++++++++++++++++ 2 files changed, 38 insertions(+), 1 deletion(-) create mode 100755 test/eval_device_diff_test.sh diff --git a/graph_net/torch/test_reference_device.py b/graph_net/torch/test_reference_device.py index 33d0ec8e4..6a28095e4 100644 --- a/graph_net/torch/test_reference_device.py +++ b/graph_net/torch/test_reference_device.py @@ -119,7 +119,7 @@ def test_multi_models(args): def main(args): assert os.path.isdir(args.model_path) # Support all torch compilers - valid_compilers = list(test_compiler.registry_backend.keys()) + valid_compilers = list(test_compiler.compiler_backend_name2class.keys()) assert ( args.compiler in valid_compilers ), f"Compiler must be one of {valid_compilers}" diff --git a/test/eval_device_diff_test.sh b/test/eval_device_diff_test.sh new file mode 100755 index 000000000..10e0ab766 --- /dev/null +++ b/test/eval_device_diff_test.sh @@ -0,0 +1,37 @@ +#!/bin/bash + +AI4C_ROOT=$(python3 -c "import graph_net_bench; import os; print(os.path.dirname(os.path.dirname(graph_net_bench.__file__)))") +OUTPUT_PATH=/tmp/workspace_eval_device_diff_test +REFERENCE_DIR="$OUTPUT_PATH/reference" + +mkdir -p "$OUTPUT_PATH" +mkdir -p "$REFERENCE_DIR" + +MODEL_PATH="$AI4C_ROOT/samples/ultralytics/yolov3-tinyu" + +echo "==========================================" +echo "Step 1: Generate reference on device A (simulated)" +echo "==========================================" +python3 -m graph_net.torch.test_reference_device \ + --model-path "$MODEL_PATH" \ + --compiler nope \ + --device cuda \ + --warmup 1 \ + --trials 1 \ + --reference-dir "$REFERENCE_DIR" \ + 2>&1 | tee "$OUTPUT_PATH/reference.log" + +echo "" +echo "==========================================" +echo "Step 2: Compare on device B (simulated)" +echo "==========================================" +python3 -m graph_net.torch.test_target_device \ + --model-path "$MODEL_PATH" \ + --device cuda \ + --reference-dir "$REFERENCE_DIR" \ + 2>&1 | tee "$OUTPUT_PATH/target.log" + +echo "" +echo 
"==========================================" +echo "Test completed. Logs saved to: $OUTPUT_PATH" +echo "==========================================" \ No newline at end of file From 74b5238ef64ed5a1c08eceed44bbe0e77cc3f72c Mon Sep 17 00:00:00 2001 From: JewelRoam <2752594773@qq.com> Date: Tue, 20 Jan 2026 15:15:45 +0800 Subject: [PATCH 15/17] reuse eval_backend_perf, eval_backend_diff instead of test_compiler in test_device --- graph_net/torch/test_reference_device.py | 96 +++++-------------- graph_net/torch/test_target_device.py | 114 +++++++++-------------- 2 files changed, 66 insertions(+), 144 deletions(-) diff --git a/graph_net/torch/test_reference_device.py b/graph_net/torch/test_reference_device.py index 6a28095e4..bb80c1e8c 100644 --- a/graph_net/torch/test_reference_device.py +++ b/graph_net/torch/test_reference_device.py @@ -1,76 +1,33 @@ import argparse -import torch import os -from pathlib import Path -from contextlib import redirect_stdout, redirect_stderr -import json import sys -import traceback +import types +from pathlib import Path from graph_net_bench import path_utils -from graph_net_bench import test_compiler_util from graph_net import model_path_util -from graph_net_bench.torch import test_compiler -from graph_net_bench.torch import utils, eval_backend_perf +from graph_net_bench.torch import eval_backend_perf + + +def convert_args_for_eval_backend(args): + """Convert test_reference_device args to eval_backend_perf args format.""" + return types.SimpleNamespace( + model_path=args.model_path, + output_path=args.reference_dir, + seed=args.seed, + compiler=args.compiler, + device=args.device, + op_lib=args.op_lib, + warmup=args.warmup, + trials=args.trials, + log_prompt=args.log_prompt, + backend_config=getattr(args, "config", None), + ) def test_single_model(args): - ref_log = utils.get_log_path(args.reference_dir, args.model_path) - ref_dump = utils.get_output_path(args.reference_dir, args.model_path) - print(f"Reference log path: {ref_log}", file=sys.stderr, flush=True) - print(f"Reference outputs path: {ref_dump}", file=sys.stderr, flush=True) - - with open(ref_log, "w", encoding="utf-8") as log_f: - with redirect_stdout(log_f), redirect_stderr(log_f): - compiler = test_compiler.get_compiler_backend(args) - - input_dict = test_compiler.get_input_dict(args) - model = test_compiler.get_model(args) - model.eval() - - test_compiler_util.print_with_log_prompt( - "[Config] seed:", args.seed, args.log_prompt - ) - - test_compiler_util.print_basic_config( - args, - test_compiler.get_hardward_name(args), - test_compiler.get_compile_framework_version(args), - ) - - test_compiler_util.print_with_log_prompt( - "[Config] op_lib:", args.op_lib, args.log_prompt - ) - - success = False - time_stats = {} - try: - compiled_model = compiler(model) - - def model_call(): - return compiled_model(**input_dict) - - outputs, time_stats = test_compiler.measure_performance( - model_call, args, compiler - ) - success = True - except Exception as e: - print( - f"Run model failed: {str(e)}\n{traceback.format_exc()}", - file=sys.stderr, - flush=True, - ) - - test_compiler_util.print_running_status(args, success) - if success: - torch.save(outputs, str(ref_dump)) - test_compiler_util.print_with_log_prompt( - "[Performance][eager]:", json.dumps(time_stats), args.log_prompt - ) - - with open(ref_log, "r", encoding="utf-8") as f: - content = f.read() - print(content, file=sys.stderr, flush=True) + eval_args = convert_args_for_eval_backend(args) + 
eval_backend_perf.eval_single_model_with_single_backend(eval_args) def test_multi_models(args): @@ -118,14 +75,9 @@ def test_multi_models(args): def main(args): assert os.path.isdir(args.model_path) - # Support all torch compilers - valid_compilers = list(test_compiler.compiler_backend_name2class.keys()) - assert ( - args.compiler in valid_compilers - ), f"Compiler must be one of {valid_compilers}" - assert args.device in ["cuda"] - - test_compiler.set_seed(random_seed=args.seed) + assert args.device in ["cuda", "cpu"] + + eval_backend_perf.set_seed(args.seed) ref_dump_dir = Path(args.reference_dir) ref_dump_dir.mkdir(parents=True, exist_ok=True) diff --git a/graph_net/torch/test_target_device.py b/graph_net/torch/test_target_device.py index cf56dee69..ee46ceee6 100644 --- a/graph_net/torch/test_target_device.py +++ b/graph_net/torch/test_target_device.py @@ -1,14 +1,13 @@ import argparse import os -import json import sys -import traceback +import types import torch from graph_net_bench import path_utils from graph_net_bench import test_compiler_util from graph_net import model_path_util -from graph_net_bench.torch import test_compiler, utils, eval_backend_perf +from graph_net_bench.torch import utils, eval_backend_perf, eval_backend_diff def parse_config_from_reference_log(log_path): @@ -30,84 +29,55 @@ def parse_config_from_reference_log(log_path): return config -def parse_time_stats_from_reference_log(log_path): - assert os.path.isfile( - log_path - ), f"{log_path} does not exist or is not a regular file." - - with open(log_path, "r", encoding="utf-8") as f: - lines = f.readlines() - for line in reversed(lines): - if "[Performance][eager]" in line: - start = line.find("{") - end = line.rfind("}") - time_stats = json.loads(line[start : end + 1]) - return time_stats - - -def update_args_and_set_seed(args, model_path): +def get_ref_config_from_log(args, model_path): + """Extract config from reference log file.""" ref_log = utils.get_log_path(args.reference_dir, model_path) config = parse_config_from_reference_log(ref_log) - vars(args)["model_path"] = model_path - vars(args)["compiler"] = config.get("compiler") - vars(args)["trials"] = int(config.get("trials")) - vars(args)["warmup"] = int(config.get("warmup")) - test_compiler.set_seed(random_seed=int(config.get("seed"))) - return args - - -def test_single_model(args): - compiler = test_compiler.get_compiler_backend(args) + return config - input_dict = test_compiler.get_input_dict(args) - model = test_compiler.get_model(args) - model.eval() - model_path = os.path.normpath(args.model_path) - test_compiler_util.print_with_log_prompt( - "[Processing]", model_path, args.log_prompt - ) - test_compiler_util.print_basic_config( - args, - test_compiler.get_hardward_name(args), - test_compiler.get_compile_framework_version(args), +def convert_args_for_eval_backend(args, output_path): + """Convert test_target_device args to eval_backend_perf args format.""" + return types.SimpleNamespace( + model_path=args.model_path, + output_path=output_path, + seed=args.seed, + compiler=args.compiler, + device=args.device, + op_lib=args.op_lib, + warmup=args.warmup, + trials=args.trials, + log_prompt=args.log_prompt, + backend_config=getattr(args, "config", None), ) - success = False - time_stats = {} - try: - compiled_model = compiler(model) - def model_call(): - return compiled_model(**input_dict) - - outputs, time_stats = test_compiler.measure_performance( - model_call, args, compiler - ) - success = True - except Exception as e: - print( - f"Run model failed: 
{str(e)}\n{traceback.format_exc()}", - file=sys.stderr, - flush=True, - ) +def test_single_model(args): + target_dir = "/tmp/eval_device_diff/target" - test_compiler_util.print_running_status(args, success) + ref_config = get_ref_config_from_log(args, args.model_path) + vars(args)["compiler"] = ref_config.get("compiler") + vars(args)["trials"] = int(ref_config.get("trials")) + vars(args)["warmup"] = int(ref_config.get("warmup")) + vars(args)["seed"] = int(ref_config.get("seed")) - model_name = test_compiler_util.get_model_name(args.model_path) - if test_compiler_util.get_subgraph_tag(args.model_path): - model_name += "_" + test_compiler_util.get_subgraph_tag(args.model_path) + eval_args = convert_args_for_eval_backend(args, target_dir) + eval_backend_perf.eval_single_model_with_single_backend(eval_args) ref_dump = utils.get_output_path(args.reference_dir, args.model_path) ref_out = torch.load(str(ref_dump)) - ref_log = utils.get_log_path(args.reference_dir, args.model_path) - ref_time_stats = parse_time_stats_from_reference_log(ref_log) + ref_time_stats = eval_backend_diff.parse_time_stats_from_reference_log(ref_log) - if success: - test_compiler.compare_correctness(ref_out, outputs, args) + target_dump = utils.get_output_path(target_dir, args.model_path) + target_out = torch.load(str(target_dump)) + target_log = utils.get_log_path(target_dir, args.model_path) + target_time_stats = eval_backend_diff.parse_time_stats_from_reference_log( + target_log + ) - test_compiler_util.print_times_and_speedup(args, ref_time_stats, time_stats) + eval_backend_diff.compare_correctness(ref_out, target_out, eval_args) + test_compiler_util.print_times_and_speedup(args, ref_time_stats, target_time_stats) def is_reference_log_exist(reference_dir, model_path): @@ -165,16 +135,16 @@ def main(args): if path_utils.is_single_model_dir(args.model_path): if args.op_lib == "origin": - ref_log = utils.get_log_path(args.reference_dir, args.model_path) - config = parse_config_from_reference_log(ref_log) - vars(args)["op_lib"] = config.get("op_lib") - test_compiler_util.print_with_log_prompt( - "[Config] op_lib:", args.op_lib, args.log_prompt + ref_config = get_ref_config_from_log(args, args.model_path) + vars(args)["op_lib"] = ref_config.get("op_lib") + print( + f"{args.log_prompt} [Config] op_lib: {args.op_lib}", + file=sys.stderr, + flush=True, ) else: eval_backend_perf.register_op_lib(args.op_lib) - args = update_args_and_set_seed(args, args.model_path) test_single_model(args) else: test_multi_models(args) From d8514e4a13f67aac1bcd293cbba62cd860008b21 Mon Sep 17 00:00:00 2001 From: JewelRoam <2752594773@qq.com> Date: Tue, 20 Jan 2026 16:48:12 +0800 Subject: [PATCH 16/17] move utest --- .../test/test_device_test.sh | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename test/eval_device_diff_test.sh => graph_net/test/test_device_test.sh (100%) diff --git a/test/eval_device_diff_test.sh b/graph_net/test/test_device_test.sh similarity index 100% rename from test/eval_device_diff_test.sh rename to graph_net/test/test_device_test.sh From b83b6a967770a644881a6751800ef7e7dc144a28 Mon Sep 17 00:00:00 2001 From: JewelRoam <2752594773@qq.com> Date: Tue, 20 Jan 2026 16:53:13 +0800 Subject: [PATCH 17/17] minor change --- graph_net_bench/torch/eval_backend_diff.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/graph_net_bench/torch/eval_backend_diff.py b/graph_net_bench/torch/eval_backend_diff.py index c254eafaf..cfa171dc6 100755 --- a/graph_net_bench/torch/eval_backend_diff.py +++ 
b/graph_net_bench/torch/eval_backend_diff.py @@ -190,8 +190,8 @@ def eval_multi_models(args, model_path_prefix=None, use_model_list=False): def eval_single_model(args): - ref_dir = "/tmp/eval_perf_diff/A" - target_dir = "/tmp/eval_perf_diff/B" + ref_dir = "/tmp/eval_perf_diff/reference" + target_dir = "/tmp/eval_perf_diff/target" ref_args = types.SimpleNamespace( model_path=args.model_path,