Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
17 commits
Select commit Hold shift + click to select a range
efc31a5
use_all_inputs only enabled for the first subgraph
lixinqi Jan 15, 2026
156a461
Merge branch 'develop' of github.com:PaddlePaddle/GraphNet into develop
lixinqi Jan 15, 2026
e7ad469
Merge branch 'develop' of github.com:PaddlePaddle/GraphNet into develop
lixinqi Jan 15, 2026
7ba84ed
Merge branch 'develop' of github.com:PaddlePaddle/GraphNet into develop
lixinqi Jan 15, 2026
c09d3be
Merge branch 'develop' of github.com:PaddlePaddle/GraphNet into develop
lixinqi Jan 15, 2026
1fa6b25
Merge branch 'develop' of github.com:PaddlePaddle/GraphNet into develop
lixinqi Jan 16, 2026
cef146a
Merge branch 'develop' of github.com:PaddlePaddle/GraphNet into develop
lixinqi Jan 16, 2026
d622e77
Merge branch 'develop' of github.com:PaddlePaddle/GraphNet into develop
lixinqi Jan 17, 2026
6841ebf
Merge branch 'develop' of github.com:PaddlePaddle/GraphNet into develop
lixinqi Jan 17, 2026
1592aa0
Merge branch 'develop' of github.com:PaddlePaddle/GraphNet into develop
lixinqi Jan 17, 2026
d618c28
Merge branch 'develop' of github.com:PaddlePaddle/GraphNet into develop
lixinqi Jan 17, 2026
b1adb24
Merge branch 'develop' of github.com:PaddlePaddle/GraphNet into develop
lixinqi Jan 17, 2026
3fb65e6
Merge branch 'develop' of github.com:PaddlePaddle/GraphNet into develop
lixinqi Jan 19, 2026
c36b325
Merge branch 'develop' of github.com:PaddlePaddle/GraphNet into develop
lixinqi Jan 19, 2026
b2285b7
Merge branch 'develop' of github.com:PaddlePaddle/GraphNet into develop
lixinqi Jan 19, 2026
7109bb2
Merge branch 'develop' of github.com:PaddlePaddle/GraphNet into develop
lixinqi Jan 19, 2026
53397eb
op_lib_evaluator
lixinqi Jan 19, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
103 changes: 103 additions & 0 deletions graph_net/fault_locator/torch/op_lib_evaluator.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,103 @@
import sys
import subprocess
from pathlib import Path
from graph_net.declare_config_mixin import DeclareConfigMixin


class OpLibEvaluator(DeclareConfigMixin):
    """
    Functor that evaluates a model sample against a target operator library
    (e.g., FlagGems).

    Each call runs two helper modules as subprocesses, in order:
    ``graph_net.torch.test_reference_device`` (writes reference data) and
    ``graph_net.torch.test_target_device`` (tests the configured ``op_lib``
    against that reference data). The combined stdout/stderr of the target
    run is persisted to a log file and returned to the caller.
    """

    def __init__(self, config=None):
        # Config storage/validation is delegated to DeclareConfigMixin.
        self.init_config(config)

    def declare_config(
        self,
        model_path_prefix: str,
        output_dir: str,
        op_lib: str,
        device: str = "cuda",
        compiler: str = "nope",
    ):
        """
        Configuration schema for operator library benchmarking.

        Args:
            model_path_prefix: Directory that relative model paths are
                resolved against.
            output_dir: Root directory for per-model workspaces; the shared
                ``reference_data`` directory is created inside it.
            op_lib: Value forwarded as ``--op-lib`` to the target test.
            device: Value forwarded as ``--device`` to both tests.
            compiler: Value forwarded as ``--compiler`` to the reference test.

        The body is intentionally empty; presumably DeclareConfigMixin
        derives the schema from this signature -- confirm against the mixin.
        """
        pass

    def __call__(self, rel_model_path: str) -> str:
        """
        Orchestrates the pipeline for reference data generation and target
        library testing.

        Args:
            rel_model_path: Model path relative to ``model_path_prefix``; it
                is also used as the workspace sub-directory under
                ``output_dir``.

        Returns:
            The complete log content from the target device test execution.

        Raises:
            subprocess.CalledProcessError: If either subprocess exits with a
                non-zero status.
        """
        output_path = Path(self.config["output_dir"])

        # Create an isolated workspace for the current model sample
        workspace = output_path / rel_model_path
        workspace.mkdir(parents=True, exist_ok=True)

        # Reference data lives in one directory shared by all model samples
        reference_dir = output_path / "reference_data"
        reference_dir.mkdir(parents=True, exist_ok=True)

        # Resolve the model location under the configured prefix
        full_model_path = Path(self.config["model_path_prefix"]) / rel_model_path
        log_file = workspace / "op_lib_validation.log"

        # 1. Execute the reference test to generate the reference data
        self._run_reference_test(full_model_path, reference_dir)

        # 2. Execute the target library test and return its captured log
        return self._run_target_test(full_model_path, reference_dir, log_file)

    def _run_reference_test(self, full_model_path: Path, reference_dir: Path):
        """
        Invokes the reference device test module to generate ground truth data.

        Args:
            full_model_path: Path of the model sample to run.
            reference_dir: Directory passed as ``--reference-dir``.

        Raises:
            subprocess.CalledProcessError: If the reference test exits with a
                non-zero status. Its captured output is echoed to stderr
                before the exception propagates.
        """
        cmd = [
            sys.executable,
            "-m",
            "graph_net.torch.test_reference_device",
            "--model-path",
            str(full_model_path),
            "--reference-dir",
            str(reference_dir),
            "--compiler",
            self.config["compiler"],
            "--device",
            self.config["device"],
        ]
        # The reference test runs synchronously; its output is captured and
        # not returned on success.
        try:
            subprocess.run(cmd, check=True, capture_output=True, text=True)
        except subprocess.CalledProcessError as err:
            # The child's output only exists on the exception object, and the
            # exception message does not include it. Echo it so the failure
            # can be diagnosed, then re-raise the same exception unchanged.
            for captured in (err.stdout, err.stderr):
                if captured:
                    print(captured, file=sys.stderr)
            raise

    def _run_target_test(
        self, full_model_path: Path, reference_dir: Path, log_file: Path
    ) -> str:
        """
        Invokes the target device test module for the specified op_lib and
        merges stdout/stderr into the local log file.

        Args:
            full_model_path: Path of the model sample to run.
            reference_dir: Directory passed as ``--reference-dir``.
            log_file: File that receives the combined stdout/stderr; it is
                truncated on every call.

        Returns:
            The text content of ``log_file`` after the run.

        Raises:
            subprocess.CalledProcessError: If the target test exits with a
                non-zero status. The log file is still left on disk, but its
                content is not returned in that case.
        """
        cmd = [
            sys.executable,
            "-m",
            "graph_net.torch.test_target_device",
            "--model-path",
            str(full_model_path),
            "--reference-dir",
            str(reference_dir),
            "--device",
            self.config["device"],
            "--op-lib",
            self.config["op_lib"],
        ]

        print(" ".join(cmd))
        # Redirect all output to the log file for persistence and analysis
        with log_file.open("w") as f:
            subprocess.run(cmd, stdout=f, stderr=subprocess.STDOUT, check=True)

        # The child writes raw bytes straight to the file descriptor, so the
        # log may contain sequences that are invalid in the default encoding.
        # Replace them instead of failing after an otherwise successful run.
        return log_file.read_text(errors="replace")
40 changes: 40 additions & 0 deletions graph_net/test/flaggems_fault_bisearcher_test.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
#!/bin/bash
#
# Runs the AutoFaultBisearcher sample pass over the small10 torch sample list,
# using OpLibEvaluator with the "flaggems" op_lib as the evaluator.

# Resolve the root directory of the project (the parent of the graph_net package)
GRAPH_NET_ROOT=$(python3 -c "import graph_net; import os; print(os.path.dirname(os.path.dirname(graph_net.__file__)))")

# If graph_net cannot be imported the substitution above yields an empty
# string, and every path below would silently be rooted at "/". Fail fast.
if [ -z "$GRAPH_NET_ROOT" ]; then
    echo "error: could not resolve the graph_net project root (is graph_net importable by python3?)" >&2
    exit 1
fi

# Test Environment Setup
MODEL_LIST="$GRAPH_NET_ROOT/graph_net/test/small10_torch_samples_list.txt"
MODEL_PREFIX="$GRAPH_NET_ROOT"
OUTPUT_DIR="/tmp/workspace_auto_fault_bisearcher"

# Build the base64-encoded JSON config for the sample pass.
# "-w 0" (GNU coreutils) disables line wrapping so the result is a single line.
# Note: the evaluator reads its models from the truncator's output directory.
SAMPLE_PASS_CONFIG=$(base64 -w 0 <<EOF
{
    "model_path_prefix": "$MODEL_PREFIX",
    "output_dir": "$OUTPUT_DIR",
    "output_file_name": "truncate_size_has_fault.txt",

    "truncator_config": {
        "model_path_prefix": "$MODEL_PREFIX",
        "output_dir": "$OUTPUT_DIR/workspace_truncator/"
    },

    "evaluator_file_path": "$GRAPH_NET_ROOT/graph_net/fault_locator/torch/op_lib_evaluator.py",
    "evaluator_class_name": "OpLibEvaluator",
    "evaluator_config": {
        "model_path_prefix": "$OUTPUT_DIR/workspace_truncator/",
        "output_dir": "$OUTPUT_DIR/op_lib_evaluator",
        "op_lib": "flaggems",
        "compiler": "nope",
        "device": "cuda"
    },

    "tolerance": 0
}
EOF
)

# Execute the SamplePass via the standard CLI entry point.
# The config is quoted so it is always passed as exactly one argument.
python3 -m graph_net.apply_sample_pass \
    --model-path-list "$MODEL_LIST" \
    --sample-pass-file-path "$GRAPH_NET_ROOT/graph_net/sample_pass/auto_fault_bisearcher.py" \
    --sample-pass-class-name AutoFaultBisearcher \
    --sample-pass-config "$SAMPLE_PASS_CONFIG"