From a83b4faf7ba50972c79042bb7bc5a59b8b36a124 Mon Sep 17 00:00:00 2001 From: Jack-Khuu Date: Tue, 13 Jan 2026 13:43:56 -0800 Subject: [PATCH 1/3] Add option to ignore router cache --- Fuser/auto_agent.py | 21 +++++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) diff --git a/Fuser/auto_agent.py b/Fuser/auto_agent.py index dadaa8f..ac74293 100644 --- a/Fuser/auto_agent.py +++ b/Fuser/auto_agent.py @@ -330,6 +330,7 @@ def __init__( dispatch_jobs: int = 2, allow_fallback: bool = True, target_platform: str | None = None, + use_router_cache: bool = True, ) -> None: self.ka_model = ka_model self.ka_num_workers = ka_num_workers @@ -352,6 +353,7 @@ def __init__( self.dispatch_jobs = dispatch_jobs self.allow_fallback = allow_fallback self.platform_config = get_platform(target_platform) + self.use_router_cache = use_router_cache def _solve_with_kernelagent(self, problem_code: str) -> RouteResult: agent = TritonKernelAgent( @@ -469,7 +471,7 @@ def solve(self, problem_path: Path) -> RouteResult: route_conf: float | None = None route_cfg: dict[str, Any] = {} - if isinstance(cached, dict): + if self.use_router_cache and isinstance(cached, dict): strategy = ( str(cached.get("route_strategy") or cached.get("route") or "") or None ) @@ -483,11 +485,12 @@ def solve(self, problem_path: Path) -> RouteResult: problem_path, code, cx ) # Persist in cache for future runs - cache[code_hash] = info.get("parsed") or { - "route_strategy": strategy, - "confidence": route_conf, - } - _save_router_cache(cache) + if self.use_router_cache: + cache[code_hash] = info.get("parsed") or { + "route_strategy": strategy, + "confidence": route_conf, + } + _save_router_cache(cache) except Exception: # No provider or failure; fall back later pass @@ -704,6 +707,11 @@ def main(argv: list[str] | None = None) -> int: p.add_argument("--verify", action="store_true") p.add_argument("--dispatch-jobs", type=int, default=2) p.add_argument("--no-fallback", action="store_true") + p.add_argument( + 
"--no-router-cache", + action="store_true", + help="Disable router cache (do not read from or write to cache)", + ) p.add_argument( "--target-platform", default="cuda", @@ -741,6 +749,7 @@ def main(argv: list[str] | None = None) -> int: dispatch_jobs=args.dispatch_jobs, allow_fallback=(not args.no_fallback), target_platform=args.target_platform, + use_router_cache=(not args.no_router_cache), ) try: From f7f521b599b4e1de70ab3420953399f73f6165b6 Mon Sep 17 00:00:00 2001 From: Jack-Khuu Date: Tue, 13 Jan 2026 16:20:36 -0800 Subject: [PATCH 2/3] Wrap load into conditional --- Fuser/auto_agent.py | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/Fuser/auto_agent.py b/Fuser/auto_agent.py index ac74293..35b3fbe 100644 --- a/Fuser/auto_agent.py +++ b/Fuser/auto_agent.py @@ -463,20 +463,23 @@ def solve(self, problem_path: Path) -> RouteResult: heuristic_prefers_fuser = cx.route_to_fuser() # Cache lookup by content hash to avoid repeated router calls + cache = {} code_hash = _file_sha256_text(code) - cache = _load_router_cache() - cached = cache.get(code_hash) - strategy: str | None = None route_conf: float | None = None route_cfg: dict[str, Any] = {} - if self.use_router_cache and isinstance(cached, dict): - strategy = ( - str(cached.get("route_strategy") or cached.get("route") or "") or None - ) - route_conf = cached.get("confidence") - route_cfg = cached.get("config") or {} + if self.use_router_cache: + cache = _load_router_cache() + cached = cache.get(code_hash) + + if isinstance(cached, dict): + strategy = ( + str(cached.get("route_strategy") or cached.get("route") or "") + or None + ) + route_conf = cached.get("confidence") + route_cfg = cached.get("config") or {} if strategy is None: # Try LLM-driven decision From 18ff4929918d1adacf51d4dfe279f0adcfafe63a Mon Sep 17 00:00:00 2001 From: Jack-Khuu Date: Fri, 16 Jan 2026 11:19:57 -0800 Subject: [PATCH 3/3] Update README --- README.md | 6 ++++-- 1 file changed, 4 insertions(+), 2 
deletions(-) diff --git a/README.md b/README.md index d39fb4b..26302a5 100644 --- a/README.md +++ b/README.md @@ -91,8 +91,10 @@ More knobs live in `triton_kernel_agent/agent.py` and `Fuser/config.py`. ```bash python -m Fuser.auto_agent \ --problem /abs/path/to/KernelBench/level1/19_ReLU.py \ - --verify # ensure final composition test runs + --no-router-cache \ # avoid caching or using cached results + --verify # ensure final composition test runs ``` + `--no-router-cache` can be enabled to avoid using any cached router results and prevent writing to the cache. - **Manually run the pipeline (extract → dispatch → compose)** when you want explicit control over models or concurrency: ```bash @@ -144,7 +146,7 @@ More knobs live in `triton_kernel_agent/agent.py` and `Fuser/config.py`. ## Component Details -- **AutoRouter (`Fuser/auto_agent.py`)**: parses the problem’s AST, looks for attention blocks, transposed convolutions, control flow, and long op chains. It caches decisions under `.fuse/router_cache.json` and can fall back to the other path if the first attempt fails. +- **AutoRouter (`Fuser/auto_agent.py`)**: parses the problem’s AST, looks for attention blocks, transposed convolutions, control flow, and long op chains. It caches decisions under `.fuse/router_cache.json` and can fall back to the other path if the first attempt fails. Use `--no-router-cache` to ignore the existing cache and skip caching new routes. - **Fuser Orchestrator (`Fuser/orchestrator.py`)**: rewrites the PyTorch module into fusable modules, executes them for validation, and packages a tarball of the fused code. Run IDs and directories are managed via `Fuser/paths.py`.