From 1c2e6669d60bfcf32a810596a94738dad9bd1fe8 Mon Sep 17 00:00:00 2001 From: "codeflash-ai[bot]" <148906541+codeflash-ai[bot]@users.noreply.github.com> Date: Wed, 7 Jan 2026 05:59:04 +0000 Subject: [PATCH] Optimize find_last_node MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The optimized code achieves a **150x speedup** by eliminating redundant computation through two key optimizations: ## Primary Optimization: Set-based Lookup The original code uses a nested loop structure: for each node, it checks `all(e["source"] != n["id"] for e in edges)`, resulting in O(n × m) comparisons where n is the number of nodes and m is the number of edges. This means for a graph with 500 nodes and 499 edges, the original code performs up to 249,500 comparisons. The optimized version pre-computes a set of all source IDs (`source_ids = {e["source"] for e in edges}`), reducing the complexity to O(m + n). Set membership testing (`n["id"] not in source_ids`) is O(1) average case, dramatically faster than iterating through all edges for each node. ## Secondary Optimization: Early Return for Empty Edges When there are no edges, the optimized code short-circuits with `if not edges: return next((n for n in nodes), None)`, avoiding unnecessary set construction and dictionary access operations. This provides modest gains in edge cases (75-94% faster in single-node scenarios). 
## Impact Analysis The performance gains scale with graph size: - **Small graphs** (2-20 nodes): 55-293% faster - **Medium graphs** (100 nodes): 4,613% faster - **Large graphs** (500-800 nodes): 15,000-25,000% faster The optimization is particularly effective for: - **Linear chains** where most nodes are sources (test case: 15,377% faster for 500-node chain) - **Dense graphs** with many edges (test case: 4,613% faster for 100-node graph with 360 edges) - **Cyclic graphs** where all nodes are sources (test case: 9,672% faster for 300-node cycle) The behavior remains identical for all valid inputs (the one added requirement is that every edge's `source` value be hashable, because the values are now collected into a set instead of being compared with `!=`), preserving the "first match" semantics when multiple sink nodes exist and correctly handling edge cases like mixed ID types, disconnected components, and missing node references in edges. --- src/algorithms/graph.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/algorithms/graph.py b/src/algorithms/graph.py index 777ea3b..c2ab117 100644 --- a/src/algorithms/graph.py +++ b/src/algorithms/graph.py @@ -47,7 +47,10 @@ def find_shortest_path(self, start: str, end: str) -> list[str]: def find_last_node(nodes, edges): """This function receives a flow and returns the last node.""" - return next((n for n in nodes if all(e["source"] != n["id"] for e in edges)), None) + if not edges: + return next((n for n in nodes), None) + source_ids = {e["source"] for e in edges} + return next((n for n in nodes if n["id"] not in source_ids), None) def find_leaf_nodes(nodes: list[dict], edges: list[dict]) -> list[dict]: