From 22e0318fed2a1aa7fe01d2198bf34a4d33827633 Mon Sep 17 00:00:00 2001 From: "codeflash-ai[bot]" <148906541+codeflash-ai[bot]@users.noreply.github.com> Date: Wed, 7 Jan 2026 04:45:10 +0000 Subject: [PATCH] Optimize find_last_node MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The optimized code achieves a **~200x speedup** by eliminating redundant nested iterations through a fundamental algorithmic improvement. **What changed:** The original implementation uses a nested loop structure where for each node, it checks against *all* edges to verify the node isn't a source (`all(e["source"] != n["id"] for e in edges)`). This creates O(N×M) time complexity where N is the number of nodes and M is the number of edges. The optimized version pre-computes a set of all source node IDs (`sources = {e["source"] for e in edges}`), then performs simple O(1) membership checks (`n["id"] not in sources`). This reduces complexity to O(N+M). **Why it's faster:** 1. **Set lookups are O(1)**: Python sets use hash tables, making membership checks nearly instantaneous regardless of set size 2. **Single edge traversal**: Edges are only iterated once to build the set, not repeatedly for each node 3. **Eliminates nested iteration**: The original's nested generator expressions created quadratic behavior that became catastrophic with larger graphs **Performance characteristics from tests:** - **Small graphs (2-5 nodes)**: 30-100% faster - modest gains due to set construction overhead - **Linear chains (500-1000 nodes)**: 15,000-32,000% faster - dramatic improvement as the quadratic bottleneck is eliminated - **Dense graphs**: 5,800% faster on 100-node dense graph - particularly benefits from avoiding repeated edge scans - **Empty edges**: Slight overhead (9% slower) due to set construction, but negligible in absolute terms (microseconds) The optimization is universally beneficial for any workflow calling this function with non-trivial graph sizes, particularly in scenarios involving graph traversal, flow analysis, or dependency resolution where graphs commonly have hundreds or thousands of nodes. --- src/algorithms/graph.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/algorithms/graph.py b/src/algorithms/graph.py index 777ea3b..f23d356 100644 --- a/src/algorithms/graph.py +++ b/src/algorithms/graph.py @@ -47,7 +47,8 @@ def find_shortest_path(self, start: str, end: str) -> list[str]: def find_last_node(nodes, edges): """This function receives a flow and returns the last node.""" - return next((n for n in nodes if all(e["source"] != n["id"] for e in edges)), None) + sources = {e["source"] for e in edges} + return next((n for n in nodes if n["id"] not in sources), None) def find_leaf_nodes(nodes: list[dict], edges: list[dict]) -> list[dict]: