From 43bd0d975719d1348888d285b7a7358930c5c8ef Mon Sep 17 00:00:00 2001
From: "codeflash-ai[bot]" <148906541+codeflash-ai[bot]@users.noreply.github.com>
Date: Sat, 27 Dec 2025 00:21:17 +0000
Subject: [PATCH] Optimize find_last_node
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The optimized code achieves a **228x speedup** (from 78.6ms to 344μs) by eliminating a quadratic nested loop pattern.

**Key optimization:**
- **Original approach**: For each node, scans the edge list until it finds an edge whose source is that node's ID (the whole list when there is none) — O(N×M) complexity in the worst case, where N is the number of nodes and M is the number of edges.
- **Optimized approach**: Pre-builds a set of all source IDs in one pass, then performs an O(1) average-case membership check for each node — O(N+M) complexity.

**Why this matters:**
Python's `set` lookup using hash tables is extremely fast (O(1) average case), while the original `all()` with a generator expression re-scans the edge list for each node, stopping only at the first edge whose source matches. The performance gap widens dramatically as the graph grows:
- **Large chain graph (1000 nodes)**: 18.5ms → 56.8μs (324x faster)
- **Large cycle graph (1000 nodes)**: 18.4ms → 55.1μs (333x faster)

**Test case performance patterns:**
- Small graphs (2-3 nodes): 30-91% faster — modest gains due to overhead of set creation
- Medium graphs (10-100 nodes): Consistent 50-80% improvements
- Large graphs (1000+ nodes): over 300x speedups (324x and 333x in the two cases above) — the optimization truly shines here
- Empty/single node cases: Slight overhead (~10% slower) due to set creation, but negligible in absolute terms (nanoseconds)

**Impact considerations:**
The optimization is most beneficial when:
1. The function is called repeatedly in workflows involving large graphs
2. Graph processing happens in performance-critical paths (data pipelines, real-time graph analysis)
3. 
The edge count is substantial relative to the node count

The tradeoff is minimal: a tiny overhead for empty graphs (which complete in <1μs anyway), plus the requirement that edge `source` values and node `id` values be hashable (they are now stored in, and looked up in, a set), in exchange for massive gains on any non-trivial graph structure.
---
 src/algorithms/graph.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/algorithms/graph.py b/src/algorithms/graph.py
index 777ea3b..156485a 100644
--- a/src/algorithms/graph.py
+++ b/src/algorithms/graph.py
@@ -47,7 +47,8 @@ def find_shortest_path(self, start: str, end: str) -> list[str]:
 
 def find_last_node(nodes, edges):
     """This function receives a flow and returns the last node."""
-    return next((n for n in nodes if all(e["source"] != n["id"] for e in edges)), None)
+    source_ids = {e["source"] for e in edges}
+    return next((n for n in nodes if n["id"] not in source_ids), None)
 
 
 def find_leaf_nodes(nodes: list[dict], edges: list[dict]) -> list[dict]: