From c73d09e0923e21b0f11484bbe553d630ac623890 Mon Sep 17 00:00:00 2001 From: "codeflash-ai[bot]" <148906541+codeflash-ai[bot]@users.noreply.github.com> Date: Tue, 30 Dec 2025 08:23:56 +0000 Subject: [PATCH] Optimize find_last_node MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The optimized code achieves a **137x speedup** by replacing an O(N×M) nested loop with an O(M+N) set-based lookup approach. **Key optimization:** The original code uses a nested iteration pattern: ```python all(e["source"] != n["id"] for e in edges) ``` For each node, this checks ALL edges to verify none have that node as a source. With N nodes and M edges, this creates N×M comparisons in the worst case. The optimized code pre-computes a set of source node IDs: ```python sources = {e["source"] for e in edges} return next((n for n in nodes if n["id"] not in sources), None) ``` This builds the source set once (O(M)) and then performs O(1) set membership checks for each node (O(N)), resulting in O(M+N) total complexity. **Why this matters:** The speedup is dramatic on larger graphs. Test results show: - **Large linear chain (1000 nodes):** 18.4ms → 58.4μs (**314x faster**) - **Large cycle (1000 nodes):** 18.3ms → 57.6μs (**316x faster**) - **Dense graph (100 nodes, 2500 edges):** 2.23ms → 52.8μs (**42x faster**) Even small graphs benefit significantly (60-100% speedups) because set construction and lookup are highly optimized in Python, while the nested `all()` with generator expressions has per-iteration overhead. **Edge case handling:** The `if not edges:` check preserves the original behavior where an empty edge list allows returning the first node without accessing `n["id"]`, avoiding `KeyError` when nodes lack an 'id' field. This maintains backward compatibility for well-formed inputs while enabling the optimization for all normal cases. **Caveat:** the set-based lookup requires edge `source` values and node `id` values to be hashable (unhashable values now raise `TypeError`), and it reads `e["source"]` for every edge up front, so a malformed edge that the original short-circuiting scan never reached now raises `KeyError`. 
--- src/algorithms/graph.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/algorithms/graph.py b/src/algorithms/graph.py index 777ea3b..b6f3097 100644 --- a/src/algorithms/graph.py +++ b/src/algorithms/graph.py @@ -47,7 +47,10 @@ def find_shortest_path(self, start: str, end: str) -> list[str]: def find_last_node(nodes, edges): """This function receives a flow and returns the last node.""" - return next((n for n in nodes if all(e["source"] != n["id"] for e in edges)), None) + if not edges: + return next((n for n in nodes), None) + sources = {e["source"] for e in edges} + return next((n for n in nodes if n["id"] not in sources), None) def find_leaf_nodes(nodes: list[dict], edges: list[dict]) -> list[dict]: