From b6caced0917baf69faaf67b3efa3b7a7fbc62f30 Mon Sep 17 00:00:00 2001 From: Kevin Turcios Date: Thu, 8 Jan 2026 17:04:38 -0500 Subject: [PATCH 1/9] try fix --- codeflash/benchmarking/instrument_codeflash_trace.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/codeflash/benchmarking/instrument_codeflash_trace.py b/codeflash/benchmarking/instrument_codeflash_trace.py index f54674cfa..dc6b1bbee 100644 --- a/codeflash/benchmarking/instrument_codeflash_trace.py +++ b/codeflash/benchmarking/instrument_codeflash_trace.py @@ -105,6 +105,17 @@ def add_codeflash_decorator_to_code(code: str, functions_to_optimize: list[Funct def instrument_codeflash_trace_decorator(file_to_funcs_to_optimize: dict[Path, list[FunctionToOptimize]]) -> None: """Instrument codeflash_trace decorator to functions to optimize.""" for file_path, functions_to_optimize in file_to_funcs_to_optimize.items(): + # Skip codeflash's own benchmarking and picklepatch modules to avoid circular imports + # (codeflash_trace.py imports from picklepatch, and instrumenting these would cause + # them to import codeflash_trace back, creating a circular import) + parts = file_path.parts + # Find the last "codeflash" in path (handles nested paths like .../codeflash/codeflash/...) + codeflash_indices = [i for i, p in enumerate(parts) if p == "codeflash"] + if codeflash_indices: + codeflash_idx = codeflash_indices[-1] # Use the last occurrence (the module, not project dir) + submodule = parts[codeflash_idx + 1] if codeflash_idx + 1 < len(parts) else None + if submodule in ("benchmarking", "picklepatch"): + continue original_code = file_path.read_text(encoding="utf-8") new_code = add_codeflash_decorator_to_code(original_code, functions_to_optimize) # Modify the code From 1d1bbbbb80d4d006b85c0255c4011fae2291ab94 Mon Sep 17 00:00:00 2001 From: Kevin Turcios Date: Thu, 8 Jan 2026 17:20:32 -0500 Subject: [PATCH 2/9] logging --- codeflash/benchmarking/trace_benchmarks.py | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/codeflash/benchmarking/trace_benchmarks.py b/codeflash/benchmarking/trace_benchmarks.py index 6de415a55..e62bf11a9 100644 --- a/codeflash/benchmarking/trace_benchmarks.py +++ b/codeflash/benchmarking/trace_benchmarks.py @@ -32,16 +32,22 @@ def trace_benchmarks_pytest( **run_args, ) if result.returncode != 0: - if "ERROR collecting" in result.stdout: + # Combine stdout and stderr for error reporting (errors often go to stderr) + combined_output = result.stdout + if result.stderr: + combined_output = combined_output + "\n" + result.stderr if combined_output else result.stderr + + if "ERROR collecting" in combined_output: # Pattern matches "===== ERRORS =====" (any number of =) and captures everything after error_pattern = r"={3,}\s*ERRORS\s*={3,}\n([\s\S]*?)(?:={3,}|$)" - match = re.search(error_pattern, result.stdout) - error_section = match.group(1) if match else result.stdout - elif "FAILURES" in result.stdout: + match = re.search(error_pattern, combined_output) + error_section = match.group(1) if match else combined_output + elif "FAILURES" in combined_output: # Pattern matches "===== FAILURES =====" (any number of =) and captures everything after error_pattern = r"={3,}\s*FAILURES\s*={3,}\n([\s\S]*?)(?:={3,}|$)" - match = re.search(error_pattern, result.stdout) - error_section = match.group(1) if match else result.stdout + match = re.search(error_pattern, combined_output) + error_section = match.group(1) if match else combined_output else: - error_section = result.stdout + error_section = combined_output logger.warning(f"Error collecting benchmarks - Pytest Exit code: {result.returncode}, {error_section}") + logger.debug(f"Full pytest output:\n{combined_output}") \ No newline at end of file From 04db08b175e7e7cab4b68ba25b1d4cdba09cdd1a Mon Sep 17 00:00:00 2001 From: Kevin Turcios Date: Thu, 8 Jan 2026 17:20:46 -0500 Subject: [PATCH 3/9] debug --- codeflash/benchmarking/trace_benchmarks.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/codeflash/benchmarking/trace_benchmarks.py b/codeflash/benchmarking/trace_benchmarks.py index e62bf11a9..8217ac37e 100644 --- a/codeflash/benchmarking/trace_benchmarks.py +++ b/codeflash/benchmarking/trace_benchmarks.py @@ -50,4 +50,4 @@ def trace_benchmarks_pytest( else: error_section = combined_output logger.warning(f"Error collecting benchmarks - Pytest Exit code: {result.returncode}, {error_section}") - logger.debug(f"Full pytest output:\n{combined_output}") \ No newline at end of file + logger.debug(f"Full pytest output:\n{combined_output}") From 33fc22c5a08b6901746abf4c0794e03b7c27b15f Mon Sep 17 00:00:00 2001 From: "codeflash-ai[bot]" <148906541+codeflash-ai[bot]@users.noreply.github.com> Date: Fri, 9 Jan 2026 08:15:05 +0000 Subject: [PATCH 4/9] Optimize AssertCleanup.transform_asserts MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The optimization replaces the general-purpose `_split_top_level_args()` method with a specialized `_first_top_level_arg()` method that extracts only the first argument from a comma-separated argument list. **Key Performance Improvement:** The original implementation (`_split_top_level_args()`) unnecessarily parsed *all* top-level arguments by: 1. Iterating through the entire string character by character 2. Building a list (`current`) and accumulating characters for each argument 3. Joining those characters into strings for every argument 4. Collecting all arguments into a result list 5. Returning the entire list when only `arg_parts[0]` was ever used The optimized version (`_first_top_level_arg()`) short-circuits this process by: 1. Scanning only until the first top-level comma is found 2. Immediately returning a slice of the original string (`args[:i].strip()`) 3. Avoiding list construction and string joining entirely 4. If no comma exists, returning the whole string stripped **Why This Is Faster:** String slicing in Python is O(n) but with very low constant factors (native C implementation). The original approach had higher overhead from: - Multiple list append operations (`current.append(char)`) - String joining operations (`"".join(current)`) performed for *every* argument - List construction for the result array - Multiple strip() calls on all arguments when only the first was needed **Performance Results:** The line profiler shows the critical line dropping from **19.678 ms (79.2% of function time)** to **12.388 ms (62.2% of function time)** - a **~37% speedup** on that specific operation. This translates to an overall **25% speedup** in the full code runtime (2.40ms → 1.92ms). **Test Case Analysis:** The optimization particularly benefits test cases with complex unittest assertions containing multiple arguments: - `test_unittest_with_nested_args_and_commas`: 30.6-38.2% faster - `test_unittest_args_with_kwargs_and_no_top_level_comma`: 32.6% faster - Large-scale tests with many unittest assertions: 29.1-54.5% faster - Complex multi-argument assertions: up to 125% faster Cases with simple asserts or non-matching lines see minimal change, as expected since they don't invoke the modified code path. --- codeflash/code_utils/concolic_utils.py | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/codeflash/code_utils/concolic_utils.py b/codeflash/code_utils/concolic_utils.py index f59cb7aab..f49bd0469 100644 --- a/codeflash/code_utils/concolic_utils.py +++ b/codeflash/code_utils/concolic_utils.py @@ -33,9 +33,9 @@ def _transform_assert_line(self, line: str) -> Optional[str]: indent, assert_method, args = unittest_match.groups() if args: - arg_parts = self._split_top_level_args(args) - if arg_parts and arg_parts[0]: - return f"{indent}{arg_parts[0]}" + arg_parts = self._first_top_level_arg(args) + if arg_parts: + return f"{indent}{arg_parts}" return None @@ -67,6 +67,17 @@ def __init__(self) -> None: self.assert_re = re.compile(r"\s*assert\s+(.*?)(?:\s*==\s*.*)?$") self.unittest_re = re.compile(r"(\s*)self\.assert([A-Za-z]+)\((.*)\)$") + def _first_top_level_arg(self, args: str) -> str: + depth = 0 + for i, ch in enumerate(args): + if ch in "([{": + depth += 1 + elif ch in ")]}": + depth -= 1 + elif ch == "," and depth == 0: + return args[:i].strip() + return args.strip() + def clean_concolic_tests(test_suite_code: str) -> str: try: From a4f42499194d46b4dee089aeff3401b5bb730da0 Mon Sep 17 00:00:00 2001 From: Kevin Turcios Date: Tue, 13 Jan 2026 17:50:43 -0500 Subject: [PATCH 5/9] refactor: use rpartition for path finding in instrument_codeflash_trace Replace list comprehension with string partition operations for cleaner code. --- codeflash/benchmarking/instrument_codeflash_trace.py | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/codeflash/benchmarking/instrument_codeflash_trace.py b/codeflash/benchmarking/instrument_codeflash_trace.py index dc6b1bbee..40a9a0390 100644 --- a/codeflash/benchmarking/instrument_codeflash_trace.py +++ b/codeflash/benchmarking/instrument_codeflash_trace.py @@ -108,12 +108,10 @@ def instrument_codeflash_trace_decorator(file_to_funcs_to_optimize: dict[Path, l # Skip codeflash's own benchmarking and picklepatch modules to avoid circular imports # (codeflash_trace.py imports from picklepatch, and instrumenting these would cause # them to import codeflash_trace back, creating a circular import) - parts = file_path.parts - # Find the last "codeflash" in path (handles nested paths like .../codeflash/codeflash/...) - codeflash_indices = [i for i, p in enumerate(parts) if p == "codeflash"] - if codeflash_indices: - codeflash_idx = codeflash_indices[-1] # Use the last occurrence (the module, not project dir) - submodule = parts[codeflash_idx + 1] if codeflash_idx + 1 < len(parts) else None + # Use rpartition to find the last "codeflash" in path (handles nested paths) + _, sep, after = file_path.as_posix().rpartition("/codeflash/") + if sep: + submodule = after.partition("/")[0] if submodule in ("benchmarking", "picklepatch"): continue original_code = file_path.read_text(encoding="utf-8") From 49a705e154697b9e7537c98a23186d632a537e67 Mon Sep 17 00:00:00 2001 From: Kevin Turcios Date: Tue, 13 Jan 2026 17:50:47 -0500 Subject: [PATCH 6/9] refactor: remove unused _split_top_level_args method --- codeflash/code_utils/concolic_utils.py | 23 ----------------------- 1 file changed, 23 deletions(-) diff --git a/codeflash/code_utils/concolic_utils.py b/codeflash/code_utils/concolic_utils.py index f49bd0469..9148c6145 100644 --- a/codeflash/code_utils/concolic_utils.py +++ b/codeflash/code_utils/concolic_utils.py @@ -39,29 +39,6 @@ def _transform_assert_line(self, line: str) -> Optional[str]: return None - def _split_top_level_args(self, args_str: str) -> list[str]: - result = [] - current = [] - depth = 0 - - for char in args_str: - if char in "([{": - depth += 1 - current.append(char) - elif char in ")]}": - depth -= 1 - current.append(char) - elif char == "," and depth == 0: - result.append("".join(current).strip()) - current = [] - else: - current.append(char) - - if current: - result.append("".join(current).strip()) - - return result - def __init__(self) -> None: # Pre-compiling regular expressions for faster execution self.assert_re = re.compile(r"\s*assert\s+(.*?)(?:\s*==\s*.*)?$") From e3d36b25160136f5f3157f77f6c40783c9bd8513 Mon Sep 17 00:00:00 2001 From: Kevin Turcios Date: Tue, 13 Jan 2026 17:50:51 -0500 Subject: [PATCH 7/9] test: add unit tests for _first_top_level_arg --- tests/code_utils/test_concolic_utils.py | 95 +++++++++++++++++++++++++ 1 file changed, 95 insertions(+) create mode 100644 tests/code_utils/test_concolic_utils.py diff --git a/tests/code_utils/test_concolic_utils.py b/tests/code_utils/test_concolic_utils.py new file mode 100644 index 000000000..2117216f2 --- /dev/null +++ b/tests/code_utils/test_concolic_utils.py @@ -0,0 +1,95 @@ +from __future__ import annotations + +import pytest + +from codeflash.code_utils.concolic_utils import AssertCleanup + + +class TestFirstTopLevelArg: + @pytest.fixture + def cleanup(self) -> AssertCleanup: + return AssertCleanup() + + def test_single_argument_no_comma(self, cleanup: AssertCleanup) -> None: + assert cleanup._first_top_level_arg("foo") == "foo" + + def test_single_argument_with_whitespace(self, cleanup: AssertCleanup) -> None: + assert cleanup._first_top_level_arg(" foo ") == "foo" + + def test_two_simple_arguments(self, cleanup: AssertCleanup) -> None: + assert cleanup._first_top_level_arg("a, b") == "a" + + def test_multiple_arguments(self, cleanup: AssertCleanup) -> None: + assert cleanup._first_top_level_arg("x, y, z") == "x" + + def test_nested_parentheses_comma_ignored(self, cleanup: AssertCleanup) -> None: + assert cleanup._first_top_level_arg("func(a, b), c") == "func(a, b)" + + def test_nested_brackets_comma_ignored(self, cleanup: AssertCleanup) -> None: + assert cleanup._first_top_level_arg("[1, 2], x") == "[1, 2]" + + def test_nested_braces_comma_ignored(self, cleanup: AssertCleanup) -> None: + assert cleanup._first_top_level_arg("{a: b, c: d}, e") == "{a: b, c: d}" + + def test_deeply_nested_parentheses(self, cleanup: AssertCleanup) -> None: + assert cleanup._first_top_level_arg("f(g(h(i))), j") == "f(g(h(i)))" + + def test_mixed_bracket_types(self, cleanup: AssertCleanup) -> None: + assert cleanup._first_top_level_arg("func([{a, b}], c), d") == "func([{a, b}], c)" + + def test_empty_string(self, cleanup: AssertCleanup) -> None: + assert cleanup._first_top_level_arg("") == "" + + def test_only_whitespace(self, cleanup: AssertCleanup) -> None: + assert cleanup._first_top_level_arg(" ") == "" + + def test_comma_at_start(self, cleanup: AssertCleanup) -> None: + assert cleanup._first_top_level_arg(", a") == "" + + def test_no_top_level_comma(self, cleanup: AssertCleanup) -> None: + assert cleanup._first_top_level_arg("func(a, b)") == "func(a, b)" + + def test_empty_parentheses(self, cleanup: AssertCleanup) -> None: + assert cleanup._first_top_level_arg("(), b") == "()" + + def test_empty_brackets(self, cleanup: AssertCleanup) -> None: + assert cleanup._first_top_level_arg("[], b") == "[]" + + def test_empty_braces(self, cleanup: AssertCleanup) -> None: + assert cleanup._first_top_level_arg("{}, b") == "{}" + + def test_whitespace_around_comma(self, cleanup: AssertCleanup) -> None: + assert cleanup._first_top_level_arg("a , b") == "a" + + def test_complex_nested_structure(self, cleanup: AssertCleanup) -> None: + assert cleanup._first_top_level_arg("{'key': [1, (2, 3)]}, other") == "{'key': [1, (2, 3)]}" + + def test_string_literal_with_comma(self, cleanup: AssertCleanup) -> None: + # Note: this function doesn't handle string literals specially + # commas inside strings are treated as top-level + assert cleanup._first_top_level_arg('"a,b", c') == '"a' + + def test_unbalanced_opening_bracket(self, cleanup: AssertCleanup) -> None: + # With unbalanced opening, no top-level comma found + assert cleanup._first_top_level_arg("(a, b") == "(a, b" + + def test_multiple_consecutive_commas(self, cleanup: AssertCleanup) -> None: + assert cleanup._first_top_level_arg(",,") == "" + + def test_attribute_access(self, cleanup: AssertCleanup) -> None: + assert cleanup._first_top_level_arg("obj.attr, b") == "obj.attr" + + def test_numeric_arguments(self, cleanup: AssertCleanup) -> None: + assert cleanup._first_top_level_arg("123, 456") == "123" + + def test_negative_number(self, cleanup: AssertCleanup) -> None: + assert cleanup._first_top_level_arg("-42, x") == "-42" + + def test_float_argument(self, cleanup: AssertCleanup) -> None: + assert cleanup._first_top_level_arg("3.14, x") == "3.14" + + def test_newline_in_argument(self, cleanup: AssertCleanup) -> None: + assert cleanup._first_top_level_arg("a\nb, c") == "a\nb" + + def test_tab_whitespace(self, cleanup: AssertCleanup) -> None: + assert cleanup._first_top_level_arg("\ta\t, b") == "a" From 0a6ece297ec4a4c107cc8f1962440e03e86ad844 Mon Sep 17 00:00:00 2001 From: Kevin Turcios Date: Tue, 13 Jan 2026 17:53:32 -0500 Subject: [PATCH 8/9] test: add coverage for benchmarking/picklepatch skip logic --- tests/test_instrument_codeflash_trace.py | 54 +++++++++++++++++++++++- 1 file changed, 53 insertions(+), 1 deletion(-) diff --git a/tests/test_instrument_codeflash_trace.py b/tests/test_instrument_codeflash_trace.py index 38a6381e2..94d03c1fa 100644 --- a/tests/test_instrument_codeflash_trace.py +++ b/tests/test_instrument_codeflash_trace.py @@ -544,4 +544,56 @@ def target_function(): return "Hello from target function after nested function" """ - assert modified_code.strip() == expected_code.strip() \ No newline at end of file + assert modified_code.strip() == expected_code.strip() + + +def test_instrument_codeflash_trace_skips_benchmarking_module() -> None: + """Test that files in codeflash/benchmarking/ are skipped to avoid circular imports.""" + with tempfile.TemporaryDirectory() as temp_dir: + # Create a directory structure that mimics codeflash/benchmarking/ + benchmarking_dir = Path(temp_dir) / "codeflash" / "benchmarking" + benchmarking_dir.mkdir(parents=True) + + test_file_path = benchmarking_dir / "some_module.py" + original_content = """ +def some_function(): + return "This should not be modified" +""" + test_file_path.write_text(original_content, encoding="utf-8") + + fto = FunctionToOptimize( + function_name="some_function", + file_path=test_file_path, + parents=[] + ) + + instrument_codeflash_trace_decorator({test_file_path: [fto]}) + + # File should remain unchanged + assert test_file_path.read_text(encoding="utf-8") == original_content + + +def test_instrument_codeflash_trace_skips_picklepatch_module() -> None: + """Test that files in codeflash/picklepatch/ are skipped to avoid circular imports.""" + with tempfile.TemporaryDirectory() as temp_dir: + # Create a directory structure that mimics codeflash/picklepatch/ + picklepatch_dir = Path(temp_dir) / "codeflash" / "picklepatch" + picklepatch_dir.mkdir(parents=True) + + test_file_path = picklepatch_dir / "patcher.py" + original_content = """ +def patch_function(): + return "This should not be modified" +""" + test_file_path.write_text(original_content, encoding="utf-8") + + fto = FunctionToOptimize( + function_name="patch_function", + file_path=test_file_path, + parents=[] + ) + + instrument_codeflash_trace_decorator({test_file_path: [fto]}) + + # File should remain unchanged + assert test_file_path.read_text(encoding="utf-8") == original_content \ No newline at end of file From 155c4b80c18b8411557a57c5b39232be22c3cb6a Mon Sep 17 00:00:00 2001 From: Kevin Turcios Date: Tue, 13 Jan 2026 17:58:10 -0500 Subject: [PATCH 9/9] test: add tests for rpartition path matching logic --- tests/test_instrument_codeflash_trace.py | 90 +++++++++++++++++++++++- 1 file changed, 89 insertions(+), 1 deletion(-) diff --git a/tests/test_instrument_codeflash_trace.py b/tests/test_instrument_codeflash_trace.py index 94d03c1fa..b1a2ca17f 100644 --- a/tests/test_instrument_codeflash_trace.py +++ b/tests/test_instrument_codeflash_trace.py @@ -596,4 +596,92 @@ def patch_function(): instrument_codeflash_trace_decorator({test_file_path: [fto]}) # File should remain unchanged - assert test_file_path.read_text(encoding="utf-8") == original_content \ No newline at end of file + assert test_file_path.read_text(encoding="utf-8") == original_content + + +def test_instrument_codeflash_trace_nested_codeflash_path_skips_benchmarking() -> None: + """Test that nested codeflash paths like /project/codeflash/codeflash/benchmarking/ are skipped. + + The rpartition logic should find the LAST 'codeflash' in the path. + """ + with tempfile.TemporaryDirectory() as temp_dir: + # Create nested structure: project_codeflash/codeflash/benchmarking/ + nested_dir = Path(temp_dir) / "project_codeflash" / "codeflash" / "benchmarking" + nested_dir.mkdir(parents=True) + + test_file_path = nested_dir / "trace_module.py" + original_content = """ +def trace_func(): + return "Should not be modified" +""" + test_file_path.write_text(original_content, encoding="utf-8") + + fto = FunctionToOptimize( + function_name="trace_func", + file_path=test_file_path, + parents=[] + ) + + instrument_codeflash_trace_decorator({test_file_path: [fto]}) + + # File should remain unchanged because last /codeflash/ is followed by benchmarking + assert test_file_path.read_text(encoding="utf-8") == original_content + + +def test_instrument_codeflash_trace_nested_codeflash_path_instruments_other_modules() -> None: + """Test that nested codeflash paths with non-skipped modules ARE instrumented. + + The rpartition logic should allow instrumentation when the submodule is not benchmarking/picklepatch. + """ + with tempfile.TemporaryDirectory() as temp_dir: + # Create nested structure: project_codeflash/codeflash/other_module/ + nested_dir = Path(temp_dir) / "project_codeflash" / "codeflash" / "other_module" + nested_dir.mkdir(parents=True) + + test_file_path = nested_dir / "utils.py" + original_content = """ +def util_func(): + return "Should be modified" +""" + test_file_path.write_text(original_content, encoding="utf-8") + + fto = FunctionToOptimize( + function_name="util_func", + file_path=test_file_path, + parents=[] + ) + + instrument_codeflash_trace_decorator({test_file_path: [fto]}) + + # File SHOULD be modified because other_module is not in skip list + modified_content = test_file_path.read_text(encoding="utf-8") + assert "codeflash_trace" in modified_content + assert "@codeflash_trace" in modified_content + + +def test_instrument_codeflash_trace_no_codeflash_in_path() -> None: + """Test that paths without 'codeflash' directory are instrumented normally.""" + with tempfile.TemporaryDirectory() as temp_dir: + # Create a path with no 'codeflash' directory + project_dir = Path(temp_dir) / "myproject" / "src" + project_dir.mkdir(parents=True) + + test_file_path = project_dir / "main.py" + original_content = """ +def main_func(): + return "Should be modified" +""" + test_file_path.write_text(original_content, encoding="utf-8") + + fto = FunctionToOptimize( + function_name="main_func", + file_path=test_file_path, + parents=[] + ) + + instrument_codeflash_trace_decorator({test_file_path: [fto]}) + + # File SHOULD be modified + modified_content = test_file_path.read_text(encoding="utf-8") + assert "codeflash_trace" in modified_content + assert "@codeflash_trace" in modified_content \ No newline at end of file