From 30f8cb2669ff4e3239a8e77903919b178a23be76 Mon Sep 17 00:00:00 2001 From: Emilien Macchi Date: Thu, 15 May 2025 14:33:24 -0400 Subject: [PATCH] fix(parser): only keep final traceback for each failing test The Tempest report parser was updated to correctly extract the final traceback when multiple tracebacks are present in a single log entry. Previously, the parser would capture everything from the first traceback encountered up to the end marker, including any later tracebacks. This could lead to a large amount of input that our model can't handle. For now, let's just focus on the last traceback found for each test. --- src/rca_accelerator_chatbot/api.py | 27 +++++++++++++++++---------- 1 file changed, 17 insertions(+), 10 deletions(-) diff --git a/src/rca_accelerator_chatbot/api.py b/src/rca_accelerator_chatbot/api.py index ccc9710..b7b5fe8 100644 --- a/src/rca_accelerator_chatbot/api.py +++ b/src/rca_accelerator_chatbot/api.py @@ -175,8 +175,9 @@ def _extract_test_name(test_name_part: str) -> str: return test_name -async def fetch_and_parse_tempest_report(url: str) -> List[Dict[str, str]]: - """Fetches and parses the Tempest HTML report to extract test names and tracebacks.""" +async def fetch_and_parse_tempest_report(url: str) -> List[Dict[str, str]]: # pylint: disable=too-many-locals + """Fetches and parses the Tempest HTML report to extract test names + and the last traceback for each failed test.""" async with httpx.AsyncClient(verify=False, follow_redirects=True) as client: try: response = await client.get(url, auth=HTTPSPNEGOAuth(mutual_authentication=OPTIONAL)) @@ -202,14 +203,20 @@ async def fetch_and_parse_tempest_report(url: str) -> List[Dict[str, str]]: test_name_part = row_text[:traceback_start_index].strip() test_name = _extract_test_name(test_name_part) - traceback_text = row_text[traceback_start_index:] - end_marker_index = traceback_text.find("}}}") - if end_marker_index != -1: - traceback_text = traceback_text[:end_marker_index].strip() - else: - traceback_text = traceback_text.strip() - - 
results.append({"test_name": test_name, "traceback": traceback_text}) + tb_marker = "Traceback (most recent call last):" + traceback_pattern = re.compile( + # Match from one tb_marker to the next (non-greedy), or to end of string + f"{re.escape(tb_marker)}.*?(?={re.escape(tb_marker)}|$)", + re.DOTALL + ) + + traceback_parts = traceback_pattern.findall(row_text[traceback_start_index:]) + if traceback_parts: + last_traceback = traceback_parts[-1].strip() + end_marker_index = last_traceback.find("}}}") + if end_marker_index != -1: + last_traceback = last_traceback[:end_marker_index].strip() + results.append({"test_name": test_name, "traceback": last_traceback}) if not results: pass