From 4ff0c863abe5b35b4e82f38500310fa1ce0797ca Mon Sep 17 00:00:00 2001 From: Larris Xie Date: Sat, 13 Sep 2025 21:14:35 -0400 Subject: [PATCH 1/3] Small fixes --- backend/__init__.py | 3 +++ backend/agents/database.py | 4 ++-- backend/agents/run_suite.py | 45 ------------------------------------- backend/agents/runner.py | 6 +++-- backend/tests/__init__.py | 0 backend/tests/agent_test.py | 14 +++++++----- 6 files changed, 17 insertions(+), 55 deletions(-) create mode 100644 backend/__init__.py delete mode 100644 backend/agents/run_suite.py create mode 100644 backend/tests/__init__.py diff --git a/backend/__init__.py b/backend/__init__.py new file mode 100644 index 0000000..6e2b56f --- /dev/null +++ b/backend/__init__.py @@ -0,0 +1,3 @@ +"""Backend package initializer.""" + + diff --git a/backend/agents/database.py b/backend/agents/database.py index 2e49f3b..7a1831a 100644 --- a/backend/agents/database.py +++ b/backend/agents/database.py @@ -1,9 +1,9 @@ from typing import Dict, Any, Optional from supabase import create_client -from dotenv import load_dotenv +from dotenv import load_dotenv, find_dotenv import os -load_dotenv() +load_dotenv(find_dotenv()) SUPABASE_URL = os.getenv('SUPABASE_URL') SUPABASE_KEY = os.getenv('SUPABASE_KEY') supabase = create_client(SUPABASE_URL, SUPABASE_KEY) if SUPABASE_URL and SUPABASE_KEY else None diff --git a/backend/agents/run_suite.py b/backend/agents/run_suite.py deleted file mode 100644 index aba570e..0000000 --- a/backend/agents/run_suite.py +++ /dev/null @@ -1,45 +0,0 @@ -#!/usr/bin/env python3 -""" -Bridge script to run agent suites from Node.js CICD pipeline -This script is called by qai-pipeline.js to execute individual test suites -""" -import sys -import asyncio -from pathlib import Path - -# Add the agents directory to the Python path -sys.path.append(str(Path(__file__).parent)) - -from runner import run_qai_tests - -async def main(): - if len(sys.argv) != 2: - print("Usage: python run_suite.py ", file=sys.stderr) - sys.exit(1) - - try: - suite_id = int(sys.argv[1]) - print(f"🚀 Starting agent execution for suite {suite_id}") - - # Run the agent with the database-backed suite - result = await run_qai_tests(suite_id=suite_id) - - print(f"✅ Agent execution completed for suite {suite_id}") - print(f"📊 Agent status: {result['agent_result']['status']}") - - # Exit with appropriate code - if result['agent_result']['status'] == 'success': - sys.exit(0) - else: - print(f"❌ Agent failed: {result['agent_result'].get('error', 'Unknown error')}", file=sys.stderr) - sys.exit(1) - - except ValueError: - print(f"Error: Invalid suite_id '{sys.argv[1]}'. Must be an integer.", file=sys.stderr) - sys.exit(1) - except Exception as e: - print(f"❌ Agent execution failed: {str(e)}", file=sys.stderr) - sys.exit(1) - -if __name__ == "__main__": - asyncio.run(main()) \ No newline at end of file diff --git a/backend/agents/runner.py b/backend/agents/runner.py index 3f83c51..43cdb33 100644 --- a/backend/agents/runner.py +++ b/backend/agents/runner.py @@ -85,6 +85,8 @@ def _prepare_step_for_storage(item: Dict[str, Any]): instructions=build_agent_instructions(tests, spec), ) + await computer.venv_install("recording_venv", []) + # Open the browser before starting agent steps try: await computer.interface.left_click(536, 742) @@ -107,7 +109,7 @@ def _prepare_step_for_storage(item: Dict[str, Any]): # Start recording inside VM try: remote_dir = make_remote_recording_dir(suite_id, test_name) - await computer.venv_exec("demo_venv", start_recording, output_dir=remote_dir, fps=5) + await computer.venv_exec("recording_venv", start_recording, output_dir=remote_dir, fps=5) print(f"[Agent {suite_id}] recording started for {test_name}") except Exception as _e: print(f"[Agent {suite_id}] recording start failed for {test_name}: {_e}") @@ -145,7 +147,7 @@ def _prepare_step_for_storage(item: Dict[str, Any]): # Stop recording and get S3 URL try: - recording_stop = await computer.venv_exec("demo_venv", stop_recording) + recording_stop = await computer.venv_exec("recording_venv", stop_recording) if isinstance(recording_stop, dict): upload = recording_stop.get("upload") or {} resp = upload.get("response") or {} diff --git a/backend/tests/__init__.py b/backend/tests/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/backend/tests/agent_test.py b/backend/tests/agent_test.py index a948852..5df3aa0 100644 --- a/backend/tests/agent_test.py +++ b/backend/tests/agent_test.py @@ -10,9 +10,9 @@ def main() -> None: specs = [ { - "persona": "navigation", + "name": "navigation", + "suite_id": 1, "container_name": os.getenv("CUA_CONTAINER_NAME"), - "suite": "navigation", "tests": [ { "name": "admissions-header", @@ -42,9 +42,9 @@ def main() -> None: ], }, { - "persona": "events", - "container_name": os.getenv("CUA_CONTAINER_NAME2") or os.getenv("CUA_CONTAINER_NAME"), - "suite": "events", + "name": "events", + "suite_id": 2, + "container_name": os.getenv("CUA_CONTAINER_NAME2"), "tests": [ { "name": "first-event", @@ -62,7 +62,9 @@ def main() -> None: }, ] - summary = asyncio.run(run_agents(specs)) + pr_name = os.getenv("PR_NAME", "local-dev") + pr_link = os.getenv("PR_LINK", "http://localhost") + summary = asyncio.run(run_agents(specs, pr_name, pr_link)) print(summary) From 9e80ce08e33437a029ee7ace59c71d022bb908a4 Mon Sep 17 00:00:00 2001 From: Larris Xie Date: Sat, 13 Sep 2025 22:28:44 -0400 Subject: [PATCH 2/3] Optimizes prompts --- backend/agents/prompts.py | 57 ++++++++++++++++--------------------- backend/agents/runner.py | 33 +++++---------------- backend/agents/utils.py | 45 ++++++++++++++++++++++------- backend/tests/agent_test.py | 2 +- 4 files changed, 67 insertions(+), 70 deletions(-) diff --git a/backend/agents/prompts.py b/backend/agents/prompts.py index 44b54f2..a601a21 100644 --- a/backend/agents/prompts.py +++ b/backend/agents/prompts.py @@ -4,60 +4,53 @@ from dotenv import load_dotenv load_dotenv() + def build_agent_instructions(tests: List[Dict], suite: Dict) -> str: - """Build optimized instructions for autonomous agent testing""" - - base_url = os.getenv('DEPLOYMENT_URL', 'https://staging.example.com') - - # Create comprehensive testing instructions + """Build optimized instructions for autonomous agent testing with concise STEP emissions.""" + base_url = os.getenv("DEPLOYMENT_URL", "https://staging.example.com") + + # Create concise, UI-friendly testing instructions instructions = f""" You are an autonomous QA testing agent for web applications. Your goal is to thoroughly test the deployment at {base_url}. SUITE: {suite['name']} TOTAL TESTS: {len(tests)} +COMMUNICATION RULES (MANDATORY): +- Only output concise major steps in the exact format: "STEP: ". +- Use action/gerund form ending with "-ing" (e.g., "Clicking Admissions", "Navigating to home"). +- Major steps are human-meaningful actions: navigation, key clicks that change pages, form submissions, and verifications. +- Exclude micro steps (mouse moves, small scrolls, key-by-key typing) unless they are the core action. +- Exclude continuations from the last step (ex. "Continuing to scroll"). +- Keep each step under 7 words when possible. +- After completing each test scenario, output exactly one line: "RESULT: PASSED" or "RESULT: FAILED". + TESTING APPROACH: -1. Start by taking a screenshot to see the current state -2. Navigate to the base URL: {base_url} -3. Perform comprehensive exploratory testing -4. Look for bugs, broken functionality, and edge cases -5. Test user flows and interactions -6. Pay special attention to recent changes that might have introduced issues +1. Navigate to the base URL: {base_url} +2. Execute each scenario's intent +3. Verify the expected destination/state +4. Document ONLY major actions using STEP lines in gerund form SPECIFIC TEST SCENARIOS: """ - + for i, test in enumerate(tests, 1): instructions += f""" {i}. {test['name']} Description: {test.get('summary', 'No description provided')} Priority: {'HIGH' if 'critical' in test.get('summary', '').lower() else 'MEDIUM'} """ - + instructions += """ TESTING GUIDELINES: -- Be thorough and methodical in your approach -- Take screenshots at key moments to document your findings +- Be thorough, but keep communication to STEP lines only +- Take screenshots at key moments for your own reasoning, but do not describe them - Test both happy paths and edge cases -- Look for unexpected behaviors, errors, or broken functionality -- Pay attention to UI/UX issues and usability problems -- Test form submissions, navigation, and interactive elements -- Check for responsive design and mobile compatibility if applicable -- Document any bugs or issues you discover with clear descriptions - -SUCCESS CRITERIA: -- Complete testing of all specified scenarios -- Identify and document any bugs or issues found -- Verify that core functionality works as expected -- Provide clear feedback on the overall quality of the deployment - -Remember: You are looking for unexpected bugs and issues that developers might miss. Be creative in your testing approach and explore edge cases. +- Prioritize actions that meaningfully change app state or page FINAL VERDICT FORMAT (MANDATORY): -- After completing each test scenario, output exactly one line with no extra commentary: -- RESULT: PASSED (if the scenario executed successfully and no critical issues were found) -- RESULT: FAILED (if execution could not complete or a critical/blocking issue was found) +- After each scenario, output exactly one line: RESULT: PASSED or RESULT: FAILED """ - + return instructions \ No newline at end of file diff --git a/backend/agents/runner.py b/backend/agents/runner.py index 43cdb33..29cd500 100644 --- a/backend/agents/runner.py +++ b/backend/agents/runner.py @@ -15,7 +15,7 @@ set_suite_result_id, ) from .prompts import build_agent_instructions -from .utils import normalize_tests, make_remote_recording_dir, process_item +from .utils import normalize_tests, make_remote_recording_dir, process_item, extract_major_steps from .record import start_recording, stop_recording class RunStatus(Enum): @@ -29,7 +29,7 @@ class RunStatus(Enum): async def run_single_agent(spec: Dict[str, Any]) -> Dict[str, Any]: # Setup CUA agent - model = spec.get("model") or os.getenv("CUA_MODEL", "anthropic/claude-3-5-sonnet-20241022") + model = spec.get("model") or os.getenv("CUA_MODEL", "claude-sonnet-4-20250514") budget = spec.get("budget", 5.0) suite_id = spec.get("suite_id") @@ -50,26 +50,7 @@ async def _execute() -> Dict[str, Any]: # Results from all tests from the suite suite_results: List[Dict[str, Any]] = [] - def _prepare_step_for_storage(item: Dict[str, Any]): - t = item.get("type") - if t == "message": - try: - content = item.get("content") or [] - for block in content: - if isinstance(block, dict) and block.get("text"): - return block["text"] - except Exception: - return item - elif t in ("computer_call", "computer_call_output", "function_call", "function_call_output"): - pruned = dict(item) - if pruned.get("type") == "computer_call_output": - output = pruned.get("output", {}) - if isinstance(output, dict) and "image_url" in output: - output = dict(output) - output["image_url"] = "[omitted]" - pruned["output"] = output - return pruned - return item + async with Computer( os_type=os_type, @@ -117,12 +98,12 @@ def _prepare_step_for_storage(item: Dict[str, Any]): try: async for result in agent.run(test_instructions): for item in result.get("output", []): - # Add agent's current step + # Add agent's current condensed steps test_agent_steps = process_item(item, suite_id, test_agent_steps) - # Persist step immediately to DB + # Persist condensed steps immediately to DB if test_id is not None: - step_payload = _prepare_step_for_storage(item) - await append_test_step(test_id, step_payload) + for step in extract_major_steps(item): + await append_test_step(test_id, step) # Parse explicit verdict from agent message content try: if isinstance(item, dict) and item.get("type") == "message": diff --git a/backend/agents/utils.py b/backend/agents/utils.py index d954d0f..fdb23ad 100644 --- a/backend/agents/utils.py +++ b/backend/agents/utils.py @@ -37,40 +37,63 @@ def make_remote_recording_dir(suite_id: str, test_name: str) -> str: def process_item(item: dict, suite_id: str, test_agent_steps: list[dict]) -> dict: item_type = item.get("type") - + if item_type == "message": try: content = item.get("content") or [] for block in content: if isinstance(block, dict) and block.get("text"): - print(f"[Agent {suite_id}] message: {block['text']}") - test_agent_steps.append(block["text"]) + text = block["text"] + for line in str(text).splitlines(): + candidate = line.strip() + if candidate.upper().startswith("STEP:"): + step_text = candidate.split(":", 1)[1].strip() + step_text = step_text + if step_text: + print(f"[Agent {suite_id}] STEP: {step_text}") + test_agent_steps.append(step_text) except Exception: pass elif item_type in ("computer_call", "computer_call_output", "function_call", "function_call_output"): + # Keep debug output, but do not append raw tool calls to steps pruned = dict(item) if pruned.get("type") == "computer_call_output": output = pruned.get("output", {}) if isinstance(output, dict) and "image_url" in output: - output = dict(output) - output["image_url"] = "[omitted]" - pruned["output"] = output print(f"[Agent {suite_id}] computer_call_output: screenshot captured") - elif pruned.get("type") == "computer_call": action = pruned.get("action", {}) or {} a_type = action.get("type", "unknown") a_args = {k: v for k, v in action.items() if k != "type"} print(f"[Agent {suite_id}] computer_call: {a_type}({a_args})") - elif pruned.get("type") == "function_call": fname = pruned.get("name", "") print(f"[Agent {suite_id}] function_call: {fname}") - elif pruned.get("type") == "function_call_output": print(f"[Agent {suite_id}] function_call_output: received") - test_agent_steps.append(pruned) - return test_agent_steps + + +def extract_major_steps(item: dict) -> list[str]: + """Extract condensed STEP lines from a single agent output item.""" + steps: list[str] = [] + try: + if item.get("type") != "message": + return steps + content = item.get("content") or [] + for block in content: + if isinstance(block, dict) and block.get("text"): + text = block["text"] + for line in str(text).splitlines(): + candidate = line.strip() + if candidate.upper().startswith("STEP:"): + step_text = candidate.split(":", 1)[1].strip() + step_text = step_text + if step_text: + steps.append(step_text) + except Exception: + return steps + return steps + diff --git a/backend/tests/agent_test.py b/backend/tests/agent_test.py index 5df3aa0..23677d6 100644 --- a/backend/tests/agent_test.py +++ b/backend/tests/agent_test.py @@ -20,7 +20,7 @@ def main() -> None: { "role": "user", "content": ( - "Go to https://uwaterloo.ca. " + "Go to http://myproject123.io" "Click the header link labeled \"Admissions\". " "Confirm that the page you land on is related to Admissions." ), From 46408f1a63f9f7a06e237ad1578e1f31832a4811 Mon Sep 17 00:00:00 2001 From: Larris Xie Date: Sat, 13 Sep 2025 23:29:59 -0400 Subject: [PATCH 3/3] Changes --- backend/agents/database.py | 61 +++++- backend/agents/main.py | 368 +++++++++-------------------------- backend/agents/runner.py | 81 +++++++- backend/cicd/qai-pipeline.js | 85 ++------ 4 files changed, 235 insertions(+), 360 deletions(-) diff --git a/backend/agents/database.py b/backend/agents/database.py index fc7594e..f62a666 100644 --- a/backend/agents/database.py +++ b/backend/agents/database.py @@ -1,4 +1,4 @@ -from typing import Dict, Any, Optional +from typing import Dict, Any, Optional, List from supabase import create_client from dotenv import load_dotenv, find_dotenv import os @@ -149,3 +149,62 @@ async def get_result_id_for_suite(suite_id: int) -> Optional[int]: except Exception as e: print(f"[db] ❌ get_result_id_for_suite error: {str(e)}") return None + + +async def get_suites_with_tests_for_result(result_id: int) -> List[Dict[str, Any]]: + """Fetch all suites (and their tests) for a given result_id, formatted for agent specs.""" + try: + if not _has_client(): + print(f"[db] Skipping get_suites_with_tests_for_result for result {result_id}: no client") + return [] + # Fetch suites under the result + suites_resp = supabase.table('suites').select('id,name').eq('result_id', result_id).execute() + suites = suites_resp.data or [] + specs: List[Dict[str, Any]] = [] + for s in suites: + suite_id = s.get('id') + name = s.get('name', 'Untitled Suite') + if suite_id is None: + continue + # Fetch tests for this suite + tests_resp = supabase.table('tests').select('*').eq('suite_id', suite_id).execute() + tests = tests_resp.data or [] + formatted_tests: List[Dict[str, Any]] = [] + for t in tests: + formatted_tests.append({ + 'name': t.get('name', 'Untitled Test'), + 'instructions': t.get('summary', '').split('\n') if t.get('summary') else ['Run basic test'], + }) + specs.append({ + 'suite_id': suite_id, + 'name': name, + 'tests': formatted_tests, + }) + return specs + except Exception as e: + print(f"[db] ❌ get_suites_with_tests_for_result error: {str(e)}") + return [] + + +async def get_result_basics(result_id: int) -> Optional[Dict[str, Any]]: + """Fetch basic fields from results needed to run agents (pr_name, pr-link).""" + try: + if not _has_client(): + return None + resp = supabase.table('results').select('id, pr_name, pr-link').eq('id', result_id).limit(1).execute() + if resp.data: + return resp.data[0] + return None + except Exception as e: + print(f"[db] ❌ get_result_basics error: {str(e)}") + return None + + +async def update_result_fields(result_id: int, fields: Dict[str, Any]) -> None: + """Generic update for a results row.""" + try: + if not _has_client(): + return + supabase.table('results').update(fields).eq('id', result_id).execute() + except Exception as e: + print(f"[db] ❌ update_result_fields error: {str(e)}") diff --git a/backend/agents/main.py b/backend/agents/main.py index a716318..30a451c 100644 --- a/backend/agents/main.py +++ b/backend/agents/main.py @@ -2,18 +2,11 @@ from fastapi.middleware.cors import CORSMiddleware from pydantic import BaseModel from typing import List, Dict, Any, Optional -import asyncio import os from dotenv import load_dotenv -from runner import run_single_agent, run_agents, run_qai_tests +from runner import run_agents, run_qai_tests, run_suites_for_result from database import ( - get_or_create_test, - create_result, - set_suite_result_id, - get_suite_with_tests, - update_test_fields, - append_test_step, _has_client ) @@ -74,257 +67,22 @@ class MultiAgentRunRequest(BaseModel): pr_name: str pr_link: str +class RunResultRequest(BaseModel): + result_id: int + @app.get("/health") async def health_check(): """Health check endpoint""" - return { + db_connected = _has_client() + print(f"[API] Health check requested - Database connected: {db_connected}") + + response = { "status": "healthy", - "database_connected": _has_client(), + "database_connected": db_connected, "version": "1.0.0" } - -# Results endpoints (matching API.md) - -@app.post("/results") -async def create_result_endpoint(request: CreateResultRequest): - """Create a new result (PR test run)""" - try: - if not _has_client(): - raise HTTPException(status_code=500, detail="Database not configured") - - # Use the database function to create result - result_id = await create_result(request.prName, request.prLink, {}, "PENDING") - if result_id is None: - raise HTTPException(status_code=500, detail="Failed to create result") - - # Return response matching API.md format - return { - "success": True, - "message": "Result created successfully", - "data": { - "id": result_id, - "pr-link": request.prLink, - "pr-name": request.prName, - "res-success": False - } - } - except HTTPException: - raise - except Exception as e: - raise HTTPException(status_code=500, detail=f"Failed to create result: {str(e)}") - -@app.patch("/results/{result_id}") -async def update_result_endpoint(result_id: int, request: UpdateResultRequest): - """Update result success status""" - try: - if not _has_client(): - raise HTTPException(status_code=500, detail="Database not configured") - - from database import supabase - - response = supabase.table('results').update({ - 'res-success': request.resSuccess - }).eq('id', result_id).execute() - - if not response.data: - raise HTTPException(status_code=404, detail="Result not found") - - return { - "success": True, - "message": "Result updated successfully", - "data": response.data[0] - } - except HTTPException: - raise - except Exception as e: - raise HTTPException(status_code=500, detail=f"Failed to update result: {str(e)}") - -@app.get("/results") -async def get_all_results(): - """Get all results""" - try: - if not _has_client(): - raise HTTPException(status_code=500, detail="Database not configured") - - from database import supabase - - response = supabase.table('results').select('*').order('created_at', desc=True).execute() - - return { - "success": True, - "data": response.data or [] - } - except Exception as e: - raise HTTPException(status_code=500, detail=f"Failed to fetch results: {str(e)}") - -# Suite endpoints - -@app.post("/suites") -async def create_suite_endpoint(request: CreateSuiteRequest): - """Create a new test suite""" - try: - if not _has_client(): - raise HTTPException(status_code=500, detail="Database not configured") - - from database import supabase - - suite_data = { - 'result_id': request.resultId, - 'name': request.name, - 'suites-success': request.suitesSuccess - } - if request.s3Link: - suite_data['s3-link'] = request.s3Link - - response = supabase.table('suites').insert([suite_data]).execute() - - if not response.data: - raise HTTPException(status_code=500, detail="Failed to create suite") - - return { - "success": True, - "message": "Suite created successfully", - "data": response.data[0] - } - except HTTPException: - raise - except Exception as e: - raise HTTPException(status_code=500, detail=f"Failed to create suite: {str(e)}") - -@app.patch("/suites/{suite_id}") -async def update_suite_endpoint(suite_id: int, request: UpdateSuiteRequest): - """Update suite success status and/or S3 link""" - try: - if not _has_client(): - raise HTTPException(status_code=500, detail="Database not configured") - - from database import supabase - - update_data = {} - if request.suitesSuccess is not None: - update_data['suites-success'] = request.suitesSuccess - if request.s3Link is not None: - update_data['s3-link'] = request.s3Link - - if not update_data: - raise HTTPException(status_code=400, detail="No update data provided") - - response = supabase.table('suites').update(update_data).eq('id', suite_id).execute() - - if not response.data: - raise HTTPException(status_code=404, detail="Suite not found") - - return { - "success": True, - "message": "Suite updated successfully", - "data": response.data[0] - } - except HTTPException: - raise - except Exception as e: - raise HTTPException(status_code=500, detail=f"Failed to update suite: {str(e)}") - -@app.get("/results/{result_id}/suites") -async def get_suites_for_result(result_id: int): - """Get suites for a specific result""" - try: - if not _has_client(): - raise HTTPException(status_code=500, detail="Database not configured") - - from database import supabase - - response = supabase.table('suites').select('*').eq('result_id', result_id).order('created_at', desc=True).execute() - - return { - "success": True, - "data": response.data or [] - } - except Exception as e: - raise HTTPException(status_code=500, detail=f"Failed to fetch suites: {str(e)}") - -# Test endpoints - -@app.post("/tests") -async def create_test_endpoint(request: CreateTestRequest): - """Create a new individual test""" - try: - if not _has_client(): - raise HTTPException(status_code=500, detail="Database not configured") - - from database import supabase - - test_data = { - 'suite_id': request.suiteId, - 'name': request.name, - 'summary': request.summary, - 'test-success': request.testSuccess - } - - response = supabase.table('tests').insert([test_data]).execute() - - if not response.data: - raise HTTPException(status_code=500, detail="Failed to create test") - - return { - "success": True, - "message": "Test created successfully", - "data": response.data[0] - } - except HTTPException: - raise - except Exception as e: - raise HTTPException(status_code=500, detail=f"Failed to create test: {str(e)}") - -@app.patch("/tests/{test_id}") -async def update_test_endpoint(test_id: int, request: UpdateTestRequest): - """Update test success status and/or summary""" - try: - if not _has_client(): - raise HTTPException(status_code=500, detail="Database not configured") - - from database import supabase - - update_data = {} - if request.testSuccess is not None: - update_data['test-success'] = request.testSuccess - if request.summary is not None: - update_data['summary'] = request.summary - - if not update_data: - raise HTTPException(status_code=400, detail="No update data provided") - - response = supabase.table('tests').update(update_data).eq('id', test_id).execute() - - if not response.data: - raise HTTPException(status_code=404, detail="Test not found") - - return { - "success": True, - "message": "Test updated successfully", - "data": response.data[0] - } - except HTTPException: - raise - except Exception as e: - raise HTTPException(status_code=500, detail=f"Failed to update test: {str(e)}") - -@app.get("/suites/{suite_id}/tests") -async def get_tests_for_suite(suite_id: int): - """Get tests for a specific suite""" - try: - if not _has_client(): - raise HTTPException(status_code=500, detail="Database not configured") - - from database import supabase - - response = supabase.table('tests').select('*').eq('suite_id', suite_id).order('created_at', desc=True).execute() - - return { - "success": True, - "data": response.data or [] - } - except Exception as e: - raise HTTPException(status_code=500, detail=f"Failed to fetch tests: {str(e)}") + print(f"[API] Health check response: {response}") + return response # Agent execution endpoints @@ -333,42 +91,48 @@ async def run_suite_endpoint(request: RunSuiteRequest): """ Run a test suite by ID - This is the main endpoint called by the CICD pipeline """ + suite_id = request.suite_id + print(f"[API] Starting suite execution for suite_id: {suite_id}") + print(f"[API] Request received at: {__import__('datetime').datetime.now().isoformat()}") + try: - result = await run_qai_tests(request.suite_id) + print(f"[API] Calling run_qai_tests for suite_id: {suite_id}") + result = await run_qai_tests(suite_id) + print(f"[API] run_qai_tests completed for suite_id: {suite_id}") + print(f"[API] Result status: {result.get('agent_result', {}).get('status', 'unknown')}") if result['agent_result']['status'] == 'success': - return { + print(f"[API] Suite {suite_id} executed successfully") + print(f"[API] Tests run: {result['agent_result'].get('tests_run', 0)}") + + response = { "status": "success", - "message": f"Suite {request.suite_id} executed successfully", + "message": f"Suite {suite_id} executed successfully", "data": result } + print(f"[API] Returning success response for suite_id: {suite_id}") + return response else: + error_msg = result['agent_result'].get('error', 'Unknown error') + print(f"[API] Suite execution failed for suite_id: {suite_id}") + print(f"[API] Error: {error_msg}") raise HTTPException( status_code=500, - detail=f"Suite execution failed: {result['agent_result'].get('error', 'Unknown error')}" + detail=f"Suite execution failed: {error_msg}" ) - except HTTPException: + except HTTPException as he: + print(f"[API] HTTPException raised for suite_id: {suite_id}") + print(f"[API] HTTPException detail: {he.detail}") + print(f"[API] HTTPException status_code: {he.status_code}") raise except Exception as e: + print(f"[API] Unexpected error for suite_id: {suite_id}") + print(f"[API] Exception type: {type(e).__name__}") + print(f"[API] Exception message: {str(e)}") + print(f"[API] Exception details: {e}") raise HTTPException(status_code=500, detail=f"Suite execution failed: {str(e)}") - -@app.post("/run-agent") -async def run_agent_endpoint(request: AgentRunRequest): - """Run a single agent test (legacy endpoint)""" - try: - result = await run_single_agent(request.spec) - return {"status": "success", "result": result} - except Exception as e: - raise HTTPException(status_code=500, detail=f"Agent execution failed: {str(e)}") - -@app.post("/run-agents") -async def run_agents_endpoint(request: MultiAgentRunRequest): - """Run multiple agent tests (legacy endpoint)""" - try: - result = await run_agents(request.test_specs, request.pr_name, request.pr_link) - return {"status": "success", "result": result} - except Exception as e: - raise HTTPException(status_code=500, detail=f"Agents execution failed: {str(e)}") + finally: + print(f"[API] Request completed for suite_id: {suite_id}") @app.get("/") async def root(): @@ -378,14 +142,56 @@ async def root(): "version": "1.0.0", "endpoints": { "health": "/health", - "results": "/results", - "suites": "/suites", - "tests": "/tests", - "run_suite": "/run-suite" + "run_suite": "/run-suite", + "run_result": "/run-result", + "run_agents": "/run-agents" } } + +@app.post("/run-result") +async def run_result_endpoint(request: RunResultRequest): + """Run all suites and tests for a given result_id.""" + result_id = request.result_id + print(f"[API] Starting result execution for result_id: {result_id}") + try: + summary = await run_suites_for_result(result_id) + status = summary.get("run_status") + if status == "PASSED" or status == "FAILED": + return { + "status": "success", + "message": f"Result {result_id} executed", + "data": summary, + } + return { + "status": "success", + "message": f"Result {result_id} executed", + "data": summary, + } + except Exception as e: + print(f"[API] run_result failed: {e}") + raise HTTPException(status_code=500, detail=f"Result execution failed: {str(e)}") + + +@app.post("/run-agents") +async def run_agents_endpoint(request: MultiAgentRunRequest): + """Run multiple agent suites provided directly as test_specs along with PR metadata.""" + try: + summary = await run_agents(request.test_specs, request.pr_name, request.pr_link) + return { + "status": "success", + "message": "Agents executed", + "data": summary, + } + except Exception as e: + print(f"[API] run_agents failed: {e}") + raise HTTPException(status_code=500, detail=f"Agents execution failed: {str(e)}") + # For Vercel deployment if __name__ == "__main__": import uvicorn - uvicorn.run(app, host="0.0.0.0", port=int(os.getenv("PORT", "8000"))) \ No newline at end of file + port = int(os.getenv("PORT", "8000")) + print(f"[API] Starting QAI Agent Runner API on port {port}") + print(f"[API] Database connection status: {_has_client()}") + print(f"[API] Environment loaded from .env: {os.getenv('CUA_API_KEY', 'Not set')[:10]}...") + uvicorn.run(app, host="0.0.0.0", port=port) \ No newline at end of file diff --git a/backend/agents/runner.py b/backend/agents/runner.py index 93d7650..fd99596 100644 --- a/backend/agents/runner.py +++ b/backend/agents/runner.py @@ -13,10 +13,12 @@ update_test_fields, create_result, set_suite_result_id, + get_suites_with_tests_for_result, + update_result_fields, ) -from .prompts import build_agent_instructions -from .utils import normalize_tests, make_remote_recording_dir, process_item, extract_major_steps -from .record import start_recording, stop_recording +from prompts import build_agent_instructions +from utils import normalize_tests, make_remote_recording_dir, process_item, extract_major_steps +from record import start_recording, stop_recording class RunStatus(Enum): QUEUED = "QUEUED" @@ -97,6 +99,7 @@ async def _execute() -> Dict[str, Any]: try: async for result in agent.run(test_instructions): + print(f"WTF HEREHEREHEREHEREHEREHERE {result}") for item in result.get("output", []): # Add agent's current condensed steps test_agent_steps = process_item(item, suite_id, test_agent_steps) @@ -169,22 +172,28 @@ async def run_qai_tests(suite_id: int) -> Dict[str, Any]: try: # Fetch suite and test data from database + print(f"[run_qai_tests] Fetching suite data for suite_id: {suite_id}") suite_data = await get_suite_with_tests(suite_id) if not suite_data: + print(f"[run_qai_tests] Suite {suite_id} not found in database") return { 'agent_result': { 'status': 'failed', 'error': f'Suite {suite_id} not found' } } + print(f"[run_qai_tests] Suite data: {suite_data}") + + print(f"[run_qai_tests] Retrieved suite data: {suite_data.get('name', 'Unknown')} with {len(suite_data.get('tests', []))} tests") # Convert database format to agent spec format spec = { 'suite_id': suite_id, + 'name': suite_data.get('name'), # Add suite name for build_agent_instructions 'model': os.getenv("CUA_MODEL", "anthropic/claude-3-5-sonnet-20241022"), 'budget': 5.0, 'container_name': os.getenv("CUA_CONTAINER_NAME"), - 'tests': suite_data.get('tests', []) + 'tests': suite_data.get('tests') } # Run the agent @@ -247,3 +256,67 @@ async def run_agents(test_specs: List[Dict[str, Any]], pr_name: str, pr_link: st } print(json.dumps(summary)) return summary + + +async def run_suites_for_result(result_id: int) -> Dict[str, Any]: + """ + Fetch all suites/tests for a given result_id and run them together. + Updates the existing result row with overall summary and run_status. + """ + try: + # Load suite specs for this result + specs: List[Dict[str, Any]] = await get_suites_with_tests_for_result(result_id) + if not specs: + await update_result_fields(result_id, {"run_status": RunStatus.FAILED.value}) + return { + "result_id": result_id, + "overall_result": {"passed_tests": 0, "failed_tests": 0, "total_tests": 0}, + "run_status": RunStatus.FAILED.value, + "error": "No suites found for result" + } + + # Run each suite's tests concurrently + tasks = [run_single_agent(spec) for spec in specs] + results: List[Any] = await asyncio.gather(*tasks, return_exceptions=True) + + total_tests = 0 + passed_tests = 0 + failed_tests = 0 + for res in results: + if isinstance(res, Exception): + # Count as a failed suite with unknown test count + continue + for t in res: + total_tests += 1 + if t.get("test_success"): + passed_tests += 1 + else: + failed_tests += 1 + + run_status = RunStatus.PASSED if failed_tests == 0 and total_tests > 0 else RunStatus.FAILED + overall_result = { + "passed_tests": passed_tests, + "failed_tests": failed_tests, + "total_tests": total_tests, + } + + await update_result_fields(result_id, { + "overall_result": overall_result, + "run_status": run_status.value, + }) + + summary = { + "result_id": result_id, + "overall_result": overall_result, + "run_status": run_status.value, + } + print(json.dumps(summary)) + return summary + except Exception as e: + await update_result_fields(result_id, {"run_status": RunStatus.FAILED.value}) + return { + "result_id": result_id, + "overall_result": {"passed_tests": 0, "failed_tests": 0, "total_tests": 0}, + "run_status": RunStatus.FAILED.value, + "error": str(e), + } diff --git a/backend/cicd/qai-pipeline.js b/backend/cicd/qai-pipeline.js index d8f80db..6ed576f 100644 --- a/backend/cicd/qai-pipeline.js +++ b/backend/cicd/qai-pipeline.js @@ -114,87 +114,24 @@ Generate focused test scenarios for autonomous agents.` throw new Error('No result_id available - database upload may have failed'); } - // Get suites for this result to run agents - const { data: suites } = await this.supabase - .from('suites') - .select('id, name') - .eq('result_id', this.resultId); - - if (!suites || suites.length === 0) { - throw new Error('No suites found for this result'); - } - - console.log(`🏃 Running ${suites.length} agent suites via API...`); - - const results = []; + // Call new single-shot endpoint to run all suites for this result const agentTimeout = parseInt(process.env.AGENT_TIMEOUT || '600000'); - - for (const suite of suites) { - console.log(`🤖 Calling API for suite: ${suite.name} (ID: ${suite.id})`); - - try { - // Call the QAI API endpoint to run the suite - const response = await axios.post( - `${process.env.QAI_ENDPOINT}/run-suite`, - { suite_id: suite.id }, - { - timeout: agentTimeout + 60000, // API timeout + 1 minute buffer - headers: { 'Content-Type': 'application/json' } - } - ); - - if (response.data.status === 'success') { - console.log(`✅ Suite ${suite.id} completed successfully via API`); - results.push({ - suite_id: suite.id, - suite_name: suite.name, - success: true, - api_response: response.data - }); - } else { - throw new Error(`API returned non-success status: ${response.data.status}`); - } - - } catch (error) { - console.error(`❌ Suite ${suite.id} API call failed: ${error.message}`); - - let errorMessage = error.message; - if (error.response) { - errorMessage = `HTTP ${error.response.status}: ${error.response.data?.detail || error.response.statusText}`; - } else if (error.code === 'ECONNREFUSED') { - errorMessage = 'Cannot connect to QAI API endpoint'; - } - - results.push({ - suite_id: suite.id, - suite_name: suite.name, - success: false, - error: errorMessage - }); + console.log(`🏃 Calling /run-result for result_id=${this.resultId} ...`); + const response = await axios.post( + `${process.env.QAI_ENDPOINT}/run-result`, + { result_id: this.resultId }, + { + timeout: agentTimeout + 60000, + headers: { 'Content-Type': 'application/json' } } - } - - console.log(`✅ Completed ${results.length} agent suite API calls`); - - this.saveFile('test-results.json', results); + ); - const passed = results.filter(r => r.success).length; - const failed = results.length - passed; - - console.log(`📊 API Results: ${passed}/${results.length} suites passed`); - - if (failed > 0) { - console.log(`❌ Failed suites:`); - results.filter(r => !r.success).forEach(result => { - console.log(` • ${result.suite_name}: ${result.error || 'Failed'}`); - }); + if (response.data?.status !== 'success') { + throw new Error(`API returned non-success status: ${response.data?.status || 'unknown'}`); } - console.log(`💾 Database results updated by QAI API system`); - // Verify final database state const finalSuccess = await this.verifyFinalResults(); - console.log(`::set-output name=success::${finalSuccess}`); return finalSuccess; } catch (error) {