From d0741cc79926987a06013166de9856a8d9dba7ac Mon Sep 17 00:00:00 2001
From: Larris Xie <larris.xie@gmail.com>
Date: Sun, 14 Sep 2025 01:19:24 -0400
Subject: [PATCH 1/2] docs: rename "Optional Secrets" heading to "(Optional) Secrets" in DEPLOYMENT.md

---
 backend/cicd/DEPLOYMENT.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/backend/cicd/DEPLOYMENT.md b/backend/cicd/DEPLOYMENT.md
index 9587e85..851056e 100644
--- a/backend/cicd/DEPLOYMENT.md
+++ b/backend/cicd/DEPLOYMENT.md
@@ -12,7 +12,7 @@ Add these in your repo settings → Secrets and variables → Actions:
 2. **`QAI_ENDPOINT`** - URL where your agent testing endpoint will be hosted
 3. **`GITHUB_TOKEN`** - ✅ Automatically provided by GitHub Actions
 
-## Optional Secrets
+## (Optional) Secrets
 
 - **`DEPLOYMENT_URL`** - Override the staging URL if needed
 - **`AGENT_TIMEOUT`** - Custom timeout in milliseconds (default: 300000)

From 9308c02e8a0862fb9edcd92e78b5c6d11a36be1a Mon Sep 17 00:00:00 2001
From: Larris Xie <larris.xie@gmail.com>
Date: Sun, 14 Sep 2025 01:36:20 -0400
Subject: [PATCH 2/2] Fix test instruction format; add runner debug prints and drop pipeline console logs

---
 backend/agents/database.py   | 14 ++++++++++++--
 backend/agents/runner.py     |  7 ++++++-
 backend/cicd/qai-pipeline.js |  2 --
 3 files changed, 18 insertions(+), 5 deletions(-)

diff --git a/backend/agents/database.py b/backend/agents/database.py
index f80a422..1ea1896 100644
--- a/backend/agents/database.py
+++ b/backend/agents/database.py
@@ -125,7 +125,12 @@ async def get_suite_with_tests(suite_id: int) -> Optional[Dict[str, Any]]:
 		for test in tests:
 			formatted_tests.append({
 				'name': test.get('name', 'Untitled Test'),
-				'instructions': test.get('summary', '').split('\n') if test.get('summary') else [{'role': 'user', 'content': 'Verify that the browser is open.'}]
+				'instructions': [
+					{
+						'role': 'user',
+						'content': (test.get('summary') or 'Verify that the browser is open.')
+					}
+				]
 			})
 		
 		return {
@@ -176,7 +181,12 @@ async def get_suites_with_tests_for_result(result_id: int) -> List[Dict[str, Any
 			for t in tests:
 				formatted_tests.append({
 					'name': t.get('name', 'Untitled Test'),
-					'instructions': t.get('summary', '').split('\n') if t.get('summary') else [{'role': 'user', 'content': 'Verify that the browser is open.'}],
+					'instructions': [
+						{
+							'role': 'user',
+							'content': (t.get('summary') or 'Verify that the browser is open.')
+						}
+					],
 				})
 			specs.append({
 				'suite_id': suite_id,
diff --git a/backend/agents/runner.py b/backend/agents/runner.py
index 08aa969..ebec2bd 100644
--- a/backend/agents/runner.py
+++ b/backend/agents/runner.py
@@ -30,6 +30,7 @@ class RunStatus(Enum):
 load_dotenv()
 
 async def run_single_agent(spec: Dict[str, Any]) -> Dict[str, Any]:
+    print(f"SPEC: {spec}")
     # Setup CUA agent
     model = spec.get("model") or os.getenv("CUA_MODEL", "claude-sonnet-4-20250514")
     budget = spec.get("budget", 5.0)
@@ -47,7 +48,8 @@ async def run_single_agent(spec: Dict[str, Any]) -> Dict[str, Any]:
     
     # Setup tests
     tests = normalize_tests(spec)
-
+    print(f"TESTS: {tests}")
+    
     async def _execute() -> Dict[str, Any]:
         # Results from all tests from the suite
         suite_results: List[Dict[str, Any]] = []
@@ -77,6 +79,7 @@ async def _execute() -> Dict[str, Any]:
                 pass
 
             for test in tests:
+                print(f"TEST: {test}")
                 test_name = test.get("name", "test")
                 test_instructions = test.get("instructions") or []
                 
@@ -96,7 +99,9 @@ async def _execute() -> Dict[str, Any]:
                     print(f"[Agent {suite_id}] recording start failed for {test_name}: {_e}")
                     
                 try:
+                    print(f"TEST INSTRUCTIONS: {test_instructions}")
                     async for result in agent.run(test_instructions):
+                        print(f"RESULT: {result}")
                         for item in result.get("output", []):
                             # Add agent's current condensed steps
                             test_agent_steps = process_item(item, suite_id, test_agent_steps)
diff --git a/backend/cicd/qai-pipeline.js b/backend/cicd/qai-pipeline.js
index 21d9d03..d1efe64 100644
--- a/backend/cicd/qai-pipeline.js
+++ b/backend/cicd/qai-pipeline.js
@@ -113,7 +113,6 @@ For EACH scenario, also include a concise but rich summary (1-3 sentences) that
     try {
       // Use QAI API endpoint instead of running agents locally
         console.log(`🤖 Running tests through QAI API endpoint...`);
-        console.log("QAI_ENDPOINT:", process.env.QAI_ENDPOINT);
       
       // Check if QAI_ENDPOINT is configured
       if (!process.env.QAI_ENDPOINT) {
@@ -136,7 +135,6 @@ For EACH scenario, also include a concise but rich summary (1-3 sentences) that
           headers: { 'Content-Type': 'application/json' }
         }
       );
-      console.log("Response:", response);
 
       if (response.data?.status !== 'success') {
         throw new Error(`API returned non-success status: ${response.data?.status || 'unknown'}`);