Profilist · Profilist · Sep 14, 2025 · Sep 14, 2025 · Sep 14, 2025
diff --git a/backend/agents/prompts.py b/backend/agents/prompts.py
@@ -7,7 +7,7 @@
 
 def build_agent_instructions(tests: List[Dict], suite: Dict) -> str:
 	"""Build optimized instructions for autonomous agent testing with concise STEP emissions."""
-	base_url = os.getenv("DEPLOYMENT_URL", "https://staging.example.com")
+	base_url = os.getenv("DEPLOYMENT_URL", "https://www.larris.me/")
 
 	# Create concise, UI-friendly testing instructions
 	instructions = f"""
@@ -22,7 +22,7 @@ def build_agent_instructions(tests: List[Dict], suite: Dict) -> str:
 - Major steps are human-meaningful actions: navigation, key clicks that change pages, form submissions, and verifications.
 - Exclude micro steps (mouse moves, small scrolls, key-by-key typing) unless they are the core action.
 - Exclude continuations from the last step (ex. "Continuing to scroll").
-- Keep each step under 7 words when possible.
+- Keep each step as simple as possible, under 7 words ideally.
 - After completing each test scenario, output exactly one line: "RESULT: PASSED" or "RESULT: FAILED".
 
 TESTING APPROACH:
@@ -46,7 +46,7 @@ def build_agent_instructions(tests: List[Dict], suite: Dict) -> str:
 TESTING GUIDELINES:
 - Be thorough, but keep communication to STEP lines only
 - Take screenshots at key moments for your own reasoning, but do not describe them
-- Test both happy paths and edge cases
+- YOU ARE ON AN EXTREME TIME CRUNCH, test as EFFICIENTLY as possible, which could mean forming a conclusion PASS/FAIL faster instead of trying over and over again.
 - Prioritize actions that meaningfully change app state or page
 
 FINAL VERDICT FORMAT (MANDATORY):

diff --git a/backend/agents/record.py b/backend/agents/record.py
@@ -213,15 +213,3 @@ def status():
         except Exception:
             running = False
     return {"ok": True, "running": running, "path": data.get("path"), "pid": pid, "fps": data.get("fps")}
-
-
-def _pid_alive(pid):
-    # Kept for compatibility if imported directly; not used by venv_exec paths
-    import os as _os
-    try:
-        _os.kill(pid, 0)
-        return True
-    except Exception:
-        return False
-
-
diff --git a/backend/agents/runner.py b/backend/agents/runner.py
@@ -32,14 +32,14 @@ class RunStatus(Enum):
 async def run_single_agent(spec: Dict[str, Any]) -> Dict[str, Any]:
     print(f"SPEC: {spec}")
     # Setup CUA agent
-    model = spec.get("model") or os.getenv("CUA_MODEL", "claude-sonnet-4-20250514")
+    model = spec.get("model") or os.getenv("CUA_MODEL", "claude-opus-4-1-20250805") # claude-sonnet-4-20250514
     budget = spec.get("budget", 5.0)
     suite_id = spec.get("suite_id")
 
     # Setup CUA computer
     os_type = "linux"
     provider_type = "cloud"
-    container_name = spec.get("container_name") or os.getenv("CUA_CONTAINER_NAME")
+    container_name = spec.get("container_name")
     api_key = os.getenv("CUA_API_KEY")
     if not api_key:
         raise RuntimeError("CUA_API_KEY is required")
@@ -276,6 +276,19 @@ async def run_suites_for_result(result_id: int) -> Dict[str, Any]:
                 "run_status": RunStatus.FAILED.value,
                 "error": "No suites found for result"
             }
+
+        # Assign containers per suite (CUA_CONTAINER_1..4)
+        container_envs: List[str] = []
+        for i in range(1, 5):
+            val = os.getenv(f"CUA_CONTAINER_{i}")
+            if val:
+                container_envs.append(val)
+        if not container_envs:
+            raise RuntimeError("No CUA_CONTAINER_[1-4] variables configured")
+
+        for idx, spec in enumerate(specs):
+            assigned = container_envs[idx] if idx < len(container_envs) else container_envs[-1]
+            spec["container_name"] = assigned
 
         # Run each suite's tests concurrently
         tasks = [run_single_agent(spec) for spec in specs]

diff --git a/backend/cicd/DEPLOYMENT.md b/backend/cicd/DEPLOYMENT.md
diff --git a/backend/cicd/qai-pipeline.js b/backend/cicd/qai-pipeline.js
@@ -57,7 +57,13 @@ CHANGES: ${diff}
 
 Generate focused test scenarios for autonomous agents.
 
-For EACH scenario, also include a concise but rich summary (1-3 sentences) that gives the agent business context and the precise objective of the test. The summary should read like: "On <deployment url or app>, you are testing <feature or flow>; in this test, you <core action and intent> to validate <expected behavior/validation>".`
+Constraints and guidance:
+- Use at most 4 distinct test suites. Choose categories that best partition the behaviors changed by this PR (ex. Authentication, Navigation, New About Page, etc.).
+- For EACH suite, prefer 2–3 high-value tests when meaningful, ideally E2E tests that a human would miss (think edge cases, race conditions, etc.). Aim for a total of ~6–10 scenarios overall, balancing coverage and noise.
+- Do NOT create trivial or duplicative scenarios. Avoid superficial variations (e.g., same flow with only a color change). Deduplicate aggressively.
+- If there is truly only one meaningful area to test, produce at least 2 complementary tests for that same persona (e.g., happy path vs clear edge/error path) rather than only one total scenario.
+
+For EACH scenario, also include a concise but rich summary (1–3 sentences) that gives the agent context and the precise objective to carry out the test efficiently. The summary could read like: "On <deployment url>, you are testing <feature or flow>; in this test, you <core action and intent> to validate <expected behavior/validation>".`
       }],
       response_format: {
         type: "json_schema",
@@ -87,7 +93,11 @@ For EACH scenario, also include a concise but rich summary (1-3 sentences) that
       }
     });
 
-    const parsedScenarios = completion.choices[0].message.parsed.scenarios;
+    let parsedScenarios = completion.choices[0].message.parsed.scenarios;
+    // Hard cap to the first 4 scenarios; suites are created one per persona, so this also bounds suites to at most 4 to match available containers
+    if (Array.isArray(parsedScenarios) && parsedScenarios.length > 4) {
+      parsedScenarios = parsedScenarios.slice(0, 4);
+    }
     const deploymentUrl = process.env.DEPLOYMENT_URL || 'the app';
     const scenarios = parsedScenarios.map(s => ({
       ...s,
@@ -191,10 +201,13 @@ For EACH scenario, also include a concise but rich summary (1-3 sentences) that
         groups[persona].push(scenario);
         return groups;
       }, {});
+      // Enforce max 4 suites (personas)
+      const limitedPersonas = Object.keys(personaGroups).slice(0, 4);
 
       // Create suite records (one per persona/agent)
       this.suiteIds = {};
-      for (const [persona, personaScenarios] of Object.entries(personaGroups)) {
+      for (const persona of limitedPersonas) {
+        const personaScenarios = personaGroups[persona];
         const suiteRecord = {
           result_id: this.resultId, // Foreign key to results table
           name: `${persona} Agent Suite`