diff --git a/backend/agents/prompts.py b/backend/agents/prompts.py
index a601a21..8ebf50f 100644
--- a/backend/agents/prompts.py
+++ b/backend/agents/prompts.py
@@ -7,7 +7,7 @@ def build_agent_instructions(tests: List[Dict], suite: Dict) -> str:
     """Build optimized instructions for autonomous agent testing with concise STEP emissions."""
-    base_url = os.getenv("DEPLOYMENT_URL", "https://staging.example.com")
+    base_url = os.getenv("DEPLOYMENT_URL", "https://www.larris.me/")
 
     # Create concise, UI-friendly testing instructions
     instructions = f"""
@@ -22,7 +22,7 @@ def build_agent_instructions(tests: List[Dict], suite: Dict) -> str:
 - Major steps are human-meaningful actions: navigation, key clicks that change pages, form submissions, and verifications.
 - Exclude micro steps (mouse moves, small scrolls, key-by-key typing) unless they are the core action.
 - Exclude continuations from the last step (ex. "Continuing to scroll").
-- Keep each step under 7 words when possible.
+- Keep each step as simple as possible, ideally under 7 words.
 - After completing each test scenario, output exactly one line: "RESULT: PASSED" or "RESULT: FAILED".
 
 TESTING APPROACH:
@@ -46,7 +46,7 @@ def build_agent_instructions(tests: List[Dict], suite: Dict) -> str:
 TESTING GUIDELINES:
 - Be thorough, but keep communication to STEP lines only
 - Take screenshots at key moments for your own reasoning, but do not describe them
-- Test both happy paths and edge cases
+- YOU ARE ON AN EXTREME TIME CRUNCH: test as EFFICIENTLY as possible, which can mean reaching a PASS/FAIL verdict quickly instead of retrying the same action over and over.
 - Prioritize actions that meaningfully change app state or page
 
 FINAL VERDICT FORMAT (MANDATORY):
diff --git a/backend/agents/record.py b/backend/agents/record.py
index 1562d4d..62521db 100644
--- a/backend/agents/record.py
+++ b/backend/agents/record.py
@@ -213,15 +213,3 @@ def status():
     except Exception:
         running = False
     return {"ok": True, "running": running, "path": data.get("path"), "pid": pid, "fps": data.get("fps")}
-
-
-def _pid_alive(pid):
-    # Kept for compatibility if imported directly; not used by venv_exec paths
-    import os as _os
-    try:
-        _os.kill(pid, 0)
-        return True
-    except Exception:
-        return False
-
-
diff --git a/backend/agents/runner.py b/backend/agents/runner.py
index ebec2bd..0a37d58 100644
--- a/backend/agents/runner.py
+++ b/backend/agents/runner.py
@@ -32,14 +32,14 @@ class RunStatus(Enum):
 async def run_single_agent(spec: Dict[str, Any]) -> Dict[str, Any]:
     print(f"SPEC: {spec}")
     # Setup CUA agent
-    model = spec.get("model") or os.getenv("CUA_MODEL", "claude-sonnet-4-20250514")
+    model = spec.get("model") or os.getenv("CUA_MODEL", "claude-opus-4-1-20250805")  # was: claude-sonnet-4-20250514
     budget = spec.get("budget", 5.0)
     suite_id = spec.get("suite_id")
 
     # Setup CUA computer
     os_type = "linux"
     provider_type = "cloud"
-    container_name = spec.get("container_name") or os.getenv("CUA_CONTAINER_NAME")
+    container_name = spec.get("container_name")
     api_key = os.getenv("CUA_API_KEY")
     if not api_key:
         raise RuntimeError("CUA_API_KEY is required")
@@ -276,6 +276,19 @@ async def run_suites_for_result(result_id: int) -> Dict[str, Any]:
             "run_status": RunStatus.FAILED.value,
             "error": "No suites found for result"
         }
+
+    # Assign containers per suite (CUA_CONTAINER_1..4)
+    container_envs: List[str] = []
+    for i in range(1, 5):
+        val = os.getenv(f"CUA_CONTAINER_{i}")
+        if val:
+            container_envs.append(val)
+    if not container_envs:
+        raise RuntimeError("No CUA_CONTAINER_[1-4] variables configured")
+
+    for idx, spec in enumerate(specs):
+        assigned = container_envs[idx] if idx < len(container_envs) else container_envs[-1]
+        spec["container_name"] = assigned
 
     # Run each suite's tests concurrently
     tasks = [run_single_agent(spec) for spec in specs]
diff --git a/backend/cicd/DEPLOYMENT.md b/backend/cicd/DEPLOYMENT.md
deleted file mode 100644
index 851056e..0000000
--- a/backend/cicd/DEPLOYMENT.md
+++ /dev/null
@@ -1,102 +0,0 @@
-# QAI CI/CD Deployment Checklist
-
-## ✅ Ready to Test Once You Add Secrets
-
-Yes! The system is fully testable with a GitHub repo + PR once you add the required secrets.
-
-## Required GitHub Secrets
-
-Add these in your repo settings → Secrets and variables → Actions:
-
-1. **`OPENAI_API_KEY`** - Your OpenAI API key (starts with `sk-`)
-2. **`QAI_ENDPOINT`** - URL where your agent testing endpoint will be hosted
-3. **`GITHUB_TOKEN`** - ✅ Automatically provided by GitHub Actions
-
-## (Optional) Secrets
-
-- **`DEPLOYMENT_URL`** - Override the staging URL if needed
-- **`AGENT_TIMEOUT`** - Custom timeout in milliseconds (default: 300000)
-
-## Testing Locally
-
-1. Copy secrets to `.env`:
-```bash
-cd backend/cicd
-cp .env.example .env
-# Edit .env with your actual values
-```
-
-2. Install dependencies:
-```bash
-npm install
-```
-
-3. Test individual scripts:
-```bash
-# Requires a PR context (set GITHUB_REPOSITORY, etc.)
-node analyze-pr.js
-
-# Requires test-scenarios.json to exist
-node run-tests.js
-
-# Requires PR context
-node update-summary.js
-```
-
-## Integration Requirements
-
-### Agent Endpoint Format
-Your teammate's agent endpoint should:
-
-**Accept POST requests with:**
-```json
-{
-  "url": "https://staging-url.com",
-  "scenarios": [
-    {
-      "description": "Test login form",
-      "priority": "high",
-      "type": "ui",
-      "persona": "new_user",
-      "steps": ["Navigate to login", "Enter credentials", "Submit"]
-    }
-  ],
-  "timeout": 300000
-}
-```
-
-**Return response:**
-```json
-[
-  {
-    "scenario": { /* original scenario object */ },
-    "success": true,
-    "error": null,
-    "video_url": "https://s3.../recording.mp4",
-    "duration": 12500
-  }
-]
-```
-
-## How to Test
-
-1. **Create a test repo** with this CI/CD setup
-2. **Add the GitHub secrets**
-3. **Create a PR** with some code changes
-4. **Watch the action run** in Actions tab
-5. **Check outputs** in the action logs
-
-The workflow will:
-- ✅ Analyze your PR changes
-- ✅ Generate relevant test scenarios using LLM
-- ✅ Upload scenarios to database and call QAI API endpoint
-- ✅ Pass/fail the CI based on test results from the API
-- ✅ Update codebase summary if tests pass
-
-**Note:** The pipeline now calls the QAI API endpoint (`/run-suite`) instead of running agents locally in GitHub Actions. This means GitHub Actions only needs the `QAI_ENDPOINT` URL and doesn't require `CUA_API_KEY` or other agent-specific credentials.
-
-## Structured Output Benefits
-
-✅ **No JSON parsing failures** - Uses OpenAI's structured output with schema validation
-✅ **Guaranteed format** - Schema enforces required fields and types
-✅ **Type safety** - Enum values for priority/type prevent invalid data
\ No newline at end of file
diff --git a/backend/cicd/qai-pipeline.js b/backend/cicd/qai-pipeline.js
index d1efe64..64a72de 100644
--- a/backend/cicd/qai-pipeline.js
+++ b/backend/cicd/qai-pipeline.js
@@ -57,7 +57,13 @@ CHANGES: ${diff}
 
 Generate focused test scenarios for autonomous agents.
 
-For EACH scenario, also include a concise but rich summary (1-3 sentences) that gives the agent business context and the precise objective of the test. The summary should read like: "On <site>, you are testing <feature>; in this test, you <perform actions> to validate <expected behavior>".`
+Constraints and guidance:
+- Use at most 4 distinct test suites. Choose categories that best partition the behaviors changed by this PR (e.g., Authentication, Navigation, New About Page).
+- For EACH suite, prefer 2-3 high-value tests when meaningful, ideally E2E tests that a human would miss (think edge cases, race conditions, etc.). Aim for a total of ~6-10 scenarios overall, balancing coverage and noise.
+- Do NOT create trivial or duplicative scenarios. Avoid superficial variations (e.g., the same flow with only a color change). Deduplicate aggressively.
+- If there is truly only one meaningful area to test, produce at least 2 complementary tests for that same persona (e.g., happy path vs. a clear edge/error path) rather than a single scenario in total.
+
+For EACH scenario, also include a concise but rich summary (1-3 sentences) that gives the agent context and the precise objective so it can carry out the test efficiently. The summary could read like: "On <site>, you are testing <feature>; in this test, you <perform actions> to validate <expected behavior>".`
     }],
     response_format: {
       type: "json_schema",
@@ -87,7 +93,11 @@ For EACH scenario, also include a concise but rich summary (1-3 sentences) that
     }
   });
 
-  const parsedScenarios = completion.choices[0].message.parsed.scenarios;
+  let parsedScenarios = completion.choices[0].message.parsed.scenarios;
+  // Hard cap to 4 suites (personas) to match available containers
+  if (Array.isArray(parsedScenarios) && parsedScenarios.length > 4) {
+    parsedScenarios = parsedScenarios.slice(0, 4);
+  }
   const deploymentUrl = process.env.DEPLOYMENT_URL || 'the app';
   const scenarios = parsedScenarios.map(s => ({
     ...s,
@@ -191,10 +201,13 @@ For EACH scenario, also include a concise but rich summary (1-3 sentences) that
       groups[persona].push(scenario);
       return groups;
     }, {});
+    // Enforce max 4 suites (personas)
+    const limitedPersonas = Object.keys(personaGroups).slice(0, 4);
 
     // Create suite records (one per persona/agent)
     this.suiteIds = {};
-    for (const [persona, personaScenarios] of Object.entries(personaGroups)) {
+    for (const persona of limitedPersonas) {
+      const personaScenarios = personaGroups[persona];
       const suiteRecord = {
         result_id: this.resultId, // Foreign key to results table
         name: `${persona} Agent Suite`
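
For reference, here is a minimal standalone sketch of the per-suite container assignment that `run_suites_for_result` now performs, mirroring the runner.py hunk above. The `assign_containers` helper name and the simplified `specs` shape are illustrative assumptions, not part of the patch:

```python
# Sketch of the container assignment introduced in backend/agents/runner.py.
# Assumes CUA_CONTAINER_1..CUA_CONTAINER_4 are set in the environment.
import os
from typing import Any, Dict, List


def assign_containers(specs: List[Dict[str, Any]]) -> None:
    # Collect whichever of the four container env vars are configured.
    container_envs: List[str] = []
    for i in range(1, 5):
        val = os.getenv(f"CUA_CONTAINER_{i}")
        if val:
            container_envs.append(val)
    if not container_envs:
        raise RuntimeError("No CUA_CONTAINER_[1-4] variables configured")

    # One container per suite; extra suites reuse the last container rather
    # than failing, which pairs with the 4-suite cap in qai-pipeline.js.
    for idx, spec in enumerate(specs):
        spec["container_name"] = (
            container_envs[idx] if idx < len(container_envs) else container_envs[-1]
        )
```

With only `CUA_CONTAINER_1=c1` and `CUA_CONTAINER_2=c2` set, three suite specs would be assigned `c1`, `c2`, `c2`: an under-provisioned environment degrades to container sharing instead of raising.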