Skip to content
Merged

test #22

Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 12 additions & 2 deletions backend/agents/database.py
Original file line number Diff line number Diff line change
Expand Up @@ -125,7 +125,12 @@ async def get_suite_with_tests(suite_id: int) -> Optional[Dict[str, Any]]:
for test in tests:
formatted_tests.append({
'name': test.get('name', 'Untitled Test'),
'instructions': test.get('summary', '').split('\n') if test.get('summary') else [{'role': 'user', 'content': 'Verify that the browser is open.'}]
'instructions': [
{
'role': 'user',
'content': (test.get('summary') or 'Verify that the browser is open.')
}
]
})

return {
Expand Down Expand Up @@ -176,7 +181,12 @@ async def get_suites_with_tests_for_result(result_id: int) -> List[Dict[str, Any
for t in tests:
formatted_tests.append({
'name': t.get('name', 'Untitled Test'),
'instructions': t.get('summary', '').split('\n') if t.get('summary') else [{'role': 'user', 'content': 'Verify that the browser is open.'}],
'instructions': [
{
'role': 'user',
'content': (t.get('summary') or 'Verify that the browser is open.')
}
],
})
specs.append({
'suite_id': suite_id,
Expand Down
7 changes: 6 additions & 1 deletion backend/agents/runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ class RunStatus(Enum):
load_dotenv()

async def run_single_agent(spec: Dict[str, Any]) -> Dict[str, Any]:
print(f"SPEC: {spec}")
# Setup CUA agent
model = spec.get("model") or os.getenv("CUA_MODEL", "claude-sonnet-4-20250514")
budget = spec.get("budget", 5.0)
Expand All @@ -47,7 +48,8 @@ async def run_single_agent(spec: Dict[str, Any]) -> Dict[str, Any]:

# Setup tests
tests = normalize_tests(spec)

print(f"TESTS: {tests}")

async def _execute() -> Dict[str, Any]:
# Results from all tests from the suite
suite_results: List[Dict[str, Any]] = []
Expand Down Expand Up @@ -77,6 +79,7 @@ async def _execute() -> Dict[str, Any]:
pass

for test in tests:
print(f"TEST: {test}")
test_name = test.get("name", "test")
test_instructions = test.get("instructions") or []

Expand All @@ -96,7 +99,9 @@ async def _execute() -> Dict[str, Any]:
print(f"[Agent {suite_id}] recording start failed for {test_name}: {_e}")

try:
print(f"TEST INSTRUCTIONS: {test_instructions}")
async for result in agent.run(test_instructions):
print(f"RESULT: {result}")
for item in result.get("output", []):
# Add agent's current condensed steps
test_agent_steps = process_item(item, suite_id, test_agent_steps)
Expand Down
2 changes: 1 addition & 1 deletion backend/cicd/DEPLOYMENT.md
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ Add these in your repo settings → Secrets and variables → Actions:
2. **`QAI_ENDPOINT`** - URL where your agent testing endpoint will be hosted
3. **`GITHUB_TOKEN`** - ✅ Automatically provided by GitHub Actions

## Optional Secrets
## (Optional) Secrets

- **`DEPLOYMENT_URL`** - Override the staging URL if needed
- **`AGENT_TIMEOUT`** - Custom timeout in milliseconds (default: 300000)
Expand Down
2 changes: 0 additions & 2 deletions backend/cicd/qai-pipeline.js
Original file line number Diff line number Diff line change
Expand Up @@ -113,7 +113,6 @@ For EACH scenario, also include a concise but rich summary (1-3 sentences) that
try {
// Use QAI API endpoint instead of running agents locally
console.log(`🤖 Running tests through QAI API endpoint...`);
console.log("QAI_ENDPOINT:", process.env.QAI_ENDPOINT);

// Check if QAI_ENDPOINT is configured
if (!process.env.QAI_ENDPOINT) {
Expand All @@ -136,7 +135,6 @@ For EACH scenario, also include a concise but rich summary (1-3 sentences) that
headers: { 'Content-Type': 'application/json' }
}
);
console.log("Response:", response);

if (response.data?.status !== 'success') {
throw new Error(`API returned non-success status: ${response.data?.status || 'unknown'}`);
Expand Down
Loading