From 1b39ca5d52d4144141313451fda9e83f3cbe0333 Mon Sep 17 00:00:00 2001
From: Daniel Prevoznik <danny@onkernel.com>
Date: Sun, 21 Dec 2025 09:18:05 -0500
Subject: [PATCH 1/3] Update Gemini cua template to reflect stagehand v3

> Updated to stagehand v3 sdk conventions
> Added ability to specify starting url + instructions
> Removed openai api key requirement that was in stagehand v2 version
---
 .../gemini-computer-use/.env.example          |  1 -
 .../typescript/gemini-computer-use/index.ts   | 56 +++++++++----------
 2 files changed, 28 insertions(+), 29 deletions(-)
diff --git a/pkg/templates/typescript/gemini-computer-use/.env.example b/pkg/templates/typescript/gemini-computer-use/.env.example
index a2973a1..0bb1de3 100644
--- a/pkg/templates/typescript/gemini-computer-use/.env.example
+++ b/pkg/templates/typescript/gemini-computer-use/.env.example
@@ -1,3 +1,2 @@
 # Copy this file to .env and fill in your API keys
 GOOGLE_API_KEY=your_google_api_key_here
-OPENAI_API_KEY=your_openai_api_key_here
diff --git a/pkg/templates/typescript/gemini-computer-use/index.ts b/pkg/templates/typescript/gemini-computer-use/index.ts
index 2d6997f..eee4015 100644
--- a/pkg/templates/typescript/gemini-computer-use/index.ts
+++ b/pkg/templates/typescript/gemini-computer-use/index.ts
@@ -7,29 +7,32 @@ const kernel = new Kernel({
 
 const app = kernel.app('ts-gemini-cua');
 
+interface CuaTaskInput { // Optional payload accepted by the 'gemini-cua-task' action handler
+  startingUrl?: string; // URL passed to page.goto() before the agent executes; runStagehandTask falls back to its default when omitted
+  instruction?: string; // Task instruction forwarded to runStagehandTask; its default instruction applies when omitted
+}
+
 interface SearchQueryOutput {
   success: boolean;
   result: string;
   error?: string;
 }
 
-// API Keys for LLM providers
+// API Key for LLM provider
 // - GOOGLE_API_KEY: Required for Gemini 2.5 Computer Use Agent
-// - OPENAI_API_KEY: Required for Stagehand's GPT-4o model
 // Set via environment variables or `kernel deploy <filename> --env-file .env`
 // See https://docs.onkernel.com/launch/deploy#environment-variables
 const GOOGLE_API_KEY = process.env.GOOGLE_API_KEY;
-const OPENAI_API_KEY = process.env.OPENAI_API_KEY;
-
-if (!OPENAI_API_KEY) {
-  throw new Error('OPENAI_API_KEY is not set');
-}
 
 if (!GOOGLE_API_KEY) {
   throw new Error('GOOGLE_API_KEY is not set');
 }
 
-async function runStagehandTask(invocationId?: string): Promise<SearchQueryOutput> {
+async function runStagehandTask(
+  invocationId?: string,
+  startingUrl: string = "https://www.magnitasks.com/",
+  instruction: string = "Click the Tasks option in the left-side bar, and move the 5 items in the 'To Do' and 'In Progress' items to the 'Done' section of the Kanban board? You are done successfully when the items are moved."
+): Promise<SearchQueryOutput> {
   // Executes a Computer Use Agent (CUA) task using Gemini 2.5 and Stagehand
 
   const browserOptions = {
@@ -49,11 +52,7 @@ async function runStagehandTask(invocationId?: string): Promise<SearchQueryOutpu
   const stagehand = new Stagehand({
     env: "LOCAL",
     verbose: 1,
-    domSettleTimeoutMs: 30_000,
-    modelName: "gpt-4o",
-    modelClientOptions: {
-      apiKey: OPENAI_API_KEY
-    },
+    domSettleTimeout: 30_000,
     localBrowserLaunchOptions: {
       cdpUrl: kernelBrowser.cdp_ws_url
     }
@@ -64,24 +63,21 @@ async function runStagehandTask(invocationId?: string): Promise<SearchQueryOutpu
   // Your Stagehand implementation here
   /////////////////////////////////////
   try {
-    const page = stagehand.page;
+    const page = stagehand.context.pages()[0];
 
     const agent = stagehand.agent({
-      provider: "google",
-      model: "gemini-2.5-computer-use-preview-10-2025",
-      instructions: `You are a helpful assistant that can use a web browser.
+      cua: true,
+      model: {
+        modelName: "google/gemini-2.5-computer-use-preview-10-2025",
+        apiKey: GOOGLE_API_KEY,
+      },
+      systemPrompt: `You are a helpful assistant that can use a web browser.
       You are currently on the following page: ${page.url()}.
       Do not ask follow up questions, the user will trust your judgement.`,
-      options: {
-        apiKey: GOOGLE_API_KEY,
-      }
     });
 
-    // Navigate to YCombinator's website
-    await page.goto("https://www.ycombinator.com/companies");
-
-    // Define the instructions for the CUA agent
-    const instruction = "Find Kernel's company page on the YCombinator website and write a blog post about their product offering.";
+    // Navigate to the starting website
+    await page.goto(startingUrl);
 
     // Execute the instruction
     const result = await agent.execute({
@@ -105,10 +101,14 @@ async function runStagehandTask(invocationId?: string): Promise<SearchQueryOutpu
 
 // Register Kernel action handler for remote invocation
 // Invoked via: kernel invoke ts-gemini-cua gemini-cua-task
-app.action<void, SearchQueryOutput>(
+app.action<CuaTaskInput, SearchQueryOutput>(
   'gemini-cua-task',
-  async (ctx: KernelContext): Promise<SearchQueryOutput> => {
-    return runStagehandTask(ctx.invocation_id);
+  async (ctx: KernelContext, payload?: CuaTaskInput): Promise<SearchQueryOutput> => {
+    return runStagehandTask(
+      ctx.invocation_id,
+      payload?.startingUrl,
+      payload?.instruction
+    );
   },
 );
 

From af7a754260c9be18384efaefbf246fc2093947f7 Mon Sep 17 00:00:00 2001
From: Daniel Prevoznik <danny@onkernel.com>
Date: Sun, 21 Dec 2025 09:18:52 -0500
Subject: [PATCH 2/3] Update readmes

Update the top-level README and the gemini-computer-use template README to reflect the Stagehand v3 changes, and add initial guidance on using other model providers.
---
 README.md                                     |  2 +-
 .../typescript/gemini-computer-use/README.md  | 32 ++++++++++++++++---
 2 files changed, 29 insertions(+), 5 deletions(-)

diff --git a/README.md b/README.md
index 22bcf5d..4d73dc3 100644
--- a/README.md
+++ b/README.md
@@ -121,7 +121,7 @@ Create an API key from the [Kernel dashboard](https://dashboard.onkernel.com).
   - `browser-use` - Template with Browser Use SDK (Python only)
   - `anthropic-computer-use` - Anthropic Computer Use prompt loop
   - `openai-computer-use` - OpenAI Computer Use Agent sample
-  - `gemini-computer-use` - Gemini Computer Use Agent sample (TypeScript only)
+  - `gemini-computer-use` - Implements a Gemini computer use agent (TypeScript only)
   - `openagi-computer-use` - OpenAGI Lux computer-use models (Python only)
   - `magnitude` - Magnitude framework sample (TypeScript only)
 
diff --git a/pkg/templates/typescript/gemini-computer-use/README.md b/pkg/templates/typescript/gemini-computer-use/README.md
index e6ae943..bce8930 100644
--- a/pkg/templates/typescript/gemini-computer-use/README.md
+++ b/pkg/templates/typescript/gemini-computer-use/README.md
@@ -4,14 +4,13 @@ A Kernel application that demonstrates Computer Use Agent (CUA) capabilities usi
 
 ## What It Does
 
-This app uses [Gemini 2.5's computer use model](https://blog.google/technology/google-deepmind/gemini-computer-use-model/) capabilities to autonomously navigate websites and complete tasks. The example task searches for Kernel's company page on YCombinator and writes a blog post about their product.
+This app uses [Gemini 2.5's computer use model](https://blog.google/technology/google-deepmind/gemini-computer-use-model/) capabilities to autonomously navigate websites and complete tasks. The agent can interact with web pages just like a human would - clicking, typing, scrolling, and extracting information.
 
 ## Setup
 
 1. **Add your API keys as environment variables:**
    - `KERNEL_API_KEY` - Get from [Kernel dashboard](https://dashboard.onkernel.com/sign-in)
    - `GOOGLE_API_KEY` - Get from [Google AI Studio](https://aistudio.google.com/apikey)
-   - `OPENAI_API_KEY` - Get from [OpenAI platform](https://platform.openai.com/api-keys)
 
 ## Running Locally
 
@@ -25,9 +24,10 @@ This runs the agent without a Kernel invocation context and provides the browser
 
 ## Deploying to Kernel
 
-1. **Deploy the application:**
+1. **Copy the example env file, add your API keys, and deploy:**
    ```bash
-   kernel deploy index.ts --env GOOGLE_API_KEY=XXX --env OPENAI_API_KEY=XXX
+   cp .env.example .env
+   kernel deploy index.ts --env-file .env
    ```
 
 2. **Invoke the action:**
@@ -37,6 +37,30 @@ This runs the agent without a Kernel invocation context and provides the browser
 
 The action creates a Kernel-managed browser and associates it with the invocation for tracking and monitoring.
 
+## Alternative Model Providers
+
+Stagehand's CUA agent supports multiple model providers. You can switch from Gemini to OpenAI or Anthropic by changing the model configuration in `index.ts` and redeploying your Kernel app:
+
+**OpenAI Computer Use:**
+```typescript
+model: {
+    modelName: "openai/computer-use-preview",
+    apiKey: process.env.OPENAI_API_KEY
+}
+```
+
+**Anthropic Claude Sonnet:**
+```typescript
+model: {
+    modelName: "anthropic/claude-sonnet-4-20250514",
+    apiKey: process.env.ANTHROPIC_API_KEY
+}
+```
+
+When using alternative providers, make sure to:
+1. Add the corresponding API key to your environment variables
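+2. Pass the new API key at deploy time: it is picked up automatically if you added it to `.env` and deploy with `--env-file .env`; otherwise pass it explicitly (e.g., `--env OPENAI_API_KEY=XXX`)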
+
 ## Documentation
 
 - [Kernel Documentation](https://docs.onkernel.com/quickstart)

From 5678806a9348c97137a308392f32aa197fd7ca04 Mon Sep 17 00:00:00 2001
From: Daniel Prevoznik <danny@onkernel.com>
Date: Sun, 21 Dec 2025 09:46:31 -0500
Subject: [PATCH 3/3] Update qa.md

Updated qa.md with the changes made to the gemini computer use template
---
 .cursor/commands/qa.md | 14 ++++----------
 1 file changed, 4 insertions(+), 10 deletions(-)

diff --git a/.cursor/commands/qa.md b/.cursor/commands/qa.md
index 92df8d6..6bed171 100644
--- a/.cursor/commands/qa.md
+++ b/.cursor/commands/qa.md
@@ -56,7 +56,7 @@ Here are all valid language + template combinations:
 | typescript | anthropic-computer-use | ts-anthropic-cua  | ts-anthropic-cua      | Yes            | ANTHROPIC_API_KEY              |
 | typescript | magnitude              | ts-magnitude      | ts-magnitude          | Yes            | ANTHROPIC_API_KEY              |
 | typescript | openai-computer-use    | ts-openai-cua     | ts-openai-cua         | Yes            | OPENAI_API_KEY                 |
-| typescript | gemini-computer-use    | ts-gemini-cua     | ts-gemini-cua         | Yes            | GOOGLE_API_KEY, OPENAI_API_KEY |
+| typescript | gemini-computer-use    | ts-gemini-cua     | ts-gemini-cua         | Yes            | GOOGLE_API_KEY                 |
 | python     | sample-app             | py-sample-app     | python-basic          | No             | -                              |
 | python     | captcha-solver         | py-captcha-solver | python-captcha-solver | No             | -                              |
 | python     | browser-use            | py-browser-use    | python-bu             | Yes            | OPENAI_API_KEY                 |
@@ -154,14 +154,11 @@ echo "OPENAI_API_KEY=<value from human>" > .env
 cd ..
 ```
 
-**ts-gemini-cua** (needs GOOGLE_API_KEY and OPENAI_API_KEY):
+**ts-gemini-cua** (needs GOOGLE_API_KEY):
 
 ```bash
 cd ts-gemini-cua
-cat > .env << EOF
-GOOGLE_API_KEY=<value from human>
-OPENAI_API_KEY=<value from human>
-EOF
+echo "GOOGLE_API_KEY=<value from human>" > .env
 ../bin/kernel deploy index.ts --env-file .env
 cd ..
 ```
@@ -214,7 +211,7 @@ kernel invoke ts-stagehand teamsize-task --payload '{"company": "Kernel"}'
 kernel invoke ts-anthropic-cua cua-task --payload '{"query": "Return the first url of a search result for NYC restaurant reviews Pete Wells"}'
 kernel invoke ts-magnitude mag-url-extract --payload '{"url": "https://en.wikipedia.org/wiki/Special:Random"}'
 kernel invoke ts-openai-cua cua-task --payload '{"task": "Go to https://news.ycombinator.com and get the top 5 articles"}'
-kernel invoke ts-gemini-cua gemini-cua-task
+kernel invoke ts-gemini-cua gemini-cua-task --payload '{"startingUrl": "https://www.magnitasks.com/", "instruction": "Click the Tasks option in the left-side bar, and move the 5 items in the To Do and In Progress items to the Done section of the Kanban board? You are done successfully when the items are moved."}'
 
 # Python apps
 kernel invoke python-basic get-page-title --payload '{"url": "https://www.google.com"}'
@@ -232,8 +229,6 @@ kernel invoke python-openagi-cua openagi-default-task -p '{"instruction": "Navig
 If the human agrees, invoke each template and collect results. Present findings in this format:
 
 ### Testing Guidelines
-
-- **Timeout:** Cancel each invocation after 90 seconds if it has not completed. Mark the status as `TIMEOUT` in the results table.
 - **Parallel execution:** You may run multiple invocations in parallel to speed up testing.
 - **Error handling:** Capture any runtime errors and include them in the Notes column.
 
@@ -258,7 +253,6 @@ If the human agrees, invoke each template and collect results. Present findings
 Status values:
 - **SUCCESS**: App started and returned a result
 - **FAILED**: App encountered a runtime error
-- **TIMEOUT**: App did not complete within 90 seconds (cancelled)
 
 Notes should include brief error messages for failures or confirmation of successful output.