diff --git a/.gitignore b/.gitignore
index cc1d69b..ebcf97a 100644
--- a/.gitignore
+++ b/.gitignore
@@ -18,6 +18,7 @@ target/
 # Agent/AI tooling
 .opencode/
 .claude/
+.agents/
 
 # Internal docs (not for public repo)
 docs/
diff --git a/Cargo.lock b/Cargo.lock
index 971c859..e8eecf7 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -491,6 +491,7 @@ version = "0.0.4"
 dependencies = [
  "serde",
  "serde_json",
+ "unicode-width",
 ]
 
 [[package]]
diff --git a/Cargo.toml b/Cargo.toml
index 2e89f7c..7d6705d 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -34,6 +34,7 @@ anyhow = "1"
 tracing = "0.1"
 tracing-subscriber = { version = "0.3", features = ["env-filter"] }
 regex = "1"
+unicode-width = "0.2"
 
 # System
 libc = "0.2"
diff --git a/README.md b/README.md
index 18d7dc8..c3a0b6e 100644
--- a/README.md
+++ b/README.md
@@ -1,12 +1,16 @@
 <p align="center">
-  <img src="assets/pilotty.png" alt="pilotty logo" width="400">
+  <img src="assets/pilotty.png" alt="pilotty - Terminal automation CLI enabling AI agents to control TUI applications" width="400">
 </p>
 
 <h1 align="center">pilotty</h1>
 
+<p align="center">
+  <sub>The terminal equivalent of <a href="https://github.com/vercel-labs/agent-browser">agent-browser</a></sub>
+</p>
+
 <p align="center">
   <strong>Terminal automation CLI for AI agents</strong><br>
-  <em>Like <a href="https://github.com/vercel-labs/agent-browser">agent-browser</a>, but for TUI applications.</em>
+  <em>Control vim, htop, lazygit, dialog, and any TUI programmatically</em>
 </p>
 
 <p align="center">
@@ -21,16 +25,23 @@
 > [!NOTE]
 > **Built with AI, for AI.** This project was built with the support of an AI agent, planned thoroughly with a tight feedback loop and reviewed at each step. While we've tested extensively, edge cases may exist. Use in production at your own discretion, and please [report any issues](https://github.com/msmps/pilotty/issues) you find!
 
-pilotty enables AI agents to interact with terminal applications (vim, htop, lazygit, dialog, etc.) through a simple CLI interface. It manages PTY sessions, captures terminal output, and provides keyboard/mouse input capabilities for navigating TUI applications.
+pilotty enables AI agents to interact with terminal applications through a simple command-line interface. It manages pseudo-terminal (PTY) sessions with full VT100 terminal emulation, captures screen state, and provides keyboard/mouse input for navigating terminal user interfaces. Think of it as headless terminal automation for AI workflows.
 
 ## Features
 
-- **PTY Management**: Spawn and manage terminal applications in background sessions
+- **PTY (Pseudo-Terminal) Management**: Spawn and manage terminal applications in background sessions
+- **Terminal Emulation**: Full VT100 emulation for accurate screen capture and state tracking
 - **Keyboard Navigation**: Interact with TUIs using Tab, Enter, arrow keys, and key combos
 - **AI-Friendly Output**: Clean JSON responses with actionable suggestions on errors
 - **Multi-Session**: Run multiple terminal apps simultaneously in isolated sessions
 - **Zero Config**: Daemon auto-starts on first command, auto-stops after 5 minutes idle
 
+## Why pilotty?
+
+[agent-browser](https://github.com/vercel-labs/agent-browser) by Vercel Labs lets AI agents control web browsers. pilotty does the same for terminals.
+
+**Origin story:** pilotty was built to solve a personal problem: enabling AI agents to interact with [OpenTUI](https://github.com/anomalyco/opentui) interfaces and control [OpenCode](https://github.com/anomalyco/opencode) programmatically. If you're building TUIs or working with terminal applications, pilotty lets AI navigate them just like a human would.
+
 ## Installation
 
 ### npm (recommended)
@@ -150,11 +161,82 @@ The `snapshot` command returns structured data about the terminal screen:
   "snapshot_id": 42,
   "size": { "cols": 80, "rows": 24 },
   "cursor": { "row": 5, "col": 10, "visible": true },
-  "text": "... plain text content ..."
+  "text": "Options: [x] Enable  [ ] Debug\nActions: [OK] [Cancel]",
+  "elements": [
+    { "kind": "toggle", "row": 0, "col": 9, "width": 3, "text": "[x]", "confidence": 1.0, "checked": true },
+    { "kind": "toggle", "row": 0, "col": 21, "width": 3, "text": "[ ]", "confidence": 1.0, "checked": false },
+    { "kind": "button", "row": 1, "col": 9, "width": 4, "text": "[OK]", "confidence": 0.8 },
+    { "kind": "button", "row": 1, "col": 14, "width": 8, "text": "[Cancel]", "confidence": 0.8 }
+  ],
+  "content_hash": 12345678901234567890
 }
 ```
 
-Use the cursor position and text content to understand the screen state and navigate using keyboard commands (Tab, Enter, arrow keys) or click at specific coordinates.
+## UI Elements (Contextual)
+
+pilotty automatically detects interactive UI elements in terminal applications. Elements provide **read-only context** to help understand UI structure, with position data (row, col) for use with the click command.
+
+**Use keyboard navigation (`pilotty key Tab`, `pilotty key Enter`, `pilotty type "text"`) for reliable TUI interaction** rather than element-based actions, as UI element detection depends on visual patterns that may disappear after interaction.
+
+### Element Kinds
+
+| Kind | Detection Patterns | Confidence |
+|------|-------------------|------------|
+| **button** | Inverse video, `[OK]`, `<Cancel>` | 1.0 / 0.8 |
+| **input** | Cursor position, `____` underscores | 1.0 / 0.6 |
+| **toggle** | `[x]`, `[ ]`, `☑`, `☐` | 1.0 |
+
+### Element Fields
+
+| Field | Description |
+|-------|-------------|
+| `kind` | Element type: `button`, `input`, or `toggle` |
+| `row` | Row position (0-based) |
+| `col` | Column position (0-based) |
+| `width` | Width in terminal cells |
+| `text` | Text content of the element |
+| `confidence` | Detection confidence (0.0-1.0) |
+| `focused` | Whether element has focus (only present if true) |
+| `checked` | Toggle state (only present for toggles) |
+
+### Change Detection
+
+The `content_hash` field enables screen change detection between snapshots:
+
+```bash
+# Get initial snapshot
+SNAP1=$(pilotty snapshot)
+HASH1=$(echo "$SNAP1" | jq -r '.content_hash')
+
+# Perform some action
+pilotty key Tab
+
+# Check if screen changed
+SNAP2=$(pilotty snapshot)
+HASH2=$(echo "$SNAP2" | jq -r '.content_hash')
+
+if [ "$HASH1" != "$HASH2" ]; then
+  echo "Screen content changed"
+fi
+```
+
+### Workflow Example
+
+```bash
+# 1. Spawn a TUI with dialog elements
+pilotty spawn dialog --yesno "Continue?" 10 40
+
+# 2. Wait for dialog to render
+pilotty wait-for "Continue"
+
+# 3. Get snapshot with elements (for context)
+pilotty snapshot | jq '.elements'
+# Shows detected buttons, helps understand UI structure
+
+# 4. Navigate and interact with keyboard (reliable approach)
+pilotty key Tab      # Move to next element
+pilotty key Enter    # Activate selected element
+```
 
 ## Sessions
 
diff --git a/crates/pilotty-cli/src/daemon/server.rs b/crates/pilotty-cli/src/daemon/server.rs
index 10493dd..c0fb344 100644
--- a/crates/pilotty-cli/src/daemon/server.rs
+++ b/crates/pilotty-cli/src/daemon/server.rs
@@ -615,15 +615,18 @@ async fn handle_snapshot(
         Err(e) => return Response::error(request_id, e),
     };
 
+    let format = format.unwrap_or(SnapshotFormat::Full);
+
+    // Full format includes UI element detection
+    let with_elements = matches!(format, SnapshotFormat::Full);
+
     // Get snapshot data (drains PTY output first)
-    let snapshot = match sessions.get_snapshot_data(&session_id).await {
+    let snapshot = match sessions.get_snapshot_data(&session_id, with_elements).await {
         Ok(data) => data,
         Err(e) => return Response::error(request_id, e),
     };
     let (cursor_row, cursor_col) = snapshot.cursor_pos;
 
-    let format = format.unwrap_or(SnapshotFormat::Full);
-
     match format {
         SnapshotFormat::Text => {
             // Format as plain text with cursor indicator
@@ -637,9 +640,10 @@ async fn handle_snapshot(
                 },
             )
         }
-        SnapshotFormat::Full | SnapshotFormat::Compact => {
-            // Build ScreenState JSON
+        SnapshotFormat::Full => {
+            // Full: text + elements + metadata + content_hash
             let snapshot_id = sessions.next_snapshot_id();
+
             let screen_state = ScreenState {
                 snapshot_id,
                 size: TerminalSize {
@@ -651,11 +655,29 @@ async fn handle_snapshot(
                     col: cursor_col,
                     visible: snapshot.cursor_visible,
                 },
-                text: if format == SnapshotFormat::Full {
-                    Some(snapshot.text)
-                } else {
-                    None
+                text: Some(snapshot.text),
+                elements: snapshot.elements,
+                content_hash: snapshot.content_hash,
+            };
+            Response::success(request_id, ResponseData::ScreenState(screen_state))
+        }
+        SnapshotFormat::Compact => {
+            // Compact: metadata only, no text, elements, or hash
+            let snapshot_id = sessions.next_snapshot_id();
+            let screen_state = ScreenState {
+                snapshot_id,
+                size: TerminalSize {
+                    cols: snapshot.size.cols,
+                    rows: snapshot.size.rows,
                 },
+                cursor: CursorState {
+                    row: cursor_row,
+                    col: cursor_col,
+                    visible: snapshot.cursor_visible,
+                },
+                text: None,
+                elements: None,
+                content_hash: None,
             };
             Response::success(request_id, ResponseData::ScreenState(screen_state))
         }
@@ -1016,16 +1038,20 @@ async fn handle_wait_for(
         Err(e) => return Response::error(request_id, e),
     };
 
-    // Compile regex if needed
+    // Compile regex if needed.
+    // Cap the compiled program size so oversized patterns are rejected up front.
     let compiled_regex = if use_regex {
-        match regex::Regex::new(&pattern) {
+        match regex::RegexBuilder::new(&pattern)
+            .size_limit(256 * 1024) // 256KB compiled size limit
+            .build()
+        {
             Ok(r) => Some(r),
             Err(e) => {
                 return Response::error(
                     request_id,
                     ApiError::invalid_input_with_suggestion(
                         format!("Invalid regex pattern: {}", e),
-                        "Check your regex syntax. Common issues: unescaped special chars, unbalanced parentheses.",
+                        "Check your regex syntax. Common issues: unescaped special chars, unbalanced parentheses, or pattern too complex.",
                     ),
                 );
             }
@@ -1054,8 +1080,8 @@ async fn handle_wait_for(
             );
         }
 
-        // Get current screen text
-        let snapshot = match sessions.get_snapshot_data(&session_id).await {
+        // Get current screen text (no elements needed for wait_for)
+        let snapshot = match sessions.get_snapshot_data(&session_id, false).await {
             Ok(data) => data,
             Err(e) => return Response::error(request_id, e),
         };
@@ -2347,4 +2373,173 @@ mod tests {
         let _ = std::fs::remove_file(&socket_path);
         let _ = std::fs::remove_file(&pid_path);
     }
+
+    #[tokio::test]
+    async fn test_snapshot_with_elements() {
+        use pilotty_core::elements::ElementKind;
+
+        let temp_dir = std::env::temp_dir();
+        let socket_path = temp_dir.join(format!("pilotty-elem-{}.sock", std::process::id()));
+        let pid_path = socket_path.with_extension("pid");
+
+        let server = DaemonServer::bind_to(socket_path.clone(), pid_path.clone())
+            .await
+            .expect("Failed to bind server");
+
+        let server_handle = tokio::spawn(async move {
+            let _ = timeout(Duration::from_secs(5), server.run()).await;
+        });
+
+        tokio::time::sleep(Duration::from_millis(50)).await;
+
+        let stream = UnixStream::connect(&socket_path)
+            .await
+            .expect("Failed to connect");
+        let (reader, mut writer) = stream.into_split();
+        let mut reader = BufReader::new(reader);
+
+        // Spawn a session with output containing detectable elements:
+        // - [OK] and [Cancel] → Buttons (bracket pattern, confidence 0.8)
+        // - [x] and [ ] → Toggles (checkbox pattern, confidence 1.0)
+        let spawn_request = Request {
+            id: "spawn-elem".to_string(),
+            command: Command::Spawn {
+                command: vec![
+                    "printf".to_string(),
+                    "Options: [x] Enable  [ ] Debug\nActions: [OK] [Cancel]\n".to_string(),
+                ],
+                session_name: Some("elem-test".to_string()),
+                cwd: None,
+            },
+        };
+        let request_json = serde_json::to_string(&spawn_request).unwrap();
+        writer
+            .write_all(request_json.as_bytes())
+            .await
+            .expect("write");
+        writer.write_all(b"\n").await.expect("newline");
+        writer.flush().await.expect("flush");
+
+        let mut response_line = String::new();
+        timeout(Duration::from_secs(2), reader.read_line(&mut response_line))
+            .await
+            .expect("timeout")
+            .expect("read");
+
+        // Give printf time to complete
+        tokio::time::sleep(Duration::from_millis(200)).await;
+
+        // Request snapshot with Full format (includes elements)
+        let snap_request = Request {
+            id: "snap-elem".to_string(),
+            command: Command::Snapshot {
+                session: Some("elem-test".to_string()),
+                format: Some(SnapshotFormat::Full),
+            },
+        };
+        let snap_json = serde_json::to_string(&snap_request).unwrap();
+        writer.write_all(snap_json.as_bytes()).await.expect("write");
+        writer.write_all(b"\n").await.expect("newline");
+        writer.flush().await.expect("flush");
+
+        response_line.clear();
+        timeout(Duration::from_secs(2), reader.read_line(&mut response_line))
+            .await
+            .expect("timeout")
+            .expect("read");
+
+        let snap_response: Response =
+            serde_json::from_str(&response_line).expect("parse snap response");
+        assert!(snap_response.success, "Snapshot should succeed");
+
+        // Verify ScreenState with elements
+        if let Some(ResponseData::ScreenState(screen_state)) = snap_response.data {
+            // Full format includes text
+            assert!(
+                screen_state.text.is_some(),
+                "Full format should include text"
+            );
+
+            // Full format SHOULD include elements
+            assert!(
+                screen_state.elements.is_some(),
+                "Full format should include elements"
+            );
+
+            // Full format SHOULD include content_hash
+            assert!(
+                screen_state.content_hash.is_some(),
+                "Full format should include content_hash"
+            );
+
+            let elements = screen_state.elements.unwrap();
+
+            // Should detect at least the toggles (checkboxes are high confidence)
+            // [x] -> Toggle checked=true, [ ] -> Toggle checked=false
+            let toggles: Vec<_> = elements
+                .iter()
+                .filter(|e| e.kind == ElementKind::Toggle)
+                .collect();
+            assert!(
+                toggles.len() >= 2,
+                "Should detect at least 2 toggles, found {}",
+                toggles.len()
+            );
+
+            // Verify toggle states
+            let checked_toggle = toggles.iter().find(|t| t.checked == Some(true));
+            let unchecked_toggle = toggles.iter().find(|t| t.checked == Some(false));
+            assert!(
+                checked_toggle.is_some(),
+                "Should have a checked toggle ([x])"
+            );
+            assert!(
+                unchecked_toggle.is_some(),
+                "Should have an unchecked toggle ([ ])"
+            );
+
+            // Check toggle confidence is 1.0 (checkbox pattern)
+            for toggle in &toggles {
+                assert!(
+                    (toggle.confidence - 1.0).abs() < f32::EPSILON,
+                    "Toggle confidence should be 1.0, got {}",
+                    toggle.confidence
+                );
+            }
+
+            // May also detect [OK] and [Cancel] as buttons
+            let buttons: Vec<_> = elements
+                .iter()
+                .filter(|e| e.kind == ElementKind::Button)
+                .collect();
+            // Buttons have 0.8 confidence (bracket pattern)
+            for button in &buttons {
+                assert!(
+                    (button.confidence - 0.8).abs() < f32::EPSILON,
+                    "Button confidence should be 0.8, got {}",
+                    button.confidence
+                );
+            }
+
+            // Verify JSON serialization is clean (check the raw response line):
+            // the "focused" key must appear only for elements that are focused.
+            let raw_json = &response_line;
+            let focused_count = raw_json.matches("\"focused\"").count();
+            let elements_with_focus = elements.iter().filter(|e| e.focused).count();
+            assert_eq!(
+                focused_count, elements_with_focus,
+                "JSON should only include 'focused' for focused elements"
+            );
+        } else {
+            panic!(
+                "Expected ScreenState response data, got: {:?}",
+                snap_response.data
+            );
+        }
+
+        server_handle.abort();
+        // Clean up both paths handed to bind_to, as the other daemon tests do
+        let _ = std::fs::remove_file(&socket_path);
+        let _ = std::fs::remove_file(&pid_path);
+    }
 }
diff --git a/crates/pilotty-cli/src/daemon/session.rs b/crates/pilotty-cli/src/daemon/session.rs
index 9edcdae..8039be0 100644
--- a/crates/pilotty-cli/src/daemon/session.rs
+++ b/crates/pilotty-cli/src/daemon/session.rs
@@ -9,8 +9,11 @@ use chrono::{DateTime, Utc};
 use tokio::sync::{Mutex, RwLock};
 use tracing::{debug, info};
 
+use pilotty_core::elements::classify::{detect, ClassifyContext};
+use pilotty_core::elements::Element;
 use pilotty_core::error::ApiError;
 use pilotty_core::protocol::SessionInfo;
+use pilotty_core::snapshot::compute_content_hash;
 
 use crate::daemon::pty::{AsyncPtyHandle, PtySession, TermSize};
 use crate::daemon::terminal::TerminalEmulator;
@@ -56,6 +59,11 @@ pub struct SnapshotData {
     pub cursor_pos: (u16, u16),
     pub cursor_visible: bool,
     pub size: TermSize,
+    /// Detected UI elements; `None` unless requested via `with_elements=true`.
+    pub elements: Option<Vec<Element>>,
+    /// Hash of screen content for change detection.
+    /// Present when `with_elements=true`.
+    pub content_hash: Option<u64>,
 }
 
 /// An active PTY session.
@@ -88,21 +96,6 @@ impl Session {
         }
     }
 
-    /// Get the plain text content of the terminal screen.
-    pub async fn get_text(&self) -> String {
-        self.terminal.lock().await.get_text()
-    }
-
-    /// Get the cursor position (row, col) - 0-indexed.
-    pub async fn cursor_position(&self) -> (u16, u16) {
-        self.terminal.lock().await.cursor_position()
-    }
-
-    /// Check if the cursor is visible.
-    pub async fn cursor_visible(&self) -> bool {
-        self.terminal.lock().await.cursor_visible()
-    }
-
     /// Check if terminal is in application cursor mode.
     pub async fn application_cursor(&self) -> bool {
         self.terminal.lock().await.application_cursor()
@@ -382,7 +375,14 @@ impl SessionManager {
     ///
     /// Uses a read lock on sessions since all operations use interior mutability,
     /// avoiding potential deadlocks from holding a write lock during I/O.
-    pub async fn get_snapshot_data(&self, id: &SessionId) -> Result<SnapshotData, ApiError> {
+    ///
+    /// If `with_elements` is true, element detection runs to identify interactive
+    /// UI elements (buttons, inputs, toggles) and the content hash is computed.
+    pub async fn get_snapshot_data(
+        &self,
+        id: &SessionId,
+        with_elements: bool,
+    ) -> Result<SnapshotData, ApiError> {
         let sessions = self.sessions.read().await;
         let session = sessions
             .get(id)
@@ -391,17 +391,33 @@ impl SessionManager {
         // Drain pending PTY output to update terminal state
         session.drain_pty_output().await;
 
+        // Lock terminal once for all reads
+        let terminal = session.terminal.lock().await;
+
         // Get snapshot data
-        let text = session.get_text().await;
-        let cursor_pos = session.cursor_position().await;
-        let cursor_visible = session.cursor_visible().await;
+        let text = terminal.get_text();
+        let cursor_pos = terminal.cursor_position();
+        let cursor_visible = terminal.cursor_visible();
         let size = session.size;
 
+        // Detect UI elements and compute content hash if requested
+        let (elements, content_hash) = if with_elements {
+            let (cursor_row, cursor_col) = cursor_pos;
+            let ctx = ClassifyContext::new().with_cursor(cursor_row, cursor_col);
+            let elems = detect(&*terminal, &ctx);
+            let hash = compute_content_hash(&text);
+            (Some(elems), Some(hash))
+        } else {
+            (None, None)
+        };
+
         Ok(SnapshotData {
             text,
             cursor_pos,
             cursor_visible,
             size,
+            elements,
+            content_hash,
         })
     }
 
diff --git a/crates/pilotty-cli/src/daemon/terminal.rs b/crates/pilotty-cli/src/daemon/terminal.rs
index af84669..f925c02 100644
--- a/crates/pilotty-cli/src/daemon/terminal.rs
+++ b/crates/pilotty-cli/src/daemon/terminal.rs
@@ -4,6 +4,8 @@
 //! that can parse ANSI escape sequences from PTY output.
 
 use crate::daemon::pty::TermSize;
+use pilotty_core::elements::grid::{ScreenCell, ScreenGrid};
+use pilotty_core::elements::style::{CellStyle, Color};
 
 /// Terminal emulator that parses ANSI escape sequences.
 ///
@@ -91,6 +93,49 @@ impl TerminalEmulator {
     }
 }
 
+/// Map a `vt100::Color` (default, indexed, or RGB) onto the matching core `Color` variant.
+fn convert_color(vt_color: vt100::Color) -> Color {
+    match vt_color {
+        vt100::Color::Default => Color::Default,
+        vt100::Color::Idx(idx) => Color::Indexed { index: idx },
+        vt100::Color::Rgb(r, g, b) => Color::Rgb { r, g, b },
+    }
+}
+
+/// Convert a vt100 cell into a core `ScreenCell` (one char plus style flags and colors).
+fn convert_cell(vt_cell: &vt100::Cell) -> ScreenCell {
+    // Only the first `char` of the cell contents is kept; any further chars are dropped.
+    // Empty contents (e.g. wide char continuations) fall back to a single space.
+    let contents = vt_cell.contents();
+    let ch = contents.chars().next().unwrap_or(' ');
+
+    let style = CellStyle {
+        bold: vt_cell.bold(),
+        underline: vt_cell.underline(),
+        inverse: vt_cell.inverse(),
+        fg_color: convert_color(vt_cell.fgcolor()),
+        bg_color: convert_color(vt_cell.bgcolor()),
+    };
+
+    ScreenCell::new(ch, style)
+}
+
+impl ScreenGrid for TerminalEmulator {
+    fn rows(&self) -> u16 {
+        let (rows, _cols) = self.parser.screen().size();
+        rows
+    }
+
+    fn cols(&self) -> u16 {
+        let (_rows, cols) = self.parser.screen().size();
+        cols
+    }
+
+    fn cell(&self, row: u16, col: u16) -> Option<ScreenCell> {
+        self.parser.screen().cell(row, col).map(convert_cell)
+    }
+}
+
 #[cfg(test)]
 mod tests {
     use super::*;
@@ -490,4 +535,130 @@ mod tests {
             "Should be normal mode after ESC[?1l"
         );
     }
+
+    // ScreenGrid implementation tests
+
+    #[test]
+    fn test_screen_grid_dimensions() {
+        let term = TerminalEmulator::new(TermSize { cols: 80, rows: 24 });
+
+        assert_eq!(ScreenGrid::rows(&term), 24);
+        assert_eq!(ScreenGrid::cols(&term), 80);
+    }
+
+    #[test]
+    fn test_screen_grid_cell_access() {
+        let mut term = TerminalEmulator::new(TermSize { cols: 80, rows: 24 });
+        term.feed(b"Hello");
+
+        // Check cells with content via ScreenGrid trait
+        let cell_h = ScreenGrid::cell(&term, 0, 0).expect("Cell should exist");
+        assert_eq!(cell_h.ch, 'H');
+
+        let cell_o = ScreenGrid::cell(&term, 0, 4).expect("Cell should exist");
+        assert_eq!(cell_o.ch, 'o');
+
+        // Check empty cell
+        let cell_empty = ScreenGrid::cell(&term, 0, 10).expect("Cell should exist");
+        assert_eq!(cell_empty.ch, ' ');
+    }
+
+    #[test]
+    fn test_screen_grid_out_of_bounds() {
+        let term = TerminalEmulator::new(TermSize { cols: 80, rows: 24 });
+
+        assert!(ScreenGrid::cell(&term, 0, 0).is_some());
+        assert!(ScreenGrid::cell(&term, 23, 79).is_some());
+        assert!(ScreenGrid::cell(&term, 24, 0).is_none()); // row out of bounds
+        assert!(ScreenGrid::cell(&term, 0, 80).is_none()); // col out of bounds
+    }
+
+    #[test]
+    fn test_screen_grid_color_mapping_default() {
+        let mut term = TerminalEmulator::new(TermSize { cols: 80, rows: 24 });
+        term.feed(b"A");
+
+        let cell = ScreenGrid::cell(&term, 0, 0).expect("Cell should exist");
+        assert_eq!(cell.style.fg_color, Color::Default);
+        assert_eq!(cell.style.bg_color, Color::Default);
+    }
+
+    #[test]
+    fn test_screen_grid_color_mapping_indexed() {
+        let mut term = TerminalEmulator::new(TermSize { cols: 80, rows: 24 });
+        // Red foreground (color 1), blue background (color 4)
+        term.feed(b"\x1b[31;44mX");
+
+        let cell = ScreenGrid::cell(&term, 0, 0).expect("Cell should exist");
+        assert_eq!(cell.style.fg_color, Color::Indexed { index: 1 });
+        assert_eq!(cell.style.bg_color, Color::Indexed { index: 4 });
+    }
+
+    #[test]
+    fn test_screen_grid_color_mapping_rgb() {
+        let mut term = TerminalEmulator::new(TermSize { cols: 80, rows: 24 });
+        // 24-bit RGB foreground only: ESC[38;2;255;128;64m (no background color is set)
+        term.feed(b"\x1b[38;2;255;128;64mR");
+
+        let cell = ScreenGrid::cell(&term, 0, 0).expect("Cell should exist");
+        assert_eq!(
+            cell.style.fg_color,
+            Color::Rgb {
+                r: 255,
+                g: 128,
+                b: 64
+            }
+        );
+    }
+
+    #[test]
+    fn test_screen_grid_style_bold() {
+        let mut term = TerminalEmulator::new(TermSize { cols: 80, rows: 24 });
+        term.feed(b"N\x1b[1mB\x1b[0m");
+
+        let normal = ScreenGrid::cell(&term, 0, 0).expect("Cell should exist");
+        assert!(!normal.style.bold);
+
+        let bold = ScreenGrid::cell(&term, 0, 1).expect("Cell should exist");
+        assert!(bold.style.bold);
+    }
+
+    #[test]
+    fn test_screen_grid_style_underline() {
+        let mut term = TerminalEmulator::new(TermSize { cols: 80, rows: 24 });
+        term.feed(b"N\x1b[4mU\x1b[0m");
+
+        let normal = ScreenGrid::cell(&term, 0, 0).expect("Cell should exist");
+        assert!(!normal.style.underline);
+
+        let underlined = ScreenGrid::cell(&term, 0, 1).expect("Cell should exist");
+        assert!(underlined.style.underline);
+    }
+
+    #[test]
+    fn test_screen_grid_style_inverse() {
+        let mut term = TerminalEmulator::new(TermSize { cols: 80, rows: 24 });
+        // \x1b[7m = inverse on
+        term.feed(b"N\x1b[7mI\x1b[0m");
+
+        let normal = ScreenGrid::cell(&term, 0, 0).expect("Cell should exist");
+        assert!(!normal.style.inverse);
+
+        let inverse = ScreenGrid::cell(&term, 0, 1).expect("Cell should exist");
+        assert!(inverse.style.inverse);
+    }
+
+    #[test]
+    fn test_screen_grid_combined_styles() {
+        let mut term = TerminalEmulator::new(TermSize { cols: 80, rows: 24 });
+        // Bold + underline + inverse + red fg + blue bg
+        term.feed(b"\x1b[1;4;7;31;44mS");
+
+        let cell = ScreenGrid::cell(&term, 0, 0).expect("Cell should exist");
+        assert!(cell.style.bold);
+        assert!(cell.style.underline);
+        assert!(cell.style.inverse);
+        assert_eq!(cell.style.fg_color, Color::Indexed { index: 1 });
+        assert_eq!(cell.style.bg_color, Color::Indexed { index: 4 });
+    }
 }
diff --git a/crates/pilotty-core/Cargo.toml b/crates/pilotty-core/Cargo.toml
index 4178ed6..5e82a1a 100644
--- a/crates/pilotty-core/Cargo.toml
+++ b/crates/pilotty-core/Cargo.toml
@@ -8,3 +8,4 @@ description = "Core types and logic for pilotty"
 [dependencies]
 serde = { workspace = true }
 serde_json = { workspace = true }
+unicode-width = { workspace = true }
diff --git a/crates/pilotty-core/src/elements/classify.rs b/crates/pilotty-core/src/elements/classify.rs
new file mode 100644
index 0000000..4f938d1
--- /dev/null
+++ b/crates/pilotty-core/src/elements/classify.rs
@@ -0,0 +1,853 @@
+//! Classification: converting clusters into interactive elements.
+//!
+//! The classifier applies priority-ordered rules to determine each cluster's
+//! kind. Only interactive elements (Button, Input, Toggle) are returned;
+//! non-interactive content stays in `snapshot.text`.
+//!
+//! # Rule Priority (highest to lowest)
+//!
+//! 1. Checkbox patterns `[x]`, `[ ]`, `☑`, `☐` → Toggle (confidence: 1.0)
+//! 2. Inverse video → Button (confidence: 1.0, focused: true)
+//! 3. Bracket patterns `[OK]`, `<Cancel>` → Button (confidence: 0.8; 1.0 and focused under the cursor)
+//! 4. Underscore field `____` → Input (confidence: 0.6; 1.0 and focused under the cursor)
+//! 5. Cursor on otherwise unrecognized text → Input (confidence: 1.0, focused: true)
+//!
+//! Non-interactive patterns (links, progress bars, errors, status indicators,
+//! box-drawing, menu prefixes, static text) are filtered out.
+
+use unicode_width::UnicodeWidthStr;
+
+use crate::elements::segment::Cluster;
+use crate::elements::{Element, ElementKind};
+
+// ============================================================================
+// Constants
+// ============================================================================
+
+/// Maximum cluster text length, in bytes (`str::len`), that the tokenizers will scan.
+/// Protects against memory exhaustion from malicious terminal output.
+/// Terminal lines rarely exceed this; longer text won't contain meaningful UI elements.
+const MAX_CLUSTER_TEXT_LEN: usize = 4096;
+
+// ============================================================================
+// Token Extraction
+// ============================================================================
+
+/// A candidate sub-pattern sliced out of a cluster's text by the tokenizers.
+///
+/// Two tokenizers in this module produce tokens:
+/// - `extract_bracketed_tokens`: `[OK]`, `<Cancel>`, `[ ]`, `[x]`
+/// - `extract_underscore_runs`: `____`, `__________`
+#[derive(Debug, Clone, PartialEq, Eq)]
+struct Token {
+    /// The matched slice of the cluster text (brackets included for bracketed tokens).
+    text: String,
+    /// Byte offset of `text` within the cluster text; `token_col_offset` converts it to columns.
+    byte_offset: usize,
+}
+
+/// Column offset of a token inside its cluster, measured in display cells.
+/// Wide (CJK) characters before the token count as two columns. Yields 0 if
+/// `byte_offset` is out of range or splits a character; saturates at `u16::MAX`.
+fn token_col_offset(text: &str, byte_offset: usize) -> u16 {
+    match text.get(..byte_offset) {
+        Some(prefix) => prefix.width().min(usize::from(u16::MAX)) as u16,
+        None => 0,
+    }
+}
+
+/// Extract bracketed tokens such as `[OK]`, `<Cancel>`, `(Submit)`, `[ ]`, `[x]`.
+///
+/// Each opener is paired with the nearest matching closer. Tokens come back in
+/// text order, never overlap, and carry byte offsets for width calculation.
+/// A candidate whose interior repeats its own opener is skipped so the
+/// innermost pair wins: in `"[ [OK]"` the stray `[` no longer swallows `[OK]`.
+///
+/// Returns empty if text exceeds MAX_CLUSTER_TEXT_LEN to prevent memory exhaustion.
+fn extract_bracketed_tokens(text: &str) -> Vec<Token> {
+    if text.len() > MAX_CLUSTER_TEXT_LEN {
+        return Vec::new();
+    }
+
+    let mut tokens = Vec::new();
+    // Byte offset just past the last accepted token; used to reject overlaps.
+    let mut last_end = 0;
+
+    for (start, ch) in text.char_indices() {
+        let closer = match ch {
+            '[' => ']',
+            '<' => '>',
+            '(' => ')',
+            '【' => '】',
+            '「' => '」',
+            _ => continue,
+        };
+        if start < last_end {
+            continue; // this opener lies inside the previously accepted token
+        }
+
+        let interior_start = start + ch.len_utf8();
+        let end_rel = match text[interior_start..].find(closer) {
+            Some(pos) => pos,
+            None => continue,
+        };
+        // Nested or stray opener: let the inner opener start the token instead.
+        if text[interior_start..interior_start + end_rel].contains(ch) {
+            continue;
+        }
+
+        let end = interior_start + end_rel + closer.len_utf8();
+        let token_text = &text[start..end];
+        // Only extract if it looks interactive (not just empty or single char).
+        // NOTE(review): the checkbox test looks unreachable for bracketed text; confirm.
+        if token_text.chars().count() >= 3 || is_unicode_checkbox(token_text) {
+            tokens.push(Token {
+                text: token_text.to_string(),
+                byte_offset: start,
+            });
+            last_end = end;
+        }
+    }
+
+    tokens
+}
+
+/// True when `text` is exactly one of the recognised checkbox glyphs.
+fn is_unicode_checkbox(text: &str) -> bool {
+    ["☑", "☐", "□", "✓", "✔", "☒"].contains(&text)
+}
+
+/// Extract underscore runs from text.
+///
+/// Finds runs of three or more consecutive `_` characters, such as `____`
+/// or `__________`, and returns them in text order. Each token records the
+/// byte offset of its run within `text` (for display width calculation).
+///
+/// Returns empty if text exceeds MAX_CLUSTER_TEXT_LEN to prevent memory exhaustion.
+fn extract_underscore_runs(text: &str) -> Vec<Token> {
+    // Protect against memory exhaustion from extremely long input
+    if text.len() > MAX_CLUSTER_TEXT_LEN {
+        return Vec::new();
+    }
+
+    let mut runs = Vec::new();
+    // Start offset of the run currently being scanned, if any.
+    let mut open_run: Option<usize> = None;
+
+    // Visit one extra position past the end (`None`) so that a trailing run is
+    // closed by the same arm that closes runs in the middle of the text.
+    let positions = text
+        .char_indices()
+        .map(|(idx, ch)| (idx, Some(ch)))
+        .chain(std::iter::once((text.len(), None)));
+
+    for (idx, ch) in positions {
+        match (ch, open_run) {
+            // First underscore of a new run.
+            (Some('_'), None) => open_run = Some(idx),
+            // Still inside the current run.
+            (Some('_'), Some(_)) => {}
+            // Any other character, or the end of the text, closes the run.
+            (_, Some(begin)) => {
+                let run = &text[begin..idx];
+                if run.len() >= 3 {
+                    runs.push(Token {
+                        text: run.to_string(),
+                        byte_offset: begin,
+                    });
+                }
+                open_run = None;
+            }
+            // Outside a run and not an underscore: nothing to do.
+            (_, None) => {}
+        }
+    }
+
+    runs
+}
+
+/// Screen-position context consulted while classifying clusters.
+#[derive(Debug, Clone, Copy, Default)]
+pub struct ClassifyContext {
+    /// Row of the cursor, when known. A cursor hit needs both coordinates.
+    pub cursor_row: Option<u16>,
+    /// Column of the cursor, when known.
+    pub cursor_col: Option<u16>,
+}
+
+impl ClassifyContext {
+    /// Build a context that carries no cursor position.
+    #[must_use]
+    pub fn new() -> Self {
+        Default::default()
+    }
+
+    /// Return this context with the cursor placed at (`row`, `col`).
+    #[must_use]
+    pub fn with_cursor(mut self, row: u16, col: u16) -> Self {
+        self.cursor_col = Some(col);
+        self.cursor_row = Some(row);
+        self
+    }
+}
+
+/// Classifier-internal record of one detected element.
+///
+/// Built during classification and converted to the public `Element` via `into_element`.
+#[derive(Debug, Clone)]
+struct DetectedElement {
+    kind: ElementKind,
+    row: u16,
+    col: u16,
+    width: u16,
+    text: String,
+    confidence: f32,
+    checked: Option<bool>,
+    focused: bool,
+}
+
+impl DetectedElement {
+    /// Shared constructor; `width` is the display width of `text`, capped at `u16::MAX`.
+    fn build(
+        kind: ElementKind,
+        row: u16,
+        col: u16,
+        text: String,
+        confidence: f32,
+        checked: Option<bool>,
+        focused: bool,
+    ) -> Self {
+        let width = text.width().min(usize::from(u16::MAX)) as u16;
+        Self {
+            kind,
+            row,
+            col,
+            width,
+            text,
+            confidence,
+            checked,
+            focused,
+        }
+    }
+
+    /// Create a button element (buttons carry no checked state).
+    fn button(row: u16, col: u16, text: String, confidence: f32, focused: bool) -> Self {
+        Self::build(ElementKind::Button, row, col, text, confidence, None, focused)
+    }
+
+    /// Create an input element (inputs carry no checked state).
+    fn input(row: u16, col: u16, text: String, confidence: f32, focused: bool) -> Self {
+        Self::build(ElementKind::Input, row, col, text, confidence, None, focused)
+    }
+
+    /// Create a toggle element: confidence 1.0, unfocused, with the given checked state.
+    fn toggle(row: u16, col: u16, text: String, checked: bool) -> Self {
+        Self::build(ElementKind::Toggle, row, col, text, 1.0, Some(checked), false)
+    }
+
+    /// Convert to Element, applying the checked state and focus only when set.
+    fn into_element(self) -> Element {
+        let Self {
+            kind,
+            row,
+            col,
+            width,
+            text,
+            confidence,
+            checked,
+            focused,
+        } = self;
+        let element = Element::new(kind, row, col, width, text, confidence);
+        let element = match checked {
+            Some(state) => element.with_checked(state),
+            None => element,
+        };
+        if focused {
+            element.with_focused(true)
+        } else {
+            element
+        }
+    }
+}
+
+// ============================================================================
+// Pattern Detection Helpers
+// ============================================================================
+
+/// Decide whether `text` is exactly one bracketed button label such as
+/// `[OK]`, `<Cancel>` or `(Confirm)`.
+///
+/// After trimming, the text must:
+/// - start and end with a matching pair: `[]`, `<>`, `()`, `【】` or `「」`
+/// - contain no further copy of that opener or closer (rejects `[Yes] [No]`)
+/// - not read as checkbox content or as a progress bar
+/// - carry a non-blank label between the brackets
+fn is_button_pattern(text: &str) -> bool {
+    let candidate = text.trim();
+    if candidate.len() < 3 {
+        return false;
+    }
+
+    // Peel the first and last characters off; what remains is the label.
+    let mut rest = candidate.chars();
+    let (first, last) = match (rest.next(), rest.next_back()) {
+        (Some(first), Some(last)) => (first, last),
+        _ => return false,
+    };
+    let label = rest.as_str();
+
+    let is_bracket_pair = matches!(
+        (first, last),
+        ('[', ']') | ('<', '>') | ('(', ')') | ('【', '】') | ('「', '」')
+    );
+    if !is_bracket_pair {
+        return false;
+    }
+
+    // A second bracket inside means several elements, e.g. "[Yes] [No]".
+    if label.contains(first) || label.contains(last) {
+        return false;
+    }
+
+    // Checkbox markers are classified as toggles, not buttons.
+    if is_checkbox_content(label) {
+        return false;
+    }
+
+    // Bars such as "[====>   ]" are not interactive.
+    if is_progress_bar_content(label) {
+        return false;
+    }
+
+    !label.trim().is_empty()
+}
+
+/// Helper to check if content inside brackets looks like progress bar content.
+/// True when at least 80% of the characters are typical bar glyphs
+/// (`=`, `>`, `-`, `#`, space, `█`, `░`); empty content is never a bar.
+fn is_progress_bar_content(content: &str) -> bool {
+    let (mut total, mut bar_like) = (0usize, 0usize);
+    for c in content.chars() {
+        total += 1;
+        if matches!(c, '=' | '>' | '-' | '#' | ' ' | '█' | '░') {
+            bar_like += 1;
+        }
+    }
+    // Compare character counts, not `content.len()`: the block glyphs are
+    // multi-byte, so a byte-based total made bars like `████░░` miss the 80% cut.
+    total > 0 && bar_like * 10 >= total * 8
+}
+
+/// Check if text matches checkbox patterns.
+///
+/// Supported patterns (after trimming):
+/// - `☑`, `✓`, `✔`, `☒` → checked; `☐`, `□` → unchecked
+/// - `[x]`, `[X]`, `[*]`, `[✓]`, `[✔]` → checked
+/// - `[ ]`, `[.]`, `[-]` → unchecked (`-` is indeterminate, treated as unchecked)
+/// - the same markers in parentheses: `(x)`, `( )`, `(*)`, ...
+/// Returns `None` for anything else.
+fn is_checkbox_pattern(text: &str) -> Option<bool> {
+    let trimmed = text.trim();
+
+    // Single character unicode checkboxes
+    match trimmed {
+        "☑" | "✓" | "✔" | "☒" => return Some(true),
+        "☐" | "□" => return Some(false),
+        _ => {}
+    }
+
+    // Bracketed checkboxes are exactly three *characters*. Counting bytes here
+    // would reject `[✓]` and index past the end on input such as `"[é"`.
+    let mut chars = trimmed.chars();
+    match (chars.next(), chars.next(), chars.next(), chars.next()) {
+        (Some('['), Some(mark), Some(']'), None) | (Some('('), Some(mark), Some(')'), None) => {
+            match mark {
+                'x' | 'X' | '*' | '✓' | '✔' => Some(true),
+                ' ' | '.' | '-' => Some(false),
+                _ => None,
+            }
+        }
+        _ => None,
+    }
+}
+
+/// Helper to check if content inside brackets looks like checkbox content.
+/// A lone space (unchecked box) is tested before trimming, which would erase it.
+fn is_checkbox_content(content: &str) -> bool {
+    content == " " || matches!(content.trim(), "x" | "X" | "*" | "-" | "." | "✓" | "✔")
+}
+
+/// Check if text looks like an input field placeholder.
+///
+/// Patterns (after trimming): `____` (3+ underscores), `[          ]` (brackets
+/// around 2+ bytes of whitespace), `Name: _____` (underscores after first colon).
+fn is_input_pattern(text: &str) -> bool {
+    let trimmed = text.trim();
+    // '_' is ASCII, so byte length equals the number of underscores.
+    let is_underscore_run = |s: &str| s.len() >= 3 && s.bytes().all(|b| b == b'_');
+
+    // Series of underscores
+    if is_underscore_run(trimmed) {
+        return true;
+    }
+
+    // Empty bracketed field. Strip the brackets instead of counting chars against
+    // a byte length, which pulled `]` into the interior for multi-byte whitespace.
+    let interior = trimmed.strip_prefix('[').and_then(|rest| rest.strip_suffix(']'));
+    if let Some(inner) = interior {
+        if inner.len() >= 2 && inner.trim().is_empty() {
+            return true;
+        }
+    }
+
+    // Colon followed by underscores: "Name: ___"
+    match trimmed.find(':') {
+        Some(colon_pos) => is_underscore_run(trimmed[colon_pos + 1..].trim_start()),
+        None => false,
+    }
+}
+
+// ============================================================================
+// Core Classification
+// ============================================================================
+
+/// Classify one piece of text (a whole cluster or a single token) at a position.
+///
+/// This is the low-level classifier: it does no tokenization of its own and
+/// returns `None` for text that is not interactive.
+///
+/// # Arguments
+///
+/// - `text`: candidate text; copied verbatim into the resulting element
+/// - `row`, `col`: screen position recorded on the element
+/// - `is_inverse`: whether the text is rendered in inverse video
+/// - `cursor_in_range`: whether the cursor lies within the text's columns
+///
+/// # Rule order (first match wins)
+///
+/// 1. Checkbox pattern → Toggle (confidence 1.0, never focused)
+/// 2. Inverse video → focused Button (confidence 1.0)
+/// 3. Bracket pattern → Button (0.8, or 1.0 and focused under the cursor)
+/// 4. Underscore/labeled field → Input (0.6, or 1.0 and focused under the cursor)
+/// 5. Cursor on anything else → focused Input (confidence 1.0)
+fn classify_text(
+    text: &str,
+    row: u16,
+    col: u16,
+    is_inverse: bool,
+    cursor_in_range: bool,
+) -> Option<DetectedElement> {
+    // Rule 1: a checkbox shows its state unambiguously, so it outranks
+    // everything else, including inverse video and the cursor.
+    if let Some(checked) = is_checkbox_pattern(text) {
+        let toggle = DetectedElement::toggle(row, col, text.to_string(), checked);
+        return Some(toggle);
+    }
+
+    // Rule 2: TUIs conventionally render the selected/focused item in
+    // inverse video, so treat it as a focused button.
+    if is_inverse {
+        let selected = DetectedElement::button(row, col, text.to_string(), 1.0, true);
+        return Some(selected);
+    }
+
+    // For the pattern rules the cursor both focuses the element and lifts its
+    // confidence to 1.0; otherwise the rule's base confidence applies.
+    let confidence_or = |base: f32| if cursor_in_range { 1.0 } else { base };
+
+    let element = if is_button_pattern(text) {
+        // Rule 3: bracketed label. A cursor here focuses the button rather
+        // than turning it into an input.
+        DetectedElement::button(
+            row,
+            col,
+            text.to_string(),
+            confidence_or(0.8),
+            cursor_in_range,
+        )
+    } else if is_input_pattern(text) {
+        // Rule 4: underscore run or empty bracketed field.
+        DetectedElement::input(
+            row,
+            col,
+            text.to_string(),
+            confidence_or(0.6),
+            cursor_in_range,
+        )
+    } else if cursor_in_range {
+        // Rule 5: unknown text under the cursor is assumed to be editable.
+        DetectedElement::input(row, col, text.to_string(), 1.0, true)
+    } else {
+        // Nothing interactive about this text.
+        return None;
+    };
+
+    Some(element)
+}
+
+/// Report whether the cursor sits on `row` within columns `col..col + width`.
+///
+/// Always false when either cursor coordinate is unknown. The end column is
+/// computed with saturating arithmetic so `col + width` cannot overflow `u16`.
+fn cursor_in_range(ctx: &ClassifyContext, row: u16, col: u16, width: u16) -> bool {
+    match (ctx.cursor_row, ctx.cursor_col) {
+        (Some(r), Some(c)) => r == row && (col..col.saturating_add(width)).contains(&c),
+        _ => false,
+    }
+}
+
+/// Extract elements from a cluster using tokenization.
+///
+/// The whole cluster is classified first. If it is already a "tight" pattern
+/// (a checkbox, a single bracketed button, or nothing but underscores) that
+/// single element is returned.
+///
+/// Otherwise bracketed tokens are extracted and classified; underscore runs
+/// are only tried when no bracketed token yields an element. Token elements
+/// replace the whole-cluster element (dedup rule: tokens win) and become
+/// focused with confidence 1.0 when the cluster is rendered in inverse video.
+/// If no token yields an element, the whole-cluster classification (if any)
+/// is returned.
+///
+/// This handles cases like:
+/// - `"Save [OK] Cancel"` → extracts `[OK]` as Button
+/// - `"Name: ____"` → extracts `____` as Input
+///
+/// Token columns and widths use saturating arithmetic, matching
+/// `cursor_in_range`, so a cluster near column `u16::MAX` cannot overflow.
+fn extract_elements_from_cluster(cluster: &Cluster, ctx: &ClassifyContext) -> Vec<DetectedElement> {
+    let row = cluster.row;
+    let col = cluster.col;
+    let text = &cluster.text;
+    let is_inverse = cluster.style.is_inverse();
+
+    // First, try to classify the whole cluster
+    let cursor_hit = cursor_in_range(ctx, row, col, cluster.width);
+    let whole_cluster_elem = classify_text(text, row, col, is_inverse, cursor_hit);
+
+    // Check if the whole cluster is already a "tight" interactive pattern
+    // (checkbox, bracketed button, or underscore-only input)
+    if let Some(elem) = &whole_cluster_elem {
+        // If it's a toggle (checkbox pattern), return immediately
+        if elem.kind == ElementKind::Toggle {
+            return vec![elem.clone()];
+        }
+
+        // If it's a bracket button and the text is entirely the bracket pattern
+        if elem.kind == ElementKind::Button && is_button_pattern(text) {
+            return vec![elem.clone()];
+        }
+
+        // If it's an input and the text is entirely underscores
+        if elem.kind == ElementKind::Input && text.trim().chars().all(|c| c == '_') {
+            return vec![elem.clone()];
+        }
+    }
+
+    // Classify one token at its own column. Tokens are never inverse on their
+    // own; instead they inherit focus (and full confidence) from an inverse
+    // parent cluster.
+    let classify_token = |token: &Token| -> Option<DetectedElement> {
+        // Saturate instead of `col + offset`: plain addition panics in debug
+        // builds (and wraps in release) when the sum exceeds u16::MAX.
+        let token_col = col.saturating_add(token_col_offset(text, token.byte_offset));
+        let token_width = token.text.width().min(usize::from(u16::MAX)) as u16;
+        let token_cursor_hit = cursor_in_range(ctx, row, token_col, token_width);
+
+        let mut elem = classify_text(&token.text, row, token_col, false, token_cursor_hit)?;
+        if is_inverse && !elem.focused {
+            elem.focused = true;
+            elem.confidence = elem.confidence.max(1.0);
+        }
+        Some(elem)
+    };
+
+    // Bracketed tokens take precedence over underscore runs.
+    let mut elements: Vec<DetectedElement> = extract_bracketed_tokens(text)
+        .iter()
+        .filter_map(&classify_token)
+        .collect();
+
+    // Extract underscore runs (only if no bracketed tokens found)
+    if elements.is_empty() {
+        elements = extract_underscore_runs(text)
+            .iter()
+            .filter_map(&classify_token)
+            .collect();
+    }
+
+    // If tokens were found, return them (dedup rule: tokens win)
+    if !elements.is_empty() {
+        return elements;
+    }
+
+    // No tokens found, return whole cluster classification if any
+    whole_cluster_elem.into_iter().collect()
+}
+
+/// Classify clusters into interactive elements.
+///
+/// Every cluster is run through `extract_elements_from_cluster`, which may
+/// return the cluster as a single element, replace it with the bracketed
+/// tokens or underscore runs found inside it, or drop it as non-interactive.
+///
+/// Only returns interactive elements (Button, Input, Toggle).
+///
+/// The result is ordered by position (row, then col). The sort is stable, so
+/// elements that share a position keep the order in which they were detected.
+#[must_use]
+pub fn classify(clusters: Vec<Cluster>, ctx: &ClassifyContext) -> Vec<Element> {
+    // Gather the detections of all clusters into one list.
+    let mut detected: Vec<DetectedElement> = clusters
+        .into_iter()
+        .flat_map(|cluster| extract_elements_from_cluster(&cluster, ctx))
+        .collect();
+
+    // Order by (row, col) for consistent output.
+    detected.sort_by_key(|elem| (elem.row, elem.col));
+
+    // Hand back the public element type.
+    detected
+        .into_iter()
+        .map(DetectedElement::into_element)
+        .collect()
+}
+
+/// One-shot element detection: segment `grid` into clusters, then classify them.
+///
+/// This is the main entry point for element detection.
+#[must_use]
+pub fn detect<G>(grid: &G, ctx: &ClassifyContext) -> Vec<Element>
+where
+    G: crate::elements::grid::ScreenGrid,
+{
+    use crate::elements::segment::segment;
+    classify(segment(grid), ctx)
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use crate::elements::grid::test_support::SimpleGrid;
+    use crate::elements::segment::Cluster;
+    use crate::elements::style::CellStyle;
+
+    fn cluster(text: &str) -> Cluster {
+        Cluster::new(0, 0, text.to_string(), CellStyle::default())
+    }
+
+    fn cluster_at(row: u16, col: u16, text: &str) -> Cluster {
+        Cluster::new(row, col, text.to_string(), CellStyle::default())
+    }
+
+    fn inverse_cluster(text: &str) -> Cluster {
+        Cluster::new(0, 0, text.to_string(), CellStyle::new().with_inverse(true))
+    }
+
+    fn classify_cluster(cluster: &Cluster, ctx: &ClassifyContext) -> Option<DetectedElement> {
+        extract_elements_from_cluster(cluster, ctx)
+            .into_iter()
+            .next()
+    }
+
+    #[test]
+    fn button_bracket_patterns() {
+        let ctx = ClassifyContext::new();
+
+        let result = classify_cluster(&cluster("[OK]"), &ctx).unwrap();
+        assert_eq!(result.kind, ElementKind::Button);
+        assert!((result.confidence - 0.8).abs() < f32::EPSILON);
+
+        assert_eq!(
+            classify_cluster(&cluster("<Cancel>"), &ctx).unwrap().kind,
+            ElementKind::Button
+        );
+        assert_eq!(
+            classify_cluster(&cluster("(Submit)"), &ctx).unwrap().kind,
+            ElementKind::Button
+        );
+    }
+
+    #[test]
+    fn toggle_checkbox_patterns() {
+        let ctx = ClassifyContext::new();
+
+        let checked = classify_cluster(&cluster("[x]"), &ctx).unwrap();
+        assert_eq!(checked.kind, ElementKind::Toggle);
+        assert_eq!(checked.checked, Some(true));
+
+        let unchecked = classify_cluster(&cluster("[ ]"), &ctx).unwrap();
+        assert_eq!(unchecked.kind, ElementKind::Toggle);
+        assert_eq!(unchecked.checked, Some(false));
+    }
+
+    #[test]
+    fn input_patterns() {
+        let ctx = ClassifyContext::new();
+
+        let underscore = classify_cluster(&cluster("_____"), &ctx).unwrap();
+        assert_eq!(underscore.kind, ElementKind::Input);
+        assert!((underscore.confidence - 0.6).abs() < f32::EPSILON);
+
+        // Cursor position creates focused input
+        let ctx_cursor = ClassifyContext::new().with_cursor(0, 5);
+        let cursor_input = classify_cluster(&cluster_at(0, 0, "some text"), &ctx_cursor).unwrap();
+        assert_eq!(cursor_input.kind, ElementKind::Input);
+        assert!(cursor_input.focused);
+    }
+
+    #[test]
+    fn inverse_video_creates_focused_button() {
+        let ctx = ClassifyContext::new();
+        let result = classify_cluster(&inverse_cluster("File"), &ctx).unwrap();
+        assert_eq!(result.kind, ElementKind::Button);
+        assert!(result.focused);
+        assert!((result.confidence - 1.0).abs() < f32::EPSILON);
+    }
+
+    #[test]
+    fn non_interactive_filtered() {
+        let ctx = ClassifyContext::new();
+        assert!(classify_cluster(&cluster("Hello World"), &ctx).is_none());
+        assert!(classify_cluster(&cluster("https://example.com"), &ctx).is_none());
+    }
+
+    #[test]
+    fn classify_returns_sorted_elements() {
+        // Clusters arrive in reverse screen order, so only the sort can produce this result.
+        let clusters = vec![
+            cluster_at(1, 0, "____"),
+            cluster_at(0, 8, "[OK]"),
+            cluster_at(0, 0, "[ ]"),
+        ];
+        let elements = classify(clusters, &ClassifyContext::new());
+        let kinds: Vec<ElementKind> = elements.iter().map(|e| e.kind).collect();
+        assert_eq!(kinds, [ElementKind::Toggle, ElementKind::Button, ElementKind::Input]);
+    }
+
+    #[test]
+    fn detect_full_pipeline() {
+        let mut grid = SimpleGrid::from_text(&["[OK] [Cancel] [ ]"], 20);
+        let inverse = CellStyle::new().with_inverse(true);
+        let bold = CellStyle::new().with_bold(true);
+        grid.style_range(0, 0, 4, inverse);
+        grid.style_range(0, 5, 13, bold);
+
+        let elements = detect(&grid, &ClassifyContext::new());
+        let kinds: Vec<ElementKind> = elements.iter().map(|e| e.kind).collect();
+
+        assert!(kinds.contains(&ElementKind::Button));
+        assert!(kinds.contains(&ElementKind::Toggle));
+    }
+
+    #[test]
+    fn tokenizer_extracts_from_text() {
+        let tokens = extract_bracketed_tokens("Save [OK] [Cancel]");
+        assert_eq!(tokens.len(), 2);
+        assert_eq!(tokens[0].text, "[OK]");
+        assert_eq!(tokens[1].text, "[Cancel]");
+    }
+
+    #[test]
+    fn dedup_extracts_button_from_text() {
+        let ctx = ClassifyContext::new();
+        let elements = extract_elements_from_cluster(&cluster("Save [OK] Cancel"), &ctx);
+
+        assert_eq!(elements.len(), 1);
+        assert_eq!(elements[0].text, "[OK]");
+        assert_eq!(elements[0].col, 5);
+    }
+
+    // ========================================================================
+    // Security & Edge Case Tests
+    // ========================================================================
+
+    #[test]
+    fn extract_tokens_rejects_oversized_input() {
+        // Verify that extremely long text is rejected to prevent memory exhaustion
+        let huge_text = "[".repeat(MAX_CLUSTER_TEXT_LEN + 1);
+        assert!(extract_bracketed_tokens(&huge_text).is_empty());
+
+        let huge_underscores = "_".repeat(MAX_CLUSTER_TEXT_LEN + 1);
+        assert!(extract_underscore_runs(&huge_underscores).is_empty());
+    }
+
+    #[test]
+    fn cursor_in_range_handles_overflow() {
+        // Verify saturating_add prevents overflow panic
+        let ctx = ClassifyContext::new().with_cursor(0, u16::MAX);
+
+        // Should not panic even with extreme values
+        assert!(!cursor_in_range(&ctx, 0, u16::MAX - 10, 100));
+
+        // Cursor near MAX should still work correctly
+        let ctx = ClassifyContext::new().with_cursor(0, u16::MAX - 5);
+        assert!(cursor_in_range(&ctx, 0, u16::MAX - 10, 10));
+    }
+
+    // ========================================================================
+    // Unicode Width Tests
+    // ========================================================================
+
+    #[test]
+    fn element_width_cjk() {
+        // CJK characters should have width 2 each
+        let ctx = ClassifyContext::new();
+        let elem = classify_cluster(&cluster("[确认]"), &ctx).unwrap();
+        // [=1 + 确=2 + 认=2 + ]=1 = 6
+        assert_eq!(elem.width, 6);
+    }
+
+    #[test]
+    fn element_width_ascii() {
+        // ASCII characters should have width 1 each
+        let ctx = ClassifyContext::new();
+        let elem = classify_cluster(&cluster("[OK]"), &ctx).unwrap();
+        // [=1 + O=1 + K=1 + ]=1 = 4
+        assert_eq!(elem.width, 4);
+    }
+
+    #[test]
+    fn element_width_mixed() {
+        // Mixed ASCII and CJK
+        let ctx = ClassifyContext::new();
+        let elem = classify_cluster(&cluster("[OK确认]"), &ctx).unwrap();
+        // [=1 + O=1 + K=1 + 确=2 + 认=2 + ]=1 = 8
+        assert_eq!(elem.width, 8);
+    }
+
+    #[test]
+    fn token_col_with_cjk_prefix_bracketed() {
+        // CJK characters before a bracketed token should offset by display width, not char count
+        let ctx = ClassifyContext::new();
+        // 确(width=2) + 认(width=2) = 4 columns before [OK]
+        let cluster = Cluster::new(0, 0, "确认[OK]".to_string(), CellStyle::default());
+        let elements = extract_elements_from_cluster(&cluster, &ctx);
+        assert_eq!(elements.len(), 1);
+        assert_eq!(elements[0].text, "[OK]");
+        assert_eq!(elements[0].col, 4); // Not 2 (char count)!
+    }
+
+    #[test]
+    fn token_col_with_cjk_prefix_underscore() {
+        // CJK characters before an underscore run should offset by display width
+        let ctx = ClassifyContext::new();
+        // 名(width=2) + 前(width=2) + :(width=1) = 5 columns before ____
+        let cluster = Cluster::new(0, 0, "名前:____".to_string(), CellStyle::default());
+        let elements = extract_elements_from_cluster(&cluster, &ctx);
+        assert_eq!(elements.len(), 1);
+        assert_eq!(elements[0].text, "____");
+        assert_eq!(elements[0].col, 5); // Not 3 (char count)!
+    }
+
+    #[test]
+    fn token_col_ascii_unchanged() {
+        // ASCII text should still work correctly (char count == display width)
+        let ctx = ClassifyContext::new();
+        let cluster = Cluster::new(0, 5, "Save [OK] Cancel".to_string(), CellStyle::default());
+        let elements = extract_elements_from_cluster(&cluster, &ctx);
+        assert_eq!(elements.len(), 1);
+        assert_eq!(elements[0].text, "[OK]");
+        assert_eq!(elements[0].col, 10); // 5 (cluster col) + 5 (offset of [OK])
+    }
+}
diff --git a/crates/pilotty-core/src/elements/grid.rs b/crates/pilotty-core/src/elements/grid.rs
new file mode 100644
index 0000000..6ef8f09
--- /dev/null
+++ b/crates/pilotty-core/src/elements/grid.rs
@@ -0,0 +1,149 @@
+//! Screen grid abstraction for element detection segmentation.
+//!
+//! Defines the `ScreenGrid` trait for uniform access to terminal screen content.
+
+use crate::elements::style::CellStyle;
+
+/// A single terminal cell: one character plus the visual style it is drawn with.
+#[derive(Debug, Clone, PartialEq, Eq)]
+pub struct ScreenCell {
+    /// The character in this cell (space for empty cells).
+    pub ch: char,
+    /// Visual style attributes (compared cell-to-cell during segmentation).
+    pub style: CellStyle,
+}
+
+impl ScreenCell {
+    /// Create a screen cell from a character and its style.
+    #[must_use]
+    pub fn new(ch: char, style: CellStyle) -> Self {
+        Self { ch, style }
+    }
+}
+
+/// Trait for accessing terminal screen content.
+///
+/// This abstraction allows element detection to work with any terminal backend.
+/// Uses 0-based coordinates matching the cursor API convention.
+pub trait ScreenGrid {
+    /// Number of rows in the grid.
+    fn rows(&self) -> u16;
+
+    /// Number of columns in the grid.
+    fn cols(&self) -> u16;
+
+    /// Get an owned copy of the cell at (`row`, `col`). Returns `None` if out of bounds.
+    fn cell(&self, row: u16, col: u16) -> Option<ScreenCell>;
+}
+
+#[cfg(test)]
+pub(crate) mod test_support {
+    use super::*;
+
+    /// A simple in-memory grid for testing, stored row-major in a flat `Vec`.
+    #[derive(Debug, Clone)]
+    pub struct SimpleGrid {
+        cells: Vec<ScreenCell>,
+        rows: u16,
+        cols: u16,
+    }
+
+    impl SimpleGrid {
+        /// Create a new grid filled with space cells in the default style.
+        #[must_use]
+        pub fn new(rows: u16, cols: u16) -> Self {
+            let cell_count = rows as usize * cols as usize;
+            Self {
+                cells: vec![ScreenCell::new(' ', CellStyle::default()); cell_count],
+                rows,
+                cols,
+            }
+        }
+
+        /// Create a grid from text lines: one `char` per cell (display width is ignored), truncated at `cols`.
+        #[must_use]
+        pub fn from_text(lines: &[&str], cols: u16) -> Self {
+            let rows = lines.len() as u16;
+            let mut grid = Self::new(rows, cols);
+
+            for (row_idx, line) in lines.iter().enumerate() {
+                for (col_idx, ch) in line.chars().enumerate() {
+                    if col_idx < cols as usize {
+                        if let Some(idx) = grid.index(row_idx as u16, col_idx as u16) {
+                            grid.cells[idx] = ScreenCell::new(ch, CellStyle::default());
+                        }
+                    }
+                }
+            }
+
+            grid
+        }
+
+        /// Apply `style` to columns `start_col..end_col` (end exclusive) of `row`; out-of-bounds cells are skipped.
+        pub fn style_range(&mut self, row: u16, start_col: u16, end_col: u16, style: CellStyle) {
+            for col in start_col..end_col {
+                if let Some(idx) = self.index(row, col) {
+                    self.cells[idx].style = style;
+                }
+            }
+        }
+
+        fn index(&self, row: u16, col: u16) -> Option<usize> {
+            if row < self.rows && col < self.cols {
+                Some(row as usize * self.cols as usize + col as usize)
+            } else {
+                None
+            }
+        }
+    }
+
+    impl ScreenGrid for SimpleGrid {
+        fn rows(&self) -> u16 {
+            self.rows
+        }
+
+        fn cols(&self) -> u16 {
+            self.cols
+        }
+
+        fn cell(&self, row: u16, col: u16) -> Option<ScreenCell> {
+            self.index(row, col).map(|i| self.cells[i].clone())
+        }
+    }
+}
+
+// Re-export for tests in other modules
+#[cfg(test)]
+pub(crate) use test_support::SimpleGrid;
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn screen_cell_creation() {
+        let cell = ScreenCell::new('A', CellStyle::default());
+        assert_eq!(cell.ch, 'A');
+    }
+
+    #[test]
+    fn simple_grid_from_text() {
+        let grid = SimpleGrid::from_text(&["Hello", "World"], 10);
+        assert_eq!(grid.rows(), 2);
+        assert_eq!(grid.cols(), 10);
+        assert_eq!(grid.cell(0, 0).unwrap().ch, 'H');
+        assert_eq!(grid.cell(1, 0).unwrap().ch, 'W');
+    }
+
+    #[test]
+    fn simple_grid_style_range() {
+        let mut grid = SimpleGrid::from_text(&["[OK]"], 10);
+        let inverse = CellStyle::new().with_inverse(true);
+
+        grid.style_range(0, 0, 4, inverse);
+
+        assert!(grid.cell(0, 0).unwrap().style.inverse);
+        assert!(grid.cell(0, 3).unwrap().style.inverse);
+        assert!(!grid.cell(0, 4).unwrap().style.inverse);
+    }
+}
diff --git a/crates/pilotty-core/src/elements/mod.rs b/crates/pilotty-core/src/elements/mod.rs
new file mode 100644
index 0000000..4602451
--- /dev/null
+++ b/crates/pilotty-core/src/elements/mod.rs
@@ -0,0 +1,170 @@
+//! UI element detection types.
+//!
+//! This module provides types for detecting and classifying terminal UI elements.
+//! It uses a heuristic pipeline that segments the terminal buffer by visual
+//! style, then classifies segments into semantic kinds.
+//!
+//! # Element Kinds
+//!
+//! We use a simplified 3-kind model instead of many roles:
+//! - **Button**: Clickable elements (bracketed text, inverse video)
+//! - **Input**: Text entry fields (cursor position, underscore runs)
+//! - **Toggle**: Checkbox/radio elements with on/off state
+//!
+//! # Detection Rules (priority order)
+//!
+//! 1. Cursor position → Input (confidence: 1.0, focused: true)
+//! 2. Checkbox pattern `[x]`/`[ ]`/`☑`/`☐` → Toggle (confidence: 1.0)
+//! 3. Inverse video → Button (confidence: 1.0, focused: true)
+//! 4. Bracket pattern `[OK]`/`<Cancel>` → Button (confidence: 0.8)
+//! 5. Underscore field `____` → Input (confidence: 0.6)
+//!
+//! Non-interactive elements (links, progress bars, status text) are filtered out.
+//! They remain in `snapshot.text` for agents to read, not as elements.
+
+pub mod classify;
+pub mod grid;
+pub mod segment;
+pub mod style;
+
+use serde::{Deserialize, Serialize};
+
+/// Kind of interactive element (serialized as `button`, `input`, or `toggle`).
+///
+/// Simplified from 11 roles to 3 kinds based on what agents actually need:
+/// - What kind is it? (button/input/toggle)
+/// - Is it focused?
+/// - What's the toggle state? (for toggles only)
+#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
+#[serde(rename_all = "snake_case")]
+pub enum ElementKind {
+    /// Clickable element (buttons, menu items, tabs).
+    /// Detected via: inverse video, bracket patterns `[OK]`, `<Cancel>`.
+    Button,
+    /// Text entry field.
+    /// Detected via: cursor position, underscore runs `____`.
+    Input,
+    /// Checkbox or radio button with on/off state.
+    /// Detected via: `[x]`, `[ ]`, `☑`, `☐` patterns.
+    Toggle,
+}
+
+/// A detected interactive UI element.
+///
+/// # Coordinates
+///
+/// All coordinates are 0-based (row, col) to match cursor API.
+/// Height is always 1 in v1 (single-row elements only).
+#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
+pub struct Element {
+    /// Kind of interactive element.
+    pub kind: ElementKind,
+
+    /// Row index (0-based, from top).
+    pub row: u16,
+
+    /// Column index (0-based, from left).
+    pub col: u16,
+
+    /// Width in terminal cells.
+    pub width: u16,
+
+    /// Text content of the element.
+    pub text: String,
+
+    /// Detection confidence (0.0-1.0).
+    /// - 1.0: High confidence (cursor, inverse video, checkbox pattern)
+    /// - 0.8: Medium confidence (bracket pattern)
+    /// - 0.6: Low confidence (underscore run)
+    pub confidence: f32,
+
+    /// Whether this element currently has focus.
+    /// Orthogonal to kind; omitted from serialized output when `false`, defaults to `false` when absent.
+    #[serde(default, skip_serializing_if = "is_false")]
+    pub focused: bool,
+
+    /// Checked state for Toggle kind (`None` for non-toggles; omitted from serialized output when `None`).
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub checked: Option<bool>,
+}
+
+/// Serde `skip_serializing_if` predicate: returns `true` when `b` is `false`.
+fn is_false(b: &bool) -> bool {
+    !*b
+}
+
+impl Element {
+    /// Create a new element with `focused = false` and `checked = None`.
+    #[must_use]
+    pub fn new(
+        kind: ElementKind,
+        row: u16,
+        col: u16,
+        width: u16,
+        text: String,
+        confidence: f32,
+    ) -> Self {
+        Self {
+            kind,
+            row,
+            col,
+            width,
+            text,
+            confidence,
+            focused: false,
+            checked: None,
+        }
+    }
+
+    /// Builder-style setter: return this element with `checked = Some(checked)` (for toggles).
+    #[must_use]
+    pub fn with_checked(mut self, checked: bool) -> Self {
+        self.checked = Some(checked);
+        self
+    }
+
+    /// Builder-style setter: return this element with its focused state set to `focused`.
+    #[must_use]
+    pub fn with_focused(mut self, focused: bool) -> Self {
+        self.focused = focused;
+        self
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn element_kind_serializes_to_snake_case() {
+        assert_eq!(
+            serde_json::to_string(&ElementKind::Button).unwrap(),
+            "\"button\""
+        );
+        assert_eq!(
+            serde_json::to_string(&ElementKind::Toggle).unwrap(),
+            "\"toggle\""
+        );
+    }
+
+    #[test]
+    fn element_serialization_omits_optional_fields() {
+        let elem = Element::new(ElementKind::Button, 0, 0, 4, "OK".to_string(), 0.8);
+        let json = serde_json::to_string(&elem).unwrap();
+
+        // Buttons shouldn't have checked, unfocused elements shouldn't have focused
+        assert!(!json.contains("checked"));
+        assert!(!json.contains("focused"));
+    }
+
+    #[test]
+    fn element_serialization_includes_set_fields() {
+        let elem = Element::new(ElementKind::Toggle, 0, 0, 3, "[x]".to_string(), 1.0)
+            .with_checked(true)
+            .with_focused(true);
+        let json = serde_json::to_string(&elem).unwrap();
+
+        assert!(json.contains("\"checked\":true"));
+        assert!(json.contains("\"focused\":true"));
+    }
+}
diff --git a/crates/pilotty-core/src/elements/segment.rs b/crates/pilotty-core/src/elements/segment.rs
new file mode 100644
index 0000000..eae531d
--- /dev/null
+++ b/crates/pilotty-core/src/elements/segment.rs
@@ -0,0 +1,208 @@
+//! Segmentation: grouping adjacent cells by visual style.
+//!
+//! Scans the terminal grid row by row, grouping adjacent cells with identical
+//! visual styles into clusters for classification.
+
+use unicode_width::UnicodeWidthStr;
+
+use crate::elements::grid::ScreenGrid;
+use crate::elements::style::CellStyle;
+
+/// A cluster of adjacent cells with identical visual style.
+///
+/// Clusters are the intermediate representation between raw cells and
+/// classified elements. Each cluster spans a contiguous horizontal region
+/// of a single row.
+#[derive(Debug, Clone, PartialEq, Eq)]
+pub struct Cluster {
+    /// Row index (0-based, from top).
+    pub row: u16,
+    /// Column index (0-based, from left) of the cluster's first cell.
+    pub col: u16,
+    /// Display width in terminal cells (`Cluster::new` derives it from `text`).
+    pub width: u16,
+    /// Text content of the cluster.
+    pub text: String,
+    /// Visual style shared by all cells in this cluster.
+    pub style: CellStyle,
+}
+
+impl Cluster {
+    /// Create a new cluster; `width` is derived from the Unicode display width of `text`.
+    #[must_use]
+    pub fn new(row: u16, col: u16, text: String, style: CellStyle) -> Self {
+        // Use unicode-width for proper terminal column alignment.
+        // CJK characters are width 2, zero-width chars are width 0; the sum saturates at u16::MAX.
+        let width = text.width().min(u16::MAX as usize) as u16;
+        Self {
+            row,
+            col,
+            width,
+            text,
+            style,
+        }
+    }
+
+    /// Check if this cluster contains only whitespace (also `true` when `text` is empty).
+    #[must_use]
+    pub fn is_whitespace_only(&self) -> bool {
+        self.text.chars().all(|c| c.is_whitespace())
+    }
+}
+
+/// Segment a single row into clusters of consecutive same-style cells (empty if `row` is out of range).
+fn segment_row<G: ScreenGrid>(grid: &G, row: u16) -> Vec<Cluster> {
+    let mut clusters = Vec::new();
+
+    if row >= grid.rows() {
+        return clusters;
+    }
+
+    let mut current_text = String::new();
+    let mut current_style: Option<CellStyle> = None;
+    let mut start_col: u16 = 0;
+
+    for col in 0..grid.cols() {
+        let Some(cell) = grid.cell(row, col) else {
+            continue;
+        };
+
+        match current_style {
+            Some(ref style) if *style == cell.style => {
+                // Same style as the open cluster: append this cell's character to it
+                current_text.push(cell.ch);
+            }
+            _ => {
+                // Style changed or first cell: emit the open cluster (if any) before starting anew
+                if let Some(style) = current_style.take() {
+                    if !current_text.is_empty() {
+                        clusters.push(Cluster::new(
+                            row,
+                            start_col,
+                            std::mem::take(&mut current_text),
+                            style,
+                        ));
+                    }
+                }
+                // Start a new cluster anchored at this column
+                start_col = col;
+                current_style = Some(cell.style);
+                current_text.push(cell.ch);
+            }
+        }
+    }
+
+    // Flush the cluster that is still open when the row ends
+    if let Some(style) = current_style {
+        if !current_text.is_empty() {
+            clusters.push(Cluster::new(row, start_col, current_text, style));
+        }
+    }
+
+    clusters
+}
+
+/// Segment an entire grid into clusters, row by row from top to bottom.
+fn segment_grid<G: ScreenGrid>(grid: &G) -> Vec<Cluster> {
+    let mut clusters = Vec::new();
+
+    for row in 0..grid.rows() {
+        clusters.extend(segment_row(grid, row));
+    }
+
+    clusters
+}
+
+/// Drop clusters whose text is entirely whitespace, keeping the remaining clusters in order.
+fn filter_whitespace(clusters: Vec<Cluster>) -> Vec<Cluster> {
+    clusters
+        .into_iter()
+        .filter(|c| !c.is_whitespace_only())
+        .collect()
+}
+
+/// Segment a grid into style clusters and drop the whitespace-only ones in one step.
+///
+/// Convenience function that combines `segment_grid` and `filter_whitespace`.
+#[must_use]
+pub fn segment<G: ScreenGrid>(grid: &G) -> Vec<Cluster> {
+    filter_whitespace(segment_grid(grid))
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use crate::elements::grid::test_support::SimpleGrid;
+
+    #[test]
+    fn cluster_creation() {
+        let cluster = Cluster::new(5, 10, "Hello".to_string(), CellStyle::default());
+        assert_eq!(cluster.row, 5);
+        assert_eq!(cluster.col, 10);
+        assert_eq!(cluster.width, 5);
+        assert_eq!(cluster.text, "Hello");
+        assert!(!cluster.is_whitespace_only());
+    }
+
+    #[test]
+    fn segment_splits_by_style() {
+        let mut grid = SimpleGrid::from_text(&["AABBBCC"], 7);
+        let bold = CellStyle::new().with_bold(true);
+        let inverse = CellStyle::new().with_inverse(true);
+
+        grid.style_range(0, 2, 5, bold);
+        grid.style_range(0, 5, 7, inverse);
+
+        let clusters = segment_row(&grid, 0);
+
+        assert_eq!(clusters.len(), 3);
+        assert_eq!(clusters[0].text, "AA");
+        assert_eq!(clusters[0].col, 0);
+        assert_eq!(clusters[1].text, "BBB");
+        assert!(clusters[1].style.bold);
+        assert_eq!(clusters[2].text, "CC");
+        assert!(clusters[2].style.inverse);
+    }
+
+    #[test]
+    fn segment_filters_whitespace() {
+        let mut grid = SimpleGrid::from_text(&["[OK]     [Cancel]"], 20);
+        let inverse = CellStyle::new().with_inverse(true);
+
+        grid.style_range(0, 0, 4, inverse);
+        grid.style_range(0, 9, 17, inverse);
+
+        let clusters = segment(&grid);
+
+        assert!(clusters.iter().all(|c| !c.is_whitespace_only()));
+        let texts: Vec<&str> = clusters.iter().map(|c| c.text.as_str()).collect();
+        assert!(texts.contains(&"[OK]"));
+        assert!(texts.contains(&"[Cancel]"));
+    }
+
+    // ========================================================================
+    // Unicode Width Tests
+    // ========================================================================
+
+    #[test]
+    fn cluster_width_cjk() {
+        // CJK characters should have width 2 each
+        let cluster = Cluster::new(0, 0, "你好".to_string(), CellStyle::default());
+        assert_eq!(cluster.width, 4); // 2 + 2 = 4
+    }
+
+    #[test]
+    fn cluster_width_ascii() {
+        // ASCII characters should have width 1 each
+        let cluster = Cluster::new(0, 0, "Hello".to_string(), CellStyle::default());
+        assert_eq!(cluster.width, 5);
+    }
+
+    #[test]
+    fn cluster_width_mixed() {
+        // Mixed ASCII and CJK
+        let cluster = Cluster::new(0, 0, "Hi你好".to_string(), CellStyle::default());
+        // H=1 + i=1 + 你=2 + 好=2 = 6
+        assert_eq!(cluster.width, 6);
+    }
+}
diff --git a/crates/pilotty-core/src/elements/style.rs b/crates/pilotty-core/src/elements/style.rs
new file mode 100644
index 0000000..a600a5d
--- /dev/null
+++ b/crates/pilotty-core/src/elements/style.rs
@@ -0,0 +1,126 @@
+//! Visual style types for element detection segmentation.
+//!
+//! These types represent cell styling independent of the vt100 crate,
+//! allowing the core element detection types to remain vt100-agnostic.
+
+use serde::{Deserialize, Serialize};
+
+/// Terminal color representation (serialized with a snake_case `type` tag).
+///
+/// Maps to standard terminal color modes:
+/// - Default: terminal's default foreground/background
+/// - Indexed: 256-color palette (0-255)
+/// - Rgb: 24-bit true color
+#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Default, Serialize, Deserialize)]
+#[serde(rename_all = "snake_case", tag = "type")]
+pub enum Color {
+    /// Terminal default color (this is the `Default::default()` variant).
+    #[default]
+    Default,
+    /// 256-color palette index (0-255).
+    Indexed { index: u8 },
+    /// 24-bit RGB color.
+    Rgb { r: u8, g: u8, b: u8 },
+}
+
+impl Color {
+    /// Create an indexed (256-color palette) color from its palette index.
+    #[must_use]
+    pub fn indexed(index: u8) -> Self {
+        Self::Indexed { index }
+    }
+
+    /// Create a 24-bit RGB color from its red, green, and blue components.
+    #[must_use]
+    pub fn rgb(r: u8, g: u8, b: u8) -> Self {
+        Self::Rgb { r, g, b }
+    }
+}
+
+/// Visual style attributes for a terminal cell.
+///
+/// Used for segmentation: adjacent cells with identical styles are grouped
+/// into clusters (all five fields take part in the derived equality check).
+#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Default, Serialize, Deserialize)]
+pub struct CellStyle {
+    /// Bold text attribute.
+    pub bold: bool,
+    /// Underlined text attribute.
+    pub underline: bool,
+    /// Inverse video (swapped fg/bg).
+    pub inverse: bool,
+    /// Foreground color.
+    pub fg_color: Color,
+    /// Background color.
+    pub bg_color: Color,
+}
+
+impl CellStyle {
+    /// Create a new cell style with default values (no attributes set, default colors).
+    #[must_use]
+    pub fn new() -> Self {
+        Self::default()
+    }
+
+    /// Builder-style setter: return this style with the bold attribute set to `bold`.
+    #[must_use]
+    pub fn with_bold(mut self, bold: bool) -> Self {
+        self.bold = bold;
+        self
+    }
+
+    /// Builder-style setter: return this style with the underline attribute set to `underline`.
+    #[must_use]
+    pub fn with_underline(mut self, underline: bool) -> Self {
+        self.underline = underline;
+        self
+    }
+
+    /// Builder-style setter: return this style with the inverse attribute set to `inverse`.
+    #[must_use]
+    pub fn with_inverse(mut self, inverse: bool) -> Self {
+        self.inverse = inverse;
+        self
+    }
+
+    /// Builder-style setter: return this style with the foreground color set to `color`.
+    #[must_use]
+    pub fn with_fg(mut self, color: Color) -> Self {
+        self.fg_color = color;
+        self
+    }
+
+    /// Builder-style setter: return this style with the background color set to `color`.
+    #[must_use]
+    pub fn with_bg(mut self, color: Color) -> Self {
+        self.bg_color = color;
+        self
+    }
+
+    /// Check if this style uses inverse video (returns the `inverse` flag).
+    ///
+    /// Inverse video is a strong signal for selected menu items and tabs.
+    #[must_use]
+    pub fn is_inverse(&self) -> bool {
+        self.inverse
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn cell_style_default() {
+        let style = CellStyle::default();
+        assert!(!style.bold);
+        assert!(!style.inverse);
+        assert_eq!(style.fg_color, Color::Default);
+    }
+
+    #[test]
+    fn is_inverse_helper() {
+        assert!(!CellStyle::new().is_inverse());
+        assert!(CellStyle::new().with_inverse(true).is_inverse());
+    }
+}
diff --git a/crates/pilotty-core/src/lib.rs b/crates/pilotty-core/src/lib.rs
index f8b3c45..6c98556 100644
--- a/crates/pilotty-core/src/lib.rs
+++ b/crates/pilotty-core/src/lib.rs
@@ -1,8 +1,31 @@
 //! Core types and logic for pilotty.
 //!
-//! This crate provides the shared data structures and algorithms used by both
-//! the CLI/daemon and the MCP server.
+//! This crate provides shared data structures and algorithms for AI-driven
+//! terminal automation. It's used by both the CLI/daemon and MCP server.
+//!
+//! # Modules
+//!
+//! - [`error`]: API error types with actionable suggestions for AI consumers
+//! - [`input`]: Terminal input encoding (keys, mouse, modifiers)
+//! - [`protocol`]: JSON-line request/response protocol
+//! - [`snapshot`]: Screen state capture and change detection
+//! - [`elements`]: UI element detection
+//!
+//! # Element Detection
+//!
+//! pilotty detects interactive UI elements using a simplified 3-kind model
+//! optimized for AI agents:
+//!
+//! | Kind | Detection | Confidence |
+//! |------|-----------|------------|
+//! | **Button** | Inverse video, `[OK]`, `<Cancel>` | 1.0 / 0.8 |
+//! | **Input** | Cursor position, `____` underscores | 1.0 / 0.6 |
+//! | **Toggle** | `[x]`, `[ ]`, `☑`, `☐` | 1.0 |
+//!
+//! Elements include row/col coordinates for use with the click command.
+//! The `content_hash` field enables efficient change detection.
 
+pub mod elements;
 pub mod error;
 pub mod input;
 pub mod protocol;
diff --git a/crates/pilotty-core/src/protocol.rs b/crates/pilotty-core/src/protocol.rs
index c6eac27..42154ea 100644
--- a/crates/pilotty-core/src/protocol.rs
+++ b/crates/pilotty-core/src/protocol.rs
@@ -79,12 +79,12 @@ pub enum Command {
 #[derive(Debug, Clone, Copy, Default, PartialEq, Eq, Serialize, Deserialize)]
 #[serde(rename_all = "snake_case")]
 pub enum SnapshotFormat {
-    /// Full JSON with all metadata.
+    /// Full JSON with all metadata including text and elements.
     #[default]
     Full,
-    /// Compact format with inline refs.
+    /// Compact format: omits text and elements, just metadata.
     Compact,
-    /// Plain text only.
+    /// Plain text only (no JSON structure).
     Text,
 }
 
@@ -97,7 +97,7 @@ pub enum ScrollDirection {
 }
 
 /// A response from daemon to CLI.
-#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
+#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
 pub struct Response {
     pub id: String,
     pub success: bool,
@@ -128,7 +128,7 @@ impl Response {
 }
 
 /// Response payload variants.
-#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
+#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
 #[serde(tag = "type", rename_all = "snake_case")]
 pub enum ResponseData {
     /// Full screen state snapshot.
diff --git a/crates/pilotty-core/src/snapshot.rs b/crates/pilotty-core/src/snapshot.rs
index bf8c884..9c0cbc0 100644
--- a/crates/pilotty-core/src/snapshot.rs
+++ b/crates/pilotty-core/src/snapshot.rs
@@ -1,7 +1,32 @@
-//! Screen state types.
+//! Screen state capture and change detection.
+//!
+//! This module provides types for capturing terminal screen state, including
+//! text content, cursor position, and detected UI elements.
+//!
+//! # Snapshot Formats
+//!
+//! The daemon supports two JSON snapshot formats (plus a plain-text `Text` format):
+//!
+//! | Format | Content | Use Case |
+//! |--------|---------|----------|
+//! | **Full** | text + elements + hash | Complete state for new screens |
+//! | **Compact** | metadata only | Quick status checks |
+//!
+//! # Change Detection
+//!
+//! The `content_hash` field provides efficient change detection. Agents can
+//! compare hashes across snapshots without parsing the full element list:
+//!
+//! ```ignore
+//! if new_snapshot.content_hash != old_snapshot.content_hash {
+//!     // Screen changed, re-analyze elements
+//! }
+//! ```
 
 use serde::{Deserialize, Serialize};
 
+use crate::elements::Element;
+
 /// Terminal dimensions.
 #[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
 pub struct TerminalSize {
@@ -18,7 +43,7 @@ pub struct CursorState {
 }
 
 /// Complete screen state snapshot.
-#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
+#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
 pub struct ScreenState {
     pub snapshot_id: u64,
     pub size: TerminalSize,
@@ -26,6 +51,21 @@ pub struct ScreenState {
     /// Plain text content of the screen.
     #[serde(skip_serializing_if = "Option::is_none")]
     pub text: Option<String>,
+    /// Detected interactive UI elements.
+    ///
+    /// Elements are detected using visual style segmentation and pattern
+    /// classification. Each element includes its position (row, col) for
+    /// interaction via the click command.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub elements: Option<Vec<Element>>,
+    /// Hash of screen content for change detection.
+    ///
+    /// Computed from the screen text using a fast non-cryptographic hash.
+    /// Present when `elements` is requested (`with_elements=true`).
+    /// Agents can compare hashes across snapshots to detect screen changes
+    /// without parsing the full element list.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub content_hash: Option<u64>,
 }
 
 impl ScreenState {
@@ -39,6 +79,71 @@ impl ScreenState {
                 visible: true,
             },
             text: None,
+            elements: None,
+            content_hash: None,
         }
     }
 }
+
+/// Compute a content hash from screen text.
+///
+/// Uses 64-bit FNV-1a over the UTF-8 bytes of `text`: a fast non-cryptographic hash for change detection.
+#[must_use]
+pub fn compute_content_hash(text: &str) -> u64 {
+    // FNV-1a 64-bit offset basis and prime
+    const FNV_OFFSET: u64 = 0xcbf29ce484222325;
+    const FNV_PRIME: u64 = 0x00000100000001B3;
+
+    let mut hash = FNV_OFFSET;
+    for byte in text.bytes() {
+        hash ^= u64::from(byte);
+        hash = hash.wrapping_mul(FNV_PRIME);
+    }
+    hash
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn content_hash_deterministic() {
+        let text = "Hello, World!";
+        let hash1 = compute_content_hash(text);
+        let hash2 = compute_content_hash(text);
+        assert_eq!(hash1, hash2);
+    }
+
+    #[test]
+    fn content_hash_differs_for_different_text() {
+        let hash1 = compute_content_hash("Hello");
+        let hash2 = compute_content_hash("World");
+        assert_ne!(hash1, hash2);
+    }
+
+    #[test]
+    fn content_hash_empty_string() {
+        // Empty string should return the FNV-1a offset basis
+        let hash = compute_content_hash("");
+        assert_eq!(hash, 0xcbf29ce484222325);
+    }
+
+    #[test]
+    fn content_hash_single_char_difference() {
+        // Even a single character difference should produce different hashes
+        let hash1 = compute_content_hash("test");
+        let hash2 = compute_content_hash("tess");
+        assert_ne!(hash1, hash2);
+    }
+
+    #[test]
+    fn content_hash_unicode() {
+        // Unicode text should hash consistently
+        let text = "日本語テスト 🚀";
+        let hash1 = compute_content_hash(text);
+        let hash2 = compute_content_hash(text);
+        assert_eq!(hash1, hash2);
+        // Should differ from ASCII
+        assert_ne!(hash1, compute_content_hash("ascii"));
+    }
+}
diff --git a/npm/README.md b/npm/README.md
index 32395ce..a4a28b2 100644
--- a/npm/README.md
+++ b/npm/README.md
@@ -1,25 +1,26 @@
 <p align="center">
-  <img src="https://raw.githubusercontent.com/msmps/pilotty/main/assets/pilotty.png" alt="pilotty logo" width="400">
+  <img src="https://raw.githubusercontent.com/msmps/pilotty/main/assets/pilotty.png" alt="pilotty - Terminal automation CLI enabling AI agents to control TUI applications" width="400">
 </p>
 
 <h1 align="center">pilotty</h1>
 
 <p align="center">
-  <strong>Terminal automation CLI for AI agents</strong><br>
-  <em>Like <a href="https://github.com/vercel-labs/agent-browser">agent-browser</a>, but for TUI applications.</em>
+  <sub>The terminal equivalent of <a href="https://github.com/vercel-labs/agent-browser">agent-browser</a></sub>
 </p>
 
----
+<p align="center">
+  <strong>Terminal automation CLI for AI agents</strong><br>
+  <em>Control vim, htop, lazygit, dialog, and any TUI programmatically</em>
+</p>
 
-pilotty enables AI agents to interact with terminal applications (vim, htop, lazygit, dialog, etc.) through a simple CLI interface. It manages PTY sessions, captures terminal output, and provides keyboard/mouse input capabilities for navigating TUI applications.
+<p align="center">
+  <a href="https://www.npmjs.com/package/pilotty"><img alt="npm version" src="https://img.shields.io/npm/v/pilotty"></a>
+  <a href="https://github.com/msmps/pilotty/blob/main/LICENSE"><img alt="License" src="https://img.shields.io/badge/license-MIT-blue"></a>
+</p>
 
-## Features
+---
 
-- **PTY Management**: Spawn and manage terminal applications in background sessions
-- **Keyboard Navigation**: Interact with TUIs using Tab, Enter, arrow keys, and key combos
-- **AI-Friendly Output**: Clean JSON responses with actionable suggestions on errors
-- **Multi-Session**: Run multiple terminal apps simultaneously in isolated sessions
-- **Zero Config**: Daemon auto-starts on first command, auto-stops after 5 minutes idle
+pilotty enables AI agents to interact with terminal applications through a simple command-line interface. It manages pseudo-terminal (PTY) sessions with full VT100 terminal emulation, captures screen state, and provides keyboard/mouse input for navigating terminal user interfaces.
 
 ## Installation
 
@@ -83,6 +84,17 @@ The `snapshot` command returns structured data about the terminal screen:
 
 Use the cursor position and text content to understand the screen state and navigate using keyboard commands (Tab, Enter, arrow keys) or click at specific coordinates.
 
+## Documentation
+
+See the **[GitHub repository](https://github.com/msmps/pilotty)** for full documentation including:
+
+- All commands reference
+- Session management
+- Key combinations
+- UI element detection
+- AI agent workflow examples
+- Daemon architecture
+
 ## Building from Source
 
 ```bash
@@ -94,10 +106,6 @@ cargo build --release
 
 Requires [Rust](https://rustup.rs) 1.70+.
 
-## Documentation
-
-See the [GitHub repository](https://github.com/msmps/pilotty) for full documentation including all commands, key combinations, and AI agent workflow examples.
-
 ## License
 
 MIT
diff --git a/skills/pilotty/SKILL.md b/skills/pilotty/SKILL.md
index de357f4..c9efb74 100644
--- a/skills/pilotty/SKILL.md
+++ b/skills/pilotty/SKILL.md
@@ -30,7 +30,7 @@ This is the #1 cause of agent failures. When in doubt: **flags first, then comma
 ```bash
 pilotty spawn vim file.txt        # Start TUI app in managed session
 pilotty wait-for "file.txt"       # Wait for app to be ready
-pilotty snapshot                  # Get screen state with cursor position
+pilotty snapshot                  # Get screen state with UI elements
 pilotty key i                     # Enter insert mode
 pilotty type "Hello, World!"      # Type text
 pilotty key Escape                # Exit insert mode
@@ -41,9 +41,10 @@ pilotty kill                      # End session
 
 1. **Spawn**: `pilotty spawn <command>` starts the app in a background PTY
 2. **Wait**: `pilotty wait-for <text>` ensures the app is ready
-3. **Snapshot**: `pilotty snapshot` returns screen state with text content and cursor position
-4. **Interact**: Use keyboard commands (`key`, `type`) or click at coordinates (`click <row> <col>`)
-5. **Re-snapshot**: After screen changes, snapshot again to see updated state
+3. **Snapshot**: `pilotty snapshot` returns screen state with detected UI elements
+4. **Understand**: Parse `elements[]` to identify buttons, inputs, toggles
+5. **Interact**: Use keyboard commands (`key`, `type`) or click an element's position (`click <row> <col>`)
+6. **Re-snapshot**: Snapshot again and compare `content_hash` with the previous value to detect screen changes
 
 ## Commands
 
@@ -56,14 +57,14 @@ pilotty kill                      # Kill default session
 pilotty kill -s myapp             # Kill specific session
 pilotty list-sessions             # List all active sessions
 pilotty daemon                    # Manually start daemon (usually auto-starts)
-pilotty stop                      # Stop daemon and all sessions
+pilotty shutdown                  # Stop daemon and all sessions
 pilotty examples                  # Show end-to-end workflow example
 ```
 
 ### Screen capture
 
 ```bash
-pilotty snapshot                  # Full JSON with text content
+pilotty snapshot                  # Full JSON with text content and elements
 pilotty snapshot --format compact # JSON without text field
 pilotty snapshot --format text    # Plain text with cursor indicator
 pilotty snapshot -s myapp         # Snapshot specific session
@@ -125,16 +126,23 @@ PILOTTY_SOCKET_DIR="/tmp/pilotty" # Override socket directory
 RUST_LOG="debug"                  # Enable debug logging
 ```
 
-## Snapshot output
+## Snapshot Output
 
-The `snapshot` command returns structured JSON:
+The `snapshot` command returns structured JSON with detected UI elements:
 
 ```json
 {
   "snapshot_id": 42,
   "size": { "cols": 80, "rows": 24 },
   "cursor": { "row": 5, "col": 10, "visible": true },
-  "text": "... plain text content ..."
+  "text": "Settings:\n  [x] Notifications  [ ] Dark mode\n  [Save]  [Cancel]",
+  "elements": [
+    { "kind": "toggle", "row": 1, "col": 2, "width": 3, "text": "[x]", "confidence": 1.0, "checked": true },
+    { "kind": "toggle", "row": 1, "col": 20, "width": 3, "text": "[ ]", "confidence": 1.0, "checked": false },
+    { "kind": "button", "row": 2, "col": 2, "width": 6, "text": "[Save]", "confidence": 0.8 },
+    { "kind": "button", "row": 2, "col": 10, "width": 8, "text": "[Cancel]", "confidence": 0.8 }
+  ],
+  "content_hash": 12345678901234567890
 }
 ```
 
@@ -147,7 +155,85 @@ bash-3.2$ [_]
 
 The `[_]` shows cursor position. Use the text content to understand screen state and navigate with keyboard commands.
 
-## Navigation approach
+---
+
+## Element Detection
+
+pilotty automatically detects interactive UI elements in terminal applications. Elements provide **read-only context** to help understand UI structure.
+
+### Element Kinds
+
+| Kind | Detection Patterns | Confidence | Fields |
+|------|-------------------|------------|--------|
+| **toggle** | `[x]`, `[ ]`, `[*]`, `☑`, `☐` | 1.0 | `checked: bool` |
+| **button** | Inverse video, `[OK]`, `<Cancel>`, `(Submit)` | 1.0 / 0.8 | `focused: bool` (if true) |
+| **input** | Cursor position, `____` underscores | 1.0 / 0.6 | `focused: bool` (if true) |
+
+### Element Fields
+
+| Field | Type | Description |
+|-------|------|-------------|
+| `kind` | string | Element type: `button`, `input`, or `toggle` |
+| `row` | number | Row position (0-based from top) |
+| `col` | number | Column position (0-based from left) |
+| `width` | number | Width in terminal cells (CJK chars = 2) |
+| `text` | string | Text content of the element |
+| `confidence` | number | Detection confidence (0.0-1.0) |
+| `focused` | bool | Whether element has focus (only present if true) |
+| `checked` | bool | Toggle state (only present for toggles) |
+
+### Confidence Levels
+
+| Confidence | Meaning |
+|------------|---------|
+| **1.0** | High confidence: Cursor position, inverse video, checkbox patterns |
+| **0.8** | Medium confidence: Bracket patterns `[OK]`, `<Cancel>` |
+| **0.6** | Lower confidence: Underscore input fields `____` |
+
+### Change Detection
+
+The `content_hash` field enables efficient screen change detection:
+
+```bash
+# Get initial state
+SNAP1=$(pilotty snapshot)
+HASH1=$(echo "$SNAP1" | jq -r '.content_hash')
+
+# Perform action
+pilotty key Tab
+
+# Check if screen changed
+SNAP2=$(pilotty snapshot)
+HASH2=$(echo "$SNAP2" | jq -r '.content_hash')
+
+if [ "$HASH1" != "$HASH2" ]; then
+  echo "Screen changed - re-analyze elements"
+fi
+```
+
+### Using Elements Effectively
+
+Elements are **read-only context** for understanding the UI. Use **keyboard navigation** for reliable interaction:
+
+```bash
+# 1. Get snapshot to understand UI structure
+pilotty snapshot | jq '.elements'
+# Output shows toggles (checked/unchecked) and buttons with positions
+
+# 2. Navigate and interact with keyboard (reliable approach)
+pilotty key Tab          # Move to next element
+pilotty key Space        # Toggle checkbox
+pilotty key Enter        # Activate button
+
+# 3. Verify state changed
+pilotty snapshot | jq '.elements[] | select(.kind == "toggle")'
+```
+
+**Key insight**: Use elements to understand WHAT is on screen, use keyboard to interact with it.
+
+---
+
+## Navigation Approach
 
 pilotty uses keyboard-first navigation, just like a human would:
 
@@ -160,6 +246,7 @@ pilotty key Tab           # Move to next element
 pilotty key Enter         # Activate/select
 pilotty key Escape        # Cancel/back
 pilotty key Up            # Move up in list/menu
+pilotty key Space         # Toggle checkbox
 
 # 3. Type text when needed
 pilotty type "search term"
@@ -169,7 +256,9 @@ pilotty key Enter
 pilotty click 5 10        # Click at row 5, col 10
 ```
 
-**Key insight**: Parse the snapshot text to understand what's on screen, then use keyboard commands to navigate. This works reliably across all TUI applications.
+**Key insight**: Parse the snapshot text and elements to understand what's on screen, then use keyboard commands to navigate. This works reliably across all TUI applications.
+
+---
 
 ## Example: Edit file with vim
 
@@ -197,22 +286,64 @@ pilotty key -s editor Enter
 pilotty list-sessions
 ```
 
-## Example: Dialog interaction
+## Example: Dialog checklist interaction
 
 ```bash
-# 1. Spawn dialog (--name before command)
-pilotty spawn --name dialog dialog --yesno "Continue?" 10 40
+# 1. Spawn dialog checklist (--name before command)
+pilotty spawn --name opts dialog --checklist "Select features:" 12 50 4 \
+    "notifications" "Push notifications" on \
+    "darkmode" "Dark mode theme" off \
+    "autosave" "Auto-save documents" on \
+    "telemetry" "Usage analytics" off
+
+# 2. Wait for dialog to render
+sleep 0.5
 
-# 2. Get snapshot to see the dialog
-pilotty snapshot -s dialog --format text
-# Shows: < Yes > and < No > buttons
+# 3. Get snapshot and examine elements
+pilotty snapshot -s opts | jq '.elements[] | select(.kind == "toggle")'
+# Shows toggle elements with checked state and positions
 
-# 3. Navigate with keyboard
-pilotty key -s dialog Tab      # Move to next button
-pilotty key -s dialog Enter    # Activate selected button
+# 4. Navigate to "darkmode" and toggle it
+pilotty key -s opts Down      # Move to second option
+pilotty key -s opts Space     # Toggle it on
 
-# Or click at coordinates if you know the button position
-pilotty click -s dialog 8 15   # Click at row 8, col 15
+# 5. Verify the change
+pilotty snapshot -s opts | jq '.elements[] | select(.kind == "toggle") | {text, checked}'
+
+# 6. Confirm selection
+pilotty key -s opts Enter
+
+# 7. Clean up
+pilotty kill -s opts
+```
+
+## Example: Form filling with elements
+
+```bash
+# 1. Spawn a form application
+pilotty spawn --name form my-form-app
+
+# 2. Get snapshot to understand form structure
+pilotty snapshot -s form | jq '.elements'
+# Shows inputs, toggles, and buttons with positions for click command
+
+# 3. Type into the first input (likely already focused, so no Tab is needed)
+pilotty type -s form "myusername"
+
+# 4. Tab to password field
+pilotty key -s form Tab
+pilotty type -s form "mypassword"
+
+# 5. Tab to remember me and toggle
+pilotty key -s form Tab
+pilotty key -s form Space
+
+# 6. Tab to Login and activate
+pilotty key -s form Tab
+pilotty key -s form Enter
+
+# 7. Check result
+pilotty snapshot -s form --format text
 ```
 
 ## Example: Monitor with htop
@@ -235,6 +366,8 @@ pilotty key -s monitor q     # Quit
 pilotty kill -s monitor
 ```
 
+---
+
 ## Sessions
 
 Each session is isolated with its own:
@@ -262,7 +395,7 @@ The first session spawned without `--name` is automatically named `default`.
 
 > **Important:** The `--name` flag must come **before** the command. Everything after the command is passed as arguments to that command.
 
-## Daemon architecture
+## Daemon Architecture
 
 pilotty uses a background daemon for session management:
 
@@ -273,7 +406,7 @@ pilotty uses a background daemon for session management:
 
 You rarely need to manage the daemon manually.
 
-## Error handling
+## Error Handling
 
 Errors include actionable suggestions:
 
@@ -293,7 +426,9 @@ Errors include actionable suggestions:
 }
 ```
 
-## Common patterns
+---
+
+## Common Patterns
 
 ### Wait then act
 
@@ -310,6 +445,16 @@ pilotty snapshot --format text | grep "Error"  # Check for errors
 pilotty key Enter                               # Then proceed
 ```
 
+### Check for specific element
+
+```bash
+# Check if the first toggle is checked (-c prints one object per line, so head -1 keeps the whole object)
+pilotty snapshot | jq -c '.elements[] | select(.kind == "toggle") | {text, checked}' | head -1
+
+# Find element at specific position
+pilotty snapshot | jq '.elements[] | select(.row == 5 and .col == 10)'
+```
+
 ### Retry on timeout
 
 ```bash
@@ -319,7 +464,9 @@ pilotty wait-for "Ready" -t 5000 || {
 }
 ```
 
-## Deep-dive documentation
+---
+
+## Deep-dive Documentation
 
 For detailed patterns and edge cases, see:
 
@@ -327,8 +474,9 @@ For detailed patterns and edge cases, see:
 |-----------|-------------|
 | [references/session-management.md](references/session-management.md) | Multi-session patterns, isolation, cleanup |
 | [references/key-input.md](references/key-input.md) | Complete key combinations reference |
+| [references/element-detection.md](references/element-detection.md) | Detection rules, confidence, patterns |
 
-## Ready-to-use templates
+## Ready-to-use Templates
 
 Executable workflow scripts:
 
@@ -337,10 +485,12 @@ Executable workflow scripts:
 | [templates/vim-workflow.sh](templates/vim-workflow.sh) | Edit file with vim, save, exit |
 | [templates/dialog-interaction.sh](templates/dialog-interaction.sh) | Handle dialog/whiptail prompts |
 | [templates/multi-session.sh](templates/multi-session.sh) | Parallel TUI orchestration |
+| [templates/element-detection.sh](templates/element-detection.sh) | Element detection demo |
 
 Usage:
 ```bash
 ./templates/vim-workflow.sh /tmp/myfile.txt "File content here"
 ./templates/dialog-interaction.sh
 ./templates/multi-session.sh
+./templates/element-detection.sh
 ```
diff --git a/skills/pilotty/references/element-detection.md b/skills/pilotty/references/element-detection.md
new file mode 100644
index 0000000..15080dc
--- /dev/null
+++ b/skills/pilotty/references/element-detection.md
@@ -0,0 +1,280 @@
+# Element Detection
+
+pilotty automatically detects interactive UI elements in terminal applications. Elements provide **read-only context** to help agents understand UI structure.
+
+## Overview
+
+pilotty analyzes terminal screen content and detects:
+- **Toggles**: Checkboxes like `[x]`, `[ ]`, `[*]`, `☑`, `☐`
+- **Buttons**: Action elements like `[OK]`, `<Cancel>`, `(Submit)`
+- **Inputs**: Text fields marked by underscores `____` or cursor position
+
+Each detected element includes:
+- Kind, position (row, col), width, text content
+- Confidence score (0.0-1.0)
+- State information (checked for toggles, focused for inputs/buttons)
+
+## Detection Rules
+
+### Priority Order (Highest to Lowest)
+
+1. **Cursor Position** - Input (confidence: 1.0, focused: true)
+2. **Checkbox Patterns** - Toggle (confidence: 1.0)
+3. **Inverse Video** - Button (confidence: 1.0, focused: true)
+4. **Bracket Patterns** - Button (confidence: 0.8)
+5. **Underscore Fields** - Input (confidence: 0.6)
+
+### Toggle Detection
+
+Toggles are detected from checkbox patterns:
+
+| Pattern | State | Notes |
+|---------|-------|-------|
+| `[x]`, `[X]` | checked: true | Standard checked |
+| `[ ]` | checked: false | Standard unchecked |
+| `[*]` | checked: true | Dialog/ncurses style |
+| `☑`, `✓`, `✔`, `☒` | checked: true | Unicode checkmarks |
+| `☐`, `□` | checked: false | Unicode unchecked |
+
+Example detection:
+```json
+{
+  "kind": "toggle",
+  "row": 5,
+  "col": 2,
+  "width": 3,
+  "text": "[x]",
+  "confidence": 1.0,
+  "checked": true
+}
+```
+
+### Button Detection
+
+Buttons are detected from:
+
+1. **Inverse video** (highest confidence)
+   - Text with reversed foreground/background colors
+   - Common in dialog, whiptail, and ncurses apps
+   - Confidence: 1.0, focused: true
+
+2. **Bracket patterns** (medium confidence)
+   - Square brackets: `[OK]`, `[Cancel]`, `[Save]`
+   - Angle brackets: `<Yes>`, `<No>`
+   - Parentheses: `(Submit)`, `(Reset)`
+   - Confidence: 0.8
+
+Example detection:
+```json
+{
+  "kind": "button",
+  "row": 10,
+  "col": 5,
+  "width": 6,
+  "text": "[Save]",
+  "confidence": 0.8
+}
+```
+
+### Input Detection
+
+Inputs are detected from:
+
+1. **Cursor position** (highest confidence)
+   - The cell where the cursor is located
+   - Confidence: 1.0, focused: true
+
+2. **Underscore runs** (lower confidence)
+   - 3+ consecutive underscores: `___`, `__________`
+   - Common in form-style TUIs
+   - Confidence: 0.6
+
+Example detection:
+```json
+{
+  "kind": "input",
+  "row": 8,
+  "col": 12,
+  "width": 10,
+  "text": "__________",
+  "confidence": 0.6
+}
+```
+
+## Non-Interactive Patterns (Filtered)
+
+The following patterns are recognized but NOT returned as interactive elements:
+
+| Pattern | Why Filtered |
+|---------|--------------|
+| `http://`, `https://` | Links are not clickable in most TUIs |
+| `[====]`, `[####]` | Progress bars |
+| `[ERROR]`, `[WARNING]`, `[INFO]` | Status indicators |
+| `[1]`, `[2]`, `1)`, `a)` | Menu prefixes |
+| `├`, `┤`, `│`, `┌`, `┐` | Box-drawing characters |
+
+## Element Fields Reference
+
+| Field | Type | Required | Description |
+|-------|------|----------|-------------|
+| `kind` | string | Yes | `button`, `input`, or `toggle` |
+| `row` | number | Yes | Row position (0-based from top) |
+| `col` | number | Yes | Column position (0-based from left) |
+| `width` | number | Yes | Width in terminal cells |
+| `text` | string | Yes | Element text content |
+| `confidence` | number | Yes | Detection confidence (0.0-1.0) |
+| `focused` | bool | No | Present and true if element has focus |
+| `checked` | bool | No | Present for toggles only |
+
+### Width Calculation
+
+Element width uses Unicode display width:
+- ASCII characters: width 1
+- CJK characters (Chinese, Japanese, Korean): width 2
+- Emoji: width 2
+- Zero-width characters: width 0
+
+This matches terminal column alignment.
+
+## Content Hash
+
+Each snapshot includes a `content_hash` field for change detection:
+
+```json
+{
+  "content_hash": 12345678901234567890,
+  ...
+}
+```
+
+The hash is computed from the visible screen text content. Use it to:
+- Detect if the screen changed between snapshots
+- Avoid re-processing unchanged screens
+
+```bash
+HASH1=$(pilotty snapshot | jq -r '.content_hash')
+pilotty key Tab
+HASH2=$(pilotty snapshot | jq -r '.content_hash')
+[ "$HASH1" != "$HASH2" ] && echo "Screen changed"
+```
+
+## Best Practices
+
+### 1. Elements for Understanding, Keyboard for Interaction
+
+Elements tell you WHAT is on screen. Use keyboard to interact:
+
+```bash
+# See what's on screen
+pilotty snapshot | jq '.elements[] | {kind, text, row, col, checked}'
+
+# Navigate with keyboard
+pilotty key Tab    # Move between elements
+pilotty key Space  # Toggle checkboxes
+pilotty key Enter  # Activate buttons
+```
+
+### 2. Check Confidence Levels
+
+Higher confidence means more reliable detection:
+
+```bash
+# Filter to high-confidence elements only
+pilotty snapshot | jq '.elements[] | select(.confidence >= 0.8)'
+```
+
+### 3. Find Elements by Content or Position
+
+```bash
+# Find element by text content
+pilotty snapshot | jq '.elements[] | select(.text | contains("Save"))'
+
+# Find element at specific position
+pilotty snapshot | jq '.elements[] | select(.row == 5 and .col == 10)'
+
+# Get first toggle
+pilotty snapshot | jq '[.elements[] | select(.kind == "toggle")][0]'
+```
+
+## Limitations
+
+### What Detection Does NOT Find
+
+1. **Menu items without markers** - Plain text menus need keyboard navigation
+2. **Custom widgets** - Non-standard UI patterns may not be recognized
+3. **Color-only highlighting** - Elements must have text patterns or inverse video
+4. **Disabled elements** - No distinction between enabled/disabled
+
+### What Detection Cannot Do
+
+1. **Click elements directly by name** - Use row/col with click command
+2. **Track elements across screens** - Elements may move; use text content to re-find
+
+## Troubleshooting
+
+### No Elements Detected
+
+1. Check if the app uses standard patterns:
+   ```bash
+   pilotty snapshot --format text  # View raw screen
+   ```
+
+2. Look for high-confidence elements (cursor position, checkbox patterns, and inverse video are all reported with confidence 1.0):
+   ```bash
+   pilotty snapshot | jq '.elements[] | select(.confidence == 1.0)'
+   ```
+
+### Wrong Element Kind
+
+The classifier uses heuristics. If `[x]` is detected as a button instead of toggle:
+1. Check for surrounding context
+2. Use `text` field to identify element purpose
+
+### Elements Missing After Action
+
+Element positions may change between snapshots. Track elements by:
+- Text content (most reliable)
+- Element kind
+- Approximate row/column position
+
+## Example: Complete Workflow
+
+```bash
+#!/bin/bash
+SESSION="form"
+
+# 1. Spawn application
+pilotty spawn --name $SESSION dialog --checklist "Options:" 15 50 4 \
+    "opt1" "Feature A" on \
+    "opt2" "Feature B" off \
+    "opt3" "Feature C" on \
+    "opt4" "Feature D" off
+
+sleep 0.5
+
+# 2. Analyze initial state
+echo "Initial state:"
+pilotty snapshot -s $SESSION | jq '.elements[] | select(.kind == "toggle") | {text, checked}'
+
+# 3. Find unchecked toggles
+UNCHECKED=$(pilotty snapshot -s $SESSION | jq '[.elements[] | select(.kind == "toggle" and .checked == false)] | length')
+echo "Unchecked toggles: $UNCHECKED"
+
+# 4. Navigate and toggle opt2
+pilotty key -s $SESSION Down   # Move to opt2
+pilotty key -s $SESSION Space  # Toggle it
+
+# 5. Record content_hash after the toggle (compare it with a hash taken before step 4 to verify the change)
+HASH1=$(pilotty snapshot -s $SESSION | jq -r '.content_hash')
+echo "Hash after toggle: $HASH1"
+
+# 6. Confirm and check final state
+pilotty key -s $SESSION Enter
+sleep 0.3
+
+echo "Final state:"
+pilotty snapshot -s $SESSION | jq '.elements[] | select(.kind == "toggle") | {text, checked}'
+
+# 7. Cleanup
+pilotty kill -s $SESSION
+```
diff --git a/skills/pilotty/templates/dialog-interaction.sh b/skills/pilotty/templates/dialog-interaction.sh
index ae73233..0db4a18 100755
--- a/skills/pilotty/templates/dialog-interaction.sh
+++ b/skills/pilotty/templates/dialog-interaction.sh
@@ -1,6 +1,6 @@
 #!/bin/bash
 # Template: Interact with dialog/whiptail prompts
-# Demonstrates handling various dialog types
+# Demonstrates handling various dialog types with element detection
 #
 # Usage: ./dialog-interaction.sh
 # Requires: dialog or whiptail installed
@@ -16,26 +16,32 @@ if ! command -v dialog &> /dev/null; then
   exit 1
 fi
 
+# Cleanup on exit
+cleanup() {
+    pilotty kill -s "$SESSION_NAME" 2>/dev/null || true
+}
+trap cleanup EXIT
+
 echo "=== Dialog Interaction Demo ==="
 
 # --- Yes/No Dialog ---
 echo ""
 echo "1. Yes/No Dialog"
 
-pilotty spawn --name "$SESSION_NAME" dialog --yesno "Do you want to continue?" 10 40
+pilotty spawn --name "$SESSION_NAME" dialog --yesno "Do you want to continue?" 10 40 >/dev/null
 
 # Wait for dialog to render
-pilotty wait-for -s "$SESSION_NAME" "continue" -t 5000
+pilotty wait-for -s "$SESSION_NAME" "continue" -t 5000 >/dev/null
 
-# Take snapshot to see buttons
-echo "Snapshot:"
-pilotty snapshot -s "$SESSION_NAME" --format compact
+# Show detected elements
+echo "Detected elements:"
+pilotty snapshot -s "$SESSION_NAME" | jq -r '.elements[] | "  \(.kind) \(.text) at (\(.row),\(.col))"'
 
 # Select Yes using keyboard (Enter selects the default button)
-pilotty key -s "$SESSION_NAME" Enter  # Select default (Yes)
+pilotty key -s "$SESSION_NAME" Enter >/dev/null
 
 sleep 0.5
-echo "Selected: Yes"
+echo "Selected: Yes (via Enter)"
 
 # --- Menu Dialog ---
 echo ""
@@ -45,36 +51,45 @@ pilotty spawn --name "$SESSION_NAME" dialog --menu "Choose an option:" 15 50 4 \
   1 "Option One" \
   2 "Option Two" \
   3 "Option Three" \
-  4 "Exit"
+  4 "Exit" >/dev/null
 
-pilotty wait-for -s "$SESSION_NAME" "Choose" -t 5000
+pilotty wait-for -s "$SESSION_NAME" "Choose" -t 5000 >/dev/null
 
-# Navigate with arrow keys (pilotty auto-detects application cursor mode)
-pilotty key -s "$SESSION_NAME" Down  # Move to option 2
-pilotty key -s "$SESSION_NAME" Down  # Move to option 3
-pilotty key -s "$SESSION_NAME" Enter # Select
+# Navigate with arrow keys
+pilotty key -s "$SESSION_NAME" Down >/dev/null  # Move to option 2
+pilotty key -s "$SESSION_NAME" Down >/dev/null  # Move to option 3
+pilotty key -s "$SESSION_NAME" Enter >/dev/null # Select
 
 sleep 0.5
-echo "Selected: Option Three"
+echo "Selected: Option Three (via arrow keys + Enter)"
 
-# --- Checklist Dialog ---
+# --- Checklist Dialog with Element Detection ---
 echo ""
-echo "3. Checklist Dialog"
+echo "3. Checklist Dialog (with element detection)"
 
 pilotty spawn --name "$SESSION_NAME" dialog --checklist "Select items:" 15 50 4 \
   1 "Item A" off \
   2 "Item B" off \
   3 "Item C" off \
-  4 "Item D" off
+  4 "Item D" off >/dev/null
+
+pilotty wait-for -s "$SESSION_NAME" "Select" -t 5000 >/dev/null
 
-pilotty wait-for -s "$SESSION_NAME" "Select" -t 5000
+# Show initial toggle states
+echo "Initial toggle states:"
+pilotty snapshot -s "$SESSION_NAME" | jq -r '.elements[] | select(.kind == "toggle") | "  \(.text) at (\(.row),\(.col)) checked=\(.checked)"'
 
 # Toggle items with Space
-pilotty key -s "$SESSION_NAME" Space      # Toggle Item A
-pilotty key -s "$SESSION_NAME" Down
-pilotty key -s "$SESSION_NAME" Down
-pilotty key -s "$SESSION_NAME" Space      # Toggle Item C
-pilotty key -s "$SESSION_NAME" Enter      # Confirm
+pilotty key -s "$SESSION_NAME" Space >/dev/null      # Toggle Item A
+pilotty key -s "$SESSION_NAME" Down >/dev/null
+pilotty key -s "$SESSION_NAME" Down >/dev/null
+pilotty key -s "$SESSION_NAME" Space >/dev/null      # Toggle Item C
+
+# Show updated toggle states
+echo "After toggling:"
+pilotty snapshot -s "$SESSION_NAME" | jq -r '.elements[] | select(.kind == "toggle") | "  \(.text) at (\(.row),\(.col)) checked=\(.checked)"'
+
+pilotty key -s "$SESSION_NAME" Enter >/dev/null      # Confirm
 
 sleep 0.5
 echo "Selected: Item A, Item C"
@@ -83,13 +98,17 @@ echo "Selected: Item A, Item C"
 echo ""
 echo "4. Input Dialog"
 
-pilotty spawn --name "$SESSION_NAME" dialog --inputbox "Enter your name:" 10 40
+pilotty spawn --name "$SESSION_NAME" dialog --inputbox "Enter your name:" 10 40 >/dev/null
 
-pilotty wait-for -s "$SESSION_NAME" "name" -t 5000
+pilotty wait-for -s "$SESSION_NAME" "name" -t 5000 >/dev/null
+
+# Show detected input element
+echo "Detected input element:"
+pilotty snapshot -s "$SESSION_NAME" | jq -r '.elements[] | select(.kind == "input") | "  \(.kind) at (\(.row),\(.col)) width=\(.width)"'
 
 # Type input
 pilotty type -s "$SESSION_NAME" "Agent Smith"
-pilotty key -s "$SESSION_NAME" Enter
+pilotty key -s "$SESSION_NAME" Enter >/dev/null
 
 sleep 0.5
 echo "Entered: Agent Smith"
@@ -98,22 +117,24 @@ echo "Entered: Agent Smith"
 echo ""
 echo "5. Message Box"
 
-pilotty spawn --name "$SESSION_NAME" dialog --msgbox "Demo complete!" 10 40
+pilotty spawn --name "$SESSION_NAME" dialog --msgbox "Demo complete!" 10 40 >/dev/null
 
-pilotty wait-for -s "$SESSION_NAME" "complete" -t 5000
+pilotty wait-for -s "$SESSION_NAME" "complete" -t 5000 >/dev/null
 
-# Take final snapshot to see the OK button
-pilotty snapshot -s "$SESSION_NAME"
+# Show button element
+echo "Detected button:"
+pilotty snapshot -s "$SESSION_NAME" | jq -r '.elements[] | select(.kind == "button" or .kind == "input") | "  \(.kind) \(.text) at (\(.row),\(.col))"'
 
 # Dismiss with Enter
-pilotty key -s "$SESSION_NAME" Enter
+pilotty key -s "$SESSION_NAME" Enter >/dev/null
 
 sleep 0.5
 
-# Cleanup
-if pilotty list-sessions 2>/dev/null | grep -q "$SESSION_NAME"; then
-  pilotty kill -s "$SESSION_NAME"
-fi
-
 echo ""
 echo "=== Demo Complete ==="
+echo ""
+echo "Key takeaways:"
+echo "  - Use snapshot | jq '.elements' to see detected UI elements"
+echo "  - Toggles have 'checked' field for state tracking"
+echo "  - Use keyboard (Tab, Space, Enter, arrows) for reliable navigation"
+echo "  - content_hash can detect screen changes between snapshots"
diff --git a/skills/pilotty/templates/element-detection.sh b/skills/pilotty/templates/element-detection.sh
new file mode 100755
index 0000000..6b2ccb8
--- /dev/null
+++ b/skills/pilotty/templates/element-detection.sh
@@ -0,0 +1,145 @@
+#!/bin/bash
+# Element Detection Template
+# Demonstrates pilotty's element detection and keyboard-driven interaction.
+# Requires: dialog and jq on PATH, plus pilotty (or the binary named in $PILOTTY).
+# Usage: ./element-detection.sh
+
+set -e  # Exit as soon as a command fails outside a tested context
+
+# Configuration
+PILOTTY="${PILOTTY:-pilotty}"  # CLI to invoke; expanded unquoted below, so a value with spaces is word-split
+SESSION="element-demo"  # Session name passed to every pilotty call via --name / -s
+
+# ANSI color escape sequences; interpreted by the `echo -e` calls in this script
+RED='\033[0;31m'
+GREEN='\033[0;32m'
+BLUE='\033[0;34m'
+YELLOW='\033[1;33m'
+NC='\033[0m'  # Reset attributes ("no color")
+
+# Cleanup on exit: kill the demo session, ignoring any error so cleanup never fails the script
+cleanup() {
+    $PILOTTY kill -s "$SESSION" 2>/dev/null || true
+}
+trap cleanup EXIT
+
+echo -e "${BLUE}=== Element Detection Demo ===${NC}"
+echo ""
+
+# -----------------------------------------------------------------------------
+# Step 1: Spawn a TUI with UI elements (a five-item checklist: three on, two off)
+# -----------------------------------------------------------------------------
+echo -e "${YELLOW}Step 1: Spawning dialog checklist...${NC}"
+
+$PILOTTY spawn --name "$SESSION" -- dialog --checklist "Select features to enable:" 15 60 5 \
+    "notifications" "Push notifications" on \
+    "darkmode" "Dark mode theme" off \
+    "autosave" "Auto-save documents" on \
+    "analytics" "Usage analytics" off \
+    "updates" "Auto-updates" on >/dev/null
+
+sleep 0.5  # Fixed pause before the first snapshot (presumably to let the dialog render)
+
+# -----------------------------------------------------------------------------
+# Step 2: Get snapshot with elements
+# -----------------------------------------------------------------------------
+echo -e "${YELLOW}Step 2: Getting snapshot with detected elements...${NC}"
+echo ""
+
+SNAPSHOT=$($PILOTTY snapshot -s "$SESSION")  # Snapshot JSON, reused by steps 3 and 4
+
+# Show element summary: one line per element with kind, text, position and confidence
+echo -e "${GREEN}Detected elements:${NC}"
+echo "$SNAPSHOT" | jq -r '.elements[] | "  \(.kind) \(.text) at (\(.row),\(.col)) conf=\(.confidence)"'
+echo ""
+
+# -----------------------------------------------------------------------------
+# Step 3: Analyze toggles (works on the step 2 snapshot; no new pilotty call)
+# -----------------------------------------------------------------------------
+echo -e "${YELLOW}Step 3: Analyzing toggle states...${NC}"
+echo ""
+
+TOGGLES=$(echo "$SNAPSHOT" | jq '[.elements[] | select(.kind == "toggle")]')  # JSON array of toggle elements
+CHECKED=$(echo "$TOGGLES" | jq '[.[] | select(.checked == true)] | length')  # Count with checked == true
+UNCHECKED=$(echo "$TOGGLES" | jq '[.[] | select(.checked == false)] | length')  # Count with checked == false
+
+echo -e "  Checked toggles:   ${GREEN}$CHECKED${NC}"
+echo -e "  Unchecked toggles: ${RED}$UNCHECKED${NC}"
+echo ""
+
+# Show each toggle: text, position and checked state, one per line
+echo -e "${GREEN}Toggle details:${NC}"
+echo "$TOGGLES" | jq -r '.[] | "  \(.text) at (\(.row),\(.col)) checked=\(.checked)"'
+echo ""
+
+# -----------------------------------------------------------------------------
+# Step 4: Toggle an unchecked option and confirm the screen changed
+# -----------------------------------------------------------------------------
+echo -e "${YELLOW}Step 4: Toggling 'darkmode' (currently off)...${NC}"
+
+# Get initial hash for change detection (from the step 2 snapshot, before any key is sent)
+HASH1=$(echo "$SNAPSHOT" | jq -r '.content_hash')
+
+# Navigate to darkmode (second option) and toggle
+$PILOTTY key -s "$SESSION" Down >/dev/null  # Move to darkmode
+$PILOTTY key -s "$SESSION" Space >/dev/null # Toggle it
+
+sleep 0.2  # Brief pause before the next snapshot (presumably to let the dialog redraw)
+
+# Get new snapshot and hash, taken after the key presses
+SNAPSHOT2=$($PILOTTY snapshot -s "$SESSION")
+HASH2=$(echo "$SNAPSHOT2" | jq -r '.content_hash')
+
+# Verify change: differing hashes between the two snapshots are reported as a screen change
+if [ "$HASH1" != "$HASH2" ]; then
+    echo -e "  ${GREEN}Screen changed! (hash: $HASH1 -> $HASH2)${NC}"
+else
+    echo -e "  ${RED}No change detected${NC}"
+fi
+echo ""
+
+# Show updated toggle states from the second snapshot
+echo -e "${GREEN}Updated toggle states:${NC}"
+echo "$SNAPSHOT2" | jq -r '.elements[] | select(.kind == "toggle") | "  \(.text) at (\(.row),\(.col)) checked=\(.checked)"'
+echo ""
+
+# -----------------------------------------------------------------------------
+# Step 5: Find the action button (first element of kind button or input; only reported here, Enter is sent in step 6)
+# -----------------------------------------------------------------------------
+echo -e "${YELLOW}Step 5: Looking for action button...${NC}"
+
+BUTTON=$(echo "$SNAPSHOT2" | jq -r '.elements[] | select(.kind == "button" or .kind == "input") | "\(.text) at (\(.row),\(.col))"' | head -1)
+if [ -n "$BUTTON" ]; then  # Empty when no button/input element was detected
+    echo -e "  Found button: ${GREEN}$BUTTON${NC}"
+else
+    echo -e "  ${YELLOW}No button element detected, using keyboard to confirm${NC}"
+fi
+echo ""
+
+# -----------------------------------------------------------------------------
+# Step 6: Confirm selection
+# -----------------------------------------------------------------------------
+echo -e "${YELLOW}Step 6: Confirming selection with Enter...${NC}"
+
+$PILOTTY key -s "$SESSION" Enter >/dev/null
+
+sleep 0.3
+
+# Check final state. The snapshot is captured first so that its own exit status, not head's, selects the fallback.
+echo -e "${GREEN}Final screen state:${NC}"
+if FINAL=$($PILOTTY snapshot -s "$SESSION" --format text 2>/dev/null); then echo "$FINAL" | head -5; else echo "  (dialog closed)"; fi
+echo ""
+
+# -----------------------------------------------------------------------------
+# Summary: static recap of what the demo covered (no pilotty calls)
+# -----------------------------------------------------------------------------
+echo -e "${BLUE}=== Summary ===${NC}"
+echo ""
+echo "This demo showed how to:"
+echo "  1. Spawn a TUI application"
+echo "  2. Get snapshot with detected elements"
+echo "  3. Analyze element states (toggles, buttons)"
+echo "  4. Use content_hash for change detection"
+echo "  5. Navigate with keyboard based on element context"
+echo ""
+echo -e "${GREEN}Demo complete!${NC}"