livekit · giovaborgogno · Dec 6, 2025 · Dec 20, 2025 · Dec 20, 2025 · Dec 20, 2025
diff --git a/livekit-plugins/livekit-plugins-google/livekit/plugins/google/llm.py b/livekit-plugins/livekit-plugins-google/livekit/plugins/google/llm.py
@@ -302,13 +302,15 @@ def chat(
             is_gemini_3_flash = _is_gemini_3_flash_model(self._opts.model)
             thinking_cfg = self._opts.thinking_config
 
-            # Extract both parameters
+            _include_thoughts = None
             _budget = None
             _level = None
             if isinstance(thinking_cfg, dict):
+                _include_thoughts = thinking_cfg.get("include_thoughts")
                 _budget = thinking_cfg.get("thinking_budget")
                 _level = thinking_cfg.get("thinking_level")
             elif isinstance(thinking_cfg, types.ThinkingConfig):
+                _include_thoughts = thinking_cfg.include_thoughts
                 _budget = thinking_cfg.thinking_budget
                 _level = getattr(thinking_cfg, "thinking_level", None)
 
@@ -326,7 +328,10 @@ def chat(
                     else:
                         _level = "low"
                 # Use thinking_level only (pass as dict since SDK may not have this field yet)
-                extra["thinking_config"] = {"thinking_level": _level}
+                extra["thinking_config"] = {
+                    "thinking_level": _level,
+                    "include_thoughts": _include_thoughts,
+                }
 
             else:
                 # Gemini 2.5 and earlier: only support thinking_budget
@@ -337,7 +342,9 @@ def chat(
                     )
                 if _budget is not None:
                     # Use thinking_budget only
-                    extra["thinking_config"] = types.ThinkingConfig(thinking_budget=_budget)
+                    extra["thinking_config"] = types.ThinkingConfig(
+                        thinking_budget=_budget, include_thoughts=_include_thoughts
+                    )
                 else:
                     # Pass through original config if no specific handling needed
                     extra["thinking_config"] = self._opts.thinking_config
@@ -535,10 +542,16 @@ def _parse_part(self, id: str, part: types.Part) -> llm.ChatChunk | None:
             )
             return chat_chunk
 
-        if not part.text:
+        # Keep thought parts out of content; forward their text via delta.extra["google"]["thinking"]
+        content = part.text if not part.thought else None
+        delta_extra = None
+        if part.thought:
+            delta_extra = {"google": {"thinking": part.text}}
+
+        if not content and not delta_extra:
             return None
 
         return llm.ChatChunk(
             id=id,
-            delta=llm.ChoiceDelta(content=part.text, role="assistant"),
+            delta=llm.ChoiceDelta(content=content, role="assistant", extra=delta_extra),
         )