diff --git a/src/app/(app)/ToggleRealtime.tsx b/src/app/(app)/ToggleRealtime.tsx
index e4ccf05..5932274 100644
--- a/src/app/(app)/ToggleRealtime.tsx
+++ b/src/app/(app)/ToggleRealtime.tsx
@@ -37,17 +37,25 @@ const languagePhrases = [
   { code: 'it', text: 'Presentati' }
 ]
 
-type ConnectionState = 'idle' | 'requesting' | 'ready' | 'error'
+type SessionState = 'idle' | 'requesting' | 'listening' | 'error'
 
 export default function ToggleRealtime() {
   const { start, stop, remoteStream } = useRealtimeVoiceSession()
-  const [connectionState, setConnectionState] = useState<ConnectionState>('idle')
+  const [sessionState, setSessionState] = useState<SessionState>('idle')
   const [errorMessage, setErrorMessage] = useState<string | null>(null)
   const [languageOrder, setLanguageOrder] = useState(languagePhrases)
+  const [isStandalone, setIsStandalone] = useState(false)
   const audioContextRef = useRef<AudioContext | null>(null)
   const sourceRef = useRef<MediaStreamAudioSourceNode | null>(null)
-  const startedRef = useRef(false)
-  const cancelInitRef = useRef(false)
+  const startPendingRef = useRef(false)
+
+  useEffect(() => {
+    if (typeof window === 'undefined') return
+    const standalone =
+      window.matchMedia?.('(display-mode: standalone)').matches ||
+      (window.navigator as unknown as { standalone?: boolean }).standalone === true
+    setIsStandalone(Boolean(standalone))
+  }, [])
 
   useEffect(() => {
     if (typeof navigator === 'undefined') return
@@ -79,11 +87,11 @@ export default function ToggleRealtime() {
     setLanguageOrder(prioritized)
   }, [])
 
-  const ensureAudioContext = useCallback(() => {
+  const ensureAudioContext = useCallback(async () => {
    const Ctx = window.AudioContext ??
      (window as unknown as { webkitAudioContext?: typeof AudioContext }).webkitAudioContext
-    if (!Ctx) throw new Error('AudioContext is not supported in this browser')
+    if (!Ctx) throw new Error('AudioContext is not supported on this device')
 
     if (!audioContextRef.current) {
       audioContextRef.current = new Ctx()
     }
@@ -91,127 +99,136 @@
     if (audioContextRef.current.state === 'suspended') {
-      void audioContextRef.current
-        .resume()
-        .then(() => {
-          console.log('[lilac] AudioContext resumed', {
-            state: audioContextRef.current?.state
-          })
-        })
-        .catch(error => {
-          console.warn('[lilac] failed to resume AudioContext', error)
-        })
-    }
-
-    return audioContextRef.current
-  }, [])
-
-  useEffect(() => {
-    if (!remoteStream) return
-
-    let cancelled = false
-
-    const connectRemoteAudio = async () => {
-      console.log('[lilac] remoteStream updated', {
-        hasStream: Boolean(remoteStream),
-        tracks: remoteStream?.getTracks().length
-      })
-
-      if (!remoteStream.getAudioTracks().length) {
-        const onAddTrack = () => {
-          remoteStream.removeEventListener('addtrack', onAddTrack as EventListener)
-          void connectRemoteAudio()
-        }
-        remoteStream.addEventListener('addtrack', onAddTrack as EventListener)
-        return
-      }
       try {
-        const ctx = ensureAudioContext()
-        if (!ctx || cancelled) return
-        const src = ctx.createMediaStreamSource(remoteStream)
-        sourceRef.current = src
-        src.connect(ctx.destination)
+        await audioContextRef.current.resume()
+        console.log('[lilac] AudioContext resumed', {
+          state: audioContextRef.current?.state
+        })
       } catch (error) {
-        console.error('[lilac] failed to connect remote audio', error)
+        console.warn('[lilac] failed to resume AudioContext', error)
       }
     }
 
-    void connectRemoteAudio()
+    return audioContextRef.current
+  }, [])
 
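+  // Shared teardown for stop, the failed-start path, and effect cleanup:
+  // disconnect the source node before closing the AudioContext, each step
+  // wrapped in try/catch so repeated calls stay safe no-ops.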
-    return () => {
-      cancelled = true
-      console.log('[lilac] cleaning audio graph')
-      try {
-        sourceRef.current?.disconnect()
-      } catch {}
-      sourceRef.current = null
+  const cleanupAudioGraph = useCallback(() => {
+    try {
+      sourceRef.current?.disconnect()
+    } catch {}
+    sourceRef.current = null
+    if (audioContextRef.current) {
       try {
-        void audioContextRef.current?.close()
+        void audioContextRef.current.close()
       } catch {}
       audioContextRef.current = null
     }
-  }, [remoteStream, ensureAudioContext])
+  }, [])
 
-  const beginSession = useCallback(async () => {
-    if (startedRef.current) return
-    startedRef.current = true
-    setConnectionState('requesting')
+  const handleStop = useCallback(() => {
+    startPendingRef.current = false
+    stop()
+    cleanupAudioGraph()
+    setSessionState('idle')
     setErrorMessage(null)
+  }, [cleanupAudioGraph, stop])
 
+  const handleStart = useCallback(async () => {
+    if (startPendingRef.current) return
+    startPendingRef.current = true
+    setErrorMessage(null)
+    setSessionState('requesting')
     try {
-      ensureAudioContext()
+      await ensureAudioContext()
       await start({ instructions: defaultPrompt, voice: 'verse' })
-      if (cancelInitRef.current) {
-        startedRef.current = false
-        return
-      }
-      setConnectionState('ready')
+      setSessionState('listening')
     } catch (error) {
-      console.error('[lilac] failed to start realtime session', error)
-      startedRef.current = false
-      if (cancelInitRef.current) return
-      setConnectionState('error')
+      console.error('[lilac] unable to start session', error)
       const message =
-        error instanceof Error ? error.message : 'Something went wrong while starting Lilac.'
+        error instanceof Error
+          ? error.message
+          : 'Something went wrong while requesting the microphone.'
       setErrorMessage(message)
+      setSessionState('error')
+      cleanupAudioGraph()
+      stop()
+    } finally {
+      startPendingRef.current = false
     }
-  }, [ensureAudioContext, start])
+  }, [cleanupAudioGraph, ensureAudioContext, start, stop])
 
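+  // Wire the remote MediaStream into the local AudioContext. A stream can
+  // arrive before its audio track does, so 'addtrack' retries the hookup.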
   useEffect(() => {
-    cancelInitRef.current = false
+    if (!remoteStream) return cleanupAudioGraph
+    let cancelled = false
 
-    const run = async () => {
-      if (cancelled) return
-      await beginSession()
+    const connect = async () => {
+      try {
+        const ctx = await ensureAudioContext()
+        if (!ctx || cancelled) return
+        if (!remoteStream.getAudioTracks().length) {
+          const handleAddTrack = () => {
+            remoteStream.removeEventListener('addtrack', handleAddTrack as EventListener)
+            void connect()
+          }
+          remoteStream.addEventListener('addtrack', handleAddTrack as EventListener)
+          return
+        }
+        const node = ctx.createMediaStreamSource(remoteStream)
+        try {
+          sourceRef.current?.disconnect()
+        } catch {}
+        sourceRef.current = node
+        node.connect(ctx.destination)
+      } catch (error) {
+        if (!cancelled) {
+          console.error('[lilac] failed to wire remote audio', error)
+        }
+      }
     }
 
-    void run()
+    void connect()
 
     return () => {
       cancelled = true
-      cancelInitRef.current = true
-      startedRef.current = false
-      stop()
-      try {
-        sourceRef.current?.disconnect()
-      } catch {}
-      sourceRef.current = null
-      try {
-        void audioContextRef.current?.close()
-      } catch {}
-      audioContextRef.current = null
+      cleanupAudioGraph()
     }
-  }, [beginSession, stop])
+  }, [cleanupAudioGraph, ensureAudioContext, remoteStream])
+
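+  // Release the microphone as soon as the app is hidden; the blur and
+  // beforeunload handlers below cover hosts where visibilitychange is missed.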
+  useEffect(() => {
+    if (typeof document === 'undefined') return
+    const handleVisibility = () => {
+      if (document.visibilityState === 'hidden' && sessionState !== 'idle') {
+        console.log('[lilac] document hidden -> stopping session')
+        handleStop()
+      }
+    }
+    document.addEventListener('visibilitychange', handleVisibility)
+    return () => document.removeEventListener('visibilitychange', handleVisibility)
+  }, [handleStop, sessionState])
+
+  useEffect(() => {
+    if (typeof window === 'undefined') return
+    const handleBlur = () => {
+      if (sessionState !== 'idle') {
+        console.log('[lilac] window blur -> stopping session')
+        handleStop()
+      }
+    }
+    window.addEventListener('beforeunload', handleStop)
+    window.addEventListener('blur', handleBlur)
+    return () => {
+      window.removeEventListener('beforeunload', handleStop)
+      window.removeEventListener('blur', handleBlur)
+    }
+  }, [handleStop, sessionState])
 
   const statusText = useMemo(() => {
-    if (connectionState === 'requesting') return 'Requesting microphone…'
-    if (connectionState === 'ready') return 'Listening'
-    if (connectionState === 'error')
-      return errorMessage ?? 'Unable to start. Check microphone permissions.'
-    return 'Preparing Lilac…'
-  }, [connectionState, errorMessage])
+    if (sessionState === 'requesting') return 'Requesting microphone…'
+    if (sessionState === 'listening') return 'Listening'
+    if (sessionState === 'error') return errorMessage ?? 'Unable to start. Check microphone permissions.'
+    return 'Tap start to begin listening.'
+  }, [errorMessage, sessionState])
 
   const [activeIndex, setActiveIndex] = useState(0)
 
@@ -233,12 +250,13 @@ export default function ToggleRealtime() {
         Lilac
+        {isStandalone && <span>Home Screen</span>}
       </h1>
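+      {/* Manual start/stop controls replace the previous auto-start-on-mount flow */}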
-      <div />
+      <div>
+        <button type="button" onClick={handleStart}>Start</button>
+        <button type="button" onClick={handleStop}>Stop</button>
+      </div>
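+      {/* One status paragraph: live state, inline error detail, and an idle hint */}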
       <p>
         {statusText}
+        {sessionState === 'error' && errorMessage ? (
+          <span>{errorMessage}</span>
+        ) : null}
+        {sessionState === 'idle' && (
+          <span>
+            {isStandalone
+              ? 'If the mic stops after reopening, tap Start again to refresh permissions.'
+              : 'For a full-screen experience add Lilac to your home screen.'}
+          </span>
+        )}
       </p>
   )
diff --git a/src/realtime/provider.tsx b/src/realtime/provider.tsx
index 3209321..aaa2c9e 100644
--- a/src/realtime/provider.tsx
+++ b/src/realtime/provider.tsx
@@ -1,6 +1,14 @@
 'use client'
 
-import { createContext, useCallback, useContext, useMemo, useRef, useState } from 'react'
+import {
+  createContext,
+  useCallback,
+  useContext,
+  useEffect,
+  useMemo,
+  useRef,
+  useState
+} from 'react'
 
 import { createRealtimeSession } from '@/app/actions/realtime'
 
@@ -87,6 +95,7 @@ export function RealtimeProvider({ children }: { children: React.ReactNode }) {
     setDataChannel(dc)
     dc.addEventListener('open', () => {
       console.log('[realtime] datachannel open')
+      // Ensure session settings (voice/instructions) are applied, then trigger a first response.
       try {
         if (opts?.voice || opts?.instructions) {
           dc.send(
@@ -101,6 +110,18 @@ export function RealtimeProvider({ children }: { children: React.ReactNode }) {
           console.log('[realtime] sent session.update')
         }
       } catch {}
+      try {
+        dc.send(
+          JSON.stringify({
+            response: {
+              // If instructions were provided, the model can greet appropriately; otherwise send a short greeting.
+              ...(opts?.instructions ? {} : { instructions: 'Hello! I am ready to translate.' })
+            },
+            type: 'response.create'
+          })
+        )
+        console.log('[realtime] sent response.create')
+      } catch {}
     })
     dc.addEventListener('close', () => {
       console.log('[realtime] datachannel close')
@@ -185,6 +206,33 @@ export function RealtimeProvider({ children }: { children: React.ReactNode }) {
     [dataChannel]
   )
 
+  useEffect(() => {
+    if (typeof document === 'undefined') return
+    let hideTimer: ReturnType<typeof setTimeout> | null = null
+
+    const handleVisibility = () => {
+      if (document.visibilityState !== 'hidden') return
+      if (hideTimer) clearTimeout(hideTimer)
+      hideTimer = setTimeout(() => {
+        console.log('[realtime] visibility hidden -> cleanup')
+        cleanup()
+      }, 150)
+    }
+
+    const handlePageHide = () => {
+      console.log('[realtime] pagehide -> cleanup')
+      cleanup()
+    }
+
+    document.addEventListener('visibilitychange', handleVisibility)
+    window.addEventListener('pagehide', handlePageHide)
+    return () => {
+      if (hideTimer) clearTimeout(hideTimer)
+      document.removeEventListener('visibilitychange', handleVisibility)
+      window.removeEventListener('pagehide', handlePageHide)
+    }
+  }, [cleanup])
+
   const value = useMemo(
     () => ({
       dataChannel,