diff --git a/src/app/(app)/ToggleRealtime.tsx b/src/app/(app)/ToggleRealtime.tsx index 2bce097..8191ad4 100644 --- a/src/app/(app)/ToggleRealtime.tsx +++ b/src/app/(app)/ToggleRealtime.tsx @@ -1,7 +1,15 @@ 'use client' import { AnimatePresence, motion } from 'framer-motion' -import { type CSSProperties, useCallback, useEffect, useMemo, useRef, useState } from 'react' +import { + type CSSProperties, + useCallback, + useEffect, + useLayoutEffect, + useMemo, + useRef, + useState +} from 'react' import { useRealtimeVoiceSession } from '@/realtime/provider' @@ -357,7 +365,6 @@ export default function ToggleRealtime() { const [activeIndex, setActiveIndex] = useState(0) const transcriptListRef = useRef(null) - const transcriptBottomRef = useRef(null) const stickToBottomRef = useRef(true) useEffect(() => { @@ -373,6 +380,7 @@ export default function ToggleRealtime() { const phrase = languageOrder[activeIndex] ?? languageOrder[0] const footerText = tab === 'session' ? statusText : '' const canSendText = textDraft.trim().length > 0 + const transcriptCount = transcripts.length useEffect(() => { const el = transcriptListRef.current @@ -390,12 +398,11 @@ export default function ToggleRealtime() { } }, []) - useEffect(() => { - if (!stickToBottomRef.current) return - // Trigger on streaming updates (deltas) while the user is pinned to the bottom. - void transcripts - transcriptBottomRef.current?.scrollIntoView({ behavior: 'auto' }) - }, [transcripts]) + useLayoutEffect(() => { + const el = transcriptListRef.current + if (!el || !stickToBottomRef.current || transcriptCount === 0) return + el.scrollTop = el.scrollHeight + }, [transcriptCount]) const content = tab === 'session' ? ( @@ -430,7 +437,6 @@ export default function ToggleRealtime() { ) })} -
) : (
@@ -449,7 +455,6 @@ export default function ToggleRealtime() {

Your spoken conversation will appear here as a live transcript.

-
)}
diff --git a/src/realtime/provider.tsx b/src/realtime/provider.tsx index c6788f9..c25c7d1 100644 --- a/src/realtime/provider.tsx +++ b/src/realtime/provider.tsx @@ -1,6 +1,6 @@ 'use client' -import { createContext, useCallback, useContext, useMemo, useRef, useState } from 'react' +import { createContext, useCallback, useContext, useEffect, useMemo, useRef, useState } from 'react' import { createRealtimeSession } from '@/app/actions/realtime' @@ -134,6 +134,7 @@ export function RealtimeProvider({ children }: { children: React.ReactNode }) { const peerRef = useRef(null) const localRef = useRef(null) const turnDelaySecondsRef = useRef(getInitialTurnDelaySeconds()) + const latestTimelineItemIdRef = useRef(null) // Cancels in-flight `start()` calls and prevents multiple concurrent sessions. const startGenerationRef = useRef(0) // Stable transcript item id we choose for a given response_id (so we don't "split" a message mid-stream). @@ -183,6 +184,10 @@ export function RealtimeProvider({ children }: { children: React.ReactNode }) { setTranscripts(prev => orderTranscriptsByPreviousItemId(prev, previousItemIdByIdRef.current)) }, []) + useEffect(() => { + latestTimelineItemIdRef.current = transcripts.at(-1)?.id ?? null + }, [transcripts]) + const upsertTranscript = useCallback( (update: { id: string @@ -854,7 +859,7 @@ export function RealtimeProvider({ children }: { children: React.ReactNode }) { const trimmed = text.trim() if (!trimmed || !dataChannel) return false const id = crypto.randomUUID() - const previousItemId = latestCommittedInputItemIdRef.current + const previousItemId = latestTimelineItemIdRef.current previousItemIdByIdRef.current.set(id, previousItemId ?? null) latestCommittedInputItemIdRef.current = id upsertTranscript({