Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
42 changes: 24 additions & 18 deletions src/app/(app)/ToggleRealtime.tsx
Original file line number Diff line number Diff line change
@@ -1,7 +1,15 @@
'use client'

import { AnimatePresence, motion } from 'framer-motion'
import { type CSSProperties, useCallback, useEffect, useMemo, useRef, useState } from 'react'
import {
type CSSProperties,
useCallback,
useEffect,
useLayoutEffect,
useMemo,
useRef,
useState
} from 'react'

import { useRealtimeVoiceSession } from '@/realtime/provider'

Expand Down Expand Up @@ -357,7 +365,6 @@ export default function ToggleRealtime() {

const [activeIndex, setActiveIndex] = useState(0)
const transcriptListRef = useRef<HTMLDivElement | null>(null)
const transcriptBottomRef = useRef<HTMLDivElement | null>(null)
const stickToBottomRef = useRef(true)

useEffect(() => {
Expand All @@ -373,6 +380,7 @@ export default function ToggleRealtime() {
const phrase = languageOrder[activeIndex] ?? languageOrder[0]
const footerText = tab === 'session' ? statusText : ''
const canSendText = textDraft.trim().length > 0
const transcriptCount = transcripts.length

useEffect(() => {
const el = transcriptListRef.current
Expand All @@ -390,36 +398,36 @@ export default function ToggleRealtime() {
}
}, [])

useEffect(() => {
if (!stickToBottomRef.current) return
// Trigger on streaming updates (deltas) while the user is pinned to the bottom.
void transcripts
transcriptBottomRef.current?.scrollIntoView({ behavior: 'auto' })
}, [transcripts])
useLayoutEffect(() => {
const el = transcriptListRef.current
if (!el || !stickToBottomRef.current || transcriptCount === 0) return
el.scrollTop = el.scrollHeight
}, [transcriptCount])

const content =
tab === 'session' ? (
<div className="flex h-full min-h-0 w-full max-w-xl flex-col gap-4">
<div
ref={transcriptListRef}
className="flex-1 overflow-y-auto rounded-3xl border border-white/30 bg-[var(--lilac-elevated)]/70 p-4 shadow-xl backdrop-blur"
className="flex-1 overflow-y-auto overscroll-contain rounded-[28px] border border-white/40 bg-white/70 p-5 shadow-[0_18px_50px_rgba(53,32,73,0.08)] backdrop-blur-xl dark:border-white/20 dark:bg-[var(--lilac-elevated)]/70 dark:shadow-[0_18px_50px_rgba(10,6,20,0.5)]"
style={{ scrollbarGutter: 'stable both-edges' }}
>
{transcripts.length ? (
<div className="flex flex-col gap-3">
<div className="flex flex-col gap-4">
{transcripts.map(item => {
const isUser = item.role === 'user'
const bubbleBase =
'max-w-[92%] whitespace-pre-wrap rounded-3xl px-4 py-3 text-sm leading-relaxed shadow-sm'
'max-w-[90%] whitespace-pre-wrap rounded-[22px] px-4 py-3 text-sm leading-relaxed shadow-md'
const bubbleClass = isUser
? `${bubbleBase} self-end bg-[var(--lilac-ink)] text-[var(--lilac-surface)]`
: `${bubbleBase} self-start border border-white/30 bg-white/70 text-[var(--lilac-ink)] dark:border-white/22 dark:bg-white/14 dark:text-[var(--lilac-ink)]`
? `${bubbleBase} self-end bg-[linear-gradient(135deg,rgba(53,32,73,0.95),rgba(80,52,110,0.92))] text-[var(--lilac-surface)]`
: `${bubbleBase} self-start border border-white/30 bg-white/70 text-[var(--lilac-ink)] dark:border-white/22 dark:bg-white/12 dark:text-[var(--lilac-ink)]`
const label = isUser ? 'You' : 'Lilac'
const text = item.text?.trim() ? item.text : '…'

return (
<div key={item.id} className="flex flex-col gap-1">
<div
className={`px-1 font-semibold text-[10px] uppercase tracking-[0.18em] ${
className={`px-1 font-semibold text-[11px] uppercase tracking-[0.2em] ${
isUser ? 'text-right text-[var(--lilac-ink-muted)]' : 'text-[var(--lilac-ink-muted)]'
}`}
>
Expand All @@ -430,7 +438,6 @@ export default function ToggleRealtime() {
</div>
)
})}
<div ref={transcriptBottomRef} />
</div>
) : (
<div className="flex h-full flex-col items-center justify-center px-6 text-center">
Expand All @@ -449,14 +456,13 @@ export default function ToggleRealtime() {
<p className="mt-6 text-[var(--lilac-ink-muted)] text-sm">
Your spoken conversation will appear here as a live transcript.
</p>
<div ref={transcriptBottomRef} />
</div>
)}
</div>
<div className="rounded-3xl border border-white/25 bg-[var(--lilac-elevated)]/80 p-4 text-[var(--lilac-ink)] shadow-lg backdrop-blur">
<div className="rounded-[28px] border border-white/35 bg-white/75 p-4 text-[var(--lilac-ink)] shadow-[0_16px_40px_rgba(53,32,73,0.08)] backdrop-blur-xl dark:border-white/20 dark:bg-[var(--lilac-elevated)]/80 dark:shadow-[0_16px_40px_rgba(10,6,20,0.45)]">
<div className="flex flex-col gap-3 sm:flex-row sm:items-end">
<textarea
className="min-h-[64px] w-full flex-1 resize-none rounded-2xl border border-white/30 bg-white/70 px-4 py-3 text-[var(--lilac-ink)] text-sm outline-none transition focus:border-white/70 focus:bg-white dark:bg-white/10 dark:focus:border-white/30 dark:focus:bg-white/20"
className="min-h-[64px] w-full flex-1 resize-none rounded-[22px] border border-white/30 bg-white/75 px-4 py-3 text-[var(--lilac-ink)] text-sm outline-none transition focus:border-white/70 focus:bg-white/90 dark:bg-white/10 dark:focus:border-white/30 dark:focus:bg-white/20"
onChange={event => setTextDraft(event.target.value)}
placeholder="Type to translate or speak back."
rows={2}
Expand Down
9 changes: 7 additions & 2 deletions src/realtime/provider.tsx
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
'use client'

import { createContext, useCallback, useContext, useMemo, useRef, useState } from 'react'
import { createContext, useCallback, useContext, useEffect, useMemo, useRef, useState } from 'react'

import { createRealtimeSession } from '@/app/actions/realtime'

Expand Down Expand Up @@ -134,6 +134,7 @@ export function RealtimeProvider({ children }: { children: React.ReactNode }) {
const peerRef = useRef<RTCPeerConnection | null>(null)
const localRef = useRef<MediaStream | null>(null)
const turnDelaySecondsRef = useRef<number>(getInitialTurnDelaySeconds())
const latestTimelineItemIdRef = useRef<string | null>(null)
// Cancels in-flight `start()` calls and prevents multiple concurrent sessions.
const startGenerationRef = useRef(0)
// Stable transcript item id we choose for a given response_id (so we don't "split" a message mid-stream).
Expand Down Expand Up @@ -183,6 +184,10 @@ export function RealtimeProvider({ children }: { children: React.ReactNode }) {
setTranscripts(prev => orderTranscriptsByPreviousItemId(prev, previousItemIdByIdRef.current))
}, [])

useEffect(() => {
latestTimelineItemIdRef.current = transcripts.at(-1)?.id ?? null
}, [transcripts])

const upsertTranscript = useCallback(
(update: {
id: string
Expand Down Expand Up @@ -854,7 +859,7 @@ export function RealtimeProvider({ children }: { children: React.ReactNode }) {
const trimmed = text.trim()
if (!trimmed || !dataChannel) return false
const id = crypto.randomUUID()
const previousItemId = latestCommittedInputItemIdRef.current
const previousItemId = latestTimelineItemIdRef.current
previousItemIdByIdRef.current.set(id, previousItemId ?? null)
latestCommittedInputItemIdRef.current = id
upsertTranscript({
Expand Down