Skip to content

Twilio + Ultravox + SDK issue: only possible to establish one websocket connection at a time #12

@tropxy

Description

@tropxy

Hi,

First of all, thanks for the project.
I have tried to set up a demo that uses Twilio and Ultravox together to handle a call, and that also registers some client tools with the agent.

This is the code I have:

import time
import os
import json
import base64
import asyncio
import websockets
import httpx
import ultravox_client as uv
from fastapi import FastAPI, WebSocket, Request
from fastapi.responses import HTMLResponse, JSONResponse
from fastapi.websockets import WebSocketDisconnect
from twilio.rest import Client
from twilio.twiml.voice_response import VoiceResponse, Connect, Say, Stream, Start
from dotenv import load_dotenv
load_dotenv()


def load_prompt(file_name):
    """Return the stripped contents of ``prompts/<file_name>.txt``.

    The ``prompts`` directory is resolved relative to the real location of
    this module file.

    Args:
        file_name: Base name of the prompt file, without the ``.txt`` suffix.

    Returns:
        The file's text with leading and trailing whitespace removed.

    Raises:
        FileNotFoundError: If the prompt file does not exist; the missing
            path is printed before the exception is re-raised.
    """
    module_dir = os.path.dirname(os.path.realpath(__file__))
    prompt_path = os.path.join(module_dir, "prompts", f"{file_name}.txt")

    try:
        with open(prompt_path, "r", encoding="utf-8") as handle:
            raw_text = handle.read()
    except FileNotFoundError:
        print(f"Could not find file: {prompt_path}")
        raise

    return raw_text.strip()


# Configuration
# Twilio credentials and the tunnel URL come from the environment (a .env
# file is loaded by load_dotenv() at import time); each is None when unset.
TWILIO_ACCOUNT_SID = os.getenv("TWILIO_ACCOUNT_SID")
TWILIO_AUTH_TOKEN = os.getenv("TWILIO_AUTH_TOKEN")
NGROK_URL = os.getenv("NGROK_URL")
# Local server port; falls back to 5050 when PORT is not set.
PORT = int(os.getenv("PORT", 5050))

# Read the Ultravox key from the environment like the Twilio credentials so
# the real secret never has to be committed. The 'XXXXXX' placeholder is kept
# as the fallback so behavior is unchanged when ULTRAVOX_API_KEY is not set.
ULTRAVOX_API_KEY = os.getenv("ULTRAVOX_API_KEY", 'XXXXXX')
ULTRAVOX_API_URL = 'https://api.ultravox.ai/api/calls'

# Definition of the temporary "live_agent" tool offered to the model.
# NOTE(review): the empty "client" entry presumably marks the tool as
# implemented on the client side — confirm against the Ultravox tool docs.
_LIVE_AGENT_TOOL = {
    "modelToolName": "live_agent",
    "description": "Call a live agent",
    # Disabled template showing how a required body parameter would be declared:
    # "dynamicParameters": [
    #     {
    #         "name": "symbol",
    #         "location": "PARAMETER_LOCATION_BODY",
    #         "schema": {
    #             "type": "string",
    #             "description": "Stock symbol (e.g., AAPL for Apple Inc.)"
    #             },
    #         "required": true
    #     }
    # ],
    "client": {},
}

# JSON body sent to the Ultravox API when a call is created.
ultravox_config = {
    "systemPrompt": (
        "Your name is Steve. You are receiving a phone call. "
        "Ask them their name and see how they are doing. "
        "Once the user answers run the function 'live_agent' "
        "and tell the user the output of that tool"
    ),
    "model": "fixie-ai/ultravox",
    "voice": "Mark",
    "temperature": 0.3,
    "firstSpeaker": "FIRST_SPEAKER_AGENT",
    "medium": {"twilio": {}},
    "selectedTools": [{"temporaryTool": _LIVE_AGENT_TOOL}],
}

# Application object that the route decorators in this file attach to.
app = FastAPI()

# Fail at import time when either Twilio credential is missing or empty.
# (An earlier version of this check also required TWILIO_PHONE_NUMBER.)
if not (TWILIO_ACCOUNT_SID and TWILIO_AUTH_TOKEN):
    raise ValueError("Missing Twilio configuration. Please set it in the .env file.")


async def create_ultravox_call():
    """Create a new Ultravox call and return the decoded JSON response.

    POSTs ``ultravox_config`` to ``ULTRAVOX_API_URL`` with the API key in the
    ``X-API-Key`` header.

    Returns:
        The parsed JSON body of the response; the incoming-call handler reads
        ``joinUrl`` from it.

    Raises:
        httpx.HTTPStatusError: If the API answers with a 4xx/5xx status.
    """
    async with httpx.AsyncClient() as client:
        response = await client.post(
            ULTRAVOX_API_URL,
            json=ultravox_config,
            headers={'X-API-Key': ULTRAVOX_API_KEY}
        )
    # Surface API failures (bad key, invalid config, ...) here instead of
    # returning the error payload, which would only fail later as a KeyError
    # when the caller looks up 'joinUrl'.
    response.raise_for_status()
    return response.json()



# Module-level Ultravox SDK session used by the event handlers and the tool
# registration in this file.
# A set literal is required here: set("debug") iterates the string and yields
# {"d", "e", "b", "u", "g"} instead of the single message type "debug".
session = uv.UltravoxSession(experimental_messages={"debug"})
# Placeholder; nothing in the visible code assigns or reads it afterwards.
last_transcript = None
# Set by the status handler on disconnect and by the error handler.
done = asyncio.Event()

@session.on("status")
def on_status():
    """Print the session's current status and react to two specific states.

    While listening, a "User:" prompt is printed; on disconnect the ``done``
    event is set. Every other status is only printed.
    """
    status = session.status
    print(f"status: {status}")

    if status == uv.UltravoxSessionStatus.LISTENING:
        # Make it clear that the user is expected to speak next.
        print("User:  ", end="\r")
        return

    if status == uv.UltravoxSessionStatus.DISCONNECTED:
        done.set()

@session.on("experimental_message")
def on_experimental_message(message):
    """Print an experimental message emitted by the session."""
    log_line = f"Received experimental message: {message}"
    print(log_line)

@session.on("error")
def on_error(error):
    """Print the reported session error, then set the ``done`` event."""
    report = f"Error: {error}"
    print(report)
    done.set()

async def live_agent_tool(parameters: dict) -> dict:
    """Client-side implementation of the ``live_agent`` tool.

    Args:
        parameters: Arguments supplied with the tool invocation; not used.

    Returns:
        A dict with a single ``"result"`` key holding the message for the agent.
    """
    # The earlier body also built an unused dict with
    # ``datetime.date.today()`` after ``from datetime import datetime``. That
    # lookup raises AttributeError (``datetime.date`` is a method of the
    # datetime class there, not the ``date`` type), so the tool crashed before
    # it could return. The dead code is dropped; the return value is unchanged.
    print("live agent called")
    return {"result": "Agent has been called. Connecting to live agent..."}

# Bind the "live_agent" tool name declared in ultravox_config to its local handler.
session.register_tool_implementation(
    "live_agent",
    live_agent_tool,
)
    

@app.get("/", response_class=JSONResponse)
async def index_page():
    """Return a small JSON message saying the server is running."""
    status_payload = {"message": "Twilio Media Stream Server is running!"}
    return status_payload


@app.api_route("/incoming-call", methods=["GET", "POST"])
async def handle_incoming_call(request: Request):
    """Handle incoming call and return TwiML response to connect to Media Stream.

    Creates an Ultravox call, then answers with TwiML that speaks a short
    greeting and opens a ``<Connect><Stream>`` to the call's ``joinUrl``.

    Args:
        request: The incoming HTTP request; it is not inspected.

    Returns:
        An ``HTMLResponse`` with media type ``application/xml`` containing the
        TwiML. If anything raises while building it, the error is printed and
        a TwiML apology message is returned instead.
    """
    # The summary above used to sit inside the ``try`` block, where it was a
    # no-op string expression rather than the function's docstring.
    try:
        ultravox_response = await create_ultravox_call()
        print(f"Ultravox response: {ultravox_response}")
        join_url = ultravox_response['joinUrl']
        print(f"Join URL: {join_url}")
        response = VoiceResponse()
        # <Say> punctuation to improve text-to-speech flow
        response.say("Please wait while we connect your call to the AI voice assistant")
        response.pause(length=1)
        response.say("Ok, you can start talking!")
        # <Connect><Stream> starts a bidirectional media stream with Twilio:
        # https://www.twilio.com/docs/voice/twiml/stream
        connect = Connect()
        connect.stream(url=join_url, name='ultravox')
        response.append(connect)
        print(response)
        # NOTE(review): left disabled on purpose — the author reports that the
        # call drops immediately when the SDK session also joins this URL.
        # Presumably the SDK and Twilio's stream compete for the same call;
        # confirm before re-enabling.
        #await session.join_call(join_url)
        return HTMLResponse(content=str(response), media_type="application/xml")
    except Exception as e:
        # Top-level boundary for the webhook: always answer with valid TwiML
        # so the caller hears an apology instead of a failed request.
        print(f"An error occurred: {str(e)}")
        response = VoiceResponse()
        response.say("Sorry, there was an error connecting your call.")
        return HTMLResponse(content=str(response), media_type="application/xml")

if __name__ == "__main__":
    import uvicorn

    # Use the PORT setting from the configuration section (default 5050)
    # instead of repeating the literal, so the PORT environment variable is
    # actually honoured when the script is run directly.
    uvicorn.run(app, host="0.0.0.0", port=PORT)

As is evident above, await session.join_call(join_url) is commented out because, when it is used, the call drops immediately. However, the join_call function itself works, and the Python SDK establishes a websocket connection.

I have done more tests with the code above (where join_call is not called), together with the example code in this repo:
https://github.com/fixie-ai/ultravox-client-sdk-python/blob/main/example/ultravox_example/client.py

When I set up the call and let the AI agent talk first, before running the client.py example, something funny happens: this time it is the Python SDK client that doesn't connect, as shown in these logs:

INFO:root:status: UltravoxSessionStatus.CONNECTING
DEBUG:websockets.client:= connection is CONNECTING
DEBUG:websockets.client:> GET /calls/bb7ba635-3a9d-494c-8210-bcce85e1b269/telephony?clientVersion=python_0.0.9&apiVersion=1 HTTP/1.1
DEBUG:websockets.client:> Host: prod-voice-pgaenaxiea-uc.a.run.app
DEBUG:websockets.client:> Upgrade: websocket
DEBUG:websockets.client:> Connection: Upgrade
DEBUG:websockets.client:> Sec-WebSocket-Key: Yq7G8rK1b34mV6S91Y/G7Q==
DEBUG:websockets.client:> Sec-WebSocket-Version: 13
DEBUG:websockets.client:> Sec-WebSocket-Extensions: permessage-deflate; client_max_window_bits
DEBUG:websockets.client:> User-Agent: Python/3.13 websockets/14.2
DEBUG:websockets.client:< HTTP/1.1 101 Switching Protocols
DEBUG:websockets.client:< sec-websocket-accept: zBatvJAF0shgvtv49lSzws2/wXo=
DEBUG:websockets.client:< sec-websocket-extensions: permessage-deflate
DEBUG:websockets.client:< date: Sun, 06 Apr 2025 16:33:04 GMT
DEBUG:websockets.client:< server: Google Frontend
DEBUG:websockets.client:< traceparent: 00-974b88ae1829a79cb1fa42746f84262e-334f541b47270ddd-01
DEBUG:websockets.client:< x-cloud-trace-context: 974b88ae1829a79cb1fa42746f84262e/3697266295229058525;o=1
DEBUG:websockets.client:< Upgrade: websocket
DEBUG:websockets.client:< Connection: Upgrade
DEBUG:websockets.client:< Alt-Svc: h3=":443"; ma=2592000,h3-29=":443"; ma=2592000
DEBUG:websockets.client:= connection is OPEN
DEBUG:websockets.client:< CLOSE 4409 (private use) Conflict [10 bytes]
DEBUG:websockets.client:> CLOSE 4409 (private use) Conflict [10 bytes]
DEBUG:websockets.client:= connection is CLOSING
INFO:livekit:livekit_ffi::server:133:livekit_ffi::server - initializing ffi server v0.12.3
INFO:livekit:livekit_ffi::cabi:36:livekit_ffi::cabi - initializing ffi server v0.12.3
DEBUG:websockets.client:< EOF
DEBUG:websockets.client:> EOF
DEBUG:websockets.client:= connection is CLOSED
DEBUG:websockets.client:x closing TCP connection

The conclusion is that the websocket server from Ultravox seems to only be able to handle one websocket connection at a time...

I have checked everywhere I could, and I don't see a setting that I have missed and that is needed to overcome this issue. If possible, please check my code and let me know if there is something I should change.

Thank you!

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions