From 3085c065085d15a284b37847470fe0182c9a6c67 Mon Sep 17 00:00:00 2001 From: Anurag Tomer Date: Fri, 11 Jul 2025 10:14:35 +0530 Subject: [PATCH 1/3] feat: tts: adding speed parameter for kokoro --- riva/proto/riva_tts.proto | 3 +++ 1 file changed, 3 insertions(+) diff --git a/riva/proto/riva_tts.proto b/riva/proto/riva_tts.proto index 024a78b..37e8bce 100644 --- a/riva/proto/riva_tts.proto +++ b/riva/proto/riva_tts.proto @@ -89,6 +89,9 @@ message SynthesizeSpeechRequest { // grapheme and corresponding phoneme separated by double spaces. string custom_dictionary = 7; + // Speed of generated audio, ranges between 0.5-2.0 + double speed = 8; + // The ID to be associated with the request. If provided, this will be // returned in the corresponding response. RequestId id = 100; From 84337dbb94e4dc1b5cb081f1988a365e67895cd0 Mon Sep 17 00:00:00 2001 From: Anurag Tomer Date: Tue, 6 Jan 2026 16:33:18 +0530 Subject: [PATCH 2/3] Adding ChatterBox Data --- riva/proto/riva_tts.proto | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/riva/proto/riva_tts.proto b/riva/proto/riva_tts.proto index 37e8bce..28dbeb8 100644 --- a/riva/proto/riva_tts.proto +++ b/riva/proto/riva_tts.proto @@ -61,6 +61,14 @@ message ZeroShotData { string transcript = 5; } +message ChatterboxData { + // Audio prompt for Chatterbox model. + bytes audio_prompt = 1; + + // Exaggeration factor for generated voice. + float exaggeration_factor = 2; +} + message SynthesizeSpeechRequest { // Text to be converted to audio string text = 1; @@ -89,9 +97,8 @@ message SynthesizeSpeechRequest { // grapheme and corresponding phoneme separated by double spaces. string custom_dictionary = 7; - // Speed of generated audio, ranges between 0.5-2.0 - double speed = 8; - + // Chatterbox specific params. + ChatterboxData chatterbox_data = 8; // The ID to be associated with the request. If provided, this will be // returned in the corresponding response. RequestId id = 100; @@ -115,7 +122,3 @@ message SynthesizeSpeechResponse { // The ID associated with the request RequestId id = 100; } - -/* - * - */ From 60e67e8ba30eac99d8cfb30275b03b76b6562a29 Mon Sep 17 00:00:00 2001 From: Anurag Tomer Date: Wed, 14 Jan 2026 13:42:31 +0530 Subject: [PATCH 3/3] Adding ChatterBox changes --- riva/proto/riva_tts.proto | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/riva/proto/riva_tts.proto b/riva/proto/riva_tts.proto index 28dbeb8..25109a7 100644 --- a/riva/proto/riva_tts.proto +++ b/riva/proto/riva_tts.proto @@ -59,14 +59,9 @@ message ZeroShotData { int32 quality = 4; // Transcript corresponding to audio_prompt. string transcript = 5; -} - -message ChatterboxData { - // Audio prompt for Chatterbox model. - bytes audio_prompt = 1; // Exaggeration factor for generated voice. - float exaggeration_factor = 2; + float exaggeration_factor = 6; } message SynthesizeSpeechRequest { @@ -97,8 +92,6 @@ message SynthesizeSpeechRequest { // grapheme and corresponding phoneme separated by double spaces. string custom_dictionary = 7; - // Chatterbox specific params. - ChatterboxData chatterbox_data = 8; // The ID to be associated with the request. If provided, this will be // returned in the corresponding response. RequestId id = 100;