From 3085c065085d15a284b37847470fe0182c9a6c67 Mon Sep 17 00:00:00 2001
From: Anurag Tomer <atomer@nvidia.com>
Date: Fri, 11 Jul 2025 10:14:35 +0530
Subject: [PATCH 1/3] feat: tts: adding speed parameter for kokoro

---
 riva/proto/riva_tts.proto | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/riva/proto/riva_tts.proto b/riva/proto/riva_tts.proto
index 024a78b..37e8bce 100644
--- a/riva/proto/riva_tts.proto
+++ b/riva/proto/riva_tts.proto
@@ -89,6 +89,9 @@ message SynthesizeSpeechRequest {
   // grapheme and corresponding phoneme separated by double spaces.
   string custom_dictionary = 7;
 
+  // Speed of generated audio, ranges between 0.5-2.0
+  double speed = 8;
+
   // The ID to be associated with the request. If provided, this will be
   // returned in the corresponding response.
   RequestId id = 100;

From 84337dbb94e4dc1b5cb081f1988a365e67895cd0 Mon Sep 17 00:00:00 2001
From: Anurag Tomer <atomer@nvidia.com>
Date: Tue, 6 Jan 2026 16:33:18 +0530
Subject: [PATCH 2/3] tts: add ChatterboxData message, replacing speed (field 8)

---
 riva/proto/riva_tts.proto | 17 ++++++++++-------
 1 file changed, 10 insertions(+), 7 deletions(-)

diff --git a/riva/proto/riva_tts.proto b/riva/proto/riva_tts.proto
index 37e8bce..28dbeb8 100644
--- a/riva/proto/riva_tts.proto
+++ b/riva/proto/riva_tts.proto
@@ -61,6 +61,14 @@ message ZeroShotData {
   string transcript = 5;
 }
 
+message ChatterboxData {
+  // Audio prompt for Chatterbox model.
+  bytes audio_prompt = 1;
+
+  // Exaggeration factor for generated voice.
+  float exaggeration_factor = 2;
+}
+
 message SynthesizeSpeechRequest {
   // Text to be converted to audio
   string text = 1;
@@ -89,9 +97,8 @@ message SynthesizeSpeechRequest {
   // grapheme and corresponding phoneme separated by double spaces.
   string custom_dictionary = 7;
 
-  // Speed of generated audio, ranges between 0.5-2.0
-  double speed = 8;
-
+  // Chatterbox specific params.
+  ChatterboxData chatterbox_data = 8;
   // The ID to be associated with the request. If provided, this will be
   // returned in the corresponding response.
   RequestId id = 100;
@@ -115,7 +122,3 @@ message SynthesizeSpeechResponse {
   // The ID associated with the request
   RequestId id = 100;
 }
-
-/*
- *
- */

From 60e67e8ba30eac99d8cfb30275b03b76b6562a29 Mon Sep 17 00:00:00 2001
From: Anurag Tomer <atomer@nvidia.com>
Date: Wed, 14 Jan 2026 13:42:31 +0530
Subject: [PATCH 3/3] tts: move exaggeration_factor into ZeroShotData, drop ChatterboxData

---
 riva/proto/riva_tts.proto | 9 +--------
 1 file changed, 1 insertion(+), 8 deletions(-)

diff --git a/riva/proto/riva_tts.proto b/riva/proto/riva_tts.proto
index 28dbeb8..25109a7 100644
--- a/riva/proto/riva_tts.proto
+++ b/riva/proto/riva_tts.proto
@@ -59,14 +59,9 @@ message ZeroShotData {
   int32 quality = 4;
   // Transcript corresponding to audio_prompt.
   string transcript = 5;
-}
-
-message ChatterboxData {
-  // Audio prompt for Chatterbox model.
-  bytes audio_prompt = 1;
 
   // Exaggeration factor for generated voice.
-  float exaggeration_factor = 2;
+  float exaggeration_factor = 6;
 }
 
 message SynthesizeSpeechRequest {
@@ -97,8 +92,6 @@ message SynthesizeSpeechRequest {
   // grapheme and corresponding phoneme separated by double spaces.
   string custom_dictionary = 7;
 
-  // Chatterbox specific params.
-  ChatterboxData chatterbox_data = 8;
   // The ID to be associated with the request. If provided, this will be
   // returned in the corresponding response.
   RequestId id = 100;