diff --git a/lexicons/Characters-Locations-System/lexicon.pls b/lexicons/Characters-Locations-System/lexicon.pls
index 2f939b1..eb440e8 100644
--- a/lexicons/Characters-Locations-System/lexicon.pls
+++ b/lexicons/Characters-Locations-System/lexicon.pls
@@ -33,7 +33,7 @@
Aether
aether
- ˈi.θɚ
+ ˈiθɛr
Aetherial
@@ -48,12 +48,12 @@
Aetheryte
aetheryte
- ˈi.θɚraɪt
+ ˈiθɛraɪt
Aetherytes
aetherytes
- ˈi.θɚraɪtz
+ ˈiθɛraɪtz
Ahm Araeng
diff --git a/src/TextToTalk.Data/Model/NpcVoice.cs b/src/TextToTalk.Data/Model/NpcVoice.cs
index bce72af..167eed2 100644
--- a/src/TextToTalk.Data/Model/NpcVoice.cs
+++ b/src/TextToTalk.Data/Model/NpcVoice.cs
@@ -8,4 +8,6 @@ public class NpcVoice
public Guid NpcId { get; init; }
public int VoicePresetId { get; init; }
+
+ public string? VoiceBackend { get; init; } // Backend this voice assignment belongs to; NpcId + VoiceBackend together form the composite lookup key
}
\ No newline at end of file
diff --git a/src/TextToTalk.Data/Model/PlayerVoice.cs b/src/TextToTalk.Data/Model/PlayerVoice.cs
index 26e62bc..bf4bee1 100644
--- a/src/TextToTalk.Data/Model/PlayerVoice.cs
+++ b/src/TextToTalk.Data/Model/PlayerVoice.cs
@@ -8,4 +8,6 @@ public class PlayerVoice
public Guid PlayerId { get; init; }
public int VoicePresetId { get; init; }
+
+ public string? VoiceBackend { get; init; } // Backend this voice assignment belongs to; PlayerId + VoiceBackend together form the composite lookup key
}
\ No newline at end of file
diff --git a/src/TextToTalk.Data/Services/NpcCollection.cs b/src/TextToTalk.Data/Services/NpcCollection.cs
index af0f8ea..7ca836e 100644
--- a/src/TextToTalk.Data/Services/NpcCollection.cs
+++ b/src/TextToTalk.Data/Services/NpcCollection.cs
@@ -9,21 +9,8 @@ public class NpcCollection(ILiteDatabase db)
private const string NpcCollectionName = "npc";
private const string NpcVoiceCollectionName = "npc_voice";
- ///
- /// Fetches all stored NPCs from the database.
- ///
- /// The stored NPCs.
- public IEnumerable FetchAllNpcs()
- {
- return GetNpcCollection().FindAll();
- }
+ public IEnumerable FetchAllNpcs() => GetNpcCollection().FindAll();
- ///
- /// Fetches an NPC from the database using their name and world.
- ///
- /// The NPC's name.
- /// The NPC, or null if they couldn't be found.
- /// If the NPC could be found.
public bool TryFetchNpcByName(string name, [NotNullWhen(true)] out Npc? npc)
{
var collection = GetNpcCollection();
@@ -34,24 +21,17 @@ public bool TryFetchNpcByName(string name, [NotNullWhen(true)] out Npc? npc)
}
///
- /// Fetches an NPC voice from the database using their local ID.
+ /// Fetches a specific voice for an NPC and a specific backend.
///
- /// The NPC's local ID.
- /// The voice info, or null if it couldn't be found.
- /// If the voice could be found.
- public bool TryFetchNpcVoiceByNpcId(Guid id, [NotNullWhen(true)] out NpcVoice? voice)
+ public bool TryFetchNpcVoiceByCompositeKey(Guid npcId, string backend, [NotNullWhen(true)] out NpcVoice? voice)
{
var collection = GetNpcVoiceCollection();
voice = collection.Query()
- .Where(v => v.NpcId == id)
+ .Where(v => v.NpcId == npcId && v.VoiceBackend == backend)
.FirstOrDefault();
return voice != null;
}
- ///
- /// Stores an NPC in the database.
- ///
- /// The NPC to store.
public void StoreNpc(Npc npc)
{
var collection = GetNpcCollection();
@@ -61,10 +41,6 @@ public void StoreNpc(Npc npc)
}
}
- ///
- /// Stores an NPC voice in the database.
- ///
- /// The NPC voice to store.
public void StoreNpcVoice(NpcVoice voice)
{
var collection = GetNpcVoiceCollection();
@@ -74,49 +50,39 @@ public void StoreNpcVoice(NpcVoice voice)
}
}
+ public void DeleteNpcById(Guid id) => GetNpcCollection().Delete(id);
+
///
- /// Deletes an NPC from the database using their local ID.
+ /// Deletes ALL voice presets for a specific NPC (e.g., when the NPC is deleted).
///
- /// The NPC's ID.
- public void DeleteNpcById(Guid id)
+ public void DeleteNpcVoiceByNpcId(Guid id)
{
- var collection = GetNpcCollection();
- collection.Delete(id);
+ var collection = GetNpcVoiceCollection();
+ collection.DeleteMany(v => v.NpcId == id);
}
///
- /// Deletes an NPC voice from the database using their local ID.
+ /// Deletes a specific voice preset for one NPC on a specific backend.
///
- /// The NPC's ID.
- public void DeleteNpcVoiceByNpcId(Guid id)
+ public void DeleteNpcVoiceByCompositeKey(Guid npcId, string backend)
{
var collection = GetNpcVoiceCollection();
- collection.DeleteMany(v => v.NpcId == id);
+ collection.DeleteMany(v => v.NpcId == npcId && v.VoiceBackend == backend);
}
private ILiteCollection GetNpcCollection()
{
var collection = db.GetCollection(NpcCollectionName);
- EnsureIndices(collection);
+ collection.EnsureIndex(npc => npc.Name);
return collection;
}
private ILiteCollection GetNpcVoiceCollection()
{
var collection = db.GetCollection(NpcVoiceCollectionName);
- EnsureIndices(collection);
- return collection;
- }
-
- private static void EnsureIndices(ILiteCollection collection)
- {
- // "By default, an index over _id is created upon the first insertion."
- // https://www.litedb.org/docs/indexes/
- collection.EnsureIndex(npc => npc.Name);
- }
-
- private static void EnsureIndices(ILiteCollection collection)
- {
+ // Added index for the backend to speed up composite queries
collection.EnsureIndex(v => v.NpcId);
+ collection.EnsureIndex(v => v.VoiceBackend);
+ return collection;
}
}
\ No newline at end of file
diff --git a/src/TextToTalk.Data/Services/PlayerCollection.cs b/src/TextToTalk.Data/Services/PlayerCollection.cs
index fe619c8..7f8fa12 100644
--- a/src/TextToTalk.Data/Services/PlayerCollection.cs
+++ b/src/TextToTalk.Data/Services/PlayerCollection.cs
@@ -9,22 +9,8 @@ public class PlayerCollection(ILiteDatabase db)
private const string PlayerCollectionName = "player";
private const string PlayerVoiceCollectionName = "player_voice";
- ///
- /// Fetches all stored players from the database.
- ///
- /// The stored players.
- public IEnumerable FetchAllPlayers()
- {
- return GetPlayerCollection().FindAll();
- }
+ public IEnumerable FetchAllPlayers() => GetPlayerCollection().FindAll();
- ///
- /// Fetches a player from the database using their name and world.
- ///
- /// The player's name.
- /// The player's world ID.
- /// The player, or null if they couldn't be found.
- /// If the player could be found.
public bool TryFetchPlayerByNameAndWorld(string name, uint worldId, [NotNullWhen(true)] out Player? player)
{
var collection = GetPlayerCollection();
@@ -34,28 +20,7 @@ public bool TryFetchPlayerByNameAndWorld(string name, uint worldId, [NotNullWhen
return player != null;
}
- ///
- /// Fetches a player voice from the database using their local ID.
- ///
- /// The player's local ID.
- /// The voice info, or null if it couldn't be found.
- /// If the voice could be found.
- public bool TryFetchPlayerVoiceByPlayerId(Guid id, [NotNullWhen(true)] out PlayerVoice? voice)
- {
- var collection = GetPlayerVoiceCollection();
- voice = collection.Query()
- .Where(v => v.PlayerId == id)
- .FirstOrDefault();
- return voice != null;
- }
-
- ///
- /// Fetches a player from the database using their name only.
- ///
- /// The player's name.
- /// The player, or null if they couldn't be found.
- /// If the player could be found.
- public bool TryFetchPlayerByName(string name, [NotNullWhen(true)] out Player? player)
+ public bool TryFetchPlayerByName(string name,[NotNullWhen(true)] out Player? player)
{
var collection = GetPlayerCollection();
player = collection.Query()
@@ -65,9 +30,17 @@ public bool TryFetchPlayerByName(string name, [NotNullWhen(true)] out Player? pl
}
///
- /// Stores a player in the database.
+ /// Fetches a player voice using the Player's Guid and the specific backend name.
///
- /// The player to store.
+ public bool TryFetchPlayerVoiceByCompositeKey(Guid playerId, string backend, [NotNullWhen(true)] out PlayerVoice? voice)
+ {
+     // A stored voice is identified by its owning player together with the backend it was saved for.
+     voice = GetPlayerVoiceCollection().Query()
+         .Where(v => v.PlayerId == playerId && v.VoiceBackend == backend)
+         .FirstOrDefault();
+     return voice is not null;
+ }
+
public void StorePlayer(Player player)
{
var collection = GetPlayerCollection();
@@ -77,10 +50,6 @@ public void StorePlayer(Player player)
}
}
- ///
- /// Stores a player voice in the database.
- ///
- /// The player voice to store.
public void StorePlayerVoice(PlayerVoice voice)
{
var collection = GetPlayerVoiceCollection();
@@ -90,50 +59,41 @@ public void StorePlayerVoice(PlayerVoice voice)
}
}
+ public void DeletePlayerById(Guid id) => GetPlayerCollection().Delete(id);
+
///
- /// Deletes a player from the database using their local ID.
+ /// Deletes all voices associated with a player (Cleanup).
///
- /// The player's ID.
- public void DeletePlayerById(Guid id)
+ public void DeletePlayerVoiceByPlayerId(Guid id)
{
- var collection = GetPlayerCollection();
- collection.Delete(id);
+ var collection = GetPlayerVoiceCollection();
+ collection.DeleteMany(v => v.PlayerId == id);
}
///
- /// Deletes a player voice from the database using their local ID.
+ /// Deletes the specific voice preset for a player on a specific backend.
///
- /// The player's ID.
- public void DeletePlayerVoiceByPlayerId(Guid id)
+ public void DeletePlayerVoiceByCompositeKey(Guid playerId, string backend)
{
var collection = GetPlayerVoiceCollection();
- collection.DeleteMany(v => v.PlayerId == id);
+ // Match on the owning player's id (PlayerId) together with the backend name.
+ collection.DeleteMany(v => v.PlayerId == playerId && v.VoiceBackend == backend);
}
private ILiteCollection GetPlayerCollection()
{
var collection = db.GetCollection(PlayerCollectionName);
- EnsureIndices(collection);
+ collection.EnsureIndex(p => p.Name);
+ collection.EnsureIndex(p => p.WorldId);
return collection;
}
private ILiteCollection GetPlayerVoiceCollection()
{
var collection = db.GetCollection(PlayerVoiceCollectionName);
- EnsureIndices(collection);
- return collection;
- }
-
- private static void EnsureIndices(ILiteCollection collection)
- {
- // "By default, an index over _id is created upon the first insertion."
- // https://www.litedb.org/docs/indexes/
- collection.EnsureIndex(p => p.Name);
- collection.EnsureIndex(p => p.WorldId);
- }
-
- private static void EnsureIndices(ILiteCollection collection)
- {
+ // Added index for the backend to speed up composite queries
collection.EnsureIndex(v => v.PlayerId);
+ collection.EnsureIndex(v => v.VoiceBackend);
+ return collection;
}
}
\ No newline at end of file
diff --git a/src/TextToTalk.Lexicons/LexiconManager.cs b/src/TextToTalk.Lexicons/LexiconManager.cs
index 0a69756..d4b58b4 100644
--- a/src/TextToTalk.Lexicons/LexiconManager.cs
+++ b/src/TextToTalk.Lexicons/LexiconManager.cs
@@ -2,6 +2,8 @@
using System.Collections.Generic;
using System.IO;
using System.Linq;
+using System.Reflection.Metadata;
+using System.Text.RegularExpressions;
using System.Xml.Linq;
namespace TextToTalk.Lexicons;
@@ -88,10 +90,12 @@ public virtual void RemoveLexicon(string lexiconId)
public string MakeSsml(
string text,
+ string style = "",
string? voice = null,
string? langCode = null,
int playbackRate = -1,
bool includeSpeakAttributes = true)
+
{
foreach (var (_, lexicon) in this.lexicons)
{
@@ -109,6 +113,10 @@ public string MakeSsml(
}
}
}
+ if (!string.IsNullOrEmpty(style) && voice != null)
+ {
+ text = $"{text}";
+ }
if (playbackRate >= 0)
{
@@ -125,7 +133,10 @@ public string MakeSsml(
var speakTag = " voices;
public AzureBackend(PluginConfiguration config, HttpClient http)
{
@@ -17,7 +22,14 @@ public AzureBackend(PluginConfiguration config, HttpClient http)
LexiconUtils.LoadFromConfigAzure(lexiconManager, config);
this.uiModel = new AzureBackendUIModel(config, lexiconManager);
+ this.voices = this.uiModel.voices;
this.ui = new AzureBackendUI(this.uiModel, config, lexiconManager, http, this);
+
+ }
+
+ public override void DrawStyles(IConfigUIDelegates helpers)
+ {
+ helpers.OpenVoiceStylesConfig();
}
public override void Say(SayRequest request)
@@ -34,7 +46,7 @@ public override void Say(SayRequest request)
}
_ = this.uiModel.Azure.Say(azureVoicePreset.VoiceName,
- azureVoicePreset.PlaybackRate, azureVoicePreset.Volume, request.Source, request.Text);
+ azureVoicePreset.PlaybackRate, azureVoicePreset.Volume, request.Source, request.Text, !string.IsNullOrWhiteSpace(request.Style) ? request.Style : (azureVoicePreset.Style ?? string.Empty));
}
public override void CancelAllSpeech()
@@ -82,4 +94,5 @@ protected override void Dispose(bool disposing)
this.uiModel.Azure?.Dispose();
}
}
+
}
\ No newline at end of file
diff --git a/src/TextToTalk/Backends/Azure/AzureBackendUI.cs b/src/TextToTalk/Backends/Azure/AzureBackendUI.cs
index 08b85d3..5b89744 100644
--- a/src/TextToTalk/Backends/Azure/AzureBackendUI.cs
+++ b/src/TextToTalk/Backends/Azure/AzureBackendUI.cs
@@ -1,8 +1,8 @@
using Dalamud.Bindings.ImGui;
using Dalamud.Game;
using Dalamud.Game.Text;
-using Google.Api;
using System;
+using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Net.Http;
@@ -10,6 +10,8 @@
using TextToTalk.Lexicons.Updater;
using TextToTalk.UI;
using TextToTalk.UI.Lexicons;
+using TextToTalk.UI.Windows;
+using System.Numerics;
namespace TextToTalk.Backends.Azure;
@@ -87,6 +89,10 @@ public void DrawSettings(IConfigUIDelegates helpers)
{
ImGui.TextColored(ImColor.Red, "You have no presets. Please create one using the \"New preset\" button.");
}
+ else if (currentVoicePreset == null && presets.Count > 0)
+ {
+ config.SetCurrentVoicePreset(presets.First().Id);
+ }
BackendUI.NewPresetButton($"New preset##{MemoizedId.Create()}", this.config);
@@ -111,14 +117,76 @@ public void DrawSettings(IConfigUIDelegates helpers)
{
var voices = this.model.Voices;
- string?[] voiceArray = voices.ToArray();
+
+ string?[] voiceArray = voices
+ .Where(v => v != null && !string.IsNullOrEmpty(v.ShortName))
+ .Select(v => v.ShortName)
+ .ToArray();
+
+ string[] displayArray = voices
+ .Where(v => v != null && !string.IsNullOrEmpty(v.ShortName))
+ .Select(v => v.Styles?.Count > 1
+ ? $"{v.ShortName} [Styles Available]"
+ : v.ShortName!)
+ .ToArray();
+
var voiceIndex = Array.IndexOf(voiceArray, currentVoicePreset.VoiceName);
- if (ImGui.Combo($"Voice##{MemoizedId.Create()}", ref voiceIndex, voiceArray, voices.Count))
+ // 1. Determine if the selected voice has styles. voiceIndex is a position in the filtered voiceArray, so it must not index the unfiltered voices list; look the voice up by its short name instead.
+ bool previewHasStyles = voiceIndex >= 0 && voices.FirstOrDefault(v => v?.ShortName == currentVoicePreset.VoiceName)?.Styles?.Count > 1;
+ string previewName = voiceIndex >= 0 ? voiceArray[voiceIndex] : "Select a voice...";
+
+ // 2. Start combo with an empty preview string so we can draw our own
+ if (ImGui.BeginCombo($"Voice##{MemoizedId.Create()}", "", ImGuiComboFlags.HeightLarge))
{
- currentVoicePreset.VoiceName = voiceArray[voiceIndex];
- this.config.Save();
+ var filteredVoices = voices.Where(v => v != null && !string.IsNullOrEmpty(v.ShortName)).ToList();
+
+ for (int i = 0; i < filteredVoices.Count; i++)
+ {
+ var v = filteredVoices[i];
+ bool isSelected = (voiceIndex == i);
+ bool hasStyles = v.Styles?.Count > 1;
+
+ if (ImGui.Selectable($"##{v.ShortName}_{i}", isSelected))
+ {
+ voiceIndex = i;
+ currentVoicePreset.VoiceName = voiceArray[voiceIndex];
+ this.config.Save();
+ }
+
+ ImGui.SameLine();
+ ImGui.SetCursorPosX(ImGui.GetCursorPosX() + ImGui.GetStyle().ItemSpacing.X);
+ ImGui.Text(v.ShortName);
+
+ if (hasStyles)
+ {
+ ImGui.SameLine();
+ ImGui.TextColored(new Vector4(0.55f, 0.75f, 1.0f, 1.0f), "[Styles Available]");
+ }
+
+ if (isSelected) ImGui.SetItemDefaultFocus();
+ }
+ ImGui.EndCombo();
}
+ // 3. Overlay the custom text on the Combo box itself
+ // We calculate the position relative to the last item (the Combo box)
+ ImGui.SameLine();
+ float comboRectMinX = ImGui.GetItemRectMin().X;
+ float comboRectMinY = ImGui.GetItemRectMin().Y;
+ float stylePadding = ImGui.GetStyle().FramePadding.X;
+
+ // Move cursor to inside the combo box frame
+ ImGui.SetCursorScreenPos(new Vector2(comboRectMinX + stylePadding, comboRectMinY + ImGui.GetStyle().FramePadding.Y - 3.0f));
+
+ // Draw the Name
+ ImGui.Text(previewName);
+
+ // Draw the Tag if applicable
+ if (previewHasStyles)
+ {
+ ImGui.SameLine();
+ ImGui.TextColored(new Vector4(0.55f, 0.75f, 1.0f, 1.0f), "[Styles Available]");
+ }
switch (voices.Count)
{
case 0:
@@ -126,7 +194,7 @@ public void DrawSettings(IConfigUIDelegates helpers)
"No voices are available on this voice engine for the current region.\n" +
"Please log in using a different region.");
break;
- case > 0 when !voices.Any(v => v == currentVoicePreset.VoiceName):
+ case > 0 when !voiceArray.Any(v => v == currentVoicePreset.VoiceName):
BackendUI.ImGuiVoiceNotSelected();
break;
}
@@ -146,6 +214,33 @@ public void DrawSettings(IConfigUIDelegates helpers)
currentVoicePreset.Volume = (float)Math.Round((double)volume / 100, 2);
this.config.Save();
}
+
+ var voiceStyles = new List();
+ var voiceDetails = this.backend?.voices?.OrderBy(v => v.ShortName).FirstOrDefault(v => v?.ShortName == currentVoicePreset?.VoiceName);
+ // The styles list holds a single empty string when a voice has no styles; an unknown voice or an empty list is shown the same way.
+ if ((voiceDetails?.Styles?.Count ?? 0) <= 1)
+ {
+ ImGui.BeginDisabled();
+ if (ImGui.BeginCombo("Style", "No styles available for this voice"))
+ {
+ ImGui.EndCombo();
+ }
+
+ ImGui.EndDisabled();
+ }
+ else if (voiceDetails?.Styles != null && voiceDetails.Styles.Count > 0)
+ {
+ voiceStyles.Add("");
+ voiceStyles.AddRange(voiceDetails.Styles);
+ var styleIndex = voiceStyles.IndexOf(currentVoicePreset.Style ?? "");
+ if (ImGui.Combo($"Style##{MemoizedId.Create()}", ref styleIndex, voiceStyles, voiceStyles.Count))
+ {
+ currentVoicePreset.Style = voiceStyles[styleIndex];
+ this.config.Save();
+ }
+ }
+ ImGui.Separator();
+
if (ImGui.Button($"Test##{MemoizedId.Create()}"))
{
var voice = currentVoicePreset;
@@ -167,6 +262,11 @@ public void DrawSettings(IConfigUIDelegates helpers)
backend.Say(request);
}
}
+ ImGui.SameLine();
+ if (ImGui.Button($"Configure Voice Styles##{MemoizedId.Create()}"))
+ {
+ VoiceStyles.Instance?.ToggleStyle();
+ }
this.lexiconComponent.Draw();
ImGui.Spacing();
diff --git a/src/TextToTalk/Backends/Azure/AzureBackendUIModel.cs b/src/TextToTalk/Backends/Azure/AzureBackendUIModel.cs
index 4164292..c86d14a 100644
--- a/src/TextToTalk/Backends/Azure/AzureBackendUIModel.cs
+++ b/src/TextToTalk/Backends/Azure/AzureBackendUIModel.cs
@@ -2,6 +2,7 @@
using System.Collections.Generic;
using System.Text.RegularExpressions;
using TextToTalk.Lexicons;
+using static TextToTalk.Backends.Azure.AzureClient;
namespace TextToTalk.Backends.Azure;
@@ -12,7 +13,7 @@ public class AzureBackendUIModel
private readonly PluginConfiguration config;
private readonly LexiconManager lexiconManager;
- private List voices;
+ public List voices;
private AzureLoginInfo loginInfo;
///
@@ -28,13 +29,13 @@ public class AzureBackendUIModel
///
/// Gets the available voices.
///
- public IReadOnlyList Voices => this.voices;
+ public IReadOnlyList Voices => this.voices;
public AzureBackendUIModel(PluginConfiguration config, LexiconManager lexiconManager)
{
this.config = config;
this.lexiconManager = lexiconManager;
- this.voices = new List();
+ this.voices = new List();
this.loginInfo = new AzureLoginInfo();
var credentials = AzureCredentialManager.LoadCredentials();
@@ -98,7 +99,7 @@ private bool TryAzureLogin()
DetailedLog.Info($"Logging into Azure region {this.loginInfo.Region}");
Azure = new AzureClient(this.loginInfo.SubscriptionKey, this.loginInfo.Region, this.lexiconManager, this.config);
// This should throw an exception if the login failed
- this.voices = Azure.GetVoices();
+ this.voices = Azure.GetVoicesWithStyles();
return true;
}
catch (Exception e)
diff --git a/src/TextToTalk/Backends/Azure/AzureClient.cs b/src/TextToTalk/Backends/Azure/AzureClient.cs
index b2271ce..14cf7a4 100644
--- a/src/TextToTalk/Backends/Azure/AzureClient.cs
+++ b/src/TextToTalk/Backends/Azure/AzureClient.cs
@@ -30,6 +30,27 @@ public TextSource GetCurrentlySpokenTextSource()
{
return this.soundQueue.GetCurrentlySpokenTextSource();
}
+ /// <summary>Queries the synthesizer for its voices and returns each one's name, short name and style list.</summary>
+ /// <remarks>Synchronously waits for the asynchronous voice query; HandleResult inspects the result before it is mapped.</remarks>
+ public List<VoiceDetails> GetVoicesWithStyles()
+ {
+     var voicesResult = this.synthesizer.GetVoicesAsync().GetAwaiter().GetResult();
+     HandleResult(voicesResult);
+
+     var details = new List<VoiceDetails>();
+     foreach (var info in voicesResult.Voices)
+     {
+         details.Add(new VoiceDetails { Name = info.Name, ShortName = info.ShortName, Styles = info.StyleList.ToList() });
+     }
+     return details;
+ }
+
+ public class VoiceDetails
+ {
+     public string Name { get; set; } = string.Empty; // voice name as reported by the voice query
+     public string ShortName { get; set; } = string.Empty; // short identifier; this is the value the settings UI stores in a preset's VoiceName
+     public List<string> Styles { get; set; } = new(); // style names for the voice; initialised so it is never null
+ }
public List GetVoices()
{
@@ -38,10 +59,11 @@ public List GetVoices()
return res.Voices.Select(voice => voice.Name).ToList();
}
- public async Task Say(string? voice, int playbackRate, float volume, TextSource source, string text)
+ public async Task Say(string? voice, int playbackRate, float volume, TextSource source, string text, string style)
{
var ssml = this.lexiconManager.MakeSsml(
text,
+ style,
voice: voice,
langCode: "en-US",
playbackRate: playbackRate,
diff --git a/src/TextToTalk/Backends/Azure/AzureVoicePreset.cs b/src/TextToTalk/Backends/Azure/AzureVoicePreset.cs
index ba07f80..a2a1a2d 100644
--- a/src/TextToTalk/Backends/Azure/AzureVoicePreset.cs
+++ b/src/TextToTalk/Backends/Azure/AzureVoicePreset.cs
@@ -17,11 +17,14 @@ public class AzureVoicePreset : VoicePreset
[JsonProperty("AzureVoiceName")] public string? VoiceName { get; set; }
+ public string? Style { get; set; }
+
public override bool TrySetDefaultValues()
{
Volume = 1.0f;
PlaybackRate = 0;
VoiceName = "en-US-JennyNeural";
+ Style = string.Empty;
EnabledBackend = TTSBackend.Azure;
return true;
}
diff --git a/src/TextToTalk/Backends/Azure/AzureVoiceStylesUI.cs b/src/TextToTalk/Backends/Azure/AzureVoiceStylesUI.cs
new file mode 100644
index 0000000..04e7747
--- /dev/null
+++ b/src/TextToTalk/Backends/Azure/AzureVoiceStylesUI.cs
@@ -0,0 +1,71 @@
+using Dalamud.Bindings.ImGui;
+using System.Linq;
+using System.Numerics;
+using TextToTalk;
+using TextToTalk.Backends;
+using TextToTalk.Backends.Azure;
+using TextToTalk.UI.Windows;
+using static TextToTalk.Backends.Azure.AzureClient;
+
+/// <summary>Voice styles window for the Azure backend; lists the styles offered by the current preset's voice.</summary>
+public class AzureVoiceStyles : IVoiceStylesWindow
+{
+    private readonly AzureBackend backend;
+    private readonly PluginConfiguration config;
+    private readonly VoiceStyles voiceStyles;
+    // Static, so shared by all instances: remembers the last copied style for the short-lived "Copied!" tooltip.
+    private static double lastCopyTime = -1.0;
+    private static string lastCopiedStyle = "";
+
+    public AzureVoiceStyles(AzureBackend backend, PluginConfiguration config, VoiceStyles voiceStyles)
+    {
+        this.backend = backend;
+        this.config = config;
+        this.voiceStyles = voiceStyles;
+    }
+
+    /// <summary>Draws the style list for the voice selected in the current Azure preset; <paramref name="helpers"/> is not used here.</summary>
+    public void Draw(IConfigUIDelegates helpers)
+    {
+        var selectedVoice = this.config.GetCurrentVoicePreset<AzureVoicePreset>()?.VoiceName;
+        // The settings combo writes the voice's ShortName into the preset, so match on that; Name is accepted as a fallback.
+        var voiceDetails = string.IsNullOrEmpty(selectedVoice) ? null : this.backend.voices?.FirstOrDefault(v => v != null && (v.ShortName == selectedVoice || v.Name == selectedVoice));
+        // The settings UI notes that a voice without styles still carries one empty entry, so blank names are dropped.
+        var styles = voiceDetails?.Styles?.Where(s => !string.IsNullOrEmpty(s)).ToList();
+        if (styles == null || styles.Count == 0)
+        {
+            ImGui.TextDisabled("No styles available for this voice.");
+            return;
+        }
+        ImGui.Spacing();
+        ImGui.PushStyleColor(ImGuiCol.Text, new Vector4(0.8f, 0.8f, 1.0f, 1.0f));
+        ImGui.TextWrapped("Note: Azure voice styles are pre-determined and specific to the voice selected");
+        ImGui.PopStyleColor();
+        // Style entries are only offered when ad-hoc style tags are enabled.
+        if (!this.config.AdHocStyleTagsEnabled)
+        {
+            return;
+        }
+        ImGui.Separator();
+        ImGui.Text("Click a style to copy its tag to clipboard:");
+        ImGui.Separator();
+        for (var i = 0; i < styles.Count; i++)
+        {
+            var style = styles[i];
+            if (ImGui.Selectable($"{style}##{i}"))
+            {
+                VoiceStyles.Instance?.CopyStyleToClipboard(style);
+                lastCopyTime = ImGui.GetTime();
+                lastCopiedStyle = style;
+            }
+            if (lastCopiedStyle == style && (ImGui.GetTime() - lastCopyTime < 1.0))
+            {
+                ImGui.SetTooltip("Copied!");
+            }
+            else if (ImGui.IsItemHovered())
+            {
+                ImGui.SetTooltip("Click to copy");
+            }
+        }
+    }
+}
\ No newline at end of file
diff --git a/src/TextToTalk/Backends/BackendUI.cs b/src/TextToTalk/Backends/BackendUI.cs
index b43d6b3..05249a8 100644
--- a/src/TextToTalk/Backends/BackendUI.cs
+++ b/src/TextToTalk/Backends/BackendUI.cs
@@ -73,9 +73,21 @@ public static void DeletePresetButton(string label, VoicePreset preset, TTSBacke
var otherPreset = voiceConfig.VoicePresets.First(p => p.Id != preset.Id);
config.SetCurrentVoicePreset(otherPreset.Id);
- voiceConfig.UngenderedVoicePresets[backend].Remove(preset.Id);
- voiceConfig.MaleVoicePresets[backend].Remove(preset.Id);
- voiceConfig.FemaleVoicePresets[backend].Remove(preset.Id);
+ // Use TryGetValue to safely access the inner dictionary for the specific backend
+ if (voiceConfig.UngenderedVoicePresets.TryGetValue(backend, out var ungendered))
+ {
+ ungendered.Remove(preset.Id);
+ }
+
+ if (voiceConfig.MaleVoicePresets.TryGetValue(backend, out var male))
+ {
+ male.Remove(preset.Id);
+ }
+
+ if (voiceConfig.FemaleVoicePresets.TryGetValue(backend, out var female))
+ {
+ female.Remove(preset.Id);
+ }
voiceConfig.VoicePresets.Remove(preset);
diff --git a/src/TextToTalk/Backends/ConfigUIDelegates.cs b/src/TextToTalk/Backends/ConfigUIDelegates.cs
index 0dc9247..6d054e5 100644
--- a/src/TextToTalk/Backends/ConfigUIDelegates.cs
+++ b/src/TextToTalk/Backends/ConfigUIDelegates.cs
@@ -6,9 +6,16 @@ public class ConfigUIDelegates : IConfigUIDelegates
{
public Action? OpenVoiceUnlockerAction { get; init; }
+ public Action? OpenVoiceStylesWindow { get; init; }
+
public void OpenVoiceUnlocker()
{
OpenVoiceUnlockerAction?.Invoke();
}
+
+ public void OpenVoiceStylesConfig()
+ {
+ OpenVoiceStylesWindow?.Invoke();
+ }
}
}
\ No newline at end of file
diff --git a/src/TextToTalk/Backends/ElevenLabs/ElevenLabsBackend.cs b/src/TextToTalk/Backends/ElevenLabs/ElevenLabsBackend.cs
index 16eb0b7..d9dfe69 100644
--- a/src/TextToTalk/Backends/ElevenLabs/ElevenLabsBackend.cs
+++ b/src/TextToTalk/Backends/ElevenLabs/ElevenLabsBackend.cs
@@ -1,4 +1,5 @@
using System;
+using System.Linq;
using System.Net;
using System.Net.Http;
using System.Threading.Tasks;
@@ -11,12 +12,19 @@ public class ElevenLabsBackend : VoiceBackend
private readonly ElevenLabsBackendUI ui;
private readonly ElevenLabsBackendUIModel uiModel;
private readonly INotificationService notificationService;
+ private readonly PluginConfiguration config;
public ElevenLabsBackend(PluginConfiguration config, HttpClient http, INotificationService notificationService)
{
this.uiModel = new ElevenLabsBackendUIModel(config, http);
this.ui = new ElevenLabsBackendUI(uiModel, config, this);
this.notificationService = notificationService;
+ this.config = config;
+ }
+
+ public override void DrawStyles(IConfigUIDelegates helpers)
+ {
+ helpers.OpenVoiceStylesConfig();
}
public override void Say(SayRequest request)
@@ -32,7 +40,7 @@ public override void Say(SayRequest request)
{
await this.uiModel.ElevenLabs.Say(elevenLabsVoicePreset.VoiceId, elevenLabsVoicePreset.PlaybackRate,
elevenLabsVoicePreset.Volume, elevenLabsVoicePreset.SimilarityBoost,
- elevenLabsVoicePreset.Stability, request.Source, request.Text);
+ elevenLabsVoicePreset.Stability, request.Source, request.Text, elevenLabsVoicePreset.ModelId, !string.IsNullOrWhiteSpace(request.Style) ? request.Style : (elevenLabsVoicePreset.Style ?? string.Empty));
this.uiModel.UpdateUserSubscriptionInfo();
}
catch (ElevenLabsUnauthorizedException e)
diff --git a/src/TextToTalk/Backends/ElevenLabs/ElevenLabsBackendUI.cs b/src/TextToTalk/Backends/ElevenLabs/ElevenLabsBackendUI.cs
index 46e3f95..02a5b76 100644
--- a/src/TextToTalk/Backends/ElevenLabs/ElevenLabsBackendUI.cs
+++ b/src/TextToTalk/Backends/ElevenLabs/ElevenLabsBackendUI.cs
@@ -3,9 +3,13 @@
using Dalamud.Game.Text;
using NAudio.SoundFont;
using System;
+using System.Collections.Generic;
using System.Linq;
using System.Numerics;
using TextToTalk.UI;
+using TextToTalk.UI.Windows;
+using static FFXIVClientStructs.FFXIV.Client.UI.Agent.AgentHousingPlant;
+using static TextToTalk.Backends.Azure.AzureClient;
namespace TextToTalk.Backends.ElevenLabs;
@@ -85,6 +89,10 @@ public void DrawSettings()
{
ImGui.TextColored(ImColor.Red, "You have no presets. Please create one using the \"New preset\" button.");
}
+ else if (currentVoicePreset == null && presets.Count > 0)
+ {
+ config.SetCurrentVoicePreset(presets.First().Id);
+ }
BackendUI.NewPresetButton($"New preset##{MemoizedId.Create()}", this.config);
@@ -137,14 +145,77 @@ public void DrawSettings()
ImGui.EndCombo();
}
- if (voiceCategoriesFlat.Count == 0)
+ var modelDescriptions = this.model.Models;
+ var modelIdList = modelDescriptions.Keys.ToList();
+ var modelDescriptionsList = modelDescriptions.Values.Select(v => v.Items.First()).ToList();
+ var selectedItemIndex = modelIdList.IndexOf(currentVoicePreset.ModelId);
+
+ // IndexOf returns -1 when currentVoicePreset.ModelId is not present in modelIdList, so every
+ // lookup by selectedItemIndex below is guarded instead of indexing the lists unconditionally.
+ bool hasSelectedModel = selectedItemIndex >= 0;
+ bool previewHasStyles = hasSelectedModel && modelIdList[selectedItemIndex] == "eleven_v3";
+
+ // Preview text drawn over the combo frame; the "[Styles Available]" tag is rendered separately
+ // further down when previewHasStyles is set.
+ string previewName = "Select a model...";
+ if (hasSelectedModel)
+ {
+     var selectedItem = modelDescriptionsList[selectedItemIndex];
+     previewName = $"{modelIdList[selectedItemIndex]} || Cost Multiplier: {selectedItem.ModelRates["character_cost_multiplier"]}";
+ }
+
+ if (ImGui.BeginCombo($"Models##{MemoizedId.Create()}", "", ImGuiComboFlags.HeightLarge))
+ {
+ for (int i = 0; i < modelDescriptionsList.Count; i++)
+ {
+ var item = modelDescriptionsList[i];
+ bool isSelected = (selectedItemIndex == i);
+
+ ImGui.Selectable(item.ModelDescription, false, ImGuiSelectableFlags.Disabled);
+
+ string baseText = $" {item.ModelId} || Cost Multiplier: {item.ModelRates["character_cost_multiplier"]}";
+
+ // 3. Use a Group to keep the Selectable and the extra text on the same line behaviorally
+ if (ImGui.Selectable($"{baseText}##{i}", isSelected))
+ {
+ currentVoicePreset.ModelId = item.ModelId;
+ currentVoicePreset.Stability = (float)Math.Round(currentVoicePreset.Stability / 0.5f) * 0.5f;
+ this.config.Save();
+ }
+
+ // 4. Overlay the Yellow Text if applicable
+ if (item.ModelId == "eleven_v3")
+ {
+ ImGui.SameLine();
+ ImGui.PushStyleColor(ImGuiCol.Text, new Vector4(1.0f, 1.0f, 0.6f, 1.0f));
+ ImGui.Text(" [Styles Available]");
+ ImGui.PopStyleColor();
+ }
+
+ if (isSelected) ImGui.SetItemDefaultFocus();
+ }
+ ImGui.EndCombo();
+ }
+ ImGui.SameLine();
+ float comboRectMinX = ImGui.GetItemRectMin().X;
+ float comboRectMinY = ImGui.GetItemRectMin().Y;
+ float stylePadding = ImGui.GetStyle().FramePadding.X;
+
+ // Move cursor to inside the combo box frame
+ ImGui.SetCursorScreenPos(new Vector2(comboRectMinX + stylePadding, comboRectMinY + ImGui.GetStyle().FramePadding.Y - 3.0f));
+
+ // Draw the Name
+ ImGui.Text(previewName);
+
+ // Draw the Tag if applicable
+ if (previewHasStyles)
{
- ImGui.TextColored(ImColor.Red,
- "No voices were found. This might indicate a temporary service outage.");
+ ImGui.SameLine();
+ ImGui.TextColored(new Vector4(1.0f, 1.0f, 0.6f, 1.0f), "[Styles Available]");
}
}
- var similarityBoost = currentVoicePreset.SimilarityBoost;
+ var similarityBoost = currentVoicePreset.SimilarityBoost;
if (ImGui.SliderFloat($"Clarity/Similarity boost##{MemoizedId.Create()}", ref similarityBoost, 0, 1,
"%.2f", ImGuiSliderFlags.AlwaysClamp))
{
@@ -156,8 +227,16 @@ public void DrawSettings()
if (ImGui.SliderFloat($"Stability##{MemoizedId.Create()}", ref stability, 0, 1, "%.2f",
ImGuiSliderFlags.AlwaysClamp))
{
- currentVoicePreset.Stability = stability;
- this.config.Save();
+ if (currentVoicePreset.ModelId == "eleven_v3")
+ {
+ currentVoicePreset.Stability = (float)Math.Round(stability / 0.5f) * 0.5f; // eleven_v3 only supports 0.0, 0.5, 1.0, any other float values will return "Bad Request"
+ this.config.Save();
+ }
+ else
+ {
+ currentVoicePreset.Stability = stability;
+ this.config.Save();
+ }
}
var playbackRate = currentVoicePreset.PlaybackRate;
@@ -174,17 +253,39 @@ public void DrawSettings()
currentVoicePreset.Volume = (float)Math.Round((double)volume / 100, 2);
this.config.Save();
}
-
+ if (currentVoicePreset.ModelId == "eleven_v3")
{
- ConfigComponents.ToggleUseGenderedVoicePresets(
- $"Use gendered voices##{MemoizedId.Create()}",
- this.config);
-
- ImGui.Spacing();
- if (this.config.UseGenderedVoicePresets)
+ var voiceStyles = (config.CustomVoiceStyles ?? Enumerable.Empty<string>()).ToList(); // CustomVoiceStyles may be null (the styles window null-checks it), so never call ToList() on it directly
+ if (voiceStyles.Count == 0)
{
- BackendUI.GenderedPresetConfig("Polly", TTSBackend.ElevenLabs, this.config, presets);
+ ImGui.BeginDisabled();
+ if (ImGui.BeginCombo("Style", "No styles have been configured"))
+ {
+ ImGui.EndCombo();
+ }
+ ImGui.EndDisabled();
}
+ else
+ {
+ var style = currentVoicePreset.Style;
+ voiceStyles.Insert(0, "");
+ var styleIndex = voiceStyles.IndexOf(currentVoicePreset.Style ?? "");
+ if (ImGui.Combo($"Voice Style##{MemoizedId.Create()}", ref styleIndex, voiceStyles, voiceStyles.Count))
+ {
+ currentVoicePreset.Style = voiceStyles[styleIndex];
+ this.config.Save();
+ }
+ }
+ }
+ else
+ {
+ ImGui.BeginDisabled();
+ if (ImGui.BeginCombo("Style", "Voice Styles only available on model: eleven_v3"))
+ {
+ ImGui.EndCombo();
+ }
+ ImGui.EndDisabled();
+ currentVoicePreset.Style = string.Empty;
}
if (ImGui.Button($"Test##{MemoizedId.Create()}"))
{
@@ -195,6 +296,7 @@ public void DrawSettings()
{
Source = TextSource.Chat,
Voice = currentVoicePreset,
+ Style = currentVoicePreset.Style,
Speaker = "Speaker",
Text = $"Hello from ElevenLabs, this is a test message",
TextTemplate = "Hello from ElevenLabs, this is a test message",
@@ -207,5 +309,28 @@ public void DrawSettings()
backend.Say(request);
}
}
+ ImGui.SameLine();
+ if (ImGui.Button($"Configure Voice Styles##{MemoizedId.Create()}"))
+ {
+ VoiceStyles.Instance?.ToggleStyle();
+ }
+ if (ImGui.IsItemHovered())
+ {
+ ImGui.SetTooltip("Use Tags like \"Shout\" or \"Whisper\" to direct your voices");
+ }
+
+ {
+ ConfigComponents.ToggleUseGenderedVoicePresets(
+ $"Use gendered voices##{MemoizedId.Create()}",
+ this.config);
+
+ ImGui.Spacing();
+ if (this.config.UseGenderedVoicePresets)
+ {
+ BackendUI.GenderedPresetConfig("Polly", TTSBackend.ElevenLabs, this.config, presets);
+ }
+ }
+
+
}
}
\ No newline at end of file
diff --git a/src/TextToTalk/Backends/ElevenLabs/ElevenLabsBackendUIModel.cs b/src/TextToTalk/Backends/ElevenLabs/ElevenLabsBackendUIModel.cs
index 49ced54..e2b002d 100644
--- a/src/TextToTalk/Backends/ElevenLabs/ElevenLabsBackendUIModel.cs
+++ b/src/TextToTalk/Backends/ElevenLabs/ElevenLabsBackendUIModel.cs
@@ -3,6 +3,7 @@
using System.Linq;
using System.Net.Http;
using System.Text.RegularExpressions;
+using Lumina.Excel.Sheets;
using R3;
namespace TextToTalk.Backends.ElevenLabs;
@@ -42,6 +43,7 @@ public class ElevenLabsBackendUIModel : IDisposable
///
public IReadOnlyDictionary> Voices { get; private set; }
+ public IReadOnlyDictionary Items, Dictionary? Rates)> Models { get; private set; }
public ElevenLabsBackendUIModel(PluginConfiguration config, HttpClient http)
{
SoundQueue = new StreamSoundQueue(config);
@@ -52,6 +54,7 @@ public ElevenLabsBackendUIModel(PluginConfiguration config, HttpClient http)
this.apiKey = "";
this.Voices = new Dictionary>();
+ this.Models = new Dictionary Items, Dictionary? Rates)>();
var credentials = ElevenLabsCredentialManager.LoadCredentials();
if (credentials != null)
@@ -142,10 +145,23 @@ private bool TryLogin(string testApiKey)
ElevenLabs.ApiKey = testApiKey;
// This should throw an exception if the API key was incorrect
var voices = ElevenLabs.GetVoices().GetAwaiter().GetResult();
+ var models = ElevenLabs.GetModels().GetAwaiter().GetResult();
Voices = voices
- .Select(kvp =>
+ .Select(kvp =>
new KeyValuePair>(kvp.Key, kvp.Value.AsReadOnly()))
.ToDictionary(kvp => kvp.Key, kvp => kvp.Value);
+
+ Models = models
+ .Where(m => m.CanDoTts)
+ .GroupBy(m => m.ModelId)
+ .ToDictionary(
+ g => g.Key!,
+ g => (
+ Items: (IReadOnlyList)g.ToList().AsReadOnly(),
+ Rates: g.First().ModelRates
+ )
+ );
+
DetailedLog.Info("ElevenLabs authorization successful");
return true;
}
diff --git a/src/TextToTalk/Backends/ElevenLabs/ElevenLabsClient.cs b/src/TextToTalk/Backends/ElevenLabs/ElevenLabsClient.cs
index 9ed1e1c..1c27094 100644
--- a/src/TextToTalk/Backends/ElevenLabs/ElevenLabsClient.cs
+++ b/src/TextToTalk/Backends/ElevenLabs/ElevenLabsClient.cs
@@ -8,6 +8,7 @@
using System.Net.Http.Headers;
using System.Threading.Tasks;
using Newtonsoft.Json;
+using Serilog;
namespace TextToTalk.Backends.ElevenLabs;
@@ -27,21 +28,33 @@ public ElevenLabsClient(StreamSoundQueue soundQueue, HttpClient http)
}
public async Task Say(string? voice, int playbackRate, float volume, float similarityBoost, float stability,
- TextSource source, string text)
+ TextSource source, string text, string? model, string? style)
{
if (!IsAuthorizationSet())
{
throw new ElevenLabsMissingCredentialsException("No ElevenLabs authorization keys have been configured.");
}
-
+ Log.Information($"Style String = {style}");
+ if (!string.IsNullOrEmpty(style)) // a null style must not force eleven_v3 or prepend an empty "[] " tag
+ {
+ model = "eleven_v3"; // force eleven_v3 model for styles
+ text = $"[{style}] " + text; // prepend style tag to text
+ }
+ model = string.IsNullOrEmpty(model) ? "eleven_flash_v2_5" : model; // presets stored before ModelId existed carry no model; keep the previously hard-coded default
+ // eleven_v3 only supports stability float values 0.0, 0.5, 1.0
+ float finalStability = model == "eleven_v3"
+ ? (float)Math.Round(stability * 2.0f, MidpointRounding.AwayFromZero) / 2.0f
+ : stability;
+ Log.Information($"Message String = {text}");
+ Log.Information($"Model String = {model}");
var args = new ElevenLabsTextToSpeechRequest
{
Text = text,
- ModelId = "eleven_flash_v2_5",
+ ModelId = model,
VoiceSettings = new ElevenLabsVoiceSettings
{
SimilarityBoost = similarityBoost,
- Stability = stability,
+ Stability = finalStability,
},
};
@@ -55,7 +68,7 @@ public async Task Say(string? voice, int playbackRate, float volume, float simil
using var content = new StringContent(JsonConvert.SerializeObject(args));
content.Headers.ContentType = new MediaTypeHeaderValue("application/json");
req.Content = content;
-
+
var res = await this.http.SendAsync(req);
EnsureSuccessStatusCode(res);
@@ -104,6 +117,21 @@ public async Task>> GetVoices()
g => (IList)g.OrderByDescending(v => v.Name).ToList());
}
+ /// <summary>
+ /// Requests <c>/v1/models</c> and returns the result ordered by <c>ModelId</c>, descending.
+ /// </summary>
+ /// <exception cref="ElevenLabsMissingCredentialsException">Thrown when <c>IsAuthorizationSet()</c> is false.</exception>
+ /// <exception cref="InvalidOperationException">Thrown when the response deserializes to null.</exception>
+ public async Task<List<ElevenLabsModel>> GetModels()
+ {
+ if (!IsAuthorizationSet())
+ {
+ throw new ElevenLabsMissingCredentialsException("No ElevenLabs authorization keys have been configured.");
+ }
+ return (await SendRequest<List<ElevenLabsModel>>("/v1/models") ?? throw new InvalidOperationException("Models endpoint returned null.")).OrderByDescending(v => v.ModelId).ToList();
+ }
+
+
private async Task SendRequest(string endpoint, string query = "",
HttpContent? reqContent = null) where TResponse : class
{
@@ -126,7 +154,6 @@ public async Task>> GetVoices()
EnsureSuccessStatusCode(res);
var resContent = await res.Content.ReadAsStringAsync();
-
return JsonConvert.DeserializeObject(resContent);
}
diff --git a/src/TextToTalk/Backends/ElevenLabs/ElevenLabsVoice.cs b/src/TextToTalk/Backends/ElevenLabs/ElevenLabsVoice.cs
index 271de20..153d08c 100644
--- a/src/TextToTalk/Backends/ElevenLabs/ElevenLabsVoice.cs
+++ b/src/TextToTalk/Backends/ElevenLabs/ElevenLabsVoice.cs
@@ -1,4 +1,5 @@
using Newtonsoft.Json;
+using System.Collections.Generic;
namespace TextToTalk.Backends.ElevenLabs;
@@ -9,4 +10,15 @@ public class ElevenLabsVoice
[JsonProperty("name")] public string? Name { get; init; }
[JsonProperty("category")] public string? Category { get; init; }
+}
+
+public class ElevenLabsModel
+{
+ [JsonProperty("model_id")] public string? ModelId { get; init; }
+
+ [JsonProperty("description")] public string? ModelDescription { get; init; }
+
+ [JsonProperty("can_do_text_to_speech")] public bool CanDoTts { get; init; }
+
+ [JsonProperty("model_rates")] public Dictionary? ModelRates { get; init; }
}
\ No newline at end of file
diff --git a/src/TextToTalk/Backends/ElevenLabs/ElevenLabsVoicePreset.cs b/src/TextToTalk/Backends/ElevenLabs/ElevenLabsVoicePreset.cs
index 9a917b0..0e47edb 100644
--- a/src/TextToTalk/Backends/ElevenLabs/ElevenLabsVoicePreset.cs
+++ b/src/TextToTalk/Backends/ElevenLabs/ElevenLabsVoicePreset.cs
@@ -14,11 +14,17 @@ public class ElevenLabsVoicePreset : VoicePreset
public float Stability { get; set; }
+ public string? ModelId { get; set; }
+
+ public string? Style { get; set; }
+
public override bool TrySetDefaultValues()
{
Volume = 1.0f;
PlaybackRate = 100;
VoiceId = "21m00Tcm4TlvDq8ikWAM";
+ ModelId = "eleven_flash_v2_5";
+ Style = string.Empty;
SimilarityBoost = 0.5f;
Stability = 0.5f;
EnabledBackend = TTSBackend.ElevenLabs;
diff --git a/src/TextToTalk/Backends/ElevenLabs/ElevenLabsVoiceStylesUI.cs b/src/TextToTalk/Backends/ElevenLabs/ElevenLabsVoiceStylesUI.cs
new file mode 100644
index 0000000..8dbeefe
--- /dev/null
+++ b/src/TextToTalk/Backends/ElevenLabs/ElevenLabsVoiceStylesUI.cs
@@ -0,0 +1,118 @@
+ using Dalamud.Bindings.ImGui;
+
+ using System;
+ using System.Collections.Generic;
+ using System.Text;
+ using System.Numerics;
+ using TextToTalk.UI.Windows;
+
+ namespace TextToTalk.Backends.ElevenLabs
+ {
+     /// <summary>
+     /// Voice-style editor for the ElevenLabs backend: lets the user add entries to
+     /// <c>config.CustomVoiceStyles</c>, copy a style to the clipboard, and remove styles.
+     /// </summary>
+     public class ElevenLabsVoiceStyles : IVoiceStylesWindow
+     {
+         private readonly ElevenLabsBackend backend;
+         private readonly PluginConfiguration config;
+         private string newStyleBuffer = string.Empty;
+
+         // Static, so shared by every instance: the style copied last and the ImGui time of
+         // that click. Used to show the "Copied!" tooltip for one second afterwards.
+         private static double lastCopyTime = -1.0;
+         private static string lastCopiedStyle = "";
+
+         public ElevenLabsVoiceStyles(ElevenLabsBackend backend, PluginConfiguration config)
+         {
+             this.backend = backend;
+             this.config = config;
+         }
+
+         /// <summary>
+         /// Draws the model note, the style input, the "Add" button and the list of configured styles.
+         /// </summary>
+         /// <param name="helpers">UI delegates supplied by the caller; not used by this window.</param>
+         public void Draw(IConfigUIDelegates helpers)
+         {
+             ImGui.Spacing();
+             ImGui.PushStyleColor(ImGuiCol.Text, new Vector4(1.0f, 0.8f, 0.0f, 1.0f));
+             ImGui.TextWrapped("Note: ElevenLabs ad-hoc styles require \"eleven_v3\" and will force the V3 model for messages containing the ad-hoc tags.");
+             ImGui.PopStyleColor();
+             ImGui.TextDisabled("Experiment and have fun!");
+
+             // Pressing Enter in the text box and clicking "Add" both submit the buffer.
+             var submitted = ImGui.InputText("##StyleInput", ref newStyleBuffer, 100, ImGuiInputTextFlags.EnterReturnsTrue);
+             ImGui.SameLine();
+             submitted |= ImGui.Button("Add");
+
+             if (submitted && !string.IsNullOrWhiteSpace(newStyleBuffer))
+             {
+                 var newStyle = newStyleBuffer.Trim();
+                 config.CustomVoiceStyles ??= new List<string>();
+                 if (!config.CustomVoiceStyles.Contains(newStyle))
+                 {
+                     config.CustomVoiceStyles.Add(newStyle);
+                     config.CustomVoiceStyles.Sort();
+                     config.Save(); // nothing else in this window persists the edit
+                 }
+
+                 newStyleBuffer = string.Empty;
+             }
+
+             if (config.AdHocStyleTagsEnabled)
+             {
+                 ImGui.Separator();
+                 ImGui.Text("Click a style to copy its tag to clipboard:");
+                 ImGui.Separator();
+             }
+
+             var styles = config.CustomVoiceStyles;
+             if (styles == null || styles.Count == 0)
+             {
+                 ImGui.TextDisabled("No voice styles have been added yet.");
+                 return;
+             }
+
+             // Removal is deferred until after the loop so the list is not modified while it is drawn.
+             var indexToRemove = -1;
+             for (var i = 0; i < styles.Count; i++)
+             {
+                 var style = styles[i];
+                 var isLastCopied = lastCopiedStyle == style && ImGui.GetTime() - lastCopyTime < 1.0;
+
+                 if (ImGui.Selectable($"{style}##{i}") && config.AdHocStyleTagsEnabled)
+                 {
+                     VoiceStyles.Instance?.CopyStyleToClipboard(style);
+                     lastCopyTime = ImGui.GetTime();
+                     lastCopiedStyle = style;
+                 }
+
+                 if (isLastCopied)
+                 {
+                     ImGui.SetTooltip("Copied!");
+                 }
+                 else if (ImGui.IsItemHovered() && config.AdHocStyleTagsEnabled)
+                 {
+                     ImGui.SetTooltip("Click to copy");
+                 }
+
+                 if (ImGui.BeginPopupContextItem($"context_{i}"))
+                 {
+                     if (ImGui.MenuItem("Remove Style"))
+                     {
+                         indexToRemove = i;
+                     }
+
+                     ImGui.EndPopup();
+                 }
+             }
+
+             if (indexToRemove != -1)
+             {
+                 styles.RemoveAt(indexToRemove);
+                 config.Save();
+             }
+         }
+     }
+ }
\ No newline at end of file
diff --git a/src/TextToTalk/Backends/GoogleCloud/GoogleCloudBackend.cs b/src/TextToTalk/Backends/GoogleCloud/GoogleCloudBackend.cs
index 0f83e08..fae42c1 100644
--- a/src/TextToTalk/Backends/GoogleCloud/GoogleCloudBackend.cs
+++ b/src/TextToTalk/Backends/GoogleCloud/GoogleCloudBackend.cs
@@ -16,6 +16,10 @@ public GoogleCloudBackend(PluginConfiguration config)
ui = new GoogleCloudBackendUI(config, client, this);
}
+ /// <summary>
+ /// Forwards to <see cref="IConfigUIDelegates.OpenVoiceStylesConfig"/> on the supplied delegates.
+ /// </summary>
+ public override void DrawStyles(IConfigUIDelegates helpers) => helpers.OpenVoiceStylesConfig();
public override void Say(SayRequest request)
{
if (request.Voice is not GoogleCloudVoicePreset voicePreset)
diff --git a/src/TextToTalk/Backends/GoogleCloud/GoogleCloudBackendUI.cs b/src/TextToTalk/Backends/GoogleCloud/GoogleCloudBackendUI.cs
index 59217bd..f08b399 100644
--- a/src/TextToTalk/Backends/GoogleCloud/GoogleCloudBackendUI.cs
+++ b/src/TextToTalk/Backends/GoogleCloud/GoogleCloudBackendUI.cs
@@ -44,6 +44,10 @@ public void DrawVoicePresetOptions()
{
ImGui.TextColored(ImColor.Red, "You have no presets. Please create one using the \"New preset\" button.");
}
+ else if (currentVoicePreset == null && presets.Count > 0)
+ {
+ config.SetCurrentVoicePreset(presets.First().Id);
+ }
BackendUI.NewPresetButton($"New preset##{MemoizedId.Create()}", config);
@@ -79,15 +83,18 @@ public void DrawVoicePresetOptions()
}
var voiceNames = client.Voices;
- if (ImGui.BeginCombo($"Voice##{MemoizedId.Create()}", currentVoicePreset.VoiceName))
+ if (ImGui.BeginCombo($"Voice##{MemoizedId.Create()}", $"{currentVoicePreset.VoiceName} - {currentVoicePreset.Gender}"))
{
if (voiceNames != null && currentVoicePreset.Locale != null)
{
- voiceNames = voiceNames.Where(vn => vn.StartsWith(currentVoicePreset.Locale)).ToList();
+ voiceNames = voiceNames
+ .Where(kvp => kvp.Key.StartsWith(currentVoicePreset.Locale))
+ .ToDictionary(kvp => kvp.Key, kvp => kvp.Value);
foreach (var voiceName in voiceNames)
{
- if (!ImGui.Selectable(voiceName, voiceName == currentVoicePreset.VoiceName)) continue;
- currentVoicePreset.VoiceName = voiceName;
+ if (!ImGui.Selectable($"{voiceName.Value.Name} - {voiceName.Value.Gender}", voiceName.Value.Name == currentVoicePreset.VoiceName)) continue;
+ currentVoicePreset.VoiceName = voiceName.Value.Name;
+ currentVoicePreset.Gender = voiceName.Value.Gender.ToString();
config.Save();
}
}
diff --git a/src/TextToTalk/Backends/GoogleCloud/GoogleCloudClient.cs b/src/TextToTalk/Backends/GoogleCloud/GoogleCloudClient.cs
index bc09977..e231d78 100644
--- a/src/TextToTalk/Backends/GoogleCloud/GoogleCloudClient.cs
+++ b/src/TextToTalk/Backends/GoogleCloud/GoogleCloudClient.cs
@@ -12,7 +12,7 @@ public class GoogleCloudClient
{
private TextToSpeechClient? client;
private readonly StreamSoundQueue? soundQueue;
- public List? Voices;
+ public Dictionary? Voices;
public List? Locales;
public GoogleCloudClient(StreamSoundQueue soundQueue, string pathToCredential)
@@ -27,18 +27,23 @@ public void Init(string pathToCredential)
Environment.SetEnvironmentVariable("GOOGLE_APPLICATION_CREDENTIALS", pathToCredential);
this.client = TextToSpeechClient.Create();
this.Voices = GetGoogleTextToSpeechVoices();
- this.Locales = ExtractUniqueLocales(Voices);
+ this.Locales = ExtractUniqueLocales(Voices.Keys.ToList());
}
- public List? GetGoogleTextToSpeechVoices()
+ public Dictionary? GetGoogleTextToSpeechVoices()
{
- if (client == null) return new List();
+ if (client == null) return new Dictionary();
var response = client.ListVoices("");
- var fetchedVoices = new List();
+ var fetchedVoices = new Dictionary();
+
foreach (var voice in response.Voices)
{
- fetchedVoices.Add(voice.Name);
+ fetchedVoices.Add(voice.Name, new
+ {
+ Name = voice.Name,
+ Gender = voice.SsmlGender,
+ });
}
return fetchedVoices;
diff --git a/src/TextToTalk/Backends/GoogleCloud/GoogleCloudVoicePreset.cs b/src/TextToTalk/Backends/GoogleCloud/GoogleCloudVoicePreset.cs
index b2148b0..fe8d597 100644
--- a/src/TextToTalk/Backends/GoogleCloud/GoogleCloudVoicePreset.cs
+++ b/src/TextToTalk/Backends/GoogleCloud/GoogleCloudVoicePreset.cs
@@ -16,6 +16,8 @@ public class GoogleCloudVoicePreset : VoicePreset
public string? Locale { get; set; }
+ public string? Gender { get; set; }
+
[JsonPropertyName("GoogleCloudVoiceName")] public string? VoiceName { get; set; }
public override bool TrySetDefaultValues()
@@ -26,6 +28,7 @@ public override bool TrySetDefaultValues()
PlaybackRate = 1.0f;
Locale = "en-US";
VoiceName = "en-US-Wavenet-D";
+ Gender = "Male";
EnabledBackend = TTSBackend.GoogleCloud;
return true;
}
diff --git a/src/TextToTalk/Backends/IConfigUIDelegates.cs b/src/TextToTalk/Backends/IConfigUIDelegates.cs
index 47096bc..13daddb 100644
--- a/src/TextToTalk/Backends/IConfigUIDelegates.cs
+++ b/src/TextToTalk/Backends/IConfigUIDelegates.cs
@@ -3,4 +3,7 @@
public interface IConfigUIDelegates
{
void OpenVoiceUnlocker();
+
+ void OpenVoiceStylesConfig();
+
}
diff --git a/src/TextToTalk/Backends/Kokoro/KokoroBackend.cs b/src/TextToTalk/Backends/Kokoro/KokoroBackend.cs
index 6c0ca2e..6c29df9 100644
--- a/src/TextToTalk/Backends/Kokoro/KokoroBackend.cs
+++ b/src/TextToTalk/Backends/Kokoro/KokoroBackend.cs
@@ -92,7 +92,10 @@ private async Task GetModelAsync(PluginConfiguration config)
DetailedLog.Debug("Kokoro model downloaded successfully.");
return new KokoroModel(path);
}
-
+ /// <summary>
+ /// Calls <see cref="IConfigUIDelegates.OpenVoiceStylesConfig"/> on <paramref name="helpers"/>.
+ /// </summary>
+ public override void DrawStyles(IConfigUIDelegates helpers) => helpers.OpenVoiceStylesConfig();
public override void Say(SayRequest request)
{
if (request.Voice is not KokoroVoicePreset voicePreset)
diff --git a/src/TextToTalk/Backends/Kokoro/KokoroBackendUI.cs b/src/TextToTalk/Backends/Kokoro/KokoroBackendUI.cs
index 3091920..4ebe6cc 100644
--- a/src/TextToTalk/Backends/Kokoro/KokoroBackendUI.cs
+++ b/src/TextToTalk/Backends/Kokoro/KokoroBackendUI.cs
@@ -1,8 +1,9 @@
-using System;
-using System.Linq;
using Dalamud.Bindings.ImGui;
+using Dalamud.Utility;
using KokoroSharp;
using KokoroSharp.Core;
+using System;
+using System.Linq;
using TextToTalk.UI;
namespace TextToTalk.Backends.Kokoro;
@@ -18,7 +19,7 @@ public void DrawVoicePresetOptions()
var currentVoicePreset = config.GetCurrentVoicePreset();
var presets = config.GetVoicePresetsForBackend(TTSBackend.Kokoro).ToList();
- if (presets.Count > 0)
+ if (presets.Count > 0 && currentVoicePreset != null)
{
var presetIndex = currentVoicePreset is not null ? presets.IndexOf(currentVoicePreset) : -1;
if (ImGui.Combo($"Voice preset##{MemoizedId.Create()}", ref presetIndex,
@@ -32,6 +33,10 @@ public void DrawVoicePresetOptions()
{
ImGui.TextColored(ImColor.Red, "You have no presets. Please create one using the \"New preset\" button.");
}
+ else if (currentVoicePreset == null && presets.Count > 0)
+ {
+ config.SetCurrentVoicePreset(presets.First().Id);
+ }
BackendUI.NewPresetButton($"New preset##{MemoizedId.Create()}", config);
@@ -56,11 +61,15 @@ public void DrawVoicePresetOptions()
}
var voices = KokoroVoiceManager.GetVoices(Enum.GetValues());
- if (ImGui.BeginCombo($"Voice##{MemoizedId.Create()}", currentVoicePreset.InternalName))
+ var voiceNameArray = voices.Select(v => v.Name).ToArray();
+ var voiceArray = voices.Select(v => $"{v.Name.Substring(3)} - {v.Gender} ({v.Language})").ToArray();
+ var voiceIndex = Array.IndexOf(voiceNameArray, currentVoicePreset.InternalName); // -1 when the stored voice is not among the available voices
+ if (ImGui.BeginCombo($"Voice##{MemoizedId.Create()}", voiceIndex >= 0 ? voiceArray[voiceIndex] : currentVoicePreset.InternalName)) // fall back to the raw stored name instead of indexing with -1
{
foreach (var voice in voices)
{
- if (!ImGui.Selectable(voice.Name, voice.Name == currentVoicePreset.InternalName)) continue;
+ string displayName = $"{voice.Name.Substring(3)} - {voice.Gender} ({voice.Language})";
+ if (!ImGui.Selectable(displayName, voice.Name == currentVoicePreset.InternalName)) continue;
currentVoicePreset.InternalName = voice.Name;
config.Save();
}
diff --git a/src/TextToTalk/Backends/OpenAI/OpenAIVoiceStylesUI.cs b/src/TextToTalk/Backends/OpenAI/OpenAIVoiceStylesUI.cs
new file mode 100644
index 0000000..06186a3
--- /dev/null
+++ b/src/TextToTalk/Backends/OpenAI/OpenAIVoiceStylesUI.cs
@@ -0,0 +1,110 @@
+ using Dalamud.Bindings.ImGui;
+ using System.Collections.Generic;
+ using TextToTalk;
+ using TextToTalk.Backends;
+ using TextToTalk.Backends.OpenAI;
+ using TextToTalk.UI.Windows;
+
+ /// <summary>
+ /// Voice-style editor for the OpenAI backend: lets the user add entries to
+ /// <c>config.CustomVoiceStyles</c>, copy a style to the clipboard, and remove styles.
+ /// </summary>
+ public class OpenAIVoiceStyles : IVoiceStylesWindow
+ {
+     private readonly OpenAiBackend backend;
+     private readonly PluginConfiguration config;
+     private string newStyleBuffer = string.Empty;
+
+     // Static, so shared by every instance: the style copied last and the ImGui time of
+     // that click. Used to show the "Copied!" tooltip for one second afterwards.
+     private static double lastCopyTime = -1.0;
+     private static string lastCopiedStyle = "";
+
+     public OpenAIVoiceStyles(OpenAiBackend backend, PluginConfiguration config)
+     {
+         this.backend = backend;
+         this.config = config;
+     }
+
+     /// <summary>
+     /// Draws the style input, the "Add" button and the list of configured styles.
+     /// </summary>
+     /// <param name="helpers">UI delegates supplied by the caller; not used by this window.</param>
+     public void Draw(IConfigUIDelegates helpers)
+     {
+         ImGui.TextDisabled("Experiment and have fun!");
+
+         // Pressing Enter in the text box and clicking "Add" both submit the buffer.
+         var submitted = ImGui.InputText("##StyleInput", ref newStyleBuffer, 100, ImGuiInputTextFlags.EnterReturnsTrue);
+         ImGui.SameLine();
+         submitted |= ImGui.Button("Add");
+
+         if (submitted && !string.IsNullOrWhiteSpace(newStyleBuffer))
+         {
+             var newStyle = newStyleBuffer.Trim();
+             config.CustomVoiceStyles ??= new List<string>();
+             if (!config.CustomVoiceStyles.Contains(newStyle))
+             {
+                 config.CustomVoiceStyles.Add(newStyle);
+                 config.CustomVoiceStyles.Sort();
+                 config.Save(); // nothing else in this window persists the edit
+             }
+
+             newStyleBuffer = string.Empty;
+         }
+
+         if (config.AdHocStyleTagsEnabled)
+         {
+             ImGui.Separator();
+             ImGui.Text("Click a style to copy its tag to clipboard:");
+             ImGui.Separator();
+         }
+
+         var styles = config.CustomVoiceStyles;
+         if (styles == null || styles.Count == 0)
+         {
+             ImGui.TextDisabled("No voice styles have been added yet.");
+             return;
+         }
+
+         // Removal is deferred until after the loop so the list is not modified while it is drawn.
+         var indexToRemove = -1;
+         for (var i = 0; i < styles.Count; i++)
+         {
+             var style = styles[i];
+             var isLastCopied = lastCopiedStyle == style && ImGui.GetTime() - lastCopyTime < 1.0;
+
+             if (ImGui.Selectable($"{style}##{i}") && config.AdHocStyleTagsEnabled)
+             {
+                 VoiceStyles.Instance?.CopyStyleToClipboard(style);
+                 lastCopyTime = ImGui.GetTime();
+                 lastCopiedStyle = style;
+             }
+
+             if (isLastCopied)
+             {
+                 ImGui.SetTooltip("Copied!");
+             }
+             else if (ImGui.IsItemHovered() && config.AdHocStyleTagsEnabled)
+             {
+                 ImGui.SetTooltip("Click to copy");
+             }
+
+             if (ImGui.BeginPopupContextItem($"context_{i}"))
+             {
+                 if (ImGui.MenuItem("Remove Style"))
+                 {
+                     indexToRemove = i;
+                 }
+
+                 ImGui.EndPopup();
+             }
+         }
+
+         if (indexToRemove != -1)
+         {
+             styles.RemoveAt(indexToRemove);
+             config.Save();
+         }
+     }
+ }
\ No newline at end of file
diff --git a/src/TextToTalk/Backends/OpenAI/OpenAiBackend.cs b/src/TextToTalk/Backends/OpenAI/OpenAiBackend.cs
index d22eec9..8a80f2b 100644
--- a/src/TextToTalk/Backends/OpenAI/OpenAiBackend.cs
+++ b/src/TextToTalk/Backends/OpenAI/OpenAiBackend.cs
@@ -1,8 +1,10 @@
-using System;
+using Dalamud.Bindings.ImGui;
+using Serilog;
+using System;
using System.Net;
using System.Net.Http;
using System.Threading.Tasks;
-using Dalamud.Bindings.ImGui;
+using TextToTalk.Backends.ElevenLabs;
using TextToTalk.Services;
namespace TextToTalk.Backends.OpenAI;
@@ -20,6 +22,11 @@ public OpenAiBackend(PluginConfiguration config, HttpClient http, INotificationS
this.notificationService = notificationService;
}
+ /// <summary>
+ /// Delegates to <see cref="IConfigUIDelegates.OpenVoiceStylesConfig"/> on <paramref name="helpers"/>.
+ /// </summary>
+ public override void DrawStyles(IConfigUIDelegates helpers) => helpers.OpenVoiceStylesConfig();
+
public override void Say(SayRequest request)
{
if (request.Voice is not OpenAiVoicePreset voicePreset)
@@ -29,7 +36,8 @@ public override void Say(SayRequest request)
{
try
{
- await this.uiModel.OpenAi.Say(voicePreset, request, request.Text);
+ Log.Information($"Voice name = {voicePreset.VoiceName}");
+ await this.uiModel.OpenAi.Say(voicePreset, request, request.Text, !string.IsNullOrWhiteSpace(request.Style) ? request.Style : (voicePreset.Style ?? string.Empty));
}
catch (OpenAiUnauthorizedException e)
{
diff --git a/src/TextToTalk/Backends/OpenAI/OpenAiBackendUI.cs b/src/TextToTalk/Backends/OpenAI/OpenAiBackendUI.cs
index fb03e3f..dd2b413 100644
--- a/src/TextToTalk/Backends/OpenAI/OpenAiBackendUI.cs
+++ b/src/TextToTalk/Backends/OpenAI/OpenAiBackendUI.cs
@@ -4,6 +4,7 @@
using System;
using System.Linq;
using TextToTalk.UI;
+using TextToTalk.UI.Windows;
namespace TextToTalk.Backends.OpenAI;
@@ -114,28 +115,44 @@ public void DrawVoicePresetOptions()
}
if (currentVoicePreset.Model == null) return;
-
+
var currentModel = OpenAiClient.Models.First(x => x.ModelName == currentVoicePreset.Model);
- var voiceNames = currentModel.Voices;
- if (currentVoicePreset.VoiceName == null || !voiceNames.Contains(currentVoicePreset.VoiceName))
+ // 1. Determine what to display in the preview (the value corresponding to the current key)
+ if (!currentModel.Voices.TryGetValue(currentVoicePreset.VoiceName ?? "", out var currentPreviewName))
{
- currentVoicePreset.VoiceName = voiceNames.First();
+ // Fallback if current key is invalid or null
+ currentVoicePreset.VoiceName = currentModel.Voices.Keys.First();
+ currentPreviewName = currentModel.Voices[currentVoicePreset.VoiceName];
config.Save();
}
-
- if (ImGui.BeginCombo($"Voice##{MemoizedId.Create()}", currentVoicePreset.VoiceName))
+
+ // 2. Start the Combo Box with the Descriptive Value as the preview
+ if (ImGui.BeginCombo($"Voice##{MemoizedId.Create()}", currentPreviewName))
{
- foreach (var voiceName in voiceNames)
+ foreach (var voice in currentModel.Voices)
{
- if (!ImGui.Selectable(voiceName, voiceName == currentVoicePreset.VoiceName)) continue;
+ // voice.Key is "alloy", "ash", etc.
+ // voice.Value is "Alloy (Neutral & Balanced)", etc.
+ bool isSelected = (currentVoicePreset.VoiceName == voice.Key);
- currentVoicePreset.VoiceName = voiceName;
- config.Save();
- }
+ // 3. Display the descriptive Value to the user
+ if (ImGui.Selectable(voice.Value, isSelected))
+ {
+ // 4. Update config with the underlying Key
+ currentVoicePreset.VoiceName = voice.Key;
+ config.Save();
+ }
+ // Standard ImGui accessibility: set focus to the selected item
+ if (isSelected)
+ {
+ ImGui.SetItemDefaultFocus();
+ }
+ }
ImGui.EndCombo();
}
+
var volume = (int) (currentVoicePreset.Volume * 100);
if (ImGui.SliderInt($"Volume##{MemoizedId.Create()}", ref volume, 0, 200, "%d%%"))
{
@@ -155,17 +172,30 @@ public void DrawVoicePresetOptions()
if (currentModel.InstructionsSupported)
{
- var instructions = currentVoicePreset.Instructions ?? "";
- if (ImGui.InputTextWithHint($"Instructions##{MemoizedId.Create()}",
- "Enter instructions to direct the tone, style, pacing, pronunciation, etc.",
- ref instructions, 1024))
+ var voiceStyles = (config.CustomVoiceStyles ?? Enumerable.Empty<string>()).ToList(); // CustomVoiceStyles may be null (the styles window null-checks it), so never call ToList() on it directly
+ if (voiceStyles.Count == 0)
{
- currentVoicePreset.Instructions = instructions;
- config.Save();
+ ImGui.BeginDisabled();
+ if (ImGui.BeginCombo("Style", "No styles have been configured"))
+ {
+ ImGui.EndCombo();
+ }
+ ImGui.EndDisabled();
+ }
+ else
+ {
+ var style = currentVoicePreset.Style;
+ voiceStyles.Insert(0, "");
+ var styleIndex = voiceStyles.IndexOf(currentVoicePreset.Style ?? "");
+ if (ImGui.Combo($"Voice Style##{MemoizedId.Create()}", ref styleIndex, voiceStyles, voiceStyles.Count))
+ {
+ currentVoicePreset.Style = voiceStyles[styleIndex];
+ this.config.Save();
+ }
}
Components.HelpTooltip("""
- Instructions are additional information that can be provided to the model to help it generate more accurate speech.
+ Styles are additional information that can be provided to the model to help it generate more accurate speech.
This can include things like emphasis, pronunciation, pauses, tone, pacing, voice affect, inflections, word choice etc.
Examples can be found at https://openai.fm
""");
@@ -191,6 +221,15 @@ Instructions are additional information that can be provided to the model to hel
backend.Say(request);
}
}
+ ImGui.SameLine();
+ if (ImGui.Button($"Configure Voice Styles##{MemoizedId.Create()}"))
+ {
+ VoiceStyles.Instance?.ToggleStyle();
+ }
+ if (ImGui.IsItemHovered())
+ {
+ ImGui.SetTooltip("Use Tags like \"Shout\" or \"Whisper\" to direct your voices");
+ }
ImGui.Separator();
diff --git a/src/TextToTalk/Backends/OpenAI/OpenAiClient.cs b/src/TextToTalk/Backends/OpenAI/OpenAiClient.cs
index 6f6f41a..6c94868 100644
--- a/src/TextToTalk/Backends/OpenAI/OpenAiClient.cs
+++ b/src/TextToTalk/Backends/OpenAI/OpenAiClient.cs
@@ -7,59 +7,101 @@
using System.Text;
using System.Text.Json;
using System.Threading.Tasks;
+using System.Text.RegularExpressions;
using TextToTalk.GameEnums;
+using Serilog;
namespace TextToTalk.Backends.OpenAI;
public class OpenAiClient(StreamSoundQueue soundQueue, HttpClient http)
{
private const string UrlBase = "https://api.openai.com";
-
- public record ModelConfig(string ModelName, IReadOnlySet Voices, bool InstructionsSupported, bool SpeedSupported);
+
+ public record ModelConfig(
+ string ModelName,
+ IReadOnlyDictionary Voices,
+ bool InstructionsSupported,
+ bool SpeedSupported);
+
+ private static readonly Dictionary VoiceLabels = new()
+{
+ { "alloy", "Alloy (Neutral & Balanced)" },
+ { "ash", "Ash (Clear & Precise)" },
+ { "ballad", "Ballad (Melodic & Smooth)" },
+ { "coral", "Coral (Warm & Friendly)" },
+ { "echo", "Echo (Resonant & Deep)" },
+ { "fable", "Fable (Alto Narrative)" },
+ { "onyx", "Onyx (Deep & Energetic)" },
+ { "nova", "Nova (Bright & Energetic)" },
+ { "sage", "Sage (Calm & Thoughtful)" },
+ { "shimmer", "Shimmer (Bright & Feminine)" },
+ { "verse", "Verse (Versatile & Expressive)" },
+ { "marin", "Marin (Latest and Greatest)" },
+ { "cedar", "Cedar (Latest and Greatest)" }
+};
public static readonly List Models =
[
- // Note: while speed is 'technically' supported by gpt-4o-mini-tts, it doesn't appear to influence the output.
- new("gpt-4o-mini-tts", new HashSet
- {
- "alloy",
- "ash",
- "ballad",
- "coral",
- "echo",
- "fable",
- "onyx",
- "nova",
- "sage",
- "shimmer",
- "verse"
- }, true, false),
- new("tts-1", new HashSet
- {
- "nova",
- "shimmer",
- "echo",
- "onyx",
- "fable",
- "alloy",
- "ash",
- "sage",
- "coral"
- }, false, true),
- new("tts-1-hd", new HashSet
- {
- "nova",
- "shimmer",
- "echo",
- "onyx",
- "fable",
- "alloy",
- "ash",
- "sage",
- "coral"
- }, false, false),
+ new("gpt-4o-mini-tts",
+ VoiceLabels.ToDictionary(v => v.Key, v => v.Value),
+ true, false),
+
+ new("tts-1",
+ VoiceLabels.Where(v => v.Key != "ballad" && v.Key != "verse")
+ .ToDictionary(v => v.Key, v => v.Value),
+ false, true),
+
+ new("tts-1-hd",
+ VoiceLabels.Where(v => v.Key != "ballad" && v.Key != "verse")
+ .ToDictionary(v => v.Key, v => v.Value),
+ false, false)
];
-
+
+ // public record ModelConfig(string ModelName, IReadOnlySet Voices, bool InstructionsSupported, bool SpeedSupported);
+ //
+ // public static readonly List Models =
+ // [
+ // // Note: while speed is 'technically' supported by gpt-4o-mini-tts, it doesn't appear to influence the output.
+ // new("gpt-4o-mini-tts", new HashSet
+ // {
+ // "alloy",
+ // "ash",
+ // "ballad",
+ // "coral",
+ // "echo",
+ // "fable",
+ // "onyx",
+ // "nova",
+ // "sage",
+ // "shimmer",
+ // "verse"
+ // }, true, false),
+ // new("tts-1", new HashSet
+ // {
+ // "nova",
+ // "shimmer",
+ // "echo",
+ // "onyx",
+ // "fable",
+ // "alloy",
+ // "ash",
+ // "sage",
+ // "coral"
+ // }, false, true),
+ // new("tts-1-hd", new HashSet
+ // {
+ // "nova",
+ // "shimmer",
+ // "echo",
+ // "onyx",
+ // "fable",
+ // "alloy",
+ // "ash",
+ // "sage",
+ // "coral"
+ // }, false, false),
+ // ];
+
public string? ApiKey { get; set; }
private void AddAuthorization(HttpRequestMessage req)
@@ -101,9 +143,9 @@ public async Task TestCredentials()
instructionBuilder.AppendLine($"BodyType: {request.BodyType}");
}
- if (preset.Instructions is {Length: > 0})
+ if ((!string.IsNullOrEmpty(request.Style) ? request.Style : preset.Style) is {Length: > 0} effectiveStyle) // Style tags from Say Request take precedence over Style tags from voice preset, and apply even when the preset has no style.
{
- instructionBuilder.AppendLine($"Instructions: {preset.Instructions}");
+ instructionBuilder.AppendLine($"Instructions: {effectiveStyle}");
}
var instructions = instructionBuilder.ToString()
@@ -112,7 +154,7 @@ public async Task TestCredentials()
return instructions.Length > 0 ? instructions : null;
}
- public async Task Say(OpenAiVoicePreset preset, SayRequest request, string text)
+ public async Task Say(OpenAiVoicePreset preset, SayRequest request, string text, string style)
{
if (!IsAuthorizationSet())
{
@@ -134,14 +176,19 @@ public async Task Say(OpenAiVoicePreset preset, SayRequest request, string text)
model = Models.First().ModelName;
}
+ if (request.Style is {Length: > 0 })
+ {
+ model = "gpt-4o-mini-tts"; // Force Say request to model that can handle Voice Styles if user has embedded a style tag into their message
+ }
+
var modelConfig = Models.First(m => m.ModelName == model);
- if (preset.VoiceName != null && modelConfig.Voices.Contains(preset.VoiceName))
+ if (preset.VoiceName != null && modelConfig.Voices.Keys.Contains(preset.VoiceName))
{
voice = preset.VoiceName;
}
else
{
- voice = modelConfig.Voices.First();
+ voice = modelConfig.Voices.Keys.First();
}
Dictionary args = new()
@@ -152,16 +199,20 @@ public async Task Say(OpenAiVoicePreset preset, SayRequest request, string text)
["response_format"] = "mp3",
["speed"] = modelConfig.SpeedSupported ? preset.PlaybackRate ?? 1.0f : 1.0f
};
-
+
if (modelConfig.InstructionsSupported)
{
- string? instructions = GetInstructionsForRequest(request, preset);
- if (instructions != null)
+ string? configinstructions = GetInstructionsForRequest(request, preset);
+ //if (style != "")
+ //{
+ // args["instructions"] = style;
+ //}
+ // Instructions from style take precedence over preset instructions.
+ if (configinstructions != null)
{
- args["instructions"] = instructions;
+ args["instructions"] = configinstructions;
}
- }
-
+ }
var json = JsonSerializer.Serialize(args);
DetailedLog.Verbose(json);
using var content = new StringContent(json, Encoding.UTF8, "application/json");
diff --git a/src/TextToTalk/Backends/OpenAI/OpenAiVoicePreset.cs b/src/TextToTalk/Backends/OpenAI/OpenAiVoicePreset.cs
index a5bcf23..8fe4b86 100644
--- a/src/TextToTalk/Backends/OpenAI/OpenAiVoicePreset.cs
+++ b/src/TextToTalk/Backends/OpenAI/OpenAiVoicePreset.cs
@@ -14,14 +14,15 @@ public class OpenAiVoicePreset : VoicePreset
[JsonPropertyName("OpenAIVoiceName")] public string? VoiceName { get; set; }
- public string? Instructions { get; set; }
+ public string? Style { get; set; }
public override bool TrySetDefaultValues()
{
var defaultConfig = OpenAiClient.Models.First();
Volume = 1.0f;
PlaybackRate = 1.0f;
- VoiceName = defaultConfig.Voices.First();
+ VoiceName = defaultConfig.Voices.Keys.First();
+ Style = string.Empty;
EnabledBackend = TTSBackend.OpenAi;
Model = defaultConfig.ModelName;
return true;
diff --git a/src/TextToTalk/Backends/Polly/PollyBackend.cs b/src/TextToTalk/Backends/Polly/PollyBackend.cs
index 7142ff2..7042ccb 100644
--- a/src/TextToTalk/Backends/Polly/PollyBackend.cs
+++ b/src/TextToTalk/Backends/Polly/PollyBackend.cs
@@ -1,6 +1,7 @@
using System;
using System.Net.Http;
using Dalamud.Bindings.ImGui;
+using Serilog;
namespace TextToTalk.Backends.Polly
{
@@ -21,19 +22,21 @@ public PollyBackend(PluginConfiguration config, HttpClient http)
this.ui = new PollyBackendUI(this.uiModel, config, lexiconManager, http, this);
}
+ public override void DrawStyles(IConfigUIDelegates helpers)
+ {
+ helpers.OpenVoiceStylesConfig();
+ }
public override void Say(SayRequest request)
{
if (request.Voice is not PollyVoicePreset pollyVoicePreset)
{
throw new InvalidOperationException("Invalid voice preset provided.");
}
-
if (this.uiModel.Polly == null)
{
DetailedLog.Warn("Polly client has not yet been initialized");
return;
}
-
_ = this.uiModel.Polly.Say(pollyVoicePreset.VoiceEngine, pollyVoicePreset.VoiceName,
pollyVoicePreset.AmazonDomainName, pollyVoicePreset.SampleRate, pollyVoicePreset.PlaybackRate,
pollyVoicePreset.Volume, request.Source, request.Text);
diff --git a/src/TextToTalk/Backends/Polly/PollyBackendUI.cs b/src/TextToTalk/Backends/Polly/PollyBackendUI.cs
index a666531..8fc8150 100644
--- a/src/TextToTalk/Backends/Polly/PollyBackendUI.cs
+++ b/src/TextToTalk/Backends/Polly/PollyBackendUI.cs
@@ -95,6 +95,10 @@ public void DrawSettings(IConfigUIDelegates helpers)
{
ImGui.TextColored(ImColor.Red, "You have no presets. Please create one using the \"New preset\" button.");
}
+ else if (currentVoicePreset == null && presets.Count > 0)
+ {
+ config.SetCurrentVoicePreset(presets.First().Id);
+ }
BackendUI.NewPresetButton($"New preset##{MemoizedId.Create()}", this.config);
@@ -127,7 +131,7 @@ public void DrawSettings(IConfigUIDelegates helpers)
{
var voices = this.model.CurrentEngineVoices;
- var voiceArray = voices.Select(v => v.Name).ToArray();
+ var voiceArray = voices.Select(v => $"{v.Name} - {v.Gender} ({v.LanguageName})").ToArray();
var voiceIdArray = voices.Select(v => v.Id).ToArray();
var voiceIndex = Array.IndexOf(voiceIdArray, currentVoicePreset.VoiceName);
if (ImGui.Combo($"Voice##{MemoizedId.Create()}", ref voiceIndex, voiceArray, voices.Count))
diff --git a/src/TextToTalk/Backends/Polly/PollyBackendUIModel.cs b/src/TextToTalk/Backends/Polly/PollyBackendUIModel.cs
index 7ff1e52..9bef8aa 100644
--- a/src/TextToTalk/Backends/Polly/PollyBackendUIModel.cs
+++ b/src/TextToTalk/Backends/Polly/PollyBackendUIModel.cs
@@ -44,7 +44,7 @@ public class PollyBackendUIModel : IDisposable
///
/// Gets the available voice engines for AWS Polly.
///
- public string[] Engines { get; } = { Engine.Neural, Engine.Standard };
+ public string[] Engines { get; } = { Engine.Neural, Engine.Standard, Engine.Generative, Engine.LongForm };
public PollyBackendUIModel(PluginConfiguration config, LexiconManager lexiconManager)
{
diff --git a/src/TextToTalk/Backends/SayRequest.cs b/src/TextToTalk/Backends/SayRequest.cs
index 139c771..dfba21d 100644
--- a/src/TextToTalk/Backends/SayRequest.cs
+++ b/src/TextToTalk/Backends/SayRequest.cs
@@ -46,6 +46,11 @@ public record SayRequest
///
public required string Text { get; init; }
+ ///
+ /// Any Voice Style tags associated with the message.
+ ///
+ public string? Style { get; init; }
+
///
/// The message, with the player name replaced with a token.
///
diff --git a/src/TextToTalk/Backends/System/SystemBackend.cs b/src/TextToTalk/Backends/System/SystemBackend.cs
index 1981ce0..4133358 100644
--- a/src/TextToTalk/Backends/System/SystemBackend.cs
+++ b/src/TextToTalk/Backends/System/SystemBackend.cs
@@ -24,6 +24,10 @@ public SystemBackend(PluginConfiguration config, HttpClient http)
this.voiceExceptions = this.uiModel.SubscribeToVoiceExceptions(this.soundQueue.SelectVoiceFailed);
}
+ public override void DrawStyles(IConfigUIDelegates helpers)
+ {
+ helpers.OpenVoiceStylesConfig();
+ }
public override void Say(SayRequest request)
{
this.soundQueue.EnqueueSound(request.Voice, request.Source, request.Text);
diff --git a/src/TextToTalk/Backends/System/SystemBackendUI.cs b/src/TextToTalk/Backends/System/SystemBackendUI.cs
index e5d5eb2..ad8fa39 100644
--- a/src/TextToTalk/Backends/System/SystemBackendUI.cs
+++ b/src/TextToTalk/Backends/System/SystemBackendUI.cs
@@ -46,7 +46,7 @@ public void DrawSettings(IConfigUIDelegates helpers)
var presets = this.config.GetVoicePresetsForBackend(TTSBackend.System).ToList();
presets.Sort((a, b) => a.Id - b.Id);
- if (presets.Any())
+ if (presets.Any() && currentVoicePreset != null)
{
var presetIndex = currentVoicePreset is not null ? presets.IndexOf(currentVoicePreset) : -1;
if (ImGui.Combo($"Preset##{MemoizedId.Create()}", ref presetIndex, presets.Select(p => p.Name).ToArray(),
@@ -56,10 +56,14 @@ public void DrawSettings(IConfigUIDelegates helpers)
this.config.Save();
}
}
- else
+ else if (presets.Count == 0) // Show the hint whenever no presets exist, whether or not a current preset is set.
{
ImGui.TextColored(ImColor.Red, "You have no presets. Please create one using the \"New preset\" button.");
}
+ else // Presets exist but none is selected: fall back to the first one.
+ {
+ config.SetCurrentVoicePreset(presets.First().Id);
+ }
BackendUI.NewPresetButton($"New preset##{MemoizedId.Create()}", this.config);
@@ -193,7 +197,9 @@ private static string FormatVoiceInfo(InstalledVoice iv)
var line = new StringBuilder(iv.VoiceInfo?.Name ?? "");
line.Append(" (")
.Append(iv.VoiceInfo?.Culture?.TwoLetterISOLanguageName.ToUpperInvariant() ?? "Unknown Language")
- .Append(")");
+ .Append(")")
+ .Append(" - ")
+ .Append(iv.VoiceInfo?.Gender.ToString() ?? "Unknown Gender"); // Mirrors the "Unknown Language" fallback so a missing VoiceInfo does not leave a dangling " - ".
if (iv.VoiceInfo?.Name?.Contains("Zira") ?? false)
{
diff --git a/src/TextToTalk/Backends/System/SystemSoundQueue.cs b/src/TextToTalk/Backends/System/SystemSoundQueue.cs
index 25f442e..f81bd83 100644
--- a/src/TextToTalk/Backends/System/SystemSoundQueue.cs
+++ b/src/TextToTalk/Backends/System/SystemSoundQueue.cs
@@ -17,14 +17,16 @@ public class SystemSoundQueue : SoundQueue
private readonly SystemBackend backend;
private readonly PluginConfiguration config;
private int soundLock;
+ private readonly SemaphoreSlim deviceLock = new SemaphoreSlim(1, 1);
public Observable SelectVoiceFailed => selectVoiceFailed;
private readonly Subject selectVoiceFailed;
+ private bool isSynthesizing = false;
public async Task ASyncSpeak(SpeechSynthesizer synth, string textToSpeak)
{
- synth.SpeakSsml(textToSpeak);
+ await Task.Run(() => synth.SpeakSsml(textToSpeak)); // SpeakSsml is synchronous, so it is offloaded to the thread pool; returning Task (not void) lets callers await completion and observe exceptions.
}
public SystemSoundQueue(LexiconManager lexiconManager, PluginConfiguration config)
@@ -66,16 +68,31 @@ protected override async void OnSoundLoop(SystemSoundQueueItem nextItem)
langCode: this.speechSynthesizer.Voice.Culture.IetfLanguageTag);
DetailedLog.Verbose(ssml);
- this.stream = new MemoryStream();
- this.speechSynthesizer.SetOutputToWaveStream(this.stream);
+ // Take the lock before entering try so that finally only ever releases a lock this call actually holds.
+ await deviceLock.WaitAsync();
+ try
+ {
+ isSynthesizing = true;
+ this.stream = new MemoryStream();
+ this.speechSynthesizer.SetOutputToWaveStream(this.stream);
+
+ // SpeakSsml is synchronous; run it on the thread pool so the awaiting sound loop is not blocked.
+ await Task.Run(() => this.speechSynthesizer.SpeakSsml(ssml));
+ }
+ catch (OperationCanceledException)
+ {
- await ASyncSpeak(this.speechSynthesizer,
- ssml); // Wrapped Synchronous Speech Synthesis in an async Task. This is because the SpeakAsync and SpeakSsmlAsync methods do not output a useable MemoryStream.
+ // Synthesis was cancelled part-way, so there is no complete audio to enqueue.
+ return;
+ }
+ finally
+ {
+ isSynthesizing = false;
+ deviceLock.Release();
+ }
this.stream.Seek(0, SeekOrigin.Begin);
- DetailedLog.Debug($"Stream Length = {this.stream.Length}");
- this.streamSoundQueue.EnqueueSound(stream, nextItem.Source, StreamFormat.Wave,
- 1f); // Hard coded 1f for volume float as ssml already takes care of user volume input
+ this.streamSoundQueue.EnqueueSound(stream, nextItem.Source, StreamFormat.Wave, 1f);
}
public override void CancelAllSounds()
@@ -90,15 +107,17 @@ public override void CancelFromSource(TextSource source)
this.streamSoundQueue.CancelFromSource(source);
}
+
protected override void OnSoundCancelled()
{
- try
+ try
{
this.speechSynthesizer.SetOutputToNull();
}
+
catch (ObjectDisposedException)
{
- // ignored
+
}
}
diff --git a/src/TextToTalk/Backends/Uberduck/UberduckBackend.cs b/src/TextToTalk/Backends/Uberduck/UberduckBackend.cs
index 602c461..6cdda83 100644
--- a/src/TextToTalk/Backends/Uberduck/UberduckBackend.cs
+++ b/src/TextToTalk/Backends/Uberduck/UberduckBackend.cs
@@ -26,6 +26,10 @@ public UberduckBackend(PluginConfiguration config, HttpClient http)
this.ui = new UberduckBackendUI(config, this.uberduck, () => voices, this);
}
+ public override void DrawStyles(IConfigUIDelegates helpers)
+ {
+ helpers.OpenVoiceStylesConfig();
+ }
public override void Say(SayRequest request)
{
if (request.Voice is not UberduckVoicePreset uberduckVoicePreset)
diff --git a/src/TextToTalk/Backends/Uberduck/UberduckBackendUI.cs b/src/TextToTalk/Backends/Uberduck/UberduckBackendUI.cs
index 2c69dc5..0bb5ab3 100644
--- a/src/TextToTalk/Backends/Uberduck/UberduckBackendUI.cs
+++ b/src/TextToTalk/Backends/Uberduck/UberduckBackendUI.cs
@@ -74,7 +74,7 @@ public void DrawSettings(IConfigUIDelegates helpers)
var presets = this.config.GetVoicePresetsForBackend(TTSBackend.Uberduck).ToList();
presets.Sort((a, b) => a.Id - b.Id);
- if (presets.Any())
+ if (presets.Any() && currentVoicePreset != null)
{
var presetIndex = currentVoicePreset is not null ? presets.IndexOf(currentVoicePreset) : -1;
if (ImGui.Combo($"Preset##{MemoizedId.Create()}", ref presetIndex, presets.Select(p => p.Name).ToArray(),
@@ -84,10 +84,14 @@ public void DrawSettings(IConfigUIDelegates helpers)
this.config.Save();
}
}
- else
+ else if (presets.Count == 0) // Show the hint whenever no presets exist, whether or not a current preset is set.
{
ImGui.TextColored(ImColor.Red, "You have no presets. Please create one using the \"New preset\" button.");
}
+ else // Presets exist but none is selected: fall back to the first one.
+ {
+ config.SetCurrentVoicePreset(presets.First().Id);
+ }
BackendUI.NewPresetButton($"New preset##{MemoizedId.Create()}", this.config);
diff --git a/src/TextToTalk/Backends/VoiceBackend.cs b/src/TextToTalk/Backends/VoiceBackend.cs
index 5feab77..99b45c6 100644
--- a/src/TextToTalk/Backends/VoiceBackend.cs
+++ b/src/TextToTalk/Backends/VoiceBackend.cs
@@ -15,6 +15,8 @@ public abstract class VoiceBackend : IDisposable
public abstract void DrawSettings(IConfigUIDelegates helpers);
+ public abstract void DrawStyles(IConfigUIDelegates helpers);
+
public abstract TextSource GetCurrentlySpokenTextSource();
protected abstract void Dispose(bool disposing);
diff --git a/src/TextToTalk/Backends/VoiceBackendManager.cs b/src/TextToTalk/Backends/VoiceBackendManager.cs
index 0827d06..e539250 100644
--- a/src/TextToTalk/Backends/VoiceBackendManager.cs
+++ b/src/TextToTalk/Backends/VoiceBackendManager.cs
@@ -1,9 +1,9 @@
-using System;
+using Dalamud.Interface;
+using System;
using System.Linq;
using System.Net.Http;
using System.Numerics;
using System.Threading.Tasks;
-using Dalamud.Interface;
using TextToTalk.Backends.Azure;
using TextToTalk.Backends.ElevenLabs;
using TextToTalk.Backends.GoogleCloud;
@@ -35,7 +35,7 @@ public VoiceBackendManager(PluginConfiguration config, HttpClient http, IUiBuild
this.uiBuilder = uiBuilder;
this.notificationService = notificationService;
- SetBackend(this.config.Backend);
+ SetBackend(this.config.Backend);
}
public override void Say(SayRequest request)
@@ -57,6 +57,10 @@ public override void DrawSettings(IConfigUIDelegates helpers)
{
Backend?.DrawSettings(helpers);
}
+ public override void DrawStyles(IConfigUIDelegates helpers)
+ {
+ Backend?.DrawStyles(helpers);
+ }
public override TextSource GetCurrentlySpokenTextSource()
{
diff --git a/src/TextToTalk/Backends/Websocket/WebsocketBackend.cs b/src/TextToTalk/Backends/Websocket/WebsocketBackend.cs
index 853c309..058e01a 100644
--- a/src/TextToTalk/Backends/Websocket/WebsocketBackend.cs
+++ b/src/TextToTalk/Backends/Websocket/WebsocketBackend.cs
@@ -42,6 +42,10 @@ public WebsocketBackend(PluginConfiguration config, INotificationService notific
this.wsServer.Start();
}
+ public override void DrawStyles(IConfigUIDelegates helpers)
+ {
+ helpers.OpenVoiceStylesConfig();
+ }
public override void Say(SayRequest request)
{
try
diff --git a/src/TextToTalk/CommandModules/MainCommandModule.cs b/src/TextToTalk/CommandModules/MainCommandModule.cs
index e1b075c..dd516c9 100644
--- a/src/TextToTalk/CommandModules/MainCommandModule.cs
+++ b/src/TextToTalk/CommandModules/MainCommandModule.cs
@@ -1,6 +1,7 @@
using Dalamud.Plugin.Services;
using TextToTalk.Backends;
using TextToTalk.UI;
+using TextToTalk.UI.Windows;
namespace TextToTalk.CommandModules;
@@ -11,21 +12,26 @@ public class MainCommandModule : CommandModule
private readonly PluginConfiguration config;
private readonly VoiceBackendManager backendManager;
private readonly ConfigurationWindow configurationWindow;
+ private readonly IConfigUIDelegates configUIDelegates;
+ private readonly VoiceStyles StylesWindow;
public MainCommandModule(ICommandManager commandManager, IChatGui chat, PluginConfiguration config,
- VoiceBackendManager backendManager, ConfigurationWindow configurationWindow) : base(commandManager)
+ VoiceBackendManager backendManager, ConfigurationWindow configurationWindow, IConfigUIDelegates configUIDelegates, VoiceStyles StylesWindow) : base(commandManager) //ElevenLabsStylesWindow elevenLabsStylesWindow)
{
this.chat = chat;
this.config = config;
this.backendManager = backendManager;
this.configurationWindow = configurationWindow;
+ this.configUIDelegates = configUIDelegates;
+ this.StylesWindow = StylesWindow;
AddCommand("/canceltts", CancelTts, "Cancel all queued TTS messages.");
AddCommand("/toggletts", ToggleTts, "Toggle TextToTalk's text-to-speech.");
AddCommand("/disabletts", DisableTts, "Disable TextToTalk's text-to-speech.");
AddCommand("/enabletts", EnableTts, "Enable TextToTalk's text-to-speech.");
AddCommand("/tttconfig", ToggleConfig, "Toggle TextToTalk's configuration window.");
+ AddCommand("/tttstyles", ToggleStyles, "Toggle TextToTalk's styles window.");
}
public void CancelTts(string command = "", string args = "")
@@ -60,4 +66,8 @@ public void ToggleConfig(string command = "", string args = "")
{
this.configurationWindow.Toggle();
}
+ public void ToggleStyles(string command = "", string args = "")
+ {
+ this.StylesWindow.Toggle();
+ }
}
\ No newline at end of file
diff --git a/src/TextToTalk/PluginConfiguration.cs b/src/TextToTalk/PluginConfiguration.cs
index a32bdab..7f1088e 100644
--- a/src/TextToTalk/PluginConfiguration.cs
+++ b/src/TextToTalk/PluginConfiguration.cs
@@ -31,6 +31,11 @@ public class PluginConfiguration : IPluginConfiguration, ISaveable, IWebsocketCo
{
private const string DefaultPreset = "Default";
+ //public string PersistentStyleTag { get; set; } = "";
+ public string StyleTag { get; set; } = "|";
+
+ public string StyleRegex { get; set; } = "";
+
#region Obsolete Members
[Obsolete("Use EnabledChatTypesPresets.")]
@@ -107,6 +112,10 @@ public class PluginConfiguration : IPluginConfiguration, ISaveable, IWebsocketCo
#endregion
+ public List CustomVoiceStyles { get; set; }
+
+ public int ElevenLabsModelIndex { get; set; }
+
public int SelectedAudioDeviceIndex { get; set; }
public Guid SelectedAudioDeviceGuid => AudioDevices.DeviceList.ElementAt(SelectedAudioDeviceIndex).Guid;
@@ -179,6 +188,12 @@ public class PluginConfiguration : IPluginConfiguration, ISaveable, IWebsocketCo
public bool OnlyMessagesFromYou { get; set; }
+ //public bool PersistentStyleTagEnabled { get; set; }
+
+ //public bool PersistentPersonalStyleTagEnabled { get; set; }
+
+ public bool AdHocStyleTagsEnabled { get; set; }
+
[Tooltip(
"Use the American pronunciation for English instead of the British one. This doesn't affect the accents of the voices.")]
public bool KokoroUseAmericanEnglish { get; set; } = true;
@@ -221,12 +236,14 @@ public void Initialize(
this.voicePresetConfig = VoicePresetConfiguration.LoadFromFile(GetVoicePresetsConfigPath());
EnabledChatTypesPresets ??= new List();
+
foreach (var preset in EnabledChatTypesPresets)
{
preset.Initialize(this);
}
+ CustomVoiceStyles ??= new List();
AzureLexiconFiles ??= new List();
PollyLexiconFiles ??= new List();
Lexicons ??= new List();
diff --git a/src/TextToTalk/Services/NpcService.cs b/src/TextToTalk/Services/NpcService.cs
index 7a5728c..380b319 100644
--- a/src/TextToTalk/Services/NpcService.cs
+++ b/src/TextToTalk/Services/NpcService.cs
@@ -8,22 +8,19 @@ namespace TextToTalk.Services;
public class NpcService(NpcCollection collection, IList voices)
{
- public IEnumerable GetAllNpcs()
- {
- return collection.FetchAllNpcs();
- }
+ public IEnumerable GetAllNpcs() => collection.FetchAllNpcs();
public bool AddNpc(string name)
{
if (TryGetNpc(name, out _)) return false;
- var info = new Npc { Name = name };
- collection.StoreNpc(info);
+ collection.StoreNpc(new Npc { Name = name });
return true;
}
public void DeleteNpc(Npc info)
{
collection.DeleteNpcById(info.Id);
+ // Deletes all voice presets associated with this NPC across all backends
collection.DeleteNpcVoiceByNpcId(info.Id);
}
@@ -32,11 +29,13 @@ public bool TryGetNpc(string name, [NotNullWhen(true)] out Npc? info)
return collection.TryFetchNpcByName(name, out info);
}
- public bool TryGetNpcVoice(Npc? info, [NotNullWhen(true)] out VoicePreset? voice)
+ // Fetch a voice preset for a specific NPC + Backend combination
+ public bool TryGetNpcVoice(Npc? info, string backend, [NotNullWhen(true)] out VoicePreset? voice)
{
voice = null;
if (info is null) return false;
- if (collection.TryFetchNpcVoiceByNpcId(info.Id, out var voiceInfo))
+
+ if (collection.TryFetchNpcVoiceByCompositeKey(info.Id, backend, out var voiceInfo))
{
voice = voices.FirstOrDefault(v => v.Id == voiceInfo.VoicePresetId);
}
@@ -44,29 +43,27 @@ public bool TryGetNpcVoice(Npc? info, [NotNullWhen(true)] out VoicePreset? voice
return voice != null;
}
- public void UpdateNpc(Npc info)
- {
- collection.StoreNpc(info);
- }
+ public void UpdateNpc(Npc info) => collection.StoreNpc(info);
+ // Allows setting/replacing a voice specifically for one backend
public bool SetNpcVoice(Npc info, VoicePreset voice)
{
- if (info.Name is null || !TryGetNpc(info.Name, out _))
- {
- return false;
- }
+ if (info.Name is null || !TryGetNpc(info.Name, out _)) return false;
- if (voices.All(v => v.Id != voice.Id))
- {
- return false;
- }
+ if (voices.All(v => v.Id != voice.Id)) return false; // Reject a preset whose Id is not present in the known voice list.
+ string backend = voice.EnabledBackend.ToString();
- if (TryGetNpcVoice(info, out _))
+ if (TryGetNpcVoice(info, backend, out _))
{
- collection.DeleteNpcVoiceByNpcId(info.Id);
+ collection.DeleteNpcVoiceByCompositeKey(info.Id, backend);
}
- collection.StoreNpcVoice(new NpcVoice { NpcId = info.Id, VoicePresetId = voice.Id });
+ collection.StoreNpcVoice(new NpcVoice
+ {
+ NpcId = info.Id,
+ VoicePresetId = voice.Id,
+ VoiceBackend = backend
+ });
return true;
}
diff --git a/src/TextToTalk/Services/PlayerService.cs b/src/TextToTalk/Services/PlayerService.cs
index 1df5dc8..e07e6c0 100644
--- a/src/TextToTalk/Services/PlayerService.cs
+++ b/src/TextToTalk/Services/PlayerService.cs
@@ -29,6 +29,7 @@ public bool AddPlayer(string name, uint worldId)
public void DeletePlayer(Player info)
{
collection.DeletePlayerById(info.Id);
+ // Deletes ALL voices for this player across all backends
collection.DeletePlayerVoiceByPlayerId(info.Id);
}
@@ -37,11 +38,13 @@ public bool TryGetPlayer(string name, uint worldId, [NotNullWhen(true)] out Play
return collection.TryFetchPlayerByNameAndWorld(name, worldId, out info);
}
- public bool TryGetPlayerVoice(Player? info, [NotNullWhen(true)] out VoicePreset? voice)
+ // Fetch a voice preset for a specific Player + Backend combination
+ public bool TryGetPlayerVoice(Player? info, [NotNullWhen(true)] out VoicePreset? voice, string backend)
{
voice = null;
if (info is null) return false;
- if (collection.TryFetchPlayerVoiceByPlayerId(info.Id, out var voiceInfo))
+
+ if (collection.TryFetchPlayerVoiceByCompositeKey(info.Id, backend, out var voiceInfo))
{
voice = voices.FirstOrDefault(v => v.Id == voiceInfo.VoicePresetId);
}
@@ -54,6 +57,7 @@ public bool TryGetPlayerOtherZone(string name, [NotNullWhen(true)] out Player? i
return collection.TryFetchPlayerByName(name, out info);
}
+ // Allows setting/replacing a voice specifically for one backend
public bool SetPlayerVoice(Player info, VoicePreset voice)
{
if (info.Name is null || !TryGetPlayer(info.Name, info.WorldId, out _))
@@ -66,12 +70,21 @@ public bool SetPlayerVoice(Player info, VoicePreset voice)
return false;
}
- if (TryGetPlayerVoice(info, out _))
+ string backend = voice.EnabledBackend.ToString();
+
+ // Modified: Only check and delete for the specific backend provided
+ if (TryGetPlayerVoice(info, out _, backend))
{
- collection.DeletePlayerVoiceByPlayerId(info.Id);
+ collection.DeletePlayerVoiceByCompositeKey(info.Id, backend);
}
- collection.StorePlayerVoice(new PlayerVoice { PlayerId = info.Id, VoicePresetId = voice.Id });
+ // Modified: Store with the backend string to satisfy the composite requirement
+ collection.StorePlayerVoice(new PlayerVoice
+ {
+ PlayerId = info.Id,
+ VoicePresetId = voice.Id,
+ VoiceBackend = backend
+ });
return true;
}
diff --git a/src/TextToTalk/TextToTalk.cs b/src/TextToTalk/TextToTalk.cs
index a9ac1d7..6aca486 100644
--- a/src/TextToTalk/TextToTalk.cs
+++ b/src/TextToTalk/TextToTalk.cs
@@ -16,6 +16,7 @@
using System.IO;
using System.Linq;
using System.Net.Http;
+using System.Text.RegularExpressions;
using TextToTalk.Backends;
using TextToTalk.Backends.Azure;
using TextToTalk.Backends.ElevenLabs;
@@ -36,8 +37,10 @@
using TextToTalk.Talk;
using TextToTalk.TextProviders;
using TextToTalk.UI;
+using TextToTalk.UI.Windows;
using TextToTalk.UngenderedOverrides;
using TextToTalk.Utils;
+using static System.Net.Mime.MediaTypeNames;
using GameObject = Dalamud.Game.ClientState.Objects.Types.IGameObject;
namespace TextToTalk
@@ -74,7 +77,6 @@ public partial class TextToTalk : IDalamudPlugin
private readonly WindowSystem windows;
private readonly IDataManager data;
private readonly NotificationService notificationService;
-
private readonly ConfigurationWindow configurationWindow;
private readonly VoiceUnlockerWindow voiceUnlockerWindow;
@@ -84,6 +86,8 @@ public partial class TextToTalk : IDalamudPlugin
private ILiteDatabase? textEventLogDatabase;
private TextEventLogCollection? textEventLog;
+ private readonly IConfigUIDelegates configUIDelegates;
+ private readonly VoiceStyles StylesWindow;
public string Name => "TextToTalk";
@@ -128,13 +132,14 @@ public TextToTalk(
this.addonTalkManager = new AddonTalkManager(framework, clientState, condition, gui);
this.addonBattleTalkManager = new AddonBattleTalkManager(framework, clientState, condition, gui);
+ this.configUIDelegates = new ConfigUIDelegates();
+
var sharedState = new SharedState();
this.http = new HttpClient();
- this.backendManager =
- new VoiceBackendManager(this.config, this.http, pi.UiBuilder, this.notificationService);
-
+ this.backendManager = new VoiceBackendManager(this.config, this.http, pi.UiBuilder, this.notificationService);
+ this.StylesWindow = new VoiceStyles(this.backendManager, this.configUIDelegates, this.config);
this.playerService = new PlayerService(playerCollection, this.config.GetVoiceConfig().VoicePresets);
this.npcService = new NpcService(npcCollection, this.config.GetVoiceConfig().VoicePresets);
@@ -160,6 +165,7 @@ public TextToTalk(
this.windows.AddWindow(this.voiceUnlockerWindow);
this.windows.AddWindow(this.configurationWindow);
this.windows.AddWindow(channelPresetModificationWindow);
+ this.windows.AddWindow(this.StylesWindow);
var filters = new MessageHandlerFilters(sharedState, this.config, this.clientState);
this.addonTalkHandler =
@@ -176,8 +182,9 @@ public TextToTalk(
this.ungenderedOverrides = new UngenderedOverrideManager();
+
this.commandModule = new MainCommandModule(commandManager, chat, this.config, this.backendManager,
- this.configurationWindow);
+ this.configurationWindow, this.configUIDelegates, this.StylesWindow);
this.debugCommandModule = new DebugCommandModule(commandManager, chat, gui, framework);
@@ -301,15 +308,38 @@ private void Say(GameObject? speaker, SeString speakerName, XivChatType? chatTyp
{
return;
}
+
+ string textContent = textValue; // Default to original text
+ string textStyle = "";
+
+ // An empty pattern matches at every position, so ad-hoc style parsing only runs once a pattern is configured.
+ if (config.AdHocStyleTagsEnabled && !string.IsNullOrEmpty(config.StyleRegex))
+ {
+ try
+ {
+ var match = Regex.Match(textValue, config.StyleRegex);
+ if (match.Success)
+ {
+ textStyle = match.Groups[1].Value.Trim();
+ // Every tagged portion is replaced by its second capture group (empty when the pattern defines only one group).
+ textContent = Regex.Replace(textValue, config.StyleRegex, m => m.Groups[2].Value);
+ }
+ }
+ catch (ArgumentException e)
+ {
+ // StyleRegex is a settable configuration value and may not parse as a regex; fall back to the unmodified text.
+ DetailedLog.Warn($"Ignoring invalid style pattern: {e.Message}");
+ }
+ }
- // Run a preprocessing pipeline to clean the text for the speech synthesizer
- var cleanText = FunctionalUtils.Pipe(
- textValue,
- TalkUtils.StripAngleBracketedText,
- TalkUtils.ReplaceSsmlTokens,
- TalkUtils.NormalizePunctuation,
- t => this.config.RemoveStutterEnabled ? TalkUtils.RemoveStutters(t) : t,
- x => x.Trim());
+ // Run a preprocessing pipeline to clean the text for the speech synthesizer
+ var cleanText = FunctionalUtils.Pipe(
+ textContent,
+ TalkUtils.StripAngleBracketedText,
+ TalkUtils.ReplaceSsmlTokens,
+ TalkUtils.NormalizePunctuation,
+ t => this.config.RemoveStutterEnabled ? TalkUtils.RemoveStutters(t) : t,
+ x => x.Trim());
// Ensure that the result is clean; ignore it otherwise
if (!cleanText.Any() || !TalkUtils.IsSpeakable(cleanText))
@@ -352,6 +382,7 @@ private void Say(GameObject? speaker, SeString speakerName, XivChatType? chatTyp
Source = source,
Speaker = cleanSpeakerName,
Text = cleanText,
+ Style = textStyle,
TextTemplate = textTemplate,
Voice = preset,
ChatType = chatType,
@@ -429,7 +460,7 @@ private static unsafe bool TryGetCharacter(GameObject? speaker,
if (speaker is null &&
this.config.UsePlayerVoicePresets &&
this.playerService.TryGetPlayerOtherZone(speakerName, out var otherPlayerInfo) &&
- this.playerService.TryGetPlayerVoice(otherPlayerInfo, out var otherPlayerVoice))
+ this.playerService.TryGetPlayerVoice(otherPlayerInfo, out var otherPlayerVoice, config.Backend.ToString()))
{
return otherPlayerVoice;
}
@@ -437,7 +468,7 @@ private static unsafe bool TryGetCharacter(GameObject? speaker,
if (speaker is IPlayerCharacter pc &&
this.config.UsePlayerVoicePresets &&
this.playerService.TryGetPlayer(speakerName, pc.HomeWorld.RowId, out var playerInfo) &&
- this.playerService.TryGetPlayerVoice(playerInfo, out var playerVoice))
+ this.playerService.TryGetPlayerVoice(playerInfo, out var playerVoice, config.Backend.ToString()))
{
return playerVoice;
}
@@ -445,7 +476,7 @@ private static unsafe bool TryGetCharacter(GameObject? speaker,
if (speaker is not null &&
this.config.UseNpcVoicePresets &&
this.npcService.TryGetNpc(speakerName, out var npcInfo) &&
- this.npcService.TryGetNpcVoice(npcInfo, out var npcVoice))
+ this.npcService.TryGetNpcVoice(npcInfo, this.config.Backend.ToString(), out var npcVoice))
{
return npcVoice;
}
diff --git a/src/TextToTalk/TextToTalk.csproj b/src/TextToTalk/TextToTalk.csproj
index 20d18bb..6258ee0 100644
--- a/src/TextToTalk/TextToTalk.csproj
+++ b/src/TextToTalk/TextToTalk.csproj
@@ -10,7 +10,7 @@
true
x64
enable
-
+
CS8785
diff --git a/src/TextToTalk/UI/ConfigurationWindow.cs b/src/TextToTalk/UI/ConfigurationWindow.cs
index e260f1d..64fcf5e 100644
--- a/src/TextToTalk/UI/ConfigurationWindow.cs
+++ b/src/TextToTalk/UI/ConfigurationWindow.cs
@@ -410,15 +410,24 @@ private void DrawPlayerVoiceSettings()
{
var (id, playerInfo, worldName) = row;
var name = playerInfo.Name;
-
- // Player voice dropdown
- var presetIndex = this.players.TryGetPlayerVoice(playerInfo, out var v) ? presets.IndexOf(v) : 0;
- if (ImGui.Combo($"##{MemoizedId.Create(uniq: id.ToString())}", ref presetIndex, presetArray,
- presets.Count))
+ var currentBackend = this.config.Backend.ToString();
+
+ // Pass currentBackend to fetch the preset specific to this backend
+ var presetIndex = this.players.TryGetPlayerVoice(playerInfo, out var v, currentBackend)
+ ? presets.IndexOf(v)
+ : 0;
+
+ if (ImGui.Combo($"##{MemoizedId.Create(uniq: id.ToString())}", ref presetIndex, presetArray, presets.Count))
{
- this.players.SetPlayerVoice(playerInfo, presets[presetIndex]);
- this.players.UpdatePlayer(playerInfo);
- DetailedLog.Debug($"Updated voice for {name}@{worldName}: {presets[presetIndex].Name}");
+ // SetPlayerVoice now handles the backend-specific composite record
+ if (this.players.SetPlayerVoice(playerInfo, presets[presetIndex]))
+ {
+ DetailedLog.Debug($"Updated voice for {name}@{worldName} on {currentBackend}: {presets[presetIndex].Name}");
+ }
+ else
+ {
+ DetailedLog.Warn($"Failed to update voice for {name}@{worldName}");
+ }
}
});
@@ -522,18 +531,22 @@ private void DrawNpcVoiceSettings()
var (id, npcInfo) = row;
var name = npcInfo.Name;
- // NPC voice dropdown
- var presetIndex = this.npc.TryGetNpcVoice(npcInfo, out var v) ? presets.IndexOf(v) : 0;
- if (ImGui.Combo($"##{MemoizedId.Create(uniq: id.ToString())}", ref presetIndex, presetArray,
- presets.Count))
+ var currentBackend = this.config.Backend.ToString();
+
+ // Pass currentBackend to find the preset specifically for this backend
+ var presetIndex = this.npc.TryGetNpcVoice(npcInfo, currentBackend, out var v)
+ ? presets.IndexOf(v)
+ : -1; // Use -1 or a "None" index if no voice is set for this backend
+
+ if (ImGui.Combo($"##{MemoizedId.Create(uniq: id.ToString())}", ref presetIndex, presetArray, presets.Count))
{
- if (this.npc.SetNpcVoice(npcInfo, presets[presetIndex]))
+ if (presetIndex >= 0 && this.npc.SetNpcVoice(npcInfo, presets[presetIndex]))
{
- DetailedLog.Debug($"Updated voice for {name}: {presets[presetIndex].Name}");
+ DetailedLog.Debug($"Updated voice for {name} on {currentBackend}: {presets[presetIndex].Name}");
}
else
{
- DetailedLog.Warn($"Failed to update voice for {name} ({{id}})");
+ DetailedLog.Warn($"Failed to update voice for {name} ({id})");
}
}
});
diff --git a/src/TextToTalk/UI/Windows/StylesWindow.cs b/src/TextToTalk/UI/Windows/StylesWindow.cs
new file mode 100644
index 0000000..2970ce8
--- /dev/null
+++ b/src/TextToTalk/UI/Windows/StylesWindow.cs
@@ -0,0 +1,162 @@
+using Dalamud.Bindings.ImGui;
+using Dalamud.Game.Text;
+using Dalamud.Interface;
+using Dalamud.Interface.Colors;
+using Dalamud.Interface.Windowing;
+using Dalamud.Plugin.Services;
+using FFXIVClientStructs;
+using Lumina.Excel.Sheets;
+using R3;
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using System.Numerics;
+using System.Text;
+using TextToTalk.Backends;
+using TextToTalk.Backends.Azure;
+using TextToTalk.Backends.ElevenLabs;
+using TextToTalk.Backends.OpenAI;
+using TextToTalk.Data.Model;
+using TextToTalk.GameEnums;
+using TextToTalk.Services;
+using static TextToTalk.Backends.Azure.AzureClient;
+using System.Text.RegularExpressions;
+
+namespace TextToTalk.UI.Windows
+{
+    /// <summary>A backend-specific panel that is drawn inside the "Voice Styles" window.</summary>
+    public interface IVoiceStylesWindow
+    {
+        void Draw(IConfigUIDelegates helpers);
+    }
+
+    /// <summary>Exposes a toggle for the styles window.</summary>
+    public interface IWindowController
+    {
+        void ToggleStyle();
+    }
+
+    /// <summary>
+    /// Window that shows the ad-hoc style tag settings followed by the style panel of the active backend.
+    /// </summary>
+    public class VoiceStyles : Window
+    {
+        // Pattern returned for an empty delimiter: "(?!)" can never match, so no text is treated as a
+        // style tag, while capture group 1 still exists for callers that read it.
+        private const string NeverMatchingPattern = "(?!)(.*?)";
+
+        private readonly VoiceBackendManager backendManager;
+        private readonly IConfigUIDelegates helpers;
+        private readonly PluginConfiguration config;
+
+        // One panel per backend type. NOTE(review): a panel captures the backend instance it was
+        // created with; if the manager ever replaces that instance, the cached panel may be stale - confirm.
+        private readonly Dictionary<Type, IVoiceStylesWindow> componentCache = new();
+
+        /// <summary>The most recently constructed window (assigned in the constructor).</summary>
+        public static VoiceStyles? Instance { get; private set; }
+
+        public VoiceStyles(VoiceBackendManager backendManager, IConfigUIDelegates helpers, PluginConfiguration config)
+            : base("Voice Styles", ImGuiWindowFlags.None)
+        {
+            Instance = this;
+
+            this.backendManager = backendManager;
+            this.helpers = helpers;
+            this.config = config;
+            this.SizeConstraints = new WindowSizeConstraints
+            {
+                MinimumSize = new Vector2(40, 30),
+                MaximumSize = new Vector2(float.MaxValue, float.MaxValue)
+            };
+        }
+
+        /// <summary>
+        /// Builds a regex that lazily captures (group 1) the text between two occurrences of
+        /// <paramref name="delimiter"/>.
+        /// </summary>
+        /// <param name="delimiter">Literal delimiter; regex metacharacters in it are escaped.</param>
+        /// <returns>The pattern, or one that never matches when the delimiter is null or empty.</returns>
+        public string BuildWrappedPattern(string delimiter)
+        {
+            // Without this guard an empty delimiter yields "(.*?)", which matches the empty string
+            // at every position of any input; a null delimiter would make Regex.Escape throw.
+            if (string.IsNullOrEmpty(delimiter)) return NeverMatchingPattern;
+
+            // Regex.Escape keeps characters such as '$' or '*' literal, e.g. "$" gives \$(.*?)\$
+            var escapedDelimiter = Regex.Escape(delimiter);
+            return $"{escapedDelimiter}(.*?){escapedDelimiter}";
+        }
+
+        /// <summary>Copies <paramref name="style"/> wrapped in the configured style tag to the clipboard.</summary>
+        public void CopyStyleToClipboard(string style) =>
+            ImGui.SetClipboardText($"{config.StyleTag}{style}{config.StyleTag}");
+
+        /// <summary>Flips the window between open and closed.</summary>
+        public void ToggleStyle() => this.IsOpen = !this.IsOpen;
+
+        /// <summary>Draws the style tag settings and then the active backend's style panel.</summary>
+        public override void Draw()
+        {
+            var activeBackend = backendManager.Backend;
+            if (activeBackend == null) return;
+
+            var component = GetOrCreateComponent(activeBackend, config);
+            if (component == null)
+            {
+                ImGui.TextColored(ImGuiColors.DalamudGrey3, "This backend does not yet support dynamic styles.");
+                return;
+            }
+
+            if (ImGui.CollapsingHeader($"Configure ad-hoc style tags##{MemoizedId.Create()}"))
+            {
+                DrawAdHocStyleTagSettings();
+            }
+
+            component.Draw(helpers);
+        }
+
+        // Draws the enable checkbox and, when enabled, the delimiter input with a usage example.
+        private void DrawAdHocStyleTagSettings()
+        {
+            ConfigComponents.ToggleAdHocStyleTagsEnabled("Enable Ad-hoc Style Tags", this.config);
+            Components.HelpTooltip("""
+                If checked, chat messages containing a style tag will be synthesized in that style. This overrides any styles configured in the voice preset.
+                """);
+            if (config.AdHocStyleTagsEnabled != true) return;
+
+            var stylesTag = config.StyleTag;
+            ImGui.Text("Style Tag Delimiter");
+            ImGui.SetNextItemWidth(35.0f);
+            if (ImGui.InputTextWithHint("##DynamicInput", "Style Tag", ref stylesTag, 30))
+            {
+                // Keep the derived regex in sync with the delimiter it is built from.
+                config.StyleTag = stylesTag;
+                config.StyleRegex = BuildWrappedPattern(stylesTag);
+                config.Save();
+            }
+
+            ImGui.SameLine();
+            ImGui.Text($"Example: {stylesTag}Whispering{stylesTag} Hello World");
+        }
+
+        // Returns the cached style panel for the backend's runtime type, creating it on first use;
+        // returns null (and caches nothing) when the backend has no style panel.
+        private IVoiceStylesWindow? GetOrCreateComponent(VoiceBackend backend, PluginConfiguration config)
+        {
+            var type = backend.GetType();
+            if (componentCache.TryGetValue(type, out var existing)) return existing;
+
+            IVoiceStylesWindow? newComponent = backend switch
+            {
+                AzureBackend azure => new AzureVoiceStyles(azure, config, this),
+                ElevenLabsBackend eleven => new ElevenLabsVoiceStyles(eleven, config),
+                OpenAiBackend openai => new OpenAIVoiceStyles(openai, config),
+                _ => null
+            };
+
+            if (newComponent != null) componentCache[type] = newComponent;
+            return newComponent;
+        }
+    }
+}
diff --git a/src/TextToTalk/VoicePresetConfiguration.cs b/src/TextToTalk/VoicePresetConfiguration.cs
index 9d00130..f116042 100644
--- a/src/TextToTalk/VoicePresetConfiguration.cs
+++ b/src/TextToTalk/VoicePresetConfiguration.cs
@@ -223,6 +223,7 @@ private static VoicePreset RepairPreset(IDictionary corrupted)
PlaybackRate = Convert.ToInt32(GetNullableValue(corrupted, "PlaybackRate")),
Volume = Convert.ToSingle(GetNullableValue(corrupted, "Volume")),
VoiceName = GetNullableValue(corrupted, "VoiceName"),
+ Style = GetNullableValue(corrupted, "Style"),
EnabledBackend = TTSBackend.Azure,
},
TTSBackend.ElevenLabs => new ElevenLabsVoicePreset
@@ -232,6 +233,8 @@ private static VoicePreset RepairPreset(IDictionary corrupted)
PlaybackRate = Convert.ToInt32(GetNullableValue(corrupted, "PlaybackRate")),
Volume = Convert.ToSingle(GetNullableValue(corrupted, "Volume")),
VoiceId = GetNullableValue(corrupted, "VoiceId"),
+ ModelId = GetNullableValue(corrupted, "ModelId"),
+ Style = GetNullableValue(corrupted, "Style"),
SimilarityBoost = Convert.ToSingle(GetNullableValue(corrupted, "SimilarityBoost")),
Stability = Convert.ToSingle(GetNullableValue(corrupted, "Stability")),
EnabledBackend = TTSBackend.ElevenLabs,
@@ -243,12 +246,14 @@ private static VoicePreset RepairPreset(IDictionary corrupted)
Volume = Convert.ToSingle(GetNullableValue(corrupted, "Volume")),
VoiceName = GetNullableValue(corrupted, "VoiceName"),
Model = GetNullableValue(corrupted, "Model"),
+ Style = GetNullableValue(corrupted, "Style"),
EnabledBackend = TTSBackend.OpenAi
},
TTSBackend.GoogleCloud => new GoogleCloudVoicePreset
{
Id = Convert.ToInt32(GetNullableValue(corrupted, "Id")),
Name = GetNullableValue(corrupted, "Name"),
+ Gender = GetNullableValue(corrupted, "Gender"),
SampleRate = Convert.ToInt32(GetNullableValue(corrupted, "SampleRate")),
Pitch = Convert.ToSingle(GetNullableValue(corrupted, "Pitch")),
PlaybackRate = Convert.ToSingle(GetNullableValue(corrupted, "PlaybackRate")),