Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 25 additions & 1 deletion src/TextToTalk.Lexicons/LexiconManager.cs
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Text.RegularExpressions;
using System.Xml.Linq;

namespace TextToTalk.Lexicons;
Expand Down Expand Up @@ -109,6 +110,26 @@ public string MakeSsml(
}
}
}
// Inline style tags are written as "[[styleName]]" and govern the text that follows
// them, up to the next style tag or the end of the input.
bool hasStyles = text.Contains("[[") && text.Contains("]]");
if (hasStyles)
{
    // Group 1 is the style name, group 2 is the text governed by that style.
    // The pattern must declare both groups: with only the style-name group,
    // Groups[2] is always empty and the styled text is never wrapped.
    // Singleline lets the styled text span line breaks; \z anchors at the true end of input.
    text = Regex.Replace(
        text,
        @"\[\[(.*?)\]\](.*?)(?=\[\[|\z)",
        m =>
        {
            var styleName = m.Groups[1].Value.Trim();
            var content = m.Groups[2].Value;

            // If no voice is specified, just return the content without styling.
            // This ensures System.Speech compatibility. An empty style name is
            // treated the same way, because an empty style attribute carries no meaning.
            if (voice == null || styleName.Length == 0)
            {
                return content;
            }

            // The style name comes from the spoken text, so escape it before
            // embedding it in an XML attribute.
            var safeStyleName = System.Security.SecurityElement.Escape(styleName);
            return $"<mstts:express-as style=\"{safeStyleName}\" styledegree=\"1.5\">{content}</mstts:express-as>";
        },
        RegexOptions.Singleline);
}

if (playbackRate >= 0)
{
Expand All @@ -125,7 +146,10 @@ public string MakeSsml(
var speakTag = "<speak";
if (includeSpeakAttributes)
{
speakTag += " version=\"1.0\" xmlns=\"http://www.w3.org/2001/10/synthesis\"";
// Correctly define both the default SSML namespace and the Microsoft-specific namespace
speakTag += " version=\"1.0\" " +
"xmlns=\"http://www.w3.org/2001/10/synthesis\" " +
"xmlns:mstts=\"http://www.w3.org/2001/mstts\""; // Fixed URI

if (langCode != null)
{
Expand Down
16 changes: 14 additions & 2 deletions src/TextToTalk/Backends/Azure/AzureBackend.cs
Original file line number Diff line number Diff line change
@@ -1,13 +1,17 @@
using System;
using Dalamud.Bindings.ImGui;
using FFXIVClientStructs.FFXIV.Client.Game.UI;
using System;
using System.Collections.Generic;
using System.Net.Http;
using Dalamud.Bindings.ImGui;
using static TextToTalk.Backends.Azure.AzureClient;

namespace TextToTalk.Backends.Azure;

public class AzureBackend : VoiceBackend
{
private readonly AzureBackendUI ui;
private readonly AzureBackendUIModel uiModel;
public List<VoiceDetails> voices;

public AzureBackend(PluginConfiguration config, HttpClient http)
{
Expand All @@ -17,7 +21,14 @@ public AzureBackend(PluginConfiguration config, HttpClient http)
LexiconUtils.LoadFromConfigAzure(lexiconManager, config);

this.uiModel = new AzureBackendUIModel(config, lexiconManager);
this.voices = this.uiModel.voices;
this.ui = new AzureBackendUI(this.uiModel, config, lexiconManager, http, this);

}

/// <summary>
/// Responds to a request to draw voice styles by asking the supplied UI delegates
/// to open the voice styles configuration.
/// </summary>
/// <param name="helpers">UI delegates used to open the voice styles configuration.</param>
public override void DrawStyles(IConfigUIDelegates helpers) => helpers.OpenVoiceStylesConfig();

public override void Say(SayRequest request)
Expand Down Expand Up @@ -82,4 +93,5 @@ protected override void Dispose(bool disposing)
this.uiModel.Azure?.Dispose();
}
}

}
15 changes: 12 additions & 3 deletions src/TextToTalk/Backends/Azure/AzureBackendUI.cs
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
using Dalamud.Bindings.ImGui;
using Dalamud.Game;
using Dalamud.Game.Text;
using Google.Api;
using System;
using System.IO;
using System.Linq;
Expand All @@ -10,6 +9,7 @@
using TextToTalk.Lexicons.Updater;
using TextToTalk.UI;
using TextToTalk.UI.Lexicons;
using TextToTalk.UI.Windows;

namespace TextToTalk.Backends.Azure;

Expand Down Expand Up @@ -87,6 +87,10 @@ public void DrawSettings(IConfigUIDelegates helpers)
{
ImGui.TextColored(ImColor.Red, "You have no presets. Please create one using the \"New preset\" button.");
}
else if (currentVoicePreset == null && presets.Count > 0)
{
config.SetCurrentVoicePreset(presets.First().Id);
}

BackendUI.NewPresetButton<AzureVoicePreset>($"New preset##{MemoizedId.Create()}", this.config);

Expand All @@ -111,7 +115,7 @@ public void DrawSettings(IConfigUIDelegates helpers)

{
var voices = this.model.Voices;
string?[] voiceArray = voices.ToArray();
// Only voices that have a usable name are offered in the combo box.
string?[] voiceArray = voices.Where(v => v != null && !string.IsNullOrEmpty(v.Name)).Select(v => v.Name).ToArray();
var voiceIndex = Array.IndexOf(voiceArray, currentVoicePreset.VoiceName);
// The item count must describe the filtered array that is actually passed in;
// the unfiltered voice list can be longer than voiceArray.
if (ImGui.Combo($"Voice##{MemoizedId.Create()}", ref voiceIndex, voiceArray, voiceArray.Length))
{
Expand All @@ -126,7 +130,7 @@ public void DrawSettings(IConfigUIDelegates helpers)
"No voices are available on this voice engine for the current region.\n" +
"Please log in using a different region.");
break;
case > 0 when !voices.Any(v => v == currentVoicePreset.VoiceName):
case > 0 when !voiceArray.Any(v => v == currentVoicePreset.VoiceName):
BackendUI.ImGuiVoiceNotSelected();
break;
}
Expand Down Expand Up @@ -167,6 +171,11 @@ public void DrawSettings(IConfigUIDelegates helpers)
backend.Say(request);
}
}
ImGui.SameLine();
if (ImGui.Button($"Voice Styles##{MemoizedId.Create()}"))
{
VoiceStyles.Instance?.ToggleStyle();
}

this.lexiconComponent.Draw();
ImGui.Spacing();
Expand Down
9 changes: 5 additions & 4 deletions src/TextToTalk/Backends/Azure/AzureBackendUIModel.cs
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
using System.Collections.Generic;
using System.Text.RegularExpressions;
using TextToTalk.Lexicons;
using static TextToTalk.Backends.Azure.AzureClient;

namespace TextToTalk.Backends.Azure;

Expand All @@ -12,7 +13,7 @@ public class AzureBackendUIModel
private readonly PluginConfiguration config;
private readonly LexiconManager lexiconManager;

private List<string> voices;
public List<VoiceDetails> voices;
private AzureLoginInfo loginInfo;

/// <summary>
Expand All @@ -28,13 +29,13 @@ public class AzureBackendUIModel
/// <summary>
/// Gets the available voices.
/// </summary>
public IReadOnlyList<string> Voices => this.voices;
public IReadOnlyList<VoiceDetails> Voices => this.voices;

public AzureBackendUIModel(PluginConfiguration config, LexiconManager lexiconManager)
{
this.config = config;
this.lexiconManager = lexiconManager;
this.voices = new List<string>();
this.voices = new List<VoiceDetails>();

this.loginInfo = new AzureLoginInfo();
var credentials = AzureCredentialManager.LoadCredentials();
Expand Down Expand Up @@ -98,7 +99,7 @@ private bool TryAzureLogin()
DetailedLog.Info($"Logging into Azure region {this.loginInfo.Region}");
Azure = new AzureClient(this.loginInfo.SubscriptionKey, this.loginInfo.Region, this.lexiconManager, this.config);
// This should throw an exception if the login failed
this.voices = Azure.GetVoices();
this.voices = Azure.GetVoicesWithStyles();
return true;
}
catch (Exception e)
Expand Down
19 changes: 19 additions & 0 deletions src/TextToTalk/Backends/Azure/AzureClient.cs
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,25 @@ public TextSource GetCurrentlySpokenTextSource()
{
return this.soundQueue.GetCurrentlySpokenTextSource();
}
/// <summary>
/// Retrieves the voices reported by the synthesizer together with the speaking
/// styles listed for each voice.
/// </summary>
/// <remarks>
/// Blocks the calling thread until the voice list request has completed. The result
/// is passed to <c>HandleResult</c> before it is mapped.
/// NOTE(review): callers appear to rely on <c>HandleResult</c> throwing when the
/// request failed — confirm.
/// </remarks>
/// <returns>
/// One <see cref="VoiceDetails"/> per voice. <see cref="VoiceDetails.Styles"/> is never
/// null and contains no null or empty entries.
/// </returns>
public List<VoiceDetails> GetVoicesWithStyles()
{
    // Fetches the voice result asynchronously and waits for completion.
    var res = this.synthesizer.GetVoicesAsync().GetAwaiter().GetResult();
    HandleResult(res);

    return res.Voices
        .Select(voice => new VoiceDetails
        {
            Name = voice.Name,
            // Guard against a missing style list, and drop blank entries so that
            // "Styles.Count == 0" reliably means "this voice has no styles".
            // NOTE(review): presumably a voice without styles is reported as a
            // list holding one empty string — confirm against the SDK.
            Styles = voice.StyleList?
                .Where(style => !string.IsNullOrEmpty(style))
                .ToList() ?? new List<string>(),
        })
        .ToList();
}

/// <summary>
/// A voice name paired with the speaking styles listed for that voice.
/// </summary>
public class VoiceDetails
{
    /// <summary>
    /// Gets or sets the voice name. Defaults to an empty string so the
    /// non-nullable property is never left null.
    /// </summary>
    public string Name { get; set; } = string.Empty;

    /// <summary>
    /// Gets or sets the style names for the voice. Defaults to an empty list so
    /// the non-nullable property is never left null.
    /// </summary>
    public List<string> Styles { get; set; } = new List<string>();
}

public List<string> GetVoices()
{
Expand Down
63 changes: 63 additions & 0 deletions src/TextToTalk/Backends/Azure/AzureVoiceStylesUI.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
using Dalamud.Bindings.ImGui;
using System.Linq;
using TextToTalk;
using TextToTalk.Backends;
using TextToTalk.Backends.Azure;
using TextToTalk.UI.Windows;
using static TextToTalk.Backends.Azure.AzureClient;

/// <summary>
/// Draws the speaking styles available for the voice of the current Azure voice preset
/// and copies a style to the clipboard when the user clicks it.
/// </summary>
public class AzureVoiceStyles : IVoiceStylesWindow
{
    // How long, in ImGui time units, the "Copied!" tooltip stays up after a click.
    private const double CopiedTooltipDuration = 1.0;

    private readonly AzureBackend backend;
    private readonly PluginConfiguration config;
    private readonly VoiceStyles voiceStyles;

    // Static, so the "last copied" state is shared by every instance of this window.
    private static double lastCopyTime = -1.0;
    private static string lastCopiedStyle = "";

    /// <summary>
    /// Creates the styles view for the given Azure backend.
    /// </summary>
    /// <param name="backend">Backend whose voice list is searched for the current preset's voice.</param>
    /// <param name="config">Configuration used to read the current Azure voice preset.</param>
    /// <param name="voiceStyles">Voice styles window instance; stored but not otherwise used by this class.</param>
    public AzureVoiceStyles(AzureBackend backend, PluginConfiguration config, VoiceStyles voiceStyles)
    {
        this.backend = backend;
        this.config = config;
        this.voiceStyles = voiceStyles;
    }

    /// <summary>
    /// Draws one selectable entry per style of the current preset's voice, or a
    /// disabled notice when no styles can be shown.
    /// </summary>
    /// <param name="helpers">UI delegates; not used by this implementation.</param>
    public void Draw(IConfigUIDelegates helpers)
    {
        var currentVoicePreset = this.config.GetCurrentVoicePreset<AzureVoicePreset>();
        var voiceName = currentVoicePreset?.VoiceName;

        // Look the voice up directly by name. Sorting the list first cannot change
        // which entry is found and would repeat the sort on every draw call. Without
        // a preset (or a preset without a voice name) there is nothing to look up.
        // NOTE(review): backend.voices is a plain field on AzureBackend — confirm it
        // is refreshed when the voice list is reloaded.
        var voiceDetails = voiceName == null
            ? null
            : this.backend.voices?.FirstOrDefault(v => v?.Name == voiceName);

        if (voiceDetails?.Styles == null || voiceDetails.Styles.Count == 0)
        {
            ImGui.TextDisabled("No styles available for this voice.");
            return;
        }

        ImGui.Text("Click a style to copy its tag to clipboard:");
        ImGui.Separator();

        foreach (var style in voiceDetails.Styles)
        {
            if (string.IsNullOrEmpty(style))
            {
                continue;
            }

            if (ImGui.Selectable(style))
            {
                VoiceStyles.Instance?.CopyStyleToClipboard(style);
                lastCopyTime = ImGui.GetTime();
                lastCopiedStyle = style;
            }

            // Shortly after a copy, confirm it; otherwise show the hint on hover.
            if (lastCopiedStyle == style && (ImGui.GetTime() - lastCopyTime < CopiedTooltipDuration))
            {
                ImGui.SetTooltip("Copied!");
            }
            else if (ImGui.IsItemHovered())
            {
                ImGui.SetTooltip("Click to copy");
            }
        }
    }
}
18 changes: 15 additions & 3 deletions src/TextToTalk/Backends/BackendUI.cs
Original file line number Diff line number Diff line change
Expand Up @@ -73,9 +73,21 @@ public static void DeletePresetButton(string label, VoicePreset preset, TTSBacke
var otherPreset = voiceConfig.VoicePresets.First(p => p.Id != preset.Id);
config.SetCurrentVoicePreset(otherPreset.Id);

voiceConfig.UngenderedVoicePresets[backend].Remove(preset.Id);
voiceConfig.MaleVoicePresets[backend].Remove(preset.Id);
voiceConfig.FemaleVoicePresets[backend].Remove(preset.Id);
// Use TryGetValue to safely access the inner dictionary for the specific backend
if (voiceConfig.UngenderedVoicePresets.TryGetValue(backend, out var ungendered))
{
ungendered.Remove(preset.Id);
}

if (voiceConfig.MaleVoicePresets.TryGetValue(backend, out var male))
{
male.Remove(preset.Id);
}

if (voiceConfig.FemaleVoicePresets.TryGetValue(backend, out var female))
{
female.Remove(preset.Id);
}

voiceConfig.VoicePresets.Remove(preset);

Expand Down
7 changes: 7 additions & 0 deletions src/TextToTalk/Backends/ConfigUIDelegates.cs
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,16 @@ public class ConfigUIDelegates : IConfigUIDelegates
{
/// <summary>
/// Optional callback run by <see cref="OpenVoiceUnlocker"/>; when it is null that call does nothing.
/// </summary>
public Action? OpenVoiceUnlockerAction { get; init; }

/// <summary>
/// Optional callback run by <see cref="OpenVoiceStylesConfig"/>; when it is null that call does nothing.
/// </summary>
public Action? OpenVoiceStylesWindow { get; init; }

/// <summary>
/// Runs <see cref="OpenVoiceUnlockerAction"/> if one was supplied; otherwise does nothing.
/// </summary>
public void OpenVoiceUnlocker() => OpenVoiceUnlockerAction?.Invoke();

/// <summary>
/// Runs <see cref="OpenVoiceStylesWindow"/> if one was supplied; otherwise does nothing.
/// </summary>
public void OpenVoiceStylesConfig() => OpenVoiceStylesWindow?.Invoke();
}
}
10 changes: 9 additions & 1 deletion src/TextToTalk/Backends/ElevenLabs/ElevenLabsBackend.cs
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
using System;
using System.Linq;
using System.Net;
using System.Net.Http;
using System.Threading.Tasks;
Expand All @@ -11,12 +12,19 @@ public class ElevenLabsBackend : VoiceBackend
private readonly ElevenLabsBackendUI ui;
private readonly ElevenLabsBackendUIModel uiModel;
private readonly INotificationService notificationService;
private readonly PluginConfiguration config;

/// <summary>
/// Creates the ElevenLabs backend together with its UI model and UI.
/// </summary>
/// <param name="config">Plugin configuration, stored and passed on to the UI model and UI.</param>
/// <param name="http">HTTP client passed to the UI model.</param>
/// <param name="notificationService">Notification service stored for later use by the backend.</param>
public ElevenLabsBackend(PluginConfiguration config, HttpClient http, INotificationService notificationService)
{
    // Store the injected dependencies first so that every field is assigned
    // before this instance is handed to ElevenLabsBackendUI below.
    this.config = config;
    this.notificationService = notificationService;

    this.uiModel = new ElevenLabsBackendUIModel(config, http);
    this.ui = new ElevenLabsBackendUI(this.uiModel, config, this);
}

/// <summary>
/// Responds to a request to draw voice styles by asking the supplied UI delegates
/// to open the voice styles configuration.
/// </summary>
/// <param name="helpers">UI delegates used to open the voice styles configuration.</param>
public override void DrawStyles(IConfigUIDelegates helpers) => helpers.OpenVoiceStylesConfig();

public override void Say(SayRequest request)
Expand All @@ -32,7 +40,7 @@ public override void Say(SayRequest request)
{
await this.uiModel.ElevenLabs.Say(elevenLabsVoicePreset.VoiceId, elevenLabsVoicePreset.PlaybackRate,
elevenLabsVoicePreset.Volume, elevenLabsVoicePreset.SimilarityBoost,
elevenLabsVoicePreset.Stability, request.Source, request.Text);
elevenLabsVoicePreset.Stability, request.Source, request.Text, elevenLabsVoicePreset.ModelId);
this.uiModel.UpdateUserSubscriptionInfo();
}
catch (ElevenLabsUnauthorizedException e)
Expand Down
Loading