Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -360,4 +360,5 @@ MigrationBackup/
.ionide/

# Fody - auto-generated XML schema
FodyWeavers.xsd
FodyWeavers.xsd
repomix-output.xml
Original file line number Diff line number Diff line change
Expand Up @@ -107,7 +107,7 @@ public void Normalize_RuleOrder_NumberBeforeWhitespace()
[TestMethod]
[DataRow(
" ‘Test’ 1st.. soooo cool ✨!! LOL Cost: $12.50 USD??? ",
"'Test' first. soo cool sparkles! laughing out loud Cost: twelve dollars fifty cents USD?",
"'Test' first. soo cool sparkles! laughing out loud Cost: twelve US dollars fifty cents?",
DisplayName = "All Rules Integration Test 1 - Corrected"
)]
[DataRow(
Expand All @@ -122,7 +122,7 @@ public void Normalize_RuleOrder_NumberBeforeWhitespace()
)]
[DataRow(
" OMG!!! The price is £50.00??? LOL... IDK. 1st prize! ",
"oh my god! The price is fifty pounds? laughing out loud. I don't know. first prize!",
"oh my god! The price is fifty British pounds? laughing out loud. I don't know. first prize!",
DisplayName = "All Rules Integration Test 4 - Mixed Punctuation & Abbr - Corrected"
)]
[DataRow(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -67,8 +67,8 @@ public void Apply_MultipleAbbreviations_ReplacesAll(string input, string expecte
[DataRow("lollipop", "lollipop", DisplayName = "Substring 'lol'")]
[DataRow("scrolling", "scrolling", DisplayName = "Substring 'lol' (reverse)")]
[DataRow("theory", "theory", DisplayName = "Substring 'ty'")]
[DataRow("imo-test", "imo-test", DisplayName = "Abbreviation as prefix - Corrected Expectation")] // Lookaround fixed
[DataRow("test-imo", "test-imo", DisplayName = "Abbreviation as suffix - Corrected Expectation")] // Lookaround fixed
[DataRow("imo-test", "imo-test", DisplayName = "Abbreviation as prefix")]
[DataRow("test-imo", "test-imo", DisplayName = "Abbreviation as suffix")]
public void Apply_AbbreviationAsSubstringOrAttached_DoesNotReplace(string input, string expected)
{
// Act
Expand Down
48 changes: 30 additions & 18 deletions TTSTextNormalization.Tests/Rules/CurrencyNormalizationRuleTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -23,27 +23,39 @@ public void Apply_NoCurrency_ReturnsInput(string input, string expected)
// NOTE: Expectations updated for default Humanizer output (includes "and")
[TestMethod]
// Symbol First
[DataRow("$1", " one dollar ", DisplayName = "USD Simple ($)")]
[DataRow("$1.00", " one dollar ", DisplayName = "USD Simple zero cents ($)")]
[DataRow("$1.50", " one dollar fifty cents ", DisplayName = "USD with Cents ($)")] // No "and" for cents usually
[DataRow("$1,234.56", " one thousand two hundred and thirty-four dollars fifty-six cents ", DisplayName = "USD Large with Cents ($)")]
[DataRow("£10", " ten pounds ", DisplayName = "GBP Simple (£)")]
[DataRow("£0.50", " zero pounds fifty pence ", DisplayName = "GBP Only Pence (£)")]
[DataRow("$1", " one US dollar ", DisplayName = "USD Simple ($)")]
[DataRow("$1.00", " one US dollar ", DisplayName = "USD Simple zero cents ($)")]
[DataRow("$1.50", " one US dollar fifty cents ", DisplayName = "USD with Cents ($)")] // No "and" for cents usually
[DataRow("$1,234.56", " one thousand two hundred and thirty-four US dollars fifty-six cents ", DisplayName = "USD Large with Cents ($)")]
[DataRow("£10", " ten British pounds ", DisplayName = "GBP Simple (£)")]
[DataRow("£0.50", " zero British pounds fifty pence ", DisplayName = "GBP Only Pence (£)")]
[DataRow("€100", " one hundred euros ", DisplayName = "EUR Simple (€)")]
[DataRow("€1.25", " one euro twenty-five cents ", DisplayName = "EUR With Cents (€)")]
[DataRow("¥500", " five hundred yen ", DisplayName = "JPY Simple (¥)")]
[DataRow("¥500", " five hundred Japanese yen ", DisplayName = "JPY Simple (¥)")]
// Code Last
[DataRow("1 USD", " one dollar ", DisplayName = "USD Code Simple")]
[DataRow("1.00 USD", " one dollar ", DisplayName = "USD Code zero cents")]
[DataRow("1.50 USD", " one dollar fifty cents ", DisplayName = "USD Code with Cents")]
[DataRow("1,234.56 USD", " one thousand two hundred and thirty-four dollars fifty-six cents ", DisplayName = "USD Code Large")]
[DataRow("10 GBP", " ten pounds ", DisplayName = "GBP Code Simple")] // Uses "pound" from map
[DataRow("0.50 GBP", " zero pounds fifty pence ", DisplayName = "GBP Code Only Pence")]
[DataRow("1 USD", " one US dollar ", DisplayName = "USD Code Simple")]
[DataRow("1.00 USD", " one US dollar ", DisplayName = "USD Code zero cents")]
[DataRow("1.50 USD", " one US dollar fifty cents ", DisplayName = "USD Code with Cents")]
[DataRow("1,234.56 USD", " one thousand two hundred and thirty-four US dollars fifty-six cents ", DisplayName = "USD Code Large")]
[DataRow("10 GBP", " ten British pounds ", DisplayName = "GBP Code Simple")] // Uses "pound" from map
[DataRow("0.50 GBP", " zero British pounds fifty pence ", DisplayName = "GBP Code Only Pence")]
[DataRow("100 EUR", " one hundred euros ", DisplayName = "EUR Code Simple")]
[DataRow("1.25 EUR", " one euro twenty-five cents ", DisplayName = "EUR Code With Cents")]
[DataRow("500 JPY", " five hundred yen ", DisplayName = "JPY Code Simple")] // Uses "yen" from map
[DataRow("500 JPY", " five hundred Japanese yen ", DisplayName = "JPY Code Simple")] // Uses "yen" from map
[DataRow("100 CAD", " one hundred Canadian dollars ", DisplayName = "CAD Code Example")]
[DataRow("10 BRL", " ten reais ", DisplayName = "BRL Code Example")]
[DataRow("10 BRL", " ten Brazilian reais ", DisplayName = "BRL Code Example")]
// Combined
[DataRow("$10 USD", " ten US dollars ", DisplayName = "USD Combined ($)")]
[DataRow("$10USD", " ten US dollars ", DisplayName = "USD Combined (wihtout spaces)")]
[DataRow("$10MXN", " ten Mexican pesos ", DisplayName = "MXN Combined (without spaces)")]
[DataRow("$10 CAD", " ten Canadian dollars ", DisplayName = "CAD Combined ($)")]
[DataRow("£10 GBP", " ten British pounds ", DisplayName = "GBP Combined (£)")]
[DataRow("€100 EUR", " one hundred euros ", DisplayName = "EUR Combined (€)")]
[DataRow("¥500 JPY", " five hundred Japanese yen ", DisplayName = "JPY Combined (¥)")]
[DataRow("10 USD $", " ten US dollars $", DisplayName = "USD Combined with Trailing Symbol")]
[DataRow("10 GBP £", " ten British pounds £", DisplayName = "GBP Combined with Trailing Symbol")]
[DataRow("100 EUR €", " one hundred euros €", DisplayName = "EUR Combined with Trailing Symbol")]
[DataRow("500 JPY ¥", " five hundred Japanese yen ¥", DisplayName = "JPY Combined with Trailing Symbol")]
public void Apply_KnownCurrencies_ReplacesWithSpokenForm(string input, string expected)
{
// Act
Expand All @@ -54,9 +66,10 @@ public void Apply_KnownCurrencies_ReplacesWithSpokenForm(string input, string ex
}

[TestMethod]
[DataRow("Send $10 now", "Send ten dollars now", DisplayName = "Currency within sentence")]
[DataRow("Send $10 now", "Send ten US dollars now", DisplayName = "Currency within sentence")]
[DataRow("It costs 50 EUR.", "It costs fifty euros .", DisplayName = "Currency at end of sentence")]
[DataRow("$5 and £10", " five dollars and ten pounds ", DisplayName = "Multiple different currencies")]
[DataRow("It costs 50 EUR now.", "It costs fifty euros now.", DisplayName = "Currency within sentence")]
[DataRow("$5 and £10", " five US dollars and ten British pounds ", DisplayName = "Multiple different currencies")]
public void Apply_CurrencyInContext_ReplacesCorrectly(string input, string expected)
{
// Act
Expand All @@ -69,7 +82,6 @@ public void Apply_CurrencyInContext_ReplacesCorrectly(string input, string expec
[TestMethod]
[DataRow("10XYZ", "10XYZ", DisplayName = "Unknown Code XYZ")]
[DataRow("¤10", "¤10", DisplayName = "Generic Currency Symbol")]
[DataRow("$10MXN", "$10MXN", DisplayName = "Symbol and Code")]
public void Apply_UnknownOrAmbiguousCurrency_NoChange(string input, string expected)
{
// Act
Expand Down
10 changes: 0 additions & 10 deletions TTSTextNormalization.sln
Original file line number Diff line number Diff line change
Expand Up @@ -9,13 +9,6 @@ Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "TTSTextNormalization.EmojiD
EndProject
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "TTSTextNormalization", "TTSTextNormalization\TTSTextNormalization.csproj", "{1C2CA7DF-374E-FA47-469B-9751E035B2C8}"
EndProject
Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = ".github", ".github", "{02EA681E-C7D8-13C7-8484-4AC65E1B71E8}"
EndProject
Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "workflows", "workflows", "{3DCF185E-C897-4519-AB56-F4B91991DB25}"
ProjectSection(SolutionItems) = preProject
dotnet-publish.yml = dotnet-publish.yml
EndProjectSection
EndProject
Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution
Debug|Any CPU = Debug|Any CPU
Expand All @@ -38,9 +31,6 @@ Global
GlobalSection(SolutionProperties) = preSolution
HideSolutionNode = FALSE
EndGlobalSection
GlobalSection(NestedProjects) = preSolution
{3DCF185E-C897-4519-AB56-F4B91991DB25} = {02EA681E-C7D8-13C7-8484-4AC65E1B71E8}
EndGlobalSection
GlobalSection(ExtensibilityGlobals) = postSolution
SolutionGuid = {53950FEC-997F-4537-B0E2-40090BAA342B}
EndGlobalSection
Expand Down
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
using Microsoft.Extensions.DependencyInjection;
using Microsoft.Extensions.DependencyInjection.Extensions;
using TTSTextNormalization.Rules;
using TTSTextNormalization.Abstractions;
using TTSTextNormalization.Core;
using TTSTextNormalization.Rules;

namespace TTSTextNormalization.DependencyInjection;

Expand All @@ -29,30 +29,61 @@ public static IServiceCollection AddTextNormalization(
}

// --- Built-in Rule Extensions for the Builder ---

/// <summary>
/// Registers <see cref="BasicSanitizationRule"/> with the text normalization pipeline using a singleton lifetime.
/// The rule handles essential cleanup such as normalizing line breaks and replacing fancy characters.
/// Recommended Order: 10.
/// </summary>
/// <param name="builder">The text normalization builder the rule is added to.</param>
/// <returns>The result of the builder's <c>AddRule</c> call, allowing further fluent chaining.</returns>
/// <exception cref="ArgumentNullException"><paramref name="builder"/> is <see langword="null"/>.</exception>
public static ITextNormalizationBuilder AddBasicSanitizationRule(this ITextNormalizationBuilder builder)
{
    // Reject a null builder before attempting any registration.
    ArgumentNullException.ThrowIfNull(builder);

    ITextNormalizationBuilder chained = builder.AddRule<BasicSanitizationRule>(ServiceLifetime.Singleton);
    return chained;
}

/// <summary>
/// Registers <see cref="EmojiNormalizationRule"/> with the text normalization pipeline using a singleton lifetime.
/// The rule replaces standard Unicode emojis with their textual descriptions.
/// Recommended Order: 100.
/// </summary>
/// <param name="builder">The text normalization builder the rule is added to.</param>
/// <returns>The result of the builder's <c>AddRule</c> call, allowing further fluent chaining.</returns>
/// <exception cref="ArgumentNullException"><paramref name="builder"/> is <see langword="null"/>.</exception>
public static ITextNormalizationBuilder AddEmojiRule(this ITextNormalizationBuilder builder)
{
    // Reject a null builder before attempting any registration.
    ArgumentNullException.ThrowIfNull(builder);

    ITextNormalizationBuilder chained = builder.AddRule<EmojiNormalizationRule>(ServiceLifetime.Singleton);
    return chained;
}

/// <summary>
/// Registers <see cref="CurrencyNormalizationRule"/> with the text normalization pipeline using a singleton lifetime.
/// The rule turns currency symbols and codes (e.g., "$10.50", "100 EUR") into spoken text.
/// Recommended Order: 200.
/// </summary>
/// <param name="builder">The text normalization builder the rule is added to.</param>
/// <returns>The result of the builder's <c>AddRule</c> call, allowing further fluent chaining.</returns>
/// <exception cref="ArgumentNullException"><paramref name="builder"/> is <see langword="null"/>.</exception>
public static ITextNormalizationBuilder AddCurrencyRule(this ITextNormalizationBuilder builder)
{
    // Reject a null builder before attempting any registration.
    ArgumentNullException.ThrowIfNull(builder);

    ITextNormalizationBuilder chained = builder.AddRule<CurrencyNormalizationRule>(ServiceLifetime.Singleton);
    return chained;
}

/// <summary>
/// Registers <see cref="AbbreviationNormalizationRule"/> with the text normalization pipeline using a singleton lifetime.
/// The rule expands common chat/gaming abbreviations (e.g., "lol", "gg").
/// Recommended Order: 300.
/// </summary>
/// <param name="builder">The text normalization builder the rule is added to.</param>
/// <returns>The result of the builder's <c>AddRule</c> call, allowing further fluent chaining.</returns>
/// <exception cref="ArgumentNullException"><paramref name="builder"/> is <see langword="null"/>.</exception>
public static ITextNormalizationBuilder AddAbbreviationNormalizationRule(this ITextNormalizationBuilder builder)
{
    // Reject a null builder before attempting any registration.
    ArgumentNullException.ThrowIfNull(builder);

    ITextNormalizationBuilder chained = builder.AddRule<AbbreviationNormalizationRule>(ServiceLifetime.Singleton);
    return chained;
}

/// <summary>
/// Adds the <see cref="NumberNormalizationRule"/> to the text normalization pipeline.
/// Converts cardinals, ordinals, decimals, and version-like numbers into words. Recommended Order: 400.
/// </summary>
/// <param name="builder">The text normalization builder.</param>
/// <returns>The builder instance for fluent chaining.</returns>
public static ITextNormalizationBuilder AddNumberNormalizationRule(this ITextNormalizationBuilder builder)
{
ArgumentNullException.ThrowIfNull(builder);
Expand All @@ -79,6 +110,12 @@ public static ITextNormalizationBuilder AddLetterRepetitionRule(this ITextNormal
return builder.AddRule<LetterRepetitionRule>(ServiceLifetime.Singleton);
}

/// <summary>
/// Adds the <see cref="WhitespaceNormalizationRule"/> to the text normalization pipeline.
/// Trims ends, collapses internal spaces, and adjusts spacing around punctuation. Recommended Order: 9000.
/// </summary>
/// <param name="builder">The text normalization builder.</param>
/// <returns>The builder instance for fluent chaining.</returns>
public static ITextNormalizationBuilder AddWhitespaceNormalizationRule(this ITextNormalizationBuilder builder)
{
ArgumentNullException.ThrowIfNull(builder);
Expand Down
3 changes: 3 additions & 0 deletions TTSTextNormalization/Rules/AbbreviationNormalizationRule.cs
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ namespace TTSTextNormalization.Rules;
/// </summary>
public sealed partial class AbbreviationNormalizationRule : ITextNormalizationRule
{
/// <inheritdoc/>
public int Order => 300;
private const int RegexTimeoutMilliseconds = 150; // Slightly increased for larger pattern

Expand Down Expand Up @@ -73,8 +74,10 @@ public sealed partial class AbbreviationNormalizationRule : ITextNormalizationRu
{ "gpu", "g p u" }, // Spell out
}.ToFrozenDictionary(StringComparer.OrdinalIgnoreCase);

/// <summary>
/// Initializes a new instance of the <see cref="AbbreviationNormalizationRule"/> class.
/// </summary>
public AbbreviationNormalizationRule() { }

/// <inheritdoc/>
public string Apply(string inputText)
{
ArgumentNullException.ThrowIfNull(inputText);
Expand Down
3 changes: 3 additions & 0 deletions TTSTextNormalization/Rules/BasicSanitizationRule.cs
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,9 @@ public sealed partial class BasicSanitizationRule : ITextNormalizationRule
{ "–", "-" }, // En dash
}.ToFrozenDictionary(StringComparer.Ordinal);

/// <summary>
/// Initializes a new instance of the <see cref="BasicSanitizationRule"/> class.
/// </summary>
public BasicSanitizationRule() { }

/// <summary>
Expand Down
Loading