diff --git a/.gitignore b/.gitignore
index 9491a2f..f59c3cd 100644
--- a/.gitignore
+++ b/.gitignore
@@ -360,4 +360,5 @@ MigrationBackup/
.ionide/
# Fody - auto-generated XML schema
-FodyWeavers.xsd
\ No newline at end of file
+FodyWeavers.xsd
+repomix-output.xml
diff --git a/TTSTextNormalization.Tests/Core/TextNormalizationPipelineTests.cs b/TTSTextNormalization.Tests/Core/TextNormalizationPipelineTests.cs
index d544f48..64c4813 100644
--- a/TTSTextNormalization.Tests/Core/TextNormalizationPipelineTests.cs
+++ b/TTSTextNormalization.Tests/Core/TextNormalizationPipelineTests.cs
@@ -107,7 +107,7 @@ public void Normalize_RuleOrder_NumberBeforeWhitespace()
[TestMethod]
[DataRow(
" ‘Test’ 1st.. soooo cool ✨!! LOL Cost: $12.50 USD??? ",
- "'Test' first. soo cool sparkles! laughing out loud Cost: twelve dollars fifty cents USD?",
+ "'Test' first. soo cool sparkles! laughing out loud Cost: twelve US dollars fifty cents?",
DisplayName = "All Rules Integration Test 1 - Corrected"
)]
[DataRow(
@@ -122,7 +122,7 @@ public void Normalize_RuleOrder_NumberBeforeWhitespace()
)]
[DataRow(
" OMG!!! The price is £50.00??? LOL... IDK. 1st prize! ",
- "oh my god! The price is fifty pounds? laughing out loud. I don't know. first prize!",
+ "oh my god! The price is fifty British pounds? laughing out loud. I don't know. first prize!",
DisplayName = "All Rules Integration Test 4 - Mixed Punctuation & Abbr - Corrected"
)]
[DataRow(
diff --git a/TTSTextNormalization.Tests/Rules/AbbreviationNormalizationRuleTests.cs b/TTSTextNormalization.Tests/Rules/AbbreviationNormalizationRuleTests.cs
index dd854e7..21a67a2 100644
--- a/TTSTextNormalization.Tests/Rules/AbbreviationNormalizationRuleTests.cs
+++ b/TTSTextNormalization.Tests/Rules/AbbreviationNormalizationRuleTests.cs
@@ -67,8 +67,8 @@ public void Apply_MultipleAbbreviations_ReplacesAll(string input, string expecte
[DataRow("lollipop", "lollipop", DisplayName = "Substring 'lol'")]
[DataRow("scrolling", "scrolling", DisplayName = "Substring 'lol' (reverse)")]
[DataRow("theory", "theory", DisplayName = "Substring 'ty'")]
- [DataRow("imo-test", "imo-test", DisplayName = "Abbreviation as prefix - Corrected Expectation")] // Lookaround fixed
- [DataRow("test-imo", "test-imo", DisplayName = "Abbreviation as suffix - Corrected Expectation")] // Lookaround fixed
+ [DataRow("imo-test", "imo-test", DisplayName = "Abbreviation as prefix")]
+ [DataRow("test-imo", "test-imo", DisplayName = "Abbreviation as suffix")]
public void Apply_AbbreviationAsSubstringOrAttached_DoesNotReplace(string input, string expected)
{
// Act
diff --git a/TTSTextNormalization.Tests/Rules/CurrencyNormalizationRuleTests.cs b/TTSTextNormalization.Tests/Rules/CurrencyNormalizationRuleTests.cs
index 78befe4..da77401 100644
--- a/TTSTextNormalization.Tests/Rules/CurrencyNormalizationRuleTests.cs
+++ b/TTSTextNormalization.Tests/Rules/CurrencyNormalizationRuleTests.cs
@@ -23,27 +23,39 @@ public void Apply_NoCurrency_ReturnsInput(string input, string expected)
// NOTE: Expectations updated for default Humanizer output (includes "and")
[TestMethod]
// Symbol First
- [DataRow("$1", " one dollar ", DisplayName = "USD Simple ($)")]
- [DataRow("$1.00", " one dollar ", DisplayName = "USD Simple zero cents ($)")]
- [DataRow("$1.50", " one dollar fifty cents ", DisplayName = "USD with Cents ($)")] // No "and" for cents usually
- [DataRow("$1,234.56", " one thousand two hundred and thirty-four dollars fifty-six cents ", DisplayName = "USD Large with Cents ($)")]
- [DataRow("£10", " ten pounds ", DisplayName = "GBP Simple (£)")]
- [DataRow("£0.50", " zero pounds fifty pence ", DisplayName = "GBP Only Pence (£)")]
+ [DataRow("$1", " one US dollar ", DisplayName = "USD Simple ($)")]
+ [DataRow("$1.00", " one US dollar ", DisplayName = "USD Simple zero cents ($)")]
+ [DataRow("$1.50", " one US dollar fifty cents ", DisplayName = "USD with Cents ($)")] // No "and" for cents usually
+ [DataRow("$1,234.56", " one thousand two hundred and thirty-four US dollars fifty-six cents ", DisplayName = "USD Large with Cents ($)")]
+ [DataRow("£10", " ten British pounds ", DisplayName = "GBP Simple (£)")]
+ [DataRow("£0.50", " zero British pounds fifty pence ", DisplayName = "GBP Only Pence (£)")]
[DataRow("€100", " one hundred euros ", DisplayName = "EUR Simple (€)")]
[DataRow("€1.25", " one euro twenty-five cents ", DisplayName = "EUR With Cents (€)")]
- [DataRow("¥500", " five hundred yen ", DisplayName = "JPY Simple (¥)")]
+ [DataRow("¥500", " five hundred Japanese yen ", DisplayName = "JPY Simple (¥)")]
// Code Last
- [DataRow("1 USD", " one dollar ", DisplayName = "USD Code Simple")]
- [DataRow("1.00 USD", " one dollar ", DisplayName = "USD Code zero cents")]
- [DataRow("1.50 USD", " one dollar fifty cents ", DisplayName = "USD Code with Cents")]
- [DataRow("1,234.56 USD", " one thousand two hundred and thirty-four dollars fifty-six cents ", DisplayName = "USD Code Large")]
- [DataRow("10 GBP", " ten pounds ", DisplayName = "GBP Code Simple")] // Uses "pound" from map
- [DataRow("0.50 GBP", " zero pounds fifty pence ", DisplayName = "GBP Code Only Pence")]
+ [DataRow("1 USD", " one US dollar ", DisplayName = "USD Code Simple")]
+ [DataRow("1.00 USD", " one US dollar ", DisplayName = "USD Code zero cents")]
+ [DataRow("1.50 USD", " one US dollar fifty cents ", DisplayName = "USD Code with Cents")]
+ [DataRow("1,234.56 USD", " one thousand two hundred and thirty-four US dollars fifty-six cents ", DisplayName = "USD Code Large")]
+ [DataRow("10 GBP", " ten British pounds ", DisplayName = "GBP Code Simple")] // Uses "British pound" from map
+ [DataRow("0.50 GBP", " zero British pounds fifty pence ", DisplayName = "GBP Code Only Pence")]
[DataRow("100 EUR", " one hundred euros ", DisplayName = "EUR Code Simple")]
[DataRow("1.25 EUR", " one euro twenty-five cents ", DisplayName = "EUR Code With Cents")]
- [DataRow("500 JPY", " five hundred yen ", DisplayName = "JPY Code Simple")] // Uses "yen" from map
+ [DataRow("500 JPY", " five hundred Japanese yen ", DisplayName = "JPY Code Simple")] // Uses "Japanese yen" from map
[DataRow("100 CAD", " one hundred Canadian dollars ", DisplayName = "CAD Code Example")]
- [DataRow("10 BRL", " ten reais ", DisplayName = "BRL Code Example")]
+ [DataRow("10 BRL", " ten Brazilian reais ", DisplayName = "BRL Code Example")]
+ // Combined
+ [DataRow("$10 USD", " ten US dollars ", DisplayName = "USD Combined ($)")]
+ [DataRow("$10USD", " ten US dollars ", DisplayName = "USD Combined (without spaces)")]
+ [DataRow("$10MXN", " ten Mexican pesos ", DisplayName = "MXN Combined (without spaces)")]
+ [DataRow("$10 CAD", " ten Canadian dollars ", DisplayName = "CAD Combined ($)")]
+ [DataRow("£10 GBP", " ten British pounds ", DisplayName = "GBP Combined (£)")]
+ [DataRow("€100 EUR", " one hundred euros ", DisplayName = "EUR Combined (€)")]
+ [DataRow("¥500 JPY", " five hundred Japanese yen ", DisplayName = "JPY Combined (¥)")]
+ [DataRow("10 USD $", " ten US dollars $", DisplayName = "USD Combined with Trailing Symbol")]
+ [DataRow("10 GBP £", " ten British pounds £", DisplayName = "GBP Combined with Trailing Symbol")]
+ [DataRow("100 EUR €", " one hundred euros €", DisplayName = "EUR Combined with Trailing Symbol")]
+ [DataRow("500 JPY ¥", " five hundred Japanese yen ¥", DisplayName = "JPY Combined with Trailing Symbol")]
public void Apply_KnownCurrencies_ReplacesWithSpokenForm(string input, string expected)
{
// Act
@@ -54,9 +66,10 @@ public void Apply_KnownCurrencies_ReplacesWithSpokenForm(string input, string ex
}
[TestMethod]
- [DataRow("Send $10 now", "Send ten dollars now", DisplayName = "Currency within sentence")]
+ [DataRow("Send $10 now", "Send ten US dollars now", DisplayName = "Currency within sentence")]
[DataRow("It costs 50 EUR.", "It costs fifty euros .", DisplayName = "Currency at end of sentence")]
- [DataRow("$5 and £10", " five dollars and ten pounds ", DisplayName = "Multiple different currencies")]
+ [DataRow("It costs 50 EUR now.", "It costs fifty euros now.", DisplayName = "Currency code within sentence")]
+ [DataRow("$5 and £10", " five US dollars and ten British pounds ", DisplayName = "Multiple different currencies")]
public void Apply_CurrencyInContext_ReplacesCorrectly(string input, string expected)
{
// Act
@@ -69,7 +82,6 @@ public void Apply_CurrencyInContext_ReplacesCorrectly(string input, string expec
[TestMethod]
[DataRow("10XYZ", "10XYZ", DisplayName = "Unknown Code XYZ")]
[DataRow("¤10", "¤10", DisplayName = "Generic Currency Symbol")]
- [DataRow("$10MXN", "$10MXN", DisplayName = "Symbol and Code")]
public void Apply_UnknownOrAmbiguousCurrency_NoChange(string input, string expected)
{
// Act
diff --git a/TTSTextNormalization.sln b/TTSTextNormalization.sln
index dea0a32..9da5f40 100644
--- a/TTSTextNormalization.sln
+++ b/TTSTextNormalization.sln
@@ -9,13 +9,6 @@ Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "TTSTextNormalization.EmojiD
EndProject
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "TTSTextNormalization", "TTSTextNormalization\TTSTextNormalization.csproj", "{1C2CA7DF-374E-FA47-469B-9751E035B2C8}"
EndProject
-Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = ".github", ".github", "{02EA681E-C7D8-13C7-8484-4AC65E1B71E8}"
-EndProject
-Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "workflows", "workflows", "{3DCF185E-C897-4519-AB56-F4B91991DB25}"
- ProjectSection(SolutionItems) = preProject
- dotnet-publish.yml = dotnet-publish.yml
- EndProjectSection
-EndProject
Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution
Debug|Any CPU = Debug|Any CPU
@@ -38,9 +31,6 @@ Global
GlobalSection(SolutionProperties) = preSolution
HideSolutionNode = FALSE
EndGlobalSection
- GlobalSection(NestedProjects) = preSolution
- {3DCF185E-C897-4519-AB56-F4B91991DB25} = {02EA681E-C7D8-13C7-8484-4AC65E1B71E8}
- EndGlobalSection
GlobalSection(ExtensibilityGlobals) = postSolution
SolutionGuid = {53950FEC-997F-4537-B0E2-40090BAA342B}
EndGlobalSection
diff --git a/TTSTextNormalization/DependencyInjection/TextNormalizationServiceCollectionExtensions.cs b/TTSTextNormalization/DependencyInjection/TextNormalizationServiceCollectionExtensions.cs
index f6d7e7b..e224f2d 100644
--- a/TTSTextNormalization/DependencyInjection/TextNormalizationServiceCollectionExtensions.cs
+++ b/TTSTextNormalization/DependencyInjection/TextNormalizationServiceCollectionExtensions.cs
@@ -1,8 +1,8 @@
using Microsoft.Extensions.DependencyInjection;
using Microsoft.Extensions.DependencyInjection.Extensions;
-using TTSTextNormalization.Rules;
using TTSTextNormalization.Abstractions;
using TTSTextNormalization.Core;
+using TTSTextNormalization.Rules;
namespace TTSTextNormalization.DependencyInjection;
@@ -29,30 +29,61 @@ public static IServiceCollection AddTextNormalization(
}
// --- Built-in Rule Extensions for the Builder ---
+
+ /// <summary>
+ /// Adds the <see cref="BasicSanitizationRule"/> to the text normalization pipeline.
+ /// Performs essential cleanup like normalizing line breaks and replacing fancy characters. Recommended Order: 10.
+ /// </summary>
+ /// <param name="builder">The text normalization builder.</param>
+ /// <returns>The builder instance for fluent chaining.</returns>
public static ITextNormalizationBuilder AddBasicSanitizationRule(this ITextNormalizationBuilder builder)
{
ArgumentNullException.ThrowIfNull(builder);
return builder.AddRule(ServiceLifetime.Singleton);
}
+ /// <summary>
+ /// Adds the emoji rule to the text normalization pipeline.
+ /// Replaces standard Unicode emojis with their textual descriptions. Recommended Order: 100.
+ /// </summary>
+ /// <param name="builder">The text normalization builder.</param>
+ /// <returns>The builder instance for fluent chaining.</returns>
public static ITextNormalizationBuilder AddEmojiRule(this ITextNormalizationBuilder builder)
{
ArgumentNullException.ThrowIfNull(builder);
return builder.AddRule(ServiceLifetime.Singleton);
}
+ /// <summary>
+ /// Adds the <see cref="CurrencyNormalizationRule"/> to the text normalization pipeline.
+ /// Normalizes currency symbols and codes (e.g., "$10.50", "100 EUR") into spoken text. Recommended Order: 200.
+ /// </summary>
+ /// <param name="builder">The text normalization builder.</param>
+ /// <returns>The builder instance for fluent chaining.</returns>
public static ITextNormalizationBuilder AddCurrencyRule(this ITextNormalizationBuilder builder)
{
ArgumentNullException.ThrowIfNull(builder);
return builder.AddRule(ServiceLifetime.Singleton);
}
+ /// <summary>
+ /// Adds the <see cref="AbbreviationNormalizationRule"/> to the text normalization pipeline.
+ /// Expands common chat/gaming abbreviations (e.g., "lol", "gg"). Recommended Order: 300.
+ /// </summary>
+ /// <param name="builder">The text normalization builder.</param>
+ /// <returns>The builder instance for fluent chaining.</returns>
public static ITextNormalizationBuilder AddAbbreviationNormalizationRule(this ITextNormalizationBuilder builder)
{
ArgumentNullException.ThrowIfNull(builder);
return builder.AddRule(ServiceLifetime.Singleton);
}
+ /// <summary>
+ /// Adds the number normalization rule to the text normalization pipeline.
+ /// Converts cardinals, ordinals, decimals, and version-like numbers into words. Recommended Order: 400.
+ /// </summary>
+ /// <param name="builder">The text normalization builder.</param>
+ /// <returns>The builder instance for fluent chaining.</returns>
public static ITextNormalizationBuilder AddNumberNormalizationRule(this ITextNormalizationBuilder builder)
{
ArgumentNullException.ThrowIfNull(builder);
@@ -79,6 +110,12 @@ public static ITextNormalizationBuilder AddLetterRepetitionRule(this ITextNormal
return builder.AddRule(ServiceLifetime.Singleton);
}
+ /// <summary>
+ /// Adds the whitespace normalization rule to the text normalization pipeline.
+ /// Trims ends, collapses internal spaces, and adjusts spacing around punctuation. Recommended Order: 9000.
+ /// </summary>
+ /// <param name="builder">The text normalization builder.</param>
+ /// <returns>The builder instance for fluent chaining.</returns>
public static ITextNormalizationBuilder AddWhitespaceNormalizationRule(this ITextNormalizationBuilder builder)
{
ArgumentNullException.ThrowIfNull(builder);
diff --git a/TTSTextNormalization/Rules/AbbreviationNormalizationRule.cs b/TTSTextNormalization/Rules/AbbreviationNormalizationRule.cs
index 173c6d0..6461efd 100644
--- a/TTSTextNormalization/Rules/AbbreviationNormalizationRule.cs
+++ b/TTSTextNormalization/Rules/AbbreviationNormalizationRule.cs
@@ -9,6 +9,7 @@ namespace TTSTextNormalization.Rules;
///
public sealed partial class AbbreviationNormalizationRule : ITextNormalizationRule
{
+ /// <inheritdoc/>
public int Order => 300;
private const int RegexTimeoutMilliseconds = 150; // Slightly increased for larger pattern
@@ -73,8 +74,10 @@ public sealed partial class AbbreviationNormalizationRule : ITextNormalizationRu
{ "gpu", "g p u" }, // Spell out
}.ToFrozenDictionary(StringComparer.OrdinalIgnoreCase);
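+ /// <summary>Initializes a new instance of the <see cref="AbbreviationNormalizationRule"/> class.</summary>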
public AbbreviationNormalizationRule() { }
+ /// <inheritdoc/>
public string Apply(string inputText)
{
ArgumentNullException.ThrowIfNull(inputText);
diff --git a/TTSTextNormalization/Rules/BasicSanitizationRule.cs b/TTSTextNormalization/Rules/BasicSanitizationRule.cs
index 9c68a2f..e97ee17 100644
--- a/TTSTextNormalization/Rules/BasicSanitizationRule.cs
+++ b/TTSTextNormalization/Rules/BasicSanitizationRule.cs
@@ -31,6 +31,9 @@ public sealed partial class BasicSanitizationRule : ITextNormalizationRule
{ "–", "-" }, // En dash
}.ToFrozenDictionary(StringComparer.Ordinal);
+ /// <summary>
+ /// Initializes a new instance of the <see cref="BasicSanitizationRule"/> class.
+ /// </summary>
public BasicSanitizationRule() { }
///
diff --git a/TTSTextNormalization/Rules/CurrencyNormalizationRule.cs b/TTSTextNormalization/Rules/CurrencyNormalizationRule.cs
index d967475..e78f996 100644
--- a/TTSTextNormalization/Rules/CurrencyNormalizationRule.cs
+++ b/TTSTextNormalization/Rules/CurrencyNormalizationRule.cs
@@ -7,14 +7,21 @@
namespace TTSTextNormalization.Rules;
+/// <summary>
+/// Normalizes currency amounts based on symbols and ISO codes using a multi-pass approach.
+/// Handles patterns like $10, 10 USD, $10 USD, £5.50, 100 EUR, €100 EUR.
+/// Uses Humanizer for number-to-words conversion.
+/// </summary>
public sealed partial class CurrencyNormalizationRule : ITextNormalizationRule
{
+ /// <inheritdoc/>
public int Order => 200;
- private const int RegexTimeoutMilliseconds = 150;
+ private const int RegexTimeoutMilliseconds = 150; // Timeout per regex operation
private static readonly TimeSpan RegexTimeout = TimeSpan.FromMilliseconds(
RegexTimeoutMilliseconds
);
+ // Structure to hold TTS specific names for a currency
private readonly record struct CurrencyTTSInfo(
string Singular,
string Plural,
@@ -22,37 +29,163 @@ private readonly record struct CurrencyTTSInfo(
string FractionPlural
);
+ // Maps ISO Code (e.g., "USD") to its spoken form info
private static readonly FrozenDictionary IsoCodeToTTSInfoMap;
+
+ // Maps Symbol (e.g., "$") or Code (e.g., "USD") to its most likely ISO Code
private static readonly FrozenDictionary SymbolOrCodeToIsoCodeMap;
- private static readonly Regex CombinedCurrencyRegex;
+
+ // Regex definitions (will be populated in static constructor)
+ private static readonly Regex? SymbolNumberCodeRegexInstance;
+ private static readonly Regex? SymbolNumberRegexInstance;
+ private static readonly Regex? NumberCodeRegexInstance;
+
+ // Flag indicating successful initialization
private static readonly bool IsInitialized;
+ // Shared number pattern part used in regexes
+ private const string NumberPatternPart =
+ @"(?\d{1,3}(?:[,\s'.]\d{3})*|\d+)(?:[.,](?\d{1,2}))?";
+
static CurrencyNormalizationRule()
{
try
{
- // Define the Manual ISO -> TTS Mapping
+ // --- TTS Map Population ---
Dictionary ttsMapBuilder = new(
StringComparer.OrdinalIgnoreCase
)
{
- // Add more common currencies...
- { "USD", new("dollar", "dollars", "cent", "cents") },
+ // === Africa ===
+ { "DZD", new("Algerian dinar", "Algerian dinars", "santeem", "santeems") },
+ { "BIF", new("Burundian franc", "Burundian francs", "centime", "centimes") },
+ { "EGP", new("Egyptian pound", "Egyptian pounds", "piastre", "piastres") },
+ { "ETB", new("Ethiopian birr", "Ethiopian birrs", "santim", "santim") },
+ { "GHS", new("Ghanaian cedi", "Ghanaian cedis", "pesewa", "pesewas") },
+ { "KES", new("Kenyan shilling", "Kenyan shillings", "cent", "cents") },
+ { "MAD", new("Moroccan dirham", "Moroccan dirhams", "centime", "centimes") },
+ { "MUR", new("Mauritian rupee", "Mauritian rupees", "cent", "cents") },
+ { "NGN", new("Nigerian naira", "Nigerian naira", "kobo", "kobo") },
+ { "TND", new("Tunisian dinar", "Tunisian dinars", "millime", "millimes") },
+ { "TZS", new("Tanzanian shilling", "Tanzanian shillings", "cent", "cents") },
+ { "UGX", new("Ugandan shilling", "Ugandan shillings", "cent", "cents") },
+ {
+ "XOF",
+ new("West African CFA franc", "West African CFA francs", "centime", "centimes")
+ },
+ { "ZAR", new("South African rand", "South African rand", "cent", "cents") },
+ // === Asia ===
+ { "AFN", new("Afghan afghani", "Afghan afghanis", "pul", "puls") },
+ { "AMD", new("Armenian dram", "Armenian drams", "luma", "luma") },
+ { "AZN", new("Azerbaijani manat", "Azerbaijani manats", "qəpik", "qəpiks") },
+ { "BDT", new("Bangladeshi taka", "Bangladeshi taka", "poisha", "poisha") },
+ { "BND", new("Brunei dollar", "Brunei dollars", "sen", "sen") },
+ { "CNY", new("Chinese yuan", "Chinese yuan", "fen", "fen") },
+ { "GEL", new("Georgian lari", "Georgian lari", "tetri", "tetri") },
+ { "HKD", new("Hong Kong dollar", "Hong Kong dollars", "cent", "cents") },
+ { "IDR", new("Indonesian rupiah", "Indonesian rupiahs", "sen", "sen") },
+ { "INR", new("Indian rupee", "Indian rupees", "paisa", "paise") },
+ { "IQD", new("Iraqi dinar", "Iraqi dinars", "fils", "fils") },
+ { "JPY", new("Japanese yen", "Japanese yen", "sen", "sen") }, // Note: JPY fraction often ignored
+ { "KHR", new("Cambodian riel", "Cambodian riels", "sen", "sen") },
+ { "KGS", new("Kyrgystani som", "Kyrgystani soms", "tyiyn", "tyiyns") },
+ { "KRW", new("South Korean won", "South Korean won", "jeon", "jeon") },
+ { "KZT", new("Kazakhstani tenge", "Kazakhstani tenge", "tiyn", "tiyn") },
+ { "LAK", new("Lao kip", "Lao kips", "att", "att") },
+ { "LKR", new("Sri Lankan rupee", "Sri Lankan rupees", "cent", "cents") },
+ { "MNT", new("Mongolian tögrög", "Mongolian tögrögs", "möngö", "möngö") },
+ { "MYR", new("Malaysian ringgit", "Malaysian ringgits", "sen", "sen") },
+ { "NPR", new("Nepalese rupee", "Nepalese rupees", "paisa", "paise") },
+ { "PHP", new("Philippine peso", "Philippine pesos", "sentimo", "sentimo") },
+ { "PKR", new("Pakistani rupee", "Pakistani rupees", "paisa", "paisa") },
+ { "RUB", new("Russian ruble", "Russian rubles", "kopek", "kopeks") },
+ { "SGD", new("Singapore dollar", "Singapore dollars", "cent", "cents") },
+ { "THB", new("Thai baht", "Thai baht", "satang", "satang") },
+ { "TWD", new("new Taiwan dollar", "new Taiwan dollars", "cent", "cents") },
+ { "UZS", new("Uzbekistani som", "Uzbekistani som", "tiyin", "tiyin") },
+ { "VND", new("Vietnamese dong", "Vietnamese dong", "hao", "hao") },
+ // === Europe ===
+ { "ALL", new("Albanian lek", "Albanian lekë", "qindarkë", "qindarka") },
+ {
+ "BAM",
+ new(
+ "Bosnia-Herzegovina convertible mark",
+ "Bosnia-Herzegovina convertible marks",
+ "fening",
+ "feninga"
+ )
+ },
+ { "BGN", new("Bulgarian lev", "Bulgarian leva", "stotinka", "stotinki") },
+ { "BYN", new("Belarusian ruble", "Belarusian rubles", "kopek", "kopeks") },
+ { "CHF", new("Swiss franc", "Swiss francs", "rappen", "rappen") },
+ { "CZK", new("Czech koruna", "Czech koruny", "haler", "haleru") },
+ { "DKK", new("Danish krone", "Danish kroner", "øre", "øre") },
+ { "EUR", new("euro", "euros", "cent", "cents") },
+ { "GBP", new("British pound", "British pounds", "penny", "pence") },
+ { "HRK", new("Croatian kuna", "Croatian kunas", "lipa", "lipa") }, // Replaced by EUR, but kept for legacy
+ { "HUF", new("Hungarian forint", "Hungarian forints", "fillér", "fillér") },
+ { "ISK", new("Icelandic krona", "Icelandic kronur", "eyrir", "aurar") }, // Often no fractions used
+ { "MDL", new("Moldovan leu", "Moldovan lei", "ban", "bani") },
+ { "MKD", new("Macedonian denar", "Macedonian denari", "deni", "deni") },
+ { "NOK", new("Norwegian krone", "Norwegian kroner", "øre", "øre") },
+ { "PLN", new("Polish zloty", "Polish zlotys", "grosz", "groszy") },
+ { "RON", new("Romanian leu", "Romanian lei", "ban", "bani") },
+ { "RSD", new("Serbian dinar", "Serbian dinars", "para", "para") },
+ { "SEK", new("Swedish krona", "Swedish kronor", "öre", "öre") },
+ { "TRY", new("Turkish lira", "Turkish liras", "kurus", "kurus") },
+ { "UAH", new("Ukrainian hryvnia", "Ukrainian hryvnias", "kopiyka", "kopiyky") },
+ // === Middle East ===
+ { "AED", new("UAE dirham", "UAE dirhams", "fils", "fils") },
+ { "BHD", new("Bahraini dinar", "Bahraini dinars", "fils", "fils") },
+ { "ILS", new("Israeli new shekel", "Israeli new shekels", "agora", "agorot") },
+ { "JOD", new("Jordanian dinar", "Jordanian dinars", "piastre", "piastres") },
+ { "KWD", new("Kuwaiti dinar", "Kuwaiti dinars", "fils", "fils") },
+ { "LBP", new("Lebanese pound", "Lebanese pounds", "piastre", "piastres") },
+ { "OMR", new("Omani rial", "Omani rials", "baisa", "baisa") },
+ { "QAR", new("Qatari riyal", "Qatari riyals", "dirham", "dirhams") },
+ { "SAR", new("Saudi riyal", "Saudi riyals", "halala", "halalas") },
+ // === North America ===
{ "CAD", new("Canadian dollar", "Canadian dollars", "cent", "cents") },
+ { "CRC", new("Costa Rican colón", "Costa Rican colones", "céntimo", "céntimos") },
+ { "DOP", new("Dominican peso", "Dominican pesos", "centavo", "centavos") },
+ { "GTQ", new("Guatemalan quetzal", "Guatemalan quetzals", "centavo", "centavos") },
+ { "HNL", new("Honduran lempira", "Honduran lempiras", "centavo", "centavos") },
+ { "JMD", new("Jamaican dollar", "Jamaican dollars", "cent", "cents") },
+ { "MXN", new("Mexican peso", "Mexican pesos", "centavo", "centavos") },
+ { "NIO", new("Nicaraguan córdoba", "Nicaraguan córdobas", "centavo", "centavos") },
+ {
+ "PAB",
+ new("Panamanian balboa", "Panamanian balboas", "centésimo", "centésimos")
+ },
+ { "USD", new("US dollar", "US dollars", "cent", "cents") },
+ // === Oceania ===
{ "AUD", new("Australian dollar", "Australian dollars", "cent", "cents") },
- { "GBP", new("pound", "pounds", "penny", "pence") }, // Using "pound" for GBP
- { "EUR", new("euro", "euros", "cent", "cents") },
- { "JPY", new("yen", "yen", "sen", "sen") },
- { "INR", new("rupee", "rupees", "paisa", "paise") },
- { "BRL", new("real", "reais", "centavo", "centavos") },
- { "CNY", new("yuan", "yuan", "fen", "fen") },
- { "RUB", new("ruble", "rubles", "kopek", "kopeks") },
+ { "FJD", new("Fijian dollar", "Fijian dollars", "cent", "cents") },
+ { "NZD", new("New Zealand dollar", "New Zealand dollars", "cent", "cents") },
+ // === South America ===
+ { "ARS", new("Argentine peso", "Argentine pesos", "centavo", "centavos") },
+ { "BOB", new("Bolivian boliviano", "Bolivian bolivianos", "centavo", "centavos") },
+ { "BRL", new("Brazilian real", "Brazilian reais", "centavo", "centavos") },
+ { "CLP", new("Chilean peso", "Chilean pesos", "", "") }, // No standard fraction
+ { "COP", new("Colombian peso", "Colombian pesos", "centavo", "centavos") },
+ { "PEN", new("Peruvian sol", "Peruvian soles", "céntimo", "céntimos") },
+ { "PYG", new("Paraguayan guaraní", "Paraguayan guaraníes", "céntimo", "céntimos") },
+ { "UYU", new("Uruguayan peso", "Uruguayan pesos", "centésimo", "centésimos") },
+ {
+ "VES",
+ new(
+ "Venezuelan bolívar soberano",
+ "Venezuelan bolívares soberanos",
+ "céntimo",
+ "céntimos"
+ )
+ },
};
IsoCodeToTTSInfoMap = ttsMapBuilder.ToFrozenDictionary(
StringComparer.OrdinalIgnoreCase
);
- // Build Symbol/Code -> ISO Code Mapping
+ // --- Symbol/Code -> ISO Code Mapping Population ---
Dictionary symbolMapBuilder = new(StringComparer.OrdinalIgnoreCase);
HashSet uniqueSymbols = new(StringComparer.OrdinalIgnoreCase);
HashSet uniqueIsoCodes = new(StringComparer.OrdinalIgnoreCase);
@@ -61,13 +194,12 @@ static CurrencyNormalizationRule()
CultureInfo ci in CultureInfo.GetCultures(
CultureTypes.SpecificCultures | CultureTypes.InstalledWin32Cultures
)
- ) // Broader search
+ )
{
- // Skip problematic cultures
if (
ci.IsNeutralCulture
|| ci.LCID == CultureInfo.InvariantCulture.LCID
- || ci.Name == "" /* Invariant */
+ || ci.Name == ""
|| ci.Name.StartsWith("x-", StringComparison.Ordinal)
)
{
@@ -81,8 +213,8 @@ CultureInfo ci in CultureInfo.GetCultures(
}
catch (ArgumentException)
{
- continue; /* Cannot create RegionInfo */
- }
+ continue;
+ } // Cannot create RegionInfo
string isoCode = region.ISOCurrencySymbol;
string symbol = region.CurrencySymbol;
@@ -90,131 +222,231 @@ CultureInfo ci in CultureInfo.GetCultures(
// Only add if we have TTS info for this ISO code
if (!string.IsNullOrEmpty(isoCode) && IsoCodeToTTSInfoMap.ContainsKey(isoCode))
{
+ // Add the ISO code itself to the map (e.g., "USD" -> "USD")
if (symbolMapBuilder.TryAdd(isoCode, isoCode))
uniqueIsoCodes.Add(isoCode);
- // FIX: Prioritize JPY for ¥ symbol if not already mapped
- if (symbol == "¥" && !symbolMapBuilder.ContainsKey("¥"))
- {
- symbolMapBuilder.Add("¥", "JPY");
- uniqueSymbols.Add("¥");
- }
- else if (
- !string.IsNullOrEmpty(symbol)
- && symbol != "¥"
- && !symbolMapBuilder.ContainsKey(symbol)
- && !symbol.All(char.IsLetterOrDigit)
- )
+
+ // Add the symbol if it's not empty, not just letters/digits, and not already mapped
+ // Special handling for Yen symbol '¥' to prioritize JPY
+ if (!string.IsNullOrEmpty(symbol) && !symbol.All(char.IsLetterOrDigit))
{
- symbolMapBuilder.Add(symbol, isoCode);
- uniqueSymbols.Add(symbol);
+ if (symbol == "¥")
+ {
+ if (symbolMapBuilder.TryAdd("¥", "JPY")) // Map '¥' only once, prioritize JPY
+ {
+ uniqueSymbols.Add("¥");
+ }
+ }
+ else if (symbolMapBuilder.TryAdd(symbol, isoCode)) // Try add other symbols
+ {
+ uniqueSymbols.Add(symbol);
+ }
}
}
}
-
- if (IsoCodeToTTSInfoMap.ContainsKey("JPY"))
+ // Ensure JPY mapping for ¥ exists if JPY TTS info is present
+ if (IsoCodeToTTSInfoMap.ContainsKey("JPY") && !symbolMapBuilder.ContainsKey("¥"))
{
symbolMapBuilder["¥"] = "JPY";
- uniqueSymbols.Add("¥"); // Ensure it's in the symbol list for regex
+ uniqueSymbols.Add("¥");
}
SymbolOrCodeToIsoCodeMap = symbolMapBuilder.ToFrozenDictionary(
StringComparer.OrdinalIgnoreCase
);
- // Dynamically Generate the Regex
- IOrderedEnumerable escapedSymbols = uniqueSymbols
- .Select(Regex.Escape)
- .OrderByDescending(s => s.Length);
- IOrderedEnumerable escapedIsoCodes = uniqueIsoCodes
- .Select(Regex.Escape)
- .OrderByDescending(s => s.Length);
-
- string symbolPatternPart = string.Join("|", escapedSymbols);
- string codePatternPart = string.Join("|", escapedIsoCodes);
-
- // Number pattern allowing flexible separators but requiring at least one digit
- string numberPatternPart =
- @"(?\d{1,3}(?:[,\s'.]\d{3})*|\d+)(?:[.,](?\d{1,2}))?";
-
- string pattern1 = !string.IsNullOrEmpty(symbolPatternPart)
- ? $@"(?{symbolPatternPart})\s?{numberPatternPart}(?![\p{{L}}\p{{N}}])"
- : string.Empty;
- string pattern2 = !string.IsNullOrEmpty(codePatternPart)
- ? $@"(?{codePatternPart})(?![\p{{L}}\p{{N}}])"
- : string.Empty;
-
- string combinedPattern = !string.IsNullOrEmpty(pattern1) && !string.IsNullOrEmpty(pattern2)
- ? $"({pattern1})|({pattern2})"
- : !string.IsNullOrEmpty(pattern1) ? pattern1 : pattern2;
-
- if (!string.IsNullOrEmpty(combinedPattern))
+ // --- Generate Regex Patterns ---
+ string symbolPatternPart = string.Join(
+ "|",
+ uniqueSymbols.Select(Regex.Escape).OrderByDescending(s => s.Length)
+ );
+ string codePatternPart = string.Join(
+ "|",
+ uniqueIsoCodes.Select(Regex.Escape).OrderByDescending(s => s.Length)
+ );
+
+ // Only initialize regexes if symbols/codes were found
+ if (!string.IsNullOrEmpty(symbolPatternPart) && !string.IsNullOrEmpty(codePatternPart))
{
- CombinedCurrencyRegex = new Regex(
- combinedPattern,
- RegexOptions.Compiled | RegexOptions.IgnoreCase,
- RegexTimeout
- );
- IsInitialized = true;
- Console.WriteLine($"INFO: Currency Regex Initialized: {CombinedCurrencyRegex}");
+ // Pattern for Symbol + Number + Code (e.g., "$10 USD")
+ string patternSNC =
+ $@"(?{symbolPatternPart})\s?{NumberPatternPart}\s?(?{codePatternPart})(?![\p{{L}}\p{{N}}])";
+ // Pattern for Symbol + Number (e.g., "$10")
+ string patternSN =
+ $@"(?{symbolPatternPart})\s?{NumberPatternPart}(?![\p{{L}}\p{{N}}])";
+ // Pattern for Number + Code (e.g., "10 USD")
+ string patternNC =
+ $@"(?{codePatternPart})(?![\p{{L}}\p{{N}}])";
+
+ SymbolNumberCodeRegexInstance = BuildRegex(patternSNC);
+ SymbolNumberRegexInstance = BuildRegex(patternSN);
+ NumberCodeRegexInstance = BuildRegex(patternNC);
+
+ IsInitialized =
+ SymbolNumberCodeRegexInstance != null
+ && SymbolNumberRegexInstance != null
+ && NumberCodeRegexInstance != null;
+
+ if (!IsInitialized)
+ {
+ Console.Error.WriteLine(
+ "Warning: One or more currency regex patterns failed to initialize."
+ );
+ }
}
else
{
- CombinedCurrencyRegex = new Regex("(?!)", RegexOptions.Compiled); // Never matches
+ Console.Error.WriteLine(
+ "Warning: Could not generate currency regex patterns. No unique symbols or codes found/mapped."
+ );
IsInitialized = false;
- Console.Error.WriteLine("Warning: No valid currency patterns generated.");
}
}
catch (Exception ex)
{
Console.Error.WriteLine($"FATAL: Currency Rule static constructor failed: {ex}");
- CombinedCurrencyRegex = new Regex("(?!)", RegexOptions.Compiled);
IsInitialized = false;
+ throw; // Re-throw fatal exceptions during static init
+ }
+ }
+
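+ /// <summary>
+ /// Builds a compiled, case-insensitive <see cref="Regex"/> using the shared timeout; logs the error and returns null if construction throws.
+ /// </summary>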
+ private static Regex? BuildRegex(string pattern)
+ {
+ try
+ {
+ return new Regex(
+ pattern,
+ RegexOptions.Compiled | RegexOptions.IgnoreCase,
+ RegexTimeout
+ );
+ }
+ catch (Exception ex)
+ {
+ Console.Error.WriteLine($"Error compiling regex pattern '{pattern}': {ex.Message}");
+ return null;
}
}
- public CurrencyNormalizationRule() { }
+ /// <summary>
+ /// Initializes a new instance of the <see cref="CurrencyNormalizationRule"/> class.
+ /// </summary>
+ public CurrencyNormalizationRule() { } // Instance constructor
+ /// <inheritdoc/>
public string Apply(string inputText)
{
ArgumentNullException.ThrowIfNull(inputText);
+
if (!IsInitialized || string.IsNullOrEmpty(inputText))
return inputText;
string currentText = inputText;
try
{
- currentText = CombinedCurrencyRegex.Replace(currentText, CurrencyMatchEvaluator);
+ // Apply replacements in order of specificity: S+N+C -> S+N -> N+C
+ // This ensures that "$10 USD" is matched by the first regex and not partially by the second.
+ if (SymbolNumberCodeRegexInstance != null)
+ {
+ currentText = SymbolNumberCodeRegexInstance.Replace(
+ currentText,
+ CurrencyMatchEvaluator
+ );
+ }
+
+ if (SymbolNumberRegexInstance != null)
+ {
+ currentText = SymbolNumberRegexInstance.Replace(
+ currentText,
+ CurrencyMatchEvaluator
+ );
+ }
+
+ if (NumberCodeRegexInstance != null)
+ {
+ currentText = NumberCodeRegexInstance.Replace(currentText, CurrencyMatchEvaluator);
+ }
}
catch (RegexMatchTimeoutException ex)
{
- Console.Error.WriteLine($"Regex timeout during currency normalization: {ex.Message}");
+ Console.Error.WriteLine(
+ $"Regex timeout during currency normalization pass: {ex.Message}"
+ );
+ // Return text processed up to the point of timeout
}
- catch (Exception ex)
+ catch (Exception ex) // Catch other potential errors during replacement
{
Console.Error.WriteLine($"Error during currency normalization: {ex.Message}");
+ // Optionally return original text or partially processed text
+ // return inputText; // Safer fallback
}
return currentText;
}
+ /// <summary>
+ /// Shared evaluator for all currency regex matches. Determines the ISO code and converts the amount to its spoken form.
+ /// </summary>
private static string CurrencyMatchEvaluator(Match match)
{
- // Prioritize symbol group if it exists and matched (pattern 1 or 2 specific capture)
- // This requires naming the outer groups in the combined pattern. Let's adjust:
- // combinedPattern = $"(?{pattern1})|(?{pattern2})";
- // But for simplicity now, rely on the 'symbol' group captured by either.
- string detectedSymbolOrCode = match.Groups["symbol"].Value;
- string integerPartStr = match.Groups["integer"].Value;
+ string? isoCode = null;
+ string integerPartStr = match.Groups["integer"].Value; // Always expected
string fractionPartStr = match.Groups["fraction"].Success
? match.Groups["fraction"].Value
: string.Empty;
- if (!SymbolOrCodeToIsoCodeMap.TryGetValue(detectedSymbolOrCode, out string? isoCode))
- return match.Value;
- if (!IsoCodeToTTSInfoMap.TryGetValue(isoCode, out CurrencyTTSInfo currencyTTSInfo))
+ // Determine ISO code based on captured groups in the specific match
+ // Check which groups are present to infer which regex pattern succeeded
+ if (match.Groups["symbol"].Success && match.Groups["code"].Success)
+ {
+ // S+N+C match (from SymbolNumberCodeRegexInstance): Prioritize the explicit code
+ string explicitCode = match.Groups["code"].Value;
+ // Verify the explicit code exists in our TTS map
+ if (IsoCodeToTTSInfoMap.ContainsKey(explicitCode))
+ {
+ isoCode = explicitCode;
+ }
+ else
+ {
+ // Fallback to symbol's code if explicit code isn't recognized (less likely but possible)
+ SymbolOrCodeToIsoCodeMap.TryGetValue(match.Groups["symbol"].Value, out isoCode);
+ }
+ }
+ else if (match.Groups["symbol"].Success)
+ {
+ // S+N match (from SymbolNumberRegexInstance): Use symbol's code from the map
+ SymbolOrCodeToIsoCodeMap.TryGetValue(match.Groups["symbol"].Value, out isoCode);
+ }
+ else if (match.Groups["code"].Success)
+ {
+ // N+C match (from NumberCodeRegexInstance): Use the code directly if valid
+ string explicitCode = match.Groups["code"].Value;
+ // Check if the code is known in the symbol/code map AND has TTS info
+ if (
+ SymbolOrCodeToIsoCodeMap.ContainsKey(explicitCode)
+ && IsoCodeToTTSInfoMap.ContainsKey(explicitCode)
+ )
+ {
+ isoCode = explicitCode;
+ }
+ }
+
+ // --- Proceed if a valid ISO code was found and is supported ---
+ if (
+ isoCode == null
+ || !IsoCodeToTTSInfoMap.TryGetValue(isoCode, out CurrencyTTSInfo currencyTTSInfo)
+ )
+ {
+ // Cannot determine or unsupported currency, return the original matched text
return match.Value;
+ }
+
+ // --- Parse Numbers ---
+ // Remove common separators like commas, spaces, apostrophes, periods (for thousands)
+ string integerForParsing = CleanIntegerRegex().Replace(integerPartStr, "");
- string integerForParsing = Regex.Replace(integerPartStr, "[,' .]", ""); // Remove common separators
if (
!long.TryParse(
integerForParsing,
@@ -224,13 +456,15 @@ out long integerValue
)
)
{
- return match.Value;
+ return match.Value; // Integer parsing failed
}
int fractionValue = 0;
if (!string.IsNullOrEmpty(fractionPartStr))
{
- string paddedFraction = fractionPartStr.PadRight(2, '0');
+ // Ensure fraction is treated as two digits (e.g., ".5" becomes 50)
+ string paddedFraction =
+ fractionPartStr.Length == 1 ? fractionPartStr + "0" : fractionPartStr;
if (
!int.TryParse(
paddedFraction,
@@ -242,21 +476,26 @@ out fractionValue
|| fractionValue > 99
)
{
- return match.Value;
+ return match.Value; // Invalid fraction format or value
}
}
+ // --- Convert to Words using Humanizer ---
try
{
- string integerWords = integerValue.ToWords();
- string? fractionWords = fractionValue > 0 ? fractionValue.ToWords() : null;
+ // Use InvariantCulture for ToWords to get consistent English number words
+ string integerWords = integerValue.ToWords(CultureInfo.InvariantCulture);
+ string? fractionWords =
+ fractionValue > 0 ? fractionValue.ToWords(CultureInfo.InvariantCulture) : null;
+ // --- Build Spoken String ---
StringBuilder builder = new();
builder.Append(integerWords);
builder.Append(' ');
builder.Append(integerValue == 1 ? currencyTTSInfo.Singular : currencyTTSInfo.Plural);
- if (fractionWords != null && fractionValue > 0) // Ensure fraction > 0
+ // Only add fraction part if it's greater than zero
+ if (fractionWords != null && fractionValue > 0)
{
builder.Append(' ');
builder.Append(fractionWords);
@@ -268,12 +507,19 @@ out fractionValue
);
}
+ // Pad result with spaces for proper separation in the final text
return $" {builder} ";
}
catch (Exception ex)
{
- Console.Error.WriteLine($"Humanizer failed for '{match.Value}': {ex.Message}");
- return match.Value;
+ // Log Humanizer errors
+ Console.Error.WriteLine(
+ $"Humanizer failed for '{match.Value}' (ISO: {isoCode}): {ex.Message}"
+ );
+ return match.Value; // Return original on Humanizer error
}
}
+
+ [GeneratedRegex("[,' .]", RegexOptions.Compiled)]
+ private static partial Regex CleanIntegerRegex();
}
diff --git a/TTSTextNormalization/Rules/EmojiNormalizationRule.cs b/TTSTextNormalization/Rules/EmojiNormalizationRule.cs
index b32c1ee..1c9e206 100644
--- a/TTSTextNormalization/Rules/EmojiNormalizationRule.cs
+++ b/TTSTextNormalization/Rules/EmojiNormalizationRule.cs
@@ -1,6 +1,6 @@
-using TTSTextNormalization.EmojiDataGenerated;
-using System.Text.RegularExpressions;
+using System.Text.RegularExpressions;
using TTSTextNormalization.Abstractions;
+using TTSTextNormalization.EmojiDataGenerated;
namespace TTSTextNormalization.Rules;
@@ -10,8 +10,10 @@ namespace TTSTextNormalization.Rules;
///
public sealed class EmojiNormalizationRule : ITextNormalizationRule
{
+ /// <inheritdoc/>
public int Order => 100;
+ ///
public EmojiNormalizationRule() { }
///
@@ -47,8 +49,10 @@ private static string EmojiMatchEvaluator(Match match)
{
// The Regex ensures we only match keys present in the map.
if (EmojiData.EmojiToNameMap.TryGetValue(match.Value, out string? name))
+ {
// Pad with spaces for TTS separation. Use the 'name' from the JSON.
return $" {name} ";
+ }
else
{
// Should not happen if Regex and Map are generated correctly.
diff --git a/TTSTextNormalization/Rules/ExcessivePunctuationRule.cs b/TTSTextNormalization/Rules/ExcessivePunctuationRule.cs
index be8f953..f295419 100644
--- a/TTSTextNormalization/Rules/ExcessivePunctuationRule.cs
+++ b/TTSTextNormalization/Rules/ExcessivePunctuationRule.cs
@@ -8,11 +8,14 @@ namespace TTSTextNormalization.Rules;
///
public sealed partial class ExcessivePunctuationRule : ITextNormalizationRule
{
+ /// <inheritdoc/>
public int Order => 500;
private const int RegexTimeoutMilliseconds = 100;
+ ///
public ExcessivePunctuationRule() { }
+ /// <inheritdoc/>
public string Apply(string inputText)
{
ArgumentNullException.ThrowIfNull(inputText);
diff --git a/TTSTextNormalization/Rules/LetterRepetitionRule.cs b/TTSTextNormalization/Rules/LetterRepetitionRule.cs
index aed14b1..c1ba18c 100644
--- a/TTSTextNormalization/Rules/LetterRepetitionRule.cs
+++ b/TTSTextNormalization/Rules/LetterRepetitionRule.cs
@@ -8,11 +8,14 @@ namespace TTSTextNormalization.Rules;
///
public sealed partial class LetterRepetitionRule : ITextNormalizationRule
{
+ /// <inheritdoc/>
public int Order => 510;
private const int RegexTimeoutMilliseconds = 150; // Might need slightly more time for complex strings
+ ///
public LetterRepetitionRule() { }
+ /// <inheritdoc/>
public string Apply(string inputText)
{
ArgumentNullException.ThrowIfNull(inputText);
diff --git a/TTSTextNormalization/Rules/NumberNormalizationRule.cs b/TTSTextNormalization/Rules/NumberNormalizationRule.cs
index c128519..01e2f0c 100644
--- a/TTSTextNormalization/Rules/NumberNormalizationRule.cs
+++ b/TTSTextNormalization/Rules/NumberNormalizationRule.cs
@@ -6,16 +6,26 @@
namespace TTSTextNormalization.Rules;
+/// <summary>
+/// Normalizes standalone numbers, including cardinals, ordinals, decimals, and multi-dot sequences (like version numbers).
+/// Uses Humanizer for cardinal and ordinal word conversion.
+/// </summary>
public sealed partial class NumberNormalizationRule : ITextNormalizationRule
{
+ /// <inheritdoc/>
public int Order => 400;
+
private const int RegexTimeoutMilliseconds = 150;
// Keep DigitWords for the new multi-dot logic
private static readonly string[] DigitWords = ["zero", "one", "two", "three", "four", "five", "six", "seven", "eight", "nine"];
+ /// <summary>
+ /// Initializes a new instance of the <see cref="NumberNormalizationRule"/> class.
+ /// </summary>
public NumberNormalizationRule() { }
+ /// <inheritdoc/>
public string Apply(string inputText)
{
ArgumentNullException.ThrowIfNull(inputText);
diff --git a/TTSTextNormalization/Rules/WhitespaceNormalizationRule.cs b/TTSTextNormalization/Rules/WhitespaceNormalizationRule.cs
index 4ef3639..07dbb9b 100644
--- a/TTSTextNormalization/Rules/WhitespaceNormalizationRule.cs
+++ b/TTSTextNormalization/Rules/WhitespaceNormalizationRule.cs
@@ -9,11 +9,14 @@ namespace TTSTextNormalization.Rules;
///
public sealed partial class WhitespaceNormalizationRule : ITextNormalizationRule
{
+ /// <inheritdoc/>
public int Order => 9000;
private const int RegexTimeoutMilliseconds = 100; // Timeout for each step
+ ///
public WhitespaceNormalizationRule() { }
+ /// <inheritdoc/>
public string Apply(string inputText)
{
ArgumentNullException.ThrowIfNull(inputText);
diff --git a/TTSTextNormalization/TTSTextNormalization.csproj b/TTSTextNormalization/TTSTextNormalization.csproj
index d3cb932..3c825c2 100644
--- a/TTSTextNormalization/TTSTextNormalization.csproj
+++ b/TTSTextNormalization/TTSTextNormalization.csproj
@@ -35,6 +35,7 @@
true
true
+ true
v