Inlines { get; } = inlines;
+
+ public override void RenderHtml(StringBuilder sb)
+ {
+ sb.Append("");
+ foreach (var inline in Inlines)
+ inline.RenderHtml(sb);
+ sb.Append("
");
+ }
+}
\ No newline at end of file
diff --git a/cs/Markdown/Parsing/Nodes/RootNode.cs b/cs/Markdown/Parsing/Nodes/RootNode.cs
new file mode 100644
index 000000000..fa43369db
--- /dev/null
+++ b/cs/Markdown/Parsing/Nodes/RootNode.cs
@@ -0,0 +1,14 @@
+using System.Text;
+
+namespace Markdown.Parsing.Nodes;
+
+/// <summary>
+/// Root of the parsed document: an ordered list of block nodes.
+/// </summary>
+public class RootNode(List blocks) : INode
+{
+    /// <summary>Blocks in the order they are rendered.</summary>
+    public List Blocks { get; } = blocks;
+
+    /// <summary>Appends the HTML of each block to <paramref name="sb"/>, first to last.</summary>
+    public void RenderHtml(StringBuilder sb) =>
+        Blocks.ForEach(block => block.RenderHtml(sb));
+}
\ No newline at end of file
diff --git a/cs/Markdown/Parsing/Nodes/StrongNode.cs b/cs/Markdown/Parsing/Nodes/StrongNode.cs
new file mode 100644
index 000000000..b7743a22c
--- /dev/null
+++ b/cs/Markdown/Parsing/Nodes/StrongNode.cs
@@ -0,0 +1,16 @@
+using System.Text;
+
+namespace Markdown.Parsing.Nodes;
+
+/// <summary>
+/// Inline node for strong emphasis: renders its children inside a
+/// <c>&lt;strong&gt;</c> element.
+/// </summary>
+public class StrongNode(List children) : InlineTypeNode
+{
+    /// <summary>Nodes rendered between the opening and closing tag.</summary>
+    public List Children { get; } = children;
+
+    /// <summary>Appends the opening tag, every child in order, then the closing tag.</summary>
+    public override void RenderHtml(StringBuilder sb)
+    {
+        sb.Append("<strong>");
+        foreach (var child in Children)
+        {
+            child.RenderHtml(sb);
+        }
+
+        sb.Append("</strong>");
+    }
+}
\ No newline at end of file
diff --git a/cs/Markdown/Parsing/Nodes/TextNode.cs b/cs/Markdown/Parsing/Nodes/TextNode.cs
new file mode 100644
index 000000000..c060798ed
--- /dev/null
+++ b/cs/Markdown/Parsing/Nodes/TextNode.cs
@@ -0,0 +1,14 @@
+using System.Text;
+using Markdown.Rendering;
+
+namespace Markdown.Parsing.Nodes;
+
+/// <summary>
+/// Leaf inline node that carries literal text.
+/// </summary>
+public class TextNode(string text) : InlineTypeNode
+{
+    /// <summary>The raw, unescaped text.</summary>
+    public string Text { get; } = text;
+
+    /// <summary>Appends <see cref="Text"/> after passing it through <c>HtmlRenderer.EscapeHtml</c>.</summary>
+    public override void RenderHtml(StringBuilder sb) => sb.Append(HtmlRenderer.EscapeHtml(Text));
+}
\ No newline at end of file
diff --git a/cs/Markdown/Parsing/Parser.cs b/cs/Markdown/Parsing/Parser.cs
new file mode 100644
index 000000000..0e6d5b9ee
--- /dev/null
+++ b/cs/Markdown/Parsing/Parser.cs
@@ -0,0 +1,96 @@
+using Markdown.Tokenizing;
+using Markdown.Parsing.Nodes;
+
+namespace Markdown.Parsing;
+
+/// <summary>
+/// Builds the document tree from a flat token list.
+/// </summary>
+public class Parser
+{
+    /// <summary>
+    /// Parses <paramref name="tokens"/> into a <see cref="RootNode"/>, one block per non-empty line.
+    /// </summary>
+    /// <exception cref="ArgumentNullException"><paramref name="tokens"/> is null.</exception>
+    public static RootNode Parse(List tokens)
+    {
+        ArgumentNullException.ThrowIfNull(tokens);
+
+        var blocks = new List();
+        var cursor = new ParserCursor(tokens);
+
+        while (!cursor.End)
+        {
+            // A line break seen between blocks produces no node; just step over it.
+            if (cursor.Current.Type != TokenType.EndOfLine)
+            {
+                blocks.Add(ParseBlock(cursor, tokens));
+            }
+            else
+            {
+                cursor.MoveNext();
+            }
+        }
+
+        return new RootNode(blocks);
+    }
+
+    /// <summary>
+    /// Parses one line into a header (when it starts with a hash token followed by whitespace)
+    /// or a paragraph, and consumes the line break that terminates it.
+    /// </summary>
+    private static BlockTypeNode ParseBlock(ParserCursor cursor, IList tokens)
+    {
+        var isHeader = cursor.Current.Type == TokenType.Hash &&
+                       cursor.Peek().Type == TokenType.Whitespace;
+
+        if (isHeader)
+        {
+            // Drop the hash and the whitespace after it; neither is part of the header text.
+            cursor.MoveNext();
+            cursor.MoveNext();
+        }
+
+        var inlines = ParseInlinesInBlock(cursor, tokens);
+
+        if (cursor.Current.Type == TokenType.EndOfLine)
+        {
+            cursor.MoveNext();
+        }
+
+        if (isHeader)
+        {
+            return new HeaderNode(inlines);
+        }
+
+        return new ParagraphNode(inlines);
+    }
+
+    /// <summary>
+    /// Collects inline nodes until the cursor reaches the end of the line or of the token list.
+    /// Underscore markers still pending at that point are re-inserted as literal text.
+    /// </summary>
+    private static List ParseInlinesInBlock(ParserCursor cursor, IList tokens)
+    {
+        var children = new List();
+        var underscores = new List<(TokenType Type, int ChildrenIndex)>();
+
+        while (!cursor.End)
+        {
+            var token = cursor.Current;
+            if (token.Type is TokenType.EndOfLine or TokenType.EndOfFile)
+            {
+                break;
+            }
+
+            if (token.Type == TokenType.Escape)
+            {
+                EscapeHandler.HandleEscape(children, cursor, tokens);
+            }
+            else if (token.Type == TokenType.LeftBracket)
+            {
+                LinkHandler.HandleLink(children, cursor, tokens);
+            }
+            else if (token.Type is TokenType.Underscore or TokenType.DoubleUnderscore)
+            {
+                UnderscoreHandler.HandleUnderscores(children, underscores, cursor, tokens);
+            }
+            else
+            {
+                // Text, whitespace and every token without special handling are kept verbatim.
+                AddLiteral(children, token.Value, cursor);
+            }
+        }
+
+        UnderscoreHandler.InsertUnmatchedUnderscores(children, underscores);
+        return children;
+    }
+
+    /// <summary>Appends <paramref name="value"/> as a text node and advances the cursor by one token.</summary>
+    private static void AddLiteral(List children, string value, ParserCursor cursor)
+    {
+        var literal = new TextNode(value);
+        children.Add(literal);
+        cursor.MoveNext();
+    }
+}
\ No newline at end of file
diff --git a/cs/Markdown/Parsing/ParserCursor.cs b/cs/Markdown/Parsing/ParserCursor.cs
new file mode 100644
index 000000000..a34eede7d
--- /dev/null
+++ b/cs/Markdown/Parsing/ParserCursor.cs
@@ -0,0 +1,31 @@
+using Markdown.Tokenizing;
+
+namespace Markdown.Parsing;
+
+/// <summary>
+/// Position inside a token list. The cursor never moves past the last token, and
+/// <see cref="End"/> becomes true as soon as it stands on that last token.
+/// </summary>
+public class ParserCursor(IList tokens)
+{
+    private int index;
+
+    /// <summary>Zero-based position of the current token.</summary>
+    public int Index => index;
+
+    /// <summary>True when the cursor is on (or beyond) the last token of the list.</summary>
+    public bool End => tokens.Count - 1 <= index;
+
+    /// <summary>Token under the cursor; the last token once <see cref="End"/> is true.</summary>
+    public Token Current => tokens[End ? tokens.Count - 1 : index];
+
+    /// <summary>Returns the token after the current one without moving, or the last token if there is none.</summary>
+    public Token Peek()
+    {
+        var lookahead = index + 1;
+        if (lookahead >= tokens.Count)
+        {
+            lookahead = tokens.Count - 1;
+        }
+
+        return tokens[lookahead];
+    }
+
+    /// <summary>Advances by one token; does nothing once <see cref="End"/> is true.</summary>
+    public void MoveNext()
+    {
+        if (End)
+        {
+            return;
+        }
+
+        index++;
+    }
+
+    /// <summary>Moves to <paramref name="newIndex"/>, clamped to the range of valid positions.</summary>
+    public void IndexJumpTo(int newIndex)
+    {
+        var lastIndex = tokens.Count - 1;
+        var target = newIndex < 0 ? 0 : newIndex;
+
+        index = target > lastIndex ? lastIndex : target;
+    }
+}
\ No newline at end of file
diff --git a/cs/Markdown/Parsing/UnderscoreHandler.cs b/cs/Markdown/Parsing/UnderscoreHandler.cs
new file mode 100644
index 000000000..9f972ca5e
--- /dev/null
+++ b/cs/Markdown/Parsing/UnderscoreHandler.cs
@@ -0,0 +1,214 @@
+using Markdown.Tokenizing;
+using Markdown.Parsing.Nodes;
+
+namespace Markdown.Parsing;
+
+/// <summary>
+/// Resolves <c>_</c> and <c>__</c> tokens of one block into <see cref="EmNode"/> and
+/// <see cref="StrongNode"/> nodes, or into literal text when they cannot form a valid pair.
+/// </summary>
+/// <remarks>
+/// Markers that may still be closed are kept in a caller-owned list of
+/// <c>(Type, ChildrenIndex)</c> entries, in the order they appeared in the text.
+/// <c>ChildrenIndex</c> is the position in the children list where the marker's literal
+/// belongs if it is never closed.
+/// </remarks>
+public static class UnderscoreHandler
+{
+    /// <summary>
+    /// Handles the underscore token under <paramref name="cursor"/>: closes a pending span,
+    /// records the token as a possible opener, or emits it as literal text. Always advances the
+    /// cursor by one token.
+    /// </summary>
+    /// <param name="children">Inline nodes built so far for the current block.</param>
+    /// <param name="underscores">Pending openers; updated in place.</param>
+    /// <param name="cursor">Cursor positioned on the underscore token.</param>
+    /// <param name="tokens">Full token list, used to look at the neighbouring tokens.</param>
+    public static void HandleUnderscores(
+        List children,
+        List<(TokenType Type, int ChildrenIndex)> underscores,
+        ParserCursor cursor,
+        IList tokens)
+    {
+        var currentToken = cursor.Current;
+        var previousToken = tokens.ElementAtOrDefault(cursor.Index - 1);
+        var nextToken = tokens.ElementAtOrDefault(cursor.Index + 1);
+
+        if (IsUnderscoreBetweenDigits(previousToken, nextToken))
+        {
+            AddUnderscoreAsLiteral(children, currentToken.Value, cursor);
+            return;
+        }
+
+        // Only a marker that directly follows non-whitespace may close a span.
+        if (!IsTokenWhitespaceLike(previousToken) &&
+            TryCloseExistingHighlight(underscores, currentToken, children, cursor))
+        {
+            return;
+        }
+
+        // A marker followed by whitespace cannot open a span; neither can a "__" that would
+        // be crossed by a single-underscore span before its own closer.
+        var cannotOpen = IsTokenWhitespaceLike(nextToken) ||
+                         (currentToken.Type == TokenType.DoubleUnderscore &&
+                          DoesDoubleUnderscoreBreak(cursor, tokens));
+
+        if (cannotOpen)
+        {
+            AddUnderscoreAsLiteral(children, currentToken.Value, cursor);
+            return;
+        }
+
+        underscores.Add((currentToken.Type, children.Count));
+        cursor.MoveNext();
+    }
+
+    /// <summary>
+    /// Inserts the literal text of every opener that was never closed at its recorded position.
+    /// </summary>
+    public static void InsertUnmatchedUnderscores(List children,
+        List<(TokenType Type, int ChildrenIndex)> underscores)
+    {
+        // Back to front, so an insertion never invalidates a position that is still to be used.
+        for (var i = underscores.Count - 1; i >= 0; i--)
+        {
+            var (type, index) = underscores[i];
+            children.Insert(index, new TextNode(LiteralOf(type)));
+        }
+    }
+
+    /// <summary>
+    /// Tries to use the current token as the closer of the most recent pending opener of the
+    /// same type. Returns false, leaving everything untouched, when there is no such opener,
+    /// the span would be empty, or the span is not a valid highlight.
+    /// </summary>
+    private static bool TryCloseExistingHighlight(
+        List<(TokenType Type, int ChildrenIndex)> underscores,
+        Token currentToken,
+        List children,
+        ParserCursor cursor)
+    {
+        var highlightingType = currentToken.Type;
+
+        var openerIndex = FindMatchingOpenerIndex(underscores, highlightingType);
+        if (openerIndex < 0) return false;
+
+        var openerUnderscore = underscores[openerIndex];
+        var startIndex = openerUnderscore.ChildrenIndex;
+        var innerTokensCount = children.Count - startIndex;
+
+        if (innerTokensCount == 0) return false;
+
+        var innerTokens = children.GetRange(startIndex, innerTokensCount);
+
+        if (!IsValidHighlighting(underscores, highlightingType, openerUnderscore, innerTokens)) return false;
+
+        if (HasIntersection(underscores, openerUnderscore, highlightingType, children, openerIndex, out var innerIndex))
+        {
+            // Crossing spans: neither is rendered, so opener, inner marker and closer all become text.
+            InsertIntersection(children, underscores, openerUnderscore, underscores[innerIndex], openerIndex, innerIndex);
+            AddUnderscoreAsLiteral(children, currentToken.Value, cursor);
+            return true;
+        }
+
+        CloseHighlight(children, underscores, openerIndex, highlightingType, innerTokens, startIndex);
+        cursor.MoveNext();
+        return true;
+    }
+
+    /// <summary>Index of the last pending opener of <paramref name="type"/>, or -1 when none exists.</summary>
+    private static int FindMatchingOpenerIndex(List<(TokenType Type, int ChildrenIndex)> underscores, TokenType type)
+    {
+        for (var i = underscores.Count - 1; i >= 0; i--)
+        {
+            if (underscores[i].Type == type) return i;
+        }
+
+        return -1;
+    }
+
+    /// <summary>
+    /// A double-underscore span is always accepted. A single-underscore span is accepted when a
+    /// pending "__" opener was recorded at a smaller position, or when none of its direct text
+    /// children contains whitespace.
+    /// </summary>
+    private static bool IsValidHighlighting(
+        List<(TokenType Type, int ChildrenIndex)> underscores,
+        TokenType highlightingType,
+        (TokenType Type, int ChildrenIndex) openerUnderscore,
+        List innerTokens)
+    {
+        if (highlightingType != TokenType.Underscore) return true;
+
+        var highlightingIsInsideDoubleUnderscore = underscores.Any(underscore =>
+            underscore.Type == TokenType.DoubleUnderscore &&
+            underscore.ChildrenIndex < openerUnderscore.ChildrenIndex);
+
+        var areThereAnyWhitespaces = innerTokens.Any(t => t is TextNode tn && tn.Text.Any(char.IsWhiteSpace));
+
+        return highlightingIsInsideDoubleUnderscore || !areThereAnyWhitespaces;
+    }
+
+    /// <summary>
+    /// Looks for a pending opener of the other type that was recorded strictly inside the span
+    /// being closed; such an opener means the two spans cross each other.
+    /// </summary>
+    private static bool HasIntersection(
+        List<(TokenType Type, int ChildrenIndex)> underscores,
+        (TokenType Type, int ChildrenIndex) opener,
+        TokenType highlightingType,
+        List children,
+        int openerIndex,
+        out int intersectionIndex)
+    {
+        for (var i = openerIndex + 1; i < underscores.Count; i++)
+        {
+            if (underscores[i].Type == highlightingType ||
+                underscores[i].ChildrenIndex <= opener.ChildrenIndex ||
+                underscores[i].ChildrenIndex >= children.Count)
+                continue;
+
+            intersectionIndex = i;
+            return true;
+        }
+
+        intersectionIndex = -1;
+        return false;
+    }
+
+    /// <summary>
+    /// Replaces the children from <paramref name="startIndex"/> to the end with a single
+    /// emphasis node and drops the matched opener from the pending list.
+    /// </summary>
+    private static void CloseHighlight(
+        List children,
+        List<(TokenType Type, int ChildrenIndex)> underscores,
+        int openerIndex,
+        TokenType highlightingType,
+        List innerTokens,
+        int startIndex)
+    {
+        children.RemoveRange(startIndex, innerTokens.Count);
+
+        // Openers recorded after the matched one lie inside the span that is being wrapped.
+        // Their positions refer to children that no longer exist at the top level, so they are
+        // turned into literal text inside the span now; keeping them pending would later insert
+        // them at a stale (possibly out-of-range) position.
+        for (var i = underscores.Count - 1; i > openerIndex; i--)
+        {
+            var (type, index) = underscores[i];
+            innerTokens.Insert(index - startIndex, new TextNode(LiteralOf(type)));
+        }
+
+        underscores.RemoveRange(openerIndex, underscores.Count - openerIndex);
+
+        InlineTypeNode node = highlightingType == TokenType.DoubleUnderscore
+            ? new StrongNode(innerTokens)
+            : new EmNode(innerTokens);
+
+        children.Add(node);
+    }
+
+    /// <summary>
+    /// Turns two crossing openers into literal text at their recorded positions and removes
+    /// them from the pending list.
+    /// </summary>
+    private static void InsertIntersection(
+        List children,
+        List<(TokenType Type, int ChildrenIndex)> underscores,
+        (TokenType Type, int ChildrenIndex) opener,
+        (TokenType Type, int ChildrenIndex) inner,
+        int openerIndex, int innerIndex)
+    {
+        // Inner first: it sits further right, so the opener's position is still valid afterwards.
+        children.Insert(inner.ChildrenIndex, new TextNode(LiteralOf(inner.Type)));
+        children.Insert(opener.ChildrenIndex, new TextNode(LiteralOf(opener.Type)));
+
+        // Openers that stay pending and appeared after one of the two markers are now preceded
+        // by the literal(s) just inserted, so their recorded positions move right accordingly.
+        for (var i = openerIndex + 1; i < underscores.Count; i++)
+        {
+            if (i == innerIndex) continue;
+
+            var shift = i > innerIndex ? 2 : 1;
+            underscores[i] = (underscores[i].Type, underscores[i].ChildrenIndex + shift);
+        }
+
+        // innerIndex is always greater than openerIndex, so removing it first keeps openerIndex valid.
+        underscores.RemoveAt(innerIndex);
+        underscores.RemoveAt(openerIndex);
+    }
+
+    /// <summary>Appends <paramref name="value"/> as plain text and advances the cursor by one token.</summary>
+    private static void AddUnderscoreAsLiteral(List children, string value, ParserCursor cursor)
+    {
+        children.Add(new TextNode(value));
+        cursor.MoveNext();
+    }
+
+    /// <summary>Source text of an underscore marker of the given type.</summary>
+    private static string LiteralOf(TokenType type)
+    {
+        return type == TokenType.DoubleUnderscore ? "__" : "_";
+    }
+
+    /// <summary>True for a missing token, whitespace, a line break, or the end of input.</summary>
+    private static bool IsTokenWhitespaceLike(Token? token)
+    {
+        return token == null ||
+               token.Type == TokenType.Whitespace ||
+               token.Type == TokenType.EndOfLine ||
+               token.Type == TokenType.EndOfFile;
+    }
+
+    /// <summary>
+    /// True when the previous text token ends with a digit and the next text token starts with one.
+    /// </summary>
+    private static bool IsUnderscoreBetweenDigits(Token? previousToken, Token? nextToken)
+    {
+        var leftTokenIsDigit = previousToken is { Type: TokenType.Text, Value.Length: > 0 } &&
+                               char.IsDigit(previousToken.Value.Last());
+        var rightTokenIsDigit = nextToken is { Type: TokenType.Text, Value.Length: > 0 } &&
+                                char.IsDigit(nextToken.Value.First());
+        return leftTokenIsDigit && rightTokenIsDigit;
+    }
+
+    /// <summary>
+    /// Scans forward on the current line to the next "__" token and returns true when an odd
+    /// number of single-underscore tokens (not preceded by an escape token) lies in between.
+    /// Returns false when the line ends before another "__" is found.
+    /// </summary>
+    private static bool DoesDoubleUnderscoreBreak(ParserCursor cursor, IList tokens)
+    {
+        var singleUnderscoresCount = 0;
+
+        for (var i = cursor.Index + 1; i < tokens.Count; i++)
+        {
+            var currentToken = tokens[i];
+
+            if (currentToken.Type is TokenType.EndOfLine or TokenType.EndOfFile) return false;
+            if (currentToken.Type == TokenType.DoubleUnderscore) return (singleUnderscoresCount % 2) == 1;
+            if (currentToken.Type != TokenType.Underscore) continue;
+
+            var previousToken = tokens.ElementAtOrDefault(i - 1);
+            if (previousToken is { Type: TokenType.Escape }) continue;
+
+            singleUnderscoresCount++;
+        }
+
+        return false;
+    }
+}
\ No newline at end of file
diff --git a/cs/Markdown/Rendering/HtmlRenderer.cs b/cs/Markdown/Rendering/HtmlRenderer.cs
new file mode 100644
index 000000000..ce4435bee
--- /dev/null
+++ b/cs/Markdown/Rendering/HtmlRenderer.cs
@@ -0,0 +1,38 @@
+using System;
+using System.Text;
+using Markdown.Parsing.Nodes;
+
+namespace Markdown.Rendering;
+
+/// <summary>
+/// Renders a parsed document to an HTML string and provides the text escaping used by nodes.
+/// </summary>
+public static class HtmlRenderer
+{
+    /// <summary>Renders <paramref name="document"/> into a new string.</summary>
+    /// <exception cref="ArgumentNullException"><paramref name="document"/> is null.</exception>
+    public static string Render(RootNode document)
+    {
+        ArgumentNullException.ThrowIfNull(document);
+
+        var sb = new StringBuilder();
+        document.RenderHtml(sb);
+        return sb.ToString();
+    }
+
+    /// <summary>
+    /// Replaces the five characters that are significant in HTML text and attribute values
+    /// (ampersand, angle brackets, double and single quote) with character references.
+    /// </summary>
+    /// <param name="text">Text to escape; null or empty yields an empty string.</param>
+    /// <returns>The escaped text.</returns>
+    public static string EscapeHtml(string? text)
+    {
+        if (string.IsNullOrEmpty(text)) return string.Empty;
+
+        var sb = new StringBuilder(text.Length);
+        foreach (var c in text)
+        {
+            var entity = c switch
+            {
+                '&' => "&amp;",
+                '<' => "&lt;",
+                '>' => "&gt;",
+                '"' => "&quot;",
+                // Numeric reference: understood by every HTML version, unlike the named one.
+                '\'' => "&#39;",
+                _ => null
+            };
+
+            if (entity is null)
+            {
+                sb.Append(c);
+            }
+            else
+            {
+                sb.Append(entity);
+            }
+        }
+
+        return sb.ToString();
+    }
+}
\ No newline at end of file
diff --git a/cs/Markdown/Tests/HtmlRendererTests.cs b/cs/Markdown/Tests/HtmlRendererTests.cs
new file mode 100644
index 000000000..ffe92a213
--- /dev/null
+++ b/cs/Markdown/Tests/HtmlRendererTests.cs
@@ -0,0 +1,191 @@
+using FluentAssertions;
+using NUnit.Framework;
+using Markdown.Rendering;
+using Markdown.Parsing.Nodes;
+
+namespace Markdown.Tests;
+
+public class HtmlRendererTests
+{
+ private static string R(RootNode node)
+ => HtmlRenderer.Render(node);
+
+ [Test]
+ public void Html_ShouldRenderEmptyDocument_Test()
+ {
+ var root = new RootNode([]);
+ R(root).Should().Be("");
+ }
+
+ [Test]
+ public void Html_ShouldRenderParagraph_WithText_Test()
+ {
+ var p = new ParagraphNode([new TextNode("hello")]);
+
+ R(new RootNode([p]))
+ .Should().Be("hello
");
+ }
+
+ [Test]
+ public void Html_ShouldRenderParagraph_WithMultipleInlines_Test()
+ {
+ var p = new ParagraphNode([
+ new TextNode("hello "),
+ new EmNode([new TextNode("world")]),
+ new TextNode(" again")
+ ]);
+
+ R(new RootNode([p]))
+ .Should().Be("hello world again
");
+ }
+
+ [Test]
+ public void Html_ShouldRenderHeaderWithPlainText_Test()
+ {
+ var h = new HeaderNode([new TextNode("Title")]);
+
+ R(new RootNode([h]))
+ .Should().Be("Title
");
+ }
+
+ [Test]
+ public void Html_ShouldRenderHeaderWithInlineFormatting_Test()
+ {
+ var h = new HeaderNode([
+ new TextNode("Hello "),
+ new StrongNode([new TextNode("strong ")]),
+
+ new EmNode([new TextNode("em")])
+ ]);
+
+ R(new RootNode([h]))
+ .Should().Be("Hello strong em
");
+ }
+
+ [Test]
+ public void Html_ShouldRenderEm_Test()
+ {
+ var p = new ParagraphNode([
+ new TextNode("a "),
+ new EmNode([new TextNode("b")]),
+ new TextNode(" c")
+ ]);
+
+ R(new RootNode([p]))
+ .Should().Be("a b c
");
+ }
+
+ [Test]
+ public void Html_ShouldRenderStrong_Test()
+ {
+ var p = new ParagraphNode([
+ new TextNode("a "),
+ new StrongNode([new TextNode("b")]),
+ new TextNode(" c")
+ ]);
+
+ R(new RootNode([p]))
+ .Should().Be("a b c
");
+ }
+
+ [Test]
+ public void Html_ShouldRenderNestedEmInsideStrong_Test()
+ {
+ var strong = new StrongNode([
+ new TextNode("a "),
+ new EmNode([new TextNode("b")]),
+ new TextNode(" c")
+ ]);
+
+ var p = new ParagraphNode([strong]);
+
+ R(new RootNode([p]))
+ .Should().Be("a b c
");
+ }
+
+ [Test]
+ public void Html_ShouldRenderEscapedChar_AsLiteral_Test()
+ {
+ var p = new ParagraphNode([
+ new TextNode("x"),
+ new EscapedNode('_'),
+ new TextNode("y")
+ ]);
+
+ R(new RootNode([p]))
+ .Should().Be("x_y
");
+ }
+
+ [Test]
+ public void Html_ShouldEscapeHtmlInsideTextNodes_Test()
+ {
+ var p = new ParagraphNode([new TextNode(" & \"world\"")]);
+
+ R(new RootNode(new() { p }))
+ .Should().Be("<hello> & "world"
");
+ }
+
+ [Test]
+ public void Html_ShouldRenderLink_WithTextOnly_Test()
+ {
+ var link = new LinkNode(
+ [new TextNode("google")],
+ "https://google.com"
+ );
+
+ var p = new ParagraphNode([link]);
+
+ R(new RootNode([p]))
+ .Should().Be("google
");
+ }
+
+ [Test]
+ public void Html_ShouldRenderLink_WithInlineFormatting_Test()
+ {
+ var link = new LinkNode(
+ [
+ new TextNode("a "),
+ new EmNode([new TextNode("b")]),
+ new TextNode(" c")
+ ],
+ "/x"
+ );
+
+ var p = new ParagraphNode([link]);
+
+ R(new RootNode([p]))
+ .Should().Be("a b c
");
+ }
+
+ [Test]
+ public void Html_ShouldRenderComplexNestedFormatting_Test()
+ {
+ var p = new ParagraphNode([
+ new TextNode("start "),
+ new StrongNode([
+ new TextNode("A "),
+ new EmNode([new TextNode("B")]),
+ new TextNode(" C")
+ ]),
+
+ new TextNode(" end")
+ ]);
+
+ R(new RootNode([p]))
+ .Should().Be("start A B C end
");
+ }
+
+ [Test]
+ public void Html_ShouldEscapeUrlInHref_Test()
+ {
+ var link = new LinkNode(
+ [new TextNode("x")],
+ "https://example.com?a=1&b=<>&\""
+ );
+
+ var p = new ParagraphNode([link]);
+
+ R(new RootNode([p]))
+ .Should().Be("x
");
+ }
+}
\ No newline at end of file
diff --git a/cs/Markdown/Tests/MdProcessorTests.cs b/cs/Markdown/Tests/MdProcessorTests.cs
new file mode 100644
index 000000000..27e06f815
--- /dev/null
+++ b/cs/Markdown/Tests/MdProcessorTests.cs
@@ -0,0 +1,415 @@
+using FluentAssertions;
+using NUnit.Framework;
+
+namespace Markdown.Tests;
+
+public class MdTests
+{
+ private Md md = null!;
+
+ [SetUp]
+ public void SetUp()
+ => md = new Md();
+
+ private string R(string markdown)
+ => Md.Render(markdown);
+
+ [Test]
+ public void Md_ShouldRenderH1_WhenLineStartsWithHashAndSpace_Test()
+ {
+ R("# Hello world")
+ .Should().Be("Hello world
");
+ }
+
+ [Test]
+ public void Md_ShouldNotRenderH1_WhenHashNotFollowedBySpace_Test()
+ {
+ R("#Hello")
+ .Should().Be("#Hello
");
+ }
+
+ [Test]
+ public void Md_ShouldRenderParagraph_WhenTextWithoutHeader_Test()
+ {
+ R("hello")
+ .Should().Be("hello
");
+ }
+
+ [Test]
+ public void Md_ShouldRenderTwoParagraphs_WhenSeparatedByEmptyLine_Test()
+ {
+ R("hello\n\nworld")
+ .Should().Be("hello
world
");
+ }
+
+ [Test]
+ public void Md_ShouldRenderEm_WhenSingleUnderscorePair_Test()
+ {
+ R("a _b_ c")
+ .Should().Be("a b c
");
+ }
+
+ [Test]
+ public void Md_ShouldNotRenderEm_WhenSpacesAroundOpening_Test()
+ {
+ R("a _ b_")
+ .Should().Be("a _ b_
");
+ }
+
+ [Test]
+ public void Md_ShouldNotRenderEm_WhenSpacesBeforeClosing_Test()
+ {
+ R("a _b _")
+ .Should().Be("a _b _
");
+ }
+
+ [Test]
+ public void Md_ShouldNotRenderEm_WhenUnderscoreInsideDigits_Test()
+ {
+ R("12_3")
+ .Should().Be("12_3
");
+ }
+
+ [Test]
+ public void Md_ShouldRenderEm_WhenInsideWord_Test()
+ {
+ R("a_b_c")
+ .Should().Be("abc
");
+ }
+
+ [Test]
+ public void Md_ShouldNotRenderEm_WhenBetweenWords_Test()
+ {
+ R("a _b c_")
+ .Should().Be("a _b c_
");
+ }
+
+ [Test]
+ public void Md_ShouldNotRenderEm_WhenInsideDigitsExtended_Test()
+ {
+ R("12_34_56")
+ .Should().Be("12_34_56
");
+ }
+
+ [Test]
+ public void Md_ShouldRenderStrong_WhenDoubleUnderscorePair_Test()
+ {
+ R("a __b__ c")
+ .Should().Be("a b c
");
+ }
+
+ [Test]
+ public void Md_ShouldNotRenderStrong_WhenEmptyContent_Test()
+ {
+ R("____")
+ .Should().Be("____
");
+ }
+
+ [Test]
+ public void Md_ShouldAllowStrongInsideStrong_WhenValid_Test()
+ {
+ R("__a __b__ c__")
+ .Should().Be("a b c
");
+ }
+
+ [Test]
+ public void Md_ShouldRenderNestedEmInsideStrong_WhenValidStructure_Test()
+ {
+ R("__a _b_ c__")
+ .Should().Be("a b c
");
+ }
+
+ [Test]
+ public void Md_ShouldRenderNestedEmInsideStrong_MultipleContent_Test()
+ {
+ R("__a _b c_ d__")
+ .Should().Be("a b c d
");
+ }
+
+ [Test]
+ public void Md_ShouldNotRenderNested_WhenIntersectionInvalid_Test()
+ {
+ R("_a __b_ c__")
+ .Should().Be("_a __b_ c__
");
+ }
+
+ [Test]
+ public void Md_ShouldNotRender_WhenDoubleSingleIntersectOpposite_Test()
+ {
+ R("__a _b__ c_")
+ .Should().Be("__a _b__ c_
");
+ }
+
+ [Test]
+ public void Md_ShouldEscapeUnderscore_WhenBackslashBefore_Test()
+ {
+ R(@"\_a\_")
+ .Should().Be("_a_
");
+ }
+
+ [Test]
+ public void Md_ShouldOutputBackslashLiteral_WhenEscapeWithoutNextChar_Test()
+ {
+ R(@"\")
+ .Should().Be("\\
");
+ }
+
+ [Test]
+ public void Md_ShouldEscapeBackslash_WhenDoubleBackslash_Test()
+ {
+ R(@"\\")
+ .Should().Be("\\
");
+ }
+
+ [Test]
+ public void Md_ShouldEscapeUnderscoreInsideWord_Test()
+ {
+ R("hel\\_lo")
+ .Should().Be("hel_lo
");
+ }
+
+ [Test]
+ public void Md_ShouldEscapeDoubleUnderscore_Test()
+ {
+ R(@"\__a__")
+ .Should().Be("__a__
");
+ }
+
+ [Test]
+ public void Md_ShouldEscapeUnderscore_WhenAtEnd_Test()
+ {
+ R("a \\_")
+ .Should().Be("a _
");
+ }
+
+ [Test]
+ public void Md_ShouldHandleEscapeBeforeLetters_AsLiteral_Test()
+ {
+ R(@"\hello")
+ .Should().Be("\\hello
");
+ }
+
+ [Test]
+ public void Md_ShouldRenderLink_WhenValidMarkdownLink_Test()
+ {
+ R("[google](https://google.com)")
+ .Should().Be("google
");
+ }
+
+ [Test]
+ public void Md_ShouldNotRenderLink_WhenMissingRightBracket_Test()
+ {
+ R("[text(https://url)")
+ .Should().Be("[text(https://url)
");
+ }
+
+ [Test]
+ public void Md_ShouldNotRenderLink_WhenMissingParenthesis_Test()
+ {
+ R("[text]url)")
+ .Should().Be("[text]url)
");
+ }
+
+ [Test]
+ public void Md_ShouldRenderLinkInsideStrong_Test()
+ {
+ R("__hello [g](u)__")
+ .Should().Be("hello g
");
+ }
+
+ [Test]
+ public void Md_ShouldHandleEscapeInsideLinkText_Test()
+ {
+ R("[go\\_og\\_le](url)")
+ .Should().Be("go_og_le
");
+ }
+
+ [Test]
+ public void Md_ShouldRenderMixedStyles_WhenMultipleInlineTokens_Test()
+ {
+ R("Hello _world_ and __strong__ text")
+ .Should().Be("Hello world and strong text
");
+ }
+
+ [Test]
+ public void Md_ShouldKeepUnderscoresLiteral_WhenUnmatched_Test()
+ {
+ R("_a")
+ .Should().Be("_a
");
+ }
+
+ [Test]
+ public void Md_ShouldRenderEmptyString_AsEmptyString_Test()
+ {
+ R("").Should().Be("");
+ }
+
+ [Test]
+ public void Md_ShouldThrowException_WhenGettingNull_Test()
+ {
+ Action act = () => Md.Render(null);
+ act.Should().Throw();
+ }
+
+ [Test]
+ public void Md_ShouldKeepMultipleNewlines_AsParagraphBreaks_Test()
+ {
+ R("a\n\n\nb")
+ .Should().Be("a
b
");
+ }
+
+ [Test]
+ public void Md_ShouldTreatHashInsideText_AsLiteral_Test()
+ {
+ R("a # b")
+ .Should().Be("a # b
");
+ }
+
+ [Test]
+ public void Md_ShouldNotRenderLink_WhenNoClosingParenthesis_Test()
+ {
+ R("[text](url")
+ .Should().Be("[text](url
");
+ }
+
+ [Test]
+ public void Md_ShouldNotRenderLink_WhenNoClosingBracket_Test()
+ {
+ R("[text(url)")
+ .Should().Be("[text(url)
");
+ }
+
+ [Test]
+ public void Md_ShouldAllowTextBeforeLink_Test()
+ {
+ R("hello [x](y)")
+ .Should().Be("hello x
");
+ }
+
+ [Test]
+ public void Md_ShouldAllowTextAfterLink_Test()
+ {
+ R("[x](y) world")
+ .Should().Be("x world
");
+ }
+
+ [Test]
+ public void Md_ShouldNotBreak_WhenLinkInsideEscapedBrackets_Test()
+ {
+ R(@"\[link](u)")
+ .Should().Be("[link](u)
");
+ }
+
+ [Test]
+ public void Md_ShouldNotRenderStrong_WhenFollowedByWhitespace_Test()
+ {
+ R("__ a__")
+ .Should().Be("__ a__
");
+ }
+
+ [Test]
+ public void Md_ShouldNotRenderStrong_WhenEndingWithWhitespace_Test()
+ {
+ R("__a __")
+ .Should().Be("__a __
");
+ }
+
+ [Test]
+ public void Md_ShouldRenderStrongOverMultipleInlines_Test()
+ {
+ R("__a [x](y) b__")
+ .Should().Be("a x b
");
+ }
+
+ [Test]
+ public void Md_ShouldRenderEmInsideWord_MultipleTimes_Test()
+ {
+ R("a_b_c_d_e")
+ .Should().Be("abcde
");
+ }
+
+ [Test]
+ public void Md_ShouldNotRenderEm_WhenSingleUnderscoreAtStartAndSpaceAfter_Test()
+ {
+ R("_ a_")
+ .Should().Be("_ a_
");
+ }
+
+ [Test]
+ public void Md_ShouldHandleEscapedHash_AsLiteral_Test()
+ {
+ R(@"\# header")
+ .Should().Be("# header
");
+ }
+
+ [Test]
+ public void Md_ShouldRenderHeader_WithInlineFormatting_Test()
+ {
+ R("# __a _b_ c__")
+ .Should().Be("a b c
");
+ }
+
+ [Test]
+ public void Md_ShouldTreatSingleBackslashAtEnd_AsLiteral_Test()
+ {
+ R("abc\\")
+ .Should().Be("abc\\
");
+ }
+
+ [Test]
+ public void Md_ShouldIgnoreUnmatchedOpeningBracket_Test()
+ {
+ R("text [abc")
+ .Should().Be("text [abc
");
+ }
+
+ [Test]
+ public void Md_ShouldIgnoreUnmatchedClosingBracket_Test()
+ {
+ R("text ]abc")
+ .Should().Be("text ]abc
");
+ }
+
+ [Test]
+ public void Md_ShouldHandleMultipleLinksInRow_Test()
+ {
+ R("[a](1)[b](2)[c](3)")
+ .Should().Be("abc
");
+ }
+
+ [Test]
+ public void Md_ShouldHandleMultipleEscapesInRow_Test()
+ {
+ R("\\\\\\\\__a__")
+ .Should().Be("\\\\a
");
+ }
+
+ [Test]
+ public void Md_ShouldNotRenderCrossingEmStrong_ComplexCase_Test()
+ {
+ R("_a __b c_ d__")
+ .Should().Be("_a __b c_ d__
");
+ }
+
+ [Test]
+ public void Md_ShouldRenderLoneHash_AsLiteral_Test()
+ {
+ R("#")
+ .Should().Be("#
");
+ }
+
+ [Test]
+ public void Md_ShouldRenderSpaceThenHash_AsParagraph_Test()
+ {
+ R(" # heading")
+ .Should().Be(" # heading
");
+ }
+
+ [Test]
+ public void Md_ShouldNotBreakOnVeryLongGarbage_Test()
+ {
+ var s = new string('[', 2000) + new string(')', 2000);
+ R(s)
+ .Should().Be("" + s + "
");
+ }
+}
\ No newline at end of file
diff --git a/cs/Markdown/Tests/PerformanceTests.cs b/cs/Markdown/Tests/PerformanceTests.cs
new file mode 100644
index 000000000..7b3595d88
--- /dev/null
+++ b/cs/Markdown/Tests/PerformanceTests.cs
@@ -0,0 +1,114 @@
+using System.Diagnostics;
+using System.Text;
+using FluentAssertions;
+using NUnit.Framework;
+
+namespace Markdown.Tests;
+
+/// <summary>
+/// Coarse wall-clock checks for <c>Md.Render</c>. Every timed call is preceded by an untimed
+/// warm-up and the best of several runs is asserted, so one-off costs (JIT compilation,
+/// a garbage collection) do not fail the test.
+/// </summary>
+[TestFixture]
+public class PerformanceTests
+{
+    // Number of timed repetitions per measurement.
+    private const int TimedRuns = 3;
+
+    [Test]
+    public void Renderer_ShouldRenderLargeTextFast_Test()
+    {
+        var input = new string('a', 1_000_000);
+
+        BestElapsedMilliseconds(() => Md.Render(input)).Should().BeLessThan(50);
+    }
+
+    [Test]
+    public void Renderer_ShouldRenderManyUnderscoresFast_Test()
+    {
+        var input = new string('_', 20_000);
+
+        BestElapsedMilliseconds(() => Md.Render(input)).Should().BeLessThan(20);
+    }
+
+    [Test]
+    public void Renderer_ShouldRenderManyLinksFast_Test()
+    {
+        var sb = new StringBuilder();
+        for (var i = 0; i < 5000; i++)
+            sb.Append("[g](u) ");
+
+        var input = sb.ToString();
+
+        BestElapsedMilliseconds(() => Md.Render(input)).Should().BeLessThan(40);
+    }
+
+    [Test]
+    public void Renderer_ShouldRenderManyEscapesFast_Test()
+    {
+        var sb = new StringBuilder();
+        for (var i = 0; i < 30_000; i++)
+            sb.Append("\\_");
+
+        var input = sb.ToString();
+
+        BestElapsedMilliseconds(() => Md.Render(input)).Should().BeLessThan(30);
+    }
+
+    [Test]
+    public void Renderer_ShouldScale_ApproximatelyLinearly_Test()
+    {
+        var chunk = @"__a _b c_ d__ [x](y) \\text ";
+
+        var inputSmall = string.Concat(Enumerable.Repeat(chunk, 2000));
+        var inputLarge = string.Concat(Enumerable.Repeat(chunk, 4000));
+
+        WarmupRenderer();
+
+        var tSmall = MeasureAverageTicks(() => Md.Render(inputSmall));
+        var tLarge = MeasureAverageTicks(() => Md.Render(inputLarge));
+
+        var ratio = (double)tLarge / tSmall;
+
+        TestContext.WriteLine($"tSmall={tSmall}, tLarge={tLarge}, ratio={ratio}");
+
+        // Doubling the input should roughly double the time.
+        ratio.Should().BeInRange(1.4, 2.7);
+    }
+
+    /// <summary>
+    /// Runs <paramref name="action"/> once untimed, then <see cref="TimedRuns"/> times timed,
+    /// and returns the shortest elapsed time in milliseconds.
+    /// </summary>
+    private static long BestElapsedMilliseconds(Action action)
+    {
+        action();
+
+        var best = long.MaxValue;
+        for (var i = 0; i < TimedRuns; i++)
+        {
+            var sw = Stopwatch.StartNew();
+            action();
+            sw.Stop();
+            best = Math.Min(best, sw.ElapsedMilliseconds);
+        }
+
+        return best;
+    }
+
+    /// <summary>Average elapsed stopwatch ticks of five consecutive runs of <paramref name="action"/>.</summary>
+    private static long MeasureAverageTicks(Action action)
+    {
+        var sw = new Stopwatch();
+        var sum = 0L;
+
+        for (var i = 0; i < 5; i++)
+        {
+            sw.Restart();
+            action();
+            sw.Stop();
+            sum += sw.ElapsedTicks;
+        }
+
+        return sum / 5;
+    }
+
+    /// <summary>Renders a tiny document a few times so later measurements run on warmed-up code.</summary>
+    private static void WarmupRenderer()
+    {
+        for (var i = 0; i < 10; i++)
+            Md.Render("_x_");
+    }
+}
\ No newline at end of file
diff --git a/cs/Markdown/Tests/TokenizerTests.cs b/cs/Markdown/Tests/TokenizerTests.cs
new file mode 100644
index 000000000..e798ce5cb
--- /dev/null
+++ b/cs/Markdown/Tests/TokenizerTests.cs
@@ -0,0 +1,126 @@
+using FluentAssertions;
+using Markdown.Tokenizing;
+using NUnit.Framework;
+
+namespace Markdown.Tests;
+
+/// <summary>
+/// Tests for <c>Tokenizer.Tokenize</c>. The method is static, so the fixture holds no state.
+/// </summary>
+public class TokenizerTests
+{
+    // Full token list for the given input.
+    private static List Tokens(string text)
+        => Tokenizer.Tokenize(text);
+
+    // Token types only, for tests that compare whole sequences.
+    private static List Types(string text)
+        => Tokenizer.Tokenize(text).Select(t => t.Type).ToList();
+
+    [TestCase("hello!")]
+    public void Tokenizer_ShouldProduceTextToken_WhenSimpleWord_Test(string input)
+    {
+        var tokens = Tokens(input);
+
+        // One text token plus the terminating end-of-file token.
+        tokens.Should().HaveCount(2);
+        tokens[0].Type.Should().Be(TokenType.Text);
+        tokens[0].Value.Should().Be(input);
+    }
+
+    [TestCase("a b")]
+    public void Tokenizer_ShouldProduceWhitespaceToken_WhenSpaceEncountered_Test(string input)
+    {
+        var tokens = Tokens(input);
+
+        tokens[1].Type.Should().Be(TokenType.Whitespace);
+        tokens[1].Value.Should().Be(" ");
+    }
+
+    [TestCase("a\tb")]
+    public void Tokenizer_ShouldProduceWhitespaceTokenWithTabValue_WhenTabEncountered_Test(string input)
+    {
+        var tokens = Tokens(input);
+
+        tokens[1].Type.Should().Be(TokenType.Whitespace);
+        tokens[1].Value.Should().Be("\t");
+    }
+
+    [TestCase("a\nb")]
+    public void Tokenizer_ShouldProduceEndOfLineToken_WhenNewlineEncountered_Test(string input)
+    {
+        var tokens = Tokens(input);
+
+        tokens[1].Type.Should().Be(TokenType.EndOfLine);
+        tokens[2].Type.Should().Be(TokenType.Text);
+    }
+
+    [TestCase("# hello!")]
+    public void Tokenizer_ShouldProduceHashToken_WhenHashEncountered_Test(string input)
+    {
+        var tokens = Tokens(input);
+
+        tokens[0].Type.Should().Be(TokenType.Hash);
+        tokens[1].Type.Should().Be(TokenType.Whitespace);
+        tokens[2].Type.Should().Be(TokenType.Text);
+    }
+
+    [TestCase("")]
+    [TestCase(" ")]
+    [TestCase("hello!")]
+    public void Tokenizer_ShouldPlaceEndOfFileTokenLast_WhenTokenizingAnyInput_Test(string input)
+    {
+        var tokens = Tokens(input);
+
+        tokens[^1].Type.Should().Be(TokenType.EndOfFile);
+    }
+
+    [Test]
+    public void Tokenizer_ShouldProduceUnderscoreToken_WhenSingleUnderscoreEncountered_Test()
+    {
+        var tokens = Tokens("_");
+
+        tokens[0].Type.Should().Be(TokenType.Underscore);
+    }
+
+    [Test]
+    public void Tokenizer_ShouldProduceDoubleUnderscoreToken_WhenTwoUnderscoresEncountered_Test()
+    {
+        var tokens = Tokens("__");
+
+        tokens[0].Type.Should().Be(TokenType.DoubleUnderscore);
+    }
+
+    [Test]
+    public void Tokenizer_ShouldSplitIntoDoubleAndSingleUnderscore_WhenThreeUnderscoresEncountered_Test()
+    {
+        Types("___").Should().Equal(
+            TokenType.DoubleUnderscore,
+            TokenType.Underscore,
+            TokenType.EndOfFile
+        );
+    }
+
+    [Test]
+    public void Tokenizer_ShouldProduceEscapeToken_WhenBackslashEncountered_Test()
+    {
+        var tokens = Tokens("\\_");
+
+        tokens[0].Type.Should().Be(TokenType.Escape);
+        tokens[1].Type.Should().Be(TokenType.Underscore);
+    }
+
+    [Test]
+    public void Tokenizer_ShouldRecognizeBracketAndParenTokens_WhenBracketCharactersEncountered_Test()
+    {
+        Types("[]()").Should().Equal(
+            TokenType.LeftBracket,
+            TokenType.RightBracket,
+            TokenType.LeftParen,
+            TokenType.RightParen,
+            TokenType.EndOfFile
+        );
+    }
+}
\ No newline at end of file
diff --git a/cs/Markdown/Tokenizing/CharCursor.cs b/cs/Markdown/Tokenizing/CharCursor.cs
new file mode 100644
index 000000000..885cb6398
--- /dev/null
+++ b/cs/Markdown/Tokenizing/CharCursor.cs
@@ -0,0 +1,22 @@
+namespace Markdown.Tokenizing;
+
+/// <summary>
+/// Forward-only position inside a string, with one character of look-ahead.
+/// </summary>
+public class CharCursor(string text)
+{
+    private int index;
+
+    /// <summary>True once the position has moved past the last character.</summary>
+    public bool End => text.Length <= index;
+
+    /// <summary>Character at the current position, or <c>'\0'</c> when <see cref="End"/> is true.</summary>
+    public char Current => index < text.Length ? text[index] : '\0';
+
+    /// <summary>Advances by one character; does nothing once <see cref="End"/> is true.</summary>
+    public void MoveNext()
+    {
+        if (End)
+        {
+            return;
+        }
+
+        index++;
+    }
+
+    /// <summary>
+    /// Tells whether the character after the current one equals <paramref name="c"/>.
+    /// A missing next character is treated as <c>'\0'</c>.
+    /// </summary>
+    public bool MatchNext(char c)
+    {
+        var lookahead = index + 1;
+        var following = lookahead >= text.Length ? '\0' : text[lookahead];
+        return following == c;
+    }
+}
\ No newline at end of file
diff --git a/cs/Markdown/Tokenizing/Token.cs b/cs/Markdown/Tokenizing/Token.cs
new file mode 100644
index 000000000..cd1437a07
--- /dev/null
+++ b/cs/Markdown/Tokenizing/Token.cs
@@ -0,0 +1,7 @@
+namespace Markdown.Tokenizing;
+
+/// <summary>
+/// A lexical unit: its kind together with the text stored for it.
+/// </summary>
+public class Token
+{
+    public Token(TokenType type, string value)
+    {
+        Type = type;
+        Value = value;
+    }
+
+    /// <summary>Kind of the token.</summary>
+    public TokenType Type { get; }
+
+    /// <summary>Text associated with the token.</summary>
+    public string Value { get; }
+}
\ No newline at end of file
diff --git a/cs/Markdown/Tokenizing/TokenType.cs b/cs/Markdown/Tokenizing/TokenType.cs
new file mode 100644
index 000000000..5bb8a4b8a
--- /dev/null
+++ b/cs/Markdown/Tokenizing/TokenType.cs
@@ -0,0 +1,17 @@
+namespace Markdown.Tokenizing;
+
+/// <summary>Kinds of tokens the tokenizer can emit.</summary>
+public enum TokenType
+{
+    /// <summary>A run of ordinary characters.</summary>
+    Text = 0,
+
+    /// <summary>A single <c>_</c>.</summary>
+    Underscore = 1,
+
+    /// <summary>Two consecutive underscores, <c>__</c>.</summary>
+    DoubleUnderscore = 2,
+
+    /// <summary>A backslash.</summary>
+    Escape = 3,
+
+    /// <summary>A <c>#</c> character.</summary>
+    Hash = 4,
+
+    /// <summary>A single whitespace character such as a space or a tab.</summary>
+    Whitespace = 5,
+
+    /// <summary>A line break.</summary>
+    EndOfLine = 6,
+
+    /// <summary>Marker appended after the last token of the input.</summary>
+    EndOfFile = 7,
+
+    /// <summary>A <c>[</c> character.</summary>
+    LeftBracket = 8,
+
+    /// <summary>A <c>]</c> character.</summary>
+    RightBracket = 9,
+
+    /// <summary>A <c>(</c> character.</summary>
+    LeftParen = 10,
+
+    /// <summary>A <c>)</c> character.</summary>
+    RightParen = 11
+}
\ No newline at end of file
diff --git a/cs/Markdown/Tokenizing/Tokenizer.cs b/cs/Markdown/Tokenizing/Tokenizer.cs
new file mode 100644
index 000000000..3da6121e7
--- /dev/null
+++ b/cs/Markdown/Tokenizing/Tokenizer.cs
@@ -0,0 +1,112 @@
+using System.Text;
+
+namespace Markdown.Tokenizing;
+
+/// <summary>
+/// Splits Markdown source text into a flat list of tokens.
+/// </summary>
+public class Tokenizer
+{
+    /// <summary>
+    /// Tokenizes <paramref name="text"/>. The returned list always ends with an
+    /// <see cref="TokenType.EndOfFile"/> token, also for empty input.
+    /// </summary>
+    /// <remarks>
+    /// Every iteration of the main loop consumes at least one character, whatever the input.
+    /// A CR LF pair becomes one <see cref="TokenType.EndOfLine"/> token; any other whitespace
+    /// character (a lone CR, form feed, non-breaking space, ...) becomes a
+    /// <see cref="TokenType.Whitespace"/> token carrying that character.
+    /// </remarks>
+    /// <exception cref="ArgumentNullException"><paramref name="text"/> is null.</exception>
+    public static List Tokenize(string text)
+    {
+        ArgumentNullException.ThrowIfNull(text);
+
+        var cursor = new CharCursor(text);
+        var tokens = new List();
+
+        while (!cursor.End)
+        {
+            var c = cursor.Current;
+
+            switch (c)
+            {
+                case '\r' when cursor.MatchNext('\n'):
+                    // Windows line ending: one line break made of two characters.
+                    tokens.Add(new Token(TokenType.EndOfLine, "\r\n"));
+                    cursor.MoveNext();
+                    cursor.MoveNext();
+                    break;
+
+                case '\n':
+                    AddSingleChar(tokens, cursor, TokenType.EndOfLine, "\n");
+                    break;
+
+                case '\\':
+                    AddSingleChar(tokens, cursor, TokenType.Escape, "\\");
+                    break;
+
+                case '_' when cursor.MatchNext('_'):
+                    tokens.Add(new Token(TokenType.DoubleUnderscore, "__"));
+                    cursor.MoveNext();
+                    cursor.MoveNext();
+                    break;
+
+                case '_':
+                    AddSingleChar(tokens, cursor, TokenType.Underscore, "_");
+                    break;
+
+                case '#':
+                    AddSingleChar(tokens, cursor, TokenType.Hash, "#");
+                    break;
+
+                case '[':
+                    AddSingleChar(tokens, cursor, TokenType.LeftBracket, "[");
+                    break;
+
+                case ']':
+                    AddSingleChar(tokens, cursor, TokenType.RightBracket, "]");
+                    break;
+
+                case '(':
+                    AddSingleChar(tokens, cursor, TokenType.LeftParen, "(");
+                    break;
+
+                case ')':
+                    AddSingleChar(tokens, cursor, TokenType.RightParen, ")");
+                    break;
+
+                default:
+                    if (char.IsWhiteSpace(c))
+                    {
+                        // Covers space and tab as well as every other whitespace character.
+                        // ReadText refuses whitespace, so routing it there would consume
+                        // nothing and the loop would never advance.
+                        AddSingleChar(tokens, cursor, TokenType.Whitespace, c.ToString());
+                    }
+                    else
+                    {
+                        ReadText(tokens, cursor);
+                    }
+
+                    break;
+            }
+        }
+
+        tokens.Add(new Token(TokenType.EndOfFile, ""));
+        return tokens;
+    }
+
+    /// <summary>Adds a token for the current character and moves past it.</summary>
+    private static void AddSingleChar(List tokens, CharCursor cursor, TokenType type, string value)
+    {
+        tokens.Add(new Token(type, value));
+        cursor.MoveNext();
+    }
+
+    /// <summary>
+    /// Reads the longest run of ordinary characters starting at the cursor and adds it as one
+    /// <see cref="TokenType.Text"/> token. Adds nothing when the run is empty.
+    /// </summary>
+    private static void ReadText(List tokens, CharCursor cursor)
+    {
+        var sb = new StringBuilder();
+
+        while (!cursor.End && IsTextChar(cursor.Current))
+        {
+            sb.Append(cursor.Current);
+            cursor.MoveNext();
+        }
+
+        if (sb.Length > 0)
+            tokens.Add(new Token(TokenType.Text, sb.ToString()));
+    }
+
+    /// <summary>True for every character that is neither whitespace nor one of the special characters.</summary>
+    private static bool IsTextChar(char c)
+    {
+        return c != '_' &&
+               c != '\\' &&
+               c != '[' &&
+               c != ']' &&
+               c != '(' &&
+               c != ')' &&
+               c != '#' &&
+               c != '\n' &&
+               !char.IsWhiteSpace(c);
+    }
+}
\ No newline at end of file
diff --git a/cs/clean-code.sln b/cs/clean-code.sln
index 2206d54db..54d4f9b2c 100644
--- a/cs/clean-code.sln
+++ b/cs/clean-code.sln
@@ -9,6 +9,8 @@ Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "ControlDigit", "ControlDigi
EndProject
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Samples", "Samples\Samples.csproj", "{C3EF41D7-50EF-4CE1-B30A-D1D81C93D7FA}"
EndProject
+Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Markdown", "Markdown\Markdown.csproj", "{D4A42664-A52F-41D9-A7A0-D0A978B404E5}"
+EndProject
Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution
Debug|Any CPU = Debug|Any CPU
@@ -27,5 +29,9 @@ Global
{C3EF41D7-50EF-4CE1-B30A-D1D81C93D7FA}.Debug|Any CPU.Build.0 = Debug|Any CPU
{C3EF41D7-50EF-4CE1-B30A-D1D81C93D7FA}.Release|Any CPU.ActiveCfg = Release|Any CPU
{C3EF41D7-50EF-4CE1-B30A-D1D81C93D7FA}.Release|Any CPU.Build.0 = Release|Any CPU
+ {D4A42664-A52F-41D9-A7A0-D0A978B404E5}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
+ {D4A42664-A52F-41D9-A7A0-D0A978B404E5}.Debug|Any CPU.Build.0 = Debug|Any CPU
+ {D4A42664-A52F-41D9-A7A0-D0A978B404E5}.Release|Any CPU.ActiveCfg = Release|Any CPU
+ {D4A42664-A52F-41D9-A7A0-D0A978B404E5}.Release|Any CPU.Build.0 = Release|Any CPU
EndGlobalSection
EndGlobal