// ---- cs/Markdown/Markdown.csproj (new file) ----
// NOTE(review): the XML markup of the project file was lost in this paste; only the values
// "Exe", "net8.0", "enable", "enable" survive (presumably OutputType, TargetFramework,
// ImplicitUsings and Nullable). Confirm against the real .csproj, including its package references.

// ---- cs/Markdown/Md.cs (new file) ----
namespace Markdown;

using Tokenizing;
using Parsing;
using Rendering;

/// <summary>
/// Facade over the three pipeline stages: tokenizing, parsing and HTML rendering.
/// </summary>
public class Md
{
    /// <summary>
    /// Converts Markdown source text to HTML.
    /// </summary>
    /// <param name="textToRender">Markdown source; passed to the tokenizer unchanged.</param>
    /// <returns>The string produced by <see cref="HtmlRenderer.Render"/> for the parsed document.</returns>
    public static string Render(string textToRender)
    {
        var tokens = Tokenizer.Tokenize(textToRender);
        var document = Parser.Parse(tokens);
        return HtmlRenderer.Render(document);
    }
}

// ---- cs/Markdown/Parsing/EscapeHandler.cs (new file) ----
using Markdown.Parsing.Nodes;
using Markdown.Tokenizing;

namespace Markdown.Parsing;

/// <summary>
/// Turns a backslash token and the token that follows it into literal text.
/// </summary>
/// <remarks>
/// NOTE(review): generic type arguments were stripped from the pasted diff;
/// <c>List&lt;InlineTypeNode&gt;</c> and <c>IList&lt;Token&gt;</c> are reconstructions, confirm.
/// </remarks>
public static class EscapeHandler
{
    /// <summary>
    /// Consumes the escape token under the cursor (and, usually, the token after it) and appends
    /// the resulting literal to <paramref name="children"/>.
    /// </summary>
    /// <param name="children">Inline nodes collected so far; one <see cref="TextNode"/> is appended.</param>
    /// <param name="cursor">Cursor positioned on the backslash token; advanced past what was consumed.</param>
    /// <param name="tokens">Token list of the document. Not used by this method; kept for the handler signature.</param>
    public static void HandleEscape(List<InlineTypeNode> children, ParserCursor cursor, IList<Token> tokens)
    {
        cursor.MoveNext(); // step over the '\' itself
        var next = cursor.Current;

        switch (next.Type)
        {
            // Backslash at the end of a line or of the input escapes nothing: keep it and
            // leave the terminator for the caller.
            case TokenType.EndOfLine:
            case TokenType.EndOfFile:
                children.Add(new TextNode("\\"));
                return;

            // "\\" collapses into one literal backslash.
            case TokenType.Escape:
                children.Add(new TextNode("\\"));
                break;

            // Plain text and whitespace are not markup, so the backslash escapes nothing and
            // must stay in the output. Whitespace previously fell through to the default branch,
            // which silently dropped the backslash ("a\ b" rendered as "a b").
            case TokenType.Text:
            case TokenType.Whitespace:
                children.Add(new TextNode("\\" + next.Value));
                break;

            // Any other token is markup: emit it literally, without the backslash.
            default:
                children.Add(new TextNode(next.Value));
                break;
        }

        cursor.MoveNext();
    }
}

// ---- cs/Markdown/Parsing/LinkHandler.cs (new file; the rest of the file follows) ----
using System.Text;
using Markdown.Parsing.Nodes;
using Markdown.Tokenizing;
namespace Markdown.Parsing;

/// <summary>
/// Recognizes inline links of the form <c>[text](url)</c> that fit on a single line.
/// </summary>
/// <remarks>
/// NOTE(review): generic type arguments were stripped from the pasted diff;
/// <c>List&lt;InlineTypeNode&gt;</c> and <c>IList&lt;Token&gt;</c> are reconstructions, confirm.
/// </remarks>
public static class LinkHandler
{
    /// <summary>
    /// Handles the <c>[</c> token under the cursor: appends a <see cref="LinkNode"/> when a complete
    /// link follows, otherwise appends a literal "[" and advances by one token.
    /// </summary>
    /// <param name="children">Inline nodes collected so far.</param>
    /// <param name="cursor">Cursor positioned on the left bracket.</param>
    /// <param name="tokens">Token list the cursor iterates over.</param>
    public static void HandleLink(List<InlineTypeNode> children, ParserCursor cursor, IList<Token> tokens)
    {
        var link = TryParseLink(cursor, tokens);
        if (link != null)
        {
            children.Add(link);
            return;
        }

        children.Add(new TextNode("["));
        cursor.MoveNext();
    }

    /// <summary>
    /// Tries to read <c>[text](url)</c> starting at the cursor. On success the cursor ends up after
    /// the closing parenthesis; on failure the cursor is left where it started.
    /// </summary>
    private static LinkNode? TryParseLink(ParserCursor cursor, IList<Token> tokens)
    {
        // An escaped "]" belongs to the link text and must not terminate it, so the bracket
        // search skips whatever token a backslash escapes.
        var rightBracket = FindTokenBeforeEol(tokens, cursor.Index + 1, TokenType.RightBracket, skipEscaped: true);
        if (rightBracket == -1) return null;

        // "(" has to follow "]" immediately.
        if (rightBracket + 1 >= tokens.Count || tokens[rightBracket + 1].Type != TokenType.LeftParen) return null;

        var rightParen = FindTokenBeforeEol(tokens, rightBracket + 2, TokenType.RightParen);
        if (rightParen == -1) return null;

        var saved = cursor.Index;
        cursor.MoveNext(); // step over "["

        var innerText = HandleLinkText(cursor, tokens, cursor.Index, rightBracket);

        cursor.IndexJumpTo(rightBracket + 2); // first token of the URL

        // The URL is the raw concatenation of every token value up to ")".
        var urlBuilder = new StringBuilder();
        while (!cursor.End && cursor.Index < rightParen)
        {
            urlBuilder.Append(cursor.Current.Value);
            cursor.MoveNext();
        }

        if (cursor.Current.Type != TokenType.RightParen)
        {
            cursor.IndexJumpTo(saved);
            return null;
        }

        cursor.MoveNext(); // step over ")"
        return new LinkNode(innerText, urlBuilder.ToString());
    }

    /// <summary>
    /// Builds the link's text nodes from the tokens in <c>[from, to)</c>. Every token becomes literal
    /// text; escapes are delegated to <see cref="EscapeHandler"/> so that a backslash inside link text
    /// behaves exactly like one outside of it. The cursor position is restored before returning.
    /// </summary>
    private static List<InlineTypeNode> HandleLinkText(ParserCursor cursor, IList<Token> tokens, int from, int to)
    {
        var saved = cursor.Index;
        cursor.IndexJumpTo(from);

        var children = new List<InlineTypeNode>();

        while (cursor.Index < to && !cursor.End)
        {
            var token = cursor.Current;

            if (token.Type == TokenType.Escape)
            {
                // The bracket search never picks a token that a backslash escapes, so the
                // handler cannot consume the closing "]" at index `to`.
                EscapeHandler.HandleEscape(children, cursor, tokens);
                continue;
            }

            children.Add(new TextNode(token.Value));
            cursor.MoveNext();
        }

        cursor.IndexJumpTo(saved);
        return children;
    }

    /// <summary>
    /// Returns the index of the first token of <paramref name="type"/> at or after
    /// <paramref name="start"/> on the current line, or -1 when the line (or the input) ends first.
    /// </summary>
    /// <param name="skipEscaped">
    /// When true, the token following a backslash is not considered a match. Line and file
    /// terminators are never skipped, so the search cannot run into the next line.
    /// </param>
    private static int FindTokenBeforeEol(IList<Token> tokens, int start, TokenType type, bool skipEscaped = false)
    {
        for (var i = start; i < tokens.Count; i++)
        {
            var tok = tokens[i];
            if (tok.Type == type) return i;
            if (tok.Type is TokenType.EndOfLine or TokenType.EndOfFile)
                return -1;

            if (skipEscaped &&
                tok.Type == TokenType.Escape &&
                i + 1 < tokens.Count &&
                tokens[i + 1].Type is not (TokenType.EndOfLine or TokenType.EndOfFile))
            {
                i++; // jump over the escaped token
            }
        }

        return -1;
    }
}

// ---- cs/Markdown/Parsing/Nodes/BlockTypeNode.cs (new file) ----
using System.Text;

namespace Markdown.Parsing.Nodes;

/// <summary>Base class of block-level nodes (the parser produces headers and paragraphs).</summary>
public abstract class BlockTypeNode : INode
{
    /// <summary>Appends this node's HTML to <paramref name="sb"/>.</summary>
    public abstract void RenderHtml(StringBuilder sb);
}

// ---- cs/Markdown/Parsing/Nodes/EmNode.cs (new file) ----
using System.Text;

namespace Markdown.Parsing.Nodes;

/// <summary>Emphasis produced by a matched pair of single underscores.</summary>
public class EmNode(List<InlineTypeNode> children) : InlineTypeNode
{
    /// <summary>Inline nodes wrapped by the emphasis.</summary>
    public List<InlineTypeNode> Children { get; } = children;

    /// <summary>Appends the children, in order, wrapped in an em element.</summary>
    public override void RenderHtml(StringBuilder sb)
    {
        // NOTE(review): the tag text was stripped from the pasted diff (both literals appear as "");
        // <em> is reconstructed from the node and test names, confirm.
        sb.Append("<em>");
        foreach (var child in Children)
            child.RenderHtml(sb);
        sb.Append("</em>");
    }
}

// ---- cs/Markdown/Parsing/Nodes/EscapedNode.cs (new file) ----
using System.Text;
using Markdown.Rendering;

namespace Markdown.Parsing.Nodes;

/// <summary>A single character that must be emitted literally.</summary>
public class EscapedNode(char escapedChar) : InlineTypeNode
{
    /// <summary>The character to emit.</summary>
    public char EscapedChar { get; } = escapedChar;

    /// <summary>Appends the character after passing it through <see cref="HtmlRenderer.EscapeHtml"/>.</summary>
    public override void RenderHtml(StringBuilder sb)
    {
        sb.Append(HtmlRenderer.EscapeHtml(EscapedChar.ToString()));
    }
}
// NOTE(review): this span was reconstructed from a pasted diff in which everything between angle
// brackets was stripped. Generic arguments (List<InlineTypeNode>, List<BlockTypeNode>, IList<Token>,
// List<Token>) and the HTML tags in string literals are inferred from usage and test names, confirm.

// ---- cs/Markdown/Parsing/Nodes/HeaderNode.cs (new file) ----
using System.Text;

namespace Markdown.Parsing.Nodes;

/// <summary>Header block; the parser creates it for a line that starts with "#" and whitespace.</summary>
public class HeaderNode(List<InlineTypeNode> inlines) : BlockTypeNode
{
    /// <summary>Inline content of the header.</summary>
    public List<InlineTypeNode> Inlines { get; } = inlines;

    /// <summary>Appends the inlines, in order, wrapped in an h1 element.</summary>
    public override void RenderHtml(StringBuilder sb)
    {
        sb.Append("<h1>");
        foreach (var inline in Inlines)
            inline.RenderHtml(sb);
        sb.Append("</h1>");
    }
}

// ---- cs/Markdown/Parsing/Nodes/INode.cs (new file) ----
using System.Text;

namespace Markdown.Parsing.Nodes;

/// <summary>Any node of the parsed document that can write itself as HTML.</summary>
public interface INode
{
    /// <summary>Appends this node's HTML to <paramref name="sb"/>.</summary>
    void RenderHtml(StringBuilder sb);
}

// ---- cs/Markdown/Parsing/Nodes/InlineTypeNode.cs (new file) ----
using System.Text;

namespace Markdown.Parsing.Nodes;

/// <summary>Base class of inline nodes (text, emphasis, strong, link, escaped character).</summary>
public abstract class InlineTypeNode : INode
{
    /// <summary>Appends this node's HTML to <paramref name="sb"/>.</summary>
    public abstract void RenderHtml(StringBuilder sb);
}

// ---- cs/Markdown/Parsing/Nodes/LinkNode.cs (new file) ----
using System.Text;
using Markdown.Rendering;

namespace Markdown.Parsing.Nodes;

/// <summary>Inline link with its text nodes and target URL.</summary>
public class LinkNode(List<InlineTypeNode> children, string url) : InlineTypeNode
{
    /// <summary>Nodes that make up the link text.</summary>
    public List<InlineTypeNode> Children { get; } = children;

    /// <summary>Link target exactly as written in the source.</summary>
    public string Url { get; } = url;

    /// <summary>Appends an anchor element; the URL is HTML-escaped before it goes into href.</summary>
    public override void RenderHtml(StringBuilder sb)
    {
        var href = HtmlRenderer.EscapeHtml(Url);
        sb.Append($"<a href=\"{href}\">");
        foreach (var child in Children)
            child.RenderHtml(sb);
        sb.Append("</a>");
    }
}

// ---- cs/Markdown/Parsing/Nodes/ParagraphNode.cs (new file) ----
using System.Text;

namespace Markdown.Parsing.Nodes;

/// <summary>Paragraph block: any line that is not a header.</summary>
public class ParagraphNode(List<InlineTypeNode> inlines) : BlockTypeNode
{
    /// <summary>Inline content of the paragraph.</summary>
    public List<InlineTypeNode> Inlines { get; } = inlines;

    /// <summary>Appends the inlines, in order, wrapped in a p element.</summary>
    public override void RenderHtml(StringBuilder sb)
    {
        sb.Append("<p>");
        foreach (var inline in Inlines)
            inline.RenderHtml(sb);
        sb.Append("</p>");
    }
}

// ---- cs/Markdown/Parsing/Nodes/RootNode.cs (new file) ----
using System.Text;

namespace Markdown.Parsing.Nodes;

/// <summary>Document root: an ordered list of blocks.</summary>
public class RootNode(List<BlockTypeNode> blocks) : INode
{
    /// <summary>Blocks of the document in source order.</summary>
    public List<BlockTypeNode> Blocks { get; } = blocks;

    /// <summary>Appends every block's HTML in order, with nothing in between.</summary>
    public void RenderHtml(StringBuilder sb)
    {
        foreach (var block in Blocks)
            block.RenderHtml(sb);
    }
}

// ---- cs/Markdown/Parsing/Nodes/StrongNode.cs (new file) ----
using System.Text;

namespace Markdown.Parsing.Nodes;

/// <summary>Strong emphasis produced by a matched pair of double underscores.</summary>
public class StrongNode(List<InlineTypeNode> children) : InlineTypeNode
{
    /// <summary>Inline nodes wrapped by the strong emphasis.</summary>
    public List<InlineTypeNode> Children { get; } = children;

    /// <summary>Appends the children, in order, wrapped in a strong element.</summary>
    public override void RenderHtml(StringBuilder sb)
    {
        sb.Append("<strong>");
        foreach (var child in Children)
            child.RenderHtml(sb);
        sb.Append("</strong>");
    }
}

// ---- cs/Markdown/Parsing/Nodes/TextNode.cs (new file) ----
using System.Text;
using Markdown.Rendering;

namespace Markdown.Parsing.Nodes;

/// <summary>Literal text.</summary>
public class TextNode(string text) : InlineTypeNode
{
    /// <summary>The unescaped text.</summary>
    public string Text { get; } = text;

    /// <summary>Appends the text after passing it through <see cref="HtmlRenderer.EscapeHtml"/>.</summary>
    public override void RenderHtml(StringBuilder sb)
    {
        sb.Append(HtmlRenderer.EscapeHtml(Text));
    }
}

// ---- cs/Markdown/Parsing/Parser.cs (new file) ----
using Markdown.Tokenizing;
using Markdown.Parsing.Nodes;

namespace Markdown.Parsing;

/// <summary>Builds the document tree from a token list, one block per non-empty line.</summary>
public class Parser
{
    /// <summary>
    /// Parses the whole token list into a <see cref="RootNode"/>.
    /// </summary>
    /// <param name="tokens">
    /// Tokens of the document. The loop stops once the cursor reaches the last token, so the list
    /// presumably ends with an end-of-file token; verify against the tokenizer.
    /// </param>
    /// <exception cref="ArgumentNullException"><paramref name="tokens"/> is null.</exception>
    public static RootNode Parse(List<Token> tokens)
    {
        ArgumentNullException.ThrowIfNull(tokens);

        var cursor = new ParserCursor(tokens);
        var blocks = new List<BlockTypeNode>();

        while (!cursor.End)
        {
            // Empty lines only separate blocks.
            if (cursor.Current.Type == TokenType.EndOfLine)
            {
                cursor.MoveNext();
                continue;
            }

            blocks.Add(ParseBlock(cursor, tokens));
        }

        return new RootNode(blocks);
    }

    /// <summary>
    /// Parses one line into a header (when it starts with a hash followed by whitespace) or a
    /// paragraph, and consumes the line terminator if there is one.
    /// </summary>
    private static BlockTypeNode ParseBlock(ParserCursor cursor, IList<Token> tokens)
    {
        if (cursor.Current.Type == TokenType.Hash &&
            cursor.Peek().Type == TokenType.Whitespace)
        {
            cursor.MoveNext(); // "#"
            cursor.MoveNext(); // the whitespace after it

            var inlines = ParseInlinesInBlock(cursor, tokens);
            if (cursor.Current.Type == TokenType.EndOfLine)
                cursor.MoveNext();

            return new HeaderNode(inlines);
        }

        var parsedInlines = ParseInlinesInBlock(cursor, tokens);
        if (cursor.Current.Type == TokenType.EndOfLine) cursor.MoveNext();

        return new ParagraphNode(parsedInlines);
    }

    /// <summary>
    /// Collects inline nodes up to the end of the current line, dispatching on token type, then
    /// re-inserts underscores that never found a partner as literal text.
    /// </summary>
    private static List<InlineTypeNode> ParseInlinesInBlock(ParserCursor cursor, IList<Token> tokens)
    {
        var children = new List<InlineTypeNode>();
        // Openers still waiting for a closer: token type plus the position in `children`
        // at which the opener appeared.
        var underscores = new List<(TokenType Type, int ChildrenIndex)>();

        while (!cursor.End &&
               cursor.Current.Type != TokenType.EndOfLine &&
               cursor.Current.Type != TokenType.EndOfFile)
        {
            var token = cursor.Current;

            switch (token.Type)
            {
                case TokenType.Text:
                case TokenType.Whitespace:
                    AddLiteral(children, token.Value, cursor);
                    break;

                case TokenType.Escape:
                    EscapeHandler.HandleEscape(children, cursor, tokens);
                    break;

                case TokenType.LeftBracket:
                    LinkHandler.HandleLink(children, cursor, tokens);
                    break;

                case TokenType.Underscore:
                case TokenType.DoubleUnderscore:
                    UnderscoreHandler.HandleUnderscores(children, underscores, cursor, tokens);
                    break;

                // Everything else (e.g. a hash in the middle of a line) is plain text here.
                default:
                    AddLiteral(children, token.Value, cursor);
                    break;
            }
        }

        UnderscoreHandler.InsertUnmatchedUnderscores(children, underscores);
        return children;
    }

    /// <summary>Appends <paramref name="value"/> as a text node and advances the cursor.</summary>
    private static void AddLiteral(List<InlineTypeNode> children, string value, ParserCursor cursor)
    {
        children.Add(new TextNode(value));
        cursor.MoveNext();
    }
}

// ---- cs/Markdown/Parsing/ParserCursor.cs (new file) ----
using Markdown.Tokenizing;

namespace Markdown.Parsing;

/// <summary>
/// Forward/jumpable position inside a token list. The last token is treated as a terminator:
/// the cursor never moves past it and reports <see cref="End"/> once it is reached.
/// </summary>
public class ParserCursor(IList<Token> tokens)
{
    private int index;

    /// <summary>True when the cursor is on (or beyond) the last token.</summary>
    public bool End => index >= tokens.Count - 1;

    /// <summary>Current position in the token list.</summary>
    public int Index => index;

    /// <summary>Token under the cursor; the last token once <see cref="End"/> is true.</summary>
    public Token Current => !End ? tokens[index] : tokens[^1];

    /// <summary>Token after the current one, or the last token when there is none.</summary>
    public Token Peek()
    {
        var nextIndex = index + 1;
        return nextIndex < tokens.Count ? tokens[nextIndex] : tokens[^1];
    }

    /// <summary>Advances by one token unless already at the end.</summary>
    public void MoveNext()
    {
        if (!End) index++;
    }

    /// <summary>Moves to <paramref name="newIndex"/>, clamped to the bounds of the token list.</summary>
    public void IndexJumpTo(int newIndex)
    {
        if (newIndex < 0) newIndex = 0;

        if (newIndex >= tokens.Count) newIndex = tokens.Count - 1;

        index = newIndex;
    }
}

// ---- cs/Markdown/Parsing/UnderscoreHandler.cs (new file) ----
using Markdown.Tokenizing;
using Markdown.Parsing.Nodes;

namespace Markdown.Parsing;

/// <summary>
/// Resolves single and double underscores into emphasis / strong nodes or literal text.
/// Openers are remembered as (type, index into <c>children</c>) pairs until a closer arrives;
/// whatever is left at the end of the line is written back as literal underscores.
/// </summary>
public static class UnderscoreHandler
{
    /// <summary>
    /// Handles the underscore token under the cursor: it becomes a literal, closes an earlier
    /// opener, or is recorded as a new opener. The cursor is always advanced by one token.
    /// </summary>
    /// <param name="children">Inline nodes collected so far on the current line.</param>
    /// <param name="underscores">Open underscore markers for the current line.</param>
    /// <param name="cursor">Cursor positioned on the underscore token.</param>
    /// <param name="tokens">Token list the cursor iterates over (used to look at neighbours).</param>
    public static void HandleUnderscores(
        List<InlineTypeNode> children,
        List<(TokenType Type, int ChildrenIndex)> underscores,
        ParserCursor cursor,
        IList<Token> tokens)
    {
        var currentToken = cursor.Current;
        var previousToken = tokens.ElementAtOrDefault(cursor.Index - 1);
        var nextToken = tokens.ElementAtOrDefault(cursor.Index + 1);

        // "12_3": an underscore between digits is never markup.
        if (IsUnderscoreBetweenDigits(previousToken, nextToken))
        {
            AddUnderscoreAsLiteral(children, currentToken.Value, cursor);
            return;
        }

        // A closer must directly follow non-whitespace.
        if (!IsTokenWhitespaceLike(previousToken))
        {
            if (TryCloseExistingHighlight(underscores, currentToken, children, cursor))
                return;
        }

        // An opener must be directly followed by non-whitespace; a double underscore is also
        // rejected up front when it would end up crossing a single-underscore pair.
        if (IsTokenWhitespaceLike(nextToken) ||
            (currentToken.Type == TokenType.DoubleUnderscore && DoesDoubleUnderscoreBreak(cursor, tokens)))
        {
            AddUnderscoreAsLiteral(children, currentToken.Value, cursor);
        }
        else
        {
            underscores.Add((currentToken.Type, children.Count));
            cursor.MoveNext();
        }
    }

    /// <summary>
    /// Writes every still-open underscore marker back into <paramref name="children"/> as literal
    /// text at the position where it was opened.
    /// </summary>
    public static void InsertUnmatchedUnderscores(List<InlineTypeNode> children,
        List<(TokenType Type, int ChildrenIndex)> underscores)
    {
        // Back to front, so an insertion never shifts a position that is still to be used.
        for (var i = underscores.Count - 1; i >= 0; i--)
        {
            var (type, index) = underscores[i];
            var literal = type == TokenType.DoubleUnderscore ? "__" : "_";
            children.Insert(index, new TextNode(literal));
        }
    }

    /// <summary>
    /// Tries to use the current token as the closer of the most recent opener of the same type.
    /// Returns true when the token was consumed (as a closer, or as a literal because the pair
    /// crosses another open marker); false when the caller should go on treating it as an opener.
    /// </summary>
    private static bool TryCloseExistingHighlight(
        List<(TokenType Type, int ChildrenIndex)> underscores,
        Token currentToken,
        List<InlineTypeNode> children,
        ParserCursor cursor)
    {
        var highlightingType = currentToken.Type;

        var openerIndex = FindMatchingOpenerIndex(underscores, highlightingType);
        if (openerIndex < 0) return false;

        var openerUnderscore = underscores[openerIndex];
        var startIndex = openerUnderscore.ChildrenIndex;
        var innerTokensCount = children.Count - startIndex;

        // Nothing between opener and closer ("____"): not a highlight.
        if (innerTokensCount == 0) return false;

        var innerTokens = children.GetRange(startIndex, innerTokensCount);

        if (!IsValidHighlighting(underscores, highlightingType, openerUnderscore, innerTokens)) return false;

        if (HasIntersection(underscores, openerUnderscore, highlightingType, children, openerIndex, out var innerIndex))
        {
            // Crossing pairs cancel each other: both openers and this closer become literals.
            InsertIntersection(children, underscores, openerUnderscore, underscores[innerIndex], openerIndex, innerIndex);
            AddUnderscoreAsLiteral(children, currentToken.Value, cursor);
            return true;
        }

        CloseHighlight(children, underscores, openerIndex, highlightingType, innerTokens, startIndex);
        cursor.MoveNext();
        return true;
    }

    /// <summary>Index of the most recently recorded opener of <paramref name="type"/>, or -1.</summary>
    private static int FindMatchingOpenerIndex(List<(TokenType Type, int ChildrenIndex)> underscores, TokenType type)
    {
        for (var i = underscores.Count - 1; i >= 0; i--)
        {
            if (underscores[i].Type == type) return i;
        }

        return -1;
    }

    /// <summary>
    /// Double underscores are always acceptable here. A single-underscore pair is acceptable when
    /// none of its direct text children contains whitespace, or when a double-underscore opener
    /// was recorded at an earlier position.
    /// </summary>
    private static bool IsValidHighlighting(
        List<(TokenType Type, int ChildrenIndex)> underscores,
        TokenType highlightingType,
        (TokenType Type, int ChildrenIndex) openerUnderscore,
        List<InlineTypeNode> innerTokens)
    {
        if (highlightingType != TokenType.Underscore) return true;

        var highlightingIsInsideDoubleUnderscore = underscores.Any(underscore =>
            underscore.Type == TokenType.DoubleUnderscore &&
            underscore.ChildrenIndex < openerUnderscore.ChildrenIndex);

        var areThereAnyWhitespaces = innerTokens.Any(t => t is TextNode tn && tn.Text.Any(char.IsWhiteSpace));

        return highlightingIsInsideDoubleUnderscore || !areThereAnyWhitespaces;
    }

    /// <summary>
    /// Looks for an opener of the other type that was recorded after <paramref name="opener"/> and
    /// strictly inside the range being closed, i.e. a pair that would cross this one.
    /// </summary>
    private static bool HasIntersection(
        List<(TokenType Type, int ChildrenIndex)> underscores,
        (TokenType Type, int ChildrenIndex) opener,
        TokenType highlightingType,
        List<InlineTypeNode> children,
        int openerIndex,
        out int intersectionIndex)
    {
        for (var i = openerIndex + 1; i < underscores.Count; i++)
        {
            if (underscores[i].Type == highlightingType ||
                underscores[i].ChildrenIndex <= opener.ChildrenIndex ||
                underscores[i].ChildrenIndex >= children.Count)
                continue;

            intersectionIndex = i;
            return true;
        }

        intersectionIndex = -1;
        return false;
    }

    /// <summary>
    /// Replaces the children from <paramref name="startIndex"/> to the end with a single
    /// strong / emphasis node and forgets the opener.
    /// </summary>
    private static void CloseHighlight(
        List<InlineTypeNode> children,
        List<(TokenType Type, int ChildrenIndex)> underscores,
        int openerIndex,
        TokenType highlightingType,
        List<InlineTypeNode> innerTokens,
        int startIndex)
    {
        children.RemoveRange(startIndex, innerTokens.Count);

        InlineTypeNode node = highlightingType == TokenType.DoubleUnderscore
            ? new StrongNode(innerTokens)
            : new EmNode(innerTokens);

        children.Add(node);
        underscores.RemoveAt(openerIndex);

        // Markers recorded after the opener (e.g. a "__" that sits right before this closer)
        // still point into the range that has just collapsed into one node. Left as they were,
        // InsertUnmatchedUnderscores would insert past the end of the list and throw, or put
        // the literal in the wrong place. Re-anchor them right after the new node.
        for (var i = openerIndex; i < underscores.Count; i++)
        {
            var (type, index) = underscores[i];
            if (index > startIndex)
                underscores[i] = (type, children.Count);
        }
    }

    /// <summary>
    /// Writes two crossing openers back as literals at their original positions and forgets both.
    /// </summary>
    private static void InsertIntersection(
        List<InlineTypeNode> children,
        List<(TokenType Type, int ChildrenIndex)> underscores,
        (TokenType Type, int ChildrenIndex) opener,
        (TokenType Type, int ChildrenIndex) inner,
        int openerIndex, int innerIndex)
    {
        var innerLiteral = inner.Type == TokenType.DoubleUnderscore ? "__" : "_";
        var openerLiteral = opener.Type == TokenType.DoubleUnderscore ? "__" : "_";

        // Higher position first, so the second insert does not shift the first.
        children.Insert(inner.ChildrenIndex, new TextNode(innerLiteral));
        children.Insert(opener.ChildrenIndex, new TextNode(openerLiteral));

        underscores.RemoveAt(innerIndex);
        underscores.RemoveAt(openerIndex);

        // The two inserts moved everything behind them. Markers recorded after the opener must
        // follow: by one if they were recorded before `inner`, by two if after it. Without this
        // "_a____b_" came out as "___a__b_".
        var firstAfterInner = innerIndex - 1; // list position, after both removals, of the first marker recorded after `inner`
        for (var i = openerIndex; i < underscores.Count; i++)
        {
            var shift = i < firstAfterInner ? 1 : 2;
            var (type, index) = underscores[i];
            underscores[i] = (type, index + shift);
        }
    }

    /// <summary>Appends the underscore token's text as a literal and advances the cursor.</summary>
    private static void AddUnderscoreAsLiteral(List<InlineTypeNode> children, string value, ParserCursor cursor)
    {
        children.Add(new TextNode(value));
        cursor.MoveNext();
    }

    /// <summary>True for a missing token, whitespace, end of line and end of file.</summary>
    private static bool IsTokenWhitespaceLike(Token? token)
    {
        return token == null ||
               token.Type == TokenType.Whitespace ||
               token.Type == TokenType.EndOfLine ||
               token.Type == TokenType.EndOfFile;
    }

    /// <summary>
    /// True when the previous token is text ending in a digit and the next token is text
    /// starting with a digit.
    /// </summary>
    private static bool IsUnderscoreBetweenDigits(Token? previousToken, Token? nextToken)
    {
        var leftTokenIsDigit = previousToken is { Type: TokenType.Text, Value.Length: > 0 } &&
                               char.IsDigit(previousToken.Value.Last());
        var rightTokenIsDigit = nextToken is { Type: TokenType.Text, Value.Length: > 0 } &&
                                char.IsDigit(nextToken.Value.First());
        return leftTokenIsDigit && rightTokenIsDigit;
    }

    /// <summary>
    /// Scans ahead on the current line to the next double underscore and reports whether an odd
    /// number of unescaped single underscores lies in between, which means a single-underscore
    /// pair would cross the double-underscore pair. Returns false when the line ends first.
    /// </summary>
    private static bool DoesDoubleUnderscoreBreak(ParserCursor cursor, IList<Token> tokens)
    {
        var singleUnderscoresCount = 0;

        for (var i = cursor.Index + 1; i < tokens.Count; i++)
        {
            var currentToken = tokens[i];

            if (currentToken.Type is TokenType.EndOfLine or TokenType.EndOfFile) return false;
            if (currentToken.Type == TokenType.DoubleUnderscore) return (singleUnderscoresCount % 2) == 1;
            if (currentToken.Type != TokenType.Underscore) continue;

            // An underscore right after a backslash is escaped and does not count.
            var previousToken = tokens.ElementAtOrDefault(i - 1);
            if (previousToken is { Type: TokenType.Escape }) continue;

            singleUnderscoresCount++;
        }

        return false;
    }
}

// ---- cs/Markdown/Rendering/HtmlRenderer.cs (new file; the rest of the file follows) ----
using System;
using
System.Text; +using Markdown.Parsing.Nodes; + +namespace Markdown.Rendering; + +public static class HtmlRenderer +{ + public static string Render(RootNode document) + { + ArgumentNullException.ThrowIfNull(document); + + var sb = new StringBuilder(); + document.RenderHtml(sb); + return sb.ToString(); + } + + public static string EscapeHtml(string? text) + { + if (string.IsNullOrEmpty(text)) return string.Empty; + + var sb = new StringBuilder(text.Length); + foreach (var c in text) + { + switch (c) + { + case '&': sb.Append("&"); break; + case '<': sb.Append("<"); break; + case '>': sb.Append(">"); break; + case '"': sb.Append("""); break; + case '\'': sb.Append("'"); break; + default: sb.Append(c); break; + } + } + + return sb.ToString(); + } +} \ No newline at end of file diff --git a/cs/Markdown/Tests/HtmlRendererTests.cs b/cs/Markdown/Tests/HtmlRendererTests.cs new file mode 100644 index 000000000..ffe92a213 --- /dev/null +++ b/cs/Markdown/Tests/HtmlRendererTests.cs @@ -0,0 +1,191 @@ +using FluentAssertions; +using NUnit.Framework; +using Markdown.Rendering; +using Markdown.Parsing.Nodes; + +namespace Markdown.Tests; + +public class HtmlRendererTests +{ + private static string R(RootNode node) + => HtmlRenderer.Render(node); + + [Test] + public void Html_ShouldRenderEmptyDocument_Test() + { + var root = new RootNode([]); + R(root).Should().Be(""); + } + + [Test] + public void Html_ShouldRenderParagraph_WithText_Test() + { + var p = new ParagraphNode([new TextNode("hello")]); + + R(new RootNode([p])) + .Should().Be("

hello

"); + } + + [Test] + public void Html_ShouldRenderParagraph_WithMultipleInlines_Test() + { + var p = new ParagraphNode([ + new TextNode("hello "), + new EmNode([new TextNode("world")]), + new TextNode(" again") + ]); + + R(new RootNode([p])) + .Should().Be("

hello world again

"); + } + + [Test] + public void Html_ShouldRenderHeaderWithPlainText_Test() + { + var h = new HeaderNode([new TextNode("Title")]); + + R(new RootNode([h])) + .Should().Be("

Title

"); + } + + [Test] + public void Html_ShouldRenderHeaderWithInlineFormatting_Test() + { + var h = new HeaderNode([ + new TextNode("Hello "), + new StrongNode([new TextNode("strong ")]), + + new EmNode([new TextNode("em")]) + ]); + + R(new RootNode([h])) + .Should().Be("

Hello strong em

"); + } + + [Test] + public void Html_ShouldRenderEm_Test() + { + var p = new ParagraphNode([ + new TextNode("a "), + new EmNode([new TextNode("b")]), + new TextNode(" c") + ]); + + R(new RootNode([p])) + .Should().Be("

a b c

"); + } + + [Test] + public void Html_ShouldRenderStrong_Test() + { + var p = new ParagraphNode([ + new TextNode("a "), + new StrongNode([new TextNode("b")]), + new TextNode(" c") + ]); + + R(new RootNode([p])) + .Should().Be("

a b c

"); + } + + [Test] + public void Html_ShouldRenderNestedEmInsideStrong_Test() + { + var strong = new StrongNode([ + new TextNode("a "), + new EmNode([new TextNode("b")]), + new TextNode(" c") + ]); + + var p = new ParagraphNode([strong]); + + R(new RootNode([p])) + .Should().Be("

a b c

"); + } + + [Test] + public void Html_ShouldRenderEscapedChar_AsLiteral_Test() + { + var p = new ParagraphNode([ + new TextNode("x"), + new EscapedNode('_'), + new TextNode("y") + ]); + + R(new RootNode([p])) + .Should().Be("

x_y

"); + } + + [Test] + public void Html_ShouldEscapeHtmlInsideTextNodes_Test() + { + var p = new ParagraphNode([new TextNode(" & \"world\"")]); + + R(new RootNode(new() { p })) + .Should().Be("

<hello> & "world"

"); + } + + [Test] + public void Html_ShouldRenderLink_WithTextOnly_Test() + { + var link = new LinkNode( + [new TextNode("google")], + "https://google.com" + ); + + var p = new ParagraphNode([link]); + + R(new RootNode([p])) + .Should().Be("

google

"); + } + + [Test] + public void Html_ShouldRenderLink_WithInlineFormatting_Test() + { + var link = new LinkNode( + [ + new TextNode("a "), + new EmNode([new TextNode("b")]), + new TextNode(" c") + ], + "/x" + ); + + var p = new ParagraphNode([link]); + + R(new RootNode([p])) + .Should().Be("

a b c

"); + } + + [Test] + public void Html_ShouldRenderComplexNestedFormatting_Test() + { + var p = new ParagraphNode([ + new TextNode("start "), + new StrongNode([ + new TextNode("A "), + new EmNode([new TextNode("B")]), + new TextNode(" C") + ]), + + new TextNode(" end") + ]); + + R(new RootNode([p])) + .Should().Be("

start A B C end

"); + } + + [Test] + public void Html_ShouldEscapeUrlInHref_Test() + { + var link = new LinkNode( + [new TextNode("x")], + "https://example.com?a=1&b=<>&\"" + ); + + var p = new ParagraphNode([link]); + + R(new RootNode([p])) + .Should().Be("

x

"); + } +} \ No newline at end of file diff --git a/cs/Markdown/Tests/MdProcessorTests.cs b/cs/Markdown/Tests/MdProcessorTests.cs new file mode 100644 index 000000000..27e06f815 --- /dev/null +++ b/cs/Markdown/Tests/MdProcessorTests.cs @@ -0,0 +1,415 @@ +using FluentAssertions; +using NUnit.Framework; + +namespace Markdown.Tests; + +public class MdTests +{ + private Md md = null!; + + [SetUp] + public void SetUp() + => md = new Md(); + + private string R(string markdown) + => Md.Render(markdown); + + [Test] + public void Md_ShouldRenderH1_WhenLineStartsWithHashAndSpace_Test() + { + R("# Hello world") + .Should().Be("

Hello world

"); + } + + [Test] + public void Md_ShouldNotRenderH1_WhenHashNotFollowedBySpace_Test() + { + R("#Hello") + .Should().Be("

#Hello

"); + } + + [Test] + public void Md_ShouldRenderParagraph_WhenTextWithoutHeader_Test() + { + R("hello") + .Should().Be("

hello

"); + } + + [Test] + public void Md_ShouldRenderTwoParagraphs_WhenSeparatedByEmptyLine_Test() + { + R("hello\n\nworld") + .Should().Be("

hello

world

"); + } + + [Test] + public void Md_ShouldRenderEm_WhenSingleUnderscorePair_Test() + { + R("a _b_ c") + .Should().Be("

a b c

"); + } + + [Test] + public void Md_ShouldNotRenderEm_WhenSpacesAroundOpening_Test() + { + R("a _ b_") + .Should().Be("

a _ b_

"); + } + + [Test] + public void Md_ShouldNotRenderEm_WhenSpacesBeforeClosing_Test() + { + R("a _b _") + .Should().Be("

a _b _

"); + } + + [Test] + public void Md_ShouldNotRenderEm_WhenUnderscoreInsideDigits_Test() + { + R("12_3") + .Should().Be("

12_3

"); + } + + [Test] + public void Md_ShouldRenderEm_WhenInsideWord_Test() + { + R("a_b_c") + .Should().Be("

abc

"); + } + + [Test] + public void Md_ShouldNotRenderEm_WhenBetweenWords_Test() + { + R("a _b c_") + .Should().Be("

a _b c_

"); + } + + [Test] + public void Md_ShouldNotRenderEm_WhenInsideDigitsExtended_Test() + { + R("12_34_56") + .Should().Be("

12_34_56

"); + } + + [Test] + public void Md_ShouldRenderStrong_WhenDoubleUnderscorePair_Test() + { + R("a __b__ c") + .Should().Be("

a b c

"); + } + + [Test] + public void Md_ShouldNotRenderStrong_WhenEmptyContent_Test() + { + R("____") + .Should().Be("

____

"); + } + + [Test] + public void Md_ShouldAllowStrongInsideStrong_WhenValid_Test() + { + R("__a __b__ c__") + .Should().Be("

a b c

"); + } + + [Test] + public void Md_ShouldRenderNestedEmInsideStrong_WhenValidStructure_Test() + { + R("__a _b_ c__") + .Should().Be("

a b c

"); + } + + [Test] + public void Md_ShouldRenderNestedEmInsideStrong_MultipleContent_Test() + { + R("__a _b c_ d__") + .Should().Be("

a b c d

"); + } + + [Test] + public void Md_ShouldNotRenderNested_WhenIntersectionInvalid_Test() + { + R("_a __b_ c__") + .Should().Be("

_a __b_ c__

"); + } + + [Test] + public void Md_ShouldNotRender_WhenDoubleSingleIntersectOpposite_Test() + { + R("__a _b__ c_") + .Should().Be("

__a _b__ c_

"); + } + + [Test] + public void Md_ShouldEscapeUnderscore_WhenBackslashBefore_Test() + { + R(@"\_a\_") + .Should().Be("

_a_

"); + } + + [Test] + public void Md_ShouldOutputBackslashLiteral_WhenEscapeWithoutNextChar_Test() + { + R(@"\") + .Should().Be("

\\

"); + } + + [Test] + public void Md_ShouldEscapeBackslash_WhenDoubleBackslash_Test() + { + R(@"\\") + .Should().Be("

\\

"); + } + + [Test] + public void Md_ShouldEscapeUnderscoreInsideWord_Test() + { + R("hel\\_lo") + .Should().Be("

hel_lo

"); + } + + [Test] + public void Md_ShouldEscapeDoubleUnderscore_Test() + { + R(@"\__a__") + .Should().Be("

__a__

"); + } + + [Test] + public void Md_ShouldEscapeUnderscore_WhenAtEnd_Test() + { + R("a \\_") + .Should().Be("

a _

"); + } + + [Test] + public void Md_ShouldHandleEscapeBeforeLetters_AsLiteral_Test() + { + R(@"\hello") + .Should().Be("

\\hello

"); + } + + [Test] + public void Md_ShouldRenderLink_WhenValidMarkdownLink_Test() + { + R("[google](https://google.com)") + .Should().Be("

google

"); + } + + [Test] + public void Md_ShouldNotRenderLink_WhenMissingRightBracket_Test() + { + R("[text(https://url)") + .Should().Be("

[text(https://url)

"); + } + + [Test] + public void Md_ShouldNotRenderLink_WhenMissingParenthesis_Test() + { + R("[text]url)") + .Should().Be("

[text]url)

"); + } + + [Test] + public void Md_ShouldRenderLinkInsideStrong_Test() + { + R("__hello [g](u)__") + .Should().Be("

hello g

"); + } + + [Test] + public void Md_ShouldHandleEscapeInsideLinkText_Test() + { + R("[go\\_og\\_le](url)") + .Should().Be("

go_og_le

"); + } + + [Test] + public void Md_ShouldRenderMixedStyles_WhenMultipleInlineTokens_Test() + { + R("Hello _world_ and __strong__ text") + .Should().Be("

Hello world and strong text

"); + } + + [Test] + public void Md_ShouldKeepUnderscoresLiteral_WhenUnmatched_Test() + { + R("_a") + .Should().Be("

_a

"); + } + + [Test] + public void Md_ShouldRenderEmptyString_AsEmptyString_Test() + { + R("").Should().Be(""); + } + + [Test] + public void Md_ShouldThrowException_WhenGettingNull_Test() + { + Action act = () => Md.Render(null); + act.Should().Throw(); + } + + [Test] + public void Md_ShouldKeepMultipleNewlines_AsParagraphBreaks_Test() + { + R("a\n\n\nb") + .Should().Be("

a

b

"); + } + + [Test] + public void Md_ShouldTreatHashInsideText_AsLiteral_Test() + { + R("a # b") + .Should().Be("

a # b

"); + } + + [Test] + public void Md_ShouldNotRenderLink_WhenNoClosingParenthesis_Test() + { + R("[text](url") + .Should().Be("

[text](url

"); + } + + [Test] + public void Md_ShouldNotRenderLink_WhenNoClosingBracket_Test() + { + R("[text(url)") + .Should().Be("

[text(url)

"); + } + + [Test] + public void Md_ShouldAllowTextBeforeLink_Test() + { + R("hello [x](y)") + .Should().Be("

hello x

"); + } + + [Test] + public void Md_ShouldAllowTextAfterLink_Test() + { + R("[x](y) world") + .Should().Be("

x world

"); + } + + [Test] + public void Md_ShouldNotBreak_WhenLinkInsideEscapedBrackets_Test() + { + R(@"\[link](u)") + .Should().Be("

[link](u)

"); + } + + [Test] + public void Md_ShouldNotRenderStrong_WhenFollowedByWhitespace_Test() + { + R("__ a__") + .Should().Be("

__ a__

"); + } + + [Test] + public void Md_ShouldNotRenderStrong_WhenEndingWithWhitespace_Test() + { + R("__a __") + .Should().Be("

__a __

"); + } + + [Test] + public void Md_ShouldRenderStrongOverMultipleInlines_Test() + { + R("__a [x](y) b__") + .Should().Be("

a x b

"); + } + + [Test] + public void Md_ShouldRenderEmInsideWord_MultipleTimes_Test() + { + R("a_b_c_d_e") + .Should().Be("

abcde

"); + } + + [Test] + public void Md_ShouldNotRenderEm_WhenSingleUnderscoreAtStartAndSpaceAfter_Test() + { + R("_ a_") + .Should().Be("

_ a_

"); + } + + [Test] + public void Md_ShouldHandleEscapedHash_AsLiteral_Test() + { + R(@"\# header") + .Should().Be("

# header

"); + } + + [Test] + public void Md_ShouldRenderHeader_WithInlineFormatting_Test() + { + R("# __a _b_ c__") + .Should().Be("

a b c

"); + } + + [Test] + public void Md_ShouldTreatSingleBackslashAtEnd_AsLiteral_Test() + { + R("abc\\") + .Should().Be("

abc\\

"); + } + + [Test] + public void Md_ShouldIgnoreUnmatchedOpeningBracket_Test() + { + R("text [abc") + .Should().Be("

text [abc

"); + } + + [Test] + public void Md_ShouldIgnoreUnmatchedClosingBracket_Test() + { + R("text ]abc") + .Should().Be("

text ]abc

"); + } + + [Test] + public void Md_ShouldHandleMultipleLinksInRow_Test() + { + R("[a](1)[b](2)[c](3)") + .Should().Be("

abc

"); + } + + [Test] + public void Md_ShouldHandleMultipleEscapesInRow_Test() + { + R("\\\\\\\\__a__") + .Should().Be("

\\\\a

"); + } + + [Test] + public void Md_ShouldNotRenderCrossingEmStrong_ComplexCase_Test() + { + R("_a __b c_ d__") + .Should().Be("

_a __b c_ d__

"); + } + + [Test] + public void Md_ShouldRenderLoneHash_AsLiteral_Test() + { + R("#") + .Should().Be("

#

"); + } + + [Test] + public void Md_ShouldRenderSpaceThenHash_AsParagraph_Test() + { + R(" # heading") + .Should().Be("

# heading

"); + } + + [Test] + public void Md_ShouldNotBreakOnVeryLongGarbage_Test() + { + var s = new string('[', 2000) + new string(')', 2000); + R(s) + .Should().Be("

" + s + "

");
+    }
+}
\ No newline at end of file
diff --git a/cs/Markdown/Tests/PerformanceTests.cs b/cs/Markdown/Tests/PerformanceTests.cs
new file mode 100644
index 000000000..7b3595d88
--- /dev/null
+++ b/cs/Markdown/Tests/PerformanceTests.cs
@@ -0,0 +1,126 @@
+using System.Diagnostics;
+using System.Text;
+using FluentAssertions;
+using NUnit.Framework;
+
+namespace Markdown.Tests;
+
+/// <summary>
+/// Coarse wall-clock checks for <see cref="Md.Render"/>. The thresholds are absolute,
+/// so the results depend on the speed and load of the machine running the tests.
+/// </summary>
+[TestFixture]
+public class PerformanceTests
+{
+    // Number of timed runs averaged by MeasureAverageTicks.
+    private const int MeasurementRuns = 5;
+
+    // Md.Render is static, so no Md instance is created. Warming up before every
+    // test keeps first-call costs out of the measured intervals.
+    [SetUp]
+    public void SetUp()
+        => WarmupRenderer();
+
+    // One million plain characters.
+    [Test]
+    public void Renderer_ShouldRenderLargeTextFast_Test()
+    {
+        var input = new string('a', 1_000_000);
+
+        var sw = Stopwatch.StartNew();
+        Md.Render(input);
+        sw.Stop();
+
+        sw.ElapsedMilliseconds.Should().BeLessThan(50);
+    }
+
+    // Twenty thousand consecutive underscores.
+    [Test]
+    public void Renderer_ShouldRenderManyUnderscoresFast_Test()
+    {
+        var input = new string('_', 20_000);
+
+        var sw = Stopwatch.StartNew();
+        Md.Render(input);
+        sw.Stop();
+
+        sw.ElapsedMilliseconds.Should().BeLessThan(20);
+    }
+
+    // Five thousand "[g](u)" links separated by spaces.
+    [Test]
+    public void Renderer_ShouldRenderManyLinksFast_Test()
+    {
+        var sb = new StringBuilder();
+        for (var i = 0; i < 5000; i++)
+            sb.Append("[g](u) ");
+
+        var input = sb.ToString();
+
+        var sw = Stopwatch.StartNew();
+        Md.Render(input);
+        sw.Stop();
+
+        sw.ElapsedMilliseconds.Should().BeLessThan(40);
+    }
+
+    // Thirty thousand backslash-underscore pairs.
+    [Test]
+    public void Renderer_ShouldRenderManyEscapesFast_Test()
+    {
+        var sb = new StringBuilder();
+        for (var i = 0; i < 30_000; i++)
+            sb.Append("\\_");
+
+        var input = sb.ToString();
+
+        var sw = Stopwatch.StartNew();
+        Md.Render(input);
+        sw.Stop();
+
+        sw.ElapsedMilliseconds.Should().BeLessThan(30);
+    }
+
+    // Doubling the input size should roughly double the render time (ratio near 2).
+    [Test]
+    public void Renderer_ShouldScale_ApproximatelyLinearly_Test()
+    {
+        var chunk = @"__a _b c_ d__ [x](y) \\text ";
+
+        var inputSmall = string.Concat(Enumerable.Repeat(chunk, 2000));
+        var inputLarge = string.Concat(Enumerable.Repeat(chunk, 4000));
+
+        var tSmall = MeasureAverageTicks(() => Md.Render(inputSmall));
+        var tLarge = MeasureAverageTicks(() => Md.Render(inputLarge));
+
+        var ratio = (double)tLarge / tSmall;
+
+        TestContext.WriteLine($"tSmall={tSmall}, tLarge={tLarge}, ratio={ratio}");
+
+        ratio.Should().BeInRange(1.4, 2.7);
+    }
+
+    // Runs the action MeasurementRuns times and returns the mean elapsed Stopwatch ticks.
+    private static long MeasureAverageTicks(Action action)
+    {
+        var sw = new Stopwatch();
+        var sum = 0L;
+
+        for (var i = 0; i < MeasurementRuns; i++)
+        {
+            sw.Restart();
+            action();
+            sw.Stop();
+            sum += sw.ElapsedTicks;
+        }
+
+        return sum / MeasurementRuns;
+    }
+
+    // Renders a tiny document ten times so the render path has already run before timing starts.
+    private static void WarmupRenderer()
+    {
+        for (var i = 0; i < 10; i++)
+            Md.Render("_x_");
+    }
+}
\ No newline at end of file
diff --git a/cs/Markdown/Tests/TokenizerTests.cs b/cs/Markdown/Tests/TokenizerTests.cs
new file mode 100644
index 000000000..e798ce5cb
--- /dev/null
+++ b/cs/Markdown/Tests/TokenizerTests.cs
@@ -0,0 +1,126 @@
+using FluentAssertions;
+using Markdown.Tokenizing;
+using NUnit.Framework;
+
+namespace Markdown.Tests;
+
+/// <summary>
+/// Checks the token stream produced by <see cref="Tokenizer.Tokenize"/>.
+/// </summary>
+[TestFixture]
+public class TokenizerTests
+{
+    // Tokenize is static, so the fixture needs no Tokenizer instance and no set-up.
+    private static List<Token> Tokens(string text)
+        => Tokenizer.Tokenize(text);
+
+    // Only the token kinds, for asserting on a whole sequence at once.
+    private static List<TokenType> Types(string text)
+        => Tokenizer.Tokenize(text).Select(t => t.Type).ToList();
+
+
+    [TestCase("hello!")]
+    public void Tokenizer_ShouldProduceTextToken_WhenSimpleWord_Test(string input)
+    {
+        var tokens = Tokens(input);
+        tokens.Should().HaveCount(2); // the Text token plus the trailing EndOfFile
+        tokens[0].Type.Should().Be(TokenType.Text);
+        tokens[0].Value.Should().Be(input);
+    }
+
+
+    [TestCase("a b")]
+    public void Tokenizer_ShouldProduceWhitespaceToken_WhenSpaceEncountered_Test(string input)
+    {
+        var tokens = Tokens(input);
+        tokens[1].Type.Should().Be(TokenType.Whitespace);
+        tokens[1].Value.Should().Be(" ");
+    }
+
+
+    [TestCase("a\tb")]
+    public void Tokenizer_ShouldProduceWhitespaceTokenWithTabValue_WhenTabEncountered_Test(string input)
+    {
+        var tokens = Tokens(input);
+        tokens[1].Type.Should().Be(TokenType.Whitespace);
+        tokens[1].Value.Should().Be("\t");
+    }
+
+
+    [TestCase("a\nb")]
+    public void Tokenizer_ShouldProduceEndOfLineToken_WhenNewlineEncountered_Test(string input)
+    {
+        var tokens = Tokens(input);
+        tokens[1].Type.Should().Be(TokenType.EndOfLine);
+        tokens[2].Type.Should().Be(TokenType.Text);
+    }
+
+
+    [TestCase("# hello!")]
+    public void Tokenizer_ShouldProduceHashToken_WhenHashEncountered_Test(string input)
+    {
+        var tokens = Tokens(input);
+        tokens[0].Type.Should().Be(TokenType.Hash);
+        tokens[1].Type.Should().Be(TokenType.Whitespace);
+        tokens[2].Type.Should().Be(TokenType.Text);
+    }
+
+
+    [TestCase("")]
+    [TestCase(" ")]
+    [TestCase("hello!")]
+    public void Tokenizer_ShouldPlaceEndOfFileTokenLast_WhenTokenizingAnyInput_Test(string input)
+    {
+        var tokens = Tokens(input);
+        tokens[^1].Type.Should().Be(TokenType.EndOfFile);
+    }
+
+
+    [Test]
+    public void Tokenizer_ShouldProduceUnderscoreToken_WhenSingleUnderscoreEncountered_Test()
+    {
+        var tokens = Tokens("_");
+        tokens[0].Type.Should().Be(TokenType.Underscore);
+    }
+
+
+    [Test]
+    public void Tokenizer_ShouldProduceDoubleUnderscoreToken_WhenTwoUnderscoresEncountered_Test()
+    {
+        var tokens = Tokens("__");
+        tokens[0].Type.Should().Be(TokenType.DoubleUnderscore);
+    }
+
+
+    [Test]
+    public void Tokenizer_ShouldSplitIntoDoubleAndSingleUnderscore_WhenThreeUnderscoresEncountered_Test()
+    {
+        Types("___").Should().Equal( // the first two underscores pair up, the third stands alone
+            TokenType.DoubleUnderscore,
+            TokenType.Underscore,
+            TokenType.EndOfFile
+        );
+    }
+
+
+    [Test]
+    public void Tokenizer_ShouldProduceEscapeToken_WhenBackslashEncountered_Test()
+    {
+        var tokens = Tokens("\\_");
+        tokens[0].Type.Should().Be(TokenType.Escape);
+        tokens[1].Type.Should().Be(TokenType.Underscore);
+    }
+
+
+    [Test]
+    public void Tokenizer_ShouldRecognizeBracketAndParenTokens_WhenBracketCharactersEncountered_Test()
+    {
+        Types("[]()").Should().Equal(
+            TokenType.LeftBracket,
+            TokenType.RightBracket,
+            TokenType.LeftParen,
+            TokenType.RightParen,
+            TokenType.EndOfFile
+        );
+    }
+}
\ No newline at end of file
diff --git
a/cs/Markdown/Tokenizing/CharCursor.cs b/cs/Markdown/Tokenizing/CharCursor.cs
new file mode 100644
index 000000000..885cb6398
--- /dev/null
+++ b/cs/Markdown/Tokenizing/CharCursor.cs
@@ -0,0 +1,32 @@
+namespace Markdown.Tokenizing;
+
+/// <summary>
+/// Forward-only reader over the characters of a string.
+/// </summary>
+public class CharCursor(string text)
+{
+    private int index;
+
+    /// <summary>The character at the cursor, or <c>'\0'</c> once <see cref="End"/> is true.</summary>
+    public char Current => End ? '\0' : text[index];
+
+    /// <summary>True when the cursor has moved past the last character.</summary>
+    public bool End => index >= text.Length;
+
+    /// <summary>Advances by one character; does nothing once <see cref="End"/> is true.</summary>
+    public void MoveNext()
+    {
+        if (!End)
+            index++;
+    }
+
+    /// <summary>
+    /// Reports whether the character after the current one equals <paramref name="c"/>;
+    /// a position beyond the end of the text is compared as <c>'\0'</c>.
+    /// </summary>
+    public bool MatchNext(char c)
+    {
+        var next = index + 1;
+        return (next < text.Length ? text[next] : '\0') == c;
+    }
+}
\ No newline at end of file
diff --git a/cs/Markdown/Tokenizing/Token.cs b/cs/Markdown/Tokenizing/Token.cs
new file mode 100644
index 000000000..cd1437a07
--- /dev/null
+++ b/cs/Markdown/Tokenizing/Token.cs
@@ -0,0 +1,10 @@
+namespace Markdown.Tokenizing;
+
+/// <summary>
+/// A lexical unit produced by <see cref="Tokenizer"/>: its kind plus its text.
+/// </summary>
+public class Token(TokenType type, string value)
+{
+    public TokenType Type { get; } = type;
+    public string Value { get; } = value;
+}
\ No newline at end of file
diff --git a/cs/Markdown/Tokenizing/TokenType.cs b/cs/Markdown/Tokenizing/TokenType.cs
new file mode 100644
index 000000000..5bb8a4b8a
--- /dev/null
+++ b/cs/Markdown/Tokenizing/TokenType.cs
@@ -0,0 +1,20 @@
+namespace Markdown.Tokenizing;
+
+/// <summary>
+/// Kinds of <see cref="Token"/> emitted by <see cref="Tokenizer"/>.
+/// </summary>
+public enum TokenType
+{
+    Text,             // run of characters with no special meaning
+    Underscore,       // "_"
+    DoubleUnderscore, // "__"
+    Escape,           // "\"
+    Hash,             // "#"
+    Whitespace,       // one whitespace character other than '\n'
+    EndOfLine,        // "\n"
+    EndOfFile,        // terminator appended after the last character; empty value
+    LeftBracket,      // "["
+    RightBracket,     // "]"
+    LeftParen,        // "("
+    RightParen        // ")"
+}
\ No newline at end of file
diff --git a/cs/Markdown/Tokenizing/Tokenizer.cs b/cs/Markdown/Tokenizing/Tokenizer.cs
new file mode 100644
index 000000000..3da6121e7
--- /dev/null
+++ b/cs/Markdown/Tokenizing/Tokenizer.cs
@@ -0,0 +1,129 @@
+using System.Text;
+
+namespace Markdown.Tokenizing;
+
+/// <summary>
+/// Splits Markdown source text into a flat list of <see cref="Token"/>s.
+/// </summary>
+public class Tokenizer
+{
+    /// <summary>
+    /// Tokenizes <paramref name="text"/> from left to right.
+    /// </summary>
+    /// <param name="text">The text to tokenize; may be empty.</param>
+    /// <returns>The tokens in source order, always ending with one <see cref="TokenType.EndOfFile"/> token.</returns>
+    /// <exception cref="ArgumentNullException"><paramref name="text"/> is <c>null</c>.</exception>
+    public static List<Token> Tokenize(string text)
+    {
+        ArgumentNullException.ThrowIfNull(text);
+
+        var cursor = new CharCursor(text);
+        var tokens = new List<Token>();
+
+        while (!cursor.End)
+        {
+            var c = cursor.Current;
+
+            switch (c)
+            {
+                case '\n':
+                    tokens.Add(new Token(TokenType.EndOfLine, "\n"));
+                    cursor.MoveNext();
+                    break;
+
+                // Every other whitespace character, not only ' ' and '\t'. IsTextChar
+                // rejects all whitespace, so a character such as '\r', '\v' or '\u00A0'
+                // reaching the default branch would be consumed by nobody and this
+                // loop would spin forever on the same position.
+                case var ws when char.IsWhiteSpace(ws):
+                    tokens.Add(new Token(TokenType.Whitespace, ws.ToString()));
+                    cursor.MoveNext();
+                    break;
+
+                case '\\':
+                    tokens.Add(new Token(TokenType.Escape, "\\"));
+                    cursor.MoveNext();
+                    break;
+
+                case '_':
+                    // Greedy: two adjacent underscores form one DoubleUnderscore token.
+                    if (cursor.MatchNext('_'))
+                    {
+                        tokens.Add(new Token(TokenType.DoubleUnderscore, "__"));
+                        cursor.MoveNext();
+                        cursor.MoveNext();
+                    }
+                    else
+                    {
+                        tokens.Add(new Token(TokenType.Underscore, "_"));
+                        cursor.MoveNext();
+                    }
+
+                    break;
+
+                case '#':
+                    tokens.Add(new Token(TokenType.Hash, "#"));
+                    cursor.MoveNext();
+                    break;
+
+                case '[':
+                    tokens.Add(new Token(TokenType.LeftBracket, "["));
+                    cursor.MoveNext();
+                    break;
+
+                case ']':
+                    tokens.Add(new Token(TokenType.RightBracket, "]"));
+                    cursor.MoveNext();
+                    break;
+
+                case '(':
+                    tokens.Add(new Token(TokenType.LeftParen, "("));
+                    cursor.MoveNext();
+                    break;
+
+                case ')':
+                    tokens.Add(new Token(TokenType.RightParen, ")"));
+                    cursor.MoveNext();
+                    break;
+
+                default:
+                    // c is a text character here, so ReadText consumes at least one character.
+                    ReadText(tokens, cursor);
+                    break;
+            }
+        }
+
+        tokens.Add(new Token(TokenType.EndOfFile, ""));
+        return tokens;
+    }
+
+    // Appends one Text token covering the longest run of text characters at the cursor.
+    private static void ReadText(List<Token> tokens, CharCursor cursor)
+    {
+        var sb = new StringBuilder();
+
+        while (!cursor.End && IsTextChar(cursor.Current))
+        {
+            sb.Append(cursor.Current);
+            cursor.MoveNext();
+        }
+
+        if (sb.Length > 0)
+            tokens.Add(new Token(TokenType.Text, sb.ToString()));
+    }
+
+    // True for any character that is neither whitespace nor one of the characters
+    // Tokenize turns into a dedicated token.
+    private static bool IsTextChar(char c)
+    {
+        return c != '_' &&
+               c != '\\' &&
+               c != '[' &&
+               c != ']' &&
+               c != '(' &&
+               c != ')' &&
+               c != '#' &&
+               c != '\n' &&
+               !char.IsWhiteSpace(c);
+    }
+}
\ No newline at end of file
diff --git a/cs/clean-code.sln b/cs/clean-code.sln
index 2206d54db..54d4f9b2c 100644
--- a/cs/clean-code.sln
+++ b/cs/clean-code.sln
@@ -9,6 +9,8 @@ Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "ControlDigit", "ControlDigi
 EndProject
 Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Samples", "Samples\Samples.csproj", "{C3EF41D7-50EF-4CE1-B30A-D1D81C93D7FA}"
 EndProject
+Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Markdown", "Markdown\Markdown.csproj", "{D4A42664-A52F-41D9-A7A0-D0A978B404E5}"
+EndProject
 Global
 	GlobalSection(SolutionConfigurationPlatforms) = preSolution
 		Debug|Any CPU = Debug|Any CPU
@@ -27,5 +29,9 @@ Global
 		{C3EF41D7-50EF-4CE1-B30A-D1D81C93D7FA}.Debug|Any CPU.Build.0 = Debug|Any CPU
 		{C3EF41D7-50EF-4CE1-B30A-D1D81C93D7FA}.Release|Any CPU.ActiveCfg = Release|Any CPU
 		{C3EF41D7-50EF-4CE1-B30A-D1D81C93D7FA}.Release|Any CPU.Build.0 = Release|Any CPU
+		{D4A42664-A52F-41D9-A7A0-D0A978B404E5}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
+		{D4A42664-A52F-41D9-A7A0-D0A978B404E5}.Debug|Any CPU.Build.0 = Debug|Any CPU
+		{D4A42664-A52F-41D9-A7A0-D0A978B404E5}.Release|Any CPU.ActiveCfg = Release|Any CPU
+		{D4A42664-A52F-41D9-A7A0-D0A978B404E5}.Release|Any CPU.Build.0 = Release|Any CPU
 	EndGlobalSection
 EndGlobal