diff --git a/cs/Markdown/Interfaces/IParser.cs b/cs/Markdown/Interfaces/IParser.cs
new file mode 100644
index 000000000..5c2ebacf2
--- /dev/null
+++ b/cs/Markdown/Interfaces/IParser.cs
@@ -0,0 +1,8 @@
+namespace Markdown.Interfaces;
+
+/// <summary>Parses markdown text into tokens.</summary>
+public interface IParser
+{
+    /// <summary>Returns the tokens parsed from <paramref name="markdown"/>.</summary>
+    IEnumerable<Token> Parse(string markdown);
+}
\ No newline at end of file
diff --git a/cs/Markdown/Interfaces/IRender.cs b/cs/Markdown/Interfaces/IRender.cs
new file mode 100644
index 000000000..6f3d66413
--- /dev/null
+++ b/cs/Markdown/Interfaces/IRender.cs
@@ -0,0 +1,8 @@
+namespace Markdown.Interfaces;
+
+/// <summary>Converts a sequence of tokens into a string.</summary>
+public interface IRender
+{
+    /// <summary>Renders <paramref name="tokens"/> and returns the produced text.</summary>
+    string Render(IEnumerable<Token> tokens);
+}
\ No newline at end of file
diff --git a/cs/Markdown/Interfaces/ITokenHandler.cs b/cs/Markdown/Interfaces/ITokenHandler.cs
new file mode 100644
index 000000000..fb6642cf2
--- /dev/null
+++ b/cs/Markdown/Interfaces/ITokenHandler.cs
@@ -0,0 +1,13 @@
+using Markdown.Parsers;
+
+namespace Markdown.Interfaces;
+
+/// <summary>Recognises and consumes one kind of markup at the parser's current position.</summary>
+public interface ITokenHandler
+{
+    /// <summary>Reports whether this handler wants to process the text at the current position.</summary>
+    bool CanHandle(char currentChar, char next, ParserContext context);
+
+    /// <summary>Processes the text at the current position, updating <paramref name="context"/>.</summary>
+    void Handle(ParserContext context);
+}
\ No newline at end of file
diff --git a/cs/Markdown/Interfaces/ITokenRender.cs b/cs/Markdown/Interfaces/ITokenRender.cs
new file mode 100644
index 000000000..0a0dead1e
--- /dev/null
+++ b/cs/Markdown/Interfaces/ITokenRender.cs
@@ -0,0 +1,11 @@
+global using Markdown.Tokens;
+using System.Text;
+
+namespace Markdown.Interfaces;
+
+/// <summary>Renders a single token.</summary>
+public interface ITokenRender
+{
+    /// <summary>Writes the rendering of <paramref name="token"/> to <paramref name="result"/>.</summary>
+    void Render(Token token, StringBuilder result);
+}
\ No newline at end of file
diff --git a/cs/Markdown/Markdown.csproj b/cs/Markdown/Markdown.csproj
new file mode 100644
index 000000000..360133917
--- /dev/null
+++ b/cs/Markdown/Markdown.csproj
@@ -0,0 +1,15 @@
+
+
+
+ net9.0
+ enable
+ enable
+
+
+
+
+
+
+
+
+
diff --git a/cs/Markdown/MarkdownTest/MarkdownParserTest.cs b/cs/Markdown/MarkdownTest/MarkdownParserTest.cs
new file mode 100644
index 000000000..2114dfecd
--- /dev/null
+++ 
b/cs/Markdown/MarkdownTest/MarkdownParserTest.cs
@@ -0,0 +1,442 @@
+using FluentAssertions;
+using Markdown.Handlers;
+using Markdown.Interfaces;
+using Markdown.Parsers;
+using NUnit.Framework;
+using System.Diagnostics;
+
+namespace Markdown.MarkdownTest;
+
+/// <summary>Token-level tests for <see cref="MarkdownParser"/>.</summary>
+[TestFixture]
+public class MarkdownParserTests
+{
+    private MarkdownParser parser = null!;
+
+    [SetUp]
+    public void Setup()
+    {
+        parser = new MarkdownParser(new List<ITokenHandler>
+        {
+            new EscapeHandler(),
+            new HeaderHandler(),
+            new StrongHandler(),
+            new ItalicHandler(),
+            new NewLineHandler(),
+        });
+    }
+
+    [Test]
+    public void Parse_WhenItalicTag_ShouldReturnCorrectTokens()
+    {
+        var input = "Это _курсив_ текст";
+        var expected = new List<Token>
+        {
+            new(TokenType.Text, "Это "),
+            new(TokenType.Italics,
+                children: new List<Token> { new(TokenType.Text, "курсив") }),
+            new(TokenType.Text, " текст")
+        };
+        CompareTokens(expected, parser.Parse(input).ToList());
+    }
+
+    [Test]
+    public void Parse_WhenStrongTag_ShouldReturnCorrectTokens()
+    {
+        var input = "Это __полужирный__ текст";
+        var expected = new List<Token>
+        {
+            new(TokenType.Text, "Это "),
+            new(TokenType.Strong,
+                children: new List<Token>
+                    { new(TokenType.Text, "полужирный") }),
+            new(TokenType.Text, " текст")
+        };
+        CompareTokens(expected, parser.Parse(input).ToList());
+    }
+
+    [Test]
+    public void Parse_WhenHeaderTag_ShouldReturnCorrectTokens()
+    {
+        var input = "# Заголовок";
+        var expected = new List<Token>
+        {
+            new(TokenType.Header,
+                children: new List<Token> { new(TokenType.Text, "Заголовок") })
+        };
+        CompareTokens(expected, parser.Parse(input).ToList());
+    }
+
+    [Test]
+    public void Parse_WhenNestedItalicAndStrongTags_ShouldReturnCorrectTokens()
+    {
+        var input = "Это __жирный _и курсивный_ текст__";
+        var expected = new List<Token>
+        {
+            new(TokenType.Text, "Это "),
+            new(TokenType.Strong, children: new List<Token>
+            {
+                new(TokenType.Text, "жирный "),
+                new(TokenType.Italics,
+                    children: new List<Token>
+                        { new(TokenType.Text, "и курсивный") }),
+                new(TokenType.Text, " текст")
+            })
+        };
+        CompareTokens(expected, parser.Parse(input).ToList());
+    }
+
+    [Test]
+    public void Parse_WhenMultipleTokensInLine_ShouldReturnCorrectTokens()
+    {
+        var input = "Это _курсив_,а это __жирный__ текст.";
+        var expected = new List<Token>
+        {
+            new(TokenType.Text, "Это "),
+            new(TokenType.Italics,
+                children: new List<Token> { new(TokenType.Text, "курсив") }),
+            new(TokenType.Text, ",а это "),
+            new(TokenType.Strong,
+                children: new List<Token> { new(TokenType.Text, "жирный") }),
+            new(TokenType.Text, " текст.")
+        };
+        CompareTokens(expected, parser.Parse(input).ToList());
+    }
+
+    [Test]
+    public void Parse_WhenBoundedTagsInOneWord_ShouldReturnCorrectTokens()
+    {
+        var input = "en_d._ ,mi__dd__le";
+        var expected = new List<Token>
+        {
+            new(TokenType.Text, "en"),
+            new(TokenType.Italics,
+                children: new List<Token> { new(TokenType.Text, "d.") }),
+            new(TokenType.Text, " ,mi"),
+            new(TokenType.Strong,
+                children: new List<Token> { new(TokenType.Text, "dd") }),
+            new(TokenType.Text, "le")
+        };
+        CompareTokens(expected, parser.Parse(input).ToList());
+    }
+
+    [Test]
+    public void Parse_WhenEscapedTags_ShouldReturnPlainText()
+    {
+        var input = @"Экранированный \_символ\_";
+        var expected = new List<Token>
+            { new(TokenType.Text, "Экранированный _символ_") };
+        CompareTokens(expected, parser.Parse(input).ToList());
+    }
+
+    [Test]
+    public void Parse_WhenItalicInStrong_ShouldReturnCorrectTokens()
+    {
+        var input = "Это __двойное _и одинарное_ выделение__";
+        var expected = new List<Token>
+        {
+            new(TokenType.Text, "Это "),
+            new(TokenType.Strong, children: new List<Token>
+            {
+                new(TokenType.Text, "двойное "),
+                new(TokenType.Italics,
+                    children: new List<Token>
+                        { new(TokenType.Text, "и одинарное") }),
+                new(TokenType.Text, " выделение")
+            })
+        };
+        CompareTokens(expected, parser.Parse(input).ToList());
+    }
+
+    [Test]
+    public void Parse_WhenHeaderWithTags_ShouldReturnCorrectTokens()
+    {
+        var input = "# Заголовок __с _разными_ символами__";
+        var expected = new List<Token>
+        {
+            new(TokenType.Header, children: new List<Token>
+            {
+                new(TokenType.Text, "Заголовок "),
+                new(TokenType.Strong, children: new List<Token>
+                {
+                    new(TokenType.Text, "с "),
+                    new(TokenType.Italics,
+                        children: new List<Token>
+                            { new(TokenType.Text, "разными") }),
+                    new(TokenType.Text, " символами")
+                })
+            })
+        };
+        CompareTokens(expected, parser.Parse(input).ToList());
+    }
+
+    [Test]
+    public void Parse_WhenHeaderWithoutSpace_ShouldNotBeHeader()
+    {
+        var input = "#Заголовок без пробела";
+        var expected = new List<Token>
+            { new(TokenType.Text, "#Заголовок без пробела") };
+        CompareTokens(expected, parser.Parse(input).ToList());
+    }
+
+
+    [Test]
+    public void Parse_WhenMultipleHeaders_ShouldReturnCorrectTokens()
+    {
+        var input = "# Заголовок 1\n# Заголовок 2";
+        var expected = new List<Token>
+        {
+            new(TokenType.Header,
+                children: new List<Token>
+                    { new(TokenType.Text, "Заголовок 1") }),
+            new(TokenType.Text, "\n"),
+            new(TokenType.Header,
+                children: new List<Token>
+                    { new(TokenType.Text, "Заголовок 2") })
+        };
+        CompareTokens(expected, parser.Parse(input).ToList());
+    }
+
+    [Test]
+    public void Parse_WhenEmptyItalic_ShouldNotReturnTags()
+    {
+        var input = "Если пустая _______ строка";
+        var expected = new List<Token>
+            { new(TokenType.Text, "Если пустая _______ строка") };
+        CompareTokens(expected, parser.Parse(input).ToList());
+    }
+
+    [Test]
+    public void Parse_WhenUnderscoresInNumbers_ShouldNotReturnTags()
+    {
+        var input = "Текст с цифрами_12_3 не должен выделяться";
+        var expected = new List<Token>
+        {
+            new(TokenType.Text, "Текст с цифрами_12_3 не должен выделяться")
+        };
+        CompareTokens(expected, parser.Parse(input).ToList());
+    }
+
+    [Test]
+    public void Parse_WhenEscapingSymbols_ShouldNotReturnTags()
+    {
+        var input = @"Здесь сим\волы экранирования\ \должны остаться.\";
+        var expected = new List<Token>
+        {
+            new(TokenType.Text,
+                @"Здесь сим\волы экранирования\ \должны остаться.\")
+        };
+        CompareTokens(expected, parser.Parse(input).ToList());
+    }
+
+    [Test]
+    public void Parse_WhenEscapedEscapeCharacter_ShouldReturnCorrectTokens()
+    {
+        var input = @"\\_вот это будет выделено тегом_";
+        var expected = new List<Token>
+        {
+            new(TokenType.Text, @"\"),
+            new(TokenType.Italics,
+                children: new List<Token>
+                    { new(TokenType.Text, "вот это будет выделено тегом") })
+        };
+        CompareTokens(expected, parser.Parse(input).ToList());
+    }
+
+    [Test]
+    public void Parse_WhenTagInDifferentWords_ShouldNotReturnTags()
+    {
+        var input = "Это пер_вый в_торой пример.";
+        var expected = new List<Token>
+            { new(TokenType.Text, "Это пер_вый в_торой пример.") };
+        CompareTokens(expected, parser.Parse(input).ToList());
+    }
+
+    [Test]
+    public void Parse_WhenUnclosedTags_ShouldNotReturnTags()
+    {
+        var input = "_e __e";
+        var expected = new List<Token> { new(TokenType.Text, "_e __e") };
+        CompareTokens(expected, parser.Parse(input).ToList());
+    }
+
+    [Test]
+    public void Parse_WhenTagsIntersection_ShouldNotReturnTags()
+    {
+        var input = "__пересечение _двойных__ и одинарных_";
+        var expected = new List<Token>
+            { new(TokenType.Text, "__пересечение _двойных__ и одинарных_") };
+        CompareTokens(expected, parser.Parse(input).ToList());
+    }
+
+    [Test]
+    public void Parse_WhenTagsIntersectionWithNewLines_ShouldNotReturnTags()
+    {
+        var input = "__s \n s__,_e \r\n e_";
+        var expected = new List<Token>
+            { new(TokenType.Text, "__s \n s__,_e \r\n e_") };
+        CompareTokens(expected, parser.Parse(input).ToList());
+    }
+
+    [Test]
+    public void Parse_WhenUnpairedUnderscores_ShouldNotReturnTags()
+    {
+        var input = "__Непарные_ символы не считаются выделением";
+        var expected = new List<Token>
+        {
+            new(TokenType.Text, "__Непарные_ символы не считаются выделением")
+        };
+        CompareTokens(expected, parser.Parse(input).ToList());
+    }
+
+
+    [Test]
+    public void Parse_WhenItalicInsideStrong_ShouldReturnCorrectTokens()
+    {
+        var input = "Внутри __двойного выделения _одинарное_ тоже__ работает.";
+        var expected = new List<Token>
+        {
+            new(TokenType.Text, "Внутри "),
+            new(TokenType.Strong, children: new List<Token>
+            {
+                new(TokenType.Text, "двойного выделения "),
+                new(TokenType.Italics,
+                    children: new List<Token>
+                        { new(TokenType.Text, "одинарное") }),
+                new(TokenType.Text, " тоже")
+            }),
+            new(TokenType.Text, " работает.")
+        };
+        CompareTokens(expected, parser.Parse(input).ToList());
+    }
+
+    [Test]
+    public void Parse_WhenStrongInsideItalic_ShouldNotReturnNestedStrong()
+    {
+        var input =
+            "Но не наоборот — внутри _одинарного __двойное__ не_ работает.";
+        var expected = new List<Token>
+        {
+            new(TokenType.Text, "Но не наоборот — внутри "),
+            new(TokenType.Italics,
+                children: new List<Token>
+                    { new(TokenType.Text, "одинарного __двойное__ не") }),
+            new(TokenType.Text, " работает.")
+        };
+        CompareTokens(expected, parser.Parse(input).ToList());
+    }
+
+    [Test]
+    public void Parse_WhenSpaceAfterOpeningUnderscore_ShouldNotReturnTags()
+    {
+        var input =
+            "За подчерками, начинающими выделение, должен следовать непробельный символ. Иначе эти_ подчерки_ не считаются выделением";
+        var expected = new List<Token> { new(TokenType.Text, input) };
+        CompareTokens(expected, parser.Parse(input).ToList());
+    }
+
+    [Test]
+    public void Parse_WhenSpaceBeforeClosingUnderscore_ShouldNotReturnTags()
+    {
+        var input =
+            "Подчерки, заканчивающие выделение, должны следовать за непробельным символом. Иначе эти _подчерки _не считаются_ окончанием выделения";
+        var expected = new List<Token>
+        {
+            new(TokenType.Text,
+                "Подчерки, заканчивающие выделение, должны следовать за непробельным символом. Иначе эти _подчерки "),
+            new(TokenType.Italics,
+                children: new List<Token>
+                    { new(TokenType.Text, "не считаются") }),
+            new(TokenType.Text, " окончанием выделения")
+        };
+        CompareTokens(expected, parser.Parse(input).ToList());
+    }
+
+    [Test]
+    public void Parse_WhenEmptyUnderscores_ShouldNotReturnTags()
+    {
+        var input = "____";
+        var expected = new List<Token> { new(TokenType.Text, "____") };
+        CompareTokens(expected, parser.Parse(input).ToList());
+    }
+
+    [Test]
+    public void Parse_WhenPlainText_ShouldReturnSingleTextToken()
+    {
+        var input = "Обычный текст без разметки";
+        var expected = new List<Token>
+            { new(TokenType.Text, "Обычный текст без разметки") };
+        CompareTokens(expected, parser.Parse(input).ToList());
+    }
+
+    [Test]
+    public void Parse_WhenNewLine_ShouldReturnTextTokenWithNewLine()
+    {
+        var input = "Текст с\nпереносом строки";
+        var expected = new List<Token>
+            { new(TokenType.Text, "Текст с\nпереносом строки") };
+        CompareTokens(expected, parser.Parse(input).ToList());
+    }
+
+    [Test]
+    public void
+        Parse_WhenEscapedEscapeCharacterInText_ShouldReturnCorrectTokens()
+    {
+        var input = @"Текст с\\\\экранированием";
+        var expected = new List<Token>
+            { new(TokenType.Text, @"Текст с\\экранированием") };
+        CompareTokens(expected, parser.Parse(input).ToList());
+    }
+
+    [Test]
+    public void Parse_WhenItalicAtWordBeginning_ShouldReturnCorrectTokens()
+    {
+        var input = "_нач_ало";
+        var expected = new List<Token>
+        {
+            new(TokenType.Italics,
+                children: new List<Token> { new(TokenType.Text, "нач") }),
+            new(TokenType.Text, "ало")
+        };
+        CompareTokens(expected, parser.Parse(input).ToList());
+    }
+
+
+    [Test]
+    public void Parse_Performance_ShouldBeLinear()
+    {
+        var largeText = string.Join("",
+            Enumerable.Repeat(" _курсивом_ и __жирным__ и # Заголовок", 1000));
+
+        var stopwatch = Stopwatch.StartNew();
+        var tokens = parser.Parse(largeText).ToList();
+        stopwatch.Stop();
+
+        tokens.Should().NotBeEmpty();
+        stopwatch.ElapsedMilliseconds.Should().BeLessThan(1000,
+            "Парсинг должен выполняться за линейное время");
+    }
+
+    private static void CompareTokens(IReadOnlyList<Token> expected,
+        IReadOnlyList<Token> actual)
+    {
+        actual.Should().HaveCount(expected.Count);
+
+        for (var i = 0; i < expected.Count; i++)
+        {
+            var expectedToken = expected[i];
+            var actualToken = actual[i];
+
+            actualToken.Type.Should().Be(expectedToken.Type);
+            actualToken.Content.Should().Be(expectedToken.Content);
+
+            if (expectedToken.Children.Any())
+            {
+                actualToken.Children.Should().NotBeNull();
+                CompareTokens(expectedToken.Children, actualToken.Children);
+            }
+            else
+                actualToken.Children.Should().BeEmpty();
+        }
+    }
+}
\ No newline at end of file
diff --git a/cs/Markdown/Md.cs b/cs/Markdown/Md.cs
new file mode 100644
index 000000000..25bc83c6f
--- /dev/null
+++ b/cs/Markdown/Md.cs
@@ -0,0 +1,39 @@
+using Markdown.Handlers;
+using Markdown.Interfaces;
+using Markdown.Parsers;
+using Markdown.Renders;
+using Markdown.TokenHandlers;
+
+namespace Markdown;
+
+/// <summary>Entry point that parses markdown text and renders the resulting tokens.</summary>
+public class Md
+{
+    private static readonly IParser parser;
+    private static readonly IRender renderer;
+
+    static Md()
+    {
+        var handlers = new List<ITokenHandler>
+        {
+            new EscapeHandler(),
+            new HeaderHandler(),
+            new StrongHandler(),
+            new ItalicHandler(),
+            new NewLineHandler(),
+        };
+
+        parser = new MarkdownParser(handlers);
+        renderer = new HtmlRenderer();
+    }
+
+    /// <summary>Renders <paramref name="markdown"/>; null or empty input yields an empty string.</summary>
+    public string Render(string markdown)
+    {
+        if (string.IsNullOrEmpty(markdown))
+            return string.Empty;
+
+        var tokens = parser.Parse(markdown);
+        return renderer.Render(tokens);
+    }
+}
\ No newline at end of file
diff --git a/cs/Markdown/Parsers/MarkdownParser.cs b/cs/Markdown/Parsers/MarkdownParser.cs
new file mode 100644
index 000000000..f380aa1ad
--- /dev/null
+++ b/cs/Markdown/Parsers/MarkdownParser.cs
@@ -0,0 +1,115 @@
+using Markdown.Handlers;
+using Markdown.Interfaces;
+using Markdown.TokenHandlers;
+using Markdown.Tokens;
+
+namespace Markdown.Parsers;
+
+/// <summary>
+/// Scans markdown text character by character, delegating markup to token handlers.
+/// </summary>
+public class MarkdownParser : IParser
+{
+    private readonly List<ITokenHandler> handlers;
+
+    /// <summary>Creates a parser that asks <paramref name="tokenHandlers"/> in list order.</summary>
+    /// <exception cref="ArgumentNullException"><paramref name="tokenHandlers"/> is null.</exception>
+    public MarkdownParser(List<ITokenHandler> tokenHandlers)
+    {
+        ArgumentNullException.ThrowIfNull(tokenHandlers);
+        handlers = tokenHandlers;
+    }
+
+    /// <summary>
+    /// Parses <paramref name="markdownText"/> into top-level tokens. Characters that no handler
+    /// claims are collected as plain text; tokens left open at the end are emitted as literal text.
+    /// </summary>
+    /// <exception cref="ArgumentNullException"><paramref name="markdownText"/> is null.</exception>
+    public IEnumerable<Token> Parse(string markdownText)
+    {
+        ArgumentNullException.ThrowIfNull(markdownText);
+
+        var context = new ParserContext
+        {
+            MarkdownText = markdownText,
+            Parser = this
+        };
+
+        while (context.CurrentIndex < context.MarkdownText.Length)
+        {
+            var current = context.MarkdownText[context.CurrentIndex];
+            // '\0' stands in for "no next character" at the end of the text.
+            var next = context.CurrentIndex + 1 < context.MarkdownText.Length
+                ? context.MarkdownText[context.CurrentIndex + 1]
+                : '\0';
+
+            var handler =
+                handlers.FirstOrDefault(h =>
+                    h.CanHandle(current, next, context));
+            if (handler != null)
+            {
+                handler.Handle(context);
+            }
+            else
+            {
+                context.Buffer.Append(current);
+                context.CurrentIndex++;
+            }
+        }
+
+        AddToken(context, TokenType.Text);
+        FlushUnclosedTokens(context);
+        return context.Tokens;
+    }
+
+    /// <summary>
+    /// Turns the buffered characters into a token of <paramref name="type"/> and attaches it to
+    /// the innermost open token, or to the top-level list when none is open. Does nothing when
+    /// the buffer is empty.
+    /// </summary>
+    public static void AddToken(ParserContext context, TokenType type)
+    {
+        if (context.Buffer.Length == 0) return;
+        var token = new Token(type, context.Buffer.ToString());
+        context.Buffer.Clear();
+
+        if (context.Stack.Count > 0)
+            context.Stack.Peek().Children.Add(token);
+        else
+            context.Tokens.Add(token);
+    }
+
+    // Tokens still on the stack when the input ends were opened but never closed. They used to
+    // be dropped together with their children; put them back as literal text instead.
+    private static void FlushUnclosedTokens(ParserContext context)
+    {
+        while (context.Stack.Count > 0)
+        {
+            var unclosed = context.Stack.Pop();
+            var target = context.Stack.Count > 0
+                ? context.Stack.Peek().Children
+                : context.Tokens;
+
+            var delimiter = unclosed.Type switch
+            {
+                TokenType.Italics => "_",
+                TokenType.Strong => "__",
+                _ => string.Empty
+            };
+            if (delimiter.Length > 0)
+                AppendMergingText(target, new Token(TokenType.Text, delimiter));
+            foreach (var child in unclosed.Children)
+                AppendMergingText(target, child);
+        }
+    }
+
+    // Appends a token, folding it into the previous one when both are plain text.
+    private static void AppendMergingText(List<Token> target, Token token)
+    {
+        if (token.Type == TokenType.Text && target.Count > 0 &&
+            target[^1].Type == TokenType.Text)
+            target[^1].Content += token.Content;
+        else
+            target.Add(token);
+    }
+}
\ No newline at end of file
diff --git a/cs/Markdown/Parsers/ParserContext.cs b/cs/Markdown/Parsers/ParserContext.cs
new file mode 100644
index 000000000..c7df7f0fc
--- /dev/null
+++ b/cs/Markdown/Parsers/ParserContext.cs
@@ -0,0 +1,29 @@
+using System.Text;
+using Markdown.Interfaces;
+
+namespace Markdown.Parsers;
+
+/// <summary>Mutable state shared between the parser and its handlers during one parse run.</summary>
+public class ParserContext
+{
+    /// <summary>Tokens that have been opened but not yet closed, innermost on top.</summary>
+    public Stack<Token> Stack { get; } = new();
+
+    /// <summary>Plain-text characters collected since the last emitted token.</summary>
+    public StringBuilder Buffer { get; } = new();
+
+    /// <summary>Completed top-level tokens.</summary>
+    public List<Token> Tokens { get; } = [];
+
+    /// <summary>Positions of delimiters found to belong to crossing "_" / "__" ranges.</summary>
+    public List<int> IntersectedIndexes { get; } = [];
+
+    /// <summary>The text being parsed.</summary>
+    public string MarkdownText { get; init; } = "";
+
+    /// <summary>Index into <see cref="MarkdownText"/> of the next character to process.</summary>
+    public int CurrentIndex { get; set; }
+
+    /// <summary>The parser running this context; handlers use it for nested parsing.</summary>
+    public required IParser Parser { get; init; }
+}
\ No newline at end of file
diff --git a/cs/Markdown/Renders/HTMLRender.cs b/cs/Markdown/Renders/HTMLRender.cs
new file mode 100644
index 000000000..cee53302e
--- /dev/null
+++ b/cs/Markdown/Renders/HTMLRender.cs
@@ -0,0 +1,13 @@
+using System.Text;
+using Markdown.Interfaces;
+using Markdown.TokenRenders;
+
+namespace Markdown.Renders;
+/// <summary>Stub of the renderer used by Md: Render currently always throws NotImplementedException.</summary>
+public class HtmlRenderer : IRender
+{
+    public string Render(IEnumerable<Token> tokens)
+    {
+        throw new NotImplementedException();
+    }
+}
\ No newline at end of file
diff --git a/cs/Markdown/TokenHandlers/BoundaryTokenHandler.cs b/cs/Markdown/TokenHandlers/BoundaryTokenHandler.cs
new file mode 100644
index 000000000..61252f5e3
--- /dev/null
+++ b/cs/Markdown/TokenHandlers/BoundaryTokenHandler.cs
@@ -0,0 +1,176 @@
+using Markdown.Interfaces;
+using Markdown.Parsers;
+using Markdown.Tokens;
+
+namespace Markdown.TokenHandlers;
+
+/// <summary>
+/// Base class for markup written as a pair of identical delimiters (the visible subclasses use
+/// "_" and "__"). Decides whether a delimiter opens or closes a token or is literal text.
+/// </summary>
+public abstract class BoundaryTokenHandler : ITokenHandler
+{
+    /// <summary>Delimiter text that both opens and closes the token.</summary>
+    protected abstract string Delimiter { get; }
+
+    /// <summary>Type of the token produced for a matched delimiter pair.</summary>
+    protected abstract TokenType TokenType { get; }
+
+    /// <summary>Reports whether the current position starts this handler's delimiter.</summary>
+    public abstract bool CanHandle(char current, char next, ParserContext context);
+
+    /// <summary>
+    /// Consumes the delimiter at the current position, either as a token boundary or, when the
+    /// boundary or nesting checks fail, as literal text appended to the buffer.
+    /// </summary>
+    public void Handle(ParserContext context)
+    {
+        if (IsValidBoundary(context) && HasValidNesting(context))
+        {
+            HandleTokenBoundary(context);
+            return;
+        }
+
+        context.Buffer.Append(Delimiter);
+        context.CurrentIndex += Delimiter.Length;
+    }
+
+    /// <summary>Subclass-specific nesting rule, checked after the generic boundary rules pass.</summary>
+    protected abstract bool HasValidNesting(ParserContext context);
+
+    // Decides whether the delimiter at context.CurrentIndex may act as a token boundary.
+    private bool IsValidBoundary(ParserContext context)
+    {
+        var index = context.CurrentIndex;
+        var text = context.MarkdownText;
+        // The other emphasis delimiter, used below to detect crossing "_" / "__" ranges.
+        var nextDelimiter = TokenType == TokenType.Strong ? "_" : "__";
+        if (context.IntersectedIndexes.Contains(index))
+            return false;
+
+        if (context.Stack.Count > 0)
+        {
+            // "__" is not recognised while an italics token is the innermost open token.
+            if (context.Stack.Peek().Type == TokenType.Italics && TokenType == TokenType.Strong)
+                return false;
+
+            if (context.Stack.Count > 2)
+                return false;
+
+            if (context.Buffer.Length == 0)
+                return false;
+
+            if (index == 0 || index == text.Length - 1)
+                return true;
+
+            var spaceIndex = text.IndexOf(' ', index + Delimiter.Length);
+            if (spaceIndex == -1)
+                return true;
+
+            // NOTE(review): for "__", text[index + 1] is the delimiter's own second underscore,
+            // so the second operand is always true for Strong — confirm this is intended.
+            return !char.IsLetterOrDigit(text[index - 1]) ||
+                   !char.IsLetterOrDigit(text[index + 1]);
+        }
+
+        // No token is open: the delimiter may only open one, and only if a matching closing
+        // delimiter exists before the end of the current line.
+        var paragraphEndIndex = text.IndexOfAny(['\n', '\r'], index);
+        if (paragraphEndIndex == -1)
+            paragraphEndIndex = text.Length;
+
+        var closingIndex = FindSingleDelimiter(text,
+            index + Delimiter.Length, paragraphEndIndex, Delimiter);
+        var anotherOpenIndex = FindSingleDelimiter(text,
+            index + Delimiter.Length, paragraphEndIndex, nextDelimiter);
+        // Without an opening occurrence of the other delimiter there is nothing to cross.
+        var anotherClosingIndex = anotherOpenIndex == -1
+            ? -1
+            : FindSingleDelimiter(text,
+                anotherOpenIndex + nextDelimiter.Length, paragraphEndIndex, nextDelimiter);
+
+        // The other delimiter opens inside this range and closes after it: the ranges cross, so
+        // all four positions are remembered and treated as literal text.
+        if (anotherOpenIndex < closingIndex && anotherClosingIndex > closingIndex)
+        {
+            context.IntersectedIndexes.Add(index);
+            context.IntersectedIndexes.Add(closingIndex);
+            context.IntersectedIndexes.Add(anotherOpenIndex);
+            context.IntersectedIndexes.Add(anotherClosingIndex);
+            return false;
+        }
+
+        if (closingIndex == -1)
+            return false;
+
+        var isInsideWord = (index > 0 && char.IsLetterOrDigit(text[index - 1])) ||
+                           (closingIndex + Delimiter.Length < paragraphEndIndex &&
+                            char.IsLetterOrDigit(text[closingIndex + Delimiter.Length]));
+        if (isInsideWord)
+        {
+            // In-word ranges whose opening and closing delimiters both touch a digit stay literal.
+            if (index > 0 &&
+                (char.IsDigit(text[index - 1]) ||
+                 char.IsDigit(text[index + 1])) &&
+                closingIndex + Delimiter.Length < paragraphEndIndex &&
+                (char.IsDigit(text[closingIndex - 1]) ||
+                 char.IsDigit(text[closingIndex + Delimiter.Length])))
+                return false;
+
+            // An in-word range must close before the first following space.
+            var spaceIndex = text.IndexOf(' ', index + Delimiter.Length);
+
+            return spaceIndex == -1 || closingIndex < spaceIndex;
+        }
+
+        // Reject ranges with nothing between the two delimiters.
+        return closingIndex - index > Delimiter.Length;
+    }
+
+    // Closes the innermost open token when it has this handler's type; otherwise opens a new one.
+    private void HandleTokenBoundary(ParserContext context)
+    {
+        MarkdownParser.AddToken(context, TokenType.Text);
+
+        if (context.Stack.Count > 0 && context.Stack.Peek().Type == TokenType)
+        {
+            var completedToken = context.Stack.Pop();
+
+            completedToken.Content = completedToken.Children.Count > 0 ?
+                string.Empty : completedToken.Content;
+
+            if (context.Stack.Count > 0)
+                context.Stack.Peek().Children.Add(completedToken);
+            else
+                context.Tokens.Add(completedToken);
+        }
+        else
+        {
+            var newToken = new Token(TokenType);
+            context.Stack.Push(newToken);
+        }
+
+        context.CurrentIndex += Delimiter.Length;
+    }
+
+    // Returns the index of the first occurrence of delimiter at or after startIndex and before
+    // paragraphEndIndex that has no '_' directly before or after it, or -1 if there is none.
+    private static int FindSingleDelimiter(string text, int startIndex, int paragraphEndIndex, string delimiter)
+    {
+        var index = text.IndexOf(delimiter, startIndex, StringComparison.Ordinal);
+
+        while (index != -1 && index < paragraphEndIndex)
+        {
+            if (index > 0 && text[index - 1] == '_')
+            {
+                index = text.IndexOf(delimiter, index + 1, StringComparison.Ordinal);
+                continue;
+            }
+
+            if (index + delimiter.Length >= text.Length ||
+                text[index + delimiter.Length] != '_') return index;
+            index = text.IndexOf(delimiter, index + 2, StringComparison.Ordinal);
+        }
+        return -1;
+    }
+}
\ No newline at end of file
diff --git a/cs/Markdown/TokenHandlers/EscapeHandler.cs b/cs/Markdown/TokenHandlers/EscapeHandler.cs
new file mode 100644
index 000000000..6b4f44405
--- /dev/null
+++ b/cs/Markdown/TokenHandlers/EscapeHandler.cs
@@ -0,0 +1,31 @@
+using Markdown.Interfaces;
+using Markdown.Parsers;
+
+namespace Markdown.Handlers;
+
+/// <summary>Handles the backslash escape character.</summary>
+public class EscapeHandler : ITokenHandler
+{
+    /// <summary>True when the current character is a backslash.</summary>
+    public bool CanHandle(char current, char next, ParserContext context)
+        => current == '\\';
+
+    /// <summary>
+    /// A backslash before '_', '#' or '\' is dropped and the following character is buffered as
+    /// literal text; any other backslash is buffered unchanged.
+    /// </summary>
+    public void Handle(ParserContext context)
+    {
+        var text = context.MarkdownText;
+        var nextIndex = context.CurrentIndex + 1;
+        if (nextIndex < text.Length && text[nextIndex] is '_' or '#' or '\\')
+        {
+            context.Buffer.Append(text[nextIndex]);
+            context.CurrentIndex += 2;
+            return;
+        }
+
+        context.Buffer.Append('\\');
+        context.CurrentIndex++;
+    }
+}
\ No newline at end of file
diff --git a/cs/Markdown/TokenHandlers/HeaderHandler.cs b/cs/Markdown/TokenHandlers/HeaderHandler.cs
new file mode 100644
index 000000000..361b7db4f
--- /dev/null
+++ b/cs/Markdown/TokenHandlers/HeaderHandler.cs
@@ -0,0 +1,52 @@
+using Markdown.Interfaces;
+using Markdown.Parsers;
+using Markdown.TokenHandlers;
+
+namespace Markdown.Handlers;
+
+/// <summary>Recognises a header: a run of '#' at the start of a line followed by a space.</summary>
+public class HeaderHandler : ITokenHandler
+{
+    /// <summary>True for a '#' that starts the text or directly follows '\n'.</summary>
+    public bool CanHandle(char current, char next, ParserContext context)
+        => current == '#' && (context.CurrentIndex == 0 ||
+                              context.MarkdownText[context.CurrentIndex - 1] ==
+                              '\n');
+
+    /// <summary>
+    /// Consumes the leading '#' run. When a space follows, adds a header token whose children
+    /// are the parsed rest of the line; otherwise buffers the consumed '#' characters as text.
+    /// </summary>
+    public void Handle(ParserContext context)
+    {
+        var text = context.MarkdownText;
+        var hashStart = context.CurrentIndex;
+        while (context.CurrentIndex < text.Length && text[context.CurrentIndex] == '#')
+            context.CurrentIndex++;
+
+        if (context.CurrentIndex >= text.Length || text[context.CurrentIndex] != ' ')
+        {
+            // Not a header. Buffer every consumed '#': appending only one lost the rest of "##x".
+            context.Buffer.Append('#', context.CurrentIndex - hashStart);
+            return;
+        }
+
+        context.CurrentIndex++;
+
+        MarkdownParser.AddToken(context, TokenType.Text);
+        var headerToken = new Token(TokenType.Header);
+        context.Tokens.Add(headerToken);
+
+        // The header ends at the first line break; stopping at '\r' too keeps the carriage
+        // return of a CRLF line ending out of the header content.
+        var headerEnd = text.IndexOfAny(['\n', '\r'], context.CurrentIndex);
+        if (headerEnd == -1)
+            headerEnd = text.Length;
+
+        // Inline markup inside the header is parsed by a nested run over the header text only.
+        headerToken.Children.AddRange(
+            context.Parser.Parse(text[context.CurrentIndex..headerEnd]));
+
+        context.CurrentIndex = headerEnd;
+    }
+}
\ No newline at end of file
diff --git a/cs/Markdown/TokenHandlers/ItalicHandler.cs b/cs/Markdown/TokenHandlers/ItalicHandler.cs
new file mode 100644
index 000000000..895aeca0a
--- /dev/null
+++ b/cs/Markdown/TokenHandlers/ItalicHandler.cs
@@ -0,0 +1,17 @@
+using Markdown.Interfaces;
+using Markdown.Parsers;
+using Markdown.TokenHandlers;
+
+namespace Markdown.Handlers;
+/// <summary>Handles the single-underscore delimiter, producing italics tokens.</summary>
+public class ItalicHandler : BoundaryTokenHandler
+{
+    protected override string Delimiter => "_";
+    protected override TokenType TokenType => TokenType.Italics;
+
+    /// <summary>True for an underscore that is not followed by another underscore.</summary>
+    public override bool CanHandle(char current, char next, ParserContext context)
+        => current == '_' && next != '_';
+
+    protected override bool HasValidNesting(ParserContext context) => true;
+}
\ No newline at end of file
diff --git a/cs/Markdown/TokenHandlers/NewLineHandler.cs b/cs/Markdown/TokenHandlers/NewLineHandler.cs
new file mode 100644
index 000000000..5e9100b02
--- /dev/null
+++ b/cs/Markdown/TokenHandlers/NewLineHandler.cs
@@ -0,0 +1,24 @@
+using Markdown.Interfaces;
+using Markdown.Parsers;
+
+namespace Markdown.Handlers;
+
+// NOTE(review): the only visible Stack.Push (BoundaryTokenHandler) never pushes a Header token,
+// so this handler may never fire as written — confirm.
+/// <summary>Closes a header token that is open on the stack when a line break is reached.</summary>
+public class NewLineHandler : ITokenHandler
+{
+    /// <summary>True for '\n' while the innermost open token is a header.</summary>
+    public bool CanHandle(char current, char next, ParserContext context)
+        => current == '\n' && context.Stack.Count > 0 &&
+           context.Stack.Peek().Type == TokenType.Header;
+
+    /// <summary>Flushes buffered text, moves the open header to the top-level tokens and skips the '\n'.</summary>
+    public void Handle(ParserContext context)
+    {
+        MarkdownParser.AddToken(context, TokenType.Text);
+
+        context.Tokens.Add(context.Stack.Pop());
+        context.CurrentIndex++;
+    }
+}
\ No newline at end of file
diff --git a/cs/Markdown/TokenHandlers/StrongHandler.cs b/cs/Markdown/TokenHandlers/StrongHandler.cs
new file mode 100644
index 000000000..661bcaad4
--- /dev/null
+++ b/cs/Markdown/TokenHandlers/StrongHandler.cs
@@ -0,0 +1,20 @@
+using Markdown.Interfaces;
+using Markdown.Parsers;
+using Markdown.TokenHandlers;
+
+namespace Markdown.Handlers;
+
+/// <summary>Handles the double-underscore delimiter, producing strong tokens.</summary>
+public class StrongHandler : BoundaryTokenHandler
+{
+    protected override string Delimiter => "__";
+    protected override TokenType TokenType => TokenType.Strong;
+
+    /// <summary>True when the current and next characters are both underscores.</summary>
+    public override bool CanHandle(char current, char next, ParserContext context)
+        => current == '_' && next == '_';
+
+    /// <summary>Strong is allowed unless the innermost open token is italics.</summary>
+    protected override bool HasValidNesting(ParserContext context)
+        => context.Stack.Count <= 0 || context.Stack.Peek().Type != TokenType.Italics;
+}
\ No newline at end of file
diff --git a/cs/Markdown/TokenRenders/HeaderRender.cs b/cs/Markdown/TokenRenders/HeaderRender.cs
new file mode 100644
index 000000000..5e725b9f5
--- /dev/null
+++ b/cs/Markdown/TokenRenders/HeaderRender.cs
@@ -0,0 +1,13 @@
+using System.Text;
+using Markdown.Interfaces;
+
+namespace Markdown.TokenRenders;
+
+/// <summary>Renderer registered for header tokens. Stub: Render always throws.</summary>
+public class HeaderRender : ITokenRender
+{
+    public void Render(Token token, StringBuilder result)
+    {
+        throw new NotImplementedException();
+    }
+}
\ No newline at end of file
diff --git a/cs/Markdown/TokenRenders/ItalicRender.cs b/cs/Markdown/TokenRenders/ItalicRender.cs
new file mode 100644
index 000000000..d41c695af
--- /dev/null
+++ b/cs/Markdown/TokenRenders/ItalicRender.cs
@@ -0,0 +1,13 @@
+using System.Text;
+using Markdown.Interfaces;
+
+namespace Markdown.TokenRenders;
+
+/// <summary>Renderer registered for italics tokens. Stub: Render always throws.</summary>
+public class ItalicRender : ITokenRender
+{
+    public void Render(Token token, StringBuilder result)
+    {
+        throw new NotImplementedException();
+    }
+}
\ No newline at end of file
diff --git a/cs/Markdown/TokenRenders/RenderFactory.cs b/cs/Markdown/TokenRenders/RenderFactory.cs
new file mode 100644
index 000000000..9524c9857
--- /dev/null
+++ b/cs/Markdown/TokenRenders/RenderFactory.cs
@@ -0,0 +1,23 @@
+using Markdown.Interfaces;
+
+namespace Markdown.TokenRenders;
+
+/// <summary>Maps token types to the renderer responsible for them.</summary>
+public static class RenderFactory
+{
+    private static readonly Dictionary<TokenType, ITokenRender> Renders = new()
+    {
+        [TokenType.Text] = new TextRender(),
+        [TokenType.Italics] = new ItalicRender(),
+        [TokenType.Strong] = new StrongRender(),
+        [TokenType.Header] = new HeaderRender(),
+    };
+
+    /// <summary>Returns the renderer for <paramref name="type"/>, falling back to the text renderer.</summary>
+    public static ITokenRender GetRender(TokenType type)
+    {
+        return Renders.TryGetValue(type, out var converter)
+            ? converter
+            : Renders[TokenType.Text];
+    }
+}
\ No newline at end of file
diff --git a/cs/Markdown/TokenRenders/StrongRender.cs b/cs/Markdown/TokenRenders/StrongRender.cs
new file mode 100644
index 000000000..10b8040a3
--- /dev/null
+++ b/cs/Markdown/TokenRenders/StrongRender.cs
@@ -0,0 +1,13 @@
+using System.Text;
+using Markdown.Interfaces;
+
+namespace Markdown.TokenRenders;
+
+/// <summary>Renderer registered for strong tokens. Stub: Render always throws.</summary>
+public class StrongRender : ITokenRender
+{
+    public void Render(Token token, StringBuilder result)
+    {
+        throw new NotImplementedException();
+    }
+}
\ No newline at end of file
diff --git a/cs/Markdown/TokenRenders/TextRender.cs b/cs/Markdown/TokenRenders/TextRender.cs
new file mode 100644
index 000000000..79c1eec60
--- /dev/null
+++ b/cs/Markdown/TokenRenders/TextRender.cs
@@ -0,0 +1,13 @@
+using System.Text;
+using Markdown.Interfaces;
+
+namespace Markdown.TokenRenders;
+
+/// <summary>Renderer registered for text tokens and used as the fallback. Stub: Render always throws.</summary>
+public class TextRender : ITokenRender
+{
+    public void Render(Token token, StringBuilder result)
+    {
+        throw new NotImplementedException();
+    }
+}
\ No newline at end of file
diff --git a/cs/Markdown/TokenRenders/TokenBaseRender.cs b/cs/Markdown/TokenRenders/TokenBaseRender.cs
new file mode 100644
index 000000000..b2ac6284b
--- /dev/null
+++ b/cs/Markdown/TokenRenders/TokenBaseRender.cs
@@ -0,0 +1,21 @@
+using System.Text;
+using Markdown.Interfaces;
+
+namespace Markdown.TokenRenders;
+
+/// <summary>Base class for renderers that need to render a token's children.</summary>
+public abstract class TokenBaseRender : ITokenRender
+{
+    /// <summary>Writes the rendering of <paramref name="token"/> to <paramref name="result"/>.</summary>
+    public abstract void Render(Token token, StringBuilder result);
+
+    /// <summary>Renders each child of <paramref name="token"/>, in order, with the renderer registered for its type.</summary>
+    protected void RenderChildren(Token token, StringBuilder result)
+    {
+        foreach (var child in token.Children)
+        {
+            var render = RenderFactory.GetRender(child.Type);
+            render.Render(child, result);
+        }
+    }
+}
\ No newline at end of file
diff --git a/cs/Markdown/Tokens/Token.cs b/cs/Markdown/Tokens/Token.cs
new file mode 100644
index 000000000..1ee2bbd30
--- /dev/null
+++ b/cs/Markdown/Tokens/Token.cs
@@ -0,0 +1,23 @@
+namespace Markdown.Tokens;
+
+/// <summary>A node of the parsed markdown tree.</summary>
+public class Token
+{
+    /// <summary>Kind of markup this token represents.</summary>
+    public TokenType Type { get; init; }
+
+    /// <summary>Text carried by the token; empty unless supplied.</summary>
+    public string Content { get; set; }
+
+    /// <summary>Nested tokens; an empty list when none were supplied to the constructor.</summary>
+    public List<Token> Children { get; init; }
+
+    /// <summary>Creates a token; a null <paramref name="children"/> becomes an empty list.</summary>
+    public Token(TokenType type, string content = "",
+        List<Token>? children = null)
+    {
+        Type = type;
+        Content = content;
+        Children = children ?? [];
+    }
+}
\ No newline at end of file
diff --git a/cs/Markdown/Tokens/TokenType.cs b/cs/Markdown/Tokens/TokenType.cs
new file mode 100644
index 000000000..76c09aa58
--- /dev/null
+++ b/cs/Markdown/Tokens/TokenType.cs
@@ -0,0 +1,10 @@
+namespace Markdown.Tokens;
+
+/// <summary>Kinds of tokens the parser produces.</summary>
+public enum TokenType
+{
+    Text,
+    Italics,
+    Strong,
+    Header,
+}
\ No newline at end of file
diff --git a/cs/clean-code.sln b/cs/clean-code.sln
index 2206d54db..f9c008fbb 100644
--- a/cs/clean-code.sln
+++ b/cs/clean-code.sln
@@ -9,6 +9,8 @@ Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "ControlDigit", "ControlDigi
 EndProject
 Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Samples", "Samples\Samples.csproj", "{C3EF41D7-50EF-4CE1-B30A-D1D81C93D7FA}"
 EndProject
+Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Markdown", "Markdown\Markdown.csproj", "{774FACB4-6217-4DDC-9AA5-901CD2228DAC}"
+EndProject
 Global
 	GlobalSection(SolutionConfigurationPlatforms) = preSolution
 		Debug|Any CPU = Debug|Any CPU
@@ -27,5 +29,9 @@ Global
 		{C3EF41D7-50EF-4CE1-B30A-D1D81C93D7FA}.Debug|Any CPU.Build.0 = Debug|Any CPU
 		{C3EF41D7-50EF-4CE1-B30A-D1D81C93D7FA}.Release|Any CPU.ActiveCfg = Release|Any CPU
 		{C3EF41D7-50EF-4CE1-B30A-D1D81C93D7FA}.Release|Any CPU.Build.0 = Release|Any CPU
+		{774FACB4-6217-4DDC-9AA5-901CD2228DAC}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
+		{774FACB4-6217-4DDC-9AA5-901CD2228DAC}.Debug|Any CPU.Build.0 = Debug|Any CPU
+		{774FACB4-6217-4DDC-9AA5-901CD2228DAC}.Release|Any CPU.ActiveCfg = Release|Any CPU
+		{774FACB4-6217-4DDC-9AA5-901CD2228DAC}.Release|Any CPU.Build.0 = Release|Any CPU
 	EndGlobalSection
 EndGlobal