From 4d08e7fe30429377a22ae58164ec207492b3df03 Mon Sep 17 00:00:00 2001 From: Krotkaya Date: Wed, 5 Nov 2025 15:21:57 +0500 Subject: [PATCH 1/6] Markdown for checking --- cs/.DS_Store | Bin 0 -> 6148 bytes cs/Markdown.Core/Lexing/ILexer.cs | 6 +++++ cs/Markdown.Core/Lexing/Lexer.cs | 16 +++++++++++ cs/Markdown.Core/Lexing/Token.cs | 16 +++++++++++ cs/Markdown.Core/Lexing/TokenKind.cs | 16 +++++++++++ cs/Markdown.Core/Markdown.Core.csproj | 9 +++++++ cs/Markdown.Core/Parsing/IParser.cs | 9 +++++++ cs/Markdown.Core/Parsing/Nodes/BlockNode.cs | 10 +++++++ .../Parsing/Nodes/DocumentNode.cs | 10 +++++++ .../Parsing/Nodes/EmphasisNode.cs | 9 +++++++ cs/Markdown.Core/Parsing/Nodes/HeadingNode.cs | 12 +++++++++ cs/Markdown.Core/Parsing/Nodes/InlineNode.cs | 9 +++++++ cs/Markdown.Core/Parsing/Nodes/Node.cs | 10 +++++++ .../Parsing/Nodes/ParagraphNode.cs | 16 +++++++++++ cs/Markdown.Core/Parsing/Nodes/StrongNode.cs | 14 ++++++++++ cs/Markdown.Core/Parsing/Nodes/TextNode.cs | 14 ++++++++++ cs/Markdown.Core/Parsing/Parser.cs | 17 ++++++++++++ cs/Markdown.Core/Rendering/HtmlRenderer.cs | 13 +++++++++ cs/Markdown.Core/Rendering/IHtmlRenderer.cs | 8 ++++++ cs/Markdown.Tests/EmphasisTests.cs | 10 +++++++ cs/Markdown.Tests/EscapingTests.cs | 10 +++++++ cs/Markdown.Tests/HeadingTests.cs | 10 +++++++ cs/Markdown.Tests/InteractionRulesTests.cs | 10 +++++++ cs/Markdown.Tests/Markdown.Tests.csproj | 17 ++++++++++++ cs/Markdown.Tests/PerfTests.cs | 10 +++++++ cs/Markdown.Tests/StrongTests.cs | 10 +++++++ cs/Markdown/Markdown.csproj | 14 ++++++++++ cs/Markdown/Md.cs | 20 ++++++++++++++ cs/Markdown/Program.cs | 9 +++++++ cs/Samples/Samples.csproj | 1 + cs/clean-code.sln | 25 ++++++++++++++++++ cs/clean-code.sln.DotSettings | 3 +++ 32 files changed, 363 insertions(+) create mode 100644 cs/.DS_Store create mode 100644 cs/Markdown.Core/Lexing/ILexer.cs create mode 100644 cs/Markdown.Core/Lexing/Lexer.cs create mode 100644 cs/Markdown.Core/Lexing/Token.cs create mode 100644 
cs/Markdown.Core/Lexing/TokenKind.cs create mode 100644 cs/Markdown.Core/Markdown.Core.csproj create mode 100644 cs/Markdown.Core/Parsing/IParser.cs create mode 100644 cs/Markdown.Core/Parsing/Nodes/BlockNode.cs create mode 100644 cs/Markdown.Core/Parsing/Nodes/DocumentNode.cs create mode 100644 cs/Markdown.Core/Parsing/Nodes/EmphasisNode.cs create mode 100644 cs/Markdown.Core/Parsing/Nodes/HeadingNode.cs create mode 100644 cs/Markdown.Core/Parsing/Nodes/InlineNode.cs create mode 100644 cs/Markdown.Core/Parsing/Nodes/Node.cs create mode 100644 cs/Markdown.Core/Parsing/Nodes/ParagraphNode.cs create mode 100644 cs/Markdown.Core/Parsing/Nodes/StrongNode.cs create mode 100644 cs/Markdown.Core/Parsing/Nodes/TextNode.cs create mode 100644 cs/Markdown.Core/Parsing/Parser.cs create mode 100644 cs/Markdown.Core/Rendering/HtmlRenderer.cs create mode 100644 cs/Markdown.Core/Rendering/IHtmlRenderer.cs create mode 100644 cs/Markdown.Tests/EmphasisTests.cs create mode 100644 cs/Markdown.Tests/EscapingTests.cs create mode 100644 cs/Markdown.Tests/HeadingTests.cs create mode 100644 cs/Markdown.Tests/InteractionRulesTests.cs create mode 100644 cs/Markdown.Tests/Markdown.Tests.csproj create mode 100644 cs/Markdown.Tests/PerfTests.cs create mode 100644 cs/Markdown.Tests/StrongTests.cs create mode 100644 cs/Markdown/Markdown.csproj create mode 100644 cs/Markdown/Md.cs create mode 100644 cs/Markdown/Program.cs diff --git a/cs/.DS_Store b/cs/.DS_Store new file mode 100644 index 0000000000000000000000000000000000000000..655ce4e6acd91591a2abd02e0b983a4c7f5be3fe GIT binary patch literal 6148 zcmeHK%Sr?>5Ukb}21MDT$9#d{AB1&1deRT@F$;pr4mgU}e7e=?UUW3)L8Tj#O46@M z!6az_vU=+;fF*#AO;L2v5M6Gn4%~P_lp15~$H(o%Fl-xPe&>>Y`w=ejgbhYazQ4mY z?(u?W^ce7t-F|c2D^9*Ean2|M%{O1oP8(*oHP!29GL07!3L^j7}leMT`d8K`i!o^vDOmmlN_^SF{Fkf=1MeI@e)JKo$JZ% zvSTqccZe4s;+glwi`49lpQpBkr*~~)yJJ&PyP^XM$CF0@Cwh*YRi){Zj%k-2i=jr*_3BQHhd?DHx?o@i G20j2(&MQX% literal 0 HcmV?d00001 diff --git a/cs/Markdown.Core/Lexing/ILexer.cs 
b/cs/Markdown.Core/Lexing/ILexer.cs new file mode 100644 index 000000000..3808e235f --- /dev/null +++ b/cs/Markdown.Core/Lexing/ILexer.cs @@ -0,0 +1,6 @@ +namespace Markdown.Core.Lexing; + +public interface ILexer +{ + public IEnumerable Tokenize(ReadOnlySpan source); +} \ No newline at end of file diff --git a/cs/Markdown.Core/Lexing/Lexer.cs b/cs/Markdown.Core/Lexing/Lexer.cs new file mode 100644 index 000000000..dfe7ea70c --- /dev/null +++ b/cs/Markdown.Core/Lexing/Lexer.cs @@ -0,0 +1,16 @@ +namespace Markdown.Core.Lexing; + +/// +/// Сканер: +/// - идёт по символам слева направо +/// - склеивает в один токен обычный текст +/// - выделяет специальные токены +/// - применяет базовые правила экранирования +/// +public class Lexer : ILexer +{ + public IEnumerable Tokenize(ReadOnlySpan source) + { + throw new NotImplementedException(); + } +} \ No newline at end of file diff --git a/cs/Markdown.Core/Lexing/Token.cs b/cs/Markdown.Core/Lexing/Token.cs new file mode 100644 index 000000000..f93e98ac7 --- /dev/null +++ b/cs/Markdown.Core/Lexing/Token.cs @@ -0,0 +1,16 @@ +namespace Markdown.Core.Lexing; + +/// +/// Одна часть (токен) входного текста +/// +public readonly struct Token(TokenKind kind, ReadOnlyMemory slice, int position) +{ + public TokenKind Kind { get; init; } = kind; + public ReadOnlyMemory Slice { get; init; } = slice; + public int Position { get; init; } = position; + + public override string ToString() + { + throw new NotImplementedException(); + } +} \ No newline at end of file diff --git a/cs/Markdown.Core/Lexing/TokenKind.cs b/cs/Markdown.Core/Lexing/TokenKind.cs new file mode 100644 index 000000000..15dc4c188 --- /dev/null +++ b/cs/Markdown.Core/Lexing/TokenKind.cs @@ -0,0 +1,16 @@ +namespace Markdown.Core.Lexing; + +/// +/// Типы «частей» (токенов), на которые сканер разбивает текст +/// +public enum TokenKind +{ + Text, // Обычный текст (любой непрерывный фрагмент без спецсимволов) + Underscore, // Одинарное подчёркивание '_', кандидат на 
курсивный шрифт + DoubleUnderscore, // Двойное подчёркивание '__', кандидат на полужирный шрифт + Escape, // Обратный слэш '\', может либо экранировать следующий спецсимвол, либо остаться в тексте + Hash, //Решётка '#', на начале абзаца вместе с пробелом "# " означает заголовок + Space, //Одиночный пробел ' ', нужен для проверки границ выделений + NewLine, //Перевод строки (например, '\n' или '\r\n') + Eof // Служебный маркер конца входа +} \ No newline at end of file diff --git a/cs/Markdown.Core/Markdown.Core.csproj b/cs/Markdown.Core/Markdown.Core.csproj new file mode 100644 index 000000000..c874e520d --- /dev/null +++ b/cs/Markdown.Core/Markdown.Core.csproj @@ -0,0 +1,9 @@ + + + + net8.0 + enable + enable + + + diff --git a/cs/Markdown.Core/Parsing/IParser.cs b/cs/Markdown.Core/Parsing/IParser.cs new file mode 100644 index 000000000..1456ce660 --- /dev/null +++ b/cs/Markdown.Core/Parsing/IParser.cs @@ -0,0 +1,9 @@ +using Markdown.Core.Lexing; +using Markdown.Core.Parsing.Nodes; + +namespace Markdown.Core.Parsing; + +public interface IParser +{ + DocumentNode Parse(IEnumerable tokens); +} \ No newline at end of file diff --git a/cs/Markdown.Core/Parsing/Nodes/BlockNode.cs b/cs/Markdown.Core/Parsing/Nodes/BlockNode.cs new file mode 100644 index 000000000..4fe147488 --- /dev/null +++ b/cs/Markdown.Core/Parsing/Nodes/BlockNode.cs @@ -0,0 +1,10 @@ + + +namespace Markdown.Core.Parsing.Nodes; +/// +/// Базовый класс для блочных элементов документа (заголовки и абзацы) +/// +public class BlockNode : Node +{ + +} \ No newline at end of file diff --git a/cs/Markdown.Core/Parsing/Nodes/DocumentNode.cs b/cs/Markdown.Core/Parsing/Nodes/DocumentNode.cs new file mode 100644 index 000000000..441fbe51c --- /dev/null +++ b/cs/Markdown.Core/Parsing/Nodes/DocumentNode.cs @@ -0,0 +1,10 @@ + +namespace Markdown.Core.Parsing.Nodes; + +/// +/// Корень дерева, содержит последовательность блочных узлов (заголовки и абзацы) +/// +public class DocumentNode : Node +{ + public IList 
Children { get; } = new List(); +} \ No newline at end of file diff --git a/cs/Markdown.Core/Parsing/Nodes/EmphasisNode.cs b/cs/Markdown.Core/Parsing/Nodes/EmphasisNode.cs new file mode 100644 index 000000000..9cb6e9bdd --- /dev/null +++ b/cs/Markdown.Core/Parsing/Nodes/EmphasisNode.cs @@ -0,0 +1,9 @@ +namespace Markdown.Core.Parsing.Nodes; + +/// +/// Курсив (_..._): контейнер инлайнов внутри выделения +/// +public class EmphasisNode : InlineNode +{ + public IList Inlines { get; } = new List(); +} \ No newline at end of file diff --git a/cs/Markdown.Core/Parsing/Nodes/HeadingNode.cs b/cs/Markdown.Core/Parsing/Nodes/HeadingNode.cs new file mode 100644 index 000000000..cf074698e --- /dev/null +++ b/cs/Markdown.Core/Parsing/Nodes/HeadingNode.cs @@ -0,0 +1,12 @@ + + +namespace Markdown.Core.Parsing.Nodes; + +/// +/// Заголовок, блочный элемент, содержащий набор инлайнов (текст и выделения) +/// +public class HeadingNode(int level) : BlockNode +{ + public int Level { get; } = level; //сейчас у нас только h1 по условию, позволяет легко расширить в дальнейшем + public List Inlines { get; } = []; +} \ No newline at end of file diff --git a/cs/Markdown.Core/Parsing/Nodes/InlineNode.cs b/cs/Markdown.Core/Parsing/Nodes/InlineNode.cs new file mode 100644 index 000000000..3539cfd4e --- /dev/null +++ b/cs/Markdown.Core/Parsing/Nodes/InlineNode.cs @@ -0,0 +1,9 @@ +namespace Markdown.Core.Parsing.Nodes; + +/// +/// Базовый класс для инлайновых элементов +/// +public class InlineNode : Node +{ + +} \ No newline at end of file diff --git a/cs/Markdown.Core/Parsing/Nodes/Node.cs b/cs/Markdown.Core/Parsing/Nodes/Node.cs new file mode 100644 index 000000000..b0f284fc2 --- /dev/null +++ b/cs/Markdown.Core/Parsing/Nodes/Node.cs @@ -0,0 +1,10 @@ +namespace Markdown.Core.Parsing.Nodes; + +/// +/// Базовый тип узлов синтаксического дерева +/// Узлы бывают блочные (заголовки и абзацы) и инлайновые (текст/выделения). 
+/// +public abstract class Node +{ + +} \ No newline at end of file diff --git a/cs/Markdown.Core/Parsing/Nodes/ParagraphNode.cs b/cs/Markdown.Core/Parsing/Nodes/ParagraphNode.cs new file mode 100644 index 000000000..18a3cf66d --- /dev/null +++ b/cs/Markdown.Core/Parsing/Nodes/ParagraphNode.cs @@ -0,0 +1,16 @@ +using Markdown.Core.Parsing.Nodes; + +namespace Markdown.Core.Parsing.Nodes; + +/// +/// Абзац, блочный элемент, содержащий набор инлайнов (текст и выделения) +/// +public class ParagraphNode : BlockNode +{ + public IList Inlines { get; } + + public ParagraphNode() + { + Inlines = new List(); + } +} \ No newline at end of file diff --git a/cs/Markdown.Core/Parsing/Nodes/StrongNode.cs b/cs/Markdown.Core/Parsing/Nodes/StrongNode.cs new file mode 100644 index 000000000..a784545df --- /dev/null +++ b/cs/Markdown.Core/Parsing/Nodes/StrongNode.cs @@ -0,0 +1,14 @@ +namespace Markdown.Core.Parsing.Nodes; + +/// +/// Полужирный (__...__): контейнер инлайнов внутри выделения +/// +public class StrongNode : InlineNode +{ + public IList Inlines { get; } + + public StrongNode() + { + Inlines = new List(); + } +} \ No newline at end of file diff --git a/cs/Markdown.Core/Parsing/Nodes/TextNode.cs b/cs/Markdown.Core/Parsing/Nodes/TextNode.cs new file mode 100644 index 000000000..37ddb1ba1 --- /dev/null +++ b/cs/Markdown.Core/Parsing/Nodes/TextNode.cs @@ -0,0 +1,14 @@ +namespace Markdown.Core.Parsing.Nodes; + +/// +/// Обычный текст без разметки, конечный лист дерева +/// +public class TextNode : InlineNode +{ + public string Text { get; } + + public TextNode(string text) + { + Text = text; + } +} \ No newline at end of file diff --git a/cs/Markdown.Core/Parsing/Parser.cs b/cs/Markdown.Core/Parsing/Parser.cs new file mode 100644 index 000000000..73d0980d3 --- /dev/null +++ b/cs/Markdown.Core/Parsing/Parser.cs @@ -0,0 +1,17 @@ +using Markdown.Core.Lexing; +using Markdown.Core.Parsing.Nodes; +namespace Markdown.Core.Parsing; + +/// +/// Парсер: +/// - делит документ на блоки 
(заголовки и абзацы) +/// - внутри блоков собирает инлайны (Текст/Курсив/Жирный) +/// - следует спецификации (границы, цифры, пересечения, пустые выделения) +/// +public class Parser : IParser +{ + public DocumentNode Parse(IEnumerable tokens) + { + throw new NotImplementedException(); + } +} \ No newline at end of file diff --git a/cs/Markdown.Core/Rendering/HtmlRenderer.cs b/cs/Markdown.Core/Rendering/HtmlRenderer.cs new file mode 100644 index 000000000..083a97c44 --- /dev/null +++ b/cs/Markdown.Core/Rendering/HtmlRenderer.cs @@ -0,0 +1,13 @@ +using Markdown.Core.Parsing.Nodes; +namespace Markdown.Core.Rendering; + +/// +/// Обход дерева и формирование HTML-строки +/// +public class HtmlRenderer : IHtmlRenderer +{ + public string Render(DocumentNode document) + { + throw new NotImplementedException(); + } +} \ No newline at end of file diff --git a/cs/Markdown.Core/Rendering/IHtmlRenderer.cs b/cs/Markdown.Core/Rendering/IHtmlRenderer.cs new file mode 100644 index 000000000..cdf75d459 --- /dev/null +++ b/cs/Markdown.Core/Rendering/IHtmlRenderer.cs @@ -0,0 +1,8 @@ +using Markdown.Core.Parsing.Nodes; + +namespace Markdown.Core.Rendering; + +public interface IHtmlRenderer +{ + string Render (DocumentNode document); +} \ No newline at end of file diff --git a/cs/Markdown.Tests/EmphasisTests.cs b/cs/Markdown.Tests/EmphasisTests.cs new file mode 100644 index 000000000..fef4fbd63 --- /dev/null +++ b/cs/Markdown.Tests/EmphasisTests.cs @@ -0,0 +1,10 @@ +using Xunit; +namespace Markdown.Tests; + +/// +/// Тесты курсивного шрифта +/// +public class EmphasisTests +{ + +} \ No newline at end of file diff --git a/cs/Markdown.Tests/EscapingTests.cs b/cs/Markdown.Tests/EscapingTests.cs new file mode 100644 index 000000000..e14856b19 --- /dev/null +++ b/cs/Markdown.Tests/EscapingTests.cs @@ -0,0 +1,10 @@ +using Xunit; +namespace Markdown.Tests; + +/// +/// Тесты экранирования обратным слешем +/// +public class EscapingTests +{ + +} \ No newline at end of file diff --git 
a/cs/Markdown.Tests/HeadingTests.cs b/cs/Markdown.Tests/HeadingTests.cs new file mode 100644 index 000000000..07a724aba --- /dev/null +++ b/cs/Markdown.Tests/HeadingTests.cs @@ -0,0 +1,10 @@ +using Xunit; +namespace Markdown.Tests; + +/// +/// Тесты заголовков +/// +public class HeadingTests +{ + +} \ No newline at end of file diff --git a/cs/Markdown.Tests/InteractionRulesTests.cs b/cs/Markdown.Tests/InteractionRulesTests.cs new file mode 100644 index 000000000..0a4b8940f --- /dev/null +++ b/cs/Markdown.Tests/InteractionRulesTests.cs @@ -0,0 +1,10 @@ +using Xunit; +namespace Markdown.Tests; + +/// +/// Тесты с пересечениями типов +/// +public class InteractionRulesTests +{ + +} \ No newline at end of file diff --git a/cs/Markdown.Tests/Markdown.Tests.csproj b/cs/Markdown.Tests/Markdown.Tests.csproj new file mode 100644 index 000000000..78c9292a1 --- /dev/null +++ b/cs/Markdown.Tests/Markdown.Tests.csproj @@ -0,0 +1,17 @@ + + + + net8.0 + enable + enable + + + + + + + + + + + diff --git a/cs/Markdown.Tests/PerfTests.cs b/cs/Markdown.Tests/PerfTests.cs new file mode 100644 index 000000000..8942626ef --- /dev/null +++ b/cs/Markdown.Tests/PerfTests.cs @@ -0,0 +1,10 @@ +using Xunit; +namespace Markdown.Tests; + +/// +/// Тесты, проверяющие производительность +/// +public class PerfTests +{ + +} \ No newline at end of file diff --git a/cs/Markdown.Tests/StrongTests.cs b/cs/Markdown.Tests/StrongTests.cs new file mode 100644 index 000000000..e5a71b2ee --- /dev/null +++ b/cs/Markdown.Tests/StrongTests.cs @@ -0,0 +1,10 @@ +using Xunit; +namespace Markdown.Tests; + +/// +/// Тесты жирного шрифта +/// +public class StrongTests +{ + +} \ No newline at end of file diff --git a/cs/Markdown/Markdown.csproj b/cs/Markdown/Markdown.csproj new file mode 100644 index 000000000..16f14dd25 --- /dev/null +++ b/cs/Markdown/Markdown.csproj @@ -0,0 +1,14 @@ + + + + Exe + net8.0 + enable + enable + + + + + + + diff --git a/cs/Markdown/Md.cs b/cs/Markdown/Md.cs new file mode 100644 index 
000000000..e0ca31ade --- /dev/null +++ b/cs/Markdown/Md.cs @@ -0,0 +1,20 @@ +using Markdown.Core.Lexing; +using Markdown.Core.Parsing; +using Markdown.Core.Rendering; + +namespace Markdown; + +/// +/// Принимает текст в упрощённой разметке и возвращает HTML +/// +public class Md(ILexer lexer, IParser parser, IHtmlRenderer htmlRenderer) +{ + private readonly ILexer _lexer = lexer; + private readonly IParser _parser = parser; + private readonly IHtmlRenderer _htmlRenderer = htmlRenderer; + + public string Render(string text) + { + throw new NotImplementedException(); + } +} \ No newline at end of file diff --git a/cs/Markdown/Program.cs b/cs/Markdown/Program.cs new file mode 100644 index 000000000..674c654b9 --- /dev/null +++ b/cs/Markdown/Program.cs @@ -0,0 +1,9 @@ +using Markdown; +using Markdown.Core.Lexing; +using Markdown.Core.Parsing; +using Markdown.Core.Rendering; + +var md = new Md(new Lexer(), new Parser(), new HtmlRenderer()); +var input = Console.In.ReadToEnd(); +var html = md.Render(input); +Console.Write(html); \ No newline at end of file diff --git a/cs/Samples/Samples.csproj b/cs/Samples/Samples.csproj index da6cd3e3d..8b3be18d4 100644 --- a/cs/Samples/Samples.csproj +++ b/cs/Samples/Samples.csproj @@ -8,6 +8,7 @@ + diff --git a/cs/clean-code.sln b/cs/clean-code.sln index 2206d54db..ed501a8a0 100644 --- a/cs/clean-code.sln +++ b/cs/clean-code.sln @@ -9,6 +9,14 @@ Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "ControlDigit", "ControlDigi EndProject Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Samples", "Samples\Samples.csproj", "{C3EF41D7-50EF-4CE1-B30A-D1D81C93D7FA}" EndProject +Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Markdown", "Markdown", "{4D44D5F2-F802-4578-8C72-4A86A5360DF6}" +EndProject +Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Markdown", "Markdown\Markdown.csproj", "{F8483A51-DB5F-48E6-8982-50141057DE57}" +EndProject +Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Markdown.Core", 
"Markdown.Core\Markdown.Core.csproj", "{CDF8C90A-9D21-45DF-BD6E-DE5B89CBDED8}" +EndProject +Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Markdown.Tests", "Markdown.Tests\Markdown.Tests.csproj", "{55DBCB30-6E2B-49B4-8906-15D345DC849E}" +EndProject Global GlobalSection(SolutionConfigurationPlatforms) = preSolution Debug|Any CPU = Debug|Any CPU @@ -27,5 +35,22 @@ Global {C3EF41D7-50EF-4CE1-B30A-D1D81C93D7FA}.Debug|Any CPU.Build.0 = Debug|Any CPU {C3EF41D7-50EF-4CE1-B30A-D1D81C93D7FA}.Release|Any CPU.ActiveCfg = Release|Any CPU {C3EF41D7-50EF-4CE1-B30A-D1D81C93D7FA}.Release|Any CPU.Build.0 = Release|Any CPU + {F8483A51-DB5F-48E6-8982-50141057DE57}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {F8483A51-DB5F-48E6-8982-50141057DE57}.Debug|Any CPU.Build.0 = Debug|Any CPU + {F8483A51-DB5F-48E6-8982-50141057DE57}.Release|Any CPU.ActiveCfg = Release|Any CPU + {F8483A51-DB5F-48E6-8982-50141057DE57}.Release|Any CPU.Build.0 = Release|Any CPU + {CDF8C90A-9D21-45DF-BD6E-DE5B89CBDED8}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {CDF8C90A-9D21-45DF-BD6E-DE5B89CBDED8}.Debug|Any CPU.Build.0 = Debug|Any CPU + {CDF8C90A-9D21-45DF-BD6E-DE5B89CBDED8}.Release|Any CPU.ActiveCfg = Release|Any CPU + {CDF8C90A-9D21-45DF-BD6E-DE5B89CBDED8}.Release|Any CPU.Build.0 = Release|Any CPU + {55DBCB30-6E2B-49B4-8906-15D345DC849E}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {55DBCB30-6E2B-49B4-8906-15D345DC849E}.Debug|Any CPU.Build.0 = Debug|Any CPU + {55DBCB30-6E2B-49B4-8906-15D345DC849E}.Release|Any CPU.ActiveCfg = Release|Any CPU + {55DBCB30-6E2B-49B4-8906-15D345DC849E}.Release|Any CPU.Build.0 = Release|Any CPU + EndGlobalSection + GlobalSection(NestedProjects) = preSolution + {F8483A51-DB5F-48E6-8982-50141057DE57} = {4D44D5F2-F802-4578-8C72-4A86A5360DF6} + {CDF8C90A-9D21-45DF-BD6E-DE5B89CBDED8} = {4D44D5F2-F802-4578-8C72-4A86A5360DF6} + {55DBCB30-6E2B-49B4-8906-15D345DC849E} = {4D44D5F2-F802-4578-8C72-4A86A5360DF6} EndGlobalSection EndGlobal diff --git a/cs/clean-code.sln.DotSettings 
b/cs/clean-code.sln.DotSettings index 135b83ecb..53fe49b2f 100644 --- a/cs/clean-code.sln.DotSettings +++ b/cs/clean-code.sln.DotSettings @@ -1,6 +1,9 @@  <Policy Inspect="True" Prefix="" Suffix="" Style="aaBb" /> <Policy Inspect="True" Prefix="" Suffix="" Style="AaBb_AaBb" /> + <Policy><Descriptor Staticness="Instance" AccessRightKinds="Private" Description="Instance fields (private)"><ElementKinds><Kind Name="FIELD" /><Kind Name="READONLY_FIELD" /></ElementKinds></Descriptor><Policy Inspect="True" WarnAboutPrefixesAndSuffixes="False" Prefix="" Suffix="" Style="aaBb" /></Policy> + <Policy><Descriptor Staticness="Any" AccessRightKinds="Any" Description="Types and namespaces"><ElementKinds><Kind Name="NAMESPACE" /><Kind Name="CLASS" /><Kind Name="STRUCT" /><Kind Name="ENUM" /><Kind Name="DELEGATE" /></ElementKinds></Descriptor><Policy Inspect="True" WarnAboutPrefixesAndSuffixes="False" Prefix="" Suffix="" Style="AaBb_AaBb" /></Policy> + True True True Imported 10.10.2016 From 68ada8755c6dd1f2f9455c4d4030248f9e19baa1 Mon Sep 17 00:00:00 2001 From: Krotkaya Date: Sun, 23 Nov 2025 16:12:12 +0500 Subject: [PATCH 2/6] =?UTF-8?q?=D0=A0=D0=B5=D0=B0=D0=BB=D0=B8=D0=B7=D0=BE?= =?UTF-8?q?=D0=B2=D0=B0=D0=BD=D0=B0=D1=8F=20=D0=B7=D0=B0=D0=B4=D0=B0=D1=87?= =?UTF-8?q?=D0=B0=20=D0=BD=D0=B0=201=20=D0=B1=D0=B0=D0=BB=D0=BB?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- cs/Markdown.Core/Lexing/ILexer.cs | 2 +- cs/Markdown.Core/Lexing/Lexer.cs | 108 +++- cs/Markdown.Core/Lexing/Token.cs | 2 +- cs/Markdown.Core/Lexing/TokenKind.cs | 1 - cs/Markdown.Core/Markdown.Core.csproj | 6 + cs/Markdown.Core/Parsing/Nodes/BlockNode.cs | 4 +- .../Parsing/Nodes/DocumentNode.cs | 1 - .../Parsing/Nodes/EmphasisNode.cs | 1 + cs/Markdown.Core/Parsing/Nodes/HeadingNode.cs | 2 - cs/Markdown.Core/Parsing/Nodes/InlineNode.cs | 2 +- .../Parsing/Nodes/ParagraphNode.cs | 7 +- cs/Markdown.Core/Parsing/Nodes/StrongNode.cs | 8 +- 
cs/Markdown.Core/Parsing/Nodes/TextNode.cs | 9 +- cs/Markdown.Core/Parsing/Parser.cs | 490 +++++++++++++++++- cs/Markdown.Core/Rendering/HtmlRenderer.cs | 13 - .../{IHtmlRenderer.cs => IRenderer.cs} | 2 +- cs/Markdown.Core/Rendering/Renderer.cs | 74 +++ cs/Markdown.Tests/EmphasisTests.cs | 29 +- cs/Markdown.Tests/EscapingTests.cs | 36 +- cs/Markdown.Tests/HeadingTests.cs | 36 +- cs/Markdown.Tests/InteractionRulesTests.cs | 56 +- cs/Markdown.Tests/Markdown.Tests.csproj | 3 + cs/Markdown.Tests/PerfTests.cs | 48 +- cs/Markdown.Tests/StrongTests.cs | 39 +- cs/Markdown.Tests/TextTests.cs | 41 ++ cs/Markdown/Markdown.csproj | 6 + cs/Markdown/Md.cs | 10 +- cs/Markdown/Program.cs | 2 +- 28 files changed, 970 insertions(+), 68 deletions(-) delete mode 100644 cs/Markdown.Core/Rendering/HtmlRenderer.cs rename cs/Markdown.Core/Rendering/{IHtmlRenderer.cs => IRenderer.cs} (79%) create mode 100644 cs/Markdown.Core/Rendering/Renderer.cs create mode 100644 cs/Markdown.Tests/TextTests.cs diff --git a/cs/Markdown.Core/Lexing/ILexer.cs b/cs/Markdown.Core/Lexing/ILexer.cs index 3808e235f..68d1eb38c 100644 --- a/cs/Markdown.Core/Lexing/ILexer.cs +++ b/cs/Markdown.Core/Lexing/ILexer.cs @@ -2,5 +2,5 @@ namespace Markdown.Core.Lexing; public interface ILexer { - public IEnumerable Tokenize(ReadOnlySpan source); + public IEnumerable Tokenize(ReadOnlyMemory source); } \ No newline at end of file diff --git a/cs/Markdown.Core/Lexing/Lexer.cs b/cs/Markdown.Core/Lexing/Lexer.cs index dfe7ea70c..8a7511062 100644 --- a/cs/Markdown.Core/Lexing/Lexer.cs +++ b/cs/Markdown.Core/Lexing/Lexer.cs @@ -9,8 +9,112 @@ namespace Markdown.Core.Lexing; /// public class Lexer : ILexer { - public IEnumerable Tokenize(ReadOnlySpan source) + public IEnumerable Tokenize(ReadOnlyMemory source) { - throw new NotImplementedException(); + var tokensSpan = source.Span; + var tokens = new List(); + var i = 0; + var length = source.Length; + + while (i < length) + { + var symbol = tokensSpan[i]; + + switch (symbol) + { + 
case '#' when IsAtLineStart(tokens) && i + 1 < source.Length + && tokensSpan[i + 1] == ' ': + tokens.Add(new Token(TokenKind.Hash, source.Slice(i, 1), + i)); + i += 1; + continue; + + case '\\' when i + 1 < source.Length: + var next = tokensSpan[i + 1]; + if (next == '_' && i + 2 < source.Length && tokensSpan[i + 2] + == '_') + { + tokens.Add(new Token(TokenKind.Text, source.Slice(i + + 1, 2), i)); + i += 3; + continue; + } + + if (IsSpecialCharacter(next)) + { + tokens.Add(new Token(TokenKind.Text, source.Slice(i + + 1, 1), i)); + i += 2; + continue; + } + + tokens.Add(new Token(TokenKind.Text, source.Slice(i, 1), + i)); + i += 1; + continue; + + case '\\': + tokens.Add(new Token(TokenKind.Text, source.Slice(i, 1), + i)); + i += 1; + continue; + + case '_' when i + 1 < source.Length && tokensSpan[i + 1] == '_': + tokens.Add(new Token(TokenKind.DoubleUnderscore, + source.Slice(i, 2), i)); + i += 2; + continue; + + case '_' when i + 1 <= source.Length: + tokens.Add(new Token(TokenKind.Underscore, source.Slice(i, + 1), i)); + i += 1; + continue; + + case ' ': + tokens.Add(new Token(TokenKind.Space, source.Slice(i, + 1), i)); + i += 1; + continue; + + case '\n': + tokens.Add(new Token(TokenKind.NewLine, source.Slice(i, + 1), i)); + i += 1; + continue; + + case '\r' when i + 1 < source.Length && tokensSpan[i + 1] == '\n': + tokens.Add(new Token(TokenKind.NewLine, source.Slice(i, + 2), i)); + i += 2; + continue; + } + + var startText = i; + while (i < length) + { + symbol = tokensSpan[i]; + if (symbol is '\\' or '#' or '_' or '*' or '-' or '+' or ' ' + or '\n' or '\r') + break; + i++; + + } + + if (i > startText) + { + tokens.Add(new Token(TokenKind.Text, source.Slice(startText, i - startText), startText)); + } + } + + tokens.Add(new Token(TokenKind.Eof, source.Slice(0, 0), length)); + return tokens; } + + private bool IsSpecialCharacter(char c) => c is '#' or '_' or '\\' or '*' + or '-' or '+'; + + private bool IsAtLineStart(List tokens) => + tokens.Count == 0 || 
tokens[^1].Kind == TokenKind.NewLine; + } \ No newline at end of file diff --git a/cs/Markdown.Core/Lexing/Token.cs b/cs/Markdown.Core/Lexing/Token.cs index f93e98ac7..f2321e2e8 100644 --- a/cs/Markdown.Core/Lexing/Token.cs +++ b/cs/Markdown.Core/Lexing/Token.cs @@ -11,6 +11,6 @@ public readonly struct Token(TokenKind kind, ReadOnlyMemory slice, int pos public override string ToString() { - throw new NotImplementedException(); + return $"{Kind} '{Slice}' at {Position}"; } } \ No newline at end of file diff --git a/cs/Markdown.Core/Lexing/TokenKind.cs b/cs/Markdown.Core/Lexing/TokenKind.cs index 15dc4c188..3a82eb9e4 100644 --- a/cs/Markdown.Core/Lexing/TokenKind.cs +++ b/cs/Markdown.Core/Lexing/TokenKind.cs @@ -8,7 +8,6 @@ public enum TokenKind Text, // Обычный текст (любой непрерывный фрагмент без спецсимволов) Underscore, // Одинарное подчёркивание '_', кандидат на курсивный шрифт DoubleUnderscore, // Двойное подчёркивание '__', кандидат на полужирный шрифт - Escape, // Обратный слэш '\', может либо экранировать следующий спецсимвол, либо остаться в тексте Hash, //Решётка '#', на начале абзаца вместе с пробелом "# " означает заголовок Space, //Одиночный пробел ' ', нужен для проверки границ выделений NewLine, //Перевод строки (например, '\n' или '\r\n') diff --git a/cs/Markdown.Core/Markdown.Core.csproj b/cs/Markdown.Core/Markdown.Core.csproj index c874e520d..2179c1392 100644 --- a/cs/Markdown.Core/Markdown.Core.csproj +++ b/cs/Markdown.Core/Markdown.Core.csproj @@ -5,5 +5,11 @@ enable enable + + + + + + diff --git a/cs/Markdown.Core/Parsing/Nodes/BlockNode.cs b/cs/Markdown.Core/Parsing/Nodes/BlockNode.cs index 4fe147488..0de59317c 100644 --- a/cs/Markdown.Core/Parsing/Nodes/BlockNode.cs +++ b/cs/Markdown.Core/Parsing/Nodes/BlockNode.cs @@ -1,10 +1,8 @@ - - namespace Markdown.Core.Parsing.Nodes; /// /// Базовый класс для блочных элементов документа (заголовки и абзацы) /// -public class BlockNode : Node +public abstract class BlockNode : Node { } \ No newline at 
end of file diff --git a/cs/Markdown.Core/Parsing/Nodes/DocumentNode.cs b/cs/Markdown.Core/Parsing/Nodes/DocumentNode.cs index 441fbe51c..31e31661a 100644 --- a/cs/Markdown.Core/Parsing/Nodes/DocumentNode.cs +++ b/cs/Markdown.Core/Parsing/Nodes/DocumentNode.cs @@ -1,4 +1,3 @@ - namespace Markdown.Core.Parsing.Nodes; /// diff --git a/cs/Markdown.Core/Parsing/Nodes/EmphasisNode.cs b/cs/Markdown.Core/Parsing/Nodes/EmphasisNode.cs index 9cb6e9bdd..2fb66b72d 100644 --- a/cs/Markdown.Core/Parsing/Nodes/EmphasisNode.cs +++ b/cs/Markdown.Core/Parsing/Nodes/EmphasisNode.cs @@ -2,6 +2,7 @@ namespace Markdown.Core.Parsing.Nodes; /// /// Курсив (_..._): контейнер инлайнов внутри выделения +/// ОГРАНИЧЕНИЕ: Не может содержать StrongNode (по спецификации) /// public class EmphasisNode : InlineNode { diff --git a/cs/Markdown.Core/Parsing/Nodes/HeadingNode.cs b/cs/Markdown.Core/Parsing/Nodes/HeadingNode.cs index cf074698e..e6a574583 100644 --- a/cs/Markdown.Core/Parsing/Nodes/HeadingNode.cs +++ b/cs/Markdown.Core/Parsing/Nodes/HeadingNode.cs @@ -1,5 +1,3 @@ - - namespace Markdown.Core.Parsing.Nodes; /// diff --git a/cs/Markdown.Core/Parsing/Nodes/InlineNode.cs b/cs/Markdown.Core/Parsing/Nodes/InlineNode.cs index 3539cfd4e..cee93c0de 100644 --- a/cs/Markdown.Core/Parsing/Nodes/InlineNode.cs +++ b/cs/Markdown.Core/Parsing/Nodes/InlineNode.cs @@ -3,7 +3,7 @@ namespace Markdown.Core.Parsing.Nodes; /// /// Базовый класс для инлайновых элементов /// -public class InlineNode : Node +public abstract class InlineNode : Node { } \ No newline at end of file diff --git a/cs/Markdown.Core/Parsing/Nodes/ParagraphNode.cs b/cs/Markdown.Core/Parsing/Nodes/ParagraphNode.cs index 18a3cf66d..e06a28bb0 100644 --- a/cs/Markdown.Core/Parsing/Nodes/ParagraphNode.cs +++ b/cs/Markdown.Core/Parsing/Nodes/ParagraphNode.cs @@ -7,10 +7,5 @@ namespace Markdown.Core.Parsing.Nodes; /// public class ParagraphNode : BlockNode { - public IList Inlines { get; } - - public ParagraphNode() - { - Inlines = new List(); - 
} + public IList Inlines { get; } = new List(); } \ No newline at end of file diff --git a/cs/Markdown.Core/Parsing/Nodes/StrongNode.cs b/cs/Markdown.Core/Parsing/Nodes/StrongNode.cs index a784545df..b4633a498 100644 --- a/cs/Markdown.Core/Parsing/Nodes/StrongNode.cs +++ b/cs/Markdown.Core/Parsing/Nodes/StrongNode.cs @@ -2,13 +2,9 @@ namespace Markdown.Core.Parsing.Nodes; /// /// Полужирный (__...__): контейнер инлайнов внутри выделения +/// ОГРАНИЧЕНИЕ: Может содержать EmphasisNode (по спецификации) /// public class StrongNode : InlineNode { - public IList Inlines { get; } - - public StrongNode() - { - Inlines = new List(); - } + public IList Inlines { get; } = new List(); } \ No newline at end of file diff --git a/cs/Markdown.Core/Parsing/Nodes/TextNode.cs b/cs/Markdown.Core/Parsing/Nodes/TextNode.cs index 37ddb1ba1..02a4fe419 100644 --- a/cs/Markdown.Core/Parsing/Nodes/TextNode.cs +++ b/cs/Markdown.Core/Parsing/Nodes/TextNode.cs @@ -3,12 +3,7 @@ namespace Markdown.Core.Parsing.Nodes; /// /// Обычный текст без разметки, конечный лист дерева /// -public class TextNode : InlineNode +public class TextNode(string text) : InlineNode { - public string Text { get; } - - public TextNode(string text) - { - Text = text; - } + public string Text { get; } = text; } \ No newline at end of file diff --git a/cs/Markdown.Core/Parsing/Parser.cs b/cs/Markdown.Core/Parsing/Parser.cs index 73d0980d3..d45685a66 100644 --- a/cs/Markdown.Core/Parsing/Parser.cs +++ b/cs/Markdown.Core/Parsing/Parser.cs @@ -1,17 +1,491 @@ +using System.Text; using Markdown.Core.Lexing; using Markdown.Core.Parsing.Nodes; + namespace Markdown.Core.Parsing; -/// -/// Парсер: -/// - делит документ на блоки (заголовки и абзацы) -/// - внутри блоков собирает инлайны (Текст/Курсив/Жирный) -/// - следует спецификации (границы, цифры, пересечения, пустые выделения) -/// public class Parser : IParser { + private IEnumerator _tokenPointer; + private Token _currentToken; + private readonly List _allTokens = new(); + 
private int _currentIndex; + public DocumentNode Parse(IEnumerable tokens) { - throw new NotImplementedException(); + _allTokens.Clear(); + _allTokens.AddRange(tokens); + + _tokenPointer = _allTokens.GetEnumerator(); + _currentIndex = 0; + MoveOnNextToken(); + + var document = new DocumentNode(); + + while (_currentToken.Kind != TokenKind.Eof) + { + var block = ParseBlock(); + if (block != null) + document.Children.Add(block); + } + return document; + } + + private BlockNode? ParseBlock() + { + SkipEmptyLines(); + + if (IsEndOfFile()) + return null; + + if (IsHeadingStart()) + return ParseHeading(); + + return ParseParagraph(); } -} \ No newline at end of file + + private void SkipEmptyLines() + { + while (_currentToken.Kind == TokenKind.NewLine) + MoveOnNextToken(); + } + + private bool IsEndOfFile() => _currentToken.Kind == TokenKind.Eof; + + private bool IsHeadingStart() => + _currentToken.Kind == TokenKind.Hash && IsAtStartOfLine(); + + + private ParagraphNode ParseParagraph() + { + var paragraph = new ParagraphNode(); + + while (!IsEndOfLine()) + { + var inline = ParseInline(); + if (inline != null) + paragraph.Inlines.Add(inline); + } + + if (_currentToken.Kind == TokenKind.NewLine) + { + MoveOnNextToken(); + SkipEmptyLines(); + } + + return paragraph; + } + + + + + private bool IsEndOfLine() + { + return _currentToken.Kind == TokenKind.NewLine || _currentToken.Kind == TokenKind.Eof; + } + + private HeadingNode ParseHeading() + { + var heading = new HeadingNode(1); + + MoveOnNextToken(); + SkipSpace(); + + while (!IsEndOfLine()) + { + var inline = ParseInline(); + if (inline != null) + heading.Inlines.Add(inline); + } + + if (_currentToken.Kind == TokenKind.NewLine) + MoveOnNextToken(); + + return heading; + } + + private void SkipSpace() + { + if (_currentToken.Kind == TokenKind.Space) + MoveOnNextToken(); + } + + private InlineNode ParseInline() + { + if (_currentToken.Kind == TokenKind.Eof) + return null; + + return _currentToken.Kind switch + { + 
TokenKind.Text => ParseText(), + TokenKind.Underscore => ParseEmphasis(), + TokenKind.DoubleUnderscore => ParseStrong(), + TokenKind.Space => ParseText(), + _ => ParseText() + }; + } + + private TextNode ParseText() + { + var node = new TextNode(_currentToken.Slice.ToString()); + MoveOnNextToken(); + return node; + } + + private InlineNode ParseEmphasis() + { + var startIndex = _currentIndex - 1; + MoveOnNextToken(); + + if (IsInvalidEmphasisStart()) + return CreateTextNode("_"); + + var emphasis = new EmphasisNode(); + return ParseEmphasisContent(emphasis, startIndex); + } + + private bool IsInvalidEmphasisStart() => + _currentToken.Kind is TokenKind.Space or TokenKind.NewLine or + TokenKind.Eof; + + private InlineNode ParseEmphasisContent(EmphasisNode emphasis, int + startIndex) + { + while (!IsEndOfContent()) + { + if (_currentToken.Kind == TokenKind.Underscore) + { + var closeResult = TryCloseEmphasis(emphasis, startIndex); + if (closeResult.ShouldReturn) + return closeResult.Node; + } + else if (_currentToken.Kind == TokenKind.DoubleUnderscore) + { + emphasis.Inlines.Add(CreateTextNode("__")); + MoveOnNextToken(); + } + else + { + var inline = ParseInline(); + if (inline != null) + emphasis.Inlines.Add(inline); + } + } + + return ConvertToTextNode(emphasis, "_"); + } + + + private InlineNode ParseStrong() + { + var startIndex = _currentIndex - 1; + MoveOnNextToken(); + + if (IsInvalidStrongStart()) + return ConvertToTextNode(new StrongNode(), "__"); + + var strong = new StrongNode(); + return ParseStrongContent(strong, startIndex); + } + + private bool IsInvalidStrongStart() => + _currentToken.Kind is TokenKind.Space or TokenKind.NewLine or + TokenKind.Eof; + + private TextNode CreateTextNode(string text) => new(text); + private InlineNode ParseStrongContent(StrongNode strong, int startIndex) + { + while (!IsEndOfContent()) + { + if (_currentToken.Kind == TokenKind.DoubleUnderscore) + { + var closeResult = TryCloseStrong(strong, startIndex); + if 
(closeResult.ShouldReturn) + return closeResult.Node; + } + else if (_currentToken.Kind == TokenKind.Underscore) + { + var emphasis = ParseEmphasis(); + if (emphasis != null) + strong.Inlines.Add(emphasis); + } + else + { + var inline = ParseInline(); + if (inline != null) + strong.Inlines.Add(inline); + } + } + + return ConvertToTextNode(strong, "__"); + } + + private bool IsEndOfContent() => + _currentToken.Kind == TokenKind.NewLine || _currentToken.Kind == + TokenKind.Eof; + + private CloseResult TryCloseStrong(StrongNode strong, int startIndex) + { + if (IsValidStrongClose(startIndex)) + { + MoveOnNextToken(); + return new CloseResult(true, strong); + } + + strong.Inlines.Add(CreateTextNode("__")); + MoveOnNextToken(); + return new CloseResult(false, null); + } + + private bool IsValidEmphasisClose(int startIndex) + { + var closeIndex = _currentIndex - 1; + + if (!HasValidOpeningBoundary(startIndex) || ! + HasValidClosingBoundary(closeIndex)) + return false; + if (startIndex + 1 == closeIndex) + return false; + if (IsInDigitContext(startIndex) || IsInDigitContext(closeIndex)) + return false; + if (HasIntersectingDoubleInsideEmphasis(startIndex, closeIndex)) + return false; + if (IsInsideWord(startIndex) && IsInsideWord(closeIndex) && + ContainsWhitespaceBetween(startIndex, closeIndex)) + return false; + + return true; + } + + + private bool IsInDigitContext(int index) => + HasDigitBefore(index) || HasDigitAfter(index); + + + private bool HasDigitBefore(int index) + { + if (index == 0) + return false; + + var prev = _allTokens[index - 1]; + return prev.Kind == TokenKind.Text && + prev.Slice.Length > 0 && + char.IsDigit(prev.Slice.Span[^1]); + } + + private bool HasDigitAfter(int index) + { + if (index + 1 >= _allTokens.Count) + return false; + + var next = _allTokens[index + 1]; + return next.Kind == TokenKind.Text && + next.Slice.Length > 0 && + char.IsDigit(next.Slice.Span[0]); + } + + private bool HasValidOpeningBoundary(int startIndex) + { + if (startIndex + 
1 >= _allTokens.Count) + return true; + + var next = _allTokens[startIndex + 1]; + return next.Kind is not TokenKind.Space and not TokenKind.NewLine; + } + + private bool HasValidClosingBoundary(int closeIndex) + { + if (closeIndex - 1 < 0) + return true; + + var prev = _allTokens[closeIndex - 1]; + if (prev.Kind != TokenKind.Space) + return true; + + if (closeIndex + 1 >= _allTokens.Count) + return true; + + var next = _allTokens[closeIndex + 1]; + return next.Kind is TokenKind.Space or TokenKind.NewLine or TokenKind.Eof; + } + + private bool IsValidStrongClose(int startIndex) + { + var closeIndex = _currentIndex - 1; + + if (!HasValidOpeningBoundary(startIndex) || ! + HasValidClosingBoundary(closeIndex)) + return false; + if (startIndex + 1 == closeIndex) + return false; + if (IsInDigitContext(startIndex) || IsInDigitContext(closeIndex)) + return false; + if (HasIntersectingDoubleUnderscore(startIndex, closeIndex)) + return false; + if (HasIntersectingSingleInsideStrong(startIndex, closeIndex)) + return false; + if (IsInsideWord(startIndex) && IsInsideWord(closeIndex) && + ContainsWhitespaceBetween(startIndex, closeIndex)) + return false; + + return true; + } + + private bool HasIntersectingDoubleUnderscore(int startIndex, int + closeIndex) + { + for (var i = startIndex + 1; i < closeIndex; i++) + if (_allTokens[i].Kind == TokenKind.DoubleUnderscore) + return true; + return false; + } + + private InlineNode ConvertToTextNode(InlineNode node, string prefix) + { + var textContent = new StringBuilder(); + textContent.Append(prefix); + textContent.Append(ExtractTextFromNode(node)); + return new TextNode(textContent.ToString()); + } + + private string ExtractTextFromNode(InlineNode node) + { + var result = new StringBuilder(); + + switch (node) + { + case EmphasisNode emphasis: + foreach (var inline in emphasis.Inlines) + { + result.Append(ExtractTextFromNode(inline)); + } + + break; + case StrongNode strong: + foreach (var inline in strong.Inlines) + { + 
result.Append(ExtractTextFromNode(inline)); + } + + break; + case TextNode text: + result.Append(text.Text); + break; + } + + return result.ToString(); + } + + private bool IsAtStartOfLine() => + _currentIndex <= 1 || _allTokens[_currentIndex - 2].Kind == + TokenKind.NewLine; + + + private Token MoveOnNextToken() + { + if (!_tokenPointer.MoveNext()) + { + _currentToken = new Token(TokenKind.Eof, ReadOnlyMemory.Empty, -1); + } + else + { + _currentToken = _tokenPointer.Current; + _currentIndex++; + } + + return _currentToken; + } + + private class CloseResult + { + public bool ShouldReturn { get; } + public InlineNode Node { get; } + + public CloseResult(bool shouldReturn, InlineNode node) + { + ShouldReturn = shouldReturn; + Node = node; + } + } + + private bool IsInsideWord(int index) => + HasLetterOrDigitBefore(index) && HasLetterOrDigitAfter(index); + + + private bool HasLetterOrDigitBefore(int index) + { + if (index == 0) + return false; + + var prev = _allTokens[index - 1]; + return prev.Kind == TokenKind.Text && + prev.Slice.Length > 0 && + char.IsLetterOrDigit(prev.Slice.Span[^1]); + } + + private bool HasLetterOrDigitAfter(int index) + { + if (index + 1 >= _allTokens.Count) + return false; + + var next = _allTokens[index + 1]; + return next.Kind == TokenKind.Text && + next.Slice.Length > 0 && + char.IsLetterOrDigit(next.Slice.Span[0]); + } + + + private bool ContainsWhitespaceBetween(int startIndex, int closeIndex) + { + for (var i = startIndex + 1; i < closeIndex; i++) + if (_allTokens[i].Kind == TokenKind.Space) + return true; + return false; + } + + private CloseResult TryCloseEmphasis(EmphasisNode emphasis, int + startIndex) + { + if (IsValidEmphasisClose(startIndex)) + { + MoveOnNextToken(); + return new CloseResult(true, emphasis); + } + + emphasis.Inlines.Add(CreateTextNode("_")); + MoveOnNextToken(); + return new CloseResult(false, null); + } + + private bool HasIntersectingDoubleInsideEmphasis(int startIndex, int + closeIndex) + { + var pending = 
false; + + for (var i = startIndex + 1; i < closeIndex; i++) + { + if (_allTokens[i].Kind != TokenKind.DoubleUnderscore) + continue; + + pending = !pending; + } + + return pending; + } + + private bool HasIntersectingSingleInsideStrong(int startIndex, int closeIndex) + { + var pending = false; + + for (var i = startIndex + 1; i < closeIndex; i++) + { + if (_allTokens[i].Kind != TokenKind.Underscore) + continue; + + pending = !pending; + } + + return pending; + } + +} diff --git a/cs/Markdown.Core/Rendering/HtmlRenderer.cs b/cs/Markdown.Core/Rendering/HtmlRenderer.cs deleted file mode 100644 index 083a97c44..000000000 --- a/cs/Markdown.Core/Rendering/HtmlRenderer.cs +++ /dev/null @@ -1,13 +0,0 @@ -using Markdown.Core.Parsing.Nodes; -namespace Markdown.Core.Rendering; - -/// -/// Обход дерева и формирование HTML-строки -/// -public class HtmlRenderer : IHtmlRenderer -{ - public string Render(DocumentNode document) - { - throw new NotImplementedException(); - } -} \ No newline at end of file diff --git a/cs/Markdown.Core/Rendering/IHtmlRenderer.cs b/cs/Markdown.Core/Rendering/IRenderer.cs similarity index 79% rename from cs/Markdown.Core/Rendering/IHtmlRenderer.cs rename to cs/Markdown.Core/Rendering/IRenderer.cs index cdf75d459..dd00fc048 100644 --- a/cs/Markdown.Core/Rendering/IHtmlRenderer.cs +++ b/cs/Markdown.Core/Rendering/IRenderer.cs @@ -2,7 +2,7 @@ namespace Markdown.Core.Rendering; -public interface IHtmlRenderer +public interface IRenderer { string Render (DocumentNode document); } \ No newline at end of file diff --git a/cs/Markdown.Core/Rendering/Renderer.cs b/cs/Markdown.Core/Rendering/Renderer.cs new file mode 100644 index 000000000..e20551c51 --- /dev/null +++ b/cs/Markdown.Core/Rendering/Renderer.cs @@ -0,0 +1,74 @@ +using System.Text; +using Markdown.Core.Parsing.Nodes; + +namespace Markdown.Core.Rendering; + +public class Renderer : IRenderer + { + public string Render(DocumentNode document) + { + var result = new StringBuilder(); + + foreach (var 
block in document.Children) + { + switch (block) + { + case HeadingNode heading: + result.Append(RenderHeading(heading)); + break; + case ParagraphNode paragraph: + result.Append(RenderParagraph(paragraph)); + break; + } + } + + return result.ToString(); + } + + private string RenderHeading(HeadingNode heading) + { + var content = RenderInlines(heading.Inlines); + return $"{content}"; + } + + private string RenderParagraph(ParagraphNode paragraph) + { + var content = RenderInlines(paragraph.Inlines); + return $"

{content}

"; + } + + + private string RenderInlines(IList inlines) + { + var builder = new StringBuilder(); + + foreach (var inline in inlines) + { + switch (inline) + { + case TextNode text: + builder.Append(EscapeHtml(text.Text)); + break; + case EmphasisNode emphasis: + builder.Append(""); + builder.Append(RenderInlines(emphasis.Inlines)); + builder.Append(""); + break; + case StrongNode strong: + builder.Append(""); + builder.Append(RenderInlines(strong.Inlines)); + builder.Append(""); + break; + } + } + + return builder.ToString(); + } + + private string EscapeHtml(string text) => + text.Replace("&", "&") + .Replace("<", "<") + .Replace(">", ">") + .Replace("\"", """) + .Replace("'", "'"); + } diff --git a/cs/Markdown.Tests/EmphasisTests.cs b/cs/Markdown.Tests/EmphasisTests.cs index fef4fbd63..1f271d0b8 100644 --- a/cs/Markdown.Tests/EmphasisTests.cs +++ b/cs/Markdown.Tests/EmphasisTests.cs @@ -1,4 +1,9 @@ -using Xunit; +using FluentAssertions; +using Markdown.Core.Lexing; +using Markdown.Core.Parsing; +using Markdown.Core.Rendering; +using NUnit.Framework; + namespace Markdown.Tests; /// @@ -6,5 +11,27 @@ namespace Markdown.Tests; /// public class EmphasisTests { + private Md _markdown; + + [SetUp] + public void Setup() + { + var lexer = new Lexer(); + var parser = new Parser(); + var renderer = new Renderer(); + _markdown = new Md(lexer, parser, renderer); + } + [TestCase("_курсив_", "

курсив

", + TestName = "Курсив одинарными подчеркиваниями")] + [TestCase("Текст, _окруженный с двух сторон_ одинарными символами", + "

Текст, окруженный с двух сторон одинарными символами

", + TestName = "Курсив в середине текста")] + [TestCase("Этот _подчерк _ работает", "

Этот подчерк работает

", + TestName = "Одинарное выделение допускает пробел внутри")] + public void Test(string inputText, string expectedHtml) + { + var html = _markdown.Render(inputText); + html.Should().Be(expectedHtml); + } } \ No newline at end of file diff --git a/cs/Markdown.Tests/EscapingTests.cs b/cs/Markdown.Tests/EscapingTests.cs index e14856b19..91d13ea51 100644 --- a/cs/Markdown.Tests/EscapingTests.cs +++ b/cs/Markdown.Tests/EscapingTests.cs @@ -1,4 +1,8 @@ -using Xunit; +using FluentAssertions; +using Markdown.Core.Lexing; +using Markdown.Core.Parsing; +using Markdown.Core.Rendering; +using NUnit.Framework; namespace Markdown.Tests; /// @@ -6,5 +10,35 @@ namespace Markdown.Tests; /// public class EscapingTests { + private Md _markdown; + + [SetUp] + public void Setup() + { + var lexer = new Lexer(); + var parser = new Parser(); + var renderer = new Renderer(); + _markdown = new Md(lexer, parser, renderer); + } + [TestCase(@"\_Вот это\_", "

_Вот это_

", + TestName = "Экранирование подчеркиваний")] + [TestCase(@"Здесь сим\волы экранирования\ \должны остаться.\", + @"

Здесь сим\волы экранирования\ \должны остаться.\

", + TestName = "Экранирование без специальных символов остается")] + [TestCase(@"\\_вот это будет выделено тегом_", + @"

\вот это будет выделено тегом

", + TestName = "Экранирование символа экранирования")] + [TestCase("__Жирное с \\__ внутри__", "

Жирное с __ внутри

", + TestName = "Экранирование двойного подчеркивания в полужирном")] + // [TestCase("сло\\_во _курсив_", "

слово курсив

", + // TestName = "Двойное экранирование внутри слова")] + [TestCase("_Привет\\_", "

_Привет_

", + TestName = "Экранирование подчёркивания внутри курсива оставляет текст")] + + public void Test(string inputText, string expectedHtml) + { + var html = _markdown.Render(inputText); + html.Should().Be(expectedHtml); + } } \ No newline at end of file diff --git a/cs/Markdown.Tests/HeadingTests.cs b/cs/Markdown.Tests/HeadingTests.cs index 07a724aba..5949ee4fd 100644 --- a/cs/Markdown.Tests/HeadingTests.cs +++ b/cs/Markdown.Tests/HeadingTests.cs @@ -1,10 +1,40 @@ -using Xunit; +using FluentAssertions; +using Markdown.Core.Lexing; +using Markdown.Core.Parsing; +using Markdown.Core.Rendering; +using NUnit.Framework; namespace Markdown.Tests; /// /// Тесты заголовков /// +[TestFixture] public class HeadingTests { - -} \ No newline at end of file + private Md _markdown; + + [SetUp] + public void Setup() + { + var lexer = new Lexer(); + var parser = new Parser(); + var renderer = new Renderer(); + _markdown = new Md(lexer, parser, renderer); + } + + [TestCase("# Заголовок", "

Заголовок

", + TestName = "Простой заголовок")] + [TestCase("# Заголовок __с _разными_ символами__", "

Заголовок с разными символами

", + TestName = "Заголовок с вложенными тегами")] + [TestCase("Текст # не заголовок", "

Текст # не заголовок

", + TestName = "Решетка в середине строки — не заголовок")] + [TestCase("#Заголовок без пробела", "

#Заголовок без пробела

", + TestName = "Без пробела после решетки — не заголовок")] + [TestCase(" ## C пробелом в начале", "

## C пробелом в начале

", + TestName = "Решетка не в первом символе — не заголовок")] + public void Test(string input, string expectedHtml) + { + var html = _markdown.Render(input); + html.Should().Be(expectedHtml); + } +} diff --git a/cs/Markdown.Tests/InteractionRulesTests.cs b/cs/Markdown.Tests/InteractionRulesTests.cs index 0a4b8940f..d9d466aa5 100644 --- a/cs/Markdown.Tests/InteractionRulesTests.cs +++ b/cs/Markdown.Tests/InteractionRulesTests.cs @@ -1,4 +1,8 @@ -using Xunit; +using FluentAssertions; +using Markdown.Core.Lexing; +using Markdown.Core.Parsing; +using Markdown.Core.Rendering; +using NUnit.Framework; namespace Markdown.Tests; /// @@ -6,5 +10,53 @@ namespace Markdown.Tests; /// public class InteractionRulesTests { - + private Md _markdown; + + [SetUp] + public void Setup() + { + var lexer = new Lexer(); + var parser = new Parser(); + var renderer = new Renderer(); + _markdown = new Md(lexer, parser, renderer); + } + + [TestCase("Внутри __двойного выделения _одинарное_ тоже__ работает", + "

Внутри двойного выделения одинарное тоже работает

", + TestName = "Курсив внутри полужирного работает")] + [TestCase("Но не наоборот — внутри _одинарного __двойное__ не_ работает", + "

Но не наоборот — внутри одинарного __двойное__ не работает

", + TestName = "Полужирный внутри курсива не работает")] + [TestCase("Подчерки внутри текста c цифрами_12_3 не считаются выделением", + "

Подчерки внутри текста c цифрами_12_3 не считаются выделением

", + TestName = "Подчерки с цифрами не выделяются")] + [TestCase("и в _нач_але, и в сер_еди_не, и в кон_це._", + "

и в начале, и в середине, и в конце.

", + TestName = "Выделение внутри слов работает")] + [TestCase("В то же время выделение в ра_зных сл_овах не работает", + "

В то же время выделение в ра_зных сл_овах не работает

", + TestName = "Выделение между разными словами не работает")] + [TestCase("__Непарные_ символы в рамках одного абзаца не считаются выделением", + "

__Непарные_ символы в рамках одного абзаца не считаются выделением

", + TestName = "Непарные символы не выделяются")] + [TestCase("Иначе эти_ подчерки_ не считаются выделением", + "

Иначе эти_ подчерки_ не считаются выделением

", + TestName = "Подчерки после пробела не начинают выделение")] + [TestCase("Иначе эти _подчерки _не считаются_ окончанием выделения", + "

Иначе эти подчерки _не считаются окончанием выделения

", + TestName = "Подчерки перед пробелом не заканчивают выделение")] + [TestCase("В случае __пересечения _двойных__ и одинарных_ ни один не считается выделением", + "

В случае __пересечения _двойных__ и одинарных_ ни один не считается выделением

", + TestName = "Пересекающиеся теги не работают")] + [TestCase("Если внутри подчерков пустая строка ____, то они остаются символами подчерка", + "

Если внутри подчерков пустая строка ____, то они остаются символами подчерка

", + TestName = "Пустые выделения не работают")] + [TestCase("Подчерки с цифрами__12__3 не считаются выделением", + "

Подчерки с цифрами__12__3 не считаются выделением

", + TestName = "Двойные подчерки с цифрами не работают")] + public void Test(string inputText, string expectedHtml) + { + var html = _markdown.Render(inputText); + html.Should().Be(expectedHtml); + } } \ No newline at end of file diff --git a/cs/Markdown.Tests/Markdown.Tests.csproj b/cs/Markdown.Tests/Markdown.Tests.csproj index 78c9292a1..fb02bb751 100644 --- a/cs/Markdown.Tests/Markdown.Tests.csproj +++ b/cs/Markdown.Tests/Markdown.Tests.csproj @@ -11,6 +11,9 @@ + + + diff --git a/cs/Markdown.Tests/PerfTests.cs b/cs/Markdown.Tests/PerfTests.cs index 8942626ef..0e767b8cb 100644 --- a/cs/Markdown.Tests/PerfTests.cs +++ b/cs/Markdown.Tests/PerfTests.cs @@ -1,10 +1,56 @@ -using Xunit; +using System.Text; +using FluentAssertions; +using Markdown.Core.Lexing; +using Markdown.Core.Parsing; +using Markdown.Core.Rendering; +using NUnit.Framework; namespace Markdown.Tests; /// /// Тесты, проверяющие производительность /// +[TestFixture] public class PerfTests { + private Md _markdown; + + [SetUp] + public void Setup() + { + var lexer = new Lexer(); + var parser = new Parser(); + var renderer = new Renderer(); + _markdown = new Md(lexer, parser, renderer); + } + + [Test] + [Timeout(2000)] + public void Render_ShouldHandleLongInputLinearly() + { + const int paragraphs = 5000; + + var inputBuilder = new StringBuilder(); + var expectedBuilder = new StringBuilder(); + + for (var i = 0; i < paragraphs; i++) + { + inputBuilder.Append($"__жирный{i}__ _курсив{i}_ текст{i}"); + if (i < paragraphs - 1) + inputBuilder.Append("\n\n"); + + expectedBuilder.Append("

жирный").Append(i) + .Append(" курсив").Append(i) + .Append(" текст").Append(i) + .Append("

"); + } + + var input = inputBuilder.ToString(); + var expected = expectedBuilder.ToString(); + + var html = _markdown.Render(input); + + html.Should().Be(expected); + html.Length.Should().Be(expected.Length); + } } \ No newline at end of file diff --git a/cs/Markdown.Tests/StrongTests.cs b/cs/Markdown.Tests/StrongTests.cs index e5a71b2ee..e63ec9e95 100644 --- a/cs/Markdown.Tests/StrongTests.cs +++ b/cs/Markdown.Tests/StrongTests.cs @@ -1,10 +1,43 @@ -using Xunit; +using FluentAssertions; +using Markdown.Core.Lexing; +using Markdown.Core.Parsing; +using Markdown.Core.Rendering; +using NUnit.Framework; + namespace Markdown.Tests; /// /// Тесты жирного шрифта /// +[TestFixture] public class StrongTests { - -} \ No newline at end of file + private Md _markdown; + + [SetUp] + public void Setup() + { + var lexer = new Lexer(); + var parser = new Parser(); + var renderer = new Renderer(); + _markdown = new Md(lexer, parser, renderer); + } + + [TestCase("__полужирный__", "

полужирный

", + TestName = "Одно слово полужирным шрифтом ")] + [TestCase("__Выделенный двумя символами текст__ должен становиться полужирным", + "

Выделенный двумя символами текст должен становиться полужирным

", + TestName = "Полужирный в предложении")] + [TestCase("сло__во__ внутри слова", "

слово внутри слова

", + TestName = "Двойное выделение внутри слова")] + [TestCase("Эти __ подчерки__ не работают", "

Эти __ подчерки__ не работают

", + TestName = "Не начинается, если после __ пробел")] + [TestCase("Эти __подчерки __ работают", "

Эти подчерки работают

", + TestName = "Двойное выделение допускает пробел внутри")] + public void Test(string inputText, string expectedHtml) + { + var html = _markdown.Render(inputText); + html.Should().Be(expectedHtml); + } +} + diff --git a/cs/Markdown.Tests/TextTests.cs b/cs/Markdown.Tests/TextTests.cs new file mode 100644 index 000000000..55702d07e --- /dev/null +++ b/cs/Markdown.Tests/TextTests.cs @@ -0,0 +1,41 @@ +using FluentAssertions; +using Markdown.Core.Lexing; +using Markdown.Core.Parsing; +using Markdown.Core.Rendering; +using NUnit.Framework; + +namespace Markdown.Tests; + +[TestFixture] +public class TextTests +{ + private Md _markdown; + + [SetUp] + public void Setup() + { + var lexer = new Lexer(); + var parser = new Parser(); + var renderer = new Renderer(); + _markdown = new Md(lexer, parser, renderer); + } + + [TestCase("Абракадабра", "

Абракадабра

",TestName = "

Простой текст без выделений

")] + [TestCase("Привет, как дела?", "

Привет, как дела?

",TestName = "

Простой текст без выделений

")] + [TestCase("Email: test@example.com", "

Email: test@example.com

",TestName = "

Текст с различными символами email

")] + [TestCase("Ссылка: https://example.com", "

Ссылка: https://example.com

", TestName = "Текст с URL")] + [TestCase("Первый параграф\nВторой параграф", "

Первый параграф

Второй параграф

", TestName = "Два параграфа")] + [TestCase("Первый\n\nТретий", "

Первый

Третий

", TestName = "Параграфы с пустой строкой")] + + public void Test(string inputText, string expectedText) + { + + var html = _markdown.Render(inputText); + + html.Should().Be(expectedText); + } + + + + +} \ No newline at end of file diff --git a/cs/Markdown/Markdown.csproj b/cs/Markdown/Markdown.csproj index 16f14dd25..eb5839529 100644 --- a/cs/Markdown/Markdown.csproj +++ b/cs/Markdown/Markdown.csproj @@ -10,5 +10,11 @@ + + + + + + diff --git a/cs/Markdown/Md.cs b/cs/Markdown/Md.cs index e0ca31ade..026bb02ea 100644 --- a/cs/Markdown/Md.cs +++ b/cs/Markdown/Md.cs @@ -7,14 +7,18 @@ namespace Markdown; /// /// Принимает текст в упрощённой разметке и возвращает HTML /// -public class Md(ILexer lexer, IParser parser, IHtmlRenderer htmlRenderer) +public class Md(ILexer lexer, IParser parser, IRenderer renderer) { private readonly ILexer _lexer = lexer; private readonly IParser _parser = parser; - private readonly IHtmlRenderer _htmlRenderer = htmlRenderer; + private readonly IRenderer _renderer = renderer; public string Render(string text) { - throw new NotImplementedException(); + var tokens = _lexer.Tokenize(text.AsMemory()); + var document = _parser.Parse(tokens); + return _renderer.Render(document); + } + } \ No newline at end of file diff --git a/cs/Markdown/Program.cs b/cs/Markdown/Program.cs index 674c654b9..da2daec1a 100644 --- a/cs/Markdown/Program.cs +++ b/cs/Markdown/Program.cs @@ -3,7 +3,7 @@ using Markdown.Core.Parsing; using Markdown.Core.Rendering; -var md = new Md(new Lexer(), new Parser(), new HtmlRenderer()); +var md = new Md(new Lexer(), new Parser(), new Renderer()); var input = Console.In.ReadToEnd(); var html = md.Render(input); Console.Write(html); \ No newline at end of file From 30d60b263f9ac9a365834f887ac02ac065de113e Mon Sep 17 00:00:00 2001 From: Krotkaya Date: Sun, 23 Nov 2025 22:59:12 +0500 Subject: [PATCH 3/6] =?UTF-8?q?=D0=A1=D0=B4=D0=B5=D0=BB=D0=B0=D0=BD=D0=B0?= =?UTF-8?q?=20=D0=B7=D0=B0=D0=B4=D0=B0=D1=87=D0=B0=20Markdown=20=D0=BD?= 
=?UTF-8?q?=D0=B0=203=20=D0=B1=D0=B0=D0=BB=D0=BB=D0=B0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Реализован рендеринг ссылки, все тесты проходятся, код почистила --- cs/Markdown.Core/Lexing/Lexer.cs | 66 +-- cs/Markdown.Core/Lexing/Token.cs | 10 +- cs/Markdown.Core/Lexing/TokenKind.cs | 21 +- cs/Markdown.Core/Parsing/InlineParser.cs | 237 +++++++++++ cs/Markdown.Core/Parsing/InlineValidator.cs | 165 ++++++++ cs/Markdown.Core/Parsing/Nodes/BlockNode.cs | 2 +- cs/Markdown.Core/Parsing/Nodes/InlineNode.cs | 1 - cs/Markdown.Core/Parsing/Nodes/LinkNode.cs | 7 + cs/Markdown.Core/Parsing/Nodes/Node.cs | 1 - .../Parsing/Nodes/ParagraphNode.cs | 2 - cs/Markdown.Core/Parsing/Nodes/StrongNode.cs | 1 - cs/Markdown.Core/Parsing/Parser.cs | 388 +----------------- cs/Markdown.Core/Rendering/Renderer.cs | 19 +- cs/Markdown.Tests/LinkTests.cs | 42 ++ cs/Markdown/Md.cs | 13 +- 15 files changed, 531 insertions(+), 444 deletions(-) create mode 100644 cs/Markdown.Core/Parsing/InlineParser.cs create mode 100644 cs/Markdown.Core/Parsing/InlineValidator.cs create mode 100644 cs/Markdown.Core/Parsing/Nodes/LinkNode.cs create mode 100644 cs/Markdown.Tests/LinkTests.cs diff --git a/cs/Markdown.Core/Lexing/Lexer.cs b/cs/Markdown.Core/Lexing/Lexer.cs index 8a7511062..acc23906c 100644 --- a/cs/Markdown.Core/Lexing/Lexer.cs +++ b/cs/Markdown.Core/Lexing/Lexer.cs @@ -24,81 +24,88 @@ public IEnumerable Tokenize(ReadOnlyMemory source) { case '#' when IsAtLineStart(tokens) && i + 1 < source.Length && tokensSpan[i + 1] == ' ': - tokens.Add(new Token(TokenKind.Hash, source.Slice(i, 1), - i)); + tokens.Add(new Token(TokenKind.Hash, source.Slice(i, 1), i)); i += 1; continue; case '\\' when i + 1 < source.Length: var next = tokensSpan[i + 1]; - if (next == '_' && i + 2 < source.Length && tokensSpan[i + 2] - == '_') + if (next == '_' && i + 2 < source.Length && tokensSpan[i + 2] == '_') { - tokens.Add(new Token(TokenKind.Text, source.Slice(i + - 1, 2), i)); 
+ tokens.Add(new Token(TokenKind.Text, source.Slice(i + 1, 2), i)); i += 3; continue; } if (IsSpecialCharacter(next)) { - tokens.Add(new Token(TokenKind.Text, source.Slice(i + - 1, 1), i)); + tokens.Add(new Token(TokenKind.Text, source.Slice(i + 1, 1), i)); i += 2; continue; } - tokens.Add(new Token(TokenKind.Text, source.Slice(i, 1), - i)); + tokens.Add(new Token(TokenKind.Text, source.Slice(i, 1), i)); i += 1; continue; case '\\': - tokens.Add(new Token(TokenKind.Text, source.Slice(i, 1), - i)); + tokens.Add(new Token(TokenKind.Text, source.Slice(i, 1), i)); i += 1; continue; case '_' when i + 1 < source.Length && tokensSpan[i + 1] == '_': - tokens.Add(new Token(TokenKind.DoubleUnderscore, - source.Slice(i, 2), i)); + tokens.Add(new Token(TokenKind.DoubleUnderscore, source.Slice(i, 2), i)); i += 2; continue; case '_' when i + 1 <= source.Length: - tokens.Add(new Token(TokenKind.Underscore, source.Slice(i, - 1), i)); + tokens.Add(new Token(TokenKind.Underscore, source.Slice(i, 1), i)); i += 1; continue; case ' ': - tokens.Add(new Token(TokenKind.Space, source.Slice(i, - 1), i)); + tokens.Add(new Token(TokenKind.Space, source.Slice(i, 1), i)); i += 1; continue; case '\n': - tokens.Add(new Token(TokenKind.NewLine, source.Slice(i, - 1), i)); + tokens.Add(new Token(TokenKind.NewLine, source.Slice(i, 1), i)); i += 1; continue; case '\r' when i + 1 < source.Length && tokensSpan[i + 1] == '\n': - tokens.Add(new Token(TokenKind.NewLine, source.Slice(i, - 2), i)); + tokens.Add(new Token(TokenKind.NewLine, source.Slice(i, 2), i)); i += 2; continue; + + case '[': + tokens.Add(new Token(TokenKind.LeftBracket, source.Slice(i, 1), i)); + i += 1; + continue; + + case ']': + tokens.Add(new Token(TokenKind.RightBracket, source.Slice(i, 1), i)); + i += 1; + continue; + + case '(': + tokens.Add(new Token(TokenKind.LeftParen, source.Slice(i, 1), i)); + i += 1; + continue; + + case ')': + tokens.Add(new Token(TokenKind.RightParen, source.Slice(i, 1), i)); + i += 1; + continue; } var 
startText = i; while (i < length) { symbol = tokensSpan[i]; - if (symbol is '\\' or '#' or '_' or '*' or '-' or '+' or ' ' - or '\n' or '\r') + if (symbol is '\\' or '#' or '_' or '*' or '-' or '+' or '[' or ']' or '(' or ')' or ' ' or '\n' or '\r') break; i++; - } if (i > startText) @@ -106,15 +113,14 @@ public IEnumerable Tokenize(ReadOnlyMemory source) tokens.Add(new Token(TokenKind.Text, source.Slice(startText, i - startText), startText)); } } - - tokens.Add(new Token(TokenKind.Eof, source.Slice(0, 0), length)); + tokens.Add(new Token(TokenKind.Eof, source[..0], length)); return tokens; } - private bool IsSpecialCharacter(char c) => c is '#' or '_' or '\\' or '*' - or '-' or '+'; + private static bool IsSpecialCharacter(char c) => + c is '#' or '_' or '\\' or '*' or '-' or '+' or '[' or ']' or '(' or ')'; - private bool IsAtLineStart(List tokens) => + private static bool IsAtLineStart(List tokens) => tokens.Count == 0 || tokens[^1].Kind == TokenKind.NewLine; } \ No newline at end of file diff --git a/cs/Markdown.Core/Lexing/Token.cs b/cs/Markdown.Core/Lexing/Token.cs index f2321e2e8..0b91d7071 100644 --- a/cs/Markdown.Core/Lexing/Token.cs +++ b/cs/Markdown.Core/Lexing/Token.cs @@ -1,16 +1,8 @@ namespace Markdown.Core.Lexing; -/// -/// Одна часть (токен) входного текста -/// public readonly struct Token(TokenKind kind, ReadOnlyMemory slice, int position) { public TokenKind Kind { get; init; } = kind; public ReadOnlyMemory Slice { get; init; } = slice; - public int Position { get; init; } = position; - - public override string ToString() - { - return $"{Kind} '{Slice}' at {Position}"; - } + } \ No newline at end of file diff --git a/cs/Markdown.Core/Lexing/TokenKind.cs b/cs/Markdown.Core/Lexing/TokenKind.cs index 3a82eb9e4..9c32bb791 100644 --- a/cs/Markdown.Core/Lexing/TokenKind.cs +++ b/cs/Markdown.Core/Lexing/TokenKind.cs @@ -1,15 +1,16 @@ namespace Markdown.Core.Lexing; -/// -/// Типы «частей» (токенов), на которые сканер разбивает текст -/// public enum 
TokenKind { - Text, // Обычный текст (любой непрерывный фрагмент без спецсимволов) - Underscore, // Одинарное подчёркивание '_', кандидат на курсивный шрифт - DoubleUnderscore, // Двойное подчёркивание '__', кандидат на полужирный шрифт - Hash, //Решётка '#', на начале абзаца вместе с пробелом "# " означает заголовок - Space, //Одиночный пробел ' ', нужен для проверки границ выделений - NewLine, //Перевод строки (например, '\n' или '\r\n') - Eof // Служебный маркер конца входа + Text, // Обычный текст + Underscore, // Курсивный шрифт + DoubleUnderscore, // Полужирный шрифт + Hash, //Заголовок + Space, //Одиночный пробел + NewLine, //Перевод строки + Eof, // Конец входа + LeftBracket, // Квадратная скобка '[' открывает текст ссылки + RightBracket, // Квадратная скобка ']' закрывает текст ссылки + LeftParen, // Круглая скобка '(' открывает адрес ссылки + RightParen, // Круглая скобка ')' закрывает адрес ссылки } \ No newline at end of file diff --git a/cs/Markdown.Core/Parsing/InlineParser.cs b/cs/Markdown.Core/Parsing/InlineParser.cs new file mode 100644 index 000000000..38ebe227c --- /dev/null +++ b/cs/Markdown.Core/Parsing/InlineParser.cs @@ -0,0 +1,237 @@ +using System.Text; +using Markdown.Core.Lexing; +using Markdown.Core.Parsing.Nodes; + +namespace Markdown.Core.Parsing; + +public class InlineParser( + IReadOnlyList tokens, + Func moveNext, + Func currentToken, + Func currentIndex, + Func isEndOfLine, + InlineValidator validator) +{ + public InlineNode? 
ParseInline() + { + var token = currentToken(); + if (token.Kind == TokenKind.Eof) + return null; + + return token.Kind switch + { + TokenKind.Text => ParseText(), + TokenKind.Underscore => ParseEmphasis(), + TokenKind.DoubleUnderscore => ParseStrong(), + TokenKind.LeftBracket => ParseLink(), + _ => ParseText() + }; + } + + private TextNode ParseText() + { + var node = new TextNode(currentToken().Slice.ToString()); + moveNext(); + return node; + } + + private InlineNode ParseEmphasis() + { + var startIndex = currentIndex() - 1; + moveNext(); + + if (IsInvalidEmphasisStart()) + return CreateTextNode("_"); + + var emphasis = new EmphasisNode(); + return ParseEmphasisContent(emphasis, startIndex); + } + + private bool IsInvalidEmphasisStart() => + currentToken().Kind is TokenKind.Space or TokenKind.NewLine or TokenKind.Eof; + + private InlineNode ParseEmphasisContent(EmphasisNode emphasis, int startIndex) + { + while (!IsEndOfContent()) + { + if (currentToken().Kind == TokenKind.Underscore) + { + var closeResult = TryCloseEmphasis(emphasis, startIndex); + if (closeResult.ShouldReturn) + return closeResult.Node; + } + else if (currentToken().Kind == TokenKind.DoubleUnderscore) + { + emphasis.Inlines.Add(CreateTextNode("__")); + moveNext(); + } + else + { + var inline = ParseInline(); + if (inline != null) + emphasis.Inlines.Add(inline); + } + } + return ConvertToTextNode(emphasis, "_"); + } + + private InlineNode ParseStrong() + { + var startIndex = currentIndex() - 1; + moveNext(); + + if (IsInvalidStrongStart()) + return ConvertToTextNode(new StrongNode(), "__"); + + var strong = new StrongNode(); + return ParseStrongContent(strong, startIndex); + } + + private bool IsInvalidStrongStart() => + currentToken().Kind is TokenKind.Space or TokenKind.NewLine or TokenKind.Eof; + + private InlineNode ParseStrongContent(StrongNode strong, int startIndex) + { + while (!IsEndOfContent()) + { + if (currentToken().Kind == TokenKind.DoubleUnderscore) + { + var closeResult = 
TryCloseStrong(strong, startIndex); + if (closeResult.ShouldReturn) + return closeResult.Node; + } + else if (currentToken().Kind == TokenKind.Underscore) + { + var emphasis = ParseEmphasis(); + strong.Inlines.Add(emphasis); + } + else + { + var inline = ParseInline(); + if (inline != null) + strong.Inlines.Add(inline); + } + } + return ConvertToTextNode(strong, "__"); + } + + private bool IsEndOfContent() => + currentToken().Kind is TokenKind.NewLine or TokenKind.Eof; + + private Result TryCloseStrong(StrongNode strong, int startIndex) + { + var closeIndex = currentIndex() - 1; + if (validator.IsValidStrongClose(tokens, startIndex, closeIndex)) + { + moveNext(); + return new Result(true, strong); + } + + strong.Inlines.Add(CreateTextNode("__")); + moveNext(); + return new Result(false, null); + } + + private Result TryCloseEmphasis(EmphasisNode emphasis, int startIndex) + { + var closeIndex = currentIndex() - 1; + if (validator.IsValidEmphasisClose(tokens, startIndex, closeIndex)) + { + moveNext(); + return new Result(true, emphasis); + } + emphasis.Inlines.Add(CreateTextNode("_")); + moveNext(); + return new Result(false, null); + } + + private InlineNode ParseLink() + { + var linkTextNodes = new List(); + moveNext(); + + while (!isEndOfLine() && currentToken().Kind != TokenKind.RightBracket) + { + var inline = ParseInline(); + if (inline != null) + linkTextNodes.Add(inline); + } + + if (currentToken().Kind != TokenKind.RightBracket) + return RestoreAsText("[", linkTextNodes); + + moveNext(); + + if (currentToken().Kind != TokenKind.LeftParen) + return RestoreAsText("[", linkTextNodes, "]"); + + moveNext(); + + var hrefBuilder = new StringBuilder(); + while (currentToken().Kind != TokenKind.Eof && + currentToken().Kind != TokenKind.RightParen && + currentToken().Kind != TokenKind.NewLine) + { + hrefBuilder.Append(currentToken().Slice.ToString()); + moveNext(); + } + + if (currentToken().Kind != TokenKind.RightParen) + return RestoreAsText("[", linkTextNodes, "](" 
+ hrefBuilder); + + moveNext(); + + var href = hrefBuilder.ToString(); + return new LinkNode(href, linkTextNodes); + } + + private InlineNode RestoreAsText(string prefix, IList nodes, string suffix = "") + { + var builder = new StringBuilder(prefix); + foreach (var node in nodes) + builder.Append(ExtractTextFromNode(node)); + builder.Append(suffix); + return new TextNode(builder.ToString()); + } + + private InlineNode ConvertToTextNode(InlineNode node, string prefix) + { + var textContent = new StringBuilder(); + textContent.Append(prefix); + textContent.Append(ExtractTextFromNode(node)); + return new TextNode(textContent.ToString()); + } + + private static string ExtractTextFromNode(InlineNode node) + { + var result = new StringBuilder(); + + switch (node) + { + case EmphasisNode emphasis: + foreach (var inline in emphasis.Inlines) + result.Append(ExtractTextFromNode(inline)); + break; + case StrongNode strong: + foreach (var inline in strong.Inlines) + result.Append(ExtractTextFromNode(inline)); + break; + case TextNode text: + result.Append(text.Text); + break; + case LinkNode link: + foreach (var inline in link.Inlines) + result.Append(ExtractTextFromNode(inline)); + break; + } + return result.ToString(); + } + + private static TextNode CreateTextNode(string text) => new(text); + + private class Result(bool shouldReturn, InlineNode node) + { + public bool ShouldReturn { get; } = shouldReturn; + public InlineNode Node { get; } = node; + } +} diff --git a/cs/Markdown.Core/Parsing/InlineValidator.cs b/cs/Markdown.Core/Parsing/InlineValidator.cs new file mode 100644 index 000000000..ba3a72125 --- /dev/null +++ b/cs/Markdown.Core/Parsing/InlineValidator.cs @@ -0,0 +1,165 @@ +using Markdown.Core.Lexing; + +namespace Markdown.Core.Parsing; + +public class InlineValidator +{ + public bool IsValidEmphasisClose(IReadOnlyList tokens, int startIndex, int closeIndex) + { + if (!HasValidOpeningBoundary(tokens, startIndex) || + !HasValidClosingBoundary(tokens, closeIndex)) + 
return false; + if (startIndex + 1 == closeIndex) + return false; + if (IsInDigitContext(tokens, startIndex) || IsInDigitContext(tokens, closeIndex)) + return false; + if (HasIntersectingDoubleInsideEmphasis(tokens, startIndex, closeIndex)) + return false; + if (IsInsideWord(tokens, startIndex) && IsInsideWord(tokens, closeIndex) && + ContainsWhitespaceBetween(tokens, startIndex, closeIndex)) + return false; + + return true; + } + + public bool IsValidStrongClose(IReadOnlyList tokens, int startIndex, int closeIndex) + { + if (!HasValidOpeningBoundary(tokens, startIndex) || + !HasValidClosingBoundary(tokens, closeIndex)) + return false; + if (startIndex + 1 == closeIndex) + return false; + if (IsInDigitContext(tokens, startIndex) || IsInDigitContext(tokens, closeIndex)) + return false; + if (HasIntersectingDoubleUnderscore(tokens, startIndex, closeIndex)) + return false; + if (HasIntersectingSingleInsideStrong(tokens, startIndex, closeIndex)) + return false; + if (IsInsideWord(tokens, startIndex) && IsInsideWord(tokens, closeIndex) && + ContainsWhitespaceBetween(tokens, startIndex, closeIndex)) + return false; + + return true; + } + + private bool HasValidOpeningBoundary(IReadOnlyList tokens, int startIndex) + { + if (startIndex + 1 >= tokens.Count) + return true; + + var next = tokens[startIndex + 1]; + return next.Kind is not TokenKind.Space and not TokenKind.NewLine; + } + + private bool HasValidClosingBoundary(IReadOnlyList tokens, int closeIndex) + { + if (closeIndex - 1 < 0) + return true; + + var prev = tokens[closeIndex - 1]; + if (prev.Kind != TokenKind.Space) + return true; + + if (closeIndex + 1 >= tokens.Count) + return true; + + var next = tokens[closeIndex + 1]; + return next.Kind is TokenKind.Space or TokenKind.NewLine or TokenKind.Eof; + } + + private bool IsInDigitContext(IReadOnlyList tokens, int index) => + HasDigitBefore(tokens, index) || HasDigitAfter(tokens, index); + + private static bool HasDigitBefore(IReadOnlyList tokens, int index) + { + 
if (index == 0) + return false; + + var prev = tokens[index - 1]; + return prev.Kind == TokenKind.Text && + prev.Slice.Length > 0 && + char.IsDigit(prev.Slice.Span[^1]); + } + + private static bool HasDigitAfter(IReadOnlyList tokens, int index) + { + if (index + 1 >= tokens.Count) + return false; + + var next = tokens[index + 1]; + return next.Kind == TokenKind.Text && + next.Slice.Length > 0 && + char.IsDigit(next.Slice.Span[0]); + } + + private static bool HasIntersectingDoubleInsideEmphasis(IReadOnlyList tokens, int startIndex, int closeIndex) + { + var pending = false; + + for (var i = startIndex + 1; i < closeIndex; i++) + { + if (tokens[i].Kind != TokenKind.DoubleUnderscore) + continue; + + pending = !pending; + } + + return pending; + } + + private static bool HasIntersectingSingleInsideStrong(IReadOnlyList tokens, int startIndex, int closeIndex) + { + var pending = false; + + for (var i = startIndex + 1; i < closeIndex; i++) + { + if (tokens[i].Kind != TokenKind.Underscore) + continue; + + pending = !pending; + } + + return pending; + } + + private static bool HasIntersectingDoubleUnderscore(IReadOnlyList tokens, int startIndex, int closeIndex) + { + for (var i = startIndex + 1; i < closeIndex; i++) + if (tokens[i].Kind == TokenKind.DoubleUnderscore) + return true; + return false; + } + + private bool IsInsideWord(IReadOnlyList tokens, int index) => + HasLetterOrDigitBefore(tokens, index) && HasLetterOrDigitAfter(tokens, index); + + private static bool HasLetterOrDigitBefore(IReadOnlyList tokens, int index) + { + if (index == 0) + return false; + + var prev = tokens[index - 1]; + return prev.Kind == TokenKind.Text && + prev.Slice.Length > 0 && + char.IsLetterOrDigit(prev.Slice.Span[^1]); + } + + private static bool HasLetterOrDigitAfter(IReadOnlyList tokens, int index) + { + if (index + 1 >= tokens.Count) + return false; + + var next = tokens[index + 1]; + return next.Kind == TokenKind.Text && + next.Slice.Length > 0 && + 
char.IsLetterOrDigit(next.Slice.Span[0]); + } + + private static bool ContainsWhitespaceBetween(IReadOnlyList tokens, int startIndex, int closeIndex) + { + for (var i = startIndex + 1; i < closeIndex; i++) + if (tokens[i].Kind == TokenKind.Space) + return true; + return false; + } +} diff --git a/cs/Markdown.Core/Parsing/Nodes/BlockNode.cs b/cs/Markdown.Core/Parsing/Nodes/BlockNode.cs index 0de59317c..6e1b59531 100644 --- a/cs/Markdown.Core/Parsing/Nodes/BlockNode.cs +++ b/cs/Markdown.Core/Parsing/Nodes/BlockNode.cs @@ -1,8 +1,8 @@ namespace Markdown.Core.Parsing.Nodes; + /// /// Базовый класс для блочных элементов документа (заголовки и абзацы) /// public abstract class BlockNode : Node { - } \ No newline at end of file diff --git a/cs/Markdown.Core/Parsing/Nodes/InlineNode.cs b/cs/Markdown.Core/Parsing/Nodes/InlineNode.cs index cee93c0de..3650e4259 100644 --- a/cs/Markdown.Core/Parsing/Nodes/InlineNode.cs +++ b/cs/Markdown.Core/Parsing/Nodes/InlineNode.cs @@ -5,5 +5,4 @@ namespace Markdown.Core.Parsing.Nodes; ///
public abstract class InlineNode : Node { - } \ No newline at end of file diff --git a/cs/Markdown.Core/Parsing/Nodes/LinkNode.cs b/cs/Markdown.Core/Parsing/Nodes/LinkNode.cs new file mode 100644 index 000000000..24f32aefd --- /dev/null +++ b/cs/Markdown.Core/Parsing/Nodes/LinkNode.cs @@ -0,0 +1,7 @@ +namespace Markdown.Core.Parsing.Nodes; + +public class LinkNode(string href, IList inlines) : InlineNode +{ + public string Href { get; } = href; + public IList Inlines { get; } = inlines; +} diff --git a/cs/Markdown.Core/Parsing/Nodes/Node.cs b/cs/Markdown.Core/Parsing/Nodes/Node.cs index b0f284fc2..48f0ab995 100644 --- a/cs/Markdown.Core/Parsing/Nodes/Node.cs +++ b/cs/Markdown.Core/Parsing/Nodes/Node.cs @@ -6,5 +6,4 @@ namespace Markdown.Core.Parsing.Nodes; /// public abstract class Node { - } \ No newline at end of file diff --git a/cs/Markdown.Core/Parsing/Nodes/ParagraphNode.cs b/cs/Markdown.Core/Parsing/Nodes/ParagraphNode.cs index e06a28bb0..a844132f8 100644 --- a/cs/Markdown.Core/Parsing/Nodes/ParagraphNode.cs +++ b/cs/Markdown.Core/Parsing/Nodes/ParagraphNode.cs @@ -1,5 +1,3 @@ -using Markdown.Core.Parsing.Nodes; - namespace Markdown.Core.Parsing.Nodes; /// diff --git a/cs/Markdown.Core/Parsing/Nodes/StrongNode.cs b/cs/Markdown.Core/Parsing/Nodes/StrongNode.cs index b4633a498..ea98c8f68 100644 --- a/cs/Markdown.Core/Parsing/Nodes/StrongNode.cs +++ b/cs/Markdown.Core/Parsing/Nodes/StrongNode.cs @@ -2,7 +2,6 @@ namespace Markdown.Core.Parsing.Nodes; /// /// Полужирный (__...__): контейнер инлайнов внутри выделения -/// ОГРАНИЧЕНИЕ: Может содержать EmphasisNode (по спецификации) /// public class StrongNode : InlineNode { diff --git a/cs/Markdown.Core/Parsing/Parser.cs b/cs/Markdown.Core/Parsing/Parser.cs index d45685a66..5659ae69f 100644 --- a/cs/Markdown.Core/Parsing/Parser.cs +++ b/cs/Markdown.Core/Parsing/Parser.cs @@ -1,4 +1,3 @@ -using System.Text; using Markdown.Core.Lexing; using Markdown.Core.Parsing.Nodes; @@ -8,9 +7,11 @@ public class Parser : IParser 
{ private IEnumerator _tokenPointer; private Token _currentToken; - private readonly List _allTokens = new(); + private readonly List _allTokens = []; private int _currentIndex; - + private readonly InlineValidator _inlineValidator = new(); + private InlineParser _inlineParser; + public DocumentNode Parse(IEnumerable tokens) { _allTokens.Clear(); @@ -18,6 +19,13 @@ public DocumentNode Parse(IEnumerable tokens) _tokenPointer = _allTokens.GetEnumerator(); _currentIndex = 0; + _inlineParser = new InlineParser( + _allTokens, + MoveOnNextToken, + () => _currentToken, + () => _currentIndex, + IsEndOfLine, + _inlineValidator); MoveOnNextToken(); var document = new DocumentNode(); @@ -62,23 +70,18 @@ private ParagraphNode ParseParagraph() while (!IsEndOfLine()) { - var inline = ParseInline(); + var inline = _inlineParser.ParseInline(); if (inline != null) paragraph.Inlines.Add(inline); } - if (_currentToken.Kind == TokenKind.NewLine) - { - MoveOnNextToken(); - SkipEmptyLines(); - } + if (_currentToken.Kind != TokenKind.NewLine) return paragraph; + MoveOnNextToken(); + SkipEmptyLines(); return paragraph; } - - - private bool IsEndOfLine() { return _currentToken.Kind == TokenKind.NewLine || _currentToken.Kind == TokenKind.Eof; @@ -93,7 +96,7 @@ private HeadingNode ParseHeading() while (!IsEndOfLine()) { - var inline = ParseInline(); + var inline = _inlineParser.ParseInline(); if (inline != null) heading.Inlines.Add(inline); } @@ -110,272 +113,6 @@ private void SkipSpace() MoveOnNextToken(); } - private InlineNode ParseInline() - { - if (_currentToken.Kind == TokenKind.Eof) - return null; - - return _currentToken.Kind switch - { - TokenKind.Text => ParseText(), - TokenKind.Underscore => ParseEmphasis(), - TokenKind.DoubleUnderscore => ParseStrong(), - TokenKind.Space => ParseText(), - _ => ParseText() - }; - } - - private TextNode ParseText() - { - var node = new TextNode(_currentToken.Slice.ToString()); - MoveOnNextToken(); - return node; - } - - private InlineNode 
ParseEmphasis() - { - var startIndex = _currentIndex - 1; - MoveOnNextToken(); - - if (IsInvalidEmphasisStart()) - return CreateTextNode("_"); - - var emphasis = new EmphasisNode(); - return ParseEmphasisContent(emphasis, startIndex); - } - - private bool IsInvalidEmphasisStart() => - _currentToken.Kind is TokenKind.Space or TokenKind.NewLine or - TokenKind.Eof; - - private InlineNode ParseEmphasisContent(EmphasisNode emphasis, int - startIndex) - { - while (!IsEndOfContent()) - { - if (_currentToken.Kind == TokenKind.Underscore) - { - var closeResult = TryCloseEmphasis(emphasis, startIndex); - if (closeResult.ShouldReturn) - return closeResult.Node; - } - else if (_currentToken.Kind == TokenKind.DoubleUnderscore) - { - emphasis.Inlines.Add(CreateTextNode("__")); - MoveOnNextToken(); - } - else - { - var inline = ParseInline(); - if (inline != null) - emphasis.Inlines.Add(inline); - } - } - - return ConvertToTextNode(emphasis, "_"); - } - - - private InlineNode ParseStrong() - { - var startIndex = _currentIndex - 1; - MoveOnNextToken(); - - if (IsInvalidStrongStart()) - return ConvertToTextNode(new StrongNode(), "__"); - - var strong = new StrongNode(); - return ParseStrongContent(strong, startIndex); - } - - private bool IsInvalidStrongStart() => - _currentToken.Kind is TokenKind.Space or TokenKind.NewLine or - TokenKind.Eof; - - private TextNode CreateTextNode(string text) => new(text); - private InlineNode ParseStrongContent(StrongNode strong, int startIndex) - { - while (!IsEndOfContent()) - { - if (_currentToken.Kind == TokenKind.DoubleUnderscore) - { - var closeResult = TryCloseStrong(strong, startIndex); - if (closeResult.ShouldReturn) - return closeResult.Node; - } - else if (_currentToken.Kind == TokenKind.Underscore) - { - var emphasis = ParseEmphasis(); - if (emphasis != null) - strong.Inlines.Add(emphasis); - } - else - { - var inline = ParseInline(); - if (inline != null) - strong.Inlines.Add(inline); - } - } - - return ConvertToTextNode(strong, "__"); 
- } - - private bool IsEndOfContent() => - _currentToken.Kind == TokenKind.NewLine || _currentToken.Kind == - TokenKind.Eof; - - private CloseResult TryCloseStrong(StrongNode strong, int startIndex) - { - if (IsValidStrongClose(startIndex)) - { - MoveOnNextToken(); - return new CloseResult(true, strong); - } - - strong.Inlines.Add(CreateTextNode("__")); - MoveOnNextToken(); - return new CloseResult(false, null); - } - - private bool IsValidEmphasisClose(int startIndex) - { - var closeIndex = _currentIndex - 1; - - if (!HasValidOpeningBoundary(startIndex) || ! - HasValidClosingBoundary(closeIndex)) - return false; - if (startIndex + 1 == closeIndex) - return false; - if (IsInDigitContext(startIndex) || IsInDigitContext(closeIndex)) - return false; - if (HasIntersectingDoubleInsideEmphasis(startIndex, closeIndex)) - return false; - if (IsInsideWord(startIndex) && IsInsideWord(closeIndex) && - ContainsWhitespaceBetween(startIndex, closeIndex)) - return false; - - return true; - } - - - private bool IsInDigitContext(int index) => - HasDigitBefore(index) || HasDigitAfter(index); - - - private bool HasDigitBefore(int index) - { - if (index == 0) - return false; - - var prev = _allTokens[index - 1]; - return prev.Kind == TokenKind.Text && - prev.Slice.Length > 0 && - char.IsDigit(prev.Slice.Span[^1]); - } - - private bool HasDigitAfter(int index) - { - if (index + 1 >= _allTokens.Count) - return false; - - var next = _allTokens[index + 1]; - return next.Kind == TokenKind.Text && - next.Slice.Length > 0 && - char.IsDigit(next.Slice.Span[0]); - } - - private bool HasValidOpeningBoundary(int startIndex) - { - if (startIndex + 1 >= _allTokens.Count) - return true; - - var next = _allTokens[startIndex + 1]; - return next.Kind is not TokenKind.Space and not TokenKind.NewLine; - } - - private bool HasValidClosingBoundary(int closeIndex) - { - if (closeIndex - 1 < 0) - return true; - - var prev = _allTokens[closeIndex - 1]; - if (prev.Kind != TokenKind.Space) - return true; - - 
if (closeIndex + 1 >= _allTokens.Count) - return true; - - var next = _allTokens[closeIndex + 1]; - return next.Kind is TokenKind.Space or TokenKind.NewLine or TokenKind.Eof; - } - - private bool IsValidStrongClose(int startIndex) - { - var closeIndex = _currentIndex - 1; - - if (!HasValidOpeningBoundary(startIndex) || ! - HasValidClosingBoundary(closeIndex)) - return false; - if (startIndex + 1 == closeIndex) - return false; - if (IsInDigitContext(startIndex) || IsInDigitContext(closeIndex)) - return false; - if (HasIntersectingDoubleUnderscore(startIndex, closeIndex)) - return false; - if (HasIntersectingSingleInsideStrong(startIndex, closeIndex)) - return false; - if (IsInsideWord(startIndex) && IsInsideWord(closeIndex) && - ContainsWhitespaceBetween(startIndex, closeIndex)) - return false; - - return true; - } - - private bool HasIntersectingDoubleUnderscore(int startIndex, int - closeIndex) - { - for (var i = startIndex + 1; i < closeIndex; i++) - if (_allTokens[i].Kind == TokenKind.DoubleUnderscore) - return true; - return false; - } - - private InlineNode ConvertToTextNode(InlineNode node, string prefix) - { - var textContent = new StringBuilder(); - textContent.Append(prefix); - textContent.Append(ExtractTextFromNode(node)); - return new TextNode(textContent.ToString()); - } - - private string ExtractTextFromNode(InlineNode node) - { - var result = new StringBuilder(); - - switch (node) - { - case EmphasisNode emphasis: - foreach (var inline in emphasis.Inlines) - { - result.Append(ExtractTextFromNode(inline)); - } - - break; - case StrongNode strong: - foreach (var inline in strong.Inlines) - { - result.Append(ExtractTextFromNode(inline)); - } - - break; - case TextNode text: - result.Append(text.Text); - break; - } - - return result.ToString(); - } - private bool IsAtStartOfLine() => _currentIndex <= 1 || _allTokens[_currentIndex - 2].Kind == TokenKind.NewLine; @@ -395,97 +132,4 @@ private Token MoveOnNextToken() return _currentToken; } - - private class 
CloseResult - { - public bool ShouldReturn { get; } - public InlineNode Node { get; } - - public CloseResult(bool shouldReturn, InlineNode node) - { - ShouldReturn = shouldReturn; - Node = node; - } - } - - private bool IsInsideWord(int index) => - HasLetterOrDigitBefore(index) && HasLetterOrDigitAfter(index); - - - private bool HasLetterOrDigitBefore(int index) - { - if (index == 0) - return false; - - var prev = _allTokens[index - 1]; - return prev.Kind == TokenKind.Text && - prev.Slice.Length > 0 && - char.IsLetterOrDigit(prev.Slice.Span[^1]); - } - - private bool HasLetterOrDigitAfter(int index) - { - if (index + 1 >= _allTokens.Count) - return false; - - var next = _allTokens[index + 1]; - return next.Kind == TokenKind.Text && - next.Slice.Length > 0 && - char.IsLetterOrDigit(next.Slice.Span[0]); - } - - - private bool ContainsWhitespaceBetween(int startIndex, int closeIndex) - { - for (var i = startIndex + 1; i < closeIndex; i++) - if (_allTokens[i].Kind == TokenKind.Space) - return true; - return false; - } - - private CloseResult TryCloseEmphasis(EmphasisNode emphasis, int - startIndex) - { - if (IsValidEmphasisClose(startIndex)) - { - MoveOnNextToken(); - return new CloseResult(true, emphasis); - } - - emphasis.Inlines.Add(CreateTextNode("_")); - MoveOnNextToken(); - return new CloseResult(false, null); - } - - private bool HasIntersectingDoubleInsideEmphasis(int startIndex, int - closeIndex) - { - var pending = false; - - for (var i = startIndex + 1; i < closeIndex; i++) - { - if (_allTokens[i].Kind != TokenKind.DoubleUnderscore) - continue; - - pending = !pending; - } - - return pending; - } - - private bool HasIntersectingSingleInsideStrong(int startIndex, int closeIndex) - { - var pending = false; - - for (var i = startIndex + 1; i < closeIndex; i++) - { - if (_allTokens[i].Kind != TokenKind.Underscore) - continue; - - pending = !pending; - } - - return pending; - } - } diff --git a/cs/Markdown.Core/Rendering/Renderer.cs 
b/cs/Markdown.Core/Rendering/Renderer.cs index e20551c51..6d15eebc1 100644 --- a/cs/Markdown.Core/Rendering/Renderer.cs +++ b/cs/Markdown.Core/Rendering/Renderer.cs @@ -21,24 +21,23 @@ public string Render(DocumentNode document) break; } } - return result.ToString(); } - private string RenderHeading(HeadingNode heading) + private static string RenderHeading(HeadingNode heading) { var content = RenderInlines(heading.Inlines); return $"{content}"; } - private string RenderParagraph(ParagraphNode paragraph) + private static string RenderParagraph(ParagraphNode paragraph) { var content = RenderInlines(paragraph.Inlines); return $"

{content}

"; } - private string RenderInlines(IList inlines) + private static string RenderInlines(IList inlines) { var builder = new StringBuilder(); @@ -47,7 +46,7 @@ private string RenderInlines(IList inlines) switch (inline) { case TextNode text: - builder.Append(EscapeHtml(text.Text)); + builder.Append(Escape(text.Text)); break; case EmphasisNode emphasis: builder.Append(""); @@ -59,13 +58,19 @@ private string RenderInlines(IList inlines) builder.Append(RenderInlines(strong.Inlines)); builder.Append(""); break; + case LinkNode link: + builder.Append(""); + builder.Append(RenderInlines(link.Inlines)); + builder.Append(""); + break; } } - return builder.ToString(); } - private string EscapeHtml(string text) => + private static string Escape(string text) => text.Replace("&", "&") .Replace("<", "<") .Replace(">", ">") diff --git a/cs/Markdown.Tests/LinkTests.cs b/cs/Markdown.Tests/LinkTests.cs new file mode 100644 index 000000000..02726e544 --- /dev/null +++ b/cs/Markdown.Tests/LinkTests.cs @@ -0,0 +1,42 @@ +using FluentAssertions; +using Markdown.Core.Lexing; +using Markdown.Core.Parsing; +using Markdown.Core.Rendering; +using NUnit.Framework; + +namespace Markdown.Tests; + +[TestFixture] +public class LinkTests +{ + private Md _markdown; + + [SetUp] + public void Setup() + { + var lexer = new Lexer(); + var parser = new Parser(); + var renderer = new Renderer(); + _markdown = new Md(lexer, parser, renderer); + } + + [TestCase("[ссылка](https://example.com)", "

ссылка

", + TestName = "Простая ссылка")] + [TestCase("Перед [ссылка](url) после", "

Перед ссылка после

", + TestName = "Ссылка в середине текста")] + [TestCase("[незакрытая ссылка(url)", "

[незакрытая ссылка(url)

", + TestName = "Нет закрывающей скобки – остаётся текст")] + [TestCase("[:текст](url)", "

:текст

", + TestName = "Допустимые символы в тексте ссылки")] + [TestCase("[ссылка](url с пробелом)", "

ссылка

", + TestName = "URL допускает пробелы")] + [TestCase("[ссылка]url)", "

[ссылка]url)

", + TestName = "Нет круглых скобок – остаётся текст")] + [TestCase("\\[ссылка](url)", "

[ссылка](url)

", + TestName = "Экранированная квадратная скобка не образует ссылку")] + public void Render_ShouldHandleLinks(string markdown, string expectedHtml) + { + var html = _markdown.Render(markdown); + html.Should().Be(expectedHtml); + } +} diff --git a/cs/Markdown/Md.cs b/cs/Markdown/Md.cs index 026bb02ea..6fe995fe0 100644 --- a/cs/Markdown/Md.cs +++ b/cs/Markdown/Md.cs @@ -4,20 +4,13 @@ namespace Markdown; -/// -/// Принимает текст в упрощённой разметке и возвращает HTML -/// public class Md(ILexer lexer, IParser parser, IRenderer renderer) { - private readonly ILexer _lexer = lexer; - private readonly IParser _parser = parser; - private readonly IRenderer _renderer = renderer; - public string Render(string text) { - var tokens = _lexer.Tokenize(text.AsMemory()); - var document = _parser.Parse(tokens); - return _renderer.Render(document); + var tokens = lexer.Tokenize(text.AsMemory()); + var document = parser.Parse(tokens); + return renderer.Render(document); } From 73ed0d4d297e0b315b54ea1e37df676bacd427ad Mon Sep 17 00:00:00 2001 From: Krotkaya Date: Mon, 24 Nov 2025 12:16:55 +0500 Subject: [PATCH 4/6] =?UTF-8?q?=D0=98=D1=81=D0=BF=D1=80=D0=B0=D0=B2=D0=BB?= =?UTF-8?q?=D0=B5=D0=BD=D1=8B=20=D0=B1=D0=B0=D0=B3=D0=B8,=20=D0=BF=D0=BE?= =?UTF-8?q?=D0=B2=D1=8B=D1=88=D0=B5=D0=BD=D0=B0=20=D1=87=D0=B8=D1=82=D0=B0?= =?UTF-8?q?=D0=B5=D0=BC=D0=BE=D1=81=D1=82=D1=8C=20=D0=BA=D0=BE=D0=B4=D0=B0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- cs/Markdown.Core/Lexing/Lexer.cs | 31 ++-- cs/Markdown.Core/Parsing/InlineParser.cs | 161 ++++++++++---------- cs/Markdown.Core/Parsing/InlineValidator.cs | 8 +- cs/Markdown.Core/Parsing/Parser.cs | 22 +-- cs/Markdown.Tests/EmphasisTests.cs | 5 +- cs/Markdown.Tests/HeadingTests.cs | 2 +- cs/Markdown.Tests/PerfTests.cs | 56 ------- cs/Markdown.Tests/PerfomanceTests.cs | 98 ++++++++++++ cs/Markdown/Md.cs | 3 +- 9 files changed, 207 insertions(+), 179 deletions(-) delete mode 100644 
cs/Markdown.Tests/PerfTests.cs create mode 100644 cs/Markdown.Tests/PerfomanceTests.cs diff --git a/cs/Markdown.Core/Lexing/Lexer.cs b/cs/Markdown.Core/Lexing/Lexer.cs index acc23906c..db70bdba4 100644 --- a/cs/Markdown.Core/Lexing/Lexer.cs +++ b/cs/Markdown.Core/Lexing/Lexer.cs @@ -22,22 +22,26 @@ public IEnumerable Tokenize(ReadOnlyMemory source) switch (symbol) { - case '#' when IsAtLineStart(tokens) && i + 1 < source.Length - && tokensSpan[i + 1] == ' ': + case '#' when IsAtLineStart(tokens) && i + 1 < length && tokensSpan[i + 1] == ' ': tokens.Add(new Token(TokenKind.Hash, source.Slice(i, 1), i)); i += 1; continue; - case '\\' when i + 1 < source.Length: - var next = tokensSpan[i + 1]; - if (next == '_' && i + 2 < source.Length && tokensSpan[i + 2] == '_') + case '#': + tokens.Add(new Token(TokenKind.Text, source.Slice(i, 1), i)); + i += 1; + continue; + + case '\\' when i + 1 < length: + var nextToken = tokensSpan[i + 1]; + if (nextToken == '_' && i + 2 < length && tokensSpan[i + 2] == '_') { tokens.Add(new Token(TokenKind.Text, source.Slice(i + 1, 2), i)); i += 3; continue; } - if (IsSpecialCharacter(next)) + if (IsSpecialCharacter(nextToken)) { tokens.Add(new Token(TokenKind.Text, source.Slice(i + 1, 1), i)); i += 2; @@ -53,12 +57,12 @@ public IEnumerable Tokenize(ReadOnlyMemory source) i += 1; continue; - case '_' when i + 1 < source.Length && tokensSpan[i + 1] == '_': + case '_' when i + 1 < length && tokensSpan[i + 1] == '_': tokens.Add(new Token(TokenKind.DoubleUnderscore, source.Slice(i, 2), i)); i += 2; continue; - case '_' when i + 1 <= source.Length: + case '_' when i + 1 <= length: tokens.Add(new Token(TokenKind.Underscore, source.Slice(i, 1), i)); i += 1; continue; @@ -73,7 +77,7 @@ public IEnumerable Tokenize(ReadOnlyMemory source) i += 1; continue; - case '\r' when i + 1 < source.Length && tokensSpan[i + 1] == '\n': + case '\r' when i + 1 < length && tokensSpan[i + 1] == '\n': tokens.Add(new Token(TokenKind.NewLine, source.Slice(i, 2), i)); i += 
2; continue; @@ -99,11 +103,11 @@ public IEnumerable Tokenize(ReadOnlyMemory source) continue; } - var startText = i; + var startText = i; while (i < length) { symbol = tokensSpan[i]; - if (symbol is '\\' or '#' or '_' or '*' or '-' or '+' or '[' or ']' or '(' or ')' or ' ' or '\n' or '\r') + if (IsSpecialCharacter(symbol) || symbol is ' ' or '\n' or '\r') break; i++; } @@ -118,9 +122,8 @@ public IEnumerable Tokenize(ReadOnlyMemory source) } private static bool IsSpecialCharacter(char c) => - c is '#' or '_' or '\\' or '*' or '-' or '+' or '[' or ']' or '(' or ')'; + c is '#' or '_' or '\\' or '[' or ']' or '(' or ')'; private static bool IsAtLineStart(List tokens) => tokens.Count == 0 || tokens[^1].Kind == TokenKind.NewLine; - -} \ No newline at end of file +} diff --git a/cs/Markdown.Core/Parsing/InlineParser.cs b/cs/Markdown.Core/Parsing/InlineParser.cs index 38ebe227c..297b5292a 100644 --- a/cs/Markdown.Core/Parsing/InlineParser.cs +++ b/cs/Markdown.Core/Parsing/InlineParser.cs @@ -4,7 +4,7 @@ namespace Markdown.Core.Parsing; -public class InlineParser( +internal class InlineParser( IReadOnlyList tokens, Func moveNext, Func currentToken, @@ -46,6 +46,58 @@ private InlineNode ParseEmphasis() var emphasis = new EmphasisNode(); return ParseEmphasisContent(emphasis, startIndex); } + + private InlineNode ParseStrong() + { + var startIndex = currentIndex() - 1; + moveNext(); + + if (IsInvalidStrongStart()) + return ConvertToTextNode(new StrongNode(), "__"); + + var strong = new StrongNode(); + return ParseStrongContent(strong, startIndex); + } + + private InlineNode ParseLink() + { + var linkTextNodes = new List(); + moveNext(); + + while (!isEndOfLine() && currentToken().Kind != TokenKind.RightBracket) + { + var inline = ParseInline(); + if (inline != null) + linkTextNodes.Add(inline); + } + + if (currentToken().Kind != TokenKind.RightBracket) + return RestoreAsText("[", linkTextNodes); + + moveNext(); + + if (currentToken().Kind != TokenKind.LeftParen) + return 
RestoreAsText("[", linkTextNodes, "]"); + + moveNext(); + + var hrefBuilder = new StringBuilder(); + while (currentToken().Kind != TokenKind.Eof && + currentToken().Kind != TokenKind.RightParen && + currentToken().Kind != TokenKind.NewLine) + { + hrefBuilder.Append(currentToken().Slice.ToString()); + moveNext(); + } + + if (currentToken().Kind != TokenKind.RightParen) + return RestoreAsText("[", linkTextNodes, "](" + hrefBuilder); + + moveNext(); + + var href = hrefBuilder.ToString(); + return new LinkNode(href, linkTextNodes); + } private bool IsInvalidEmphasisStart() => currentToken().Kind is TokenKind.Space or TokenKind.NewLine or TokenKind.Eof; @@ -56,9 +108,8 @@ private InlineNode ParseEmphasisContent(EmphasisNode emphasis, int startIndex) { if (currentToken().Kind == TokenKind.Underscore) { - var closeResult = TryCloseEmphasis(emphasis, startIndex); - if (closeResult.ShouldReturn) - return closeResult.Node; + if (TryCloseEmphasis(emphasis, startIndex, out var node)) + return node!; } else if (currentToken().Kind == TokenKind.DoubleUnderscore) { @@ -75,19 +126,7 @@ private InlineNode ParseEmphasisContent(EmphasisNode emphasis, int startIndex) return ConvertToTextNode(emphasis, "_"); } - private InlineNode ParseStrong() - { - var startIndex = currentIndex() - 1; - moveNext(); - - if (IsInvalidStrongStart()) - return ConvertToTextNode(new StrongNode(), "__"); - - var strong = new StrongNode(); - return ParseStrongContent(strong, startIndex); - } - - private bool IsInvalidStrongStart() => + private bool IsInvalidStrongStart() => currentToken().Kind is TokenKind.Space or TokenKind.NewLine or TokenKind.Eof; private InlineNode ParseStrongContent(StrongNode strong, int startIndex) @@ -96,9 +135,8 @@ private InlineNode ParseStrongContent(StrongNode strong, int startIndex) { if (currentToken().Kind == TokenKind.DoubleUnderscore) { - var closeResult = TryCloseStrong(strong, startIndex); - if (closeResult.ShouldReturn) - return closeResult.Node; + if 
(TryCloseStrong(strong, startIndex, out var node)) + return node!; } else if (currentToken().Kind == TokenKind.Underscore) { @@ -115,77 +153,42 @@ private InlineNode ParseStrongContent(StrongNode strong, int startIndex) return ConvertToTextNode(strong, "__"); } - private bool IsEndOfContent() => - currentToken().Kind is TokenKind.NewLine or TokenKind.Eof; - - private Result TryCloseStrong(StrongNode strong, int startIndex) - { - var closeIndex = currentIndex() - 1; - if (validator.IsValidStrongClose(tokens, startIndex, closeIndex)) - { - moveNext(); - return new Result(true, strong); - } - - strong.Inlines.Add(CreateTextNode("__")); - moveNext(); - return new Result(false, null); - } - - private Result TryCloseEmphasis(EmphasisNode emphasis, int startIndex) + private bool IsEndOfContent() => currentToken().Kind is TokenKind.NewLine or TokenKind.Eof; + + private bool TryCloseEmphasis(EmphasisNode emphasis, int startIndex, out InlineNode? node) { var closeIndex = currentIndex() - 1; if (validator.IsValidEmphasisClose(tokens, startIndex, closeIndex)) { moveNext(); - return new Result(true, emphasis); + node = emphasis; + return true; } emphasis.Inlines.Add(CreateTextNode("_")); moveNext(); - return new Result(false, null); + node = null; + return false; } - private InlineNode ParseLink() + private bool TryCloseStrong(StrongNode strong, int startIndex, out InlineNode? 
node) { - var linkTextNodes = new List(); - moveNext(); - - while (!isEndOfLine() && currentToken().Kind != TokenKind.RightBracket) - { - var inline = ParseInline(); - if (inline != null) - linkTextNodes.Add(inline); - } - - if (currentToken().Kind != TokenKind.RightBracket) - return RestoreAsText("[", linkTextNodes); - - moveNext(); - - if (currentToken().Kind != TokenKind.LeftParen) - return RestoreAsText("[", linkTextNodes, "]"); - - moveNext(); - - var hrefBuilder = new StringBuilder(); - while (currentToken().Kind != TokenKind.Eof && - currentToken().Kind != TokenKind.RightParen && - currentToken().Kind != TokenKind.NewLine) + var closeIndex = currentIndex() - 1; + if (validator.IsValidStrongClose(tokens, startIndex, closeIndex)) { - hrefBuilder.Append(currentToken().Slice.ToString()); moveNext(); + node = strong; + return true; } - if (currentToken().Kind != TokenKind.RightParen) - return RestoreAsText("[", linkTextNodes, "](" + hrefBuilder); - - moveNext(); - - var href = hrefBuilder.ToString(); - return new LinkNode(href, linkTextNodes); + strong.Inlines.Add(CreateTextNode("__")); + moveNext(); + node = null; + return false; } + + private static TextNode CreateTextNode(string text) => new(text); - private InlineNode RestoreAsText(string prefix, IList nodes, string suffix = "") + private static TextNode RestoreAsText(string prefix, IList nodes, string suffix = "") { var builder = new StringBuilder(prefix); foreach (var node in nodes) @@ -194,7 +197,7 @@ private InlineNode RestoreAsText(string prefix, IList nodes, string return new TextNode(builder.ToString()); } - private InlineNode ConvertToTextNode(InlineNode node, string prefix) + private static TextNode ConvertToTextNode(InlineNode node, string prefix) { var textContent = new StringBuilder(); textContent.Append(prefix); @@ -226,12 +229,4 @@ private static string ExtractTextFromNode(InlineNode node) } return result.ToString(); } - - private static TextNode CreateTextNode(string text) => new(text); - - 
private class Result(bool shouldReturn, InlineNode node) - { - public bool ShouldReturn { get; } = shouldReturn; - public InlineNode Node { get; } = node; - } } diff --git a/cs/Markdown.Core/Parsing/InlineValidator.cs b/cs/Markdown.Core/Parsing/InlineValidator.cs index ba3a72125..fd63da613 100644 --- a/cs/Markdown.Core/Parsing/InlineValidator.cs +++ b/cs/Markdown.Core/Parsing/InlineValidator.cs @@ -2,7 +2,7 @@ namespace Markdown.Core.Parsing; -public class InlineValidator +internal class InlineValidator { public bool IsValidEmphasisClose(IReadOnlyList tokens, int startIndex, int closeIndex) { @@ -42,7 +42,7 @@ public bool IsValidStrongClose(IReadOnlyList tokens, int startIndex, int return true; } - private bool HasValidOpeningBoundary(IReadOnlyList tokens, int startIndex) + private static bool HasValidOpeningBoundary(IReadOnlyList tokens, int startIndex) { if (startIndex + 1 >= tokens.Count) return true; @@ -51,7 +51,7 @@ private bool HasValidOpeningBoundary(IReadOnlyList tokens, int startIndex return next.Kind is not TokenKind.Space and not TokenKind.NewLine; } - private bool HasValidClosingBoundary(IReadOnlyList tokens, int closeIndex) + private static bool HasValidClosingBoundary(IReadOnlyList tokens, int closeIndex) { if (closeIndex - 1 < 0) return true; @@ -67,7 +67,7 @@ private bool HasValidClosingBoundary(IReadOnlyList tokens, int closeIndex return next.Kind is TokenKind.Space or TokenKind.NewLine or TokenKind.Eof; } - private bool IsInDigitContext(IReadOnlyList tokens, int index) => + private static bool IsInDigitContext(IReadOnlyList tokens, int index) => HasDigitBefore(tokens, index) || HasDigitAfter(tokens, index); private static bool HasDigitBefore(IReadOnlyList tokens, int index) diff --git a/cs/Markdown.Core/Parsing/Parser.cs b/cs/Markdown.Core/Parsing/Parser.cs index 5659ae69f..3fbd6fdc5 100644 --- a/cs/Markdown.Core/Parsing/Parser.cs +++ b/cs/Markdown.Core/Parsing/Parser.cs @@ -60,10 +60,11 @@ private void SkipEmptyLines() private bool IsEndOfFile() 
=> _currentToken.Kind == TokenKind.Eof; - private bool IsHeadingStart() => - _currentToken.Kind == TokenKind.Hash && IsAtStartOfLine(); - - + private bool IsHeadingStart() => _currentToken.Kind == TokenKind.Hash && IsAtStartOfLine(); + + private bool IsAtStartOfLine() => _currentIndex <= 1 || + _allTokens[_currentIndex - 2].Kind == TokenKind.NewLine; + private ParagraphNode ParseParagraph() { var paragraph = new ParagraphNode(); @@ -82,10 +83,7 @@ private ParagraphNode ParseParagraph() return paragraph; } - private bool IsEndOfLine() - { - return _currentToken.Kind == TokenKind.NewLine || _currentToken.Kind == TokenKind.Eof; - } + private bool IsEndOfLine() => _currentToken.Kind is TokenKind.NewLine or TokenKind.Eof; private HeadingNode ParseHeading() { @@ -112,12 +110,7 @@ private void SkipSpace() if (_currentToken.Kind == TokenKind.Space) MoveOnNextToken(); } - - private bool IsAtStartOfLine() => - _currentIndex <= 1 || _allTokens[_currentIndex - 2].Kind == - TokenKind.NewLine; - - + private Token MoveOnNextToken() { if (!_tokenPointer.MoveNext()) @@ -129,7 +122,6 @@ private Token MoveOnNextToken() _currentToken = _tokenPointer.Current; _currentIndex++; } - return _currentToken; } } diff --git a/cs/Markdown.Tests/EmphasisTests.cs b/cs/Markdown.Tests/EmphasisTests.cs index 1f271d0b8..687d52d8e 100644 --- a/cs/Markdown.Tests/EmphasisTests.cs +++ b/cs/Markdown.Tests/EmphasisTests.cs @@ -6,9 +6,6 @@ namespace Markdown.Tests; -/// -/// Тесты курсивного шрифта -/// public class EmphasisTests { private Md _markdown; @@ -34,4 +31,4 @@ public void Test(string inputText, string expectedHtml) var html = _markdown.Render(inputText); html.Should().Be(expectedHtml); } -} \ No newline at end of file +} diff --git a/cs/Markdown.Tests/HeadingTests.cs b/cs/Markdown.Tests/HeadingTests.cs index 5949ee4fd..61371a7b9 100644 --- a/cs/Markdown.Tests/HeadingTests.cs +++ b/cs/Markdown.Tests/HeadingTests.cs @@ -24,7 +24,7 @@ public void Setup() [TestCase("# Заголовок", "

Заголовок

", TestName = "Простой заголовок")] - [TestCase("# Заголовок __с _разными_ символами__", "

Заголовок с разными символами

", + [TestCase("# Заголовок __с _разными_ символами__", "

Заголовок с разными символами

", TestName = "Заголовок с вложенными тегами")] [TestCase("Текст # не заголовок", "

Текст # не заголовок

", TestName = "Решетка в середине строки — не заголовок")] diff --git a/cs/Markdown.Tests/PerfTests.cs b/cs/Markdown.Tests/PerfTests.cs deleted file mode 100644 index 0e767b8cb..000000000 --- a/cs/Markdown.Tests/PerfTests.cs +++ /dev/null @@ -1,56 +0,0 @@ -using System.Text; -using FluentAssertions; -using Markdown.Core.Lexing; -using Markdown.Core.Parsing; -using Markdown.Core.Rendering; -using NUnit.Framework; -namespace Markdown.Tests; - -/// -/// Тесты, проверяющие производительность -/// -[TestFixture] -public class PerfTests -{ - private Md _markdown; - - [SetUp] - public void Setup() - { - var lexer = new Lexer(); - var parser = new Parser(); - var renderer = new Renderer(); - _markdown = new Md(lexer, parser, renderer); - } - - [Test] - [Timeout(2000)] - public void Render_ShouldHandleLongInputLinearly() - { - const int paragraphs = 5000; - - var inputBuilder = new StringBuilder(); - var expectedBuilder = new StringBuilder(); - - for (var i = 0; i < paragraphs; i++) - { - inputBuilder.Append($"__жирный{i}__ _курсив{i}_ текст{i}"); - if (i < paragraphs - 1) - inputBuilder.Append("\n\n"); - - expectedBuilder.Append("

жирный").Append(i) - .Append(" курсив").Append(i) - .Append(" текст").Append(i) - .Append("

"); - } - - var input = inputBuilder.ToString(); - var expected = expectedBuilder.ToString(); - - var html = _markdown.Render(input); - - html.Should().Be(expected); - html.Length.Should().Be(expected.Length); - } - -} \ No newline at end of file diff --git a/cs/Markdown.Tests/PerfomanceTests.cs b/cs/Markdown.Tests/PerfomanceTests.cs new file mode 100644 index 000000000..267216340 --- /dev/null +++ b/cs/Markdown.Tests/PerfomanceTests.cs @@ -0,0 +1,98 @@ +using System.Diagnostics; +using System.Text; +using FluentAssertions; +using Markdown.Core.Lexing; +using Markdown.Core.Parsing; +using Markdown.Core.Rendering; +using NUnit.Framework; + +namespace Markdown.Tests; + +[TestFixture] +public class PerformanceTests +{ + private Md _markdown; + + [SetUp] + public void Setup() + { + var lexer = new Lexer(); + var parser = new Parser(); + var renderer = new Renderer(); + _markdown = new Md(lexer, parser, renderer); + } + + [Test] + public void Render_ShouldHandleLongInputLinearly() + { + const int paragraphs = 5000; + + var inputBuilder = new StringBuilder(); + var expectedBuilder = new StringBuilder(); + + for (var i = 0; i < paragraphs; i++) + { + inputBuilder.Append($"__жирный__ {i} _курсив_ {i} текст"); + if (i < paragraphs - 1) + inputBuilder.Append("\n\n"); + + expectedBuilder.Append("

жирный ") + .Append(i) + .Append(" курсив ") + .Append(i) + .Append(" текст

"); + } + + var input = inputBuilder.ToString(); + var expected = expectedBuilder.ToString(); + + var html = _markdown.Render(input); + + html.Should().Be(expected); + } + + [Test] + public void Render_ShouldScaleApproximatelyLinearlyWithInputSize() + { + const int smallParagraphs = 500; + const int largeParagraphs = 5000; + + var smallInput = BuildRepeatedParagraphs(smallParagraphs, "__жирный__ _курсив_ текст"); + var largeInput = BuildRepeatedParagraphs(largeParagraphs, "__жирный__ _курсив_ текст"); + + var smallDuration = MeasureMedianRenderMilliseconds(smallInput); + var largeDuration = MeasureMedianRenderMilliseconds(largeInput); + + var baseline = Math.Max(1, smallDuration); + largeDuration.Should().BeLessThanOrEqualTo(baseline * 25); + } + + private long MeasureMedianRenderMilliseconds(string input) + { + const int runs = 3; + var results = new long[runs]; + for (var i = 0; i < runs; i++) + { + var sw = Stopwatch.StartNew(); + _markdown.Render(input); + sw.Stop(); + results[i] = sw.ElapsedMilliseconds; + } + Array.Sort(results); + return results[runs / 2]; + } + + private static string BuildRepeatedParagraphs(int count, string paragraph) + { + var builder = new StringBuilder(); + + for (var i = 0; i < count; i++) + { + builder.Append(paragraph); + if (i < count - 1) + builder.Append("\n\n"); + } + return builder.ToString(); + } + +} diff --git a/cs/Markdown/Md.cs b/cs/Markdown/Md.cs index 6fe995fe0..13003f4ba 100644 --- a/cs/Markdown/Md.cs +++ b/cs/Markdown/Md.cs @@ -8,10 +8,9 @@ public class Md(ILexer lexer, IParser parser, IRenderer renderer) { public string Render(string text) { - var tokens = lexer.Tokenize(text.AsMemory()); + var tokens = lexer.Tokenize(text.AsMemory()); var document = parser.Parse(tokens); return renderer.Render(document); - } } \ No newline at end of file From d610154456be4dd8b7f5b0ff4d9291f3f4ecafc7 Mon Sep 17 00:00:00 2001 From: Krotkaya Date: Mon, 24 Nov 2025 12:19:38 +0500 Subject: [PATCH 5/6] 
=?UTF-8?q?=D0=A3=D0=B1=D1=80=D0=B0=D0=BD=20=D0=BD?= =?UTF-8?q?=D0=B5=D0=BA=D0=BE=D1=80=D1=80=D0=B5=D0=BA=D1=82=D0=BD=D1=8B?= =?UTF-8?q?=D0=B9=20=D1=82=D0=B5=D1=81=D1=82=20=D0=B8=20=D0=B4=D0=BE=D0=B1?= =?UTF-8?q?=D0=B0=D0=B2=D0=BB=D0=B5=D0=BD=D1=8B=20=D0=BE=D1=82=D1=81=D1=82?= =?UTF-8?q?=D1=83=D0=BF=D1=8B?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- cs/Markdown.Tests/EscapingTests.cs | 4 +--- cs/Markdown.Tests/LinkTests.cs | 4 ++-- cs/Markdown.Tests/StrongTests.cs | 4 ++-- cs/Markdown.Tests/TextTests.cs | 18 ++++++++++++------ 4 files changed, 17 insertions(+), 13 deletions(-) diff --git a/cs/Markdown.Tests/EscapingTests.cs b/cs/Markdown.Tests/EscapingTests.cs index 91d13ea51..6d0eb69b1 100644 --- a/cs/Markdown.Tests/EscapingTests.cs +++ b/cs/Markdown.Tests/EscapingTests.cs @@ -30,9 +30,7 @@ public void Setup() @"

\вот это будет выделено тегом

", TestName = "Экранирование символа экранирования")] [TestCase("__Жирное с \\__ внутри__", "

Жирное с __ внутри

", - TestName = "Экранирование двойного подчеркивания в полужирном")] - // [TestCase("сло\\_во _курсив_", "

слово курсив

", - // TestName = "Двойное экранирование внутри слова")] + TestName = "Экранирование двойного подчеркивания в полужирном")] [TestCase("_Привет\\_", "

_Привет_

", TestName = "Экранирование подчёркивания внутри курсива оставляет текст")] diff --git a/cs/Markdown.Tests/LinkTests.cs b/cs/Markdown.Tests/LinkTests.cs index 02726e544..7b7acd264 100644 --- a/cs/Markdown.Tests/LinkTests.cs +++ b/cs/Markdown.Tests/LinkTests.cs @@ -21,9 +21,9 @@ public void Setup() } [TestCase("[ссылка](https://example.com)", "

ссылка

", - TestName = "Простая ссылка")] + TestName = "Простая ссылка")] [TestCase("Перед [ссылка](url) после", "

Перед ссылка после

", - TestName = "Ссылка в середине текста")] + TestName = "Ссылка в середине текста")] [TestCase("[незакрытая ссылка(url)", "

[незакрытая ссылка(url)

", TestName = "Нет закрывающей скобки – остаётся текст")] [TestCase("[:текст](url)", "

:текст

", diff --git a/cs/Markdown.Tests/StrongTests.cs b/cs/Markdown.Tests/StrongTests.cs index e63ec9e95..5b66c4ed0 100644 --- a/cs/Markdown.Tests/StrongTests.cs +++ b/cs/Markdown.Tests/StrongTests.cs @@ -29,9 +29,9 @@ public void Setup() "

Выделенный двумя символами текст должен становиться полужирным

", TestName = "Полужирный в предложении")] [TestCase("сло__во__ внутри слова", "

слово внутри слова

", - TestName = "Двойное выделение внутри слова")] + TestName = "Двойное выделение внутри слова")] [TestCase("Эти __ подчерки__ не работают", "

Эти __ подчерки__ не работают

", - TestName = "Не начинается, если после __ пробел")] + TestName = "Не начинается, если после __ пробел")] [TestCase("Эти __подчерки __ работают", "

Эти подчерки работают

", TestName = "Двойное выделение допускает пробел внутри")] public void Test(string inputText, string expectedHtml) diff --git a/cs/Markdown.Tests/TextTests.cs b/cs/Markdown.Tests/TextTests.cs index 55702d07e..f3fa585f0 100644 --- a/cs/Markdown.Tests/TextTests.cs +++ b/cs/Markdown.Tests/TextTests.cs @@ -20,12 +20,18 @@ public void Setup() _markdown = new Md(lexer, parser, renderer); } - [TestCase("Абракадабра", "

Абракадабра

",TestName = "

Простой текст без выделений

")] - [TestCase("Привет, как дела?", "

Привет, как дела?

",TestName = "

Простой текст без выделений

")] - [TestCase("Email: test@example.com", "

Email: test@example.com

",TestName = "

Текст с различными символами email

")] - [TestCase("Ссылка: https://example.com", "

Ссылка: https://example.com

", TestName = "Текст с URL")] - [TestCase("Первый параграф\nВторой параграф", "

Первый параграф

Второй параграф

", TestName = "Два параграфа")] - [TestCase("Первый\n\nТретий", "

Первый

Третий

", TestName = "Параграфы с пустой строкой")] + [TestCase("Абракадабра", "

Абракадабра

", + TestName = "

Простой текст без выделений

")] + [TestCase("Привет, как дела?", "

Привет, как дела?

", + TestName = "

Простой текст без выделений

")] + [TestCase("Email: test@example.com", "

Email: test@example.com

", + TestName = "

Текст с различными символами email

")] + [TestCase("Ссылка: https://example.com", "

Ссылка: https://example.com

", + TestName = "Текст с URL")] + [TestCase("Первый параграф\nВторой параграф", "

Первый параграф

Второй параграф

", + TestName = "Два параграфа")] + [TestCase("Первый\n\nТретий", "

Первый

Третий

", + TestName = "Параграфы с пустой строкой")] public void Test(string inputText, string expectedText) { From f06893ede9e0db8535563b790c3944f7c8b41b2c Mon Sep 17 00:00:00 2001 From: Krotkaya Date: Wed, 26 Nov 2025 17:35:57 +0500 Subject: [PATCH 6/6] =?UTF-8?q?=D0=92=D0=BD=D0=B5=D1=81=D0=BB=D0=B0=20?= =?UTF-8?q?=D0=BF=D1=80=D0=B0=D0=B2=D0=BA=D0=B8=20=D1=81=D0=BE=D0=B3=D0=BB?= =?UTF-8?q?=D0=B0=D1=81=D0=BD=D0=BE=20=D0=BA=D0=BE=D0=BC=D0=BC=D0=B5=D0=BD?= =?UTF-8?q?=D1=82=D0=B0=D1=80=D0=B8=D1=8F=D0=BC?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../Lexing/Handlers/BracketTokenHandler.cs | 16 +++ .../Lexing/Handlers/EscapeTokenHandler.cs | 35 +++++ .../Lexing/Handlers/HashTokenHandler.cs | 20 +++ .../Lexing/Handlers/NewLineTokenHandler.cs | 28 ++++ .../Handlers/ParenthesisTokenHandler.cs | 16 +++ .../Lexing/Handlers/SpaceTokenHandler.cs | 13 ++ .../Lexing/Handlers/TextTokenHandler.cs | 33 +++++ .../Lexing/Handlers/UnderscoreTokenHandler.cs | 20 +++ cs/Markdown.Core/Lexing/ITokenHandler.cs | 6 + cs/Markdown.Core/Lexing/Lexer.cs | 136 ++++-------------- cs/Markdown.Core/Lexing/Token.cs | 3 +- cs/Markdown.Core/Lexing/TokenKind.cs | 35 +++-- .../Parsing/Blocks/HeadingBlockParser.cs | 28 ++++ .../Parsing/Blocks/IBlockParser.cs | 9 ++ .../Parsing/Blocks/ParagraphBlockParser.cs | 27 ++++ cs/Markdown.Core/Parsing/InlineParser.cs | 2 +- cs/Markdown.Core/Parsing/InlineValidator.cs | 2 +- cs/Markdown.Core/Parsing/Parser.cs | 123 ++++------------ cs/Markdown.Core/Parsing/ParserState.cs | 55 +++++++ 19 files changed, 386 insertions(+), 221 deletions(-) create mode 100644 cs/Markdown.Core/Lexing/Handlers/BracketTokenHandler.cs create mode 100644 cs/Markdown.Core/Lexing/Handlers/EscapeTokenHandler.cs create mode 100644 cs/Markdown.Core/Lexing/Handlers/HashTokenHandler.cs create mode 100644 cs/Markdown.Core/Lexing/Handlers/NewLineTokenHandler.cs create mode 100644 cs/Markdown.Core/Lexing/Handlers/ParenthesisTokenHandler.cs create mode 
100644 cs/Markdown.Core/Lexing/Handlers/SpaceTokenHandler.cs create mode 100644 cs/Markdown.Core/Lexing/Handlers/TextTokenHandler.cs create mode 100644 cs/Markdown.Core/Lexing/Handlers/UnderscoreTokenHandler.cs create mode 100644 cs/Markdown.Core/Lexing/ITokenHandler.cs create mode 100644 cs/Markdown.Core/Parsing/Blocks/HeadingBlockParser.cs create mode 100644 cs/Markdown.Core/Parsing/Blocks/IBlockParser.cs create mode 100644 cs/Markdown.Core/Parsing/Blocks/ParagraphBlockParser.cs create mode 100644 cs/Markdown.Core/Parsing/ParserState.cs diff --git a/cs/Markdown.Core/Lexing/Handlers/BracketTokenHandler.cs b/cs/Markdown.Core/Lexing/Handlers/BracketTokenHandler.cs new file mode 100644 index 000000000..b1b840a0e --- /dev/null +++ b/cs/Markdown.Core/Lexing/Handlers/BracketTokenHandler.cs @@ -0,0 +1,16 @@ +namespace Markdown.Core.Lexing.Handlers; + +public class BracketTokenHandler : ITokenHandler +{ + public bool CanHandle(ReadOnlySpan source, int index) => + source[index] is '[' or ']'; + + public int Handle(ReadOnlyMemory source, int index, List tokens) + { + var kind = source.Span[index] == '[' + ? 
TokenKind.LeftBracket + : TokenKind.RightBracket; + tokens.Add(new Token(kind, source.Slice(index, 1))); + return 1; + } +} diff --git a/cs/Markdown.Core/Lexing/Handlers/EscapeTokenHandler.cs b/cs/Markdown.Core/Lexing/Handlers/EscapeTokenHandler.cs new file mode 100644 index 000000000..854e7978b --- /dev/null +++ b/cs/Markdown.Core/Lexing/Handlers/EscapeTokenHandler.cs @@ -0,0 +1,35 @@ +namespace Markdown.Core.Lexing.Handlers; + +public class EscapeTokenHandler : ITokenHandler +{ + public bool CanHandle(ReadOnlySpan source, int index) => + source[index] == '\\'; + + public int Handle(ReadOnlyMemory source, int index, List tokens) + { + var span = source.Span; + var length = source.Length; + + if (index + 1 < length) + { + var nextToken = span[index + 1]; + if (nextToken == '_' && index + 2 < length && span[index + 2] == '_') + { + tokens.Add(new Token(TokenKind.Text, source.Slice(index + 1, 2))); + return 3; + } + + if (IsSpecialCharacter(nextToken)) + { + tokens.Add(new Token(TokenKind.Text, source.Slice(index + 1, 1))); + return 2; + } + } + + tokens.Add(new Token(TokenKind.Text, source.Slice(index, 1))); + return 1; + } + + private static bool IsSpecialCharacter(char c) => + c is '#' or '_' or '\\' or '[' or ']' or '(' or ')'; +} diff --git a/cs/Markdown.Core/Lexing/Handlers/HashTokenHandler.cs b/cs/Markdown.Core/Lexing/Handlers/HashTokenHandler.cs new file mode 100644 index 000000000..a1d4c0c3a --- /dev/null +++ b/cs/Markdown.Core/Lexing/Handlers/HashTokenHandler.cs @@ -0,0 +1,20 @@ +namespace Markdown.Core.Lexing.Handlers; + +public class HashTokenHandler : ITokenHandler +{ + public bool CanHandle(ReadOnlySpan source, int index) => + source[index] == '#'; + + public int Handle(ReadOnlyMemory source, int index, List tokens) + { + var span = source.Span; + if (IsAtLineStart(tokens) && index + 1 < source.Length && span[index + 1] == ' ') + tokens.Add(new Token(TokenKind.Hash, source.Slice(index, 1))); + else + tokens.Add(new Token(TokenKind.Text, 
source.Slice(index, 1))); + return 1; + } + + private static bool IsAtLineStart(List tokens) => + tokens.Count == 0 || tokens[^1].Kind == TokenKind.NewLine; +} diff --git a/cs/Markdown.Core/Lexing/Handlers/NewLineTokenHandler.cs b/cs/Markdown.Core/Lexing/Handlers/NewLineTokenHandler.cs new file mode 100644 index 000000000..8b432c6bc --- /dev/null +++ b/cs/Markdown.Core/Lexing/Handlers/NewLineTokenHandler.cs @@ -0,0 +1,28 @@ +namespace Markdown.Core.Lexing.Handlers; + +public class NewLineTokenHandler : ITokenHandler +{ + public bool CanHandle(ReadOnlySpan source, int index) => + source[index] is '\n' or '\r'; + + public int Handle(ReadOnlyMemory source, int index, List tokens) + { + var span = source.Span; + var length = source.Length; + + if (span[index] == '\r' && index + 1 < length && span[index + 1] == '\n') + { + tokens.Add(new Token(TokenKind.NewLine, source.Slice(index, 2))); + return 2; + } + + if (span[index] == '\n') + { + tokens.Add(new Token(TokenKind.NewLine, source.Slice(index, 1))); + return 1; + } + + tokens.Add(new Token(TokenKind.Text, source.Slice(index, 1))); + return 1; + } +} diff --git a/cs/Markdown.Core/Lexing/Handlers/ParenthesisTokenHandler.cs b/cs/Markdown.Core/Lexing/Handlers/ParenthesisTokenHandler.cs new file mode 100644 index 000000000..95c1f6106 --- /dev/null +++ b/cs/Markdown.Core/Lexing/Handlers/ParenthesisTokenHandler.cs @@ -0,0 +1,16 @@ +namespace Markdown.Core.Lexing.Handlers; + +public class ParenthesisTokenHandler : ITokenHandler +{ + public bool CanHandle(ReadOnlySpan source, int index) => + source[index] is '(' or ')'; + + public int Handle(ReadOnlyMemory source, int index, List tokens) + { + var kind = source.Span[index] == '(' + ? 
TokenKind.LeftParen + : TokenKind.RightParen; + tokens.Add(new Token(kind, source.Slice(index, 1))); + return 1; + } +} diff --git a/cs/Markdown.Core/Lexing/Handlers/SpaceTokenHandler.cs b/cs/Markdown.Core/Lexing/Handlers/SpaceTokenHandler.cs new file mode 100644 index 000000000..2691a5968 --- /dev/null +++ b/cs/Markdown.Core/Lexing/Handlers/SpaceTokenHandler.cs @@ -0,0 +1,13 @@ +namespace Markdown.Core.Lexing.Handlers; + +public class SpaceTokenHandler : ITokenHandler +{ + public bool CanHandle(ReadOnlySpan source, int index) => + source[index] is ' ' or '\t'; + + public int Handle(ReadOnlyMemory source, int index, List tokens) + { + tokens.Add(new Token(TokenKind.Space, source.Slice(index, 1))); + return 1; + } +} diff --git a/cs/Markdown.Core/Lexing/Handlers/TextTokenHandler.cs b/cs/Markdown.Core/Lexing/Handlers/TextTokenHandler.cs new file mode 100644 index 000000000..a19b39e7d --- /dev/null +++ b/cs/Markdown.Core/Lexing/Handlers/TextTokenHandler.cs @@ -0,0 +1,33 @@ +namespace Markdown.Core.Lexing.Handlers; + +public class TextTokenHandler : ITokenHandler +{ + public bool CanHandle(ReadOnlySpan source, int index) => true; + + public int Handle(ReadOnlyMemory source, int index, List tokens) + { + var span = source.Span; + var length = source.Length; + var startText = index; + + while (index < length) + { + var symbol = span[index]; + if (IsSpecialCharacter(symbol) || symbol is ' ' or '\t' or '\n' or '\r') + break; + index++; + } + + if (index > startText) + { + tokens.Add(new Token(TokenKind.Text, source.Slice(startText, index - startText))); + return index - startText; + } + + tokens.Add(new Token(TokenKind.Text, source.Slice(startText, 1))); + return 1; + } + + private static bool IsSpecialCharacter(char c) => + c is '#' or '_' or '\\' or '[' or ']' or '(' or ')'; +} diff --git a/cs/Markdown.Core/Lexing/Handlers/UnderscoreTokenHandler.cs b/cs/Markdown.Core/Lexing/Handlers/UnderscoreTokenHandler.cs new file mode 100644 index 000000000..e51b4a495 --- /dev/null 
+++ b/cs/Markdown.Core/Lexing/Handlers/UnderscoreTokenHandler.cs @@ -0,0 +1,20 @@ +namespace Markdown.Core.Lexing.Handlers; + +public class UnderscoreTokenHandler : ITokenHandler +{ + public bool CanHandle(ReadOnlySpan source, int index) => + source[index] == '_'; + + public int Handle(ReadOnlyMemory source, int index, List tokens) + { + var span = source.Span; + if (index + 1 < source.Length && span[index + 1] == '_') + { + tokens.Add(new Token(TokenKind.DoubleUnderscore, source.Slice(index, 2))); + return 2; + } + + tokens.Add(new Token(TokenKind.Underscore, source.Slice(index, 1))); + return 1; + } +} diff --git a/cs/Markdown.Core/Lexing/ITokenHandler.cs b/cs/Markdown.Core/Lexing/ITokenHandler.cs new file mode 100644 index 000000000..772341290 --- /dev/null +++ b/cs/Markdown.Core/Lexing/ITokenHandler.cs @@ -0,0 +1,6 @@ +namespace Markdown.Core.Lexing; +public interface ITokenHandler +{ + bool CanHandle(ReadOnlySpan source, int index); + int Handle(ReadOnlyMemory source, int index, List tokens); +} diff --git a/cs/Markdown.Core/Lexing/Lexer.cs b/cs/Markdown.Core/Lexing/Lexer.cs index db70bdba4..678a77d28 100644 --- a/cs/Markdown.Core/Lexing/Lexer.cs +++ b/cs/Markdown.Core/Lexing/Lexer.cs @@ -1,129 +1,45 @@ +using Markdown.Core.Lexing.Handlers; + namespace Markdown.Core.Lexing; -/// -/// Сканер: -/// - идёт по символам слева направо -/// - склеивает в один токен обычный текст -/// - выделяет специальные токены -/// - применяет базовые правила экранирования -/// public class Lexer : ILexer { + private readonly List _handlers = + [ + new HashTokenHandler(), + new EscapeTokenHandler(), + new UnderscoreTokenHandler(), + new SpaceTokenHandler(), + new NewLineTokenHandler(), + new BracketTokenHandler(), + new ParenthesisTokenHandler() + ]; + private readonly ITokenHandler _textHandler = new TextTokenHandler(); + public IEnumerable Tokenize(ReadOnlyMemory source) { - var tokensSpan = source.Span; var tokens = new List(); var i = 0; var length = source.Length; while (i < 
length) { - var symbol = tokensSpan[i]; - - switch (symbol) + var handled = false; + foreach (var consumed in from handler in _handlers + where handler + .CanHandle(source.Span, i) select handler + .Handle(source, i, tokens)) { - case '#' when IsAtLineStart(tokens) && i + 1 < length && tokensSpan[i + 1] == ' ': - tokens.Add(new Token(TokenKind.Hash, source.Slice(i, 1), i)); - i += 1; - continue; - - case '#': - tokens.Add(new Token(TokenKind.Text, source.Slice(i, 1), i)); - i += 1; - continue; - - case '\\' when i + 1 < length: - var nextToken = tokensSpan[i + 1]; - if (nextToken == '_' && i + 2 < length && tokensSpan[i + 2] == '_') - { - tokens.Add(new Token(TokenKind.Text, source.Slice(i + 1, 2), i)); - i += 3; - continue; - } - - if (IsSpecialCharacter(nextToken)) - { - tokens.Add(new Token(TokenKind.Text, source.Slice(i + 1, 1), i)); - i += 2; - continue; - } - - tokens.Add(new Token(TokenKind.Text, source.Slice(i, 1), i)); - i += 1; - continue; - - case '\\': - tokens.Add(new Token(TokenKind.Text, source.Slice(i, 1), i)); - i += 1; - continue; - - case '_' when i + 1 < length && tokensSpan[i + 1] == '_': - tokens.Add(new Token(TokenKind.DoubleUnderscore, source.Slice(i, 2), i)); - i += 2; - continue; - - case '_' when i + 1 <= length: - tokens.Add(new Token(TokenKind.Underscore, source.Slice(i, 1), i)); - i += 1; - continue; - - case ' ': - tokens.Add(new Token(TokenKind.Space, source.Slice(i, 1), i)); - i += 1; - continue; - - case '\n': - tokens.Add(new Token(TokenKind.NewLine, source.Slice(i, 1), i)); - i += 1; - continue; - - case '\r' when i + 1 < length && tokensSpan[i + 1] == '\n': - tokens.Add(new Token(TokenKind.NewLine, source.Slice(i, 2), i)); - i += 2; - continue; - - case '[': - tokens.Add(new Token(TokenKind.LeftBracket, source.Slice(i, 1), i)); - i += 1; - continue; - - case ']': - tokens.Add(new Token(TokenKind.RightBracket, source.Slice(i, 1), i)); - i += 1; - continue; - - case '(': - tokens.Add(new Token(TokenKind.LeftParen, source.Slice(i, 
1), i)); - i += 1; - continue; - - case ')': - tokens.Add(new Token(TokenKind.RightParen, source.Slice(i, 1), i)); - i += 1; - continue; + i += consumed; + handled = true; + break; } + if (handled) + continue; - var startText = i; - while (i < length) - { - symbol = tokensSpan[i]; - if (IsSpecialCharacter(symbol) || symbol is ' ' or '\n' or '\r') - break; - i++; - } - - if (i > startText) - { - tokens.Add(new Token(TokenKind.Text, source.Slice(startText, i - startText), startText)); - } + i += _textHandler.Handle(source, i, tokens); } - tokens.Add(new Token(TokenKind.Eof, source[..0], length)); + tokens.Add(new Token(TokenKind.Eof, source[..0])); return tokens; } - - private static bool IsSpecialCharacter(char c) => - c is '#' or '_' or '\\' or '[' or ']' or '(' or ')'; - - private static bool IsAtLineStart(List tokens) => - tokens.Count == 0 || tokens[^1].Kind == TokenKind.NewLine; } diff --git a/cs/Markdown.Core/Lexing/Token.cs b/cs/Markdown.Core/Lexing/Token.cs index 0b91d7071..2506f78b8 100644 --- a/cs/Markdown.Core/Lexing/Token.cs +++ b/cs/Markdown.Core/Lexing/Token.cs @@ -1,8 +1,7 @@ namespace Markdown.Core.Lexing; -public readonly struct Token(TokenKind kind, ReadOnlyMemory slice, int position) +public readonly struct Token(TokenKind kind, ReadOnlyMemory slice) { public TokenKind Kind { get; init; } = kind; public ReadOnlyMemory Slice { get; init; } = slice; - } \ No newline at end of file diff --git a/cs/Markdown.Core/Lexing/TokenKind.cs b/cs/Markdown.Core/Lexing/TokenKind.cs index 9c32bb791..1c11c1615 100644 --- a/cs/Markdown.Core/Lexing/TokenKind.cs +++ b/cs/Markdown.Core/Lexing/TokenKind.cs @@ -2,15 +2,26 @@ namespace Markdown.Core.Lexing; public enum TokenKind { - Text, // Обычный текст - Underscore, // Курсивный шрифт - DoubleUnderscore, // Полужирный шрифт - Hash, //Заголовок - Space, //Одиночный пробел - NewLine, //Перевод строки - Eof, // Конец входа - LeftBracket, // Квадратная скобка '[' открывает текст ссылки - RightBracket, // Квадратная скобка 
']' закрывает текст ссылки - LeftParen, // Круглая скобка '(' открывает адрес ссылки - RightParen, // Круглая скобка ')' закрывает адрес ссылки -} \ No newline at end of file + /// Обычный текст + Text, + /// Курсивный шрифт + Underscore, + /// Полужирный шрифт + DoubleUnderscore, + /// Заголовок + Hash, + /// Одиночный пробел + Space, + /// Перевод строки + NewLine, + /// Конец входа + Eof, + /// Квадратная скобка '[' открывает текст ссылки + LeftBracket, + /// Квадратная скобка ']' закрывает текст ссылки + RightBracket, + /// Круглая скобка '(' открывает адрес ссылки + LeftParen, + /// Круглая скобка ')' закрывает адрес ссылки + RightParen, +} diff --git a/cs/Markdown.Core/Parsing/Blocks/HeadingBlockParser.cs b/cs/Markdown.Core/Parsing/Blocks/HeadingBlockParser.cs new file mode 100644 index 000000000..3094563f4 --- /dev/null +++ b/cs/Markdown.Core/Parsing/Blocks/HeadingBlockParser.cs @@ -0,0 +1,28 @@ +using Markdown.Core.Lexing; +using Markdown.Core.Parsing.Nodes; + +namespace Markdown.Core.Parsing.Blocks; +public class HeadingBlockParser : IBlockParser +{ + public bool CanParse(ParserState state) => + state.CurrentToken.Kind == TokenKind.Hash && state.IsAtLineStart(); + + public BlockNode? Parse(ParserState state, InlineParser inlineParser) + { + var heading = new HeadingNode(1); + + state.MoveNext(); + state.SkipSpace(); + + while (!state.IsEndOfLine()) + { + var inline = inlineParser.ParseInline(); + if (inline != null) + heading.Inlines.Add(inline); + } + + if (state.CurrentToken.Kind == TokenKind.NewLine) + state.MoveNext(); + return heading; + } +} diff --git a/cs/Markdown.Core/Parsing/Blocks/IBlockParser.cs b/cs/Markdown.Core/Parsing/Blocks/IBlockParser.cs new file mode 100644 index 000000000..1fa59f877 --- /dev/null +++ b/cs/Markdown.Core/Parsing/Blocks/IBlockParser.cs @@ -0,0 +1,9 @@ +using Markdown.Core.Parsing.Nodes; + +namespace Markdown.Core.Parsing.Blocks; + +public interface IBlockParser +{ + bool CanParse(ParserState state); + BlockNode? 
Parse(ParserState state, InlineParser inlineParser); +} diff --git a/cs/Markdown.Core/Parsing/Blocks/ParagraphBlockParser.cs b/cs/Markdown.Core/Parsing/Blocks/ParagraphBlockParser.cs new file mode 100644 index 000000000..9a0d95537 --- /dev/null +++ b/cs/Markdown.Core/Parsing/Blocks/ParagraphBlockParser.cs @@ -0,0 +1,27 @@ +using Markdown.Core.Lexing; +using Markdown.Core.Parsing.Nodes; + +namespace Markdown.Core.Parsing.Blocks; + +public class ParagraphBlockParser : IBlockParser +{ + public bool CanParse(ParserState state) => + state.CurrentToken.Kind is not TokenKind.Eof; + + public BlockNode? Parse(ParserState state, InlineParser inlineParser) + { + var paragraph = new ParagraphNode(); + + while (!state.IsEndOfLine()) + { + var inline = inlineParser.ParseInline(); + if (inline != null) + paragraph.Inlines.Add(inline); + } + + if (state.CurrentToken.Kind != TokenKind.NewLine) return paragraph; + state.MoveNext(); + state.SkipEmptyLines(); + return paragraph; + } +} diff --git a/cs/Markdown.Core/Parsing/InlineParser.cs b/cs/Markdown.Core/Parsing/InlineParser.cs index 297b5292a..928343429 100644 --- a/cs/Markdown.Core/Parsing/InlineParser.cs +++ b/cs/Markdown.Core/Parsing/InlineParser.cs @@ -4,7 +4,7 @@ namespace Markdown.Core.Parsing; -internal class InlineParser( +public class InlineParser( IReadOnlyList tokens, Func moveNext, Func currentToken, diff --git a/cs/Markdown.Core/Parsing/InlineValidator.cs b/cs/Markdown.Core/Parsing/InlineValidator.cs index fd63da613..464fd66de 100644 --- a/cs/Markdown.Core/Parsing/InlineValidator.cs +++ b/cs/Markdown.Core/Parsing/InlineValidator.cs @@ -2,7 +2,7 @@ namespace Markdown.Core.Parsing; -internal class InlineValidator +public class InlineValidator { public bool IsValidEmphasisClose(IReadOnlyList tokens, int startIndex, int closeIndex) { diff --git a/cs/Markdown.Core/Parsing/Parser.cs b/cs/Markdown.Core/Parsing/Parser.cs index 3fbd6fdc5..8d3c6996c 100644 --- a/cs/Markdown.Core/Parsing/Parser.cs +++ 
b/cs/Markdown.Core/Parsing/Parser.cs @@ -1,127 +1,60 @@ using Markdown.Core.Lexing; +using Markdown.Core.Parsing.Blocks; using Markdown.Core.Parsing.Nodes; namespace Markdown.Core.Parsing; public class Parser : IParser { - private IEnumerator _tokenPointer; - private Token _currentToken; private readonly List _allTokens = []; - private int _currentIndex; private readonly InlineValidator _inlineValidator = new(); private InlineParser _inlineParser; + private ParserState _state; + private readonly List _blockParsers; + + public Parser() + { + _blockParsers = + [ + new HeadingBlockParser(), + new ParagraphBlockParser() + ]; + } public DocumentNode Parse(IEnumerable tokens) { _allTokens.Clear(); _allTokens.AddRange(tokens); - _tokenPointer = _allTokens.GetEnumerator(); - _currentIndex = 0; + _state = new ParserState(_allTokens); _inlineParser = new InlineParser( _allTokens, - MoveOnNextToken, - () => _currentToken, - () => _currentIndex, - IsEndOfLine, + _state.MoveNext, + () => _state.CurrentToken, + () => _state.CurrentIndex, + _state.IsEndOfLine, _inlineValidator); - MoveOnNextToken(); + _state.Start(); var document = new DocumentNode(); - while (_currentToken.Kind != TokenKind.Eof) + while (_state.CurrentToken.Kind != TokenKind.Eof) { + _state.SkipEmptyLines(); + if (_state.CurrentToken.Kind == TokenKind.Eof) + break; + var block = ParseBlock(); - if (block != null) - document.Children.Add(block); + if (block != null) document.Children.Add(block); } return document; } private BlockNode? 
ParseBlock() { - SkipEmptyLines(); - - if (IsEndOfFile()) - return null; - - if (IsHeadingStart()) - return ParseHeading(); - - return ParseParagraph(); - } - - private void SkipEmptyLines() - { - while (_currentToken.Kind == TokenKind.NewLine) - MoveOnNextToken(); - } - - private bool IsEndOfFile() => _currentToken.Kind == TokenKind.Eof; - - private bool IsHeadingStart() => _currentToken.Kind == TokenKind.Hash && IsAtStartOfLine(); - - private bool IsAtStartOfLine() => _currentIndex <= 1 || - _allTokens[_currentIndex - 2].Kind == TokenKind.NewLine; - - private ParagraphNode ParseParagraph() - { - var paragraph = new ParagraphNode(); - - while (!IsEndOfLine()) - { - var inline = _inlineParser.ParseInline(); - if (inline != null) - paragraph.Inlines.Add(inline); - } - - if (_currentToken.Kind != TokenKind.NewLine) return paragraph; - MoveOnNextToken(); - SkipEmptyLines(); - - return paragraph; - } - - private bool IsEndOfLine() => _currentToken.Kind is TokenKind.NewLine or TokenKind.Eof; - - private HeadingNode ParseHeading() - { - var heading = new HeadingNode(1); - - MoveOnNextToken(); - SkipSpace(); - - while (!IsEndOfLine()) - { - var inline = _inlineParser.ParseInline(); - if (inline != null) - heading.Inlines.Add(inline); - } - - if (_currentToken.Kind == TokenKind.NewLine) - MoveOnNextToken(); - - return heading; - } - - private void SkipSpace() - { - if (_currentToken.Kind == TokenKind.Space) - MoveOnNextToken(); - } - - private Token MoveOnNextToken() - { - if (!_tokenPointer.MoveNext()) - { - _currentToken = new Token(TokenKind.Eof, ReadOnlyMemory.Empty, -1); - } - else - { - _currentToken = _tokenPointer.Current; - _currentIndex++; - } - return _currentToken; + return (from blockParser in _blockParsers where blockParser + .CanParse(_state) select blockParser + .Parse(_state, _inlineParser)) + .FirstOrDefault(); } } diff --git a/cs/Markdown.Core/Parsing/ParserState.cs b/cs/Markdown.Core/Parsing/ParserState.cs new file mode 100644 index 000000000..d762ef315 
--- /dev/null
+++ b/cs/Markdown.Core/Parsing/ParserState.cs
@@ -0,0 +1,84 @@
+using Markdown.Core.Lexing;
+
+namespace Markdown.Core.Parsing;
+
+/// <summary>
+/// Forward-only cursor over a token list that tracks the current token and how many tokens were consumed.
+/// </summary>
+public class ParserState
+{
+    private readonly IReadOnlyList<Token> _tokens;
+
+    /// <summary>
+    /// Creates a cursor over <paramref name="tokens"/>. No token is loaded until
+    /// <see cref="Start"/> or <see cref="MoveNext"/> is called.
+    /// </summary>
+    /// <param name="tokens">Tokens to walk; the list is indexed directly.</param>
+    /// <exception cref="ArgumentNullException"><paramref name="tokens"/> is null.</exception>
+    public ParserState(IReadOnlyList<Token> tokens)
+    {
+        // Fail fast here instead of on the first MoveNext call.
+        ArgumentNullException.ThrowIfNull(tokens);
+        _tokens = tokens;
+    }
+
+    /// <summary>Token produced by the most recent <see cref="MoveNext"/> call.</summary>
+    public Token CurrentToken { get; private set; }
+
+    /// <summary>
+    /// Number of tokens consumed so far. While tokens remain, this is the list index of
+    /// <see cref="CurrentToken"/> plus one; it stops growing once the list is exhausted.
+    /// </summary>
+    public int CurrentIndex { get; private set; }
+
+    /// <summary>Rewinds to the beginning of the list and loads the first token.</summary>
+    public void Start()
+    {
+        CurrentIndex = 0;
+        MoveNext();
+    }
+
+    /// <summary>
+    /// Advances to the next token and returns it. Past the end of the list a synthetic
+    /// <see cref="TokenKind.Eof"/> token is returned and <see cref="CurrentIndex"/> is left unchanged.
+    /// </summary>
+    public Token MoveNext()
+    {
+        if (CurrentIndex >= _tokens.Count)
+        {
+            CurrentToken = new Token(TokenKind.Eof, ReadOnlyMemory<char>.Empty);
+            return CurrentToken;
+        }
+
+        CurrentToken = _tokens[CurrentIndex];
+        CurrentIndex++;
+        return CurrentToken;
+    }
+
+    /// <summary>Skips every consecutive <see cref="TokenKind.NewLine"/> token at the cursor.</summary>
+    public void SkipEmptyLines()
+    {
+        while (CurrentToken.Kind == TokenKind.NewLine)
+        {
+            MoveNext();
+        }
+    }
+
+    /// <summary>Skips at most one <see cref="TokenKind.Space"/> token at the cursor.</summary>
+    public void SkipSpace()
+    {
+        if (CurrentToken.Kind == TokenKind.Space)
+        {
+            MoveNext();
+        }
+    }
+
+    /// <summary>True when the current token is a line break or the end of input.</summary>
+    public bool IsEndOfLine() => CurrentToken.Kind is TokenKind.NewLine or TokenKind.Eof;
+
+    /// <summary>
+    /// True when no token precedes the current one, or the token immediately before it is a line break.
+    /// </summary>
+    public bool IsAtLineStart() =>
+        CurrentIndex <= 1 || _tokens[CurrentIndex - 2].Kind == TokenKind.NewLine;
+}