From fb3a3fc7c8c34e8f56b89dc50e70018851815e89 Mon Sep 17 00:00:00 2001 From: Anton Savitskikh Date: Sun, 2 Nov 2025 21:56:43 +0500 Subject: [PATCH 1/5] Made markdown project architecture --- cs/Markdown/Markdown.cs | 22 +++++++++++++++++ cs/Markdown/Markdown.csproj | 10 ++++++++ cs/Markdown/Node.cs | 22 +++++++++++++++++ cs/Markdown/NodeType.cs | 18 ++++++++++++++ cs/Markdown/Scanner.cs | 41 ++++++++++++++++++++++++++++++++ cs/Markdown/SyntaxTreeBuilder.cs | 16 +++++++++++++ cs/Markdown/Token.cs | 20 ++++++++++++++++ cs/Markdown/TokenType.cs | 23 ++++++++++++++++++ 8 files changed, 172 insertions(+) create mode 100644 cs/Markdown/Markdown.cs create mode 100644 cs/Markdown/Markdown.csproj create mode 100644 cs/Markdown/Node.cs create mode 100644 cs/Markdown/NodeType.cs create mode 100644 cs/Markdown/Scanner.cs create mode 100644 cs/Markdown/SyntaxTreeBuilder.cs create mode 100644 cs/Markdown/Token.cs create mode 100644 cs/Markdown/TokenType.cs diff --git a/cs/Markdown/Markdown.cs b/cs/Markdown/Markdown.cs new file mode 100644 index 000000000..75cc1e136 --- /dev/null +++ b/cs/Markdown/Markdown.cs @@ -0,0 +1,22 @@ + +using Markdown; + +namespace markdown +{ + public class Markdown + { + public static string ParseMarkdownToHtml(string text) + { + var tokens = Scanner.TextToListOfTokens(text); + + var AST = SyntaxTreeBuilder.BuildTreeFromTokensList(tokens); + + return RenderTextFromAST(AST); + } + + private static string RenderTextFromAST(List AST) + { + throw new NotImplementedException(); + } + } +} \ No newline at end of file diff --git a/cs/Markdown/Markdown.csproj b/cs/Markdown/Markdown.csproj new file mode 100644 index 000000000..2150e3797 --- /dev/null +++ b/cs/Markdown/Markdown.csproj @@ -0,0 +1,10 @@ + + + + Exe + net8.0 + enable + enable + + + diff --git a/cs/Markdown/Node.cs b/cs/Markdown/Node.cs new file mode 100644 index 000000000..72414df95 --- /dev/null +++ b/cs/Markdown/Node.cs @@ -0,0 +1,22 @@ +using System; +using System.Collections.Generic; +using 
System.Linq; +using System.Text; +using System.Threading.Tasks; + +namespace Markdown +{ + internal class Node + { + public NodeType Type { get; } + public List ChildrenNodes { get; } + public string Value { get; } + + public Node(NodeType type, List childrenNodes, string value) + { + Type = type; + ChildrenNodes = childrenNodes; + Value = value; + } + } +} diff --git a/cs/Markdown/NodeType.cs b/cs/Markdown/NodeType.cs new file mode 100644 index 000000000..4c22d134f --- /dev/null +++ b/cs/Markdown/NodeType.cs @@ -0,0 +1,18 @@ +using System; +using System.Collections.Generic; +using System.Linq; +using System.Text; +using System.Threading.Tasks; + +namespace Markdown +{ + public enum NodeType + { + Document, + Paragraph, + Header, + Bold, + Italic, + Text + } +} diff --git a/cs/Markdown/Scanner.cs b/cs/Markdown/Scanner.cs new file mode 100644 index 000000000..a0d0aba1f --- /dev/null +++ b/cs/Markdown/Scanner.cs @@ -0,0 +1,41 @@ +using System; +using System.Collections.Generic; +using System.Diagnostics; +using System.Linq; +using System.Text; +using System.Threading.Tasks; + +namespace Markdown +{ + internal class Scanner + { + public static List TextToListOfTokens(string text) + { + var lines = TextToLines(text); + var tokens = LinesToTokens(lines); + throw new NotImplementedException(); + } + + public static List TextToLines(string text) + { + throw new NotImplementedException(); + } + + public static List LinesToTokens(IEnumerable lines) + { + var tokens = new List(); + + foreach (var line in lines) + { + tokens.AddRange(ConvertLineToTokens(line)); + } + + return tokens; + } + + public static List ConvertLineToTokens(string line) + { + throw new NotImplementedException(); + } + } +} diff --git a/cs/Markdown/SyntaxTreeBuilder.cs b/cs/Markdown/SyntaxTreeBuilder.cs new file mode 100644 index 000000000..5eb00b527 --- /dev/null +++ b/cs/Markdown/SyntaxTreeBuilder.cs @@ -0,0 +1,16 @@ +using System; +using System.Collections.Generic; +using System.Linq; +using 
System.Text; +using System.Threading.Tasks; + +namespace Markdown +{ + internal class SyntaxTreeBuilder + { + public static List BuildTreeFromTokensList(List tokens) + { + throw new NotImplementedException(); + } + } +} diff --git a/cs/Markdown/Token.cs b/cs/Markdown/Token.cs new file mode 100644 index 000000000..904f6a2ef --- /dev/null +++ b/cs/Markdown/Token.cs @@ -0,0 +1,20 @@ +using System; +using System.Collections.Generic; +using System.Linq; +using System.Text; +using System.Threading.Tasks; + +namespace Markdown +{ + internal class Token + { + public TokenType Type { get; } + public string Value { get; } + + public Token(TokenType type, string value = null) + { + Type = type; + Value = value; + } + } +} diff --git a/cs/Markdown/TokenType.cs b/cs/Markdown/TokenType.cs new file mode 100644 index 000000000..593042c01 --- /dev/null +++ b/cs/Markdown/TokenType.cs @@ -0,0 +1,23 @@ +using System; +using System.Collections.Generic; +using System.Linq; +using System.Text; +using System.Threading.Tasks; + +namespace Markdown +{ + /// + /// HeaderMarker = # + /// BoldMarker = __ + /// ItalicsMarker = _ + /// + public enum TokenType + { + Text, + Newline, + HeaderMarker, + BoldMarker, + ItalicMarker, + EndOfFile + } +} From 3d2aa9fc186947e3c6a1b46c5e39a2865e7d1a59 Mon Sep 17 00:00:00 2001 From: Anton Savitskikh Date: Tue, 4 Nov 2025 10:53:10 +0500 Subject: [PATCH 2/5] Updated markdown project architecture --- cs/Markdown/Entities/Builders/HtmlBuilder.cs | 31 ++++++++++++++ cs/Markdown/Entities/Builders/IBuilder.cs | 20 +++++++++ cs/Markdown/Entities/Converters/IConverter.cs | 17 ++++++++ .../Converters/MarkdownToHtmlConverter.cs | 36 ++++++++++++++++ cs/Markdown/Entities/Tokenizers/ITokenizer.cs | 24 +++++++++++ .../Entities/Tokenizers/MarkdownTokenizer.cs | 40 ++++++++++++++++++ cs/Markdown/{ => Enums}/NodeType.cs | 2 +- cs/Markdown/{ => Enums}/TokenType.cs | 5 +-- cs/Markdown/Markdown.cs | 29 +++++++------ cs/Markdown/Markdown.csproj | 4 ++ cs/Markdown/{ => 
Models}/Node.cs | 7 ++-- .../Models/SyntaxTreeModels/ISyntaxTree.cs | 16 ++++++++ .../Models/SyntaxTreeModels/SyntaxTree.cs | 28 +++++++++++++ cs/Markdown/{ => Models}/Token.cs | 10 +++-- cs/Markdown/Scanner.cs | 41 ------------------- cs/Markdown/SyntaxTreeBuilder.cs | 16 -------- 16 files changed, 246 insertions(+), 80 deletions(-) create mode 100644 cs/Markdown/Entities/Builders/HtmlBuilder.cs create mode 100644 cs/Markdown/Entities/Builders/IBuilder.cs create mode 100644 cs/Markdown/Entities/Converters/IConverter.cs create mode 100644 cs/Markdown/Entities/Converters/MarkdownToHtmlConverter.cs create mode 100644 cs/Markdown/Entities/Tokenizers/ITokenizer.cs create mode 100644 cs/Markdown/Entities/Tokenizers/MarkdownTokenizer.cs rename cs/Markdown/{ => Enums}/NodeType.cs (91%) rename cs/Markdown/{ => Enums}/TokenType.cs (86%) rename cs/Markdown/{ => Models}/Node.cs (83%) create mode 100644 cs/Markdown/Models/SyntaxTreeModels/ISyntaxTree.cs create mode 100644 cs/Markdown/Models/SyntaxTreeModels/SyntaxTree.cs rename cs/Markdown/{ => Models}/Token.cs (54%) delete mode 100644 cs/Markdown/Scanner.cs delete mode 100644 cs/Markdown/SyntaxTreeBuilder.cs diff --git a/cs/Markdown/Entities/Builders/HtmlBuilder.cs b/cs/Markdown/Entities/Builders/HtmlBuilder.cs new file mode 100644 index 000000000..222296dc7 --- /dev/null +++ b/cs/Markdown/Entities/Builders/HtmlBuilder.cs @@ -0,0 +1,31 @@ +using System; +using System.Collections.Generic; +using System.Linq; +using System.Text; +using System.Threading.Tasks; +using Markdown.Models; +using Markdown.Models.SyntaxTreeModels; + +namespace Markdown.Entities.Builders +{ + /// + /// Преобразует абстрактное синтаксическое дерево (AST) в HTML-код. + /// Выполняет обход дерева в глубину и генерирует соответствующие HTML-теги для каждого узла. 
+ /// + /// Абстрактное синтаксическое дерево для преобразования + /// HTML-код, соответствующий структуре исходного дерева + /// + /// Для каждого типа узла AST генерирует соответствующий HTML-тег: + /// - Заголовки → h1, h2, etc. + /// - Жирный текст → strong + /// - Курсив → em + /// - Параграфы → p + /// + public class HtmlBuilder : IBuilder + { + public string Build(ISyntaxTree tree) + { + throw new NotImplementedException(); + } + } +} diff --git a/cs/Markdown/Entities/Builders/IBuilder.cs b/cs/Markdown/Entities/Builders/IBuilder.cs new file mode 100644 index 000000000..d3b02cdfa --- /dev/null +++ b/cs/Markdown/Entities/Builders/IBuilder.cs @@ -0,0 +1,20 @@ +using System; +using System.Collections.Generic; +using System.Linq; +using System.Text; +using System.Threading.Tasks; +using Markdown.Models.SyntaxTree; +using Markdown.Models.SyntaxTreeModels; + +namespace Markdown.Entities.Builders +{ + /// + /// Преобразует абстрактное синтаксическое дерево (AST) в текст с новой разметкой. 
+ /// + /// Абстрактное синтаксическое дерево для преобразования + /// Текст с измененной разметкой + public interface IBuilder + { + string Build(ISyntaxTree tree); + } +} diff --git a/cs/Markdown/Entities/Converters/IConverter.cs b/cs/Markdown/Entities/Converters/IConverter.cs new file mode 100644 index 000000000..23fba3428 --- /dev/null +++ b/cs/Markdown/Entities/Converters/IConverter.cs @@ -0,0 +1,17 @@ +using System; +using System.Collections.Generic; +using System.Linq; +using System.Text; +using System.Threading.Tasks; + +namespace Markdown.Entities.Renderers +{ + /// + /// Интерфейс для класса-конвертера который умеет конвертировать один язык разметки в другой, например конвертер из Markdown в HTML + /// Исходный текст с определенным языком разметки который хотим преобразовать + /// + public interface IConverter + { + string Convert(string text); + } +} diff --git a/cs/Markdown/Entities/Converters/MarkdownToHtmlConverter.cs b/cs/Markdown/Entities/Converters/MarkdownToHtmlConverter.cs new file mode 100644 index 000000000..ce8eb96bd --- /dev/null +++ b/cs/Markdown/Entities/Converters/MarkdownToHtmlConverter.cs @@ -0,0 +1,36 @@ +using System; +using System.Collections.Generic; +using System.Linq; +using System.Text; +using System.Threading.Tasks; +using Markdown.Entities.Builders; +using Markdown.Entities.Parsers; +using Markdown.Entities.Renderers; +using Markdown.Models; +using Markdown.Models.SyntaxTree; + +namespace Markdown.Entities.Converters +{ + /// + /// Выполняет полный цикл преобразования Markdown в HTML. + /// Координирует работу токенизатора, парсера и HTML-билдера. + /// + /// Исходный Markdown-текст + /// Строка с HTML-разметкой + /// + /// Последовательность преобразований: + /// 1. Токенизация исходного текста + /// 2. Построение абстрактного синтаксического дерева (AST) + /// 3. 
Рендеринг AST в HTML-формат + /// + public class MarkdownToHtmlConverter : IConverter + { + public string Convert(string text) + { + var tokens = new MarkdownTokenizer().Tokenize(text); + var ast = new SyntaxTree(tokens); + var convertedText = new HtmlBuilder().Build(ast); + return convertedText; + } + } +} diff --git a/cs/Markdown/Entities/Tokenizers/ITokenizer.cs b/cs/Markdown/Entities/Tokenizers/ITokenizer.cs new file mode 100644 index 000000000..beaadd845 --- /dev/null +++ b/cs/Markdown/Entities/Tokenizers/ITokenizer.cs @@ -0,0 +1,24 @@ +using System; +using System.Collections.Generic; +using System.Linq; +using System.Text; +using System.Threading.Tasks; +using Markdown.Models; + +namespace Markdown.Entities.Parsers +{ + /// + /// Интерфейс для класса который будет заниматься преобразованием исходного текста в список токенов для дальнейшего построения синтаксического дерева. + /// Умеет преобразовывать как целый текст в набор токенов, так и обрабатывать коллекции отдельных строк превращая их в список токенов + /// + public interface ITokenizer + { + List Tokenize(string text); + + List TextToLines(string text); + + List TokenizeLines(IEnumerable lines); + + List TokenizeLine(string line); + } +} diff --git a/cs/Markdown/Entities/Tokenizers/MarkdownTokenizer.cs b/cs/Markdown/Entities/Tokenizers/MarkdownTokenizer.cs new file mode 100644 index 000000000..26c596df5 --- /dev/null +++ b/cs/Markdown/Entities/Tokenizers/MarkdownTokenizer.cs @@ -0,0 +1,40 @@ +using System; +using System.Collections.Generic; +using System.Linq; +using System.Text; +using System.Threading.Tasks; +using Markdown.Models; + +namespace Markdown.Entities.Parsers +{ + /// + /// Tokenizer который работает с markdown текстом + /// + public class MarkdownTokenizer : ITokenizer + { + public List Tokenize(string text) + { + return TokenizeLines(TextToLines(text)); + } + + public List TextToLines(string text) + { + return text.Split("\n").ToList(); + } + + public List TokenizeLines(IEnumerable 
lines) + { + var tokens = new List(); + foreach (var line in lines) + { + tokens.AddRange(TokenizeLine(line)); + } + return tokens; + } + + public List TokenizeLine(string line) + { + throw new NotImplementedException(); + } + } +} diff --git a/cs/Markdown/NodeType.cs b/cs/Markdown/Enums/NodeType.cs similarity index 91% rename from cs/Markdown/NodeType.cs rename to cs/Markdown/Enums/NodeType.cs index 4c22d134f..bff511b58 100644 --- a/cs/Markdown/NodeType.cs +++ b/cs/Markdown/Enums/NodeType.cs @@ -4,7 +4,7 @@ using System.Text; using System.Threading.Tasks; -namespace Markdown +namespace Markdown.Enums { public enum NodeType { diff --git a/cs/Markdown/TokenType.cs b/cs/Markdown/Enums/TokenType.cs similarity index 86% rename from cs/Markdown/TokenType.cs rename to cs/Markdown/Enums/TokenType.cs index 593042c01..75bd38b01 100644 --- a/cs/Markdown/TokenType.cs +++ b/cs/Markdown/Enums/TokenType.cs @@ -4,7 +4,7 @@ using System.Text; using System.Threading.Tasks; -namespace Markdown +namespace Markdown.Enums { /// /// HeaderMarker = # @@ -17,7 +17,6 @@ public enum TokenType Newline, HeaderMarker, BoldMarker, - ItalicMarker, - EndOfFile + ItalicMarker } } diff --git a/cs/Markdown/Markdown.cs b/cs/Markdown/Markdown.cs index 75cc1e136..7021ad92c 100644 --- a/cs/Markdown/Markdown.cs +++ b/cs/Markdown/Markdown.cs @@ -1,22 +1,25 @@ - -using Markdown; +using Markdown.Entities.Converters; +using Markdown.Models; namespace markdown { + /// + /// Преобразует Markdown-разметку в HTML-код. + /// Этот класс оставил, хотя в данный момент просто отдает строку преобразованную из Markdown в HTML, но тут можно развить реализацию дальше, + /// допустим если захотим выгружать текст куда-то или печатать на консоль то можем реализовывать логику обработки вывода здесь + /// + /// + /// Принцип работы: + /// 1. Исходный текст разбивается на токены (лексемиы) с помощью + /// 2. Токены преобразуются в абстрактное синтаксическое дерево (AST) класса + /// 3. 
Обходом графа AST полученное дерево превращается в текст с HTML с помощью + /// public class Markdown { - public static string ParseMarkdownToHtml(string text) + public static string Render(string text) { - var tokens = Scanner.TextToListOfTokens(text); - - var AST = SyntaxTreeBuilder.BuildTreeFromTokensList(tokens); - - return RenderTextFromAST(AST); - } - - private static string RenderTextFromAST(List AST) - { - throw new NotImplementedException(); + var markdownConverter = new MarkdownToHtmlConverter(); + return markdownConverter.Convert(text); } } } \ No newline at end of file diff --git a/cs/Markdown/Markdown.csproj b/cs/Markdown/Markdown.csproj index 2150e3797..8a9d71d1a 100644 --- a/cs/Markdown/Markdown.csproj +++ b/cs/Markdown/Markdown.csproj @@ -7,4 +7,8 @@ enable + + + + diff --git a/cs/Markdown/Node.cs b/cs/Markdown/Models/Node.cs similarity index 83% rename from cs/Markdown/Node.cs rename to cs/Markdown/Models/Node.cs index 72414df95..177fe8468 100644 --- a/cs/Markdown/Node.cs +++ b/cs/Markdown/Models/Node.cs @@ -1,12 +1,13 @@ -using System; +using Markdown.Enums; +using System; using System.Collections.Generic; using System.Linq; using System.Text; using System.Threading.Tasks; -namespace Markdown +namespace Markdown.Models { - internal class Node + public class Node { public NodeType Type { get; } public List ChildrenNodes { get; } diff --git a/cs/Markdown/Models/SyntaxTreeModels/ISyntaxTree.cs b/cs/Markdown/Models/SyntaxTreeModels/ISyntaxTree.cs new file mode 100644 index 000000000..e4e643207 --- /dev/null +++ b/cs/Markdown/Models/SyntaxTreeModels/ISyntaxTree.cs @@ -0,0 +1,16 @@ +using System; +using System.Collections.Generic; +using System.Linq; +using System.Text; +using System.Threading.Tasks; + +namespace Markdown.Models.SyntaxTreeModels +{ + /// + /// Представляет абстрактное синтаксическое дерево (AST) документа + /// + public interface ISyntaxTree + { + public List Tree { get; } + } +} diff --git 
a/cs/Markdown/Models/SyntaxTreeModels/SyntaxTree.cs b/cs/Markdown/Models/SyntaxTreeModels/SyntaxTree.cs new file mode 100644 index 000000000..05a127a3b --- /dev/null +++ b/cs/Markdown/Models/SyntaxTreeModels/SyntaxTree.cs @@ -0,0 +1,28 @@ +using System; +using System.Collections.Generic; +using System.Linq; +using System.Text; +using System.Threading.Tasks; +using Markdown.Models.SyntaxTreeModels; + +namespace Markdown.Models.SyntaxTree +{ + public class SyntaxTree : ISyntaxTree + { + /// + /// Коллекция корневых узлов абстрактного синтаксического дерева + /// + public List Tree { get; } + + /// + /// Строит абстрактное синтаксическое дерево (AST) из потока токенов. + /// Анализирует последовательность токенов и создает иерархическую структуру узлов, + /// отражающую семантическую структуру исходного документа. + /// + /// Коллекция токенов, полученная от лексического анализатора + public SyntaxTree(List tokens) + { + throw new NotImplementedException(); + } + } +} diff --git a/cs/Markdown/Token.cs b/cs/Markdown/Models/Token.cs similarity index 54% rename from cs/Markdown/Token.cs rename to cs/Markdown/Models/Token.cs index 904f6a2ef..952ea3951 100644 --- a/cs/Markdown/Token.cs +++ b/cs/Markdown/Models/Token.cs @@ -1,12 +1,16 @@ -using System; +using Markdown.Enums; +using System; using System.Collections.Generic; using System.Linq; using System.Text; using System.Threading.Tasks; -namespace Markdown +namespace Markdown.Models { - internal class Token + /// + /// Представляет лексему (токен), извлеченную в процессе лексического анализа + /// + public class Token { public TokenType Type { get; } public string Value { get; } diff --git a/cs/Markdown/Scanner.cs b/cs/Markdown/Scanner.cs deleted file mode 100644 index a0d0aba1f..000000000 --- a/cs/Markdown/Scanner.cs +++ /dev/null @@ -1,41 +0,0 @@ -using System; -using System.Collections.Generic; -using System.Diagnostics; -using System.Linq; -using System.Text; -using System.Threading.Tasks; - -namespace Markdown -{ - 
internal class Scanner - { - public static List TextToListOfTokens(string text) - { - var lines = TextToLines(text); - var tokens = LinesToTokens(lines); - throw new NotImplementedException(); - } - - public static List TextToLines(string text) - { - throw new NotImplementedException(); - } - - public static List LinesToTokens(IEnumerable lines) - { - var tokens = new List(); - - foreach (var line in lines) - { - tokens.AddRange(ConvertLineToTokens(line)); - } - - return tokens; - } - - public static List ConvertLineToTokens(string line) - { - throw new NotImplementedException(); - } - } -} diff --git a/cs/Markdown/SyntaxTreeBuilder.cs b/cs/Markdown/SyntaxTreeBuilder.cs deleted file mode 100644 index 5eb00b527..000000000 --- a/cs/Markdown/SyntaxTreeBuilder.cs +++ /dev/null @@ -1,16 +0,0 @@ -using System; -using System.Collections.Generic; -using System.Linq; -using System.Text; -using System.Threading.Tasks; - -namespace Markdown -{ - internal class SyntaxTreeBuilder - { - public static List BuildTreeFromTokensList(List tokens) - { - throw new NotImplementedException(); - } - } -} From 89b0a04150eb350192f2b0952dd773080e121839 Mon Sep 17 00:00:00 2001 From: Anton Savitskikh Date: Tue, 4 Nov 2025 17:45:39 +0500 Subject: [PATCH 3/5] Updated Converter class in markdown project --- cs/Markdown/Entities/Converters/Converter.cs | 53 +++++++++++++++++++ .../Converters/MarkdownToHtmlConverter.cs | 36 ------------- cs/Markdown/Markdown.cs | 6 ++- cs/Markdown/Models/Node.cs | 3 ++ 4 files changed, 60 insertions(+), 38 deletions(-) create mode 100644 cs/Markdown/Entities/Converters/Converter.cs delete mode 100644 cs/Markdown/Entities/Converters/MarkdownToHtmlConverter.cs diff --git a/cs/Markdown/Entities/Converters/Converter.cs b/cs/Markdown/Entities/Converters/Converter.cs new file mode 100644 index 000000000..d232eb0a1 --- /dev/null +++ b/cs/Markdown/Entities/Converters/Converter.cs @@ -0,0 +1,53 @@ +using System; +using System.Collections.Generic; +using System.Linq; +using 
System.Text; +using System.Threading.Tasks; +using Markdown.Entities.Builders; +using Markdown.Entities.Parsers; +using Markdown.Entities.Renderers; +using Markdown.Models; +using Markdown.Models.SyntaxTree; + +namespace Markdown.Entities.Converters +{ + /// + /// Выполняет полный цикл преобразования одного языка разметки в другой. + /// Координирует работу токенизатора и билдера. + /// Конструктор позволяет определить желаемый билдер и токенизатор под текущую задачу, например + /// в нашем случае токенизатор умеющий работать с markdown-разметкой а buidler который умеет строить + /// по синтаксическому дереву исходный текст с html-разметкой + /// + /// Исходный Markdown-текст + /// Строка с HTML-разметкой + /// + /// Последовательность преобразований: + /// 1. Токенизация исходного текста + /// 2. Построение абстрактного синтаксического дерева (AST) + /// 3. Рендеринг AST в HTML-формат + /// + public class Converter : IConverter + { + public IBuilder Builder { get; } + public ITokenizer Tokenizer { get; } + + public Converter(IBuilder builder, ITokenizer tokenizer) + { + Builder = builder; + Tokenizer = tokenizer; + } + + public string Convert(string text) + { + if (Builder != null && Tokenizer != null) + { + var tokens = Tokenizer.Tokenize(text); + var ast = new SyntaxTree(tokens); + var convertedText = Builder.Build(ast); + return convertedText; + } + + throw new NullReferenceException(); + } + } +} diff --git a/cs/Markdown/Entities/Converters/MarkdownToHtmlConverter.cs b/cs/Markdown/Entities/Converters/MarkdownToHtmlConverter.cs deleted file mode 100644 index ce8eb96bd..000000000 --- a/cs/Markdown/Entities/Converters/MarkdownToHtmlConverter.cs +++ /dev/null @@ -1,36 +0,0 @@ -using System; -using System.Collections.Generic; -using System.Linq; -using System.Text; -using System.Threading.Tasks; -using Markdown.Entities.Builders; -using Markdown.Entities.Parsers; -using Markdown.Entities.Renderers; -using Markdown.Models; -using Markdown.Models.SyntaxTree; - 
-namespace Markdown.Entities.Converters -{ - /// - /// Выполняет полный цикл преобразования Markdown в HTML. - /// Координирует работу токенизатора, парсера и HTML-билдера. - /// - /// Исходный Markdown-текст - /// Строка с HTML-разметкой - /// - /// Последовательность преобразований: - /// 1. Токенизация исходного текста - /// 2. Построение абстрактного синтаксического дерева (AST) - /// 3. Рендеринг AST в HTML-формат - /// - public class MarkdownToHtmlConverter : IConverter - { - public string Convert(string text) - { - var tokens = new MarkdownTokenizer().Tokenize(text); - var ast = new SyntaxTree(tokens); - var convertedText = new HtmlBuilder().Build(ast); - return convertedText; - } - } -} diff --git a/cs/Markdown/Markdown.cs b/cs/Markdown/Markdown.cs index 7021ad92c..c1773cc5d 100644 --- a/cs/Markdown/Markdown.cs +++ b/cs/Markdown/Markdown.cs @@ -1,4 +1,6 @@ -using Markdown.Entities.Converters; +using Markdown.Entities.Builders; +using Markdown.Entities.Converters; +using Markdown.Entities.Parsers; using Markdown.Models; namespace markdown @@ -18,7 +20,7 @@ public class Markdown { public static string Render(string text) { - var markdownConverter = new MarkdownToHtmlConverter(); + var markdownConverter = new Converter(new HtmlBuilder(), new MarkdownTokenizer()); return markdownConverter.Convert(text); } } diff --git a/cs/Markdown/Models/Node.cs b/cs/Markdown/Models/Node.cs index 177fe8468..a71c8039a 100644 --- a/cs/Markdown/Models/Node.cs +++ b/cs/Markdown/Models/Node.cs @@ -7,6 +7,9 @@ namespace Markdown.Models { + /// + /// Представляет узел абстрактного синтаксического дерева (AST) + /// public class Node { public NodeType Type { get; } From fb8d33b500031c88e13e01f90bff1293475a60c1 Mon Sep 17 00:00:00 2001 From: Anton Savitskikh Date: Fri, 14 Nov 2025 01:01:39 +0500 Subject: [PATCH 4/5] Made markdown to html converter with tests --- .../Entities/Builders/HtmlBuilder.cs | 129 +++++ .../Entities/Builders/IBuilder.cs | 20 + 
.../Entities/Converters/Converter.cs | 46 ++ .../Entities/Converters/IConverter.cs | 12 + .../Entities/Tokenizers/ITokenizer.cs | 19 + .../Entities/Tokenizers/MarkdownTokenizer.cs | 469 +++++++++++++++ cs/MarkdownTests/Enums/NodeType.cs | 12 + cs/MarkdownTests/Enums/TokenType.cs | 18 + cs/MarkdownTests/Markdown.cs | 26 + cs/MarkdownTests/Markdown.csproj | 21 + .../BuildersTests/HtmlBuilderTests.cs | 317 ++++++++++ .../ConverterPerformanceTest.cs | 79 +++ .../ConverterTests/ConverterTests.cs | 437 ++++++++++++++ .../SyntaxTreeTests/SyntaxTreeTests.cs | 315 ++++++++++ .../TokenizersTests/MarkdownTokenizerTests.cs | 543 ++++++++++++++++++ cs/MarkdownTests/Models/Node.cs | 21 + .../Models/SyntaxTreeModels/ISyntaxTree.cs | 11 + .../Models/SyntaxTreeModels/SyntaxTree.cs | 221 +++++++ cs/MarkdownTests/Models/Token.cs | 24 + 19 files changed, 2740 insertions(+) create mode 100644 cs/MarkdownTests/Entities/Builders/HtmlBuilder.cs create mode 100644 cs/MarkdownTests/Entities/Builders/IBuilder.cs create mode 100644 cs/MarkdownTests/Entities/Converters/Converter.cs create mode 100644 cs/MarkdownTests/Entities/Converters/IConverter.cs create mode 100644 cs/MarkdownTests/Entities/Tokenizers/ITokenizer.cs create mode 100644 cs/MarkdownTests/Entities/Tokenizers/MarkdownTokenizer.cs create mode 100644 cs/MarkdownTests/Enums/NodeType.cs create mode 100644 cs/MarkdownTests/Enums/TokenType.cs create mode 100644 cs/MarkdownTests/Markdown.cs create mode 100644 cs/MarkdownTests/Markdown.csproj create mode 100644 cs/MarkdownTests/MarkdownTests/BuildersTests/HtmlBuilderTests.cs create mode 100644 cs/MarkdownTests/MarkdownTests/ConverterTests/ConverterPerformanceTest.cs create mode 100644 cs/MarkdownTests/MarkdownTests/ConverterTests/ConverterTests.cs create mode 100644 cs/MarkdownTests/MarkdownTests/SyntaxTreeTests/SyntaxTreeTests.cs create mode 100644 cs/MarkdownTests/MarkdownTests/TokenizersTests/MarkdownTokenizerTests.cs create mode 100644 cs/MarkdownTests/Models/Node.cs create mode 
100644 cs/MarkdownTests/Models/SyntaxTreeModels/ISyntaxTree.cs create mode 100644 cs/MarkdownTests/Models/SyntaxTreeModels/SyntaxTree.cs create mode 100644 cs/MarkdownTests/Models/Token.cs diff --git a/cs/MarkdownTests/Entities/Builders/HtmlBuilder.cs b/cs/MarkdownTests/Entities/Builders/HtmlBuilder.cs new file mode 100644 index 000000000..08ddc61d8 --- /dev/null +++ b/cs/MarkdownTests/Entities/Builders/HtmlBuilder.cs @@ -0,0 +1,129 @@ +using System.Text; +using Markdown.Enums; +using Markdown.Models; +using Markdown.Models.SyntaxTreeModels; + +namespace Markdown.Entities.Builders +{ + /// + /// Преобразует абстрактное синтаксическое дерево (AST) в HTML-код. + /// Выполняет обход дерева в глубину и генерирует соответствующие HTML-теги для каждого узла. + /// + /// Абстрактное синтаксическое дерево для преобразования + /// HTML-код, соответствующий структуре исходного дерева + /// + /// Для каждого типа узла AST генерирует соответствующий HTML-тег: + /// - Заголовки → h1 + /// - Жирный текст → strong + /// - Курсив → em + /// - Параграфы → p + /// + public class HtmlBuilder : IBuilder + { + private readonly StringBuilder _htmlBuilder; + private readonly Dictionary _tagMapping; + + public HtmlBuilder() + { + _htmlBuilder = new StringBuilder(); + _tagMapping = new Dictionary + { + { NodeType.Document, "" }, + { NodeType.Paragraph, "p" }, + { NodeType.Header, "h1" }, + { NodeType.Bold, "strong" }, + { NodeType.Italic, "em" }, + { NodeType.Text, "" } + }; + } + + public string Build(ISyntaxTree tree) + { + if (tree == null) + return string.Empty; + + _htmlBuilder.Clear(); + BuildNodes(tree.Tree); + return _htmlBuilder.ToString(); + } + + private void BuildNodes(List nodes) + { + foreach (var node in nodes) + { + BuildNode(node); + } + } + + private void BuildNode(Node node) + { + switch (node.Type) + { + case NodeType.Text: + BuildTextNode(node); + break; + case NodeType.Document: + BuildDocumentNode(node); + break; + default: + BuildFormattedNode(node); + break; + } + 
} + + private void BuildTextNode(Node node) + { + if (!string.IsNullOrEmpty(node.Value)) + { + var escapedText = EscapeHtml(node.Value); + _htmlBuilder.Append(escapedText); + } + } + + private void BuildDocumentNode(Node node) + { + if (node.ChildrenNodes != null) + { + BuildNodes(node.ChildrenNodes); + } + } + + private void BuildFormattedNode(Node node) + { + var tagName = _tagMapping[node.Type]; + + if (!string.IsNullOrEmpty(tagName)) + { + _htmlBuilder.Append($"<{tagName}>"); + } + + if (node.ChildrenNodes != null && node.ChildrenNodes.Count > 0) + { + BuildNodes(node.ChildrenNodes); + } + else if (!string.IsNullOrEmpty(node.Value)) + { + BuildTextNode(node); + } + + if (!string.IsNullOrEmpty(tagName)) + { + _htmlBuilder.Append($""); + } + } + + private string EscapeHtml(string text) + { + if (string.IsNullOrEmpty(text)) + return text; + + // Эскейпинг HTML-символов для безопасности + return text + .Replace("&", "&") + .Replace("<", "<") + .Replace(">", ">") + .Replace("\"", """) + .Replace("'", "'"); + } + } +} \ No newline at end of file diff --git a/cs/MarkdownTests/Entities/Builders/IBuilder.cs b/cs/MarkdownTests/Entities/Builders/IBuilder.cs new file mode 100644 index 000000000..d3b02cdfa --- /dev/null +++ b/cs/MarkdownTests/Entities/Builders/IBuilder.cs @@ -0,0 +1,20 @@ +using System; +using System.Collections.Generic; +using System.Linq; +using System.Text; +using System.Threading.Tasks; +using Markdown.Models.SyntaxTree; +using Markdown.Models.SyntaxTreeModels; + +namespace Markdown.Entities.Builders +{ + /// + /// Преобразует абстрактное синтаксическое дерево (AST) в текст с новой разметкой. 
+ /// + /// Абстрактное синтаксическое дерево для преобразования + /// Текст с измененной разметкой + public interface IBuilder + { + string Build(ISyntaxTree tree); + } +} diff --git a/cs/MarkdownTests/Entities/Converters/Converter.cs b/cs/MarkdownTests/Entities/Converters/Converter.cs new file mode 100644 index 000000000..fccd6fa92 --- /dev/null +++ b/cs/MarkdownTests/Entities/Converters/Converter.cs @@ -0,0 +1,46 @@ +using Markdown.Entities.Builders; +using Markdown.Entities.Parsers; +using Markdown.Entities.Renderers; +using Markdown.Models.SyntaxTree; + +namespace Markdown.Entities.Converters +{ + /// + /// Выполняет полный цикл преобразования одного языка разметки в другой. + /// Координирует работу токенизатора и билдера. + /// Конструктор позволяет определить желаемый билдер и токенизатор под текущую задачу, например + /// в нашем случае токенизатор умеющий работать с markdown-разметкой а buidler который умеет строить + /// по синтаксическому дереву исходный текст с html-разметкой + /// + /// Исходный Markdown-текст + /// Строка с HTML-разметкой + /// + /// Последовательность преобразований: + /// 1. Токенизация исходного текста + /// 2. Построение абстрактного синтаксического дерева (AST) + /// 3. 
Рендеринг AST в HTML-формат + /// + public class Converter : IConverter + { + public IBuilder Builder { get; } + public ITokenizer Tokenizer { get; } + + public Converter(IBuilder builder, ITokenizer tokenizer) + { + Builder = builder; + Tokenizer = tokenizer; + } + + public string Convert(string text) + { + if (Builder != null && Tokenizer != null) + { + var tokens = Tokenizer.Tokenize(text); + var ast = new SyntaxTree(tokens); + var convertedText = Builder.Build(ast); + return convertedText; + } + throw new NullReferenceException(); + } + } +} diff --git a/cs/MarkdownTests/Entities/Converters/IConverter.cs b/cs/MarkdownTests/Entities/Converters/IConverter.cs new file mode 100644 index 000000000..658b61716 --- /dev/null +++ b/cs/MarkdownTests/Entities/Converters/IConverter.cs @@ -0,0 +1,12 @@ + +namespace Markdown.Entities.Renderers +{ + /// + /// Интерфейс для класса-конвертера который умеет конвертировать один язык разметки в другой, например конвертер из Markdown в HTML + /// Исходный текст с определенным языком разметки который хотим преобразовать + /// + public interface IConverter + { + string Convert(string text); + } +} diff --git a/cs/MarkdownTests/Entities/Tokenizers/ITokenizer.cs b/cs/MarkdownTests/Entities/Tokenizers/ITokenizer.cs new file mode 100644 index 000000000..0fdc35bff --- /dev/null +++ b/cs/MarkdownTests/Entities/Tokenizers/ITokenizer.cs @@ -0,0 +1,19 @@ +using Markdown.Models; + +namespace Markdown.Entities.Parsers +{ + /// + /// Интерфейс для класса который будет заниматься преобразованием исходного текста в список токенов для дальнейшего построения синтаксического дерева. 
+ /// Умеет преобразовывать как целый текст в набор токенов, так и обрабатывать коллекции отдельных строк превращая их в список токенов + /// + public interface ITokenizer + { + List Tokenize(string text); + + List TextToLines(string text); + + List TokenizeLines(IEnumerable lines); + + List TokenizeLine(string line); + } +} diff --git a/cs/MarkdownTests/Entities/Tokenizers/MarkdownTokenizer.cs b/cs/MarkdownTests/Entities/Tokenizers/MarkdownTokenizer.cs new file mode 100644 index 000000000..bc440eeae --- /dev/null +++ b/cs/MarkdownTests/Entities/Tokenizers/MarkdownTokenizer.cs @@ -0,0 +1,469 @@ +using System.Text; +using Markdown.Enums; +using Markdown.Models; + +namespace Markdown.Entities.Parsers +{ + /// + /// Tokenizer который работает с markdown текстом + /// + public class MarkdownTokenizer : ITokenizer + { + public List Tokenize(string text) + { + return TokenizeLines(TextToLines(text)); + } + + public List TextToLines(string text) + { + return text.Split("\n").ToList(); + } + + public List TokenizeLines(IEnumerable lines) + { + var tokens = new List(); + foreach (var line in lines) + { + tokens.AddRange(TokenizeLine(line)); + tokens.Add(new Token(TokenType.Newline)); + } + return tokens; + } + + /// + /// Ключевые правила из спецификации: + /// Приоритет обработки: экранирование → заголовки → жирный → курсив + /// Вложенность: жирный может содержать курсив, но не наоборот + /// Экранирование: \ отменяет разметку следующего символа + /// Заголовки: только в начале строки с # + /// Валидация: проверки на пробелы, цифры, пересечение слов + /// Непарные теги: остаются как обычный текст + /// + /// + /// + public List TokenizeLine(string line) + { + var tokens = new List(); + var currentPosition = 0; + var textBuffer = new StringBuilder(); + + if (IsHeader(line)) + { + ProcessHeader(line, ref currentPosition, tokens); + } + + while (currentPosition < line.Length) + { + var currentChar = line[currentPosition]; + + if (currentChar == '\\') + { + 
ProcessEscapeCharacter(line, ref currentPosition, textBuffer); + continue; + } + + if (IsBoldMarker(line, currentPosition)) + { + ProcessBoldMarker(line, ref currentPosition, tokens, textBuffer); + continue; + } + + if (IsItalicsMarker(line, currentPosition)) + { + ProcessItalicsMarker(line, ref currentPosition, tokens, textBuffer, false); + continue; + } + + textBuffer.Append(currentChar); + currentPosition++; + } + + if(textBuffer.Length > 0) FlushTextBufferIfNotEmpty(textBuffer, tokens); + return tokens; + } + + /// + /// Метод обработки тэга курсива, превращает часть строки в набор токенов если она удовлетворяет условиям спецификации + /// Дополнительно обрабатывает случай неправильной вложенности тэга курсива и полужирного тэга + /// + /// + /// + /// + /// + /// - флаг для обработки случая когда находим курсив внутри полужирного текста + private void ProcessItalicsMarker(string line, ref int currentPosition, List tokens, StringBuilder originalTextBuffer, bool isNestedCall) + { + var isInWord = false; + var startPosition = currentPosition; + var foundBoldMarker = false; + + var textBuffer = new StringBuilder(); + + if (IsMarkerInsideWord(line, currentPosition, false)) isInWord = true; + + SkipItalicsMarker(ref currentPosition); + + while (currentPosition < line.Length) + { + var currentChar = line[currentPosition]; + + if (IsValidClosingMarker(line, currentPosition, false)) + { + var isEmptyWord = IsEmptyMarkedWord(currentPosition, startPosition, false); + if (isEmptyWord) + { + originalTextBuffer.Append(textBuffer); + return; + } + + if (foundBoldMarker) + { + PrependMarkerToBuffer(textBuffer, false); + originalTextBuffer.Append(textBuffer); + return; + } + + FlushTextBufferIfNotEmpty(originalTextBuffer, tokens); + AddTokensFromBufferWithSpecifiedTags(textBuffer, tokens, TokenType.ItalicsStart, TokenType.ItalicsEnd); + SkipItalicsMarker(ref currentPosition); + return; + } + + if (currentChar == '_') + { + if (line[currentPosition + 1] == '_') + { + if 
(isNestedCall) + { + AppendMarkerToBuffer(textBuffer, true); + SkipBoldMarker(ref currentPosition); + + AppendMarkerToBuffer(originalTextBuffer, false); + originalTextBuffer.Append(textBuffer); + return; + } + //Меняем состояние флага на обратное + foundBoldMarker = !foundBoldMarker; + } + } + + if (currentChar == ' ') + { + if (isInWord) + { + AppendMarkerToBuffer(originalTextBuffer, false); + originalTextBuffer.Append(textBuffer); + return; + } + + } + + if (currentChar == '\\') + { + ProcessEscapeCharacter(line, ref currentPosition, textBuffer); + continue; + } + + textBuffer.Append(currentChar); + currentPosition++; + } + + PrependMarkerToBuffer(textBuffer, false); + originalTextBuffer.Append(textBuffer); + } + + /// + /// Метод обработки тэга полужирного текста, превращает часть строки в набор токенов если она удовлетворяет условиям спецификации + /// + /// + /// + /// + /// + private void ProcessBoldMarker(string line, ref int currentPosition, List tokens, StringBuilder originalTextBuffer) + { + var isClosingFound = false; + var isInWord = IsMarkerInsideWord(line, currentPosition, true); + var startPosition = currentPosition; + + var textBuffer = new StringBuilder(); + var innerTokens = new List(); + + SkipBoldMarker(ref currentPosition); + + while (currentPosition < line.Length) + { + var currentChar = line[currentPosition]; + + if (IsValidClosingMarker(line, currentPosition, true)) + { + var isEmptyWord = IsEmptyMarkedWord(currentPosition, startPosition, true); + if (isEmptyWord) + { + AppendMarkerToBuffer(textBuffer, true); + originalTextBuffer.Append(textBuffer); + return; + } + + //если нашли токены после вложенного вызова ProcessItalicsMarker + if (innerTokens.Count > 0) + { + FlushTextBufferIfNotEmpty(originalTextBuffer, tokens); + AddNestedItalicsTokens(textBuffer, tokens, innerTokens); + SkipBoldMarker(ref currentPosition); + return; + } + + FlushTextBufferIfNotEmpty(originalTextBuffer, tokens); + AddTokensFromBufferWithSpecifiedTags(textBuffer, 
tokens, TokenType.BoldStart, TokenType.BoldEnd); + SkipBoldMarker(ref currentPosition); + return; + } + + if (currentChar == '_') + { + ProcessItalicsMarker(line, ref currentPosition, innerTokens, textBuffer, true); + continue; + } + + if (currentChar == ' ') + { + if (isInWord) + { + AppendMarkerToBuffer(originalTextBuffer, true); + originalTextBuffer.Append(textBuffer); + return; + } + + } + + if (currentChar == '\\') + { + ProcessEscapeCharacter(line, ref currentPosition, textBuffer); + continue; + } + + textBuffer.Append(currentChar); + currentPosition++; + } + + PrependMarkerToBuffer(textBuffer, true); + originalTextBuffer.Append(textBuffer); + } + + private void AddNestedItalicsTokens(StringBuilder textBuffer, List tokens, List innerTokens) + { + tokens.Add(new Token(TokenType.BoldStart)); + tokens.AddRange(innerTokens); + FlushTextBufferIfNotEmpty(textBuffer, tokens); + tokens.Add(new Token(TokenType.BoldEnd)); + } + + private void AddTokensFromBufferWithSpecifiedTags(StringBuilder textBuffer, List tokens, TokenType startToken, TokenType endToken) + { + tokens.Add(new Token(startToken)); + FlushTextBufferIfNotEmpty(textBuffer, tokens); + tokens.Add(new Token(endToken)); + } + + private bool IsEmptyMarkedWord(int currentPosition, int startPosition, bool isBold) + { + if (isBold) return currentPosition - startPosition == 2; + return currentPosition - startPosition == 1; + } + + private void SkipItalicsMarker(ref int currentPosition) + { + currentPosition++; + } + + private void SkipBoldMarker(ref int currentPosition) + { + currentPosition += 2; + } + + private void PrependMarkerToBuffer(StringBuilder textBuffer, bool isBold) + { + if (isBold) + { + textBuffer.Insert(0, "__"); + } + else + { + textBuffer.Insert(0, '_'); + } + } + + private void AppendMarkerToBuffer(StringBuilder textBuffer, bool isBold) + { + if (isBold) + { + textBuffer.Append("__"); + } + else + { + textBuffer.Append('_'); + } + } + + //проверяем что открывающий маркер находится внутри слова 
+ private bool IsMarkerInsideWord(string line, int currentPosition, bool isBold) + { + if (currentPosition == 0) return false; + + if (isBold) + { + return currentPosition + 2 < line.Length + && char.IsLetter(line[currentPosition - 1]) + && char.IsLetter(line[currentPosition + 2]); + } + + return (currentPosition + 1 < line.Length) + && char.IsLetter(line[currentPosition - 1]) + && char.IsLetter(line[currentPosition + 1]); + } + + private bool IsValidClosingMarker(string line, int currentPosition, bool isBold) + { + if (isBold) + { + return currentPosition + 1 < line.Length + && line[currentPosition] == '_' + && line[currentPosition + 1] == '_' + && !char.IsWhiteSpace(line[currentPosition - 1]); + } + //обработка для курсива + if (currentPosition + 1 < line.Length) + { + return line[currentPosition] == '_' + && line[currentPosition + 1] != '_' + && line[currentPosition - 1] != '_' + && !char.IsWhiteSpace(line[currentPosition - 1]); + } + return line[currentPosition] == '_' + && !char.IsWhiteSpace(line[currentPosition - 1]); + } + + private bool IsItalicsMarker(string line, int currentPosition) + { + return currentPosition + 1 < line.Length + && line[currentPosition] == '_' && line[currentPosition + 1] != '_' + && !IsSpaceAfterMarker(line, currentPosition, false) + && !IsAmongDigits(line, currentPosition, false); + } + + private bool IsAmongDigits(string line, int currentPosition, bool isBold) + { + //если маркер не в самом начале то посмотрим назад + if (currentPosition > 0) + { + if (isBold) + { + return currentPosition + 2 < line.Length + && (char.IsDigit(line[currentPosition - 1]) || char.IsDigit(line[currentPosition + 2])); + } + return char.IsDigit(line[currentPosition - 1]) + || char.IsDigit(line[currentPosition + 1]); + } + + //если маркер в начале строки посмотрим что числа только спереди + if (isBold) + { + return currentPosition + 2 < line.Length + && char.IsDigit(line[currentPosition + 2]); + } + return char.IsDigit(line[currentPosition + 1]); + } + + 
//проверим что текущий и след.символы у нас подчеркивания и непробельный символ после + private bool IsBoldMarker(string line, int currentPosition) + { + return currentPosition + 1 < line.Length + && line[currentPosition] == '_' + && line[currentPosition + 1] == '_' + && !IsSpaceAfterMarker(line, currentPosition, true) + && !IsAmongDigits(line, currentPosition, true); + } + + private bool IsSpaceAfterMarker(string line, int currentPosition, bool isBold) + { + if(isBold) return char.IsWhiteSpace(line[currentPosition + 2]); + return char.IsWhiteSpace(line[currentPosition + 1]); + } + + private void FlushTextBufferIfNotEmpty(StringBuilder textBuffer, List tokens) + { + if (textBuffer.Length > 0) + { + tokens.Add(new Token(TokenType.Text, textBuffer.ToString())); + textBuffer.Clear(); + } + } + + private void ProcessEscapeCharacter(string line, ref int currentPosition, StringBuilder textBuffer) + { + var anyTagsAfter = IsEscapeBeforeMarkers(line, currentPosition); + if (anyTagsAfter) + { + //пропустить текущий, добавить новый в буфер, перейти вперед снова + if (currentPosition + 2 < line.Length) + { + currentPosition++; + textBuffer.Append(line[currentPosition]); + if (line[currentPosition] == '_' && line[currentPosition + 1] == '_') + { + currentPosition++; + textBuffer.Append(line[currentPosition]); + } + if (currentPosition + 1 < line.Length) + { + currentPosition++; + } + } + else if (currentPosition + 1 < line.Length) + { + currentPosition++; + textBuffer.Append(line[currentPosition]); + currentPosition++; + } + } + else + { + //записать текущий слэш как есть + if (currentPosition < line.Length) + { + textBuffer.Append(line[currentPosition]); + currentPosition++; + } + } + } + + //Находится ли символ экранирования перед маркерами или другим экранированием + private bool IsEscapeBeforeMarkers(string line, int currentPosition) + { + if (currentPosition + 2 < line.Length) + { + return (line[currentPosition + 1] == '_' || + line[currentPosition + 1] == '\\' || + 
(line[currentPosition + 1] == '_' && line[currentPosition + 2] == '_')); + } + else if (currentPosition + 1 < line.Length) + { + return (line[currentPosition + 1] == '_' || + line[currentPosition + 1] == '\\'); + } + return false; + } + + private void ProcessHeader(string line, ref int currentPosition, List tokens) + { + tokens.Add(new Token(TokenType.Header)); + if (line.Length > 2) currentPosition = 2; + } + + private bool IsHeader(string line) + { + return line.StartsWith("# "); + } + + } +} diff --git a/cs/MarkdownTests/Enums/NodeType.cs b/cs/MarkdownTests/Enums/NodeType.cs new file mode 100644 index 000000000..d13433dba --- /dev/null +++ b/cs/MarkdownTests/Enums/NodeType.cs @@ -0,0 +1,12 @@ +namespace Markdown.Enums +{ + public enum NodeType + { + Document, + Paragraph, + Header, + Bold, + Italic, + Text + } +} diff --git a/cs/MarkdownTests/Enums/TokenType.cs b/cs/MarkdownTests/Enums/TokenType.cs new file mode 100644 index 000000000..63eac4032 --- /dev/null +++ b/cs/MarkdownTests/Enums/TokenType.cs @@ -0,0 +1,18 @@ +namespace Markdown.Enums +{ + /// + /// HeaderMarker = # + /// BoldMarker = __ + /// ItalicsMarker = _ + /// + public enum TokenType + { + Text, + Header, + BoldStart, + BoldEnd, + ItalicsStart, + ItalicsEnd, + Newline + } +} diff --git a/cs/MarkdownTests/Markdown.cs b/cs/MarkdownTests/Markdown.cs new file mode 100644 index 000000000..4c9bc2bbe --- /dev/null +++ b/cs/MarkdownTests/Markdown.cs @@ -0,0 +1,26 @@ +using Markdown.Entities.Builders; +using Markdown.Entities.Converters; +using Markdown.Entities.Parsers; + +namespace markdown +{ + /// + /// Преобразует Markdown-разметку в HTML-код. + /// Этот класс оставил, хотя в данный момент просто отдает строку преобразованную из Markdown в HTML, но тут можно развить реализацию дальше, + /// допустим если захотим выгружать текст куда-то или печатать на консоль то можем реализовывать логику обработки вывода здесь + /// + /// + /// Принцип работы: + /// 1. 
Исходный текст разбивается на токены (лексемы) с помощью + /// 2. Токены преобразуются в абстрактное синтаксическое дерево (AST) класса + /// 3. Обходом графа AST полученное дерево превращается в текст с HTML с помощью + /// + public class Markdown + { + public static string Render(string text) + { + var markdownConverter = new Converter(new HtmlBuilder(), new MarkdownTokenizer()); + return markdownConverter.Convert(text); + } + } +} \ No newline at end of file diff --git a/cs/MarkdownTests/Markdown.csproj b/cs/MarkdownTests/Markdown.csproj new file mode 100644 index 000000000..561c21035 --- /dev/null +++ b/cs/MarkdownTests/Markdown.csproj @@ -0,0 +1,21 @@ + + + + net8.0 + enable + enable + + false + true + + + + + + + + + + + + diff --git a/cs/MarkdownTests/MarkdownTests/BuildersTests/HtmlBuilderTests.cs b/cs/MarkdownTests/MarkdownTests/BuildersTests/HtmlBuilderTests.cs new file mode 100644 index 000000000..9407e662b --- /dev/null +++ b/cs/MarkdownTests/MarkdownTests/BuildersTests/HtmlBuilderTests.cs @@ -0,0 +1,317 @@ +using NUnit.Framework; +using Markdown.Models; +using Markdown.Entities.Builders; +using Markdown.Enums; +using Markdown.Models.SyntaxTreeModels; + +namespace Markdown.MarkdownTests.BuildersTests +{ + [TestFixture] + public class HtmlBuilderTests + { + private HtmlBuilder _htmlBuilder; + + [SetUp] + public void Setup() + { + _htmlBuilder = new HtmlBuilder(); + } + + [Test] + public void Build_ReturnsEmTag_WithItalicText() + { + var tree = CreateSyntaxTree( + CreateParagraph( + CreateItalic("окруженный с двух сторон") + ) + ); + + var result = _htmlBuilder.Build(tree); + + Assert.That(result, Is.EqualTo("

окруженный с двух сторон

")); + } + + [Test] + public void Build_ReturnsStrongTag_WithBoldText() + { + var tree = CreateSyntaxTree( + CreateParagraph( + CreateBold("выделенный текст") + ) + ); + + var result = _htmlBuilder.Build(tree); + + Assert.That(result, Is.EqualTo("

выделенный текст

")); + } + + [Test] + public void Build_ReturnsPlainText_WithEscapedUnderscore() + { + var tree = CreateSyntaxTree( + CreateParagraph( + CreateText("_Вот это_") + ) + ); + + var result = _htmlBuilder.Build(tree); + + Assert.That(result, Is.EqualTo("

_Вот это_

")); + } + + [Test] + public void Build_ReturnsNestedTags_WithBoldInsideItalic() + { + var tree = CreateSyntaxTree( + CreateParagraph( + CreateBold( + CreateText("внутри двойного "), + CreateItalic("одинарное работает"), + CreateText(" правильно") + ) + ) + ); + + var result = _htmlBuilder.Build(tree); + + Assert.That(result, Is.EqualTo("

внутри двойного одинарное работает правильно

")); + } + + [Test] + public void Build_ItalicTreatedAsText_WithItalicInsideBold() + { + var tree = CreateSyntaxTree( + CreateParagraph( + CreateItalic( + CreateText("внутри одинарного "), + CreateText("__двойное не работает__"), + CreateText(" как ожидается") + ) + ) + ); + + var result = _htmlBuilder.Build(tree); + + Assert.That(result, Is.EqualTo("

внутри одинарного __двойное не работает__ как ожидается

")); + } + + [Test] + public void Build_ReturnsPlainText_WithUnderscoresWithNumbers() + { + var tree = CreateSyntaxTree( + CreateParagraph( + CreateText("цифрами_12_3") + ) + ); + + var result = _htmlBuilder.Build(tree); + + Assert.That(result, Is.EqualTo("

цифрами_12_3

")); + } + + [Test] + public void Build_CanBeFormatted_WithWordParts() + { + var tree = CreateSyntaxTree( + CreateParagraph( + CreateText("в нач"), + CreateItalic("але"), + CreateText(" слова") + ) + ); + + var result = _htmlBuilder.Build(tree); + + Assert.That(result, Is.EqualTo("

в начале слова

")); + } + + [Test] + public void Build_ReturnsPlainUnderscores_WithEmptyFormatting() + { + var tree = CreateSyntaxTree( + CreateParagraph( + CreateText("____") + ) + ); + + var result = _htmlBuilder.Build(tree); + + Assert.That(result, Is.EqualTo("

____

")); + } + + [Test] + public void Build_ReturnsHeaderWithNestedTags_WithHeaderWithFormatting() + { + var tree = CreateSyntaxTree( + CreateHeader( + CreateText("Заголовок "), + CreateBold( + CreateText("с "), + CreateItalic("разными"), + CreateText(" символами") + ) + ) + ); + + var result = _htmlBuilder.Build(tree); + + Assert.That(result, Is.EqualTo("

Заголовок с разными символами

")); + } + + [Test] + public void Build_ReturnsSeparatedParagraphs_WithMultipleParagraphs() + { + var tree = CreateSyntaxTree( + CreateParagraph( + CreateText("Первый абзац") + ), + CreateParagraph( + CreateText("Второй абзац") + ), + CreateParagraph( + CreateText("Третий абзац") + ) + ); + + var result = _htmlBuilder.Build(tree); + + Assert.That(result, Is.EqualTo("

Первый абзац

Второй абзац

Третий абзац

")); + } + + [Test] + public void Build_ReturnsCorrectHtml_WithComplexDocument() + { + var tree = CreateSyntaxTree( + CreateHeader( + CreateText("Заголовок") + ), + CreateParagraph( + CreateText("Обычный "), + CreateItalic("курсив"), + CreateText(" и "), + CreateBold("жирный") + ), + CreateParagraph( + CreateText("Новый абзац") + ) + ); + + var result = _htmlBuilder.Build(tree); + + Assert.That(result, Is.EqualTo("

Заголовок

Обычный курсив и жирный

Новый абзац

")); + } + + [Test] + public void Build_ReturnsTextInParagraph_WithOnlyText() + { + var tree = CreateSyntaxTree( + CreateParagraph( + CreateText("Простой текст без разметки") + ) + ); + + var result = _htmlBuilder.Build(tree); + + Assert.That(result, Is.EqualTo("

Простой текст без разметки

")); + } + + [Test] + public void Build_ReturnsEmptyString_WithEmptyTree() + { + var tree = CreateSyntaxTree(); + + var result = _htmlBuilder.Build(tree); + + Assert.That(result, Is.EqualTo("")); + } + + [Test] + public void Build_ReturnsCorrectHtml_WithMixedFormattingInSentence() + { + var tree = CreateSyntaxTree( + CreateParagraph( + CreateText("Это "), + CreateItalic("курсив"), + CreateText(", а это "), + CreateBold("жирный"), + CreateText(", а это "), + CreateBold( + CreateItalic("жирный курсив") + ), + CreateText(".") + ) + ); + + var result = _htmlBuilder.Build(tree); + + Assert.That(result, Is.EqualTo("

Это курсив, а это жирный, а это жирный курсив.

")); + } + + [Test] + public void Build_HandlesGracefully_WithUnclosedFormatting() + { + var tree = CreateSyntaxTree( + CreateParagraph( + CreateItalic("незакрытый курсив"), + CreateText(" и обычный текст") + ) + ); + + var result = _htmlBuilder.Build(tree); + + Assert.That(result, Is.EqualTo("

незакрытый курсив и обычный текст

")); + } + + // Вспомогательные методы для создания тестовых данных + private MockSyntaxTree CreateSyntaxTree(params Node[] nodes) + { + return new MockSyntaxTree(new List(nodes)); + } + + private Node CreateParagraph(params Node[] children) + { + return new Node(NodeType.Paragraph, new List(children), null); + } + + private Node CreateHeader(params Node[] children) + { + return new Node(NodeType.Header, new List(children), null); + } + + private Node CreateBold(params Node[] children) + { + return new Node(NodeType.Bold, new List(children), null); + } + + private Node CreateItalic(params Node[] children) + { + return new Node(NodeType.Italic, new List(children), null); + } + + private Node CreateText(string value) + { + return new Node(NodeType.Text, null, value); + } + + private Node CreateBold(string text) + { + return new Node(NodeType.Bold, new List { CreateText(text) }, null); + } + + private Node CreateItalic(string text) + { + return new Node(NodeType.Italic, new List { CreateText(text) }, null); + } + } + + // Mock-класс для тестирования + internal class MockSyntaxTree : ISyntaxTree + { + public List Tree { get; } + + public MockSyntaxTree(List tree) + { + Tree = tree; + } + } +} \ No newline at end of file diff --git a/cs/MarkdownTests/MarkdownTests/ConverterTests/ConverterPerformanceTest.cs b/cs/MarkdownTests/MarkdownTests/ConverterTests/ConverterPerformanceTest.cs new file mode 100644 index 000000000..4a8f2532d --- /dev/null +++ b/cs/MarkdownTests/MarkdownTests/ConverterTests/ConverterPerformanceTest.cs @@ -0,0 +1,79 @@ +using Markdown.Entities.Builders; +using Markdown.Entities.Converters; +using Markdown.Entities.Parsers; +using NUnit.Framework; +using System.Diagnostics; +using System.Text; + +namespace Markdown.MarkdownTests.ConverterTests +{ + [TestFixture] + [Category("Performance")] + public class ConverterPerformanceTest + { + private Converter _converter; + + [SetUp] + public void Setup() + { + var builder = new HtmlBuilder(); + var tokenizer = 
new MarkdownTokenizer(); + _converter = new Converter(builder, tokenizer); + } + + [Test] + public void Convert_ShouldHaveNearLinearTimeComplexity() + { + var sizes = new[] { 1000, 2000, 4000, 8000, 16000 }; + var executionTimes = new List(); + var basePattern = "Простой текст с _курсивом_ и __жирным__ форматированием."; + + foreach (var size in sizes) + { + var testText = GenerateText(basePattern, size); + var time = MeasureExecutionTime(() => _converter.Convert(testText)); + executionTimes.Add(time); + } + + AssertNearLinearComplexity(sizes, executionTimes); + } + + private long MeasureExecutionTime(Action action) + { + // Прогрев + action(); + + var stopwatch = Stopwatch.StartNew(); + action(); + stopwatch.Stop(); + + return stopwatch.ElapsedMilliseconds; + } + + private string GenerateText(string pattern, int targetSize) + { + var result = new StringBuilder(); + while (result.Length < targetSize) + { + result.Append(pattern); + } + return result.ToString().Substring(0, targetSize); + } + + private void AssertNearLinearComplexity(int[] sizes, List times) + { + for (int i = 1; i < sizes.Length; i++) + { + var sizeRatio = (double)sizes[i] / sizes[i - 1]; + var timeRatio = (double)times[i] / times[i - 1]; + var deviation = (timeRatio - sizeRatio) / sizeRatio; + + // Допускаем отклонение до 30% от идеальной линейной сложности + Assert.That(deviation, Is.LessThan(0.30), + $"При увеличении размера с {sizes[i - 1]} до {sizes[i]} " + + $"время выросло в {timeRatio:F2} раз вместо ожидаемых {sizeRatio:F2} " + + $"(отклонение {deviation:P0})"); + } + } + } +} diff --git a/cs/MarkdownTests/MarkdownTests/ConverterTests/ConverterTests.cs b/cs/MarkdownTests/MarkdownTests/ConverterTests/ConverterTests.cs new file mode 100644 index 000000000..f8b09040a --- /dev/null +++ b/cs/MarkdownTests/MarkdownTests/ConverterTests/ConverterTests.cs @@ -0,0 +1,437 @@ +using Markdown.Entities.Builders; +using NUnit.Framework; +using Markdown.Entities.Converters; +using 
Markdown.Entities.Parsers; + +namespace Markdown.MarkdownTests.ConverterTests +{ + [TestFixture] + public class ConverterTests + { + [TestFixture] + public class BasicConverterTests + { + private Converter converter; + [SetUp] + public void Setup() + { + converter = new Converter(new HtmlBuilder(), new MarkdownTokenizer()); + } + + [Test] + public void Convert_ThrowsNullReferenceException_WithNullBuilder() + { + var converter = new Converter(null, new MarkdownTokenizer()); + + Assert.Throws(() => converter.Convert("test")); + } + + [Test] + public void Convert_ThrowsNullReferenceException_WithNullTokenizer() + { + var converter = new Converter(new HtmlBuilder(), null); + + Assert.Throws(() => converter.Convert("test")); + } + + [Test] + public void Convert_ReturnsEmptyString_WithEmptyInput() + { + var result = converter.Convert(""); + + Assert.That(result, Is.EqualTo("")); + } + + [Test] + public void Convert_ReturnsSimpleText_WithPlainText() + { + var result = converter.Convert("Hello World"); + + Assert.That(result, Is.EqualTo("

Hello World

")); + } + + [Test] + public void Convert_ReturnsBoldText_WithBoldMarkers() + { + var result = converter.Convert("__bold text__"); + + Assert.That(result, Is.EqualTo("

bold text

")); + } + + [Test] + public void Convert_ReturnsItalicText_WithItalicMarkers() + { + var result = converter.Convert("_italic text_"); + + Assert.That(result, Is.EqualTo("

italic text

")); + } + + [Test] + public void Convert_ReturnsHeader_WithHeaderMarker() + { + var result = converter.Convert("# Header Text"); + + Assert.That(result, Is.EqualTo("

Header Text

")); + } + + [Test] + public void Convert_ReturnsComplexHtml_WithMixedMarkup() + { + var result = converter.Convert("# Header with __bold__ and _italic_"); + + Assert.That(result, Is.EqualTo("

Header with bold and italic

")); + } + + [Test] + public void Convert_HandlesMultipleLines_WithLineBreaks() + { + var result = converter.Convert("First line\n\nSecond line"); + + Assert.That(result, Is.EqualTo("

First line

Second line

")); + } + + [Test] + public void Convert_ReturnsComplexHtml_WithNestedMarkup() + { + var result = converter.Convert("# Header __with _bold_ and italic__"); + + Assert.That(result, Is.EqualTo("

Header with bold and italic

")); + } + } + + + [TestFixture] + public class NumbersAndUnderscoresTests + { + private Converter converter; + + [SetUp] + public void Setup() + { + converter = new Converter(new HtmlBuilder(), new MarkdownTokenizer()); + } + + [Test] + public void Convert_ShouldNotCreateEmphasis_WithUnderscoresAmongNumbers() + { + var markdown = "Цифры_12_3 не должны выделяться"; + + var result = converter.Convert(markdown); + + Assert.That(result, Is.EqualTo("

Цифры_12_3 не должны выделяться

")); + } + + [Test] + public void Convert_ShouldNotCreateEmphasis_WithBoldAmongNumbers() + { + var markdown = "Цифры__12__3 не должны выделяться"; + + var result = converter.Convert(markdown); + + Assert.That(result, Is.EqualTo("

Цифры__12__3 не должны выделяться

")); + } + + [Test] + public void Convert_ShouldNotCreateEmphasis_WithUnderscoresInMixedAlphanumeric() + { + var markdown = "Текст123_45_678 не выделяется"; + + var result = converter.Convert(markdown); + + Assert.That(result, Is.EqualTo("

Текст123_45_678 не выделяется

")); + } + + [Test] + public void Convert_ShouldWork_WithNumbersOutsideMarkers() + { + var markdown = "123 _выделенный_ текст 456"; + + var result = converter.Convert(markdown); + + Assert.That(result, Is.EqualTo("

123 выделенный текст 456

")); + } + } + + [TestFixture] + public class CrossWordMarkersTests + { + private Converter _converter; + + [SetUp] + public void Setup() + { + _converter = new Converter(new HtmlBuilder(), new MarkdownTokenizer()); + } + + + [Test] + public void Convert_ShouldNotCreateEmphasis_WithItalicMarkersSpanningMultipleWords() + { + var markdown = "раз_ные слова_ не должны выделяться курсивом"; + + var result = _converter.Convert(markdown); + + // Курсив не должен работать через разные слова + Assert.That(result, Is.EqualTo("

раз_ные слова_ не должны выделяться курсивом

")); + } + } + + [TestFixture] + public class WordPartMarkersTests + { + private Converter _converter; + + [SetUp] + public void Setup() + { + _converter = new Converter(new HtmlBuilder(), new MarkdownTokenizer()); + } + + [Test] + public void Convert_ShouldCreateEmphasis_WithItalicAtWordBeginning() + { + var markdown = "В _нач_але слова"; + + var result = _converter.Convert(markdown); + + Assert.That(result, Is.EqualTo("

В начале слова

")); + } + + [Test] + public void Convert_ShouldCreateEmphasis_WithItalicInWordMiddle() + { + var markdown = "В сер_еди_не слова"; + + var result = _converter.Convert(markdown); + + Assert.That(result, Is.EqualTo("

В середине слова

")); + } + + [Test] + public void Convert_ShouldCreateEmphasis_WithItalicAtWordEnd() + { + var markdown = "В кон_це_ слова"; + + var result = _converter.Convert(markdown); + + Assert.That(result, Is.EqualTo("

В конце слова

")); + } + + [Test] + public void Convert_ShouldCreateBold_WithBoldAtWordBeginning() + { + var markdown = "В __нач__але слова"; + + var result = _converter.Convert(markdown); + + Assert.That(result, Is.EqualTo("

В начале слова

")); + } + + [Test] + public void Convert_ShouldCreateBold_WithBoldInWordMiddle() + { + var markdown = "В сер__еди__не слова"; + + var result = _converter.Convert(markdown); + + Assert.That(result, Is.EqualTo("

В середине слова

")); + } + + [Test] + public void Convert_ShouldCreateBold_WithBoldAtWordEnd() + { + var markdown = "В кон__це__ слова"; + + var result = _converter.Convert(markdown); + + Assert.That(result, Is.EqualTo("

В конце слова

")); + } + } + + [TestFixture] + public class EscapedMarkersTests + { + private Converter _converter; + + [SetUp] + public void Setup() + { + _converter = new Converter(new HtmlBuilder(), new MarkdownTokenizer()); + } + + [Test] + public void Convert_ShouldNotCreateEmphasis_WithEscapedItalicMarker() + { + var markdown = "\\_Вот это\\_, не должно выделиться"; + + var result = _converter.Convert(markdown); + + Assert.That(result, Is.EqualTo("

_Вот это_, не должно выделиться

")); + } + + [Test] + public void Convert_ShouldNotCreateBold_WithEscapedBoldMarker() + { + var markdown = "\\_\\_это не жирный\\_\\_ текст"; + + var result = _converter.Convert(markdown); + + Assert.That(result, Is.EqualTo("

__это не жирный__ текст

")); + } + + [Test] + public void Convert_ShouldCreateEmphasis_WithEscapedEscapeBeforeItalic() + { + var markdown = "\\\\_вот это будет выделено_"; + + var result = _converter.Convert(markdown); + + Assert.That(result, Is.EqualTo("

\\вот это будет выделено

")); + } + + [Test] + public void Convert_ShouldKeepEscapeChars_WhenNotEscapingMarkers() + { + var markdown = "Сим\\волы экранирования\\ должны остаться"; + + var result = _converter.Convert(markdown); + + Assert.That(result, Is.EqualTo("

Сим\\волы экранирования\\ должны остаться

")); + } + + [Test] + public void Convert_ShouldHandleMixedEscapedAndNormalMarkers() + { + var markdown = "\\_не выделяется_ а _выделяется_"; + + var result = _converter.Convert(markdown); + + Assert.That(result, Is.EqualTo("

_не выделяется_ а выделяется

")); + } + } + + [TestFixture] + public class UnpairedMarkersTests + { + private Converter _converter; + + [SetUp] + public void Setup() + { + _converter = new Converter(new HtmlBuilder(), new MarkdownTokenizer()); + } + + [Test] + public void Convert_ShouldNotCreateEmphasis_WithSingleItalicMarker() + { + var markdown = "Текст с _одиночным маркером"; + + var result = _converter.Convert(markdown); + + Assert.That(result, Is.EqualTo("

Текст с _одиночным маркером

")); + } + + [Test] + public void Convert_ShouldNotCreateBold_WithSingleBoldMarker() + { + var markdown = "Текст с __одиночным маркером"; + + var result = _converter.Convert(markdown); + + Assert.That(result, Is.EqualTo("

Текст с __одиночным маркером

")); + } + + [Test] + public void Convert_ShouldNotCreateEmphasis_WithMismatchedMarkers() + { + var markdown = "__Непарные_ символы"; + + var result = _converter.Convert(markdown); + + Assert.That(result, Is.EqualTo("

__Непарные_ символы

")); + } + + [Test] + public void Convert_ShouldNotCreateEmphasis_WithIntersectingMarkers() + { + var markdown = "__пересечение _двойных__ и одинарных_"; + + var result = _converter.Convert(markdown); + + Assert.That(result, Is.EqualTo("

__пересечение _двойных__ и одинарных_

")); + } + + [Test] + public void Convert_ShouldNotCreateEmphasis_WithEmptyMarkers() + { + var markdown = "Пустые ____ подчерки"; + + var result = _converter.Convert(markdown); + + Assert.That(result, Is.EqualTo("

Пустые ____ подчерки

")); + } + + [Test] + public void Convert_ShouldHandleValidAndInvalidMarkersInSameText() + { + var markdown = "_валидный_ и _невалидный текст"; + + var result = _converter.Convert(markdown); + + Assert.That(result, Is.EqualTo("

валидный и _невалидный текст

")); + } + + [Test] + public void Convert_ShouldHandleBoldWithUnclosedItalicInside() + { + var markdown = "__жирный с _незакрытым курсивом__"; + + var result = _converter.Convert(markdown); + + // В этом случае у нас пересечение маркеров - во всей строке обрабатываем как подчеркивания + Assert.That(result, Is.EqualTo("

__жирный с _незакрытым курсивом__

")); + } + } + + [TestFixture] + public class ComplexCombinationTests + { + private Converter _converter; + + [SetUp] + public void Setup() + { + _converter = new Converter(new HtmlBuilder(), new MarkdownTokenizer()); + } + + [Test] + public void Convert_ShouldHandleComplexRealWorldExample() + { + var markdown = "# Заголовок\n\nТекст с _курсивом_, __жирным__ и со_чета_ниями.\n\nЦифры_123_456 не выделяются, а _части_ слов - выделяются."; + + var result = _converter.Convert(markdown); + + Assert.That(result, Contains.Substring("

Заголовок

")); + Assert.That(result, Contains.Substring("курсивом")); + Assert.That(result, Contains.Substring("жирным")); + Assert.That(result, Contains.Substring("сочетаниями")); + Assert.That(result, Contains.Substring("Цифры_123_456")); + Assert.That(result, Contains.Substring("части")); + } + + [Test] + public void Convert_ShouldHandleMultipleEdgeCasesInOneDocument() + { + var markdown = "# Тест\n\n_корректный_ и _не корректный между словами\\_экранированный_ и обычный текст.\n\n__жирный__ и __незакрытый жирный"; + + var result = _converter.Convert(markdown); + + Assert.That(result, Contains.Substring("корректный")); + Assert.That(result, Contains.Substring("не корректный между словами")); + Assert.That(result, Contains.Substring("_экранированный")); + Assert.That(result, Contains.Substring("жирный")); + Assert.That(result, Contains.Substring("__незакрытый жирный")); + } + } + } +} diff --git a/cs/MarkdownTests/MarkdownTests/SyntaxTreeTests/SyntaxTreeTests.cs b/cs/MarkdownTests/MarkdownTests/SyntaxTreeTests/SyntaxTreeTests.cs new file mode 100644 index 000000000..b171601b9 --- /dev/null +++ b/cs/MarkdownTests/MarkdownTests/SyntaxTreeTests/SyntaxTreeTests.cs @@ -0,0 +1,315 @@ +using NUnit.Framework; +using Markdown.Models; +using Markdown.Models.SyntaxTree; +using Markdown.Enums; + +namespace Markdown.MarkdownTests.SyntaxTreeTests +{ + [TestFixture] + public class SyntaxTreeTests + { + [Test] + public void Parse_ItalicText_ReturnsItalicNode() + { + var tokens = new List + { + new Token(TokenType.ItalicsStart), + new Token(TokenType.Text, "окруженный с двух сторон"), + new Token(TokenType.ItalicsEnd) + }; + + var syntaxTree = new SyntaxTree(tokens); + var result = syntaxTree.Tree; + + Assert.That(result, Has.Count.EqualTo(1)); + var paragraph = result[0]; + Assert.That(paragraph.Type, Is.EqualTo(NodeType.Paragraph)); + Assert.That(paragraph.ChildrenNodes, Has.Count.EqualTo(1)); + + var italicNode = paragraph.ChildrenNodes[0]; + Assert.That(italicNode.Type, 
Is.EqualTo(NodeType.Italic)); + Assert.That(italicNode.ChildrenNodes, Has.Count.EqualTo(1)); + Assert.That(italicNode.ChildrenNodes[0].Type, Is.EqualTo(NodeType.Text)); + Assert.That(italicNode.ChildrenNodes[0].Value, Is.EqualTo("окруженный с двух сторон")); + } + + [Test] + public void Parse_BoldText_ReturnsBoldNode() + { + var tokens = new List + { + new Token(TokenType.BoldStart), + new Token(TokenType.Text, "выделенный текст"), + new Token(TokenType.BoldEnd) + }; + + var syntaxTree = new SyntaxTree(tokens); + var result = syntaxTree.Tree; + + Assert.That(result, Has.Count.EqualTo(1)); + var paragraph = result[0]; + Assert.That(paragraph.Type, Is.EqualTo(NodeType.Paragraph)); + + var boldNode = paragraph.ChildrenNodes[0]; + Assert.That(boldNode.Type, Is.EqualTo(NodeType.Bold)); + Assert.That(boldNode.ChildrenNodes, Has.Count.EqualTo(1)); + Assert.That(boldNode.ChildrenNodes[0].Value, Is.EqualTo("выделенный текст")); + } + + [Test] + public void Parse_EscapedUnderscore_ReturnsTextNode() + { + var tokens = new List + { + new Token(TokenType.Text, "_Вот это_") + }; + + var syntaxTree = new SyntaxTree(tokens); + var result = syntaxTree.Tree; + + Assert.That(result, Has.Count.EqualTo(1)); + var paragraph = result[0]; + Assert.That(paragraph.Type, Is.EqualTo(NodeType.Paragraph)); + Assert.That(paragraph.ChildrenNodes, Has.Count.EqualTo(1)); + Assert.That(paragraph.ChildrenNodes[0].Type, Is.EqualTo(NodeType.Text)); + Assert.That(paragraph.ChildrenNodes[0].Value, Is.EqualTo("_Вот это_")); + } + + [Test] + public void Parse_BoldInsideItalic_WorksCorrectly() + { + var tokens = new List + { + new Token(TokenType.BoldStart), + new Token(TokenType.Text, "внутри двойного "), + new Token(TokenType.ItalicsStart), + new Token(TokenType.Text, "одинарное работает"), + new Token(TokenType.ItalicsEnd), + new Token(TokenType.BoldEnd) + }; + + var syntaxTree = new SyntaxTree(tokens); + var result = syntaxTree.Tree; + + Assert.That(result, Has.Count.EqualTo(1)); + var paragraph = 
result[0]; + + var boldNode = paragraph.ChildrenNodes[0]; + Assert.That(boldNode.Type, Is.EqualTo(NodeType.Bold)); + Assert.That(boldNode.ChildrenNodes, Has.Count.EqualTo(2)); + + Assert.That(boldNode.ChildrenNodes[0].Type, Is.EqualTo(NodeType.Text)); + Assert.That(boldNode.ChildrenNodes[0].Value, Is.EqualTo("внутри двойного ")); + + var italicNode = boldNode.ChildrenNodes[1]; + Assert.That(italicNode.Type, Is.EqualTo(NodeType.Italic)); + Assert.That(italicNode.ChildrenNodes[0].Value, Is.EqualTo("одинарное работает")); + } + + [Test] + public void Parse_ItalicInsideBold_DoesNotWork() + { + var tokens = new List + { + new Token(TokenType.ItalicsStart), + new Token(TokenType.Text, "внутри одинарного __двойное__ не работает"), + new Token(TokenType.ItalicsEnd) + }; + + var syntaxTree = new SyntaxTree(tokens); + var result = syntaxTree.Tree; + + Assert.That(result, Has.Count.EqualTo(1)); + var paragraph = result[0]; + + var italicNode = paragraph.ChildrenNodes[0]; + Assert.That(italicNode.Type, Is.EqualTo(NodeType.Italic)); + + Assert.That(italicNode.ChildrenNodes, Has.Count.EqualTo(1)); + Assert.That(italicNode.ChildrenNodes[0].Type, Is.EqualTo(NodeType.Text)); + Assert.That(italicNode.ChildrenNodes[0].Value, Is.EqualTo("внутри одинарного __двойное__ не работает")); + } + + [Test] + public void Parse_UnderscoresWithNumbers_RemainText() + { + var tokens = new List + { + new Token(TokenType.Text, "цифрами_12_3") + }; + + var syntaxTree = new SyntaxTree(tokens); + var result = syntaxTree.Tree; + + Assert.That(result, Has.Count.EqualTo(1)); + var paragraph = result[0]; + Assert.That(paragraph.ChildrenNodes[0].Type, Is.EqualTo(NodeType.Text)); + Assert.That(paragraph.ChildrenNodes[0].Value, Is.EqualTo("цифрами_12_3")); + } + + [Test] + public void Parse_WordParts_CanBeFormatted() + { + var tokens = new List + { + new Token(TokenType.Text, "в нач"), + new Token(TokenType.ItalicsStart), + new Token(TokenType.Text, "але"), + new Token(TokenType.ItalicsEnd) + }; + + var 
syntaxTree = new SyntaxTree(tokens); + var result = syntaxTree.Tree; + + Assert.That(result, Has.Count.EqualTo(1)); + var paragraph = result[0]; + Assert.That(paragraph.ChildrenNodes, Has.Count.EqualTo(2)); + Assert.That(paragraph.ChildrenNodes[0].Value, Is.EqualTo("в нач")); + Assert.That(paragraph.ChildrenNodes[1].Type, Is.EqualTo(NodeType.Italic)); + Assert.That(paragraph.ChildrenNodes[1].ChildrenNodes[0].Value, Is.EqualTo("але")); + } + + [Test] + public void Parse_DifferentWords_NotFormatted() + { + var tokens = new List + { + new Token(TokenType.Text, "ра"), + new Token(TokenType.ItalicsStart), + new Token(TokenType.Text, "зных сл"), + new Token(TokenType.ItalicsEnd), + new Token(TokenType.Text, "овах") + }; + + var syntaxTree = new SyntaxTree(tokens); + var result = syntaxTree.Tree; + + Assert.That(result, Has.Count.EqualTo(1)); + var paragraph = result[0]; + + Assert.That(paragraph.ChildrenNodes, Has.Count.EqualTo(3)); + Assert.That(paragraph.ChildrenNodes[0].Type, Is.EqualTo(NodeType.Text)); + Assert.That(paragraph.ChildrenNodes[1].Type, Is.EqualTo(NodeType.Italic)); + Assert.That(paragraph.ChildrenNodes[2].Type, Is.EqualTo(NodeType.Text)); + } + + [Test] + public void Parse_EmptyFormatting_RemainsText() + { + var tokens = new List + { + new Token(TokenType.Text, "____") + }; + + var syntaxTree = new SyntaxTree(tokens); + var result = syntaxTree.Tree; + + Assert.That(result, Has.Count.EqualTo(1)); + var paragraph = result[0]; + Assert.That(paragraph.ChildrenNodes[0].Type, Is.EqualTo(NodeType.Text)); + Assert.That(paragraph.ChildrenNodes[0].Value, Is.EqualTo("____")); + } + + [Test] + public void Parse_HeaderWithFormatting_CreatesHeaderWithNestedFormatting() + { + var tokens = new List + { + new Token(TokenType.Header), + new Token(TokenType.Text, "Заголовок "), + new Token(TokenType.BoldStart), + new Token(TokenType.Text, "с "), + new Token(TokenType.ItalicsStart), + new Token(TokenType.Text, "разными"), + new Token(TokenType.ItalicsEnd), + new 
Token(TokenType.Text, " символами"), + new Token(TokenType.BoldEnd), + new Token(TokenType.Newline) + }; + + var syntaxTree = new SyntaxTree(tokens); + var result = syntaxTree.Tree; + + Assert.That(result, Has.Count.EqualTo(1)); + var header = result[0]; + Assert.That(header.Type, Is.EqualTo(NodeType.Header)); + Assert.That(header.ChildrenNodes, Has.Count.EqualTo(2)); + + Assert.That(header.ChildrenNodes[0].Type, Is.EqualTo(NodeType.Text)); + Assert.That(header.ChildrenNodes[0].Value, Is.EqualTo("Заголовок ")); + + var boldNode = header.ChildrenNodes[1]; + Assert.That(boldNode.Type, Is.EqualTo(NodeType.Bold)); + Assert.That(boldNode.ChildrenNodes, Has.Count.EqualTo(3)); + + Assert.That(boldNode.ChildrenNodes[0].Type, Is.EqualTo(NodeType.Text)); + Assert.That(boldNode.ChildrenNodes[0].Value, Is.EqualTo("с ")); + + var italicNode = boldNode.ChildrenNodes[1]; + Assert.That(italicNode.Type, Is.EqualTo(NodeType.Italic)); + Assert.That(italicNode.ChildrenNodes[0].Value, Is.EqualTo("разными")); + + Assert.That(boldNode.ChildrenNodes[2].Type, Is.EqualTo(NodeType.Text)); + Assert.That(boldNode.ChildrenNodes[2].Value, Is.EqualTo(" символами")); + } + + [Test] + public void Parse_MultipleParagraphs_SeparatesCorrectly() + { + var tokens = new List + { + new Token(TokenType.Text, "Первый абзац"), + new Token(TokenType.Newline), + new Token(TokenType.Text, "Второй абзац"), + new Token(TokenType.Newline), + new Token(TokenType.Newline), + new Token(TokenType.Text, "Третий абзац") + }; + + var syntaxTree = new SyntaxTree(tokens); + var result = syntaxTree.Tree; + + Assert.That(result, Has.Count.EqualTo(3)); + Assert.That(result[0].Type, Is.EqualTo(NodeType.Paragraph)); + Assert.That(result[1].Type, Is.EqualTo(NodeType.Paragraph)); + Assert.That(result[2].Type, Is.EqualTo(NodeType.Paragraph)); + } + + [Test] + public void Parse_ComplexDocument_CreatesCorrectStructure() + { + var tokens = new List + { + new Token(TokenType.Header), + new Token(TokenType.Text, "Заголовок"), + new 
Token(TokenType.Newline), + new Token(TokenType.Text, "Обычный "), + new Token(TokenType.ItalicsStart), + new Token(TokenType.Text, "курсив"), + new Token(TokenType.ItalicsEnd), + new Token(TokenType.Text, " и "), + new Token(TokenType.BoldStart), + new Token(TokenType.Text, "жирный"), + new Token(TokenType.BoldEnd), + new Token(TokenType.Newline), + new Token(TokenType.Newline), + new Token(TokenType.Text, "Новый абзац") + }; + + var syntaxTree = new SyntaxTree(tokens); + var result = syntaxTree.Tree; + + Assert.That(result, Has.Count.EqualTo(3)); + + Assert.That(result[0].Type, Is.EqualTo(NodeType.Header)); + + var firstParagraph = result[1]; + Assert.That(firstParagraph.Type, Is.EqualTo(NodeType.Paragraph)); + Assert.That(firstParagraph.ChildrenNodes, Has.Count.EqualTo(4)); + + var secondParagraph = result[2]; + Assert.That(secondParagraph.Type, Is.EqualTo(NodeType.Paragraph)); + Assert.That(secondParagraph.ChildrenNodes[0].Value, Is.EqualTo("Новый абзац")); + } + } +} \ No newline at end of file diff --git a/cs/MarkdownTests/MarkdownTests/TokenizersTests/MarkdownTokenizerTests.cs b/cs/MarkdownTests/MarkdownTests/TokenizersTests/MarkdownTokenizerTests.cs new file mode 100644 index 000000000..f4f76728b --- /dev/null +++ b/cs/MarkdownTests/MarkdownTests/TokenizersTests/MarkdownTokenizerTests.cs @@ -0,0 +1,543 @@ +using Markdown.Entities.Parsers; +using Markdown.Enums; +using NUnit.Framework; + +namespace Markdown.MarkdownTests.TokenizersTests +{ + [TestFixture] + public class MarkdownTokenizerTests + { + private MarkdownTokenizer _tokenizer; + + [SetUp] + public void Setup() + { + _tokenizer = new MarkdownTokenizer(); + } + + [TestFixture] + public class ItalicsOnlyTests + { + private MarkdownTokenizer _tokenizer; + + [SetUp] + public void Setup() + { + _tokenizer = new MarkdownTokenizer(); + } + + [Test] + public void TokenizeLine_ReturnsEmphasisTokens_WithItalicSurroundedByUnderscores() + { + var line = "Текст, _окруженный с двух сторон_ одинарными символами 
подчерка"; + + var tokens = _tokenizer.TokenizeLine(line); + + Assert.That(tokens, Has.Count.EqualTo(5)); + Assert.That(tokens[0].Type, Is.EqualTo(TokenType.Text)); + Assert.That(tokens[0].Value, Is.EqualTo("Текст, ")); + Assert.That(tokens[1].Type, Is.EqualTo(TokenType.ItalicsStart)); + Assert.That(tokens[2].Type, Is.EqualTo(TokenType.Text)); + Assert.That(tokens[2].Value, Is.EqualTo("окруженный с двух сторон")); + Assert.That(tokens[3].Type, Is.EqualTo(TokenType.ItalicsEnd)); + Assert.That(tokens[4].Type, Is.EqualTo(TokenType.Text)); + Assert.That(tokens[4].Value, Is.EqualTo(" одинарными символами подчерка")); + } + + [Test] + public void TokenizeLine_ShouldCreateEmphasisTokens_WithItalicInWordParts() + { + var line = "Однако выделять часть слова они могут: и в _нач_але, и в сер_еди_не, и в кон_це._"; + + var tokens = _tokenizer.TokenizeLine(line); + + Assert.That(tokens, Has.Count.EqualTo(12)); + Assert.That(tokens[0].Type, Is.EqualTo(TokenType.Text)); + Assert.That(tokens[0].Value, Is.EqualTo("Однако выделять часть слова они могут: и в ")); + + Assert.That(tokens[1].Type, Is.EqualTo(TokenType.ItalicsStart)); + + Assert.That(tokens[2].Type, Is.EqualTo(TokenType.Text)); + Assert.That(tokens[2].Value, Is.EqualTo("нач")); + + Assert.That(tokens[3].Type, Is.EqualTo(TokenType.ItalicsEnd)); + + Assert.That(tokens[4].Type, Is.EqualTo(TokenType.Text)); + Assert.That(tokens[4].Value, Is.EqualTo("але, и в сер")); + + Assert.That(tokens[5].Type, Is.EqualTo(TokenType.ItalicsStart)); + + Assert.That(tokens[6].Type, Is.EqualTo(TokenType.Text)); + Assert.That(tokens[6].Value, Is.EqualTo("еди")); + + Assert.That(tokens[7].Type, Is.EqualTo(TokenType.ItalicsEnd)); + + Assert.That(tokens[8].Type, Is.EqualTo(TokenType.Text)); + Assert.That(tokens[8].Value, Is.EqualTo("не, и в кон")); + + Assert.That(tokens[9].Type, Is.EqualTo(TokenType.ItalicsStart)); + + Assert.That(tokens[10].Type, Is.EqualTo(TokenType.Text)); + Assert.That(tokens[10].Value, Is.EqualTo("це.")); + + 
Assert.That(tokens[11].Type, Is.EqualTo(TokenType.ItalicsEnd)); + + } + + [Test] + public void TokenizeLine_ShouldNotWork_WithItalicAcrossDifferentWords() + { + var line = "В то же время выделение в ра_зных сл_овах не работает."; + + var tokens = _tokenizer.TokenizeLine(line); + + Assert.That(tokens, Has.Count.EqualTo(1)); + Assert.That(tokens[0].Type, Is.EqualTo(TokenType.Text)); + Assert.That(tokens[0].Value, Is.EqualTo("В то же время выделение в ра_зных сл_овах не работает.")); + } + + [Test] + public void TokenizeLine_ShouldNotEndEmphasis_WithUnderscorePrecededBySpace() + { + var line = "За подчерками, начинающими выделение, должен _ следовать_ непробельный символ."; + + var tokens = _tokenizer.TokenizeLine(line); + + Assert.That(tokens, Has.Count.EqualTo(1)); + Assert.That(tokens[0].Type, Is.EqualTo(TokenType.Text)); + Assert.That(tokens[0].Value, Is.EqualTo("За подчерками, начинающими выделение, должен _ следовать_ непробельный символ.")); + } + + [Test] + public void TokenizeLine_ShouldNotStartEmphasis_WithClosingUnderscoreAfterSpace() + { + var line = "Иначе эти _подчерки _не считаются выделением"; + + var tokens = _tokenizer.TokenizeLine(line); + + Assert.That(tokens, Has.Count.EqualTo(1)); + Assert.That(tokens[0].Type, Is.EqualTo(TokenType.Text)); + Assert.That(tokens[0].Value, Is.EqualTo("Иначе эти _подчерки _не считаются выделением")); + } + } + + [TestFixture] + public class BoldOnlyTests + { + private MarkdownTokenizer _tokenizer; + + [SetUp] + public void Setup() + { + _tokenizer = new MarkdownTokenizer(); + } + + [Test] + public void TokenizeLine_ReturnsStrongTokens_WithBoldSurroundedByDoubleUnderscores() + { + var line = "__Выделенный двумя символами текст__ должен становиться полужирным"; + + var tokens = _tokenizer.TokenizeLine(line); + + Assert.That(tokens, Has.Count.EqualTo(4)); + Assert.That(tokens[0].Type, Is.EqualTo(TokenType.BoldStart)); + Assert.That(tokens[1].Type, Is.EqualTo(TokenType.Text)); + Assert.That(tokens[1].Value, 
Is.EqualTo("Выделенный двумя символами текст")); + Assert.That(tokens[2].Type, Is.EqualTo(TokenType.BoldEnd)); + Assert.That(tokens[3].Type, Is.EqualTo(TokenType.Text)); + Assert.That(tokens[3].Value, Is.EqualTo(" должен становиться полужирным")); + } + + [Test] + public void TokenizeLine_ReturnsStrongTokens_WithWordSurroundedByDoubleUnderscores() + { + var line = "Выделенный двумя символами __текст__ должен становиться полужирным"; + + var tokens = _tokenizer.TokenizeLine(line); + + Assert.That(tokens, Has.Count.EqualTo(5)); + Assert.That(tokens[0].Type, Is.EqualTo(TokenType.Text)); + Assert.That(tokens[0].Value, Is.EqualTo("Выделенный двумя символами ")); + Assert.That(tokens[1].Type, Is.EqualTo(TokenType.BoldStart)); + Assert.That(tokens[2].Type, Is.EqualTo(TokenType.Text)); + Assert.That(tokens[2].Value, Is.EqualTo("текст")); + Assert.That(tokens[3].Type, Is.EqualTo(TokenType.BoldEnd)); + Assert.That(tokens[4].Type, Is.EqualTo(TokenType.Text)); + Assert.That(tokens[4].Value, Is.EqualTo(" должен становиться полужирным")); + } + + + [Test] + public void TokenizeLine_ShouldNotStartEmphasis_WithUnderscoreFollowedBySpace() + { + var line = "Иначе эти__ подчерки__ не считаются выделением"; + + var tokens = _tokenizer.TokenizeLine(line); + + Assert.That(tokens, Has.Count.EqualTo(1)); + Assert.That(tokens[0].Type, Is.EqualTo(TokenType.Text)); + Assert.That(tokens[0].Value, Is.EqualTo("Иначе эти__ подчерки__ не считаются выделением")); + } + + [Test] + public void TokenizeLine_ShouldNotStartEmphasis_WithClosingUnderscoreAfterSpace() + { + var line = "Иначе эти __подчерки __не считаются выделением"; + + var tokens = _tokenizer.TokenizeLine(line); + + Assert.That(tokens, Has.Count.EqualTo(1)); + Assert.That(tokens[0].Type, Is.EqualTo(TokenType.Text)); + Assert.That(tokens[0].Value, Is.EqualTo("Иначе эти __подчерки __не считаются выделением")); + } + } + + [TestFixture] + public class TagInteractionTests + { + private MarkdownTokenizer _tokenizer; + + [SetUp] + public void 
Setup() + { + _tokenizer = new MarkdownTokenizer(); + } + + [Test] + public void TokenizeLine_ShouldCreateNestedTokens_WithBoldContainingItalic() + { + var line = "Внутри __двойного выделения _одинарное_ тоже__ работает."; + + var tokens = _tokenizer.TokenizeLine(line); + + Assert.That(tokens, Has.Count.EqualTo(9)); + Assert.That(tokens[0].Type, Is.EqualTo(TokenType.Text)); + Assert.That(tokens[0].Value, Is.EqualTo("Внутри ")); + Assert.That(tokens[1].Type, Is.EqualTo(TokenType.BoldStart)); + Assert.That(tokens[2].Type, Is.EqualTo(TokenType.Text)); + Assert.That(tokens[2].Value, Is.EqualTo("двойного выделения ")); + Assert.That(tokens[3].Type, Is.EqualTo(TokenType.ItalicsStart)); + Assert.That(tokens[4].Type, Is.EqualTo(TokenType.Text)); + Assert.That(tokens[4].Value, Is.EqualTo("одинарное")); + Assert.That(tokens[5].Type, Is.EqualTo(TokenType.ItalicsEnd)); + Assert.That(tokens[6].Type, Is.EqualTo(TokenType.Text)); + Assert.That(tokens[6].Value, Is.EqualTo(" тоже")); + Assert.That(tokens[7].Type, Is.EqualTo(TokenType.BoldEnd)); + Assert.That(tokens[8].Type, Is.EqualTo(TokenType.Text)); + Assert.That(tokens[8].Value, Is.EqualTo(" работает.")); + } + + [Test] + public void TokenizeLine_ShouldNotCreateBoldTokens_WithItalicContainingBold() + { + var line = "Но не наоборот — внутри _одинарного __двойное__ не_ работает."; + + var tokens = _tokenizer.TokenizeLine(line); + + Assert.That(tokens, Has.Count.EqualTo(5)); + Assert.That(tokens[0].Type, Is.EqualTo(TokenType.Text)); + Assert.That(tokens[0].Value, Is.EqualTo("Но не наоборот — внутри ")); + Assert.That(tokens[1].Type, Is.EqualTo(TokenType.ItalicsStart)); + Assert.That(tokens[2].Type, Is.EqualTo(TokenType.Text)); + Assert.That(tokens[2].Value, Is.EqualTo("одинарного __двойное__ не")); + Assert.That(tokens[3].Type, Is.EqualTo(TokenType.ItalicsEnd)); + Assert.That(tokens[4].Type, Is.EqualTo(TokenType.Text)); + Assert.That(tokens[4].Value, Is.EqualTo(" работает.")); + } + + [Test] + public void 
TokenizeLine_ShouldNotCreateEmphasis_WithUnderscoresInNumbers() + { + var line = "Подчерки внутри текста c цифрами_12_3 не считаются выделением"; + + var tokens = _tokenizer.TokenizeLine(line); + + Assert.That(tokens, Has.Count.EqualTo(1)); + Assert.That(tokens[0].Type, Is.EqualTo(TokenType.Text)); + Assert.That(tokens[0].Value, Is.EqualTo("Подчерки внутри текста c цифрами_12_3 не считаются выделением")); + } + + [Test] + public void TokenizeLine_ShouldNotCreateEmphasis_WithUnpairedUnderscores() + { + var line = "__Непарные_ символы в рамках одного абзаца не считаются выделением."; + + var tokens = _tokenizer.TokenizeLine(line); + + Assert.That(tokens, Has.Count.EqualTo(1)); + Assert.That(tokens[0].Type, Is.EqualTo(TokenType.Text)); + Assert.That(tokens[0].Value, Is.EqualTo("__Непарные_ символы в рамках одного абзаца не считаются выделением.")); + } + + [Test] + public void TokenizeLine_ShouldNotCreateEmphasis_WithItalicsIntersectingUnderscores() + { + var line = "В случае __пересечения _двойных__ и одинарных_ подчерков ни один из них не считается выделением."; + + var tokens = _tokenizer.TokenizeLine(line); + + Assert.That(tokens, Has.Count.EqualTo(1)); + Assert.That(tokens[0].Type, Is.EqualTo(TokenType.Text)); + Assert.That(tokens[0].Value, Is.EqualTo("В случае __пересечения _двойных__ и одинарных_ подчерков ни один из них не считается выделением.")); + } + + [Test] + public void TokenizeLine_ShouldNotCreateEmphasis_WithBoldIntersectingUnderscores() + { + var line = "В случае _пересечения __двойных_ и одинарных__ подчерков ни один из них не считается выделением."; + + var tokens = _tokenizer.TokenizeLine(line); + + Assert.That(tokens, Has.Count.EqualTo(1)); + Assert.That(tokens[0].Type, Is.EqualTo(TokenType.Text)); + Assert.That(tokens[0].Value, Is.EqualTo("В случае _пересечения __двойных_ и одинарных__ подчерков ни один из них не считается выделением.")); + } + + [Test] + public void TokenizeLine_ShouldNotCreateEmphasis_WithEmptyUnderscores() + { + var line = 
"Если внутри подчерков пустая строка ____, то они остаются символами подчерка."; + + var tokens = _tokenizer.TokenizeLine(line); + + Assert.That(tokens, Has.Count.EqualTo(1)); + Assert.That(tokens[0].Type, Is.EqualTo(TokenType.Text)); + Assert.That(tokens[0].Value, Is.EqualTo("Если внутри подчерков пустая строка ____, то они остаются символами подчерка.")); + } + } + + [TestFixture] + public class HeaderTests + { + private MarkdownTokenizer _tokenizer; + + [SetUp] + public void Setup() + { + _tokenizer = new MarkdownTokenizer(); + } + + [Test] + public void TokenizeLine_ReturnsHeaderToken_WithHeader() + { + var line = "# Заголовок"; + + var tokens = _tokenizer.TokenizeLine(line); + + Assert.That(tokens, Has.Count.EqualTo(2)); + Assert.That(tokens[0].Type, Is.EqualTo(TokenType.Header)); + Assert.That(tokens[1].Type, Is.EqualTo(TokenType.Text)); + Assert.That(tokens[1].Value, Is.EqualTo("Заголовок")); + } + + [Test] + public void TokenizeLine_ReturnsCombinedTokens_WithHeaderContainingFormatting() + { + var line = "# Заголовок __с _разными_ символами__"; + + var tokens = _tokenizer.TokenizeLine(line); + + Assert.That(tokens, Has.Count.EqualTo(9)); + Assert.That(tokens[0].Type, Is.EqualTo(TokenType.Header)); + Assert.That(tokens[1].Type, Is.EqualTo(TokenType.Text)); + Assert.That(tokens[1].Value, Is.EqualTo("Заголовок ")); + Assert.That(tokens[2].Type, Is.EqualTo(TokenType.BoldStart)); + Assert.That(tokens[3].Type, Is.EqualTo(TokenType.Text)); + Assert.That(tokens[3].Value, Is.EqualTo("с ")); + Assert.That(tokens[4].Type, Is.EqualTo(TokenType.ItalicsStart)); + Assert.That(tokens[5].Type, Is.EqualTo(TokenType.Text)); + Assert.That(tokens[5].Value, Is.EqualTo("разными")); + Assert.That(tokens[6].Type, Is.EqualTo(TokenType.ItalicsEnd)); + Assert.That(tokens[7].Type, Is.EqualTo(TokenType.Text)); + Assert.That(tokens[7].Value, Is.EqualTo(" символами")); + Assert.That(tokens[8].Type, Is.EqualTo(TokenType.BoldEnd)); + } + + [Test] + public void 
TokenizeLine_ShouldNotBeHeader_WithHeaderWithoutSpace() + { + var line = "#Заголовок без пробела"; + + var tokens = _tokenizer.TokenizeLine(line); + + Assert.That(tokens, Has.Count.EqualTo(1)); + Assert.That(tokens[0].Type, Is.EqualTo(TokenType.Text)); + Assert.That(tokens[0].Value, Is.EqualTo("#Заголовок без пробела")); + } + + [Test] + public void TokenizeLine_ShouldNotBeHeader_WithHeaderInMiddleOfLine() + { + var line = "Текст # Заголовок в середине"; + + var tokens = _tokenizer.TokenizeLine(line); + + Assert.That(tokens, Has.Count.EqualTo(1)); + Assert.That(tokens[0].Type, Is.EqualTo(TokenType.Text)); + Assert.That(tokens[0].Value, Is.EqualTo("Текст # Заголовок в середине")); + } + + [Test] + public void Tokenize_ReturnsCorrectTokens_WithCompleteExampleFromSpecification() + { + var text = "# Заголовок __с _разными_ символами__"; + + var tokens = _tokenizer.TokenizeLine(text); + + Assert.That(tokens, Has.Count.EqualTo(9)); + Assert.That(tokens[0].Type, Is.EqualTo(TokenType.Header)); + Assert.That(tokens[1].Type, Is.EqualTo(TokenType.Text)); + Assert.That(tokens[1].Value, Is.EqualTo("Заголовок ")); + Assert.That(tokens[2].Type, Is.EqualTo(TokenType.BoldStart)); + Assert.That(tokens[3].Type, Is.EqualTo(TokenType.Text)); + Assert.That(tokens[3].Value, Is.EqualTo("с ")); + Assert.That(tokens[4].Type, Is.EqualTo(TokenType.ItalicsStart)); + Assert.That(tokens[5].Type, Is.EqualTo(TokenType.Text)); + Assert.That(tokens[5].Value, Is.EqualTo("разными")); + Assert.That(tokens[6].Type, Is.EqualTo(TokenType.ItalicsEnd)); + Assert.That(tokens[7].Type, Is.EqualTo(TokenType.Text)); + Assert.That(tokens[7].Value, Is.EqualTo(" символами")); + Assert.That(tokens[8].Type, Is.EqualTo(TokenType.BoldEnd)); + } + } + + [TestFixture] + public class EscapeTests + { + private MarkdownTokenizer _tokenizer; + + [SetUp] + public void Setup() + { + _tokenizer = new MarkdownTokenizer(); + } + + [Test] + public void TokenizeLine_ShouldNotCreateEmphasisTokens_WithEscapedItalic() + { + var line = 
"\\_Вот это\\_, не должно выделиться тегом"; + + var tokens = _tokenizer.TokenizeLine(line); + + Assert.That(tokens, Has.Count.EqualTo(1)); + Assert.That(tokens[0].Type, Is.EqualTo(TokenType.Text)); + Assert.That(tokens[0].Value, Is.EqualTo("_Вот это_, не должно выделиться тегом")); + } + + [Test] + public void TokenizeLine_ShouldKeepEscapeCharacters_WithEscapeCharactersNotEscaping() + { + var line = "Здесь сим\\волы экранирования\\ \\должны остаться.\\"; + + var tokens = _tokenizer.TokenizeLine(line); + + Assert.That(tokens, Has.Count.EqualTo(1)); + Assert.That(tokens[0].Type, Is.EqualTo(TokenType.Text)); + Assert.That(tokens[0].Value, Is.EqualTo("Здесь сим\\волы экранирования\\ \\должны остаться.\\")); + } + + [Test] + public void TokenizeLine_ShouldCreateItalic_WithEscapedEscapeCharacter() + { + var line = "Символ экранирования тоже можно экранировать: \\\\_вот это будет выделено тегом_"; + + var tokens = _tokenizer.TokenizeLine(line); + + Assert.That(tokens, Has.Count.EqualTo(4)); + Assert.That(tokens[0].Type, Is.EqualTo(TokenType.Text)); + Assert.That(tokens[0].Value, Is.EqualTo("Символ экранирования тоже можно экранировать: \\")); + Assert.That(tokens[1].Type, Is.EqualTo(TokenType.ItalicsStart)); + Assert.That(tokens[2].Type, Is.EqualTo(TokenType.Text)); + Assert.That(tokens[2].Value, Is.EqualTo("вот это будет выделено тегом")); + Assert.That(tokens[3].Type, Is.EqualTo(TokenType.ItalicsEnd)); + } + + [Test] + public void TokenizeLine_ReturnsTextToken_WithOnlyEscapeCharacters() + { + var line = "\\\\\\"; + + var tokens = _tokenizer.TokenizeLine(line); + + Assert.That(tokens, Has.Count.EqualTo(1)); + Assert.That(tokens[0].Type, Is.EqualTo(TokenType.Text)); + Assert.That(tokens[0].Value, Is.EqualTo("\\\\")); + } + + [Test] + public void TokenizeLine_HandlesCorrectly_WithMixedEscapeAndFormatting() + { + var line = "\\__это не жирный\\__ а _\\это не курсив\\_"; + + var tokens = _tokenizer.TokenizeLine(line); + + Assert.That(tokens, Has.Count.EqualTo(1)); + 
Assert.That(tokens[0].Type, Is.EqualTo(TokenType.Text)); + Assert.That(tokens[0].Value, Is.EqualTo("__это не жирный__ а _\\это не курсив_")); + } + + [Test] + public void TokenizeLine_HandlesCorrectly_WithMultipleEscapeSequences() + { + var line = "\\\\_одинарный экранирование_ и \\\\\\_двойной экранирование_"; + + var tokens = _tokenizer.TokenizeLine(line); + + Assert.That(tokens, Has.Count.EqualTo(5)); + Assert.That(tokens[0].Type, Is.EqualTo(TokenType.Text)); + Assert.That(tokens[0].Value, Is.EqualTo("\\")); + Assert.That(tokens[1].Type, Is.EqualTo(TokenType.ItalicsStart)); + Assert.That(tokens[2].Type, Is.EqualTo(TokenType.Text)); + Assert.That(tokens[2].Value, Is.EqualTo("одинарный экранирование")); + Assert.That(tokens[3].Type, Is.EqualTo(TokenType.ItalicsEnd)); + Assert.That(tokens[4].Type, Is.EqualTo(TokenType.Text)); + Assert.That(tokens[4].Value, Is.EqualTo(" и \\_двойной экранирование_")); + } + } + + [TestFixture] + public class HelperMethodsTests + { + private MarkdownTokenizer _tokenizer; + + [SetUp] + public void Setup() + { + _tokenizer = new MarkdownTokenizer(); + } + + [Test] + public void TextToLines_ReturnsCorrectLines_WithMultipleLines() + { + var text = "line1\nline2\nline3"; + + var lines = _tokenizer.TextToLines(text); + + Assert.That(lines, Has.Count.EqualTo(3)); + Assert.That(lines[0], Is.EqualTo("line1")); + Assert.That(lines[1], Is.EqualTo("line2")); + Assert.That(lines[2], Is.EqualTo("line3")); + } + + [Test] + public void TextToLines_ReturnsEmptyList_WithEmptyString() + { + var text = ""; + + var lines = _tokenizer.TextToLines(text); + + Assert.That(lines, Has.Count.EqualTo(1)); + } + + [Test] + public void TokenizeLines_CombinesAllTokens_WithMultipleLines() + { + var lines = new List { "line1", "line2", "line3" }; + + var tokens = _tokenizer.TokenizeLines(lines); + + Assert.That(tokens, Has.Count.EqualTo(6)); + } + } + } +} \ No newline at end of file diff --git a/cs/MarkdownTests/Models/Node.cs b/cs/MarkdownTests/Models/Node.cs new 
file mode 100644 index 000000000..f3f15288c --- /dev/null +++ b/cs/MarkdownTests/Models/Node.cs @@ -0,0 +1,21 @@ +using Markdown.Enums; + +namespace Markdown.Models +{ + /// + /// Представляет узел абстрактного синтаксического дерева (AST) + /// + public class Node + { + public NodeType Type { get; } + public List ChildrenNodes { get; } + public string Value { get; } + + public Node(NodeType type, List childrenNodes, string value) + { + Type = type; + ChildrenNodes = childrenNodes; + Value = value; + } + } +} diff --git a/cs/MarkdownTests/Models/SyntaxTreeModels/ISyntaxTree.cs b/cs/MarkdownTests/Models/SyntaxTreeModels/ISyntaxTree.cs new file mode 100644 index 000000000..cfc594df6 --- /dev/null +++ b/cs/MarkdownTests/Models/SyntaxTreeModels/ISyntaxTree.cs @@ -0,0 +1,11 @@ + +namespace Markdown.Models.SyntaxTreeModels +{ + /// + /// Представляет абстрактное синтаксическое дерево (AST) документа + /// + public interface ISyntaxTree + { + public List Tree { get; } + } +} diff --git a/cs/MarkdownTests/Models/SyntaxTreeModels/SyntaxTree.cs b/cs/MarkdownTests/Models/SyntaxTreeModels/SyntaxTree.cs new file mode 100644 index 000000000..7bba13ef0 --- /dev/null +++ b/cs/MarkdownTests/Models/SyntaxTreeModels/SyntaxTree.cs @@ -0,0 +1,221 @@ +using Markdown.Enums; +using Markdown.Models.SyntaxTreeModels; + +namespace Markdown.Models.SyntaxTree +{ + public class SyntaxTree : ISyntaxTree + { + /// + /// Коллекция корневых узлов абстрактного синтаксического дерева + /// + public List Tree { get; } + + private List tokens; + private int currentIndex; + + /// + /// Строит абстрактное синтаксическое дерево (AST) из потока токенов. + /// Анализирует последовательность токенов и создает иерархическую структуру узлов, + /// отражающую семантическую структуру исходного документа. + /// + /// Коллекция токенов, полученная от лексического анализатора + public SyntaxTree(List tokens) + { + this.tokens = tokens ?? 
new List(); + currentIndex = 0; + Tree = ParseDocument(); + } + + private List ParseDocument() + { + var documentNodes = new List(); + + while (currentIndex < tokens.Count) + { + var node = ParseBlock(); + if (node != null) + { + documentNodes.Add(node); + } + } + + return documentNodes; + } + + private Node ParseBlock() + { + if (currentIndex >= tokens.Count) + return null; + + var token = tokens[currentIndex]; + + return token.Type switch + { + TokenType.Header => ParseHeader(), + TokenType.Newline => ParseNewline(), + _ => ParseParagraph() + }; + } + + private Node ParseHeader() + { + currentIndex++; + + var headerContent = new List(); + while (CanContinueParsingHeaderContent()) + { + var inlineNode = ParseInline(); + if (inlineNode != null) + { + headerContent.Add(inlineNode); + } + } + + if (HasNewlineTokenAtCurrentPosition()) + { + currentIndex++; + } + + return new Node(NodeType.Header, headerContent, null); + } + + private Node ParseParagraph() + { + var paragraphContent = new List(); + + while (CanContinueParsingParagraphContent()) + { + var inlineNode = ParseInline(); + if (inlineNode != null) + { + paragraphContent.Add(inlineNode); + } + } + + if (HasNewlineTokenAtCurrentPosition()) + { + currentIndex++; + } + + return paragraphContent.Count > 0 + ? 
new Node(NodeType.Paragraph, paragraphContent, null) + : null; + } + + private Node ParseNewline() + { + currentIndex++; + return null; + } + + private Node ParseInline() + { + if (currentIndex >= tokens.Count) + return null; + + var token = tokens[currentIndex]; + + return token.Type switch + { + TokenType.BoldStart => ParseBold(), + TokenType.ItalicsStart => ParseItalic(), + TokenType.Text => ParseText(), + _ => HandleUnexpectedToken() + }; + } + + private Node ParseBold() + { + currentIndex++; + + var boldContent = new List(); + + while (CanContinueParsingBoldContent()) + { + var inlineNode = ParseInline(); + if (inlineNode != null) + { + boldContent.Add(inlineNode); + } + } + + if (HasBoldEndTokenAtCurrentPosition()) + { + currentIndex++; + } + + return new Node(NodeType.Bold, boldContent, null); + } + + private Node ParseItalic() + { + currentIndex++; + + var italicContent = new List(); + + while (CanContinueParsingItalicContent()) + { + var inlineNode = ParseInline(); + if (inlineNode != null) + { + italicContent.Add(inlineNode); + } + } + + if (HasItalicsEndTokenAtCurrentPosition()) + { + currentIndex++; + } + + return new Node(NodeType.Italic, italicContent, null); + } + + private Node ParseText() + { + var token = tokens[currentIndex]; + currentIndex++; + return new Node(NodeType.Text, null, token.Value); + } + + private Node HandleUnexpectedToken() + { + currentIndex++; + return null; + } + + private bool CanContinueParsingHeaderContent() + { + return currentIndex < tokens.Count && tokens[currentIndex].Type != TokenType.Newline; + } + + private bool HasNewlineTokenAtCurrentPosition() + { + return currentIndex < tokens.Count && tokens[currentIndex].Type == TokenType.Newline; + } + + private bool CanContinueParsingParagraphContent() + { + return currentIndex < tokens.Count && tokens[currentIndex].Type != TokenType.Newline; + } + + private bool CanContinueParsingBoldContent() + { + return currentIndex < tokens.Count && tokens[currentIndex].Type != 
TokenType.BoldEnd; + } + + private bool HasBoldEndTokenAtCurrentPosition() + { + return currentIndex < tokens.Count && tokens[currentIndex].Type == TokenType.BoldEnd; + } + + private bool CanContinueParsingItalicContent() + { + return currentIndex < tokens.Count && tokens[currentIndex].Type != TokenType.ItalicsEnd; + } + + private bool HasItalicsEndTokenAtCurrentPosition() + { + return currentIndex < tokens.Count && tokens[currentIndex].Type == TokenType.ItalicsEnd; + } + } +} \ No newline at end of file diff --git a/cs/MarkdownTests/Models/Token.cs b/cs/MarkdownTests/Models/Token.cs new file mode 100644 index 000000000..952ea3951 --- /dev/null +++ b/cs/MarkdownTests/Models/Token.cs @@ -0,0 +1,24 @@ +using Markdown.Enums; +using System; +using System.Collections.Generic; +using System.Linq; +using System.Text; +using System.Threading.Tasks; + +namespace Markdown.Models +{ + /// + /// Представляет лексему (токен), извлеченную в процессе лексического анализа + /// + public class Token + { + public TokenType Type { get; } + public string Value { get; } + + public Token(TokenType type, string value = null) + { + Type = type; + Value = value; + } + } +} From 5f2ce8900b7bd32f3015f79ec39a9244b474e2dc Mon Sep 17 00:00:00 2001 From: Anton Savitskikh Date: Wed, 19 Nov 2025 05:44:50 +0500 Subject: [PATCH 5/5] Added link parsing with fixes --- .../Entities/Builders/HtmlBuilder.cs | 89 ++++-- .../Entities/Builders/IBuilder.cs | 8 +- .../Entities/Converters/Converter.cs | 24 +- .../Entities/Tokenizers/MarkdownTokenizer.cs | 270 +++++++++++++----- cs/MarkdownTests/Enums/NodeType.cs | 3 +- cs/MarkdownTests/Enums/TokenType.cs | 10 +- .../BuildersTests/HtmlBuilderTests.cs | 114 ++++++++ .../ConverterPerformanceTest.cs | 4 +- .../ConverterTests/ConverterTests.cs | 38 ++- .../SyntaxTreeTests/SyntaxTreeTests.cs | 95 ++++++ .../TokenizersTests/MarkdownTokenizerTests.cs | 69 +++++ cs/MarkdownTests/Models/MarkdownTags.cs | 14 + cs/MarkdownTests/Models/Node.cs | 1 + 
.../Models/SyntaxTreeModels/SyntaxTree.cs | 106 ++++--- 14 files changed, 684 insertions(+), 161 deletions(-) create mode 100644 cs/MarkdownTests/Models/MarkdownTags.cs diff --git a/cs/MarkdownTests/Entities/Builders/HtmlBuilder.cs b/cs/MarkdownTests/Entities/Builders/HtmlBuilder.cs index 08ddc61d8..b5b7ea379 100644 --- a/cs/MarkdownTests/Entities/Builders/HtmlBuilder.cs +++ b/cs/MarkdownTests/Entities/Builders/HtmlBuilder.cs @@ -20,31 +20,26 @@ namespace Markdown.Entities.Builders /// public class HtmlBuilder : IBuilder { - private readonly StringBuilder _htmlBuilder; - private readonly Dictionary _tagMapping; - - public HtmlBuilder() + private readonly StringBuilder htmlBuilder = new StringBuilder(); + private readonly Dictionary tagMapping = new Dictionary { - _htmlBuilder = new StringBuilder(); - _tagMapping = new Dictionary - { - { NodeType.Document, "" }, - { NodeType.Paragraph, "p" }, - { NodeType.Header, "h1" }, - { NodeType.Bold, "strong" }, - { NodeType.Italic, "em" }, - { NodeType.Text, "" } - }; - } + { NodeType.Document, "" }, + { NodeType.Paragraph, "p" }, + { NodeType.Header, "h1" }, + { NodeType.Bold, "strong" }, + { NodeType.Italic, "em" }, + { NodeType.Text, "" }, + { NodeType.Link, "a" } + }; public string Build(ISyntaxTree tree) { if (tree == null) return string.Empty; - _htmlBuilder.Clear(); + htmlBuilder.Clear(); BuildNodes(tree.Tree); - return _htmlBuilder.ToString(); + return htmlBuilder.ToString(); } private void BuildNodes(List nodes) @@ -65,6 +60,9 @@ private void BuildNode(Node node) case NodeType.Document: BuildDocumentNode(node); break; + case NodeType.Link: + BuildLinkNode(node); + break; default: BuildFormattedNode(node); break; @@ -76,7 +74,7 @@ private void BuildTextNode(Node node) if (!string.IsNullOrEmpty(node.Value)) { var escapedText = EscapeHtml(node.Value); - _htmlBuilder.Append(escapedText); + htmlBuilder.Append(escapedText); } } @@ -90,11 +88,11 @@ private void BuildDocumentNode(Node node) private void 
BuildFormattedNode(Node node) { - var tagName = _tagMapping[node.Type]; + var tagName = tagMapping[node.Type]; if (!string.IsNullOrEmpty(tagName)) { - _htmlBuilder.Append($"<{tagName}>"); + htmlBuilder.Append($"<{tagName}>"); } if (node.ChildrenNodes != null && node.ChildrenNodes.Count > 0) @@ -108,10 +106,44 @@ private void BuildFormattedNode(Node node) if (!string.IsNullOrEmpty(tagName)) { - _htmlBuilder.Append($""); + htmlBuilder.Append($""); } } + /// + /// Строит HTML для узла-ссылки + /// + /// Узел ссылки с атрибутами URL и Title + private void BuildLinkNode(Node node) + { + if (string.IsNullOrEmpty(node.Value)) + { + if (node.ChildrenNodes != null && node.ChildrenNodes.Count > 0) + { + BuildNodes(node.ChildrenNodes); + } + return; + } + + var url = EscapeHtmlAttribute(node.Value); + htmlBuilder.Append($""); + + if (node.ChildrenNodes != null && node.ChildrenNodes.Count > 0) + { + BuildNodes(node.ChildrenNodes); + } + + htmlBuilder.Append(""); + } + private string EscapeHtml(string text) { if (string.IsNullOrEmpty(text)) @@ -125,5 +157,20 @@ private string EscapeHtml(string text) .Replace("\"", """) .Replace("'", "'"); } + + /// + /// Эскейпинг для атрибутов HTML (URL и title) + /// + private string EscapeHtmlAttribute(string attribute) + { + if (string.IsNullOrEmpty(attribute)) + return attribute; + + // Для атрибутов нужно экранировать кавычки и амперсанды + return attribute + .Replace("&", "&") + .Replace("\"", """) + .Replace("'", "'"); + } } } \ No newline at end of file diff --git a/cs/MarkdownTests/Entities/Builders/IBuilder.cs b/cs/MarkdownTests/Entities/Builders/IBuilder.cs index d3b02cdfa..8a4f44b69 100644 --- a/cs/MarkdownTests/Entities/Builders/IBuilder.cs +++ b/cs/MarkdownTests/Entities/Builders/IBuilder.cs @@ -1,10 +1,4 @@ -using System; -using System.Collections.Generic; -using System.Linq; -using System.Text; -using System.Threading.Tasks; -using Markdown.Models.SyntaxTree; -using Markdown.Models.SyntaxTreeModels; +using 
Markdown.Models.SyntaxTreeModels; namespace Markdown.Entities.Builders { diff --git a/cs/MarkdownTests/Entities/Converters/Converter.cs b/cs/MarkdownTests/Entities/Converters/Converter.cs index fccd6fa92..ee0793ce4 100644 --- a/cs/MarkdownTests/Entities/Converters/Converter.cs +++ b/cs/MarkdownTests/Entities/Converters/Converter.cs @@ -22,25 +22,25 @@ namespace Markdown.Entities.Converters /// public class Converter : IConverter { - public IBuilder Builder { get; } - public ITokenizer Tokenizer { get; } + private readonly IBuilder builder; + private readonly ITokenizer tokenizer; public Converter(IBuilder builder, ITokenizer tokenizer) { - Builder = builder; - Tokenizer = tokenizer; + this.builder = builder; + this.tokenizer = tokenizer; } public string Convert(string text) { - if (Builder != null && Tokenizer != null) - { - var tokens = Tokenizer.Tokenize(text); - var ast = new SyntaxTree(tokens); - var convertedText = Builder.Build(ast); - return convertedText; - } - throw new NullReferenceException(); + if (builder == null) throw new NullReferenceException("Builder can't be null"); + if (tokenizer == null) throw new NullReferenceException("Tokenizer can't be null"); + + var tokens = tokenizer.Tokenize(text); + var ast = new SyntaxTree(tokens); + var convertedText = builder.Build(ast); + + return convertedText; } } } diff --git a/cs/MarkdownTests/Entities/Tokenizers/MarkdownTokenizer.cs b/cs/MarkdownTests/Entities/Tokenizers/MarkdownTokenizer.cs index bc440eeae..d3fcaa14e 100644 --- a/cs/MarkdownTests/Entities/Tokenizers/MarkdownTokenizer.cs +++ b/cs/MarkdownTests/Entities/Tokenizers/MarkdownTokenizer.cs @@ -1,6 +1,8 @@ -using System.Text; +using System.Runtime.CompilerServices; +using System.Text; using Markdown.Enums; using Markdown.Models; +using Newtonsoft.Json.Linq; namespace Markdown.Entities.Parsers { @@ -9,6 +11,15 @@ namespace Markdown.Entities.Parsers ///
public class MarkdownTokenizer : ITokenizer { + private const char Underscore = '_'; + private const string DoubleUnderscore = "__"; + private const char EscapeCharacter = '\\'; + private const char SpaceCharacter = ' '; + private const string HeaderCharacter = "# "; + private const char SquareOpen = '['; + private const char SquareClose = ']'; + private const char RoundOpen = '('; + private const char RoundClose = ')'; public List Tokenize(string text) { return TokenizeLines(TextToLines(text)); @@ -56,7 +67,7 @@ public List TokenizeLine(string line) { var currentChar = line[currentPosition]; - if (currentChar == '\\') + if (currentChar == EscapeCharacter) { ProcessEscapeCharacter(line, ref currentPosition, textBuffer); continue; @@ -64,13 +75,19 @@ public List TokenizeLine(string line) if (IsBoldMarker(line, currentPosition)) { - ProcessBoldMarker(line, ref currentPosition, tokens, textBuffer); + ProcessBoldMarker(line, ref currentPosition, tokens, textBuffer, TokenType.Text); continue; } if (IsItalicsMarker(line, currentPosition)) { - ProcessItalicsMarker(line, ref currentPosition, tokens, textBuffer, false); + ProcessItalicsMarker(line, ref currentPosition, tokens, textBuffer, false, TokenType.Text); + continue; + } + + if (currentChar == SquareOpen) + { + ProcessLink(line, ref currentPosition, tokens, textBuffer); continue; } @@ -78,10 +95,113 @@ public List TokenizeLine(string line) currentPosition++; } - if(textBuffer.Length > 0) FlushTextBufferIfNotEmpty(textBuffer, tokens); + if(textBuffer.Length > 0) FlushTextBufferIfNotEmpty(textBuffer, tokens, TokenType.Text); return tokens; } + private void ProcessLink(string line, ref int currentPosition, List tokens, StringBuilder textBuffer) + { + FlushTextBufferIfNotEmpty(textBuffer, tokens, TokenType.Text); + + tokens.Add(new Token(TokenType.LinkStart)); + currentPosition++; + + var linkTextBuffer = new StringBuilder(); + + while (currentPosition < line.Length && line[currentPosition] != SquareClose) + { + if 
(line[currentPosition] == EscapeCharacter) + { + ProcessEscapeCharacter(line, ref currentPosition, linkTextBuffer); + } + + if (IsBoldMarker(line, currentPosition)) + { + ProcessBoldMarker(line, ref currentPosition, tokens, linkTextBuffer, TokenType.LinkText); + continue; + } + + if (IsItalicsMarker(line, currentPosition)) + { + ProcessItalicsMarker(line, ref currentPosition, tokens, linkTextBuffer, false, TokenType.LinkText); + continue; + } + + linkTextBuffer.Append(line[currentPosition]); + currentPosition++; + + } + + if (linkTextBuffer.Length > 0) + { + tokens.Add(new Token(TokenType.LinkText, linkTextBuffer.ToString())); + } + + if (currentPosition < line.Length && line[currentPosition] == SquareClose) + { + tokens.Add(new Token(TokenType.LinkEnd)); + currentPosition++; + } + + if (currentPosition < line.Length && line[currentPosition] == RoundOpen) + { + ProcessUrl(line, ref currentPosition, tokens); + } + } + + private void ProcessUrl(string line, ref int currentPosition, List tokens) + { + tokens.Add(new Token(TokenType.UrlStart)); + currentPosition++; + + var urlBuffer = new StringBuilder(); + var titleBuffer = new StringBuilder(); + var isParsingTitle = false; + + while (currentPosition < line.Length && line[currentPosition] != RoundClose) + { + if (line[currentPosition] == EscapeCharacter) + { + ProcessEscapeCharacter(line, ref currentPosition, isParsingTitle ? 
titleBuffer : urlBuffer); + } + else if (line[currentPosition] == '"' && !isParsingTitle && urlBuffer.Length > 0) + { + isParsingTitle = true; + tokens.Add(new Token(TokenType.Url, urlBuffer.ToString())); + tokens.Add(new Token(TokenType.UrlTitleDelimiter)); + currentPosition++; + } + else + { + if (isParsingTitle) + { + titleBuffer.Append(line[currentPosition]); + } + else + { + urlBuffer.Append(line[currentPosition]); + } + currentPosition++; + } + } + + if (!isParsingTitle && urlBuffer.Length > 0) + { + tokens.Add(new Token(TokenType.Url, urlBuffer.ToString())); + } + else if (isParsingTitle && titleBuffer.Length > 0) + { + tokens.Add(new Token(TokenType.UrlTitle, titleBuffer.ToString())); + tokens.Add(new Token(TokenType.UrlTitleDelimiter)); + } + + if (currentPosition < line.Length && line[currentPosition] == RoundClose) + { + tokens.Add(new Token(TokenType.UrlEnd)); + currentPosition++; + } + } + /// /// Метод обработки тэга курсива, превращает часть строки в набор токенов если она удовлетворяет условиям спецификации /// Дополнительно обрабатывает случай неправильной вложенности тэга курсива и полужирного тэга @@ -91,7 +211,8 @@ public List TokenizeLine(string line) /// /// /// - флаг для обработки случая когда находим курсив внутри полужирного текста - private void ProcessItalicsMarker(string line, ref int currentPosition, List tokens, StringBuilder originalTextBuffer, bool isNestedCall) + private void ProcessItalicsMarker(string line, ref int currentPosition, List tokens, StringBuilder originalTextBuffer, + bool isNestedCall, TokenType textType) { var isInWord = false; var startPosition = currentPosition; @@ -118,27 +239,27 @@ private void ProcessItalicsMarker(string line, ref int currentPosition, List /// /// - private void ProcessBoldMarker(string line, ref int currentPosition, List tokens, StringBuilder originalTextBuffer) + private void ProcessBoldMarker(string line, ref int currentPosition, List tokens, + StringBuilder originalTextBuffer, TokenType 
textType) { var isClosingFound = false; var isInWord = IsMarkerInsideWord(line, currentPosition, true); @@ -196,47 +318,47 @@ private void ProcessBoldMarker(string line, ref int currentPosition, List if (IsValidClosingMarker(line, currentPosition, true)) { - var isEmptyWord = IsEmptyMarkedWord(currentPosition, startPosition, true); + var isEmptyWord = IsEmptyMarkedWord(currentPosition, startPosition, true); if (isEmptyWord) { - AppendMarkerToBuffer(textBuffer, true); + textBuffer.Append(DoubleUnderscore); originalTextBuffer.Append(textBuffer); return; } - //если нашли токены после вложенного вызова ProcessItalicsMarker - if (innerTokens.Count > 0) + bool shouldProcessNestedItalics = innerTokens.Count > 0; + + if (shouldProcessNestedItalics) { - FlushTextBufferIfNotEmpty(originalTextBuffer, tokens); - AddNestedItalicsTokens(textBuffer, tokens, innerTokens); - SkipBoldMarker(ref currentPosition); - return; + ProcessNestedItalicsTokens(originalTextBuffer, textBuffer, tokens, innerTokens, + ref currentPosition, TokenType.Text); + } + else + { + ProcessBoldTokens(originalTextBuffer, textBuffer, tokens, ref currentPosition, textType); } - FlushTextBufferIfNotEmpty(originalTextBuffer, tokens); - AddTokensFromBufferWithSpecifiedTags(textBuffer, tokens, TokenType.BoldStart, TokenType.BoldEnd); - SkipBoldMarker(ref currentPosition); return; } - if (currentChar == '_') + if (currentChar == Underscore) { - ProcessItalicsMarker(line, ref currentPosition, innerTokens, textBuffer, true); + ProcessItalicsMarker(line, ref currentPosition, innerTokens, textBuffer, true, TokenType.Text); continue; } - if (currentChar == ' ') + if (currentChar == SpaceCharacter) { if (isInWord) { - AppendMarkerToBuffer(originalTextBuffer, true); + originalTextBuffer.Append(DoubleUnderscore); originalTextBuffer.Append(textBuffer); return; } } - if (currentChar == '\\') + if (currentChar == EscapeCharacter) { ProcessEscapeCharacter(line, ref currentPosition, textBuffer); continue; @@ -246,22 +368,38 @@ 
private void ProcessBoldMarker(string line, ref int currentPosition, List currentPosition++; } - PrependMarkerToBuffer(textBuffer, true); + textBuffer.Insert(0, DoubleUnderscore); originalTextBuffer.Append(textBuffer); } + private void ProcessBoldTokens(StringBuilder originalTextBuffer, StringBuilder textBuffer, List tokens, + ref int currentPosition, TokenType textType) + { + FlushTextBufferIfNotEmpty(originalTextBuffer, tokens, textType); + AddTokensFromBufferWithSpecifiedTags(textBuffer, tokens, TokenType.BoldStart, TokenType.BoldEnd); + SkipBoldMarker(ref currentPosition); + } + + private void ProcessNestedItalicsTokens(StringBuilder originalTextBuffer, StringBuilder textBuffer, + List tokens, List innerTokens, ref int currentPosition, TokenType textType) + { + FlushTextBufferIfNotEmpty(originalTextBuffer, tokens, textType); + AddNestedItalicsTokens(textBuffer, tokens, innerTokens); + SkipBoldMarker(ref currentPosition); + } + private void AddNestedItalicsTokens(StringBuilder textBuffer, List tokens, List innerTokens) { tokens.Add(new Token(TokenType.BoldStart)); tokens.AddRange(innerTokens); - FlushTextBufferIfNotEmpty(textBuffer, tokens); + FlushTextBufferIfNotEmpty(textBuffer, tokens, TokenType.Text); tokens.Add(new Token(TokenType.BoldEnd)); } private void AddTokensFromBufferWithSpecifiedTags(StringBuilder textBuffer, List tokens, TokenType startToken, TokenType endToken) { tokens.Add(new Token(startToken)); - FlushTextBufferIfNotEmpty(textBuffer, tokens); + FlushTextBufferIfNotEmpty(textBuffer, tokens, TokenType.Text); tokens.Add(new Token(endToken)); } @@ -281,30 +419,6 @@ private void SkipBoldMarker(ref int currentPosition) currentPosition += 2; } - private void PrependMarkerToBuffer(StringBuilder textBuffer, bool isBold) - { - if (isBold) - { - textBuffer.Insert(0, "__"); - } - else - { - textBuffer.Insert(0, '_'); - } - } - - private void AppendMarkerToBuffer(StringBuilder textBuffer, bool isBold) - { - if (isBold) - { - textBuffer.Append("__"); - } - 
else - { - textBuffer.Append('_'); - } - } - //проверяем что открывающий маркер находится внутри слова private bool IsMarkerInsideWord(string line, int currentPosition, bool isBold) { @@ -327,26 +441,26 @@ private bool IsValidClosingMarker(string line, int currentPosition, bool isBold) if (isBold) { return currentPosition + 1 < line.Length - && line[currentPosition] == '_' - && line[currentPosition + 1] == '_' + && line[currentPosition] == Underscore + && line[currentPosition + 1] == Underscore && !char.IsWhiteSpace(line[currentPosition - 1]); } //обработка для курсива if (currentPosition + 1 < line.Length) { - return line[currentPosition] == '_' - && line[currentPosition + 1] != '_' - && line[currentPosition - 1] != '_' + return line[currentPosition] == Underscore + && line[currentPosition + 1] != Underscore + && line[currentPosition - 1] != Underscore && !char.IsWhiteSpace(line[currentPosition - 1]); } - return line[currentPosition] == '_' + return line[currentPosition] == Underscore && !char.IsWhiteSpace(line[currentPosition - 1]); } private bool IsItalicsMarker(string line, int currentPosition) { return currentPosition + 1 < line.Length - && line[currentPosition] == '_' && line[currentPosition + 1] != '_' + && line[currentPosition] == Underscore && line[currentPosition + 1] != Underscore && !IsSpaceAfterMarker(line, currentPosition, false) && !IsAmongDigits(line, currentPosition, false); } @@ -378,8 +492,8 @@ private bool IsAmongDigits(string line, int currentPosition, bool isBold) private bool IsBoldMarker(string line, int currentPosition) { return currentPosition + 1 < line.Length - && line[currentPosition] == '_' - && line[currentPosition + 1] == '_' + && line[currentPosition] == Underscore + && line[currentPosition + 1] == Underscore && !IsSpaceAfterMarker(line, currentPosition, true) && !IsAmongDigits(line, currentPosition, true); } @@ -390,11 +504,11 @@ private bool IsSpaceAfterMarker(string line, int currentPosition, bool isBold) return 
char.IsWhiteSpace(line[currentPosition + 1]); } - private void FlushTextBufferIfNotEmpty(StringBuilder textBuffer, List tokens) + private void FlushTextBufferIfNotEmpty(StringBuilder textBuffer, List tokens, TokenType textType) { if (textBuffer.Length > 0) { - tokens.Add(new Token(TokenType.Text, textBuffer.ToString())); + tokens.Add(new Token(textType, textBuffer.ToString())); textBuffer.Clear(); } } @@ -409,7 +523,7 @@ private void ProcessEscapeCharacter(string line, ref int currentPosition, String { currentPosition++; textBuffer.Append(line[currentPosition]); - if (line[currentPosition] == '_' && line[currentPosition + 1] == '_') + if (line[currentPosition] == Underscore && line[currentPosition + 1] == Underscore) { currentPosition++; textBuffer.Append(line[currentPosition]); @@ -442,14 +556,14 @@ private bool IsEscapeBeforeMarkers(string line, int currentPosition) { if (currentPosition + 2 < line.Length) { - return (line[currentPosition + 1] == '_' || - line[currentPosition + 1] == '\\' || - (line[currentPosition + 1] == '_' && line[currentPosition + 2] == '_')); + return (line[currentPosition + 1] == Underscore || + line[currentPosition + 1] == EscapeCharacter || + (line[currentPosition + 1] == Underscore && line[currentPosition + 2] == Underscore)); } else if (currentPosition + 1 < line.Length) { - return (line[currentPosition + 1] == '_' || - line[currentPosition + 1] == '\\'); + return (line[currentPosition + 1] == Underscore || + line[currentPosition + 1] == EscapeCharacter); } return false; } @@ -462,7 +576,7 @@ private void ProcessHeader(string line, ref int currentPosition, List tok private bool IsHeader(string line) { - return line.StartsWith("# "); + return line.StartsWith(HeaderCharacter); } } diff --git a/cs/MarkdownTests/Enums/NodeType.cs b/cs/MarkdownTests/Enums/NodeType.cs index d13433dba..3b3c85e08 100644 --- a/cs/MarkdownTests/Enums/NodeType.cs +++ b/cs/MarkdownTests/Enums/NodeType.cs @@ -7,6 +7,7 @@ public enum NodeType Header, Bold, Italic, - 
Text + Text, + Link } } diff --git a/cs/MarkdownTests/Enums/TokenType.cs b/cs/MarkdownTests/Enums/TokenType.cs index 63eac4032..bf08f854a 100644 --- a/cs/MarkdownTests/Enums/TokenType.cs +++ b/cs/MarkdownTests/Enums/TokenType.cs @@ -13,6 +13,14 @@ public enum TokenType BoldEnd, ItalicsStart, ItalicsEnd, - Newline + Newline, + UrlTitleDelimiter, + Url, + UrlTitle, + UrlEnd, + UrlStart, + LinkEnd, + LinkText, + LinkStart } } diff --git a/cs/MarkdownTests/MarkdownTests/BuildersTests/HtmlBuilderTests.cs b/cs/MarkdownTests/MarkdownTests/BuildersTests/HtmlBuilderTests.cs index 9407e662b..d63ad4598 100644 --- a/cs/MarkdownTests/MarkdownTests/BuildersTests/HtmlBuilderTests.cs +++ b/cs/MarkdownTests/MarkdownTests/BuildersTests/HtmlBuilderTests.cs @@ -262,6 +262,85 @@ public void Build_HandlesGracefully_WithUnclosedFormatting() Assert.That(result, Is.EqualTo("

незакрытый курсив и обычный текст

")); } + [Test] + public void Build_ReturnsAnchorTag_WithSimpleLink() + { + var tree = CreateSyntaxTree( + CreateParagraph( + CreateLink("пример ссылки", "https://example.com") + ) + ); + + var result = _htmlBuilder.Build(tree); + + Assert.That(result, Is.EqualTo("

пример ссылки

")); + } + + [Test] + public void Build_ReturnsAnchorTagWithTitle_WithLinkWithTitle() + { + var tree = CreateSyntaxTree( + CreateParagraph( + CreateLinkWithTitle("ссылка с заголовком", "https://site.com", "Заголовок ссылки") + ) + ); + + var result = _htmlBuilder.Build(tree); + + Assert.That(result, Is.EqualTo("

ссылка с заголовком

")); + } + + [Test] + public void Build_ReturnsAnchorWithStrongTag_WithLinkContainingBoldText() + { + var tree = CreateSyntaxTree( + CreateParagraph( + CreateLink( + CreateText("это "), + CreateBold("жирный текст") + , "https://example.com") + ) + ); + + var result = _htmlBuilder.Build(tree); + + Assert.That(result, Is.EqualTo("

это жирный текст

")); + } + + [Test] + public void Build_ReturnsAnchorWithEmTag_WithLinkContainingItalicText() + { + var tree = CreateSyntaxTree( + CreateParagraph( + CreateLink( + CreateText("это "), + CreateItalic("курсивный текст") + , "https://example.com") + ) + ); + + var result = _htmlBuilder.Build(tree); + + Assert.That(result, Is.EqualTo("

это курсивный текст

")); + } + + [Test] + public void Build_ReturnsParagraphWithAnchor_WithLinkInText() + { + var tree = CreateSyntaxTree( + CreateParagraph( + CreateText("Посетите "), + CreateLink("наш сайт", "https://example.com"), + CreateText(" для деталей") + ) + ); + + var result = _htmlBuilder.Build(tree); + + Assert.That(result, Is.EqualTo("

Посетите наш сайт для деталей

")); + } + + // Вспомогательные методы для создания тестовых данных private MockSyntaxTree CreateSyntaxTree(params Node[] nodes) { @@ -302,6 +381,41 @@ private Node CreateItalic(string text) { return new Node(NodeType.Italic, new List { CreateText(text) }, null); } + + // Новые вспомогательные методы для ссылок + private Node CreateLink(string text, string url) + { + return new Node(NodeType.Link, new List { CreateText(text) }, url); + } + + private Node CreateLinkWithTitle(string text, string url, string title) + { + return new Node(NodeType.Link, new List { CreateText(text) }, url) + { + Title = title + }; + } + + private Node CreateLink(params object[] childrenAndUrl) + { + // Последний элемент - это URL + var url = childrenAndUrl[childrenAndUrl.Length - 1] as string; + var children = new List(); + + for (int i = 0; i < childrenAndUrl.Length - 1; i++) + { + if (childrenAndUrl[i] is Node node) + { + children.Add(node); + } + else if (childrenAndUrl[i] is string text) + { + children.Add(CreateText(text)); + } + } + + return new Node(NodeType.Link, children, url); + } } // Mock-класс для тестирования diff --git a/cs/MarkdownTests/MarkdownTests/ConverterTests/ConverterPerformanceTest.cs b/cs/MarkdownTests/MarkdownTests/ConverterTests/ConverterPerformanceTest.cs index 4a8f2532d..0b05d6208 100644 --- a/cs/MarkdownTests/MarkdownTests/ConverterTests/ConverterPerformanceTest.cs +++ b/cs/MarkdownTests/MarkdownTests/ConverterTests/ConverterPerformanceTest.cs @@ -68,8 +68,8 @@ private void AssertNearLinearComplexity(int[] sizes, List times) var timeRatio = (double)times[i] / times[i - 1]; var deviation = (timeRatio - sizeRatio) / sizeRatio; - // Допускаем отклонение до 30% от идеальной линейной сложности - Assert.That(deviation, Is.LessThan(0.30), + // Допускаем отклонение до 40% от идеальной линейной сложности + Assert.That(deviation, Is.LessThan(0.40), $"При увеличении размера с {sizes[i - 1]} до {sizes[i]} " + $"время выросло в {timeRatio:F2} раз вместо ожидаемых 
{sizeRatio:F2} " + $"(отклонение {deviation:P0})"); diff --git a/cs/MarkdownTests/MarkdownTests/ConverterTests/ConverterTests.cs b/cs/MarkdownTests/MarkdownTests/ConverterTests/ConverterTests.cs index f8b09040a..b2cccbfbf 100644 --- a/cs/MarkdownTests/MarkdownTests/ConverterTests/ConverterTests.cs +++ b/cs/MarkdownTests/MarkdownTests/ConverterTests/ConverterTests.cs @@ -23,7 +23,8 @@ public void Convert_ThrowsNullReferenceException_WithNullBuilder() { var converter = new Converter(null, new MarkdownTokenizer()); - Assert.Throws(() => converter.Convert("test")); + var exception = Assert.Throws(() => converter.Convert("test")); + Assert.That(exception.Message, Is.EqualTo("Builder can't be null")); } [Test] @@ -31,7 +32,8 @@ public void Convert_ThrowsNullReferenceException_WithNullTokenizer() { var converter = new Converter(new HtmlBuilder(), null); - Assert.Throws(() => converter.Convert("test")); + var exception = Assert.Throws(() => converter.Convert("test")); + Assert.That(exception.Message, Is.EqualTo("Tokenizer can't be null")); } [Test] @@ -433,5 +435,37 @@ public void Convert_ShouldHandleMultipleEdgeCasesInOneDocument() Assert.That(result, Contains.Substring("__незакрытый жирный")); } } + + [TestFixture] + public class LinkTests + { + private Converter _converter; + + [SetUp] + public void Setup() + { + _converter = new Converter(new HtmlBuilder(), new MarkdownTokenizer()); + } + + [Test] + public void Convert_ReturnsAnchorTag_WithSimpleLink() + { + var markdown = "[пример ссылки](https://example.com)"; + + var result = _converter.Convert(markdown); + + Assert.That(result, Is.EqualTo("

пример ссылки

")); + } + + [Test] + public void Convert_ReturnsHeaderWithAnchor_WithLinkInHeader() + { + var markdown = "# Заголовок с [ссылкой](https://header.com)"; + + var result = _converter.Convert(markdown); + + Assert.That(result, Is.EqualTo("

Заголовок с ссылкой

")); + } + } } } diff --git a/cs/MarkdownTests/MarkdownTests/SyntaxTreeTests/SyntaxTreeTests.cs b/cs/MarkdownTests/MarkdownTests/SyntaxTreeTests/SyntaxTreeTests.cs index b171601b9..e2fa92ba2 100644 --- a/cs/MarkdownTests/MarkdownTests/SyntaxTreeTests/SyntaxTreeTests.cs +++ b/cs/MarkdownTests/MarkdownTests/SyntaxTreeTests/SyntaxTreeTests.cs @@ -311,5 +311,100 @@ public void Parse_ComplexDocument_CreatesCorrectStructure() Assert.That(secondParagraph.Type, Is.EqualTo(NodeType.Paragraph)); Assert.That(secondParagraph.ChildrenNodes[0].Value, Is.EqualTo("Новый абзац")); } + + [Test] + public void Parse_SimpleLink_ReturnsLinkNode() + { + var tokens = new List + { + new Token(TokenType.LinkStart), + new Token(TokenType.LinkText, "пример ссылки"), + new Token(TokenType.LinkEnd), + new Token(TokenType.UrlStart), + new Token(TokenType.Url, "https://example.com"), + new Token(TokenType.UrlEnd), + new Token(TokenType.Newline) + }; + + var syntaxTree = new SyntaxTree(tokens); + var result = syntaxTree.Tree; + + Assert.That(result, Has.Count.EqualTo(1)); + var paragraph = result[0]; + Assert.That(paragraph.Type, Is.EqualTo(NodeType.Paragraph)); + Assert.That(paragraph.ChildrenNodes, Has.Count.EqualTo(1)); + + var linkNode = paragraph.ChildrenNodes[0]; + Assert.That(linkNode.Type, Is.EqualTo(NodeType.Link)); + Assert.That(linkNode.Value, Is.EqualTo("https://example.com")); + Assert.That(linkNode.ChildrenNodes, Has.Count.EqualTo(1)); + Assert.That(linkNode.ChildrenNodes[0].Type, Is.EqualTo(NodeType.Text)); + Assert.That(linkNode.ChildrenNodes[0].Value, Is.EqualTo("пример ссылки")); + } + + [Test] + public void Parse_LinkWithTitle_ReturnsLinkNodeWithTitle() + { + var tokens = new List + { + new Token(TokenType.LinkStart), + new Token(TokenType.LinkText, "ссылка с заголовком"), + new Token(TokenType.LinkEnd), + new Token(TokenType.UrlStart), + new Token(TokenType.Url, "https://site.com"), + new Token(TokenType.UrlTitleDelimiter), + new Token(TokenType.UrlTitle, "Заголовок ссылки"), + 
new Token(TokenType.UrlTitleDelimiter), + new Token(TokenType.UrlEnd), + new Token(TokenType.Newline) + }; + + var syntaxTree = new SyntaxTree(tokens); + var result = syntaxTree.Tree; + + Assert.That(result, Has.Count.EqualTo(1)); + var paragraph = result[0]; + + var linkNode = paragraph.ChildrenNodes[0]; + Assert.That(linkNode.Type, Is.EqualTo(NodeType.Link)); + Assert.That(linkNode.Value, Is.EqualTo("https://site.com")); + Assert.That(linkNode.Title, Is.EqualTo("Заголовок ссылки")); + } + + /* Bold markup inside the link text must become a Bold child of the Link node, placed after the leading Text child. NOTE(review): linkNode.Value is not asserted here, so the URL that follows nested markup is not checked by this test. */ [Test] + public void Parse_LinkWithBoldText_CreatesNestedBoldNode() + { + var tokens = new List + { + new Token(TokenType.LinkStart), + new Token(TokenType.LinkText, "текст с "), + new Token(TokenType.BoldStart), + new Token(TokenType.Text, "жирным"), + new Token(TokenType.BoldEnd), + new Token(TokenType.LinkEnd), + new Token(TokenType.UrlStart), + new Token(TokenType.Url, "https://example.com"), + new Token(TokenType.UrlEnd), + new Token(TokenType.Newline) + }; + + var syntaxTree = new SyntaxTree(tokens); + var result = syntaxTree.Tree; + + Assert.That(result, Has.Count.EqualTo(1)); + var paragraph = result[0]; + + var linkNode = paragraph.ChildrenNodes[0]; + Assert.That(linkNode.Type, Is.EqualTo(NodeType.Link)); + Assert.That(linkNode.ChildrenNodes, Has.Count.EqualTo(2)); + + Assert.That(linkNode.ChildrenNodes[0].Type, Is.EqualTo(NodeType.Text)); + Assert.That(linkNode.ChildrenNodes[0].Value, Is.EqualTo("текст с ")); + + var boldNode = linkNode.ChildrenNodes[1]; + Assert.That(boldNode.Type, Is.EqualTo(NodeType.Bold)); + Assert.That(boldNode.ChildrenNodes[0].Type, Is.EqualTo(NodeType.Text)); + Assert.That(boldNode.ChildrenNodes[0].Value, Is.EqualTo("жирным")); + } } } \ No newline at end of file diff --git a/cs/MarkdownTests/MarkdownTests/TokenizersTests/MarkdownTokenizerTests.cs b/cs/MarkdownTests/MarkdownTests/TokenizersTests/MarkdownTokenizerTests.cs index f4f76728b..f661bd591 100644 --- a/cs/MarkdownTests/MarkdownTests/TokenizersTests/MarkdownTokenizerTests.cs +++ 
b/cs/MarkdownTests/MarkdownTests/TokenizersTests/MarkdownTokenizerTests.cs @@ -495,6 +495,75 @@ public void TokenizeLine_HandlesCorrectly_WithMultipleEscapeSequences() } } + /* Tokenizer tests for Markdown links written as [text](url); each test gets a fresh MarkdownTokenizer from Setup. */ [TestFixture] + public class LinkTests + { + private MarkdownTokenizer _tokenizer; + + [SetUp] + public void Setup() + { + _tokenizer = new MarkdownTokenizer(); + } + + /* A link surrounded by plain text is expected to yield exactly 8 tokens: Text, LinkStart, LinkText, LinkEnd, UrlStart, Url, UrlEnd, Text. */ [Test] + public void TokenizeLine_ReturnsLinkTokens_WithValidLinkFormat() + { + var line = "Посетите [наш сайт](https://example.com) для деталей"; + + var tokens = _tokenizer.TokenizeLine(line); + + Assert.That(tokens, Has.Count.EqualTo(8)); + + Assert.That(tokens[0].Type, Is.EqualTo(TokenType.Text)); + Assert.That(tokens[0].Value, Is.EqualTo("Посетите ")); + + Assert.That(tokens[1].Type, Is.EqualTo(TokenType.LinkStart)); + Assert.That(tokens[2].Type, Is.EqualTo(TokenType.LinkText)); + Assert.That(tokens[2].Value, Is.EqualTo("наш сайт")); + Assert.That(tokens[3].Type, Is.EqualTo(TokenType.LinkEnd)); + + Assert.That(tokens[4].Type, Is.EqualTo(TokenType.UrlStart)); + Assert.That(tokens[5].Type, Is.EqualTo(TokenType.Url)); + Assert.That(tokens[5].Value, Is.EqualTo("https://example.com")); + Assert.That(tokens[6].Type, Is.EqualTo(TokenType.UrlEnd)); + + Assert.That(tokens[7].Type, Is.EqualTo(TokenType.Text)); + Assert.That(tokens[7].Value, Is.EqualTo(" для деталей")); + } + + /* Italic and bold markup inside the link text is expected between LinkStart and LinkEnd: fragments directly inside the link are LinkText tokens, while text inside the markup is a Text token. The total token count is not asserted. */ [Test] + public void TokenizeLine_ReturnsLinkTokens_WithNestedFormatting() + { + var line = "[текст с _курсивом_ и __жирным__](https://example.com)"; + + var tokens = _tokenizer.TokenizeLine(line); + + Assert.That(tokens[0].Type, Is.EqualTo(TokenType.LinkStart)); + Assert.That(tokens[1].Type, Is.EqualTo(TokenType.LinkText)); + Assert.That(tokens[1].Value, Is.EqualTo("текст с ")); + + Assert.That(tokens[2].Type, Is.EqualTo(TokenType.ItalicsStart)); + Assert.That(tokens[3].Type, Is.EqualTo(TokenType.Text)); + Assert.That(tokens[3].Value, Is.EqualTo("курсивом")); + Assert.That(tokens[4].Type, Is.EqualTo(TokenType.ItalicsEnd)); + + Assert.That(tokens[5].Type, 
Is.EqualTo(TokenType.LinkText)); + Assert.That(tokens[5].Value, Is.EqualTo(" и ")); + + Assert.That(tokens[6].Type, Is.EqualTo(TokenType.BoldStart)); + Assert.That(tokens[7].Type, Is.EqualTo(TokenType.Text)); + Assert.That(tokens[7].Value, Is.EqualTo("жирным")); + Assert.That(tokens[8].Type, Is.EqualTo(TokenType.BoldEnd)); + + Assert.That(tokens[9].Type, Is.EqualTo(TokenType.LinkEnd)); + Assert.That(tokens[10].Type, Is.EqualTo(TokenType.UrlStart)); + Assert.That(tokens[11].Type, Is.EqualTo(TokenType.Url)); + Assert.That(tokens[11].Value, Is.EqualTo("https://example.com")); + Assert.That(tokens[12].Type, Is.EqualTo(TokenType.UrlEnd)); + } + } + [TestFixture] public class HelperMethodsTests { diff --git a/cs/MarkdownTests/Models/MarkdownTags.cs b/cs/MarkdownTests/Models/MarkdownTags.cs new file mode 100644 index 000000000..8c7fc9f8e --- /dev/null +++ b/cs/MarkdownTests/Models/MarkdownTags.cs @@ -0,0 +1,14 @@ +using System; +using System.Collections.Generic; +using System.Linq; +using System.Text; +using System.Threading.Tasks; +using Markdown.MarkdownTests.TokenizersTests; + +namespace Markdown.Models +{ + /* NOTE(review): the only member is a private field that nothing in this file reads; presumably a placeholder for shared tag characters, confirm the intent. */ public static class MarkdownTags + { + private static readonly char Underscore = '_'; + } +} diff --git a/cs/MarkdownTests/Models/Node.cs b/cs/MarkdownTests/Models/Node.cs index f3f15288c..8737950f4 100644 --- a/cs/MarkdownTests/Models/Node.cs +++ b/cs/MarkdownTests/Models/Node.cs @@ -10,6 +10,7 @@ public class Node public NodeType Type { get; } public List ChildrenNodes { get; } public string Value { get; } + /* Optional title; the constructor shown in this hunk does not take it, so it stays null unless assigned through the public setter. */ public string? 
Title { get; set; } public Node(NodeType type, List childrenNodes, string value) { diff --git a/cs/MarkdownTests/Models/SyntaxTreeModels/SyntaxTree.cs b/cs/MarkdownTests/Models/SyntaxTreeModels/SyntaxTree.cs index 7bba13ef0..4248913e6 100644 --- a/cs/MarkdownTests/Models/SyntaxTreeModels/SyntaxTree.cs +++ b/cs/MarkdownTests/Models/SyntaxTreeModels/SyntaxTree.cs @@ -62,7 +62,7 @@ private Node ParseHeader() currentIndex++; var headerContent = new List(); - while (CanContinueParsingHeaderContent()) + while (CanContinueParsingMarkerContent(TokenType.Newline)) { var inlineNode = ParseInline(); if (inlineNode != null) @@ -71,7 +71,7 @@ private Node ParseHeader() } } - if (HasNewlineTokenAtCurrentPosition()) + if (HasMarkerTokenAtCurrentPosition(TokenType.Newline)) { currentIndex++; } @@ -83,7 +83,7 @@ private Node ParseParagraph() { var paragraphContent = new List(); - while (CanContinueParsingParagraphContent()) + while (CanContinueParsingMarkerContent(TokenType.Newline)) { var inlineNode = ParseInline(); if (inlineNode != null) @@ -92,7 +92,7 @@ private Node ParseParagraph() } } - if (HasNewlineTokenAtCurrentPosition()) + if (HasMarkerTokenAtCurrentPosition(TokenType.Newline)) { currentIndex++; } @@ -119,18 +119,75 @@ private Node ParseInline() { TokenType.BoldStart => ParseBold(), TokenType.ItalicsStart => ParseItalic(), + TokenType.LinkStart => ParseLink(), TokenType.Text => ParseText(), _ => HandleUnexpectedToken() }; } + /* Parses a link whose LinkStart token is at currentIndex and returns a Link node. Tokens up to LinkEnd become the children: LinkText as Text nodes, anything else through ParseInline. If a UrlStart section follows, its Url token becomes the node Value and its UrlTitle token the Title; both stay null when absent. Stops quietly at the end of the token list if a closing token is missing. */ private Node ParseLink() + { + currentIndex++; /* step over LinkStart */ + + var linkContent = new List(); + string url = null; + string title = null; + + while (CanContinueParsingMarkerContent(TokenType.LinkEnd)) + { + if (tokens[currentIndex].Type == TokenType.LinkText) + { + linkContent.Add(new Node(NodeType.Text, null, tokens[currentIndex].Value)); + currentIndex++; /* LinkText is consumed here because ParseInline has no case for it */ + } + else + { + var inlineNode = ParseInline(); /* ParseInline leaves currentIndex just past what it consumed (the header, paragraph, bold and italic loops rely on that), so it must not be advanced again here; doing so skipped the token that follows nested markup, such as LinkEnd or the next LinkText */ + if (inlineNode != null) + { + linkContent.Add(inlineNode); + } + } + } +
+ if (HasMarkerTokenAtCurrentPosition(TokenType.LinkEnd)) + { + currentIndex++; + } + + if (HasMarkerTokenAtCurrentPosition(TokenType.UrlStart)) + { + currentIndex++; + + while (CanContinueParsingMarkerContent(TokenType.UrlEnd)) + { + if (tokens[currentIndex].Type == TokenType.Url) + { + url = tokens[currentIndex].Value; + } + else if (tokens[currentIndex].Type == TokenType.UrlTitle) + { + title = tokens[currentIndex].Value; + } + currentIndex++; /* every token of the URL section is consumed here, including UrlTitleDelimiter */ + } + + if (HasMarkerTokenAtCurrentPosition(TokenType.UrlEnd)) + { + currentIndex++; + } + } + + return new Node(NodeType.Link, linkContent, url) { Title = title }; + } + private Node ParseBold() { currentIndex++; var boldContent = new List(); - while (CanContinueParsingBoldContent()) + while (CanContinueParsingMarkerContent(TokenType.BoldEnd)) { var inlineNode = ParseInline(); if (inlineNode != null) @@ -139,7 +196,7 @@ private Node ParseBold() } } - if (HasBoldEndTokenAtCurrentPosition()) + if (HasMarkerTokenAtCurrentPosition(TokenType.BoldEnd)) { currentIndex++; } @@ -153,7 +210,7 @@ private Node ParseItalic() var italicContent = new List(); - while (CanContinueParsingItalicContent()) + while (CanContinueParsingMarkerContent(TokenType.ItalicsEnd)) { var inlineNode = ParseInline(); if (inlineNode != null) @@ -162,7 +219,7 @@ private Node ParseItalic() } } - if (HasItalicsEndTokenAtCurrentPosition()) + if (HasMarkerTokenAtCurrentPosition(TokenType.ItalicsEnd)) { currentIndex++; } @@ -183,39 +240,14 @@ private Node HandleUnexpectedToken() return null; } - private bool CanContinueParsingHeaderContent() - { - return currentIndex < tokens.Count && tokens[currentIndex].Type != TokenType.Newline; - } - - private bool HasNewlineTokenAtCurrentPosition() - { - return currentIndex < tokens.Count && tokens[currentIndex].Type == TokenType.Newline; - } - - private bool CanContinueParsingParagraphContent() - { - return currentIndex < tokens.Count && 
tokens[currentIndex].Type != TokenType.Newline; - } - - private bool CanContinueParsingBoldContent() - { - return currentIndex < tokens.Count && tokens[currentIndex].Type != TokenType.BoldEnd; - } - - private bool HasBoldEndTokenAtCurrentPosition() - { - return currentIndex < tokens.Count && tokens[currentIndex].Type == TokenType.BoldEnd; - } - - private bool CanContinueParsingItalicContent() + /* Loop guard shared by the Parse* methods: true while a token exists at currentIndex and its type is not stopTokenType. */ private bool CanContinueParsingMarkerContent(TokenType stopTokenType) { - return currentIndex < tokens.Count && tokens[currentIndex].Type != TokenType.ItalicsEnd; + return currentIndex < tokens.Count && tokens[currentIndex].Type != stopTokenType; } - private bool HasItalicsEndTokenAtCurrentPosition() + /* True when a token exists at currentIndex and its type equals expectedTokenType; callers use it to decide whether to step over a closing marker. */ private bool HasMarkerTokenAtCurrentPosition(TokenType expectedTokenType) { - return currentIndex < tokens.Count && tokens[currentIndex].Type == TokenType.ItalicsEnd; + return currentIndex < tokens.Count && tokens[currentIndex].Type == expectedTokenType; } } } \ No newline at end of file