diff --git a/cs/.DS_Store b/cs/.DS_Store
new file mode 100644
index 000000000..655ce4e6a
Binary files /dev/null and b/cs/.DS_Store differ
diff --git a/cs/Markdown.Core/Lexing/Handlers/BracketTokenHandler.cs b/cs/Markdown.Core/Lexing/Handlers/BracketTokenHandler.cs
new file mode 100644
index 000000000..b1b840a0e
--- /dev/null
+++ b/cs/Markdown.Core/Lexing/Handlers/BracketTokenHandler.cs
@@ -0,0 +1,19 @@
+namespace Markdown.Core.Lexing.Handlers;
+
+/// <summary>
+/// Emits a LeftBracket or RightBracket token for a single '[' or ']' character.
+/// </summary>
+public class BracketTokenHandler : ITokenHandler
+{
+    public bool CanHandle(ReadOnlySpan<char> source, int index) =>
+        source[index] is '[' or ']';
+
+    public int Handle(ReadOnlyMemory<char> source, int index, List<Token> tokens)
+    {
+        var kind = source.Span[index] == '['
+            ? TokenKind.LeftBracket
+            : TokenKind.RightBracket;
+        tokens.Add(new Token(kind, source.Slice(index, 1)));
+        return 1;
+    }
+}
diff --git a/cs/Markdown.Core/Lexing/Handlers/EscapeTokenHandler.cs b/cs/Markdown.Core/Lexing/Handlers/EscapeTokenHandler.cs
new file mode 100644
index 000000000..854e7978b
--- /dev/null
+++ b/cs/Markdown.Core/Lexing/Handlers/EscapeTokenHandler.cs
@@ -0,0 +1,43 @@
+namespace Markdown.Core.Lexing.Handlers;
+
+/// <summary>
+/// Handles a backslash: an escaped markup character (or an escaped "__" pair) is emitted as plain text
+/// without the backslash; a backslash that escapes nothing is emitted as text itself.
+/// </summary>
+public class EscapeTokenHandler : ITokenHandler
+{
+    public bool CanHandle(ReadOnlySpan<char> source, int index) =>
+        source[index] == '\\';
+
+    public int Handle(ReadOnlyMemory<char> source, int index, List<Token> tokens)
+    {
+        var span = source.Span;
+        var length = source.Length;
+
+        if (index + 1 < length)
+        {
+            var nextChar = span[index + 1];
+
+            // "\__" escapes the whole double underscore: consume three characters, emit "__" as text.
+            if (nextChar == '_' && index + 2 < length && span[index + 2] == '_')
+            {
+                tokens.Add(new Token(TokenKind.Text, source.Slice(index + 1, 2)));
+                return 3;
+            }
+
+            // Backslash followed by a markup character: drop the backslash, keep the character as text.
+            if (IsSpecialCharacter(nextChar))
+            {
+                tokens.Add(new Token(TokenKind.Text, source.Slice(index + 1, 1)));
+                return 2;
+            }
+        }
+
+        // Nothing to escape: the backslash itself is ordinary text.
+        tokens.Add(new Token(TokenKind.Text, source.Slice(index, 1)));
+        return 1;
+    }
+
+    private static bool IsSpecialCharacter(char c) =>
+        c is '#' or '_' or '\\' or '[' or ']' or '(' or ')';
+}
diff --git 
a/cs/Markdown.Core/Lexing/Handlers/HashTokenHandler.cs b/cs/Markdown.Core/Lexing/Handlers/HashTokenHandler.cs
new file mode 100644
index 000000000..a1d4c0c3a
--- /dev/null
+++ b/cs/Markdown.Core/Lexing/Handlers/HashTokenHandler.cs
@@ -0,0 +1,24 @@
+namespace Markdown.Core.Lexing.Handlers;
+
+/// <summary>
+/// Emits a Hash token for '#' at the start of a line followed by a space; any other '#' is plain text.
+/// </summary>
+public class HashTokenHandler : ITokenHandler
+{
+    public bool CanHandle(ReadOnlySpan<char> source, int index) =>
+        source[index] == '#';
+
+    public int Handle(ReadOnlyMemory<char> source, int index, List<Token> tokens)
+    {
+        var isHeadingMarker = IsAtLineStart(tokens)
+                              && index + 1 < source.Length
+                              && source.Span[index + 1] == ' ';
+        var kind = isHeadingMarker ? TokenKind.Hash : TokenKind.Text;
+        tokens.Add(new Token(kind, source.Slice(index, 1)));
+        return 1;
+    }
+
+    // A line starts when nothing has been emitted yet or the last emitted token is a NewLine.
+    private static bool IsAtLineStart(List<Token> tokens) =>
+        tokens.Count == 0 || tokens[^1].Kind == TokenKind.NewLine;
+}
diff --git a/cs/Markdown.Core/Lexing/Handlers/NewLineTokenHandler.cs b/cs/Markdown.Core/Lexing/Handlers/NewLineTokenHandler.cs
new file mode 100644
index 000000000..8b432c6bc
--- /dev/null
+++ b/cs/Markdown.Core/Lexing/Handlers/NewLineTokenHandler.cs
@@ -0,0 +1,32 @@
+namespace Markdown.Core.Lexing.Handlers;
+
+/// <summary>
+/// Emits a NewLine token for "\r\n" or "\n"; a lone '\r' is emitted as plain text.
+/// </summary>
+public class NewLineTokenHandler : ITokenHandler
+{
+    public bool CanHandle(ReadOnlySpan<char> source, int index) =>
+        source[index] is '\n' or '\r';
+
+    public int Handle(ReadOnlyMemory<char> source, int index, List<Token> tokens)
+    {
+        var span = source.Span;
+        var length = source.Length;
+
+        if (span[index] == '\r' && index + 1 < length && span[index + 1] == '\n')
+        {
+            tokens.Add(new Token(TokenKind.NewLine, source.Slice(index, 2)));
+            return 2;
+        }
+
+        if (span[index] == '\n')
+        {
+            tokens.Add(new Token(TokenKind.NewLine, source.Slice(index, 1)));
+            return 1;
+        }
+
+        // A carriage return that is not part of "\r\n" is kept as ordinary text.
+        tokens.Add(new Token(TokenKind.Text, source.Slice(index, 1)));
+        return 1;
+    }
+}
diff --git a/cs/Markdown.Core/Lexing/Handlers/ParenthesisTokenHandler.cs b/cs/Markdown.Core/Lexing/Handlers/ParenthesisTokenHandler.cs
new file mode 100644
index 
000000000..95c1f6106
--- /dev/null
+++ b/cs/Markdown.Core/Lexing/Handlers/ParenthesisTokenHandler.cs
@@ -0,0 +1,19 @@
+namespace Markdown.Core.Lexing.Handlers;
+
+/// <summary>
+/// Emits a LeftParen or RightParen token for a single '(' or ')' character.
+/// </summary>
+public class ParenthesisTokenHandler : ITokenHandler
+{
+    public bool CanHandle(ReadOnlySpan<char> source, int index) =>
+        source[index] is '(' or ')';
+
+    public int Handle(ReadOnlyMemory<char> source, int index, List<Token> tokens)
+    {
+        var kind = source.Span[index] == '('
+            ? TokenKind.LeftParen
+            : TokenKind.RightParen;
+        tokens.Add(new Token(kind, source.Slice(index, 1)));
+        return 1;
+    }
+}
diff --git a/cs/Markdown.Core/Lexing/Handlers/SpaceTokenHandler.cs b/cs/Markdown.Core/Lexing/Handlers/SpaceTokenHandler.cs
new file mode 100644
index 000000000..2691a5968
--- /dev/null
+++ b/cs/Markdown.Core/Lexing/Handlers/SpaceTokenHandler.cs
@@ -0,0 +1,16 @@
+namespace Markdown.Core.Lexing.Handlers;
+
+/// <summary>
+/// Emits a Space token for a single space or tab character.
+/// </summary>
+public class SpaceTokenHandler : ITokenHandler
+{
+    public bool CanHandle(ReadOnlySpan<char> source, int index) =>
+        source[index] is ' ' or '\t';
+
+    public int Handle(ReadOnlyMemory<char> source, int index, List<Token> tokens)
+    {
+        tokens.Add(new Token(TokenKind.Space, source.Slice(index, 1)));
+        return 1;
+    }
+}
diff --git a/cs/Markdown.Core/Lexing/Handlers/TextTokenHandler.cs b/cs/Markdown.Core/Lexing/Handlers/TextTokenHandler.cs
new file mode 100644
index 000000000..a19b39e7d
--- /dev/null
+++ b/cs/Markdown.Core/Lexing/Handlers/TextTokenHandler.cs
@@ -0,0 +1,27 @@
+namespace Markdown.Core.Lexing.Handlers;
+
+/// <summary>
+/// Accepts any position and emits the longest run of characters that are neither markup nor whitespace as one Text token.
+/// </summary>
+public class TextTokenHandler : ITokenHandler
+{
+    // Markup characters plus whitespace; any of them ends a run of plain text.
+    private const string Delimiters = "#_\\[]() \t\n\r";
+
+    public bool CanHandle(ReadOnlySpan<char> source, int index) => true;
+
+    public int Handle(ReadOnlyMemory<char> source, int index, List<Token> tokens)
+    {
+        var rest = source.Span[index..];
+        var length = rest.IndexOfAny(Delimiters.AsSpan());
+        if (length < 0)
+            length = rest.Length;
+
+        // Always consume at least one character so that the caller keeps advancing.
+        if (length == 0)
+            length = 1;
+
+        tokens.Add(new Token(TokenKind.Text, source.Slice(index, length)));
+        return length;
+    }
+}
diff --git a/cs/Markdown.Core/Lexing/Handlers/UnderscoreTokenHandler.cs b/cs/Markdown.Core/Lexing/Handlers/UnderscoreTokenHandler.cs
new file mode 100644
index 000000000..e51b4a495
--- /dev/null
+++ b/cs/Markdown.Core/Lexing/Handlers/UnderscoreTokenHandler.cs
@@ -0,0 +1,23 @@
+namespace Markdown.Core.Lexing.Handlers;
+
+/// <summary>
+/// Emits a DoubleUnderscore token for "__" and an Underscore token for a single '_'.
+/// </summary>
+public class UnderscoreTokenHandler : ITokenHandler
+{
+    public bool CanHandle(ReadOnlySpan<char> source, int index) =>
+        source[index] == '_';
+
+    public int Handle(ReadOnlyMemory<char> source, int index, List<Token> tokens)
+    {
+        var span = source.Span;
+        if (index + 1 < source.Length && span[index + 1] == '_')
+        {
+            tokens.Add(new Token(TokenKind.DoubleUnderscore, source.Slice(index, 2)));
+            return 2;
+        }
+
+        tokens.Add(new Token(TokenKind.Underscore, source.Slice(index, 1)));
+        return 1;
+    }
+}
diff --git a/cs/Markdown.Core/Lexing/ILexer.cs b/cs/Markdown.Core/Lexing/ILexer.cs
new file mode 100644
index 000000000..68d1eb38c
--- /dev/null
+++ b/cs/Markdown.Core/Lexing/ILexer.cs
@@ -0,0 +1,9 @@
+namespace Markdown.Core.Lexing;
+
+/// <summary>
+/// Splits Markdown source text into a sequence of tokens.
+/// </summary>
+public interface ILexer
+{
+    IEnumerable<Token> Tokenize(ReadOnlyMemory<char> source);
+}
\ No newline at end of file
diff --git a/cs/Markdown.Core/Lexing/ITokenHandler.cs b/cs/Markdown.Core/Lexing/ITokenHandler.cs
new file mode 100644
index 000000000..772341290
--- /dev/null
+++ b/cs/Markdown.Core/Lexing/ITokenHandler.cs
@@ -0,0 +1,13 @@
+namespace Markdown.Core.Lexing;
+
+/// <summary>
+/// Recognizes one kind of lexeme at a given position of the source text.
+/// </summary>
+public interface ITokenHandler
+{
+    /// <summary>Returns true when this handler recognizes the character at <paramref name="index"/>.</summary>
+    bool CanHandle(ReadOnlySpan<char> source, int index);
+
+    /// <summary>Appends the recognized token to <paramref name="tokens"/> and returns the number of characters consumed.</summary>
+    int Handle(ReadOnlyMemory<char> source, int index, List<Token> tokens);
+}
diff --git a/cs/Markdown.Core/Lexing/Lexer.cs b/cs/Markdown.Core/Lexing/Lexer.cs
new file mode 100644
index 000000000..678a77d28
--- /dev/null
+++ b/cs/Markdown.Core/Lexing/Lexer.cs
@@ -0,0 +1,45 @@
+using 
Markdown.Core.Lexing.Handlers;
+
+namespace Markdown.Core.Lexing;
+
+/// <summary>Turns Markdown source into tokens; at each position the first matching handler wins.</summary>
+public class Lexer : ILexer
+{
+    private readonly List<ITokenHandler> _handlers =
+    [
+        new HashTokenHandler(),
+        new EscapeTokenHandler(),
+        new UnderscoreTokenHandler(),
+        new SpaceTokenHandler(),
+        new NewLineTokenHandler(),
+        new BracketTokenHandler(),
+        new ParenthesisTokenHandler()
+    ];
+    private readonly ITokenHandler _textHandler = new TextTokenHandler();
+
+    /// <summary>Tokenizes <paramref name="source"/>; the result always ends with an Eof token.</summary>
+    public IEnumerable<Token> Tokenize(ReadOnlyMemory<char> source)
+    {
+        var tokens = new List<Token>();
+        var position = 0;
+
+        while (position < source.Length)
+        {
+            var handler = SelectHandler(source.Span, position);
+            position += handler.Handle(source, position, tokens);
+        }
+
+        tokens.Add(new Token(TokenKind.Eof, source[..0]));
+        return tokens;
+    }
+
+    // Plain text is the fallback when no specialized handler recognizes the character.
+    private ITokenHandler SelectHandler(ReadOnlySpan<char> source, int index)
+    {
+        foreach (var handler in _handlers)
+            if (handler.CanHandle(source, index))
+                return handler;
+
+        return _textHandler;
+    }
+}
diff --git a/cs/Markdown.Core/Lexing/Token.cs b/cs/Markdown.Core/Lexing/Token.cs
new file mode 100644
index 000000000..2506f78b8
--- /dev/null
+++ b/cs/Markdown.Core/Lexing/Token.cs
@@ -0,0 +1,10 @@
+namespace Markdown.Core.Lexing;
+
+/// <summary>
+/// A lexeme: its kind plus the slice of source text that forms its content.
+/// </summary>
+public readonly struct Token(TokenKind kind, ReadOnlyMemory<char> slice)
+{
+    public TokenKind Kind { get; init; } = kind;
+    public ReadOnlyMemory<char> Slice { get; init; } = slice;
+}
\ No newline at end of file
diff --git a/cs/Markdown.Core/Lexing/TokenKind.cs b/cs/Markdown.Core/Lexing/TokenKind.cs
new file mode 100644
index 000000000..1c11c1615
--- /dev/null
+++ b/cs/Markdown.Core/Lexing/TokenKind.cs
@@ -0,0 +1,27 @@
+namespace Markdown.Core.Lexing;
+
+public enum TokenKind
+{
+    /// <summary>Plain text</summary>
+    Text,
+    /// <summary>Single underscore: italic (emphasis) marker</summary>
+    Underscore,
+    /// <summary>Double underscore: bold (strong) marker</summary>
+    DoubleUnderscore,
+    /// <summary>Heading marker</summary>
+    Hash,
+    /// <summary>Single space</summary>
+    Space,
+    /// <summary>Line break</summary>
+    NewLine,
+    /// <summary>End of input</summary>
+    Eof,
+    /// <summary>Square bracket '[': opens the link text</summary>
+    LeftBracket,
+    /// <summary>Square bracket ']': closes the link text</summary>
+    RightBracket,
+    /// <summary>Parenthesis '(': opens the link address</summary>
+    LeftParen,
+    /// <summary>Parenthesis ')': closes the link address</summary>
+    RightParen,
+}
diff --git a/cs/Markdown.Core/Markdown.Core.csproj b/cs/Markdown.Core/Markdown.Core.csproj
new file mode 100644
index 000000000..2179c1392
--- /dev/null
+++ b/cs/Markdown.Core/Markdown.Core.csproj
@@ -0,0 +1,15 @@
+
+
+
+ net8.0
+ enable
+ enable
+
+
+
+
+
+
+
+
+
diff --git a/cs/Markdown.Core/Parsing/Blocks/HeadingBlockParser.cs b/cs/Markdown.Core/Parsing/Blocks/HeadingBlockParser.cs
new file mode 100644
index 000000000..3094563f4
--- /dev/null
+++ b/cs/Markdown.Core/Parsing/Blocks/HeadingBlockParser.cs
@@ -0,0 +1,37 @@
+using Markdown.Core.Lexing;
+using Markdown.Core.Parsing.Nodes;
+
+namespace Markdown.Core.Parsing.Blocks;
+
+/// <summary>
+/// Parses a heading: a Hash token at the start of a line followed by inline content up to the end of the line.
+/// </summary>
+public class HeadingBlockParser : IBlockParser
+{
+    // Every heading produced here gets this level.
+    private const int HeadingLevel = 1;
+
+    public bool CanParse(ParserState state) =>
+        state.CurrentToken.Kind == TokenKind.Hash && state.IsAtLineStart();
+
+    public BlockNode? Parse(ParserState state, InlineParser inlineParser)
+    {
+        var heading = new HeadingNode(HeadingLevel);
+
+        // Step over the '#' marker before reading the heading text.
+        state.MoveNext();
+        state.SkipSpace();
+
+        while (!state.IsEndOfLine())
+        {
+            var inline = inlineParser.ParseInline();
+            if (inline != null)
+                heading.Inlines.Add(inline);
+        }
+
+        // Consume the line break that terminates the heading, if there is one.
+        if (state.CurrentToken.Kind == TokenKind.NewLine)
+            state.MoveNext();
+        return heading;
+    }
+}
diff --git a/cs/Markdown.Core/Parsing/Blocks/IBlockParser.cs b/cs/Markdown.Core/Parsing/Blocks/IBlockParser.cs
new file mode 100644
index 000000000..1fa59f877
--- /dev/null
+++ b/cs/Markdown.Core/Parsing/Blocks/IBlockParser.cs
@@ -0,0 +1,9 @@
+using Markdown.Core.Parsing.Nodes;
+
+namespace Markdown.Core.Parsing.Blocks;
+
+public interface IBlockParser
+{
+    bool CanParse(ParserState state);
+    BlockNode? 
Parse(ParserState state, InlineParser inlineParser);
+}
diff --git a/cs/Markdown.Core/Parsing/Blocks/ParagraphBlockParser.cs b/cs/Markdown.Core/Parsing/Blocks/ParagraphBlockParser.cs
new file mode 100644
index 000000000..9a0d95537
--- /dev/null
+++ b/cs/Markdown.Core/Parsing/Blocks/ParagraphBlockParser.cs
@@ -0,0 +1,33 @@
+using Markdown.Core.Lexing;
+using Markdown.Core.Parsing.Nodes;
+
+namespace Markdown.Core.Parsing.Blocks;
+
+/// <summary>
+/// Parses any non-Eof position as a paragraph: inline content up to the end of the line.
+/// </summary>
+public class ParagraphBlockParser : IBlockParser
+{
+    public bool CanParse(ParserState state) =>
+        state.CurrentToken.Kind is not TokenKind.Eof;
+
+    public BlockNode? Parse(ParserState state, InlineParser inlineParser)
+    {
+        var paragraph = new ParagraphNode();
+
+        while (!state.IsEndOfLine())
+        {
+            var inline = inlineParser.ParseInline();
+            if (inline != null)
+                paragraph.Inlines.Add(inline);
+        }
+
+        if (state.CurrentToken.Kind == TokenKind.NewLine)
+        {
+            state.MoveNext();
+            state.SkipEmptyLines();
+        }
+
+        return paragraph;
+    }
+}
diff --git a/cs/Markdown.Core/Parsing/IParser.cs b/cs/Markdown.Core/Parsing/IParser.cs
new file mode 100644
index 000000000..1456ce660
--- /dev/null
+++ b/cs/Markdown.Core/Parsing/IParser.cs
@@ -0,0 +1,12 @@
+using Markdown.Core.Lexing;
+using Markdown.Core.Parsing.Nodes;
+
+namespace Markdown.Core.Parsing;
+
+/// <summary>
+/// Builds a document tree from a token sequence.
+/// </summary>
+public interface IParser
+{
+    DocumentNode Parse(IEnumerable<Token> tokens);
+}
\ No newline at end of file
diff --git a/cs/Markdown.Core/Parsing/InlineParser.cs b/cs/Markdown.Core/Parsing/InlineParser.cs
new file mode 100644
index 000000000..928343429
--- /dev/null
+++ b/cs/Markdown.Core/Parsing/InlineParser.cs
@@ -0,0 +1,213 @@
+using System.Text;
+using Markdown.Core.Lexing;
+using Markdown.Core.Parsing.Nodes;
+
+namespace Markdown.Core.Parsing;
+
+/// <summary>
+/// Parses inline markup (text, emphasis, strong, links) through the token cursor delegates it is given.
+/// Markup that turns out to be unpaired or invalid is returned as plain text.
+/// </summary>
+public class InlineParser(
+    IReadOnlyList<Token> tokens,
+    Func<Token> moveNext,
+    Func<Token> currentToken,
+    Func<int> currentIndex,
+    Func<bool> isEndOfLine,
+    InlineValidator validator)
+{
+    /// <summary>Parses one inline node at the cursor; returns null at the end of input.</summary>
+    public InlineNode? ParseInline() =>
+        currentToken().Kind switch
+        {
+            TokenKind.Eof => null,
+            TokenKind.Underscore => ParseEmphasis(),
+            TokenKind.DoubleUnderscore => ParseStrong(),
+            TokenKind.LeftBracket => ParseLink(),
+            _ => ParseText()
+        };
+
+    private TextNode ParseText()
+    {
+        var node = new TextNode(currentToken().Slice.ToString());
+        moveNext();
+        return node;
+    }
+
+    private InlineNode ParseEmphasis()
+    {
+        // The validator works with token indices; the cursor index is expected to be one past the current token.
+        var startIndex = currentIndex() - 1;
+        moveNext();
+
+        return IsInvalidDelimiterStart()
+            ? CreateTextNode("_")
+            : ParseEmphasisContent(new EmphasisNode(), startIndex);
+    }
+
+    private InlineNode ParseStrong()
+    {
+        var startIndex = currentIndex() - 1;
+        moveNext();
+
+        return IsInvalidDelimiterStart()
+            ? CreateTextNode("__")
+            : ParseStrongContent(new StrongNode(), startIndex);
+    }
+
+    private InlineNode ParseLink()
+    {
+        var linkTextNodes = new List<InlineNode>();
+        moveNext();
+
+        while (!isEndOfLine() && currentToken().Kind != TokenKind.RightBracket)
+        {
+            var inline = ParseInline();
+            if (inline != null)
+                linkTextNodes.Add(inline);
+        }
+
+        if (currentToken().Kind != TokenKind.RightBracket)
+            return RestoreAsText("[", linkTextNodes);
+
+        moveNext();
+
+        if (currentToken().Kind != TokenKind.LeftParen)
+            return RestoreAsText("[", linkTextNodes, "]");
+
+        moveNext();
+
+        // The address is taken verbatim: every token up to ')' or the end of the line.
+        var hrefBuilder = new StringBuilder();
+        while (currentToken().Kind is not (TokenKind.Eof or TokenKind.RightParen or TokenKind.NewLine))
+        {
+            hrefBuilder.Append(currentToken().Slice.Span);
+            moveNext();
+        }
+
+        if (currentToken().Kind != TokenKind.RightParen)
+            return RestoreAsText("[", linkTextNodes, "](" + hrefBuilder);
+
+        moveNext();
+        return new LinkNode(hrefBuilder.ToString(), linkTextNodes);
+    }
+
+    // A marker followed by whitespace or the end of input cannot open emphasis or strong.
+    private bool IsInvalidDelimiterStart() =>
+        currentToken().Kind is TokenKind.Space or TokenKind.NewLine or TokenKind.Eof;
+
+    private InlineNode ParseEmphasisContent(EmphasisNode emphasis, int startIndex)
+    {
+        while (!IsEndOfContent())
+        {
+            if (currentToken().Kind == TokenKind.Underscore)
+            {
+                if (TryCloseEmphasis(emphasis, startIndex))
+                    return emphasis;
+            }
+            else if (currentToken().Kind == TokenKind.DoubleUnderscore)
+            {
+                // Strong is not recognized inside emphasis: keep the marker as text.
+                emphasis.Inlines.Add(CreateTextNode("__"));
+                moveNext();
+            }
+            else
+            {
+                var inline = ParseInline();
+                if (inline != null)
+                    emphasis.Inlines.Add(inline);
+            }
+        }
+
+        // No closing marker before the end of the line: the opening marker becomes text again.
+        return RestoreAsText("_", emphasis.Inlines);
+    }
+
+    private InlineNode ParseStrongContent(StrongNode strong, int startIndex)
+    {
+        while (!IsEndOfContent())
+        {
+            if (currentToken().Kind == TokenKind.DoubleUnderscore)
+            {
+                if (TryCloseStrong(strong, startIndex))
+                    return strong;
+            }
+            else if (currentToken().Kind == TokenKind.Underscore)
+            {
+                strong.Inlines.Add(ParseEmphasis());
+            }
+            else
+            {
+                var inline = ParseInline();
+                if (inline != null)
+                    strong.Inlines.Add(inline);
+            }
+        }
+
+        return RestoreAsText("__", strong.Inlines);
+    }
+
+    private bool IsEndOfContent() => currentToken().Kind is TokenKind.NewLine or TokenKind.Eof;
+
+    // Consumes the '_' at the cursor; when it is not a valid closing marker it is kept as text.
+    private bool TryCloseEmphasis(EmphasisNode emphasis, int startIndex)
+    {
+        var closes = validator.IsValidEmphasisClose(tokens, startIndex, currentIndex() - 1);
+        if (!closes)
+            emphasis.Inlines.Add(CreateTextNode("_"));
+
+        moveNext();
+        return closes;
+    }
+
+    // Consumes the "__" at the cursor; when it is not a valid closing marker it is kept as text.
+    private bool TryCloseStrong(StrongNode strong, int startIndex)
+    {
+        var closes = validator.IsValidStrongClose(tokens, startIndex, currentIndex() - 1);
+        if (!closes)
+            strong.Inlines.Add(CreateTextNode("__"));
+
+        moveNext();
+        return closes;
+    }
+
+    private static TextNode CreateTextNode(string text) => new(text);
+
+    // Turns a failed construct back into text: prefix + already parsed content + suffix.
+    private static TextNode RestoreAsText(string prefix, IEnumerable<InlineNode> nodes, string suffix = "")
+    {
+        var builder = new StringBuilder();
+        AppendWrapped(builder, prefix, nodes, suffix);
+        return new TextNode(builder.ToString());
+    }
+
+    // Re-emits a parsed node together with its markup markers, so that completed emphasis, strong or
+    // link nodes nested in a construct that is turned back into text do not silently lose characters.
+    private static void AppendSourceText(StringBuilder builder, InlineNode node)
+    {
+        switch (node)
+        {
+            case TextNode text:
+                builder.Append(text.Text);
+                break;
+            case EmphasisNode emphasis:
+                AppendWrapped(builder, "_", emphasis.Inlines, "_");
+                break;
+            case StrongNode strong:
+                AppendWrapped(builder, "__", strong.Inlines, "__");
+                break;
+            case LinkNode link:
+                AppendWrapped(builder, "[", link.Inlines, "](" + link.Href + ")");
+                break;
+        }
+    }
+
+    private static void AppendWrapped(
+        StringBuilder builder, string open, IEnumerable<InlineNode> content, string close)
+    {
+        builder.Append(open);
+        foreach (var inline in content)
+            AppendSourceText(builder, inline);
+        builder.Append(close);
+    }
+}
diff --git a/cs/Markdown.Core/Parsing/InlineValidator.cs b/cs/Markdown.Core/Parsing/InlineValidator.cs
new file mode 100644
index 000000000..464fd66de
--- /dev/null
+++ b/cs/Markdown.Core/Parsing/InlineValidator.cs
@@ -0,0 +1,118 @@
+using Markdown.Core.Lexing;
+
+namespace Markdown.Core.Parsing;
+
+/// <summary>
+/// Decides whether a closing "_" or "__" marker forms a valid pair with a given opening marker.
+/// All indices are positions in the token list.
+/// </summary>
+public class InlineValidator
+{
+    public bool IsValidEmphasisClose(IReadOnlyList<Token> tokens, int startIndex, int closeIndex) =>
+        IsValidPair(tokens, startIndex, closeIndex) &&
+        // An odd number of "__" tokens between the markers means the two kinds of markup intersect.
+        !HasOddCountBetween(tokens, startIndex, closeIndex, TokenKind.DoubleUnderscore);
+
+    public bool IsValidStrongClose(IReadOnlyList<Token> tokens, int startIndex, int closeIndex) =>
+        IsValidPair(tokens, startIndex, closeIndex) &&
+        !ContainsBetween(tokens, startIndex, closeIndex, TokenKind.DoubleUnderscore) &&
+        // An odd number of "_" tokens between the markers means the two kinds of markup intersect.
+        !HasOddCountBetween(tokens, startIndex, closeIndex, TokenKind.Underscore);
+
+    // Checks shared by both marker kinds.
+    private static bool IsValidPair(IReadOnlyList<Token> tokens, int startIndex, int closeIndex)
+    {
+        if (!HasValidOpeningBoundary(tokens, startIndex) ||
+            !HasValidClosingBoundary(tokens, closeIndex))
+            return false;
+
+        // Nothing between the markers: an empty pair is not markup.
+        if (startIndex + 1 == closeIndex)
+            return false;
+
+        if (IsInDigitContext(tokens, startIndex) || IsInDigitContext(tokens, closeIndex))
+            return false;
+
+        // Two markers that both sit inside words must not have a space between them.
+        return !(IsInsideWord(tokens, startIndex) && IsInsideWord(tokens, closeIndex) &&
+                 ContainsBetween(tokens, startIndex, closeIndex, TokenKind.Space));
+    }
+
+    // The opening marker must not be followed by a Space or NewLine token.
+    private static bool HasValidOpeningBoundary(IReadOnlyList<Token> tokens, int startIndex)
+    {
+        if (startIndex + 1 >= tokens.Count)
+            return true;
+
+        return tokens[startIndex + 1].Kind is not (TokenKind.Space or TokenKind.NewLine);
+    }
+
+    // A closing marker preceded by a space is accepted only when it is the last token or is followed by Space, NewLine or Eof.
+    // NOTE(review): confirm that a marker with whitespace on both sides is really meant to close markup.
+    private static bool HasValidClosingBoundary(IReadOnlyList<Token> tokens, int closeIndex)
+    {
+        if (closeIndex - 1 < 0)
+            return true;
+
+        if (tokens[closeIndex - 1].Kind != TokenKind.Space)
+            return true;
+
+        if (closeIndex + 1 >= tokens.Count)
+            return true;
+
+        return tokens[closeIndex + 1].Kind is TokenKind.Space or TokenKind.NewLine or TokenKind.Eof;
+    }
+
+    private static bool IsInDigitContext(IReadOnlyList<Token> tokens, int index) =>
+        PreviousCharIs(tokens, index, char.IsDigit) || NextCharIs(tokens, index, char.IsDigit);
+
+    private static bool IsInsideWord(IReadOnlyList<Token> tokens, int index) =>
+        PreviousCharIs(tokens, index, char.IsLetterOrDigit) && NextCharIs(tokens, index, char.IsLetterOrDigit);
+
+    // True when the token before index is text whose last character satisfies the predicate.
+    private static bool PreviousCharIs(IReadOnlyList<Token> tokens, int index, Func<char, bool> predicate)
+    {
+        if (index == 0)
+            return false;
+
+        var prev = tokens[index - 1];
+        return prev.Kind == TokenKind.Text &&
+               prev.Slice.Length > 0 &&
+               predicate(prev.Slice.Span[^1]);
+    }
+
+    // True when the token after index is text whose first character satisfies the predicate.
+    private static bool NextCharIs(IReadOnlyList<Token> tokens, int index, Func<char, bool> predicate)
+    {
+        if (index + 1 >= tokens.Count)
+            return false;
+
+        var next = tokens[index + 1];
+        return next.Kind == TokenKind.Text &&
+               next.Slice.Length > 0 &&
+               predicate(next.Slice.Span[0]);
+    }
+
+    // True when an odd number of tokens of the given kind lies strictly between the two indices.
+    private static bool HasOddCountBetween(
+        IReadOnlyList<Token> tokens, int startIndex, int closeIndex, TokenKind kind)
+    {
+        var odd = false;
+        for (var i = startIndex + 1; i < closeIndex; i++)
+            if (tokens[i].Kind == kind)
+                odd = !odd;
+
+        return odd;
+    }
+
+    // True when at least one token of the given kind lies strictly between the two indices.
+    private static bool ContainsBetween(
+        IReadOnlyList<Token> tokens, int startIndex, int closeIndex, TokenKind kind)
+    {
+        for (var i = startIndex + 1; i < closeIndex; i++)
+            if (tokens[i].Kind == kind)
+                return true;
+
+        return false;
+    }
+}
diff --git a/cs/Markdown.Core/Parsing/Nodes/BlockNode.cs b/cs/Markdown.Core/Parsing/Nodes/BlockNode.cs
new file mode 100644
index 000000000..6e1b59531
--- /dev/null
+++ b/cs/Markdown.Core/Parsing/Nodes/BlockNode.cs
@@ -0,0 +1,8 @@
+namespace Markdown.Core.Parsing.Nodes;
+
+/// <summary>
+/// Base class for block-level document elements (headings and paragraphs)
+/// </summary>
+public abstract class BlockNode : Node
+{
+}
\ No newline at end of file
diff --git a/cs/Markdown.Core/Parsing/Nodes/DocumentNode.cs b/cs/Markdown.Core/Parsing/Nodes/DocumentNode.cs
new file mode 100644
index 000000000..31e31661a
--- /dev/null
+++ b/cs/Markdown.Core/Parsing/Nodes/DocumentNode.cs
@@ -0,0 +1,9 @@
+namespace Markdown.Core.Parsing.Nodes;
+
+/// <summary>
+/// Root of the tree; holds the sequence of block nodes (headings and paragraphs)
+/// </summary>
+public class DocumentNode : Node
+{
+    public IList<BlockNode> Children { get; } = new List<BlockNode>();
+}
\ No newline at end of file
diff --git a/cs/Markdown.Core/Parsing/Nodes/EmphasisNode.cs b/cs/Markdown.Core/Parsing/Nodes/EmphasisNode.cs
new file mode 100644
index 000000000..2fb66b72d
--- /dev/null
+++ b/cs/Markdown.Core/Parsing/Nodes/EmphasisNode.cs
@@ -0,0 +1,10 @@
+namespace Markdown.Core.Parsing.Nodes;
+
+/// <summary>
+/// Emphasis (_..._): container for the inlines inside the markup
+/// LIMITATION: cannot contain a StrongNode (per the specification)
+/// </summary>
+public class EmphasisNode : InlineNode
+{
+    public IList<InlineNode> Inlines { get; } = new List<InlineNode>();
+}
\ No newline at end of file
diff --git a/cs/Markdown.Core/Parsing/Nodes/HeadingNode.cs b/cs/Markdown.Core/Parsing/Nodes/HeadingNode.cs
new file mode 100644
index 000000000..e6a574583
--- /dev/null
+++ b/cs/Markdown.Core/Parsing/Nodes/HeadingNode.cs
@@ -0,0 +1,10 @@
+namespace 
Markdown.Core.Parsing.Nodes;
+
+/// <summary>
+/// Heading: a block element that contains a set of inlines (text and emphasis)
+/// </summary>
+public class HeadingNode(int level) : BlockNode
+{
+    public int Level { get; } = level; // only h1 exists for now; keeping the level makes later extension easy
+    public List<InlineNode> Inlines { get; } = [];
+}
\ No newline at end of file
diff --git a/cs/Markdown.Core/Parsing/Nodes/InlineNode.cs b/cs/Markdown.Core/Parsing/Nodes/InlineNode.cs
new file mode 100644
index 000000000..3650e4259
--- /dev/null
+++ b/cs/Markdown.Core/Parsing/Nodes/InlineNode.cs
@@ -0,0 +1,8 @@
+namespace Markdown.Core.Parsing.Nodes;
+
+/// <summary>
+/// Base class for inline elements
+/// </summary>
+public abstract class InlineNode : Node
+{
+}
\ No newline at end of file
diff --git a/cs/Markdown.Core/Parsing/Nodes/LinkNode.cs b/cs/Markdown.Core/Parsing/Nodes/LinkNode.cs
new file mode 100644
index 000000000..24f32aefd
--- /dev/null
+++ b/cs/Markdown.Core/Parsing/Nodes/LinkNode.cs
@@ -0,0 +1,10 @@
+namespace Markdown.Core.Parsing.Nodes;
+
+/// <summary>
+/// Link: the target address plus the inlines that form the link text
+/// </summary>
+public class LinkNode(string href, IList<InlineNode> inlines) : InlineNode
+{
+    public string Href { get; } = href;
+    public IList<InlineNode> Inlines { get; } = inlines;
+}
diff --git a/cs/Markdown.Core/Parsing/Nodes/Node.cs b/cs/Markdown.Core/Parsing/Nodes/Node.cs
new file mode 100644
index 000000000..48f0ab995
--- /dev/null
+++ b/cs/Markdown.Core/Parsing/Nodes/Node.cs
@@ -0,0 +1,9 @@
+namespace Markdown.Core.Parsing.Nodes;
+
+/// <summary>
+/// Base type of the syntax tree nodes
+/// Nodes are either block-level (headings and paragraphs) or inline (text/emphasis).
+/// </summary>
+public abstract class Node
+{
+}
\ No newline at end of file
diff --git a/cs/Markdown.Core/Parsing/Nodes/ParagraphNode.cs b/cs/Markdown.Core/Parsing/Nodes/ParagraphNode.cs
new file mode 100644
index 000000000..a844132f8
--- /dev/null
+++ b/cs/Markdown.Core/Parsing/Nodes/ParagraphNode.cs
@@ -0,0 +1,9 @@
+namespace Markdown.Core.Parsing.Nodes;
+
+/// <summary>
+/// Paragraph: a block element that contains a set of inlines (text and emphasis)
+/// </summary>
+public class ParagraphNode : BlockNode
+{
+    public IList<InlineNode> Inlines { get; } = new List<InlineNode>();
+}
\ No newline at end of file
diff --git a/cs/Markdown.Core/Parsing/Nodes/StrongNode.cs b/cs/Markdown.Core/Parsing/Nodes/StrongNode.cs
new file mode 100644
index 000000000..ea98c8f68
--- /dev/null
+++ b/cs/Markdown.Core/Parsing/Nodes/StrongNode.cs
@@ -0,0 +1,9 @@
+namespace Markdown.Core.Parsing.Nodes;
+
+/// <summary>
+/// Strong (__...__): container for the inlines inside the markup
+/// </summary>
+public class StrongNode : InlineNode
+{
+    public IList<InlineNode> Inlines { get; } = new List<InlineNode>();
+}
\ No newline at end of file
diff --git a/cs/Markdown.Core/Parsing/Nodes/TextNode.cs b/cs/Markdown.Core/Parsing/Nodes/TextNode.cs
new file mode 100644
index 000000000..02a4fe419
--- /dev/null
+++ b/cs/Markdown.Core/Parsing/Nodes/TextNode.cs
@@ -0,0 +1,9 @@
+namespace Markdown.Core.Parsing.Nodes;
+
+/// <summary>
+/// Plain text without markup, a leaf of the tree
+/// </summary>
+public class TextNode(string text) : InlineNode
+{
+    public string Text { get; } = text;
+}
\ No newline at end of file
diff --git a/cs/Markdown.Core/Parsing/Parser.cs b/cs/Markdown.Core/Parsing/Parser.cs
new file mode 100644
index 000000000..8d3c6996c
--- /dev/null
+++ b/cs/Markdown.Core/Parsing/Parser.cs
@@ -0,0 +1,59 @@
+using Markdown.Core.Lexing;
+using Markdown.Core.Parsing.Blocks;
+using Markdown.Core.Parsing.Nodes;
+
+namespace Markdown.Core.Parsing;
+
+/// <summary>
+/// Builds the document tree: splits the token sequence into blocks and hands inline content to InlineParser.
+/// </summary>
+public class Parser : IParser
+{
+    private readonly InlineValidator _inlineValidator = new();
+
+    // Order matters: the first parser whose CanParse returns true handles the block.
+    private readonly List<IBlockParser> _blockParsers =
+    [
+        new HeadingBlockParser(),
+        new ParagraphBlockParser()
+    ];
+
+    public DocumentNode Parse(IEnumerable<Token> tokens)
+    {
+        // All parsing state is local to the call, so the instance keeps nothing between calls.
+        var allTokens = tokens.ToList();
+        var state = new ParserState(allTokens);
+        var inlineParser = new InlineParser(
+            allTokens,
+            state.MoveNext,
+            () => state.CurrentToken,
+            () => state.CurrentIndex,
+            state.IsEndOfLine,
+            _inlineValidator);
+        state.Start();
+
+        var document = new DocumentNode();
+
+        while (state.CurrentToken.Kind != TokenKind.Eof)
+        {
+            state.SkipEmptyLines();
+            if (state.CurrentToken.Kind == TokenKind.Eof)
+                break;
+
+            var block = ParseBlock(state, inlineParser);
+            if (block != null)
+                document.Children.Add(block);
+        }
+
+        return document;
+    }
+
+    private BlockNode? ParseBlock(ParserState state, InlineParser inlineParser)
+    {
+        foreach (var blockParser in _blockParsers)
+            if (blockParser.CanParse(state))
+                return blockParser.Parse(state, inlineParser);
+
+        return null;
+    }
+}
diff --git a/cs/Markdown.Core/Parsing/ParserState.cs b/cs/Markdown.Core/Parsing/ParserState.cs
new file mode 100644
index 000000000..d762ef315
--- /dev/null
+++ b/cs/Markdown.Core/Parsing/ParserState.cs
@@ -0,0 +1,55 @@
+using Markdown.Core.Lexing;
+
+namespace Markdown.Core.Parsing;
+
+public class ParserState
+{
+    private readonly IReadOnlyList<Token> _tokens;
+    private IEnumerator<Token> _tokenPointer;
+
+    public ParserState(IReadOnlyList<Token> tokens)
+    {
+        _tokens = tokens;
+        _tokenPointer = _tokens.GetEnumerator();
+    }
+
+    public Token CurrentToken { get; private set; }
+    public int CurrentIndex { get; private set; }
+
+    public void Start()
+    {
+        CurrentIndex = 0;
+        _tokenPointer = _tokens.GetEnumerator();
+        MoveNext();
+    }
+
+    public Token MoveNext()
+    {
+        if (!_tokenPointer.MoveNext())
+        {
+            CurrentToken = new Token(TokenKind.Eof, ReadOnlyMemory<char>.Empty);
+        }
+        else
+        {
+            CurrentToken = _tokenPointer.Current;
+            CurrentIndex++;
+        }
+        return CurrentToken;
+    }
+
+    public void SkipEmptyLines() 
+ { + while (CurrentToken.Kind == TokenKind.NewLine) + MoveNext(); + } + + public void SkipSpace() + { + if (CurrentToken.Kind == TokenKind.Space) + MoveNext(); + } + + public bool IsEndOfLine() => CurrentToken.Kind is TokenKind.NewLine or TokenKind.Eof; + + public bool IsAtLineStart() => CurrentIndex <= 1 || _tokens[CurrentIndex - 2].Kind == TokenKind.NewLine; +} diff --git a/cs/Markdown.Core/Rendering/IRenderer.cs b/cs/Markdown.Core/Rendering/IRenderer.cs new file mode 100644 index 000000000..dd00fc048 --- /dev/null +++ b/cs/Markdown.Core/Rendering/IRenderer.cs @@ -0,0 +1,8 @@ +using Markdown.Core.Parsing.Nodes; + +namespace Markdown.Core.Rendering; + +public interface IRenderer +{ + string Render (DocumentNode document); +} \ No newline at end of file diff --git a/cs/Markdown.Core/Rendering/Renderer.cs b/cs/Markdown.Core/Rendering/Renderer.cs new file mode 100644 index 000000000..6d15eebc1 --- /dev/null +++ b/cs/Markdown.Core/Rendering/Renderer.cs @@ -0,0 +1,79 @@ +using System.Text; +using Markdown.Core.Parsing.Nodes; + +namespace Markdown.Core.Rendering; + +public class Renderer : IRenderer + { + public string Render(DocumentNode document) + { + var result = new StringBuilder(); + + foreach (var block in document.Children) + { + switch (block) + { + case HeadingNode heading: + result.Append(RenderHeading(heading)); + break; + case ParagraphNode paragraph: + result.Append(RenderParagraph(paragraph)); + break; + } + } + return result.ToString(); + } + + private static string RenderHeading(HeadingNode heading) + { + var content = RenderInlines(heading.Inlines); + return $"{content}"; + } + + private static string RenderParagraph(ParagraphNode paragraph) + { + var content = RenderInlines(paragraph.Inlines); + return $"

{content}

"; + } + + + private static string RenderInlines(IList inlines) + { + var builder = new StringBuilder(); + + foreach (var inline in inlines) + { + switch (inline) + { + case TextNode text: + builder.Append(Escape(text.Text)); + break; + case EmphasisNode emphasis: + builder.Append(""); + builder.Append(RenderInlines(emphasis.Inlines)); + builder.Append(""); + break; + case StrongNode strong: + builder.Append(""); + builder.Append(RenderInlines(strong.Inlines)); + builder.Append(""); + break; + case LinkNode link: + builder.Append(""); + builder.Append(RenderInlines(link.Inlines)); + builder.Append(""); + break; + } + } + return builder.ToString(); + } + + private static string Escape(string text) => + text.Replace("&", "&") + .Replace("<", "<") + .Replace(">", ">") + .Replace("\"", """) + .Replace("'", "'"); + } diff --git a/cs/Markdown.Tests/EmphasisTests.cs b/cs/Markdown.Tests/EmphasisTests.cs new file mode 100644 index 000000000..687d52d8e --- /dev/null +++ b/cs/Markdown.Tests/EmphasisTests.cs @@ -0,0 +1,34 @@ +using FluentAssertions; +using Markdown.Core.Lexing; +using Markdown.Core.Parsing; +using Markdown.Core.Rendering; +using NUnit.Framework; + +namespace Markdown.Tests; + +public class EmphasisTests +{ + private Md _markdown; + + [SetUp] + public void Setup() + { + var lexer = new Lexer(); + var parser = new Parser(); + var renderer = new Renderer(); + _markdown = new Md(lexer, parser, renderer); + } + + [TestCase("_курсив_", "

курсив

", + TestName = "Курсив одинарными подчеркиваниями")] + [TestCase("Текст, _окруженный с двух сторон_ одинарными символами", + "

Текст, окруженный с двух сторон одинарными символами

", + TestName = "Курсив в середине текста")] + [TestCase("Этот _подчерк _ работает", "

Этот подчерк работает

", + TestName = "Одинарное выделение допускает пробел внутри")] + public void Test(string inputText, string expectedHtml) + { + var html = _markdown.Render(inputText); + html.Should().Be(expectedHtml); + } +} diff --git a/cs/Markdown.Tests/EscapingTests.cs b/cs/Markdown.Tests/EscapingTests.cs new file mode 100644 index 000000000..6d0eb69b1 --- /dev/null +++ b/cs/Markdown.Tests/EscapingTests.cs @@ -0,0 +1,42 @@ +using FluentAssertions; +using Markdown.Core.Lexing; +using Markdown.Core.Parsing; +using Markdown.Core.Rendering; +using NUnit.Framework; +namespace Markdown.Tests; + +/// +/// Тесты экранирования обратным слешем +/// +public class EscapingTests +{ + private Md _markdown; + + [SetUp] + public void Setup() + { + var lexer = new Lexer(); + var parser = new Parser(); + var renderer = new Renderer(); + _markdown = new Md(lexer, parser, renderer); + } + + [TestCase(@"\_Вот это\_", "

_Вот это_

", + TestName = "Экранирование подчеркиваний")] + [TestCase(@"Здесь сим\волы экранирования\ \должны остаться.\", + @"

Здесь сим\волы экранирования\ \должны остаться.\

", + TestName = "Экранирование без специальных символов остается")] + [TestCase(@"\\_вот это будет выделено тегом_", + @"

\вот это будет выделено тегом

", + TestName = "Экранирование символа экранирования")] + [TestCase("__Жирное с \\__ внутри__", "

Жирное с __ внутри

", + TestName = "Экранирование двойного подчеркивания в полужирном")] + [TestCase("_Привет\\_", "

_Привет_

", + TestName = "Экранирование подчёркивания внутри курсива оставляет текст")] + + public void Test(string inputText, string expectedHtml) + { + var html = _markdown.Render(inputText); + html.Should().Be(expectedHtml); + } +} \ No newline at end of file diff --git a/cs/Markdown.Tests/HeadingTests.cs b/cs/Markdown.Tests/HeadingTests.cs new file mode 100644 index 000000000..61371a7b9 --- /dev/null +++ b/cs/Markdown.Tests/HeadingTests.cs @@ -0,0 +1,40 @@ +using FluentAssertions; +using Markdown.Core.Lexing; +using Markdown.Core.Parsing; +using Markdown.Core.Rendering; +using NUnit.Framework; +namespace Markdown.Tests; + +/// +/// Тесты заголовков +/// +[TestFixture] +public class HeadingTests +{ + private Md _markdown; + + [SetUp] + public void Setup() + { + var lexer = new Lexer(); + var parser = new Parser(); + var renderer = new Renderer(); + _markdown = new Md(lexer, parser, renderer); + } + + [TestCase("# Заголовок", "

Заголовок

", + TestName = "Простой заголовок")] + [TestCase("# Заголовок __с _разными_ символами__", "

Заголовок с разными символами

", + TestName = "Заголовок с вложенными тегами")] + [TestCase("Текст # не заголовок", "

Текст # не заголовок

", + TestName = "Решетка в середине строки — не заголовок")] + [TestCase("#Заголовок без пробела", "

#Заголовок без пробела

", + TestName = "Без пробела после решетки — не заголовок")] + [TestCase(" ## C пробелом в начале", "

## C пробелом в начале

", + TestName = "Решетка не в первом символе — не заголовок")] + public void Test(string input, string expectedHtml) + { + var html = _markdown.Render(input); + html.Should().Be(expectedHtml); + } +} diff --git a/cs/Markdown.Tests/InteractionRulesTests.cs b/cs/Markdown.Tests/InteractionRulesTests.cs new file mode 100644 index 000000000..d9d466aa5 --- /dev/null +++ b/cs/Markdown.Tests/InteractionRulesTests.cs @@ -0,0 +1,62 @@ +using FluentAssertions; +using Markdown.Core.Lexing; +using Markdown.Core.Parsing; +using Markdown.Core.Rendering; +using NUnit.Framework; +namespace Markdown.Tests; + +/// +/// Тесты с пересечениями типов +/// +public class InteractionRulesTests +{ + private Md _markdown; + + [SetUp] + public void Setup() + { + var lexer = new Lexer(); + var parser = new Parser(); + var renderer = new Renderer(); + _markdown = new Md(lexer, parser, renderer); + } + + [TestCase("Внутри __двойного выделения _одинарное_ тоже__ работает", + "

Внутри двойного выделения одинарное тоже работает

", + TestName = "Курсив внутри полужирного работает")] + [TestCase("Но не наоборот — внутри _одинарного __двойное__ не_ работает", + "

Но не наоборот — внутри одинарного __двойное__ не работает

", + TestName = "Полужирный внутри курсива не работает")] + [TestCase("Подчерки внутри текста c цифрами_12_3 не считаются выделением", + "

Подчерки внутри текста c цифрами_12_3 не считаются выделением

", + TestName = "Подчерки с цифрами не выделяются")] + [TestCase("и в _нач_але, и в сер_еди_не, и в кон_це._", + "

и в начале, и в середине, и в конце.

", + TestName = "Выделение внутри слов работает")] + [TestCase("В то же время выделение в ра_зных сл_овах не работает", + "

В то же время выделение в ра_зных сл_овах не работает

", + TestName = "Выделение между разными словами не работает")] + [TestCase("__Непарные_ символы в рамках одного абзаца не считаются выделением", + "

__Непарные_ символы в рамках одного абзаца не считаются выделением

", + TestName = "Непарные символы не выделяются")] + [TestCase("Иначе эти_ подчерки_ не считаются выделением", + "

Иначе эти_ подчерки_ не считаются выделением

", + TestName = "Подчерки после пробела не начинают выделение")] + [TestCase("Иначе эти _подчерки _не считаются_ окончанием выделения", + "

Иначе эти подчерки _не считаются окончанием выделения

", + TestName = "Подчерки перед пробелом не заканчивают выделение")] + [TestCase("В случае __пересечения _двойных__ и одинарных_ ни один не считается выделением", + "

В случае __пересечения _двойных__ и одинарных_ ни один не считается выделением

", + TestName = "Пересекающиеся теги не работают")] + [TestCase("Если внутри подчерков пустая строка ____, то они остаются символами подчерка", + "

Если внутри подчерков пустая строка ____, то они остаются символами подчерка

", + TestName = "Пустые выделения не работают")] + [TestCase("Подчерки с цифрами__12__3 не считаются выделением", + "

Подчерки с цифрами__12__3 не считаются выделением

", + TestName = "Двойные подчерки с цифрами не работают")] + public void Test(string inputText, string expectedHtml) + { + var html = _markdown.Render(inputText); + html.Should().Be(expectedHtml); + } +} \ No newline at end of file diff --git a/cs/Markdown.Tests/LinkTests.cs b/cs/Markdown.Tests/LinkTests.cs new file mode 100644 index 000000000..7b7acd264 --- /dev/null +++ b/cs/Markdown.Tests/LinkTests.cs @@ -0,0 +1,42 @@ +using FluentAssertions; +using Markdown.Core.Lexing; +using Markdown.Core.Parsing; +using Markdown.Core.Rendering; +using NUnit.Framework; + +namespace Markdown.Tests; + +[TestFixture] +public class LinkTests +{ + private Md _markdown; + + [SetUp] + public void Setup() + { + var lexer = new Lexer(); + var parser = new Parser(); + var renderer = new Renderer(); + _markdown = new Md(lexer, parser, renderer); + } + + [TestCase("[ссылка](https://example.com)", "

ссылка

", + TestName = "Простая ссылка")] + [TestCase("Перед [ссылка](url) после", "

Перед ссылка после

", + TestName = "Ссылка в середине текста")] + [TestCase("[незакрытая ссылка(url)", "

[незакрытая ссылка(url)

", + TestName = "Нет закрывающей скобки – остаётся текст")] + [TestCase("[:текст](url)", "

:текст

", + TestName = "Допустимые символы в тексте ссылки")] + [TestCase("[ссылка](url с пробелом)", "

ссылка

", + TestName = "URL допускает пробелы")] + [TestCase("[ссылка]url)", "

[ссылка]url)

", + TestName = "Нет круглых скобок – остаётся текст")] + [TestCase("\\[ссылка](url)", "

[ссылка](url)

", + TestName = "Экранированная квадратная скобка не образует ссылку")] + public void Render_ShouldHandleLinks(string markdown, string expectedHtml) + { + var html = _markdown.Render(markdown); + html.Should().Be(expectedHtml); + } +} diff --git a/cs/Markdown.Tests/Markdown.Tests.csproj b/cs/Markdown.Tests/Markdown.Tests.csproj new file mode 100644 index 000000000..fb02bb751 --- /dev/null +++ b/cs/Markdown.Tests/Markdown.Tests.csproj @@ -0,0 +1,20 @@ + + + + net8.0 + enable + enable + + + + + + + + + + + + + + diff --git a/cs/Markdown.Tests/PerfomanceTests.cs b/cs/Markdown.Tests/PerfomanceTests.cs new file mode 100644 index 000000000..267216340 --- /dev/null +++ b/cs/Markdown.Tests/PerfomanceTests.cs @@ -0,0 +1,98 @@ +using System.Diagnostics; +using System.Text; +using FluentAssertions; +using Markdown.Core.Lexing; +using Markdown.Core.Parsing; +using Markdown.Core.Rendering; +using NUnit.Framework; + +namespace Markdown.Tests; + +[TestFixture] +public class PerformanceTests +{ + private Md _markdown; + + [SetUp] + public void Setup() + { + var lexer = new Lexer(); + var parser = new Parser(); + var renderer = new Renderer(); + _markdown = new Md(lexer, parser, renderer); + } + + [Test] + public void Render_ShouldHandleLongInputLinearly() + { + const int paragraphs = 5000; + + var inputBuilder = new StringBuilder(); + var expectedBuilder = new StringBuilder(); + + for (var i = 0; i < paragraphs; i++) + { + inputBuilder.Append($"__жирный__ {i} _курсив_ {i} текст"); + if (i < paragraphs - 1) + inputBuilder.Append("\n\n"); + + expectedBuilder.Append("

жирный ") + .Append(i) + .Append(" курсив ") + .Append(i) + .Append(" текст

"); + } + + var input = inputBuilder.ToString(); + var expected = expectedBuilder.ToString(); + + var html = _markdown.Render(input); + + html.Should().Be(expected); + } + + [Test] + public void Render_ShouldScaleApproximatelyLinearlyWithInputSize() + { + const int smallParagraphs = 500; + const int largeParagraphs = 5000; + + var smallInput = BuildRepeatedParagraphs(smallParagraphs, "__жирный__ _курсив_ текст"); + var largeInput = BuildRepeatedParagraphs(largeParagraphs, "__жирный__ _курсив_ текст"); + + var smallDuration = MeasureMedianRenderMilliseconds(smallInput); + var largeDuration = MeasureMedianRenderMilliseconds(largeInput); + + var baseline = Math.Max(1, smallDuration); + largeDuration.Should().BeLessThanOrEqualTo(baseline * 25); + } + + private long MeasureMedianRenderMilliseconds(string input) + { + const int runs = 3; + var results = new long[runs]; + for (var i = 0; i < runs; i++) + { + var sw = Stopwatch.StartNew(); + _markdown.Render(input); + sw.Stop(); + results[i] = sw.ElapsedMilliseconds; + } + Array.Sort(results); + return results[runs / 2]; + } + + private static string BuildRepeatedParagraphs(int count, string paragraph) + { + var builder = new StringBuilder(); + + for (var i = 0; i < count; i++) + { + builder.Append(paragraph); + if (i < count - 1) + builder.Append("\n\n"); + } + return builder.ToString(); + } + +} diff --git a/cs/Markdown.Tests/StrongTests.cs b/cs/Markdown.Tests/StrongTests.cs new file mode 100644 index 000000000..5b66c4ed0 --- /dev/null +++ b/cs/Markdown.Tests/StrongTests.cs @@ -0,0 +1,43 @@ +using FluentAssertions; +using Markdown.Core.Lexing; +using Markdown.Core.Parsing; +using Markdown.Core.Rendering; +using NUnit.Framework; + +namespace Markdown.Tests; + +/// +/// Тесты жирного шрифта +/// +[TestFixture] +public class StrongTests +{ + private Md _markdown; + + [SetUp] + public void Setup() + { + var lexer = new Lexer(); + var parser = new Parser(); + var renderer = new Renderer(); + _markdown = new Md(lexer, parser, 
renderer); + } + + [TestCase("__полужирный__", "

полужирный

", + TestName = "Одно слово полужирным шрифтом ")] + [TestCase("__Выделенный двумя символами текст__ должен становиться полужирным", + "

Выделенный двумя символами текст должен становиться полужирным

", + TestName = "Полужирный в предложении")] + [TestCase("сло__во__ внутри слова", "

слово внутри слова

", + TestName = "Двойное выделение внутри слова")] + [TestCase("Эти __ подчерки__ не работают", "

Эти __ подчерки__ не работают

", + TestName = "Не начинается, если после __ пробел")] + [TestCase("Эти __подчерки __ работают", "

Эти подчерки работают

", + TestName = "Двойное выделение допускает пробел внутри")] + public void Test(string inputText, string expectedHtml) + { + var html = _markdown.Render(inputText); + html.Should().Be(expectedHtml); + } +} + diff --git a/cs/Markdown.Tests/TextTests.cs b/cs/Markdown.Tests/TextTests.cs new file mode 100644 index 000000000..f3fa585f0 --- /dev/null +++ b/cs/Markdown.Tests/TextTests.cs @@ -0,0 +1,47 @@ +using FluentAssertions; +using Markdown.Core.Lexing; +using Markdown.Core.Parsing; +using Markdown.Core.Rendering; +using NUnit.Framework; + +namespace Markdown.Tests; + +[TestFixture] +public class TextTests +{ + private Md _markdown; + + [SetUp] + public void Setup() + { + var lexer = new Lexer(); + var parser = new Parser(); + var renderer = new Renderer(); + _markdown = new Md(lexer, parser, renderer); + } + + [TestCase("Абракадабра", "

Абракадабра

", + TestName = "

Простой текст без выделений

")] + [TestCase("Привет, как дела?", "

Привет, как дела?

", + TestName = "

Простой текст без выделений

")] + [TestCase("Email: test@example.com", "

Email: test@example.com

", + TestName = "

Текст с различными символами email

")] + [TestCase("Ссылка: https://example.com", "

Ссылка: https://example.com

", + TestName = "Текст с URL")] + [TestCase("Первый параграф\nВторой параграф", "

Первый параграф

Второй параграф

", + TestName = "Два параграфа")] + [TestCase("Первый\n\nТретий", "

Первый

Третий

", + TestName = "Параграфы с пустой строкой")] + + public void Test(string inputText, string expectedText) + { + + var html = _markdown.Render(inputText); + + html.Should().Be(expectedText); + } + + + + +} \ No newline at end of file diff --git a/cs/Markdown/Markdown.csproj b/cs/Markdown/Markdown.csproj new file mode 100644 index 000000000..eb5839529 --- /dev/null +++ b/cs/Markdown/Markdown.csproj @@ -0,0 +1,20 @@ + + + + Exe + net8.0 + enable + enable + + + + + + + + + + + + + diff --git a/cs/Markdown/Md.cs b/cs/Markdown/Md.cs new file mode 100644 index 000000000..13003f4ba --- /dev/null +++ b/cs/Markdown/Md.cs @@ -0,0 +1,16 @@ +using Markdown.Core.Lexing; +using Markdown.Core.Parsing; +using Markdown.Core.Rendering; + +namespace Markdown; + +public class Md(ILexer lexer, IParser parser, IRenderer renderer) +{ + public string Render(string text) + { + var tokens = lexer.Tokenize(text.AsMemory()); + var document = parser.Parse(tokens); + return renderer.Render(document); + } + +} \ No newline at end of file diff --git a/cs/Markdown/Program.cs b/cs/Markdown/Program.cs new file mode 100644 index 000000000..da2daec1a --- /dev/null +++ b/cs/Markdown/Program.cs @@ -0,0 +1,9 @@ +using Markdown; +using Markdown.Core.Lexing; +using Markdown.Core.Parsing; +using Markdown.Core.Rendering; + +var md = new Md(new Lexer(), new Parser(), new Renderer()); +var input = Console.In.ReadToEnd(); +var html = md.Render(input); +Console.Write(html); \ No newline at end of file diff --git a/cs/Samples/Samples.csproj b/cs/Samples/Samples.csproj index da6cd3e3d..8b3be18d4 100644 --- a/cs/Samples/Samples.csproj +++ b/cs/Samples/Samples.csproj @@ -8,6 +8,7 @@ + diff --git a/cs/clean-code.sln b/cs/clean-code.sln index 2206d54db..ed501a8a0 100644 --- a/cs/clean-code.sln +++ b/cs/clean-code.sln @@ -9,6 +9,14 @@ Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "ControlDigit", "ControlDigi EndProject Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Samples", "Samples\Samples.csproj", 
"{C3EF41D7-50EF-4CE1-B30A-D1D81C93D7FA}" EndProject +Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Markdown", "Markdown", "{4D44D5F2-F802-4578-8C72-4A86A5360DF6}" +EndProject +Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Markdown", "Markdown\Markdown.csproj", "{F8483A51-DB5F-48E6-8982-50141057DE57}" +EndProject +Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Markdown.Core", "Markdown.Core\Markdown.Core.csproj", "{CDF8C90A-9D21-45DF-BD6E-DE5B89CBDED8}" +EndProject +Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Markdown.Tests", "Markdown.Tests\Markdown.Tests.csproj", "{55DBCB30-6E2B-49B4-8906-15D345DC849E}" +EndProject Global GlobalSection(SolutionConfigurationPlatforms) = preSolution Debug|Any CPU = Debug|Any CPU @@ -27,5 +35,22 @@ Global {C3EF41D7-50EF-4CE1-B30A-D1D81C93D7FA}.Debug|Any CPU.Build.0 = Debug|Any CPU {C3EF41D7-50EF-4CE1-B30A-D1D81C93D7FA}.Release|Any CPU.ActiveCfg = Release|Any CPU {C3EF41D7-50EF-4CE1-B30A-D1D81C93D7FA}.Release|Any CPU.Build.0 = Release|Any CPU + {F8483A51-DB5F-48E6-8982-50141057DE57}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {F8483A51-DB5F-48E6-8982-50141057DE57}.Debug|Any CPU.Build.0 = Debug|Any CPU + {F8483A51-DB5F-48E6-8982-50141057DE57}.Release|Any CPU.ActiveCfg = Release|Any CPU + {F8483A51-DB5F-48E6-8982-50141057DE57}.Release|Any CPU.Build.0 = Release|Any CPU + {CDF8C90A-9D21-45DF-BD6E-DE5B89CBDED8}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {CDF8C90A-9D21-45DF-BD6E-DE5B89CBDED8}.Debug|Any CPU.Build.0 = Debug|Any CPU + {CDF8C90A-9D21-45DF-BD6E-DE5B89CBDED8}.Release|Any CPU.ActiveCfg = Release|Any CPU + {CDF8C90A-9D21-45DF-BD6E-DE5B89CBDED8}.Release|Any CPU.Build.0 = Release|Any CPU + {55DBCB30-6E2B-49B4-8906-15D345DC849E}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {55DBCB30-6E2B-49B4-8906-15D345DC849E}.Debug|Any CPU.Build.0 = Debug|Any CPU + {55DBCB30-6E2B-49B4-8906-15D345DC849E}.Release|Any CPU.ActiveCfg = Release|Any CPU + {55DBCB30-6E2B-49B4-8906-15D345DC849E}.Release|Any CPU.Build.0 = Release|Any 
CPU + EndGlobalSection + GlobalSection(NestedProjects) = preSolution + {F8483A51-DB5F-48E6-8982-50141057DE57} = {4D44D5F2-F802-4578-8C72-4A86A5360DF6} + {CDF8C90A-9D21-45DF-BD6E-DE5B89CBDED8} = {4D44D5F2-F802-4578-8C72-4A86A5360DF6} + {55DBCB30-6E2B-49B4-8906-15D345DC849E} = {4D44D5F2-F802-4578-8C72-4A86A5360DF6} EndGlobalSection EndGlobal diff --git a/cs/clean-code.sln.DotSettings b/cs/clean-code.sln.DotSettings index 135b83ecb..53fe49b2f 100644 --- a/cs/clean-code.sln.DotSettings +++ b/cs/clean-code.sln.DotSettings @@ -1,6 +1,9 @@  <Policy Inspect="True" Prefix="" Suffix="" Style="aaBb" /> <Policy Inspect="True" Prefix="" Suffix="" Style="AaBb_AaBb" /> + <Policy><Descriptor Staticness="Instance" AccessRightKinds="Private" Description="Instance fields (private)"><ElementKinds><Kind Name="FIELD" /><Kind Name="READONLY_FIELD" /></ElementKinds></Descriptor><Policy Inspect="True" WarnAboutPrefixesAndSuffixes="False" Prefix="" Suffix="" Style="aaBb" /></Policy> + <Policy><Descriptor Staticness="Any" AccessRightKinds="Any" Description="Types and namespaces"><ElementKinds><Kind Name="NAMESPACE" /><Kind Name="CLASS" /><Kind Name="STRUCT" /><Kind Name="ENUM" /><Kind Name="DELEGATE" /></ElementKinds></Descriptor><Policy Inspect="True" WarnAboutPrefixesAndSuffixes="False" Prefix="" Suffix="" Style="AaBb_AaBb" /></Policy> + True True True Imported 10.10.2016