diff --git a/MarkdownSpec.md b/MarkdownSpec.md index 886e99c95..d464aee6c 100644 --- a/MarkdownSpec.md +++ b/MarkdownSpec.md @@ -70,4 +70,11 @@ __Непарные_ символы в рамках одного абзаца н превратится в:
-\<h1>Заголовок \<strong>с \<em>разными\</em> символами\</strong>\</h1>
\ No newline at end of file
+\<h1>Заголовок \<strong>с \<em>разными\</em> символами\</strong>\</h1>
+
+# Image
+
+Конструкция, начинающаяся с ![](), преобразуется в тег \<img>.
+Внутри квадратных скобок [] указывается альтернативный текст (alt), а внутри круглых скобок () — ссылка на изображение (src).
+Альтернативный текст и ссылка могут содержать остальные элементы разметки согласно общим правилам. В альтернативном тексте допускается использование символа [ только при наличии соответствующей закрывающей ].
+Аналогично, в URL допускается использование символа ( только при наличии соответствующей закрывающей ).
\ No newline at end of file
diff --git a/cs/Markdown/Markdown.csproj b/cs/Markdown/Markdown.csproj new file mode 100644 index 000000000..9d639a2d4 --- /dev/null +++ b/cs/Markdown/Markdown.csproj @@ -0,0 +1,17 @@ + + + Exe + net8.0 + enable + enable + + + + + + + + + +
diff --git a/cs/Markdown/Md.cs b/cs/Markdown/Md.cs
new file mode 100644
index 000000000..e69c18a8d
--- /dev/null
+++ b/cs/Markdown/Md.cs
@@ -0,0 +1,20 @@
+using Markdown.Parsers;
+using Markdown.Tokenizer;
+
+namespace Markdown;
+
+/// <summary>Entry point that converts Markdown text to HTML.</summary>
+public class Md
+{
+    /// <summary>
+    /// Tokenizes <paramref name="markdownText"/>, parses the tokens into a node tree
+    /// and returns the HTML produced by that tree.
+    /// </summary>
+    public string Render(string markdownText)
+    {
+        var tokens = new MarkdownTokenizer(markdownText).Tokenize();
+        var document = new MarkdownParser(tokens).ParseTokens();
+
+        return document.ToHtml();
+    }
+}
\ No newline at end of file
diff --git a/cs/Markdown/Nodes/Interfaces/InternalMarkdownNode.cs b/cs/Markdown/Nodes/Interfaces/InternalMarkdownNode.cs
new file mode 100644
index 000000000..62995295d
--- /dev/null
+++ b/cs/Markdown/Nodes/Interfaces/InternalMarkdownNode.cs
@@ -0,0 +1,15 @@
+namespace Markdown.Nodes.Interfaces;
+
+/// <summary>Base class for nodes that keep the children added to them.</summary>
+public abstract class InternalMarkdownNode : MarkdownNode
+{
+    protected InternalMarkdownNode(string value) : base(value)
+    {
+    }
+
+    /// <summary>Appends <paramref name="node"/> to <c>Children</c>.</summary>
+    public override void AddChild(MarkdownNode node) => Children.Add(node);
+
+    /// <summary>Appends every node of <paramref name="nodes"/> to <c>Children</c>.</summary>
+    public override void AddChildren(List<MarkdownNode> nodes) => Children.AddRange(nodes);
+}
\ No newline at end of file
diff 
--git a/cs/Markdown/Nodes/Interfaces/LeafMarkdownNode.cs b/cs/Markdown/Nodes/Interfaces/LeafMarkdownNode.cs
new file mode 100644
index 000000000..6e6e51977
--- /dev/null
+++ b/cs/Markdown/Nodes/Interfaces/LeafMarkdownNode.cs
@@ -0,0 +1,9 @@
+namespace Markdown.Nodes.Interfaces;
+
+/// <summary>Base class for nodes that keep the no-op child methods of <see cref="MarkdownNode"/>.</summary>
+public abstract class LeafMarkdownNode : MarkdownNode
+{
+    protected LeafMarkdownNode(string value) : base(value)
+    {
+    }
+}
\ No newline at end of file
diff --git a/cs/Markdown/Nodes/Interfaces/MarkdownNode.cs b/cs/Markdown/Nodes/Interfaces/MarkdownNode.cs
new file mode 100644
index 000000000..f2555d170
--- /dev/null
+++ b/cs/Markdown/Nodes/Interfaces/MarkdownNode.cs
@@ -0,0 +1,29 @@
+namespace Markdown.Nodes.Interfaces;
+
+/// <summary>Base node of the parsed Markdown tree.</summary>
+public abstract class MarkdownNode
+{
+    /// <summary>Child nodes in the order they were added.</summary>
+    public readonly List<MarkdownNode> Children = [];
+
+    /// <summary>Text the node was created with (a markup marker or literal text).</summary>
+    public readonly string Value;
+
+    protected MarkdownNode(string value)
+    {
+        Value = value;
+    }
+
+    /// <summary>Does nothing by default; overridden by nodes that store children.</summary>
+    public virtual void AddChild(MarkdownNode node)
+    {
+    }
+
+    /// <summary>Does nothing by default; overridden by nodes that store children.</summary>
+    public virtual void AddChildren(List<MarkdownNode> nodes)
+    {
+    }
+
+    /// <summary>Renders this node as HTML.</summary>
+    public abstract string ToHtml();
+}
\ No newline at end of file
diff --git a/cs/Markdown/Nodes/Internal/AltNode.cs b/cs/Markdown/Nodes/Internal/AltNode.cs
new file mode 100644
index 000000000..418346b48
--- /dev/null
+++ b/cs/Markdown/Nodes/Internal/AltNode.cs
@@ -0,0 +1,20 @@
+using System.Text;
+using Markdown.Nodes.Interfaces;
+
+namespace Markdown.Nodes.Internal;
+
+/// <summary>Alternative text of an image; renders as the concatenation of its children.</summary>
+public class AltNode : InternalMarkdownNode
+{
+    public AltNode(string value) : base(value)
+    {
+    }
+
+    /// <summary>Concatenates the HTML of all children without adding any markup.</summary>
+    public override string ToHtml()
+    {
+        var html = new StringBuilder();
+        Children.ForEach(child => html.Append(child.ToHtml()));
+        return html.ToString();
+    }
+}
\ No newline at end of file
diff --git a/cs/Markdown/Nodes/Internal/BoldNode.cs b/cs/Markdown/Nodes/Internal/BoldNode.cs
new file mode 100644
index 000000000..d06ef57c8
--- /dev/null
+++ b/cs/Markdown/Nodes/Internal/BoldNode.cs
@@ -0,0 +1,20 @@
+using System.Text;
+using Markdown.Nodes.Interfaces;
+
+namespace Markdown.Nodes.Internal;
+
+/// <summary>Bold text; renders its children inside a <c>strong</c> tag.</summary>
+public class BoldNode : InternalMarkdownNode
+{
+    public BoldNode(string value) : base(value)
+    {
+    }
+
+    /// <summary>Wraps the concatenated HTML of the children in a <c>strong</c> element.</summary>
+    public override string ToHtml()
+    {
+        var html = new StringBuilder();
+        Children.ForEach(child => html.Append(child.ToHtml()));
+        return $"<strong>{html}</strong>";
+    }
+}
\ No newline at end of file
diff --git a/cs/Markdown/Nodes/Internal/HeaderNode.cs b/cs/Markdown/Nodes/Internal/HeaderNode.cs
new file mode 100644
index 000000000..c9e5d0aad
--- /dev/null
+++ b/cs/Markdown/Nodes/Internal/HeaderNode.cs
@@ -0,0 +1,29 @@
+using System.Text;
+using Markdown.Nodes.Interfaces;
+
+namespace Markdown.Nodes.Internal;
+
+/// <summary>Header line; renders its children inside an <c>h1</c> tag.</summary>
+public class HeaderNode : InternalMarkdownNode
+{
+    public HeaderNode(string value) : base(value)
+    {
+    }
+
+    /// <summary>
+    /// Wraps the children in an <c>h1</c> element. Children whose value is "\n" or "\r" are emitted
+    /// after the closing tag instead of inside it.
+    /// </summary>
+    public override string ToHtml()
+    {
+        var textBuilder = new StringBuilder();
+        var controlCharacters = new StringBuilder();
+        foreach (var child in Children)
+        {
+            var target = child.Value is "\n" or "\r" ? controlCharacters : textBuilder;
+            target.Append(child.ToHtml());
+        }
+
+        return $"<h1>{textBuilder}</h1>{controlCharacters}";
+    }
+}
\ No newline at end of file
diff --git a/cs/Markdown/Nodes/Internal/ImageNode.cs b/cs/Markdown/Nodes/Internal/ImageNode.cs
new file mode 100644
index 000000000..e962fdc28
--- /dev/null
+++ b/cs/Markdown/Nodes/Internal/ImageNode.cs
@@ -0,0 +1,19 @@
+using Markdown.Nodes.Interfaces;
+
+namespace Markdown.Nodes.Internal;
+
+/// <summary>Image node; reads its first child as the alt text and its second child as the URL.</summary>
+public class ImageNode : InternalMarkdownNode
+{
+    public ImageNode(string value) : base(value)
+    {
+    }
+
+    /// <summary>Renders an <c>img</c> tag from the rendered URL and alt children.</summary>
+    public override string ToHtml()
+    {
+        var alt = Children[0].ToHtml();
+        var url = Children[1].ToHtml();
+        return $"<img src=\"{url}\" alt=\"{alt}\">";
+    }
+}
\ No newline at end of file
diff --git a/cs/Markdown/Nodes/Internal/ItalicNode.cs b/cs/Markdown/Nodes/Internal/ItalicNode.cs
new file mode 100644
index 000000000..eca912e8e
--- /dev/null
+++ b/cs/Markdown/Nodes/Internal/ItalicNode.cs
@@ -0,0 +1,20 @@
+using System.Text;
+using Markdown.Nodes.Interfaces;
+
+namespace Markdown.Nodes.Internal;
+
+/// <summary>Italic text; renders its children inside an <c>em</c> tag.</summary>
+public class ItalicNode : InternalMarkdownNode
+{
+    public ItalicNode(string value) : base(value)
+    {
+    }
+
+    /// <summary>Wraps the concatenated HTML of the children in an <c>em</c> element.</summary>
+    public override string ToHtml()
+    {
+        var html = new StringBuilder();
+        Children.ForEach(child => html.Append(child.ToHtml()));
+        return $"<em>{html}</em>";
+    }
+}
\ No newline at end of file
diff --git a/cs/Markdown/Nodes/Internal/MarkdownDocumentNode.cs b/cs/Markdown/Nodes/Internal/MarkdownDocumentNode.cs
new file mode 100644
index 000000000..9445a859e
--- /dev/null
+++ b/cs/Markdown/Nodes/Internal/MarkdownDocumentNode.cs
@@ -0,0 +1,20 @@
+using System.Text;
+using Markdown.Nodes.Interfaces;
+
+namespace Markdown.Nodes.Internal;
+
+/// <summary>Root of the node tree; renders as the concatenation of its children.</summary>
+public class MarkdownDocumentNode : InternalMarkdownNode
+{
+    public MarkdownDocumentNode(string value) : base(value)
+    {
+    }
+
+    /// <summary>Concatenates the HTML of all children without adding any markup.</summary>
+    public override string ToHtml()
+    {
+        var html = new StringBuilder();
+        Children.ForEach(child => html.Append(child.ToHtml()));
+        return html.ToString();
+    }
+}
\ No newline at end of file
diff --git 
a/cs/Markdown/Nodes/Leaf/TextNode.cs b/cs/Markdown/Nodes/Leaf/TextNode.cs
new file mode 100644
index 000000000..5914a1056
--- /dev/null
+++ b/cs/Markdown/Nodes/Leaf/TextNode.cs
@@ -0,0 +1,14 @@
+using Markdown.Nodes.Interfaces;
+
+namespace Markdown.Nodes.Leaf;
+
+/// <summary>Leaf node holding literal text.</summary>
+public class TextNode : LeafMarkdownNode
+{
+    public TextNode(string value) : base(value)
+    {
+    }
+
+    /// <summary>Returns the stored text unchanged.</summary>
+    public override string ToHtml() => Value;
+}
\ No newline at end of file
diff --git a/cs/Markdown/Nodes/Leaf/UrlNode.cs b/cs/Markdown/Nodes/Leaf/UrlNode.cs
new file mode 100644
index 000000000..ff94c4ef4
--- /dev/null
+++ b/cs/Markdown/Nodes/Leaf/UrlNode.cs
@@ -0,0 +1,14 @@
+using Markdown.Nodes.Interfaces;
+
+namespace Markdown.Nodes.Leaf;
+
+/// <summary>Leaf node holding the URL of an image.</summary>
+public class UrlNode : LeafMarkdownNode
+{
+    public UrlNode(string value) : base(value)
+    {
+    }
+
+    /// <summary>Returns the stored URL unchanged.</summary>
+    public override string ToHtml() => Value;
+}
\ No newline at end of file
diff --git a/cs/Markdown/Parsers/EscapeParser.cs b/cs/Markdown/Parsers/EscapeParser.cs
new file mode 100644
index 000000000..544738939
--- /dev/null
+++ b/cs/Markdown/Parsers/EscapeParser.cs
@@ -0,0 +1,34 @@
+using Markdown.Nodes.Interfaces;
+using Markdown.Nodes.Leaf;
+using Markdown.Parsers.Interfaces;
+using Markdown.Tokenizer;
+
+namespace Markdown.Parsers;
+
+/// <summary>Turns an escape token plus the token after it into plain text.</summary>
+public class EscapeParser : IParser
+{
+    private readonly MarkdownParser parser;
+
+    public EscapeParser(MarkdownParser parser)
+    {
+        this.parser = parser;
+    }
+
+    /// <summary>
+    /// Fails (yielding a "\" text node) unless the current token is an escape token;
+    /// otherwise steps to the following token and yields its value as a text node.
+    /// </summary>
+    public ParseStatus TryParse(out MarkdownNode node)
+    {
+        if (parser.CurrentToken.Type == TokenType.Escape)
+        {
+            parser.MoveNext();
+            node = new TextNode(parser.CurrentToken.Value);
+            return ParseStatus.Ok();
+        }
+
+        node = new TextNode(@"\");
+        return ParseStatus.Fail();
+    }
+}
\ No newline at end of file
diff --git a/cs/Markdown/Parsers/HeaderParser.cs b/cs/Markdown/Parsers/HeaderParser.cs new file mode 100644 index 000000000..ae9406e2f --- /dev/null +++ b/cs/Markdown/Parsers/HeaderParser.cs @@ -0,0 +1,44 @@ +using Markdown.Nodes.Interfaces; 
+using Markdown.Nodes.Internal;
+using Markdown.Nodes.Leaf;
+using Markdown.Parsers.Interfaces;
+using Markdown.Tokenizer;
+
+namespace Markdown.Parsers;
+
+/// <summary>Parses a "# " header that runs up to the next line break or the end of input.</summary>
+public class HeaderParser : IParser
+{
+    private readonly MarkdownParser parser;
+    private readonly HashSet<TokenType> expectedStopSymbols = [TokenType.NewLine, TokenType.Carriage];
+
+    public HeaderParser(MarkdownParser parser)
+    {
+        this.parser = parser;
+    }
+
+    /// <summary>Fails with a "#" text node unless the current token is a hash followed by a space token.</summary>
+    public ParseStatus TryParse(out MarkdownNode node)
+    {
+        if (parser.CurrentToken.Type != TokenType.Hash || parser.NextToken?.Type != TokenType.Space)
+        {
+            node = new TextNode("#");
+            return ParseStatus.Fail();
+        }
+        parser.MoveNext(); // now on the space after '#'
+        node = new HeaderNode("#");
+        parser.ParentStack.Push(parser.CurrentParent);
+        parser.CurrentParent = node;
+        parser.MoveNext(); // first token of the header text
+        parser.ParseTokens(null, expectedStopSymbols);
+
+        // Keep the token parsing stopped on (the line break) as a child of the header.
+        if (parser.CurrentToken.Type != TokenType.Eof)
+            node.AddChild(new TextNode(parser.CurrentToken.Value));
+
+        // ParseTokens restores the parent itself when it stops on one of the stop symbols.
+        if (!expectedStopSymbols.Contains(parser.CurrentToken.Type))
+            parser.CurrentParent = parser.ParentStack.Pop();
+        return ParseStatus.Ok();
+    }
+}
\ No newline at end of file
diff --git a/cs/Markdown/Parsers/ImageParser.cs b/cs/Markdown/Parsers/ImageParser.cs
new file mode 100644
index 000000000..06baecc2d
--- /dev/null
+++ b/cs/Markdown/Parsers/ImageParser.cs
@@ -0,0 +1,139 @@
+using System.Text;
+using Markdown.Nodes.Interfaces;
+using Markdown.Nodes.Internal;
+using Markdown.Nodes.Leaf;
+using Markdown.Parsers.Interfaces;
+using Markdown.Tokenizer;
+
+namespace Markdown.Parsers;
+
+/// <summary>
+/// Parses an image of the form <c>![alt](url)</c> into an <see cref="ImageNode"/> whose children are
+/// the alt node followed by the URL node.
+/// </summary>
+public class ImageParser : IParser
+{
+    private readonly MarkdownParser parser;
+    private readonly HashSet<TokenType> altRollbackSymbols = [TokenType.NewLine, TokenType.Hash];
+    private readonly HashSet<TokenType> altExpectedStopSymbols = [TokenType.RBracket];
+
+    public ImageParser(MarkdownParser parser)
+    {
+        this.parser = parser;
+    }
+
+    /// <summary>Tries to read an image starting at the current '!' token.</summary>
+    /// <remarks>
+    /// On success the parser is left on the closing ')'; the loop in
+    /// <see cref="MarkdownParser.ParseTokens"/> performs the step past it. On failure everything
+    /// consumed so far is appended to the enclosing parent as plain text and the parser stays on the
+    /// first token that was not consumed.
+    /// </remarks>
+    public ParseStatus TryParse(out MarkdownNode node)
+    {
+        node = new TextNode("!");
+        if (parser.CurrentToken.Type != TokenType.Exclamation)
+            return ParseStatus.Fail();
+
+        parser.MoveNext();
+
+        parser.ParentStack.Push(parser.CurrentParent);
+        node = new ImageNode("!");
+        parser.CurrentParent = node;
+
+        MarkdownNode? unparsedTail = null;
+        if (TryReadAlt(out var altNode))
+        {
+            node.AddChild(altNode);
+            if (TryReadUrl(out var urlNode))
+            {
+                node.AddChild(urlNode);
+                parser.CurrentParent = parser.ParentStack.Pop();
+                return ParseStatus.Ok();
+            }
+
+            unparsedTail = urlNode;
+        }
+
+        var rolledBack = MarkdownParser.Rollback(node);
+        if (unparsedTail != null)
+            rolledBack.Add(unparsedTail);
+
+        // Pop, not Peek: an entry left on the stack makes a later unwind restore the wrong parent
+        // (for example a header would be restored as its own parent).
+        var prevParent = parser.ParentStack.Pop();
+        prevParent.AddChildren(rolledBack);
+        parser.CurrentParent = prevParent;
+
+        return ParseStatus.Fail();
+    }
+
+    /// <summary>
+    /// Reads "[alt" up to the closing ']' into an <see cref="AltNode"/>. Returns false, with the
+    /// image node as the current parent, when the alt text is missing or not closed.
+    /// </summary>
+    private bool TryReadAlt(out MarkdownNode node)
+    {
+        node = new TextNode("[");
+        if (parser.CurrentToken.Type != TokenType.LBracket)
+            return false;
+
+        parser.ParentStack.Push(parser.CurrentParent);
+        node = new AltNode("[");
+        parser.CurrentParent = node;
+        parser.MoveNext();
+        parser.ParseTokens(altRollbackSymbols, altExpectedStopSymbols);
+
+        if (parser.CurrentToken.Type == TokenType.RBracket)
+            return true;
+
+        // ParseTokens returns without unwinding when it runs into the end of input, so the alt node
+        // is still the current parent. Roll it back into the image node here so the caller sees the
+        // same state as after a rollback on a line break.
+        if (ReferenceEquals(parser.CurrentParent, node))
+        {
+            parser.CurrentParent = parser.ParentStack.Pop();
+            parser.CurrentParent.AddChildren(MarkdownParser.Rollback(node));
+        }
+
+        return false;
+    }
+
+    /// <summary>
+    /// Reads "](url" up to the closing ')'. On success <paramref name="node"/> is the
+    /// <see cref="UrlNode"/> and the parser is left on ')'. On failure <paramref name="node"/> is a
+    /// text node holding every token value this method consumed, so the caller can restore it.
+    /// </summary>
+    private bool TryReadUrl(out MarkdownNode node)
+    {
+        var consumed = new StringBuilder(parser.CurrentToken.Value); // the closing ']'
+        parser.MoveNext();
+
+        if (parser.CurrentToken.Type != TokenType.LParenthesis)
+        {
+            node = new TextNode(consumed.ToString());
+            return false;
+        }
+
+        consumed.Append(parser.CurrentToken.Value);
+        parser.MoveNext();
+
+        var urlBuilder = new StringBuilder();
+        while (parser.CurrentToken.Type is not (TokenType.RParenthesis or TokenType.NewLine
+               or TokenType.Carriage or TokenType.Eof))
+        {
+            urlBuilder.Append(parser.CurrentToken.Value);
+            parser.MoveNext();
+        }
+
+        if (parser.CurrentToken.Type != TokenType.RParenthesis)
+        {
+            node = new TextNode(consumed.Append(urlBuilder).ToString());
+            return false;
+        }
+
+        // Stay on ')': moving past it here would make ParseTokens skip the token after the image.
+        node = new UrlNode(urlBuilder.ToString());
+        return true;
+    }
+}
\ No newline at end of file
diff --git a/cs/Markdown/Parsers/Interfaces/IParser.cs b/cs/Markdown/Parsers/Interfaces/IParser.cs new file mode 100644 index 000000000..5fa54cdc5 --- /dev/null +++ 
b/cs/Markdown/Parsers/Interfaces/IParser.cs
@@ -0,0 +1,10 @@
+using Markdown.Nodes.Interfaces;
+
+namespace Markdown.Parsers.Interfaces;
+
+/// <summary>A parser that tries to read one construct at the owning parser's current token.</summary>
+public interface IParser
+{
+    /// <summary>Attempts to parse a node; the returned status tells the caller how to treat it.</summary>
+    ParseStatus TryParse(out MarkdownNode node);
+}
\ No newline at end of file
diff --git a/cs/Markdown/Parsers/MarkdownParser.cs b/cs/Markdown/Parsers/MarkdownParser.cs
new file mode 100644
index 000000000..44ef183dd
--- /dev/null
+++ b/cs/Markdown/Parsers/MarkdownParser.cs
@@ -0,0 +1,131 @@
+using Markdown.Nodes.Interfaces;
+using Markdown.Nodes.Internal;
+using Markdown.Nodes.Leaf;
+using Markdown.Parsers.Interfaces;
+using Markdown.Tokenizer;
+
+namespace Markdown.Parsers;
+
+/// <summary>
+/// Walks the token list and builds the node tree, delegating individual constructs to the registered
+/// <see cref="IParser"/> implementations.
+/// </summary>
+public class MarkdownParser
+{
+    /// <summary>Parents saved by the construct parsers while a nested node is being filled.</summary>
+    public readonly Stack<MarkdownNode> ParentStack = new();
+
+    /// <summary>Node that currently receives parsed children.</summary>
+    public MarkdownNode CurrentParent;
+
+    private readonly List<IParser> parsers;
+    private readonly List<Token> tokens;
+    private int position;
+
+    public MarkdownParser(List<Token> tokens)
+    {
+        this.tokens = tokens;
+        // Order matters: TryParseNode asks the parsers in exactly this order.
+        parsers =
+        [
+            new HeaderParser(this),
+            new ImageParser(this),
+            new UnderscoresParser(this),
+            new EscapeParser(this)
+        ];
+        CurrentParent = new MarkdownDocumentNode("");
+    }
+
+    public Token CurrentToken => tokens[position];
+
+    /// <summary>Token after the current one, or null when the current token is the last.</summary>
+    public Token? NextToken => position == tokens.Count - 1 ? null : tokens[position + 1];
+
+    /// <summary>Token before the current one, or null when the current token is the first.</summary>
+    public Token? PrevToken => position == 0 ? null : tokens[position - 1];
+
+    /// <summary>
+    /// Parses tokens into <see cref="CurrentParent"/> until the end of input or a stop condition.
+    /// </summary>
+    /// <param name="rollbackStopSymbols">Token types that abort the current node and turn it into text.</param>
+    /// <param name="expectedStopSymbols">Token types that finish the current node normally.</param>
+    /// <returns>
+    /// The node that was being filled; after a rollback with the parser at the end of input, the parent
+    /// the rolled-back text was merged into.
+    /// </returns>
+    public MarkdownNode ParseTokens(
+        HashSet<TokenType>? rollbackStopSymbols = null,
+        HashSet<TokenType>? expectedStopSymbols = null)
+    {
+        while (CurrentToken.Type != TokenType.Eof)
+        {
+            var status = TryParseNode(out var parsedNode).Type;
+
+            // Stop conditions are only checked for Fail and NeedRollback results.
+            var checkStops = status is not (ParseResultType.Success or ParseResultType.WasRollback);
+
+            if (checkStops && expectedStopSymbols?.Contains(CurrentToken.Type) == true)
+            {
+                var finished = CurrentParent;
+                if (ParentStack.Count > 0)
+                    CurrentParent = ParentStack.Pop();
+                return finished;
+            }
+
+            if (checkStops && (status == ParseResultType.NeedRollback
+                               || rollbackStopSymbols?.Contains(CurrentToken.Type) == true))
+            {
+                var abandoned = CurrentParent;
+                var flattened = Rollback(abandoned);
+                CurrentParent = ParentStack.Pop();
+                CurrentParent.AddChildren(flattened);
+
+                return CurrentToken.Type != TokenType.Eof ? abandoned : CurrentParent;
+            }
+
+            CurrentParent.AddChild(parsedNode);
+            if (CurrentToken.Type != TokenType.Eof)
+                MoveNext();
+        }
+
+        return CurrentParent;
+    }
+
+    /// <summary>
+    /// Returns the result of the first parser that does not fail; when all fail, yields the current
+    /// token's value as a text node together with a Fail status.
+    /// </summary>
+    private ParseStatus TryParseNode(out MarkdownNode node)
+    {
+        foreach (var candidate in parsers)
+        {
+            var status = candidate.TryParse(out node);
+            if (status.Type is not ParseResultType.Fail)
+                return status;
+        }
+
+        node = new TextNode(CurrentToken.Value);
+        return ParseStatus.Fail();
+    }
+
+    /// <summary>
+    /// Flattens <paramref name="node"/> and all of its descendants, parent before children, into text
+    /// nodes that carry each node's <c>Value</c>.
+    /// </summary>
+    public static List<MarkdownNode> Rollback(MarkdownNode node)
+    {
+        var result = new List<MarkdownNode>();
+        Flatten(node);
+        return result;
+
+        void Flatten(MarkdownNode current)
+        {
+            result.Add(new TextNode(current.Value));
+            foreach (var child in current.Children)
+                Flatten(child);
+        }
+    }
+
+    /// <summary>Advances to the next token; no bounds check is performed.</summary>
+    public void MoveNext() => position++;
+}
\ No newline at end of file
diff --git a/cs/Markdown/Parsers/ParseStatus.cs b/cs/Markdown/Parsers/ParseStatus.cs new file mode 100644 index 000000000..bf624be20 --- /dev/null +++ b/cs/Markdown/Parsers/ParseStatus.cs @@ -0,0 +1,39 @@ +namespace Markdown.Parsers; + +public readonly struct ParseStatus +{ + public 
ParseResultType Type { get; } + + private ParseStatus(ParseResultType type) + { + Type = type; + } + + public static ParseStatus Ok() + { + return new ParseStatus(ParseResultType.Success); + } + + public static ParseStatus Fail() + { + return new ParseStatus(ParseResultType.Fail); + } + + public static ParseStatus WasRollback() + { + return new ParseStatus(ParseResultType.WasRollback); + } + + public static ParseStatus NeedRollback() + { + return new ParseStatus(ParseResultType.NeedRollback); + } +} + +public enum ParseResultType +{ + Success, + Fail, + WasRollback, + NeedRollback +} \ No newline at end of file diff --git a/cs/Markdown/Parsers/UnderscoresParser.cs b/cs/Markdown/Parsers/UnderscoresParser.cs new file mode 100644 index 000000000..76570b2fb --- /dev/null +++ b/cs/Markdown/Parsers/UnderscoresParser.cs @@ -0,0 +1,394 @@ +using Markdown.Nodes.Interfaces; +using Markdown.Nodes.Internal; +using Markdown.Nodes.Leaf; +using Markdown.Parsers.Interfaces; +using Markdown.Tokenizer; + +namespace Markdown.Parsers; + +public class UnderscoresParser : IParser +{ + private readonly HashSet boldExpectedStopSymbols = + [ + TokenType.DoubleUnderscore, + TokenType.WordDoubleUnderscore + ]; + + private readonly HashSet boldRollbackSymbols = [TokenType.NewLine, TokenType.Eof]; + private readonly HashSet boldWordRollbackSymbols = [TokenType.Space]; + + private readonly HashSet italicExpectedStopSymbols = + [ + TokenType.Underscore, + TokenType.WordUnderscore + ]; + + private readonly HashSet italicRollbackSymbols = + [ + TokenType.DoubleUnderscore, + TokenType.WordDoubleUnderscore, + TokenType.NewLine, + TokenType.Eof + ]; + + private readonly HashSet italicWordRollbackSymbols = + [ + TokenType.DoubleUnderscore, + TokenType.WordDoubleUnderscore, + TokenType.Space + ]; + + private readonly MarkdownParser parser; + private readonly Stack underscores = new(); + + public UnderscoresParser(MarkdownParser parser) + { + this.parser = parser; + } + + public ParseStatus TryParse(out 
MarkdownNode node) + { + node = new TextNode(parser.CurrentToken.Value); + var status = ParseStatus.Fail(); + if (parser.CurrentToken.Type is not + (TokenType.Underscore or TokenType.DoubleUnderscore + or TokenType.WordUnderscore or TokenType.WordDoubleUnderscore)) + return status; + + if (parser.CurrentToken.Type is TokenType.Underscore or TokenType.DoubleUnderscore) + { + status = TryReadUnderscores(out var parsedUnderscoresNode); + node = parsedUnderscoresNode; + return status; + } + + status = TryReadWordUnderscores(out var parsedWordUnderscoresNode); + node = parsedWordUnderscoresNode; + return status; + } + + private ParseStatus TryReadUnderscores(out MarkdownNode node) + { + node = new TextNode(parser.CurrentToken.Value); + + return TryReadUnderscoreBase( + CanOpen, + CanClose, + t => t == TokenType.Underscore + ? italicExpectedStopSymbols + : boldExpectedStopSymbols, + t => t is TokenType.Underscore or TokenType.WordUnderscore + ? italicRollbackSymbols + : boldRollbackSymbols, + () => + { + var rollback = new HashSet + { + TokenType.Underscore, + TokenType.WordUnderscore + }; + rollback.UnionWith(italicRollbackSymbols); + return rollback; + }, + supportNeedRollbackStatus: true, + out node); + } + + private ParseStatus TryReadWordUnderscores(out MarkdownNode node) + { + node = new TextNode(parser.CurrentToken.Value); + if (parser.CurrentToken.Type is not (TokenType.WordUnderscore or TokenType.WordDoubleUnderscore)) + return ParseStatus.Fail(); + + return TryReadUnderscoreBase( + canOpenFunc: () => !CanCloseWordUnderscore(), + canCloseFunc: CanCloseWordUnderscore, + getExpectedStopSymbols: t => + t == TokenType.WordUnderscore + ? italicExpectedStopSymbols + : boldExpectedStopSymbols, + getRollbackSymbols: t => t is TokenType.Underscore or TokenType.WordUnderscore + ? 
italicWordRollbackSymbols + : boldWordRollbackSymbols, + getFallbackRollback: () => + { + var rollback = new HashSet + { + TokenType.Underscore, + TokenType.WordUnderscore + }; + rollback.UnionWith(italicWordRollbackSymbols); + return rollback; + }, + supportNeedRollbackStatus: false, + out node); + } + + private ParseStatus TryReadUnderscoreBase( + Func canOpenFunc, + Func canCloseFunc, + Func> getExpectedStopSymbols, + Func> getRollbackSymbols, + Func> getFallbackRollback, + bool supportNeedRollbackStatus, + out MarkdownNode node) + { + node = new TextNode(parser.CurrentToken.Value); + var underscoreType = parser.CurrentToken.Type; + + if (parser.NextToken?.Type == underscoreType) + return ParseStatus.Fail(); + + var canOpen = canOpenFunc(); + var canClose = canCloseFunc(); + + var handled = TryHandleUnderscore( + canOpen, + canClose, + IsValidOpenUnderscores, + IsValidCloseUnderscores, + out var handledNode, + underscoreType); + + if (handled && canOpen) + { + parser.ParentStack.Push(parser.CurrentParent); + parser.CurrentParent = handledNode; + node = handledNode; + + var expectedStopSymbols = getExpectedStopSymbols(underscoreType); + var rollbackSymbols = getRollbackSymbols(underscoreType); + + parser.MoveNext(); + parser.ParseTokens(rollbackSymbols, expectedStopSymbols); + + if (supportNeedRollbackStatus && + rollbackSymbols.Contains(parser.CurrentToken.Type)) + { + node = new TextNode($"{parser.CurrentToken.Value}"); + + if (parser.CurrentToken.Type is TokenType.Eof or TokenType.NewLine) + { + underscores.Clear(); + if (parser.CurrentToken.Type == TokenType.Eof) + return ParseStatus.NeedRollback(); + } + + return ParseStatus.WasRollback(); + } + + return expectedStopSymbols.Contains(parser.CurrentToken.Type) + ? 
ParseStatus.Ok() + : ParseStatus.Fail(); + } + + if (!handled && canOpen) + { + var rollback = getFallbackRollback(); + parser.ParentStack.Push(parser.CurrentParent); + parser.CurrentParent = GetNodeFromUnderscoreType(underscoreType); + parser.MoveNext(); + parser.ParseTokens(rollback); + node = new TextNode($"{parser.CurrentToken.Value}"); + + return ParseStatus.Ok(); + } + + if (supportNeedRollbackStatus && canClose) + return ParseStatus.NeedRollback(); + + return ParseStatus.Fail(); + } + + private bool TryHandleUnderscore( + bool canOpen, + bool canClose, + Func isValidOpen, + Func isValidClose, + out MarkdownNode node, + TokenType type) + { + if (canOpen && canClose) + { + if (underscores.Count > 0) + { + var peeked = underscores.Peek(); + if (!IsIntersecting(peeked)) + underscores.Pop(); + } + else + { + underscores.Push(parser.CurrentToken); + } + } + else if (canOpen) + { + if (TryHandleOpen(isValidOpen, type, out var node1)) + { + node = node1; + return true; + } + } + else if (canClose && underscores.Count > 0) + { + if (TryHandleClose(isValidClose, out var node2)) + { + node = node2; + return true; + } + } + + node = new TextNode($"{parser.CurrentToken.Value}"); + return false; + } + + private bool TryHandleOpen(Func isValidOpen, TokenType type, out MarkdownNode node) + { + var value = GetValueFromUnderscoreType(type); + if (underscores.Count > 0) + { + var peeked = underscores.Peek(); + if (!isValidOpen(peeked)) + { + underscores.Push(parser.CurrentToken); + node = new TextNode(value); + return false; + } + + underscores.Push(parser.CurrentToken); + } + else + { + underscores.Push(parser.CurrentToken); + } + + node = GetNodeFromUnderscoreType(type); + return true; + } + + private bool TryHandleClose(Func isValidClose, out MarkdownNode node) + { + var peeked = underscores.Peek(); + node = new TextNode(""); + if (!isValidClose(peeked) && underscores.Count == 1) + { + underscores.Pop(); + return false; + } + + if (IsIntersecting(peeked)) + { + 
underscores.Pop(); + return false; + } + + underscores.Pop(); + + return true; + } + + private bool IsValidOpenUnderscores(Token peeked) + { + var curr = parser.CurrentToken.Type; + var peekedType = peeked.Type; + + if (curr == peekedType) + return false; + + return curr switch + { + TokenType.Underscore => peekedType is TokenType.DoubleUnderscore, + TokenType.WordUnderscore => peekedType is TokenType.DoubleUnderscore or TokenType.WordDoubleUnderscore, + TokenType.DoubleUnderscore => peekedType is not TokenType.Underscore, + TokenType.WordDoubleUnderscore => peekedType != TokenType.Underscore && + peekedType != TokenType.WordUnderscore, + _ => true + }; + } + + private bool IsValidCloseUnderscores(Token peeked) + { + var currentType = parser.CurrentToken.Type; + var peekedType = peeked.Type; + + if (currentType == TokenType.Underscore) return peekedType is TokenType.Underscore or TokenType.WordUnderscore; + + return peekedType is TokenType.DoubleUnderscore or TokenType.WordDoubleUnderscore; + } + + private bool CanOpen() + { + return parser.NextToken is { Type: TokenType.Text } && + !parser.NextToken.Value.StartsWith(" "); + } + + private bool CanClose() + { + if (underscores.Count == 0 + || parser.PrevToken == null + || parser.PrevToken.Value.EndsWith(" ") + || parser.PrevToken.Type is TokenType.Underscore or TokenType.DoubleUnderscore) + return false; + + return parser.NextToken != null; + } + private bool CanCloseWordUnderscore() + { + if (underscores.Count <= 0) + return false; + + var peeked = underscores.Peek(); + + if (parser.CurrentToken.Type is TokenType.WordUnderscore) + return peeked.Type is TokenType.WordUnderscore or TokenType.Underscore; + + return peeked.Type is TokenType.DoubleUnderscore or TokenType.WordDoubleUnderscore; + } + + private bool IsIntersecting(Token peeked) + { + switch (parser.CurrentToken.Type) + { + case TokenType.Underscore: + case TokenType.WordUnderscore: + { + if (peeked.Type is TokenType.DoubleUnderscore or 
TokenType.WordDoubleUnderscore) + return true; + break; + } + case TokenType.DoubleUnderscore: + case TokenType.WordDoubleUnderscore: + { + if (peeked.Type is TokenType.Underscore or TokenType.WordUnderscore) + return true; + break; + } + } + + return false; + } + + private string GetValueFromUnderscoreType(TokenType type) + { + return type switch + { + TokenType.Underscore or TokenType.WordUnderscore => "_", + TokenType.DoubleUnderscore or TokenType.WordDoubleUnderscore => "__", + _ => throw new ArgumentException("Token type must be Underscore or WordUnderscore or DoubleUnderscore or " + + "WordDoubleUnderscore") + }; + } + + private MarkdownNode GetNodeFromUnderscoreType(TokenType type) + { + var value = GetValueFromUnderscoreType(type); + + return type switch + { + TokenType.Underscore or TokenType.WordUnderscore => new ItalicNode(value), + TokenType.DoubleUnderscore or TokenType.WordDoubleUnderscore => new BoldNode(value), + _ => throw new ArgumentException("Token type must be Underscore or WordUnderscore or DoubleUnderscore or " + + "WordDoubleUnderscore") + }; + } +} \ No newline at end of file diff --git a/cs/Markdown/Program.cs b/cs/Markdown/Program.cs new file mode 100644 index 000000000..e02417453 --- /dev/null +++ b/cs/Markdown/Program.cs @@ -0,0 +1,4 @@ +using Markdown; + +var md = new Md(); +md.Render(""); \ No newline at end of file
diff --git a/cs/Markdown/Tests/MdPerformanceTest.cs b/cs/Markdown/Tests/MdPerformanceTest.cs
new file mode 100644
index 000000000..67591c212
--- /dev/null
+++ b/cs/Markdown/Tests/MdPerformanceTest.cs
@@ -0,0 +1,55 @@
+using System.Diagnostics;
+using System.Text;
+using FluentAssertions;
+using NUnit.Framework;
+
+namespace Markdown.Tests;
+
+[TestFixture]
+public class MdPerformanceTest
+{
+    /// <summary>
+    /// Checks that rendering time grows roughly linearly with the input size: every larger input may
+    /// take at most 20% longer than the time extrapolated from the smallest one.
+    /// </summary>
+    [Test]
+    public void Render_Performance_WithAllTokens()
+    {
+        int[] repeatCounts = [1000, 2000, 4000, 8000, 16000];
+        const string markdownText = "# Заголовок _курсив_ __жирный__ ![Cat](https://example.com/image.jpg) текст" +
+                                    " \\экранирование_\n";
+        const double tolerance = 0.20;
+        var md = new Md();
+
+        // Warm-up run so one-time JIT cost does not inflate the baseline measurement.
+        md.Render(markdownText);
+
+        double? baseTimeMs = null;
+        var baseRepeatCount = repeatCounts[0];
+
+        foreach (var repeatCount in repeatCounts)
+        {
+            var sb = new StringBuilder(markdownText.Length * repeatCount);
+            for (var i = 0; i < repeatCount; i++)
+                sb.Append(markdownText);
+            var input = sb.ToString();
+
+            var stopwatch = Stopwatch.StartNew();
+            md.Render(input);
+            stopwatch.Stop();
+
+            var elapsedMs = stopwatch.Elapsed.TotalMilliseconds;
+            if (baseTimeMs is null)
+            {
+                // Floor at 1 ms: a (near-)zero baseline would make every later limit (near-)zero too.
+                baseTimeMs = Math.Max(elapsedMs, 1.0);
+                continue;
+            }
+
+            var multiplier = (double)repeatCount / baseRepeatCount;
+            var maxAllowedMs = baseTimeMs.Value * multiplier * (1 + tolerance);
+            elapsedMs.Should().BeLessThanOrEqualTo(maxAllowedMs,
+                "rendering {0} repetitions should scale linearly from the baseline", repeatCount);
+        }
+    }
+}
\ No newline at end of file
diff --git a/cs/Markdown/Tests/MdSpec.txt b/cs/Markdown/Tests/MdSpec.txt new file mode 100644 index 000000000..b75aa2501 --- /dev/null +++ b/cs/Markdown/Tests/MdSpec.txt @@ -0,0 +1,86 @@ +# Спецификация языка разметки + +Посмотрите этот файл в сыром виде. Сравните с тем, что показывает github. +Все совпадения случайны ;) + + + +# Курсив + +Текст, _окруженный с двух сторон_ одинарными символами подчерка, +должен помещаться в HTML-тег \ вот так: + +Текст, \окруженный с двух сторон\ одинарными символами подчерка, +должен помещаться в HTML-тег \. + + + +# Полужирный + +__Выделенный двумя символами текст__ должен становиться полужирным с помощью тега \. + + + +# Экранирование + +Любой символ можно экранировать, чтобы он не считался частью разметки. +\_Вот это\_, не должно выделиться тегом \. + +Символ экранирования исчезает из результата, только если экранирует что-то. +Здесь сим\волы экранирования\ \должны остаться.\ + +Символ экранирования тоже можно экранировать: \\_вот это будет выделено тегом_ \ + + + +# Взаимодействие тегов + +Внутри __двойного выделения _одинарное_ тоже__ работает. + +Но не наоборот — внутри _одинарного __двойное__ не_ работает. 
+ +Подчерки внутри текста c цифрами_12_3 не считаются выделением и должны оставаться символами подчерка. + +Однако выделять часть слова они могут: и в _нач_але, и в сер_еди_не, и в кон_це._ + +В то же время выделение в ра_зных сл_овах не работает. + +__Непарные_ символы в рамках одного абзаца не считаются выделением. + +За подчерками, начинающими выделение, должен следовать непробельный символ. Иначе эти_ подчерки_ не считаются выделением +и остаются просто символами подчерка. + +Подчерки, заканчивающие выделение, должны следовать за непробельным символом. Иначе эти _подчерки _не считаются_ окончанием выделения +и остаются просто символами подчерка. + +В случае __пересечения _двойных__ и одинарных_ подчерков ни один из них не считается выделением. + +Если внутри подчерков пустая строка ____, то они остаются символами подчерка. + + + +# Заголовки + +Абзац, начинающийся с "# ", выделяется тегом \

в заголовок. +В тексте заголовка могут присутствовать все прочие символы разметки с указанными правилами. + +Таким образом + +\# Заголовок \__с \_разными_ символами__ + +превратится в: + +

Заголовок с разными символами

+ +# Изображения + +Изображения задаются с помощью специальной конструкции вида: \![alt](url), где: +alt — альтернативный текст изображения; +url — ссылка на изображение. + +Например, запись: + +\![Cat](https://example.com/image.jpg) + +превратится в: +![Cat](https://example.com/image.jpg) \ No newline at end of file
diff --git a/cs/Markdown/Tests/MdSpecApprovalTest.cs b/cs/Markdown/Tests/MdSpecApprovalTest.cs
new file mode 100644
index 000000000..51f5ec344
--- /dev/null
+++ b/cs/Markdown/Tests/MdSpecApprovalTest.cs
@@ -0,0 +1,27 @@
+using ApprovalTests;
+using ApprovalTests.Namers;
+using ApprovalTests.Reporters;
+using NUnit.Framework;
+
+namespace Markdown.Tests;
+
+/// <summary>Approval test that renders the whole Markdown specification file.</summary>
+[UseApprovalSubdirectory("Results")]
+[TestFixture]
+public class MdSpecApprovalTest
+{
+    [Test]
+    [UseReporter(typeof(DiffReporter))]
+    public void Render_MdSpec_ReturnCorrectHtml()
+    {
+        // Built with Path.Combine from the test assembly directory so the lookup does not depend on
+        // the process working directory or on '\' being a path separator.
+        var specPath = Path.Combine(
+            TestContext.CurrentContext.TestDirectory, "..", "..", "..", "Tests", "MdSpec.txt");
+        var markdown = File.ReadAllText(specPath);
+
+        var html = new Md().Render(markdown);
+
+        Approvals.Verify(html);
+    }
+}
\ No newline at end of file
diff --git a/cs/Markdown/Tests/MdTests.cs b/cs/Markdown/Tests/MdTests.cs new file mode 100644 index 000000000..ea2cfa335 --- /dev/null +++ b/cs/Markdown/Tests/MdTests.cs @@ -0,0 +1,104 @@ +using System.Diagnostics; +using System.Text; +using FluentAssertions; +using NUnit.Framework; + +namespace Markdown.Tests; + +[TestFixture] +public class MdTests +{ + [TestCase("![Cat](https://example.com/image.jpg)", + "\"Cat\"", TestName = "ValidUrlAndAlt")] + [TestCase("!\n[Cat](https://example.com/image.jpg)", + "!\n[Cat](https://example.com/image.jpg)", TestName = "NewlineAfterExclamation")] + [TestCase("![Ca\nt](https://example.com/image.jpg)", + "![Ca\nt](https://example.com/image.jpg)", TestName = "AltWithNewline")] + [TestCase("![](https://example.com/image.jpg)", + "\"\"", TestName = "EmptyAltText")] + [TestCase("![__Cat__](https://example.com/image.jpg)", + "\"<strongCat
\">", TestName = "AltWithBold")] + [TestCase("![Cat](https://example.com/i__m__age.jpg)", + "\"Cat\"", TestName = "UrlWithBold")] + public void Render_Image_CorrectHtmlText(string markdownText, string expectedResult) + { + var renderer = new Md(); + var html = renderer.Render(markdownText); + + html.Should().Be(expectedResult); + } + + [TestCase("# текст", "

текст

", TestName = "SimpleHeader")] + [TestCase("# Первый \n# Второй", "

Первый

\n

Второй

", TestName = "TwoHeadersInDifferentLines")] + [TestCase("# Первый \n просто текст \n# Второй", "

Первый

\n просто текст \n

Второй

", + TestName = "HeadersWithTextBetween")] + [TestCase("#текст", "#текст", TestName = "WithNoSpace")] + [TestCase("# Заголовок с _курсивом_ и __жирным шрифтом__", + "

Заголовок с курсивом и жирным шрифтом

", TestName = "WithInlineFormatting")] + [TestCase("# ![Cat](https://example.com/image.jpg)", + "

\"Cat\"

", TestName = "WithImage")] + [TestCase("# __текст__", "

текст

", TestName = "WithBold")] + [TestCase(" # текст", " # текст", TestName = "LeadingSpace")] + [TestCase("# ", "

", TestName = "Empty")] + [TestCase("# текст # ", "

текст #

", TestName = "TrailingHashIgnored")] + [TestCase("# текст", "

текст

", TestName = "ManySpaceAfterHash")] + public void Render_Header_CorrectHtmlText(string markdownText, string expectedResult) + { + var renderer = new Md(); + var html = renderer.Render(markdownText); + + html.Should().Be(expectedResult); + } + + [TestCase("_текст_", "текст", TestName = "SimpleItalic")] + [TestCase("_те\nкст_", "_те\nкст_", TestName = "ItalicWithNewLineInside")] + [TestCase("__текст__", "текст", TestName = "SimpleBold")] + [TestCase("Внутри __двойного выделения _одинарное_ тоже__ работает", + "Внутри двойного выделения одинарное тоже работает", + TestName = "BoldWithItalicInside")] + [TestCase("_те_кст", "текст", TestName = "ItalicAtStartWord")] + [TestCase("т_екс_т", "текст", TestName = "ItalicInMiddleWord")] + [TestCase("текс_т_", "текст", TestName = "ItalicAtEndWord")] + [TestCase("__текст_", "__текст_", TestName = "UnpairedUnderscores")] + [TestCase("_ текст_", "_ текст_", TestName = "LeadingSpacePreventsItalic")] + [TestCase("1_2_3", "1_2_3", TestName = "DigitsPreventItalic")] + [TestCase("те_кст те_кст", "те_кст те_кст", TestName = "SeparateWordsUnderscores")] + [TestCase("__", "__", TestName = "OnlyDoubleUnderscore")] + [TestCase("____", "____", TestName = "OnlyFourUnderscores")] + [TestCase("__текст _текст__ текст_", "__текст _текст__ текст_", TestName = "BoldWithNestedItalic")] + [TestCase("_текст __текст__ текст_", "текст __текст__ текст", TestName = "ItalicWithBoldInside")] + [TestCase("_текст т__екс__т текст_", "текст т__екс__т текст", TestName = "ItalicWithBoldInsideWord")] + [TestCase("т__е_к_с__т", "текст", TestName = "BoldInWordWithItalicInside")] + [TestCase("__те_к_ст__", "текст", TestName = "BoldWithItalicInsideInWord")] + [TestCase("_те__к__ст_", "те__к__ст", TestName = "ItalicWithBoldInsideInWord")] + [TestCase("_123_", "123", TestName = "ItalicWithNumbers")] + [TestCase("__123__", "123", TestName = "BoldWithNumbers")] + [TestCase("__++++__", "++++", TestName = "BoldWithSymbols")] + [TestCase("__ текст __", "__ текст __", 
TestName = "BoldWithSpacesPrevents")] + [TestCase("__текст __", "__текст __", TestName = "BoldWithSpace")] + public void Render_Underscores_CorrectHtmlText(string markdownText, string expectedResult) + { + var renderer = new Md(); + var html = renderer.Render(markdownText); + + html.Should().Be(expectedResult); + } + + [TestCase(@"\_текст_", "_текст_", TestName = "EscapedUnderscore")] + [TestCase(@"\\_текст\\_", @"\текст\", TestName = "DoubleEscapedUnderscoresAroundText")] + [TestCase(@"\# текст", "# текст", TestName = "EscapedHeader")] + [TestCase(@"\![Cat](https://example.com/image.jpg)", + "![Cat](https://example.com/image.jpg)", TestName = "EscapedExclamationInImage")] + [TestCase(@"__текст \_текст__ текст_", "текст _текст текст_", + TestName = "BoldWithEscapedUnderscoreInside")] + [TestCase(@"т\екс\т", @"т\екс\т", TestName = "TextWithMultipleEscapes")] + public void Render_Escape_CorrectHtmlText(string markdownText, string expectedResult) + { + var renderer = new Md(); + var html = renderer.Render(markdownText); + + html.Should().Be(expectedResult); + } + + +} \ No newline at end of file diff --git a/cs/Markdown/Tests/ParserTests/ParseEscapeTests.cs b/cs/Markdown/Tests/ParserTests/ParseEscapeTests.cs new file mode 100644 index 000000000..580b8881a --- /dev/null +++ b/cs/Markdown/Tests/ParserTests/ParseEscapeTests.cs @@ -0,0 +1,102 @@ +using FluentAssertions; +using Markdown.Nodes.Internal; +using Markdown.Nodes.Leaf; +using Markdown.Parsers; +using Markdown.Tokenizer; +using NUnit.Framework; + +namespace Markdown.Tests.ParserTests; + +[TestFixture] +public class ParseEscapeTests +{ + [Test] + public void ParseTokens_EscapeHeader_ReturnsMarkdownDocument() + { + var tokens = new List + { + new("\\", TokenType.Escape), + new("#", TokenType.Hash), + new(" ", TokenType.Space), + new("текст", TokenType.Text), + new("", TokenType.Eof) + }; + var parser = new MarkdownParser(tokens); + var expectedDocument = new MarkdownDocumentNode(""); + expectedDocument.AddChild(new 
TextNode("#")); + expectedDocument.AddChild(new TextNode(" ")); + expectedDocument.AddChild(new TextNode("текст")); + + var actualNode = parser.ParseTokens([]); + + actualNode.Should().BeEquivalentTo(expectedDocument); + } + + [Test] + public void ParseTokens_EscapeUnderscores_ReturnsMarkdownDocument() + { + var tokens = new List + { + new ("\\", TokenType.Escape), + new("__", TokenType.DoubleUnderscore), + new("текст", TokenType.Text), + new("__", TokenType.DoubleUnderscore), + new("", TokenType.Eof) + }; + var parser = new MarkdownParser(tokens); + var expectedDocument = new MarkdownDocumentNode(""); + expectedDocument.AddChild(new TextNode("__")); + expectedDocument.AddChild(new TextNode("текст")); + expectedDocument.AddChild(new TextNode("__")); + + var actualNode = parser.ParseTokens([]); + + actualNode.Should().BeEquivalentTo(expectedDocument); + } + + [Test] + public void ParseTokens_EscapeWordUnderscores_ReturnsMarkdownDocument() + { + var tokens = new List + { + new("_", TokenType.Underscore), + new("те", TokenType.Text), + new("\\", TokenType.Escape), + new("_", TokenType.WordUnderscore), + new("кст", TokenType.Text), + new("", TokenType.Eof) + }; + var parser = new MarkdownParser(tokens); + var expectedDocument = new MarkdownDocumentNode(""); + expectedDocument.AddChild(new TextNode("_")); + expectedDocument.AddChild(new TextNode("те")); + expectedDocument.AddChild(new TextNode("_")); + expectedDocument.AddChild(new TextNode("кст")); + + var actualNode = parser.ParseTokens([]); + + actualNode.Should().BeEquivalentTo(expectedDocument); + } + + [Test] + public void ParseTokens_EscapeImage_ReturnsMarkdownDocument() + { + var tokens = new List + { + new ("\\", TokenType.Escape), + new("__", TokenType.DoubleUnderscore), + new("текст", TokenType.Text), + new("__", TokenType.DoubleUnderscore), + new("", TokenType.Eof) + }; + var parser = new MarkdownParser(tokens); + var expectedDocument = new MarkdownDocumentNode(""); + expectedDocument.AddChild(new 
TextNode("__")); + expectedDocument.AddChild(new TextNode("текст")); + expectedDocument.AddChild(new TextNode("__")); + + var actualNode = parser.ParseTokens([]); + + actualNode.Should().BeEquivalentTo(expectedDocument); + } +} \ No newline at end of file diff --git a/cs/Markdown/Tests/ParserTests/ParseHeaderTests.cs b/cs/Markdown/Tests/ParserTests/ParseHeaderTests.cs new file mode 100644 index 000000000..88f3bbe26 --- /dev/null +++ b/cs/Markdown/Tests/ParserTests/ParseHeaderTests.cs @@ -0,0 +1,147 @@ +using FluentAssertions; +using Markdown.Nodes.Internal; +using Markdown.Nodes.Leaf; +using Markdown.Parsers; +using Markdown.Tokenizer; +using NUnit.Framework; + +namespace Markdown.Tests.ParserTests; + +[TestFixture] +public class ParseHeaderTests +{ + [Test] + public void ParseTokens_SimpleHeader_ReturnsMarkdownDocument() + { + var tokens = new List + { + new("#", TokenType.Hash), + new(" ", TokenType.Space), + new("текст", TokenType.Text), + new("", TokenType.Eof) + }; + var parser = new MarkdownParser(tokens); + var expectedDocumentDocument = new MarkdownDocumentNode(""); + var headerNode = new HeaderNode("#"); + var textNode = new TextNode("текст"); + expectedDocumentDocument.AddChild(headerNode); + headerNode.AddChild(textNode); + + + var actualNode = parser.ParseTokens([]); + + actualNode.Should().BeEquivalentTo(expectedDocumentDocument); + } + + [Test] + public void ParseTokens_TwoHeadersInDifferentLines_ReturnsCorrectTree() + { + var tokens = new List + { + new("#", TokenType.Hash), + new(" ", TokenType.Space), + new("Первый", TokenType.Text), + new(" ", TokenType.Space), + new("\n", TokenType.NewLine), + new("#", TokenType.Hash), + new(" ", TokenType.Space), + new("Второй", TokenType.Text), + new("", TokenType.Eof) + }; + + var parser = new MarkdownParser(tokens); + var expectedDocument = new MarkdownDocumentNode(""); + var headerNode1 = new HeaderNode("#"); + var headerNode2 = new HeaderNode("#"); + expectedDocument.AddChild(headerNode1); + 
headerNode1.AddChild(new TextNode("Первый")); + headerNode1.AddChild(new TextNode(" ")); + headerNode1.AddChild(new TextNode("\n")); + expectedDocument.AddChild(headerNode2); + headerNode2.AddChild(new TextNode("Второй")); + + var actual = parser.ParseTokens([]); + + actual.Should().BeEquivalentTo(expectedDocument); + } + + [Test] + public void ParseTokens_HeaderWithInlineTextBetween_ReturnsCorrectTree() + { + var tokens = new List + { + new("#", TokenType.Hash), + new(" ", TokenType.Space), + new("Первый", TokenType.Text), + new(" ", TokenType.Space), + new("\n", TokenType.NewLine), + new("просто", TokenType.Text), + new(" ", TokenType.Space), + new("текст", TokenType.Text), + new("\n", TokenType.NewLine), + new("#", TokenType.Hash), + new(" ", TokenType.Space), + new("Второй", TokenType.Text), + new("", TokenType.Eof) + }; + + var parser = new MarkdownParser(tokens); + var expectedDocument = new MarkdownDocumentNode(""); + var h1 = new HeaderNode("#"); + h1.AddChild(new TextNode("Первый")); + h1.AddChild(new TextNode(" ")); + h1.AddChild(new TextNode("\n")); + expectedDocument.AddChild(h1); + expectedDocument.AddChild(new TextNode("просто")); + expectedDocument.AddChild(new TextNode(" ")); + expectedDocument.AddChild(new TextNode("текст")); + expectedDocument.AddChild(new TextNode("\n")); + var h2 = new HeaderNode("#"); + h2.AddChild(new TextNode("Второй")); + expectedDocument.AddChild(h2); + + var actual = parser.ParseTokens([]); + + actual.Should().BeEquivalentTo(expectedDocument); + } + + [Test] + public void ParseTokens_HeaderWithoutSpace_TreatedAsPlainText() + { + var tokens = new List + { + new("#текст", TokenType.Text), + new("", TokenType.Eof) + }; + var parser = new MarkdownParser(tokens); + var expectedDocument = new MarkdownDocumentNode(""); + expectedDocument.AddChild(new TextNode("#текст")); + + var actual = parser.ParseTokens([]); + + actual.Should().BeEquivalentTo(expectedDocument); + } + + [Test] + public void 
ParseTokens_HeaderWithLeadingSpace_NotAHeader() + { + var tokens = new List + { + new(" ", TokenType.Space), + new("#", TokenType.Text), + new(" ", TokenType.Space), + new("текст", TokenType.Text), + new("", TokenType.Eof) + }; + var parser = new MarkdownParser(tokens); + var expectedDocument = new MarkdownDocumentNode(""); + expectedDocument.AddChild(new TextNode(" ")); + expectedDocument.AddChild(new TextNode("#")); + expectedDocument.AddChild(new TextNode(" ")); + expectedDocument.AddChild(new TextNode("текст")); + + var actual = parser.ParseTokens([]); + + actual.Should().BeEquivalentTo(expectedDocument); + } +} \ No newline at end of file diff --git a/cs/Markdown/Tests/ParserTests/ParseImageTests.cs b/cs/Markdown/Tests/ParserTests/ParseImageTests.cs new file mode 100644 index 000000000..a4d6d3b82 --- /dev/null +++ b/cs/Markdown/Tests/ParserTests/ParseImageTests.cs @@ -0,0 +1,118 @@ +using FluentAssertions; +using Markdown.Nodes.Internal; +using Markdown.Nodes.Leaf; +using Markdown.Parsers; +using Markdown.Tokenizer; +using NUnit.Framework; + +namespace Markdown.Tests.ParserTests; + +[TestFixture] +public class ParseImageTests +{ + [Test] + public void ParseTokens_SimpleImage_ReturnsMarkdownDocument() + { + var tokens = new List + { + new("!", TokenType.Exclamation), + new("[", TokenType.LBracket), + new("Cat", TokenType.Text), + new("]", TokenType.RBracket), + new("(", TokenType.LParenthesis), + new("https://example.com/image.jpg", TokenType.Text), + new(")", TokenType.RParenthesis), + new("", TokenType.Eof) + }; + var parser = new MarkdownParser(tokens); + var expectedDocument = new MarkdownDocumentNode(""); + var imageNode = new ImageNode("!"); + var altNode = new AltNode("["); + var altText = new TextNode("Cat"); + var urlNode = new UrlNode("https://example.com/image.jpg"); + expectedDocument.AddChild(imageNode); + imageNode.AddChild(altNode); + altNode.AddChild(altText); + imageNode.AddChild(urlNode); + + + var actual = parser.ParseTokens([]); + + 
actual.Should().BeEquivalentTo(expectedDocument); + } + + [Test] + public void ParseTokens_NewlineAfterExclamation_ReturnsPlainText() + { + var tokens = new List + { + new("!", TokenType.Text), + new("\n", TokenType.NewLine), + new("[", TokenType.LBracket), + new("Cat", TokenType.Text), + new("]", TokenType.RBracket), + new("(", TokenType.LParenthesis), + new("https://example.com/image.jpg", TokenType.Text), + new(")", TokenType.RParenthesis), + new("", TokenType.Eof) + }; + + var parser = new MarkdownParser(tokens); + + var expectedDocument = new MarkdownDocumentNode(""); + var exclamationText = new TextNode("!"); + var newLineText = new TextNode("\n"); + var lBracket = new TextNode("["); + var text = new TextNode("Cat"); + var rBracket = new TextNode("]"); + var lParenthesis = new TextNode("("); + var urlText = new TextNode("https://example.com/image.jpg"); + var rParenthesis = new TextNode(")"); + expectedDocument.AddChild(exclamationText); + expectedDocument.AddChild(newLineText); + expectedDocument.AddChild(lBracket); + expectedDocument.AddChild(text); + expectedDocument.AddChild(rBracket); + expectedDocument.AddChild(lParenthesis); + expectedDocument.AddChild(urlText); + expectedDocument.AddChild(rParenthesis); + + var actual = parser.ParseTokens([]); + + actual.Should().BeEquivalentTo(expectedDocument); + } + + [Test] + public void ParseTokens_AltWithNewline_ReturnsPlainText() + { + var tokens = new List + { + new("!", TokenType.Exclamation), + new("[", TokenType.LBracket), + new("Ca", TokenType.Text), + new("\n", TokenType.NewLine), + new("t", TokenType.Text), + new("]", TokenType.RBracket), + new("(", TokenType.LParenthesis), + new("https://example.com/image.jpg", TokenType.Text), + new(")", TokenType.RParenthesis), + new("", TokenType.Eof) + }; + var parser = new MarkdownParser(tokens); + + var expectedDocument = new MarkdownDocumentNode(""); + expectedDocument.AddChild(new TextNode("!")); + expectedDocument.AddChild(new TextNode("[")); + 
expectedDocument.AddChild(new TextNode("Ca")); + expectedDocument.AddChild(new TextNode("\n")); + expectedDocument.AddChild(new TextNode("t")); + expectedDocument.AddChild(new TextNode("]")); + expectedDocument.AddChild(new TextNode("(")); + expectedDocument.AddChild(new TextNode("https://example.com/image.jpg")); + expectedDocument.AddChild(new TextNode(")")); + + var actual = parser.ParseTokens([]); + + actual.Should().BeEquivalentTo(expectedDocument); + } +} \ No newline at end of file diff --git a/cs/Markdown/Tests/ParserTests/ParseUnderscoresTests.cs b/cs/Markdown/Tests/ParserTests/ParseUnderscoresTests.cs new file mode 100644 index 000000000..f4b64922b --- /dev/null +++ b/cs/Markdown/Tests/ParserTests/ParseUnderscoresTests.cs @@ -0,0 +1,379 @@ +using FluentAssertions; +using Markdown.Nodes.Internal; +using Markdown.Nodes.Leaf; +using Markdown.Parsers; +using Markdown.Tokenizer; +using NUnit.Framework; + +namespace Markdown.Tests.ParserTests; + +[TestFixture] +public class ParseUnderscoresTests +{ + [Test] + public void ParseTokens_SimpleBold_ReturnsMarkdownDocument() + { + var tokens = new List + { + new("__", TokenType.DoubleUnderscore), + new("текст", TokenType.Text), + new("__", TokenType.DoubleUnderscore), + new("", TokenType.Eof) + }; + var parser = new MarkdownParser(tokens); + var expectedDocument = new MarkdownDocumentNode(""); + var boldNode = new BoldNode("__"); + expectedDocument.AddChild(boldNode); + boldNode.AddChild(new TextNode("текст")); + + var actual = parser.ParseTokens([]); + + actual.Should().BeEquivalentTo(expectedDocument); + } + + [Test] + public void ParseTokens_SimpleItalic_ReturnsMarkdownDocument() + { + var tokens = new List + { + new("_", TokenType.Underscore), + new("текст", TokenType.Text), + new("_", TokenType.Underscore), + new("", TokenType.Eof) + }; + var parser = new MarkdownParser(tokens); + var expectedDocument = new MarkdownDocumentNode(""); + var italicNode = new ItalicNode("_"); + expectedDocument.AddChild(italicNode); 
+ italicNode.AddChild(new TextNode("текст")); + + var actual = parser.ParseTokens([]); + + actual.Should().BeEquivalentTo(expectedDocument); + } + + [Test] + public void ParseTokens_SingleInsideDouble_ReturnsMarkdownDocument() + { + var tokens = new List + { + new("__", TokenType.DoubleUnderscore), + new("двойное", TokenType.Text), + new(" ", TokenType.Space), + new("_", TokenType.Underscore), + new("одинарное", TokenType.Text), + new("_", TokenType.Underscore), + new(" ", TokenType.Space), + new("тоже", TokenType.Text), + new("__", TokenType.DoubleUnderscore), + new("", TokenType.Eof) + }; + var parser = new MarkdownParser(tokens); + var expectedDocument = new MarkdownDocumentNode(""); + var bold = new BoldNode("__"); + expectedDocument.AddChild(bold); + bold.AddChild(new TextNode("двойное")); + bold.AddChild(new TextNode(" ")); + var italic = new ItalicNode("_"); + italic.AddChild(new TextNode("одинарное")); + bold.AddChild(italic); + bold.AddChild(new TextNode(" ")); + bold.AddChild(new TextNode("тоже")); + + var actual = parser.ParseTokens([]); + + actual.Should().BeEquivalentTo(expectedDocument); + } + + [Test] + public void ParseTokens_ItalicWithBoldInside_ReturnsDocument() + { + var tokens = new List + { + new("_", TokenType.Underscore), + new("начало", TokenType.Text), + new(" ", TokenType.Space), + new("__", TokenType.DoubleUnderscore), + new("внутри", TokenType.Text), + new("__", TokenType.DoubleUnderscore), + new(" ", TokenType.Space), + new("конец", TokenType.Text), + new("_", TokenType.Underscore), + new("", TokenType.Eof) + }; + + var parser = new MarkdownParser(tokens); + var expectedDocument = new MarkdownDocumentNode(""); + var italic = new ItalicNode("_"); + expectedDocument.AddChild(italic); + italic.AddChild(new TextNode("начало")); + italic.AddChild(new TextNode(" ")); + italic.AddChild(new TextNode("__")); + italic.AddChild(new TextNode("внутри")); + italic.AddChild(new TextNode("__")); + italic.AddChild(new TextNode(" ")); + 
italic.AddChild(new TextNode("конец")); + + var actual = parser.ParseTokens([]); + + actual.Should().BeEquivalentTo(expectedDocument); + } + + [Test] + public void ParseTokens_UnpairedUnderscores_ReturnsDocument() + { + var tokens = new List + { + new("__", TokenType.DoubleUnderscore), + new("текст", TokenType.Text), + new("_", TokenType.Underscore), + new("", TokenType.Eof) + }; + + var parser = new MarkdownParser(tokens); + var expectedDocument = new MarkdownDocumentNode(""); + expectedDocument.AddChild(new TextNode("__")); + expectedDocument.AddChild(new TextNode("текст")); + expectedDocument.AddChild(new TextNode("_")); + + var actual = parser.ParseTokens(); + + actual.Should().BeEquivalentTo(expectedDocument); + } + + [Test] + public void ParseTokens_ItalicAtStartWord_ReturnsDocument() + { + var tokens = new List + { + new("_", TokenType.Underscore), + new("те", TokenType.Text), + new("_", TokenType.WordUnderscore), + new("кст", TokenType.Text), + new("", TokenType.Eof) + }; + var parser = new MarkdownParser(tokens); + var expectedDocument = new MarkdownDocumentNode(""); + var italicNode = new ItalicNode("_"); + expectedDocument.AddChild(italicNode); + italicNode.AddChild(new TextNode("те")); + expectedDocument.AddChild(new TextNode("кст")); + + var actual = parser.ParseTokens([]); + + actual.Should().BeEquivalentTo(expectedDocument); + } + + [Test] + public void ParseTokens_ItalicInMiddleWord_ReturnsDocument() + { + var tokens = new List + { + new("т", TokenType.Text), + new("_", TokenType.WordUnderscore), + new("екс", TokenType.Text), + new("_", TokenType.WordUnderscore), + new("т", TokenType.Text), + new("", TokenType.Eof) + }; + var parser = new MarkdownParser(tokens); + var expectedDocument = new MarkdownDocumentNode(""); + expectedDocument.AddChild(new TextNode("т")); + var italic = new ItalicNode("_"); + italic.AddChild(new TextNode("екс")); + expectedDocument.AddChild(italic); + expectedDocument.AddChild(new TextNode("т")); + var actual = 
parser.ParseTokens([]); + + actual.Should().BeEquivalentTo(expectedDocument); + } + + [Test] + public void ParseTokens_ItalicInEndWord_ReturnsDocument() + { + var tokens = new List + { + new("тек", TokenType.Text), + new("_", TokenType.WordUnderscore), + new("ст.", TokenType.Text), + new("_", TokenType.Underscore), + new("", TokenType.Eof) + }; + var parser = new MarkdownParser(tokens); + var expectedDocument = new MarkdownDocumentNode(""); + expectedDocument.AddChild(new TextNode("тек")); + var italic = new ItalicNode("_"); + italic.AddChild(new TextNode("ст.")); + expectedDocument.AddChild(italic); + var actual = parser.ParseTokens([]); + + actual.Should().BeEquivalentTo(expectedDocument); + } + + [Test] + public void ParseTokens_SeparateWordsUnderscores_ReturnsDocument() + { + var tokens = new List + { + new("те", TokenType.Text), + new("_", TokenType.WordUnderscore), + new("кст", TokenType.Text), + new(" ", TokenType.Space), + new("те", TokenType.Text), + new("_", TokenType.WordUnderscore), + new("кст", TokenType.Text), + new("", TokenType.Eof) + }; + var parser = new MarkdownParser(tokens); + var expectedDocument = new MarkdownDocumentNode(""); + expectedDocument.AddChild(new TextNode("те")); + expectedDocument.AddChild(new TextNode("_")); + expectedDocument.AddChild(new TextNode("кст")); + expectedDocument.AddChild(new TextNode(" ")); + expectedDocument.AddChild(new TextNode("те")); + expectedDocument.AddChild(new TextNode("_")); + expectedDocument.AddChild(new TextNode("кст")); + var actual = parser.ParseTokens([]); + + actual.Should().BeEquivalentTo(expectedDocument); + } + + [Test] + public void ParseTokens_OnlyFourUnderscores_ReturnsDocument() + { + var tokens = new List + { + new("__", TokenType.DoubleUnderscore), + new("__", TokenType.DoubleUnderscore), + new("", TokenType.Eof) + }; + var parser = new MarkdownParser(tokens); + var expectedDocument = new MarkdownDocumentNode(""); + expectedDocument.AddChild(new TextNode("__")); + 
expectedDocument.AddChild(new TextNode("__")); + + var actual = parser.ParseTokens([]); + + actual.Should().BeEquivalentTo(expectedDocument); + } + + [Test] + public void ParseTokens_X_ReturnsDocument() + { + var tokens = new List + { + new("\n", TokenType.NewLine), + new("_", TokenType.Underscore), + new("текст", TokenType.Text), + new("_", TokenType.Underscore), + new("\n", TokenType.NewLine), + new("", TokenType.Eof) + }; + var parser = new MarkdownParser(tokens); + var expectedDocument = new MarkdownDocumentNode(""); + var italic = new ItalicNode("_"); + expectedDocument.AddChild(new TextNode("\n")); + italic.AddChild(new TextNode("текст")); + expectedDocument.AddChild(italic); + expectedDocument.AddChild(new TextNode("\n")); + + var actual = parser.ParseTokens([]); + + actual.Should().BeEquivalentTo(expectedDocument); + } + + [Test] + public void ParseTokens_X1_ReturnsDocument() + { + var tokens = new List + { + new("__", TokenType.DoubleUnderscore), + new("пересечения", TokenType.Text), + new(" ", TokenType.Space), + new("_", TokenType.Underscore), + new("двойных", TokenType.Text), + new("__", TokenType.DoubleUnderscore), + new(" ", TokenType.Space), + new("и", TokenType.Text), + new(" ", TokenType.Space), + new("одинарных", TokenType.Text), + new("_", TokenType.Underscore), + new("", TokenType.Eof) + }; + var parser = new MarkdownParser(tokens); + var expectedDocument = new MarkdownDocumentNode(""); + expectedDocument.AddChild(new TextNode("__")); + expectedDocument.AddChild(new TextNode("пересечения")); + expectedDocument.AddChild(new TextNode(" ")); + expectedDocument.AddChild(new TextNode("_")); + expectedDocument.AddChild(new TextNode("двойных")); + expectedDocument.AddChild(new TextNode("__")); + expectedDocument.AddChild(new TextNode(" ")); + expectedDocument.AddChild(new TextNode("и")); + expectedDocument.AddChild(new TextNode(" ")); + expectedDocument.AddChild(new TextNode("одинарных")); + expectedDocument.AddChild(new TextNode("_")); + + var actual 
= parser.ParseTokens([]); + + actual.Should().BeEquivalentTo(expectedDocument); + } + + [Test] + public void ParseTokens_X2_ReturnsDocument() + { + var tokens = new List + { + new("_", TokenType.Underscore), + new("подчерки", TokenType.Text), + new(" ", TokenType.Space), + new("_", TokenType.Underscore), + new("не", TokenType.Text), + new(" ", TokenType.Space), + new("считаются", TokenType.Text), + new("_", TokenType.Underscore), + new("", TokenType.Eof) + }; + var parser = new MarkdownParser(tokens); + var expectedDocument = new MarkdownDocumentNode(""); + expectedDocument.AddChild(new TextNode("_")); + expectedDocument.AddChild(new TextNode("подчерки")); + expectedDocument.AddChild(new TextNode(" ")); + expectedDocument.AddChild(new TextNode("_")); + expectedDocument.AddChild(new TextNode("не")); + expectedDocument.AddChild(new TextNode(" ")); + expectedDocument.AddChild(new TextNode("считаются")); + expectedDocument.AddChild(new TextNode("_")); + + var actual = parser.ParseTokens([]); + + actual.Should().BeEquivalentTo(expectedDocument); + } + + [Test] + public void ParseTokens_X3_ReturnsDocument() + { + var tokens = new List + { + new("эти", TokenType.Text), + new("_", TokenType.Underscore), + new(" ", TokenType.Space), + new("подчерки", TokenType.Text), + new("_", TokenType.Underscore), + new("\n", TokenType.NewLine), + new("", TokenType.Eof) + }; + var parser = new MarkdownParser(tokens); + var expectedDocument = new MarkdownDocumentNode(""); + expectedDocument.AddChild(new TextNode("эти")); + expectedDocument.AddChild(new TextNode("_")); + expectedDocument.AddChild(new TextNode(" ")); + expectedDocument.AddChild(new TextNode("подчерки")); + expectedDocument.AddChild(new TextNode("_")); + expectedDocument.AddChild(new TextNode("\n")); + + var actual = parser.ParseTokens([]); + + actual.Should().BeEquivalentTo(expectedDocument); + } +} \ No newline at end of file diff --git 
a/cs/Markdown/Tests/Results/MdSpecApprovalTest.Render_MdSpec_ReturnCorrectHtml.approved.txt b/cs/Markdown/Tests/Results/MdSpecApprovalTest.Render_MdSpec_ReturnCorrectHtml.approved.txt new file mode 100644 index 000000000..d963fff5d --- /dev/null +++ b/cs/Markdown/Tests/Results/MdSpecApprovalTest.Render_MdSpec_ReturnCorrectHtml.approved.txt @@ -0,0 +1,86 @@ +

Спецификация языка разметки

+ +Посмотрите этот файл в сыром виде. Сравните с тем, что показывает github. +Все совпадения случайны ;) + + + +

Курсив

+ +Текст, окруженный с двух сторон одинарными символами подчерка, +должен помещаться в HTML-тег \ вот так: + +Текст, \окруженный с двух сторон\ одинарными символами подчерка, +должен помещаться в HTML-тег \. + + + +

Полужирный

+ +Выделенный двумя символами текст должен становиться полужирным с помощью тега \. + + + +

Экранирование

+ +Любой символ можно экранировать, чтобы он не считался частью разметки. +_Вот это_, не должно выделиться тегом \. + +Символ экранирования исчезает из результата, только если экранирует что-то. +Здесь сим\волы экранирования\ \должны остаться.\ + +Символ экранирования тоже можно экранировать: \вот это будет выделено тегом \ + + + +

Взаимодействие тегов

+ +Внутри двойного выделения одинарное тоже работает. + +Но не наоборот — внутри одинарного __двойное__ не работает. + +Подчерки внутри текста c цифрами_12_3 не считаются выделением и должны оставаться символами подчерка. + +Однако выделять часть слова они могут: и в начале, и в середине, и в конце. + +В то же время выделение в ра_зных сл_овах не работает. + +__Непарные_ символы в рамках одного абзаца не считаются выделением. + +За подчерками, начинающими выделение, должен следовать непробельный символ. Иначе эти_ подчерки_ не считаются выделением +и остаются просто символами подчерка. + +Подчерки, заканчивающие выделение, должны следовать за непробельным символом. Иначе эти _подчерки _не считаются_ окончанием выделения +и остаются просто символами подчерка. + +В случае __пересечения _двойных__ и одинарных_ подчерков ни один из них не считается выделением. + +Если внутри подчерков пустая строка ____, то они остаются символами подчерка. + + + +

Заголовки

+ +Абзац, начинающийся с "# ", выделяется тегом \

в заголовок. +В тексте заголовка могут присутствовать все прочие символы разметки с указанными правилами. + +Таким образом + +# Заголовок __с _разными_ символами__ + +превратится в: + +

Заголовок с разными символами

+ +

Изображения

+
+Изображения задаются с помощью специальной конструкции вида: ![alt](url), где:
+alt — альтернативный текст изображения;
+url — ссылка на изображение.
+
+Например, запись:
+
+![Cat](https://example.com/image.jpg)
+
+превратится в:
+Cat
\ No newline at end of file
diff --git a/cs/Markdown/Tests/TokenizerTests/TokenAssert.cs b/cs/Markdown/Tests/TokenizerTests/TokenAssert.cs
new file mode 100644
index 000000000..a32360700
--- /dev/null
+++ b/cs/Markdown/Tests/TokenizerTests/TokenAssert.cs
@@ -0,0 +1,27 @@
+using FluentAssertions;
+using Markdown.Tokenizer;
+
+namespace Markdown.Tests.TokenizerTests;
+
+/// <summary>
+/// Shared assertion helper for the tokenizer test fixtures.
+/// </summary>
+public static class TokenAssert
+{
+    /// <summary>
+    /// Tokenizes <paramref name="markdownText"/> and verifies that the produced tokens match
+    /// <paramref name="expectedTokens"/> by type and value, in exactly the same order.
+    /// </summary>
+    /// <param name="markdownText">Markdown source passed to the tokenizer.</param>
+    /// <param name="expectedTokens">Expected token sequence, including the trailing Eof token.</param>
+    public static void AssertToken(string markdownText, List<Token> expectedTokens)
+    {
+        var tokenizer = new MarkdownTokenizer(markdownText);
+
+        var tokens = tokenizer.Tokenize();
+
+        // BeEquivalentTo ignores element order by default; a token stream is only correct
+        // when the order matches too, so strict ordering is required here.
+        tokens.Should().BeEquivalentTo(expectedTokens, options => options.WithStrictOrdering());
+    }
+}
\ No newline at end of file
diff --git a/cs/Markdown/Tests/TokenizerTests/TokenizeEscapeTests.cs b/cs/Markdown/Tests/TokenizerTests/TokenizeEscapeTests.cs
new file mode 100644
index 000000000..6c64ddd55
--- /dev/null
+++ b/cs/Markdown/Tests/TokenizerTests/TokenizeEscapeTests.cs
@@ -0,0 +1,125 @@
+using Markdown.Tokenizer;
+using NUnit.Framework;
+
+namespace Markdown.Tests.TokenizerTests;
+
+/// <summary>
+/// Checks how the tokenizer classifies backslashes and the symbols that follow them.
+/// </summary>
+[TestFixture]
+public class TokenizeEscapeTests
+{
+    [TestCase(@"\_текст_")]
+    public void Tokenize_EscapedUnderscore(string markdown)
+    {
+        var expected = new List<Token>
+        {
+            new(@"\", TokenType.Escape),
+            new("_", TokenType.Underscore),
+            new("текст", TokenType.Text),
+            new("_", TokenType.Underscore),
+            new("", TokenType.Eof)
+        };
+        TokenAssert.AssertToken(markdown, expected);
+    }
+
+    [TestCase(@"\\_текст\\_")]
+    public void Tokenize_DoubleEscapedUnderscoresAroundText(string markdown)
+    {
+        var expected = new List<Token>
+        {
+            new(@"\", TokenType.Escape),
+            new(@"\", TokenType.Escape),
+            new("_", TokenType.Underscore),
+            new("текст", TokenType.Text),
+            new(@"\", TokenType.Escape),
+            new(@"\", TokenType.Escape),
+            new("_", TokenType.Underscore),
+            new("", TokenType.Eof)
+        };
+        TokenAssert.AssertToken(markdown, expected);
+    }
+
+    [TestCase(@"\# текст")]
+    public void Tokenize_EscapedHeader(string markdown)
+    {
+        var expected = new List<Token>
+        {
+            new(@"\", TokenType.Escape),
+            new("#", TokenType.Text),
+            new(" ", TokenType.Space),
+            new("текст", TokenType.Text),
+            new("", TokenType.Eof)
+        };
+        TokenAssert.AssertToken(markdown, expected);
+    }
+
+    [TestCase(@"\![Cat](https://i.pinimg.com/originals/f5/ef/a6/f5efa6a5b2c76c038ef0c8d2502fd2f6.jpg)")]
+    public void Tokenize_EscapedExclamationInImage(string markdown)
+    {
+        var expected = new List<Token>
+        {
+            new(@"\", TokenType.Escape),
+            new("!", TokenType.Exclamation),
+            new("[", TokenType.LBracket),
+            new("Cat", TokenType.Text),
+            new("]", TokenType.RBracket),
+            new("(", TokenType.LParenthesis),
+            new("https://i.pinimg.com/originals/f5/ef/a6/f5efa6a5b2c76c038ef0c8d2502fd2f6.jpg", TokenType.Text),
+            new(")", TokenType.RParenthesis),
+            new("", TokenType.Eof)
+        };
+        TokenAssert.AssertToken(markdown, expected);
+    }
+
+    [TestCase(@"!\[Cat](https://i.pinimg.com/originals/f5/ef/a6/f5efa6a5b2c76c038ef0c8d2502fd2f6.jpg)")]
+    public void Tokenize_EscapedAltInImage(string markdown)
+    {
+        var expected = new List<Token>
+        {
+            new("!", TokenType.Text),
+            new(@"\", TokenType.Escape),
+            new("[", TokenType.LBracket),
+            new("Cat", TokenType.Text),
+            new("]", TokenType.RBracket),
+            new("(", TokenType.LParenthesis),
+            new("https://i.pinimg.com/originals/f5/ef/a6/f5efa6a5b2c76c038ef0c8d2502fd2f6.jpg", TokenType.Text),
+            new(")", TokenType.RParenthesis),
+            new("", TokenType.Eof)
+        };
+        TokenAssert.AssertToken(markdown, expected);
+    }
+
+    [TestCase(@"![Cat]\(https://i.pinimg.com/originals/f5/ef/a6/f5efa6a5b2c76c038ef0c8d2502fd2f6.jpg)")]
+    public void Tokenize_EscapedUrlInImage(string markdown)
+    {
+        var expected = new List<Token>
+        {
+            new("!", TokenType.Exclamation),
+            new("[", TokenType.LBracket),
+            new("Cat", TokenType.Text),
+            new("]", TokenType.RBracket),
+            new(@"\", TokenType.Escape),
+            new("(", TokenType.LParenthesis),
+            new("https://i.pinimg.com/originals/f5/ef/a6/f5efa6a5b2c76c038ef0c8d2502fd2f6.jpg", TokenType.Text),
+            new(")", TokenType.RParenthesis),
+            new("", TokenType.Eof)
+        };
+        TokenAssert.AssertToken(markdown, expected);
+    }
+
+    [TestCase(@"т\екс\т")]
+    public void Tokenize_TextWithMultipleEscapes(string markdown)
+    {
+        var expected = new List<Token>
+        {
+            new("т", TokenType.Text),
+            new(@"\", TokenType.Text),
+            new("екс", TokenType.Text),
+            new(@"\", TokenType.Text),
+            new("т", TokenType.Text),
+            new("", TokenType.Eof)
+        };
+        TokenAssert.AssertToken(markdown, expected);
+    }
+}
\ No newline at end of file
diff --git a/cs/Markdown/Tests/TokenizerTests/TokenizeHeaderTests.cs b/cs/Markdown/Tests/TokenizerTests/TokenizeHeaderTests.cs
new file mode 100644
index 000000000..5a53012d6
--- /dev/null
+++ b/cs/Markdown/Tests/TokenizerTests/TokenizeHeaderTests.cs
@@ -0,0 +1,89 @@
+using Markdown.Tokenizer;
+using NUnit.Framework;
+
+namespace Markdown.Tests.TokenizerTests;
+
+/// <summary>
+/// Checks when '#' is tokenized as a header marker and when it stays plain text.
+/// </summary>
+[TestFixture]
+public class TokenizeHeaderTests
+{
+    [TestCase("# текст")]
+    public void Tokenize_SimpleHeader_ReturnsCorrectTokens(string markdownText)
+    {
+        var expectedTokens = new List<Token>
+        {
+            new("#", TokenType.Hash),
+            new(" ", TokenType.Space),
+            new("текст", TokenType.Text),
+            new("", TokenType.Eof)
+        };
+
+        TokenAssert.AssertToken(markdownText, expectedTokens);
+    }
+
+    [TestCase("#текст")]
+    public void Tokenize_NoSpaceAfterHash_ReturnsCorrectTokens(string markdownText)
+    {
+        var expectedTokens = new List<Token>
+        {
+            new("#", TokenType.Text),
+            new("текст", TokenType.Text),
+            new("", TokenType.Eof)
+        };
+
+        TokenAssert.AssertToken(markdownText, expectedTokens);
+    }
+
+    [TestCase("# текст \n# текст")]
+    public void Tokenize_TwoHeadersInDifferentLines_ReturnsCorrectTokens(string markdownText)
+    {
+        var expectedTokens = new List<Token>
+        {
+            new("#", TokenType.Hash),
+            new(" ", TokenType.Space),
+            new("текст", TokenType.Text),
+            new(" ", TokenType.Space),
+            new("\n", TokenType.NewLine),
+            new("#", TokenType.Hash),
+            new(" ", TokenType.Space),
+            new("текст", TokenType.Text),
+            new("", TokenType.Eof)
+        };
+
+        TokenAssert.AssertToken(markdownText, expectedTokens);
+    }
+
+    // '#' is a header marker only as the first character of a line, so a leading space makes it text.
+    [TestCase(" # текст")]
+    public void Tokenize_X_ReturnsCorrectTokens(string markdownText)
+    {
+        var expectedTokens = new List<Token>
+        {
+            new(" ", TokenType.Space),
+            new("#", TokenType.Text),
+            new(" ", TokenType.Space),
+            new("текст", TokenType.Text),
+            new("", TokenType.Eof)
+        };
+
+        TokenAssert.AssertToken(markdownText, expectedTokens);
+    }
+
+    [TestCase("#   текст")]
+    public void Tokenize_ManySpacesAfterHash_ReturnsCorrectTokens(string markdownText)
+    {
+        var expectedTokens = new List<Token>
+        {
+            new("#", TokenType.Hash),
+            new(" ", TokenType.Space),
+            new(" ", TokenType.Space),
+            new(" ", TokenType.Space),
+            new("текст", TokenType.Text),
+            new("", TokenType.Eof)
+        };
+
+        TokenAssert.AssertToken(markdownText, expectedTokens);
+    }
+}
\ No newline at end of file
diff --git a/cs/Markdown/Tests/TokenizerTests/TokenizeImageTests.cs b/cs/Markdown/Tests/TokenizerTests/TokenizeImageTests.cs
new file mode 100644
index 000000000..c239eadb1
--- /dev/null
+++ b/cs/Markdown/Tests/TokenizerTests/TokenizeImageTests.cs
@@ -0,0 +1,121 @@
+using Markdown.Tokenizer;
+using NUnit.Framework;
+
+namespace Markdown.Tests.TokenizerTests;
+
+/// <summary>
+/// Checks tokenization of the image construct ![alt](url) and of inputs that only resemble it.
+/// </summary>
+[TestFixture]
+public class TokenizeImageTests
+{
+    [TestCase("![Cat](https://example.com/image.jpg)")]
+    public void Tokenize_ValidUrlAndAlt(string markdown)
+    {
+        var expected = new List<Token>
+        {
+            new("!", TokenType.Exclamation),
+            new("[", TokenType.LBracket),
+            new("Cat", TokenType.Text),
+            new("]", TokenType.RBracket),
+            new("(", TokenType.LParenthesis),
+            new("https://example.com/image.jpg", TokenType.Text),
+            new(")", TokenType.RParenthesis),
+            new("", TokenType.Eof)
+        };
+        TokenAssert.AssertToken(markdown, expected);
+    }
+
+    [TestCase("!\n[Cat](https://example.com/image.jpg)")]
+    public void Tokenize_NewlineAfterExclamation(string markdown)
+    {
+        var expected = new List<Token>
+        {
+            new("!", TokenType.Text),
+            new("\n", TokenType.NewLine),
+            new("[", TokenType.LBracket),
+            new("Cat", TokenType.Text),
+            new("]", TokenType.RBracket),
+            new("(", TokenType.LParenthesis),
+            new("https://example.com/image.jpg", TokenType.Text),
+            new(")", TokenType.RParenthesis),
+            new("", TokenType.Eof)
+        };
+        TokenAssert.AssertToken(markdown, expected);
+    }
+
+    [TestCase("![C[]at](https://example.com/image.jpg)")]
+    public void Tokenize_AltWithBrackets(string markdown)
+    {
+        var expected = new List<Token>
+        {
+            new("!", TokenType.Exclamation),
+            new("[", TokenType.LBracket),
+            new("C", TokenType.Text),
+            new("[", TokenType.LBracket),
+            new("]", TokenType.RBracket),
+            new("at", TokenType.Text),
+            new("]", TokenType.RBracket),
+            new("(", TokenType.LParenthesis),
+            new("https://example.com/image.jpg", TokenType.Text),
+            new(")", TokenType.RParenthesis),
+            new("", TokenType.Eof)
+        };
+        TokenAssert.AssertToken(markdown, expected);
+    }
+
+    [TestCase("![Cat](https://example.com()/image.jpg)")]
+    public void Tokenize_UrlWithParenthesis(string markdown)
+    {
+        var expected = new List<Token>
+        {
+            new("!", TokenType.Exclamation),
+            new("[", TokenType.LBracket),
+            new("Cat", TokenType.Text),
+            new("]", TokenType.RBracket),
+            new("(", TokenType.LParenthesis),
+            new("https://example.com", TokenType.Text),
+            new("(", TokenType.LParenthesis),
+            new(")", TokenType.RParenthesis),
+            new("/image.jpg", TokenType.Text),
+            new(")", TokenType.RParenthesis),
+            new("", TokenType.Eof)
+        };
+        TokenAssert.AssertToken(markdown, expected);
+    }
+
+    [TestCase("![](https://example.com/image.jpg)")]
+    public void Tokenize_EmptyAltText(string markdown)
+    {
+        var expected = new List<Token>
+        {
+            new("!", TokenType.Exclamation),
+            new("[", TokenType.LBracket),
+            new("]", TokenType.RBracket),
+            new("(", TokenType.LParenthesis),
+            new("https://example.com/image.jpg", TokenType.Text),
+            new(")", TokenType.RParenthesis),
+            new("", TokenType.Eof)
+        };
+        TokenAssert.AssertToken(markdown, expected);
+    }
+
+    [TestCase("![__Cat__](https://example.com/image.jpg)")]
+    public void Tokenize_AltWithBold(string markdown)
+    {
+        var expected = new List<Token>
+        {
+            new("!", TokenType.Exclamation),
+            new("[", TokenType.LBracket),
+            new("__", TokenType.DoubleUnderscore),
+            new("Cat", TokenType.Text),
+            new("__", TokenType.DoubleUnderscore),
+            new("]", TokenType.RBracket),
+            new("(", TokenType.LParenthesis),
+            new("https://example.com/image.jpg", TokenType.Text),
+            new(")", TokenType.RParenthesis),
+            new("", TokenType.Eof)
+        };
+        TokenAssert.AssertToken(markdown, expected);
+    }
+}
\ No newline at end of file
diff --git a/cs/Markdown/Tests/TokenizerTests/TokenizeUnderscoresTests.cs b/cs/Markdown/Tests/TokenizerTests/TokenizeUnderscoresTests.cs
new file mode 100644
index 000000000..76516fc13
--- /dev/null
+++ b/cs/Markdown/Tests/TokenizerTests/TokenizeUnderscoresTests.cs
@@ -0,0 +1,444 @@
+using Markdown.Tokenizer;
+using NUnit.Framework;
+
+namespace Markdown.Tests.TokenizerTests;
+
+/// <summary>
+/// Checks how single and double underscores are classified depending on their neighbours.
+/// </summary>
+[TestFixture]
+public class TokenizeUnderscoresTests
+{
+    [TestCase("_текст_")]
+    public void Tokenize_SimpleItalic_ReturnsCorrectTokens(string markdownText)
+    {
+        var expectedToken = new List<Token>
+        {
+            new("_", TokenType.Underscore),
+            new("текст", TokenType.Text),
+            new("_", TokenType.Underscore),
+            new("", TokenType.Eof)
+        };
+
+        TokenAssert.AssertToken(markdownText, expectedToken);
+    }
+
+    [TestCase("__текст__")]
+    public void Tokenize_SimpleBold_ReturnsCorrectTokens(string markdownText)
+    {
+        var expected = new List<Token>
+        {
+            new("__", TokenType.DoubleUnderscore),
+            new("текст", TokenType.Text),
+            new("__", TokenType.DoubleUnderscore),
+            new("", TokenType.Eof)
+        };
+
+        TokenAssert.AssertToken(markdownText, expected);
+    }
+
+    [TestCase("__начало _внутри_ конец__")]
+    public void Tokenize_BoldWithItalicInside(string markdownText)
+    {
+        var expected = new List<Token>
+        {
+            new("__", TokenType.DoubleUnderscore),
+            new("начало", TokenType.Text),
+            new(" ", TokenType.Space),
+            new("_", TokenType.Underscore),
+            new("внутри", TokenType.Text),
+            new("_", TokenType.Underscore),
+            new(" ", TokenType.Space),
+            new("конец", TokenType.Text),
+            new("__", TokenType.DoubleUnderscore),
+            new("", TokenType.Eof)
+        };
+
+        TokenAssert.AssertToken(markdownText, expected);
+    }
+
+    [TestCase("_те\nкст_")]
+    public void Tokenize_ItalicWithNewLineInside(string markdown)
+    {
+        var expected = new List<Token>
+        {
+            new("_", TokenType.Underscore),
+            new("те", TokenType.Text),
+            new("\n", TokenType.NewLine),
+            new("кст", TokenType.Text),
+            new("_", TokenType.Underscore),
+            new("", TokenType.Eof)
+        };
+        TokenAssert.AssertToken(markdown, expected);
+    }
+
+    // NOTE(review): same input and expectations as Tokenize_SimpleBold_ReturnsCorrectTokens.
+    [TestCase("__текст__")]
+    public void Tokenize_SimpleBold(string markdown)
+    {
+        var expected = new List<Token>
+        {
+            new("__", TokenType.DoubleUnderscore),
+            new("текст", TokenType.Text),
+            new("__", TokenType.DoubleUnderscore),
+            new("", TokenType.Eof)
+        };
+        TokenAssert.AssertToken(markdown, expected);
+    }
+
+    [TestCase("_те_кст")]
+    public void Tokenize_ItalicAtStartWord(string markdown)
+    {
+        var expected = new List<Token>
+        {
+            new("_", TokenType.Underscore),
+            new("те", TokenType.Text),
+            new("_", TokenType.WordUnderscore),
+            new("кст", TokenType.Text),
+            new("", TokenType.Eof)
+        };
+        TokenAssert.AssertToken(markdown, expected);
+    }
+
+    [TestCase("т_екс_т")]
+    public void Tokenize_ItalicInMiddleWord(string markdown)
+    {
+        var expected = new List<Token>
+        {
+            new("т", TokenType.Text),
+            new("_", TokenType.WordUnderscore),
+            new("екс", TokenType.Text),
+            new("_", TokenType.WordUnderscore),
+            new("т", TokenType.Text),
+            new("", TokenType.Eof)
+        };
+        TokenAssert.AssertToken(markdown, expected);
+    }
+
+    [TestCase("текс_т._")]
+    public void Tokenize_ItalicAtEndWord(string markdown)
+    {
+        var expected = new List<Token>
+        {
+            new("текс", TokenType.Text),
+            new("_", TokenType.WordUnderscore),
+            new("т.", TokenType.Text),
+            new("_", TokenType.Underscore),
+            new("", TokenType.Eof)
+        };
+        TokenAssert.AssertToken(markdown, expected);
+    }
+
+    [TestCase("__текст_")]
+    public void Tokenize_UnpairedUnderscores(string markdown)
+    {
+        var expected = new List<Token>
+        {
+            new("__", TokenType.DoubleUnderscore),
+            new("текст", TokenType.Text),
+            new("_", TokenType.Underscore),
+            new("", TokenType.Eof)
+        };
+        TokenAssert.AssertToken(markdown, expected);
+    }
+
+    [TestCase("_ текст_")]
+    public void Tokenize_LeadingSpacePreventsItalic(string markdown)
+    {
+        var expected = new List<Token>
+        {
+            new("_", TokenType.Text),
+            new(" ", TokenType.Space),
+            new("текст", TokenType.Text),
+            new("_", TokenType.Underscore),
+            new("", TokenType.Eof)
+        };
+        TokenAssert.AssertToken(markdown, expected);
+    }
+
+    [TestCase("1_2_3")]
+    public void Tokenize_DigitsPreventItalic(string markdown)
+    {
+        var expected = new List<Token>
+        {
+            new("1", TokenType.Text),
+            new("_", TokenType.Text),
+            new("2", TokenType.Text),
+            new("_", TokenType.Text),
+            new("3", TokenType.Text),
+            new("", TokenType.Eof)
+        };
+        TokenAssert.AssertToken(markdown, expected);
+    }
+
+    [TestCase("те_кст те_кст")]
+    public void Tokenize_SeparateWordsUnderscores(string markdown)
+    {
+        var expected = new List<Token>
+        {
+            new("те", TokenType.Text),
+            new("_", TokenType.WordUnderscore),
+            new("кст", TokenType.Text),
+            new(" ", TokenType.Space),
+            new("те", TokenType.Text),
+            new("_", TokenType.WordUnderscore),
+            new("кст", TokenType.Text),
+            new("", TokenType.Eof)
+        };
+        TokenAssert.AssertToken(markdown, expected);
+    }
+
+    [TestCase("_текст _текст")]
+    public void Tokenize_UnclosedItalicMultipleWords(string markdown)
+    {
+        var expected = new List<Token>
+        {
+            new("_", TokenType.Underscore),
+            new("текст", TokenType.Text),
+            new(" ", TokenType.Space),
+            new("_", TokenType.Underscore),
+            new("текст", TokenType.Text),
+            new("", TokenType.Eof)
+        };
+        TokenAssert.AssertToken(markdown, expected);
+    }
+
+    [TestCase("__")]
+    public void Tokenize_OnlyDoubleUnderscore(string markdown)
+    {
+        var expected = new List<Token>
+        {
+            new("__", TokenType.Text),
+            new("", TokenType.Eof)
+        };
+        TokenAssert.AssertToken(markdown, expected);
+    }
+
+    [TestCase("____")]
+    public void Tokenize_OnlyFourUnderscores(string markdown)
+    {
+        var expected = new List<Token>
+        {
+            new("__", TokenType.DoubleUnderscore),
+            new("__", TokenType.DoubleUnderscore),
+            new("", TokenType.Eof)
+        };
+        TokenAssert.AssertToken(markdown, expected);
+    }
+
+    [TestCase("__текст _текст__ текст_")]
+    public void Tokenize_BoldWithNestedItalic(string markdown)
+    {
+        var expected = new List<Token>
+        {
+            new("__", TokenType.DoubleUnderscore),
+            new("текст", TokenType.Text),
+            new(" ", TokenType.Space),
+            new("_", TokenType.Underscore),
+            new("текст", TokenType.Text),
+            new("__", TokenType.DoubleUnderscore),
+            new(" ", TokenType.Space),
+            new("текст", TokenType.Text),
+            new("_", TokenType.Underscore),
+            new("", TokenType.Eof)
+        };
+        TokenAssert.AssertToken(markdown, expected);
+    }
+
+    [TestCase("_текст __текст__ текст_")]
+    public void Tokenize_ItalicWithBoldInside(string markdown)
+    {
+        var expected = new List<Token>
+        {
+            new("_", TokenType.Underscore),
+            new("текст", TokenType.Text),
+            new(" ", TokenType.Space),
+            new("__", TokenType.DoubleUnderscore),
+            new("текст", TokenType.Text),
+            new("__", TokenType.DoubleUnderscore),
+            new(" ", TokenType.Space),
+            new("текст", TokenType.Text),
+            new("_", TokenType.Underscore),
+            new("", TokenType.Eof)
+        };
+        TokenAssert.AssertToken(markdown, expected);
+    }
+
+    [TestCase("_текст т__екс__т текст_")]
+    public void Tokenize_ItalicWithBoldInsideWord(string markdown)
+    {
+        var expected = new List<Token>
+        {
+            new("_", TokenType.Underscore),
+            new("текст", TokenType.Text),
+            new(" ", TokenType.Space),
+            new("т", TokenType.Text),
+            new("__", TokenType.WordDoubleUnderscore),
+            new("екс", TokenType.Text),
+            new("__", TokenType.WordDoubleUnderscore),
+            new("т", TokenType.Text),
+            new(" ", TokenType.Space),
+            new("текст", TokenType.Text),
+            new("_", TokenType.Underscore),
+            new("", TokenType.Eof)
+        };
+        TokenAssert.AssertToken(markdown, expected);
+    }
+
+    [TestCase("__текст т_екст текст___")]
+    public void Tokenize_BoldWithTrailingUnderscore(string markdown)
+    {
+        var expected = new List<Token>
+        {
+            new("__", TokenType.DoubleUnderscore),
+            new("текст", TokenType.Text),
+            new(" ", TokenType.Space),
+            new("т", TokenType.Text),
+            new("_", TokenType.WordUnderscore),
+            new("екст", TokenType.Text),
+            new(" ", TokenType.Space),
+            new("текст", TokenType.Text),
+            new("__", TokenType.DoubleUnderscore),
+            new("_", TokenType.Underscore),
+            new("", TokenType.Eof)
+        };
+        TokenAssert.AssertToken(markdown, expected);
+    }
+
+    [TestCase("т__е_к_с__т")]
+    public void Tokenize_BoldInWordWithItalicInside(string markdown)
+    {
+        var expected = new List<Token>
+        {
+            new("т", TokenType.Text),
+            new("__", TokenType.WordDoubleUnderscore),
+            new("е", TokenType.Text),
+            new("_", TokenType.WordUnderscore),
+            new("к", TokenType.Text),
+            new("_", TokenType.WordUnderscore),
+            new("с", TokenType.Text),
+            new("__", TokenType.WordDoubleUnderscore),
+            new("т", TokenType.Text),
+            new("", TokenType.Eof)
+        };
+        TokenAssert.AssertToken(markdown, expected);
+    }
+
+    [TestCase("__те_к_ст__")]
+    public void Tokenize_BoldWithItalicInsideInWord(string markdown)
+    {
+        var expected = new List<Token>
+        {
+            new("__", TokenType.DoubleUnderscore),
+            new("те", TokenType.Text),
+            new("_", TokenType.WordUnderscore),
+            new("к", TokenType.Text),
+            new("_", TokenType.WordUnderscore),
+            new("ст", TokenType.Text),
+            new("__", TokenType.DoubleUnderscore),
+            new("", TokenType.Eof)
+        };
+        TokenAssert.AssertToken(markdown, expected);
+    }
+
+    [TestCase("_те__к__ст_")]
+    public void Tokenize_ItalicWithBoldInsideInWord(string markdown)
+    {
+        var expected = new List<Token>
+        {
+            new("_", TokenType.Underscore),
+            new("те", TokenType.Text),
+            new("__", TokenType.WordDoubleUnderscore),
+            new("к", TokenType.Text),
+            new("__", TokenType.WordDoubleUnderscore),
+            new("ст", TokenType.Text),
+            new("_", TokenType.Underscore),
+            new("", TokenType.Eof)
+        };
+        TokenAssert.AssertToken(markdown, expected);
+    }
+
+    [TestCase("_123_")]
+    public void Tokenize_ItalicWithNumbers(string markdown)
+    {
+        var expected = new List<Token>
+        {
+            new("_", TokenType.Underscore),
+            new("123", TokenType.Text),
+            new("_", TokenType.Underscore),
+            new("", TokenType.Eof)
+        };
+        TokenAssert.AssertToken(markdown, expected);
+    }
+
+    [TestCase("__123__")]
+    public void Tokenize_BoldWithNumbers(string markdown)
+    {
+        var expected = new List<Token>
+        {
+            new("__", TokenType.DoubleUnderscore),
+            new("123", TokenType.Text),
+            new("__", TokenType.DoubleUnderscore),
+            new("", TokenType.Eof)
+        };
+        TokenAssert.AssertToken(markdown, expected);
+    }
+
+    [TestCase("__++++__")]
+    public void Tokenize_BoldWithSymbols(string markdown)
+    {
+        var expected = new List<Token>
+        {
+            new("__", TokenType.DoubleUnderscore),
+            new("++++", TokenType.Text),
+            new("__", TokenType.DoubleUnderscore),
+            new("", TokenType.Eof)
+        };
+        TokenAssert.AssertToken(markdown, expected);
+    }
+
+    [TestCase("__текст___текст_")]
+    public void Tokenize_BoldThenItalic(string markdown)
+    {
+        var expected = new List<Token>
+        {
+            new("__", TokenType.DoubleUnderscore),
+            new("текст", TokenType.Text),
+            new("__", TokenType.DoubleUnderscore),
+            new("_", TokenType.Underscore),
+            new("текст", TokenType.Text),
+            new("_", TokenType.Underscore),
+            new("", TokenType.Eof)
+        };
+        TokenAssert.AssertToken(markdown, expected);
+    }
+
+    [TestCase("__ текст __")]
+    public void Tokenize_BoldWithSpacesPrevents(string markdown)
+    {
+        var expected = new List<Token>
+        {
+            new("__", TokenType.Text),
+            new(" ", TokenType.Space),
+            new("текст", TokenType.Text),
+            new(" ", TokenType.Space),
+            new("__", TokenType.Text),
+            new("", TokenType.Eof)
+        };
+        TokenAssert.AssertToken(markdown, expected);
+    }
+
+    [TestCase("__текст __")]
+    public void Tokenize_BoldAfterSpacesPrevents(string markdown)
+    {
+        var expected = new List<Token>
+        {
+            new("__", TokenType.DoubleUnderscore),
+            new("текст", TokenType.Text),
+            new(" ", TokenType.Space),
+            new("__", TokenType.Text),
+            new("", TokenType.Eof)
+        };
+        TokenAssert.AssertToken(markdown, expected);
+    }
+}
\ No newline at end of file
diff --git a/cs/Markdown/Tokenizer/MarkdownTokenizer.cs
b/cs/Markdown/Tokenizer/MarkdownTokenizer.cs
new file mode 100644
index 000000000..5981f31a8
--- /dev/null
+++ b/cs/Markdown/Tokenizer/MarkdownTokenizer.cs
@@ -0,0 +1,145 @@
+namespace Markdown.Tokenizer;
+
+/// <summary>
+/// Splits markdown text into a flat list of <see cref="Token"/>s: markup symbols become
+/// dedicated tokens and runs of ordinary characters become <see cref="TokenType.Text"/> tokens.
+/// </summary>
+public class MarkdownTokenizer
+{
+    // Characters that terminate a run of plain text.
+    private static readonly HashSet<char> SpecialSymbols =
+        ['#', '_', '!', '[', ']', '(', ')', ' ', '\n', '\\', '\r'];
+
+    // Characters that a preceding backslash is allowed to escape.
+    private static readonly HashSet<char> EscapableSymbols =
+        ['#', '_', '!', '[', ']', '(', ')', '\\'];
+
+    private readonly string markdownText;
+    private int position;
+
+    /// <summary>Creates a tokenizer over <paramref name="markdownText"/>.</summary>
+    /// <exception cref="ArgumentNullException"><paramref name="markdownText"/> is null.</exception>
+    public MarkdownTokenizer(string markdownText)
+    {
+        ArgumentNullException.ThrowIfNull(markdownText);
+        this.markdownText = markdownText;
+    }
+
+    private char? PrevSymbol => SymbolAt(position - 1);
+    private char? NextSymbol => SymbolAt(position + 1);
+
+    /// <summary>
+    /// Tokenizes the whole text. Every call rescans from the first character, so calling
+    /// the method repeatedly on one instance yields the same sequence each time.
+    /// </summary>
+    /// <returns>Tokens in source order, terminated by a <see cref="TokenType.Eof"/> token.</returns>
+    public List<Token> Tokenize()
+    {
+        position = 0;
+        var tokens = new List<Token>();
+        while (position < markdownText.Length)
+        {
+            var token = markdownText[position] switch
+            {
+                '#' => TokenizeHeader(),
+                '_' => TokenizeUnderscore(),
+                '!' => TokenizeExclamation(),
+                '(' => new Token("(", TokenType.LParenthesis),
+                ')' => new Token(")", TokenType.RParenthesis),
+                '[' => new Token("[", TokenType.LBracket),
+                ']' => new Token("]", TokenType.RBracket),
+                '\\' => TokenizeEscape(),
+                ' ' => new Token(" ", TokenType.Space),
+                '\n' => new Token("\n", TokenType.NewLine),
+                '\r' => new Token("\r", TokenType.Carriage),
+                _ => TokenizeText()
+            };
+            tokens.Add(token);
+            // Every helper leaves the position on the last character it consumed.
+            position++;
+        }
+
+        tokens.Add(new Token("", TokenType.Eof));
+        return tokens;
+    }
+
+    // Returns the character at the index, or null when the index is outside the text.
+    private char? SymbolAt(int index)
+    {
+        return index >= 0 && index < markdownText.Length ? markdownText[index] : null;
+    }
+
+    // '#' is a header marker only as the first character of a line and only before a space.
+    private Token TokenizeHeader()
+    {
+        var startsHeader = (PrevSymbol is null or '\n') && NextSymbol is ' ';
+        return new Token("#", startsHeader ? TokenType.Hash : TokenType.Text);
+    }
+
+    // A backslash is an escape only in front of an escapable symbol; otherwise it is text.
+    private Token TokenizeEscape()
+    {
+        var escapesNext = NextSymbol is { } next && EscapableSymbols.Contains(next);
+        return new Token(@"\", escapesNext ? TokenType.Escape : TokenType.Text);
+    }
+
+    /// <summary>
+    /// Classifies the single or double underscore that starts at the current position.
+    /// </summary>
+    /// <remarks>
+    /// At most two underscores are taken, so "___" yields "__" followed by "_". The run is
+    /// text when both neighbours are missing or spaces, a word-level marker when both
+    /// neighbours are letters, text when it is inside a word next to a digit, and a regular
+    /// marker otherwise.
+    /// </remarks>
+    private Token TokenizeUnderscore()
+    {
+        var left = PrevSymbol;
+        var isDouble = NextSymbol == '_';
+        if (isDouble)
+            position++; // step onto the second underscore so NextSymbol looks past the pair
+
+        var right = NextSymbol;
+        var value = isDouble ? "__" : "_";
+
+        if (!HasNonSpaceNeighbour(left, right))
+            return new Token(value, TokenType.Text);
+
+        if (left is not { } l || right is not { } r
+            || !char.IsLetterOrDigit(l) || !char.IsLetterOrDigit(r))
+            return new Token(value, isDouble ? TokenType.DoubleUnderscore : TokenType.Underscore);
+
+        // Both neighbours are letters or digits here; a digit on either side demotes the run to text.
+        if (char.IsDigit(l) || char.IsDigit(r))
+            return new Token(value, TokenType.Text);
+
+        return new Token(value, isDouble ? TokenType.WordDoubleUnderscore : TokenType.WordUnderscore);
+    }
+
+    // NOTE(review): only ' ' counts as blank; '\n', '\r' and tabs pass as non-space neighbours.
+    // Confirm against the markdown spec before tightening this to char.IsWhiteSpace.
+    private static bool HasNonSpaceNeighbour(char? left, char? right)
+    {
+        return left is not (null or ' ') || right is not (null or ' ');
+    }
+
+    // '!' starts an image only when it is immediately followed by '['.
+    private Token TokenizeExclamation()
+    {
+        return new Token("!", NextSymbol == '[' ? TokenType.Exclamation : TokenType.Text);
+    }
+
+    // Consumes characters up to, but not including, the next special symbol or the end of the text.
+    private Token TokenizeText()
+    {
+        var start = position;
+        while (position < markdownText.Length && !SpecialSymbols.Contains(markdownText[position]))
+            position++;
+
+        var text = markdownText[start..position];
+        // Step back onto the last consumed character; Tokenize advances past it.
+        position--;
+
+        return new Token(text, TokenType.Text);
+    }
+}
\ No newline at end of file
diff --git a/cs/Markdown/Tokenizer/Token.cs b/cs/Markdown/Tokenizer/Token.cs
new file mode 100644
index 000000000..ab584cff6
--- /dev/null
+++ b/cs/Markdown/Tokenizer/Token.cs
@@ -0,0 +1,19 @@
+namespace Markdown.Tokenizer;
+
+/// <summary>
+/// A lexical unit produced by <see cref="MarkdownTokenizer"/>: the source text it covers and its kind.
+/// </summary>
+public class Token
+{
+    /// <summary>Kind of the token.</summary>
+    public TokenType Type { get; }
+
+    /// <summary>Source text of the token; empty for the Eof token.</summary>
+    public string Value { get; }
+
+    public Token(string value, TokenType type)
+    {
+        Value = value;
+        Type = type;
+    }
+}
\ No newline at end of file
diff --git a/cs/Markdown/Tokenizer/TokenType.cs b/cs/Markdown/Tokenizer/TokenType.cs
new file mode 100644
index 000000000..b4c243d9b
--- /dev/null
+++ b/cs/Markdown/Tokenizer/TokenType.cs
@@ -0,0 +1,22 @@
+namespace Markdown.Tokenizer;
+
+/// <summary>Kinds of tokens emitted by <see cref="MarkdownTokenizer"/>.</summary>
+public enum TokenType
+{
+    Hash, // '#' at the start of a line, followed by a space
+    Underscore, // "_" that is not enclosed by letters or digits
+    DoubleUnderscore, // "__" that is not enclosed by letters or digits
+    WordUnderscore, // "_" between two letters
+    WordDoubleUnderscore, // "__" between two letters
+    Text,
+    NewLine, // '\n'
+    Exclamation, // '!' immediately followed by '['
+    LBracket,
+    RBracket,
+    LParenthesis,
+    RParenthesis,
+    Escape, // '\' in front of an escapable symbol
+    Space,
+    Carriage, // '\r'
+    Eof // always the last token
+}
\ No newline at end of file
diff --git a/cs/clean-code.sln b/cs/clean-code.sln
index 2206d54db..d2d3c4e69 100644
--- a/cs/clean-code.sln
+++ b/cs/clean-code.sln
@@ -9,6 +9,8 @@ Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "ControlDigit", "ControlDigi
 EndProject
 Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Samples", "Samples\Samples.csproj", "{C3EF41D7-50EF-4CE1-B30A-D1D81C93D7FA}"
 EndProject
+Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Markdown", "Markdown\Markdown.csproj",
"{737AD9F0-D941-4026-AA57-A9CB28028C28}" +EndProject Global GlobalSection(SolutionConfigurationPlatforms) = preSolution Debug|Any CPU = Debug|Any CPU @@ -27,5 +29,9 @@ Global {C3EF41D7-50EF-4CE1-B30A-D1D81C93D7FA}.Debug|Any CPU.Build.0 = Debug|Any CPU {C3EF41D7-50EF-4CE1-B30A-D1D81C93D7FA}.Release|Any CPU.ActiveCfg = Release|Any CPU {C3EF41D7-50EF-4CE1-B30A-D1D81C93D7FA}.Release|Any CPU.Build.0 = Release|Any CPU + {737AD9F0-D941-4026-AA57-A9CB28028C28}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {737AD9F0-D941-4026-AA57-A9CB28028C28}.Debug|Any CPU.Build.0 = Debug|Any CPU + {737AD9F0-D941-4026-AA57-A9CB28028C28}.Release|Any CPU.ActiveCfg = Release|Any CPU + {737AD9F0-D941-4026-AA57-A9CB28028C28}.Release|Any CPU.Build.0 = Release|Any CPU EndGlobalSection EndGlobal