Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Binary file added cs/.DS_Store
Binary file not shown.
16 changes: 16 additions & 0 deletions cs/Markdown.Core/Lexing/Handlers/BracketTokenHandler.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
namespace Markdown.Core.Lexing.Handlers;

/// <summary>
/// Token handler for square brackets: emits a one-character
/// <see cref="TokenKind.LeftBracket"/> or <see cref="TokenKind.RightBracket"/> token.
/// </summary>
public class BracketTokenHandler : ITokenHandler
{
    /// <summary>Tells whether the character at <paramref name="index"/> is '[' or ']'.</summary>
    /// <param name="source">Text being tokenized.</param>
    /// <param name="index">Position of the character to inspect.</param>
    /// <returns><c>true</c> for '[' or ']', otherwise <c>false</c>.</returns>
    public bool CanHandle(ReadOnlySpan<char> source, int index)
    {
        var current = source[index];
        return current == '[' || current == ']';
    }

    /// <summary>Appends a bracket token covering the single character at <paramref name="index"/>.</summary>
    /// <param name="source">Text being tokenized.</param>
    /// <param name="index">Position of the bracket character.</param>
    /// <param name="tokens">List the new token is appended to.</param>
    /// <returns>Always 1, the number of characters consumed.</returns>
    public int Handle(ReadOnlyMemory<char> source, int index, List<Token> tokens)
    {
        TokenKind bracketKind;
        if (source.Span[index] == '[')
        {
            bracketKind = TokenKind.LeftBracket;
        }
        else
        {
            // Anything that is not '[' is treated as the closing bracket.
            bracketKind = TokenKind.RightBracket;
        }

        var token = new Token(bracketKind, source.Slice(index, 1));
        tokens.Add(token);
        return 1;
    }
}
35 changes: 35 additions & 0 deletions cs/Markdown.Core/Lexing/Handlers/EscapeTokenHandler.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
namespace Markdown.Core.Lexing.Handlers;

/// <summary>
/// Token handler for the backslash escape character. The escaped character(s) are emitted
/// as <see cref="TokenKind.Text"/> and the backslash itself is dropped; a backslash that
/// escapes nothing is emitted as text.
/// </summary>
public class EscapeTokenHandler : ITokenHandler
{
    /// <summary>Tells whether the character at <paramref name="index"/> is a backslash.</summary>
    public bool CanHandle(ReadOnlySpan<char> source, int index)
    {
        return source[index] == '\\';
    }

    /// <summary>
    /// Processes the backslash at <paramref name="index"/> and appends exactly one text token.
    /// </summary>
    /// <param name="source">Text being tokenized.</param>
    /// <param name="index">Position of the backslash.</param>
    /// <param name="tokens">List the new token is appended to.</param>
    /// <returns>
    /// 3 when the backslash precedes "__" (token holds both underscores),
    /// 2 when it precedes a single special character (token holds that character),
    /// 1 otherwise (token holds the backslash itself).
    /// </returns>
    public int Handle(ReadOnlyMemory<char> source, int index, List<Token> tokens)
    {
        var chars = source.Span;
        var escapedAt = index + 1;

        if (escapedAt < chars.Length)
        {
            var escaped = chars[escapedAt];

            // A double underscore is escaped as a unit so it is not re-lexed as a marker.
            var escapesDoubleUnderscore =
                escaped == '_' && escapedAt + 1 < chars.Length && chars[escapedAt + 1] == '_';
            if (escapesDoubleUnderscore)
            {
                AddText(tokens, source.Slice(escapedAt, 2));
                return 3;
            }

            if (IsSpecialCharacter(escaped))
            {
                AddText(tokens, source.Slice(escapedAt, 1));
                return 2;
            }
        }

        // Trailing backslash, or a backslash before an ordinary character: keep it literally.
        AddText(tokens, source.Slice(index, 1));
        return 1;
    }

    /// <summary>Appends a text token wrapping <paramref name="slice"/>.</summary>
    private static void AddText(List<Token> tokens, ReadOnlyMemory<char> slice)
    {
        tokens.Add(new Token(TokenKind.Text, slice));
    }

    /// <summary>Tells whether <paramref name="c"/> is one of the characters that can be escaped.</summary>
    private static bool IsSpecialCharacter(char c) => c switch
    {
        '#' or '_' or '\\' or '[' or ']' or '(' or ')' => true,
        _ => false,
    };
}
20 changes: 20 additions & 0 deletions cs/Markdown.Core/Lexing/Handlers/HashTokenHandler.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
namespace Markdown.Core.Lexing.Handlers;

/// <summary>
/// Token handler for '#'. The character becomes a <see cref="TokenKind.Hash"/> token only
/// when it opens a line and is followed by a space; in every other position it is plain text.
/// </summary>
public class HashTokenHandler : ITokenHandler
{
    /// <summary>Tells whether the character at <paramref name="index"/> is '#'.</summary>
    public bool CanHandle(ReadOnlySpan<char> source, int index)
    {
        return source[index] == '#';
    }

    /// <summary>Appends a one-character Hash or Text token for the '#' at <paramref name="index"/>.</summary>
    /// <param name="source">Text being tokenized.</param>
    /// <param name="index">Position of the '#' character.</param>
    /// <param name="tokens">Tokens produced so far; also receives the new token.</param>
    /// <returns>Always 1, the number of characters consumed.</returns>
    public int Handle(ReadOnlyMemory<char> source, int index, List<Token> tokens)
    {
        var followedBySpace = index + 1 < source.Length && source.Span[index + 1] == ' ';
        var kind = IsAtLineStart(tokens) && followedBySpace
            ? TokenKind.Hash
            : TokenKind.Text;

        tokens.Add(new Token(kind, source.Slice(index, 1)));
        return 1;
    }

    /// <summary>
    /// A position is at the line start when nothing has been emitted yet or the last emitted
    /// token is a line break.
    /// </summary>
    private static bool IsAtLineStart(List<Token> tokens)
    {
        if (tokens.Count == 0)
        {
            return true;
        }

        return tokens[^1].Kind == TokenKind.NewLine;
    }
}
28 changes: 28 additions & 0 deletions cs/Markdown.Core/Lexing/Handlers/NewLineTokenHandler.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
namespace Markdown.Core.Lexing.Handlers;

/// <summary>
/// Token handler for line-break characters. "\r\n" and "\n" become
/// <see cref="TokenKind.NewLine"/> tokens; a '\r' that is not followed by '\n' is emitted as text.
/// </summary>
public class NewLineTokenHandler : ITokenHandler
{
    /// <summary>Tells whether the character at <paramref name="index"/> is '\n' or '\r'.</summary>
    public bool CanHandle(ReadOnlySpan<char> source, int index)
    {
        var current = source[index];
        return current == '\n' || current == '\r';
    }

    /// <summary>Appends one token for the line-break sequence starting at <paramref name="index"/>.</summary>
    /// <param name="source">Text being tokenized.</param>
    /// <param name="index">Position of the '\r' or '\n' character.</param>
    /// <param name="tokens">List the new token is appended to.</param>
    /// <returns>2 for "\r\n", otherwise 1.</returns>
    public int Handle(ReadOnlyMemory<char> source, int index, List<Token> tokens)
    {
        var chars = source.Span;
        var lineFeedFollows = index + 1 < chars.Length && chars[index + 1] == '\n';

        var (kind, width) = chars[index] switch
        {
            '\r' when lineFeedFollows => (TokenKind.NewLine, 2),
            '\n' => (TokenKind.NewLine, 1),
            // A lone carriage return is not a line break here; it is kept as text.
            _ => (TokenKind.Text, 1),
        };

        tokens.Add(new Token(kind, source.Slice(index, width)));
        return width;
    }
}
16 changes: 16 additions & 0 deletions cs/Markdown.Core/Lexing/Handlers/ParenthesisTokenHandler.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
namespace Markdown.Core.Lexing.Handlers;

/// <summary>
/// Token handler for round brackets: emits a one-character
/// <see cref="TokenKind.LeftParen"/> or <see cref="TokenKind.RightParen"/> token.
/// </summary>
public class ParenthesisTokenHandler : ITokenHandler
{
    /// <summary>Tells whether the character at <paramref name="index"/> is '(' or ')'.</summary>
    public bool CanHandle(ReadOnlySpan<char> source, int index)
    {
        return source[index] switch
        {
            '(' or ')' => true,
            _ => false,
        };
    }

    /// <summary>Appends a parenthesis token covering the single character at <paramref name="index"/>.</summary>
    /// <param name="source">Text being tokenized.</param>
    /// <param name="index">Position of the parenthesis character.</param>
    /// <param name="tokens">List the new token is appended to.</param>
    /// <returns>Always 1, the number of characters consumed.</returns>
    public int Handle(ReadOnlyMemory<char> source, int index, List<Token> tokens)
    {
        var parenKind = source.Span[index] switch
        {
            '(' => TokenKind.LeftParen,
            // Anything that is not '(' is treated as the closing parenthesis.
            _ => TokenKind.RightParen,
        };

        tokens.Add(new Token(parenKind, source.Slice(index, 1)));
        return 1;
    }
}
13 changes: 13 additions & 0 deletions cs/Markdown.Core/Lexing/Handlers/SpaceTokenHandler.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
namespace Markdown.Core.Lexing.Handlers;

/// <summary>
/// Token handler for horizontal whitespace: every space or tab character becomes its own
/// <see cref="TokenKind.Space"/> token.
/// </summary>
public class SpaceTokenHandler : ITokenHandler
{
    /// <summary>Tells whether the character at <paramref name="index"/> is a space or a tab.</summary>
    public bool CanHandle(ReadOnlySpan<char> source, int index)
    {
        var current = source[index];
        return current == ' ' || current == '\t';
    }

    /// <summary>Appends a Space token covering the single character at <paramref name="index"/>.</summary>
    /// <param name="source">Text being tokenized.</param>
    /// <param name="index">Position of the whitespace character.</param>
    /// <param name="tokens">List the new token is appended to.</param>
    /// <returns>Always 1, the number of characters consumed.</returns>
    public int Handle(ReadOnlyMemory<char> source, int index, List<Token> tokens)
    {
        const int width = 1;
        var whitespace = new Token(TokenKind.Space, source.Slice(index, width));
        tokens.Add(whitespace);
        return width;
    }
}
33 changes: 33 additions & 0 deletions cs/Markdown.Core/Lexing/Handlers/TextTokenHandler.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
namespace Markdown.Core.Lexing.Handlers;

/// <summary>
/// Catch-all token handler: groups a run of ordinary characters into a single
/// <see cref="TokenKind.Text"/> token.
/// </summary>
public class TextTokenHandler : ITokenHandler
{
    /// <summary>Accepts every position; this handler never declines.</summary>
    public bool CanHandle(ReadOnlySpan<char> source, int index)
    {
        return true;
    }

    /// <summary>
    /// Appends one text token starting at <paramref name="index"/>. The token extends up to,
    /// but not including, the next special or whitespace character (or the end of the source).
    /// </summary>
    /// <param name="source">Text being tokenized.</param>
    /// <param name="index">Position where the text run starts.</param>
    /// <param name="tokens">List the new token is appended to.</param>
    /// <returns>Number of characters consumed; never less than 1.</returns>
    public int Handle(ReadOnlyMemory<char> source, int index, List<Token> tokens)
    {
        var chars = source.Span;
        var end = index;

        while (end < chars.Length && !EndsTextRun(chars[end]))
        {
            end++;
        }

        // When the very first character already ends the run, it is still consumed
        // as a one-character text token so the caller always advances.
        var width = end > index ? end - index : 1;

        tokens.Add(new Token(TokenKind.Text, source.Slice(index, width)));
        return width;
    }

    /// <summary>Tells whether <paramref name="c"/> stops a run of plain text.</summary>
    private static bool EndsTextRun(char c) => c switch
    {
        '#' or '_' or '\\' or '[' or ']' or '(' or ')' => true,
        ' ' or '\t' or '\n' or '\r' => true,
        _ => false,
    };
}
20 changes: 20 additions & 0 deletions cs/Markdown.Core/Lexing/Handlers/UnderscoreTokenHandler.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
namespace Markdown.Core.Lexing.Handlers;

/// <summary>
/// Token handler for underscores: "__" becomes a <see cref="TokenKind.DoubleUnderscore"/>
/// token and a single '_' becomes a <see cref="TokenKind.Underscore"/> token.
/// </summary>
public class UnderscoreTokenHandler : ITokenHandler
{
    /// <summary>Tells whether the character at <paramref name="index"/> is '_'.</summary>
    public bool CanHandle(ReadOnlySpan<char> source, int index)
    {
        return source[index] == '_';
    }

    /// <summary>Appends one underscore token for the marker starting at <paramref name="index"/>.</summary>
    /// <param name="source">Text being tokenized.</param>
    /// <param name="index">Position of the first underscore.</param>
    /// <param name="tokens">List the new token is appended to.</param>
    /// <returns>2 when two underscores were consumed, otherwise 1.</returns>
    public int Handle(ReadOnlyMemory<char> source, int index, List<Token> tokens)
    {
        var pairedWithNext = index + 1 < source.Length && source.Span[index + 1] == '_';

        var (kind, width) = pairedWithNext
            ? (TokenKind.DoubleUnderscore, 2)
            : (TokenKind.Underscore, 1);

        tokens.Add(new Token(kind, source.Slice(index, width)));
        return width;
    }
}
6 changes: 6 additions & 0 deletions cs/Markdown.Core/Lexing/ILexer.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
namespace Markdown.Core.Lexing;

/// <summary>
/// Converts source text into a sequence of <see cref="Token"/> values.
/// </summary>
public interface ILexer
{
    /// <summary>Tokenizes the given text.</summary>
    /// <param name="source">Text to split into tokens.</param>
    /// <returns>The tokens produced from <paramref name="source"/>.</returns>
    IEnumerable<Token> Tokenize(ReadOnlyMemory<char> source);
}
6 changes: 6 additions & 0 deletions cs/Markdown.Core/Lexing/ITokenHandler.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
namespace Markdown.Core.Lexing;
/// <summary>
/// A lexer building block that recognises some characters and turns them into tokens.
/// </summary>
public interface ITokenHandler
{
/// <summary>Tells whether this handler accepts the character at <paramref name="index"/>.</summary>
/// <param name="source">Text being tokenized.</param>
/// <param name="index">Position of the character to inspect.</param>
bool CanHandle(ReadOnlySpan<char> source, int index);
/// <summary>Processes the text starting at <paramref name="index"/> and appends to <paramref name="tokens"/>.</summary>
/// <param name="source">Text being tokenized.</param>
/// <param name="index">Position where processing starts.</param>
/// <param name="tokens">Tokens produced so far; the handler adds its output here.</param>
/// <returns>Number of characters consumed; the lexer advances its position by this amount.</returns>
int Handle(ReadOnlyMemory<char> source, int index, List<Token> tokens);
}
45 changes: 45 additions & 0 deletions cs/Markdown.Core/Lexing/Lexer.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
using Markdown.Core.Lexing.Handlers;

namespace Markdown.Core.Lexing;

public class Lexer : ILexer
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Вообще класс содержит в себе много ответственности. Считай, он бежит по строке и разбивает её на токены, при этом содержит ещё и логику разбиения на токены.

Предлагаю чуть поднять тут SRP и сделать класс более расширяемым, при этом не сильно меняя его. Для этого можно сделать Handler/Processor на каждый символ; тут будем так же в switch'е проверять символ, в зависимости от которого будем вызывать соответствующий Handler, который уже сделает обработку и вернёт токен.

{
// Specialised handlers. Tokenize probes them in list order and lets the first one whose
// CanHandle returns true process the current position.
private readonly List<ITokenHandler> _handlers =
[
new HashTokenHandler(),
new EscapeTokenHandler(),
new UnderscoreTokenHandler(),
new SpaceTokenHandler(),
new NewLineTokenHandler(),
new BracketTokenHandler(),
new ParenthesisTokenHandler()
];
// Fallback used by Tokenize when none of the handlers in _handlers accepts the position.
private readonly ITokenHandler _textHandler = new TextTokenHandler();

/// <summary>
/// Splits <paramref name="source"/> into tokens by walking it from left to right.
/// </summary>
/// <remarks>
/// At each position the handlers in <c>_handlers</c> are probed in list order and the first
/// one whose <c>CanHandle</c> returns true processes that position; when none matches,
/// <c>_textHandler</c> is used. The selected handler appends to the token list and reports
/// how many characters it consumed.
/// </remarks>
/// <param name="source">Text to tokenize.</param>
/// <returns>
/// The tokens in source order, followed by one zero-length <see cref="TokenKind.Eof"/> token.
/// </returns>
public IEnumerable<Token> Tokenize(ReadOnlyMemory<char> source)
{
    var tokens = new List<Token>();
    var position = 0;

    while (position < source.Length)
    {
        // Plain first-match loop rather than a LINQ query: Handle mutates `tokens`, and a
        // side effect hidden in a deferred `select` is easy to misread. This also avoids
        // building a query and closures for every character.
        var handler = _textHandler;
        foreach (var candidate in _handlers)
        {
            if (candidate.CanHandle(source.Span, position))
            {
                handler = candidate;
                break;
            }
        }

        position += handler.Handle(source, position, tokens);
    }

    // The end-of-input marker carries an empty slice of the source.
    tokens.Add(new Token(TokenKind.Eof, source[..0]));
    return tokens;
}
}
7 changes: 7 additions & 0 deletions cs/Markdown.Core/Lexing/Token.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
namespace Markdown.Core.Lexing;

/// <summary>
/// A lexical token: its kind together with the slice of text it carries.
/// </summary>
public readonly struct Token
{
    /// <summary>Creates a token of the given kind over the given text slice.</summary>
    /// <param name="kind">Kind of the token.</param>
    /// <param name="slice">Text associated with the token.</param>
    public Token(TokenKind kind, ReadOnlyMemory<char> slice)
    {
        Kind = kind;
        Slice = slice;
    }

    /// <summary>Kind of the token.</summary>
    public TokenKind Kind { get; init; }

    /// <summary>Text associated with the token.</summary>
    public ReadOnlyMemory<char> Slice { get; init; }
}
27 changes: 27 additions & 0 deletions cs/Markdown.Core/Lexing/TokenKind.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
namespace Markdown.Core.Lexing;

/// <summary>Kinds of tokens produced by the lexer.</summary>
public enum TokenKind
{
/// <summary>Plain text</summary>
Text,
/// <summary>Italic marker (single underscore '_')</summary>
Underscore,
/// <summary>Bold marker (double underscore "__")</summary>
DoubleUnderscore,
/// <summary>Heading marker ('#')</summary>
Hash,
/// <summary>A single whitespace character (space or tab)</summary>
Space,
/// <summary>Line break</summary>
NewLine,
/// <summary>End of input</summary>
Eof,
/// <summary>Square bracket '[' that opens the link text</summary>
LeftBracket,
/// <summary>Square bracket ']' that closes the link text</summary>
RightBracket,
/// <summary>Round bracket '(' that opens the link address</summary>
LeftParen,
/// <summary>Round bracket ')' that closes the link address</summary>
RightParen,
}
15 changes: 15 additions & 0 deletions cs/Markdown.Core/Markdown.Core.csproj
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
<Project Sdk="Microsoft.NET.Sdk">

<PropertyGroup>
<TargetFramework>net8.0</TargetFramework>
<ImplicitUsings>enable</ImplicitUsings>
<Nullable>enable</Nullable>
</PropertyGroup>

<!-- NOTE(review): FluentAssertions, Microsoft.NET.Test.Sdk and NUnit are testing packages.
     Confirm whether the core library really needs them or whether they belong in a
     separate test project. -->
<ItemGroup>
<PackageReference Include="FluentAssertions" Version="8.8.0" />
<PackageReference Include="Microsoft.NET.Test.Sdk" Version="18.0.0" />
<PackageReference Include="NUnit" Version="4.4.0" />
</ItemGroup>

</Project>
28 changes: 28 additions & 0 deletions cs/Markdown.Core/Parsing/Blocks/HeadingBlockParser.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
using Markdown.Core.Lexing;
using Markdown.Core.Parsing.Nodes;

namespace Markdown.Core.Parsing.Blocks;
/// <summary>
/// Block parser for headings: a line that starts with a <see cref="TokenKind.Hash"/> token.
/// </summary>
public class HeadingBlockParser : IBlockParser
{
    /// <summary>
    /// Accepts the current position when the current token is a Hash and the state reports
    /// that it is at the start of a line.
    /// </summary>
    public bool CanParse(ParserState state)
    {
        return state.CurrentToken.Kind == TokenKind.Hash && state.IsAtLineStart();
    }

    /// <summary>
    /// Consumes the heading marker and collects inline nodes until the end of the line.
    /// </summary>
    /// <param name="state">Parser state positioned on the Hash token.</param>
    /// <param name="inlineParser">Parser used to read the inline content of the heading.</param>
    /// <returns>The heading node with the parsed inline content.</returns>
    public BlockNode? Parse(ParserState state, InlineParser inlineParser)
    {
        // Step over the '#' token and the whitespace that follows it.
        state.MoveNext();
        state.SkipSpace();

        // NOTE(review): the argument 1 is presumably the heading level - confirm against HeadingNode.
        var result = new HeadingNode(1);
        while (!state.IsEndOfLine())
        {
            var node = inlineParser.ParseInline();
            if (node == null)
            {
                continue;
            }

            result.Inlines.Add(node);
        }

        // Consume the line break that terminates the heading, if there is one.
        if (state.CurrentToken.Kind == TokenKind.NewLine)
        {
            state.MoveNext();
        }

        return result;
    }
}
9 changes: 9 additions & 0 deletions cs/Markdown.Core/Parsing/Blocks/IBlockParser.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
using Markdown.Core.Parsing.Nodes;

namespace Markdown.Core.Parsing.Blocks;

/// <summary>
/// Parses one kind of block-level element from the token stream.
/// </summary>
public interface IBlockParser
{
/// <summary>Tells whether this parser accepts the block starting at the current position of <paramref name="state"/>.</summary>
bool CanParse(ParserState state);
/// <summary>Parses a block starting at the current position of <paramref name="state"/>.</summary>
/// <param name="state">Parser state to read tokens from.</param>
/// <param name="inlineParser">Parser for the inline content of the block.</param>
/// <returns>The parsed block; the signature allows <c>null</c>.</returns>
BlockNode? Parse(ParserState state, InlineParser inlineParser);
}
27 changes: 27 additions & 0 deletions cs/Markdown.Core/Parsing/Blocks/ParagraphBlockParser.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
using Markdown.Core.Lexing;
using Markdown.Core.Parsing.Nodes;

namespace Markdown.Core.Parsing.Blocks;

/// <summary>
/// Block parser for paragraphs: accepts any position that is not the end of input.
/// </summary>
public class ParagraphBlockParser : IBlockParser
{
    /// <summary>Accepts every current token except <see cref="TokenKind.Eof"/>.</summary>
    public bool CanParse(ParserState state)
    {
        return state.CurrentToken.Kind != TokenKind.Eof;
    }

    /// <summary>
    /// Collects inline nodes until the end of the line, then consumes the line break and
    /// any empty lines that follow it.
    /// </summary>
    /// <param name="state">Parser state positioned at the start of the paragraph.</param>
    /// <param name="inlineParser">Parser used to read the inline content of the paragraph.</param>
    /// <returns>The paragraph node with the parsed inline content.</returns>
    public BlockNode? Parse(ParserState state, InlineParser inlineParser)
    {
        var result = new ParagraphNode();

        while (!state.IsEndOfLine())
        {
            var node = inlineParser.ParseInline();
            if (node == null)
            {
                continue;
            }

            result.Inlines.Add(node);
        }

        // Only a line break is consumed here; any other terminating token is left in place.
        if (state.CurrentToken.Kind == TokenKind.NewLine)
        {
            state.MoveNext();
            state.SkipEmptyLines();
        }

        return result;
    }
}
9 changes: 9 additions & 0 deletions cs/Markdown.Core/Parsing/IParser.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
using Markdown.Core.Lexing;
using Markdown.Core.Parsing.Nodes;

namespace Markdown.Core.Parsing;

/// <summary>
/// Builds a document tree from a sequence of tokens.
/// </summary>
public interface IParser
{
/// <summary>Parses the given tokens into a document.</summary>
/// <param name="tokens">Tokens to parse.</param>
/// <returns>The root node of the parsed document.</returns>
DocumentNode Parse(IEnumerable<Token> tokens);
}
Loading