From ae9d580d1bec3dc715ccde586bb323329e97fe24 Mon Sep 17 00:00:00 2001 From: taku0 Date: Thu, 10 Aug 2023 20:18:21 +0900 Subject: [PATCH 1/5] Add test for list tightness --- test/regression.txt | 191 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 191 insertions(+) diff --git a/test/regression.txt b/test/regression.txt index f905677d6..9111c98d7 100644 --- a/test/regression.txt +++ b/test/regression.txt @@ -213,3 +213,194 @@ x ```````````````````````````````` +https://github.com/commonmark/commonmark.js/pull/269 +Block-quoted blank line shouldn't make parent list loose. +```````````````````````````````` example +## Case 1 + +- > a + > +- b + + +## Case 2 + +- > - a + > +- b + + +## Case 3 + +- > > a + > +- b + + +## Case 4 + +- > # a + > +- b + + +## Case 5 + +- ``` + The following line is part of code block. + +- b + +## Case 6 + +- The following line is **not** part of code block. + +- b + +## Case 7 + +-
The following line is part of HTML block.
+
+- 
+- b +. +

Case 1

+ +

Case 2

+ +

Case 3

+ +

Case 4

+ +

Case 5

+ +

Case 6

+ +

Case 7

+ +```````````````````````````````` + +Link reference definitions are blocks when checking list tightness. +```````````````````````````````` example +## Case 1 + +- [aaa]: / + + [aaa]: / +- b + + +## Case 2 + +- a + + [aaa]: / +- b + + +## Case 3 + +- [aaa]: / + + a +- b + +## Case 4 +- *** + [aaa]: / + + bbb +- c +. +

Case 1

+ +

Case 2

+ +

Case 3

+ +

Case 4

+ +```````````````````````````````` From 161f7f10b4aff57fcb62de5e3e26b8efd29d9c07 Mon Sep 17 00:00:00 2001 From: taku0 Date: Thu, 10 Aug 2023 23:22:07 +0900 Subject: [PATCH 2/5] Defer resolution of link reference definitions We must not remove link reference definitions until we check for list tightness. This commit defers resolving of link reference definitions until finalization of the document. We still need to eagerly remove link reference definitions in setext headings to determine whether it is a setext heading or a thematic break. So this commit provides slightly different functions for resolving link reference definitions and checking if a line is blank for `cmark_strbuf` and `cmark_chunk`. --- src/blocks.c | 126 +++++++++++++++++++++++++++++++++++++++++++++------ src/node.c | 3 ++ 2 files changed, 114 insertions(+), 15 deletions(-) diff --git a/src/blocks.c b/src/blocks.c index 3f262cb11..fa57d71b9 100644 --- a/src/blocks.c +++ b/src/blocks.c @@ -135,9 +135,10 @@ void cmark_parser_free(cmark_parser *parser) { static cmark_node *finalize(cmark_parser *parser, cmark_node *b); // Returns true if line has only space characters, else false. -static bool is_blank(cmark_strbuf *s, bufsize_t offset) { - while (offset < s->size) { - switch (s->ptr[offset]) { +static bool is_blank_raw(const unsigned char *ptr, const bufsize_t size, + bufsize_t offset) { + while (offset < size) { + switch (ptr[offset]) { case '\r': case '\n': return true; @@ -155,6 +156,17 @@ static bool is_blank(cmark_strbuf *s, bufsize_t offset) { return true; } +// Returns true if line has only space characters, else false. +static CMARK_INLINE bool is_blank_strbuf(cmark_strbuf *s, bufsize_t offset) { + return is_blank_raw(s->ptr, s->size, offset); +} + +// Returns true if line has only space characters, else false. 
+static CMARK_INLINE bool is_blank_chunk(cmark_chunk *s, bufsize_t offset) { + return is_blank_raw(s->data, s->len, offset); +} + + static CMARK_INLINE bool can_contain(cmark_node_type parent_type, cmark_node_type child_type) { return (parent_type == CMARK_NODE_DOCUMENT || @@ -244,7 +256,96 @@ static bool resolve_reference_link_definitions(cmark_parser *parser) { chunk.len -= pos; } cmark_strbuf_drop(node_content, (node_content->size - chunk.len)); - return !is_blank(node_content, 0); + return !is_blank_strbuf(node_content, 0); +} + +// Parse link reference definitions in the given finalized paragraph. +// Results are added to `refmap` of the parser. +// `data`, `len`, `start_line`, and `start_column` of the parser will be +// updated. +// If the paragraph contains only link reference definitions, it is removed +// from the tree and freed. +static void resolve_deferred_reference_link_definitions(cmark_parser *parser, + cmark_node *paragraph) { + bufsize_t pos; + cmark_chunk chunk = {paragraph->data, paragraph->len}; + int new_start_line; + int new_start_column; + unsigned char *p; + unsigned char *resized; + + while (chunk.len && chunk.data[0] == '[' && + (pos = cmark_parse_reference_inline(parser->mem, &chunk, + parser->refmap))) { + + chunk.data += pos; + chunk.len -= pos; + } + + if (paragraph->data == chunk.data) { + // No definitions found. + return; + } + + if (chunk.len == 0) { + // The paragraph contained only definitions. + cmark_node_free(paragraph); + } else { + // Adjust the start position and the data. 
+ + new_start_line = paragraph->start_line; + new_start_column = paragraph->start_column; + + for (p = paragraph->data; p < chunk.data; p++) { + switch (*p) { + case '\r': + new_start_line++; + new_start_column = 0; + if (p + 1 < chunk.data && p[1] == '\n') { + p++; + } + break; + case '\n': + new_start_line++; + new_start_column = 0; + break; + default: + new_start_column++; + break; + } + } + + paragraph->start_line = new_start_line; + paragraph->start_column = new_start_column; + + memmove(paragraph->data, chunk.data, chunk.len); + resized = parser->mem->realloc(paragraph->data, chunk.len); + chunk.data = resized; + paragraph->data = resized; + paragraph->len = chunk.len; + + if (is_blank_chunk(&chunk, 0)) { + cmark_node_free(paragraph); + } + } +} + +static void resolve_all_reference_link_definitions(cmark_parser *parser) { + cmark_iter *iter = cmark_iter_new(parser->root); + cmark_node *cur; + cmark_event_type ev_type; + + while ((ev_type = cmark_iter_next(iter)) != CMARK_EVENT_DONE) { + cur = cmark_iter_get_node(iter); + // Process at exit so we can free the node if it contains only definitions. 
+ if (ev_type == CMARK_EVENT_EXIT) { + if (S_type(cur) == CMARK_NODE_PARAGRAPH) { + resolve_deferred_reference_link_definitions(parser, cur); + } + } + } + + cmark_iter_free(iter); } static cmark_node *finalize(cmark_parser *parser, cmark_node *b) { @@ -252,7 +353,6 @@ static cmark_node *finalize(cmark_parser *parser, cmark_node *b) { cmark_node *item; cmark_node *subitem; cmark_node *parent; - bool has_content; parent = b->parent; assert(b->flags & @@ -281,17 +381,9 @@ static cmark_node *finalize(cmark_parser *parser, cmark_node *b) { switch (S_type(b)) { case CMARK_NODE_PARAGRAPH: - { - has_content = resolve_reference_link_definitions(parser); - if (!has_content) { - // remove blank node (former reference def) - cmark_node_free(b); - } else { - b->len = node_content->size; - b->data = cmark_strbuf_detach(node_content); - } + b->len = node_content->size; + b->data = cmark_strbuf_detach(node_content); break; - } case CMARK_NODE_CODE_BLOCK: if (!b->as.code.fenced) { // indented code @@ -360,6 +452,10 @@ static cmark_node *finalize(cmark_parser *parser, cmark_node *b) { break; + case CMARK_NODE_DOCUMENT: + resolve_all_reference_link_definitions(parser); + break; + default: break; } diff --git a/src/node.c b/src/node.c index 3b0cf1361..278dade95 100644 --- a/src/node.c +++ b/src/node.c @@ -127,6 +127,9 @@ static void S_free_nodes(cmark_node *e) { mem->free(e->as.custom.on_enter); mem->free(e->as.custom.on_exit); break; + case CMARK_NODE_PARAGRAPH: + mem->free(e->data); + break; default: break; } From 00eefd46939b754b4bec3aa4ffe7cdf157eb4008 Mon Sep 17 00:00:00 2001 From: taku0 Date: Thu, 17 Aug 2023 13:16:44 +0900 Subject: [PATCH 3/5] Add test for strict source positions --- api_test/main.c | 170 +++++++++++++++++++++++++++++++++++++++++++- test/regression.txt | 1 + 2 files changed, 170 insertions(+), 1 deletion(-) diff --git a/api_test/main.c b/api_test/main.c index c2db7370d..d163db530 100644 --- a/api_test/main.c +++ b/api_test/main.c @@ -952,7 +952,7 @@ static 
void source_pos(test_batch_runner *runner) { " \n" " \n" " \n" - " \n" + " \n" " \n" " Okay.\n" " \n" @@ -1067,6 +1067,173 @@ static void ref_source_pos(test_batch_runner *runner) { cmark_node_free(doc); } +static void source_pos_pathological(test_batch_runner *runner) { + static const char markdown[] = + "> ```\n" + "> This code block is closed implicitly by the end of the block quote.\n" + "> \n" + "\n" + ">
\n"
+    "> This HTML block is closed implicitly by the end of the block quote.\n"
+    ">   \n"
+    "\n"
+    "- ```\n"
+    "  This code block is closed implicitly by the end of the list item.\n"
+    "   \n"
+    "- 
\n"
+    "  This code block is closed implicitly by the end of the list item.\n"
+    "   \n"
+    "- \n"
+    "\n"
+    "    \n"
+    "    Indented code block...\n"
+    "    \n"
+    "    doesn't contain trailing blank lines,\n"
+    "    but it does contain trailing spaces at end of the line   \n"
+    "    \n"
+    "\n"
+    "```\n"
+    "Explicitly closed code block\n"
+    "```\n"
+    "\n"
+    "
\n"
+    "Explicitly closed HTML block\n"
+    "
\n" + "\n" + "\n" + " with trailing text.\n" + "\n" + "\n" + " Setext heading across\n" + " multiple lines\n" + " =====================\n" + "\n" + "\n" + "---\n" + "\n" + " ### ATX heading\n" + "\n" + "\n" + "[\n" + "foo\n" + "]:\n" + "/url\n" + "'\n" + "Link reference definitions followed by a paragraph\n" + "'\n" + "bar\n" + "\n" + "> [foo]: /url 'Block containing only link reference definitions'\n" + "\n" + "\n" + ">\n" + "> \n" + "> \n" + "\n" + "\n" + " > Indented code block\n" + "> with offseted first line.\n" + "\n" + "> Indented code block\n" + " > with offseted second line.\n" + "\n" + "\n" + "- Empty list items\n" + "- \n" + "- -\n" + " - -\n"; + + cmark_node *doc = cmark_parse_document(markdown, sizeof(markdown) - 1, CMARK_OPT_DEFAULT); + char *xml = cmark_render_xml(doc, CMARK_OPT_DEFAULT | CMARK_OPT_SOURCEPOS); + STR_EQ(runner, xml, "\n" + "\n" + "\n" + " \n" + " This code block is closed implicitly by the end of the block quote.\n" + " \n" + "\n" + " \n" + " \n" + " <pre>\n" + "This HTML block is closed implicitly by the end of the block quote.\n" + " \n" + "\n" + " \n" + " \n" + " \n" + " This code block is closed implicitly by the end of the list item.\n" + " \n" + "\n" + " \n" + " \n" + " <pre>\n" + "This code block is closed implicitly by the end of the list item.\n" + " \n" + "\n" + " \n" + " \n" + " \n" + " Indented code block...\n" + "\n" + "doesn't contain trailing blank lines,\n" + "but it does contain trailing spaces at end of the line \n" + "\n" + " Explicitly closed code block\n" + "\n" + " <pre>\n" + "Explicitly closed HTML block\n" + "</pre>\n" + "\n" + " <!-- multiple HTML blocks --> <!-- in a line --> with trailing text.\n" + "\n" + " \n" + " Setext heading across\n" + " \n" + " multiple lines\n" + " \n" + " \n" + " \n" + " ATX heading\n" + " \n" + " \n" + " bar\n" + " \n" + " \n" + " \n" + " \n" + " Indented code block\n" + "with offseted first line.\n" + "\n" + " \n" + " \n" + " Indented code block\n" + "with offseted second 
line.\n" + "\n" + " \n" + " \n" + " \n" + " \n" + " Empty list items\n" + " \n" + " \n" + " \n" + " \n" + " \n" + " \n" + " \n" + " \n" + " \n" + " \n" + " \n" + " \n" + " \n" + " \n" + "\n", + "sourcepos are as expected"); + free(xml); + cmark_node_free(doc); +} + int main(void) { int retval; test_batch_runner *runner = test_batch_runner_new(); @@ -1096,6 +1263,7 @@ int main(void) { source_pos(runner); source_pos_inlines(runner); ref_source_pos(runner); + source_pos_pathological(runner); test_print_summary(runner); retval = test_ok(runner) ? 0 : 1; diff --git a/test/regression.txt b/test/regression.txt index 9111c98d7..838ce0e4f 100644 --- a/test/regression.txt +++ b/test/regression.txt @@ -404,3 +404,4 @@ Link reference definitions are blocks when checking list tightness. ```````````````````````````````` + From 1e73dead1cceb11acf28296cb5b0e5f1b35d3395 Mon Sep 17 00:00:00 2001 From: taku0 Date: Thu, 10 Aug 2023 20:19:41 +0900 Subject: [PATCH 4/5] Remove CMARK_NODE__LAST_LINE_CHECKED flag This flag was introduced by https://github.com/commonmark/cmark/issues/284, but we will not need it once we update `S_ends_with_blank_line` to not use recursion in the next commit.
--- src/blocks.c | 16 ++-------------- src/node.h | 1 - 2 files changed, 2 insertions(+), 15 deletions(-) diff --git a/src/blocks.c b/src/blocks.c index fa57d71b9..aeab3b850 100644 --- a/src/blocks.c +++ b/src/blocks.c @@ -36,10 +36,6 @@ static bool S_last_line_blank(const cmark_node *node) { return (node->flags & CMARK_NODE__LAST_LINE_BLANK) != 0; } -static bool S_last_line_checked(const cmark_node *node) { - return (node->flags & CMARK_NODE__LAST_LINE_CHECKED) != 0; -} - static CMARK_INLINE cmark_node_type S_type(const cmark_node *node) { return (cmark_node_type)node->type; } @@ -51,10 +47,6 @@ static void S_set_last_line_blank(cmark_node *node, bool is_blank) { node->flags &= ~CMARK_NODE__LAST_LINE_BLANK; } -static void S_set_last_line_checked(cmark_node *node) { - node->flags |= CMARK_NODE__LAST_LINE_CHECKED; -} - static CMARK_INLINE bool S_is_line_end_char(char c) { return (c == '\n' || c == '\r'); } @@ -231,14 +223,10 @@ static void remove_trailing_blank_lines(cmark_strbuf *ln) { // Check to see if a node ends with a blank line, descending // if needed into lists and sublists. 
static bool S_ends_with_blank_line(cmark_node *node) { - if (S_last_line_checked(node)) { - return(S_last_line_blank(node)); - } else if ((S_type(node) == CMARK_NODE_LIST || - S_type(node) == CMARK_NODE_ITEM) && node->last_child) { - S_set_last_line_checked(node); + if ((S_type(node) == CMARK_NODE_LIST || + S_type(node) == CMARK_NODE_ITEM) && node->last_child) { return(S_ends_with_blank_line(node->last_child)); } else { - S_set_last_line_checked(node); return (S_last_line_blank(node)); } } diff --git a/src/node.h b/src/node.h index 1cae5d745..5b391e8a3 100644 --- a/src/node.h +++ b/src/node.h @@ -49,7 +49,6 @@ typedef struct { enum cmark_node__internal_flags { CMARK_NODE__OPEN = (1 << 0), CMARK_NODE__LAST_LINE_BLANK = (1 << 1), - CMARK_NODE__LAST_LINE_CHECKED = (1 << 2), }; struct cmark_node { From ff0d224bd2836aa44d3fd8af87960296935fad9d Mon Sep 17 00:00:00 2001 From: taku0 Date: Thu, 17 Aug 2023 13:27:35 +0900 Subject: [PATCH 5/5] Fix list tightness - Set the end position precisely - Check list tightness by comparing line numbers - Remove `LAST_LINE_BLANK` flag See also https://github.com/commonmark/commonmark.js/pull/269 . Classification of end positions: - The end of the current line: - Thematic breaks - ATX headings - Setext headings - Fenced code blocks closed explicitly - HTML blocks (`pre`, comments, and others) - The end of the previous line: - Fenced code blocks closed by the end of the parent or EOF - HTML blocks (`div` and others) - HTML blocks closed by the end of the parent or EOF - Paragraphs - Block quotes - Empty list items - The end position of the last child: - Non-empty list items - Lists - The end position of the last non-blank line: - Indented code blocks The first two cases are handled by `finalize` and the `closed_explicitly` flag. Non-empty list items and lists are handled in `switch` statements in `finalize`. Indented code blocks are handled by setting the end position every time a non-blank line is added to the block.
--- src/blocks.c | 139 ++++++++++++++++++++++++--------------------------- src/node.h | 1 - 2 files changed, 65 insertions(+), 75 deletions(-) diff --git a/src/blocks.c b/src/blocks.c index aeab3b850..832a587d3 100644 --- a/src/blocks.c +++ b/src/blocks.c @@ -32,21 +32,10 @@ #define peek_at(i, n) (i)->data[n] -static bool S_last_line_blank(const cmark_node *node) { - return (node->flags & CMARK_NODE__LAST_LINE_BLANK) != 0; -} - static CMARK_INLINE cmark_node_type S_type(const cmark_node *node) { return (cmark_node_type)node->type; } -static void S_set_last_line_blank(cmark_node *node, bool is_blank) { - if (is_blank) - node->flags |= CMARK_NODE__LAST_LINE_BLANK; - else - node->flags &= ~CMARK_NODE__LAST_LINE_BLANK; -} - static CMARK_INLINE bool S_is_line_end_char(char c) { return (c == '\n' || c == '\r'); } @@ -124,8 +113,6 @@ void cmark_parser_free(cmark_parser *parser) { mem->free(parser); } -static cmark_node *finalize(cmark_parser *parser, cmark_node *b); - // Returns true if line has only space characters, else false. static bool is_blank_raw(const unsigned char *ptr, const bufsize_t size, bufsize_t offset) { @@ -209,26 +196,25 @@ static void remove_trailing_blank_lines(cmark_strbuf *ln) { return; } + // Scan forward until line end to keep trailing spaces of the last line. for (; i < ln->size; ++i) { c = ln->ptr[i]; if (!S_is_line_end_char(c)) continue; - cmark_strbuf_truncate(ln, i); + if (c == '\r' && i + 1 < ln->size && ln->ptr[i + 1] == '\n') { + i++; + } + + cmark_strbuf_truncate(ln, i + 1); break; } } -// Check to see if a node ends with a blank line, descending -// if needed into lists and sublists. -static bool S_ends_with_blank_line(cmark_node *node) { - if ((S_type(node) == CMARK_NODE_LIST || - S_type(node) == CMARK_NODE_ITEM) && node->last_child) { - return(S_ends_with_blank_line(node->last_child)); - } else { - return (S_last_line_blank(node)); - } +// Check to see if a node ends with a blank line. 
+static CMARK_INLINE bool S_ends_with_blank_line(cmark_node *node) { + return node->next && node->end_line != node->next->start_line - 1; } // returns true if content remains after link defs are resolved. @@ -336,7 +322,15 @@ static void resolve_all_reference_link_definitions(cmark_parser *parser) { cmark_iter_free(iter); } -static cmark_node *finalize(cmark_parser *parser, cmark_node *b) { +// `closed_explicitly` states that the node is closed by explicit markers, or +// the node cannot span more than one line: +// +// - Close tag of HTML blocks +// - Closing code fence +// - ATX headings +// - Thematic breaks +static cmark_node *finalize(cmark_parser *parser, cmark_node *b, + bool closed_explicitly) { bufsize_t pos; cmark_node *item; cmark_node *subitem; @@ -347,22 +341,22 @@ static cmark_node *finalize(cmark_parser *parser, cmark_node *b) { CMARK_NODE__OPEN); // shouldn't call finalize on closed blocks b->flags &= ~CMARK_NODE__OPEN; - if (parser->curline.size == 0) { - // end of input - line number has not been incremented - b->end_line = parser->line_number; - b->end_column = parser->last_line_length; - } else if (S_type(b) == CMARK_NODE_DOCUMENT || - (S_type(b) == CMARK_NODE_CODE_BLOCK && b->as.code.fenced) || - (S_type(b) == CMARK_NODE_HEADING && b->as.heading.setext)) { - b->end_line = parser->line_number; - b->end_column = parser->curline.size; - if (b->end_column && parser->curline.ptr[b->end_column - 1] == '\n') - b->end_column -= 1; - if (b->end_column && parser->curline.ptr[b->end_column - 1] == '\r') - b->end_column -= 1; - } else { - b->end_line = parser->line_number - 1; - b->end_column = parser->last_line_length; + if (S_type(b) != CMARK_NODE_CODE_BLOCK || b->as.code.fenced) { + if (parser->curline.size == 0) { + // end of input - line number has not been incremented + b->end_line = parser->line_number; + b->end_column = parser->last_line_length; + } else if (closed_explicitly) { + b->end_line = parser->line_number; + b->end_column = 
parser->curline.size; + if (b->end_column && parser->curline.ptr[b->end_column - 1] == '\n') + b->end_column -= 1; + if (b->end_column && parser->curline.ptr[b->end_column - 1] == '\r') + b->end_column -= 1; + } else { + b->end_line = parser->line_number - 1; + b->end_column = parser->last_line_length; + } } cmark_strbuf *node_content = &parser->content; @@ -376,7 +370,6 @@ static cmark_node *finalize(cmark_parser *parser, cmark_node *b) { case CMARK_NODE_CODE_BLOCK: if (!b->as.code.fenced) { // indented code remove_trailing_blank_lines(node_content); - cmark_strbuf_putc(node_content, '\n'); } else { // first line of contents becomes info for (pos = 0; pos < node_content->size; ++pos) { @@ -417,7 +410,7 @@ static cmark_node *finalize(cmark_parser *parser, cmark_node *b) { while (item) { // check for non-final non-empty list item ending with blank line: - if (S_last_line_blank(item) && item->next) { + if (item->next && S_ends_with_blank_line(item)) { b->as.list.tight = false; break; } @@ -425,8 +418,7 @@ static cmark_node *finalize(cmark_parser *parser, cmark_node *b) { // spaces between them: subitem = item->first_child; while (subitem) { - if ((item->next || subitem->next) && - S_ends_with_blank_line(subitem)) { + if (subitem->next && S_ends_with_blank_line(subitem)) { b->as.list.tight = false; break; } @@ -437,9 +429,21 @@ static cmark_node *finalize(cmark_parser *parser, cmark_node *b) { } item = item->next; } + b->end_line = b->last_child->end_line; + b->end_column = b->last_child->end_column; break; + case CMARK_NODE_ITEM: + if (b->last_child) { + b->end_line = b->last_child->end_line; + b->end_column = b->last_child->end_column; + } + // If the item is empty, it is closed when the next line is processed and + // the end position is set by the normal path. Note that if the first line + // and second line of a item are blank, it is closed. 
+ break; + case CMARK_NODE_DOCUMENT: resolve_all_reference_link_definitions(parser); break; @@ -459,7 +463,7 @@ static cmark_node *add_child(cmark_parser *parser, cmark_node *parent, // if 'parent' isn't the kind of node that can accept this child, // then back up til we hit a node that can. while (!can_contain(S_type(parent), block_type)) { - parent = finalize(parser, parent); + parent = finalize(parser, parent, false); } cmark_node *child = @@ -599,10 +603,10 @@ static int lists_match(cmark_list *list_data, cmark_list *item_data) { static cmark_node *finalize_document(cmark_parser *parser) { while (parser->current != parser->root) { - parser->current = finalize(parser, parser->current); + parser->current = finalize(parser, parser->current, false); } - finalize(parser, parser->root); + finalize(parser, parser->root, false); // Limit total size of extra content created from reference links to // document size to avoid superlinear growth. Always allow 100KB. @@ -922,7 +926,7 @@ static bool parse_code_block_prefix(cmark_parser *parser, cmark_chunk *input, // the end of a line, we can stop processing it: *should_continue = false; S_advance_offset(parser, input, matched, false); - parser->current = finalize(parser, container); + parser->current = finalize(parser, container, true); } else { // skip opt. 
spaces of fence parser->offset int i = container->as.code.fence_offset; @@ -1126,6 +1130,7 @@ static void open_new_blocks(cmark_parser *parser, cmark_node **container, // it's only now that we know the line is not part of a setext heading: *container = add_child(parser, *container, CMARK_NODE_THEMATIC_BREAK, parser->first_nonspace + 1); + *container = finalize(parser, *container, true); S_advance_offset(parser, input, input->len - 1 - parser->offset, false); } else if ((!indented || cont_type == CMARK_NODE_LIST) && parser->indent < 4 && @@ -1212,35 +1217,11 @@ static void open_new_blocks(cmark_parser *parser, cmark_node **container, static void add_text_to_container(cmark_parser *parser, cmark_node *container, cmark_node *last_matched_container, cmark_chunk *input) { - cmark_node *tmp; // what remains at parser->offset is a text line. add the text to the // appropriate container. S_find_first_nonspace(parser, input); - if (parser->blank && container->last_child) - S_set_last_line_blank(container->last_child, true); - - // block quote lines are never blank as they start with > - // and we don't count blanks in fenced code for purposes of tight/loose - // lists or breaking out of lists. we also don't set last_line_blank - // on an empty list item. 
- const cmark_node_type ctype = S_type(container); - const bool last_line_blank = - (parser->blank && ctype != CMARK_NODE_BLOCK_QUOTE && - ctype != CMARK_NODE_HEADING && ctype != CMARK_NODE_THEMATIC_BREAK && - !(ctype == CMARK_NODE_CODE_BLOCK && container->as.code.fenced) && - !(ctype == CMARK_NODE_ITEM && container->first_child == NULL && - container->start_line == parser->line_number)); - - S_set_last_line_blank(container, last_line_blank); - - tmp = container; - while (tmp->parent) { - S_set_last_line_blank(tmp->parent, false); - tmp = tmp->parent; - } - // If the last line processed belonged to a paragraph node, // and we didn't match all of the line prefixes for the open containers, // and we didn't start any new containers, @@ -1254,7 +1235,7 @@ static void add_text_to_container(cmark_parser *parser, cmark_node *container, } else { // not a lazy continuation // Finalize any blocks that were not matched and set cur to container: while (parser->current != last_matched_container) { - parser->current = finalize(parser, parser->current); + parser->current = finalize(parser, parser->current, false); assert(parser->current != NULL); } @@ -1296,7 +1277,7 @@ static void add_text_to_container(cmark_parser *parser, cmark_node *container, } if (matches_end_condition) { - container = finalize(parser, container); + container = finalize(parser, container, true); assert(parser->current != NULL); } } else if (parser->blank) { @@ -1329,6 +1310,7 @@ static void S_process_line(cmark_parser *parser, const unsigned char *buffer, bool all_matched = true; cmark_node *container; cmark_chunk input; + bool need_set_end_position = false; if (parser->options & CMARK_OPT_VALIDATE_UTF8) cmark_utf8proc_check(&parser->curline, buffer, bytes); @@ -1366,6 +1348,10 @@ static void S_process_line(cmark_parser *parser, const unsigned char *buffer, add_text_to_container(parser, container, last_matched_container, &input); + need_set_end_position = S_type(container) == CMARK_NODE_CODE_BLOCK && + 
!container->as.code.fenced && + !parser->blank; + finished: parser->last_line_length = input.len; if (parser->last_line_length && @@ -1375,6 +1361,11 @@ static void S_process_line(cmark_parser *parser, const unsigned char *buffer, input.data[parser->last_line_length - 1] == '\r') parser->last_line_length -= 1; + if (need_set_end_position) { + container->end_line = parser->line_number; + container->end_column = parser->last_line_length; + } + cmark_strbuf_clear(&parser->curline); } diff --git a/src/node.h b/src/node.h index 5b391e8a3..4df042438 100644 --- a/src/node.h +++ b/src/node.h @@ -48,7 +48,6 @@ typedef struct { enum cmark_node__internal_flags { CMARK_NODE__OPEN = (1 << 0), - CMARK_NODE__LAST_LINE_BLANK = (1 << 1), }; struct cmark_node {