From 4874cb4498588beaff3663f08779428f48d7a408 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Fri, 3 Jun 2022 08:54:49 -0700 Subject: [PATCH 01/34] Allow closing fence to be followed by tabs. Closes #258. --- lib/blocks.js | 2 +- test/regression.txt | 11 +++++++++++ 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/lib/blocks.js b/lib/blocks.js index 9262a4d1..7ee320fa 100644 --- a/lib/blocks.js +++ b/lib/blocks.js @@ -47,7 +47,7 @@ var reATXHeadingMarker = /^#{1,6}(?:[ \t]+|$)/; var reCodeFence = /^`{3,}(?!.*`)|^~{3,}/; -var reClosingCodeFence = /^(?:`{3,}|~{3,})(?= *$)/; +var reClosingCodeFence = /^(?:`{3,}|~{3,})(?=[ \t]*$)/; var reSetextHeadingLine = /^(?:=+|-+)[ \t]*$/; diff --git a/test/regression.txt b/test/regression.txt index be426f87..af7e1845 100644 --- a/test/regression.txt +++ b/test/regression.txt @@ -227,3 +227,14 @@ https://talk.commonmark.org/t/link-label-collapse-all-internal-whitespace/3919/5 .

foo

```````````````````````````````` + +Issue #258 + +```````````````````````````````` example +``` +abc +``` +. +
abc
+
+```````````````````````````````` From 3ea1589ae33c56e5943142ea5f2869df095aa214 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Sat, 13 Aug 2022 14:05:32 -0700 Subject: [PATCH 02/34] Updaet spec.txt. --- test/spec.txt | 25 +++++++++++-------------- 1 file changed, 11 insertions(+), 14 deletions(-) diff --git a/test/spec.txt b/test/spec.txt index fefb308b..5a5b11d5 100644 --- a/test/spec.txt +++ b/test/spec.txt @@ -114,7 +114,7 @@ questions it does not answer: not require that. This is hardly a "corner case," and divergences between implementations on this issue often lead to surprises for users in real documents. (See [this comment by John - Gruber](http://article.gmane.org/gmane.text.markdown.general/1997).) + Gruber](https://web.archive.org/web/20170611172104/http://article.gmane.org/gmane.text.markdown.general/1997).) 2. Is a blank line needed before a block quote or heading? Most implementations do not require the blank line. However, @@ -122,7 +122,7 @@ questions it does not answer: also to ambiguities in parsing (note that some implementations put the heading inside the blockquote, while others do not). (John Gruber has also spoken [in favor of requiring the blank - lines](http://article.gmane.org/gmane.text.markdown.general/2146).) + lines](https://web.archive.org/web/20170611172104/http://article.gmane.org/gmane.text.markdown.general/2146).) 3. Is a blank line needed before an indented code block? (`Markdown.pl` requires it, but this is not mentioned in the @@ -155,7 +155,7 @@ questions it does not answer: ``` (There are some relevant comments by John Gruber - [here](http://article.gmane.org/gmane.text.markdown.general/2554).) + [here](https://web.archive.org/web/20170611172104/http://article.gmane.org/gmane.text.markdown.general/2554).) 5. Can list markers be indented? Can ordered list markers be right-aligned? 
@@ -1330,10 +1330,7 @@ interpretable as a [code fence], [ATX heading][ATX headings], A [setext heading underline](@) is a sequence of `=` characters or a sequence of `-` characters, with no more than 3 -spaces of indentation and any number of trailing spaces or tabs. If a line -containing a single `-` can be interpreted as an -empty [list items], it should be interpreted this way -and not as a [setext heading underline]. +spaces of indentation and any number of trailing spaces or tabs. The heading is a level 1 heading if `=` characters are used in the [setext heading underline], and a level 2 heading if `-` @@ -1967,7 +1964,7 @@ has been found, the code block contains all of the lines after the opening code fence until the end of the containing block (or document). (An alternative spec would require backtracking in the event that a closing code fence is not found. But this makes parsing -much less efficient, and there seems to be no real down side to the +much less efficient, and there seems to be no real downside to the behavior described here.) A fenced code block may interrupt a paragraph, and does not require @@ -2397,7 +2394,7 @@ followed by an ASCII letter.\ ``. -6. **Start condition:** line begins the string `<` or ``, or the string `/>`.\ @@ -4533,7 +4530,7 @@ inside the code block: Note that rules #1 and #2 only apply to two cases: (a) cases in which the lines to be included in a list item begin with a -characer other than a space or tab, and (b) cases in which +character other than a space or tab, and (b) cases in which they begin with an indented code block. In a case like the following, where the first block begins with three spaces of indentation, the rules do not allow us to form a list item by @@ -5357,7 +5354,7 @@ well. ([reStructuredText](http://docutils.sourceforge.net/rst.html) takes a different approach, requiring blank lines before lists even inside other list items.) 
-In order to solve of unwanted lists in paragraphs with +In order to solve the problem of unwanted lists in paragraphs with hard-wrapped numerals, we allow only lists starting with `1` to interrupt paragraphs. Thus, @@ -8606,7 +8603,7 @@ My ![foo bar](/path/to/train.jpg "title" ) ```````````````````````````````` example ![](/url) . -

+

```````````````````````````````` @@ -8754,7 +8751,7 @@ a link to the URI, with the URI as the link's label. An [absolute URI](@), for these purposes, consists of a [scheme] followed by a colon (`:`) -followed by zero or more characters other [ASCII control +followed by zero or more characters other than [ASCII control characters][ASCII control character], [space], `<`, and `>`. If the URI includes these characters, they must be percent-encoded (e.g. `%20` for a space). From 4082a182653a8b1c123c7d8a32e71ff44755b4c0 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Sat, 13 Aug 2022 14:06:12 -0700 Subject: [PATCH 03/34] Omit alt attribute if it would be empty. See commonmark/commonmark-spec#718. --- lib/render/html.js | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/lib/render/html.js b/lib/render/html.js index 2963deae..b7de412d 100644 --- a/lib/render/html.js +++ b/lib/render/html.js @@ -80,19 +80,25 @@ function image(node, entering) { if (entering) { if (this.disableTags === 0) { if (this.options.safe && potentiallyUnsafe(node.destination)) { - this.lit('');
+                this.lit('<img src='); + this.lit(' />'); } } } From 664594977648b8d13bdc5792893153dc257ac5a0 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Tue, 23 Aug 2022 08:57:01 -0700 Subject: [PATCH 04/34] Revert "Omit alt attribute if it would be empty." This reverts commit 4082a182653a8b1c123c7d8a32e71ff44755b4c0. --- lib/render/html.js | 14 ++++---------- 1 file changed, 4 insertions(+), 10 deletions(-) diff --git a/lib/render/html.js b/lib/render/html.js index b7de412d..2963deae 100644 --- a/lib/render/html.js +++ b/lib/render/html.js @@ -80,25 +80,19 @@ function image(node, entering) { if (entering) { if (this.disableTags === 0) { if (this.options.safe && potentiallyUnsafe(node.destination)) { - this.lit('');
             } else {
-                this.lit('<img src='); } } } From 347590b4f71cd8b016f69be8eb35439237e18fcf Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Tue, 23 Aug 2022 08:57:05 -0700 Subject: [PATCH 05/34] Revert "Updaet spec.txt." This reverts commit 3ea1589ae33c56e5943142ea5f2869df095aa214. --- test/spec.txt | 25 ++++++++++++++----------- 1 file changed, 14 insertions(+), 11 deletions(-) diff --git a/test/spec.txt b/test/spec.txt index 5a5b11d5..fefb308b 100644 --- a/test/spec.txt +++ b/test/spec.txt @@ -114,7 +114,7 @@ questions it does not answer: not require that. This is hardly a "corner case," and divergences between implementations on this issue often lead to surprises for users in real documents. (See [this comment by John - Gruber](https://web.archive.org/web/20170611172104/http://article.gmane.org/gmane.text.markdown.general/1997).) + Gruber](http://article.gmane.org/gmane.text.markdown.general/1997).) 2. Is a blank line needed before a block quote or heading? Most implementations do not require the blank line. However, @@ -122,7 +122,7 @@ questions it does not answer: also to ambiguities in parsing (note that some implementations put the heading inside the blockquote, while others do not). (John Gruber has also spoken [in favor of requiring the blank - lines](https://web.archive.org/web/20170611172104/http://article.gmane.org/gmane.text.markdown.general/2146).) + lines](http://article.gmane.org/gmane.text.markdown.general/2146).) 3. Is a blank line needed before an indented code block? (`Markdown.pl` requires it, but this is not mentioned in the @@ -155,7 +155,7 @@ questions it does not answer: ``` (There are some relevant comments by John Gruber - [here](https://web.archive.org/web/20170611172104/http://article.gmane.org/gmane.text.markdown.general/2554).) + [here](http://article.gmane.org/gmane.text.markdown.general/2554).) 5. Can list markers be indented? Can ordered list markers be right-aligned? 
@@ -1330,7 +1330,10 @@ interpretable as a [code fence], [ATX heading][ATX headings], A [setext heading underline](@) is a sequence of `=` characters or a sequence of `-` characters, with no more than 3 -spaces of indentation and any number of trailing spaces or tabs. +spaces of indentation and any number of trailing spaces or tabs. If a line +containing a single `-` can be interpreted as an +empty [list items], it should be interpreted this way +and not as a [setext heading underline]. The heading is a level 1 heading if `=` characters are used in the [setext heading underline], and a level 2 heading if `-` @@ -1964,7 +1967,7 @@ has been found, the code block contains all of the lines after the opening code fence until the end of the containing block (or document). (An alternative spec would require backtracking in the event that a closing code fence is not found. But this makes parsing -much less efficient, and there seems to be no real downside to the +much less efficient, and there seems to be no real down side to the behavior described here.) A fenced code block may interrupt a paragraph, and does not require @@ -2394,7 +2397,7 @@ followed by an ASCII letter.\ ``. -6. **Start condition:** line begins with the string `<` or ``, or the string `/>`.\ @@ -4530,7 +4533,7 @@ inside the code block: Note that rules #1 and #2 only apply to two cases: (a) cases in which the lines to be included in a list item begin with a -character other than a space or tab, and (b) cases in which +characer other than a space or tab, and (b) cases in which they begin with an indented code block. In a case like the following, where the first block begins with three spaces of indentation, the rules do not allow us to form a list item by @@ -5354,7 +5357,7 @@ well. ([reStructuredText](http://docutils.sourceforge.net/rst.html) takes a different approach, requiring blank lines before lists even inside other list items.) 
-In order to solve the problem of unwanted lists in paragraphs with +In order to solve of unwanted lists in paragraphs with hard-wrapped numerals, we allow only lists starting with `1` to interrupt paragraphs. Thus, @@ -8603,7 +8606,7 @@ My ![foo bar](/path/to/train.jpg "title" ) ```````````````````````````````` example ![](/url) . -

+

```````````````````````````````` @@ -8751,7 +8754,7 @@ a link to the URI, with the URI as the link's label. An [absolute URI](@), for these purposes, consists of a [scheme] followed by a colon (`:`) -followed by zero or more characters other than [ASCII control +followed by zero or more characters other [ASCII control characters][ASCII control character], [space], `<`, and `>`. If the URI includes these characters, they must be percent-encoded (e.g. `%20` for a space). From c0f56ac2c2133deb0dde75b5eee407633e686b5f Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Sat, 27 Aug 2022 10:46:27 -0700 Subject: [PATCH 06/34] Fix a pathological performance case in link titles. Unclosed (potential) link titles containing lots of consecutive underscores caused an exponential slowdown. Thanks to Stefan Dobre for reporting. --- lib/inlines.js | 9 ++++++--- test/test.js | 8 ++++++++ 2 files changed, 14 insertions(+), 3 deletions(-) diff --git a/lib/inlines.js b/lib/inlines.js index 6dc17a40..635a3ac2 100644 --- a/lib/inlines.js +++ b/lib/inlines.js @@ -42,15 +42,18 @@ var rePunctuation = new RegExp( var reLinkTitle = new RegExp( '^(?:"(' + ESCAPED_CHAR + - '|[^"\\x00])*"' + + '|\\\\[^\\\\]' + + '|[^\\\\"\\x00])*"' + "|" + "'(" + ESCAPED_CHAR + - "|[^'\\x00])*'" + + '|\\\\[^\\\\]' + + "|[^\\\\'\\x00])*'" + "|" + "\\((" + ESCAPED_CHAR + - "|[^()\\x00])*\\))" + '|\\\\[^\\\\]' + + "|[^\\\\()\\x00])*\\))" ); var reLinkDestinationBraces = /^(?:<(?:[^<>\n\\\x00]|\\.)*>)/; diff --git a/test/test.js b/test/test.js index ca360948..34d1546d 100755 --- a/test/test.js +++ b/test/test.js @@ -289,6 +289,14 @@ for (x = 1000; x <= 10000; x *= 10) { expected: "

" + "[" + repeat("\\", x / 2) + "

\n" }); } +for (x = 10; x <= 1000; x *= 10) { + cases.push({ + name: x + " backslashes in unclosed link title", + input: "[test](\\url \"" + repeat("\\", x) + "\n", + expected: "

[test](\\url "" + repeat("\\", x / 2) + "

\n" + }); +} + // Commented out til we have a fix... see #129 // for (x = 1000; x <= 10000; x *= 10) { // cases.push( From 5ff18af43bf9e3b69ed683b6cd7097e261a23b6b Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Thu, 8 Sep 2022 10:14:04 -0700 Subject: [PATCH 07/34] Update spec.txt. --- test/spec.txt | 54 +++++++++++++++++++-------------------------------- 1 file changed, 20 insertions(+), 34 deletions(-) diff --git a/test/spec.txt b/test/spec.txt index fefb308b..43247fee 100644 --- a/test/spec.txt +++ b/test/spec.txt @@ -114,7 +114,7 @@ questions it does not answer: not require that. This is hardly a "corner case," and divergences between implementations on this issue often lead to surprises for users in real documents. (See [this comment by John - Gruber](http://article.gmane.org/gmane.text.markdown.general/1997).) + Gruber](https://web.archive.org/web/20170611172104/http://article.gmane.org/gmane.text.markdown.general/1997).) 2. Is a blank line needed before a block quote or heading? Most implementations do not require the blank line. However, @@ -122,7 +122,7 @@ questions it does not answer: also to ambiguities in parsing (note that some implementations put the heading inside the blockquote, while others do not). (John Gruber has also spoken [in favor of requiring the blank - lines](http://article.gmane.org/gmane.text.markdown.general/2146).) + lines](https://web.archive.org/web/20170611172104/http://article.gmane.org/gmane.text.markdown.general/2146).) 3. Is a blank line needed before an indented code block? (`Markdown.pl` requires it, but this is not mentioned in the @@ -155,7 +155,7 @@ questions it does not answer: ``` (There are some relevant comments by John Gruber - [here](http://article.gmane.org/gmane.text.markdown.general/2554).) + [here](https://web.archive.org/web/20170611172104/http://article.gmane.org/gmane.text.markdown.general/2554).) 5. Can list markers be indented? Can ordered list markers be right-aligned? 
@@ -1330,10 +1330,7 @@ interpretable as a [code fence], [ATX heading][ATX headings], A [setext heading underline](@) is a sequence of `=` characters or a sequence of `-` characters, with no more than 3 -spaces of indentation and any number of trailing spaces or tabs. If a line -containing a single `-` can be interpreted as an -empty [list items], it should be interpreted this way -and not as a [setext heading underline]. +spaces of indentation and any number of trailing spaces or tabs. The heading is a level 1 heading if `=` characters are used in the [setext heading underline], and a level 2 heading if `-` @@ -1967,7 +1964,7 @@ has been found, the code block contains all of the lines after the opening code fence until the end of the containing block (or document). (An alternative spec would require backtracking in the event that a closing code fence is not found. But this makes parsing -much less efficient, and there seems to be no real down side to the +much less efficient, and there seems to be no real downside to the behavior described here.) A fenced code block may interrupt a paragraph, and does not require @@ -2397,7 +2394,7 @@ followed by an ASCII letter.\ ``. -6. **Start condition:** line begins the string `<` or ``, or the string `/>`.\ @@ -4533,7 +4530,7 @@ inside the code block: Note that rules #1 and #2 only apply to two cases: (a) cases in which the lines to be included in a list item begin with a -characer other than a space or tab, and (b) cases in which +character other than a space or tab, and (b) cases in which they begin with an indented code block. In a case like the following, where the first block begins with three spaces of indentation, the rules do not allow us to form a list item by @@ -5357,7 +5354,7 @@ well. ([reStructuredText](http://docutils.sourceforge.net/rst.html) takes a different approach, requiring blank lines before lists even inside other list items.) 
-In order to solve of unwanted lists in paragraphs with +In order to solve the problem of unwanted lists in paragraphs with hard-wrapped numerals, we allow only lists starting with `1` to interrupt paragraphs. Thus, @@ -8754,7 +8751,7 @@ a link to the URI, with the URI as the link's label. An [absolute URI](@), for these purposes, consists of a [scheme] followed by a colon (`:`) -followed by zero or more characters other [ASCII control +followed by zero or more characters other than [ASCII control characters][ASCII control character], [space], `<`, and `>`. If the URI includes these characters, they must be percent-encoded (e.g. `%20` for a space). @@ -8980,10 +8977,9 @@ A [closing tag](@) consists of the string ``. -An [HTML comment](@) consists of ``, -where *text* does not start with `>` or `->`, does not end with `-`, -and does not contain `--`. (See the -[HTML5 spec](http://www.w3.org/TR/html5/syntax.html#comments).) +An [HTML comment](@) consists of ``, ``, or ``, and `-->` (see the +[HTML spec](https://html.spec.whatwg.org/multipage/parsing.html#markup-declaration-open-state)). A [processing instruction](@) consists of the string ` +foo . -

foo

+

foo

```````````````````````````````` - -```````````````````````````````` example -foo -. -

foo <!-- not a comment -- two hyphens -->

-```````````````````````````````` - - -Not comments: - ```````````````````````````````` example foo foo --> -foo +foo foo --> . -

foo <!--> foo -->

-

foo <!-- foo--->

+

foo foo -->

+

foo foo -->

```````````````````````````````` From 28c82d89e6c45e10483b058e06a009ec14312f97 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Thu, 8 Sep 2022 10:14:15 -0700 Subject: [PATCH 08/34] Fix HTML comment regex to conform to commonmark/commonmark-spec#713. --- lib/common.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/common.js b/lib/common.js index 298e9b0e..72d84301 100644 --- a/lib/common.js +++ b/lib/common.js @@ -24,7 +24,7 @@ var ATTRIBUTEVALUESPEC = "(?:" + "\\s*=" + "\\s*" + ATTRIBUTEVALUE + ")"; var ATTRIBUTE = "(?:" + "\\s+" + ATTRIBUTENAME + ATTRIBUTEVALUESPEC + "?)"; var OPENTAG = "<" + TAGNAME + ATTRIBUTE + "*" + "\\s*/?>"; var CLOSETAG = "]"; -var HTMLCOMMENT = "|"; +var HTMLCOMMENT = "||" var PROCESSINGINSTRUCTION = "[<][?][\\s\\S]*?[?][>]"; var DECLARATION = "]*>"; var CDATA = ""; From 5346db4d6071526403b78ae3ed766e6a7afdd426 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Thu, 8 Sep 2022 10:35:17 -0700 Subject: [PATCH 09/34] Fix HTML comment regex. 
--- lib/common.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/common.js b/lib/common.js index 72d84301..5b8912f8 100644 --- a/lib/common.js +++ b/lib/common.js @@ -24,7 +24,7 @@ var ATTRIBUTEVALUESPEC = "(?:" + "\\s*=" + "\\s*" + ATTRIBUTEVALUE + ")"; var ATTRIBUTE = "(?:" + "\\s+" + ATTRIBUTENAME + ATTRIBUTEVALUESPEC + "?)"; var OPENTAG = "<" + TAGNAME + ATTRIBUTE + "*" + "\\s*/?>"; var CLOSETAG = "]"; -var HTMLCOMMENT = "||" +var HTMLCOMMENT = "||" var PROCESSINGINSTRUCTION = "[<][?][\\s\\S]*?[?][>]"; var DECLARATION = "]*>"; var CDATA = ""; From 46538e5677db95885be4a74d963e2ee26b5aad23 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Wed, 21 Sep 2022 11:00:50 -0700 Subject: [PATCH 10/34] Allow `|$)/i, /^||" var PROCESSINGINSTRUCTION = "[<][?][\\s\\S]*?[?][>]"; -var DECLARATION = "]*>"; +var DECLARATION = "]*>"; var CDATA = ""; var HTMLTAG = "(?:" + diff --git a/test/regression.txt b/test/regression.txt index adbcf05f..91513f04 100644 --- a/test/regression.txt +++ b/test/regression.txt @@ -245,3 +245,11 @@ abc . ```````````````````````````````` + +Declarations don't need spaces, according to the spec (cmark#456) +```````````````````````````````` example +x +. +

x

+```````````````````````````````` + From 20b52e5bfe6729faccc0919026245da0f1b40ba3 Mon Sep 17 00:00:00 2001 From: Martin Geisler Date: Wed, 7 Jun 2023 17:45:30 +0200 Subject: [PATCH 12/34] Fix "CommomMark" typo (#270) --- lib/index.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/index.js b/lib/index.js index 5d437197..355715b2 100755 --- a/lib/index.js +++ b/lib/index.js @@ -1,6 +1,6 @@ "use strict"; -// commonmark.js - CommomMark in JavaScript +// commonmark.js - CommonMark in JavaScript // Copyright (C) 2014 John MacFarlane // License: BSD3. From df3ea1e80d98fce5ad7c72505f9230faa6f23492 Mon Sep 17 00:00:00 2001 From: taku0 Date: Thu, 9 Feb 2023 23:15:30 +0900 Subject: [PATCH 13/34] Fix list tightness. According to the specification, blank lines in a block quote doesn't separate list items: https://spec.commonmark.org/0.30/#example-320 Therefore, the following example should be tight: - > - a > - b The specification also say that link reference definitions can be children of list items when checking list tightness: https://spec.commonmark.org/0.30/#example-317 Therefore, the following example should be loose: - [aaa]: / [bbb]: / - b This commit fixes those problems with the following strategy: - Using source end position and start position of adjoining elements to check tightness. This requires adjusting source end position of some block types to exclude trailing blank lines. - Delaying removal of link reference definitions until the entire document is parsed. --- lib/blocks.js | 145 ++++++++++++++++++++----------------- lib/node.js | 2 - test/regression.txt | 173 ++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 250 insertions(+), 70 deletions(-) diff --git a/lib/blocks.js b/lib/blocks.js index 6548a186..0f0f844c 100644 --- a/lib/blocks.js +++ b/lib/blocks.js @@ -74,23 +74,10 @@ var peek = function(ln, pos) { // These are methods of a Parser object, defined below. 
-// Returns true if block ends with a blank line, descending if needed -// into lists and sublists. +// Returns true if block ends with a blank line. var endsWithBlankLine = function(block) { - while (block) { - if (block._lastLineBlank) { - return true; - } - var t = block.type; - if (!block._lastLineChecked && (t === "list" || t === "item")) { - block._lastLineChecked = true; - block = block._lastChild; - } else { - block._lastLineChecked = true; - break; - } - } - return false; + return block.next && + block.sourcepos[1][0] !== block.next.sourcepos[0][0] - 1; }; // Add a line to the block at the tip. We assume the tip @@ -221,6 +208,50 @@ var closeUnmatchedBlocks = function() { } }; +// Remove link reference definitions from given tree. +var removeLinkReferenceDefinitions = function(parser, tree) { + var event, node; + var walker = tree.walker(); + var emptyNodes = []; + + while ((event = walker.next())) { + node = event.node; + if (event.entering && node.type === "paragraph") { + var pos; + var hasReferenceDefs = false; + + // Try parsing the beginning as link reference definitions; + // Note that link reference definitions must be the beginning of a + // paragraph node since link reference definitions cannot interrupt + // paragraphs. + while ( + peek(node._string_content, 0) === C_OPEN_BRACKET && + (pos = parser.inlineParser.parseReference( + node._string_content, + parser.refmap + )) + ) { + const removedText = node._string_content.slice(0, pos); + + node._string_content = node._string_content.slice(pos); + hasReferenceDefs = true; + + const lines = removedText.split("\n"); + + // -1 for final newline. + node.sourcepos[0][0] += lines.length - 1; + } + if (hasReferenceDefs && isBlank(node._string_content)) { + emptyNodes.push(node); + } + } + } + + for (node of emptyNodes) { + node.unlink(); + } +}; + // 'finalize' is run when the block is closed. // 'continue' is run to check whether the block is continuing // at a certain line and offset (e.g. 
whether a block quote @@ -231,7 +262,8 @@ var blocks = { continue: function() { return 0; }, - finalize: function() { + finalize: function(parser, block) { + removeLinkReferenceDefinitions(parser, block); return; }, canContain: function(t) { @@ -247,7 +279,7 @@ var blocks = { var item = block._firstChild; while (item) { // check for non-final list item ending with blank line: - if (endsWithBlankLine(item) && item._next) { + if (item._next && endsWithBlankLine(item)) { block._listData.tight = false; break; } @@ -256,8 +288,8 @@ var blocks = { var subitem = item._firstChild; while (subitem) { if ( - endsWithBlankLine(subitem) && - (item._next || subitem._next) + subitem._next && + endsWithBlankLine(subitem) ) { block._listData.tight = false; break; @@ -266,6 +298,7 @@ var blocks = { } item = item._next; } + block.sourcepos[1] = block._lastChild.sourcepos[1]; }, canContain: function(t) { return t === "item"; @@ -320,7 +353,16 @@ var blocks = { } return 0; }, - finalize: function() { + finalize: function(parser, block) { + if (block._lastChild) { + block.sourcepos[1] = block._lastChild.sourcepos[1]; + } else { + // Empty list item + block.sourcepos[1][0] = block.sourcepos[0][0]; + block.sourcepos[1][1] = + block._listData.markerOffset + block._listData.padding; + } + return; }, canContain: function(t) { @@ -402,10 +444,17 @@ var blocks = { block._literal = rest; } else { // indented - block._literal = block._string_content.replace( - /(\n *)+$/, - "\n" - ); + var lines = block._string_content.split("\n"); + // Note that indented code block cannot be empty, so + // lines.length cannot be zero. 
+ while (/^[ \t]*$/.test(lines[lines.length - 1])) { + lines.pop(); + } + block._literal = lines.join("\n") + "\n"; + block.sourcepos[1][0] = + block.sourcepos[0][0] + lines.length - 1; + block.sourcepos[1][1] = + block.sourcepos[0][1] + lines[lines.length - 1].length - 1; } block._string_content = null; // allow GC }, @@ -423,7 +472,7 @@ var blocks = { : 0; }, finalize: function(parser, block) { - block._literal = block._string_content.replace(/(\n *)+$/, ""); + block._literal = block._string_content.replace(/\n$/, ''); block._string_content = null; // allow GC }, canContain: function() { @@ -435,24 +484,8 @@ var blocks = { continue: function(parser) { return parser.blank ? 1 : 0; }, - finalize: function(parser, block) { - var pos; - var hasReferenceDefs = false; - - // try parsing the beginning as link reference definitions: - while ( - peek(block._string_content, 0) === C_OPEN_BRACKET && - (pos = parser.inlineParser.parseReference( - block._string_content, - parser.refmap - )) - ) { - block._string_content = block._string_content.slice(pos); - hasReferenceDefs = true; - } - if (hasReferenceDefs && isBlank(block._string_content)) { - block.unlink(); - } + finalize: function() { + return; }, canContain: function() { return false; @@ -835,33 +868,9 @@ var incorporateLine = function(ln) { // finalize any blocks not matched this.closeUnmatchedBlocks(); - if (this.blank && container.lastChild) { - container.lastChild._lastLineBlank = true; - } t = container.type; - // Block quote lines are never blank as they start with > - // and we don't count blanks in fenced code for purposes of tight/loose - // lists or breaking out of lists. We also don't set _lastLineBlank - // on an empty list item, or if we just closed a fenced block. 
- var lastLineBlank = - this.blank && - !( - t === "block_quote" || - (t === "code_block" && container._isFenced) || - (t === "item" && - !container._firstChild && - container.sourcepos[0][0] === this.lineNumber) - ); - - // propagate lastLineBlank up through parents: - var cont = container; - while (cont) { - cont._lastLineBlank = lastLineBlank; - cont = cont._parent; - } - if (this.blocks[t].acceptsLines) { this.addLine(); // if HtmlBlock, check for end condition diff --git a/lib/node.js b/lib/node.js index 0e9c4b6f..12a17e03 100644 --- a/lib/node.js +++ b/lib/node.js @@ -74,8 +74,6 @@ var Node = function(nodeType, sourcepos) { this._prev = null; this._next = null; this._sourcepos = sourcepos; - this._lastLineBlank = false; - this._lastLineChecked = false; this._open = true; this._string_content = null; this._literal = null; diff --git a/test/regression.txt b/test/regression.txt index 91513f04..678e46ad 100644 --- a/test/regression.txt +++ b/test/regression.txt @@ -253,3 +253,176 @@ x

x

```````````````````````````````` +Block-quoted blank line shouldn't make parent list loose. +```````````````````````````````` example +## Case 1 + +- > a + > +- b + + +## Case 2 + +- > - a + > +- b + + +## Case 3 + +- > > a + > +- b + + +## Case 4 + +- > # a + > +- b + + +## Case 5 + +- ``` + The following line is part of code block. + +- b + +## Case 6 + +- The following line is **not** part of code block. + +- b + +## Case 7 + +-
The following line is part of HTML block.
+
+- 
+- b +. +

Case 1

+
    +
  • +
    +

    a

    +
    +
  • +
  • b
  • +
+

Case 2

+
    +
  • +
    +
      +
    • a
    • +
    +
    +
  • +
  • b
  • +
+

Case 3

+
    +
  • +
    +
    +

    a

    +
    +
    +
  • +
  • b
  • +
+

Case 4

+
    +
  • +
    +

    a

    +
    +
  • +
  • b
  • +
+

Case 5

+
    +
  • +
    The following line is part of code block.
    +
    +
    +
  • +
  • b
  • +
+

Case 6

+
    +
  • +
    The following line is **not** part of code block.
    +
    +
  • +
  • +

    b

    +
  • +
+

Case 7

+
    +
  • +
    The following line is part of HTML block.
    +
    +
  • +
  • + +
  • +
  • b
  • +
+```````````````````````````````` + +Link reference definitions are blocks when checking list tightness. +```````````````````````````````` example +## Case 1 + +- [aaa]: / + + [aaa]: / +- b + + +## Case 2 + +- a + + [aaa]: / +- b + + +## Case 3 + +- [aaa]: / + + a +- b +. +

Case 1

+
    +
  • +
  • +

    b

    +
  • +
+

Case 2

+
    +
  • +

    a

    +
  • +
  • +

    b

    +
  • +
+

Case 3

+
    +
  • +

    a

    +
  • +
  • +

    b

    +
  • +
+```````````````````````````````` From 97da298feb94e8c404c9c734a8b5965bbcbb398e Mon Sep 17 00:00:00 2001 From: Michael Howell Date: Wed, 25 Oct 2023 19:07:04 -0700 Subject: [PATCH 14/34] Track underscore bottom separately mod 3, like asterisk The reasoning that a failed delimiter means future delimiters will also fail only applies if the reason they failed was not the multiple-of-three rule. This was already implemented correctly for asterisks, but not for underscore. --- lib/inlines.js | 7 ++++--- test/regression.txt | 29 +++++++++++++++++++++++++++++ 2 files changed, 33 insertions(+), 3 deletions(-) diff --git a/lib/inlines.js b/lib/inlines.js index 635a3ac2..501a251f 100644 --- a/lib/inlines.js +++ b/lib/inlines.js @@ -382,7 +382,7 @@ var processEmphasis = function(stack_bottom) { var openers_bottom_index; var odd_match = false; - for (var i = 0; i < 8; i++) { + for (var i = 0; i < 14; i++) { openers_bottom[i] = stack_bottom; } // find first closer above stack_bottom: @@ -407,10 +407,11 @@ var processEmphasis = function(stack_bottom) { openers_bottom_index = 1; break; case C_UNDERSCORE: - openers_bottom_index = 2; + openers_bottom_index = 2 + (closer.can_open ? 3 : 0) + + (closer.origdelims % 3); break; case C_ASTERISK: - openers_bottom_index = 3 + (closer.can_open ? 3 : 0) + openers_bottom_index = 8 + (closer.can_open ? 3 : 0) + (closer.origdelims % 3); break; } diff --git a/test/regression.txt b/test/regression.txt index 678e46ad..98e98f11 100644 --- a/test/regression.txt +++ b/test/regression.txt @@ -426,3 +426,32 @@ Link reference definitions are blocks when checking list tightness. ```````````````````````````````` + +An underscore that is not part of a delimiter should not prevent another +pair of underscores from forming part of their own. +```````````````````````````````` example +__!_!__ + +__!x!__ + +**!*!** + +--- + +_*__*_* + +_*xx*_* + +_*__-_- + +_*xx-_- +. +

!_!

+

!x!

+

!*!

+
+

__*

+

xx*

+

*__--

+

*xx--

+```````````````````````````````` From 9f548fedcb74b527825078fe31f5748499f2937d Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Mon, 13 Nov 2023 18:47:28 -0800 Subject: [PATCH 15/34] Fix pathological regex for HTML comments. Closes #273. --- lib/common.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/common.js b/lib/common.js index 4139e80b..dd8f6984 100644 --- a/lib/common.js +++ b/lib/common.js @@ -24,7 +24,7 @@ var ATTRIBUTEVALUESPEC = "(?:" + "\\s*=" + "\\s*" + ATTRIBUTEVALUE + ")"; var ATTRIBUTE = "(?:" + "\\s+" + ATTRIBUTENAME + ATTRIBUTEVALUESPEC + "?)"; var OPENTAG = "<" + TAGNAME + ATTRIBUTE + "*" + "\\s*/?>"; var CLOSETAG = "]"; -var HTMLCOMMENT = "||" +var HTMLCOMMENT = "||" var PROCESSINGINSTRUCTION = "[<][?][\\s\\S]*?[?][>]"; var DECLARATION = "]*>"; var CDATA = ""; From c7aef3387d12eb5d3d702e827a4ad3f197cc6407 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Mon, 22 Jan 2024 09:46:17 -0800 Subject: [PATCH 16/34] Html renderer: don't add `language-` to code block class... if the info string already starts with `language-`. Closes #277. --- lib/render/html.js | 6 +++++- test/regression.txt | 16 ++++++++++++++++ 2 files changed, 21 insertions(+), 1 deletion(-) diff --git a/lib/render/html.js b/lib/render/html.js index 2963deae..4dc2fb58 100644 --- a/lib/render/html.js +++ b/lib/render/html.js @@ -144,7 +144,11 @@ function code_block(node) { var info_words = node.info ? node.info.split(/\s+/) : [], attrs = this.attrs(node); if (info_words.length > 0 && info_words[0].length > 0) { - attrs.push(["class", "language-" + this.esc(info_words[0])]); + var cls = this.esc(info_words[0]); + if (!/^language-/.exec(cls)) { + cls = "language-" + cls; + } + attrs.push(["class", cls]); } this.cr(); this.tag("pre"); diff --git a/test/regression.txt b/test/regression.txt index 98e98f11..48ba72a3 100644 --- a/test/regression.txt +++ b/test/regression.txt @@ -455,3 +455,19 @@ _*xx-_-

*__--

*xx--

```````````````````````````````` + +#277: +```````````````````````````````` example +```language-r +x <- 1 +``` + +```r +x <- 1 +``` +. +
x <- 1
+
+
x <- 1
+
+```````````````````````````````` From 0f947550a2d093b8490da949def8fc7ba4fdbc7a Mon Sep 17 00:00:00 2001 From: Michael Howell Date: Sat, 27 Jan 2024 14:34:22 -0700 Subject: [PATCH 17/34] Do not process `&`-entities that don't end in `;` Fixes #278 --- lib/common.js | 4 ++-- lib/inlines.js | 4 ++-- test/regression.txt | 13 +++++++++++++ 3 files changed, 17 insertions(+), 4 deletions(-) diff --git a/lib/common.js b/lib/common.js index dd8f6984..9d9cfe65 100644 --- a/lib/common.js +++ b/lib/common.js @@ -1,7 +1,7 @@ "use strict"; import encode from "mdurl/encode.js"; -import { decodeHTML } from "entities"; +import { decodeHTMLStrict } from "entities"; var C_BACKSLASH = 92; @@ -58,7 +58,7 @@ var unescapeChar = function(s) { if (s.charCodeAt(0) === C_BACKSLASH) { return s.charAt(1); } else { - return decodeHTML(s); + return decodeHTMLStrict(s); } }; diff --git a/lib/inlines.js b/lib/inlines.js index 501a251f..83f08a69 100644 --- a/lib/inlines.js +++ b/lib/inlines.js @@ -3,7 +3,7 @@ import Node from "./node.js"; import * as common from "./common.js"; import fromCodePoint from "./from-code-point.js"; -import { decodeHTML } from "entities"; +import { decodeHTMLStrict } from "entities"; import "string.prototype.repeat"; // Polyfill for String.prototype.repeat var normalizeURI = common.normalizeURI; @@ -773,7 +773,7 @@ var removeBracket = function() { var parseEntity = function(block) { var m; if ((m = this.match(reEntityHere))) { - block.appendChild(text(decodeHTML(m))); + block.appendChild(text(decodeHTMLStrict(m))); return true; } else { return false; diff --git a/test/regression.txt b/test/regression.txt index 48ba72a3..da235fb2 100644 --- a/test/regression.txt +++ b/test/regression.txt @@ -471,3 +471,16 @@ x <- 1
x <- 1
 
```````````````````````````````` + +#278 +```````````````````````````````` example +¶g; + +¶ + +¶ +. +

&parag;

+

&para

+

+```````````````````````````````` From d9a5ca8f727f549fcc7b7c41cdcfca0fdf2bb44e Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Sun, 28 Jan 2024 09:01:27 -0800 Subject: [PATCH 18/34] Update spec.txt to 0.31.1. --- test/spec.txt | 116 ++++++++++++++++++++++++++++---------------------- 1 file changed, 65 insertions(+), 51 deletions(-) diff --git a/test/spec.txt b/test/spec.txt index 43247fee..2442d811 100644 --- a/test/spec.txt +++ b/test/spec.txt @@ -1,9 +1,9 @@ --- title: CommonMark Spec author: John MacFarlane -version: '0.30' +version: '0.31.1' date: '2021-06-19' -license: '[CC-BY-SA 4.0](http://creativecommons.org/licenses/by-sa/4.0/)' +license: '[CC-BY-SA 4.0](https://creativecommons.org/licenses/by-sa/4.0/)' ... # Introduction @@ -14,7 +14,7 @@ Markdown is a plain text format for writing structured documents, based on conventions for indicating formatting in email and usenet posts. It was developed by John Gruber (with help from Aaron Swartz) and released in 2004 in the form of a -[syntax description](http://daringfireball.net/projects/markdown/syntax) +[syntax description](https://daringfireball.net/projects/markdown/syntax) and a Perl script (`Markdown.pl`) for converting Markdown to HTML. In the next decade, dozens of implementations were developed in many languages. Some extended the original @@ -34,10 +34,10 @@ As Gruber writes: > Markdown-formatted document should be publishable as-is, as > plain text, without looking like it's been marked up with tags > or formatting instructions. -> () +> () The point can be illustrated by comparing a sample of -[AsciiDoc](http://www.methods.co.nz/asciidoc/) with +[AsciiDoc](https://asciidoc.org/) with an equivalent sample of Markdown. Here is a sample of AsciiDoc from the AsciiDoc manual: @@ -103,7 +103,7 @@ source, not just in the processed document. ## Why is a spec needed? 
John Gruber's [canonical description of Markdown's -syntax](http://daringfireball.net/projects/markdown/syntax) +syntax](https://daringfireball.net/projects/markdown/syntax) does not specify the syntax unambiguously. Here are some examples of questions it does not answer: @@ -316,9 +316,9 @@ A line containing no characters, or a line containing only spaces The following definitions of character classes will be used in this spec: -A [Unicode whitespace character](@) is -any code point in the Unicode `Zs` general category, or a tab (`U+0009`), -line feed (`U+000A`), form feed (`U+000C`), or carriage return (`U+000D`). +A [Unicode whitespace character](@) is a character in the Unicode `Zs` general +category, or a tab (`U+0009`), line feed (`U+000A`), form feed (`U+000C`), or +carriage return (`U+000D`). [Unicode whitespace](@) is a sequence of one or more [Unicode whitespace characters]. @@ -337,9 +337,8 @@ is `!`, `"`, `#`, `$`, `%`, `&`, `'`, `(`, `)`, `[`, `\`, `]`, `^`, `_`, `` ` `` (U+005B–0060), `{`, `|`, `}`, or `~` (U+007B–007E). -A [Unicode punctuation character](@) is an [ASCII -punctuation character] or anything in -the general Unicode categories `Pc`, `Pd`, `Pe`, `Pf`, `Pi`, `Po`, or `Ps`. +A [Unicode punctuation character](@) is a character in the Unicode `P` +(puncuation) or `S` (symbol) general categories. ## Tabs @@ -579,9 +578,9 @@ raw HTML: ```````````````````````````````` example - + . -

http://example.com?find=\*

+

https://example.com?find=\*

```````````````````````````````` @@ -2403,7 +2402,7 @@ followed by one of the strings (case-insensitive) `address`, `h1`, `h2`, `h3`, `h4`, `h5`, `h6`, `head`, `header`, `hr`, `html`, `iframe`, `legend`, `li`, `link`, `main`, `menu`, `menuitem`, `nav`, `noframes`, `ol`, `optgroup`, `option`, `p`, `param`, -`section`, `summary`, `table`, `tbody`, `td`, +`search`, `section`, `summary`, `table`, `tbody`, `td`, `tfoot`, `th`, `thead`, `title`, `tr`, `track`, `ul`, followed by a space, a tab, the end of the line, the string `>`, or the string `/>`.\ @@ -4115,7 +4114,7 @@ The following rules define [list items]: blocks *Bs* starting with a character other than a space or tab, and *M* is a list marker of width *W* followed by 1 ≤ *N* ≤ 4 spaces of indentation, then the result of prepending *M* and the following spaces to the first line - of Ls*, and indenting subsequent lines of *Ls* by *W + N* spaces, is a + of *Ls*, and indenting subsequent lines of *Ls* by *W + N* spaces, is a list item with *Bs* as its contents. The type of the list item (bullet or ordered) is determined by the type of its list marker. If the list item is ordered, then it is also assigned a start @@ -5350,7 +5349,7 @@ by itself should be a paragraph followed by a nested sublist. Since it is well established Markdown practice to allow lists to interrupt paragraphs inside list items, the [principle of uniformity] requires us to allow this outside list items as -well. ([reStructuredText](http://docutils.sourceforge.net/rst.html) +well. ([reStructuredText](https://docutils.sourceforge.net/rst.html) takes a different approach, requiring blank lines before lists even inside other list items.) @@ -6055,18 +6054,18 @@ But this is an HTML tag: And this is code: ```````````````````````````````` example -`` +`` . -

<http://foo.bar.baz>`

+

<https://foo.bar.baz>`

```````````````````````````````` But this is an autolink: ```````````````````````````````` example -` +` . -

http://foo.bar.`baz`

+

https://foo.bar.`baz`

```````````````````````````````` @@ -6099,7 +6098,7 @@ closing backtick strings to be equal in length: ## Emphasis and strong emphasis John Gruber's original [Markdown syntax -description](http://daringfireball.net/projects/markdown/syntax#em) says: +description](https://daringfireball.net/projects/markdown/syntax#em) says: > Markdown treats asterisks (`*`) and underscores (`_`) as indicators of > emphasis. Text wrapped with one `*` or `_` will be wrapped with an HTML @@ -6201,7 +6200,7 @@ Here are some examples of delimiter runs. (The idea of distinguishing left-flanking and right-flanking delimiter runs based on the character before and the character after comes from Roopesh Chander's -[vfmd](http://www.vfmd.org/vfmd-spec/specification/#procedure-for-identifying-emphasis-tags). +[vfmd](https://web.archive.org/web/20220608143320/http://www.vfmd.org/vfmd-spec/specification/#procedure-for-identifying-emphasis-tags). vfmd uses the terminology "emphasis indicator string" instead of "delimiter run," and its rules for distinguishing left- and right-flanking runs are a bit more complex than the ones given here.) @@ -6343,6 +6342,21 @@ Unicode nonbreaking spaces count as whitespace, too: ```````````````````````````````` +Unicode symbols count as punctuation, too: + +```````````````````````````````` example +*$*alpha. + +*£*bravo. + +*€*charlie. +. +

*$*alpha.

+

*£*bravo.

+

*€*charlie.

+```````````````````````````````` + + Intraword emphasis with `*` is permitted: ```````````````````````````````` example @@ -7428,16 +7442,16 @@ _a `_`_ ```````````````````````````````` example -**a +**a . -

**ahttp://foo.bar/?q=**

+

**ahttps://foo.bar/?q=**

```````````````````````````````` ```````````````````````````````` example -__a +__a . -

__ahttp://foo.bar/?q=__

+

__ahttps://foo.bar/?q=__

```````````````````````````````` @@ -7685,13 +7699,13 @@ A link can contain fragment identifiers and queries: ```````````````````````````````` example [link](#fragment) -[link](http://example.com#fragment) +[link](https://example.com#fragment) -[link](http://example.com?foo=3#frag) +[link](https://example.com?foo=3#frag) .

link

-

link

-

link

+

link

+

link

```````````````````````````````` @@ -7935,9 +7949,9 @@ and autolinks over link grouping: ```````````````````````````````` example -[foo +[foo . -

[foohttp://example.com/?search=](uri)

+

[foohttps://example.com/?search=](uri)

```````````````````````````````` @@ -8091,11 +8105,11 @@ and autolinks over link grouping: ```````````````````````````````` example -[foo +[foo [ref]: /uri . -

[foohttp://example.com/?search=][ref]

+

[foohttps://example.com/?search=][ref]

```````````````````````````````` @@ -8295,7 +8309,7 @@ A [collapsed reference link](@) consists of a [link label] that [matches] a [link reference definition] elsewhere in the document, followed by the string `[]`. -The contents of the first link label are parsed as inlines, +The contents of the link label are parsed as inlines, which are used as the link's text. The link's URI and title are provided by the matching reference link definition. Thus, `[foo][]` is equivalent to `[foo][foo]`. @@ -8348,7 +8362,7 @@ A [shortcut reference link](@) consists of a [link label] that [matches] a [link reference definition] elsewhere in the document and is not followed by `[]` or a link label. -The contents of the first link label are parsed as inlines, +The contents of the link label are parsed as inlines, which are used as the link's text. The link's URI and title are provided by the matching link reference definition. Thus, `[foo]` is equivalent to `[foo][]`. @@ -8435,7 +8449,7 @@ following closing bracket: ```````````````````````````````` -Full and compact references take precedence over shortcut +Full and collapsed references take precedence over shortcut references: ```````````````````````````````` example @@ -8771,9 +8785,9 @@ Here are some valid autolinks: ```````````````````````````````` example - + . -

http://foo.bar.baz/test?q=hello&id=22&boolean

+

https://foo.bar.baz/test?q=hello&id=22&boolean

```````````````````````````````` @@ -8813,9 +8827,9 @@ with their syntax: ```````````````````````````````` example - + . -

http://../

+

https://../

```````````````````````````````` @@ -8829,18 +8843,18 @@ with their syntax: Spaces are not allowed in autolinks: ```````````````````````````````` example - + . -

<http://foo.bar/baz bim>

+

<https://foo.bar/baz bim>

```````````````````````````````` Backslash-escapes do not work inside autolinks: ```````````````````````````````` example - + . -

http://example.com/\[\

+

https://example.com/\[\

```````````````````````````````` @@ -8892,9 +8906,9 @@ These are not autolinks: ```````````````````````````````` example -< http://foo.bar > +< https://foo.bar > . -

< http://foo.bar >

+

< https://foo.bar >

```````````````````````````````` @@ -8913,9 +8927,9 @@ These are not autolinks: ```````````````````````````````` example -http://example.com +https://example.com . -

http://example.com

+

https://example.com

```````````````````````````````` @@ -9660,7 +9674,7 @@ through the stack for an opening `[` or `![` delimiter. delimiter from the stack, and return a literal text node `]`. - If we find one and it's active, then we parse ahead to see if - we have an inline link/image, reference link/image, compact reference + we have an inline link/image, reference link/image, collapsed reference link/image, or shortcut reference link/image. + If we don't, then we remove the opening delimiter from the From aef681b51f2cad6c4aa343dcc31355bf515fcf1e Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Sun, 28 Jan 2024 09:17:54 -0800 Subject: [PATCH 19/34] Treat unicode symbols like punctuation for purposes of flankingness. This updates the library to conform to the 0.31 spec. --- lib/inlines.js | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/lib/inlines.js b/lib/inlines.js index 83f08a69..948eb41b 100644 --- a/lib/inlines.js +++ b/lib/inlines.js @@ -36,8 +36,7 @@ var ENTITY = common.ENTITY; var reHtmlTag = common.reHtmlTag; var rePunctuation = new RegExp( - 
/^[!"#$%&'()*+,\-./:;<=>?@\[\]\\^_`{|}~\xA1\xA7\xAB\xB6\xB7\xBB\xBF\u037E\u0387\u055A-\u055F\u0589\u058A\u05BE\u05C0\u05C3\u05C6\u05F3\u05F4\u0609\u060A\u060C\u060D\u061B\u061E\u061F\u066A-\u066D\u06D4\u0700-\u070D\u07F7-\u07F9\u0830-\u083E\u085E\u0964\u0965\u0970\u0AF0\u0DF4\u0E4F\u0E5A\u0E5B\u0F04-\u0F12\u0F14\u0F3A-\u0F3D\u0F85\u0FD0-\u0FD4\u0FD9\u0FDA\u104A-\u104F\u10FB\u1360-\u1368\u1400\u166D\u166E\u169B\u169C\u16EB-\u16ED\u1735\u1736\u17D4-\u17D6\u17D8-\u17DA\u1800-\u180A\u1944\u1945\u1A1E\u1A1F\u1AA0-\u1AA6\u1AA8-\u1AAD\u1B5A-\u1B60\u1BFC-\u1BFF\u1C3B-\u1C3F\u1C7E\u1C7F\u1CC0-\u1CC7\u1CD3\u2010-\u2027\u2030-\u2043\u2045-\u2051\u2053-\u205E\u207D\u207E\u208D\u208E\u2308-\u230B\u2329\u232A\u2768-\u2775\u27C5\u27C6\u27E6-\u27EF\u2983-\u2998\u29D8-\u29DB\u29FC\u29FD\u2CF9-\u2CFC\u2CFE\u2CFF\u2D70\u2E00-\u2E2E\u2E30-\u2E42\u3001-\u3003\u3008-\u3011\u3014-\u301F\u3030\u303D\u30A0\u30FB\uA4FE\uA4FF\uA60D-\uA60F\uA673\uA67E\uA6F2-\uA6F7\uA874-\uA877\uA8CE\uA8CF\uA8F8-\uA8FA\uA8FC\uA92E\uA92F\uA95F\uA9C1-\uA9CD\uA9DE\uA9DF\uAA5C-\uAA5F\uAADE\uAADF\uAAF0\uAAF1\uABEB\uFD3E\uFD3F\uFE10-\uFE19\uFE30-\uFE52\uFE54-\uFE61\uFE63\uFE68\uFE6A\uFE6B\uFF01-\uFF03\uFF05-\uFF0A\uFF0C-\uFF0F\uFF1A\uFF1B\uFF1F\uFF20\uFF3B-\uFF3D\uFF3F\uFF5B\uFF5D\uFF5F-\uFF65]|\uD800[\uDD00-\uDD02\uDF9F\uDFD0]|\uD801\uDD6F|\uD802[\uDC57\uDD1F\uDD3F\uDE50-\uDE58\uDE7F\uDEF0-\uDEF6\uDF39-\uDF3F\uDF99-\uDF9C]|\uD804[\uDC47-\uDC4D\uDCBB\uDCBC\uDCBE-\uDCC1\uDD40-\uDD43\uDD74\uDD75\uDDC5-\uDDC9\uDDCD\uDDDB\uDDDD-\uDDDF\uDE38-\uDE3D\uDEA9]|\uD805[\uDCC6\uDDC1-\uDDD7\uDE41-\uDE43\uDF3C-\uDF3E]|\uD809[\uDC70-\uDC74]|\uD81A[\uDE6E\uDE6F\uDEF5\uDF37-\uDF3B\uDF44]|\uD82F\uDC9F|\uD836[\uDE87-\uDE8B]/ -); + /^[!"#$%&'()*+,\-./:;<=>?@\[\]\\^_`{|}~\p{P}\p{S}]/u); var reLinkTitle = new RegExp( '^(?:"(' + From 52546e91f937e9462ab6bd73e1c6cc46d8c5c2dc Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Sun, 28 Jan 2024 11:26:49 -0800 Subject: [PATCH 20/34] Update to 0.31.2 spec.txt. 
--- test/spec.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/spec.txt b/test/spec.txt index 2442d811..f1fab281 100644 --- a/test/spec.txt +++ b/test/spec.txt @@ -1,8 +1,8 @@ --- title: CommonMark Spec author: John MacFarlane -version: '0.31.1' -date: '2021-06-19' +version: '0.31.2' +date: '2024-01-28' license: '[CC-BY-SA 4.0](https://creativecommons.org/licenses/by-sa/4.0/)' ... From cc7febb2196f0570b4c2f27b1ef8f40c7505e4a3 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Sun, 28 Jan 2024 13:11:11 -0800 Subject: [PATCH 21/34] Bump to 0.31.0, update changelog. --- changelog.txt | 23 +++++++++++++++++++++++ package.json | 2 +- 2 files changed, 24 insertions(+), 1 deletion(-) diff --git a/changelog.txt b/changelog.txt index 49284cb5..8e5e61f8 100644 --- a/changelog.txt +++ b/changelog.txt @@ -1,3 +1,26 @@ +[0.31.0] + + * Update to 0.31 spec.txt. + * Treat unicode symbols like punctuation for purposes of flankingness. + This updates the library to conform to the 0.31 spec. + * Do not process `&`-entities that don't end in `;` (#278, Michael Howell). + * Html renderer: don't add `language-` to code block class + if the info string already starts with `language-` (#277). + * Fix pathological regex for HTML comments (#273). + * Track underscore bottom separately mod 3, like asterisk (Michael Howell). + * Fix list tightness (taku0). + * Fix "CommomMark" typo (#270, Martin Geisler). + * Declarations do not need a space, per the spec (commonmark/cmark#456). + * Allow ` Date: Sun, 28 Jan 2024 13:22:31 -0800 Subject: [PATCH 22/34] Remove obsolete things from release checklist. 
--- release_checklist.md | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/release_checklist.md b/release_checklist.md index d1935c8a..455957f8 100644 --- a/release_checklist.md +++ b/release_checklist.md @@ -2,11 +2,10 @@ Release checklist _ update changelog.txt _ update version in package.json -_ make dist _ test _ tag release _ git push _ git push --tags +_ npm login _ npm publish _ create github release -_ update babelmark2: copy commonmark.js to src/babelmark2/js on server From 2df33d533d6d5fced28a7a70b44637e59a8cd56c Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Sun, 28 Jan 2024 18:28:10 -0800 Subject: [PATCH 23/34] Remove source, add search to list of recognized block tags. A spec 0.31 change we forgot in last release. --- lib/blocks.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/blocks.js b/lib/blocks.js index 0f0f844c..aba1eacf 100644 --- a/lib/blocks.js +++ b/lib/blocks.js @@ -20,7 +20,7 @@ var reHtmlBlockOpen = [ /^<[?]/, /^]|$)/i, + /^<[/]?(?:address|article|aside|base|basefont|blockquote|body|caption|center|col|colgroup|dd|details|dialog|dir|div|dl|dt|fieldset|figcaption|figure|footer|form|frame|frameset|h[123456]|head|header|hr|html|iframe|legend|li|link|main|menu|menuitem|nav|noframes|ol|optgroup|option|p|param|section|search|summary|table|tbody|td|tfoot|th|thead|title|tr|track|ul)(?:\s|[/]?[>]|$)/i, new RegExp("^(?:" + OPENTAG + "|" + CLOSETAG + ")\\s*$", "i") ]; From ba128a806dfa5376379e845152bcbb210c80fab4 Mon Sep 17 00:00:00 2001 From: Steven Date: Mon, 29 Jan 2024 11:00:27 -0500 Subject: [PATCH 24/34] Remove string.prototype.repeat polyfill --- lib/inlines.js | 1 - package.json | 3 +-- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/lib/inlines.js b/lib/inlines.js index 948eb41b..dd30bd4f 100644 --- a/lib/inlines.js +++ b/lib/inlines.js @@ -4,7 +4,6 @@ import Node from "./node.js"; import * as common from "./common.js"; import fromCodePoint from "./from-code-point.js"; import { 
decodeHTMLStrict } from "entities"; -import "string.prototype.repeat"; // Polyfill for String.prototype.repeat var normalizeURI = common.normalizeURI; var unescapeString = common.unescapeString; diff --git a/package.json b/package.json index c4849fcb..8f09b17f 100644 --- a/package.json +++ b/package.json @@ -37,8 +37,7 @@ "dependencies": { "entities": "~3.0.1", "mdurl": "~1.0.1", - "minimist": "~1.2.5", - "string.prototype.repeat": "^1.0.0" + "minimist": "~1.2.5" }, "directories": { "lib": "./lib" From 3ef341bdd7f59e272badb40c17bc3958eea288e2 Mon Sep 17 00:00:00 2001 From: Michael Howell Date: Fri, 2 Feb 2024 15:03:50 -0700 Subject: [PATCH 25/34] Fix title-related backtracking with empty string Fixes #281 --- lib/inlines.js | 7 +++---- test/regression.txt | 8 ++++++++ 2 files changed, 11 insertions(+), 4 deletions(-) diff --git a/lib/inlines.js b/lib/inlines.js index dd30bd4f..1ecf8eef 100644 --- a/lib/inlines.js +++ b/lib/inlines.js @@ -884,7 +884,6 @@ var parseReference = function(s, refmap) { title = this.parseLinkTitle(); } if (title === null) { - title = ""; // rewind before spaces this.pos = beforetitle; } @@ -892,13 +891,13 @@ var parseReference = function(s, refmap) { // make sure we're at line end: var atLineEnd = true; if (this.match(reSpaceAtEndOfLine) === null) { - if (title === "") { + if (title === null) { atLineEnd = false; } else { // the potential title we found is not at the line end, // but it could still be a legal link reference if we // discard the title - title = ""; + title = null; // rewind before spaces this.pos = beforetitle; // and instead check if the link URL is at the line end @@ -919,7 +918,7 @@ var parseReference = function(s, refmap) { } if (!refmap[normlabel]) { - refmap[normlabel] = { destination: dest, title: title }; + refmap[normlabel] = { destination: dest, title: title === null ? 
"" : title }; } return this.pos - startpos; }; diff --git a/test/regression.txt b/test/regression.txt index da235fb2..aea81378 100644 --- a/test/regression.txt +++ b/test/regression.txt @@ -484,3 +484,11 @@ x <- 1

&para

```````````````````````````````` + +#281 +```````````````````````````````` example +[test]:example +""third [test] +. +

""third test

+```````````````````````````````` \ No newline at end of file From a4d859c3ba5794f798d74e181297a94349776336 Mon Sep 17 00:00:00 2001 From: Michael Howell Date: Fri, 1 Mar 2024 14:21:36 -0700 Subject: [PATCH 26/34] Accept lowercase inline HTML declarations --- lib/common.js | 2 +- test/regression.txt | 12 +++++++++++- 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/lib/common.js b/lib/common.js index 9d9cfe65..2db7dc81 100644 --- a/lib/common.js +++ b/lib/common.js @@ -26,7 +26,7 @@ var OPENTAG = "<" + TAGNAME + ATTRIBUTE + "*" + "\\s*/?>"; var CLOSETAG = "]"; var HTMLCOMMENT = "||" var PROCESSINGINSTRUCTION = "[<][?][\\s\\S]*?[?][>]"; -var DECLARATION = "]*>"; +var DECLARATION = "]*>"; var CDATA = ""; var HTMLTAG = "(?:" + diff --git a/test/regression.txt b/test/regression.txt index aea81378..bf34ab6c 100644 --- a/test/regression.txt +++ b/test/regression.txt @@ -491,4 +491,14 @@ x <- 1 ""third [test] .

""third test

-```````````````````````````````` \ No newline at end of file +```````````````````````````````` + +#283 +```````````````````````````````` example +x + +x +. +

x

+

x<!>

+```````````````````````````````` From 6f94a997e9c0ece4467e47f94f93b89ce5dd3ff8 Mon Sep 17 00:00:00 2001 From: Robin Stocker Date: Wed, 6 Mar 2024 13:55:47 +1100 Subject: [PATCH 27/34] Fix HTML comment parsing with `-` before closing `-->` (#286) Fixes #285. --- lib/common.js | 2 +- test/regression.txt | 16 ++++++++++++++++ 2 files changed, 17 insertions(+), 1 deletion(-) diff --git a/lib/common.js b/lib/common.js index 2db7dc81..7f31f265 100644 --- a/lib/common.js +++ b/lib/common.js @@ -24,7 +24,7 @@ var ATTRIBUTEVALUESPEC = "(?:" + "\\s*=" + "\\s*" + ATTRIBUTEVALUE + ")"; var ATTRIBUTE = "(?:" + "\\s+" + ATTRIBUTENAME + ATTRIBUTEVALUESPEC + "?)"; var OPENTAG = "<" + TAGNAME + ATTRIBUTE + "*" + "\\s*/?>"; var CLOSETAG = "]"; -var HTMLCOMMENT = "||" +var HTMLCOMMENT = "||" var PROCESSINGINSTRUCTION = "[<][?][\\s\\S]*?[?][>]"; var DECLARATION = "]*>"; var CDATA = ""; diff --git a/test/regression.txt b/test/regression.txt index bf34ab6c..40630a7b 100644 --- a/test/regression.txt +++ b/test/regression.txt @@ -502,3 +502,19 @@ x

x

x<!>

```````````````````````````````` + +#285 +```````````````````````````````` example +foo + +foo + +foo + +foo more --> +. +

foo

+

foo

+

foo

+

foo more -->

+```````````````````````````````` From 16ff08a7f86c2b958474bf94b515e4626a4fb8f3 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Sun, 28 Jul 2024 10:26:59 -0700 Subject: [PATCH 28/34] Bump to 0.31.1, update changelog --- changelog.txt | 11 +++++++++++ package.json | 2 +- 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/changelog.txt b/changelog.txt index 8e5e61f8..0e1ddb2f 100644 --- a/changelog.txt +++ b/changelog.txt @@ -1,3 +1,14 @@ +[0.31.1] + + * Fix HTML comment parsing with `-` before closing `-->` + (#285, Robin Stocker). + * Accept lowercase inline HTML declarations (Michael Howell). + * Fix title-related backtracking with empty string (#281, + Michael Howell). + * Remove `string.prototype.repeat` polyfill (Steven). + * Remove `source`, add `search` to list of recognized block tags. + (a spec 0.31 change we forgot in last release). + [0.31.0] * Update to 0.31 spec.txt. diff --git a/package.json b/package.json index 8f09b17f..b777556c 100644 --- a/package.json +++ b/package.json @@ -1,7 +1,7 @@ { "name": "commonmark", "description": "a strongly specified, highly compatible variant of Markdown", - "version": "0.31.0", + "version": "0.31.1", "homepage": "https://commonmark.org", "keywords": [ "markdown", From cb7e2e3b30b51968c25fe9182660cf2512aa2941 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Wed, 18 Sep 2024 17:53:51 -0700 Subject: [PATCH 29/34] Require minimist >= 1.2.8. Closes #290. 
See https://nvd.nist.gov/vuln/detail/CVE-2021-44906 --- package.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/package.json b/package.json index b777556c..730f2c5a 100644 --- a/package.json +++ b/package.json @@ -37,7 +37,7 @@ "dependencies": { "entities": "~3.0.1", "mdurl": "~1.0.1", - "minimist": "~1.2.5" + "minimist": "~1.2.8" }, "directories": { "lib": "./lib" From cb2c2303d3550ec6ef28ceb2841f148e8761eebf Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Thu, 19 Sep 2024 08:15:53 -0700 Subject: [PATCH 30/34] Bump to 0.31.2, update changelog. --- changelog.txt | 4 ++++ package.json | 2 +- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/changelog.txt b/changelog.txt index 0e1ddb2f..baf4ec3f 100644 --- a/changelog.txt +++ b/changelog.txt @@ -1,3 +1,7 @@ +[0.31.2] + + * Require minimist >= 1.2.8 (#290), see CVE-2021-44906. + [0.31.1] * Fix HTML comment parsing with `-` before closing `-->` diff --git a/package.json b/package.json index 730f2c5a..20f6c93a 100644 --- a/package.json +++ b/package.json @@ -1,7 +1,7 @@ { "name": "commonmark", "description": "a strongly specified, highly compatible variant of Markdown", - "version": "0.31.1", + "version": "0.31.2", "homepage": "https://commonmark.org", "keywords": [ "markdown", From f3145e85366d8f3322e09f856ea0a3370f7dde82 Mon Sep 17 00:00:00 2001 From: Tatsunori Uchino Date: Sun, 1 Sep 2024 14:46:59 +0900 Subject: [PATCH 31/34] Don't trim non-ASCII whitespace --- lib/inlines.js | 6 +++++- test/regression.txt | 28 ++++++++++++++++++++++++++++ 2 files changed, 33 insertions(+), 1 deletion(-) diff --git a/lib/inlines.js b/lib/inlines.js index 1ecf8eef..1f0cbb7c 100644 --- a/lib/inlines.js +++ b/lib/inlines.js @@ -980,7 +980,11 @@ var parseInline = function(block) { // Parse string content in block into inline children, // using refmap to resolve references. 
var parseInlines = function(block) { - this.subject = block._string_content.trim(); + // trim() removes non-ASCII whitespaces, vertical tab, form feed and so on. + // https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/String/trim#return_value + // https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Lexical_grammar#white_space + // Removes only ASCII tab and space. + this.subject = block._string_content.replace(/^[\t \r\n]+|[\t \r\n]+$/g, "") this.pos = 0; this.delimiters = null; this.brackets = null; diff --git a/test/regression.txt b/test/regression.txt index 40630a7b..6de5a111 100644 --- a/test/regression.txt +++ b/test/regression.txt @@ -518,3 +518,31 @@ foo more -->

foo

foo more -->

```````````````````````````````` + +#261 +```````````````````````````````` example + Vertical Tab + + Form Feed + + NBSP (U+00A0) NBSP  + + Em Space (U+2003) Em Space  + +
Line Separator (U+2028) Line Separator
 + +
Paragraph Separator (U+2029) Paragraph Separator
 + + 全角スペース (U+3000) 全形空白  + +ZWNBSP (U+FEFF) ZWNBSP +. +

Vertical Tab

+

Form Feed

+

 NBSP (U+00A0) NBSP 

+

 Em Space (U+2003) Em Space 

+


Line Separator (U+2028) Line Separator


+


Paragraph Separator (U+2029) Paragraph Separator


+

 全角スペース (U+3000) 全形空白 

+

ZWNBSP (U+FEFF) ZWNBSP

+```````````````````````````````` From 38d2938fb2ead542a5119c1085d2248eb59537d1 Mon Sep 17 00:00:00 2001 From: Tatsunori Uchino Date: Sun, 15 Dec 2024 04:41:12 +0900 Subject: [PATCH 32/34] Avoid use of slow regex in `trim()` (#295) `trim()` as previously defined could take a long time on very long strings. --- lib/inlines.js | 25 +++++++++++++++++++++++-- 1 file changed, 23 insertions(+), 2 deletions(-) diff --git a/lib/inlines.js b/lib/inlines.js index 1f0cbb7c..a37d1665 100644 --- a/lib/inlines.js +++ b/lib/inlines.js @@ -980,17 +980,38 @@ var parseInline = function(block) { // Parse string content in block into inline children, // using refmap to resolve references. var parseInlines = function(block) { - // trim() removes non-ASCII whitespaces, vertical tab, form feed and so on. + // String.prototype.trim() removes non-ASCII whitespaces, vertical tab, form feed and so on. // https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/String/trim#return_value // https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Lexical_grammar#white_space // Removes only ASCII tab and space. - this.subject = block._string_content.replace(/^[\t \r\n]+|[\t \r\n]+$/g, "") + this.subject = trim(block._string_content) this.pos = 0; this.delimiters = null; this.brackets = null; while (this.parseInline(block)) {} block._string_content = null; // allow raw string to be garbage collected this.processEmphasis(null); + + function trim(str) { + var start = 0; + for(; start < str.length; start++) { + if (!isSpace(str.charCodeAt(start))) { + break; + } + } + var end = str.length - 1; + for(; end >= start; end--) { + if (!isSpace(str.charCodeAt(end))) { + break; + } + } + return str.slice(start, end + 1); + + function isSpace(c) { + // U+0020 = space, U+0009 = tab, U+000A = LF, U+000D = CR + return c === 0x20 || c === 9 || c === 0xa || c === 0xd; + } + } }; // The InlineParser object. 
From e489bf504d9884f40d142a0f3c5f8d72f7fac8de Mon Sep 17 00:00:00 2001 From: Tatsunori Uchino Date: Wed, 18 Dec 2024 12:53:09 +0900 Subject: [PATCH 33/34] Recognize non-BMP punctuation & symbols (#297) Closes #296. --- lib/inlines.js | 24 ++++++++++++++++++++++-- test/regression.txt | 16 ++++++++++++++++ 2 files changed, 38 insertions(+), 2 deletions(-) diff --git a/lib/inlines.js b/lib/inlines.js index a37d1665..a8d9d6ed 100644 --- a/lib/inlines.js +++ b/lib/inlines.js @@ -127,9 +127,10 @@ var match = function(re) { // Returns the code for the character at the current subject position, or -1 // there are no more characters. +// This function must be non-BMP aware because the Unicode category of its result is used. var peek = function() { if (this.pos < this.subject.length) { - return this.subject.charCodeAt(this.pos); + return this.subject.codePointAt(this.pos); } else { return -1; } @@ -270,7 +271,7 @@ var scanDelims = function(cc) { return null; } - char_before = startpos === 0 ? "\n" : this.subject.charAt(startpos - 1); + char_before = previousChar(this.subject, startpos); cc_after = this.peek(); if (cc_after === -1) { @@ -304,6 +305,25 @@ var scanDelims = function(cc) { } this.pos = startpos; return { numdelims: numdelims, can_open: can_open, can_close: can_close }; + + function previousChar(str, pos) { + if (pos === 0) { + return "\n"; + } + var previous_cc = str.charCodeAt(pos - 1); + // not low surrogate (BMP) + if ((previous_cc & 0xfc00) !== 0xdc00) { + return str.charAt(pos - 1); + } + // returns NaN if out of range + var two_previous_cc = str.charCodeAt(pos - 2); + // NaN & 0xfc00 = 0 + // no high surrogate before it: the low surrogate is unpaired, return it alone + if ((two_previous_cc & 0xfc00) !== 0xd800) { + return str.charAt(pos - 1); + } + return str.slice(pos - 2, pos); + } }; // Handle a delimiter marker for emphasis or a quote. 
diff --git a/test/regression.txt b/test/regression.txt index 6de5a111..624703bd 100644 --- a/test/regression.txt +++ b/test/regression.txt @@ -546,3 +546,19 @@ foo more -->

 全角スペース (U+3000) 全形空白 

ZWNBSP (U+FEFF) ZWNBSP

```````````````````````````````` + +#296 +```````````````````````````````` example +a**a∇**a + +a**∇a**a + +a**a𝜵**a + +a**𝜵a**a +. +

a**a∇**a

+

a**∇a**a

+

a**a𝜵**a

+

a**𝜵a**a

+```````````````````````````````` From da97efbfd2b5112ac889dff476e6361158463309 Mon Sep 17 00:00:00 2001 From: Jesse Hallam Date: Tue, 25 Mar 2025 14:08:32 -0300 Subject: [PATCH 34/34] fix tests, versions --- dist/commonmark.js | 2 +- test/spec.txt | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/dist/commonmark.js b/dist/commonmark.js index 241c5542..65e4a614 100644 --- a/dist/commonmark.js +++ b/dist/commonmark.js @@ -1,4 +1,4 @@ -/* commonmark 0.31.2-1 https://github.com/commonmark/commonmark.js @license BSD3 */ +/* commonmark 0.31.2-0 https://github.com/commonmark/commonmark.js @license BSD3 */ (function (global, factory) { typeof exports === 'object' && typeof module !== 'undefined' ? factory(exports) : typeof define === 'function' && define.amd ? define(['exports'], factory) : diff --git a/test/spec.txt b/test/spec.txt index 755ac56c..885dc42c 100644 --- a/test/spec.txt +++ b/test/spec.txt @@ -8842,7 +8842,7 @@ Spaces are not allowed in autolinks: ```````````````````````````````` example . -

<http://foo.bar/baz bim>

+

<https://foo.bar/baz bim>

```````````````````````````````` @@ -8905,7 +8905,7 @@ These are not autolinks: ```````````````````````````````` example < https://foo.bar > . -

< http://foo.bar >

+

< https://foo.bar >

```````````````````````````````` @@ -8926,7 +8926,7 @@ These are not autolinks: ```````````````````````````````` example https://example.com . -

http://example.com

+

https://example.com

````````````````````````````````