diff --git a/src/SIL.Machine/Corpora/UpdateUsfmParserHandler.cs b/src/SIL.Machine/Corpora/UpdateUsfmParserHandler.cs
index 00c1f719..239978b6 100644
--- a/src/SIL.Machine/Corpora/UpdateUsfmParserHandler.cs
+++ b/src/SIL.Machine/Corpora/UpdateUsfmParserHandler.cs
@@ -512,6 +512,23 @@ private void CollectUpdatableTokens(UsfmParserState state)
             while (_tokenIndex <= state.Index + state.SpecialTokenCount)
             {
                 UsfmToken token = state.Tokens[_tokenIndex];
+                if (token.Type == UsfmTokenType.Verse)
+                {
+                    // Rebuild the token only when a mark was actually removed, so unaffected verse
+                    // tokens keep their original instance.
+                    // NOTE(review): confirm UsfmToken has no other state that this constructor drops.
+                    string sanitizedVerseData = SanitizeVerseData(token.Data);
+                    if (sanitizedVerseData != token.Data)
+                    {
+                        token = new UsfmToken(
+                            token.Type,
+                            token.Marker,
+                            token.Text,
+                            token.EndMarker,
+                            sanitizedVerseData
+                        );
+                    }
+                }
                 if (CurrentTextType == ScriptureTextType.Embed)
                 {
                     _embedTokens.Add(token);
@@ -746,6 +763,16 @@ private void UpdateVerseRows()
             }
         }
 
+        /// <summary>
+        /// Removes right-to-left marks (U+200F) from the data of a verse token.
+        /// </summary>
+        /// <param name="verseData">The verse token data; may be <c>null</c>.</param>
+        /// <returns>The data without any U+200F characters, or <c>null</c> when the input is <c>null</c>.</returns>
+        private static string SanitizeVerseData(string verseData)
+        {
+            return verseData?.Replace("\u200F", "");
+        }
+
         private class RowInfo
         {
             public RowInfo(int rowIndex)
diff --git a/tests/SIL.Machine.Tests/Corpora/UpdateUsfmParserHandlerTests.cs b/tests/SIL.Machine.Tests/Corpora/UpdateUsfmParserHandlerTests.cs
index 77c9f7cc..ac7af2dc 100644
--- a/tests/SIL.Machine.Tests/Corpora/UpdateUsfmParserHandlerTests.cs
+++ b/tests/SIL.Machine.Tests/Corpora/UpdateUsfmParserHandlerTests.cs
@@ -906,6 +906,34 @@ public void UpdateBlock_Verse_Range()
         );
     }
 
+    [Test]
+    public void UpdateBlock_Verse_Range_RightToLeftMarker()
+    {
+        var rows = new List<UpdateUsfmRow> { new UpdateUsfmRow(ScrRef("MAT 1:1", "MAT 1:2", "MAT 1:3"), "Update 1-3") };
+        string usfm =
+            @"\id MAT - Test
+\c 1
+\v 1‏-3 verse 1 through 3
+";
+        TestUsfmUpdateBlockHandler usfmUpdateBlockHandler = new TestUsfmUpdateBlockHandler();
+        string updatedUsfm = UpdateUsfm(rows, usfm, usfmUpdateBlockHandlers: [usfmUpdateBlockHandler]);
+        string expectedUsfm =
+            @"\id MAT - Test
+\c 1
+\v 1-3 Update 1-3
+";
+        Assert.That(updatedUsfm, Is.EqualTo(expectedUsfm).IgnoreLineEndings());
+        Assert.That(usfmUpdateBlockHandler.Blocks.Count, Is.EqualTo(1));
+
+        UsfmUpdateBlock usfmUpdateBlock = usfmUpdateBlockHandler.Blocks[0];
+        AssertUpdateBlockEquals(
+            usfmUpdateBlock,
+            ["MAT 1:1", "MAT 1:2", "MAT 1:3"],
+            (UsfmUpdateBlockElementType.Text, "Update 1-3 ", false),
+            (UsfmUpdateBlockElementType.Text, "verse 1 through 3 ", true)
+        );
+    }
+
     [Test]
     public void UpdateBlock_Footnote_PreserveEmbeds()
     {
diff --git a/tests/SIL.Machine.Tests/Corpora/UsfmMemoryTextTests.cs b/tests/SIL.Machine.Tests/Corpora/UsfmMemoryTextTests.cs
index d1321f7c..7fae89bf 100644
--- a/tests/SIL.Machine.Tests/Corpora/UsfmMemoryTextTests.cs
+++ b/tests/SIL.Machine.Tests/Corpora/UsfmMemoryTextTests.cs
@@ -425,6 +425,40 @@ public void GetRows_PrivateUseMarker()
         });
     }
 
+    [Test]
+    public void GetRows_VerseRangeWithRightToLeftMarker()
+    {
+        TextRow[] rows = GetRows(
+            @"\id MAT - Test
+\h
+\mt
+\c 1
+\v 1‏-2 Verse one and two.
+"
+        );
+
+        Assert.Multiple(() =>
+        {
+            Assert.That(rows, Has.Length.EqualTo(2));
+
+            Assert.That(
+                rows[0].Ref,
+                Is.EqualTo(ScriptureRef.Parse("MAT 1:1")),
+                string.Join(",", rows.ToList().Select(tr => tr.Ref.ToString()))
+            );
+            Assert.That(
+                rows[0].Text,
+                Is.EqualTo("Verse one and two."),
+                string.Join(",", rows.ToList().Select(tr => tr.Text))
+            );
+            Assert.That(
+                rows[1].Ref,
+                Is.EqualTo(ScriptureRef.Parse("MAT 1:2")),
+                string.Join(",", rows.ToList().Select(tr => tr.Ref.ToString()))
+            );
+        });
+    }
+
     private static TextRow[] GetRows(string usfm, bool includeMarkers = false, bool includeAllText = false)
     {
         UsfmMemoryText text =