From 1f045b04dff5469e5159c9f8040b748739bc01eb Mon Sep 17 00:00:00 2001 From: John Maxwell Date: Thu, 8 Jan 2026 08:36:44 -0800 Subject: [PATCH 1/2] Fix LT-22303: Add ParagraphParser.EndsWithEOS --- src/SIL.LCModel/DomainServices/ITextUtils.cs | 19 ++++++++++++++++++- .../DomainServices/ParagraphParserTests.cs | 8 ++++++++ 2 files changed, 26 insertions(+), 1 deletion(-) diff --git a/src/SIL.LCModel/DomainServices/ITextUtils.cs b/src/SIL.LCModel/DomainServices/ITextUtils.cs index 342991e0..4170f86e 100644 --- a/src/SIL.LCModel/DomainServices/ITextUtils.cs +++ b/src/SIL.LCModel/DomainServices/ITextUtils.cs @@ -275,6 +275,13 @@ public static void ParseText(IStText sttext) } } + public static bool EndsWithEOS(ITsString text, LcmCache cache) + { + var collector = new SegmentMaker(text, cache.WritingSystemFactory, null); + collector.Run(); + return !collector.ExtraSegment; + } + /// /// tokenize the paragraph with segments and analyses (wordforms generally, though we try to preserve other existing ones). /// @@ -1632,6 +1639,7 @@ internal abstract class SegmentBreaker private int m_csegs; private int m_prevCh; private readonly ILgWritingSystemFactory m_wsf; + internal bool ExtraSegment = false; // The idea here is that certain characters more-or-less mark the end of a segment: // basically, sentence-terminating characters like period, question-mark, and so forth. @@ -1824,7 +1832,13 @@ public void Run() } // We reached the end of the loop. Make a segment out of anything left over. if (ichStartSeg < m_tssText.Length) + { + if (state != SegParseState.FoundEosChar) + { + ExtraSegment = true; + } CreateSegment(ichStartSeg, m_tssText.Length); + } } @@ -1985,7 +1999,10 @@ internal SegmentMaker(ITsString text, ILgWritingSystemFactory wsf, ParagraphPars protected override void CreateSegment(int ichMin, int ichLim) { base.CreateSegment(ichMin, ichLim); - m_segments.Add(m_paraParser.CreateSegment(ichMin, ichLim)); + if (m_paraParser != null) + { + m_segments.Add(m_paraParser.CreateSegment(ichMin, ichLim)); + } } /// diff --git a/tests/SIL.LCModel.Tests/DomainServices/ParagraphParserTests.cs b/tests/SIL.LCModel.Tests/DomainServices/ParagraphParserTests.cs index 64285c49..ecf03591 100644 --- a/tests/SIL.LCModel.Tests/DomainServices/ParagraphParserTests.cs +++ b/tests/SIL.LCModel.Tests/DomainServices/ParagraphParserTests.cs @@ -946,6 +946,14 @@ public void CheckValidGuessesAfterInsertNewWord_LT8467() ValidateGuesses(expectedGuessesAfterEdit, paraGuessed); } + [Test] + public void EndsWithEOS() + { + Assert.IsFalse(ParagraphParser.EndsWithEOS(TsStringUtils.MakeString("abc", Cache.DefaultVernWs), Cache)); + Assert.IsTrue(ParagraphParser.EndsWithEOS(TsStringUtils.MakeString("abc.", Cache.DefaultVernWs), Cache)); + Assert.IsTrue(ParagraphParser.EndsWithEOS(TsStringUtils.MakeString("abc.\"", Cache.DefaultVernWs), Cache)); + } + private void ValidateGuesses(IList expectedGuesses, IStTxtPara paraWithGuesses) { var segsParaGuesses = paraWithGuesses.SegmentsOS; From 26705f6b2d81b0d5a4beab0179389ab11f79d017 Mon Sep 17 00:00:00 2001 From: John Maxwell Date: Thu, 8 Jan 2026 08:56:56 -0800 Subject: [PATCH 2/2] Adds comment as requested by Jason --- src/SIL.LCModel/DomainServices/ITextUtils.cs | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/SIL.LCModel/DomainServices/ITextUtils.cs b/src/SIL.LCModel/DomainServices/ITextUtils.cs index 4170f86e..f0591a49 100644 --- a/src/SIL.LCModel/DomainServices/ITextUtils.cs +++ b/src/SIL.LCModel/DomainServices/ITextUtils.cs @@ -275,6 +275,11 @@ public static void ParseText(IStText sttext) } } + /// + /// Determine whether text ends with an EOS character. + /// This is used by the FieldWorks interlinear importer + /// to make sure that segments are well-formed. + /// public static bool EndsWithEOS(ITsString text, LcmCache cache) { var collector = new SegmentMaker(text, cache.WritingSystemFactory, null);