Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 23 additions & 1 deletion src/SIL.LCModel/DomainServices/ITextUtils.cs
Original file line number Diff line number Diff line change
Expand Up @@ -275,6 +275,18 @@ public static void ParseText(IStText sttext)
}
}

/// <summary>
/// Reports whether the given text finishes with end-of-sentence (EOS) punctuation.
/// The text is run through the segment breaker; if that leaves a trailing piece of
/// text that was not closed off by an EOS character, the answer is false.
/// The FieldWorks interlinear importer relies on this to keep segments well-formed.
/// </summary>
/// <param name="text">The text to examine.</param>
/// <param name="cache">Cache that supplies the writing system factory used for segmenting.</param>
/// <returns>True when segmenting the text flags no unterminated trailing segment.</returns>
public static bool EndsWithEOS(ITsString text, LcmCache cache)
{
	// No paragraph parser is supplied: only the segment-breaking pass is needed here.
	var segmenter = new SegmentMaker(text, cache.WritingSystemFactory, null);
	segmenter.Run();
	bool hasUnterminatedTail = segmenter.ExtraSegment;
	return !hasUnterminatedTail;
}

/// <summary>
/// tokenize the paragraph with segments and analyses (wordforms generally, though we try to preserve other existing ones).
/// </summary>
Expand Down Expand Up @@ -1632,6 +1644,7 @@ internal abstract class SegmentBreaker
private int m_csegs;
private int m_prevCh;
private readonly ILgWritingSystemFactory m_wsf;
/// <summary>
/// Set to true by Run() when text is left over after the main loop and the parse
/// state at that point is not FoundEosChar, i.e. the final segment was not
/// terminated by an end-of-sentence character.
/// </summary>
internal bool ExtraSegment = false;

// The idea here is that certain characters more-or-less mark the end of a segment:
// basically, sentence-terminating characters like period, question-mark, and so forth.
Expand Down Expand Up @@ -1824,7 +1837,13 @@ public void Run()
}
// We reached the end of the loop. Make a segment out of anything left over.
if (ichStartSeg < m_tssText.Length)
{
if (state != SegParseState.FoundEosChar)
{
ExtraSegment = true;
}
CreateSegment(ichStartSeg, m_tssText.Length);
}

}

Expand Down Expand Up @@ -1985,7 +2004,10 @@ internal SegmentMaker(ITsString text, ILgWritingSystemFactory wsf, ParagraphPars
protected override void CreateSegment(int ichMin, int ichLim)
{
	base.CreateSegment(ichMin, ichLim);

	// The paragraph parser is optional: EndsWithEOS constructs a SegmentMaker with a
	// null parser because it only needs the segment-breaking pass. Touching the parser
	// unconditionally would throw a NullReferenceException in that case, and calling it
	// both before and inside the guard would record every segment twice, so the
	// segment is created and stored exactly once, and only when a parser exists.
	if (m_paraParser == null)
	{
		return;
	}

	m_segments.Add(m_paraParser.CreateSegment(ichMin, ichLim));
}

/// <summary>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -946,6 +946,14 @@ public void CheckValidGuessesAfterInsertNewWord_LT8467()
ValidateGuesses(expectedGuessesAfterEdit, paraGuessed);
}

/// <summary>
/// Checks EndsWithEOS against text with no terminator, text ending in a period,
/// and text whose period is followed by a closing quotation mark.
/// </summary>
[Test]
public void EndsWithEOS()
{
	// No sentence-final punctuation at all.
	var unterminated = TsStringUtils.MakeString("abc", Cache.DefaultVernWs);
	Assert.IsFalse(ParagraphParser.EndsWithEOS(unterminated, Cache));

	// A plain period closes the sentence.
	var terminated = TsStringUtils.MakeString("abc.", Cache.DefaultVernWs);
	Assert.IsTrue(ParagraphParser.EndsWithEOS(terminated, Cache));

	// A quotation mark trailing the period must still count as terminated.
	var terminatedThenQuote = TsStringUtils.MakeString("abc.\"", Cache.DefaultVernWs);
	Assert.IsTrue(ParagraphParser.EndsWithEOS(terminatedThenQuote, Cache));
}

private void ValidateGuesses(IList<IWfiGloss> expectedGuesses, IStTxtPara paraWithGuesses)
{
var segsParaGuesses = paraWithGuesses.SegmentsOS;
Expand Down
Loading