Skip to content

Commit

Permalink
Improve brat writer newline
Browse files Browse the repository at this point in the history
This handles the case where multiple newlines occur within an annotation
  • Loading branch information
Nicolas Paris committed Jul 1, 2018
1 parent 742f4ae commit 2db002e
Show file tree
Hide file tree
Showing 3 changed files with 25 additions and 29 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -615,27 +615,32 @@ private void writeRelationAnnotation(BratAnnotationDocument aDoc, FeatureStructu
}
}



private BratTextAnnotation splitNewline(AnnotationFS aFS)
{
String[] textSplit = explodeNewlines(aFS.getCoveredText());
int[] begins = new int[textSplit.length];
int[] ends = new int[textSplit.length];
int pos = aFS.getBegin();
int end = aFS.getBegin();
for (int i = 0; i < textSplit.length; i++) {
end += textSplit[i].length();
begins[i] = pos;
ends[i] = end;
end++;
pos = end;

Pattern p = Pattern.compile("(.+?)(?:\\R|$)+", Pattern.DOTALL);
Matcher m = p.matcher(aFS.getCoveredText());
// counts the number of matches to initialize arrays sized
int i = 0;
while (m.find()) {
i++;
}
// initialize arrays
int[] begins = new int[i];
int[] ends = new int[i];
m = p.matcher(aFS.getCoveredText());
i = 0;
while (m.find()) {
System.out.println(m.group(1));
begins[i] = m.start(1) + aFS.getBegin();
ends[i] = m.end(1) + aFS.getBegin();
i++;
}

return new BratTextAnnotation(nextTextAnnotationId, getBratType(aFS.getType()), begins,
ends, new String[] { aFS.getCoveredText().replaceAll("\\R", " ") });
}

private String[] explodeNewlines(String str) {
return str.split("\\R");
ends, new String[] { aFS.getCoveredText().replaceAll("\\R+", " ") });
}

private void writeTextAnnotation(BratAnnotationDocument aDoc, AnnotationFS aFS)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -132,7 +132,7 @@ public void testWithLongNames() throws Exception
}

@Test
public void testBratWithNewlines() throws Exception
public void testBratWithDiscontinuousFragmentNear() throws Exception
{
testRoundTrip(createReaderDescription(BratReader.class,
BratReader.PARAM_TEXT_ANNOTATION_TYPE_MAPPINGS,
Expand All @@ -145,7 +145,7 @@ public void testBratWithNewlines() throws Exception
}

@Test
public void testBratWithDiscontinousTwo() throws Exception
public void testBratWithDiscontinuousFragmentFar() throws Exception
{
testOneWay(createReaderDescription(BratReader.class,
BratReader.PARAM_TEXT_ANNOTATION_TYPE_MAPPINGS,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -40,21 +40,12 @@ public void parseTestZeroLength()
}

// The input line declares two fragments, 0-13 and 14-43; the expected serialized form
// carries the single span 0-43 with the same covered text.
@Test
public void parseTestDiscontinous1()
public void parseTestDiscontinousMergeFragments()
{
final String in = "T1\tOrganization 0 13;14 43\tInternational Business Machines Corporation";
final String out = "T1\tOrganization 0 43\tInternational Business Machines Corporation";
BratTextAnnotation v = BratTextAnnotation.parse(in);
assertEquals(out, v.toString());
}

@Test
public void parseTestDiscontinous2()
{
final String in = "T1\tOrganization 0 13;15 43\tInternational Business Machines Corporation";
final String out = "T1\tOrganization 0 13;15 43\tInternational";
BratTextAnnotation v = BratTextAnnotation.parse(in);
System.out.println(v);
assertEquals(out, v.toString());
}

}

0 comments on commit 2db002e

Please sign in to comment.