Skip to content

Commit 17250ff

Browse files
committed
fix sentence segmentation for table notes
1 parent 7680864 commit 17250ff

File tree

1 file changed

+14
-6
lines changed
  • grobid-core/src/main/java/org/grobid/core/data

1 file changed

+14
-6
lines changed

grobid-core/src/main/java/org/grobid/core/data/Table.java

+14-6
Original file line number | Diff line number | Diff line change
@@ -227,17 +227,25 @@ public String toTEI(GrobidAnalysisConfig config, Document doc, TEIFormatter form
227227
}
228228
p.appendChild(textNode(clusterContent));
229229
}
230-
231-
if (config.isWithSentenceSegmentation()) {
232-
// we need a sentence segmentation of the figure caption
233-
formatter.segmentIntoSentences(noteNode, this.noteLayoutTokens, config, doc.getLanguage(), doc.getPDFAnnotations());
234-
}
235230
}
236231
if (p.getChildCount() > 0) {
237232
noteNode.appendChild(p);
238233
}
234+
if (config.isWithSentenceSegmentation()) {
235+
// we need a sentence segmentation of the table note
236+
formatter.segmentIntoSentences(p, this.noteLayoutTokens, config, doc.getLanguage(), doc.getPDFAnnotations());
237+
}
239238
} else {
240-
noteNode = XmlBuilderUtils.teiElement("note", LayoutTokensUtil.normalizeText(note.toString()).trim());
239+
Element p = teiElement("p");
240+
p.appendChild(LayoutTokensUtil.normalizeText(note.toString()).trim());
241+
242+
if (config.isWithSentenceSegmentation()) {
243+
// we need a sentence segmentation of the table note
244+
formatter.segmentIntoSentences(p, this.noteLayoutTokens, config, doc.getLanguage(), doc.getPDFAnnotations());
245+
}
246+
247+
noteNode = XmlBuilderUtils.teiElement("note");
248+
noteNode.appendChild(p);
241249
}
242250

243251
String coords = null;

0 commit comments

Comments
 (0)