-
Notifications
You must be signed in to change notification settings - Fork 1
Open
Description
The vignette on sentence annotation explains how you can use an existing POS annotation with the STTS to generate an annotation of sentences.
The following chunk of code shows how to use openNLP for this purpose instead. However, its final step (mapping the detected sentences back to corpus positions) is rather slow, which is why I hesitate to integrate it into the vignette.
# Derive sentence regions for the CWB corpus "UNGA": rebuild the running text
# from the token stream, let the openNLP sentence detector annotate it, and map
# each detected sentence back to the corpus positions (cpos) of its first and
# last token. The result, `region_matrix`, has one row per sentence with the
# columns (left cpos, right cpos).
library(RcppCWB)
library(NLP)
library(openNLP)

# Decode the complete token stream; corpus positions are zero-based.
corpus_size <- cl_attribute_size(
  "UNGA", attribute = "word", attribute_type = "p"
)
cpos <- seq_len(corpus_size) - 1L
ids <- cl_cpos2id("UNGA", p_attribute = "word", cpos = cpos)
word <- cl_id2str("UNGA", p_attribute = "word", id = ids)

# A token is followed by a blank unless the NEXT token is one of these
# punctuation marks; the last token is never followed by a blank. Negative
# indexing (instead of 2L:length(word)) stays correct for a one-token corpus.
punctuation <- c(".", ",", ":", "!", "?", ";")
whitespace_after <- c(!(word[-1L] %in% punctuation), FALSE)
word_with_whitespace <- paste0(word, ifelse(whitespace_after, " ", ""))
s <- String(paste(word_with_whitespace, collapse = ""))

# One-based, inclusive character offsets of every token within `s`.
# nchar() is vectorised, so no sapply() (which would also attach each token
# as a name to the result) is needed.
word_length <- nchar(word)
chunk_end <- cumsum(nchar(word_with_whitespace))
left_offset <- c(1L, chunk_end[-length(chunk_end)] + 1L)
right_offset <- left_offset + word_length - 1L

# Token-level annotation; the annotation id is the corpus position.
word_annotation <- NLP::Annotation(
  id = cpos,
  type = rep.int("word", length(cpos)),
  start = left_offset,
  end = right_offset
)

sent_token_annotator <- Maxent_Sent_Token_Annotator()
sentence_annotation <- annotate(s, sent_token_annotator)
sentences <- sentence_annotation[sentence_annotation$type == "sentence"]

# Map sentences to token ranges. A token belongs to a sentence if it starts at
# or after the sentence start and ends at or before the sentence end. Token
# start and end offsets are both non-decreasing, so the matching tokens form
# one contiguous run whose borders can be looked up with findInterval(). This
# replaces NLP::annotations_in_spans() followed by a per-sentence
# lapply()/rbind(), which was the slow step of this script.
#
# Number of tokens starting strictly before the sentence: because cpos is
# zero-based, this count is the cpos of the first token inside the sentence.
cpos_left <- findInterval(
  sentences$start, word_annotation$start, left.open = TRUE
)
# Number of tokens ending at or before the sentence end, minus one, is the
# cpos of the last token inside the sentence.
cpos_right <- findInterval(sentences$end, word_annotation$end) - 1L

# Drop sentences that do not contain a single complete token; they cannot be
# expressed as a region.
has_token <- cpos_left <= cpos_right
region_matrix <- matrix(
  c(cpos_left[has_token], cpos_right[has_token]),
  ncol = 2L
)
Reactions are currently unavailable
Metadata
Metadata
Assignees
Labels
No labels