-
Notifications
You must be signed in to change notification settings - Fork 1
Description
After parsing 142 optically identical documents, get_text_from_boxes throws the following error:
Fehler in if (position["top"] > box["top"] && position["top"] < box["bottom"] && :
Fehlender Wert, wo TRUE/FALSE nötig ist
(English: "Error in if (...) : missing value where TRUE/FALSE needed")
The document can be found online here: https://www.landtag-saar.de/Plenarprotokoll/PlPr15_041.pdf
Box coordinates used:
P$add_box(box = c(top = 105, height = 690, left = 45, width = 250), page = NULL, replace = TRUE)
P$add_box(box = c(top = 105, height = 690, left = 297, width = 250), page = NULL, replace = FALSE)
P$add_box(box = c(top = 86, height = 700, left = 45, width = 250), page = 1, replace = TRUE)
P$add_box(box = c(top = 86, height = 700, left = 293, width = 250), page = 1, replace = FALSE)
Error Traceback:
17. FUN(X[[i]], ...)
16. lapply(X = X, FUN = FUN, ...)
15. sapply(boxes, function(box) {
if (position["top"] > box["top"] && position["top"] < box["bottom"] &&
position["left"] > box["left"] && position["left"] <
box["right"]) { ...
14. FUN(X[[i]], ...)
13. lapply(xml2::xml_find_all(node_returned, xpath = "//text"), function(text_node) {
position <- xml2::xml_attrs(text_node)
position <- setNames(as.integer(position), names(position))
boxed <- sapply(boxes, function(box) { ...
12. self$drop_unboxed_text_nodes(node = page_node, box_list, copy = TRUE)
11. .fun(piece, ...)
10. (function (i)
{
piece <- pieces[[i]]
if (.inform) { ...
9. loop_apply(n, do.ply)
8. llply(.data = pieces, .fun = .fun, ..., .progress = .progress,
.inform = .inform, .parallel = .parallel, .paropts = .paropts)
7. dlply(.data = df, .variables = .(box_id), .fun = function(df2) {
page_node <- xml_new_root(page_nodes[[df2[["page_node"]]]],
.copy = TRUE)
box_list <- lapply(1:nrow(df2), function(j) setNames(as.numeric(df2[j, ...
6. .fun(piece, ...)
5. (function (i)
{
piece <- pieces[[i]]
if (.inform) { ...
4. loop_apply(n, do.ply)
3. llply(.data = pieces, .fun = .fun, ..., .progress = .progress,
.inform = .inform, .parallel = .parallel, .paropts = .paropts)
2. dlply(.data = self$boxes, .variables = .(page_node), .fun = function(df) {
df[["box_id"]] <- 1:nrow(df)
page_no <- unique(df[["page_node"]])
dlply(.data = df, .variables = .(box_id), .fun = function(df2) { ...
1. P$get_text_from_boxes(paragraphs = FALSE)