-
Notifications
You must be signed in to change notification settings - Fork 3
Open
Description
The germaparl_encode_lda_topics() returns an error because the decode() function of polmineR is deprecated. I found a solution by using the s_attribute_decode() function from RcppCWB:
germaparl_encode_lda_topics() <-
function (k = 450, n = 5) {
germaparl_data_dir <- registry_file_parse(corpus = "GERMAPARL",
registry_dir = germaparl_regdir())[["home"]]
corpus_charset <- registry_file_parse(corpus = "GERMAPARL")[["properties"]][["charset"]]
model <- germaparl_load_topicmodel(k = 250)
message("... getting topic matrix")
topic_matrix <- topicmodels::topics(model, k = 5)
topic_dt <- data.table(speech = colnames(topic_matrix), topics = apply(topic_matrix,
2, function(x) sprintf("|%s|", paste(x, collapse = "|"))),
key = "speech")
message("... decoding s-attribute speech")
if (!"speech" %in% s_attributes("GERMAPARL")) {
stop("The s-attributes 'speech' is not yet present.",
"Use the function germaparl_add_s_attribute_speech to generate it.")
}
cpos_dt <- as.data.table(RcppCWB::s_attribute_decode("GERMAPARL", data_dir = germaparl_data_dir, s_attribute = "speech")) # Returns a data frame but setkeyv requires a data.table - converting
names(cpos_dt)[names(cpos_dt) == "value"] <- "speech" # With the new function, speech gets renamed to "value" - changing it back to speech
setkeyv(cpos_dt, "speech")
cpos_dt2 <- topic_dt[cpos_dt]
setorderv(cpos_dt2, cols = "cpos_left", order = 1L)
cpos_dt2[["speech"]] <- NULL
cpos_dt2[["id"]] <- NULL
cpos_dt2[, `:=`(topics, ifelse(is.na(topics), "||", topics))]
setcolorder(cpos_dt2, c("cpos_left", "cpos_right", "topics"))
message("... running some sanity checks")
coverage <- sum(cpos_dt2[["cpos_right"]] - cpos_dt2[["cpos_left"]]) +
nrow(cpos_dt2)
if (coverage != size("GERMAPARL"))
stop()
P <- partition("GERMAPARL", speech = ".*", regex = TRUE)
if (sum(cpos_dt2[["cpos_left"]] - P@cpos[, 1]) != 0)
stop()
if (sum(cpos_dt2[["cpos_right"]] - P@cpos[, 2]) != 0)
stop()
if (length(sAttributes("GERMAPARL", "speech", unique = FALSE)) !=
nrow(cpos_dt2))
stop()
message("... encoding s-attribute 'topics'")
s_attribute_encode(values = cpos_dt2[["topics"]], data_dir = germaparl_data_dir,
s_attribute = "topics", corpus = "GERMAPARL", region_matrix = as.matrix(cpos_dt2[,
c("cpos_left", "cpos_right")]), registry_dir = germaparl_regdir(),
encoding = corpus_charset, method = "R", verbose = TRUE)
}
Reactions are currently unavailable
Metadata
Metadata
Assignees
Labels
No labels