Skip to content

Commit

Permalink
feat: transliteration functions
Browse files Browse the repository at this point in the history
  • Loading branch information
Tomeriko96 committed Jan 12, 2024
1 parent a588f97 commit d0c0a88
Show file tree
Hide file tree
Showing 7 changed files with 154 additions and 0 deletions.
1 change: 1 addition & 0 deletions DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ RoxygenNote: 7.2.3
Imports:
dplyr,
httr,
jsonlite,
magrittr,
purrr,
RCurl,
Expand Down
2 changes: 2 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,11 @@

export("%>%")
export(create_translation_table)
export(create_transliteration_table)
export(google_get_supported_languages)
export(google_is_valid_language_code)
export(google_translate)
export(google_transliterate)
export(language_detect)
export(linguee_external_sources)
export(linguee_translation_examples)
Expand Down
31 changes: 31 additions & 0 deletions R/create_transliteration_table.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
#' Create a Transliteration Table
#'
#' Builds a data frame with one row per input word and one column per target
#' language. Each cell holds the first suggestion returned by
#' `google_transliterate()` (called with `num = 1`) for that word and language.
#'
#' @param words A character vector containing the words to be transliterated.
#' @param languages A character vector specifying the target languages for
#'   transliteration. Each entry is used as the name of one output column.
#' @return A data frame with an `original_word` column followed by one
#'   character column per entry of `languages`. A cell is `NA` when
#'   `google_transliterate()` returned no suggestion for that word.
#'
#' @importFrom rlang :=
#' @export
#'
#' @examples
#' \dontrun{
#' words <- c("Hello world", "Goodbye", "Thank you", "Please")
#' languages <- c("ar", "he", "el", "ru", "fa")
#' transliterations <- create_transliteration_table(words, languages)
#' print(transliterations)
#' }
create_transliteration_table <- function(words, languages) {
  # Local binding for the column name referenced inside dplyr::mutate();
  # presumably here to keep R CMD check from reporting an undefined global.
  original_word <- NULL

  # Keep the words as character on every R version (R < 4.0 would otherwise
  # convert them to a factor).
  transliterations <- data.frame(
    original_word = words,
    stringsAsFactors = FALSE
  )

  # Always yield exactly one string per word so purrr::map_chr() cannot abort
  # the whole table when a single lookup comes back empty.
  top_suggestion <- function(word, language_tag) {
    suggestions <- google_transliterate(word, language_tag, num = 1)
    if (length(suggestions) == 0) {
      return(NA_character_)
    }
    as.character(suggestions[[1]])
  }

  for (language in languages) {
    # "{language}" := creates a column named after the current language code.
    transliterations <- transliterations %>%
      dplyr::mutate("{language}" := purrr::map_chr(
        original_word,
        top_suggestion,
        language_tag = language
      ))
  }

  transliterations
}
66 changes: 66 additions & 0 deletions R/google_transliterate.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
#' Transliterate a single word or a sentence to the required language.
#'
#' Sends each whitespace-separated word of `text` to the
#' `inputtools.google.com/request` endpoint and returns the suggestions it
#' reports. For a sentence, the suggestions of the individual words are pasted
#' together position by position, separated by single spaces.
#'
#' @param text The word or sentence to transliterate from Latin/Roman (English)
#'   script. Must be a single string.
#' @param language_tag The target language's ISO639 code. The default value for
#'   this argument is "el" for Greek.
#' @param num The maximum number of suggestions to fetch. The default value for
#'   this argument is 5.
#'
#' @return A character vector of transliteration suggestions. It is empty when
#'   `text` contains no words or when any word yields no suggestion.
#' @export
#'
#' @examples
#' \dontrun{
#' google_transliterate("Hello world", "ru", 10)
#' google_transliterate("hello", "el", 10)
#' }
google_transliterate <- function(text, language_tag = "el", num = 5) {
  # Check if language code is valid
  if (!google_is_valid_language_code(language_tag)) {
    stop("Invalid language code.")
  }
  if (length(text) != 1) {
    stop("`text` must be a single string.", call. = FALSE)
  }

  # Request the suggestions for one word. Passing the parameters through
  # `query` lets httr percent-encode them, so characters such as "&", "#" or
  # non-ASCII letters in the word cannot corrupt the URL.
  fetch_suggestions <- function(word) {
    response <- httr::GET(
      "https://inputtools.google.com/request",
      query = list(
        text = word,
        itc = paste0(language_tag, "-t-i0-und"),
        num = num,
        cp = 0,
        cs = 1,
        ie = "utf-8",
        oe = "utf-8",
        app = "test"
      )
    )
    # Fail with the HTTP status instead of a JSON parse error further down.
    httr::stop_for_status(response)

    # Parse the response content as JSON
    content <- httr::content(response, "text", encoding = "UTF-8")
    json_content <- jsonlite::fromJSON(content, simplifyVector = TRUE)

    # The suggestions are read from a fixed position in the parsed payload;
    # report a readable error if the payload does not have that shape.
    suggestions <- tryCatch(
      json_content[[2]][[1]][[2]],
      error = function(e) {
        stop(
          "Unexpected response from the transliteration service.",
          call. = FALSE
        )
      }
    )

    # Normalise an empty result to character(0) so the return type is stable.
    as.character(unlist(suggestions))
  }

  # Split on any run of whitespace (spaces, tabs, newlines) and ignore
  # leading/trailing whitespace so no empty "words" are sent to the service.
  words <- strsplit(trimws(text), "\\s+")[[1]]
  if (length(words) == 0) {
    return(character(0))
  }
  if (length(words) == 1) {
    return(fetch_suggestions(words))
  }

  # It's a sentence: transliterate each word separately
  per_word <- lapply(words, fetch_suggestions)

  # Without suggestions for every word no complete sentence can be built.
  if (any(lengths(per_word) == 0)) {
    return(character(0))
  }

  # paste() is vectorised: the i-th suggestions of all words are joined into
  # the i-th sentence, and shorter suggestion lists are recycled.
  do.call(paste, c(per_word, sep = " "))
}
2 changes: 2 additions & 0 deletions _pkgdown.yml
Original file line number Diff line number Diff line change
Expand Up @@ -8,10 +8,12 @@ reference:
contents:
- batch_translate
- create_translation_table
- create_transliteration_table
- google_get_supported_languages
- google_is_valid_language_code
- google_supported_languages
- google_translate
- google_transliterate
- language_detect
- translate_file

Expand Down
25 changes: 25 additions & 0 deletions man/create_transliteration_table.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

27 changes: 27 additions & 0 deletions man/google_transliterate.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit d0c0a88

Please sign in to comment.