Send a ragnar User-Agent header with embedding requests.

Summary of the changes in this patch:
  * NAMESPACE, R/utils.R: import httr2::req_user_agent.
  * R/embed-bedrock.R, R/embed-vertex.R, R/embed.R: pipe the request objects
    built in embed_bedrock(), embed_google_vertex(), embed_ollama() and
    embed_openai() through req_user_agent(ragnar_user_agent()).
  * R/embed.R: replace get_ragnar_username() with get_user(), add
    ragnar_user_agent() ("r-ragnar/" followed by the version read from
    the DESCRIPTION 'Version' field into .package_version).
  * R/embed-vertex.R, R/utils.R: formatting-only changes (braced single-line
    `if` bodies, one argument per line, removed doubled blank lines).
  * man/: regenerated usage for embed_openai() and corrected two reference
    URLs in the read_as_markdown() examples.

diff --git a/NAMESPACE b/NAMESPACE
index e7fc984..c462454 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -53,6 +53,7 @@ importFrom(httr2,req_body_json)
 importFrom(httr2,req_perform)
 importFrom(httr2,req_retry)
 importFrom(httr2,req_url_path_append)
+importFrom(httr2,req_user_agent)
 importFrom(httr2,request)
 importFrom(httr2,resp_body_json)
 importFrom(methods,is)
diff --git a/R/embed-bedrock.R b/R/embed-bedrock.R
index 7890669..a224eb0 100644
--- a/R/embed-bedrock.R
+++ b/R/embed-bedrock.R
@@ -51,7 +51,8 @@ embed_bedrock <- function(x, model, profile, api_args = list()) {
     "https://bedrock-runtime.",
     credentials$region,
     ".amazonaws.com"
-  ))
+  )) |>
+    req_user_agent(ragnar_user_agent())
 
   req <- httr2::req_url_path_append(
     req,
diff --git a/R/embed-vertex.R b/R/embed-vertex.R
index f0702dd..20c0e93 100644
--- a/R/embed-vertex.R
+++ b/R/embed-vertex.R
@@ -24,7 +24,13 @@
 #' )
 #' }
 #'@export
-embed_google_vertex <- function(x, model, location, project_id, task_type = "RETRIEVAL_QUERY") {
+embed_google_vertex <- function(
+  x,
+  model,
+  location,
+  project_id,
+  task_type = "RETRIEVAL_QUERY"
+) {
   if (missing(x) || is.null(x)) {
     args <- capture_args()
     fn <- partial(quote(ragnar::embed_google_vertex), alist(x = ), args)
@@ -55,6 +61,7 @@ embed_google_vertex <- function(x, model, location, project_id, task_type = "RET
 
   base_req <- vertex_url(location, project_id) |>
     httr2::request() |>
+    req_user_agent(ragnar_user_agent()) |>
     httr2::req_headers(!!!credentials, .redact = names(credentials)) |>
     httr2::req_url_path_append(
       "models",
@@ -65,13 +72,14 @@ embed_google_vertex <- function(x, model, location, project_id, task_type = "RET
       json$error$message
     })
 
-
   out <- list()
   # The gemini model does not support batches
   chunk_size <- if (grepl("gemini", model)) 1 else 20
 
   for (indices in chunk_list(seq_along(x), chunk_size)) {
-    instances <- lapply(indices, \(i) list(task_type = task_type, content = x[[i]]))
+    instances <- lapply(indices, \(i) {
+      list(task_type = task_type, content = x[[i]])
+    })
 
     resp <- base_req |>
       httr2::req_body_json(list(
@@ -95,16 +103,19 @@ embed_google_vertex <- function(x, model, location, project_id, task_type = "RET
 }
 
 vertex_url <- function(location, project_id) {
-  paste(unlist(list(
-    c("https://", location, "-aiplatform.googleapis.com/v1"),
-    c("/projects/", project_id),
-    c("/locations/", location),
-    "/publishers/google/"
-  )), collapse="")
+  paste(
+    unlist(list(
+      c("https://", location, "-aiplatform.googleapis.com/v1"),
+      c("/projects/", project_id),
+      c("/locations/", location),
+      "/publishers/google/"
+    )),
+    collapse = ""
+  )
 }
 
 
-google_credentials <- function (error_call = caller_env()) {
+google_credentials <- function(error_call = caller_env()) {
   scope <- "https://www.googleapis.com/auth/cloud-platform"
   if (has_connect_viewer_token(scope = scope)) {
     return(function() {
@@ -119,7 +130,6 @@ google_credentials <- function (error_call = caller_env()) {
 
   check_installed("gargle", "for Google authentication")
 
-
   gargle::with_cred_funs(
     funs = list(credentials_app_default = gargle::credentials_app_default),
     {
@@ -133,10 +143,13 @@ google_credentials <- function (error_call = caller_env()) {
   }
 
   if (is.null(token)) {
-    cli::cli_abort(c(
-      "No Google credentials are available.",
-      i = "Try suppling an API key or configuring Google's application default credentials."
-    ), call = error_call)
+    cli::cli_abort(
+      c(
+        "No Google credentials are available.",
+        i = "Try supplying an API key or configuring Google's application default credentials."
+      ),
+      call = error_call
+    )
   }
 
   if (!token$can_refresh()) {
@@ -155,7 +168,7 @@ google_credentials <- function (error_call = caller_env()) {
   })
 }
 
-has_connect_viewer_token <- function (...) {
+has_connect_viewer_token <- function(...) {
   if (!is_installed("connectcreds")) {
     return(FALSE)
   }
diff --git a/R/embed.R b/R/embed.R
index 62b9087..b2f9bf4 100644
--- a/R/embed.R
+++ b/R/embed.R
@@ -66,6 +66,7 @@ embed_ollama <- function(
 
   embeddings <- map2(starts, ends, function(start, end) {
     req <- request(base_url) |>
+      req_user_agent(ragnar_user_agent()) |>
       req_url_path_append("/api/embed") |>
       req_body_json(list(model = model, input = x[start:end]))
 
@@ -90,7 +91,7 @@ embed_openai <- function(
   base_url = "https://api.openai.com/v1",
   api_key = get_envvar("OPENAI_API_KEY"),
   dims = NULL,
-  user = get_ragnar_username(),
+  user = get_user(),
   batch_size = 20L
 ) {
   if (missing(x) || is.null(x)) {
@@ -138,6 +139,7 @@ embed_openai <- function(
     data$input <- as.list(text[start:end])
 
     req <- request(base_url) |>
+      req_user_agent(ragnar_user_agent()) |>
       req_url_path_append("/embeddings") |>
       req_auth_bearer_token(api_key) |>
       req_retry(max_tries = 2L) |>
@@ -184,10 +186,31 @@ get_envvar <- function(name, error_call = caller_env()) {
   val
 }
 
-get_ragnar_username <- function() {
-  sprintf("'%s' via ragnar", Sys.info()[["user"]])
+# Returns the effective (else login) user name reported by Sys.info(),
+# or NULL when neither is known.
+get_user <- function() {
+  sys_info <- Sys.info()
+  if (is.null(sys_info)) {
+    # Sys.info() returns NULL on platforms where it is not implemented.
+    return(NULL)
+  }
+  user <- sys_info[["effective_user"]]
+  if (user != "unknown") {
+    return(user)
+  }
+  user <- sys_info[["user"]]
+  if (user != "unknown") {
+    return(user)
+  }
+  NULL
+}
+
+ragnar_user_agent <- function() {
+  paste0("r-ragnar/", .package_version)
 }
 
 is_testing <- function() {
   identical(Sys.getenv("TESTTHAT"), "true")
 }
+
+.package_version <- c(read.dcf('DESCRIPTION', 'Version'))
diff --git a/R/utils.R b/R/utils.R
index cc0fce7..a35bf62 100644
--- a/R/utils.R
+++ b/R/utils.R
@@ -17,7 +17,7 @@
 #'   vec_fill_missing vec_unique vec_slice vec_c list_unchop new_data_frame
 #'   vec_chop
 #' @importFrom httr2 request req_url_path_append req_body_json req_perform
-#'   resp_body_json req_retry req_auth_bearer_token
+#'   resp_body_json req_retry req_auth_bearer_token req_user_agent
 #' @importFrom DBI dbExecute dbConnect dbExistsTable dbGetQuery dbQuoteString
 #'   dbWriteTable dbListTables dbReadTable
 #' @importFrom glue glue glue_data as_glue
@@ -49,13 +49,17 @@ map_lgl <- function(.x, .f, ...) vapply(X = .x, FUN = .f, FUN.VALUE = TRUE, ...)
 
 map2 <- function(.x, .y, .f, ...) {
   out <- .mapply(.f, list(.x, .y), list(...))
-  if (length(.x) == length(out)) names(out) <- names(.x)
+  if (length(.x) == length(out)) {
+    names(out) <- names(.x)
+  }
   out
 }
 
 map3 <- function(.x, .y, .z, .f, ...) {
   out <- .mapply(.f, list(.x, .y, .z), list(...))
-  if (length(.x) == length(out)) names(out) <- names(.x)
+  if (length(.x) == length(out)) {
+    names(out) <- names(.x)
+  }
   out
 }
 
@@ -84,13 +88,19 @@ imap <- function(.x, .f, ...) {
 # is_double2(x, c(NA, NA, NA)) # FALSE
 # is_double2(x, 12) # FALSE
 is_double2 <- function(x, dim = NULL) {
-  if (is.null(dim)) return(is_double(x))
+  if (is.null(dim)) {
+    return(is_double(x))
+  }
 
-  if (!is.double(x)) return(FALSE)
+  if (!is.double(x)) {
+    return(FALSE)
+  }
 
   actual_size <- base::dim(x)
   expected_size <- as.integer(dim)
-  if (length(actual_size) != length(expected_size)) return(FALSE)
+  if (length(actual_size) != length(expected_size)) {
+    return(FALSE)
+  }
 
   all(expected_size == actual_size, na.rm = TRUE)
 }
@@ -136,7 +146,9 @@ partial <- function(.fn, .sig, ...) {
 
 reorder_names <- function(..., last = NULL) {
   x <- unique(c(...))
-  if (!is.null(last)) x <- unique(c(x, last), fromLast = TRUE)
+  if (!is.null(last)) {
+    x <- unique(c(x, last), fromLast = TRUE)
+  }
   x
 }
 
diff --git a/man/embed_ollama.Rd b/man/embed_ollama.Rd
index 57a7d79..f9cff1d 100644
--- a/man/embed_ollama.Rd
+++ b/man/embed_ollama.Rd
@@ -18,7 +18,7 @@ embed_openai(
   base_url = "https://api.openai.com/v1",
   api_key = get_envvar("OPENAI_API_KEY"),
   dims = NULL,
-  user = get_ragnar_username(),
+  user = get_user(),
   batch_size = 20L
 )
 }
diff --git a/man/read_as_markdown.Rd b/man/read_as_markdown.Rd
index 70141c8..273f2f8 100644
--- a/man/read_as_markdown.Rd
+++ b/man/read_as_markdown.Rd
@@ -61,8 +61,8 @@ read_as_markdown("https://r4ds.hadley.nz/base-R.html", canonical = TRUE) |>
 # browser to better understand an HTML page's structure.
 #
 # For comprehensive or advanced usage of CSS selectors, consult:
-# https://www.crummy.com/software/BeautifulSoup/bs4/doc/#css-selectors-through-the-cssproperty
-# https://facelessuser.github.io/soupsieveselectors/
+# https://www.crummy.com/software/BeautifulSoup/bs4/doc/#css-selectors-through-the-css-property
+# https://facelessuser.github.io/soupsieve/selectors/
 
 url <- "https://duckdb.org/code_of_conduct"
 # Includes the sidebar and other navigational elements