From da0b844c22a11f9bd99eb41b3be55c5346919456 Mon Sep 17 00:00:00 2001
From: Bernhard Meindl
Date: Thu, 22 Jan 2026 10:26:03 +0100
Subject: [PATCH] Setup Pkgdown; Standardize exported function names

---
 .Rbuildignore                        |  3 ++
 .github/workflows/check.yaml         |  2 +
 .github/workflows/pkgdown.yaml       | 35 ++++++++++++++
 .gitignore                           |  1 +
 DESCRIPTION                          |  2 +
 NAMESPACE                            |  8 ++--
 NEWS.md                              |  4 +-
 R/UrlScraper.R                       | 10 ++--
 R/UrlScraper_utils_html.R            | 72 ++++++++++++++--------------
 R/UrlScraper_utils_scrape.R          | 12 ++---
 R/parameter_manager.R                | 28 +++++------
 R/search_url.R                       | 24 +++++-----
 README.md                            |  8 ++--
 _pkgdown.yml                         | 47 ++++++++++++++++++
 tests/testthat/test-scraper-config.R | 12 ++---
 vignettes/Config.Rmd                 | 12 ++---
 vignettes/Intro.Rmd                  | 14 +++---
 17 files changed, 192 insertions(+), 102 deletions(-)
 create mode 100644 .github/workflows/pkgdown.yaml
 create mode 100644 _pkgdown.yml

diff --git a/.Rbuildignore b/.Rbuildignore
index a5a2e58..2b79998 100644
--- a/.Rbuildignore
+++ b/.Rbuildignore
@@ -4,3 +4,6 @@
 ^doc$
 ^Meta$
 ^\.github$
+^_pkgdown\.yml$
+^docs$
+^pkgdown$
diff --git a/.github/workflows/check.yaml b/.github/workflows/check.yaml
index e5e346f..9aa1e1e 100644
--- a/.github/workflows/check.yaml
+++ b/.github/workflows/check.yaml
@@ -40,3 +40,5 @@ jobs:
         with:
           error-on: '"error"'
           check-dir: '"check"'
+        env:
+          _R_CHECK_VIGNETTES_CHECK_TITLE_: FALSE
diff --git a/.github/workflows/pkgdown.yaml b/.github/workflows/pkgdown.yaml
new file mode 100644
index 0000000..3d63e9e
--- /dev/null
+++ b/.github/workflows/pkgdown.yaml
@@ -0,0 +1,35 @@
+name: pkgdown
+
+on:
+  workflow_run:
+    workflows: ["R-CMD-check"]
+    types:
+      - completed
+  workflow_dispatch:
+
+jobs:
+  pkgdown:
+    runs-on: ubuntu-latest
+    if: ${{ github.event.workflow_run.conclusion == 'success' }}
+    env:
+      GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
+    permissions:
+      contents: write
+    steps:
+      - uses: actions/checkout@v4
+
+      - uses: r-lib/actions/setup-pandoc@v2
+
+      - uses: r-lib/actions/setup-r@v2
+        with:
+          use-public-rspm: true
+
+      - uses: r-lib/actions/setup-r-dependencies@v2
+        with:
+          extra-packages: any::pkgdown, local::.
+
+      - name: Build and Deploy Site
+        run: |
+          git config --local user.name "$GITHUB_ACTOR"
+          git config --local user.email "$GITHUB_ACTOR@users.noreply.github.com"
+          Rscript -e 'pkgdown::deploy_to_branch(new_process = FALSE)'
diff --git a/.gitignore b/.gitignore
index 8bf50dd..fc742c2 100644
--- a/.gitignore
+++ b/.gitignore
@@ -7,3 +7,4 @@ man
 debug
 /doc/
 /Meta/
+docs
diff --git a/DESCRIPTION b/DESCRIPTION
index 87b32e7..e54d5ed 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -45,5 +45,7 @@ Suggests:
     yaml,
     devtools
 Roxygen: list(markdown = TRUE)
+URL: https://statistikat.github.io/taRantula, https://github.com/statistikat/taRantula
+BugReports: https://github.com/statistikat/taRantula/issues
 VignetteBuilder: knitr
 Config/testthat/edition: 3
diff --git a/NAMESPACE b/NAMESPACE
index 65d0db3..de42aa9 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -5,10 +5,10 @@ export(buildQuery)
 export(cfg_googlesearch)
 export(cfg_scraper)
 export(check_links)
-export(extract_links)
-export(get_google_creds)
-export(params_googlesearch)
-export(params_scraper)
+export(extractLinks)
+export(getGoogleCreds)
+export(paramsGoogleSearch)
+export(paramsScraper)
 export(runGoogleSearch)
 export(searchURL)
 import(data.table)
diff --git a/NEWS.md b/NEWS.md
index 01806dd..6f79d47 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -9,8 +9,8 @@
 
 ### Configuration (`params_manager`)
 * **R6-based Config System**: Introduced a robust, hierarchical configuration system with strict validation logic.
-  * `params_scraper()`: Dedicated configuration for generic web crawling and JS rendering.
-  * `params_googlesearch()`: Tailored configuration for Google Search API interactions including rate-limit management.
+  * `paramsScraper()`: Dedicated configuration for generic web crawling and JS rendering.
+  * `paramsGoogleSearch()`: Tailored configuration for Google Search API interactions including rate-limit management.
 * **Deep Merging**: Configuration methods now support nested path updates (e.g., `cfg$set("selenium$host", ...)`).
 * **Validation**: Built-in defensive programming with type-checking for integers, booleans, character vectors, and directory paths.
 * **Export/Import functionality**: Added `$export()` and `$write_defaults()` methods to support YAML-based configuration round-trips.
diff --git a/R/UrlScraper.R b/R/UrlScraper.R
index a92e2ef..d4731e4 100644
--- a/R/UrlScraper.R
+++ b/R/UrlScraper.R
@@ -20,7 +20,7 @@
 #' * Regex‑based extraction of text from previously scraped HTML
 #'
 #' @section Configuration:
-#' A configuration object (typically created via `params_scraper()`) is
+#' A configuration object (typically created via [paramsScraper]) is
 #' expected to contain at least the following entries:
 #'
 #' * `db_file` – path to the DuckDB database file
@@ -41,7 +41,7 @@
 #'   - `robots_user_agent` – user agent string used for robots queries
 #' * `exclude_social_links` – logical, whether to exclude social media links
 #'
-#' The exact structure depends on `params_scraper()` and related helpers.
+#' The exact structure depends on [paramsScraper] and related helpers.
 #'
 #' @section Methods:
 #' * `initialize(config)` – create a new `UrlScraper` instance
@@ -64,7 +64,7 @@
 #' @examples
 #' \dontrun{
 #' # Create a default configuration object
-#' cfg <- params_scraper()
+#' cfg <- paramsScraper()
 #'
 #' # Example Selenium settings
 #' cfg$set("selenium$host", "localhost")
@@ -115,7 +115,7 @@ UrlScraper <- R6::R6Class(
     #' if present, and configures progress handlers.
     #'
     #' @param config A list (or configuration object) of settings, typically
-    #'   created by `params_scraper()`. It should include:
+    #'   created by [paramsScraper()]. It should include:
     #'   * `db_file` – path to the DuckDB database file.
     #'   * `snapshot_dir` – directory for snapshot files.
     #'   * `progress_dir` – directory for progress/log files.
@@ -320,7 +320,7 @@ UrlScraper <- R6::R6Class(
         ".write_snapshot",
         ".worker_scrape",
         ".scrape_single_url",
-        "extract_links",
+        "extractLinks",
         "check_links",
         "check_robotsdata",
         "query_robotsdata",
diff --git a/R/UrlScraper_utils_html.R b/R/UrlScraper_utils_html.R
index 6e4835a..0007223 100644
--- a/R/UrlScraper_utils_html.R
+++ b/R/UrlScraper_utils_html.R
@@ -2,22 +2,22 @@
 #'
 #' @description
 #' Extracts all valid hyperlinks from an HTML document and returns them as a
-#' cleaned and normalized `data.table`. 
+#' cleaned and normalized `data.table`.
 #' The function parses ``, ``, ``, and `` elements,
 #' resolves relative URLs, removes invalid or unwanted links, and enriches the
 #' output with metadata such as the source URL, extraction level, and timestamp.
 #'
 #' @details
 #' This extractor is designed for web‑scraping pipelines where only meaningful,
-#' navigable hyperlinks are desired. 
+#' navigable hyperlinks are desired.
 #' The function:
 #'
-#' * Converts inputs to an XML document when necessary 
-#' * Extracts link text and normalizes whitespace 
-#' * Resolves relative URLs against the provided `baseurl` 
-#' * Forces all URLs to use `https://` 
-#' * Removes invalid links using [`check_links()`] 
-#' * Ensures uniqueness of extracted links 
+#' * Converts inputs to an XML document when necessary
+#' * Extracts link text and normalizes whitespace
+#' * Resolves relative URLs against the provided `baseurl`
+#' * Forces all URLs to use `https://`
+#' * Removes invalid links using [`check_links()`]
+#' * Ensures uniqueness of extracted links
 #'
 #' @param doc A character string containing HTML or an `xml_document` object.
 #' @param baseurl Character string representing the URL from which the document
@@ -25,11 +25,11 @@
 #'
 #' @return
 #' A `data.table` containing the following columns:
-#' * `href` – Cleaned and validated absolute URLs 
-#' * `label` – Link text extracted from the anchor element 
-#' * `source_url` – The originating page from which links were extracted 
-#' * `level` – Extraction depth (always 0 for this function) 
-#' * `scraped_at` – Timestamp of extraction 
+#' * `href` – Cleaned and validated absolute URLs
+#' * `label` – Link text extracted from the anchor element
+#' * `source_url` – The originating page from which links were extracted
+#' * `level` – Extraction depth (always 0 for this function)
+#' * `scraped_at` – Timestamp of extraction
 #'
 #' Duplicate URLs are automatically removed.
 #'
 #' @examples
 #' html <- "About"
-#' extract_links(html, baseurl = "https://example.com")
-extract_links <- function(doc, baseurl) {
+#' extractLinks(html, baseurl = "https://example.com")
+extractLinks <- function(doc, baseurl) {
   href <- NULL
   if (!inherits(doc, "xml_document")) {
     doc <- rvest::read_html(doc)
   }
@@ -91,13 +91,13 @@
 #'
 #' @description
 #' Evaluates extracted URLs and determines which of them should be retained
-#' for further processing. 
+#' for further processing.
 #' The function filters out links that:
 #'
-#' * Do not belong to the same domain as `baseurl` 
-#' * Point to files such as images, audio, video, archives, executables, etc. 
-#' * Refer to fragments or anchor points 
-#' * Refer back to the same path as the main page 
+#' * Do not belong to the same domain as `baseurl`
+#' * Point to files such as images, audio, video, archives, executables, etc.
+#' * Refer to fragments or anchor points
+#' * Refer back to the same path as the main page
 #'
 #' @param hrefs Character vector of URLs to check.
 #' @param baseurl Character string giving the original page URL for domain and
@@ -190,7 +190,7 @@ check_links <- function(hrefs, baseurl) {
 #'
 #' @description
 #' Extracts the domain portion of URLs and optionally includes the scheme
-#' (`http://` or `https://`). 
+#' (`http://` or `https://`).
 #' The function removes common subdomains such as `www.` for consistency.
 #'
 #' @param x Character vector of URLs.
@@ -226,16 +226,16 @@ get_domain <- function(x, include_scheme = FALSE) {
 #'
 #' @description
 #' Converts an HTML document into a cleaned representation where scripts,
-#' styles, and similar elements are removed. 
+#' styles, and similar elements are removed.
 #' If `keep_only_text = TRUE`, the function returns only the visible text of
 #' the page.
 #'
 #' @details
 #' This helper is used to prepare HTML content for downstream text extraction.
 #' It:
-#' * Removes `