diff --git a/Dockerfile b/Dockerfile index 392b2d23..07ef7c6a 100644 --- a/Dockerfile +++ b/Dockerfile @@ -57,7 +57,6 @@ ENV DEBIAN_FRONTEND=noninteractive \ ENV FLASK_PORT=8084 # Configure locale, timezone, and perform initial cleanup in a single layer -# User/group creation is removed RUN apt-get update && \ apt-get install -y --no-install-recommends \ # For locale @@ -89,6 +88,13 @@ RUN apt-get update && \ echo "LC_ALL=en_US.UTF-8" >> /etc/environment && \ echo "LANG=en_US.UTF-8" > /etc/locale.conf +# Create a fixed runtime user/group so hardened Docker/Kubernetes deployments +# can start the container directly as a non-root user with a passwd entry. +RUN groupadd -g 1000 shelfmark && \ + useradd -u 1000 -g shelfmark -d /home/shelfmark -s /usr/sbin/nologin shelfmark && \ + mkdir -p /home/shelfmark && \ + chown 1000:1000 /home/shelfmark + # Set working directory WORKDIR /app @@ -103,10 +109,19 @@ COPY . . # Copy built frontend from frontend-builder stage COPY --from=frontend-builder /frontend/dist /app/frontend-dist -# Final setup: permissions and directories in one layer -# Only creating directories and setting executable bits. -# Ownership will be handled by the entrypoint script. -RUN mkdir -p /var/log/shelfmark /books && \ +# Final setup: create image-owned runtime paths for the fixed non-root user. +# Root/PUID mode still re-homes ownership at startup when needed. 
+RUN mkdir -p \ + /config \ + /books \ + /var/log/shelfmark \ + /tmp/shelfmark/seleniumbase/downloaded_files \ + /tmp/shelfmark/seleniumbase/archived_files && \ + rm -rf /app/downloaded_files /app/archived_files && \ + ln -s /tmp/shelfmark/seleniumbase/downloaded_files /app/downloaded_files && \ + ln -s /tmp/shelfmark/seleniumbase/archived_files /app/archived_files && \ + chown -R 1000:1000 /config /books /home/shelfmark /tmp/shelfmark /var/log/shelfmark && \ + chmod -R a+rX /app && \ chmod +x /app/entrypoint.sh /app/tor.sh /app/genDebug.sh # Expose the application port @@ -149,6 +164,12 @@ RUN apt-get update && \ RUN --mount=type=cache,target=/root/.cache/uv \ uv sync --locked --no-default-groups --extra browser +# Keep SeleniumBase's bundled driver cache writable for the fixed non-root user. +RUN SELENIUMBASE_DRIVERS_DIR=$(/app/.venv/bin/python -c "import pathlib, seleniumbase; print(pathlib.Path(seleniumbase.__file__).resolve().parent / 'drivers')") && \ + chown -R 1000:1000 "${SELENIUMBASE_DRIVERS_DIR}" && \ + chmod -R u+rwX,go+rX "${SELENIUMBASE_DRIVERS_DIR}" && \ + if [ -f "${SELENIUMBASE_DRIVERS_DIR}/uc_driver" ]; then chmod +x "${SELENIUMBASE_DRIVERS_DIR}/uc_driver"; fi + # Grant read/execute permissions to others RUN chmod -R o+rx /usr/bin/chromium diff --git a/compose/docker-compose.tor.yml b/compose/docker-compose.tor.yml index 95de8125..0dff1164 100644 --- a/compose/docker-compose.tor.yml +++ b/compose/docker-compose.tor.yml @@ -1,4 +1,4 @@ -# Routes all traffic through Tor - requires NET_ADMIN capability +# Routes all traffic through Tor - requires root startup services: shelfmark-tor: image: ghcr.io/calibrain/shelfmark:latest diff --git a/docs/configuration.md b/docs/configuration.md index 7b96eb03..a82474a4 100644 --- a/docs/configuration.md +++ b/docs/configuration.md @@ -43,7 +43,12 @@ services: Notes: - Point `/books` to your library ingest folder (Calibre-Web, Booklore, Audiobookshelf, etc) for automatic import. 
- If you set Books Output Mode to Booklore (API), books are uploaded via API instead of written to `/books`. Audiobooks still use a destination folder. -- Ensure `PUID`/`PGID` (or legacy `UID`/`GID`) match the owner of the host directories to avoid permission errors. +- Ensure `PUID`/`PGID` (or legacy `UID`/`GID`) match the owner of the host directories. +- For non-root mode, start the container as `1000:1000`. +- On Kubernetes, set `runAsUser: 1000`, `runAsGroup: 1000`, and `runAsNonRoot: true` together. +- `PUID`/`PGID` apply only to the default root startup flow; in non-root mode the container keeps the UID/GID it was started with. +- In non-root mode, mounted paths must already be writable by `1000:1000`. +- `USING_TOR=true` requires root startup. ## Torrent / Usenet Setup @@ -113,6 +118,7 @@ Configure templates in Settings -> Downloads. Template syntax details are docume - "Download failed - file not found": Path mismatch between Shelfmark and the download client. Ensure container paths match or use Remote Path Mappings. - "Permission denied": `PUID`/`PGID` do not match the host directories. Ensure Shelfmark can read the client path and write to the destination. +- "Permission denied" in non-root Docker/Kubernetes mode: ensure the mounted path is writable by UID/GID `1000:1000`, or switch back to root startup with `PUID`/`PGID`. - "Hardlinks not working" or "Files being copied instead": Source and destination are on different filesystems. Move the destination or accept copy fallback. - "Downloads work but library does not see them": Destination does not point to the library ingest folder. Check Settings -> Downloads -> Destination. - CIFS/SMB shares: Use the `nobrl` mount option to avoid database lock errors. Example: `//server/share /mnt/share cifs nobrl,... 0 0` diff --git a/docs/environment-variables.md b/docs/environment-variables.md index 932df17c..b65b43a9 100644 --- a/docs/environment-variables.md +++ b/docs/environment-variables.md @@ -777,7 +777,7 @@ Custom label for the OIDC sign-in button on the login page. 
| `CUSTOM_DNS` | DNS provider for domain resolution. 'Auto' rotates through providers on failure. | string (choice) | `auto` | | `CUSTOM_DNS_MANUAL` | Comma-separated list of DNS server IP addresses (e.g., 8.8.8.8, 1.1.1.1). | string | _none_ | | `USE_DOH` | Use encrypted DNS queries for improved reliability and privacy. | boolean | `true` | -| `USING_TOR` | Route all traffic through Tor for enhanced privacy. | boolean | `false` | +| `USING_TOR` | Route all traffic through Tor for enhanced privacy. Requires root startup. | boolean | `false` | | `PROXY_MODE` | Choose proxy type. SOCKS5 handles all traffic through a single proxy. | string (choice) | `none` | | `HTTP_PROXY` | HTTP proxy URL (e.g., http://proxy:8080) | string | _none_ | | `HTTPS_PROXY` | HTTPS proxy URL (leave empty to use HTTP proxy for HTTPS) | string | _none_ | @@ -829,7 +829,7 @@ Use encrypted DNS queries for improved reliability and privacy. **Tor Routing** -Route all traffic through Tor for enhanced privacy. +Route all traffic through Tor for enhanced privacy. Requires root startup. - **Type:** boolean - **Default:** `false` diff --git a/entrypoint.sh b/entrypoint.sh index 4fa616aa..77c29ce3 100644 --- a/entrypoint.sh +++ b/entrypoint.sh @@ -1,5 +1,7 @@ #!/bin/bash +set -e + is_truthy() { case "${1,,}" in true|yes|1|y) return 0 ;; @@ -11,6 +13,14 @@ ENABLE_LOGGING_VALUE="${ENABLE_LOGGING:-true}" LOG_PIPE_DIR="" LOG_PIPE="" TEE_PID="" +FILE_LOGGING_ENABLED="false" +CURRENT_UID=$(id -u) +CURRENT_GID=$(id -g) +RUN_AS_NON_ROOT="false" + +if [ "$CURRENT_UID" != "0" ]; then + RUN_AS_NON_ROOT="true" +fi start_file_logging() { local logfile="$1" @@ -43,30 +53,51 @@ stop_file_logging() { if is_truthy "$ENABLE_LOGGING_VALUE"; then LOG_DIR=${LOG_ROOT:-/var/log/}/shelfmark - mkdir -p "$LOG_DIR" - LOG_FILE="${LOG_DIR}/shelfmark_entrypoint.log" - # Keep the previous entrypoint log instead of deleting all history on boot. 
- [ -f "${LOG_FILE}.prev" ] && rm -f "${LOG_FILE}.prev" - [ -f "$LOG_FILE" ] && mv "$LOG_FILE" "${LOG_FILE}.prev" + if mkdir -p "$LOG_DIR" 2>/dev/null; then + LOG_FILE="${LOG_DIR}/shelfmark_entrypoint.log" + # Keep the previous entrypoint log instead of deleting all history on boot. + rotation_ok="true" + if [ -f "${LOG_FILE}.prev" ] && ! rm -f "${LOG_FILE}.prev"; then + echo "Warning: could not remove previous entrypoint log ${LOG_FILE}.prev, continuing without file logging" >&2 + rotation_ok="false" + fi + if [ "$rotation_ok" = "true" ] && [ -f "$LOG_FILE" ] && ! mv "$LOG_FILE" "${LOG_FILE}.prev"; then + echo "Warning: could not rotate entrypoint log $LOG_FILE, continuing without file logging" >&2 + rotation_ok="false" + fi + + if [ "$rotation_ok" = "true" ]; then + FILE_LOGGING_ENABLED="true" + else + ENABLE_LOGGING_VALUE="false" + export ENABLE_LOGGING="false" + fi + else + echo "Warning: could not create log directory $LOG_DIR, continuing without file logging" >&2 + ENABLE_LOGGING_VALUE="false" + export ENABLE_LOGGING="false" + fi fi -( - if [ "$USING_TOR" = "true" ]; then - ./tor.sh +if [ "$USING_TOR" = "true" ]; then + if [ "$RUN_AS_NON_ROOT" = "true" ]; then + echo "USING_TOR=true requires the container to start as root." >&2 + echo "Non-root mode skips the privileged filesystem and network setup Tor depends on." >&2 + exit 1 fi -) + ./tor.sh +fi -if is_truthy "$ENABLE_LOGGING_VALUE"; then +if [ "$FILE_LOGGING_ENABLED" = "true" ]; then start_file_logging "$LOG_FILE" fi echo "Starting entrypoint script" -if is_truthy "$ENABLE_LOGGING_VALUE"; then +if [ "$FILE_LOGGING_ENABLED" = "true" ]; then echo "Log file: $LOG_FILE" else echo "File logging disabled (ENABLE_LOGGING=$ENABLE_LOGGING_VALUE)" fi -set -e PYTHON_BIN="/app/.venv/bin/python" if [ ! 
-x "$PYTHON_BIN" ]; then @@ -79,57 +110,75 @@ echo "Release version: $RELEASE_VERSION" # Configure timezone if [ "$TZ" ]; then - echo "Setting timezone to $TZ" - ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone + if [ "$RUN_AS_NON_ROOT" = "true" ]; then + echo "TZ is set to $TZ (non-root mode leaves /etc/localtime unchanged)" + else + echo "Setting timezone to $TZ" + ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone + fi fi -# Determine user ID with proper precedence: -# 1. PUID (LinuxServer.io standard - recommended) -# 2. UID (legacy, for backward compatibility with existing installs) -# 3. Default to 1000 -# -# Note: $UID is a bash builtin that's always set. We use `printenv` to detect -# if UID was explicitly set as an environment variable (e.g., via docker-compose). -if [ -n "$PUID" ]; then - RUN_UID="$PUID" - echo "Using PUID=$RUN_UID" -elif printenv UID >/dev/null 2>&1; then - RUN_UID="$(printenv UID)" - echo "Using UID=$RUN_UID (legacy - consider migrating to PUID)" +if [ "$RUN_AS_NON_ROOT" = "true" ]; then + RUN_UID="$CURRENT_UID" + RUN_GID="$CURRENT_GID" + USERNAME=$(getent passwd "$RUN_UID" 2>/dev/null | cut -d: -f1 || true) + if [ -z "$USERNAME" ]; then + USERNAME="$RUN_UID" + echo "No passwd entry found for UID $RUN_UID; using numeric identity" + fi + TARGET_USER_SPEC="${RUN_UID}:${RUN_GID}" else - RUN_UID=1000 - echo "Using default UID=$RUN_UID" -fi + # Determine user ID with proper precedence: + # 1. PUID (LinuxServer.io standard - recommended) + # 2. UID (legacy, for backward compatibility with existing installs) + # 3. Default to 1000 + # + # Note: $UID is a bash builtin that's always set. We use `printenv` to detect + # if UID was explicitly set as an environment variable (e.g., via docker-compose). 
+ if [ -n "$PUID" ]; then + RUN_UID="$PUID" + echo "Using PUID=$RUN_UID" + elif printenv UID >/dev/null 2>&1; then + RUN_UID="$(printenv UID)" + echo "Using UID=$RUN_UID (legacy - consider migrating to PUID)" + else + RUN_UID=1000 + echo "Using default UID=$RUN_UID" + fi -# Determine group ID with proper precedence: -# 1. PGID (LinuxServer.io standard - recommended) -# 2. GID (legacy, for backward compatibility with existing installs) -# 3. Default to 1000 -if [ -n "$PGID" ]; then - RUN_GID="$PGID" - echo "Using PGID=$RUN_GID" -elif [ -n "$GID" ]; then - RUN_GID="$GID" - echo "Using GID=$RUN_GID (legacy - consider migrating to PGID)" -else - RUN_GID=1000 - echo "Using default GID=$RUN_GID" -fi + # Determine group ID with proper precedence: + # 1. PGID (LinuxServer.io standard - recommended) + # 2. GID (legacy, for backward compatibility with existing installs) + # 3. Default to 1000 + if [ -n "$PGID" ]; then + RUN_GID="$PGID" + echo "Using PGID=$RUN_GID" + elif [ -n "$GID" ]; then + RUN_GID="$GID" + echo "Using GID=$RUN_GID (legacy - consider migrating to PGID)" + else + RUN_GID=1000 + echo "Using default GID=$RUN_GID" + fi -if ! getent group "$RUN_GID" >/dev/null; then - echo "Adding group $RUN_GID with name appuser" - groupadd -g "$RUN_GID" appuser -fi + if ! getent group "$RUN_GID" >/dev/null; then + echo "Adding group $RUN_GID with name appuser" + groupadd -g "$RUN_GID" appuser + fi -# Create user if it doesn't exist -if ! id -u "$RUN_UID" >/dev/null 2>&1; then - echo "Adding user $RUN_UID with name appuser" - useradd -u "$RUN_UID" -g "$RUN_GID" -d /app -s /sbin/nologin appuser -fi + # Create user if it doesn't exist for this UID yet. + if ! 
getent passwd "$RUN_UID" >/dev/null; then + echo "Adding user $RUN_UID with name appuser" + useradd -u "$RUN_UID" -g "$RUN_GID" -d /app -s /sbin/nologin appuser + fi -# Get username for the UID (whether we just created it or it existed) -USERNAME=$(getent passwd "$RUN_UID" | cut -d: -f1) -echo "Username for UID $RUN_UID is $USERNAME" + # Get username for the UID (whether we just created it or it existed) + USERNAME=$(getent passwd "$RUN_UID" | cut -d: -f1) + if [ -z "$USERNAME" ]; then + USERNAME="$RUN_UID" + fi + TARGET_USER_SPEC="${RUN_UID}:${RUN_GID}" +fi # Avoid unnecessary gosu hops when we're already running as the target user. # Some nested LXC setups spin on root-to-root gosu invocations. @@ -145,7 +194,7 @@ needs_user_switch() { run_as_target_user() { if needs_user_switch; then - gosu "$USERNAME" "$@" + gosu "$TARGET_USER_SPEC" "$@" return $? fi @@ -154,7 +203,7 @@ run_as_target_user() { exec_as_target_user() { if needs_user_switch; then - exec gosu "$USERNAME" "$@" + exec gosu "$TARGET_USER_SPEC" "$@" fi exec "$@" @@ -243,6 +292,22 @@ change_ownership() { chown -R "${RUN_UID}:${RUN_GID}" "${folder}" || echo "Failed to change ownership for ${folder}, continuing..." } +require_writable_dir() { + local folder="$1" + local label="${2:-Directory}" + + if ! mkdir -p "$folder"; then + echo "Failed to create ${label} directory: $folder" + exit 1 + fi + + if ! test_write "$folder"; then + echo "${label} directory is not writable in non-root mode: $folder" + echo "Prepare ownership outside the container (for example with a pre-owned volume or Kubernetes fsGroup)." + exit 1 + fi +} + ensure_tree_writable() { local folder="$1" @@ -287,81 +352,75 @@ ensure_symlinked_dir() { fi } -fix_misowned /var/log/shelfmark -fix_misowned /tmp/shelfmark - -# Keep SeleniumBase on its default /app-based paths, but redirect the scratch -# directories into /tmp so bypasser startup doesn't depend on image-layer writes. 
-if [ "${USING_EXTERNAL_BYPASSER}" != "true" ]; then - ensure_symlinked_dir /app/downloaded_files /tmp/shelfmark/seleniumbase/downloaded_files - ensure_symlinked_dir /app/archived_files /tmp/shelfmark/seleniumbase/archived_files +if [ "$RUN_AS_NON_ROOT" = "true" ]; then + require_writable_dir /tmp/shelfmark "Temporary" - # Keep SeleniumBase's bundled drivers directory writable as well for - # compatibility with legacy UC code paths that still probe bundled assets. - set +e - SELENIUMBASE_DRIVERS_DIR=$("$PYTHON_BIN" -c "import pathlib, seleniumbase; print(pathlib.Path(seleniumbase.__file__).resolve().parent / 'drivers')" 2>/dev/null) - set -e - - if [ -n "$SELENIUMBASE_DRIVERS_DIR" ] && [ -d "$SELENIUMBASE_DRIVERS_DIR" ]; then - change_ownership "$SELENIUMBASE_DRIVERS_DIR" - - # If the legacy driver already exists, ensure it's executable for the runtime user. - if [ -f "${SELENIUMBASE_DRIVERS_DIR}/uc_driver" ]; then - chmod +x "${SELENIUMBASE_DRIVERS_DIR}/uc_driver" || echo "Failed to chmod uc_driver, continuing..." - fi + if [ "${USING_EXTERNAL_BYPASSER}" != "true" ]; then + require_writable_dir /tmp/shelfmark/seleniumbase/downloaded_files "SeleniumBase downloads" + require_writable_dir /tmp/shelfmark/seleniumbase/archived_files "SeleniumBase archive" fi -fi -# Config can contain existing state we must keep accessing, so it keeps the -# thorough repair path. Output destination roots only need top-level writability. -make_writable "${CONFIG_DIR:-/config}" tree -# Entrypoint only has env vars available at this stage, so use the legacy -# INGEST_DIR env var as the fallback source for the default destination root. -make_writable "${INGEST_DIR:-/books}" root - -# Check any additional configured destination roots from saved settings -echo "Checking for additional configured destination roots..." 
-if [ -f /app/scripts/fix_permissions.py ]; then - configured_dirs=$("$PYTHON_BIN" /app/scripts/fix_permissions.py 2>/dev/null || echo "") - if [ -n "$configured_dirs" ]; then - echo "$configured_dirs" | while read -r dir; do - if [ -n "$dir" ] && [ -d "$dir" ]; then - echo "Checking configured destination root: $dir" - make_writable "$dir" root + require_writable_dir "${CONFIG_DIR:-/config}" "Config" +else + fix_misowned /var/log/shelfmark + fix_misowned /tmp/shelfmark + + # Keep SeleniumBase on its default /app-based paths, but redirect the scratch + # directories into /tmp so bypasser startup doesn't depend on image-layer writes. + if [ "${USING_EXTERNAL_BYPASSER}" != "true" ]; then + ensure_symlinked_dir /app/downloaded_files /tmp/shelfmark/seleniumbase/downloaded_files + ensure_symlinked_dir /app/archived_files /tmp/shelfmark/seleniumbase/archived_files + + # Keep SeleniumBase's bundled drivers directory writable as well for + # compatibility with legacy UC code paths that still probe bundled assets. + set +e + SELENIUMBASE_DRIVERS_DIR=$("$PYTHON_BIN" -c "import pathlib, seleniumbase; print(pathlib.Path(seleniumbase.__file__).resolve().parent / 'drivers')" 2>/dev/null) + set -e + + if [ -n "$SELENIUMBASE_DRIVERS_DIR" ] && [ -d "$SELENIUMBASE_DRIVERS_DIR" ]; then + change_ownership "$SELENIUMBASE_DRIVERS_DIR" + + # If the legacy driver already exists, ensure it's executable for the runtime user. + if [ -f "${SELENIUMBASE_DRIVERS_DIR}/uc_driver" ]; then + chmod +x "${SELENIUMBASE_DRIVERS_DIR}/uc_driver" || echo "Failed to chmod uc_driver, continuing..." fi - done + fi fi -fi -# Fallback to root if config dir is still not writable (common on NAS/Unraid after upgrade from v0.4.0) -CONFIG_PATH=${CONFIG_DIR:-/config} -set +e -test_write "$CONFIG_PATH" >/dev/null 2>&1 -config_ok=$? -set -e + # Config is Shelfmark-owned state, so it keeps the thorough repair path. 
+ make_writable "${CONFIG_DIR:-/config}" tree + + # Fallback to root if config dir is still not writable (common on NAS/Unraid after upgrade from v0.4.0) + CONFIG_PATH=${CONFIG_DIR:-/config} + set +e + test_write "$CONFIG_PATH" >/dev/null 2>&1 + config_ok=$? + set -e -if [ $config_ok -ne 0 ] && [ "$RUN_UID" != "0" ]; then - config_owner=$(stat -c '%u' "$CONFIG_PATH" 2>/dev/null || echo "unknown") - if [ "$config_owner" = "0" ]; then - echo "" - echo "========================================================" - echo "WARNING: Permission issue detected!" - echo "" - echo "Config directory is owned by root but PUID=$RUN_UID." - echo "This typically happens after upgrading from v0.4.0 where" - echo "PUID/PGID settings were not respected." - echo "" - echo "Falling back to running as root to prevent data loss." - echo "" - echo "To fix this permanently, run on your HOST machine:" - echo " chown -R $RUN_UID:$RUN_GID /path/to/config" - echo "" - echo "Then restart the container." - echo "========================================================" - echo "" - RUN_UID=0 - RUN_GID=0 - USERNAME=root + if [ $config_ok -ne 0 ] && [ "$RUN_UID" != "0" ]; then + config_owner=$(stat -c '%u' "$CONFIG_PATH" 2>/dev/null || echo "unknown") + if [ "$config_owner" = "0" ]; then + echo "" + echo "========================================================" + echo "WARNING: Permission issue detected!" + echo "" + echo "Config directory is owned by root but PUID=$RUN_UID." + echo "This typically happens after upgrading from v0.4.0 where" + echo "PUID/PGID settings were not respected." + echo "" + echo "Falling back to running as root to prevent data loss." + echo "" + echo "To fix this permanently, run on your HOST machine:" + echo " chown -R $RUN_UID:$RUN_GID /path/to/config" + echo "" + echo "Then restart the container." 
+ echo "========================================================" + echo "" + RUN_UID=0 + RUN_GID=0 + USERNAME=root + TARGET_USER_SPEC="0:0" + fi fi fi @@ -437,7 +496,25 @@ else exit 1 fi -echo "Running command: '$command' as '$USERNAME' (debug=$is_debug)" +TARGET_HOME="/app" +if [ "$RUN_AS_NON_ROOT" = "true" ]; then + TARGET_HOME=$(getent passwd "$RUN_UID" 2>/dev/null | cut -d: -f6 || true) + if [ -z "$TARGET_HOME" ]; then + TARGET_HOME="/tmp/shelfmark/home" + fi + require_writable_dir "$TARGET_HOME" "Home" +fi + +if [ "$RUN_AS_NON_ROOT" = "true" ]; then + echo "Startup mode: non-root" +elif [ "$RUN_UID" = "0" ] && [ "$RUN_GID" = "0" ]; then + echo "Startup mode: root" +else + echo "Startup mode: root bootstrap with privilege drop" +fi +echo "Runtime identity: $USERNAME (${RUN_UID}:${RUN_GID})" + +echo "Running command: '$command' as '$USERNAME' (debug=${DEBUG:-false})" # Set umask for file permissions (default: 0022 = files 644, dirs 755) UMASK_VALUE=${UMASK:-0022} @@ -445,4 +522,4 @@ echo "Setting umask to $UMASK_VALUE" umask $UMASK_VALUE stop_file_logging -exec_as_target_user env HOME=/app $command +exec_as_target_user env HOME="$TARGET_HOME" $command diff --git a/readme.md b/readme.md index b9ca3565..bc834d87 100644 --- a/readme.md +++ b/readme.md @@ -75,6 +75,14 @@ volumes: > **Note**: CIFS shares require `nobrl` mount option to avoid database lock errors. +### Non-root container mode + +- Start the container as `1000:1000` with Docker `user: "1000:1000"` or `docker run --user 1000:1000`. +- For Kubernetes, set `runAsUser: 1000`, `runAsGroup: 1000`, and `runAsNonRoot: true` together. +- `PUID`/`PGID` keep the default root startup flow. +- Mounted paths must already be writable by `1000:1000`. +- `USING_TOR=true` requires root startup. + ## ⚙️ Configuration ### Search Modes @@ -99,9 +107,9 @@ Environment variables work for initial setup and Docker deployments. 
They serve | `FLASK_PORT` | Web interface port | `8084` | | `INGEST_DIR` | Book download directory | `/books` | | `TZ` | Container timezone | `UTC` | -| `PUID` / `PGID` | Runtime user/group ID (also supports legacy `UID`/`GID`) | `1000` / `1000` | +| `PUID` / `PGID` | Runtime user/group for the default root-startup flow (also supports legacy `UID`/`GID`) | `1000` / `1000` | | `SEARCH_MODE` | `direct` or `universal` | `direct` | -| `USING_TOR` | Enable Tor routing (requires `NET_ADMIN` capability) | `false` | +| `USING_TOR` | Enable Tor routing (requires root startup plus `NET_ADMIN` and `NET_RAW` capabilities) | `false` | See the full [Environment Variables Reference](docs/environment-variables.md) for all available options. @@ -133,6 +141,7 @@ docker compose -f docker-compose.tor.yml up -d ``` **Notes:** +- Requires root startup - Requires `NET_ADMIN` and `NET_RAW` capabilities - Timezone is auto-detected from Tor exit node - Custom DNS/proxy settings are ignored when Tor is active diff --git a/scripts/bypasser_permission_lab.sh b/scripts/bypasser_permission_lab.sh index ac53ee78..6f84a045 100755 --- a/scripts/bypasser_permission_lab.sh +++ b/scripts/bypasser_permission_lab.sh @@ -35,7 +35,7 @@ wait_for_startup() { return 1 fi - if docker exec "$name" sh -lc "id appuser >/dev/null 2>&1 && ps -eo comm,args | awk '\$1 == \"gunicorn\" && index(\$0, \"shelfmark.main:app\") { found=1 } END { exit(found ? 0 : 1) }'" >/dev/null 2>&1; then + if docker exec "$name" sh -lc "getent passwd 1000 >/dev/null 2>&1 && ps -eo comm,args | awk '\$1 == \"gunicorn\" && index(\$0, \"shelfmark.main:app\") { found=1 } END { exit(found ? 
0 : 1) }'" >/dev/null 2>&1; then return 0 fi @@ -81,7 +81,7 @@ exec /app/entrypoint.sh" >/dev/null run_probe() { local name="$1" local mode="${2:-default}" - docker exec -u appuser -e PROBE_MODE="$mode" "$name" sh -lc 'python3 - <<'"'"'PY'"'"' + docker exec -u 1000:1000 -e PROBE_MODE="$mode" "$name" sh -lc 'python3 - <<'"'"'PY'"'"' import asyncio import os import shelfmark.bypass.internal_bypasser as ib @@ -183,7 +183,7 @@ scenario_latest_proxy_auth_downloads_readonly() { start_container "$name" "$LATEST_IMAGE" ' mkdir -p /app/downloaded_files && touch /app/downloaded_files/pipfinding.lock /app/downloaded_files/proxy_dir.lock && - chown appuser:appuser /app/downloaded_files/pipfinding.lock /app/downloaded_files/proxy_dir.lock && + chown 1000:1000 /app/downloaded_files/pipfinding.lock /app/downloaded_files/proxy_dir.lock && chmod 0666 /app/downloaded_files/pipfinding.lock /app/downloaded_files/proxy_dir.lock && chown root:root /app/downloaded_files && chmod 0555 /app/downloaded_files && diff --git a/scripts/fix_permissions.py b/scripts/fix_permissions.py deleted file mode 100755 index b219de38..00000000 --- a/scripts/fix_permissions.py +++ /dev/null @@ -1,90 +0,0 @@ -#!/usr/bin/env python3 -"""List configured destination roots that may need permission repair. - -This script is called by the entrypoint to find configured output destination -roots from config files under CONFIG_DIR/plugins/. - -Outputs directory paths that need permission fixing (one per line). -The entrypoint handles the actual chown operations. 
-""" - -import json -import os -import sys -from pathlib import Path - - -def get_directories_from_config() -> set[str]: - """Extract configured destination-style paths from config files.""" - directories = set() - - config_dir = Path(os.getenv("CONFIG_DIR", "/config")) - plugins_dir = config_dir / "plugins" - - if not plugins_dir.exists(): - return directories - - # Keys that can point at output destination roots or legacy equivalents - directory_keys = { - # Current destination settings - "DESTINATION", - "DESTINATION_AUDIOBOOK", - # Content-type routing destinations - "AA_CONTENT_TYPE_DIR_FICTION", - "AA_CONTENT_TYPE_DIR_NON_FICTION", - "AA_CONTENT_TYPE_DIR_UNKNOWN", - "AA_CONTENT_TYPE_DIR_MAGAZINE", - "AA_CONTENT_TYPE_DIR_COMIC", - "AA_CONTENT_TYPE_DIR_STANDARDS", - "AA_CONTENT_TYPE_DIR_MUSICAL_SCORE", - "AA_CONTENT_TYPE_DIR_OTHER", - # Legacy path settings still recognized in older configs - "INGEST_DIR", - "INGEST_DIR_AUDIOBOOK", - "INGEST_DIR_BOOK_FICTION", - "INGEST_DIR_BOOK_NON_FICTION", - "INGEST_DIR_BOOK_UNKNOWN", - "INGEST_DIR_MAGAZINE", - "INGEST_DIR_COMIC_BOOK", - "INGEST_DIR_STANDARDS_DOCUMENT", - "INGEST_DIR_MUSICAL_SCORE", - "INGEST_DIR_OTHER", - "LIBRARY_PATH", - "LIBRARY_PATH_AUDIOBOOK", - } - - # Read all JSON config files - for config_file in plugins_dir.glob("*.json"): - try: - with open(config_file, "r") as f: - config = json.load(f) - - for key in directory_keys: - if key in config: - value = config[key] - if value and isinstance(value, str) and value.startswith("/"): - directories.add(value) - except (json.JSONDecodeError, OSError): - continue - - return directories - - -def main(): - """Output configured destination roots that currently exist.""" - directories = get_directories_from_config() - - # Filter to directories that actually exist - existing = [] - for dir_path in directories: - path = Path(dir_path) - if path.exists() and path.is_dir(): - existing.append(dir_path) - - # Output one directory per line - for dir_path in 
sorted(existing): - print(dir_path) - - -if __name__ == "__main__": - main() diff --git a/shelfmark/config/download_settings_handlers.py b/shelfmark/config/download_settings_handlers.py new file mode 100644 index 00000000..4b4d02bb --- /dev/null +++ b/shelfmark/config/download_settings_handlers.py @@ -0,0 +1,110 @@ +from __future__ import annotations + +import re +from pathlib import Path +from typing import Any + +_USER_PLACEHOLDER_PATTERN = re.compile(r"\{user\}", re.IGNORECASE) + + +def _get_download_setting_value( + current_values: dict[str, Any] | None, + key: str, + *, + default: object = None, +) -> object: + """Read a downloads setting from unsaved form values first, then persisted config.""" + from shelfmark.core.config import config + + current_values = current_values or {} + if key in current_values: + return current_values[key] + if default is None: + return config.get(key) + return config.get(key, default) + + +def _resolve_destination_test_path( + configured_path: str, +) -> tuple[Path, str | None]: + """Resolve a safe path to validate for destination test actions.""" + stripped_path = configured_path.strip() + + if not _USER_PLACEHOLDER_PATTERN.search(stripped_path): + return Path(stripped_path), None + + base_prefix = _USER_PLACEHOLDER_PATTERN.split(stripped_path, maxsplit=1)[0].rstrip("/") + if not base_prefix and not stripped_path.startswith("/"): + return Path(stripped_path), None + + base_path = base_prefix or "/" + return Path(base_path), ( + f" (tested base path {base_path} from configured template {stripped_path})" + ) + + +def _test_folder_destination( + *, + current_values: dict[str, Any] | None = None, + is_audiobook: bool, +) -> dict[str, Any]: + """Validate a folder destination using current form values.""" + from shelfmark.download.postprocess.destination import validate_destination + + destination_value = _get_download_setting_value( + current_values, + "DESTINATION", + default="/books", + ) + destination = str(destination_value or 
"").strip() + + label = "Books destination" + message_suffix = "" + + if is_audiobook: + audiobook_value = _get_download_setting_value( + current_values, + "DESTINATION_AUDIOBOOK", + default="", + ) + audiobook_destination = str(audiobook_value or "").strip() + if audiobook_destination: + destination = audiobook_destination + label = "Audiobook destination" + else: + label = "Audiobook destination" + message_suffix = " (using the Books destination)" + + if not destination: + return {"success": False, "message": f"{label} is required"} + + test_path, path_message = _resolve_destination_test_path(destination) + if path_message: + message_suffix += path_message + + errors: list[str] = [] + + def _status_callback(status: str, message: str | None) -> None: + if status == "error" and message: + errors.append(message) + + if not validate_destination(test_path, _status_callback): + message = errors[-1] if errors else f"Cannot access destination: {test_path}" + if message_suffix: + message = f"{message}{message_suffix}" + return {"success": False, "message": message} + + return { + "success": True, + "message": f"{label} is writable: {test_path}{message_suffix}", + } + + +def check_books_destination(current_values: dict[str, Any] | None = None) -> dict[str, Any]: + """Validate the configured books destination.""" + return _test_folder_destination(current_values=current_values, is_audiobook=False) + + +def check_audiobook_destination(current_values: dict[str, Any] | None = None) -> dict[str, Any]: + """Validate the configured audiobook destination.""" + return _test_folder_destination(current_values=current_values, is_audiobook=True) diff --git a/shelfmark/config/settings.py b/shelfmark/config/settings.py index ac926e5b..2bf897c3 100644 --- a/shelfmark/config/settings.py +++ b/shelfmark/config/settings.py @@ -10,6 +10,10 @@ get_booklore_library_options, get_booklore_path_options, ) +from shelfmark.config.download_settings_handlers import ( + check_audiobook_destination, + 
check_books_destination, +) from shelfmark.config.email_settings import check_email_connection from shelfmark.core.logger import setup_logger from shelfmark.core.settings_registry import ( @@ -618,14 +622,14 @@ def network_settings() -> list[SettingsField]: description=( "All traffic is routed through Tor. Requires container restart to change." if tor_enabled - else "Route all traffic through Tor for enhanced privacy." + else "Route all traffic through Tor for enhanced privacy. Requires root startup." ), default=tor_enabled, # Reflects actual state from env var disabled=True, # Tor state requires container restart disabled_reason=( "Tor routing is active. Set USING_TOR=false and restart to disable." if tor_enabled - else "Set USING_TOR=true env var and restart with NET_ADMIN/NET_RAW capabilities." + else "Set USING_TOR=true env var and restart as root." ), ), SelectField( @@ -919,6 +923,17 @@ def download_settings() -> list[SettingsField]: "value": "folder", }, ), + ActionButton( + key="test_destination", + label="Test Destination", + description="Check that Shelfmark can create and write to this destination.", + style="primary", + callback=check_books_destination, + show_when={ + "field": "BOOKS_OUTPUT_MODE", + "value": "folder", + }, + ), SelectField( key="FILE_ORGANIZATION", label="File Organization", @@ -1184,6 +1199,14 @@ def download_settings() -> list[SettingsField]: user_overridable=True, universal_only=True, ), + ActionButton( + key="test_destination_audiobook", + label="Test Destination", + description="Check that Shelfmark can create and write to this audiobook destination.", + style="primary", + callback=check_audiobook_destination, + universal_only=True, + ), SelectField( key="FILE_ORGANIZATION_AUDIOBOOK", label="File Organization", diff --git a/shelfmark/core/settings_registry.py b/shelfmark/core/settings_registry.py index c20141dc..214c84e2 100644 --- a/shelfmark/core/settings_registry.py +++ b/shelfmark/core/settings_registry.py @@ -187,6 +187,7 @@ 
from unittest.mock import patch


def test_download_settings_destination_test_buttons_exist():
    """Both "Test Destination" action buttons are registered with the expected attributes."""
    from shelfmark.config.settings import download_settings

    registered = download_settings()

    def _field_with_key(wanted):
        # First registered field whose ``key`` attribute equals ``wanted``;
        # entries without a ``key`` attribute compare as ``None``.
        return next(entry for entry in registered if getattr(entry, "key", None) == wanted)

    books_button = _field_with_key("test_destination")
    audiobook_button = _field_with_key("test_destination_audiobook")

    assert books_button.label == "Test Destination"
    assert books_button.style == "primary"
    assert books_button.show_when == {"field": "BOOKS_OUTPUT_MODE", "value": "folder"}
    assert audiobook_button.label == "Test Destination"
    assert audiobook_button.style == "primary"
    assert audiobook_button.universal_only is True


def test_test_books_destination_uses_current_values(tmp_path):
    """A DESTINATION passed in the values dict is validated and exists afterwards."""
    from shelfmark.config.download_settings_handlers import check_books_destination

    target = tmp_path / "books"
    unsaved_values = {"DESTINATION": str(target)}

    outcome = check_books_destination(unsaved_values)

    assert outcome["success"] is True
    assert outcome["message"] == f"Books destination is writable: {target}"
    assert target.exists()


def test_test_audiobook_destination_falls_back_to_books_destination(tmp_path):
    """An empty DESTINATION_AUDIOBOOK makes the audiobook check use DESTINATION."""
    from shelfmark.config.download_settings_handlers import check_audiobook_destination

    target = tmp_path / "books"
    unsaved_values = {
        "DESTINATION": str(target),
        "DESTINATION_AUDIOBOOK": "",
    }

    outcome = check_audiobook_destination(unsaved_values)

    expected_message = (
        f"Audiobook destination is writable: {target} (using the Books destination)"
    )
    assert outcome["success"] is True
    assert outcome["message"] == expected_message


def test_test_books_destination_uses_base_path_for_user_placeholder(tmp_path):
    """A trailing ``{User}`` segment is not created; the base path is tested instead."""
    from shelfmark.config.download_settings_handlers import check_books_destination

    target = tmp_path / "books"
    template = f"{target}/{{User}}"

    outcome = check_books_destination({"DESTINATION": template})

    expected_message = (
        f"Books destination is writable: {target} "
        f"(tested base path {target} from configured template {target}/{{User}})"
    )
    assert outcome["success"] is True
    assert outcome["message"] == expected_message
    assert not (target / "{User}").exists()


def test_test_books_destination_uses_base_path_for_lowercase_user_placeholder(tmp_path):
    """The lowercase ``{user}`` placeholder is handled like ``{User}``."""
    from shelfmark.config.download_settings_handlers import check_books_destination

    target = tmp_path / "books"
    template = f"{target}/{{user}}"

    outcome = check_books_destination({"DESTINATION": template})

    expected_message = (
        f"Books destination is writable: {target} "
        f"(tested base path {target} from configured template {target}/{{user}})"
    )
    assert outcome["success"] is True
    assert outcome["message"] == expected_message
    assert not (target / "{user}").exists()


def test_test_books_destination_rejects_relative_user_placeholder_path():
    """A template that starts with the placeholder is reported as not absolute."""
    from shelfmark.config.download_settings_handlers import check_books_destination

    outcome = check_books_destination({"DESTINATION": "{User}/books"})

    assert outcome["success"] is False
    assert outcome["message"] == "Destination must be absolute: {User}/books"


def test_test_books_destination_requires_value():
    """An empty DESTINATION is rejected with a 'required' message."""
    from shelfmark.config.download_settings_handlers import check_books_destination

    outcome = check_books_destination({"DESTINATION": ""})

    assert outcome["success"] is False
    assert outcome["message"] == "Books destination is required"


def test_test_books_destination_uses_persisted_value_when_current_values_missing(monkeypatch, tmp_path):
    """Called with no values, the check reads DESTINATION through ``config.get``."""
    from shelfmark.config.download_settings_handlers import check_books_destination
    from shelfmark.core.config import config

    target = tmp_path / "persisted-books"

    def _fake_get(key: str, default=None):
        # Only DESTINATION is "persisted"; every other key yields the caller's default.
        return str(target) if key == "DESTINATION" else default

    monkeypatch.setattr(config, "get", _fake_get)

    outcome = check_books_destination()

    assert outcome["success"] is True
    assert outcome["message"] == f"Books destination is writable: {target}"


def test_test_audiobook_destination_preserves_books_fallback_suffix_on_failure(tmp_path):
    """The fallback suffix is appended to the validator's error message on failure."""
    from shelfmark.config.download_settings_handlers import check_audiobook_destination

    target = tmp_path / "books"
    unsaved_values = {
        "DESTINATION": str(target),
        "DESTINATION_AUDIOBOOK": "",
    }

    def _fake_validate_destination(path, status_callback):
        # Stand-in validator: report an error through the callback, then fail.
        status_callback("error", f"Destination not writable: {path}")
        return False

    validator_patch = patch(
        "shelfmark.download.postprocess.destination.validate_destination",
        side_effect=_fake_validate_destination,
    )
    with validator_patch:
        outcome = check_audiobook_destination(unsaved_values)

    expected_message = f"Destination not writable: {target} (using the Books destination)"
    assert outcome["success"] is False
    assert outcome["message"] == expected_message


def test_execute_action_passes_unsaved_values_to_destination_test(tmp_path):
    """``execute_action`` forwards the supplied values to the destination validator."""
    import shelfmark.config.settings  # noqa: F401
    from shelfmark.core.settings_registry import execute_action

    target = tmp_path / "action-books"
    seen: dict[str, object] = {}

    def _fake_validate_destination(path, status_callback):
        # Record what the handler asked to validate and report success.
        seen["path"] = path
        return True

    validator_patch = patch(
        "shelfmark.download.postprocess.destination.validate_destination",
        side_effect=_fake_validate_destination,
    )
    with validator_patch:
        outcome = execute_action(
            "downloads",
            "test_destination",
            {"DESTINATION": str(target)},
        )

    assert outcome["success"] is True
    assert seen["path"] == target
"shelfmark.download.postprocess.destination.validate_destination", + side_effect=_fake_validate_destination, + ): + result = execute_action( + "downloads", + "test_destination", + {"DESTINATION": str(destination)}, + ) + + assert result["success"] is True + assert captured["path"] == destination diff --git a/tests/config/test_entrypoint_permissions.py b/tests/config/test_entrypoint_permissions.py new file mode 100644 index 00000000..fdb92366 --- /dev/null +++ b/tests/config/test_entrypoint_permissions.py @@ -0,0 +1,146 @@ +from __future__ import annotations + +import contextlib +import fcntl +import os +import shutil +import subprocess +from pathlib import Path + +ENTRYPOINT_PATH = Path(__file__).resolve().parents[2] / "entrypoint.sh" +ENTRYPOINT_LOCK_PATH = Path("/tmp/shelfmark_entrypoint_test.lock") +BASH_PATH = shutil.which("bash") or "/bin/bash" + + +@contextlib.contextmanager +def _entrypoint_lock(): + ENTRYPOINT_LOCK_PATH.parent.mkdir(parents=True, exist_ok=True) + with ENTRYPOINT_LOCK_PATH.open("w") as handle: + fcntl.flock(handle, fcntl.LOCK_EX) + try: + yield + finally: + fcntl.flock(handle, fcntl.LOCK_UN) + + +def _write_executable(path: Path, content: str) -> None: + path.write_text(content) + path.chmod(0o755) + + +def _build_stub_bin(tmp_path: Path) -> tuple[Path, Path, Path]: + bin_dir = tmp_path / "bin" + bin_dir.mkdir() + + runtime_home_file = tmp_path / "gunicorn-home.txt" + runtime_args_file = tmp_path / "gunicorn-args.txt" + + _write_executable( + bin_dir / "getent", + """#!/bin/sh +if [ "$1" = "passwd" ] && [ "$2" = "$ENTRYPOINT_STUB_UID" ]; then + printf 'shelfmark:x:%s:%s:Shelfmark:%s:/bin/sh\\n' "$ENTRYPOINT_STUB_UID" "$ENTRYPOINT_STUB_GID" "$ENTRYPOINT_STUB_HOME" + exit 0 +fi +if [ "$1" = "group" ] && [ "$2" = "$ENTRYPOINT_STUB_GID" ]; then + printf 'shelfmark:x:%s:\\n' "$ENTRYPOINT_STUB_GID" + exit 0 +fi +exit 2 +""", + ) + _write_executable( + bin_dir / "gunicorn", + """#!/bin/sh +printf '%s' "$HOME" > "$ENTRYPOINT_GUNICORN_HOME_FILE" 
+printf '%s' "$*" > "$ENTRYPOINT_GUNICORN_ARGS_FILE" +exit 0 +""", + ) + + return bin_dir, runtime_home_file, runtime_args_file + + +def _run_entrypoint( + tmp_path: Path, + *, + extra_env: dict[str, str] | None = None, +) -> tuple[subprocess.CompletedProcess[str], Path, Path, Path]: + runtime_home = tmp_path / "runtime-home" + config_dir = tmp_path / "config" + config_dir.mkdir(exist_ok=True) + + bin_dir, runtime_home_file, runtime_args_file = _build_stub_bin(tmp_path) + + env = os.environ.copy() + env.update( + { + "BUILD_VERSION": "test-build", + "CONFIG_DIR": str(config_dir), + "DEBUG": "false", + "ENABLE_LOGGING": "false", + "ENTRYPOINT_GUNICORN_ARGS_FILE": str(runtime_args_file), + "ENTRYPOINT_GUNICORN_HOME_FILE": str(runtime_home_file), + "ENTRYPOINT_STUB_GID": str(os.getgid()), + "ENTRYPOINT_STUB_HOME": str(runtime_home), + "ENTRYPOINT_STUB_UID": str(os.getuid()), + "FLASK_PORT": "8084", + "LOG_LEVEL": "info", + "LOG_ROOT": str(tmp_path / "logs"), + "PATH": f"{bin_dir}:{env.get('PATH', '')}", + "RELEASE_VERSION": "test-release", + "TZ": "", + "USING_EXTERNAL_BYPASSER": "true", + } + ) + if extra_env: + env.update(extra_env) + + with _entrypoint_lock(): + result = subprocess.run( + [BASH_PATH, str(ENTRYPOINT_PATH)], + capture_output=True, + cwd=ENTRYPOINT_PATH.parent, + env=env, + text=True, + check=False, + ) + + return result, runtime_home_file, runtime_args_file, runtime_home + + +def test_entrypoint_rejects_tor_in_non_root_mode(tmp_path): + result, _, _, _ = _run_entrypoint(tmp_path, extra_env={"USING_TOR": "true"}) + + assert result.returncode == 1 + assert "USING_TOR=true requires the container to start as root." in result.stderr + assert "Non-root mode skips the privileged filesystem and network setup Tor depends on." 
in result.stderr + + +def test_entrypoint_non_root_mode_runs_with_stub_gunicorn(tmp_path): + result, runtime_home_file, runtime_args_file, runtime_home = _run_entrypoint(tmp_path) + + assert result.returncode == 0 + assert "Startup mode: non-root" in result.stdout + assert f"Runtime identity: shelfmark ({os.getuid()}:{os.getgid()})" in result.stdout + assert runtime_home.exists() + assert runtime_home_file.read_text() == str(runtime_home) + assert "shelfmark.main:app" in runtime_args_file.read_text() + + +def test_entrypoint_non_root_mode_requires_writable_config_dir(tmp_path): + readonly_config_dir = tmp_path / "readonly-config" + readonly_config_dir.mkdir() + readonly_config_dir.chmod(0o555) + + try: + result, _, _, _ = _run_entrypoint( + tmp_path, + extra_env={"CONFIG_DIR": str(readonly_config_dir)}, + ) + finally: + readonly_config_dir.chmod(0o755) + + assert result.returncode == 1 + assert f"Config directory is not writable in non-root mode: {readonly_config_dir}" in result.stdout + assert "Prepare ownership outside the container" in result.stdout