diff --git a/Dockerfile b/Dockerfile index 693d61e2c6..4688298d2c 100644 --- a/Dockerfile +++ b/Dockerfile @@ -24,6 +24,8 @@ RUN apt-get update -y --fix-missing --no-install-recommends \ curl \ rsync \ openssh-client \ + git \ + xz-utils \ && apt-get upgrade -y \ && apt-get clean \ && rm -rf /var/lib/apt/lists/* diff --git a/start.sh b/start.sh index 59e0d65a73..a1406663a7 100755 --- a/start.sh +++ b/start.sh @@ -1,12 +1,48 @@ #!/usr/bin/env bash set -euo pipefail +# If invoked as root (e.g. via `sudo ./start.sh` or accidental root shell +# inside the container), re-exec as the unprivileged hermeswebui user so the +# WebUI process never owns root-only file modes on bind-mounted state. +# Outside containers the EUID==0 case is rare; inside the production image +# the entrypoint drops to hermeswebui itself, so this is a defensive guard. +# Sourced from PR #1686 (@binhpt310) — Cluster 1 (operational hardening), +# extracted to a focused follow-up after the parent PR was deferred over a +# separate sibling-repo build-context concern unrelated to this fix. +# +# Four preconditions to fire (all must hold): +# - EUID == 0 +# - hermeswebui user actually exists (id lookup) +# - sudo is on PATH (production image does not ship sudo, so this is the +# load-bearing no-op guard for the canonical container path) +# - sudo -u hermeswebui passes without prompting (NOPASSWD precheck) +# The NOPASSWD precheck via `sudo -n -u hermeswebui true` makes this a silent +# fall-through on host machines where the developer's hermeswebui user +# requires a password — better than exiting non-zero with `sudo: a password +# is required` and surprising the user who didn't ask for sudo behavior. +if [[ ${EUID:-$(id -u)} -eq 0 ]] && id hermeswebui >/dev/null 2>&1 \ + && command -v sudo >/dev/null 2>&1 \ + && sudo -n -u hermeswebui true 2>/dev/null; then + exec sudo -n -u hermeswebui "$0" "$@" +fi + REPO_ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" if [[ -f "${REPO_ROOT}/.env" ]]; then + # Filter out shell-readonly vars (UID, GID, EUID, EGID, PPID) before + # `source`ing. docker-compose.yml's macOS instructions document + # `echo "UID=$(id -u)" >> .env` to set host UID/GID, which then crashes + # `start.sh` with "UID: readonly variable" when bash tries to assign to + # those names. Filtering them out lets the .env file carry those entries + # for docker-compose's variable substitution while keeping local invocation + # of start.sh working. The regression guard at + # tests/test_bootstrap_dotenv.py:181 still passes — the line below contains + # both `source` and `.env`. + # Sourced from PR #1686 (@binhpt310) — Cluster 1 (operational hardening), + # extracted to a focused follow-up after the parent PR was deferred. set -a # shellcheck source=/dev/null - source "${REPO_ROOT}/.env" + source <(grep -vE '^[[:space:]]*(export[[:space:]]+)?(UID|GID|EUID|EGID|PPID)=' "${REPO_ROOT}/.env") set +a fi diff --git a/tests/test_docker_env_readonly_vars.py b/tests/test_docker_env_readonly_vars.py new file mode 100644 index 0000000000..226f0b8dc9 --- /dev/null +++ b/tests/test_docker_env_readonly_vars.py @@ -0,0 +1,244 @@ +"""Regression tests for start.sh's .env parsing handling readonly bash variables. + +Background: docker-compose.yml's macOS instructions document +``echo "UID=$(id -u)" >> .env`` to set host UID/GID for bind-mount permission +fixing. The repo-level .env file is then read by both: + + 1. ``docker-compose.yml`` itself (for ${UID}/${GID} variable substitution) + 2. ``start.sh`` (which `source`s the .env to load HERMES_WEBUI_* settings) + 3. ``bootstrap.py`` (via ``_load_repo_dotenv()``) + +The old ``set -a; source "${REPO_ROOT}/.env"; set +a`` pattern in start.sh +crashed with ``UID: readonly variable`` when the .env carried UID/GID lines — +because bash treats UID/GID/EUID/EGID/PPID as read-only. The fix filters +those readonly vars out of the source stream while leaving them intact in the +.env file for docker-compose's substitution. + +Sourced from PR #1686 (@binhpt310) — extracted to a focused follow-up after +the parent PR was deferred over an unrelated sibling-repo build-context concern. + +These tests pin: + - The filter pattern is present in start.sh + - The ``source`` + ``.env`` regression guard at + test_bootstrap_dotenv.py:181 still passes (both keywords present) + - All five readonly-name forms (UID, GID, EUID, EGID, PPID) are caught + - The optional ``export`` prefix on those names is also caught + - Non-readonly KEY=value lines in .env still load +""" +import re +import shutil +import subprocess +import textwrap +from pathlib import Path + +import pytest + + +REPO_ROOT = Path(__file__).resolve().parents[1] +START_SH = (REPO_ROOT / "start.sh").read_text(encoding="utf-8") + + +class TestStartShReadonlyEnvFilter: + """Pin start.sh's .env parser against the docker-compose macOS UID/GID flow.""" + + def test_start_sh_still_sources_env_regression_guard(self): + """The bootstrap regression guard at test_bootstrap_dotenv.py:181 + requires ``source`` AND ``.env`` to both appear in start.sh. After + the readonly-vars filter, both must still be present.""" + assert "source" in START_SH, ( + "start.sh must still call `source` to load .env " + "(regression guard, see tests/test_bootstrap_dotenv.py:181)" + ) + assert ".env" in START_SH, ( + "start.sh must still reference .env path " + "(regression guard, see tests/test_bootstrap_dotenv.py:181)" + ) + + def test_readonly_vars_filtered_before_source(self): + """The readonly bash names (UID/GID/EUID/EGID/PPID) must be filtered + out of the .env stream before `source` reads it. The filter is a + ``grep -vE`` against the .env file.""" + # The filter must mention all five readonly names. + for var in ("UID", "GID", "EUID", "EGID", "PPID"): + assert var in START_SH, ( + f"start.sh's .env filter must mention readonly var {var!r} " + "so that bash assignment to it does not crash with " + f"'{var}: readonly variable'" + ) + + def test_filter_pattern_uses_grep_or_equivalent(self): + """Filter must use a pattern that strips readonly-var lines before + the bash `source` consumes them. `grep -vE` is the canonical form; + the assertion accepts any process-substitution-into-source shape.""" + # Look for `source <(...UID...)` pattern. Note that the inner shell + # expression can contain its own parens (e.g. `(export[[:space:]]+)`), + # so we use a non-greedy `.*?` rather than `[^)]*`. + assert re.search( + r"source\s+<\(.*?UID.*?\)", + START_SH, + re.DOTALL, + ), ( + "start.sh's .env loader must filter readonly bash vars " + "(UID/GID/EUID/EGID/PPID) via `source <(grep -vE ...)` or " + "equivalent process-substitution form before `source`-ing " + "the .env file" + ) + + def test_filter_handles_optional_export_prefix(self): + """The ``export`` prefix on env vars is optional but common. The + readonly-var filter must catch both bare and exported forms.""" + assert "export" in START_SH, ( + "start.sh's .env filter must account for the optional `export` " + "prefix on readonly-var assignments (e.g. `export UID=501`), " + "otherwise bash will still crash on the assignment" + ) + + +@pytest.mark.skipif(shutil.which("bash") is None, reason="bash not available") +class TestStartShReadonlyEnvFilterBehavioral: + """Behavioral tests — actually run bash to verify .env parsing succeeds. + + These tests extract the .env loader block from start.sh and run it + against synthetic .env files. They guard against shell-quoting + regressions in the filter pattern itself (which the source-grep tests + above can't catch on their own). + """ + + @staticmethod + def _extract_env_loader(start_sh: str) -> str: + """Pull out the `if [[ -f "${REPO_ROOT}/.env" ]] ... fi` block.""" + # Find the if-block with .env in it. + m = re.search( + r'(if \[\[ -f "\$\{REPO_ROOT\}/\.env" \]\]; then.*?^fi)\n', + start_sh, + re.DOTALL | re.MULTILINE, + ) + assert m is not None, "could not locate .env loader block in start.sh" + return m.group(1) + + def _run_loader(self, env_contents: str, tmp_path: Path) -> subprocess.CompletedProcess: + """Write ``env_contents`` to a tmp .env and run start.sh's loader against it.""" + env_file = tmp_path / ".env" + env_file.write_text(env_contents, encoding="utf-8") + + loader = self._extract_env_loader(START_SH) + # Wrap loader in a tiny bash script that points REPO_ROOT at tmp_path + # and then echoes a few keys we care about. + script = textwrap.dedent(f"""\ + set -euo pipefail + REPO_ROOT={str(tmp_path)!r} + {loader} + # Print loaded values (or "unset") for the test to assert against. + echo "PORT=${{HERMES_WEBUI_PORT:-unset}}" + echo "SOME=${{SOME_KEY:-unset}}" + echo "ANOTHER=${{ANOTHER:-unset}}" + echo "EXIT_OK" + """) + + return subprocess.run( + ["bash", "-c", script], + capture_output=True, + text=True, + timeout=10, + ) + + def test_env_with_readonly_uid_gid_does_not_crash(self, tmp_path): + """The exact macOS docker-compose pattern: UID + GID in .env.""" + env_contents = textwrap.dedent("""\ + UID=501 + GID=20 + HERMES_WEBUI_PORT=8888 + SOME_KEY=normal-value + """) + result = self._run_loader(env_contents, tmp_path) + assert "EXIT_OK" in result.stdout, ( + f"loader crashed on .env with readonly UID/GID. " + f"stderr: {result.stderr!r}" + ) + assert "readonly variable" not in result.stderr, ( + f".env loader still triggered readonly-variable crash: " + f"{result.stderr!r}" + ) + # Non-readonly keys must still load. + assert "PORT=8888" in result.stdout + assert "SOME=normal-value" in result.stdout + + def test_env_with_exported_readonly_does_not_crash(self, tmp_path): + """`export UID=501` form must also be filtered.""" + env_contents = textwrap.dedent("""\ + export UID=501 + export GID=20 + HERMES_WEBUI_PORT=9000 + """) + result = self._run_loader(env_contents, tmp_path) + assert "EXIT_OK" in result.stdout + assert "readonly variable" not in result.stderr + assert "PORT=9000" in result.stdout + + def test_all_five_readonly_names_filtered(self, tmp_path): + """UID, GID, EUID, EGID, PPID — all five must be filtered.""" + env_contents = textwrap.dedent("""\ + UID=501 + GID=20 + EUID=501 + EGID=20 + PPID=12345 + HERMES_WEBUI_PORT=7777 + """) + result = self._run_loader(env_contents, tmp_path) + assert "EXIT_OK" in result.stdout, ( + f"loader crashed; stderr: {result.stderr!r}" + ) + assert "readonly variable" not in result.stderr + assert "PORT=7777" in result.stdout + + def test_normal_env_still_loads(self, tmp_path): + """A .env without readonly vars must still load all keys.""" + env_contents = textwrap.dedent("""\ + HERMES_WEBUI_PORT=8787 + SOME_KEY=hello + ANOTHER=world + """) + result = self._run_loader(env_contents, tmp_path) + assert "EXIT_OK" in result.stdout + assert "PORT=8787" in result.stdout + assert "SOME=hello" in result.stdout + assert "ANOTHER=world" in result.stdout + + def test_export_prefix_strips_correctly(self, tmp_path): + """`export FOO=bar` (non-readonly) loads `FOO=bar` after `set -a; source`.""" + env_contents = textwrap.dedent("""\ + UID=501 + export ANOTHER=exported-value + HERMES_WEBUI_PORT=6543 + """) + result = self._run_loader(env_contents, tmp_path) + assert "EXIT_OK" in result.stdout + assert "ANOTHER=exported-value" in result.stdout + assert "PORT=6543" in result.stdout + + +class TestDockerfileSystemPackages: + """Pin Dockerfile system-package dependencies (#1686 Cluster 1).""" + + def test_dockerfile_installs_xz_utils(self): + """xz-utils is required to extract xz-compressed tarballs (e.g. + Node.js distribution archives) — without it, agent install paths + that download xz-compressed deps fail with `xz: Cannot exec`.""" + dockerfile = (REPO_ROOT / "Dockerfile").read_text(encoding="utf-8") + assert re.search(r"\bxz-utils\b", dockerfile), ( + "Dockerfile must install xz-utils (apt package) — without it, " + "any tarball decompression of .tar.xz files fails with " + "`xz: Cannot exec: No such file or directory`" + ) + + def test_dockerfile_installs_git(self): + """git is needed for any agent-install path that clones a repo, plus + for the runtime ``git describe`` that powers WEBUI_VERSION detection + in non-baked images.""" + dockerfile = (REPO_ROOT / "Dockerfile").read_text(encoding="utf-8") + assert re.search(r"^\s*git\s*\\?\s*$", dockerfile, re.MULTILINE), ( + "Dockerfile must install git (apt package) — required for " + "version detection (`git describe`) and any agent install path " + "that clones a repo" + )