Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,8 @@ RUN apt-get update -y --fix-missing --no-install-recommends \
curl \
rsync \
openssh-client \
git \
xz-utils \
&& apt-get upgrade -y \
&& apt-get clean \
&& rm -rf /var/lib/apt/lists/*
Expand Down
38 changes: 37 additions & 1 deletion start.sh
Original file line number Diff line number Diff line change
@@ -1,12 +1,48 @@
#!/usr/bin/env bash
set -euo pipefail

# If invoked as root (e.g. via `sudo ./start.sh` or accidental root shell
# inside the container), re-exec as the unprivileged hermeswebui user so the
# WebUI process never owns root-only file modes on bind-mounted state.
# Outside containers the EUID==0 case is rare; inside the production image
# the entrypoint drops to hermeswebui itself, so this is a defensive guard.
# Sourced from PR #1686 (@binhpt310) — Cluster 1 (operational hardening),
# extracted to a focused follow-up after the parent PR was deferred over a
# separate sibling-repo build-context concern unrelated to this fix.
#
# Four preconditions to fire (all must hold):
# - EUID == 0
# - hermeswebui user actually exists (id lookup)
# - sudo is on PATH (production image does not ship sudo, so this is the
# load-bearing no-op guard for the canonical container path)
# - sudo -u hermeswebui passes without prompting (NOPASSWD precheck)
# The NOPASSWD precheck via `sudo -n -u hermeswebui true` makes this a silent
# fall-through on host machines where the developer's hermeswebui user
# requires a password — better than exiting non-zero with `sudo: a password
# is required` and surprising the user who didn't ask for sudo behavior.
if [[ ${EUID:-$(id -u)} -eq 0 ]] && id hermeswebui >/dev/null 2>&1 \
&& command -v sudo >/dev/null 2>&1 \
&& sudo -n -u hermeswebui true 2>/dev/null; then
exec sudo -n -u hermeswebui "$0" "$@"
fi

REPO_ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"

if [[ -f "${REPO_ROOT}/.env" ]]; then
# Filter out shell-readonly vars (UID, GID, EUID, EGID, PPID) before
# `source`ing. docker-compose.yml's macOS instructions document
# `echo "UID=$(id -u)" >> .env` to set host UID/GID, which then crashes
# `start.sh` with "UID: readonly variable" when bash tries to assign to
# those names. Filtering them out lets the .env file carry those entries
# for docker-compose's variable substitution while keeping local invocation
# of start.sh working. The regression guard at
# tests/test_bootstrap_dotenv.py:181 still passes — the line below contains
# both `source` and `.env`.
# Sourced from PR #1686 (@binhpt310) — Cluster 1 (operational hardening),
# extracted to a focused follow-up after the parent PR was deferred.
set -a
# shellcheck source=/dev/null
source "${REPO_ROOT}/.env"
source <(grep -vE '^[[:space:]]*(export[[:space:]]+)?(UID|GID|EUID|EGID|PPID)=' "${REPO_ROOT}/.env")
set +a
fi

Expand Down
244 changes: 244 additions & 0 deletions tests/test_docker_env_readonly_vars.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,244 @@
"""Regression tests for start.sh's .env parsing handling readonly bash variables.

Background: docker-compose.yml's macOS instructions document
``echo "UID=$(id -u)" >> .env`` to set host UID/GID for bind-mount permission
fixing. The repo-level .env file is then read by both:

1. ``docker-compose.yml`` itself (for ${UID}/${GID} variable substitution)
2. ``start.sh`` (which `source`s the .env to load HERMES_WEBUI_* settings)
3. ``bootstrap.py`` (via ``_load_repo_dotenv()``)

The old ``set -a; source "${REPO_ROOT}/.env"; set +a`` pattern in start.sh
crashed with ``UID: readonly variable`` when the .env carried UID/GID lines —
because bash treats UID/GID/EUID/EGID/PPID as read-only. The fix filters
those readonly vars out of the source stream while leaving them intact in the
.env file for docker-compose's substitution.

Sourced from PR #1686 (@binhpt310) — extracted to a focused follow-up after
the parent PR was deferred over an unrelated sibling-repo build-context concern.

These tests pin:
- The filter pattern is present in start.sh
- The ``source`` + ``.env`` regression guard at
test_bootstrap_dotenv.py:181 still passes (both keywords present)
- All five readonly-name forms (UID, GID, EUID, EGID, PPID) are caught
- The optional ``export`` prefix on those names is also caught
- Non-readonly KEY=value lines in .env still load
"""
import re
import shutil
import subprocess
import textwrap
from pathlib import Path

import pytest


REPO_ROOT = Path(__file__).resolve().parents[1]
START_SH = (REPO_ROOT / "start.sh").read_text(encoding="utf-8")


class TestStartShReadonlyEnvFilter:
"""Pin start.sh's .env parser against the docker-compose macOS UID/GID flow."""

def test_start_sh_still_sources_env_regression_guard(self):
"""The bootstrap regression guard at test_bootstrap_dotenv.py:181
requires ``source`` AND ``.env`` to both appear in start.sh. After
the readonly-vars filter, both must still be present."""
assert "source" in START_SH, (
"start.sh must still call `source` to load .env "
"(regression guard, see tests/test_bootstrap_dotenv.py:181)"
)
assert ".env" in START_SH, (
"start.sh must still reference .env path "
"(regression guard, see tests/test_bootstrap_dotenv.py:181)"
)

def test_readonly_vars_filtered_before_source(self):
"""The readonly bash names (UID/GID/EUID/EGID/PPID) must be filtered
out of the .env stream before `source` reads it. The filter is a
``grep -vE`` against the .env file."""
# The filter must mention all five readonly names.
for var in ("UID", "GID", "EUID", "EGID", "PPID"):
assert var in START_SH, (
f"start.sh's .env filter must mention readonly var {var!r} "
"so that bash assignment to it does not crash with "
f"'{var}: readonly variable'"
)

def test_filter_pattern_uses_grep_or_equivalent(self):
"""Filter must use a pattern that strips readonly-var lines before
the bash `source` consumes them. `grep -vE` is the canonical form;
the assertion accepts any process-substitution-into-source shape."""
# Look for `source <(...UID...)` pattern. Note that the inner shell
# expression can contain its own parens (e.g. `(export[[:space:]]+)`),
# so we use a non-greedy `.*?` rather than `[^)]*`.
assert re.search(
r"source\s+<\(.*?UID.*?\)",
START_SH,
re.DOTALL,
), (
"start.sh's .env loader must filter readonly bash vars "
"(UID/GID/EUID/EGID/PPID) via `source <(grep -vE ...)` or "
"equivalent process-substitution form before `source`-ing "
"the .env file"
)

def test_filter_handles_optional_export_prefix(self):
"""The ``export`` prefix on env vars is optional but common. The
readonly-var filter must catch both bare and exported forms."""
assert "export" in START_SH, (
"start.sh's .env filter must account for the optional `export` "
"prefix on readonly-var assignments (e.g. `export UID=501`), "
"otherwise bash will still crash on the assignment"
)


@pytest.mark.skipif(shutil.which("bash") is None, reason="bash not available")
class TestStartShReadonlyEnvFilterBehavioral:
"""Behavioral tests — actually run bash to verify .env parsing succeeds.

These tests extract the .env loader block from start.sh and run it
against synthetic .env files. They guard against shell-quoting
regressions in the filter pattern itself (which the source-grep tests
above can't catch on their own).
"""

@staticmethod
def _extract_env_loader(start_sh: str) -> str:
"""Pull out the `if [[ -f "${REPO_ROOT}/.env" ]] ... fi` block."""
# Find the if-block with .env in it.
m = re.search(
r'(if \[\[ -f "\$\{REPO_ROOT\}/\.env" \]\]; then.*?^fi)\n',
start_sh,
re.DOTALL | re.MULTILINE,
)
assert m is not None, "could not locate .env loader block in start.sh"
return m.group(1)

def _run_loader(self, env_contents: str, tmp_path: Path) -> subprocess.CompletedProcess:
"""Write ``env_contents`` to a tmp .env and run start.sh's loader against it."""
env_file = tmp_path / ".env"
env_file.write_text(env_contents, encoding="utf-8")

loader = self._extract_env_loader(START_SH)
# Wrap loader in a tiny bash script that points REPO_ROOT at tmp_path
# and then echoes a few keys we care about.
script = textwrap.dedent(f"""\
set -euo pipefail
REPO_ROOT={str(tmp_path)!r}
{loader}
# Print loaded values (or "unset") for the test to assert against.
echo "PORT=${{HERMES_WEBUI_PORT:-unset}}"
echo "SOME=${{SOME_KEY:-unset}}"
echo "ANOTHER=${{ANOTHER:-unset}}"
echo "EXIT_OK"
""")

return subprocess.run(
["bash", "-c", script],
capture_output=True,
text=True,
timeout=10,
)

def test_env_with_readonly_uid_gid_does_not_crash(self, tmp_path):
"""The exact macOS docker-compose pattern: UID + GID in .env."""
env_contents = textwrap.dedent("""\
UID=501
GID=20
HERMES_WEBUI_PORT=8888
SOME_KEY=normal-value
""")
result = self._run_loader(env_contents, tmp_path)
assert "EXIT_OK" in result.stdout, (
f"loader crashed on .env with readonly UID/GID. "
f"stderr: {result.stderr!r}"
)
assert "readonly variable" not in result.stderr, (
f".env loader still triggered readonly-variable crash: "
f"{result.stderr!r}"
)
# Non-readonly keys must still load.
assert "PORT=8888" in result.stdout
assert "SOME=normal-value" in result.stdout

def test_env_with_exported_readonly_does_not_crash(self, tmp_path):
"""`export UID=501` form must also be filtered."""
env_contents = textwrap.dedent("""\
export UID=501
export GID=20
HERMES_WEBUI_PORT=9000
""")
result = self._run_loader(env_contents, tmp_path)
assert "EXIT_OK" in result.stdout
assert "readonly variable" not in result.stderr
assert "PORT=9000" in result.stdout

def test_all_five_readonly_names_filtered(self, tmp_path):
"""UID, GID, EUID, EGID, PPID — all five must be filtered."""
env_contents = textwrap.dedent("""\
UID=501
GID=20
EUID=501
EGID=20
PPID=12345
HERMES_WEBUI_PORT=7777
""")
result = self._run_loader(env_contents, tmp_path)
assert "EXIT_OK" in result.stdout, (
f"loader crashed; stderr: {result.stderr!r}"
)
assert "readonly variable" not in result.stderr
assert "PORT=7777" in result.stdout

def test_normal_env_still_loads(self, tmp_path):
"""A .env without readonly vars must still load all keys."""
env_contents = textwrap.dedent("""\
HERMES_WEBUI_PORT=8787
SOME_KEY=hello
ANOTHER=world
""")
result = self._run_loader(env_contents, tmp_path)
assert "EXIT_OK" in result.stdout
assert "PORT=8787" in result.stdout
assert "SOME=hello" in result.stdout
assert "ANOTHER=world" in result.stdout

def test_export_prefix_strips_correctly(self, tmp_path):
"""`export FOO=bar` (non-readonly) loads `FOO=bar` after `set -a; source`."""
env_contents = textwrap.dedent("""\
UID=501
export ANOTHER=exported-value
HERMES_WEBUI_PORT=6543
""")
result = self._run_loader(env_contents, tmp_path)
assert "EXIT_OK" in result.stdout
assert "ANOTHER=exported-value" in result.stdout
assert "PORT=6543" in result.stdout


class TestDockerfileSystemPackages:
"""Pin Dockerfile system-package dependencies (#1686 Cluster 1)."""

def test_dockerfile_installs_xz_utils(self):
"""xz-utils is required to extract xz-compressed tarballs (e.g.
Node.js distribution archives) — without it, agent install paths
that download xz-compressed deps fail with `xz: Cannot exec`."""
dockerfile = (REPO_ROOT / "Dockerfile").read_text(encoding="utf-8")
assert re.search(r"\bxz-utils\b", dockerfile), (
"Dockerfile must install xz-utils (apt package) — without it, "
"any tarball decompression of .tar.xz files fails with "
"`xz: Cannot exec: No such file or directory`"
)

def test_dockerfile_installs_git(self):
"""git is needed for any agent-install path that clones a repo, plus
for the runtime ``git describe`` that powers WEBUI_VERSION detection
in non-baked images."""
dockerfile = (REPO_ROOT / "Dockerfile").read_text(encoding="utf-8")
assert re.search(r"^\s*git\s*\\?\s*$", dockerfile, re.MULTILINE), (
"Dockerfile must install git (apt package) — required for "
"version detection (`git describe`) and any agent install path "
"that clones a repo"
)
Loading