diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md index dc9130f..2eaaf02 100644 --- a/.github/PULL_REQUEST_TEMPLATE.md +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -14,6 +14,10 @@ Complete this section if the PR changes backend selection or search execution. +### Compatibility Notes + + + - [ ] `--backend` behavior is explicit opt-in. - [ ] Linux/POSIX backend warnings or fallbacks do not affect Windows default search. - [ ] `-p/--path`, `--scope`, `-n/--max`, `--max-seconds`, `--stats`, `--log`, and `--caller codex --no-interactive` still work through the changed path. diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 2dd01a8..1586542 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -41,6 +41,10 @@ exits with `124`. Backend work should keep the current Windows behavior as the baseline. +For `--backend` changes, include a Compatibility Notes section in the PR body +that states what changed for `--stats`, `--log`, JSON output, exit codes, and +the Windows default path. If a field is added, call it out explicitly. + Expected boundary: - `se query` on Windows continues to use Everything / `es.exe`. diff --git a/README.md b/README.md index 744b5dc..7ee9aec 100644 --- a/README.md +++ b/README.md @@ -44,6 +44,8 @@ se --check --json |----------|------| | [fzf](https://github.com/junegunn/fzf) | `se -f` でインタラクティブ絞り込み | | [bat](https://github.com/sharkdp/bat) | fzf プレビューのシンタックスハイライト | +| [fd](https://github.com/sharkdp/fd) | `--backend fd` でファイル名/パス検索(POSIX 系の主用途。Windows でも明示指定可) | +| [ripgrep](https://github.com/BurntSushi/ripgrep) | `--backend rg-files` で `rg --files` + Python 正規表現フィルタ | ## Install @@ -65,7 +67,7 @@ Copy-Item .\src\se.cmd "$HOME\bin\" ln -s "$(pwd)/src/se" ~/bin/se ``` -> Everything は Windows 専用です。他の OS では動作しません。 +> Everything / `es.exe` backend は Windows 専用です。Linux/POSIX 系では `--backend fd` または `--backend rg-files` を明示指定してください。 ## First run @@ -99,6 +101,22 @@ se -p "D:\data" query # パス限定 se -f query # fzf でインタラクティブ選択(bat プレビュー付き) ``` +### Backend selection + +Windows の既定動作は従来どおり Everything / `es.exe` です。 +Linux/POSIX 系では自動判定せず、明示的に backend を指定します。 + +```bash +se query # Windows: Everything / es.exe +se --backend everything query # explicit Windows backend +se --backend fd query # fd filename/path search +se --backend rg-files query # rg --files + Python regex filter +``` + +Windows で `--backend fd` / `--backend rg-files` を明示指定すれば、PATH 上の `fd.exe` / `rg.exe` も使えます(実機 smoke / portable fallback 用)。既定 backend は変わりません。 + +`plocate` / `arch-linux` backend と自動 fallback、backend-aware `--check` は #56 の後続 PR で扱います。 + ### スコープ検索 ```powershell @@ -165,7 +183,7 @@ Timeout は crash ではなく expected failure。`--log` 指定時は `timed_ou | `--init` | `.se/` と `~/.serc` を生成 | | `--doctor` | 環境診断・自動修正・警告 | | `--check` | 読み取り専用ヘルスチェック | -| `--json` | `--check` の結果を JSON 出力(通常検索結果の JSON 化ではない) | +| `--json` | `--check` または通常検索結果を JSON 出力 | | `-p PATH` | 検索パスを限定 | | `-n NUM` | 最大結果数 | | `-f` | fzf でインタラクティブ絞り込み | @@ -176,7 +194,8 @@ Timeout は crash ではなく expected failure。`--log` 指定時は `timed_ou | `--caller {codex,pi,human}` | 実行プロファイル指定 | | `--no-interactive` | fzf 禁止 | | `--max-seconds N` | グローバル検索タイムアウト(N > 0) | -| `--stats` | elapsed / results / timed_out を stderr に出力 | +| `--stats` | backend / elapsed / results / timed_out を stderr に出力 | +| `--backend {everything,fd,rg-files}` | 検索 backend を明示指定 | ## Configuration diff --git a/src/se.py b/src/se.py index 949b4b2..4719108 100644 --- a/src/se.py +++ b/src/se.py @@ -16,7 +16,10 @@ import argparse import json +import ntpath import os +import platform +import posixpath import re import subprocess import sys @@ -469,16 +472,26 @@ def get_allowed_roots(caller: str | None) -> list[str] | None: return roots if roots else None +_WINDOWS_DRIVE_RE = re.compile(r"^[a-zA-Z]:") + + +def _is_windows_like_path(s: str) -> bool: + return bool(_WINDOWS_DRIVE_RE.match(s)) or "\\" in s + + def _is_under_root(path: str, root: str) -> bool: r"""Check whether *path* lives under *root* (case-insensitive, Windows-aware). - Uses os.path.commonpath to avoid false positives: - e.g. root=dir1 should NOT match dir12. + Selects ntpath or posixpath semantics from the input path shape so the + comparison works correctly regardless of the host OS. """ - p = os.path.normcase(os.path.normpath(path)) - r = os.path.normcase(os.path.normpath(root)) + pathmod = ntpath if ( + _is_windows_like_path(path) or _is_windows_like_path(root) + ) else posixpath try: - return os.path.commonpath([p, r]) == r + p = pathmod.normcase(pathmod.normpath(path)) + r = pathmod.normcase(pathmod.normpath(root)) + return pathmod.commonpath([p, r]) == r except ValueError: # Different drives on Windows return False @@ -540,6 +553,10 @@ def __init__(self, msg: str, partial: list[str] | None = None): self.partial = partial or [] +class BackendConfigError(Exception): + """Raised when a selected backend cannot be used in the current environment.""" + + def es_search(regex: str, path: str | None, n: int | None, timeout: float | None = None) -> list[str]: es = get_es_path() cmd = [es, "-r", regex] @@ -576,6 +593,139 @@ def es_search_multi_path(regex: str, paths: list[str], n: int | None, deadline: return all_results +def resolve_backend(requested: str | None) -> str: + """Resolve and validate the search backend for this process.""" + system = platform.system() + if requested is None: + if system == "Windows": + return "everything" + raise BackendConfigError( + "se: non-Windows platforms require --backend fd or --backend rg-files" + ) + if requested == "everything" and system != "Windows": + raise BackendConfigError("se: backend 'everything' is Windows-only") + return requested + + +def validate_everything_backend() -> None: + """Fail fast if the configured es.exe path is missing. + + The default search path resolution and Everything IPC live underneath + es.exe; if the binary cannot be resolved, every search would otherwise + surface a confusing FileNotFoundError from subprocess. + """ + es = get_es_path() + p = Path(es) + if p.exists(): + return + if not p.is_absolute() and shutil.which(es): + return + raise BackendConfigError( + f"se: es.exe not found at {es}. " + "Run `se --doctor` or set es_path in ~/.serc." + ) + + +def _backend_invoke(argv: list[str], timeout: float | None) -> tuple[str, int, str]: + """Invoke a backend command and return stdout, returncode, stderr.""" + try: + result = subprocess.run( + argv, + capture_output=True, + text=True, + encoding="utf-8", + timeout=timeout, + ) + except subprocess.TimeoutExpired: + raise SearchTimeout(f"backend timed out ({timeout}s)") + return result.stdout, result.returncode, result.stderr + + +def _remaining_timeout(deadline: float | None) -> float | None: + if deadline is None: + return None + left = deadline - time.monotonic() + if left <= 0: + raise SearchTimeout("global search budget exceeded") + return left + + +def _backend_paths(path: str | None, search_paths: list[str] | None) -> list[str] | None: + if search_paths: + return search_paths + if path: + return [path] + return None + + +def _limit_results(results: list[str], n: int | None) -> list[str]: + return results[:n] if n is not None else results + + +def _search_fd(regex: str, paths: list[str] | None, n: int | None, deadline: float | None) -> list[str]: + fd = shutil.which("fd") + if not fd: + raise BackendConfigError("se: backend 'fd' requires fd in PATH") + cmd = [fd, "--color", "never", "--absolute-path", "--full-path"] + if n is not None: + cmd += ["--max-results", str(n)] + cmd.append(regex) + if paths: + cmd.extend(paths) + stdout, rc, stderr = _backend_invoke(cmd, _remaining_timeout(deadline)) + if rc != 0 and stderr: + raise BackendConfigError(f"se: fd backend failed: {stderr.strip()}") + return _limit_results([l for l in stdout.splitlines() if l.strip()], n) + + +def _search_rg_files(regex: str, paths: list[str] | None, n: int | None, deadline: float | None) -> list[str]: + rg = shutil.which("rg") + if not rg: + raise BackendConfigError("se: backend 'rg-files' requires rg in PATH") + cmd = [rg, "--files", "--color", "never"] + if paths: + cmd.extend(paths) + stdout, rc, stderr = _backend_invoke(cmd, _remaining_timeout(deadline)) + if rc != 0 and stderr: + raise BackendConfigError(f"se: rg-files backend failed: {stderr.strip()}") + try: + pattern = re.compile(regex) + except re.error as e: + raise BackendConfigError(f"se: invalid regex for rg-files backend: {e}") + results: list[str] = [] + cwd = os.getcwd() + for line in stdout.splitlines(): + if not line.strip(): + continue + candidate = line if os.path.isabs(line) else os.path.abspath(os.path.join(cwd, line)) + if pattern.search(candidate): + results.append(candidate) + if n is not None and len(results) >= n: + break + return results + + +def backend_search( + backend: str, + regex: str, + path: str | None, + search_paths: list[str] | None, + n: int | None, + deadline: float | None, +) -> list[str]: + if backend == "everything": + validate_everything_backend() + if search_paths: + return es_search_multi_path(regex, search_paths, n, deadline=deadline) + return es_search(regex, path, n, timeout=_remaining_timeout(deadline)) + paths = _backend_paths(path, search_paths) + if backend == "fd": + return _search_fd(regex, paths, n, deadline) + if backend == "rg-files": + return _search_rg_files(regex, paths, n, deadline) + raise BackendConfigError(f"se: unknown backend: {backend}") + + def filter_results(results: list[str], allowed_roots: list[str] | None) -> list[str]: if not allowed_roots: return results @@ -1112,17 +1262,22 @@ def cmd_search(args) -> None: print(regex) return + try: + backend = resolve_backend(args.backend) + except BackendConfigError as e: + print(str(e), file=sys.stderr) + sys.exit(2) + # --- Search with timeout --- deadline = time.monotonic() + args.max_seconds if args.max_seconds else None timed_out = False start = time.perf_counter() try: - if search_paths: - results = es_search_multi_path(regex, search_paths, args.max, deadline=deadline) - else: - timeout = (deadline - time.monotonic()) if deadline else None - results = es_search(regex, args.path, args.max, timeout=timeout) + results = backend_search(backend, regex, args.path, search_paths, args.max, deadline) + except BackendConfigError as e: + print(str(e), file=sys.stderr) + sys.exit(2) except SearchTimeout as e: results = e.partial timed_out = True @@ -1139,7 +1294,7 @@ def cmd_search(args) -> None: if args.stats or agent_mode: partial_flag = " partial=True" if timed_out and results else "" migemo_flag = " migemo_fallback=True" if migemo_fallback else "" - print(f"[se] elapsed={elapsed:.3f}s results={len(results)} max={args.max} timed_out={timed_out} caller={caller}{partial_flag}{migemo_flag}", file=sys.stderr) + print(f"[se] backend={backend} elapsed={elapsed:.3f}s results={len(results)} max={args.max} timed_out={timed_out} caller={caller}{partial_flag}{migemo_flag}", file=sys.stderr) # Output if args.json: @@ -1151,6 +1306,7 @@ def cmd_search(args) -> None: "elapsed_s": round(elapsed, 3), "timed_out": timed_out, "migemo_fallback": migemo_fallback, + "backend": backend, } if args.scope: out["scope"] = args.scope @@ -1179,6 +1335,7 @@ def cmd_search(args) -> None: "result_count": len(results), "timed_out": timed_out, "migemo_fallback": migemo_fallback, + "backend": backend, "elapsed_s": round(elapsed, 3), "results": results[:50], }) @@ -1222,6 +1379,15 @@ def main(): parser.add_argument("--stats", action="store_true", help="Print elapsed time and result count to stderr") parser.add_argument("--check", action="store_true", help="Read-only health check (no auto-fix)") parser.add_argument("--json", action="store_true", help="JSON output (for --check and search results)") + parser.add_argument( + "--backend", + choices=["everything", "fd", "rg-files"], + default=None, + help=( + "Search backend (default: everything on Windows). " + "Non-Windows platforms must specify --backend explicitly." + ), + ) args = parser.parse_args() diff --git a/tests/test_backend.py b/tests/test_backend.py new file mode 100644 index 0000000..f869bbb --- /dev/null +++ b/tests/test_backend.py @@ -0,0 +1,213 @@ +"""Tests for explicit backend selection.""" +import json +import subprocess +import sys +import time +from pathlib import Path +from unittest.mock import patch + +import pytest + +# Add src to path +sys.path.insert(0, str(Path(__file__).resolve().parent.parent / "src")) +import se + + +class Args: + human = True + caller = None + no_interactive = False + fzf = False + query = ["needle"] + scope = None + path = None + literal = True + expand_only = False + max = None + max_seconds = None + stats = False + json = False + log = False + backend = None + + +def make_args(**overrides): + args = Args() + for key, value in overrides.items(): + setattr(args, key, value) + return args + + +class TestResolveBackend: + def test_windows_default_is_everything(self): + with patch.object(se.platform, "system", return_value="Windows"): + assert se.resolve_backend(None) == "everything" + + def test_non_windows_default_requires_explicit_backend(self): + with patch.object(se.platform, "system", return_value="Linux"): + with pytest.raises(se.BackendConfigError) as exc: + se.resolve_backend(None) + assert "require --backend" in str(exc.value) + + def test_everything_is_windows_only(self): + with patch.object(se.platform, "system", return_value="Linux"): + with pytest.raises(se.BackendConfigError) as exc: + se.resolve_backend("everything") + assert "Windows-only" in str(exc.value) + + def test_fd_and_rg_files_are_explicit_backends(self): + with patch.object(se.platform, "system", return_value="Linux"): + assert se.resolve_backend("fd") == "fd" + assert se.resolve_backend("rg-files") == "rg-files" + + +class TestBackendSearch: + def test_everything_uses_existing_single_path_function(self): + with patch.object(se, "validate_everything_backend"), \ + patch.object(se, "es_search", return_value=["one"]) as mock_search: + assert se.backend_search("everything", "needle", "C:/root", None, 5, None) == ["one"] + mock_search.assert_called_once_with("needle", "C:/root", 5, timeout=None) + + def test_everything_uses_existing_multi_path_function(self): + with patch.object(se, "validate_everything_backend"), \ + patch.object(se, "es_search_multi_path", return_value=["one"]) as mock_search: + assert se.backend_search("everything", "needle", None, ["C:/a", "C:/b"], 5, 123.0) == ["one"] + mock_search.assert_called_once_with("needle", ["C:/a", "C:/b"], 5, deadline=123.0) + + def test_everything_missing_es_exe_raises_backend_config_error(self): + with patch.object(se, "get_es_path", return_value="C:/nonexistent/es.exe"), \ + patch.object(se, "es_search") as mock_search: + with pytest.raises(se.BackendConfigError) as exc: + se.backend_search("everything", "needle", None, None, 5, None) + assert "es.exe not found" in str(exc.value) + assert "--doctor" in str(exc.value) + mock_search.assert_not_called() + + def test_everything_accepts_path_resolved_es_exe(self): + with patch.object(se, "get_es_path", return_value="es.exe"), \ + patch.object(se.shutil, "which", return_value="C:/bin/es.exe"): + se.validate_everything_backend() + + def test_fd_missing_fails_clearly(self): + with patch.object(se.shutil, "which", return_value=None): + with pytest.raises(se.BackendConfigError) as exc: + se.backend_search("fd", "needle", None, None, 5, None) + assert "requires fd in PATH" in str(exc.value) + + def test_rg_files_missing_fails_clearly(self): + with patch.object(se.shutil, "which", return_value=None): + with pytest.raises(se.BackendConfigError) as exc: + se.backend_search("rg-files", "needle", None, None, 5, None) + assert "requires rg in PATH" in str(exc.value) + + def test_fd_invokes_command_with_limit_and_path(self): + with patch.object(se.shutil, "which", return_value="fd"), \ + patch.object(se, "_backend_invoke", return_value=("/tmp/one\n/tmp/two\n", 0, "")) as invoke: + results = se.backend_search("fd", "needle", "/tmp", None, 1, None) + assert results == ["/tmp/one"] + argv, timeout = invoke.call_args[0] + assert argv[:4] == ["fd", "--color", "never", "--absolute-path"] + assert "--full-path" in argv + assert "--max-results" in argv + assert "1" in argv + assert argv[-2:] == ["needle", "/tmp"] + assert timeout is None + + def test_rg_files_filters_results_in_python(self): + stdout = "src/se.py\nREADME.md\ntests/test_backend.py\n" + with patch.object(se.shutil, "which", return_value="rg"), \ + patch.object(se, "_backend_invoke", return_value=(stdout, 0, "")): + results = se.backend_search("rg-files", r"se\.py$", None, None, 5, None) + assert len(results) == 1 + assert results[0].endswith(str(Path("src") / "se.py")) + + def test_backend_timeout_maps_to_search_timeout(self): + deadline = time.monotonic() - 1 + with patch.object(se.shutil, "which", return_value="fd"): + with pytest.raises(se.SearchTimeout): + se.backend_search("fd", "needle", None, None, 5, deadline) + + def test_fd_nonzero_rc_with_stderr_raises_backend_config_error(self): + with patch.object(se.shutil, "which", return_value="fd"), \ + patch.object(se, "_backend_invoke", + return_value=("", 2, "fd: invalid regex")): + with pytest.raises(se.BackendConfigError) as exc: + se.backend_search("fd", "needle", None, None, 5, None) + assert "fd backend failed" in str(exc.value) + assert "invalid regex" in str(exc.value) + + def test_rg_files_nonzero_rc_with_stderr_raises_backend_config_error(self): + with patch.object(se.shutil, "which", return_value="rg"), \ + patch.object(se, "_backend_invoke", + return_value=("", 2, "rg: io error")): + with pytest.raises(se.BackendConfigError) as exc: + se.backend_search("rg-files", "needle", None, None, 5, None) + assert "rg-files backend failed" in str(exc.value) + assert "io error" in str(exc.value) + + def test_everything_single_path_uses_remaining_timeout(self): + deadline = time.monotonic() - 1 + with patch.object(se, "validate_everything_backend"), \ + patch.object(se, "es_search") as mock_search: + with pytest.raises(se.SearchTimeout): + se.backend_search("everything", "needle", "C:/root", None, 5, deadline) + mock_search.assert_not_called() + + +class TestCmdSearchObservation: + def test_stats_include_backend(self, capsys): + args = make_args(backend="fd", stats=True) + with patch.object(se, "ensure_init"), \ + patch.object(se, "backend_search", return_value=["/tmp/needle"]), \ + patch.object(se.platform, "system", return_value="Linux"): + se.cmd_search(args) + err = capsys.readouterr().err + assert "backend=fd" in err + + def test_json_includes_backend(self, capsys): + args = make_args(backend="rg-files", json=True) + with patch.object(se, "ensure_init"), \ + patch.object(se, "backend_search", return_value=["/tmp/needle"]), \ + patch.object(se.platform, "system", return_value="Linux"): + se.cmd_search(args) + out = capsys.readouterr().out + data = json.loads(out) + assert data["backend"] == "rg-files" + + def test_log_includes_backend(self): + args = make_args(backend="fd", log=True) + with patch.object(se, "ensure_init"), \ + patch.object(se, "backend_search", return_value=["/tmp/needle"]), \ + patch.object(se, "append_log") as append_log, \ + patch.object(se, "detect_session_id", return_value=None), \ + patch.object(se.platform, "system", return_value="Linux"): + se.cmd_search(args) + entry = append_log.call_args[0][0] + assert entry["backend"] == "fd" + + def test_expand_only_does_not_require_backend_on_linux(self, capsys): + args = make_args(expand_only=True, query=["needle"]) + with patch.object(se, "ensure_init"), \ + patch.object(se.platform, "system", return_value="Linux"), \ + patch.object(se, "backend_search") as backend_search: + se.cmd_search(args) + assert capsys.readouterr().out.strip() + backend_search.assert_not_called() + + def test_non_interactive_still_rejects_fzf_before_search(self): + args = make_args(backend="fd", fzf=True, no_interactive=True) + with patch.object(se, "ensure_init"), pytest.raises(SystemExit) as exc: + se.cmd_search(args) + assert exc.value.code == 2 + + +def test_invalid_backend_choice_exits_2(): + script = Path(__file__).resolve().parent.parent / "src" / "se.py" + r = subprocess.run( + [sys.executable, str(script), "--backend", "nope", "query"], + capture_output=True, + text=True, + timeout=10, + ) + assert r.returncode == 2 + assert "invalid choice" in r.stderr