From f7e5e209fb983b6974004b3a539e7419f1a1b379 Mon Sep 17 00:00:00 2001 From: wangxhu Date: Fri, 19 Jun 2026 20:04:49 +0800 Subject: [PATCH] fix(cli): add retrieve tag filtering Expose the existing all-tags retrieve filter through the CLI with a --tags flag and pass comma-separated tags into ContextSeek.retrieve(). Document the CLI all-tags semantics and add regression tests for the parser, CLI JSON output, and direct SDK retrieval behavior. Fixes #47. --- docs/en/guides/cli.md | 3 +- docs/zh/guides/cli.md | 3 +- src/contextseek/cli/main.py | 7 ++++ tests/unit_tests/test_cli.py | 45 ++++++++++++++++++++++ tests/unit_tests/test_context_injection.py | 23 +++++++++++ 5 files changed, 79 insertions(+), 2 deletions(-) diff --git a/docs/en/guides/cli.md b/docs/en/guides/cli.md index 5d147a5..f9208ee 100644 --- a/docs/en/guides/cli.md +++ b/docs/en/guides/cli.md @@ -178,13 +178,14 @@ Features: | Command | Key args | Description | |---------|----------|-------------| | `add` | `--content`(req) `--source` `--tags` | Write a context item, returns `{id, stage}` | -| `retrieve` | `--query`(req) `--k`(10) `--full` `--json` | Ranked SearchHits; L1 summaries by default, `--full` for L0 | +| `retrieve` | `--query`(req) `--k`(10) `--full` `--json` `--tags` | Ranked SearchHits; L1 summaries by default, `--full` for L0. `--tags a,b` requires returned items to carry all listed tags | | `expand` | `--ids`(req, comma-separated) | Expand retrieved ids to L0 full content | | `items` | `--stage`(raw/extracted/knowledge/skill) | List all items in a scope | ```bash contextseek add --scope me/work --content "Prefer concise answers" --source cli --tags preference,language contextseek retrieve --scope me/work --query "language preference" --k 5 +contextseek retrieve --scope me/work --query "language preference" --tags preference,language contextseek retrieve --scope me/work --query "language preference" --k 3 --full contextseek expand --scope me/work --ids 1a2b3c,4d5e6f contextseek items --scope me/work --stage knowledge diff --git a/docs/zh/guides/cli.md b/docs/zh/guides/cli.md index 7b31dc4..fa9616e 100644 --- a/docs/zh/guides/cli.md +++ b/docs/zh/guides/cli.md @@ -178,13 +178,14 @@ sync 会跳过写入时的冲突检测以保证批量导入速度;导入后用 | 命令 | 关键参数 | 说明 | |------|----------|------| | `add` | `--content`(必填) `--source` `--tags` | 写入一条上下文,返回 `{id, stage}` | -| `retrieve` | `--query`(必填) `--k`(10) `--full` `--json` | 检索排序后的 SearchHit;默认 L1 摘要,`--full` 返回 L0 全文 | +| `retrieve` | `--query`(必填) `--k`(10) `--full` `--json` `--tags` | 检索排序后的 SearchHit;默认 L1 摘要,`--full` 返回 L0 全文。`--tags a,b` 要求返回项同时包含列出的所有标签 | | `expand` | `--ids`(必填,逗号分隔) | 把已检索 id 升档到 L0 全文 | | `items` | `--stage`(raw/extracted/knowledge/skill) | 列举 scope 内全部 item | ```bash contextseek add --scope me/work --content "偏好简洁回答" --source cli --tags preference,language contextseek retrieve --scope me/work --query "语言偏好" --k 5 +contextseek retrieve --scope me/work --query "语言偏好" --tags preference,language contextseek retrieve --scope me/work --query "语言偏好" --k 3 --full contextseek expand --scope me/work --ids 1a2b3c,4d5e6f contextseek items --scope me/work --stage knowledge diff --git a/src/contextseek/cli/main.py b/src/contextseek/cli/main.py index 9a0dcc5..7d60e4c 100644 --- a/src/contextseek/cli/main.py +++ b/src/contextseek/cli/main.py @@ -94,6 +94,11 @@ def build_parser() -> argparse.ArgumentParser: action="store_true", help="emit machine-readable JSON instead of human-readable output", ) + retrieve_parser.add_argument( + "--tags", + default="", + help="comma-separated tag filter; returned items must contain all tags", + ) retrieve_parser.add_argument( "--verbose", action="store_true", @@ -439,6 +444,7 @@ def run_cli( return 0 if args.command == "retrieve": + tags = [t.strip() for t in args.tags.split(",") if t.strip()] with warnings.catch_warnings(): warnings.simplefilter("ignore") response = ctx.retrieve( @@ -446,6 +452,7 @@ def run_cli( scope=args.scope, k=args.k, full=args.full, + tags=tags or None, ) output = { "items": [ diff --git a/tests/unit_tests/test_cli.py b/tests/unit_tests/test_cli.py index 20dedd1..274795e 100644 --- a/tests/unit_tests/test_cli.py +++ b/tests/unit_tests/test_cli.py @@ -53,6 +53,51 @@ def test_k_positive_accepted(self) -> None: assert args.k == 5 +class TestRetrieveTagFiltering: + def test_retrieve_accepts_tags_flag(self) -> None: + parser = build_parser() + args = parser.parse_args( + ["retrieve", "--scope", "t", "--query", "q", "--tags", "a,b"] + ) + + assert args.tags == "a,b" + + def test_retrieve_filters_results_by_all_tags(self) -> None: + ctx = ContextSeek() + kept = ctx.add( + "database backup runbook", + scope="t/p", + source="test", + tags=["ops", "database"], + ) + ctx.add( + "database onboarding guide", + scope="t/p", + source="test", + tags=["docs", "database"], + ) + out = StringIO() + + with redirect_stdout(out): + code = run_cli( + [ + "retrieve", + "--scope", + "t/p", + "--query", + "database", + "--tags", + "ops,database", + "--json", + ], + client=ctx, + ) + + payload = json.loads(out.getvalue()) + assert code == 0 + assert [item["id"] for item in payload["items"]] == [kept.id] + + class TestExpandOutput: def test_expand_reports_missing_ids(self) -> None: ctx = ContextSeek() diff --git a/tests/unit_tests/test_context_injection.py b/tests/unit_tests/test_context_injection.py index 4e0dfa0..c177bfe 100644 --- a/tests/unit_tests/test_context_injection.py +++ b/tests/unit_tests/test_context_injection.py @@ -38,6 +38,29 @@ def test_full_flag_returns_full_layer(self): for hit in response: assert hit.layer == "full" + def test_tags_filter_requires_all_tags(self): + ctx = ContextSeek() + kept = ctx.add( + "database backup runbook", + scope="t/p", + source="cli", + tags=["ops", "database"], + ) + other = ctx.add( + "database onboarding guide", + scope="t/p", + source="cli", + tags=["docs", "database"], + ) + + tagged_response = ctx.retrieve( + "database", scope="t/p", tags=["ops", "database"] + ) + unfiltered_response = ctx.retrieve("database", scope="t/p") + + assert [hit.item.id for hit in tagged_response] == [kept.id] + assert {hit.item.id for hit in unfiltered_response} == {kept.id, other.id} + class TestExpand: def test_expand_returns_full_items_without_scope(self):