Skip to content

Commit 1f1dba7

Browse files
committed
Merge branch 'main' of github.com:ecodeclub/kbase into actions
2 parents 96596bd + 9348b72 commit 1f1dba7

29 files changed

+2648
-577
lines changed

.DS_Store

-6 KB
Binary file not shown.

.coveragerc

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
[run]
2+
source = app
3+
omit =
4+
*/__init__.py
5+
*/tests/*
6+
*/__pycache__/*
7+
app/main.py
8+
app/config/settings.py
9+
10+
[report]
11+
exclude_lines =
12+
pragma: no cover
13+
def __repr__
14+
if self.debug:
15+
raise AssertionError
16+
raise NotImplementedError
17+
if __name__ == .__main__.:
18+
if TYPE_CHECKING:

.gitignore

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -205,4 +205,11 @@ cython_debug/
205205
marimo/_static/
206206
marimo/_lsp/
207207
__marimo__/
208+
# macOS
208209
.DS_Store
210+
.DS_Store?
211+
._*
212+
.Spotlight-V100
213+
.Trashes
214+
ehthumbs.db
215+
Thumbs.db

.pre-commit-config.yaml

Lines changed: 16 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -33,20 +33,19 @@ repos:
3333
types: [python]
3434

3535
# 4. 重要检查(仅在push时)
36-
# - repo: local
37-
# hooks:
38-
# - id: pytest
39-
# name: pytest
40-
# language: system
41-
# entry: uv
42-
# args: ["run", "pytest", "-q"]
43-
# pass_filenames: false
44-
# stages: [pre-push]
45-
46-
# - id: pip-audit
47-
# name: pip-audit
48-
# language: system
49-
# entry: uv
50-
# args: ["run", "pip-audit", "--strict"]
51-
# pass_filenames: false
52-
# stages: [pre-push]
36+
# - repo: local
37+
# hooks:
38+
# - id: pytest
39+
# name: pytest
40+
# language: system
41+
# entry: uv
42+
# args: ["run", "pytest", "-q"]
43+
# pass_filenames: false
44+
# stages: [pre-push]
45+
# - id: pip-audit
46+
# name: pip-audit
47+
# language: system
48+
# entry: uv
49+
# args: ["run", "pip-audit", "--strict"]
50+
# pass_filenames: false
51+
# stages: [pre-push]

Makefile

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ help:
1616
@echo " lint ✨ 检查代码并自动修复问题"
1717
@echo " type 🔍 类型检查"
1818
@echo " test 🧪 运行测试并生成覆盖率报告"
19+
@echo " cov 🧪 运行测试并打开覆盖率报告"
1920
@echo " run ▶️ 启动开发服务器"
2021
@echo " pre-commit 🔄 运行预提交检查"
2122
@echo " audit 🛡️ 扫描依赖中的安全漏洞"
@@ -65,7 +66,12 @@ check: fmt lint type
6566

6667
.PHONY: test
6768
test:
68-
@uv run pytest -q --cov=app --cov-report=term-missing --cov-report=xml
69+
@uv run pytest -v --cov=app --cov-report=term-missing --cov-report=xml
70+
71+
.PHONY: cov
72+
cov: test
73+
@echo "🌐 打开覆盖率报告..."
74+
@open htmlcov/index.html || xdg-open htmlcov/index.html || echo "请打开: htmlcov/index.html"
6975

7076
.PHONY: audit
7177
audit:

README.md

Lines changed: 21 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -80,17 +80,18 @@ make run
8080

8181
## 📋 开发命令
8282

83-
| 命令 | 描述 |
84-
|---------------------|-----------------|
85-
| `make setup` | 🚀 一键设置完整开发环境 |
86-
| `make check` | ✅ 运行所有代码质量检查 |
87-
| `make test` | 🧪 运行测试并生成覆盖率报告 |
88-
| `make run` | ▶️ 启动开发服务器 |
89-
| `make fmt` | 🎨 格式化代码 |
90-
| `make lint` | ✨ 检查代码并自动修复 |
91-
| `make type` | 🔍 类型检查 |
92-
| `make audit` | 🛡️ 扫描安全漏洞 |
93-
| `make clean` | 🧹 清理临时文件 |
83+
| 命令 | 描述 |
84+
|--------------|-----------------|
85+
| `make setup` | 🚀 一键设置完整开发环境 |
86+
| `make check` | ✅ 运行所有代码质量检查 |
87+
| `make test` | 🧪 运行测试并生成覆盖率报告 |
88+
| `make cov` | 🧪 运行测试并打开覆盖率报告 |
89+
| `make run` | ▶️ 启动开发服务器 |
90+
| `make fmt` | 🎨 格式化代码 |
91+
| `make lint` | ✨ 检查代码并自动修复 |
92+
| `make type` | 🔍 类型检查 |
93+
| `make audit` | 🛡️ 扫描安全漏洞 |
94+
| `make clean` | 🧹 清理临时文件 |
9495
| `make docker_build` | docker打包 |
9596
| `make docker_run` | 运行 docker 镜像 |
9697

@@ -105,14 +106,15 @@ make run
105106

106107
### 主要端点
107108

108-
| 端点 | 方法 | 描述 |
109-
|------|------|------|
110-
| `/` | GET | API 根路径和信息 |
111-
| `/api/v1/health` | GET | 健康检查 |
112-
| `/api/v1/documents/upload-file` | POST | 本地文件上传 |
113-
| `/api/v1/documents/upload-from-url` | POST | 从COS URL上传 |
114-
| `/api/v1/search` | POST | 文档搜索 |
115-
| `/api/v1/tasks/{task_id}` | GET | 查询任务状态 |
109+
| 端点 | 方法 | 描述 |
110+
|-------------------------------------|------|----------------|
111+
| `/` | GET | API 根路径和信息 |
112+
| `/api/v1/health` | GET | 健康检查 |
113+
| `/api/v1/documents/upload-file` | POST | 本地文件上传 |
114+
| `/api/v1/documents/upload-from-url` | POST | 从COS URL上传 |
115+
| `/api/v1/documents/save` | POST | 以JSON格式字符串上传文档 |
116+
| `/api/v1/search` | POST | 文档搜索 |
117+
| `/api/v1/tasks/{task_id}` | GET | 查询任务状态 |
116118

117119
### 健康检查
118120
```bash

app/config/settings.py

Lines changed: 17 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -33,8 +33,15 @@ class ElasticsearchSettings(BaseModel):
3333
"""Elasticsearch 相关配置"""
3434

3535
url: str
36-
metadata_index: str = "file_metadatas"
37-
chunk_index: str = "file_chunks"
36+
number_of_shards: int
37+
number_of_replicas: int
38+
index_max_result_window: int
39+
index_refresh_interval: str
40+
index_option_type: str
41+
index_option_m: int
42+
index_option_ef_construction: int
43+
metadata_index_suffix: str
44+
chunk_index_suffix: str
3845
request_timeout: int = 15
3946

4047

@@ -44,7 +51,6 @@ class EmbedderSettings(BaseModel):
4451
model_name: str
4552
dimensions: int
4653
similarity_metric: str
47-
index_type: str
4854

4955

5056
class RerankerSettings(BaseModel):
@@ -82,9 +88,16 @@ class RetrievalSettings(BaseModel):
8288

8389
multiplier: int = Field(5, description="召回倍数配置")
8490
vector_weight: float = Field(2.0, description="向量搜索权重")
91+
vector_similarity: float = Field(0.7, description="相似度")
8592
text_weight: float = Field(1.0, description="文本搜索权重")
8693

8794

95+
class SearchSettings(BaseModel):
96+
"""搜索相关配置"""
97+
98+
max_top_k: int = Field(50, description="最大top_k值限制")
99+
100+
88101
class TencentOssSettings(BaseModel):
89102
"""
90103
腾讯云对象存储相关配置。
@@ -114,6 +127,7 @@ class Settings(BaseSettings):
114127
storage: StorageSettings
115128
upload: UploadSettings
116129
retrieval: RetrievalSettings
130+
search: SearchSettings
117131

118132
@property
119133
def cos_config(self) -> CosConfig:

app/domain/document.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,22 @@
1+
# Licensed under the Apache License, Version 2.0 (the "License");
2+
# you may not use this file except in compliance with the License.
3+
# You may obtain a copy of the License at
4+
#
5+
# http://www.apache.org/licenses/LICENSE-2.0
6+
#
7+
# Unless required by applicable law or agreed to in writing, software
8+
# distributed under the License is distributed on an "AS IS" BASIS,
9+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
10+
# See the License for the specific language governing permissions and
11+
# limitations under the License.
12+
113
from dataclasses import dataclass, field
214
from typing import Any
315

416

517
@dataclass
618
class Document:
19+
index_prefix: str
720
path: str
821
size: int
922
category: str | None = None

app/domain/search.py

Lines changed: 38 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -12,30 +12,54 @@
1212
# See the License for the specific language governing permissions and
1313
# limitations under the License.
1414

15-
from dataclasses import dataclass, field
15+
from dataclasses import dataclass
16+
from enum import Enum
1617
from typing import Any
1718

1819

19-
@dataclass
20-
class SearchRequest:
21-
"""封装搜索请求,新增 mode 和 filters。"""
20+
class SearchMode(str, Enum):
21+
"""基础查询模式"""
22+
23+
VECTOR = "vector" # 向量搜索
24+
TERM = "term" # 精确匹配
25+
MATCH = "match" # 模糊匹配
26+
27+
28+
@dataclass(frozen=True)
29+
class SearchCondition:
30+
"""搜索条件 - 值对象"""
2231

23-
query: str
24-
top_k: int = 5
25-
filters: dict[str, Any] | None = field(default_factory=dict)
32+
field_name: str
33+
mode: SearchMode
34+
value: str | int | float | bool
35+
36+
37+
@dataclass(frozen=True)
38+
class SearchParameters:
39+
"""搜索参数 - 值对象"""
40+
41+
index_name: str
42+
conditions: list[SearchCondition]
43+
limit: int = 10
44+
filters: dict[str, Any] | None = None
2645

2746

2847
@dataclass
29-
class ContextChunk:
30-
"""定义一个上下文块,用于最终返回结果。"""
48+
class DocumentResult:
49+
"""文档结果 - 值对象"""
3150

32-
text: str
33-
file_metadata_id: str
51+
content: dict[str, Any]
3452
score: float
53+
id: str | None = None
3554

3655

3756
@dataclass
38-
class SearchResponse:
39-
"""定义最终的搜索响应格式。"""
57+
class SearchResult:
58+
"""搜索结果 - 聚合根"""
59+
60+
documents: list[DocumentResult]
61+
total_count: int
62+
search_time_ms: int
4063

41-
context: list[ContextChunk]
64+
def is_empty(self) -> bool:
65+
return len(self.documents) == 0

app/main.py

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@
3030
from app.utils.loaders.dispatcher import DispatcherLoader
3131
from app.utils.rerankers.bge import BgeReranker
3232
from app.utils.splitters import RecursiveCharacterTextSplitter
33-
from app.web.handler import DocumentHandler
33+
from app.web.document import DocumentHandler
3434

3535
# 配置标准日志
3636
logging.basicConfig(
@@ -62,8 +62,6 @@
6262
splitter=splitter,
6363
embedder=embedder,
6464
reranker=reranker,
65-
metadata_index=settings.elasticsearch.metadata_index,
66-
chunk_index=settings.elasticsearch.chunk_index,
6765
settings=settings,
6866
)
6967
logger.info("✅ 核心服务组件初始化成功。")

0 commit comments

Comments
 (0)