Skip to content

Commit 1808f2f

Browse files
authored
Merge pull request #1 from longyue0521/dev
feat:实现文件上传、腾讯COS URL上传及查询接口
2 parents 57b1105 + 04c738b commit 1808f2f

38 files changed

+5910
-2
lines changed

.DS_Store

6 KB
Binary file not shown.

.gitignore

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -173,7 +173,7 @@ cython_debug/
173173
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
174174
# and can be added to the global gitignore or merged into this file. For a more nuclear
175175
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
176-
#.idea/
176+
.idea/
177177

178178
# Abstra
179179
# Abstra is an AI-powered process automation framework.
@@ -186,7 +186,7 @@ cython_debug/
186186
# that can be found at https://github.com/github/gitignore/blob/main/Global/VisualStudioCode.gitignore
187187
# and can be added to the global gitignore or merged into this file. However, if you prefer,
188188
# you could uncomment the following to ignore the entire vscode folder
189-
# .vscode/
189+
.vscode/
190190

191191
# Ruff stuff:
192192
.ruff_cache/
@@ -205,3 +205,4 @@ cython_debug/
205205
marimo/_static/
206206
marimo/_lsp/
207207
__marimo__/
208+
.DS_Store

.pre-commit-config.yaml

Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,52 @@
1+
repos:
  # Every hook is a local "system" hook executed through `uv run`, so the tool
  # versions always match the ones installed in the project environment.
  - repo: local
    hooks:
      # 1. Lint with auto-fix. This runs BEFORE the formatter because fixes
      #    applied by `ruff check --fix` can produce code that still needs
      #    reformatting.
      - id: ruff-check
        name: ruff check
        language: system
        entry: uv
        args: ["run", "ruff", "check", ".", "--fix"]
        pass_filenames: false
        types: [python]

      # 2. Format the code (after lint fixes have been applied).
      - id: ruff-format
        name: ruff format
        language: system
        entry: uv
        args: ["run", "ruff", "format", "."]
        pass_filenames: false
        types: [python]

      # 3. Type check (after the code has been fixed and formatted).
      - id: mypy
        name: mypy
        language: system
        entry: uv
        args: ["run", "mypy", "app", "tests"]
        pass_filenames: false
        types: [python]

  # 4. Heavier checks (pre-push stage only) - currently disabled.
  # - repo: local
  #   hooks:
  #     - id: pytest
  #       name: pytest
  #       language: system
  #       entry: uv
  #       args: ["run", "pytest", "-q"]
  #       pass_filenames: false
  #       stages: [pre-push]
  #
  #     - id: pip-audit
  #       name: pip-audit
  #       language: system
  #       entry: uv
  #       args: ["run", "pip-audit", "--strict"]
  #       pass_filenames: false
  #       stages: [pre-push]
Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
services:
  elasticsearch:
    image: docker.elastic.co/elasticsearch/elasticsearch:9.1.3
    environment:
      # Run as a single-node cluster.
      - discovery.type=single-node
      # JVM heap size.
      - "ES_JAVA_OPTS=-Xms1g -Xmx1g"
      # Security (authentication) disabled - test environment only.
      - xpack.security.enabled=false
    ports:
      - "9200:9200"
    volumes:
      - es-data:/usr/share/elasticsearch/data  # optional: persist index data
    # Exec-form command with a literal block scalar (`|`): every newline of the
    # script is preserved, so the `#` comment lines cannot swallow the commands
    # that follow them, and no outer shell quoting is needed.
    command:
      - bash
      - -c
      - |
        # Install the IK analysis plugin only if it is not installed yet.
        if ! elasticsearch-plugin list | grep -q analysis-ik; then
          echo '🔧 安装IK插件...'
          elasticsearch-plugin install --batch https://release.infinilabs.com/analysis-ik/stable/elasticsearch-analysis-ik-9.1.3.zip
          echo '✅ IK插件安装完成'
        fi
        # Start Elasticsearch through the image's own entrypoint script.
        /usr/local/bin/docker-entrypoint.sh

volumes:
  es-data:
    driver: local

.script/test.sh

Lines changed: 120 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,120 @@
1+
#!/bin/bash

# Quick API test script; output is printed so that Chinese text stays readable.
# BASE_URL can be overridden from the environment to target another host/port;
# when unset or empty it falls back to the local development server.
BASE_URL="${BASE_URL:-http://localhost:8080/api/v1}"

# Absolute directory of this script, and the project root one level above it.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
PROJECT_ROOT="$(dirname "$SCRIPT_DIR")"
9+
10+
# json_format: pretty-print the JSON document read from stdin.
#
# Uses Python with ensure_ascii=False so non-ASCII (e.g. Chinese) text is shown
# as-is instead of \uXXXX escapes. If stdin is not valid JSON, prints the parse
# error followed by the raw input.
json_format() {
    python3 -c "
import json, sys

# Read stdin exactly once: it cannot be re-read after a failed parse, so the
# raw text is kept for the error path below.
raw = sys.stdin.read()
try:
    print(json.dumps(json.loads(raw), indent=2, ensure_ascii=False))
except Exception as e:
    print(f'JSON解析错误: {e}')
    print('原始内容:', raw)
"
}
22+
23+
echo "🚀 快速测试 KBase RAG API"
echo "=========================="

# Pick the first PDF found under the user fixtures directory as the upload sample.
PDF_FILE=$(find "$PROJECT_ROOT/tests/fixtures/files/user" -name "*.pdf" | head -1)

if [[ -z "$PDF_FILE" ]]; then
    echo "❌ 未找到PDF文件,请在 tests/fixtures/files/user 下放置测试PDF"
    exit 1
fi

echo "📄 测试文件: $PDF_FILE"
echo

# 1. Health check
echo "💓 健康检查..."
curl --noproxy "*" -s "$BASE_URL/health" | json_format
echo

# 2. Upload the file
echo "📤 上传文件..."
echo "正在上传: $(basename "$PDF_FILE")"

UPLOAD_RESPONSE=$(curl --noproxy "*" -s -X POST -F "file=@$PDF_FILE" "$BASE_URL/documents/upload-file")
echo "$UPLOAD_RESPONSE" | json_format

# Summarize the upload response: it is reported as successful when the JSON
# body carries a truthy "message" field.
SUCCESS=$(echo "$UPLOAD_RESPONSE" | python3 -c '
import sys, json
try:
    data = json.load(sys.stdin)
    print("✅ 上传成功!" if data.get("message") else "❌ 上传失败!")
    task_id = data.get("task_id", "N/A")
    print(f"📄 任务ID: {task_id}")
except Exception as e:
    print(f"❌ 响应解析失败: {e}")
')

echo "$SUCCESS"
echo

echo "⏱️ 等待3秒让索引完成..."
sleep 3

# 3. Search test - uses a Chinese query
echo "🔍 搜索测试..."

QUERIES=("并发")

for query in "${QUERIES[@]}"; do
    echo
    echo "🔎 搜索查询: \"$query\""

    # Build the request body with json.dumps so that quotes or backslashes in
    # the query are escaped correctly instead of being spliced into raw JSON.
    SEARCH_PAYLOAD=$(python3 -c '
import json, sys
print(json.dumps({"query": sys.argv[1], "top_k": 2}, ensure_ascii=False))
' "$query")

    SEARCH_RESPONSE=$(curl --noproxy "*" -s -X POST \
        -H "Content-Type: application/json" \
        -d "$SEARCH_PAYLOAD" \
        "$BASE_URL/search")

    echo "原始响应:"
    echo "$SEARCH_RESPONSE" | json_format

    # Summarize the search results. The code reads a top-level "context" list
    # whose items expose "score", "file_metadata_id" and "text".
    # The query is passed as an argument (not interpolated into the Python
    # source) so special characters cannot break or alter the program.
    echo "$SEARCH_RESPONSE" | python3 -c '
import sys, json

query = sys.argv[1]
try:
    data = json.load(sys.stdin)
    context = data.get("context", [])

    print("")
    print("📊 搜索结果摘要:")
    print(f" 🔎 查询词: \"{query}\"")
    print(f" 📝 结果数量: {len(context)} 条")

    for i, item in enumerate(context[:2], 1):
        score = item.get("score", 0)
        file_id = item.get("file_metadata_id", "Unknown")
        text_content = item.get("text", "")[:100]

        print("")
        print(f" 结果 {i}:")
        print(f" 🎯 相关度: {score:.4f}" if isinstance(score, (int, float)) else f" 🎯 相关度: {score}")
        print(f" 📄 文档ID: {file_id}")
        print(f" 📝 内容: {text_content}...")

except Exception as e:
    print(f"❌ 搜索结果解析失败: {e}")
    print("原始响应:", str(data) if "data" in locals() else "N/A")
' "$query"

    # Only run the first query to keep the output short.
    break
done

echo
echo "✅ 测试完成!"
echo
echo "💡 提示:"
echo " - API 文档: http://localhost:8080/docs"
echo " - 健康检查: http://localhost:8080/api/v1/health"

Makefile

Lines changed: 114 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,114 @@
1+
# Use bash so recipes can rely on bash-only syntax.
SHELL := /bin/bash

# Python version (major.minor) parsed from the `requires-python = ">=X.Y"` line
# of pyproject.toml.
# - `sed -n ... /p` prints only when the pattern matches, so an unsupported
#   specifier yields an empty value (which _check_python reports as an error)
#   instead of leaking the whole unparsed line into the variable.
# - Optional whitespace after `>=` is tolerated.
# - `head -n 1` keeps a single value if several lines match.
PYTHON_VERSION_TARGET := $(shell grep -E 'requires-python.*=' pyproject.toml | sed -nE 's/.*">=[[:space:]]*([0-9]+\.[0-9]+).*/\1/p' | head -n 1)
6+
7+
# Default goal (first target in the file): print the list of available commands.
# A single printf emits one line per argument, matching the former echo sequence.
.PHONY: help
help:
	@printf '%s\n' \
		"Usage: make [target]" \
		"" \
		"Commands:" \
		" setup 🚀 一键设置完整的开发环境 (需要预先安装 uv)" \
		" check ✅ 运行所有代码质量检查 (格式化, lint, 类型检查)" \
		" fmt 🎨 格式化代码" \
		" lint ✨ 检查代码并自动修复问题" \
		" type 🔍 类型检查" \
		" test 🧪 运行测试并生成覆盖率报告" \
		" run ▶️ 启动开发服务器" \
		" pre-commit 🔄 运行预提交检查" \
		" audit 🛡️ 扫描依赖中的安全漏洞" \
		" e2e_up 🔄 启动集成测试环境" \
		" e2e_down 🔄 关闭集成测试环境" \
		" clean 🧹 清理临时文件和缓存"
25+
26+
# --- Main environment setup command ---

# setup: after verifying that uv and the target Python are available, create
# the .venv virtual environment, sync dependencies from uv.lock, and install
# the pre-commit and pre-push Git hooks.
.PHONY: setup
setup: _check_uv _check_python
	@echo "📦 步骤 1/3: 正在使用 Python $(PYTHON_VERSION_TARGET) 创建虚拟环境 .venv..."
	@uv venv --clear -p $(PYTHON_VERSION_TARGET)
	@echo "✅ 虚拟环境创建成功。"
	@echo ""
	@echo "⛓️ 步骤 2/3: 正在根据 uv.lock 同步依赖..."
	@uv sync --frozen
	@echo "✅ 依赖安装完成。"
	@echo ""
	@echo "🪝 步骤 3/3: 正在安装 Git 提交/推送钩子..."
	@uv run pre-commit install
	@uv run pre-commit install -t pre-push
	@echo "✅ Git 钩子安装成功。"
	@echo ""
	@echo "🎉 全部设置完成!请运行 source .venv/bin/activate 激活环境。"
44+
45+
# --- Day-to-day development commands ---

# None of these targets produce a file of the same name.
.PHONY: fmt lint type pre-commit check test audit run e2e_up e2e_down clean

# Format the code base.
fmt:
	@uv run ruff format .

# Lint and apply automatic fixes.
lint:
	@uv run ruff check . --fix

# Static type check of the application and test packages.
type:
	@uv run mypy app tests

# Run every configured pre-commit hook against all files.
pre-commit:
	@uv run pre-commit run --all-files

# Aggregate quality gate: format, lint, then type check.
check: fmt lint type

# Run the tests with coverage (terminal report with missing lines + XML report).
test:
	@uv run pytest -q --cov=app --cov-report=term-missing --cov-report=xml

# Audit dependencies for known vulnerabilities.
audit:
	@uv run pip-audit --strict

# Start the development server on all interfaces, port 8080.
run:
	@uv run uvicorn app.main:app --host 0.0.0.0 --port 8080

# Start the integration-test services in the background.
e2e_up:
	docker compose -p kbase -f .script/integration_test_compose.yml up -d

# Stop the integration-test services and remove their volumes (-v).
e2e_down:
	docker compose -p kbase -f .script/integration_test_compose.yml down -v

# Remove temporary files and caches via pyclean.
clean:
	@uv run pyclean .
89+
90+
# --- Internal helper targets ---

# _check_uv: abort with installation guidance when the `uv` command is not on PATH.
.PHONY: _check_uv
_check_uv:
	@command -v uv > /dev/null 2>&1 || { \
		echo "❌ 'uv' 命令未找到。"; \
		echo "请先安装 uv 包管理器:"; \
		echo "详细安装说明请查看: README.md"; \
		exit 1; \
	}
100+
101+
# _check_python: fail when no Python version could be parsed from
# pyproject.toml; otherwise make sure uv can find that version, installing it
# through uv when it is missing.
.PHONY: _check_python
_check_python:
	@[ -n "$(PYTHON_VERSION_TARGET)" ] || { \
		echo "❌ 错误: 无法从 pyproject.toml 中解析 'requires-python' 版本。"; \
		exit 1; \
	}
	@uv python find $(PYTHON_VERSION_TARGET) > /dev/null 2>&1 || { \
		echo "ℹ️ 未找到 Python $(PYTHON_VERSION_TARGET)。正在使用 uv 自动安装..."; \
		uv python install $(PYTHON_VERSION_TARGET) || { \
			echo "❌ Python $(PYTHON_VERSION_TARGET) 安装失败。请检查网络或 uv 文档。"; \
			exit 1; \
		}; \
		echo "✅ Python $(PYTHON_VERSION_TARGET) 安装成功。"; \
	}

0 commit comments

Comments
 (0)