diff --git a/.gitignore b/.gitignore index be04c734..0f3c03c9 100644 --- a/.gitignore +++ b/.gitignore @@ -253,6 +253,19 @@ outputs/ fastapi_app/invite_codes.txt models/* +!models/.gitkeep +!models/README.md +!models/RMBG-2.0/ +!models/sam3/ +!models/sam3-official/ +models/RMBG-2.0/* +!models/RMBG-2.0/.gitkeep +models/sam3/* +!models/sam3/.gitkeep +models/sam3-official/* +!models/sam3-official/sam3/ +models/sam3-official/sam3/* +!models/sam3-official/sam3/.gitkeep outputs/* tmps/* data/* diff --git a/dataflow_agent/toolkits/multimodaltool/bg_tool.py b/dataflow_agent/toolkits/multimodaltool/bg_tool.py index fed84b03..96eba8f9 100644 --- a/dataflow_agent/toolkits/multimodaltool/bg_tool.py +++ b/dataflow_agent/toolkits/multimodaltool/bg_tool.py @@ -12,16 +12,16 @@ # - get_svg_render_desc: 返回 SVG 渲染工具的说明文本 # ================================================================ # BRIA-RMBG 2.0 高质量抠图工具 -# - 模型:RMBG 2.0(ONNX) -# - 依赖:onnxruntime, pillow, numpy +# - 模型:RMBG 2.0(Transformers / ModelScope 目录) +# - 依赖:transformers, pillow, numpy # ================================================================ from __future__ import annotations import os import subprocess +import sys from pathlib import Path -import platform import numpy as np from PIL import Image, ImageFilter @@ -31,9 +31,10 @@ from dataflow_agent.logger import get_logger CURRENT_DIR = Path(__file__).resolve().parent -# Allow override via env var (e.g. in Docker: RMBG_MODEL_PATH=/app/models/RMBG-2.0) +PROJECT_ROOT = CURRENT_DIR.parents[2] +# Allow override via env var (e.g. RMBG_MODEL_PATH=/app/models/RMBG-2.0) _env_model_path = os.environ.get("RMBG_MODEL_PATH") -MODEL_PATH = Path(_env_model_path) if _env_model_path else CURRENT_DIR / "onnx" / "model.onnx" +MODEL_PATH = Path(_env_model_path) if _env_model_path else PROJECT_ROOT / "models" / "RMBG-2.0" OUTPUT_DIR = CURRENT_DIR # 进程级抠图模型缓存:按 model_path 复用 BriaRMBG2Remover 实例 @@ -41,47 +42,50 @@ log = get_logger(__name__) +def _has_rmbg_model(model_path: Path) -> bool: + return (model_path / "config.json").exists() and (model_path / "model.safetensors").exists() + + def ensure_model(model_path: Path) -> None: """ 确保本地存在 RMBG-2.0 模型权重。 - 若 ``model_path`` 对应的文件不存在,则通过 ModelScope 下载 - ``AI-ModelScope/RMBG-2.0`` 模型到该路径所在目录。 + 当前实现期望 ``model_path`` 是一个 HuggingFace / ModelScope 风格的 + 模型目录;若目录缺失,则自动下载 ``AI-ModelScope/RMBG-2.0``。 参数 ---- model_path: - 本地模型文件路径(通常为 ONNX 或 transformers 权重)。 + 本地模型目录路径。 异常 ---- FileNotFoundError 当下载结束后仍未在 ``model_path`` 处找到模型文件时抛出。 """ - if model_path.exists(): + if model_path.is_file(): + model_path = model_path.parent + + if _has_rmbg_model(model_path): log.info(f"模型已存在: {model_path}") return - log.info("未检测到模型文件,正在下载 RMBG-2.0 权重...") - - # 确保目录存在 - model_path.parent.mkdir(parents=True, exist_ok=True) - # 判断当前系统是否为Windows - is_windows = platform.system().lower() == "windows" - # Windows用双引号包裹路径,Linux/macOS用单引号(保持原有逻辑) - quote = '"' if is_windows else "'" - # 直接下载到目标目录 - cmd = ( - f"modelscope download " - f"--model AI-ModelScope/RMBG-2.0 " - f"--local_dir {quote}{model_path}{quote} " - ) - os.system(cmd) + log.info("未检测到 RMBG 模型目录,正在下载 RMBG-2.0 权重...") + + model_path.mkdir(parents=True, exist_ok=True) + download_code = ( + "from modelscope import snapshot_download\n" + "snapshot_download(" + "'AI-ModelScope/RMBG-2.0', " + "local_dir=r'''%s''', " + "allow_patterns=['*.json', '*.py', '*.safetensors']" + ")\n" + ) % str(model_path) + subprocess.run([sys.executable, "-c", download_code], check=True) - # 检查下载是否成功 - if not model_path.exists(): + if not _has_rmbg_model(model_path): raise FileNotFoundError( - f"模型下载失败:未找到 {model_path}。\n" + f"模型下载失败:目录 {model_path} 缺少 config.json 或 model.safetensors。\n" "请检查 ModelScope 或手动下载。" ) @@ -109,6 +113,8 @@ def __init__(self, model_path: Path | None = None, output_dir: Path | None = Non 输出目录。若为 None,则使用当前文件所在目录。 """ self.model_path = Path(model_path) if model_path else MODEL_PATH + if self.model_path.is_file(): + self.model_path = self.model_path.parent self.output_dir = Path(output_dir) if output_dir else OUTPUT_DIR ensure_model(self.model_path) diff --git a/dataflow_agent/workflow/__init__.py b/dataflow_agent/workflow/__init__.py index b5082610..99b56b89 100644 --- a/dataflow_agent/workflow/__init__.py +++ b/dataflow_agent/workflow/__init__.py @@ -6,11 +6,37 @@ from .paper2video_subprocess import run_paper2video_via_subprocess from .registry import RuntimeRegistry -# ---- 1. 自动发现并导入所有工作流定义模块 --------------------------------- _pkg_path = Path(__file__).resolve().parent -for py in _pkg_path.glob("wf_*.py"): - mod_name = f"{__name__}.{py.stem}" - importlib.import_module(mod_name) +_imported_workflow_modules: set[str] = set() + + +def _workflow_module_names() -> list[str]: + return sorted( + f"{__name__}.{py.stem}" + for py in _pkg_path.glob("wf_*.py") + ) + + +def _import_workflow_modules_until(name: str | None = None) -> None: + """ + Lazy-load workflow definition modules. + + Importing every workflow during FastAPI startup pulls in heavyweight + dependencies such as torchvision/transformers and makes the backend look + dead for a long time. Only import enough modules to satisfy the requested + workflow registration, and fall back to importing everything only when the + caller explicitly asks for the full list. + """ + if name is not None and name in RuntimeRegistry._workflows: + return + + for mod_name in _workflow_module_names(): + if mod_name in _imported_workflow_modules: + continue + importlib.import_module(mod_name) + _imported_workflow_modules.add(mod_name) + if name is not None and name in RuntimeRegistry._workflows: + return # ---- 2. 工作流的统一接口 --------------------------------------------- def get_workflow(name: str): @@ -23,6 +49,7 @@ def get_workflow(name: str): Returns: Callable: 用于构建该工作流图的工厂函数 """ + _import_workflow_modules_until(name) return RuntimeRegistry.get(name) @@ -37,8 +64,9 @@ async def run_workflow(name: str, state): # ---- 3. 工作流注册信息公开接口 ------------------------------------------- -list_workflows = RuntimeRegistry.all +def list_workflows(): + _import_workflow_modules_until() + return RuntimeRegistry.all() # ---- 3. 工作流注册信息公开接口 ------------------------------------------- # 提供所有已注册工作流的列表,便于外部查询与 introspection -list_workflows = RuntimeRegistry.all \ No newline at end of file diff --git a/deploy/app_config.sh b/deploy/app_config.sh index 678f9b66..c4806376 100644 --- a/deploy/app_config.sh +++ b/deploy/app_config.sh @@ -3,8 +3,24 @@ # Shared FastAPI runtime config for deploy scripts. # Environment variables can override these defaults. +DEPLOY_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +PROJECT_ROOT="$(cd "$DEPLOY_DIR/.." && pwd)" + +APP_HOST="${APP_HOST:-0.0.0.0}" APP_PORT="${APP_PORT:-8000}" -APP_WORKERS="${APP_WORKERS:-2}" +APP_WORKERS="${APP_WORKERS:-1}" APP_CONDA_ENV="${APP_CONDA_ENV:-}" -APP_PYTHON="${APP_PYTHON:-}" -CONDA_SH="${CONDA_SH:-/root/miniconda3/etc/profile.d/conda.sh}" +APP_PYTHON="${APP_PYTHON:-/opt/conda/bin/python}" +APP_FALLBACK_PYTHON="${APP_FALLBACK_PYTHON:-/opt/conda/bin/python}" +CONDA_SH="${CONDA_SH:-/opt/conda/etc/profile.d/conda.sh}" + +# Keep the legacy external repo as a fallback only. +PAPER2ANY_ASSET_ROOT="${PAPER2ANY_ASSET_ROOT:-/mnt/paper2any/lz/github-proj/Paper2Any}" +MODEL_SERVER_ENV_FILE="${MODEL_SERVER_ENV_FILE:-logs/model_servers.env}" + +SAM3_SERVER_URLS="${SAM3_SERVER_URLS:-http://127.0.0.1:8021}" +# Leave model paths empty by default so deploy/start.sh can prefer repo-local models first. +SAM3_HOME="${SAM3_HOME:-}" +SAM3_CHECKPOINT_PATH="${SAM3_CHECKPOINT_PATH:-}" +SAM3_BPE_PATH="${SAM3_BPE_PATH:-}" +RMBG_MODEL_PATH="${RMBG_MODEL_PATH:-}" diff --git a/deploy/start.sh b/deploy/start.sh index b25c6e05..b9643d5d 100755 --- a/deploy/start.sh +++ b/deploy/start.sh @@ -13,8 +13,97 @@ PID_FILE="$LOG_DIR/uvicorn.pid" mkdir -p "$LOG_DIR" +maybe_source_model_server_env() { + local env_file="$PROJECT_ROOT/$MODEL_SERVER_ENV_FILE" + if [ -f "$env_file" ]; then + # shellcheck disable=SC1090 + source "$env_file" + fi +} + +validate_python() { + local python_bin="$1" + [ -n "$python_bin" ] || return 1 + [ -x "$python_bin" ] || return 1 + + "$python_bin" - <<'PY' >/dev/null 2>&1 +import cv2 +import fastapi +import supabase +import torch +import uvicorn +PY +} + +choose_first_existing() { + local candidate + for candidate in "$@"; do + if [ -n "$candidate" ] && [ -e "$candidate" ]; then + printf '%s\n' "$candidate" + return 0 + fi + done + return 1 +} + +prepare_runtime_env() { + local resolved_sam3_checkpoint + local resolved_sam3_bpe + local resolved_sam3_home + local resolved_rmbg + + resolved_sam3_checkpoint="$( + choose_first_existing \ + "${SAM3_CHECKPOINT_PATH:-}" \ + "$PROJECT_ROOT/models/sam3/sam3.pt" \ + "$PAPER2ANY_ASSET_ROOT/models/sam3/sam3.pt" \ + || true + )" + resolved_sam3_bpe="$( + choose_first_existing \ + "${SAM3_BPE_PATH:-}" \ + "$PROJECT_ROOT/models/sam3/bpe_simple_vocab_16e6.txt.gz" \ + "$PAPER2ANY_ASSET_ROOT/models/sam3/bpe_simple_vocab_16e6.txt.gz" \ + "$PAPER2ANY_ASSET_ROOT/sam3_src/sam3/assets/bpe_simple_vocab_16e6.txt.gz" \ + || true + )" + resolved_sam3_home="$( + choose_first_existing \ + "${SAM3_HOME:-}" \ + "$PROJECT_ROOT/models/sam3-official/sam3" \ + "$PAPER2ANY_ASSET_ROOT/sam3_src" \ + || true + )" + resolved_rmbg="$( + choose_first_existing \ + "${RMBG_MODEL_PATH:-}" \ + "$PROJECT_ROOT/models/RMBG-2.0" \ + "$PAPER2ANY_ASSET_ROOT/models/RMBG-2.0" \ + || true + )" + + if [ -n "$resolved_sam3_checkpoint" ]; then + export SAM3_CHECKPOINT_PATH="$resolved_sam3_checkpoint" + export PAPER2DRAWIO_SAM3_CHECKPOINT_PATH="$resolved_sam3_checkpoint" + fi + if [ -n "$resolved_sam3_bpe" ]; then + export SAM3_BPE_PATH="$resolved_sam3_bpe" + export PAPER2DRAWIO_SAM3_BPE_PATH="$resolved_sam3_bpe" + fi + if [ -n "$resolved_sam3_home" ]; then + export SAM3_HOME="$resolved_sam3_home" + fi + if [ -n "$resolved_rmbg" ]; then + export RMBG_MODEL_PATH="$resolved_rmbg" + fi + if [ -n "${SAM3_SERVER_URLS:-}" ]; then + export SAM3_SERVER_URLS + fi +} + resolve_python() { - if [ -n "$APP_PYTHON" ] && [ -x "$APP_PYTHON" ]; then + if [ -n "$APP_PYTHON" ] && validate_python "$APP_PYTHON"; then + export APP_PYTHON return 0 fi @@ -23,19 +112,32 @@ resolve_python() { source "$CONDA_SH" conda activate "$APP_CONDA_ENV" >/dev/null 2>&1 || { echo "Failed to activate conda env: $APP_CONDA_ENV" - exit 1 + return 1 } - APP_PYTHON="$(command -v python)" - export APP_PYTHON - return 0 - fi - APP_PYTHON="$(command -v python3 || command -v python || true)" - export APP_PYTHON - if [ -z "$APP_PYTHON" ]; then - echo "No python interpreter found." - exit 1 + APP_PYTHON="$(command -v python || true)" + if validate_python "$APP_PYTHON"; then + export APP_PYTHON + return 0 + fi + + echo "Conda env '$APP_CONDA_ENV' is present but missing required runtime deps, falling back." + conda deactivate >/dev/null 2>&1 || true fi + + for candidate in \ + "${APP_FALLBACK_PYTHON:-}" \ + "$(command -v python3 || true)" \ + "$(command -v python || true)"; do + if validate_python "$candidate"; then + APP_PYTHON="$candidate" + export APP_PYTHON + return 0 + fi + done + + echo "No usable python runtime found. Checked conda env '$APP_CONDA_ENV' and fallback interpreters." + return 1 } find_port_listener_pids() { @@ -45,11 +147,23 @@ find_port_listener_pids() { return 0 fi - ss -ltnp 2>/dev/null \ - | awk -v port=":$port" '$4 ~ port { print $NF }' \ - | grep -oE 'pid=[0-9]+' \ - | cut -d= -f2 \ - | sort -u + if command -v ss >/dev/null 2>&1; then + ss -ltnp 2>/dev/null \ + | awk -v port=":$port" '$4 ~ port { print $NF }' \ + | grep -oE 'pid=[0-9]+' \ + | cut -d= -f2 \ + | sort -u + return 0 + fi + + if command -v netstat >/dev/null 2>&1; then + netstat -ltnp 2>/dev/null \ + | awk -v port=":$port" '$4 ~ port { split($7, parts, "/"); if (parts[1] ~ /^[0-9]+$/) print parts[1] }' \ + | sort -u + return 0 + fi + + return 1 } wait_for_port_listener() { @@ -75,15 +189,39 @@ if [ -n "$existing_pids" ]; then exit 1 fi -resolve_python +maybe_source_model_server_env +resolve_python || exit 1 +prepare_runtime_env -start_cmd=("$APP_PYTHON" -m uvicorn fastapi_app.main:app --workers "$APP_WORKERS" --port "$APP_PORT" --log-level info) +start_cmd=( + "$APP_PYTHON" -m uvicorn fastapi_app.main:app + --host "$APP_HOST" + --workers "$APP_WORKERS" + --port "$APP_PORT" + --log-level info +) # 使用 setsid + nohup 彻底脱离当前 shell,避免多 worker 被会话一起带走。 if command -v setsid >/dev/null 2>&1; then - nohup setsid "${start_cmd[@]}" >> "$LOG_DIR/app.log" 2>&1 < /dev/null & + nohup setsid env \ + SAM3_SERVER_URLS="${SAM3_SERVER_URLS:-}" \ + SAM3_HOME="${SAM3_HOME:-}" \ + SAM3_CHECKPOINT_PATH="${SAM3_CHECKPOINT_PATH:-}" \ + SAM3_BPE_PATH="${SAM3_BPE_PATH:-}" \ + PAPER2DRAWIO_SAM3_CHECKPOINT_PATH="${PAPER2DRAWIO_SAM3_CHECKPOINT_PATH:-}" \ + PAPER2DRAWIO_SAM3_BPE_PATH="${PAPER2DRAWIO_SAM3_BPE_PATH:-}" \ + RMBG_MODEL_PATH="${RMBG_MODEL_PATH:-}" \ + "${start_cmd[@]}" >> "$LOG_DIR/app.log" 2>&1 < /dev/null & else - nohup "${start_cmd[@]}" >> "$LOG_DIR/app.log" 2>&1 < /dev/null & + nohup env \ + SAM3_SERVER_URLS="${SAM3_SERVER_URLS:-}" \ + SAM3_HOME="${SAM3_HOME:-}" \ + SAM3_CHECKPOINT_PATH="${SAM3_CHECKPOINT_PATH:-}" \ + SAM3_BPE_PATH="${SAM3_BPE_PATH:-}" \ + PAPER2DRAWIO_SAM3_CHECKPOINT_PATH="${PAPER2DRAWIO_SAM3_CHECKPOINT_PATH:-}" \ + PAPER2DRAWIO_SAM3_BPE_PATH="${PAPER2DRAWIO_SAM3_BPE_PATH:-}" \ + RMBG_MODEL_PATH="${RMBG_MODEL_PATH:-}" \ + "${start_cmd[@]}" >> "$LOG_DIR/app.log" 2>&1 < /dev/null & fi echo $! > "$PID_FILE" diff --git a/deploy/stop.sh b/deploy/stop.sh index c64e2df7..a71d7f7d 100755 --- a/deploy/stop.sh +++ b/deploy/stop.sh @@ -91,11 +91,23 @@ find_port_listener_pids() { return 0 fi - ss -ltnp 2>/dev/null \ - | awk -v port=":$port" '$4 ~ port { print $NF }' \ - | grep -oE 'pid=[0-9]+' \ - | cut -d= -f2 \ - | sort -u + if command -v ss >/dev/null 2>&1; then + ss -ltnp 2>/dev/null \ + | awk -v port=":$port" '$4 ~ port { print $NF }' \ + | grep -oE 'pid=[0-9]+' \ + | cut -d= -f2 \ + | sort -u + return 0 + fi + + if command -v netstat >/dev/null 2>&1; then + netstat -ltnp 2>/dev/null \ + | awk -v port=":$port" '$4 ~ port { split($7, parts, "/"); if (parts[1] ~ /^[0-9]+$/) print parts[1] }' \ + | sort -u + return 0 + fi + + return 1 } cleanup_pid_files diff --git a/fastapi_app/routers/image2drawio.py b/fastapi_app/routers/image2drawio.py index 37ed4e27..dd63d7b6 100644 --- a/fastapi_app/routers/image2drawio.py +++ b/fastapi_app/routers/image2drawio.py @@ -5,7 +5,6 @@ from fastapi import APIRouter, File, Form, UploadFile from pydantic import BaseModel -from fastapi_app.services.image2drawio_service import Image2DrawioService from fastapi_app.config.settings import settings router = APIRouter(prefix="/image2drawio", tags=["image2drawio"]) @@ -29,6 +28,8 @@ async def generate_image2drawio( language: str = Form("zh"), email: Optional[str] = Form(None), ): + from fastapi_app.services.image2drawio_service import Image2DrawioService + service = Image2DrawioService() try: result = await service.generate_drawio( diff --git a/fastapi_app/routers/image2ppt.py b/fastapi_app/routers/image2ppt.py index 47f2828a..1e1bbd4b 100644 --- a/fastapi_app/routers/image2ppt.py +++ b/fastapi_app/routers/image2ppt.py @@ -6,13 +6,14 @@ from fastapi.responses import FileResponse from dataflow_agent.logger import get_logger -from fastapi_app.services.image2ppt_service import Image2PPTService log = get_logger(__name__) router = APIRouter() def get_service() -> Image2PPTService: + from fastapi_app.services.image2ppt_service import Image2PPTService + return Image2PPTService() @router.post("/image2ppt/generate") diff --git a/fastapi_app/routers/kb.py b/fastapi_app/routers/kb.py index cd230672..db723f50 100644 --- a/fastapi_app/routers/kb.py +++ b/fastapi_app/routers/kb.py @@ -13,19 +13,12 @@ import fitz # PyMuPDF from dataflow_agent.state import IntelligentQARequest, IntelligentQAState, KBPodcastRequest, KBPodcastState, KBMindMapRequest, KBMindMapState -from dataflow_agent.workflow.wf_intelligent_qa import create_intelligent_qa_graph -from dataflow_agent.workflow.wf_kb_podcast import create_kb_podcast_graph -from dataflow_agent.workflow.wf_kb_mindmap import create_kb_mindmap_graph -from dataflow_agent.toolkits.ragtool.vector_store_tool import process_knowledge_base_files, VectorStoreManager from dataflow_agent.utils import get_project_root from dataflow_agent.workflow import run_workflow from dataflow_agent.logger import get_logger from fastapi_app.config import settings from fastapi_app.schemas import Paper2PPTRequest, DeepResearchRequest, DeepResearchResponse, KBReportRequest, KBReportResponse from fastapi_app.utils import _from_outputs_url, _to_outputs_url -from fastapi_app.workflow_adapters.wa_paper2ppt import _init_state_from_request -from fastapi_app.services.kb_deepresearch_service import KBDeepResearchService -from fastapi_app.services.kb_report_service import KBReportService router = APIRouter(prefix="/kb", tags=["Knowledge Base"]) log = get_logger(__name__) @@ -249,11 +242,15 @@ def _build_text_context(file_paths: List[str], max_chars: int = 60000) -> str: return combined -def _get_deepresearch_service() -> KBDeepResearchService: +def _get_deepresearch_service() -> "KBDeepResearchService": + from fastapi_app.services.kb_deepresearch_service import KBDeepResearchService + return KBDeepResearchService() -def _get_report_service() -> KBReportService: +def _get_report_service() -> "KBReportService": + from fastapi_app.services.kb_report_service import KBReportService + return KBReportService() @@ -757,6 +754,8 @@ async def generate_ppt_from_kb( embed_api_url = embed_api_url.rstrip("/") + "/embeddings" files_for_embed = [{"path": str(p), "description": ""} for p in doc_paths] + from dataflow_agent.toolkits.ragtool.vector_store_tool import process_knowledge_base_files + manifest = await process_knowledge_base_files( files_for_embed, base_dir=str(base_dir), @@ -766,6 +765,8 @@ async def generate_ppt_from_kb( multimodal_model=None, ) + from dataflow_agent.toolkits.ragtool.vector_store_tool import VectorStoreManager + manager = VectorStoreManager( base_dir=str(base_dir), embedding_api_url=embed_api_url, @@ -807,6 +808,8 @@ def _match_file_ids(m: Dict[str, Any], paths: List[Path]) -> List[str]: ) # Run KB pagecontent workflow + from fastapi_app.workflow_adapters.wa_paper2ppt import _init_state_from_request + state_pc = _init_state_from_request(ppt_req, result_path=output_dir) state_pc.kb_query = query or "" state_pc.kb_retrieval_text = retrieval_text diff --git a/fastapi_app/routers/kb_embedding.py b/fastapi_app/routers/kb_embedding.py index d019b8ac..eb3d5dcf 100644 --- a/fastapi_app/routers/kb_embedding.py +++ b/fastapi_app/routers/kb_embedding.py @@ -2,7 +2,6 @@ from typing import List, Dict, Optional, Any import os from pathlib import Path -from dataflow_agent.toolkits.ragtool.vector_store_tool import process_knowledge_base_files, VectorStoreManager from dataflow_agent.utils import get_project_root from fastapi_app.config import settings from fastapi_app.utils import _to_outputs_url @@ -137,6 +136,8 @@ async def create_embedding( else: vector_store_dir = project_root / "outputs" / "kb_data" / "vector_store_main" + from dataflow_agent.toolkits.ragtool.vector_store_tool import process_knowledge_base_files + manifest = await process_knowledge_base_files( process_list, base_dir=str(vector_store_dir), @@ -234,6 +235,8 @@ async def search_kb( if model_name: kwargs["embedding_model"] = model_name + from dataflow_agent.toolkits.ragtool.vector_store_tool import VectorStoreManager + manager = VectorStoreManager(**kwargs) results = manager.search(query=query, top_k=top_k, file_ids=file_ids) diff --git a/fastapi_app/routers/paper2any.py b/fastapi_app/routers/paper2any.py index 879ef3ba..26facd02 100644 --- a/fastapi_app/routers/paper2any.py +++ b/fastapi_app/routers/paper2any.py @@ -4,7 +4,6 @@ from fastapi import APIRouter, Depends, File, Form, UploadFile, Request, Body from fastapi.responses import FileResponse from fastapi_app.schemas import Paper2FigureResponse, VerifyLlmRequest, VerifyLlmResponse -from fastapi_app.services.paper2any_service import Paper2AnyService from dataflow_agent.logger import get_logger log = get_logger(__name__) @@ -13,6 +12,8 @@ def get_service() -> Paper2AnyService: + from fastapi_app.services.paper2any_service import Paper2AnyService + return Paper2AnyService() diff --git a/fastapi_app/routers/paper2citation.py b/fastapi_app/routers/paper2citation.py index 90f594f1..345308da 100644 --- a/fastapi_app/routers/paper2citation.py +++ b/fastapi_app/routers/paper2citation.py @@ -14,12 +14,13 @@ Paper2CitationPaperDetailRequest, Paper2CitationPaperDetailResponse, ) -from fastapi_app.services.paper2citation_service import Paper2CitationService router = APIRouter(tags=["paper2citation"]) def get_service() -> Paper2CitationService: + from fastapi_app.services.paper2citation_service import Paper2CitationService + return Paper2CitationService() diff --git a/fastapi_app/routers/paper2poster.py b/fastapi_app/routers/paper2poster.py index e7ad2fc9..3a7e4cec 100644 --- a/fastapi_app/routers/paper2poster.py +++ b/fastapi_app/routers/paper2poster.py @@ -4,12 +4,12 @@ from fastapi import APIRouter, Depends, File, Form, UploadFile -from fastapi_app.services.paper2poster_service import Paper2PosterService - router = APIRouter() def get_service() -> Paper2PosterService: + from fastapi_app.services.paper2poster_service import Paper2PosterService + return Paper2PosterService() diff --git a/fastapi_app/routers/paper2ppt.py b/fastapi_app/routers/paper2ppt.py index 170b7a75..4d637fac 100644 --- a/fastapi_app/routers/paper2ppt.py +++ b/fastapi_app/routers/paper2ppt.py @@ -13,8 +13,6 @@ PageContentRequest, PPTGenerationRequest, ) -from fastapi_app.services.paper2ppt_service import Paper2PPTService -from fastapi_app.services.paper2ppt_task_service import Paper2PPTTaskService from dataflow_agent.utils.version_manager import ImageVersionManager from fastapi_app.utils import _to_outputs_url @@ -23,10 +21,14 @@ def get_service() -> Paper2PPTService: + from fastapi_app.services.paper2ppt_service import Paper2PPTService + return Paper2PPTService() def get_task_service() -> Paper2PPTTaskService: + from fastapi_app.services.paper2ppt_task_service import Paper2PPTTaskService + return Paper2PPTTaskService() diff --git a/fastapi_app/routers/paper2video.py b/fastapi_app/routers/paper2video.py index afd1286c..45a2224f 100644 --- a/fastapi_app/routers/paper2video.py +++ b/fastapi_app/routers/paper2video.py @@ -18,8 +18,6 @@ FeaturePaper2VideoRequest, FeaturePaper2VideoResponse, ) -from fastapi_app.services.paper2video_service import Paper2VideoService -from fastapi_app.workflow_adapters import run_paper_to_video_api from dataflow_agent.logger import get_logger @@ -31,6 +29,8 @@ def get_service() -> Paper2VideoService: """依赖注入:获取 Paper2VideoService 单例。""" + from fastapi_app.services.paper2video_service import Paper2VideoService + return Paper2VideoService() @@ -140,4 +140,6 @@ async def paper2video_generate_video( async def paper2video_endpoint(body: FeaturePaper2VideoRequest) -> FeaturePaper2VideoResponse: """旧版:单次请求跑完整 paper2video 工作流。""" log.info("[paper2video] legacy /paper2video endpoint called") + from fastapi_app.workflow_adapters import run_paper_to_video_api + return await run_paper_to_video_api(body) diff --git a/fastapi_app/routers/pdf2ppt.py b/fastapi_app/routers/pdf2ppt.py index 8a627c09..3fc8dd26 100644 --- a/fastapi_app/routers/pdf2ppt.py +++ b/fastapi_app/routers/pdf2ppt.py @@ -6,7 +6,6 @@ from fastapi.responses import FileResponse from dataflow_agent.logger import get_logger -from fastapi_app.services.pdf2ppt_service import PDF2PPTService from fastapi_app.config import settings log = get_logger(__name__) @@ -14,6 +13,8 @@ router = APIRouter() def get_service() -> PDF2PPTService: + from fastapi_app.services.pdf2ppt_service import PDF2PPTService + return PDF2PPTService() @router.post("/pdf2ppt/generate") diff --git a/models/.gitkeep b/models/.gitkeep new file mode 100644 index 00000000..8b137891 --- /dev/null +++ b/models/.gitkeep @@ -0,0 +1 @@ + diff --git a/models/README.md b/models/README.md new file mode 100644 index 00000000..faf5a517 --- /dev/null +++ b/models/README.md @@ -0,0 +1,14 @@ +Local model assets live under this directory at deploy time. + +Expected layout: + +- `models/sam3/sam3.pt` +- `models/sam3/bpe_simple_vocab_16e6.txt.gz` +- `models/sam3-official/sam3/...` +- `models/RMBG-2.0/...` + +Large weights are intentionally ignored by git. To populate this directory on a machine, run: + +```bash +bash script/prepare_local_models.sh +``` diff --git a/models/RMBG-2.0/.gitkeep b/models/RMBG-2.0/.gitkeep new file mode 100644 index 00000000..8b137891 --- /dev/null +++ b/models/RMBG-2.0/.gitkeep @@ -0,0 +1 @@ + diff --git a/models/sam3-official/sam3/.gitkeep b/models/sam3-official/sam3/.gitkeep new file mode 100644 index 00000000..8b137891 --- /dev/null +++ b/models/sam3-official/sam3/.gitkeep @@ -0,0 +1 @@ + diff --git a/models/sam3/.gitkeep b/models/sam3/.gitkeep new file mode 100644 index 00000000..8b137891 --- /dev/null +++ b/models/sam3/.gitkeep @@ -0,0 +1 @@ + diff --git a/script/prepare_local_models.sh b/script/prepare_local_models.sh new file mode 100755 index 00000000..082e9a3f --- /dev/null +++ b/script/prepare_local_models.sh @@ -0,0 +1,104 @@ +#!/bin/bash + +set -euo pipefail + +ROOT_DIR="$(cd "$(dirname "$0")/.." && pwd)" +MODELS_DIR="$ROOT_DIR/models" +LOCAL_SAM3_DIR="$MODELS_DIR/sam3" +LOCAL_SAM3_HOME="$MODELS_DIR/sam3-official/sam3" +LOCAL_RMBG_DIR="$MODELS_DIR/RMBG-2.0" + +PAPER2ANY_PYTHON="${PAPER2ANY_PYTHON:-/opt/conda/bin/python}" +PAPER2ANY_ASSET_ROOT="${PAPER2ANY_ASSET_ROOT:-/mnt/paper2any/lz/github-proj/Paper2Any}" +LEGACY_SAM3_DIR="$PAPER2ANY_ASSET_ROOT/models/sam3" +LEGACY_SAM3_HOME="$PAPER2ANY_ASSET_ROOT/sam3_src" + +log_info() { echo "[INFO] $1"; } +log_warn() { echo "[WARN] $1"; } +log_ok() { echo "[OK] $1"; } + +mkdir -p "$LOCAL_SAM3_DIR" "$LOCAL_RMBG_DIR" "$(dirname "$LOCAL_SAM3_HOME")" + +copy_file_if_missing() { + local src="$1" + local dst="$2" + + if [ -f "$dst" ]; then + log_ok "Exists: $dst" + return 0 + fi + if [ ! -f "$src" ]; then + log_warn "Missing source file: $src" + return 1 + fi + + cp -f "$src" "$dst" + log_ok "Copied: $dst" +} + +copy_tree_if_missing() { + local src="$1" + local dst="$2" + local existing_payload + + if [ -d "$dst" ] && [ -f "$dst/sam3/__init__.py" ]; then + log_ok "Exists: $dst" + return 0 + fi + if [ ! -d "$src" ]; then + log_warn "Missing source directory: $src" + return 1 + fi + + existing_payload="$(find "$dst" -mindepth 1 ! -name '.gitkeep' -print -quit 2>/dev/null || true)" + if [ -n "$existing_payload" ]; then + log_warn "Target already contains files, skip copying: $dst" + return 1 + fi + + rm -rf "$dst" + cp -a "$src" "$dst" + log_ok "Copied: $dst" +} + +download_rmbg_if_missing() { + if [ -f "$LOCAL_RMBG_DIR/config.json" ] && [ -f "$LOCAL_RMBG_DIR/model.safetensors" ]; then + log_ok "Exists: $LOCAL_RMBG_DIR" + return 0 + fi + + log_info "Downloading RMBG-2.0 into $LOCAL_RMBG_DIR" + "$PAPER2ANY_PYTHON" - < /dev/null 2>&1 +} + +choose_first_existing() { + local candidate + for candidate in "$@"; do + if [ -n "$candidate" ] && [ -e "$candidate" ]; then + printf '%s\n' "$candidate" + return 0 + fi done - printf " \b\b\b\b" + return 1 } -check_port() { - local port=$1 - lsof -i:$port > /dev/null 2>&1 - if [ $? -eq 0 ]; then - return 0 # Port is in use - else - return 1 # Port is free - fi +trim() { + local value="$1" + value="${value#"${value%%[![:space:]]*}"}" + value="${value%"${value##*[![:space:]]}"}" + printf '%s' "$value" +} + +validate_python_runtime() { + [ -x "$PAPER2ANY_PYTHON" ] || return 1 + "$PAPER2ANY_PYTHON" - <<'PY' >/dev/null 2>&1 +import cv2 +import fastapi +import torch +import uvicorn +PY +} + +check_cuda_runtime() { + "$PAPER2ANY_PYTHON" - <<'PY' +import sys +import torch + +available = torch.cuda.is_available() +count = torch.cuda.device_count() if available else 0 +print(f"torch.cuda.is_available={available}") +print(f"torch.cuda.device_count={count}") +if not available or count <= 0: + sys.exit(1) +PY } kill_port() { @@ -83,60 +98,147 @@ kill_port() { fi } -wait_for_port() { - local port=$1 +wait_for_http() { + local url=$1 local label=$2 local timeout=${3:-120} local waited=0 while [ "$waited" -lt "$timeout" ]; do - if check_port "$port"; then - log_success "$label is listening on :$port" + if curl -fsS "$url" > /dev/null 2>&1; then + log_success "$label is ready: $url" return 0 fi sleep 2 waited=$((waited + 2)) done - log_error "$label failed to bind :$port within ${timeout}s" + log_error "$label failed health check: $url within ${timeout}s" return 1 } -cleanup_cluster_ports() { - local ports=({8010..8024} 8003) +discover_available_gpus() { + if [ "$SAM3_GPUS_RAW" != "auto" ]; then + printf '%s\n' "$SAM3_GPUS_RAW" | tr ', ' '\n\n' | awk 'NF { print $1 }' + return 0 + fi + + if "$PAPER2ANY_PYTHON" - <<'PY' >/dev/null 2>&1 +import torch +raise SystemExit(0 if torch.cuda.is_available() else 1) +PY + then + "$PAPER2ANY_PYTHON" - <<'PY' +import torch + +items = [] +for idx in range(torch.cuda.device_count()): + try: + free_bytes, _ = torch.cuda.mem_get_info(idx) + except Exception: + free_bytes = 0 + items.append((free_bytes, idx)) + +for _, idx in sorted(items, reverse=True): + print(idx) +PY + return 0 + fi + + if command -v mx-smi >/dev/null 2>&1; then + mx-smi -L | awk ' + /^GPU#[0-9]+/ && $0 ~ /Available \(/ { + gsub("GPU#", "", $1) + print $1 + } + ' + fi +} + +prepare_sam3_paths() { + SAM3_CHECKPOINT_PATH="$( + choose_first_existing \ + "${SAM3_CHECKPOINT_PATH:-}" \ + "$ROOT_DIR/models/sam3/sam3.pt" \ + "$PAPER2ANY_ASSET_ROOT/models/sam3/sam3.pt" \ + || true + )" + SAM3_BPE_PATH="$( + choose_first_existing \ + "${SAM3_BPE_PATH:-}" \ + "$ROOT_DIR/models/sam3/bpe_simple_vocab_16e6.txt.gz" \ + "$PAPER2ANY_ASSET_ROOT/models/sam3/bpe_simple_vocab_16e6.txt.gz" \ + "$PAPER2ANY_ASSET_ROOT/sam3_src/sam3/assets/bpe_simple_vocab_16e6.txt.gz" \ + || true + )" + SAM3_HOME="$( + choose_first_existing \ + "${SAM3_HOME:-}" \ + "$ROOT_DIR/models/sam3-official/sam3" \ + "$PAPER2ANY_ASSET_ROOT/sam3_src" \ + || true + )" + + if [ -z "$SAM3_CHECKPOINT_PATH" ] || [ -z "$SAM3_BPE_PATH" ] || [ -z "$SAM3_HOME" ]; then + log_error "SAM3 assets are incomplete." + log_error "SAM3_HOME=$SAM3_HOME" + log_error "SAM3_CHECKPOINT_PATH=$SAM3_CHECKPOINT_PATH" + log_error "SAM3_BPE_PATH=$SAM3_BPE_PATH" + exit 1 + fi +} + +build_sam3_launch_gpu_ids() { + local gpu_id + local replica + local max_instances="$SAM3_MAX_INSTANCES" + + SAM3_GPU_IDS=() + + while IFS= read -r gpu_id; do + gpu_id="$(trim "$gpu_id")" + [ -n "$gpu_id" ] || continue + + for replica in $(seq 1 "$SAM3_INSTANCES_PER_GPU"); do + if [ "$max_instances" -gt 0 ] && [ "${#SAM3_GPU_IDS[@]}" -ge "$max_instances" ]; then + return 0 + fi + SAM3_GPU_IDS+=("$gpu_id") + done + done < <(discover_available_gpus) +} + +cleanup_ports() { + local ports=({8020..8028} "$OCR_PORT") + local port for port in "${ports[@]}"; do kill_port "$port" done } -check_cuda_runtime() { - "$PAPER2ANY_PYTHON" - <<'PY' -import sys - -try: - import torch -except Exception as exc: - print(f"TORCH_IMPORT_ERROR: {exc}") - sys.exit(1) +cleanup_processes() { + pkill -9 -f "sam3_server" 2>/dev/null || true + pkill -9 -f "ocr_server" 2>/dev/null || true + pkill -9 -f "generic_lb.py --port 8020" 2>/dev/null || true + if [ "$DRIPPER_AUTOSTOP" = "1" ]; then + pkill -9 -f "python -m dripper.server" 2>/dev/null || true + fi +} -available = torch.cuda.is_available() -count = 0 -try: - count = torch.cuda.device_count() -except Exception: - count = 0 +write_state_env() { + local sam3_urls="$1" -print(f"torch.cuda.is_available={available}") -print(f"torch.cuda.device_count={count}") -if not available or count <= 0: - sys.exit(1) -PY + : > "$STATE_ENV_FILE" + if [ -n "$sam3_urls" ]; then + printf 'export SAM3_SERVER_URLS=%q\n' "$sam3_urls" >> "$STATE_ENV_FILE" + fi + printf 'export SAM3_HOME=%q\n' "$SAM3_HOME" >> "$STATE_ENV_FILE" + printf 'export SAM3_CHECKPOINT_PATH=%q\n' "$SAM3_CHECKPOINT_PATH" >> "$STATE_ENV_FILE" + printf 'export SAM3_BPE_PATH=%q\n' "$SAM3_BPE_PATH" >> "$STATE_ENV_FILE" + printf 'export PAPER2DRAWIO_SAM3_CHECKPOINT_PATH=%q\n' "$SAM3_CHECKPOINT_PATH" >> "$STATE_ENV_FILE" + printf 'export PAPER2DRAWIO_SAM3_BPE_PATH=%q\n' "$SAM3_BPE_PATH" >> "$STATE_ENV_FILE" } -# ------------------------------------------------------------------------------ -# 🚀 Main Execution Flow -# ------------------------------------------------------------------------------ - cd "$ROOT_DIR" || { log_error "Failed to cd to $ROOT_DIR"; exit 1; } mkdir -p "$LOG_DIR" @@ -148,157 +250,138 @@ echo " | __/ (_| | |_) | __/ | / __// ___ \| | | | |_| |" echo " |_| \__,_| .__/ \___|_| |_____/_/ \_\_| |_|\__, |" echo " |_| |___/ " echo -e "${NC}" -echo -e " Target: ${BOLD}High Concurrency / Single Instance Mode${NC}" +echo -e " Target: ${BOLD}MetaX SAM3 Local Service${NC}" echo -e " Log Dir: $LOG_DIR" echo -e " Python: $PAPER2ANY_PYTHON" echo "------------------------------------------------------------" +if ! validate_python_runtime; then + log_error "Python runtime '$PAPER2ANY_PYTHON' is missing FastAPI/Torch/OpenCV runtime deps." + exit 1 +fi + log_info "Running CUDA preflight..." if ! check_cuda_runtime; then - log_error "Current Python environment cannot access CUDA. Activate a CUDA-capable env before starting model servers." + log_error "Current Python runtime cannot access MetaX CUDA devices." exit 1 fi -# --- Step 1: Deep Cleanup --- -log_info "Initiating deep cleanup sequence..." +prepare_sam3_paths -# Kill specific ports -PORTS_TO_CLEAN=({8010..8024} 8003) -for port in "${PORTS_TO_CLEAN[@]}"; do - kill_port $port -done +log_info "Cleaning stale local SAM3/OCR processes..." +if [ "$DRIPPER_AUTOSTOP" = "1" ]; then + log_info "Stopping local MinerU-HTML dripper service to free MetaX queue resources." +fi +cleanup_ports +cleanup_processes +sleep 1 +log_success "Cleanup complete." -# Nuke process names -log_info "Nuking vLLM and worker processes..." -pkill -9 -f "vllm.entrypoints.openai.api_server" 2>/dev/null || true -pkill -9 -f "VLLM::EngineCore" 2>/dev/null || true -pkill -9 -f "sam_server" 2>/dev/null || true -pkill -9 -f "sam3_server" 2>/dev/null || true -pkill -9 -f "ocr_server" 2>/dev/null || true -pkill -9 -f "generic_lb.py --port 8010" 2>/dev/null || true -pkill -9 -f "generic_lb.py --port 8020" 2>/dev/null || true +build_sam3_launch_gpu_ids -sleep 2 -log_success "Cleanup complete. System is clean." +if [ "$SAM3_ENABLED" = "1" ] && [ "${#SAM3_GPU_IDS[@]}" -eq 0 ]; then + log_error "No available GPUs detected for SAM3." + exit 1 +fi -# --- Step 2: Launch MinerU (vLLM) --- echo "------------------------------------------------------------" -log_info "Launching MinerU Cluster (vLLM)" -log_info "Config: Util=$MINERU_GPU_UTIL | MaxSeqs=$MINERU_MAX_SEQS" - -MINERU_BACKENDS="" - -for i in "${!MINERU_GPUS[@]}"; do - gpu_id=${MINERU_GPUS[$i]} - port=$((MINERU_START_PORT + i)) - - log_info "Booting instance on GPU $gpu_id @ Port $port..." - - CUDA_VISIBLE_DEVICES=$gpu_id nohup "$PAPER2ANY_PYTHON" -m vllm.entrypoints.openai.api_server \ - --model "$MINERU_MODEL" \ - --served-model-name "mineru" \ - --host 127.0.0.1 \ - --port $port \ - --logits-processors mineru_vl_utils:MinerULogitsProcessor \ - --gpu-memory-utilization $MINERU_GPU_UTIL \ - --max-num-seqs $MINERU_MAX_SEQS \ - --trust-remote-code \ - --enforce-eager \ - > "$LOG_DIR/mineru_gpu${gpu_id}.log" 2>&1 & - - MINERU_BACKENDS+="http://127.0.0.1:$port " -done +log_info "MinerU is intentionally not started here. This machine uses MinerU API only." +log_info "OCR server is disabled by default. This machine uses Ali Qwen-VL-OCR API." + +SAM3_URLS=() +if [ "$SAM3_ENABLED" = "1" ]; then + log_info "Launching SAM3 instances on MetaX GPUs: ${SAM3_GPU_IDS[*]}" + log_info "Instances per GPU: $SAM3_INSTANCES_PER_GPU" + for i in "${!SAM3_GPU_IDS[@]}"; do + gpu_id=${SAM3_GPU_IDS[$i]} + port=$((SAM3_START_PORT + i)) + instance_id=$((i + 1)) + log_info "Booting SAM3 on GPU $gpu_id @ Port $port..." + + if command -v setsid >/dev/null 2>&1; then + nohup setsid env \ + CUDA_VISIBLE_DEVICES="$gpu_id" \ + SAM3_HOME="$SAM3_HOME" \ + SAM3_CHECKPOINT_PATH="$SAM3_CHECKPOINT_PATH" \ + SAM3_BPE_PATH="$SAM3_BPE_PATH" \ + "$PAPER2ANY_PYTHON" -m dataflow_agent.toolkits.model_servers.sam3_server \ + --host "$SAM3_HOST" \ + --port "$port" \ + --checkpoint "$SAM3_CHECKPOINT_PATH" \ + --bpe "$SAM3_BPE_PATH" \ + --device cuda \ + > "$LOG_DIR/sam3_gpu${gpu_id}_inst${instance_id}_port${port}.log" 2>&1 < /dev/null & + else + nohup env \ + CUDA_VISIBLE_DEVICES="$gpu_id" \ + SAM3_HOME="$SAM3_HOME" \ + SAM3_CHECKPOINT_PATH="$SAM3_CHECKPOINT_PATH" \ + SAM3_BPE_PATH="$SAM3_BPE_PATH" \ + "$PAPER2ANY_PYTHON" -m dataflow_agent.toolkits.model_servers.sam3_server \ + --host "$SAM3_HOST" \ + --port "$port" \ + --checkpoint "$SAM3_CHECKPOINT_PATH" \ + --bpe "$SAM3_BPE_PATH" \ + --device cuda \ + > "$LOG_DIR/sam3_gpu${gpu_id}_inst${instance_id}_port${port}.log" 2>&1 < /dev/null & + fi -# --- Step 3: Launch SAM --- -echo "------------------------------------------------------------" -log_info "Launching SAM3 Cluster" - -SAM3_BACKENDS="" - -for i in "${!SAM3_GPUS[@]}"; do - gpu_id=${SAM3_GPUS[$i]} - port=$((SAM3_START_PORT + i)) - - log_info "Booting SAM3 on GPU $gpu_id @ Port $port..." - - env CUDA_VISIBLE_DEVICES=$gpu_id nohup "$PAPER2ANY_PYTHON" -m dataflow_agent.toolkits.model_servers.sam3_server \ - --port $port \ - --host 0.0.0.0 \ - --checkpoint "$SAM3_CHECKPOINT_PATH" \ - --bpe "$SAM3_BPE_PATH" \ - --device cuda \ - > "$LOG_DIR/sam_${gpu_id}.log" 2>&1 & - - SAM3_BACKENDS+="http://127.0.0.1:$port " -done + SAM3_URLS+=("http://127.0.0.1:$port") + done +fi -# --- Step 4: Launch OCR --- -echo "------------------------------------------------------------" -log_info "Starting OCR Service (CPU)..." -CUDA_VISIBLE_DEVICES="" nohup "$PAPER2ANY_PYTHON" -m uvicorn dataflow_agent.toolkits.model_servers.ocr_server:app \ - --port 8003 --host 0.0.0.0 --workers 4 \ - > "$LOG_DIR/ocr_server.log" 2>&1 & -log_success "OCR Service running on :8003" +if [ "$OCR_ENABLED" = "1" ]; then + echo "------------------------------------------------------------" + log_info "Starting local OCR server..." + if command -v setsid >/dev/null 2>&1; then + nohup setsid env \ + CUDA_VISIBLE_DEVICES="" \ + "$PAPER2ANY_PYTHON" -m uvicorn dataflow_agent.toolkits.model_servers.ocr_server:app \ + --host "$OCR_HOST" \ + --port "$OCR_PORT" \ + --workers "$OCR_WORKERS" \ + > "$LOG_DIR/ocr_server.log" 2>&1 < /dev/null & + else + nohup env \ + CUDA_VISIBLE_DEVICES="" \ + "$PAPER2ANY_PYTHON" -m uvicorn dataflow_agent.toolkits.model_servers.ocr_server:app \ + --host "$OCR_HOST" \ + --port "$OCR_PORT" \ + --workers "$OCR_WORKERS" \ + > "$LOG_DIR/ocr_server.log" 2>&1 < /dev/null & + fi +fi -# --- Step 5: Validate model backends --- echo "------------------------------------------------------------" -log_info "Validating model backends..." +log_info "Validating started services..." failed=0 -for i in "${!MINERU_GPUS[@]}"; do - port=$((MINERU_START_PORT + i)) - wait_for_port "$port" "MinerU backend" 240 || failed=1 -done - -for i in "${!SAM3_GPUS[@]}"; do - port=$((SAM3_START_PORT + i)) - wait_for_port "$port" "SAM3 backend" 120 || failed=1 +for url in "${SAM3_URLS[@]}"; do + wait_for_http "${url}/health" "SAM3 backend" 360 || failed=1 done -wait_for_port 8003 "OCR service" 30 || failed=1 +if [ "$OCR_ENABLED" = "1" ]; then + wait_for_http "http://127.0.0.1:${OCR_PORT}/health" "OCR backend" 60 || failed=1 +fi if [ "$failed" -ne 0 ]; then log_error "Model server startup incomplete. Check logs under $LOG_DIR" - cleanup_cluster_ports exit 1 fi -# --- Step 6: Launch Load Balancers --- -echo "------------------------------------------------------------" -log_info "Initializing Load Balancers..." - -# MinerU LB -nohup "$PAPER2ANY_PYTHON" dataflow_agent/toolkits/model_servers/generic_lb.py \ - --port 8010 \ - --name "MinerU LB" \ - --backends $MINERU_BACKENDS \ - > "$LOG_DIR/mineru_lb.log" 2>&1 & -log_success "MinerU LB running on :8010 -> [ $MINERU_BACKENDS]" - -# SAM3 LB -nohup "$PAPER2ANY_PYTHON" dataflow_agent/toolkits/model_servers/generic_lb.py \ - --port 8020 \ - --name "SAM3 LB" \ - --backends $SAM3_BACKENDS \ - > "$LOG_DIR/sam_lb.log" 2>&1 & -log_success "SAM3 LB running on :8020 -> [ $SAM3_BACKENDS]" - -# --- Step 7: Validate load balancers --- -echo "------------------------------------------------------------" -log_info "Validating load balancers..." - -failed=0 -wait_for_port 8010 "MinerU LB" 30 || failed=1 -wait_for_port 8020 "SAM3 LB" 30 || failed=1 - -if [ "$failed" -ne 0 ]; then - log_error "Load balancer startup incomplete. Check logs under $LOG_DIR" - cleanup_cluster_ports - exit 1 +SAM3_URLS_CSV="" +if [ "${#SAM3_URLS[@]}" -gt 0 ]; then + SAM3_URLS_CSV="$(IFS=,; echo "${SAM3_URLS[*]}")" fi +write_state_env "$SAM3_URLS_CSV" -# --- Final Check --- echo "------------------------------------------------------------" -echo -e "${GREEN}${BOLD}ALL SYSTEMS GO!${NC}" +echo -e "${GREEN}${BOLD}MODEL SERVICES READY${NC}" +if [ -n "$SAM3_URLS_CSV" ]; then + echo "SAM3_SERVER_URLS=$SAM3_URLS_CSV" +fi +if [ "$OCR_ENABLED" = "1" ]; then + echo "OCR_URL=http://127.0.0.1:${OCR_PORT}" +fi +echo "Env file: $STATE_ENV_FILE" echo -e "Monitor logs with: ${YELLOW}tail -f logs/*.log${NC}" -echo ""