Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,117 @@
"""
P2bPagecontentToBeamer agent
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
从 paper2page_content 产出的 pagecontent(结构化大纲)生成 LaTeX Beamer 代码。
输入:pagecontent (list[dict]: title, layout_description, key_points, asset_ref)
输出:latex_code,写入 state.beamer_code_path。
"""

from __future__ import annotations

from pathlib import Path
from typing import Any, Dict, Optional

from dataflow_agent.state import MainState
from dataflow_agent.toolkits.tool_manager import ToolManager
from dataflow_agent.logger import get_logger
from dataflow_agent.agentroles.cores.base_agent import BaseAgent
from dataflow_agent.agentroles.cores.registry import register
from dataflow_agent.toolkits.p2vtool.p2v_tool import extract_beamer_code

log = get_logger(__name__)


# ----------------------------------------------------------------------
# Agent Definition
# ----------------------------------------------------------------------
@register("p2b_pagecontent_to_beamer")
class P2bPagecontentToBeamer(BaseAgent):
    """Agent that generates Beamer LaTeX code from pagecontent (a structured outline).

    The Beamer code extracted from the model result is written to
    ``<base>/output/beamer_code.tex`` and that path is stored on
    ``state.beamer_code_path``.
    """

    @classmethod
    def create(cls, tool_manager: Optional[ToolManager] = None, **kwargs):
        """Alternate constructor: forward ``tool_manager`` and ``kwargs`` to ``cls``."""
        return cls(tool_manager=tool_manager, **kwargs)

    @property
    def role_name(self) -> str:
        """Role name of this agent (same string it is registered under)."""
        return "p2b_pagecontent_to_beamer"

    @property
    def system_prompt_template_name(self) -> str:
        """Name of the system prompt template for this role."""
        return "system_prompt_for_p2b_pagecontent_to_beamer"

    @property
    def task_prompt_template_name(self) -> str:
        """Name of the task prompt template for this role."""
        return "task_prompt_for_p2b_pagecontent_to_beamer"

    def get_task_prompt_params(self, pre_tool_results: Dict[str, Any]) -> Dict[str, Any]:
        """Pick the task-prompt parameters out of ``pre_tool_results``.

        Missing keys fall back to ``"[]"`` (pagecontent), ``"English"``
        (output_language) and ``""`` (pdf_images_working_dir).
        """
        return {
            "pagecontent": pre_tool_results.get("pagecontent", "[]"),
            "output_language": pre_tool_results.get("output_language", "English"),
            "pdf_images_working_dir": pre_tool_results.get("pdf_images_working_dir", ""),
        }

    async def execute_pre_tools(self, state: MainState) -> Dict[str, Any]:
        """Run the parent's pre-tools, then prefer ``state.pagecontent`` when present.

        When the state carries its own non-empty ``pagecontent`` list (the
        per-page state used in parallel runs), it overrides the pre-tool value,
        so the full pagecontent captured when the graph node was registered is
        not used for a single page.
        """
        results = await super().execute_pre_tools(state)
        pagecontent = getattr(state, "pagecontent", None)
        if isinstance(pagecontent, list) and pagecontent:
            results["pagecontent"] = pagecontent
            log.debug("使用 state.pagecontent 作为本页 pagecontent(共 %s 项)", len(pagecontent))
        return results

    def get_default_pre_tool_results(self) -> Dict[str, Any]:
        """Return the default pre-tool results: an empty dict."""
        return {}

    @staticmethod
    def _extract_code(text: Any) -> str:
        """Return the Beamer code extracted from ``text``, or ``""`` if there is none.

        Anything that is not a non-empty ``str`` yields ``""`` without calling
        ``extract_beamer_code``.
        """
        if isinstance(text, str) and text:
            return extract_beamer_code(text) or ""
        return ""

    def _get_beamer_code_from_result(self, result: Dict[str, Any]) -> str:
        """Pull the Beamer code out of an agent result.

        Sources are tried in order, and the first non-empty extraction wins:

        1. ``result["latex_code"]`` (the well-formed result dict);
        2. ``result["raw"]`` as plain text (the shape used when parsing failed);
        3. ``result["raw"]`` parsed with ``robust_parse_json``, then its
           ``"latex_code"`` entry.

        Returns ``""`` when ``result`` is not a dict or nothing could be
        extracted. Failures in step 3 are logged and treated as "no code".
        """
        if not isinstance(result, dict):
            return ""

        code = self._extract_code(result.get("latex_code", ""))
        if code:
            return code

        raw_content = result.get("raw", "")
        code = self._extract_code(raw_content)
        if code:
            return code
        if not isinstance(raw_content, str) or not raw_content:
            return ""

        # Best-effort fallback: the import stays inside the try so that a
        # missing helper is handled like any other failure of this step.
        try:
            from dataflow_agent.utils import robust_parse_json

            parsed = robust_parse_json(raw_content)
            if isinstance(parsed, dict):
                return self._extract_code(parsed.get("latex_code", ""))
        except Exception as exc:
            # Still best-effort, but no longer silent: record why it failed.
            log.warning(
                "p2b_pagecontent_to_beamer: JSON fallback for raw result failed: %s",
                exc,
            )
        return ""

    def update_state_result(
        self,
        state: MainState,
        result: Dict[str, Any],
        pre_tool_results: Dict[str, Any],
    ):
        """Write the generated Beamer code to disk and record its path on ``state``.

        The base directory is ``state.result_path`` when set; otherwise the
        directory containing ``state.request.paper_pdf_path``; otherwise the
        current working directory. The file written is
        ``<base>/output/beamer_code.tex`` (UTF-8). When no Beamer code could be
        extracted, an error is logged and nothing is written. In both cases the
        parent implementation is called afterwards.
        """
        beamer_code = self._get_beamer_code_from_result(result)
        if not beamer_code:
            log.error("p2b_pagecontent_to_beamer: 未得到有效 Beamer 代码")
            super().update_state_result(state, result, pre_tool_results)
            return

        result_path = getattr(state, "result_path", "") or ""
        if result_path:
            base = Path(result_path).expanduser().resolve()
        else:
            req = getattr(state, "request", None)
            paper_pdf_path = getattr(req, "paper_pdf_path", "") if req else ""
            if paper_pdf_path:
                base = Path(paper_pdf_path).expanduser().resolve().parent
            else:
                base = Path(".").resolve()

        # NOTE(review): the file name is fixed, so runs that resolve to the same
        # base directory overwrite each other's output — verify against the
        # per-page caller.
        output_dir = base / "output"
        output_dir.mkdir(parents=True, exist_ok=True)
        beamer_code_path = output_dir / "beamer_code.tex"
        beamer_code_path.write_text(beamer_code, encoding="utf-8")
        state.beamer_code_path = str(beamer_code_path)
        log.info("p2b_pagecontent_to_beamer: Beamer 代码已写入 %s", beamer_code_path)
        super().update_state_result(state, result, pre_tool_results)
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
from dataflow_agent.logger import get_logger
from dataflow_agent.agentroles.cores.base_agent import BaseAgent
from dataflow_agent.agentroles.cores.registry import register
from dataflow_agent.toolkits.p2vtool.p2v_tool import extract_beamer_code

log = get_logger(__name__)

Expand Down Expand Up @@ -58,6 +59,40 @@ def get_default_pre_tool_results(self) -> Dict[str, Any]:
"""若调用方未显式传入,返回默认前置工具结果"""
return {}

async def execute_pre_tools(self, state: MainState) -> Dict[str, Any]:
    """Run the parent's pre-tools, then merge in values injected on the state.

    If ``state.pre_tool_results`` is set (injected inside the workflow), its
    ``beamer_code`` and ``code_debug_result`` entries override the pre-tool
    results so that both values reach the prompt.
    """
    merged = await super().execute_pre_tools(state)
    injected = getattr(state, "pre_tool_results", None)
    if not injected:
        return merged

    for name in ("beamer_code", "code_debug_result"):
        if name not in injected:
            continue
        merged[name] = injected[name]
    return merged

def _get_beamer_code_from_result(self, result: Dict[str, Any]) -> str:
    """Pull the Beamer code out of an agent result.

    Sources are tried in order, and the first non-empty extraction wins:

    1. ``result["latex_code"]`` (the well-formed result dict);
    2. ``result["raw"]`` as plain text (the shape used when parsing failed);
    3. ``result["raw"]`` parsed with ``robust_parse_json``, then its
       ``"latex_code"`` entry.

    Returns ``""`` when ``result`` is not a dict or nothing could be
    extracted. Failures in step 3 are logged and treated as "no code".
    """
    if not isinstance(result, dict):
        return ""

    def _extract(text: Any) -> str:
        # Only non-empty strings are handed to extract_beamer_code.
        if isinstance(text, str) and text:
            return extract_beamer_code(text) or ""
        return ""

    code = _extract(result.get("latex_code", ""))
    if code:
        return code

    raw_content = result.get("raw", "")
    code = _extract(raw_content)
    if code:
        return code
    if not isinstance(raw_content, str) or not raw_content:
        return ""

    # Best-effort fallback: the import stays inside the try so that a missing
    # helper is handled like any other failure of this step.
    try:
        from dataflow_agent.utils import robust_parse_json

        parsed = robust_parse_json(raw_content)
        if isinstance(parsed, dict):
            return _extract(parsed.get("latex_code", ""))
    except Exception as exc:
        # Still best-effort, but no longer silent: record why it failed.
        log.warning("JSON fallback for raw beamer result failed: %s", exc)
    return ""

# ---------- 结果写回 ----------
def update_state_result(
self,
Expand All @@ -66,16 +101,19 @@ def update_state_result(
pre_tool_results: Dict[str, Any],
):
"""将推理结果 {latex_code: xxxx} 写回 MainState"""
beamer_code = result.get("latex_code", '')
beamer_code = self._get_beamer_code_from_result(result)
beamer_code_path = state.beamer_code_path
if beamer_code and beamer_code_path:
from pathlib import Path

tex_path = Path(beamer_code_path)
tex_path.write_text(beamer_code, encoding='utf-8')
# 编译最新的tex代码
# 编译最新的 tex 代码并写回 state,便于调用方判断是否仍存在 error/warning
from dataflow_agent.toolkits.p2vtool.p2v_tool import compile_tex
is_beamer_wrong, is_beamer_warning, code_debug_result = compile_tex(beamer_code_path)
state.is_beamer_wrong = is_beamer_wrong
state.is_beamer_warning = is_beamer_warning
state.code_debug_result = code_debug_result
state.ppt_path = beamer_code_path.replace(".tex", ".pdf")
log.info(f"将更新好的beamer code写回 {beamer_code_path}")
else:
Expand Down
99 changes: 84 additions & 15 deletions dataflow_agent/promptstemplates/prompts_repo.py
Original file line number Diff line number Diff line change
Expand Up @@ -1794,23 +1794,85 @@ class Paper2VideoPrompt:

## Source Content (Markdown)
{pdf_markdown}
"""

# System prompt for the "p2b_pagecontent_to_beamer" agent role: asks for one
# complete, compilable Beamer document containing exactly one frame, returned
# as JSON under the key "latex_code".
# The text has no single-brace placeholders; LaTeX braces are doubled ({{ }}),
# presumably because the template is rendered with str.format-style
# substitution — TODO confirm against the prompt loader.
system_prompt_for_p2b_pagecontent_to_beamer = """
You are an expert in LaTeX Beamer. Your task is to convert **one slide's** structured outline (pagecontent) into a **single, structurally complete, compilable** Beamer LaTeX document.

**Context:** You generate slide content **one page at a time**. Each output must be a **full Beamer document** that compiles on its own (with Tectonic or TeX Live). Do not output a bare frame or fragment.

**Required document structure (do not omit any part):**
1. \\documentclass{{beamer}}
2. Preamble: **must** use \\usetheme{{Madrid}} (fixed theme).
3. \\begin{{document}}
4. **Exactly one** \\begin{{frame}}...\\end{{frame}} containing the slide content.
5. \\end{{document}}

**CRITICAL:** Ensure every \\begin{{frame}} has a matching \\end{{frame}}, and the document ends with \\end{{document}}. Avoid the error "!File ended while scanning use of \\frame".

**Font and package rules (strict):**
- **STRICTLY FORBIDDEN:** Times New Roman, Arial, Calibri, TeX Gyre Termes, or any non-standard TeX Live font. Use \\usepackage{{lmodern}} or default LaTeX fonts only.
- **Do NOT use** \\usepackage{{resizebox}} (invalid/grammar issues).
- If output_language is Chinese, you **must** include in the preamble: \\usepackage{{fontspec}} and \\usepackage{{ctex}}.

**Syntax rules:**
- **Do not use & in frame titles** (causes "Misplaced alignment tab character &"). Use "and" or comma instead.
- **Underscore in plain text:** In LaTeX, underscore `_` is reserved for math subscripts. Any `_` in normal text (e.g. function names like generate_from_input, variable names like user_inputs, system_prompt) **must** be written as \\_ (backslash-underscore). Example: `user_inputs` → `user\\_inputs`, `generate_from_input` → `generate\\_from\\_input`. Otherwise you get "Missing $ inserted" and compilation fails.
- Use \\alert{{}} for key terms or math symbols when appropriate.
- For literal percent sign in text use \\% (e.g. 5\\%).

**Content:** Use the given title, layout_description, key_points, and asset_ref. For image paths (e.g. in asset_ref), prepend the absolute base path given by pdf_images_working_dir and use \\includegraphics[width=0.8\\textwidth]{{...}} with \\caption and \\label. For table references (e.g. Table_2) use tabular/booktabs.

**Output:** Return only one JSON object with key "latex_code" containing the **entire** document from \\documentclass to \\end{{document}}, ready to compile.
"""

# Task prompt for the "p2b_pagecontent_to_beamer" agent role.
# Placeholders: {output_language}, {pdf_images_working_dir}, {pagecontent} —
# the same keys the agent's get_task_prompt_params() returns.
# Literal braces (LaTeX arguments and the JSON example) are doubled ({{ }}).
task_prompt_for_p2b_pagecontent_to_beamer = """
Generate **one** LaTeX Beamer slide as a **complete, compilable document**. The input is a **single slide's** pagecontent (one JSON object). Your output must be a full Beamer file: \\documentclass{{beamer}} + preamble + \\begin{{document}} + **one** \\begin{{frame}}...\\end{{frame}} + \\end{{document}}.

## Output language
{output_language}

## Images base directory (absolute path prefix for \\includegraphics)
{pdf_images_working_dir}

## This slide's pagecontent (single object)
{pagecontent}

## Asset / image rule
- If **asset_ref** is null or missing: do **not** output any figure, image block, or placeholder (e.g. do not write "配图占位" or "当前页未提供图片资源").

## Format requirements
- **Theme: use \\usetheme{{Madrid}} in the preamble** (fixed; do not use other themes).
- Font: use \\usepackage{{lmodern}} or default fonts only. **Do not use** Times New Roman, TeX Gyre Termes, resizebox.
- Chinese: if output language is Chinese, add \\usepackage{{fontspec}} and \\usepackage{{ctex}} in the preamble.
- No **&** in frame title (use "and" or comma).
- **Underscores in text:** Write \\_ for every underscore in normal text (e.g. user\\_inputs, generate\\_from\\_input), or you get "Missing $ inserted".
- Literal percent: use 5\\% not 5%.
- Every \\begin{{frame}} must have \\end{{frame}}; document must end with \\end{{document}}.

## Output format
Return a valid JSON object with a single key "latex_code".

{{
"latex_code": "FULL_BEAMER_DOCUMENT_WITH_ONE_FRAME_HERE"
}}
"""

system_prompt_for_p2v_beamer_code_debug = """
You are an expert in repairing LaTeX beamer code.
You are an expert in repairing LaTeX beamer code.
You must preserve all slide content exactly as written (including text, figures, and layout).
Your goal is to correct LaTeX compilation errors and return clean, compilable LaTeX code.
Your goal is to fix LaTeX compilation **errors** and **warnings** (e.g. Overfull box) and return clean, compilable LaTeX code.

Your output must:
- Be directly compilable using **tectonic** (a simplified TeX Live)
- Never include explanations, comments, or English/Chinese text outside the LaTeX code

"""

task_prompt_for_p2v_beamer_code_debug = """
(Critical!) Do not modify the file path, ignore the folloing message: "warning: accessing absolute path: "
You are given a LaTeX beamer code for the slides of a research paper and its error information.
You should correct these errors but do not change the slide content (e.g., text, figures and layout).
(Critical!) Do not modify the file path; ignore the following message: "warning: accessing absolute path: "

You are given a LaTeX beamer code for the slides of a research paper and its compilation log (errors and/or warnings).
Fix the reported issues but do not change the slide content (e.g., text, figures and layout).

## Content Preservation Rules (Strict)
- You MUST NOT delete, replace, or reduce the number of figures/images.
Expand All @@ -1819,23 +1881,30 @@ class Paper2VideoPrompt:
ONLY if necessary to fix compilation or layout issues.
- Keep the slide text content unchanged as much as possible.

## Some instruction
## Overfull box (warning)
When the log contains **Overfull \\hbox** or **Overfull \\vbox** (content or font too large), fix by:
- Reducing font size (e.g. \\small, \\footnotesize in the frame or for specific blocks).
- Reducing image/figure width or scale (e.g. width=0.7\\textwidth instead of 0.9\\textwidth).
- Do NOT remove or truncate text or figures; only resize or rescale to fit.

## Other instructions
**Font Safety**: **MUST** remove or comment out any usage of the `fontspec` package if and only if it causes errors (as it depends on system fonts).
For instance, if you encounter the error message: Package fontspec Error: The font "Latin Modern Roman" cannot be found, just remove or comment out it and use default TeX Live fonts.
For instance, if you see: Package fontspec Error: The font "Latin Modern Roman" cannot be found, remove or comment it out and use default TeX Live fonts.

**Image Loading Errors**:
If the compiler reports an image loading **error**, such as: "Unable to load picture or PDF file" or "! LaTeX Error: Cannot determine size of graphic", the model **MUST** remove the entire command responsible for loading that specific graphic.
**Image Loading Errors**:
If the compiler reports an image loading **error** (e.g. "Unable to load picture or PDF file" or "! LaTeX Error: Cannot determine size of graphic"), **MUST** remove the entire command that loads that graphic.

Output Format:
- Return a JSON object with a single key "latex_code".
## Output format
Return a JSON object with a single key "latex_code".
{{
"latex_code": "YOUR_GENERATED_latex_beamer_code_HERE"
}}
# Only output latex code which should be ready to compile using tectonic (simple version of TeX Live).
Output only the JSON; the latex code must be ready to compile with tectonic.

The LateX beamer code is:
The LaTeX beamer code is:
{beamer_code}
The compilation error message is:

The compilation log (errors and/or warnings) is:
{code_debug_result}
"""

Expand Down
30 changes: 30 additions & 0 deletions dataflow_agent/state.py
Original file line number Diff line number Diff line change
Expand Up @@ -164,6 +164,36 @@ class Paper2VideoState(MainState):
video_path: str = ""


# ==================== Paper2PptBeamer: State and Request definitions ====================
@dataclass
class Paper2PptBeamerRequest(MainRequest):
    """Request used only by the PDF -> Beamer PPT workflow."""

    # Path to the source paper PDF; defaults to an empty string.
    paper_pdf_path: str = ""


# ==================== Paper2PptBeamer generation State ======================
@dataclass
class Paper2PptBeamerState(MainState):
    """State for the pagecontent -> Beamer PPT workflow (runs after paper2page_content)."""

    request: Paper2PptBeamerRequest = field(default_factory=Paper2PptBeamerRequest)

    # --- Produced by the upstream paper2page_content step ---
    pagecontent: List[Dict[str, Any]] = field(default_factory=list)
    result_path: str = ""
    mineru_root: str = ""
    # Full paper text / abstract, used by table_extractor and similar tools.
    # NOTE(review): spelled "minueru" (unlike mineru_root above); kept as-is
    # because the field name is part of the interface.
    minueru_output: str = ""

    # --- Beamer generation / compilation results ---
    beamer_code_path: str = ""
    is_beamer_wrong: bool = False
    is_beamer_warning: bool = False
    code_debug_result: str = ""
    ppt_path: str = ""
    img_size_debug: bool = True

    # --- Path lists used when each page is generated separately (page order) ---
    per_page_beamer_paths: List[str] = field(default_factory=list)
    per_page_pdf_paths: List[str] = field(default_factory=list)


# ==================== Planning Agent 相关 State ====================
@dataclass
Expand Down
Loading