Skip to content

Commit

Permalink
fix: improve regex for content extraction in translator response
Browse files Browse the repository at this point in the history
  • Loading branch information
awwaawwa committed Feb 25, 2025
1 parent 4ea86e7 commit f56a7d1
Showing 1 changed file with 4 additions and 1 deletion.
5 changes: 4 additions & 1 deletion pdf2zh/translator.py
Original file line number Diff line number Diff line change
Expand Up @@ -423,6 +423,9 @@ def __init__(
self.prompttext = prompt
self.add_cache_impact_parameters("temperature", self.options["temperature"])
self.add_cache_impact_parameters("prompt", self.prompt("", self.prompttext))
+ think_filter_regex = r"^<think>.+?\n*(</think>|\n)+(</think>)\n*"
+ self.add_cache_impact_parameters('think_filter_regex', think_filter_regex)
+ self.think_filter_regex = re.compile(think_filter_regex, flags=re.DOTALL)

def do_translate(self, text) -> str:
response = self.client.chat.completions.create(
Expand All @@ -434,7 +437,7 @@ def do_translate(self, text) -> str:
if hasattr(response, "error"):
raise ValueError("Error response from Service", response.error)
content = response.choices[0].message.content.strip()
- content = re.sub(r"^<think>.+?</think>", "", content, flags=re.DOTALL).strip()
+ content = self.think_filter_regex.sub("", content).strip()
return content

def get_formular_placeholder(self, id: int):
Expand Down

0 comments on commit f56a7d1

Please sign in to comment.