diff --git a/Dockerfile b/Dockerfile
index a36e7215..4f630891 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -6,6 +6,8 @@ WORKDIR /app
EXPOSE 7860
ENV PYTHONUNBUFFERED=1
+ENV NOTO_FONT_PATH=/app
+ENV DOCKER_CONFIG=1
# Download all required fonts
ADD "https://github.com/satbyy/go-noto-universal/releases/download/v7.0/GoNotoKurrent-Regular.ttf" /app/
@@ -26,4 +28,4 @@ COPY . .
RUN uv pip install --system --no-cache .
-CMD ["pdf2zh", "-i"]
+CMD ["pdf2zh", "--config", "/app/config.json", "-i"]
diff --git a/README.md b/README.md
index 5938f2d6..d0f86f06 100644
--- a/README.md
+++ b/README.md
@@ -157,6 +157,8 @@ For docker deployment on cloud service:
+Docker deployments support a custom config file. More information can be found [here](./docs/ADVANCED.md).
+
diff --git a/docs/ADVANCED.md b/docs/ADVANCED.md
index d5c259c4..b945b741 100644
--- a/docs/ADVANCED.md
+++ b/docs/ADVANCED.md
@@ -244,6 +244,8 @@ example config.json
By default, the config file is saved in the `~/.config/PDFMathTranslate/config.json`. The program will start by reading the contents of config.json, and after that it will read the contents of the environment variables. When an environment variable is available, the contents of the environment variable are used first and the file is updated.
+For Docker users, a custom config file is also supported. You can mount your config file at `/app/config.json` in the container. If you don't mount one, everything still works: PDFMathTranslate will create a config file by itself. The config file will be saved at `/app/PDFMathTranslate/config.json` in the container.
+
[⬆️ Back to top](#toc)
---
diff --git a/docs/APIS.md b/docs/APIS.md
index 3b04a93d..1e857d27 100644
--- a/docs/APIS.md
+++ b/docs/APIS.md
@@ -55,7 +55,14 @@ In a more flexible way, you can communicate with the program using HTTP protocol
- Submit translate task
```bash
- curl http://localhost:11008/v1/translate -F "file=@example.pdf" -F "data={\"lang_in\":\"en\",\"lang_out\":\"zh\",\"service\":\"google\",\"thread\":4}"
+ curl http://localhost:11008/v1/translate -F "file=@example.pdf" -F 'data={"lang_in":"en","lang_out":"zh","service":"google","thread":4}'
+ {"id":"d9894125-2f4e-45ea-9d93-1a9068d2045a"}
+ ```
+
+ - Submit translate task with custom prompt
+
+ ```bash
+ curl http://localhost:11008/v1/translate -F "file=@example.pdf" -F 'data={"lang_in":"zh","lang_out":"zh","service":"google","thread":4,"prompt":"You are a professional, authentic machine translation engine. 如果目标语言为中文,你需要翻译成文言文。 Translate the following markdown source text to ${lang_out}. Keep the formula notation {{v*}} unchanged. Output translation directly without any additional text.\nSource Text: ${text}\nTranslated Text:"}'
{"id":"d9894125-2f4e-45ea-9d93-1a9068d2045a"}
```
@@ -92,4 +99,5 @@ In a more flexible way, you can communicate with the program using HTTP protocol
[⬆️ Back to top](#toc)
+
---
diff --git a/pdf2zh/config.py b/pdf2zh/config.py
index 050a39e5..441a1b9f 100644
--- a/pdf2zh/config.py
+++ b/pdf2zh/config.py
@@ -1,8 +1,13 @@
-import json
+import toml
from pathlib import Path
from threading import RLock # 改成 RLock
import os
import copy
+import re
+from typing import List
+import logging
+
+log = logging.getLogger(__name__)
class ConfigManager:
@@ -12,24 +17,33 @@ class ConfigManager:
@classmethod
def get_instance(cls):
"""获取单例实例"""
- # 先判断是否存在实例,如果不存在再加锁进行初始化
- if cls._instance is None:
- with cls._lock:
- if cls._instance is None:
- cls._instance = cls()
+ with cls._lock:
+ if cls._instance is None:
+ cls._instance = cls()
return cls._instance
- def __init__(self):
+ def __init__(self, config_path=None, versioncheck=False, version="0.0.0"):
# 防止重复初始化
if hasattr(self, "_initialized") and self._initialized:
return
self._initialized = True
- self._config_path = Path.home() / ".config" / "PDFMathTranslate" / "config.json"
- self._config_data = {}
+ if config_path:
+ self._config_path = Path(config_path).resolve()
+ else:
+ self._config_path = (
+ Path.home() / ".config" / "PDFMathTranslate" / "config.toml"
+ )
- # 这里不要再加锁,因为外层可能已经加了锁 (get_instance), RLock也无妨
- self._ensure_config_exists()
+ self._config_data = {"global": {}, "translators": {}}
+ self.load_env_variables()
+ self.load_default_values()
+ if versioncheck:
+ self._save_config(
+ Path.home() / ".config" / "PDFMathTranslate" / f"config.{version}.toml"
+ )
+ else:
+ self._ensure_config_exists()
def _ensure_config_exists(self, isInit=True):
"""确保配置文件存在,如果不存在则创建默认配置"""
@@ -38,57 +52,149 @@ def _ensure_config_exists(self, isInit=True):
if not self._config_path.exists():
if isInit:
self._config_path.parent.mkdir(parents=True, exist_ok=True)
- self._config_data = {} # 默认配置内容
- self._save_config()
+ self._save_config(self._config_path)
else:
raise ValueError(f"config file {self._config_path} not found!")
else:
self._load_config()
def _load_config(self):
- """从 config.json 中加载配置"""
- with self._lock: # 加锁确保线程安全
+ with self._lock:
with self._config_path.open("r", encoding="utf-8") as f:
- self._config_data = json.load(f)
+ loaded_data = toml.load(f)
+ self._config_data = {
+ "global": loaded_data.get("global", {}),
+ "translators": {},
+ }
+ for name, envs in loaded_data.get("translators", {}).items():
+ self._config_data["translators"][name] = envs
- def _save_config(self):
- """保存配置到 config.json"""
- with self._lock: # 加锁确保线程安全
- # 移除循环引用并写入
+ def _save_config(self, config_path):
+ with self._lock:
cleaned_data = self._remove_circular_references(self._config_data)
- with self._config_path.open("w", encoding="utf-8") as f:
- json.dump(cleaned_data, f, indent=4, ensure_ascii=False)
+ if not isinstance(cleaned_data, dict):
+ raise ValueError("Invalid data type for config file")
+ with config_path.open("w", encoding="utf-8") as f:
+ toml.dump(cleaned_data, f)
def _remove_circular_references(self, obj, seen=None):
- """递归移除循环引用"""
if seen is None:
seen = set()
obj_id = id(obj)
if obj_id in seen:
- return None # 遇到已处理过的对象,视为循环引用
+ return {}
seen.add(obj_id)
if isinstance(obj, dict):
return {
- k: self._remove_circular_references(v, seen) for k, v in obj.items()
+ str(k): (
+ self._remove_circular_references(v, seen)
+ if isinstance(v, (dict, list))
+ else v
+ )
+ for k, v in obj.items()
+ if isinstance(k, str) and not isinstance(v, (float, int, bool))
}
elif isinstance(obj, list):
- return [self._remove_circular_references(i, seen) for i in obj]
- return obj
+ return [
+ (
+ self._remove_circular_references(i, seen)
+ if isinstance(i, (dict, list))
+ else i
+ )
+ for i in obj
+ ]
+ elif isinstance(obj, str):
+ return obj
+ return str(obj)
+
+ def load_default_values(self):
+     """Fill the in-memory config with the built-in default values.
+
+     Writes five fixed keys into the ``"global"`` section (replacing any value
+     already stored under those keys) and then adds one ``"translators"``
+     entry per known translator class, keyed by the class's ``name`` and
+     holding a private copy of its ``envs`` mapping.
+
+     If the translator classes cannot be imported, the problem is logged and
+     only the global defaults are applied.
+     """
+     self._config_data["global"].update(
+         {
+             "PDF2ZH_LANG_FROM": "English",
+             "PDF2ZH_LANG_TO": "Simplified Chinese",
+             "CELERY_BROKER": "redis://127.0.0.1:6379/0",
+             "CELERY_RESULT": "redis://127.0.0.1:6379/0",
+             "USE_MODELSCOPE": "0",
+         }
+     )
+
+     translator_class_names = (
+         "GoogleTranslator",
+         "BingTranslator",
+         "DeepLTranslator",
+         "DeepLXTranslator",
+         "OllamaTranslator",
+         "XinferenceTranslator",
+         "AzureOpenAITranslator",
+         "OpenAITranslator",
+         "ZhipuTranslator",
+         "ModelScopeTranslator",
+         "SiliconTranslator",
+         "GeminiTranslator",
+         "AzureTranslator",
+         "TencentTranslator",
+         "DifyTranslator",
+         "AnythingLLMTranslator",
+         "ArgosTranslator",
+         "GorkTranslator",
+         "GroqTranslator",
+         "DeepseekTranslator",
+         "OpenAIlikedTranslator",
+         "QwenMtTranslator",
+     )
+     try:
+         # Imported lazily: pdf2zh.translator imports this module at its top level.
+         import importlib
+
+         translator_module = importlib.import_module("pdf2zh.translator")
+         translator_classes = [
+             getattr(translator_module, class_name)
+             for class_name in translator_class_names
+         ]
+     except (ImportError, AttributeError) as e:
+         # All-or-nothing, as before: one missing class means no translator defaults.
+         log.error(f"Warning: Failed to import some translator classes: {e}")
+         return
+
+     for translator_class in translator_classes:
+         # ``envs`` is a class attribute. Storing it by reference would let later
+         # in-place config writes (e.g. get_env_by_translatername) modify the
+         # translator class itself, so keep an independent copy here.
+         self._config_data["translators"][translator_class.name] = copy.deepcopy(
+             translator_class.envs
+         )
+
+ def load_env_variables(self):
+     """Copy ``PDF2ZH_*`` environment variables into the ``"global"`` section.
+
+     A variable is only added when no entry with the same key exists yet;
+     values that are already stored are left untouched.
+     """
+     for env_name, env_value in os.environ.items():
+         if not env_name.startswith("PDF2ZH_"):
+             continue
+         self._config_data["global"].setdefault(env_name, env_value)
@classmethod
- def custome_config(cls, file_path):
+ def custom_config(cls, file_path):
"""使用自定义路径加载配置文件"""
- custom_path = Path(file_path)
+ custom_path = Path(file_path).resolve()
if not custom_path.exists():
raise ValueError(f"Config file {custom_path} not found!")
- # 加锁
+
with cls._lock:
- instance = cls()
- instance._config_path = custom_path
- # 此处传 isInit=False,若不存在则报错;若存在则正常 _load_config()
- instance._ensure_config_exists(isInit=False)
- cls._instance = instance
+ cls._instance = cls(config_path=custom_path)
@classmethod
def get(cls, key, default=None):
@@ -103,17 +209,14 @@ def get(cls, key, default=None):
if key in os.environ:
value = os.environ[key]
instance._config_data[key] = value
- instance._save_config()
+ instance._save_config(instance._config_path)
return value
# 若 default 不为 None,则设置并保存
if default is not None:
instance._config_data[key] = default
- instance._save_config()
+ instance._save_config(instance._config_path)
return default
-
- # 找不到则抛出异常
- # raise KeyError(f"{key} is not found in config file or environment variables.")
return default
@classmethod
@@ -122,66 +225,48 @@ def set(cls, key, value):
instance = cls.get_instance()
with instance._lock:
instance._config_data[key] = value
- instance._save_config()
+ instance._save_config(instance._config_path)
@classmethod
def get_translator_by_name(cls, name):
"""根据 name 获取对应的 translator 配置"""
instance = cls.get_instance()
- translators = instance._config_data.get("translators", [])
- for translator in translators:
- if translator.get("name") == name:
- return translator["envs"]
- return None
+ translators = instance._config_data["translators"].get(name, {})
+ if translators == {}:
+ raise ValueError("no translator config found.")
+ return translators
@classmethod
- def set_translator_by_name(cls, name, new_translator_envs):
+ def set_translator_by_name(cls, name: str, envs: dict):
"""根据 name 设置或更新 translator 配置"""
instance = cls.get_instance()
with instance._lock:
- translators = instance._config_data.get("translators", [])
- for translator in translators:
- if translator.get("name") == name:
- translator["envs"] = copy.deepcopy(new_translator_envs)
- instance._save_config()
- return
- translators.append(
- {"name": name, "envs": copy.deepcopy(new_translator_envs)}
- )
- instance._config_data["translators"] = translators
- instance._save_config()
+ instance._config_data["translators"][name] = copy.deepcopy(envs)
+ instance._save_config(instance._config_path)
@classmethod
def get_env_by_translatername(cls, translater_name, name, default=None):
- """根据 name 获取对应的 translator 配置"""
+ """根据 name 获取对应的 translator 的具体配置"""
instance = cls.get_instance()
- translators = instance._config_data.get("translators", [])
- for translator in translators:
- if translator.get("name") == translater_name.name:
- if translator["envs"][name]:
- return translator["envs"][name]
- else:
- with instance._lock:
- translator["envs"][name] = default
- instance._save_config()
- return default
-
- with instance._lock:
- translators = instance._config_data.get("translators", [])
- for translator in translators:
- if translator.get("name") == translater_name.name:
- translator["envs"][name] = default
- instance._save_config()
+ type(translater_name)
+ translator = instance._config_data["translators"].get(translater_name.name, {})
+ if translator:
+ if name in translator.keys():
+ return translator[name]
+ else:
+ with instance._lock:
+ instance._config_data["translators"][translater_name.name][
+ name
+ ] = default
+ instance._save_config(instance._config_path)
return default
- translators.append(
- {
- "name": translater_name.name,
- "envs": copy.deepcopy(translater_name.envs),
- }
- )
- instance._config_data["translators"] = translators
- instance._save_config()
- return default
+ else:
+ with instance._lock:
+ instance._config_data["translators"][translater_name.name][
+ name
+ ] = default
+ instance._save_config(instance._config_path)
+ return default
@classmethod
def delete(cls, key):
@@ -190,7 +275,7 @@ def delete(cls, key):
with instance._lock:
if key in instance._config_data:
del instance._config_data[key]
- instance._save_config()
+ instance._save_config(instance._config_path)
@classmethod
def clear(cls):
@@ -198,17 +283,260 @@ def clear(cls):
instance = cls.get_instance()
with instance._lock:
instance._config_data = {}
- instance._save_config()
+ instance._save_config(instance._config_path)
@classmethod
def all(cls):
"""返回所有配置项"""
instance = cls.get_instance()
# 这里只做读取操作,一般可不加锁。不过为了保险也可以加锁。
- return instance._config_data
+ return instance._config_data, instance._config_path
@classmethod
- def remove(cls):
- instance = cls.get_instance()
- with instance._lock:
- os.remove(instance._config_path)
+ def gap(cls):
+ """
+ Migrate a legacy JSON config into the TOML config.
+
+ Takes no arguments besides ``cls``; every path is fixed under
+ ``~/.config/PDFMathTranslate``. When ``config.json`` is found there, its
+ top-level keys other than ``"translators"`` are copied into the ``"global"``
+ section of the singleton's config data, each item of its ``"translators"``
+ list is stored as ``translators[<name>] = <envs>``, and the merged data is
+ written to ``config.toml``. Any exception raised during that conversion is
+ logged and not re-raised. Also deletes ``config.backup.json`` when it is found.
+
+ NOTE(review): the legacy ``config.json`` is left in place (its removal is
+ commented out below) - confirm this is intended.
+ """
+ newtoml = Path.home() / ".config" / "PDFMathTranslate" / "config.toml"
+ json_file = Path.home() / ".config" / "PDFMathTranslate" / "config.json"
+ backupfile = Path.home() / ".config" / "PDFMathTranslate" / "config.backup.json"
+ if os.path.exists(json_file):
+ import json
+
+ # This is the live config dict of the singleton, so the loops below modify it in place.
+ newdata = cls.get_instance()._config_data
+ try:
+ with open(json_file, "r", encoding="utf-8") as jf:
+ data = json.load(jf)
+ for key, value in data.items():
+ if key != "translators":
+ newdata["global"][key] = value
+ # The legacy format keeps translators as a list of {"name": ..., "envs": ...} items.
+ # NOTE(review): a config.json without a "translators" key raises KeyError here,
+ # after the global keys were already merged; it is only logged by the handler below.
+ for t in data["translators"]:
+ newdata["translators"][t["name"]] = t["envs"]
+ with open(newtoml, "w", encoding="utf-8") as tf:
+ toml.dump(newdata, tf)
+
+ # os.remove(json_file)
+ except Exception as e:
+ log.error(f"Error during conversion: {e}")
+ if os.path.exists(backupfile):
+ os.remove(backupfile)
+
+ @classmethod
+ def versionconfigcheck(cls, version: str):
+ """Reconcile the versioned config file with the running program version.
+
+ Looks for versioned config files in ``~/.config/PDFMathTranslate``. When at
+ least one is found and ``version`` is not older than the version of the first
+ file returned, that file is deleted and ``config.<X.Y.Z>.toml`` is written
+ with default values. Otherwise a ``ConfigManager`` is constructed with
+ ``versioncheck=True``, which saves the config to a versioned file.
+
+ :param version: Version string of the running program; only its first three
+ dot-separated parts are used for the new file name.
+ """
+ newfilename = (
+ Path.home()
+ / ".config"
+ / "PDFMathTranslate"
+ / f'config.{".".join(version.split(".")[:3])}.toml'
+ )
+ # NOTE(review): the directory is scanned up to three times below; the result
+ # could be captured once so all three uses see the same listing.
+ if get_versioned_config_files(Path.home() / ".config" / "PDFMathTranslate"):
+ log.info("config exist")
+ if (
+ compare_versions(
+ version,
+ get_versioned_config_files(
+ Path.home() / ".config" / "PDFMathTranslate"
+ )[0][1],
+ )
+ # compare_versions returns -1, 0 or 1, so "> -1" means version >= the stored version.
+ > -1
+ ):
+ delete_and_create_file(
+ get_versioned_config_files(
+ Path.home() / ".config" / "PDFMathTranslate"
+ )[0][0],
+ newfilename,
+ )
+ else:
+ # NOTE(review): the untruncated ``version`` is passed on here, while newfilename
+ # above keeps only the first three parts - confirm the two names are meant to differ.
+ cls(versioncheck=True, version=version)
+
+
+class DefaultConfig:
+    """Stand-alone holder for a default configuration.
+
+    Keeps a ``{"global": {...}, "translators": {...}}`` dict that can be filled
+    with built-in defaults and ``PDF2ZH_*`` environment variables, and that can
+    be read from or written to a TOML file. It never touches the
+    ``ConfigManager`` singleton.
+    """
+
+    # Classes looked up on ``pdf2zh.translator`` to seed the "translators" section.
+    _TRANSLATOR_CLASS_NAMES = (
+        "GoogleTranslator",
+        "BingTranslator",
+        "DeepLTranslator",
+        "DeepLXTranslator",
+        "OllamaTranslator",
+        "XinferenceTranslator",
+        "AzureOpenAITranslator",
+        "OpenAITranslator",
+        "ZhipuTranslator",
+        "ModelScopeTranslator",
+        "SiliconTranslator",
+        "GeminiTranslator",
+        "AzureTranslator",
+        "TencentTranslator",
+        "DifyTranslator",
+        "AnythingLLMTranslator",
+        "ArgosTranslator",
+        "GorkTranslator",
+        "GroqTranslator",
+        "DeepseekTranslator",
+        "OpenAIlikedTranslator",
+        "QwenMtTranslator",
+    )
+
+    def __init__(self):
+        """Start with empty ``"global"`` and ``"translators"`` sections."""
+        self._config_data = {"global": {}, "translators": {}}
+
+    def load_default_values(self):
+        """Fill the config with the built-in default values.
+
+        Writes five fixed keys into ``"global"`` (replacing existing values for
+        those keys) and adds one ``"translators"`` entry per known translator
+        class, keyed by the class's ``name`` and holding a private copy of its
+        ``envs`` mapping. If the translator classes cannot be imported, the
+        problem is logged and only the global defaults are applied.
+        """
+        self._config_data["global"].update(
+            {
+                "PDF2ZH_LANG_FROM": "English",
+                "PDF2ZH_LANG_TO": "Simplified Chinese",
+                "CELERY_BROKER": "redis://127.0.0.1:6379/0",
+                "CELERY_RESULT": "redis://127.0.0.1:6379/0",
+                "USE_MODELSCOPE": "0",
+            }
+        )
+
+        try:
+            # Imported lazily: pdf2zh.translator imports this module at its top level.
+            import importlib
+
+            translator_module = importlib.import_module("pdf2zh.translator")
+            translator_classes = [
+                getattr(translator_module, class_name)
+                for class_name in self._TRANSLATOR_CLASS_NAMES
+            ]
+        except (ImportError, AttributeError) as e:
+            # All-or-nothing, as before: one missing class means no translator defaults.
+            log.error(f"Warning: Failed to import some translator classes: {e}")
+            return
+
+        for translator_class in translator_classes:
+            # ``envs`` is a class attribute; copy it so that edits to this config
+            # can never leak back into the translator class.
+            self._config_data["translators"][translator_class.name] = copy.deepcopy(
+                translator_class.envs
+            )
+
+    def load_env_variables(self):
+        """Add ``PDF2ZH_*`` environment variables to ``"global"``.
+
+        Keys that are already present keep their current value.
+        """
+        for key, value in os.environ.items():
+            if key.startswith("PDF2ZH_"):
+                self._config_data["global"].setdefault(key, value)
+
+    def save_config(self, config_path):
+        """Write the config as TOML to *config_path*.
+
+        The parent directory is created when it does not exist yet, matching
+        what ``ConfigManager`` does before it writes its own file.
+        """
+        target = Path(config_path)
+        target.parent.mkdir(parents=True, exist_ok=True)
+        with target.open("w", encoding="utf-8") as f:
+            toml.dump(self._config_data, f)
+
+    def default_config(self):
+        """Return the config dict itself (not a copy)."""
+        return self._config_data
+
+    def load_config(self, config_path):
+        """Replace the config with the contents of the TOML file *config_path*.
+
+        Only the ``"global"`` and ``"translators"`` tables are kept. A missing
+        file is ignored and leaves the current config unchanged.
+        """
+        source = Path(config_path)
+        if not source.exists():
+            return
+        with source.open("r", encoding="utf-8") as f:
+            loaded_data = toml.load(f)
+        self._config_data = {
+            "global": loaded_data.get("global", {}),
+            "translators": dict(loaded_data.get("translators", {})),
+        }
+
+
+def get_versioned_config_files(directory):
+    """
+    Find the versioned config files located directly inside *directory*.
+
+    A file qualifies when its name starts with ``config`` and contains a
+    version number of the form ``X.Y.Z``.
+
+    :param directory: Path of the directory to scan.
+    :return: List of ``(file_path, version)`` tuples ordered from the newest
+        version to the oldest (ties are ordered by file path). The list is
+        empty when nothing matches or when the directory is missing or not
+        readable; those two failures are logged.
+    """
+    version_pattern = re.compile(r"(\d+\.\d+\.\d+)")
+    try:
+        entries = os.listdir(directory)
+    except FileNotFoundError:
+        log.error(f"Error: Directory '{directory}' not found.")
+        return []
+    except PermissionError:
+        log.error(f"Error: Permission denied for directory '{directory}'.")
+        return []
+
+    found = []
+    for entry in entries:
+        full_path = os.path.join(directory, entry)
+        if not entry.startswith("config") or not os.path.isfile(full_path):
+            continue
+        if match := version_pattern.search(entry):
+            found.append((full_path, match.group(1)))
+
+    # os.listdir() yields entries in arbitrary order, and callers take element 0,
+    # so sort to make that element well defined: the highest version comes first.
+    found.sort(
+        key=lambda item: (tuple(-int(part) for part in item[1].split(".")), item[0])
+    )
+    return found
+
+
+def compare_versions(version1: str, version2: str) -> int:
+    """
+    Order two version strings by their first three dot-separated parts.
+
+    Purely numeric parts are compared as integers. If either part of a pair is
+    not numeric, both are compared as strings. Versions with fewer than three
+    parts are padded with zeros, and any parts after the third are ignored.
+
+    :param version1: First version number as a string (e.g., "1.2.3").
+    :param version2: Second version number as a string (e.g., "1.2.4").
+    :return: -1 if version1 < version2, 1 if version1 > version2, 0 if equal.
+    """
+    part_count = 3  # Only this many leading parts take part in the comparison
+
+    def parse_version(version: str) -> List:
+        fields = [
+            int(piece) if piece.isdigit() else piece
+            for piece in version.split(".")[:part_count]
+        ]
+        # Pad short versions with zeros so that "1.2" equals "1.2.0".
+        return fields + [0] * (part_count - len(fields))
+
+    for left, right in zip(parse_version(version1), parse_version(version2)):
+        both_numeric = isinstance(left, int) and isinstance(right, int)
+        if not both_numeric:
+            # Mixed or textual parts are ordered as plain strings.
+            left, right = str(left), str(right)
+        if left != right:
+            return -1 if left < right else 1
+
+    return 0
+
+
+def delete_and_create_file(delete_path, create_path):
+    """
+    Remove one file and write a fresh default config file.
+
+    Any exception raised along the way is logged and not re-raised.
+
+    :param delete_path: Path of the file to delete; skipped when it does not exist.
+    :param create_path: Path of the new file to create.
+    """
+    try:
+        # Get rid of the old file first, if there is one.
+        stale_file_present = os.path.exists(delete_path)
+        if stale_file_present:
+            os.remove(delete_path)
+
+        # Then write a new file that holds the default values.
+        defaults = DefaultConfig()
+        defaults.load_default_values()
+        defaults.save_config(create_path)
+    except Exception as e:
+        log.error(f"操作失败: {e}")
diff --git a/pdf2zh/gui.py b/pdf2zh/gui.py
index 9671703b..4b0b2e79 100644
--- a/pdf2zh/gui.py
+++ b/pdf2zh/gui.py
@@ -461,6 +461,7 @@ def on_select_service(service, evt: gr.EventData):
value=ConfigManager.get_env_by_translatername(
translator, env[0], env[1]
),
+ type="password" if i in translator.iskey else "text",
)
_envs[-1] = gr.update(visible=translator.CustomPrompt)
return _envs
diff --git a/pdf2zh/high_level.py b/pdf2zh/high_level.py
index 03358fd3..ee471d18 100644
--- a/pdf2zh/high_level.py
+++ b/pdf2zh/high_level.py
@@ -382,7 +382,7 @@ def translate(
def download_remote_fonts(lang: str):
- URL_PREFIX = "https://github.com/timelic/source-han-serif/releases/download/main/"
+ URL_PREFIX = ConfigManager.get("FONT_URL_PREFIX")
LANG_NAME_MAP = {
**{la: "GoNotoKurrent-Regular.ttf" for la in noto_list},
**{
@@ -398,14 +398,32 @@ def download_remote_fonts(lang: str):
}
font_name = LANG_NAME_MAP.get(lang, "GoNotoKurrent-Regular.ttf")
- cache_folder = os.path.join(os.path.expanduser("~"), ".cache", "pdf2zh")
- os.makedirs(cache_folder, exist_ok=True)
# docker
- font_path = ConfigManager.get("NOTO_FONT_PATH", Path("/app", font_name).as_posix())
- if not Path(font_path).exists():
- font_path = Path(cache_folder, font_name).as_posix()
- if not Path(font_path).exists():
+ font_path = ConfigManager.get(
+ "NOTO_FONT_PATH", os.path.join(os.path.expanduser("~"), ".cache", "pdf2zh")
+ )
+ if not Path(font_path, font_name).exists():
+ font_path = Path(font_path, font_name).as_posix()
print(f"Downloading {font_name}...")
- urllib.request.urlretrieve(f"{URL_PREFIX}{font_name}", font_path)
-
- return font_path
+ with tqdm.tqdm(
+ unit="B", unit_scale=True, leave=False, unit_divisor=1024, desc=font_path
+ ) as t:
+ last_downloaded = [0] # 使用列表保存上一次的下载量
+
+ def reporthook(block_num, block_size, total_size):
+ if total_size > 0:
+ t.total = total_size
+ downloaded = block_num * block_size
+ # 计算本次的增量
+ delta = downloaded - last_downloaded[0]
+ t.update(delta)
+ last_downloaded[0] = downloaded
+
+ urllib.request.urlretrieve(
+ f"{URL_PREFIX}{font_name}", font_path, reporthook=reporthook
+ )
+
+ print(f"Downloaded {font_path}...")
+ return font_path
+ print(font_path)
+ return Path(font_path, font_name).as_posix()
diff --git a/pdf2zh/pdf2zh.py b/pdf2zh/pdf2zh.py
index 68969490..cb4327b0 100644
--- a/pdf2zh/pdf2zh.py
+++ b/pdf2zh/pdf2zh.py
@@ -174,6 +174,13 @@ def create_parser() -> argparse.ArgumentParser:
help="Use experimental backend babeldoc.",
)
+ parse_params.add_argument(
+ "--CN",
+ default=False,
+ action="store_true",
+ help="download in mainland China.",
+ )
+
parse_params.add_argument(
"--skip-subset-fonts",
action="store_true",
@@ -231,12 +238,27 @@ def main(args: Optional[List[str]] = None) -> int:
parsed_args = parse_args(args)
- if parsed_args.config:
- ConfigManager.custome_config(parsed_args.config)
+ ConfigManager.versionconfigcheck(__version__)
+ ConfigManager.gap()
if parsed_args.debug:
log.setLevel(logging.DEBUG)
+ if parsed_args.config:
+ ConfigManager.custom_config(parsed_args.config)
+
+ if parsed_args.CN:
+ ConfigManager.set(
+ "FONT_URL_PREFIX",
+ "https://gitee.com/xzk1234/source-han-serif/releases/download/0.1/",
+ )
+ os.environ["HF_ENDPOINT"] = "https://hf-mirror.com"
+ else:
+ ConfigManager.set(
+ "FONT_URL_PREFIX",
+ "https://github.com/timelic/source-han-serif/releases/download/main/",
+ )
+
if parsed_args.onnx:
ModelInstance.value = OnnxModel(parsed_args.onnx)
else:
diff --git a/pdf2zh/translator.py b/pdf2zh/translator.py
index 17d9b991..27cdffee 100644
--- a/pdf2zh/translator.py
+++ b/pdf2zh/translator.py
@@ -7,17 +7,6 @@
from copy import copy
from string import Template
from typing import cast
-
-logger = logging.getLogger(__name__)
-
-try:
- import argostranslate.package
- import argostranslate.translate
-except ImportError:
- logger.warning(
- "argos-translate is not installed, argostranslate will not work. if you want to use argostranslate, please install it."
- )
-
import deepl
import ollama
import openai
@@ -35,6 +24,15 @@
from pdf2zh.cache import TranslationCache
from pdf2zh.config import ConfigManager
+logger = logging.getLogger(__name__)
+try:
+ import argostranslate.package
+ import argostranslate.translate
+except ImportError:
+ logger.warning(
+ "argos-translate is not installed, argostranslate will not work. if you want to use argostranslate, please install it."
+ )
+
def remove_control_characters(s):
return "".join(ch for ch in s if unicodedata.category(ch)[0] != "C")
@@ -43,6 +41,7 @@ def remove_control_characters(s):
class BaseTranslator:
name = "base"
envs = {}
+ iskey = []
lang_map: dict[str, str] = {}
CustomPrompt = False
ignore_cache = False
@@ -246,6 +245,8 @@ class DeepLTranslator(BaseTranslator):
"DEEPL_AUTH_KEY": None,
}
lang_map = {"zh": "zh-Hans"}
+ # not work
+ iskey = [0]
def __init__(self, lang_in, lang_out, model, envs=None, **kwargs):
self.set_envs(envs)
@@ -268,6 +269,8 @@ class DeepLXTranslator(BaseTranslator):
"DEEPLX_ACCESS_TOKEN": None,
}
lang_map = {"zh": "zh-Hans"}
+ # not work
+ iskey = [1]
def __init__(self, lang_in, lang_out, model, envs=None, **kwargs):
self.set_envs(envs)
@@ -400,6 +403,8 @@ class OpenAITranslator(BaseTranslator):
"OPENAI_MODEL": "gpt-4o-mini",
}
CustomPrompt = True
+ # not work
+ iskey = [1]
def __init__(
self,
@@ -458,6 +463,8 @@ class AzureOpenAITranslator(BaseTranslator):
"AZURE_OPENAI_MODEL": "gpt-4o-mini",
}
CustomPrompt = True
+ # not work
+ iskey = [1]
def __init__(
self,
@@ -502,6 +509,8 @@ class ModelScopeTranslator(OpenAITranslator):
"MODELSCOPE_MODEL": "Qwen/Qwen2.5-32B-Instruct",
}
CustomPrompt = True
+ # not work
+ iskey = [1]
def __init__(
self,
@@ -531,6 +540,8 @@ class ZhipuTranslator(OpenAITranslator):
"ZHIPU_MODEL": "glm-4-flash",
}
CustomPrompt = True
+ # not work
+ iskey = [0]
def __init__(self, lang_in, lang_out, model, envs=None, prompt=None):
self.set_envs(envs)
@@ -567,6 +578,8 @@ class SiliconTranslator(OpenAITranslator):
"SILICON_MODEL": "Qwen/Qwen2.5-7B-Instruct",
}
CustomPrompt = True
+ # not work
+ iskey = [0]
def __init__(self, lang_in, lang_out, model, envs=None, prompt=None):
self.set_envs(envs)
@@ -587,6 +600,8 @@ class GeminiTranslator(OpenAITranslator):
"GEMINI_MODEL": "gemini-1.5-flash",
}
CustomPrompt = True
+ # not work
+ iskey = [0]
def __init__(self, lang_in, lang_out, model, envs=None, prompt=None):
self.set_envs(envs)
@@ -607,6 +622,8 @@ class AzureTranslator(BaseTranslator):
"AZURE_API_KEY": None,
}
lang_map = {"zh": "zh-Hans"}
+ # not work
+ iskey = [1]
def __init__(self, lang_in, lang_out, model, envs=None, **kwargs):
self.set_envs(envs)
@@ -638,11 +655,20 @@ class TencentTranslator(BaseTranslator):
"TENCENTCLOUD_SECRET_ID": None,
"TENCENTCLOUD_SECRET_KEY": None,
}
+ # not work
+ iskey = [0, 1]
def __init__(self, lang_in, lang_out, model, envs=None, **kwargs):
self.set_envs(envs)
super().__init__(lang_in, lang_out, model)
- cred = credential.DefaultCredentialProvider().get_credential()
+
+ try:
+ cred = credential.DefaultCredentialProvider().get_credential()
+ except EnvironmentError:
+ cred = credential.Credential(
+ self.envs["TENCENTCLOUD_SECRET_ID"],
+ self.envs["TENCENTCLOUD_SECRET_KEY"],
+ )
self.client = TmtClient(cred, "ap-beijing")
self.req = TextTranslateRequest()
self.req.Source = self.lang_in
@@ -662,6 +688,8 @@ class AnythingLLMTranslator(BaseTranslator):
"AnythingLLM_APIKEY": None,
}
CustomPrompt = True
+ # not work
+ iskey = [1]
def __init__(self, lang_out, lang_in, model, envs=None, prompt=None):
self.set_envs(envs)
@@ -699,6 +727,8 @@ class DifyTranslator(BaseTranslator):
"DIFY_API_URL": None, # 填写实际 Dify API 地址
"DIFY_API_KEY": None, # 替换为实际 API 密钥
}
+ # not work
+ iskey = [1]
def __init__(self, lang_out, lang_in, model, envs=None, **kwargs):
self.set_envs(envs)
@@ -781,6 +811,8 @@ class GorkTranslator(OpenAITranslator):
"GORK_MODEL": "grok-2-1212",
}
CustomPrompt = True
+ # not work
+ iskey = [0]
def __init__(self, lang_in, lang_out, model, envs=None, prompt=None):
self.set_envs(envs)
@@ -799,6 +831,8 @@ class GroqTranslator(OpenAITranslator):
"GROQ_MODEL": "llama-3-3-70b-versatile",
}
CustomPrompt = True
+ # not work
+ iskey = [0]
def __init__(self, lang_in, lang_out, model, envs=None, prompt=None):
self.set_envs(envs)
@@ -817,6 +851,8 @@ class DeepseekTranslator(OpenAITranslator):
"DEEPSEEK_MODEL": "deepseek-chat",
}
CustomPrompt = True
+ # not work
+ iskey = [0]
def __init__(self, lang_in, lang_out, model, envs=None, prompt=None):
self.set_envs(envs)
@@ -836,6 +872,8 @@ class OpenAIlikedTranslator(OpenAITranslator):
"OPENAILIKED_MODEL": None,
}
CustomPrompt = True
+ # not work
+ iskey = [1]
def __init__(self, lang_in, lang_out, model, envs=None, prompt=None):
self.set_envs(envs)
@@ -870,6 +908,8 @@ class QwenMtTranslator(OpenAITranslator):
"ALI_DOMAINS": "This sentence is extracted from a scientific paper. When translating, please pay close attention to the use of specialized troubleshooting terminologies and adhere to scientific sentence structures to maintain the technical rigor and precision of the original text.",
}
CustomPrompt = True
+ # not work
+ iskey = [1]
def __init__(self, lang_in, lang_out, model, envs=None, prompt=None):
self.set_envs(envs)