Skip to content

报错,'Connection aborted.', ConnectionResetError(104, 'Connection reset by peer'),感谢作者的回复 #40

Open
@iMoriton

Description

@iMoriton
import json
import time

from pygtrans import Translate

# Translate the 'instruction' / 'input' / 'output' fields of every record in
# CoT_data.json to Tibetan ('bo') and write the result to
# tibetan_instructions.json.  Requests are sent in batches and each batch is
# retried on transient network errors (e.g. "Connection reset by peer") so
# one dropped connection does not abort the whole run.

BATCH_SIZE = 100     # texts per client.translate() call
MAX_ATTEMPTS = 5     # total tries per batch before giving up
RETRY_DELAY = 2.0    # seconds; multiplied by the attempt number (linear back-off)
FIELDS = ('instruction', 'input', 'output')


def translate_batch(client, batch, target):
    """Translate one batch of strings, retrying on transient failures.

    Args:
        client: object exposing ``translate(list_of_str, target=...)``.
        batch: non-empty list of source strings.
        target: target language code passed through to ``client.translate``.

    Returns:
        A sequence with exactly one translation result per input string.

    Raises:
        RuntimeError: if every attempt failed, chained to the last error.
    """
    last_error = None
    for attempt in range(1, MAX_ATTEMPTS + 1):
        try:
            response = client.translate(batch, target=target)
        except OSError as err:
            # ConnectionResetError is an OSError; requests' exception
            # hierarchy also derives from IOError/OSError, so this covers
            # dropped or aborted connections without importing requests.
            last_error = err
        else:
            # NOTE(review): assumes a successful call with a list input
            # returns a list/tuple of the same length, and that a failed
            # call returns some other object -- confirm against pygtrans.
            if isinstance(response, (list, tuple)) and len(response) == len(batch):
                return response
            last_error = RuntimeError(
                f"unexpected translate() response: {response!r}"
            )
        if attempt < MAX_ATTEMPTS:
            time.sleep(RETRY_DELAY * attempt)
    raise RuntimeError(
        f"translation failed after {MAX_ATTEMPTS} attempts"
    ) from last_error


# Load the original dataset.
with open('CoT_data.json', 'r', encoding='utf-8') as f:
    original_data = json.load(f)

# Initialise the translation client.
client = Translate(
    domain='com.bo',
    proxies={'http': 'http://172.xx.xx.xxx:7899',
             'https': 'http://172.xx.xx.xxx:7899'}
)

# One output record per input record; fields that are missing or empty in
# the source stay as empty strings.
translated_data = [
    {"instruction": "", "input": "", "output": ""}
    for _ in original_data
]

# Process field by field so each request carries texts of a single field.
for field in FIELDS:
    # Collect the non-empty values together with the record index each one
    # came from, so results can be written back to the right record.
    indexes = [
        idx for idx, item in enumerate(original_data)
        if field in item and item[field]
    ]
    texts = [original_data[idx][field] for idx in indexes]

    # Translate the current field in batches.
    results = []
    for start in range(0, len(texts), BATCH_SIZE):
        results.extend(
            translate_batch(client, texts[start:start + BATCH_SIZE], 'bo')
        )

    # Backfill: translate_batch guarantees one result per text, so the two
    # lists are aligned position by position.
    for idx, result in zip(indexes, results):
        translated_data[idx][field] = result.translatedText

# Save the translated dataset.
with open('tibetan_instructions.json', 'w', encoding='utf-8') as f:
    json.dump(translated_data, f, ensure_ascii=False, indent=2)

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions