Skip to content

Commit 32d4339

Browse files
committed
updates
1 parent 626fd14 commit 32d4339

File tree

3 files changed

+369
-0
lines changed

3 files changed

+369
-0
lines changed

environment-cn.yml

+20
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
name: quantecon
2+
channels:
- defaults
- conda-forge
5+
dependencies:
6+
- python=3.12
7+
- anaconda=2024.10
8+
- pip
9+
- pip:
10+
- jupyter-book==1.0.3
11+
- quantecon-book-theme==0.8.2
12+
- sphinx-tojupyter==0.3.0
13+
- sphinxext-rediraffe==0.2.7
14+
- sphinx_reredirects==0.1.4
15+
- sphinx-exercise==1.0.1
16+
- sphinx-proof==0.2.0
17+
- ghp-import==1.1.0
18+
- sphinxcontrib-youtube==1.3.0 # Version 1.3.0 is required because quantecon-book-theme is only compatible with sphinx<=5
19+
- sphinx-togglebutton==0.3.2
20+
- --index-url https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple

tools/translation.ipynb

+241
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,241 @@
1+
{
2+
"cells": [
3+
{
 "cell_type": "code",
 "execution_count": null,
 "metadata": {
  "tags": []
 },
 "outputs": [],
 "source": [
  "\"\"\"\n",
  "Before running the file, config the environment by adding the OpenAI API key:\n",
  "\n",
  "\n",
  "echo \"export OPENAI_API_KEY='yourkey'\" >> ~/.zshrc\n",
  "source ~/.zshrc\n",
  "echo $OPENAI_API_KEY # Test to see if it is added\n",
  "\"\"\"\n",
  "\n",
  "import openai\n",
  "import re"
 ]
},
36+
{
 "cell_type": "code",
 "execution_count": null,
 "metadata": {
  "tags": []
 },
 "outputs": [],
 "source": [
  "# Requires OPENAI_API_KEY to be set in the environment (see cell above)\n",
  "client = openai.OpenAI()\n",
  "print(openai.beta.assistants.list())\n",
  "assistant_cn_id = 'asst_zjzyGwEZ1rVuJYWNQk6nzQTA'"
 ]
},
62+
{
 "cell_type": "code",
 "execution_count": null,
 "metadata": {
  "tags": []
 },
 "outputs": [],
 "source": [
  "def split_text(content, chunk_size=3000):\n",
  "    \"\"\"Split markdown into chunks of roughly chunk_size characters,\n",
  "    cutting at line breaks and keeping {code-cell} blocks intact.\"\"\"\n",
  "    chunks = []\n",
  "    start = 0\n",
  "    n = len(content)\n",
  "    while start < n:\n",
  "        end = start + chunk_size\n",
  "\n",
  "        # If we are at the end of the content, just append the rest\n",
  "        if end >= n:\n",
  "            chunks.append(content[start:])\n",
  "            break\n",
  "\n",
  "        # Find the nearest line break before the chunk size\n",
  "        next_line_break = content.rfind('\\n', start, end)\n",
  "        if next_line_break == -1:\n",
  "            # If no line break is found within the chunk size, extend to the end\n",
  "            next_line_break = end\n",
  "\n",
  "        # Check if a code cell starts within the chunk\n",
  "        code_cell_start = content.find('```{code-cell}', start, next_line_break)\n",
  "        if code_cell_start != -1:\n",
  "            # If a code cell starts, extend the chunk past its closing fence\n",
  "            code_cell_end = content.find('```', code_cell_start + 14)\n",
  "            if code_cell_end != -1:\n",
  "                nl_after = content.find('\\n', code_cell_end)\n",
  "                # bug fix: find() == -1 previously became 0 via +1 -> infinite loop\n",
  "                next_line_break = nl_after + 1 if nl_after != -1 else n\n",
  "\n",
  "        # bug fix: guarantee forward progress when the only newline is at start\n",
  "        if next_line_break <= start:\n",
  "            next_line_break = end\n",
  "\n",
  "        chunks.append(content[start:next_line_break].strip())\n",
  "        start = next_line_break\n",
  "\n",
  "    return chunks"
 ]
},
102+
{
 "cell_type": "code",
 "execution_count": null,
 "metadata": {
  "tags": []
 },
 "outputs": [],
 "source": [
  "# Sample lecture content used to exercise the translation pipeline\n",
  "content = ''' The things you would like to translate (i.e. \n",
  "# Univariate Time Series with Matrix Algebra\n",
  "\n",
  "## Overview\n",
  "\n",
  "This lecture uses matrices to solve some linear difference equations.\n",
  "\n",
  "As a running example, we’ll study a **second-order linear difference\n",
  "equation** that was the key technical tool in Paul Samuelson’s 1939\n",
  "article {cite}`Samuelson1939` that introduced the **multiplier-accelerator** model.\n",
  "\n",
  "This model became the workhorse that powered early econometric versions of\n",
  "Keynesian macroeconomic models in the United States.\n",
  "\n",
  "You can read about the details of that model in [this](https://python.quantecon.org/samuelson.html)\n",
  "QuantEcon lecture.\n",
  "\n",
  "(That lecture also describes some technicalities about second-order linear difference equations.)\n",
  "\n",
  "In this lecture, we'll also learn about an **autoregressive** representation and a **moving average** representation of a non-stationary\n",
  "univariate time series $\\{y_t\\}_{t=0}^T$.\n",
  "\n",
  "We'll also study a \"perfect foresight\" model of stock prices that involves solving\n",
  "a \"forward-looking\" linear difference equation.\n",
  "\n",
  "We will use the following imports:\n",
  "\n",
  "```{code-cell} ipython\n",
  "import numpy as np\n",
  "%matplotlib inline\n",
  "import matplotlib.pyplot as plt\n",
  "from matplotlib import cm\n",
  "plt.rcParams[\"figure.figsize\"] = (11, 5) #set default figure size\n",
  "```)\n",
  "'''"
 ]
},
147+
{
 "cell_type": "code",
 "execution_count": null,
 "metadata": {
  "scrolled": true,
  "tags": []
 },
 "outputs": [],
 "source": [
  "# Send each chunk through the assistant and accumulate the translation\n",
  "chunks = split_text(content, chunk_size=3000)\n",
  "\n",
  "thread = client.beta.threads.create()\n",
  "\n",
  "translated_content = \"\"\n",
  "\n",
  "header = ''\n",
  "for chunk in chunks:\n",
  "    # Create and poll the run for each chunk\n",
  "    run = client.beta.threads.runs.create_and_poll(\n",
  "        thread_id=thread.id,\n",
  "        assistant_id=assistant_cn_id,\n",
  "        instructions=\"Please translate the following content into simplified Chinese. Maintain all the markdown syntax and directives unchanged. Only translate text and code comments Give the results directly without system messages: \" + chunk\n",
  "    )\n",
  "\n",
  "    if run.status == 'completed':\n",
  "        messages = client.beta.threads.messages.list(\n",
  "            thread_id=thread.id\n",
  "        )\n",
  "        translated_content += header + messages.data[0].content[0].text.value\n",
  "    else:\n",
  "        print(f\"Translation failed for chunk: {chunk[:50]}... Status: {run.status}\")\n",
  "        continue\n",
  "    header = '\\n'"
 ]
},
182+
{
183+
"cell_type": "code",
184+
"execution_count": 6,
185+
"metadata": {
186+
"tags": []
187+
},
188+
"outputs": [
189+
{
190+
"data": {
191+
"text/plain": [
192+
"'\\n\\n# 单变量时间序列与矩阵代数\\n\\n## 概述\\n\\n本讲座使用矩阵来求解一些线性差分方程。\\n\\n作为一个贯穿整个讲座的例子,我们将研究一个**二阶线性差分方程**,这是保罗·萨缪尔森在1939年发表文章 {cite}`Samuelson1939`中引入**乘数-加速器**模型的关键技术工具。\\n\\n该模型成为了推动早期美国凯恩斯主义宏观经济模型的计量经济版本的工作马。\\n\\n你可以在[这](https://python.quantecon.org/samuelson.html)篇QuantEcon讲座中阅读该模型的详细信息。\\n\\n(该讲座还描述了一些关于二阶线性差分方程的技术细节。)\\n\\n在本讲座中,我们还将学习非平稳单变量时间序列 $\\\\{y_t\\\\}_{t=0}^T$ 的**自回归**表示和**移动平均**表示。\\n\\n我们还将研究一个涉及求解“前瞻性”线性差分方程的“完美预测”股票价格模型。\\n\\n我们将使用以下导入:\\n\\n```{code-cell} ipython\\nimport numpy as np\\n%matplotlib inline\\nimport matplotlib.pyplot as plt\\nfrom matplotlib import cm\\nplt.rcParams[\"figure.figsize\"] = (11, 5) # 设置默认图形大小\\n```'"
193+
]
194+
},
195+
"execution_count": 6,
196+
"metadata": {},
197+
"output_type": "execute_result"
198+
}
199+
],
200+
"source": [
 "# Display the accumulated translated markdown (repr of the full string)\n",
 "translated_content"
]
204+
},
205+
{
 "cell_type": "code",
 "execution_count": 7,
 "metadata": {
  "tags": []
 },
 "outputs": [],
 "source": [
  "# Save the translated markdown to disk (UTF-8 so Chinese text round-trips)\n",
  "\n",
  "with open('output.md', 'w', encoding='utf-8') as file:\n",
  "    file.write(translated_content)"
 ]
}
219+
],
220+
"metadata": {
221+
"kernelspec": {
222+
"display_name": "Python 3 (ipykernel)",
223+
"language": "python",
224+
"name": "python3"
225+
},
226+
"language_info": {
227+
"codemirror_mode": {
228+
"name": "ipython",
229+
"version": 3
230+
},
231+
"file_extension": ".py",
232+
"mimetype": "text/x-python",
233+
"name": "python",
234+
"nbconvert_exporter": "python",
235+
"pygments_lexer": "ipython3",
236+
"version": "3.8.19"
237+
}
238+
},
239+
"nbformat": 4,
240+
"nbformat_minor": 4
241+
}

tools/translation.py

+108
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,108 @@
1+
"""
2+
Before running the file, config the environment by adding the OpenAI API key:
3+
4+
5+
echo "export OPENAI_API_KEY='yourkey'" >> ~/.zshrc
6+
source ~/.zshrc
7+
echo $OPENAI_API_KEY # Test to see if it is added
8+
"""
9+
10+
11+
import openai
12+
import os
13+
from concurrent.futures import ThreadPoolExecutor
14+
15+
def process_file(filename, function, assistant_cn_id):
    """Run `function` on `filename` resolved inside the lectures directory.

    Parameters
    ----------
    filename : str
        Name of the markdown file, relative to `directory`.
    function : callable
        Translator to invoke, called as function(input_file, assistant_cn_id).
    assistant_cn_id : str
        OpenAI assistant id forwarded to `function`.

    NOTE(review): `directory` is a module-level global that is only defined
    under `__main__` — importing this module and calling process_file directly
    would raise NameError; confirm whether it should be a parameter.
    """
    input_file = os.path.join(directory, filename)
    print(f'processing {input_file}')
    # Silently skips names that are not regular files (e.g. subdirectories)
    if os.path.isfile(input_file):
        function(input_file, assistant_cn_id)
20+
21+
22+
def split_text(content, chunk_size=3000):
    """Split markdown `content` into chunks of roughly `chunk_size` characters.

    Chunks are cut at line breaks where possible, and a ```{code-cell} block
    that starts inside a chunk is kept intact by extending the chunk to the
    line after its closing fence.

    Parameters
    ----------
    content : str
        The markdown text to split.
    chunk_size : int, optional
        Target maximum chunk length in characters (default 3000).

    Returns
    -------
    list[str]
        The stripped chunks, in order (the final chunk is appended unstripped).
    """
    chunks = []
    start = 0
    n = len(content)
    while start < n:
        end = start + chunk_size

        # If we are at the end of the content, just append the rest
        if end >= n:
            chunks.append(content[start:])
            break

        # Find the nearest line break before the chunk size
        next_line_break = content.rfind('\n', start, end)
        if next_line_break == -1:
            # If no line break is found within the chunk size, extend to the end
            next_line_break = end

        # Check if a code cell starts within the chunk
        code_cell_start = content.find('```{code-cell}', start, next_line_break)
        if code_cell_start != -1:
            # If a code cell starts, extend the chunk past its closing fence
            code_cell_end = content.find('```', code_cell_start + 14)
            if code_cell_end != -1:
                nl_after = content.find('\n', code_cell_end)
                # bug fix: the original did `find(...) + 1`, which turned a
                # "not found" (-1) into 0 and made the loop run forever
                next_line_break = nl_after + 1 if nl_after != -1 else n

        # bug fix: guarantee forward progress — when the only newline in the
        # window sat exactly at `start`, the original never advanced (hang)
        if next_line_break <= start:
            next_line_break = end

        chunks.append(content[start:next_line_break].strip())
        start = next_line_break

    return chunks
52+
53+
def translate_cn(input_file, assistant_id):
    """Translate a markdown file into simplified Chinese via an OpenAI assistant.

    Reads `input_file`, splits it into chunks (keeping ```{code-cell} blocks
    intact via split_text), sends each chunk through the assistant on one
    shared thread, and writes the joined result to `<name>_cn.md`.

    Parameters
    ----------
    input_file : str
        Path to the source markdown file.
    assistant_id : str
        Id of the pre-configured OpenAI assistant to use.
    """
    # Initialize the OpenAI client (reads OPENAI_API_KEY from the environment)
    client = openai.OpenAI()

    # Read the content of the input markdown file
    with open(input_file, 'r', encoding='utf-8') as file:
        content = file.read()

    # Split the content into chunks
    chunks = split_text(content, chunk_size=1000)

    # One conversation thread is reused for every chunk of this file
    thread = client.beta.threads.create()

    # Collect successful translations; joining with '\n' reproduces the
    # original header-prefix accumulation exactly
    translated_parts = []
    for chunk in chunks:
        # Create and poll the run for each chunk
        run = client.beta.threads.runs.create_and_poll(
            thread_id=thread.id,
            assistant_id=assistant_id,
            instructions="Give a direct translation into simplified Chinese. Maintain all the markdown syntax and directives unchanged. Give the results directly without system messages: " + chunk
        )

        if run.status == 'completed':
            # The newest message in the thread is the assistant's reply
            messages = client.beta.threads.messages.list(
                thread_id=thread.id
            )
            translated_parts.append(messages.data[0].content[0].text.value)
        else:
            # Best-effort: report and skip failed chunks, keep translating
            print(f"Translation failed for chunk: {chunk[:50]}... Status: {run.status}")

    translated_content = "\n".join(translated_parts)

    # bug fix: str.replace('.md', ...) rewrote the FIRST '.md' anywhere in the
    # path (e.g. 'a.mdx/b.md'); only rewrite the trailing extension
    if input_file.endswith('.md'):
        output_file = input_file[:-3] + '_cn.md'
    else:
        output_file = input_file + '_cn.md'

    # Write the translated content to the new markdown file
    with open(output_file, 'w', encoding='utf-8') as file:
        file.write(translated_content)

    print(f"Translated content has been saved to {output_file}")
95+
96+
97+
if __name__ == "__main__":
    # Directory holding the source markdown lectures; also read as a global
    # by process_file
    directory = "lectures"
    # Id of the pre-configured translation assistant on the OpenAI account
    assistant_cn_id = 'asst_zjzyGwEZ1rVuJYWNQk6nzQTA'

    files = [f for f in os.listdir(directory) if f.endswith('.md') and os.path.isfile(os.path.join(directory, f))]
    # Smoke-check that the API key and assistants are reachable before spawning workers
    print(openai.beta.assistants.list())

    print(f'files to translate: {files}')

    file_handler = lambda file: process_file(file, translate_cn, assistant_cn_id)
    with ThreadPoolExecutor() as executor:
        # NOTE(review): files[1:] silently skips one file, and os.listdir order
        # is arbitrary, so WHICH file is skipped is unpredictable — confirm this
        # is intentional (it looks like leftover debugging).
        executor.map(file_handler, files[1:])

0 commit comments

Comments
 (0)