Skip to content

Commit 32d4339

Browse files
committed
updates
1 parent 626fd14 commit 32d4339

File tree

3 files changed

+369
-0
lines changed

3 files changed

+369
-0
lines changed

environment-cn.yml

+20
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
name: quantecon
2+
channels:
- defaults
- conda-forge
5+
dependencies:
6+
- python=3.12
7+
- anaconda=2024.10
8+
- pip
9+
- pip:
10+
- jupyter-book==1.0.3
11+
- quantecon-book-theme==0.8.2
12+
- sphinx-tojupyter==0.3.0
13+
- sphinxext-rediraffe==0.2.7
14+
- sphinx_reredirects==0.1.4
15+
- sphinx-exercise==1.0.1
16+
- sphinx-proof==0.2.0
17+
- ghp-import==1.1.0
18+
- sphinxcontrib-youtube==1.3.0 # Version 1.3.0 is required because quantecon-book-theme is only compatible with sphinx<=5
19+
- sphinx-togglebutton==0.3.2
20+
- --index-url https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple

tools/translation.ipynb

+241
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,241 @@
1+
{
2+
"cells": [
3+
{
 "cell_type": "code",
 "execution_count": null,
 "metadata": {
  "tags": []
 },
 "outputs": [],
 "source": [
  "\"\"\"\n",
  "Before running the file, config the environment by adding the OpenAI API key:\n",
  "\n",
  "\n",
  "echo \"export OPENAI_API_KEY='yourkey'\" >> ~/.zshrc\n",
  "source ~/.zshrc\n",
  "echo $OPENAI_API_KEY # Test to see if it is added\n",
  "\"\"\"\n",
  "\n",
  "import openai\n",
  "import re"
 ]
},
36+
{
 "cell_type": "code",
 "execution_count": null,
 "metadata": {
  "tags": []
 },
 "outputs": [],
 "source": [
  "# Requires OPENAI_API_KEY to be set in the environment (see cell above)\n",
  "client = openai.OpenAI()\n",
  "print(openai.beta.assistants.list())\n",
  "assistant_cn_id = 'asst_zjzyGwEZ1rVuJYWNQk6nzQTA'"
 ]
},
62+
{
 "cell_type": "code",
 "execution_count": null,
 "metadata": {
  "tags": []
 },
 "outputs": [],
 "source": [
  "def split_text(content, chunk_size=3000):\n",
  "    \"\"\"Split markdown into chunks of roughly chunk_size characters,\n",
  "    cutting at line breaks and keeping {code-cell} blocks intact.\"\"\"\n",
  "    chunks = []\n",
  "    start = 0\n",
  "    n = len(content)\n",
  "    while start < n:\n",
  "        end = start + chunk_size\n",
  "\n",
  "        # If we are at the end of the content, just append the rest\n",
  "        if end >= n:\n",
  "            chunks.append(content[start:])\n",
  "            break\n",
  "\n",
  "        # Find the nearest line break before the chunk size\n",
  "        next_line_break = content.rfind('\\n', start, end)\n",
  "        if next_line_break == -1:\n",
  "            # If no line break is found within the chunk size, extend to the end\n",
  "            next_line_break = end\n",
  "\n",
  "        # Check if a code cell starts within the chunk\n",
  "        code_cell_start = content.find('```{code-cell}', start, next_line_break)\n",
  "        if code_cell_start != -1:\n",
  "            # If a code cell starts, extend the chunk past its closing fence\n",
  "            code_cell_end = content.find('```', code_cell_start + 14)\n",
  "            if code_cell_end != -1:\n",
  "                nl_after = content.find('\\n', code_cell_end)\n",
  "                # bug fix: find() == -1 previously became 0 via +1 -> infinite loop\n",
  "                next_line_break = nl_after + 1 if nl_after != -1 else n\n",
  "\n",
  "        # bug fix: guarantee forward progress when the only newline is at start\n",
  "        if next_line_break <= start:\n",
  "            next_line_break = end\n",
  "\n",
  "        chunks.append(content[start:next_line_break].strip())\n",
  "        start = next_line_break\n",
  "\n",
  "    return chunks"
 ]
},
102+
{
 "cell_type": "code",
 "execution_count": null,
 "metadata": {
  "tags": []
 },
 "outputs": [],
 "source": [
  "# Sample lecture content used to exercise the translation pipeline\n",
  "content = ''' The things you would like to translate (i.e. \n",
  "# Univariate Time Series with Matrix Algebra\n",
  "\n",
  "## Overview\n",
  "\n",
  "This lecture uses matrices to solve some linear difference equations.\n",
  "\n",
  "As a running example, we’ll study a **second-order linear difference\n",
  "equation** that was the key technical tool in Paul Samuelson’s 1939\n",
  "article {cite}`Samuelson1939` that introduced the **multiplier-accelerator** model.\n",
  "\n",
  "This model became the workhorse that powered early econometric versions of\n",
  "Keynesian macroeconomic models in the United States.\n",
  "\n",
  "You can read about the details of that model in [this](https://python.quantecon.org/samuelson.html)\n",
  "QuantEcon lecture.\n",
  "\n",
  "(That lecture also describes some technicalities about second-order linear difference equations.)\n",
  "\n",
  "In this lecture, we'll also learn about an **autoregressive** representation and a **moving average** representation of a non-stationary\n",
  "univariate time series $\\{y_t\\}_{t=0}^T$.\n",
  "\n",
  "We'll also study a \"perfect foresight\" model of stock prices that involves solving\n",
  "a \"forward-looking\" linear difference equation.\n",
  "\n",
  "We will use the following imports:\n",
  "\n",
  "```{code-cell} ipython\n",
  "import numpy as np\n",
  "%matplotlib inline\n",
  "import matplotlib.pyplot as plt\n",
  "from matplotlib import cm\n",
  "plt.rcParams[\"figure.figsize\"] = (11, 5) #set default figure size\n",
  "```)\n",
  "'''"
 ]
},
147+
{
 "cell_type": "code",
 "execution_count": null,
 "metadata": {
  "scrolled": true,
  "tags": []
 },
 "outputs": [],
 "source": [
  "# Send each chunk through the assistant and accumulate the translation\n",
  "chunks = split_text(content, chunk_size=3000)\n",
  "\n",
  "thread = client.beta.threads.create()\n",
  "\n",
  "translated_content = \"\"\n",
  "\n",
  "header = ''\n",
  "for chunk in chunks:\n",
  "    # Create and poll the run for each chunk\n",
  "    run = client.beta.threads.runs.create_and_poll(\n",
  "        thread_id=thread.id,\n",
  "        assistant_id=assistant_cn_id,\n",
  "        instructions=\"Please translate the following content into simplified Chinese. Maintain all the markdown syntax and directives unchanged. Only translate text and code comments Give the results directly without system messages: \" + chunk\n",
  "    )\n",
  "\n",
  "    if run.status == 'completed':\n",
  "        messages = client.beta.threads.messages.list(\n",
  "            thread_id=thread.id\n",
  "        )\n",
  "        translated_content += header + messages.data[0].content[0].text.value\n",
  "    else:\n",
  "        print(f\"Translation failed for chunk: {chunk[:50]}... Status: {run.status}\")\n",
  "        continue\n",
  "    header = '\\n'"
 ]
},
182+
{
183+
"cell_type": "code",
184+
"execution_count": 6,
185+
"metadata": {
186+
"tags": []
187+
},
188+
"outputs": [
189+
{
190+
"data": {
191+
"text/plain": [
192+
"'\\n\\n# 单变量时间序列与矩阵代数\\n\\n## 概述\\n\\n本讲座使用矩阵来求解一些线性差分方程。\\n\\n作为一个贯穿整个讲座的例子,我们将研究一个**二阶线性差分方程**,这是保罗·萨缪尔森在1939年发表文章 {cite}`Samuelson1939`中引入**乘数-加速器**模型的关键技术工具。\\n\\n该模型成为了推动早期美国凯恩斯主义宏观经济模型的计量经济版本的工作马。\\n\\n你可以在[这](https://python.quantecon.org/samuelson.html)篇QuantEcon讲座中阅读该模型的详细信息。\\n\\n(该讲座还描述了一些关于二阶线性差分方程的技术细节。)\\n\\n在本讲座中,我们还将学习非平稳单变量时间序列 $\\\\{y_t\\\\}_{t=0}^T$ 的**自回归**表示和**移动平均**表示。\\n\\n我们还将研究一个涉及求解“前瞻性”线性差分方程的“完美预测”股票价格模型。\\n\\n我们将使用以下导入:\\n\\n```{code-cell} ipython\\nimport numpy as np\\n%matplotlib inline\\nimport matplotlib.pyplot as plt\\nfrom matplotlib import cm\\nplt.rcParams[\"figure.figsize\"] = (11, 5) # 设置默认图形大小\\n```'"
193+
]
194+
},
195+
"execution_count": 6,
196+
"metadata": {},
197+
"output_type": "execute_result"
198+
}
199+
],
200+
"source": [
 "# Display the accumulated translated markdown (repr of the full string)\n",
 "translated_content"
]
204+
},
205+
{
 "cell_type": "code",
 "execution_count": 7,
 "metadata": {
  "tags": []
 },
 "outputs": [],
 "source": [
  "# Save the translated markdown to disk (UTF-8 so Chinese text round-trips)\n",
  "\n",
  "with open('output.md', 'w', encoding='utf-8') as file:\n",
  "    file.write(translated_content)"
 ]
}
219+
],
220+
"metadata": {
221+
"kernelspec": {
222+
"display_name": "Python 3 (ipykernel)",
223+
"language": "python",
224+
"name": "python3"
225+
},
226+
"language_info": {
227+
"codemirror_mode": {
228+
"name": "ipython",
229+
"version": 3
230+
},
231+
"file_extension": ".py",
232+
"mimetype": "text/x-python",
233+
"name": "python",
234+
"nbconvert_exporter": "python",
235+
"pygments_lexer": "ipython3",
236+
"version": "3.8.19"
237+
}
238+
},
239+
"nbformat": 4,
240+
"nbformat_minor": 4
241+
}

tools/translation.py

+108
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,108 @@
1+
"""
2+
Before running the file, config the environment by adding the OpenAI API key:
3+
4+
5+
echo "export OPENAI_API_KEY='yourkey'" >> ~/.zshrc
6+
source ~/.zshrc
7+
echo $OPENAI_API_KEY # Test to see if it is added
8+
"""
9+
10+
11+
import openai
12+
import os
13+
from concurrent.futures import ThreadPoolExecutor
14+
15+
def process_file(filename, function, assistant_cn_id):
    """Run `function` on `filename` resolved inside the lectures directory.

    Parameters
    ----------
    filename : str
        Name of the markdown file, relative to `directory`.
    function : callable
        Translator to invoke, called as function(input_file, assistant_cn_id).
    assistant_cn_id : str
        OpenAI assistant id forwarded to `function`.

    NOTE(review): `directory` is a module-level global that is only defined
    under `__main__` — importing this module and calling process_file directly
    would raise NameError; confirm whether it should be a parameter.
    """
    input_file = os.path.join(directory, filename)
    print(f'processing {input_file}')
    # Silently skips names that are not regular files (e.g. subdirectories)
    if os.path.isfile(input_file):
        function(input_file, assistant_cn_id)
20+
21+
22+
def split_text(content, chunk_size=3000):
    """Split markdown `content` into chunks of roughly `chunk_size` characters.

    Chunks are cut at line breaks where possible, and a ```{code-cell} block
    that starts inside a chunk is kept intact by extending the chunk to the
    line after its closing fence.

    Parameters
    ----------
    content : str
        The markdown text to split.
    chunk_size : int, optional
        Target maximum chunk length in characters (default 3000).

    Returns
    -------
    list[str]
        The stripped chunks, in order (the final chunk is appended unstripped).
    """
    chunks = []
    start = 0
    n = len(content)
    while start < n:
        end = start + chunk_size

        # If we are at the end of the content, just append the rest
        if end >= n:
            chunks.append(content[start:])
            break

        # Find the nearest line break before the chunk size
        next_line_break = content.rfind('\n', start, end)
        if next_line_break == -1:
            # If no line break is found within the chunk size, extend to the end
            next_line_break = end

        # Check if a code cell starts within the chunk
        code_cell_start = content.find('```{code-cell}', start, next_line_break)
        if code_cell_start != -1:
            # If a code cell starts, extend the chunk past its closing fence
            code_cell_end = content.find('```', code_cell_start + 14)
            if code_cell_end != -1:
                nl_after = content.find('\n', code_cell_end)
                # bug fix: the original did `find(...) + 1`, which turned a
                # "not found" (-1) into 0 and made the loop run forever
                next_line_break = nl_after + 1 if nl_after != -1 else n

        # bug fix: guarantee forward progress — when the only newline in the
        # window sat exactly at `start`, the original never advanced (hang)
        if next_line_break <= start:
            next_line_break = end

        chunks.append(content[start:next_line_break].strip())
        start = next_line_break

    return chunks
52+
53+
def translate_cn(input_file, assistant_id):
    """Translate a markdown file into simplified Chinese via an OpenAI assistant.

    Reads `input_file`, splits it into chunks (keeping ```{code-cell} blocks
    intact via split_text), sends each chunk through the assistant on one
    shared thread, and writes the joined result to `<name>_cn.md`.

    Parameters
    ----------
    input_file : str
        Path to the source markdown file.
    assistant_id : str
        Id of the pre-configured OpenAI assistant to use.
    """
    # Initialize the OpenAI client (reads OPENAI_API_KEY from the environment)
    client = openai.OpenAI()

    # Read the content of the input markdown file
    with open(input_file, 'r', encoding='utf-8') as file:
        content = file.read()

    # Split the content into chunks
    chunks = split_text(content, chunk_size=1000)

    # One conversation thread is reused for every chunk of this file
    thread = client.beta.threads.create()

    # Collect successful translations; joining with '\n' reproduces the
    # original header-prefix accumulation exactly
    translated_parts = []
    for chunk in chunks:
        # Create and poll the run for each chunk
        run = client.beta.threads.runs.create_and_poll(
            thread_id=thread.id,
            assistant_id=assistant_id,
            instructions="Give a direct translation into simplified Chinese. Maintain all the markdown syntax and directives unchanged. Give the results directly without system messages: " + chunk
        )

        if run.status == 'completed':
            # The newest message in the thread is the assistant's reply
            messages = client.beta.threads.messages.list(
                thread_id=thread.id
            )
            translated_parts.append(messages.data[0].content[0].text.value)
        else:
            # Best-effort: report and skip failed chunks, keep translating
            print(f"Translation failed for chunk: {chunk[:50]}... Status: {run.status}")

    translated_content = "\n".join(translated_parts)

    # bug fix: str.replace('.md', ...) rewrote the FIRST '.md' anywhere in the
    # path (e.g. 'a.mdx/b.md'); only rewrite the trailing extension
    if input_file.endswith('.md'):
        output_file = input_file[:-3] + '_cn.md'
    else:
        output_file = input_file + '_cn.md'

    # Write the translated content to the new markdown file
    with open(output_file, 'w', encoding='utf-8') as file:
        file.write(translated_content)

    print(f"Translated content has been saved to {output_file}")
95+
96+
97+
if __name__ == "__main__":
    # Directory holding the source markdown lectures; also read as a global
    # by process_file
    directory = "lectures"
    # Id of the pre-configured translation assistant on the OpenAI account
    assistant_cn_id = 'asst_zjzyGwEZ1rVuJYWNQk6nzQTA'

    files = [f for f in os.listdir(directory) if f.endswith('.md') and os.path.isfile(os.path.join(directory, f))]
    # Smoke-check that the API key and assistants are reachable before spawning workers
    print(openai.beta.assistants.list())

    print(f'files to translate: {files}')

    file_handler = lambda file: process_file(file, translate_cn, assistant_cn_id)
    with ThreadPoolExecutor() as executor:
        # NOTE(review): files[1:] silently skips one file, and os.listdir order
        # is arbitrary, so WHICH file is skipped is unpredictable — confirm this
        # is intentional (it looks like leftover debugging).
        executor.map(file_handler, files[1:])

0 commit comments

Comments
 (0)