diff --git a/README.MD b/README.MD
new file mode 100644
index 000000000..6c4d66e2e
--- /dev/null
+++ b/README.MD
@@ -0,0 +1,120 @@
+# TextRecognitionDataGenerator 
+
+
+
+[English](README_EN.md) | 简体中文
+
+## 介绍
+
+一个文字识别模型的数据生成器
+
+## 特性
+
+1. 支持中英文识别。
+
+## 必要条件
+
+- Python 3.6+
+
+## 近期更新
+
+**`2023-02-15`**: 继承自[TextRecognitionDataGenerator](https://github.com/Belval/TextRecognitionDataGenerator) 。
+
+## 安装
+创建虚拟环境
+```commandline
+conda create -n TextRecognitionDataGenerator python=3.7
+```
+激活虚拟环境
+```commandline
+conda activate TextRecognitionDataGenerator
+```
+
+安装所需依赖
+```commandline
+pip install -r requirements.txt
+```
+## 使用
+
+### 准备材料
+
+- **背景**
+
+为丰富生成图片的背景，请自行准备背景图片并放置于[images](./trdg/images)文件夹下。
+
+- **语料**
+
+准备文字生成语料，并放置在[texts](./trdg/texts)文件夹下，同时在执行生成命令时，使用-i参数指定语料文件路径。
+
+- **字体**
+
+1. 需准备一个语料字典文件放置于[chars](./trdg/chars)文件夹下，该字典文件记录了生成语料中可能出现的所有字符。
+2. 下载对应语言所需要的字体文件，并放置于[fonts](./trdg/fonts)文件夹下。下面列举几个可供选择的字体下载网站： [FFONTS](https://cn.ffonts.net/Agra.font.download)、[FONT MEME](https://fontmeme.com/ziti/thai-fonts/1/)、[fontriver](https://cn.fontriver.com/foreign_look/russian/)。
+3. 执行下述语句，筛选可用字体，执行完成后会在[fonts](./trdg/fonts)文件夹下生成一个带_filter后缀的文件夹，该文件夹下字体即为可用字体。
+
+```bash
+python tools/filter_fonts.py --dict_path 字典文件路径 --font_dir 字体文件夹路径
+```
+
+### 执行生成数据
+
+在项目根目录下，执行生成语句，
+```bash
+bash run.sh
+```
+
+对于不同语言的生成需求可对如下参数进行调整，
+
+***参数配置表***
+
+|     参数     |                             含义                             |  类型   | 默认值  |
+| :----------: | :----------------------------------------------------------: | :-----: | :-----: |
+| --output_dir |                        结果输出文件夹                        |   str   |  out/   |
+|      -i      |                      指定生成的文本语料                      |   str   |         |
+|      -l      |                             语言                             |   str   |   en    |
+|      -c      |                        生成的图片数量                        |   int   |         |
+|     -rs      |      使用随机字符组合作为语料，即没有语义信息的文本组合      |         |         |
+|     -let     |     与-rs一起使用，使用随机字符组合作为语料，只生成字符      |         |         |
+|     -num     |     与-rs一起使用，使用随机字符组合作为语料，只生成数字      |         |         |
+|     -sym     |     与-rs一起使用，使用随机字符组合作为语料，只生成标点      |         |         |
+|      -w      |                   生成图片中包含的单词数量                   |   int   |    1    |
+|      -r      |            与-w搭配使用，随机生成单词数，最大为-w            |         |         |
+|      -f      |                      设置图片高度像素数                      |   int   |   32    |
+|      -t      |                         执行的进程数                         |   int   |    1    |
+|      -e      |                       设置图片保存格式                       |   str   |   jpg   |
+|      -k      |       文本行倾斜角度,需要与-rk一起使用，否则为固定角度       |   int   |    0    |
+|     -rk      |             使倾斜角度在-k到+k的范围内随机选取             |         |         |
+|     -wk      |               使用维基百科语料，运行似乎会报错               |         |         |
+|     -bl      |   增加高斯模糊效果，数值越大越模糊，设置-rbl模糊程度可随机   |   int   |    0    |
+|     -rbl     |                随机模糊，模糊值从0到-bl的数值                |         |         |
+|      -b      |      背景，0：高斯噪点，1：白色背景，2：类网格，3：图片      |         |         |
+|     -hw      |                   手写配置，当前运行会报错                   |         |         |
+|     -na      | 标签输出格式，0: [TEXT]_[ID].[EXT], 1: [ID]_[TEXT].[EXT] 2: labels.txt |   int   |    0    |
+|     -om      |               是否返回遮掩的图片，0：否；1：是               |   int   |    0    |
+|     -obb     | 返回每个字的坐标信息，0：不返回；1：txt格式；2：Tesseract格式 |   int   |    0    |
+|      -d      |  添加文字扭曲效果，0：无变换，1：Sine，2：Cosine，3：random  |   int   |    0    |
+|     -do      |   定义扭曲的方向，与-d配合使用；0：上下，1：左右，2:都有。   |   int   |    0    |
+|     -wd      |                       设置固定图片宽度                       |   int   |   -1    |
+|     -al      | 设置文字在图片中的对齐方式，需配合-wd使用，0：左，1：中，2：右。 |         |         |
+|     -or      |            生成的文字方向，0代表水平，1代表竖直的            |         |         |
+|     -tc      |   设置文本颜色，设置一个颜色区间的话形如'#000000,#888888'    |   str   | #282828 |
+|     -sw      |       两个词之间的空间,1.0代表一个空格，0代表没有间隔        |  float  |   1.0   |
+|     -cs      |             两个字符之间的空间，2代表两个像素。              |   int   |    0    |
+|  --margins   |        设置文本行四周边距，单位是像素；上，左，下，右        | margins | 5,5,5,5 |
+|    --fit     |     加这个参数将使边距更紧密，一般配合--margins一起使用      |         |         |
+|     -ft      |                         指定某个字体                         |         |         |
+|     -fd      |                     指定字体所在的文件夹                     |         |         |
+|     -id      |                   指定背景图片所在的文件夹                   |         |         |
+|     -ca      |   指定只生成大写或者小写的语料，upper：大写，lower：小写。   |   str   |         |
+|     -dt      |                           指定字典                           |         |         |
+|     -stw     |                        定义笔画的宽度                        |   int   |    0    |
+|     -stf     |        定义笔画轮廓的颜色，如果stroke_width比0大的话         |   str   | #282828 |
+|     -im      | 定义要使用的图像模式。RGB是默认的，L表示8位灰度图像，等等。  |   str   |   RGB   |
+
+## 联系
+
+1. 邮箱：jianjinlv@163.com
+
+## 许可证书
+
+本项目的代码基于MIT协议发布。
\ No newline at end of file
diff --git a/README.md b/README_EN.md
similarity index 100%
rename from README.md
rename to README_EN.md
diff --git a/requirements.txt b/requirements.txt
index c0735c528..53819815d 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -6,3 +6,4 @@ diffimg==0.2.3
 arabic-reshaper==2.1.3
 python-bidi==0.4.2
 wikipedia>=1.4.0
+fonttools
\ No newline at end of file
diff --git a/run.sh b/run.sh
new file mode 100644
index 000000000..b6c04639a
--- /dev/null
+++ b/run.sh
@@ -0,0 +1,5 @@
+# Arabic
+# python trdg/run.py -c 1000000 -f 64 -b 3 -d 1 -l ar -na 2 -k 5 -rk -bl 1 -rbl -t 8 -tc '#000000,#888888' -dt trdg/chars/ar_dict.txt -i trdg/texts/ar/train.txt -fd trdg/fonts/ar_filter/ --output_dir ar_TextRecognitionDataGenerator_train_100w
+# Russian (NOTE: the dictionary, fonts and output directory below still point at the zh resources)
+# python trdg/run.py -c 100 -f 64 -b 3 -d 1 -l ru -na 2 -k 5 -rk -bl 1 -rbl -t 8 -tc '#000000,#888888' --margins 0,5,0,5 --fit -dt trdg/chars/ppocr_keys_v1.txt -i trdg/texts/test.txt -fd trdg/fonts/zh/ --output_dir output/zh_TextRecognitionDataGenerator_train_100w
+python trdg/run.py -c 1000 -f 48 -rs -num -sym -b 3 -d 1 -na 2 -k 5 -rk -bl 1 -rbl -t 8 -tc '#000000,#888888' --margins 0,5,0,5 --fit -fd trdg/fonts/zh_filter/ --output_dir output/test_TextRecognitionDataGenerator_train
\ No newline at end of file
diff --git a/setup.py b/setup.py
index aaaa417f3..0a15bcd4c 100644
--- a/setup.py
+++ b/setup.py
@@ -7,7 +7,7 @@
 
 here = path.abspath(path.dirname(__file__))
 
-with open(path.join(here, "README.md"), encoding="utf-8") as f:
+with open(path.join(here, "README_EN.md"), encoding="utf-8") as f:
     long_description = f.read()
 
 setup(
diff --git a/tools/filter_fonts.py b/tools/filter_fonts.py
new file mode 100644
index 000000000..ce2ef3b5a
--- /dev/null
+++ b/tools/filter_fonts.py
@@ -0,0 +1,60 @@
+# -*- coding: utf-8 -*-
+# @Time      : 2023/2/23 12:00
+# @Author    : JianjinL
+# @eMail     : jianjinlv@163.com
+# @File      : filter_fonts
+# @Software  : PyCharm
+# @Description: 筛选符合字典的字体
+import os
+import shutil
+from fontTools.ttLib import TTFont
+import fontTools
+import argparse
+
+def load_charset(dict_path):
+    """Return the set of dictionary entries, one per line of ``dict_path``."""
+    with open(dict_path, 'r', encoding='utf8') as fread:
+        charset = {line.rstrip("\n") for line in fread}
+    # A blank line is not a character; keeping "" would make every font fail.
+    charset.discard("")
+    return charset
+
+
+def unsupported_chars(font, charset):
+    """Return the entries of ``charset`` that no cmap table of ``font`` maps."""
+    supported = set()
+    for table in font['cmap'].tables:
+        supported.update(table.cmap)
+    # Entries longer than one character cannot be a single code point.
+    return {c for c in charset if len(c) != 1 or ord(c) not in supported}
+
+
+def filter_fonts(dict_path, font_dir):
+    """Copy fonts of ``font_dir`` that cover the whole dictionary to ``<font_dir>_filter``."""
+    # Drop trailing separators: "fonts/zh/" must give "fonts/zh_filter", not "fonts/zh/_filter".
+    target_dir = os.path.normpath(font_dir) + "_filter"
+    os.makedirs(target_dir, exist_ok=True)
+    charset = load_charset(dict_path)  # read once, not once per font
+    for font_name in os.listdir(font_dir):
+        font_path = os.path.join(font_dir, font_name)
+        if not os.path.isfile(font_path):
+            continue  # sub-directories cannot be opened as fonts
+        try:
+            font = TTFont(font_path)
+        except fontTools.ttLib.TTLibError:
+            continue  # not a font file fontTools can parse
+        try:
+            missing = unsupported_chars(font, charset)
+        finally:
+            font.close()
+        print(f"字体：{font_name}, 不支持的字符：{missing}")
+        if not missing:
+            shutil.copy(font_path, os.path.join(target_dir, font_name))
+
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser(description='筛选符合字典的字体')
+    parser.add_argument('--dict_path', type=str, required=True, help="字典路径")
+    parser.add_argument('--font_dir', type=str, required=True, help="字体所在文件夹")
+    cli_args = parser.parse_args()
+    filter_fonts(cli_args.dict_path, cli_args.font_dir)
diff --git a/tools/filter_labels.py b/tools/filter_labels.py
new file mode 100644
index 000000000..522bf4d66
--- /dev/null
+++ b/tools/filter_labels.py
@@ -0,0 +1,34 @@
+# -*- coding: utf-8 -*-
+# @Time      : 2023/2/24 14:05
+# @Author    : JianjinL
+# @eMail     : jianjinlv@163.com
+# @File      : filter_labels
+# @Software  : PyCharm
+# @Description: 将生成的数据集标签与图片对一遍，删除没有图片的标签
+
+import os
+import argparse
+
+def filter_labels(img_dir, label_path):
+    """Keep only the label lines whose image file exists in ``img_dir``.
+
+    The kept lines are written next to ``label_path`` as ``<stem>_filter<ext>``.
+    """
+    img_names = set(os.listdir(img_dir))
+    # Derive the output from the stem: str.replace() left other names unchanged and 'w' wiped them.
+    stem, ext = os.path.splitext(label_path)
+    with open(label_path, 'r', encoding='utf8') as f_label, \
+            open(stem + "_filter" + ext, 'w', encoding='utf8') as f_filter:
+        for line in f_label:
+            # Lines look like "<dataset>/images/<file name>\t<text>"; malformed ones are dropped.
+            _, sep, img_name = line.split("\t")[0].partition("images/")
+            if sep and img_name in img_names:
+                f_filter.write(line)
+
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser(description='筛选标签')
+    parser.add_argument('--img_dir', type=str, required=True, help="生成的图片所在文件夹")
+    parser.add_argument('--label_path', type=str, required=True, help="原始标签所在路径")
+    cli_args = parser.parse_args()
+    filter_labels(cli_args.img_dir, cli_args.label_path)
diff --git a/trdg/data_generator.py b/trdg/data_generator.py
index 75a79467f..94cecd708 100644
--- a/trdg/data_generator.py
+++ b/trdg/data_generator.py
@@ -58,6 +58,13 @@ def generate(
         image = None
 
         margin_top, margin_left, margin_bottom, margin_right = margins
+        # 更改功能：使得边界值可以随机取值
+        # -----------------
+        margin_top = rnd.randint(0, margin_top)
+        margin_left = rnd.randint(0, margin_left)
+        margin_bottom = rnd.randint(0, margin_bottom)
+        margin_right = rnd.randint(0, margin_right)
+        # -----------------
         horizontal_margin = margin_left + margin_right
         vertical_margin = margin_top + margin_bottom
 
diff --git a/trdg/run.py b/trdg/run.py
index 65edd1291..008139ec8 100755
--- a/trdg/run.py
+++ b/trdg/run.py
@@ -419,8 +419,9 @@ def main():
     if args.language == "ar":
         from arabic_reshaper import ArabicReshaper
         from bidi.algorithm import get_display
-
         arabic_reshaper = ArabicReshaper()
+        # 修改阿拉伯语系文本标签进行修改
+        strings_ar = [get_display(x) for x in strings]
         strings = [
             " ".join(
                 [get_display(arabic_reshaper.reshape(w)) for w in s.split(" ")[::-1]]
@@ -475,9 +476,12 @@ def main():
     ):
         pass
     p.terminate()
-
+    # 修改输出格式，直接返回paddleocr样式
     if args.name_format == 2:
         # Create file with filename-to-label connections
+        # 对阿拉伯语系标签进行修改
+        if args.language == "ar":
+            strings = strings_ar
         with open(
             os.path.join(args.output_dir, "labels.txt"), "w", encoding="utf8"
         ) as f:
@@ -486,7 +490,7 @@ def main():
                 label = strings[i]
                 if args.space_width == 0:
                     label = label.replace(" ", "")
-                f.write("{} {}\n".format(file_name, label))
+                f.write("{0}/images/{1}\t{2}\n".format(args.output_dir.replace("output/", ""), file_name, label))
 
 
 if __name__ == "__main__":
diff --git a/trdg/string_generator.py b/trdg/string_generator.py
index 631d51b2d..0008b3272 100644
--- a/trdg/string_generator.py
+++ b/trdg/string_generator.py
@@ -117,7 +117,7 @@ def create_strings_randomly(
     if num:
         pool += "0123456789"
     if sym:
-        pool += "!\"#$%&'()*+,-./:;?@[\\]^_`{|}~"
+        pool += " 0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz。？！，、；：“”‘’『』「」（）[]〔〕【】—…·-～《》〈〉_/!\"#$%&'()*+,.:;<=>?\@^`{|}~«»ⅠⅡⅢⅣⅤⅥⅦⅧⅨⅩαβΓγΔδεζηθικλμπσΣΦφΩωΨψ∫﹤﹥≦≧°∞≥≤≠‰±⊥√∑×ⅪⅫⅰⅱⅲⅳⅴⅵⅶⅷⅸⅹ①②③④⑤⑥⑦⑧⑨⑩"
 
     if lang == "cn":
         min_seq_len = 1