From c6bfbf420adbfeaa95e04f10ccca3b10e92b6f4f Mon Sep 17 00:00:00 2001
From: chupei <njuchupei@gmail.com>
Date: Fri, 15 Aug 2025 13:41:12 +0800
Subject: [PATCH 1/8] v3.2.2-released (#524)

Co-authored-by: Yanggq <1041206149@qq.com>
Co-authored-by: linfeng <56671143+LollipopsAndWine@users.noreply.github.com>
---
 .../html/recognizer/cc_math/render/mathjax.py | 36 +++++++++++++++++
 .../html/recognizer/cc_math/render/render.py  |  1 +
 .../html/recognizer/cc_math/tag_script.py     | 19 ++++++++-
 .../extractor/html/recognizer/ccmath.py       | 40 ++++++++++++-------
 llm_web_kit/extractor/html/recognizer/list.py | 15 +++----
 .../good_data/html/math_mathjax_mock.html     |  1 +
 .../good_data/html_data_input.jsonl           |  3 +-
 .../assets/ccmath/math_class_math.html        |  1 +
 .../assets/ccmath/math_class_math_1.html      |  0
 .../ccmath/math_class_math_inline_1.html      |  8 ++++
 .../extractor/html/recognizer/test_list.py    | 35 ++++++++++++++++
 .../extractor/html/recognizer/test_math.py    |  8 ++++
 .../extractor/test_extractor_chain.py         | 19 +++++++--
 13 files changed, 157 insertions(+), 29 deletions(-)
 create mode 100644 tests/llm_web_kit/extractor/assets/extractor_chain_input/good_data/html/math_mathjax_mock.html
 create mode 100644 tests/llm_web_kit/extractor/html/recognizer/assets/ccmath/math_class_math.html
 create mode 100644 tests/llm_web_kit/extractor/html/recognizer/assets/ccmath/math_class_math_1.html
 create mode 100644 tests/llm_web_kit/extractor/html/recognizer/assets/ccmath/math_class_math_inline_1.html
diff --git a/llm_web_kit/extractor/html/recognizer/cc_math/render/mathjax.py b/llm_web_kit/extractor/html/recognizer/cc_math/render/mathjax.py
index bb309468..06ac62a9 100644
--- a/llm_web_kit/extractor/html/recognizer/cc_math/render/mathjax.py
+++ b/llm_web_kit/extractor/html/recognizer/cc_math/render/mathjax.py
@@ -533,6 +533,42 @@ def _detect_ascii_math(self, tree: HtmlElement) -> bool:
         return processascii
 
 
+class MathJaxRenderMock(MathJaxRender):
+    """虚拟的MathJax渲染器，用于没有MathJax配置但需要使用MathJax解析逻辑的情况.
+
+    这个类主要用于处理以下场景：
+    1. 网页中没有显式的MathJax配置（如<script type="text/x-mathjax-config">）
+    2. 但在HTML解析过程中检测到了数学公式元素（如<math>标签、公式相关的class等）
+    3. 需要使用MathJax渲染器方案扫一遍所有内容，防止漏抽取公式
+
+    与普通MathJaxRender的区别：
+    - MathJaxRender：会解析HTML中的MathJax配置，使用自定义的分隔符和选项
+    - MathJaxRenderMock：直接使用默认的MathJax配置，不解析HTML配置
+    """
+
+    def __init__(self):
+        """初始化虚拟MathJax渲染器."""
+        super().__init__()
+        self.render_type = MathRenderType.MATHJAX_MOCK
+        # 使用默认的MathJax选项
+        self.options = MATHJAX_OPTIONS.copy()
+
+    def get_options(self, html: str) -> Dict[str, Any]:
+        """虚拟渲染器直接返回默认选项，不解析HTML配置.
+
+        Args:
+            html: HTML字符串（忽略）
+
+        Returns:
+            Dict[str, Any]: 默认MathJax选项字典
+        """
+        return self.options
+
+    def is_customized_options(self) -> bool:
+        """虚拟渲染器始终返回False，表示使用默认配置."""
+        return False
+
+
 # 使用示例
 if __name__ == '__main__':
     # MathJax示例
diff --git a/llm_web_kit/extractor/html/recognizer/cc_math/render/render.py b/llm_web_kit/extractor/html/recognizer/cc_math/render/render.py
index 7314d66a..ad2a9390 100644
--- a/llm_web_kit/extractor/html/recognizer/cc_math/render/render.py
+++ b/llm_web_kit/extractor/html/recognizer/cc_math/render/render.py
@@ -11,6 +11,7 @@
 class MathRenderType:
     """数学公式渲染器类型."""
     MATHJAX = 'mathjax'
+    MATHJAX_MOCK = 'mathjax_mock'  # 虚拟的mathjax渲染器
     MATHJAX_CUSTOMIZED = 'mathjax_customized'  # 临时增加这个type，未来区分走自定义解析的数据
     KATEX = 'katex'
 
diff --git a/llm_web_kit/extractor/html/recognizer/cc_math/tag_script.py b/llm_web_kit/extractor/html/recognizer/cc_math/tag_script.py
index 0e218163..8821f0e1 100644
--- a/llm_web_kit/extractor/html/recognizer/cc_math/tag_script.py
+++ b/llm_web_kit/extractor/html/recognizer/cc_math/tag_script.py
@@ -14,10 +14,16 @@ def modify_tree(cm: CCMATH, math_render: str, o_html: str, node: HtmlElement, pa
     try:
         text = node.text
         if text and text_strip(text):
+            # 先处理非script标签和style标签的节点：即class为math/katex的节点
+            # 例子：<div class="math">f(x) \sim x^2, \quad x\to\infty</div>
             if node.tag not in ['script', 'style']:
                 new_span = create_new_span([(CCMATH_INLINE,MathType.LATEX)], cm.wrap_math_md(text), node, math_render, o_html)
-                node.addnext(new_span)
+                # node.addnext(new_span)
+                replace_element(node, new_span)  # 替换节点，而不是添加
+
+            # 下面是katex逻辑
             else:
+                # 例子：<script type = "e44e-text/javascript">katex.render("f(a,b,c) = (a^2+b^2+c^2)^3", mykatex);</script>
                 katex_pattern = re.compile(r'katex.render')
                 node_text = text_strip(text)
                 if katex_pattern.findall(node_text):
@@ -28,8 +34,17 @@ def modify_tree(cm: CCMATH, math_render: str, o_html: str, node: HtmlElement, pa
                             target_element = target_elements[0]
                             o_html = element_to_html(target_element)
                             target_element.text = None
-                            new_span = create_new_span([(CCMATH_INLINE,MathType.LATEX)], cm.wrap_math_md(formula_content), target_element, math_render, o_html)
+                            wrapped_formula = cm.wrap_math_md(formula_content)
+                            # 转化为ccmath，例子：
+                            # <ccmath-inline type="latex" by="katex" html='...'>f(a,b,c) = (a^2+b^2+c^2)^3</ccmath-inline>
+                            new_span = create_new_span([(CCMATH_INLINE, MathType.LATEX)], wrapped_formula,
+                                                       target_element, math_render, o_html)
+                            # 插入到span标签内，例子：
+                            # <span id="mykatex"><ccmath-inline ... </ccmath-inline></span>
                             target_element.insert(0, new_span)
+
+                # 处理script且type为math/tex的节点
+                # 例子：<html><head><script type="math/tex">x^2 + y^2 = z^2</script></head></html>
                 elif node.get('type') and 'math/tex' in node.get('type'):
                     tag_math_type_list = cm.get_equation_type(o_html)
                     if not tag_math_type_list:
diff --git a/llm_web_kit/extractor/html/recognizer/ccmath.py b/llm_web_kit/extractor/html/recognizer/ccmath.py
index 08021dbf..28078250 100644
--- a/llm_web_kit/extractor/html/recognizer/ccmath.py
+++ b/llm_web_kit/extractor/html/recognizer/ccmath.py
@@ -24,6 +24,7 @@ class MathRecognizer(BaseHTMLElementRecognizer):
     def __init__(self):
         super().__init__()
         self.cm = CCMATH()
+        self.mathjax_detected = False  # 添加检测标记
 
     @override
     def recognize(self, base_url: str, main_html_lst: List[Tuple[HtmlElement, HtmlElement]], raw_html: str, language:str = 'en') -> List[Tuple[HtmlElement, HtmlElement]]:
@@ -122,8 +123,9 @@ def process_ccmath_html(self, cc_html: str, o_html: str, math_render: BaseMathRe
             self.cm.url = base_url
             tree = cc_html
             math_render_type = math_render.get_render_type()
-            # 打印遍历node次数
-            # count = 0
+            self.mathjax_detected = False  # 重置标记
+
+            # process1: node循环逻辑
             for node in iter_node(tree):
                 assert isinstance(node, HtmlElement)
                 original_html = self._element_to_html(node)
@@ -134,9 +136,11 @@ def process_ccmath_html(self, cc_html: str, o_html: str, math_render: BaseMathRe
                         node.tag == 'span' and
                         node.get('class') in [CSDN.INLINE, CSDN.DISPLAY]):
                     tag_script.process_katex_mathml(self.cm, math_render_type, node)
+                    self.mathjax_detected = True
 
                 if ZHIHU.DOMAIN in self.cm.url and node.tag == 'span' and node.get('class') == ZHIHU.MATH:
                     tag_script.process_zhihu_custom_tag(self.cm, math_render_type, node)
+                    self.mathjax_detected = True
 
                 # tag = span， class 为 math-containerm， 或者 mathjax 或者 wp-katex-eq
                 if node.tag == 'span' and node.get('class') and (
@@ -147,44 +151,50 @@ def process_ccmath_html(self, cc_html: str, o_html: str, math_render: BaseMathRe
                         'tex' in node.get('class')
                 ):
                     tag_common_modify.modify_tree(self.cm, math_render_type, original_html, node, parent)
-
-                # script[type="math/tex"]
-                # if node.tag == 'script' and node.get('type') and 'math/tex' in node.get('type'):
-                #     print('匹配到script标签: ', node.get('type'))
-                #     tag_common_modify.modify_tree(cm, math_render_type, original_html, node, parent)
+                    self.mathjax_detected = True
 
                 # math tags
                 if node.tag == 'math' or node.tag.endswith(':math'):
                     # print(f"匹配到数学标签: {node.tag}")
                     # print(f"标签内容: {original_html}")
                     tag_math.modify_tree(self.cm, math_render_type, original_html, node, parent)
+                    self.mathjax_detected = True
 
                 if node.tag == 'mjx-container':
                     tag_mjx.modify_tree(self.cm, math_render, original_html, node)
+                    self.mathjax_detected = True
 
                 # img中的latex
                 if node.tag == 'img':
                     tag_img.modify_tree(self.cm, math_render_type, original_html, node, parent)
+                    self.mathjax_detected = True
 
                 # span.katex
                 if node.tag == 'script' or 'math' == node.get('class') or 'katex' == node.get('class'):
                     # print('匹配到script/math/katex标签: ', original_html)
                     tag_script.modify_tree(self.cm, math_render_type, original_html, node, parent)
+                    self.mathjax_detected = True
                 # 只有有渲染器的网站才会走下面文本匹配逻辑
                 if math_render_type:
                     # 14. 只处理只有一层的p标签
                     if node.tag == 'p' and len(node.getchildren()) == 0:
                         # print('匹配到p标签: ', original_html)
                         tag_common_modify.modify_tree(self.cm, math_render_type, original_html, node, parent)
+                        self.mathjax_detected = True
 
-            # 修改：传入tree节点，mathjax方案作为process2，不参与上面process1节点的遍历
-            if math_render_type:
-                try:
-                    if math_render_type == MathRenderType.MATHJAX:
-                        math_render.find_math(tree)
-                except Exception as e:
-                    raise HtmlMathMathjaxRenderRecognizerException(f'处理MathjaxRender数学公式失败: {e}')
-
+            # process2: mathjax渲染器逻辑
+            try:
+                # case1：有mathjax配置
+                if math_render_type == MathRenderType.MATHJAX:
+                    math_render.find_math(tree)
+                # case2：无Mathjax配置但是开启Mathjax逻辑开关（node循环抽到公式的情况）
+                elif math_render_type is None and self.mathjax_detected:
+                    from llm_web_kit.extractor.html.recognizer.cc_math.render.mathjax import \
+                        MathJaxRenderMock
+                    math_render = MathJaxRenderMock()
+                    math_render.find_math(tree)
+            except Exception as e:
+                raise HtmlMathMathjaxRenderRecognizerException(f'处理MathjaxRender数学公式失败: {e}')
             # 保存处理后的html
             # with open('test20250702_result.html', 'w', encoding='utf-8') as f:
             #     f.write(self._element_to_html(tree))
diff --git a/llm_web_kit/extractor/html/recognizer/list.py b/llm_web_kit/extractor/html/recognizer/list.py
index d91caa23..c3599fc8 100644
--- a/llm_web_kit/extractor/html/recognizer/list.py
+++ b/llm_web_kit/extractor/html/recognizer/list.py
@@ -199,13 +199,14 @@ def __extract_list_item_text_recusive(el: HtmlElement):
                 # item['c'].strip(): 会导致前面处理br标签，添加的\n\n失效
                 result['c'] = ' '.join(normalize_text_segment(item['c'].strip()) for item in paragraph)
             return result
-        list_item_tags = ('li', 'dd', 'dt', 'ul', 'div', 'p', 'span')
-        if child.tag in list_item_tags:
-            paragraph = __extract_list_item_text_recusive(child)
-            if len(paragraph) > 0:
-                tem_json = json.dumps(paragraph).replace('$br$\"}', '\"}')
-                new_paragraph = json.loads(tem_json)
-                text_paragraph.append(new_paragraph)
+        # list_item_tags = ('li', 'dd', 'dt', 'ul', 'div', 'p', 'span')
+        # if child.tag in list_item_tags:
+        # 去掉if限制条件，允许非标准结构的列表通过
+        paragraph = __extract_list_item_text_recusive(child)
+        if len(paragraph) > 0:
+            tem_json = json.dumps(paragraph).replace('$br$\"}', '\"}')
+            new_paragraph = json.loads(tem_json)
+            text_paragraph.append(new_paragraph)
 
         for n, item in enumerate(text_paragraph):
             tem_json = json.dumps(item).replace('$br$', '\\n\\n')
diff --git a/tests/llm_web_kit/extractor/assets/extractor_chain_input/good_data/html/math_mathjax_mock.html b/tests/llm_web_kit/extractor/assets/extractor_chain_input/good_data/html/math_mathjax_mock.html
new file mode 100644
index 00000000..2928e55a
--- /dev/null
+++ b/tests/llm_web_kit/extractor/assets/extractor_chain_input/good_data/html/math_mathjax_mock.html
@@ -0,0 +1 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"        "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"><html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en"><head>    <title>Monotone Sequences of Real Numbers - Mathonline</title>    <script type="text/javascript"            src="http://d3g0gp89917ko0.cloudfront.net/v--b23e476b7ade/common--javascript/init.combined.js"></script>    <script type="text/javascript">        var URL_HOST = 'www.wikidot.com';        var URL_DOMAIN = 'wikidot.com';        var USE_SSL = true;        var URL_STATIC = 'http://d3g0gp89917ko0.cloudfront.net/v--b23e476b7ade';        // global request information        var WIKIREQUEST = {};        WIKIREQUEST.info = {};        WIKIREQUEST.info.domain = "mathonline.wikidot.com";        WIKIREQUEST.info.siteId = 613263;        WIKIREQUEST.info.siteUnixName = "mathonline";        WIKIREQUEST.info.categoryId = 4083504;        WIKIREQUEST.info.themeId = 1;        WIKIREQUEST.info.requestPageName = "monotone-sequences-of-real-numbers";        OZONE.request.timestamp = 1558920674;        OZONE.request.date = new Date();        WIKIREQUEST.info.lang = 'en';        WIKIREQUEST.info.pageUnixName = "monotone-sequences-of-real-numbers";        WIKIREQUEST.info.pageId = 24240376;        WIKIREQUEST.info.lang = "en";        OZONE.lang = "en";        var isUAMobile = !!/Android|webOS|iPhone|iPod|BlackBerry|IEMobile|Opera Mini/i.test(navigator.userAgent);    </script>    <script type="text/javascript">        require.config({            baseUrl: URL_STATIC + '/common--javascript',            paths: {                'jquery.ui': 'jquery-ui.min',                'jquery.form': 'jquery.form'            }        });    </script>    <meta http-equiv="content-type" content="text/html;charset=UTF-8"/>    <meta http-equiv="content-language" content="en"/>    <script type="text/javascript"            
src="http://d3g0gp89917ko0.cloudfront.net/v--b23e476b7ade/common--javascript/WIKIDOT.combined.js"></script>    <style type="text/css" id="internal-style">        /* modules */        /* theme */        @import url(http://d3g0gp89917ko0.cloudfront.net/v--b23e476b7ade/common--theme/base/css/style.css);        @import url(/admin:css/code/1);    </style>    <link rel="shortcut icon" href="/local--favicon/favicon.gif"/>    <link rel="icon" type="image/gif" href="/local--favicon/favicon.gif"/>    <link rel="apple-touch-icon" href="/common--images/apple-touch-icon-57x57.png"/>    <link rel="apple-touch-icon" sizes="72x72" href="/common--images/apple-touch-icon-72x72.png"/>    <link rel="apple-touch-icon" sizes="114x114" href="/common--images/apple-touch-icon-114x114.png"/>    <link rel="alternate" type="application/wiki" title="Edit this page"          href="javascript:WIKIDOT.page.listeners.editClick()"/>    <script type="text/javascript">        var _gaq = _gaq || [];        _gaq.push(['_setAccount', 'UA-18234656-1']);        _gaq.push(['_setDomainName', 'none']);        _gaq.push(['_setAllowLinker', true]);        _gaq.push(['_trackPageview']);        _gaq.push(['old._setAccount', 'UA-68540-5']);        _gaq.push(['old._setDomainName', 'none']);        _gaq.push(['old._setAllowLinker', true]);        _gaq.push(['old._trackPageview']);        _gaq.push(['userTracker._setAccount', 'UA-139433011-1']);        _gaq.push(['userTracker._trackPageview']);    </script>    <script type="text/javascript">        window.google_analytics_uacct = 'UA-18234656-1';        window.google_analytics_domain_name = 'none';    </script>    <link rel="manifest" href="/onesignal/manifest.json"/>    <script src="https://cdn.onesignal.com/sdks/OneSignalSDK.js" acync=""></script>    <script>        var OneSignal = window.OneSignal || [];        OneSignal.push(function () {            OneSignal.init({                appId: null,            });   
     });    </script>    <style>        .form-value-title, .form-labels, div.page-tags {            display: none;        }        #action-area h1 {            display: none;        }    </style>    <style>        .form-value-title, .form-labels, div.page-tags {            display: none;        }        #action-area h1 {            display: none;        }    </style></head><body id="html-body"><div id="skrollr-body">    <a name="page-top"></a>    <div id="container-wrap-wrap">        <div id="container-wrap">            <div id="container">                <div id="header">                    <h1><a href="/"><span>Mathonline</span></a></h1>                    <h2><span>Learn Mathematics</span></h2>                    <!-- google_ad_section_start(weight=ignore) -->                    <div id="search-top-box" class="form-search">                        <form id="search-top-box-form" action="dummy" class="input-append">                            <input id="search-top-box-input" class="text empty search-query" type="text" size="15"                                   name="query" value="Search this site"                                   onfocus="if(YAHOO.util.Dom.hasClass(this, 'empty')){YAHOO.util.Dom.removeClass(this,'empty'); this.value='';}"/><input                                class="button btn" type="submit" name="search" value="Search"/>                        </form>                    </div>                    <div id="top-bar">                    </div>                    <div id="login-status"><a href="javascript:;" onclick="WIKIDOT.page.listeners.createAccount(event)"                                              class="login-status-create-account btn">Create account</a> <span>or</span>                        <a href="javascript:;" onclick="WIKIDOT.page.listeners.loginClick(event)"                           class="login-status-sign-in btn btn-primary">Sign in</a></div>                    <div 
id="header-extra-div-1"><span></span></div>                    <div id="header-extra-div-2"><span></span></div>                    <div id="header-extra-div-3"><span></span></div>                </div>                <div id="content-wrap">                    <div id="side-bar">                    </div>                    <!-- google_ad_section_end -->                    <div id="main-content">                        <div id="action-area-top"></div>                        <div id="page-title">                            Monotone Sequences of Real Numbers                        </div>                        <div id="page-content">                            <table style="margin:0; padding:0">                                <tr>                                    <td style="margin:0; padding:0">                                        <div id="toc">                                            <div id="toc-action-bar"><a href="javascript:;"                                                                        onclick="WIKIDOT.page.listeners.foldToc(event)">Fold</a><a                                                    style="display: none" href="javascript:;"                                                    onclick="WIKIDOT.page.listeners.unfoldToc(event)">Unfold</a></div>                                            <div class="title">Table of Contents</div>                                            <div id="toc-list">                                                <div style="margin-left: 1em;"><a href="#toc0">Monotone Sequences of                                                    Real Numbers</a></div>                                            </div>                                        </div>                                    </td>                                </tr>                            </table>                            <h1 id="toc0"><span>Monotone Sequences of Real Numbers</span></h1>                     
       <p>We will now look at two new types of sequences, <strong>increasing sequences</strong> and                                <strong>decreasing sequences</strong>.</p>                            <table class="wiki-content-table">                                <tr>                                    <td><strong>Definition:</strong> A sequence of real numbers <span                                            class="math-inline">$(a_n)$</span> is said to be <strong>Increasing</strong>                                        if <span class="math-inline">$a_n ≤ a_{n+1}$</span> for all <span                                                class="math-inline">$n \in \mathbb{N}$</span>. Similarly, a sequence of                                        real numbers <span class="math-inline">$(a_n)$</span> is said to be <strong>Decreasing</strong>                                        if <span class="math-inline">$a_n ≥ a_{n+1}$</span> for all <span                                                class="math-inline">$n \in \mathbb{N}$</span>. A sequence <span                                                class="math-inline">$(a_n)$</span> is said to be                                        <strong>Monotone</strong> or <strong>Monotonic</strong> if it is either                                        increasing or decreasing.                                    
</td>                                </tr>                            </table>                            <p><em>A sequence <span class="math-inline">$(a_n)$</span> is said to be <strong>Strictly                                Increasing</strong> if <span class="math-inline">$a_n &lt; a_{n+1}$</span> for all <span                                    class="math-inline">$n \in \mathbb{N}$</span> and <strong>Strictly                                Decreasing</strong> if <span class="math-inline">$a_n &gt; a_{n+1}$</span> for all <span                                    class="math-inline">$n \in \mathbb{N}$</span>.</em></p>                            <p>For example, consider the sequence <span class="math-inline">$\left ( \frac{1}{n} \right ) = (1, \frac{1}{2}, \frac{1}{3}, ..., \frac{1}{n}, \frac{1}{n+1}, ... )$</span>.                                We note that <span class="math-inline">$\forall n \in \mathbb{N}$</span>, <span                                        class="math-inline">$n &lt; n+1$</span> and so <span class="math-inline">$\frac{1}{n} &gt; \frac{1}{n+1}$</span>,                                and so this sequence is decreasing and hence monotone.</p>                            <p>The following graph represents the first 10 terms of the monotonically decreasing                                sequence <span class="math-inline">$\left ( \frac{1}{n} \right )$</span>:</p>                            <div class="image-container aligncenter"><img                                    src="http://mathonline.wdfiles.com/local--files/monotone-sequences-of-real-numbers/Screen%20Shot%202014-12-04%20at%203.58.31%20PM.png"                                    alt="Screen%20Shot%202014-12-04%20at%203.58.31%20PM.png" class="image"/></div>                            <p>One such example of an increasing sequence is the sequence <span class="math-inline">$(n + 2)$</span>.                                
Clearly <span class="math-inline">$\forall n \in \mathbb{N}$</span>, <span                                        class="math-inline">$n + 2 &lt; (n+1) + 2 = n + 3$</span> (since if not, then                                <span class="math-inline">$n + 2 ≥ n + 3$</span> which implies that <span                                        class="math-inline">$0 ≥ 1$</span>, which is a contradiction). The following                                graph represents the first 10 terms of the monotonically increasing sequence <span                                        class="math-inline">$(n + 2)$</span>:</p>                            <div class="image-container aligncenter"><img                                    src="http://mathonline.wdfiles.com/local--files/monotone-sequences-of-real-numbers/Screen%20Shot%202014-12-04%20at%204.02.37%20PM.png"                                    alt="Screen%20Shot%202014-12-04%20at%204.02.37%20PM.png" class="image"/></div>                            <p>From the definition of an increasing and decreasing sequence, we should note that EVERY                                successive term in the sequence should either be larger than the previous (increasing                                sequences) or smaller than the previous (decreasing sequences). Therefore the sequence                                <span class="math-inline">$(1, 2, 1, \frac{1}{2}, \frac{1}{3}, \frac{1}{4}, ...)$</span>                                cannot be considered a decreasing sequence as <span class="math-inline">$1 = a_1 \not ≥ a_2 = 2$</span>.                                
From this, we will formulate the following definitions:</p>                            <table class="wiki-content-table">                                <tr>                                    <td><strong>Definition:</strong> A sequence of real numbers <span                                            class="math-inline">$(a_n)$</span> is said to be <strong>Ultimately                                        Increasing</strong> if for some <span                                            class="math-inline">$K \in \mathbb{N}$</span> we have that <span                                            class="math-inline">$\forall n ≥ K$</span> then <span class="math-inline">$a_n ≤ a_{n+1}$</span>.                                        Similarly, a sequence of real numbers <span class="math-inline">$(a_n)$</span>                                        is said to be <strong>Ultimately Decreasing</strong> if for some <span                                                class="math-inline">$K \in \mathbb{N}$</span> we have that <span                                                class="math-inline">$\forall n ≥ K$</span> then <span                                                class="math-inline">$a_n ≥ a_{n+1}$</span>. A sequence <span                                                class="math-inline">$(a_n)$</span> is said to be <strong>Ultimately                                            Monotone</strong> or <strong>Ultimately Monotonic</strong> if for some <span                                                class="math-inline">$K \in \mathbb{N}$</span>, if <span                                                class="math-inline">$n ≥ K$</span> then <span class="math-inline">$(a_n)$</span>                                        is either ultimately increasing or ultimately decreasing.                                    
</td>                                </tr>                            </table>                            <p>Consider the sequence <span                                    class="math-inline">$(n^2 - 4n + 3) = (0, -1, 0, 3, 8, ...)$</span>. This is an                                ultimately increasing sequence, since for <span class="math-inline">$n ≥ 2$</span> we                                have that <span class="math-inline">$a_n ≤ a_{n+1}$</span>. The following graph                                represents the first 7 terms of this ultimately increasing sequence:</p>                            <div class="image-container aligncenter"><img                                    src="http://mathonline.wdfiles.com/local--files/monotone-sequences-of-real-numbers/Screen%20Shot%202014-12-04%20at%204.20.53%20PM.png"                                    alt="Screen%20Shot%202014-12-04%20at%204.20.53%20PM.png" class="image"/></div>                        </div>                        <div id="page-info-break"></div>                        <div id="page-options-container">                        </div>                        <div id="action-area" style="display: none;"></div>                    </div>                </div>                <div id="footer" style="display: block; visibility: visible;">                    <div class="options" style="display: block; visibility: visible;">                        <a href="http://www.wikidot.com/doc" id="wikidot-help-button">Help</a>                        &nbsp;|                        <a href="http://www.wikidot.com/legal:terms-of-service" id="wikidot-tos-button">Terms of                            Service</a>                        &nbsp;|                        <a href="http://www.wikidot.com/legal:privacy-policy" id="wikidot-privacy-button">Privacy</a>                        &nbsp;|                        <a href="javascript:;" id="bug-report-button"                           
onclick="WIKIDOT.page.listeners.pageBugReport(event)">Report a bug</a>                        &nbsp;|                        <a href="javascript:;" id="abuse-report-button"                           onclick="WIKIDOT.page.listeners.flagPageObjectionable(event)">Flag as objectionable</a>                    </div>                    Powered by <a href="http://www.wikidot.com">Wikidot.com</a>                </div>                <div id="license-area" class="license-area">                    Unless otherwise stated, the content of this page is licensed under <a rel="license"                                                                                           href="http://creativecommons.org/licenses/by-sa/3.0/">Creative                    Commons Attribution-ShareAlike 3.0 License</a>                </div>                <div id="extrac-div-1"><span></span></div>                <div id="extrac-div-2"><span></span></div>                <div id="extrac-div-3"><span></span></div>            </div>        </div>        <!-- These extra divs/spans may be used as catch-alls to add extra imagery. -->        <div id="extra-div-1"><span></span></div>        <div id="extra-div-2"><span></span></div>        <div id="extra-div-3"><span></span></div>        <div id="extra-div-4"><span></span></div>        <div id="extra-div-5"><span></span></div>        <div id="extra-div-6"><span></span></div>    </div></div><div id="dummy-ondomready-block" style="display: none;"></div><!-- Google Analytics load --><script type="text/javascript">    (function () {        var ga = document.createElement('script');        ga.type = 'text/javascript';        ga.async = true;        ga.src = ('https:' == document.location.protocol ? 
'https://' : 'http://') + 'stats.g.doubleclick.net/dc.js';        var s = document.getElementsByTagName('script')[0];        s.parentNode.insertBefore(ga, s);    })();</script><!-- Quantcast --><script type="text/javascript">    _qoptions = {        qacct: "p-edL3gsnUjJzw-"    };    (function () {        var qc = document.createElement('script');        qc.type = 'text/javascript';        qc.async = true;        qc.src = ('https:' == document.location.protocol ? 'https://secure' : 'http://edge') + '.quantserve.com/quant.js';        var s = document.getElementsByTagName('script')[0];        s.parentNode.insertBefore(qc, s);    })();</script><noscript>    <img src="http://pixel.quantserve.com/pixel/p-edL3gsnUjJzw-.gif" style="display: none;" border="0" height="1"         width="1" alt="Quantcast"/></noscript><div id="page-options-bottom-tips" style="display: none;">    <div id="edit-button-hovertip">        Click here to edit contents of this page.    </div></div><div id="page-options-bottom-2-tips" style="display: none;">    <div id="edit-sections-button-hovertip">        Click here to toggle editing of individual sections of the page (if possible). Watch headings for an &quot;edit&quot;        link when available.    </div>    <div id="edit-append-button-hovertip">        Append content without editing the whole page source.    </div>    <div id="history-button-hovertip">        Check out how this page has evolved in the past.    </div>    <div id="discuss-button-hovertip">        If you want to discuss contents of this page - this is the easiest way to do it.    </div>    <div id="files-button-hovertip">        View and manage file attachments for this page.    </div>    <div id="site-tools-button-hovertip">        A few useful tools to manage this Site.    </div>    <div id="backlinks-button-hovertip">        See pages that link to and include this page.    
</div>    <div id="rename-move-button-hovertip">        Change the name (also URL address, possibly the category) of the page.    </div>    <div id="view-source-button-hovertip">        View wiki source for this page without editing.    </div>    <div id="parent-page-button-hovertip">        View/set parent page (used for creating breadcrumbs and structured layout).    </div>    <div id="abuse-report-button-hovertip">        Notify administrators if there is objectionable content in this page.    </div>    <div id="bug-report-button-hovertip">        Something does not work as expected? Find out what you can do.    </div>    <div id="wikidot-help-button-hovertip">        General Wikidot.com documentation and help section.    </div>    <div id="wikidot-tos-button-hovertip">        Wikidot.com Terms of Service - what you can, what you should not etc.    </div>    <div id="wikidot-privacy-button-hovertip">        Wikidot.com Privacy Policy.    </div></div></body></html>
\ No newline at end of file
diff --git a/tests/llm_web_kit/extractor/assets/extractor_chain_input/good_data/html_data_input.jsonl b/tests/llm_web_kit/extractor/assets/extractor_chain_input/good_data/html_data_input.jsonl
index 7ccc1eb1..22cebca1 100644
--- a/tests/llm_web_kit/extractor/assets/extractor_chain_input/good_data/html_data_input.jsonl
+++ b/tests/llm_web_kit/extractor/assets/extractor_chain_input/good_data/html_data_input.jsonl
@@ -100,4 +100,5 @@
 {"track_id": "test_mjx_container", "dataset_name": "test_mjx_container", "url": "https://test.com","data_source_category": "HTML",  "path":"testmathjax.html", "file_bytes": 1000, "page_layout_type":"artical", "meta_info": {"input_datetime": "2020-01-01 00:00:00"}}
 {"track_id": "test_word_press", "dataset_name": "test_word_press", "url": "https://test.com","data_source_category": "HTML",  "path":"word_press.html", "file_bytes": 1000, "page_layout_type":"artical", "meta_info": {"input_datetime": "2020-01-01 00:00:00"}}
 {"track_id": "test_ascii_delimiter", "dataset_name": "test_ascii_delimiter", "url": "https://montalk.net/notes/342/tuning-forks-and-megalithic-technology","data_source_category": "HTML",  "path":"math_test_ascii_delimiter.html", "file_bytes": 1000, "page_layout_type":"artical", "meta_info": {"input_datetime": "2020-01-01 00:00:00"}}
-{"track_id": "test_htmlmath_sub_sup", "dataset_name": "test_htmlmath_sub_sup", "url": "https://cccbdb.nist.gov/compvibs3.asp?casno=123911&charge=0&method=42&basis=0","data_source_category": "HTML",  "path":"math_table_title_htmlmath_sub_sup.html", "file_bytes": 1000, "page_layout_type":"artical", "meta_info": {"input_datetime": "2020-01-01 00:00:00"}}
\ No newline at end of file
+{"track_id": "test_htmlmath_sub_sup", "dataset_name": "test_htmlmath_sub_sup", "url": "https://cccbdb.nist.gov/compvibs3.asp?casno=123911&charge=0&method=42&basis=0","data_source_category": "HTML",  "path":"math_table_title_htmlmath_sub_sup.html", "file_bytes": 1000, "page_layout_type":"artical", "meta_info": {"input_datetime": "2020-01-01 00:00:00"}}
+{"track_id": "test_mathjax_mock", "dataset_name": "test_mathjax_mock", "url": "http://mathonline.wikidot.com/monotone-sequences-of-real-numbers","data_source_category": "HTML",  "path":"math_mathjax_mock.html", "file_bytes": 1000, "page_layout_type":"artical", "meta_info": {"input_datetime": "2020-01-01 00:00:00"}}
\ No newline at end of file
diff --git a/tests/llm_web_kit/extractor/html/recognizer/assets/ccmath/math_class_math.html b/tests/llm_web_kit/extractor/html/recognizer/assets/ccmath/math_class_math.html
new file mode 100644
index 00000000..ec9e8518
--- /dev/null
+++ b/tests/llm_web_kit/extractor/html/recognizer/assets/ccmath/math_class_math.html
@@ -0,0 +1 @@
+<!DOCTYPE html><html lang="en"><head>    <script async src="https://www.googletagmanager.com/gtag/js?id=UA-78463541-4"></script>    <script>        window.dataLayer = window.dataLayer || [];        function gtag() {            dataLayer.push(arguments)        };        gtag('js', new Date());        gtag('config', 'UA-78463541-4');    </script>    <meta charset="utf-8">    <meta name="viewport" content="width=device-width,minimum-scale=1,initial-scale=1">    <meta name="robots" content="index, follow">    <title>4.7 Years In Minutes - How Many Minutes Is 4.7 Years?</title>    <meta name="description"          content="Convert 4.7 years to minutes (4.7 yr to min). What is four point seven years in minutes? Easily convert from years into minutes.">    <link rel="canonical" href="https://convertoctopus.com/4-7-years-to-minutes">    <link rel="amphtml" href="https://convertoctopus.com/amp/4-7-years-to-minutes">    <script type="application/ld+json">        {            "@context": "http://schema.org",            "@type": "BreadcrumbList",            "itemListElement": [                {                    "@type": "ListItem",                    "position": 1,                    "item": {                        "@id": "/years-to-minutes",                        "name": "years to minutes"                    }                },                {                    "@type": "ListItem",                    "position": 2,                    "item": {                        "@id": "/4-7-years-to-minutes",                        "name": "4.7 years to minutes"                    }                }            ]        }    </script>    <script type="application/ld+json">        {            "@context": "https://schema.org",            "@type": "Article",            "inLanguage": "en",            "isFamilyFriendly": "http://schema.org/True",            "mainEntityOfPage": {                "@type": "WebPage",                
"@id": "https://convertoctopus.com/4-7-years-to-minutes"            },            "headline": "How long is 4.7 years in minutes?",            "alternativeHeadline": "How to convert 4.7 years to minutes",            "image": {                "@type": "ImageObject",                "representativeOfPage": "http://schema.org/True",                "url": "https://convertoctopus.com/images/4-7-years-to-minutes",                "width": "730",                "height": "380",                "description": "4.7 years in minutes",                "caption": "4.7 years is equal to 2470320 minutes"            },            "author": "ConvertOctopus",            "genre": "Unit Converter",            "keywords": "4.7 years minutes",            "publisher": {                "@type": "Organization",                "name": "ConvertOctopus",                "url": "https://convertoctopus.com",                "logo": {                    "@type": "ImageObject",                    "url": "https://convertoctopus.com/img/logo.png",                    "width": "201",                    "height": "24"                }            },            "url": "https://convertoctopus.com/4-7-years-to-minutes",            "datePublished": "2017-10-01 18:15:40",            "dateCreated": "2017-08-01 18:15:05",            "dateModified": "2019-01-05 19:13:18",            "description": "Convert 4.7 years to minutes",            "articleBody": "We conclude that four point seven years is equivalent to two million four hundred seventy thousand three hundred twenty minutes."        
}    </script>    <link rel="icon" href="/img/favicon.ico">    <link rel="apple-touch-icon" sizes="180x180" href="/img/apple-touch-icon.png">    <link rel="icon" type="image/png" sizes="32x32" href="/img/favicon-32x32.png">    <link rel="icon" type="image/png" sizes="16x16" href="/img/favicon-16x16.png">    <link rel="manifest" href="/img/site.webmanifest">    <link rel="mask-icon" href="/img/safari-pinned-tab.svg" color="#5bbad5">    <meta name="msapplication-TileColor" content="#da532c">    <meta name="theme-color" content="#117ed2">    <style>        button,        input,        select {            vertical-align: middle        }        .breadcrumb,        .container,        .convert-box,        .rw,        .title-wrap,        nav {            overflow: hidden        }        header {            background: #117ed2        }        .centered h2,        .result {            word-wrap: break-word        }        a,        button {            text-decoration: none        }        body,        html {            height: 100%;            width: 100%;            margin: 0;            padding: 0;            left: 0;            top: 0;            font-size: 100%        }        * {            font-family: FreeSans, Arimo, "Droid Sans", "Helvetica Neue", Helvetica, Arial, sans-serif;            color: #333447;            line-height: 1.5;            -webkit-font-smoothing: antialiased        }        p {            font-size: 16px;            font-weight: 400;            line-height: 1.8        }        .math,        .math sub,        h1,        h2,        h3 {            font-weight: 500        }        h1 {            color: #9f9f9f;            font-size: 22px;            margin: 0 0 15px;        }        h2,        h3 {            font-size: 1.2rem        }        h2 {            margin-top: 30px;            margin-bottom: 10px        }        .breadcrumb li,        footer p,       
 ul {            font-size: 16px        }        header {            padding: 15px 0        }        .breadcrumb,        ul {            padding: 0;            list-style: none        }        .conversion {            width: auto;            margin: 20px 0 10px        }        a {            color: #117ed2        }        a:hover {            color: #34a0f3        }        .breadcrumb {            display: block;            text-align: center;            margin: 15px 20px;            width: auto        }        .centered h2,        th {            text-align: left        }        .breadcrumb li {            float: left        }        h2.info,        h3 {            margin-top: 30px        }        p.info {            line-height: 1.5em        }        footer p {            color: #d4d4d4;            margin: 0 20px 20px        }        .separator {            margin: 0 4px        }        ul.type {            font-size: 24px        }        ul.type li {            padding: 10px 0        }        .conversion h2 {            margin-top: 0        }        .hidden {            display: none        }        .block:before,        .centered {            display: inline-block;            vertical-align: middle        }        .btm {            width: 100%;            margin: 0 auto        }        .col {            width: auto;            float: none        }        .math,        .math sub {            font-family: Georgia, Times New Roman, serif;            font-size: 18px        }        table {            border-collapse: collapse;            min-width: 70%        }        th {            border-top: 1px solid #ccc        }        tr {            border-bottom: 1px solid #ccc        }        td,        th {            padding: 8px 12px        }        blockquote {            margin: 0        }        h3 dfn {            font-weight: 600        }   
     .no-margin {            width: auto;            margin-top: 10px        }        .img-wrapper {            width: auto;            height: auto;            position: relative;            margin: 30px auto -15px        }        .block {            position: absolute;            top: 0;            left: 0;            right: 0;            bottom: 0;            margin: 0 8px;            z-index: 1        }        .block:before {            content: '';            height: 100%        }        .centered {            width: 65%;            padding-left: 10px        }        .centered h2 {            font-size: 30px;            line-height: 1;            color: #dab5f9;            margin: 0        }        .a-3,        button {            text-align: center        }        .centered h2 span {            font-size: 18px;            display: block;            color: #FFF;            line-height: 1        }        input[type=number]::-webkit-inner-spin-button,        input[type=number]::-webkit-outer-spin-button {            -webkit-appearance: none;            -moz-appearance: none;            appearance: none;            margin: 0        }        .conversion h2 {            display: none        }        .result,        .value {            display: block;            font-weight: 500        }        .conversions-box,        .result-box {            background: #FFF;            margin: 10px 0;            overflow: hidden        }        .result-box {            padding: 30px 20px        }        .conversions-box {            padding: 10px 20px        }        .result {            font-size: 34px;            color: #54a9f9        }        .value {            font-size: 22px;            color: #145694        }        .convert-box {            background-color: #FFF;            margin: 10px 0;            padding: 10px 20px 15px        }        .a-1,        .a-2 {            
width: 336px;            height: auto;            margin: 0 auto        }        .a-3 {            width: auto;            height: 100px;            margin: 10px auto 0        }        input,        select {            padding: .5em .6em;            box-sizing: border-box;            height: 40px        }        .top {            display: block;            margin-top: 10px        }        .a-box {            background-color: transparent        }        .boxes {            display: flex;            flex-flow: row wrap        }        label,        select {            display: block        }        button,        input {            display: inline-block        }        .hide,        .sky-left,        .sky-right {            display: none        }        .boxes > .box-1 {            order: 1        }        .boxes > .box-2 {            order: 3        }        .boxes > .box-3 {            order: 2        }        .box-1,        .box-2,        .box-3 {            width: 100%        }        .col-amount,        .col-button,        .col-unit-from,        .col-unit-to {            margin: 0;            width: 100%;            float: none        }        button,        input,        label,        select {            width: 100%;            font-size: 16px;            font-family: sans-serif        }        input {            border: 1px solid #e9e8e8;            border-radius: 4px;            width: 100%        }        fieldset {            margin: 0;            padding: 0;            border: 0        }        label {            margin: 20px 0 10px;            color: #969898        }        select {            box-shadow: none;            border: 1px solid #e9e8e8;            border-radius: 4px;            margin: .25em 0;            background-color: #fff        }        button {            margin: 25px 0 0;            background-color: #117ed2;            
color: #fff;            font-family: inherit;            font-size: 100%;            padding: 13px 20px;            border: transparent;            border-radius: 4px;            zoom: 1;            white-space: nowrap;            cursor: pointer;            -webkit-user-drag: none;            -webkit-user-select: none;            -moz-user-select: none;            -ms-user-select: none;            user-select: none        }        button:hover {            background-color: #34a0f3        }        .title-wrap {            background-color: #F0F0F2        }        .a-4 {            width: 300px;            height: auto;            margin: 15px auto        }        .home-button {            margin: 30px 0 15px        }        .col-left,        .col-right {            width: 100%;            float: none;            padding: 0        }        nav a {            color: #848484        }        nav a:hover {            color: #b7b7b7        }        .brand {            background-image: url('/img/logo.png');            background-repeat: no-repeat;            height: 24px;            width: 201px;            display: block;            margin: 0 20px        }        .home-converter {            margin: 0;        }        ul li {            padding: 5px 0        }        .content {            padding: 0 20px;        }        .converter-title {            margin: 0        }        @media (min-width: 768px) {            .a-3,            .conversion h2,            .hidden,            .top {                display: block            }            .container {                width: 740px;                margin: 0 auto;                padding: 0            }            h1,            h2 {                font-size: 24px            }            h3 {                font-size: 22px            }            .breadcrumb {                text-align: left;                margin: 15px auto;   
             width: 730px            }            .col {                width: 365px;                float: left            }            .block {                margin: 0 25px            }            .centered {                padding-left: 0            }            .centered h2 {                font-size: 36px            }            .centered h2 span {                font-size: 22px            }            .math,            .math sub {                font-size: 20px            }            .conversion {                margin: 50px auto            }            .item-1 {                margin-right: 10%            }            .item-1,            .item-2 {                width: 45%;                float: left            }            .btm,            .no-margin {                width: 730px            }            .no-margin {                margin: 0 auto            }            .img-wrapper {                margin-top: 50px;            }            body {                background: #f3f3f3            }            .content,            .conversion-list {                background: #FFF;                margin: 5px 0 10px;                overflow: hidden;            }            .grid-container {                overflow: hidden            }            .content {                padding: 20px 50px            }            .conversion-list {                padding: 15px            }            .container-home {                margin-top: 10px;                margin-bottom: 10px            }            .a-1 {                width: 336px;                height: auto            }            .a-3,            .top {                width: 728px;                height: 90px;                margin-top: 0            }            .result-box {                margin-bottom: 0;                padding: 40px 50px 0;                margin-top: 10px            }            .convert-box {
                margin-top: 0;                padding: 0 50px 40px            }            .converter-title {                display: none;            }            .box-1 .a-box,            .box-2 .a-box {                padding: 10px 10px            }            .box-1 .a-box {                margin-right: 5px            }            .box-2 .a-box {                margin-left: 5px            }            .a-box {                padding: 5px            }            .title-wrap,            nav {                background: 0 0            }            .boxes > .box-1 {                order: 2            }            .boxes > .box-2 {                order: 3            }            .boxes > .box-3 {                order: 1            }            .title-wrap {                margin: 0            }            .col-amount {                margin-right: 1%;                width: 15%;                float: left            }            .col-unit-from,            .col-unit-to {                margin-right: 1%;                width: 31%;                float: left            }            .col-button {                width: 20%;                float: left            }            button {                margin-top: 52px;                padding: 9px            }            .result {                font-size: 38px            }            label {                margin: 20px 0 10px            }            .box-1,            .box-2 {                width: 50%            }            .col-left,            .col-right {                width: 49%;                float: left            }            .col-left {                padding-right: 1%            }            .col-right {                padding-left: 1%            }            header {                padding: 20px 0;                margin-bottom: 15px            }            .hide {                display: block            }    
        .a-4 {                margin: 0 auto            }            .home-converter {                margin: 30px 60px;            }            footer p {                margin: 0 0 20px            }            .brand {                margin: 0            }        }        @media (min-width: 1120px) {            .container {                position: relative            }            .sky-left,            .sky-right {                position: absolute;                display: block            }            .sky {                width: auto;                height: auto;            }            .sky-left {                top: 800px;                left: -180px            }            .sky-right {                top: 800px;                right: -180px            }        }        .responsive {            width: 100%;            height: auto;        }        .social-wrapper {            margin: 35px 0 35px;        }        .social {            display: block;            overflow: hidden;            margin: 0 auto;            width: 275px        }        .social a {            background: url("/img/socialicons.png");            width: 45px;            height: 45px;            display: block;            margin: 2px 5px;            float: left;            left: 24px        }        #facebook-icon {            background-position-x: 0        }        #twitter-icon {            background-position-x: -45px        }        #messenger-icon {            background-position-x: -180px        }        #whatsapp-icon {            background-position-x: -135px        }        #pinterest-icon {            background-position-x: -90px        }    </style>    <!--[if lt IE 9]>    <script src="https://oss.maxcdn.com/html5shiv/3.7.2/html5shiv.min.js"></script>    <script src="https://oss.maxcdn.com/respond/1.4.2/respond.min.js"></script>    <![endif]-->    <script async 
src="//pagead2.googlesyndication.com/pagead/js/adsbygoogle.js"></script>    <script>        (adsbygoogle = window.adsbygoogle || []).push({            google_ad_client: "ca-pub-1543067051094976",            enable_page_level_ads: true        });    </script></head><body><header>    <div class="container">        <a class="brand" href="/"></a>    </div></header><article>    <div class="container">        <aside>            <div class="result-box">                <h1>How long is 4.7 years in minutes?</h1>                <span class="value">4.7 years equals</span>                <span class="result">2470320 minutes</span>            </div>        </aside>        <div class="boxes">            <div class="box-3">                <div class="convert-box">                    <h2 class="converter-title">Unit Converter</h2>                    <form action="/convert" method="get" target="_top">                        <fieldset>                            <div class="col-amount">                                <label for="amount">Amount</label>                                <input name="amount" required id="amount" step="0.1" type="number" min="0.1" max="10000"                                       placeholder="number" value="4.7">                            </div>                            <div class="col-unit-from">                                <label for="unitFrom">From</label>                                <select name="unitFrom" id="unitFrom">                                    <optgroup label="length">                                        <option value="centimeters">centimeters (cm)</option>                                        <option value="feet">feet (ft)</option>                                        <option value="inches">inches (in)</option>                                        <option value="kilometers">kilometers (km)</option>                                        <option value="meters">meters (m)</option>  
                                      <option value="miles">miles (mi)</option>                                        <option value="decimeters">decimeters (dm)</option>                                        <option value="millimeters">millimeters (mm)</option>                                        <option value="yards">yards (yd)</option>                                    </optgroup>                                    <optgroup label="mass">                                        <option value="grams">grams (g)</option>                                        <option value="ounces">ounces (oz)</option>                                        <option value="pounds">pounds (lb)</option>                                        <option value="kilograms">kilograms (kg)</option>                                    </optgroup>                                    <optgroup label="time">                                        <option value="days">days (d)</option>                                        <option value="months">months (mo)</option>                                        <option value="years" selected>years (yr)</option>                                        <option value="hours">hours (hr)</option>                                        <option value="minutes">minutes (min)</option>                                        <option value="seconds">seconds (s)</option>                                        <option value="weeks">weeks (wk)</option>                                    </optgroup>                                    <optgroup label="velocity">                                        <option value="feet-per-second">feet per second (ft/s)</option>                                        <option value="kilometers-per-hour">kilometers per hour (km/h)</option>                                        <option value="knots">knots (kt)</option>                                        <option value="meters-per-second">meters per second 
(m/s)</option>                                        <option value="miles-per-hour">miles per hour (mph)</option>                                    </optgroup>                                    <optgroup label="volume">                                        <option value="cubic-centimeters">cubic centimeters (cm3)</option>                                        <option value="cubic-feet">cubic feet (ft3)</option>                                        <option value="cubic-inches">cubic inches (in3)</option>                                        <option value="cubic-meters">cubic meters (m3)</option>                                        <option value="cups">cups (cup)</option>                                        <option value="deciliters">deciliters (dL)</option>                                        <option value="gallons">gallons (gal)</option>                                        <option value="liters">liters (L)</option>                                        <option value="milliliters">milliliters (ml)</option>                                        <option value="fluid-ounces">fluid ounces (fl oz)</option>                                        <option value="pints">pints (pt)</option>                                        <option value="quarts">quarts (qt)</option>                                        <option value="tablespoons">tablespoons (tbsp)</option>                                        <option value="teaspoons">teaspoons (tsp)</option>                                    </optgroup>                                </select>                            </div>                            <div class="col-unit-to">                                <label for="unitTo">To</label>                                <select name="unitTo" id="unitTo">                                    <optgroup label="length">                                        <option value="centimeters">centimeters (cm)</option>                           
             <option value="feet">feet (ft)</option>                                        <option value="inches">inches (in)</option>                                        <option value="kilometers">kilometers (km)</option>                                        <option value="meters">meters (m)</option>                                        <option value="miles">miles (mi)</option>                                        <option value="decimeters">decimeters (dm)</option>                                        <option value="millimeters">millimeters (mm)</option>                                        <option value="yards">yards (yd)</option>                                    </optgroup>                                    <optgroup label="mass">                                        <option value="grams">grams (g)</option>                                        <option value="ounces">ounces (oz)</option>                                        <option value="pounds">pounds (lb)</option>                                        <option value="kilograms">kilograms (kg)</option>                                    </optgroup>                                    <optgroup label="time">                                        <option value="days">days (d)</option>                                        <option value="months">months (mo)</option>                                        <option value="years">years (yr)</option>                                        <option value="hours">hours (hr)</option>                                        <option value="minutes" selected>minutes (min)</option>                                        <option value="seconds">seconds (s)</option>                                        <option value="weeks">weeks (wk)</option>                                    </optgroup>                                    <optgroup label="velocity">                                        <option value="feet-per-second">feet per 
second (ft/s)</option>                                        <option value="kilometers-per-hour">kilometers per hour (km/h)</option>                                        <option value="knots">knots (kt)</option>                                        <option value="meters-per-second">meters per second (m/s)</option>                                        <option value="miles-per-hour">miles per hour (mph)</option>                                    </optgroup>                                    <optgroup label="volume">                                        <option value="cubic-centimeters">cubic centimeters (cm3)</option>                                        <option value="cubic-feet">cubic feet (ft3)</option>                                        <option value="cubic-inches">cubic inches (in3)</option>                                        <option value="cubic-meters">cubic meters (m3)</option>                                        <option value="cups">cups (cup)</option>                                        <option value="deciliters">deciliters (dL)</option>                                        <option value="gallons">gallons (gal)</option>                                        <option value="liters">liters (L)</option>                                        <option value="milliliters">milliliters (ml)</option>                                        <option value="fluid-ounces">fluid ounces (fl oz)</option>                                        <option value="pints">pints (pt)</option>                                        <option value="quarts">quarts (qt)</option>                                        <option value="tablespoons">tablespoons (tbsp)</option>                                        <option value="teaspoons">teaspoons (tsp)</option>                                    </optgroup>                                </select>                            </div>                            <div class="col-button">     
                           <button type="submit">Convert</button>                            </div>                        </fieldset>                    </form>                </div>            </div>            <div class="box-1">                <div class="a-box">                    <div class="a-1">                        <ins class="adsbygoogle" style="display:inline-block;width:336px;height:280px"                             data-ad-client="ca-pub-1543067051094976" data-ad-slot="2593765870"></ins>                        <script>                            (adsbygoogle = window.adsbygoogle || []).push({});                        </script>                    </div>                </div>            </div>            <div class="box-2">                <div class="a-box">                    <div class="a-2">                        <ins class="adsbygoogle" style="display:inline-block;width:336px;height:280px"                             data-ad-client="ca-pub-1543067051094976" data-ad-slot="2593765870"></ins>                        <script>                            (adsbygoogle = window.adsbygoogle || []).push({});                        </script>                    </div>                </div>            </div>        </div>        <div class="content">            <section>                <h2>Conversion formula</h2>                <p>The conversion factor from years to minutes is 525600, which means that 1 year is equal to 525600                    minutes:</p>                <p class="math">1 yr = 525600 min</p>                <p>To convert 4.7 years into minutes we have to multiply 4.7 by the conversion factor in order to get                    the time amount from years to minutes. 
We can also form a simple proportion to calculate the                    result:</p>                <p class="math">1 yr &rarr; 525600 min</p>                <p class="math">4.7 yr &rarr; T<sub>(min)</sub></p>                <p>Solve the above proportion to obtain the time <em>T</em> in minutes:</p>                <p class="math">T<sub>(min)</sub> = 4.7 yr &times; 525600 min</p>                <p class="math">T<sub>(min)</sub> = 2470320 min</p>                <p>The final result is:</p>                <p class="math">4.7 yr &rarr; 2470320 min</p>                <p>We conclude that <strong>4.7 years is equivalent to 2470320 minutes</strong>:</p>                <p class="math">4.7 years = 2470320 minutes</p>            </section>        </div>        <img alt="4.7 years is equal to 2470320 minutes" src="/images/4-7-years-to-minutes" width="730" height="380"             class="responsive">        <div class="content">            <section>                <h2>Alternative conversion</h2>                <p>We can also convert by utilizing the inverse value of the conversion factor. In this case <strong>1                    minute is equal to 4.0480585511189E-7 &times; 4.7 years</strong>.</p>                <p>Another way is saying that <em>4.7 years is equal to 1 &divide; 4.0480585511189E-7 minutes</em>.            </section>            <section>                <h2>Approximate result</h2>                <p>For practical purposes we can round our final result to an approximate numerical value. We can say                    that <strong>four point seven years is approximately two million four hundred seventy thousand three                        hundred twenty minutes</strong>:</p>                <p class="math">4.7 yr &cong; 2470320 min</p>                <p>An alternative is also that <em>one minute is approximately zero times four point seven years</em>.                
</p>            </section>            <div class="social-wrapper">                <div class="social">                    <a id="facebook-icon"                       href="https://www.facebook.com/sharer/sharer.php?u=https://convertoctopus.com/4-7-years-to-minutes"                       target="_blank"></a>                    <a id="twitter-icon"                       href="https://twitter.com/intent/tweet?text=4.7 Years In Minutes - How Many Minutes Is 4.7 Years?&amp;url=https://convertoctopus.com/4-7-years-to-minutes"                       target="_blank"></a>                    <a id="whatsapp-icon"                       href="whatsapp://send?text=4.7 Years In Minutes - How Many Minutes Is 4.7 Years? https://convertoctopus.com/4-7-years-to-minutes"                       target="_blank"></a>                    <a id="messenger-icon"                       href="fb-messenger://share?link=4.7 Years In Minutes - How Many Minutes Is 4.7 Years? https://convertoctopus.com/4-7-years-to-minutes"                       target="_blank"></a>                    <a id="pinterest-icon"                       href="https://pinterest.com/pin/create/button/?url=https://convertoctopus.com/4-7-years-to-minutes&amp;description=4.7 Years In Minutes - How Many Minutes Is 4.7 Years?"                       
target="_blank"></a>                </div>            </div>            <section>                <h2>Conversion table</h2>                <h3>years to minutes chart</h3>                <p>For quick reference purposes, below is the conversion table you can use to convert from years to                    minutes</p>                <table>                    <thead>                    <tr>                        <th>years (yr)</th>                        <th>minutes (min)</th>                    </tr>                    </thead>                    <tbody>                    <tr>                        <td><a href="/5-7-years-to-minutes">5.7 years</a></td>                        <td>2995920 minutes</td>                    </tr>                    <tr>                        <td><a href="/6-7-years-to-minutes">6.7 years</a></td>                        <td>3521520 minutes</td>                    </tr>                    <tr>                        <td><a href="/7-7-years-to-minutes">7.7 years</a></td>                        <td>4047120 minutes</td>                    </tr>                    <tr>                        <td><a href="/8-7-years-to-minutes">8.7 years</a></td>                        <td>4572720 minutes</td>                    </tr>                    <tr>                        <td><a href="/9-7-years-to-minutes">9.7 years</a></td>                        <td>5098320 minutes</td>                    </tr>                    <tr>                        <td><a href="/10-7-years-to-minutes">10.7 years</a></td>                        <td>5623920 minutes</td>                    </tr>                    <tr>                        <td><a href="/11-7-years-to-minutes">11.7 years</a></td>                        <td>6149520 minutes</td>                    </tr>                    <tr>                        <td><a href="/12-7-years-to-minutes">12.7 years</a></td>                        <td>6675120 minutes</td>              
      </tr>                    <tr>                        <td><a href="/13-7-years-to-minutes">13.7 years</a></td>                        <td>7200720 minutes</td>                    </tr>                    <tr>                        <td><a href="/14-7-years-to-minutes">14.7 years</a></td>                        <td>7726320 minutes</td>                    </tr>                    </tbody>                </table>            </section>            <aside>                <h2>Conversion units</h2>                <p>The units involved in this conversion are years and minutes. This is how they are defined:</p>                <section>                    <h3><dfn>Year</dfn></h3>                    <blockquote cite="https://en.wikipedia.org/">                        <p>A year (symbol: y; also abbreviated yr.) is the orbital period of the Earth moving in its                            orbit around the Sun. Due to the Earth&#039;s axial tilt, the course of a year sees the                            passing of the seasons, marked by changes in weather, the hours of daylight, and,                            consequently, vegetation and soil fertility. In temperate and subpolar regions around the                            globe, four seasons are generally recognized: spring, summer, autumn and winter. In tropical                            and subtropical regions several geographical sectors do not present defined seasons; but in                            the seasonal tropics, the annual wet and dry seasons are recognized and tracked. A calendar                            year is an approximation of the number of days of the Earth&#039;s orbital period as counted                            in a given calendar. 
The Gregorian, or modern, calendar, presents its calendar year to be                            either a common year of 365 days or a leap year of 366 days.</p>                        <footer>                            Source: <cite title="Wikipedia"><a href="https://en.wikipedia.org/">Wikipedia</a> Topic:                            year</cite>                        </footer>                    </blockquote>                </section>                <section>                    <h3><dfn>Minute</dfn></h3>                    <blockquote cite="https://en.wikipedia.org/">                        <p>The minute is a unit of time or of angle. As a unit of time, the minute (symbol: min) is                            equal to 1⁄60 (the first sexagesimal fraction) of an hour, or 60 seconds. In the UTC time                            standard, a minute on rare occasions has 61 seconds, a consequence of leap seconds (there is                            a provision to insert a negative leap second, which would result in a 59-second minute, but                            this has never happened in more than 40 years under this system). As a unit of angle, the                            minute of arc is equal to 1⁄60 of a degree, or 60 seconds (of arc). Although not an SI unit                            for either time or angle, the minute is accepted for use with SI units for both. The SI                            symbols for minute or minutes are min for time measurement, and the prime symbol after a                            number, e.g. 5′, for angle measurement. The prime is also sometimes used informally to                            denote minutes of time. In contrast to the hour, the minute (and the second) does not have a                            clear historical background. 
What is traceable only is that it started being recorded in the                            Middle Ages due to the ability of construction of &quot;precision&quot; timepieces                            (mechanical and water clocks). However, no consistent records of the origin for the division                            as 1⁄60 part of the hour (and the second 1⁄60 of the minute) have ever been found, despite                            many speculations.</p>                        <footer>                            Source: <cite title="Wikipedia"><a href="https://en.wikipedia.org/">Wikipedia</a> Topic:                            minute</cite>                        </footer>                    </blockquote>                </section>            </aside>            <div class="grid-container">                <aside>                    <h3>How long is 4.7 years in other time units?</h3>                    <ul>                        <li><a href="/4-7-years-to-days">4.7 years to days</a></li>                        <li><a href="/4-7-years-to-months">4.7 years to months</a></li>                        <li><a href="/4-7-years-to-hours">4.7 years to hours</a></li>                        <li><a href="/4-7-years-to-minutes">4.7 years to minutes</a></li>                        <li><a href="/4-7-years-to-seconds">4.7 years to seconds</a></li>                        <li><a href="/4-7-years-to-weeks">4.7 years to weeks</a></li>                    </ul>                </aside>            </div>            <div class="grid-container">                <aside>                    <h3>Recent years to minutes conversions</h3>                    <div class="item-1">                        <ul>                            <li><a href="/3-8-years-to-minutes">3.8 years to minutes</a></li>                            <li><a href="/14-4-years-to-minutes">14.4 years to minutes</a></li>                            <li><a href="/17-7-years-to-minutes">17.7 years to 
minutes</a></li>                            <li><a href="/58-5-years-to-minutes">58.5 years to minutes</a></li>                            <li><a href="/263-years-to-minutes">263 years to minutes</a></li>                            <li><a href="/168-years-to-minutes">168 years to minutes</a></li>                            <li><a href="/141-years-to-minutes">141 years to minutes</a></li>                            <li><a href="/69-4-years-to-minutes">69.4 years to minutes</a></li>                            <li><a href="/528-years-to-minutes">528 years to minutes</a></li>                            <li><a href="/4-6-years-to-minutes">4.6 years to minutes</a></li>                        </ul>                    </div>                    <div class="item-2">                        <ul>                            <li><a href="/420-years-to-minutes">420 years to minutes</a></li>                            <li><a href="/134-9-years-to-minutes">134.9 years to minutes</a></li>                            <li><a href="/22-5-years-to-minutes">22.5 years to minutes</a></li>                            <li><a href="/78-years-to-minutes">78 years to minutes</a></li>                            <li><a href="/45-4-years-to-minutes">45.4 years to minutes</a></li>                            <li><a href="/164-4-years-to-minutes">164.4 years to minutes</a></li>                            <li><a href="/59-8-years-to-minutes">59.8 years to minutes</a></li>                            <li><a href="/259-years-to-minutes">259 years to minutes</a></li>                            <li><a href="/264-years-to-minutes">264 years to minutes</a></li>                            <li><a href="/159-5-years-to-minutes">159.5 years to minutes</a></li>                        </ul>                    </div>                </aside>            </div>            <div class="grid-container">                <aside>                    <h3>Recent conversions</h3>                    
<ul>                        <li><a href="/152-feet-per-second-to-meters-per-second">152 feet per second to meters per                            second</a></li>                        <li><a href="/7-5-feet-per-second-to-miles-per-hour">7.5 feet per second to miles per hour</a>                        </li>                        <li><a href="/59-8-grams-to-kilograms">59.8 grams to kilograms</a></li>                        <li><a href="/74-4-feet-per-second-to-meters-per-second">74.4 feet per second to meters per                            second</a></li>                        <li><a href="/11-6-grams-to-kilograms">11.6 grams to kilograms</a></li>                        <li><a href="/1108-gallons-to-teaspoons">1108 gallons to teaspoons</a></li>                        <li><a href="/965-minutes-to-seconds">965 minutes to seconds</a></li>                        <li><a href="/861-inches-to-meters">861 inches to meters</a></li>                        <li><a href="/102-milliliters-to-teaspoons">102 milliliters to teaspoons</a></li>                        <li><a href="/13-9-grams-to-pounds">13.9 grams to pounds</a></li>                        <li><a href="/182-years-to-minutes">182 years to minutes</a></li>                        <li><a href="/38-7-knots-to-miles-per-hour">38.7 knots to miles per hour</a></li>                        <li><a href="/17-4-feet-per-second-to-miles-per-hour">17.4 feet per second to miles per hour</a>                        </li>                        <li><a href="/53-3-kilometers-per-hour-to-knots">53.3 kilometers per hour to knots</a></li>                        <li><a href="/87-2-centimeters-to-millimeters">87.2 centimeters to millimeters</a></li>                        <li><a href="/96-centimeters-to-meters">96 centimeters to meters</a></li>                        <li><a href="/88-3-kilometers-per-hour-to-miles-per-hour">88.3 kilometers per hour to miles per                            hour</a></li>                     
   <li><a href="/22-1-feet-to-yards">22.1 feet to yards</a></li>                        <li><a href="/357-cups-to-tablespoons">357 cups to tablespoons</a></li>                        <li><a href="/1353-months-to-years">1353 months to years</a></li>                    </ul>                </aside>            </div>        </div>    </div></article><nav>    <ol class="breadcrumb">        <li><a href="/">home</a></li>        <li class="separator">&rsaquo;</li>        <li><a href="/years-to-minutes">years to minutes</a></li>        <li class="separator">&rsaquo;</li>        <li>4.7 yr to min</li>    </ol></nav><footer class="footer-btm">    <div class="container">        <p>&copy;2020 ConvertOctopus.com</p>    </div></footer></body></html>
\ No newline at end of file
diff --git a/tests/llm_web_kit/extractor/html/recognizer/assets/ccmath/math_class_math_1.html b/tests/llm_web_kit/extractor/html/recognizer/assets/ccmath/math_class_math_1.html
new file mode 100644
index 00000000..e69de29b
diff --git a/tests/llm_web_kit/extractor/html/recognizer/assets/ccmath/math_class_math_inline_1.html b/tests/llm_web_kit/extractor/html/recognizer/assets/ccmath/math_class_math_inline_1.html
new file mode 100644
index 00000000..cda8dd54
--- /dev/null
+++ b/tests/llm_web_kit/extractor/html/recognizer/assets/ccmath/math_class_math_inline_1.html
@@ -0,0 +1,8 @@
+1 yr = 525600 min
+1 yr → 525600 min
+4.7 yr → T
+T
+T
+4.7 yr → 2470320 min
+4.7 years = 2470320 minutes
+4.7 yr ≅ 2470320 min
\ No newline at end of file
diff --git a/tests/llm_web_kit/extractor/html/recognizer/test_list.py b/tests/llm_web_kit/extractor/html/recognizer/test_list.py
index dbe79347..5f8d61de 100644
--- a/tests/llm_web_kit/extractor/html/recognizer/test_list.py
+++ b/tests/llm_web_kit/extractor/html/recognizer/test_list.py
@@ -375,3 +375,38 @@ def test_get_attribute_standalone_improved(self):
             error_msg = str(context.exception)
             self.assertIn('中没有cclist标签', error_msg)
             self.assertIn(element.tag, error_msg)
+
+    def test_no_standard_get_list_content_list(self):
+        """测试非标准结构的list获取content_list."""
+        # 获取私有方法 __get_list_content_list
+        get_list_content_list_method = getattr(self.__list_recognize, '_ListRecognizer__get_list_content_list')
+
+        # 创建测试数据
+        test_elements = [
+            html_to_element('''<ul id="productslist">
+                                    <figure class="list">
+                                        <figcaption><h4>How to Process Oxidized Lead Zinc Ore by Flotation</h4>
+                                            <p>How to Process Oxidized Lead Zinc Ore by Flotation. Metallurgical Content. The
+                                                Flowsheet. Crushing Section; GRINDING; Conditioning and Flotation; Thickening and
+                                                Filtering; Sampling; ORE TESTING LABORATORY; The problem of treating oxidized lead
+                                                zinc ores for the production of high grade lead zinc concentrates is a complex </p>
+                                        </figcaption>
+                                    </figure>
+                                    <figure class="list">
+                                        <figcaption><h4>ore dressing flotation machine,fluorite ore flotation </h4>
+                                            <p>Ore dressing flotation machine is widely used to conduct flotation of copper ore,
+                                                lead zinc ore, glod ore, etc. Mail to sales@sinofote</p>
+                                        </figcaption>
+                                    </figure>
+                                    <figure class="list">
+                                        <figcaption><h4>Zinc Ore Mining Crusher wffofoundation</h4>
+                                            <p>Zinc ore mining process can 14 2016 31 Mar Lead zinc ore dressing equipment zinc ore
+                                                Once processing in the flotation circuit was complete, the zinc </p>
+                                        </figcaption>
+                                    </figure>
+                                </ul>''')
+        ]
+
+        for i, element in enumerate(test_elements):
+            list_content_list = get_list_content_list_method(element, 1)
+            assert len(list_content_list) == 3
diff --git a/tests/llm_web_kit/extractor/html/recognizer/test_math.py b/tests/llm_web_kit/extractor/html/recognizer/test_math.py
index f51c1869..6069c590 100644
--- a/tests/llm_web_kit/extractor/html/recognizer/test_math.py
+++ b/tests/llm_web_kit/extractor/html/recognizer/test_math.py
@@ -207,6 +207,14 @@
         'base_url': 'https://physicshelpforum.com/t/latex-upgrade-physics-forum-powered-by-mathjax-v3.17489/',
         'expected': 'assets/ccmath/math_physicsforums_2_1.html',
         'expected_inline': 'assets/ccmath/math_physicsforums_2_inline_1.html'
+    },
+    {
+        'input': [
+            'assets/ccmath/math_class_math.html',
+        ],
+        'base_url': 'https://convertoctopus.com/4-7-years-to-minutes',
+        'expected': 'assets/ccmath/math_class_math_1.html',
+        'expected_inline': 'assets/ccmath/math_class_math_inline_1.html'
     }
 ]
 
diff --git a/tests/llm_web_kit/extractor/test_extractor_chain.py b/tests/llm_web_kit/extractor/test_extractor_chain.py
index dc53e015..7f1bf8c9 100644
--- a/tests/llm_web_kit/extractor/test_extractor_chain.py
+++ b/tests/llm_web_kit/extractor/test_extractor_chain.py
@@ -64,7 +64,7 @@ def setUp(self):
                     continue
                 self.data_json.append(json.loads(line))
 
-        assert len(self.data_json) == 103
+        assert len(self.data_json) == 104
 
         # Config for HTML extraction
         self.config = load_pipe_tpl('html-test')
@@ -810,16 +810,27 @@ def test_ascii_delimiter(self):
         input_data = DataJson(test_data)
         result = chain.extract(input_data)
         md_content = result.get_content_list().to_nlp_md()
-        # with open('mathjax抽取case222.md', 'w', encoding='utf-8') as f:
-        #     f.write(md_content)
         self.assertIn(r'$f = \frac{1}{T} ^ 2 \sqrt{\frac{A E}{\rho}}$', md_content)
         self.assertIn(r'${m}^{2}$', md_content)
         self.assertIn(r'\rho$', md_content)
         self.assertIn(r'$f = \frac{1}{2 L} \sqrt{\frac{E}{\rho}}$', md_content)
         self.assertIn(r'$L = {T}^{2} / \left(2 W\right)$', md_content)
 
+    def test_mathjax_mock(self):
+        """测试虚拟mathjax渲染器."""
+        chain = ExtractSimpleFactory.create(self.config)
+        self.assertIsNotNone(chain)
+        test_data = self.data_json[103]
+        input_data = DataJson(test_data)
+        result = chain.extract(input_data)
+        md_content = result.get_content_list().to_nlp_md()
+        self.assertIn(r'$(a_n)$', md_content)
+        self.assertIn(r'$a_n ≤ a_{n+1}$', md_content)
+        self.assertIn(r'$n \in \mathbb{N}$', md_content)
+        self.assertIn(r'$\left ( \frac{1}{n} \right ) = (1, \frac{1}{2}, \frac{1}{3}, ..., \frac{1}{n}, \frac{1}{n+1}, ... )$', md_content)
+
     def test_htmlmath_sub_sup(self):
-        """测试ascii分隔符."""
+        """测试htmlmath中的上下标标签."""
         chain = ExtractSimpleFactory.create(self.config)
         self.assertIsNotNone(chain)
         test_data = self.data_json[102]

From 9e03b5114841999615fdc7a279ec0843dd9d31ce Mon Sep 17 00:00:00 2001
From: chupei <njuchupei@gmail.com>
Date: Fri, 15 Aug 2025 13:58:05 +0800
Subject: [PATCH 2/8] Revert "v3.2.2-released" (#525)

---
 .../html/recognizer/cc_math/render/mathjax.py | 36 -----------------
 .../html/recognizer/cc_math/render/render.py  |  1 -
 .../html/recognizer/cc_math/tag_script.py     | 19 +--------
 .../extractor/html/recognizer/ccmath.py       | 40 +++++++------------
 llm_web_kit/extractor/html/recognizer/list.py | 15 ++++---
 .../good_data/html/math_mathjax_mock.html     |  1 -
 .../good_data/html_data_input.jsonl           |  3 +-
 .../assets/ccmath/math_class_math.html        |  1 -
 .../assets/ccmath/math_class_math_1.html      |  0
 .../ccmath/math_class_math_inline_1.html      |  8 ----
 .../extractor/html/recognizer/test_list.py    | 35 ----------------
 .../extractor/html/recognizer/test_math.py    |  8 ----
 .../extractor/test_extractor_chain.py         | 19 ++-------
 13 files changed, 29 insertions(+), 157 deletions(-)
 delete mode 100644 tests/llm_web_kit/extractor/assets/extractor_chain_input/good_data/html/math_mathjax_mock.html
 delete mode 100644 tests/llm_web_kit/extractor/html/recognizer/assets/ccmath/math_class_math.html
 delete mode 100644 tests/llm_web_kit/extractor/html/recognizer/assets/ccmath/math_class_math_1.html
 delete mode 100644 tests/llm_web_kit/extractor/html/recognizer/assets/ccmath/math_class_math_inline_1.html

diff --git a/llm_web_kit/extractor/html/recognizer/cc_math/render/mathjax.py b/llm_web_kit/extractor/html/recognizer/cc_math/render/mathjax.py
index 06ac62a9..bb309468 100644
--- a/llm_web_kit/extractor/html/recognizer/cc_math/render/mathjax.py
+++ b/llm_web_kit/extractor/html/recognizer/cc_math/render/mathjax.py
@@ -533,42 +533,6 @@ def _detect_ascii_math(self, tree: HtmlElement) -> bool:
         return processascii
 
 
-class MathJaxRenderMock(MathJaxRender):
-    """虚拟的MathJax渲染器，用于没有MathJax配置但需要使用MathJax解析逻辑的情况.
-
-    这个类主要用于处理以下场景：
-    1. 网页中没有显式的MathJax配置（如<script type="text/x-mathjax-config">）
-    2. 但在HTML解析过程中检测到了数学公式元素（如<math>标签、公式相关的class等）
-    3. 需要使用MathJax渲染器方案扫一遍所有内容，防止漏抽取公式
-
-    与普通MathJaxRender的区别：
-    - MathJaxRender：会解析HTML中的MathJax配置，使用自定义的分隔符和选项
-    - MathJaxRenderMock：直接使用默认的MathJax配置，不解析HTML配置
-    """
-
-    def __init__(self):
-        """初始化虚拟MathJax渲染器."""
-        super().__init__()
-        self.render_type = MathRenderType.MATHJAX_MOCK
-        # 使用默认的MathJax选项
-        self.options = MATHJAX_OPTIONS.copy()
-
-    def get_options(self, html: str) -> Dict[str, Any]:
-        """虚拟渲染器直接返回默认选项，不解析HTML配置.
-
-        Args:
-            html: HTML字符串（忽略）
-
-        Returns:
-            Dict[str, Any]: 默认MathJax选项字典
-        """
-        return self.options
-
-    def is_customized_options(self) -> bool:
-        """虚拟渲染器始终返回False，表示使用默认配置."""
-        return False
-
-
 # 使用示例
 if __name__ == '__main__':
     # MathJax示例
diff --git a/llm_web_kit/extractor/html/recognizer/cc_math/render/render.py b/llm_web_kit/extractor/html/recognizer/cc_math/render/render.py
index ad2a9390..7314d66a 100644
--- a/llm_web_kit/extractor/html/recognizer/cc_math/render/render.py
+++ b/llm_web_kit/extractor/html/recognizer/cc_math/render/render.py
@@ -11,7 +11,6 @@
 class MathRenderType:
     """数学公式渲染器类型."""
     MATHJAX = 'mathjax'
-    MATHJAX_MOCK = 'mathjax_mock'  # 虚拟的mathjax渲染器
     MATHJAX_CUSTOMIZED = 'mathjax_customized'  # 临时增加这个type，未来区分走自定义解析的数据
     KATEX = 'katex'
 
diff --git a/llm_web_kit/extractor/html/recognizer/cc_math/tag_script.py b/llm_web_kit/extractor/html/recognizer/cc_math/tag_script.py
index 8821f0e1..0e218163 100644
--- a/llm_web_kit/extractor/html/recognizer/cc_math/tag_script.py
+++ b/llm_web_kit/extractor/html/recognizer/cc_math/tag_script.py
@@ -14,16 +14,10 @@ def modify_tree(cm: CCMATH, math_render: str, o_html: str, node: HtmlElement, pa
     try:
         text = node.text
         if text and text_strip(text):
-            # 先处理非script标签和style标签的节点：即class为math/katex的节点
-            # 例子：<div class="math">f(x) \sim x^2, \quad x\to\infty</div>
             if node.tag not in ['script', 'style']:
                 new_span = create_new_span([(CCMATH_INLINE,MathType.LATEX)], cm.wrap_math_md(text), node, math_render, o_html)
-                # node.addnext(new_span)
-                replace_element(node, new_span)  # 替换节点，而不是添加
-
-            # 下面是katex逻辑
+                node.addnext(new_span)
             else:
-                # 例子：<script type = "e44e-text/javascript">katex.render("f(a,b,c) = (a^2+b^2+c^2)^3", mykatex);</script>
                 katex_pattern = re.compile(r'katex.render')
                 node_text = text_strip(text)
                 if katex_pattern.findall(node_text):
@@ -34,17 +28,8 @@ def modify_tree(cm: CCMATH, math_render: str, o_html: str, node: HtmlElement, pa
                             target_element = target_elements[0]
                             o_html = element_to_html(target_element)
                             target_element.text = None
-                            wrapped_formula = cm.wrap_math_md(formula_content)
-                            # 转化为ccmath，例子：
-                            # <ccmath-inline type="latex" by="katex" html='...'>f(a,b,c) = (a^2+b^2+c^2)^3</ccmath-inline>
-                            new_span = create_new_span([(CCMATH_INLINE, MathType.LATEX)], wrapped_formula,
-                                                       target_element, math_render, o_html)
-                            # 插入到span标签内，例子：
-                            # <span id="mykatex"><ccmath-inline ... </ccmath-inline></span>
+                            new_span = create_new_span([(CCMATH_INLINE,MathType.LATEX)], cm.wrap_math_md(formula_content), target_element, math_render, o_html)
                             target_element.insert(0, new_span)
-
-                # 处理sript且type为math/tex的节点
-                # 例子：<html><head><script type="math/tex">x^2 + y^2 = z^2</script></head></html>
                 elif node.get('type') and 'math/tex' in node.get('type'):
                     tag_math_type_list = cm.get_equation_type(o_html)
                     if not tag_math_type_list:
diff --git a/llm_web_kit/extractor/html/recognizer/ccmath.py b/llm_web_kit/extractor/html/recognizer/ccmath.py
index 28078250..08021dbf 100644
--- a/llm_web_kit/extractor/html/recognizer/ccmath.py
+++ b/llm_web_kit/extractor/html/recognizer/ccmath.py
@@ -24,7 +24,6 @@ class MathRecognizer(BaseHTMLElementRecognizer):
     def __init__(self):
         super().__init__()
         self.cm = CCMATH()
-        self.mathjax_detected = False  # 添加检测标记
 
     @override
     def recognize(self, base_url: str, main_html_lst: List[Tuple[HtmlElement, HtmlElement]], raw_html: str, language:str = 'en') -> List[Tuple[HtmlElement, HtmlElement]]:
@@ -123,9 +122,8 @@ def process_ccmath_html(self, cc_html: str, o_html: str, math_render: BaseMathRe
             self.cm.url = base_url
             tree = cc_html
             math_render_type = math_render.get_render_type()
-            self.mathjax_detected = False  # 重置标记
-
-            # process1: node循环逻辑
+            # 打印遍历node次数
+            # count = 0
             for node in iter_node(tree):
                 assert isinstance(node, HtmlElement)
                 original_html = self._element_to_html(node)
@@ -136,11 +134,9 @@ def process_ccmath_html(self, cc_html: str, o_html: str, math_render: BaseMathRe
                         node.tag == 'span' and
                         node.get('class') in [CSDN.INLINE, CSDN.DISPLAY]):
                     tag_script.process_katex_mathml(self.cm, math_render_type, node)
-                    self.mathjax_detected = True
 
                 if ZHIHU.DOMAIN in self.cm.url and node.tag == 'span' and node.get('class') == ZHIHU.MATH:
                     tag_script.process_zhihu_custom_tag(self.cm, math_render_type, node)
-                    self.mathjax_detected = True
 
                 # tag = span， class 为 math-containerm， 或者 mathjax 或者 wp-katex-eq
                 if node.tag == 'span' and node.get('class') and (
@@ -151,50 +147,44 @@ def process_ccmath_html(self, cc_html: str, o_html: str, math_render: BaseMathRe
                         'tex' in node.get('class')
                 ):
                     tag_common_modify.modify_tree(self.cm, math_render_type, original_html, node, parent)
-                    self.mathjax_detected = True
+
+                # script[type="math/tex"]
+                # if node.tag == 'script' and node.get('type') and 'math/tex' in node.get('type'):
+                #     print('匹配到script标签: ', node.get('type'))
+                #     tag_common_modify.modify_tree(cm, math_render_type, original_html, node, parent)
 
                 # math tags
                 if node.tag == 'math' or node.tag.endswith(':math'):
                     # print(f"匹配到数学标签: {node.tag}")
                     # print(f"标签内容: {original_html}")
                     tag_math.modify_tree(self.cm, math_render_type, original_html, node, parent)
-                    self.mathjax_detected = True
 
                 if node.tag == 'mjx-container':
                     tag_mjx.modify_tree(self.cm, math_render, original_html, node)
-                    self.mathjax_detected = True
 
                 # img中的latex
                 if node.tag == 'img':
                     tag_img.modify_tree(self.cm, math_render_type, original_html, node, parent)
-                    self.mathjax_detected = True
 
                 # span.katex
                 if node.tag == 'script' or 'math' == node.get('class') or 'katex' == node.get('class'):
                     # print('匹配到script/math/katex标签: ', original_html)
                     tag_script.modify_tree(self.cm, math_render_type, original_html, node, parent)
-                    self.mathjax_detected = True
                 # 只有有渲染器的网站才会走下面文本匹配逻辑
                 if math_render_type:
                     # 14. 只处理只有一层的p标签
                     if node.tag == 'p' and len(node.getchildren()) == 0:
                         # print('匹配到p标签: ', original_html)
                         tag_common_modify.modify_tree(self.cm, math_render_type, original_html, node, parent)
-                        self.mathjax_detected = True
 
-            # procsee2: mathjax渲染器逻辑
-            try:
-                # case1：有mathjax配置
-                if math_render_type == MathRenderType.MATHJAX:
-                    math_render.find_math(tree)
-                # case2：无Mathjax配置但是开启Mathjax逻辑开关（node循环抽到公式的情况）
-                elif math_render_type is None and self.mathjax_detected:
-                    from llm_web_kit.extractor.html.recognizer.cc_math.render.mathjax import \
-                        MathJaxRenderMock
-                    math_render = MathJaxRenderMock()
-                    math_render.find_math(tree)
-            except Exception as e:
-                raise HtmlMathMathjaxRenderRecognizerException(f'处理MathjaxRender数学公式失败: {e}')
+            # 修改：传入tree节点，mathjax方案作为process2，不参与上面process1节点的遍历
+            if math_render_type:
+                try:
+                    if math_render_type == MathRenderType.MATHJAX:
+                        math_render.find_math(tree)
+                except Exception as e:
+                    raise HtmlMathMathjaxRenderRecognizerException(f'处理MathjaxRender数学公式失败: {e}')
+
             # 保存处理后的html
             # with open('test20250702_result.html', 'w', encoding='utf-8') as f:
             #     f.write(self._element_to_html(tree))
diff --git a/llm_web_kit/extractor/html/recognizer/list.py b/llm_web_kit/extractor/html/recognizer/list.py
index c3599fc8..d91caa23 100644
--- a/llm_web_kit/extractor/html/recognizer/list.py
+++ b/llm_web_kit/extractor/html/recognizer/list.py
@@ -199,14 +199,13 @@ def __extract_list_item_text_recusive(el: HtmlElement):
                 # item['c'].strip(): 会导致前面处理br标签，添加的\n\n失效
                 result['c'] = ' '.join(normalize_text_segment(item['c'].strip()) for item in paragraph)
             return result
-        # list_item_tags = ('li', 'dd', 'dt', 'ul', 'div', 'p', 'span')
-        # if child.tag in list_item_tags:
-        # 去掉if限制条件，允许非标准结构的列表通过
-        paragraph = __extract_list_item_text_recusive(child)
-        if len(paragraph) > 0:
-            tem_json = json.dumps(paragraph).replace('$br$\"}', '\"}')
-            new_paragraph = json.loads(tem_json)
-            text_paragraph.append(new_paragraph)
+        list_item_tags = ('li', 'dd', 'dt', 'ul', 'div', 'p', 'span')
+        if child.tag in list_item_tags:
+            paragraph = __extract_list_item_text_recusive(child)
+            if len(paragraph) > 0:
+                tem_json = json.dumps(paragraph).replace('$br$\"}', '\"}')
+                new_paragraph = json.loads(tem_json)
+                text_paragraph.append(new_paragraph)
 
         for n, item in enumerate(text_paragraph):
             tem_json = json.dumps(item).replace('$br$', '\\n\\n')
diff --git a/tests/llm_web_kit/extractor/assets/extractor_chain_input/good_data/html/math_mathjax_mock.html b/tests/llm_web_kit/extractor/assets/extractor_chain_input/good_data/html/math_mathjax_mock.html
deleted file mode 100644
index 2928e55a..00000000
--- a/tests/llm_web_kit/extractor/assets/extractor_chain_input/good_data/html/math_mathjax_mock.html
+++ /dev/null
@@ -1 +0,0 @@
-<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"        "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"><html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en"><head>    <title>Monotone Sequences of Real Numbers - Mathonline</title>    <script type="text/javascript"            src="http://d3g0gp89917ko0.cloudfront.net/v--b23e476b7ade/common--javascript/init.combined.js"></script>    <script type="text/javascript">        var URL_HOST = 'www.wikidot.com';        var URL_DOMAIN = 'wikidot.com';        var USE_SSL = true;        var URL_STATIC = 'http://d3g0gp89917ko0.cloudfront.net/v--b23e476b7ade';        // global request information        var WIKIREQUEST = {};        WIKIREQUEST.info = {};        WIKIREQUEST.info.domain = "mathonline.wikidot.com";        WIKIREQUEST.info.siteId = 613263;        WIKIREQUEST.info.siteUnixName = "mathonline";        WIKIREQUEST.info.categoryId = 4083504;        WIKIREQUEST.info.themeId = 1;        WIKIREQUEST.info.requestPageName = "monotone-sequences-of-real-numbers";        OZONE.request.timestamp = 1558920674;        OZONE.request.date = new Date();        WIKIREQUEST.info.lang = 'en';        WIKIREQUEST.info.pageUnixName = "monotone-sequences-of-real-numbers";        WIKIREQUEST.info.pageId = 24240376;        WIKIREQUEST.info.lang = "en";        OZONE.lang = "en";        var isUAMobile = !!/Android|webOS|iPhone|iPod|BlackBerry|IEMobile|Opera Mini/i.test(navigator.userAgent);    </script>    <script type="text/javascript">        require.config({            baseUrl: URL_STATIC + '/common--javascript',            paths: {                'jquery.ui': 'jquery-ui.min',                'jquery.form': 'jquery.form'            }        });    </script>    <meta http-equiv="content-type" content="text/html;charset=UTF-8"/>    <meta http-equiv="content-language" content="en"/>    <script type="text/javascript"            
src="http://d3g0gp89917ko0.cloudfront.net/v--b23e476b7ade/common--javascript/WIKIDOT.combined.js"></script>    <style type="text/css" id="internal-style">        /* modules */        /* theme */        @import url(http://d3g0gp89917ko0.cloudfront.net/v--b23e476b7ade/common--theme/base/css/style.css);        @import url(/admin:css/code/1);    </style>    <link rel="shortcut icon" href="/local--favicon/favicon.gif"/>    <link rel="icon" type="image/gif" href="/local--favicon/favicon.gif"/>    <link rel="apple-touch-icon" href="/common--images/apple-touch-icon-57x57.png"/>    <link rel="apple-touch-icon" sizes="72x72" href="/common--images/apple-touch-icon-72x72.png"/>    <link rel="apple-touch-icon" sizes="114x114" href="/common--images/apple-touch-icon-114x114.png"/>    <link rel="alternate" type="application/wiki" title="Edit this page"          href="javascript:WIKIDOT.page.listeners.editClick()"/>    <script type="text/javascript">        var _gaq = _gaq || [];        _gaq.push(['_setAccount', 'UA-18234656-1']);        _gaq.push(['_setDomainName', 'none']);        _gaq.push(['_setAllowLinker', true]);        _gaq.push(['_trackPageview']);        _gaq.push(['old._setAccount', 'UA-68540-5']);        _gaq.push(['old._setDomainName', 'none']);        _gaq.push(['old._setAllowLinker', true]);        _gaq.push(['old._trackPageview']);        _gaq.push(['userTracker._setAccount', 'UA-139433011-1']);        _gaq.push(['userTracker._trackPageview']);    </script>    <script type="text/javascript">        window.google_analytics_uacct = 'UA-18234656-1';        window.google_analytics_domain_name = 'none';    </script>    <link rel="manifest" href="/onesignal/manifest.json"/>    <script src="https://cdn.onesignal.com/sdks/OneSignalSDK.js" acync=""></script>    <script>        var OneSignal = window.OneSignal || [];        OneSignal.push(function () {            OneSignal.init({                appId: null,            });   
     });    </script>    <style>        .form-value-title, .form-labels, div.page-tags {            display: none;        }        #action-area h1 {            display: none;        }    </style>    <style>        .form-value-title, .form-labels, div.page-tags {            display: none;        }        #action-area h1 {            display: none;        }    </style></head><body id="html-body"><div id="skrollr-body">    <a name="page-top"></a>    <div id="container-wrap-wrap">        <div id="container-wrap">            <div id="container">                <div id="header">                    <h1><a href="/"><span>Mathonline</span></a></h1>                    <h2><span>Learn Mathematics</span></h2>                    <!-- google_ad_section_start(weight=ignore) -->                    <div id="search-top-box" class="form-search">                        <form id="search-top-box-form" action="dummy" class="input-append">                            <input id="search-top-box-input" class="text empty search-query" type="text" size="15"                                   name="query" value="Search this site"                                   onfocus="if(YAHOO.util.Dom.hasClass(this, 'empty')){YAHOO.util.Dom.removeClass(this,'empty'); this.value='';}"/><input                                class="button btn" type="submit" name="search" value="Search"/>                        </form>                    </div>                    <div id="top-bar">                    </div>                    <div id="login-status"><a href="javascript:;" onclick="WIKIDOT.page.listeners.createAccount(event)"                                              class="login-status-create-account btn">Create account</a> <span>or</span>                        <a href="javascript:;" onclick="WIKIDOT.page.listeners.loginClick(event)"                           class="login-status-sign-in btn btn-primary">Sign in</a></div>                    <div 
id="header-extra-div-1"><span></span></div>                    <div id="header-extra-div-2"><span></span></div>                    <div id="header-extra-div-3"><span></span></div>                </div>                <div id="content-wrap">                    <div id="side-bar">                    </div>                    <!-- google_ad_section_end -->                    <div id="main-content">                        <div id="action-area-top"></div>                        <div id="page-title">                            Monotone Sequences of Real Numbers                        </div>                        <div id="page-content">                            <table style="margin:0; padding:0">                                <tr>                                    <td style="margin:0; padding:0">                                        <div id="toc">                                            <div id="toc-action-bar"><a href="javascript:;"                                                                        onclick="WIKIDOT.page.listeners.foldToc(event)">Fold</a><a                                                    style="display: none" href="javascript:;"                                                    onclick="WIKIDOT.page.listeners.unfoldToc(event)">Unfold</a></div>                                            <div class="title">Table of Contents</div>                                            <div id="toc-list">                                                <div style="margin-left: 1em;"><a href="#toc0">Monotone Sequences of                                                    Real Numbers</a></div>                                            </div>                                        </div>                                    </td>                                </tr>                            </table>                            <h1 id="toc0"><span>Monotone Sequences of Real Numbers</span></h1>                     
       <p>We will now look at two new types of sequences, <strong>increasing sequences</strong> and                                <strong>decreasing sequences</strong>.</p>                            <table class="wiki-content-table">                                <tr>                                    <td><strong>Definition:</strong> A sequence of real numbers <span                                            class="math-inline">$(a_n)$</span> is said to be <strong>Increasing</strong>                                        if <span class="math-inline">$a_n ≤ a_{n+1}$</span> for all <span                                                class="math-inline">$n \in \mathbb{N}$</span>. Similarly, a sequence of                                        real numbers <span class="math-inline">$(a_n)$</span> is said to be <strong>Decreasing</strong>                                        if <span class="math-inline">$a_n ≥ a_{n+1}$</span> for all <span                                                class="math-inline">$n \in \mathbb{N}$</span>. A sequence <span                                                class="math-inline">$(a_n)$</span> is said to be                                        <strong>Monotone</strong> or <strong>Monotonic</strong> if it is either                                        increasing or decreasing.                                    
</td>                                </tr>                            </table>                            <p><em>A sequence <span class="math-inline">$(a_n)$</span> is said to be <strong>Strictly                                Increasing</strong> if <span class="math-inline">$a_n &lt; a_{n+1}$</span> for all <span                                    class="math-inline">$n \in \mathbb{N}$</span> and <strong>Strictly                                Decreasing</strong> if <span class="math-inline">$a_n &gt; a_{n+1}$</span> for all <span                                    class="math-inline">$n \in \mathbb{N}$</span>.</em></p>                            <p>For example, consider the sequence <span class="math-inline">$\left ( \frac{1}{n} \right ) = (1, \frac{1}{2}, \frac{1}{3}, ..., \frac{1}{n}, \frac{1}{n+1}, ... )$</span>.                                We note that <span class="math-inline">$\forall n \in \mathbb{N}$</span>, <span                                        class="math-inline">$n &lt; n+1$</span> and so <span class="math-inline">$\frac{1}{n} &gt; \frac{1}{n+1}$</span>,                                and so this sequence is decreasing and hence monotone.</p>                            <p>The following graph represents the first 10 terms of the monotonically decreasing                                sequence <span class="math-inline">$\left ( \frac{1}{n} \right )$</span>:</p>                            <div class="image-container aligncenter"><img                                    src="http://mathonline.wdfiles.com/local--files/monotone-sequences-of-real-numbers/Screen%20Shot%202014-12-04%20at%203.58.31%20PM.png"                                    alt="Screen%20Shot%202014-12-04%20at%203.58.31%20PM.png" class="image"/></div>                            <p>One such example of an increasing sequence is the sequence <span class="math-inline">$(n + 2)$</span>.                                
Clearly <span class="math-inline">$\forall n \in \mathbb{N}$</span>, <span                                        class="math-inline">$n + 2 &lt; (n+1) + 2 = n + 3$</span> (since if not, then                                <span class="math-inline">$n + 2 ≥ n + 3$</span> which implies that <span                                        class="math-inline">$0 ≥ 1$</span>, which is a contradiction). The following                                graph represents the first 10 terms of the monotonically increasing sequence <span                                        class="math-inline">$(n + 2)$</span>:</p>                            <div class="image-container aligncenter"><img                                    src="http://mathonline.wdfiles.com/local--files/monotone-sequences-of-real-numbers/Screen%20Shot%202014-12-04%20at%204.02.37%20PM.png"                                    alt="Screen%20Shot%202014-12-04%20at%204.02.37%20PM.png" class="image"/></div>                            <p>From the definition of an increasing and decreasing sequence, we should note that EVERY                                successive term in the sequence should either be larger than the previous (increasing                                sequences) or smaller than the previous (decreasing sequences). Therefore the sequence                                <span class="math-inline">$(1, 2, 1, \frac{1}{2}, \frac{1}{3}, \frac{1}{4}, ...)$</span>                                cannot be considered a decreasing sequence as <span class="math-inline">$1 = a_1 \not ≥ a_2 = 2$</span>.                                
From this, we will formulate the following definitions:</p>                            <table class="wiki-content-table">                                <tr>                                    <td><strong>Definition:</strong> A sequence of real numbers <span                                            class="math-inline">$(a_n)$</span> is said to be <strong>Ultimately                                        Increasing</strong> if for some <span                                            class="math-inline">$K \in \mathbb{N}$</span> we have that <span                                            class="math-inline">$\forall n ≥ K$</span> then <span class="math-inline">$a_n ≤ a_{n+1}$</span>.                                        Similarly, a sequence of real numbers <span class="math-inline">$(a_n)$</span>                                        is said to be <strong>Ultimately Decreasing</strong> if for some <span                                                class="math-inline">$K \in \mathbb{N}$</span> we have that <span                                                class="math-inline">$\forall n ≥ K$</span> then <span                                                class="math-inline">$a_n ≥ a_{n+1}$</span>. A sequence <span                                                class="math-inline">$(a_n)$</span> is said to be <strong>Ultimately                                            Monotone</strong> or <strong>Ultimately Monotonic</strong> if for some <span                                                class="math-inline">$K \in \mathbb{N}$</span>, if <span                                                class="math-inline">$n ≥ K$</span> then <span class="math-inline">$(a_n)$</span>                                        is either ultimately increasing or ultimately decreasing.                                    
</td>                                </tr>                            </table>                            <p>Consider the sequence <span                                    class="math-inline">$(n^2 - 4n + 3) = (0, -1, 0, 3, 8, ...)$</span>. This is an                                ultimately increasing sequence, since for <span class="math-inline">$n ≥ 2$</span> we                                have that <span class="math-inline">$a_n ≤ a_{n+1}$</span>. The following graph                                represents the first 7 terms of this ultimately increasing sequence:</p>                            <div class="image-container aligncenter"><img                                    src="http://mathonline.wdfiles.com/local--files/monotone-sequences-of-real-numbers/Screen%20Shot%202014-12-04%20at%204.20.53%20PM.png"                                    alt="Screen%20Shot%202014-12-04%20at%204.20.53%20PM.png" class="image"/></div>                        </div>                        <div id="page-info-break"></div>                        <div id="page-options-container">                        </div>                        <div id="action-area" style="display: none;"></div>                    </div>                </div>                <div id="footer" style="display: block; visibility: visible;">                    <div class="options" style="display: block; visibility: visible;">                        <a href="http://www.wikidot.com/doc" id="wikidot-help-button">Help</a>                        &nbsp;|                        <a href="http://www.wikidot.com/legal:terms-of-service" id="wikidot-tos-button">Terms of                            Service</a>                        &nbsp;|                        <a href="http://www.wikidot.com/legal:privacy-policy" id="wikidot-privacy-button">Privacy</a>                        &nbsp;|                        <a href="javascript:;" id="bug-report-button"                           
onclick="WIKIDOT.page.listeners.pageBugReport(event)">Report a bug</a>                        &nbsp;|                        <a href="javascript:;" id="abuse-report-button"                           onclick="WIKIDOT.page.listeners.flagPageObjectionable(event)">Flag as objectionable</a>                    </div>                    Powered by <a href="http://www.wikidot.com">Wikidot.com</a>                </div>                <div id="license-area" class="license-area">                    Unless otherwise stated, the content of this page is licensed under <a rel="license"                                                                                           href="http://creativecommons.org/licenses/by-sa/3.0/">Creative                    Commons Attribution-ShareAlike 3.0 License</a>                </div>                <div id="extrac-div-1"><span></span></div>                <div id="extrac-div-2"><span></span></div>                <div id="extrac-div-3"><span></span></div>            </div>        </div>        <!-- These extra divs/spans may be used as catch-alls to add extra imagery. -->        <div id="extra-div-1"><span></span></div>        <div id="extra-div-2"><span></span></div>        <div id="extra-div-3"><span></span></div>        <div id="extra-div-4"><span></span></div>        <div id="extra-div-5"><span></span></div>        <div id="extra-div-6"><span></span></div>    </div></div><div id="dummy-ondomready-block" style="display: none;"></div><!-- Google Analytics load --><script type="text/javascript">    (function () {        var ga = document.createElement('script');        ga.type = 'text/javascript';        ga.async = true;        ga.src = ('https:' == document.location.protocol ? 
'https://' : 'http://') + 'stats.g.doubleclick.net/dc.js';        var s = document.getElementsByTagName('script')[0];        s.parentNode.insertBefore(ga, s);    })();</script><!-- Quantcast --><script type="text/javascript">    _qoptions = {        qacct: "p-edL3gsnUjJzw-"    };    (function () {        var qc = document.createElement('script');        qc.type = 'text/javascript';        qc.async = true;        qc.src = ('https:' == document.location.protocol ? 'https://secure' : 'http://edge') + '.quantserve.com/quant.js';        var s = document.getElementsByTagName('script')[0];        s.parentNode.insertBefore(qc, s);    })();</script><noscript>    <img src="http://pixel.quantserve.com/pixel/p-edL3gsnUjJzw-.gif" style="display: none;" border="0" height="1"         width="1" alt="Quantcast"/></noscript><div id="page-options-bottom-tips" style="display: none;">    <div id="edit-button-hovertip">        Click here to edit contents of this page.    </div></div><div id="page-options-bottom-2-tips" style="display: none;">    <div id="edit-sections-button-hovertip">        Click here to toggle editing of individual sections of the page (if possible). Watch headings for an &quot;edit&quot;        link when available.    </div>    <div id="edit-append-button-hovertip">        Append content without editing the whole page source.    </div>    <div id="history-button-hovertip">        Check out how this page has evolved in the past.    </div>    <div id="discuss-button-hovertip">        If you want to discuss contents of this page - this is the easiest way to do it.    </div>    <div id="files-button-hovertip">        View and manage file attachments for this page.    </div>    <div id="site-tools-button-hovertip">        A few useful tools to manage this Site.    </div>    <div id="backlinks-button-hovertip">        See pages that link to and include this page.    
</div>    <div id="rename-move-button-hovertip">        Change the name (also URL address, possibly the category) of the page.    </div>    <div id="view-source-button-hovertip">        View wiki source for this page without editing.    </div>    <div id="parent-page-button-hovertip">        View/set parent page (used for creating breadcrumbs and structured layout).    </div>    <div id="abuse-report-button-hovertip">        Notify administrators if there is objectionable content in this page.    </div>    <div id="bug-report-button-hovertip">        Something does not work as expected? Find out what you can do.    </div>    <div id="wikidot-help-button-hovertip">        General Wikidot.com documentation and help section.    </div>    <div id="wikidot-tos-button-hovertip">        Wikidot.com Terms of Service - what you can, what you should not etc.    </div>    <div id="wikidot-privacy-button-hovertip">        Wikidot.com Privacy Policy.    </div></div></body></html>
\ No newline at end of file
diff --git a/tests/llm_web_kit/extractor/assets/extractor_chain_input/good_data/html_data_input.jsonl b/tests/llm_web_kit/extractor/assets/extractor_chain_input/good_data/html_data_input.jsonl
index 22cebca1..7ccc1eb1 100644
--- a/tests/llm_web_kit/extractor/assets/extractor_chain_input/good_data/html_data_input.jsonl
+++ b/tests/llm_web_kit/extractor/assets/extractor_chain_input/good_data/html_data_input.jsonl
@@ -100,5 +100,4 @@
 {"track_id": "test_mjx_container", "dataset_name": "test_mjx_container", "url": "https://test.com","data_source_category": "HTML",  "path":"testmathjax.html", "file_bytes": 1000, "page_layout_type":"artical", "meta_info": {"input_datetime": "2020-01-01 00:00:00"}}
 {"track_id": "test_word_press", "dataset_name": "test_word_press", "url": "https://test.com","data_source_category": "HTML",  "path":"word_press.html", "file_bytes": 1000, "page_layout_type":"artical", "meta_info": {"input_datetime": "2020-01-01 00:00:00"}}
 {"track_id": "test_ascii_delimiter", "dataset_name": "test_ascii_delimiter", "url": "https://montalk.net/notes/342/tuning-forks-and-megalithic-technology","data_source_category": "HTML",  "path":"math_test_ascii_delimiter.html", "file_bytes": 1000, "page_layout_type":"artical", "meta_info": {"input_datetime": "2020-01-01 00:00:00"}}
-{"track_id": "test_htmlmath_sub_sup", "dataset_name": "test_htmlmath_sub_sup", "url": "https://cccbdb.nist.gov/compvibs3.asp?casno=123911&charge=0&method=42&basis=0","data_source_category": "HTML",  "path":"math_table_title_htmlmath_sub_sup.html", "file_bytes": 1000, "page_layout_type":"artical", "meta_info": {"input_datetime": "2020-01-01 00:00:00"}}
-{"track_id": "test_mathjax_mock", "dataset_name": "test_mathjax_mock", "url": "http://mathonline.wikidot.com/monotone-sequences-of-real-numbers","data_source_category": "HTML",  "path":"math_mathjax_mock.html", "file_bytes": 1000, "page_layout_type":"artical", "meta_info": {"input_datetime": "2020-01-01 00:00:00"}}
\ No newline at end of file
+{"track_id": "test_htmlmath_sub_sup", "dataset_name": "test_htmlmath_sub_sup", "url": "https://cccbdb.nist.gov/compvibs3.asp?casno=123911&charge=0&method=42&basis=0","data_source_category": "HTML",  "path":"math_table_title_htmlmath_sub_sup.html", "file_bytes": 1000, "page_layout_type":"artical", "meta_info": {"input_datetime": "2020-01-01 00:00:00"}}
\ No newline at end of file
diff --git a/tests/llm_web_kit/extractor/html/recognizer/assets/ccmath/math_class_math.html b/tests/llm_web_kit/extractor/html/recognizer/assets/ccmath/math_class_math.html
deleted file mode 100644
index ec9e8518..00000000
--- a/tests/llm_web_kit/extractor/html/recognizer/assets/ccmath/math_class_math.html
+++ /dev/null
@@ -1 +0,0 @@
-<!DOCTYPE html><html lang="en"><head>    <script async src="https://www.googletagmanager.com/gtag/js?id=UA-78463541-4"></script>    <script>        window.dataLayer = window.dataLayer || [];        function gtag() {            dataLayer.push(arguments)        };        gtag('js', new Date());        gtag('config', 'UA-78463541-4');    </script>    <meta charset="utf-8">    <meta name="viewport" content="width=device-width,minimum-scale=1,initial-scale=1">    <meta name="robots" content="index, follow">    <title>4.7 Years In Minutes - How Many Minutes Is 4.7 Years?</title>    <meta name="description"          content="Convert 4.7 years to minutes (4.7 yr to min). What is four point seven years in minutes? Easily convert from years into minutes.">    <link rel="canonical" href="https://convertoctopus.com/4-7-years-to-minutes">    <link rel="amphtml" href="https://convertoctopus.com/amp/4-7-years-to-minutes">    <script type="application/ld+json">        {            "@context": "http://schema.org",            "@type": "BreadcrumbList",            "itemListElement": [                {                    "@type": "ListItem",                    "position": 1,                    "item": {                        "@id": "/years-to-minutes",                        "name": "years to minutes"                    }                },                {                    "@type": "ListItem",                    "position": 2,                    "item": {                        "@id": "/4-7-years-to-minutes",                        "name": "4.7 years to minutes"                    }                }            ]        }    </script>    <script type="application/ld+json">        {            "@context": "https://schema.org",            "@type": "Article",            "inLanguage": "en",            "isFamilyFriendly": "http://schema.org/True",            "mainEntityOfPage": {                "@type": "WebPage",                
"@id": "https://convertoctopus.com/4-7-years-to-minutes"            },            "headline": "How long is 4.7 years in minutes?",            "alternativeHeadline": "How to convert 4.7 years to minutes",            "image": {                "@type": "ImageObject",                "representativeOfPage": "http://schema.org/True",                "url": "https://convertoctopus.com/images/4-7-years-to-minutes",                "width": "730",                "height": "380",                "description": "4.7 years in minutes",                "caption": "4.7 years is equal to 2470320 minutes"            },            "author": "ConvertOctopus",            "genre": "Unit Converter",            "keywords": "4.7 years minutes",            "publisher": {                "@type": "Organization",                "name": "ConvertOctopus",                "url": "https://convertoctopus.com",                "logo": {                    "@type": "ImageObject",                    "url": "https://convertoctopus.com/img/logo.png",                    "width": "201",                    "height": "24"                }            },            "url": "https://convertoctopus.com/4-7-years-to-minutes",            "datePublished": "2017-10-01 18:15:40",            "dateCreated": "2017-08-01 18:15:05",            "dateModified": "2019-01-05 19:13:18",            "description": "Convert 4.7 years to minutes",            "articleBody": "We conclude that four point seven years is equivalent to two million four hundred seventy thousand three hundred twenty minutes."        
}    </script>    <link rel="icon" href="/img/favicon.ico">    <link rel="apple-touch-icon" sizes="180x180" href="/img/apple-touch-icon.png">    <link rel="icon" type="image/png" sizes="32x32" href="/img/favicon-32x32.png">    <link rel="icon" type="image/png" sizes="16x16" href="/img/favicon-16x16.png">    <link rel="manifest" href="/img/site.webmanifest">    <link rel="mask-icon" href="/img/safari-pinned-tab.svg" color="#5bbad5">    <meta name="msapplication-TileColor" content="#da532c">    <meta name="theme-color" content="#117ed2">    <style>        button,        input,        select {            vertical-align: middle        }        .breadcrumb,        .container,        .convert-box,        .rw,        .title-wrap,        nav {            overflow: hidden        }        header {            background: #117ed2        }        .centered h2,        .result {            word-wrap: break-word        }        a,        button {            text-decoration: none        }        body,        html {            height: 100%;            width: 100%;            margin: 0;            padding: 0;            left: 0;            top: 0;            font-size: 100%        }        * {            font-family: FreeSans, Arimo, "Droid Sans", "Helvetica Neue", Helvetica, Arial, sans-serif;            color: #333447;            line-height: 1.5;            -webkit-font-smoothing: antialiased        }        p {            font-size: 16px;            font-weight: 400;            line-height: 1.8        }        .math,        .math sub,        h1,        h2,        h3 {            font-weight: 500        }        h1 {            color: #9f9f9f;            font-size: 22px;            margin: 0 0 15px;        }        h2,        h3 {            font-size: 1.2rem        }        h2 {            margin-top: 30px;            margin-bottom: 10px        }        .breadcrumb li,        footer p,       
 ul {            font-size: 16px        }        header {            padding: 15px 0        }        .breadcrumb,        ul {            padding: 0;            list-style: none        }        .conversion {            width: auto;            margin: 20px 0 10px        }        a {            color: #117ed2        }        a:hover {            color: #34a0f3        }        .breadcrumb {            display: block;            text-align: center;            margin: 15px 20px;            width: auto        }        .centered h2,        th {            text-align: left        }        .breadcrumb li {            float: left        }        h2.info,        h3 {            margin-top: 30px        }        p.info {            line-height: 1.5em        }        footer p {            color: #d4d4d4;            margin: 0 20px 20px        }        .separator {            margin: 0 4px        }        ul.type {            font-size: 24px        }        ul.type li {            padding: 10px 0        }        .conversion h2 {            margin-top: 0        }        .hidden {            display: none        }        .block:before,        .centered {            display: inline-block;            vertical-align: middle        }        .btm {            width: 100%;            margin: 0 auto        }        .col {            width: auto;            float: none        }        .math,        .math sub {            font-family: Georgia, Times New Roman, serif;            font-size: 18px        }        table {            border-collapse: collapse;            min-width: 70%        }        th {            border-top: 1px solid #ccc        }        tr {            border-bottom: 1px solid #ccc        }        td,        th {            padding: 8px 12px        }        blockquote {            margin: 0        }        h3 dfn {            font-weight: 600        }   
     .no-margin {            width: auto;            margin-top: 10px        }        .img-wrapper {            width: auto;            height: auto;            position: relative;            margin: 30px auto -15px        }        .block {            position: absolute;            top: 0;            left: 0;            right: 0;            bottom: 0;            margin: 0 8px;            z-index: 1        }        .block:before {            content: '';            height: 100%        }        .centered {            width: 65%;            padding-left: 10px        }        .centered h2 {            font-size: 30px;            line-height: 1;            color: #dab5f9;            margin: 0        }        .a-3,        button {            text-align: center        }        .centered h2 span {            font-size: 18px;            display: block;            color: #FFF;            line-height: 1        }        input[type=number]::-webkit-inner-spin-button,        input[type=number]::-webkit-outer-spin-button {            -webkit-appearance: none;            -moz-appearance: none;            appearance: none;            margin: 0        }        .conversion h2 {            display: none        }        .result,        .value {            display: block;            font-weight: 500        }        .conversions-box,        .result-box {            background: #FFF;            margin: 10px 0;            overflow: hidden        }        .result-box {            padding: 30px 20px        }        .conversions-box {            padding: 10px 20px        }        .result {            font-size: 34px;            color: #54a9f9        }        .value {            font-size: 22px;            color: #145694        }        .convert-box {            background-color: #FFF;            margin: 10px 0;            padding: 10px 20px 15px        }        .a-1,        .a-2 {            
width: 336px;            height: auto;            margin: 0 auto        }        .a-3 {            width: auto;            height: 100px;            margin: 10px auto 0        }        input,        select {            padding: .5em .6em;            box-sizing: border-box;            height: 40px        }        .top {            display: block;            margin-top: 10px        }        .a-box {            background-color: transparent        }        .boxes {            display: flex;            flex-flow: row wrap        }        label,        select {            display: block        }        button,        input {            display: inline-block        }        .hide,        .sky-left,        .sky-right {            display: none        }        .boxes > .box-1 {            order: 1        }        .boxes > .box-2 {            order: 3        }        .boxes > .box-3 {            order: 2        }        .box-1,        .box-2,        .box-3 {            width: 100%        }        .col-amount,        .col-button,        .col-unit-from,        .col-unit-to {            margin: 0;            width: 100%;            float: none        }        button,        input,        label,        select {            width: 100%;            font-size: 16px;            font-family: sans-serif        }        input {            border: 1px solid #e9e8e8;            border-radius: 4px;            width: 100%        }        fieldset {            margin: 0;            padding: 0;            border: 0        }        label {            margin: 20px 0 10px;            color: #969898        }        select {            box-shadow: none;            border: 1px solid #e9e8e8;            border-radius: 4px;            margin: .25em 0;            background-color: #fff        }        button {            margin: 25px 0 0;            background-color: #117ed2;            
color: #fff;            font-family: inherit;            font-size: 100%;            padding: 13px 20px;            border: transparent;            border-radius: 4px;            zoom: 1;            white-space: nowrap;            cursor: pointer;            -webkit-user-drag: none;            -webkit-user-select: none;            -moz-user-select: none;            -ms-user-select: none;            user-select: none        }        button:hover {            background-color: #34a0f3        }        .title-wrap {            background-color: #F0F0F2        }        .a-4 {            width: 300px;            height: auto;            margin: 15px auto        }        .home-button {            margin: 30px 0 15px        }        .col-left,        .col-right {            width: 100%;            float: none;            padding: 0        }        nav a {            color: #848484        }        nav a:hover {            color: #b7b7b7        }        .brand {            background-image: url('/img/logo.png');            background-repeat: no-repeat;            height: 24px;            width: 201px;            display: block;            margin: 0 20px        }        .home-converter {            margin: 0;        }        ul li {            padding: 5px 0        }        .content {            padding: 0 20px;        }        .converter-title {            margin: 0        }        @media (min-width: 768px) {            .a-3,            .conversion h2,            .hidden,            .top {                display: block            }            .container {                width: 740px;                margin: 0 auto;                padding: 0            }            h1,            h2 {                font-size: 24px            }            h3 {                font-size: 22px            }            .breadcrumb {                text-align: left;                margin: 15px auto;   
             width: 730px            }            .col {                width: 365px;                float: left            }            .block {                margin: 0 25px            }            .centered {                padding-left: 0            }            .centered h2 {                font-size: 36px            }            .centered h2 span {                font-size: 22px            }            .math,            .math sub {                font-size: 20px            }            .conversion {                margin: 50px auto            }            .item-1 {                margin-right: 10%            }            .item-1,            .item-2 {                width: 45%;                float: left            }            .btm,            .no-margin {                width: 730px            }            .no-margin {                margin: 0 auto            }            .img-wrapper {                margin-top: 50px;            }            body {                background: #f3f3f3            }            .content,            .conversion-list {                background: #FFF;                margin: 5px 0 10px;                overflow: hidden;            }            .grid-container {                overflow: hidden            }            .content {                padding: 20px 50px            }            .conversion-list {                padding: 15px            }            .container-home {                margin-top: 10px;                margin-bottom: 10px            }            .a-1 {                width: 336px;                height: auto            }            .a-3,            .top {                width: 728px;                height: 90px;                margin-top: 0            }            .result-box {                margin-bottom: 0;                padding: 40px 50px 0;                margin-top: 10px            }            .convert-box {
                margin-top: 0;                padding: 0 50px 40px            }            .converter-title {                display: none;            }            .box-1 .a-box,            .box-2 .a-box {                padding: 10px 10px            }            .box-1 .a-box {                margin-right: 5px            }            .box-2 .a-box {                margin-left: 5px            }            .a-box {                padding: 5px            }            .title-wrap,            nav {                background: 0 0            }            .boxes > .box-1 {                order: 2            }            .boxes > .box-2 {                order: 3            }            .boxes > .box-3 {                order: 1            }            .title-wrap {                margin: 0            }            .col-amount {                margin-right: 1%;                width: 15%;                float: left            }            .col-unit-from,            .col-unit-to {                margin-right: 1%;                width: 31%;                float: left            }            .col-button {                width: 20%;                float: left            }            button {                margin-top: 52px;                padding: 9px            }            .result {                font-size: 38px            }            label {                margin: 20px 0 10px            }            .box-1,            .box-2 {                width: 50%            }            .col-left,            .col-right {                width: 49%;                float: left            }            .col-left {                padding-right: 1%            }            .col-right {                padding-left: 1%            }            header {                padding: 20px 0;                margin-bottom: 15px            }            .hide {                display: block            }    
        .a-4 {                margin: 0 auto            }            .home-converter {                margin: 30px 60px;            }            footer p {                margin: 0 0 20px            }            .brand {                margin: 0            }        }        @media (min-width: 1120px) {            .container {                position: relative            }            .sky-left,            .sky-right {                position: absolute;                display: block            }            .sky {                width: auto;                height: auto;            }            .sky-left {                top: 800px;                left: -180px            }            .sky-right {                top: 800px;                right: -180px            }        }        .responsive {            width: 100%;            height: auto;        }        .social-wrapper {            margin: 35px 0 35px;        }        .social {            display: block;            overflow: hidden;            margin: 0 auto;            width: 275px        }        .social a {            background: url("/img/socialicons.png");            width: 45px;            height: 45px;            display: block;            margin: 2px 5px;            float: left;            left: 24px        }        #facebook-icon {            background-position-x: 0        }        #twitter-icon {            background-position-x: -45px        }        #messenger-icon {            background-position-x: -180px        }        #whatsapp-icon {            background-position-x: -135px        }        #pinterest-icon {            background-position-x: -90px        }    </style>    <!--[if lt IE 9]>    <script src="https://oss.maxcdn.com/html5shiv/3.7.2/html5shiv.min.js"></script>    <script src="https://oss.maxcdn.com/respond/1.4.2/respond.min.js"></script>    <![endif]-->    <script async 
src="//pagead2.googlesyndication.com/pagead/js/adsbygoogle.js"></script>    <script>        (adsbygoogle = window.adsbygoogle || []).push({            google_ad_client: "ca-pub-1543067051094976",            enable_page_level_ads: true        });    </script></head><body><header>    <div class="container">        <a class="brand" href="/"></a>    </div></header><article>    <div class="container">        <aside>            <div class="result-box">                <h1>How long is 4.7 years in minutes?</h1>                <span class="value">4.7 years equals</span>                <span class="result">2470320 minutes</span>            </div>        </aside>        <div class="boxes">            <div class="box-3">                <div class="convert-box">                    <h2 class="converter-title">Unit Converter</h2>                    <form action="/convert" method="get" target="_top">                        <fieldset>                            <div class="col-amount">                                <label for="amount">Amount</label>                                <input name="amount" required id="amount" step="0.1" type="number" min="0.1" max="10000"                                       placeholder="number" value="4.7">                            </div>                            <div class="col-unit-from">                                <label for="unitFrom">From</label>                                <select name="unitFrom" id="unitFrom">                                    <optgroup label="length">                                        <option value="centimeters">centimeters (cm)</option>                                        <option value="feet">feet (ft)</option>                                        <option value="inches">inches (in)</option>                                        <option value="kilometers">kilometers (km)</option>                                        <option value="meters">meters (m)</option>  
                                      <option value="miles">miles (mi)</option>                                        <option value="decimeters">decimeters (dm)</option>                                        <option value="millimeters">millimeters (mm)</option>                                        <option value="yards">yards (yd)</option>                                    </optgroup>                                    <optgroup label="mass">                                        <option value="grams">grams (g)</option>                                        <option value="ounces">ounces (oz)</option>                                        <option value="pounds">pounds (lb)</option>                                        <option value="kilograms">kilograms (kg)</option>                                    </optgroup>                                    <optgroup label="time">                                        <option value="days">days (d)</option>                                        <option value="months">months (mo)</option>                                        <option value="years" selected>years (yr)</option>                                        <option value="hours">hours (hr)</option>                                        <option value="minutes">minutes (min)</option>                                        <option value="seconds">seconds (s)</option>                                        <option value="weeks">weeks (wk)</option>                                    </optgroup>                                    <optgroup label="velocity">                                        <option value="feet-per-second">feet per second (ft/s)</option>                                        <option value="kilometers-per-hour">kilometers per hour (km/h)</option>                                        <option value="knots">knots (kt)</option>                                        <option value="meters-per-second">meters per second 
(m/s)</option>                                        <option value="miles-per-hour">miles per hour (mph)</option>                                    </optgroup>                                    <optgroup label="volume">                                        <option value="cubic-centimeters">cubic centimeters (cm3)</option>                                        <option value="cubic-feet">cubic feet (ft3)</option>                                        <option value="cubic-inches">cubic inches (in3)</option>                                        <option value="cubic-meters">cubic meters (m3)</option>                                        <option value="cups">cups (cup)</option>                                        <option value="deciliters">deciliters (dL)</option>                                        <option value="gallons">gallons (gal)</option>                                        <option value="liters">liters (L)</option>                                        <option value="milliliters">milliliters (ml)</option>                                        <option value="fluid-ounces">fluid ounces (fl oz)</option>                                        <option value="pints">pints (pt)</option>                                        <option value="quarts">quarts (qt)</option>                                        <option value="tablespoons">tablespoons (tbsp)</option>                                        <option value="teaspoons">teaspoons (tsp)</option>                                    </optgroup>                                </select>                            </div>                            <div class="col-unit-to">                                <label for="unitTo">To</label>                                <select name="unitTo" id="unitTo">                                    <optgroup label="length">                                        <option value="centimeters">centimeters (cm)</option>                           
             <option value="feet">feet (ft)</option>                                        <option value="inches">inches (in)</option>                                        <option value="kilometers">kilometers (km)</option>                                        <option value="meters">meters (m)</option>                                        <option value="miles">miles (mi)</option>                                        <option value="decimeters">decimeters (dm)</option>                                        <option value="millimeters">millimeters (mm)</option>                                        <option value="yards">yards (yd)</option>                                    </optgroup>                                    <optgroup label="mass">                                        <option value="grams">grams (g)</option>                                        <option value="ounces">ounces (oz)</option>                                        <option value="pounds">pounds (lb)</option>                                        <option value="kilograms">kilograms (kg)</option>                                    </optgroup>                                    <optgroup label="time">                                        <option value="days">days (d)</option>                                        <option value="months">months (mo)</option>                                        <option value="years">years (yr)</option>                                        <option value="hours">hours (hr)</option>                                        <option value="minutes" selected>minutes (min)</option>                                        <option value="seconds">seconds (s)</option>                                        <option value="weeks">weeks (wk)</option>                                    </optgroup>                                    <optgroup label="velocity">                                        <option value="feet-per-second">feet per 
second (ft/s)</option>                                        <option value="kilometers-per-hour">kilometers per hour (km/h)</option>                                        <option value="knots">knots (kt)</option>                                        <option value="meters-per-second">meters per second (m/s)</option>                                        <option value="miles-per-hour">miles per hour (mph)</option>                                    </optgroup>                                    <optgroup label="volume">                                        <option value="cubic-centimeters">cubic centimeters (cm3)</option>                                        <option value="cubic-feet">cubic feet (ft3)</option>                                        <option value="cubic-inches">cubic inches (in3)</option>                                        <option value="cubic-meters">cubic meters (m3)</option>                                        <option value="cups">cups (cup)</option>                                        <option value="deciliters">deciliters (dL)</option>                                        <option value="gallons">gallons (gal)</option>                                        <option value="liters">liters (L)</option>                                        <option value="milliliters">milliliters (ml)</option>                                        <option value="fluid-ounces">fluid ounces (fl oz)</option>                                        <option value="pints">pints (pt)</option>                                        <option value="quarts">quarts (qt)</option>                                        <option value="tablespoons">tablespoons (tbsp)</option>                                        <option value="teaspoons">teaspoons (tsp)</option>                                    </optgroup>                                </select>                            </div>                            <div class="col-button">     
                           <button type="submit">Convert</button>                            </div>                        </fieldset>                    </form>                </div>            </div>            <div class="box-1">                <div class="a-box">                    <div class="a-1">                        <ins class="adsbygoogle" style="display:inline-block;width:336px;height:280px"                             data-ad-client="ca-pub-1543067051094976" data-ad-slot="2593765870"></ins>                        <script>                            (adsbygoogle = window.adsbygoogle || []).push({});                        </script>                    </div>                </div>            </div>            <div class="box-2">                <div class="a-box">                    <div class="a-2">                        <ins class="adsbygoogle" style="display:inline-block;width:336px;height:280px"                             data-ad-client="ca-pub-1543067051094976" data-ad-slot="2593765870"></ins>                        <script>                            (adsbygoogle = window.adsbygoogle || []).push({});                        </script>                    </div>                </div>            </div>        </div>        <div class="content">            <section>                <h2>Conversion formula</h2>                <p>The conversion factor from years to minutes is 525600, which means that 1 year is equal to 525600                    minutes:</p>                <p class="math">1 yr = 525600 min</p>                <p>To convert 4.7 years into minutes we have to multiply 4.7 by the conversion factor in order to get                    the time amount from years to minutes. 
We can also form a simple proportion to calculate the                    result:</p>                <p class="math">1 yr &rarr; 525600 min</p>                <p class="math">4.7 yr &rarr; T<sub>(min)</sub></p>                <p>Solve the above proportion to obtain the time <em>T</em> in minutes:</p>                <p class="math">T<sub>(min)</sub> = 4.7 yr &times; 525600 min</p>                <p class="math">T<sub>(min)</sub> = 2470320 min</p>                <p>The final result is:</p>                <p class="math">4.7 yr &rarr; 2470320 min</p>                <p>We conclude that <strong>4.7 years is equivalent to 2470320 minutes</strong>:</p>                <p class="math">4.7 years = 2470320 minutes</p>            </section>        </div>        <img alt="4.7 years is equal to 2470320 minutes" src="/images/4-7-years-to-minutes" width="730" height="380"             class="responsive">        <div class="content">            <section>                <h2>Alternative conversion</h2>                <p>We can also convert by utilizing the inverse value of the conversion factor. In this case <strong>1                    minute is equal to 4.0480585511189E-7 &times; 4.7 years</strong>.</p>                <p>Another way is saying that <em>4.7 years is equal to 1 &divide; 4.0480585511189E-7 minutes</em>.            </section>            <section>                <h2>Approximate result</h2>                <p>For practical purposes we can round our final result to an approximate numerical value. We can say                    that <strong>four point seven years is approximately two million four hundred seventy thousand three                        hundred twenty minutes</strong>:</p>                <p class="math">4.7 yr &cong; 2470320 min</p>                <p>An alternative is also that <em>one minute is approximately zero times four point seven years</em>.                
</p>            </section>            <div class="social-wrapper">                <div class="social">                    <a id="facebook-icon"                       href="https://www.facebook.com/sharer/sharer.php?u=https://convertoctopus.com/4-7-years-to-minutes"                       target="_blank"></a>                    <a id="twitter-icon"                       href="https://twitter.com/intent/tweet?text=4.7 Years In Minutes - How Many Minutes Is 4.7 Years?&amp;url=https://convertoctopus.com/4-7-years-to-minutes"                       target="_blank"></a>                    <a id="whatsapp-icon"                       href="whatsapp://send?text=4.7 Years In Minutes - How Many Minutes Is 4.7 Years? https://convertoctopus.com/4-7-years-to-minutes"                       target="_blank"></a>                    <a id="messenger-icon"                       href="fb-messenger://share?link=4.7 Years In Minutes - How Many Minutes Is 4.7 Years? https://convertoctopus.com/4-7-years-to-minutes"                       target="_blank"></a>                    <a id="pinterest-icon"                       href="https://pinterest.com/pin/create/button/?url=https://convertoctopus.com/4-7-years-to-minutes&amp;description=4.7 Years In Minutes - How Many Minutes Is 4.7 Years?"                       
target="_blank"></a>                </div>            </div>            <section>                <h2>Conversion table</h2>                <h3>years to minutes chart</h3>                <p>For quick reference purposes, below is the conversion table you can use to convert from years to                    minutes</p>                <table>                    <thead>                    <tr>                        <th>years (yr)</th>                        <th>minutes (min)</th>                    </tr>                    </thead>                    <tbody>                    <tr>                        <td><a href="/5-7-years-to-minutes">5.7 years</a></td>                        <td>2995920 minutes</td>                    </tr>                    <tr>                        <td><a href="/6-7-years-to-minutes">6.7 years</a></td>                        <td>3521520 minutes</td>                    </tr>                    <tr>                        <td><a href="/7-7-years-to-minutes">7.7 years</a></td>                        <td>4047120 minutes</td>                    </tr>                    <tr>                        <td><a href="/8-7-years-to-minutes">8.7 years</a></td>                        <td>4572720 minutes</td>                    </tr>                    <tr>                        <td><a href="/9-7-years-to-minutes">9.7 years</a></td>                        <td>5098320 minutes</td>                    </tr>                    <tr>                        <td><a href="/10-7-years-to-minutes">10.7 years</a></td>                        <td>5623920 minutes</td>                    </tr>                    <tr>                        <td><a href="/11-7-years-to-minutes">11.7 years</a></td>                        <td>6149520 minutes</td>                    </tr>                    <tr>                        <td><a href="/12-7-years-to-minutes">12.7 years</a></td>                        <td>6675120 minutes</td>              
      </tr>                    <tr>                        <td><a href="/13-7-years-to-minutes">13.7 years</a></td>                        <td>7200720 minutes</td>                    </tr>                    <tr>                        <td><a href="/14-7-years-to-minutes">14.7 years</a></td>                        <td>7726320 minutes</td>                    </tr>                    </tbody>                </table>            </section>            <aside>                <h2>Conversion units</h2>                <p>The units involved in this conversion are years and minutes. This is how they are defined:</p>                <section>                    <h3><dfn>Year</dfn></h3>                    <blockquote cite="https://en.wikipedia.org/">                        <p>A year (symbol: y; also abbreviated yr.) is the orbital period of the Earth moving in its                            orbit around the Sun. Due to the Earth&#039;s axial tilt, the course of a year sees the                            passing of the seasons, marked by changes in weather, the hours of daylight, and,                            consequently, vegetation and soil fertility. In temperate and subpolar regions around the                            globe, four seasons are generally recognized: spring, summer, autumn and winter. In tropical                            and subtropical regions several geographical sectors do not present defined seasons; but in                            the seasonal tropics, the annual wet and dry seasons are recognized and tracked. A calendar                            year is an approximation of the number of days of the Earth&#039;s orbital period as counted                            in a given calendar. 
The Gregorian, or modern, calendar, presents its calendar year to be                            either a common year of 365 days or a leap year of 366 days.</p>                        <footer>                            Source: <cite title="Wikipedia"><a href="https://en.wikipedia.org/">Wikipedia</a> Topic:                            year</cite>                        </footer>                    </blockquote>                </section>                <section>                    <h3><dfn>Minute</dfn></h3>                    <blockquote cite="https://en.wikipedia.org/">                        <p>The minute is a unit of time or of angle. As a unit of time, the minute (symbol: min) is                            equal to 1⁄60 (the first sexagesimal fraction) of an hour, or 60 seconds. In the UTC time                            standard, a minute on rare occasions has 61 seconds, a consequence of leap seconds (there is                            a provision to insert a negative leap second, which would result in a 59-second minute, but                            this has never happened in more than 40 years under this system). As a unit of angle, the                            minute of arc is equal to 1⁄60 of a degree, or 60 seconds (of arc). Although not an SI unit                            for either time or angle, the minute is accepted for use with SI units for both. The SI                            symbols for minute or minutes are min for time measurement, and the prime symbol after a                            number, e.g. 5′, for angle measurement. The prime is also sometimes used informally to                            denote minutes of time. In contrast to the hour, the minute (and the second) does not have a                            clear historical background. 
What is traceable only is that it started being recorded in the                            Middle Ages due to the ability of construction of &quot;precision&quot; timepieces                            (mechanical and water clocks). However, no consistent records of the origin for the division                            as 1⁄60 part of the hour (and the second 1⁄60 of the minute) have ever been found, despite                            many speculations.</p>                        <footer>                            Source: <cite title="Wikipedia"><a href="https://en.wikipedia.org/">Wikipedia</a> Topic:                            minute</cite>                        </footer>                    </blockquote>                </section>            </aside>            <div class="grid-container">                <aside>                    <h3>How long is 4.7 years in other time units?</h3>                    <ul>                        <li><a href="/4-7-years-to-days">4.7 years to days</a></li>                        <li><a href="/4-7-years-to-months">4.7 years to months</a></li>                        <li><a href="/4-7-years-to-hours">4.7 years to hours</a></li>                        <li><a href="/4-7-years-to-minutes">4.7 years to minutes</a></li>                        <li><a href="/4-7-years-to-seconds">4.7 years to seconds</a></li>                        <li><a href="/4-7-years-to-weeks">4.7 years to weeks</a></li>                    </ul>                </aside>            </div>            <div class="grid-container">                <aside>                    <h3>Recent years to minutes conversions</h3>                    <div class="item-1">                        <ul>                            <li><a href="/3-8-years-to-minutes">3.8 years to minutes</a></li>                            <li><a href="/14-4-years-to-minutes">14.4 years to minutes</a></li>                            <li><a href="/17-7-years-to-minutes">17.7 years to 
minutes</a></li>                            <li><a href="/58-5-years-to-minutes">58.5 years to minutes</a></li>                            <li><a href="/263-years-to-minutes">263 years to minutes</a></li>                            <li><a href="/168-years-to-minutes">168 years to minutes</a></li>                            <li><a href="/141-years-to-minutes">141 years to minutes</a></li>                            <li><a href="/69-4-years-to-minutes">69.4 years to minutes</a></li>                            <li><a href="/528-years-to-minutes">528 years to minutes</a></li>                            <li><a href="/4-6-years-to-minutes">4.6 years to minutes</a></li>                        </ul>                    </div>                    <div class="item-2">                        <ul>                            <li><a href="/420-years-to-minutes">420 years to minutes</a></li>                            <li><a href="/134-9-years-to-minutes">134.9 years to minutes</a></li>                            <li><a href="/22-5-years-to-minutes">22.5 years to minutes</a></li>                            <li><a href="/78-years-to-minutes">78 years to minutes</a></li>                            <li><a href="/45-4-years-to-minutes">45.4 years to minutes</a></li>                            <li><a href="/164-4-years-to-minutes">164.4 years to minutes</a></li>                            <li><a href="/59-8-years-to-minutes">59.8 years to minutes</a></li>                            <li><a href="/259-years-to-minutes">259 years to minutes</a></li>                            <li><a href="/264-years-to-minutes">264 years to minutes</a></li>                            <li><a href="/159-5-years-to-minutes">159.5 years to minutes</a></li>                        </ul>                    </div>                </aside>            </div>            <div class="grid-container">                <aside>                    <h3>Recent conversions</h3>                    
<ul>                        <li><a href="/152-feet-per-second-to-meters-per-second">152 feet per second to meters per                            second</a></li>                        <li><a href="/7-5-feet-per-second-to-miles-per-hour">7.5 feet per second to miles per hour</a>                        </li>                        <li><a href="/59-8-grams-to-kilograms">59.8 grams to kilograms</a></li>                        <li><a href="/74-4-feet-per-second-to-meters-per-second">74.4 feet per second to meters per                            second</a></li>                        <li><a href="/11-6-grams-to-kilograms">11.6 grams to kilograms</a></li>                        <li><a href="/1108-gallons-to-teaspoons">1108 gallons to teaspoons</a></li>                        <li><a href="/965-minutes-to-seconds">965 minutes to seconds</a></li>                        <li><a href="/861-inches-to-meters">861 inches to meters</a></li>                        <li><a href="/102-milliliters-to-teaspoons">102 milliliters to teaspoons</a></li>                        <li><a href="/13-9-grams-to-pounds">13.9 grams to pounds</a></li>                        <li><a href="/182-years-to-minutes">182 years to minutes</a></li>                        <li><a href="/38-7-knots-to-miles-per-hour">38.7 knots to miles per hour</a></li>                        <li><a href="/17-4-feet-per-second-to-miles-per-hour">17.4 feet per second to miles per hour</a>                        </li>                        <li><a href="/53-3-kilometers-per-hour-to-knots">53.3 kilometers per hour to knots</a></li>                        <li><a href="/87-2-centimeters-to-millimeters">87.2 centimeters to millimeters</a></li>                        <li><a href="/96-centimeters-to-meters">96 centimeters to meters</a></li>                        <li><a href="/88-3-kilometers-per-hour-to-miles-per-hour">88.3 kilometers per hour to miles per                            hour</a></li>                     
   <li><a href="/22-1-feet-to-yards">22.1 feet to yards</a></li>                        <li><a href="/357-cups-to-tablespoons">357 cups to tablespoons</a></li>                        <li><a href="/1353-months-to-years">1353 months to years</a></li>                    </ul>                </aside>            </div>        </div>    </div></article><nav>    <ol class="breadcrumb">        <li><a href="/">home</a></li>        <li class="separator">&rsaquo;</li>        <li><a href="/years-to-minutes">years to minutes</a></li>        <li class="separator">&rsaquo;</li>        <li>4.7 yr to min</li>    </ol></nav><footer class="footer-btm">    <div class="container">        <p>&copy;2020 ConvertOctopus.com</p>    </div></footer></body></html>
\ No newline at end of file
diff --git a/tests/llm_web_kit/extractor/html/recognizer/assets/ccmath/math_class_math_1.html b/tests/llm_web_kit/extractor/html/recognizer/assets/ccmath/math_class_math_1.html
deleted file mode 100644
index e69de29b..00000000
diff --git a/tests/llm_web_kit/extractor/html/recognizer/assets/ccmath/math_class_math_inline_1.html b/tests/llm_web_kit/extractor/html/recognizer/assets/ccmath/math_class_math_inline_1.html
deleted file mode 100644
index cda8dd54..00000000
--- a/tests/llm_web_kit/extractor/html/recognizer/assets/ccmath/math_class_math_inline_1.html
+++ /dev/null
@@ -1,8 +0,0 @@
-1 yr = 525600 min
-1 yr → 525600 min
-4.7 yr → T
-T
-T
-4.7 yr → 2470320 min
-4.7 years = 2470320 minutes
-4.7 yr ≅ 2470320 min
\ No newline at end of file
diff --git a/tests/llm_web_kit/extractor/html/recognizer/test_list.py b/tests/llm_web_kit/extractor/html/recognizer/test_list.py
index 5f8d61de..dbe79347 100644
--- a/tests/llm_web_kit/extractor/html/recognizer/test_list.py
+++ b/tests/llm_web_kit/extractor/html/recognizer/test_list.py
@@ -375,38 +375,3 @@ def test_get_attribute_standalone_improved(self):
             error_msg = str(context.exception)
             self.assertIn('中没有cclist标签', error_msg)
             self.assertIn(element.tag, error_msg)
-
-    def test_no_standard_get_list_content_list(self):
-        """测试非标准结构的list获取content_list."""
-        # 获取私有方法 __get_list_content_list
-        get_list_content_list_method = getattr(self.__list_recognize, '_ListRecognizer__get_list_content_list')
-
-        # 创建测试数据
-        test_elements = [
-            html_to_element('''<ul id="productslist">
-                                    <figure class="list">
-                                        <figcaption><h4>How to Process Oxidized Lead Zinc Ore by Flotation</h4>
-                                            <p>How to Process Oxidized Lead Zinc Ore by Flotation. Metallurgical Content. The
-                                                Flowsheet. Crushing Section; GRINDING; Conditioning and Flotation; Thickening and
-                                                Filtering; Sampling; ORE TESTING LABORATORY; The problem of treating oxidized lead
-                                                zinc ores for the production of high grade lead zinc concentrates is a complex </p>
-                                        </figcaption>
-                                    </figure>
-                                    <figure class="list">
-                                        <figcaption><h4>ore dressing flotation machine,fluorite ore flotation </h4>
-                                            <p>Ore dressing flotation machine is widely used to conduct flotation of copper ore,
-                                                lead zinc ore, glod ore, etc. Mail to sales@sinofote</p>
-                                        </figcaption>
-                                    </figure>
-                                    <figure class="list">
-                                        <figcaption><h4>Zinc Ore Mining Crusher wffofoundation</h4>
-                                            <p>Zinc ore mining process can 14 2016 31 Mar Lead zinc ore dressing equipment zinc ore
-                                                Once processing in the flotation circuit was complete, the zinc </p>
-                                        </figcaption>
-                                    </figure>
-                                </ul>''')
-        ]
-
-        for i, element in enumerate(test_elements):
-            list_content_list = get_list_content_list_method(element, 1)
-            assert len(list_content_list) == 3
diff --git a/tests/llm_web_kit/extractor/html/recognizer/test_math.py b/tests/llm_web_kit/extractor/html/recognizer/test_math.py
index 6069c590..f51c1869 100644
--- a/tests/llm_web_kit/extractor/html/recognizer/test_math.py
+++ b/tests/llm_web_kit/extractor/html/recognizer/test_math.py
@@ -207,14 +207,6 @@
         'base_url': 'https://physicshelpforum.com/t/latex-upgrade-physics-forum-powered-by-mathjax-v3.17489/',
         'expected': 'assets/ccmath/math_physicsforums_2_1.html',
         'expected_inline': 'assets/ccmath/math_physicsforums_2_inline_1.html'
-    },
-    {
-        'input': [
-            'assets/ccmath/math_class_math.html',
-        ],
-        'base_url': 'https://convertoctopus.com/4-7-years-to-minutes',
-        'expected': 'assets/ccmath/math_class_math_1.html',
-        'expected_inline': 'assets/ccmath/math_class_math_inline_1.html'
     }
 ]
 
diff --git a/tests/llm_web_kit/extractor/test_extractor_chain.py b/tests/llm_web_kit/extractor/test_extractor_chain.py
index 7f1bf8c9..dc53e015 100644
--- a/tests/llm_web_kit/extractor/test_extractor_chain.py
+++ b/tests/llm_web_kit/extractor/test_extractor_chain.py
@@ -64,7 +64,7 @@ def setUp(self):
                     continue
                 self.data_json.append(json.loads(line))
 
-        assert len(self.data_json) == 104
+        assert len(self.data_json) == 103
 
         # Config for HTML extraction
         self.config = load_pipe_tpl('html-test')
@@ -810,27 +810,16 @@ def test_ascii_delimiter(self):
         input_data = DataJson(test_data)
         result = chain.extract(input_data)
         md_content = result.get_content_list().to_nlp_md()
+        # with open('mathjax抽取case222.md', 'w', encoding='utf-8') as f:
+        #     f.write(md_content)
         self.assertIn(r'$f = \frac{1}{T} ^ 2 \sqrt{\frac{A E}{\rho}}$', md_content)
         self.assertIn(r'${m}^{2}$', md_content)
         self.assertIn(r'\rho$', md_content)
         self.assertIn(r'$f = \frac{1}{2 L} \sqrt{\frac{E}{\rho}}$', md_content)
         self.assertIn(r'$L = {T}^{2} / \left(2 W\right)$', md_content)
 
-    def test_mathjax_mock(self):
-        """测试虚拟mathjax渲染器."""
-        chain = ExtractSimpleFactory.create(self.config)
-        self.assertIsNotNone(chain)
-        test_data = self.data_json[103]
-        input_data = DataJson(test_data)
-        result = chain.extract(input_data)
-        md_content = result.get_content_list().to_nlp_md()
-        self.assertIn(r'$(a_n)$', md_content)
-        self.assertIn(r'$a_n ≤ a_{n+1}$', md_content)
-        self.assertIn(r'$n \in \mathbb{N}$', md_content)
-        self.assertIn(r'$\left ( \frac{1}{n} \right ) = (1, \frac{1}{2}, \frac{1}{3}, ..., \frac{1}{n}, \frac{1}{n+1}, ... )$', md_content)
-
     def test_htmlmath_sub_sup(self):
-        """测试htmlmath中的上下标标签."""
+        """测试ascii分隔符."""
         chain = ExtractSimpleFactory.create(self.config)
         self.assertIsNotNone(chain)
         test_data = self.data_json[102]

From 329439b37c390fcae4cdee37132657c17e3603c6 Mon Sep 17 00:00:00 2001
From: quyuan <qywan918@163.com>
Date: Fri, 15 Aug 2025 06:13:25 +0000
Subject: [PATCH 3/8] Update version.py with new version

---
 llm_web_kit/libs/version.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llm_web_kit/libs/version.py b/llm_web_kit/libs/version.py
index b50da94d..29e4a941 100644
--- a/llm_web_kit/libs/version.py
+++ b/llm_web_kit/libs/version.py
@@ -1 +1 @@
-__version__ = '3.2.1'
+__version__ = '3.2.2'

From fe1a2ec1e0df4cbf85d4e1d17e1224858231e3a8 Mon Sep 17 00:00:00 2001
From: quyuan <qywan918@163.com>
Date: Fri, 22 Aug 2025 12:32:52 +0000
Subject: [PATCH 4/8] Update version.py with new version

---
 llm_web_kit/libs/version.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llm_web_kit/libs/version.py b/llm_web_kit/libs/version.py
index 29e4a941..32206102 100644
--- a/llm_web_kit/libs/version.py
+++ b/llm_web_kit/libs/version.py
@@ -1 +1 @@
-__version__ = '3.2.2'
+__version__ = '3.2.3'

From 6668e692340a8b1d150982f093069aecc81a01e8 Mon Sep 17 00:00:00 2001
From: quyuan <qywan918@163.com>
Date: Mon, 25 Aug 2025 03:37:07 +0000
Subject: [PATCH 5/8] Update version.py with new version

---
 llm_web_kit/libs/version.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llm_web_kit/libs/version.py b/llm_web_kit/libs/version.py
index 32206102..d6497a81 100644
--- a/llm_web_kit/libs/version.py
+++ b/llm_web_kit/libs/version.py
@@ -1 +1 @@
-__version__ = '3.2.3'
+__version__ = '4.0.0'

From 85a91edaa71a40262002cb351b4d56bac5476d28 Mon Sep 17 00:00:00 2001
From: quyuan <qywan918@163.com>
Date: Mon, 25 Aug 2025 11:52:47 +0000
Subject: [PATCH 6/8] Update version.py with new version

---
 llm_web_kit/libs/version.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llm_web_kit/libs/version.py b/llm_web_kit/libs/version.py
index d6497a81..1a3bef53 100644
--- a/llm_web_kit/libs/version.py
+++ b/llm_web_kit/libs/version.py
@@ -1 +1 @@
-__version__ = '4.0.0'
+__version__ = '4.0.1'

From 21b3622ab004951ff8b06a77580b7e909c00e394 Mon Sep 17 00:00:00 2001
From: quyuan <qywan918@163.com>
Date: Thu, 11 Sep 2025 11:48:59 +0000
Subject: [PATCH 7/8] Update version.py with new version

---
 llm_web_kit/libs/version.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llm_web_kit/libs/version.py b/llm_web_kit/libs/version.py
index 1a3bef53..fa721b49 100644
--- a/llm_web_kit/libs/version.py
+++ b/llm_web_kit/libs/version.py
@@ -1 +1 @@
-__version__ = '4.0.1'
+__version__ = '4.1.0'

From 2b99160d9710b5f628525cc9f8b7e95a1f100833 Mon Sep 17 00:00:00 2001
From: chupei <njuchupei@gmail.com>
Date: Thu, 11 Sep 2025 20:01:32 +0800
Subject: [PATCH 8/8] update pydantic requirement

---
 requirements/dev.txt | 1 -
 1 file changed, 1 deletion(-)

diff --git a/requirements/dev.txt b/requirements/dev.txt
index ffe7e23f..23380654 100644
--- a/requirements/dev.txt
+++ b/requirements/dev.txt
@@ -4,7 +4,6 @@ nbstripout==0.8.1
 nltk==3.8.1
 openai==1.75.0
 pre-commit==3.8.0
-pydantic==2.10.6
 pytest==8.3.3
 # coverage tools
 pytest-cov==6.0.0

years (yr)	minutes (min)
5.7 years	2995920 minutes
6.7 years	3521520 minutes
7.7 years	4047120 minutes
8.7 years	4572720 minutes
9.7 years	5098320 minutes
10.7 years	5623920 minutes
11.7 years	6149520 minutes
12.7 years	6675120 minutes
13.7 years	7200720 minutes
14.7 years	7726320 minutes