From c01c18125512172f447ab43882f74e53fd263704 Mon Sep 17 00:00:00 2001 From: shriMADhav U k Date: Mon, 3 Mar 2025 17:11:44 +0100 Subject: [PATCH 1/5] change html blockquote parsing logik --- docs/source/topics/text-formatting.rst | 31 ++++++++++++-------------- pyrogram/parser/html.py | 7 ++---- 2 files changed, 16 insertions(+), 22 deletions(-) diff --git a/docs/source/topics/text-formatting.rst b/docs/source/topics/text-formatting.rst index 3ab0a2d95..8553a5497 100644 --- a/docs/source/topics/text-formatting.rst +++ b/docs/source/topics/text-formatting.rst @@ -61,7 +61,7 @@ To strictly use this mode, pass :obj:`~pyrogram.enums.ParseMode.MARKDOWN` to the >blockquote - |>escaped blockquote + **>escaped blockquote >Fist line of multi line blockquote >Block quotation continued @@ -69,13 +69,12 @@ To strictly use this mode, pass :obj:`~pyrogram.enums.ParseMode.MARKDOWN` to the >Block quotation continued >The last line of the block quotation - **> - The expandable block quotation started right after the previous block quotation - It is separated from the previous block quotation by expandable syntax - Expandable block quotation continued - Hidden by default part of the expandable block quotation started - Expandable block quotation continued - The last line of the expandable block quotation with the expandability mark<** + **>The expandable block quotation started right after the previous block quotation + **>It is separated from the previous block quotation by expandable syntax + **>Expandable block quotation continued + **>Hidden by default part of the expandable block quotation started + **>Expandable block quotation continued + **>The last line of the expandable block quotation with the expandability mark|| `inline fixed-width code` @@ -115,21 +114,19 @@ To strictly use this mode, pass :obj:`~pyrogram.enums.ParseMode.MARKDOWN` to the ">blockquote\n" - "|>escaped blockquote\n" - ">Fist line of multi line blockquote\n" ">Block quotation continued\n" ">Block quotation 
continued\n" ">Block quotation continued\n" ">The last line of the block quotation" - "**>\n" - "The expandable block quotation started right after the previous block quotation\n" - "It is separated from the previous block quotation by expandable syntax\n" - "Expandable block quotation continued\n" - "Hidden by default part of the expandable block quotation started\n" - "Expandable block quotation continued\n" - "The last line of the expandable block quotation with the expandability mark<**" + "||\n" + "**>The expandable block quotation started right after the previous block quotation\n" + "**>It is separated from the previous block quotation by expandable syntax\n" + "**>Expandable block quotation continued\n" + "**>Hidden by default part of the expandable block quotation started\n" + "**>Expandable block quotation continued\n" + "**>The last line of the expandable block quotation with the expandability mark||" ), parse_mode=ParseMode.MARKDOWN diff --git a/pyrogram/parser/html.py b/pyrogram/parser/html.py index 594feba04..f5e53250a 100644 --- a/pyrogram/parser/html.py +++ b/pyrogram/parser/html.py @@ -1,5 +1,5 @@ # Pyrogram - Telegram MTProto API Client Library for Python -# Copyright (C) 2017-present Dan +# Copyright (C) 2017-present # # This file is part of Pyrogram. 
# @@ -178,16 +178,13 @@ def parse_one(entity): language = getattr(entity, "language", "") or "" start_tag = f'<{name} language="{language}">' if language else f"<{name}>" end_tag = f"" - elif entity_type == MessageEntityType.BLOCKQUOTE: - name = entity_type.name.lower() - start_tag = f"<{name}>" - end_tag = f"" elif entity_type == MessageEntityType.EXPANDABLE_BLOCKQUOTE: name = "blockquote" start_tag = f"<{name} expandable>" end_tag = f"" elif entity_type in ( MessageEntityType.CODE, + MessageEntityType.BLOCKQUOTE, MessageEntityType.SPOILER, ): name = entity_type.name.lower() From 6564d5fddea3be621a50b1768e20eb0a0dfe8aef Mon Sep 17 00:00:00 2001 From: shriMADhav U k Date: Mon, 3 Mar 2025 17:13:12 +0100 Subject: [PATCH 2/5] (fix): Adapt markdown unparser from telethon The problem with current implementation is when we have nested markdown inside a url the markdown order is messed up. Co-Authored-By: wulan17 --- pyrogram/parser/__init__.py | 2 +- pyrogram/parser/utils.py | 19 ++++++++++++++++++- 2 files changed, 19 insertions(+), 2 deletions(-) diff --git a/pyrogram/parser/__init__.py b/pyrogram/parser/__init__.py index 00c7acae7..af477e50f 100644 --- a/pyrogram/parser/__init__.py +++ b/pyrogram/parser/__init__.py @@ -1,5 +1,5 @@ # Pyrogram - Telegram MTProto API Client Library for Python -# Copyright (C) 2017-present Dan +# Copyright (C) 2017-present # # This file is part of Pyrogram. # diff --git a/pyrogram/parser/utils.py b/pyrogram/parser/utils.py index 32c81707f..42a23348a 100644 --- a/pyrogram/parser/utils.py +++ b/pyrogram/parser/utils.py @@ -1,5 +1,5 @@ # Pyrogram - Telegram MTProto API Client Library for Python -# Copyright (C) 2017-present Dan +# Copyright (C) 2017-present # # This file is part of Pyrogram. 
# @@ -39,3 +39,20 @@ def remove_surrogates(text): def replace_once(source: str, old: str, new: str, start: int): return source[:start] + source[start:].replace(old, new, 1) + + +def within_surrogate(text, index, *, length=None): + """ + + https://github.com/LonamiWebs/Telethon/blob/63d9b26/telethon/helpers.py#L52-L63 + + `True` if ``index`` is within a surrogate (before and after it, not at!). + """ + if length is None: + length = len(text) + + return ( + 1 < index < len(text) and # in bounds + '\ud800' <= text[index - 1] <= '\udbff' and # previous is + '\ud800' <= text[index] <= '\udfff' # current is + ) From f9c4f6fc278e5da4f91b5c30c3b3c464236e0693 Mon Sep 17 00:00:00 2001 From: shriMADhav U k Date: Mon, 3 Mar 2025 17:14:35 +0100 Subject: [PATCH 3/5] broken fixes - Add support for multi-line blockquote in markdown unparser - Add support for custom emoji in markdown unparser Co-Authored-By: wulan17 --- pyrogram/parser/markdown.py | 381 +++++++++++++++--------------------- pyrogram/parser/parser.py | 2 +- 2 files changed, 157 insertions(+), 226 deletions(-) diff --git a/pyrogram/parser/markdown.py b/pyrogram/parser/markdown.py index bdf62cad4..d98cc31f3 100644 --- a/pyrogram/parser/markdown.py +++ b/pyrogram/parser/markdown.py @@ -1,5 +1,5 @@ # Pyrogram - Telegram MTProto API Client Library for Python -# Copyright (C) 2017-present Dan +# Copyright (C) 2017-present # # This file is part of Pyrogram. # @@ -22,7 +22,6 @@ import pyrogram from pyrogram.enums import MessageEntityType - from . 
import utils from .html import HTML @@ -34,149 +33,91 @@ CODE_DELIM = "`" PRE_DELIM = "```" BLOCKQUOTE_DELIM = ">" -BLOCKQUOTE_ESCAPE_DELIM = "|>" BLOCKQUOTE_EXPANDABLE_DELIM = "**>" -BLOCKQUOTE_EXPANDABLE_END_DELIM = "<**" - - -MARKDOWN_RE = re.compile( - r"({d})|(!?)\[(.+?)\]\((.+?)\)".format( - d="|".join( - [ - "".join(i) - for i in [ - [rf"\{j}" for j in i] - for i in [ - PRE_DELIM, - CODE_DELIM, - STRIKE_DELIM, - UNDERLINE_DELIM, - ITALIC_DELIM, - BOLD_DELIM, - SPOILER_DELIM, - ] - ] + +MARKDOWN_RE = re.compile(r"({d})".format( + d="|".join( + ["".join(i) for i in [ + [rf"\{j}" for j in i] + for i in [ + PRE_DELIM, + CODE_DELIM, + STRIKE_DELIM, + UNDERLINE_DELIM, + ITALIC_DELIM, + BOLD_DELIM, + SPOILER_DELIM ] - ) - ) -) + ]] + ))) +URL_RE = re.compile(r"(!?)\[(.+?)\]\((.+?)\)") OPENING_TAG = "<{}>" CLOSING_TAG = "" URL_MARKUP = '{}' EMOJI_MARKUP = "{}" FIXED_WIDTH_DELIMS = [CODE_DELIM, PRE_DELIM] +CODE_TAG_RE = re.compile(r".*?") class Markdown: def __init__(self, client: Optional["pyrogram.Client"]): self.html = HTML(client) - @staticmethod - def escape_and_create_quotes(text: str, strict: bool): - text_lines: list[str | None] = text.splitlines() - - # Indexes of Already escaped lines - html_escaped_list: list[int] = [] - - # Temporary Queue to hold lines to be quoted - to_quote_list: list[tuple[int, str]] = [] - - def create_blockquote(quote_type: str = "") -> None: - """ - Merges all lines in quote_queue into first line of queue - Encloses that line in html quote - Replaces rest of the lines with None placeholders to preserve indexes - """ - if len(to_quote_list) == 0: - return - - joined_lines = "\n".join([i[1] for i in to_quote_list]) + def blockquote_parser(self, text): + text = re.sub(r'\n>', '\n>', re.sub(r'^>', '>', text)) + lines = text.split('\n') + result = [] - first_line_index, _ = to_quote_list[0] - text_lines[first_line_index] = ( - f"{joined_lines}" - ) - - for line_to_remove in to_quote_list[1:]: - text_lines[line_to_remove[0]] = None - - 
to_quote_list.clear() - - # Handle Expandable Quote - inside_blockquote = False - for index, line in enumerate(text_lines): - if line.startswith(BLOCKQUOTE_EXPANDABLE_DELIM) and not inside_blockquote: - delim_stripped_line = line[3:] - parsed_line = ( - html.escape(delim_stripped_line) if strict else delim_stripped_line - ) - - to_quote_list.append((index, parsed_line)) - html_escaped_list.append(index) - - inside_blockquote = True - continue - - elif line.endswith(BLOCKQUOTE_EXPANDABLE_END_DELIM) and inside_blockquote: - delim_stripped_line = line[:-3] - parsed_line = ( - html.escape(delim_stripped_line) if strict else delim_stripped_line - ) - - to_quote_list.append((index, parsed_line)) - html_escaped_list.append(index) - - inside_blockquote = False - - create_blockquote(quote_type=" expandable") - - if inside_blockquote: - parsed_line = html.escape(line) if strict else line - to_quote_list.append((index, parsed_line)) - html_escaped_list.append(index) - - # Handle Single line/Continued Quote - for index, line in enumerate(text_lines): - if line is None: - continue - - if line.startswith(BLOCKQUOTE_ESCAPE_DELIM): - text_lines[index] = line[1:] - create_blockquote() - continue + in_blockquote = False + for line in lines: if line.startswith(BLOCKQUOTE_DELIM): - delim_stripped_line = line[1:] - parsed_line = ( - html.escape(delim_stripped_line) if strict else delim_stripped_line - ) + if not in_blockquote: + line = re.sub(r'^> ', OPENING_TAG.format("blockquote"), line) + line = re.sub(r'^>', OPENING_TAG.format("blockquote"), line) + in_blockquote = True + result.append(line.strip()) + else: + result.append(line[1:].strip()) + elif line.startswith(BLOCKQUOTE_EXPANDABLE_DELIM): + if not in_blockquote: + line = re.sub(r'^\*\*> ', OPENING_TAG.format("blockquote expandable"), line) + line = re.sub(r'^\*\*>', OPENING_TAG.format("blockquote expandable"), line) + in_blockquote = True + result.append(line.strip()) + else: + result.append(line[3:].strip()) + else: + if 
in_blockquote: + line = CLOSING_TAG.format("blockquote") + line + in_blockquote = False + result.append(line) - to_quote_list.append((index, parsed_line)) - html_escaped_list.append(index) + if in_blockquote: + line = result[len(result)-1] + CLOSING_TAG.format("blockquote") + result.pop(len(result)-1) + result.append(line) - elif len(to_quote_list) > 0: - create_blockquote() - else: - create_blockquote() + return '\n'.join(result) + async def parse(self, text: str, strict: bool = False): if strict: - for idx, line in enumerate(text_lines): - if idx not in html_escaped_list: - text_lines[idx] = html.escape(line) - - return "\n".join( - [valid_line for valid_line in text_lines if valid_line is not None] - ) + text = html.escape(text) + text = self.blockquote_parser(text) - async def parse(self, text: str, strict: bool = False): - text = self.escape_and_create_quotes(text, strict=strict) delims = set() is_fixed_width = False + placeholders = {} + for i, code_section in enumerate(CODE_TAG_RE.findall(text)): + placeholder = f"{{CODE_SECTION_{i}}}" + placeholders[placeholder] = code_section + text = text.replace(code_section, placeholder, 1) + for i, match in enumerate(re.finditer(MARKDOWN_RE, text)): start, _ = match.span() - delim, is_emoji, text_url, url = match.groups() + delim = match.group(1) full = match.group(0) if delim in FIXED_WIDTH_DELIMS: @@ -185,20 +126,6 @@ async def parse(self, text: str, strict: bool = False): if is_fixed_width and delim not in FIXED_WIDTH_DELIMS: continue - if not is_emoji and text_url: - text = utils.replace_once( - text, full, URL_MARKUP.format(url, text_url), start - ) - continue - - if is_emoji: - emoji = text_url - emoji_id = url.lstrip("tg://emoji?id=") - text = utils.replace_once( - text, full, EMOJI_MARKUP.format(emoji_id, emoji), start - ) - continue - if delim == BOLD_DELIM: tag = "b" elif delim == ITALIC_DELIM: @@ -224,109 +151,113 @@ async def parse(self, text: str, strict: bool = False): tag = CLOSING_TAG.format(tag) if 
delim == PRE_DELIM and delim in delims: - delim_and_language = text[text.find(PRE_DELIM) :].split("\n")[0] - language = delim_and_language[len(PRE_DELIM) :] - text = utils.replace_once( - text, delim_and_language, f'
', start
-                )
+                delim_and_language = text[text.find(PRE_DELIM):].split("\n")[0]
+                language = delim_and_language[len(PRE_DELIM):]
+                text = utils.replace_once(text, delim_and_language, f'<pre language="{language}">', start)
                 continue
 
             text = utils.replace_once(text, delim, tag, start)
 
+        for match in URL_RE.finditer(text):
+            start, _ = match.span()
+            is_emoji, text_url, url = match.groups()
+            full = match.group(0)
+
+            if is_emoji:
+                # str.lstrip() strips a *set of characters*, not a prefix, so it
+                # could also eat leading characters of the id; slice the prefix off.
+                prefix = "tg://emoji?id="
+                emoji_id = url[len(prefix):] if url.startswith(prefix) else url
+                markup = EMOJI_MARKUP.format(emoji_id, text_url)
+            else:
+                markup = URL_MARKUP.format(url, text_url)
+            text = utils.replace_once(text, full, markup, start)
+
+        for placeholder, code_section in placeholders.items():
+            text = text.replace(placeholder, code_section)
+
         return await self.html.parse(text)
 
     @staticmethod
     def unparse(text: str, entities: list):
+        """
+        Performs the reverse operation to .parse(), effectively returning
+        markdown-like syntax given a normal text and its MessageEntity's.
+
+        :param text: the text to be reconverted into markdown.
+        :param entities: list of MessageEntity's applied to the text.
+        :return: a markdown-like text representing the combination of both inputs.
+        """
+        delimiters = {
+            MessageEntityType.BOLD: BOLD_DELIM,
+            MessageEntityType.ITALIC: ITALIC_DELIM,
+            MessageEntityType.UNDERLINE: UNDERLINE_DELIM,
+            MessageEntityType.STRIKETHROUGH: STRIKE_DELIM,
+            MessageEntityType.CODE: CODE_DELIM,
+            MessageEntityType.PRE: PRE_DELIM,
+            MessageEntityType.BLOCKQUOTE: BLOCKQUOTE_DELIM,
+            MessageEntityType.EXPANDABLE_BLOCKQUOTE: BLOCKQUOTE_EXPANDABLE_DELIM,
+            MessageEntityType.SPOILER: SPOILER_DELIM
+        }
+
         text = utils.add_surrogates(text)
 
-        entities_offsets = []
-
-        for entity in entities:
-            entity_type = entity.type
-            start = entity.offset
-            end = start + entity.length
-
-            if entity_type == MessageEntityType.BOLD:
-                start_tag = end_tag = BOLD_DELIM
-            elif entity_type == MessageEntityType.ITALIC:
-                start_tag = end_tag = ITALIC_DELIM
-            elif entity_type == MessageEntityType.UNDERLINE:
-                start_tag = end_tag = UNDERLINE_DELIM
-            elif entity_type == MessageEntityType.STRIKETHROUGH:
-                start_tag = end_tag = STRIKE_DELIM
-            elif entity_type == MessageEntityType.CODE:
-                start_tag = end_tag = CODE_DELIM
-            elif entity_type == MessageEntityType.PRE:
-                language = getattr(entity, "language", "") or ""
-                start_tag = f"{PRE_DELIM}{language}\n"
-                end_tag = f"\n{PRE_DELIM}"
-            elif entity_type == MessageEntityType.BLOCKQUOTE:
-                start_tag = BLOCKQUOTE_DELIM + " "
-                end_tag = ""
-                blockquote_text = text[start:end]
-                lines = blockquote_text.split("\n")
-                last_length = 0
-                for line in lines:
-                    if len(line) == 0 and last_length == end:
-                        continue
-                    start_offset = start + last_length
-                    last_length = last_length + len(line)
-                    end_offset = start_offset + last_length
-                    entities_offsets.append(
-                        (
-                            start_tag,
-                            start_offset,
-                        )
-                    )
-                    entities_offsets.append(
-                        (
-                            end_tag,
-                            end_offset,
-                        )
-                    )
-                    last_length = last_length + 1
-                continue
-            elif entity_type == MessageEntityType.EXPANDABLE_BLOCKQUOTE:
-                start_tag = BLOCKQUOTE_EXPANDABLE_DELIM + " "
-                end_tag = " " + BLOCKQUOTE_EXPANDABLE_END_DELIM
-            elif entity_type == MessageEntityType.SPOILER:
-                start_tag = end_tag = SPOILER_DELIM
-            elif entity_type == MessageEntityType.TEXT_LINK:
-                url = entity.url
-                start_tag = "["
-                end_tag = f"]({url})"
-            elif entity_type == MessageEntityType.TEXT_MENTION:
-                user = entity.user
-                start_tag = "["
-                end_tag = f"](tg://user?id={user.id})"
-            elif entity_type == MessageEntityType.CUSTOM_EMOJI:
-                emoji_id = entity.custom_emoji_id
-                start_tag = "!["
-                end_tag = f"](tg://emoji?id={emoji_id})"
+        insert_at = []
+        for i, entity in enumerate(entities):
+            s = entity.offset
+            e = entity.offset + entity.length
+            delimiter = delimiters.get(entity.type, None)
+            if delimiter:
+                if entity.type != MessageEntityType.BLOCKQUOTE and entity.type != MessageEntityType.EXPANDABLE_BLOCKQUOTE:
+                    open_delimiter = delimiter
+                    close_delimiter = delimiter
+                    if entity.type == MessageEntityType.PRE:
+                        close_delimiter = '\n' + delimiter
+                        if entity.language:
+                            open_delimiter += entity.language + '\n'
+                        else:
+                            open_delimiter += '\n'
+                    insert_at.append((s, i, open_delimiter))
+                    insert_at.append((e, -i, close_delimiter))
+                else:
+                    # Handle multiline blockquotes
+                    text_subset = text[s:e]
+                    lines = text_subset.splitlines()
+                    for line_num, line in enumerate(lines):
+                        line_start = s + sum(len(l) + 1 for l in lines[:line_num])
+                        if entity.type == MessageEntityType.EXPANDABLE_BLOCKQUOTE:
+                            insert_at.append((line_start, i, BLOCKQUOTE_EXPANDABLE_DELIM))
+                        else:
+                            insert_at.append((line_start, i, BLOCKQUOTE_DELIM))
+                    # No closing delimiter for blockquotes
             else:
-                continue
-
-            entities_offsets.append(
-                (
-                    start_tag,
-                    start,
-                )
-            )
-            entities_offsets.append(
-                (
-                    end_tag,
-                    end,
-                )
-            )
-
-        entities_offsets = map(
-            lambda x: x[1],
-            sorted(
-                enumerate(entities_offsets), key=lambda x: (x[1][1], x[0]), reverse=True
-            ),
-        )
-
-        for entity, offset in entities_offsets:
-            text = text[:offset] + entity + text[offset:]
+                url = None
+                is_emoji = False
+                if entity.type == MessageEntityType.TEXT_LINK:
+                    url = entity.url
+                elif entity.type == MessageEntityType.TEXT_MENTION:
+                    url = f'tg://user?id={entity.user.id}'
+                elif entity.type == MessageEntityType.CUSTOM_EMOJI:
+                    url = f"tg://emoji?id={entity.custom_emoji_id}"
+                    is_emoji = True
+                if url:
+                    if is_emoji:
+                        insert_at.append((s, i, '!['))
+                    else:
+                        insert_at.append((s, i, '['))
+                    insert_at.append((e, -i, f']({url})'))
+
+        insert_at.sort(key=lambda t: (t[0], t[1]))
+        while insert_at:
+            at, _, what = insert_at.pop()
+
+            # If we are in the middle of a surrogate pair, nudge the position
+            # forward (+1) past it. Otherwise we would end up with malformed text
+            # and fail to encode. Example of bad input: "Hi \ud83d\ude1c"
+            # https://en.wikipedia.org/wiki/UTF-16#U+010000_to_U+10FFFF
+            while utils.within_surrogate(text, at):
+                at += 1
+
+            text = text[:at] + what + text[at:]
 
         return utils.remove_surrogates(text)
diff --git a/pyrogram/parser/parser.py b/pyrogram/parser/parser.py
index 0ce2b2375..e2de12144 100644
--- a/pyrogram/parser/parser.py
+++ b/pyrogram/parser/parser.py
@@ -1,5 +1,5 @@
 #  Pyrogram - Telegram MTProto API Client Library for Python
-#  Copyright (C) 2017-present Dan 
+#  Copyright (C) 2017-present 
 #
 #  This file is part of Pyrogram.
 #

From 1d5bb900d81896c4e5ec3df23f6fbf7ae05dbe5e Mon Sep 17 00:00:00 2001
From: shriMADhav U k 
Date: Mon, 3 Mar 2025 17:17:40 +0100
Subject: [PATCH 4/5] markdown: link the Telethon source in the unparse docstring

---
 pyrogram/parser/markdown.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/pyrogram/parser/markdown.py b/pyrogram/parser/markdown.py
index d98cc31f3..2cd784fcf 100644
--- a/pyrogram/parser/markdown.py
+++ b/pyrogram/parser/markdown.py
@@ -181,6 +181,9 @@ async def parse(self, text: str, strict: bool = False):
     @staticmethod
     def unparse(text: str, entities: list):
         """
+
+        https://github.com/LonamiWebs/Telethon/blob/141b620/telethon/extensions/markdown.py#L137-L193
+
         Performs the reverse operation to .parse(), effectively returning
         markdown-like syntax given a normal text and its MessageEntity's.
 

From 34ca4e783749132021f317b177c84e6d8f9f4ef0 Mon Sep 17 00:00:00 2001
From: shriMADhav U k 
Date: Sun, 9 Mar 2025 12:03:22 +0100
Subject: [PATCH 5/5] markdown: Check if PRE is inside blockquote before
 unparsing it

Co-Authored-By: wulan17 
---
 pyrogram/parser/markdown.py | 41 +++++++++++++++++++++++++++++--------
 pyrogram/parser/utils.py    |  1 -
 2 files changed, 33 insertions(+), 9 deletions(-)

diff --git a/pyrogram/parser/markdown.py b/pyrogram/parser/markdown.py
index 2cd784fcf..aa2fbf7fd 100644
--- a/pyrogram/parser/markdown.py
+++ b/pyrogram/parser/markdown.py
@@ -181,7 +181,6 @@ async def parse(self, text: str, strict: bool = False):
     @staticmethod
     def unparse(text: str, entities: list):
         """
-
         https://github.com/LonamiWebs/Telethon/blob/141b620/telethon/extensions/markdown.py#L137-L193
 
         Performs the reverse operation to .parse(), effectively returning
@@ -211,15 +210,41 @@ def unparse(text: str, entities: list):
             e = entity.offset + entity.length
             delimiter = delimiters.get(entity.type, None)
             if delimiter:
-                if entity.type != MessageEntityType.BLOCKQUOTE and entity.type != MessageEntityType.EXPANDABLE_BLOCKQUOTE:
+                if entity.type == MessageEntityType.PRE:
+                    inside_blockquote = any(  # PRE span lies fully within a BLOCKQUOTE entity
+                        blk_entity.offset <= s < blk_entity.offset + blk_entity.length and
+                        blk_entity.offset < e <= blk_entity.offset + blk_entity.length
+                        for blk_entity in entities
+                        if blk_entity.type == MessageEntityType.BLOCKQUOTE
+                    )
+                    is_expandable = any(  # NOTE(review): relies on a `collapsed` attribute; EXPANDABLE_BLOCKQUOTE entities are not matched here - confirm
+                        blk_entity.offset <= s < blk_entity.offset + blk_entity.length and
+                        blk_entity.offset < e <= blk_entity.offset + blk_entity.length and
+                        blk_entity.collapsed
+                        for blk_entity in entities
+                        if blk_entity.type == MessageEntityType.BLOCKQUOTE
+                    )
+                    if inside_blockquote:
+                        if is_expandable:
+                            if entity.language:
+                                open_delimiter = f"{delimiter}{entity.language}\n**>"
+                            else:
+                                open_delimiter = f"{delimiter}\n**>"
+                            close_delimiter = f"\n**>{delimiter}"
+                        else:
+                            if entity.language:
+                                open_delimiter = f"{delimiter}{entity.language}\n>"
+                            else:
+                                open_delimiter = f"{delimiter}\n>"
+                            close_delimiter = f"\n>{delimiter}"
+                    else:  # plain PRE: keep the language and the fence newlines, as before this change
+                        open_delimiter = f"{delimiter}{entity.language or ''}\n"
+                        close_delimiter = f"\n{delimiter}"
+                    insert_at.append((s, i, open_delimiter))
+                    insert_at.append((e, -i, close_delimiter))
+                elif entity.type != MessageEntityType.BLOCKQUOTE and entity.type != MessageEntityType.EXPANDABLE_BLOCKQUOTE:
                     open_delimiter = delimiter
                     close_delimiter = delimiter
-                    if entity.type == MessageEntityType.PRE:
-                        close_delimiter = '\n' + delimiter
-                        if entity.language:
-                            open_delimiter += entity.language + '\n'
-                        else:
-                            open_delimiter += '\n'
                     insert_at.append((s, i, open_delimiter))
                     insert_at.append((e, -i, close_delimiter))
                 else:
diff --git a/pyrogram/parser/utils.py b/pyrogram/parser/utils.py
index 42a23348a..e01197694 100644
--- a/pyrogram/parser/utils.py
+++ b/pyrogram/parser/utils.py
@@ -43,7 +43,6 @@ def replace_once(source: str, old: str, new: str, start: int):
 
 def within_surrogate(text, index, *, length=None):
     """
-    
     https://github.com/LonamiWebs/Telethon/blob/63d9b26/telethon/helpers.py#L52-L63
 
     `True` if ``index`` is within a surrogate (before and after it, not at!).