Skip to content
This repository was archived by the owner on Apr 21, 2025. It is now read-only.

Commit 4c12b63

Browse files
committed
Updated library
1 parent ef9e154 commit 4c12b63

File tree

2 files changed

+26
-7
lines changed

2 files changed

+26
-7
lines changed

setup.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@
2828
],
2929
},
3030
install_requires=[
31-
"html2text>=2020.1.16",
31+
"html2markdown>=0.1.7",
3232
"mdv>=1.7.4",
3333
"tqdm==4.48.0",
3434
"txtai>=2.0.0"

src/python/codequestion/query.py

Lines changed: 25 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -5,9 +5,10 @@
55
import argparse
66
import os
77
import os.path
8+
import re
89
import sqlite3
910

10-
import html2text
11+
import html2markdown
1112
import mdv
1213

1314
from txtai.embeddings import Embeddings
@@ -20,6 +21,27 @@ class Query(object):
2021
Methods to query an embeddings index.
2122
"""
2223

24+
@staticmethod
def markdown(text):
    """
    Converts html text to markdown.

    Args:
        text: html text

    Returns:
        text as markdown
    """

    # Remove rel attributes as they are not supported by html2markdown. The match is
    # anchored to an opening tag and stops at the attribute's own closing quote, so
    # attributes that follow rel (e.g. href) are preserved and rel-like text that
    # appears outside of a tag is left untouched.
    text = re.sub(r'(<[a-zA-Z][^<>]*?)\s+rel=(?:"[^"]*"|\'[^\']*\')', r"\1", text)

    # Convert html to markdown
    text = html2markdown.convert(text)

    # Decode [<>&] characters. &amp; is decoded last so that a doubly escaped entity
    # such as &amp;lt; ends up as the literal text &lt; rather than <.
    return text.replace("&lt;", "<").replace("&gt;", ">").replace("&amp;", "&")
44+
2345
@staticmethod
2446
def escape(text):
2547
"""
@@ -69,11 +91,8 @@ def render(text, html=True, tab_length=0):
6991
"""
7092

7193
if html:
72-
# Convert HTML
73-
parser = html2text.HTML2Text()
74-
parser.body_width = 0
75-
text = parser.handle(text)
76-
94+
# Convert html to markdown
95+
text = Query.markdown(text)
7796
text = Query.escape(text)
7897

7998
text = mdv.main(text, theme="592.2129", c_theme="953.3567", cols=180, tab_length=tab_length)

0 commit comments

Comments
 (0)