Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
42 commits
Select commit Hold shift + click to select a range
19ba24b
more saas changes
destitutus Nov 27, 2019
1e76009
render turingwinner
destitutus Dec 1, 2019
8369ee0
toc case
destitutus Dec 1, 2019
6fa0920
fix after tex structure change
mint87 Jan 14, 2020
e696845
fix after tex structure change
mint87 Jan 15, 2020
c8c32ec
Merge pull request #37 from codio/10926_fix_saas_book_convertation
Feb 5, 2020
d77f550
11080 fix some more problems (#40)
Apr 27, 2020
f681f9d
Some more fixes (#41)
May 7, 2020
9dea71e
fixed parse chapters with line break
mint87 May 13, 2020
5b694f8
fixed extra line breaks in self-check question
mint87 May 13, 2020
5f5c4e4
Merge remote-tracking branch 'origin/more_saas_changes' into more_saa…
mint87 May 13, 2020
8926e61
fix checkyourself
mint87 May 15, 2020
cafd4b9
11432 tabularline (#42)
May 15, 2020
d87b250
some fixes (#44)
May 19, 2020
38c2fd7
changed dpi for pdf2image converter (#45)
May 20, 2020
b9a8af9
fixed display equation (#48)
May 21, 2020
dca0a84
11525 fix self check (#47)
May 21, 2020
429d0c9
fixed invalid content (keep escaping slash for some chars) (#49)
May 22, 2020
6eccecc
11533 numbered list addcounter (#51)
May 26, 2020
b2d7220
11534 some more random tags (#50)
Jun 2, 2020
08a9661
added ref for num-list items (#52)
Jun 3, 2020
94f35e1
changed dpi for pdf2image
mint87 Jun 3, 2020
9d68e6d
11409 fonts (#54)
Jun 6, 2020
ef77dc5
Small fixes (#55)
Jun 8, 2020
db0b479
11545 book name (#53)
Jun 8, 2020
d4a5515
fix broken references (looks like sec:ruby:ruby_idioms) (#56)
Jun 10, 2020
c559d23
removed extra whitespace in author quotation (#57)
Jul 14, 2020
f0766b3
changes for sectionfile regex (#58)
Jul 14, 2020
6f27203
clear extra tags
mint87 Jul 20, 2020
2640c50
11719 some fixes (#59)
Jul 23, 2020
319c3b5
added escaping for specific chars (#67)
Sep 16, 2020
f14780b
fix \small
mint87 Sep 16, 2020
3f89eb7
replace \index{} (#60)
Sep 18, 2020
beafd77
fixed some specific cases (#70)
Oct 20, 2020
6319ab5
12023 broken figure num (#71)
Nov 30, 2020
949fc4a
fix in figure
sergei-bronnikov Jan 28, 2021
ef86620
Merge remote-tracking branch 'origin/master' into more_saas_changes
ksimuk Jan 28, 2021
2445291
fix
ksimuk Jan 28, 2021
45b074c
fix merge
sergei-bronnikov Jan 29, 2021
7ed18ec
fix werker
sergei-bronnikov Jan 29, 2021
3fac3cd
12366 fix issues (#76)
Feb 16, 2021
8c2ecf2
Update for competencies
jairovelasquez Jul 8, 2024
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion converter/assets.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,11 +51,13 @@ def _convert_assets(config, generate_dir, pdfs_for_convert, convert_from_path, b
dst_folder.mkdir(exist_ok=True, parents=True)

try:
pages = convert_from_path(pdf_file, 500)
pages = convert_from_path(pdf_file, 300)
if pages:
image = Path(pdf.replace('.pdf', '.jpg'))
page = pages[0]
page.save(dst_folder.joinpath(image.name), 'JPEG')
except KeyboardInterrupt as e:
raise e
except BaseException as e:
logging.error("convert %s to jpg error" % pdf)
logging.error(e)
Expand Down
10 changes: 5 additions & 5 deletions converter/convert.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,14 +54,14 @@ def prepare_codio_rules(config):
def cleanup_latex(lines):
updated = []
starts = (
'%', '\\index{', '\\label{', '\\markboth{', '\\addcontentsline{',
'\\vspace', '\\newpage', '\\noindent',
'\\ttfamily', '\\chapter', '\\section', '\\newcommand', '\\vfill', '\\pagebreak'
'%', '\\label{', '\\markboth{', '\\addcontentsline{',
'\\vspace', '\\newpage', '\\vfill', '\\pagebreak',
'\\ttfamily', '\\chapter', '\\section', '\\newcommand'
)
for line in lines:
if line.startswith(starts):
continue
updated.append(line)
updated.append(line.rstrip('\n'))
return updated


Expand Down Expand Up @@ -191,7 +191,7 @@ def prepare_structure(generate_dir):

def make_metadata_items(config):
book = {
"name": "TODO: book name",
"name": config.get("name"),
"children": []
}
metadata = {
Expand Down
19 changes: 18 additions & 1 deletion converter/latex2markdown.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
import uuid
import re

from converter.markdown.del_icons_description import DelIconsDescription
from converter.markdown.equation import Equation
from converter.markdown.inline_code_block import InlineCodeBlock
from converter.markdown.code_block import CodeBlock
from converter.markdown.bold import Bold
Expand All @@ -24,6 +26,7 @@
from converter.markdown.cleanup import Cleanup
from converter.markdown.exercise import Exercise
from converter.markdown.figure import Figure
from converter.markdown.competency import Competency
from converter.markdown.refs import Refs
from converter.markdown.sidebar import Sidebar
from converter.markdown.eqnarray import EqnArray
Expand All @@ -42,7 +45,10 @@
from converter.markdown.screencast import Screencast
from converter.markdown.tabularx import Tabularx
from converter.markdown.tabular import Tabular
from converter.markdown.tags import Tags
from converter.markdown.textfigure import Textfigure
from converter.markdown.unescape import UnEscape
from converter.markdown.turingwinner import TuringWinner


class LaTeX2Markdown(object):
Expand Down Expand Up @@ -74,6 +80,7 @@ def increment_figure_counter(self, figure_counter):
def _latex_to_markdown(self):
output = self._latex_string

output = DelIconsDescription(output).convert()
output, figure_counter = TableFigure(
output, self._caret_token, self._load_workspace_file,
self._figure_counter_offset, self._chapter_num, self._refs
Expand All @@ -86,6 +93,7 @@ def _latex_to_markdown(self):
output = Ignore(output).convert()
output = SaasSpecific(output, self._caret_token).convert()
output = ItalicBold(output).convert()
output = Equation(output, self._caret_token).convert()
output, source_codes = CodeBlock(
output, self._percent_token, self._caret_token, self._remove_trinket
).convert()
Expand All @@ -106,10 +114,12 @@ def _latex_to_markdown(self):
self._source_codes.extend(source_codes)
output = re.sub(r"\\%", self._percent_token, output)
output = InlineCodeBlock(output, self._percent_token).convert()
output = Textfigure(output, self._caret_token).convert()

# remove comments
output = RemoveComments(output).convert()
output = Quotation(output, self._caret_token).convert()
output = Competency(output, self._caret_token).convert()
output = Paragraph(output).convert_without_tags()
output = Refs(output, self._refs).convert()
output = Links(output).convert()
Expand All @@ -123,6 +133,9 @@ def _latex_to_markdown(self):
output = PitFall(output, self._caret_token).convert()
output = Summary(output, self._caret_token).convert()
output = Chips(output, self._caret_token).convert()
output, images = TuringWinner(output, self._caret_token, self._detect_asset_ext,).convert()
if images:
self._pdfs.extend(images)
output = Cleanup(output).convert()

output, images, figure_counter = PicFigure(
Expand All @@ -134,7 +147,8 @@ def _latex_to_markdown(self):
if images:
self._pdfs.extend(images)
output, images, figure_counter = Figure(
output, self._figure_counter_offset, self._chapter_num, self._detect_asset_ext, self._caret_token
output, self._figure_counter_offset, self._chapter_num, self._detect_asset_ext,
self._caret_token, self._refs
).convert()
if images:
self._pdfs.extend(images)
Expand Down Expand Up @@ -164,8 +178,11 @@ def _latex_to_markdown(self):
output = Center(output, self._caret_token).convert()

output = UnEscape(output).convert()
output = Tags(output).convert()
output = NewLine(output).convert()

output = re.sub(r"\n? *\\label{.*?}", r"", output)

# convert all matched % back
output = re.sub(self._percent_token, "%", output)
output = re.sub(self._caret_token, "\n", output)
Expand Down
5 changes: 2 additions & 3 deletions converter/markdown/block.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,9 +69,8 @@ def _format_block_contents(self, block_name, block_contents):
for line in block_contents.lstrip().rstrip().split("\n"):
line = line.lstrip().rstrip()
line = line.replace("\\\\", "<br/>")
indented_line = line_indent_char + line + self._caret_token
output_str += indented_line
return output_str
output_str += f'{line} '
return line_indent_char + output_str

def _format_block_name(self, block_name, block_title=None):
block_config = self._block_configuration[block_name]
Expand Down
5 changes: 3 additions & 2 deletions converter/markdown/block_matcher.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,14 +4,15 @@ def match_block(chars, output, repl_func):
level = 0 if '{' in chars else 1
for index in range(pos + len(chars), len(output), 1):
ch = output[index]
if ch == '}':
prev_char = output[index - 1]
if ch == '}' and prev_char != '\\':
if level == 0:
start_position = pos+len(chars) if '{' in chars else output.find("{", pos) + 1
output = output[0:pos] + repl_func(output[start_position:index]) + output[index + 1:]
break
else:
level += 1
elif ch == '{':
elif ch == '{' and prev_char != '\\':
level -= 1
pos = output.find(chars)
return output
12 changes: 6 additions & 6 deletions converter/markdown/bold.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,12 +9,12 @@ def __init__(self, latex_str):

def convert(self):
output = self.str
output = re.sub(r"\\textbf{(.*?)}", r"**\1**", output, flags=re.DOTALL + re.VERBOSE)
output = re.sub(r"{\\bf[ ](.*?)}", r"**\1**", output, flags=re.DOTALL + re.VERBOSE)
output = re.sub(r"{\\sf[ ](.*?)}", r"**\1**", output, flags=re.DOTALL + re.VERBOSE)
output = re.sub(r"\\textbf{(.*?)}", r"<b>\1</b>", output, flags=re.DOTALL + re.VERBOSE)
output = re.sub(r"{\\bf[ ](.*?)}", r"<b>\1</b>", output, flags=re.DOTALL + re.VERBOSE)
output = re.sub(r"{\\sf[ ](.*?)}", r"<b>\1</b>", output, flags=re.DOTALL + re.VERBOSE)

output = match_block("\\B{", output, lambda match: f"**{match}**")
output = match_block("\\C{", output, lambda match: f"**{match}**")
output = match_block("\\T{", output, lambda match: f"**{match}**")
output = match_block("\\B{", output, lambda match: f"<b>{match}</b>")
output = match_block("\\C{", output, lambda match: f"<b>{match}</b>")
output = match_block("\\T{", output, lambda match: f"<b>{match}</b>")

return output
3 changes: 2 additions & 1 deletion converter/markdown/center.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,8 @@

from converter.markdown.text_as_paragraph import TextAsParagraph

center_re = re.compile(r"""\\begin{center}(?P<block_contents>.*?)\\end{center}""", flags=re.DOTALL + re.VERBOSE)
center_re = re.compile(r"""\\begin{(center|centering)}(?P<block_contents>.*?)\\end{(center|centering)}""",
flags=re.DOTALL + re.VERBOSE)


class Center(TextAsParagraph):
Expand Down
11 changes: 9 additions & 2 deletions converter/markdown/checkyourself.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,13 +17,20 @@ def make_answer_block(self, matchobj):
answer_block_contents = matchobj.group('answer_block_contents')
answer_block_contents = answer_block_contents.replace("\\\\", "<br/>")
answer_block_contents = self.to_paragraph(answer_block_contents)
return '<details><summary>Check yourself</summary>{}</details>'.format(answer_block_contents)
caret_token = self._caret_token
return f'{caret_token}<p><details><summary>Check yourself</summary>' \
f'{caret_token}{caret_token}{answer_block_contents}</details></p>'

def make_block(self, matchobj):
block_contents = matchobj.group('block_contents')
block_contents = re.sub(r"\s*\n\s*", " ", block_contents).strip()
block_contents = block_contents.replace("\\\\", "<br/>")
block_contents = re.sub(r"\n? *\\label{.*?} *", r"", block_contents)
answer_str = answer_re.sub(self.make_answer_block, block_contents)
caret_token = self._caret_token
return f'{caret_token}|||challenge{caret_token}{answer_str}{caret_token}|||{caret_token}'
answer_str = re.sub(r"\\{", r"{", answer_str)
answer_str = re.sub(r"\\}", r"}", answer_str)
return f'{caret_token}|||challenge{caret_token}{answer_str}{caret_token}{caret_token}|||{caret_token}'

def convert(self):
return checkyourself_re.sub(self.make_block, self.str)
11 changes: 7 additions & 4 deletions converter/markdown/chips.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,21 +2,24 @@

from converter.markdown.text_as_paragraph import TextAsParagraph

chips_re = re.compile(r"""\\begin{chips}{(?P<title>.*?)}(?P<block_contents>.*?)\\end{chips}""",
flags=re.DOTALL + re.VERBOSE)
chips_re = re.compile(r"""\\begin{chips}{(?P<title>.*?)}({(?P<ref>.*?)})?(?P<block_contents>.*?)\\end{chips}""",
flags=re.DOTALL)


class Chips(TextAsParagraph):
def __init__(self, latex_str, caret_token):
super().__init__(latex_str, caret_token)

def make_block(self, matchobj):
caret_token = self._caret_token
block_contents = matchobj.group('block_contents')
block_contents = self.to_paragraph(block_contents)
title = matchobj.group('title')
title = self.to_paragraph(title)
caret_token = self._caret_token
return f'## {title}{caret_token}{block_contents}{caret_token}'
ref = matchobj.group('ref')
if ref is not None:
return f'## {title}{caret_token}{ref}{caret_token}{caret_token}{block_contents}{caret_token}'
return f'## {title}{caret_token}{caret_token}{block_contents}{caret_token}'

def convert(self):
return chips_re.sub(self.make_block, self.str)
24 changes: 17 additions & 7 deletions converter/markdown/cite.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,21 @@
from converter.markdown.block_matcher import match_block
from converter.guides.tools import get_text_in_brackets

cite_re = re.compile(r"""~?\\cite{(?P<ref>.*?)}""", flags=re.DOTALL + re.VERBOSE)
cite_re = re.compile(r"""\\cite{(?P<ref>.*?)}""", flags=re.DOTALL + re.VERBOSE)

bib_re = re.compile(r"""@(?P<type>.*?){(?P<ref>.*?),""", flags=re.DOTALL + re.VERBOSE)


def clean_text(string):
    """Return *string* with BibTeX/LaTeX markup reduced to plain text.

    The following substitutions are applied, in this order:

    * every ``{`` and ``}`` is dropped;
    * the escaped ampersand ``\\&`` becomes ``&``;
    * a backtick becomes an apostrophe;
    * a triple hyphen ``---`` becomes a single ``-``;
    * every remaining backslash is dropped.

    :param string: raw text to clean
    :return: the cleaned text
    """
    # Braces go first. This already collapses an empty group such as "o{}"
    # to "o", so no dedicated replacement for that sequence is needed.
    string = string.replace("{", "").replace("}", "")
    # Must run before the blanket backslash removal below, otherwise the
    # two-character escape could no longer be recognised as a unit.
    string = string.replace("\\&", "&")
    string = string.replace("`", "'")
    string = string.replace("---", "-")
    return string.replace("\\", "")


class Cite(object):
_bib_file = None
_bib_entries = []
Expand All @@ -34,13 +44,13 @@ def __init__(self, latex_str, load_workspace_file):

def make_bib_content(line):
bib_entry = {}
for item in line.split(',\n'):
for item in re.split(r',\s*\n', line):
item = item.strip()
sub_items = item.split('=', 1)
if len(sub_items) > 1:
value = sub_items[1].strip()
key = sub_items[0].strip().lower()
if value.startswith('{'):
if value.startswith('{') or value.startswith('"{') or value.startswith('\'{'):
value = get_text_in_brackets(value)
value = ' '.join(value.split('\n'))
bib_entry[key] = value
Expand All @@ -65,13 +75,13 @@ def get_bib_text(self, ref):
bib_item = Cite._bib_file.get(ref.lower(), {})
if bib_item:
if bib_item.get('title') and bib_item.get('author'):
author = bib_item.get('author')
title = bib_item.get('title')
author = clean_text(bib_item.get('author'))
title = clean_text(bib_item.get('title'))
return f'<abbr title="{title}">{author}</abbr>'
elif bib_item.get('title'):
return bib_item.get('title')
return clean_text(bib_item.get('title'))
elif bib_item.get('author'):
return bib_item.get('author')
return clean_text(bib_item.get('author'))
return ref

def make_block(self, matchobj):
Expand Down
6 changes: 3 additions & 3 deletions converter/markdown/cleanup.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,8 @@ def convert(self):

output = re.sub(r"\\'{(.*?)}", r"\1&#x301;", output)

output = re.sub(r"(.*?)(~)(.*?)", r"\1 \3", output)
output = re.sub(r"(~)(.*?)", r" \2", output)
output = re.sub(r"(.*?)(~)", r"\1 ", output)
output = re.sub(r"(.*?)(?:(?<!\\)(~))(.*?)", r"\1 \3", output)
output = re.sub(r"(?:(?<!\\)(~))(.*?)", r" \2", output)
output = re.sub(r"(.*?)(?:(?<!\\)(~))", r"\1 ", output)

return output
2 changes: 1 addition & 1 deletion converter/markdown/codefilefigure.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ def make_block(self, matchobj):
self._refs.get(label).get('ref')
)

return f'{caret_token}{caption}{caret_token}**source:{file_path}**{caret_token}' \
return f'{caret_token}{caption}{caret_token}**source: {file_path}**{caret_token}' \
f'```code{caret_token}{file_content}{caret_token}```{caret_token}{replace_token}'

def remove_matched_token(self, output, chars):
Expand Down
21 changes: 21 additions & 0 deletions converter/markdown/competency.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
import re

from converter.markdown.block_matcher import match_block
from converter.markdown.text_as_paragraph import TextAsParagraph

competency_re = re.compile(r"""\\competency(\[(.*?)\])?{(?P<block_contents>.*?)}""",
                           flags=re.DOTALL + re.VERBOSE)


class Competency(TextAsParagraph):
    """Rewrite ``\\competency[...]{...}`` commands as ``|||topic`` blocks."""

    def __init__(self, latex_str, caret_token):
        super().__init__(latex_str, caret_token)

    def make_block(self, matchobj):
        """Build the replacement text for one ``competency_re`` match.

        Line breaks inside the braces (with their surrounding whitespace)
        are folded into single spaces, the result is stripped, and a LaTeX
        ``\\\\`` becomes ``<br/>``. The optional ``[...]`` argument is
        matched by the pattern but not used in the output.
        """
        # NOTE(review): self._caret_token is presumably set by
        # TextAsParagraph.__init__ - confirm against the base class.
        separator = self._caret_token
        text = matchobj.group('block_contents')
        text = re.sub(r"\s*\n\s*", " ", text).strip()
        text = text.replace("\\\\", "<br/>")
        # Empty strings yield the leading/trailing separators and the
        # doubled separator in front of the closing fence.
        pieces = ['', '|||topic', '## Competency', text, '', '|||', '']
        return separator.join(pieces)

    def convert(self):
        """Return the input with every competency command replaced."""
        return competency_re.sub(self.make_block, self.str)
14 changes: 14 additions & 0 deletions converter/markdown/del_icons_description.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
import re


class DelIconsDescription(object):
    """Remove the icon-legend sentence and the icons table command."""

    def __init__(self, latex_str):
        """Store the LaTeX text to be processed.

        :param latex_str: LaTeX source as a single string
        """
        self.str = latex_str

    def convert(self):
        """Return the stored text with the icon description removed.

        Two things are deleted:

        * text running from ``We also use`` to the first following
          ``look them up.``, together with adjacent whitespace;
        * every ``\\tablefigure{ch_intro/tables/icons_table}{...}{...}``
          command.

        Neither pattern uses ``re.DOTALL``, so the ``.*?`` parts do not
        cross line breaks.
        """
        output = self.str

        # The closing full stop is escaped: a bare "." is a regex wildcard
        # and would also swallow whatever character follows "look them up".
        output = re.sub(r"\s*We also use.*?look them up\.\s*", "", output)
        output = re.sub(r"\\tablefigure{ch_intro/tables/icons_table}{.*?}{.*?}", "", output)

        return output
4 changes: 2 additions & 2 deletions converter/markdown/elaboration.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

from converter.markdown.text_as_paragraph import TextAsParagraph

elaboration_re = re.compile(r"""\\begin{elaboration}{(?P<title>.*?)}(?P<block_contents>.*?)\\end{elaboration}""",
elaboration_re = re.compile(r"""(\s+)?\\begin{elaboration}{(?P<title>.*?)}(?P<block_contents>.*?)\\end{elaboration}""",
flags=re.DOTALL + re.VERBOSE)


Expand All @@ -16,7 +16,7 @@ def make_block(self, matchobj):
title = self.to_paragraph(title)
block_contents = self.to_paragraph(block_contents)
caret_token = self._caret_token
return f'## {title}{caret_token}{block_contents}'
return f'{caret_token}## {title}{caret_token}{block_contents}{caret_token}'

def convert(self):
return elaboration_re.sub(self.make_block, self.str)
19 changes: 19 additions & 0 deletions converter/markdown/equation.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
import re

equation_re = re.compile(r"""\\begin{equation}(?P<block_contents>.*?)\\end{equation}""", flags=re.DOTALL)


class Equation(object):
    """Rewrite ``equation`` environments as centred ``$$`` display math."""

    def __init__(self, latex_str, caret_token):
        self.str = latex_str
        self._caret_token = caret_token

    def make_block(self, matchobj):
        """Build the replacement text for one ``equation_re`` match.

        The environment body is stripped of surrounding whitespace and
        wrapped in ``<center>`` / ``$$`` markers, with the caret token
        between all parts and at both ends.
        """
        formula = matchobj.group('block_contents').strip()
        # The empty first and last items produce the leading and trailing
        # caret tokens when joined.
        pieces = ('', '<center>', '$$', formula, '$$', '</center>', '')
        return self._caret_token.join(pieces)

    def convert(self):
        """Return the input with every equation environment replaced."""
        return equation_re.sub(self.make_block, self.str)
Loading