Skip to content

Commit

Permalink
Remove ruby dependency from manpage build
Browse files Browse the repository at this point in the history
  • Loading branch information
wtlangford authored and nicowilliams committed Feb 26, 2019
1 parent c1f1185 commit 175dbc4
Show file tree
Hide file tree
Showing 5 changed files with 302 additions and 36 deletions.
4 changes: 2 additions & 2 deletions Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -165,8 +165,8 @@ install-binaries: $(BUILT_SOURCES)
$(MAKE) $(AM_MAKEFLAGS) install-exec

DOC_FILES = docs/content docs/public docs/templates docs/site.yml \
docs/Pipfile docs/Pipfile.lock docs/build_website.py \
docs/README.md jq.1.prebuilt
docs/Pipfile docs/Pipfile.lock docs/build_manpage.py \
docs/build_website.py docs/README.md jq.1.prebuilt

EXTRA_DIST = $(DOC_FILES) $(man_MANS) $(TESTS) $(TEST_LOG_COMPILER) \
jq.1.prebuilt jq.spec src/lexer.c src/lexer.h src/parser.c \
Expand Down
3 changes: 2 additions & 1 deletion docs/Pipfile
Original file line number Diff line number Diff line change
Expand Up @@ -8,4 +8,5 @@ verify_ssl = true
[packages]
jinja2 = "*"
pyyaml = "*"
markdown = "*"
markdown = "*"
lxml = "*"
34 changes: 33 additions & 1 deletion docs/Pipfile.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

235 changes: 234 additions & 1 deletion docs/build_manpage.py
100644 → 100755
Original file line number Diff line number Diff line change
@@ -1,2 +1,235 @@
#!/usr/bin/env python3
print("Manpage build not yet supported")
from datetime import date
from io import StringIO
from lxml import etree
import markdown
from markdown.extensions import Extension
import re
import sys
import yaml

# Prevent our markdown parser from trying to help by interpreting things in angle brackets as HTML tags.
class EscapeHtml(Extension):
    """Markdown extension that switches off raw-HTML handling.

    Text in angle brackets must reach the output as plain text instead of
    being interpreted as HTML tags, so both the block-level HTML
    preprocessor and the inline HTML pattern are removed from the parser.
    """

    def extendMarkdown(self, md, md_globals=None):
        """Remove the ``html_block`` preprocessor and ``html`` inline pattern.

        ``md`` is the Markdown instance being configured. ``md_globals`` is
        accepted for callers that still pass it but is never used; it is
        optional so that callers invoking ``extendMarkdown(md)`` work too.
        """
        targets = (
            (md.preprocessors, 'html_block'),
            (md.inlinePatterns, 'html'),
        )
        for registry, name in targets:
            if hasattr(registry, 'deregister'):
                # Registry-style container: use its explicit removal API.
                registry.deregister(name)
            else:
                # Dict-style container: fall back to plain deletion.
                del registry[name]

class RoffWalker(object):
    """Walk a parsed HTML tree and emit roff (man page) source.

    Output is collected in an in-memory buffer while walking and written to
    ``output`` in one piece by :meth:`walk`. Nodes are expected to provide
    the element API used below: ``tag``, ``text``, ``tail``, ``getnext()``,
    ``itertext()``, ``len()`` and indexing.

    NOTE(review): the nesting of this class was reconstructed from a
    flattened listing -- confirm the ``ul`` branch of ``_walk`` and
    ``_write_tail`` against the upstream file.
    """

    # Characters that receive a backslash escape in roff text. None of the
    # replacements introduces a character that is itself a key, so a single
    # translate pass is enough.
    _ESCAPES = str.maketrans({
        '\\': '\\e',
        '.': '\\.',
        "'": "\\'",
        '-': '\\-',
    })

    def __init__(self, tree, output=sys.stdout):
        """Remember the tree to render and the stream to write to."""
        self.tree = tree
        self.target = output
        self.f = StringIO()

    def walk(self):
        """Render the whole tree and write the result to the target."""
        self._walk(self.tree, parent_tag=None)
        rendered = self.f.getvalue()
        # We don't want to start lines with \. because that can confuse man.
        # Such lines get a \& prefix so they are not treated as a directive.
        guarded = re.sub(r'^\\\.', r'\&.', rendered, flags=re.MULTILINE)
        self.target.write(guarded)

    def _ul_is_special(self, root):
        """Return True for a list whose only child is an ``li`` ending in ':'."""
        if len(root) != 1 or root[0].tag != 'li':
            return False
        label = ''.join(root[0].itertext()).strip()
        return label.endswith(':')

    def _walk_child(self, root):
        """Walk the children of ``root``, starting at the first one."""
        if len(root) == 0:
            return
        self._walk(root[0], parent_tag=root.tag)

    def _write_element(self, root, ensure_newline=True):
        """Write an element's leading text, its children, then its tail."""
        leading = root.text
        if leading is not None:
            self.__write_raw(self._sanitize(leading))
        self._walk_child(root)
        self._write_tail(root, ensure_newline=ensure_newline)

    def _write_tail(self, root, ensure_newline=False, inline=False):
        """Write the text following ``root``; optionally finish with a newline.

        A tail consisting of a single newline is skipped unless ``inline``
        is set.
        """
        tail = root.tail
        if tail is not None and (inline or tail != '\n'):
            cleaned = self._sanitize(tail)
            if cleaned.endswith('\n'):
                ensure_newline = False
            self.__write_raw(cleaned)
        if ensure_newline:
            self.__write_raw('\n')

    def _walk(self, root, parent_tag=None):
        """Render ``root`` and every sibling that follows it."""
        last_tag = None
        node = root
        while node is not None:
            tag = node.tag
            if tag == 'h1':
                today = date.today().strftime('%B %Y')
                self.__write_cmd('.TH "JQ" "1" "{}" "" ""'.format(today))
                self.__write_cmd('.SH "NAME"')
                # TODO: properly parse this
                self.__write_raw(r'\fBjq\fR \- Command\-line JSON processor' + "\n")

            elif tag == 'h2':
                heading = ''.join(node.itertext()).strip()
                self.__write_cmd('.SH "{}"'.format(heading))

            elif tag == 'h3':
                heading = ''.join(node.itertext()).strip()
                self.__write_cmd('.SS "{}"'.format(self._h3_sanitize(heading)))

            elif tag == 'p':
                inside_item = parent_tag == 'li'
                if last_tag not in ('h2', 'h3') and not inside_item:
                    self.__write_cmd('.P')
                self._write_element(node, ensure_newline=not inside_item)

            elif tag == 'ul':
                if self._ul_is_special(node):
                    # May consume following paragraphs; continue after them.
                    node = self._write_special_ul(node)
                else:
                    self._walk_child(node)
                    self._write_tail(node)
                    # A pre tag after the end of a list doesn't want two of
                    # the indentation commands.
                    following = node.getnext()
                    if following is None or following.tag != 'pre':
                        self.__write_cmd('.IP "" 0')

            elif tag == 'li':
                self.__write_cmd(r'.IP "\(bu" 4')
                if node.text is not None and node.text.strip() != '':
                    self.__write_raw(self._sanitize(node.text))
                self._walk_child(node)
                self._write_tail(node, ensure_newline=True)

            elif tag == 'strong':
                self._write_inline(node, '\\fB{}\\fR', self._sanitize)

            elif tag == 'em':
                self._write_inline(node, '\\fI{}\\fR', self._sanitize)

            elif tag == 'code':
                self._write_inline(node, '\\fB{}\\fR', self._code_sanitize)

            elif tag == 'pre':
                # Consumes the whole run of adjacent pre elements.
                node = self._write_pre_run(node)

            else:
                self._walk_child(node)

            # ``node`` may have been advanced by the ul/pre handlers above.
            last_tag = node.tag
            node = node.getnext()

    def _write_special_ul(self, ul):
        """Write a "special" list as ``.TP`` plus ``.IP`` paragraphs.

        Returns the last node consumed: the list itself, or the last of the
        paragraphs that followed it.
        """
        self.__write_cmd('.TP')
        self._write_element(ul[0])
        consumed = ul
        candidate = ul.getnext()
        while candidate is not None and candidate.tag == 'p':
            after = candidate.getnext()
            if after is not None and after.tag == 'pre':
                # we don't want to .IP these, because it'll look funny with
                # the code indent
                break
            self.__write_cmd('.IP')
            self._write_element(candidate)
            consumed = candidate
            candidate = consumed.getnext()
        return consumed

    def _write_inline(self, node, template, clean):
        """Write an inline element's text through ``template``, then its tail."""
        if node.text is not None:
            self.__write_raw(template.format(clean(node.text)))
        self._write_tail(node, inline=True)

    def _write_pre_run(self, first_pre):
        """Write a run of adjacent ``pre`` elements as one no-fill block.

        Returns the last ``pre`` element of the run.
        """
        self.__write_cmd('.IP "" 4')
        self.__write_cmd('.nf\n')  # extra newline for spacing reasons
        chunks = []
        consumed = first_pre
        cursor = first_pre
        while cursor is not None and cursor.tag == 'pre':
            body = ''.join(cursor.itertext(with_tail=False))
            chunks.append(self._pre_sanitize(body))
            consumed = cursor
            cursor = cursor.getnext()
        # Consecutive blocks are separated by a single newline.
        self.__write_raw('\n'.join(chunks))
        self.__write_cmd('.fi')
        self.__write_cmd('.IP "" 0')
        return consumed

    def _base_sanitize(self, text):
        """Backslash-escape ``\\``, ``.``, ``'`` and ``-`` in ``text``."""
        return text.translate(self._ESCAPES)

    def _pre_sanitize(self, text):
        """Sanitize preformatted text (escaping only)."""
        return self._base_sanitize(text)

    def _code_sanitize(self, text):
        """Sanitize inline code: escape, then turn each whitespace char into a space."""
        escaped = self._base_sanitize(text)
        return re.sub(r'\s', ' ', escaped)

    def _h3_sanitize(self, text):
        """Sanitize a subsection heading: escape, then replace newlines with spaces."""
        escaped = self._base_sanitize(text)
        joined = re.sub(' \n|\n ', ' ', escaped)
        return joined.replace('\n', ' ')

    def _sanitize(self, text):
        """Sanitize running text.

        Escapes the text, renders ``<name>`` as italic ``name``, collapses
        runs of spaces and replaces newlines with spaces.
        """
        escaped = self._base_sanitize(text)
        italicized = re.sub(r'<([^>]+)>', r'\\fI\1\\fR', escaped)
        collapsed = re.sub(r' +', ' ', italicized)
        return collapsed.replace('\n', ' ')

    def __write_cmd(self, dat):
        """Append a lone ``.`` line followed by the command line ``dat``."""
        self.f.write('.\n{}\n'.format(dat))

    def __write_raw(self, dat):
        """Append ``dat`` to the buffer exactly as given."""
        self.f.write(str(dat))

def load_yml_file(fn):
    """Parse the YAML file at path ``fn`` and return the loaded object.

    ``yaml.safe_load`` is used instead of ``yaml.load``: calling
    ``yaml.load`` without an explicit ``Loader`` is deprecated in newer
    PyYAML releases and rejected by the latest ones, and the safe loader
    only builds plain Python data (dicts, lists, strings, numbers).
    """
    with open(fn) as f:
        return yaml.safe_load(f)

def dedent_body(body):
    """Remove the leading space from lines that start with one.

    Only a line whose first character is a space immediately followed by a
    non-whitespace character is changed; every other line is kept as is.
    Lines are split and re-joined on ``'\\n'``.
    """
    leading_space = re.compile(r'^ (\S)')
    return '\n'.join(
        leading_space.sub(r'\1', line) for line in body.split('\n')
    )

def convert_manual_to_markdown():
    """Build the markdown source of the man page from the YAML manual.

    Reads ``content/manual/manual.yml`` (relative to the working directory)
    and concatenates, in order: the manpage intro, the manual body, each
    section (upper-cased ``##`` title, body, then its entries as ``###``
    headings with body and a fenced block of examples), and the manpage
    epilogue. Returns the markdown as a single string.

    NOTE(review): block nesting was reconstructed from a flattened listing;
    the blank line written after each entry is assumed to sit at entry
    level rather than inside the ``examples`` branch -- confirm upstream.
    """
    manual = load_yml_file("content/manual/manual.yml")
    parts = [
        manual.get('manpage_intro', '\n'),
        dedent_body(manual.get('body', '\n')),
    ]
    for section in manual.get('sections', []):
        parts.append('## {}\n'.format(section.get('title', '').upper()))
        parts.append(dedent_body(section.get('body', '\n')))
        parts.append('\n')
        for entry in section.get('entries', []):
            parts.append('### {}\n'.format(entry.get('title', '')))
            parts.append(dedent_body(entry.get('body', '\n')))
            parts.append('\n')
            examples = entry.get('examples')
            if examples is not None:
                rendered = []
                for example in examples:
                    results = ', '.join(str(x) for x in example.get('output', []))
                    rendered.append(
                        "jq '{}'\n".format(example.get('program', ''))
                        + " {}\n".format(example.get('input', ''))
                        + "=> {}\n".format(results)
                    )
                parts.append("~~~~\n")
                # Examples inside one fence are separated by a blank line.
                parts.append('\n'.join(rendered))
                parts.append("~~~~\n")
            parts.append('\n')
    parts.append(manual.get('manpage_epilogue', ''))
    return ''.join(parts)

# Script body: runs on execution/import. Convert manual.yml to our special markdown format
markdown_data = convert_manual_to_markdown()

# Convert markdown to html; EscapeHtml keeps angle-bracket text from being parsed as HTML tags
html_data = markdown.markdown(markdown_data, extensions=[EscapeHtml(), 'fenced_code'])

# Parse the html into a tree so we can walk it
tr = etree.HTML(html_data, etree.HTMLParser())

# Walk the html tree and write it out as ROFF (RoffWalker's default output is sys.stdout)
RoffWalker(tr).walk()
Loading

0 comments on commit 175dbc4

Please sign in to comment.