-
Notifications
You must be signed in to change notification settings - Fork 14
/
Copy pathpostlight_parser.py
executable file
·79 lines (69 loc) · 2.14 KB
/
postlight_parser.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
#!/usr/bin/env python3
"""Python wrapper of the postlight-parser command line
This requires that you have installed Node.js
(https://nodejs.org/en/)
and the postlight-parser
(https://github.com/postlight/parser):
# Install postlight-parser globally
$ yarn global add @postlight/parser
# or
$ npm -g install @postlight/parser
"""
import json
import sys
from reader import HTML2Text, Format, unescape, main
from Naked.toolshed.shell import muterun_js
def postlight_parser(url, parser_cli_path):
    """Wrap the postlight-parser command line driver.

    Runs the driver at ``parser_cli_path`` through ``muterun_js`` with
    ``url`` as its argument and decodes the JSON object found in its
    standard output.

    url: URL string to parse
    parser_cli_path: path to postlight-parser command line driver

    Returns the decoded JSON result.

    Does not return on failure: when the driver exits with a non-zero code,
    or when its JSON output contains an ``error`` key, the details are
    written to stderr and the process terminates through ``sys.exit``.
    """
    response = muterun_js(
        parser_cli_path,
        arguments=url
    )
    if response.exitcode != 0:
        print('[ERROR] URL: {}'.format(url), file=sys.stderr)
        print('[ERROR]', response.stderr.decode('utf-8'), file=sys.stderr)
        sys.exit(response.exitcode)

    raw = response.stdout.decode('utf-8')
    # Decode from the first '{' so that anything printed ahead of the JSON
    # object is ignored.
    result = json.loads(raw[raw.find('{'):])
    if 'error' in result:
        # Indexing result['messages'] directly raised KeyError whenever the
        # error payload lacked that key, hiding the real failure.
        # NOTE(review): current @postlight/parser releases appear to report
        # the text under 'message' instead of 'messages' - confirm against
        # the installed version. Fall back to the whole payload if neither
        # key is present.
        detail = result.get('messages', result.get('message', result))
        print('[ERROR] URL: {}'.format(url), file=sys.stderr)
        print('[ERROR]', detail, file=sys.stderr)
        sys.exit(1)
    return result
if __name__ == '__main__':
    import argparse

    # Command-line interface; the module docstring is reused as the
    # description shown in the help output.
    cli = argparse.ArgumentParser(
        description=__doc__,
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
    )
    cli.add_argument('url', help='URL to parse')
    cli.add_argument(
        '-f', '--format',
        default='json',
        choices=[*Format.formatter],
        help='output format',
    )
    cli.add_argument(
        '-w', '--body-width',
        default=None,
        type=int,
        help='character offset at which to wrap lines for plain-text',
    )
    cli.add_argument(
        '-p', '--parser-path',
        default='/opt/homebrew/bin/postlight-parser',
        help='path to postlight-parser command line driver',
    )
    options = cli.parse_args()

    # Fetch the parser result, pass it with the requested body width to
    # reader.main, then print it with the formatter selected by --format.
    parsed = postlight_parser(options.url, options.parser_path)
    document = main(parsed, options.body_width)
    render = Format.formatter[options.format]
    print(render(document))