diff --git a/.dockerignore b/.dockerignore new file mode 100644 index 00000000..c1b1f108 --- /dev/null +++ b/.dockerignore @@ -0,0 +1,15 @@ +.gitignore +.git/ +.github/ +misc/ +tests/ +tools +venv/ +dump/ +.mypy_cache/ +*.pyc +*.log +README.md +docker-compose.yml +Dockerfile +Makefile diff --git a/.gitignore b/.gitignore index af4172ea..a8672f68 100644 --- a/.gitignore +++ b/.gitignore @@ -2,4 +2,7 @@ *.swp .coverage .vagrant -application.log \ No newline at end of file +*.log +venv/ +__pycache__ +.mypy_cache/ \ No newline at end of file diff --git a/Dockerfile b/Dockerfile index 22f77314..2344026a 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,18 +1,15 @@ -FROM python:2.7 +FROM python:3.12 -RUN apt-get update \ - && apt-get install man-db -y \ - && apt-get clean +RUN apt update \ + && apt install man-db -y \ + && apt clean -ADD ./requirements.txt /tmp/requirements.txt +WORKDIR /opt/webapp +COPY . . -RUN pip install --upgrade pip \ - && python --version \ - && pip install -r /tmp/requirements.txt \ - && rm -rf ~/.cache/pip/* +RUN pip3 install --no-cache-dir --no-warn-script-location --upgrade pip setuptools wheel virtualenv \ + && pip3 install --no-cache-dir --no-warn-script-location -r requirements.txt -ADD ./ /opt/webapp/ -WORKDIR /opt/webapp EXPOSE 5000 -CMD ["make", "serve"] +CMD ["python3", "runserver.py"] diff --git a/Makefile b/Makefile index 49856bce..611e845e 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ tests: - nosetests --exe --with-doctest tests/ explainshell/ + pytest --doctest-modules tests/ explainshell/ serve: - python runserver.py + docker-compose up --build .PHONY: tests diff --git a/README.md b/README.md index eea826b5..fe5011b5 100644 --- a/README.md +++ b/README.md @@ -37,7 +37,7 @@ When querying explainshell, it: > > If you're relying on manpages, be aware that they may not reflect the latest behavior. Contributions in this area are welcome but would require rethinking the documentation pipeline. 
-Right now explainshell.com contains the entire [archive of Ubuntu](http://manpages.ubuntu.com/). It's not +Right now explainshell.com contains the entire [archive of Ubuntu](https://manpages.ubuntu.com/). It's not possible to directly add a missing man page to the live site (it might be in the future). ## Running explainshell locally diff --git a/docker-compose.yml b/docker-compose.yml index 686b0309..6dbfb152 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -1,10 +1,10 @@ -version: '2' services: db: image: mongo web: - build: . - command: make serve + build: + context: . + dockerfile: Dockerfile environment: - MONGO_URI=mongodb://db - HOST_IP=0.0.0.0 diff --git a/explainshell/algo/classifier.py b/explainshell/algo/classifier.py index 1b1c8216..da3fac67 100644 --- a/explainshell/algo/classifier.py +++ b/explainshell/algo/classifier.py @@ -1,4 +1,6 @@ -import itertools, collections, logging +import itertools +import collections +import logging import nltk import nltk.metrics @@ -9,26 +11,32 @@ logger = logging.getLogger(__name__) + def get_features(paragraph): features = {} - ptext = paragraph.cleantext() - assert ptext - - features['starts_with_hyphen'] = algo.features.starts_with_hyphen(ptext) - features['is_indented'] = algo.features.is_indented(ptext) - features['par_length'] = algo.features.par_length(ptext) - for w in ('=', '--', '[', '|', ','): - features['first_line_contains_%s' % w] = algo.features.first_line_contains(ptext, w) - features['first_line_length'] = algo.features.first_line_length(ptext) - features['first_line_word_count'] = algo.features.first_line_word_count(ptext) - features['is_good_section'] = algo.features.is_good_section(paragraph) - features['word_count'] = algo.features.word_count(ptext) + p_text = paragraph.clean_text() + logger.debug(f"length of p_text: {len(p_text)}") + assert p_text + + features["starts_with_hyphen"] = algo.features.starts_with_hyphen(p_text) + features["is_indented"] = algo.features.is_indented(p_text) 
+ features["par_length"] = algo.features.par_length(p_text) + for w in ("=", "--", "[", "|", ","): + features[f"first_line_contains_{w}"] = algo.features.first_line_contains( + p_text, w + ) + features["first_line_length"] = algo.features.first_line_length(p_text) + features["first_line_word_count"] = algo.features.first_line_word_count(p_text) + features["is_good_section"] = algo.features.is_good_section(paragraph) + features["word_count"] = algo.features.word_count(p_text) return features -class classifier(object): - '''classify the paragraphs of a man page as having command line options - or not''' + +class Classifier: + """classify the paragraphs of a man page as having command line options + or not""" + def __init__(self, store, algo, **classifier_args): self.store = store self.algo = algo @@ -39,59 +47,60 @@ def train(self): if self.classifier: return - manpages = self.store.trainingset() + man_pages = self.store.training_set() # flatten the manpages so we get a list of (manpage-name, paragraph) def flatten_manpages(manpage): - l = [] + p_list = [] for para in manpage.paragraphs: - l.append(para) - return l - paragraphs = itertools.chain(*[flatten_manpages(m) for m in manpages]) + p_list.append(para) + return p_list + + paragraphs = itertools.chain(*[flatten_manpages(m) for m in man_pages]) training = list(paragraphs) - negids = [p for p in training if not p.is_option] - posids = [p for p in training if p.is_option] + neg_ids = [p for p in training if not p.is_option] + pos_ids = [p for p in training if p.is_option] - negfeats = [(get_features(p), False) for p in negids] - posfeats = [(get_features(p), True) for p in posids] + neg_feats = [(get_features(p), False) for p in neg_ids] + pos_feats = [(get_features(p), True) for p in pos_ids] - negcutoff = len(negfeats)*3/4 - poscutoff = len(posfeats)*3/4 + neg_cutoff = int(len(neg_feats) * 3 / 4) + pos_cutoff = int(len(pos_feats) * 3 / 4) - trainfeats = negfeats[:negcutoff] + posfeats[:poscutoff] - self.testfeats 
= negfeats[negcutoff:] + posfeats[poscutoff:] + train_feats = neg_feats[:neg_cutoff] + pos_feats[:pos_cutoff] + self.test_feats = neg_feats[neg_cutoff:] + pos_feats[pos_cutoff:] - logger.info('train on %d instances', len(trainfeats)) + logger.info("train on %d instances", len(train_feats)) - if self.algo == 'maxent': + if self.algo == "maxent": c = nltk.classify.maxent.MaxentClassifier - elif self.algo == 'bayes': + elif self.algo == "bayes": c = nltk.classify.NaiveBayesClassifier else: - raise ValueError('unknown classifier') + raise ValueError("unknown classifier") - self.classifier = c.train(trainfeats, **self.classifier_args) + self.classifier = c.train(train_feats, **self.classifier_args) def evaluate(self): self.train() - refsets = collections.defaultdict(set) - testsets = collections.defaultdict(set) + ref_sets = collections.defaultdict(set) + test_sets = collections.defaultdict(set) - for i, (feats, label) in enumerate(self.testfeats): - refsets[label].add(i) + for i, (feats, label) in enumerate(self.test_feats): + ref_sets[label].add(i) guess = self.classifier.prob_classify(feats) observed = guess.max() - testsets[observed].add(i) - #if label != observed: - # print 'label:', label, 'observed:', observed, feats + test_sets[observed].add(i) + # if label != observed: + # print('label:', label, 'observed:', observed, feats - print 'pos precision:', nltk.metrics.precision(refsets[True], testsets[True]) - print 'pos recall:', nltk.metrics.recall(refsets[True], testsets[True]) - print 'neg precision:', nltk.metrics.precision(refsets[False], testsets[False]) - print 'neg recall:', nltk.metrics.recall(refsets[False], testsets[False]) + print("pos precision:", nltk.metrics.precision(ref_sets[True], test_sets[True])) + print("pos recall:", nltk.metrics.recall(ref_sets[True], test_sets[True])) + print("neg precision:", nltk.metrics.precision(ref_sets[False], test_sets[False])) + print("neg recall:", nltk.metrics.recall(ref_sets[False], test_sets[False])) - print 
self.classifier.show_most_informative_features(10) + print(self.classifier.show_most_informative_features(10)) def classify(self, manpage): self.train() @@ -102,10 +111,9 @@ def classify(self, manpage): option = guess.max() certainty = guess.prob(option) - if option: - if certainty < config.CLASSIFIER_CUTOFF: - pass - else: - logger.info('classified %s (%f) as an option paragraph', item, certainty) - item.is_option = True - yield certainty, item + if option and certainty >= config.CLASSIFIER_CUTOFF: + logger.info( + "classified %s (%f) as an option paragraph", item, certainty + ) + item.is_option = True + yield certainty, item diff --git a/explainshell/algo/features.py b/explainshell/algo/features.py index 644602eb..32c05edc 100644 --- a/explainshell/algo/features.py +++ b/explainshell/algo/features.py @@ -1,7 +1,8 @@ import re + def extract_first_line(paragraph): - ''' + """ >>> extract_first_line('a b cd') 'a b' >>> extract_first_line('a b cd') @@ -10,54 +11,63 @@ def extract_first_line(paragraph): 'a b cd' >>> extract_first_line(' a b cd') 'a b' - ''' + """ lines = paragraph.splitlines() first = lines[0].strip() - spaces = list(re.finditer(r'(\s+)', first)) + spaces = list(re.finditer(r"(\s+)", first)) # handle options that have their description in the first line by trying # to treat it as two lines (looking at spaces between option and the rest # of the text) if spaces: longest = max(spaces, key=lambda m: m.span()[1] - m.span()[0]) if longest and longest.start() > 1 and longest.end() - longest.start() > 1: - first = first[:longest.start()] + first = first[: longest.start()] return first + def starts_with_hyphen(paragraph): - return paragraph.lstrip()[0] == '-' + return paragraph.lstrip()[0] == "-" + def is_indented(paragraph): return paragraph != paragraph.lstrip() + def par_length(paragraph): return round(len(paragraph.strip()), -1) / 2 + def first_line_contains(paragraph, what): - l = paragraph.splitlines()[0] - return what in l + ln = 
paragraph.splitlines()[0] + return what in ln + def first_line_length(paragraph): first = extract_first_line(paragraph) return round(len(first), -1) / 2 + def first_line_word_count(paragraph): first = extract_first_line(paragraph) splitted = [s for s in first.split() if len(s) > 1] return round(len(splitted), -1) + def is_good_section(paragraph): if not paragraph.section: return False s = paragraph.section.lower() - if 'options' in s: + if "options" in s: return True - if s in ('description', 'function letters'): + if s in ("description", "function letters"): return True return False + def word_count(text): - return round(len(re.findall(r'\w+', text)), -1) + return round(len(re.findall(r"\w+", text)), -1) + def has_bold(html): - return '' in html + return "" in html diff --git a/explainshell/config.py b/explainshell/config.py index 9df17ba4..ed9eb88c 100644 --- a/explainshell/config.py +++ b/explainshell/config.py @@ -1,45 +1,14 @@ import os -_currdir = os.path.dirname(os.path.dirname(__file__)) +_curr_dir = os.path.dirname(os.path.dirname(__file__)) -MANPAGEDIR = os.path.join(_currdir, 'manpages') +MAN_PAGE_DIR = os.path.join(_curr_dir, "manpages") CLASSIFIER_CUTOFF = 0.7 -TOOLSDIR = os.path.join(_currdir, 'tools') +TOOLS_DIR = os.path.join(_curr_dir, "tools") -MAN2HTML = os.path.join(TOOLSDIR, 'w3mman2html.cgi') +MAN2HTML = os.path.join(TOOLS_DIR, "w3mman2html.cgi") # host to pass into Flask's app.run. 
-HOST_IP = os.getenv('HOST_IP', False) -MONGO_URI = os.getenv('MONGO_URI', 'mongodb://localhost') +HOST_IP = os.getenv("HOST_IP", "") +MONGO_URI = os.getenv("MONGO_URI", "mongodb://localhost") DEBUG = True - -LOGGING_DICT = { - 'version': 1, - 'disable_existing_loggers': False, - 'formatters': { - 'standard': { - 'format': '%(asctime)s [%(levelname)s] %(name)s: %(message)s' - }, - }, - 'handlers': { - 'console': { - 'level' : 'INFO', - 'class' : 'logging.StreamHandler', - 'formatter': 'standard', - }, - 'file': { - 'class': 'logging.FileHandler', - 'level': 'INFO', - 'formatter': 'standard', - 'filename': 'application.log', - 'mode': 'a', - }, - }, - 'loggers': { - 'explainshell': { - 'handlers': ['console'], - 'level': 'INFO', - 'propagate': False - } - } -} diff --git a/explainshell/errors.py b/explainshell/errors.py index 18982b2d..b4477827 100644 --- a/explainshell/errors.py +++ b/explainshell/errors.py @@ -1,5 +1,6 @@ class ProgramDoesNotExist(Exception): pass + class EmptyManpage(Exception): pass diff --git a/explainshell/fixer.py b/explainshell/fixer.py index aadef797..2e60d303 100644 --- a/explainshell/fixer.py +++ b/explainshell/fixer.py @@ -1,14 +1,17 @@ -import textwrap, logging +import textwrap +import logging from explainshell import util -class basefixer(object): - '''The base fixer class which other fixers inherit from. + +class BaseFixer: + """The base fixer class which other fixers inherit from. 
Subclasses override the base methods in order to fix manpage content during - different parts of the parsing/classifying/saving process.''' - runbefore = [] - runlast = False + different parts of the parsing/classifying/saving process.""" + + run_before = [] + run_last = False def __init__(self, mctx): self.mctx = mctx @@ -36,20 +39,23 @@ def post_option_extraction(self): def pre_add_manpage(self): pass -fixerscls = [] + +fixers_cls = [] fixerspriority = {} -class runner(object): - '''The runner coordinates the fixers.''' + +class Runner: + """The runner coordinates the fixers.""" + def __init__(self, mctx): self.mctx = mctx - self.fixers = [f(mctx) for f in fixerscls] + self.fixers = [f(mctx) for f in fixers_cls] def disable(self, name): before = len(self.fixers) self.fixers = [f for f in self.fixers if f.__class__.__name__ != name] if before == len(self.fixers): - raise ValueError('fixer %r not found' % name) + raise ValueError(f"fixer {name} not found") def _fixers(self): return (f for f in self.fixers if f.run) @@ -82,70 +88,76 @@ def pre_add_manpage(self): for f in self._fixers(): f.pre_add_manpage() -def register(fixercls): - fixerscls.append(fixercls) - for f in fixercls.runbefore: - if not hasattr(f, '_parents'): + +def register(fixer_cls): + fixers_cls.append(fixer_cls) + for f in fixer_cls.run_before: + if not hasattr(f, "_parents"): f._parents = [] - f._parents.append(fixercls) - return fixercls + f._parents.append(fixer_cls) + return fixer_cls + @register -class bulletremover(basefixer): - '''remove list bullets from paragraph start, see mysqlslap.1''' +class BulletRemover(BaseFixer): + """remove list bullets from paragraph start, see mysqlslap.1""" + def post_parse_manpage(self): - toremove = [] + to_remove = [] for i, p in enumerate(self.mctx.manpage.paragraphs): try: - idx = p.text.index('\xc2\xb7') - p.text = p.text[:idx] + p.text[idx+2:] + idx = p.text.index("\xc2\xb7") + p.text = p.text[:idx] + p.text[idx + 2 :] if not p.text.strip(): - 
toremove.append(i) + to_remove.append(i) except ValueError: pass - for i in reversed(toremove): + for i in reversed(to_remove): del self.mctx.manpage.paragraphs[i] + @register -class leadingspaceremover(basefixer): - '''go over all known option paragraphs and remove their leading spaces - by the amount of spaces in the first line''' +class LeadingSpaceRemover(BaseFixer): + """go over all known option paragraphs and remove their leading spaces + by the amount of spaces in the first line""" def post_option_extraction(self): for i, p in enumerate(self.mctx.manpage.options): - text = self._removewhitespace(p.text) + text = self._remove_ws(p.text) p.text = text - def _removewhitespace(self, text): - ''' - >>> f = leadingspaceremover(None) - >>> f._removewhitespace(' a\\n b ') + def _remove_ws(self, text): + """ + >>> f = LeadingSpaceRemover(None) + >>> f._remove_ws(' a\\n b ') 'a\\n b' - >>> f._removewhitespace('\\t a\\n\\t \\tb') + >>> f._remove_ws('\\t a\\n\\t \\tb') 'a\\n\\tb' - ''' + """ return textwrap.dedent(text).rstrip() + @register -class tarfixer(basefixer): +class TarFixer(BaseFixer): def __init__(self, *args): - super(tarfixer, self).__init__(*args) - self.run = self.mctx.name == 'tar' + super().__init__(*args) + self.run = self.mctx.name == "tar" def pre_add_manpage(self): - self.mctx.manpage.partialmatch = True + self.mctx.manpage.partial_match = True + @register -class paragraphjoiner(basefixer): - runbefore = [leadingspaceremover] - maxdistance = 5 +class ParagraphJoiner(BaseFixer): + run_before = [LeadingSpaceRemover] + max_distance = 5 def post_option_extraction(self): options = [p for p in self.mctx.manpage.paragraphs if p.is_option] self._join(self.mctx.manpage.paragraphs, options) def _join(self, paragraphs, options): - def _paragraphsbetween(op1, op2): + def _paragraphs_between(op1, op2): assert op1.idx < op2.idx r = [] start = None @@ -156,51 +168,60 @@ def _paragraphsbetween(op1, op2): r.append(p) return r, start - totalmerged = 0 - for curr, 
next in util.pairwise(options): - between, start = _paragraphsbetween(curr, next) - if curr.section == next.section and 1 <= len(between) < self.maxdistance: - self.logger.info('merging paragraphs %d through %d (inclusive)', curr.idx, next.idx-1) - newdesc = [curr.text.rstrip()] - newdesc.extend([p.text.rstrip() for p in between]) - curr.text = '\n\n'.join(newdesc) - del paragraphs[start:start+len(between)] - totalmerged += len(between) - return totalmerged + total_merged = 0 + for curr, o_next in util.pairwise(options): + between, start = _paragraphs_between(curr, o_next) + if curr.section == o_next.section and 1 <= len(between) < self.max_distance: + self.logger.info( + "merging paragraphs %d through %d (inclusive)", + curr.idx, + o_next.idx - 1, + ) + new_desc = [curr.text.rstrip()] + new_desc.extend([p.text.rstrip() for p in between]) + curr.text = "\n\n".join(new_desc) + del paragraphs[start: start + len(between)] + total_merged += len(between) + return total_merged + @register -class optiontrimmer(basefixer): - runbefore = [paragraphjoiner] +class OptionTrimmer(BaseFixer): + run_before = [ParagraphJoiner] - d = {'git-rebase' : (50, -1)} + d = {"git-rebase": (50, -1)} def __init__(self, mctx): - super(optiontrimmer, self).__init__(mctx) + super().__init__(mctx) self.run = self.mctx.name in self.d def post_classify(self): start, end = self.d[self.mctx.name] - classifiedoptions = [p for p in self.mctx.manpage.paragraphs if p.is_option] - assert classifiedoptions + classified_opts = [p for p in self.mctx.manpage.paragraphs if p.is_option] + assert classified_opts if end == -1: - end = classifiedoptions[-1].idx + end = classified_opts[-1].idx else: assert start > end - for p in classifiedoptions: - if not (start <= p.idx <= end): + for p in classified_opts: + if not start <= p.idx <= end: p.is_option = False - self.logger.info('removing option %r', p) + self.logger.info("removing option %r", p) + -def _parents(fixercls): - p = getattr(fixercls, '_parents', []) - 
last = fixercls.runlast +def _parents(fixer_cls): + p = getattr(fixer_cls, "_parents", []) + last = fixer_cls.run_last if last and p: - raise ValueError("%s can't be last and also run before someone else" % fixercls.__name__) + raise ValueError( + f"{fixer_cls.__name__} can't be last and also run before someone else" + ) if last: - return [f for f in fixerscls if f is not fixercls] + return [f for f in fixers_cls if f is not fixer_cls] return p -fixerscls = util.toposorted(fixerscls, _parents) + +fixers_cls = util.topo_sorted(fixers_cls, _parents) diff --git a/explainshell/helpconstants.py b/explainshell/help_constants.py similarity index 67% rename from explainshell/helpconstants.py rename to explainshell/help_constants.py index a3063c69..085e0d30 100644 --- a/explainshell/helpconstants.py +++ b/explainshell/help_constants.py @@ -1,10 +1,9 @@ -# -*- coding: utf-8 -*- - import textwrap -NOSYNOPSIS = 'no synopsis found' +NO_SYNOPSIS = "no synopsis found" -PIPELINES = textwrap.dedent(''' Pipelines +PIPELINES = textwrap.dedent( + """ Pipelines A pipeline is a sequence of one or more commands separated by one of the control operators | or |&. The format for a pipeline is: @@ -33,15 +32,21 @@ total user and system time consumed by the shell and its children. The TIMEFORMAT variable may be used to specify the format of the time information. - Each command in a pipeline is executed as a separate process (i.e., in a subshell).''') + Each command in a pipeline is executed as a separate process (i.e., in a subshell).""" +) -OPSEMICOLON = textwrap.dedent(''' Commands separated by a ; are executed sequentially; the shell waits for each command to terminate in turn. The - return status is the exit status of the last command executed.''') +OPSEMICOLON = textwrap.dedent( + """ Commands separated by a ; are executed sequentially; the shell waits for each command to terminate in turn. 
The + return status is the exit status of the last command executed.""" +) -OPBACKGROUND = textwrap.dedent(''' If a command is terminated by the control operator &, the shell executes the command in the background in - a subshell. The shell does not wait for the command to finish, and the return status is 0.''') +OPBACKGROUND = textwrap.dedent( + """ If a command is terminated by the control operator &, the shell executes the command in the background in + a subshell. The shell does not wait for the command to finish, and the return status is 0.""" +) -OPANDOR = textwrap.dedent(''' AND and OR lists are sequences of one of more pipelines separated by the && and || control operators, +OPANDOR = textwrap.dedent( + """ AND and OR lists are sequences of one of more pipelines separated by the && and || control operators, respectively. AND and OR lists are executed with left associativity. An AND list has the form command1 && command2 @@ -53,48 +58,63 @@ command1 || command2 command2 is executed if and only if command1 returns a non-zero exit status. The return status of AND - and OR lists is the exit status of the last command executed in the list.''') + and OR lists is the exit status of the last command executed in the list.""" +) -OPERATORS = {';' : OPSEMICOLON, '&' : OPBACKGROUND, '&&' : OPANDOR, '||' : OPANDOR} +OPERATORS = {";": OPSEMICOLON, "&": OPBACKGROUND, "&&": OPANDOR, "||": OPANDOR} -REDIRECTION = textwrap.dedent(''' Before a command is executed, its input and output may be redirected using a special notation interpreted +REDIRECTION = textwrap.dedent( + """ Before a command is executed, its input and output may be redirected using a special notation + interpreted by the shell. Redirection may also be used to open and close files for the current shell execution - environment. The following redirection operators may precede or appear anywhere within a simple command - or may follow a command. 
Redirections are processed in the order they appear, from left to right.''') + environment. The following redirection operators may precede or appear anywhere within a simple + command + or may follow a command. Redirections are processed in the order they appear, from left to right.""" +) -REDIRECTING_INPUT = textwrap.dedent(''' Redirecting Input +REDIRECTING_INPUT = textwrap.dedent( + """ Redirecting Input Redirection of input causes the file whose name results from the expansion of word to be opened for reading on file descriptor n, or the standard input (file descriptor 0) if n is not specified. The general format for redirecting input is: - [n]<word''') + [n]<word""" +) -REDIRECTING_OUTPUT = textwrap.dedent(''' Redirecting Output +REDIRECTING_OUTPUT = textwrap.dedent( + """ Redirecting Output Redirection of output causes the file whose name results from the expansion of word to be opened for - writing on file descriptor n, or the standard output (file descriptor 1) if n is not specified. If the + writing on file descriptor n, or the standard output (file descriptor 1) if n is not specified. + If the file does not exist it is created; if it does exist it is truncated to zero size. The general format for redirecting output is: [n]>word - If the redirection operator is >, and the noclobber option to the set builtin has been enabled, the + If the redirection operator is >, and the noclobber option to the set builtin has + been enabled, the redirection will fail if the file whose name results from the expansion of word exists and is a regular - file. If the redirection operator is >|, or the redirection operator is > and the noclobber option to - the set builtin command is not enabled, the redirection is attempted even if the file named by word - exists.''') - -APPENDING_REDIRECTED_OUTPUT = textwrap.dedent(''' Appending Redirected Output + file. 
If the redirection operator is >|, or the redirection operator is > and the + noclobber option to + the set builtin command is not enabled, the redirection is attempted even if the file named by + word exists.""" +) + +APPENDING_REDIRECTED_OUTPUT = textwrap.dedent( + """ Appending Redirected Output Redirection of output in this fashion causes the file whose name results from the expansion of word to be - opened for appending on file descriptor n, or the standard output (file descriptor 1) if n is not - specified. If the file does not exist it is created. + opened for appending on file descriptor n, or the standard output (file descriptor 1) if + n is not specified. If the file does not exist it is created. The general format for appending output is: - [n]>>word''') + [n]>>word""" +) -REDIRECTING_OUTPUT_ERROR = textwrap.dedent(''' Redirecting Standard Output and Standard Error +REDIRECTING_OUTPUT_ERROR = textwrap.dedent( + """ Redirecting Standard Output and Standard Error This construct allows both the standard output (file descriptor 1) and the standard error output (file descriptor 2) to be redirected to the file whose name is the expansion of word. @@ -106,9 +126,11 @@ Of the two forms, the first is preferred. This is semantically equivalent to - >word 2>&1''') + >word 2>&1""" +) -APPENDING_OUTPUT_ERROR = textwrap.dedent(''' Appending Standard Output and Standard Error +APPENDING_OUTPUT_ERROR = textwrap.dedent( + """ Appending Standard Output and Standard Error This construct allows both the standard output (file descriptor 1) and the standard error output (file descriptor 2) to be appended to the file whose name is the expansion of word. 
@@ -118,9 +140,11 @@ This is semantically equivalent to - >>word 2>&1''') + >>word 2>&1""" +) -HERE_DOCUMENTS = textwrap.dedent(''' Here Documents +HERE_DOCUMENTS = textwrap.dedent( + r""" Here Documents This type of redirection instructs the shell to read input from the current source until a line containing only delimiter (with no trailing blanks) is seen. All of the lines read up to that point are then used as the standard input for a command. @@ -132,12 +156,15 @@ delimiter No parameter expansion, command substitution, arithmetic expansion, or pathname expansion is performed on - word. If any characters in word are quoted, the delimiter is the result of quote removal on word, and + word. If any characters in word are quoted, the delimiter is the result of quote + removal on word, and the lines in the here-document are not expanded. If word is unquoted, all lines of the here-document are subjected to parameter expansion, command substitution, and arithmetic expansion. In the latter case, - the character sequence \<newline> is ignored, and \ must be used to quote the characters \, $, and `. + the character sequence \<newline> is ignored, and \ must be used to quote the characters + \, $, and `. - If the redirection operator is <<-, then all leading tab characters are stripped from input lines and the + If the redirection operator is <<-, then all leading tab characters are stripped from input lines + and the line containing delimiter. This allows here-documents within shell scripts to be indented in a natural fashion. 
@@ -146,75 +173,102 @@ <<<word - The word is expanded and supplied to the command on its standard input.''') - -REDIRECTION_KIND = {'<' : REDIRECTING_INPUT, - '>' : REDIRECTING_OUTPUT, - '>>' : APPENDING_REDIRECTED_OUTPUT, - '&>' : REDIRECTING_OUTPUT_ERROR, - '>&' : REDIRECTING_OUTPUT_ERROR, - '&>>' : APPENDING_OUTPUT_ERROR, - '<<' : HERE_DOCUMENTS, - '<<<' : HERE_DOCUMENTS} + The word is expanded and supplied to the command on its standard input.""" +) + +REDIRECTION_KIND = { + "<": REDIRECTING_INPUT, + ">": REDIRECTING_OUTPUT, + ">>": APPENDING_REDIRECTED_OUTPUT, + "&>": REDIRECTING_OUTPUT_ERROR, + ">&": REDIRECTING_OUTPUT_ERROR, + "&>>": APPENDING_OUTPUT_ERROR, + "<<": HERE_DOCUMENTS, + "<<<": HERE_DOCUMENTS, +} -ASSIGNMENT = textwrap.dedent(''' A variable may be assigned to by a statement of the form +ASSIGNMENT = textwrap.dedent( + """ A variable may be assigned to by a statement of the form name=[value] - If value is not given, the variable is assigned the null string. All values undergo tilde expansion, + If value is not given, the variable is assigned the null string. All values undergo tilde + expansion, parameter and variable expansion, command substitution, arithmetic expansion, and quote removal (see - EXPANSION below). If the variable has its integer attribute set, then value is evaluated as an - arithmetic expression even if the $((...)) expansion is not used (see Arithmetic Expansion below). Word - splitting is not performed, with the exception of "$@" as explained below under Special Parameters. + EXPANSION below). If the variable has its integer attribute set, then value is + evaluated as an + arithmetic expression even if the $((...)) expansion is not used (see Arithmetic Expansion below). + Word + splitting is not performed, with the exception of "$@" as explained below under Special + Parameters. Pathname expansion is not performed. 
Assignment statements may also appear as arguments to the alias, declare, typeset, export, readonly, and local builtin commands. In the context where an assignment statement is assigning a value to a shell variable or array index, the += operator can be used to append to or add to the variable's previous value. When += is applied to a - variable for which the integer attribute has been set, value is evaluated as an arithmetic expression and + variable for which the integer attribute has been set, value is evaluated as an arithmetic + expression and added to the variable's current value, which is also evaluated. When += is applied to an array variable using compound assignment (see Arrays below), the variable's value is not unset (as it is when using =), and new values are appended to the array beginning at one greater than the array's maximum index (for indexed arrays) or added as additional key-value pairs in an associative array. When applied to a - string-valued variable, value is expanded and appended to the variable's value.''') - -_group = textwrap.dedent(''' { list; } - list is simply executed in the current shell environment. list must be terminated with a newline - or semicolon. This is known as a group command. The return status is the exit status of list. - Note that unlike the metacharacters ( and ), { and } are reserved words and must occur where a + string-valued variable, value is expanded and appended to the variable's value.""" +) + +_group = textwrap.dedent( + """ { list; } + list is simply executed in the current shell environment. list must be terminated with a + newline + or semicolon. This is known as a group command. The return status is the exit status + of list. + Note that unlike the metacharacters ( and ), { and } are + reserved words and must occur where a reserved word is permitted to be recognized. 
Since they do not cause a word break, they must be - separated from list by whitespace or another shell metacharacter.''') + separated from list by whitespace or another shell metacharacter.""" +) -_subshell = textwrap.dedent(''' (list) list is executed in a subshell environment (see COMMAND EXECUTION ENVIRONMENT below). Variable +_subshell = textwrap.dedent( + """ (list) list is executed in a subshell environment (see COMMAND EXECUTION + ENVIRONMENT below). Variable assignments and builtin commands that affect the shell's environment do not remain in effect after - the command completes. The return status is the exit status of list.''') - -_negate = '''If the reserved word ! precedes a pipeline, the exit status of that pipeline is the logical negation of the -exit status as described above.''' - -_if = textwrap.dedent(''' if list; then list; [ elif list; then list; ] ... [ else list; ] fi - The if list is executed. If its exit status is zero, the then list is executed. Otherwise, each - elif list is executed in turn, and if its exit status is zero, the corresponding then list is - executed and the command completes. Otherwise, the else list is executed, if present. The exit - status is the exit status of the last command executed, or zero if no condition tested true.''') - -_for = textwrap.dedent(''' for name [ [ in [ word ... ] ] ; ] do list ; done + the command completes. The return status is the exit status of list.""" +) + +_negate = """If the reserved word ! precedes a pipeline, the exit status of that pipeline is the logical + negation of the exit status as described above.""" + +_if = textwrap.dedent( + """ if list; then list; [ elif list; then list; ] ... + [ else list; ] fi + The if list is executed. If its exit status is zero, the then list is + executed. Otherwise, each elif list is executed in turn, and if its exit status is zero, + the corresponding then list is executed and the command completes. + Otherwise, the else list is executed, if present. 
The exit + status is the exit status of the last command executed, or zero if no condition tested true.""" +) + +_for = textwrap.dedent( + """ for name [ [ in [ word ... ] ] ; ] do list ; done The list of words following in is expanded, generating a list of items. The variable name is set to each element of this list in turn, and list is executed each time. If the in word is omitted, the for command executes list once for each positional parameter that is set (see PARAMETERS below). The return status is the exit status of the last command that executes. If the expansion of the items following in results in an empty list, no commands are executed, and the return - status is 0.''') + status is 0.""" +) -_whileuntil = textwrap.dedent(''' while list-1; do list-2; done +_whileuntil = textwrap.dedent( + """ while list-1; do list-2; done until list-1; do list-2; done The while command continuously executes the list list-2 as long as the last command in the list list-1 returns an exit status of zero. The until command is identical to the while command, except that the test is negated; list-2 is executed as long as the last command in list-1 returns a non-zero exit status. The exit status of the while and until commands is the exit status of the - last command executed in list-2, or zero if none was executed.''') + last command executed in list-2, or zero if none was executed.""" +) -_select = textwrap.dedent(''' select name [ in word ] ; do list ; done +_select = textwrap.dedent( + """ select name [ in word ] ; do list ; done The list of words following in is expanded, generating a list of items. The set of expanded words is printed on the standard error, each preceded by a number. If the in word is omitted, the positional parameters are printed (see PARAMETERS below). The PS3 prompt is then displayed and a @@ -223,29 +277,33 @@ prompt are displayed again. If EOF is read, the command completes. Any other value read causes name to be set to null. 
The line read is saved in the variable REPLY. The list is executed after each selection until a break command is executed. The exit status of select is the exit status of - the last command executed in list, or zero if no commands were executed.''') - -RESERVEDWORDS = { - '!' : _negate, - '{' : _group, - '}' : _group, - '(' : _subshell, - ')' : _subshell, - ';' : OPSEMICOLON, + the last command executed in list, or zero if no commands were executed.""" +) + +RESERVED_WORDS = { + "!": _negate, + "{": _group, + "}": _group, + "(": _subshell, + ")": _subshell, + ";": OPSEMICOLON, } + def _addwords(key, text, *words): for word in words: - COMPOUNDRESERVEDWORDS.setdefault(key, {})[word] = text + COMPOUND_RESERVED_WORDS.setdefault(key, {})[word] = text + -COMPOUNDRESERVEDWORDS = {} -_addwords('if', _if, 'if', 'then', 'elif', 'else', 'fi', ';') -_addwords('for', _for, 'for', 'in', 'do', 'done', ';') -_addwords('while', _whileuntil, 'while', 'do', 'done', ';') -_addwords('until', _whileuntil, 'until', 'do', 'done') -_addwords('select', _select, 'select', 'in', 'do', 'done') +COMPOUND_RESERVED_WORDS = {} +_addwords("if", _if, "if", "then", "elif", "else", "fi", ";") +_addwords("for", _for, "for", "in", "do", "done", ";") +_addwords("while", _whileuntil, "while", "do", "done", ";") +_addwords("until", _whileuntil, "until", "do", "done") +_addwords("select", _select, "select", "in", "do", "done") -_function = textwrap.dedent(''' A shell function is an object that is called like a simple command and executes a compound command with a +_function = textwrap.dedent( + """ A shell function is an object that is called like a simple command and executes a compound command with a new set of positional parameters. Shell functions are declared as follows: name () compound-command [redirection] @@ -258,26 +316,29 @@ def _addwords(key, text, *words): redirections (see REDIRECTION below) specified when a function is defined are performed when the function is executed. 
The exit status of a function definition is zero unless a syntax error occurs or a readonly function with the same name already exists. When executed, the exit status - of a function is the exit status of the last command executed in the body. (See FUNCTIONS below.)''') + of a function is the exit status of the last command executed in the body. (See FUNCTIONS below.)""" +) -_functioncall = 'call shell function %r' -_functionarg = 'argument for shell function %r' +_function_call = "call shell function %r" +_functionarg = "argument for shell function %r" -COMMENT = textwrap.dedent('''COMMENTS +COMMENT = textwrap.dedent( + """COMMENTS In a non-interactive shell, or an interactive shell in which the interactive_comments option to the shopt builtin is enabled (see SHELL BUILTIN COMMANDS below), a word beginning with # causes that word and all remaining characters on that line to be ignored. An interactive shell without the interactive_comments option enabled does not allow comments. The interactive_comments option is on by default in interactive - shells.''') + shells.""" +) parameters = { - '*' : 'star', - '@' : 'at', - '#' : 'pound', - '?' : 'question', - '-' : 'hyphen', - '$' : 'dollar', - '!' : 'exclamation', - '0' : 'zero', - '_' : 'underscore', + "*": "star", + "@": "at", + "#": "pound", + "?": "question", + "-": "hyphen", + "$": "dollar", + "!": "exclamation", + "0": "zero", + "_": "underscore", } diff --git a/explainshell/logger/__init__.py b/explainshell/logger/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/explainshell/logger/logger_helper.py b/explainshell/logger/logger_helper.py new file mode 100644 index 00000000..dc83916a --- /dev/null +++ b/explainshell/logger/logger_helper.py @@ -0,0 +1,53 @@ +""" +This module provides a helper function for logging using the loguru library. + +The logger_helper module sets up a logger with a rotated log file and also logs to standard output. 
+""" + +import logging +import sys +from pathlib import Path + +import loguru + +# intercept log entries handled by python's 'logging' module +# and redirect them to loguru logger +from explainshell.logger.logging_interceptor import InterceptHandler + +parent_dir = Path(__file__).parent.parent.parent +logs_dir = parent_dir / "logs" +# create logs directory if it does not exist +logs_dir.mkdir(exist_ok=True) + +logger = loguru.logger +logger.remove() + + +def level_filter(): + """ + Filter function to exclude DEBUG and SUCCESS log levels from being logged to standard output. + """ + def is_level(record): + return record["level"].name not in ["DEBUG", "SUCCESS"] + return is_level + + +# init rotated log file +logger.add( + logs_dir / "debug.log", + rotation="10 MB", + retention="7 days", + backtrace=True, + colorize=False, + catch=True, + delay=True, + diagnose=True, + enqueue=True, +) + + +# also log to standard output +logger.add(sys.stdout, colorize=True, filter=level_filter()) + +# activate logging and redirect all logs to loguru logger +logging.basicConfig(handlers=[InterceptHandler()], level=logging.DEBUG, force=True) diff --git a/explainshell/logger/logging_interceptor.py b/explainshell/logger/logging_interceptor.py new file mode 100644 index 00000000..7701bce6 --- /dev/null +++ b/explainshell/logger/logging_interceptor.py @@ -0,0 +1,32 @@ +import logging +import sys + +import loguru + +logger = loguru.logger + + +class InterceptHandler(logging.Handler): + """ + intercept log messages logged with the logging module + + source: https://loguru.readthedocs.io/en/stable/overview.html#entirely-compatible-with-standard-logging + also see: https://stackoverflow.com/a/70620198 + """ + + def emit(self, record): + # Get corresponding Loguru level if it exists. + try: + level = logger.level(record.levelname).name + except ValueError: + level = record.levelno + + # Find caller from where originated the logged message. 
+ frame, depth = sys._getframe(6), 6 + while frame and frame.f_code.co_filename == logging.__file__: + frame = frame.f_back + depth += 1 + + logger.opt(depth=depth, exception=record.exc_info).log( + level, record.getMessage() + ) diff --git a/explainshell/manager.py b/explainshell/manager.py index ff4f7b54..48794552 100644 --- a/explainshell/manager.py +++ b/explainshell/manager.py @@ -1,91 +1,105 @@ -import sys, os, argparse, logging, glob +import argparse +import os +import sys +import logging +import glob -from explainshell import options, store, fixer, manpage, errors, util, config +from explainshell import options, store, fixer, manpage, errors, config from explainshell.algo import classifier -logger = logging.getLogger('explainshell.manager') +logger = logging.getLogger("explainshell.manager") -class managerctx(object): + +class ManagerCtx: def __init__(self, classifier, store, manpage): self.classifier = classifier self.store = store self.manpage = manpage self.name = manpage.name - self.classifiermanpage = None - self.optionsraw = None - self.optionsextracted = None + self.classifier_man_page = None + self.options_raw = None + self.options_extracted = None self.aliases = None -class manager(object): - '''the manager uses all parts of the system to read, classify, parse, extract - and write a man page to the database''' - def __init__(self, dbhost, dbname, paths, overwrite=False, drop=False): + +class Manager: + """the manager uses all parts of the system to read, classify, parse, extract + and write a man page to the database""" + + def __init__(self, db_host, dbname, paths, overwrite=False, drop=False): self.paths = paths self.overwrite = overwrite - self.store = store.store(dbname, dbhost) + self.store = store.Store(dbname, db_host) - self.classifier = classifier.classifier(self.store, 'bayes') + self.classifier = classifier.Classifier(self.store, "bayes") self.classifier.train() if drop: self.store.drop(True) def ctx(self, m): - return 
managerctx(self.classifier, self.store, m) + return ManagerCtx(self.classifier, self.store, m) - def _read(self, ctx, frunner): - frunner.pre_get_raw_manpage() + def _read(self, ctx, f_runner): + f_runner.pre_get_raw_manpage() ctx.manpage.read() ctx.manpage.parse() assert len(ctx.manpage.paragraphs) > 1 - ctx.manpage = store.manpage(ctx.manpage.shortpath, ctx.manpage.name, - ctx.manpage.synopsis, ctx.manpage.paragraphs, list(ctx.manpage.aliases)) - frunner.post_parse_manpage() - - def _classify(self, ctx, frunner): - ctx.classifiermanpage = store.classifiermanpage(ctx.name, ctx.manpage.paragraphs) - frunner.pre_classify() + ctx.manpage = store.ManPage( + ctx.manpage.short_path, + ctx.manpage.name, + ctx.manpage.synopsis, + ctx.manpage.paragraphs, + list(ctx.manpage.aliases), + ) + f_runner.post_parse_manpage() + + def _classify(self, ctx, fr_runner): + ctx.classifiermanpage = store.ClassifierManpage( + ctx.name, ctx.manpage.paragraphs + ) + fr_runner.pre_classify() _ = list(ctx.classifier.classify(ctx.classifiermanpage)) - frunner.post_classify() + fr_runner.post_classify() - def _extract(self, ctx, frunner): + def _extract(self, ctx, f_runner): options.extract(ctx.manpage) - frunner.post_option_extraction() + f_runner.post_option_extraction() if not ctx.manpage.options: - logger.warn("couldn't find any options for manpage %s", ctx.manpage.name) + logger.warning("couldn't find any options for manpage %s", ctx.manpage.name) - def _write(self, ctx, frunner): - frunner.pre_add_manpage() - return ctx.store.addmanpage(ctx.manpage) + def _write(self, ctx, f_runner): + f_runner.pre_add_manpage() + return ctx.store.add_manpage(ctx.manpage) - def _update(self, ctx, frunner): - frunner.pre_add_manpage() + def _update(self, ctx, f_runner): + f_runner.pre_add_manpage() return ctx.store.updatemanpage(ctx.manpage) def process(self, ctx): - frunner = fixer.runner(ctx) + f_runner = fixer.Runner(ctx) - self._read(ctx, frunner) - self._classify(ctx, frunner) - self._extract(ctx, 
frunner) + self._read(ctx, f_runner) + self._classify(ctx, f_runner) + self._extract(ctx, f_runner) - m = self._write(ctx, frunner) + m = self._write(ctx, f_runner) return m def edit(self, m, paragraphs=None): ctx = self.ctx(m) - frunner = fixer.runner(ctx) + f_runner = fixer.Runner(ctx) if paragraphs: m.paragraphs = paragraphs - frunner.disable('paragraphjoiner') - frunner.post_option_extraction() + f_runner.disable("paragraphjoiner") + f_runner.post_option_extraction() else: - self._extract(ctx, frunner) - m = self._update(ctx, frunner) + self._extract(ctx, f_runner) + m = self._update(ctx, f_runner) return m def run(self): @@ -93,16 +107,19 @@ def run(self): exists = [] for path in self.paths: try: - m = manpage.manpage(path) - logger.info('handling manpage %s (from %s)', m.name, path) + m = manpage.ManPage(path) + logger.info("handling manpage %s (from %s)", m.name, path) try: - mps = self.store.findmanpage(m.shortpath[:-3]) - mps = [mp for mp in mps if m.shortpath == mp.source] + mps = self.store.find_man_page(m.short_path[:-3]) + mps = [mp for mp in mps if m.short_path == mp.source] if mps: assert len(mps) == 1 mp = mps[0] if not self.overwrite or mp.updated: - logger.info('manpage %r already in the data store, not overwriting it', m.name) + logger.info( + "manpage %r already in the data store, not overwriting it", + m.name, + ) exists.append(m) continue except errors.ProgramDoesNotExist: @@ -113,89 +130,114 @@ def run(self): m = self.process(ctx) if m: added.append(m) - except errors.EmptyManpage, e: - logger.error('manpage %r is empty!', e.args[0]) + except errors.EmptyManpage as e: + logger.error("manpage %r is empty!", e.args[0]) except ValueError: - logger.fatal('uncaught exception when handling manpage %s', path) + logger.fatal("uncaught exception when handling manpage %s", path) except KeyboardInterrupt: raise - except: - logger.fatal('uncaught exception when handling manpage %s', path) + except Exception as error_msg: + logger.fatal(f"uncaught 
exception when handling manpage '{path}' -> error: {error_msg}") raise if not added: - logger.warn('no manpages added') + logger.warning("no manpages added") else: - self.findmulticommands() + self.findmulti_cmds() return added, exists - def findmulticommands(self): + def findmulti_cmds(self): manpages = {} potential = [] for _id, m in self.store.names(): - if '-' in m: - potential.append((m.split('-'), _id)) + if "-" in m: + potential.append((m.split("-"), _id)) else: manpages[m] = _id - mappings = set([x[0] for x in self.store.mappings()]) - mappingstoadd = [] - multicommands = {} + mappings = {x[0] for x in self.store.mappings()} + mappings_to_a = [] + multi_cmds = {} for p, _id in potential: - if ' '.join(p) in mappings: + if " ".join(p) in mappings: continue if p[0] in manpages: - mappingstoadd.append((' '.join(p), _id)) - multicommands[p[0]] = manpages[p[0]] + mappings_to_a.append((" ".join(p), _id)) + multi_cmds[p[0]] = manpages[p[0]] + + for src, dst in mappings_to_a: + self.store.add_mapping(src, dst, 1) + logger.info("inserting mapping (multi_cmd) %s -> %s", src, dst) - for src, dst in mappingstoadd: - self.store.addmapping(src, dst, 1) - logger.info('inserting mapping (multicommand) %s -> %s', src, dst) + for multi_cmd, _id in multi_cmds.items(): + self.store.set_multi_cmd(_id) + logger.info("making %r a multi_cmd", multi_cmd) - for multicommand, _id in multicommands.iteritems(): - self.store.setmulticommand(_id) - logger.info('making %r a multicommand', multicommand) + return mappings_to_a, multi_cmds - return mappingstoadd, multicommands -def main(files, dbname, dbhost, overwrite, drop, verify): +def main(files, dbname, db_host, overwrite, drop, verify): if verify: - s = store.store(dbname, dbhost) + s = store.Store(dbname, db_host) ok = s.verify() return 0 if ok else 1 if drop: - if raw_input('really drop db (y/n)? ').strip().lower() != 'y': + if input("really drop db (y/n)? 
").strip().lower() != "y": drop = False else: - overwrite = True # if we drop, no need to take overwrite into account + overwrite = True # if we drop, no need to take overwrite into account gzs = set() for path in files: if os.path.isdir(path): - gzs.update([os.path.abspath(f) for f in glob.glob(os.path.join(path, '*.gz'))]) + gzs.update( + [os.path.abspath(f) for f in glob.glob(os.path.join(path, "*.gz"))] + ) else: gzs.add(os.path.abspath(path)) - m = manager(dbhost, dbname, gzs, overwrite, drop) + m = Manager(db_host, dbname, gzs, overwrite, drop) added, exists = m.run() for mp in added: - print 'successfully added %s' % mp.source + print(f"successfully added '{mp.source}'") if exists: - print 'these manpages already existed and werent overwritten: \n\n%s' % '\n'.join([m.path for m in exists]) - -if __name__ == '__main__': - parser = argparse.ArgumentParser(description='process man pages and save them in the store') - parser.add_argument('--log', type=str, default='ERROR', help='use log as the logger log level') - parser.add_argument('--overwrite', action='store_true', default=False, help='overwrite man pages that already exist in the store') - parser.add_argument('--drop', action='store_true', default=False, help='delete all existing man pages') - parser.add_argument('--db', default='explainshell', help='mongo db name') - parser.add_argument('--host', default=config.MONGO_URI, help='mongo host') - parser.add_argument('--verify', action='store_true', default=False, help='verify db integrity') - parser.add_argument('files', nargs='*') + print( + "these manpages already existed and were not overwritten: \n\n%s" + % "\n".join([m.path for m in exists]) + ) + + +if __name__ == "__main__": + parser = argparse.ArgumentParser( + description="process man pages and save them in the store" + ) + parser.add_argument( + "--log", type=str, default="ERROR", help="use log as the logger log level" + ) + parser.add_argument( + "--overwrite", + action="store_true", + 
default=False, + help="overwrite man pages that already exist in the store", + ) + parser.add_argument( + "--drop", + action="store_true", + default=False, + help="delete all existing man pages", + ) + parser.add_argument("--db", default="explainshell", help="mongo db name") + parser.add_argument("--host", default=config.MONGO_URI, help="mongo host") + parser.add_argument( + "--verify", action="store_true", default=False, help="verify db integrity" + ) + parser.add_argument("files", nargs="*") args = parser.parse_args() logging.basicConfig(level=getattr(logging, args.log.upper())) - sys.exit(main(args.files, args.db, args.host, args.overwrite, args.drop, args.verify)) + sys.exit( + main(args.files, args.db, args.host, args.overwrite, args.drop, args.verify) + ) diff --git a/explainshell/manpage.py b/explainshell/manpage.py index 979c3d7a..7bfc0505 100644 --- a/explainshell/manpage.py +++ b/explainshell/manpage.py @@ -1,9 +1,14 @@ -import os, subprocess, re, logging, collections, urllib +import os +import subprocess +import re +import logging +import collections +import urllib from explainshell import config, store, errors -devnull = open(os.devnull, 'w') -SPLITSYNOP = re.compile(r'([^ ]+) - (.*)$') +devnull = open(os.devnull, "w") +SPLIT_SYNOP = re.compile(r"([^ ]+) - (.*)$") ENV = dict(os.environ) ENV["W3MMAN_MAN"] = "man --no-hyphenation" @@ -13,25 +18,27 @@ logger = logging.getLogger(__name__) -def extractname(gzname): - ''' - >>> extractname('ab.1.gz') + +def extract_name(gz_name): + """ + >>> extract_name('ab.1.gz') 'ab' - >>> extractname('ab.1.1.gz') + >>> extract_name('ab.1.1.gz') 'ab.1' - >>> extractname('ab.1xyz.gz') + >>> extract_name('ab.1xyz.gz') 'ab' - >>> extractname('ab.1.1xyz.gz') + >>> extract_name('ab.1.1xyz.gz') 'ab.1' - >>> extractname('a/b/c/ab.1.1xyz.gz') + >>> extract_name('a/b/c/ab.1.1xyz.gz') 'ab.1' - ''' - if '/' in gzname: - gzname = os.path.basename(gzname) - return gzname.rsplit('.', 2)[0] + """ + if "/" in gz_name: + gz_name = 
os.path.basename(gz_name) + return gz_name.rsplit(".", 2)[0] + -def bold(l): - ''' +def bold(ln_in): + """ >>> bold('a') ([], ['a']) >>> bold('a') @@ -40,9 +47,9 @@ def bold(l): (['b'], ['a', 'c']) >>> bold('first second:') (['first', 'second:'], []) - ''' + """ inside = [] - for m in _section.finditer(l): + for m in _section.finditer(ln_in): inside.append(m.span(0)) current = 0 @@ -50,156 +57,194 @@ def bold(l): for start, end in inside: outside.append((current, start)) current = end - outside.append((current, len(l))) + outside.append((current, len(ln_in))) - inside = [l[s:e] for s, e in inside] - inside = [s.replace('', '').replace('', '') for s in inside] + inside = [ln_in[s:e] for s, e in inside] + inside = [s.replace("", "").replace("", "") for s in inside] - outside = [l[s:e] for s, e in outside] - outside = [l for l in outside if l and not l.isspace()] + outside = [ln_in[s:e] for s, e in outside] + outside = [ln for ln in outside if ln and not ln.isspace()] return inside, outside + # w3mman2html.cgi (the tool we're using to output html from a man page) does # some strange escaping which causes it to output invalid utf8. 
we look these # up and fix them manually -_replacementsprefix = [ - ('\xe2\x80\xe2\x80\x98', None, True), # left single quote - ('\xe2\x80\xe2\x80\x99', None, True), # right single quote - ('\xe2\x80\xe2\x80\x9c', None, True), # left double quote - ('\xe2\x80\xe2\x80\x9d', None, True), # right double quote - ('\xe2\x94\xe2\x94\x82', '|', False), # pipe - ('\xe2\x8e\xe2\x8e\xaa', None, False), # pipe 2 - ('\xe2\x80\xe2\x80\x90', None, True), # hyphen - ('\xe2\x80\xe2\x80\x94', None, True), # hyphen 2 - ('\xe2\x80\xc2\xbd', None, True), # half - ('\xe2\x88\xe2\x88\x97', None, True), # asterisk - ('\xe2\x86\xe2\x86\x92', None, True), # right arrow - ('\xe2\x88\xe2\x88\x92', None, True), # minus sign - ('\xe2\x80\xe2\x80\x93', None, True), # en dash - ('\xe2\x80\xe2\x80\xb2', None, False), # prime - ('\xe2\x88\xe2\x88\xbc', None, False), # tilde operator - ('\xe2\x86\xe2\x86\xb5', None, False), # downwards arrow with corner leftwards - ('\xef\xbf\xef\xbf\xbd', None, False) # replacement char - ] +_rp_prefix = [ + ("\xe2\x80\xe2\x80\x98", None, True), # left single quote + ("\xe2\x80\xe2\x80\x99", None, True), # right single quote + ("\xe2\x80\xe2\x80\x9c", None, True), # left double quote + ("\xe2\x80\xe2\x80\x9d", None, True), # right double quote + ("\xe2\x94\xe2\x94\x82", "|", False), # pipe + ("\xe2\x8e\xe2\x8e\xaa", None, False), # pipe 2 + ("\xe2\x80\xe2\x80\x90", None, True), # hyphen + ("\xe2\x80\xe2\x80\x94", None, True), # hyphen 2 + ("\xe2\x80\xc2\xbd", None, True), # half + ("\xe2\x88\xe2\x88\x97", None, True), # asterisk + ("\xe2\x86\xe2\x86\x92", None, True), # right arrow + ("\xe2\x88\xe2\x88\x92", None, True), # minus sign + ("\xe2\x80\xe2\x80\x93", None, True), # en dash + ("\xe2\x80\xe2\x80\xb2", None, False), # prime + ("\xe2\x88\xe2\x88\xbc", None, False), # tilde operator + ("\xe2\x86\xe2\x86\xb5", None, False), # downwards arrow with corner leftwards + ("\xef\xbf\xef\xbf\xbd", None, False), # replacement char +] _replacements = [] -for searchfor, 
replacewith, underline in _replacementsprefix: - if replacewith is None: - replacewith = searchfor[2:] - _replacements.append((searchfor, replacewith)) +for search_for, rp_with, underline in _rp_prefix: + if rp_with is None: + rp_with = search_for[2:] + _replacements.append((search_for, rp_with)) if underline: - x = list(replacewith) - x.insert(1, '') - x = ''.join(x) - _replacements.append((x, '%s' % replacewith)) - -_replacementsnoprefix = ['\xc2\xb7', # bullet - '\xc2\xb4', # apostrophe - '\xc2\xa0', # no break space - '\xc3\xb8', '\xe4\xbd\xa0', '\xe5\xa5\xbd', # gibberish - '\xc2\xa7', # section sign - '\xef\xbf\xbd', # replacement char - '\xc2\xa4', # latin small letter a with diaeresis - '\xc3\xa4', # latin small letter a with diaeresis - '\xc4\xa4', # latin small letter a with diaeresis - '\xc3\xaa', # latin small letter e with circumflex - ] - -for s in _replacementsnoprefix: + x = list(rp_with) + x.insert(1, "") + x = "".join(x) + _replacements.append((x, f"{rp_with}")) + +_replacements_no_prefix = [ + "\xc2\xb7", # bullet + "\xc2\xb4", # apostrophe + "\xc2\xa0", # no break space + "\xc3\xb8", + "\xe4\xbd\xa0", + "\xe5\xa5\xbd", # gibberish + "\xc2\xa7", # section sign + "\xef\xbf\xbd", # replacement char + "\xc2\xa4", # latin small letter a with diaeresis + "\xc3\xa4", # latin small letter a with diaeresis + "\xc4\xa4", # latin small letter a with diaeresis + "\xc3\xaa", # latin small letter e with circumflex +] + +for s in _replacements_no_prefix: x = list(s) - x.insert(1, '') - x = ''.join(x) - _replacements.append((x, '%s' % s)) + x.insert(1, "") + x = "".join(x) + _replacements.append((x, f"{s}")) _href = re.compile(r'') -_section = re.compile(r'([^<]+)') +_section = re.compile(r"([^<]+)") + -def _parsetext(lines): - paragraphlines = [] +def _parse_text(lines): + para_lines = [] section = None i = 0 - for l in lines: - l = re.sub(_href, r'', l) - for lookfor, replacewith in _replacements: - l = re.sub(lookfor, replacewith, l) + for ln in lines: + # 
TODO: check if this url is still valid + ln = re.sub( + _href, + r'', + ln, + ) + for look_for, rp_with in _replacements: + ln = re.sub(look_for, rp_with, ln) + # confirm the line is valid utf8 - lreplaced = l.decode('utf8', 'ignore').encode('utf8') - if lreplaced != l: - logger.error('line %r contains invalid utf8', l) - l = lreplaced + l_replaced = ln # .decode("utf8", "ignore").encode("utf8") + if l_replaced != ln: + logger.error("line %r contains invalid utf8", ln) + ln = l_replaced raise ValueError - if l.startswith(''): # section - section = re.sub(_section, r'\1', l) + if ln.startswith(""): # section + section = re.sub(_section, r"\1", ln) else: - foundsection = False - if l.strip().startswith(''): - inside, outside = bold(l.strip()) - if not outside and inside[-1][-1] == ':': - foundsection = True - section = ' '.join(inside)[:-1] - if not foundsection: - if not l.strip() and paragraphlines: - yield store.paragraph(i, '\n'.join(paragraphlines), section, False) + found_section = False + if ln.strip().startswith(""): + inside, outside = bold(ln.strip()) + if not outside and inside[-1][-1] == ":": + found_section = True + section = " ".join(inside)[:-1] + if not found_section: + if not ln.strip() and para_lines: + yield store.Paragraph(i, "\n".join(para_lines), section, False) i += 1 - paragraphlines = [] - elif l.strip(): - paragraphlines.append(l) - if paragraphlines: - yield store.paragraph(i, '\n'.join(paragraphlines), section, False) - -def _parsesynopsis(base, synopsis): - ''' - >>> _parsesynopsis('/a/b/c', '/a/b/c: "p-r+o++g - foo bar."') + para_lines = [] + elif ln.strip(): + para_lines.append(ln) + if para_lines: + yield store.Paragraph(i, "\n".join(para_lines), section, False) + + +def _parse_synopsis(base, synopsis): + """ + >>> _parse_synopsis('/a/b/c', '/a/b/c: "p-r+o++g - foo bar."') ('p-r+o++g', 'foo bar') - ''' - synopsis = synopsis[len(base)+3:-1] - if synopsis[-1] == '.': + """ + synopsis = synopsis[len(base) + 3: -1] + if synopsis[-1] == 
".": synopsis = synopsis[:-1] - return SPLITSYNOP.match(synopsis).groups() -class manpage(object): - '''read the man page at path by executing w3mman2html.cgi and find its - synopsis with lexgrog + if not SPLIT_SYNOP.match(synopsis): + return [] + + return SPLIT_SYNOP.match(synopsis).groups() + + +class ManPage: + """read the man page at path by executing `w3mman2html.cgi` and find it's + synopsis with `lexgrog` since some man pages share the same name (different versions), each alias of a man page has a score that's determined in this simple fashion: - name of man page source file is given a score of 10 - all other names found for a particular man page are given a score of 1 - (other names are found by scanning the output of lexgrog) - ''' + (other names are found by scanning the output of `lexgrog`) + """ + def __init__(self, path): self.path = path - self.shortpath = os.path.basename(self.path) - self.name = extractname(self.path) - self.aliases = set([self.name]) + self.short_path = os.path.basename(self.path) + self.name = extract_name(self.path) + self.aliases = {self.name} self.synopsis = None self.paragraphs = None self._text = None def read(self): - '''Read the content from a local manpage file and store it in usable formats - on the class instance.''' - cmd = [config.MAN2HTML, urllib.urlencode({'local' : os.path.abspath(self.path)})] - logger.info('executing %r', ' '.join(cmd)) - self._text = subprocess.check_output(cmd, stderr=devnull, env=ENV) + """Read the content from a local manpage file and store it in usable formats + on the class instance.""" + cmd = [config.MAN2HTML, urllib.parse.urlencode({"local": os.path.abspath(self.path)})] + logger.info("executing %r", " ".join(cmd)) + self._text = "" + + try: + t_proc = subprocess.run(cmd, check=True, capture_output=True, text=True, timeout=300, env=ENV) + + if t_proc.stdout: + self._text = t_proc.stdout + if t_proc.stderr: + logger.error(f"failed to extract text for {self.name} -> w3mman2html.cgi 
returned: {t_proc.stderr}") + except Exception as error_msg: + logger.error(f"failed to extract text for {self.name} -> error: {error_msg}") + try: - self.synopsis = subprocess.check_output(['lexgrog', self.path], stderr=devnull).rstrip() + self.synopsis = "" + s_proc = subprocess.run( + ["lexgrog", self.path], capture_output=True, text=True, timeout=300 + ) + if s_proc.stdout: + self.synopsis = s_proc.stdout.rstrip() + if s_proc.stderr: + logger.error(f"failed to extract text for {self.name} -> lexgrog returned: {s_proc.stderr}") except subprocess.CalledProcessError: - logger.error('failed to extract synopsis for %s', self.name) + logger.error("failed to extract synopsis for %s", self.name) def parse(self): - self.paragraphs = list(_parsetext(self._text.splitlines()[7:-3])) + self.paragraphs = list(_parse_text(self._text.splitlines()[7:-3])) if not self.paragraphs: - raise errors.EmptyManpage(self.shortpath) + raise errors.EmptyManpage(self.short_path) if self.synopsis: - self.synopsis = [_parsesynopsis(self.path, l) for l in self.synopsis.splitlines()] + self.synopsis = [ + _parse_synopsis(self.path, s_line) for s_line in self.synopsis.splitlines() + ] # figure out aliases from the synopsis d = collections.OrderedDict() for prog, text in self.synopsis: d.setdefault(text, []).append(prog) - text, progs = d.items()[0] + text, progs = list(dict(d).items())[0] self.synopsis = text self.aliases.update(progs) self.aliases.remove(self.name) diff --git a/explainshell/matcher.py b/explainshell/matcher.py index 41cb8649..c0d9cd8e 100644 --- a/explainshell/matcher.py +++ b/explainshell/matcher.py @@ -1,43 +1,50 @@ -import collections, logging, itertools +import collections +import logging +import itertools import bashlex.parser import bashlex.ast -from explainshell import errors, util, helpconstants +from explainshell import errors, help_constants, util -class matchgroup(object): - '''a class to group matchresults together + +class MatchGroup: + """a class to group 
matchresults together we group all shell results in one group and create a new group for every - command''' + command""" + def __init__(self, name): self.name = name self.results = [] def __repr__(self): - return '' % (self.name, len(self.results)) + return "" % (self.name, len(self.results)) -class matchresult(collections.namedtuple('matchresult', 'start end text match')): + +class MatchResult(collections.namedtuple("MatchResult", "start end text match")): @property def unknown(self): return self.text is None -matchwordexpansion = collections.namedtuple('matchwordexpansion', - 'start end kind') + +match_word_exp = collections.namedtuple("match_word_exp", "start end kind") logger = logging.getLogger(__name__) -class matcher(bashlex.ast.nodevisitor): - '''parse a command line and return a list of matchresults describing + +class Matcher(bashlex.ast.nodevisitor): + """parse a command line and return a list of `MatchResult`s describing each token. - ''' + """ + def __init__(self, s, store): - self.s = s.encode('latin1', 'replace') + self.s = s self.store = store - self._prevoption = self._currentoption = None - self.groups = [matchgroup('shell')] + self._prev_option = self._current_option = None + self.groups = [MatchGroup("shell")] - # a list of matchwordexpansions where expansions happened during word + # a list of `match_word_exp` where expansions happened during word # expansion self.expansions = [] @@ -45,98 +52,109 @@ def __init__(self, s, store): # command is started, we push a tuple with: # - the node that started this group. this is used to find it when # a command ends (see visitnodeend) - # - its matchgroup. new matchresults will be added to it. + # - its `MatchGroup`. new `MatchResult`s will be added to it. # - a word used to end the top-most command. this is used when a flag # starts a new command, e.g. find -exec. 
- self.groupstack = [(None, self.groups[-1], None)] + self.group_stack = [(None, self.groups[-1], None)] # keep a stack of the currently visited compound command (if/for..) # to provide context when matching reserved words, since for example # the keyword 'done' can appear in a for, while.. - self.compoundstack = [] + self.compound_stack = [] # a set of functions defined in the current input, we will try to match # commands against them so if one refers to defined function, it won't # show up as unknown or be taken from the db self.functions = set() - def _generatecommandgroupname(self): - existing = len([g for g in self.groups if g.name.startswith('command')]) - return 'command%d' % existing + def _generate_cmd_group_name(self): + existing = len([g for g in self.groups if g.name.startswith("command")]) + return f"command{existing}" @property def matches(self): - '''return the list of results from the most recently created group''' - return self.groupstack[-1][1].results + """return the list of results from the most recently created group""" + return self.group_stack[-1][1].results @property - def allmatches(self): + def all_matches(self): return list(itertools.chain.from_iterable(g.results for g in self.groups)) @property - def manpage(self): - group = self.groupstack[-1][1] + def man_page(self): + group = self.group_stack[-1][1] # we do not have a manpage if the top of the stack is the shell group. 
# this can happen if the first argument is a command substitution # and we're not treating it as a "man page not found" - if group.name != 'shell': + if group.name != "shell": return group.manpage def find_option(self, opt): - self._currentoption = self.manpage.find_option(opt) - logger.debug('looking up option %r, got %r', opt, self._currentoption) - return self._currentoption + self._current_option = self.man_page.find_option(opt) + logger.debug("looking up option %r, got %r", opt, self._current_option) + return self._current_option - def findmanpages(self, prog): - prog = prog.decode('latin1') - logger.info('looking up %r in store', prog) - manpages = self.store.findmanpage(prog) - logger.info('found %r in store, got: %r, using %r', prog, manpages, manpages[0]) - return manpages + def find_man_pages(self, prog): + logger.info("looking up %r in store", prog) + man_pages = self.store.find_man_page(prog) + logger.info("found %r in store, got: %r, using %r", prog, man_pages, man_pages[0]) + return man_pages def unknown(self, token, start, end): - logger.debug('nothing to do with token %r', token) - return matchresult(start, end, None, None) + logger.debug("nothing to do with token %r", token) + return MatchResult(start, end, None, None) def visitreservedword(self, node, word): # first try the compound reserved words helptext = None - if self.compoundstack: - currentcompound = self.compoundstack[-1] - helptext = helpconstants.COMPOUNDRESERVEDWORDS.get(currentcompound, {}).get(word) + if self.compound_stack: + current_compound = self.compound_stack[-1] + helptext = help_constants.COMPOUND_RESERVED_WORDS.get(current_compound, {}).get( + word + ) # try these if we don't have anything specific if not helptext: - helptext = helpconstants.RESERVEDWORDS[word] + helptext = help_constants.RESERVED_WORDS[word] - self.groups[0].results.append(matchresult(node.pos[0], node.pos[1], helptext, None)) + self.groups[0].results.append( + MatchResult(node.pos[0], node.pos[1], helptext, 
None) + ) def visitoperator(self, node, op): helptext = None - if self.compoundstack: - currentcompound = self.compoundstack[-1] - helptext = helpconstants.COMPOUNDRESERVEDWORDS.get(currentcompound, {}).get(op) + if self.compound_stack: + curr_compound = self.compound_stack[-1] + helptext = help_constants.COMPOUND_RESERVED_WORDS.get(curr_compound, {}).get( + op + ) if not helptext: - helptext = helpconstants.OPERATORS[op] + helptext = help_constants.OPERATORS[op] - self.groups[0].results.append(matchresult(node.pos[0], node.pos[1], helptext, None)) + self.groups[0].results.append( + MatchResult(node.pos[0], node.pos[1], helptext, None) + ) def visitpipe(self, node, pipe): self.groups[0].results.append( - matchresult(node.pos[0], node.pos[1], helpconstants.PIPELINES, None)) + MatchResult(node.pos[0], node.pos[1], help_constants.PIPELINES, None) + ) - def visitredirect(self, node, input, type, output, heredoc): - helptext = [helpconstants.REDIRECTION] + def visitredirect(self, node, input, r_type, output, heredoc): + helptext = [help_constants.REDIRECTION] - if type == '>&' and isinstance(output, int): - type = type[:-1] + if r_type == ">&" and isinstance(output, int): + r_type = r_type[:-1] - if type in helpconstants.REDIRECTION_KIND: - helptext.append(helpconstants.REDIRECTION_KIND[type]) + if r_type in help_constants.REDIRECTION_KIND: + helptext.append(help_constants.REDIRECTION_KIND[r_type]) + + logger.debug(helptext) self.groups[0].results.append( - matchresult(node.pos[0], node.pos[1], '\n\n'.join(helptext), None)) + MatchResult(node.pos[0], node.pos[1], "\n\n".join(helptext), None) + ) # the output might contain a wordnode, visiting it will confuse the # matcher who'll think it's an argument, instead visit the expansions @@ -151,21 +169,26 @@ def visitcommand(self, node, parts): assert parts # look for the first WordNode, which might not be at parts[0] - idxwordnode = bashlex.ast.findfirstkind(parts, 'word') - if idxwordnode == -1: - logger.info('no words 
found in command (probably contains only redirects)') + idx_word_node = bashlex.ast.findfirstkind(parts, "word") + if idx_word_node == -1: + logger.info("no words found in command (probably contains only redirects)") return - wordnode = parts[idxwordnode] + word_node = parts[idx_word_node] # check if this refers to a previously defined function - if wordnode.word in self.functions: - logger.info('word %r is a function, not trying to match it or its ' - 'arguments', wordnode) - - # first, add a matchresult for the function call - mr = matchresult(wordnode.pos[0], wordnode.pos[1], - helpconstants._functioncall % wordnode.word, None) + if word_node.word in self.functions: + logger.info( + f"word {word_node} is a function, not trying to match it or it's arguments" + ) + + # first, add a MatchResult for the function call + mr = MatchResult( + word_node.pos[0], + word_node.pos[1], + help_constants._function_call % word_node.word, + None, + ) self.matches.append(mr) # this is a bit nasty: if we were to visit the command like we @@ -174,300 +197,352 @@ def visitcommand(self, node, parts): # consider them part of the function call for part in parts: # maybe it's a redirect... 
- if part.kind != 'word': + if part.kind != "word": self.visit(part) else: # this is an argument to the function - if part is not wordnode: - mr = matchresult(part.pos[0], part.pos[1], - helpconstants._functionarg % wordnode.word, - None) + if part is not word_node: + mr = MatchResult( + part.pos[0], + part.pos[1], + help_constants._functionarg % word_node.word, + None, + ) self.matches.append(mr) # visit any expansions in there - for ppart in part.parts: - self.visit(ppart) + for p_part in part.parts: + self.visit(p_part) # we're done with this commandnode, don't visit its children return False - self.startcommand(node, parts, None) def visitif(self, *args): - self.compoundstack.append('if') + self.compound_stack.append("if") + def visitfor(self, node, parts): - self.compoundstack.append('for') + self.compound_stack.append("for") for part in parts: # don't visit words since they're not part of the current command, # instead consider them part of the for construct - if part.kind == 'word': - mr = matchresult(part.pos[0], part.pos[1], helpconstants._for, None) + if part.kind == "word": + mr = MatchResult(part.pos[0], part.pos[1], help_constants._for, None) self.groups[0].results.append(mr) - # but we do want to visit expanions - for ppart in part.parts: - self.visit(ppart) + # but we do want to visit expansions + for p_part in part.parts: + self.visit(p_part) else: self.visit(part) return False def visitwhile(self, *args): - self.compoundstack.append('while') + self.compound_stack.append("while") + def visituntil(self, *args): - self.compoundstack.append('until') + self.compound_stack.append("until") def visitnodeend(self, node): - if node.kind == 'command': + if node.kind == "command": # it's possible for visitcommand/end to be called without a command # group being pushed if it contains only redirect nodes - if len(self.groupstack) > 1: - logger.info('visitnodeend %r, groups %d', node, len(self.groupstack)) + if len(self.group_stack) > 1: + 
logger.info("visitnodeend %r, groups %d", node, len(self.group_stack)) - while self.groupstack[-1][0] is not node: - logger.info('popping groups that are a result of nested commands') + while self.group_stack[-1][0] is not node: + logger.info("popping groups that are a result of nested commands") self.endcommand() self.endcommand() - elif node.kind in ('if', 'for', 'while', 'until'): - kind = self.compoundstack.pop() + elif node.kind in ("if", "for", "while", "until"): + kind = self.compound_stack.pop() assert kind == node.kind def startcommand(self, commandnode, parts, endword, addgroup=True): - logger.info('startcommand commandnode=%r parts=%r, endword=%r, addgroup=%s', - commandnode, parts, endword, addgroup) - idxwordnode = bashlex.ast.findfirstkind(parts, 'word') - assert idxwordnode != -1 - - wordnode = parts[idxwordnode] - if wordnode.parts: - logger.info('node %r has parts (it was expanded), no point in looking' - ' up a manpage for it', wordnode) + logger.info( + "startcommand commandnode=%r parts=%r, endword=%r, addgroup=%s", + commandnode, + parts, + endword, + addgroup, + ) + idx_word_node = bashlex.ast.findfirstkind(parts, "word") + assert idx_word_node != -1 + + word_node = parts[idx_word_node] + if word_node.parts: + logger.info( + "node %r has parts (it was expanded), no point in looking" + " up a manpage for it", + word_node, + ) if addgroup: - mg = matchgroup(self._generatecommandgroupname()) + mg = MatchGroup(self._generate_cmd_group_name()) mg.manpage = None mg.suggestions = None self.groups.append(mg) - self.groupstack.append((commandnode, mg, endword)) + self.group_stack.append((commandnode, mg, endword)) return False - startpos, endpos = wordnode.pos + startpos, endpos = word_node.pos try: - mps = self.findmanpages(wordnode.word) + mps = self.find_man_pages(word_node.word) # we consume this node here, pop it from parts so we # don't visit it again as an argument - parts.pop(idxwordnode) - except errors.ProgramDoesNotExist, e: + 
parts.pop(idx_word_node) + except errors.ProgramDoesNotExist as error_msg: if addgroup: # add a group for this command, we'll mark it as unknown # when visitword is called - logger.info('no manpage found for %r, adding a group for it', - wordnode.word) + logger.info( + f"no manpage found for {word_node.word}, adding a group for it" + ) - mg = matchgroup(self._generatecommandgroupname()) - mg.error = e + mg = MatchGroup(self._generate_cmd_group_name()) + mg.error = error_msg mg.manpage = None mg.suggestions = None self.groups.append(mg) - self.groupstack.append((commandnode, mg, endword)) + self.group_stack.append((commandnode, mg, endword)) return False manpage = mps[0] - idxnextwordnode = bashlex.ast.findfirstkind(parts, 'word') + idx_next_word_node = bashlex.ast.findfirstkind(parts, "word") - # check the next word for a possible multicommand if: + # check the next word for a possible multi_cmd if: # - the matched manpage says so # - we have another word node # - the word node has no expansions in it - if manpage.multicommand and idxnextwordnode != -1 and not parts[idxnextwordnode].parts: - nextwordnode = parts[idxnextwordnode] + if ( + manpage.multi_cmd + and idx_next_word_node != -1 + and not parts[idx_next_word_node].parts + ): + next_word_node = parts[idx_next_word_node] try: - multi = '%s %s' % (wordnode.word, nextwordnode.word) - logger.info('%r is a multicommand, trying to get another token and look up %r', manpage, multi) - mps = self.findmanpages(multi) + multi = f"{word_node.word} {next_word_node.word}" + logger.info( + f"{manpage} is a multi_cmd, trying to get another token and look up {multi}" + ) + mps = self.find_man_pages(multi) manpage = mps[0] # we consume this node here, pop it from parts so we # don't visit it again as an argument - parts.pop(idxnextwordnode) - endpos = nextwordnode.pos[1] + parts.pop(idx_next_word_node) + endpos = next_word_node.pos[1] except errors.ProgramDoesNotExist: - logger.info('no manpage %r for multicommand %r', multi, 
manpage) + logger.info("no manpage %r for multi_cmd %r", multi, manpage) - # create a new matchgroup for the current command - mg = matchgroup(self._generatecommandgroupname()) + # create a new MatchGroup for the current command + mg = MatchGroup(self._generate_cmd_group_name()) mg.manpage = manpage mg.suggestions = mps[1:] self.groups.append(mg) - self.groupstack.append((commandnode, mg, endword)) + self.group_stack.append((commandnode, mg, endword)) - self.matches.append(matchresult(startpos, endpos, - manpage.synopsis or helpconstants.NOSYNOPSIS, None)) + self.matches.append( + MatchResult( + startpos, endpos, manpage.synopsis or help_constants.NO_SYNOPSIS, None + ) + ) return True def endcommand(self): - '''end the most recently created command group by popping it from the - group stack. groups are created by visitcommand or a nested command''' - assert len(self.groupstack) >= 2, 'groupstack must contain shell and command groups' - g = self.groupstack.pop() - logger.info('ending group %s', g) + """end the most recently created command group by popping it from the + group stack. 
groups are created by visitcommand or a nested command""" + assert ( + len(self.group_stack) >= 2 + ), "groupstack must contain shell and command groups" + g = self.group_stack.pop() + logger.info("ending group %s", g) def visitcommandsubstitution(self, node, command): kind = self.s[node.pos[0]] - substart = 2 if kind == '$' else 1 + sub_start = 2 if kind == "$" else 1 # start the expansion after the $( or ` - self.expansions.append(matchwordexpansion(node.pos[0] + substart, - node.pos[1] - 1, - 'substitution')) + self.expansions.append( + match_word_exp(node.pos[0] + sub_start, node.pos[1] - 1, "substitution") + ) # do not try to match the child nodes return False def visitprocesssubstitution(self, node, command): # don't include opening <( and closing ) - self.expansions.append(matchwordexpansion(node.pos[0] + 2, - node.pos[1] - 1, - 'substitution')) + self.expansions.append( + match_word_exp(node.pos[0] + 2, node.pos[1] - 1, "substitution") + ) # do not try to match the child nodes return False def visitassignment(self, node, word): - helptext = helpconstants.ASSIGNMENT - self.groups[0].results.append(matchresult(node.pos[0], node.pos[1], helptext, None)) + helptext = help_constants.ASSIGNMENT + self.groups[0].results.append( + MatchResult(node.pos[0], node.pos[1], helptext, None) + ) def visitword(self, node, word): def attemptfuzzy(chars): m = [] - if chars[0] == '-': + if chars[0] == "-": tokens = [chars[0:2]] + list(chars[2:]) - considerarg = True + consider_arg = True else: tokens = list(chars) - considerarg = False + consider_arg = False pos = node.pos[0] - prevoption = None + prev_option = None for i, t in enumerate(tokens): - op = t if t[0] == '-' else '-' + t + op = t if t[0] == "-" else "-" + t option = self.find_option(op) if option: - if considerarg and not m and option.expectsarg: - logger.info('option %r expected an arg, taking the rest too', option) + if consider_arg and not m and option.expects_arg: + logger.info( + "option %r expected an arg, 
taking the rest too", option + ) # reset the current option if we already took an argument, # this prevents the next word node to also consider itself # as an argument - self._currentoption = None - return [matchresult(pos, pos+len(chars), option.text, None)] + self._current_option = None + return [MatchResult(pos, pos + len(chars), option.text, None)] - mr = matchresult(pos, pos+len(t), option.text, None) + mr = MatchResult(pos, pos + len(t), option.text, None) m.append(mr) # if the previous option expected an argument and we couldn't # match the current token, take the rest as its argument, this # covers a series of short options where the last one has an argument # with no space between it, such as 'xargs -r0n1' - elif considerarg and prevoption and prevoption.expectsarg: + elif consider_arg and prev_option and prev_option.expects_arg: pmr = m[-1] - mr = matchresult(pmr.start, pmr.end+(len(tokens)-i), pmr.text, None) + mr = MatchResult( + pmr.start, pmr.end + (len(tokens) - i), pmr.text, None + ) m[-1] = mr # reset the current option if we already took an argument, # this prevents the next word node to also consider itself # as an argument - self._currentoption = None + self._current_option = None break else: - m.append(self.unknown(t, pos, pos+len(t))) + m.append(self.unknown(t, pos, pos + len(t))) pos += len(t) - prevoption = option + prev_option = option return m def _visitword(node, word): - if not self.manpage: - logger.info('inside an unknown command, giving up on %r', word) + if not self.man_page: + logger.info("inside an unknown command, giving up on %r", word) self.matches.append(self.unknown(word, node.pos[0], node.pos[1])) return - logger.info('trying to match token: %r', word) + logger.info("trying to match token: %r", word) - self._prevoption = self._currentoption - if word.startswith('--'): - word = word.split('=', 1)[0] + self._prev_option = self._current_option + if word.startswith("--"): + word = word.split("=", 1)[0] option = 
self.find_option(word) if option: - logger.info('found an exact match for %r: %r', word, option) - mr = matchresult(node.pos[0], node.pos[1], option.text, None) + logger.info("found an exact match for %r: %r", word, option) + mr = MatchResult(node.pos[0], node.pos[1], option.text, None) self.matches.append(mr) # check if we splitted the word just above, if we did then reset # the current option so the next word doesn't consider itself # an argument if word != node.word: - self._currentoption = None + self._current_option = None else: word = node.word # check if we're inside a nested command and this word marks the end - if isinstance(self.groupstack[-1][-1], list) and word in self.groupstack[-1][-1]: - logger.info('token %r ends current nested command', word) + if ( + isinstance(self.group_stack[-1][-1], list) + and word in self.group_stack[-1][-1] + ): + logger.info("token %r ends current nested command", word) self.endcommand() - mr = matchresult(node.pos[0], node.pos[1], self.matches[-1].text, None) + mr = MatchResult( + node.pos[0], node.pos[1], self.matches[-1].text, None + ) self.matches.append(mr) - elif word != '-' and word.startswith('-') and not word.startswith('--'): - logger.debug('looks like a short option') + elif word != "-" and word.startswith("-") and not word.startswith("--"): + logger.debug("looks like a short option") if len(word) > 2: logger.info("trying to split it up") self.matches.extend(attemptfuzzy(word)) else: - self.matches.append(self.unknown(word, node.pos[0], node.pos[1])) - elif self._prevoption and self._prevoption.expectsarg: - logger.info("previous option possibly expected an arg, and we can't" - " find an option to match the current token, assuming it's an arg") - ea = self._prevoption.expectsarg - possibleargs = ea if isinstance(ea, list) else [] + self.matches.append( + self.unknown(word, node.pos[0], node.pos[1]) + ) + elif self._prev_option and self._prev_option.expects_arg: + logger.info( + "previous option possibly expected 
an arg, and we can't" + " find an option to match the current token, assuming it's an arg" + ) + ea = self._prev_option.expects_arg + possible_args = ea if isinstance(ea, list) else [] take = True - if possibleargs and word not in possibleargs: + if possible_args and word not in possible_args: take = False - logger.info('token %r not in list of possible args %r for %r', - word, possibleargs, self._prevoption) + logger.info( + "token %r not in list of possible args %r for %r", + word, + possible_args, + self._prev_option, + ) if take: - if self._prevoption.nestedcommand: - logger.info('option %r can nest commands', self._prevoption) - if self.startcommand(None, [node], self._prevoption.nestedcommand, addgroup=False): - self._currentoption = None + if self._prev_option.nested_cmd: + logger.info("option %r can nest commands", self._prev_option) + if self.startcommand( + None, + [node], + self._prev_option.nested_cmd, + addgroup=False, + ): + self._current_option = None return pmr = self.matches[-1] - mr = matchresult(pmr.start, node.pos[1], pmr.text, None) + mr = MatchResult(pmr.start, node.pos[1], pmr.text, None) self.matches[-1] = mr else: - self.matches.append(self.unknown(word, node.pos[0], node.pos[1])) + self.matches.append( + self.unknown(word, node.pos[0], node.pos[1]) + ) else: - if self.manpage.partialmatch: - logger.info('attemping to do a partial match') + if self.man_page.partial_match: + logger.info("attempting to do a partial match") m = attemptfuzzy(word) if not any(mm.unknown for mm in m): - logger.info('found a match for everything, taking it') + logger.info("found a match for everything, taking it") self.matches.extend(m) return - if self.manpage.arguments: - if self.manpage.nestedcommand: - logger.info('manpage %r can nest commands', self.manpage) - if self.startcommand(None, [node], self.manpage.nestedcommand, addgroup=False): - self._currentoption = None + if self.man_page.arguments: + if self.man_page.nested_cmd: + logger.info("manpage %r can 
nest commands", self.man_page) + if self.startcommand( + None, [node], self.man_page.nested_cmd, addgroup=False + ): + self._current_option = None return - d = self.manpage.arguments + d = self.man_page.arguments k = list(d.keys())[0] - logger.info('got arguments, using %r', k) + logger.info("got arguments, using %r", k) text = d[k] - mr = matchresult(node.pos[0], node.pos[1], text, None) + mr = MatchResult(node.pos[0], node.pos[1], text, None) self.matches.append(mr) return @@ -481,8 +556,12 @@ def visitfunction(self, node, name, body, parts): def _iscompoundopenclosecurly(compound): first, last = compound.list[0], compound.list[-1] - if (first.kind == 'reservedword' and last.kind == 'reservedword' and - first.word == '{' and last.word == '}'): + if ( + first.kind == "reservedword" + and last.kind == "reservedword" + and first.word == "{" + and last.word == "}" + ): return True # if the compound command we have there is { }, let's include the @@ -491,26 +570,32 @@ def _iscompoundopenclosecurly(compound): # context if _iscompoundopenclosecurly(body): # create a matchresult until after the first { - mr = matchresult(node.pos[0], body.list[0].pos[1], - helpconstants._function, None) + mr = MatchResult( + node.pos[0], body.list[0].pos[1], help_constants._function, None + ) self.groups[0].results.append(mr) # create a matchresult for the closing } - mr = matchresult(body.list[-1].pos[0], body.list[-1].pos[1], - helpconstants._function, None) + mr = MatchResult( + body.list[-1].pos[0], + body.list[-1].pos[1], + help_constants._function, + None, + ) self.groups[0].results.append(mr) # visit anything in between the { } for part in body.list[1:-1]: self.visit(part) else: - beforebody = bashlex.ast.findfirstkind(parts, 'compound') - 1 + beforebody = bashlex.ast.findfirstkind(parts, "compound") - 1 assert beforebody > 0 beforebody = parts[beforebody] # create a matchresult ending at the node before body - mr = matchresult(node.pos[0], beforebody.pos[1], - 
helpconstants._function, None) + mr = MatchResult( + node.pos[0], beforebody.pos[1], help_constants._function, None + ) self.groups[0].results.append(mr) self.visit(body) @@ -518,79 +603,93 @@ def _iscompoundopenclosecurly(compound): return False def visittilde(self, node, value): - self.expansions.append(matchwordexpansion(node.pos[0], node.pos[1], - 'tilde')) + self.expansions.append(match_word_exp(node.pos[0], node.pos[1], "tilde")) def visitparameter(self, node, value): try: int(value) - kind = 'digits' + kind = "digits" except ValueError: - kind = helpconstants.parameters.get(value, 'param') + kind = help_constants.parameters.get(value, "param") - self.expansions.append(matchwordexpansion(node.pos[0], node.pos[1], - 'parameter-%s' % kind)) + self.expansions.append( + match_word_exp(node.pos[0], node.pos[1], f"parameter-{kind}") + ) def match(self): - logger.info('matching string %r', self.s) + if isinstance(self.s, bytes): + self.s = self.s.decode("utf-8") + logger.info(f"matching string {self.s}") # limit recursive parsing to a depth of 1 - self.ast = bashlex.parser.parsesingle(self.s, expansionlimit=1, - strictmode=False) + self.ast = bashlex.parser.parsesingle( + self.s, expansionlimit=1, strictmode=False + ) if self.ast: self.visit(self.ast) - assert len(self.groupstack) == 1, 'groupstack should contain only shell group after matching' + assert ( + len(self.group_stack) == 1 + ), "groupstack should contain only shell group after matching" # if we only have one command in there and no shell results/expansions, # reraise the original exception - if (len(self.groups) == 2 and not self.groups[0].results and - self.groups[1].manpage is None and not self.expansions): + if ( + len(self.groups) == 2 + and not self.groups[0].results + and self.groups[1].manpage is None + and not self.expansions + ): raise self.groups[1].error else: - logger.warn('no AST generated for %r', self.s) - - def debugmatch(): - s = '\n'.join(['%d) %r = %r' % (i, self.s[m.start:m.end], 
m.text) for i, m in enumerate(self.allmatches)]) + logger.warning("no AST generated for %r", self.s) + + def debug_match(): + s = "\n".join( + [ + f"{i}) {self.s[m.start: m.end]} = {m.text}" + for i, m in enumerate(self.all_matches) + ] + ) return s - self._markunparsedunknown() + self._mark_unparsed_unknown() - # fix each matchgroup seperately + # fix each MatchGroup separately for group in self.groups: if group.results: - if getattr(group, 'manpage', None): + if getattr(group, "manpage", None): # ensure that the program part isn't unknown (i.e. it has # something as its synopsis) assert not group.results[0].unknown - group.results = self._mergeadjacent(group.results) + group.results = self._merge_adjacent(group.results) - # add matchresult.match to existing matches + # add MatchResult.match to existing matches for i, m in enumerate(group.results): - assert m.end <= len(self.s), '%d %d' % (m.end, len(self.s)) + assert m.end <= len(self.s), f"{m.end} {len(self.s)}" - portion = self.s[m.start:m.end].decode('latin1') - group.results[i] = matchresult(m.start, m.end, m.text, portion) + portion = self.s[m.start: m.end] + group.results[i] = MatchResult(m.start, m.end, m.text, portion) - logger.debug('%r matches:\n%s', self.s, debugmatch()) + logger.debug("%r matches:\n%s", self.s, debug_match()) # not strictly needed, but doesn't hurt self.expansions.sort() return self.groups - def _markunparsedunknown(self): - '''the parser may leave a remainder at the end of the string if it doesn't - match any of the rules, mark them as unknowns''' - parsed = [False]*len(self.s) + def _mark_unparsed_unknown(self): + """the parser may leave a remainder at the end of the string if it doesn't + match any of the rules, mark them as unknowns""" + parsed = [False] * len(self.s) # go over all existing matches to see if we've covered the # current position - for start, end, _, _ in self.allmatches: + for start, end, _, _ in self.all_matches: for i in range(start, end): parsed[i] = True - for 
i in range(len(parsed)): + for i, parsed_i in enumerate(parsed): c = self.s[i] # whitespace is always 'unparsed' if c.isspace(): @@ -598,43 +697,45 @@ def _markunparsedunknown(self): # the parser ignores comments but we can use a trick to see if this # starts a comment and is beyond the ending index of the parsed - # portion of the inpnut - if (not self.ast or i > self.ast.pos[1]) and c == '#': - comment = matchresult(i, len(parsed), helpconstants.COMMENT, None) + # portion of the input + if (not self.ast or i > self.ast.pos[1]) and c == "#": + comment = MatchResult(i, len(parsed), help_constants.COMMENT, None) self.groups[0].results.append(comment) break if not parsed[i]: # add unparsed results to the 'shell' group - self.groups[0].results.append(self.unknown(c, i, i+1)) + self.groups[0].results.append(self.unknown(c, i, i + 1)) # there are no overlaps, so sorting by the start is enough self.groups[0].results.sort(key=lambda mr: mr.start) - def _resultindex(self): - '''return a mapping of matchresults to their index among all - matches, sorted by the start position of the matchresult''' + def _result_index(self): + """return a mapping of `MatchResult`s to their index among all + matches, sorted by the start position of the `MatchResult`""" d = {} i = 0 - for result in sorted(self.allmatches, key=lambda mr: mr.start): + for result in sorted(self.all_matches, key=lambda mr: mr.start): d[result] = i i += 1 return d - def _mergeadjacent(self, matches): + def _merge_adjacent(self, matches): merged = [] - resultindex = self._resultindex() - sametext = itertools.groupby(matches, lambda m: m.text) - for text, ll in sametext: - for l in util.groupcontinuous(ll, key=lambda m: resultindex[m]): - if len(l) == 1: - merged.append(l[0]) + result_index = self._result_index() + same_text = itertools.groupby(matches, lambda m: m.text) + for text, ll in same_text: + for l_group in util.group_continuous(ll, key=lambda m: result_index[m]): + l_group = list(l_group) + + if len(l_group) 
== 1: + merged.append(l_group[0]) else: - start = l[0].start - end = l[-1].end - endindex = resultindex[l[-1]] - for mr in l: - del resultindex[mr] - merged.append(matchresult(start, end, text, None)) - resultindex[merged[-1]] = endindex + start = l_group[0].start + end = l_group[-1].end + end_index = result_index[l_group[-1]] + for mr in l_group: + del result_index[mr] + merged.append(MatchResult(start, end, text, None)) + result_index[merged[-1]] = end_index return merged diff --git a/explainshell/options.py b/explainshell/options.py index 7abdd07f..ae3ba5b4 100644 --- a/explainshell/options.py +++ b/explainshell/options.py @@ -1,26 +1,31 @@ -import re, collections, logging +import collections +import logging +import re from explainshell import store -tokenstate = collections.namedtuple('tokenstate', 'startpos endpos token') +token_state = collections.namedtuple("token_state", "startpos endpos token") logger = logging.getLogger(__name__) + def extract(manpage): - '''extract options from all paragraphs that have been classified as containing - options''' + """extract options from all paragraphs that have been classified as containing + options""" for i, p in enumerate(manpage.paragraphs): if p.is_option: - s, l = extract_option(p.cleantext()) - if s or l: - expectsarg = any(x.expectsarg for x in s + l) + s, ln = extract_option(p.clean_text()) + if s or ln: + expects_arg = any(x.expects_arg for x in s + ln) s = [x.flag for x in s] - l = [x.flag for x in l] - manpage.paragraphs[i] = store.option(p, s, l, expectsarg) + ln = [x.flag for x in ln] + manpage.paragraphs[i] = store.Option(p, s, ln, expects_arg) else: logger.error("no options could be extracted from paragraph %r", p) -opt_regex = re.compile(r''' + +opt_regex = re.compile( + r""" (?P--?(?:\?|\#|(?:\w+-)*\w+)) # option starts with - or -- and can have - in the middle but not at the end, also allow '-?' (?: (?:\s?(=)?\s?) 
# -a= @@ -39,30 +44,37 @@ def extract(manpage): ) (?(argoptional)(?P[\]>])) # read closing ] or > if we have an arg )? # the whole arg thing is optional - (?P,\s*|\s+|\Z|/|\|)''', re.X) # read any trailing whitespace or the end of the string + (?P,\s*|\s+|\Z|/|\|)""", + re.X, +) # read any trailing whitespace or the end of the string -opt2_regex = re.compile(r''' +opt2_regex = re.compile( + r""" (?P\w+) # an option that doesn't start with any of the usual characters, e.g. options from 'dd' like bs=BYTES (?: (?:\s*=\s*) # an optional arg, e.g. bs=BYTES (?P\w+) ) - (?:,\s*|\s+|\Z)''', re.X) # end with , or whitespace or the end of the string + (?:,\s*|\s+|\Z)""", + re.X, +) # end with , or whitespace or the end of the string + def _flag(s, pos=0): - ''' + """ >>> _flag('a=b').groupdict() {'opt': 'a', 'arg': 'b'} >>> bool(_flag('---c-d')) False >>> bool(_flag('foobar')) False - ''' + """ m = opt2_regex.match(s, pos) return m + def _option(s, pos=0): - ''' + """ >>> bool(_option('-')) False >>> bool(_option('--')) @@ -75,19 +87,19 @@ def _option(s, pos=0): False >>> bool(_option('--a-b-')) False - >>> sorted(_option('-a').groupdict().iteritems()) + >>> sorted(_option('-a').groupdict().items()) [('arg', None), ('argoptional', None), ('argoptionalc', None), ('ending', ''), ('opt', '-a')] - >>> sorted(_option('--a').groupdict().iteritems()) + >>> sorted(_option('--a').groupdict().items()) [('arg', None), ('argoptional', None), ('argoptionalc', None), ('ending', ''), ('opt', '--a')] - >>> sorted(_option('-a').groupdict().iteritems()) + >>> sorted(_option('-a').groupdict().items()) [('arg', 'b'), ('argoptional', '<'), ('argoptionalc', '>'), ('ending', ''), ('opt', '-a')] - >>> sorted(_option('-a=[foo]').groupdict().iteritems()) + >>> sorted(_option('-a=[foo]').groupdict().items()) [('arg', 'foo'), ('argoptional', '['), ('argoptionalc', ']'), ('ending', ''), ('opt', '-a')] - >>> sorted(_option('-a=').groupdict().iteritems()) + >>> sorted(_option('-a=').groupdict().items()) 
[('arg', 'foo'), ('argoptional', '<'), ('argoptionalc', '>'), ('ending', ''), ('opt', '-a')] - >>> sorted(_option('-a=').groupdict().iteritems()) + >>> sorted(_option('-a=').groupdict().items()) [('arg', 'foo bar'), ('argoptional', '<'), ('argoptionalc', '>'), ('ending', ''), ('opt', '-a')] - >>> sorted(_option('-a=foo').groupdict().iteritems()) + >>> sorted(_option('-a=foo').groupdict().items()) [('arg', 'foo'), ('argoptional', None), ('argoptionalc', None), ('ending', ''), ('opt', '-a')] >>> bool(_option('-a=[foo>')) False @@ -95,87 +107,91 @@ def _option(s, pos=0): False >>> _option('-a foo').end(0) 3 - ''' + """ m = opt_regex.match(s, pos) if m: - if m.group('argoptional'): - c = m.group('argoptional') - cc = m.group('argoptionalc') - if (c == '[' and cc == ']') or (c == '<' and cc == '>'): + if m.group("argoptional"): + c = m.group("argoptional") + cc = m.group("argoptionalc") + if (c == "[" and cc == "]") or (c == "<" and cc == ">"): return m else: return return m -_eatbetweenregex = re.compile(r'\s*(?:or|,|\|)\s*') -def _eatbetween(s, pos): - ''' - >>> _eatbetween('foo', 0) +_eat_between_regex = re.compile(r"\s*(?:or|,|\|)\s*") + + +def _eat_between(s, pos): + """ + >>> _eat_between('foo', 0) 0 - >>> _eatbetween('a, b', 1) + >>> _eat_between('a, b', 1) 3 - >>> _eatbetween('a|b', 1) + >>> _eat_between('a|b', 1) 2 - >>> _eatbetween('a or b', 1) + >>> _eat_between('a or b', 1) 5 - ''' - m = _eatbetweenregex.match(s, pos) + """ + m = _eat_between_regex.match(s, pos) if m: return m.end(0) return pos -class extractedoption(collections.namedtuple('extractedoption', 'flag expectsarg')): + +class ExtractedOption(collections.namedtuple("ExtractedOption", "flag expects_arg")): def __eq__(self, other): if isinstance(other, str): return self.flag == other else: - return super(extractedoption, self).__eq__(other) + return super().__eq__(other) def __str__(self): return self.flag + def extract_option(txt): - '''this is where the magic is (suppose) to happen. 
try and find options - using a regex''' - startpos = currpos = len(txt) - len(txt.lstrip()) + """this is where the magic is (suppose) to happen. try and find options + using a regex""" + start_pos = curr_pos = len(txt) - len(txt.lstrip()) short, long = [], [] - m = _option(txt, currpos) + m = _option(txt, curr_pos) # keep going as long as options are found while m: - s = m.group('opt') - po = extractedoption(s, m.group('arg')) - if s.startswith('--'): + s = m.group("opt") + po = ExtractedOption(s, m.group("arg")) + if s.startswith("--"): long.append(po) else: short.append(po) - currpos = m.end(0) - currpos = _eatbetween(txt, currpos) - if m.group('ending') == '|': - m = _option(txt, currpos) + curr_pos = m.end(0) + curr_pos = _eat_between(txt, curr_pos) + if m.group("ending") == "|": + m = _option(txt, curr_pos) if not m: - startpos = currpos - while currpos < len(txt) and not txt[currpos].isspace(): - if txt[currpos] == '|': - short.append(extractedoption(txt[startpos:currpos], None)) - startpos = currpos - currpos += 1 - leftover = txt[startpos:currpos] + start_pos = curr_pos + while curr_pos < len(txt) and not txt[curr_pos].isspace(): + if txt[curr_pos] == "|": + short.append(ExtractedOption(txt[start_pos:curr_pos], None)) + start_pos = curr_pos + curr_pos += 1 + leftover = txt[start_pos:curr_pos] if leftover: - short.append(extractedoption(leftover, None)) + short.append(ExtractedOption(leftover, None)) else: - m = _option(txt, currpos) + m = _option(txt, curr_pos) - if currpos == startpos: - m = _flag(txt, currpos) + if curr_pos == start_pos: + m = _flag(txt, curr_pos) while m: - s = m.group('opt') - po = extractedoption(s, m.group('arg')) + s = m.group("opt") + po = ExtractedOption(s, m.group("arg")) long.append(po) - currpos = m.end(0) - currpos = _eatbetween(txt, currpos) - m = _flag(txt, currpos) + curr_pos = m.end(0) + curr_pos = _eat_between(txt, curr_pos) + m = _flag(txt, curr_pos) return short, long diff --git a/explainshell/store.py 
b/explainshell/store.py index 6cadaf0b..4e7a3247 100644 --- a/explainshell/store.py +++ b/explainshell/store.py @@ -1,74 +1,105 @@ -'''data objects to save processed man pages to mongodb''' -import pymongo, collections, re, logging +""" +data objects to save processed man pages to mongodb +""" -from explainshell import errors, util, helpconstants, config +import collections +import re +import logging + +# from pprint import pprint + +import pymongo +from bson import ObjectId + +from explainshell import errors, help_constants, util, config logger = logging.getLogger(__name__) -class classifiermanpage(collections.namedtuple('classifiermanpage', 'name paragraphs')): - '''a man page that had its paragraphs manually tagged as containing options - or not''' + +class ClassifierManpage(collections.namedtuple("ClassifierManpage", "name paragraphs")): + """a man page that had its paragraphs manually tagged as containing options + or not""" + @staticmethod def from_store(d): - m = classifiermanpage(d['name'], [paragraph.from_store(p) for p in d['paragraphs']]) + m = ClassifierManpage( + d["name"], [Paragraph.from_store(p) for p in d["paragraphs"]] + ) return m def to_store(self): - return {'name' : self.name, - 'paragraphs' : [p.to_store() for p in self.paragraphs]} + return { + "name": self.name, + "paragraphs": [p.to_store() for p in self.paragraphs], + } + + +class Paragraph: + """a paragraph inside a man page is text that ends with two new lines""" -class paragraph(object): - '''a paragraph inside a man page is text that ends with two new lines''' def __init__(self, idx, text, section, is_option): self.idx = idx self.text = text self.section = section self.is_option = is_option - def cleantext(self): - t = re.sub(r'<[^>]+>', '', self.text) - t = re.sub('<', '<', t) - t = re.sub('>', '>', t) + if not isinstance(self.text, str): + self.text = self.text.decode("utf-8") + + def clean_text(self): + t = re.sub(r"<[^>]+>", "", self.text) + t = re.sub("<", "<", t) + t = 
re.sub(">", ">", t) return t @staticmethod def from_store(d): - p = paragraph(d.get('idx', 0), d['text'].encode('utf8'), d['section'], d['is_option']) + p = Paragraph( + d.get("idx", 0), d["text"].encode("utf8"), d["section"], d["is_option"] + ) return p def to_store(self): - return {'idx' : self.idx, 'text' : self.text, 'section' : self.section, - 'is_option' : self.is_option} + return { + "idx": self.idx, + "text": self.text, + "section": self.section, + "is_option": self.is_option, + } def __repr__(self): - t = self.cleantext() - t = t[:min(20, t.find('\n'))].lstrip() - return '' % (self.idx, self.section, t) + t = self.clean_text() + t = t[: min(20, t.find("\n"))].lstrip() + return f"" def __eq__(self, other): if not other: return False return self.__dict__ == other.__dict__ -class option(paragraph): - '''a paragraph that contains extracted options + +class Option(Paragraph): + """a paragraph that contains extracted options short - a list of short options (-a, -b, ..) long - a list of long options (--a, --b) - expectsarg - specifies if one of the short/long options expects an additional argument + expects_arg - specifies if one of the short/long options expects an additional argument argument - specifies if to consider this as positional arguments - nestedcommand - specifies if the arguments to this option can start a nested command - ''' - def __init__(self, p, short, long, expectsarg, argument=None, nestedcommand=False): - paragraph.__init__(self, p.idx, p.text, p.section, p.is_option) + nested_cmd - specifies if the arguments to this option can start a nested command + """ + + def __init__(self, p, short, long, expects_arg, argument=None, nested_cmd=False): + Paragraph.__init__(self, p.idx, p.text, p.section, p.is_option) self.short = short self.long = long self._opts = self.short + self.long self.argument = argument - self.expectsarg = expectsarg - self.nestedcommand = nestedcommand - if nestedcommand: - assert expectsarg, 'an option that can nest commands 
must expect an argument' + self.expects_arg = expects_arg + self.nested_cmd = nested_cmd + if nested_cmd: + assert ( + expects_arg + ), "an option that can nest commands must expect an argument" @property def opts(self): @@ -76,29 +107,38 @@ def opts(self): @classmethod def from_store(cls, d): - p = paragraph.from_store(d) + p = Paragraph.from_store(d) - return cls(p, d['short'], d['long'], d['expectsarg'], d['argument'], - d.get('nestedcommand')) + # logger.debug(str(vars(d))) + + return cls( + p, + d["short"], + d["long"], + d["expectsarg"], + d["argument"], + d.get("nestedcmd"), + ) def to_store(self): - d = paragraph.to_store(self) - assert d['is_option'] - d['short'] = self.short - d['long'] = self.long - d['expectsarg'] = self.expectsarg - d['argument'] = self.argument - d['nestedcommand'] = self.nestedcommand + d = Paragraph.to_store(self) + assert d["is_option"] + d["short"] = self.short + d["long"] = self.long + d["expectsarg"] = self.expects_arg + d["argument"] = self.argument + d["nestedcmd"] = self.nested_cmd return d def __str__(self): - return '(%s)' % ', '.join([str(x) for x in self.opts]) + return "(" + ", ".join([str(x) for x in self.opts]) + ")" def __repr__(self): - return '' % (self.idx, str(self)) + return f" git commit updated - whether this man page was manually updated - nestedcommand - specifies if positional arguments to this program can start a nested command, + nested_cmd - specifies if positional arguments to this program can start a nested command, e.g. 
sudo, xargs - ''' - def __init__(self, source, name, synopsis, paragraphs, aliases, - partialmatch=False, multicommand=False, updated=False, - nestedcommand=False): + """ + + def __init__( + self, + source, + name, + synopsis, + paragraphs, + aliases, + partial_match=False, + multi_cmd=False, + updated=False, + nested_cmd=False, + ): self.source = source self.name = name self.synopsis = synopsis self.paragraphs = paragraphs self.aliases = aliases - self.partialmatch = partialmatch - self.multicommand = multicommand + self.partial_match = partial_match + self.multi_cmd = multi_cmd self.updated = updated - self.nestedcommand = nestedcommand + self.nested_cmd = nested_cmd - def removeoption(self, idx): + def remove_option(self, idx): for i, p in self.paragraphs: if p.idx == idx: - if not isinstance(p, option): - raise ValueError("paragraph %d isn't an option" % idx) - self.paragraphs[i] = paragraph(p.idx, p.text, p.section, False) + if not isinstance(p, Option): + raise ValueError(f"paragraph {idx} isn't an option") + self.paragraphs[i] = Paragraph(p.idx, p.text, p.section, False) return - raise ValueError('idx %d not found' % idx) + raise ValueError(f"idx {idx} not found") @property - def namesection(self): - name, section = util.namesection(self.source[:-3]) - return '%s(%s)' % (name, section) + def name_section(self): + name, section = util.name_section(self.source[:-3]) + return f"{name}({section})" @property def section(self): - name, section = util.namesection(self.source[:-3]) + name, section = util.name_section(self.source[:-3]) return section @property def options(self): - return [p for p in self.paragraphs if isinstance(p, option)] + return [p for p in self.paragraphs if isinstance(p, Option)] @property def arguments(self): @@ -159,69 +209,103 @@ def arguments(self): groups.setdefault(opt.argument, []).append(opt) # merge all the paragraphs under the same argument to a single string - for k, l in groups.iteritems(): - groups[k] = '\n\n'.join([p.text for p in 
l]) + for k, ln in groups.items(): + groups[k] = "\n\n".join([p.text for p in ln]) return groups @property - def synopsisnoname(self): - return re.match(r'[\w|-]+ - (.*)$', self.synopsis).group(1) + def synopsis_no_name(self): + return re.match(r"[\w|-]+ - (.*)$", self.synopsis).group(1) def find_option(self, flag): - for option in self.options: - for o in option.opts: + for o_tmp in self.options: + for o in o_tmp.opts: if o == flag: - return option + return o_tmp def to_store(self): - return {'source' : self.source, 'name' : self.name, 'synopsis' : self.synopsis, - 'paragraphs' : [p.to_store() for p in self.paragraphs], - 'aliases' : self.aliases, 'partialmatch' : self.partialmatch, - 'multicommand' : self.multicommand, 'updated' : self.updated, - 'nestedcommand' : self.nestedcommand} + return { + "source": self.source, + "name": self.name, + "synopsis": self.synopsis, + "paragraphs": [p.to_store() for p in self.paragraphs], + "aliases": self.aliases, + "partial_match": self.partial_match, + "multi_cmd": self.multi_cmd, + "updated": self.updated, + "nested_cmd": self.nested_cmd, + } @staticmethod def from_store(d): paragraphs = [] - for pd in d.get('paragraphs', []): - pp = paragraph.from_store(pd) - if pp.is_option == True and 'short' in pd: - pp = option.from_store(pd) + for pd in d.get("paragraphs", []): + pp = Paragraph.from_store(pd) + if pp.is_option is True and "short" in pd: + pp = Option.from_store(pd) paragraphs.append(pp) - synopsis = d['synopsis'] + synopsis = d["synopsis"] if synopsis: - synopsis = synopsis.encode('utf8') + synopsis = synopsis.encode("utf8") else: - synopsis = helpconstants.NOSYNOPSIS - - return manpage(d['source'], d['name'], synopsis, paragraphs, - [tuple(x) for x in d['aliases']], d['partialmatch'], - d['multicommand'], d['updated'], d.get('nestedcommand')) + synopsis = help_constants.NO_SYNOPSIS + + partial_match = None + if "partialmatch" in d: + partial_match = d["partialmatch"] + elif "partial_match" in d: + partial_match = 
d["partial_match"] + + multi_cmd = None + if "multicommand" in d: + multi_cmd = d["multicommand"] + elif "multi_cmd" in d: + multi_cmd = d["multi_cmd"] + + nested_cmd = None + if "nestedcmd" in d: + nested_cmd = d["nestedcmd"] + elif "nested_cmd" in d: + nested_cmd = d["nested_cmd"] + + return ManPage( + d["source"], + d["name"], + synopsis, + paragraphs, + [tuple(x) for x in d["aliases"]], + partial_match, + multi_cmd, + d["updated"], + nested_cmd, + ) @staticmethod def from_store_name_only(name, source): - return manpage(source, name, None, [], [], None, None, None) + return ManPage(source, name, None, [], [], None, None, None) def __repr__(self): - return '' % (self.name, self.section, len(self.options)) + return f"" -class store(object): - '''read/write processed man pages from mongodb + +class Store: + """read/write processed man pages from mongodb we use three collections: 1) classifier - contains manually tagged paragraphs from man pages 2) manpage - contains a processed man page 3) mapping - contains (name, manpageid, score) tuples - ''' - def __init__(self, db='explainshell', host=config.MONGO_URI): - logger.info('creating store, db = %r, host = %r', db, host) + """ + + def __init__(self, db="explainshell", host=config.MONGO_URI): + logger.info("creating store, db = %r, host = %r", db, host) self.connection = pymongo.MongoClient(host) self.db = self.connection[db] - self.classifier = self.db['classifier'] - self.manpage = self.db['manpage'] - self.mapping = self.db['mapping'] + self.classifier = self.db["classifier"] + self.manpage = self.db["manpage"] + self.mapping = self.db["mapping"] def close(self): self.connection.disconnect() @@ -231,169 +315,205 @@ def drop(self, confirm=False): if not confirm: return - logger.info('dropping mapping, manpage, collections') + logger.info("dropping mapping, manpage, collections") self.mapping.drop() self.manpage.drop() - def trainingset(self): + def training_set(self): for d in self.classifier.find(): - yield 
classifiermanpage.from_store(d) + yield ClassifierManpage.from_store(d) def __contains__(self, name): - c = self.mapping.find({'src' : name}).count() + c = self.mapping.count_documents({"src": name}) return c > 0 def __iter__(self): for d in self.manpage.find(): - yield manpage.from_store(d) + yield ManPage.from_store(d) - def findmanpage(self, name): - '''find a man page by its name, everything following the last dot (.) in name, + def find_man_page(self, name): + """find a man page by its name, everything following the last dot (.) in name, is taken as the section of the man page we return the man page found with the highest score, and a list of suggestions that also matched the given name (only the first item - is prepopulated with the option data)''' - if name.endswith('.gz'): - logger.info('name ends with .gz, looking up an exact match by source') - d = self.manpage.find_one({'source':name}) + is prepopulated with the option data)""" + if name.endswith(".gz"): + logger.info("name ends with .gz, looking up an exact match by source") + d = self.manpage.find_one({"source": name}) if not d: raise errors.ProgramDoesNotExist(name) - m = manpage.from_store(d) - logger.info('returning %s', m) + m = ManPage.from_store(d) + logger.info("returning %s", m) return [m] section = None - origname = name + orig_name = name # don't try to look for a section if it's . 
(source) - if name != '.': - splitted = name.rsplit('.', 1) + if name != ".": + splitted = name.rsplit(".", 1) name = splitted[0] if len(splitted) > 1: section = splitted[1] - logger.info('looking up manpage in mapping with src %r', name) - cursor = self.mapping.find({'src' : name}) - count = cursor.count() - if not count: + logger.info("looking up manpage in mapping with src %r", name) + cursor = list(self.mapping.find({"src": name})) + + count = len(cursor) + if count == 0: + logger.debug(f"count is {count}") raise errors.ProgramDoesNotExist(name) - dsts = dict(((d['dst'], d['score']) for d in cursor)) - cursor = self.manpage.find({'_id' : {'$in' : list(dsts.keys())}}, {'name' : 1, 'source' : 1}) - if cursor.count() != len(dsts): - logger.error('one of %r mappings is missing in manpage collection ' - '(%d mappings, %d found)', dsts, len(dsts), cursor.count()) - results = [(d.pop('_id'), manpage.from_store_name_only(**d)) for d in cursor] + dsts = {d["dst"]: d["score"] for d in cursor} + cursor = list( + self.manpage.find( + {"_id": {"$in": list(dsts.keys())}}, {"name": 1, "source": 1} + ) + ) + if len(list(cursor)) != len(dsts): + logger.error( + "one of %r mappings is missing in manpage collection " + "(%d mappings, %d found)", + dsts, + len(dsts), + len(cursor), + ) + results = [(d.pop("_id"), ManPage.from_store_name_only(**d)) for d in cursor] results.sort(key=lambda x: dsts.get(x[0], 0), reverse=True) - logger.info('got %s', results) + logger.info("got %s", results) if section is not None: if len(results) > 1: - results.sort(key=lambda (oid, m): m.section == section, reverse=True) - logger.info(r'sorting %r so %s is first', results, section) - if not results[0][1].section == section: - raise errors.ProgramDoesNotExist(origname) - results.extend(self._discovermanpagesuggestions(results[0][0], results)) + results.sort( + key=lambda oid_m: oid_m[1].section == section, reverse=True + ) + logger.info(r"sorting %r so %s is first", results, section) + if 
results[0][1].section != section: + raise errors.ProgramDoesNotExist(orig_name) + results.extend(self._discover_manpage_suggestions(results[0][0], results)) oid = results[0][0] results = [x[1] for x in results] - results[0] = manpage.from_store(self.manpage.find_one({'_id' : oid})) + results[0] = ManPage.from_store(self.manpage.find_one({"_id": oid})) return results - def _discovermanpagesuggestions(self, oid, existing): - '''find suggestions for a given man page + def _discover_manpage_suggestions(self, oid, existing): + """find suggestions for a given man page oid is the objectid of the man page in question, existing is a list of (oid, man page) of suggestions that were already discovered - ''' - skip = set([oid for oid, m in existing]) - cursor = self.mapping.find({'dst' : oid}) + """ + skip = {oid for oid, m in existing} + cursor = self.mapping.find({"dst": oid}) # find all srcs that point to oid - srcs = [d['src'] for d in cursor] + srcs = [d["src"] for d in cursor] # find all dsts of srcs - suggestionoids = self.mapping.find({'src' : {'$in' : srcs}}, {'dst' : 1}) + suggestion_oids = self.mapping.find({"src": {"$in": srcs}}, {"dst": 1}) # remove already discovered - suggestionoids = [d['dst'] for d in suggestionoids if d['dst'] not in skip] - if not suggestionoids: + suggestion_oids = [d["dst"] for d in suggestion_oids if d["dst"] not in skip] + if not suggestion_oids: return [] # get just the name and source of found suggestions - suggestionoids = self.manpage.find({'_id' : {'$in' : suggestionoids}}, - {'name' : 1, 'source' : 1}) - return [(d.pop('_id'), manpage.from_store_name_only(**d)) for d in suggestionoids] - - def addmapping(self, src, dst, score): - self.mapping.insert({'src' : src, 'dst' : dst, 'score' : score}) - - def addmanpage(self, m): - '''add m into the store, if it exists first remove it and its mappings - - each man page may have aliases besides the name determined by its - basename''' - d = self.manpage.find_one({'source' : m.source}) + 
suggestion_oids = self.manpage.find( + {"_id": {"$in": suggestion_oids}}, {"name": 1, "source": 1} + ) + return [ + (d.pop("_id"), ManPage.from_store_name_only(**d)) for d in suggestion_oids + ] + + def add_mapping(self, src, dst, score): + if not isinstance(dst, ObjectId): + dst = dst.inserted_id + self.mapping.insert_one({"src": src, "dst": dst, "score": score}) + + def add_manpage(self, m): + """add `m` into the store, if it exists first remove it and it's mappings + + each man page may have aliases besides the name determined by it's + basename""" + d = self.manpage.find_one({"source": m.source}) if d: - logger.info('removing old manpage %s (%s)', m.source, d['_id']) - self.manpage.remove(d['_id']) + logger.info("removing old manpage %s (%s)", m.source, d["_id"]) + self.manpage.delete_one({"_id": d["_id"]}) # remove old mappings if there are any - c = self.mapping.count() - self.mapping.remove({'dst' : d['_id']}) - c -= self.mapping.count() - logger.info('removed %d mappings for manpage %s', c, m.source) + c = self.mapping.count_documents({}) + self.mapping.delete_one({"dst": d["_id"]}) + c -= self.mapping.count_documents({}) + logger.info("removed %d mappings for manpage %s", c, m.source) - o = self.manpage.insert(m.to_store()) + o = self.manpage.insert_one(m.to_store()) for alias, score in m.aliases: - self.addmapping(alias, o, score) - logger.info('inserting mapping (alias) %s -> %s (%s) with score %d', alias, m.name, o, score) + self.add_mapping(alias, o, score) + logger.info( + "inserting mapping (alias) %s -> %s (%s) with score %d", + alias, + m.name, + o, + score, + ) return m - def updatemanpage(self, m): - '''update m and add new aliases if necessary + def update_man_page(self, m): + """update m and add new aliases if necessary - change updated attribute so we don't overwrite this in the future''' - logger.info('updating manpage %s', m.source) + change updated attribute so we don't overwrite this in the future""" + logger.info("updating manpage %s", 
m.source) m.updated = True - self.manpage.update({'source' : m.source}, m.to_store()) - _id = self.manpage.find_one({'source' : m.source}, fields={'_id':1})['_id'] + self.manpage.update_one({"source": m.source}, m.to_store()) + _id = self.manpage.find_one({"source": m.source}, fields={"_id": 1})["_id"] for alias, score in m.aliases: if alias not in self: - self.addmapping(alias, _id, score) - logger.info('inserting mapping (alias) %s -> %s (%s) with score %d', alias, m.name, _id, score) + self.add_mapping(alias, _id, score) + logger.info( + "inserting mapping (alias) %s -> %s (%s) with score %d", + alias, + m.name, + _id, + score, + ) else: - logger.debug('mapping (alias) %s -> %s (%s) already exists', alias, m.name, _id) + logger.debug( + "mapping (alias) %s -> %s (%s) already exists", alias, m.name, _id + ) return m def verify(self): # check that everything in manpage is reachable mappings = list(self.mapping.find()) - reachable = set([m['dst'] for m in mappings]) - manpages = set([m['_id'] for m in self.manpage.find(fields={'_id':1})]) + reachable = {m["dst"] for m in mappings} + man_pages = {m["_id"] for m in self.manpage.find(fields={"_id": 1})} ok = True - unreachable = manpages - reachable + unreachable = man_pages - reachable if unreachable: - logger.error('manpages %r are unreachable (nothing maps to them)', unreachable) - unreachable = [self.manpage.find_one({'_id' : u})['name'] for u in unreachable] + logger.error( + "manpages %r are unreachable (nothing maps to them)", unreachable + ) + unreachable = [ + self.manpage.find_one({"_id": u})["name"] for u in unreachable + ] ok = False - notfound = reachable - manpages + notfound = reachable - man_pages if notfound: - logger.error('mappings to inexisting manpages: %r', notfound) + logger.error("mappings to non-existing manpages: %r", notfound) ok = False return ok, unreachable, notfound def names(self): - cursor = self.manpage.find(fields={'name':1}) + cursor = self.manpage.find({}, {"name": 1}) for d in 
cursor: - yield d['_id'], d['name'] + yield d["_id"], d["name"] def mappings(self): - cursor = self.mapping.find(fields={'src':1}) + cursor = self.mapping.find({}, {"src": 1}) for d in cursor: - yield d['src'], d['_id'] + yield d["src"], d["_id"] - def setmulticommand(self, manpageid): - self.manpage.update({'_id' : manpageid}, {'$set' : {'multicommand' : True}}) + def set_multi_cmd(self, manpage_id): + self.manpage.update_one({"_id": manpage_id}, {"$set": {"multi_cmd": True}}) diff --git a/explainshell/util.py b/explainshell/util.py index 127177b1..ba3adb4c 100644 --- a/explainshell/util.py +++ b/explainshell/util.py @@ -1,8 +1,9 @@ import itertools from operator import itemgetter -def consecutive(l, fn): - '''yield consecutive items from l that fn returns True for them + +def consecutive(ln, fn): + """yield consecutive items from l that fn returns True for them >>> even = lambda x: x % 2 == 0 >>> list(consecutive([], even)) @@ -17,12 +18,12 @@ def consecutive(l, fn): [[1], [2, 4]] >>> list(consecutive([1, 2, 4, 5, 7, 8, 10], even)) [[1], [2, 4], [5], [7], [8, 10]] - ''' - it = iter(l) + """ + it = iter(ln) ll = [] try: while True: - x = it.next() + x = next(it) if fn(x): ll.append(x) else: @@ -34,51 +35,57 @@ def consecutive(l, fn): if ll: yield ll -def groupcontinuous(l, key=None): - ''' - >>> list(groupcontinuous([1, 2, 4, 5, 7, 8, 10])) + +def group_continuous(l, key=None): + """ + >>> list(group_continuous([1, 2, 4, 5, 7, 8, 10])) [[1, 2], [4, 5], [7, 8], [10]] - >>> list(groupcontinuous(range(5))) + >>> list(group_continuous(range(5))) [[0, 1, 2, 3, 4]] - ''' + """ if key is None: key = lambda x: x - for k, g in itertools.groupby(enumerate(l), lambda (i, x): i-key(x)): - yield map(itemgetter(1), g) + for k, g in itertools.groupby(enumerate(l), lambda ix: ix[0] - key(ix[1])): + yield list(map(itemgetter(1), g)) -def toposorted(graph, parents): + +def topo_sorted(graph, parents): """ Returns vertices of a DAG in topological order. 
Arguments: - graph -- vetices of a graph to be toposorted - parents -- function (vertex) -> vertices to preceed + graph -- vertices of a graph to be topo_sorted + parents -- function (vertex) -> vertices to proceed given vertex in output """ result = [] used = set() + def use(v, top): if id(v) in used: return for parent in parents(v): if parent is top: - raise ValueError('graph is cyclical', graph) + raise ValueError("graph is cyclical", graph) use(parent, v) used.add(id(v)) result.append(v) + for v in graph: use(v, v) return result + def pairwise(iterable): a, b = itertools.tee(iterable) next(b, None) - return itertools.izip(a, b) + return zip(a, b) + -class peekable(object): - ''' - >>> it = peekable(iter('abc')) - >>> it.index, it.peek(), it.index, it.peek(), it.next(), it.index, it.peek(), it.next(), it.next(), it.index +class Peekable: + """ + >>> it = Peekable(iter('abc')) + >>> it.index, it.peek(), it.index, it.peek(), next(it), it.index, it.peek(), next(it), next(it), it.index (0, 'a', 0, 'a', 'a', 1, 'b', 'b', 'c', 3) >>> it.peek() Traceback (most recent call last): @@ -88,58 +95,66 @@ class peekable(object): Traceback (most recent call last): File "", line 1, in ? StopIteration - >>> it.next() + >>> next(it) Traceback (most recent call last): File "", line 1, in ? 
StopIteration - ''' + """ + def __init__(self, it): self.it = it self._peeked = False - self._peekvalue = None + self._peek_value = None self._idx = 0 + def __iter__(self): return self - def next(self): + + def __next__(self): if self._peeked: self._peeked = False self._idx += 1 - return self._peekvalue - n = self.it.next() + return self._peek_value + n = next(self.it) self._idx += 1 return n - def hasnext(self): + + def has_next(self): try: self.peek() return True except StopIteration: return False + def peek(self): if self._peeked: - return self._peekvalue + return self._peek_value else: - self._peekvalue = self.it.next() + self._peek_value = next(self.it) self._peeked = True - return self._peekvalue + return self._peek_value + @property def index(self): - '''return the index of the next item returned by next()''' + """return the index of the next item returned by next()""" return self._idx -def namesection(path): - assert '.gz' not in path - name, section = path.rsplit('.', 1) + +def name_section(path): + assert ".gz" not in path + name, section = path.rsplit(".", 1) return name, section -class propertycache(object): + +class PropertyCache: def __init__(self, func): self.func = func self.name = func.__name__ def __get__(self, obj, type=None): result = self.func(obj) - self.cachevalue(obj, result) + self.cache_value(obj, result) return result - def cachevalue(self, obj, value): + def cache_value(self, obj, value): setattr(obj, self.name, value) diff --git a/explainshell/web/__init__.py b/explainshell/web/__init__.py index b16223ca..ef2a7784 100644 --- a/explainshell/web/__init__.py +++ b/explainshell/web/__init__.py @@ -1,10 +1,11 @@ from flask import Flask + app = Flask(__name__) from explainshell.web import views from explainshell import store, config if config.DEBUG: - from explainshell.web import debugviews + from explainshell.web import debug_views app.config.from_object(config) diff --git a/explainshell/web/debug_views.py b/explainshell/web/debug_views.py 
new file mode 100644 index 00000000..00fa2b1c --- /dev/null +++ b/explainshell/web/debug_views.py @@ -0,0 +1,88 @@ +import logging + +from flask import render_template, request, abort, redirect, url_for, json + +from explainshell import manager, config, store +from explainshell.web import app, helpers + +logger = logging.getLogger(__name__) + + +@app.route("/debug") +def debug(): + s = store.Store("explainshell", config.MONGO_URI) + d = {"manpages": []} + for mp in s: + synopsis = "" + if mp.synopsis: + synopsis = mp.synopsis[:20] + dd = {"name": mp.name, "synopsis": synopsis} + o_list = [] + for o in mp.options: + o_list.append(str(o)) + dd["options"] = ", ".join(o_list) + d["manpages"].append(dd) + d["manpages"].sort(key=lambda d: d["name"].lower()) + return render_template("debug.html", d=d) + + +def _convert_value(value): + if isinstance(value, list): + return [s.strip() for s in value] + elif value.lower() == "true": + return True + elif value: + return value.strip() + return False + + +@app.route("/debug/tag/", methods=["GET", "POST"]) +def tag(source): + mngr = manager.Manager(config.MONGO_URI, "explainshell", [], False, False) + s = mngr.store + m = s.find_man_page(source)[0] + assert m + + if "paragraphs" in request.form: + paragraphs = json.loads(request.form["paragraphs"]) + m_paragraphs = [] + for d in paragraphs: + idx = d["idx"] + text = d["text"] + section = d["section"] + short = [s.strip() for s in d["short"]] + long = [s.strip() for s in d["long"]] + expects_arg = _convert_value(d["expects_arg"]) + nested_cmd = _convert_value(d["nested_cmd"]) + if isinstance(nested_cmd, str): + nested_cmd = [nested_cmd] + elif nested_cmd is True: + logger.error("nested_cmd %r must be a string or list", nested_cmd) + abort(503) + argument = d["argument"] + if not argument: + argument = None + p = store.Paragraph(idx, text, section, d["is_option"]) + if d["is_option"] and (short or long or argument): + p = store.Option(p, short, long, expects_arg, argument, 
nested_cmd) + m_paragraphs.append(p) + + if request.form.get("nested_cmd", "").lower() == "true": + m.nested_cmd = True + else: + m.nested_cmd = False + m = mngr.edit(m, m_paragraphs) + if m: + return redirect(url_for("explain", cmd=m.name)) + else: + abort(503) + else: + helpers.convert_paragraphs(m) + for p in m.paragraphs: + if isinstance(p, store.Option): + if isinstance(p.expects_arg, list): + p.expects_arg = ", ".join(p.expects_arg) + if isinstance(p.nested_cmd, list): + p.nested_cmd = ", ".join(p.nested_cmd) + + return render_template("tagger.html", m=m) diff --git a/explainshell/web/debugviews.py b/explainshell/web/debugviews.py deleted file mode 100644 index 08f970a5..00000000 --- a/explainshell/web/debugviews.py +++ /dev/null @@ -1,85 +0,0 @@ -import logging - -from flask import render_template, request, abort, redirect, url_for, json - -from explainshell import manager, config, store -from explainshell.web import app, helpers - -logger = logging.getLogger(__name__) - -@app.route('/debug') -def debug(): - s = store.store('explainshell', config.MONGO_URI) - d = {'manpages' : []} - for mp in s: - synopsis = '' - if mp.synopsis: - synopsis = mp.synopsis[:20] - dd = {'name' : mp.name, 'synopsis' : synopsis} - l = [] - for o in mp.options: - l.append(str(o)) - dd['options'] = ', '.join(l) - d['manpages'].append(dd) - d['manpages'].sort(key=lambda d: d['name'].lower()) - return render_template('debug.html', d=d) - -def _convertvalue(value): - if isinstance(value, list): - return [s.strip() for s in value] - elif value.lower() == 'true': - return True - elif value: - return value.strip() - return False - -@app.route('/debug/tag/', methods=['GET', 'POST']) -def tag(source): - mngr = manager.manager(config.MONGO_URI, 'explainshell', [], False, False) - s = mngr.store - m = s.findmanpage(source)[0] - assert m - - if 'paragraphs' in request.form: - paragraphs = json.loads(request.form['paragraphs']) - mparagraphs = [] - for d in paragraphs: - idx = d['idx'] - text = 
d['text'] - section = d['section'] - short = [s.strip() for s in d['short']] - long = [s.strip() for s in d['long']] - expectsarg = _convertvalue(d['expectsarg']) - nestedcommand = _convertvalue(d['nestedcommand']) - if isinstance(nestedcommand, str): - nestedcommand = [nestedcommand] - elif nestedcommand is True: - logger.error('nestedcommand %r must be a string or list', nestedcommand) - abort(503) - argument = d['argument'] - if not argument: - argument = None - p = store.paragraph(idx, text, section, d['is_option']) - if d['is_option'] and (short or long or argument): - p = store.option(p, short, long, expectsarg, argument, nestedcommand) - mparagraphs.append(p) - - if request.form.get('nestedcommand', '').lower() == 'true': - m.nestedcommand = True - else: - m.nestedcommand = False - m = mngr.edit(m, mparagraphs) - if m: - return redirect(url_for('explain', cmd=m.name)) - else: - abort(503) - else: - helpers.convertparagraphs(m) - for p in m.paragraphs: - if isinstance(p, store.option): - if isinstance(p.expectsarg, list): - p.expectsarg = ', '.join(p.expectsarg) - if isinstance(p.nestedcommand, list): - p.nestedcommand = ', '.join(p.nestedcommand) - - return render_template('tagger.html', m=m) diff --git a/explainshell/web/helpers.py b/explainshell/web/helpers.py index 481337aa..ce199808 100644 --- a/explainshell/web/helpers.py +++ b/explainshell/web/helpers.py @@ -1,20 +1,23 @@ from explainshell import util -def convertparagraphs(manpage): + +def convert_paragraphs(manpage): for p in manpage.paragraphs: - p.text = p.text.decode('utf-8') + p.text = p.text.decode("utf-8") return manpage + def suggestions(matches, command): - '''enrich command matches with links to other man pages with the - same name''' + """enrich command matches with links to other man pages with the + same name""" for m in matches: - if 'name' in m and 'suggestions' in m: - before = command[:m['start']] - after = command[m['end']:] - newsuggestions = [] - for othermp in 
sorted(m['suggestions'], key=lambda mp: mp.section): - mid = '%s.%s' % (othermp.name, othermp.section) - newsuggestions.append({'cmd' : ''.join([before, mid, after]), - 'text' : othermp.namesection}) - m['suggestions'] = newsuggestions + if "name" in m and "suggestions" in m: + before = command[: m["start"]] + after = command[m["end"]:] + new_suggestions = [] + for other_mp in sorted(m["suggestions"], key=lambda mp: mp.section): + mid = f"{other_mp.name}.{other_mp.section}" + new_suggestions.append( + {"cmd": "".join([before, mid, after]), "text": other_mp.name_section} + ) + m["suggestions"] = new_suggestions diff --git a/explainshell/web/static/css/es.css b/explainshell/web/static/css/es.css index 74175a3c..abc256b3 100644 --- a/explainshell/web/static/css/es.css +++ b/explainshell/web/static/css/es.css @@ -115,26 +115,20 @@ a { width: 460px; /* necessary for right:0 to work at #themeSelector */ position: absolute; top: -30px; - left: 240px; + left: 130px; font-family: 'Berkshire Swash', cursive; font-size: 24px; } #top-search { - width: 150px; + width: 260px; -webkit-transition: all .5s ease; -moz-transition: all .5s ease; transition: all .5s ease; position: relative; - top: -3px; /* looks a bit unaligned without this in chrome */ left: 0; } -#top-search:focus { - left: -110px; - width: 260px; -} - #prevnext { text-align: center; padding-top: 10px; @@ -166,12 +160,23 @@ a { /* using a fixed margin-left doesn't work both with and without the search bar in the menu */ position: absolute; - right: 0; + right: -110px; margin-top: 5px; font-family: "Courier New",Courier,Monaco,Menlo,Consolas,monospace; font-size: 20px; } +pre code.hljs { + background-color: transparent; + padding: 2px; +} +pre.highlightjs { + background-color: transparent; + margin: 2px; + padding: 0px; + border: none; +} + body[data-theme='dark'] { color: white; } @@ -198,6 +203,9 @@ body[data-theme='dark'] { background-color: #222; color: white; } +[data-theme='dark'] pre.highlightjs { + 
background-color: transparent; +} [data-theme='dark'] .caret { /* this is probably a problem with the bootstrap theme */ diff --git a/explainshell/web/static/css/highlight.default.min.css b/explainshell/web/static/css/highlight.default.min.css new file mode 100644 index 00000000..a75ea911 --- /dev/null +++ b/explainshell/web/static/css/highlight.default.min.css @@ -0,0 +1,9 @@ +/*! + Theme: Default + Description: Original highlight.js style + Author: (c) Ivan Sagalaev + Maintainer: @highlightjs/core-team + Website: https://highlightjs.org/ + License: see project LICENSE + Touched: 2021 +*/pre code.hljs{display:block;overflow-x:auto;padding:1em}code.hljs{padding:3px 5px}.hljs{background:#f3f3f3;color:#444}.hljs-comment{color:#697070}.hljs-punctuation,.hljs-tag{color:#444a}.hljs-tag .hljs-attr,.hljs-tag .hljs-name{color:#444}.hljs-attribute,.hljs-doctag,.hljs-keyword,.hljs-meta .hljs-keyword,.hljs-name,.hljs-selector-tag{font-weight:700}.hljs-deletion,.hljs-number,.hljs-quote,.hljs-selector-class,.hljs-selector-id,.hljs-string,.hljs-template-tag,.hljs-type{color:#800}.hljs-section,.hljs-title{color:#800;font-weight:700}.hljs-link,.hljs-operator,.hljs-regexp,.hljs-selector-attr,.hljs-selector-pseudo,.hljs-symbol,.hljs-template-variable,.hljs-variable{color:#ab5656}.hljs-literal{color:#695}.hljs-addition,.hljs-built_in,.hljs-bullet,.hljs-code{color:#397300}.hljs-meta{color:#1f7199}.hljs-meta .hljs-string{color:#38a}.hljs-emphasis{font-style:italic}.hljs-strong{font-weight:700} \ No newline at end of file diff --git a/explainshell/web/static/css/hljs-atom-one-dark.min.css b/explainshell/web/static/css/hljs-atom-one-dark.min.css new file mode 100644 index 00000000..5344ee38 --- /dev/null +++ b/explainshell/web/static/css/hljs-atom-one-dark.min.css @@ -0,0 +1 @@ +pre code.hljs{display:block;overflow-x:auto;padding:1em}code.hljs{padding:3px 
5px}.hljs{color:#abb2bf;background:#282c34}.hljs-comment,.hljs-quote{color:#5c6370;font-style:italic}.hljs-doctag,.hljs-formula,.hljs-keyword{color:#c678dd}.hljs-deletion,.hljs-name,.hljs-section,.hljs-selector-tag,.hljs-subst{color:#e06c75}.hljs-literal{color:#56b6c2}.hljs-addition,.hljs-attribute,.hljs-meta .hljs-string,.hljs-regexp,.hljs-string{color:#98c379}.hljs-attr,.hljs-number,.hljs-selector-attr,.hljs-selector-class,.hljs-selector-pseudo,.hljs-template-variable,.hljs-type,.hljs-variable{color:#d19a66}.hljs-bullet,.hljs-link,.hljs-meta,.hljs-selector-id,.hljs-symbol,.hljs-title{color:#61aeee}.hljs-built_in,.hljs-class .hljs-title,.hljs-title.class_{color:#e6c07b}.hljs-emphasis{font-style:italic}.hljs-strong{font-weight:700}.hljs-link{text-decoration:underline} \ No newline at end of file diff --git a/explainshell/web/static/js/es.js b/explainshell/web/static/js/es.js index 951a1b88..408abb23 100644 --- a/explainshell/web/static/js/es.js +++ b/explainshell/web/static/js/es.js @@ -17,11 +17,19 @@ var themes = { default: '//cdnjs.cloudflare.com/ajax/libs/twitter-bootstrap/2.3.1/css/bootstrap.min.css', dark: '//maxcdn.bootstrapcdn.com/bootswatch/2.3.1/cyborg/bootstrap.min.css' }; +var hljs_themes = { + default: '//cdnjs.cloudflare.com/ajax/libs/highlight.js/11.9.0/styles/default.min.css', + dark: '//cdnjs.cloudflare.com/ajax/libs/highlight.js/11.9.0/styles/atom-one-dark.min.css' +} if (debug){ themes = { default: '/static/css/bootstrap.min.css', dark: '/static/css/bootstrap-cyborg.min.css' }; + hljs_themes = { + default: '/static/css/highlight.default.min.css', + dark: '/static/css/hljs-atom-one-dark.min.css' + }; } var assignedcolors = {}; @@ -37,7 +45,7 @@ var docCookies = { return decodeURIComponent(document.cookie.replace(new RegExp("(?:(?:^|.*;)\\s*" + encodeURIComponent(sKey).replace(/[\-\.\+\*]/g, "\\$&") + "\\s*\\=\\s*([^;]*).*$)|^.*$"), "$1")) || null; }, setItem: function (sKey, sValue, vEnd, sPath, sDomain, bSecure) { - if (!sKey || 
/^(?:expires|max\-age|path|domain|secure)$/i.test(sKey)) { return false; } + if (!sKey || /^(?:expires|max-age|path|domain|secure)$/i.test(sKey)) { return false; } var sExpires = ""; if (vEnd) { switch (vEnd.constructor) { @@ -224,6 +232,8 @@ function eslink(clazz, option, mid, color) { this.goingleft = rrmid <= mid; $(this.help).css("border-color", this.color); + + $("#" + clazz + " b:first-of-type").css("color", this.color); } } @@ -256,7 +266,7 @@ eslink.prototype.nearby = function(other) { return Math.abs(r.right - rr.left) <= closeness || Math.abs(r.left - rr.right) <= closeness; }; -// a conveninent wrapper around an array of points that allows to chain appends +// a convenient wrapper around an array of points that allows to chain appends function espath() { this.points = []; } @@ -1127,6 +1137,8 @@ function setTheme(theme) { console.log('setting theme to', theme); $("#bootstrapCSS").attr('href', themes[theme]); + $("#hljsCSS").attr('href', hljs_themes[theme]); + $(document.body).attr('data-theme', theme); docCookies.setItem(themeCookieName, theme, Infinity, '/'); } @@ -1134,10 +1146,11 @@ function setTheme(theme) { // Theme-related stuff $(document).ready(function() { - if (!docCookies.getItem(themeCookieName)) { - var selectedTheme = 'default'; - setTheme(selectedTheme); // to set the correct css file and data-theme - } + // use theme from local storage or auto-detect otherwise + var selectedTheme = localStorage.getItem('theme') + || (window.matchMedia("(prefers-color-scheme: dark)").matches ? 'dark' : 'default') + || 'default'; + $("#themeContainer .dropdown-menu a").click(function() { setTheme($(this).attr('data-theme-name')); diff --git a/explainshell/web/static/js/highlight.min.js b/explainshell/web/static/js/highlight.min.js new file mode 100644 index 00000000..6d00bf09 --- /dev/null +++ b/explainshell/web/static/js/highlight.min.js @@ -0,0 +1,351 @@ +/*! 
+ Highlight.js v11.10.0 (git: 366a8bd012) + (c) 2006-2024 Josh Goebel and other contributors + License: BSD-3-Clause + */ + var hljs=function(){"use strict";function e(t){ + return t instanceof Map?t.clear=t.delete=t.set=()=>{ + throw Error("map is read-only")}:t instanceof Set&&(t.add=t.clear=t.delete=()=>{ + throw Error("set is read-only") + }),Object.freeze(t),Object.getOwnPropertyNames(t).forEach((n=>{ + const i=t[n],s=typeof i;"object"!==s&&"function"!==s||Object.isFrozen(i)||e(i) + })),t}class t{constructor(e){ + void 0===e.data&&(e.data={}),this.data=e.data,this.isMatchIgnored=!1} + ignoreMatch(){this.isMatchIgnored=!0}}function n(e){ + return e.replace(/&/g,"&").replace(//g,">").replace(/"/g,""").replace(/'/g,"'") + }function i(e,...t){const n=Object.create(null);for(const t in e)n[t]=e[t] + ;return t.forEach((e=>{for(const t in e)n[t]=e[t]})),n}const s=e=>!!e.scope + ;class o{constructor(e,t){ + this.buffer="",this.classPrefix=t.classPrefix,e.walk(this)}addText(e){ + this.buffer+=n(e)}openNode(e){if(!s(e))return;const t=((e,{prefix:t})=>{ + if(e.startsWith("language:"))return e.replace("language:","language-") + ;if(e.includes(".")){const n=e.split(".") + ;return[`${t}${n.shift()}`,...n.map(((e,t)=>`${e}${"_".repeat(t+1)}`))].join(" ") + }return`${t}${e}`})(e.scope,{prefix:this.classPrefix});this.span(t)} + closeNode(e){s(e)&&(this.buffer+="")}value(){return this.buffer}span(e){ + this.buffer+=``}}const r=(e={})=>{const t={children:[]} + ;return Object.assign(t,e),t};class a{constructor(){ + this.rootNode=r(),this.stack=[this.rootNode]}get top(){ + return this.stack[this.stack.length-1]}get root(){return this.rootNode}add(e){ + this.top.children.push(e)}openNode(e){const t=r({scope:e}) + ;this.add(t),this.stack.push(t)}closeNode(){ + if(this.stack.length>1)return this.stack.pop()}closeAllNodes(){ + for(;this.closeNode(););}toJSON(){return JSON.stringify(this.rootNode,null,4)} + walk(e){return this.constructor._walk(e,this.rootNode)}static _walk(e,t){ + 
return"string"==typeof t?e.addText(t):t.children&&(e.openNode(t), + t.children.forEach((t=>this._walk(e,t))),e.closeNode(t)),e}static _collapse(e){ + "string"!=typeof e&&e.children&&(e.children.every((e=>"string"==typeof e))?e.children=[e.children.join("")]:e.children.forEach((e=>{ + a._collapse(e)})))}}class c extends a{constructor(e){super(),this.options=e} + addText(e){""!==e&&this.add(e)}startScope(e){this.openNode(e)}endScope(){ + this.closeNode()}__addSublanguage(e,t){const n=e.root + ;t&&(n.scope="language:"+t),this.add(n)}toHTML(){ + return new o(this,this.options).value()}finalize(){ + return this.closeAllNodes(),!0}}function l(e){ + return e?"string"==typeof e?e:e.source:null}function g(e){return h("(?=",e,")")} + function u(e){return h("(?:",e,")*")}function d(e){return h("(?:",e,")?")} + function h(...e){return e.map((e=>l(e))).join("")}function f(...e){const t=(e=>{ + const t=e[e.length-1] + ;return"object"==typeof t&&t.constructor===Object?(e.splice(e.length-1,1),t):{} + })(e);return"("+(t.capture?"":"?:")+e.map((e=>l(e))).join("|")+")"} + function p(e){return RegExp(e.toString()+"|").exec("").length-1} + const b=/\[(?:[^\\\]]|\\.)*\]|\(\??|\\([1-9][0-9]*)|\\./ + ;function m(e,{joinWith:t}){let n=0;return e.map((e=>{n+=1;const t=n + ;let i=l(e),s="";for(;i.length>0;){const e=b.exec(i);if(!e){s+=i;break} + s+=i.substring(0,e.index), + i=i.substring(e.index+e[0].length),"\\"===e[0][0]&&e[1]?s+="\\"+(Number(e[1])+t):(s+=e[0], + "("===e[0]&&n++)}return s})).map((e=>`(${e})`)).join(t)} + const E="[a-zA-Z]\\w*",x="[a-zA-Z_]\\w*",w="\\b\\d+(\\.\\d+)?",y="(-?)(\\b0[xX][a-fA-F0-9]+|(\\b\\d+(\\.\\d*)?|\\.\\d+)([eE][-+]?\\d+)?)",_="\\b(0b[01]+)",O={ + begin:"\\\\[\\s\\S]",relevance:0},v={scope:"string",begin:"'",end:"'", + illegal:"\\n",contains:[O]},k={scope:"string",begin:'"',end:'"',illegal:"\\n", + contains:[O]},N=(e,t,n={})=>{const s=i({scope:"comment",begin:e,end:t, + contains:[]},n);s.contains.push({scope:"doctag", + begin:"[ 
]*(?=(TODO|FIXME|NOTE|BUG|OPTIMIZE|HACK|XXX):)", + end:/(TODO|FIXME|NOTE|BUG|OPTIMIZE|HACK|XXX):/,excludeBegin:!0,relevance:0}) + ;const o=f("I","a","is","so","us","to","at","if","in","it","on",/[A-Za-z]+['](d|ve|re|ll|t|s|n)/,/[A-Za-z]+[-][a-z]+/,/[A-Za-z][a-z]{2,}/) + ;return s.contains.push({begin:h(/[ ]+/,"(",o,/[.]?[:]?([.][ ]|[ ])/,"){3}")}),s + },S=N("//","$"),M=N("/\\*","\\*/"),R=N("#","$");var j=Object.freeze({ + __proto__:null,APOS_STRING_MODE:v,BACKSLASH_ESCAPE:O,BINARY_NUMBER_MODE:{ + scope:"number",begin:_,relevance:0},BINARY_NUMBER_RE:_,COMMENT:N, + C_BLOCK_COMMENT_MODE:M,C_LINE_COMMENT_MODE:S,C_NUMBER_MODE:{scope:"number", + begin:y,relevance:0},C_NUMBER_RE:y,END_SAME_AS_BEGIN:e=>Object.assign(e,{ + "on:begin":(e,t)=>{t.data._beginMatch=e[1]},"on:end":(e,t)=>{ + t.data._beginMatch!==e[1]&&t.ignoreMatch()}}),HASH_COMMENT_MODE:R,IDENT_RE:E, + MATCH_NOTHING_RE:/\b\B/,METHOD_GUARD:{begin:"\\.\\s*"+x,relevance:0}, + NUMBER_MODE:{scope:"number",begin:w,relevance:0},NUMBER_RE:w, + PHRASAL_WORDS_MODE:{ + begin:/\b(a|an|the|are|I'm|isn't|don't|doesn't|won't|but|just|should|pretty|simply|enough|gonna|going|wtf|so|such|will|you|your|they|like|more)\b/ + },QUOTE_STRING_MODE:k,REGEXP_MODE:{scope:"regexp",begin:/\/(?=[^/\n]*\/)/, + end:/\/[gimuy]*/,contains:[O,{begin:/\[/,end:/\]/,relevance:0,contains:[O]}]}, + RE_STARTERS_RE:"!|!=|!==|%|%=|&|&&|&=|\\*|\\*=|\\+|\\+=|,|-|-=|/=|/|:|;|<<|<<=|<=|<|===|==|=|>>>=|>>=|>=|>>>|>>|>|\\?|\\[|\\{|\\(|\\^|\\^=|\\||\\|=|\\|\\||~", + SHEBANG:(e={})=>{const t=/^#![ ]*\// + ;return e.binary&&(e.begin=h(t,/.*\b/,e.binary,/\b.*/)),i({scope:"meta",begin:t, + end:/$/,relevance:0,"on:begin":(e,t)=>{0!==e.index&&t.ignoreMatch()}},e)}, + TITLE_MODE:{scope:"title",begin:E,relevance:0},UNDERSCORE_IDENT_RE:x, + UNDERSCORE_TITLE_MODE:{scope:"title",begin:x,relevance:0}});function A(e,t){ + "."===e.input[e.index-1]&&t.ignoreMatch()}function I(e,t){ + void 0!==e.className&&(e.scope=e.className,delete e.className)}function T(e,t){ + 
t&&e.beginKeywords&&(e.begin="\\b("+e.beginKeywords.split(" ").join("|")+")(?!\\.)(?=\\b|\\s)", + e.__beforeBegin=A,e.keywords=e.keywords||e.beginKeywords,delete e.beginKeywords, + void 0===e.relevance&&(e.relevance=0))}function L(e,t){ + Array.isArray(e.illegal)&&(e.illegal=f(...e.illegal))}function B(e,t){ + if(e.match){ + if(e.begin||e.end)throw Error("begin & end are not supported with match") + ;e.begin=e.match,delete e.match}}function P(e,t){ + void 0===e.relevance&&(e.relevance=1)}const D=(e,t)=>{if(!e.beforeMatch)return + ;if(e.starts)throw Error("beforeMatch cannot be used with starts") + ;const n=Object.assign({},e);Object.keys(e).forEach((t=>{delete e[t] + })),e.keywords=n.keywords,e.begin=h(n.beforeMatch,g(n.begin)),e.starts={ + relevance:0,contains:[Object.assign(n,{endsParent:!0})] + },e.relevance=0,delete n.beforeMatch + },H=["of","and","for","in","not","or","if","then","parent","list","value"],C="keyword" + ;function $(e,t,n=C){const i=Object.create(null) + ;return"string"==typeof e?s(n,e.split(" ")):Array.isArray(e)?s(n,e):Object.keys(e).forEach((n=>{ + Object.assign(i,$(e[n],t,n))})),i;function s(e,n){ + t&&(n=n.map((e=>e.toLowerCase()))),n.forEach((t=>{const n=t.split("|") + ;i[n[0]]=[e,U(n[0],n[1])]}))}}function U(e,t){ + return t?Number(t):(e=>H.includes(e.toLowerCase()))(e)?0:1}const z={},W=e=>{ + console.error(e)},X=(e,...t)=>{console.log("WARN: "+e,...t)},G=(e,t)=>{ + z[`${e}/${t}`]||(console.log(`Deprecated as of ${e}. 
${t}`),z[`${e}/${t}`]=!0) + },K=Error();function F(e,t,{key:n}){let i=0;const s=e[n],o={},r={} + ;for(let e=1;e<=t.length;e++)r[e+i]=s[e],o[e+i]=!0,i+=p(t[e-1]) + ;e[n]=r,e[n]._emit=o,e[n]._multi=!0}function Z(e){(e=>{ + e.scope&&"object"==typeof e.scope&&null!==e.scope&&(e.beginScope=e.scope, + delete e.scope)})(e),"string"==typeof e.beginScope&&(e.beginScope={ + _wrap:e.beginScope}),"string"==typeof e.endScope&&(e.endScope={_wrap:e.endScope + }),(e=>{if(Array.isArray(e.begin)){ + if(e.skip||e.excludeBegin||e.returnBegin)throw W("skip, excludeBegin, returnBegin not compatible with beginScope: {}"), + K + ;if("object"!=typeof e.beginScope||null===e.beginScope)throw W("beginScope must be object"), + K;F(e,e.begin,{key:"beginScope"}),e.begin=m(e.begin,{joinWith:""})}})(e),(e=>{ + if(Array.isArray(e.end)){ + if(e.skip||e.excludeEnd||e.returnEnd)throw W("skip, excludeEnd, returnEnd not compatible with endScope: {}"), + K + ;if("object"!=typeof e.endScope||null===e.endScope)throw W("endScope must be object"), + K;F(e,e.end,{key:"endScope"}),e.end=m(e.end,{joinWith:""})}})(e)}function V(e){ + function t(t,n){ + return RegExp(l(t),"m"+(e.case_insensitive?"i":"")+(e.unicodeRegex?"u":"")+(n?"g":"")) + }class n{constructor(){ + this.matchIndexes={},this.regexes=[],this.matchAt=1,this.position=0} + addRule(e,t){ + t.position=this.position++,this.matchIndexes[this.matchAt]=t,this.regexes.push([t,e]), + this.matchAt+=p(e)+1}compile(){0===this.regexes.length&&(this.exec=()=>null) + ;const e=this.regexes.map((e=>e[1]));this.matcherRe=t(m(e,{joinWith:"|" + }),!0),this.lastIndex=0}exec(e){this.matcherRe.lastIndex=this.lastIndex + ;const t=this.matcherRe.exec(e);if(!t)return null + ;const n=t.findIndex(((e,t)=>t>0&&void 0!==e)),i=this.matchIndexes[n] + ;return t.splice(0,n),Object.assign(t,i)}}class s{constructor(){ + this.rules=[],this.multiRegexes=[], + this.count=0,this.lastIndex=0,this.regexIndex=0}getMatcher(e){ + if(this.multiRegexes[e])return this.multiRegexes[e];const t=new 
n + ;return this.rules.slice(e).forEach((([e,n])=>t.addRule(e,n))), + t.compile(),this.multiRegexes[e]=t,t}resumingScanAtSamePosition(){ + return 0!==this.regexIndex}considerAll(){this.regexIndex=0}addRule(e,t){ + this.rules.push([e,t]),"begin"===t.type&&this.count++}exec(e){ + const t=this.getMatcher(this.regexIndex);t.lastIndex=this.lastIndex + ;let n=t.exec(e) + ;if(this.resumingScanAtSamePosition())if(n&&n.index===this.lastIndex);else{ + const t=this.getMatcher(0);t.lastIndex=this.lastIndex+1,n=t.exec(e)} + return n&&(this.regexIndex+=n.position+1, + this.regexIndex===this.count&&this.considerAll()),n}} + if(e.compilerExtensions||(e.compilerExtensions=[]), + e.contains&&e.contains.includes("self"))throw Error("ERR: contains `self` is not supported at the top-level of a language. See documentation.") + ;return e.classNameAliases=i(e.classNameAliases||{}),function n(o,r){const a=o + ;if(o.isCompiled)return a + ;[I,B,Z,D].forEach((e=>e(o,r))),e.compilerExtensions.forEach((e=>e(o,r))), + o.__beforeBegin=null,[T,L,P].forEach((e=>e(o,r))),o.isCompiled=!0;let c=null + ;return"object"==typeof o.keywords&&o.keywords.$pattern&&(o.keywords=Object.assign({},o.keywords), + c=o.keywords.$pattern, + delete o.keywords.$pattern),c=c||/\w+/,o.keywords&&(o.keywords=$(o.keywords,e.case_insensitive)), + a.keywordPatternRe=t(c,!0), + r&&(o.begin||(o.begin=/\B|\b/),a.beginRe=t(a.begin),o.end||o.endsWithParent||(o.end=/\B|\b/), + o.end&&(a.endRe=t(a.end)), + a.terminatorEnd=l(a.end)||"",o.endsWithParent&&r.terminatorEnd&&(a.terminatorEnd+=(o.end?"|":"")+r.terminatorEnd)), + o.illegal&&(a.illegalRe=t(o.illegal)), + o.contains||(o.contains=[]),o.contains=[].concat(...o.contains.map((e=>(e=>(e.variants&&!e.cachedVariants&&(e.cachedVariants=e.variants.map((t=>i(e,{ + variants:null},t)))),e.cachedVariants?e.cachedVariants:q(e)?i(e,{ + starts:e.starts?i(e.starts):null + }):Object.isFrozen(e)?i(e):e))("self"===e?o:e)))),o.contains.forEach((e=>{n(e,a) + 
})),o.starts&&n(o.starts,r),a.matcher=(e=>{const t=new s + ;return e.contains.forEach((e=>t.addRule(e.begin,{rule:e,type:"begin" + }))),e.terminatorEnd&&t.addRule(e.terminatorEnd,{type:"end" + }),e.illegal&&t.addRule(e.illegal,{type:"illegal"}),t})(a),a}(e)}function q(e){ + return!!e&&(e.endsWithParent||q(e.starts))}class J extends Error{ + constructor(e,t){super(e),this.name="HTMLInjectionError",this.html=t}} + const Y=n,Q=i,ee=Symbol("nomatch"),te=n=>{ + const i=Object.create(null),s=Object.create(null),o=[];let r=!0 + ;const a="Could not find the language '{}', did you forget to load/include a language module?",l={ + disableAutodetect:!0,name:"Plain text",contains:[]};let p={ + ignoreUnescapedHTML:!1,throwUnescapedHTML:!1,noHighlightRe:/^(no-?highlight)$/i, + languageDetectRe:/\blang(?:uage)?-([\w-]+)\b/i,classPrefix:"hljs-", + cssSelector:"pre code",languages:null,__emitter:c};function b(e){ + return p.noHighlightRe.test(e)}function m(e,t,n){let i="",s="" + ;"object"==typeof t?(i=e, + n=t.ignoreIllegals,s=t.language):(G("10.7.0","highlight(lang, code, ...args) has been deprecated."), + G("10.7.0","Please use highlight(code, options) instead.\nhttps://github.com/highlightjs/highlight.js/issues/2277"), + s=e,i=t),void 0===n&&(n=!0);const o={code:i,language:s};N("before:highlight",o) + ;const r=o.result?o.result:E(o.language,o.code,n) + ;return r.code=o.code,N("after:highlight",r),r}function E(e,n,s,o){ + const c=Object.create(null);function l(){if(!N.keywords)return void M.addText(R) + ;let e=0;N.keywordPatternRe.lastIndex=0;let t=N.keywordPatternRe.exec(R),n="" + ;for(;t;){n+=R.substring(e,t.index) + ;const s=_.case_insensitive?t[0].toLowerCase():t[0],o=(i=s,N.keywords[i]);if(o){ + const[e,i]=o + ;if(M.addText(n),n="",c[s]=(c[s]||0)+1,c[s]<=7&&(j+=i),e.startsWith("_"))n+=t[0];else{ + const n=_.classNameAliases[e]||e;u(t[0],n)}}else n+=t[0] + ;e=N.keywordPatternRe.lastIndex,t=N.keywordPatternRe.exec(R)}var i + ;n+=R.substring(e),M.addText(n)}function 
g(){null!=N.subLanguage?(()=>{ + if(""===R)return;let e=null;if("string"==typeof N.subLanguage){ + if(!i[N.subLanguage])return void M.addText(R) + ;e=E(N.subLanguage,R,!0,S[N.subLanguage]),S[N.subLanguage]=e._top + }else e=x(R,N.subLanguage.length?N.subLanguage:null) + ;N.relevance>0&&(j+=e.relevance),M.__addSublanguage(e._emitter,e.language) + })():l(),R=""}function u(e,t){ + ""!==e&&(M.startScope(t),M.addText(e),M.endScope())}function d(e,t){let n=1 + ;const i=t.length-1;for(;n<=i;){if(!e._emit[n]){n++;continue} + const i=_.classNameAliases[e[n]]||e[n],s=t[n];i?u(s,i):(R=s,l(),R=""),n++}} + function h(e,t){ + return e.scope&&"string"==typeof e.scope&&M.openNode(_.classNameAliases[e.scope]||e.scope), + e.beginScope&&(e.beginScope._wrap?(u(R,_.classNameAliases[e.beginScope._wrap]||e.beginScope._wrap), + R=""):e.beginScope._multi&&(d(e.beginScope,t),R="")),N=Object.create(e,{parent:{ + value:N}}),N}function f(e,n,i){let s=((e,t)=>{const n=e&&e.exec(t) + ;return n&&0===n.index})(e.endRe,i);if(s){if(e["on:end"]){const i=new t(e) + ;e["on:end"](n,i),i.isMatchIgnored&&(s=!1)}if(s){ + for(;e.endsParent&&e.parent;)e=e.parent;return e}} + if(e.endsWithParent)return f(e.parent,n,i)}function b(e){ + return 0===N.matcher.regexIndex?(R+=e[0],1):(T=!0,0)}function m(e){ + const t=e[0],i=n.substring(e.index),s=f(N,e,i);if(!s)return ee;const o=N + ;N.endScope&&N.endScope._wrap?(g(), + u(t,N.endScope._wrap)):N.endScope&&N.endScope._multi?(g(), + d(N.endScope,e)):o.skip?R+=t:(o.returnEnd||o.excludeEnd||(R+=t), + g(),o.excludeEnd&&(R=t));do{ + N.scope&&M.closeNode(),N.skip||N.subLanguage||(j+=N.relevance),N=N.parent + }while(N!==s.parent);return s.starts&&h(s.starts,e),o.returnEnd?0:t.length} + let w={};function y(i,o){const a=o&&o[0];if(R+=i,null==a)return g(),0 + ;if("begin"===w.type&&"end"===o.type&&w.index===o.index&&""===a){ + if(R+=n.slice(o.index,o.index+1),!r){const t=Error(`0 width match regex (${e})`) + ;throw t.languageName=e,t.badRule=w.rule,t}return 1} + 
if(w=o,"begin"===o.type)return(e=>{ + const n=e[0],i=e.rule,s=new t(i),o=[i.__beforeBegin,i["on:begin"]] + ;for(const t of o)if(t&&(t(e,s),s.isMatchIgnored))return b(n) + ;return i.skip?R+=n:(i.excludeBegin&&(R+=n), + g(),i.returnBegin||i.excludeBegin||(R=n)),h(i,e),i.returnBegin?0:n.length})(o) + ;if("illegal"===o.type&&!s){ + const e=Error('Illegal lexeme "'+a+'" for mode "'+(N.scope||"")+'"') + ;throw e.mode=N,e}if("end"===o.type){const e=m(o);if(e!==ee)return e} + if("illegal"===o.type&&""===a)return 1 + ;if(I>1e5&&I>3*o.index)throw Error("potential infinite loop, way more iterations than matches") + ;return R+=a,a.length}const _=O(e) + ;if(!_)throw W(a.replace("{}",e)),Error('Unknown language: "'+e+'"') + ;const v=V(_);let k="",N=o||v;const S={},M=new p.__emitter(p);(()=>{const e=[] + ;for(let t=N;t!==_;t=t.parent)t.scope&&e.unshift(t.scope) + ;e.forEach((e=>M.openNode(e)))})();let R="",j=0,A=0,I=0,T=!1;try{ + if(_.__emitTokens)_.__emitTokens(n,M);else{for(N.matcher.considerAll();;){ + I++,T?T=!1:N.matcher.considerAll(),N.matcher.lastIndex=A + ;const e=N.matcher.exec(n);if(!e)break;const t=y(n.substring(A,e.index),e) + ;A=e.index+t}y(n.substring(A))}return M.finalize(),k=M.toHTML(),{language:e, + value:k,relevance:j,illegal:!1,_emitter:M,_top:N}}catch(t){ + if(t.message&&t.message.includes("Illegal"))return{language:e,value:Y(n), + illegal:!0,relevance:0,_illegalBy:{message:t.message,index:A, + context:n.slice(A-100,A+100),mode:t.mode,resultSoFar:k},_emitter:M};if(r)return{ + language:e,value:Y(n),illegal:!1,relevance:0,errorRaised:t,_emitter:M,_top:N} + ;throw t}}function x(e,t){t=t||p.languages||Object.keys(i);const n=(e=>{ + const t={value:Y(e),illegal:!1,relevance:0,_top:l,_emitter:new p.__emitter(p)} + ;return t._emitter.addText(e),t})(e),s=t.filter(O).filter(k).map((t=>E(t,e,!1))) + ;s.unshift(n);const o=s.sort(((e,t)=>{ + if(e.relevance!==t.relevance)return t.relevance-e.relevance + 
;if(e.language&&t.language){if(O(e.language).supersetOf===t.language)return 1 + ;if(O(t.language).supersetOf===e.language)return-1}return 0})),[r,a]=o,c=r + ;return c.secondBest=a,c}function w(e){let t=null;const n=(e=>{ + let t=e.className+" ";t+=e.parentNode?e.parentNode.className:"" + ;const n=p.languageDetectRe.exec(t);if(n){const t=O(n[1]) + ;return t||(X(a.replace("{}",n[1])), + X("Falling back to no-highlight mode for this block.",e)),t?n[1]:"no-highlight"} + return t.split(/\s+/).find((e=>b(e)||O(e)))})(e);if(b(n))return + ;if(N("before:highlightElement",{el:e,language:n + }),e.dataset.highlighted)return void console.log("Element previously highlighted. To highlight again, first unset `dataset.highlighted`.",e) + ;if(e.children.length>0&&(p.ignoreUnescapedHTML||(console.warn("One of your code blocks includes unescaped HTML. This is a potentially serious security risk."), + console.warn("https://github.com/highlightjs/highlight.js/wiki/security"), + console.warn("The element with unescaped HTML:"), + console.warn(e)),p.throwUnescapedHTML))throw new J("One of your code blocks includes unescaped HTML.",e.innerHTML) + ;t=e;const i=t.textContent,o=n?m(i,{language:n,ignoreIllegals:!0}):x(i) + ;e.innerHTML=o.value,e.dataset.highlighted="yes",((e,t,n)=>{const i=t&&s[t]||n + ;e.classList.add("hljs"),e.classList.add("language-"+i) + })(e,n,o.language),e.result={language:o.language,re:o.relevance, + relevance:o.relevance},o.secondBest&&(e.secondBest={ + language:o.secondBest.language,relevance:o.secondBest.relevance + }),N("after:highlightElement",{el:e,result:o,text:i})}let y=!1;function _(){ + "loading"!==document.readyState?document.querySelectorAll(p.cssSelector).forEach(w):y=!0 + }function O(e){return e=(e||"").toLowerCase(),i[e]||i[s[e]]} + function v(e,{languageName:t}){"string"==typeof e&&(e=[e]),e.forEach((e=>{ + s[e.toLowerCase()]=t}))}function k(e){const t=O(e) + ;return t&&!t.disableAutodetect}function N(e,t){const n=e;o.forEach((e=>{ + e[n]&&e[n](t)}))} + 
"undefined"!=typeof window&&window.addEventListener&&window.addEventListener("DOMContentLoaded",(()=>{ + y&&_()}),!1),Object.assign(n,{highlight:m,highlightAuto:x,highlightAll:_, + highlightElement:w, + highlightBlock:e=>(G("10.7.0","highlightBlock will be removed entirely in v12.0"), + G("10.7.0","Please use highlightElement now."),w(e)),configure:e=>{p=Q(p,e)}, + initHighlighting:()=>{ + _(),G("10.6.0","initHighlighting() deprecated. Use highlightAll() now.")}, + initHighlightingOnLoad:()=>{ + _(),G("10.6.0","initHighlightingOnLoad() deprecated. Use highlightAll() now.") + },registerLanguage:(e,t)=>{let s=null;try{s=t(n)}catch(t){ + if(W("Language definition for '{}' could not be registered.".replace("{}",e)), + !r)throw t;W(t),s=l} + s.name||(s.name=e),i[e]=s,s.rawDefinition=t.bind(null,n),s.aliases&&v(s.aliases,{ + languageName:e})},unregisterLanguage:e=>{delete i[e] + ;for(const t of Object.keys(s))s[t]===e&&delete s[t]}, + listLanguages:()=>Object.keys(i),getLanguage:O,registerAliases:v, + autoDetection:k,inherit:Q,addPlugin:e=>{(e=>{ + e["before:highlightBlock"]&&!e["before:highlightElement"]&&(e["before:highlightElement"]=t=>{ + e["before:highlightBlock"](Object.assign({block:t.el},t)) + }),e["after:highlightBlock"]&&!e["after:highlightElement"]&&(e["after:highlightElement"]=t=>{ + e["after:highlightBlock"](Object.assign({block:t.el},t))})})(e),o.push(e)}, + removePlugin:e=>{const t=o.indexOf(e);-1!==t&&o.splice(t,1)}}),n.debugMode=()=>{ + r=!1},n.safeMode=()=>{r=!0},n.versionString="11.10.0",n.regex={concat:h, + lookahead:g,either:f,optional:d,anyNumberOfTimes:u} + ;for(const t in j)"object"==typeof j[t]&&e(j[t]);return Object.assign(n,j),n + },ne=te({});return ne.newInstance=()=>te({}),ne}() + ;"object"==typeof exports&&"undefined"!=typeof module&&(module.exports=hljs);/*! 
`bash` grammar compiled for Highlight.js 11.10.0 */ + (()=>{var e=(()=>{"use strict";return e=>{const s=e.regex,t={},n={begin:/\$\{/, + end:/\}/,contains:["self",{begin:/:-/,contains:[t]}]};Object.assign(t,{ + className:"variable",variants:[{ + begin:s.concat(/\$[\w\d#@][\w\d_]*/,"(?![\\w\\d])(?![$])")},n]});const a={ + className:"subst",begin:/\$\(/,end:/\)/,contains:[e.BACKSLASH_ESCAPE] + },i=e.inherit(e.COMMENT(),{match:[/(^|\s)/,/#.*$/],scope:{2:"comment"}}),c={ + begin:/<<-?\s*(?=\w+)/,starts:{contains:[e.END_SAME_AS_BEGIN({begin:/(\w+)/, + end:/(\w+)/,className:"string"})]}},o={className:"string",begin:/"/,end:/"/, + contains:[e.BACKSLASH_ESCAPE,t,a]};a.contains.push(o);const r={begin:/\$?\(\(/, + end:/\)\)/,contains:[{begin:/\d+#[0-9a-f]+/,className:"number"},e.NUMBER_MODE,t] + },l=e.SHEBANG({binary:"(fish|bash|zsh|sh|csh|ksh|tcsh|dash|scsh)",relevance:10 + }),m={className:"function",begin:/\w[\w\d_]*\s*\(\s*\)\s*\{/,returnBegin:!0, + contains:[e.inherit(e.TITLE_MODE,{begin:/\w[\w\d_]*/})],relevance:0};return{ + name:"Bash",aliases:["sh","zsh"],keywords:{$pattern:/\b[a-z][a-z0-9._-]+\b/, + keyword:["if","then","else","elif","fi","for","while","until","in","do","done","case","esac","function","select"], + literal:["true","false"], + 
built_in:["break","cd","continue","eval","exec","exit","export","getopts","hash","pwd","readonly","return","shift","test","times","trap","umask","unset","alias","bind","builtin","caller","command","declare","echo","enable","help","let","local","logout","mapfile","printf","read","readarray","source","sudo","type","typeset","ulimit","unalias","set","shopt","autoload","bg","bindkey","bye","cap","chdir","clone","comparguments","compcall","compctl","compdescribe","compfiles","compgroups","compquote","comptags","comptry","compvalues","dirs","disable","disown","echotc","echoti","emulate","fc","fg","float","functions","getcap","getln","history","integer","jobs","kill","limit","log","noglob","popd","print","pushd","pushln","rehash","sched","setcap","setopt","stat","suspend","ttyctl","unfunction","unhash","unlimit","unsetopt","vared","wait","whence","where","which","zcompile","zformat","zftp","zle","zmodload","zparseopts","zprof","zpty","zregexparse","zsocket","zstyle","ztcp","chcon","chgrp","chown","chmod","cp","dd","df","dir","dircolors","ln","ls","mkdir","mkfifo","mknod","mktemp","mv","realpath","rm","rmdir","shred","sync","touch","truncate","vdir","b2sum","base32","base64","cat","cksum","comm","csplit","cut","expand","fmt","fold","head","join","md5sum","nl","numfmt","od","paste","ptx","pr","sha1sum","sha224sum","sha256sum","sha384sum","sha512sum","shuf","sort","split","sum","tac","tail","tr","tsort","unexpand","uniq","wc","arch","basename","chroot","date","dirname","du","echo","env","expr","factor","groups","hostid","id","link","logname","nice","nohup","nproc","pathchk","pinky","printenv","printf","pwd","readlink","runcon","seq","sleep","stat","stdbuf","stty","tee","test","timeout","tty","uname","unlink","uptime","users","who","whoami","yes"] + },contains:[l,e.SHEBANG(),m,r,i,c,{match:/(\/[a-z._-]+)+/},o,{match:/\\"/},{ + className:"string",begin:/'/,end:/'/},{match:/\\'/},t]}}})() + ;hljs.registerLanguage("bash",e)})(); + +/*! 
`bash` grammar compiled for Highlight.js 11.10.0 */ +(()=>{var e=(()=>{"use strict";return e=>{const s=e.regex,t={},n={begin:/\$\{/, + end:/\}/,contains:["self",{begin:/:-/,contains:[t]}]};Object.assign(t,{ + className:"variable",variants:[{ + begin:s.concat(/\$[\w\d#@][\w\d_]*/,"(?![\\w\\d])(?![$])")},n]});const a={ + className:"subst",begin:/\$\(/,end:/\)/,contains:[e.BACKSLASH_ESCAPE] + },i=e.inherit(e.COMMENT(),{match:[/(^|\s)/,/#.*$/],scope:{2:"comment"}}),c={ + begin:/<<-?\s*(?=\w+)/,starts:{contains:[e.END_SAME_AS_BEGIN({begin:/(\w+)/, + end:/(\w+)/,className:"string"})]}},o={className:"string",begin:/"/,end:/"/, + contains:[e.BACKSLASH_ESCAPE,t,a]};a.contains.push(o);const r={begin:/\$?\(\(/, + end:/\)\)/,contains:[{begin:/\d+#[0-9a-f]+/,className:"number"},e.NUMBER_MODE,t] + },l=e.SHEBANG({binary:"(fish|bash|zsh|sh|csh|ksh|tcsh|dash|scsh)",relevance:10 + }),m={className:"function",begin:/\w[\w\d_]*\s*\(\s*\)\s*\{/,returnBegin:!0, + contains:[e.inherit(e.TITLE_MODE,{begin:/\w[\w\d_]*/})],relevance:0};return{ + name:"Bash",aliases:["sh","zsh"],keywords:{$pattern:/\b[a-z][a-z0-9._-]+\b/, + keyword:["if","then","else","elif","fi","for","while","until","in","do","done","case","esac","function","select"], + literal:["true","false"], + 
built_in:["break","cd","continue","eval","exec","exit","export","getopts","hash","pwd","readonly","return","shift","test","times","trap","umask","unset","alias","bind","builtin","caller","command","declare","echo","enable","help","let","local","logout","mapfile","printf","read","readarray","source","sudo","type","typeset","ulimit","unalias","set","shopt","autoload","bg","bindkey","bye","cap","chdir","clone","comparguments","compcall","compctl","compdescribe","compfiles","compgroups","compquote","comptags","comptry","compvalues","dirs","disable","disown","echotc","echoti","emulate","fc","fg","float","functions","getcap","getln","history","integer","jobs","kill","limit","log","noglob","popd","print","pushd","pushln","rehash","sched","setcap","setopt","stat","suspend","ttyctl","unfunction","unhash","unlimit","unsetopt","vared","wait","whence","where","which","zcompile","zformat","zftp","zle","zmodload","zparseopts","zprof","zpty","zregexparse","zsocket","zstyle","ztcp","chcon","chgrp","chown","chmod","cp","dd","df","dir","dircolors","ln","ls","mkdir","mkfifo","mknod","mktemp","mv","realpath","rm","rmdir","shred","sync","touch","truncate","vdir","b2sum","base32","base64","cat","cksum","comm","csplit","cut","expand","fmt","fold","head","join","md5sum","nl","numfmt","od","paste","ptx","pr","sha1sum","sha224sum","sha256sum","sha384sum","sha512sum","shuf","sort","split","sum","tac","tail","tr","tsort","unexpand","uniq","wc","arch","basename","chroot","date","dirname","du","echo","env","expr","factor","groups","hostid","id","link","logname","nice","nohup","nproc","pathchk","pinky","printenv","printf","pwd","readlink","runcon","seq","sleep","stat","stdbuf","stty","tee","test","timeout","tty","uname","unlink","uptime","users","who","whoami","yes"] + },contains:[l,e.SHEBANG(),m,r,i,c,{match:/(\/[a-z._-]+)+/},o,{match:/\\"/},{ + className:"string",begin:/'/,end:/'/},{match:/\\'/},t]}}})() + ;hljs.registerLanguage("bash",e)})(); \ No newline at end of file diff --git 
a/explainshell/web/templates/about.html b/explainshell/web/templates/about.html index 726d11e2..2a202412 100644 --- a/explainshell/web/templates/about.html +++ b/explainshell/web/templates/about.html @@ -4,7 +4,7 @@

Hello,

This site contains 29761 parsed manpages from sections 1 and 8 found in - Ubuntu's manpage repository. + Ubuntu's manpage repository. A lot of heuristics were used to extract the arguments of each program, and there are errors here and there, especially in manpages that have a non-standard layout.

diff --git a/explainshell/web/templates/base.html b/explainshell/web/templates/base.html index f3b65915..b41cacf8 100644 --- a/explainshell/web/templates/base.html +++ b/explainshell/web/templates/base.html @@ -7,15 +7,19 @@ {%- if config.DEBUG %} {% if request.cookies.get('theme', 'default') == 'default' %} + {% else %} + {% endif %} {% else %} {% if request.cookies.get('theme', 'default') == 'default' %} + {% else %} + {% endif %} {%- endif %} @@ -23,7 +27,7 @@ {% block head %}{% endblock %} - +
@@ -59,10 +63,12 @@ + {% else %} + {%- endif %} {%- block js %}{% endblock %} diff --git a/explainshell/web/templates/errors/parsingerror.html b/explainshell/web/templates/errors/parsingerror.html index ef4eb97f..4142f6ed 100644 --- a/explainshell/web/templates/errors/parsingerror.html +++ b/explainshell/web/templates/errors/parsingerror.html @@ -1,6 +1,6 @@ {% extends "errors/error.html" %} {% block message -%}
{{ e }}
-
{{ e.s.decode('latin1')|e }}
+
{{ e.s|e }}
 {{ ' '*e.position }}^
{%- endblock %} diff --git a/explainshell/web/templates/explain.html b/explainshell/web/templates/explain.html index a43071a5..b09b18db 100644 --- a/explainshell/web/templates/explain.html +++ b/explainshell/web/templates/explain.html @@ -40,7 +40,7 @@
{% for m in matches if m.name %} {% if loop.first %}source manpages: {% endif %} -{{ m.name|e }}{% if not loop.last %}, {% endif %} +{{ m.name|e }}{% if not loop.last %}, {% endif %} {% endfor %}
{%- endblock %} diff --git a/explainshell/web/templates/index.html b/explainshell/web/templates/index.html index c75dcbd7..b4fd6d7d 100644 --- a/explainshell/web/templates/index.html +++ b/explainshell/web/templates/index.html @@ -15,7 +15,7 @@
- +
@@ -26,7 +26,7 @@

examples

    {{ macros.examplebullet(":(){ :|:& };:") }} {{ macros.examplebullet("for user in $(cut -f1 -d: /etc/passwd); do crontab -u $user -l 2>/dev/null; done") }} - {{ macros.examplebullet('file=$(echo `basename "$file"`)') }} + {{ macros.examplebullet('name=$(printf "%s@%s" "$(id -nu)" "$(uname -n)")') }} {{ macros.examplebullet("true && { echo success; } || { echo failed; }") }} {{ macros.examplebullet("cut -d ' ' -f 1 /var/log/apache2/access_logs | uniq -c | sort -n") }} {{ macros.examplebullet('tar zcf - some-dir | ssh some-server "cd /; tar xvzf -"') }} @@ -40,6 +40,8 @@

    examples

    {%- endblock %} {% block js -%} {%- endblock %} diff --git a/explainshell/web/templates/macros.html b/explainshell/web/templates/macros.html index befaa2ef..4f661052 100644 --- a/explainshell/web/templates/macros.html +++ b/explainshell/web/templates/macros.html @@ -1,10 +1,10 @@ {% macro outputcommand(mp, suggestions) -%} {% if suggestions|length == 0 %} - {{ mp.program|e }} + {{ mp.program|e }} {% else %} - {{ mp.program|e }} + {{ mp.program|e }}