Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
24 commits
Select commit Hold shift + click to select a range
b02b7fb
Add auto detection for dark browser/system mode
rugk May 16, 2019
a4ac247
Merge branch 'master' into autodarkmode
rugk Dec 31, 2020
23f61f7
Change the example with the "useless use of echo" and backticks to si…
Strahinja Jan 20, 2022
354ecbb
chore: init .dockerignore file
mundanevision20 Sep 14, 2024
0d22359
chore: extend .gitignore file
mundanevision20 Sep 14, 2024
1fd151d
chore: update docker-compose.yml contents
mundanevision20 Sep 14, 2024
a6c8c11
chore: upgrade to Python 3.12 and reduce layers
mundanevision20 Sep 14, 2024
1eb8688
chore: update serve command in Makefile
mundanevision20 Sep 14, 2024
5626ba4
chore(deps): update pip dependencies
mundanevision20 Sep 14, 2024
a302a61
chore: upgrade Python code to Python 3 and enhance readability
mundanevision20 Sep 14, 2024
2add463
chore: enhance formatting and update variable names
mundanevision20 Sep 14, 2024
cd50f3e
feat: add syntax highlighting to landing page
mundanevision20 Sep 15, 2024
423315a
fix: replace inbuilt logging with loguru based helper scripts
mundanevision20 Sep 15, 2024
b807e60
fix: fixed bugs in displany and ingestion of contents
mundanevision20 Sep 15, 2024
6a8fba0
fix: fixed bugs in ingestion of manpages
mundanevision20 Sep 16, 2024
8fbd699
feat(js): highlight first clazz
mundanevision20 Sep 21, 2024
14e89ef
chore: merged 'rugk-fork/autodarkmode' into mundanevision20-python3-…
mundanevision20 Sep 21, 2024
aaf4cdc
chore: merged 'apoorvlathey-fork/fixSearch' into mundanevision20-pyt…
mundanevision20 Sep 21, 2024
04adb1d
fix: fixed bug in manpage.py
mundanevision20 Sep 21, 2024
29a20e6
chore: update to Ubuntu noble manpages
mundanevision20 Sep 21, 2024
8b98cc6
chore: merged 'strahinja-fork/master' into mundanevision20-python3-mi…
mundanevision20 Sep 21, 2024
551d1d3
Merge PR #330: Python 3 migration
tobiashochguertel Oct 26, 2025
4b609bb
Complete Python 2→3 compatibility fixes
tobiashochguertel Oct 26, 2025
c7294f7
fix: address Copilot code review suggestions
tobiashochguertel Oct 26, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 15 additions & 0 deletions .dockerignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
.gitignore
.git/
.github/
misc/
tests/
tools
venv/
dump/
.mpypy_cache/
*.pyc
*.log
README.md
docker-compose.yml
Dockerfile
Makefile
5 changes: 4 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,7 @@
*.swp
.coverage
.vagrant
application.log
*.log
venv/
__pycache__
.mpypy_cache/
21 changes: 9 additions & 12 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -1,18 +1,15 @@
FROM python:2.7
FROM python:3.12

RUN apt-get update \
&& apt-get install man-db -y \
&& apt-get clean
RUN apt update \
&& apt install man-db -y \
&& apt clean

ADD ./requirements.txt /tmp/requirements.txt
WORKDIR /opt/webapp
COPY . .

RUN pip install --upgrade pip \
&& python --version \
&& pip install -r /tmp/requirements.txt \
&& rm -rf ~/.cache/pip/*
RUN pip3 install --no-cache-dir --no-warn-script-location --upgrade pip setuptools wheel virtualenv \
&& pip3 install --no-cache-dir --no-warn-script-location -r requirements.txt

ADD ./ /opt/webapp/
WORKDIR /opt/webapp
EXPOSE 5000

CMD ["make", "serve"]
CMD ["python3", "runserver.py"]
4 changes: 2 additions & 2 deletions Makefile
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
tests:
nosetests --exe --with-doctest tests/ explainshell/
pytest --doctest-modules tests/ explainshell/

serve:
python runserver.py
docker-compose up --build

.PHONY: tests
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ When querying explainshell, it:
>
> If you're relying on manpages, be aware that they may not reflect the latest behavior. Contributions in this area are welcome but would require rethinking the documentation pipeline.

Right now explainshell.com contains the entire [archive of Ubuntu](http://manpages.ubuntu.com/). It's not
Right now explainshell.com contains the entire [archive of Ubuntu](https://manpages.ubuntu.com/). It's not
possible to directly add a missing man page to the live site (it might be in the future).

## Running explainshell locally
Expand Down
6 changes: 3 additions & 3 deletions docker-compose.yml
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
version: '2'
services:
db:
image: mongo
web:
build: .
command: make serve
build:
context: .
dockerfile: Dockerfile
environment:
- MONGO_URI=mongodb://db
- HOST_IP=0.0.0.0
Expand Down
114 changes: 61 additions & 53 deletions explainshell/algo/classifier.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
import itertools, collections, logging
import itertools
import collections
import logging

import nltk
import nltk.metrics
Expand All @@ -9,26 +11,32 @@

logger = logging.getLogger(__name__)


def get_features(paragraph):
features = {}
ptext = paragraph.cleantext()
assert ptext

features['starts_with_hyphen'] = algo.features.starts_with_hyphen(ptext)
features['is_indented'] = algo.features.is_indented(ptext)
features['par_length'] = algo.features.par_length(ptext)
for w in ('=', '--', '[', '|', ','):
features['first_line_contains_%s' % w] = algo.features.first_line_contains(ptext, w)
features['first_line_length'] = algo.features.first_line_length(ptext)
features['first_line_word_count'] = algo.features.first_line_word_count(ptext)
features['is_good_section'] = algo.features.is_good_section(paragraph)
features['word_count'] = algo.features.word_count(ptext)
p_text = paragraph.clean_text()
logger.debug(f"length of p_text: {len(p_text)}")
assert p_text

features["starts_with_hyphen"] = algo.features.starts_with_hyphen(p_text)
features["is_indented"] = algo.features.is_indented(p_text)
features["par_length"] = algo.features.par_length(p_text)
for w in ("=", "--", "[", "|", ","):
features[f"first_line_contains_{w}"] = algo.features.first_line_contains(
p_text, w
)
features["first_line_length"] = algo.features.first_line_length(p_text)
features["first_line_word_count"] = algo.features.first_line_word_count(p_text)
features["is_good_section"] = algo.features.is_good_section(paragraph)
features["word_count"] = algo.features.word_count(p_text)

return features

class classifier(object):
'''classify the paragraphs of a man page as having command line options
or not'''

class Classifier:
"""classify the paragraphs of a man page as having command line options
or not"""

def __init__(self, store, algo, **classifier_args):
self.store = store
self.algo = algo
Expand All @@ -39,59 +47,60 @@ def train(self):
if self.classifier:
return

manpages = self.store.trainingset()
man_pages = self.store.training_set()

# flatten the manpages so we get a list of (manpage-name, paragraph)
def flatten_manpages(manpage):
l = []
p_list = []
for para in manpage.paragraphs:
l.append(para)
return l
paragraphs = itertools.chain(*[flatten_manpages(m) for m in manpages])
p_list.append(para)
return p_list

paragraphs = itertools.chain(*[flatten_manpages(m) for m in man_pages])
training = list(paragraphs)

negids = [p for p in training if not p.is_option]
posids = [p for p in training if p.is_option]
neg_ids = [p for p in training if not p.is_option]
pos_ids = [p for p in training if p.is_option]

negfeats = [(get_features(p), False) for p in negids]
posfeats = [(get_features(p), True) for p in posids]
neg_feats = [(get_features(p), False) for p in neg_ids]
pos_feats = [(get_features(p), True) for p in pos_ids]

negcutoff = len(negfeats)*3/4
poscutoff = len(posfeats)*3/4
neg_cutoff = int(len(neg_feats) * 3 / 4)
pos_cutoff = int(len(pos_feats) * 3 / 4)

trainfeats = negfeats[:negcutoff] + posfeats[:poscutoff]
self.testfeats = negfeats[negcutoff:] + posfeats[poscutoff:]
train_feats = neg_feats[:neg_cutoff] + pos_feats[:pos_cutoff]
self.test_feats = neg_feats[neg_cutoff:] + pos_feats[pos_cutoff:]

logger.info('train on %d instances', len(trainfeats))
logger.info("train on %d instances", len(train_feats))

if self.algo == 'maxent':
if self.algo == "maxent":
c = nltk.classify.maxent.MaxentClassifier
elif self.algo == 'bayes':
elif self.algo == "bayes":
c = nltk.classify.NaiveBayesClassifier
else:
raise ValueError('unknown classifier')
raise ValueError("unknown classifier")

self.classifier = c.train(trainfeats, **self.classifier_args)
self.classifier = c.train(train_feats, **self.classifier_args)

def evaluate(self):
self.train()
refsets = collections.defaultdict(set)
testsets = collections.defaultdict(set)
ref_sets = collections.defaultdict(set)
test_sets = collections.defaultdict(set)

for i, (feats, label) in enumerate(self.testfeats):
refsets[label].add(i)
for i, (feats, label) in enumerate(self.test_feats):
ref_sets[label].add(i)
guess = self.classifier.prob_classify(feats)
observed = guess.max()
testsets[observed].add(i)
#if label != observed:
# print 'label:', label, 'observed:', observed, feats
test_sets[observed].add(i)
# if label != observed:
# print('label:', label, 'observed:', observed, feats

print 'pos precision:', nltk.metrics.precision(refsets[True], testsets[True])
print 'pos recall:', nltk.metrics.recall(refsets[True], testsets[True])
print 'neg precision:', nltk.metrics.precision(refsets[False], testsets[False])
print 'neg recall:', nltk.metrics.recall(refsets[False], testsets[False])
print("pos precision:", nltk.metrics.precision(ref_sets[True], test_sets[True]))
print("pos recall:", nltk.metrics.recall(ref_sets[True], test_sets[True]))
print("neg precision:", nltk.metrics.precision(ref_sets[False], test_sets[False]))
print("neg recall:", nltk.metrics.recall(ref_sets[False], test_sets[False]))

print self.classifier.show_most_informative_features(10)
print(self.classifier.show_most_informative_features(10))

def classify(self, manpage):
self.train()
Expand All @@ -102,10 +111,9 @@ def classify(self, manpage):
option = guess.max()
certainty = guess.prob(option)

if option:
if certainty < config.CLASSIFIER_CUTOFF:
pass
else:
logger.info('classified %s (%f) as an option paragraph', item, certainty)
item.is_option = True
yield certainty, item
if option and certainty >= config.CLASSIFIER_CUTOFF:
logger.info(
"classified %s (%f) as an option paragraph", item, certainty
)
item.is_option = True
yield certainty, item
32 changes: 21 additions & 11 deletions explainshell/algo/features.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
import re


def extract_first_line(paragraph):
'''
"""
>>> extract_first_line('a b cd')
'a b'
>>> extract_first_line('a b cd')
Expand All @@ -10,54 +11,63 @@ def extract_first_line(paragraph):
'a b cd'
>>> extract_first_line(' a b cd')
'a b'
'''
"""
lines = paragraph.splitlines()
first = lines[0].strip()
spaces = list(re.finditer(r'(\s+)', first))
spaces = list(re.finditer(r"(\s+)", first))
# handle options that have their description in the first line by trying
# to treat it as two lines (looking at spaces between option and the rest
# of the text)
if spaces:
longest = max(spaces, key=lambda m: m.span()[1] - m.span()[0])
if longest and longest.start() > 1 and longest.end() - longest.start() > 1:
first = first[:longest.start()]
first = first[: longest.start()]
return first


def starts_with_hyphen(paragraph):
return paragraph.lstrip()[0] == '-'
return paragraph.lstrip()[0] == "-"


def is_indented(paragraph):
return paragraph != paragraph.lstrip()


def par_length(paragraph):
return round(len(paragraph.strip()), -1) / 2


def first_line_contains(paragraph, what):
l = paragraph.splitlines()[0]
return what in l
ln = paragraph.splitlines()[0]
return what in ln


def first_line_length(paragraph):
first = extract_first_line(paragraph)
return round(len(first), -1) / 2


def first_line_word_count(paragraph):
first = extract_first_line(paragraph)
splitted = [s for s in first.split() if len(s) > 1]

return round(len(splitted), -1)


def is_good_section(paragraph):
if not paragraph.section:
return False
s = paragraph.section.lower()
if 'options' in s:
if "options" in s:
return True
if s in ('description', 'function letters'):
if s in ("description", "function letters"):
return True
return False


def word_count(text):
return round(len(re.findall(r'\w+', text)), -1)
return round(len(re.findall(r"\w+", text)), -1)


def has_bold(html):
return '<b>' in html
return "<b>" in html
43 changes: 6 additions & 37 deletions explainshell/config.py
Original file line number Diff line number Diff line change
@@ -1,45 +1,14 @@
import os

_currdir = os.path.dirname(os.path.dirname(__file__))
_curr_dir = os.path.dirname(os.path.dirname(__file__))

MANPAGEDIR = os.path.join(_currdir, 'manpages')
MAN_PAGE_DIR = os.path.join(_curr_dir, "manpages")
CLASSIFIER_CUTOFF = 0.7
TOOLSDIR = os.path.join(_currdir, 'tools')
TOOLS_DIR = os.path.join(_curr_dir, "tools")

MAN2HTML = os.path.join(TOOLSDIR, 'w3mman2html.cgi')
MAN2HTML = os.path.join(TOOLS_DIR, "w3mman2html.cgi")

# host to pass into Flask's app.run.
HOST_IP = os.getenv('HOST_IP', False)
MONGO_URI = os.getenv('MONGO_URI', 'mongodb://localhost')
HOST_IP = os.getenv("HOST_IP", "")
MONGO_URI = os.getenv("MONGO_URI", "mongodb://localhost")
DEBUG = True

LOGGING_DICT = {
'version': 1,
'disable_existing_loggers': False,
'formatters': {
'standard': {
'format': '%(asctime)s [%(levelname)s] %(name)s: %(message)s'
},
},
'handlers': {
'console': {
'level' : 'INFO',
'class' : 'logging.StreamHandler',
'formatter': 'standard',
},
'file': {
'class': 'logging.FileHandler',
'level': 'INFO',
'formatter': 'standard',
'filename': 'application.log',
'mode': 'a',
},
},
'loggers': {
'explainshell': {
'handlers': ['console'],
'level': 'INFO',
'propagate': False
}
}
}
1 change: 1 addition & 0 deletions explainshell/errors.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
class ProgramDoesNotExist(Exception):
pass


class EmptyManpage(Exception):
pass
Loading
Loading