-
-
Notifications
You must be signed in to change notification settings - Fork 3.6k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Build: Allow multiple PDFs #10438
base: main
Are you sure you want to change the base?
Build: Allow multiple PDFs #10438
Changes from all commits
3f2bd1f
f7c3b58
8efd480
abea0cb
a39256d
032c7fd
b89dd23
653a0ce
64ada46
216592e
600b948
f452630
9bfe6f3
65e714b
99ffb3b
7b11369
8fadd42
d91a455
c610326
d13eaa2
01e3c2a
c5622b2
cc2231d
c1cc8eb
c49bbc1
0075280
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change | ||||
---|---|---|---|---|---|---|
|
@@ -6,6 +6,7 @@ | |||||
|
||||||
import itertools | ||||||
import os | ||||||
import shutil | ||||||
from glob import glob | ||||||
from pathlib import Path | ||||||
|
||||||
|
@@ -86,6 +87,14 @@ def __init__(self, *args, **kwargs): | |||||
), | ||||||
self.relative_output_dir, | ||||||
) | ||||||
|
||||||
# Isolate temporary files in the _readthedocs/ folder | ||||||
# Used for new feature ENABLE_MULTIPLE_PDFS | ||||||
self.absolute_host_tmp_root = os.path.join( | ||||||
self.project.checkout_path(self.version.slug), | ||||||
"_readthedocs/tmp", | ||||||
) | ||||||
Comment on lines
+91
to
+96
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. If we need a temporary directory we should |
||||||
|
||||||
self.absolute_container_output_dir = os.path.join( | ||||||
"$READTHEDOCS_OUTPUT", self.relative_output_dir | ||||||
) | ||||||
|
@@ -389,6 +398,7 @@ class LocalMediaBuilder(BaseSphinx): | |||||
|
||||||
def _post_build(self): | ||||||
"""Internal post build to create the ZIP file from the HTML output.""" | ||||||
|
||||||
target_file = os.path.join( | ||||||
self.absolute_container_output_dir, | ||||||
# TODO: shouldn't this name include the name of the version as well? | ||||||
|
@@ -435,41 +445,54 @@ class EpubBuilder(BaseSphinx): | |||||
relative_output_dir = "epub" | ||||||
|
||||||
def _post_build(self): | ||||||
"""Internal post build to cleanup EPUB output directory and leave only one .epub file.""" | ||||||
"""Internal post build to clean up EPUB output directory and leave only one .epub file.""" | ||||||
temp_epub_file = f"/tmp/{self.project.slug}-{self.version.slug}.epub" | ||||||
target_file = os.path.join( | ||||||
self.absolute_container_output_dir, | ||||||
f"{self.project.slug}.epub", | ||||||
) | ||||||
|
||||||
epub_sphinx_filepaths = glob( | ||||||
os.path.join(self.absolute_host_output_dir, "*.epub") | ||||||
) | ||||||
epub_sphinx_filepaths = self._get_epub_files_generated() | ||||||
if epub_sphinx_filepaths: | ||||||
# NOTE: we currently support only one .epub per version | ||||||
epub_filepath = epub_sphinx_filepaths[0] | ||||||
|
||||||
# Move out the epub file | ||||||
self.run( | ||||||
"mv", epub_filepath, temp_epub_file, cwd=self.project_path, record=False | ||||||
) | ||||||
|
||||||
# Delete everything recursively | ||||||
self.run( | ||||||
"rm", | ||||||
"--recursive", | ||||||
self.absolute_container_output_dir, | ||||||
cwd=self.project_path, | ||||||
record=False, | ||||||
) | ||||||
|
||||||
# Create everything again | ||||||
self.run( | ||||||
"mkdir", | ||||||
"--parents", | ||||||
self.absolute_container_output_dir, | ||||||
cwd=self.project_path, | ||||||
record=False, | ||||||
) | ||||||
|
||||||
# Restore the epub file | ||||||
self.run( | ||||||
"mv", temp_epub_file, target_file, cwd=self.project_path, record=False | ||||||
) | ||||||
|
||||||
def _get_epub_files_generated(self): | ||||||
""" | ||||||
Returns a list of *.epub generated by the sphinx build command. | ||||||
|
||||||
This is split out in a separate method to be test-friendly. | ||||||
""" | ||||||
return glob(os.path.join(self.absolute_host_output_dir, "*.epub")) | ||||||
|
||||||
|
||||||
class LatexBuildCommand(BuildCommand): | ||||||
|
||||||
|
@@ -501,9 +524,12 @@ class PdfBuilder(BaseSphinx): | |||||
|
||||||
relative_output_dir = "pdf" | ||||||
sphinx_builder = "latex" | ||||||
|
||||||
# TODO: Remove this when dissolving Feature.ENABLE_MULTIPLE_PDFS | ||||||
pdf_file_name = None | ||||||
|
||||||
def build(self): | ||||||
"""Runs Sphinx to convert to LaTeX, uses latexmk to build PDFs.""" | ||||||
self.run( | ||||||
*self.get_sphinx_cmd(), | ||||||
"-T", | ||||||
|
@@ -516,7 +542,7 @@ def build(self): | |||||
f"language={self.project.language}", | ||||||
# Sphinx's source directory (SOURCEDIR). | ||||||
# We are executing this command at the location of the `conf.py` file (CWD). | ||||||
# TODO: ideally we should execute it from where the repository was clonned, | ||||||
# TODO: ideally we should execute it from where the repository was cloned, | ||||||
# but that could lead unexpected behavior to some users: | ||||||
# https://github.com/readthedocs/readthedocs.org/pull/9888#issuecomment-1384649346 | ||||||
".", | ||||||
|
@@ -526,16 +552,28 @@ def build(self): | |||||
bin_path=self.python_env.venv_bin(), | ||||||
) | ||||||
|
||||||
tex_files = glob(os.path.join(self.absolute_host_output_dir, "*.tex")) | ||||||
if not tex_files: | ||||||
raise BuildUserError("No TeX files were found.") | ||||||
# Check that we generated .tex files. | ||||||
self._check_for_files("tex") | ||||||
|
||||||
# Run LaTeX -> PDF conversions | ||||||
success = self._build_latexmk(self.project_path) | ||||||
|
||||||
self._post_build() | ||||||
if self.project.has_feature(Feature.ENABLE_MULTIPLE_PDFS): | ||||||
self._post_build_multiple() | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||
else: | ||||||
self._post_build() | ||||||
return success | ||||||
|
||||||
def _check_for_files(self, extension): | ||||||
""" | ||||||
Check that a file pattern exists. | ||||||
|
||||||
This method mostly exists so we have a pattern that is test-friend (can be mocked). | ||||||
""" | ||||||
tex_files = glob(os.path.join(self.absolute_host_output_dir, f"*.{extension}")) | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This line could be combined with |
||||||
if not tex_files: | ||||||
raise BuildUserError("No *.{extension} files were found.") | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The build error string should be an attribute like |
||||||
|
||||||
def _build_latexmk(self, cwd): | ||||||
# These steps are copied from the Makefile generated by Sphinx >= 1.6 | ||||||
# https://github.com/sphinx-doc/sphinx/blob/master/sphinx/texinputs/Makefile_t | ||||||
|
@@ -585,24 +623,44 @@ def _build_latexmk(self, cwd): | |||||
# When ``-f`` is used, latexmk will continue building if it | ||||||
# encounters errors. We still receive a failure exit code in this | ||||||
# case, but the correct steps should run. | ||||||
'-f', | ||||||
'-dvi-', | ||||||
'-ps-', | ||||||
f'-jobname={self.project.slug}', | ||||||
'-interaction=nonstopmode', | ||||||
"-f", | ||||||
"-dvi-", | ||||||
"-ps-", | ||||||
"-interaction=nonstopmode", | ||||||
] | ||||||
|
||||||
if self.project.has_feature(Feature.ENABLE_MULTIPLE_PDFS): | ||||||
cmd.append(f"-jobname={self.project.slug}_%A") | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It would be good to add a comment here explaining what There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I suppose here we want to use the filename the user has specified in |
||||||
else: | ||||||
cmd.append( | ||||||
f"-jobname={self.project.slug}", | ||||||
) | ||||||
|
||||||
cmd_ret = self.build_env.run_command_class( | ||||||
cls=latex_class, | ||||||
cmd=cmd, | ||||||
warn_only=True, | ||||||
cwd=self.absolute_host_output_dir, | ||||||
) | ||||||
|
||||||
self.pdf_file_name = f"{self.project.slug}.pdf" | ||||||
if self.project.has_feature(Feature.ENABLE_MULTIPLE_PDFS): | ||||||
pdf_files = glob(os.path.join(self.absolute_host_output_dir, "*.pdf")) | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Same as |
||||||
|
||||||
# There is only 1 PDF file. We will call it project_slug.pdf | ||||||
# This is the old behavior. | ||||||
if len(pdf_files) == 1: | ||||||
os.rename( | ||||||
os.path.join(self.absolute_host_output_dir, pdf_files[0]), | ||||||
os.path.join( | ||||||
self.absolute_host_output_dir, f"{self.project.slug}.pdf" | ||||||
), | ||||||
) | ||||||
Comment on lines
+649
to
+657
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'm not sure to understand why we would like to keep "the old behavior here". I think we will always want "the new behavior" for projects using this feature. However, I'm not sure to understand what would be the filename of the resulting PDF in the new behavior --but it should be the name the user has defined in the |
||||||
else: | ||||||
self.pdf_file_name = f"{self.project.slug}.pdf" | ||||||
|
||||||
return cmd_ret.successful | ||||||
|
||||||
# Removed by Feature.ENABLE_MULTIPLE_PDFS | ||||||
def _post_build(self): | ||||||
"""Internal post build to cleanup PDF output directory and leave only one .pdf file.""" | ||||||
|
||||||
|
@@ -649,3 +707,61 @@ def _post_build(self): | |||||
self.run( | ||||||
"mv", temp_pdf_file, target_file, cwd=self.project_path, record=False | ||||||
) | ||||||
|
||||||
# Introduced by Feature.ENABLE_MULTIPLE_PDFS | ||||||
def _post_build_multiple(self): | ||||||
"""Internal post build to cleanup PDF output directory and leave only .pdf files.""" | ||||||
|
||||||
pdf_files = self._pdf_files_generated() | ||||||
if not pdf_files: | ||||||
raise PDFNotFound() | ||||||
|
||||||
if not os.path.exists(self.absolute_host_tmp_root): | ||||||
os.makedirs(self.absolute_host_tmp_root) | ||||||
|
||||||
# We cannot use '*' in commands sent to the host machine, the asterisk gets escaped. | ||||||
# So we opt for iterating from outside the container | ||||||
pdf_file_names = [] | ||||||
for fname in pdf_files: | ||||||
shutil.move( | ||||||
fname, | ||||||
os.path.join(self.absolute_host_tmp_root, os.path.basename(fname)), | ||||||
) | ||||||
pdf_file_names.append(os.path.basename(fname)) | ||||||
Comment on lines
+722
to
+730
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We could probably use |
||||||
|
||||||
# Delete everything from the output dir | ||||||
self.run( | ||||||
"rm", | ||||||
"-r", | ||||||
self.absolute_host_output_dir, | ||||||
cwd=self.project_path, | ||||||
record=False, | ||||||
) | ||||||
|
||||||
# Recreate the output dir | ||||||
self.run( | ||||||
"mkdir", | ||||||
"-p", | ||||||
self.absolute_host_output_dir, | ||||||
cwd=self.project_path, | ||||||
record=False, | ||||||
) | ||||||
|
||||||
# Move the PDFs back | ||||||
# Note: We cannot use '*' in commands sent to the host machine, the asterisk gets escaped. | ||||||
for basename in pdf_file_names: | ||||||
self.run( | ||||||
"mv", | ||||||
os.path.join(self.absolute_host_tmp_root, basename), | ||||||
self.absolute_host_output_dir, | ||||||
cwd=self.project_path, | ||||||
record=False, | ||||||
) | ||||||
|
||||||
def _pdf_files_generated(self): | ||||||
""" | ||||||
Return a list of pdf files generated by the command. | ||||||
|
||||||
This method is mainly here to be test and mock friendly. | ||||||
""" | ||||||
return glob(os.path.join(self.absolute_host_output_dir, "*.pdf")) |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -47,6 +47,15 @@ | |
MEDIA_TYPE_JSON, | ||
) | ||
|
||
# Map media types to their know extensions. | ||
# This is used for validating and uploading artifacts. | ||
MEDIA_TYPES_EXTENSIONS = { | ||
MEDIA_TYPE_PDF: ("pdf",), | ||
MEDIA_TYPE_EPUB: ("epub",), | ||
MEDIA_TYPE_HTMLZIP: ("zip",), | ||
MEDIA_TYPE_JSON: ("json",), | ||
} | ||
Comment on lines
+50
to
+57
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Are we using a tuple in the value here because we want to have multiple extensions per each media type? |
||
|
||
BUILD_COMMANDS_OUTPUT_PATH = "_readthedocs/" | ||
BUILD_COMMANDS_OUTPUT_PATH_HTML = os.path.join(BUILD_COMMANDS_OUTPUT_PATH, "html") | ||
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,7 +1,9 @@ | ||
from django.db import models | ||
|
||
from .querysets import ImportedFileQuerySet | ||
|
||
|
||
class HTMLFileManager(models.Manager): | ||
|
||
def get_queryset(self): | ||
return super().get_queryset().filter(name__endswith='.html') | ||
return ImportedFileQuerySet(self.model, using=self._db).html_files() |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Why don't we pass
filter_extensions
here directly instead of a dictionary without knowing exactly what it has inside?