readthedocs · benjaoming · Jun 14, 2023 · Jun 20, 2023 · Jun 20, 2023 · Jun 21, 2023
@@ -156,6 +156,7 @@
 
 # All artifact types supported by Read the Docs.
 # They match the output directory (`_readthedocs/<artifact type>`)
+# TODO: Remove this and use constants.MEDIA_TYPES instead
 ARTIFACT_TYPES = (
     "html",
     "json",
@@ -177,3 +178,11 @@
     "epub",
     "pdf",
 )
+
+# Part of Feature: ENABLE_MULTIPLE_PDFS
+# Should replace ARTIFACT_TYPES_WITHOUT_MULTIPLE_FILES_SUPPORT
+# Currently hidden behind a feature flag
+ARTIFACT_TYPES_WITHOUT_MULTIPLE_FILES_SUPPORT_NO_PDF = (
+    "htmlzip",
+    "epub",
+)
@@ -132,13 +132,13 @@ def _check_suspicious_path(self, path):
     def _rclone(self):
         raise NotImplementedError
 
-    def rclone_sync_directory(self, source, destination):
+    def rclone_sync_directory(self, source, destination, **sync_kwargs):
         """Sync a directory recursively to storage using rclone sync."""
         if destination in ("", "/"):
             raise SuspiciousFileOperation("Syncing all storage cannot be right")
 
         self._check_suspicious_path(source)
-        return self._rclone.sync(source, destination)
+        return self._rclone.sync(source, destination, **sync_kwargs)
 
     def join(self, directory, filepath):
         return safe_join(directory, filepath)

@@ -6,6 +6,7 @@
 
 import itertools
 import os
+import shutil
 from glob import glob
 from pathlib import Path
 
@@ -86,6 +87,14 @@ def __init__(self, *args, **kwargs):
             ),
             self.relative_output_dir,
         )
+
+        # Isolate temporary files in the _readthedocs/ folder
+        # Used for new feature ENABLE_MULTIPLE_PDFS
+        self.absolute_host_tmp_root = os.path.join(
+            self.project.checkout_path(self.version.slug),
+            "_readthedocs/tmp",
+        )
+
         self.absolute_container_output_dir = os.path.join(
             "$READTHEDOCS_OUTPUT", self.relative_output_dir
         )
@@ -389,6 +398,7 @@ class LocalMediaBuilder(BaseSphinx):
 
     def _post_build(self):
         """Internal post build to create the ZIP file from the HTML output."""
+
         target_file = os.path.join(
             self.absolute_container_output_dir,
             # TODO: shouldn't this name include the name of the version as well?
@@ -435,41 +445,54 @@ class EpubBuilder(BaseSphinx):
     relative_output_dir = "epub"
 
     def _post_build(self):
-        """Internal post build to cleanup EPUB output directory and leave only one .epub file."""
+        """Internal post build to clean up EPUB output directory and leave only one .epub file."""
         temp_epub_file = f"/tmp/{self.project.slug}-{self.version.slug}.epub"
         target_file = os.path.join(
             self.absolute_container_output_dir,
             f"{self.project.slug}.epub",
         )
 
-        epub_sphinx_filepaths = glob(
-            os.path.join(self.absolute_host_output_dir, "*.epub")
-        )
+        epub_sphinx_filepaths = self._get_epub_files_generated()
         if epub_sphinx_filepaths:
             # NOTE: we currently support only one .epub per version
             epub_filepath = epub_sphinx_filepaths[0]
 
+            # Move out the epub file
             self.run(
                 "mv", epub_filepath, temp_epub_file, cwd=self.project_path, record=False
             )
+
+            # Delete everything recursively
             self.run(
                 "rm",
                 "--recursive",
                 self.absolute_container_output_dir,
                 cwd=self.project_path,
                 record=False,
             )
+
+            # Create everything again
             self.run(
                 "mkdir",
                 "--parents",
                 self.absolute_container_output_dir,
                 cwd=self.project_path,
                 record=False,
             )
+
+            # Restore the epub file
             self.run(
                 "mv", temp_epub_file, target_file, cwd=self.project_path, record=False
             )
 
+    def _get_epub_files_generated(self):
+        """
+        Returns a list of *.epub generated by the sphinx build command.
+
+        This is split out in a separate method to be test-friendly.
+        """
+        return glob(os.path.join(self.absolute_host_output_dir, "*.epub"))
+
 
 class LatexBuildCommand(BuildCommand):
 
@@ -501,9 +524,12 @@ class PdfBuilder(BaseSphinx):
 
     relative_output_dir = "pdf"
     sphinx_builder = "latex"
+
+    # TODO: Remove this when dissolving Feature.ENABLE_MULTIPLE_PDFS
     pdf_file_name = None
 
     def build(self):
+        """Runs Sphinx to convert to LaTeX, uses latexmk to build PDFs."""
         self.run(
             *self.get_sphinx_cmd(),
             "-T",
@@ -516,7 +542,7 @@ def build(self):
             f"language={self.project.language}",
             # Sphinx's source directory (SOURCEDIR).
             # We are executing this command at the location of the `conf.py` file (CWD).
-            # TODO: ideally we should execute it from where the repository was clonned,
+            # TODO: ideally we should execute it from where the repository was cloned,
             # but that could lead unexpected behavior to some users:
             # https://github.com/readthedocs/readthedocs.org/pull/9888#issuecomment-1384649346
             ".",
@@ -526,16 +552,28 @@ def build(self):
             bin_path=self.python_env.venv_bin(),
         )
 
-        tex_files = glob(os.path.join(self.absolute_host_output_dir, "*.tex"))
-        if not tex_files:
-            raise BuildUserError("No TeX files were found.")
+        # Check that we generated .tex files.
+        self._check_for_files("tex")
 
         # Run LaTeX -> PDF conversions
         success = self._build_latexmk(self.project_path)
 
-        self._post_build()
+        if self.project.has_feature(Feature.ENABLE_MULTIPLE_PDFS):
+            self._post_build_multiple()
-            self._post_build_multiple()
+            self._post_build_multiple_pdf()
-            self._post_build_multiple()
+            self._post_build_multiple_pdf()
+        else:
+            self._post_build()
         return success
 
+    def _check_for_files(self, extension):
+        """
+        Check that a file pattern exists.
+
+        This method mostly exists so we have a pattern that is test-friend (can be mocked).
+        """
+        tex_files = glob(os.path.join(self.absolute_host_output_dir, f"*.{extension}"))
+        if not tex_files:
+            raise BuildUserError("No *.{extension} files were found.")
+
     def _build_latexmk(self, cwd):
         # These steps are copied from the Makefile generated by Sphinx >= 1.6
         # https://github.com/sphinx-doc/sphinx/blob/master/sphinx/texinputs/Makefile_t
@@ -585,24 +623,44 @@ def _build_latexmk(self, cwd):
             # When ``-f`` is used, latexmk will continue building if it
             # encounters errors. We still receive a failure exit code in this
             # case, but the correct steps should run.
-            '-f',
-            '-dvi-',
-            '-ps-',
-            f'-jobname={self.project.slug}',
-            '-interaction=nonstopmode',
+            "-f",
+            "-dvi-",
+            "-ps-",
+            "-interaction=nonstopmode",
         ]
 
+        if self.project.has_feature(Feature.ENABLE_MULTIPLE_PDFS):
+            cmd.append(f"-jobname={self.project.slug}_%A")
+        else:
+            cmd.append(
+                f"-jobname={self.project.slug}",
+            )
+
         cmd_ret = self.build_env.run_command_class(
             cls=latex_class,
             cmd=cmd,
             warn_only=True,
             cwd=self.absolute_host_output_dir,
         )
 
-        self.pdf_file_name = f"{self.project.slug}.pdf"
+        if self.project.has_feature(Feature.ENABLE_MULTIPLE_PDFS):
+            pdf_files = glob(os.path.join(self.absolute_host_output_dir, "*.pdf"))
+
+            # There is only 1 PDF file. We will call it project_slug.pdf
+            # This is the old behavior.
+            if len(pdf_files) == 1:
+                os.rename(
+                    os.path.join(self.absolute_host_output_dir, pdf_files[0]),
+                    os.path.join(
+                        self.absolute_host_output_dir, f"{self.project.slug}.pdf"
+                    ),
+                )
+        else:
+            self.pdf_file_name = f"{self.project.slug}.pdf"
 
         return cmd_ret.successful
 
+    # Removed by Feature.ENABLE_MULTIPLE_PDFS
     def _post_build(self):
         """Internal post build to cleanup PDF output directory and leave only one .pdf file."""
 
@@ -649,3 +707,61 @@ def _post_build(self):
             self.run(
                 "mv", temp_pdf_file, target_file, cwd=self.project_path, record=False
             )
+
+    # Introduced by Feature.ENABLE_MULTIPLE_PDFS
+    def _post_build_multiple(self):
+        """Internal post build to cleanup PDF output directory and leave only .pdf files."""
+
+        pdf_files = self._pdf_files_generated()
+        if not pdf_files:
+            raise PDFNotFound()
+
+        if not os.path.exists(self.absolute_host_tmp_root):
+            os.makedirs(self.absolute_host_tmp_root)
+
+        # We cannot use '*' in commands sent to the host machine, the asterisk gets escaped.
+        # So we opt for iterating from outside the container
+        pdf_file_names = []
+        for fname in pdf_files:
+            shutil.move(
+                fname,
+                os.path.join(self.absolute_host_tmp_root, os.path.basename(fname)),
+            )
+            pdf_file_names.append(os.path.basename(fname))
+
+        # Delete everything from the output dir
+        self.run(
+            "rm",
+            "-r",
+            self.absolute_host_output_dir,
+            cwd=self.project_path,
+            record=False,
+        )
+
+        # Recreate the output dir
+        self.run(
+            "mkdir",
+            "-p",
+            self.absolute_host_output_dir,
+            cwd=self.project_path,
+            record=False,
+        )
+
+        # Move the PDFs back
+        # Note: We cannot use '*' in commands sent to the host machine, the asterisk gets escaped.
+        for basename in pdf_file_names:
+            self.run(
+                "mv",
+                os.path.join(self.absolute_host_tmp_root, basename),
+                self.absolute_host_output_dir,
+                cwd=self.project_path,
+                record=False,
+            )
+
+    def _pdf_files_generated(self):
+        """
+        Return a list of pdf files generated by the command.
+
+        This method is mainly here to be test and mock friendly.
+        """
+        return glob(os.path.join(self.absolute_host_output_dir, "*.pdf"))
@@ -47,6 +47,15 @@
     MEDIA_TYPE_JSON,
 )
 
+# Map media types to their know extensions.
+# This is used for validating and uploading artifacts.
+MEDIA_TYPES_EXTENSIONS = {
+    MEDIA_TYPE_PDF: ("pdf",),
+    MEDIA_TYPE_EPUB: ("epub",),
+    MEDIA_TYPE_HTMLZIP: ("zip",),
+    MEDIA_TYPE_JSON: ("json",),
+}
+
 BUILD_COMMANDS_OUTPUT_PATH = "_readthedocs/"
 BUILD_COMMANDS_OUTPUT_PATH_HTML = os.path.join(BUILD_COMMANDS_OUTPUT_PATH, "html")
 

@@ -1,7 +1,9 @@
 from django.db import models
 
+from .querysets import ImportedFileQuerySet
+
 
 class HTMLFileManager(models.Manager):
 
     def get_queryset(self):
-        return super().get_queryset().filter(name__endswith='.html')
+        return ImportedFileQuerySet(self.model, using=self._db).html_files()