
[WIP] Remote caching support #3971

Open · wants to merge 4 commits into master

2 changes: 1 addition & 1 deletion .appveyor/install.bat
@@ -3,7 +3,7 @@ for /F "tokens=*" %%g in ('C:\\%WINPYTHON%\\python.exe -c "import sys; print(sys
REM use mingw 32 bit until #3291 is resolved
set PATH=C:\\%WINPYTHON%;C:\\%WINPYTHON%\\Scripts;C:\\ProgramData\\chocolatey\\bin;C:\\MinGW\\bin;C:\\MinGW\\msys\\1.0\\bin;C:\\cygwin\\bin;C:\\msys64\\usr\\bin;C:\\msys64\\mingw64\\bin;%PATH%
C:\\%WINPYTHON%\\python.exe -m pip install -U --progress-bar off pip setuptools wheel
C:\\%WINPYTHON%\\python.exe -m pip install -U --progress-bar off coverage codecov
C:\\%WINPYTHON%\\python.exe -m pip install -U --progress-bar off urllib3 coverage codecov
set STATIC_DEPS=true & C:\\%WINPYTHON%\\python.exe -m pip install -U --progress-bar off lxml
C:\\%WINPYTHON%\\python.exe -m pip install -U --progress-bar off -r requirements.txt
REM install 3rd party tools to test with
14 changes: 14 additions & 0 deletions CHANGES.txt
@@ -56,6 +56,20 @@ RELEASE 4.2.0 - Sat, 31 Jul 2021 18:12:46 -0700
- As part of experimental ninja tool, allow SetOption() to set both disable_execute_ninja and
disable_ninja.

From Adam Gross:
- Added support for remote caching. This feature allows build outputs to be fetched from
and pushed to a Bazel remote cache server, or any similar server that supports GET and PUT
requests against /ac/ and /cas/ paths using SHA-256 file names. See
https://github.com/buchgr/bazel-remote for more details on such a server. New parameters introduced:
--remote-cache-fetch-enabled: Enables fetch of build output from the server
--remote-cache-push-enabled: Enables push of build output to the server
--remote-cache-url: Required if fetch or push is enabled
--remote-cache-connections: Connection count (defaults to 100)
- Added support for a new parameter --use-scheduler-v2 that opts into a newer, more aggressive
parallel scheduler. Rather than waiting on running jobs when the job queue is full, this
scheduler keeps scanning for new tasks. It is expected to improve the performance of your
build as long as you don't have very large actions that cause poor scanning performance.
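As a rough illustration of the protocol the remote-caching entry above describes, a bazel-remote style server stores content-addressed blobs under /cas/ and action results under /ac/, keyed by SHA-256 and accessed with plain GET/PUT. The sketch below is illustrative only: the class and method names are hypothetical and the PR's actual SCons/RemoteCache.py is not shown in this diff. /ac/ requests follow the same GET/PUT pattern, keyed by an action digest rather than by file contents.

    # Hypothetical sketch of a bazel-remote style client; the PR's real
    # SCons/RemoteCache.py may differ.
    import hashlib
    from typing import Optional

    import urllib3

    class SketchRemoteCacheClient:
        def __init__(self, url, connections=100):
            # url and connections correspond to --remote-cache-url and
            # --remote-cache-connections.
            self.url = url.rstrip('/')
            self.pool = urllib3.PoolManager(maxsize=connections)

        def push_blob(self, data: bytes) -> str:
            """Store contents in the CAS under their SHA-256 digest."""
            digest = hashlib.sha256(data).hexdigest()
            self.pool.request('PUT', '%s/cas/%s' % (self.url, digest), body=data)
            return digest

        def fetch_blob(self, digest: str) -> Optional[bytes]:
            """Fetch contents by SHA-256 digest; return None on a miss."""
            response = self.pool.request('GET', '%s/cas/%s' % (self.url, digest))
            return response.data if response.status == 200 else None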

From David H:
- Fix Issue #3906 - `IMPLICIT_COMMAND_DEPENDENCIES` was not properly disabled when
set to any string value (For example ['none','false','no','off'])
234 changes: 208 additions & 26 deletions SCons/Job.py
@@ -21,13 +21,14 @@
# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

"""Serial and Parallel classes to execute build tasks.
"""Serial, Parallel, and ParallelV2 classes to execute build tasks.

The Jobs class provides a higher level interface to start,
stop, and wait on jobs.
"""

import SCons.compat
import SCons.Node

import os
import signal
@@ -64,7 +65,8 @@ class Jobs:
methods for starting, stopping, and waiting on all N jobs.
"""

def __init__(self, num, taskmaster):
def __init__(self, num, taskmaster, remote_cache=None,
use_scheduler_v2=False):
"""
Create 'num' jobs using the given taskmaster.

@@ -76,19 +78,29 @@ def __init__(self, num, taskmaster):
allocated. If more than one job is requested but the Parallel
class can't do it, it gets reset to 1. Wrapping interfaces that
care should check the value of 'num_jobs' after initialization.

'remote_cache' can be set to a RemoteCache.RemoteCache object.

'use_scheduler_v2' can be set to True to opt into the newer and more
aggressive scheduler.
"""

self.job = None
if num > 1:
stack_size = explicit_stack_size
if stack_size is None:
stack_size = default_stack_size

try:
stack_size = explicit_stack_size
if stack_size is None:
stack_size = default_stack_size

try:
if ((remote_cache and remote_cache.fetch_enabled) or
use_scheduler_v2):
self.job = ParallelV2(taskmaster, num, stack_size, remote_cache)
dmoody256 (Contributor) commented on Jul 21, 2021:

Could this be isolated into a tool and duck-typed in when the tool is loaded? Then SCons would not be forced to take on a urllib3 dependency. Is the dependency needed if you are just using the scheduler aspect?

Something similar was done with the ninja tool: https://github.com/SCons/scons/blob/master/SCons/Tool/ninja/__init__.py#L382

It's a bit hacky, but these internal classes don't really have any other exposure.

The PR author (Contributor) replied:

FWIW urllib3 isn't strictly required by SCons with my change; it is still only required if you enable remote caching. Otherwise, you can still run SCons without urllib3.

I am honestly not sure about the idea of putting the new scheduler in a tool. It may also depend on whether people want this ParallelV2 to replace Parallel as a new-and-improved scheduler.

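A rough sketch of the optional-dependency arrangement that reply describes: urllib3 is imported only when remote caching is actually requested, so plain builds never need it. The helper name and the RemoteCache constructor signature below are illustrative assumptions; the real wiring lives in the PR's SCons/RemoteCache.py, which is not shown here.

    import SCons.Errors

    def maybe_create_remote_cache(fetch_enabled, push_enabled, url, connections=100):
        # Remote caching disabled: return None and never touch urllib3.
        if not (fetch_enabled or push_enabled):
            return None
        try:
            import urllib3  # noqa: F401 -- needed only for remote caching
        except ImportError:
            raise SCons.Errors.UserError(
                "remote caching requires the urllib3 package")
        import SCons.RemoteCache
        return SCons.RemoteCache.RemoteCache(url, connections)
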
elif num > 1:
self.job = Parallel(taskmaster, num, stack_size)
self.num_jobs = num
except NameError:
pass
self.num_jobs = num
except NameError:
pass

if self.job is None:
self.job = Serial(taskmaster)
self.num_jobs = 1
@@ -359,7 +371,6 @@ def __init__(self, taskmaster, num, stack_size):
self.taskmaster = taskmaster
self.interrupted = InterruptState()
self.tp = ThreadPool(num, stack_size, self.interrupted)

self.maxjobs = num

def start(self):
@@ -399,28 +410,199 @@ def start(self):
# Let any/all completed tasks finish up before we go
# back and put the next batch of tasks on the queue.
while True:
task, ok = self.tp.get()
self.process_result()
jobs = jobs - 1

if ok:
task.executed()
else:
if self.interrupted():
try:
raise SCons.Errors.BuildError(
task.targets[0], errstr=interrupt_msg)
except:
task.exception_set()

# Let the failed() callback function arrange
# for the build to stop if that's appropriate.
task.failed()
if self.tp.resultsQueue.empty():
break

self.tp.cleanup()
self.taskmaster.cleanup()

def process_result(self):
task, ok = self.tp.get()

if ok:
task.executed()
else:
if self.interrupted():
try:
raise SCons.Errors.BuildError(
task.targets[0], errstr=interrupt_msg)
except:
task.exception_set()

# Let the failed() callback function arrange
# for the build to stop if that's appropriate.
task.failed()

task.postprocess()

class ParallelV2(Parallel):
"""
This class is an extension of the Parallel class that provides two main
improvements:

1. Minimizes time waiting for jobs by fetching tasks.
2. Supports remote caching.
"""
__slots__ = ['remote_cache']

def __init__(self, taskmaster, num, stack_size, remote_cache):
super(ParallelV2, self).__init__(taskmaster, num, stack_size)

self.remote_cache = remote_cache

def get_next_task_to_execute(self, limit):
"""
Finds the next task that is ready for execution. If limit is 0,
this function fetches until a task is found ready to execute.
Otherwise, this function will fetch up to "limit" number of tasks.

Returns tuple with:
1. Task to execute.
2. False if a call to next_task returned None, True otherwise.
"""
count = 0
while limit == 0 or count < limit:
task = self.taskmaster.next_task()
if task is None:
return None, False

try:
# prepare task for execution
task.prepare()
except:
task.exception_set()
task.failed()
task.postprocess()
else:
if task.needs_execute():
return task, True
else:
task.executed()
task.postprocess()

if self.tp.resultsQueue.empty():
count = count + 1

# We hit the limit of tasks to retrieve.
return None, True

def start(self):
fetch_response_queue = queue.Queue(0)
if self.remote_cache:
self.remote_cache.set_fetch_response_queue(
fetch_response_queue)

jobs = 0
tasks_left = True
pending_fetches = 0
cache_hits = 0
cache_misses = 0
cache_skips = 0
cache_suspended = 0

while True:
fetch_limit = 0 if jobs == 0 and pending_fetches == 0 else 1
if tasks_left:
task, tasks_left = \
self.get_next_task_to_execute(fetch_limit)
else:
task = None

if not task and not tasks_left and jobs == 0 and \
pending_fetches == 0:
# No tasks left, no jobs, no cache fetches.
break

while jobs > 0:
# Break if there are no results available and one of the
# following is true:
# 1. There are tasks left.
# 2. There is at least one job slot open and at least one
# remote cache fetch pending.
# Otherwise we want to wait for jobs because the most
# important factor for build speed is keeping the job
# queue full.
if ((tasks_left or
(jobs < self.maxjobs and pending_fetches > 0))
and self.tp.resultsQueue.empty()):
break

self.process_result()
jobs = jobs - 1

# Tasks could have been unblocked, so we should check
# again.
tasks_left = True

while pending_fetches > 0:
# Trimming the remote cache fetch queue is the least
# important job, so we only block if there are no responses
# available, no tasks left to fetch, and no active jobs.
if ((tasks_left or jobs > 0) and
fetch_response_queue.empty()):
break

cache_task, cache_hit, target_infos = \
fetch_response_queue.get()
pending_fetches = pending_fetches - 1

if cache_hit:
cache_hits = cache_hits + 1
cache_task.executed(target_infos=target_infos)
cache_task.postprocess()

# Tasks could have been unblocked, so we should check
# again.
tasks_left = True
else:
cache_misses = cache_misses + 1
self.tp.put(cache_task)
jobs = jobs + 1

if task:
# Tasks should first go to the remote cache if enabled.
if self.remote_cache:
fetch_pending, task_cacheable = \
self.remote_cache.fetch_task(task)
else:
fetch_pending = task_cacheable = False

if fetch_pending:
pending_fetches = pending_fetches + 1
else:
# Fetch is not pending because remote cache is not
# being used or the task was not cacheable.
#
# Count the number of non-cacheable tasks but don't
# count tasks with 1 target that is an alias, because
# they are not actually run.
if (len(task.targets) > 1 or
not isinstance(task.targets[0],
SCons.Node.Alias.Alias)):
if task_cacheable:
cache_suspended = cache_suspended + 1
else:
cache_skips = cache_skips + 1
self.tp.put(task)
jobs = jobs + 1

# Instruct the remote caching layer to log information about
# the cache hit rate.
cache_count = cache_hits + cache_misses + cache_suspended
task_count = cache_count + cache_skips
if self.remote_cache and task_count > 0:
reset_count = self.remote_cache.reset_count
total_failures = self.remote_cache.total_failure_count
hit_pct = (cache_hits * 100.0 / cache_count if cache_count
else 0.0)
cacheable_pct = cache_count * 100.0 / task_count
self.remote_cache.log_stats(
hit_pct, cache_count, cache_hits, cache_misses,
cache_suspended, cacheable_pct, cache_skips, task_count,
total_failures, reset_count)

self.tp.cleanup()
self.taskmaster.cleanup()

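Read schematically, the constructor change at the top of this file selects the scheduler roughly as follows. This is a simplified sketch that ignores the stack-size plumbing and the NameError fallback to Serial:

    def pick_scheduler(num, remote_cache, use_scheduler_v2):
        # Mirrors the branch in Jobs.__init__ above: ParallelV2 whenever
        # remote-cache fetching or --use-scheduler-v2 is enabled, otherwise
        # the pre-existing Parallel/Serial behavior.
        if (remote_cache and remote_cache.fetch_enabled) or use_scheduler_v2:
            return "ParallelV2"
        if num > 1:
            return "Parallel"
        return "Serial"

    assert pick_scheduler(1, None, False) == "Serial"
    assert pick_scheduler(8, None, False) == "Parallel"
    assert pick_scheduler(8, None, True) == "ParallelV2"
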
23 changes: 8 additions & 15 deletions SCons/Node/FS.py
@@ -1118,6 +1118,9 @@ class LocalFS:
really need this one?
"""

def access(self, path, mode):
return os.access(path, mode)

def chmod(self, path, mode):
return os.chmod(path, mode)

@@ -3006,20 +3009,10 @@ def push_to_cache(self):
if self.exists():
self.get_build_env().get_CacheDir().push(self)

def retrieve_from_cache(self):
"""Try to retrieve the node's content from a cache

This method is called from multiple threads in a parallel build,
so only do thread safe stuff here. Do thread unsafe stuff in
built().

Returns true if the node was successfully retrieved.
def should_retrieve_from_cache(self):
"""Returns whether this node should be retrieved from the cache
"""
if self.nocache:
return None
if not self.is_derived():
return None
return self.get_build_env().get_CacheDir().retrieve(self)
return not self.nocache and self.is_derived()

def visited(self):
if self.exists() and self.executor is not None:
@@ -3274,7 +3267,7 @@ def builder_set(self, builder):
SCons.Node.Node.builder_set(self, builder)
self.changed_since_last_build = 5

def built(self):
def built(self, csig=None, size=0):
"""Called just after this File node is successfully built.

Just like for 'release_target_info' we try to release
@@ -3284,7 +3277,7 @@ def built(self):
@see: release_target_info
"""

SCons.Node.Node.built(self)
SCons.Node.Node.built(self, csig, size)

if (not SCons.Node.interactive and
not hasattr(self.attributes, 'keep_targetinfo')):
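For reference, the old File.retrieve_from_cache() behavior removed above can be recovered by combining the new should_retrieve_from_cache() predicate with the caching layer. A caller-side sketch follows; everything other than should_retrieve_from_cache() and CacheDir.retrieve(), which appear in the diff, is illustrative:

    def try_retrieve(node):
        # should_retrieve_from_cache() is now a cheap predicate
        # (not node.nocache and node.is_derived()); the actual retrieval,
        # local or remote, is handled by the caching layer.
        if not node.should_retrieve_from_cache():
            return False
        return node.get_build_env().get_CacheDir().retrieve(node)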