Commit 2c51a60

Merge branch 'tickets/DM-51326'
2 parents: f528f30 + a3df4cd

2 files changed: +14 -3 lines

python/activator/caching.py (5 additions, 0 deletions)
@@ -145,6 +145,11 @@ def __len__(self):
             total += len(cache)
         return total
 
+    def __str__(self):
+        summary = ", ".join(f"{type_name}: {len(cache)}/{cache.max_size}"
+                            for type_name, cache in sorted(self._caches.items()))
+        return "{%s}" % summary
+
     def _merge_into_cache(self, inputs: Mapping[str, Set[daf_butler.DatasetRef]]) \
             -> [Set[daf_butler.DatasetRef], Set[daf_butler.DatasetRef], Mapping[str, EvictingSet]]:
         """Compute a bulk update of caches for multiple dataset types.

python/activator/middleware_interface.py (9 additions, 3 deletions)
@@ -1767,16 +1767,19 @@ def clean_local_repo(self, exposure_ids: set[int]) -> None:
             instrument=self.visit.instrument,
             detector=self.visit.detector,
         )
+        _log_trace.debug("Removing %d raws for exposures %s.", len(raws), exposure_ids)
         self.butler.pruneDatasets(raws, disassociate=True, unstore=True, purge=True)
         # Outputs are all in their own runs, so just drop them.
         preload_run = runs.get_preload_run(self.instrument, self._deployment, self._day_obs)
         _remove_run_completely(self.butler, preload_run)
         for pipeline_file in self._get_combined_pipeline_files():
             output_run = runs.get_output_run(self.instrument, self._deployment, pipeline_file,
                                              self._day_obs)
+            _log_trace.debug("Removing run %s.", output_run)
             _remove_run_completely(self.butler, output_run)
 
         # Clean out calibs, templates, and other preloaded datasets
+        _log_trace.debug("Cache contents: %s", self.cache)
         excess_datasets = set()
         for dataset_type in self.butler.registry.queryDatasetTypes(...):
             excess_datasets |= set(self.butler.query_datasets(
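The new messages go through the module's trace logger, so they stay silent unless that logger is enabled at DEBUG level. A minimal sketch of turning it on with the stdlib, assuming `_log_trace` wraps an ordinary `logging` logger (the logger name below is illustrative, not confirmed by this diff):

```python
import logging

# Illustrative only: the real logger name is whatever _log_trace was
# created with in middleware_interface.py.
logging.basicConfig(level=logging.DEBUG)
logging.getLogger("activator.middleware_interface").setLevel(logging.DEBUG)
```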
@@ -1834,15 +1837,15 @@ def _filter_datasets(src_repo: Butler,
     -------
     datasets : iterable [`lsst.daf.butler.DatasetRef`]
         The datasets that exist in ``src_repo`` but not ``dest_repo``.
-        datasetRefs are guaranteed to be fully expanded if any only if
+        datasetRefs are guaranteed to be fully expanded if and only if
         ``query`` guarantees it.
 
     Raises
     ------
     _MissingDatasetError
         Raised if the query on ``src_repo`` failed to find any datasets.
     """
-    known_datasets = query(dest_repo, "known datasets")
+    known_datasets = set(query(dest_repo, "known datasets"))
 
     # Let exceptions from src_repo query raise: if it fails, that invalidates
     # this operation.
@@ -1851,7 +1854,10 @@ def _filter_datasets(src_repo: Butler,
         raise _MissingDatasetError("Source repo query found no matches.")
     if all_callback:
         all_callback(src_datasets)
-    return itertools.filterfalse(lambda ref: ref in known_datasets, src_datasets)
+    missing = src_datasets - known_datasets
+    _log_trace.debug("Found %d matching datasets. %d present locally, %d to download.",
+                     len(src_datasets), len(src_datasets & known_datasets), len(missing))
+    return missing
 
 
 def _generic_query(dataset_types: collections.abc.Iterable[str | lsst.daf.butler.DatasetType],
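Besides adding counts to the trace log, this hunk changes the return value from a lazy `itertools.filterfalse` iterator to an eager set difference: a lazy iterator cannot be counted without consuming it, while sets support `len()`, `&`, and `-` cheaply (which is also why `known_datasets` is now wrapped in `set()` above). A self-contained sketch of the before/after behavior, with strings standing in for `DatasetRef` objects:

```python
import itertools

src_datasets = {"ref1", "ref2", "ref3"}  # stand-ins for DatasetRef objects
known_datasets = {"ref2"}

# Before: lazy; iterating to count it would exhaust it for the caller.
lazy = itertools.filterfalse(lambda ref: ref in known_datasets, src_datasets)

# After: eager; counting and intersecting are cheap and non-destructive.
missing = src_datasets - known_datasets
print(len(src_datasets & known_datasets), len(missing))  # prints: 1 2
```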
