@@ -1767,16 +1767,19 @@ def clean_local_repo(self, exposure_ids: set[int]) -> None:
             instrument=self.visit.instrument,
             detector=self.visit.detector,
         )
+        _log_trace.debug("Removing %d raws for exposures %s.", len(raws), exposure_ids)
         self.butler.pruneDatasets(raws, disassociate=True, unstore=True, purge=True)
         # Outputs are all in their own runs, so just drop them.
         preload_run = runs.get_preload_run(self.instrument, self._deployment, self._day_obs)
         _remove_run_completely(self.butler, preload_run)
         for pipeline_file in self._get_combined_pipeline_files():
             output_run = runs.get_output_run(self.instrument, self._deployment, pipeline_file,
                                              self._day_obs)
+            _log_trace.debug("Removing run %s.", output_run)
             _remove_run_completely(self.butler, output_run)

         # Clean out calibs, templates, and other preloaded datasets
+        _log_trace.debug("Cache contents: %s", self.cache)
         excess_datasets = set()
         for dataset_type in self.butler.registry.queryDatasetTypes(...):
             excess_datasets |= set(self.butler.query_datasets(
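The new `_log_trace.debug(...)` calls assume a module-level trace logger. A minimal sketch of how such a logger could be wired up, following the common LSST convention of a `TRACE1.`-prefixed child logger; the exact names here are assumptions, not necessarily this repo's verbatim setup:

    import logging

    _log = logging.getLogger("lsst." + __name__)
    # High-volume trace output goes to a separate child logger, so it can be
    # enabled by lowering this logger's level without touching _log itself.
    _log_trace = logging.getLogger("TRACE1." + _log.name)
    _log_trace.setLevel(logging.CRITICAL)  # effectively silent unless reconfigured

With an arrangement like this, the added debug calls cost almost nothing when tracing is off and can be switched on per deployment by reconfiguring the `TRACE1` logger.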
@@ -1834,15 +1837,15 @@ def _filter_datasets(src_repo: Butler,
     -------
     datasets : iterable [`lsst.daf.butler.DatasetRef`]
         The datasets that exist in ``src_repo`` but not ``dest_repo``.
-        datasetRefs are guaranteed to be fully expanded if any only if
+        datasetRefs are guaranteed to be fully expanded if and only if
         ``query`` guarantees it.

     Raises
     ------
     _MissingDatasetError
         Raised if the query on ``src_repo`` failed to find any datasets.
     """
-    known_datasets = query(dest_repo, "known datasets")
+    known_datasets = set(query(dest_repo, "known datasets"))

     # Let exceptions from src_repo query raise: if it fails, that invalidates
     # this operation.
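Wrapping the `query` result in `set()` is what makes the set arithmetic in the next hunk safe: a lazy query result is single-pass, while a set supports repeatable O(1) membership tests plus `-` and `&`. A standalone sketch of the pitfall, with strings standing in for `DatasetRef` objects (which are hashable, so the same operations apply):

    src_datasets = {"ref1", "ref2", "ref3"}
    known_iter = iter(["ref2"])   # behaves like a lazy, single-pass query result

    # Membership tests consume an iterator: the first hit exhausts it,
    # and every later test silently reports False.
    print("ref2" in known_iter)   # True
    print("ref2" in known_iter)   # False: the iterator is spent

    # A materialized set has no such problem and supports set algebra.
    known_datasets = {"ref2"}
    print(src_datasets - known_datasets)   # {'ref1', 'ref3'}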
@@ -1851,7 +1854,10 @@ def _filter_datasets(src_repo: Butler,
         raise _MissingDatasetError("Source repo query found no matches.")
     if all_callback:
         all_callback(src_datasets)
-    return itertools.filterfalse(lambda ref: ref in known_datasets, src_datasets)
+    missing = src_datasets - known_datasets
+    _log_trace.debug("Found %d matching datasets. %d present locally, %d to download.",
+                     len(src_datasets), len(src_datasets & known_datasets), len(missing))
+    return missing


 def _generic_query(dataset_types: collections.abc.Iterable[str | lsst.daf.butler.DatasetType],
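The return contract changes here too: `itertools.filterfalse` produced a lazy, single-pass iterator, whereas the set difference is fully materialized, so callers can take `len()` and traverse the result more than once. A small sketch of that reading of the new contract; the helper name is hypothetical, not code from this repo:

    def filter_like(src_datasets: set, known_datasets: set) -> set:
        # Mirrors the new _filter_datasets return path: plain set difference.
        return src_datasets - known_datasets

    missing = filter_like({"a", "b", "c"}, {"b"})
    assert missing == {"a", "c"}
    assert len(missing) == 2   # len() is valid now; a filterfalse iterator has no len()
    assert sorted(missing) == sorted(missing)   # and it can be traversed repeatedly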