1818
1919from commoncode import fileutils
2020from commoncode import ignore
21+ from commoncode import hash
2122
2223import extractcode # NOQA
2324import extractcode .archive
@@ -86,6 +87,7 @@ def extract(
8687 recurse = False ,
8788 replace_originals = False ,
8889 ignore_pattern = (),
90+ known_archive_hashes = set ()
8991):
9092 """
9193 Walk and extract any archives found at ``location`` (either a file or
@@ -121,6 +123,7 @@ def extract(
121123 kinds = kinds ,
122124 recurse = recurse ,
123125 ignore_pattern = ignore_pattern ,
126+ known_archive_hashes = known_archive_hashes
124127 )
125128
126129 processed_events = []
@@ -151,6 +154,7 @@ def extract_files(
151154 kinds = extractcode .default_kinds ,
152155 recurse = False ,
153156 ignore_pattern = (),
157+ known_archive_hashes = set ()
154158):
155159 """
156160 Extract the files found at `location`.
@@ -190,7 +194,7 @@ def extract_files(
190194 if not recurse and extractcode .is_extraction_path (loc ):
191195 if TRACE :
192196 logger .debug (
193- 'extract:walk not recurse: skipped file: %(loc)r' % locals ())
197+ 'extract:walk: not recurse: skipped file: %(loc)r' % locals ())
194198 continue
195199
196200 if not extractcode .archive .should_extract (
@@ -203,6 +207,14 @@ def extract_files(
203207 'extract:walk: skipped file: not should_extract: %(loc)r' % locals ())
204208 continue
205209
210+ file_hash = hash .sha256 (loc )
211+
212+ if known_archive_hashes and file_hash in known_archive_hashes :
213+ if TRACE :
214+ logger .debug (
215+ 'extract:walk: skipped file: decompression bomb detected: %(loc)r' % locals ())
216+ continue
217+
206218 target = join (abspath (top ), extractcode .get_extraction_path (loc ))
207219 if TRACE :
208220 logger .debug ('extract:target: %(target)r' % locals ())
@@ -220,11 +232,16 @@ def extract_files(
220232 if recurse :
221233 if TRACE :
222234 logger .debug ('extract:walk: recursing on target: %(target)r' % locals ())
235+
236+ kah = set (known_archive_hashes )
237+ kah .add (file_hash )
238+
223239 for xevent in extract (
224240 location = target ,
225241 kinds = kinds ,
226242 recurse = recurse ,
227243 ignore_pattern = ignore_pattern ,
244+ known_archive_hashes = kah
228245 ):
229246 if TRACE :
230247 logger .debug ('extract:walk:recurse:extraction event: %(xevent)r' % locals ())
0 commit comments