Skip to content

Commit f1adb27

Browse files
authored
Walk filter (#265)
* additional filtering * added tests * fix docstring * extra test * some micro-optimizations I may regret later * further optimizations * another micro-optimization * optimizations
1 parent e422bc0 commit f1adb27

File tree

5 files changed

+119
-25
lines changed

5 files changed

+119
-25
lines changed

CHANGELOG.md

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,13 @@ All notable changes to this project will be documented in this file.
55
The format is based on [Keep a Changelog](http://keepachangelog.com/)
66
and this project adheres to [Semantic Versioning](http://semver.org/).
77

8+
## [2.4.0] - 2019-02-15
9+
10+
### Added
11+
12+
- Added `exclude` and `filter_dirs` arguments to walk
13+
- Micro-optimizations to walk
14+
815
## [2.3.1] - 2019-02-10
916

1017
### Fixed

fs/_version.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,3 @@
11
"""Version, used in module and setup.py.
22
"""
3-
__version__ = "2.3.1"
3+
__version__ = "2.4.0"

fs/copy.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,7 @@ def copy_fs(
3535
walker (~fs.walk.Walker, optional): A walker object that will be
3636
used to scan for files in ``src_fs``. Set this if you only want
3737
to consider a sub-set of the resources in ``src_fs``.
38-
on_copy (callable):A function callback called after a single file copy
38+
on_copy (callable): A function callback called after a single file copy
3939
is executed. Expected signature is ``(src_fs, src_path, dst_fs,
4040
dst_path)``.
4141
workers (int): Use ``workers`` threads to copy data, or ``0`` (default) for

fs/walk.py

Lines changed: 81 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@
1717
from ._repr import make_repr
1818
from .errors import FSError
1919
from .path import abspath
20-
from .path import join
20+
from .path import combine
2121
from .path import normpath
2222

2323
if False: # typing.TYPE_CHECKING
@@ -68,6 +68,12 @@ class Walker(object):
6868
a list of filename patterns, e.g. ``['*.py']``. Files will
6969
only be returned if the final component matches one of the
7070
patterns.
71+
exclude (list, optional): If supplied, this parameter should be
72+
a list of filename patterns, e.g. ``['~*']``. Files matching
73+
any of these patterns will be removed from the walk.
74+
filter_dirs (list, optional): A list of patterns that will be used
75+
to match directory names. The walk will only open directories
76+
that match at least one of these patterns.
7177
exclude_dirs (list, optional): A list of patterns that will be
7278
used to filter out directories from the walk. e.g.
7379
``['*.svn', '*.git']``.
@@ -81,6 +87,8 @@ def __init__(
8187
on_error=None, # type: Optional[OnError]
8288
search="breadth", # type: Text
8389
filter=None, # type: Optional[List[Text]]
90+
exclude=None, # type: Optional[List[Text]]
91+
filter_dirs=None, # type: Optional[List[Text]]
8492
exclude_dirs=None, # type: Optional[List[Text]]
8593
max_depth=None, # type: Optional[int]
8694
):
@@ -99,6 +107,8 @@ def __init__(
99107
self.on_error = on_error
100108
self.search = search
101109
self.filter = filter
110+
self.exclude = exclude
111+
self.filter_dirs = filter_dirs
102112
self.exclude_dirs = exclude_dirs
103113
self.max_depth = max_depth
104114
super(Walker, self).__init__()
@@ -169,6 +179,8 @@ def __repr__(self):
169179
on_error=(self.on_error, None),
170180
search=(self.search, "breadth"),
171181
filter=(self.filter, None),
182+
exclude=(self.exclude, None),
183+
filter_dirs=(self.filter_dirs, None),
172184
exclude_dirs=(self.exclude_dirs, None),
173185
max_depth=(self.max_depth, None),
174186
)
@@ -192,6 +204,8 @@ def _check_open_dir(self, fs, path, info):
192204
"""
193205
if self.exclude_dirs is not None and fs.match(self.exclude_dirs, info.name):
194206
return False
207+
if self.filter_dirs is not None and not fs.match(self.filter_dirs, info.name):
208+
return False
195209
return self.check_open_dir(fs, path, info)
196210

197211
def check_open_dir(self, fs, path, info):
@@ -251,6 +265,9 @@ def check_file(self, fs, info):
251265
bool: `True` if the file should be included.
252266
253267
"""
268+
269+
if self.exclude is not None and fs.match(self.exclude, info.name):
270+
return False
254271
return fs.match(self.filter, info.name)
255272

256273
def _scan(
@@ -341,9 +358,10 @@ def files(self, fs, path="/"):
341358
recursively within the given directory.
342359
343360
"""
361+
_combine = combine
344362
for _path, info in self._iter_walk(fs, path=path):
345363
if info is not None and not info.is_dir:
346-
yield join(_path, info.name)
364+
yield _combine(_path, info.name)
347365

348366
def dirs(self, fs, path="/"):
349367
# type: (FS, Text) -> Iterator[Text]
@@ -358,9 +376,10 @@ def dirs(self, fs, path="/"):
358376
recursively within the given directory.
359377
360378
"""
379+
_combine = combine
361380
for _path, info in self._iter_walk(fs, path=path):
362381
if info is not None and info.is_dir:
363-
yield join(_path, info.name)
382+
yield _combine(_path, info.name)
364383

365384
def info(
366385
self,
@@ -381,10 +400,11 @@ def info(
381400
(str, Info): a tuple of ``(<absolute path>, <resource info>)``.
382401
383402
"""
403+
_combine = combine
384404
_walk = self._iter_walk(fs, path=path, namespaces=namespaces)
385405
for _path, info in _walk:
386406
if info is not None:
387-
yield join(_path, info.name), info
407+
yield _combine(_path, info.name), info
388408

389409
def _walk_breadth(
390410
self,
@@ -398,19 +418,27 @@ def _walk_breadth(
398418
queue = deque([path])
399419
push = queue.appendleft
400420
pop = queue.pop
401-
depth = self._calculate_depth(path)
421+
422+
_combine = combine
423+
_scan = self._scan
424+
_calculate_depth = self._calculate_depth
425+
_check_open_dir = self._check_open_dir
426+
_check_scan_dir = self._check_scan_dir
427+
_check_file = self.check_file
428+
429+
depth = _calculate_depth(path)
402430

403431
while queue:
404432
dir_path = pop()
405-
for info in self._scan(fs, dir_path, namespaces=namespaces):
433+
for info in _scan(fs, dir_path, namespaces=namespaces):
406434
if info.is_dir:
407-
_depth = self._calculate_depth(dir_path) - depth + 1
408-
if self._check_open_dir(fs, dir_path, info):
435+
_depth = _calculate_depth(dir_path) - depth + 1
436+
if _check_open_dir(fs, dir_path, info):
409437
yield dir_path, info # Opened a directory
410-
if self._check_scan_dir(fs, dir_path, info, _depth):
411-
push(join(dir_path, info.name))
438+
if _check_scan_dir(fs, dir_path, info, _depth):
439+
push(_combine(dir_path, info.name))
412440
else:
413-
if self.check_file(fs, info):
441+
if _check_file(fs, info):
414442
yield dir_path, info # Found a file
415443
yield dir_path, None # End of directory
416444

@@ -425,15 +453,18 @@ def _walk_depth(
425453
"""
426454
# No recursion!
427455

428-
def scan(path):
429-
# type: (Text) -> Iterator[Info]
430-
"""Perform scan."""
431-
return self._scan(fs, path, namespaces=namespaces)
456+
_combine = combine
457+
_scan = self._scan
458+
_calculate_depth = self._calculate_depth
459+
_check_open_dir = self._check_open_dir
460+
_check_scan_dir = self._check_scan_dir
461+
_check_file = self.check_file
462+
depth = _calculate_depth(path)
432463

433464
stack = [
434-
(path, scan(path), None)
465+
(path, _scan(fs, path, namespaces=namespaces), None)
435466
] # type: List[Tuple[Text, Iterator[Info], Optional[Tuple[Text, Info]]]]
436-
depth = self._calculate_depth(path)
467+
437468
push = stack.append
438469

439470
while stack:
@@ -445,15 +476,21 @@ def scan(path):
445476
yield dir_path, None
446477
del stack[-1]
447478
elif info.is_dir:
448-
_depth = self._calculate_depth(dir_path) - depth + 1
449-
if self._check_open_dir(fs, dir_path, info):
450-
if self._check_scan_dir(fs, dir_path, info, _depth):
451-
_path = join(dir_path, info.name)
452-
push((_path, scan(_path), (dir_path, info)))
479+
_depth = _calculate_depth(dir_path) - depth + 1
480+
if _check_open_dir(fs, dir_path, info):
481+
if _check_scan_dir(fs, dir_path, info, _depth):
482+
_path = _combine(dir_path, info.name)
483+
push(
484+
(
485+
_path,
486+
_scan(fs, _path, namespaces=namespaces),
487+
(dir_path, info),
488+
)
489+
)
453490
else:
454491
yield dir_path, info
455492
else:
456-
if self.check_file(fs, info):
493+
if _check_file(fs, info):
457494
yield dir_path, info
458495

459496

@@ -525,6 +562,12 @@ def walk(
525562
of file name patterns, e.g. ``['*.py']``. Files will only be
526563
returned if the final component matches one of the
527564
patterns.
565+
exclude (list, optional): If supplied, this parameter should be
566+
a list of filename patterns, e.g. ``['~*', '.*']``. Files matching
567+
any of these patterns will be removed from the walk.
568+
filter_dirs (list, optional): A list of patterns that will be used
569+
to match directory names. The walk will only open directories
570+
that match at least one of these patterns.
528571
exclude_dirs (list): A list of patterns that will be used
529572
to filter out directories from the walk, e.g. ``['*.svn',
530573
'*.git']``.
@@ -574,6 +617,12 @@ def files(self, path="/", **kwargs):
574617
of file name patterns, e.g. ``['*.py']``. Files will only be
575618
returned if the final component matches one of the
576619
patterns.
620+
exclude (list, optional): If supplied, this parameter should be
621+
a list of filename patterns, e.g. ``['~*', '.*']``. Files matching
622+
any of these patterns will be removed from the walk.
623+
filter_dirs (list, optional): A list of patterns that will be used
624+
to match directory names. The walk will only open directories
625+
that match at least one of these patterns.
577626
exclude_dirs (list): A list of patterns that will be used
578627
to filter out directories from the walk, e.g. ``['*.svn',
579628
'*.git']``.
@@ -606,6 +655,9 @@ def dirs(self, path="/", **kwargs):
606655
`False` to re-raise it.
607656
search (str): If ``'breadth'`` then the directory will be
608657
walked *top down*. Set to ``'depth'`` to walk *bottom up*.
658+
filter_dirs (list, optional): A list of patterns that will be used
659+
to match directory names. The walk will only open directories
660+
that match at least one of these patterns.
609661
exclude_dirs (list): A list of patterns that will be used
610662
to filter out directories from the walk, e.g. ``['*.svn',
611663
'*.git']``.
@@ -650,6 +702,12 @@ def info(
650702
of file name patterns, e.g. ``['*.py']``. Files will only be
651703
returned if the final component matches one of the
652704
patterns.
705+
exclude (list, optional): If supplied, this parameter should be
706+
a list of filename patterns, e.g. ``['~*', '.*']``. Files matching
707+
any of these patterns will be removed from the walk.
708+
filter_dirs (list, optional): A list of patterns that will be used
709+
to match directory names. The walk will only open directories
710+
that match at least one of these patterns.
653711
exclude_dirs (list): A list of patterns that will be used
654712
to filter out directories from the walk, e.g. ``['*.svn',
655713
'*.git']``.

tests/test_walk.py

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -63,6 +63,22 @@ def test_walk(self):
6363
]
6464
self.assertEqual(_walk, expected)
6565

66+
def test_walk_filter_dirs(self):
67+
_walk = []
68+
for step in self.fs.walk(filter_dirs=["foo*"]):
69+
self.assertIsInstance(step, walk.Step)
70+
path, dirs, files = step
71+
_walk.append(
72+
(path, [info.name for info in dirs], [info.name for info in files])
73+
)
74+
expected = [
75+
("/", ["foo1", "foo2", "foo3"], []),
76+
("/foo1", [], ["top1.txt", "top2.txt"]),
77+
("/foo2", [], ["top3.bin"]),
78+
("/foo3", [], []),
79+
]
80+
self.assertEqual(_walk, expected)
81+
6682
def test_walk_depth(self):
6783
_walk = []
6884
for step in self.fs.walk(search="depth"):
@@ -193,6 +209,19 @@ def test_walk_files_filter(self):
193209

194210
self.assertEqual(files, [])
195211

212+
def test_walk_files_exclude(self):
213+
# Test exclude argument works
214+
files = list(self.fs.walk.files(exclude=["*.txt"]))
215+
self.assertEqual(files, ["/foo2/top3.bin"])
216+
217+
# Test exclude doesn't break filter
218+
files = list(self.fs.walk.files(filter=["*.bin"], exclude=["*.txt"]))
219+
self.assertEqual(files, ["/foo2/top3.bin"])
220+
221+
# Test excluding everything
222+
files = list(self.fs.walk.files(exclude=["*"]))
223+
self.assertEqual(files, [])
224+
196225
def test_walk_info(self):
197226
walk = []
198227
for path, info in self.fs.walk.info():

0 commit comments

Comments
 (0)