Skip to content

Commit e05901d

Browse files
committed
chore(fs): add tests to cover recent PRs
1 parent 1a8cd72 commit e05901d

File tree

2 files changed

+149
-30
lines changed

2 files changed

+149
-30
lines changed

pydrive2/fs/spec.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -427,9 +427,9 @@ def info(self, path):
427427

428428
def ls(self, path, detail=False):
429429
bucket, base = self.split_path(path)
430+
assert bucket == self.root
430431

431432
dir_ids = self._path_to_item_ids(base)
432-
433433
if not dir_ids:
434434
raise FileNotFoundError(
435435
errno.ENOENT, os.strerror(errno.ENOENT), path
@@ -465,14 +465,14 @@ def ls(self, path, detail=False):
465465

466466
def find(self, path, detail=False, **kwargs):
467467
bucket, base = self.split_path(path)
468-
469-
seen_paths = set()
468+
assert bucket == self.root
470469

471470
# Make sure the base path is cached and dir_ids below has some
472471
# dirs revelant to this call
473472
self._path_to_item_ids(base)
474473

475474
dir_ids = [self._ids_cache["ids"].copy()]
475+
seen_paths = set()
476476
contents = []
477477
while dir_ids:
478478
query_ids = {

pydrive2/test/test_fs.py

Lines changed: 146 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
from io import StringIO
12
import os
23
import posixpath
34
import secrets
@@ -26,16 +27,22 @@ def remote_dir(base_remote_dir):
2627

2728

2829
@pytest.fixture
29-
def fs(tmpdir, base_remote_dir):
30-
setup_credentials()
31-
auth = GoogleAuth(settings_file_path("default.yaml", tmpdir / ""))
32-
auth.ServiceAuth()
30+
def create_fs(tmpdir, base_remote_dir):
31+
def _create_fs(create=True):
32+
setup_credentials()
33+
auth = GoogleAuth(settings_file_path("default.yaml", tmpdir / ""))
34+
auth.ServiceAuth()
3335

34-
bucket, base = base_remote_dir.split("/", 1)
35-
fs = GDriveFileSystem(base_remote_dir, auth)
36-
fs._gdrive_create_dir("root", base)
36+
_, base = base_remote_dir.split("/", 1)
37+
fs = GDriveFileSystem(base_remote_dir, auth)
38+
if create:
39+
item = fs._gdrive_create_dir("root", base)
40+
else:
41+
item = None
3742

38-
return fs
43+
return fs, item
44+
45+
return _create_fs
3946

4047

4148
@pytest.mark.manual
@@ -66,7 +73,8 @@ def test_fs_service_json(base_remote_dir):
6673
)
6774

6875

69-
def test_info(fs, tmpdir, remote_dir):
76+
def test_info(create_fs, remote_dir):
77+
fs, _ = create_fs()
7078
fs.touch(remote_dir + "/info/a.txt")
7179
fs.touch(remote_dir + "/info/b.txt")
7280
details = fs.info(remote_dir + "/info/a.txt")
@@ -87,7 +95,8 @@ def test_info(fs, tmpdir, remote_dir):
8795
assert details["name"] == remote_dir + "/info/"
8896

8997

90-
def test_move(fs, remote_dir):
98+
def test_move(create_fs, remote_dir):
99+
fs, _ = create_fs()
91100
fs.touch(remote_dir + "/a.txt")
92101
initial_info = fs.info(remote_dir + "/a.txt")
93102

@@ -102,7 +111,8 @@ def test_move(fs, remote_dir):
102111
assert initial_info == secondary_info
103112

104113

105-
def test_rm(fs, remote_dir):
114+
def test_rm(create_fs, remote_dir):
115+
fs, _ = create_fs()
106116
fs.touch(remote_dir + "/a.txt")
107117
fs.rm(remote_dir + "/a.txt")
108118
assert not fs.exists(remote_dir + "/a.txt")
@@ -116,7 +126,8 @@ def test_rm(fs, remote_dir):
116126
assert not fs.exists(remote_dir + "/dir/c/a")
117127

118128

119-
def test_ls(fs: GDriveFileSystem, remote_dir):
129+
def test_ls(create_fs, remote_dir):
130+
fs, _ = create_fs()
120131
_, base = fs.split_path(remote_dir + "dir/")
121132
fs._path_to_item_ids(base, create=True)
122133
assert fs.ls(remote_dir + "dir/") == []
@@ -141,12 +152,91 @@ def by_name(details):
141152
assert dirs == expected
142153

143154

144-
def test_ls_non_existing_dir(fs, remote_dir):
155+
def test_basic_ops_caching(create_fs, remote_dir, mocker):
156+
# Internally we have to derefence names into IDs to call GDrive APIs
157+
# we are trying hard to cache those and make sure that operations like
158+
# exists, ls, find, etc. don't hit the API more than once per path
159+
160+
# ListFile (_gdrive_list) is the main operation that we use to retieve file
161+
# metadata in all operations like find/ls/exist - etc. It should be fine as
162+
# a basic benchmark to count those.
163+
# Note: we can't count direct API calls since we have retries, also can't
164+
# count even direct calls to the GDrive client - for the same reason
165+
166+
fs, _ = create_fs()
167+
spy = mocker.spy(fs, "_gdrive_list")
168+
169+
dir_path = remote_dir + "/a/b/c/"
170+
file_path = dir_path + "test.txt"
171+
fs.touch(file_path)
172+
173+
assert spy.call_count == 5
174+
spy.reset_mock()
175+
176+
fs.exists(file_path)
177+
assert spy.call_count == 1
178+
spy.reset_mock()
179+
180+
fs.ls(remote_dir)
181+
assert spy.call_count == 1
182+
spy.reset_mock()
183+
184+
fs.ls(dir_path)
185+
assert spy.call_count == 1
186+
spy.reset_mock()
187+
188+
fs.find(dir_path)
189+
assert spy.call_count == 1
190+
spy.reset_mock()
191+
192+
fs.find(remote_dir)
193+
assert spy.call_count == 1
194+
spy.reset_mock()
195+
196+
197+
def test_ops_work_with_duplicate_names(create_fs, remote_dir):
198+
fs, base_item = create_fs()
199+
200+
remote_dir_item = fs._gdrive_create_dir(
201+
base_item["id"], remote_dir.split("/")[-1]
202+
)
203+
dir_name = str(uuid.uuid4())
204+
dir1 = fs._gdrive_create_dir(remote_dir_item["id"], dir_name)
205+
dir2 = fs._gdrive_create_dir(remote_dir_item["id"], dir_name)
206+
207+
# Two directories were created with the same name
208+
assert dir1["id"] != dir2["id"]
209+
210+
dir_path = remote_dir + "/" + dir_name + "/"
211+
for test_fs in [fs, create_fs(create=False)[0]]:
212+
# ls returns both of them, even though the names are the same
213+
result = test_fs.ls(remote_dir)
214+
assert len(result) == 2
215+
assert set(result) == {dir_path}
216+
217+
for test_fs in [fs, create_fs(create=False)[0]]:
218+
# find by default doesn't return dirs at all
219+
result = test_fs.find(remote_dir)
220+
assert len(result) == 0
221+
222+
fs._gdrive_upload_fobj("a.txt", dir1["id"], StringIO(""))
223+
fs._gdrive_upload_fobj("b.txt", dir2["id"], StringIO(""))
224+
225+
for test_fs in [fs, create_fs(create=False)[0]]:
226+
# now we should have both files
227+
result = test_fs.find(remote_dir)
228+
assert len(result) == 2
229+
assert set(result) == {dir_path + file for file in ["a.txt", "b.txt"]}
230+
231+
232+
def test_ls_non_existing_dir(create_fs, remote_dir):
233+
fs, _ = create_fs()
145234
with pytest.raises(FileNotFoundError):
146235
fs.ls(remote_dir + "dir/")
147236

148237

149-
def test_find(fs, remote_dir):
238+
def test_find(create_fs, remote_dir):
239+
fs, _ = create_fs()
150240
fs.mkdir(remote_dir + "/dir")
151241

152242
files = [
@@ -169,15 +259,28 @@ def test_find(fs, remote_dir):
169259
for file in files:
170260
fs.touch(file)
171261

172-
assert set(fs.find(remote_dir)) == set(files)
262+
for test_fs in [fs, create_fs(create=False)[0]]:
263+
# Test for https://github.com/iterative/PyDrive2/issues/229
264+
# It must go first, so that we test with a cache miss as well
265+
assert set(test_fs.find(remote_dir + "/dir/c/d/")) == set(
266+
[
267+
file
268+
for file in files
269+
if file.startswith(remote_dir + "/dir/c/d/")
270+
]
271+
)
272+
273+
# General find test
274+
assert set(test_fs.find(remote_dir)) == set(files)
173275

174-
find_results = fs.find(remote_dir, detail=True)
175-
info_results = [fs.info(file) for file in files]
176-
info_results = {content["name"]: content for content in info_results}
177-
assert find_results == info_results
276+
find_results = test_fs.find(remote_dir, detail=True)
277+
info_results = [test_fs.info(file) for file in files]
278+
info_results = {content["name"]: content for content in info_results}
279+
assert find_results == info_results
178280

179281

180-
def test_exceptions(fs, tmpdir, remote_dir):
282+
def test_exceptions(create_fs, tmpdir, remote_dir):
283+
fs, _ = create_fs()
181284
with pytest.raises(FileNotFoundError):
182285
with fs.open(remote_dir + "/a.txt"):
183286
...
@@ -189,7 +292,8 @@ def test_exceptions(fs, tmpdir, remote_dir):
189292
fs.get_file(remote_dir + "/c.txt", tmpdir / "c.txt")
190293

191294

192-
def test_open_rw(fs, remote_dir):
295+
def test_open_rw(create_fs, remote_dir):
296+
fs, _ = create_fs()
193297
data = b"dvc.org"
194298

195299
with fs.open(remote_dir + "/a.txt", "wb") as stream:
@@ -199,15 +303,22 @@ def test_open_rw(fs, remote_dir):
199303
assert stream.read() == data
200304

201305

202-
def test_concurrent_operations(fs, remote_dir):
306+
def test_concurrent_operations(create_fs, remote_dir):
307+
fs, _ = create_fs()
308+
309+
# Include an extra dir name to force upload operations creating it
310+
# this way we can also test that only a single directory is created
311+
# enven if multiple threads are uploading files into the same dir
312+
dir_name = secrets.token_hex(16)
313+
203314
def create_random_file():
204315
name = secrets.token_hex(16)
205-
with fs.open(remote_dir + "/" + name, "w") as stream:
316+
with fs.open(remote_dir + f"/{dir_name}/" + name, "w") as stream:
206317
stream.write(name)
207318
return name
208319

209320
def read_random_file(name):
210-
with fs.open(remote_dir + "/" + name, "r") as stream:
321+
with fs.open(remote_dir + f"/{dir_name}/" + name, "r") as stream:
211322
return stream.read()
212323

213324
with futures.ThreadPoolExecutor() as executor:
@@ -225,8 +336,14 @@ def read_random_file(name):
225336

226337
assert write_names == read_names
227338

339+
# Test that only a single dir is cretead
340+
for test_fs in [fs, create_fs(create=False)[0]]:
341+
results = test_fs.ls(remote_dir)
342+
assert results == [remote_dir + f"/{dir_name}/"]
343+
228344

229-
def test_put_file(fs, tmpdir, remote_dir):
345+
def test_put_file(create_fs, tmpdir, remote_dir):
346+
fs, _ = create_fs()
230347
src_file = tmpdir / "a.txt"
231348
with open(src_file, "wb") as file:
232349
file.write(b"data")
@@ -237,7 +354,8 @@ def test_put_file(fs, tmpdir, remote_dir):
237354
assert stream.read() == b"data"
238355

239356

240-
def test_get_file(fs, tmpdir, remote_dir):
357+
def test_get_file(create_fs, tmpdir, remote_dir):
358+
fs, _ = create_fs()
241359
src_file = tmpdir / "a.txt"
242360
dest_file = tmpdir / "b.txt"
243361

@@ -249,7 +367,8 @@ def test_get_file(fs, tmpdir, remote_dir):
249367
assert dest_file.read() == "data"
250368

251369

252-
def test_get_file_callback(fs, tmpdir, remote_dir):
370+
def test_get_file_callback(create_fs, tmpdir, remote_dir):
371+
fs, _ = create_fs()
253372
src_file = tmpdir / "a.txt"
254373
dest_file = tmpdir / "b.txt"
255374

0 commit comments

Comments
 (0)