1+ from io import StringIO
12import os
23import posixpath
34import secrets
@@ -26,16 +27,22 @@ def remote_dir(base_remote_dir):
2627
2728
2829@pytest .fixture
29- def fs (tmpdir , base_remote_dir ):
30- setup_credentials ()
31- auth = GoogleAuth (settings_file_path ("default.yaml" , tmpdir / "" ))
32- auth .ServiceAuth ()
30+ def create_fs (tmpdir , base_remote_dir ):
31+ def _create_fs (create = True ):
32+ setup_credentials ()
33+ auth = GoogleAuth (settings_file_path ("default.yaml" , tmpdir / "" ))
34+ auth .ServiceAuth ()
3335
34- bucket , base = base_remote_dir .split ("/" , 1 )
35- fs = GDriveFileSystem (base_remote_dir , auth )
36- fs ._gdrive_create_dir ("root" , base )
36+ _ , base = base_remote_dir .split ("/" , 1 )
37+ fs = GDriveFileSystem (base_remote_dir , auth )
38+ if create :
39+ item = fs ._gdrive_create_dir ("root" , base )
40+ else :
41+ item = None
3742
38- return fs
43+ return fs , item
44+
45+ return _create_fs
3946
4047
4148@pytest .mark .manual
@@ -66,7 +73,8 @@ def test_fs_service_json(base_remote_dir):
6673 )
6774
6875
69- def test_info (fs , tmpdir , remote_dir ):
76+ def test_info (create_fs , remote_dir ):
77+ fs , _ = create_fs ()
7078 fs .touch (remote_dir + "/info/a.txt" )
7179 fs .touch (remote_dir + "/info/b.txt" )
7280 details = fs .info (remote_dir + "/info/a.txt" )
@@ -87,7 +95,8 @@ def test_info(fs, tmpdir, remote_dir):
8795 assert details ["name" ] == remote_dir + "/info/"
8896
8997
90- def test_move (fs , remote_dir ):
98+ def test_move (create_fs , remote_dir ):
99+ fs , _ = create_fs ()
91100 fs .touch (remote_dir + "/a.txt" )
92101 initial_info = fs .info (remote_dir + "/a.txt" )
93102
@@ -102,7 +111,8 @@ def test_move(fs, remote_dir):
102111 assert initial_info == secondary_info
103112
104113
105- def test_rm (fs , remote_dir ):
114+ def test_rm (create_fs , remote_dir ):
115+ fs , _ = create_fs ()
106116 fs .touch (remote_dir + "/a.txt" )
107117 fs .rm (remote_dir + "/a.txt" )
108118 assert not fs .exists (remote_dir + "/a.txt" )
@@ -116,7 +126,8 @@ def test_rm(fs, remote_dir):
116126 assert not fs .exists (remote_dir + "/dir/c/a" )
117127
118128
119- def test_ls (fs : GDriveFileSystem , remote_dir ):
129+ def test_ls (create_fs , remote_dir ):
130+ fs , _ = create_fs ()
120131 _ , base = fs .split_path (remote_dir + "dir/" )
121132 fs ._path_to_item_ids (base , create = True )
122133 assert fs .ls (remote_dir + "dir/" ) == []
@@ -141,12 +152,98 @@ def by_name(details):
141152 assert dirs == expected
142153
143154
144- def test_ls_non_existing_dir (fs , remote_dir ):
155+ def test_basic_ops_caching (create_fs , remote_dir , mocker ):
156+ # Internally we have to dereference names into IDs to call GDrive APIs
157+ # we are trying hard to cache those and make sure that operations like
158+ # exists, ls, find, etc. don't hit the API more than once per path
159+
160+ # ListFile (_gdrive_list) is the main operation that we use to retrieve file
161+ # metadata in all operations like find/ls/exist - etc. It should be fine as
162+ # a basic benchmark to count those.
163+ # Note: we can't count direct API calls since we have retries, also can't
164+ # count even direct calls to the GDrive client - for the same reason
165+
166+ fs , _ = create_fs ()
167+ spy = mocker .spy (fs , "_gdrive_list" )
168+
169+ dir_path = remote_dir + "/a/b/c/"
170+ file_path = dir_path + "test.txt"
171+ fs .touch (file_path )
172+
173+ assert spy .call_count == 5
174+ spy .reset_mock ()
175+
176+ fs .exists (file_path )
177+ assert spy .call_count == 1
178+ spy .reset_mock ()
179+
180+ fs .ls (remote_dir )
181+ assert spy .call_count == 1
182+ spy .reset_mock ()
183+
184+ fs .ls (dir_path )
185+ assert spy .call_count == 1
186+ spy .reset_mock ()
187+
188+ fs .find (dir_path )
189+ assert spy .call_count == 1
190+ spy .reset_mock ()
191+
192+ fs .find (remote_dir )
193+ assert spy .call_count == 1
194+ spy .reset_mock ()
195+
196+
197+ def test_ops_work_with_duplicate_names (create_fs , remote_dir ):
198+ fs , base_item = create_fs ()
199+
200+ remote_dir_item = fs ._gdrive_create_dir (
201+ base_item ["id" ], remote_dir .split ("/" )[- 1 ]
202+ )
203+ dir_name = str (uuid .uuid4 ())
204+ dir1 = fs ._gdrive_create_dir (remote_dir_item ["id" ], dir_name )
205+ dir2 = fs ._gdrive_create_dir (remote_dir_item ["id" ], dir_name )
206+
207+ # Two directories were created with the same name
208+ assert dir1 ["id" ] != dir2 ["id" ]
209+
210+ dir_path = remote_dir + "/" + dir_name + "/"
211+
212+ # ls returns both of them, even though the names are the same
213+ test_fs = fs
214+ result = test_fs .ls (remote_dir )
215+ assert len (result ) == 2
216+ assert set (result ) == {dir_path }
217+
218+ # a newly created fs instance (create=False) also returns both of them
219+ test_fs = create_fs (create = False )[0 ]
220+ result = test_fs .ls (remote_dir )
221+ assert len (result ) == 2
222+ assert set (result ) == {dir_path }
223+
224+ for test_fs in [fs , create_fs (create = False )[0 ]]:
225+ # find by default doesn't return dirs at all
226+ result = test_fs .find (remote_dir )
227+ assert len (result ) == 0
228+
229+ fs ._gdrive_upload_fobj ("a.txt" , dir1 ["id" ], StringIO ("" ))
230+ fs ._gdrive_upload_fobj ("b.txt" , dir2 ["id" ], StringIO ("" ))
231+
232+ for test_fs in [fs , create_fs (create = False )[0 ]]:
233+ # now we should have both files
234+ result = test_fs .find (remote_dir )
235+ assert len (result ) == 2
236+ assert set (result ) == {dir_path + file for file in ["a.txt" , "b.txt" ]}
237+
238+
239+ def test_ls_non_existing_dir (create_fs , remote_dir ):
240+ fs , _ = create_fs ()
145241 with pytest .raises (FileNotFoundError ):
146242 fs .ls (remote_dir + "dir/" )
147243
148244
149- def test_find (fs , remote_dir ):
245+ def test_find (create_fs , remote_dir ):
246+ fs , _ = create_fs ()
150247 fs .mkdir (remote_dir + "/dir" )
151248
152249 files = [
@@ -169,15 +266,28 @@ def test_find(fs, remote_dir):
169266 for file in files :
170267 fs .touch (file )
171268
172- assert set (fs .find (remote_dir )) == set (files )
269+ for test_fs in [fs , create_fs (create = False )[0 ]]:
270+ # Test for https://github.com/iterative/PyDrive2/issues/229
271+ # It must go first, so that we test with a cache miss as well
272+ assert set (test_fs .find (remote_dir + "/dir/c/d/" )) == set (
273+ [
274+ file
275+ for file in files
276+ if file .startswith (remote_dir + "/dir/c/d/" )
277+ ]
278+ )
279+
280+ # General find test
281+ assert set (test_fs .find (remote_dir )) == set (files )
173282
174- find_results = fs .find (remote_dir , detail = True )
175- info_results = [fs .info (file ) for file in files ]
176- info_results = {content ["name" ]: content for content in info_results }
177- assert find_results == info_results
283+ find_results = test_fs .find (remote_dir , detail = True )
284+ info_results = [test_fs .info (file ) for file in files ]
285+ info_results = {content ["name" ]: content for content in info_results }
286+ assert find_results == info_results
178287
179288
180- def test_exceptions (fs , tmpdir , remote_dir ):
289+ def test_exceptions (create_fs , tmpdir , remote_dir ):
290+ fs , _ = create_fs ()
181291 with pytest .raises (FileNotFoundError ):
182292 with fs .open (remote_dir + "/a.txt" ):
183293 ...
@@ -189,7 +299,8 @@ def test_exceptions(fs, tmpdir, remote_dir):
189299 fs .get_file (remote_dir + "/c.txt" , tmpdir / "c.txt" )
190300
191301
192- def test_open_rw (fs , remote_dir ):
302+ def test_open_rw (create_fs , remote_dir ):
303+ fs , _ = create_fs ()
193304 data = b"dvc.org"
194305
195306 with fs .open (remote_dir + "/a.txt" , "wb" ) as stream :
@@ -199,15 +310,22 @@ def test_open_rw(fs, remote_dir):
199310 assert stream .read () == data
200311
201312
202- def test_concurrent_operations (fs , remote_dir ):
313+ def test_concurrent_operations (create_fs , remote_dir ):
314+ fs , _ = create_fs ()
315+
316+ # Include an extra dir name to force upload operations creating it
317+ # this way we can also test that only a single directory is created
318+ # even if multiple threads are uploading files into the same dir
319+ dir_name = secrets .token_hex (16 )
320+
203321 def create_random_file ():
204322 name = secrets .token_hex (16 )
205- with fs .open (remote_dir + " /" + name , "w" ) as stream :
323+ with fs .open (remote_dir + f"/ { dir_name } /" + name , "w" ) as stream :
206324 stream .write (name )
207325 return name
208326
209327 def read_random_file (name ):
210- with fs .open (remote_dir + " /" + name , "r" ) as stream :
328+ with fs .open (remote_dir + f"/ { dir_name } /" + name , "r" ) as stream :
211329 return stream .read ()
212330
213331 with futures .ThreadPoolExecutor () as executor :
@@ -225,8 +343,14 @@ def read_random_file(name):
225343
226344 assert write_names == read_names
227345
346+ # Test that only a single dir is created
347+ for test_fs in [fs , create_fs (create = False )[0 ]]:
348+ results = test_fs .ls (remote_dir )
349+ assert results == [remote_dir + f"/{ dir_name } /" ]
350+
228351
229- def test_put_file (fs , tmpdir , remote_dir ):
352+ def test_put_file (create_fs , tmpdir , remote_dir ):
353+ fs , _ = create_fs ()
230354 src_file = tmpdir / "a.txt"
231355 with open (src_file , "wb" ) as file :
232356 file .write (b"data" )
@@ -237,7 +361,8 @@ def test_put_file(fs, tmpdir, remote_dir):
237361 assert stream .read () == b"data"
238362
239363
240- def test_get_file (fs , tmpdir , remote_dir ):
364+ def test_get_file (create_fs , tmpdir , remote_dir ):
365+ fs , _ = create_fs ()
241366 src_file = tmpdir / "a.txt"
242367 dest_file = tmpdir / "b.txt"
243368
@@ -249,7 +374,8 @@ def test_get_file(fs, tmpdir, remote_dir):
249374 assert dest_file .read () == "data"
250375
251376
252- def test_get_file_callback (fs , tmpdir , remote_dir ):
377+ def test_get_file_callback (create_fs , tmpdir , remote_dir ):
378+ fs , _ = create_fs ()
253379 src_file = tmpdir / "a.txt"
254380 dest_file = tmpdir / "b.txt"
255381
0 commit comments