+from io import StringIO
 import os
 import posixpath
 import secrets
@@ -25,17 +26,39 @@ def remote_dir(base_remote_dir):
     return base_remote_dir + "/" + str(uuid.uuid4())


-@pytest.fixture
-def fs(tmpdir, base_remote_dir):
+@pytest.fixture(scope="module")
+def service_auth(tmp_path_factory):
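+    """Authenticate once per test module via the service account settings."""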
     setup_credentials()
-    auth = GoogleAuth(settings_file_path("default.yaml", tmpdir / ""))
+    tmpdir = tmp_path_factory.mktemp("settings")
+    auth = GoogleAuth(settings_file_path("default.yaml", wkdir=tmpdir))
     auth.ServiceAuth()
+    return auth
+
+
+@pytest.fixture(scope="module")
+def fs_factory(base_remote_dir, service_auth):
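+    """Yield a factory that builds fresh (non-cached) GDriveFileSystem
+    instances sharing a single base directory, which is removed on teardown.
+    """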
+    base_item = None
+    GDriveFileSystem.cachable = False
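+    # cachable=False turns off fsspec's instance caching, so every factory
+    # call below constructs a brand-new filesystem object.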
+
+    def _create_fs():
+        nonlocal base_item
+        _, base = base_remote_dir.split("/", 1)
+        fs = GDriveFileSystem(base_remote_dir, service_auth)
+        if base_item is None:
+            base_item = fs._gdrive_create_dir("root", base)
+
+        return fs, base_item
+
+    yield _create_fs
+
+    GDriveFileSystem.cachable = True
+    fs = GDriveFileSystem(base_remote_dir, service_auth)
+    fs.rm_file(base_remote_dir)


-    bucket, base = base_remote_dir.split("/", 1)
-    fs = GDriveFileSystem(base_remote_dir, auth)
-    fs._gdrive_create_dir("root", base)

-    return fs
+@pytest.fixture
+def fs(fs_factory):
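+    """Function-scoped convenience wrapper: just the filesystem, no base item."""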
+    return fs_factory()[0]


 @pytest.mark.manual
@@ -66,7 +89,7 @@ def test_fs_service_json(base_remote_dir):
     )


-def test_info(fs, tmpdir, remote_dir):
+def test_info(fs, remote_dir):
     fs.touch(remote_dir + "/info/a.txt")
     fs.touch(remote_dir + "/info/b.txt")
     details = fs.info(remote_dir + "/info/a.txt")
@@ -116,20 +139,20 @@ def test_rm(fs, remote_dir):
     assert not fs.exists(remote_dir + "/dir/c/a")


-def test_ls(fs: GDriveFileSystem, remote_dir):
-    _, base = fs.split_path(remote_dir + "dir/")
+def test_ls(fs, remote_dir):
+    _, base = fs.split_path(remote_dir + "/dir/")
     fs._path_to_item_ids(base, create=True)
-    assert fs.ls(remote_dir + "dir/") == []
+    assert fs.ls(remote_dir + "/dir/") == []

     files = set()
     for no in range(8):
-        file = remote_dir + f"dir/test_{no}"
+        file = remote_dir + f"/dir/test_{no}"
         fs.touch(file)
         files.add(file)

-    assert set(fs.ls(remote_dir + "dir/")) == files
+    assert set(fs.ls(remote_dir + "/dir/")) == files

-    dirs = fs.ls(remote_dir + "dir/", detail=True)
+    dirs = fs.ls(remote_dir + "/dir/", detail=True)
     expected = [fs.info(file) for file in files]

     def by_name(details):
@@ -141,12 +164,95 @@ def by_name(details):
     assert dirs == expected


+def test_basic_ops_caching(fs_factory, remote_dir, mocker):
+    # Internally we have to dereference names into IDs to call GDrive APIs;
+    # we try hard to cache those and make sure that operations like
+    # exists, ls, find, etc. don't hit the API more than once per path.
+
+    # ListFile (_gdrive_list) is the main operation we use to retrieve file
+    # metadata in operations like find/ls/exists, so counting its calls is
+    # a reasonable benchmark.
+    # Note: we can't count direct API calls since we have retries, and for
+    # the same reason we can't count direct calls to the GDrive client either.
+    fs, _ = fs_factory()
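+    # mocker.spy (pytest-mock) wraps the method: calls still go through,
+    # but their count is recorded on the spy.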
+    spy = mocker.spy(fs, "_gdrive_list")
+
+    dir_path = remote_dir + "/a/b/c/"
+    file_path = dir_path + "test.txt"
+    fs.touch(file_path)
+
+    assert spy.call_count == 5
+    spy.reset_mock()
+
+    fs.exists(file_path)
+    assert spy.call_count == 1
+    spy.reset_mock()
+
+    fs.ls(remote_dir)
+    assert spy.call_count == 1
+    spy.reset_mock()
+
+    fs.ls(dir_path)
+    assert spy.call_count == 1
+    spy.reset_mock()
+
+    fs.find(dir_path)
+    assert spy.call_count == 1
+    spy.reset_mock()
+
+    fs.find(remote_dir)
+    assert spy.call_count == 1
+    spy.reset_mock()
+
+
+def test_ops_work_with_duplicate_names(fs_factory, remote_dir):
+    fs, base_item = fs_factory()
+
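+    # Create the dirs with the low-level _gdrive_create_dir helper so that
+    # two siblings end up sharing one name; Google Drive allows duplicate
+    # names, unlike a regular filesystem.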
+    remote_dir_item = fs._gdrive_create_dir(
+        base_item["id"], remote_dir.split("/")[-1]
+    )
+    dir_name = str(uuid.uuid4())
+    dir1 = fs._gdrive_create_dir(remote_dir_item["id"], dir_name)
+    dir2 = fs._gdrive_create_dir(remote_dir_item["id"], dir_name)
+
+    # Two directories were created with the same name
+    assert dir1["id"] != dir2["id"]
+
+    dir_path = remote_dir + "/" + dir_name + "/"
+
+    # ls returns both of them, even though the names are the same
+    test_fs = fs
+    result = test_fs.ls(remote_dir)
+    assert len(result) == 2
+    assert set(result) == {dir_path}
+
+    # Same check with a fresh filesystem instance (cold cache)
+    test_fs, _ = fs_factory()
+    result = test_fs.ls(remote_dir)
+    assert len(result) == 2
+    assert set(result) == {dir_path}
+
+    for test_fs in [fs, fs_factory()[0]]:
+        # find by default doesn't return dirs at all
+        result = test_fs.find(remote_dir)
+        assert len(result) == 0
+
+    fs._gdrive_upload_fobj("a.txt", dir1["id"], StringIO(""))
+    fs._gdrive_upload_fobj("b.txt", dir2["id"], StringIO(""))
+
+    for test_fs in [fs, fs_factory()[0]]:
+        # now we should have both files
+        result = test_fs.find(remote_dir)
+        assert len(result) == 2
+        assert set(result) == {dir_path + file for file in ["a.txt", "b.txt"]}
+
+
 def test_ls_non_existing_dir(fs, remote_dir):
     with pytest.raises(FileNotFoundError):
         fs.ls(remote_dir + "dir/")


-def test_find(fs, remote_dir):
+def test_find(fs, fs_factory, remote_dir):
     fs.mkdir(remote_dir + "/dir")

     files = [
@@ -169,12 +275,24 @@ def test_find(fs, remote_dir):
     for file in files:
         fs.touch(file)

-    assert set(fs.find(remote_dir)) == set(files)
+    for test_fs in [fs, fs_factory()[0]]:
+        # Test for https://github.com/iterative/PyDrive2/issues/229
+        # It must go first, so that we test with a cache miss as well
+        assert set(test_fs.find(remote_dir + "/dir/c/d/")) == set(
+            [
+                file
+                for file in files
+                if file.startswith(remote_dir + "/dir/c/d/")
+            ]
+        )
+
+        # General find test
+        assert set(test_fs.find(remote_dir)) == set(files)

-    find_results = fs.find(remote_dir, detail=True)
-    info_results = [fs.info(file) for file in files]
-    info_results = {content["name"]: content for content in info_results}
-    assert find_results == info_results
+        find_results = test_fs.find(remote_dir, detail=True)
+        info_results = [test_fs.info(file) for file in files]
+        info_results = {content["name"]: content for content in info_results}
+        assert find_results == info_results


 def test_exceptions(fs, tmpdir, remote_dir):
@@ -199,15 +317,20 @@ def test_open_rw(fs, remote_dir):
         assert stream.read() == data


-def test_concurrent_operations(fs, remote_dir):
+def test_concurrent_operations(fs, fs_factory, remote_dir):
+    # Include an extra dir name to force upload operations to create it;
+    # this way we can also test that only a single directory is created
+    # even if multiple threads are uploading files into the same dir.
+    dir_name = secrets.token_hex(16)
+
     def create_random_file():
         name = secrets.token_hex(16)
-        with fs.open(remote_dir + "/" + name, "w") as stream:
+        with fs.open(remote_dir + f"/{dir_name}/" + name, "w") as stream:
             stream.write(name)
         return name

     def read_random_file(name):
-        with fs.open(remote_dir + "/" + name, "r") as stream:
+        with fs.open(remote_dir + f"/{dir_name}/" + name, "r") as stream:
             return stream.read()

     with futures.ThreadPoolExecutor() as executor:
@@ -225,6 +348,11 @@ def read_random_file(name):
 
     assert write_names == read_names
 
+    # Test that only a single dir was created
+    for test_fs in [fs, fs_factory()[0]]:
+        results = test_fs.ls(remote_dir)
+        assert results == [remote_dir + f"/{dir_name}/"]
+

 def test_put_file(fs, tmpdir, remote_dir):
     src_file = tmpdir / "a.txt"