1
+ from io import StringIO
1
2
import os
2
3
import posixpath
3
4
import secrets
@@ -26,16 +27,22 @@ def remote_dir(base_remote_dir):
26
27
27
28
28
29
@pytest .fixture
29
- def fs (tmpdir , base_remote_dir ):
30
- setup_credentials ()
31
- auth = GoogleAuth (settings_file_path ("default.yaml" , tmpdir / "" ))
32
- auth .ServiceAuth ()
30
+ def create_fs (tmpdir , base_remote_dir ):
31
+ def _create_fs (create = True ):
32
+ setup_credentials ()
33
+ auth = GoogleAuth (settings_file_path ("default.yaml" , tmpdir / "" ))
34
+ auth .ServiceAuth ()
33
35
34
- bucket , base = base_remote_dir .split ("/" , 1 )
35
- fs = GDriveFileSystem (base_remote_dir , auth )
36
- fs ._gdrive_create_dir ("root" , base )
36
+ _ , base = base_remote_dir .split ("/" , 1 )
37
+ fs = GDriveFileSystem (base_remote_dir , auth )
38
+ if create :
39
+ item = fs ._gdrive_create_dir ("root" , base )
40
+ else :
41
+ item = None
37
42
38
- return fs
43
+ return fs , item
44
+
45
+ return _create_fs
39
46
40
47
41
48
@pytest .mark .manual
@@ -66,7 +73,8 @@ def test_fs_service_json(base_remote_dir):
66
73
)
67
74
68
75
69
- def test_info (fs , tmpdir , remote_dir ):
76
+ def test_info (create_fs , remote_dir ):
77
+ fs , _ = create_fs ()
70
78
fs .touch (remote_dir + "/info/a.txt" )
71
79
fs .touch (remote_dir + "/info/b.txt" )
72
80
details = fs .info (remote_dir + "/info/a.txt" )
@@ -87,7 +95,8 @@ def test_info(fs, tmpdir, remote_dir):
87
95
assert details ["name" ] == remote_dir + "/info/"
88
96
89
97
90
- def test_move (fs , remote_dir ):
98
+ def test_move (create_fs , remote_dir ):
99
+ fs , _ = create_fs ()
91
100
fs .touch (remote_dir + "/a.txt" )
92
101
initial_info = fs .info (remote_dir + "/a.txt" )
93
102
@@ -102,7 +111,8 @@ def test_move(fs, remote_dir):
102
111
assert initial_info == secondary_info
103
112
104
113
105
- def test_rm (fs , remote_dir ):
114
+ def test_rm (create_fs , remote_dir ):
115
+ fs , _ = create_fs ()
106
116
fs .touch (remote_dir + "/a.txt" )
107
117
fs .rm (remote_dir + "/a.txt" )
108
118
assert not fs .exists (remote_dir + "/a.txt" )
@@ -116,7 +126,8 @@ def test_rm(fs, remote_dir):
116
126
assert not fs .exists (remote_dir + "/dir/c/a" )
117
127
118
128
119
- def test_ls (fs : GDriveFileSystem , remote_dir ):
129
+ def test_ls (create_fs , remote_dir ):
130
+ fs , _ = create_fs ()
120
131
_ , base = fs .split_path (remote_dir + "dir/" )
121
132
fs ._path_to_item_ids (base , create = True )
122
133
assert fs .ls (remote_dir + "dir/" ) == []
@@ -141,12 +152,91 @@ def by_name(details):
141
152
assert dirs == expected
142
153
143
154
144
- def test_ls_non_existing_dir (fs , remote_dir ):
155
+ def test_basic_ops_caching (create_fs , remote_dir , mocker ):
156
+ # Internally we have to dereference names into IDs to call GDrive APIs
157
+ # we are trying hard to cache those and make sure that operations like
158
+ # exists, ls, find, etc. don't hit the API more than once per path
159
+
160
+ # ListFile (_gdrive_list) is the main operation that we use to retrieve file
161
+ # metadata in all operations like find/ls/exist - etc. It should be fine as
162
+ # a basic benchmark to count those.
163
+ # Note: we can't count direct API calls since we have retries, also can't
164
+ # count even direct calls to the GDrive client - for the same reason
165
+
166
+ fs , _ = create_fs ()
167
+ spy = mocker .spy (fs , "_gdrive_list" )
168
+
169
+ dir_path = remote_dir + "/a/b/c/"
170
+ file_path = dir_path + "test.txt"
171
+ fs .touch (file_path )
172
+
173
+ assert spy .call_count == 5
174
+ spy .reset_mock ()
175
+
176
+ fs .exists (file_path )
177
+ assert spy .call_count == 1
178
+ spy .reset_mock ()
179
+
180
+ fs .ls (remote_dir )
181
+ assert spy .call_count == 1
182
+ spy .reset_mock ()
183
+
184
+ fs .ls (dir_path )
185
+ assert spy .call_count == 1
186
+ spy .reset_mock ()
187
+
188
+ fs .find (dir_path )
189
+ assert spy .call_count == 1
190
+ spy .reset_mock ()
191
+
192
+ fs .find (remote_dir )
193
+ assert spy .call_count == 1
194
+ spy .reset_mock ()
195
+
196
+
197
+ def test_ops_work_with_duplicate_names (create_fs , remote_dir ):
198
+ fs , base_item = create_fs ()
199
+
200
+ remote_dir_item = fs ._gdrive_create_dir (
201
+ base_item ["id" ], remote_dir .split ("/" )[- 1 ]
202
+ )
203
+ dir_name = str (uuid .uuid4 ())
204
+ dir1 = fs ._gdrive_create_dir (remote_dir_item ["id" ], dir_name )
205
+ dir2 = fs ._gdrive_create_dir (remote_dir_item ["id" ], dir_name )
206
+
207
+ # Two directories were created with the same name
208
+ assert dir1 ["id" ] != dir2 ["id" ]
209
+
210
+ dir_path = remote_dir + "/" + dir_name + "/"
211
+ for test_fs in [fs , create_fs (create = False )[0 ]]:
212
+ # ls returns both of them, even though the names are the same
213
+ result = test_fs .ls (remote_dir )
214
+ assert len (result ) == 2
215
+ assert set (result ) == {dir_path }
216
+
217
+ for test_fs in [fs , create_fs (create = False )[0 ]]:
218
+ # find by default doesn't return dirs at all
219
+ result = test_fs .find (remote_dir )
220
+ assert len (result ) == 0
221
+
222
+ fs ._gdrive_upload_fobj ("a.txt" , dir1 ["id" ], StringIO ("" ))
223
+ fs ._gdrive_upload_fobj ("b.txt" , dir2 ["id" ], StringIO ("" ))
224
+
225
+ for test_fs in [fs , create_fs (create = False )[0 ]]:
226
+ # now we should have both files
227
+ result = test_fs .find (remote_dir )
228
+ assert len (result ) == 2
229
+ assert set (result ) == {dir_path + file for file in ["a.txt" , "b.txt" ]}
230
+
231
+
232
+ def test_ls_non_existing_dir (create_fs , remote_dir ):
233
+ fs , _ = create_fs ()
145
234
with pytest .raises (FileNotFoundError ):
146
235
fs .ls (remote_dir + "dir/" )
147
236
148
237
149
- def test_find (fs , remote_dir ):
238
+ def test_find (create_fs , remote_dir ):
239
+ fs , _ = create_fs ()
150
240
fs .mkdir (remote_dir + "/dir" )
151
241
152
242
files = [
@@ -169,15 +259,28 @@ def test_find(fs, remote_dir):
169
259
for file in files :
170
260
fs .touch (file )
171
261
172
- assert set (fs .find (remote_dir )) == set (files )
262
+ for test_fs in [fs , create_fs (create = False )[0 ]]:
263
+ # Test for https://github.com/iterative/PyDrive2/issues/229
264
+ # It must go first, so that we test with a cache miss as well
265
+ assert set (test_fs .find (remote_dir + "/dir/c/d/" )) == set (
266
+ [
267
+ file
268
+ for file in files
269
+ if file .startswith (remote_dir + "/dir/c/d/" )
270
+ ]
271
+ )
272
+
273
+ # General find test
274
+ assert set (test_fs .find (remote_dir )) == set (files )
173
275
174
- find_results = fs .find (remote_dir , detail = True )
175
- info_results = [fs .info (file ) for file in files ]
176
- info_results = {content ["name" ]: content for content in info_results }
177
- assert find_results == info_results
276
+ find_results = test_fs .find (remote_dir , detail = True )
277
+ info_results = [test_fs .info (file ) for file in files ]
278
+ info_results = {content ["name" ]: content for content in info_results }
279
+ assert find_results == info_results
178
280
179
281
180
- def test_exceptions (fs , tmpdir , remote_dir ):
282
+ def test_exceptions (create_fs , tmpdir , remote_dir ):
283
+ fs , _ = create_fs ()
181
284
with pytest .raises (FileNotFoundError ):
182
285
with fs .open (remote_dir + "/a.txt" ):
183
286
...
@@ -189,7 +292,8 @@ def test_exceptions(fs, tmpdir, remote_dir):
189
292
fs .get_file (remote_dir + "/c.txt" , tmpdir / "c.txt" )
190
293
191
294
192
- def test_open_rw (fs , remote_dir ):
295
+ def test_open_rw (create_fs , remote_dir ):
296
+ fs , _ = create_fs ()
193
297
data = b"dvc.org"
194
298
195
299
with fs .open (remote_dir + "/a.txt" , "wb" ) as stream :
@@ -199,15 +303,22 @@ def test_open_rw(fs, remote_dir):
199
303
assert stream .read () == data
200
304
201
305
202
- def test_concurrent_operations (fs , remote_dir ):
306
+ def test_concurrent_operations (create_fs , remote_dir ):
307
+ fs , _ = create_fs ()
308
+
309
+ # Include an extra dir name to force upload operations creating it
310
+ # this way we can also test that only a single directory is created
311
+ # even if multiple threads are uploading files into the same dir
312
+ dir_name = secrets .token_hex (16 )
313
+
203
314
def create_random_file ():
204
315
name = secrets .token_hex (16 )
205
- with fs .open (remote_dir + " /" + name , "w" ) as stream :
316
+ with fs .open (remote_dir + f"/ { dir_name } /" + name , "w" ) as stream :
206
317
stream .write (name )
207
318
return name
208
319
209
320
def read_random_file (name ):
210
- with fs .open (remote_dir + " /" + name , "r" ) as stream :
321
+ with fs .open (remote_dir + f"/ { dir_name } /" + name , "r" ) as stream :
211
322
return stream .read ()
212
323
213
324
with futures .ThreadPoolExecutor () as executor :
@@ -225,8 +336,14 @@ def read_random_file(name):
225
336
226
337
assert write_names == read_names
227
338
339
+ # Test that only a single dir is created
340
+ for test_fs in [fs , create_fs (create = False )[0 ]]:
341
+ results = test_fs .ls (remote_dir )
342
+ assert results == [remote_dir + f"/{ dir_name } /" ]
343
+
228
344
229
- def test_put_file (fs , tmpdir , remote_dir ):
345
+ def test_put_file (create_fs , tmpdir , remote_dir ):
346
+ fs , _ = create_fs ()
230
347
src_file = tmpdir / "a.txt"
231
348
with open (src_file , "wb" ) as file :
232
349
file .write (b"data" )
@@ -237,7 +354,8 @@ def test_put_file(fs, tmpdir, remote_dir):
237
354
assert stream .read () == b"data"
238
355
239
356
240
- def test_get_file (fs , tmpdir , remote_dir ):
357
+ def test_get_file (create_fs , tmpdir , remote_dir ):
358
+ fs , _ = create_fs ()
241
359
src_file = tmpdir / "a.txt"
242
360
dest_file = tmpdir / "b.txt"
243
361
@@ -249,7 +367,8 @@ def test_get_file(fs, tmpdir, remote_dir):
249
367
assert dest_file .read () == "data"
250
368
251
369
252
- def test_get_file_callback (fs , tmpdir , remote_dir ):
370
+ def test_get_file_callback (create_fs , tmpdir , remote_dir ):
371
+ fs , _ = create_fs ()
253
372
src_file = tmpdir / "a.txt"
254
373
dest_file = tmpdir / "b.txt"
255
374
0 commit comments