35
35
])
36
36
37
37
38
- def unzip_zipfile_parallel (zippath ):
38
+ def unzip_zipfile (zippath ):
39
39
"""
40
-
41
40
Unzip a zip file either at just the top level or recursively through the file.
42
41
43
42
Parameters
@@ -46,17 +45,21 @@ def unzip_zipfile_parallel(zippath):
46
45
47
46
Return
48
47
--------
49
- A list of unzipped files to be used in stack_by_table
48
+ A list of unzipped files to be used in stack_by_table.
50
49
51
50
Example
52
51
--------
53
- ZN NOTE: Insert example when function is coded
54
-
55
- >>> example
52
+ unzip_zipfile(zippath=filepath)
56
53
57
54
Created on Tue Mar 5 2024
58
55
59
56
@author: Zachary Nickerson
57
+
58
+ Updated Tues Feb 15 2024 to use "with" to ensure zip files properly close out
59
+ and fix error handling for Windows filepath character length limits
60
+
61
+ Folders should not be downloaded if the path length was too long, so this
62
+ might be redundant (see api_helpers.py download_url function)
60
63
"""
61
64
62
65
# Error handling on inputs
@@ -68,32 +71,53 @@ def unzip_zipfile_parallel(zippath):
68
71
level = "in"
69
72
70
73
if level == "all" :
71
- zip_ref = zipfile .ZipFile (zippath , 'r' )
72
- tl = zip_ref .namelist ()
73
-
74
- # Error handling for filepath character lengths
75
- if any (len (x ) > 260 for x in tl ) and platform .system () == "Windows" :
76
- print ('Longest filepath is ' + str (len (max (tl , key = len ))) + ' characters long. Filepaths on Windows are limited to 260 characters. Move files closer to the root directory.' )
77
- return None
78
-
79
- # Unzip file and get list of zips within
80
- zip_ref .extractall (path = outpath )
81
- zps = glob .glob (zippath [:- 4 ] + "/*.zip" )
82
-
83
- # If there are more zips within parent zip file, unzip those as well
84
- # does this happen anymore? this might be deprecated.
85
- # level as an input might also be deprecated
86
- if len (zps ) > 0 :
87
- print ('need an example to properly code this up.\n ' )
74
+ with zipfile .ZipFile (zippath , 'r' ) as zip_ref :
75
+ tl = zip_ref .namelist ()
76
+
77
+ # Construct full paths as they will be after extraction
78
+ full_extracted_paths = [os .path .join (
79
+ zippath , zipname ) for zipname in tl ]
80
+
81
+ # Error handling for filepath character lengths
82
+ if any (len (x ) > 260 for x in full_extracted_paths ) and platform .system () == "Windows" :
83
+ longest_path = max (full_extracted_paths , key = len )
84
+ raise OSError (f"Longest filepath is { len (longest_path )} characters long. "
85
+ "Filepaths on Windows are limited to 260 characters. "
86
+ "Move files closer to the root directory or enable "
87
+ "long path support in Windows through the Registry Editor." )
88
+
89
+ # Unzip file and get list of zips within
90
+ zip_ref .extractall (path = outpath )
91
+
92
+ # If there are more zips within parent zip file, unzip those as well
93
+ # does this happen anymore? this might be deprecated.
94
+ # level as an input might also be deprecated
95
+ zps = glob .glob (zippath [:- 4 ] + "/*.zip" )
96
+ if len (zps ) > 0 :
97
+ print ('need an example to properly code this up.\n ' )
88
98
89
99
if level == "in" :
90
100
zps = glob .glob (outpath + "/*.zip" )
91
101
92
102
for i in range (0 , len (zps )):
93
- zip_refi = zipfile .ZipFile (zps [i ], 'r' )
94
- outpathi = zps [i ][:- 4 ]
95
- zip_refi .extractall (path = outpathi )
96
- os .remove (path = zps [i ])
103
+ with zipfile .ZipFile (zps [i ], 'r' ) as zip_refi :
104
+ tl = zip_refi .namelist ()
105
+
106
+ # Construct full paths as they will be after extraction
107
+ full_extracted_paths = [os .path .join (
108
+ zippath , zipname ) for zipname in tl ]
109
+
110
+ # Error handling for filepath character lengths
111
+ if any (len (x ) > 260 for x in full_extracted_paths ) and platform .system () == "Windows" :
112
+ longest_path = max (full_extracted_paths , key = len )
113
+ raise OSError (f"Longest filepath is { len (longest_path )} characters long. "
114
+ "Filepaths on Windows are limited to 260 characters. "
115
+ "Move files closer to the root directory or enable "
116
+ "long path support in Windows through the Registry Editor." )
117
+
118
+ outpathi = zps [i ][:- 4 ]
119
+ zip_refi .extractall (path = outpathi )
120
+ os .remove (zps [i ])
97
121
98
122
return None
99
123
@@ -115,9 +139,8 @@ def find_datatables(folder,
115
139
116
140
Example
117
141
--------
118
- ZN NOTE: Insert example when function is coded
119
-
120
- >>> example
142
+ filenames = find_datatables(folder=folder, f_names=False)
143
+ see stack_data_files_parallel
121
144
122
145
Created on Wed Apr 17 2024
123
146
@@ -1153,13 +1176,13 @@ def stack_by_table(filepath,
1153
1176
1154
1177
# If the filepath is a zip file
1155
1178
if not folder :
1156
- unzip_zipfile_parallel (zippath = filepath )
1179
+ unzip_zipfile (zippath = filepath )
1157
1180
stackpath = filepath [:- 4 ]
1158
1181
1159
1182
# If the filepath is a directory
1160
1183
if folder :
1161
1184
if any (".zip" in file for file in files ):
1162
- unzip_zipfile_parallel (zippath = filepath )
1185
+ unzip_zipfile (zippath = filepath )
1163
1186
stackpath = filepath
1164
1187
1165
1188
# Stack the files
0 commit comments