Skip to content

Commit 77b9527

Browse files
committed
update unzip_zipfile_parallel to use 'with' to close out zip files, and update error handling for Windows filepath character limits
also rename to remove parallel, since this function is not parallelized
1 parent e821096 commit 77b9527

File tree

1 file changed

+55
-32
lines changed

1 file changed

+55
-32
lines changed

src/neonutilities/unzip_and_stack.py

+55-32
Original file line numberDiff line numberDiff line change
@@ -35,9 +35,8 @@
3535
])
3636

3737

38-
def unzip_zipfile_parallel(zippath):
38+
def unzip_zipfile(zippath):
3939
"""
40-
4140
Unzip a zip file either at just the top level or recursively through the file.
4241
4342
Parameters
@@ -46,17 +45,21 @@ def unzip_zipfile_parallel(zippath):
4645
4746
Return
4847
--------
49-
A list of unzipped files to be used in stack_by_table
48+
A list of unzipped files to be used in stack_by_table.
5049
5150
Example
5251
--------
53-
ZN NOTE: Insert example when function is coded
54-
55-
>>> example
52+
unzip_zipfile(zippath=filepath)
5653
5754
Created on Tue Mar 5 2024
5855
5956
@author: Zachary Nickerson
57+
58+
Updated Tues Feb 15 2024 to use "with" to ensure zip files properly close out
59+
and fix error handling for Windows filepath character length limits
60+
61+
Folders should not be downloaded if the path length was too long, so this
62+
might be redundant (see api_helpers.py download_url function)
6063
"""
6164

6265
# Error handling on inputs
@@ -68,32 +71,53 @@ def unzip_zipfile_parallel(zippath):
6871
level = "in"
6972

7073
if level == "all":
71-
zip_ref = zipfile.ZipFile(zippath, 'r')
72-
tl = zip_ref.namelist()
73-
74-
# Error handling for filepath character lengths
75-
if any(len(x) > 260 for x in tl) and platform.system() == "Windows":
76-
print('Longest filepath is ' + str(len(max(tl, key=len))) + ' characters long. Filepaths on Windows are limited to 260 characters. Move files closer to the root directory.')
77-
return None
78-
79-
# Unzip file and get list of zips within
80-
zip_ref.extractall(path=outpath)
81-
zps = glob.glob(zippath[:-4] + "/*.zip")
82-
83-
# If there are more zips within parent zip file, unzip those as well
84-
# does this happen anymore? this might be deprecated.
85-
# level as an input might also be deprecated
86-
if len(zps) > 0:
87-
print('need an example to properly code this up.\n')
74+
with zipfile.ZipFile(zippath, 'r') as zip_ref:
75+
tl = zip_ref.namelist()
76+
77+
# Construct full paths as they will be after extraction
78+
full_extracted_paths = [os.path.join(
79+
zippath, zipname) for zipname in tl]
80+
81+
# Error handling for filepath character lengths
82+
if any(len(x) > 260 for x in full_extracted_paths) and platform.system() == "Windows":
83+
longest_path = max(full_extracted_paths, key=len)
84+
raise OSError(f"Longest filepath is {len(longest_path)} characters long. "
85+
"Filepaths on Windows are limited to 260 characters. "
86+
"Move files closer to the root directory or enable "
87+
"long path support in Windows through the Registry Editor.")
88+
89+
# Unzip file and get list of zips within
90+
zip_ref.extractall(path=outpath)
91+
92+
# If there are more zips within parent zip file, unzip those as well
93+
# does this happen anymore? this might be deprecated.
94+
# level as an input might also be deprecated
95+
zps = glob.glob(zippath[:-4] + "/*.zip")
96+
if len(zps) > 0:
97+
print('need an example to properly code this up.\n')
8898

8999
if level == "in":
90100
zps = glob.glob(outpath+"/*.zip")
91101

92102
for i in range(0, len(zps)):
93-
zip_refi = zipfile.ZipFile(zps[i], 'r')
94-
outpathi = zps[i][:-4]
95-
zip_refi.extractall(path=outpathi)
96-
os.remove(path=zps[i])
103+
with zipfile.ZipFile(zps[i], 'r') as zip_refi:
104+
tl = zip_refi.namelist()
105+
106+
# Construct full paths as they will be after extraction
107+
full_extracted_paths = [os.path.join(
108+
zippath, zipname) for zipname in tl]
109+
110+
# Error handling for filepath character lengths
111+
if any(len(x) > 260 for x in full_extracted_paths) and platform.system() == "Windows":
112+
longest_path = max(full_extracted_paths, key=len)
113+
raise OSError(f"Longest filepath is {len(longest_path)} characters long. "
114+
"Filepaths on Windows are limited to 260 characters. "
115+
"Move files closer to the root directory or enable "
116+
"long path support in Windows through the Registry Editor.")
117+
118+
outpathi = zps[i][:-4]
119+
zip_refi.extractall(path=outpathi)
120+
os.remove(zps[i])
97121

98122
return None
99123

@@ -115,9 +139,8 @@ def find_datatables(folder,
115139
116140
Example
117141
--------
118-
ZN NOTE: Insert example when function is coded
119-
120-
>>> example
142+
filenames = find_datatables(folder=folder, f_names=False)
143+
see stack_data_files_parallel
121144
122145
Created on Wed Apr 17 2024
123146
@@ -1153,13 +1176,13 @@ def stack_by_table(filepath,
11531176

11541177
# If the filepath is a zip file
11551178
if not folder:
1156-
unzip_zipfile_parallel(zippath = filepath)
1179+
unzip_zipfile(zippath=filepath)
11571180
stackpath = filepath[:-4]
11581181

11591182
# If the filepath is a directory
11601183
if folder:
11611184
if any(".zip" in file for file in files):
1162-
unzip_zipfile_parallel(zippath = filepath)
1185+
unzip_zipfile(zippath=filepath)
11631186
stackpath = filepath
11641187

11651188
# Stack the files

0 commit comments

Comments
 (0)