Skip to content

Commit b14771b

Browse files
authored
Merge pull request #19 from NEONScience/ZN_fxnDev
New function: files_by_uri
2 parents 7a02071 + dcc9515 commit b14771b

22 files changed

+827
-96
lines changed
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.

src/neonutilities/aop_download.py

+33 -33
Original file line number | Diff line number | Diff line change
@@ -111,7 +111,7 @@ def get_file_urls(urls, token=None):
111111
response = get_api(api_url=url, token=token)
112112
if response is None:
113113
logging.info(
114-
"Data file retrieval failed. Check NEON data portal for outage alerts."
114+
"NEON data file retrieval failed. Check NEON data portal for outage alerts."
115115
)
116116

117117
# get release info
@@ -180,11 +180,11 @@ def get_shared_flights(site):
180180
flightSite = shared_flights_dict[site]
181181
if site in ["TREE", "CHEQ", "KONA", "DCFS"]:
182182
logging.info(
183-
f"{site} is part of the flight box for {flightSite}. Downloading data from {flightSite}."
183+
f"{site} is part of the NEON flight box for {flightSite}. Downloading data from {flightSite}."
184184
)
185185
else:
186186
logging.info(
187-
f"{site} is an aquatic site and is sometimes included in the flight box for {flightSite}. Aquatic sites are not always included in the flight coverage every year.\nDownloading data from {flightSite}. Check data to confirm coverage of {site}."
187+
f"{site} is a NEON aquatic site and is sometimes included in the flight box for {flightSite}. Aquatic sites are not always included in the flight coverage every year.\nDownloading data from {flightSite}. Check data to confirm coverage of {site}."
188188
)
189189
site = flightSite
190190
return site
@@ -220,7 +220,7 @@ def validate_dpid(dpid):
220220
dpid_pattern = "DP[1-4]{1}.[0-9]{5}.00[1-2]{1}"
221221
if not re.fullmatch(dpid_pattern, dpid):
222222
raise ValueError(
223-
f"{dpid} is not a properly formatted data product ID. The correct format is DP#.#####.00#"
223+
f"{dpid} is not a properly formatted NEON data product ID. The correct format is DP#.#####.00#"
224224
)
225225

226226

@@ -314,21 +314,21 @@ def validate_aop_dpid(dpid):
314314
# Check if the dpid matches the pattern
315315
if not re.fullmatch(aop_dpid_pattern, dpid):
316316
raise ValueError(
317-
f"{dpid} is not a valid AOP data product ID. AOP data products follow the format DP#.300##.00#."
317+
f"{dpid} is not a valid NEON AOP data product ID. AOP data products follow the format DP#.300##.00#."
318318
)
319319

320320
# Check if the dpid is in the list of suspended AOP dpids
321321
if dpid in suspended_aop_dpids:
322322
raise ValueError(
323-
f"{dpid} has been suspended and is not currently available, see https://data.neonscience.org/data-products/{dpid} for more details."
323+
f"NEON {dpid} has been suspended and is not currently available, see https://data.neonscience.org/data-products/{dpid} for more details."
324324
) # ' Valid AOP IDs are: {", ".join(valid_aop_dpids)}.')
325325

326326
# Check if the dpid is in the list of valid AOP dpids
327327
if dpid not in valid_aop_dpids:
328328
valid_aop_dpids.sort()
329329
valid_aop_dpids_string = "\n".join(valid_aop_dpids)
330330
raise ValueError(
331-
f"{dpid} is not a valid AOP data product ID. Valid AOP IDs are listed below:\n{valid_aop_dpids_string}"
331+
f"NEON {dpid} is not a valid AOP data product ID. Valid AOP IDs are listed below:\n{valid_aop_dpids_string}"
332332
)
333333

334334

@@ -345,7 +345,7 @@ def validate_aop_l3_dpid(dpid):
345345
# Check if the dpid starts with DP3
346346
if not dpid.startswith("DP3"):
347347
raise ValueError(
348-
f"{dpid} is not a valid Level 3 (L3) AOP data product ID. Level 3 AOP products follow the format DP3.300##.00#"
348+
f"NEON {dpid} is not a valid Level 3 (L3) AOP data product ID. Level 3 AOP products follow the format DP3.300##.00#"
349349
)
350350

351351
# Check if the dpid is in the list of valid AOP dpids
@@ -358,7 +358,7 @@ def validate_aop_l3_dpid(dpid):
358358
# f'{key}: {value}' for key, value in dpid_dict.items())
359359

360360
raise ValueError(
361-
f"{dpid} is not a valid Level 3 (L3) AOP data product ID. Valid L3 AOP IDs are listed below:\n{valid_aop_l3_dpids_string}"
361+
f"NEON {dpid} is not a valid Level 3 (L3) AOP data product ID. Valid L3 AOP IDs are listed below:\n{valid_aop_l3_dpids_string}"
362362
)
363363
# below prints out the corresponding data product names for each ID.
364364
# f'{dpid} is not a valid Level 3 (L3) AOP data product ID. Valid L3 AOP products are listed below.\n{formatted_dpid_dict}')
@@ -367,15 +367,15 @@ def validate_aop_l3_dpid(dpid):
367367
def check_field_spectra_dpid(dpid):
368368
if dpid == "DP1.30012.001":
369369
raise ValueError(
370-
f"{dpid} is the Field spectral data product, which is published as tabular data. Use zipsByProduct() or loadByProduct() to download these data."
370+
f"NEON {dpid} is the Field spectral data product, which is published as tabular data. Use zipsByProduct() or loadByProduct() to download these data."
371371
)
372372

373373

374374
def validate_site_format(site):
375375
site_pattern = "[A-Z]{4}"
376376
if not re.fullmatch(site_pattern, site):
377377
raise ValueError(
378-
f"{site} is an invalid site format. A four-letter NEON site code is required. NEON site codes can be found here: https://www.neonscience.org/field-sites/explore-field-sites"
378+
f"{site} is an invalid NEON site format. A four-letter NEON site code is required. NEON site codes can be found here: https://www.neonscience.org/field-sites/explore-field-sites"
379379
)
380380

381381

@@ -393,13 +393,13 @@ def validate_year(year):
393393
year_pattern = "20[1-9][0-9]"
394394
if not re.fullmatch(year_pattern, year):
395395
raise ValueError(
396-
f'{year} is an invalid year. Year is required in the format "2017" or 2017, eg. AOP data are available from 2013 to present.'
396+
f'{year} is an invalid year. Year is required in the format "2017" or 2017, eg. NEON AOP data are available from 2013 to present.'
397397
)
398398

399399

400400
def check_aop_dpid(response_dict, dpid):
401401
if response_dict["data"]["productScienceTeamAbbr"] != "AOP":
402-
logging.info(f"{dpid} is not a remote sensing product. Use zipsByProduct()")
402+
logging.info(f"NEON {dpid} is not a remote sensing product. Use zipsByProduct()")
403403
return
404404

405405

@@ -468,7 +468,7 @@ def list_available_dates(dpid, site):
468468
# if the available_releases variable doesn't exist, this error will show up:
469469
# UnboundLocalError: local variable 'available_releases' referenced before assignment
470470
raise ValueError(
471-
f"There are no data available for the data product {dpid} at the site {site}."
471+
f"There are no NEON data available for the data product {dpid} at the site {site}."
472472
)
473473

474474

@@ -630,7 +630,7 @@ def get_aop_tile_extents(dpid, site, year, token=None):
630630
# error message if nothing is available
631631
if len(site_year_urls) == 0:
632632
logging.info(
633-
f"There are no {dpid} data available at the site {site} in {year}. \nTo display available dates for a given data product and site, use the function list_available_dates()."
633+
f"There are no NEON {dpid} data available at the site {site} in {year}. \nTo display available dates for a given data product and site, use the function list_available_dates()."
634634
)
635635
return
636636

@@ -771,7 +771,7 @@ def by_file_aop(
771771
# error message if nothing is available
772772
if len(site_year_urls) == 0:
773773
logging.info(
774-
f"There are no {dpid} data available at the site {site} in {year}.\nTo display available dates for a given data product and site, use the function list_available_dates()."
774+
f"There are no NEON {dpid} data available at the site {site} in {year}.\nTo display available dates for a given data product and site, use the function list_available_dates()."
775775
)
776776
# print("There are no data available at the selected site and year.")
777777
return
@@ -782,14 +782,14 @@ def by_file_aop(
782782
# get the number of files in the dataframe, if there are no files to download, return
783783
if len(file_url_df) == 0:
784784
# print("No data files found.")
785-
logging.info("No data files found.")
785+
logging.info("No NEON data files found.")
786786
return
787787

788788
# if 'PROVISIONAL' in releases and not include_provisional:
789789
if include_provisional:
790790
# log provisional included message
791791
logging.info(
792-
"Provisional data are included. To exclude provisional data, use input parameter include_provisional=False."
792+
"Provisional NEON data are included. To exclude provisional data, use input parameter include_provisional=False."
793793
)
794794
else:
795795
# log provisional not included message and filter to the released data
@@ -798,13 +798,13 @@ def by_file_aop(
798798
file_url_df = file_url_df[file_url_df["release"] != "PROVISIONAL"]
799799
if len(file_url_df) == 0:
800800
logging.info(
801-
"Provisional data are not included. To download provisional data, use input parameter include_provisional=True."
801+
"NEON Provisional data are not included. To download provisional data, use input parameter include_provisional=True."
802802
)
803803

804804
num_files = len(file_url_df)
805805
if num_files == 0:
806806
logging.info(
807-
"No data files found. Available data may all be provisional. To download provisional data, use input parameter include_provisional=True."
807+
"No NEON data files found. Available data may all be provisional. To download provisional data, use input parameter include_provisional=True."
808808
)
809809
return
810810

@@ -818,7 +818,7 @@ def by_file_aop(
818818
if check_size:
819819
if (
820820
input(
821-
f"Continuing will download {num_files} files totaling approximately {download_size}. Do you want to proceed? (y/n) "
821+
f"Continuing will download {num_files} NEON data files totaling approximately {download_size}. Do you want to proceed? (y/n) "
822822
)
823823
!= "y"
824824
):
@@ -834,7 +834,7 @@ def by_file_aop(
834834

835835
# serially download all files, with progress bar
836836
files = list(file_url_df["url"])
837-
print(f"Downloading {num_files} files totaling approximately {download_size}\n")
837+
print(f"Downloading {num_files} NEON data files totaling approximately {download_size}\n")
838838
sleep(1)
839839
for file in tqdm(files):
840840
download_file(
@@ -1036,7 +1036,7 @@ def by_tile_aop(
10361036
response_dict = response.json()
10371037
# error message if dpid is not an AOP data product
10381038
if response_dict["data"]["productScienceTeamAbbr"] != "AOP":
1039-
print(f"{dpid} is not a remote sensing product. Use zipsByProduct()")
1039+
print(f"NEON {dpid} is not a remote sensing product. Use zipsByProduct()")
10401040
return
10411041

10421042
# replace collocated site with the site name it's published under
@@ -1048,7 +1048,7 @@ def by_tile_aop(
10481048
# error message if nothing is available
10491049
if len(site_year_urls) == 0:
10501050
logging.info(
1051-
f"There are no {dpid} data available at the site {site} in {year}.\nTo display available dates for a given data product and site, use the function list_available_dates()."
1051+
f"There are no NEON {dpid} data available at the site {site} in {year}.\nTo display available dates for a given data product and site, use the function list_available_dates()."
10521052
)
10531053
return
10541054

@@ -1057,27 +1057,27 @@ def by_tile_aop(
10571057

10581058
# get the number of files in the dataframe, if there are no files to download, return
10591059
if len(file_url_df) == 0:
1060-
logging.info("No data files found.")
1060+
logging.info("No NEON data files found.")
10611061
return
10621062

10631063
# if 'PROVISIONAL' in releases and not include_provisional:
10641064
if include_provisional:
10651065
# print provisional included message
10661066
logging.info(
1067-
"Provisional data are included. To exclude provisional data, use input parameter include_provisional=False."
1067+
"Provisional NEON data are included. To exclude provisional data, use input parameter include_provisional=False."
10681068
)
10691069
else:
10701070
# print provisional not included message
10711071
file_url_df = file_url_df[file_url_df["release"] != "PROVISIONAL"]
10721072
logging.info(
1073-
"Provisional data are not included. To download provisional data, use input parameter include_provisional=True."
1073+
"Provisional NEON data are not included. To download provisional data, use input parameter include_provisional=True."
10741074
)
10751075

10761076
# get the number of files in the dataframe after filtering for provisional data, if there are no files to download, return
10771077
num_files = len(file_url_df)
10781078
if num_files == 0:
10791079
logging.info(
1080-
"No data files found. Available data may all be provisional. To download provisional data, use input parameter include_provisional=True."
1080+
"No NEON data files found. Available data may all be provisional. To download provisional data, use input parameter include_provisional=True."
10811081
)
10821082
return
10831083

@@ -1092,7 +1092,7 @@ def by_tile_aop(
10921092
# importlib.import_module('pyproj')
10931093
except ImportError:
10941094
logging.info(
1095-
"Package pyproj is required for this function to work at the BLAN site. Install and re-try"
1095+
"Package pyproj is required for this function to work at the NEON BLAN site. Install and re-try"
10961096
)
10971097
return
10981098

@@ -1203,15 +1203,15 @@ def get_buffer_coords(easting, northing, buffer):
12031203
coords_not_found = list(set(coord_strs).difference(list(unique_coords_to_download)))
12041204
if len(coords_not_found) > 0:
12051205
print(
1206-
"Warning, the following coordinates fall outside the bounds of the site, so will not be downloaded:"
1206+
"Warning, the following coordinates fall outside the bounds of the NEON site, so will not be downloaded:"
12071207
)
12081208
for coord in coords_not_found:
12091209
print(",".join(coord.split("_")))
12101210

12111211
# get the number of files in the dataframe, if there are no files to download, return
12121212
num_files = len(file_url_df_subset)
12131213
if num_files == 0:
1214-
print("No data files found.")
1214+
print("No NEON data files found.")
12151215
return
12161216

12171217
# get the total size of all the files found
@@ -1223,7 +1223,7 @@ def get_buffer_coords(easting, northing, buffer):
12231223
if check_size:
12241224
if (
12251225
input(
1226-
f"Continuing will download {num_files} files totaling approximately {download_size}. Do you want to proceed? (y/n) "
1226+
f"Continuing will download {num_files} NEON data files totaling approximately {download_size}. Do you want to proceed? (y/n) "
12271227
)
12281228
!= "y"
12291229
):
@@ -1240,7 +1240,7 @@ def get_buffer_coords(easting, northing, buffer):
12401240

12411241
# serially download all files, with progress bar
12421242
files = list(file_url_df_subset["url"])
1243-
print(f"Downloading {num_files} files totaling approximately {download_size}\n")
1243+
print(f"Downloading {num_files} NEON data files totaling approximately {download_size}\n")
12441244
sleep(1)
12451245
for file in tqdm(files):
12461246
download_file(

0 commit comments

Comments
 (0)