Skip to content

Commit

Permalink
support new metadata structure + SHA256 hash
Browse files Browse the repository at this point in the history
  • Loading branch information
Rob committed Mar 29, 2023
1 parent 87438ad commit 629a6a8
Show file tree
Hide file tree
Showing 3 changed files with 28 additions and 4 deletions.
2 changes: 2 additions & 0 deletions CHANGES.md
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,8 @@
- #### v 1.0.2
- added support for MD5 checksum (used for download validation when using the -robust option)
- fixed authentication flow (#174)
- #### v 1.0.3
- added support for the new metadata structure (`.cmr.xml` files) and SHA256 checksums (used for download validation when using the -robust option)

- ## v 0.3
- #### v 0.3.0:
Expand Down
2 changes: 1 addition & 1 deletion _version/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@

# Semantic version components (major.minor.patch).
VERSION_MAJOR = 1
VERSION_MINOR = 0
# Patch level before this commit (removed side of the diff hunk).
VERSION_PATCH = 2
# Patch level after this commit; this later assignment is the effective value.
VERSION_PATCH = 3

# Dotted version string assembled from the three components above.
VERSION_STRING = ".".join(
    str(part) for part in (VERSION_MAJOR, VERSION_MINOR, VERSION_PATCH)
)

Expand Down
28 changes: 25 additions & 3 deletions modape/modis/download.py
Original file line number Diff line number Diff line change
Expand Up @@ -187,6 +187,16 @@ def _parse_hdfxml(response):
if content.tag in ("FileSize", "ChecksumType", "Checksum"):
result.update({content.tag: content.text.strip()})
return result

@staticmethod
def _parse_cmrxml(response, hdf_filename):
    """Extract size and checksum metadata for one file from a CMR XML response.

    Args:
        response: HTTP response whose ``content`` holds the ``.cmr.xml``
            document.
        hdf_filename: Name of the file to look up among the
            ``DataGranule/AdditionalFile`` entries of the document.

    Returns:
        dict with the keys ``FileSize``, ``ChecksumType`` and ``Checksum``,
        each mapped to the whitespace-stripped text of the matching element.

    Raises:
        ValueError: If no ``AdditionalFile`` entry is named ``hdf_filename``,
            or if the entry lacks one of the required fields.
    """
    tree = ElementTree.fromstring(response.content)

    # Compare the Name text in Python instead of interpolating the filename
    # into an XPath predicate: a quote in the name can then not break the
    # query, and no whitespace-tolerant predicate parsing is required.
    entry = next(
        (
            candidate
            for candidate in tree.findall("DataGranule/AdditionalFile")
            if (candidate.findtext("Name") or "").strip() == hdf_filename
        ),
        None,
    )
    if entry is None:
        raise ValueError(f"No metadata entry found for file: {hdf_filename}")

    # Result key -> element path relative to the matched AdditionalFile entry.
    fields = (
        ("FileSize", "SizeInBytes"),
        ("ChecksumType", "Checksum/Algorithm"),
        ("Checksum", "Checksum/Value"),
    )

    result = {}
    for key, path in fields:
        text = (entry.findtext(path) or "").strip()
        if not text:
            raise ValueError(
                f"Metadata entry for {hdf_filename} is missing element: {path}"
            )
        # Values are stripped so that later string comparisons (e.g. of the
        # file size) are not defeated by whitespace padding in the XML.
        result[key] = text
    return result


def _fetch(self,
Expand Down Expand Up @@ -226,9 +236,14 @@ def _fetch(self,

if check:

with session.get(url + ".xml", allow_redirects=True) as response:
response.raise_for_status()
file_metadata = self._parse_hdfxml(response)
with session.get(url + ".xml", allow_redirects=True) as hdfxml:
if hdfxml.status_code == 404:
with session.get(url[:-4] + ".cmr.xml", allow_redirects=True) as cmrxml:
cmrxml.raise_for_status()
file_metadata = self._parse_cmrxml(cmrxml, url.split("/")[-1])
else:
hdfxml.raise_for_status()
file_metadata = self._parse_hdfxml(hdfxml)

# check filesize
assert str(filename_temp.stat().st_size).strip() == file_metadata["FileSize"], \
Expand All @@ -243,6 +258,13 @@ def _fetch(self,
md5_hash.update(chunk)
chunk = openfile.read(65536)
checksum = md5_hash.hexdigest().lower()
elif file_metadata["ChecksumType"] == "SHA256":
sha256_hash = hashlib.sha256()
chunk = openfile.read(65536)
while chunk:
sha256_hash.update(chunk)
chunk = openfile.read(65536)
checksum = sha256_hash.hexdigest().lower()
else:
raise ValueError(f'Unknown Checksum Type: {file_metadata["ChecksumType"]}')
# check checksum
Expand Down

0 comments on commit 629a6a8

Please sign in to comment.