From 9015b4dc15e830779376c2ec98156f5914e1b732 Mon Sep 17 00:00:00 2001
From: Ian Cooke
Date: Tue, 9 Jan 2024 19:00:53 -0500
Subject: [PATCH] keep original filenames when downloading assets

Closes #6
---
Review notes (placed after "---", so they are not part of the commit message):

* stactask/asset_io.py: `download_item_assets` gains `keep_original_filenames`
  (default False). When True the local file is named `os.path.basename(href)`;
  otherwise it keeps the previous `<asset key><href extension>` naming.
* stactask/task.py: `__init__` stores the flag as `_keep_original_filenames`,
  the `download_item_assets` / `download_items_assets` methods pass it as a
  keyword argument, and `parse_args` gains `--keep-original-filenames`.
* Amended relative to commit 9015b4d: the new `add_argument` call now passes
  `help=h`. As submitted, `h` was assigned but never used, so the option had
  no help text. Line counts are unchanged; the `index` blob ids below still
  describe the original commit.
* NOTE(review): with the flag set, the local name depends only on the href, so
  assets whose hrefs share a basename would get the same
  `os.path.join(path, basename)` whenever `path` is the same -- confirm this
  is acceptable for the intended callers.

 CHANGELOG.md                |  2 ++
 stactask/asset_io.py        |  8 ++++++--
 stactask/task.py            | 20 ++++++++++++++++++--
 tests/test_task.py          |  5 ++++-
 tests/test_task_download.py | 14 ++++++++++++++
 5 files changed, 44 insertions(+), 5 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index a3b9242..33727fc 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -12,6 +12,8 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.
 - ([#72]) Given that `_get_file` is part of the `AsyncFileSystem` spec, this
   adds the synchronous `get_file` as a way to retrieve files if `_get_file` is
   not found.
+- ([#74]) Added option `--keep-original-filenames` to support legacy
+  applications dependent on filename specifics.
 
 ## [v0.3.0] - 2023-12-20
 
diff --git a/stactask/asset_io.py b/stactask/asset_io.py
index 7d03bdc..6515898 100644
--- a/stactask/asset_io.py
+++ b/stactask/asset_io.py
@@ -41,6 +41,7 @@ async def download_item_assets(
     overwrite: bool = False,
     path_template: str = "${collection}/${id}",
     absolute_path: bool = False,
+    keep_original_filenames: bool = False,
     **kwargs: Any,
 ) -> Item:
     _assets = item.assets.keys() if assets is None else assets
@@ -61,8 +62,11 @@ async def download_item_assets(
         href = item.assets[a].href
 
         # local filename
-        ext = os.path.splitext(href)[-1]
-        new_href = os.path.join(path, a + ext)
+        if keep_original_filenames:
+            basename = os.path.basename(href)
+        else:
+            basename = a + os.path.splitext(href)[1]
+        new_href = os.path.join(path, basename)
         if absolute_path:
             new_href = os.path.abspath(new_href)
 
diff --git a/stactask/task.py b/stactask/task.py
index f6a2189..c2bb975 100644
--- a/stactask/task.py
+++ b/stactask/task.py
@@ -64,6 +64,7 @@ def __init__(
         save_workdir: Optional[bool] = None,
         skip_upload: bool = False,
         skip_validation: bool = False,
+        keep_original_filenames: bool = False,
     ):
         self.logger = logging.getLogger(self.name)
 
@@ -75,6 +76,7 @@ def __init__(
         # set instance variables
         self._skip_upload = skip_upload
         self._payload = payload
+        self._keep_original_filenames = keep_original_filenames
 
         # create temporary work directory if workdir is None
         if workdir is None:
@@ -230,7 +232,12 @@ def download_item_assets(
         outdir = str(self._workdir / path_template)
         loop = asyncio.get_event_loop()
         item = loop.run_until_complete(
-            download_item_assets(item, path_template=outdir, **kwargs)
+            download_item_assets(
+                item,
+                path_template=outdir,
+                keep_original_filenames=self._keep_original_filenames,
+                **kwargs,
+            )
         )
         return item
 
@@ -243,7 +250,12 @@ def download_items_assets(
         outdir = str(self._workdir / path_template)
         loop = asyncio.get_event_loop()
         items = loop.run_until_complete(
-            download_items_assets(items, path_template=outdir, **kwargs)
+            download_items_assets(
+                items,
+                path_template=outdir,
+                keep_original_filenames=self._keep_original_filenames,
+                **kwargs,
+            )
         )
         return list(items)
 
@@ -370,6 +382,10 @@ def parse_args(cls, args: List[str]) -> Dict[str, Any]:
         parser.add_argument(
             "--save-workdir", dest="save_workdir", action="store_true", default=False
         )
+        h = "Keep original asset filenames"
+        parser.add_argument(
+            "--keep-original-filenames", action="store_true", default=False, help=h
+        )
         h = "Skip uploading of any generated assets and resulting STAC Items"
         parser.add_argument(
             "--skip-upload", dest="skip_upload", action="store_true", default=False
diff --git a/tests/test_task.py b/tests/test_task.py
index cb9231d..84b5398 100644
--- a/tests/test_task.py
+++ b/tests/test_task.py
@@ -143,13 +143,16 @@ def test_parse_no_args() -> None:
 
 
 def test_parse_args() -> None:
-    args = NothingTask.parse_args("run input --save-workdir".split())
+    args = NothingTask.parse_args(
+        "run input --keep-original-filenames --save-workdir".split()
+    )
     assert args["command"] == "run"
     assert args["logging"] == "INFO"
     assert args["input"] == "input"
     assert args["save_workdir"] is True
     assert args["skip_upload"] is False
     assert args["skip_validation"] is False
+    assert args["keep_original_filenames"] is True
 
 
 if __name__ == "__main__":
diff --git a/tests/test_task_download.py b/tests/test_task_download.py
index d9479a5..94b0200 100644
--- a/tests/test_task_download.py
+++ b/tests/test_task_download.py
@@ -35,6 +35,20 @@ def test_download_item_asset(tmp_path: Path, item_collection: Dict[str, Any]) ->
     assert filename.is_file() is True
 
 
+def test_download_keep_original_filenames(
+    tmp_path: Path, item_collection: Dict[str, Any]
+) -> None:
+    t = NothingTask(
+        item_collection,
+        keep_original_filenames=True,
+        workdir=tmp_path / "test-task-download-item-asset",
+    )
+    item = t.download_item_assets(t.items[0], assets=["tileinfo_metadata"]).to_dict()
+    fname = item["assets"]["tileinfo_metadata"]["href"]
+    filename = Path(fname)
+    assert filename.name == "tileInfo.json"
+
+
 def test_download_item_asset_local(
     tmp_path: Path, item_collection: Dict[str, Any]
 ) -> None: