From 45b12e202ba0f2a7acdd3cd1bc7520be083eb959 Mon Sep 17 00:00:00 2001 From: Phil Varner Date: Wed, 20 Dec 2023 14:58:09 -0500 Subject: [PATCH] cleanup workdir correctly (#70) * cleanup workdir correctly --- .pre-commit-config.yaml | 6 ++-- CHANGELOG.md | 7 +++++ stactask/task.py | 65 +++++++++++++++++++++++------------------ tests/test_task.py | 14 ++++----- 4 files changed, 54 insertions(+), 38 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 7ce937e..248799f 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -16,17 +16,17 @@ repos: args: [--ignore-words=.codespellignore] types_or: [jupyter, markdown, python, shell] - repo: https://github.com/psf/black - rev: 23.9.1 + rev: 23.12.0 hooks: - id: black - repo: https://github.com/pre-commit/mirrors-mypy - rev: v1.6.0 + rev: v1.7.1 hooks: - id: mypy additional_dependencies: - pytest - types-setuptools == 65.7.0.3 - repo: https://github.com/astral-sh/ruff-pre-commit - rev: v0.0.292 + rev: v0.1.8 hooks: - id: ruff diff --git a/CHANGELOG.md b/CHANGELOG.md index d660ee2..0a39604 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,13 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/) and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.html). +## Unreleased - TBD + +### Changed + +- handler now explicitly calls performs workdir cleanup +- workdir cleanup is correctly defensive and logs errors + ## [v0.2.0] - 2023-11-16 ### Changed diff --git a/stactask/task.py b/stactask/task.py index f206a5b..f6a2189 100644 --- a/stactask/task.py +++ b/stactask/task.py @@ -3,6 +3,7 @@ import itertools import json import logging +import os import sys import warnings from abc import ABC, abstractmethod @@ -64,13 +65,9 @@ def __init__( skip_upload: bool = False, skip_validation: bool = False, ): - # set up logger self.logger = logging.getLogger(self.name) - # set this to avoid confusion in destructor if called during validation - self._save_workdir = True - - # validate input payload...or not + # validate input payload... or not if not skip_validation: if not self.validate(payload): raise FailedValidation() @@ -90,12 +87,6 @@ def __init__( # if a workdir was specified we don't want to rm by default self._save_workdir = save_workdir if save_workdir is not None else True - def __del__(self) -> None: - # remove work directory if not running locally - if not self._save_workdir: - self.logger.debug("Removing work directory %s", self._workdir) - rmtree(self._workdir) - @property def process_definition(self) -> Dict[str, Any]: process = self._payload.get("process", {}) @@ -198,6 +189,21 @@ def add_software_version_to_item(cls, item: Dict[str, Any]) -> Dict[str, Any]: item["properties"]["processing:software"] = {cls.name: cls.version} return item + def cleanup_workdir(self) -> None: + """Remove work directory if configured not to save it""" + try: + if ( + not self._save_workdir + and self._workdir + and os.path.exists(self._workdir) + ): + self.logger.debug("Removing work directory %s", self._workdir) + rmtree(self._workdir) + except Exception as e: + self.logger.warning( + "Failed removing work directory %s: %s", self._workdir, e + ) + def assign_collections(self) -> None: """Assigns new collection names based on""" for i, (coll, expr) in itertools.product( @@ -305,24 +311,27 @@ def post_process_item(self, item: Dict[str, Any]) -> Dict[str, Any]: @classmethod def handler(cls, payload: Dict[str, Any], **kwargs: Any) -> Dict[str, Any]: - if "href" in payload or "url" in payload: - # read input - with fsspec.open(payload.get("href", payload.get("url"))) as f: - payload = json.loads(f.read()) - - task = cls(payload, **kwargs) try: - items = list() - for item in task.process(**task.parameters): - items.append(task.post_process_item(item)) - - task._payload["features"] = items - task.assign_collections() - - return task._payload - except Exception as err: - task.logger.error(err, exc_info=True) - raise err + if "href" in payload or "url" in payload: + # read input + with fsspec.open(payload.get("href", payload.get("url"))) as f: + payload = json.loads(f.read()) + + task = cls(payload, **kwargs) + try: + items = list() + for item in task.process(**task.parameters): + items.append(task.post_process_item(item)) + + task._payload["features"] = items + task.assign_collections() + + return task._payload + except Exception as err: + task.logger.error(err, exc_info=True) + raise err + finally: + task.cleanup_workdir() @classmethod def parse_args(cls, args: List[str]) -> Dict[str, Any]: diff --git a/tests/test_task.py b/tests/test_task.py index 982c8ec..cb9231d 100644 --- a/tests/test_task.py +++ b/tests/test_task.py @@ -56,15 +56,15 @@ def test_edit_items2(nothing_task: Task) -> None: @pytest.mark.parametrize("save_workdir", [False, True, None]) def test_tmp_workdir(items: Dict[str, Any], save_workdir: Optional[bool]) -> None: - nothing_task = NothingTask(items, save_workdir=save_workdir) + t = NothingTask(items, save_workdir=save_workdir) expected = save_workdir if save_workdir is not None else False - assert nothing_task._save_workdir is expected - workdir = nothing_task._workdir + assert t._save_workdir is expected + workdir = t._workdir assert workdir.parts[-1].startswith("tmp") assert workdir.is_absolute() is True assert workdir.is_dir() is True - del nothing_task - assert workdir.is_dir() is expected + t.cleanup_workdir() + assert workdir.exists() is expected @pytest.mark.parametrize("save_workdir", [False, True, None]) @@ -80,8 +80,8 @@ def test_workdir( assert workdir.parts[-1] == "test_task" assert workdir.is_absolute() is True assert workdir.is_dir() is True - del t - assert workdir.is_dir() is expected + t.cleanup_workdir() + assert workdir.exists() is expected def test_parameters(items: Dict[str, Any]) -> None: