From e8337220288324d73ccfe487f00e8ae4dd87c487 Mon Sep 17 00:00:00 2001
From: Sara Veldhoen <s.veldhoen@beeldengeluid.nl>
Date: Fri, 15 Mar 2024 14:44:54 +0100
Subject: [PATCH] Black & flake8

---
 base_util.py           |  6 ++----
 io_util.py             | 11 ++++-------
 main_data_processor.py | 33 +++++++++++++++++++--------------
 models.py              | 14 +++++++++-----
 4 files changed, 34 insertions(+), 30 deletions(-)
diff --git a/base_util.py b/base_util.py
index 4108506..5c452e5 100644
--- a/base_util.py
+++ b/base_util.py
@@ -1,7 +1,5 @@
-from typing import Any, List
+from typing import Any
 from yacs.config import CfgNode
-import os
-from pathlib import Path
 import logging
 
 LOG_FORMAT = "%(asctime)s|%(levelname)s|%(process)d|%(module)s|%(funcName)s|%(lineno)d|%(message)s"
@@ -16,7 +14,7 @@ def validate_config(config: CfgNode) -> bool:
     FIRST the home dir config is applied (~/.DANE/config.yml),
     THEN the local base_config.yml will overwrite anything specified
     THEN the local config.yml will overwrite anything specified there.
-    Also Consult https://github.com/beeldengeluid/dane-example-worker/wiki/Config. 
+    Also consult https://github.com/beeldengeluid/dane-example-worker/wiki/Config.
     Most of the config listed is related to DANE and do not need to be altered when
     developing locally, except the last part (settings for this worker specifically).
     """
diff --git a/io_util.py b/io_util.py
index 03742ad..1f63c7a 100644
--- a/io_util.py
+++ b/io_util.py
@@ -34,11 +34,10 @@ def validate_data_dirs() -> bool:
         "output": Path(get_base_output_dir())
         # TODO: perhaps add model dir
     }
-    base = dirs['input'].parent.absolute()
+    base = dirs["input"].parent.absolute()
     if not os.path.exists(base):
         logger.info(
-            f"{base} does not exist."
-            "Make sure BASE_MOUNT_DIR exists before retrying"
+            f"{base} does not exist." "Make sure BASE_MOUNT_DIR exists before retrying"
         )
         return False
 
@@ -135,9 +134,7 @@ def source_id_from_s3_uri(s3_uri: str) -> str:
 
     e.g. s3://<bucket>/assets/<source_id>/<basename>__<source_id>.tar.gz
     """
-    fn = os.path.basename(
-        s3_uri
-    )
+    fn = os.path.basename(s3_uri)
     fn = fn.replace(".tar.gz", "")
     source_id = "__".join(fn.split("__")[1:])
     return source_id
@@ -176,7 +173,7 @@ def _is_valid_output(output_dir: str) -> bool:
     valid = [
         os.path.exists(os.path.join(output_dir, outputtype.value))
         for outputtype in to_check
-        ]
+    ]
     return all(valid)
 
 
diff --git a/main_data_processor.py b/main_data_processor.py
index 8045f8f..ce4987e 100644
--- a/main_data_processor.py
+++ b/main_data_processor.py
@@ -6,7 +6,6 @@
 from io_util import (
     get_base_output_dir,
     get_output_file_path,
-    get_output_file_name,
     get_s3_output_file_uri,
     generate_output_dirs,
     get_source_id_from_tar,
@@ -34,8 +33,16 @@
 DANE_WORKER_ID = "dane-emotion-recognition-worker"
 
 
-# triggered by running: python worker.py --run-test-file
 def run(input_file_path: str) -> Tuple[CallbackResponse, Optional[Provenance]]:
+    """Main function to start the process.
+
+    Triggered by running: python worker.py --run-test-file
+    Params:
+            input_file_path: where to read input from
+    Returns:
+            CallbackResponse: the main processing result
+            Provenance: a Provenance object describing the processing, or None
+    """
     # there must be an input file
     if not input_file_path:
         logger.error("input file empty")
@@ -50,9 +57,7 @@ def run(input_file_path: str) -> Tuple[CallbackResponse, Optional[Provenance]]:
     # TODO: add proper name and description
     top_level_provenance = generate_initial_provenance(
         name="",
-        description=(
-            ""
-        ),
+        description=(""),
         input_data={"input_file_path": input_file_path},
         parameters=dict(cfg.WORKER_SETTINGS),
         software_version=obtain_software_versions(DANE_WORKER_ID),
@@ -119,27 +124,27 @@ def apply_model(
     feature_extraction_input: ThisWorkerInput,
 ) -> ThisWorkerOutput:
     logger.info("Starting model application")
-    start = time.time()*1000  # convert to ms
-    with open(feature_extraction_input.input_file_path, 'r') as f:
+    start = time.time() * 1000  # convert to ms
+    with open(feature_extraction_input.input_file_path, "r") as f:
         cnt = len(f.readline().split())
     destination = get_output_file_path(
-            feature_extraction_input.source_id, OutputType.FOOBAR
-            )
-    with open(destination, 'w') as f:
+        feature_extraction_input.source_id, OutputType.FOOBAR
+    )
+    with open(destination, "w") as f:
         for i in range(cnt):
             f.write("Hello world")
     time.sleep(3)  # wait 3 seconds
-    end = time.time()*1000  # convert to ms
+    end = time.time() * 1000  # convert to ms
 
     model_application_provenance = Provenance(
         activity_name="hello world\n",
         activity_description="some dummy processing",
-        input_data='',  # TODO: what what
+        input_data="",  # TODO: what what
         start_time_unix=start,
         parameters={},
-        software_version='',
+        software_version="",
         output_data={},
-        processing_time_ms=end-start
+        processing_time_ms=end - start,
     )
 
     if not model_application_provenance:
diff --git a/models.py b/models.py
index bb1d465..e0a7cf8 100644
--- a/models.py
+++ b/models.py
@@ -5,22 +5,25 @@
 
 
 class CallbackResponse(TypedDict):
-    ''' Response returned by callback(), with state and message'''
+    """Response returned by callback(), with state and message"""
+
     state: int
     message: str
 
 
 class OutputType(Enum):
-    ''' Types of output this worker (possibly) provides (depending on config)'''
+    """Types of output this worker (possibly) provides (depending on config)"""
+
     FOOBAR = "foobar"
     PROVENANCE = "provenance"  # produced by provenance.py
 
 
 @dataclass
 class ThisWorkerInput:
-    ''' Dataclass that specifies any input this worker depends on.
+    """Dataclass that specifies any input this worker depends on.
+
+    state (+message) denotes whether everything is good to go"""
 
-    state (+message) denotes whether everything is good to go'''
     state: int  # HTTP status code
     message: str  # error/success message
     source_id: str = ""  # <program ID>__<carrier ID>
@@ -30,7 +33,8 @@ class ThisWorkerInput:
 
 @dataclass
 class ThisWorkerOutput:
-    ''' Dataclass that specifies any output this worker will produce'''
+    """Dataclass that specifies any output this worker will produce"""
+
     state: int  # HTTP status code
     message: str  # error/success message
     output_file_path: str = ""  # where to store the worker's output