From e8337220288324d73ccfe487f00e8ae4dd87c487 Mon Sep 17 00:00:00 2001 From: Sara Veldhoen Date: Fri, 15 Mar 2024 14:44:54 +0100 Subject: [PATCH] Black & flake8 --- base_util.py | 6 ++---- io_util.py | 11 ++++------- main_data_processor.py | 33 +++++++++++++++++++-------------- models.py | 14 +++++++++----- 4 files changed, 34 insertions(+), 30 deletions(-) diff --git a/base_util.py b/base_util.py index 4108506..5c452e5 100644 --- a/base_util.py +++ b/base_util.py @@ -1,7 +1,5 @@ -from typing import Any, List +from typing import Any from yacs.config import CfgNode -import os -from pathlib import Path import logging LOG_FORMAT = "%(asctime)s|%(levelname)s|%(process)d|%(module)s|%(funcName)s|%(lineno)d|%(message)s" @@ -16,7 +14,7 @@ def validate_config(config: CfgNode) -> bool: FIRST the home dir config is applied (~/.DANE/config.yml), THEN the local base_config.yml will overwrite anything specified THEN the local config.yml will overwrite anything specified there. - Also Consult https://github.com/beeldengeluid/dane-example-worker/wiki/Config. + Also Consult https://github.com/beeldengeluid/dane-example-worker/wiki/Config. Most of the config listed is related to DANE and do not need to be altered when developing locally, except the last part (settings for this worker specifically). """ diff --git a/io_util.py b/io_util.py index 03742ad..1f63c7a 100644 --- a/io_util.py +++ b/io_util.py @@ -34,11 +34,10 @@ def validate_data_dirs() -> bool: "output": Path(get_base_output_dir()) # TODO: perhaps add model dir } - base = dirs['input'].parent.absolute() + base = dirs["input"].parent.absolute() if not os.path.exists(base): logger.info( - f"{base} does not exist." - "Make sure BASE_MOUNT_DIR exists before retrying" + f"{base} does not exist." "Make sure BASE_MOUNT_DIR exists before retrying" ) return False @@ -135,9 +134,7 @@ def source_id_from_s3_uri(s3_uri: str) -> str: e.g. s3:///assets//__.tar.gz """ - fn = os.path.basename( - s3_uri - ) + fn = os.path.basename(s3_uri) fn = fn.replace(".tar.gz", "") source_id = "__".join(fn.split("__")[1:]) return source_id @@ -176,7 +173,7 @@ def _is_valid_output(output_dir: str) -> bool: valid = [ os.path.exists(os.path.join(output_dir, outputtype.value)) for outputtype in to_check - ] + ] return all(valid) diff --git a/main_data_processor.py b/main_data_processor.py index 8045f8f..ce4987e 100644 --- a/main_data_processor.py +++ b/main_data_processor.py @@ -6,7 +6,6 @@ from io_util import ( get_base_output_dir, get_output_file_path, - get_output_file_name, get_s3_output_file_uri, generate_output_dirs, get_source_id_from_tar, @@ -34,8 +33,16 @@ DANE_WORKER_ID = "dane-emotion-recognition-worker" -# triggered by running: python worker.py --run-test-file def run(input_file_path: str) -> Tuple[CallbackResponse, Optional[Provenance]]: + """Main function to start the process. + + Triggered by running: python worker.py --run-test-file + Params: + input_file_path: where to read input from + Returns: + CallbackResponse: the main processing result + Provenance: a Provenance object describing the processing + """ # there must be an input file if not input_file_path: logger.error("input file empty") @@ -50,9 +57,7 @@ def run(input_file_path: str) -> Tuple[CallbackResponse, Optional[Provenance]]: # TODO: add proper name and description top_level_provenance = generate_initial_provenance( name="", - description=( - "" - ), + description=(""), input_data={"input_file_path": input_file_path}, parameters=dict(cfg.WORKER_SETTINGS), software_version=obtain_software_versions(DANE_WORKER_ID), @@ -119,27 +124,27 @@ def apply_model( feature_extraction_input: ThisWorkerInput, ) -> ThisWorkerOutput: logger.info("Starting model application") - start = time.time()*1000 # convert to ms - with open(feature_extraction_input.input_file_path, 'r') as f: + start = time.time() * 1000 # convert to ms + with open(feature_extraction_input.input_file_path, "r") as f: cnt = len(f.readline().split()) destination = get_output_file_path( - feature_extraction_input.source_id, OutputType.FOOBAR - ) - with open(destination, 'w') as f: + feature_extraction_input.source_id, OutputType.FOOBAR + ) + with open(destination, "w") as f: for i in range(cnt): f.write("Hello world") time.sleep(3) # wait 3 seconds - end = time.time()*1000 # convert to ms + end = time.time() * 1000 # convert to ms model_application_provenance = Provenance( activity_name="hello world\n", activity_description="some dummy processing", - input_data='', # TODO: what what + input_data="", # TODO: what what start_time_unix=start, parameters={}, - software_version='', + software_version="", output_data={}, - processing_time_ms=end-start + processing_time_ms=end - start, ) if not model_application_provenance: diff --git a/models.py b/models.py index bb1d465..e0a7cf8 100644 --- a/models.py +++ b/models.py @@ -5,22 +5,25 @@ class CallbackResponse(TypedDict): - ''' Response returned by callback(), with state and message''' + """Response returned by callback(), with state and message""" + state: int message: str class OutputType(Enum): - ''' Types of output this worker (possibly) provides (depending on config)''' + """Types of output this worker (possibly) provides (depending on config)""" + FOOBAR = "foobar" PROVENANCE = "provenance" # produced by provenance.py @dataclass class ThisWorkerInput: - ''' Dataclass that specifies any input this worker depends on. + """Dataclass that specifies any input this worker depends on. + + state (+message) denotes whether everything is good to go""" - state (+message) denotes whether everything is good to go''' state: int # HTTP status code message: str # error/success message source_id: str = "" # __ @@ -30,7 +33,8 @@ class ThisWorkerInput: @dataclass class ThisWorkerOutput: - ''' Dataclass that specifies any output this worker will produce''' + """Dataclass that specifies any output this worker will produce""" + state: int # HTTP status code message: str # error/success message output_file_path: str = "" # where to store the worker's output