From 4c6590973d502185bc20c9a118e15467cb0dd715 Mon Sep 17 00:00:00 2001 From: James Gebbie-Rayet Date: Fri, 12 Sep 2025 14:56:13 +0100 Subject: [PATCH 1/2] first pass at removing pylint global exceptions --- crossflow/clients.py | 18 ++++-------------- crossflow/config.py | 2 +- crossflow/filehandling.py | 30 ++++++++++++++---------------- crossflow/tasks.py | 39 +++++++++++++++++---------------------- pyproject.toml | 21 ++++++--------------- 5 files changed, 42 insertions(+), 68 deletions(-) diff --git a/crossflow/clients.py b/crossflow/clients.py index d1e4044..357348e 100644 --- a/crossflow/clients.py +++ b/crossflow/clients.py @@ -2,17 +2,12 @@ Clients.py: thin wrapper over dask client """ +from collections.abc import Iterable import glob import pickle import sys from dask.distributed import Client as DaskClient - -try: - from collections import Iterable -except ImportError: - from collections.abc import Iterable - from dask.distributed import Future from . import config @@ -26,7 +21,7 @@ class Client(DaskClient): """ def __init__(self, *args, **kwargs): - self.filehandler = FileHandler(config.stage_point) + self.filehandler = FileHandler(config.STAGE_POINT) super().__init__(*args, **kwargs) def upload(self, some_object): @@ -222,13 +217,8 @@ def map(self, func, *iterables, **kwargs): maxlen = 0 for iterable in iterables: if isinstance(iterable, (list, tuple)): - n_items = len(iterable) - if n_items > maxlen: - maxlen = n_items - for iterable in iterables: - if isinstance(iterable, (list, tuple)): - n_items = len(iterable) - if n_items != maxlen: + maxlen = max(maxlen, len(iterable)) + if len(iterable) != maxlen: raise ValueError("Error: not all iterables are same length") its.append(iterable) else: diff --git a/crossflow/config.py b/crossflow/config.py index d178717..e8b86ff 100644 --- a/crossflow/config.py +++ b/crossflow/config.py @@ -1 +1 @@ -stage_point = None +STAGE_POINT = None diff --git a/crossflow/filehandling.py b/crossflow/filehandling.py index e97b714..159d5c0 100644 --- a/crossflow/filehandling.py +++ b/crossflow/filehandling.py @@ -2,19 +2,7 @@ filehanding.py: this module provides classes for passing files between processes on distributed computing platforms that may not share a common file system. -""" - -import os -import os.path as op -import tempfile -import uuid -import zlib - -import fsspec - -from . import config -""" This module defines classes to handle files in distributed environments where filesyatems may not be shared. @@ -34,15 +22,25 @@ ... """ +import os +import os.path as op +import tempfile +import uuid +import zlib + +import fsspec + +from . import config + def set_stage_point(stage_point): - config.stage_point = stage_point + config.STAGE_POINT = stage_point -class FileHandler(object): +class FileHandler: def __init__(self, stage_point=None): if stage_point is None: - self.stage_point = config.stage_point + self.stage_point = config.STAGE_POINT else: self.stage_point = stage_point @@ -53,7 +51,7 @@ def create(self, path): return FileHandle(path, self.stage_point, must_exist=False) -class FileHandle(object): +class FileHandle: """ A portable container for a file. """ diff --git a/crossflow/tasks.py b/crossflow/tasks.py index 958e666..e08d217 100644 --- a/crossflow/tasks.py +++ b/crossflow/tasks.py @@ -29,17 +29,17 @@ def _gen_filenames(pattern, n_files): fieldwidth = int(log10(n_files)) + 1 if "*" in pattern: w = pattern.split("*") - template = "{}{{:0{}d}}{}".format(w[0], fieldwidth, w[1]) + template = f"{w[0]}{{:0{fieldwidth}d}}{w[1]}" else: w = pattern.split("?") if pattern.count("?") < fieldwidth: raise ValueError("Error - too many files for this pattern") - template = "{}{{:0{}d}}{}".format(w[0], pattern.count("?"), w[-1]) + template = f"{w[0]}{{:0{pattern.count('?')}d}}{w[-1]}" filenames = [template.format(i) for i in range(n_files)] return filenames -class SubprocessTask(object): +class SubprocessTask: """ A task that runs a command-line executable """ @@ -53,8 +53,8 @@ def __init__(self, template): self.inputs = [] self.outputs = [] self.constants = [] - self.STDOUT = None - self.filehandler = FileHandler(config.stage_point) + self.stdout = None + self.filehandler = FileHandler(config.STAGE_POINT) self.variables = [] for key in re.findall(r"{.*?}", self.template): @@ -69,8 +69,7 @@ def set_inputs(self, inputs): """ if not isinstance(inputs, list): raise TypeError( - "Error - inputs must be of type list," - " not of type {}".format(type(inputs)) + f"Error - inputs must be of type list, not of type {type(inputs)}" ) self.inputs = inputs @@ -80,8 +79,7 @@ def set_outputs(self, outputs): """ if not isinstance(outputs, list): raise TypeError( - "Error - outputs must be of type list," - " not of type {}".format(type(outputs)) + f"Error - outputs must be of type list, not of type {type(outputs)}" ) self.outputs = outputs @@ -120,9 +118,9 @@ def run(self, *args): outputs = [] td = tempfile.mkdtemp() var_dict = {} - for i in range(len(args)): + for i, arg in enumerate(args): if self.inputs[i] in self.variables: - var_dict[self.inputs[i]] = args[i] + var_dict[self.inputs[i]] = arg else: if isinstance(args[i], list): fnames = _gen_filenames(self.inputs[i], len(args[i])) @@ -147,10 +145,9 @@ def run(self, *args): d["value"].save(op.join(td, d["name"])) except AttributeError: var_dict[d["name"]] = d["value"] - cmd = self.template.format(**var_dict) try: result = subprocess.run( - cmd, + self.template.format(**var_dict), shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, @@ -162,19 +159,19 @@ def run(self, *args): if DEBUGINFO not in self.outputs: raise result - self.STDOUT = result.stdout.decode() + self.stdout = result.stdout.decode() for outfile in self.outputs: if outfile not in [STDOUT, DEBUGINFO]: outfile = op.join(td, outfile) if "*" in outfile or "?" in outfile: - outf = glob.glob(outfile) - outf.sort() - outputs.append([self.filehandler.load(f) for f in outf]) + outputs.append( + [self.filehandler.load(f) for f in sorted(glob.glob(outfile))] + ) else: if op.exists(outfile): outputs.append(self.filehandler.load(outfile)) elif outfile == STDOUT: - outputs.append(self.STDOUT) + outputs.append(self.stdout) elif outfile == DEBUGINFO: outputs.append(result) else: @@ -188,7 +185,7 @@ def run(self, *args): return outputs -class FunctionTask(object): +class FunctionTask: def __init__(self, func): """ Arguments: @@ -199,7 +196,7 @@ def __init__(self, func): self.outputs = [] self.constants = {} self.tmpdir = None - self.filehandler = FileHandler(config.stage_point) + self.filehandler = FileHandler(config.STAGE_POINT) def __call__(self, *args): return self.run(*args) @@ -298,8 +295,6 @@ class XflowError(Exception): Base class for Crossflow exceptions. """ - pass - class CalledProcessError(XflowError): """ diff --git a/pyproject.toml b/pyproject.toml index 223f60c..8d4a722 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -48,7 +48,7 @@ testing = [ pre-commit = [ "pre-commit==3.7.1", - "pylint==3.2.5" + "pylint==3.3.8" ] docs = [ "sphinx", @@ -62,25 +62,16 @@ docs = [ [tool.pylint.messages_control] disable = [ - "too-many-nested-blocks", "no-else-return", - "too-many-branches", - "too-many-statements", + "missing-class-docstring", "missing-function-docstring", - "consider-using-max-builtin", - "useless-object-inheritance", - "no-member", - "consider-using-enumerate", - "consider-using-f-string", - "invalid-name", "missing-module-docstring", + "no-member", "consider-using-dict-items", - "missing-class-docstring", - "deprecated-class", - "pointless-string-statement", - "too-many-locals", + "too-many-branches", + "too-many-nested-blocks", + "too-many-statements", "raise-missing-from", - "unnecessary-pass", ] [tool.pylint.format] From 869532f98648cb74863bcc7b89dc5db26f20a1bf Mon Sep 17 00:00:00 2001 From: James Gebbie-Rayet Date: Fri, 12 Sep 2025 15:33:26 +0100 Subject: [PATCH 2/2] move other pylint disabled features to specific code lines and functions rather than at project level --- crossflow/clients.py | 15 ++++++++++----- crossflow/config.py | 4 ++++ crossflow/filehandling.py | 38 +++++++++++++++++++++++++++++++++++--- crossflow/tasks.py | 13 ++++++++++--- pyproject.toml | 13 +------------ 5 files changed, 60 insertions(+), 23 deletions(-) diff --git a/crossflow/clients.py b/crossflow/clients.py index 357348e..03afcf5 100644 --- a/crossflow/clients.py +++ b/crossflow/clients.py @@ -1,5 +1,5 @@ """ -Clients.py: thin wrapper over dask client +clients.py: thin wrapper over dask client """ from collections.abc import Iterable @@ -54,17 +54,17 @@ def _futurize(self, item): Upload an item, if it's not already a Future """ - if isinstance(item, Future): + if isinstance(item, Future): # pylint: disable=no-else-return return item else: - if isinstance(item, list): + if isinstance(item, list): # pylint: disable=no-else-return for i, j in enumerate(item): if not isinstance(j, Future): if self._rough_size(j) > 10000: item[i] = self.upload(j) return item else: - if self._rough_size(item) > 10000: + if self._rough_size(item) > 10000: # pylint: disable=no-else-return return self.upload(item) else: return item @@ -92,6 +92,9 @@ def _unpack(self, task, future): return tuple(outputs) def _filehandlify(self, args): + # pylint: disable=too-many-nested-blocks + # pylint: disable=too-many-branches + # pylint: disable=too-many-statements """ work through an argument list, converting paths to filehandles where possible. @@ -187,7 +190,9 @@ def submit(self, func, *args, **kwargs): else: newargs = self._futurize(newargs) - if isinstance(func, (SubprocessTask, FunctionTask)): + if isinstance( # pylint: disable=no-else-return + func, (SubprocessTask, FunctionTask) + ): kwargs["pure"] = False future = super().submit(func.run, *newargs, **kwargs) return self._unpack(func, future) diff --git a/crossflow/config.py b/crossflow/config.py index e8b86ff..e264e8e 100644 --- a/crossflow/config.py +++ b/crossflow/config.py @@ -1 +1,5 @@ +""" +config.py: Allow configurable options. +""" + STAGE_POINT = None diff --git a/crossflow/filehandling.py b/crossflow/filehandling.py index 159d5c0..5bbde0c 100644 --- a/crossflow/filehandling.py +++ b/crossflow/filehandling.py @@ -34,10 +34,18 @@ def set_stage_point(stage_point): + """ + A method to set the stage_point variable. + """ + config.STAGE_POINT = stage_point class FileHandler: + """ + Handle file operations + """ + def __init__(self, stage_point=None): if stage_point is None: self.stage_point = config.STAGE_POINT @@ -45,9 +53,17 @@ def __init__(self, stage_point=None): self.stage_point = stage_point def load(self, path): + """ + Method to load file. + """ + return FileHandle(path, self.stage_point, must_exist=True) def create(self, path): + """ + Method to load file. + """ + return FileHandle(path, self.stage_point, must_exist=False) @@ -77,7 +93,7 @@ def __init__(self, path, stage_point, must_exist=True): with source as s: with self.store as d: d.write(s.read()) - source.close() + source.close() # pylint: disable=no-member self.store.close() self.store.mode = "rb" else: @@ -114,7 +130,7 @@ def save(self, path): with source as s: with dest as d: d.write(s.read()) - dest.close() + dest.close() # pylint: disable=no-member return path def __fspath__(self): @@ -124,7 +140,7 @@ def __fspath__(self): """ if self.local_path is None: self.local_path = os.path.join(tempfile.gettempdir(), self.uid) - if not op.exists(self.local_path): + if not op.exists(self.local_path): # pylint: disable=no-else-return return self.save(self.local_path) else: return self.local_path @@ -139,6 +155,10 @@ def __del__(self): pass def read_binary(self): + """ + A method for reading binary file formats + """ + source = self.store if source is None: return "".encode("utf-8") @@ -151,9 +171,17 @@ def read_binary(self): return data def read_text(self): + """ + A wrapper for reading binary formatted text. + """ + return self.read_binary().decode() def write_binary(self, data): + """ + A method for writing binary file formats + """ + compressed_data = zlib.compress(data) if self.staging_path is None: self.store = compressed_data @@ -164,4 +192,8 @@ def write_binary(self, data): self.store.mode = "rb" def write_text(self, text): + """ + A wrapper for writing binary formatted text. + """ + self.write_binary(text.encode("utf-8")) diff --git a/crossflow/tasks.py b/crossflow/tasks.py index e08d217..eefa778 100644 --- a/crossflow/tasks.py +++ b/crossflow/tasks.py @@ -1,5 +1,5 @@ """ -Crossflow tasks: wrappers round subprocess calls and python functions for +tasks.py: wrappers round subprocess calls and python functions for execution on a crossflow cluster """ @@ -106,6 +106,8 @@ def copy(self): return copy.deepcopy(self) def run(self, *args): + # pylint: disable=too-many-branches + # pylint: disable=too-many-statements """ Run the task with the given inputs. Args: @@ -157,7 +159,7 @@ def run(self, *args): except subprocess.CalledProcessError as e: result = CalledProcessError(e) if DEBUGINFO not in self.outputs: - raise result + raise result # pylint: disable=raise-missing-from self.stdout = result.stdout.decode() for outfile in self.outputs: @@ -186,6 +188,10 @@ def run(self, *args): class FunctionTask: + """ + A task that runs a function + """ + def __init__(self, func): """ Arguments: @@ -229,6 +235,7 @@ def copy(self): return copy.copy(self) def run(self, *args): + # pylint: disable=too-many-branches """ Run the task/function with the given arguments. @@ -262,7 +269,7 @@ def run(self, *args): indict[self.inputs[i]] = v.save(os.path.basename(v.path)) except AttributeError: indict[self.inputs[i]] = v - for k in self.constants: + for k in self.constants: # pylint: disable=consider-using-dict-items try: indict[k] = self.constants[k].save( os.path.basename(self.constants[k].path) diff --git a/pyproject.toml b/pyproject.toml index 8d4a722..20c73f9 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -61,18 +61,7 @@ docs = [ ] [tool.pylint.messages_control] -disable = [ - "no-else-return", - "missing-class-docstring", - "missing-function-docstring", - "missing-module-docstring", - "no-member", - "consider-using-dict-items", - "too-many-branches", - "too-many-nested-blocks", - "too-many-statements", - "raise-missing-from", -] +disable = [] [tool.pylint.format] max-line-length = 120