diff --git a/README.md b/README.md index 5addbbd9..7b9877b2 100644 --- a/README.md +++ b/README.md @@ -69,13 +69,13 @@ if __name__ == "__main__": description='This is part 1 of a test') # Description # Upload the code - run.save('training.py', 'code') + run.save_file('training.py', 'code') # Upload an input file - run.save('params.in', 'input') + run.save_file('params.in', 'input') # Add an alert (the alert definition will be created if necessary) - run.add_alert(name='loss-too-high', # Name + run.create_alert(name='loss-too-high', # Name source='metrics', # Source rule='is above', # Rule metric='loss', # Metric @@ -96,7 +96,7 @@ if __name__ == "__main__": ... # Upload an output file - run.save('output.cdf', 'output') + run.save_file('output.cdf', 'output') # If we weren't using a context manager we'd need to end the run # run.close() diff --git a/examples/GeometryOptimisation/bluemira_simvue_geometry_optimisation.py b/examples/GeometryOptimisation/bluemira_simvue_geometry_optimisation.py index 4868cf46..264ed03a 100644 --- a/examples/GeometryOptimisation/bluemira_simvue_geometry_optimisation.py +++ b/examples/GeometryOptimisation/bluemira_simvue_geometry_optimisation.py @@ -171,5 +171,5 @@ def my_minimise_length(vector, grad, parameterisation, ad_args=None): # Here we're minimising the length, within the bounds of our PrincetonD parameterisation, # so we'd expect that x1 goes to its upper bound, and x2 goes to its lower bound. -run.save("bluemira_simvue_geometry_optimisation.py", "code") +run.save_file("bluemira_simvue_geometry_optimisation.py", "code") run.close() diff --git a/examples/PyTorch/main.py b/examples/PyTorch/main.py index ed324d86..2fd55bf4 100644 --- a/examples/PyTorch/main.py +++ b/examples/PyTorch/main.py @@ -205,7 +205,7 @@ def main(): scheduler.step() if args.save_model: - run.save(model.state_dict(), "output", name="mnist_cnn.pt") + run.save_file(model.state_dict(), "output", name="mnist_cnn.pt") run.close() diff --git a/examples/SU2/SU2.py b/examples/SU2/SU2.py index 9e189095..51740867 100644 --- a/examples/SU2/SU2.py +++ b/examples/SU2/SU2.py @@ -56,7 +56,7 @@ filetype = None if input_file.endswith(".cfg"): filetype = "text/plain" - run.save(input_file, "input", filetype) + run.save_file(input_file, "input", filetype) running = True latest = [] @@ -106,6 +106,6 @@ # Save output files for output_file in OUTPUT_FILES: - run.save(output_file, "output") + run.save_file(output_file, "output") run.close() diff --git a/examples/Tensorflow/dynamic_rnn.py b/examples/Tensorflow/dynamic_rnn.py index 1eefb709..0a5339c7 100644 --- a/examples/Tensorflow/dynamic_rnn.py +++ b/examples/Tensorflow/dynamic_rnn.py @@ -45,7 +45,7 @@ "computation over sequences with variable length. This example is using a toy dataset to " "classify linear sequences. The generated sequences have variable length.", ) - run.save("dynamic_rnn.py", "code") + run.save_file("dynamic_rnn.py", "code") # ==================== # TOY DATA GENERATOR diff --git a/simvue/client.py b/simvue/client.py index 8ec2cbe5..1d41d30c 100644 --- a/simvue/client.py +++ b/simvue/client.py @@ -19,7 +19,7 @@ to_dataframe, parse_run_set_metrics, ) -from .serialization import Deserializer +from .serialization import deserialize_data from .types import DeserializedContent from .utilities import check_extra, get_auth @@ -608,7 +608,7 @@ def get_artifact( response = requests.get(url, timeout=DOWNLOAD_TIMEOUT) response.raise_for_status() - content: typing.Optional[DeserializedContent] = Deserializer().deserialize( + content: typing.Optional[DeserializedContent] = deserialize_data( response.content, mimetype, allow_pickle ) diff --git a/simvue/executor.py b/simvue/executor.py index d4013179..d156c2a1 100644 --- a/simvue/executor.py +++ b/simvue/executor.py @@ -167,10 +167,10 @@ def callback_function(status_code: int, std_out: str, std_err: str) -> None: ) if script: - self._runner.save(filename=script, category="code") + self._runner.save_file(file_path=script, category="code") if input_file: - self._runner.save(filename=input_file, category="input") + self._runner.save_file(file_path=input_file, category="input") _command: typing.List[str] = [] @@ -284,11 +284,11 @@ def _save_output(self) -> None: for proc_id in self._exit_codes.keys(): # Only save the file if the contents are not empty if self._std_err[proc_id]: - self._runner.save( + self._runner.save_file( f"{self._runner.name}_{proc_id}.err", category="output" ) if self._std_out[proc_id]: - self._runner.save( + self._runner.save_file( f"{self._runner.name}_{proc_id}.out", category="output" ) diff --git a/simvue/run.py b/simvue/run.py index 695ae8d8..77e6dcd9 100644 --- a/simvue/run.py +++ b/simvue/run.py @@ -37,7 +37,7 @@ from .factory.proxy import Simvue from .metrics import get_gpu_metrics, get_process_cpu, get_process_memory from .models import RunInput -from .serialization import Serializer +from .serialization import serialize_object from .system import get_system from .metadata import git_info from .utilities import ( @@ -159,7 +159,7 @@ def __exit__( else: if self._active: self.log_event(f"{exc_type.__name__}: {value}") - if exc_type.__name__ in ("KeyboardInterrupt") and self._active: + if exc_type.__name__ in ("KeyboardInterrupt",) and self._active: self.set_status("terminated") else: if traceback and self._active: @@ -982,17 +982,87 @@ def log_metrics( @check_run_initialised @skip_if_failed("_aborted", "_suppress_errors", False) @pydantic.validate_call - def save( + def save_object( self, - filename: str, + obj: typing.Any, category: typing.Literal["input", "output", "code"], - filetype: typing.Optional[str] = None, - preserve_path: bool = False, name: typing.Optional[str] = None, allow_pickle: bool = False, ) -> bool: + """Save an object to the Simvue server + + Parameters + ---------- + obj : typing.Any + object to serialize and send to the server + category : Literal['input', 'output', 'code'] + category of file with respect to this run + name : str, optional + name to associate with this object, by default None + allow_pickle : bool, optional + whether to allow pickling if all other serialization types fail, by default False + + Returns + ------- + bool + whether object upload was successful """ - Upload file or object + serialized = serialize_object(obj, allow_pickle) + + if not serialized or not (pickled := serialized[0]): + self._error(f"Failed to serialize '{obj}'") + return False + + data_type = serialized[1] + + if not data_type and not allow_pickle: + self._error("Unable to save Python object, set allow_pickle to True") + return False + + data: dict[str, typing.Any] = { + "pickled": pickled, + "type": data_type, + "checksum": calculate_sha256(pickled, False), + "originalPath": "", + "size": sys.getsizeof(pickled), + "name": name, + "run": self._name, + "category": category, + "storage": self._storage_id, + } + + # Register file + return self._simvue is not None and self._simvue.save_file(data) is not None + + @skip_if_failed("_aborted", "_suppress_errors", False) + @pydantic.validate_call + def save_file( + self, + file_path: pydantic.FilePath, + category: typing.Literal["input", "output", "code"], + filetype: typing.Optional[str] = None, + preserve_path: bool = False, + name: typing.Optional[str] = None, + ) -> bool: + """Upload file to the server + + Parameters + ---------- + file_path : pydantic.FilePath + path to the file to upload + category : Literal['input', 'output', 'code'] + category of file with respect to this run + filetype : str, optional + the MIME file type else this is deduced, by default None + preserve_path : bool, optional + whether to preserve the path during storage, by default False + name : str, optional + name to associate with this file, by default None + + Returns + ------- + bool + whether the upload was successful """ if self._mode == "disabled": return True @@ -1005,96 +1075,48 @@ def save( self._error("Cannot upload output files for runs in the created state") return False - is_file: bool = False + mimetypes.init() + mimetypes_valid = ["application/vnd.plotly.v1+json"] + mimetypes_valid += list(mimetypes.types_map.values()) - if isinstance(filename, str): - if not os.path.isfile(filename): - self._error(f"File {filename} does not exist") - return False - else: - is_file = True - - if filetype: - mimetypes_valid = ["application/vnd.plotly.v1+json"] - mimetypes.init() - for _, value in mimetypes.types_map.items(): - mimetypes_valid.append(value) - - if filetype not in mimetypes_valid: - self._error("Invalid MIME type specified") - return False - - data: dict[str, typing.Any] = {} - - if preserve_path: - data["name"] = filename - if data["name"].startswith("./"): - data["name"] = data["name"][2:] - elif is_file: - data["name"] = os.path.basename(filename) - - if name: - data["name"] = name - - data["run"] = self._name - data["category"] = category + if filetype and filetype not in mimetypes_valid: + self._error(f"Invalid MIME type '{filetype}' specified") + return False - if is_file: - data["size"] = os.path.getsize(filename) - data["originalPath"] = os.path.abspath( - os.path.expanduser(os.path.expandvars(filename)) - ) - data["checksum"] = calculate_sha256(filename, is_file) + stored_file_name: str = f"{file_path}" - if data["size"] == 0: - click.secho( - "WARNING: saving zero-sized files not currently supported", - bold=True, - fg="yellow", - ) - return True + if preserve_path and stored_file_name.startswith("./"): + stored_file_name = stored_file_name[2:] + elif not preserve_path: + stored_file_name = os.path.basename(file_path) # Determine mimetype - mimetype = None - if not filetype and is_file: - mimetypes.init() - mimetype = mimetypes.guess_type(filename)[0] - if not mimetype: - mimetype = "application/octet-stream" - elif is_file: - mimetype = filetype - - if mimetype: - data["type"] = mimetype - - if not is_file: - serialized = Serializer().serialize(filename, allow_pickle) - - if not serialized or not (pickled := serialized[0]): - self._error(f"Failed to serialize '{filename}'") - return False - - data_type = serialized[1] - - data["pickled"] = pickled - data["type"] = data_type - - if not data["type"] and not allow_pickle: - self._error("Unable to save Python object, set allow_pickle to True") - return False + if not (mimetype := filetype): + mimetype = mimetypes.guess_type(file_path)[0] or "application/octet-stream" - data["checksum"] = calculate_sha256(pickled, False) - data["originalPath"] = "" - data["size"] = sys.getsizeof(pickled) + data: dict[str, typing.Any] = { + "name": name or stored_file_name, + "run": self._name, + "type": mimetype, + "storage": self._storage_id, + "category": category, + "size": (file_size := os.path.getsize(file_path)), + "originalPath": os.path.abspath( + os.path.expanduser(os.path.expandvars(file_path)) + ), + "checksum": calculate_sha256(f"{file_path}", True), + } - if self._storage_id: - data["storage"] = self._storage_id + if not file_size: + click.secho( + "WARNING: saving zero-sized files not currently supported", + bold=True, + fg="yellow", + ) + return True # Register file - if not self._simvue.save_file(data): - return False - - return True + return self._simvue.save_file(data) is not None @check_run_initialised @skip_if_failed("_aborted", "_suppress_errors", False) @@ -1129,7 +1151,7 @@ def save_directory( for dirpath, _, filenames in directory.walk(): for filename in filenames: if (full_path := dirpath.joinpath(filename)).is_file(): - self.save(f"{full_path}", category, filetype, preserve_path) + self.save_file(full_path, category, filetype, preserve_path) return True @@ -1153,7 +1175,7 @@ def save_all( for item in items: if item.is_file(): - save_file = self.save(f"{item}", category, filetype, preserve_path) + save_file = self.save_file(item, category, filetype, preserve_path) elif item.is_dir(): save_file = self.save_directory(item, category, filetype, preserve_path) else: diff --git a/simvue/serialization.py b/simvue/serialization.py index d77ae3eb..c51847e6 100644 --- a/simvue/serialization.py +++ b/simvue/serialization.py @@ -1,23 +1,31 @@ +""" +Object Serialization +==================== + +Contains serializers for storage of objects on the Simvue server +""" + +import typing import pickle import pandas +import json import numpy from io import BytesIO -from .utilities import check_extra - +if typing.TYPE_CHECKING: + from pandas import DataFrame + from plotly.graph_objects import Figure + from torch import Tensor + from typing_extensions import Buffer + from .types import DeserializedContent -class Serializer: - def serialize(self, data, allow_pickle=False): - serializer = get_serializer(data, allow_pickle) - if serializer: - return serializer(data) - return None, None +from .utilities import check_extra -def _is_torch_tensor(data): +def _is_torch_tensor(data: typing.Any) -> bool: """ - Check if a dictionary is a PyTorch tensor or state dict + Check if value is a PyTorch tensor or state dict """ module_name = data.__class__.__module__ class_name = data.__class__.__name__ @@ -37,71 +45,86 @@ def _is_torch_tensor(data): return False -def get_serializer(data, allow_pickle): - """ - Determine which serializer to use +def serialize_object( + data: typing.Any, allow_pickle: bool +) -> typing.Optional[tuple[str, str]]: + """Determine which serializer to use for the given object + + Parameters + ---------- + data : typing.Any + object to serialize + allow_pickle : bool + whether pickling is allowed + + Returns + ------- + Callable[[typing.Any], tuple[str, str]] + the serializer to use """ module_name = data.__class__.__module__ class_name = data.__class__.__name__ if module_name == "plotly.graph_objs._figure" and class_name == "Figure": - return _serialize_plotly_figure + return _serialize_plotly_figure(data) elif module_name == "matplotlib.figure" and class_name == "Figure": - return _serialize_matplotlib_figure + return _serialize_matplotlib_figure(data) elif module_name == "numpy" and class_name == "ndarray": - return _serialize_numpy_array + return _serialize_numpy_array(data) elif module_name == "pandas.core.frame" and class_name == "DataFrame": - return _serialize_dataframe + return _serialize_dataframe(data) elif _is_torch_tensor(data): - return _serialize_torch_tensor + return _serialize_torch_tensor(data) elif module_name == "builtins" and class_name == "module" and not allow_pickle: try: - import matplotlib + import matplotlib.pyplot if data == matplotlib.pyplot: - return _serialize_matplotlib + return _serialize_matplotlib(data) except ImportError: pass + elif serialized := _serialize_json(data): + return serialized if allow_pickle: - return _serialize_pickle + return _serialize_pickle(data) return None @check_extra("plot") -def _serialize_plotly_figure(data): +def _serialize_plotly_figure(data: typing.Any) -> typing.Optional[tuple[str, str]]: try: import plotly except ImportError: - return + return None mimetype = "application/vnd.plotly.v1+json" - data = plotly.io.to_json(data, "json") + data = plotly.io.to_json(data, engine="json") return data, mimetype @check_extra("plot") -def _serialize_matplotlib(data): +def _serialize_matplotlib(data: typing.Any) -> typing.Optional[tuple[str, str]]: try: import plotly except ImportError: return None mimetype = "application/vnd.plotly.v1+json" - data = plotly.io.to_json(plotly.tools.mpl_to_plotly(data.gcf()), "json") + data = plotly.io.to_json(plotly.tools.mpl_to_plotly(data.gcf()), engine="json") return data, mimetype @check_extra("plot") -def _serialize_matplotlib_figure(data): +def _serialize_matplotlib_figure(data: typing.Any) -> typing.Optional[tuple[str, str]]: try: import plotly except ImportError: return None mimetype = "application/vnd.plotly.v1+json" - data = plotly.io.to_json(plotly.tools.mpl_to_plotly(data), "json") + data = plotly.io.to_json(plotly.tools.mpl_to_plotly(data), engine="json") return data, mimetype -def _serialize_numpy_array(data): +def _serialize_numpy_array(data: typing.Any) -> typing.Optional[tuple[str, str]]: mimetype = "application/vnd.simvue.numpy.v1" mfile = BytesIO() numpy.save(mfile, data, allow_pickle=False) @@ -110,7 +133,7 @@ def _serialize_numpy_array(data): return data, mimetype -def _serialize_dataframe(data): +def _serialize_dataframe(data: typing.Any) -> typing.Optional[tuple[str, str]]: mimetype = "application/vnd.simvue.df.v1" mfile = BytesIO() data.to_csv(mfile) @@ -120,7 +143,7 @@ def _serialize_dataframe(data): @check_extra("torch") -def _serialize_torch_tensor(data): +def _serialize_torch_tensor(data: typing.Any) -> typing.Optional[tuple[str, str]]: try: import torch except ImportError: @@ -135,41 +158,46 @@ def _serialize_torch_tensor(data): return data, mimetype -def _serialize_pickle(data): - mimetype = "application/octet-stream" - data = pickle.dumps(data) +def _serialize_json(data: typing.Any) -> typing.Optional[tuple[str, str]]: + mimetype = "application/json" + try: + data = json.dumps(data) + except TypeError: + return None return data, mimetype -class Deserializer: - def deserialize(self, data, mimetype, allow_pickle=False): - deserializer = get_deserializer(mimetype, allow_pickle) - if deserializer: - return deserializer(data) - return None +def _serialize_pickle(data: typing.Any) -> typing.Optional[tuple[str, str]]: + mimetype = "application/octet-stream" + data = pickle.dumps(data) + return data, mimetype -def get_deserializer(mimetype, allow_pickle): +def deserialize_data( + data: "Buffer", mimetype: str, allow_pickle: bool +) -> typing.Optional["DeserializedContent"]: """ Determine which deserializer to use """ if mimetype == "application/vnd.plotly.v1+json": - return _deserialize_plotly_figure + return _deserialize_plotly_figure(data) elif mimetype == "application/vnd.plotly.v1+json": - return _deserialize_matplotlib_figure + return _deserialize_matplotlib_figure(data) elif mimetype == "application/vnd.simvue.numpy.v1": - return _deserialize_numpy_array + return _deserialize_numpy_array(data) elif mimetype == "application/vnd.simvue.df.v1": - return _deserialize_dataframe + return _deserialize_dataframe(data) elif mimetype == "application/vnd.simvue.torch.v1": - return _deserialize_torch_tensor + return _deserialize_torch_tensor(data) + elif mimetype == "application/json": + return _deserialize_json(data) elif mimetype == "application/octet-stream" and allow_pickle: - return _deserialize_pickle + return _deserialize_pickle(data) return None @check_extra("plot") -def _deserialize_plotly_figure(data): +def _deserialize_plotly_figure(data: "Buffer") -> typing.Optional["Figure"]: try: import plotly except ImportError: @@ -179,7 +207,7 @@ def _deserialize_plotly_figure(data): @check_extra("plot") -def _deserialize_matplotlib_figure(data): +def _deserialize_matplotlib_figure(data: "Buffer") -> typing.Optional["Figure"]: try: import plotly except ImportError: @@ -188,22 +216,21 @@ def _deserialize_matplotlib_figure(data): return data -def _deserialize_numpy_array(data): +def _deserialize_numpy_array(data: "Buffer") -> typing.Optional[typing.Any]: mfile = BytesIO(data) mfile.seek(0) data = numpy.load(mfile, allow_pickle=False) return data -def _deserialize_dataframe(data): +def _deserialize_dataframe(data: "Buffer") -> typing.Optional["DataFrame"]: mfile = BytesIO(data) mfile.seek(0) - data = pandas.read_csv(mfile, index_col=0) - return data + return pandas.read_csv(mfile, index_col=0) @check_extra("torch") -def _deserialize_torch_tensor(data): +def _deserialize_torch_tensor(data: "Buffer") -> typing.Optional["Tensor"]: try: import torch except ImportError: @@ -212,10 +239,14 @@ def _deserialize_torch_tensor(data): mfile = BytesIO(data) mfile.seek(0) - data = torch.load(mfile) - return data + return torch.load(mfile) -def _deserialize_pickle(data): +def _deserialize_pickle(data) -> typing.Optional[typing.Any]: data = pickle.loads(data) return data + + +def _deserialize_json(data) -> typing.Optional[typing.Any]: + data = json.loads(data) + return data diff --git a/tests/functional/common.py b/tests/functional/common.py index 5420189d..4fb50ae0 100644 --- a/tests/functional/common.py +++ b/tests/functional/common.py @@ -1,4 +1,5 @@ import configparser +import pathlib import os import uuid @@ -20,9 +21,9 @@ def update_config(): config.write(configfile) FOLDER = '/test-%s' % str(uuid.uuid4()) -FILENAME1 = str(uuid.uuid4()) -FILENAME2 = str(uuid.uuid4()) -FILENAME3 = str(uuid.uuid4()) +FILENAME1 = pathlib.Path(str(uuid.uuid4())) +FILENAME2 = pathlib.Path(str(uuid.uuid4())) +FILENAME3 = pathlib.Path(str(uuid.uuid4())) RUNNAME1 = 'test-%s' % str(uuid.uuid4()) RUNNAME2 = 'test-%s' % str(uuid.uuid4()) RUNNAME3 = 'test-%s' % str(uuid.uuid4()) diff --git a/tests/functional/test_artifacts_code.py b/tests/functional/test_artifacts_code.py index 04f6a3e0..6ea0f220 100644 --- a/tests/functional/test_artifacts_code.py +++ b/tests/functional/test_artifacts_code.py @@ -1,12 +1,9 @@ -import configparser import filecmp import os import shutil -import time import unittest import uuid from simvue import Run, Client -from simvue.sender import sender import common @@ -22,7 +19,7 @@ def test_artifact_code(self): content = str(uuid.uuid4()) with open(common.FILENAME1, 'w') as fh: fh.write(content) - run.save(common.FILENAME1, 'code') + run.save_file(common.FILENAME1, 'code') run.close() diff --git a/tests/functional/test_artifacts_code_created.py b/tests/functional/test_artifacts_code_created.py index 0a03e3cb..667e2820 100644 --- a/tests/functional/test_artifacts_code_created.py +++ b/tests/functional/test_artifacts_code_created.py @@ -23,7 +23,7 @@ def test_artifact_code_created(self): content = str(uuid.uuid4()) with open(common.FILENAME1, 'w') as fh: fh.write(content) - run.save(common.FILENAME1, 'code') + run.save_file(common.FILENAME1, 'code') shutil.rmtree('./test', ignore_errors=True) os.mkdir('./test') diff --git a/tests/functional/test_artifacts_input.py b/tests/functional/test_artifacts_input.py index 841c0321..bde1a9e9 100644 --- a/tests/functional/test_artifacts_input.py +++ b/tests/functional/test_artifacts_input.py @@ -22,7 +22,7 @@ def test_artifact_input(self): content = str(uuid.uuid4()) with open(common.FILENAME2, 'w') as fh: fh.write(content) - run.save(common.FILENAME2, 'input') + run.save_file(common.FILENAME2, 'input') run.close() diff --git a/tests/functional/test_artifacts_input_created.py b/tests/functional/test_artifacts_input_created.py index e990ca0c..09dc0510 100644 --- a/tests/functional/test_artifacts_input_created.py +++ b/tests/functional/test_artifacts_input_created.py @@ -23,7 +23,7 @@ def test_artifact_input_created(self): content = str(uuid.uuid4()) with open(common.FILENAME2, 'w') as fh: fh.write(content) - run.save(common.FILENAME2, 'input') + run.save_file(common.FILENAME2, 'input') shutil.rmtree('./test', ignore_errors=True) os.mkdir('./test') diff --git a/tests/functional/test_artifacts_output.py b/tests/functional/test_artifacts_output.py index 7899d474..f41a8c2c 100644 --- a/tests/functional/test_artifacts_output.py +++ b/tests/functional/test_artifacts_output.py @@ -22,7 +22,7 @@ def test_artifact_output(self): content = str(uuid.uuid4()) with open(common.FILENAME3, 'w') as fh: fh.write(content) - run.save(common.FILENAME3, 'output') + run.save_file(common.FILENAME3, 'output') run.close() diff --git a/tests/functional/test_artifacts_output_created.py b/tests/functional/test_artifacts_output_created.py index 38517698..fc5349a8 100644 --- a/tests/functional/test_artifacts_output_created.py +++ b/tests/functional/test_artifacts_output_created.py @@ -24,7 +24,7 @@ def test_artifact_output_created(self): fh.write(content) with self.assertRaises(Exception) as context: - run.save(common.FILENAME3, 'output') + run.save_file(common.FILENAME3, 'output') self.assertTrue('Cannot upload output files for runs in the created state' in str(context.exception)) diff --git a/tests/functional/test_offline_artifacts_code.py b/tests/functional/test_offline_artifacts_code.py index 06be6daf..6cbc247b 100644 --- a/tests/functional/test_offline_artifacts_code.py +++ b/tests/functional/test_offline_artifacts_code.py @@ -29,7 +29,7 @@ def test_artifact_code_offline(self): content = str(uuid.uuid4()) with open(common.FILENAME1, 'w') as fh: fh.write(content) - run.save(common.FILENAME1, 'code') + run.save_file(common.FILENAME1, 'code') run.close() diff --git a/tests/functional/test_offline_artifacts_code_created.py b/tests/functional/test_offline_artifacts_code_created.py index cc2b5ef1..51a7e700 100644 --- a/tests/functional/test_offline_artifacts_code_created.py +++ b/tests/functional/test_offline_artifacts_code_created.py @@ -30,7 +30,7 @@ def test_artifact_code_offline(self): content = str(uuid.uuid4()) with open(common.FILENAME1, "w") as fh: fh.write(content) - run.save(common.FILENAME1, "code") + run.save_file(common.FILENAME1, "code") sender() diff --git a/tests/functional/test_offline_artifacts_input.py b/tests/functional/test_offline_artifacts_input.py index faa14450..0c19dc8c 100644 --- a/tests/functional/test_offline_artifacts_input.py +++ b/tests/functional/test_offline_artifacts_input.py @@ -30,7 +30,7 @@ def test_artifact_input_offline(self): content = str(uuid.uuid4()) with open(common.FILENAME2, "w") as fh: fh.write(content) - run.save(common.FILENAME2, "input") + run.save_file(common.FILENAME2, "input") run.close() diff --git a/tests/functional/test_offline_artifacts_input_created.py b/tests/functional/test_offline_artifacts_input_created.py index d5a12f55..1f29ad2f 100644 --- a/tests/functional/test_offline_artifacts_input_created.py +++ b/tests/functional/test_offline_artifacts_input_created.py @@ -30,7 +30,7 @@ def test_artifact_input_offline(self): content = str(uuid.uuid4()) with open(common.FILENAME2, "w") as fh: fh.write(content) - run.save(common.FILENAME2, "input") + run.save_file(common.FILENAME2, "input") sender() diff --git a/tests/functional/test_offline_artifacts_output.py b/tests/functional/test_offline_artifacts_output.py index c5808494..4cfe747c 100644 --- a/tests/functional/test_offline_artifacts_output.py +++ b/tests/functional/test_offline_artifacts_output.py @@ -30,7 +30,7 @@ def test_artifact_output_offline(self): content = str(uuid.uuid4()) with open(common.FILENAME3, "w") as fh: fh.write(content) - run.save(common.FILENAME3, "output") + run.save_file(common.FILENAME3, "output") run.close() diff --git a/tests/refactor/conftest.py b/tests/refactor/conftest.py index af2305aa..bc2b31ae 100644 --- a/tests/refactor/conftest.py +++ b/tests/refactor/conftest.py @@ -79,7 +79,7 @@ def setup_test_run(run: sv_run.Run, create_objects: bool, request: pytest.Fixtur "test_identifier": fix_use_id }, "folder": f"/simvue_unit_testing/{fix_use_id}", - "tags": ["simvue_client_unit_tests", request.node.name] + "tags": ["simvue_client_unit_tests", request.node.name.replace("[", "_").replace("]", "_")] } if os.environ.get("CI"): @@ -120,19 +120,19 @@ def setup_test_run(run: sv_run.Run, create_objects: bool, request: pytest.Fixtur with tempfile.TemporaryDirectory() as tempd: with open((test_file := os.path.join(tempd, "test_file.txt")), "w") as out_f: out_f.write("This is a test file") - run.save(test_file, category="input", name="test_file") + run.save_file(test_file, category="input", name="test_file") TEST_DATA["file_1"] = "test_file" with open((test_json := os.path.join(tempd, f"test_attrs_{fix_use_id}.json")), "w") as out_f: json.dump(TEST_DATA, out_f, indent=2) - run.save(test_json, category="output", name="test_attributes") + run.save_file(test_json, category="output", name="test_attributes") TEST_DATA["file_2"] = "test_attributes" with open((test_script := os.path.join(tempd, "test_script.py")), "w") as out_f: out_f.write( "print('Hello World!')" ) - run.save(test_script, category="code", name="test_empty_file") + run.save_file(test_script, category="code", name="test_empty_file") TEST_DATA["file_3"] = "test_empty_file" time.sleep(1.) diff --git a/tests/refactor/test_run_class.py b/tests/refactor/test_run_class.py index 76ae29dc..d1da1c8a 100644 --- a/tests/refactor/test_run_class.py +++ b/tests/refactor/test_run_class.py @@ -3,7 +3,9 @@ import typing import contextlib import inspect +import tempfile import uuid +import pathlib import concurrent.futures import random import inspect @@ -308,7 +310,7 @@ def test_suppressed_errors( assert setup_logging.counts[0] == len(decorated_funcs) + 1 else: assert setup_logging.counts[0] == len(decorated_funcs) - + @pytest.mark.run def test_bad_run_arguments() -> None: @@ -324,7 +326,79 @@ def test_set_folder_details(request: pytest.FixtureRequest) -> None: tags: list[str] = ["simvue_client_unit_tests", request.node.name] run.init(folder=folder_name) run.set_folder_details(path=folder_name, tags=tags, description=description) - + client = sv_cl.Client() assert (folder := client.get_folders([f"path == {folder_name}"])[0])["tags"] == tags assert folder["description"] == description + + +@pytest.mark.run +@pytest.mark.parametrize("valid_mimetype", (True, False), ids=("valid_mime", "invalid_mime")) +@pytest.mark.parametrize("preserve_path", (True, False), ids=("preserve_path", "modified_path")) +@pytest.mark.parametrize("name", ("test_file", None), ids=("named", "nameless")) +@pytest.mark.parametrize("allow_pickle", (True, False), ids=("pickled", "unpickled")) +@pytest.mark.parametrize("empty_file", (True, False), ids=("empty", "content")) +def test_save_file( + create_plain_run: typing.Tuple[sv_run.Run, dict], + valid_mimetype: bool, + preserve_path: bool, + name: typing.Optional[str], + allow_pickle: bool, + empty_file: bool, + capfd +) -> None: + simvue_run, _ = create_plain_run + file_type: str = 'text/plain' if valid_mimetype else 'text/text' + with tempfile.TemporaryDirectory() as tempd: + with open( + ( + out_name := pathlib.Path(tempd).joinpath("test_file.txt") + ), + "w", + ) as out_f: + out_f.write("test data entry" if not empty_file else "") + + if valid_mimetype: + simvue_run.save_file( + out_name, + category="input", + filetype=file_type, + preserve_path=preserve_path, + name=name, + ) + else: + with pytest.raises(RuntimeError): + simvue_run.save_file( + out_name, + category="input", + filetype=file_type, + preserve_path=preserve_path + ) + return + + variable = capfd.readouterr() + with capfd.disabled(): + if empty_file: + assert variable.out == "WARNING: saving zero-sized files not currently supported\n" + + +@pytest.mark.run +@pytest.mark.parametrize("object_type", ("DataFrame", "ndarray")) +def test_save_object( + create_plain_run: typing.Tuple[sv_run.Run, dict], object_type: str +) -> None: + simvue_run, _ = create_plain_run + + if object_type == "DataFrame": + try: + from pandas import DataFrame + except ImportError: + pytest.skip("Pandas is not installed") + save_obj = DataFrame({"x": [1, 2, 3, 4], "y": [2, 4, 6, 8]}) + elif object_type == "ndarray": + try: + from numpy import array + except ImportError: + pytest.skip("Numpy is not installed") + save_obj = array([1, 2, 3, 4]) + simvue_run.save_object(save_obj, "input", f"test_object_{object_type}") diff --git a/tests/unit/test_matplotlib_figure_mime_type.py b/tests/unit/test_matplotlib_figure_mime_type.py index 5b9c6ea5..c0deaa2a 100644 --- a/tests/unit/test_matplotlib_figure_mime_type.py +++ b/tests/unit/test_matplotlib_figure_mime_type.py @@ -1,5 +1,6 @@ +from simvue.serialization import serialize_object +import matplotlib.pyplot as plt import pytest -from simvue.serialization import Serializer try: import matplotlib.pyplot as plt @@ -14,6 +15,6 @@ def test_matplotlib_figure_mime_type(): plt.plot([1, 2, 3, 4]) figure = plt.gcf() - _, mime_type = Serializer().serialize(figure) + _, mime_type = serialize_object(figure, False) assert (mime_type == 'application/vnd.plotly.v1+json') diff --git a/tests/unit/test_numpy_array_mime_type.py b/tests/unit/test_numpy_array_mime_type.py index ca16806a..7523d30b 100644 --- a/tests/unit/test_numpy_array_mime_type.py +++ b/tests/unit/test_numpy_array_mime_type.py @@ -1,4 +1,4 @@ -from simvue.serialization import Serializer, Deserializer +from simvue.serialization import serialize_object import numpy as np def test_numpy_array_mime_type(): @@ -6,6 +6,6 @@ def test_numpy_array_mime_type(): Check that the mimetype for numpy arrays is correct """ array = np.array([1, 2, 3, 4, 5]) - _, mime_type = Serializer().serialize(array) + _, mime_type = serialize_object(array, False) assert (mime_type == 'application/vnd.simvue.numpy.v1') diff --git a/tests/unit/test_numpy_array_serialization.py b/tests/unit/test_numpy_array_serialization.py index d7c952a1..0f713cdd 100644 --- a/tests/unit/test_numpy_array_serialization.py +++ b/tests/unit/test_numpy_array_serialization.py @@ -1,4 +1,4 @@ -from simvue.serialization import Serializer, Deserializer +from simvue.serialization import serialize_object, deserialize_data import numpy as np def test_numpy_array_serialization(): @@ -7,7 +7,7 @@ def test_numpy_array_serialization(): """ array = np.array([1, 2, 3, 4, 5]) - serialized, mime_type = Serializer().serialize(array) - array_out = Deserializer().deserialize(serialized, mime_type) + serialized, mime_type = serialize_object(array, False) + array_out = deserialize_data(serialized, mime_type, False) assert (array == array_out).all() diff --git a/tests/unit/test_pandas_dataframe_mimetype.py b/tests/unit/test_pandas_dataframe_mimetype.py index 6d35d3ba..57d5f775 100644 --- a/tests/unit/test_pandas_dataframe_mimetype.py +++ b/tests/unit/test_pandas_dataframe_mimetype.py @@ -1,5 +1,6 @@ +import pandas as pd +from simvue.serialization import serialize_object import pytest -from simvue.serialization import Serializer try: import pandas as pd @@ -14,6 +15,6 @@ def test_pandas_dataframe_mimetype(): data = {'col1': [1, 2], 'col2': [3, 4]} df = pd.DataFrame(data=data) - _, mime_type = Serializer().serialize(df) + _, mime_type = serialize_object(df, False) assert (mime_type == 'application/vnd.simvue.df.v1') diff --git a/tests/unit/test_pandas_dataframe_serialization.py b/tests/unit/test_pandas_dataframe_serialization.py index b7ba434d..fde9262d 100644 --- a/tests/unit/test_pandas_dataframe_serialization.py +++ b/tests/unit/test_pandas_dataframe_serialization.py @@ -1,5 +1,6 @@ +from simvue.serialization import serialize_object, deserialize_data +import pandas as pd import pytest -from simvue.serialization import Serializer, Deserializer try: import pandas as pd @@ -14,7 +15,7 @@ def test_pandas_dataframe_serialization(): data = {'col1': [1, 2], 'col2': [3, 4]} df = pd.DataFrame(data=data) - serialized, mime_type = Serializer().serialize(df) - df_out = Deserializer().deserialize(serialized, mime_type) + serialized, mime_type = serialize_object(df, False) + df_out = deserialize_data(serialized, mime_type, False) assert (df.equals(df_out)) diff --git a/tests/unit/test_pickle_serialization.py b/tests/unit/test_pickle_serialization.py index 1960ab41..60833665 100644 --- a/tests/unit/test_pickle_serialization.py +++ b/tests/unit/test_pickle_serialization.py @@ -1,4 +1,4 @@ -from simvue.serialization import Serializer, Deserializer +from simvue.serialization import deserialize_data, serialize_object def test_pickle_serialization(): """ @@ -6,7 +6,7 @@ def test_pickle_serialization(): """ data = {'a': 1.0, 'b': 'test'} - serialized, mime_type = Serializer().serialize(data, allow_pickle=True) - data_out = Deserializer().deserialize(serialized, mime_type, allow_pickle=True) + serialized, mime_type = serialize_object(data, allow_pickle=True) + data_out = deserialize_data(serialized, mime_type, allow_pickle=True) assert (data == data_out) diff --git a/tests/unit/test_plotly_figure_mime_type.py b/tests/unit/test_plotly_figure_mime_type.py index 992524f7..8cf8a479 100644 --- a/tests/unit/test_plotly_figure_mime_type.py +++ b/tests/unit/test_plotly_figure_mime_type.py @@ -1,7 +1,8 @@ +from simvue.serialization import serialize_object +import matplotlib.pyplot as plt +import plotly import pytest -from simvue.serialization import Serializer, Deserializer - try: import matplotlib.pyplot as plt except ImportError: @@ -23,6 +24,6 @@ def test_plotly_figure_mime_type(): figure = plt.gcf() plotly_figure = plotly.tools.mpl_to_plotly(figure) - _, mime_type = Serializer().serialize(plotly_figure) + _, mime_type = serialize_object(plotly_figure, False) assert (mime_type == 'application/vnd.plotly.v1+json') diff --git a/tests/unit/test_pytorch_tensor_mime_type.py b/tests/unit/test_pytorch_tensor_mime_type.py index 823373f8..c240cd5b 100644 --- a/tests/unit/test_pytorch_tensor_mime_type.py +++ b/tests/unit/test_pytorch_tensor_mime_type.py @@ -1,5 +1,7 @@ +from simvue.serialization import serialize_object +import torch import pytest -from simvue.serialization import Serializer, Deserializer + try: import torch @@ -13,6 +15,6 @@ def test_pytorch_tensor_mime_type(): """ torch.manual_seed(1724) array = torch.rand(2, 3) - _, mime_type = Serializer().serialize(array) + _, mime_type = serialize_object(array, False) assert (mime_type == 'application/vnd.simvue.torch.v1') diff --git a/tests/unit/test_pytorch_tensor_serialization.py b/tests/unit/test_pytorch_tensor_serialization.py index a51abf03..64b1b619 100644 --- a/tests/unit/test_pytorch_tensor_serialization.py +++ b/tests/unit/test_pytorch_tensor_serialization.py @@ -1,5 +1,6 @@ +import torch +from simvue.serialization import serialize_object, deserialize_data import pytest -from simvue.serialization import Serializer, Deserializer try: import torch @@ -14,7 +15,7 @@ def test_pytorch_tensor_serialization(): torch.manual_seed(1724) array = torch.rand(2, 3) - serialized, mime_type = Serializer().serialize(array) - array_out = Deserializer().deserialize(serialized, mime_type) + serialized, mime_type = serialize_object(array, False) + array_out = deserialize_data(serialized, mime_type, False) assert (array == array_out).all() diff --git a/tests/unit/test_run_init_folder.py b/tests/unit/test_run_init_folder.py index 830c2e85..286f6251 100644 --- a/tests/unit/test_run_init_folder.py +++ b/tests/unit/test_run_init_folder.py @@ -1,4 +1,3 @@ -import os from simvue import Run import pytest diff --git a/tests/unit/test_run_init_metadata.py b/tests/unit/test_run_init_metadata.py index 5ed4582c..1973f35a 100644 --- a/tests/unit/test_run_init_metadata.py +++ b/tests/unit/test_run_init_metadata.py @@ -1,4 +1,3 @@ -import os from simvue import Run import pytest diff --git a/tests/unit/test_run_init_tags.py b/tests/unit/test_run_init_tags.py index 2f026646..6e247446 100644 --- a/tests/unit/test_run_init_tags.py +++ b/tests/unit/test_run_init_tags.py @@ -1,4 +1,3 @@ -import os from simvue import Run import pytest