Skip to content

Refactor serialization and fix tuple without comma bug #341

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 13 commits on May 24, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -69,13 +69,13 @@ if __name__ == "__main__":
description='This is part 1 of a test') # Description

# Upload the code
run.save('training.py', 'code')
run.save_file('training.py', 'code')

# Upload an input file
run.save('params.in', 'input')
run.save_file('params.in', 'input')

# Add an alert (the alert definition will be created if necessary)
run.add_alert(name='loss-too-high', # Name
run.create_alert(name='loss-too-high', # Name
source='metrics', # Source
rule='is above', # Rule
metric='loss', # Metric
Expand All @@ -96,7 +96,7 @@ if __name__ == "__main__":
...

# Upload an output file
run.save('output.cdf', 'output')
run.save_file('output.cdf', 'output')

# If we weren't using a context manager we'd need to end the run
# run.close()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -171,5 +171,5 @@ def my_minimise_length(vector, grad, parameterisation, ad_args=None):

# Here we're minimising the length, within the bounds of our PrincetonD parameterisation,
# so we'd expect that x1 goes to its upper bound, and x2 goes to its lower bound.
run.save("bluemira_simvue_geometry_optimisation.py", "code")
run.save_file("bluemira_simvue_geometry_optimisation.py", "code")
run.close()
2 changes: 1 addition & 1 deletion examples/PyTorch/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -205,7 +205,7 @@ def main():
scheduler.step()

if args.save_model:
run.save(model.state_dict(), "output", name="mnist_cnn.pt")
run.save_file(model.state_dict(), "output", name="mnist_cnn.pt")

run.close()

Expand Down
4 changes: 2 additions & 2 deletions examples/SU2/SU2.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@
filetype = None
if input_file.endswith(".cfg"):
filetype = "text/plain"
run.save(input_file, "input", filetype)
run.save_file(input_file, "input", filetype)

running = True
latest = []
Expand Down Expand Up @@ -106,6 +106,6 @@

# Save output files
for output_file in OUTPUT_FILES:
run.save(output_file, "output")
run.save_file(output_file, "output")

run.close()
2 changes: 1 addition & 1 deletion examples/Tensorflow/dynamic_rnn.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@
"computation over sequences with variable length. This example is using a toy dataset to "
"classify linear sequences. The generated sequences have variable length.",
)
run.save("dynamic_rnn.py", "code")
run.save_file("dynamic_rnn.py", "code")

# ====================
# TOY DATA GENERATOR
Expand Down
4 changes: 2 additions & 2 deletions simvue/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
to_dataframe,
parse_run_set_metrics,
)
from .serialization import Deserializer
from .serialization import deserialize_data
from .types import DeserializedContent
from .utilities import check_extra, get_auth

Expand Down Expand Up @@ -608,7 +608,7 @@ def get_artifact(
response = requests.get(url, timeout=DOWNLOAD_TIMEOUT)
response.raise_for_status()

content: typing.Optional[DeserializedContent] = Deserializer().deserialize(
content: typing.Optional[DeserializedContent] = deserialize_data(
response.content, mimetype, allow_pickle
)

Expand Down
8 changes: 4 additions & 4 deletions simvue/executor.py
Original file line number Diff line number Diff line change
Expand Up @@ -167,10 +167,10 @@ def callback_function(status_code: int, std_out: str, std_err: str) -> None:
)

if script:
self._runner.save(filename=script, category="code")
self._runner.save_file(file_path=script, category="code")

if input_file:
self._runner.save(filename=input_file, category="input")
self._runner.save_file(file_path=input_file, category="input")

_command: typing.List[str] = []

Expand Down Expand Up @@ -284,11 +284,11 @@ def _save_output(self) -> None:
for proc_id in self._exit_codes.keys():
# Only save the file if the contents are not empty
if self._std_err[proc_id]:
self._runner.save(
self._runner.save_file(
f"{self._runner.name}_{proc_id}.err", category="output"
)
if self._std_out[proc_id]:
self._runner.save(
self._runner.save_file(
f"{self._runner.name}_{proc_id}.out", category="output"
)

Expand Down
202 changes: 112 additions & 90 deletions simvue/run.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@
from .factory.proxy import Simvue
from .metrics import get_gpu_metrics, get_process_cpu, get_process_memory
from .models import RunInput
from .serialization import Serializer
from .serialization import serialize_object
from .system import get_system
from .metadata import git_info
from .utilities import (
Expand Down Expand Up @@ -159,7 +159,7 @@ def __exit__(
else:
if self._active:
self.log_event(f"{exc_type.__name__}: {value}")
if exc_type.__name__ in ("KeyboardInterrupt") and self._active:
if exc_type.__name__ in ("KeyboardInterrupt",) and self._active:
self.set_status("terminated")
else:
if traceback and self._active:
Expand Down Expand Up @@ -982,17 +982,87 @@ def log_metrics(
@check_run_initialised
@skip_if_failed("_aborted", "_suppress_errors", False)
@pydantic.validate_call
def save(
def save_object(
self,
filename: str,
obj: typing.Any,
category: typing.Literal["input", "output", "code"],
filetype: typing.Optional[str] = None,
preserve_path: bool = False,
name: typing.Optional[str] = None,
allow_pickle: bool = False,
) -> bool:
"""Save an object to the Simvue server

Parameters
----------
obj : typing.Any
object to serialize and send to the server
category : Literal['input', 'output', 'code']
category of file with respect to this run
name : str, optional
name to associate with this object, by default None
allow_pickle : bool, optional
whether to allow pickling if all other serialization types fail, by default False

Returns
-------
bool
whether object upload was successful
"""
Upload file or object
serialized = serialize_object(obj, allow_pickle)

if not serialized or not (pickled := serialized[0]):
self._error(f"Failed to serialize '{obj}'")
return False

data_type = serialized[1]

if not data_type and not allow_pickle:
self._error("Unable to save Python object, set allow_pickle to True")
return False

data: dict[str, typing.Any] = {
"pickled": pickled,
"type": data_type,
"checksum": calculate_sha256(pickled, False),
"originalPath": "",
"size": sys.getsizeof(pickled),
"name": name,
"run": self._name,
"category": category,
"storage": self._storage_id,
}

# Register file
return self._simvue is not None and self._simvue.save_file(data) is not None

@skip_if_failed("_aborted", "_suppress_errors", False)
@pydantic.validate_call
def save_file(
self,
file_path: pydantic.FilePath,
category: typing.Literal["input", "output", "code"],
filetype: typing.Optional[str] = None,
preserve_path: bool = False,
name: typing.Optional[str] = None,
) -> bool:
"""Upload file to the server

Parameters
----------
file_path : pydantic.FilePath
path to the file to upload
category : Literal['input', 'output', 'code']
category of file with respect to this run
filetype : str, optional
the MIME type of the file; if omitted it is deduced from the file name, by default None
preserve_path : bool, optional
whether to preserve the path during storage, by default False
name : str, optional
name to associate with this file, by default None

Returns
-------
bool
whether the upload was successful
"""
if self._mode == "disabled":
return True
Expand All @@ -1005,96 +1075,48 @@ def save(
self._error("Cannot upload output files for runs in the created state")
return False

is_file: bool = False
mimetypes.init()
mimetypes_valid = ["application/vnd.plotly.v1+json"]
mimetypes_valid += list(mimetypes.types_map.values())

if isinstance(filename, str):
if not os.path.isfile(filename):
self._error(f"File {filename} does not exist")
return False
else:
is_file = True

if filetype:
mimetypes_valid = ["application/vnd.plotly.v1+json"]
mimetypes.init()
for _, value in mimetypes.types_map.items():
mimetypes_valid.append(value)

if filetype not in mimetypes_valid:
self._error("Invalid MIME type specified")
return False

data: dict[str, typing.Any] = {}

if preserve_path:
data["name"] = filename
if data["name"].startswith("./"):
data["name"] = data["name"][2:]
elif is_file:
data["name"] = os.path.basename(filename)

if name:
data["name"] = name

data["run"] = self._name
data["category"] = category
if filetype and filetype not in mimetypes_valid:
self._error(f"Invalid MIME type '{filetype}' specified")
return False

if is_file:
data["size"] = os.path.getsize(filename)
data["originalPath"] = os.path.abspath(
os.path.expanduser(os.path.expandvars(filename))
)
data["checksum"] = calculate_sha256(filename, is_file)
stored_file_name: str = f"{file_path}"

if data["size"] == 0:
click.secho(
"WARNING: saving zero-sized files not currently supported",
bold=True,
fg="yellow",
)
return True
if preserve_path and stored_file_name.startswith("./"):
stored_file_name = stored_file_name[2:]
elif not preserve_path:
stored_file_name = os.path.basename(file_path)

# Determine mimetype
mimetype = None
if not filetype and is_file:
mimetypes.init()
mimetype = mimetypes.guess_type(filename)[0]
if not mimetype:
mimetype = "application/octet-stream"
elif is_file:
mimetype = filetype

if mimetype:
data["type"] = mimetype

if not is_file:
serialized = Serializer().serialize(filename, allow_pickle)

if not serialized or not (pickled := serialized[0]):
self._error(f"Failed to serialize '{filename}'")
return False

data_type = serialized[1]

data["pickled"] = pickled
data["type"] = data_type

if not data["type"] and not allow_pickle:
self._error("Unable to save Python object, set allow_pickle to True")
return False
if not (mimetype := filetype):
mimetype = mimetypes.guess_type(file_path)[0] or "application/octet-stream"

data["checksum"] = calculate_sha256(pickled, False)
data["originalPath"] = ""
data["size"] = sys.getsizeof(pickled)
data: dict[str, typing.Any] = {
"name": name or stored_file_name,
"run": self._name,
"type": mimetype,
"storage": self._storage_id,
"category": category,
"size": (file_size := os.path.getsize(file_path)),
"originalPath": os.path.abspath(
os.path.expanduser(os.path.expandvars(file_path))
),
"checksum": calculate_sha256(f"{file_path}", True),
}

if self._storage_id:
data["storage"] = self._storage_id
if not file_size:
click.secho(
"WARNING: saving zero-sized files not currently supported",
bold=True,
fg="yellow",
)
return True

# Register file
if not self._simvue.save_file(data):
return False

return True
return self._simvue.save_file(data) is not None

@check_run_initialised
@skip_if_failed("_aborted", "_suppress_errors", False)
Expand Down Expand Up @@ -1129,7 +1151,7 @@ def save_directory(
for dirpath, _, filenames in directory.walk():
for filename in filenames:
if (full_path := dirpath.joinpath(filename)).is_file():
self.save(f"{full_path}", category, filetype, preserve_path)
self.save_file(full_path, category, filetype, preserve_path)

return True

Expand All @@ -1153,7 +1175,7 @@ def save_all(

for item in items:
if item.is_file():
save_file = self.save(f"{item}", category, filetype, preserve_path)
save_file = self.save_file(item, category, filetype, preserve_path)
elif item.is_dir():
save_file = self.save_directory(item, category, filetype, preserve_path)
else:
Expand Down
Loading