diff --git a/README.md b/README.md index 7b9877b2..411f3918 100644 --- a/README.md +++ b/README.md @@ -91,7 +91,7 @@ if __name__ == "__main__": ... # Send metrics inside main application loop - run.log({'loss': 0.5, 'density': 34.4}) + run.log_metrics({'loss': 0.5, 'density': 34.4}) ... diff --git a/poetry.lock b/poetry.lock index 952b796d..6b98548f 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1648,28 +1648,28 @@ use-chardet-on-py3 = ["chardet (>=3.0.2,<6)"] [[package]] name = "ruff" -version = "0.4.6" +version = "0.4.7" description = "An extremely fast Python linter and code formatter, written in Rust." optional = false python-versions = ">=3.7" files = [ - {file = "ruff-0.4.6-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:ef995583a038cd4a7edf1422c9e19118e2511b8ba0b015861b4abd26ec5367c5"}, - {file = "ruff-0.4.6-py3-none-macosx_11_0_arm64.whl", hash = "sha256:602ebd7ad909eab6e7da65d3c091547781bb06f5f826974a53dbe563d357e53c"}, - {file = "ruff-0.4.6-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3f9ced5cbb7510fd7525448eeb204e0a22cabb6e99a3cb160272262817d49786"}, - {file = "ruff-0.4.6-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:04a80acfc862e0e1630c8b738e70dcca03f350bad9e106968a8108379e12b31f"}, - {file = "ruff-0.4.6-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:be47700ecb004dfa3fd4dcdddf7322d4e632de3c06cd05329d69c45c0280e618"}, - {file = "ruff-0.4.6-py3-none-manylinux_2_17_ppc64.manylinux2014_ppc64.whl", hash = "sha256:1ff930d6e05f444090a0139e4e13e1e2e1f02bd51bb4547734823c760c621e79"}, - {file = "ruff-0.4.6-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:f13410aabd3b5776f9c5699f42b37a3a348d65498c4310589bc6e5c548dc8a2f"}, - {file = "ruff-0.4.6-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:0cf5cc02d3ae52dfb0c8a946eb7a1d6ffe4d91846ffc8ce388baa8f627e3bd50"}, - {file = "ruff-0.4.6-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ea3424793c29906407e3cf417f28fc33f689dacbbadfb52b7e9a809dd535dcef"}, - {file = "ruff-0.4.6-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:1fa8561489fadf483ffbb091ea94b9c39a00ed63efacd426aae2f197a45e67fc"}, - {file = "ruff-0.4.6-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:4d5b914818d8047270308fe3e85d9d7f4a31ec86c6475c9f418fbd1624d198e0"}, - {file = "ruff-0.4.6-py3-none-musllinux_1_2_i686.whl", hash = "sha256:4f02284335c766678778475e7698b7ab83abaf2f9ff0554a07b6f28df3b5c259"}, - {file = "ruff-0.4.6-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:3a6a0a4f4b5f54fff7c860010ab3dd81425445e37d35701a965c0248819dde7a"}, - {file = "ruff-0.4.6-py3-none-win32.whl", hash = "sha256:9018bf59b3aa8ad4fba2b1dc0299a6e4e60a4c3bc62bbeaea222679865453062"}, - {file = "ruff-0.4.6-py3-none-win_amd64.whl", hash = "sha256:a769ae07ac74ff1a019d6bd529426427c3e30d75bdf1e08bb3d46ac8f417326a"}, - {file = "ruff-0.4.6-py3-none-win_arm64.whl", hash = "sha256:735a16407a1a8f58e4c5b913ad6102722e80b562dd17acb88887685ff6f20cf6"}, - {file = "ruff-0.4.6.tar.gz", hash = "sha256:a797a87da50603f71e6d0765282098245aca6e3b94b7c17473115167d8dfb0b7"}, + {file = "ruff-0.4.7-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:e089371c67892a73b6bb1525608e89a2aca1b77b5440acf7a71dda5dac958f9e"}, + {file = "ruff-0.4.7-py3-none-macosx_11_0_arm64.whl", hash = "sha256:10f973d521d910e5f9c72ab27e409e839089f955be8a4c8826601a6323a89753"}, + {file = "ruff-0.4.7-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:59c3d110970001dfa494bcd95478e62286c751126dfb15c3c46e7915fc49694f"}, + {file = "ruff-0.4.7-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:fa9773c6c00f4958f73b317bc0fd125295110c3776089f6ef318f4b775f0abe4"}, + {file = "ruff-0.4.7-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:07fc80bbb61e42b3b23b10fda6a2a0f5a067f810180a3760c5ef1b456c21b9db"}, + {file = "ruff-0.4.7-py3-none-manylinux_2_17_ppc64.manylinux2014_ppc64.whl", hash = "sha256:fa4dafe3fe66d90e2e2b63fa1591dd6e3f090ca2128daa0be33db894e6c18648"}, + {file = "ruff-0.4.7-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:a7c0083febdec17571455903b184a10026603a1de078428ba155e7ce9358c5f6"}, + {file = "ruff-0.4.7-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:ad1b20e66a44057c326168437d680a2166c177c939346b19c0d6b08a62a37589"}, + {file = "ruff-0.4.7-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:cbf5d818553add7511c38b05532d94a407f499d1a76ebb0cad0374e32bc67202"}, + {file = "ruff-0.4.7-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:50e9651578b629baec3d1513b2534de0ac7ed7753e1382272b8d609997e27e83"}, + {file = "ruff-0.4.7-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:8874a9df7766cb956b218a0a239e0a5d23d9e843e4da1e113ae1d27ee420877a"}, + {file = "ruff-0.4.7-py3-none-musllinux_1_2_i686.whl", hash = "sha256:b9de9a6e49f7d529decd09381c0860c3f82fa0b0ea00ea78409b785d2308a567"}, + {file = "ruff-0.4.7-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:13a1768b0691619822ae6d446132dbdfd568b700ecd3652b20d4e8bc1e498f78"}, + {file = "ruff-0.4.7-py3-none-win32.whl", hash = "sha256:769e5a51df61e07e887b81e6f039e7ed3573316ab7dd9f635c5afaa310e4030e"}, + {file = "ruff-0.4.7-py3-none-win_amd64.whl", hash = "sha256:9e3ab684ad403a9ed1226894c32c3ab9c2e0718440f6f50c7c5829932bc9e054"}, + {file = "ruff-0.4.7-py3-none-win_arm64.whl", hash = "sha256:10f2204b9a613988e3484194c2c9e96a22079206b22b787605c255f130db5ed7"}, + {file = "ruff-0.4.7.tar.gz", hash = "sha256:2331d2b051dc77a289a653fcc6a42cce357087c5975738157cd966590b18b5e1"}, ] [[package]] @@ -1917,13 +1917,13 @@ tutorials = ["matplotlib", "pandas", "tabulate", "torch"] [[package]] name = "typing-extensions" -version = "4.12.0" +version = "4.12.1" description = "Backported and Experimental Type Hints for Python 3.8+" optional = false python-versions = ">=3.8" files = [ - {file = "typing_extensions-4.12.0-py3-none-any.whl", hash = "sha256:b349c66bea9016ac22978d800cfff206d5f9816951f12a7d0ec5578b0a819594"}, - {file = "typing_extensions-4.12.0.tar.gz", hash = "sha256:8cbcdc8606ebcb0d95453ad7dc5065e6237b6aa230a31e81d0f440c30fed5fd8"}, + {file = "typing_extensions-4.12.1-py3-none-any.whl", hash = "sha256:6024b58b69089e5a89c347397254e35f1bf02a907728ec7fee9bf0fe837d203a"}, + {file = "typing_extensions-4.12.1.tar.gz", hash = "sha256:915f5e35ff76f56588223f15fdd5938f9a1cf9195c0de25130c627e4d597f6d1"}, ] [[package]] diff --git a/simvue/executor.py b/simvue/executor.py index 532a5ce4..1bad7de6 100644 --- a/simvue/executor.py +++ b/simvue/executor.py @@ -92,6 +92,7 @@ def __init__(self, simvue_runner: "simvue.Run", keep_logs: bool = True) -> None: self._exit_codes = self._manager.dict() self._std_err = self._manager.dict() self._std_out = self._manager.dict() + self._alert_ids: dict[str, str] = {} self._command_str: typing.Dict[str, str] = {} self._processes: typing.Dict[str, multiprocessing.Process] = {} @@ -221,6 +222,9 @@ def callback_function(status_code: int, std_out: str, std_err: str) -> None: env, ), ) + self._alert_ids[identifier] = self._runner.create_alert( + name=f"{identifier}_exit_status", source="user" + ) logger.debug(f"Executing process: {' '.join(_command)}") self._processes[identifier].start() @@ -239,6 +243,14 @@ def exit_status(self) -> int: return 0 + def get_error_summary(self) -> dict[str, typing.Optional[str]]: + """Returns the summary messages of all errors""" + return { + identifier: self._get_error_status(identifier) + for identifier, value in self._exit_codes.items() + if value + } + def get_command(self, process_id: str) -> str: """Returns the command executed within the given process. @@ -256,7 +268,19 @@ def get_command(self, process_id: str) -> str: raise KeyError(f"Failed to retrieve '{process_id}', no such process") return self._command_str[process_id] - def _log_events(self) -> None: + def _get_error_status(self, process_id: str) -> typing.Optional[str]: + err_msg: typing.Optional[str] = None + + # Return last 10 lines of stdout if stderr empty + if not (err_msg := self._std_err[process_id]) and ( + std_out := self._std_out[process_id] + ): + err_msg = " Tail STDOUT:\n\n" + start_index = -10 if len(lines := std_out.split("\n")) > 10 else 0 + err_msg += "\n".join(lines[start_index:]) + return err_msg + + def _update_alerts(self) -> None: """Send log events for the result of each process""" for proc_id, code in self._exit_codes.items(): if code != 0: @@ -265,11 +289,9 @@ def _log_events(self) -> None: if self._runner._dispatcher: self._runner._dispatcher.purge() - _err = self._std_err[proc_id] - _msg = f"Process {proc_id} returned non-zero exit status {code} with:\n{_err}" + self._runner.log_alert(self._alert_ids[proc_id], "critical") else: - _msg = f"Process {proc_id} completed successfully." - self._runner.log_event(_msg) + self._runner.log_alert(self._alert_ids[proc_id], "ok") # Wait for the dispatcher to send the latest information before # allowing the executor to finish (and as such the run instance to exit) @@ -321,7 +343,7 @@ def wait_for_completion(self) -> None: for process in self._processes.values(): if process.is_alive(): process.join() - self._log_events() + self._update_alerts() self._save_output() if not self.success: diff --git a/simvue/run.py b/simvue/run.py index 095be2f6..78376811 100644 --- a/simvue/run.py +++ b/simvue/run.py @@ -178,8 +178,19 @@ def __exit__( self._dispatcher.join() if _non_zero := self.executor.exit_status: - logger.error( - f"Simvue process executor terminated with non-zero exit status {_non_zero}" + _error_msgs: dict[str, typing.Optional[str]] = ( + self.executor.get_error_summary() + ) + _error_msg = "\n".join( + f"{identifier}:\n{msg}" for identifier, msg in _error_msgs.items() + ) + if _error_msg: + _error_msg = f":\n{_error_msg}" + click.secho( + "Simvue process executor terminated with non-zero exit status " + f"{_non_zero}{_error_msg}", + fg="red", + bold=True, ) sys.exit(_non_zero) @@ -1375,8 +1386,19 @@ def close(self) -> bool: self._dispatcher.join() if _non_zero := self.executor.exit_status: - logger.error( - f"Simvue process executor terminated with non-zero exit status {_non_zero}" + _error_msgs: dict[str, typing.Optional[str]] = ( + self.executor.get_error_summary() + ) + _error_msg = "\n".join( + f"{identifier}:\n{msg}" for identifier, msg in _error_msgs.items() + ) + if _error_msg: + _error_msg = f":\n{_error_msg}" + click.secho( + "Simvue process executor terminated with non-zero exit status " + f"{_non_zero}{_error_msg}", + fg="red", + bold=True, ) sys.exit(_non_zero) diff --git a/simvue/serialization.py b/simvue/serialization.py index c51847e6..d8252713 100644 --- a/simvue/serialization.py +++ b/simvue/serialization.py @@ -99,6 +99,10 @@ def _serialize_plotly_figure(data: typing.Any) -> typing.Optional[tuple[str, str return None mimetype = "application/vnd.plotly.v1+json" data = plotly.io.to_json(data, engine="json") + mfile = BytesIO() + mfile.write(data.encode()) + mfile.seek(0) + data = mfile.read() return data, mimetype @@ -110,6 +114,10 @@ def _serialize_matplotlib(data: typing.Any) -> typing.Optional[tuple[str, str]]: return None mimetype = "application/vnd.plotly.v1+json" data = plotly.io.to_json(plotly.tools.mpl_to_plotly(data.gcf()), engine="json") + mfile = BytesIO() + mfile.write(data.encode()) + mfile.seek(0) + data = mfile.read() return data, mimetype @@ -121,6 +129,10 @@ def _serialize_matplotlib_figure(data: typing.Any) -> typing.Optional[tuple[str, return None mimetype = "application/vnd.plotly.v1+json" data = plotly.io.to_json(plotly.tools.mpl_to_plotly(data), engine="json") + mfile = BytesIO() + mfile.write(data.encode()) + mfile.seek(0) + data = mfile.read() return data, mimetype @@ -161,8 +173,11 @@ def _serialize_torch_tensor(data: typing.Any) -> typing.Optional[tuple[str, str] def _serialize_json(data: typing.Any) -> typing.Optional[tuple[str, str]]: mimetype = "application/json" try: - data = json.dumps(data) - except TypeError: + mfile = BytesIO() + mfile.write(json.dumps(data).encode()) + mfile.seek(0) + data = mfile.read() + except (TypeError, json.JSONDecodeError): return None return data, mimetype diff --git a/tests/refactor/test_executor.py b/tests/refactor/test_executor.py index f928525f..5cc6626a 100644 --- a/tests/refactor/test_executor.py +++ b/tests/refactor/test_executor.py @@ -37,14 +37,6 @@ def test_executor_add_process( with pytest.raises(SystemExit): run.close() - time.sleep(1) - client = simvue.Client() - _events = client.get_events( - run._id, - message_contains="successfully" if successful else "non-zero exit", - ) - assert len(_events) == 1 - @pytest.mark.executor def test_add_process_command_assembly(request: pytest.FixtureRequest) -> None: