From fd076c5878c7669f00e1e0b4784729378a545ae2 Mon Sep 17 00:00:00 2001 From: nshkrdotcom <127063941+nshkrdotcom@users.noreply.github.com> Date: Fri, 3 Apr 2026 11:25:20 -1000 Subject: [PATCH 1/5] fix(logging): localize jido runtime logging --- config/test.exs | 2 +- lib/jido/agent/directive/cron.ex | 8 +- lib/jido/agent/directive/cron_cancel.ex | 18 ++-- lib/jido/agent_server.ex | 106 +++++++++++-------- lib/jido/agent_server/directive_executors.ex | 46 ++++---- lib/jido/agent_server/error_policy.ex | 25 +++-- lib/jido/persist.ex | 84 +++++++++------ lib/jido/scheduler/job.ex | 15 +-- lib/jido/sensor/runtime.ex | 25 +++-- lib/jido/telemetry.ex | 101 +++++++++++------- test/jido/agent_server/telemetry_test.exs | 36 +++---- test/jido/observe/config_test.exs | 22 +++- test/jido/observe/observe_test.exs | 12 +-- test/jido/telemetry_test.exs | 41 ++++--- 14 files changed, 330 insertions(+), 211 deletions(-) diff --git a/config/test.exs b/config/test.exs index 5362d89a..57fc0d8b 100644 --- a/config/test.exs +++ b/config/test.exs @@ -12,7 +12,7 @@ config :git_hooks, auto_install: false # To enable verbose logging during debugging, set LOG_LEVEL env var: # LOG_LEVEL=debug mix test test/my_test.exs # -log_level = System.get_env("LOG_LEVEL", "debug") |> String.to_existing_atom() +log_level = System.get_env("LOG_LEVEL", "warning") |> String.to_existing_atom() config :logger, level: log_level, diff --git a/lib/jido/agent/directive/cron.ex b/lib/jido/agent/directive/cron.ex index 96ee7d65..33825e69 100644 --- a/lib/jido/agent/directive/cron.ex +++ b/lib/jido/agent/directive/cron.ex @@ -70,9 +70,9 @@ defmodule Jido.Agent.Directive.Cron do {:ok, pid} <- AgentServer.start_runtime_cron_job(state, logical_id, runtime_spec), {:ok, persisted_state} <- persist_then_commit_registration(state, pid, logical_id, cron_spec, runtime_spec) do - Logger.debug( + Logger.debug(fn -> "AgentServer #{agent_id} registered cron job #{inspect(logical_id)}: #{cron_expr}" - ) + end) AgentServer.emit_cron_telemetry_event(persisted_state, :register, %{ job_id: logical_id, @@ -82,9 +82,9 @@ defmodule Jido.Agent.Directive.Cron do {:ok, persisted_state} else {:error, reason} -> - Logger.error( + Logger.error(fn -> "AgentServer #{agent_id} failed to register cron job #{inspect(logical_id)}: #{inspect(reason)}" - ) + end) {:ok, handle_failed_registration(state, logical_id, on_failure)} end diff --git a/lib/jido/agent/directive/cron_cancel.ex b/lib/jido/agent/directive/cron_cancel.ex index 0a680f63..3a968f21 100644 --- a/lib/jido/agent/directive/cron_cancel.ex +++ b/lib/jido/agent/directive/cron_cancel.ex @@ -44,7 +44,9 @@ defimpl Jido.AgentServer.DirectiveExec, for: Jido.Agent.Directive.CronCancel do new_state = %{runtime_state | cron_specs: proposed_specs} - Logger.debug("AgentServer #{agent_id} cancelled cron job #{inspect(logical_id)}") + Logger.debug(fn -> + "AgentServer #{agent_id} cancelled cron job #{inspect(logical_id)}" + end) emit_telemetry(new_state, :cancel, %{job_id: logical_id}) {:ok, new_state} @@ -53,9 +55,10 @@ defimpl Jido.AgentServer.DirectiveExec, for: Jido.Agent.Directive.CronCancel do {_pid, runtime_state} = drop_runtime_job(state, logical_id) new_state = %{runtime_state | cron_specs: proposed_specs} - Logger.error( - "AgentServer #{agent_id} failed to persist cron cancellation for #{inspect(logical_id)}: #{inspect(reason)}" - ) + Logger.error(fn -> + "AgentServer #{agent_id} cancelled cron job #{inspect(logical_id)} " <> + "after checkpoint persistence failure: #{inspect(reason)}" + end) emit_telemetry(state, :persist_failure, %{ job_id: logical_id, @@ -66,9 +69,10 @@ defimpl Jido.AgentServer.DirectiveExec, for: Jido.Agent.Directive.CronCancel do {:ok, new_state} {:error, reason} -> - Logger.error( - "AgentServer #{agent_id} failed to persist cron cancellation for #{inspect(logical_id)}: #{inspect(reason)}" - ) + Logger.error(fn -> + "AgentServer #{agent_id} failed to cancel cron job #{inspect(logical_id)} " <> + "because persistence failed: #{inspect(reason)}" + end) emit_telemetry(state, :persist_failure, %{ job_id: logical_id, diff --git a/lib/jido/agent_server.ex b/lib/jido/agent_server.ex index 747ebbf6..93bf566e 100644 --- a/lib/jido/agent_server.ex +++ b/lib/jido/agent_server.ex @@ -796,9 +796,9 @@ defmodule Jido.AgentServer do {:ok, track_cron_job(state, logical_id, pid, runtime_spec: runtime_spec)} {:error, reason} -> - Logger.error( + Logger.error(fn -> "AgentServer #{state.id} failed to register #{runtime_cron_log_label(runtime_spec)} #{inspect(logical_id)}: #{inspect(reason)}" - ) + end) {:error, reason, state} end @@ -978,7 +978,10 @@ defmodule Jido.AgentServer do enq_state {:error, :queue_overflow} -> - Logger.warning("AgentServer #{state.id} queue overflow during strategy init") + Logger.warning(fn -> + "AgentServer #{state.id} queue overflow during strategy init" + end) + state end else @@ -1563,7 +1566,10 @@ defmodule Jido.AgentServer do metadata ) - Logger.warning("AgentServer #{state.id} queue overflow, dropping directives") + Logger.warning(fn -> + "AgentServer #{state.id} queue overflow, dropping directives" + end) + GenServer.reply(from, {:error, :queue_overflow}) maybe_set_idle_status(state) end @@ -1591,10 +1597,10 @@ defmodule Jido.AgentServer do Map.merge(metadata, %{kind: kind, error: reason}) ) - Logger.error( + Logger.error(fn -> "Signal call task failed for #{state.id}: #{inspect(kind)} #{inspect(reason)}\n" <> Exception.format_stacktrace(stacktrace) - ) + end) GenServer.reply(from, {:error, reason}) maybe_set_idle_status(state) @@ -1818,7 +1824,7 @@ defmodule Jido.AgentServer do metadata ) - Logger.warning("AgentServer #{state.id} queue overflow, dropping directives") + Logger.warning(fn -> "AgentServer #{state.id} queue overflow, dropping directives" end) {:error, :queue_overflow, state} end end @@ -1991,9 +1997,9 @@ defmodule Jido.AgentServer do end rescue e -> - Logger.error( + Logger.error(fn -> "Plugin #{inspect(spec.module)} handle_signal crashed: #{Exception.message(e)}" - ) + end) error = Jido.Error.execution_error( @@ -2032,9 +2038,9 @@ defmodule Jido.AgentServer do spec.module.transform_result(action_term, agent_acc, context) rescue e -> - Logger.error( + Logger.error(fn -> "Plugin #{inspect(spec.module)} transform_result crashed: #{Exception.message(e)}" - ) + end) agent_acc end @@ -2084,9 +2090,9 @@ defmodule Jido.AgentServer do end) other -> - Logger.warning( + Logger.warning(fn -> "Invalid child_spec from plugin #{inspect(plugin_module)}: #{inspect(other)}" - ) + end) state end @@ -2112,16 +2118,18 @@ defmodule Jido.AgentServer do %{state | children: new_children} {:error, reason} -> - Logger.error("Failed to start plugin child #{inspect(plugin_module)}: #{inspect(reason)}") + Logger.error(fn -> + "Failed to start plugin child #{inspect(plugin_module)}: #{inspect(reason)}" + end) state end end defp start_plugin_child(%State{} = state, plugin_module, spec) do - Logger.warning( + Logger.warning(fn -> "Plugin child_spec missing :start key for #{inspect(plugin_module)}: #{inspect(spec)}" - ) + end) state end @@ -2194,9 +2202,9 @@ defmodule Jido.AgentServer do %{state | children: new_children} {:error, reason} -> - Logger.warning( + Logger.warning(fn -> "Failed to start subscription sensor #{inspect(sensor_module)} for plugin #{inspect(plugin_module)}: #{inspect(reason)}" - ) + end) state end @@ -2208,7 +2216,7 @@ defmodule Jido.AgentServer do @doc false defp register_plugin_schedules(%State{skip_schedules: true} = state) do - Logger.debug("AgentServer #{state.id} skipping plugin schedules") + Logger.debug(fn -> "AgentServer #{state.id} skipping plugin schedules" end) state end @@ -2241,9 +2249,10 @@ defmodule Jido.AgentServer do %{cron_expression: cron_expr, message: message, timezone: timezone} ) do if Map.has_key?(state.cron_jobs, job_id) do - Logger.warning( - "AgentServer #{state.id} skipping restored cron job #{inspect(job_id)} because declarative/plugin schedule already exists" - ) + Logger.warning(fn -> + "AgentServer #{state.id} skipping restored cron job #{inspect(job_id)} " <> + "because declarative/plugin schedule already exists" + end) new_cron_specs = Map.delete(state.cron_specs, job_id) cleaned_state = %{state | cron_specs: new_cron_specs} @@ -2269,9 +2278,9 @@ defmodule Jido.AgentServer do end defp register_restored_cron_spec(%State{} = state, job_id, invalid_spec) do - Logger.error( + Logger.error(fn -> "AgentServer #{state.id} skipped invalid persisted cron spec #{inspect(job_id)}: #{inspect(invalid_spec)}" - ) + end) state end @@ -2289,9 +2298,9 @@ defmodule Jido.AgentServer do case register_runtime_cron_job(state, job_id, runtime_spec) do {:ok, new_state} -> - Logger.debug( + Logger.debug(fn -> "AgentServer #{state.id} registered schedule #{inspect(job_id)}: #{cron_expr}" - ) + end) new_state @@ -2333,9 +2342,10 @@ defmodule Jido.AgentServer do timer_ref = :erlang.start_timer(delay, self(), {:cron_restart, logical_id}) - Logger.warning( - "AgentServer #{state.id} scheduling cron restart for #{inspect(logical_id)} in #{delay}ms after #{inspect(reason)}" - ) + Logger.warning(fn -> + "AgentServer #{state.id} scheduling cron restart for #{inspect(logical_id)} " <> + "in #{delay}ms after #{inspect(reason)}" + end) emit_cron_telemetry_event(state, :restart_scheduled, %{ job_id: logical_id, @@ -2587,9 +2597,10 @@ defmodule Jido.AgentServer do _ = clear_parent_binding(state.jido, state.id, state.partition) stop_reason = wrap_parent_down_reason(reason) - Logger.info( - "AgentServer #{state.id} stopping: parent died (#{inspect(reason)}), wrapped stop_reason: #{inspect(stop_reason)}" - ) + Logger.info(fn -> + "AgentServer #{state.id} stopping: parent died (#{inspect(reason)}), " <> + "wrapped stop_reason: #{inspect(stop_reason)}" + end) {:stop, stop_reason, State.set_status(state, :stopping)} end @@ -2597,9 +2608,10 @@ defmodule Jido.AgentServer do defp handle_parent_down(%State{on_parent_death: :continue} = state, _pid, reason) do {former_parent, orphaned_state} = transition_to_orphan(state, reason) - Logger.info( - "AgentServer #{state.id} continuing as orphan after parent #{former_parent.id} died (#{inspect(reason)})" - ) + Logger.info(fn -> + "AgentServer #{state.id} continuing as orphan after parent #{former_parent.id} died " <> + "(#{inspect(reason)})" + end) {:noreply, orphaned_state} end @@ -2635,7 +2647,9 @@ defmodule Jido.AgentServer do {tag, state} = State.remove_child_by_pid(state, pid) if tag do - Logger.debug("AgentServer #{state.id} child #{inspect(tag)} exited: #{inspect(reason)}") + Logger.debug(fn -> + "AgentServer #{state.id} child #{inspect(tag)} exited: #{inspect(reason)}" + end) signal = ChildExit.new!( @@ -2719,9 +2733,9 @@ defmodule Jido.AgentServer do state {:error, reason} -> - Logger.warning( + Logger.warning(fn -> "AgentServer #{state.id} failed to persist parent binding: #{inspect(reason)}" - ) + end) state end @@ -2821,16 +2835,18 @@ defmodule Jido.AgentServer do when reason in [:normal, :completed, :ok, :done, :success] do directive_type = directive.__struct__ |> Module.split() |> List.last() - Logger.warning(""" - AgentServer #{state.id} received {:stop, #{inspect(reason)}, ...} from directive #{directive_type}. + Logger.warning(fn -> + """ + AgentServer #{state.id} received {:stop, #{inspect(reason)}, ...} from directive #{directive_type}. - This is a HARD STOP: pending directives and async work will be lost, and on_after_cmd/3 will NOT run. + This is a HARD STOP: pending directives and async work will be lost, and on_after_cmd/3 will NOT run. - For normal completion, set state.status to :completed/:failed instead and avoid returning {:stop, ...}. - External code should poll AgentServer.state/1 and check status, not rely on process death. + For normal completion, set state.status to :completed/:failed instead and avoid returning {:stop, ...}. + External code should poll AgentServer.state/1 and check status, not rely on process death. - {:stop, ...} should only be used for abnormal/framework-level termination. - """) + {:stop, ...} should only be used for abnormal/framework-level termination. + """ + end) end defp warn_if_normal_stop(_reason, _directive, _state), do: :ok diff --git a/lib/jido/agent_server/directive_executors.ex b/lib/jido/agent_server/directive_executors.ex index 6eb42fbf..4314770e 100644 --- a/lib/jido/agent_server/directive_executors.ex +++ b/lib/jido/agent_server/directive_executors.ex @@ -32,7 +32,7 @@ defimpl Jido.AgentServer.DirectiveExec, for: Jido.Agent.Directive.Emit do Jido.Signal.Dispatch.dispatch(traced_signal, cfg) end) else - Logger.warning("Jido.Signal.Dispatch not available, skipping emit") + Logger.warning(fn -> "Jido.Signal.Dispatch not available, skipping emit" end) end end end @@ -90,7 +90,7 @@ defimpl Jido.AgentServer.DirectiveExec, for: Jido.Agent.Directive.RunInstruction {:ok, state} {:error, :queue_overflow} -> - Logger.warning("AgentServer #{state.id} queue overflow, dropping directives") + Logger.warning(fn -> "AgentServer #{state.id} queue overflow, dropping directives" end) {:ok, state} end end @@ -146,15 +146,21 @@ defimpl Jido.AgentServer.DirectiveExec, for: Jido.Agent.Directive.Spawn do case result do {:ok, pid} -> - Logger.debug("Spawned child process #{inspect(pid)} with tag #{inspect(tag)}") + Logger.debug(fn -> + "Spawned child process #{inspect(pid)} with tag #{inspect(tag)}" + end) + {:ok, state} {:ok, pid, _info} -> - Logger.debug("Spawned child process #{inspect(pid)} with tag #{inspect(tag)}") + Logger.debug(fn -> + "Spawned child process #{inspect(pid)} with tag #{inspect(tag)}" + end) + {:ok, state} {:error, reason} -> - Logger.error("Failed to spawn child: #{inspect(reason)}") + Logger.error(fn -> "Failed to spawn child: #{inspect(reason)}" end) {:ok, state} :ignored -> @@ -216,7 +222,10 @@ defimpl Jido.AgentServer.DirectiveExec, for: Jido.Agent.Directive.SpawnAgent do spawn_child(state, agent, tag, opts, meta, restart) else {:error, reason} -> - Logger.error("AgentServer #{state.id} failed to spawn child: #{reason}") + Logger.error(fn -> + "AgentServer #{state.id} failed to spawn child: #{inspect(reason)}" + end) + {:ok, state} end end @@ -271,26 +280,26 @@ defimpl Jido.AgentServer.DirectiveExec, for: Jido.Agent.Directive.SpawnAgent do new_state = State.add_child(state, tag, child_info) - Logger.debug( + Logger.debug(fn -> "AgentServer #{state.id} spawned child #{child_id} with tag #{inspect(tag)}" - ) + end) {:ok, new_state} {:error, reason} -> _ = DynamicSupervisor.terminate_child(supervisor, pid) - Logger.error( + Logger.error(fn -> "AgentServer #{state.id} failed to persist relationship for child #{child_id}: #{inspect(reason)}" - ) + end) {:ok, state} end {:error, reason} -> - Logger.error( + Logger.error(fn -> "AgentServer #{state.id} failed to spawn child with restart #{inspect(restart)}: #{inspect(reason)}" - ) + end) {:ok, state} end @@ -341,16 +350,17 @@ defimpl Jido.AgentServer.DirectiveExec, for: Jido.Agent.Directive.AdoptChild do meta: meta }) - Logger.debug( + Logger.debug(fn -> "AgentServer #{state.id} adopted child #{child_runtime.id} with tag #{inspect(tag)}" - ) + end) {:ok, State.add_child(state, tag, child_info)} else {:error, reason} -> - Logger.warning( - "AgentServer #{state.id} failed to adopt child #{inspect(child)} with tag #{inspect(tag)}: #{inspect(reason)}" - ) + Logger.warning(fn -> + "AgentServer #{state.id} failed to adopt child #{inspect(child)} " <> + "with tag #{inspect(tag)}: #{inspect(reason)}" + end) {:ok, state} end @@ -417,7 +427,7 @@ defimpl Jido.AgentServer.DirectiveExec, for: Any do require Logger def exec(directive, _input_signal, state) do - Logger.debug("Ignoring unknown directive: #{inspect(directive.__struct__)}") + Logger.debug(fn -> "Ignoring unknown directive: #{inspect(directive.__struct__)}" end) {:ok, state} end end diff --git a/lib/jido/agent_server/error_policy.ex b/lib/jido/agent_server/error_policy.ex index 03993117..f0d10a60 100644 --- a/lib/jido/agent_server/error_policy.ex +++ b/lib/jido/agent_server/error_policy.ex @@ -29,7 +29,7 @@ defmodule Jido.AgentServer.ErrorPolicy do :stop_on_error -> log_error(error, context, state) - Logger.error("Agent #{state.id} stopping due to error policy") + Logger.error(fn -> "Agent #{state.id} stopping due to error policy" end) {:stop, {:agent_error, error}, state} {:emit_signal, dispatch_cfg} -> @@ -50,9 +50,11 @@ defmodule Jido.AgentServer.ErrorPolicy do defp log_error(error, context, state) do message = extract_message(error) - context_str = if context, do: " [#{context}]", else: "" - Logger.error("Agent #{state.id}#{context_str}: #{message}#{details_suffix(error)}") + Logger.error(fn -> + context_str = if context, do: " [#{inspect(context)}]", else: "" + "Agent #{state.id}#{context_str}: #{message}#{details_suffix(error)}" + end) end defp extract_message(%{message: message}) when is_binary(message), do: message @@ -80,7 +82,7 @@ defmodule Jido.AgentServer.ErrorPolicy do SignalDispatch.dispatch(signal, dispatch_cfg) end) else - Logger.warning("Jido.Signal.Dispatch not available, skipping error signal emit") + Logger.warning(fn -> "Jido.Signal.Dispatch not available, skipping error signal emit" end) end end @@ -102,12 +104,12 @@ defmodule Jido.AgentServer.ErrorPolicy do if count >= max do log_error(error, context, state) - Logger.error("Agent #{state.id} exceeded max errors (#{count}/#{max}), stopping") + Logger.error(fn -> "Agent #{state.id} exceeded max errors (#{count}/#{max}), stopping" end) {:stop, {:max_errors_exceeded, count}, state} else - Logger.warning( + Logger.warning(fn -> "Agent #{state.id} error #{count}/#{max}: #{extract_message(error)}#{details_suffix(error)}" - ) + end) {:ok, state} end @@ -125,16 +127,19 @@ defmodule Jido.AgentServer.ErrorPolicy do {:stop, reason, new_state} other -> - Logger.error("Custom error policy returned invalid result: #{inspect(other)}") + Logger.error(fn -> + "Custom error policy returned invalid result: #{inspect(other)}" + end) + {:ok, state} end rescue e -> - Logger.error("Custom error policy crashed: #{Exception.message(e)}") + Logger.error(fn -> "Custom error policy crashed: #{Exception.message(e)}" end) {:ok, state} catch kind, reason -> - Logger.error("Custom error policy failed: #{kind} - #{inspect(reason)}") + Logger.error(fn -> "Custom error policy failed: #{kind} - #{inspect(reason)}" end) {:ok, state} end end diff --git a/lib/jido/persist.ex b/lib/jido/persist.ex index dddeac97..1158319d 100644 --- a/lib/jido/persist.ex +++ b/lib/jido/persist.ex @@ -166,19 +166,24 @@ defmodule Jido.Persist do defp do_hibernate(adapter, opts, agent_module, key, agent) do thread = get_thread(agent) - Logger.debug("Persist.hibernate starting for #{inspect(agent_module)} key=#{inspect(key)}") + Logger.debug(fn -> + "Persist.hibernate starting for #{inspect(agent_module)} key=#{inspect(key)}" + end) with :ok <- flush_journal(adapter, opts, thread), {:ok, checkpoint} <- create_checkpoint(agent_module, agent, thread), checkpoint_key <- make_checkpoint_key(agent_module, key), :ok <- adapter.put_checkpoint(checkpoint_key, checkpoint, opts) do - Logger.debug("Persist.hibernate completed for #{inspect(agent_module)} key=#{inspect(key)}") + Logger.debug(fn -> + "Persist.hibernate completed for #{inspect(agent_module)} key=#{inspect(key)}" + end) + :ok else {:error, reason} = error -> - Logger.error( + Logger.error(fn -> "Persist.hibernate failed for #{inspect(agent_module)} key=#{inspect(key)}: #{inspect(reason)}" - ) + end) error end @@ -188,20 +193,25 @@ defmodule Jido.Persist do defp do_thaw(adapter, opts, agent_module, key) do checkpoint_key = make_checkpoint_key(agent_module, key) - Logger.debug("Persist.thaw starting for #{inspect(agent_module)} key=#{inspect(key)}") + Logger.debug(fn -> + "Persist.thaw starting for #{inspect(agent_module)} key=#{inspect(key)}" + end) case Jido.Storage.fetch_checkpoint(adapter, checkpoint_key, opts) do {:ok, checkpoint} -> restore_from_checkpoint(adapter, opts, agent_module, checkpoint) {:error, :not_found} -> - Logger.debug("Persist.thaw: checkpoint not found for #{inspect(checkpoint_key)}") + Logger.debug(fn -> + "Persist.thaw: checkpoint not found for #{inspect(checkpoint_key)}" + end) + {:error, :not_found} {:error, reason} = error -> - Logger.error( + Logger.error(fn -> "Persist.thaw failed to get checkpoint for #{inspect(checkpoint_key)}: #{inspect(reason)}" - ) + end) error end @@ -226,9 +236,9 @@ defmodule Jido.Persist do if thread.rev == entry_count do :ok else - Logger.error( + Logger.error(fn -> "Persist: invalid local thread revision for #{thread.id}: rev=#{thread.rev}, entries=#{entry_count}" - ) + end) {:error, :invalid_thread_revision} end @@ -257,21 +267,24 @@ defmodule Jido.Persist do cond do stored_rev > local_rev -> - Logger.error( + Logger.error(fn -> "Persist: thread rev regression for #{thread.id}: local_rev=#{local_rev}, stored_rev=#{stored_rev}" - ) + end) {:error, :thread_rev_regression} stored_rev > entry_count -> - Logger.error( + Logger.error(fn -> "Persist: thread history truncated for #{thread.id}: entry_count=#{entry_count}, stored_rev=#{stored_rev}" - ) + end) {:error, :thread_history_truncated} stored_rev == local_rev -> - Logger.debug("Persist: thread #{thread.id} already persisted at rev=#{stored_rev}") + Logger.debug(fn -> + "Persist: thread #{thread.id} already persisted at rev=#{stored_rev}" + end) + :ok true -> @@ -282,9 +295,9 @@ defmodule Jido.Persist do |> maybe_put_thread_metadata(stored_rev, thread.metadata) |> Kernel.++(opts) - Logger.debug( + Logger.debug(fn -> "Persist: flushing #{length(missing_entries)} new entries for thread #{thread.id} from rev=#{stored_rev}" - ) + end) case adapter.append_thread(thread.id, missing_entries, append_opts) do {:ok, _updated_thread} -> @@ -294,9 +307,9 @@ defmodule Jido.Persist do handle_thread_append_conflict(adapter, opts, thread.id, local_rev) {:error, reason} = error -> - Logger.error( + Logger.error(fn -> "Persist: failed to flush journal for thread #{thread.id}: #{inspect(reason)}" - ) + end) error end @@ -314,23 +327,23 @@ defmodule Jido.Persist do defp handle_thread_append_conflict(adapter, opts, thread_id, local_rev) do case Jido.Storage.fetch_thread(adapter, thread_id, opts) do {:ok, %Thread{rev: stored_rev}} when stored_rev >= local_rev -> - Logger.debug( + Logger.debug(fn -> "Persist: append conflict resolved for #{thread_id}; stored_rev=#{stored_rev} >= local_rev=#{local_rev}" - ) + end) :ok {:ok, %Thread{rev: stored_rev}} -> - Logger.error( + Logger.error(fn -> "Persist: append conflict for #{thread_id}; stored_rev=#{stored_rev}, local_rev=#{local_rev}" - ) + end) {:error, :conflict} {:error, reason} = error -> - Logger.error( + Logger.error(fn -> "Persist: append conflict but failed to reload thread #{thread_id}: #{inspect(reason)}" - ) + end) error end @@ -396,7 +409,11 @@ defmodule Jido.Persist do {:ok, agent} <- restore_agent(agent_module, checkpoint, ctx), {:ok, agent} <- rehydrate_thread(adapter, opts, agent, checkpoint) do agent = attach_scheduler_manifest(agent, checkpoint) - Logger.debug("Persist.thaw completed for #{inspect(agent_module)} id=#{checkpoint.id}") + + Logger.debug(fn -> + "Persist.thaw completed for #{inspect(agent_module)} id=#{checkpoint.id}" + end) + {:ok, agent} end end @@ -431,7 +448,9 @@ defmodule Jido.Persist do defp rehydrate_thread(_adapter, _opts, agent, %{thread: nil}), do: {:ok, agent} defp rehydrate_thread(adapter, opts, agent, %{thread: %{id: thread_id, rev: expected_rev}}) do - Logger.debug("Persist: rehydrating thread #{thread_id} with expected rev=#{expected_rev}") + Logger.debug(fn -> + "Persist: rehydrating thread #{thread_id} with expected rev=#{expected_rev}" + end) case Jido.Storage.fetch_thread(adapter, thread_id, opts) do {:ok, %Thread{rev: ^expected_rev} = thread} -> @@ -439,18 +458,21 @@ defmodule Jido.Persist do {:ok, agent_with_thread} {:ok, %Thread{rev: actual_rev}} -> - Logger.error( + Logger.error(fn -> "Persist: thread rev mismatch for #{thread_id}: expected=#{expected_rev}, actual=#{actual_rev}" - ) + end) {:error, :thread_mismatch} {:error, :not_found} -> - Logger.error("Persist: thread #{thread_id} not found but referenced in checkpoint") + Logger.error(fn -> + "Persist: thread #{thread_id} not found but referenced in checkpoint" + end) + {:error, :missing_thread} {:error, reason} = error -> - Logger.error("Persist: failed to load thread #{thread_id}: #{inspect(reason)}") + Logger.error(fn -> "Persist: failed to load thread #{thread_id}: #{inspect(reason)}" end) error end end diff --git a/lib/jido/scheduler/job.ex b/lib/jido/scheduler/job.ex index f7a28d32..300e3a48 100644 --- a/lib/jido/scheduler/job.ex +++ b/lib/jido/scheduler/job.ex @@ -200,11 +200,11 @@ defmodule Jido.Scheduler.Job do :ok rescue error -> - Logger.error("Scheduler callback raised: #{Exception.message(error)}") + Logger.error(fn -> "Scheduler callback raised: #{Exception.message(error)}" end) :ok catch kind, reason -> - Logger.error("Scheduler callback #{kind}: #{inspect(reason)}") + Logger.error(fn -> "Scheduler callback #{kind}: #{inspect(reason)}" end) :ok end end @@ -234,9 +234,10 @@ defmodule Jido.Scheduler.Job do @spec enter_retry(state(), term()) :: state() defp enter_retry(state, reason) do if not state.retrying? do - Logger.warning( - "Scheduler job entering retry mode for #{inspect(state.cron_expr)} after schedule failure: #{inspect(reason)}" - ) + Logger.warning(fn -> + "Scheduler job entering retry mode for #{inspect(state.cron_expr)} " <> + "after schedule failure: #{inspect(reason)}" + end) end timer_ref = Process.send_after(self(), @retry_schedule, @retry_delay_ms) @@ -246,7 +247,9 @@ defmodule Jido.Scheduler.Job do @spec clear_retry(state(), reference()) :: state() defp clear_retry(state, timer_ref) do if state.retrying? do - Logger.info("Scheduler job recovered schedule resolution for #{inspect(state.cron_expr)}") + Logger.debug(fn -> + "Scheduler job recovered schedule resolution for #{inspect(state.cron_expr)}" + end) end %{state | timer_ref: timer_ref, retrying?: false} diff --git a/lib/jido/sensor/runtime.ex b/lib/jido/sensor/runtime.ex index b160dc0d..5cfb7ac3 100644 --- a/lib/jido/sensor/runtime.ex +++ b/lib/jido/sensor/runtime.ex @@ -157,7 +157,10 @@ defmodule Jido.Sensor.Runtime do @impl GenServer def handle_info(msg, state) do - Logger.debug("Sensor.Runtime #{state.id} received unexpected message: #{inspect(msg)}") + Logger.debug(fn -> + "Sensor.Runtime #{state.id} received unexpected message: #{inspect(msg)}" + end) + {:noreply, state} end @@ -241,14 +244,16 @@ defmodule Jido.Sensor.Runtime do {:noreply, new_state} {:error, reason} -> - Logger.warning("Sensor.Runtime #{state.id} handle_event error: #{inspect(reason)}") + Logger.warning(fn -> + "Sensor.Runtime #{state.id} handle_event error: #{inspect(reason)}" + end) {:noreply, state} other -> - Logger.warning( + Logger.warning(fn -> "Sensor.Runtime #{state.id} handle_event returned invalid result: #{inspect(other)}" - ) + end) {:noreply, state} end @@ -281,7 +286,9 @@ defmodule Jido.Sensor.Runtime do end defp apply_directive(directive, state) do - Logger.warning("Sensor.Runtime #{state.id} ignoring unknown directive: #{inspect(directive)}") + Logger.warning(fn -> + "Sensor.Runtime #{state.id} ignoring unknown directive: #{inspect(directive)}" + end) state end @@ -312,7 +319,9 @@ defmodule Jido.Sensor.Runtime do dispatch_signal_async(signal, agent_ref, state) true -> - Logger.debug("Sensor.Runtime #{state.id} has no agent_ref, signal not delivered") + Logger.debug(fn -> + "Sensor.Runtime #{state.id} has no agent_ref, signal not delivered" + end) end end @@ -322,9 +331,9 @@ defmodule Jido.Sensor.Runtime do dispatch_fun(state).(signal, agent_ref) rescue e -> - Logger.warning( + Logger.warning(fn -> "Sensor.Runtime #{state.id} async dispatch failed: #{Exception.message(e)}" - ) + end) end end diff --git a/lib/jido/telemetry.ex b/lib/jido/telemetry.ex index 247580c8..0c05f82e 100644 --- a/lib/jido/telemetry.ex +++ b/lib/jido/telemetry.ex @@ -255,10 +255,12 @@ defmodule Jido.Telemetry do duration = Map.get(measurements, :duration, 0) Logger.warning( - "[agent.cmd.error] #{format_module(metadata[:agent_module])} " <> - "action=#{Formatter.format_action(metadata[:action])} " <> - "error=#{Formatter.safe_inspect(metadata[:error], 200)} " <> - "duration=#{Formatter.format_duration(duration)}", + fn -> + "[agent.cmd.error] #{format_module(metadata[:agent_module])} " <> + "action=#{Formatter.format_action(metadata[:action])} " <> + "error=#{Formatter.safe_inspect(metadata[:error], 200)} " <> + "duration=#{Formatter.format_duration(duration)}" + end, agent_id: metadata[:agent_id], trace_id: metadata[:jido_trace_id], span_id: metadata[:jido_span_id], @@ -301,9 +303,11 @@ defmodule Jido.Telemetry do duration = Map.get(measurements, :duration, 0) Logger.warning( - "[strategy.init.error] #{format_module(metadata[:strategy])} " <> - "error=#{Formatter.safe_inspect(metadata[:error], 200)} " <> - "duration=#{Formatter.format_duration(duration)}", + fn -> + "[strategy.init.error] #{format_module(metadata[:strategy])} " <> + "error=#{Formatter.safe_inspect(metadata[:error], 200)} " <> + "duration=#{Formatter.format_duration(duration)}" + end, agent_id: metadata[:agent_id], trace_id: metadata[:jido_trace_id], stacktrace: metadata[:stacktrace] @@ -344,9 +348,11 @@ defmodule Jido.Telemetry do duration = Map.get(measurements, :duration, 0) Logger.warning( - "[strategy.cmd.error] #{format_module(metadata[:strategy])} " <> - "error=#{Formatter.safe_inspect(metadata[:error], 200)} " <> - "duration=#{Formatter.format_duration(duration)}", + fn -> + "[strategy.cmd.error] #{format_module(metadata[:strategy])} " <> + "error=#{Formatter.safe_inspect(metadata[:error], 200)} " <> + "duration=#{Formatter.format_duration(duration)}" + end, agent_id: metadata[:agent_id], trace_id: metadata[:jido_trace_id], stacktrace: metadata[:stacktrace] @@ -386,9 +392,11 @@ defmodule Jido.Telemetry do duration = Map.get(measurements, :duration, 0) Logger.warning( - "[strategy.tick.error] #{format_module(metadata[:strategy])} " <> - "error=#{Formatter.safe_inspect(metadata[:error], 200)} " <> - "duration=#{Formatter.format_duration(duration)}", + fn -> + "[strategy.tick.error] #{format_module(metadata[:strategy])} " <> + "error=#{Formatter.safe_inspect(metadata[:error], 200)} " <> + "duration=#{Formatter.format_duration(duration)}" + end, agent_id: metadata[:agent_id], trace_id: metadata[:jido_trace_id], stacktrace: metadata[:stacktrace] @@ -437,9 +445,11 @@ defmodule Jido.Telemetry do duration = Map.get(measurements, :duration, 0) Logger.warning( - "[signal.error] type=#{Formatter.format_signal_type(metadata[:signal_type])} " <> - "error=#{Formatter.safe_inspect(metadata[:error], 200)} " <> - "duration=#{Formatter.format_duration(duration)}", + fn -> + "[signal.error] type=#{Formatter.format_signal_type(metadata[:signal_type])} " <> + "error=#{Formatter.safe_inspect(metadata[:error], 200)} " <> + "duration=#{Formatter.format_duration(duration)}" + end, agent_id: metadata[:agent_id], trace_id: metadata[:jido_trace_id], span_id: metadata[:jido_span_id], @@ -488,9 +498,11 @@ defmodule Jido.Telemetry do duration = Map.get(measurements, :duration, 0) Logger.warning( - "[directive.error] type=#{metadata[:directive_type]} " <> - "error=#{Formatter.safe_inspect(metadata[:error], 200)} " <> - "duration=#{Formatter.format_duration(duration)}", + fn -> + "[directive.error] type=#{metadata[:directive_type]} " <> + "error=#{Formatter.safe_inspect(metadata[:error], 200)} " <> + "duration=#{Formatter.format_duration(duration)}" + end, agent_id: metadata[:agent_id], trace_id: metadata[:jido_trace_id], span_id: metadata[:jido_span_id], @@ -500,25 +512,26 @@ defmodule Jido.Telemetry do def handle_event([:jido, :agent_server, :queue, :overflow], measurements, metadata, _config) do Logger.warning( - "[queue.overflow] signal_type=#{Formatter.format_signal_type(metadata[:signal_type])} " <> - "queue_size=#{measurements[:queue_size]}", + fn -> + "[queue.overflow] signal_type=#{Formatter.format_signal_type(metadata[:signal_type])} " <> + "queue_size=#{measurements[:queue_size]}" + end, agent_id: metadata[:agent_id], trace_id: metadata[:jido_trace_id] ) end def handle_event([:jido, :agent_server, :cron, :register], _measurements, metadata, _config) do - Logger.debug( - "[cron.register] job_id=#{inspect(metadata[:job_id])} cron=#{inspect(metadata[:cron_expression])}", - agent_id: metadata[:agent_id] - ) + maybe_log_cron_debug(metadata, fn -> + "[cron.register] job_id=#{inspect(metadata[:job_id])} " <> + "cron=#{inspect(metadata[:cron_expression])}" + end) end def handle_event([:jido, :agent_server, :cron, :cancel], _measurements, metadata, _config) do - Logger.debug( - "[cron.cancel] job_id=#{inspect(metadata[:job_id])}", - agent_id: metadata[:agent_id] - ) + maybe_log_cron_debug(metadata, fn -> + "[cron.cancel] job_id=#{inspect(metadata[:job_id])}" + end) end def handle_event( @@ -527,10 +540,11 @@ defmodule Jido.Telemetry do metadata, _config ) do - Logger.debug( - "[cron.restart_scheduled] job_id=#{inspect(metadata[:job_id])} reason=#{Formatter.safe_inspect(metadata[:reason], 120)} delay_ms=#{inspect(metadata[:delay_ms])}", - agent_id: metadata[:agent_id] - ) + maybe_log_cron_debug(metadata, fn -> + "[cron.restart_scheduled] job_id=#{inspect(metadata[:job_id])} " <> + "reason=#{Formatter.safe_inspect(metadata[:reason], 120)} " <> + "delay_ms=#{inspect(metadata[:delay_ms])}" + end) end def handle_event( @@ -539,10 +553,10 @@ defmodule Jido.Telemetry do metadata, _config ) do - Logger.debug( - "[cron.restart_succeeded] job_id=#{inspect(metadata[:job_id])} cron=#{inspect(metadata[:cron_expression])}", - agent_id: metadata[:agent_id] - ) + maybe_log_cron_debug(metadata, fn -> + "[cron.restart_succeeded] job_id=#{inspect(metadata[:job_id])} " <> + "cron=#{inspect(metadata[:cron_expression])}" + end) end def handle_event( @@ -552,7 +566,10 @@ defmodule Jido.Telemetry do _config ) do Logger.warning( - "[cron.persist_failure] job_id=#{inspect(metadata[:job_id])} reason=#{Formatter.safe_inspect(metadata[:reason], 200)}", + fn -> + "[cron.persist_failure] job_id=#{inspect(metadata[:job_id])} " <> + "reason=#{Formatter.safe_inspect(metadata[:reason], 200)}" + end, agent_id: metadata[:agent_id] ) end @@ -636,6 +653,14 @@ defmodule Jido.Telemetry do is_slow or has_directives end + defp maybe_log_cron_debug(metadata, message_fun) when is_function(message_fun, 0) do + if ObserveConfig.debug_enabled?(metadata[:jido_instance]) do + Logger.debug(message_fun, agent_id: metadata[:agent_id]) + end + + :ok + end + defp format_module(nil), do: "unknown" defp format_module(module) when is_atom(module) do diff --git a/test/jido/agent_server/telemetry_test.exs b/test/jido/agent_server/telemetry_test.exs index 61450a6d..beb9e491 100644 --- a/test/jido/agent_server/telemetry_test.exs +++ b/test/jido/agent_server/telemetry_test.exs @@ -1,11 +1,10 @@ defmodule JidoTest.AgentServer.TelemetryTest do use JidoTest.Case, async: false - import ExUnit.CaptureLog - alias Jido.Agent.Directive alias Jido.AgentServer alias Jido.Debug + alias Jido.Observe.Config, as: ObserveConfig alias Jido.Signal alias JidoTest.TestActions @@ -132,41 +131,36 @@ defmodule JidoTest.AgentServer.TelemetryTest do end end - describe "action logging integration" do - test "suppresses jido_action start logs when args are not full", %{jido: jido} do + describe "action execution observability integration" do + test "suppresses jido_action telemetry when args are not full", %{jido: jido} do {:ok, pid} = AgentServer.start_link(agent: TelemetryAgent, id: "telemetry-log-default", jido: jido) - signal = Signal.new!("increment", %{}, source: "/test") - - log = - capture_log(fn -> - assert {:ok, _agent} = AgentServer.call(pid, signal) - end) + exec_opts = ObserveConfig.action_exec_opts(jido, []) + assert Keyword.get(exec_opts, :log_level) == :warning + assert Keyword.get(exec_opts, :telemetry) == :silent - refute log =~ "Executing JidoTest.TestActions.IncrementAction" - refute log =~ "with params:" + signal = Signal.new!("increment", %{}, source: "/test") + assert {:ok, _agent} = AgentServer.call(pid, signal) refute_receive {:action_telemetry_event, [:jido, :action, :start], _, _}, 50 + refute_receive {:action_telemetry_event, [:jido, :action, :stop], _, _}, 50 GenServer.stop(pid) end - test "enables verbose jido_action logs when instance debug is verbose", %{jido: jido} do + test "enables full jido_action telemetry when instance debug is verbose", %{jido: jido} do Debug.enable(jido, :verbose) on_exit(fn -> Debug.disable(jido) end) {:ok, pid} = AgentServer.start_link(agent: TelemetryAgent, id: "telemetry-log-verbose", jido: jido) - signal = Signal.new!("increment", %{}, source: "/test") - - log = - capture_log(fn -> - assert {:ok, _agent} = AgentServer.call(pid, signal) - end) + exec_opts = ObserveConfig.action_exec_opts(jido, []) + assert Keyword.get(exec_opts, :log_level) == :debug + assert Keyword.get(exec_opts, :telemetry) == :full - assert log =~ "Executing JidoTest.TestActions.IncrementAction" - assert log =~ "with params:" + signal = Signal.new!("increment", %{}, source: "/test") + assert {:ok, _agent} = AgentServer.call(pid, signal) assert_receive {:action_telemetry_event, [:jido, :action, :start], _, %{action: JidoTest.TestActions.IncrementAction}} diff --git a/test/jido/observe/config_test.exs b/test/jido/observe/config_test.exs index f9126331..0d7ec35a 100644 --- a/test/jido/observe/config_test.exs +++ b/test/jido/observe/config_test.exs @@ -220,6 +220,19 @@ defmodule JidoTest.Observe.ConfigTest do assert Keyword.get(opts, :telemetry) == :silent assert Keyword.get(opts, :timeout) == 10 end + + test "honors verbose instance override when deriving exec opts" do + Application.put_env(:jido, :telemetry, log_level: :warning, log_args: :none) + Debug.enable(@test_instance, :verbose) + + opts = + Config.action_exec_opts(@test_instance, __jido_instance__: @test_instance, timeout: 10) + + refute Keyword.has_key?(opts, :__jido_instance__) + assert Keyword.get(opts, :log_level) == :debug + assert Keyword.get(opts, :telemetry) == :full + assert Keyword.get(opts, :timeout) == 10 + end end describe "observe_log_level/1" do @@ -334,13 +347,18 @@ defmodule JidoTest.Observe.ConfigTest do refute Config.level_enabled?(nil, :debug) end - test "trace is not enabled at debug level" do + test "trace is not enabled at info level" do refute Config.level_enabled?(nil, :trace) end - test "info is enabled at debug level" do + test "info is enabled at the default info level" do assert Config.level_enabled?(nil, :info) end + + test "warning is enabled when configured log level is warning" do + Application.put_env(:jido, :telemetry, log_level: :warning) + assert Config.level_enabled?(nil, :warning) + end end describe "interesting_signal_type?/2" do diff --git a/test/jido/observe/observe_test.exs b/test/jido/observe/observe_test.exs index 143a3877..e3161557 100644 --- a/test/jido/observe/observe_test.exs +++ b/test/jido/observe/observe_test.exs @@ -413,7 +413,7 @@ defmodule JidoTest.ObserveTest do Application.put_env(:jido, :observability, log_level: :debug) log = - capture_log(fn -> + capture_log([level: :info], fn -> Observe.log(:info, "test message", key: "value") end) @@ -474,7 +474,7 @@ defmodule JidoTest.ObserveTest do Application.put_env(:jido, :observability, log_level: :info) log = - capture_log(fn -> + capture_log([level: :info], fn -> Log.log(:info, "info message") end) @@ -485,7 +485,7 @@ defmodule JidoTest.ObserveTest do Application.put_env(:jido, :observability, log_level: :info) log = - capture_log(fn -> + capture_log([level: :warning], fn -> Log.log(:warning, "warning message") end) @@ -496,7 +496,7 @@ defmodule JidoTest.ObserveTest do Application.put_env(:jido, :observability, log_level: :warning) log = - capture_log(fn -> + capture_log([level: :debug], fn -> Log.log(:debug, "debug message") Log.log(:info, "info message") end) @@ -509,7 +509,7 @@ defmodule JidoTest.ObserveTest do Application.put_env(:jido, :observability, log_level: :debug) log = - capture_log(fn -> + capture_log([level: :info], fn -> Log.log(:info, "with metadata", agent_id: "agent-123") end) @@ -520,7 +520,7 @@ defmodule JidoTest.ObserveTest do Application.put_env(:jido, :observability, log_level: :debug) log = - capture_log(fn -> + capture_log([level: :debug], fn -> Log.log(:debug, "debug level message") end) diff --git a/test/jido/telemetry_test.exs b/test/jido/telemetry_test.exs index e52c800b..80cf8d93 100644 --- a/test/jido/telemetry_test.exs +++ b/test/jido/telemetry_test.exs @@ -39,6 +39,17 @@ defmodule JidoTest.TelemetryTest do end end + defp with_logger_level(level, fun) when is_atom(level) and is_function(fun, 0) do + previous_level = Logger.level() + + try do + Logger.configure(level: level) + fun.() + after + Logger.configure(level: previous_level) + end + end + describe "setup/0" do test "attaches telemetry handlers idempotently" do assert :ok = Telemetry.setup() @@ -369,7 +380,7 @@ defmodule JidoTest.TelemetryTest do ) end - test "does not emit signal summary logs at the default info level" do + test "does not emit signal summary logs at the default warning level" do log = with_telemetry_env(:delete, fn -> capture_log(fn -> @@ -394,19 +405,21 @@ defmodule JidoTest.TelemetryTest do test "includes directive type summary in signal logs when debug logging is enabled" do log = with_telemetry_env([log_level: :debug], fn -> - capture_log(fn -> - assert :ok = - Telemetry.handle_event( - [:jido, :agent_server, :signal, :stop], - %{duration: 1_000}, - %{ - agent_id: "test", - signal_type: "test.signal", - directive_count: 2, - directive_types: %{"Emit" => 1, "Schedule" => 1} - }, - nil - ) + with_logger_level(:debug, fn -> + capture_log(fn -> + assert :ok = + Telemetry.handle_event( + [:jido, :agent_server, :signal, :stop], + %{duration: 1_000}, + %{ + agent_id: "test", + signal_type: "test.signal", + directive_count: 2, + directive_types: %{"Emit" => 1, "Schedule" => 1} + }, + nil + ) + end) end) end) From ca0d9f1135276e9cc3daf051182c7f6020323af5 Mon Sep 17 00:00:00 2001 From: nshkrdotcom <127063941+nshkrdotcom@users.noreply.github.com> Date: Fri, 3 Apr 2026 11:50:20 -1000 Subject: [PATCH 2/5] test(scheduler): avoid infinite receive warning --- test/jido/scheduler_test.exs | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/test/jido/scheduler_test.exs b/test/jido/scheduler_test.exs index 5b7e4c47..746d2810 100644 --- a/test/jido/scheduler_test.exs +++ b/test/jido/scheduler_test.exs @@ -150,11 +150,7 @@ defmodule JidoTest.SchedulerTest do Scheduler.run_every( fn -> send(test_pid, {:worker_started, self()}) - - receive do - after - :infinity -> :ok - end + Process.sleep(:infinity) end, "* * * * * * *" ) From b516af89808e2b4d9f19e91c3a1ad77308f4bd6c Mon Sep 17 00:00:00 2001 From: nshkrdotcom <127063941+nshkrdotcom@users.noreply.github.com> Date: Fri, 3 Apr 2026 12:06:38 -1000 Subject: [PATCH 3/5] test: harden suite-load-sensitive async assertions --- test/jido/agent/agent_test.exs | 39 +++++++++++++++----- test/jido/agent_server/agent_server_test.exs | 6 +-- 2 files changed, 32 insertions(+), 13 deletions(-) diff --git a/test/jido/agent/agent_test.exs b/test/jido/agent/agent_test.exs index 3e0e0899..27f94dcd 100644 --- a/test/jido/agent/agent_test.exs +++ b/test/jido/agent/agent_test.exs @@ -36,6 +36,27 @@ defmodule JidoTest.AgentTest do end end + defmodule RetryTrackingSlowAction do + @moduledoc false + use Jido.Action, + name: "retry_tracking_slow", + schema: [ + delay_ms: [type: :integer, default: 100], + label: [type: :atom, required: true] + ] + + def run(%{delay_ms: delay, label: label}, %{test_pid: test_pid}) when is_pid(test_pid) do + send(test_pid, {:retry_tracking_attempt, label}) + Process.sleep(delay) + {:ok, %{processed: true, delay: delay}} + end + + def run(%{delay_ms: delay}, _context) do + Process.sleep(delay) + {:ok, %{processed: true, delay: delay}} + end + end + describe "module definition" do test "defines metadata accessors" do assert TestAgents.Basic.name() == "basic_agent" @@ -284,33 +305,31 @@ defmodule JidoTest.AgentTest do test "passes max_retries option to disable retries" do agent = TestAgents.Basic.new() - - start_no_retry = System.monotonic_time(:millisecond) + test_pid = self() {_updated_no_retry, directives_no_retry} = TestAgents.Basic.cmd( agent, - {TestActions.SlowAction, %{delay_ms: 200}}, + {RetryTrackingSlowAction, %{delay_ms: 200, label: :no_retry}, %{test_pid: test_pid}}, timeout: 10, max_retries: 0 ) - elapsed_no_retry = System.monotonic_time(:millisecond) - start_no_retry - - start_default = System.monotonic_time(:millisecond) + assert_receive {:retry_tracking_attempt, :no_retry}, 1_000 + refute_receive {:retry_tracking_attempt, :no_retry}, 100 {_updated_default, directives_default} = TestAgents.Basic.cmd( agent, - {TestActions.SlowAction, %{delay_ms: 200}}, + {RetryTrackingSlowAction, %{delay_ms: 200, label: :default}, %{test_pid: test_pid}}, timeout: 10 ) - elapsed_default = System.monotonic_time(:millisecond) - start_default - assert [%Jido.Agent.Directive.Error{}] = directives_no_retry assert [%Jido.Agent.Directive.Error{}] = directives_default - assert elapsed_no_retry < elapsed_default + assert_receive {:retry_tracking_attempt, :default}, 1_000 + assert_receive {:retry_tracking_attempt, :default}, 1_000 + refute_receive {:retry_tracking_attempt, :default}, 100 end test "cmd/2 delegates to cmd/3 with empty opts" do diff --git a/test/jido/agent_server/agent_server_test.exs b/test/jido/agent_server/agent_server_test.exs index 58f13b84..ccc5f6e7 100644 --- a/test/jido/agent_server/agent_server_test.exs +++ b/test/jido/agent_server/agent_server_test.exs @@ -799,7 +799,7 @@ defmodule JidoTest.AgentServerTest do use Jido.Action, name: "schedule_atom", schema: [] def run(_params, _context) do - {:ok, %{}, [%Directive.Schedule{delay_ms: 10, message: :timeout}]} + {:ok, %{}, [%Directive.Schedule{delay_ms: 0, message: :timeout}]} end end @@ -833,10 +833,10 @@ defmodule JidoTest.AgentServerTest do signal = Signal.new!("schedule_atom", %{}, source: "/test") {:ok, _agent} = AgentServer.call(pid, signal) - # Wait for scheduled atom message (10ms delay + processing) + # Wait for the wrapped scheduled signal to be processed. state = eventually_state(pid, fn state -> state.agent.state.received == :timeout end, - timeout: 100 + timeout: 500 ) assert state.agent.state.received == :timeout From 7e0e8534cf841fbee7e8fc123b949c82297d72e0 Mon Sep 17 00:00:00 2001 From: nshkrdotcom <127063941+nshkrdotcom@users.noreply.github.com> Date: Fri, 3 Apr 2026 12:32:18 -1000 Subject: [PATCH 4/5] refactor(observe): remove helper logging api layer --- guides/observability.md | 1 - lib/jido/observe.ex | 20 +---- lib/jido/observe/log.ex | 73 --------------- mix.exs | 1 - test/jido/observe/observe_test.exs | 137 ----------------------------- 5 files changed, 2 insertions(+), 230 deletions(-) delete mode 100644 lib/jido/observe/log.ex diff --git a/guides/observability.md b/guides/observability.md index c5d85a1c..87a52ad3 100644 --- a/guides/observability.md +++ b/guides/observability.md @@ -746,7 +746,6 @@ Debug events are no-ops when `:debug_events` is `:off` (production default). - `Jido.Tracing.Context` — Correlation ID propagation - `Jido.Observe.Tracer` — Behaviour for OpenTelemetry integration - `Jido.Observe.NoopTracer` — Default no-op tracer -- `Jido.Observe.Log` — Threshold-based logging ## Further Reading diff --git a/lib/jido/observe.ex b/lib/jido/observe.ex index b0bc3583..219125da 100644 --- a/lib/jido/observe.ex +++ b/lib/jido/observe.ex @@ -2,8 +2,8 @@ defmodule Jido.Observe do @moduledoc """ Unified observability facade for Jido agents. - Wraps `:telemetry` events and `Logger` with a simple API for observing - agent execution, action invocations, and workflow iterations. + Wraps `:telemetry` and tracer callbacks with a simple API for observing agent + execution, action invocations, and workflow iterations. ## Features @@ -11,7 +11,6 @@ defmodule Jido.Observe do - Duration measurement for all spans (nanoseconds) - Automatic correlation ID enrichment from `Jido.Tracing.Context` - Pluggable tracer callbacks via `Jido.Observe.Tracer` - - Threshold-based logging via `Jido.Observe.Log` ## Correlation Tracing Integration @@ -96,7 +95,6 @@ defmodule Jido.Observe do require Logger alias Jido.Observe.Config, as: ObserveConfig - alias Jido.Observe.Log alias Jido.Observe.SpanCtx alias Jido.Tracing.Context, as: TracingContext @@ -242,20 +240,6 @@ defmodule Jido.Observe do :ok end - @doc """ - Conditionally logs a message based on the observability threshold. - - Delegates to `Jido.Observe.Log.log/3`. - - ## Example - - Jido.Observe.log(:debug, "Processing step", agent_id: agent.id) - """ - @spec log(Logger.level(), Logger.message(), keyword()) :: :ok - def log(level, message, metadata \\ []) do - Log.log(level, message, metadata) - end - @doc """ Emits a telemetry event unconditionally. diff --git a/lib/jido/observe/log.ex b/lib/jido/observe/log.ex deleted file mode 100644 index e07c0c3c..00000000 --- a/lib/jido/observe/log.ex +++ /dev/null @@ -1,73 +0,0 @@ -defmodule Jido.Observe.Log do - @moduledoc """ - Centralized log threshold for observability. - - This module provides threshold-based logging for Jido's observability system. - The log threshold can be configured per-environment to control verbosity: - - - `:debug` in development for verbose output - - `:info` or `:warning` in production for minimal noise - - ## Configuration - - # config/config.exs - config :jido, :observability, - log_level: :info - - # config/dev.exs - config :jido, :observability, - log_level: :debug - - ## Usage - - alias Jido.Observe.Log - - # Only logs if threshold allows :debug level - Log.log(:debug, "Processing step", agent_id: agent.id, step: 1) - - # Always logs in most configurations - Log.log(:info, "Agent completed", agent_id: agent.id) - """ - - require Logger - - @type level :: Logger.level() - - @doc """ - Returns the current observability log threshold. - - Reads from application config `:jido, :observability, :log_level`. - Defaults to `:info` if not configured. - """ - @spec threshold() :: level() - def threshold do - Jido.Observe.Config.observe_log_level(nil) - end - - @doc """ - Conditionally logs a message based on the observability threshold. - - The message is logged only if the threshold level allows it. - Uses `Jido.Util.cond_log/4` under the hood. - - ## Parameters - - - `level` - The log level for this message (:debug, :info, :warning, :error) - - `message` - The message to log (string or iodata) - - `metadata` - Keyword list of metadata to include - - ## Examples - - # With threshold at :info, this won't log - Log.log(:debug, "Verbose info", step: 1) - - # With threshold at :info, this will log - Log.log(:info, "Important info", agent_id: "abc") - """ - @spec log(level(), Logger.message(), keyword()) :: :ok - def log(level, message, metadata \\ []) do - instance = Keyword.get(metadata, :jido_instance) - threshold = Jido.Observe.Config.observe_log_level(instance) - Jido.Util.cond_log(threshold, level, message, metadata) - end -end diff --git a/mix.exs b/mix.exs index 94038603..1657eb1a 100644 --- a/mix.exs +++ b/mix.exs @@ -339,7 +339,6 @@ defmodule Jido.MixProject do Observability: [ Jido.Observe, Jido.Observe.Config, - Jido.Observe.Log, Jido.Observe.Tracer, Jido.Observe.NoopTracer, Jido.Observe.SpanCtx, diff --git a/test/jido/observe/observe_test.exs b/test/jido/observe/observe_test.exs index e3161557..6772f752 100644 --- a/test/jido/observe/observe_test.exs +++ b/test/jido/observe/observe_test.exs @@ -4,10 +4,7 @@ defmodule JidoTest.ObserveTest do """ use ExUnit.Case, async: false - import ExUnit.CaptureLog - alias Jido.Observe - alias Jido.Observe.Log alias Jido.Observe.NoopTracer alias Jido.Signal alias Jido.Tracing.Context, as: TraceContext @@ -394,140 +391,6 @@ defmodule JidoTest.ObserveTest do end end - describe "log/3" do - setup do - original_config = Application.get_env(:jido, :observability) - - on_exit(fn -> - if original_config do - Application.put_env(:jido, :observability, original_config) - else - Application.delete_env(:jido, :observability) - end - end) - - :ok - end - - test "delegates to Jido.Observe.Log" do - Application.put_env(:jido, :observability, log_level: :debug) - - log = - capture_log([level: :info], fn -> - Observe.log(:info, "test message", key: "value") - end) - - assert log =~ "test message" - end - end - - describe "Jido.Observe.Log.threshold/0" do - setup do - original_config = Application.get_env(:jido, :observability) - - on_exit(fn -> - if original_config do - Application.put_env(:jido, :observability, original_config) - else - Application.delete_env(:jido, :observability) - end - end) - - :ok - end - - test "returns configured value" do - Application.put_env(:jido, :observability, log_level: :warning) - assert Log.threshold() == :warning - - Application.put_env(:jido, :observability, log_level: :debug) - assert Log.threshold() == :debug - end - - test "defaults to :info when not configured" do - Application.delete_env(:jido, :observability) - assert Log.threshold() == :info - end - - test "defaults to :info when log_level key is missing" do - Application.put_env(:jido, :observability, tracer: SomeTracer) - assert Log.threshold() == :info - end - end - - describe "Jido.Observe.Log.log/3" do - setup do - original_config = Application.get_env(:jido, :observability) - - on_exit(fn -> - if original_config do - Application.put_env(:jido, :observability, original_config) - else - Application.delete_env(:jido, :observability) - end - end) - - :ok - end - - test "logs when message level meets threshold" do - Application.put_env(:jido, :observability, log_level: :info) - - log = - capture_log([level: :info], fn -> - Log.log(:info, "info message") - end) - - assert log =~ "info message" - end - - test "logs when message level exceeds threshold" do - Application.put_env(:jido, :observability, log_level: :info) - - log = - capture_log([level: :warning], fn -> - Log.log(:warning, "warning message") - end) - - assert log =~ "warning message" - end - - test "does not log when message level is below threshold" do - Application.put_env(:jido, :observability, log_level: :warning) - - log = - capture_log([level: :debug], fn -> - Log.log(:debug, "debug message") - Log.log(:info, "info message") - end) - - refute log =~ "debug message" - refute log =~ "info message" - end - - test "includes metadata in log output" do - Application.put_env(:jido, :observability, log_level: :debug) - - log = - capture_log([level: :info], fn -> - Log.log(:info, "with metadata", agent_id: "agent-123") - end) - - assert log =~ "with metadata" - end - - test "logs at debug level when threshold is debug" do - Application.put_env(:jido, :observability, log_level: :debug) - - log = - capture_log([level: :debug], fn -> - Log.log(:debug, "debug level message") - end) - - assert log =~ "debug level message" - end - end - describe "Jido.Observe.NoopTracer" do test "span_start/2 returns nil" do result = NoopTracer.span_start([:jido, :test], %{key: "value"}) From 34dba59e745ffcfb1048ea795c9a997b3901b764 Mon Sep 17 00:00:00 2001 From: nshkrdotcom <127063941+nshkrdotcom@users.noreply.github.com> Date: Sat, 4 Apr 2026 10:32:22 -1000 Subject: [PATCH 5/5] Add Jido.Observe.Log for threshold-based observability logging Introduced Jido.Observe.Log to centralize threshold-based logging logic. Added Jido.Observe.log/3 as a facade to maintain compatibility with the existing observability API. The implementation respects both the global Jido observability configuration and per-instance overrides via Jido.Debug. Includes comprehensive test coverage for threshold enforcement and instance metadata handling. --- lib/jido/observe.ex | 12 +++ lib/jido/observe/log.ex | 32 ++++++++ test/jido/observe/log_compatibility_test.exs | 83 ++++++++++++++++++++ 3 files changed, 127 insertions(+) create mode 100644 lib/jido/observe/log.ex create mode 100644 test/jido/observe/log_compatibility_test.exs diff --git a/lib/jido/observe.ex b/lib/jido/observe.ex index 219125da..8c436440 100644 --- a/lib/jido/observe.ex +++ b/lib/jido/observe.ex @@ -11,6 +11,7 @@ defmodule Jido.Observe do - Duration measurement for all spans (nanoseconds) - Automatic correlation ID enrichment from `Jido.Tracing.Context` - Pluggable tracer callbacks via `Jido.Observe.Tracer` + - Threshold-based logging compatibility via `Jido.Observe.Log` ## Correlation Tracing Integration @@ -95,6 +96,7 @@ defmodule Jido.Observe do require Logger alias Jido.Observe.Config, as: ObserveConfig + alias Jido.Observe.Log alias Jido.Observe.SpanCtx alias Jido.Tracing.Context, as: TracingContext @@ -240,6 +242,16 @@ defmodule Jido.Observe do :ok end + @doc """ + Conditionally logs a message based on the observability threshold. + + Delegates to `Jido.Observe.Log.log/3`. + """ + @spec log(Logger.level(), Logger.message(), keyword()) :: :ok + def log(level, message, metadata \\ []) do + Log.log(level, message, metadata) + end + @doc """ Emits a telemetry event unconditionally. diff --git a/lib/jido/observe/log.ex b/lib/jido/observe/log.ex new file mode 100644 index 00000000..c49edb1b --- /dev/null +++ b/lib/jido/observe/log.ex @@ -0,0 +1,32 @@ +defmodule Jido.Observe.Log do + @moduledoc """ + Threshold-based observability logging compatibility shim. + + This module preserves the historical public logging API while the runtime + continues to honor `config :jido, :observability, log_level: ...` and + per-instance `Jido.Debug` overrides. + """ + + @type level :: Logger.level() + + @doc """ + Returns the current observability log threshold. + """ + @spec threshold() :: level() + def threshold do + Jido.Observe.Config.observe_log_level(nil) + end + + @doc """ + Conditionally logs a message based on the observability threshold. + + When `:jido_instance` metadata is present, per-instance overrides from + `Jido.Debug` are honored. + """ + @spec log(level(), Logger.message(), keyword()) :: :ok + def log(level, message, metadata \\ []) do + instance = Keyword.get(metadata, :jido_instance) + threshold = Jido.Observe.Config.observe_log_level(instance) + Jido.Util.cond_log(threshold, level, message, metadata) + end +end diff --git a/test/jido/observe/log_compatibility_test.exs b/test/jido/observe/log_compatibility_test.exs new file mode 100644 index 00000000..aab52f5c --- /dev/null +++ b/test/jido/observe/log_compatibility_test.exs @@ -0,0 +1,83 @@ +defmodule JidoTest.Observe.LogCompatibilityTest do + use ExUnit.Case, async: false + + import ExUnit.CaptureLog + + alias Jido.Debug + alias Jido.Observe + alias Jido.Observe.Log + + @test_instance :"jido_observe_log_compat_#{System.unique_integer([:positive])}" + + setup do + previous_level = Logger.level() + Logger.configure(level: :debug) + + original_observability = Application.get_env(:jido, :observability) + Debug.reset(@test_instance) + + on_exit(fn -> + Logger.configure(level: previous_level) + + if original_observability do + Application.put_env(:jido, :observability, original_observability) + else + Application.delete_env(:jido, :observability) + end + + Debug.reset(@test_instance) + end) + + :ok + end + + describe "Jido.Observe.log/3" do + test "preserves the threshold-based logging facade" do + Application.put_env(:jido, :observability, log_level: :info) + + log = + capture_log(fn -> + Observe.log(:debug, "compat facade suppressed") + Observe.log(:info, "compat facade emitted") + end) + + refute log =~ "compat facade suppressed" + assert log =~ "compat facade emitted" + end + end + + describe "Jido.Observe.Log.threshold/0" do + test "reads the observability log level from config" do + Application.put_env(:jido, :observability, log_level: :warning) + + assert Log.threshold() == :warning + end + end + + describe "Jido.Observe.Log.log/3" do + test "honors the global observability log level" do + Application.put_env(:jido, :observability, log_level: :warning) + + log = + capture_log(fn -> + Log.log(:info, "compat log suppressed") + Log.log(:error, "compat log emitted") + end) + + refute log =~ "compat log suppressed" + assert log =~ "compat log emitted" + end + + test "honors per-instance debug overrides via :jido_instance metadata" do + Application.put_env(:jido, :observability, log_level: :warning) + Debug.enable(@test_instance, :on) + + log = + capture_log(fn -> + Log.log(:debug, "compat debug override emitted", jido_instance: @test_instance) + end) + + assert log =~ "compat debug override emitted" + end + end +end