Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion config/test.exs
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ config :git_hooks, auto_install: false
# To enable verbose logging during debugging, set LOG_LEVEL env var:
# LOG_LEVEL=debug mix test test/my_test.exs
#
log_level = System.get_env("LOG_LEVEL", "debug") |> String.to_existing_atom()
log_level = System.get_env("LOG_LEVEL", "warning") |> String.to_existing_atom()

config :logger,
level: log_level,
Expand Down
1 change: 0 additions & 1 deletion guides/observability.md
Original file line number Diff line number Diff line change
Expand Up @@ -746,7 +746,6 @@ Debug events are no-ops when `:debug_events` is `:off` (production default).
- `Jido.Tracing.Context` — Correlation ID propagation
- `Jido.Observe.Tracer` — Behaviour for OpenTelemetry integration
- `Jido.Observe.NoopTracer` — Default no-op tracer
- `Jido.Observe.Log` — Threshold-based logging

## Further Reading

Expand Down
8 changes: 4 additions & 4 deletions lib/jido/agent/directive/cron.ex
Original file line number Diff line number Diff line change
Expand Up @@ -70,9 +70,9 @@ defmodule Jido.Agent.Directive.Cron do
{:ok, pid} <- AgentServer.start_runtime_cron_job(state, logical_id, runtime_spec),
{:ok, persisted_state} <-
persist_then_commit_registration(state, pid, logical_id, cron_spec, runtime_spec) do
Logger.debug(
Logger.debug(fn ->
"AgentServer #{agent_id} registered cron job #{inspect(logical_id)}: #{cron_expr}"
)
end)

AgentServer.emit_cron_telemetry_event(persisted_state, :register, %{
job_id: logical_id,
Expand All @@ -82,9 +82,9 @@ defmodule Jido.Agent.Directive.Cron do
{:ok, persisted_state}
else
{:error, reason} ->
Logger.error(
Logger.error(fn ->
"AgentServer #{agent_id} failed to register cron job #{inspect(logical_id)}: #{inspect(reason)}"
)
end)

{:ok, handle_failed_registration(state, logical_id, on_failure)}
end
Expand Down
18 changes: 11 additions & 7 deletions lib/jido/agent/directive/cron_cancel.ex
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,9 @@ defimpl Jido.AgentServer.DirectiveExec, for: Jido.Agent.Directive.CronCancel do

new_state = %{runtime_state | cron_specs: proposed_specs}

Logger.debug("AgentServer #{agent_id} cancelled cron job #{inspect(logical_id)}")
Logger.debug(fn ->
"AgentServer #{agent_id} cancelled cron job #{inspect(logical_id)}"
end)

emit_telemetry(new_state, :cancel, %{job_id: logical_id})
{:ok, new_state}
Expand All @@ -53,9 +55,10 @@ defimpl Jido.AgentServer.DirectiveExec, for: Jido.Agent.Directive.CronCancel do
{_pid, runtime_state} = drop_runtime_job(state, logical_id)
new_state = %{runtime_state | cron_specs: proposed_specs}

Logger.error(
"AgentServer #{agent_id} failed to persist cron cancellation for #{inspect(logical_id)}: #{inspect(reason)}"
)
Logger.error(fn ->
"AgentServer #{agent_id} cancelled cron job #{inspect(logical_id)} " <>
"after checkpoint persistence failure: #{inspect(reason)}"
end)

emit_telemetry(state, :persist_failure, %{
job_id: logical_id,
Expand All @@ -66,9 +69,10 @@ defimpl Jido.AgentServer.DirectiveExec, for: Jido.Agent.Directive.CronCancel do
{:ok, new_state}

{:error, reason} ->
Logger.error(
"AgentServer #{agent_id} failed to persist cron cancellation for #{inspect(logical_id)}: #{inspect(reason)}"
)
Logger.error(fn ->
"AgentServer #{agent_id} failed to cancel cron job #{inspect(logical_id)} " <>
"because persistence failed: #{inspect(reason)}"
end)

emit_telemetry(state, :persist_failure, %{
job_id: logical_id,
Expand Down
106 changes: 61 additions & 45 deletions lib/jido/agent_server.ex
Original file line number Diff line number Diff line change
Expand Up @@ -796,9 +796,9 @@ defmodule Jido.AgentServer do
{:ok, track_cron_job(state, logical_id, pid, runtime_spec: runtime_spec)}

{:error, reason} ->
Logger.error(
Logger.error(fn ->
"AgentServer #{state.id} failed to register #{runtime_cron_log_label(runtime_spec)} #{inspect(logical_id)}: #{inspect(reason)}"
)
end)

{:error, reason, state}
end
Expand Down Expand Up @@ -978,7 +978,10 @@ defmodule Jido.AgentServer do
enq_state

{:error, :queue_overflow} ->
Logger.warning("AgentServer #{state.id} queue overflow during strategy init")
Logger.warning(fn ->
"AgentServer #{state.id} queue overflow during strategy init"
end)

state
end
else
Expand Down Expand Up @@ -1563,7 +1566,10 @@ defmodule Jido.AgentServer do
metadata
)

Logger.warning("AgentServer #{state.id} queue overflow, dropping directives")
Logger.warning(fn ->
"AgentServer #{state.id} queue overflow, dropping directives"
end)

GenServer.reply(from, {:error, :queue_overflow})
maybe_set_idle_status(state)
end
Expand Down Expand Up @@ -1591,10 +1597,10 @@ defmodule Jido.AgentServer do
Map.merge(metadata, %{kind: kind, error: reason})
)

Logger.error(
Logger.error(fn ->
"Signal call task failed for #{state.id}: #{inspect(kind)} #{inspect(reason)}\n" <>
Exception.format_stacktrace(stacktrace)
)
end)

GenServer.reply(from, {:error, reason})
maybe_set_idle_status(state)
Expand Down Expand Up @@ -1818,7 +1824,7 @@ defmodule Jido.AgentServer do
metadata
)

Logger.warning("AgentServer #{state.id} queue overflow, dropping directives")
Logger.warning(fn -> "AgentServer #{state.id} queue overflow, dropping directives" end)
{:error, :queue_overflow, state}
end
end
Expand Down Expand Up @@ -1991,9 +1997,9 @@ defmodule Jido.AgentServer do
end
rescue
e ->
Logger.error(
Logger.error(fn ->
"Plugin #{inspect(spec.module)} handle_signal crashed: #{Exception.message(e)}"
)
end)

error =
Jido.Error.execution_error(
Expand Down Expand Up @@ -2032,9 +2038,9 @@ defmodule Jido.AgentServer do
spec.module.transform_result(action_term, agent_acc, context)
rescue
e ->
Logger.error(
Logger.error(fn ->
"Plugin #{inspect(spec.module)} transform_result crashed: #{Exception.message(e)}"
)
end)

agent_acc
end
Expand Down Expand Up @@ -2084,9 +2090,9 @@ defmodule Jido.AgentServer do
end)

other ->
Logger.warning(
Logger.warning(fn ->
"Invalid child_spec from plugin #{inspect(plugin_module)}: #{inspect(other)}"
)
end)

state
end
Expand All @@ -2112,16 +2118,18 @@ defmodule Jido.AgentServer do
%{state | children: new_children}

{:error, reason} ->
Logger.error("Failed to start plugin child #{inspect(plugin_module)}: #{inspect(reason)}")
Logger.error(fn ->
"Failed to start plugin child #{inspect(plugin_module)}: #{inspect(reason)}"
end)

state
end
end

defp start_plugin_child(%State{} = state, plugin_module, spec) do
Logger.warning(
Logger.warning(fn ->
"Plugin child_spec missing :start key for #{inspect(plugin_module)}: #{inspect(spec)}"
)
end)

state
end
Expand Down Expand Up @@ -2194,9 +2202,9 @@ defmodule Jido.AgentServer do
%{state | children: new_children}

{:error, reason} ->
Logger.warning(
Logger.warning(fn ->
"Failed to start subscription sensor #{inspect(sensor_module)} for plugin #{inspect(plugin_module)}: #{inspect(reason)}"
)
end)

state
end
Expand All @@ -2208,7 +2216,7 @@ defmodule Jido.AgentServer do

@doc false
defp register_plugin_schedules(%State{skip_schedules: true} = state) do
Logger.debug("AgentServer #{state.id} skipping plugin schedules")
Logger.debug(fn -> "AgentServer #{state.id} skipping plugin schedules" end)
state
end

Expand Down Expand Up @@ -2241,9 +2249,10 @@ defmodule Jido.AgentServer do
%{cron_expression: cron_expr, message: message, timezone: timezone}
) do
if Map.has_key?(state.cron_jobs, job_id) do
Logger.warning(
"AgentServer #{state.id} skipping restored cron job #{inspect(job_id)} because declarative/plugin schedule already exists"
)
Logger.warning(fn ->
"AgentServer #{state.id} skipping restored cron job #{inspect(job_id)} " <>
"because declarative/plugin schedule already exists"
end)

new_cron_specs = Map.delete(state.cron_specs, job_id)
cleaned_state = %{state | cron_specs: new_cron_specs}
Expand All @@ -2269,9 +2278,9 @@ defmodule Jido.AgentServer do
end

defp register_restored_cron_spec(%State{} = state, job_id, invalid_spec) do
Logger.error(
Logger.error(fn ->
"AgentServer #{state.id} skipped invalid persisted cron spec #{inspect(job_id)}: #{inspect(invalid_spec)}"
)
end)

state
end
Expand All @@ -2289,9 +2298,9 @@ defmodule Jido.AgentServer do

case register_runtime_cron_job(state, job_id, runtime_spec) do
{:ok, new_state} ->
Logger.debug(
Logger.debug(fn ->
"AgentServer #{state.id} registered schedule #{inspect(job_id)}: #{cron_expr}"
)
end)

new_state

Expand Down Expand Up @@ -2333,9 +2342,10 @@ defmodule Jido.AgentServer do

timer_ref = :erlang.start_timer(delay, self(), {:cron_restart, logical_id})

Logger.warning(
"AgentServer #{state.id} scheduling cron restart for #{inspect(logical_id)} in #{delay}ms after #{inspect(reason)}"
)
Logger.warning(fn ->
"AgentServer #{state.id} scheduling cron restart for #{inspect(logical_id)} " <>
"in #{delay}ms after #{inspect(reason)}"
end)

emit_cron_telemetry_event(state, :restart_scheduled, %{
job_id: logical_id,
Expand Down Expand Up @@ -2587,19 +2597,21 @@ defmodule Jido.AgentServer do
_ = clear_parent_binding(state.jido, state.id, state.partition)
stop_reason = wrap_parent_down_reason(reason)

Logger.info(
"AgentServer #{state.id} stopping: parent died (#{inspect(reason)}), wrapped stop_reason: #{inspect(stop_reason)}"
)
Logger.info(fn ->
"AgentServer #{state.id} stopping: parent died (#{inspect(reason)}), " <>
"wrapped stop_reason: #{inspect(stop_reason)}"
end)

{:stop, stop_reason, State.set_status(state, :stopping)}
end

defp handle_parent_down(%State{on_parent_death: :continue} = state, _pid, reason) do
{former_parent, orphaned_state} = transition_to_orphan(state, reason)

Logger.info(
"AgentServer #{state.id} continuing as orphan after parent #{former_parent.id} died (#{inspect(reason)})"
)
Logger.info(fn ->
"AgentServer #{state.id} continuing as orphan after parent #{former_parent.id} died " <>
"(#{inspect(reason)})"
end)

{:noreply, orphaned_state}
end
Expand Down Expand Up @@ -2635,7 +2647,9 @@ defmodule Jido.AgentServer do
{tag, state} = State.remove_child_by_pid(state, pid)

if tag do
Logger.debug("AgentServer #{state.id} child #{inspect(tag)} exited: #{inspect(reason)}")
Logger.debug(fn ->
"AgentServer #{state.id} child #{inspect(tag)} exited: #{inspect(reason)}"
end)

signal =
ChildExit.new!(
Expand Down Expand Up @@ -2719,9 +2733,9 @@ defmodule Jido.AgentServer do
state

{:error, reason} ->
Logger.warning(
Logger.warning(fn ->
"AgentServer #{state.id} failed to persist parent binding: #{inspect(reason)}"
)
end)

state
end
Expand Down Expand Up @@ -2821,16 +2835,18 @@ defmodule Jido.AgentServer do
when reason in [:normal, :completed, :ok, :done, :success] do
directive_type = directive.__struct__ |> Module.split() |> List.last()

Logger.warning("""
AgentServer #{state.id} received {:stop, #{inspect(reason)}, ...} from directive #{directive_type}.
Logger.warning(fn ->
"""
AgentServer #{state.id} received {:stop, #{inspect(reason)}, ...} from directive #{directive_type}.

This is a HARD STOP: pending directives and async work will be lost, and on_after_cmd/3 will NOT run.
This is a HARD STOP: pending directives and async work will be lost, and on_after_cmd/3 will NOT run.

For normal completion, set state.status to :completed/:failed instead and avoid returning {:stop, ...}.
External code should poll AgentServer.state/1 and check status, not rely on process death.
For normal completion, set state.status to :completed/:failed instead and avoid returning {:stop, ...}.
External code should poll AgentServer.state/1 and check status, not rely on process death.

{:stop, ...} should only be used for abnormal/framework-level termination.
""")
{:stop, ...} should only be used for abnormal/framework-level termination.
"""
end)
end

defp warn_if_normal_stop(_reason, _directive, _state), do: :ok
Expand Down
Loading
Loading