-
Notifications
You must be signed in to change notification settings - Fork 20
fix: Test & CI infrastructure improvements — graceful shutdown, readiness probes, test isolation #744
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: dev
Are you sure you want to change the base?
fix: Test & CI infrastructure improvements — graceful shutdown, readiness probes, test isolation #744
Changes from all commits
94ac81f
a52fd1a
56f9818
6548ce4
2ed8aff
f896539
d4681de
cde07e2
32e1004
0c85828
2b404da
5556550
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -136,6 +136,10 @@ enum Domain { | |
| enable_mcp: Option<bool>, | ||
| #[arg(long, action)] | ||
| mcp_port: Option<u16>, | ||
| /// Write the executor PID to this file on startup (removed on clean shutdown). | ||
| /// Useful for test harnesses that need targeted process cleanup. | ||
| #[arg(long)] | ||
| pid_file: Option<String>, | ||
| }, | ||
| RunLocalHcServices {}, | ||
| } | ||
|
|
@@ -189,8 +193,17 @@ async fn main() -> Result<()> { | |
| enable_multi_user, | ||
| enable_mcp, | ||
| mcp_port, | ||
| pid_file, | ||
| } = args.domain | ||
| { | ||
| // Set PID file path as env var so the executor can write/clean it up | ||
| if let Some(ref pf) = pid_file { | ||
| // SAFETY: set_var is safe here because we're in single-threaded init before spawning. | ||
| #[allow(deprecated)] | ||
| unsafe { | ||
| std::env::set_var("AD4M_PID_FILE", pf); | ||
| } | ||
|
Comment on lines
+199
to
+205
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 🧩 Analysis chain🌐 Web query:
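💡 Result: On Unix-like platforms (everything except Windows), `std::env::set_var` is not safe to call in a multi-threaded process. For an `async fn main` running on the Tokio multi-thread runtime, worker threads already exist by the time the body executes, so the `// SAFETY:` comment's "single-threaded init" claim does not hold. If you must set an env var for your own process, do it before starting the Tokio multi-thread runtime (e.g., in a synchronous `fn main`). Sources: Rust standard library documentation for `std::env::set_var`. Don't call `std::env::set_var` from the async `main`. Thread the PID-file path through the executor configuration (e.g. `Ad4mConfig`) instead. 🤖 Prompt for AI Agents |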
||
| } | ||
| let tls = if tls_cert_file.is_some() && tls_key_file.is_some() { | ||
| Some(TlsConfig { | ||
| cert_file_path: tls_cert_file.unwrap(), | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -163,6 +163,9 @@ enum Domain { | |
| enable_mcp: Option<bool>, | ||
| #[arg(long, action)] | ||
| mcp_port: Option<u16>, | ||
| /// Write the executor PID to this file on startup (removed on clean shutdown). | ||
| #[arg(long)] | ||
| pid_file: Option<String>, | ||
|
Comment on lines
+166
to
+168
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 🧩 Analysis chain🏁 Script executed: #!/bin/bash
set -euo pipefail
echo "Run variant definition:"
sed -n '129,169p' cli/src/main.rs
echo
echo "Trailing match arm:"
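sed -n '335,352p' cli/src/main.rs
Repository: coasys/ad4m Length of output: 2064
Update the trailing `Domain::Run` match arm. After adding `pid_file`, the arm's explicit field list no longer names every field of the variant; match with `..` so future fields do not break it. Minimal fix:
- Domain::Run {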
- app_data_path: _,
- network_bootstrap_seed: _,
- language_language_only: _,
- run_dapp_server: _,
- gql_port: _,
- hc_admin_port: _,
- hc_app_port: _,
- hc_use_bootstrap: _,
- hc_use_local_proxy: _,
- hc_use_mdns: _,
- hc_use_proxy: _,
- hc_proxy_url: _,
- hc_bootstrap_url: _,
- connect_holochain: _,
- admin_credential: _,
- enable_multi_user: _
- } => unreachable!(),
+ Domain::Run { .. } => unreachable!(),
🤖 Prompt for AI Agents |
||
| }, | ||
| RunLocalHcServices {}, | ||
| Eve { | ||
|
|
@@ -243,8 +246,14 @@ async fn main() -> Result<()> { | |
| enable_multi_user, | ||
| enable_mcp, | ||
| mcp_port, | ||
| pid_file, | ||
| } = args.domain | ||
| { | ||
| // Set PID file path as env var so the executor can write/clean it up | ||
| if let Some(ref pf) = pid_file { | ||
| #[allow(deprecated)] | ||
| unsafe { std::env::set_var("AD4M_PID_FILE", pf); } | ||
|
Comment on lines
+252
to
+255
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 🧩 Analysis chain🌐 Web query:
💡 Result: Why
|
||
| } | ||
| let _ = tokio::spawn(async move { | ||
| rust_executor::run(Ad4mConfig { | ||
| app_data_path, | ||
|
|
@@ -269,6 +278,8 @@ async fn main() -> Result<()> { | |
| auto_permit_cap_requests: None, | ||
| tls: None, | ||
| log_holochain_metrics: None, | ||
| hc_relay_url: None, | ||
| smtp_config: None, | ||
| }).await | ||
| }).await; | ||
|
|
||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -806,6 +806,27 @@ impl Query { | |
| }) | ||
| } | ||
|
|
||
| /// Returns the readiness status of executor subsystems. | ||
| /// Test harnesses should poll this query instead of using `sleep()`. | ||
| /// No capability check — readiness is safe to expose publicly. | ||
| async fn runtime_readiness(&self, _context: &RequestContext) -> FieldResult<ReadinessStatus> { | ||
| let holochain_ready = crate::holochain_service::maybe_get_holochain_service() | ||
| .await | ||
| .is_some(); | ||
|
Comment on lines
+813
to
+815
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
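Line 813 only checks whether a service handle exists. That can become `true` before Holochain is actually able to answer requests, so `holochain_ready` may be reported too early. Proposed fix:
- let holochain_ready = crate::holochain_service::maybe_get_holochain_service()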
- .await
- .is_some();
+ let holochain_ready = if let Some(interface) =
+ crate::holochain_service::maybe_get_holochain_service().await
+ {
+ // Use a lightweight RPC as readiness probe, not just handle existence.
+ interface.get_network_metrics().await.is_ok()
+ } else {
+ false
+ };
🤖 Prompt for AI Agents |
||
|
|
||
| let (agent_initialized, languages_loaded) = | ||
| AgentService::with_global_instance(|agent_service| { | ||
| (agent_service.is_initialized(), agent_service.is_unlocked()) | ||
| }); | ||
|
Comment on lines
+817
to
+820
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
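Line 819 assigns `agent_service.is_unlocked()` to `languages_loaded`, so the field reports whether the agent is unlocked rather than whether languages have actually finished loading. Proposed fix:
- let (agent_initialized, languages_loaded) =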
- AgentService::with_global_instance(|agent_service| {
- (agent_service.is_initialized(), agent_service.is_unlocked())
- });
+ let agent_initialized =
+ AgentService::with_global_instance(|agent_service| agent_service.is_initialized());
+
+ let languages_loaded = {
+ let controller = LanguageController::global_instance();
+ // Replace with a stricter internal readiness signal if available.
+ !controller.get_installed_languages(None).await.is_empty()
+ };
Also applies to: 826-826 🤖 Prompt for AI Agents |
||
|
|
||
| Ok(ReadinessStatus { | ||
| gql_ready: true, // If this query returns, GQL is ready | ||
| holochain_ready, | ||
| agent_initialized, | ||
| languages_loaded, // Currently maps to agent unlocked (languages load during unlock) | ||
| }) | ||
| } | ||
|
|
||
| async fn runtime_known_link_language_templates( | ||
| &self, | ||
| context: &RequestContext, | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -7,6 +7,8 @@ setup_file() { | |
| echo "done." >&3 | ||
| echo "Starting agent 1..." >&3 | ||
| ./target/release/ad4m run --app-data-path ${current_dir}/tests/ad4m1 --gql-port 4000 & | ||
| AD4M_PID=$! | ||
| export AD4M_PID | ||
| sleep 5 | ||
| echo "done." >&3 | ||
|
|
||
|
|
@@ -31,7 +33,19 @@ setup_file() { | |
| } | ||
|
|
||
| teardown_file() { | ||
| killall ad4m | ||
| # Graceful shutdown: SIGTERM first, then escalate to SIGKILL if needed. | ||
| # Never use `killall ad4m` — it kills ALL ad4m processes on the machine, | ||
| # including other CI jobs and dev instances. | ||
| if [ -n "$AD4M_PID" ]; then | ||
| kill -TERM "$AD4M_PID" 2>/dev/null || true | ||
| for i in $(seq 1 10); do | ||
| kill -0 "$AD4M_PID" 2>/dev/null || break | ||
| sleep 1 | ||
| done | ||
| kill -9 "$AD4M_PID" 2>/dev/null || true | ||
| fi | ||
| # Port-based fallback in case PID tracking missed something | ||
| lsof -ti:4000 | xargs -r kill -9 2>/dev/null || true | ||
|
Comment on lines
+47
to
+48
Contributor
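There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. The port fallback can kill an unrelated process. If PID tracking misses the executor, `lsof -ti:4000 | xargs -r kill -9` force-kills every process that has a socket on port 4000, not only this test's `ad4m` instance. Based on learnings "Kill any lingering …" (quote truncated in the source). 🤖 Prompt for AI Agents |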
||
| } | ||
|
|
||
| setup() { | ||
|
|
||
Uh oh!
There was an error while loading. Please reload this page.