diff --git a/.config/nextest.toml b/.config/nextest.toml new file mode 100644 index 000000000..f03ddbc73 --- /dev/null +++ b/.config/nextest.toml @@ -0,0 +1,6 @@ +[profile.ci] +# Do not cancel the test run on the first failure. +fail-fast = false + +[profile.ci.junit] +path = "junit.xml" diff --git a/.github/workflows/test-gpu-rust.yml b/.github/workflows/test-gpu-rust.yml index 8c8c24441..3956682fb 100644 --- a/.github/workflows/test-gpu-rust.yml +++ b/.github/workflows/test-gpu-rust.yml @@ -56,9 +56,21 @@ jobs: # Run GPU Rust tests echo "Running OSS Rust tests..." - # TODO: fix broken tests, then update to `cargo test --no-fail-fast` - cargo test -p monarch_rdma # Uses cargo nextest to run tests in separate processes, which better matches # internal buck test behavior. - # TODO: increase coverage to more crates. - cargo nextest run -p hyperactor --no-fail-fast + # The CI profile is configured in .config/nextest.toml + # Exclude filter is for packages that don't build in Github Actions yet. + # * monarch_messages: monarch/target/debug/deps/monarch_messages-...: + # /lib64/libm.so.6: version `GLIBC_2.29' not found + # (required by /meta-pytorch/monarch/libtorch/lib/libtorch_cpu.so) + cargo nextest run --workspace --profile ci \ + --exclude monarch_messages \ + --exclude monarch_tensor_worker \ + --exclude monarch_simulator_lib \ + --exclude torch-sys \ + --exclude torch-sys-cuda + # Copy the test results to the expected location + # TODO: error in pytest-results-action, TypeError: results.testsuites.testsuite.testcase is not iterable + # Don't try to parse these results for now. + # mkdir -p "${RUNNER_TEST_RESULTS_DIR:-test-results}" + # cp target/nextest/ci/junit.xml "${RUNNER_TEST_RESULTS_DIR:-test-results}/junit.xml" diff --git a/Cargo.toml b/Cargo.toml index fcac1b8d3..b037643a5 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,7 @@ [workspace] resolver = "2" members = [ + "build_utils", "controller", "cuda-sys", "erased_lifetime", @@ -10,11 +11,22 @@ members = [ "hyperactor_multiprocess", "hyperactor_mesh", "hyperactor_mesh_macros", - "ndslice", + "hyperactor_telemetry", + "monarch_conda", "monarch_extension", - "monarch_tensor_worker", + "monarch_hyperactor", + "monarch_messages", + "monarch_perfetto_trace", "monarch_rdma", + "monarch_simulator", + "monarch_tensor_worker", + "monarch_types", "nccl-sys", + "ndslice", + "preempt_rwlock", "rdmaxcel-sys", + "serde_multipart", + "timed_test", "torch-sys", + "torch-sys-cuda", ] diff --git a/controller/Cargo.toml b/controller/Cargo.toml index c8f696963..4b066da1c 100644 --- a/controller/Cargo.toml +++ b/controller/Cargo.toml @@ -36,4 +36,8 @@ tracing = { version = "0.1.41", features = ["attributes", "valuable"] } [dev-dependencies] monarch_types = { version = "0.0.0", path = "../monarch_types" } +timed_test = { version = "0.0.0", path = "../timed_test" } torch-sys = { version = "0.0.0", path = "../torch-sys" } + +[lints] +rust = { unexpected_cfgs = { check-cfg = ["cfg(fbcode_build)"], level = "warn" } } diff --git a/controller/src/lib.rs b/controller/src/lib.rs index 0dac1e42c..cf384068a 100644 --- a/controller/src/lib.rs +++ b/controller/src/lib.rs @@ -660,6 +660,7 @@ mod tests { use monarch_messages::worker::CallFunctionParams; use monarch_messages::worker::WorkerMessage; use monarch_types::PyTree; + use timed_test::async_timed_test; use torch_sys::RValue; use super::*; @@ -1838,7 +1839,9 @@ mod tests { hyperactor::remote!(PanickingActor); - #[tokio::test] + #[async_timed_test(timeout_secs = 30)] + // times out (both internal and external). + #[cfg_attr(not(fbcode_build), ignore)] async fn test_supervision_fault() { // Start system actor. let timeout: Duration = Duration::from_secs(6); diff --git a/hyperactor/Cargo.toml b/hyperactor/Cargo.toml index d5aaad902..e57bf54be 100644 --- a/hyperactor/Cargo.toml +++ b/hyperactor/Cargo.toml @@ -100,3 +100,6 @@ tracing-test = { version = "0.2.3", features = ["no-env-filter"] } [features] default = [] stdio-write-probe = [] + +[lints] +rust = { unexpected_cfgs = { check-cfg = ["cfg(fbcode_build)"], level = "warn" } } diff --git a/hyperactor/src/channel.rs b/hyperactor/src/channel.rs index 43db21e46..f9407c5b9 100644 --- a/hyperactor/src/channel.rs +++ b/hyperactor/src/channel.rs @@ -1132,7 +1132,7 @@ mod tests { #[tokio::test] // TODO: OSS: called `Result::unwrap()` on an `Err` value: Server(Listen(Tcp([::1]:0), Os { code: 99, kind: AddrNotAvailable, message: "Cannot assign requested address" })) - #[cfg_attr(not(feature = "fb"), ignore)] + #[cfg_attr(not(fbcode_build), ignore)] async fn test_dial_serve() { for addr in addrs() { let (listen_addr, mut rx) = crate::channel::serve::(addr).unwrap(); @@ -1144,7 +1144,7 @@ mod tests { #[tokio::test] // TODO: OSS: called `Result::unwrap()` on an `Err` value: Server(Listen(Tcp([::1]:0), Os { code: 99, kind: AddrNotAvailable, message: "Cannot assign requested address" })) - #[cfg_attr(not(feature = "fb"), ignore)] + #[cfg_attr(not(fbcode_build), ignore)] async fn test_send() { let config = crate::config::global::lock(); diff --git a/hyperactor/src/channel/net.rs b/hyperactor/src/channel/net.rs index e9051487e..52c71899b 100644 --- a/hyperactor/src/channel/net.rs +++ b/hyperactor/src/channel/net.rs @@ -2556,7 +2556,7 @@ mod tests { #[tracing_test::traced_test] #[async_timed_test(timeout_secs = 30)] // TODO: OSS: called `Result::unwrap()` on an `Err` value: Listen(Tcp([::1]:0), Os { code: 99, kind: AddrNotAvailable, message: "Cannot assign requested address" }) - #[cfg_attr(not(feature = "fb"), ignore)] + #[cfg_attr(not(fbcode_build), ignore)] async fn test_tcp_basic() { let (addr, mut rx) = tcp::serve::("[::1]:0".parse().unwrap()).unwrap(); { @@ -2580,7 +2580,7 @@ mod tests { // The message size is limited by CODEC_MAX_FRAME_LENGTH. #[async_timed_test(timeout_secs = 5)] // TODO: OSS: called `Result::unwrap()` on an `Err` value: Listen(Tcp([::1]:0), Os { code: 99, kind: AddrNotAvailable, message: "Cannot assign requested address" }) - #[cfg_attr(not(feature = "fb"), ignore)] + #[cfg_attr(not(fbcode_build), ignore)] async fn test_tcp_message_size() { let default_size_in_bytes = 100 * 1024 * 1024; // Use temporary config for this test @@ -2611,7 +2611,7 @@ mod tests { #[async_timed_test(timeout_secs = 30)] // TODO: OSS: called `Result::unwrap()` on an `Err` value: Listen(Tcp([::1]:0), Os { code: 99, kind: AddrNotAvailable, message: "Cannot assign requested address" }) - #[cfg_attr(not(feature = "fb"), ignore)] + #[cfg_attr(not(fbcode_build), ignore)] async fn test_ack_flush() { let config = config::global::lock(); // Set a large value to effectively prevent acks from being sent except @@ -2635,7 +2635,7 @@ mod tests { #[tracing_test::traced_test] #[tokio::test] // TODO: OSS: failed to retrieve ipv6 address - #[cfg_attr(not(feature = "fb"), ignore)] + #[cfg_attr(not(fbcode_build), ignore)] async fn test_meta_tls_basic() { let addr = ChannelAddr::any(ChannelTransport::MetaTls(TlsMode::IpV6)); let meta_addr = match addr { @@ -3240,7 +3240,7 @@ mod tests { #[tracing_test::traced_test] #[tokio::test] // TODO: OSS: The logs_assert function returned an error: expected log not found - #[cfg_attr(not(feature = "fb"), ignore)] + #[cfg_attr(not(fbcode_build), ignore)] async fn test_tcp_tx_delivery_timeout() { // This link always fails to connect. let link = MockLink::::fail_connects(); @@ -3666,7 +3666,7 @@ mod tests { #[tracing_test::traced_test] #[async_timed_test(timeout_secs = 30)] // TODO: OSS: The logs_assert function returned an error: expected log not found - #[cfg_attr(not(feature = "fb"), ignore)] + #[cfg_attr(not(fbcode_build), ignore)] async fn test_ack_exceeded_limit_with_connected_link() { verify_ack_exceeded_limit(false).await; } @@ -3674,7 +3674,7 @@ mod tests { #[tracing_test::traced_test] #[async_timed_test(timeout_secs = 30)] // TODO: OSS: The logs_assert function returned an error: expected log not found - #[cfg_attr(not(feature = "fb"), ignore)] + #[cfg_attr(not(fbcode_build), ignore)] async fn test_ack_exceeded_limit_with_broken_link() { verify_ack_exceeded_limit(true).await; } @@ -3845,7 +3845,7 @@ mod tests { #[async_timed_test(timeout_secs = 300)] // TODO: OSS: called `Result::unwrap()` on an `Err` value: Listen(Tcp([::1]:0), Os { code: 99, kind: AddrNotAvailable, message: "Cannot assign requested address" }) - #[cfg_attr(not(feature = "fb"), ignore)] + #[cfg_attr(not(fbcode_build), ignore)] async fn test_tcp_throughput() { let config = config::global::lock(); let _guard = @@ -3897,7 +3897,7 @@ mod tests { #[tracing_test::traced_test] #[async_timed_test(timeout_secs = 60)] // TODO: OSS: The logs_assert function returned an error: expected log not found - #[cfg_attr(not(feature = "fb"), ignore)] + #[cfg_attr(not(fbcode_build), ignore)] async fn test_net_tx_closed_on_server_reject() { let link = MockLink::::new(); let receiver_storage = link.receiver_storage(); diff --git a/hyperactor/src/config.rs b/hyperactor/src/config.rs index 4dc56cd4f..ac9209ff6 100644 --- a/hyperactor/src/config.rs +++ b/hyperactor/src/config.rs @@ -314,7 +314,7 @@ mod tests { #[tracing_test::traced_test] #[test] // TODO: OSS: The logs_assert function returned an error: missing log lines: {"# export HYPERACTOR_DEFAULT_ENCODING=serde_multipart", ...} - #[cfg_attr(not(feature = "fb"), ignore)] + #[cfg_attr(not(fbcode_build), ignore)] fn test_from_env() { // Set environment variables // SAFETY: TODO: Audit that the environment access only happens in single-threaded code. diff --git a/hyperactor/src/host.rs b/hyperactor/src/host.rs index 424e3eee5..b95c1749a 100644 --- a/hyperactor/src/host.rs +++ b/hyperactor/src/host.rs @@ -1280,7 +1280,7 @@ mod tests { #[tokio::test] // TODO: OSS: called `Result::unwrap()` on an `Err` value: ReadFailed { manifest_path: "/meta-pytorch/monarch/target/debug/deps/hyperactor-0e1fe83af739d976.resources.json", source: Os { code: 2, kind: NotFound, message: "No such file or directory" } } - #[cfg_attr(not(feature = "fb"), ignore)] + #[cfg_attr(not(fbcode_build), ignore)] async fn test_process_proc_manager() { hyperactor_telemetry::initialize_logging(crate::clock::ClockKind::default()); diff --git a/hyperactor/src/mailbox.rs b/hyperactor/src/mailbox.rs index d59e36ce3..e5f4892a0 100644 --- a/hyperactor/src/mailbox.rs +++ b/hyperactor/src/mailbox.rs @@ -3376,7 +3376,7 @@ mod tests { #[async_timed_test(timeout_secs = 30)] // TODO: OSS: this test is flaky in OSS. Need to repo and fix it. - #[cfg_attr(not(feature = "fb"), ignore)] + #[cfg_attr(not(fbcode_build), ignore)] async fn test_split_port_id_no_reducer() { let Setup { mut receiver, @@ -3462,7 +3462,7 @@ mod tests { #[async_timed_test(timeout_secs = 30)] // TODO: OSS: this test is flaky in OSS. Need to repo and fix it. - #[cfg_attr(not(feature = "fb"), ignore)] + #[cfg_attr(not(fbcode_build), ignore)] async fn test_split_port_id_every_n_messages() { let config = crate::config::global::lock(); let _config_guard = config.override_key( diff --git a/hyperactor_mesh/src/actor_mesh.rs b/hyperactor_mesh/src/actor_mesh.rs index f7d003774..0d8b0b2ee 100644 --- a/hyperactor_mesh/src/actor_mesh.rs +++ b/hyperactor_mesh/src/actor_mesh.rs @@ -1482,6 +1482,7 @@ mod tests { use crate::alloc::process::ProcessAllocator; + #[cfg(fbcode_build)] fn process_allocator() -> ProcessAllocator { ProcessAllocator::new(Command::new(crate::testresource::get( "monarch/hyperactor_mesh/bootstrap", @@ -1928,6 +1929,7 @@ mod tests { use crate::sel; #[tokio::test] + #[cfg(fbcode_build)] async fn test_basic() { let instance = v1::testing::instance().await; let host_mesh = v1::testing::host_mesh(extent!(host = 4)).await; diff --git a/hyperactor_mesh/src/alloc.rs b/hyperactor_mesh/src/alloc.rs index f66097b87..2b5b64d7b 100644 --- a/hyperactor_mesh/src/alloc.rs +++ b/hyperactor_mesh/src/alloc.rs @@ -912,6 +912,7 @@ pub(crate) mod testing { /// a proc that does not time out when it is asked to wait for /// a stuck actor. #[tokio::test] + #[cfg(fbcode_build)] async fn test_allocator_stuck_task() { // Override config. // Use temporary config for this test diff --git a/hyperactor_mesh/src/alloc/process.rs b/hyperactor_mesh/src/alloc/process.rs index b56d3df0a..34a1f7156 100644 --- a/hyperactor_mesh/src/alloc/process.rs +++ b/hyperactor_mesh/src/alloc/process.rs @@ -699,6 +699,7 @@ mod tests { crate::testresource::get("monarch/hyperactor_mesh/bootstrap") ))); + #[cfg(fbcode_build)] #[tokio::test] async fn test_sigterm_on_group_fail() { let bootstrap_binary = crate::testresource::get("monarch/hyperactor_mesh/bootstrap"); diff --git a/hyperactor_mesh/src/alloc/remoteprocess.rs b/hyperactor_mesh/src/alloc/remoteprocess.rs index f54a17fe1..e7554c23c 100644 --- a/hyperactor_mesh/src/alloc/remoteprocess.rs +++ b/hyperactor_mesh/src/alloc/remoteprocess.rs @@ -2056,6 +2056,7 @@ mod test_alloc { use super::*; #[async_timed_test(timeout_secs = 60)] + #[cfg(fbcode_build)] async fn test_alloc_simple() { // Use temporary config for this test let config = hyperactor::config::global::lock(); @@ -2185,6 +2186,7 @@ mod test_alloc { } #[async_timed_test(timeout_secs = 60)] + #[cfg(fbcode_build)] async fn test_alloc_host_failure() { // Use temporary config for this test let config = hyperactor::config::global::lock(); @@ -2316,6 +2318,7 @@ mod test_alloc { } #[async_timed_test(timeout_secs = 15)] + #[cfg(fbcode_build)] async fn test_alloc_inner_alloc_failure() { // SAFETY: Test happens in single-threaded code. unsafe { @@ -2451,6 +2454,7 @@ mod test_alloc { #[tracing_test::traced_test] #[async_timed_test(timeout_secs = 60)] + #[cfg(fbcode_build)] async fn test_remote_process_alloc_signal_handler() { let num_proc_meshes = 5; let hosts_per_proc_mesh = 5; diff --git a/hyperactor_mesh/src/bootstrap.rs b/hyperactor_mesh/src/bootstrap.rs index 3a3e38d6c..c5542561a 100644 --- a/hyperactor_mesh/src/bootstrap.rs +++ b/hyperactor_mesh/src/bootstrap.rs @@ -1507,6 +1507,7 @@ impl BootstrapCommand { /// bootstrap processes under proc manager control. Not available /// outside of test builds. #[cfg(test)] + #[cfg(fbcode_build)] pub(crate) fn test() -> Self { Self { program: crate::testresource::get("monarch/hyperactor_mesh/bootstrap"), @@ -3423,6 +3424,7 @@ mod tests { } #[tokio::test] + #[cfg(fbcode_build)] async fn bootstrap_handle_terminate_graceful() { // Create a root direct-addressed proc + client instance. let root = hyperactor::Proc::direct(ChannelTransport::Unix.any(), "root".to_string()) @@ -3486,6 +3488,7 @@ mod tests { } #[tokio::test] + #[cfg(fbcode_build)] async fn bootstrap_handle_kill_forced() { // Root proc + client instance (so the child can dial back). let root = hyperactor::Proc::direct(ChannelTransport::Unix.any(), "root".to_string()) @@ -3535,6 +3538,7 @@ mod tests { } #[tokio::test] + #[cfg(fbcode_build)] async fn bootstrap_cannonical_simple() { // SAFETY: unit-test scoped unsafe { diff --git a/hyperactor_mesh/src/proc_mesh.rs b/hyperactor_mesh/src/proc_mesh.rs index 6e43ca654..db518c500 100644 --- a/hyperactor_mesh/src/proc_mesh.rs +++ b/hyperactor_mesh/src/proc_mesh.rs @@ -1262,6 +1262,7 @@ mod tests { use crate::sel; #[tokio::test] + #[cfg(fbcode_build)] async fn test_basic() { let instance = v1::testing::instance().await; let ext = extent!(host = 4); diff --git a/hyperactor_mesh/src/testresource.rs b/hyperactor_mesh/src/testresource.rs index edffc23f2..e8b5a75a6 100644 --- a/hyperactor_mesh/src/testresource.rs +++ b/hyperactor_mesh/src/testresource.rs @@ -16,6 +16,7 @@ use std::path::PathBuf; /// /// We should convert these tests to integration tests, so that cargo can /// also manage the binaries. +#[cfg(fbcode_build)] pub fn get(name: S) -> PathBuf where S: AsRef, diff --git a/hyperactor_mesh/src/v1/actor_mesh.rs b/hyperactor_mesh/src/v1/actor_mesh.rs index 8b287fbf5..bbcf20406 100644 --- a/hyperactor_mesh/src/v1/actor_mesh.rs +++ b/hyperactor_mesh/src/v1/actor_mesh.rs @@ -467,6 +467,7 @@ mod tests { use crate::v1::testing; #[tokio::test] + #[cfg(fbcode_build)] async fn test_actor_mesh_ref_lazy_materialization() { // 1) Bring up procs and spawn actors. let instance = testing::instance().await; @@ -566,6 +567,7 @@ mod tests { } #[async_timed_test(timeout_secs = 30)] + #[cfg(fbcode_build)] async fn test_actor_states_with_panic() { hyperactor_telemetry::initialize_logging_for_test(); @@ -632,6 +634,7 @@ mod tests { } #[async_timed_test(timeout_secs = 30)] + #[cfg(fbcode_build)] async fn test_actor_states_with_process_exit() { hyperactor_telemetry::initialize_logging_for_test(); @@ -699,6 +702,7 @@ mod tests { } #[async_timed_test(timeout_secs = 30)] + #[cfg(fbcode_build)] async fn test_actor_states_on_sliced_mesh() { hyperactor_telemetry::initialize_logging_for_test(); @@ -772,6 +776,7 @@ mod tests { } #[async_timed_test(timeout_secs = 30)] + #[cfg(fbcode_build)] async fn test_cast() { let config = hyperactor::config::global::lock(); let _guard = config.override_key(crate::bootstrap::MESH_BOOTSTRAP_ENABLE_PDEATHSIG, false); diff --git a/hyperactor_mesh/src/v1/host_mesh.rs b/hyperactor_mesh/src/v1/host_mesh.rs index ee800676a..27fd3aed9 100644 --- a/hyperactor_mesh/src/v1/host_mesh.rs +++ b/hyperactor_mesh/src/v1/host_mesh.rs @@ -1115,6 +1115,7 @@ mod tests { } #[tokio::test] + #[cfg(fbcode_build)] async fn test_allocate() { let config = hyperactor::config::global::lock(); let _guard = config.override_key(crate::bootstrap::MESH_BOOTSTRAP_ENABLE_PDEATHSIG, false); @@ -1225,6 +1226,7 @@ mod tests { } #[tokio::test] + #[cfg(fbcode_build)] async fn test_extrinsic_allocation() { let config = hyperactor::config::global::lock(); let _guard = config.override_key(crate::bootstrap::MESH_BOOTSTRAP_ENABLE_PDEATHSIG, false); @@ -1268,6 +1270,7 @@ mod tests { } #[tokio::test] + #[cfg(fbcode_build)] async fn test_failing_proc_allocation() { let program = crate::testresource::get("monarch/hyperactor_mesh/bootstrap"); @@ -1301,6 +1304,7 @@ mod tests { } #[tokio::test] + #[cfg(fbcode_build)] async fn test_halting_proc_allocation() { let config = config::global::lock(); let _guard1 = config.override_key(PROC_SPAWN_MAX_IDLE, Duration::from_secs(5)); @@ -1345,6 +1349,7 @@ mod tests { } #[tokio::test] + #[cfg(fbcode_build)] async fn test_client_config_override() { let config = hyperactor::config::global::lock(); let _guard1 = config.override_key(crate::bootstrap::MESH_BOOTSTRAP_ENABLE_PDEATHSIG, false); diff --git a/hyperactor_mesh/src/v1/host_mesh/mesh_agent.rs b/hyperactor_mesh/src/v1/host_mesh/mesh_agent.rs index 95371f533..23ea38b9e 100644 --- a/hyperactor_mesh/src/v1/host_mesh/mesh_agent.rs +++ b/hyperactor_mesh/src/v1/host_mesh/mesh_agent.rs @@ -523,6 +523,7 @@ mod tests { use crate::resource::GetStateClient; #[tokio::test] + #[cfg(fbcode_build)] async fn test_basic() { let (host, _handle) = Host::serve( BootstrapProcManager::new(BootstrapCommand::test()).unwrap(), diff --git a/hyperactor_mesh/src/v1/proc_mesh.rs b/hyperactor_mesh/src/v1/proc_mesh.rs index cdb8f0f62..a7a0b3b50 100644 --- a/hyperactor_mesh/src/v1/proc_mesh.rs +++ b/hyperactor_mesh/src/v1/proc_mesh.rs @@ -1037,6 +1037,7 @@ mod tests { } #[async_timed_test(timeout_secs = 30)] + #[cfg(fbcode_build)] async fn test_spawn_actor() { hyperactor_telemetry::initialize_logging(hyperactor::clock::ClockKind::default()); @@ -1049,6 +1050,7 @@ mod tests { } #[tokio::test] + #[cfg(fbcode_build)] async fn test_failing_spawn_actor() { hyperactor_telemetry::initialize_logging(hyperactor::clock::ClockKind::default()); diff --git a/hyperactor_mesh/src/v1/testing.rs b/hyperactor_mesh/src/v1/testing.rs index b67e2e298..e528b0c86 100644 --- a/hyperactor_mesh/src/v1/testing.rs +++ b/hyperactor_mesh/src/v1/testing.rs @@ -45,6 +45,7 @@ pub async fn instance() -> &'static Instance<()> { INSTANCE.get_or_init(fresh_instance).await } +#[cfg(fbcode_build)] pub async fn proc_meshes(cx: &impl context::Actor, extent: Extent) -> Vec { let mut meshes = Vec::new(); @@ -87,6 +88,7 @@ pub async fn proc_meshes(cx: &impl context::Actor, extent: Extent) -> Vec Vec> { let spec = AllocSpec { extent: extent.clone(), @@ -134,6 +136,7 @@ pub async fn local_proc_mesh(extent: Extent) -> (ProcMesh, Instance<()>, DialMai } /// Create a host mesh using multiple processes running on the test machine. +#[cfg(fbcode_build)] pub async fn host_mesh(extent: Extent) -> HostMesh { let mut allocator = ProcessAllocator::new(Command::new(crate::testresource::get( "monarch/hyperactor_mesh/bootstrap", diff --git a/hyperactor_mesh/test/process_allocator_cleanup/process_allocator_cleanup.rs b/hyperactor_mesh/test/process_allocator_cleanup/process_allocator_cleanup.rs index a8ed898fe..21345f675 100644 --- a/hyperactor_mesh/test/process_allocator_cleanup/process_allocator_cleanup.rs +++ b/hyperactor_mesh/test/process_allocator_cleanup/process_allocator_cleanup.rs @@ -27,6 +27,7 @@ use tokio::time::timeout; /// Test that ProcessAllocator children are cleaned up when parent is killed #[tokio::test] +#[cfg_attr(not(fbcode_build), ignore)] async fn test_process_allocator_child_cleanup() { let test_binary_path = buck_resources::get("monarch/hyperactor_mesh/test_bin").unwrap(); eprintln!("Starting test process allocator at: {:?}", test_binary_path); diff --git a/hyperactor_multiprocess/Cargo.toml b/hyperactor_multiprocess/Cargo.toml index 2a463a1ae..748222e10 100644 --- a/hyperactor_multiprocess/Cargo.toml +++ b/hyperactor_multiprocess/Cargo.toml @@ -40,3 +40,6 @@ py-spy = { git = "https://github.com/technicianted/py-spy", rev = "8f74f3e4f955f [target.'cfg(target_os = "linux")'.dependencies] py-spy = { git = "https://github.com/technicianted/py-spy", rev = "8f74f3e4f955fee57f0d4a8103511ee788348a2a", features = ["unwind"] } + +[lints] +rust = { unexpected_cfgs = { check-cfg = ["cfg(fbcode_build)"], level = "warn" } } diff --git a/hyperactor_multiprocess/src/proc_actor.rs b/hyperactor_multiprocess/src/proc_actor.rs index 8d35889eb..7d23acaf0 100644 --- a/hyperactor_multiprocess/src/proc_actor.rs +++ b/hyperactor_multiprocess/src/proc_actor.rs @@ -1043,6 +1043,7 @@ mod tests { #[tracing_test::traced_test] #[tokio::test] + #[cfg_attr(not(fbcode_build), ignore)] async fn test_stop_timeout() { let Bootstrapped { server_handle, @@ -1488,6 +1489,7 @@ mod tests { #[tracing_test::traced_test] #[tokio::test] + #[cfg_attr(not(fbcode_build), ignore)] async fn test_proc_actor_mailbox_admin_message() { // Verify that proc actors update their address books on first // contact, and that no additional updates are triggered for diff --git a/monarch_hyperactor/Cargo.toml b/monarch_hyperactor/Cargo.toml index 928cb8726..f67a856e3 100644 --- a/monarch_hyperactor/Cargo.toml +++ b/monarch_hyperactor/Cargo.toml @@ -56,3 +56,7 @@ dir-diff = "0.3" [features] default = [] +packaged_rsync = [] + +[lints] +rust = { unexpected_cfgs = { check-cfg = ["cfg(fbcode_build)"], level = "warn" } } diff --git a/monarch_hyperactor/src/channel.rs b/monarch_hyperactor/src/channel.rs index 27d3e38a2..e3bdebdfe 100644 --- a/monarch_hyperactor/src/channel.rs +++ b/monarch_hyperactor/src/channel.rs @@ -158,6 +158,8 @@ mod tests { use super::*; #[test] + // TODO: OSS: failed to retrieve ipv6 address + #[cfg_attr(not(fbcode_build), ignore)] fn test_channel_any_and_parse() -> PyResult<()> { // just make sure any() and parse() calls work for all transports for transport in [ diff --git a/monarch_hyperactor/src/code_sync/rsync.rs b/monarch_hyperactor/src/code_sync/rsync.rs index d41c8c668..47bdf3f88 100644 --- a/monarch_hyperactor/src/code_sync/rsync.rs +++ b/monarch_hyperactor/src/code_sync/rsync.rs @@ -468,6 +468,8 @@ mod tests { use super::*; #[tokio::test] + // TODO: OSS: Cannot assign requested address (os error 99) + #[cfg_attr(not(fbcode_build), ignore)] async fn test_simple() -> Result<()> { let input = TempDir::new()?; fs::write(input.path().join("foo.txt"), "hello world").await?; @@ -485,6 +487,8 @@ mod tests { } #[tokio::test] + // TODO: OSS: Cannot assign requested address (os error 99) + #[cfg_attr(not(fbcode_build), ignore)] async fn test_rsync_actor_and_mesh() -> Result<()> { // Create source workspace with test files let source_workspace = TempDir::new()?; diff --git a/monarch_hyperactor/tests/code_sync/auto_reload.rs b/monarch_hyperactor/tests/code_sync/auto_reload.rs index 689cb45b6..6dab2d96b 100644 --- a/monarch_hyperactor/tests/code_sync/auto_reload.rs +++ b/monarch_hyperactor/tests/code_sync/auto_reload.rs @@ -26,6 +26,8 @@ use tempfile::TempDir; use tokio::fs; #[tokio::test] +// TODO: OSS: ModuleNotFoundError: No module named 'monarch' +#[cfg_attr(not(fbcode_build), ignore)] async fn test_auto_reload_actor() -> Result<()> { pyo3::prepare_freethreaded_python(); Python::with_gil(|py| py.run(c_str!("import monarch._rust_bindings"), None, None))?; diff --git a/ndslice/src/reshape.rs b/ndslice/src/reshape.rs index 63647a9b7..112958a85 100644 --- a/ndslice/src/reshape.rs +++ b/ndslice/src/reshape.rs @@ -1267,6 +1267,11 @@ mod tests { cases: 20, ..ProptestConfig::default() })] #[test] + // TODO: OSS: thread 'reshape::tests::test_reshape_selection' panicked at ndslice/src/reshape.rs:1265:5: + // proptest: If this test was run on a CI system, you may wish to add the following line to your copy of the file. (You may need to create it.) + // cc 8eeb877d0ae01955610362f0b8b5fce502a5b3ea58ed1fcbde7767b185474a79 + // Test failed: empty range 3:4:1. + #[cfg_attr(not(fbcode_build), ignore)] fn test_reshape_selection((slice, fanout_limit) in gen_slice(4, 64).prop_flat_map(|slice| { let max_dimension_size = slice.sizes().iter().max().unwrap(); (1..=*max_dimension_size).prop_map(move |fanout_limit| (slice.clone(), fanout_limit)) diff --git a/scripts/common-setup.sh b/scripts/common-setup.sh index a4cd2f693..31d24ec12 100755 --- a/scripts/common-setup.sh +++ b/scripts/common-setup.sh @@ -24,7 +24,8 @@ setup_conda_environment() { install_system_dependencies() { echo "Installing system dependencies..." dnf update -y - dnf install clang-devel libunwind libunwind-devel -y + # Protobuf compiler is required for the tracing-perfetto-sdk-schema crate. + dnf install clang-devel libunwind libunwind-devel protobuf-compiler -y } # Install and configure Rust nightly toolchain