Skip to content

Commit 87ff242

Browse files
dulinriley authored and meta-codesync[bot] committed
Expand workspace to cover all crates and enable rust testing (#1684)
Summary: Pull Request resolved: #1684. Expand GitHub Rust testing to the whole workspace of crates in monarch. Tests that do not yet pass on GitHub are marked as fb-only for now. Many of them can be fixed easily, but this way we can turn on the majority of tests right away. Reviewed By: colin2328. Differential Revision: D85676520.
1 parent 0af6421 commit 87ff242

File tree

32 files changed

+117
-23
lines changed

32 files changed

+117
-23
lines changed

.config/nextest.toml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
[profile.ci]
2+
# Do not cancel the test run on the first failure.
3+
fail-fast = false
4+
5+
[profile.ci.junit]
6+
path = "junit.xml"

.github/workflows/test-gpu-rust.yml

Lines changed: 16 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -56,9 +56,21 @@ jobs:
5656
5757
# Run GPU Rust tests
5858
echo "Running OSS Rust tests..."
59-
# TODO: fix broken tests, then update to `cargo test --no-fail-fast`
60-
cargo test -p monarch_rdma
6159
# Uses cargo nextest to run tests in separate processes, which better matches
6260
# internal buck test behavior.
63-
# TODO: increase coverage to more crates.
64-
cargo nextest run -p hyperactor --no-fail-fast
61+
# The CI profile is configured in .config/nextest.toml
62+
# Exclude filter is for packages that don't build in Github Actions yet.
63+
# * monarch_messages: monarch/target/debug/deps/monarch_messages-...:
64+
# /lib64/libm.so.6: version `GLIBC_2.29' not found
65+
# (required by /meta-pytorch/monarch/libtorch/lib/libtorch_cpu.so)
66+
cargo nextest run --workspace --profile ci \
67+
--exclude monarch_messages \
68+
--exclude monarch_tensor_worker \
69+
--exclude monarch_simulator_lib \
70+
--exclude torch-sys \
71+
--exclude torch-sys-cuda
72+
# Copy the test results to the expected location
73+
# TODO: error in pytest-results-action, TypeError: results.testsuites.testsuite.testcase is not iterable
74+
# Don't try to parse these results for now.
75+
# mkdir -p "${RUNNER_TEST_RESULTS_DIR:-test-results}"
76+
# cp target/nextest/ci/junit.xml "${RUNNER_TEST_RESULTS_DIR:-test-results}/junit.xml"

Cargo.toml

Lines changed: 14 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
[workspace]
22
resolver = "2"
33
members = [
4+
"build_utils",
45
"controller",
56
"cuda-sys",
67
"erased_lifetime",
@@ -10,11 +11,22 @@ members = [
1011
"hyperactor_multiprocess",
1112
"hyperactor_mesh",
1213
"hyperactor_mesh_macros",
13-
"ndslice",
14+
"hyperactor_telemetry",
15+
"monarch_conda",
1416
"monarch_extension",
15-
"monarch_tensor_worker",
17+
"monarch_hyperactor",
18+
"monarch_messages",
19+
"monarch_perfetto_trace",
1620
"monarch_rdma",
21+
"monarch_simulator",
22+
"monarch_tensor_worker",
23+
"monarch_types",
1724
"nccl-sys",
25+
"ndslice",
26+
"preempt_rwlock",
1827
"rdmaxcel-sys",
28+
"serde_multipart",
29+
"timed_test",
1930
"torch-sys",
31+
"torch-sys-cuda",
2032
]

controller/Cargo.toml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,4 +36,8 @@ tracing = { version = "0.1.41", features = ["attributes", "valuable"] }
3636

3737
[dev-dependencies]
3838
monarch_types = { version = "0.0.0", path = "../monarch_types" }
39+
timed_test = { version = "0.0.0", path = "../timed_test" }
3940
torch-sys = { version = "0.0.0", path = "../torch-sys" }
41+
42+
[lints]
43+
rust = { unexpected_cfgs = { check-cfg = ["cfg(fbcode_build)"], level = "warn" } }

controller/src/lib.rs

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -660,6 +660,7 @@ mod tests {
660660
use monarch_messages::worker::CallFunctionParams;
661661
use monarch_messages::worker::WorkerMessage;
662662
use monarch_types::PyTree;
663+
use timed_test::async_timed_test;
663664
use torch_sys::RValue;
664665

665666
use super::*;
@@ -1838,7 +1839,9 @@ mod tests {
18381839

18391840
hyperactor::remote!(PanickingActor);
18401841

1841-
#[tokio::test]
1842+
#[async_timed_test(timeout_secs = 30)]
1843+
// times out (both internal and external).
1844+
#[cfg_attr(not(fbcode_build), ignore)]
18421845
async fn test_supervision_fault() {
18431846
// Start system actor.
18441847
let timeout: Duration = Duration::from_secs(6);

hyperactor/Cargo.toml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -100,3 +100,6 @@ tracing-test = { version = "0.2.3", features = ["no-env-filter"] }
100100
[features]
101101
default = []
102102
stdio-write-probe = []
103+
104+
[lints]
105+
rust = { unexpected_cfgs = { check-cfg = ["cfg(fbcode_build)"], level = "warn" } }

hyperactor/src/channel.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1143,7 +1143,7 @@ mod tests {
11431143

11441144
#[tokio::test]
11451145
// TODO: OSS: called `Result::unwrap()` on an `Err` value: Server(Listen(Tcp([::1]:0), Os { code: 99, kind: AddrNotAvailable, message: "Cannot assign requested address" }))
1146-
#[cfg_attr(not(feature = "fb"), ignore)]
1146+
#[cfg_attr(not(fbcode_build), ignore)]
11471147
async fn test_dial_serve() {
11481148
for addr in addrs() {
11491149
let (listen_addr, mut rx) = crate::channel::serve::<i32>(addr).unwrap();
@@ -1155,7 +1155,7 @@ mod tests {
11551155

11561156
#[tokio::test]
11571157
// TODO: OSS: called `Result::unwrap()` on an `Err` value: Server(Listen(Tcp([::1]:0), Os { code: 99, kind: AddrNotAvailable, message: "Cannot assign requested address" }))
1158-
#[cfg_attr(not(feature = "fb"), ignore)]
1158+
#[cfg_attr(not(fbcode_build), ignore)]
11591159
async fn test_send() {
11601160
let config = crate::config::global::lock();
11611161

hyperactor/src/channel/net.rs

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -2572,7 +2572,7 @@ mod tests {
25722572
#[tracing_test::traced_test]
25732573
#[async_timed_test(timeout_secs = 60)]
25742574
// TODO: OSS: called `Result::unwrap()` on an `Err` value: Listen(Tcp([::1]:0), Os { code: 99, kind: AddrNotAvailable, message: "Cannot assign requested address" })
2575-
#[cfg_attr(not(feature = "fb"), ignore)]
2575+
#[cfg_attr(not(fbcode_build), ignore)]
25762576
async fn test_tcp_basic() {
25772577
let (addr, mut rx) = tcp::serve::<u64>("[::1]:0".parse().unwrap()).unwrap();
25782578
{
@@ -2605,7 +2605,7 @@ mod tests {
26052605
// The message size is limited by CODEC_MAX_FRAME_LENGTH.
26062606
#[async_timed_test(timeout_secs = 5)]
26072607
// TODO: OSS: called `Result::unwrap()` on an `Err` value: Listen(Tcp([::1]:0), Os { code: 99, kind: AddrNotAvailable, message: "Cannot assign requested address" })
2608-
#[cfg_attr(not(feature = "fb"), ignore)]
2608+
#[cfg_attr(not(fbcode_build), ignore)]
26092609
async fn test_tcp_message_size() {
26102610
let default_size_in_bytes = 100 * 1024 * 1024;
26112611
// Use temporary config for this test
@@ -2635,7 +2635,7 @@ mod tests {
26352635

26362636
#[async_timed_test(timeout_secs = 30)]
26372637
// TODO: OSS: called `Result::unwrap()` on an `Err` value: Listen(Tcp([::1]:0), Os { code: 99, kind: AddrNotAvailable, message: "Cannot assign requested address" })
2638-
#[cfg_attr(not(feature = "fb"), ignore)]
2638+
#[cfg_attr(not(fbcode_build), ignore)]
26392639
async fn test_ack_flush() {
26402640
let config = config::global::lock();
26412641
// Set a large value to effectively prevent acks from being sent except
@@ -2659,7 +2659,7 @@ mod tests {
26592659
#[tracing_test::traced_test]
26602660
#[tokio::test]
26612661
// TODO: OSS: failed to retrieve ipv6 address
2662-
#[cfg_attr(not(feature = "fb"), ignore)]
2662+
#[cfg_attr(not(fbcode_build), ignore)]
26632663
async fn test_meta_tls_basic() {
26642664
let addr = ChannelAddr::any(ChannelTransport::MetaTls(TlsMode::IpV6));
26652665
let meta_addr = match addr {
@@ -3273,7 +3273,7 @@ mod tests {
32733273
#[tracing_test::traced_test]
32743274
#[tokio::test]
32753275
// TODO: OSS: The logs_assert function returned an error: expected log not found
3276-
#[cfg_attr(not(feature = "fb"), ignore)]
3276+
#[cfg_attr(not(fbcode_build), ignore)]
32773277
async fn test_tcp_tx_delivery_timeout() {
32783278
// This link always fails to connect.
32793279
let link = MockLink::<u64>::fail_connects();
@@ -3699,15 +3699,15 @@ mod tests {
36993699
#[tracing_test::traced_test]
37003700
#[async_timed_test(timeout_secs = 30)]
37013701
// TODO: OSS: The logs_assert function returned an error: expected log not found
3702-
#[cfg_attr(not(feature = "fb"), ignore)]
3702+
#[cfg_attr(not(fbcode_build), ignore)]
37033703
async fn test_ack_exceeded_limit_with_connected_link() {
37043704
verify_ack_exceeded_limit(false).await;
37053705
}
37063706

37073707
#[tracing_test::traced_test]
37083708
#[async_timed_test(timeout_secs = 30)]
37093709
// TODO: OSS: The logs_assert function returned an error: expected log not found
3710-
#[cfg_attr(not(feature = "fb"), ignore)]
3710+
#[cfg_attr(not(fbcode_build), ignore)]
37113711
async fn test_ack_exceeded_limit_with_broken_link() {
37123712
verify_ack_exceeded_limit(true).await;
37133713
}
@@ -3878,7 +3878,7 @@ mod tests {
38783878

38793879
#[async_timed_test(timeout_secs = 300)]
38803880
// TODO: OSS: called `Result::unwrap()` on an `Err` value: Listen(Tcp([::1]:0), Os { code: 99, kind: AddrNotAvailable, message: "Cannot assign requested address" })
3881-
#[cfg_attr(not(feature = "fb"), ignore)]
3881+
#[cfg_attr(not(fbcode_build), ignore)]
38823882
async fn test_tcp_throughput() {
38833883
let config = config::global::lock();
38843884
let _guard =
@@ -3930,7 +3930,7 @@ mod tests {
39303930
#[tracing_test::traced_test]
39313931
#[async_timed_test(timeout_secs = 60)]
39323932
// TODO: OSS: The logs_assert function returned an error: expected log not found
3933-
#[cfg_attr(not(feature = "fb"), ignore)]
3933+
#[cfg_attr(not(fbcode_build), ignore)]
39343934
async fn test_net_tx_closed_on_server_reject() {
39353935
let link = MockLink::<u64>::new();
39363936
let receiver_storage = link.receiver_storage();

hyperactor/src/config.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -314,7 +314,7 @@ mod tests {
314314
#[tracing_test::traced_test]
315315
#[test]
316316
// TODO: OSS: The logs_assert function returned an error: missing log lines: {"# export HYPERACTOR_DEFAULT_ENCODING=serde_multipart", ...}
317-
#[cfg_attr(not(feature = "fb"), ignore)]
317+
#[cfg_attr(not(fbcode_build), ignore)]
318318
fn test_from_env() {
319319
// Set environment variables
320320
// SAFETY: TODO: Audit that the environment access only happens in single-threaded code.

hyperactor/src/host.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1280,7 +1280,7 @@ mod tests {
12801280

12811281
#[tokio::test]
12821282
// TODO: OSS: called `Result::unwrap()` on an `Err` value: ReadFailed { manifest_path: "/meta-pytorch/monarch/target/debug/deps/hyperactor-0e1fe83af739d976.resources.json", source: Os { code: 2, kind: NotFound, message: "No such file or directory" } }
1283-
#[cfg_attr(not(feature = "fb"), ignore)]
1283+
#[cfg_attr(not(fbcode_build), ignore)]
12841284
async fn test_process_proc_manager() {
12851285
hyperactor_telemetry::initialize_logging(crate::clock::ClockKind::default());
12861286

0 commit comments

Comments
 (0)