Skip to content

Commit f01899f

Browse files
dulinriley authored and meta-codesync[bot] committed
Expand workspace to cover all crates and enable rust testing (#1684)
Summary: Pull Request resolved: #1684. Expand GitHub Rust testing to the whole workspace of crates in monarch. Tests that do not pass in GitHub are marked as fb-only for now. Many of them can be fixed easily, but we can turn on the majority of tests right away. Reviewed By: colin2328. Differential Revision: D85676520.
1 parent b249a03 commit f01899f

File tree

32 files changed

+115
-23
lines changed

32 files changed

+115
-23
lines changed

.config/nextest.toml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
[profile.ci]
2+
# Do not cancel the test run on the first failure.
3+
fail-fast = false
4+
5+
[profile.ci.junit]
6+
path = "junit.xml"

.github/workflows/test-gpu-rust.yml

Lines changed: 14 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -56,9 +56,19 @@ jobs:
5656
5757
# Run GPU Rust tests
5858
echo "Running OSS Rust tests..."
59-
# TODO: fix broken tests, then update to `cargo test --no-fail-fast`
60-
cargo test -p monarch_rdma
6159
# Uses cargo nextest to run tests in separate processes, which better matches
6260
# internal buck test behavior.
63-
# TODO: increase coverage to more crates.
64-
cargo nextest run -p hyperactor --no-fail-fast
61+
# The CI profile is configured in .config/nextest.toml
62+
# Exclude filter is for packages that don't build in Github Actions yet.
63+
# * monarch_messages: monarch/target/debug/deps/monarch_messages-...:
64+
# /lib64/libm.so.6: version `GLIBC_2.29' not found
65+
# (required by /meta-pytorch/monarch/libtorch/lib/libtorch_cpu.so)
66+
cargo nextest run --workspace --profile ci \
67+
--exclude monarch_messages \
68+
--exclude monarch_tensor_worker \
69+
--exclude monarch_simulator_lib \
70+
--exclude torch-sys \
71+
--exclude torch-sys-cuda
72+
# Copy the test results to the expected location
73+
mkdir -p "${RUNNER_TEST_RESULTS_DIR:-test-results}"
74+
cp target/nextest/ci/junit.xml "${RUNNER_TEST_RESULTS_DIR:-test-results}/junit.xml"

Cargo.toml

Lines changed: 14 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
[workspace]
22
resolver = "2"
33
members = [
4+
"build_utils",
45
"controller",
56
"cuda-sys",
67
"erased_lifetime",
@@ -10,11 +11,22 @@ members = [
1011
"hyperactor_multiprocess",
1112
"hyperactor_mesh",
1213
"hyperactor_mesh_macros",
13-
"ndslice",
14+
"hyperactor_telemetry",
15+
"monarch_conda",
1416
"monarch_extension",
15-
"monarch_tensor_worker",
17+
"monarch_hyperactor",
18+
"monarch_messages",
19+
"monarch_perfetto_trace",
1620
"monarch_rdma",
21+
"monarch_simulator",
22+
"monarch_tensor_worker",
23+
"monarch_types",
1724
"nccl-sys",
25+
"ndslice",
26+
"preempt_rwlock",
1827
"rdmaxcel-sys",
28+
"serde_multipart",
29+
"timed_test",
1930
"torch-sys",
31+
"torch-sys-cuda",
2032
]

controller/Cargo.toml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,4 +36,8 @@ tracing = { version = "0.1.41", features = ["attributes", "valuable"] }
3636

3737
[dev-dependencies]
3838
monarch_types = { version = "0.0.0", path = "../monarch_types" }
39+
timed_test = { version = "0.0.0", path = "../timed_test" }
3940
torch-sys = { version = "0.0.0", path = "../torch-sys" }
41+
42+
[lints]
43+
rust = { unexpected_cfgs = { check-cfg = ["cfg(fbcode_build)"], level = "warn" } }

controller/src/lib.rs

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -660,6 +660,7 @@ mod tests {
660660
use monarch_messages::worker::CallFunctionParams;
661661
use monarch_messages::worker::WorkerMessage;
662662
use monarch_types::PyTree;
663+
use timed_test::async_timed_test;
663664
use torch_sys::RValue;
664665

665666
use super::*;
@@ -1838,7 +1839,9 @@ mod tests {
18381839

18391840
hyperactor::remote!(PanickingActor);
18401841

1841-
#[tokio::test]
1842+
#[async_timed_test(timeout_secs = 30)]
1843+
// times out (both internal and external).
1844+
#[cfg_attr(not(fbcode_build), ignore)]
18421845
async fn test_supervision_fault() {
18431846
// Start system actor.
18441847
let timeout: Duration = Duration::from_secs(6);

hyperactor/Cargo.toml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -100,3 +100,6 @@ tracing-test = { version = "0.2.3", features = ["no-env-filter"] }
100100
[features]
101101
default = []
102102
stdio-write-probe = []
103+
104+
[lints]
105+
rust = { unexpected_cfgs = { check-cfg = ["cfg(fbcode_build)"], level = "warn" } }

hyperactor/src/channel.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1132,7 +1132,7 @@ mod tests {
11321132

11331133
#[tokio::test]
11341134
// TODO: OSS: called `Result::unwrap()` on an `Err` value: Server(Listen(Tcp([::1]:0), Os { code: 99, kind: AddrNotAvailable, message: "Cannot assign requested address" }))
1135-
#[cfg_attr(not(feature = "fb"), ignore)]
1135+
#[cfg_attr(not(fbcode_build), ignore)]
11361136
async fn test_dial_serve() {
11371137
for addr in addrs() {
11381138
let (listen_addr, mut rx) = crate::channel::serve::<i32>(addr).unwrap();
@@ -1144,7 +1144,7 @@ mod tests {
11441144

11451145
#[tokio::test]
11461146
// TODO: OSS: called `Result::unwrap()` on an `Err` value: Server(Listen(Tcp([::1]:0), Os { code: 99, kind: AddrNotAvailable, message: "Cannot assign requested address" }))
1147-
#[cfg_attr(not(feature = "fb"), ignore)]
1147+
#[cfg_attr(not(fbcode_build), ignore)]
11481148
async fn test_send() {
11491149
let config = crate::config::global::lock();
11501150

hyperactor/src/channel/net.rs

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -2556,7 +2556,7 @@ mod tests {
25562556
#[tracing_test::traced_test]
25572557
#[async_timed_test(timeout_secs = 30)]
25582558
// TODO: OSS: called `Result::unwrap()` on an `Err` value: Listen(Tcp([::1]:0), Os { code: 99, kind: AddrNotAvailable, message: "Cannot assign requested address" })
2559-
#[cfg_attr(not(feature = "fb"), ignore)]
2559+
#[cfg_attr(not(fbcode_build), ignore)]
25602560
async fn test_tcp_basic() {
25612561
let (addr, mut rx) = tcp::serve::<u64>("[::1]:0".parse().unwrap()).unwrap();
25622562
{
@@ -2580,7 +2580,7 @@ mod tests {
25802580
// The message size is limited by CODEC_MAX_FRAME_LENGTH.
25812581
#[async_timed_test(timeout_secs = 5)]
25822582
// TODO: OSS: called `Result::unwrap()` on an `Err` value: Listen(Tcp([::1]:0), Os { code: 99, kind: AddrNotAvailable, message: "Cannot assign requested address" })
2583-
#[cfg_attr(not(feature = "fb"), ignore)]
2583+
#[cfg_attr(not(fbcode_build), ignore)]
25842584
async fn test_tcp_message_size() {
25852585
let default_size_in_bytes = 100 * 1024 * 1024;
25862586
// Use temporary config for this test
@@ -2611,7 +2611,7 @@ mod tests {
26112611

26122612
#[async_timed_test(timeout_secs = 30)]
26132613
// TODO: OSS: called `Result::unwrap()` on an `Err` value: Listen(Tcp([::1]:0), Os { code: 99, kind: AddrNotAvailable, message: "Cannot assign requested address" })
2614-
#[cfg_attr(not(feature = "fb"), ignore)]
2614+
#[cfg_attr(not(fbcode_build), ignore)]
26152615
async fn test_ack_flush() {
26162616
let config = config::global::lock();
26172617
// Set a large value to effectively prevent acks from being sent except
@@ -2635,7 +2635,7 @@ mod tests {
26352635
#[tracing_test::traced_test]
26362636
#[tokio::test]
26372637
// TODO: OSS: failed to retrieve ipv6 address
2638-
#[cfg_attr(not(feature = "fb"), ignore)]
2638+
#[cfg_attr(not(fbcode_build), ignore)]
26392639
async fn test_meta_tls_basic() {
26402640
let addr = ChannelAddr::any(ChannelTransport::MetaTls(TlsMode::IpV6));
26412641
let meta_addr = match addr {
@@ -3240,7 +3240,7 @@ mod tests {
32403240
#[tracing_test::traced_test]
32413241
#[tokio::test]
32423242
// TODO: OSS: The logs_assert function returned an error: expected log not found
3243-
#[cfg_attr(not(feature = "fb"), ignore)]
3243+
#[cfg_attr(not(fbcode_build), ignore)]
32443244
async fn test_tcp_tx_delivery_timeout() {
32453245
// This link always fails to connect.
32463246
let link = MockLink::<u64>::fail_connects();
@@ -3666,15 +3666,15 @@ mod tests {
36663666
#[tracing_test::traced_test]
36673667
#[async_timed_test(timeout_secs = 30)]
36683668
// TODO: OSS: The logs_assert function returned an error: expected log not found
3669-
#[cfg_attr(not(feature = "fb"), ignore)]
3669+
#[cfg_attr(not(fbcode_build), ignore)]
36703670
async fn test_ack_exceeded_limit_with_connected_link() {
36713671
verify_ack_exceeded_limit(false).await;
36723672
}
36733673

36743674
#[tracing_test::traced_test]
36753675
#[async_timed_test(timeout_secs = 30)]
36763676
// TODO: OSS: The logs_assert function returned an error: expected log not found
3677-
#[cfg_attr(not(feature = "fb"), ignore)]
3677+
#[cfg_attr(not(fbcode_build), ignore)]
36783678
async fn test_ack_exceeded_limit_with_broken_link() {
36793679
verify_ack_exceeded_limit(true).await;
36803680
}
@@ -3845,7 +3845,7 @@ mod tests {
38453845

38463846
#[async_timed_test(timeout_secs = 300)]
38473847
// TODO: OSS: called `Result::unwrap()` on an `Err` value: Listen(Tcp([::1]:0), Os { code: 99, kind: AddrNotAvailable, message: "Cannot assign requested address" })
3848-
#[cfg_attr(not(feature = "fb"), ignore)]
3848+
#[cfg_attr(not(fbcode_build), ignore)]
38493849
async fn test_tcp_throughput() {
38503850
let config = config::global::lock();
38513851
let _guard =
@@ -3897,7 +3897,7 @@ mod tests {
38973897
#[tracing_test::traced_test]
38983898
#[async_timed_test(timeout_secs = 60)]
38993899
// TODO: OSS: The logs_assert function returned an error: expected log not found
3900-
#[cfg_attr(not(feature = "fb"), ignore)]
3900+
#[cfg_attr(not(fbcode_build), ignore)]
39013901
async fn test_net_tx_closed_on_server_reject() {
39023902
let link = MockLink::<u64>::new();
39033903
let receiver_storage = link.receiver_storage();

hyperactor/src/config.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -314,7 +314,7 @@ mod tests {
314314
#[tracing_test::traced_test]
315315
#[test]
316316
// TODO: OSS: The logs_assert function returned an error: missing log lines: {"# export HYPERACTOR_DEFAULT_ENCODING=serde_multipart", ...}
317-
#[cfg_attr(not(feature = "fb"), ignore)]
317+
#[cfg_attr(not(fbcode_build), ignore)]
318318
fn test_from_env() {
319319
// Set environment variables
320320
// SAFETY: TODO: Audit that the environment access only happens in single-threaded code.

hyperactor/src/host.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1280,7 +1280,7 @@ mod tests {
12801280

12811281
#[tokio::test]
12821282
// TODO: OSS: called `Result::unwrap()` on an `Err` value: ReadFailed { manifest_path: "/meta-pytorch/monarch/target/debug/deps/hyperactor-0e1fe83af739d976.resources.json", source: Os { code: 2, kind: NotFound, message: "No such file or directory" } }
1283-
#[cfg_attr(not(feature = "fb"), ignore)]
1283+
#[cfg_attr(not(fbcode_build), ignore)]
12841284
async fn test_process_proc_manager() {
12851285
hyperactor_telemetry::initialize_logging(crate::clock::ClockKind::default());
12861286

0 commit comments

Comments
 (0)