Skip to content

Commit 4366269

Browse files
colin2328, meta-codesync[bot]
authored and committed
allow process allocator to take 1 dimension only (#1569)
Summary: Pull Request resolved: #1569. P1995406113. Allow the process allocator to take 1 dimension only. Updated based on mariusae's feedback to use: "So we should just make that explicit, and expose two different MAST allocators (the only difference is how they interpret the extent). But at least then we have clear and well-defined behavior." Reviewed By: mariusae. Differential Revision: D84844137. fbshipit-source-id: ebbc963d5035b0e2d226a2229a58a214d710fc1e
1 parent 8ff936d commit 4366269

File tree

25 files changed

+326
-91
lines changed

25 files changed

+326
-91
lines changed

hyperactor_mesh/benches/main.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,7 @@ fn bench_actor_scaling(c: &mut Criterion) {
4848
constraints: Default::default(),
4949
proc_name: None,
5050
transport: ChannelTransport::Local,
51+
proc_allocation_mode: Default::default(),
5152
})
5253
.await
5354
.unwrap();
@@ -147,6 +148,7 @@ fn bench_actor_mesh_message_sizes(c: &mut Criterion) {
147148
constraints: Default::default(),
148149
proc_name: None,
149150
transport: ChannelTransport::Local,
151+
proc_allocation_mode: Default::default(),
150152
})
151153
.await
152154
.unwrap();

hyperactor_mesh/examples/sieve.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -112,6 +112,7 @@ async fn main() -> Result<ExitCode> {
112112
constraints: Default::default(),
113113
proc_name: None,
114114
transport: ChannelTransport::Local,
115+
proc_allocation_mode: Default::default(),
115116
})
116117
.await?;
117118

hyperactor_mesh/src/actor_mesh.rs

Lines changed: 24 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -783,6 +783,7 @@ pub(crate) mod test_util {
783783
constraints: Default::default(),
784784
proc_name: None,
785785
transport: ChannelTransport::Local,
786+
proc_allocation_mode: Default::default(),
786787
})
787788
.await
788789
.unwrap();
@@ -888,7 +889,8 @@ mod tests {
888889
extent: extent! { replica = 1 },
889890
constraints: Default::default(),
890891
proc_name: None,
891-
transport: default_transport()
892+
transport: default_transport(),
893+
proc_allocation_mode: Default::default(),
892894
})
893895
.await
894896
.unwrap();
@@ -913,7 +915,8 @@ mod tests {
913915
extent: extent!(replica = 4),
914916
constraints: Default::default(),
915917
proc_name: None,
916-
transport: default_transport()
918+
transport: default_transport(),
919+
proc_allocation_mode: Default::default(),
917920
})
918921
.await
919922
.unwrap();
@@ -942,6 +945,7 @@ mod tests {
942945
constraints: Default::default(),
943946
proc_name: None,
944947
transport: default_transport(),
948+
proc_allocation_mode: Default::default(),
945949
})
946950
.await
947951
.unwrap();
@@ -980,6 +984,7 @@ mod tests {
980984
constraints: Default::default(),
981985
proc_name: None,
982986
transport: default_transport(),
987+
proc_allocation_mode: Default::default(),
983988
})
984989
.await
985990
.unwrap();
@@ -1026,6 +1031,7 @@ mod tests {
10261031
constraints: Default::default(),
10271032
proc_name: None,
10281033
transport: default_transport(),
1034+
proc_allocation_mode: Default::default(),
10291035
})
10301036
.await
10311037
.unwrap();
@@ -1070,6 +1076,7 @@ mod tests {
10701076
constraints: Default::default(),
10711077
proc_name: None,
10721078
transport: default_transport(),
1079+
proc_allocation_mode: Default::default(),
10731080
})
10741081
.await
10751082
.unwrap();
@@ -1104,6 +1111,7 @@ mod tests {
11041111
constraints: Default::default(),
11051112
proc_name: None,
11061113
transport: default_transport(),
1114+
proc_allocation_mode: Default::default(),
11071115
})
11081116
.await
11091117
.unwrap();
@@ -1153,6 +1161,7 @@ mod tests {
11531161
constraints: Default::default(),
11541162
proc_name: None,
11551163
transport: default_transport(),
1164+
proc_allocation_mode: Default::default(),
11561165
})
11571166
.await
11581167
.unwrap();
@@ -1185,6 +1194,7 @@ mod tests {
11851194
constraints: Default::default(),
11861195
proc_name: None,
11871196
transport: default_transport(),
1197+
proc_allocation_mode: Default::default(),
11881198
})
11891199
.await
11901200
.unwrap();
@@ -1216,6 +1226,7 @@ mod tests {
12161226
constraints: Default::default(),
12171227
proc_name: None,
12181228
transport: default_transport(),
1229+
proc_allocation_mode: Default::default(),
12191230
})
12201231
.await
12211232
.unwrap();
@@ -1282,6 +1293,7 @@ mod tests {
12821293
constraints: Default::default(),
12831294
proc_name: None,
12841295
transport: ChannelTransport::Local,
1296+
proc_allocation_mode: Default::default(),
12851297
})
12861298
.await
12871299
.unwrap();
@@ -1352,6 +1364,7 @@ mod tests {
13521364
constraints: Default::default(),
13531365
proc_name: None,
13541366
transport: ChannelTransport::Local,
1367+
proc_allocation_mode: Default::default(),
13551368
})
13561369
.await
13571370
.unwrap();
@@ -1421,6 +1434,7 @@ mod tests {
14211434
constraints: Default::default(),
14221435
proc_name: None,
14231436
transport: ChannelTransport::Local,
1437+
proc_allocation_mode: Default::default(),
14241438
})
14251439
.await
14261440
.unwrap();
@@ -1535,6 +1549,7 @@ mod tests {
15351549
constraints: Default::default(),
15361550
proc_name: None,
15371551
transport: ChannelTransport::Unix,
1552+
proc_allocation_mode: Default::default(),
15381553
})
15391554
.await
15401555
.unwrap();
@@ -1620,6 +1635,7 @@ mod tests {
16201635
constraints: Default::default(),
16211636
proc_name: None,
16221637
transport: ChannelTransport::Unix,
1638+
proc_allocation_mode: Default::default(),
16231639
})
16241640
.await
16251641
.unwrap();
@@ -1783,7 +1799,8 @@ mod tests {
17831799
extent,
17841800
constraints: Default::default(),
17851801
proc_name: None,
1786-
transport: ChannelTransport::Local
1802+
transport: ChannelTransport::Local,
1803+
proc_allocation_mode: Default::default(),
17871804
}))
17881805
.unwrap();
17891806
let instance = runtime.block_on(crate::v1::testing::instance());
@@ -1816,7 +1833,8 @@ mod tests {
18161833
extent: extent.clone(),
18171834
constraints: Default::default(),
18181835
proc_name: None,
1819-
transport: ChannelTransport::Local
1836+
transport: ChannelTransport::Local,
1837+
proc_allocation_mode: Default::default(),
18201838
}))
18211839
.unwrap();
18221840
let instance = runtime.block_on(crate::v1::testing::instance());
@@ -1890,7 +1908,8 @@ mod tests {
18901908
extent,
18911909
constraints: Default::default(),
18921910
proc_name: None,
1893-
transport: ChannelTransport::Local
1911+
transport: ChannelTransport::Local,
1912+
proc_allocation_mode: Default::default(),
18941913
}))
18951914
.unwrap();
18961915
let instance = runtime.block_on(crate::v1::testing::instance());

hyperactor_mesh/src/alloc.rs

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -133,6 +133,27 @@ pub struct AllocConstraints {
133133
pub match_labels: HashMap<String, String>,
134134
}
135135

136+
/// Specifies how to interpret the extent dimensions for allocation.
137+
#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq)]
138+
pub enum ProcAllocationMode {
139+
/// Proc-level allocation: splits extent to allocate multiple processes per host.
140+
/// Requires at least 2 dimensions (e.g., [hosts: N, gpus: M]).
141+
/// Splits by second-to-last dimension, creating N regions with M processes each.
142+
/// Used by MastAllocator.
143+
ProcLevel,
144+
/// Host-level allocation: each point in the extent is a host (no sub-host splitting).
145+
/// For extent!(region = 2, host = 4), creates 8 regions, each representing 1 host.
146+
/// Used by MastHostAllocator.
147+
HostLevel,
148+
}
149+
150+
impl Default for ProcAllocationMode {
151+
fn default() -> Self {
152+
// Default to ProcLevel for backward compatibility
153+
Self::ProcLevel
154+
}
155+
}
156+
136157
/// A specification (desired state) of an alloc.
137158
#[derive(Debug, Clone, Serialize, Deserialize)]
138159
pub struct AllocSpec {
@@ -151,6 +172,15 @@ pub struct AllocSpec {
151172

152173
/// The transport to use for the procs in this alloc.
153174
pub transport: ChannelTransport,
175+
176+
/// Specifies how to interpret the extent dimensions for allocation.
177+
/// Defaults to ProcLevel for backward compatibility.
178+
#[serde(default = "default_proc_allocation_mode")]
179+
pub proc_allocation_mode: ProcAllocationMode,
180+
}
181+
182+
fn default_proc_allocation_mode() -> ProcAllocationMode {
183+
ProcAllocationMode::ProcLevel
154184
}
155185

156186
/// The core allocator trait, implemented by all allocators.
@@ -767,6 +797,7 @@ pub(crate) mod testing {
767797
constraints: Default::default(),
768798
proc_name: None,
769799
transport: default_transport(),
800+
proc_allocation_mode: Default::default(),
770801
})
771802
.await
772803
.unwrap();
@@ -919,6 +950,7 @@ pub(crate) mod testing {
919950
constraints: Default::default(),
920951
proc_name: None,
921952
transport: ChannelTransport::Unix,
953+
proc_allocation_mode: Default::default(),
922954
})
923955
.await
924956
.unwrap();

hyperactor_mesh/src/alloc/process.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -710,6 +710,7 @@ mod tests {
710710
constraints: Default::default(),
711711
proc_name: None,
712712
transport: ChannelTransport::Unix,
713+
proc_allocation_mode: Default::default(),
713714
})
714715
.await
715716
.unwrap();

0 commit comments

Comments
 (0)