Skip to content

Commit 3837a51

Browse files
luccabb and facebook-github-bot
authored and committed
adding proportional cpu and memory args when not asking for the full node (#1678)
Summary: Pull Request resolved: #1678. Differential Revision: D85610860. Pulled By: luccabb.
1 parent 8f3981c commit 3837a51

File tree

2 files changed

+17
-0
lines changed

2 files changed

+17
-0
lines changed

python/monarch/_src/job/slurm.py

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,8 @@
1313
import sys
1414
from typing import Any, cast, Dict, FrozenSet, List, Optional, Sequence
1515

16+
import clusterscope
17+
1618
from monarch._rust_bindings.monarch_hyperactor.channel import ChannelTransport
1719
from monarch._rust_bindings.monarch_hyperactor.config import configure
1820

@@ -134,6 +136,20 @@ def _submit_slurm_job(self, num_nodes: int) -> str:
134136
if self._partition:
135137
sbatch_directives.append(f"#SBATCH --partition={self._partition}")
136138

139+
# add proportional cpu and memory args when not taking the full node
140+
if not self._exclusive and self._partition and self._gpus_per_node:
141+
gpus_per_task = self._gpus_per_node // self._ntasks_per_node
142+
assert self._partition, "Partition must be set to get cpu and memory args"
143+
slurm_args = clusterscope.job_gen_task_slurm(
144+
partition=self._partition,
145+
gpus_per_task=gpus_per_task,
146+
tasks_per_node=self._ntasks_per_node,
147+
)
148+
sbatch_directives.append(
149+
f"#SBATCH --cpus-per-task={slurm_args['cpus_per_task']}"
150+
)
151+
sbatch_directives.append(f"#SBATCH --mem={slurm_args['memory']}")
152+
137153
# Add any additional slurm args as directives
138154
for arg in self._slurm_args:
139155
if arg.startswith("-"):

requirements.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,3 +10,4 @@ torchx-nightly
1010
lark
1111
tabulate
1212
opentelemetry-api
13+
clusterscope

0 commit comments

Comments
 (0)