Skip to content

Commit 544fdca

Browse files
committed
use correct launcher in scr_run on different platforms
1 parent: 6bb59ac; commit: 544fdca

File tree

4 files changed: +71 / -82 lines changed

4 files changed: +71 / -82 lines changed

scripts/LSF/scr_run.in

Lines changed: 19 additions & 18 deletions
Original file line number | Diff line number | Diff line change
@@ -2,27 +2,33 @@
22

33
# requires: mpirun
44

5+
launcher="mpirun"
6+
prog="scr_${launcher}"
7+
8+
libdir="@X_LIBDIR@"
9+
bindir="@X_BINDIR@"
10+
511
# Print usage
612
if [ -z "$1" ]; then
713
echo USAGE:
8-
echo "scr_srun [srun args] [-rc|--run-cmd=<run_command>] [-rs|--restart-cmd=<restart_command>] [srun args]"
14+
echo "scr_$launcher [$launcher args] [-rc|--run-cmd=<run_command>] [-rs|--restart-cmd=<restart_command>] [$launcher args]"
915
echo "<run_command>: The command to run when no restart file is present"
1016
echo "<restart_command>: The command to run when a restart file is present"
1117
echo ""
12-
echo "The invoked command will be \`srun [srun_args] [run_command]\` when no restart file is present"
13-
echo "The invoked command will be \`srun [srun_args] [restart_command]\` when a restart file is present"
18+
echo "The invoked command will be \`$launcher [$launcher args] [run_command]\` when no restart file is present"
19+
echo "The invoked command will be \`$launcher [$launcher args] [restart_command]\` when a restart file is present"
1420
echo "If the string \"SCR_CKPT_NAME\" appears in the restart command, it will be replace by the name "
1521
echo "presented to SCR when the most recent checkpoint was written."
1622
echo ""
1723
echo "If no restart command is specified, the run command will always be used"
18-
echo "If no commands are specified, the srun arguments will be passed directly to srun in all circumstances"
24+
echo "If no commands are specified, the $launcher arguments will be passed directly to $launcher in all circumstances"
1925
echo "If no run command is specified, but a restart command is specified,"
20-
echo "then the restart command will be appended to the srun arguments when a restart file is present."
26+
echo "then the restart command will be appended to the $launcher arguments when a restart file is present."
2127
exit 0
2228
fi
2329

2430
# capture restart and run commands if specified
25-
srun_args=""
31+
launcher_args=""
2632
while [ ${1:+x} ]; do
2733
case $1 in
2834
--restart-cmd|-rs)
@@ -40,14 +46,14 @@ while [ ${1:+x} ]; do
4046
if [ -z "$run_cmd" ]; then shift; run_cmd=$1; fi
4147
shift ;;
4248
*)
43-
srun_args="$srun_args $1"
49+
launcher_args="$launcher_args $1"
4450
shift ;;
4551
esac
4652
done
4753

4854
# if SCR is disabled, just do a normal run and exit
4955
if [ "$SCR_ENABLE" == "0" ] ; then
50-
mpirun $srun_args $run_cmd
56+
$launcher $launcher_args $run_cmd
5157
exit $?
5258
fi
5359

@@ -58,11 +64,6 @@ if [ -n "$SCR_DEBUG" ]; then
5864
fi
5965
fi
6066

61-
prog=scr_mpirun
62-
63-
libdir="@X_LIBDIR@"
64-
bindir="@X_BINDIR@"
65-
6667
# make a record of start time
6768
timestamp=`date`
6869
echo "$prog: Started: $timestamp"
@@ -216,12 +217,12 @@ while [ 1 ] ; do
216217
timestamp=`date`
217218
echo "$prog: RUN $attempts: $timestamp"
218219

219-
launch_cmd="$srun_args $run_cmd"
220+
launch_cmd="$launcher_args $run_cmd"
220221
if [ ${restart_cmd:+x} ]; then
221-
restart_name=`srun $srun_args $bindir/scr_have_restart`
222+
restart_name=`$launcher $launcher_args $bindir/scr_have_restart`
222223
if [ ${restart_name:+x} ]; then
223224
my_restart_cmd=`echo $restart_cmd | sed "s#SCR_CKPT_NAME#${restart_name}#g"`
224-
launch_cmd="$srun_args $my_restart_cmd"
225+
launch_cmd="$launcher_args $my_restart_cmd"
225226
fi
226227
fi
227228

@@ -231,8 +232,8 @@ while [ 1 ] ; do
231232

232233
if [ $use_scr_watchdog -eq 0 ]; then
233234
echo "$target_hosts" > $hostfile
234-
mpirun --hostfile $hostfile $launch_cmd
235-
#mpirun --host $target_hosts $launch_cmd
235+
$launcher --hostfile $hostfile $launch_cmd
236+
#$launcher --host $target_hosts $launch_cmd
236237
# else
237238
# echo "$prog: Attempting to start watchdog process."
238239
# # need to get job step id of the srun command

scripts/PMIX/scr_run.in

Lines changed: 14 additions & 19 deletions
Original file line number | Diff line number | Diff line change
@@ -4,15 +4,21 @@
44
# note: this does not support the watchdog process yet since killing a jobstep isn't
55
# supported in pmix yet
66

7+
prog=scr_pmix_run_bash
8+
9+
libdir="@X_LIBDIR@"
10+
bindir="@X_BINDIR@"
11+
__have_libcppr="@HAVE_LIBCPPR@"
12+
713
# Print usage
814
if [ -z "$1" ]; then
915
echo USAGE:
1016
echo "scr_pmix_run [spawn args] [-rc|--run-cmd=<run_command>] [-rs|--restart-cmd=<restart_command>] [spawn args]"
1117
echo "<run_command>: The command to run when no restart file is present"
1218
echo "<restart_command>: The command to run when a restart file is present"
1319
echo ""
14-
echo "The invoked command will be \`scr_pmix_spawn [spawn_args] [run_command]\` when no restart file is present"
15-
echo "The invoked command will be \`scr_pmix_spawn [spawn_args] [restart_command]\` when a restart file is present"
20+
echo "The invoked command will be \`scr_pmix_spawn [spawn args] [run_command]\` when no restart file is present"
21+
echo "The invoked command will be \`scr_pmix_spawn [spawn args] [restart_command]\` when a restart file is present"
1622
echo "If the string \"SCR_CKPT_NAME\" appears in the restart command, it will be replace by the name "
1723
echo "presented to SCR when the most recent checkpoint was written."
1824
echo ""
@@ -24,7 +30,7 @@ if [ -z "$1" ]; then
2430
fi
2531

2632
# capture restart and run commands if specified
27-
spawn_args=""
33+
launcher_args=""
2834
while [ ${1:+x} ]; do
2935
case $1 in
3036
--restart-cmd|-rs)
@@ -42,18 +48,14 @@ while [ ${1:+x} ]; do
4248
if [ -z "$run_cmd" ]; then shift; run_cmd=$1; fi
4349
shift ;;
4450
*)
45-
spawn_args="$spawn_args $1"
51+
launcher_args="$launcher_args $1"
4652
shift ;;
4753
esac
4854
done
4955

50-
libdir="@X_LIBDIR@"
51-
bindir="@X_BINDIR@"
52-
__have_libcppr="@HAVE_LIBCPPR@"
53-
5456
# if SCR is disabled, just do a normal run and exit
5557
if [ "$SCR_ENABLE" == "0" ] ; then
56-
$bindir/scr_pmix_spawn $spawn_args $run_cmd
58+
$bindir/scr_pmix_spawn $launcher_args $run_cmd
5759
exit $?
5860
fi
5961

@@ -64,8 +66,6 @@ if [ -n "$SCR_DEBUG" ]; then
6466
fi
6567
fi
6668

67-
prog=scr_pmix_run_bash
68-
6969
# make a record of start time
7070
timestamp=`date`
7171
echo "$prog: Started: $timestamp"
@@ -111,11 +111,6 @@ if [ $? -ne 0 ] ; then
111111
exit 1
112112
fi
113113

114-
# NOP srun to force every node to run prolog to delete files from cache
115-
# TODO: remove this if admins find a better place to clear cache
116-
#srun /bin/hostname > /dev/null
117-
#don't nop in pmix machine type?
118-
119114
# make a record of time prerun is started
120115
timestamp=`date`
121116
echo "$prog: prerun: $timestamp"
@@ -214,12 +209,12 @@ while [ 1 ] ; do
214209
timestamp=`date`
215210
echo "$prog: RUN $attempts: $timestamp"
216211

217-
launch_cmd="$spawn_args $run_cmd"
212+
launch_cmd="$launcher_args $run_cmd"
218213
if [ ${restart_cmd:+x} ]; then
219-
restart_name=`srun $spawn_args $bindir/scr_have_restart`
214+
restart_name=`$bindir/scr_pmix_spawn $launcher_args $bindir/scr_have_restart`
220215
if [ ${restart_name:+x} ]; then
221216
my_restart_cmd=`echo $restart_cmd | sed "s#SCR_CKPT_NAME#${restart_name}#g"`
222-
launch_cmd="$spawn_args $my_restart_cmd"
217+
launch_cmd="$launcher_args $my_restart_cmd"
223218
fi
224219
fi
225220

scripts/TLCC/scr_run.in

Lines changed: 19 additions & 18 deletions
Original file line number | Diff line number | Diff line change
@@ -2,27 +2,33 @@
22

33
# requires: srun
44

5+
launcher="srun"
6+
prog="scr_${launcher}"
7+
8+
libdir="@X_LIBDIR@"
9+
bindir="@X_BINDIR@"
10+
511
# Print usage
612
if [ -z "$1" ]; then
713
echo USAGE:
8-
echo "scr_srun [srun args] [-rc|--run-cmd=<run_command>] [-rs|--restart-cmd=<restart_command>] [srun args]"
14+
echo "scr_$launcher [$launcher args] [-rc|--run-cmd=<run_command>] [-rs|--restart-cmd=<restart_command>] [$launcher args]"
915
echo "<run_command>: The command to run when no restart file is present"
1016
echo "<restart_command>: The command to run when a restart file is present"
1117
echo ""
12-
echo "The invoked command will be \`srun [srun_args] [run_command]\` when no restart file is present"
13-
echo "The invoked command will be \`srun [srun_args] [restart_command]\` when a restart file is present"
18+
echo "The invoked command will be \`$launcher [${launcher} args] [run_command]\` when no restart file is present"
19+
echo "The invoked command will be \`$launcher [${launcher} args] [restart_command]\` when a restart file is present"
1420
echo "If the string \"SCR_CKPT_NAME\" appears in the restart command, it will be replace by the name "
1521
echo "presented to SCR when the most recent checkpoint was written."
1622
echo ""
1723
echo "If no restart command is specified, the run command will always be used"
18-
echo "If no commands are specified, the srun arguments will be passed directly to srun in all circumstances"
24+
echo "If no commands are specified, the $launcher arguments will be passed directly to $launcher in all circumstances"
1925
echo "If no run command is specified, but a restart command is specified,"
20-
echo "then the restart command will be appended to the srun arguments when a restart file is present."
26+
echo "then the restart command will be appended to the $launcher arguments when a restart file is present."
2127
exit 0
2228
fi
2329

2430
# capture restart and run commands if specified
25-
srun_args=""
31+
launcher_args=""
2632
while [ ${1:+x} ]; do
2733
case $1 in
2834
--restart-cmd|-rs)
@@ -40,14 +46,14 @@ while [ ${1:+x} ]; do
4046
if [ -z "$run_cmd" ]; then shift; run_cmd=$1; fi
4147
shift ;;
4248
*)
43-
srun_args="$srun_args $1"
49+
launcher_args="$launcher_args $1"
4450
shift ;;
4551
esac
4652
done
4753

4854
# if SCR is disabled, just do a normal run and exit
4955
if [ "$SCR_ENABLE" == "0" ] ; then
50-
srun $srun_args $run_cmd
56+
$launcher $launcher_args $run_cmd
5157
exit $?
5258
fi
5359

@@ -58,11 +64,6 @@ if [ -n "$SCR_DEBUG" ]; then
5864
fi
5965
fi
6066

61-
prog=scr_srun
62-
63-
libdir="@X_LIBDIR@"
64-
bindir="@X_BINDIR@"
65-
6667
# make a record of start time
6768
timestamp=`date`
6869
echo "$prog: Started: $timestamp"
@@ -203,12 +204,12 @@ while [ 1 ] ; do
203204
timestamp=`date`
204205
echo "$prog: RUN $attempts: $timestamp"
205206

206-
launch_cmd="$srun_args $run_cmd"
207+
launch_cmd="$launcher_args $run_cmd"
207208
if [ ${restart_cmd:+x} ]; then
208-
restart_name=`srun $srun_args $bindir/scr_have_restart`
209+
restart_name=`$launcher $launcher_args $bindir/scr_have_restart`
209210
if [ ${restart_name:+x} ]; then
210211
my_restart_cmd=`echo $restart_cmd | sed "s#SCR_CKPT_NAME#${restart_name}#g"`
211-
launch_cmd="$srun_args $my_restart_cmd"
212+
launch_cmd="$launcher_args $my_restart_cmd"
212213
fi
213214
fi
214215

@@ -217,11 +218,11 @@ while [ 1 ] ; do
217218
$bindir/scr_log_event -T "RUN STARTED" -N "Job=$jobid, Run=$attempts" -S $start_secs
218219

219220
if [ $use_scr_watchdog -eq 0 ]; then
220-
srun $exclude $launch_cmd
221+
$launcher $exclude $launch_cmd
221222
else
222223
echo "$prog: Attempting to start watchdog process."
223224
# need to get job step id of the srun command
224-
srun $exclude $launch_cmd &
225+
$launcher $exclude $launch_cmd &
225226
srun_pid=$!;
226227
sleep 10; # sleep a bit to wait for the job to show up in squeue
227228
echo "$bindir/scr_get_jobstep_id $srun_pid";

scripts/cray_xt/scr_run.in

Lines changed: 19 additions & 27 deletions
Original file line number | Diff line number | Diff line change
@@ -2,27 +2,33 @@
22

33
# requires: aprun
44

5+
launcher="aprun"
6+
prog="scr_$launcher"
7+
8+
libdir="@X_LIBDIR@"
9+
bindir="@X_BINDIR@"
10+
511
# Print usage
612
if [ -z "$1" ]; then
713
echo USAGE:
8-
echo "scr_aprun [aprun args] [-rc|--run-cmd=<run_command>] [-rs|--restart-cmd=<restart_command>] [aprun args]"
14+
echo "scr_$launcher [$launcher args] [-rc|--run-cmd=<run_command>] [-rs|--restart-cmd=<restart_command>] [$launcher args]"
915
echo "<run_command>: The command to run when no restart file is present"
1016
echo "<restart_command>: The command to run when a restart file is present"
1117
echo ""
12-
echo "The invoked command will be \`aprun [aprun_args] [run_command]\` when no restart file is present"
13-
echo "The invoked command will be \`aprun [aprun_args] [restart_command]\` when a restart file is present"
18+
echo "The invoked command will be \`$launcher [$launcher args] [run_command]\` when no restart file is present"
19+
echo "The invoked command will be \`$launcher [$launcher args] [restart_command]\` when a restart file is present"
1420
echo "If the string \"SCR_CKPT_NAME\" appears in the restart command, it will be replace by the name "
1521
echo "presented to SCR when the most recent checkpoint was written."
1622
echo ""
1723
echo "If no restart command is specified, the run command will always be used"
18-
echo "If no commands are specified, the aprun arguments will be passed directly to aprun in all circumstances"
24+
echo "If no commands are specified, the $launcher arguments will be passed directly to $launcher in all circumstances"
1925
echo "If no run command is specified, but a restart command is specified,"
20-
echo "then the restart command will be appended to the aprun arguments when a restart file is present."
26+
echo "then the restart command will be appended to the $launcher arguments when a restart file is present."
2127
exit 0
2228
fi
2329

2430
# capture restart and run commands if specified
25-
aprun_args=""
31+
launcher_args=""
2632
while [ ${1:+x} ]; do
2733
case $1 in
2834
--restart-cmd|-rs)
@@ -40,14 +46,14 @@ while [ ${1:+x} ]; do
4046
if [ -z "$run_cmd" ]; then shift; run_cmd=$1; fi
4147
shift ;;
4248
*)
43-
aprun_args="$aprun_args $1"
49+
launcher_args="$launcher_args $1"
4450
shift ;;
4551
esac
4652
done
4753

4854
# if SCR is disabled, just do a normal run and exit
4955
if [ "$SCR_ENABLE" == "0" ] ; then
50-
aprun $aprun_args $run_cmd
56+
$launcher $launcher_args $run_cmd
5157
exit $?
5258
fi
5359

@@ -58,11 +64,6 @@ if [ -n "$SCR_DEBUG" ]; then
5864
fi
5965
fi
6066

61-
prog=scr_run
62-
63-
libdir="@X_LIBDIR@"
64-
bindir="@X_BINDIR@"
65-
6667
# make a record of start time
6768
timestamp=`date`
6869
echo "$prog: Started: $timestamp"
@@ -101,15 +102,6 @@ if [ "$SCR_WATCHDOG" == "1" ] ; then
101102
use_scr_watchdog=1
102103
fi
103104

104-
# normally we would check that this script is running on a node in the job's
105-
# allocated nodeset, but on crays, this script runs on MOM node
106-
script_node=`hostname`
107-
#intersection=`$bindir/scr_glob_hosts --intersection $script_node:$SCR_NODELIST`
108-
#if [ -z "$intersection" ] ; then
109-
#echo "$prog: ERROR: scr_run is executing on $script_node, which is not part of the job's nodeset $SCR_NODELIST."
110-
#exit 1
111-
#fi
112-
113105
# get the control directory
114106
cntldir=`$bindir/scr_list_dir control`
115107
if [ $? -ne 0 ] ; then
@@ -249,21 +241,21 @@ while [ 1 ] ; do
249241
tmpupnodes=${upnodes%]}
250242
tmpupnodes=${tmpupnodes:1}
251243

252-
launch_cmd="$aprun_args $run_cmd"
244+
launch_cmd="$launcher_args $run_cmd"
253245
if [ ${restart_cmd:+x} ]; then
254-
restart_name=`srun $srun_args $bindir/scr_have_restart`
246+
restart_name=`$launcher $launcher_args $bindir/scr_have_restart`
255247
if [ ${restart_name:+x} ]; then
256248
my_restart_cmd=`echo $restart_cmd | sed "s#SCR_CKPT_NAME#${restart_name}#g"`
257-
launch_cmd="$aprun_args $my_restart_cmd"
249+
launch_cmd="$launcher_args $my_restart_cmd"
258250
fi
259251
fi
260252

261253
if [ $use_scr_watchdog -eq 0 ]; then
262-
aprun -L $tmpupnodes $launch_cmd
254+
$launcher -L $tmpupnodes $launch_cmd
263255
else
264256
echo "$prog: Attempting to start watchdog process."
265257
# need to get apid of the aprun command
266-
aprun -L $tmpupnodes $launch_cmd &
258+
$launcher -L $tmpupnodes $launch_cmd &
267259
aprun_pid=$!;
268260
sleep 10; # sleep a bit to wait for the job to show up in apstat
269261
echo "$bindir/scr_get_jobstep_id $aprun_pid";

0 commit comments

Comments
 (0)