Skip to content

Give an explicit error message if MPI can't be called from a sub-process #18

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 18 additions & 0 deletions pytest_parallel/exception.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@

def _to_bold_red(s):
red = '\x1b[31m'
bold = '\x1b[1m'
reset = '\x1b[0m'
return red + bold + s + reset

class PytestParallelInternalError(Exception):
    """Exception whose message is prefixed with a 'pytest_parallel internal error' header."""

    def __init__(self, msg):
        # First line: header styled by `_to_bold_red`; `msg` follows on the next line.
        header = _to_bold_red('pytest_parallel internal error')
        super().__init__(header + '\n' + msg)

class PytestParallelUsageError(Exception):
    """Exception whose message is prefixed with a 'You are calling pytest_parallel incorrectly' header."""

    def __init__(self, msg):
        # First line: header styled by `_to_bold_red`; `msg` follows on the next line.
        header = _to_bold_red('You are calling pytest_parallel incorrectly')
        super().__init__(header + '\n' + msg)

class PytestParallelEnvError(Exception):
    """Exception whose message is prefixed with a 'pytest_parallel environment error:' header."""

    def __init__(self, msg):
        # First line: header styled by `_to_bold_red`; `msg` follows on the next line.
        header = _to_bold_red('pytest_parallel environment error:')
        super().__init__(header + '\n' + msg)
29 changes: 14 additions & 15 deletions pytest_parallel/plugin.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,7 @@
import pytest
from _pytest.terminal import TerminalReporter

class PytestParallelError(ValueError):
pass
from .exception import PytestParallelUsageError, PytestParallelInternalError

# --------------------------------------------------------------------------
def pytest_addoption(parser):
Expand Down Expand Up @@ -58,13 +57,13 @@ def pytest_addoption(parser):
' (because importing mpi4py.MPI makes the current process look like and MPI process,' \
' and SLURM does not like that)'
if os.getenv('I_MPI_MPIRUN') is not None:
err_msg = 'Internal pytest_parallel error: the environment variable I_MPI_MPIRUN is set' \
err_msg = 'The environment variable I_MPI_MPIRUN is set' \
f' (it has value "{os.getenv("I_MPI_MPIRUN")}"),\n' \
' while pytest was invoked with "--scheduler=slurm".\n' \
' This indicates that pytest was run through MPI, and SLURM generally does not like that.\n' \
' With "--scheduler=slurm", just run `pytest` directly, not through `mpirun/mpiexec/srun`,\n' \
' because it will launch MPI itself (you may want to use --n-workers=<number of processes>).'
raise PytestParallelError(err_msg)
raise PytestParallelInternalError(err_msg)

r = subprocess.run(['env','--null'], stdout=subprocess.PIPE) # `--null`: end each output line with NUL, required by `sbatch --export-file`

Expand Down Expand Up @@ -109,41 +108,41 @@ def pytest_configure(config):
assert not is_worker, f'Internal pytest_parallel error `--_worker` not available with`--scheduler={scheduler}`'
if scheduler in ['slurm', 'shell'] and not is_worker:
if n_workers is None:
raise PytestParallelError(f'You need to specify `--n-workers` when `--scheduler={scheduler}`')
raise PytestParallelUsageError(f'You need to specify `--n-workers` when `--scheduler={scheduler}`')
if scheduler != 'slurm':
if slurm_options is not None:
raise PytestParallelError('Option `--slurm-options` only available when `--scheduler=slurm`')
raise PytestParallelUsageError('Option `--slurm-options` only available when `--scheduler=slurm`')
if slurm_srun_options is not None:
raise PytestParallelError('Option `--slurms-run-options` only available when `--scheduler=slurm`')
raise PytestParallelUsageError('Option `--slurms-run-options` only available when `--scheduler=slurm`')
if slurm_init_cmds is not None:
raise PytestParallelError('Option `--slurm-init-cmds` only available when `--scheduler=slurm`')
raise PytestParallelUsageError('Option `--slurm-init-cmds` only available when `--scheduler=slurm`')
if slurm_file is not None:
raise PytestParallelError('Option `--slurm-file` only available when `--scheduler=slurm`')
raise PytestParallelUsageError('Option `--slurm-file` only available when `--scheduler=slurm`')

if scheduler in ['shell', 'slurm'] and not is_worker:
from mpi4py import MPI
if MPI.COMM_WORLD.size != 1:
err_msg = 'Do not launch `pytest_parallel` on more that one process when `--scheduler=shell` or `--scheduler=slurm`.\n' \
'`pytest_parallel` will spawn MPI processes itself.\n' \
f'You may want to use --n-workers={MPI.COMM_WORLD.size}.'
raise PytestParallelError(err_msg)
raise PytestParallelUsageError(err_msg)



if scheduler == 'slurm' and not is_worker:
if slurm_options is None and slurm_file is None:
raise PytestParallelError('You need to specify either `--slurm-options` or `--slurm-file` when `--scheduler=slurm`')
raise PytestParallelUsageError('You need to specify either `--slurm-options` or `--slurm-file` when `--scheduler=slurm`')
if slurm_options:
if slurm_file:
raise PytestParallelError('You need to specify either `--slurm-options` or `--slurm-file`, but not both')
raise PytestParallelUsageError('You need to specify either `--slurm-options` or `--slurm-file`, but not both')
if slurm_file:
if slurm_options:
raise PytestParallelError('You need to specify either `--slurm-options` or `--slurm-file`, but not both')
raise PytestParallelUsageError('You need to specify either `--slurm-options` or `--slurm-file`, but not both')
if slurm_init_cmds:
raise PytestParallelError('You cannot specify `--slurm-init-cmds` together with `--slurm-file`')
raise PytestParallelUsageError('You cannot specify `--slurm-init-cmds` together with `--slurm-file`')

if '-n=' in slurm_options or '--ntasks=' in slurm_options:
raise PytestParallelError('Do not specify `-n/--ntasks` in `--slurm-options` (it is deduced from the `--n-worker` value).')
raise PytestParallelUsageError('Do not specify `-n/--ntasks` in `--slurm-options` (it is deduced from the `--n-worker` value).')

from .slurm_scheduler import SlurmScheduler

Expand Down
11 changes: 11 additions & 0 deletions pytest_parallel/shell_static_scheduler.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import subprocess
import socket
import pickle
from pathlib import Path

import pytest
from mpi4py import MPI
Expand All @@ -12,6 +13,7 @@
from .utils.items import add_n_procs, run_item_test, mark_original_index, mark_skip
from .utils.file import remove_exotic_chars, create_folders
from .static_scheduler_utils import group_items_by_parallel_steps
from .exception import PytestParallelEnvError

def mpi_command(current_proc, n_proc):
mpi_vendor = MPI.get_vendor()[0]
Expand Down Expand Up @@ -39,6 +41,7 @@ def submit_items(items_to_run, SCHEDULER_IP_ADDRESS, port, session_folder, main_
for item in items:
test_idx = item.original_index
test_out_file = f'.pytest_parallel/{session_folder}/{remove_exotic_chars(item.nodeid)}'
test_out_file = str(Path(test_out_file).absolute()) # easier to find the file if absolute
cmd = '('
cmd += mpi_command(current_proc, item.n_proc)
cmd += f' python3 -u -m pytest -s --_worker {socket_flags} {main_invoke_params} --_test_idx={test_idx} {item.config.rootpath}/{item.nodeid}'
Expand Down Expand Up @@ -106,6 +109,14 @@ def __init__(self, main_invoke_params, ntasks, detach):

self.socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM) # TODO close at the end

# Check that MPI can be called in a subprocess (not the case with OpenMPI 4.0.5, see #17)
p = subprocess.run('mpirun -np 1 echo mpi_can_be_called_from_subprocess', shell=True)
if p.returncode != 0:
raise PytestParallelEnvError(
"Your MPI implementation does not handle MPI being called from a sub-process\n"
"Either update your MPI version or use another scheduler. See https://github.com/onera/pytest_parallel/issues/17"
)

@pytest.hookimpl(tryfirst=True)
def pytest_pyfunc_call(self, pyfuncitem):
# This is where the test is normally run.
Expand Down
2 changes: 2 additions & 0 deletions pytest_parallel/slurm_scheduler.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import subprocess
import socket
import pickle
from pathlib import Path

import pytest

Expand Down Expand Up @@ -44,6 +45,7 @@ def submit_items(items_to_run, socket, session_folder, main_invoke_params, ntask
for item in items:
test_idx = item.original_index
test_out_file = f'.pytest_parallel/{session_folder}/{remove_exotic_chars(item.nodeid)}'
test_out_file = str(Path(test_out_file).absolute()) # easier to find the file if absolute
cmd = '('
cmd += f'srun {srun_options}'
cmd += ' --exclusive'
Expand Down
Loading