Skip to content

Commit

Permalink
Some fixes
Browse files Browse the repository at this point in the history
Signed-off-by: Igoshev, Iaroslav <[email protected]>
  • Loading branch information
YarShev committed Nov 13, 2023
1 parent 2be323c commit a150698
Show file tree
Hide file tree
Showing 3 changed files with 25 additions and 20 deletions.
17 changes: 9 additions & 8 deletions docs/troubleshooting.rst
Original file line number Diff line number Diff line change
Expand Up @@ -200,14 +200,15 @@ to start the number of workers exceeding the number of physical cores.
To get more information about the flags refer to `Open MPI's mpiexec`_ command documentation.
.. _`Open MPI's mpiexec`: https://www.open-mpi.org/doc/v3.1/man1/mpiexec.1.php
.. _`issue`: https://github.com/modin-project/unidist/issues
Shared object store for MPI backend is not supported in C/W model for MPICH version less than 4.2.0
---------------------------------------------------------------------------------------------------
Shared object store is not supported in C/W model if the using MPICH version is less than the 4.2.0 version.
------------------------------------------------------------------------------------------------------------
Unfortunately, this version of MPICH has a problem with shared memory in the Controller/Worker model.
MPICH versions older than 4.2.0 have an issue with the shared memory feature in the Controller/Worker model.
**Solution**
You can run your script using the SPMD model, or use another MPI implementation
such as Open MPI, Intel MPI, or MPICH version 4.2.0 or newer.
You can run your script using MPICH in the SPMD model, or use another MPI implementation
such as Open MPI, Intel MPI, or MPICH 4.2.0 or newer.
.. _`Open MPI's mpiexec`: https://www.open-mpi.org/doc/v3.1/man1/mpiexec.1.php
.. _`issue`: https://github.com/modin-project/unidist/issues
25 changes: 15 additions & 10 deletions unidist/core/backends/mpi/core/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -464,14 +464,16 @@ def versiontuple(v):
return versiontuple(mpich_version) >= versiontuple(target_version)


def is_shared_memory_supported(send_warning=False):
def is_shared_memory_supported(raise_warning=False):
"""
Check if the unidist on MPI supports shared memory.
Parameters
----------
send_warning: bool, default: False
The need for warning as a flag.
raise_warning: bool, default: False
Whether to raise a warning or not.
``True`` is passed only for root process
to have the only warning.
Returns
-------
Expand All @@ -486,9 +488,11 @@ def is_shared_memory_supported(send_warning=False):
return False

if MPI.VERSION < 3:
warnings.warn(
"The too old version of MPI is used. Shared object store can not be used."
)
if raise_warning:
warnings.warn(
f"Shared object store for MPI backend is not supported for MPI version {MPI.VERSION} "
"since it doesn't support shared memory feature."
)
return False

# Mpich shared memory does not work with spawned processes prior to version 4.2.0.
Expand All @@ -497,10 +501,11 @@ def is_shared_memory_supported(send_warning=False):
and MpiSpawn.get()
and not check_mpich_version("4.2.0")
):
warnings.warn(
"Shared object store is not supported in C/W model if the using MPICH version is less than the 4.2.0 version."
+ "Please read more about this problem in the `Troubleshooting` chapter of the Unidist documentation."
)
if raise_warning:
warnings.warn(
"Shared object store for MPI backend is not supported in C/W model for MPICH version less than 4.2.0. "
+ "Read more about this issue in the `troubleshooting` page of the unidist documentation."
)
return False

return True
3 changes: 1 addition & 2 deletions unidist/core/backends/mpi/core/shared_object_store.py
Original file line number Diff line number Diff line change
Expand Up @@ -107,9 +107,8 @@ def __init__(self):
self.service_info_max_count = None

mpi_state = communication.MPIState.get_instance()

# Initialize all properties above
if common.is_shared_memory_supported(send_warning=mpi_state.is_root_process()):
if common.is_shared_memory_supported(raise_warning=mpi_state.is_root_process()):
self._allocate_shared_memory()

# Logger will be initialized after `communicator.MPIState`
Expand Down

0 comments on commit a150698

Please sign in to comment.