From aae7572c566e00300322f79d6985bd8096e4a7ca Mon Sep 17 00:00:00 2001 From: Kevin Waters Date: Wed, 20 Mar 2024 08:08:12 -0400 Subject: [PATCH 01/20] Deprecate blocks for Collect, Broadcast, update def apireturnvalues --- content/shmem_broadcast.tex | 5 +++++ content/shmem_collect.tex | 8 ++++++++ utils/defs.tex | 3 +-- 3 files changed, 14 insertions(+), 2 deletions(-) diff --git a/content/shmem_broadcast.tex b/content/shmem_broadcast.tex index a172a12e7..3e4b376ed 100644 --- a/content/shmem_broadcast.tex +++ b/content/shmem_broadcast.tex @@ -83,6 +83,7 @@ the team. \end{itemize} +\begin{DeprecateBlock} For active-set-based broadcasts: \begin{itemize} \item The \dest{} object is updated on all \acp{PE} other than the @@ -128,13 +129,17 @@ \end{itemize} \item The \source{} data object may be safely reused. \end{itemize} +\end{DeprecateBlock} } \apireturnvalues{ For team-based broadcasts, zero on successful local completion; otherwise, nonzero. +\begin{DeprecateBlock} For active-set-based broadcasts, none. +\end{DeprecateBlock} + } \apinotes{ diff --git a/content/shmem_collect.tex b/content/shmem_collect.tex index 5430abcfc..897bdcb3f 100644 --- a/content/shmem_collect.tex +++ b/content/shmem_collect.tex @@ -90,6 +90,7 @@ If \VAR{team} compares equal to \LibConstRef{SHMEM\_TEAM\_INVALID} or is otherwise invalid, the behavior is undefined. +\begin{DeprecateBlock} Active-set-based collective routines operate over all \acp{PE} in the active set defined by the \VAR{PE\_start}, \VAR{logPE\_stride}, \VAR{PE\_size} triplet. As with all active-set-based collective routines, @@ -108,6 +109,7 @@ \item For active-set-based collective routines, the values in the \VAR{pSync} array are restored to the original values. \end{itemize} +\end{DeprecateBlock} } \apireturnvalues{ @@ -115,9 +117,15 @@ } \apinotes{ +\begin{DeprecateBlock} The collective routines operate on active \ac{PE} sets that have a non-power-of-two \VAR{PE\_size} with some performance degradation. 
They operate with no performance degradation when \VAR{nelems} is a non-power-of-two value. +\end{DeprecateBlock} + The collective routines that operate on teams containing a + non-power-of-two number of PEs do so with some performance degradation. They operate + with no performance degradation when \VAR{nelems} is a non-power-of-two value. + } \begin{apiexamples} diff --git a/utils/defs.tex b/utils/defs.tex index 771ba8a7b..9d2bdb64c 100644 --- a/utils/defs.tex +++ b/utils/defs.tex @@ -362,8 +362,7 @@ \hfill \item[Return Values] \hfill \\ #1 -\\ -\hfill +\hfill \\ } \newcommand{\apitablerow}[2]{ From 7b2dbd30460530e997be3f5f70e054431206a03c Mon Sep 17 00:00:00 2001 From: Kevin Waters Date: Wed, 20 Mar 2024 11:42:13 -0400 Subject: [PATCH 02/20] Deprecate active-set language in Collectives, missing Reductions --- content/shmem_alltoall.tex | 38 ++++++++++++++++++-------- content/shmem_alltoalls.tex | 4 +-- content/shmem_broadcast.tex | 53 ++++++++++++++++++++++++------------- content/shmem_collect.tex | 23 +++++++++++++--- content/shmem_sync.tex | 19 ++++++++++--- 5 files changed, 99 insertions(+), 38 deletions(-) diff --git a/content/shmem_alltoall.tex b/content/shmem_alltoall.tex index 188e28759..c37823d8b 100644 --- a/content/shmem_alltoall.tex +++ b/content/shmem_alltoall.tex @@ -35,10 +35,10 @@ \apiargument{OUT}{dest}{Symmetric address of a data object large enough to receive the combined total of \VAR{nelems} elements from each \ac{PE} in the - active set. + participating \acp{PE}. The type of \dest{} should match that implied in the SYNOPSIS section.} \apiargument{IN}{source}{Symmetric address of a data object that contains \VAR{nelems} - elements of data for each \ac{PE} in the active set, ordered according to + elements of data for each \ac{PE} in the participating \acp{PE}, ordered according to destination \ac{PE}.
The type of \source{} should match that implied in the SYNOPSIS section.} \apiargument{IN}{nelems}{ @@ -100,6 +100,21 @@ If \VAR{team} compares equal to \LibConstRef{SHMEM\_TEAM\_INVALID} or is otherwise invalid, the behavior is undefined. + Before any \ac{PE} calls a \FUNC{shmem\_alltoall} routine, + the following conditions must be ensured: + \begin{itemize} + \item The \VAR{dest} data object on all \acp{PE} in the team is + ready to accept the \FUNC{shmem\_alltoall} data. + \end{itemize} + + Upon return from a \FUNC{shmem\_alltoall} routine, the following is true for + the local PE: + \begin{itemize} + \item Its \VAR{dest} symmetric data object is completely updated and the + data has been copied out of the source data object. + \end{itemize} + +\begin{DeprecateBlock} Active-set-based collective routines operate over all \acp{PE} in the active set defined by the \VAR{PE\_start}, \VAR{logPE\_stride}, \VAR{PE\_size} triplet. @@ -117,22 +132,23 @@ Before any \ac{PE} calls a \FUNC{shmem\_alltoall} routine, the following conditions must be ensured: \begin{itemize} - \item The \VAR{dest} data object on all \acp{PE} in the active set is - ready to accept the \FUNC{shmem\_alltoall} data. - \item For active-set-based routines, the \VAR{pSync} array - on all \acp{PE} in the active set is not still in use from a prior call - to a \FUNC{shmem\_alltoall} routine. + \item The \VAR{dest} data object on all \acp{PE} in the active set is + ready to accept the \FUNC{shmem\_alltoall} data. + \item For active-set-based routines, the \VAR{pSync} array + on all \acp{PE} in the active set is not still in use from a prior call + to a \FUNC{shmem\_alltoall} routine. \end{itemize} Otherwise, the behavior is undefined. Upon return from a \FUNC{shmem\_alltoall} routine, the following is true for the local PE: \begin{itemize} - \item Its \VAR{dest} symmetric data object is completely updated and - the data has been copied out of the \VAR{source} data object. 
- \item For active-set-based routines, - the values in the \VAR{pSync} array are restored to the original values. + \item Its \VAR{dest} symmetric data object is completely updated and the + data has been copied out of the source data object. + \item For active-set-based routines, + the values in the \VAR{pSync} array are restored to the original values. \end{itemize} +\end{DeprecateBlock} } \apireturnvalues{ diff --git a/content/shmem_alltoalls.tex b/content/shmem_alltoalls.tex index e371b8cf9..d1bd7d1f9 100644 --- a/content/shmem_alltoalls.tex +++ b/content/shmem_alltoalls.tex @@ -35,10 +35,10 @@ \apiargument{OUT}{dest}{Symmetric address of a data object large enough to receive the combined total of \VAR{nelems} elements from each \ac{PE} in the - active set. + participating \acp{PE}. The type of \dest{} should match that implied in the SYNOPSIS section.} \apiargument{IN}{source}{Symmetric address of a data object that contains \VAR{nelems} - elements of data for each \ac{PE} in the active set, ordered according to + elements of data for each \ac{PE} in the participating \acp{PE}, ordered according to destination \ac{PE}. The type of \source{} should match that implied in the SYNOPSIS section.} \apiargument{IN}{dst}{The stride between consecutive elements of the \dest{} diff --git a/content/shmem_broadcast.tex b/content/shmem_broadcast.tex index 3e4b376ed..5aec7b9dc 100644 --- a/content/shmem_broadcast.tex +++ b/content/shmem_broadcast.tex @@ -45,7 +45,7 @@ respectively. } \apiargument{IN}{PE\_root}{Zero-based ordinal of the \ac{PE}, with respect to - the team or active set, from which the data is copied.} + the calling PEs, from which the data is copied.} \begin{DeprecateBlock} @@ -61,8 +61,7 @@ \end{apiarguments} \apidescription{ - \openshmem broadcast routines are collective routines over an active set or - valid \openshmem team. + \openshmem team-based broadcast routines are collective routines over a valid \openshmem team. 
They copy the \source{} data object on the \ac{PE} specified by \VAR{PE\_root} to the \dest{} data object on the \acp{PE} participating in the collective operation. @@ -75,6 +74,9 @@ \item The \dest{} object is updated on all \acp{PE}. \item All \acp{PE} in the \VAR{team} argument must participate in the operation. + \item Only \acp{PE} in the team may call the routine. If a + \ac{PE} not in the team calls a team-based + collective routine, the behavior is undefined. \item If \VAR{team} compares equal to \LibConstRef{SHMEM\_TEAM\_INVALID} or is otherwise invalid, the behavior is undefined. \item \ac{PE} numbering is relative to the team. The specified @@ -82,12 +84,34 @@ between \CONST{0} and \VAR{N$-$1}, where \VAR{N} is the size of the team. \end{itemize} + + Before any \ac{PE} calls a broadcast routine, the following + conditions must be ensured: + \begin{itemize} + \item The \dest{} array on all \acp{PE} participating in the broadcast + is ready to accept the broadcast data. + \end{itemize} + Otherwise, the behavior is undefined. + + Upon return from a team-based broadcast routine, the following are true for the local + \ac{PE}: + \begin{itemize} + \item The \dest{} data object is updated. + \item The \source{} data object may be safely reused. + \end{itemize} \begin{DeprecateBlock} + \openshmem active-set broadcast routines are collective routines over an active set. + They copy the \source{} data object on the \ac{PE} specified by + \VAR{PE\_root} to the \dest{} data object on the \acp{PE} + participating in the collective operation. + The same \dest{} and \source{} data objects and the same value of + \VAR{PE\_root} must be passed by all \acp{PE} participating in the + collective operation. + For active-set-based broadcasts: \begin{itemize} - \item The \dest{} object is updated on all \acp{PE} other than the - root \ac{PE}. + \item The \VAR{dest} object is updated on all PEs other than the root PE. 
\item All \acp{PE} in the active set defined by the \VAR{PE\_start}, \VAR{logPE\_stride}, \VAR{PE\_size} triplet must participate in the operation. @@ -103,31 +127,24 @@ in the active set. \end{itemize} - Before any \ac{PE} calls a broadcast routine, the following + Before any \ac{PE} calls a active-set-based broadcast routine, the following conditions must be ensured: \begin{itemize} \item The \dest{} array on all \acp{PE} participating in the broadcast is ready to accept the broadcast data. - \item For active-set-based broadcasts, the - \VAR{pSync} array on all \acp{PE} in the + \item The \VAR{pSync} array on all \acp{PE} in the active set is not still in use from a prior call to an \openshmem collective routine. \end{itemize} - Otherwise, the behavior is undefined. + Otherwise, the behavior is undefined. - Upon return from a broadcast routine, the following are true for the local + Upon return from a active-based broadcast routine, the following are true for the local \ac{PE}: \begin{itemize} - \item For team-based broadcasts, the \dest{} data object is - updated. - \item For active-set-based broadcasts: - \begin{itemize} - \item If the current \ac{PE} is not the root \ac{PE}, the - \dest{} data object is updated. + \item If the current PE is not the root PE, the \dest{} data object is updated. + \item The \source{} data object may be safely reused. \item The values in the \VAR{pSync} array are restored to the original values. - \end{itemize} - \item The \source{} data object may be safely reused. \end{itemize} \end{DeprecateBlock} } diff --git a/content/shmem_collect.tex b/content/shmem_collect.tex index 897bdcb3f..63814c79d 100644 --- a/content/shmem_collect.tex +++ b/content/shmem_collect.tex @@ -70,9 +70,7 @@ in processor number order. 
The resultant \dest{} array contains the contribution from \acp{PE} as follows: - \begin{itemize} - \item For an active set, the data from \ac{PE} \VAR{PE\_start} is first, then the - contribution from \ac{PE} \VAR{PE\_start} + \VAR{PE\_stride} second, and so on. + \begin{itemize} \item For a team, the data from \ac{PE} number \CONST{0} in the team is first, then the contribution from \ac{PE} \CONST{1} in the team, and so on. \end{itemize} @@ -91,6 +89,25 @@ otherwise invalid, the behavior is undefined. \begin{DeprecateBlock} + \openshmem \FUNC{collect} and \FUNC{fcollect} routines perform a collective + operation to concatenate \VAR{nelems} + data items from the \source{} array into the + \dest{} array, over an \openshmem active set + in processor number order. The resultant \dest{} array contains the contribution from + \acp{PE} as follows: + \begin{itemize} + \item For an active set, the data from \ac{PE} \VAR{PE\_start} is first, then the + contribution from \ac{PE} \VAR{PE\_start} + \VAR{PE\_stride} second, and so on. + \end{itemize} + + The collected result is written to the \dest{} array for all \acp{PE} + that participate in the operation. The same \dest{} and \source{} + arrays must be passed by all \acp{PE} that participate in the operation. + + The \FUNC{fcollect} routines require that \VAR{nelems} be the same value in all + participating \acp{PE}, while the \FUNC{collect} routines allow \VAR{nelems} to + vary from \ac{PE} to \ac{PE}. + Active-set-based collective routines operate over all \acp{PE} in the active set defined by the \VAR{PE\_start}, \VAR{logPE\_stride}, \VAR{PE\_size} triplet. As with all active-set-based collective routines, diff --git a/content/shmem_sync.tex b/content/shmem_sync.tex index 6e41ee825..8ba9b0422 100644 --- a/content/shmem_sync.tex +++ b/content/shmem_sync.tex @@ -38,12 +38,12 @@ \apidescription{ \FUNC{shmem\_sync} is a collective synchronization routine over an - existing \openshmem team or active set. 
+ existing \openshmem team. The routine registers the arrival of a \ac{PE} at a synchronization point in the program. This is a fast mechanism for synchronizing all \acp{PE} that participate in this collective call. The routine blocks the calling \ac{PE} until all \acp{PE} in the - specified team or active set have called \FUNC{shmem\_sync}. In a multithreaded \openshmem + specified team have called \FUNC{shmem\_sync}. In a multithreaded \openshmem program, only the calling thread is blocked. Team-based sync routines operate over all \acp{PE} in the provided team argument. All @@ -51,6 +51,15 @@ If \VAR{team} compares equal to \LibConstRef{SHMEM\_TEAM\_INVALID} or is otherwise invalid, the behavior is undefined. +\begin{DeprecateBlock} + \FUNC{shmem\_sync} is a collective synchronization routine over an active set. + + The routine registers the arrival of a \ac{PE} at a synchronization point in the program. + This is a fast mechanism for synchronizing all \acp{PE} that participate in this + collective call. The routine blocks the calling \ac{PE} until all \acp{PE} in the + active set have called \FUNC{shmem\_sync}. In a multithreaded \openshmem + program, only the calling thread is blocked. + Active-set-based sync routines operate over all \acp{PE} in the active set defined by the \VAR{PE\_start}, \VAR{logPE\_stride}, \VAR{PE\_size} triplet. @@ -64,12 +73,14 @@ \VAR{PE\_size} must be equal on all \acp{PE} in the active set. The same work array must be passed in \VAR{pSync} to all \acp{PE} in the active set. + The same \VAR{pSync} array may be reused on consecutive calls to + \FUNC{shmem\_sync} if the same active set is used. +\end{DeprecateBlock} + In contrast with the \FUNC{shmem\_barrier} routine, \FUNC{shmem\_sync} only ensures completion and visibility of previously issued memory stores and does not ensure completion of remote memory updates issued via \openshmem routines. 
The same \VAR{pSync} array may be reused on consecutive calls to - \FUNC{shmem\_sync} if the same active set is used. } \apireturnvalues{ From 6ad386ad671551732e2a75b031d88b944ee99b12 Mon Sep 17 00:00:00 2001 From: Kevin Waters Date: Thu, 5 Sep 2024 15:09:33 -0400 Subject: [PATCH 03/20] Reductions, Programming Model, strided teams active set language deprecated/removed --- content/collective_intro.tex | 7 +++-- content/shmem_collect.tex | 4 +-- content/shmem_reductions.tex | 40 +++++++++++++++++++++++++--- content/shmem_sync.tex | 13 ++++++--- content/shmem_team_split_strided.tex | 6 ++--- 5 files changed, 55 insertions(+), 15 deletions(-) diff --git a/content/collective_intro.tex b/content/collective_intro.tex index 823164abe..a8bf37ff4 100644 --- a/content/collective_intro.tex +++ b/content/collective_intro.tex @@ -1,7 +1,7 @@ \emph{Collective routines} are defined as coordinated communication or synchronization operations performed by a group of \acp{PE}. -\openshmem provides three types of collective routines: +\openshmem provides four types of collective routines: \begin{enumerate} \item Collective routines that operate on teams use a team handle parameter to determine @@ -11,9 +11,12 @@ \begin{DeprecateBlock} \item Collective routines that operate on active sets use a set of parameters to determine which \acp{PE} will participate and what resources are used to perform operations. + +\item Collective routines that do not accept an active set + parameters and, as required, the default context. \end{DeprecateBlock} -\item Collective routines that accept neither team nor active set +\item Collective routines that do not accept team parameters, which implicitly operate on the world team and, as required, the default context.
\end{enumerate} diff --git a/content/shmem_collect.tex b/content/shmem_collect.tex index 63814c79d..68b3e614f 100644 --- a/content/shmem_collect.tex +++ b/content/shmem_collect.tex @@ -66,8 +66,8 @@ \openshmem \FUNC{collect} and \FUNC{fcollect} routines perform a collective operation to concatenate \VAR{nelems} data items from the \source{} array into the - \dest{} array, over an \openshmem team or active set - in processor number order. The resultant \dest{} array contains the contribution from + \dest{} array, over an \openshmem team in processor number order. + The resultant \dest{} array contains the contribution from \acp{PE} as follows: \begin{itemize} diff --git a/content/shmem_reductions.tex b/content/shmem_reductions.tex index ff933b35e..be5543c26 100644 --- a/content/shmem_reductions.tex +++ b/content/shmem_reductions.tex @@ -251,12 +251,14 @@ \subsubsubsection{PROD} \apiargument{IN}{source}{Symmetric address of an array, of length \VAR{nreduce} elements, that contains one element for each separate reduction routine. The type of \source{} should match that implied in the SYNOPSIS section.} -\apiargument{IN}{nreduce}{The number of elements in the \dest{} and \source{} - arrays. In teams based \ac{API} calls, \VAR{nreduce} must be of type size\_t. +\apiargument{IN}{nreduce}{the number of elements in the \dest{} and \source{} + arrays. in teams based \ac{API} calls, \VAR{nreduce} must be of type size\_t. 
In deprecated active-set based \ac{API} calls, \VAR{nreduce} must be of type integer.} \begin{DeprecateBlock} +\apiargument{IN}{nreduce}{In active-set based \ac{API} calls, + \VAR{nreduce} must be of type integer.} \apiargument{IN}{PE\_start}{The lowest \ac{PE} number of the active set of \acp{PE}.} \apiargument{IN}{logPE\_stride}{The log (base 2) of the stride between consecutive @@ -273,7 +275,7 @@ \subsubsubsection{PROD} \end{apiarguments} \apidescription{ - \openshmem reduction routines are collective routines over an active set or + \openshmem reduction routines are collective routines over an existing \openshmem team that compute one or more reductions across symmetric arrays on multiple \acp{PE}. A reduction performs an associative binary routine across a set of values. @@ -295,6 +297,37 @@ \subsubsubsection{PROD} If \VAR{team} compares equal to \LibConstRef{SHMEM\_TEAM\_INVALID} or is otherwise invalid, the behavior is undefined. + Before any \ac{PE} calls a reduction routine, the following conditions must be ensured: + \begin{itemize} + \item The \dest{} array on all \acp{PE} participating in the reduction + is ready to accept the results of the \OPR{reduction}. + \end{itemize} + Otherwise, the behavior is undefined. + + Upon return from a reduction routine, the following are true for the local + \ac{PE}: + \begin{itemize} + \item The \dest{} array is updated and the \source{} array may be safely reused. + \end{itemize} + +\begin{DeprecateBlock} + \openshmem reduction routines are collective routines over an active set + that compute one or more reductions across symmetric + arrays on multiple \acp{PE}. A reduction performs an associative binary routine + across a set of values. + + The \VAR{nreduce} argument determines the number of separate reductions to + perform. The \source{} array on all \acp{PE} participating in the reduction + provides one element for each reduction. 
The results of the reductions are placed in the + \dest{} array on all \acp{PE} participating in the reduction. + + The same \source{} and \dest{} arrays must be passed by all PEs that + participate in the collective. + The \source{} and \dest{} arguments must either be the same symmetric + address, or two different symmetric addresses corresponding to buffers that + do not overlap in memory. That is, they must be completely overlapping (sometimes referred to as an ``in place'' reduction) or + completely disjoint. + Active-set-based sync routines operate over all \acp{PE} in the active set defined by the \VAR{PE\_start}, \VAR{logPE\_stride}, \VAR{PE\_size} triplet. @@ -327,6 +360,7 @@ \subsubsubsection{PROD} \item If using active-set-based routines, the values in the \VAR{pSync} array are restored to the original values. \end{itemize} +\end{DeprecateBlock} The complex-typed interfaces are only provided for sum and product reductions. When the \Cstd translation environment does not support complex types diff --git a/content/shmem_sync.tex b/content/shmem_sync.tex index 8ba9b0422..91a2ce61c 100644 --- a/content/shmem_sync.tex +++ b/content/shmem_sync.tex @@ -1,7 +1,11 @@ \apisummary{ Registers the arrival of a \ac{PE} at a synchronization point. This routine does not return until all other \acp{PE} in a given OpenSHMEM team - or active set arrive at this synchronization point. + arrive at this synchronization point. +\begin{DeprecateBlock} + Registers the arrival of a \ac{PE} at a synchronization point. + This routine does not return until all other \acp{PE} in a given OpenSHMEM active set arrive at this synchronization point. +\end{DeprecateBlock} } \begin{apidefinition} @@ -51,6 +55,10 @@ If \VAR{team} compares equal to \LibConstRef{SHMEM\_TEAM\_INVALID} or is otherwise invalid, the behavior is undefined. 
+ In contrast with the \FUNC{shmem\_barrier} routine, \FUNC{shmem\_sync} only + ensures completion and visibility of previously issued memory stores and does not ensure + completion of remote memory updates issued via \openshmem routines. + \begin{DeprecateBlock} \FUNC{shmem\_sync} is a collective synchronization routine over an active set. @@ -77,9 +85,6 @@ \FUNC{shmem\_sync} if the same active set is used. \end{DeprecateBlock} - In contrast with the \FUNC{shmem\_barrier} routine, \FUNC{shmem\_sync} only - ensures completion and visibility of previously issued memory stores and does not ensure - completion of remote memory updates issued via \openshmem routines. } diff --git a/content/shmem_team_split_strided.tex b/content/shmem_team_split_strided.tex index 089697926..cd1e4c813 100644 --- a/content/shmem_team_split_strided.tex +++ b/content/shmem_team_split_strided.tex @@ -102,10 +102,8 @@ \apinotes{ The \FUNC{shmem\_team\_split\_strided} operation uses an arbitrary - \VAR{stride} argument, whereas the \VAR{logPE\_stride} argument to the - active set collective operations only permits strides that are a power of two. - Arbitrary strides allow a greater number of PE subsets to be expressed - and can support a broader range of usage models. + \VAR{stride} argument. Arbitrary strides allow a greater number of + PE subsets to be expressed and can support a broader range of usage models. See the description of team handles and predefined teams in Section~\ref{subsec:team} for more information about team handle semantics and usage. 
From c7aad29af26bcec823a4d1c115409221ee5a4e67 Mon Sep 17 00:00:00 2001 From: Kevin Waters Date: Thu, 28 Mar 2024 09:46:40 -0400 Subject: [PATCH 04/20] Indent in shmem_alltoall --- content/shmem_alltoall.tex | 2 ++ 1 file changed, 2 insertions(+) diff --git a/content/shmem_alltoall.tex b/content/shmem_alltoall.tex index c37823d8b..4b4b92ebf 100644 --- a/content/shmem_alltoall.tex +++ b/content/shmem_alltoall.tex @@ -131,6 +131,7 @@ Before any \ac{PE} calls a \FUNC{shmem\_alltoall} routine, the following conditions must be ensured: + \begin{itemize} \item The \VAR{dest} data object on all \acp{PE} in the active set is ready to accept the \FUNC{shmem\_alltoall} data. @@ -138,6 +139,7 @@ on all \acp{PE} in the active set is not still in use from a prior call to a \FUNC{shmem\_alltoall} routine. \end{itemize} + Otherwise, the behavior is undefined. Upon return from a \FUNC{shmem\_alltoall} routine, the following is true for From 1a448673bde8157235cf24628cb8a9a2f7c8208f Mon Sep 17 00:00:00 2001 From: Kevin Waters Date: Fri, 26 Apr 2024 13:10:02 -0400 Subject: [PATCH 05/20] Update content/collective_intro.tex Typo Co-authored-by: David Ozog --- content/collective_intro.tex | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/content/collective_intro.tex b/content/collective_intro.tex index a8bf37ff4..249e0fbff 100644 --- a/content/collective_intro.tex +++ b/content/collective_intro.tex @@ -12,7 +12,7 @@ \item Collective routines that operate on active sets use a set of parameters to determine which \acp{PE} will participate and what resources are used to perform operations. -\item Collective routines that do not accept an active set +\item Collective routines that do not accept active set parameters and, as required, the default context. 
\end{DeprecateBlock} From 63d8554202ce460e8455a23277cdb1d50126844c Mon Sep 17 00:00:00 2001 From: Kevin Waters Date: Fri, 26 Apr 2024 13:10:23 -0400 Subject: [PATCH 06/20] Update content/shmem_broadcast.tex White Space Co-authored-by: David Ozog --- content/shmem_broadcast.tex | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/content/shmem_broadcast.tex b/content/shmem_broadcast.tex index 5aec7b9dc..2470ec39b 100644 --- a/content/shmem_broadcast.tex +++ b/content/shmem_broadcast.tex @@ -136,7 +136,7 @@ active set is not still in use from a prior call to an \openshmem collective routine. \end{itemize} - Otherwise, the behavior is undefined. + Otherwise, the behavior is undefined. Upon return from a active-based broadcast routine, the following are true for the local \ac{PE}: From 76321432e34799e3f045088a8fc154ea5492e983 Mon Sep 17 00:00:00 2001 From: Kevin Waters Date: Fri, 26 Apr 2024 13:12:21 -0400 Subject: [PATCH 07/20] Update shmem_reductions.tex Typo, uppercase --- content/shmem_reductions.tex | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/content/shmem_reductions.tex b/content/shmem_reductions.tex index be5543c26..79f0b42a1 100644 --- a/content/shmem_reductions.tex +++ b/content/shmem_reductions.tex @@ -251,8 +251,8 @@ \subsubsubsection{PROD} \apiargument{IN}{source}{Symmetric address of an array, of length \VAR{nreduce} elements, that contains one element for each separate reduction routine. The type of \source{} should match that implied in the SYNOPSIS section.} -\apiargument{IN}{nreduce}{the number of elements in the \dest{} and \source{} - arrays. in teams based \ac{API} calls, \VAR{nreduce} must be of type size\_t. +\apiargument{IN}{nreduce}{The number of elements in the \dest{} and \source{} + arrays. In teams based \ac{API} calls, \VAR{nreduce} must be of type size\_t. 
In deprecated active-set based \ac{API} calls, \VAR{nreduce} must be of type integer.} From 1557a4eeabce2de6fb9ec50698bc40631e7560fe Mon Sep 17 00:00:00 2001 From: Kevin Waters Date: Fri, 26 Jul 2024 11:53:45 -0400 Subject: [PATCH 08/20] Update shmem_team_split_strided API Note, arbitrary to any positive integer. --- content/shmem_team_split_strided.tex | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/content/shmem_team_split_strided.tex b/content/shmem_team_split_strided.tex index cd1e4c813..59decede7 100644 --- a/content/shmem_team_split_strided.tex +++ b/content/shmem_team_split_strided.tex @@ -101,9 +101,8 @@ } \apinotes{ - The \FUNC{shmem\_team\_split\_strided} operation uses an arbitrary - \VAR{stride} argument. Arbitrary strides allow a greater number of - PE subsets to be expressed and can support a broader range of usage models. + The \FUNC{shmem\_team\_split\_strided} operation accepts any positive integer value for its + \VAR{stride} argument. See the description of team handles and predefined teams in Section~\ref{subsec:team} for more information about team handle semantics and usage. From 48201357aa3133011d6457ac45de5e78572a8e1e Mon Sep 17 00:00:00 2001 From: Kevin Waters Date: Fri, 26 Jul 2024 13:34:06 -0400 Subject: [PATCH 09/20] Fix Whitespace in shmem_alltoall --- content/shmem_alltoall.tex | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/content/shmem_alltoall.tex b/content/shmem_alltoall.tex index 4b4b92ebf..bcd531562 100644 --- a/content/shmem_alltoall.tex +++ b/content/shmem_alltoall.tex @@ -42,10 +42,10 @@ destination \ac{PE}. The type of \source{} should match that implied in the SYNOPSIS section.} \apiargument{IN}{nelems}{ - The number of elements to exchange for each \ac{PE}.
+ For \FUNC{shmem\_alltoallmem}, elements are bytes; + for \FUNC{shmem\_alltoall\{32,64\}}, elements are 4 or 8 bytes, + respectively. } \begin{DeprecateBlock} @@ -105,14 +105,14 @@ \begin{itemize} \item The \VAR{dest} data object on all \acp{PE} in the team is ready to accept the \FUNC{shmem\_alltoall} data. - \end{itemize} + \end{itemize} Upon return from a \FUNC{shmem\_alltoall} routine, the following is true for the local PE: \begin{itemize} - \item Its \VAR{dest} symmetric data object is completely updated and the - data has been copied out of the source data object. - \end{itemize} + \item Its \VAR{dest} symmetric data object is completely updated and the + data has been copied out of the source data object. + \end{itemize} \begin{DeprecateBlock} Active-set-based collective routines operate over all \acp{PE} in the active set @@ -145,8 +145,8 @@ Upon return from a \FUNC{shmem\_alltoall} routine, the following is true for the local PE: \begin{itemize} - \item Its \VAR{dest} symmetric data object is completely updated and the - data has been copied out of the source data object. + \item Its \VAR{dest} symmetric data object is completely updated and the + data has been copied out of the source data object. \item For active-set-based routines, the values in the \VAR{pSync} array are restored to the original values. 
\end{itemize} From 88ca7d935a67ff21cb31dc0f7cc6c8413f9dafe9 Mon Sep 17 00:00:00 2001 From: Kevin Waters Date: Fri, 26 Jul 2024 13:37:34 -0400 Subject: [PATCH 10/20] Fix whitespace shmem_broadcast --- content/shmem_broadcast.tex | 34 +++++++++++++++++----------------- 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/content/shmem_broadcast.tex b/content/shmem_broadcast.tex index 2470ec39b..49abd50be 100644 --- a/content/shmem_broadcast.tex +++ b/content/shmem_broadcast.tex @@ -96,9 +96,9 @@ Upon return from a team-based broadcast routine, the following are true for the local \ac{PE}: \begin{itemize} - \item The \dest{} data object is updated. - \item The \source{} data object may be safely reused. - \end{itemize} + \item The \dest{} data object is updated. + \item The \source{} data object may be safely reused. + \end{itemize} \begin{DeprecateBlock} \openshmem active-set broadcast routines are collective routines over an active set. @@ -111,30 +111,30 @@ For active-set-based broadcasts: \begin{itemize} - \item The \VAR{dest} object is updated on all PEs other than the root PE. - \item All \acp{PE} in the active set defined by the - \VAR{PE\_start}, \VAR{logPE\_stride}, \VAR{PE\_size} triplet - must participate in the operation. - \item Only \acp{PE} in the active set may call the routine. If a - \ac{PE} not in the active set calls an active-set-based + \item The \VAR{dest} object is updated on all PEs other than the root PE. + \item All \acp{PE} in the active set defined by the + \VAR{PE\_start}, \VAR{logPE\_stride}, \VAR{PE\_size} triplet + must participate in the operation. + \item Only \acp{PE} in the active set may call the routine. If a + \ac{PE} not in the active set calls an active-set-based collective routine, the behavior is undefined. 
- \item The values of arguments \VAR{PE\_root}, \VAR{PE\_start}, + \item The values of arguments \VAR{PE\_root}, \VAR{PE\_start}, \VAR{logPE\_stride}, and \VAR{PE\_size} must be the same value on all \acp{PE} in the active set. - \item The value of \VAR{PE\_root} must be between \CONST{0} and + \item The value of \VAR{PE\_root} must be between \CONST{0} and \VAR{PE\_size $-$ 1}. - \item The same \VAR{pSync} work array must be passed by all \acp{PE} + \item The same \VAR{pSync} work array must be passed by all \acp{PE} in the active set. \end{itemize} Before any \ac{PE} calls a active-set-based broadcast routine, the following conditions must be ensured: \begin{itemize} - \item The \dest{} array on all \acp{PE} participating in the broadcast - is ready to accept the broadcast data. - \item The \VAR{pSync} array on all \acp{PE} in the - active set is not still in use from a prior call to an \openshmem - collective routine. + \item The \dest{} array on all \acp{PE} participating in the broadcast + is ready to accept the broadcast data. + \item The \VAR{pSync} array on all \acp{PE} in the + active set is not still in use from a prior call to an \openshmem + collective routine. \end{itemize} Otherwise, the behavior is undefined. From ca0e495945dcc3ff58ad5079a8e1c22ad15c5730 Mon Sep 17 00:00:00 2001 From: Kevin Waters Date: Fri, 26 Jul 2024 13:38:52 -0400 Subject: [PATCH 11/20] Edit Whitespace in shmem_collect --- content/shmem_collect.tex | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/content/shmem_collect.tex b/content/shmem_collect.tex index 68b3e614f..d14d8f17b 100644 --- a/content/shmem_collect.tex +++ b/content/shmem_collect.tex @@ -67,12 +67,12 @@ operation to concatenate \VAR{nelems} data items from the \source{} array into the \dest{} array, over an \openshmem team in processor number order. 
- The resultant \dest{} array contains the contribution from + The resultant \dest{} array contains the contribution from \acp{PE} as follows: - \begin{itemize} - \item For a team, the data from \ac{PE} number \CONST{0} in the team is first, then the - contribution from \ac{PE} \CONST{1} in the team, and so on. + \begin{itemize} + \item For a team, the data from \ac{PE} number \CONST{0} in the team is first, then the + contribution from \ac{PE} \CONST{1} in the team, and so on. \end{itemize} The collected result is written to the \dest{} array for all \acp{PE} @@ -96,9 +96,9 @@ in processor number order. The resultant \dest{} array contains the contribution from \acp{PE} as follows: \begin{itemize} - \item For an active set, the data from \ac{PE} \VAR{PE\_start} is first, then the - contribution from \ac{PE} \VAR{PE\_start} + \VAR{PE\_stride} second, and so on. - \end{itemize} + \item For an active set, the data from \ac{PE} \VAR{PE\_start} is first, then the + contribution from \ac{PE} \VAR{PE\_start} + \VAR{PE\_stride} second, and so on. + \end{itemize} The collected result is written to the \dest{} array for all \acp{PE} that participate in the operation. The same \dest{} and \source{} From a537c154259a7435176c146a899745efa9bc0868 Mon Sep 17 00:00:00 2001 From: Kevin Waters Date: Fri, 26 Jul 2024 13:40:34 -0400 Subject: [PATCH 12/20] Fix Whitespace in collective_intro --- content/collective_intro.tex | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/content/collective_intro.tex b/content/collective_intro.tex index 249e0fbff..4996b1784 100644 --- a/content/collective_intro.tex +++ b/content/collective_intro.tex @@ -4,21 +4,21 @@ \openshmem provides four types of collective routines: \begin{enumerate} -\item Collective routines that operate on teams use a team handle parameter to determine - which \acp{PE} will participate in the routine, and use resources encapsulated by the team object - to perform operations. 
See Section~\ref{subsec:team} for details on team management. + \item Collective routines that operate on teams use a team handle parameter to determine + which \acp{PE} will participate in the routine, and use resources encapsulated by the team object + to perform operations. See Section~\ref{subsec:team} for details on team management. -\begin{DeprecateBlock} -\item Collective routines that operate on active sets use a set of parameters to determine - which \acp{PE} will participate and what resources are used to perform operations. + \begin{DeprecateBlock} + \item Collective routines that operate on active sets use a set of parameters to determine + which \acp{PE} will participate and what resources are used to perform operations. -\item Collective routines that do not accept active set - parameters and, as required, the default context. -\end{DeprecateBlock} + \item Collective routines that do not accept active set + parameters and, as required, the default context. + \end{DeprecateBlock} -\item Collective routines that do not accept team - parameters, which implicitly operate on the world team and, as - required, the default context. + \item Collective routines that do not accept team + parameters, which implicitly operate on the world team and, as + required, the default context. 
\end{enumerate} Concurrent accesses to symmetric memory by an \openshmem collective From 633786f32e0832c214c5c09b183e7b1ae5c8c8c1 Mon Sep 17 00:00:00 2001 From: Kevin Waters Date: Fri, 26 Jul 2024 13:59:23 -0400 Subject: [PATCH 13/20] Fix Typo in shmem_alltoall --- content/shmem_alltoall.tex | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/content/shmem_alltoall.tex b/content/shmem_alltoall.tex index bcd531562..4e145c266 100644 --- a/content/shmem_alltoall.tex +++ b/content/shmem_alltoall.tex @@ -35,7 +35,7 @@ \apiargument{OUT}{dest}{Symmetric address of a data object large enough to receive the combined total of \VAR{nelems} elements from each \ac{PE} in the - particpating \acp{PE}. + participating \acp{PE}. The type of \dest{} should match that implied in the SYNOPSIS section.} \apiargument{IN}{source}{Symmetric address of a data object that contains \VAR{nelems} elements of data for each \ac{PE} in the participating \acp{PE}, ordered according to From c8e9ef6a26f423b2d9a49a29d4f5380d86b41153 Mon Sep 17 00:00:00 2001 From: Kevin Waters Date: Thu, 29 Aug 2024 14:58:02 -0400 Subject: [PATCH 14/20] Update content/shmem_team_split_strided.tex Co-authored-by: David Ozog --- content/shmem_team_split_strided.tex | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/content/shmem_team_split_strided.tex b/content/shmem_team_split_strided.tex index 59decede7..26616d396 100644 --- a/content/shmem_team_split_strided.tex +++ b/content/shmem_team_split_strided.tex @@ -101,7 +101,7 @@ } \apinotes{ - The \FUNC{shmem\_team\_split\_strided} operation can take any positive integer value + The \FUNC{shmem\_team\_split\_strided} operation can take any integer value \VAR{stride} argument. 
See the description of team handles and predefined teams in From bc114219e340bd9fb3315b208b810f605a20124b Mon Sep 17 00:00:00 2001 From: Kevin Waters Date: Fri, 30 Aug 2024 10:24:23 -0400 Subject: [PATCH 15/20] Update content/shmem_broadcast.tex typo Co-authored-by: Muhammad Awad --- content/shmem_broadcast.tex | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/content/shmem_broadcast.tex b/content/shmem_broadcast.tex index 49abd50be..d67c2fb0f 100644 --- a/content/shmem_broadcast.tex +++ b/content/shmem_broadcast.tex @@ -138,7 +138,7 @@ \end{itemize} Otherwise, the behavior is undefined. - Upon return from a active-based broadcast routine, the following are true for the local + Upon return from an active-set-based broadcast routine, the following are true for the local \ac{PE}: \begin{itemize} \item If the current PE is not the root PE, the \dest{} data object is updated. From bda943c3351ea51128ee10321e5b7c0f72194b4b Mon Sep 17 00:00:00 2001 From: Kevin Waters Date: Fri, 30 Aug 2024 11:00:53 -0400 Subject: [PATCH 16/20] Remove active language in reduction api args --- content/shmem_reductions.tex | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/content/shmem_reductions.tex b/content/shmem_reductions.tex index 79f0b42a1..46cb0abe1 100644 --- a/content/shmem_reductions.tex +++ b/content/shmem_reductions.tex @@ -252,9 +252,7 @@ \subsubsubsection{PROD} contains one element for each separate reduction routine. The type of \source{} should match that implied in the SYNOPSIS section.} \apiargument{IN}{nreduce}{The number of elements in the \dest{} and \source{} - arrays. In teams based \ac{API} calls, \VAR{nreduce} must be of type size\_t. - In deprecated active-set based \ac{API} calls, - \VAR{nreduce} must be of type integer.} + arrays.
In team-based \ac{API} calls, \VAR{nreduce} must be of type size\_t.} \begin{DeprecateBlock} \apiargument{IN}{nreduce}{In active-set based \ac{API} calls, From b794cea8dad68c01633878e412274c9cfa88fab7 Mon Sep 17 00:00:00 2001 From: Muhammad Awad Date: Fri, 30 Aug 2024 11:53:46 -0700 Subject: [PATCH 17/20] Remove unnecessary new line --- content/shmem_alltoall.tex | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/content/shmem_alltoall.tex b/content/shmem_alltoall.tex index 4e145c266..190232d7c 100644 --- a/content/shmem_alltoall.tex +++ b/content/shmem_alltoall.tex @@ -89,9 +89,7 @@ Given a \ac{PE} \VAR{i} that is the \kth \ac{PE} participating in the operation and a \ac{PE} \VAR{j} that is the \lth \ac{PE} - participating in the operation, - - \ac{PE} \VAR{i} sends the \lth block of its \VAR{source} object to + participating in the operation, \ac{PE} \VAR{i} sends the \lth block of its \VAR{source} object to the \kth block of the \VAR{dest} object of \ac{PE} \VAR{j}.
From c59684132b12b93810b960e1c6b45e2a771c3947 Mon Sep 17 00:00:00 2001 From: David Ozog Date: Thu, 29 Aug 2024 16:43:36 -0400 Subject: [PATCH 18/20] scan: 488 section committee edits (nelems/overlap) --- content/shmem_scan.tex | 28 ++++++++++++++++------------ 1 file changed, 16 insertions(+), 12 deletions(-) diff --git a/content/shmem_scan.tex b/content/shmem_scan.tex index 618a51a05..69f05bc96 100644 --- a/content/shmem_scan.tex +++ b/content/shmem_scan.tex @@ -6,16 +6,16 @@ %% C11 \begin{C11synopsis} -int @\FuncDecl{shmem\_sum\_inscan}@(shmem_team_t team, TYPE *dest, const TYPE *source, size_t nreduce); -int @\FuncDecl{shmem\_sum\_exscan}@(shmem_team_t team, TYPE *dest, const TYPE *source, size_t nreduce); +int @\FuncDecl{shmem\_sum\_inscan}@(shmem_team_t team, TYPE *dest, const TYPE *source, size_t nelems); +int @\FuncDecl{shmem\_sum\_exscan}@(shmem_team_t team, TYPE *dest, const TYPE *source, size_t nelems); \end{C11synopsis} where \TYPE{} is one of the integer, real, or complex types supported for the SUM operation as specified by Table \ref{teamreducetypes}. %% C/C++ \begin{Csynopsis} -int @\FuncDecl{shmem\_\FuncParam{TYPENAME}\_sum\_inscan}@(shmem_team_t team, TYPE *dest, const TYPE *source, size_t nreduce); -int @\FuncDecl{shmem\_\FuncParam{TYPENAME}\_sum\_exscan}@(shmem_team_t team, TYPE *dest, const TYPE *source, size_t nreduce); +int @\FuncDecl{shmem\_\FuncParam{TYPENAME}\_sum\_inscan}@(shmem_team_t team, TYPE *dest, const TYPE *source, size_t nelems); +int @\FuncDecl{shmem\_\FuncParam{TYPENAME}\_sum\_exscan}@(shmem_team_t team, TYPE *dest, const TYPE *source, size_t nelems); \end{Csynopsis} where \TYPE{} is one of the integer, real, or complex types supported for the SUM operation and has a corresponding \TYPENAME{} as specified @@ -26,17 +26,17 @@ The team over which to perform the operation. } \apiargument{OUT}{dest}{ - Symmetric address of an array, of length \VAR{nreduce} elements, - to receive the result of the scan routines. 
The type of + Symmetric address of an array, of length \VAR{nelems} elements, + to receive the result of the scan operation. The type of \dest{} should match that implied in the SYNOPSIS section. } \apiargument{IN}{source}{ - Symmetric address of an array, of length \VAR{nreduce} elements, - that contains one element for each separate scan routine. + Symmetric address of an array, of length \VAR{nelems} elements, + that contains one element for each separate scan operation. The type of \source{} should match that implied in the SYNOPSIS section. } - \apiargument{IN}{nreduce}{ + \apiargument{IN}{nelems}{ The number of elements in the \dest{} and \source{} arrays. } \end{apiarguments} @@ -49,7 +49,7 @@ multiple \acp{PE}. The scan operations are performed with the SUM operator. - The \VAR{nreduce} argument determines the number of separate scan + The \VAR{nelems} argument determines the number of separate scan operations to perform. The \source{} array on all \acp{PE} participating in the operation provides one element for each scan. The results of the scan operations are placed in the \dest{} array @@ -75,10 +75,14 @@ \end{cases} \end{equation*} + + The same \source{} and \dest{} arrays must be passed by all PEs that + participate in the collective. The \source{} and \dest{} arguments must either be the same symmetric address, or two different symmetric addresses - corresponding to buffers that do not overlap in memory. That is, - they must be completely overlapping or completely disjoint. + corresponding to buffers that do not overlap in memory. + That is, they must be completely overlapping (sometimes referred to as an + ``in place'' reduction) or completely disjoint. Team-based scan routines operate over all \acp{PE} in the provided team argument. 
All \acp{PE} in the provided team must participate in From 076580775dc377f4e41bae48ced6f8d21af999ba Mon Sep 17 00:00:00 2001 From: David Ozog Date: Fri, 30 Aug 2024 11:56:43 -0400 Subject: [PATCH 19/20] collectives: clarify src buffer entry requirements --- content/shmem_alltoall.tex | 12 ++++++++---- content/shmem_broadcast.tex | 15 +++++++++------ content/shmem_collect.tex | 11 +++++++++++ content/shmem_reductions.tex | 12 ++++++++---- content/shmem_scan.tex | 13 ++++++++++--- 5 files changed, 46 insertions(+), 17 deletions(-) diff --git a/content/shmem_alltoall.tex b/content/shmem_alltoall.tex index 190232d7c..07317b69f 100644 --- a/content/shmem_alltoall.tex +++ b/content/shmem_alltoall.tex @@ -98,12 +98,16 @@ If \VAR{team} compares equal to \LibConstRef{SHMEM\_TEAM\_INVALID} or is otherwise invalid, the behavior is undefined. - Before any \ac{PE} calls a \FUNC{shmem\_alltoall} routine, - the following conditions must be ensured: + Before any \ac{PE} calls a \FUNC{shmem\_alltoall} routine, the following + conditions must be ensured, otherwise the behavior is undefined: \begin{itemize} - \item The \VAR{dest} data object on all \acp{PE} in the team is - ready to accept the \FUNC{shmem\_alltoall} data. + \item The \dest{} array on all \acp{PE} in the team is ready to + accept the result of the operation. + \item The \source{} buffer at the local \ac{PE} is ready to be + read by any \ac{PE} in the team. \end{itemize} + The application does not need to synchronize to ensure that the \source{} + buffer is ready across all \acp{PE} prior to calling this routine. Upon return from a \FUNC{shmem\_alltoall} routine, the following is true for the local PE: diff --git a/content/shmem_broadcast.tex b/content/shmem_broadcast.tex index d67c2fb0f..05b670682 100644 --- a/content/shmem_broadcast.tex +++ b/content/shmem_broadcast.tex @@ -85,13 +85,16 @@ the team. 
\end{itemize} - Before any \ac{PE} calls a broadcast routine, the following - conditions must be ensured: + Before any \ac{PE} calls a broadcast routine, the following conditions + must be ensured, otherwise the behavior is undefined: \begin{itemize} - \item The \dest{} array on all \acp{PE} participating in the broadcast - is ready to accept the broadcast data. - \end{itemize} - Otherwise, the behavior is undefined. + \item The \dest{} array on all \acp{PE} in the team is ready to + accept the result of the operation. + \item The \source{} buffer at the local root \ac{PE} is ready to be + read by any \ac{PE} in the team. + \end{itemize} + The application does not need to synchronize to ensure that the \source{} + buffer is ready across all \acp{PE} prior to calling this routine. Upon return from a team-based broadcast routine, the following are true for the local \ac{PE}: diff --git a/content/shmem_collect.tex b/content/shmem_collect.tex index d14d8f17b..479c93e29 100644 --- a/content/shmem_collect.tex +++ b/content/shmem_collect.tex @@ -88,6 +88,17 @@ If \VAR{team} compares equal to \LibConstRef{SHMEM\_TEAM\_INVALID} or is otherwise invalid, the behavior is undefined. + Before any \ac{PE} calls a collect routine, the following conditions must + be ensured, otherwise the behavior is undefined: + \begin{itemize} + \item The \dest{} array on all \acp{PE} in the team is ready to + accept the result of the operation. + \item The \source{} buffer at the local \ac{PE} is ready to be read + by any \ac{PE} in the team. + \end{itemize} + The application does not need to synchronize to ensure that the \source{} + buffer is ready across all \acp{PE} prior to calling this routine. 
+ \begin{DeprecateBlock} \openshmem \FUNC{collect} and \FUNC{fcollect} routines perform a collective operation to concatenate \VAR{nelems} diff --git a/content/shmem_reductions.tex b/content/shmem_reductions.tex index 46cb0abe1..888a51e19 100644 --- a/content/shmem_reductions.tex +++ b/content/shmem_reductions.tex @@ -295,12 +295,16 @@ \subsubsubsection{PROD} If \VAR{team} compares equal to \LibConstRef{SHMEM\_TEAM\_INVALID} or is otherwise invalid, the behavior is undefined. - Before any \ac{PE} calls a reduction routine, the following conditions must be ensured: + Before any \ac{PE} calls a reduction routine, the following conditions + must be ensured, otherwise the behavior is undefined: \begin{itemize} - \item The \dest{} array on all \acp{PE} participating in the reduction - is ready to accept the results of the \OPR{reduction}. + \item The \dest{} array on all \acp{PE} in the team is ready to + accept the results of the operation. + \item The \source{} buffer at the local \ac{PE} is ready to be read by + any \ac{PE} in the team. \end{itemize} - Otherwise, the behavior is undefined. + The application does not need to synchronize to ensure that the \source{} + buffer is ready across all \acp{PE} prior to calling this routine. Upon return from a reduction routine, the following are true for the local \ac{PE}: diff --git a/content/shmem_scan.tex b/content/shmem_scan.tex index 69f05bc96..f53f1acc1 100644 --- a/content/shmem_scan.tex +++ b/content/shmem_scan.tex @@ -90,9 +90,16 @@ \LibConstRef{SHMEM\_TEAM\_INVALID} or is otherwise invalid, the behavior is undefined. - Before any \ac{PE} calls a scan routine, the \dest{} array on all - \acp{PE} participating in the operation must be ready to accept the - results of the operation. Otherwise, the behavior is undefined. 
+ Before any \ac{PE} calls a scan routine, the following conditions must be + ensured, otherwise the behavior is undefined: + \begin{itemize} + \item The \dest{} array on all \acp{PE} in the team is ready to accept + the result of the operation. + \item The \source{} buffer at the local \ac{PE} is ready to be read by + any \ac{PE} in the team. + \end{itemize} + The application does not need to synchronize to ensure that the \source{} + buffer is ready across all \acp{PE} prior to calling this routine. Upon return from a scan routine, the following are true for the local \ac{PE}: the \dest{} array is updated, and the \source{} array From 00bcc40731eac1a6d1bbeb00181e76a6b2dc61d1 Mon Sep 17 00:00:00 2001 From: David Ozog Date: Fri, 30 Aug 2024 15:29:21 -0400 Subject: [PATCH 20/20] collectives: "array" instead of source "buffer" --- content/shmem_alltoall.tex | 4 ++-- content/shmem_broadcast.tex | 4 ++-- content/shmem_collect.tex | 4 ++-- content/shmem_reductions.tex | 4 ++-- content/shmem_scan.tex | 4 ++-- 5 files changed, 10 insertions(+), 10 deletions(-) diff --git a/content/shmem_alltoall.tex b/content/shmem_alltoall.tex index 07317b69f..f271de112 100644 --- a/content/shmem_alltoall.tex +++ b/content/shmem_alltoall.tex @@ -103,11 +103,11 @@ \begin{itemize} \item The \dest{} array on all \acp{PE} in the team is ready to accept the result of the operation. - \item The \source{} buffer at the local \ac{PE} is ready to be + \item The \source{} array at the local \ac{PE} is ready to be read by any \ac{PE} in the team. \end{itemize} The application does not need to synchronize to ensure that the \source{} - buffer is ready across all \acp{PE} prior to calling this routine. + array is ready across all \acp{PE} prior to calling this routine. 
Upon return from a \FUNC{shmem\_alltoall} routine, the following is true for the local PE: diff --git a/content/shmem_broadcast.tex b/content/shmem_broadcast.tex index 05b670682..bd936b5f8 100644 --- a/content/shmem_broadcast.tex +++ b/content/shmem_broadcast.tex @@ -90,11 +90,11 @@ \begin{itemize} \item The \dest{} array on all \acp{PE} in the team is ready to accept the result of the operation. - \item The \source{} buffer at the local root \ac{PE} is ready to be + \item The \source{} array at the local root \ac{PE} is ready to be read by any \ac{PE} in the team. \end{itemize} The application does not need to synchronize to ensure that the \source{} - buffer is ready across all \acp{PE} prior to calling this routine. + array is ready across all \acp{PE} prior to calling this routine. Upon return from a team-based broadcast routine, the following are true for the local \ac{PE}: diff --git a/content/shmem_collect.tex b/content/shmem_collect.tex index 479c93e29..b7e2d3fac 100644 --- a/content/shmem_collect.tex +++ b/content/shmem_collect.tex @@ -93,11 +93,11 @@ \begin{itemize} \item The \dest{} array on all \acp{PE} in the team is ready to accept the result of the operation. - \item The \source{} buffer at the local \ac{PE} is ready to be read + \item The \source{} array at the local \ac{PE} is ready to be read by any \ac{PE} in the team. \end{itemize} The application does not need to synchronize to ensure that the \source{} - buffer is ready across all \acp{PE} prior to calling this routine. + array is ready across all \acp{PE} prior to calling this routine. 
\begin{DeprecateBlock} \openshmem \FUNC{collect} and \FUNC{fcollect} routines perform a collective diff --git a/content/shmem_reductions.tex b/content/shmem_reductions.tex index 888a51e19..fa48bb3d8 100644 --- a/content/shmem_reductions.tex +++ b/content/shmem_reductions.tex @@ -300,11 +300,11 @@ \subsubsubsection{PROD} \begin{itemize} \item The \dest{} array on all \acp{PE} in the team is ready to accept the results of the operation. - \item The \source{} buffer at the local \ac{PE} is ready to be read by + \item The \source{} array at the local \ac{PE} is ready to be read by any \ac{PE} in the team. \end{itemize} The application does not need to synchronize to ensure that the \source{} - buffer is ready across all \acp{PE} prior to calling this routine. + array is ready across all \acp{PE} prior to calling this routine. Upon return from a reduction routine, the following are true for the local \ac{PE}: diff --git a/content/shmem_scan.tex b/content/shmem_scan.tex index f53f1acc1..e55b2b7e8 100644 --- a/content/shmem_scan.tex +++ b/content/shmem_scan.tex @@ -95,11 +95,11 @@ \begin{itemize} \item The \dest{} array on all \acp{PE} in the team is ready to accept the result of the operation. - \item The \source{} buffer at the local \ac{PE} is ready to be read by + \item The \source{} array at the local \ac{PE} is ready to be read by any \ac{PE} in the team. \end{itemize} The application does not need to synchronize to ensure that the \source{} - buffer is ready across all \acp{PE} prior to calling this routine. + array is ready across all \acp{PE} prior to calling this routine. Upon return from a scan routine, the following are true for the local \ac{PE}: the \dest{} array is updated, and the \source{} array