From ffd20e6c0a93e298b4bbed361ad91657577db821 Mon Sep 17 00:00:00 2001 From: David Ozog Date: Tue, 21 May 2024 12:51:40 -0400 Subject: [PATCH 1/6] Add some suggested doc-edits throughout the spec --- content/collective_intro.tex | 2 +- content/memmgmt_intro.tex | 2 +- content/teams_intro.tex | 4 ++-- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/content/collective_intro.tex b/content/collective_intro.tex index a2752c613..a219422ba 100644 --- a/content/collective_intro.tex +++ b/content/collective_intro.tex @@ -19,7 +19,7 @@ \end{enumerate} Concurrent accesses to symmetric memory by an \openshmem collective -routine and any other means of access---where at least one updates the +routine and any other means of access---where at least one PE updates the symmetric memory---results in undefined behavior. Since \acp{PE} can enter and exit collectives at different times, accessing such memory remotely may require additional synchronization. diff --git a/content/memmgmt_intro.tex b/content/memmgmt_intro.tex index 393785ff5..8cb6605c0 100644 --- a/content/memmgmt_intro.tex +++ b/content/memmgmt_intro.tex @@ -3,7 +3,7 @@ symmetric data objects in the symmetric heap. The symmetric memory allocation routines differ from the private heap -allocation routines in that they must be called by all \acp{PE} in a +allocation routines in that they must be called by all \acp{PE} in the world team. When specified, each of these routines includes at least one call to a procedure that is semantically equivalent to \FUNC{shmem\_barrier\_all}. This ensures that all \acp{PE} diff --git a/content/teams_intro.tex b/content/teams_intro.tex index 851d1c1e0..cca6d01b7 100644 --- a/content/teams_intro.tex +++ b/content/teams_intro.tex @@ -21,7 +21,7 @@ \subsubsection*{Predefined and Application-Defined Teams} portion of an application. Any team successfully created by a \FUNC{shmem\_team\_split\_*} routine is valid until it is destroyed. -All valid teams have a least one member. 
+All valid teams have at least one member. \subsubsection*{Team Handles} @@ -84,7 +84,7 @@ \subsubsection*{Team Creation} \acp{PE} in a newly created team are consecutively numbered starting with \ac{PE} number 0. \acp{PE} are ordered by their \ac{PE} number in -the parent team. Team relative \ac{PE} +the parent team. Team-relative \ac{PE} numbers can be used for point-to-point operations through team-based contexts (see Section~\ref{sec:ctx}) or using the translation routine \FUNC{shmem\_team\_translate\_pe}. From cf01d5eb41d3b5a9db104bf045da80c02a3cd0b5 Mon Sep 17 00:00:00 2001 From: David Ozog Date: Thu, 11 Jul 2024 15:45:13 -0400 Subject: [PATCH 2/6] clarify memory-management routines as world-collective --- content/backmatter.tex | 6 ++++++ content/shmem_align.tex | 3 ++- content/shmem_free.tex | 3 ++- content/shmem_malloc.tex | 3 ++- content/shmem_malloc_hints.tex | 3 ++- content/shmem_realloc.tex | 3 ++- 6 files changed, 16 insertions(+), 5 deletions(-) diff --git a/content/backmatter.tex b/content/backmatter.tex index 7e6f4a7dd..ec95c8913 100644 --- a/content/backmatter.tex +++ b/content/backmatter.tex @@ -697,6 +697,12 @@ \section{Version 1.6} functions from a single entry in \openshmem[1.5] into separate entries. \ChangelogRef{subsec:shmem_malloc, subsec:shmem_free, subsec:shmem_realloc, subsec:shmem_align}% +% +\item Clarified that the \FUNC{shmem\_\{malloc, free, realloc, align, + malloc\_with\_hints, calloc\}} functions are collective operations on + the world team. 
+\ChangelogRef{subsec:shmem_malloc, subsec:shmem_free, subsec:shmem_realloc, + subsec:shmem_align, subsec:shmmallochint, subsec:shmem_calloc}% \item Corrected the level argument's recommended value in API notes for \FUNC{shmem\_pcontrol} to indicate that the value should be greater than 2 to enable profiling with profile library defined effects and diff --git a/content/shmem_align.tex b/content/shmem_align.tex index f054f66c3..0cd973088 100644 --- a/content/shmem_align.tex +++ b/content/shmem_align.tex @@ -17,7 +17,8 @@ \apidescription{ - The \FUNC{shmem\_align} routine allocates a block in the symmetric + The \FUNC{shmem\_align} routine is a collective operation on the + world team that allocates a block in the symmetric heap that has a byte alignment specified by the \VAR{alignment} argument. The value of \VAR{alignment} shall be a multiple of \CONST{sizeof(void *)} that is also a power of two; otherwise, the diff --git a/content/shmem_free.tex b/content/shmem_free.tex index 6d70228a7..d37b84959 100644 --- a/content/shmem_free.tex +++ b/content/shmem_free.tex @@ -13,7 +13,8 @@ \end{apiarguments} \apidescription{ - The \FUNC{shmem\_free} routine causes the block to which \VAR{ptr} + The \FUNC{shmem\_free} routine is a collective operation on the + world team that causes the block to which \VAR{ptr} points to be deallocated, that is, made available for further allocation. If \VAR{ptr} is a null pointer, no action is performed; otherwise, \FUNC{shmem\_free} calls a barrier on entry. 
diff --git a/content/shmem_malloc.tex b/content/shmem_malloc.tex index c7ef30c01..6b0b176fe 100644 --- a/content/shmem_malloc.tex +++ b/content/shmem_malloc.tex @@ -15,7 +15,8 @@ \apidescription{ - The \FUNC{shmem\_malloc} routine returns the symmetric address of a + The \FUNC{shmem\_malloc} routine is a collective operation on the + world team and returns the symmetric address of a block of at least \VAR{size} bytes, which shall be suitably aligned so that it may be assigned to a pointer to any type of object. This space is allocated from the symmetric heap (in contrast to diff --git a/content/shmem_malloc_hints.tex b/content/shmem_malloc_hints.tex index f840cb85a..f63da3bf6 100644 --- a/content/shmem_malloc_hints.tex +++ b/content/shmem_malloc_hints.tex @@ -18,7 +18,8 @@ \apidescription{ - The \FUNC{shmem\_malloc\_with\_hints} routine, like \FUNC{shmem\_malloc}, returns a pointer to a block of at least + The \FUNC{shmem\_malloc\_with\_hints} routine, like \FUNC{shmem\_malloc}, + is a collective operation on the world team that returns a pointer to a block of at least \VAR{size} bytes, which shall be suitably aligned so that it may be assigned to a pointer to any type of object. This space is allocated from the symmetric heap (similar to \FUNC{shmem\_malloc}). When the \VAR{size} is zero, diff --git a/content/shmem_realloc.tex b/content/shmem_realloc.tex index b061f0ac0..388e7bb23 100644 --- a/content/shmem_realloc.tex +++ b/content/shmem_realloc.tex @@ -16,7 +16,8 @@ \apidescription{ - The \FUNC{shmem\_realloc} routine changes the size of the block to + The \FUNC{shmem\_realloc} routine is a collective operation on + the world team that changes the size of the block to which \VAR{ptr} points to the size (in bytes) specified by \VAR{size}. The contents of the block are unchanged up to the lesser of the new and old sizes. 
From a392d409d6dd3d94f6685eaf9fe5941bb0e40c8d Mon Sep 17 00:00:00 2001 From: David Ozog Date: Thu, 11 Jul 2024 16:23:57 -0400 Subject: [PATCH 3/6] teams: clarify behavior of split with strides <= 0 --- content/backmatter.tex | 4 ++++ content/shmem_team_split_strided.tex | 11 ++++++++--- 2 files changed, 12 insertions(+), 3 deletions(-) diff --git a/content/backmatter.tex b/content/backmatter.tex index 7e6f4a7dd..9dc8d17f1 100644 --- a/content/backmatter.tex +++ b/content/backmatter.tex @@ -703,6 +703,10 @@ \section{Version 1.6} additional arguments. \ChangelogRef{subsec:shmem_pcontrol} % +\item Clarified the behavior of \FUNC{shmem\_team\_split\_strided} when the + stride argument is 0 or negative. +\ChangelogRef{subsec:shmem_team_split_strided} +% \end{itemize} \section{Version 1.5} diff --git a/content/shmem_team_split_strided.tex b/content/shmem_team_split_strided.tex index 990f62209..089697926 100644 --- a/content/shmem_team_split_strided.tex +++ b/content/shmem_team_split_strided.tex @@ -50,10 +50,15 @@ i \in \mathbb{Z}_{size-1} \end{equation*} where $\mathbb{Z}$ is the set of natural numbers ($0, 1, \dots$), $N$ is the -number of \acp{PE} in the parent team and $size$ is a positive number indicating -the number of \acp{PE} in the new team. The index $i$ specifies the number of -the given PE in the new team. Thus, \acp{PE} in the new team remain in the same +number of \acp{PE} in the parent team, $size$ is a positive number indicating +the number of \acp{PE} in the new team, and $stride$ is an integer. +The index $i$ specifies the number of the given \ac{PE} in the new team. +When $stride$ is greater than zero, \acp{PE} in the new team remain in the same relative order as in the parent team. +When $stride$ is less than zero, \acp{PE} in the new team are in \textit{reverse} +relative order with respect to the parent team. +If a $stride$ value equal to 0 is passed to \FUNC{shmem\_team\_split\_strided}, +then the $size$ argument passed must be 1, or the behavior is undefined. 
This routine must be called by all \acp{PE} in the parent team. All \acp{PE} must provide the same values for the \ac{PE} triplet. From c2354e6762346e0f04f18231123981628b17cce5 Mon Sep 17 00:00:00 2001 From: David Ozog Date: Fri, 12 Jul 2024 14:57:48 -0400 Subject: [PATCH 4/6] sessions: fix function names in backmatter (_ctx_) --- content/backmatter.tex | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/content/backmatter.tex b/content/backmatter.tex index 7e6f4a7dd..84c950647 100644 --- a/content/backmatter.tex +++ b/content/backmatter.tex @@ -686,8 +686,8 @@ \section{Version 1.6} operations for team-based reductions. \ChangelogRef{teamreducetypes}% % -\item Added the session routines, \FUNC{shmem\_session\_start} and - \FUNC{shmem\_session\_stop}, which allow users to pass hints to the +\item Added the session routines, \FUNC{shmem\_ctx\_session\_start} and + \FUNC{shmem\_ctx\_session\_stop}, which allow users to pass hints to the \openshmem library to apply runtime optimizations. \ChangelogRef{subsec:sessions}% \item Added fine grained completion routine: \FUNC{shmem\_pe\_quiet}. From 47e38ecf1048318eecdb9af8e908349444f98ccc Mon Sep 17 00:00:00 2001 From: David Ozog Date: Fri, 12 Jul 2024 16:10:27 -0400 Subject: [PATCH 5/6] Update content/collective_intro.tex --- content/collective_intro.tex | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/content/collective_intro.tex b/content/collective_intro.tex index a219422ba..823164abe 100644 --- a/content/collective_intro.tex +++ b/content/collective_intro.tex @@ -19,7 +19,7 @@ \end{enumerate} Concurrent accesses to symmetric memory by an \openshmem collective -routine and any other means of access---where at least one PE updates the +routine and any other means of access---where at least one \ac{PE} updates the symmetric memory---results in undefined behavior. 
Since \acp{PE} can enter and exit collectives at different times, accessing such memory remotely may require additional synchronization. From 75d245f699309624645482a41e894bac0124fefb Mon Sep 17 00:00:00 2001 From: Kevin Waters Date: Fri, 16 Aug 2024 09:10:21 -0400 Subject: [PATCH 6/6] Remove newline shmalloc_hints, prevents compilation --- content/shmem_malloc_hints.tex | 4 ---- 1 file changed, 4 deletions(-) diff --git a/content/shmem_malloc_hints.tex b/content/shmem_malloc_hints.tex index f840cb85a..174e143a4 100644 --- a/content/shmem_malloc_hints.tex +++ b/content/shmem_malloc_hints.tex @@ -57,19 +57,15 @@ \tabularnewline \hline \endhead %% - \newline \CONST{0} & - \newline Behavior same as \FUNC{shmem\_malloc} \tabularnewline \hline \LibConstDecl{SHMEM\_MALLOC\_ATOMICS\_REMOTE} & - \newline Memory used for \VAR{atomic} operations \tabularnewline \hline \LibConstDecl{SHMEM\_MALLOC\_SIGNAL\_REMOTE} & - \newline Memory used for \VAR{signal} operations \tabularnewline \hline