Skip to content

Commit

Permalink
[OpenMP] Use new OMPT state and sync kinds for barrier events (#95602)
Browse files Browse the repository at this point in the history
Summary:
This change makes the runtime use the new OMPT state and sync-region kinds
introduced in OpenMP 5.1 in place of the deprecated implicit-barrier state and
sync kinds (`ompt_state_wait_barrier_implicit`, `ompt_sync_region_barrier_implicit`).
Events from implicit barriers now use distinct enumerators for workshare, parallel, and teams barriers.

Test Plan: 

Reviewers: 

Subscribers: 

Tasks: 

Tags: 


Differential Revision: https://phabricator.intern.facebook.com/D60251726
  • Loading branch information
hansangbae authored and yuxuanchen1997 committed Jul 25, 2024
1 parent ae21636 commit b01babd
Show file tree
Hide file tree
Showing 26 changed files with 507 additions and 539 deletions.
2 changes: 2 additions & 0 deletions openmp/runtime/src/include/omp-tools.h.var
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,8 @@
/* implicit barrier at the end of worksharing */ \
macro (ompt_state_wait_barrier_implicit, 0x013) /* implicit barrier */ \
macro (ompt_state_wait_barrier_explicit, 0x014) /* explicit barrier */ \
macro (ompt_state_wait_barrier_implementation, 0x015) /* implementation barrier */ \
macro (ompt_state_wait_barrier_teams, 0x016) /* teams barrier */ \
\
/* task wait states (32..63) */ \
macro (ompt_state_wait_taskwait, 0x020) /* waiting at a taskwait */ \
Expand Down
47 changes: 36 additions & 11 deletions openmp/runtime/src/kmp_barrier.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1805,7 +1805,25 @@ static int __kmp_barrier_template(enum barrier_type bt, int gtid, int is_split,
// It is OK to report the barrier state after the barrier begin callback.
// According to the OMPT specification, a compliant implementation may
// even delay reporting this state until the barrier begins to wait.
this_thr->th.ompt_thread_info.state = ompt_state_wait_barrier;
auto *ompt_thr_info = &this_thr->th.ompt_thread_info;
switch (barrier_kind) {
case ompt_sync_region_barrier_explicit:
ompt_thr_info->state = ompt_state_wait_barrier_explicit;
break;
case ompt_sync_region_barrier_implicit_workshare:
ompt_thr_info->state = ompt_state_wait_barrier_implicit_workshare;
break;
case ompt_sync_region_barrier_implicit_parallel:
ompt_thr_info->state = ompt_state_wait_barrier_implicit_parallel;
break;
case ompt_sync_region_barrier_teams:
ompt_thr_info->state = ompt_state_wait_barrier_teams;
break;
case ompt_sync_region_barrier_implementation:
[[fallthrough]];
default:
ompt_thr_info->state = ompt_state_wait_barrier_implementation;
}
}
#endif

Expand Down Expand Up @@ -2213,20 +2231,24 @@ void __kmp_join_barrier(int gtid) {
codeptr = team->t.ompt_team_info.master_return_address;
my_task_data = OMPT_CUR_TASK_DATA(this_thr);
my_parallel_data = OMPT_CUR_TEAM_DATA(this_thr);
ompt_sync_region_t sync_kind = ompt_sync_region_barrier_implicit_parallel;
ompt_state_t ompt_state = ompt_state_wait_barrier_implicit_parallel;
if (this_thr->th.ompt_thread_info.parallel_flags & ompt_parallel_league) {
sync_kind = ompt_sync_region_barrier_teams;
ompt_state = ompt_state_wait_barrier_teams;
}
if (ompt_enabled.ompt_callback_sync_region) {
ompt_callbacks.ompt_callback(ompt_callback_sync_region)(
ompt_sync_region_barrier_implicit, ompt_scope_begin, my_parallel_data,
my_task_data, codeptr);
sync_kind, ompt_scope_begin, my_parallel_data, my_task_data, codeptr);
}
if (ompt_enabled.ompt_callback_sync_region_wait) {
ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait)(
ompt_sync_region_barrier_implicit, ompt_scope_begin, my_parallel_data,
my_task_data, codeptr);
sync_kind, ompt_scope_begin, my_parallel_data, my_task_data, codeptr);
}
if (!KMP_MASTER_TID(ds_tid))
this_thr->th.ompt_thread_info.task_data = *OMPT_CUR_TASK_DATA(this_thr);
#endif
this_thr->th.ompt_thread_info.state = ompt_state_wait_barrier_implicit;
this_thr->th.ompt_thread_info.state = ompt_state;
}
#endif

Expand Down Expand Up @@ -2488,8 +2510,10 @@ void __kmp_fork_barrier(int gtid, int tid) {
}

#if OMPT_SUPPORT
ompt_state_t ompt_state = this_thr->th.ompt_thread_info.state;
if (ompt_enabled.enabled &&
this_thr->th.ompt_thread_info.state == ompt_state_wait_barrier_implicit) {
(ompt_state == ompt_state_wait_barrier_teams ||
ompt_state == ompt_state_wait_barrier_implicit_parallel)) {
int ds_tid = this_thr->th.th_info.ds.ds_tid;
ompt_data_t *task_data = (team)
? OMPT_CUR_TASK_DATA(this_thr)
Expand All @@ -2501,15 +2525,16 @@ void __kmp_fork_barrier(int gtid, int tid) {
(ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait) ||
ompt_callbacks.ompt_callback(ompt_callback_sync_region)))
codeptr = team ? team->t.ompt_team_info.master_return_address : NULL;
ompt_sync_region_t sync_kind = ompt_sync_region_barrier_implicit_parallel;
if (this_thr->th.ompt_thread_info.parallel_flags & ompt_parallel_league)
sync_kind = ompt_sync_region_barrier_teams;
if (ompt_enabled.ompt_callback_sync_region_wait) {
ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait)(
ompt_sync_region_barrier_implicit, ompt_scope_end, NULL, task_data,
codeptr);
sync_kind, ompt_scope_end, NULL, task_data, codeptr);
}
if (ompt_enabled.ompt_callback_sync_region) {
ompt_callbacks.ompt_callback(ompt_callback_sync_region)(
ompt_sync_region_barrier_implicit, ompt_scope_end, NULL, task_data,
codeptr);
sync_kind, ompt_scope_end, NULL, task_data, codeptr);
}
#endif
if (!KMP_MASTER_TID(ds_tid) && ompt_enabled.ompt_callback_implicit_task) {
Expand Down
17 changes: 10 additions & 7 deletions openmp/runtime/src/kmp_runtime.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7745,7 +7745,7 @@ int __kmp_invoke_task_func(int gtid) {
);
#if OMPT_SUPPORT
*exit_frame_p = NULL;
this_thr->th.ompt_thread_info.parallel_flags |= ompt_parallel_team;
this_thr->th.ompt_thread_info.parallel_flags = ompt_parallel_team;
#endif

#if KMP_STATS_ENABLED
Expand Down Expand Up @@ -7843,7 +7843,7 @@ int __kmp_invoke_teams_master(int gtid) {
#endif
__kmp_teams_master(gtid);
#if OMPT_SUPPORT
this_thr->th.ompt_thread_info.parallel_flags |= ompt_parallel_league;
this_thr->th.ompt_thread_info.parallel_flags = ompt_parallel_league;
#endif
__kmp_run_after_invoked_task(gtid, 0, this_thr, team);
return 1;
Expand Down Expand Up @@ -8126,8 +8126,10 @@ void __kmp_internal_join(ident_t *id, int gtid, kmp_team_t *team) {

__kmp_join_barrier(gtid); /* wait for everyone */
#if OMPT_SUPPORT
ompt_state_t ompt_state = this_thr->th.ompt_thread_info.state;
if (ompt_enabled.enabled &&
this_thr->th.ompt_thread_info.state == ompt_state_wait_barrier_implicit) {
(ompt_state == ompt_state_wait_barrier_teams ||
ompt_state == ompt_state_wait_barrier_implicit_parallel)) {
int ds_tid = this_thr->th.th_info.ds.ds_tid;
ompt_data_t *task_data = OMPT_CUR_TASK_DATA(this_thr);
this_thr->th.ompt_thread_info.state = ompt_state_overhead;
Expand All @@ -8138,15 +8140,16 @@ void __kmp_internal_join(ident_t *id, int gtid, kmp_team_t *team) {
ompt_callbacks.ompt_callback(ompt_callback_sync_region)))
codeptr = OMPT_CUR_TEAM_INFO(this_thr)->master_return_address;

ompt_sync_region_t sync_kind = ompt_sync_region_barrier_implicit_parallel;
if (this_thr->th.ompt_thread_info.parallel_flags & ompt_parallel_league)
sync_kind = ompt_sync_region_barrier_teams;
if (ompt_enabled.ompt_callback_sync_region_wait) {
ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait)(
ompt_sync_region_barrier_implicit, ompt_scope_end, NULL, task_data,
codeptr);
sync_kind, ompt_scope_end, NULL, task_data, codeptr);
}
if (ompt_enabled.ompt_callback_sync_region) {
ompt_callbacks.ompt_callback(ompt_callback_sync_region)(
ompt_sync_region_barrier_implicit, ompt_scope_end, NULL, task_data,
codeptr);
sync_kind, ompt_scope_end, NULL, task_data, codeptr);
}
#endif
if (!KMP_MASTER_TID(ds_tid) && ompt_enabled.ompt_callback_implicit_task) {
Expand Down
16 changes: 10 additions & 6 deletions openmp/runtime/src/kmp_wait_release.h
Original file line number Diff line number Diff line change
Expand Up @@ -323,19 +323,21 @@ static void __ompt_implicit_task_end(kmp_info_t *this_thr,
ompt_state_t ompt_state,
ompt_data_t *tId) {
int ds_tid = this_thr->th.th_info.ds.ds_tid;
if (ompt_state == ompt_state_wait_barrier_implicit) {
if (ompt_state == ompt_state_wait_barrier_implicit_parallel ||
ompt_state == ompt_state_wait_barrier_teams) {
this_thr->th.ompt_thread_info.state = ompt_state_overhead;
#if OMPT_OPTIONAL
void *codeptr = NULL;
ompt_sync_region_t sync_kind = ompt_sync_region_barrier_implicit_parallel;
if (this_thr->th.ompt_thread_info.parallel_flags & ompt_parallel_league)
sync_kind = ompt_sync_region_barrier_teams;
if (ompt_enabled.ompt_callback_sync_region_wait) {
ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait)(
ompt_sync_region_barrier_implicit, ompt_scope_end, NULL, tId,
codeptr);
sync_kind, ompt_scope_end, NULL, tId, codeptr);
}
if (ompt_enabled.ompt_callback_sync_region) {
ompt_callbacks.ompt_callback(ompt_callback_sync_region)(
ompt_sync_region_barrier_implicit, ompt_scope_end, NULL, tId,
codeptr);
sync_kind, ompt_scope_end, NULL, tId, codeptr);
}
#endif
if (!KMP_MASTER_TID(ds_tid)) {
Expand Down Expand Up @@ -455,7 +457,9 @@ final_spin=FALSE)
ompt_data_t *tId;
if (ompt_enabled.enabled) {
ompt_entry_state = this_thr->th.ompt_thread_info.state;
if (!final_spin || ompt_entry_state != ompt_state_wait_barrier_implicit ||
if (!final_spin ||
(ompt_entry_state != ompt_state_wait_barrier_implicit_parallel &&
ompt_entry_state != ompt_state_wait_barrier_teams) ||
KMP_MASTER_TID(this_thr->th.th_info.ds.ds_tid)) {
ompt_lw_taskteam_t *team = NULL;
if (this_thr->th.th_team)
Expand Down
15 changes: 8 additions & 7 deletions openmp/runtime/src/ompt-specific.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -503,22 +503,23 @@ static uint64_t __ompt_get_unique_id_internal() {

ompt_sync_region_t __ompt_get_barrier_kind(enum barrier_type bt,
kmp_info_t *thr) {
if (bt == bs_forkjoin_barrier)
return ompt_sync_region_barrier_implicit;
if (bt == bs_forkjoin_barrier) {
if (thr->th.ompt_thread_info.parallel_flags & ompt_parallel_league)
return ompt_sync_region_barrier_teams;
else
return ompt_sync_region_barrier_implicit_parallel;
}

if (bt != bs_plain_barrier)
if (bt != bs_plain_barrier || !thr->th.th_ident)
return ompt_sync_region_barrier_implementation;

if (!thr->th.th_ident)
return ompt_sync_region_barrier;

kmp_int32 flags = thr->th.th_ident->flags;

if ((flags & KMP_IDENT_BARRIER_EXPL) != 0)
return ompt_sync_region_barrier_explicit;

if ((flags & KMP_IDENT_BARRIER_IMPL) != 0)
return ompt_sync_region_barrier_implicit;
return ompt_sync_region_barrier_implicit_workshare;

return ompt_sync_region_barrier_implementation;
}
Loading

0 comments on commit b01babd

Please sign in to comment.