Skip to content

Commit

Permalink
[Flang][OpenMP][Lower] Add lowering support of OpenMP distribute to M…
Browse files Browse the repository at this point in the history
…LIR (#67798)

This patch adds support for lowering the OpenMP DISTRIBUTE directive
from PFT to MLIR. It only supports standalone DISTRIBUTE; support for
composite constructs will come in follow-up PRs.
  • Loading branch information
skatrak authored Jun 12, 2024
1 parent 1d45235 commit fc1c34b
Show file tree
Hide file tree
Showing 6 changed files with 374 additions and 9 deletions.
14 changes: 14 additions & 0 deletions flang/lib/Lower/OpenMP/ClauseProcessor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -317,6 +317,20 @@ bool ClauseProcessor::processDeviceType(
return false;
}

/// Lower the `dist_schedule` clause, if one is present, into \p result.
///
/// When the clause is found, `distScheduleStaticAttr` is set to a unit
/// attribute and, if the clause carries a chunk-size expression, that
/// expression is evaluated in \p stmtCtx and its base value is stored in
/// `distScheduleChunkSizeVar`.
///
/// \returns true if a `dist_schedule` clause was found, false otherwise (in
///          which case \p result is left untouched).
bool ClauseProcessor::processDistSchedule(
    lower::StatementContext &stmtCtx,
    mlir::omp::DistScheduleClauseOps &result) const {
  const auto *distSchedule = findUniqueClause<omp::clause::DistSchedule>();
  if (!distSchedule)
    return false;

  result.distScheduleStaticAttr = converter.getFirOpBuilder().getUnitAttr();

  // The chunk size is an optional member of the clause tuple.
  const std::optional<ExprTy> &chunk =
      std::get<std::optional<ExprTy>>(distSchedule->t);
  if (chunk.has_value())
    result.distScheduleChunkSizeVar =
        fir::getBase(converter.genExprValue(*chunk, stmtCtx));

  return true;
}

bool ClauseProcessor::processFinal(lower::StatementContext &stmtCtx,
mlir::omp::FinalClauseOps &result) const {
const parser::CharBlock *source = nullptr;
Expand Down
2 changes: 2 additions & 0 deletions flang/lib/Lower/OpenMP/ClauseProcessor.h
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,8 @@ class ClauseProcessor {
bool processDevice(lower::StatementContext &stmtCtx,
mlir::omp::DeviceClauseOps &result) const;
bool processDeviceType(mlir::omp::DeviceTypeClauseOps &result) const;
/// Processes the `dist_schedule` clause, if present: sets the static
/// dist_schedule unit attribute in \p result and, when the clause has a chunk
/// size expression, evaluates it using \p stmtCtx and stores the value in
/// \p result. Returns true if the clause was found, false otherwise.
bool processDistSchedule(lower::StatementContext &stmtCtx,
mlir::omp::DistScheduleClauseOps &result) const;
bool processFinal(lower::StatementContext &stmtCtx,
mlir::omp::FinalClauseOps &result) const;
bool processHasDeviceAddr(
Expand Down
58 changes: 56 additions & 2 deletions flang/lib/Lower/OpenMP/OpenMP.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -991,6 +991,18 @@ static void genCriticalDeclareClauses(lower::AbstractConverter &converter,
mlir::StringAttr::get(converter.getFirOpBuilder().getContext(), name);
}

/// Fill \p clauseOps with the operands of the clauses that apply to a
/// DISTRIBUTE construct.
///
/// The `allocate` and `dist_schedule` clauses in \p clauses are processed;
/// expressions are evaluated in \p stmtCtx. \p loc is accepted but not used
/// by the current implementation.
static void genDistributeClauses(lower::AbstractConverter &converter,
                                 semantics::SemanticsContext &semaCtx,
                                 lower::StatementContext &stmtCtx,
                                 const List<Clause> &clauses,
                                 mlir::Location loc,
                                 mlir::omp::DistributeClauseOps &clauseOps) {
  ClauseProcessor processor(converter, semaCtx, clauses);

  processor.processAllocate(clauseOps);
  processor.processDistSchedule(stmtCtx, clauseOps);
  // TODO Support delayed privatization.
}

static void genFlushClauses(lower::AbstractConverter &converter,
semantics::SemanticsContext &semaCtx,
const ObjectList &objects,
Expand Down Expand Up @@ -1288,8 +1300,50 @@ genDistributeOp(lower::AbstractConverter &converter, lower::SymMap &symTable,
semantics::SemanticsContext &semaCtx,
lower::pft::Evaluation &eval, mlir::Location loc,
const ConstructQueue &queue, ConstructQueue::iterator item) {
  // Lowers a standalone DISTRIBUTE construct: an omp.distribute wrapper
  // operation is created from the construct's clauses, and an omp.loop_nest
  // holding the lowered loop body is nested inside it. The wrapper operation
  // is returned.
  fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder();

  // Symbols introduced while lowering the construct live in their own scope,
  // which is popped again before returning.
  symTable.pushScope();
  DataSharingProcessor dsp(converter, semaCtx, item->clauses, eval,
                           lower::omp::isLastItemInQueue(item, queue));
  dsp.processStep1();

  // Collect loop bounds/induction variables and the distribute-specific
  // clause operands from the same clause list.
  lower::StatementContext stmtCtx;
  mlir::omp::LoopNestClauseOps loopClauseOps;
  mlir::omp::DistributeClauseOps distributeClauseOps;
  llvm::SmallVector<const semantics::Symbol *> iv;
  genLoopNestClauses(converter, semaCtx, eval, item->clauses, loc,
                     loopClauseOps, iv);
  genDistributeClauses(converter, semaCtx, stmtCtx, item->clauses, loc,
                       distributeClauseOps);

  // Create omp.distribute wrapper.
  auto distributeOp =
      firOpBuilder.create<mlir::omp::DistributeOp>(loc, distributeClauseOps);

  // Give the wrapper a block with a terminator and insert the loop nest
  // right before that terminator.
  firOpBuilder.createBlock(&distributeOp.getRegion());
  firOpBuilder.setInsertionPoint(
      lower::genOpenMPTerminator(firOpBuilder, distributeOp, loc));

  // Create nested omp.loop_nest and fill body with loop contents.
  auto loopOp = firOpBuilder.create<mlir::omp::LoopNestOp>(loc, loopClauseOps);

  auto *nestedEval =
      getCollapsedLoopEval(eval, getCollapseValue(item->clauses));

  // Region-entry callback: binds the induction variables for the loop nest
  // and hands the symbol list back to the body generator.
  auto ivCallback = [&](mlir::Operation *op) {
    genLoopVars(op, converter, loc, iv);
    return iv;
  };

  // The body belongs to a DISTRIBUTE construct, so it is tagged with
  // OMPD_distribute.
  createBodyOfOp(*loopOp,
                 OpWithBodyGenInfo(converter, symTable, semaCtx, loc,
                                   *nestedEval,
                                   llvm::omp::Directive::OMPD_distribute)
                     .setClauses(&item->clauses)
                     .setDataSharingProcessor(&dsp)
                     .setGenRegionEntryCb(ivCallback),
                 queue, item);

  symTable.popScope();
  return distributeOp;
}

static mlir::omp::FlushOp
Expand Down
114 changes: 114 additions & 0 deletions flang/test/Lower/OpenMP/distribute.f90
Original file line number Diff line number Diff line change
@@ -0,0 +1,114 @@
! REQUIRES: openmp_runtime

! RUN: %flang_fc1 -emit-hlfir %openmp_flags %s -o - | FileCheck %s

! Clause-free `distribute` nested in a `teams` region. The checks below expect
! an omp.distribute operation whose next line is the omp.loop_nest, a yield
! for the loop body, and a terminator closing the teams region.
! CHECK-LABEL: func @_QPdistribute_simple
subroutine distribute_simple()
! CHECK: omp.teams
!$omp teams

! CHECK: omp.distribute {
!$omp distribute

! CHECK-NEXT: omp.loop_nest
do i = 1, 10
call foo()
! CHECK: omp.yield
end do

!$omp end distribute

! CHECK: omp.terminator
!$omp end teams
end subroutine distribute_simple

!===============================================================================
! `dist_schedule` clause
!===============================================================================

! Three `distribute` loops inside one `teams` region, covering the
! `dist_schedule(static ...)` clause with a constant chunk size, a chunk size
! read from the dummy argument `x`, and no chunk size at all. In every case the
! omp.distribute operation is expected to carry dist_schedule_static and to be
! followed on the next line by omp.loop_nest.
! CHECK-LABEL: func @_QPdistribute_dist_schedule
! CHECK-SAME: %[[X_ARG:.*]]: !fir.ref<i32>
subroutine distribute_dist_schedule(x)
! CHECK: %[[X_REF:.*]]:2 = hlfir.declare %[[X_ARG]]
integer, intent(in) :: x

! CHECK: omp.teams
!$omp teams

! STATIC SCHEDULE, CONSTANT CHUNK SIZE

! The literal chunk size is expected as an i32 constant defined before the
! omp.distribute operation that uses it.
! CHECK: %[[CONST_CHUNK_SIZE:.*]] = arith.constant 5 : i32
! CHECK: omp.distribute
! CHECK-SAME: dist_schedule_static
! CHECK-SAME: chunk_size(%[[CONST_CHUNK_SIZE]] : i32)
!$omp distribute dist_schedule(static, 5)

! CHECK-NEXT: omp.loop_nest
do i = 1, 10
call foo()
! CHECK: omp.yield
end do

!$omp end distribute

! STATIC SCHEDULE, VARIABLE CHUNK SIZE

! The chunk size is expected to be the value loaded from the declared `x`.
! CHECK: %[[X:.*]] = fir.load %[[X_REF]]#0
! CHECK: omp.distribute
! CHECK-SAME: dist_schedule_static
! CHECK-SAME: chunk_size(%[[X]] : i32)
!$omp distribute dist_schedule(static, x)

! CHECK-NEXT: omp.loop_nest
do i = 1, 10
call foo()
! CHECK: omp.yield
end do

!$omp end distribute

! STATIC SCHEDULE, NO CHUNK SIZE

! No chunk_size may appear between dist_schedule_static and the loop nest.
! CHECK: omp.distribute
! CHECK-SAME: dist_schedule_static
! CHECK-NOT: chunk_size
!$omp distribute dist_schedule(static)

! CHECK-NEXT: omp.loop_nest
do i = 1, 10
call foo()
! CHECK: omp.yield
end do

!$omp end distribute

! CHECK: omp.terminator
!$omp end teams
end subroutine distribute_dist_schedule

!===============================================================================
! `allocate` clause
!===============================================================================

! `distribute` with an `allocate` clause naming the omp_high_bw_mem_alloc
! allocator (from omp_lib) for the privatized integer `x`. The omp.distribute
! operation is expected to carry an allocate entry pairing an i64 value with a
! !fir.ref<i32> value, and to be followed on the next line by omp.loop_nest.
! CHECK-LABEL: func @_QPdistribute_allocate
subroutine distribute_allocate()
use omp_lib
integer :: x
! CHECK: omp.teams
!$omp teams

! CHECK: omp.distribute
! CHECK-SAME: allocate(%{{.+}} : i64 -> %{{.+}} : !fir.ref<i32>)
!$omp distribute allocate(omp_high_bw_mem_alloc: x) private(x)

! CHECK-NEXT: omp.loop_nest
do i = 1, 10
x = i
! CHECK: omp.yield
end do

!$omp end distribute

! CHECK: omp.terminator
!$omp end teams
end subroutine distribute_allocate
Loading

0 comments on commit fc1c34b

Please sign in to comment.