Skip to content

Commit

Permalink
[flang][hlfir] Shallow copy elemental results with allocatable compon…
Browse files Browse the repository at this point in the history
…ents. (llvm#68040)

To avoid the overhead of deallocating allocatable components of the
elemental temporary result on every iteration of the elemental operation,
we can use a shallow copy instead of deep-copy assign.
  • Loading branch information
vzakhari authored Oct 3, 2023
1 parent e0cd781 commit f857bef
Show file tree
Hide file tree
Showing 2 changed files with 60 additions and 20 deletions.
49 changes: 29 additions & 20 deletions flang/lib/Optimizer/HLFIR/Transforms/BufferizeHLFIR.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -791,26 +791,35 @@ struct ElementalOpConversion
// Assign the element value to the temp element for this iteration.
auto tempElement =
hlfir::getElementAt(loc, builder, temp, loopNest.oneBasedIndices);
// FIXME: if the elemental result is a function result temporary
// of a derived type, we have to make sure that we either
// deallocate any allocatable/automatic components after the assignment
// or do not do the deep copy with the AssignOp. The latter
// seems to be preferable, because the deep copy is more expensive.
// The shallow copy may be done with a load/store of the RecordType scalar.
builder.create<hlfir::AssignOp>(loc, elementValue, tempElement,
/*realloc=*/false,
/*keep_lhs_length_if_realloc=*/false,
/*temporary_lhs=*/true);
// hlfir.yield_element implicitly marks the end-of-life of its operand if
// it is an expression created in the hlfir.elemental (since it is its
// last use and an hlfir.destroy could not be created afterwards).
// Now that this node has been removed and the expression has been used in
// the assign, insert an hlfir.destroy to mark the expression end-of-life.
// If the expression creation allocated a buffer on the heap inside the
// loop, this will ensure the buffer is properly deallocated.
if (elementValue.getType().isa<hlfir::ExprType>() &&
wasCreatedInCurrentBlock(elementValue, builder))
builder.create<hlfir::DestroyOp>(loc, elementValue);
// If the elemental result is a temporary of a derived type,
// we can avoid the deep copy implied by the AssignOp and just
// do the shallow copy with load/store. This helps avoid the overhead
// of deallocating allocatable components of the temporary (if any)
// on each iteration of the elemental operation.
auto asExpr = elementValue.getDefiningOp<hlfir::AsExprOp>();
auto elemType = hlfir::getFortranElementType(elementValue.getType());
if (asExpr && asExpr.isMove() && mlir::isa<fir::RecordType>(elemType) &&
hlfir::mayHaveAllocatableComponent(elemType) &&
wasCreatedInCurrentBlock(elementValue, builder)) {
auto load = builder.create<fir::LoadOp>(loc, asExpr.getVar());
builder.create<fir::StoreOp>(loc, load, tempElement);
} else {
builder.create<hlfir::AssignOp>(loc, elementValue, tempElement,
/*realloc=*/false,
/*keep_lhs_length_if_realloc=*/false,
/*temporary_lhs=*/true);

// hlfir.yield_element implicitly marks the end-of-life of its operand if
// it is an expression created in the hlfir.elemental (since it is its
// last use and an hlfir.destroy could not be created afterwards).
// Now that this node has been removed and the expression has been used in
// the assign, insert an hlfir.destroy to mark the expression end-of-life.
// If the expression creation allocated a buffer on the heap inside the
// loop, this will ensure the buffer is properly deallocated.
if (elementValue.getType().isa<hlfir::ExprType>() &&
wasCreatedInCurrentBlock(elementValue, builder))
builder.create<hlfir::DestroyOp>(loc, elementValue);
}
builder.restoreInsertionPoint(insPt);

mlir::Value bufferizedExpr =
Expand Down
31 changes: 31 additions & 0 deletions flang/test/HLFIR/elemental-shallow-copy.fir
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
// Check that an elemental result of a derived type with an allocatable
// component is shallow-copied into the array result.
// RUN: fir-opt %s --bufferize-hlfir | FileCheck %s

// Test input: a one-element hlfir.elemental whose element value is a
// derived-type temporary wrapped by hlfir.as_expr with the `move` keyword.
// The derived type _QMtypesTt has a single component `x` of type
// !fir.box<!fir.heap<f32>> (the allocatable component mentioned in the
// header comment of this test).
func.func @_QMtypesPtest() {
// i1 operand passed to hlfir.as_expr below (presumably its "must free"
// flag; confirm against the HLFIR op definition).
%false = arith.constant false
// Extent used to build the rank-1 shape shared by `y` and the elemental.
%c1 = arith.constant 1 : index
// Scalar storage of the derived type, named ".result"; it is declared inside
// the elemental body as ".tmp.func_result".
%0 = fir.alloca !fir.type<_QMtypesTt{x:!fir.box<!fir.heap<f32>>}> {bindc_name = ".result"}
%11 = fir.shape %c1 : (index) -> !fir.shape<1>
// Destination array `y` (one element of the derived type) and its declaration.
%18 = fir.alloca !fir.array<1x!fir.type<_QMtypesTt{x:!fir.box<!fir.heap<f32>>}>> {bindc_name = "y", uniq_name = "_QMtypesFtestEy"}
%19:2 = hlfir.declare %18(%11) {uniq_name = "_QMtypesFtestEy"} : (!fir.ref<!fir.array<1x!fir.type<_QMtypesTt{x:!fir.box<!fir.heap<f32>>}>>>, !fir.shape<1>) -> (!fir.ref<!fir.array<1x!fir.type<_QMtypesTt{x:!fir.box<!fir.heap<f32>>}>>>, !fir.ref<!fir.array<1x!fir.type<_QMtypesTt{x:!fir.box<!fir.heap<f32>>}>>>)
// Elemental operation under test: produces a 1-element array expression of
// the derived type.
%23 = hlfir.elemental %11 : (!fir.shape<1>) -> !hlfir.expr<1x!fir.type<_QMtypesTt{x:!fir.box<!fir.heap<f32>>}>> {
^bb0(%arg0: index):
// The element value is the ".result" storage, declared inside the elemental
// body and then turned into an expression with `move`. The expectations after
// this function require a fir.load of this declaration followed by a
// fir.store into the designated element of the ".tmp.array" buffer.
%26:2 = hlfir.declare %0 {uniq_name = ".tmp.func_result"} : (!fir.ref<!fir.type<_QMtypesTt{x:!fir.box<!fir.heap<f32>>}>>) -> (!fir.ref<!fir.type<_QMtypesTt{x:!fir.box<!fir.heap<f32>>}>>, !fir.ref<!fir.type<_QMtypesTt{x:!fir.box<!fir.heap<f32>>}>>)
%27 = hlfir.as_expr %26#0 move %false : (!fir.ref<!fir.type<_QMtypesTt{x:!fir.box<!fir.heap<f32>>}>>, i1) -> !hlfir.expr<!fir.type<_QMtypesTt{x:!fir.box<!fir.heap<f32>>}>>
hlfir.yield_element %27 : !hlfir.expr<!fir.type<_QMtypesTt{x:!fir.box<!fir.heap<f32>>}>>
}
// Consume the elemental result: assign it to `y`, then end its lifetime.
hlfir.assign %23 to %19#0 : !hlfir.expr<1x!fir.type<_QMtypesTt{x:!fir.box<!fir.heap<f32>>}>>, !fir.ref<!fir.array<1x!fir.type<_QMtypesTt{x:!fir.box<!fir.heap<f32>>}>>>
hlfir.destroy %23 : !hlfir.expr<1x!fir.type<_QMtypesTt{x:!fir.box<!fir.heap<f32>>}>>
return
}
// CHECK-LABEL: func.func @_QMtypesPtest() {
// CHECK: %[[VAL_2:.*]] = fir.alloca !fir.type<_QMtypesTt{x:!fir.box<!fir.heap<f32>>}> {bindc_name = ".result"}
// CHECK: %[[VAL_6:.*]] = fir.allocmem !fir.array<1x!fir.type<_QMtypesTt{x:!fir.box<!fir.heap<f32>>}>> {bindc_name = ".tmp.array", uniq_name = ""}
// CHECK: %[[VAL_7:.*]]:2 = hlfir.declare %[[VAL_6]](%{{.*}}) {uniq_name = ".tmp.array"} : (!fir.heap<!fir.array<1x!fir.type<_QMtypesTt{x:!fir.box<!fir.heap<f32>>}>>>, !fir.shape<1>) -> (!fir.heap<!fir.array<1x!fir.type<_QMtypesTt{x:!fir.box<!fir.heap<f32>>}>>>, !fir.heap<!fir.array<1x!fir.type<_QMtypesTt{x:!fir.box<!fir.heap<f32>>}>>>)
// CHECK: fir.do_loop %[[VAL_10:.*]] = %{{.*}} to %{{.*}} step %{{.*}} {
// CHECK: %[[VAL_11:.*]]:2 = hlfir.declare %[[VAL_2]] {uniq_name = ".tmp.func_result"} : (!fir.ref<!fir.type<_QMtypesTt{x:!fir.box<!fir.heap<f32>>}>>) -> (!fir.ref<!fir.type<_QMtypesTt{x:!fir.box<!fir.heap<f32>>}>>, !fir.ref<!fir.type<_QMtypesTt{x:!fir.box<!fir.heap<f32>>}>>)
// CHECK: %[[VAL_15:.*]] = hlfir.designate %[[VAL_7]]#0 (%[[VAL_10]]) : (!fir.heap<!fir.array<1x!fir.type<_QMtypesTt{x:!fir.box<!fir.heap<f32>>}>>>, index) -> !fir.ref<!fir.type<_QMtypesTt{x:!fir.box<!fir.heap<f32>>}>>
// CHECK: %[[VAL_16:.*]] = fir.load %[[VAL_11]]#0 : !fir.ref<!fir.type<_QMtypesTt{x:!fir.box<!fir.heap<f32>>}>>
// CHECK: fir.store %[[VAL_16]] to %[[VAL_15]] : !fir.ref<!fir.type<_QMtypesTt{x:!fir.box<!fir.heap<f32>>}>>
// CHECK: }

0 comments on commit f857bef

Please sign in to comment.