diff --git a/compiler/src/iree/compiler/Codegen/Common/BlockDynamicDimensions.cpp b/compiler/src/iree/compiler/Codegen/Common/BlockDynamicDimensions.cpp
index 4d22bc236237..5b9679c5cfe9 100644
--- a/compiler/src/iree/compiler/Codegen/Common/BlockDynamicDimensions.cpp
+++ b/compiler/src/iree/compiler/Codegen/Common/BlockDynamicDimensions.cpp
@@ -22,7 +22,7 @@ static llvm::cl::opt<bool> clEnableBlockedMatmuls(
     "iree-codegen-block-dynamic-dimensions-of-contractions",
     llvm::cl::desc("developer flag to gaurd blocking dynamic dimensions of "
                    "contraction-like ops"),
-    llvm::cl::Hidden, llvm::cl::init(false));
+    llvm::cl::Hidden, llvm::cl::init(true));
 
 namespace mlir::iree_compiler {
 
@@ -125,14 +125,15 @@ blockDynamicDimensionsOfValue(RewriterBase &rewriter,
   SmallVector<OpFoldResult> outputShape;
   SmallVector<ReassociationIndices> reassociation;
   Location loc = v.getLoc();
+  SmallVector<OpFoldResult> origShape = tensor::getMixedSizes(rewriter, loc, v);
 
-  for (auto [index, dim] : llvm::enumerate(tensorType.getShape())) {
+  for (auto [index, dim] : llvm::enumerate(origShape)) {
     reassociation.emplace_back(ReassociationIndices{});
 
     // Check if this needs division.
     if (!tensorType.isDynamicDim(index) || !divisibilityInfo.contains(index)) {
       reassociation.back().push_back(outputShape.size());
-      outputShape.push_back(rewriter.getIndexAttr(dim));
+      outputShape.push_back(dim);
       continue;
     }
 
@@ -142,9 +143,8 @@
     uint64_t factor = currDivisibility.sdiv();
     AffineExpr s0 = rewriter.getAffineSymbolExpr(0);
     AffineExpr divExpr = s0.floorDiv(factor);
-    Value sourceDim = rewriter.create<tensor::DimOp>(loc, v, index).getResult();
     OpFoldResult newDynamicDim = affine::makeComposedFoldedAffineApply(
-        rewriter, loc, divExpr, ArrayRef<OpFoldResult>{sourceDim});
+        rewriter, loc, divExpr, ArrayRef<OpFoldResult>{dim});
     OpFoldResult newStaticDim = rewriter.getIndexAttr(factor);
 
     reassociation.back().push_back(outputShape.size());
diff --git a/compiler/src/iree/compiler/Codegen/Common/test/block_dynamic_dims.mlir b/compiler/src/iree/compiler/Codegen/Common/test/block_dynamic_dims.mlir
index 3d80a08ea0e0..7f252f25e882 100644
--- a/compiler/src/iree/compiler/Codegen/Common/test/block_dynamic_dims.mlir
+++ b/compiler/src/iree/compiler/Codegen/Common/test/block_dynamic_dims.mlir
@@ -228,3 +228,28 @@ func.func @reshape_propagation_test(%rhs : tensor<2048x4096xf16>, %m : index)
 // CHECK-SAME:     outs(%[[EMPTY]] :
 // CHECK:       %[[COLLAPSED:.+]] = tensor.collapse_shape %[[TRUNC]]
 // CHECK:       return %[[COLLAPSED]]
+
+// -----
+
+func.func @multiple_dynamic_dims(%arg0 : index, %arg1 : index) -> tensor<?x?x4096xf16> {
+  %0 = util.assume.int %arg0<udiv = 16> : index
+  %lhs = tensor.empty(%arg1, %0) : tensor<?x?x2048xf16>
+  %rhs = tensor.empty(%arg1) : tensor<?x2048x4096xf16>
+  %init = tensor.empty(%arg1, %0) : tensor<?x?x4096xf16>
+  %matmul = linalg.batch_matmul ins(%lhs, %rhs : tensor<?x?x2048xf16>, tensor<?x2048x4096xf16>)
+      outs(%init : tensor<?x?x4096xf16>) -> tensor<?x?x4096xf16>
+  return %matmul : tensor<?x?x4096xf16>
+}
+// CHECK-LABEL: func @multiple_dynamic_dims(
+// CHECK-SAME:     %[[ARG0:[a-zA-Z0-9]+]]: index,
+// CHECK-SAME:     %[[ARG1:[a-zA-Z0-9]+]]: index)
+// CHECK-DAG:   %[[ARG0_ASSUME:.+]] = util.assume.int %[[ARG0]]
+// CHECK-DAG:   %[[RHS:.+]] = tensor.empty(%[[ARG1]]) : tensor<?x2048x4096xf16>
+// CHECK-DAG:   %[[BLOCKED_M:.+]] = affine.apply affine_map<()[s0] -> (s0 floordiv 16)>()[%[[ARG0_ASSUME]]]
+// CHECK-DAG:   %[[LHS:.+]] = tensor.empty(%[[ARG1]], %[[BLOCKED_M]]) : tensor<?x?x16x2048xf16>
+// CHECK-DAG:   %[[INIT:.+]] = tensor.empty(%[[ARG1]], %[[BLOCKED_M]]) : tensor<?x?x16x4096xf16>
+// CHECK:       %[[MATMUL:.+]] = linalg.generic
+// CHECK-SAME:       ins(%[[LHS]], %[[RHS]]
+// CHECK-SAME:       outs(%[[INIT]] :
+// CHECK:       %[[COLLAPSE:.+]] = tensor.collapse_shape %[[MATMUL]]
+// CHECK:       return %[[COLLAPSE]]