Skip to content

Commit

Permalink
[improvement](nereids) Simplify ScanNode projection handling by remov…
Browse files Browse the repository at this point in the history
…ing redundant conditions (apache#40801) (apache#41315)

pick from master apache#40801

This PR simplifies the handling of `ScanNode` projection logic.
Previously, the code included multiple conditional checks to determine
whether a `projectionTuple` should be generated. These conditions have
been removed, and now `projectionTuple `is always generated for
`ScanNode`, ensuring a consistent projection setup. Additionally,
redundant handling of `SlotId` and `SlotRef` has been eliminated, making
the code cleaner and easier to maintain. The behavior for `OlapScanNode`
remains unchanged.
  • Loading branch information
zy-kkk authored Sep 26, 2024
1 parent a11fd62 commit 4deda2f
Show file tree
Hide file tree
Showing 3 changed files with 23 additions and 38 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -1951,21 +1951,10 @@ public PlanFragment visitPhysicalProject(PhysicalProject<? extends Plan> project
// slotIdsByOrder is used to ensure the ScanNode's output order is same with current Project
// if we change the output order in translate project, the upper node will receive wrong order
// tuple, since they get the order from project.getOutput() not scan.getOutput()./
List<SlotId> slotIdsByOrder = Lists.newArrayList();
if (requiredByProjectSlotIdSet.size() != requiredSlotIdSet.size()
|| new HashSet<>(projectionExprs).size() != projectionExprs.size()
|| projectionExprs.stream().anyMatch(expr -> !(expr instanceof SlotRef))) {
projectionTuple = generateTupleDesc(slots,
((ScanNode) inputPlanNode).getTupleDesc().getTable(), context);
inputPlanNode.setProjectList(projectionExprs);
inputPlanNode.setOutputTupleDesc(projectionTuple);
} else {
for (int i = 0; i < slots.size(); ++i) {
context.addExprIdSlotRefPair(slots.get(i).getExprId(),
(SlotRef) projectionExprs.get(i));
slotIdsByOrder.add(((SlotRef) projectionExprs.get(i)).getSlotId());
}
}
projectionTuple = generateTupleDesc(slots,
((ScanNode) inputPlanNode).getTupleDesc().getTable(), context);
inputPlanNode.setProjectList(projectionExprs);
inputPlanNode.setOutputTupleDesc(projectionTuple);

// TODO: this is a temporary scheme to support two phase read when has project.
// we need to refactor all topn opt into rbo stage.
Expand All @@ -1975,20 +1964,16 @@ public PlanFragment visitPhysicalProject(PhysicalProject<? extends Plan> project
SlotDescriptor lastSlot = olapScanSlots.get(olapScanSlots.size() - 1);
if (lastSlot.getColumn() != null
&& lastSlot.getColumn().getName().equals(Column.ROWID_COL)) {
if (projectionTuple != null) {
injectRowIdColumnSlot(projectionTuple);
SlotRef slotRef = new SlotRef(lastSlot);
inputPlanNode.getProjectList().add(slotRef);
requiredByProjectSlotIdSet.add(lastSlot.getId());
} else {
slotIdsByOrder.add(lastSlot.getId());
}
injectRowIdColumnSlot(projectionTuple);
SlotRef slotRef = new SlotRef(lastSlot);
inputPlanNode.getProjectList().add(slotRef);
requiredByProjectSlotIdSet.add(lastSlot.getId());
requiredSlotIdSet.add(lastSlot.getId());
}
((OlapScanNode) inputPlanNode).updateRequiredSlots(context, requiredByProjectSlotIdSet);
}
updateScanSlotsMaterialization((ScanNode) inputPlanNode, requiredSlotIdSet,
requiredByProjectSlotIdSet, slotIdsByOrder, context);
requiredByProjectSlotIdSet, context);
} else {
TupleDescriptor tupleDescriptor = generateTupleDesc(slots, null, context);
inputPlanNode.setProjectList(projectionExprs);
Expand Down Expand Up @@ -2434,22 +2419,10 @@ private SortNode translateSortNode(AbstractPhysicalSort<? extends Plan> sort, Pl

private void updateScanSlotsMaterialization(ScanNode scanNode,
Set<SlotId> requiredSlotIdSet, Set<SlotId> requiredByProjectSlotIdSet,
List<SlotId> slotIdsByOrder, PlanTranslatorContext context) {
PlanTranslatorContext context) {
// TODO: use smallest slot if do not need any slot in upper node
SlotDescriptor smallest = scanNode.getTupleDesc().getSlots().get(0);
if (CollectionUtils.isNotEmpty(slotIdsByOrder)) {
// if we eliminate project above scan, we should ensure the slot order of scan's output is same with
// the projection's output. So, we need to reorder the output slot in scan's tuple.
Map<SlotId, SlotDescriptor> idToSlotDescMap = scanNode.getTupleDesc().getSlots().stream()
.filter(s -> requiredSlotIdSet.contains(s.getId()))
.collect(Collectors.toMap(SlotDescriptor::getId, s -> s));
scanNode.getTupleDesc().getSlots().clear();
for (SlotId slotId : slotIdsByOrder) {
scanNode.getTupleDesc().getSlots().add(idToSlotDescMap.get(slotId));
}
} else {
scanNode.getTupleDesc().getSlots().removeIf(s -> !requiredSlotIdSet.contains(s.getId()));
}
scanNode.getTupleDesc().getSlots().removeIf(s -> !requiredSlotIdSet.contains(s.getId()));
if (scanNode.getTupleDesc().getSlots().isEmpty()) {
scanNode.getTupleDesc().getSlots().add(smallest);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -141,6 +141,14 @@ char_col char(85) Yes true \N NONE
varchar_col char(85) Yes true \N NONE
json_col text Yes true \N NONE

-- !sql --
\N \N
a 1

-- !sql --
\N \N
1 a

-- !sql --
doris_jdbc_catalog

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -231,6 +231,10 @@ suite("test_doris_jdbc_catalog", "p0,external,doris,external_docker,external_doc
// test query tvf
qt_sql """desc function query("catalog" = "doris_jdbc_catalog", "query" = "select * from regression_test_jdbc_catalog_p0.base");"""

order_qt_sql """ select varchar_col,tinyint_col from query("catalog" = "doris_jdbc_catalog", "query" = "select varchar_col,tinyint_col from regression_test_jdbc_catalog_p0.base");"""

order_qt_sql """ select tinyint_col,varchar_col from query("catalog" = "doris_jdbc_catalog", "query" = "select varchar_col,tinyint_col from regression_test_jdbc_catalog_p0.base");"""

//clean
qt_sql """select current_catalog()"""
sql "switch internal"
Expand Down

0 comments on commit 4deda2f

Please sign in to comment.