From 4deda2fce738db0a6c0db0a0f9b430e61a267d41 Mon Sep 17 00:00:00 2001 From: zy-kkk Date: Thu, 26 Sep 2024 10:35:01 +0800 Subject: [PATCH] [improvement](nereids) Simplify ScanNode projection handling by removing redundant conditions (#40801) (#41315) pick from master #40801 This PR simplifies the handling of `ScanNode` projection logic. Previously, the code included multiple conditional checks to determine whether a `projectionTuple` should be generated. These conditions have been removed, and now `projectionTuple `is always generated for `ScanNode`, ensuring a consistent projection setup. Additionally, redundant handling of `SlotId` and `SlotRef` has been eliminated, making the code cleaner and easier to maintain. The behavior for `OlapScanNode` remains unchanged. --- .../translator/PhysicalPlanTranslator.java | 49 +++++-------------- .../jdbc/test_doris_jdbc_catalog.out | 8 +++ .../jdbc/test_doris_jdbc_catalog.groovy | 4 ++ 3 files changed, 23 insertions(+), 38 deletions(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/glue/translator/PhysicalPlanTranslator.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/glue/translator/PhysicalPlanTranslator.java index 7cfeb3dbaff6a3..a3d3a1885f3f8d 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/glue/translator/PhysicalPlanTranslator.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/glue/translator/PhysicalPlanTranslator.java @@ -1951,21 +1951,10 @@ public PlanFragment visitPhysicalProject(PhysicalProject project // slotIdsByOrder is used to ensure the ScanNode's output order is same with current Project // if we change the output order in translate project, the upper node will receive wrong order // tuple, since they get the order from project.getOutput() not scan.getOutput()./ - List slotIdsByOrder = Lists.newArrayList(); - if (requiredByProjectSlotIdSet.size() != requiredSlotIdSet.size() - || new HashSet<>(projectionExprs).size() != projectionExprs.size() - || projectionExprs.stream().anyMatch(expr -> !(expr instanceof SlotRef))) { - projectionTuple = generateTupleDesc(slots, - ((ScanNode) inputPlanNode).getTupleDesc().getTable(), context); - inputPlanNode.setProjectList(projectionExprs); - inputPlanNode.setOutputTupleDesc(projectionTuple); - } else { - for (int i = 0; i < slots.size(); ++i) { - context.addExprIdSlotRefPair(slots.get(i).getExprId(), - (SlotRef) projectionExprs.get(i)); - slotIdsByOrder.add(((SlotRef) projectionExprs.get(i)).getSlotId()); - } - } + projectionTuple = generateTupleDesc(slots, + ((ScanNode) inputPlanNode).getTupleDesc().getTable(), context); + inputPlanNode.setProjectList(projectionExprs); + inputPlanNode.setOutputTupleDesc(projectionTuple); // TODO: this is a temporary scheme to support two phase read when has project. // we need to refactor all topn opt into rbo stage. @@ -1975,20 +1964,16 @@ public PlanFragment visitPhysicalProject(PhysicalProject project SlotDescriptor lastSlot = olapScanSlots.get(olapScanSlots.size() - 1); if (lastSlot.getColumn() != null && lastSlot.getColumn().getName().equals(Column.ROWID_COL)) { - if (projectionTuple != null) { - injectRowIdColumnSlot(projectionTuple); - SlotRef slotRef = new SlotRef(lastSlot); - inputPlanNode.getProjectList().add(slotRef); - requiredByProjectSlotIdSet.add(lastSlot.getId()); - } else { - slotIdsByOrder.add(lastSlot.getId()); - } + injectRowIdColumnSlot(projectionTuple); + SlotRef slotRef = new SlotRef(lastSlot); + inputPlanNode.getProjectList().add(slotRef); + requiredByProjectSlotIdSet.add(lastSlot.getId()); requiredSlotIdSet.add(lastSlot.getId()); } ((OlapScanNode) inputPlanNode).updateRequiredSlots(context, requiredByProjectSlotIdSet); } updateScanSlotsMaterialization((ScanNode) inputPlanNode, requiredSlotIdSet, - requiredByProjectSlotIdSet, slotIdsByOrder, context); + requiredByProjectSlotIdSet, context); } else { TupleDescriptor tupleDescriptor = generateTupleDesc(slots, null, context); inputPlanNode.setProjectList(projectionExprs); @@ -2434,22 +2419,10 @@ private SortNode translateSortNode(AbstractPhysicalSort sort, Pl private void updateScanSlotsMaterialization(ScanNode scanNode, Set requiredSlotIdSet, Set requiredByProjectSlotIdSet, - List slotIdsByOrder, PlanTranslatorContext context) { + PlanTranslatorContext context) { // TODO: use smallest slot if do not need any slot in upper node SlotDescriptor smallest = scanNode.getTupleDesc().getSlots().get(0); - if (CollectionUtils.isNotEmpty(slotIdsByOrder)) { - // if we eliminate project above scan, we should ensure the slot order of scan's output is same with - // the projection's output. So, we need to reorder the output slot in scan's tuple. - Map idToSlotDescMap = scanNode.getTupleDesc().getSlots().stream() - .filter(s -> requiredSlotIdSet.contains(s.getId())) - .collect(Collectors.toMap(SlotDescriptor::getId, s -> s)); - scanNode.getTupleDesc().getSlots().clear(); - for (SlotId slotId : slotIdsByOrder) { - scanNode.getTupleDesc().getSlots().add(idToSlotDescMap.get(slotId)); - } - } else { - scanNode.getTupleDesc().getSlots().removeIf(s -> !requiredSlotIdSet.contains(s.getId())); - } + scanNode.getTupleDesc().getSlots().removeIf(s -> !requiredSlotIdSet.contains(s.getId())); if (scanNode.getTupleDesc().getSlots().isEmpty()) { scanNode.getTupleDesc().getSlots().add(smallest); } diff --git a/regression-test/data/external_table_p0/jdbc/test_doris_jdbc_catalog.out b/regression-test/data/external_table_p0/jdbc/test_doris_jdbc_catalog.out index 3fec3f8a5748f2..9695f628fee739 100644 --- a/regression-test/data/external_table_p0/jdbc/test_doris_jdbc_catalog.out +++ b/regression-test/data/external_table_p0/jdbc/test_doris_jdbc_catalog.out @@ -141,6 +141,14 @@ char_col char(85) Yes true \N NONE varchar_col char(85) Yes true \N NONE json_col text Yes true \N NONE +-- !sql -- +\N \N +a 1 + +-- !sql -- +\N \N +1 a + -- !sql -- doris_jdbc_catalog diff --git a/regression-test/suites/external_table_p0/jdbc/test_doris_jdbc_catalog.groovy b/regression-test/suites/external_table_p0/jdbc/test_doris_jdbc_catalog.groovy index 2051a4ad54d6da..c4fce17c62cd18 100644 --- a/regression-test/suites/external_table_p0/jdbc/test_doris_jdbc_catalog.groovy +++ b/regression-test/suites/external_table_p0/jdbc/test_doris_jdbc_catalog.groovy @@ -231,6 +231,10 @@ suite("test_doris_jdbc_catalog", "p0,external,doris,external_docker,external_doc // test query tvf qt_sql """desc function query("catalog" = "doris_jdbc_catalog", "query" = "select * from regression_test_jdbc_catalog_p0.base");""" + order_qt_sql """ select varchar_col,tinyint_col from query("catalog" = "doris_jdbc_catalog", "query" = "select varchar_col,tinyint_col from regression_test_jdbc_catalog_p0.base");""" + + order_qt_sql """ select tinyint_col,varchar_col from query("catalog" = "doris_jdbc_catalog", "query" = "select varchar_col,tinyint_col from regression_test_jdbc_catalog_p0.base");""" + //clean qt_sql """select current_catalog()""" sql "switch internal"