[improvement](nereids) Simplify ScanNode projection handling by remov…

…ing redundant conditions (apache#40801) (apache#41315) pick from master apache#40801 This PR simplifies the handling of `ScanNode` projection logic. Previously, the code included multiple conditional checks to determine whether a `projectionTuple` should be generated. These conditions have been removed, and now `projectionTuple `is always generated for `ScanNode`, ensuring a consistent projection setup. Additionally, redundant handling of `SlotId` and `SlotRef` has been eliminated, making the code cleaner and easier to maintain. The behavior for `OlapScanNode` remains unchanged.
jacktengg · Sep 26, 2024 · 4deda2f · 4deda2f
1 parent a11fd62
commit 4deda2f
Show file tree

Hide file tree

Showing 3 changed files with 23 additions and 38 deletions.
diff --git a/...e-core/src/main/java/org/apache/doris/nereids/glue/translator/PhysicalPlanTranslator.java b/...e-core/src/main/java/org/apache/doris/nereids/glue/translator/PhysicalPlanTranslator.java
@@ -1951,21 +1951,10 @@ public PlanFragment visitPhysicalProject(PhysicalProject<? extends Plan> project
             // slotIdsByOrder is used to ensure the ScanNode's output order is same with current Project
             // if we change the output order in translate project, the upper node will receive wrong order
             // tuple, since they get the order from project.getOutput() not scan.getOutput()./
-            List<SlotId> slotIdsByOrder = Lists.newArrayList();
-            if (requiredByProjectSlotIdSet.size() != requiredSlotIdSet.size()
-                    || new HashSet<>(projectionExprs).size() != projectionExprs.size()
-                    || projectionExprs.stream().anyMatch(expr -> !(expr instanceof SlotRef))) {
-                projectionTuple = generateTupleDesc(slots,
-                        ((ScanNode) inputPlanNode).getTupleDesc().getTable(), context);
-                inputPlanNode.setProjectList(projectionExprs);
-                inputPlanNode.setOutputTupleDesc(projectionTuple);
-            } else {
-                for (int i = 0; i < slots.size(); ++i) {
-                    context.addExprIdSlotRefPair(slots.get(i).getExprId(),
-                            (SlotRef) projectionExprs.get(i));
-                    slotIdsByOrder.add(((SlotRef) projectionExprs.get(i)).getSlotId());
-                }
-            }
+            projectionTuple = generateTupleDesc(slots,
+                    ((ScanNode) inputPlanNode).getTupleDesc().getTable(), context);
+            inputPlanNode.setProjectList(projectionExprs);
+            inputPlanNode.setOutputTupleDesc(projectionTuple);
 
             // TODO: this is a temporary scheme to support two phase read when has project.
             //  we need to refactor all topn opt into rbo stage.
@@ -1975,20 +1964,16 @@ public PlanFragment visitPhysicalProject(PhysicalProject<? extends Plan> project
                 SlotDescriptor lastSlot = olapScanSlots.get(olapScanSlots.size() - 1);
                 if (lastSlot.getColumn() != null
                         && lastSlot.getColumn().getName().equals(Column.ROWID_COL)) {
-                    if (projectionTuple != null) {
-                        injectRowIdColumnSlot(projectionTuple);
-                        SlotRef slotRef = new SlotRef(lastSlot);
-                        inputPlanNode.getProjectList().add(slotRef);
-                        requiredByProjectSlotIdSet.add(lastSlot.getId());
-                    } else {
-                        slotIdsByOrder.add(lastSlot.getId());
-                    }
+                    injectRowIdColumnSlot(projectionTuple);
+                    SlotRef slotRef = new SlotRef(lastSlot);
+                    inputPlanNode.getProjectList().add(slotRef);
+                    requiredByProjectSlotIdSet.add(lastSlot.getId());
                     requiredSlotIdSet.add(lastSlot.getId());
                 }
                 ((OlapScanNode) inputPlanNode).updateRequiredSlots(context, requiredByProjectSlotIdSet);
             }
             updateScanSlotsMaterialization((ScanNode) inputPlanNode, requiredSlotIdSet,
-                    requiredByProjectSlotIdSet, slotIdsByOrder, context);
+                    requiredByProjectSlotIdSet, context);
         } else {
             TupleDescriptor tupleDescriptor = generateTupleDesc(slots, null, context);
             inputPlanNode.setProjectList(projectionExprs);
@@ -2434,22 +2419,10 @@ private SortNode translateSortNode(AbstractPhysicalSort<? extends Plan> sort, Pl
 
     private void updateScanSlotsMaterialization(ScanNode scanNode,
             Set<SlotId> requiredSlotIdSet, Set<SlotId> requiredByProjectSlotIdSet,
-            List<SlotId> slotIdsByOrder, PlanTranslatorContext context) {
+            PlanTranslatorContext context) {
         // TODO: use smallest slot if do not need any slot in upper node
         SlotDescriptor smallest = scanNode.getTupleDesc().getSlots().get(0);
-        if (CollectionUtils.isNotEmpty(slotIdsByOrder)) {
-            // if we eliminate project above scan, we should ensure the slot order of scan's output is same with
-            // the projection's output. So, we need to reorder the output slot in scan's tuple.
-            Map<SlotId, SlotDescriptor> idToSlotDescMap = scanNode.getTupleDesc().getSlots().stream()
-                    .filter(s -> requiredSlotIdSet.contains(s.getId()))
-                    .collect(Collectors.toMap(SlotDescriptor::getId, s -> s));
-            scanNode.getTupleDesc().getSlots().clear();
-            for (SlotId slotId : slotIdsByOrder) {
-                scanNode.getTupleDesc().getSlots().add(idToSlotDescMap.get(slotId));
-            }
-        } else {
-            scanNode.getTupleDesc().getSlots().removeIf(s -> !requiredSlotIdSet.contains(s.getId()));
-        }
+        scanNode.getTupleDesc().getSlots().removeIf(s -> !requiredSlotIdSet.contains(s.getId()));
         if (scanNode.getTupleDesc().getSlots().isEmpty()) {
             scanNode.getTupleDesc().getSlots().add(smallest);
         }

diff --git a/regression-test/data/external_table_p0/jdbc/test_doris_jdbc_catalog.out b/regression-test/data/external_table_p0/jdbc/test_doris_jdbc_catalog.out
@@ -141,6 +141,14 @@ char_col	char(85)	Yes	true	\N	NONE
 varchar_col	char(85)	Yes	true	\N	NONE
 json_col	text	Yes	true	\N	NONE
 
+-- !sql --
+\N	\N
+a	1
+
+-- !sql --
+\N	\N
+1	a
+
 -- !sql --
 doris_jdbc_catalog
 

diff --git a/regression-test/suites/external_table_p0/jdbc/test_doris_jdbc_catalog.groovy b/regression-test/suites/external_table_p0/jdbc/test_doris_jdbc_catalog.groovy
@@ -231,6 +231,10 @@ suite("test_doris_jdbc_catalog", "p0,external,doris,external_docker,external_doc
     // test query tvf
     qt_sql """desc function query("catalog" = "doris_jdbc_catalog", "query" = "select * from regression_test_jdbc_catalog_p0.base");"""
 
+    order_qt_sql """ select varchar_col,tinyint_col from query("catalog" = "doris_jdbc_catalog", "query" = "select varchar_col,tinyint_col from regression_test_jdbc_catalog_p0.base");"""
+
+    order_qt_sql """ select tinyint_col,varchar_col from query("catalog" = "doris_jdbc_catalog", "query" = "select varchar_col,tinyint_col from regression_test_jdbc_catalog_p0.base");"""
+
     //clean
     qt_sql """select current_catalog()"""
     sql "switch internal"