Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: support prepare pushdown #140

Open
wants to merge 13 commits into
base: dev
Choose a base branch
from
74 changes: 2 additions & 72 deletions src/hooks/executor.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,25 +15,17 @@
// You should have received a copy of the GNU Affero General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.

use anyhow::{anyhow, Result};
use duckdb::arrow::array::RecordBatch;
use anyhow::Result;
use pgrx::*;
use std::ffi::CStr;

use crate::duckdb::connection;
use crate::schema::cell::*;

use super::query::*;

#[cfg(debug_assertions)]
use crate::DEBUG_GUCS;

macro_rules! fallback_warning {
($msg:expr) => {
warning!("This query was not fully pushed down to DuckDB because DuckDB returned an error. Query times may be impacted. If you would like to see this query pushed down, please submit a request to https://github.com/paradedb/paradedb/issues with the following context:\n{}", $msg);
};
}

#[allow(deprecated)]
pub async fn executor_run(
query_desc: PgBox<pg_sys::QueryDesc>,
Expand Down Expand Up @@ -66,6 +58,7 @@ pub async fn executor_run(
// Tech Debt: Find a less hacky way to let COPY/CREATE go through
|| query.to_lowercase().starts_with("copy")
|| query.to_lowercase().starts_with("create")
|| query.to_lowercase().starts_with("prepare")
{
prev_hook(query_desc, direction, count, execute_once);
return Ok(());
Expand Down Expand Up @@ -102,66 +95,3 @@ pub async fn executor_run(
connection::clear_arrow();
Ok(())
}

#[inline]
fn write_batches_to_slots(
query_desc: PgBox<pg_sys::QueryDesc>,
mut batches: Vec<RecordBatch>,
) -> Result<()> {
// Convert the DataFusion batches to Postgres tuples and send them to the destination
unsafe {
let tuple_desc = PgTupleDesc::from_pg(query_desc.tupDesc);
let estate = query_desc.estate;
(*estate).es_processed = 0;

let dest = query_desc.dest;
let startup = (*dest)
.rStartup
.ok_or_else(|| anyhow!("rStartup not found"))?;
startup(dest, query_desc.operation as i32, query_desc.tupDesc);

let receive = (*dest)
.receiveSlot
.ok_or_else(|| anyhow!("receiveSlot not found"))?;

for batch in batches.iter_mut() {
for row_index in 0..batch.num_rows() {
let tuple_table_slot =
pg_sys::MakeTupleTableSlot(query_desc.tupDesc, &pg_sys::TTSOpsVirtual);

pg_sys::ExecStoreVirtualTuple(tuple_table_slot);

for (col_index, _) in tuple_desc.iter().enumerate() {
let attribute = tuple_desc
.get(col_index)
.ok_or_else(|| anyhow!("attribute at {col_index} not found in tupdesc"))?;
let column = batch.column(col_index);
let tts_value = (*tuple_table_slot).tts_values.add(col_index);
let tts_isnull = (*tuple_table_slot).tts_isnull.add(col_index);

match column.get_cell(row_index, attribute.atttypid, attribute.name())? {
Some(cell) => {
if let Some(datum) = cell.into_datum() {
*tts_value = datum;
}
}
None => {
*tts_isnull = true;
}
};
}

receive(tuple_table_slot, dest);
(*estate).es_processed += 1;
pg_sys::ExecDropSingleTupleTableSlot(tuple_table_slot);
}
}

let shutdown = (*dest)
.rShutdown
.ok_or_else(|| anyhow!("rShutdown not found"))?;
shutdown(dest);
}

Ok(())
}
3 changes: 2 additions & 1 deletion src/hooks/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,9 @@
// You should have received a copy of the GNU Affero General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.

mod executor;
#[macro_use]
mod query;
mod executor;
mod utility;

use async_std::task::block_on;
Expand Down
73 changes: 72 additions & 1 deletion src/hooks/query.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,13 +15,21 @@
// You should have received a copy of the GNU Affero General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.

use anyhow::Result;
use anyhow::{anyhow, Result};
use duckdb::arrow::array::RecordBatch;
use pgrx::*;
use std::ffi::CStr;
use std::str::Utf8Error;

use crate::duckdb::connection;
use crate::fdw::handler::FdwHandler;
use crate::schema::cell::*;

macro_rules! fallback_warning {
($msg:expr) => {
warning!("This query was not fully pushed down to DuckDB because DuckDB returned an error. Query times may be impacted. If you would like to see this query pushed down, please submit a request to https://github.com/paradedb/pg_analytics/issues with the following context:\n{}", $msg);
};
}

pub fn get_current_query(
planned_stmt: *mut pg_sys::PlannedStmt,
Expand Down Expand Up @@ -121,3 +129,66 @@ pub fn is_duckdb_query(relations: &[PgRelation]) -> bool {
}
})
}

#[inline]
pub fn write_batches_to_slots<T: WhoAllocated>(
query_desc: PgBox<pg_sys::QueryDesc, T>,
mut batches: Vec<RecordBatch>,
) -> Result<()> {
// Convert the DataFusion batches to Postgres tuples and send them to the destination
unsafe {
let tuple_desc = PgTupleDesc::from_pg(query_desc.tupDesc);
let estate = query_desc.estate;
(*estate).es_processed = 0;

let dest = query_desc.dest;
let startup = (*dest)
.rStartup
.ok_or_else(|| anyhow!("rStartup not found"))?;
startup(dest, query_desc.operation as i32, query_desc.tupDesc);

let receive = (*dest)
.receiveSlot
.ok_or_else(|| anyhow!("receiveSlot not found"))?;

for batch in batches.iter_mut() {
for row_index in 0..batch.num_rows() {
let tuple_table_slot =
pg_sys::MakeTupleTableSlot(query_desc.tupDesc, &pg_sys::TTSOpsVirtual);

pg_sys::ExecStoreVirtualTuple(tuple_table_slot);

for (col_index, _) in tuple_desc.iter().enumerate() {
let attribute = tuple_desc
.get(col_index)
.ok_or_else(|| anyhow!("attribute at {col_index} not found in tupdesc"))?;
let column = batch.column(col_index);
let tts_value = (*tuple_table_slot).tts_values.add(col_index);
let tts_isnull = (*tuple_table_slot).tts_isnull.add(col_index);

match column.get_cell(row_index, attribute.atttypid, attribute.name())? {
Some(cell) => {
if let Some(datum) = cell.into_datum() {
*tts_value = datum;
}
}
None => {
*tts_isnull = true;
}
};
}

receive(tuple_table_slot, dest);
(*estate).es_processed += 1;
pg_sys::ExecDropSingleTupleTableSlot(tuple_table_slot);
}
}

let shutdown = (*dest)
.rShutdown
.ok_or_else(|| anyhow!("rShutdown not found"))?;
shutdown(dest);
}

Ok(())
}
94 changes: 51 additions & 43 deletions src/hooks/utility.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,17 +16,19 @@
// along with this program. If not, see <http://www.gnu.org/licenses/>.

#![allow(clippy::too_many_arguments)]
#![allow(deprecated)]
mod explain;
mod prepare;

use std::ffi::CString;
use std::ptr::null_mut;

use anyhow::{bail, Result};
use pg_sys::NodeTag;
use pgrx::*;
use pgrx::{pg_sys, AllocatedByRust, HookResult, PgBox};
use sqlparser::{ast::Statement, dialect::PostgreSqlDialect, parser::Parser};

use super::query::*;
use explain::explain_query;
use prepare::*;

#[allow(deprecated)]
type ProcessUtilityHook = fn(
pstmt: PgBox<pg_sys::PlannedStmt>,
query_string: &core::ffi::CStr,
Expand Down Expand Up @@ -66,7 +68,47 @@ pub async fn process_utility_hook(
return Ok(());
}

let parse_state = unsafe {
let state = pg_sys::make_parsestate(null_mut());
(*state).p_sourcetext = query_string.as_ptr();
(*state).p_queryEnv = query_env.as_ptr();
state
};

let need_exec_prev_hook = match stmt_type {
pg_sys::NodeTag::T_PrepareStmt => prepare_query(
parse_state,
pstmt.utilityStmt as *mut pg_sys::PrepareStmt,
pstmt.stmt_location,
pstmt.stmt_len,
)?,

pg_sys::NodeTag::T_ExecuteStmt => {
let mut query_desc = unsafe {
PgBox::<pg_sys::QueryDesc, AllocatedByRust>::from_rust(pg_sys::CreateQueryDesc(
pstmt.as_ptr(),
query_string.as_ptr(),
null_mut(),
null_mut(),
dest.as_ptr(),
null_mut(),
query_env.as_ptr(),
0,
))
};
query_desc.estate = unsafe { pg_sys::CreateExecutorState() };

execute_query(
parse_state,
pstmt.utilityStmt as *mut pg_sys::ExecuteStmt,
query_desc,
)?
}

pg_sys::NodeTag::T_DeallocateStmt => {
deallocate_query(pstmt.utilityStmt as *mut pg_sys::DeallocateStmt)?
}

pg_sys::NodeTag::T_ExplainStmt => explain_query(
query_string,
pstmt.utilityStmt as *mut pg_sys::ExplainStmt,
Expand All @@ -91,45 +133,11 @@ pub async fn process_utility_hook(
Ok(())
}

fn is_support_utility(stmt_type: NodeTag) -> bool {
fn is_support_utility(stmt_type: pg_sys::NodeTag) -> bool {
stmt_type == pg_sys::NodeTag::T_ExplainStmt
}

fn explain_query(
query_string: &core::ffi::CStr,
stmt: *mut pg_sys::ExplainStmt,
dest: *mut pg_sys::DestReceiver,
) -> Result<bool> {
let query = unsafe { (*stmt).query as *mut pg_sys::Query };

let query_relations = get_query_relations(unsafe { (*query).rtable });
if unsafe { (*query).commandType } != pg_sys::CmdType::CMD_SELECT
|| !is_duckdb_query(&query_relations)
{
return Ok(true);
}

if unsafe { !(*stmt).options.is_null() } {
error!("the EXPLAIN options provided are not supported for DuckDB pushdown queries.");
}

unsafe {
let tstate = pg_sys::begin_tup_output_tupdesc(
dest,
pg_sys::ExplainResultDesc(stmt),
&pg_sys::TTSOpsVirtual,
);
let query = format!(
"DuckDB Scan: {}",
parse_query_from_utility_stmt(query_string)?
);
let query_c_str = CString::new(query)?;

pg_sys::do_text_output_multiline(tstate, query_c_str.as_ptr());
pg_sys::end_tup_output(tstate);
}

Ok(false)
|| stmt_type == pg_sys::NodeTag::T_PrepareStmt
|| stmt_type == pg_sys::NodeTag::T_DeallocateStmt
|| stmt_type == pg_sys::NodeTag::T_ExecuteStmt
}

fn parse_query_from_utility_stmt(query_string: &core::ffi::CStr) -> Result<String> {
Expand Down
61 changes: 61 additions & 0 deletions src/hooks/utility/explain.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
// Copyright (c) 2023-2024 Retake, Inc.
//
// This file is part of ParadeDB - Postgres for Search and Analytics
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Affero General Public License for more details.
//
// You should have received a copy of the GNU Affero General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.

use std::ffi::CString;

use anyhow::Result;
use pgrx::{error, pg_sys};

use super::parse_query_from_utility_stmt;
use crate::hooks::query::{get_query_relations, is_duckdb_query};

pub fn explain_query(
query_string: &core::ffi::CStr,
stmt: *mut pg_sys::ExplainStmt,
dest: *mut pg_sys::DestReceiver,
) -> Result<bool> {
let query = unsafe { (*stmt).query as *mut pg_sys::Query };

let query_relations = get_query_relations(unsafe { (*query).rtable });
if unsafe { (*query).commandType } != pg_sys::CmdType::CMD_SELECT
|| !is_duckdb_query(&query_relations)
{
return Ok(true);
}

if unsafe { !(*stmt).options.is_null() } {
error!("the EXPLAIN options provided are not supported for DuckDB pushdown queries.");
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Am I correct in understanding that this is fixed in your other PR? Or are there other options that won't be supported?

Copy link
Contributor Author

@kysshsy kysshsy Oct 11, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, I've just moved the code. I will implement options support in another PR.

}

unsafe {
let tstate = pg_sys::begin_tup_output_tupdesc(
dest,
pg_sys::ExplainResultDesc(stmt),
&pg_sys::TTSOpsVirtual,
);
let query = format!(
"DuckDB Scan: {}",
parse_query_from_utility_stmt(query_string)?
);
let query_c_str = CString::new(query)?;

pg_sys::do_text_output_multiline(tstate, query_c_str.as_ptr());
pg_sys::end_tup_output(tstate);
}

Ok(false)
}
Loading