[wasm-qe] fix chunking tests (#4656)
Co-authored-by: Alexey Orlenko <[email protected]>
Miguel Fernández and aqrln authored Jan 22, 2024
1 parent 0e77726 commit e5078ed
Showing 14 changed files with 169 additions and 80 deletions.
3 changes: 3 additions & 0 deletions Makefile
@@ -3,7 +3,10 @@ CONFIG_FILE = .test_config
SCHEMA_EXAMPLES_PATH = ./query-engine/example_schemas
DEV_SCHEMA_FILE = dev_datamodel.prisma
DRIVER_ADAPTERS_BRANCH ?= main

ifndef DISABLE_NIX
NIX := $(shell type nix 2> /dev/null)
endif

LIBRARY_EXT := $(shell \
case "$$(uname -s)" in \
50 changes: 50 additions & 0 deletions quaint/src/connector/connection_info.rs
@@ -298,6 +298,56 @@ impl SqlFamily {
}
}

/// Get the default max rows for a batch insert.
pub fn max_insert_rows(&self) -> Option<usize> {
match self {
#[cfg(feature = "postgresql")]
SqlFamily::Postgres => None,
#[cfg(feature = "mysql")]
SqlFamily::Mysql => None,
#[cfg(feature = "sqlite")]
SqlFamily::Sqlite => Some(999),
#[cfg(feature = "mssql")]
SqlFamily::Mssql => Some(1000),
}
}
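    // Editor's note (context, not from the diff): SQL Server caps a multi-row
    // VALUES constructor at 1000 rows, and SQLite builds commonly limit bound
    // parameters per statement to 999, hence Some(1000) and Some(999) above;
    // Postgres and MySQL have no fixed row cap, only the bind-parameter limits
    // handled by max_bind_values below.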

/// Get the max number of bind parameters for a single query. On targets other
/// than Wasm, it can be overridden with the QUERY_BATCH_SIZE env var.
#[cfg(not(target_arch = "wasm32"))]
pub fn max_bind_values(&self) -> usize {
use std::sync::OnceLock;
static BATCH_SIZE_OVERRIDE: OnceLock<Option<usize>> = OnceLock::new();
BATCH_SIZE_OVERRIDE
.get_or_init(|| {
std::env::var("QUERY_BATCH_SIZE")
.ok()
.map(|size| size.parse().expect("QUERY_BATCH_SIZE: not a valid size"))
})
.unwrap_or(self.default_max_bind_values())
}

/// Get the max number of bind parameters for a single query. On Wasm there is no
/// environment to read from, so that knob is omitted.
#[cfg(target_arch = "wasm32")]
pub fn max_bind_values(&self) -> usize {
self.default_max_bind_values()
}

/// Get the default max number of bind parameters for a single query.
pub fn default_max_bind_values(&self) -> usize {
match self {
#[cfg(feature = "postgresql")]
SqlFamily::Postgres => 32766,
#[cfg(feature = "mysql")]
SqlFamily::Mysql => 65535,
#[cfg(feature = "sqlite")]
SqlFamily::Sqlite => 999,
#[cfg(feature = "mssql")]
SqlFamily::Mssql => 2099,
}
}
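
    // Editor's usage sketch (not part of the commit; assumes a non-Wasm target
    // and quaint built with the `sqlite` feature):
    fn usage_sketch() {
        use quaint::prelude::SqlFamily;
        // With QUERY_BATCH_SIZE unset, the per-family default applies.
        assert_eq!(SqlFamily::Sqlite.max_bind_values(), 999);
        // Exporting QUERY_BATCH_SIZE=10 before launching (as the test suite's
        // direnv does) would make every family report 10 instead; the override
        // is read once and cached in the OnceLock for the life of the process.
    }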

/// Check if a family exists for the given scheme.
pub fn scheme_is_supported(url_scheme: &str) -> bool {
Self::from_scheme(url_scheme).is_some()
@@ -115,3 +115,16 @@ macro_rules! retry {
}
}};
}

#[macro_export]
macro_rules! with_id_excess {
($runner:expr, $query_template:expr) => {{
let max_bind_values = $runner
.max_bind_values()
.expect("Test expected to run only for relational databases.");

let cycle = |argn: usize| (argn % 10 + 1).to_string();
let id_list = (0..=max_bind_values).map(cycle).collect::<Vec<_>>().join(",");
$query_template.replace(":id_list:", &id_list)
}};
}
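
To make the macro concrete, here is a standalone sketch of what it computes (editor's illustration; the limit of 5 is hypothetical, real tests take it from the runner):

fn main() {
    let max_bind_values = 5; // hypothetical; tests use runner.max_bind_values()
    // Cycle the ids through 1..=10 so the list stays readable at any length.
    let cycle = |argn: usize| (argn % 10 + 1).to_string();
    // 0..=max_bind_values yields max_bind_values + 1 ids: one over the limit,
    // which is exactly what is needed to trigger QueryParameterLimitExceeded.
    let id_list = (0..=max_bind_values).map(cycle).collect::<Vec<_>>().join(",");
    assert_eq!(id_list, "1,2,3,4,5,6");
    let query = "query { findManyTestModel(where: { id: { notIn: [:id_list:] } }) { id }}"
        .replace(":id_list:", &id_list);
    println!("{query}");
}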
@@ -1,42 +1,38 @@
use query_engine_tests::*;

#[test_suite(schema(autoinc_id), capabilities(CreateMany, AutoIncrement), exclude(CockroachDb))]
mod not_in_batching {
mod not_in_chunking {
use query_engine_tests::Runner;

#[connector_test(exclude(
CockroachDb,
Postgres("pg.js.wasm"),
Postgres("neon.js.wasm"),
Sqlite("libsql.js.wasm"),
Vitess("planetscale.js.wasm")
))]
#[connector_test(exclude(CockroachDb))]
async fn not_in_batch_filter(runner: Runner) -> TestResult<()> {
runner.query(r#"mutation { createManyTestModel(data: [{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{}]) { count }}"#).await?.assert_success();

assert_error!(
runner,
"query { findManyTestModel(where: { id: { notIn: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] } }) { id }}",
2029 // QueryParameterLimitExceeded
);
with_id_excess!(
runner,
"query { findManyTestModel(where: { id: { notIn: [:id_list:] } }) { id }}"
),
2029
); // QueryParameterLimitExceeded

Ok(())
}
}

#[test_suite(schema(autoinc_id_cockroachdb), only(CockroachDb))]
mod not_in_batching_cockroachdb {
mod not_in_chunking_cockroachdb {
use query_engine_tests::Runner;

#[connector_test]
async fn not_in_batch_filter(runner: Runner) -> TestResult<()> {
runner.query(r#"mutation { createManyTestModel(data: [{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{}]) { count }}"#).await?.assert_success();

assert_error!(
runner,
"query { findManyTestModel(where: { id: { notIn: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] } }) { id }}",
2029 // QueryParameterLimitExceeded
);
with_id_excess!(
runner,
"query { findManyTestModel(where: { id: { notIn: [:id_list:] } }) { id }}"
),
2029
); // QueryParameterLimitExceeded

Ok(())
}
@@ -1,4 +1,3 @@
mod in_selection_batching;
mod select_one_compound;
mod select_one_singular;
mod transactional_batch;
@@ -1,8 +1,16 @@
use query_engine_tests::*;

/// Port note: Batch size for testing is now 10 by default, not configurable (look at the direnv).
/// * QUERY_BATCH_SIZE for testing is 10, configured in direnv.
/// * It should arguably be called QUERY_CHUNK_SIZE, since it's a knob that configures query
///   chunking: splitting a query with more arguments than the database accepts into multiple
///   queries.
/// * WASM versions of the engine don't allow runtime configuration of this value, so they
///   default to the minimum supported by any database in a SQL family (e.g. Postgres, MySQL,
///   SQLite, SQL Server, etc.). As a consequence, to guarantee that chunking happens, a number
///   of arguments larger than that default needs to be used, to get actual coverage of the
///   chunking code while exercising the WASM query engines.
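
// Editor's conceptual sketch of the chunking described above (hypothetical
// helper, not the engine's actual implementation): split one `WHERE id IN (...)`
// query with too many parameters into several queries under the limit.
fn chunked_in_queries(ids: &[i64], max_bind_values: usize) -> Vec<String> {
    ids.chunks(max_bind_values)
        .map(|chunk| {
            let list = chunk.iter().map(ToString::to_string).collect::<Vec<_>>().join(",");
            format!("SELECT * FROM \"TestModel\" WHERE id IN ({list})")
        })
        .collect()
}
// With 25 ids and a limit of 10 this yields three queries (10 + 10 + 5 ids),
// whose result sets the engine then merges.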
#[test_suite(schema(schema))]
mod isb {
mod chunking {
use indoc::indoc;
use query_engine_tests::{assert_error, run_query};

@@ -34,8 +42,8 @@ mod isb {
schema.to_owned()
}

// "batching of IN queries" should "work when having more than the specified amount of items"
// TODO(joins): Excluded because we have no support for batched queries with joins. In practice, it should happen under much less circumstances
// "chunking of IN queries" should "work when having more than the specified amount of items"
// TODO(joins): Excluded because we have no support for chunked queries with joins. In practice, it should happen under far fewer circumstances
// TODO(joins): than with the query-based strategy, because we don't issue `WHERE IN (parent_ids)` queries anymore to resolve relations.
#[connector_test(exclude_features("relationJoins"))]
async fn in_more_items(runner: Runner) -> TestResult<()> {
@@ -52,8 +60,8 @@ mod isb {
Ok(())
}

// "ascending ordering of batched IN queries" should "work when having more than the specified amount of items"
// TODO(joins): Excluded because we have no support for batched queries with joins. In practice, it should happen under much less circumstances
// "ascending ordering of chunked IN queries" should "work when having more than the specified amount of items"
// TODO(joins): Excluded because we have no support for chunked queries with joins. In practice, it should happen under far fewer circumstances
// TODO(joins): than with the query-based strategy, because we don't issue `WHERE IN (parent_ids)` queries anymore to resolve relations.
#[connector_test(exclude_features("relationJoins"))]
async fn asc_in_ordering(runner: Runner) -> TestResult<()> {
@@ -70,8 +78,8 @@ mod isb {
Ok(())
}

// "ascending ordering of batched IN queries" should "work when having more than the specified amount of items"
// TODO(joins): Excluded because we have no support for batched queries with joins. In practice, it should happen under much less circumstances
// "ascending ordering of chunked IN queries" should "work when having more than the specified amount of items"
// TODO(joins): Excluded because we have no support for chunked queries with joins. In practice, it should happen under much less circumstances
// TODO(joins): than with the query-based strategy, because we don't issue `WHERE IN (parent_ids)` queries anymore to resolve relations.
#[connector_test(exclude_features("relationJoins"))]
async fn desc_in_ordering(runner: Runner) -> TestResult<()> {
@@ -88,45 +96,29 @@ mod isb {
Ok(())
}

#[connector_test(exclude(
MongoDb,
Postgres("pg.js.wasm"),
Postgres("neon.js.wasm"),
Sqlite("libsql.js.wasm"),
Vitess("planetscale.js.wasm")
))]
#[connector_test(exclude(MongoDb))]
async fn order_by_aggregation_should_fail(runner: Runner) -> TestResult<()> {
create_test_data(&runner).await?;

assert_error!(
runner,
r#"query {
findManyA(where: {id: { in: [5,4,3,2,1,1,1,2,3,4,5,6,7,6,5,4,3,2,1,2,3,4,5,6] }}, orderBy: { b: { as: { _count: asc } } }) { id }
}"#,
with_id_excess!(&runner, "query { findManyA(where: {id: { in: [:id_list:] }}, orderBy: { b: { as: { _count: asc } } } ) { id } }"),
2029 // QueryParameterLimitExceeded
);

Ok(())
}

#[connector_test(
capabilities(FullTextSearchWithoutIndex),
exclude(
MongoDb,
Postgres("pg.js.wasm"),
Postgres("neon.js.wasm"),
Sqlite("libsql.js.wasm"),
Vitess("planetscale.js.wasm")
)
)]
#[connector_test(capabilities(FullTextSearchWithoutIndex), exclude(MongoDb))]
async fn order_by_relevance_should_fail(runner: Runner) -> TestResult<()> {
create_test_data(&runner).await?;

assert_error!(
runner,
r#"query {
findManyA(where: {id: { in: [5,4,3,2,1,1,1,2,3,4,5,6,7,6,5,4,3,2,1,2,3,4,5,6] }}, orderBy: { _relevance: { fields: text, search: "something", sort: asc } }) { id }
}"#,
with_id_excess!(
&runner,
r#"query { findManyA(where: {id: { in: [:id_list:] }}, orderBy: { _relevance: { fields: text, search: "something", sort: asc } } ) { id } }"#
),
2029 // QueryParameterLimitExceeded
);

@@ -1,5 +1,6 @@
mod aggregation;
mod batch;
mod batching;
mod chunking;
mod data_types;
mod distinct;
mod filters;
@@ -20,6 +20,7 @@ pub(crate) use vitess::*;

use crate::{datamodel_rendering::DatamodelRenderer, BoxFuture, TestConfig, TestError, CONFIG};
use psl::datamodel_connector::ConnectorCapabilities;
use quaint::prelude::SqlFamily;
use std::{convert::TryFrom, fmt};

pub trait ConnectorTagInterface {
@@ -276,6 +277,55 @@ impl ConnectorVersion {
| (_, Postgres(..)) => false,
}
}

/// The maximum number of bind values allowed for a single query.
///
/// max_bind_values is overridden by the QUERY_BATCH_SIZE env var in targets other than WASM.
///
/// Connectors whose underlying implementation is WASM don't have any max_bind_values
/// override, as there is no runtime environment to read it from.
///
/// From the PoV of the test binary, the target architecture is that of where the test runs,
/// generally x86_64, aarch64, etc.
///
/// As a consequence there is a mismatch between the max_bind_values seen by the test
/// binary (overridden by the QUERY_BATCH_SIZE env var) and the max_bind_values seen by the
/// WASM engine being exercised in those tests, through the RunnerExecutor::External test runner.
///
/// What we do here is return the max_bind_values that the connector under test will
/// actually use: for a WASM connector, the default, non-overridable value; otherwise, the
/// value seen by the test binary (which is the same one the engine exercised uses).
pub fn max_bind_values(&self) -> Option<usize> {
if self.is_wasm() {
self.sql_family().map(|f| f.default_max_bind_values())
} else {
self.sql_family().map(|f| f.max_bind_values())
}
}
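
    // Editor's illustration (assumed environment): with QUERY_BATCH_SIZE=10
    // exported, as the test suite's direnv does,
    //   ConnectorVersion::Postgres(None).max_bind_values()
    //       == Some(10)     // the native engine honors the override
    //   ConnectorVersion::Postgres(Some(PostgresVersion::PgJsWasm)).max_bind_values()
    //       == Some(32766)  // the WASM engine sticks to the Postgres default
    // so tests size their inputs against the limit the engine actually enforces.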

/// SQL family for the connector
fn sql_family(&self) -> Option<SqlFamily> {
match self {
Self::SqlServer(_) => Some(SqlFamily::Mssql),
Self::Postgres(_) => Some(SqlFamily::Postgres),
Self::MySql(_) => Some(SqlFamily::Mysql),
Self::Sqlite(_) => Some(SqlFamily::Sqlite),
Self::CockroachDb(_) => Some(SqlFamily::Postgres),
Self::Vitess(_) => Some(SqlFamily::Mysql),
_ => None,
}
}

/// Determines if the connector uses a driver adapter implemented in Wasm
fn is_wasm(&self) -> bool {
matches!(
self,
Self::Postgres(Some(PostgresVersion::PgJsWasm))
| Self::Postgres(Some(PostgresVersion::NeonJsWasm))
| Self::Vitess(Some(VitessVersion::PlanetscaleJsWasm))
| Self::Sqlite(Some(SqliteVersion::LibsqlJsWasm))
)
}
}

impl fmt::Display for ConnectorVersion {
@@ -107,6 +107,10 @@ impl Runner {
self.query_schema.internal_data_model.schema.db.source()
}

pub fn max_bind_values(&self) -> Option<usize> {
self.connector_version().max_bind_values()
}

pub async fn load(
datamodel: String,
db_schemas: &[&str],
35 changes: 8 additions & 27 deletions query-engine/connectors/sql-query-connector/src/context.rs
@@ -1,50 +1,31 @@
use quaint::{connector::SqlFamily, prelude::ConnectionInfo};
use quaint::prelude::ConnectionInfo;

pub(super) struct Context<'a> {
connection_info: &'a ConnectionInfo,
pub(crate) trace_id: Option<&'a str>,
/// Maximum rows allowed at once for an insert query.
/// None is unlimited.
pub(crate) max_rows: Option<usize>,
pub(crate) max_insert_rows: Option<usize>,
/// Maximum number of bind parameters allowed for a single query.
/// None is unlimited.
pub(crate) max_bind_values: Option<usize>,
}

impl<'a> Context<'a> {
pub(crate) fn new(connection_info: &'a ConnectionInfo, trace_id: Option<&'a str>) -> Self {
let (max_rows, default_batch_size) = match connection_info.sql_family() {
SqlFamily::Postgres => (None, 32766),
// See https://stackoverflow.com/a/11131824/788562
SqlFamily::Mysql => (None, 65535),
SqlFamily::Mssql => (Some(1000), 2099),
SqlFamily::Sqlite => (Some(999), 999),
};
let sql_family = connection_info.sql_family();
let max_insert_rows = sql_family.max_insert_rows();
let max_bind_values = sql_family.max_bind_values();

Context {
connection_info,
trace_id,
max_rows,
max_bind_values: get_batch_size(default_batch_size),
max_insert_rows,
max_bind_values: Some(max_bind_values),
}
}

pub(crate) fn schema_name(&self) -> &str {
self.connection_info.schema_name()
}
}

fn get_batch_size(default: usize) -> Option<usize> {
use once_cell::sync::Lazy;

/// Overrides the default number of allowed elements in query's `IN` or `NOT IN`
/// statement for the currently loaded connector.
/// Certain databases error out if querying with too many items. For test
/// purposes, this value can be set with the `QUERY_BATCH_SIZE` environment
/// value to a smaller number.
static BATCH_SIZE_OVERRIDE: Lazy<Option<usize>> = Lazy::new(|| {
std::env::var("QUERY_BATCH_SIZE")
.ok()
.map(|size| size.parse().expect("QUERY_BATCH_SIZE: not a valid size"))
});
(*BATCH_SIZE_OVERRIDE).or(Some(default))
}