[wasm-qe] fix chunking tests (#4656)
Co-authored-by: Alexey Orlenko <[email protected]>
Miguel Fernández and aqrln authored Jan 22, 2024
1 parent 0e77726 commit e5078ed
Showing 14 changed files with 169 additions and 80 deletions.
3 changes: 3 additions & 0 deletions Makefile
@@ -3,7 +3,10 @@ CONFIG_FILE = .test_config
SCHEMA_EXAMPLES_PATH = ./query-engine/example_schemas
DEV_SCHEMA_FILE = dev_datamodel.prisma
DRIVER_ADAPTERS_BRANCH ?= main

ifndef DISABLE_NIX
NIX := $(shell type nix 2> /dev/null)
endif

LIBRARY_EXT := $(shell \
case "$$(uname -s)" in \
50 changes: 50 additions & 0 deletions quaint/src/connector/connection_info.rs
@@ -298,6 +298,56 @@ impl SqlFamily {
}
}

/// Get the default max rows for a batch insert.
pub fn max_insert_rows(&self) -> Option<usize> {
match self {
#[cfg(feature = "postgresql")]
SqlFamily::Postgres => None,
#[cfg(feature = "mysql")]
SqlFamily::Mysql => None,
#[cfg(feature = "sqlite")]
SqlFamily::Sqlite => Some(999),
#[cfg(feature = "mssql")]
SqlFamily::Mssql => Some(1000),
}
}
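    // Editor's note (context, not from the diff): SQL Server caps a multi-row
    // VALUES constructor at 1000 rows, and SQLite builds commonly limit bound
    // parameters per statement to 999, hence Some(1000) and Some(999) above;
    // Postgres and MySQL have no fixed row cap, only the bind-parameter limits
    // handled by max_bind_values below.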

/// Get the max number of bind parameters for a single query. On targets other
/// than Wasm, it can be overridden with the QUERY_BATCH_SIZE env var.
#[cfg(not(target_arch = "wasm32"))]
pub fn max_bind_values(&self) -> usize {
use std::sync::OnceLock;
static BATCH_SIZE_OVERRIDE: OnceLock<Option<usize>> = OnceLock::new();
BATCH_SIZE_OVERRIDE
.get_or_init(|| {
std::env::var("QUERY_BATCH_SIZE")
.ok()
.map(|size| size.parse().expect("QUERY_BATCH_SIZE: not a valid size"))
})
.unwrap_or(self.default_max_bind_values())
}

/// Get the max number of bind parameters for a single query. On Wasm there is no
/// environment to read from, so that knob is omitted.
#[cfg(target_arch = "wasm32")]
pub fn max_bind_values(&self) -> usize {
self.default_max_bind_values()
}

/// Get the default max number of bind parameters for a single query.
pub fn default_max_bind_values(&self) -> usize {
match self {
#[cfg(feature = "postgresql")]
SqlFamily::Postgres => 32766,
#[cfg(feature = "mysql")]
SqlFamily::Mysql => 65535,
#[cfg(feature = "sqlite")]
SqlFamily::Sqlite => 999,
#[cfg(feature = "mssql")]
SqlFamily::Mssql => 2099,
}
}
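
    // Editor's usage sketch (not part of the commit; assumes a non-Wasm target
    // and quaint built with the `sqlite` feature):
    fn usage_sketch() {
        use quaint::prelude::SqlFamily;
        // With QUERY_BATCH_SIZE unset, the per-family default applies.
        assert_eq!(SqlFamily::Sqlite.max_bind_values(), 999);
        // Exporting QUERY_BATCH_SIZE=10 before launching (as the test suite's
        // direnv does) would make every family report 10 instead; the override
        // is read once and cached in the OnceLock for the life of the process.
    }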

/// Check if a family exists for the given scheme.
pub fn scheme_is_supported(url_scheme: &str) -> bool {
Self::from_scheme(url_scheme).is_some()
@@ -115,3 +115,16 @@ macro_rules! retry {
}
}};
}

#[macro_export]
macro_rules! with_id_excess {
($runner:expr, $query_template:expr) => {{
let max_bind_values = $runner
.max_bind_values()
.expect("Test expected to run only for relational databases.");

let cycle = |argn: usize| (argn % 10 + 1).to_string();
let id_list = (0..=max_bind_values).map(cycle).collect::<Vec<_>>().join(",");
$query_template.replace(":id_list:", &id_list)
}};
}
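
To make the macro concrete, here is a standalone sketch of what it computes (editor's illustration; the limit of 5 is hypothetical, real tests take it from the runner):

fn main() {
    let max_bind_values = 5; // hypothetical; tests use runner.max_bind_values()
    // Cycle the ids through 1..=10 so the list stays readable at any length.
    let cycle = |argn: usize| (argn % 10 + 1).to_string();
    // 0..=max_bind_values yields max_bind_values + 1 ids: one over the limit,
    // which is exactly what is needed to trigger QueryParameterLimitExceeded.
    let id_list = (0..=max_bind_values).map(cycle).collect::<Vec<_>>().join(",");
    assert_eq!(id_list, "1,2,3,4,5,6");
    let query = "query { findManyTestModel(where: { id: { notIn: [:id_list:] } }) { id }}"
        .replace(":id_list:", &id_list);
    println!("{query}");
}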
@@ -1,42 +1,38 @@
use query_engine_tests::*;

#[test_suite(schema(autoinc_id), capabilities(CreateMany, AutoIncrement), exclude(CockroachDb))]
mod not_in_batching {
mod not_in_chunking {
use query_engine_tests::Runner;

#[connector_test(exclude(
CockroachDb,
Postgres("pg.js.wasm"),
Postgres("neon.js.wasm"),
Sqlite("libsql.js.wasm"),
Vitess("planetscale.js.wasm")
))]
#[connector_test(exclude(CockroachDb))]
async fn not_in_batch_filter(runner: Runner) -> TestResult<()> {
runner.query(r#"mutation { createManyTestModel(data: [{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{}]) { count }}"#).await?.assert_success();

assert_error!(
runner,
"query { findManyTestModel(where: { id: { notIn: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] } }) { id }}",
2029 // QueryParameterLimitExceeded
);
with_id_excess!(
runner,
"query { findManyTestModel(where: { id: { notIn: [:id_list:] } }) { id }}"
),
2029
); // QueryParameterLimitExceeded

Ok(())
}
}

#[test_suite(schema(autoinc_id_cockroachdb), only(CockroachDb))]
mod not_in_batching_cockroachdb {
mod not_in_chunking_cockroachdb {
use query_engine_tests::Runner;

#[connector_test]
async fn not_in_batch_filter(runner: Runner) -> TestResult<()> {
runner.query(r#"mutation { createManyTestModel(data: [{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{}]) { count }}"#).await?.assert_success();

assert_error!(
runner,
"query { findManyTestModel(where: { id: { notIn: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] } }) { id }}",
2029 // QueryParameterLimitExceeded
);
with_id_excess!(
runner,
"query { findManyTestModel(where: { id: { notIn: [:id_list:] } }) { id }}"
),
2029
); // QueryParameterLimitExceeded

Ok(())
}
@@ -1,4 +1,3 @@
mod in_selection_batching;
mod select_one_compound;
mod select_one_singular;
mod transactional_batch;
@@ -1,8 +1,16 @@
use query_engine_tests::*;

/// Port note: Batch size for testing is now 10 by default, not configurable (look at the direnv).
/// * QUERY_BATCH_SIZE for testing is 10, configured in direnv.
/// * It should arguably be called QUERY_CHUNK_SIZE, since it's a knob that configures query
///   chunking: splitting a query with more arguments than the database accepts into multiple
///   queries.
/// * WASM versions of the engine don't allow runtime configuration of this value, so they
///   default to the minimum supported by any database in a SQL family (e.g. Postgres, MySQL,
///   SQLite, SQL Server, etc.). As a consequence, to guarantee that chunking happens, a number
///   of arguments larger than that default needs to be used, to get actual coverage of the
///   chunking code while exercising the WASM query engines.
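
// Editor's conceptual sketch of the chunking described above (hypothetical
// helper, not the engine's actual implementation): split one `WHERE id IN (...)`
// query with too many parameters into several queries under the limit.
fn chunked_in_queries(ids: &[i64], max_bind_values: usize) -> Vec<String> {
    ids.chunks(max_bind_values)
        .map(|chunk| {
            let list = chunk.iter().map(ToString::to_string).collect::<Vec<_>>().join(",");
            format!("SELECT * FROM \"TestModel\" WHERE id IN ({list})")
        })
        .collect()
}
// With 25 ids and a limit of 10 this yields three queries (10 + 10 + 5 ids),
// whose result sets the engine then merges.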
#[test_suite(schema(schema))]
mod isb {
mod chunking {
use indoc::indoc;
use query_engine_tests::{assert_error, run_query};

@@ -34,8 +42,8 @@ mod isb {
schema.to_owned()
}

// "batching of IN queries" should "work when having more than the specified amount of items"
// TODO(joins): Excluded because we have no support for batched queries with joins. In practice, it should happen under much less circumstances
// "chunking of IN queries" should "work when having more than the specified amount of items"
// TODO(joins): Excluded because we have no support for chunked queries with joins. In practice, it should happen under far fewer circumstances
// TODO(joins): than with the query-based strategy, because we don't issue `WHERE IN (parent_ids)` queries anymore to resolve relations.
#[connector_test(exclude_features("relationJoins"))]
async fn in_more_items(runner: Runner) -> TestResult<()> {
@@ -52,8 +60,8 @@ mod isb {
Ok(())
}

// "ascending ordering of batched IN queries" should "work when having more than the specified amount of items"
// TODO(joins): Excluded because we have no support for batched queries with joins. In practice, it should happen under much less circumstances
// "ascending ordering of chunked IN queries" should "work when having more than the specified amount of items"
// TODO(joins): Excluded because we have no support for chunked queries with joins. In practice, it should happen under far fewer circumstances
// TODO(joins): than with the query-based strategy, because we don't issue `WHERE IN (parent_ids)` queries anymore to resolve relations.
#[connector_test(exclude_features("relationJoins"))]
async fn asc_in_ordering(runner: Runner) -> TestResult<()> {
@@ -70,8 +78,8 @@ mod isb {
Ok(())
}

// "ascending ordering of batched IN queries" should "work when having more than the specified amount of items"
// TODO(joins): Excluded because we have no support for batched queries with joins. In practice, it should happen under much less circumstances
// "ascending ordering of chunked IN queries" should "work when having more than the specified amount of items"
// TODO(joins): Excluded because we have no support for chunked queries with joins. In practice, it should happen under much less circumstances
// TODO(joins): than with the query-based strategy, because we don't issue `WHERE IN (parent_ids)` queries anymore to resolve relations.
#[connector_test(exclude_features("relationJoins"))]
async fn desc_in_ordering(runner: Runner) -> TestResult<()> {
@@ -88,45 +96,29 @@ mod isb {
Ok(())
}

#[connector_test(exclude(
MongoDb,
Postgres("pg.js.wasm"),
Postgres("neon.js.wasm"),
Sqlite("libsql.js.wasm"),
Vitess("planetscale.js.wasm")
))]
#[connector_test(exclude(MongoDb))]
async fn order_by_aggregation_should_fail(runner: Runner) -> TestResult<()> {
create_test_data(&runner).await?;

assert_error!(
runner,
r#"query {
findManyA(where: {id: { in: [5,4,3,2,1,1,1,2,3,4,5,6,7,6,5,4,3,2,1,2,3,4,5,6] }}, orderBy: { b: { as: { _count: asc } } }) { id }
}"#,
with_id_excess!(&runner, "query { findManyA(where: {id: { in: [:id_list:] }}, orderBy: { b: { as: { _count: asc } } } ) { id } }"),
2029 // QueryParameterLimitExceeded
);

Ok(())
}

#[connector_test(
capabilities(FullTextSearchWithoutIndex),
exclude(
MongoDb,
Postgres("pg.js.wasm"),
Postgres("neon.js.wasm"),
Sqlite("libsql.js.wasm"),
Vitess("planetscale.js.wasm")
)
)]
#[connector_test(capabilities(FullTextSearchWithoutIndex), exclude(MongoDb))]
async fn order_by_relevance_should_fail(runner: Runner) -> TestResult<()> {
create_test_data(&runner).await?;

assert_error!(
runner,
r#"query {
findManyA(where: {id: { in: [5,4,3,2,1,1,1,2,3,4,5,6,7,6,5,4,3,2,1,2,3,4,5,6] }}, orderBy: { _relevance: { fields: text, search: "something", sort: asc } }) { id }
}"#,
with_id_excess!(
&runner,
r#"query { findManyA(where: {id: { in: [:id_list:] }}, orderBy: { _relevance: { fields: text, search: "something", sort: asc } } ) { id } }"#
),
2029 // QueryParameterLimitExceeded
);

@@ -1,5 +1,6 @@
mod aggregation;
mod batch;
mod batching;
mod chunking;
mod data_types;
mod distinct;
mod filters;
@@ -20,6 +20,7 @@ pub(crate) use vitess::*;

use crate::{datamodel_rendering::DatamodelRenderer, BoxFuture, TestConfig, TestError, CONFIG};
use psl::datamodel_connector::ConnectorCapabilities;
use quaint::prelude::SqlFamily;
use std::{convert::TryFrom, fmt};

pub trait ConnectorTagInterface {
@@ -276,6 +277,55 @@ impl ConnectorVersion {
| (_, Postgres(..)) => false,
}
}

/// The maximum number of bind values allowed for a single query.
///
/// max_bind_values is overridden by the QUERY_BATCH_SIZE env var in targets other than WASM.
///
/// Connectors whose underlying implementation is WASM don't have any max_bind_values
/// override, as there is no runtime environment to read it from.
///
/// From the PoV of the test binary, the target architecture is that of where the test runs,
/// generally x86_64, aarch64, etc.
///
/// As a consequence there is a mismatch between the max_bind_values seen by the test
/// binary (overridden by the QUERY_BATCH_SIZE env var) and the max_bind_values seen by the
/// WASM engine being exercised in those tests, through the RunnerExecutor::External test runner.
///
/// What we do here is return the max_bind_values that the connector under test will
/// actually use: for a WASM connector, the default, non-overridable value; otherwise, the
/// value seen by the test binary (which is the same one the engine exercised uses).
pub fn max_bind_values(&self) -> Option<usize> {
if self.is_wasm() {
self.sql_family().map(|f| f.default_max_bind_values())
} else {
self.sql_family().map(|f| f.max_bind_values())
}
}
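
    // Editor's illustration (assumed environment): with QUERY_BATCH_SIZE=10
    // exported, as the test suite's direnv does,
    //   ConnectorVersion::Postgres(None).max_bind_values()
    //       == Some(10)     // the native engine honors the override
    //   ConnectorVersion::Postgres(Some(PostgresVersion::PgJsWasm)).max_bind_values()
    //       == Some(32766)  // the WASM engine sticks to the Postgres default
    // so tests size their inputs against the limit the engine actually enforces.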

/// SQL family for the connector
fn sql_family(&self) -> Option<SqlFamily> {
match self {
Self::SqlServer(_) => Some(SqlFamily::Mssql),
Self::Postgres(_) => Some(SqlFamily::Postgres),
Self::MySql(_) => Some(SqlFamily::Mysql),
Self::Sqlite(_) => Some(SqlFamily::Sqlite),
Self::CockroachDb(_) => Some(SqlFamily::Postgres),
Self::Vitess(_) => Some(SqlFamily::Mysql),
_ => None,
}
}

/// Determines if the connector uses a driver adapter implemented in Wasm
fn is_wasm(&self) -> bool {
matches!(
self,
Self::Postgres(Some(PostgresVersion::PgJsWasm))
| Self::Postgres(Some(PostgresVersion::NeonJsWasm))
| Self::Vitess(Some(VitessVersion::PlanetscaleJsWasm))
| Self::Sqlite(Some(SqliteVersion::LibsqlJsWasm))
)
}
}

impl fmt::Display for ConnectorVersion {
@@ -107,6 +107,10 @@ impl Runner {
self.query_schema.internal_data_model.schema.db.source()
}

pub fn max_bind_values(&self) -> Option<usize> {
self.connector_version().max_bind_values()
}

pub async fn load(
datamodel: String,
db_schemas: &[&str],
35 changes: 8 additions & 27 deletions query-engine/connectors/sql-query-connector/src/context.rs
@@ -1,50 +1,31 @@
use quaint::{connector::SqlFamily, prelude::ConnectionInfo};
use quaint::prelude::ConnectionInfo;

pub(super) struct Context<'a> {
connection_info: &'a ConnectionInfo,
pub(crate) trace_id: Option<&'a str>,
/// Maximum rows allowed at once for an insert query.
/// None is unlimited.
pub(crate) max_rows: Option<usize>,
pub(crate) max_insert_rows: Option<usize>,
/// Maximum number of bind parameters allowed for a single query.
/// None is unlimited.
pub(crate) max_bind_values: Option<usize>,
}

impl<'a> Context<'a> {
pub(crate) fn new(connection_info: &'a ConnectionInfo, trace_id: Option<&'a str>) -> Self {
let (max_rows, default_batch_size) = match connection_info.sql_family() {
SqlFamily::Postgres => (None, 32766),
// See https://stackoverflow.com/a/11131824/788562
SqlFamily::Mysql => (None, 65535),
SqlFamily::Mssql => (Some(1000), 2099),
SqlFamily::Sqlite => (Some(999), 999),
};
let sql_family = connection_info.sql_family();
let max_insert_rows = sql_family.max_insert_rows();
let max_bind_values = sql_family.max_bind_values();

Context {
connection_info,
trace_id,
max_rows,
max_bind_values: get_batch_size(default_batch_size),
max_insert_rows,
max_bind_values: Some(max_bind_values),
}
}

pub(crate) fn schema_name(&self) -> &str {
self.connection_info.schema_name()
}
}

fn get_batch_size(default: usize) -> Option<usize> {
use once_cell::sync::Lazy;

/// Overrides the default number of allowed elements in query's `IN` or `NOT IN`
/// statement for the currently loaded connector.
/// Certain databases error out if querying with too many items. For test
/// purposes, this value can be set with the `QUERY_BATCH_SIZE` environment
/// value to a smaller number.
static BATCH_SIZE_OVERRIDE: Lazy<Option<usize>> = Lazy::new(|| {
std::env::var("QUERY_BATCH_SIZE")
.ok()
.map(|size| size.parse().expect("QUERY_BATCH_SIZE: not a valid size"))
});
(*BATCH_SIZE_OVERRIDE).or(Some(default))
}