From aa413b54d4893760c84635debe7d5ea6e54326cb Mon Sep 17 00:00:00 2001 From: Nikita Lapkov <5737185+laplab@users.noreply.github.com> Date: Fri, 20 Sep 2024 17:13:37 +0200 Subject: [PATCH] Reapply "fix: remove actor system for transactions (#4940)" (#4975) This reverts commit e1242e1910f44c410ae5acb7f79cd5024ae2e8cc. --- Cargo.lock | 41 ++ libs/crosstarget-utils/src/native/time.rs | 1 + libs/crosstarget-utils/src/wasm/time.rs | 1 + libs/query-engine-common/Cargo.toml | 1 + libs/query-engine-common/src/tracer.rs | 1 - libs/telemetry/Cargo.toml | 32 ++ .../telemetry/src}/capturing/capturer.rs | 14 + .../telemetry/src}/capturing/helpers.rs | 0 .../telemetry/src}/capturing/mod.rs | 166 ++++--- .../telemetry/src}/capturing/settings.rs | 0 .../telemetry/src}/capturing/storage.rs | 2 +- libs/telemetry/src/helpers.rs | 141 ++++++ .../mod.rs => libs/telemetry/src/lib.rs | 0 .../telemetry/src}/models.rs | 0 .../tests/new/interactive_tx.rs | 4 + .../query-tests-setup/Cargo.toml | 1 + .../query-tests-setup/src/logging.rs | 2 +- .../mongodb-query-connector/Cargo.toml | 3 + .../src/interface/connection.rs | 29 +- .../src/interface/transaction.rs | 29 +- .../src/interface/utils.rs | 14 +- .../connectors/query-connector/Cargo.toml | 1 + .../query-connector/src/interface.rs | 29 +- .../connectors/sql-query-connector/Cargo.toml | 3 + .../sql-query-connector/src/context.rs | 7 +- .../src/database/connection.rs | 57 +-- .../src/database/operations/write.rs | 2 +- .../src/database/transaction.rs | 57 +-- .../src/query_builder/read.rs | 10 +- .../src/query_builder/write.rs | 14 +- .../sql-query-connector/src/query_ext.rs | 8 +- .../sql-query-connector/src/sql_trace.rs | 20 +- query-engine/core/Cargo.toml | 2 + .../core/src/executor/execute_operation.rs | 53 +-- .../src/executor/interpreting_executor.rs | 101 ++--- query-engine/core/src/executor/mod.rs | 7 +- query-engine/core/src/executor/pipeline.rs | 5 +- .../interactive_transactions/actor_manager.rs | 160 ------- .../src/interactive_transactions/actors.rs | 425 ------------------ .../src/interactive_transactions/manager.rs | 190 ++++++++ .../src/interactive_transactions/messages.rs | 46 -- .../core/src/interactive_transactions/mod.rs | 157 +++---- .../interactive_transactions/transaction.rs | 252 +++++++++++ .../core/src/interpreter/interpreter_impl.rs | 19 +- .../query_interpreters/nested_read.rs | 11 +- .../interpreter/query_interpreters/read.rs | 39 +- .../interpreter/query_interpreters/write.rs | 77 ++-- query-engine/core/src/lib.rs | 2 - .../core/src/telemetry/capturing/tx_ext.rs | 88 ---- query-engine/core/src/telemetry/helpers.rs | 128 ------ query-engine/query-engine-c-abi/Cargo.toml | 1 + query-engine/query-engine-c-abi/src/engine.rs | 42 +- query-engine/query-engine-c-abi/src/logger.rs | 2 +- query-engine/query-engine-node-api/Cargo.toml | 1 + .../query-engine-node-api/src/engine.rs | 68 +-- .../query-engine-node-api/src/logger.rs | 2 +- query-engine/query-engine-wasm/Cargo.toml | 1 + .../query-engine-wasm/src/wasm/engine.rs | 43 +- query-engine/query-engine/Cargo.toml | 1 + query-engine/query-engine/src/logger.rs | 1 - query-engine/query-engine/src/server/mod.rs | 289 ++++-------- query-engine/query-engine/src/tracer.rs | 2 +- query-engine/request-handlers/Cargo.toml | 1 + query-engine/request-handlers/src/handler.rs | 35 +- 64 files changed, 1358 insertions(+), 1583 deletions(-) create mode 100644 libs/telemetry/Cargo.toml rename {query-engine/core/src/telemetry => libs/telemetry/src}/capturing/capturer.rs (96%) rename 
{query-engine/core/src/telemetry => libs/telemetry/src}/capturing/helpers.rs (100%) rename {query-engine/core/src/telemetry => libs/telemetry/src}/capturing/mod.rs (77%) rename {query-engine/core/src/telemetry => libs/telemetry/src}/capturing/settings.rs (100%) rename {query-engine/core/src/telemetry => libs/telemetry/src}/capturing/storage.rs (92%) create mode 100644 libs/telemetry/src/helpers.rs rename query-engine/core/src/telemetry/mod.rs => libs/telemetry/src/lib.rs (100%) rename {query-engine/core/src/telemetry => libs/telemetry/src}/models.rs (100%) delete mode 100644 query-engine/core/src/interactive_transactions/actor_manager.rs delete mode 100644 query-engine/core/src/interactive_transactions/actors.rs create mode 100644 query-engine/core/src/interactive_transactions/manager.rs delete mode 100644 query-engine/core/src/interactive_transactions/messages.rs create mode 100644 query-engine/core/src/interactive_transactions/transaction.rs delete mode 100644 query-engine/core/src/telemetry/capturing/tx_ext.rs delete mode 100644 query-engine/core/src/telemetry/helpers.rs diff --git a/Cargo.lock b/Cargo.lock index 62a21959bf0f..048987649094 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2688,6 +2688,7 @@ dependencies = [ "regex", "serde", "serde_json", + "telemetry", "thiserror", "tokio", "tracing", @@ -3871,6 +3872,7 @@ dependencies = [ "query-structure", "serde", "serde_json", + "telemetry", "thiserror", "user-facing-errors", "uuid", @@ -3887,6 +3889,7 @@ dependencies = [ "crossbeam-channel", "crosstarget-utils", "cuid", + "derive_more", "enumflags2", "futures", "indexmap 2.2.2", @@ -3902,6 +3905,7 @@ dependencies = [ "schema", "serde", "serde_json", + "telemetry", "thiserror", "tokio", "tracing", @@ -3939,6 +3943,7 @@ dependencies = [ "serial_test", "sql-query-connector", "structopt", + "telemetry", "thiserror", "tokio", "tracing", @@ -3973,6 +3978,7 @@ dependencies = [ "serde", "serde_json", "sql-query-connector", + "telemetry", "thiserror", "tokio", "tracing", @@ -3998,6 +4004,7 @@ dependencies = [ "query-engine-metrics", "serde", "serde_json", + "telemetry", "thiserror", "tracing", "tracing-futures", @@ -4052,6 +4059,7 @@ dependencies = [ "serde", "serde_json", "sql-query-connector", + "telemetry", "thiserror", "tokio", "tracing", @@ -4113,6 +4121,7 @@ dependencies = [ "serde-wasm-bindgen", "serde_json", "sql-query-connector", + "telemetry", "thiserror", "tokio", "tracing", @@ -4182,6 +4191,7 @@ dependencies = [ "serde_json", "sql-query-connector", "strip-ansi-escapes", + "telemetry", "thiserror", "tokio", "tracing", @@ -4466,6 +4476,7 @@ dependencies = [ "serde", "serde_json", "sql-query-connector", + "telemetry", "thiserror", "tracing", "url", @@ -5273,6 +5284,7 @@ dependencies = [ "rand 0.8.5", "serde", "serde_json", + "telemetry", "thiserror", "tokio", "tracing", @@ -5553,6 +5565,35 @@ version = "1.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "55937e1799185b12863d447f42597ed69d9928686b8d88a1df17376a097d8369" +[[package]] +name = "telemetry" +version = "0.1.0" +dependencies = [ + "async-trait", + "crossbeam-channel", + "crosstarget-utils", + "cuid", + "derive_more", + "enumflags2", + "futures", + "indexmap 2.2.2", + "itertools 0.12.0", + "lru 0.7.8", + "once_cell", + "opentelemetry", + "psl", + "query-engine-metrics", + "serde", + "serde_json", + "thiserror", + "tokio", + "tracing", + "tracing-futures", + "tracing-opentelemetry", + "tracing-subscriber", + "uuid", +] + [[package]] name = "tempfile" version = "3.7.1" diff --git 
a/libs/crosstarget-utils/src/native/time.rs b/libs/crosstarget-utils/src/native/time.rs index 3b154a27565c..9dfb31e412e3 100644 --- a/libs/crosstarget-utils/src/native/time.rs +++ b/libs/crosstarget-utils/src/native/time.rs @@ -5,6 +5,7 @@ use std::{ use crate::common::TimeoutError; +#[derive(Clone, Copy)] pub struct ElapsedTimeCounter { instant: Instant, } diff --git a/libs/crosstarget-utils/src/wasm/time.rs b/libs/crosstarget-utils/src/wasm/time.rs index 18f3394b7464..02227299d6e7 100644 --- a/libs/crosstarget-utils/src/wasm/time.rs +++ b/libs/crosstarget-utils/src/wasm/time.rs @@ -21,6 +21,7 @@ extern "C" { } +#[derive(Clone, Copy)] pub struct ElapsedTimeCounter { start_time: f64, } diff --git a/libs/query-engine-common/Cargo.toml b/libs/query-engine-common/Cargo.toml index daf41ba50f66..cde34f7371e9 100644 --- a/libs/query-engine-common/Cargo.toml +++ b/libs/query-engine-common/Cargo.toml @@ -8,6 +8,7 @@ thiserror = "1" url.workspace = true query-connector = { path = "../../query-engine/connectors/query-connector" } query-core = { path = "../../query-engine/core" } +telemetry = { path = "../telemetry" } user-facing-errors = { path = "../user-facing-errors" } serde_json.workspace = true serde.workspace = true diff --git a/libs/query-engine-common/src/tracer.rs b/libs/query-engine-common/src/tracer.rs index 19d17cf13a05..256ec95c172f 100644 --- a/libs/query-engine-common/src/tracer.rs +++ b/libs/query-engine-common/src/tracer.rs @@ -8,7 +8,6 @@ use opentelemetry::{ }, trace::{TraceError, TracerProvider}, }; -use query_core::telemetry; use std::fmt::{self, Debug}; /// Pipeline builder diff --git a/libs/telemetry/Cargo.toml b/libs/telemetry/Cargo.toml new file mode 100644 index 000000000000..9403e7f8ae00 --- /dev/null +++ b/libs/telemetry/Cargo.toml @@ -0,0 +1,32 @@ +[package] +edition = "2021" +name = "telemetry" +version = "0.1.0" + +[features] +metrics = ["dep:query-engine-metrics"] + +[dependencies] +async-trait.workspace = true +crossbeam-channel = "0.5.6" +psl.workspace = true +futures = "0.3" +indexmap.workspace = true +itertools.workspace = true +once_cell = "1" +opentelemetry = { version = "0.17.0", features = ["rt-tokio", "serialize"] } +serde.workspace = true +serde_json.workspace = true +thiserror = "1.0" +tokio = { version = "1.0", features = ["macros", "time"] } +tracing = { workspace = true, features = ["attributes"] } +tracing-futures = "0.2" +tracing-subscriber = { version = "0.3", features = ["env-filter"] } +tracing-opentelemetry = "0.17.4" +uuid.workspace = true +cuid = { git = "https://github.com/prisma/cuid-rust", branch = "wasm32-support" } +crosstarget-utils = { path = "../crosstarget-utils" } +lru = "0.7.7" +enumflags2.workspace = true +derive_more = "0.99.17" +query-engine-metrics = { path = "../../query-engine/metrics", optional = true } diff --git a/query-engine/core/src/telemetry/capturing/capturer.rs b/libs/telemetry/src/capturing/capturer.rs similarity index 96% rename from query-engine/core/src/telemetry/capturing/capturer.rs rename to libs/telemetry/src/capturing/capturer.rs index d0d9886acd2b..508162148e5e 100644 --- a/query-engine/core/src/telemetry/capturing/capturer.rs +++ b/libs/telemetry/src/capturing/capturer.rs @@ -29,6 +29,20 @@ impl Capturer { Self::Disabled } + + pub async fn try_start_capturing(&self) { + if let Capturer::Enabled(capturer) = &self { + capturer.start_capturing().await + } + } + + pub async fn try_fetch_captures(&self) -> Option { + if let Capturer::Enabled(capturer) = &self { + capturer.fetch_captures().await + } else { + None + 
} + } } #[derive(Debug, Clone)] diff --git a/query-engine/core/src/telemetry/capturing/helpers.rs b/libs/telemetry/src/capturing/helpers.rs similarity index 100% rename from query-engine/core/src/telemetry/capturing/helpers.rs rename to libs/telemetry/src/capturing/helpers.rs diff --git a/query-engine/core/src/telemetry/capturing/mod.rs b/libs/telemetry/src/capturing/mod.rs similarity index 77% rename from query-engine/core/src/telemetry/capturing/mod.rs rename to libs/telemetry/src/capturing/mod.rs index fc1219d5fe06..1895785bc98d 100644 --- a/query-engine/core/src/telemetry/capturing/mod.rs +++ b/libs/telemetry/src/capturing/mod.rs @@ -4,97 +4,97 @@ //! The interaction diagram below (soorry width!) shows the different roles at play during telemetry //! capturing. A textual explanatation follows it. For the sake of example a server environment //! --the query-engine crate-- is assumed. -//! # ┌ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ┐ -//! # -//! # │ <> │ -//! # -//! # ╔═══════════════════════╗ │╔═══════════════╗ │ -//! # ║<>║ ║ <> ║ ╔════════════════╗ ╔═══════════════════╗ -//! # ┌───────────────────┐ ║ PROCESSOR ║ │║ Sender ║ ║ Storage ║│ ║ TRACER ║ -//! # │ Server │ ╚═══════════╦═══════════╝ ╚══════╦════════╝ ╚═══════╦════════╝ ╚═════════╦═════════╝ -//! # └─────────┬─────────┘ │ │ │ │ │ │ -//! # │ │ │ │ │ -//! # │ │ │ │ │ │ │ -//! # POST │ │ │ │ │ -//! # (body, headers)│ │ │ │ │ │ │ -//! # ──────────▶┌┴┐ │ │ │ │ -//! # ┌─┐ │ │new(headers)╔════════════╗ │ │ │ │ │ │ -//! # │1│ │ ├───────────▶║s: Settings ║ │ │ │ │ -//! # └─┘ │ │ ╚════════════╝ │ │ │ │ │ │ -//! # │ │ │ │ │ │ -//! # │ │ ╔═══════════════════╗ │ │ │ │ │ │ -//! # │ │ ║ Capturer::Enabled ║ │ │ │ │ ┌────────────┐ -//! # │ │ ╚═══════════════════╝ │ │ │ │ │ │ │<│ -//! # │ │ │ │ │ │ │ └──────┬─────┘ -//! # │ │ ┌─┐ new(trace_id, s) │ │ │ │ │ │ │ │ -//! # │ ├───┤2├───────────────────────▶│ │ │ │ │ │ -//! # │ │ └─┘ │ │ │ │ │ │ │ │ -//! # │ │ │ │ │ │ │ │ -//! # │ │ ┌─┐ start_capturing() │ start_capturing │ │ │ │ │ │ │ -//! # │ ├───┤3├───────────────────────▶│ (trace_id, s) │ │ │ │ │ -//! # │ │ └─┘ │ │ │ │ │ │ │ │ -//! # │ │ ├─────────────────────▶│ send(StartCapturing, │ │ │ │ -//! # │ │ │ │ trace_id)│ │ │ │ │ │ -//! # │ │ │ │── ── ── ── ── ── ── ─▶│ │ │ │ -//! # │ │ │ │ ┌─┐ │ │insert(trace_id, s) │ │ │ │ -//! # │ │ │ │ │4│ │────────────────────▶│ │ │ -//! # │ │ │ │ └─┘ │ │ │ │ ┌─┐ │ process_query │ -//! # │ │──────────────────────────────┼──────────────────────┼───────────────────────┼─────────────────────┼────────────┤5├──────┼──────────────────────────▶┌┴┐ -//! # │ │ │ │ │ │ │ │ └─┘ │ │ │ -//! # │ │ │ │ │ │ │ │ │ +//! # ┌ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ┐ +//! # +//! # │ <> │ +//! # +//! # ╔═══════════════════════╗ │╔═══════════════╗ │ +//! # ║<>║ ║ <> ║ ╔════════════════╗ ╔═══════════════════╗ +//! # ┌───────────────────┐ ║ PROCESSOR ║ │║ Sender ║ ║ Storage ║│ ║ TRACER ║ +//! # │ Server │ ╚═══════════╦═══════════╝ ╚══════╦════════╝ ╚═══════╦════════╝ ╚═════════╦═════════╝ +//! # └─────────┬─────────┘ │ │ │ │ │ │ +//! # │ │ │ │ │ +//! # │ │ │ │ │ │ │ +//! # POST │ │ │ │ │ +//! # (body, headers)│ │ │ │ │ │ │ +//! # ──────────▶┌┴┐ │ │ │ │ +//! # ┌─┐ │ │new(headers)╔════════════╗ │ │ │ │ │ │ +//! # │1│ │ ├───────────▶║s: Settings ║ │ │ │ │ +//! # └─┘ │ │ ╚════════════╝ │ │ │ │ │ │ +//! # │ │ │ │ │ │ +//! # │ │ ╔═══════════════════╗ │ │ │ │ │ │ +//! # │ │ ║ Capturer::Enabled ║ │ │ │ │ ┌────────────┐ +//! # │ │ ╚═══════════════════╝ │ │ │ │ │ │ │<│ +//! # │ │ │ │ │ │ │ └──────┬─────┘ +//! 
# │ │ ┌─┐ new(trace_id, s) │ │ │ │ │ │ │ │ +//! # │ ├───┤2├───────────────────────▶│ │ │ │ │ │ +//! # │ │ └─┘ │ │ │ │ │ │ │ │ +//! # │ │ │ │ │ │ │ │ +//! # │ │ ┌─┐ start_capturing() │ start_capturing │ │ │ │ │ │ │ +//! # │ ├───┤3├───────────────────────▶│ (trace_id, s) │ │ │ │ │ +//! # │ │ └─┘ │ │ │ │ │ │ │ │ +//! # │ │ ├─────────────────────▶│ send(StartCapturing, │ │ │ │ +//! # │ │ │ │ trace_id)│ │ │ │ │ │ +//! # │ │ │ │── ── ── ── ── ── ── ─▶│ │ │ │ +//! # │ │ │ │ ┌─┐ │ │insert(trace_id, s) │ │ │ │ +//! # │ │ │ │ │4│ │────────────────────▶│ │ │ +//! # │ │ │ │ └─┘ │ │ │ │ ┌─┐ │ process_query │ +//! # │ │──────────────────────────────┼──────────────────────┼───────────────────────┼─────────────────────┼────────────┤5├──────┼──────────────────────────▶┌┴┐ +//! # │ │ │ │ │ │ │ │ └─┘ │ │ │ +//! # │ │ │ │ │ │ │ │ │ //! # │ │ │ │ │ │ │ │ │ │ │ ┌─────────────────────┐ //! # │ │ │ │ │ │ │ log! / span! ┌─┐ │ │ │ res: PrismaResponse │ //! # │ │ │ │ │ │ │ │ │◀─────────────────────┤6├──│ │ └──────────┬──────────┘ -//! # │ │ │ │ │ on_end(span_data)│ ┌─┐ │ └─┘ │ │ new │ -//! # │ │ │ │◀──────────────┼───────┼─────────────────────┼─────────┼──┤7├──────┤ │ │────────────▶│ -//! # │ │ │ │ send(SpanDataProcessed│ │ └─┘ │ │ │ │ -//! # │ │ │ │ , trace_id) │ append(trace_id, │ │ │ │ │ │ -//! # │ │ │ │── ── ── ── ── ── ── ─▶│ logs, traces) │ │ │ │ │ -//! # │ │ │ │ ┌─┐ │ ├────────────────────▶│ │ │ │ │ │ -//! # │ │ │ │ │8│ │ │ │ │ │ │ -//! # │ │ res: PrismaResponse │ ┌─┐ │ └─┘ │ │ │ │ │ │ │ │ -//! # │ │─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ┼ ┤9├ ─ ─ ─ ─ ─ ─ ─ ─ ─│─ ─ ─ ─ ─ ─return ─ ─ ─│─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─│─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─│─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─└┬┘ │ -//! # │ │ ┌────┐ fetch_captures() │ └─┘ │ │ │ │ │ │ │ │ -//! # │ ├─┤ 10 ├──────────────────────▶│ fetch_captures │ │ │ │ │ │ -//! # │ │ └────┘ │ (trace_id) │ │ │ │ │ │ │ │ -//! # │ │ ├─────────────────────▶│ send(FetchCaptures, │ │ │ x │ -//! # │ │ │ │ trace_id) │ │ │ │ │ -//! # │ │ │ │── ── ── ── ── ── ── ─▶│ get logs/traces │ │ │ -//! # │ │ │ │ ┌────┐ │ ├─────────────────────▶ │ │ │ -//! # │ │ │ │ │ 11 │ │ │ │ │ -//! # │ │ │ │ └────┘ │ │◁ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─│ │ │ │ -//! # │ │ │ │ │ │ │ │ -//! # │ │ ◁ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─│ │ │ │ │ │ │ -//! # │ │ logs, traces │ │ │ │ │ │ -//! # │ │◁─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─│ │ │ │ │ │ │ │ -//! # │ │ x ┌────┐ │ │ │ │ res.set_extension(logs) │ -//! # │ ├───────────────────────────────────────┤ 12 ├────────┼───────────────┼───────┼─────────────────────┼─────────┼───────────┼──────────────────────────────────────────▶│ -//! # │ │ └────┘ │ │ │ │ res.set_extension(traces) │ -//! # │ ├─────────────────────────────────────────────────────┼───────────────┼───────┼─────────────────────┼─────────┼───────────┼──────────────────────────────────────────▶│ -//! # ◀ ─ ─ ─└┬┘ │ │ │ │ x -//! # json!(res) │ │ │ -//! # ┌────┐ │ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ -//! # │ 13 │ │ -//! # └────┘ -//! # -//! # ◀─────── call (pseudo-signatures) -//! # -//! # ◀─ ── ── async message passing (channels) -//! # -//! # ◁─ ─ ─ ─ return -//! # -//! +//! # │ │ │ │ │ on_end(span_data)│ ┌─┐ │ └─┘ │ │ new │ +//! # │ │ │ │◀──────────────┼───────┼─────────────────────┼─────────┼──┤7├──────┤ │ │────────────▶│ +//! # │ │ │ │ send(SpanDataProcessed│ │ └─┘ │ │ │ │ +//! # │ │ │ │ , trace_id) │ append(trace_id, │ │ │ │ │ │ +//! # │ │ │ │── ── ── ── ── ── ── ─▶│ logs, traces) │ │ │ │ │ +//! # │ │ │ │ ┌─┐ │ ├────────────────────▶│ │ │ │ │ │ +//! # │ │ │ │ │8│ │ │ │ │ │ │ +//! # │ │ res: PrismaResponse │ ┌─┐ │ └─┘ │ │ │ │ │ │ │ │ +//! 
# │ │─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ┼ ┤9├ ─ ─ ─ ─ ─ ─ ─ ─ ─│─ ─ ─ ─ ─ ─return ─ ─ ─│─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─│─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─│─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─└┬┘ │ +//! # │ │ ┌────┐ fetch_captures() │ └─┘ │ │ │ │ │ │ │ │ +//! # │ ├─┤ 10 ├──────────────────────▶│ fetch_captures │ │ │ │ │ │ +//! # │ │ └────┘ │ (trace_id) │ │ │ │ │ │ │ │ +//! # │ │ ├─────────────────────▶│ send(FetchCaptures, │ │ │ x │ +//! # │ │ │ │ trace_id) │ │ │ │ │ +//! # │ │ │ │── ── ── ── ── ── ── ─▶│ get logs/traces │ │ │ +//! # │ │ │ │ ┌────┐ │ ├─────────────────────▶ │ │ │ +//! # │ │ │ │ │ 11 │ │ │ │ │ +//! # │ │ │ │ └────┘ │ │◁ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─│ │ │ │ +//! # │ │ │ │ │ │ │ │ +//! # │ │ ◁ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─│ │ │ │ │ │ │ +//! # │ │ logs, traces │ │ │ │ │ │ +//! # │ │◁─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─│ │ │ │ │ │ │ │ +//! # │ │ x ┌────┐ │ │ │ │ res.set_extension(logs) │ +//! # │ ├───────────────────────────────────────┤ 12 ├────────┼───────────────┼───────┼─────────────────────┼─────────┼───────────┼──────────────────────────────────────────▶│ +//! # │ │ └────┘ │ │ │ │ res.set_extension(traces) │ +//! # │ ├─────────────────────────────────────────────────────┼───────────────┼───────┼─────────────────────┼─────────┼───────────┼──────────────────────────────────────────▶│ +//! # ◀ ─ ─ ─└┬┘ │ │ │ │ x +//! # json!(res) │ │ │ +//! # ┌────┐ │ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ +//! # │ 13 │ │ +//! # └────┘ +//! # +//! # ◀─────── call (pseudo-signatures) +//! # +//! # ◀─ ── ── async message passing (channels) +//! # +//! # ◁─ ─ ─ ─ return +//! # +//! //! In the diagram, you will see objects whose lifetime is static. The boxes for those have a double //! width margin. These are: -//! +//! //! - The `server` itself //! - The global `TRACER`, which handles `log!` and `span!` and uses the global `PROCESSOR` to //! process the data constituting a trace `Span`s and log `Event`s //! - The global `PROCESSOR`, which manages the `Storage` set of data structures, holding logs, //! traces (and capture settings) per request. -//! +//! //! Then, through the request lifecycle, different objects are created and dropped: -//! +//! //! - When a request comes in, its headers are processed and a [`Settings`] object is built, this //! object determines, for the request, how logging and tracing are going to be captured: if only //! traces, logs, or both, and which log levels are going to be captured. @@ -105,9 +105,9 @@ //! part of the channel is kept in a global, so it can be cloned and used by a) the Capturer //! (to start capturing / fetch the captures) or by the tracer's SpanProcessor, to extract //! tracing and logging information that's eventually displayed to the user. -//! +//! //! Then the capturing process works in this way: -//! +//! //! - The server receives a query **[1]** //! - It grabs the HTTP headers and builds a `Capture` object **[2]**, which is configured with the settings //! 
denoted by the `X-capture-telemetry` @@ -138,7 +138,6 @@ #![allow(unused_imports, dead_code)] pub use self::capturer::Capturer; pub use self::settings::Settings; -pub use tx_ext::TxTraceExt; use self::capturer::Processor; use once_cell::sync::Lazy; @@ -198,4 +197,3 @@ mod capturer; mod helpers; mod settings; pub mod storage; -mod tx_ext; diff --git a/query-engine/core/src/telemetry/capturing/settings.rs b/libs/telemetry/src/capturing/settings.rs similarity index 100% rename from query-engine/core/src/telemetry/capturing/settings.rs rename to libs/telemetry/src/capturing/settings.rs diff --git a/query-engine/core/src/telemetry/capturing/storage.rs b/libs/telemetry/src/capturing/storage.rs similarity index 92% rename from query-engine/core/src/telemetry/capturing/storage.rs rename to libs/telemetry/src/capturing/storage.rs index 9c2767169112..5c83affc85e3 100644 --- a/query-engine/core/src/telemetry/capturing/storage.rs +++ b/libs/telemetry/src/capturing/storage.rs @@ -1,5 +1,5 @@ use super::settings::Settings; -use crate::telemetry::models; +use crate::models; #[derive(Debug, Default)] pub struct Storage { diff --git a/libs/telemetry/src/helpers.rs b/libs/telemetry/src/helpers.rs new file mode 100644 index 000000000000..ab9231e56888 --- /dev/null +++ b/libs/telemetry/src/helpers.rs @@ -0,0 +1,141 @@ +use super::models::TraceSpan; +use derive_more::Display; +use once_cell::sync::Lazy; +use opentelemetry::sdk::export::trace::SpanData; +use opentelemetry::trace::{SpanId, TraceContextExt, TraceId}; +use serde_json::{json, Value}; +use std::collections::HashMap; +use tracing::Metadata; +use tracing_subscriber::EnvFilter; + +pub static SHOW_ALL_TRACES: Lazy = Lazy::new(|| match std::env::var("PRISMA_SHOW_ALL_TRACES") { +    Ok(enabled) => enabled.eq_ignore_ascii_case("true"), +    Err(_) => false, +}); + +/// Traceparent is a remote span. It is identified by trace_id and span_id. +/// +/// By "remote" we mean that this span was not emitted in the current process. In real life, it is +/// either: +/// - Emitted by the JS part of the Prisma ORM. This is true both for Accelerate (where the Rust +/// part is deployed as a server) and for the ORM (where the Rust part is a shared library) +/// - Never emitted at all. This happens when the `TraceParent` is created artificially from `TxId` +/// (see `TxId::as_traceparent`). In this case, `TraceParent` is used only to correlate logs +/// from different transaction operations - it is never used as a part of the trace +#[derive(Display, Copy, Clone)] +// This conforms with https://www.w3.org/TR/trace-context/#traceparent-header-field-values. Accelerate +// relies on this behaviour. +#[display(fmt = "00-{trace_id:032x}-{span_id:016x}-01")] +pub struct TraceParent { +    trace_id: TraceId, +    span_id: SpanId, +} + +impl TraceParent { +    pub fn new_unsafe(trace_id: TraceId, span_id: SpanId) -> Self { +        Self { trace_id, span_id } +    } + +    pub fn from_remote_context(context: &opentelemetry::Context) -> Option { +        let span = context.span(); +        let span_context = span.span_context(); + +        if span_context.is_valid() { +            Some(Self { +                trace_id: span_context.trace_id(), +                span_id: span_context.span_id(), +            }) +        } else { +            None +        } +    } + +    pub fn trace_id(&self) -> TraceId { +        self.trace_id +    } + +    /// Returns a remote `opentelemetry::Context`. By "remote" we mean that it wasn't emitted in the +    /// current process.
+    pub fn as_remote_context(&self) -> opentelemetry::Context { +        // This relies on the fact that a global text map propagator was installed that +        // can handle the `traceparent` field (for example, `TraceContextPropagator`). +        let mut extractor = HashMap::new(); +        extractor.insert("traceparent".to_string(), self.to_string()); +        opentelemetry::global::get_text_map_propagator(|propagator| propagator.extract(&extractor)) +    } +} + +pub fn spans_to_json(spans: Vec) -> String { +    let json_spans: Vec = spans.into_iter().map(|span| json!(TraceSpan::from(span))).collect(); +    let span_result = json!({ +        "span": true, +        "spans": json_spans +    }); +    serde_json::to_string(&span_result).unwrap_or_default() +} + +pub fn restore_remote_context_from_json_str(serialized: &str) -> opentelemetry::Context { +    // This relies on the fact that a global text map propagator was installed that +    // can handle the `traceparent` field (for example, `TraceContextPropagator`). +    let trace: HashMap = serde_json::from_str(serialized).unwrap_or_default(); +    opentelemetry::global::get_text_map_propagator(|propagator| propagator.extract(&trace)) +} + +pub enum QueryEngineLogLevel { +    FromEnv, +    Override(String), +} + +impl QueryEngineLogLevel { +    fn level(self) -> Option { +        match self { +            Self::FromEnv => std::env::var("QE_LOG_LEVEL").ok(), +            Self::Override(l) => Some(l), +        } +    } +} + +#[rustfmt::skip] +pub fn env_filter(log_queries: bool, qe_log_level: QueryEngineLogLevel) -> EnvFilter { +    let mut filter = EnvFilter::from_default_env() +        .add_directive("tide=error".parse().unwrap()) +        .add_directive("tonic=error".parse().unwrap()) +        .add_directive("h2=error".parse().unwrap()) +        .add_directive("hyper=error".parse().unwrap()) +        .add_directive("tower=error".parse().unwrap()); + +    if let Some(level) = qe_log_level.level() { +        filter = filter +            .add_directive(format!("query_engine={}", &level).parse().unwrap()) +            .add_directive(format!("query_core={}", &level).parse().unwrap()) +            .add_directive(format!("query_connector={}", &level).parse().unwrap()) +            .add_directive(format!("sql_query_connector={}", &level).parse().unwrap()) +            .add_directive(format!("mongodb_query_connector={}", &level).parse().unwrap()); +    } + +    if log_queries { +        filter = filter +            .add_directive("quaint[{is_query}]=trace".parse().unwrap()) +            .add_directive("mongodb_query_connector=debug".parse().unwrap()); +    } + +    filter +} + +pub fn user_facing_span_only_filter(meta: &Metadata<'_>) -> bool { +    if !meta.is_span() { +        return false; +    } + +    if *SHOW_ALL_TRACES { +        return true; +    } + +    if meta.fields().iter().any(|f| f.name() == "user_facing") { +        return true; +    } + +    // Spans describing a quaint query. +    // TODO: should this span be made user_facing in quaint?
+ meta.target() == "quaint::connector::metrics" && meta.name() == "quaint:query" +} diff --git a/query-engine/core/src/telemetry/mod.rs b/libs/telemetry/src/lib.rs similarity index 100% rename from query-engine/core/src/telemetry/mod.rs rename to libs/telemetry/src/lib.rs diff --git a/query-engine/core/src/telemetry/models.rs b/libs/telemetry/src/models.rs similarity index 100% rename from query-engine/core/src/telemetry/models.rs rename to libs/telemetry/src/models.rs diff --git a/query-engine/connector-test-kit-rs/query-engine-tests/tests/new/interactive_tx.rs b/query-engine/connector-test-kit-rs/query-engine-tests/tests/new/interactive_tx.rs index b8827a3bf009..584d12b092c7 100644 --- a/query-engine/connector-test-kit-rs/query-engine-tests/tests/new/interactive_tx.rs +++ b/query-engine/connector-test-kit-rs/query-engine-tests/tests/new/interactive_tx.rs @@ -3,6 +3,8 @@ use std::borrow::Cow; #[test_suite(schema(generic), exclude(Sqlite("cfd1")))] mod interactive_tx { + use std::time::{Duration, Instant}; + use query_engine_tests::*; use tokio::time; @@ -231,7 +233,9 @@ mod interactive_tx { let batch_results = runner.batch(queries, false, None).await?; batch_results.assert_failure(2002, None); + let now = Instant::now(); let res = runner.commit_tx(tx_id.clone()).await?; + assert!(now.elapsed() <= Duration::from_millis(5000)); if matches!(runner.connector_version(), ConnectorVersion::MongoDb(_)) { assert!(res.is_err()); diff --git a/query-engine/connector-test-kit-rs/query-tests-setup/Cargo.toml b/query-engine/connector-test-kit-rs/query-tests-setup/Cargo.toml index 8b68821dcf87..87c39fae8ca2 100644 --- a/query-engine/connector-test-kit-rs/query-tests-setup/Cargo.toml +++ b/query-engine/connector-test-kit-rs/query-tests-setup/Cargo.toml @@ -15,6 +15,7 @@ sql-query-connector = { path = "../../connectors/sql-query-connector" } query-engine = { path = "../../query-engine" } psl.workspace = true user-facing-errors = { path = "../../../libs/user-facing-errors" } +telemetry = { path = "../../../libs/telemetry" } thiserror = "1.0" async-trait.workspace = true nom = "7.1" diff --git a/query-engine/connector-test-kit-rs/query-tests-setup/src/logging.rs b/query-engine/connector-test-kit-rs/query-tests-setup/src/logging.rs index 5520075e6d30..603cc4f6737b 100644 --- a/query-engine/connector-test-kit-rs/query-tests-setup/src/logging.rs +++ b/query-engine/connector-test-kit-rs/query-tests-setup/src/logging.rs @@ -1,5 +1,5 @@ -use query_core::telemetry::helpers as telemetry_helpers; use query_engine_metrics::MetricRegistry; +use telemetry::helpers as telemetry_helpers; use tracing::Subscriber; use tracing_error::ErrorLayer; use tracing_subscriber::{prelude::*, Layer}; diff --git a/query-engine/connectors/mongodb-query-connector/Cargo.toml b/query-engine/connectors/mongodb-query-connector/Cargo.toml index e66ca4204021..cb4e31d12cba 100644 --- a/query-engine/connectors/mongodb-query-connector/Cargo.toml +++ b/query-engine/connectors/mongodb-query-connector/Cargo.toml @@ -37,6 +37,9 @@ path = "../query-connector" [dependencies.prisma-value] path = "../../../libs/prisma-value" +[dependencies.telemetry] +path = "../../../libs/telemetry" + [dependencies.chrono] features = ["serde"] version = "0.4" diff --git a/query-engine/connectors/mongodb-query-connector/src/interface/connection.rs b/query-engine/connectors/mongodb-query-connector/src/interface/connection.rs index 9f9232517528..72b0c6a3afb0 100644 --- a/query-engine/connectors/mongodb-query-connector/src/interface/connection.rs +++ 
b/query-engine/connectors/mongodb-query-connector/src/interface/connection.rs @@ -11,6 +11,7 @@ use connector_interface::{ use mongodb::{ClientSession, Database}; use query_structure::{prelude::*, RelationLoadStrategy, SelectionResult}; use std::collections::HashMap; +use telemetry::helpers::TraceParent; pub struct MongoDbConnection { /// The session to use for operations. @@ -57,7 +58,7 @@ impl WriteOperations for MongoDbConnection { args: WriteArgs, // The field selection on a create is never used on MongoDB as it cannot return more than the ID. _selected_fields: FieldSelection, - _trace_id: Option, + _traceparent: Option, ) -> connector_interface::Result { catch(write::create_record(&self.database, &mut self.session, model, args)).await } @@ -67,7 +68,7 @@ impl WriteOperations for MongoDbConnection { model: &Model, args: Vec, skip_duplicates: bool, - _trace_id: Option, + _traceparent: Option, ) -> connector_interface::Result { catch(write::create_records( &self.database, @@ -85,7 +86,7 @@ impl WriteOperations for MongoDbConnection { _args: Vec, _skip_duplicates: bool, _selected_fields: FieldSelection, - _trace_id: Option, + _traceparent: Option, ) -> connector_interface::Result { unimplemented!() } @@ -95,7 +96,7 @@ impl WriteOperations for MongoDbConnection { model: &Model, record_filter: connector_interface::RecordFilter, args: WriteArgs, - _trace_id: Option, + _traceparent: Option, ) -> connector_interface::Result { catch(async move { let result = write::update_records( @@ -119,7 +120,7 @@ impl WriteOperations for MongoDbConnection { record_filter: connector_interface::RecordFilter, args: WriteArgs, selected_fields: Option, - _trace_id: Option, + _traceparent: Option, ) -> connector_interface::Result> { catch(async move { let result = write::update_records( @@ -150,7 +151,7 @@ impl WriteOperations for MongoDbConnection { &mut self, model: &Model, record_filter: connector_interface::RecordFilter, - _trace_id: Option, + _traceparent: Option, ) -> connector_interface::Result { catch(write::delete_records( &self.database, @@ -166,7 +167,7 @@ impl WriteOperations for MongoDbConnection { model: &Model, record_filter: connector_interface::RecordFilter, selected_fields: FieldSelection, - _trace_id: Option, + _traceparent: Option, ) -> connector_interface::Result { catch(write::delete_record( &self.database, @@ -183,7 +184,7 @@ impl WriteOperations for MongoDbConnection { field: &RelationFieldRef, parent_id: &SelectionResult, child_ids: &[SelectionResult], - _trace_id: Option, + _traceparent: Option, ) -> connector_interface::Result<()> { catch(write::m2m_connect( &self.database, @@ -200,7 +201,7 @@ impl WriteOperations for MongoDbConnection { field: &RelationFieldRef, parent_id: &SelectionResult, child_ids: &[SelectionResult], - _trace_id: Option, + _traceparent: Option, ) -> connector_interface::Result<()> { catch(write::m2m_disconnect( &self.database, @@ -235,7 +236,7 @@ impl WriteOperations for MongoDbConnection { async fn native_upsert_record( &mut self, _upsert: connector_interface::NativeUpsert, - _trace_id: Option, + _traceparent: Option, ) -> connector_interface::Result { unimplemented!("Native upsert is not currently supported.") } @@ -249,7 +250,7 @@ impl ReadOperations for MongoDbConnection { filter: &query_structure::Filter, selected_fields: &FieldSelection, _relation_load_strategy: RelationLoadStrategy, - _trace_id: Option, + _traceparent: Option, ) -> connector_interface::Result> { catch(read::get_single_record( &self.database, @@ -267,7 +268,7 @@ impl ReadOperations for 
MongoDbConnection { query_arguments: query_structure::QueryArguments, selected_fields: &FieldSelection, _relation_load_strategy: RelationLoadStrategy, - _trace_id: Option, + _traceparent: Option, ) -> connector_interface::Result { catch(read::get_many_records( &self.database, @@ -283,7 +284,7 @@ impl ReadOperations for MongoDbConnection { &mut self, from_field: &RelationFieldRef, from_record_ids: &[SelectionResult], - _trace_id: Option, + _traceparent: Option, ) -> connector_interface::Result> { catch(read::get_related_m2m_record_ids( &self.database, @@ -301,7 +302,7 @@ impl ReadOperations for MongoDbConnection { selections: Vec, group_by: Vec, having: Option, - _trace_id: Option, + _traceparent: Option, ) -> connector_interface::Result> { catch(aggregate::aggregate( &self.database, diff --git a/query-engine/connectors/mongodb-query-connector/src/interface/transaction.rs b/query-engine/connectors/mongodb-query-connector/src/interface/transaction.rs index 2fe5d840fa1f..38f4b5ccf4ce 100644 --- a/query-engine/connectors/mongodb-query-connector/src/interface/transaction.rs +++ b/query-engine/connectors/mongodb-query-connector/src/interface/transaction.rs @@ -8,6 +8,7 @@ use mongodb::options::{Acknowledgment, ReadConcern, TransactionOptions, WriteCon use query_engine_metrics::{decrement_gauge, increment_gauge, metrics, PRISMA_CLIENT_QUERIES_ACTIVE}; use query_structure::{RelationLoadStrategy, SelectionResult}; use std::collections::HashMap; +use telemetry::helpers::TraceParent; pub struct MongoDbTransaction<'conn> { connection: &'conn mut MongoDbConnection, @@ -78,7 +79,7 @@ impl<'conn> WriteOperations for MongoDbTransaction<'conn> { args: connector_interface::WriteArgs, // The field selection on a create is never used on MongoDB as it cannot return more than the ID. 
_selected_fields: FieldSelection, - _trace_id: Option, + _traceparent: Option, ) -> connector_interface::Result { catch(write::create_record( &self.connection.database, @@ -94,7 +95,7 @@ impl<'conn> WriteOperations for MongoDbTransaction<'conn> { model: &Model, args: Vec, skip_duplicates: bool, - _trace_id: Option, + _traceparent: Option, ) -> connector_interface::Result { catch(write::create_records( &self.connection.database, @@ -112,7 +113,7 @@ impl<'conn> WriteOperations for MongoDbTransaction<'conn> { _args: Vec, _skip_duplicates: bool, _selected_fields: FieldSelection, - _trace_id: Option, + _traceparent: Option, ) -> connector_interface::Result { unimplemented!() } @@ -122,7 +123,7 @@ impl<'conn> WriteOperations for MongoDbTransaction<'conn> { model: &Model, record_filter: connector_interface::RecordFilter, args: connector_interface::WriteArgs, - _trace_id: Option, + _traceparent: Option, ) -> connector_interface::Result { catch(async move { let result = write::update_records( @@ -145,7 +146,7 @@ impl<'conn> WriteOperations for MongoDbTransaction<'conn> { record_filter: connector_interface::RecordFilter, args: connector_interface::WriteArgs, selected_fields: Option, - _trace_id: Option, + _traceparent: Option, ) -> connector_interface::Result> { catch(async move { let result = write::update_records( @@ -175,7 +176,7 @@ impl<'conn> WriteOperations for MongoDbTransaction<'conn> { &mut self, model: &Model, record_filter: connector_interface::RecordFilter, - _trace_id: Option, + _traceparent: Option, ) -> connector_interface::Result { catch(write::delete_records( &self.connection.database, @@ -191,7 +192,7 @@ impl<'conn> WriteOperations for MongoDbTransaction<'conn> { model: &Model, record_filter: connector_interface::RecordFilter, selected_fields: FieldSelection, - _trace_id: Option, + _traceparent: Option, ) -> connector_interface::Result { catch(write::delete_record( &self.connection.database, @@ -206,7 +207,7 @@ impl<'conn> WriteOperations for MongoDbTransaction<'conn> { async fn native_upsert_record( &mut self, _upsert: connector_interface::NativeUpsert, - _trace_id: Option, + _traceparent: Option, ) -> connector_interface::Result { unimplemented!("Native upsert is not currently supported.") } @@ -216,7 +217,7 @@ impl<'conn> WriteOperations for MongoDbTransaction<'conn> { field: &RelationFieldRef, parent_id: &SelectionResult, child_ids: &[SelectionResult], - _trace_id: Option, + _traceparent: Option, ) -> connector_interface::Result<()> { catch(write::m2m_connect( &self.connection.database, @@ -233,7 +234,7 @@ impl<'conn> WriteOperations for MongoDbTransaction<'conn> { field: &RelationFieldRef, parent_id: &SelectionResult, child_ids: &[SelectionResult], - _trace_id: Option, + _traceparent: Option, ) -> connector_interface::Result<()> { catch(write::m2m_disconnect( &self.connection.database, @@ -279,7 +280,7 @@ impl<'conn> ReadOperations for MongoDbTransaction<'conn> { filter: &query_structure::Filter, selected_fields: &FieldSelection, _relation_load_strategy: RelationLoadStrategy, - _trace_id: Option, + _traceparent: Option, ) -> connector_interface::Result> { catch(read::get_single_record( &self.connection.database, @@ -297,7 +298,7 @@ impl<'conn> ReadOperations for MongoDbTransaction<'conn> { query_arguments: query_structure::QueryArguments, selected_fields: &FieldSelection, _relation_load_strategy: RelationLoadStrategy, - _trace_id: Option, + _traceparent: Option, ) -> connector_interface::Result { catch(read::get_many_records( &self.connection.database, @@ -313,7 +314,7 @@ 
impl<'conn> ReadOperations for MongoDbTransaction<'conn> { &mut self, from_field: &RelationFieldRef, from_record_ids: &[SelectionResult], - _trace_id: Option, + _traceparent: Option, ) -> connector_interface::Result> { catch(read::get_related_m2m_record_ids( &self.connection.database, @@ -331,7 +332,7 @@ impl<'conn> ReadOperations for MongoDbTransaction<'conn> { selections: Vec, group_by: Vec, having: Option, - _trace_id: Option, + _traceparent: Option, ) -> connector_interface::Result> { catch(aggregate::aggregate( &self.connection.database, diff --git a/query-engine/connectors/mongodb-query-connector/src/interface/utils.rs b/query-engine/connectors/mongodb-query-connector/src/interface/utils.rs index 6b089ff700fd..21e35604045c 100644 --- a/query-engine/connectors/mongodb-query-connector/src/interface/utils.rs +++ b/query-engine/connectors/mongodb-query-connector/src/interface/utils.rs @@ -1,7 +1,7 @@ use std::time::{Duration, Instant}; use mongodb::{ - error::{Result, TRANSIENT_TRANSACTION_ERROR, UNKNOWN_TRANSACTION_COMMIT_RESULT}, + error::{CommandError, ErrorKind, Result, TRANSIENT_TRANSACTION_ERROR, UNKNOWN_TRANSACTION_COMMIT_RESULT}, ClientSession, }; @@ -14,9 +14,15 @@ pub async fn commit_with_retry(session: &mut ClientSession) -> Result<()> { let timeout = Instant::now(); while let Err(err) = session.commit_transaction().await { - if (err.contains_label(UNKNOWN_TRANSACTION_COMMIT_RESULT) || err.contains_label(TRANSIENT_TRANSACTION_ERROR)) - && timeout.elapsed() < MAX_TX_TIMEOUT_COMMIT_RETRY_LIMIT - { + // For some reason, MongoDB adds `TRANSIENT_TRANSACTION_ERROR` to errors about aborted + // transactions. Since the transaction will not become less aborted in the future, we handle + // this case separately. + let is_aborted = matches!(err.kind.as_ref(), ErrorKind::Command(CommandError { code: 251, .. })); + let is_in_unknown_state = err.contains_label(UNKNOWN_TRANSACTION_COMMIT_RESULT); + let is_transient = err.contains_label(TRANSIENT_TRANSACTION_ERROR); + let is_retryable = !is_aborted && (is_in_unknown_state || is_transient); + + if is_retryable && timeout.elapsed() < MAX_TX_TIMEOUT_COMMIT_RETRY_LIMIT { tokio::time::sleep(TX_RETRY_BACKOFF).await; continue; } else { diff --git a/query-engine/connectors/query-connector/Cargo.toml b/query-engine/connectors/query-connector/Cargo.toml index 52555d256baa..017d634c61d7 100644 --- a/query-engine/connectors/query-connector/Cargo.toml +++ b/query-engine/connectors/query-connector/Cargo.toml @@ -17,3 +17,4 @@ thiserror = "1.0" user-facing-errors = {path = "../../../libs/user-facing-errors", features = ["sql"]} uuid.workspace = true indexmap.workspace = true +telemetry = {path = "../../../libs/telemetry"} diff --git a/query-engine/connectors/query-connector/src/interface.rs b/query-engine/connectors/query-connector/src/interface.rs index cbdafcaeeee3..ed465c197936 100644 --- a/query-engine/connectors/query-connector/src/interface.rs +++ b/query-engine/connectors/query-connector/src/interface.rs @@ -3,6 +3,7 @@ use async_trait::async_trait; use prisma_value::PrismaValue; use query_structure::{ast::FieldArity, *}; use std::collections::HashMap; +use telemetry::helpers::TraceParent; #[async_trait] pub trait Connector { @@ -194,7 +195,7 @@ pub trait ReadOperations { filter: &Filter, selected_fields: &FieldSelection, relation_load_strategy: RelationLoadStrategy, - trace_id: Option, + traceparent: Option, ) -> crate::Result>; /// Gets multiple records from the database.
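The `commit_with_retry` hunk above reduces to a small classification predicate over MongoDB commit errors. A self-contained sketch of that logic, using the `mongodb` crate types named in the hunk (code 251 is MongoDB's `NoSuchTransaction`, reported when committing an already-aborted transaction):

```rust
use mongodb::error::{
    CommandError, Error, ErrorKind, TRANSIENT_TRANSACTION_ERROR, UNKNOWN_TRANSACTION_COMMIT_RESULT,
};

/// Whether retrying `commit_transaction` can still succeed.
fn is_retryable_commit_error(err: &Error) -> bool {
    // An aborted transaction (code 251) will not become less aborted on retry,
    // even though MongoDB also labels it `TRANSIENT_TRANSACTION_ERROR`.
    let is_aborted = matches!(err.kind.as_ref(), ErrorKind::Command(CommandError { code: 251, .. }));
    let is_in_unknown_state = err.contains_label(UNKNOWN_TRANSACTION_COMMIT_RESULT);
    let is_transient = err.contains_label(TRANSIENT_TRANSACTION_ERROR);

    !is_aborted && (is_in_unknown_state || is_transient)
}
```

The loop then retries only while this predicate holds and the elapsed time stays under `MAX_TX_TIMEOUT_COMMIT_RETRY_LIMIT`, sleeping `TX_RETRY_BACKOFF` between attempts.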
@@ -209,7 +210,7 @@ pub trait ReadOperations { query_arguments: QueryArguments, selected_fields: &FieldSelection, relation_load_strategy: RelationLoadStrategy, - trace_id: Option, + traceparent: Option, ) -> crate::Result; /// Retrieves pairs of IDs that belong together from an intermediate join @@ -223,7 +224,7 @@ pub trait ReadOperations { &mut self, from_field: &RelationFieldRef, from_record_ids: &[SelectionResult], - trace_id: Option, + traceparent: Option, ) -> crate::Result>; /// Aggregates records for a specific model based on the given selections. @@ -238,7 +239,7 @@ pub trait ReadOperations { selections: Vec, group_by: Vec, having: Option, - trace_id: Option, + traceparent: Option, ) -> crate::Result>; } @@ -250,7 +251,7 @@ pub trait WriteOperations { model: &Model, args: WriteArgs, selected_fields: FieldSelection, - trace_id: Option, + traceparent: Option, ) -> crate::Result; /// Inserts many records at once into the database. @@ -259,7 +260,7 @@ pub trait WriteOperations { model: &Model, args: Vec, skip_duplicates: bool, - trace_id: Option, + traceparent: Option, ) -> crate::Result; /// Inserts many records at once into the database and returns their @@ -272,7 +273,7 @@ pub trait WriteOperations { args: Vec, skip_duplicates: bool, selected_fields: FieldSelection, - trace_id: Option, + traceparent: Option, ) -> crate::Result; /// Update records in the `Model` with the given `WriteArgs` filtered by the @@ -282,7 +283,7 @@ pub trait WriteOperations { model: &Model, record_filter: RecordFilter, args: WriteArgs, - trace_id: Option, + traceparent: Option, ) -> crate::Result; /// Update record in the `Model` with the given `WriteArgs` filtered by the @@ -293,7 +294,7 @@ pub trait WriteOperations { record_filter: RecordFilter, args: WriteArgs, selected_fields: Option, - trace_id: Option, + traceparent: Option, ) -> crate::Result>; /// Native upsert @@ -301,7 +302,7 @@ pub trait WriteOperations { async fn native_upsert_record( &mut self, upsert: NativeUpsert, - trace_id: Option, + traceparent: Option, ) -> crate::Result; /// Delete records in the `Model` with the given `Filter`. @@ -309,7 +310,7 @@ pub trait WriteOperations { &mut self, model: &Model, record_filter: RecordFilter, - trace_id: Option, + traceparent: Option, ) -> crate::Result; /// Delete single record in the `Model` with the given `Filter` and returns @@ -321,7 +322,7 @@ pub trait WriteOperations { model: &Model, record_filter: RecordFilter, selected_fields: FieldSelection, - trace_id: Option, + traceparent: Option, ) -> crate::Result; // We plan to remove the methods below in the future. We want to emulate them with the ones above. Those should suffice. @@ -332,7 +333,7 @@ pub trait WriteOperations { field: &RelationFieldRef, parent_id: &SelectionResult, child_ids: &[SelectionResult], - trace_id: Option, + traceparent: Option, ) -> crate::Result<()>; /// Disconnect the children from the parent (m2m relation only). @@ -341,7 +342,7 @@ pub trait WriteOperations { field: &RelationFieldRef, parent_id: &SelectionResult, child_ids: &[SelectionResult], - trace_id: Option, + traceparent: Option, ) -> crate::Result<()>; /// Execute the raw query in the database as-is.
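To make the new trait signatures concrete: callers now hand connectors a structured, `Copy` `TraceParent` instead of a pre-rendered `String`. A hedged sketch of what that argument carries, using the `TraceParent` API added in `libs/telemetry/src/helpers.rs` above; the IDs are the W3C trace-context example values, and the `TraceId::from_u128`/`SpanId::from_u64` constructors are assumed from the `opentelemetry` crate:

```rust
use opentelemetry::trace::{SpanId, TraceId};
use telemetry::helpers::TraceParent;

fn main() {
    // Hypothetical IDs; real callers derive them from the current span
    // context via `TraceParent::from_remote_context`.
    let traceparent = TraceParent::new_unsafe(
        TraceId::from_u128(0x0af7651916cd43dd8448eb211c80319c),
        SpanId::from_u64(0x00f067aa0ba902b7),
    );

    // The derived `Display` impl renders the W3C `traceparent` header format,
    // so a connector stringifies only at the point where the value is
    // embedded into a query.
    assert_eq!(
        traceparent.to_string(),
        "00-0af7651916cd43dd8448eb211c80319c-00f067aa0ba902b7-01"
    );
}
```

Because `TraceParent` is `Copy`, it threads through every `ReadOperations`/`WriteOperations` call (e.g. `get_single_record(..., Some(traceparent))`) without the `clone`/`as_deref` churn the old `Option<String>` parameter required.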
diff --git a/query-engine/connectors/sql-query-connector/Cargo.toml b/query-engine/connectors/sql-query-connector/Cargo.toml index 1826cac2681f..831d261c6019 100644 --- a/query-engine/connectors/sql-query-connector/Cargo.toml +++ b/query-engine/connectors/sql-query-connector/Cargo.toml @@ -66,6 +66,9 @@ path = "../../query-structure" [dependencies.prisma-value] path = "../../../libs/prisma-value" +[dependencies.telemetry] +path = "../../../libs/telemetry" + [dependencies.chrono] features = ["serde"] version = "0.4" diff --git a/query-engine/connectors/sql-query-connector/src/context.rs b/query-engine/connectors/sql-query-connector/src/context.rs index 5b2887451ef1..2519439b13f1 100644 --- a/query-engine/connectors/sql-query-connector/src/context.rs +++ b/query-engine/connectors/sql-query-connector/src/context.rs @@ -1,8 +1,9 @@ use quaint::prelude::ConnectionInfo; +use telemetry::helpers::TraceParent; pub(super) struct Context<'a> { connection_info: &'a ConnectionInfo, - pub(crate) trace_id: Option<&'a str>, + pub(crate) traceparent: Option, /// Maximum rows allowed at once for an insert query. /// None is unlimited. pub(crate) max_insert_rows: Option, @@ -12,13 +13,13 @@ pub(super) struct Context<'a> { } impl<'a> Context<'a> { - pub(crate) fn new(connection_info: &'a ConnectionInfo, trace_id: Option<&'a str>) -> Self { + pub(crate) fn new(connection_info: &'a ConnectionInfo, traceparent: Option) -> Self { let max_insert_rows = connection_info.max_insert_rows(); let max_bind_values = connection_info.max_bind_values(); Context { connection_info, - trace_id, + traceparent, max_insert_rows, max_bind_values: Some(max_bind_values), } diff --git a/query-engine/connectors/sql-query-connector/src/database/connection.rs b/query-engine/connectors/sql-query-connector/src/database/connection.rs index f928fcacdfa5..1222f0425ea0 100644 --- a/query-engine/connectors/sql-query-connector/src/database/connection.rs +++ b/query-engine/connectors/sql-query-connector/src/database/connection.rs @@ -15,6 +15,7 @@ use quaint::{ }; use query_structure::{prelude::*, Filter, QueryArguments, RelationLoadStrategy, SelectionResult}; use std::{collections::HashMap, str::FromStr}; +use telemetry::helpers::TraceParent; pub(crate) struct SqlConnection { inner: C, @@ -89,10 +90,10 @@ where filter: &Filter, selected_fields: &FieldSelection, relation_load_strategy: RelationLoadStrategy, - trace_id: Option, + traceparent: Option, ) -> connector::Result> { // [Composites] todo: FieldSelection -> ModelProjection conversion - let ctx = Context::new(&self.connection_info, trace_id.as_deref()); + let ctx = Context::new(&self.connection_info, traceparent); catch( &self.connection_info, read::get_single_record( @@ -113,9 +114,9 @@ where query_arguments: QueryArguments, selected_fields: &FieldSelection, relation_load_strategy: RelationLoadStrategy, - trace_id: Option, + traceparent: Option, ) -> connector::Result { - let ctx = Context::new(&self.connection_info, trace_id.as_deref()); + let ctx = Context::new(&self.connection_info, traceparent); catch( &self.connection_info, read::get_many_records( @@ -134,9 +135,9 @@ where &mut self, from_field: &RelationFieldRef, from_record_ids: &[SelectionResult], - trace_id: Option, + traceparent: Option, ) -> connector::Result> { - let ctx = Context::new(&self.connection_info, trace_id.as_deref()); + let ctx = Context::new(&self.connection_info, traceparent); catch( &self.connection_info, read::get_related_m2m_record_ids(&self.inner, from_field, from_record_ids, &ctx), @@ -151,9 +152,9 @@ where 
selections: Vec, group_by: Vec, having: Option, - trace_id: Option, + traceparent: Option, ) -> connector::Result> { - let ctx = Context::new(&self.connection_info, trace_id.as_deref()); + let ctx = Context::new(&self.connection_info, traceparent); catch( &self.connection_info, read::aggregate(&self.inner, model, query_arguments, selections, group_by, having, &ctx), @@ -172,9 +173,9 @@ where model: &Model, args: WriteArgs, selected_fields: FieldSelection, - trace_id: Option, + traceparent: Option, ) -> connector::Result { - let ctx = Context::new(&self.connection_info, trace_id.as_deref()); + let ctx = Context::new(&self.connection_info, traceparent); catch( &self.connection_info, write::create_record( @@ -194,9 +195,9 @@ where model: &Model, args: Vec, skip_duplicates: bool, - trace_id: Option, + traceparent: Option, ) -> connector::Result { - let ctx = Context::new(&self.connection_info, trace_id.as_deref()); + let ctx = Context::new(&self.connection_info, traceparent); catch( &self.connection_info, write::create_records_count(&self.inner, model, args, skip_duplicates, &ctx), @@ -210,9 +211,9 @@ where args: Vec, skip_duplicates: bool, selected_fields: FieldSelection, - trace_id: Option, + traceparent: Option, ) -> connector::Result { - let ctx = Context::new(&self.connection_info, trace_id.as_deref()); + let ctx = Context::new(&self.connection_info, traceparent); catch( &self.connection_info, write::create_records_returning(&self.inner, model, args, skip_duplicates, selected_fields, &ctx), @@ -225,9 +226,9 @@ where model: &Model, record_filter: RecordFilter, args: WriteArgs, - trace_id: Option, + traceparent: Option, ) -> connector::Result { - let ctx = Context::new(&self.connection_info, trace_id.as_deref()); + let ctx = Context::new(&self.connection_info, traceparent); catch( &self.connection_info, write::update_records(&self.inner, model, record_filter, args, &ctx), @@ -241,9 +242,9 @@ where record_filter: RecordFilter, args: WriteArgs, selected_fields: Option, - trace_id: Option, + traceparent: Option, ) -> connector::Result> { - let ctx = Context::new(&self.connection_info, trace_id.as_deref()); + let ctx = Context::new(&self.connection_info, traceparent); catch( &self.connection_info, write::update_record(&self.inner, model, record_filter, args, selected_fields, &ctx), @@ -255,9 +256,9 @@ where &mut self, model: &Model, record_filter: RecordFilter, - trace_id: Option, + traceparent: Option, ) -> connector::Result { - let ctx = Context::new(&self.connection_info, trace_id.as_deref()); + let ctx = Context::new(&self.connection_info, traceparent); catch( &self.connection_info, write::delete_records(&self.inner, model, record_filter, &ctx), @@ -270,9 +271,9 @@ where model: &Model, record_filter: RecordFilter, selected_fields: FieldSelection, - trace_id: Option, + traceparent: Option, ) -> connector::Result { - let ctx = Context::new(&self.connection_info, trace_id.as_deref()); + let ctx = Context::new(&self.connection_info, traceparent); catch( &self.connection_info, write::delete_record(&self.inner, model, record_filter, selected_fields, &ctx), @@ -283,9 +284,9 @@ where async fn native_upsert_record( &mut self, upsert: connector_interface::NativeUpsert, - trace_id: Option, + traceparent: Option, ) -> connector::Result { - let ctx = Context::new(&self.connection_info, trace_id.as_deref()); + let ctx = Context::new(&self.connection_info, traceparent); catch(&self.connection_info, upsert::native_upsert(&self.inner, upsert, &ctx)).await } @@ -294,9 +295,9 @@ where field: 
&RelationFieldRef, parent_id: &SelectionResult, child_ids: &[SelectionResult], - trace_id: Option, + traceparent: Option, ) -> connector::Result<()> { - let ctx = Context::new(&self.connection_info, trace_id.as_deref()); + let ctx = Context::new(&self.connection_info, traceparent); catch( &self.connection_info, write::m2m_connect(&self.inner, field, parent_id, child_ids, &ctx), @@ -309,9 +310,9 @@ where field: &RelationFieldRef, parent_id: &SelectionResult, child_ids: &[SelectionResult], - trace_id: Option, + traceparent: Option, ) -> connector::Result<()> { - let ctx = Context::new(&self.connection_info, trace_id.as_deref()); + let ctx = Context::new(&self.connection_info, traceparent); catch( &self.connection_info, write::m2m_disconnect(&self.inner, field, parent_id, child_ids, &ctx), diff --git a/query-engine/connectors/sql-query-connector/src/database/operations/write.rs b/query-engine/connectors/sql-query-connector/src/database/operations/write.rs index 33cb0cfa3ac4..137bff50ca58 100644 --- a/query-engine/connectors/sql-query-connector/src/database/operations/write.rs +++ b/query-engine/connectors/sql-query-connector/src/database/operations/write.rs @@ -72,7 +72,7 @@ async fn generate_id( // db generate values only if needed if need_select { - let pk_select = id_select.add_trace_id(ctx.trace_id); + let pk_select = id_select.add_traceparent(ctx.traceparent); let pk_result = conn.query(pk_select.into()).await?; let result = try_convert(&(id_field.into()), pk_result)?; diff --git a/query-engine/connectors/sql-query-connector/src/database/transaction.rs b/query-engine/connectors/sql-query-connector/src/database/transaction.rs index 263c541f6b42..387b18f63ee2 100644 --- a/query-engine/connectors/sql-query-connector/src/database/transaction.rs +++ b/query-engine/connectors/sql-query-connector/src/database/transaction.rs @@ -10,6 +10,7 @@ use prisma_value::PrismaValue; use quaint::prelude::ConnectionInfo; use query_structure::{prelude::*, Filter, QueryArguments, RelationLoadStrategy, SelectionResult}; use std::collections::HashMap; +use telemetry::helpers::TraceParent; pub struct SqlConnectorTransaction<'tx> { inner: Box, @@ -73,9 +74,9 @@ impl<'tx> ReadOperations for SqlConnectorTransaction<'tx> { filter: &Filter, selected_fields: &FieldSelection, relation_load_strategy: RelationLoadStrategy, - trace_id: Option, + traceparent: Option, ) -> connector::Result> { - let ctx = Context::new(&self.connection_info, trace_id.as_deref()); + let ctx = Context::new(&self.connection_info, traceparent); catch( &self.connection_info, read::get_single_record( @@ -96,9 +97,9 @@ impl<'tx> ReadOperations for SqlConnectorTransaction<'tx> { query_arguments: QueryArguments, selected_fields: &FieldSelection, relation_load_strategy: RelationLoadStrategy, - trace_id: Option, + traceparent: Option, ) -> connector::Result { - let ctx = Context::new(&self.connection_info, trace_id.as_deref()); + let ctx = Context::new(&self.connection_info, traceparent); catch( &self.connection_info, read::get_many_records( @@ -117,9 +118,9 @@ impl<'tx> ReadOperations for SqlConnectorTransaction<'tx> { &mut self, from_field: &RelationFieldRef, from_record_ids: &[SelectionResult], - trace_id: Option, + traceparent: Option, ) -> connector::Result> { - let ctx = Context::new(&self.connection_info, trace_id.as_deref()); + let ctx = Context::new(&self.connection_info, traceparent); catch(&self.connection_info, async { read::get_related_m2m_record_ids(self.inner.as_queryable(), from_field, from_record_ids, &ctx).await }) @@ -133,9 +134,9 @@ 
impl<'tx> ReadOperations for SqlConnectorTransaction<'tx> { selections: Vec, group_by: Vec, having: Option, - trace_id: Option, + traceparent: Option, ) -> connector::Result> { - let ctx = Context::new(&self.connection_info, trace_id.as_deref()); + let ctx = Context::new(&self.connection_info, traceparent); catch( &self.connection_info, read::aggregate( @@ -159,9 +160,9 @@ impl<'tx> WriteOperations for SqlConnectorTransaction<'tx> { model: &Model, args: WriteArgs, selected_fields: FieldSelection, - trace_id: Option, + traceparent: Option, ) -> connector::Result { - let ctx = Context::new(&self.connection_info, trace_id.as_deref()); + let ctx = Context::new(&self.connection_info, traceparent); catch( &self.connection_info, write::create_record( @@ -181,9 +182,9 @@ impl<'tx> WriteOperations for SqlConnectorTransaction<'tx> { model: &Model, args: Vec, skip_duplicates: bool, - trace_id: Option, + traceparent: Option, ) -> connector::Result { - let ctx = Context::new(&self.connection_info, trace_id.as_deref()); + let ctx = Context::new(&self.connection_info, traceparent); catch( &self.connection_info, write::create_records_count(self.inner.as_queryable(), model, args, skip_duplicates, &ctx), @@ -197,9 +198,9 @@ impl<'tx> WriteOperations for SqlConnectorTransaction<'tx> { args: Vec, skip_duplicates: bool, selected_fields: FieldSelection, - trace_id: Option, + traceparent: Option, ) -> connector::Result { - let ctx = Context::new(&self.connection_info, trace_id.as_deref()); + let ctx = Context::new(&self.connection_info, traceparent); catch( &self.connection_info, write::create_records_returning( @@ -219,9 +220,9 @@ impl<'tx> WriteOperations for SqlConnectorTransaction<'tx> { model: &Model, record_filter: RecordFilter, args: WriteArgs, - trace_id: Option, + traceparent: Option, ) -> connector::Result { - let ctx = Context::new(&self.connection_info, trace_id.as_deref()); + let ctx = Context::new(&self.connection_info, traceparent); catch( &self.connection_info, write::update_records(self.inner.as_queryable(), model, record_filter, args, &ctx), @@ -235,9 +236,9 @@ impl<'tx> WriteOperations for SqlConnectorTransaction<'tx> { record_filter: RecordFilter, args: WriteArgs, selected_fields: Option, - trace_id: Option, + traceparent: Option, ) -> connector::Result> { - let ctx = Context::new(&self.connection_info, trace_id.as_deref()); + let ctx = Context::new(&self.connection_info, traceparent); catch( &self.connection_info, write::update_record( @@ -256,10 +257,10 @@ impl<'tx> WriteOperations for SqlConnectorTransaction<'tx> { &mut self, model: &Model, record_filter: RecordFilter, - trace_id: Option, + traceparent: Option, ) -> connector::Result { catch(&self.connection_info, async { - let ctx = Context::new(&self.connection_info, trace_id.as_deref()); + let ctx = Context::new(&self.connection_info, traceparent); write::delete_records(self.inner.as_queryable(), model, record_filter, &ctx).await }) .await @@ -270,9 +271,9 @@ impl<'tx> WriteOperations for SqlConnectorTransaction<'tx> { model: &Model, record_filter: RecordFilter, selected_fields: FieldSelection, - trace_id: Option, + traceparent: Option, ) -> connector::Result { - let ctx = Context::new(&self.connection_info, trace_id.as_deref()); + let ctx = Context::new(&self.connection_info, traceparent); catch( &self.connection_info, write::delete_record(self.inner.as_queryable(), model, record_filter, selected_fields, &ctx), @@ -283,10 +284,10 @@ impl<'tx> WriteOperations for SqlConnectorTransaction<'tx> { async fn native_upsert_record( &mut self, 
upsert: connector_interface::NativeUpsert, - trace_id: Option, + traceparent: Option, ) -> connector::Result { catch(&self.connection_info, async { - let ctx = Context::new(&self.connection_info, trace_id.as_deref()); + let ctx = Context::new(&self.connection_info, traceparent); upsert::native_upsert(self.inner.as_queryable(), upsert, &ctx).await }) .await @@ -297,10 +298,10 @@ impl<'tx> WriteOperations for SqlConnectorTransaction<'tx> { field: &RelationFieldRef, parent_id: &SelectionResult, child_ids: &[SelectionResult], - trace_id: Option, + traceparent: Option, ) -> connector::Result<()> { catch(&self.connection_info, async { - let ctx = Context::new(&self.connection_info, trace_id.as_deref()); + let ctx = Context::new(&self.connection_info, traceparent); write::m2m_connect(self.inner.as_queryable(), field, parent_id, child_ids, &ctx).await }) .await @@ -311,9 +312,9 @@ impl<'tx> WriteOperations for SqlConnectorTransaction<'tx> { field: &RelationFieldRef, parent_id: &SelectionResult, child_ids: &[SelectionResult], - trace_id: Option, + traceparent: Option, ) -> connector::Result<()> { - let ctx = Context::new(&self.connection_info, trace_id.as_deref()); + let ctx = Context::new(&self.connection_info, traceparent); catch( &self.connection_info, write::m2m_disconnect(self.inner.as_queryable(), field, parent_id, child_ids, &ctx), diff --git a/query-engine/connectors/sql-query-connector/src/query_builder/read.rs b/query-engine/connectors/sql-query-connector/src/query_builder/read.rs index 6a0572ecc0da..84323e0f52b2 100644 --- a/query-engine/connectors/sql-query-connector/src/query_builder/read.rs +++ b/query-engine/connectors/sql-query-connector/src/query_builder/read.rs @@ -100,7 +100,7 @@ impl SelectDefinition for QueryArguments { .so_that(conditions) .offset(skip as usize) .append_trace(&Span::current()) - .add_trace_id(ctx.trace_id); + .add_traceparent(ctx.traceparent); let select_ast = order_by_definitions .iter() @@ -137,7 +137,7 @@ where let (select, additional_selection_set) = query.into_select(model, virtual_selections, ctx); let select = columns.fold(select, |acc, col| acc.column(col)); - let select = select.append_trace(&Span::current()).add_trace_id(ctx.trace_id); + let select = select.append_trace(&Span::current()).add_traceparent(ctx.traceparent); additional_selection_set .into_iter() @@ -183,7 +183,7 @@ pub(crate) fn aggregate( selections.iter().fold( Select::from_table(sub_table) .append_trace(&Span::current()) - .add_trace_id(ctx.trace_id), + .add_traceparent(ctx.traceparent), |select, next_op| match next_op { AggregationSelection::Field(field) => select.column( Column::from(field.db_name().to_owned()) @@ -269,7 +269,9 @@ pub(crate) fn group_by_aggregate( }); let grouped = group_by.into_iter().fold( - select_query.append_trace(&Span::current()).add_trace_id(ctx.trace_id), + select_query + .append_trace(&Span::current()) + .add_traceparent(ctx.traceparent), |query, field| query.group_by(field.as_column(ctx)), ); diff --git a/query-engine/connectors/sql-query-connector/src/query_builder/write.rs b/query-engine/connectors/sql-query-connector/src/query_builder/write.rs index c089f0834dcb..c07e3600e149 100644 --- a/query-engine/connectors/sql-query-connector/src/query_builder/write.rs +++ b/query-engine/connectors/sql-query-connector/src/query_builder/write.rs @@ -34,7 +34,7 @@ pub(crate) fn create_record( Insert::from(insert) .returning(selected_fields.as_columns(ctx).map(|c| c.set_is_selected(true))) .append_trace(&Span::current()) - .add_trace_id(ctx.trace_id) + 
.add_traceparent(ctx.traceparent) } /// `INSERT` new records into the database based on the given write arguments, @@ -84,7 +84,7 @@ pub(crate) fn create_records_nonempty( let insert = Insert::multi_into(model.as_table(ctx), columns); let insert = values.into_iter().fold(insert, |stmt, values| stmt.values(values)); let insert: Insert = insert.into(); - let mut insert = insert.append_trace(&Span::current()).add_trace_id(ctx.trace_id); + let mut insert = insert.append_trace(&Span::current()).add_traceparent(ctx.traceparent); if let Some(selected_fields) = selected_fields { insert = insert.returning(projection_into_columns(selected_fields, ctx)); @@ -105,7 +105,7 @@ pub(crate) fn create_records_empty( ctx: &Context<'_>, ) -> Insert<'static> { let insert: Insert<'static> = Insert::single_into(model.as_table(ctx)).into(); - let mut insert = insert.append_trace(&Span::current()).add_trace_id(ctx.trace_id); + let mut insert = insert.append_trace(&Span::current()).add_traceparent(ctx.traceparent); if let Some(selected_fields) = selected_fields { insert = insert.returning(projection_into_columns(selected_fields, ctx)); @@ -175,7 +175,7 @@ pub(crate) fn build_update_and_set_query( acc.set(name, value) }); - let query = query.append_trace(&Span::current()).add_trace_id(ctx.trace_id); + let query = query.append_trace(&Span::current()).add_traceparent(ctx.traceparent); let query = if let Some(selected_fields) = selected_fields { query.returning(selected_fields.as_columns(ctx).map(|c| c.set_is_selected(true))) @@ -222,7 +222,7 @@ pub(crate) fn delete_returning( .so_that(filter) .returning(projection_into_columns(selected_fields, ctx)) .append_trace(&Span::current()) - .add_trace_id(ctx.trace_id) + .add_traceparent(ctx.traceparent) .into() } @@ -234,7 +234,7 @@ pub(crate) fn delete_many_from_filter( Delete::from_table(model.as_table(ctx)) .so_that(filter_condition) .append_trace(&Span::current()) - .add_trace_id(ctx.trace_id) + .add_traceparent(ctx.traceparent) .into() } @@ -301,5 +301,5 @@ pub(crate) fn delete_relation_table_records( Delete::from_table(relation.as_table(ctx)) .so_that(parent_id_criteria.and(child_id_criteria)) .append_trace(&Span::current()) - .add_trace_id(ctx.trace_id) + .add_traceparent(ctx.traceparent) } diff --git a/query-engine/connectors/sql-query-connector/src/query_ext.rs b/query-engine/connectors/sql-query-connector/src/query_ext.rs index c0d511f9e6d8..a843f4a1525c 100644 --- a/query-engine/connectors/sql-query-connector/src/query_ext.rs +++ b/query-engine/connectors/sql-query-connector/src/query_ext.rs @@ -25,18 +25,18 @@ impl QueryExt for Q { idents: &[ColumnMetadata<'_>], ctx: &Context<'_>, ) -> crate::Result> { - let span = info_span!("filter read query"); + let span = info_span!("prisma:engine:filter_read_query"); let otel_ctx = span.context(); let span_ref = otel_ctx.span(); let span_ctx = span_ref.span_context(); - let q = match (q, ctx.trace_id) { + let q = match (q, ctx.traceparent) { (Query::Select(x), _) if span_ctx.trace_flags() == TraceFlags::SAMPLED => { Query::Select(Box::from(x.comment(trace_parent_to_string(span_ctx)))) } // This is part of the required changes to pass a traceid - (Query::Select(x), trace_id) => Query::Select(Box::from(x.add_trace_id(trace_id))), + (Query::Select(x), traceparent) => Query::Select(Box::from(x.add_traceparent(traceparent))), (q, _) => q, }; @@ -119,7 +119,7 @@ impl QueryExt for Q { let select = Select::from_table(model.as_table(ctx)) .columns(id_cols) .append_trace(&Span::current()) - .add_trace_id(ctx.trace_id) + 
.add_traceparent(ctx.traceparent) .so_that(condition); self.select_ids(select, model_id, ctx).await diff --git a/query-engine/connectors/sql-query-connector/src/sql_trace.rs b/query-engine/connectors/sql-query-connector/src/sql_trace.rs index bffaf1174311..96ae3a6bcad1 100644 --- a/query-engine/connectors/sql-query-connector/src/sql_trace.rs +++ b/query-engine/connectors/sql-query-connector/src/sql_trace.rs @@ -1,5 +1,6 @@ use opentelemetry::trace::{SpanContext, TraceContextExt, TraceFlags}; use quaint::ast::{Delete, Insert, Select, Update}; +use telemetry::helpers::TraceParent; use tracing::Span; use tracing_opentelemetry::OpenTelemetrySpanExt; @@ -8,12 +9,12 @@ pub fn trace_parent_to_string(context: &SpanContext) -> String { let span_id = context.span_id(); // see https://www.w3.org/TR/trace-context/#traceparent-header-field-values - format!("traceparent='00-{trace_id:032x}-{span_id:032x}-01'") + format!("traceparent='00-{trace_id:032x}-{span_id:016x}-01'") } pub trait SqlTraceComment: Sized { fn append_trace(self, span: &Span) -> Self; - fn add_trace_id(self, trace_id: Option<&str>) -> Self; + fn add_traceparent(self, traceparent: Option) -> Self; } macro_rules! sql_trace { @@ -30,14 +31,11 @@ macro_rules! sql_trace { self } } + // Temporary method to pass the traceid in an operation - fn add_trace_id(self, trace_id: Option<&str>) -> Self { - if let Some(traceparent) = trace_id { - if should_sample(&traceparent) { - self.comment(format!("traceparent='{}'", traceparent)) - } else { - self - } + fn add_traceparent(self, traceparent: Option) -> Self { + if let Some(traceparent) = traceparent { + self.comment(format!("traceparent='{}'", traceparent)) } else { self } @@ -46,10 +44,6 @@ macro_rules! sql_trace { }; } -fn should_sample(traceparent: &str) -> bool { - traceparent.split('-').count() == 4 && traceparent.ends_with("-01") -} - sql_trace!(Insert<'_>); sql_trace!(Update<'_>); diff --git a/query-engine/core/Cargo.toml b/query-engine/core/Cargo.toml index a1aa9a326f77..d0dba145949b 100644 --- a/query-engine/core/Cargo.toml +++ b/query-engine/core/Cargo.toml @@ -38,5 +38,7 @@ uuid.workspace = true cuid = { git = "https://github.com/prisma/cuid-rust", branch = "wasm32-support" } schema = { path = "../schema" } crosstarget-utils = { path = "../../libs/crosstarget-utils" } +telemetry = { path = "../../libs/telemetry" } lru = "0.7.7" enumflags2.workspace = true +derive_more = "0.99.17" diff --git a/query-engine/core/src/executor/execute_operation.rs b/query-engine/core/src/executor/execute_operation.rs index 6ef445d8364e..1e4b4f401d0f 100644 --- a/query-engine/core/src/executor/execute_operation.rs +++ b/query-engine/core/src/executor/execute_operation.rs @@ -17,6 +17,7 @@ use query_engine_metrics::{ use schema::{QuerySchema, QuerySchemaRef}; use std::time::Duration; +use telemetry::helpers::TraceParent; use tracing::Instrument; use tracing_futures::WithSubscriber; @@ -24,12 +25,12 @@ pub async fn execute_single_operation( query_schema: QuerySchemaRef, conn: &mut dyn ConnectionLike, operation: &Operation, - trace_id: Option, + traceparent: Option, ) -> crate::Result { let operation_timer = ElapsedTimeCounter::start(); let (graph, serializer) = build_graph(&query_schema, operation.clone())?; - let result = execute_on(conn, graph, serializer, query_schema.as_ref(), trace_id).await; + let result = execute_on(conn, graph, serializer, query_schema.as_ref(), traceparent).await; #[cfg(feature = "metrics")] histogram!( @@ -44,7 +45,7 @@ pub async fn execute_many_operations( query_schema: 
QuerySchemaRef, conn: &mut dyn ConnectionLike, operations: &[Operation], - trace_id: Option, + traceparent: Option, ) -> crate::Result>> { let queries = operations .iter() @@ -55,7 +56,7 @@ pub async fn execute_many_operations( for (i, (graph, serializer)) in queries.into_iter().enumerate() { let operation_timer = ElapsedTimeCounter::start(); - let result = execute_on(conn, graph, serializer, query_schema.as_ref(), trace_id.clone()).await; + let result = execute_on(conn, graph, serializer, query_schema.as_ref(), traceparent).await; #[cfg(feature = "metrics")] histogram!( @@ -81,7 +82,7 @@ pub async fn execute_single_self_contained( connector: &C, query_schema: QuerySchemaRef, operation: Operation, - trace_id: Option, + traceparent: Option, force_transactions: bool, ) -> crate::Result { let conn_span = info_span!( @@ -97,7 +98,7 @@ pub async fn execute_single_self_contained( operation, force_transactions, connector.should_retry_on_transient_error(), - trace_id, + traceparent, ) .await } @@ -106,7 +107,7 @@ pub async fn execute_many_self_contained( connector: &C, query_schema: QuerySchemaRef, operations: &[Operation], - trace_id: Option, + traceparent: Option, force_transactions: bool, engine_protocol: EngineProtocol, ) -> crate::Result>> { @@ -133,7 +134,7 @@ pub async fn execute_many_self_contained( op.clone(), force_transactions, connector.should_retry_on_transient_error(), - trace_id.clone(), + traceparent, ), ) .with_subscriber(dispatcher.clone()), @@ -156,7 +157,7 @@ async fn execute_self_contained( operation: Operation, force_transactions: bool, retry_on_transient_error: bool, - trace_id: Option, + traceparent: Option, ) -> crate::Result { let operation_timer = ElapsedTimeCounter::start(); let result = if retry_on_transient_error { @@ -166,13 +167,14 @@ async fn execute_self_contained( operation, force_transactions, ElapsedTimeCounter::start(), - trace_id, + traceparent, ) .await } else { let (graph, serializer) = build_graph(&query_schema, operation)?; - execute_self_contained_without_retry(conn, graph, serializer, force_transactions, &query_schema, trace_id).await + execute_self_contained_without_retry(conn, graph, serializer, force_transactions, &query_schema, traceparent) + .await }; #[cfg(feature = "metrics")] @@ -190,13 +192,13 @@ async fn execute_self_contained_without_retry<'a>( serializer: IrSerializer<'a>, force_transactions: bool, query_schema: &'a QuerySchema, - trace_id: Option, + traceparent: Option, ) -> crate::Result { if force_transactions || graph.needs_transaction() { - return execute_in_tx(&mut conn, graph, serializer, query_schema, trace_id).await; + return execute_in_tx(&mut conn, graph, serializer, query_schema, traceparent).await; } - execute_on(conn.as_connection_like(), graph, serializer, query_schema, trace_id).await + execute_on(conn.as_connection_like(), graph, serializer, query_schema, traceparent).await } // As suggested by the MongoDB documentation @@ -212,12 +214,12 @@ async fn execute_self_contained_with_retry( operation: Operation, force_transactions: bool, retry_timeout: ElapsedTimeCounter, - trace_id: Option, + traceparent: Option, ) -> crate::Result { let (graph, serializer) = build_graph(&query_schema, operation.clone())?; if force_transactions || graph.needs_transaction() { - let res = execute_in_tx(conn, graph, serializer, query_schema.as_ref(), trace_id.clone()).await; + let res = execute_in_tx(conn, graph, serializer, query_schema.as_ref(), traceparent).await; if !is_transient_error(&res) { return res; @@ -225,7 +227,7 @@ async fn 
execute_self_contained_with_retry( loop { let (graph, serializer) = build_graph(&query_schema, operation.clone())?; - let res = execute_in_tx(conn, graph, serializer, query_schema.as_ref(), trace_id.clone()).await; + let res = execute_in_tx(conn, graph, serializer, query_schema.as_ref(), traceparent).await; if is_transient_error(&res) && retry_timeout.elapsed_time() < MAX_TX_TIMEOUT_RETRY_LIMIT { crosstarget_utils::time::sleep(TX_RETRY_BACKOFF).await; @@ -240,7 +242,7 @@ async fn execute_self_contained_with_retry( graph, serializer, query_schema.as_ref(), - trace_id, + traceparent, ) .await } @@ -251,17 +253,10 @@ async fn execute_in_tx<'a>( graph: QueryGraph, serializer: IrSerializer<'a>, query_schema: &'a QuerySchema, - trace_id: Option, + traceparent: Option, ) -> crate::Result { let mut tx = conn.start_transaction(None).await?; - let result = execute_on( - tx.as_connection_like(), - graph, - serializer, - query_schema, - trace_id.clone(), - ) - .await; + let result = execute_on(tx.as_connection_like(), graph, serializer, query_schema, traceparent).await; if result.is_ok() { tx.commit().await?; @@ -278,14 +273,14 @@ async fn execute_on<'a>( graph: QueryGraph, serializer: IrSerializer<'a>, query_schema: &'a QuerySchema, - trace_id: Option, + traceparent: Option, ) -> crate::Result { #[cfg(feature = "metrics")] increment_counter!(PRISMA_CLIENT_QUERIES_TOTAL); let interpreter = QueryInterpreter::new(conn); QueryPipeline::new(graph, interpreter, serializer) - .execute(query_schema, trace_id) + .execute(query_schema, traceparent) .await } diff --git a/query-engine/core/src/executor/interpreting_executor.rs b/query-engine/core/src/executor/interpreting_executor.rs index 0408361b766d..868034e4c1ca 100644 --- a/query-engine/core/src/executor/interpreting_executor.rs +++ b/query-engine/core/src/executor/interpreting_executor.rs @@ -1,13 +1,15 @@ use super::execute_operation::{execute_many_operations, execute_many_self_contained, execute_single_self_contained}; use super::request_context; +use crate::ITXManager; use crate::{ protocol::EngineProtocol, BatchDocumentTransaction, CoreError, Operation, QueryExecutor, ResponseData, - TransactionActorManager, TransactionError, TransactionManager, TransactionOptions, TxId, + TransactionError, TransactionManager, TransactionOptions, TxId, }; use async_trait::async_trait; use connector::Connector; use schema::QuerySchemaRef; +use telemetry::helpers::TraceParent; use tokio::time::Duration; use tracing_futures::Instrument; @@ -16,7 +18,7 @@ pub struct InterpretingExecutor { /// The loaded connector connector: C, - itx_manager: TransactionActorManager, + itx_manager: ITXManager, /// Flag that forces individual operations to run in a transaction. /// Does _not_ force batches to use transactions. @@ -31,7 +33,7 @@ where InterpretingExecutor { connector, force_transactions, - itx_manager: TransactionActorManager::new(), + itx_manager: ITXManager::new(), } } } @@ -48,25 +50,24 @@ where tx_id: Option, operation: Operation, query_schema: QuerySchemaRef, - trace_id: Option, + traceparent: Option, engine_protocol: EngineProtocol, ) -> crate::Result { - // If a Tx id is provided, execute on that one. Else execute normally as a single operation. 
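This hunk rewrites `execute` so that both the interactive-transaction path and the self-contained path run inside one `request_context::with_request_context` scope instead of only the latter. A minimal sketch of the task-local pattern such a wrapper relies on; the real `request_context` module is not shown in this patch, so the names and the `String` payload here are purely illustrative:

```
use std::future::Future;

tokio::task_local! {
    // Illustrative stand-in for the engine's request context; the real type
    // lives in the `request_context` module, which this patch does not show.
    static PROTOCOL: String;
}

// Everything awaited inside `fut` (e.g. both executor branches in the hunk
// above) observes the same task-local value.
async fn with_request_context<F: Future>(protocol: String, fut: F) -> F::Output {
    PROTOCOL.scope(protocol, fut).await
}

#[tokio::main]
async fn main() {
    let proto = with_request_context("json".to_string(), async {
        // Reads the task-local value set by the enclosing scope.
        PROTOCOL.with(|p| p.clone())
    })
    .await;
    assert_eq!(proto, "json");
}
```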
- if let Some(tx_id) = tx_id { - self.itx_manager.execute(&tx_id, operation, trace_id).await - } else { - request_context::with_request_context(engine_protocol, async move { + request_context::with_request_context(engine_protocol, async move { + if let Some(tx_id) = tx_id { + self.itx_manager.execute(&tx_id, operation, traceparent).await + } else { execute_single_self_contained( &self.connector, query_schema, operation, - trace_id, + traceparent, self.force_transactions, ) .await - }) - .await - } + } + }) + .await } /// Executes a batch of operations. @@ -87,53 +88,50 @@ where operations: Vec, transaction: Option, query_schema: QuerySchemaRef, - trace_id: Option, + traceparent: Option, engine_protocol: EngineProtocol, ) -> crate::Result>> { - if let Some(tx_id) = tx_id { - let batch_isolation_level = transaction.and_then(|t| t.isolation_level()); - if batch_isolation_level.is_some() { - return Err(CoreError::UnsupportedFeatureError( - "Can not set batch isolation level within interactive transaction".into(), - )); - } - self.itx_manager.batch_execute(&tx_id, operations, trace_id).await - } else if let Some(transaction) = transaction { - let conn_span = info_span!( - "prisma:engine:connection", - user_facing = true, - "db.type" = self.connector.name(), - ); - let mut conn = self.connector.get_connection().instrument(conn_span).await?; - let mut tx = conn.start_transaction(transaction.isolation_level()).await?; - - let results = request_context::with_request_context( - engine_protocol, - execute_many_operations(query_schema, tx.as_connection_like(), &operations, trace_id), - ) - .await; - - if results.is_err() { - tx.rollback().await?; + request_context::with_request_context(engine_protocol, async move { + if let Some(tx_id) = tx_id { + let batch_isolation_level = transaction.and_then(|t| t.isolation_level()); + if batch_isolation_level.is_some() { + return Err(CoreError::UnsupportedFeatureError( + "Can not set batch isolation level within interactive transaction".into(), + )); + } + self.itx_manager.batch_execute(&tx_id, operations, traceparent).await + } else if let Some(transaction) = transaction { + let conn_span = info_span!( + "prisma:engine:connection", + user_facing = true, + "db.type" = self.connector.name(), + ); + let mut conn = self.connector.get_connection().instrument(conn_span).await?; + let mut tx = conn.start_transaction(transaction.isolation_level()).await?; + + let results = + execute_many_operations(query_schema, tx.as_connection_like(), &operations, traceparent).await; + + if results.is_err() { + tx.rollback().await?; + } else { + tx.commit().await?; + } + + results } else { - tx.commit().await?; - } - - results - } else { - request_context::with_request_context(engine_protocol, async move { execute_many_self_contained( &self.connector, query_schema, &operations, - trace_id, + traceparent, self.force_transactions, engine_protocol, ) .await - }) - .await - } + } + }) + .await } fn primary_connector(&self) -> &(dyn Connector + Send + Sync) { @@ -158,7 +156,6 @@ where let valid_for_millis = tx_opts.valid_for_millis; let id = tx_opts.new_tx_id.unwrap_or_default(); - trace!("[{}] Starting...", id); let conn_span = info_span!( "prisma:engine:connection", user_facing = true, @@ -180,23 +177,19 @@ where conn, isolation_level, Duration::from_millis(valid_for_millis), - engine_protocol, ) .await?; - debug!("[{}] Started.", id); Ok(id) }) .await } async fn commit_tx(&self, tx_id: TxId) -> crate::Result<()> { - trace!("[{}] Committing.", tx_id); 
self.itx_manager.commit_tx(&tx_id).await } async fn rollback_tx(&self, tx_id: TxId) -> crate::Result<()> { - trace!("[{}] Rolling back.", tx_id); self.itx_manager.rollback_tx(&tx_id).await } } diff --git a/query-engine/core/src/executor/mod.rs b/query-engine/core/src/executor/mod.rs index fee7bc68fe7b..280331c35397 100644 --- a/query-engine/core/src/executor/mod.rs +++ b/query-engine/core/src/executor/mod.rs @@ -14,6 +14,7 @@ mod request_context; pub use self::{execute_operation::*, interpreting_executor::InterpretingExecutor}; pub(crate) use request_context::*; +use telemetry::helpers::TraceParent; use crate::{ protocol::EngineProtocol, query_document::Operation, response_ir::ResponseData, schema::QuerySchemaRef, @@ -35,7 +36,7 @@ pub trait QueryExecutor: TransactionManager { tx_id: Option, operation: Operation, query_schema: QuerySchemaRef, - trace_id: Option, + traceparent: Option, engine_protocol: EngineProtocol, ) -> crate::Result; @@ -51,7 +52,7 @@ pub trait QueryExecutor: TransactionManager { operations: Vec, transaction: Option, query_schema: QuerySchemaRef, - trace_id: Option, + traceparent: Option, engine_protocol: EngineProtocol, ) -> crate::Result>>; @@ -90,7 +91,7 @@ impl TransactionOptions { /// Generates a new transaction id before the transaction is started and returns a modified version /// of self with the new predefined_id set. pub fn with_new_transaction_id(&mut self) -> TxId { - let tx_id: TxId = Default::default(); + let tx_id = TxId::default(); self.new_tx_id = Some(tx_id.clone()); tx_id } diff --git a/query-engine/core/src/executor/pipeline.rs b/query-engine/core/src/executor/pipeline.rs index 2193410a57e1..bd1ba73d5e8b 100644 --- a/query-engine/core/src/executor/pipeline.rs +++ b/query-engine/core/src/executor/pipeline.rs @@ -1,5 +1,6 @@ use crate::{Env, Expressionista, IrSerializer, QueryGraph, QueryInterpreter, ResponseData}; use schema::QuerySchema; +use telemetry::helpers::TraceParent; use tracing::Instrument; #[derive(Debug)] @@ -25,7 +26,7 @@ impl<'conn, 'schema> QueryPipeline<'conn, 'schema> { pub(crate) async fn execute( mut self, query_schema: &'schema QuerySchema, - trace_id: Option, + traceparent: Option, ) -> crate::Result { let serializer = self.serializer; let expr = Expressionista::translate(self.graph)?; @@ -34,7 +35,7 @@ impl<'conn, 'schema> QueryPipeline<'conn, 'schema> { let result = self .interpreter - .interpret(expr, Env::default(), 0, trace_id) + .interpret(expr, Env::default(), 0, traceparent) .instrument(span) .await; diff --git a/query-engine/core/src/interactive_transactions/actor_manager.rs b/query-engine/core/src/interactive_transactions/actor_manager.rs deleted file mode 100644 index e6c1c7fbd1dc..000000000000 --- a/query-engine/core/src/interactive_transactions/actor_manager.rs +++ /dev/null @@ -1,160 +0,0 @@ -use crate::{protocol::EngineProtocol, ClosedTx, Operation, ResponseData}; -use connector::Connection; -use crosstarget_utils::task::JoinHandle; -use lru::LruCache; -use once_cell::sync::Lazy; -use schema::QuerySchemaRef; -use std::{collections::HashMap, sync::Arc}; -use tokio::{ - sync::{ - mpsc::{channel, Sender}, - RwLock, - }, - time::Duration, -}; - -use super::{spawn_client_list_clear_actor, spawn_itx_actor, ITXClient, TransactionError, TxId}; - -pub static CLOSED_TX_CACHE_SIZE: Lazy = Lazy::new(|| match std::env::var("CLOSED_TX_CACHE_SIZE") { - Ok(size) => size.parse().unwrap_or(100), - Err(_) => 100, -}); - -static CHANNEL_SIZE: usize = 100; - -pub struct TransactionActorManager { - /// Map of active ITx clients - 
pub(crate) clients: Arc>>, - /// Cache of closed transactions. We keep the last N closed transactions in memory to - /// return better error messages if operations are performed on closed transactions. - pub(crate) closed_txs: Arc>>>, - /// Channel used to signal an ITx is closed and can be moved to the list of closed transactions. - send_done: Sender<(TxId, Option)>, - /// Handle to the task in charge of clearing actors. - /// Used to abort the task when the TransactionActorManager is dropped. - bg_reader_clear: JoinHandle<()>, -} - -impl Drop for TransactionActorManager { - fn drop(&mut self) { - self.bg_reader_clear.abort(); - } -} - -impl Default for TransactionActorManager { - fn default() -> Self { - Self::new() - } -} - -impl TransactionActorManager { - pub fn new() -> Self { - let clients = Arc::new(RwLock::new(HashMap::new())); - let closed_txs = Arc::new(RwLock::new(LruCache::new(*CLOSED_TX_CACHE_SIZE))); - - let (send_done, rx) = channel(CHANNEL_SIZE); - let handle = spawn_client_list_clear_actor(clients.clone(), closed_txs.clone(), rx); - - Self { - clients, - closed_txs, - send_done, - bg_reader_clear: handle, - } - } - - pub(crate) async fn create_tx( - &self, - query_schema: QuerySchemaRef, - tx_id: TxId, - conn: Box, - isolation_level: Option, - timeout: Duration, - engine_protocol: EngineProtocol, - ) -> crate::Result<()> { - let client = spawn_itx_actor( - query_schema.clone(), - tx_id.clone(), - conn, - isolation_level, - timeout, - CHANNEL_SIZE, - self.send_done.clone(), - engine_protocol, - ) - .await?; - - self.clients.write().await.insert(tx_id, client); - Ok(()) - } - - async fn get_client(&self, tx_id: &TxId, from_operation: &str) -> crate::Result { - if let Some(client) = self.clients.read().await.get(tx_id) { - Ok(client.clone()) - } else if let Some(closed_tx) = self.closed_txs.read().await.peek(tx_id) { - Err(TransactionError::Closed { - reason: match closed_tx { - Some(ClosedTx::Committed) => { - format!("A {from_operation} cannot be executed on a committed transaction") - } - Some(ClosedTx::RolledBack) => { - format!("A {from_operation} cannot be executed on a transaction that was rolled back") - } - Some(ClosedTx::Expired { start_time, timeout }) => { - format!( - "A {from_operation} cannot be executed on an expired transaction. \ - The timeout for this transaction was {} ms, however {} ms passed since the start \ - of the transaction. 
Consider increasing the interactive transaction timeout \ - or doing less work in the transaction", - timeout.as_millis(), - start_time.elapsed_time().as_millis(), - ) - } - None => { - error!("[{tx_id}] no details about closed transaction"); - format!("A {from_operation} cannot be executed on a closed transaction") - } - }, - } - .into()) - } else { - Err(TransactionError::NotFound.into()) - } - } - - pub async fn execute( - &self, - tx_id: &TxId, - operation: Operation, - traceparent: Option, - ) -> crate::Result { - let client = self.get_client(tx_id, "query").await?; - - client.execute(operation, traceparent).await - } - - pub async fn batch_execute( - &self, - tx_id: &TxId, - operations: Vec, - traceparent: Option, - ) -> crate::Result>> { - let client = self.get_client(tx_id, "batch query").await?; - - client.batch_execute(operations, traceparent).await - } - - pub async fn commit_tx(&self, tx_id: &TxId) -> crate::Result<()> { - let client = self.get_client(tx_id, "commit").await?; - client.commit().await?; - - Ok(()) - } - - pub async fn rollback_tx(&self, tx_id: &TxId) -> crate::Result<()> { - let client = self.get_client(tx_id, "rollback").await?; - client.rollback().await?; - - Ok(()) - } -} diff --git a/query-engine/core/src/interactive_transactions/actors.rs b/query-engine/core/src/interactive_transactions/actors.rs deleted file mode 100644 index 86ebd5c13b84..000000000000 --- a/query-engine/core/src/interactive_transactions/actors.rs +++ /dev/null @@ -1,425 +0,0 @@ -use super::{CachedTx, TransactionError, TxOpRequest, TxOpRequestMsg, TxOpResponse}; -use crate::{ - execute_many_operations, execute_single_operation, protocol::EngineProtocol, ClosedTx, Operation, ResponseData, - TxId, -}; -use connector::Connection; -use crosstarget_utils::task::{spawn, spawn_controlled, JoinHandle}; -use crosstarget_utils::time::ElapsedTimeCounter; -use schema::QuerySchemaRef; -use std::{collections::HashMap, sync::Arc}; -use tokio::{ - sync::{ - mpsc::{channel, Receiver, Sender}, - oneshot, RwLock, - }, - time::Duration, -}; -use tracing::Span; -use tracing_futures::Instrument; -use tracing_futures::WithSubscriber; - -#[cfg(feature = "metrics")] -use crate::telemetry::helpers::set_span_link_from_traceparent; - -#[derive(PartialEq)] -enum RunState { - Continue, - Finished, -} - -pub struct ITXServer<'a> { - id: TxId, - pub cached_tx: CachedTx<'a>, - pub timeout: Duration, - receive: Receiver, - query_schema: QuerySchemaRef, -} - -impl<'a> ITXServer<'a> { - pub fn new( - id: TxId, - tx: CachedTx<'a>, - timeout: Duration, - receive: Receiver, - query_schema: QuerySchemaRef, - ) -> Self { - Self { - id, - cached_tx: tx, - timeout, - receive, - query_schema, - } - } - - // RunState is used to tell if the run loop should continue - async fn process_msg(&mut self, op: TxOpRequest) -> RunState { - match op.msg { - TxOpRequestMsg::Single(ref operation, traceparent) => { - let result = self.execute_single(operation, traceparent).await; - let _ = op.respond_to.send(TxOpResponse::Single(result)); - RunState::Continue - } - TxOpRequestMsg::Batch(ref operations, traceparent) => { - let result = self.execute_batch(operations, traceparent).await; - let _ = op.respond_to.send(TxOpResponse::Batch(result)); - RunState::Continue - } - TxOpRequestMsg::Commit => { - let resp = self.commit().await; - let _ = op.respond_to.send(TxOpResponse::Committed(resp)); - RunState::Finished - } - TxOpRequestMsg::Rollback => { - let resp = self.rollback(false).await; - let _ = op.respond_to.send(TxOpResponse::RolledBack(resp)); - 
RunState::Finished - } - } - } - - async fn execute_single( - &mut self, - operation: &Operation, - traceparent: Option, - ) -> crate::Result { - let span = info_span!("prisma:engine:itx_query_builder", user_facing = true); - - #[cfg(feature = "metrics")] - set_span_link_from_traceparent(&span, traceparent.clone()); - - let conn = self.cached_tx.as_open()?; - execute_single_operation( - self.query_schema.clone(), - conn.as_connection_like(), - operation, - traceparent, - ) - .instrument(span) - .await - } - - async fn execute_batch( - &mut self, - operations: &[Operation], - traceparent: Option, - ) -> crate::Result>> { - let span = info_span!("prisma:engine:itx_execute", user_facing = true); - - let conn = self.cached_tx.as_open()?; - execute_many_operations( - self.query_schema.clone(), - conn.as_connection_like(), - operations, - traceparent, - ) - .instrument(span) - .await - } - - pub(crate) async fn commit(&mut self) -> crate::Result<()> { - if let CachedTx::Open(_) = self.cached_tx { - let open_tx = self.cached_tx.as_open()?; - trace!("[{}] committing.", self.id.to_string()); - open_tx.commit().await?; - self.cached_tx = CachedTx::Committed; - } - - Ok(()) - } - - pub(crate) async fn rollback(&mut self, was_timeout: bool) -> crate::Result<()> { - debug!("[{}] rolling back, was timed out = {was_timeout}", self.name()); - if let CachedTx::Open(_) = self.cached_tx { - let open_tx = self.cached_tx.as_open()?; - open_tx.rollback().await?; - if was_timeout { - trace!("[{}] Expired Rolling back", self.id.to_string()); - self.cached_tx = CachedTx::Expired; - } else { - self.cached_tx = CachedTx::RolledBack; - trace!("[{}] Rolling back", self.id.to_string()); - } - } - - Ok(()) - } - - pub(crate) fn name(&self) -> String { - format!("itx-{:?}", self.id.to_string()) - } -} - -#[derive(Clone)] -pub struct ITXClient { - send: Sender, - tx_id: TxId, -} - -impl ITXClient { - pub(crate) async fn commit(&self) -> crate::Result<()> { - let msg = self.send_and_receive(TxOpRequestMsg::Commit).await?; - - if let TxOpResponse::Committed(resp) = msg { - debug!("[{}] COMMITTED {:?}", self.tx_id, resp); - resp - } else { - Err(self.handle_error(msg).into()) - } - } - - pub(crate) async fn rollback(&self) -> crate::Result<()> { - let msg = self.send_and_receive(TxOpRequestMsg::Rollback).await?; - - if let TxOpResponse::RolledBack(resp) = msg { - resp - } else { - Err(self.handle_error(msg).into()) - } - } - - pub async fn execute(&self, operation: Operation, traceparent: Option) -> crate::Result { - let msg_req = TxOpRequestMsg::Single(operation, traceparent); - let msg = self.send_and_receive(msg_req).await?; - - if let TxOpResponse::Single(resp) = msg { - resp - } else { - Err(self.handle_error(msg).into()) - } - } - - pub(crate) async fn batch_execute( - &self, - operations: Vec, - traceparent: Option, - ) -> crate::Result>> { - let msg_req = TxOpRequestMsg::Batch(operations, traceparent); - - let msg = self.send_and_receive(msg_req).await?; - - if let TxOpResponse::Batch(resp) = msg { - resp - } else { - Err(self.handle_error(msg).into()) - } - } - - async fn send_and_receive(&self, msg: TxOpRequestMsg) -> Result { - let (receiver, req) = self.create_receive_and_req(msg); - if let Err(err) = self.send.send(req).await { - debug!("channel send error {err}"); - return Err(TransactionError::Closed { - reason: "Could not perform operation".to_string(), - } - .into()); - } - - match receiver.await { - Ok(resp) => Ok(resp), - Err(_err) => Err(TransactionError::Closed { - reason: "Could not perform 
operation".to_string(), - } - .into()), - } - } - - fn create_receive_and_req(&self, msg: TxOpRequestMsg) -> (oneshot::Receiver, TxOpRequest) { - let (send, rx) = oneshot::channel::(); - let request = TxOpRequest { msg, respond_to: send }; - (rx, request) - } - - fn handle_error(&self, msg: TxOpResponse) -> TransactionError { - match msg { - TxOpResponse::Committed(..) => { - let reason = "Transaction is no longer valid. Last state: 'Committed'".to_string(); - TransactionError::Closed { reason } - } - TxOpResponse::RolledBack(..) => { - let reason = "Transaction is no longer valid. Last state: 'RolledBack'".to_string(); - TransactionError::Closed { reason } - } - other => { - error!("Unexpected iTx response, {}", other); - let reason = format!("response '{other}'"); - TransactionError::Closed { reason } - } - } - } -} - -#[allow(clippy::too_many_arguments)] -pub(crate) async fn spawn_itx_actor( - query_schema: QuerySchemaRef, - tx_id: TxId, - mut conn: Box, - isolation_level: Option, - timeout: Duration, - channel_size: usize, - send_done: Sender<(TxId, Option)>, - engine_protocol: EngineProtocol, -) -> crate::Result { - let span = Span::current(); - let tx_id_str = tx_id.to_string(); - span.record("itx_id", tx_id_str.as_str()); - let dispatcher = crate::get_current_dispatcher(); - - let (tx_to_server, rx_from_client) = channel::(channel_size); - let client = ITXClient { - send: tx_to_server, - tx_id: tx_id.clone(), - }; - let (open_transaction_send, open_transaction_rcv) = oneshot::channel(); - - spawn( - crate::executor::with_request_context(engine_protocol, async move { - // We match on the result in order to send the error to the parent task and abort this - // task, on error. This is a separate task (actor), not a function where we can just bubble up the - // result. - let c_tx = match conn.start_transaction(isolation_level).await { - Ok(c_tx) => { - open_transaction_send.send(Ok(())).unwrap(); - c_tx - } - Err(err) => { - open_transaction_send.send(Err(err)).unwrap(); - return; - } - }; - - let mut server = ITXServer::new( - tx_id.clone(), - CachedTx::Open(c_tx), - timeout, - rx_from_client, - query_schema, - ); - - let start_time = ElapsedTimeCounter::start(); - let sleep = crosstarget_utils::time::sleep(timeout); - tokio::pin!(sleep); - - loop { - tokio::select! { - _ = &mut sleep => { - trace!("[{}] interactive transaction timed out", server.id.to_string()); - let _ = server.rollback(true).await; - break; - } - msg = server.receive.recv() => { - if let Some(op) = msg { - let run_state = server.process_msg(op).await; - - if run_state == RunState::Finished { - break - } - } else { - break; - } - } - } - } - - trace!("[{}] completed with {}", server.id.to_string(), server.cached_tx); - - let _ = send_done - .send(( - server.id.clone(), - server.cached_tx.to_closed(start_time, server.timeout), - )) - .await; - - trace!("[{}] has stopped with {}", server.id.to_string(), server.cached_tx); - }) - .instrument(span) - .with_subscriber(dispatcher), - ); - - open_transaction_rcv.await.unwrap()?; - - Ok(client) -} - -/// Spawn the client list clear actor -/// It waits for messages from completed ITXServers and removes -/// the ITXClient from the clients hashmap - -/* A future improvement to this would be to change this to keep a queue of - clients to remove from the list and then periodically remove them. This - would be a nice optimization because we would take less write locks on the - hashmap. 
- - The downside to consider is that we can introduce a race condition where the - ITXServer has stopped running but the client hasn't been removed from the hashmap - yet. When the client tries to send a message to the ITXServer there will be a - send error. This isn't a huge obstacle but something to handle correctly. - An example implementation for this would be: - - ``` - let mut queue: Vec<TxId> = Vec::new(); - - let sleep_duration = Duration::from_millis(100); - let clear_sleeper = time::sleep(sleep_duration); - tokio::pin!(clear_sleeper); - - loop { - tokio::select! { - _ = &mut clear_sleeper => { - let mut list = clients.write().await; - for id in queue.drain(..) { - trace!("removing {} from client list", id); - list.remove(&id); - } - clear_sleeper.as_mut().reset(Instant::now() + sleep_duration); - } - msg = rx.recv() => { - if let Some(id) = msg { - queue.push(id); - } - } - } - } - ``` -*/ -pub(crate) fn spawn_client_list_clear_actor( - clients: Arc<RwLock<HashMap<TxId, ITXClient>>>, - closed_txs: Arc<RwLock<LruCache<TxId, Option<ClosedTx>>>>, - mut rx: Receiver<(TxId, Option<ClosedTx>)>, -) -> JoinHandle<()> { - // Note: tasks implemented via loops cannot be cancelled implicitly, so we need to spawn them in a - // "controlled" way, via `spawn_controlled`. - // The `rx_exit` receiver is used to signal the loop to exit, and that signal is emitted whenever - // the task is aborted (likely, due to the engine shutting down and cleaning up the allocated resources). - spawn_controlled(Box::new( - |mut rx_exit: tokio::sync::broadcast::Receiver<()>| async move { - loop { - tokio::select! { - result = rx.recv() => { - match result { - Some((id, closed_tx)) => { - trace!("removing {} from client list", id); - - let mut clients_guard = clients.write().await; - - clients_guard.remove(&id); - drop(clients_guard); - - closed_txs.write().await.put(id, closed_tx); - } - None => { - // the `rx` channel is closed. - tracing::error!("rx channel is closed!"); - break; - } - } - }, - _ = rx_exit.recv() => { - break; - }, - } - } - }, - )) -} diff --git a/query-engine/core/src/interactive_transactions/manager.rs b/query-engine/core/src/interactive_transactions/manager.rs new file mode 100644 index 000000000000..69be81dd5cbd --- /dev/null +++ b/query-engine/core/src/interactive_transactions/manager.rs @@ -0,0 +1,190 @@ +use crate::{ClosedTransaction, InteractiveTransaction, Operation, ResponseData}; +use connector::Connection; +use lru::LruCache; +use once_cell::sync::Lazy; +use schema::QuerySchemaRef; +use std::{collections::HashMap, sync::Arc}; +use telemetry::helpers::TraceParent; +use tokio::{ + sync::{ + mpsc::{unbounded_channel, UnboundedSender}, + RwLock, + }, + time::Duration, +}; + +use super::{TransactionError, TxId}; + +pub static CLOSED_TX_CACHE_SIZE: Lazy<usize> = Lazy::new(|| match std::env::var("CLOSED_TX_CACHE_SIZE") { + Ok(size) => size.parse().unwrap_or(100), + Err(_) => 100, +}); + +pub struct ITXManager { + /// Stores all current transactions (some of them might be already committed/expired/rolled back). + /// + /// There are two tiers of locks here: + /// 1. Lock on the entire hashmap. This *must* be taken only for short periods of time - for + /// example, to insert/delete a transaction or to clone a transaction inside. + /// 2. Lock on the individual transactions. This one can be taken for prolonged periods of time - for + /// example, to perform an I/O operation. + /// + /// The rationale behind this design is to make the shared path (lock on the entire hashmap) as free + /// from contention as possible. 
Individual transactions are not capable of concurrency, so + /// taking a lock on them to serialise operations is acceptable. + /// + /// Note that since we clone the transaction from the shared hashmap to perform operations on it, it + /// is possible to end up in a situation where we cloned the transaction, but it was then + /// immediately removed by the background task from the common hashmap. In this case, either + /// our operation will be first or the background cleanup task will be first. Both cases are + /// an acceptable outcome. + transactions: Arc<RwLock<HashMap<TxId, Arc<RwLock<InteractiveTransaction>>>>>, + + /// Cache of closed transactions. We keep the last N closed transactions in memory to + /// return better error messages if operations are performed on closed transactions. + closed_txs: Arc<RwLock<LruCache<TxId, ClosedTransaction>>>, + + /// Sender part of the channel to which transaction id is sent when the timeout of the + /// transaction expires. + timeout_sender: UnboundedSender<TxId>, +} + +impl ITXManager { + pub fn new() -> Self { + let transactions: Arc<RwLock<HashMap<TxId, Arc<RwLock<InteractiveTransaction>>>>> = + Arc::new(RwLock::new(HashMap::default())); + let closed_txs = Arc::new(RwLock::new(LruCache::new(*CLOSED_TX_CACHE_SIZE))); + let (timeout_sender, mut timeout_receiver) = unbounded_channel(); + + // This task rolls back and removes any open transactions with expired timeouts from + // `self.transactions`. It also removes any closed transactions to avoid `self.transactions` + // growing infinitely in size over time. + // Note that this task automatically exits when all transactions finish and the `ITXManager` + // is dropped, because that causes the `timeout_receiver` to become closed. + crosstarget_utils::task::spawn({ + let transactions = transactions.clone(); + let closed_txs = closed_txs.clone(); + async move { + while let Some(tx_id) = timeout_receiver.recv().await { + let transaction_entry = match transactions.write().await.remove(&tx_id) { + Some(transaction_entry) => transaction_entry, + None => { + // Transaction was committed or rolled back already. + continue; + } + }; + let mut transaction = transaction_entry.write().await; + + // If the transaction was already committed, rollback will error. + let _ = transaction.rollback(true).await; + + let closed_tx = transaction + .as_closed() + .expect("transaction must be closed after rollback"); + + closed_txs.write().await.put(tx_id, closed_tx); + } + } + }); + + Self { + transactions, + closed_txs, + timeout_sender, + } + } + + pub async fn create_tx( + &self, + query_schema: QuerySchemaRef, + tx_id: TxId, + conn: Box<dyn Connection + Send + Sync>, + isolation_level: Option<String>, + timeout: Duration, + ) -> crate::Result<()> { + // This task notifies the task spawned in the `new()` method that the timeout for this + // transaction has expired. 
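The `create_tx` body that follows spawns exactly this kind of per-transaction watchdog: a sleeper task that pushes the transaction id into the unbounded channel drained by the reaper task from `new()`. A self-contained sketch of the watchdog-and-reaper pattern, using plain tokio primitives in place of the `crosstarget_utils` wrappers (types and names are illustrative, not the engine's):

```
use std::{collections::HashMap, sync::Arc, time::Duration};
use tokio::sync::{mpsc, RwLock};

#[tokio::main]
async fn main() {
    // Map of "open transactions", keyed by a transaction id.
    let txs: Arc<RwLock<HashMap<u64, &'static str>>> =
        Arc::new(RwLock::new(HashMap::from([(1, "open")])));
    let (timeout_tx, mut timeout_rx) = mpsc::unbounded_channel::<u64>();

    // Reaper task: exits on its own once every sender is dropped.
    let reaper = tokio::spawn({
        let txs = txs.clone();
        async move {
            while let Some(id) = timeout_rx.recv().await {
                if txs.write().await.remove(&id).is_some() {
                    // The transaction was still open: this is where rollback would happen.
                    println!("expired tx {id} rolled back");
                }
            }
        }
    });

    // Per-transaction watchdog: fires once when the timeout elapses.
    tokio::spawn(async move {
        tokio::time::sleep(Duration::from_millis(10)).await;
        let _ = timeout_tx.send(1);
    });

    reaper.await.unwrap();
}
```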
+ crosstarget_utils::task::spawn({ + let timeout_sender = self.timeout_sender.clone(); + let tx_id = tx_id.clone(); + async move { + crosstarget_utils::time::sleep(timeout).await; + timeout_sender.send(tx_id).expect("receiver must exist"); + } + }); + + let transaction = + InteractiveTransaction::new(tx_id.clone(), conn, timeout, query_schema, isolation_level).await?; + + self.transactions + .write() + .await + .insert(tx_id, Arc::new(RwLock::new(transaction))); + Ok(()) + } + + async fn get_transaction( + &self, + tx_id: &TxId, + from_operation: &str, + ) -> crate::Result<Arc<RwLock<InteractiveTransaction>>> { + if let Some(transaction) = self.transactions.read().await.get(tx_id) { + Ok(transaction.clone()) + } else { + Err(if let Some(closed_tx) = self.closed_txs.read().await.peek(tx_id) { + TransactionError::Closed { + reason: closed_tx.error_message_for(from_operation), + } + .into() + } else { + TransactionError::NotFound.into() + }) + } + } + + pub async fn execute( + &self, + tx_id: &TxId, + operation: Operation, + traceparent: Option<TraceParent>, + ) -> crate::Result<ResponseData> { + self.get_transaction(tx_id, "query") + .await? + .write() + .await + .execute_single(&operation, traceparent) + .await + } + + pub async fn batch_execute( + &self, + tx_id: &TxId, + operations: Vec<Operation>, + traceparent: Option<TraceParent>, + ) -> crate::Result<Vec<crate::Result<ResponseData>>> { + self.get_transaction(tx_id, "batch query") + .await? + .write() + .await + .execute_batch(&operations, traceparent) + .await + } + + pub async fn commit_tx(&self, tx_id: &TxId) -> crate::Result<()> { + self.get_transaction(tx_id, "commit") + .await? + .write() + .await + .commit() + .await + } + + pub async fn rollback_tx(&self, tx_id: &TxId) -> crate::Result<()> { + self.get_transaction(tx_id, "rollback") + .await? + .write() + .await + .rollback(false) + .await + } +} diff --git a/query-engine/core/src/interactive_transactions/messages.rs b/query-engine/core/src/interactive_transactions/messages.rs deleted file mode 100644 index 0dba2c096a8a..000000000000 --- a/query-engine/core/src/interactive_transactions/messages.rs +++ /dev/null @@ -1,46 +0,0 @@ -use crate::{Operation, ResponseData}; -use std::fmt::Display; -use tokio::sync::oneshot; - -#[derive(Debug)] -pub enum TxOpRequestMsg { - Commit, - Rollback, - Single(Operation, Option<String>), - Batch(Vec<Operation>, Option<String>), -} - -pub struct TxOpRequest { - pub msg: TxOpRequestMsg, - pub respond_to: oneshot::Sender<TxOpResponse>, -} - -impl Display for TxOpRequest { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - match self.msg { - TxOpRequestMsg::Commit => write!(f, "Commit"), - TxOpRequestMsg::Rollback => write!(f, "Rollback"), - TxOpRequestMsg::Single(..) => write!(f, "Single"), - TxOpRequestMsg::Batch(..) => write!(f, "Batch"), - } - } -} - -#[derive(Debug)] -pub enum TxOpResponse { - Committed(crate::Result<()>), - RolledBack(crate::Result<()>), - Single(crate::Result<ResponseData>), - Batch(crate::Result<Vec<crate::Result<ResponseData>>>), -} - -impl Display for TxOpResponse { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - match self { - Self::Committed(..) => write!(f, "Committed"), - Self::RolledBack(..) => write!(f, "RolledBack"), - Self::Single(..) => write!(f, "Single"), - Self::Batch(..) 
=> write!(f, "Batch"), - } - } -} diff --git a/query-engine/core/src/interactive_transactions/mod.rs b/query-engine/core/src/interactive_transactions/mod.rs index a0aed069a879..03894882f1d0 100644 --- a/query-engine/core/src/interactive_transactions/mod.rs +++ b/query-engine/core/src/interactive_transactions/mod.rs @@ -1,48 +1,59 @@ -use crate::CoreError; -use connector::Transaction; -use crosstarget_utils::time::ElapsedTimeCounter; +use derive_more::Display; +use opentelemetry::trace::{SpanId, TraceId}; use serde::Deserialize; -use std::fmt::Display; -use tokio::time::Duration; -mod actor_manager; -mod actors; +use telemetry::helpers::TraceParent; + mod error; -mod messages; +mod manager; +mod transaction; pub use error::*; -pub(crate) use actor_manager::*; -pub(crate) use actors::*; -pub(crate) use messages::*; - -/// How Interactive Transactions work -/// The Interactive Transactions (iTx) follow an actor model design, where each iTx is created in its own process. -/// When a prisma client requests to start a new transaction, the Transaction Actor Manager spawns a new ITXServer. The ITXServer runs in its own -/// process and waits for messages to arrive via its receive channel to process. -/// The Transaction Actor Manager will also create an ITXClient and add it to a hashmap managed by an RwLock. The ITXClient is the only way to communicate -/// with the ITXServer. -/// -/// Once Prisma Client receives the iTx Id it can perform database operations using that iTx id. When an operation request is received by the -/// TransactionActorManager, it looks for the client in the hashmap and passes the operation to the client. The ITXClient sends a message to the -/// ITXServer and waits for a response. The ITXServer will then perform the operation and return the result. The ITXServer will perform one -/// operation at a time. All other operations will sit in the message queue waiting to be processed. -/// -/// The ITXServer will handle all messages until: -/// - It transitions state, e.g. "rollback" or "commit" -/// - It exceeds its timeout, in which case the iTx is rolled back and the connection to the database is closed. -/// -/// Once the ITXServer is done handling messages from the iTx Client, it sends a last message to the Background Client list Actor to say that it is completed and then shuts down. -/// The Background Client list Actor removes the client from the list of active clients and keeps in cache the iTx id of the closed transaction. -/// -/// We keep a list of closed transactions so that if any further messages are received for this iTx id, -/// the TransactionActorManager can reply with a helpful error message which explains that no operation can be performed on a closed transaction -/// rather than an error message stating that the transaction does not exist. - -#[derive(Debug, Clone, Hash, Eq, PartialEq, Deserialize)] +pub(crate) use manager::*; +pub(crate) use transaction::*; + +#[derive(Debug, Clone, Hash, Eq, PartialEq, Deserialize, Display)] +#[display(fmt = "{}", _0)] pub struct TxId(String); -const MINIMUM_TX_ID_LENGTH: usize = 24; +impl TxId { + /// This method, as well as `as_span_id`, is intentionally private because it is very easy to + /// misuse them. Both are used to provide a deterministic trace_id and span_id derived from the + /// transaction id. Both rely on the fact that the transaction id is a valid cuid. 
+ fn as_trace_id(&self) -> TraceId { + let mut buffer = [0; 16]; + let tx_id = self.0.as_bytes(); + let len = tx_id.len(); + + // The first byte of a cuid is always the letter 'c'. The next 8 bytes after that represent + // the timestamp in milliseconds, and the next 4 bytes after that represent the counter. + // We take the last 4 bytes of the timestamp and combine them with the counter. + buffer[0..8].copy_from_slice(&tx_id[(1 + 4)..(1 + 4 + 8)]); + // The last 8 bytes of a cuid are totally random. + buffer[8..].copy_from_slice(&tx_id[len - 8..]); + + TraceId::from_bytes(buffer) + } + + fn as_span_id(&self) -> SpanId { + let mut buffer = [0; 8]; + let tx_id = self.0.as_bytes(); + let len = tx_id.len(); + + // The last 8 bytes of a cuid are totally random. + buffer[..].copy_from_slice(&tx_id[len - 8..]); + + SpanId::from_bytes(buffer) + } + + /// Creates a new artificial `TraceParent`. The span corresponding to this traceparent is never + /// emitted. The same transaction id is guaranteed to produce a traceparent with the same + /// trace_id and span_id. + pub fn as_traceparent(&self) -> TraceParent { + TraceParent::new_unsafe(self.as_trace_id(), self.as_span_id()) + } +} impl Default for TxId { fn default() -> Self { @@ -56,9 +67,11 @@ where T: Into<String>, { fn from(s: T) -> Self { + const MINIMUM_TX_ID_LENGTH: usize = 24; + let contents = s.into(); // This postcondition is to ensure that the TxId is long enough as to be able to derive - // a TraceId from it. + // a TraceId from it. See `TxTraceExt` trait for more details. assert!( contents.len() >= MINIMUM_TX_ID_LENGTH, "minimum length for a TxId ({}) is {}, but was {}", @@ -70,56 +83,28 @@ where } } -impl Display for TxId { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - Display::fmt(&self.0, f) - } -} - -pub enum CachedTx<'a> { - Open(Box<dyn Transaction + 'a>), - Committed, - RolledBack, - Expired, -} - -impl Display for CachedTx<'_> { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - match self { - CachedTx::Open(_) => f.write_str("Open"), - CachedTx::Committed => f.write_str("Committed"), - CachedTx::RolledBack => f.write_str("Rolled back"), - CachedTx::Expired => f.write_str("Expired"), - } - } -} +#[cfg(test)] +mod test { + use super::*; -impl<'a> CachedTx<'a> { - /// Requires this cached TX to be `Open`, else an error will be raised that it is no longer valid. - pub(crate) fn as_open(&mut self) -> crate::Result<&mut Box<dyn Transaction + 'a>> { - if let Self::Open(ref mut otx) = self { - Ok(otx) - } else { - let reason = format!("Transaction is no longer valid. 
Last state: '{self}'"); - Err(CoreError::from(TransactionError::Closed { reason })) - } - } + #[test] + fn test_txid_into_traceid() { + let fixture = vec![ + ("clct0q6ma0000rb04768tiqbj", "71366d6130303030373638746971626a"), + // counter changed, trace id changed: + ("clct0q6ma0002rb04cpa6zkmx", "71366d6130303032637061367a6b6d78"), + // fingerprint changed, trace id did not change, as that chunk is ignored: + ("clct0q6ma00020000cpa6zkmx", "71366d6130303032637061367a6b6d78"), + // first 5 bytes changed, trace id did not change, as that chunk is ignored: + ("00000q6ma00020000cpa6zkmx", "71366d6130303032637061367a6b6d78"), + // 6th byte changed, trace id changed, as that chunk is part of the LSB of the timestamp: + ("0000006ma00020000cpa6zkmx", "30366d6130303032637061367a6b6d78"), + ]; - pub(crate) fn to_closed(&self, start_time: ElapsedTimeCounter, timeout: Duration) -> Option<ClosedTx> { - match self { - CachedTx::Open(_) => None, - CachedTx::Committed => Some(ClosedTx::Committed), - CachedTx::RolledBack => Some(ClosedTx::RolledBack), - CachedTx::Expired => Some(ClosedTx::Expired { start_time, timeout }), + for (txid, expected_trace_id) in fixture { + let txid: TxId = txid.into(); + let trace_id: opentelemetry::trace::TraceId = txid.as_trace_id(); + assert_eq!(trace_id.to_string(), expected_trace_id); } } } - -pub(crate) enum ClosedTx { - Committed, - RolledBack, - Expired { - start_time: ElapsedTimeCounter, - timeout: Duration, - }, -} diff --git a/query-engine/core/src/interactive_transactions/transaction.rs b/query-engine/core/src/interactive_transactions/transaction.rs new file mode 100644 index 000000000000..43428564477b --- /dev/null +++ b/query-engine/core/src/interactive_transactions/transaction.rs @@ -0,0 +1,252 @@ +#![allow(unsafe_code)] + +use std::pin::Pin; + +use crate::{ + execute_many_operations, execute_single_operation, CoreError, Operation, ResponseData, TransactionError, TxId, +}; +use connector::{Connection, Transaction}; +use crosstarget_utils::time::ElapsedTimeCounter; +use schema::QuerySchemaRef; +use telemetry::helpers::TraceParent; +use tokio::time::Duration; +use tracing::Span; +use tracing_futures::Instrument; + +// Note: it's important to maintain the correct state of the transaction throughout execution. If +// the transaction is ever left in the `Open` state after rollback or commit operations, it means +// that the corresponding connection will never be returned to the connection pool. +enum TransactionState { + Open { + // Note: field order is important here because fields are dropped in declaration order. + // First, we drop the `tx`, which may reference `_conn`. Only after that we drop `_conn`. + tx: Box<dyn Transaction>, + _conn: Pin<Box<dyn Connection + Send + Sync>>, + }, + Committed, + RolledBack, + Expired { + start_time: ElapsedTimeCounter, + timeout: Duration, + }, +} + +pub enum ClosedTransaction { + Committed, + RolledBack, + Expired { + start_time: ElapsedTimeCounter, + timeout: Duration, + }, +} + +impl ClosedTransaction { + pub fn error_message_for(&self, operation: &str) -> String { + match self { + ClosedTransaction::Committed => { + format!("A {operation} cannot be executed on a committed transaction") + } + ClosedTransaction::RolledBack => { + format!("A {operation} cannot be executed on a transaction that was rolled back") + } + ClosedTransaction::Expired { start_time, timeout } => { + format!( + "A {operation} cannot be executed on an expired transaction. \ + The timeout for this transaction was {} ms, however {} ms passed since the start + of the transaction. 
Consider increasing the interactive transaction timeout \ + or doing less work in the transaction", + timeout.as_millis(), + start_time.elapsed_time().as_millis(), + ) + } + } + } +} + +impl TransactionState { + async fn start_transaction( + conn: Box<dyn Connection + Send + Sync>, + isolation_level: Option<String>, + ) -> crate::Result<Self> { + // Note: This method creates a self-referential struct, which is why we need unsafe. Field + // `tx` references field `conn` in the `Self::Open` variant. + let mut conn = Box::into_pin(conn); + + // SAFETY: We do not move out of `conn`. + let conn_mut: &mut (dyn Connection + Send + Sync) = unsafe { conn.as_mut().get_unchecked_mut() }; + + // This creates a transaction, which borrows from the connection. + let tx_borrowed_from_conn: Box<dyn Transaction + '_> = conn_mut.start_transaction(isolation_level).await?; + + // SAFETY: This transmute only erases the lifetime from `conn_mut`. Normally, the borrow checker + // guarantees that the borrowed value is not dropped. In this case, we uphold that guarantee + // ourselves through the use of `Pin` on the connection. + let tx_with_erased_lifetime: Box<dyn Transaction + 'static> = + unsafe { std::mem::transmute(tx_borrowed_from_conn) }; + + Ok(Self::Open { + tx: tx_with_erased_lifetime, + _conn: conn, + }) + } + + fn as_open(&mut self, from_operation: &str) -> crate::Result<&mut Box<dyn Transaction>> { + match self { + Self::Open { tx, .. } => Ok(tx), + tx => Err(CoreError::from(TransactionError::Closed { + reason: tx.as_closed().unwrap().error_message_for(from_operation), + })), + } + } + + fn as_closed(&self) -> Option<ClosedTransaction> { + match self { + Self::Open { .. } => None, + Self::Committed => Some(ClosedTransaction::Committed), + Self::RolledBack => Some(ClosedTransaction::RolledBack), + Self::Expired { start_time, timeout } => Some(ClosedTransaction::Expired { + start_time: *start_time, + timeout: *timeout, + }), + } + } +} + +pub struct InteractiveTransaction { + id: TxId, + state: TransactionState, + start_time: ElapsedTimeCounter, + timeout: Duration, + query_schema: QuerySchemaRef, +} + +/// This macro executes the future until it's ready or the transaction's timeout expires. +macro_rules! tx_timeout { + ($self:expr, $operation:expr, $fut:expr) => {{ + let remaining_time = $self + .timeout + .checked_sub($self.start_time.elapsed_time()) + .unwrap_or(Duration::ZERO); + tokio::select! 
{ + _ = crosstarget_utils::time::sleep(remaining_time) => { + let _ = $self.rollback(true).await; + Err(TransactionError::Closed { + reason: $self.as_closed().unwrap().error_message_for($operation), + }.into()) + } + result = $fut => { + result + } + } + }}; +} + +impl InteractiveTransaction { + pub async fn new( + id: TxId, + conn: Box, + timeout: Duration, + query_schema: QuerySchemaRef, + isolation_level: Option, + ) -> crate::Result { + Span::current().record("itx_id", id.to_string()); + + Ok(Self { + id, + state: TransactionState::start_transaction(conn, isolation_level).await?, + start_time: ElapsedTimeCounter::start(), + timeout, + query_schema, + }) + } + + pub async fn execute_single( + &mut self, + operation: &Operation, + traceparent: Option, + ) -> crate::Result { + tx_timeout!(self, "query", async { + let conn = self.state.as_open("query")?; + execute_single_operation( + self.query_schema.clone(), + conn.as_connection_like(), + operation, + traceparent, + ) + .instrument(info_span!("prisma:engine:itx_execute_single", user_facing = true)) + .await + }) + } + + pub async fn execute_batch( + &mut self, + operations: &[Operation], + traceparent: Option, + ) -> crate::Result>> { + tx_timeout!(self, "batch query", async { + let conn = self.state.as_open("batch query")?; + execute_many_operations( + self.query_schema.clone(), + conn.as_connection_like(), + operations, + traceparent, + ) + .instrument(info_span!("prisma:engine:itx_execute_batch", user_facing = true)) + .await + }) + } + + pub async fn commit(&mut self) -> crate::Result<()> { + tx_timeout!(self, "commit", async { + let name = self.name(); + let conn = self.state.as_open("commit")?; + let span = info_span!("prisma:engine:itx_commit", user_facing = true); + + if let Err(err) = conn.commit().instrument(span).await { + error!(?err, ?name, "transaction failed to commit"); + // We don't know if the transaction was committed or not. Because of that, we cannot + // leave it in the "open" state. We attempt to roll back to get the transaction into a + // known state. + let _ = self.rollback(false).await; + Err(err.into()) + } else { + debug!(?name, "transaction committed"); + self.state = TransactionState::Committed; + Ok(()) + } + }) + } + + pub async fn rollback(&mut self, was_timeout: bool) -> crate::Result<()> { + let name = self.name(); + let conn = self.state.as_open("rollback")?; + let span = info_span!("prisma:engine:itx_rollback", user_facing = true); + + let result = conn.rollback().instrument(span).await; + if let Err(err) = &result { + error!(?err, ?was_timeout, ?name, "transaction failed to roll back"); + } else { + debug!(?was_timeout, ?name, "transaction rolled back"); + } + + // Ensure that the transaction isn't left in the "open" state after the rollback. 
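+        // A rollback caused by the timeout firing is recorded as `Expired` rather than `RolledBack`,
+        // so later operations on this transaction report the timeout in their error messages.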
+ if was_timeout { + self.state = TransactionState::Expired { + start_time: self.start_time, + timeout: self.timeout, + }; + } else { + self.state = TransactionState::RolledBack; + } + + result.map_err(<_>::into) + } + + pub fn as_closed(&self) -> Option { + self.state.as_closed() + } + + pub fn name(&self) -> String { + format!("itx-{}", self.id) + } +} diff --git a/query-engine/core/src/interpreter/interpreter_impl.rs b/query-engine/core/src/interpreter/interpreter_impl.rs index 012bbc953b1a..e25d157b7ada 100644 --- a/query-engine/core/src/interpreter/interpreter_impl.rs +++ b/query-engine/core/src/interpreter/interpreter_impl.rs @@ -8,6 +8,7 @@ use connector::ConnectionLike; use futures::future::BoxFuture; use query_structure::prelude::*; use std::{collections::HashMap, fmt}; +use telemetry::helpers::TraceParent; use tracing::Instrument; #[derive(Debug, Clone)] @@ -178,7 +179,7 @@ impl<'conn> QueryInterpreter<'conn> { exp: Expression, env: Env, level: usize, - trace_id: Option, + traceparent: Option, ) -> BoxFuture<'_, InterpretationResult> { match exp { Expression::Func { func } => { @@ -186,7 +187,7 @@ impl<'conn> QueryInterpreter<'conn> { Box::pin(async move { self.log_line(level, || "execute {"); - let result = self.interpret(expr?, env, level + 1, trace_id).await; + let result = self.interpret(expr?, env, level + 1, traceparent).await; self.log_line(level, || "}"); result }) @@ -204,7 +205,7 @@ impl<'conn> QueryInterpreter<'conn> { let mut results = Vec::with_capacity(seq.len()); for expr in seq { - results.push(self.interpret(expr, env.clone(), level + 1, trace_id.clone()).await?); + results.push(self.interpret(expr, env.clone(), level + 1, traceparent).await?); self.log_line(level + 1, || ","); } @@ -227,7 +228,7 @@ impl<'conn> QueryInterpreter<'conn> { self.log_line(level + 1, || format!("{} = {{", &binding.name)); let result = self - .interpret(binding.expr, env.clone(), level + 2, trace_id.clone()) + .interpret(binding.expr, env.clone(), level + 2, traceparent) .await?; inner_env.insert(binding.name, result); @@ -242,7 +243,7 @@ impl<'conn> QueryInterpreter<'conn> { }; self.log_line(level, || "in {"); - let result = self.interpret(next_expression, inner_env, level + 1, trace_id).await; + let result = self.interpret(next_expression, inner_env, level + 1, traceparent).await; self.log_line(level, || "}"); result }) @@ -253,7 +254,7 @@ impl<'conn> QueryInterpreter<'conn> { Query::Read(read) => { self.log_line(level, || format!("readExecute {read}")); let span = info_span!("prisma:engine:read-execute"); - Ok(read::execute(self.conn, read, None, trace_id) + Ok(read::execute(self.conn, read, None, traceparent) .instrument(span) .await .map(ExpressionResult::Query)?) @@ -262,7 +263,7 @@ impl<'conn> QueryInterpreter<'conn> { Query::Write(write) => { self.log_line(level, || format!("writeExecute {write}")); let span = info_span!("prisma:engine:write-execute"); - Ok(write::execute(self.conn, write, trace_id) + Ok(write::execute(self.conn, write, traceparent) .instrument(span) .await .map(ExpressionResult::Query)?) 
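The self-referential construction in `TransactionState::start_transaction` above is easier to follow outside of diff form. The sketch below shows the same pin-and-erase pattern in isolation, with hypothetical `Owner` and `Borrower` types standing in for the connection and transaction; it illustrates the technique and is not code from this patch:

    use std::pin::Pin;

    struct Owner(String);
    struct Borrower<'a>(&'a mut Owner);

    struct SelfReferential {
        // Declaration order matters: `borrower` is dropped before `_owner`,
        // so the lifetime-erased borrow never dangles.
        borrower: Borrower<'static>,
        // Boxed and pinned, so the owner never moves while it is borrowed.
        _owner: Pin<Box<Owner>>,
    }

    impl SelfReferential {
        fn new(owner: Box<Owner>) -> Self {
            let mut owner = Box::into_pin(owner);
            // SAFETY: we never move out of `owner`.
            let owner_mut: &mut Owner = unsafe { owner.as_mut().get_unchecked_mut() };
            let borrower = Borrower(owner_mut);
            // SAFETY: only the lifetime is erased; the pinned box keeps the referent
            // alive and in place for as long as `borrower` exists.
            let borrower: Borrower<'static> = unsafe { std::mem::transmute(borrower) };
            SelfReferential { borrower, _owner: owner }
        }
    }

As in the patch, correctness hinges on two invariants: the owner is never moved out of its pinned allocation, and the field order guarantees the borrower is dropped first.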
@@ -297,10 +298,10 @@ impl<'conn> QueryInterpreter<'conn> { self.log_line(level, || format!("if = {predicate} {{")); let result = if predicate { - self.interpret(Expression::Sequence { seq: then }, env, level + 1, trace_id) + self.interpret(Expression::Sequence { seq: then }, env, level + 1, traceparent) .await } else { - self.interpret(Expression::Sequence { seq: elze }, env, level + 1, trace_id) + self.interpret(Expression::Sequence { seq: elze }, env, level + 1, traceparent) .await }; self.log_line(level, || "}"); diff --git a/query-engine/core/src/interpreter/query_interpreters/nested_read.rs b/query-engine/core/src/interpreter/query_interpreters/nested_read.rs index 790728104fd3..95e5945c18dc 100644 --- a/query-engine/core/src/interpreter/query_interpreters/nested_read.rs +++ b/query-engine/core/src/interpreter/query_interpreters/nested_read.rs @@ -3,12 +3,13 @@ use crate::{interpreter::InterpretationResult, query_ast::*}; use connector::ConnectionLike; use query_structure::*; use std::collections::HashMap; +use telemetry::helpers::TraceParent; pub(crate) async fn m2m( tx: &mut dyn ConnectionLike, query: &mut RelatedRecordsQuery, parent_result: Option<&ManyRecords>, - trace_id: Option, + traceparent: Option, ) -> InterpretationResult { let processor = InMemoryRecordProcessor::new_from_query_args(&mut query.args); @@ -31,7 +32,7 @@ pub(crate) async fn m2m( } let ids = tx - .get_related_m2m_record_ids(&query.parent_field, &parent_ids, trace_id.clone()) + .get_related_m2m_record_ids(&query.parent_field, &parent_ids, traceparent) .await?; if ids.is_empty() { return Ok(ManyRecords::empty(&query.selected_fields)); @@ -70,7 +71,7 @@ pub(crate) async fn m2m( args, &query.selected_fields, RelationLoadStrategy::Query, - trace_id.clone(), + traceparent, ) .await? }; @@ -137,7 +138,7 @@ pub async fn one2m( parent_result: Option<&ManyRecords>, mut query_args: QueryArguments, selected_fields: &FieldSelection, - trace_id: Option, + traceparent: Option, ) -> InterpretationResult { let parent_model_id = parent_field.model().primary_identifier(); let parent_link_id = parent_field.linking_fields(); @@ -208,7 +209,7 @@ pub async fn one2m( args, selected_fields, RelationLoadStrategy::Query, - trace_id, + traceparent, ) .await? 
}; diff --git a/query-engine/core/src/interpreter/query_interpreters/read.rs b/query-engine/core/src/interpreter/query_interpreters/read.rs index 7e194993b75c..d79f4fd5c998 100644 --- a/query-engine/core/src/interpreter/query_interpreters/read.rs +++ b/query-engine/core/src/interpreter/query_interpreters/read.rs @@ -4,20 +4,21 @@ use connector::{error::ConnectorError, ConnectionLike}; use futures::future::{BoxFuture, FutureExt}; use psl::can_support_relation_load_strategy; use query_structure::{ManyRecords, RelationLoadStrategy, RelationSelection}; +use telemetry::helpers::TraceParent; use user_facing_errors::KnownError; pub(crate) fn execute<'conn>( tx: &'conn mut dyn ConnectionLike, query: ReadQuery, parent_result: Option<&'conn ManyRecords>, - trace_id: Option, + traceparent: Option, ) -> BoxFuture<'conn, InterpretationResult> { let fut = async move { match query { - ReadQuery::RecordQuery(q) => read_one(tx, q, trace_id).await, - ReadQuery::ManyRecordsQuery(q) => read_many(tx, q, trace_id).await, - ReadQuery::RelatedRecordsQuery(q) => read_related(tx, q, parent_result, trace_id).await, - ReadQuery::AggregateRecordsQuery(q) => aggregate(tx, q, trace_id).await, + ReadQuery::RecordQuery(q) => read_one(tx, q, traceparent).await, + ReadQuery::ManyRecordsQuery(q) => read_many(tx, q, traceparent).await, + ReadQuery::RelatedRecordsQuery(q) => read_related(tx, q, parent_result, traceparent).await, + ReadQuery::AggregateRecordsQuery(q) => aggregate(tx, q, traceparent).await, } }; @@ -28,7 +29,7 @@ pub(crate) fn execute<'conn>( fn read_one( tx: &mut dyn ConnectionLike, query: RecordQuery, - trace_id: Option, + traceparent: Option, ) -> BoxFuture<'_, InterpretationResult> { let fut = async move { let model = query.model; @@ -39,7 +40,7 @@ fn read_one( &filter, &query.selected_fields, query.relation_load_strategy, - trace_id, + traceparent, ) .await?; @@ -97,18 +98,18 @@ fn read_one( fn read_many( tx: &mut dyn ConnectionLike, query: ManyRecordsQuery, - trace_id: Option, + traceparent: Option, ) -> BoxFuture<'_, InterpretationResult> { match query.relation_load_strategy { - RelationLoadStrategy::Join => read_many_by_joins(tx, query, trace_id), - RelationLoadStrategy::Query => read_many_by_queries(tx, query, trace_id), + RelationLoadStrategy::Join => read_many_by_joins(tx, query, traceparent), + RelationLoadStrategy::Query => read_many_by_queries(tx, query, traceparent), } } fn read_many_by_queries( tx: &mut dyn ConnectionLike, mut query: ManyRecordsQuery, - trace_id: Option, + traceparent: Option, ) -> BoxFuture<'_, InterpretationResult> { let processor = if query.args.requires_inmemory_processing() { Some(InMemoryRecordProcessor::new_from_query_args(&mut query.args)) @@ -123,7 +124,7 @@ fn read_many_by_queries( query.args.clone(), &query.selected_fields, query.relation_load_strategy, - trace_id, + traceparent, ) .await?; @@ -156,7 +157,7 @@ fn read_many_by_queries( fn read_many_by_joins( tx: &mut dyn ConnectionLike, query: ManyRecordsQuery, - trace_id: Option, + traceparent: Option, ) -> BoxFuture<'_, InterpretationResult> { if !can_support_relation_load_strategy() { unreachable!() @@ -168,7 +169,7 @@ fn read_many_by_joins( query.args.clone(), &query.selected_fields, query.relation_load_strategy, - trace_id, + traceparent, ) .await?; @@ -209,13 +210,13 @@ fn read_related<'conn>( tx: &'conn mut dyn ConnectionLike, mut query: RelatedRecordsQuery, parent_result: Option<&'conn ManyRecords>, - trace_id: Option, + traceparent: Option, ) -> BoxFuture<'conn, InterpretationResult> { let fut = async move { 
let relation = query.parent_field.relation(); let records = if relation.is_many_to_many() { - nested_read::m2m(tx, &mut query, parent_result, trace_id).await? + nested_read::m2m(tx, &mut query, parent_result, traceparent).await? } else { nested_read::one2m( tx, @@ -224,7 +225,7 @@ fn read_related<'conn>( parent_result, query.args.clone(), &query.selected_fields, - trace_id, + traceparent, ) .await? }; @@ -248,7 +249,7 @@ fn read_related<'conn>( async fn aggregate( tx: &mut dyn ConnectionLike, query: AggregateRecordsQuery, - trace_id: Option, + traceparent: Option, ) -> InterpretationResult { let selection_order = query.selection_order; @@ -259,7 +260,7 @@ async fn aggregate( query.selectors, query.group_by, query.having, - trace_id, + traceparent, ) .await?; diff --git a/query-engine/core/src/interpreter/query_interpreters/write.rs b/query-engine/core/src/interpreter/query_interpreters/write.rs index d3146c383639..453964369801 100644 --- a/query-engine/core/src/interpreter/query_interpreters/write.rs +++ b/query-engine/core/src/interpreter/query_interpreters/write.rs @@ -7,24 +7,25 @@ use crate::{ }; use connector::{ConnectionLike, DatasourceFieldName, NativeUpsert, WriteArgs}; use query_structure::{ManyRecords, Model, RawJson}; +use telemetry::helpers::TraceParent; pub(crate) async fn execute( tx: &mut dyn ConnectionLike, write_query: WriteQuery, - trace_id: Option, + traceparent: Option, ) -> InterpretationResult { match write_query { - WriteQuery::CreateRecord(q) => create_one(tx, q, trace_id).await, - WriteQuery::CreateManyRecords(q) => create_many(tx, q, trace_id).await, - WriteQuery::UpdateRecord(q) => update_one(tx, q, trace_id).await, - WriteQuery::DeleteRecord(q) => delete_one(tx, q, trace_id).await, - WriteQuery::UpdateManyRecords(q) => update_many(tx, q, trace_id).await, - WriteQuery::DeleteManyRecords(q) => delete_many(tx, q, trace_id).await, - WriteQuery::ConnectRecords(q) => connect(tx, q, trace_id).await, - WriteQuery::DisconnectRecords(q) => disconnect(tx, q, trace_id).await, + WriteQuery::CreateRecord(q) => create_one(tx, q, traceparent).await, + WriteQuery::CreateManyRecords(q) => create_many(tx, q, traceparent).await, + WriteQuery::UpdateRecord(q) => update_one(tx, q, traceparent).await, + WriteQuery::DeleteRecord(q) => delete_one(tx, q, traceparent).await, + WriteQuery::UpdateManyRecords(q) => update_many(tx, q, traceparent).await, + WriteQuery::DeleteManyRecords(q) => delete_many(tx, q, traceparent).await, + WriteQuery::ConnectRecords(q) => connect(tx, q, traceparent).await, + WriteQuery::DisconnectRecords(q) => disconnect(tx, q, traceparent).await, WriteQuery::ExecuteRaw(q) => execute_raw(tx, q).await, WriteQuery::QueryRaw(q) => query_raw(tx, q).await, - WriteQuery::Upsert(q) => native_upsert(tx, q, trace_id).await, + WriteQuery::Upsert(q) => native_upsert(tx, q, traceparent).await, } } @@ -46,9 +47,11 @@ async fn execute_raw(tx: &mut dyn ConnectionLike, q: RawQuery) -> Interpretation async fn create_one( tx: &mut dyn ConnectionLike, q: CreateRecord, - trace_id: Option, + traceparent: Option, ) -> InterpretationResult { - let res = tx.create_record(&q.model, q.args, q.selected_fields, trace_id).await?; + let res = tx + .create_record(&q.model, q.args, q.selected_fields, traceparent) + .await?; Ok(QueryResult::RecordSelection(Some(Box::new(RecordSelection { name: q.name, @@ -63,15 +66,15 @@ async fn create_one( async fn create_many( tx: &mut dyn ConnectionLike, q: CreateManyRecords, - trace_id: Option, + traceparent: Option, ) -> InterpretationResult { if 
q.split_by_shape { - return create_many_split_by_shape(tx, q, trace_id).await; + return create_many_split_by_shape(tx, q, traceparent).await; } if let Some(selected_fields) = q.selected_fields { let records = tx - .create_records_returning(&q.model, q.args, q.skip_duplicates, selected_fields.fields, trace_id) + .create_records_returning(&q.model, q.args, q.skip_duplicates, selected_fields.fields, traceparent) .await?; let nested: Vec = super::read::process_nested(tx, selected_fields.nested, Some(&records)).await?; @@ -87,7 +90,9 @@ async fn create_many( Ok(QueryResult::RecordSelection(Some(Box::new(selection)))) } else { - let affected_records = tx.create_records(&q.model, q.args, q.skip_duplicates, trace_id).await?; + let affected_records = tx + .create_records(&q.model, q.args, q.skip_duplicates, traceparent) + .await?; Ok(QueryResult::Count(affected_records)) } @@ -100,7 +105,7 @@ async fn create_many( async fn create_many_split_by_shape( tx: &mut dyn ConnectionLike, q: CreateManyRecords, - trace_id: Option, + traceparent: Option, ) -> InterpretationResult { let mut args_by_shape: HashMap> = Default::default(); let model = &q.model; @@ -121,7 +126,7 @@ async fn create_many_split_by_shape( args, q.skip_duplicates, selected_fields.fields.clone(), - trace_id.clone(), + traceparent, ) .await?; @@ -139,7 +144,7 @@ async fn create_many_split_by_shape( result } else { // Empty result means that the list of arguments was empty as well. - tx.create_records_returning(&q.model, vec![], q.skip_duplicates, selected_fields.fields, trace_id) + tx.create_records_returning(&q.model, vec![], q.skip_duplicates, selected_fields.fields, traceparent) .await? }; @@ -161,7 +166,7 @@ async fn create_many_split_by_shape( for args in args_by_shape.into_values() { let affected_records = tx - .create_records(&q.model, args, q.skip_duplicates, trace_id.clone()) + .create_records(&q.model, args, q.skip_duplicates, traceparent) .await?; result += affected_records; } @@ -205,7 +210,7 @@ fn create_many_shape(write_args: &WriteArgs, model: &Model) -> CreateManyShape { async fn update_one( tx: &mut dyn ConnectionLike, q: UpdateRecord, - trace_id: Option, + traceparent: Option, ) -> InterpretationResult { let res = tx .update_record( @@ -213,7 +218,7 @@ async fn update_one( q.record_filter().clone(), q.args().clone(), q.selected_fields(), - trace_id, + traceparent, ) .await?; @@ -245,9 +250,9 @@ async fn update_one( async fn native_upsert( tx: &mut dyn ConnectionLike, query: NativeUpsert, - trace_id: Option, + traceparent: Option, ) -> InterpretationResult { - let scalars = tx.native_upsert_record(query.clone(), trace_id).await?; + let scalars = tx.native_upsert_record(query.clone(), traceparent).await?; Ok(RecordSelection { name: query.name().to_string(), @@ -263,7 +268,7 @@ async fn native_upsert( async fn delete_one( tx: &mut dyn ConnectionLike, q: DeleteRecord, - trace_id: Option, + traceparent: Option, ) -> InterpretationResult { // We need to ensure that we have a record finder, else we delete everything (conversion to empty filter). 
let filter = match q.record_filter { @@ -276,7 +281,7 @@ async fn delete_one( if let Some(selected_fields) = q.selected_fields { let record = tx - .delete_record(&q.model, filter, selected_fields.fields, trace_id) + .delete_record(&q.model, filter, selected_fields.fields, traceparent) .await?; let selection = RecordSelection { name: q.name, @@ -289,7 +294,7 @@ async fn delete_one( Ok(QueryResult::RecordSelection(Some(Box::new(selection)))) } else { - let result = tx.delete_records(&q.model, filter, trace_id).await?; + let result = tx.delete_records(&q.model, filter, traceparent).await?; Ok(QueryResult::Count(result)) } } @@ -297,9 +302,11 @@ async fn delete_one( async fn update_many( tx: &mut dyn ConnectionLike, q: UpdateManyRecords, - trace_id: Option, + traceparent: Option, ) -> InterpretationResult { - let res = tx.update_records(&q.model, q.record_filter, q.args, trace_id).await?; + let res = tx + .update_records(&q.model, q.record_filter, q.args, traceparent) + .await?; Ok(QueryResult::Count(res)) } @@ -307,9 +314,9 @@ async fn update_many( async fn delete_many( tx: &mut dyn ConnectionLike, q: DeleteManyRecords, - trace_id: Option, + traceparent: Option, ) -> InterpretationResult { - let res = tx.delete_records(&q.model, q.record_filter, trace_id).await?; + let res = tx.delete_records(&q.model, q.record_filter, traceparent).await?; Ok(QueryResult::Count(res)) } @@ -317,13 +324,13 @@ async fn delete_many( async fn connect( tx: &mut dyn ConnectionLike, q: ConnectRecords, - trace_id: Option, + traceparent: Option, ) -> InterpretationResult { tx.m2m_connect( &q.relation_field, &q.parent_id.expect("Expected parent record ID to be set for connect"), &q.child_ids, - trace_id, + traceparent, ) .await?; @@ -333,13 +340,13 @@ async fn connect( async fn disconnect( tx: &mut dyn ConnectionLike, q: DisconnectRecords, - trace_id: Option, + traceparent: Option, ) -> InterpretationResult { tx.m2m_disconnect( &q.relation_field, &q.parent_id.expect("Expected parent record ID to be set for disconnect"), &q.child_ids, - trace_id, + traceparent, ) .await?; diff --git a/query-engine/core/src/lib.rs b/query-engine/core/src/lib.rs index bf993d6bce18..ff8fc43defce 100644 --- a/query-engine/core/src/lib.rs +++ b/query-engine/core/src/lib.rs @@ -10,9 +10,7 @@ pub mod query_document; pub mod query_graph_builder; pub mod relation_load_strategy; pub mod response_ir; -pub mod telemetry; -pub use self::telemetry::*; pub use self::{ error::{CoreError, FieldConversionError}, executor::{QueryExecutor, TransactionOptions}, diff --git a/query-engine/core/src/telemetry/capturing/tx_ext.rs b/query-engine/core/src/telemetry/capturing/tx_ext.rs deleted file mode 100644 index 6b1b4905ab57..000000000000 --- a/query-engine/core/src/telemetry/capturing/tx_ext.rs +++ /dev/null @@ -1,88 +0,0 @@ -use std::collections::HashMap; - -pub trait TxTraceExt { - fn into_trace_id(self) -> opentelemetry::trace::TraceId; - fn into_trace_context(self) -> opentelemetry::Context; - fn as_traceparent(&self) -> String; -} - -impl TxTraceExt for crate::TxId { - // in order to convert a TxId (a 48-byte cuid) into a TraceId (16 bytes), we remove the first byte, - // (always 'c') and get the next 16 bytes, which are random enough to be used as a trace id. - // this is a typical cuid: "c-lct0q6ma-0004-rb04-h6en1roa" - // - // - first letter is always the same - // - next 7-8 bytes are a timestamp. 
There's more entropy in the least significant bytes - // - next 4 bytes are a counter since the server started - // - next 4 bytes are a system fingerprint, invariant for the same server instance - // - least significant 8 bytes. Totally random. - // - // We want the most entropic slice of 16 bytes that's deterministically determined - fn into_trace_id(self) -> opentelemetry::trace::TraceId { - let mut buffer = [0; 16]; - let str = self.to_string(); - let tx_id_bytes = str.as_bytes(); - let len = tx_id_bytes.len(); - - // bytes [len-20 to len-12): least significant 4 bytes of the timestamp + 4 bytes counter - for (i, source_idx) in (len - 20..len - 12).enumerate() { - buffer[i] = tx_id_bytes[source_idx]; - } - // bytes [len-8 to len): the random blocks - for (i, source_idx) in (len - 8..len).enumerate() { - buffer[i + 8] = tx_id_bytes[source_idx]; - } - - opentelemetry::trace::TraceId::from_bytes(buffer) - } - // This is a bit of a hack, but it's the only way to have a default trace span for a whole - // transaction when no traceparent is propagated from the client. - // - // This is done so we can capture traces happening across the different queries in a - // transaction. Otherwise, if a traceparent is not propagated from the client, each query in - // the transaction will run within a span that has already been generated at the beginning of the - // transaction, and held active in the actor in charge of running the queries. This makes it - // impossible to capture traces happening in the individual queries, as they won't be aware of - // the transaction they are part of. - // - // By generating this "fake" traceparent based on the transaction id, we can have a common - // trace_id for all transaction operations. - fn into_trace_context(self) -> opentelemetry::Context { - let extractor: HashMap = - HashMap::from_iter(vec![("traceparent".to_string(), self.as_traceparent())]); - opentelemetry::global::get_text_map_propagator(|propagator| propagator.extract(&extractor)) - } - - fn as_traceparent(&self) -> String { - let trace_id = self.clone().into_trace_id(); - format!("00-{trace_id}-0000000000000001-01") - } -} - -// tests for the TxId conversion traits -#[cfg(test)] -mod test { - use super::*; - use crate::TxId; - - #[test] - fn test_txid_into_traceid() { - let fixture = vec![ - ("clct0q6ma0000rb04768tiqbj", "71366d6130303030373638746971626a"), - // counter changed, trace id changed: - ("clct0q6ma0002rb04cpa6zkmx", "71366d6130303032637061367a6b6d78"), - // fingerprint changed, trace id did not change, as that chunk is ignored: - ("clct0q6ma00020000cpa6zkmx", "71366d6130303032637061367a6b6d78"), - // first 5 bytes changed, trace id did not change, as that chunk is ignored: - ("00000q6ma00020000cpa6zkmx", "71366d6130303032637061367a6b6d78"), - // 6th byte changed, trace id changed, as that chunk is part of the lsb of the timestamp - ("0000006ma00020000cpa6zkmx", "30366d6130303032637061367a6b6d78"), - ]; - - for (txid, expected_trace_id) in fixture { - let txid: TxId = txid.into(); - let trace_id: opentelemetry::trace::TraceId = txid.into_trace_id(); - assert_eq!(trace_id.to_string(), expected_trace_id); - } - } -} diff --git a/query-engine/core/src/telemetry/helpers.rs b/query-engine/core/src/telemetry/helpers.rs deleted file mode 100644 index 30c63ed6693f..000000000000 --- a/query-engine/core/src/telemetry/helpers.rs +++ /dev/null @@ -1,128 +0,0 @@ -use super::models::TraceSpan; -use once_cell::sync::Lazy; -use opentelemetry::sdk::export::trace::SpanData; -use 
opentelemetry::trace::{TraceContextExt, TraceId}; -use opentelemetry::Context; -use serde_json::{json, Value}; -use std::collections::HashMap; -use tracing::{Metadata, Span}; -use tracing_opentelemetry::OpenTelemetrySpanExt; -use tracing_subscriber::EnvFilter; - -pub static SHOW_ALL_TRACES: Lazy = Lazy::new(|| match std::env::var("PRISMA_SHOW_ALL_TRACES") { - Ok(enabled) => enabled.eq_ignore_ascii_case("true"), - Err(_) => false, -}); - -pub fn spans_to_json(spans: Vec) -> String { - let json_spans: Vec = spans.into_iter().map(|span| json!(TraceSpan::from(span))).collect(); - let span_result = json!({ - "span": true, - "spans": json_spans - }); - serde_json::to_string(&span_result).unwrap_or_default() -} - -// set the parent context and return the traceparent -pub fn set_parent_context_from_json_str(span: &Span, trace: &str) -> Option { - let trace: HashMap = serde_json::from_str(trace).unwrap_or_default(); - let trace_id = trace.get("traceparent").map(String::from); - let cx = opentelemetry::global::get_text_map_propagator(|propagator| propagator.extract(&trace)); - span.set_parent(cx); - trace_id -} - -pub fn set_span_link_from_traceparent(span: &Span, traceparent: Option) { - if let Some(traceparent) = traceparent { - let trace: HashMap = HashMap::from([("traceparent".to_string(), traceparent)]); - let cx = opentelemetry::global::get_text_map_propagator(|propagator| propagator.extract(&trace)); - let context_span = cx.span(); - span.add_link(context_span.span_context().clone()); - } -} - -pub fn get_trace_parent_from_span(span: &Span) -> String { - let cx = span.context(); - let binding = cx.span(); - let span_context = binding.span_context(); - - format!("00-{}-{}-01", span_context.trace_id(), span_context.span_id()) -} - -pub fn get_trace_id_from_span(span: &Span) -> TraceId { - let cx = span.context(); - get_trace_id_from_context(&cx) -} - -pub fn get_trace_id_from_context(context: &Context) -> TraceId { - let context_span = context.span(); - context_span.span_context().trace_id() -} - -pub fn get_trace_id_from_traceparent(traceparent: Option<&str>) -> TraceId { - traceparent - .unwrap_or("0-0-0-0") - .split('-') - .nth(1) - .map(|id| TraceId::from_hex(id).unwrap_or(TraceId::INVALID)) - .unwrap() -} - -pub enum QueryEngineLogLevel { - FromEnv, - Override(String), -} - -impl QueryEngineLogLevel { - fn level(self) -> Option { - match self { - Self::FromEnv => std::env::var("QE_LOG_LEVEL").ok(), - Self::Override(l) => Some(l), - } - } -} - -#[rustfmt::skip] -pub fn env_filter(log_queries: bool, qe_log_level: QueryEngineLogLevel) -> EnvFilter { - let mut filter = EnvFilter::from_default_env() - .add_directive("tide=error".parse().unwrap()) - .add_directive("tonic=error".parse().unwrap()) - .add_directive("h2=error".parse().unwrap()) - .add_directive("hyper=error".parse().unwrap()) - .add_directive("tower=error".parse().unwrap()); - - if let Some(level) = qe_log_level.level() { - filter = filter - .add_directive(format!("query_engine={}", &level).parse().unwrap()) - .add_directive(format!("query_core={}", &level).parse().unwrap()) - .add_directive(format!("query_connector={}", &level).parse().unwrap()) - .add_directive(format!("sql_query_connector={}", &level).parse().unwrap()) - .add_directive(format!("mongodb_query_connector={}", &level).parse().unwrap()); - } - - if log_queries { - filter = filter - .add_directive("quaint[{is_query}]=trace".parse().unwrap()) - .add_directive("mongodb_query_connector=debug".parse().unwrap()); - } - - filter -} - -pub fn 
user_facing_span_only_filter(meta: &Metadata<'_>) -> bool { - if !meta.is_span() { - return false; - } - - if *SHOW_ALL_TRACES { - return true; - } - - if meta.fields().iter().any(|f| f.name() == "user_facing") { - return true; - } - - // spans describing a quaint query. - // TODO: should this span be made user_facing in quaint? - meta.target() == "quaint::connector::metrics" && meta.name() == "quaint:query" -} diff --git a/query-engine/query-engine-c-abi/Cargo.toml b/query-engine/query-engine-c-abi/Cargo.toml index ff55be35b8e9..3f5f6c112fd1 100644 --- a/query-engine/query-engine-c-abi/Cargo.toml +++ b/query-engine/query-engine-c-abi/Cargo.toml @@ -17,6 +17,7 @@ request-handlers = { path = "../request-handlers", features = [ ] } query-connector = { path = "../connectors/query-connector" } query-engine-common = { path = "../../libs/query-engine-common" } +telemetry = { path = "../../libs/telemetry" } user-facing-errors = { path = "../../libs/user-facing-errors" } psl = { workspace = true, features = ["sqlite"] } sql-connector = { path = "../connectors/sql-query-connector", package = "sql-query-connector" } diff --git a/query-engine/query-engine-c-abi/src/engine.rs b/query-engine/query-engine-c-abi/src/engine.rs index 4d3d81636755..5aa9548d0c01 100644 --- a/query-engine/query-engine-c-abi/src/engine.rs +++ b/query-engine/query-engine-c-abi/src/engine.rs @@ -9,7 +9,7 @@ use once_cell::sync::Lazy; use query_core::{ protocol::EngineProtocol, schema::{self}, - telemetry, TransactionOptions, TxId, + TransactionOptions, TxId, }; use request_handlers::{load_executor, RequestBody, RequestHandler}; use serde_json::json; @@ -20,11 +20,13 @@ use std::{ ptr::null_mut, sync::Arc, }; +use telemetry::helpers::TraceParent; use tokio::{ runtime::{self, Runtime}, sync::RwLock, }; -use tracing::{field, instrument::WithSubscriber, level_filters::LevelFilter, Instrument}; +use tracing::{instrument::WithSubscriber, level_filters::LevelFilter, Instrument}; +use tracing_opentelemetry::OpenTelemetrySpanExt; use query_engine_common::Result; use query_engine_common::{ @@ -202,7 +204,8 @@ impl QueryEngine { let trace_string = get_cstr_safe(trace).expect("Connect trace is missing"); let span = tracing::info_span!("prisma:engine:connect"); - let _ = telemetry::helpers::set_parent_context_from_json_str(&span, &trace_string); + let parent_context = telemetry::helpers::restore_remote_context_from_json_str(&trace_string); + span.set_parent(parent_context); let mut inner = self.inner.write().await; let builder = inner.as_builder()?; @@ -293,12 +296,14 @@ impl QueryEngine { let query = RequestBody::try_from_str(&body, engine.engine_protocol())?; - let span = tracing::info_span!("prisma:engine", user_facing = true); - let trace_id = telemetry::helpers::set_parent_context_from_json_str(&span, &trace); + let span = tracing::info_span!("prisma:engine:query", user_facing = true); + let parent_context = telemetry::helpers::restore_remote_context_from_json_str(&trace); + let traceparent = TraceParent::from_remote_context(&parent_context); + span.set_parent(parent_context); async move { let handler = RequestHandler::new(engine.executor(), engine.query_schema(), engine.engine_protocol()); - let response = handler.handle(query, tx_id.map(TxId::from), trace_id).await; + let response = handler.handle(query, tx_id.map(TxId::from), traceparent).await; let serde_span = tracing::info_span!("prisma:engine:response_json_serialization", user_facing = true); Ok(serde_span.in_scope(|| serde_json::to_string(&response))?) 
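Across these engine entry points, the removed `set_parent_context_from_json_str` is replaced by the `restore_remote_context_from_json_str` / `set_parent` pair. The body of the new helper is not part of this diff; judging from the deleted helper in `telemetry/helpers.rs` above, it plausibly looks like the sketch below, where the trace payload is a JSON object such as {"traceparent": "00-<trace-id>-<span-id>-01"}. This is a hypothetical reconstruction, not the actual `libs/telemetry` source:

    use std::collections::HashMap;

    use tracing_opentelemetry::OpenTelemetrySpanExt;

    fn restore_remote_context_from_json_str(trace: &str) -> opentelemetry::Context {
        // Malformed input degrades to an empty carrier, i.e. no remote parent.
        let carrier: HashMap<String, String> = serde_json::from_str(trace).unwrap_or_default();
        // Relies on a globally installed text map propagator, e.g. `TraceContextPropagator`.
        opentelemetry::global::get_text_map_propagator(|propagator| propagator.extract(&carrier))
    }

    fn attach_remote_parent(span: &tracing::Span, trace: &str) {
        span.set_parent(restore_remote_context_from_json_str(trace));
    }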
@@ -316,7 +321,8 @@ impl QueryEngine { let dispatcher = self.logger.dispatcher(); async { let span = tracing::info_span!("prisma:engine:disconnect"); - let _ = telemetry::helpers::set_parent_context_from_json_str(&span, &trace); + let parent_context = telemetry::helpers::restore_remote_context_from_json_str(&trace); + span.set_parent(parent_context); async { let mut inner = self.inner.write().await; @@ -393,8 +399,9 @@ impl QueryEngine { let dispatcher = self.logger.dispatcher(); async move { - let span = tracing::info_span!("prisma:engine:itx_runner", user_facing = true, itx_id = field::Empty); - telemetry::helpers::set_parent_context_from_json_str(&span, &trace); + let span = tracing::info_span!("prisma:engine:start_transaction", user_facing = true); + let parent_context = telemetry::helpers::restore_remote_context_from_json_str(&trace); + span.set_parent(parent_context); let tx_opts: TransactionOptions = serde_json::from_str(&input)?; match engine @@ -412,15 +419,20 @@ impl QueryEngine { } // If connected, attempts to commit a transaction with id `tx_id` in the core. - pub async fn commit_transaction(&self, tx_id_str: *const c_char, _trace: *const c_char) -> Result { + pub async fn commit_transaction(&self, tx_id_str: *const c_char, trace: *const c_char) -> Result { let tx_id = get_cstr_safe(tx_id_str).expect("Input string missing"); + let trace = get_cstr_safe(trace).expect("trace is required in transactions"); let inner = self.inner.read().await; let engine = inner.as_engine()?; let dispatcher = self.logger.dispatcher(); async move { - match engine.executor().commit_tx(TxId::from(tx_id)).await { + let span = tracing::info_span!("prisma:engine:commit_transaction", user_facing = true); + let parent_context = telemetry::helpers::restore_remote_context_from_json_str(&trace); + span.set_parent(parent_context); + + match engine.executor().commit_tx(TxId::from(tx_id)).instrument(span).await { Ok(_) => Ok("{}".to_string()), Err(err) => Ok(map_known_error(err)?), } @@ -430,15 +442,19 @@ impl QueryEngine { } // If connected, attempts to roll back a transaction with id `tx_id` in the core. 
- pub async fn rollback_transaction(&self, tx_id_str: *const c_char, _trace: *const c_char) -> Result { + pub async fn rollback_transaction(&self, tx_id_str: *const c_char, trace: *const c_char) -> Result { let tx_id = get_cstr_safe(tx_id_str).expect("Input string missing"); - // let trace = get_cstr_safe(trace_str).expect("trace is required in transactions"); + let trace = get_cstr_safe(trace).expect("trace is required in transactions"); let inner = self.inner.read().await; let engine = inner.as_engine()?; let dispatcher = self.logger.dispatcher(); async move { + let span = tracing::info_span!("prisma:engine:rollback_transaction", user_facing = true); + let parent_context = telemetry::helpers::restore_remote_context_from_json_str(&trace); + span.set_parent(parent_context); + match engine.executor().rollback_tx(TxId::from(tx_id)).await { Ok(_) => Ok("{}".to_string()), Err(err) => Ok(map_known_error(err)?), diff --git a/query-engine/query-engine-c-abi/src/logger.rs b/query-engine/query-engine-c-abi/src/logger.rs index 3585b94e14a1..fbb54bddecd8 100644 --- a/query-engine/query-engine-c-abi/src/logger.rs +++ b/query-engine/query-engine-c-abi/src/logger.rs @@ -1,5 +1,5 @@ use core::fmt; -use query_core::telemetry; + use query_engine_common::logger::StringCallback; // use query_engine_metrics::MetricRegistry; use serde_json::Value; diff --git a/query-engine/query-engine-node-api/Cargo.toml b/query-engine/query-engine-node-api/Cargo.toml index b7b3db9ad5a1..e9d29ab8d1d7 100644 --- a/query-engine/query-engine-node-api/Cargo.toml +++ b/query-engine/query-engine-node-api/Cargo.toml @@ -24,6 +24,7 @@ request-handlers = { path = "../request-handlers", features = ["all"] } query-connector = { path = "../connectors/query-connector" } query-engine-common = { path = "../../libs/query-engine-common" } user-facing-errors = { path = "../../libs/user-facing-errors" } +telemetry = { path = "../../libs/telemetry" } psl = { workspace = true, features = ["all"] } sql-connector = { path = "../connectors/sql-query-connector", package = "sql-query-connector", features = [ "all-native", diff --git a/query-engine/query-engine-node-api/src/engine.rs b/query-engine/query-engine-node-api/src/engine.rs index 01c78b6e2c17..ddeb92351053 100644 --- a/query-engine/query-engine-node-api/src/engine.rs +++ b/query-engine/query-engine-node-api/src/engine.rs @@ -4,7 +4,7 @@ use napi::{threadsafe_function::ThreadSafeCallContext, Env, JsFunction, JsObject use napi_derive::napi; use psl::PreviewFeature; use quaint::connector::ExternalConnector; -use query_core::{protocol::EngineProtocol, relation_load_strategy, schema, telemetry, TransactionOptions, TxId}; +use query_core::{protocol::EngineProtocol, relation_load_strategy, schema, TransactionOptions, TxId}; use query_engine_common::engine::{ map_known_error, stringify_env_values, ConnectedEngine, ConnectedEngineNative, ConstructorOptions, ConstructorOptionsNative, EngineBuilder, EngineBuilderNative, Inner, @@ -14,8 +14,10 @@ use request_handlers::{load_executor, render_graphql_schema, ConnectorKind, Requ use serde::Deserialize; use serde_json::json; use std::{collections::HashMap, future::Future, marker::PhantomData, panic::AssertUnwindSafe, sync::Arc}; +use telemetry::helpers::TraceParent; use tokio::sync::RwLock; -use tracing::{field, instrument::WithSubscriber, Instrument, Span}; +use tracing::{instrument::WithSubscriber, Instrument}; +use tracing_opentelemetry::OpenTelemetrySpanExt; use tracing_subscriber::filter::LevelFilter; use user_facing_errors::Error; @@ -71,11 +73,11 @@ 
impl QueryEngine { native, } = napi_env.from_js_value(options).expect( r###" - Failed to deserialize constructor options. - - This usually happens when the javascript object passed to the constructor is missing + Failed to deserialize constructor options. + + This usually happens when the javascript object passed to the constructor is missing properties for the ConstructorOptions fields that must have some value. - + If you set some of these in javascript through environment variables, make sure there are values for data_model, log_level, and any field that is not Option "###, ); @@ -177,8 +179,9 @@ impl QueryEngine { let dispatcher = self.logger.dispatcher(); async_panic_to_js_error(async { - let span = tracing::info_span!("prisma:engine:connect"); - let _ = telemetry::helpers::set_parent_context_from_json_str(&span, &trace); + let span = tracing::info_span!("prisma:engine:connect", user_facing = true); + let parent_context = telemetry::helpers::restore_remote_context_from_json_str(&trace); + span.set_parent(parent_context); let mut inner = self.inner.write().await; let builder = inner.as_builder()?; @@ -279,8 +282,9 @@ impl QueryEngine { let dispatcher = self.logger.dispatcher(); async_panic_to_js_error(async { - let span = tracing::info_span!("prisma:engine:disconnect"); - let _ = telemetry::helpers::set_parent_context_from_json_str(&span, &trace); + let span = tracing::info_span!("prisma:engine:disconnect", user_facing = true); + let parent_context = telemetry::helpers::restore_remote_context_from_json_str(&trace); + span.set_parent(parent_context); // TODO: when using Node Drivers, we need to call Driver::close() here. @@ -319,17 +323,14 @@ impl QueryEngine { let query = RequestBody::try_from_str(&body, engine.engine_protocol())?; - let span = if tx_id.is_none() { - tracing::info_span!("prisma:engine", user_facing = true) - } else { - Span::none() - }; - - let trace_id = telemetry::helpers::set_parent_context_from_json_str(&span, &trace); + let span = tracing::info_span!("prisma:engine:query", user_facing = true); + let parent_context = telemetry::helpers::restore_remote_context_from_json_str(&trace); + let traceparent = TraceParent::from_remote_context(&parent_context); + span.set_parent(parent_context); async move { let handler = RequestHandler::new(engine.executor(), engine.query_schema(), engine.engine_protocol()); - let response = handler.handle(query, tx_id.map(TxId::from), trace_id).await; + let response = handler.handle(query, tx_id.map(TxId::from), traceparent).await; let serde_span = tracing::info_span!("prisma:engine:response_json_serialization", user_facing = true); Ok(serde_span.in_scope(|| serde_json::to_string(&response))?) @@ -344,36 +345,37 @@ impl QueryEngine { /// If connected, attempts to start a transaction in the core and returns its ID. 
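    /// `input` carries the serialized `TransactionOptions`, and `trace` carries the serialized
    /// parent trace context, which is restored and attached to the transaction span below.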
#[napi] pub async fn start_transaction(&self, input: String, trace: String) -> napi::Result { + let dispatcher = self.logger.dispatcher(); + async_panic_to_js_error(async { let inner = self.inner.read().await; let engine = inner.as_engine()?; + let tx_opts: TransactionOptions = serde_json::from_str(&input)?; - let dispatcher = self.logger.dispatcher(); + let span = tracing::info_span!("prisma:engine:start_transaction", user_facing = true); + let parent_context = telemetry::helpers::restore_remote_context_from_json_str(&trace); + span.set_parent(parent_context); async move { - let span = tracing::info_span!("prisma:engine:itx_runner", user_facing = true, itx_id = field::Empty); - telemetry::helpers::set_parent_context_from_json_str(&span, &trace); - - let tx_opts: TransactionOptions = serde_json::from_str(&input)?; match engine .executor() .start_tx(engine.query_schema().clone(), engine.engine_protocol(), tx_opts) - .instrument(span) .await { Ok(tx_id) => Ok(json!({ "id": tx_id.to_string() }).to_string()), Err(err) => Ok(map_known_error(err)?), } } - .with_subscriber(dispatcher) + .instrument(span) .await }) + .with_subscriber(dispatcher) .await } /// If connected, attempts to commit a transaction with id `tx_id` in the core. #[napi] - pub async fn commit_transaction(&self, tx_id: String, _trace: String) -> napi::Result { + pub async fn commit_transaction(&self, tx_id: String, trace: String) -> napi::Result { async_panic_to_js_error(async { let inner = self.inner.read().await; let engine = inner.as_engine()?; @@ -381,7 +383,11 @@ impl QueryEngine { let dispatcher = self.logger.dispatcher(); async move { - match engine.executor().commit_tx(TxId::from(tx_id)).await { + let span = tracing::info_span!("prisma:engine:commit_transaction", user_facing = true); + let parent_context = telemetry::helpers::restore_remote_context_from_json_str(&trace); + span.set_parent(parent_context); + + match engine.executor().commit_tx(TxId::from(tx_id)).instrument(span).await { Ok(_) => Ok("{}".to_string()), Err(err) => Ok(map_known_error(err)?), } @@ -394,7 +400,7 @@ impl QueryEngine { /// If connected, attempts to roll back a transaction with id `tx_id` in the core. 
#[napi] - pub async fn rollback_transaction(&self, tx_id: String, _trace: String) -> napi::Result { + pub async fn rollback_transaction(&self, tx_id: String, trace: String) -> napi::Result { async_panic_to_js_error(async { let inner = self.inner.read().await; let engine = inner.as_engine()?; @@ -402,7 +408,11 @@ impl QueryEngine { let dispatcher = self.logger.dispatcher(); async move { - match engine.executor().rollback_tx(TxId::from(tx_id)).await { + let span = tracing::info_span!("prisma:engine:rollback_transaction", user_facing = true); + let parent_context = telemetry::helpers::restore_remote_context_from_json_str(&trace); + span.set_parent(parent_context); + + match engine.executor().rollback_tx(TxId::from(tx_id)).instrument(span).await { Ok(_) => Ok("{}".to_string()), Err(err) => Ok(map_known_error(err)?), } diff --git a/query-engine/query-engine-node-api/src/logger.rs b/query-engine/query-engine-node-api/src/logger.rs index b86343bb4a94..a194b3a61dde 100644 --- a/query-engine/query-engine-node-api/src/logger.rs +++ b/query-engine/query-engine-node-api/src/logger.rs @@ -1,6 +1,6 @@ use core::fmt; use napi::threadsafe_function::{ErrorStrategy, ThreadsafeFunction, ThreadsafeFunctionCallMode}; -use query_core::telemetry; + use query_engine_common::logger::StringCallback; use query_engine_metrics::MetricRegistry; use serde_json::Value; diff --git a/query-engine/query-engine-wasm/Cargo.toml b/query-engine/query-engine-wasm/Cargo.toml index 089dfba63bfd..982f31316f7b 100644 --- a/query-engine/query-engine-wasm/Cargo.toml +++ b/query-engine/query-engine-wasm/Cargo.toml @@ -44,6 +44,7 @@ request-handlers = { path = "../request-handlers", default-features = false, fea ] } query-core = { path = "../core" } driver-adapters = { path = "../driver-adapters" } +telemetry = { path = "../../libs/telemetry" } quaint.workspace = true connection-string.workspace = true js-sys.workspace = true diff --git a/query-engine/query-engine-wasm/src/wasm/engine.rs b/query-engine/query-engine-wasm/src/wasm/engine.rs index ae6fe40f8728..209dad2edf3f 100644 --- a/query-engine/query-engine-wasm/src/wasm/engine.rs +++ b/query-engine/query-engine-wasm/src/wasm/engine.rs @@ -13,15 +13,17 @@ use query_core::{ protocol::EngineProtocol, relation_load_strategy, schema::{self}, - telemetry, TransactionOptions, TxId, + TransactionOptions, TxId, }; use query_engine_common::engine::{map_known_error, ConnectedEngine, ConstructorOptions, EngineBuilder, Inner}; use request_handlers::ConnectorKind; use request_handlers::{load_executor, RequestBody, RequestHandler}; use serde_json::json; use std::{marker::PhantomData, sync::Arc}; +use telemetry::helpers::TraceParent; use tokio::sync::RwLock; -use tracing::{field, instrument::WithSubscriber, Instrument, Level, Span}; +use tracing::{instrument::WithSubscriber, Instrument, Level}; +use tracing_opentelemetry::OpenTelemetrySpanExt; use tracing_subscriber::filter::LevelFilter; use wasm_bindgen::prelude::wasm_bindgen; @@ -89,7 +91,8 @@ impl QueryEngine { async { let span = tracing::info_span!("prisma:engine:connect"); - let _ = telemetry::helpers::set_parent_context_from_json_str(&span, &trace); + let parent_context = telemetry::helpers::restore_remote_context_from_json_str(&trace); + span.set_parent(parent_context); let mut inner = self.inner.write().await; let builder = inner.as_builder()?; @@ -149,8 +152,9 @@ impl QueryEngine { let dispatcher = self.logger.dispatcher(); async { - let span = tracing::info_span!("prisma:engine:disconnect"); - let _ = 
telemetry::helpers::set_parent_context_from_json_str(&span, &trace); + let span = tracing::info_span!("prisma:engine:disconnect", user_facing = true); + let parent_context = telemetry::helpers::restore_remote_context_from_json_str(&trace); + span.set_parent(parent_context); async { let mut inner = self.inner.write().await; @@ -189,17 +193,14 @@ impl QueryEngine { let query = RequestBody::try_from_str(&body, engine.engine_protocol())?; async move { - let span = if tx_id.is_none() { - tracing::info_span!("prisma:engine", user_facing = true) - } else { - Span::none() - }; - - let trace_id = telemetry::helpers::set_parent_context_from_json_str(&span, &trace); + let span = tracing::info_span!("prisma:engine:query", user_facing = true); + let parent_context = telemetry::helpers::restore_remote_context_from_json_str(&trace); + let traceparent = TraceParent::from_remote_context(&parent_context); + span.set_parent(parent_context); let handler = RequestHandler::new(engine.executor(), engine.query_schema(), engine.engine_protocol()); let response = handler - .handle(query, tx_id.map(TxId::from), trace_id) + .handle(query, tx_id.map(TxId::from), traceparent) .instrument(span) .await; @@ -219,13 +220,15 @@ impl QueryEngine { let dispatcher = self.logger.dispatcher(); async move { - let span = tracing::info_span!("prisma:engine:itx_runner", user_facing = true, itx_id = field::Empty); + let span = tracing::info_span!("prisma:engine:start_transaction", user_facing = true); + let parent_context = telemetry::helpers::restore_remote_context_from_json_str(&trace); + let traceparent = TraceParent::from_remote_context(&parent_context); + span.set_parent(parent_context); let tx_opts: TransactionOptions = serde_json::from_str(&input)?; match engine .executor() .start_tx(engine.query_schema().clone(), engine.engine_protocol(), tx_opts) - .instrument(span) .await { Ok(tx_id) => Ok(json!({ "id": tx_id.to_string() }).to_string()), @@ -245,6 +248,11 @@ impl QueryEngine { let dispatcher = self.logger.dispatcher(); async move { + let span = tracing::info_span!("prisma:engine:commit_transaction", user_facing = true); + let parent_context = telemetry::helpers::restore_remote_context_from_json_str(&trace); + let traceparent = TraceParent::from_remote_context(&parent_context); + span.set_parent(parent_context); + match engine.executor().commit_tx(TxId::from(tx_id)).await { Ok(_) => Ok("{}".to_string()), Err(err) => Ok(map_known_error(err)?), @@ -263,6 +271,11 @@ impl QueryEngine { let dispatcher = self.logger.dispatcher(); async move { + let span = tracing::info_span!("prisma:engine:rollback_transaction", user_facing = true); + let parent_context = telemetry::helpers::restore_remote_context_from_json_str(&trace); + let traceparent = TraceParent::from_remote_context(&parent_context); + span.set_parent(parent_context); + match engine.executor().rollback_tx(TxId::from(tx_id)).await { Ok(_) => Ok("{}".to_string()), Err(err) => Ok(map_known_error(err)?), diff --git a/query-engine/query-engine/Cargo.toml b/query-engine/query-engine/Cargo.toml index 61d93f6a29d5..218eefa43fa4 100644 --- a/query-engine/query-engine/Cargo.toml +++ b/query-engine/query-engine/Cargo.toml @@ -35,6 +35,7 @@ opentelemetry-otlp = { version = "0.10", features = ["tls", "tls-roots"] } query-engine-metrics = { path = "../metrics" } user-facing-errors = { path = "../../libs/user-facing-errors" } +telemetry = { path = "../../libs/telemetry", features = ["metrics"] } [dev-dependencies] serial_test = "*" diff --git a/query-engine/query-engine/src/logger.rs 
b/query-engine/query-engine/src/logger.rs index 10f6ced58b86..f1c9143ddbbd 100644 --- a/query-engine/query-engine/src/logger.rs +++ b/query-engine/query-engine/src/logger.rs @@ -3,7 +3,6 @@ use opentelemetry::{ KeyValue, }; use opentelemetry_otlp::WithExportConfig; -use query_core::telemetry; use query_engine_metrics::MetricRegistry; use tracing::{dispatcher::SetGlobalDefaultError, subscriber}; use tracing_subscriber::{filter::filter_fn, layer::SubscriberExt, Layer}; diff --git a/query-engine/query-engine/src/server/mod.rs b/query-engine/query-engine/src/server/mod.rs index 01b61a07b6b4..2a49683227e8 100644 --- a/query-engine/query-engine/src/server/mod.rs +++ b/query-engine/query-engine/src/server/mod.rs @@ -3,11 +3,9 @@ use crate::features::Feature; use crate::{opt::PrismaOpt, PrismaResult}; use hyper::service::{make_service_fn, service_fn}; use hyper::{header::CONTENT_TYPE, Body, HeaderMap, Method, Request, Response, Server, StatusCode}; -use opentelemetry::trace::TraceContextExt; +use opentelemetry::trace::{TraceContextExt, TraceId}; use opentelemetry::{global, propagation::Extractor}; -use query_core::helpers::*; -use query_core::telemetry::capturing::TxTraceExt; -use query_core::{telemetry, ExtendedTransactionUserFacingError, TransactionOptions, TxId}; +use query_core::{ExtendedTransactionUserFacingError, TransactionOptions, TxId}; use request_handlers::{dmmf, render_graphql_schema, RequestBody, RequestHandler}; use serde::Serialize; use serde_json::json; @@ -15,7 +13,9 @@ use std::collections::HashMap; use std::net::SocketAddr; use std::sync::Arc; use std::time::Instant; -use tracing::{field, Instrument, Span}; +use telemetry::capturing::Capturer; +use telemetry::helpers::TraceParent; +use tracing::{Instrument, Span}; use tracing_opentelemetry::OpenTelemetrySpanExt; /// Starts up the graphql query engine server @@ -111,98 +111,42 @@ async fn request_handler(cx: Arc, req: Request) -> Result { - let handler = RequestHandler::new(cx.executor(), cx.query_schema(), cx.engine_protocol()); - let mut result = handler.handle(body, tx_id, traceparent).instrument(span).await; - - if let telemetry::capturing::Capturer::Enabled(capturer) = &capture_config { - let telemetry = capturer.fetch_captures().await; - if let Some(telemetry) = telemetry { - result.set_extension("traces".to_owned(), json!(telemetry.traces)); - result.set_extension("logs".to_owned(), json!(telemetry.logs)); - } + let tx_id = try_get_transaction_id(headers); + let (span, traceparent, capturer) = setup_telemetry(headers, &tx_id).await; + + let buffer = hyper::body::to_bytes(req.into_body()).await?; + let request_body = RequestBody::try_from_slice(buffer.as_ref(), cx.engine_protocol()); + + match request_body { + Ok(body) => { + let handler = RequestHandler::new(cx.executor(), cx.query_schema(), cx.engine_protocol()); + let mut result = handler.handle(body, tx_id, traceparent).instrument(span).await; + + if let telemetry::capturing::Capturer::Enabled(capturer) = &capturer { + let telemetry = capturer.fetch_captures().await; + if let Some(telemetry) = telemetry { + result.set_extension("traces".to_owned(), json!(telemetry.traces)); + result.set_extension("logs".to_owned(), json!(telemetry.logs)); } - - let res = build_json_response(StatusCode::OK, &result); - - Ok(res) } - Err(e) => { - let ufe: user_facing_errors::Error = request_handlers::HandlerError::query_conversion(format!( - "Error parsing {:?} query. Ensure that engine protocol of the client and the engine matches. 
{}", - cx.engine_protocol(), - e - )) - .into(); - let res = build_json_response(StatusCode::UNPROCESSABLE_ENTITY, &ufe); + let res = build_json_response(StatusCode::OK, &result); - Ok(res) - } + Ok(res) } - }; + Err(e) => { + let ufe: user_facing_errors::Error = request_handlers::HandlerError::query_conversion(format!( + "Error parsing {:?} query. Ensure that engine protocol of the client and the engine matches. {}", + cx.engine_protocol(), + e + )) + .into(); + + let res = build_json_response(StatusCode::UNPROCESSABLE_ENTITY, &ufe); - work.await + Ok(res) + } + } } /// Expose the GraphQL playground if enabled. @@ -282,45 +226,9 @@ async fn transaction_start_handler(cx: Arc, req: Request) - let body_start = req.into_body(); let full_body = hyper::body::to_bytes(body_start).await?; let mut tx_opts: TransactionOptions = serde_json::from_slice(full_body.as_ref()).unwrap(); - let tx_id = tx_opts.with_new_transaction_id(); - - // This is the span we use to instrument the execution of a transaction. This span will be open - // during the tx execution, and held in the ITXServer for that transaction (see ITXServer]) - let span = info_span!("prisma:engine:itx_runner", user_facing = true, itx_id = field::Empty); - - // If telemetry needs to be captured, we use the span trace_id to correlate the logs happening - // during the different operations within a transaction. The trace_id is propagated in the - // traceparent header, but if it's not present, we need to synthetically create one for the - // transaction. This is needed, in case the client is interested in capturing logs and not - // traces, because: - // - The client won't send a traceparent header - // - A transaction initial span is created here (prisma:engine:itx_runner) and stored in the - // ITXServer for that transaction - // - When a query comes in, the graphql handler process it, but we need to tell the capturer - // to start capturing logs, and for that we need a trace_id. There are two places were we - // could get that information from: - // - First, it's the traceparent, but the client didn't send it, because they are only - // interested in logs. - // - Second, it's the root span for the transaction, but it's not in scope but instead - // stored in the ITXServer, in a different tokio task. - // - // For the above reasons, we need to create a trace_id that we can predict and use accross the - // different operations happening within a transaction. So we do it by converting the tx_id - // into a trace_id, leaning on the fact that the tx_id has more entropy, and there's no - // information loss. 
- let capture_settings = capture_settings(&headers); - let traceparent = traceparent(&headers); - if traceparent.is_none() && capture_settings.logs_enabled() { - span.set_parent(tx_id.into_trace_context()) - } else { - span.set_parent(get_parent_span_context(&headers)) - } - let trace_id = span.context().span().span_context().trace_id(); - let capture_config = telemetry::capturing::capturer(trace_id, capture_settings); + let tx_id = Some(tx_opts.with_new_transaction_id()); - if let telemetry::capturing::Capturer::Enabled(capturer) = &capture_config { - capturer.start_capturing().await; - } + let (span, _traceparent, capturer) = setup_telemetry(&headers, &tx_id).await; let result = cx .executor @@ -328,12 +236,7 @@ async fn transaction_start_handler(cx: Arc, req: Request) - .instrument(span) .await; - let telemetry = if let telemetry::capturing::Capturer::Enabled(capturer) = &capture_config { - capturer.fetch_captures().await - } else { - None - }; - + let telemetry = capturer.try_fetch_captures().await; match result { Ok(tx_id) => { let result = if let Some(telemetry) = telemetry { @@ -355,20 +258,11 @@ async fn transaction_commit_handler( req: Request, tx_id: TxId, ) -> Result, hyper::Error> { - let capture_config = capture_config(req.headers(), tx_id.clone()); - - if let telemetry::capturing::Capturer::Enabled(capturer) = &capture_config { - capturer.start_capturing().await; - } - - let result = cx.executor.commit_tx(tx_id).await; + let (span, _traceparent, capturer) = setup_telemetry(req.headers(), &Some(tx_id.clone())).await; - let telemetry = if let telemetry::capturing::Capturer::Enabled(capturer) = &capture_config { - capturer.fetch_captures().await - } else { - None - }; + let result = cx.executor.commit_tx(tx_id).instrument(span).await; + let telemetry = capturer.try_fetch_captures().await; match result { Ok(_) => Ok(empty_json_to_http_resp(telemetry)), Err(err) => Ok(err_to_http_resp(err, telemetry)), @@ -380,20 +274,11 @@ async fn transaction_rollback_handler( req: Request, tx_id: TxId, ) -> Result, hyper::Error> { - let capture_config = capture_config(req.headers(), tx_id.clone()); + let (span, _traceparent, capturer) = setup_telemetry(req.headers(), &Some(tx_id.clone())).await; - if let telemetry::capturing::Capturer::Enabled(capturer) = &capture_config { - capturer.start_capturing().await; - } - - let result = cx.executor.rollback_tx(tx_id).await; - - let telemetry = if let telemetry::capturing::Capturer::Enabled(capturer) = &capture_config { - capturer.fetch_captures().await - } else { - None - }; + let result = cx.executor.rollback_tx(tx_id).instrument(span).await; + let telemetry = capturer.try_fetch_captures().await; match result { Ok(_) => Ok(empty_json_to_http_resp(telemetry)), Err(err) => Ok(err_to_http_resp(err, telemetry)), @@ -470,59 +355,67 @@ fn err_to_http_resp( build_json_response(status, &err) } -fn capture_config(headers: &HeaderMap, tx_id: TxId) -> telemetry::capturing::Capturer { - let capture_settings = capture_settings(headers); - let mut traceparent = traceparent(headers); - - if traceparent.is_none() && capture_settings.is_enabled() { - traceparent = Some(tx_id.as_traceparent()) - } - - let trace_id = get_trace_id_from_traceparent(traceparent.as_deref()); +async fn setup_telemetry(headers: &HeaderMap, tx_id: &Option) -> (Span, Option, Capturer) { + let capture_settings = { + let settings = headers + .get("X-capture-telemetry") + .and_then(|value| value.to_str().ok()) + .unwrap_or_default(); + telemetry::capturing::Settings::from(settings) + }; - 
@@ -470,59 +355,67 @@ fn err_to_http_resp(
     build_json_response(status, &err)
 }
 
-fn capture_config(headers: &HeaderMap, tx_id: TxId) -> telemetry::capturing::Capturer {
-    let capture_settings = capture_settings(headers);
-    let mut traceparent = traceparent(headers);
-
-    if traceparent.is_none() && capture_settings.is_enabled() {
-        traceparent = Some(tx_id.as_traceparent())
-    }
-
-    let trace_id = get_trace_id_from_traceparent(traceparent.as_deref());
+async fn setup_telemetry(headers: &HeaderMap, tx_id: &Option<TxId>) -> (Span, Option<TraceParent>, Capturer) {
+    let capture_settings = {
+        let settings = headers
+            .get("X-capture-telemetry")
+            .and_then(|value| value.to_str().ok())
+            .unwrap_or_default();
+        telemetry::capturing::Settings::from(settings)
+    };
 
-    telemetry::capturing::capturer(trace_id, capture_settings)
-}
+    // Parse the parent trace_id and span_id from the `traceparent` header and attach them to the
+    // current context. Internally, this relies on a global text map propagator having been
+    // installed that can handle the `traceparent` header (for example, `TraceContextPropagator`).
+    let parent_context = {
+        let extractor = HeaderExtractor(headers);
+        let context = global::get_text_map_propagator(|propagator| propagator.extract(&extractor));
+        if context.span().span_context().is_valid() {
+            Some(context)
+        } else {
+            None
+        }
+    };
 
-#[allow(clippy::bind_instead_of_map)]
-fn capture_settings(headers: &HeaderMap) -> telemetry::capturing::Settings {
-    const CAPTURE_TELEMETRY_HEADER: &str = "X-capture-telemetry";
-    let s = if let Some(hv) = headers.get(CAPTURE_TELEMETRY_HEADER) {
-        hv.to_str().unwrap_or("")
+    let span = info_span!("prisma:engine", user_facing = true);
+    let traceparent = if let Some(parent_context) = parent_context {
+        let requester_traceparent = TraceParent::from_remote_context(&parent_context);
+        span.set_parent(parent_context);
+        requester_traceparent
+    } else if let Some(tx_id) = &tx_id {
+        // Note: we don't set the transaction traceparent as a parent for the `prisma:engine`
+        // span. The span that the transaction traceparent refers to is completely artificial and
+        // never emitted, so setting it as a parent of a valid span would produce a broken trace.
+        // The only reason we construct this artificial traceparent is to correlate logs from
+        // different operations within the same transaction. For the same reason, we don't
+        // construct it when capturing logs and traces is disabled.
+        if capture_settings.is_enabled() {
+            Some(tx_id.as_traceparent())
+        } else {
+            None
+        }
     } else {
-        ""
+        None
     };
-    telemetry::capturing::Settings::from(s)
-}
-
-fn traceparent(headers: &HeaderMap) -> Option<String> {
-    const TRACEPARENT_HEADER: &str = "traceparent";
+    let trace_id = traceparent
+        .as_ref()
+        .map(TraceParent::trace_id)
+        .unwrap_or(TraceId::INVALID);
 
-    let value = headers
-        .get(TRACEPARENT_HEADER)
-        .and_then(|h| h.to_str().ok())
-        .map(|s| s.to_owned());
+    let capturer = telemetry::capturing::capturer(trace_id, capture_settings);
+    capturer.try_start_capturing().await;
 
-    let is_valid_traceparent = |s: &String| s.split_terminator('-').count() >= 4;
-
-    value.filter(is_valid_traceparent)
+    (span, traceparent, capturer)
 }
 
-fn transaction_id(headers: &HeaderMap) -> Option<TxId> {
-    const TRANSACTION_ID_HEADER: &str = "X-transaction-id";
+fn try_get_transaction_id(headers: &HeaderMap) -> Option<TxId> {
     headers
-        .get(TRANSACTION_ID_HEADER)
+        .get("X-transaction-id")
         .and_then(|h| h.to_str().ok())
         .map(TxId::from)
 }
 
-/// If the client sends us a trace and span id, extracting a new context if the
-/// headers are set. If not, returns current context.
-fn get_parent_span_context(headers: &HeaderMap) -> opentelemetry::Context {
-    let extractor = HeaderExtractor(headers);
-    global::get_text_map_propagator(|propagator| propagator.extract(&extractor))
-}
-
 fn build_json_response<T>(status_code: StatusCode, value: &T) -> Response<Body>
 where
     T: ?Sized + Serialize,
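
The "artificial traceparent" described in the comment inside `setup_telemetry` amounts to reusing the transaction id's entropy as a W3C trace id, so that logs from every operation in one transaction share a predictable id. A hypothetical sketch of that conversion, assuming 16 bytes of the transaction id are available (the actual logic lives in `TxId::as_traceparent`, which is not part of these hunks):

    // Hypothetical sketch, not the real `TxId::as_traceparent` implementation.
    fn synthetic_traceparent(tx_id_bytes: [u8; 16]) -> String {
        // A cuid-based transaction id carries enough entropy for its bytes to
        // double as a trace id without information loss.
        let trace_id: String = tx_id_bytes.iter().map(|b| format!("{b:02x}")).collect();
        // Version 00, a fixed parent span id, and the sampled flag. This value
        // only keys captured logs and is never emitted as a real span.
        format!("00-{trace_id}-{:016x}-01", 1u64)
    }
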
diff --git a/query-engine/query-engine/src/tracer.rs b/query-engine/query-engine/src/tracer.rs
index 8763ba892f4f..75f6630931b5 100644
--- a/query-engine/query-engine/src/tracer.rs
+++ b/query-engine/query-engine/src/tracer.rs
@@ -8,7 +8,7 @@ use opentelemetry::{
     },
     trace::TracerProvider,
 };
-use query_core::telemetry;
+
 use std::io::{stdout, Stdout};
 use std::{fmt::Debug, io::Write};
 
diff --git a/query-engine/request-handlers/Cargo.toml b/query-engine/request-handlers/Cargo.toml
index e23d5927c555..7343f498e34d 100644
--- a/query-engine/request-handlers/Cargo.toml
+++ b/query-engine/request-handlers/Cargo.toml
@@ -8,6 +8,7 @@ psl.workspace = true
 query-structure = { path = "../query-structure" }
 query-core = { path = "../core" }
 user-facing-errors = { path = "../../libs/user-facing-errors" }
+telemetry = { path = "../../libs/telemetry" }
 quaint.workspace = true
 dmmf_crate = { path = "../dmmf", package = "dmmf" }
 itertools.workspace = true
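
The extraction in `setup_telemetry` only yields a valid parent context if a concrete text map propagator was registered globally during startup; otherwise `global::get_text_map_propagator` falls back to a no-op and incoming `traceparent` headers are silently ignored. A minimal sketch of that one-time wiring (where exactly the engine performs it is outside the hunks shown here):

    use opentelemetry::global;
    use opentelemetry::sdk::propagation::TraceContextPropagator;

    // Register the W3C trace-context propagator as the process-wide default.
    fn install_trace_context_propagator() {
        global::set_text_map_propagator(TraceContextPropagator::new());
    }
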
diff --git a/query-engine/request-handlers/src/handler.rs b/query-engine/request-handlers/src/handler.rs
index 570a109a15fd..123af6541c45 100644
--- a/query-engine/request-handlers/src/handler.rs
+++ b/query-engine/request-handlers/src/handler.rs
@@ -13,6 +13,7 @@ use query_core::{
 };
 use query_structure::{parse_datetime, stringify_datetime, PrismaValue};
 use std::{collections::HashMap, fmt, panic::AssertUnwindSafe, str::FromStr};
+use telemetry::helpers::TraceParent;
 
 type ArgsToResult = (HashMap<String, ArgumentValue>, IndexMap<String, Item>);
 
@@ -41,24 +42,34 @@ impl<'a> RequestHandler<'a> {
         }
     }
 
-    pub async fn handle(&self, body: RequestBody, tx_id: Option<TxId>, trace_id: Option<String>) -> PrismaResponse {
+    pub async fn handle(
+        &self,
+        body: RequestBody,
+        tx_id: Option<TxId>,
+        traceparent: Option<TraceParent>,
+    ) -> PrismaResponse {
         tracing::debug!("Incoming GraphQL query: {:?}", &body);
 
         match body.into_doc(self.query_schema) {
-            Ok(QueryDocument::Single(query)) => self.handle_single(query, tx_id, trace_id).await,
+            Ok(QueryDocument::Single(query)) => self.handle_single(query, tx_id, traceparent).await,
             Ok(QueryDocument::Multi(batch)) => match batch.compact(self.query_schema) {
                 BatchDocument::Multi(batch, transaction) => {
-                    self.handle_batch(batch, transaction, tx_id, trace_id).await
+                    self.handle_batch(batch, transaction, tx_id, traceparent).await
                 }
-                BatchDocument::Compact(compacted) => self.handle_compacted(compacted, tx_id, trace_id).await,
+                BatchDocument::Compact(compacted) => self.handle_compacted(compacted, tx_id, traceparent).await,
             },
             Err(err) => PrismaResponse::Single(GQLError::from_handler_error(err).into()),
         }
     }
 
-    async fn handle_single(&self, query: Operation, tx_id: Option<TxId>, trace_id: Option<String>) -> PrismaResponse {
-        let gql_response = match AssertUnwindSafe(self.handle_request(query, tx_id, trace_id))
+    async fn handle_single(
+        &self,
+        query: Operation,
+        tx_id: Option<TxId>,
+        traceparent: Option<TraceParent>,
+    ) -> PrismaResponse {
+        let gql_response = match AssertUnwindSafe(self.handle_request(query, tx_id, traceparent))
             .catch_unwind()
             .await
         {
@@ -75,14 +86,14 @@ impl<'a> RequestHandler<'a> {
         queries: Vec<Operation>,
         transaction: Option<BatchTransaction>,
         tx_id: Option<TxId>,
-        trace_id: Option<String>,
+        traceparent: Option<TraceParent>,
     ) -> PrismaResponse {
         match AssertUnwindSafe(self.executor.execute_all(
             tx_id,
             queries,
             transaction,
             self.query_schema.clone(),
-            trace_id,
+            traceparent,
             self.engine_protocol,
         ))
         .catch_unwind()
@@ -108,7 +119,7 @@ impl<'a> RequestHandler<'a> {
         &self,
         document: CompactedDocument,
         tx_id: Option<TxId>,
-        trace_id: Option<String>,
+        traceparent: Option<TraceParent>,
     ) -> PrismaResponse {
         let plural_name = document.plural_name();
         let singular_name = document.single_name();
@@ -117,7 +128,7 @@
         let arguments = document.arguments;
         let nested_selection = document.nested_selection;
 
-        match AssertUnwindSafe(self.handle_request(document.operation, tx_id, trace_id))
+        match AssertUnwindSafe(self.handle_request(document.operation, tx_id, traceparent))
             .catch_unwind()
             .await
         {
@@ -200,14 +211,14 @@ impl<'a> RequestHandler<'a> {
         &self,
         query_doc: Operation,
         tx_id: Option<TxId>,
-        trace_id: Option<String>,
+        traceparent: Option<TraceParent>,
     ) -> query_core::Result<ResponseData> {
         self.executor
             .execute(
                 tx_id,
                 query_doc,
                 self.query_schema.clone(),
-                trace_id,
+                traceparent,
                 self.engine_protocol,
             )
             .await
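
Throughout `handler.rs`, the untyped `trace_id: Option<String>` is replaced by `Option<TraceParent>`, which parses and validates the value once at the edge instead of threading a raw string through the executor. For reference, a rough sketch of what parsing a W3C `traceparent` involves (the actual parser belongs to `telemetry::helpers` and may differ in detail):

    // Rough sketch of W3C trace-context parsing: `version-traceid-spanid-flags`.
    fn parse_traceparent(value: &str) -> Option<(u128, u64, u8)> {
        let mut parts = value.splitn(4, '-');
        let _version = u8::from_str_radix(parts.next()?, 16).ok()?;
        let trace_id = u128::from_str_radix(parts.next()?, 16).ok()?;
        let span_id = u64::from_str_radix(parts.next()?, 16).ok()?;
        let flags = u8::from_str_radix(parts.next()?, 16).ok()?;
        // All-zero trace or span ids are invalid per the specification.
        (trace_id != 0 && span_id != 0).then_some((trace_id, span_id, flags))
    }
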