diff --git a/.vscode/settings.json b/.vscode/settings.json
index d9a338ecdf..b84364619b 100644
--- a/.vscode/settings.json
+++ b/.vscode/settings.json
@@ -33,5 +33,6 @@
     "deno.config": "./deno.jsonc",
     "deno.importMap": "./supabase/functions/import-map.json",
     "deno.lint": true,
-    "deno.path": ".build/package/bin/deno"
+    "deno.path": ".build/package/bin/deno",
+    "rust-analyzer.imports.group.enable": false
 }
diff --git a/crates/doc/src/schema.rs b/crates/doc/src/schema.rs
index d57498f284..2d33afb848 100644
--- a/crates/doc/src/schema.rs
+++ b/crates/doc/src/schema.rs
@@ -1,7 +1,6 @@
-use std::collections::{BTreeMap, BTreeSet};
-
+use crate::inference::Shape;
 use json::schema::{
-    self, keywords,
+    keywords,
     types::{self, Set},
 };
 use schemars::{
@@ -9,8 +8,7 @@
     schema::{InstanceType, RootSchema, Schema, SchemaObject, SingleOrVec},
 };
 use serde_json::json;
-
-use crate::inference::Shape;
+use std::collections::{BTreeMap, BTreeSet};
 
 #[derive(Debug, Default)]
 pub struct SchemaBuilder {
diff --git a/rustfmt.toml b/rustfmt.toml
new file mode 100644
index 0000000000..9ad32d58ee
--- /dev/null
+++ b/rustfmt.toml
@@ -0,0 +1 @@
+group_imports = "One"
diff --git a/site/docs/reference/Connectors/capture-connectors/postgres-batch.md b/site/docs/reference/Connectors/capture-connectors/postgres-batch.md
new file mode 100644
index 0000000000..c7f7bf33a0
--- /dev/null
+++ b/site/docs/reference/Connectors/capture-connectors/postgres-batch.md
@@ -0,0 +1,38 @@
+# PostgreSQL Batch Query Connector
+
+This connector captures data from PostgreSQL into Flow collections by periodically
+executing queries and translating the results into JSON documents.
+
+We recommend using our [PostgreSQL CDC Connector](http://go.estuary.dev/source-postgres) instead
+if possible. CDC provides lower-latency data capture, delete and update events, and usually
+has a smaller impact on the source database.
+
+However, there are some circumstances where CDC might not be feasible. Perhaps you need
+to capture from a managed PostgreSQL instance which doesn't support logical replication,
+or perhaps you need to capture the contents of a view or the result of an ad-hoc query.
+That's the sort of situation this connector is intended for.
+
+The most important caveat to be aware of when using this connector is that **it will
+periodically execute its update query over and over**. At the default polling interval of
+5 minutes, a naive `SELECT * FROM foo` query against a 100 MiB view will produce roughly
+30 GiB/day of ingested data (288 polls/day × 100 MiB), most of it duplicated.
+
+This is why the connector's autodiscovery logic only returns ordinary tables: for those
+we can use the `xmin` system column as a cursor and ask the database to
+`SELECT xmin, * FROM foo WHERE xmin::text::bigint > $1;`, fetching only rows modified
+since the previous poll.
+
+If you start editing these queries, or manually add capture bindings for views or
+ad-hoc queries, you need to either restrict the query to "just the new rows since
+last time" or set your polling interval high enough that the data rate
+`<dataset size> / <polling interval>` is an amount of data you're willing to deal with.
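+
+For example, suppose a hypothetical view `my_view` has rows carrying an `updated_at`
+timestamp (any monotonically increasing column would work equally well). Such a view
+could be polled incrementally with a query along these lines:
+
+```sql
+-- Sketch of a cursor-restricted poll, assuming the view exposes an `updated_at` column.
+-- `$1` stands for the cursor value saved from the previous poll.
+SELECT * FROM my_view WHERE updated_at > $1 ORDER BY updated_at;
+```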