Merge branch 'main' into trinity/batch-delete
trinity-1686a authored Sep 10, 2024
2 parents b426d80 + ec951aa commit 6ff6430
Showing 47 changed files with 256 additions and 116 deletions.
2 changes: 2 additions & 0 deletions quickwit/Cargo.lock

Some generated files are not rendered by default.

1 change: 1 addition & 0 deletions quickwit/Cargo.toml
@@ -215,6 +215,7 @@ serde_json_borrow = "0.5"
serde_qs = { version = "0.12", features = ["warp"] }
serde_with = "3.9.0"
serde_yaml = "0.9"
+serial_test = { version = "3.1.1", features = ["file_locks"] }
siphasher = "0.3"
smallvec = "1"
sqlx = { version = "0.7", features = [
9 changes: 4 additions & 5 deletions quickwit/quickwit-actors/src/actor.rs
@@ -36,11 +36,10 @@ pub enum ActorExitStatus {
/// The actor successfully exited.
///
/// It happens either because:
-/// - all of the existing mailboxes were dropped and the actor message queue was exhausted.
-/// No new message could ever arrive to the actor. (This exit is triggered by the framework.)
-/// or
-/// - the actor `process_message` method returned `Err(ExitStatusCode::Success)`.
-/// (This exit is triggered by the actor implementer.)
+/// - all of the existing mailboxes were dropped and the actor message queue was exhausted. No
+///   new message could ever arrive to the actor. (This exit is triggered by the framework.) or
+/// - the actor `process_message` method returned `Err(ExitStatusCode::Success)`. (This exit is
+///   triggered by the actor implementer.)
///
/// (This is equivalent to exit status code 0.)
/// Note that this is not really an error.
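The reflowed doc comment above describes the two paths to a success exit. As a self-contained toy of the second path (this is not the quickwit-actors API; the type and signature below are illustrative assumptions), a handler signals a graceful stop by returning an error value that carries a success status:

#[derive(Debug)]
enum ExitStatus {
    Success,
}

struct CountingActor {
    count: u64,
}

impl CountingActor {
    // Returning Err(ExitStatus::Success) asks the "framework" to stop the
    // actor gracefully, mirroring the second bullet above.
    fn process_message(&mut self, msg: u64) -> Result<(), ExitStatus> {
        if msg == 0 {
            return Err(ExitStatus::Success);
        }
        self.count += msg;
        Ok(())
    }
}

fn main() {
    let mut actor = CountingActor { count: 0 };
    for msg in [3, 2, 0] {
        if let Err(status) = actor.process_message(msg) {
            println!("actor exited with {status:?}, count = {}", actor.count);
            break;
        }
    }
}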
7 changes: 3 additions & 4 deletions quickwit/quickwit-actors/src/scheduler.rs
@@ -323,10 +323,9 @@ impl Scheduler {
/// Updates the simulated time shift, if appropriate.
///
/// We advance time if:
-/// - someone is actually requesting for a simulated fast forward in time.
-/// (if Universe::simulate_time_shift(..) has been called).
-/// - no message is queued for processing, no initialize or no finalize
-/// is being processed.
+/// - someone is actually requesting for a simulated fast forward in time. (if
+///   Universe::simulate_time_shift(..) has been called).
+/// - no message is queued for processing, no initialize or no finalize is being processed.
fn advance_time_if_necessary(&mut self) {
let Some(scheduler_client) = self.scheduler_client() else {
return;
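A self-contained toy of the guard logic the doc comment describes (field names are illustrative assumptions, not the actual Scheduler internals): simulated time only advances when a shift was requested and the scheduler is otherwise idle.

use std::time::Duration;

struct ToyScheduler {
    requested_time_shift: Option<Duration>,
    queued_messages: usize,
    callbacks_in_flight: usize,
    simulated_offset: Duration,
}

impl ToyScheduler {
    fn advance_time_if_necessary(&mut self) {
        let Some(shift) = self.requested_time_shift.take() else {
            return; // nobody asked for a fast forward
        };
        if self.queued_messages == 0 && self.callbacks_in_flight == 0 {
            self.simulated_offset += shift; // idle: advance simulated time
        } else {
            self.requested_time_shift = Some(shift); // busy: retry later
        }
    }
}

fn main() {
    let mut scheduler = ToyScheduler {
        requested_time_shift: Some(Duration::from_secs(3600)),
        queued_messages: 0,
        callbacks_in_flight: 0,
        simulated_offset: Duration::ZERO,
    };
    scheduler.advance_time_if_necessary();
    println!("simulated offset: {:?}", scheduler.simulated_offset);
}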
7 changes: 1 addition & 6 deletions quickwit/quickwit-cli/tests/helpers.rs
@@ -114,7 +114,6 @@ pub struct TestResourceFiles {
pub index_config_without_uri: Uri,
pub index_config_with_retention: Uri,
pub log_docs: Uri,
-pub wikipedia_docs: Uri,
}

/// A struct to hold few info about the test environment.
@@ -130,7 +129,6 @@ pub struct TestEnv {
/// The metastore URI.
pub metastore_uri: Uri,
pub metastore_resolver: MetastoreResolver,
-pub metastore: MetastoreServiceClient,

pub cluster_endpoint: Url,

@@ -219,7 +217,6 @@ pub async fn create_test_env(
let storage_resolver = StorageResolver::unconfigured();
let storage = storage_resolver.resolve(&metastore_uri).await?;
let metastore_resolver = MetastoreResolver::unconfigured();
-let metastore = metastore_resolver.resolve(&metastore_uri).await?;
let index_uri = metastore_uri.join(&index_id).unwrap();
let index_config_path = resources_dir_path.join("index_config.yaml");
fs::write(
@@ -258,7 +255,7 @@
let log_docs_path = resources_dir_path.join("logs.json");
fs::write(&log_docs_path, LOGS_JSON_DOCS)?;
let wikipedia_docs_path = resources_dir_path.join("wikis.json");
-fs::write(&wikipedia_docs_path, WIKI_JSON_DOCS)?;
+fs::write(wikipedia_docs_path, WIKI_JSON_DOCS)?;

let cluster_endpoint = Url::parse(&format!("http://localhost:{rest_listen_port}"))
.context("failed to parse cluster endpoint")?;
@@ -269,7 +266,6 @@
index_config_without_uri: uri_from_path(&index_config_without_uri_path),
index_config_with_retention: uri_from_path(&index_config_with_retention_path),
log_docs: uri_from_path(&log_docs_path),
-wikipedia_docs: uri_from_path(&wikipedia_docs_path),
};

Ok(TestEnv {
@@ -279,7 +275,6 @@
resource_files,
metastore_uri,
metastore_resolver,
-metastore,
cluster_endpoint,
index_id,
index_uri,
1 change: 1 addition & 0 deletions quickwit/quickwit-cluster/src/cluster.rs
@@ -416,6 +416,7 @@ impl Cluster {
/// Tasks are grouped by (index_id, source_id), each group is stored in a key as follows:
/// - key: `{INDEXING_TASK_PREFIX}{index_id}{INDEXING_TASK_SEPARATOR}{source_id}`
/// - value: Number of indexing tasks in the group.
+///
/// Keys present in chitchat state but not in the given `indexing_tasks` are marked for
/// deletion.
pub async fn update_self_node_indexing_tasks(&self, indexing_tasks: &[IndexingTask]) {
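A sketch of the chitchat key layout documented above; the constant values are illustrative assumptions, only the `{prefix}{index_id}{separator}{source_id}` shape comes from the doc comment.

const INDEXING_TASK_PREFIX: &str = "indexing_task:";
const INDEXING_TASK_SEPARATOR: char = ':';

fn indexing_task_key(index_id: &str, source_id: &str) -> String {
    format!("{INDEXING_TASK_PREFIX}{index_id}{INDEXING_TASK_SEPARATOR}{source_id}")
}

fn main() {
    // The value stored under this key is the number of tasks in the group.
    println!("{}", indexing_task_key("wikipedia", "my-kafka-source"));
}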
3 changes: 3 additions & 0 deletions quickwit/quickwit-codegen/example/Cargo.toml
@@ -36,3 +36,6 @@ quickwit-actors = { workspace = true, features = ["testsuite"] }

[build-dependencies]
quickwit-codegen = { workspace = true }

+[features]
+testsuite = []
22 changes: 22 additions & 0 deletions quickwit/quickwit-common/build.rs
@@ -0,0 +1,22 @@
// Copyright (C) 2024 Quickwit, Inc.
//
// Quickwit is offered under the AGPL v3.0 and as commercial software.
// For commercial licensing, contact us at [email protected].
//
// AGPL:
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License as
// published by the Free Software Foundation, either version 3 of the
// License, or (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Affero General Public License for more details.
//
// You should have received a copy of the GNU Affero General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.

fn main() {
println!("cargo::rustc-check-cfg=cfg(tokio_unstable)");
}
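This new build script declares `tokio_unstable` as an expected cfg so that code gated on it does not trip rustc's `unexpected_cfgs` lint (the `cargo::` directive syntax and the lint stabilized around Rust 1.80). A hedged sketch of the kind of code this unlocks:

// Enabled by building with RUSTFLAGS="--cfg tokio_unstable"; with the
// build.rs declaration above, this gating no longer triggers the
// `unexpected_cfgs` lint.
#[cfg(tokio_unstable)]
fn install_task_instrumentation() {
    // tokio_unstable-only instrumentation would go here
}

#[cfg(not(tokio_unstable))]
fn install_task_instrumentation() {}

fn main() {
    install_task_instrumentation();
}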
1 change: 1 addition & 0 deletions quickwit/quickwit-common/src/pubsub.rs
@@ -54,6 +54,7 @@ type EventSubscriptions<E> = HashMap<usize, EventSubscription<E>>;
/// The event broker makes it possible to
/// - emit specific local events
/// - subscribe to these local events
+///
/// The event broker is not distributed in itself. Only events emitted
/// locally will be received by the subscribers.
///
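A self-contained toy of the in-process pub/sub pattern described above (this is NOT the EventBroker API; everything below is an illustrative assumption): subscribers register callbacks, and publishing invokes them locally only.

use std::sync::{Arc, Mutex};

type Callback<E> = Box<dyn Fn(&E) + Send>;

struct ToyBroker<E> {
    subscribers: Arc<Mutex<Vec<Callback<E>>>>,
}

impl<E> ToyBroker<E> {
    fn new() -> Self {
        Self { subscribers: Arc::new(Mutex::new(Vec::new())) }
    }
    fn subscribe(&self, callback: impl Fn(&E) + Send + 'static) {
        self.subscribers.lock().unwrap().push(Box::new(callback));
    }
    fn publish(&self, event: E) {
        // Delivery is strictly in-process: no network hop, no other nodes.
        for callback in self.subscribers.lock().unwrap().iter() {
            callback(&event);
        }
    }
}

fn main() {
    let broker = ToyBroker::new();
    broker.subscribe(|event: &u32| println!("received event {event}"));
    broker.publish(42);
}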
9 changes: 4 additions & 5 deletions quickwit/quickwit-common/src/thread_pool.rs
@@ -69,12 +69,11 @@ impl ThreadPool {
///
/// Here are two important differences however:
///
-/// 1) The task runs on a rayon thread pool managed by Quickwit.
-/// This pool is specifically used only to run CPU-intensive work
-/// and is configured to contain `num_cpus` cores.
+/// 1) The task runs on a rayon thread pool managed by Quickwit. This pool is specifically used
+///    only to run CPU-intensive work and is configured to contain `num_cpus` cores.
///
-/// 2) Before the task is effectively scheduled, we check that
-/// the spawner is still interested in its result.
+/// 2) Before the task is effectively scheduled, we check that the spawner is still interested
+///    in its result.
///
/// It is therefore required to `await` the result of this
/// function to get any work done.
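A self-contained toy of the two properties above (this is not Quickwit's API; it assumes the rayon and tokio crates): CPU work runs on a rayon pool, the result comes back through a oneshot channel, and the work is skipped if the spawner already dropped the receiver.

use tokio::sync::oneshot;

fn run_cpu_intensive_toy<R, F>(task: F) -> oneshot::Receiver<R>
where
    R: Send + 'static,
    F: FnOnce() -> R + Send + 'static,
{
    let (tx, rx) = oneshot::channel();
    rayon::spawn(move || {
        // Property 2: skip the work if the spawner dropped the receiver.
        if !tx.is_closed() {
            let _ = tx.send(task());
        }
    });
    rx
}

#[tokio::main]
async fn main() {
    // Property 1: the closure runs on the rayon pool, not the tokio runtime.
    let sum = run_cpu_intensive_toy(|| (0..1_000_000u64).sum::<u64>()).await;
    println!("{sum:?}");
}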
4 changes: 2 additions & 2 deletions quickwit/quickwit-config/src/source_config/serialize.rs
@@ -68,8 +68,8 @@ impl SourceConfigForSerialization {
/// Checks the validity of the `SourceConfig` as a "deserializable source".
///
/// Two remarks:
-/// - This does not check connectivity, it just validate configuration,
-/// without performing any IO. See `check_connectivity(..)`.
+/// - This does not check connectivity, it just validate configuration, without performing any
+///   IO. See `check_connectivity(..)`.
/// - This is used each time the `SourceConfig` is deserialized (at creation but also during
/// communications with the metastore). When ingesting from stdin, we programmatically create
/// an invalid `SourceConfig` and only use it locally.
14 changes: 9 additions & 5 deletions quickwit/quickwit-config/src/storage_config.rs
@@ -18,6 +18,7 @@
// along with this program. If not, see <http://www.gnu.org/licenses/>.

use std::ops::Deref;
+use std::sync::OnceLock;
use std::{env, fmt};

use anyhow::ensure;
@@ -370,11 +371,14 @@ impl S3StorageConfig {
}

pub fn force_path_style_access(&self) -> Option<bool> {
-let force_path_style_access = get_bool_from_env(
-    "QW_S3_FORCE_PATH_STYLE_ACCESS",
-    self.force_path_style_access,
-);
-Some(force_path_style_access)
+static FORCE_PATH_STYLE: OnceLock<Option<bool>> = OnceLock::new();
+*FORCE_PATH_STYLE.get_or_init(|| {
+    let force_path_style_access = get_bool_from_env(
+        "QW_S3_FORCE_PATH_STYLE_ACCESS",
+        self.force_path_style_access,
+    );
+    Some(force_path_style_access)
+})
}
}

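The replacement memoizes the environment lookup in a `OnceLock`, so the variable is read once per process. Because the cache is a `static`, the first computed value is shared by every `S3StorageConfig` instance, which is presumably acceptable for a process-wide environment flag. A minimal standalone sketch of the same pattern (the boolean parsing below is an assumption; `get_bool_from_env`'s exact semantics are not shown in this diff):

use std::sync::OnceLock;

fn force_path_style_access() -> bool {
    static CACHE: OnceLock<bool> = OnceLock::new();
    *CACHE.get_or_init(|| {
        // Runs at most once per process; later calls return the cached value.
        std::env::var("QW_S3_FORCE_PATH_STYLE_ACCESS")
            .map(|value| value == "true" || value == "1")
            .unwrap_or(false)
    })
}

fn main() {
    println!("force_path_style_access = {}", force_path_style_access());
}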
19 changes: 18 additions & 1 deletion quickwit/quickwit-config/src/templating.rs
@@ -30,7 +30,7 @@ use tracing::debug;
// `ENV_VAR` or `ENV_VAR:DEFAULT`
// Ignores whitespaces in curly braces
static TEMPLATE_ENV_VAR_CAPTURE: Lazy<Regex> = Lazy::new(|| {
-Regex::new(r"\$\{\s*([A-Za-z0-9_]+)\s*(?::\-\s*([\S]+)\s*)?}")
+Regex::new(r"\$\{\s*([A-Za-z0-9_]+)\s*(?::\-\s*([^\s\}]+)\s*)?}")
.expect("regular expression should compile")
});

@@ -158,6 +158,23 @@
assert_eq!(rendered, "metastore_uri: s3://test-bucket/metastore");
}

+#[test]
+fn test_template_render_with_multiple_vars_per_line() {
+    let config_content =
+        b"metastore_uri: s3://${RENDER_MULTIPLE_BUCKET}/${RENDER_MULTIPLE_PREFIX:-index}#polling_interval=${RENDER_MULTIPLE_INTERVAL}s";
+    env::set_var("RENDER_MULTIPLE_BUCKET", "test-bucket");
+    env::set_var("RENDER_MULTIPLE_PREFIX", "metastore");
+    env::set_var("RENDER_MULTIPLE_INTERVAL", "30");
+    let rendered = render_config(config_content).unwrap();
+    std::env::remove_var("RENDER_MULTIPLE_BUCKET");
+    std::env::remove_var("RENDER_MULTIPLE_PREFIX");
+    std::env::remove_var("RENDER_MULTIPLE_INTERVAL");
+    assert_eq!(
+        rendered,
+        "metastore_uri: s3://test-bucket/metastore#polling_interval=30s"
+    );
+}

#[test]
fn test_template_render_ignores_commented_lines() {
{
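The regex change replaces the greedy `[\S]+` default-value capture with `[^\s\}]+`. Because `\S` matches `}`, a default value could previously swallow the closing brace and everything up to the last `}` on the line, breaking lines with several `${...}` templates; the new character class stops at the first `}`, which is exactly what the added test exercises. A small demonstration, assuming the `regex` crate:

use regex::Regex;

fn main() {
    let re = Regex::new(r"\$\{\s*([A-Za-z0-9_]+)\s*(?::\-\s*([^\s\}]+)\s*)?}").unwrap();
    let line = "s3://${BUCKET}/${PREFIX:-index}#polling_interval=${INTERVAL}s";
    for caps in re.captures_iter(line) {
        // Each template now matches separately:
        // BUCKET (no default), PREFIX (default "index"), INTERVAL (no default)
        println!("var={}, default={:?}", &caps[1], caps.get(2).map(|m| m.as_str()));
    }
}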
@@ -633,14 +633,13 @@ fn inflate_node_capacities_if_necessary(problem: &mut SchedulingProblem) {
/// to transform scheduling into a math problem.
///
/// This function implementation therefore goes
-/// - 1) transform our problem into a scheduling problem. Something closer to a well-defined
-/// optimization problem. In particular this step removes:
-/// - the notion of shard ids, and only considers a number of shards being allocated.
-/// - node_ids and shard ids. These are replaced by integers.
-/// - 2) convert the current situation of the cluster into something a previous scheduling
-/// solution.
-/// - 3) compute the new scheduling solution.
-/// - 4) convert the new scheduling solution back to the real world by reallocating the shard ids.
+/// 1) transform our problem into a scheduling problem. Something closer to a well-defined
+///    optimization problem. In particular this step removes:
+///    - the notion of shard ids, and only considers a number of shards being allocated.
+///    - node_ids and shard ids. These are replaced by integers.
+/// 2) convert the current situation of the cluster into something a previous scheduling solution.
+/// 3) compute the new scheduling solution.
+/// 4) convert the new scheduling solution back to the real world by reallocating the shard ids.
///
/// TODO cut into pipelines.
/// Panics if any sources has no shards.
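As a toy illustration of the abstraction performed in step 1 (illustrative only; the real `SchedulingProblem` types are not shown here), node ids become dense integers and shard ids collapse into per-node counts:

use std::collections::BTreeMap;

fn main() {
    // (node_id, shard_id) pairs as they exist in the real world.
    let shards = [("node-a", "shard-7"), ("node-a", "shard-9"), ("node-b", "shard-3")];
    let mut node_ords: BTreeMap<&str, usize> = BTreeMap::new();
    let mut shard_counts: BTreeMap<usize, u32> = BTreeMap::new();
    for (node_id, _shard_id) in shards {
        let next_ord = node_ords.len();
        let ord = *node_ords.entry(node_id).or_insert(next_ord);
        *shard_counts.entry(ord).or_insert(0) += 1;
    }
    // Only integers and counts remain: {0: 2, 1: 1}
    println!("{shard_counts:?}");
}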
4 changes: 2 additions & 2 deletions quickwit/quickwit-directories/src/debug_proxy_directory.rs
@@ -111,8 +111,8 @@ impl ReadOperationBuilder {
/// recording all of its read operations.
///
/// It has two purpose
-/// - It is used when building our hotcache, to identify the file sections that
-/// should be in the hotcache.
+/// - It is used when building our hotcache, to identify the file sections that should be in the
+///   hotcache.
/// - It is used in the search-api to provide debugging/performance information.
#[derive(Debug)]
pub struct DebugProxyDirectory<D: Directory> {
@@ -1735,7 +1735,7 @@ mod tests {
#[test]
fn test_parse_i64_too_large() {
let leaf = LeafType::I64(QuickwitNumericOptions::default());
-let err = leaf.value_from_json(json!(u64::max_value())).err().unwrap();
+let err = leaf.value_from_json(json!(u64::MAX)).err().unwrap();
assert_eq!(
err,
"expected i64, got inconvertible JSON number `18446744073709551615`"
1 change: 1 addition & 0 deletions quickwit/quickwit-indexing/Cargo.toml
@@ -101,6 +101,7 @@ testsuite = [
"quickwit-storage/testsuite"
]
vrl = ["dep:vrl", "quickwit-config/vrl"]
+ci-test = []

[dev-dependencies]
bytes = { workspace = true }
17 changes: 8 additions & 9 deletions quickwit/quickwit-indexing/src/actors/cooperative_indexing.rs
@@ -36,18 +36,17 @@ static ORIGIN_OF_TIME: Lazy<Instant> = Lazy::new(Instant::now);
/// Cooperative indexing is a mechanism to deal with a large amount of pipelines.
///
/// Instead of having all pipelines index concurrently, cooperative indexing:
-/// - have them take turn, making sure that at most only N pipelines are indexing
-/// at the same time. This has the benefit is reducing RAM using (by having a limited number
-/// of `IndexWriter` at the same time), reducing context switching.
-/// - keeps the different pipelines work uniformously spread in time. If the system is not
-/// at capacity, we prefer to have the indexing pipeline as desynchronized as possible
-/// to make sure they don't all use the same resources (disk/cpu/network) at the
-/// same time.
+/// - have them take turn, making sure that at most only N pipelines are indexing at the same time.
+///   This has the benefit is reducing RAM using (by having a limited number of `IndexWriter` at the
+///   same time), reducing context switching.
+/// - keeps the different pipelines work uniformously spread in time. If the system is not at
+///   capacity, we prefer to have the indexing pipeline as desynchronized as possible to make sure
+///   they don't all use the same resources (disk/cpu/network) at the same time.
///
/// It works by:
/// - a semaphore is used to restrict the number of pipelines indexing at the same time.
-/// - in the indexer when `on_drain` is called, the indexer will cut a split and
-/// "go to sleep" for a given amount of time.
+/// - in the indexer when `on_drain` is called, the indexer will cut a split and "go to sleep" for a
+///   given amount of time.
///
/// The key logic is in the computation of that sleep time.
///
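A self-contained sketch of the semaphore mechanism described above (assuming the tokio crate; this is not Quickwit's actual implementation): at most N pipelines hold a permit, i.e. index, at any moment, and releasing the permit lets the next pipeline take its turn.

use std::sync::Arc;
use std::time::Duration;
use tokio::sync::Semaphore;

#[tokio::main]
async fn main() {
    let max_concurrent_pipelines = 2;
    let semaphore = Arc::new(Semaphore::new(max_concurrent_pipelines));
    let mut handles = Vec::new();
    for pipeline_id in 0..4 {
        let semaphore = Arc::clone(&semaphore);
        handles.push(tokio::spawn(async move {
            let _permit = semaphore.acquire().await.unwrap();
            // "Index" one batch while holding the permit.
            println!("pipeline {pipeline_id} indexing");
            tokio::time::sleep(Duration::from_millis(10)).await;
            // Permit dropped here: the next pipeline takes its turn.
        }));
    }
    for handle in handles {
        handle.await.unwrap();
    }
}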
@@ -114,9 +114,9 @@ impl Ord for ScheduledMerge {
/// This actor is not supervised and should stay as simple as possible.
/// In particular,
/// - the `ScheduleMerge` handler should reply in microseconds.
-/// - the task should never be dropped before reaching its `split_downloader_mailbox` destination
-/// as it would break the consistency of `MergePlanner` with the metastore (ie: several splits will
-/// never be merged).
+/// - the task should never be dropped before reaching its `split_downloader_mailbox` destination as
+///   it would break the consistency of `MergePlanner` with the metastore (ie: several splits will
+///   never be merged).
pub struct MergeSchedulerService {
merge_semaphore: Arc<Semaphore>,
merge_concurrency: usize,
@@ -40,7 +40,7 @@ use crate::merge_policy::{splits_short_debug, MergeOperation, MergePolicy};
///
/// The policy first builds the merge operations
///
-/// 1. Build merge operations
+/// ### Build merge operations
/// We start by sorting the splits by reverse date so that the most recent splits are
/// coming first.
/// We iterate through the splits and assign them to increasing levels.
@@ -157,8 +157,8 @@ enum MergeCandidateSize {
/// We should not add an extra split in this candidate.
/// This can happen for any of the two following reasons:
/// - the number of splits involved already reached `merge_factor_max`.
-/// - the overall number of docs that will end up in the merged segment already
-/// exceeds `max_merge_docs`.
+/// - the overall number of docs that will end up in the merged segment already exceeds
+///   `max_merge_docs`.
OneMoreSplitWouldBeTooBig,
}

1 change: 1 addition & 0 deletions quickwit/quickwit-indexing/src/metrics.rs
@@ -33,6 +33,7 @@ pub struct IndexerMetrics {
pub pending_merge_operations: IntGauge,
pub pending_merge_bytes: IntGauge,
// We use a lazy counter, as most users do not use Kafka.
+#[cfg_attr(not(feature = "kafka"), allow(dead_code))]
pub kafka_rebalance_total: Lazy<IntCounter>,
}

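The new attribute silences the unused-field warning only in builds without the `kafka` feature, where the lazy counter is never touched; kafka builds still surface the warning if the field genuinely becomes dead. The pattern in isolation (assuming a crate that declares a `kafka` feature):

// `allow(dead_code)` is attached only when the `kafka` feature is off.
#[cfg_attr(not(feature = "kafka"), allow(dead_code))]
struct KafkaRebalanceCounter(u64);

fn main() {}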
2 changes: 1 addition & 1 deletion quickwit/quickwit-indexing/src/source/kafka_source.rs
@@ -125,7 +125,7 @@ macro_rules! return_if_err {
/// The rebalance protocol at a very high level:
/// - A consumer joins or leaves a consumer group.
/// - Consumers receive a revoke partitions notification, which gives them the opportunity to commit
-/// the work in progress.
+///   the work in progress.
/// - Broker waits for ALL the consumers to ack the revoke notification (synchronization barrier).
/// - Consumers receive new partition assignmennts.
///
2 changes: 1 addition & 1 deletion quickwit/quickwit-indexing/src/source/kinesis/api.rs
@@ -119,7 +119,7 @@ pub(crate) async fn list_shards(
}
}

-#[cfg(test)]
+#[cfg(all(test, feature = "kinesis-localstack-tests"))]
pub(crate) mod tests {
use std::collections::BTreeSet;
use std::time::Duration;
2 changes: 1 addition & 1 deletion quickwit/quickwit-indexing/src/source/kinesis/helpers.rs
@@ -50,7 +50,7 @@ pub async fn get_kinesis_client(region_or_endpoint: RegionOrEndpoint) -> anyhow:
Ok(Client::from_conf(kinesis_config.build()))
}

-#[cfg(test)]
+#[cfg(all(test, feature = "kinesis-localstack-tests"))]
pub(crate) mod tests {
use std::collections::HashMap;
use std::time::Duration;
5 changes: 4 additions & 1 deletion quickwit/quickwit-indexing/src/source/mod.rs
@@ -627,7 +627,10 @@ mod tests {
}
}

-#[cfg(any(feature = "kafka", feature = "sqs"))]
+#[cfg(all(
+    test,
+    any(feature = "kafka-broker-tests", feature = "sqs-localstack-tests")
+))]
pub fn with_metastore(mut self, metastore: MetastoreServiceClient) -> Self {
self.metastore_opt = Some(metastore);
self
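The kinesis and source-module changes above all follow the same pattern: integration-test code is now compiled only when building tests with the matching opt-in feature enabled, e.g. via `cargo test --features kafka-broker-tests`. The gate in isolation (assuming the features are declared in Cargo.toml):

// Compiled only for test builds that enable at least one of the features.
#[cfg(all(
    test,
    any(feature = "kafka-broker-tests", feature = "sqs-localstack-tests")
))]
mod feature_gated_integration_tests {
    #[test]
    fn smoke() {}
}

fn main() {}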
